1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 ManufacturingCostAccounting,
118 MaterialGenerator,
119 O2CDocumentChain,
120 O2CGenerator,
121 O2CGeneratorConfig,
122 O2CPaymentBehavior,
123 P2PDocumentChain,
124 P2PGenerator,
126 P2PGeneratorConfig,
127 P2PPaymentBehavior,
128 PaymentReference,
129 ProvisionGenerator,
131 QualificationGenerator,
132 RfxGenerator,
133 RiskAssessmentGenerator,
134 RunningBalanceTracker,
136 ScorecardGenerator,
137 SegmentGenerator,
139 SegmentSeed,
140 SourcingProjectGenerator,
141 SpendAnalysisGenerator,
142 ValidationError,
143 VendorGenerator,
145 WarrantyProvisionGenerator,
146 WorkpaperGenerator,
147};
148use datasynth_graph::{
149 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151 TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156 OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186 let payment_behavior = &schema_config.payment_behavior;
187 let late_dist = &payment_behavior.late_payment_days_distribution;
188
189 P2PGeneratorConfig {
190 three_way_match_rate: schema_config.three_way_match_rate,
191 partial_delivery_rate: schema_config.partial_delivery_rate,
192 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193 price_variance_rate: schema_config.price_variance_rate,
194 max_price_variance_percent: schema_config.max_price_variance_percent,
195 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198 payment_method_distribution: vec![
199 (PaymentMethod::BankTransfer, 0.60),
200 (PaymentMethod::Check, 0.25),
201 (PaymentMethod::Wire, 0.10),
202 (PaymentMethod::CreditCard, 0.05),
203 ],
204 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205 payment_behavior: P2PPaymentBehavior {
206 late_payment_rate: payment_behavior.late_payment_rate,
207 late_payment_distribution: LatePaymentDistribution {
208 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209 late_8_to_14: late_dist.late_8_to_14,
210 very_late_15_to_30: late_dist.very_late_15_to_30,
211 severely_late_31_to_60: late_dist.severely_late_31_to_60,
212 extremely_late_over_60: late_dist.extremely_late_over_60,
213 },
214 partial_payment_rate: payment_behavior.partial_payment_rate,
215 payment_correction_rate: payment_behavior.payment_correction_rate,
216 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217 },
218 }
219}
220
221fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223 let payment_behavior = &schema_config.payment_behavior;
224
225 O2CGeneratorConfig {
226 credit_check_failure_rate: schema_config.credit_check_failure_rate,
227 partial_shipment_rate: schema_config.partial_shipment_rate,
228 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232 bad_debt_rate: schema_config.bad_debt_rate,
233 returns_rate: schema_config.return_rate,
234 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235 payment_method_distribution: vec![
236 (PaymentMethod::BankTransfer, 0.50),
237 (PaymentMethod::Check, 0.30),
238 (PaymentMethod::Wire, 0.15),
239 (PaymentMethod::CreditCard, 0.05),
240 ],
241 payment_behavior: O2CPaymentBehavior {
242 partial_payment_rate: payment_behavior.partial_payments.rate,
243 short_payment_rate: payment_behavior.short_payments.rate,
244 max_short_percent: payment_behavior.short_payments.max_short_percent,
245 on_account_rate: payment_behavior.on_account_payments.rate,
246 payment_correction_rate: payment_behavior.payment_corrections.rate,
247 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248 },
249 }
250}
251
/// Switches and volume settings for the individual generation phases.
///
/// Boolean fields turn a phase on or off; `usize` fields carry the requested
/// volumes. The [`Default`] implementation directly below lists the
/// out-of-the-box values.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Whether master data is generated.
    pub generate_master_data: bool,
    /// Whether document flows are generated.
    pub generate_document_flows: bool,
    /// Whether OCPM events are generated.
    pub generate_ocpm_events: bool,
    /// Whether journal entries are generated.
    pub generate_journal_entries: bool,
    /// Whether anomalies are injected.
    pub inject_anomalies: bool,
    /// Whether data-quality issues are injected.
    pub inject_data_quality: bool,
    /// Whether balances are validated.
    pub validate_balances: bool,
    /// Whether progress is shown.
    pub show_progress: bool,
    /// Number of vendors per company.
    pub vendors_per_company: usize,
    /// Number of customers per company.
    pub customers_per_company: usize,
    /// Number of materials per company.
    pub materials_per_company: usize,
    /// Number of assets per company.
    pub assets_per_company: usize,
    /// Number of employees per company.
    pub employees_per_company: usize,
    /// Number of P2P chains.
    pub p2p_chains: usize,
    /// Number of O2C chains.
    pub o2c_chains: usize,
    /// Whether audit data is generated.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Number of workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Number of risks per engagement.
    pub risks_per_engagement: usize,
    /// Number of findings per engagement.
    pub findings_per_engagement: usize,
    /// Number of judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Whether banking data is generated.
    pub generate_banking: bool,
    /// Whether graph exports are generated.
    pub generate_graph_export: bool,
    /// Whether sourcing data is generated.
    pub generate_sourcing: bool,
    /// Whether bank reconciliations are generated.
    pub generate_bank_reconciliation: bool,
    /// Whether financial statements are generated.
    pub generate_financial_statements: bool,
    /// Whether accounting-standards data is generated.
    pub generate_accounting_standards: bool,
    /// Whether manufacturing data is generated.
    pub generate_manufacturing: bool,
    /// Whether sales quotes, KPIs and budgets are generated.
    pub generate_sales_kpi_budgets: bool,
    /// Whether tax data is generated.
    pub generate_tax: bool,
    /// Whether ESG data is generated.
    pub generate_esg: bool,
    /// Whether intercompany data is generated.
    pub generate_intercompany: bool,
    /// Whether evolution events are generated.
    pub generate_evolution_events: bool,
    /// Whether counterfactuals are generated.
    pub generate_counterfactuals: bool,
    /// Whether compliance-regulation data is generated.
    pub generate_compliance_regulations: bool,
    /// Whether period-close data is generated.
    pub generate_period_close: bool,
    /// Whether HR data is generated.
    pub generate_hr: bool,
    /// Whether treasury data is generated.
    pub generate_treasury: bool,
    /// Whether project-accounting data is generated.
    pub generate_project_accounting: bool,
}

impl Default for PhaseConfig {
    /// Returns the baseline configuration.
    ///
    /// Enabled by default: master data, document flows, journal entries,
    /// balance validation, progress display, evolution events and period
    /// close. Every other phase flag starts out disabled.
    fn default() -> Self {
        Self {
            // Phases that are on by default.
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            show_progress: true,
            generate_evolution_events: true,
            generate_period_close: true,

            // Phases that must be switched on explicitly.
            generate_ocpm_events: false,
            inject_anomalies: false,
            inject_data_quality: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            generate_counterfactuals: false,
            generate_compliance_regulations: false,
            generate_hr: false,
            generate_treasury: false,
            generate_project_accounting: false,

            // Master-data volumes.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,

            // Document-flow volumes.
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit volumes.
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
383
384impl PhaseConfig {
385 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
390 Self {
391 generate_master_data: true,
393 generate_document_flows: true,
394 generate_journal_entries: true,
395 validate_balances: true,
396 generate_period_close: true,
397 generate_evolution_events: true,
398 show_progress: true,
399
400 generate_audit: cfg.audit.enabled,
402 generate_banking: cfg.banking.enabled,
403 generate_graph_export: cfg.graph_export.enabled,
404 generate_sourcing: cfg.source_to_pay.enabled,
405 generate_intercompany: cfg.intercompany.enabled,
406 generate_financial_statements: cfg.financial_reporting.enabled,
407 generate_bank_reconciliation: cfg.financial_reporting.enabled,
408 generate_accounting_standards: cfg.accounting_standards.enabled,
409 generate_manufacturing: cfg.manufacturing.enabled,
410 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
411 generate_tax: cfg.tax.enabled,
412 generate_esg: cfg.esg.enabled,
413 generate_ocpm_events: cfg.ocpm.enabled,
414 generate_compliance_regulations: cfg.compliance_regulations.enabled,
415 generate_hr: cfg.hr.enabled,
416 generate_treasury: cfg.treasury.enabled,
417 generate_project_accounting: cfg.project_accounting.enabled,
418
419 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
421
422 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
423 inject_data_quality: cfg.data_quality.enabled,
424
425 vendors_per_company: 50,
427 customers_per_company: 100,
428 materials_per_company: 200,
429 assets_per_company: 50,
430 employees_per_company: 100,
431 p2p_chains: 100,
432 o2c_chains: 100,
433 audit_engagements: 5,
434 workpapers_per_engagement: 20,
435 evidence_per_workpaper: 5,
436 risks_per_engagement: 15,
437 findings_per_engagement: 8,
438 judgments_per_engagement: 10,
439 }
440 }
441}
442
443#[derive(Debug, Clone, Default)]
445pub struct MasterDataSnapshot {
446 pub vendors: Vec<Vendor>,
448 pub customers: Vec<Customer>,
450 pub materials: Vec<Material>,
452 pub assets: Vec<FixedAsset>,
454 pub employees: Vec<Employee>,
456 pub cost_centers: Vec<datasynth_core::models::CostCenter>,
458 pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
460}
461
/// Size figures and output location describing a hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
474
475#[derive(Debug, Clone, Default)]
477pub struct DocumentFlowSnapshot {
478 pub p2p_chains: Vec<P2PDocumentChain>,
480 pub o2c_chains: Vec<O2CDocumentChain>,
482 pub purchase_orders: Vec<documents::PurchaseOrder>,
484 pub goods_receipts: Vec<documents::GoodsReceipt>,
486 pub vendor_invoices: Vec<documents::VendorInvoice>,
488 pub sales_orders: Vec<documents::SalesOrder>,
490 pub deliveries: Vec<documents::Delivery>,
492 pub customer_invoices: Vec<documents::CustomerInvoice>,
494 pub payments: Vec<documents::Payment>,
496 pub document_references: Vec<documents::DocumentReference>,
499}
500
501#[derive(Debug, Clone, Default)]
503pub struct SubledgerSnapshot {
504 pub ap_invoices: Vec<APInvoice>,
506 pub ar_invoices: Vec<ARInvoice>,
508 pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
510 pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
512 pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
514 pub ar_aging_reports: Vec<ARAgingReport>,
516 pub ap_aging_reports: Vec<APAgingReport>,
518 pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
520 pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
522 pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
524 pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
526}
527
528#[derive(Debug, Clone, Default)]
530pub struct OcpmSnapshot {
531 pub event_log: Option<OcpmEventLog>,
533 pub event_count: usize,
535 pub object_count: usize,
537 pub case_count: usize,
539}
540
541#[derive(Debug, Clone, Default)]
543pub struct AuditSnapshot {
544 pub engagements: Vec<AuditEngagement>,
546 pub workpapers: Vec<Workpaper>,
548 pub evidence: Vec<AuditEvidence>,
550 pub risk_assessments: Vec<RiskAssessment>,
552 pub findings: Vec<AuditFinding>,
554 pub judgments: Vec<ProfessionalJudgment>,
556 pub confirmations: Vec<ExternalConfirmation>,
558 pub confirmation_responses: Vec<ConfirmationResponse>,
560 pub procedure_steps: Vec<AuditProcedureStep>,
562 pub samples: Vec<AuditSample>,
564 pub analytical_results: Vec<AnalyticalProcedureResult>,
566 pub ia_functions: Vec<InternalAuditFunction>,
568 pub ia_reports: Vec<InternalAuditReport>,
570 pub related_parties: Vec<RelatedParty>,
572 pub related_party_transactions: Vec<RelatedPartyTransaction>,
574 pub component_auditors: Vec<ComponentAuditor>,
577 pub group_audit_plan: Option<GroupAuditPlan>,
579 pub component_instructions: Vec<ComponentInstruction>,
581 pub component_reports: Vec<ComponentAuditorReport>,
583 pub engagement_letters: Vec<EngagementLetter>,
586 pub subsequent_events: Vec<SubsequentEvent>,
589 pub service_organizations: Vec<ServiceOrganization>,
592 pub soc_reports: Vec<SocReport>,
594 pub user_entity_controls: Vec<UserEntityControl>,
596 pub going_concern_assessments:
599 Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
600 pub accounting_estimates:
603 Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
604 pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
607 pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
609 pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
612 pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
614 pub materiality_calculations:
617 Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
618 pub combined_risk_assessments:
621 Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
622 pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
625 pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
627 pub significant_transaction_classes:
630 Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
631 pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
634 pub analytical_relationships:
637 Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
638 pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
641 pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
644 pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
647 pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
652}
653
654#[derive(Debug, Clone, Default)]
656pub struct BankingSnapshot {
657 pub customers: Vec<BankingCustomer>,
659 pub accounts: Vec<BankAccount>,
661 pub transactions: Vec<BankTransaction>,
663 pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
665 pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
667 pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
669 pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
671 pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
673 pub suspicious_count: usize,
675 pub scenario_count: usize,
677}
678
679#[derive(Debug, Clone, Default, Serialize)]
681pub struct GraphExportSnapshot {
682 pub exported: bool,
684 pub graph_count: usize,
686 pub exports: HashMap<String, GraphExportInfo>,
688}
689
690#[derive(Debug, Clone, Serialize)]
692pub struct GraphExportInfo {
693 pub name: String,
695 pub format: String,
697 pub output_path: PathBuf,
699 pub node_count: usize,
701 pub edge_count: usize,
703}
704
705#[derive(Debug, Clone, Default)]
707pub struct SourcingSnapshot {
708 pub spend_analyses: Vec<SpendAnalysis>,
710 pub sourcing_projects: Vec<SourcingProject>,
712 pub qualifications: Vec<SupplierQualification>,
714 pub rfx_events: Vec<RfxEvent>,
716 pub bids: Vec<SupplierBid>,
718 pub bid_evaluations: Vec<BidEvaluation>,
720 pub contracts: Vec<ProcurementContract>,
722 pub catalog_items: Vec<CatalogItem>,
724 pub scorecards: Vec<SupplierScorecard>,
726}
727
728#[derive(Debug, Clone, Serialize, Deserialize)]
730pub struct PeriodTrialBalance {
731 pub fiscal_year: u16,
733 pub fiscal_period: u8,
735 pub period_start: NaiveDate,
737 pub period_end: NaiveDate,
739 pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
741}
742
743#[derive(Debug, Clone, Default)]
745pub struct FinancialReportingSnapshot {
746 pub financial_statements: Vec<FinancialStatement>,
749 pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
752 pub consolidated_statements: Vec<FinancialStatement>,
754 pub consolidation_schedules: Vec<ConsolidationSchedule>,
756 pub bank_reconciliations: Vec<BankReconciliation>,
758 pub trial_balances: Vec<PeriodTrialBalance>,
760 pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
762 pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
764 pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
766}
767
768#[derive(Debug, Clone, Default)]
770pub struct HrSnapshot {
771 pub payroll_runs: Vec<PayrollRun>,
773 pub payroll_line_items: Vec<PayrollLineItem>,
775 pub time_entries: Vec<TimeEntry>,
777 pub expense_reports: Vec<ExpenseReport>,
779 pub benefit_enrollments: Vec<BenefitEnrollment>,
781 pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
783 pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
785 pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
787 pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
789 pub pension_journal_entries: Vec<JournalEntry>,
791 pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
793 pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
795 pub stock_comp_journal_entries: Vec<JournalEntry>,
797 pub payroll_run_count: usize,
799 pub payroll_line_item_count: usize,
801 pub time_entry_count: usize,
803 pub expense_report_count: usize,
805 pub benefit_enrollment_count: usize,
807 pub pension_plan_count: usize,
809 pub stock_grant_count: usize,
811}
812
813#[derive(Debug, Clone, Default)]
815pub struct AccountingStandardsSnapshot {
816 pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
818 pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
820 pub business_combinations:
822 Vec<datasynth_core::models::business_combination::BusinessCombination>,
823 pub business_combination_journal_entries: Vec<JournalEntry>,
825 pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
827 pub ecl_provision_movements:
829 Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
830 pub ecl_journal_entries: Vec<JournalEntry>,
832 pub provisions: Vec<datasynth_core::models::provision::Provision>,
834 pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
836 pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
838 pub provision_journal_entries: Vec<JournalEntry>,
840 pub currency_translation_results:
842 Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
843 pub revenue_contract_count: usize,
845 pub impairment_test_count: usize,
847 pub business_combination_count: usize,
849 pub ecl_model_count: usize,
851 pub provision_count: usize,
853 pub currency_translation_count: usize,
855}
856
857#[derive(Debug, Clone, Default)]
859pub struct ComplianceRegulationsSnapshot {
860 pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
862 pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
864 pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
866 pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
868 pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
870 pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
872 pub compliance_graph: Option<datasynth_graph::Graph>,
874}
875
876#[derive(Debug, Clone, Default)]
878pub struct ManufacturingSnapshot {
879 pub production_orders: Vec<ProductionOrder>,
881 pub quality_inspections: Vec<QualityInspection>,
883 pub cycle_counts: Vec<CycleCount>,
885 pub bom_components: Vec<BomComponent>,
887 pub inventory_movements: Vec<InventoryMovement>,
889 pub production_order_count: usize,
891 pub quality_inspection_count: usize,
893 pub cycle_count_count: usize,
895 pub bom_component_count: usize,
897 pub inventory_movement_count: usize,
899}
900
901#[derive(Debug, Clone, Default)]
903pub struct SalesKpiBudgetsSnapshot {
904 pub sales_quotes: Vec<SalesQuote>,
906 pub kpis: Vec<ManagementKpi>,
908 pub budgets: Vec<Budget>,
910 pub sales_quote_count: usize,
912 pub kpi_count: usize,
914 pub budget_line_count: usize,
916}
917
918#[derive(Debug, Clone, Default)]
920pub struct AnomalyLabels {
921 pub labels: Vec<LabeledAnomaly>,
923 pub summary: Option<AnomalySummary>,
925 pub by_type: HashMap<String, usize>,
927}
928
929#[derive(Debug, Clone, Default)]
931pub struct BalanceValidationResult {
932 pub validated: bool,
934 pub is_balanced: bool,
936 pub entries_processed: u64,
938 pub total_debits: rust_decimal::Decimal,
940 pub total_credits: rust_decimal::Decimal,
942 pub accounts_tracked: usize,
944 pub companies_tracked: usize,
946 pub validation_errors: Vec<ValidationError>,
948 pub has_unbalanced_entries: bool,
950}
951
952#[derive(Debug, Clone, Default)]
954pub struct TaxSnapshot {
955 pub jurisdictions: Vec<TaxJurisdiction>,
957 pub codes: Vec<TaxCode>,
959 pub tax_lines: Vec<TaxLine>,
961 pub tax_returns: Vec<TaxReturn>,
963 pub tax_provisions: Vec<TaxProvision>,
965 pub withholding_records: Vec<WithholdingTaxRecord>,
967 pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
969 pub jurisdiction_count: usize,
971 pub code_count: usize,
973 pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
975 pub tax_posting_journal_entries: Vec<JournalEntry>,
977}
978
979#[derive(Debug, Clone, Default, Serialize, Deserialize)]
981pub struct IntercompanySnapshot {
982 pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
984 pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
986 pub seller_journal_entries: Vec<JournalEntry>,
988 pub buyer_journal_entries: Vec<JournalEntry>,
990 pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
992 pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
994 #[serde(skip)]
996 pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
997 pub matched_pair_count: usize,
999 pub elimination_entry_count: usize,
1001 pub match_rate: f64,
1003}
1004
1005#[derive(Debug, Clone, Default)]
1007pub struct EsgSnapshot {
1008 pub emissions: Vec<EmissionRecord>,
1010 pub energy: Vec<EnergyConsumption>,
1012 pub water: Vec<WaterUsage>,
1014 pub waste: Vec<WasteRecord>,
1016 pub diversity: Vec<WorkforceDiversityMetric>,
1018 pub pay_equity: Vec<PayEquityMetric>,
1020 pub safety_incidents: Vec<SafetyIncident>,
1022 pub safety_metrics: Vec<SafetyMetric>,
1024 pub governance: Vec<GovernanceMetric>,
1026 pub supplier_assessments: Vec<SupplierEsgAssessment>,
1028 pub materiality: Vec<MaterialityAssessment>,
1030 pub disclosures: Vec<EsgDisclosure>,
1032 pub climate_scenarios: Vec<ClimateScenario>,
1034 pub anomaly_labels: Vec<EsgAnomalyLabel>,
1036 pub emission_count: usize,
1038 pub disclosure_count: usize,
1040}
1041
1042#[derive(Debug, Clone, Default)]
1044pub struct TreasurySnapshot {
1045 pub cash_positions: Vec<CashPosition>,
1047 pub cash_forecasts: Vec<CashForecast>,
1049 pub cash_pools: Vec<CashPool>,
1051 pub cash_pool_sweeps: Vec<CashPoolSweep>,
1053 pub hedging_instruments: Vec<HedgingInstrument>,
1055 pub hedge_relationships: Vec<HedgeRelationship>,
1057 pub debt_instruments: Vec<DebtInstrument>,
1059 pub bank_guarantees: Vec<BankGuarantee>,
1061 pub netting_runs: Vec<NettingRun>,
1063 pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1065 pub journal_entries: Vec<JournalEntry>,
1068}
1069
1070#[derive(Debug, Clone, Default)]
1072pub struct ProjectAccountingSnapshot {
1073 pub projects: Vec<Project>,
1075 pub cost_lines: Vec<ProjectCostLine>,
1077 pub revenue_records: Vec<ProjectRevenue>,
1079 pub earned_value_metrics: Vec<EarnedValueMetric>,
1081 pub change_orders: Vec<ChangeOrder>,
1083 pub milestones: Vec<ProjectMilestone>,
1085}
1086
1087#[derive(Debug, Default)]
1089pub struct EnhancedGenerationResult {
1090 pub chart_of_accounts: ChartOfAccounts,
1092 pub master_data: MasterDataSnapshot,
1094 pub document_flows: DocumentFlowSnapshot,
1096 pub subledger: SubledgerSnapshot,
1098 pub ocpm: OcpmSnapshot,
1100 pub audit: AuditSnapshot,
1102 pub banking: BankingSnapshot,
1104 pub graph_export: GraphExportSnapshot,
1106 pub sourcing: SourcingSnapshot,
1108 pub financial_reporting: FinancialReportingSnapshot,
1110 pub hr: HrSnapshot,
1112 pub accounting_standards: AccountingStandardsSnapshot,
1114 pub manufacturing: ManufacturingSnapshot,
1116 pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1118 pub tax: TaxSnapshot,
1120 pub esg: EsgSnapshot,
1122 pub treasury: TreasurySnapshot,
1124 pub project_accounting: ProjectAccountingSnapshot,
1126 pub process_evolution: Vec<ProcessEvolutionEvent>,
1128 pub organizational_events: Vec<OrganizationalEvent>,
1130 pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1132 pub intercompany: IntercompanySnapshot,
1134 pub journal_entries: Vec<JournalEntry>,
1136 pub anomaly_labels: AnomalyLabels,
1138 pub balance_validation: BalanceValidationResult,
1140 pub data_quality_stats: DataQualityStats,
1142 pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1144 pub statistics: EnhancedGenerationStatistics,
1146 pub lineage: Option<super::lineage::LineageGraph>,
1148 pub gate_result: Option<datasynth_eval::gates::GateResult>,
1150 pub internal_controls: Vec<InternalControl>,
1152 pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1156 pub opening_balances: Vec<GeneratedOpeningBalance>,
1158 pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1160 pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1162 pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1164 pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1166 pub temporal_vendor_chains:
1168 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1169 pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1171 pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1173 pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1175 pub compliance_regulations: ComplianceRegulationsSnapshot,
1177}
1178
/// Counters and timings collected during an enhanced generation run.
///
/// `EnhancedOrchestrator::generate` creates one instance (everything defaulted
/// except `companies_count` and `period_months`) and hands it to each phase as
/// `&mut stats`.
///
/// Fields annotated with `#[serde(default)]` fall back to their `Default`
/// value when they are missing from the deserialized input; the remaining
/// fields must be present.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    /// Number of journal entries; `generate` sets this to `entries.len()`
    /// after the JE-generating phases have run.
    pub total_entries: u64,
    /// Sum of `line_count()` over all journal entries (set alongside
    /// `total_entries`).
    pub total_line_items: u64,
    // NOTE(review): presumably the number of accounts in the chart of
    // accounts - it is not written in the visible part of the file.
    pub accounts_count: usize,
    /// Number of companies in the configuration (`config.companies.len()`).
    pub companies_count: usize,
    /// Copied from `config.global.period_months`.
    pub period_months: u32,

    // --- Master data counters (one per entity kind, going by the names) ---
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,

    // --- Document flow / subledger invoice counters ---
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,

    // --- OCPM counters (fields prefixed `ocpm_`) ---
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,

    // --- Audit counters (fields prefixed `audit_`) ---
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,

    // --- Anomaly / data quality counters ---
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,

    // --- Banking counters (fields prefixed `banking_`) ---
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,

    // --- Graph export counters (fields prefixed `graph_`) ---
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,

    // --- LLM / diffusion / causal statistics ---
    // NOTE(review): the `_ms` suffix suggests durations in milliseconds -
    // confirm against the code that writes these fields.
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    // `None` is the default; presumably "validation not run" - TODO confirm.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // --- Sourcing counters ---
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,

    // --- Financial reporting counters ---
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // --- HR counters ---
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    #[serde(default)]
    pub pension_plan_count: usize,
    #[serde(default)]
    pub stock_grant_count: usize,

    // --- Accounting standards counters ---
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    #[serde(default)]
    pub business_combination_count: usize,
    #[serde(default)]
    pub ecl_model_count: usize,
    #[serde(default)]
    pub provision_count: usize,

    // --- Manufacturing counters ---
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,

    // --- Sales / KPI / budget counters ---
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,

    // --- Tax master data counters ---
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,

    // --- ESG counters ---
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // --- Intercompany counters (fields prefixed `ic_`) ---
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,

    // --- Fixed-asset / inventory subledger counters ---
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // --- Treasury instrument counters ---
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,

    // --- Project accounting counters ---
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,

    // --- Tax provision, opening balance and reconciliation counters ---
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,

    // --- Cash management counters ---
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,

    // --- Evolution event counters ---
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,

    // --- Fraud analytics counters ---
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,

    // --- Temporal / relationship / cross-process counters ---
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,

    // --- Disruption, industry and period-close counters ---
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
    #[serde(default)]
    pub period_close_je_count: usize,
}
1429
/// Drives the enhanced generation workflow (see `generate`).
///
/// Built through `new`, `with_defaults`, `from_fingerprint` or
/// `from_fingerprint_data`, then optionally customized with the `with_*`
/// builder methods.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated in `new`.
    config: GeneratorConfig,
    /// Per-phase switches (e.g. `show_progress`, `generate_banking`).
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` right after construction.
    // NOTE(review): presumably filled by the chart-of-accounts phase - confirm.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts as `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// Base seed: `config.global.seed` when set, otherwise a random value.
    seed: u64,
    /// Progress bar container; created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard produced by `build_resource_guard`; rebuilt when an
    /// output path is supplied.
    resource_guard: ResourceGuard,
    /// Output directory, set through `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generator specs; populated by `from_fingerprint_data`, empty
    /// for orchestrators built with `new`.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry built in `new` from `config.country_packs`
    /// (or the built-in packs when that section is absent).
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink that `emit_phase_items` forwards items to.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1449
1450impl EnhancedOrchestrator {
1451 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1453 datasynth_config::validate_config(&config)?;
1454
1455 let seed = config.global.seed.unwrap_or_else(rand::random);
1456
1457 let resource_guard = Self::build_resource_guard(&config, None);
1459
1460 let country_pack_registry = match &config.country_packs {
1462 Some(cp) => {
1463 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1464 .map_err(|e| SynthError::config(e.to_string()))?
1465 }
1466 None => datasynth_core::CountryPackRegistry::builtin_only()
1467 .map_err(|e| SynthError::config(e.to_string()))?,
1468 };
1469
1470 Ok(Self {
1471 config,
1472 phase_config,
1473 coa: None,
1474 master_data: MasterDataSnapshot::default(),
1475 seed,
1476 multi_progress: None,
1477 resource_guard,
1478 output_path: None,
1479 copula_generators: Vec::new(),
1480 country_pack_registry,
1481 phase_sink: None,
1482 })
1483 }
1484
1485 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1487 Self::new(config, PhaseConfig::default())
1488 }
1489
1490 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1492 self.phase_sink = Some(sink);
1493 self
1494 }
1495
1496 pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1498 self.phase_sink = Some(sink);
1499 }
1500
1501 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1503 if let Some(ref sink) = self.phase_sink {
1504 for item in items {
1505 if let Ok(value) = serde_json::to_value(item) {
1506 if let Err(e) = sink.emit(phase, type_name, &value) {
1507 warn!(
1508 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1509 );
1510 }
1511 }
1512 }
1513 if let Err(e) = sink.phase_complete(phase) {
1514 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1515 }
1516 }
1517 }
1518
1519 pub fn with_progress(mut self, show: bool) -> Self {
1521 self.phase_config.show_progress = show;
1522 if show {
1523 self.multi_progress = Some(MultiProgress::new());
1524 }
1525 self
1526 }
1527
1528 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1530 let path = path.into();
1531 self.output_path = Some(path.clone());
1532 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1534 self
1535 }
1536
1537 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1539 &self.country_pack_registry
1540 }
1541
1542 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1544 self.country_pack_registry.get_by_str(country)
1545 }
1546
1547 fn primary_country_code(&self) -> &str {
1550 self.config
1551 .companies
1552 .first()
1553 .map(|c| c.country.as_str())
1554 .unwrap_or("US")
1555 }
1556
1557 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1559 self.country_pack_for(self.primary_country_code())
1560 }
1561
1562 fn resolve_coa_framework(&self) -> CoAFramework {
1564 if self.config.accounting_standards.enabled {
1565 match self.config.accounting_standards.framework {
1566 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1567 return CoAFramework::FrenchPcg;
1568 }
1569 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1570 return CoAFramework::GermanSkr04;
1571 }
1572 _ => {}
1573 }
1574 }
1575 let pack = self.primary_pack();
1577 match pack.accounting.framework.as_str() {
1578 "french_gaap" => CoAFramework::FrenchPcg,
1579 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1580 _ => CoAFramework::UsGaap,
1581 }
1582 }
1583
1584 pub fn has_copulas(&self) -> bool {
1589 !self.copula_generators.is_empty()
1590 }
1591
1592 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1598 &self.copula_generators
1599 }
1600
1601 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1605 &mut self.copula_generators
1606 }
1607
1608 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1612 self.copula_generators
1613 .iter_mut()
1614 .find(|c| c.name == copula_name)
1615 .map(|c| c.generator.sample())
1616 }
1617
1618 pub fn from_fingerprint(
1641 fingerprint_path: &std::path::Path,
1642 phase_config: PhaseConfig,
1643 scale: f64,
1644 ) -> SynthResult<Self> {
1645 info!("Loading fingerprint from: {}", fingerprint_path.display());
1646
1647 let reader = FingerprintReader::new();
1649 let fingerprint = reader
1650 .read_from_file(fingerprint_path)
1651 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1652
1653 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1654 }
1655
1656 pub fn from_fingerprint_data(
1663 fingerprint: Fingerprint,
1664 phase_config: PhaseConfig,
1665 scale: f64,
1666 ) -> SynthResult<Self> {
1667 info!(
1668 "Synthesizing config from fingerprint (version: {}, tables: {})",
1669 fingerprint.manifest.version,
1670 fingerprint.schema.tables.len()
1671 );
1672
1673 let seed: u64 = rand::random();
1675 info!("Fingerprint synthesis seed: {}", seed);
1676
1677 let options = SynthesisOptions {
1679 scale,
1680 seed: Some(seed),
1681 preserve_correlations: true,
1682 inject_anomalies: true,
1683 };
1684 let synthesizer = ConfigSynthesizer::with_options(options);
1685
1686 let synthesis_result = synthesizer
1688 .synthesize_full(&fingerprint, seed)
1689 .map_err(|e| {
1690 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1691 })?;
1692
1693 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1695 Self::base_config_for_industry(industry)
1696 } else {
1697 Self::base_config_for_industry("manufacturing")
1698 };
1699
1700 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1702
1703 info!(
1705 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1706 fingerprint.schema.tables.len(),
1707 scale,
1708 synthesis_result.copula_generators.len()
1709 );
1710
1711 if !synthesis_result.copula_generators.is_empty() {
1712 for spec in &synthesis_result.copula_generators {
1713 info!(
1714 " Copula '{}' for table '{}': {} columns",
1715 spec.name,
1716 spec.table,
1717 spec.columns.len()
1718 );
1719 }
1720 }
1721
1722 let mut orchestrator = Self::new(config, phase_config)?;
1724
1725 orchestrator.copula_generators = synthesis_result.copula_generators;
1727
1728 Ok(orchestrator)
1729 }
1730
1731 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1733 use datasynth_config::presets::create_preset;
1734 use datasynth_config::TransactionVolume;
1735 use datasynth_core::models::{CoAComplexity, IndustrySector};
1736
1737 let sector = match industry.to_lowercase().as_str() {
1738 "manufacturing" => IndustrySector::Manufacturing,
1739 "retail" => IndustrySector::Retail,
1740 "financial" | "financial_services" => IndustrySector::FinancialServices,
1741 "healthcare" => IndustrySector::Healthcare,
1742 "technology" | "tech" => IndustrySector::Technology,
1743 _ => IndustrySector::Manufacturing,
1744 };
1745
1746 create_preset(
1748 sector,
1749 1, 12, CoAComplexity::Medium,
1752 TransactionVolume::TenK,
1753 )
1754 }
1755
1756 fn apply_config_patch(
1758 mut config: GeneratorConfig,
1759 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1760 ) -> GeneratorConfig {
1761 use datasynth_fingerprint::synthesis::ConfigValue;
1762
1763 for (key, value) in patch.values() {
1764 match (key.as_str(), value) {
1765 ("transactions.count", ConfigValue::Integer(n)) => {
1768 info!(
1769 "Fingerprint suggests {} transactions (apply via company volumes)",
1770 n
1771 );
1772 }
1773 ("global.period_months", ConfigValue::Integer(n)) => {
1774 config.global.period_months = (*n).clamp(1, 120) as u32;
1775 }
1776 ("global.start_date", ConfigValue::String(s)) => {
1777 config.global.start_date = s.clone();
1778 }
1779 ("global.seed", ConfigValue::Integer(n)) => {
1780 config.global.seed = Some(*n as u64);
1781 }
1782 ("fraud.enabled", ConfigValue::Bool(b)) => {
1783 config.fraud.enabled = *b;
1784 }
1785 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1786 config.fraud.fraud_rate = *f;
1787 }
1788 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1789 config.data_quality.enabled = *b;
1790 }
1791 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1793 config.fraud.enabled = *b;
1794 }
1795 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1796 config.fraud.fraud_rate = *f;
1797 }
1798 _ => {
1799 debug!("Ignoring unknown config patch key: {}", key);
1800 }
1801 }
1802 }
1803
1804 config
1805 }
1806
1807 fn build_resource_guard(
1809 config: &GeneratorConfig,
1810 output_path: Option<PathBuf>,
1811 ) -> ResourceGuard {
1812 let mut builder = ResourceGuardBuilder::new();
1813
1814 if config.global.memory_limit_mb > 0 {
1816 builder = builder.memory_limit(config.global.memory_limit_mb);
1817 }
1818
1819 if let Some(path) = output_path {
1821 builder = builder.output_path(path).min_free_disk(100); }
1823
1824 builder = builder.conservative();
1826
1827 builder.build()
1828 }
1829
1830 fn check_resources(&self) -> SynthResult<DegradationLevel> {
1835 self.resource_guard.check()
1836 }
1837
1838 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1840 let level = self.resource_guard.check()?;
1841
1842 if level != DegradationLevel::Normal {
1843 warn!(
1844 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1845 phase,
1846 level,
1847 self.resource_guard.current_memory_mb(),
1848 self.resource_guard.available_disk_mb()
1849 );
1850 }
1851
1852 Ok(level)
1853 }
1854
1855 fn get_degradation_actions(&self) -> DegradationActions {
1857 self.resource_guard.get_actions()
1858 }
1859
1860 fn check_memory_limit(&self) -> SynthResult<()> {
1862 self.check_resources()?;
1863 Ok(())
1864 }
1865
1866 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1868 info!("Starting enhanced generation workflow");
1869 info!(
1870 "Config: industry={:?}, period_months={}, companies={}",
1871 self.config.global.industry,
1872 self.config.global.period_months,
1873 self.config.companies.len()
1874 );
1875
1876 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
1879 datasynth_core::serde_decimal::set_numeric_native(is_native);
1880 struct NumericModeGuard;
1881 impl Drop for NumericModeGuard {
1882 fn drop(&mut self) {
1883 datasynth_core::serde_decimal::set_numeric_native(false);
1884 }
1885 }
1886 let _numeric_guard = if is_native {
1887 Some(NumericModeGuard)
1888 } else {
1889 None
1890 };
1891
1892 let initial_level = self.check_resources_with_log("initial")?;
1894 if initial_level == DegradationLevel::Emergency {
1895 return Err(SynthError::resource(
1896 "Insufficient resources to start generation",
1897 ));
1898 }
1899
1900 let mut stats = EnhancedGenerationStatistics {
1901 companies_count: self.config.companies.len(),
1902 period_months: self.config.global.period_months,
1903 ..Default::default()
1904 };
1905
1906 let coa = self.phase_chart_of_accounts(&mut stats)?;
1908
1909 self.phase_master_data(&mut stats)?;
1911
1912 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1914 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1915 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1916
1917 let (mut document_flows, mut subledger, fa_journal_entries) =
1919 self.phase_document_flows(&mut stats)?;
1920
1921 self.emit_phase_items(
1923 "document_flows",
1924 "PurchaseOrder",
1925 &document_flows.purchase_orders,
1926 );
1927 self.emit_phase_items(
1928 "document_flows",
1929 "GoodsReceipt",
1930 &document_flows.goods_receipts,
1931 );
1932 self.emit_phase_items(
1933 "document_flows",
1934 "VendorInvoice",
1935 &document_flows.vendor_invoices,
1936 );
1937 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1938 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1939
1940 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1942
1943 let opening_balance_jes: Vec<JournalEntry> = opening_balances
1948 .iter()
1949 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1950 .collect();
1951 if !opening_balance_jes.is_empty() {
1952 debug!(
1953 "Prepending {} opening balance JEs to entries",
1954 opening_balance_jes.len()
1955 );
1956 }
1957
1958 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1960
1961 if !opening_balance_jes.is_empty() {
1964 let mut combined = opening_balance_jes;
1965 combined.extend(entries);
1966 entries = combined;
1967 }
1968
1969 if !fa_journal_entries.is_empty() {
1971 debug!(
1972 "Appending {} FA acquisition JEs to main entries",
1973 fa_journal_entries.len()
1974 );
1975 entries.extend(fa_journal_entries);
1976 }
1977
1978 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1980
1981 let actions = self.get_degradation_actions();
1983
1984 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1986
1987 if !sourcing.contracts.is_empty() {
1990 let mut linked_count = 0usize;
1991 let po_vendor_pairs: Vec<(String, String)> = document_flows
1993 .p2p_chains
1994 .iter()
1995 .map(|chain| {
1996 (
1997 chain.purchase_order.vendor_id.clone(),
1998 chain.purchase_order.header.document_id.clone(),
1999 )
2000 })
2001 .collect();
2002
2003 for chain in &mut document_flows.p2p_chains {
2004 if chain.purchase_order.contract_id.is_none() {
2005 if let Some(contract) = sourcing
2006 .contracts
2007 .iter()
2008 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2009 {
2010 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2011 linked_count += 1;
2012 }
2013 }
2014 }
2015
2016 for contract in &mut sourcing.contracts {
2018 let po_ids: Vec<String> = po_vendor_pairs
2019 .iter()
2020 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2021 .map(|(_, po_id)| po_id.clone())
2022 .collect();
2023 if !po_ids.is_empty() {
2024 contract.purchase_order_ids = po_ids;
2025 }
2026 }
2027
2028 if linked_count > 0 {
2029 debug!(
2030 "Linked {} purchase orders to S2C contracts by vendor match",
2031 linked_count
2032 );
2033 }
2034 }
2035
2036 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2038
2039 if !intercompany.seller_journal_entries.is_empty()
2041 || !intercompany.buyer_journal_entries.is_empty()
2042 {
2043 let ic_je_count = intercompany.seller_journal_entries.len()
2044 + intercompany.buyer_journal_entries.len();
2045 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2046 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2047 debug!(
2048 "Appended {} IC journal entries to main entries",
2049 ic_je_count
2050 );
2051 }
2052
2053 if !intercompany.elimination_entries.is_empty() {
2055 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2056 &intercompany.elimination_entries,
2057 );
2058 if !elim_jes.is_empty() {
2059 debug!(
2060 "Appended {} elimination journal entries to main entries",
2061 elim_jes.len()
2062 );
2063 let elim_debit: rust_decimal::Decimal =
2065 elim_jes.iter().map(|je| je.total_debit()).sum();
2066 let elim_credit: rust_decimal::Decimal =
2067 elim_jes.iter().map(|je| je.total_credit()).sum();
2068 let elim_diff = (elim_debit - elim_credit).abs();
2069 let tolerance = rust_decimal::Decimal::new(1, 2); if elim_diff > tolerance {
2071 return Err(datasynth_core::error::SynthError::generation(format!(
2072 "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2073 elim_debit, elim_credit, elim_diff, tolerance
2074 )));
2075 }
2076 debug!(
2077 "IC elimination balance verified: debits={}, credits={} (diff={})",
2078 elim_debit, elim_credit, elim_diff
2079 );
2080 entries.extend(elim_jes);
2081 }
2082 }
2083
2084 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2086 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2087 document_flows
2088 .customer_invoices
2089 .extend(ic_docs.seller_invoices.iter().cloned());
2090 document_flows
2091 .purchase_orders
2092 .extend(ic_docs.buyer_orders.iter().cloned());
2093 document_flows
2094 .goods_receipts
2095 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2096 document_flows
2097 .vendor_invoices
2098 .extend(ic_docs.buyer_invoices.iter().cloned());
2099 debug!(
2100 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2101 ic_docs.seller_invoices.len(),
2102 ic_docs.buyer_orders.len(),
2103 ic_docs.buyer_goods_receipts.len(),
2104 ic_docs.buyer_invoices.len(),
2105 );
2106 }
2107 }
2108
2109 let hr = self.phase_hr_data(&mut stats)?;
2111
2112 if !hr.payroll_runs.is_empty() {
2114 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2115 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2116 entries.extend(payroll_jes);
2117 }
2118
2119 if !hr.pension_journal_entries.is_empty() {
2121 debug!(
2122 "Generated {} JEs from pension plans",
2123 hr.pension_journal_entries.len()
2124 );
2125 entries.extend(hr.pension_journal_entries.iter().cloned());
2126 }
2127
2128 if !hr.stock_comp_journal_entries.is_empty() {
2130 debug!(
2131 "Generated {} JEs from stock-based compensation",
2132 hr.stock_comp_journal_entries.len()
2133 );
2134 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2135 }
2136
2137 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2139
2140 if !manufacturing_snap.production_orders.is_empty() {
2142 let currency = self
2143 .config
2144 .companies
2145 .first()
2146 .map(|c| c.currency.as_str())
2147 .unwrap_or("USD");
2148 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2149 &manufacturing_snap.production_orders,
2150 &manufacturing_snap.quality_inspections,
2151 currency,
2152 );
2153 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2154 entries.extend(mfg_jes);
2155 }
2156
2157 if !manufacturing_snap.quality_inspections.is_empty() {
2159 let framework = match self.config.accounting_standards.framework {
2160 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2161 _ => "US_GAAP",
2162 };
2163 for company in &self.config.companies {
2164 let company_orders: Vec<_> = manufacturing_snap
2165 .production_orders
2166 .iter()
2167 .filter(|o| o.company_code == company.code)
2168 .cloned()
2169 .collect();
2170 let company_inspections: Vec<_> = manufacturing_snap
2171 .quality_inspections
2172 .iter()
2173 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2174 .cloned()
2175 .collect();
2176 if company_inspections.is_empty() {
2177 continue;
2178 }
2179 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2180 let warranty_result = warranty_gen.generate(
2181 &company.code,
2182 &company_orders,
2183 &company_inspections,
2184 &company.currency,
2185 framework,
2186 );
2187 if !warranty_result.journal_entries.is_empty() {
2188 debug!(
2189 "Generated {} warranty provision JEs for {}",
2190 warranty_result.journal_entries.len(),
2191 company.code
2192 );
2193 entries.extend(warranty_result.journal_entries);
2194 }
2195 }
2196 }
2197
2198 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2200 {
2201 let cogs_currency = self
2202 .config
2203 .companies
2204 .first()
2205 .map(|c| c.currency.as_str())
2206 .unwrap_or("USD");
2207 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2208 &document_flows.deliveries,
2209 &manufacturing_snap.production_orders,
2210 cogs_currency,
2211 );
2212 if !cogs_jes.is_empty() {
2213 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2214 entries.extend(cogs_jes);
2215 }
2216 }
2217
2218 if !manufacturing_snap.inventory_movements.is_empty()
2224 && !subledger.inventory_positions.is_empty()
2225 {
2226 use datasynth_core::models::MovementType as MfgMovementType;
2227 let mut receipt_count = 0usize;
2228 let mut issue_count = 0usize;
2229 for movement in &manufacturing_snap.inventory_movements {
2230 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2232 p.material_id == movement.material_code
2233 && p.company_code == movement.entity_code
2234 }) {
2235 match movement.movement_type {
2236 MfgMovementType::GoodsReceipt => {
2237 pos.add_quantity(
2239 movement.quantity,
2240 movement.value,
2241 movement.movement_date,
2242 );
2243 receipt_count += 1;
2244 }
2245 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2246 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2248 issue_count += 1;
2249 }
2250 _ => {}
2251 }
2252 }
2253 }
2254 debug!(
2255 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2256 manufacturing_snap.inventory_movements.len(),
2257 receipt_count,
2258 issue_count,
2259 );
2260 }
2261
2262 if !entries.is_empty() {
2265 stats.total_entries = entries.len() as u64;
2266 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2267 debug!(
2268 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2269 stats.total_entries, stats.total_line_items
2270 );
2271 }
2272
2273 if self.config.internal_controls.enabled && !entries.is_empty() {
2275 info!("Phase 7b: Applying internal controls to journal entries");
2276 let control_config = ControlGeneratorConfig {
2277 exception_rate: self.config.internal_controls.exception_rate,
2278 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2279 enable_sox_marking: true,
2280 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2281 self.config.internal_controls.sox_materiality_threshold,
2282 )
2283 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2284 ..Default::default()
2285 };
2286 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2287 for entry in &mut entries {
2288 control_gen.apply_controls(entry, &coa);
2289 }
2290 let with_controls = entries
2291 .iter()
2292 .filter(|e| !e.header.control_ids.is_empty())
2293 .count();
2294 info!(
2295 "Applied controls to {} entries ({} with control IDs assigned)",
2296 entries.len(),
2297 with_controls
2298 );
2299 }
2300
2301 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2305 .iter()
2306 .filter(|e| e.header.sod_violation)
2307 .filter_map(|e| {
2308 e.header.sod_conflict_type.map(|ct| {
2309 use datasynth_core::models::{RiskLevel, SodViolation};
2310 let severity = match ct {
2311 datasynth_core::models::SodConflictType::PaymentReleaser
2312 | datasynth_core::models::SodConflictType::RequesterApprover => {
2313 RiskLevel::Critical
2314 }
2315 datasynth_core::models::SodConflictType::PreparerApprover
2316 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2317 | datasynth_core::models::SodConflictType::JournalEntryPoster
2318 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2319 RiskLevel::High
2320 }
2321 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2322 RiskLevel::Medium
2323 }
2324 };
2325 let action = format!(
2326 "SoD conflict {:?} on entry {} ({})",
2327 ct, e.header.document_id, e.header.company_code
2328 );
2329 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2330 })
2331 })
2332 .collect();
2333 if !sod_violations.is_empty() {
2334 info!(
2335 "Phase 7c: Extracted {} SoD violations from {} entries",
2336 sod_violations.len(),
2337 entries.len()
2338 );
2339 }
2340
2341 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2343
2344 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2346
2347 self.emit_phase_items(
2349 "anomaly_injection",
2350 "LabeledAnomaly",
2351 &anomaly_labels.labels,
2352 );
2353
2354 {
2358 use std::collections::HashMap;
2359 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2372 for je in &entries {
2373 if je.header.is_fraud {
2374 if let Some(ref fraud_type) = je.header.fraud_type {
2375 if let Some(ref reference) = je.header.reference {
2376 fraud_map.insert(reference.clone(), *fraud_type);
2378 if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2381 if !bare.is_empty() {
2382 fraud_map.insert(bare.to_string(), *fraud_type);
2383 }
2384 }
2385 }
2386 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2388 }
2389 }
2390 }
2391 if !fraud_map.is_empty() {
2392 let mut propagated = 0usize;
2393 macro_rules! propagate_to {
2395 ($collection:expr) => {
2396 for doc in &mut $collection {
2397 if doc.header.propagate_fraud(&fraud_map) {
2398 propagated += 1;
2399 }
2400 }
2401 };
2402 }
2403 propagate_to!(document_flows.purchase_orders);
2404 propagate_to!(document_flows.goods_receipts);
2405 propagate_to!(document_flows.vendor_invoices);
2406 propagate_to!(document_flows.payments);
2407 propagate_to!(document_flows.sales_orders);
2408 propagate_to!(document_flows.deliveries);
2409 propagate_to!(document_flows.customer_invoices);
2410 if propagated > 0 {
2411 info!(
2412 "Propagated fraud labels to {} document flow records",
2413 propagated
2414 );
2415 }
2416 }
2417 }
2418
2419 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2421
2422 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2424
2425 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2427
2428 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2430
2431 let balance_validation = self.phase_balance_validation(&entries)?;
2433
2434 let subledger_reconciliation =
2436 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2437
2438 let (data_quality_stats, quality_issues) =
2440 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2441
2442 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2444
2445 {
2447 let tolerance = rust_decimal::Decimal::new(1, 2); let mut unbalanced_clean = 0usize;
2452 for je in &entries {
2453 if je.header.is_fraud || je.header.is_anomaly {
2454 continue;
2455 }
2456 let diff = (je.total_debit() - je.total_credit()).abs();
2457 if diff > tolerance {
2458 unbalanced_clean += 1;
2459 if unbalanced_clean <= 3 {
2460 warn!(
2461 "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2462 je.header.document_id,
2463 je.total_debit(),
2464 je.total_credit(),
2465 diff
2466 );
2467 }
2468 }
2469 }
2470 if unbalanced_clean > 0 {
2471 return Err(datasynth_core::error::SynthError::generation(format!(
2472 "{} non-anomaly JEs are unbalanced (debits != credits). \
2473 First few logged above. Tolerance={}",
2474 unbalanced_clean, tolerance
2475 )));
2476 }
2477 debug!(
2478 "Phase 10c: All {} non-anomaly JEs individually balanced",
2479 entries
2480 .iter()
2481 .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2482 .count()
2483 );
2484
2485 let company_codes: Vec<String> = self
2487 .config
2488 .companies
2489 .iter()
2490 .map(|c| c.code.clone())
2491 .collect();
2492 for company_code in &company_codes {
2493 let mut assets = rust_decimal::Decimal::ZERO;
2494 let mut liab_equity = rust_decimal::Decimal::ZERO;
2495
2496 for entry in &entries {
2497 if entry.header.company_code != *company_code {
2498 continue;
2499 }
2500 for line in &entry.lines {
2501 let acct = &line.gl_account;
2502 let net = line.debit_amount - line.credit_amount;
2503 if acct.starts_with('1') {
2505 assets += net;
2506 }
2507 else if acct.starts_with('2') || acct.starts_with('3') {
2509 liab_equity -= net; }
2511 }
2514 }
2515
2516 let bs_diff = (assets - liab_equity).abs();
2517 if bs_diff > tolerance {
2518 warn!(
2519 "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
2520 revenue/expense closing entries may not fully offset",
2521 company_code, assets, liab_equity, bs_diff
2522 );
2523 } else {
2527 debug!(
2528 "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
2529 company_code, assets, liab_equity, bs_diff
2530 );
2531 }
2532 }
2533
2534 info!("Phase 10c: All generation-time accounting assertions passed");
2535 }
2536
2537 let audit = self.phase_audit_data(&entries, &mut stats)?;
2539
2540 let mut banking = self.phase_banking_data(&mut stats)?;
2542
2543 if self.phase_config.generate_banking
2548 && !document_flows.payments.is_empty()
2549 && !banking.accounts.is_empty()
2550 {
2551 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2552 if bridge_rate > 0.0 {
2553 let mut bridge =
2554 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2555 self.seed,
2556 );
2557 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2558 &document_flows.payments,
2559 &banking.customers,
2560 &banking.accounts,
2561 bridge_rate,
2562 );
2563 info!(
2564 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2565 bridge_stats.bridged_count,
2566 bridge_stats.transactions_emitted,
2567 bridge_stats.fraud_propagated,
2568 );
2569 let bridged_count = bridged_txns.len();
2570 banking.transactions.extend(bridged_txns);
2571
2572 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2575 datasynth_banking::generators::velocity_computer::compute_velocity_features(
2576 &mut banking.transactions,
2577 );
2578 }
2579
2580 banking.suspicious_count = banking
2582 .transactions
2583 .iter()
2584 .filter(|t| t.is_suspicious)
2585 .count();
2586 stats.banking_transaction_count = banking.transactions.len();
2587 stats.banking_suspicious_count = banking.suspicious_count;
2588 }
2589 }
2590
2591 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2593
2594 self.phase_llm_enrichment(&mut stats);
2596
2597 self.phase_diffusion_enhancement(&mut stats);
2599
2600 self.phase_causal_overlay(&mut stats);
2602
2603 let mut financial_reporting = self.phase_financial_reporting(
2607 &document_flows,
2608 &entries,
2609 &coa,
2610 &hr,
2611 &audit,
2612 &mut stats,
2613 )?;
2614
2615 {
2617 use datasynth_core::models::StatementType;
2618 for stmt in &financial_reporting.consolidated_statements {
2619 if stmt.statement_type == StatementType::BalanceSheet {
2620 let total_assets: rust_decimal::Decimal = stmt
2621 .line_items
2622 .iter()
2623 .filter(|li| li.section.to_uppercase().contains("ASSET"))
2624 .map(|li| li.amount)
2625 .sum();
2626 let total_le: rust_decimal::Decimal = stmt
2627 .line_items
2628 .iter()
2629 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2630 .map(|li| li.amount)
2631 .sum();
2632 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2633 warn!(
2634 "BS equation imbalance: assets={}, L+E={}",
2635 total_assets, total_le
2636 );
2637 }
2638 }
2639 }
2640 }
2641
2642 let accounting_standards =
2644 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2645
2646 if !accounting_standards.ecl_journal_entries.is_empty() {
2648 debug!(
2649 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2650 accounting_standards.ecl_journal_entries.len()
2651 );
2652 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2653 }
2654
2655 if !accounting_standards.provision_journal_entries.is_empty() {
2657 debug!(
2658 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2659 accounting_standards.provision_journal_entries.len()
2660 );
2661 entries.extend(
2662 accounting_standards
2663 .provision_journal_entries
2664 .iter()
2665 .cloned(),
2666 );
2667 }
2668
2669 let ocpm = self.phase_ocpm_events(
2671 &document_flows,
2672 &sourcing,
2673 &hr,
2674 &manufacturing_snap,
2675 &banking,
2676 &audit,
2677 &financial_reporting,
2678 &mut stats,
2679 )?;
2680
2681 if let Some(ref event_log) = ocpm.event_log {
2683 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2684 }
2685
2686 if let Some(ref event_log) = ocpm.event_log {
2688 let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
2690 std::collections::HashMap::new();
2691 for (idx, event) in event_log.events.iter().enumerate() {
2692 if let Some(ref doc_ref) = event.document_ref {
2693 doc_index.entry(doc_ref.as_str()).or_default().push(idx);
2694 }
2695 }
2696
2697 if !doc_index.is_empty() {
2698 let mut annotated = 0usize;
2699 for entry in &mut entries {
2700 let doc_id_str = entry.header.document_id.to_string();
2701 let mut matched_indices: Vec<usize> = Vec::new();
2703 if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
2704 matched_indices.extend(indices);
2705 }
2706 if let Some(ref reference) = entry.header.reference {
2707 let bare_ref = reference
2708 .find(':')
2709 .map(|i| &reference[i + 1..])
2710 .unwrap_or(reference.as_str());
2711 if let Some(indices) = doc_index.get(bare_ref) {
2712 for &idx in indices {
2713 if !matched_indices.contains(&idx) {
2714 matched_indices.push(idx);
2715 }
2716 }
2717 }
2718 }
2719 if !matched_indices.is_empty() {
2721 for &idx in &matched_indices {
2722 let event = &event_log.events[idx];
2723 if !entry.header.ocpm_event_ids.contains(&event.event_id) {
2724 entry.header.ocpm_event_ids.push(event.event_id);
2725 }
2726 for obj_ref in &event.object_refs {
2727 if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
2728 entry.header.ocpm_object_ids.push(obj_ref.object_id);
2729 }
2730 }
2731 if entry.header.ocpm_case_id.is_none() {
2732 entry.header.ocpm_case_id = event.case_id;
2733 }
2734 }
2735 annotated += 1;
2736 }
2737 }
2738 debug!(
2739 "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
2740 annotated
2741 );
2742 }
2743 }
2744
2745 let sales_kpi_budgets =
2747 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2748
2749 let treasury =
2753 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2754
2755 if !treasury.journal_entries.is_empty() {
2757 debug!(
2758 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
2759 treasury.journal_entries.len()
2760 );
2761 entries.extend(treasury.journal_entries.iter().cloned());
2762 }
2763
2764 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2766
2767 if !tax.tax_posting_journal_entries.is_empty() {
2769 debug!(
2770 "Merging {} tax posting JEs into GL",
2771 tax.tax_posting_journal_entries.len()
2772 );
2773 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
2774 }
2775
2776 {
2780 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
2781
2782 let framework_str = {
2783 use datasynth_config::schema::AccountingFrameworkConfig;
2784 match self
2785 .config
2786 .accounting_standards
2787 .framework
2788 .unwrap_or_default()
2789 {
2790 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
2791 "IFRS"
2792 }
2793 _ => "US_GAAP",
2794 }
2795 };
2796
2797 let depreciation_total: rust_decimal::Decimal = entries
2799 .iter()
2800 .filter(|je| je.header.document_type == "CL")
2801 .flat_map(|je| je.lines.iter())
2802 .filter(|l| l.gl_account.starts_with("6000"))
2803 .map(|l| l.debit_amount)
2804 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2805
2806 let interest_paid: rust_decimal::Decimal = entries
2808 .iter()
2809 .flat_map(|je| je.lines.iter())
2810 .filter(|l| l.gl_account.starts_with("7100"))
2811 .map(|l| l.debit_amount)
2812 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2813
2814 let tax_paid: rust_decimal::Decimal = entries
2816 .iter()
2817 .flat_map(|je| je.lines.iter())
2818 .filter(|l| l.gl_account.starts_with("8000"))
2819 .map(|l| l.debit_amount)
2820 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2821
2822 let capex: rust_decimal::Decimal = entries
2824 .iter()
2825 .flat_map(|je| je.lines.iter())
2826 .filter(|l| l.gl_account.starts_with("1500"))
2827 .map(|l| l.debit_amount)
2828 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2829
2830 let dividends_paid: rust_decimal::Decimal = entries
2832 .iter()
2833 .flat_map(|je| je.lines.iter())
2834 .filter(|l| l.gl_account == "2170")
2835 .map(|l| l.debit_amount)
2836 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2837
2838 let cf_data = CashFlowSourceData {
2839 depreciation_total,
2840 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
2842 delta_ap: rust_decimal::Decimal::ZERO,
2843 delta_inventory: rust_decimal::Decimal::ZERO,
2844 capex,
2845 debt_issuance: rust_decimal::Decimal::ZERO,
2846 debt_repayment: rust_decimal::Decimal::ZERO,
2847 interest_paid,
2848 tax_paid,
2849 dividends_paid,
2850 framework: framework_str.to_string(),
2851 };
2852
2853 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
2854 if !enhanced_cf_items.is_empty() {
2855 use datasynth_core::models::StatementType;
2857 let merge_count = enhanced_cf_items.len();
2858 for stmt in financial_reporting
2859 .financial_statements
2860 .iter_mut()
2861 .chain(financial_reporting.consolidated_statements.iter_mut())
2862 .chain(
2863 financial_reporting
2864 .standalone_statements
2865 .values_mut()
2866 .flat_map(|v| v.iter_mut()),
2867 )
2868 {
2869 if stmt.statement_type == StatementType::CashFlowStatement {
2870 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
2871 }
2872 }
2873 info!(
2874 "Enhanced cash flow: {} supplementary items merged into CF statements",
2875 merge_count
2876 );
2877 }
2878 }
2879
2880 self.generate_notes_to_financial_statements(
2883 &mut financial_reporting,
2884 &accounting_standards,
2885 &tax,
2886 &hr,
2887 &audit,
2888 &treasury,
2889 );
2890
2891 if self.config.companies.len() >= 2 && !entries.is_empty() {
2895 let companies: Vec<(String, String)> = self
2896 .config
2897 .companies
2898 .iter()
2899 .map(|c| (c.code.clone(), c.name.clone()))
2900 .collect();
2901 let ic_elim: rust_decimal::Decimal =
2902 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
2903 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2904 .unwrap_or(NaiveDate::MIN);
2905 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2906 let period_label = format!(
2907 "{}-{:02}",
2908 end_date.year(),
2909 (end_date - chrono::Days::new(1)).month()
2910 );
2911
2912 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
2913 let (je_segments, je_recon) =
2914 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
2915 if !je_segments.is_empty() {
2916 info!(
2917 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
2918 je_segments.len(),
2919 ic_elim,
2920 );
2921 if financial_reporting.segment_reports.is_empty() {
2923 financial_reporting.segment_reports = je_segments;
2924 financial_reporting.segment_reconciliations = vec![je_recon];
2925 } else {
2926 financial_reporting.segment_reports.extend(je_segments);
2927 financial_reporting.segment_reconciliations.push(je_recon);
2928 }
2929 }
2930 }
2931
2932 let esg_snap =
2934 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
2935
2936 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
2938
2939 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
2941
2942 let disruption_events = self.phase_disruption_events(&mut stats)?;
2944
2945 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
2947
2948 let (entity_relationship_graph, cross_process_links) =
2950 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
2951
2952 let industry_output = self.phase_industry_data(&mut stats);
2954
2955 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
2957
2958 if self.config.diffusion.enabled
2960 && (self.config.diffusion.backend == "neural"
2961 || self.config.diffusion.backend == "hybrid")
2962 {
2963 debug!(
2964 "Neural enhancement requested (backend={}). \
2965 Train from generated data or load pre-trained model via config.",
2966 self.config.diffusion.backend
2967 );
2968 }
2977
2978 self.phase_hypergraph_export(
2980 &coa,
2981 &entries,
2982 &document_flows,
2983 &sourcing,
2984 &hr,
2985 &manufacturing_snap,
2986 &banking,
2987 &audit,
2988 &financial_reporting,
2989 &ocpm,
2990 &compliance_regulations,
2991 &mut stats,
2992 )?;
2993
2994 if self.phase_config.generate_graph_export {
2997 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
2998 }
2999
3000 if self.config.streaming.enabled {
3002 info!("Note: streaming config is enabled but batch mode does not use it");
3003 }
3004 if self.config.vendor_network.enabled {
3005 debug!("Vendor network config available; relationship graph generation is partial");
3006 }
3007 if self.config.customer_segmentation.enabled {
3008 debug!("Customer segmentation config available; segment-aware generation is partial");
3009 }
3010
3011 let resource_stats = self.resource_guard.stats();
3013 info!(
3014 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3015 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3016 resource_stats.disk.estimated_bytes_written,
3017 resource_stats.degradation_level
3018 );
3019
3020 if let Some(ref sink) = self.phase_sink {
3022 if let Err(e) = sink.flush() {
3023 warn!("Stream sink flush failed: {e}");
3024 }
3025 }
3026
3027 let lineage = self.build_lineage_graph();
3029
3030 let gate_result = if self.config.quality_gates.enabled {
3032 let profile_name = &self.config.quality_gates.profile;
3033 match datasynth_eval::gates::get_profile(profile_name) {
3034 Some(profile) => {
3035 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3037
3038 if balance_validation.validated {
3040 eval.coherence.balance =
3041 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3042 equation_balanced: balance_validation.is_balanced,
3043 max_imbalance: (balance_validation.total_debits
3044 - balance_validation.total_credits)
3045 .abs(),
3046 periods_evaluated: 1,
3047 periods_imbalanced: if balance_validation.is_balanced {
3048 0
3049 } else {
3050 1
3051 },
3052 period_results: Vec::new(),
3053 companies_evaluated: self.config.companies.len(),
3054 });
3055 }
3056
3057 eval.coherence.passes = balance_validation.is_balanced;
3059 if !balance_validation.is_balanced {
3060 eval.coherence
3061 .failures
3062 .push("Balance sheet equation not satisfied".to_string());
3063 }
3064
3065 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3067 eval.statistical.passes = !entries.is_empty();
3068
3069 eval.quality.overall_score = 0.9; eval.quality.passes = true;
3072
3073 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3074 info!(
3075 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3076 profile_name, result.gates_passed, result.gates_total, result.summary
3077 );
3078 Some(result)
3079 }
3080 None => {
3081 warn!(
3082 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3083 profile_name
3084 );
3085 None
3086 }
3087 }
3088 } else {
3089 None
3090 };
3091
3092 let internal_controls = if self.config.internal_controls.enabled {
3094 InternalControl::standard_controls()
3095 } else {
3096 Vec::new()
3097 };
3098
3099 Ok(EnhancedGenerationResult {
3100 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3101 master_data: std::mem::take(&mut self.master_data),
3102 document_flows,
3103 subledger,
3104 ocpm,
3105 audit,
3106 banking,
3107 graph_export,
3108 sourcing,
3109 financial_reporting,
3110 hr,
3111 accounting_standards,
3112 manufacturing: manufacturing_snap,
3113 sales_kpi_budgets,
3114 tax,
3115 esg: esg_snap,
3116 treasury,
3117 project_accounting,
3118 process_evolution,
3119 organizational_events,
3120 disruption_events,
3121 intercompany,
3122 journal_entries: entries,
3123 anomaly_labels,
3124 balance_validation,
3125 data_quality_stats,
3126 quality_issues,
3127 statistics: stats,
3128 lineage: Some(lineage),
3129 gate_result,
3130 internal_controls,
3131 sod_violations,
3132 opening_balances,
3133 subledger_reconciliation,
3134 counterfactual_pairs,
3135 red_flags,
3136 collusion_rings,
3137 temporal_vendor_chains,
3138 entity_relationship_graph,
3139 cross_process_links,
3140 industry_output,
3141 compliance_regulations,
3142 })
3143 }
3144
3145 fn phase_chart_of_accounts(
3151 &mut self,
3152 stats: &mut EnhancedGenerationStatistics,
3153 ) -> SynthResult<Arc<ChartOfAccounts>> {
3154 info!("Phase 1: Generating Chart of Accounts");
3155 let coa = self.generate_coa()?;
3156 stats.accounts_count = coa.account_count();
3157 info!(
3158 "Chart of Accounts generated: {} accounts",
3159 stats.accounts_count
3160 );
3161 self.check_resources_with_log("post-coa")?;
3162 Ok(coa)
3163 }
3164
3165 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3167 if self.phase_config.generate_master_data {
3168 info!("Phase 2: Generating Master Data");
3169 self.generate_master_data()?;
3170 stats.vendor_count = self.master_data.vendors.len();
3171 stats.customer_count = self.master_data.customers.len();
3172 stats.material_count = self.master_data.materials.len();
3173 stats.asset_count = self.master_data.assets.len();
3174 stats.employee_count = self.master_data.employees.len();
3175 info!(
3176 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3177 stats.vendor_count, stats.customer_count, stats.material_count,
3178 stats.asset_count, stats.employee_count
3179 );
3180 self.check_resources_with_log("post-master-data")?;
3181 } else {
3182 debug!("Phase 2: Skipped (master data generation disabled)");
3183 }
3184 Ok(())
3185 }
3186
3187 fn phase_document_flows(
3189 &mut self,
3190 stats: &mut EnhancedGenerationStatistics,
3191 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3192 let mut document_flows = DocumentFlowSnapshot::default();
3193 let mut subledger = SubledgerSnapshot::default();
3194 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3197
3198 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3199 info!("Phase 3: Generating Document Flows");
3200 self.generate_document_flows(&mut document_flows)?;
3201 stats.p2p_chain_count = document_flows.p2p_chains.len();
3202 stats.o2c_chain_count = document_flows.o2c_chains.len();
3203 info!(
3204 "Document flows generated: {} P2P chains, {} O2C chains",
3205 stats.p2p_chain_count, stats.o2c_chain_count
3206 );
3207
3208 debug!("Phase 3b: Linking document flows to subledgers");
3210 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3211 stats.ap_invoice_count = subledger.ap_invoices.len();
3212 stats.ar_invoice_count = subledger.ar_invoices.len();
3213 debug!(
3214 "Subledgers linked: {} AP invoices, {} AR invoices",
3215 stats.ap_invoice_count, stats.ar_invoice_count
3216 );
3217
3218 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3223 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3224 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3225 debug!("Payment settlements applied to AP and AR subledgers");
3226
3227 if let Ok(start_date) =
3230 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3231 {
3232 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3233 - chrono::Days::new(1);
3234 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3235 for company in &self.config.companies {
3242 let ar_report = ARAgingReport::from_invoices(
3243 company.code.clone(),
3244 &subledger.ar_invoices,
3245 as_of_date,
3246 );
3247 subledger.ar_aging_reports.push(ar_report);
3248
3249 let ap_report = APAgingReport::from_invoices(
3250 company.code.clone(),
3251 &subledger.ap_invoices,
3252 as_of_date,
3253 );
3254 subledger.ap_aging_reports.push(ap_report);
3255 }
3256 debug!(
3257 "AR/AP aging reports built: {} AR, {} AP",
3258 subledger.ar_aging_reports.len(),
3259 subledger.ap_aging_reports.len()
3260 );
3261
3262 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3264 {
3265 use datasynth_generators::DunningGenerator;
3266 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3267 for company in &self.config.companies {
3268 let currency = company.currency.as_str();
3269 let mut company_invoices: Vec<
3272 datasynth_core::models::subledger::ar::ARInvoice,
3273 > = subledger
3274 .ar_invoices
3275 .iter()
3276 .filter(|inv| inv.company_code == company.code)
3277 .cloned()
3278 .collect();
3279
3280 if company_invoices.is_empty() {
3281 continue;
3282 }
3283
3284 let result = dunning_gen.execute_dunning_run(
3285 &company.code,
3286 as_of_date,
3287 &mut company_invoices,
3288 currency,
3289 );
3290
3291 for updated in &company_invoices {
3293 if let Some(orig) = subledger
3294 .ar_invoices
3295 .iter_mut()
3296 .find(|i| i.invoice_number == updated.invoice_number)
3297 {
3298 orig.dunning_info = updated.dunning_info.clone();
3299 }
3300 }
3301
3302 subledger.dunning_runs.push(result.dunning_run);
3303 subledger.dunning_letters.extend(result.letters);
3304 dunning_journal_entries.extend(result.journal_entries);
3306 }
3307 debug!(
3308 "Dunning runs complete: {} runs, {} letters",
3309 subledger.dunning_runs.len(),
3310 subledger.dunning_letters.len()
3311 );
3312 }
3313 }
3314
3315 self.check_resources_with_log("post-document-flows")?;
3316 } else {
3317 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3318 }
3319
3320 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3322 if !self.master_data.assets.is_empty() {
3323 debug!("Generating FA subledger records");
3324 let company_code = self
3325 .config
3326 .companies
3327 .first()
3328 .map(|c| c.code.as_str())
3329 .unwrap_or("1000");
3330 let currency = self
3331 .config
3332 .companies
3333 .first()
3334 .map(|c| c.currency.as_str())
3335 .unwrap_or("USD");
3336
3337 let mut fa_gen = datasynth_generators::FAGenerator::new(
3338 datasynth_generators::FAGeneratorConfig::default(),
3339 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3340 );
3341
3342 for asset in &self.master_data.assets {
3343 let (record, je) = fa_gen.generate_asset_acquisition(
3344 company_code,
3345 &format!("{:?}", asset.asset_class),
3346 &asset.description,
3347 asset.acquisition_date,
3348 currency,
3349 asset.cost_center.as_deref(),
3350 );
3351 subledger.fa_records.push(record);
3352 fa_journal_entries.push(je);
3353 }
3354
3355 stats.fa_subledger_count = subledger.fa_records.len();
3356 debug!(
3357 "FA subledger records generated: {} (with {} acquisition JEs)",
3358 stats.fa_subledger_count,
3359 fa_journal_entries.len()
3360 );
3361 }
3362
3363 if !self.master_data.materials.is_empty() {
3365 debug!("Generating Inventory subledger records");
3366 let first_company = self.config.companies.first();
3367 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3368 let inv_currency = first_company
3369 .map(|c| c.currency.clone())
3370 .unwrap_or_else(|| "USD".to_string());
3371
3372 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3373 datasynth_generators::InventoryGeneratorConfig::default(),
3374 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3375 inv_currency.clone(),
3376 );
3377
3378 for (i, material) in self.master_data.materials.iter().enumerate() {
3379 let plant = format!("PLANT{:02}", (i % 3) + 1);
3380 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3381 let initial_qty = rust_decimal::Decimal::from(
3382 material
3383 .safety_stock
3384 .to_string()
3385 .parse::<i64>()
3386 .unwrap_or(100),
3387 );
3388
3389 let position = inv_gen.generate_position(
3390 company_code,
3391 &plant,
3392 &storage_loc,
3393 &material.material_id,
3394 &material.description,
3395 initial_qty,
3396 Some(material.standard_cost),
3397 &inv_currency,
3398 );
3399 subledger.inventory_positions.push(position);
3400 }
3401
3402 stats.inventory_subledger_count = subledger.inventory_positions.len();
3403 debug!(
3404 "Inventory subledger records generated: {}",
3405 stats.inventory_subledger_count
3406 );
3407 }
3408
3409 if !subledger.fa_records.is_empty() {
3411 if let Ok(start_date) =
3412 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3413 {
3414 let company_code = self
3415 .config
3416 .companies
3417 .first()
3418 .map(|c| c.code.as_str())
3419 .unwrap_or("1000");
3420 let fiscal_year = start_date.year();
3421 let start_period = start_date.month();
3422 let end_period =
3423 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3424
3425 let depr_cfg = FaDepreciationScheduleConfig {
3426 fiscal_year,
3427 start_period,
3428 end_period,
3429 seed_offset: 800,
3430 };
3431 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3432 let runs = depr_gen.generate(company_code, &subledger.fa_records);
3433 let run_count = runs.len();
3434 subledger.depreciation_runs = runs;
3435 debug!(
3436 "Depreciation runs generated: {} runs for {} periods",
3437 run_count, self.config.global.period_months
3438 );
3439 }
3440 }
3441
3442 if !subledger.inventory_positions.is_empty() {
3444 if let Ok(start_date) =
3445 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3446 {
3447 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3448 - chrono::Days::new(1);
3449
3450 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3451 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3452
3453 for company in &self.config.companies {
3454 let result = inv_val_gen.generate(
3455 &company.code,
3456 &subledger.inventory_positions,
3457 as_of_date,
3458 );
3459 subledger.inventory_valuations.push(result);
3460 }
3461 debug!(
3462 "Inventory valuations generated: {} company reports",
3463 subledger.inventory_valuations.len()
3464 );
3465 }
3466 }
3467
3468 Ok((document_flows, subledger, fa_journal_entries))
3469 }
3470
3471 #[allow(clippy::too_many_arguments)]
3473 fn phase_ocpm_events(
3474 &mut self,
3475 document_flows: &DocumentFlowSnapshot,
3476 sourcing: &SourcingSnapshot,
3477 hr: &HrSnapshot,
3478 manufacturing: &ManufacturingSnapshot,
3479 banking: &BankingSnapshot,
3480 audit: &AuditSnapshot,
3481 financial_reporting: &FinancialReportingSnapshot,
3482 stats: &mut EnhancedGenerationStatistics,
3483 ) -> SynthResult<OcpmSnapshot> {
3484 let degradation = self.check_resources()?;
3485 if degradation >= DegradationLevel::Reduced {
3486 debug!(
3487 "Phase skipped due to resource pressure (degradation: {:?})",
3488 degradation
3489 );
3490 return Ok(OcpmSnapshot::default());
3491 }
3492 if self.phase_config.generate_ocpm_events {
3493 info!("Phase 3c: Generating OCPM Events");
3494 let ocpm_snapshot = self.generate_ocpm_events(
3495 document_flows,
3496 sourcing,
3497 hr,
3498 manufacturing,
3499 banking,
3500 audit,
3501 financial_reporting,
3502 )?;
3503 stats.ocpm_event_count = ocpm_snapshot.event_count;
3504 stats.ocpm_object_count = ocpm_snapshot.object_count;
3505 stats.ocpm_case_count = ocpm_snapshot.case_count;
3506 info!(
3507 "OCPM events generated: {} events, {} objects, {} cases",
3508 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3509 );
3510 self.check_resources_with_log("post-ocpm")?;
3511 Ok(ocpm_snapshot)
3512 } else {
3513 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3514 Ok(OcpmSnapshot::default())
3515 }
3516 }
3517
3518 fn phase_journal_entries(
3520 &mut self,
3521 coa: &Arc<ChartOfAccounts>,
3522 document_flows: &DocumentFlowSnapshot,
3523 _stats: &mut EnhancedGenerationStatistics,
3524 ) -> SynthResult<Vec<JournalEntry>> {
3525 let mut entries = Vec::new();
3526
3527 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3529 debug!("Phase 4a: Generating JEs from document flows");
3530 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3531 debug!("Generated {} JEs from document flows", flow_entries.len());
3532 entries.extend(flow_entries);
3533 }
3534
3535 if self.phase_config.generate_journal_entries {
3537 info!("Phase 4: Generating Journal Entries");
3538 let je_entries = self.generate_journal_entries(coa)?;
3539 info!("Generated {} standalone journal entries", je_entries.len());
3540 entries.extend(je_entries);
3541 } else {
3542 debug!("Phase 4: Skipped (journal entry generation disabled)");
3543 }
3544
3545 if !entries.is_empty() {
3546 self.check_resources_with_log("post-journal-entries")?;
3549 }
3550
3551 Ok(entries)
3552 }
3553
3554 fn phase_anomaly_injection(
3556 &mut self,
3557 entries: &mut [JournalEntry],
3558 actions: &DegradationActions,
3559 stats: &mut EnhancedGenerationStatistics,
3560 ) -> SynthResult<AnomalyLabels> {
3561 if self.phase_config.inject_anomalies
3562 && !entries.is_empty()
3563 && !actions.skip_anomaly_injection
3564 {
3565 info!("Phase 5: Injecting Anomalies");
3566 let result = self.inject_anomalies(entries)?;
3567 stats.anomalies_injected = result.labels.len();
3568 info!("Injected {} anomalies", stats.anomalies_injected);
3569 self.check_resources_with_log("post-anomaly-injection")?;
3570 Ok(result)
3571 } else if actions.skip_anomaly_injection {
3572 warn!("Phase 5: Skipped due to resource degradation");
3573 Ok(AnomalyLabels::default())
3574 } else {
3575 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
3576 Ok(AnomalyLabels::default())
3577 }
3578 }
3579
3580 fn phase_balance_validation(
3582 &mut self,
3583 entries: &[JournalEntry],
3584 ) -> SynthResult<BalanceValidationResult> {
3585 if self.phase_config.validate_balances && !entries.is_empty() {
3586 debug!("Phase 6: Validating Balances");
3587 let balance_validation = self.validate_journal_entries(entries)?;
3588 if balance_validation.is_balanced {
3589 debug!("Balance validation passed");
3590 } else {
3591 warn!(
3592 "Balance validation found {} errors",
3593 balance_validation.validation_errors.len()
3594 );
3595 }
3596 Ok(balance_validation)
3597 } else {
3598 Ok(BalanceValidationResult::default())
3599 }
3600 }
3601
3602 fn phase_data_quality_injection(
3604 &mut self,
3605 entries: &mut [JournalEntry],
3606 actions: &DegradationActions,
3607 stats: &mut EnhancedGenerationStatistics,
3608 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3609 if self.phase_config.inject_data_quality
3610 && !entries.is_empty()
3611 && !actions.skip_data_quality
3612 {
3613 info!("Phase 7: Injecting Data Quality Variations");
3614 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3615 stats.data_quality_issues = dq_stats.records_with_issues;
3616 info!("Injected {} data quality issues", stats.data_quality_issues);
3617 self.check_resources_with_log("post-data-quality")?;
3618 Ok((dq_stats, quality_issues))
3619 } else if actions.skip_data_quality {
3620 warn!("Phase 7: Skipped due to resource degradation");
3621 Ok((DataQualityStats::default(), Vec::new()))
3622 } else {
3623 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3624 Ok((DataQualityStats::default(), Vec::new()))
3625 }
3626 }
3627
3628 fn phase_period_close(
3638 &mut self,
3639 entries: &mut Vec<JournalEntry>,
3640 subledger: &SubledgerSnapshot,
3641 stats: &mut EnhancedGenerationStatistics,
3642 ) -> SynthResult<()> {
3643 if !self.phase_config.generate_period_close || entries.is_empty() {
3644 debug!("Phase 10b: Skipped (period close disabled or no entries)");
3645 return Ok(());
3646 }
3647
3648 info!("Phase 10b: Generating period-close journal entries");
3649
3650 use datasynth_core::accounts::{
3651 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3652 };
3653 use rust_decimal::Decimal;
3654
3655 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3656 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3657 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3658 let close_date = end_date - chrono::Days::new(1);
3660
3661 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
3666 .config
3667 .companies
3668 .iter()
3669 .map(|c| c.code.clone())
3670 .collect();
3671
3672 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3674 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3675
3676 let period_months = self.config.global.period_months;
3680 for asset in &subledger.fa_records {
3681 use datasynth_core::models::subledger::fa::AssetStatus;
3683 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3684 continue;
3685 }
3686 let useful_life_months = asset.useful_life_months();
3687 if useful_life_months == 0 {
3688 continue;
3690 }
3691 let salvage_value = asset.salvage_value();
3692 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3693 if depreciable_base == Decimal::ZERO {
3694 continue;
3695 }
3696 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3697 * Decimal::from(period_months))
3698 .round_dp(2);
3699 if period_depr <= Decimal::ZERO {
3700 continue;
3701 }
3702
3703 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3704 depr_header.document_type = "CL".to_string();
3705 depr_header.header_text = Some(format!(
3706 "Depreciation - {} {}",
3707 asset.asset_number, asset.description
3708 ));
3709 depr_header.created_by = "CLOSE_ENGINE".to_string();
3710 depr_header.source = TransactionSource::Automated;
3711 depr_header.business_process = Some(BusinessProcess::R2R);
3712
3713 let doc_id = depr_header.document_id;
3714 let mut depr_je = JournalEntry::new(depr_header);
3715
3716 depr_je.add_line(JournalEntryLine::debit(
3718 doc_id,
3719 1,
3720 expense_accounts::DEPRECIATION.to_string(),
3721 period_depr,
3722 ));
3723 depr_je.add_line(JournalEntryLine::credit(
3725 doc_id,
3726 2,
3727 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3728 period_depr,
3729 ));
3730
3731 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3732 close_jes.push(depr_je);
3733 }
3734
3735 if !subledger.fa_records.is_empty() {
3736 debug!(
3737 "Generated {} depreciation JEs from {} FA records",
3738 close_jes.len(),
3739 subledger.fa_records.len()
3740 );
3741 }
3742
3743 {
3747 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3748 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3749
3750 let accrual_items: &[(&str, &str, &str)] = &[
3752 ("Accrued Utilities", "6200", "2100"),
3753 ("Accrued Rent", "6300", "2100"),
3754 ("Accrued Interest", "6100", "2150"),
3755 ];
3756
3757 for company_code in &company_codes {
3758 let company_revenue: Decimal = entries
3760 .iter()
3761 .filter(|e| e.header.company_code == *company_code)
3762 .flat_map(|e| e.lines.iter())
3763 .filter(|l| l.gl_account.starts_with('4'))
3764 .map(|l| l.credit_amount - l.debit_amount)
3765 .fold(Decimal::ZERO, |acc, v| acc + v);
3766
3767 if company_revenue <= Decimal::ZERO {
3768 continue;
3769 }
3770
3771 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
3773 if accrual_base <= Decimal::ZERO {
3774 continue;
3775 }
3776
3777 for (description, expense_acct, liability_acct) in accrual_items {
3778 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
3779 company_code,
3780 description,
3781 accrual_base,
3782 expense_acct,
3783 liability_acct,
3784 close_date,
3785 None,
3786 );
3787 close_jes.push(accrual_je);
3788 if let Some(rev_je) = reversal_je {
3789 close_jes.push(rev_je);
3790 }
3791 }
3792 }
3793
3794 debug!(
3795 "Generated accrual entries for {} companies",
3796 company_codes.len()
3797 );
3798 }
3799
3800 for company_code in &company_codes {
3801 let mut total_revenue = Decimal::ZERO;
3806 let mut total_expenses = Decimal::ZERO;
3807
3808 for entry in entries.iter() {
3809 if entry.header.company_code != *company_code {
3810 continue;
3811 }
3812 for line in &entry.lines {
3813 let category = AccountCategory::from_account(&line.gl_account);
3814 match category {
3815 AccountCategory::Revenue => {
3816 total_revenue += line.credit_amount - line.debit_amount;
3818 }
3819 AccountCategory::Cogs
3820 | AccountCategory::OperatingExpense
3821 | AccountCategory::OtherIncomeExpense
3822 | AccountCategory::Tax => {
3823 total_expenses += line.debit_amount - line.credit_amount;
3825 }
3826 _ => {}
3827 }
3828 }
3829 }
3830
3831 let pre_tax_income = total_revenue - total_expenses;
3832
3833 if pre_tax_income == Decimal::ZERO {
3835 debug!(
3836 "Company {}: no pre-tax income, skipping period close",
3837 company_code
3838 );
3839 continue;
3840 }
3841
3842 if pre_tax_income > Decimal::ZERO {
3844 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
3846
3847 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
3848 tax_header.document_type = "CL".to_string();
3849 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
3850 tax_header.created_by = "CLOSE_ENGINE".to_string();
3851 tax_header.source = TransactionSource::Automated;
3852 tax_header.business_process = Some(BusinessProcess::R2R);
3853
3854 let doc_id = tax_header.document_id;
3855 let mut tax_je = JournalEntry::new(tax_header);
3856
3857 tax_je.add_line(JournalEntryLine::debit(
3859 doc_id,
3860 1,
3861 tax_accounts::TAX_EXPENSE.to_string(),
3862 tax_amount,
3863 ));
3864 tax_je.add_line(JournalEntryLine::credit(
3866 doc_id,
3867 2,
3868 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
3869 tax_amount,
3870 ));
3871
3872 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
3873 close_jes.push(tax_je);
3874 } else {
3875 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
3878 if dta_amount > Decimal::ZERO {
3879 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
3880 dta_header.document_type = "CL".to_string();
3881 dta_header.header_text =
3882 Some(format!("Deferred tax asset (DTA) - {}", company_code));
3883 dta_header.created_by = "CLOSE_ENGINE".to_string();
3884 dta_header.source = TransactionSource::Automated;
3885 dta_header.business_process = Some(BusinessProcess::R2R);
3886
3887 let doc_id = dta_header.document_id;
3888 let mut dta_je = JournalEntry::new(dta_header);
3889
3890 dta_je.add_line(JournalEntryLine::debit(
3892 doc_id,
3893 1,
3894 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
3895 dta_amount,
3896 ));
3897 dta_je.add_line(JournalEntryLine::credit(
3900 doc_id,
3901 2,
3902 tax_accounts::TAX_EXPENSE.to_string(),
3903 dta_amount,
3904 ));
3905
3906 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
3907 close_jes.push(dta_je);
3908 debug!(
3909 "Company {}: loss year — recognised DTA of {}",
3910 company_code, dta_amount
3911 );
3912 }
3913 }
3914
3915 let tax_provision = if pre_tax_income > Decimal::ZERO {
3921 (pre_tax_income * tax_rate).round_dp(2)
3922 } else {
3923 Decimal::ZERO
3924 };
3925 let net_income = pre_tax_income - tax_provision;
3926
3927 if net_income > Decimal::ZERO {
3928 use datasynth_generators::DividendGenerator;
3929 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
3931 let currency_str = self
3932 .config
3933 .companies
3934 .iter()
3935 .find(|c| c.code == *company_code)
3936 .map(|c| c.currency.as_str())
3937 .unwrap_or("USD");
3938 let div_result = div_gen.generate(
3939 company_code,
3940 close_date,
3941 Decimal::new(1, 0), dividend_amount,
3943 currency_str,
3944 );
3945 let div_je_count = div_result.journal_entries.len();
3946 close_jes.extend(div_result.journal_entries);
3947 debug!(
3948 "Company {}: declared dividend of {} ({} JEs)",
3949 company_code, dividend_amount, div_je_count
3950 );
3951 }
3952
3953 if net_income != Decimal::ZERO {
3958 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
3959 close_header.document_type = "CL".to_string();
3960 close_header.header_text =
3961 Some(format!("Income statement close - {}", company_code));
3962 close_header.created_by = "CLOSE_ENGINE".to_string();
3963 close_header.source = TransactionSource::Automated;
3964 close_header.business_process = Some(BusinessProcess::R2R);
3965
3966 let doc_id = close_header.document_id;
3967 let mut close_je = JournalEntry::new(close_header);
3968
3969 let abs_net_income = net_income.abs();
3970
3971 if net_income > Decimal::ZERO {
3972 close_je.add_line(JournalEntryLine::debit(
3974 doc_id,
3975 1,
3976 equity_accounts::INCOME_SUMMARY.to_string(),
3977 abs_net_income,
3978 ));
3979 close_je.add_line(JournalEntryLine::credit(
3980 doc_id,
3981 2,
3982 equity_accounts::RETAINED_EARNINGS.to_string(),
3983 abs_net_income,
3984 ));
3985 } else {
3986 close_je.add_line(JournalEntryLine::debit(
3988 doc_id,
3989 1,
3990 equity_accounts::RETAINED_EARNINGS.to_string(),
3991 abs_net_income,
3992 ));
3993 close_je.add_line(JournalEntryLine::credit(
3994 doc_id,
3995 2,
3996 equity_accounts::INCOME_SUMMARY.to_string(),
3997 abs_net_income,
3998 ));
3999 }
4000
4001 debug_assert!(
4002 close_je.is_balanced(),
4003 "Income statement closing JE must be balanced"
4004 );
4005 close_jes.push(close_je);
4006 }
4007 }
4008
4009 let close_count = close_jes.len();
4010 if close_count > 0 {
4011 info!("Generated {} period-close journal entries", close_count);
4012 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4013 entries.extend(close_jes);
4014 stats.period_close_je_count = close_count;
4015
4016 stats.total_entries = entries.len() as u64;
4018 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4019 } else {
4020 debug!("No period-close entries generated (no income statement activity)");
4021 }
4022
4023 Ok(())
4024 }
4025
4026 fn phase_audit_data(
4028 &mut self,
4029 entries: &[JournalEntry],
4030 stats: &mut EnhancedGenerationStatistics,
4031 ) -> SynthResult<AuditSnapshot> {
4032 if self.phase_config.generate_audit {
4033 info!("Phase 8: Generating Audit Data");
4034 let audit_snapshot = self.generate_audit_data(entries)?;
4035 stats.audit_engagement_count = audit_snapshot.engagements.len();
4036 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4037 stats.audit_evidence_count = audit_snapshot.evidence.len();
4038 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4039 stats.audit_finding_count = audit_snapshot.findings.len();
4040 stats.audit_judgment_count = audit_snapshot.judgments.len();
4041 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4042 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4043 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4044 stats.audit_sample_count = audit_snapshot.samples.len();
4045 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4046 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4047 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4048 stats.audit_related_party_count = audit_snapshot.related_parties.len();
4049 stats.audit_related_party_transaction_count =
4050 audit_snapshot.related_party_transactions.len();
4051 info!(
4052 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4053 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4054 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4055 {} RP transactions",
4056 stats.audit_engagement_count,
4057 stats.audit_workpaper_count,
4058 stats.audit_evidence_count,
4059 stats.audit_risk_count,
4060 stats.audit_finding_count,
4061 stats.audit_judgment_count,
4062 stats.audit_confirmation_count,
4063 stats.audit_procedure_step_count,
4064 stats.audit_sample_count,
4065 stats.audit_analytical_result_count,
4066 stats.audit_ia_function_count,
4067 stats.audit_ia_report_count,
4068 stats.audit_related_party_count,
4069 stats.audit_related_party_transaction_count,
4070 );
4071 self.check_resources_with_log("post-audit")?;
4072 Ok(audit_snapshot)
4073 } else {
4074 debug!("Phase 8: Skipped (audit generation disabled)");
4075 Ok(AuditSnapshot::default())
4076 }
4077 }
4078
4079 fn phase_banking_data(
4081 &mut self,
4082 stats: &mut EnhancedGenerationStatistics,
4083 ) -> SynthResult<BankingSnapshot> {
4084 if self.phase_config.generate_banking {
4085 info!("Phase 9: Generating Banking KYC/AML Data");
4086 let banking_snapshot = self.generate_banking_data()?;
4087 stats.banking_customer_count = banking_snapshot.customers.len();
4088 stats.banking_account_count = banking_snapshot.accounts.len();
4089 stats.banking_transaction_count = banking_snapshot.transactions.len();
4090 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4091 info!(
4092 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4093 stats.banking_customer_count, stats.banking_account_count,
4094 stats.banking_transaction_count, stats.banking_suspicious_count
4095 );
4096 self.check_resources_with_log("post-banking")?;
4097 Ok(banking_snapshot)
4098 } else {
4099 debug!("Phase 9: Skipped (banking generation disabled)");
4100 Ok(BankingSnapshot::default())
4101 }
4102 }
4103
4104 fn phase_graph_export(
4106 &mut self,
4107 entries: &[JournalEntry],
4108 coa: &Arc<ChartOfAccounts>,
4109 stats: &mut EnhancedGenerationStatistics,
4110 ) -> SynthResult<GraphExportSnapshot> {
4111 if self.phase_config.generate_graph_export && !entries.is_empty() {
4112 info!("Phase 10: Exporting Accounting Network Graphs");
4113 match self.export_graphs(entries, coa, stats) {
4114 Ok(snapshot) => {
4115 info!(
4116 "Graph export complete: {} graphs ({} nodes, {} edges)",
4117 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4118 );
4119 Ok(snapshot)
4120 }
4121 Err(e) => {
4122 warn!("Phase 10: Graph export failed: {}", e);
4123 Ok(GraphExportSnapshot::default())
4124 }
4125 }
4126 } else {
4127 debug!("Phase 10: Skipped (graph export disabled or no entries)");
4128 Ok(GraphExportSnapshot::default())
4129 }
4130 }
4131
4132 #[allow(clippy::too_many_arguments)]
4134 fn phase_hypergraph_export(
4135 &self,
4136 coa: &Arc<ChartOfAccounts>,
4137 entries: &[JournalEntry],
4138 document_flows: &DocumentFlowSnapshot,
4139 sourcing: &SourcingSnapshot,
4140 hr: &HrSnapshot,
4141 manufacturing: &ManufacturingSnapshot,
4142 banking: &BankingSnapshot,
4143 audit: &AuditSnapshot,
4144 financial_reporting: &FinancialReportingSnapshot,
4145 ocpm: &OcpmSnapshot,
4146 compliance: &ComplianceRegulationsSnapshot,
4147 stats: &mut EnhancedGenerationStatistics,
4148 ) -> SynthResult<()> {
4149 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4150 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4151 match self.export_hypergraph(
4152 coa,
4153 entries,
4154 document_flows,
4155 sourcing,
4156 hr,
4157 manufacturing,
4158 banking,
4159 audit,
4160 financial_reporting,
4161 ocpm,
4162 compliance,
4163 stats,
4164 ) {
4165 Ok(info) => {
4166 info!(
4167 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4168 info.node_count, info.edge_count, info.hyperedge_count
4169 );
4170 }
4171 Err(e) => {
4172 warn!("Phase 10b: Hypergraph export failed: {}", e);
4173 }
4174 }
4175 } else {
4176 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4177 }
4178 Ok(())
4179 }
4180
4181 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4187 if !self.config.llm.enabled {
4188 debug!("Phase 11: Skipped (LLM enrichment disabled)");
4189 return;
4190 }
4191
4192 info!("Phase 11: Starting LLM Enrichment");
4193 let start = std::time::Instant::now();
4194
4195 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4196 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4199 let schema_provider = &self.config.llm.provider;
4200 let api_key_env = match schema_provider.as_str() {
4201 "openai" => Some("OPENAI_API_KEY"),
4202 "anthropic" => Some("ANTHROPIC_API_KEY"),
4203 "custom" => Some("LLM_API_KEY"),
4204 _ => None,
4205 };
4206 if let Some(key_env) = api_key_env {
4207 if std::env::var(key_env).is_ok() {
4208 let llm_config = datasynth_core::llm::LlmConfig {
4209 model: self.config.llm.model.clone(),
4210 api_key_env: key_env.to_string(),
4211 ..datasynth_core::llm::LlmConfig::default()
4212 };
4213 match HttpLlmProvider::new(llm_config) {
4214 Ok(p) => Arc::new(p),
4215 Err(e) => {
4216 warn!(
4217 "Failed to create HttpLlmProvider: {}; falling back to mock",
4218 e
4219 );
4220 Arc::new(MockLlmProvider::new(self.seed))
4221 }
4222 }
4223 } else {
4224 Arc::new(MockLlmProvider::new(self.seed))
4225 }
4226 } else {
4227 Arc::new(MockLlmProvider::new(self.seed))
4228 }
4229 };
4230 let enricher = VendorLlmEnricher::new(provider);
4231
4232 let industry = format!("{:?}", self.config.global.industry);
4233 let max_enrichments = self
4234 .config
4235 .llm
4236 .max_vendor_enrichments
4237 .min(self.master_data.vendors.len());
4238
4239 let mut enriched_count = 0usize;
4240 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
4241 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4242 Ok(name) => {
4243 vendor.name = name;
4244 enriched_count += 1;
4245 }
4246 Err(e) => {
4247 warn!(
4248 "LLM vendor enrichment failed for {}: {}",
4249 vendor.vendor_id, e
4250 );
4251 }
4252 }
4253 }
4254
4255 enriched_count
4256 }));
4257
4258 match result {
4259 Ok(enriched_count) => {
4260 stats.llm_vendors_enriched = enriched_count;
4261 let elapsed = start.elapsed();
4262 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4263 info!(
4264 "Phase 11 complete: {} vendors enriched in {}ms",
4265 enriched_count, stats.llm_enrichment_ms
4266 );
4267 }
4268 Err(_) => {
4269 let elapsed = start.elapsed();
4270 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4271 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4272 }
4273 }
4274 }
4275
4276 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4282 if !self.config.diffusion.enabled {
4283 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4284 return;
4285 }
4286
4287 info!("Phase 12: Starting Diffusion Enhancement");
4288 let start = std::time::Instant::now();
4289
4290 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4291 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
4294
4295 let diffusion_config = DiffusionConfig {
4296 n_steps: self.config.diffusion.n_steps,
4297 seed: self.seed,
4298 ..Default::default()
4299 };
4300
4301 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
4302
4303 let n_samples = self.config.diffusion.sample_size;
4304 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
4306
4307 samples.len()
4308 }));
4309
4310 match result {
4311 Ok(sample_count) => {
4312 stats.diffusion_samples_generated = sample_count;
4313 let elapsed = start.elapsed();
4314 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4315 info!(
4316 "Phase 12 complete: {} diffusion samples generated in {}ms",
4317 sample_count, stats.diffusion_enhancement_ms
4318 );
4319 }
4320 Err(_) => {
4321 let elapsed = start.elapsed();
4322 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4323 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4324 }
4325 }
4326 }
4327
4328 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4335 if !self.config.causal.enabled {
4336 debug!("Phase 13: Skipped (causal generation disabled)");
4337 return;
4338 }
4339
4340 info!("Phase 13: Starting Causal Overlay");
4341 let start = std::time::Instant::now();
4342
4343 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4344 let graph = match self.config.causal.template.as_str() {
4346 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4347 _ => CausalGraph::fraud_detection_template(),
4348 };
4349
4350 let scm = StructuralCausalModel::new(graph.clone())
4351 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4352
4353 let n_samples = self.config.causal.sample_size;
4354 let samples = scm
4355 .generate(n_samples, self.seed)
4356 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4357
4358 let validation_passed = if self.config.causal.validate {
4360 let report = CausalValidator::validate_causal_structure(&samples, &graph);
4361 if report.valid {
4362 info!(
4363 "Causal validation passed: all {} checks OK",
4364 report.checks.len()
4365 );
4366 } else {
4367 warn!(
4368 "Causal validation: {} violations detected: {:?}",
4369 report.violations.len(),
4370 report.violations
4371 );
4372 }
4373 Some(report.valid)
4374 } else {
4375 None
4376 };
4377
4378 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4379 }));
4380
4381 match result {
4382 Ok(Ok((sample_count, validation_passed))) => {
4383 stats.causal_samples_generated = sample_count;
4384 stats.causal_validation_passed = validation_passed;
4385 let elapsed = start.elapsed();
4386 stats.causal_generation_ms = elapsed.as_millis() as u64;
4387 info!(
4388 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4389 sample_count, stats.causal_generation_ms, validation_passed,
4390 );
4391 }
4392 Ok(Err(e)) => {
4393 let elapsed = start.elapsed();
4394 stats.causal_generation_ms = elapsed.as_millis() as u64;
4395 warn!("Phase 13: Causal generation failed: {}", e);
4396 }
4397 Err(_) => {
4398 let elapsed = start.elapsed();
4399 stats.causal_generation_ms = elapsed.as_millis() as u64;
4400 warn!("Phase 13: Causal generation failed (panic caught), continuing");
4401 }
4402 }
4403 }
4404
4405 fn phase_sourcing_data(
4407 &mut self,
4408 stats: &mut EnhancedGenerationStatistics,
4409 ) -> SynthResult<SourcingSnapshot> {
4410 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4411 debug!("Phase 14: Skipped (sourcing generation disabled)");
4412 return Ok(SourcingSnapshot::default());
4413 }
4414 let degradation = self.check_resources()?;
4415 if degradation >= DegradationLevel::Reduced {
4416 debug!(
4417 "Phase skipped due to resource pressure (degradation: {:?})",
4418 degradation
4419 );
4420 return Ok(SourcingSnapshot::default());
4421 }
4422
4423 info!("Phase 14: Generating S2C Sourcing Data");
4424 let seed = self.seed;
4425
4426 let vendor_ids: Vec<String> = self
4428 .master_data
4429 .vendors
4430 .iter()
4431 .map(|v| v.vendor_id.clone())
4432 .collect();
4433 if vendor_ids.is_empty() {
4434 debug!("Phase 14: Skipped (no vendors available)");
4435 return Ok(SourcingSnapshot::default());
4436 }
4437
4438 let categories: Vec<(String, String)> = vec![
4439 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4440 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4441 ("CAT-IT".to_string(), "IT Equipment".to_string()),
4442 ("CAT-SVC".to_string(), "Professional Services".to_string()),
4443 ("CAT-LOG".to_string(), "Logistics".to_string()),
4444 ];
4445 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4446 .iter()
4447 .map(|(id, name)| {
4448 (
4449 id.clone(),
4450 name.clone(),
4451 rust_decimal::Decimal::from(100_000),
4452 )
4453 })
4454 .collect();
4455
4456 let company_code = self
4457 .config
4458 .companies
4459 .first()
4460 .map(|c| c.code.as_str())
4461 .unwrap_or("1000");
4462 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4463 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4464 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4465 let fiscal_year = start_date.year() as u16;
4466 let owner_ids: Vec<String> = self
4467 .master_data
4468 .employees
4469 .iter()
4470 .take(5)
4471 .map(|e| e.employee_id.clone())
4472 .collect();
4473 let owner_id = owner_ids
4474 .first()
4475 .map(std::string::String::as_str)
4476 .unwrap_or("BUYER-001");
4477
4478 let mut spend_gen = SpendAnalysisGenerator::new(seed);
4480 let spend_analyses =
4481 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4482
4483 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4485 let sourcing_projects = if owner_ids.is_empty() {
4486 Vec::new()
4487 } else {
4488 project_gen.generate(
4489 company_code,
4490 &categories_with_spend,
4491 &owner_ids,
4492 start_date,
4493 self.config.global.period_months,
4494 )
4495 };
4496 stats.sourcing_project_count = sourcing_projects.len();
4497
4498 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4500 let mut qual_gen = QualificationGenerator::new(seed + 2);
4501 let qualifications = qual_gen.generate(
4502 company_code,
4503 &qual_vendor_ids,
4504 sourcing_projects.first().map(|p| p.project_id.as_str()),
4505 owner_id,
4506 start_date,
4507 );
4508
4509 let mut rfx_gen = RfxGenerator::new(seed + 3);
4511 let rfx_events: Vec<RfxEvent> = sourcing_projects
4512 .iter()
4513 .map(|proj| {
4514 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4515 rfx_gen.generate(
4516 company_code,
4517 &proj.project_id,
4518 &proj.category_id,
4519 &qualified_vids,
4520 owner_id,
4521 start_date,
4522 50000.0,
4523 )
4524 })
4525 .collect();
4526 stats.rfx_event_count = rfx_events.len();
4527
4528 let mut bid_gen = BidGenerator::new(seed + 4);
4530 let mut all_bids = Vec::new();
4531 for rfx in &rfx_events {
4532 let bidder_count = vendor_ids.len().clamp(2, 5);
4533 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
4534 let bids = bid_gen.generate(rfx, &responding, start_date);
4535 all_bids.extend(bids);
4536 }
4537 stats.bid_count = all_bids.len();
4538
4539 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
4541 let bid_evaluations: Vec<BidEvaluation> = rfx_events
4542 .iter()
4543 .map(|rfx| {
4544 let rfx_bids: Vec<SupplierBid> = all_bids
4545 .iter()
4546 .filter(|b| b.rfx_id == rfx.rfx_id)
4547 .cloned()
4548 .collect();
4549 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
4550 })
4551 .collect();
4552
4553 let mut contract_gen = ContractGenerator::new(seed + 6);
4555 let contracts: Vec<ProcurementContract> = bid_evaluations
4556 .iter()
4557 .zip(rfx_events.iter())
4558 .filter_map(|(eval, rfx)| {
4559 eval.ranked_bids.first().and_then(|winner| {
4560 all_bids
4561 .iter()
4562 .find(|b| b.bid_id == winner.bid_id)
4563 .map(|winning_bid| {
4564 contract_gen.generate_from_bid(
4565 winning_bid,
4566 Some(&rfx.sourcing_project_id),
4567 &rfx.category_id,
4568 owner_id,
4569 start_date,
4570 )
4571 })
4572 })
4573 })
4574 .collect();
4575 stats.contract_count = contracts.len();
4576
4577 let mut catalog_gen = CatalogGenerator::new(seed + 7);
4579 let catalog_items = catalog_gen.generate(&contracts);
4580 stats.catalog_item_count = catalog_items.len();
4581
4582 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
4584 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
4585 .iter()
4586 .fold(
4587 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
4588 |mut acc, c| {
4589 acc.entry(c.vendor_id.clone()).or_default().push(c);
4590 acc
4591 },
4592 )
4593 .into_iter()
4594 .collect();
4595 let scorecards = scorecard_gen.generate(
4596 company_code,
4597 &vendor_contracts,
4598 start_date,
4599 end_date,
4600 owner_id,
4601 );
4602 stats.scorecard_count = scorecards.len();
4603
4604 let mut sourcing_projects = sourcing_projects;
4607 for project in &mut sourcing_projects {
4608 project.rfx_ids = rfx_events
4610 .iter()
4611 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
4612 .map(|rfx| rfx.rfx_id.clone())
4613 .collect();
4614
4615 project.contract_id = contracts
4617 .iter()
4618 .find(|c| {
4619 c.sourcing_project_id
4620 .as_deref()
4621 .is_some_and(|sp| sp == project.project_id)
4622 })
4623 .map(|c| c.contract_id.clone());
4624
4625 project.spend_analysis_id = spend_analyses
4627 .iter()
4628 .find(|sa| sa.category_id == project.category_id)
4629 .map(|sa| sa.category_id.clone());
4630 }
4631
4632 info!(
4633 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4634 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4635 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4636 );
4637 self.check_resources_with_log("post-sourcing")?;
4638
4639 Ok(SourcingSnapshot {
4640 spend_analyses,
4641 sourcing_projects,
4642 qualifications,
4643 rfx_events,
4644 bids: all_bids,
4645 bid_evaluations,
4646 contracts,
4647 catalog_items,
4648 scorecards,
4649 })
4650 }
4651
4652 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4658 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4659
4660 let parent_code = self
4661 .config
4662 .companies
4663 .first()
4664 .map(|c| c.code.clone())
4665 .unwrap_or_else(|| "PARENT".to_string());
4666
4667 let mut group = GroupStructure::new(parent_code);
4668
4669 for company in self.config.companies.iter().skip(1) {
4670 let sub =
4671 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4672 group.add_subsidiary(sub);
4673 }
4674
4675 group
4676 }
4677
4678 fn phase_intercompany(
4680 &mut self,
4681 journal_entries: &[JournalEntry],
4682 stats: &mut EnhancedGenerationStatistics,
4683 ) -> SynthResult<IntercompanySnapshot> {
4684 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4686 debug!("Phase 14b: Skipped (intercompany generation disabled)");
4687 return Ok(IntercompanySnapshot::default());
4688 }
4689
4690 if self.config.companies.len() < 2 {
4692 debug!(
4693 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4694 self.config.companies.len()
4695 );
4696 return Ok(IntercompanySnapshot::default());
4697 }
4698
4699 info!("Phase 14b: Generating Intercompany Transactions");
4700
4701 let group_structure = self.build_group_structure();
4704 debug!(
4705 "Group structure built: parent={}, subsidiaries={}",
4706 group_structure.parent_entity,
4707 group_structure.subsidiaries.len()
4708 );
4709
4710 let seed = self.seed;
4711 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4712 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4713 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4714
4715 let parent_code = self.config.companies[0].code.clone();
4718 let mut ownership_structure =
4719 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4720
4721 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4722 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4723 format!("REL{:03}", i + 1),
4724 parent_code.clone(),
4725 company.code.clone(),
4726 rust_decimal::Decimal::from(100), start_date,
4728 );
4729 ownership_structure.add_relationship(relationship);
4730 }
4731
4732 let tp_method = match self.config.intercompany.transfer_pricing_method {
4734 datasynth_config::schema::TransferPricingMethod::CostPlus => {
4735 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4736 }
4737 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4738 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4739 }
4740 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4741 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4742 }
4743 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4744 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4745 }
4746 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4747 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4748 }
4749 };
4750
4751 let ic_currency = self
4753 .config
4754 .companies
4755 .first()
4756 .map(|c| c.currency.clone())
4757 .unwrap_or_else(|| "USD".to_string());
4758 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4759 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4760 transfer_pricing_method: tp_method,
4761 markup_percent: rust_decimal::Decimal::from_f64_retain(
4762 self.config.intercompany.markup_percent,
4763 )
4764 .unwrap_or(rust_decimal::Decimal::from(5)),
4765 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4766 default_currency: ic_currency,
4767 ..Default::default()
4768 };
4769
4770 let mut ic_generator = datasynth_generators::ICGenerator::new(
4772 ic_gen_config,
4773 ownership_structure.clone(),
4774 seed + 50,
4775 );
4776
4777 let transactions_per_day = 3;
4780 let matched_pairs = ic_generator.generate_transactions_for_period(
4781 start_date,
4782 end_date,
4783 transactions_per_day,
4784 );
4785
4786 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
4788 debug!(
4789 "Generated {} IC seller invoices, {} IC buyer POs",
4790 ic_doc_chains.seller_invoices.len(),
4791 ic_doc_chains.buyer_orders.len()
4792 );
4793
4794 let mut seller_entries = Vec::new();
4796 let mut buyer_entries = Vec::new();
4797 let fiscal_year = start_date.year();
4798
4799 for pair in &matched_pairs {
4800 let fiscal_period = pair.posting_date.month();
4801 let (seller_je, buyer_je) =
4802 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4803 seller_entries.push(seller_je);
4804 buyer_entries.push(buyer_je);
4805 }
4806
4807 let matching_config = datasynth_generators::ICMatchingConfig {
4809 base_currency: self
4810 .config
4811 .companies
4812 .first()
4813 .map(|c| c.currency.clone())
4814 .unwrap_or_else(|| "USD".to_string()),
4815 ..Default::default()
4816 };
4817 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
4818 matching_engine.load_matched_pairs(&matched_pairs);
4819 let matching_result = matching_engine.run_matching(end_date);
4820
4821 let mut elimination_entries = Vec::new();
4823 if self.config.intercompany.generate_eliminations {
4824 let elim_config = datasynth_generators::EliminationConfig {
4825 consolidation_entity: "GROUP".to_string(),
4826 base_currency: self
4827 .config
4828 .companies
4829 .first()
4830 .map(|c| c.currency.clone())
4831 .unwrap_or_else(|| "USD".to_string()),
4832 ..Default::default()
4833 };
4834
4835 let mut elim_generator =
4836 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
4837
4838 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
4839 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
4840 matching_result
4841 .matched_balances
4842 .iter()
4843 .chain(matching_result.unmatched_balances.iter())
4844 .cloned()
4845 .collect();
4846
4847 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
4859 std::collections::HashMap::new();
4860 let mut equity_amounts: std::collections::HashMap<
4861 String,
4862 std::collections::HashMap<String, rust_decimal::Decimal>,
4863 > = std::collections::HashMap::new();
4864 {
4865 use rust_decimal::Decimal;
4866 let hundred = Decimal::from(100u32);
4867 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
4871 for sub in &group_structure.subsidiaries {
4872 let net_assets = {
4873 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4874 if na > Decimal::ZERO {
4875 na
4876 } else {
4877 Decimal::from(1_000_000u64)
4878 }
4879 };
4880 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
4882 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
4883
4884 let mut eq_map = std::collections::HashMap::new();
4887 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
4888 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
4889 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
4890 equity_amounts.insert(sub.entity_code.clone(), eq_map);
4891 }
4892 }
4893
4894 let journal = elim_generator.generate_eliminations(
4895 &fiscal_period,
4896 end_date,
4897 &all_balances,
4898 &matched_pairs,
4899 &investment_amounts,
4900 &equity_amounts,
4901 );
4902
4903 elimination_entries = journal.entries.clone();
4904 }
4905
4906 let matched_pair_count = matched_pairs.len();
4907 let elimination_entry_count = elimination_entries.len();
4908 let match_rate = matching_result.match_rate;
4909
4910 stats.ic_matched_pair_count = matched_pair_count;
4911 stats.ic_elimination_count = elimination_entry_count;
4912 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
4913
4914 info!(
4915 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
4916 matched_pair_count,
4917 stats.ic_transaction_count,
4918 seller_entries.len(),
4919 buyer_entries.len(),
4920 elimination_entry_count,
4921 match_rate * 100.0
4922 );
4923 self.check_resources_with_log("post-intercompany")?;
4924
4925 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
4929 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
4930 use rust_decimal::Decimal;
4931
4932 let eight_pct = Decimal::new(8, 2); group_structure
4935 .subsidiaries
4936 .iter()
4937 .filter(|sub| {
4938 sub.nci_percentage > Decimal::ZERO
4939 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
4940 })
4941 .map(|sub| {
4942 let net_assets_from_jes =
4946 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4947
4948 let net_assets = if net_assets_from_jes > Decimal::ZERO {
4949 net_assets_from_jes.round_dp(2)
4950 } else {
4951 Decimal::from(1_000_000u64)
4953 };
4954
4955 let net_income = (net_assets * eight_pct).round_dp(2);
4957
4958 NciMeasurement::compute(
4959 sub.entity_code.clone(),
4960 sub.nci_percentage,
4961 net_assets,
4962 net_income,
4963 )
4964 })
4965 .collect()
4966 };
4967
4968 if !nci_measurements.is_empty() {
4969 info!(
4970 "NCI measurements: {} subsidiaries with non-controlling interests",
4971 nci_measurements.len()
4972 );
4973 }
4974
4975 Ok(IntercompanySnapshot {
4976 group_structure: Some(group_structure),
4977 matched_pairs,
4978 seller_journal_entries: seller_entries,
4979 buyer_journal_entries: buyer_entries,
4980 elimination_entries,
4981 nci_measurements,
4982 ic_document_chains: Some(ic_doc_chains),
4983 matched_pair_count,
4984 elimination_entry_count,
4985 match_rate,
4986 })
4987 }
4988
4989 fn phase_financial_reporting(
4991 &mut self,
4992 document_flows: &DocumentFlowSnapshot,
4993 journal_entries: &[JournalEntry],
4994 coa: &Arc<ChartOfAccounts>,
4995 _hr: &HrSnapshot,
4996 _audit: &AuditSnapshot,
4997 stats: &mut EnhancedGenerationStatistics,
4998 ) -> SynthResult<FinancialReportingSnapshot> {
4999 let fs_enabled = self.phase_config.generate_financial_statements
5000 || self.config.financial_reporting.enabled;
5001 let br_enabled = self.phase_config.generate_bank_reconciliation;
5002
5003 if !fs_enabled && !br_enabled {
5004 debug!("Phase 15: Skipped (financial reporting disabled)");
5005 return Ok(FinancialReportingSnapshot::default());
5006 }
5007
5008 info!("Phase 15: Generating Financial Reporting Data");
5009
5010 let seed = self.seed;
5011 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5012 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5013
5014 let mut financial_statements = Vec::new();
5015 let mut bank_reconciliations = Vec::new();
5016 let mut trial_balances = Vec::new();
5017 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
5018 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
5019 Vec::new();
5020 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
5022 std::collections::HashMap::new();
5023 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
5025 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
5027
5028 if fs_enabled {
5036 let has_journal_entries = !journal_entries.is_empty();
5037
5038 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
5041 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
5043
5044 let elimination_entries: Vec<&JournalEntry> = journal_entries
5046 .iter()
5047 .filter(|je| je.header.is_elimination)
5048 .collect();
5049
5050 for period in 0..self.config.global.period_months {
5052 let period_start = start_date + chrono::Months::new(period);
5053 let period_end =
5054 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5055 let fiscal_year = period_end.year() as u16;
5056 let fiscal_period = period_end.month() as u8;
5057 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5058
5059 let mut entity_tb_map: std::collections::HashMap<
5062 String,
5063 std::collections::HashMap<String, rust_decimal::Decimal>,
5064 > = std::collections::HashMap::new();
5065
5066 for (company_idx, company) in self.config.companies.iter().enumerate() {
5068 let company_code = company.code.as_str();
5069 let currency = company.currency.as_str();
5070 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
5073 let mut company_fs_gen =
5074 FinancialStatementGenerator::new(seed + company_seed_offset);
5075
5076 if has_journal_entries {
5077 let tb_entries = Self::build_cumulative_trial_balance(
5078 journal_entries,
5079 coa,
5080 company_code,
5081 start_date,
5082 period_end,
5083 fiscal_year,
5084 fiscal_period,
5085 );
5086
5087 let entity_cat_map =
5089 entity_tb_map.entry(company_code.to_string()).or_default();
5090 for tb_entry in &tb_entries {
5091 let net = tb_entry.debit_balance - tb_entry.credit_balance;
5092 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
5093 }
5094
5095 let stmts = company_fs_gen.generate(
5096 company_code,
5097 currency,
5098 &tb_entries,
5099 period_start,
5100 period_end,
5101 fiscal_year,
5102 fiscal_period,
5103 None,
5104 "SYS-AUTOCLOSE",
5105 );
5106
5107 let mut entity_stmts = Vec::new();
5108 for stmt in stmts {
5109 if stmt.statement_type == StatementType::CashFlowStatement {
5110 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
5111 let cf_items = Self::build_cash_flow_from_trial_balances(
5112 &tb_entries,
5113 None,
5114 net_income,
5115 );
5116 entity_stmts.push(FinancialStatement {
5117 cash_flow_items: cf_items,
5118 ..stmt
5119 });
5120 } else {
5121 entity_stmts.push(stmt);
5122 }
5123 }
5124
5125 financial_statements.extend(entity_stmts.clone());
5127
5128 standalone_statements
5130 .entry(company_code.to_string())
5131 .or_default()
5132 .extend(entity_stmts);
5133
5134 if company_idx == 0 {
5137 trial_balances.push(PeriodTrialBalance {
5138 fiscal_year,
5139 fiscal_period,
5140 period_start,
5141 period_end,
5142 entries: tb_entries,
5143 });
5144 }
5145 } else {
5146 let tb_entries = Self::build_trial_balance_from_entries(
5148 journal_entries,
5149 coa,
5150 company_code,
5151 fiscal_year,
5152 fiscal_period,
5153 );
5154
5155 let stmts = company_fs_gen.generate(
5156 company_code,
5157 currency,
5158 &tb_entries,
5159 period_start,
5160 period_end,
5161 fiscal_year,
5162 fiscal_period,
5163 None,
5164 "SYS-AUTOCLOSE",
5165 );
5166 financial_statements.extend(stmts.clone());
5167 standalone_statements
5168 .entry(company_code.to_string())
5169 .or_default()
5170 .extend(stmts);
5171
5172 if company_idx == 0 && !tb_entries.is_empty() {
5173 trial_balances.push(PeriodTrialBalance {
5174 fiscal_year,
5175 fiscal_period,
5176 period_start,
5177 period_end,
5178 entries: tb_entries,
5179 });
5180 }
5181 }
5182 }
5183
5184 let group_currency = self
5187 .config
5188 .companies
5189 .first()
5190 .map(|c| c.currency.as_str())
5191 .unwrap_or("USD");
5192
5193 let period_eliminations: Vec<JournalEntry> = elimination_entries
5195 .iter()
5196 .filter(|je| {
5197 je.header.fiscal_year == fiscal_year
5198 && je.header.fiscal_period == fiscal_period
5199 })
5200 .map(|je| (*je).clone())
5201 .collect();
5202
5203 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5204 &entity_tb_map,
5205 &period_eliminations,
5206 &period_label,
5207 );
5208
5209 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5212 .line_items
5213 .iter()
5214 .map(|li| {
5215 let net = li.post_elimination_total;
5216 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5217 (net, rust_decimal::Decimal::ZERO)
5218 } else {
5219 (rust_decimal::Decimal::ZERO, -net)
5220 };
5221 datasynth_generators::TrialBalanceEntry {
5222 account_code: li.account_category.clone(),
5223 account_name: li.account_category.clone(),
5224 category: li.account_category.clone(),
5225 debit_balance: debit,
5226 credit_balance: credit,
5227 }
5228 })
5229 .collect();
5230
5231 let mut cons_stmts = cons_gen.generate(
5232 "GROUP",
5233 group_currency,
5234 &cons_tb,
5235 period_start,
5236 period_end,
5237 fiscal_year,
5238 fiscal_period,
5239 None,
5240 "SYS-AUTOCLOSE",
5241 );
5242
5243 let bs_categories: &[&str] = &[
5247 "CASH",
5248 "RECEIVABLES",
5249 "INVENTORY",
5250 "FIXEDASSETS",
5251 "PAYABLES",
5252 "ACCRUEDLIABILITIES",
5253 "LONGTERMDEBT",
5254 "EQUITY",
5255 ];
5256 let (bs_items, is_items): (Vec<_>, Vec<_>) =
5257 cons_line_items.into_iter().partition(|li| {
5258 let upper = li.label.to_uppercase();
5259 bs_categories.iter().any(|c| upper == *c)
5260 });
5261
5262 for stmt in &mut cons_stmts {
5263 stmt.is_consolidated = true;
5264 match stmt.statement_type {
5265 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5266 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5267 _ => {} }
5269 }
5270
5271 consolidated_statements.extend(cons_stmts);
5272 consolidation_schedules.push(schedule);
5273 }
5274
5275 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
5281 info!(
5282 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5283 stats.financial_statement_count,
5284 consolidated_statements.len(),
5285 has_journal_entries
5286 );
5287
5288 let entity_seeds: Vec<SegmentSeed> = self
5293 .config
5294 .companies
5295 .iter()
5296 .map(|c| SegmentSeed {
5297 code: c.code.clone(),
5298 name: c.name.clone(),
5299 currency: c.currency.clone(),
5300 })
5301 .collect();
5302
5303 let mut seg_gen = SegmentGenerator::new(seed + 30);
5304
5305 for period in 0..self.config.global.period_months {
5310 let period_end =
5311 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5312 let fiscal_year = period_end.year() as u16;
5313 let fiscal_period = period_end.month() as u8;
5314 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5315
5316 use datasynth_core::models::StatementType;
5317
5318 let cons_is = consolidated_statements.iter().find(|s| {
5320 s.fiscal_year == fiscal_year
5321 && s.fiscal_period == fiscal_period
5322 && s.statement_type == StatementType::IncomeStatement
5323 });
5324 let cons_bs = consolidated_statements.iter().find(|s| {
5325 s.fiscal_year == fiscal_year
5326 && s.fiscal_period == fiscal_period
5327 && s.statement_type == StatementType::BalanceSheet
5328 });
5329
5330 let is_stmt = cons_is.or_else(|| {
5332 financial_statements.iter().find(|s| {
5333 s.fiscal_year == fiscal_year
5334 && s.fiscal_period == fiscal_period
5335 && s.statement_type == StatementType::IncomeStatement
5336 })
5337 });
5338 let bs_stmt = cons_bs.or_else(|| {
5339 financial_statements.iter().find(|s| {
5340 s.fiscal_year == fiscal_year
5341 && s.fiscal_period == fiscal_period
5342 && s.statement_type == StatementType::BalanceSheet
5343 })
5344 });
5345
5346 let consolidated_revenue = is_stmt
5347 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5348 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
5350
5351 let consolidated_profit = is_stmt
5352 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5353 .map(|li| li.amount)
5354 .unwrap_or(rust_decimal::Decimal::ZERO);
5355
5356 let consolidated_assets = bs_stmt
5357 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5358 .map(|li| li.amount)
5359 .unwrap_or(rust_decimal::Decimal::ZERO);
5360
5361 if consolidated_revenue == rust_decimal::Decimal::ZERO
5363 && consolidated_assets == rust_decimal::Decimal::ZERO
5364 {
5365 continue;
5366 }
5367
5368 let group_code = self
5369 .config
5370 .companies
5371 .first()
5372 .map(|c| c.code.as_str())
5373 .unwrap_or("GROUP");
5374
5375 let total_depr: rust_decimal::Decimal = journal_entries
5378 .iter()
5379 .filter(|je| je.header.document_type == "CL")
5380 .flat_map(|je| je.lines.iter())
5381 .filter(|l| l.gl_account.starts_with("6000"))
5382 .map(|l| l.debit_amount)
5383 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5384 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5385 Some(total_depr)
5386 } else {
5387 None
5388 };
5389
5390 let (segs, recon) = seg_gen.generate(
5391 group_code,
5392 &period_label,
5393 consolidated_revenue,
5394 consolidated_profit,
5395 consolidated_assets,
5396 &entity_seeds,
5397 depr_param,
5398 );
5399 segment_reports.extend(segs);
5400 segment_reconciliations.push(recon);
5401 }
5402
5403 info!(
5404 "Segment reports generated: {} segments, {} reconciliations",
5405 segment_reports.len(),
5406 segment_reconciliations.len()
5407 );
5408 }
5409
5410 if br_enabled && !document_flows.payments.is_empty() {
5412 let employee_ids: Vec<String> = self
5413 .master_data
5414 .employees
5415 .iter()
5416 .map(|e| e.employee_id.clone())
5417 .collect();
5418 let mut br_gen =
5419 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5420
5421 for company in &self.config.companies {
5423 let company_payments: Vec<PaymentReference> = document_flows
5424 .payments
5425 .iter()
5426 .filter(|p| p.header.company_code == company.code)
5427 .map(|p| PaymentReference {
5428 id: p.header.document_id.clone(),
5429 amount: if p.is_vendor { p.amount } else { -p.amount },
5430 date: p.header.document_date,
5431 reference: p
5432 .check_number
5433 .clone()
5434 .or_else(|| p.wire_reference.clone())
5435 .unwrap_or_else(|| p.header.document_id.clone()),
5436 })
5437 .collect();
5438
5439 if company_payments.is_empty() {
5440 continue;
5441 }
5442
5443 let bank_account_id = format!("{}-MAIN", company.code);
5444
5445 for period in 0..self.config.global.period_months {
5447 let period_start = start_date + chrono::Months::new(period);
5448 let period_end =
5449 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5450
5451 let period_payments: Vec<PaymentReference> = company_payments
5452 .iter()
5453 .filter(|p| p.date >= period_start && p.date <= period_end)
5454 .cloned()
5455 .collect();
5456
5457 let recon = br_gen.generate(
5458 &company.code,
5459 &bank_account_id,
5460 period_start,
5461 period_end,
5462 &company.currency,
5463 &period_payments,
5464 );
5465 bank_reconciliations.push(recon);
5466 }
5467 }
5468 info!(
5469 "Bank reconciliations generated: {} reconciliations",
5470 bank_reconciliations.len()
5471 );
5472 }
5473
5474 stats.bank_reconciliation_count = bank_reconciliations.len();
5475 self.check_resources_with_log("post-financial-reporting")?;
5476
5477 if !trial_balances.is_empty() {
5478 info!(
5479 "Period-close trial balances captured: {} periods",
5480 trial_balances.len()
5481 );
5482 }
5483
5484 let notes_to_financial_statements = Vec::new();
5488
5489 Ok(FinancialReportingSnapshot {
5490 financial_statements,
5491 standalone_statements,
5492 consolidated_statements,
5493 consolidation_schedules,
5494 bank_reconciliations,
5495 trial_balances,
5496 segment_reports,
5497 segment_reconciliations,
5498 notes_to_financial_statements,
5499 })
5500 }
5501
5502 fn generate_notes_to_financial_statements(
5509 &self,
5510 financial_reporting: &mut FinancialReportingSnapshot,
5511 accounting_standards: &AccountingStandardsSnapshot,
5512 tax: &TaxSnapshot,
5513 hr: &HrSnapshot,
5514 audit: &AuditSnapshot,
5515 treasury: &TreasurySnapshot,
5516 ) {
5517 use datasynth_config::schema::AccountingFrameworkConfig;
5518 use datasynth_core::models::StatementType;
5519 use datasynth_generators::period_close::notes_generator::{
5520 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5521 };
5522
5523 let seed = self.seed;
5524 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5525 {
5526 Ok(d) => d,
5527 Err(_) => return,
5528 };
5529
5530 let mut notes_gen = NotesGenerator::new(seed + 4235);
5531
5532 for company in &self.config.companies {
5533 let last_period_end = start_date
5534 + chrono::Months::new(self.config.global.period_months)
5535 - chrono::Days::new(1);
5536 let fiscal_year = last_period_end.year() as u16;
5537
5538 let entity_is = financial_reporting
5540 .standalone_statements
5541 .get(&company.code)
5542 .and_then(|stmts| {
5543 stmts.iter().find(|s| {
5544 s.fiscal_year == fiscal_year
5545 && s.statement_type == StatementType::IncomeStatement
5546 })
5547 });
5548 let entity_bs = financial_reporting
5549 .standalone_statements
5550 .get(&company.code)
5551 .and_then(|stmts| {
5552 stmts.iter().find(|s| {
5553 s.fiscal_year == fiscal_year
5554 && s.statement_type == StatementType::BalanceSheet
5555 })
5556 });
5557
5558 let revenue_amount = entity_is
5560 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5561 .map(|li| li.amount);
5562 let ppe_gross = entity_bs
5563 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
5564 .map(|li| li.amount);
5565
5566 let framework = match self
5567 .config
5568 .accounting_standards
5569 .framework
5570 .unwrap_or_default()
5571 {
5572 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
5573 "IFRS".to_string()
5574 }
5575 _ => "US GAAP".to_string(),
5576 };
5577
5578 let (entity_dta, entity_dtl) = {
5581 let mut dta = rust_decimal::Decimal::ZERO;
5582 let mut dtl = rust_decimal::Decimal::ZERO;
5583 for rf in &tax.deferred_tax.rollforwards {
5584 if rf.entity_code == company.code {
5585 dta += rf.closing_dta;
5586 dtl += rf.closing_dtl;
5587 }
5588 }
5589 (
5590 if dta > rust_decimal::Decimal::ZERO {
5591 Some(dta)
5592 } else {
5593 None
5594 },
5595 if dtl > rust_decimal::Decimal::ZERO {
5596 Some(dtl)
5597 } else {
5598 None
5599 },
5600 )
5601 };
5602
5603 let entity_provisions: Vec<_> = accounting_standards
5606 .provisions
5607 .iter()
5608 .filter(|p| p.entity_code == company.code)
5609 .collect();
5610 let provision_count = entity_provisions.len();
5611 let total_provisions = if provision_count > 0 {
5612 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
5613 } else {
5614 None
5615 };
5616
5617 let entity_pension_plan_count = hr
5619 .pension_plans
5620 .iter()
5621 .filter(|p| p.entity_code == company.code)
5622 .count();
5623 let entity_total_dbo: Option<rust_decimal::Decimal> = {
5624 let sum: rust_decimal::Decimal = hr
5625 .pension_disclosures
5626 .iter()
5627 .filter(|d| {
5628 hr.pension_plans
5629 .iter()
5630 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5631 })
5632 .map(|d| d.net_pension_liability)
5633 .sum();
5634 let plan_assets_sum: rust_decimal::Decimal = hr
5635 .pension_plan_assets
5636 .iter()
5637 .filter(|a| {
5638 hr.pension_plans
5639 .iter()
5640 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5641 })
5642 .map(|a| a.fair_value_closing)
5643 .sum();
5644 if entity_pension_plan_count > 0 {
5645 Some(sum + plan_assets_sum)
5646 } else {
5647 None
5648 }
5649 };
5650 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5651 let sum: rust_decimal::Decimal = hr
5652 .pension_plan_assets
5653 .iter()
5654 .filter(|a| {
5655 hr.pension_plans
5656 .iter()
5657 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5658 })
5659 .map(|a| a.fair_value_closing)
5660 .sum();
5661 if entity_pension_plan_count > 0 {
5662 Some(sum)
5663 } else {
5664 None
5665 }
5666 };
5667
5668 let rp_count = audit.related_party_transactions.len();
5671 let se_count = audit.subsequent_events.len();
5672 let adjusting_count = audit
5673 .subsequent_events
5674 .iter()
5675 .filter(|e| {
5676 matches!(
5677 e.classification,
5678 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5679 )
5680 })
5681 .count();
5682
5683 let ctx = NotesGeneratorContext {
5684 entity_code: company.code.clone(),
5685 framework,
5686 period: format!("FY{}", fiscal_year),
5687 period_end: last_period_end,
5688 currency: company.currency.clone(),
5689 revenue_amount,
5690 total_ppe_gross: ppe_gross,
5691 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5692 deferred_tax_asset: entity_dta,
5694 deferred_tax_liability: entity_dtl,
5695 provision_count,
5697 total_provisions,
5698 pension_plan_count: entity_pension_plan_count,
5700 total_dbo: entity_total_dbo,
5701 total_plan_assets: entity_total_plan_assets,
5702 related_party_transaction_count: rp_count,
5704 subsequent_event_count: se_count,
5705 adjusting_event_count: adjusting_count,
5706 ..NotesGeneratorContext::default()
5707 };
5708
5709 let entity_notes = notes_gen.generate(&ctx);
5710 let standard_note_count = entity_notes.len() as u32;
5711 info!(
5712 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5713 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
5714 );
5715 financial_reporting
5716 .notes_to_financial_statements
5717 .extend(entity_notes);
5718
5719 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
5721 .debt_instruments
5722 .iter()
5723 .filter(|d| d.entity_id == company.code)
5724 .map(|d| {
5725 (
5726 format!("{:?}", d.instrument_type),
5727 d.principal,
5728 d.maturity_date.to_string(),
5729 )
5730 })
5731 .collect();
5732
5733 let hedge_count = treasury.hedge_relationships.len();
5734 let effective_hedges = treasury
5735 .hedge_relationships
5736 .iter()
5737 .filter(|h| h.is_effective)
5738 .count();
5739 let total_notional: rust_decimal::Decimal = treasury
5740 .hedging_instruments
5741 .iter()
5742 .map(|h| h.notional_amount)
5743 .sum();
5744 let total_fair_value: rust_decimal::Decimal = treasury
5745 .hedging_instruments
5746 .iter()
5747 .map(|h| h.fair_value)
5748 .sum();
5749
5750 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
5752 .provisions
5753 .iter()
5754 .filter(|p| p.entity_code == company.code)
5755 .map(|p| p.id.as_str())
5756 .collect();
5757 let provision_movements: Vec<(
5758 String,
5759 rust_decimal::Decimal,
5760 rust_decimal::Decimal,
5761 rust_decimal::Decimal,
5762 )> = accounting_standards
5763 .provision_movements
5764 .iter()
5765 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
5766 .map(|m| {
5767 let prov_type = accounting_standards
5768 .provisions
5769 .iter()
5770 .find(|p| p.id == m.provision_id)
5771 .map(|p| format!("{:?}", p.provision_type))
5772 .unwrap_or_else(|| "Unknown".to_string());
5773 (prov_type, m.opening, m.additions, m.closing)
5774 })
5775 .collect();
5776
5777 let enhanced_ctx = EnhancedNotesContext {
5778 entity_code: company.code.clone(),
5779 period: format!("FY{}", fiscal_year),
5780 currency: company.currency.clone(),
5781 finished_goods_value: rust_decimal::Decimal::ZERO,
5783 wip_value: rust_decimal::Decimal::ZERO,
5784 raw_materials_value: rust_decimal::Decimal::ZERO,
5785 debt_instruments,
5786 hedge_count,
5787 effective_hedges,
5788 total_notional,
5789 total_fair_value,
5790 provision_movements,
5791 };
5792
5793 let enhanced_notes =
5794 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
5795 if !enhanced_notes.is_empty() {
5796 info!(
5797 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
5798 company.code,
5799 enhanced_notes.len(),
5800 enhanced_ctx.debt_instruments.len(),
5801 hedge_count,
5802 enhanced_ctx.provision_movements.len(),
5803 );
5804 financial_reporting
5805 .notes_to_financial_statements
5806 .extend(enhanced_notes);
5807 }
5808 }
5809 }
5810
5811 fn build_trial_balance_from_entries(
5817 journal_entries: &[JournalEntry],
5818 coa: &ChartOfAccounts,
5819 company_code: &str,
5820 fiscal_year: u16,
5821 fiscal_period: u8,
5822 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5823 use rust_decimal::Decimal;
5824
5825 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
5827 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
5828
5829 for je in journal_entries {
5830 if je.header.company_code != company_code
5832 || je.header.fiscal_year != fiscal_year
5833 || je.header.fiscal_period != fiscal_period
5834 {
5835 continue;
5836 }
5837
5838 for line in &je.lines {
5839 let acct = &line.gl_account;
5840 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
5841 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
5842 }
5843 }
5844
5845 let mut all_accounts: Vec<&String> = account_debits
5847 .keys()
5848 .chain(account_credits.keys())
5849 .collect::<std::collections::HashSet<_>>()
5850 .into_iter()
5851 .collect();
5852 all_accounts.sort();
5853
5854 let mut entries = Vec::new();
5855
5856 for acct_number in all_accounts {
5857 let debit = account_debits
5858 .get(acct_number)
5859 .copied()
5860 .unwrap_or(Decimal::ZERO);
5861 let credit = account_credits
5862 .get(acct_number)
5863 .copied()
5864 .unwrap_or(Decimal::ZERO);
5865
5866 if debit.is_zero() && credit.is_zero() {
5867 continue;
5868 }
5869
5870 let account_name = coa
5872 .get_account(acct_number)
5873 .map(|gl| gl.short_description.clone())
5874 .unwrap_or_else(|| format!("Account {acct_number}"));
5875
5876 let category = Self::category_from_account_code(acct_number);
5881
5882 entries.push(datasynth_generators::TrialBalanceEntry {
5883 account_code: acct_number.clone(),
5884 account_name,
5885 category,
5886 debit_balance: debit,
5887 credit_balance: credit,
5888 });
5889 }
5890
5891 entries
5892 }
5893
5894 fn build_cumulative_trial_balance(
5901 journal_entries: &[JournalEntry],
5902 coa: &ChartOfAccounts,
5903 company_code: &str,
5904 start_date: NaiveDate,
5905 period_end: NaiveDate,
5906 fiscal_year: u16,
5907 fiscal_period: u8,
5908 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5909 use rust_decimal::Decimal;
5910
5911 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
5913 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
5914
5915 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
5917 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
5918
5919 for je in journal_entries {
5920 if je.header.company_code != company_code {
5921 continue;
5922 }
5923
5924 for line in &je.lines {
5925 let acct = &line.gl_account;
5926 let category = Self::category_from_account_code(acct);
5927 let is_bs_account = matches!(
5928 category.as_str(),
5929 "Cash"
5930 | "Receivables"
5931 | "Inventory"
5932 | "FixedAssets"
5933 | "Payables"
5934 | "AccruedLiabilities"
5935 | "LongTermDebt"
5936 | "Equity"
5937 );
5938
5939 if is_bs_account {
5940 if je.header.document_date <= period_end
5942 && je.header.document_date >= start_date
5943 {
5944 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5945 line.debit_amount;
5946 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5947 line.credit_amount;
5948 }
5949 } else {
5950 if je.header.fiscal_year == fiscal_year
5952 && je.header.fiscal_period == fiscal_period
5953 {
5954 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5955 line.debit_amount;
5956 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5957 line.credit_amount;
5958 }
5959 }
5960 }
5961 }
5962
5963 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
5965 all_accounts.extend(bs_debits.keys().cloned());
5966 all_accounts.extend(bs_credits.keys().cloned());
5967 all_accounts.extend(is_debits.keys().cloned());
5968 all_accounts.extend(is_credits.keys().cloned());
5969
5970 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
5971 sorted_accounts.sort();
5972
5973 let mut entries = Vec::new();
5974
5975 for acct_number in &sorted_accounts {
5976 let category = Self::category_from_account_code(acct_number);
5977 let is_bs_account = matches!(
5978 category.as_str(),
5979 "Cash"
5980 | "Receivables"
5981 | "Inventory"
5982 | "FixedAssets"
5983 | "Payables"
5984 | "AccruedLiabilities"
5985 | "LongTermDebt"
5986 | "Equity"
5987 );
5988
5989 let (debit, credit) = if is_bs_account {
5990 (
5991 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5992 bs_credits
5993 .get(acct_number)
5994 .copied()
5995 .unwrap_or(Decimal::ZERO),
5996 )
5997 } else {
5998 (
5999 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6000 is_credits
6001 .get(acct_number)
6002 .copied()
6003 .unwrap_or(Decimal::ZERO),
6004 )
6005 };
6006
6007 if debit.is_zero() && credit.is_zero() {
6008 continue;
6009 }
6010
6011 let account_name = coa
6012 .get_account(acct_number)
6013 .map(|gl| gl.short_description.clone())
6014 .unwrap_or_else(|| format!("Account {acct_number}"));
6015
6016 entries.push(datasynth_generators::TrialBalanceEntry {
6017 account_code: acct_number.clone(),
6018 account_name,
6019 category,
6020 debit_balance: debit,
6021 credit_balance: credit,
6022 });
6023 }
6024
6025 entries
6026 }
6027
6028 fn build_cash_flow_from_trial_balances(
6033 current_tb: &[datasynth_generators::TrialBalanceEntry],
6034 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
6035 net_income: rust_decimal::Decimal,
6036 ) -> Vec<CashFlowItem> {
6037 use rust_decimal::Decimal;
6038
6039 let aggregate =
6041 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
6042 let mut map: HashMap<String, Decimal> = HashMap::new();
6043 for entry in tb {
6044 let net = entry.debit_balance - entry.credit_balance;
6045 *map.entry(entry.category.clone()).or_default() += net;
6046 }
6047 map
6048 };
6049
6050 let current = aggregate(current_tb);
6051 let prior = prior_tb.map(aggregate);
6052
6053 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
6055 *map.get(key).unwrap_or(&Decimal::ZERO)
6056 };
6057
6058 let change = |key: &str| -> Decimal {
6060 let curr = get(¤t, key);
6061 match &prior {
6062 Some(p) => curr - get(p, key),
6063 None => curr,
6064 }
6065 };
6066
6067 let fixed_asset_change = change("FixedAssets");
6070 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
6071 -fixed_asset_change
6072 } else {
6073 Decimal::ZERO
6074 };
6075
6076 let ar_change = change("Receivables");
6078 let inventory_change = change("Inventory");
6079 let ap_change = change("Payables");
6081 let accrued_change = change("AccruedLiabilities");
6082
6083 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
6084 + (-ap_change)
6085 + (-accrued_change);
6086
6087 let capex = if fixed_asset_change > Decimal::ZERO {
6089 -fixed_asset_change
6090 } else {
6091 Decimal::ZERO
6092 };
6093 let investing_cf = capex;
6094
6095 let debt_change = -change("LongTermDebt");
6097 let equity_change = -change("Equity");
6098 let financing_cf = debt_change + equity_change;
6099
6100 let net_change = operating_cf + investing_cf + financing_cf;
6101
6102 vec![
6103 CashFlowItem {
6104 item_code: "CF-NI".to_string(),
6105 label: "Net Income".to_string(),
6106 category: CashFlowCategory::Operating,
6107 amount: net_income,
6108 amount_prior: None,
6109 sort_order: 1,
6110 is_total: false,
6111 },
6112 CashFlowItem {
6113 item_code: "CF-DEP".to_string(),
6114 label: "Depreciation & Amortization".to_string(),
6115 category: CashFlowCategory::Operating,
6116 amount: depreciation_addback,
6117 amount_prior: None,
6118 sort_order: 2,
6119 is_total: false,
6120 },
6121 CashFlowItem {
6122 item_code: "CF-AR".to_string(),
6123 label: "Change in Accounts Receivable".to_string(),
6124 category: CashFlowCategory::Operating,
6125 amount: -ar_change,
6126 amount_prior: None,
6127 sort_order: 3,
6128 is_total: false,
6129 },
6130 CashFlowItem {
6131 item_code: "CF-AP".to_string(),
6132 label: "Change in Accounts Payable".to_string(),
6133 category: CashFlowCategory::Operating,
6134 amount: -ap_change,
6135 amount_prior: None,
6136 sort_order: 4,
6137 is_total: false,
6138 },
6139 CashFlowItem {
6140 item_code: "CF-INV".to_string(),
6141 label: "Change in Inventory".to_string(),
6142 category: CashFlowCategory::Operating,
6143 amount: -inventory_change,
6144 amount_prior: None,
6145 sort_order: 5,
6146 is_total: false,
6147 },
6148 CashFlowItem {
6149 item_code: "CF-OP".to_string(),
6150 label: "Net Cash from Operating Activities".to_string(),
6151 category: CashFlowCategory::Operating,
6152 amount: operating_cf,
6153 amount_prior: None,
6154 sort_order: 6,
6155 is_total: true,
6156 },
6157 CashFlowItem {
6158 item_code: "CF-CAPEX".to_string(),
6159 label: "Capital Expenditures".to_string(),
6160 category: CashFlowCategory::Investing,
6161 amount: capex,
6162 amount_prior: None,
6163 sort_order: 7,
6164 is_total: false,
6165 },
6166 CashFlowItem {
6167 item_code: "CF-INV-T".to_string(),
6168 label: "Net Cash from Investing Activities".to_string(),
6169 category: CashFlowCategory::Investing,
6170 amount: investing_cf,
6171 amount_prior: None,
6172 sort_order: 8,
6173 is_total: true,
6174 },
6175 CashFlowItem {
6176 item_code: "CF-DEBT".to_string(),
6177 label: "Net Borrowings / (Repayments)".to_string(),
6178 category: CashFlowCategory::Financing,
6179 amount: debt_change,
6180 amount_prior: None,
6181 sort_order: 9,
6182 is_total: false,
6183 },
6184 CashFlowItem {
6185 item_code: "CF-EQ".to_string(),
6186 label: "Equity Changes".to_string(),
6187 category: CashFlowCategory::Financing,
6188 amount: equity_change,
6189 amount_prior: None,
6190 sort_order: 10,
6191 is_total: false,
6192 },
6193 CashFlowItem {
6194 item_code: "CF-FIN-T".to_string(),
6195 label: "Net Cash from Financing Activities".to_string(),
6196 category: CashFlowCategory::Financing,
6197 amount: financing_cf,
6198 amount_prior: None,
6199 sort_order: 11,
6200 is_total: true,
6201 },
6202 CashFlowItem {
6203 item_code: "CF-NET".to_string(),
6204 label: "Net Change in Cash".to_string(),
6205 category: CashFlowCategory::Operating,
6206 amount: net_change,
6207 amount_prior: None,
6208 sort_order: 12,
6209 is_total: true,
6210 },
6211 ]
6212 }
6213
6214 fn calculate_net_income_from_tb(
6218 tb: &[datasynth_generators::TrialBalanceEntry],
6219 ) -> rust_decimal::Decimal {
6220 use rust_decimal::Decimal;
6221
6222 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6223 for entry in tb {
6224 let net = entry.debit_balance - entry.credit_balance;
6225 *aggregated.entry(entry.category.clone()).or_default() += net;
6226 }
6227
6228 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6229 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6230 let opex = *aggregated
6231 .get("OperatingExpenses")
6232 .unwrap_or(&Decimal::ZERO);
6233 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6234 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6235
6236 let operating_income = revenue - cogs - opex - other_expenses - other_income;
6239 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
6241 operating_income - tax
6242 }
6243
/// Maps a GL account code to a reporting category using its first two
/// characters.
///
/// | Prefix    | Category             |
/// |-----------|----------------------|
/// | `10`      | `Cash`               |
/// | `11`      | `Receivables`        |
/// | `12`-`14` | `Inventory`          |
/// | `15`-`19` | `FixedAssets`        |
/// | `20`      | `Payables`           |
/// | `21`-`24` | `AccruedLiabilities` |
/// | `25`-`29` | `LongTermDebt`       |
/// | `30`-`39` | `Equity`             |
/// | `40`-`44` | `Revenue`            |
/// | `50`-`52` | `CostOfSales`        |
/// | `60`-`69` | `OperatingExpenses`  |
/// | `70`-`74` | `OtherIncome`        |
/// | `80`-`89` | `OtherExpenses`      |
///
/// Any other prefix, including codes shorter than two characters, falls back
/// to `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let label = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Unmapped prefixes are treated as operating expenses.
        _ => "OperatingExpenses",
    };
    label.to_owned()
}
6272
6273 fn phase_hr_data(
6275 &mut self,
6276 stats: &mut EnhancedGenerationStatistics,
6277 ) -> SynthResult<HrSnapshot> {
6278 if !self.phase_config.generate_hr {
6279 debug!("Phase 16: Skipped (HR generation disabled)");
6280 return Ok(HrSnapshot::default());
6281 }
6282
6283 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6284
6285 let seed = self.seed;
6286 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6287 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6288 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6289 let company_code = self
6290 .config
6291 .companies
6292 .first()
6293 .map(|c| c.code.as_str())
6294 .unwrap_or("1000");
6295 let currency = self
6296 .config
6297 .companies
6298 .first()
6299 .map(|c| c.currency.as_str())
6300 .unwrap_or("USD");
6301
6302 let employee_ids: Vec<String> = self
6303 .master_data
6304 .employees
6305 .iter()
6306 .map(|e| e.employee_id.clone())
6307 .collect();
6308
6309 if employee_ids.is_empty() {
6310 debug!("Phase 16: Skipped (no employees available)");
6311 return Ok(HrSnapshot::default());
6312 }
6313
6314 let cost_center_ids: Vec<String> = self
6317 .master_data
6318 .employees
6319 .iter()
6320 .filter_map(|e| e.cost_center.clone())
6321 .collect::<std::collections::HashSet<_>>()
6322 .into_iter()
6323 .collect();
6324
6325 let mut snapshot = HrSnapshot::default();
6326
6327 if self.config.hr.payroll.enabled {
6329 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6330 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6331
6332 let payroll_pack = self.primary_pack();
6334
6335 payroll_gen.set_country_pack(payroll_pack.clone());
6338
6339 let employees_with_salary: Vec<(
6340 String,
6341 rust_decimal::Decimal,
6342 Option<String>,
6343 Option<String>,
6344 )> = self
6345 .master_data
6346 .employees
6347 .iter()
6348 .map(|e| {
6349 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6352 e.base_salary
6353 } else {
6354 rust_decimal::Decimal::from(60_000)
6355 };
6356 (
6357 e.employee_id.clone(),
6358 annual, e.cost_center.clone(),
6360 e.department_id.clone(),
6361 )
6362 })
6363 .collect();
6364
6365 let change_history = &self.master_data.employee_change_history;
6368 let has_changes = !change_history.is_empty();
6369 if has_changes {
6370 debug!(
6371 "Payroll will incorporate {} employee change events",
6372 change_history.len()
6373 );
6374 }
6375
6376 for month in 0..self.config.global.period_months {
6377 let period_start = start_date + chrono::Months::new(month);
6378 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6379 let (run, items) = if has_changes {
6380 payroll_gen.generate_with_changes(
6381 company_code,
6382 &employees_with_salary,
6383 period_start,
6384 period_end,
6385 currency,
6386 change_history,
6387 )
6388 } else {
6389 payroll_gen.generate(
6390 company_code,
6391 &employees_with_salary,
6392 period_start,
6393 period_end,
6394 currency,
6395 )
6396 };
6397 snapshot.payroll_runs.push(run);
6398 snapshot.payroll_run_count += 1;
6399 snapshot.payroll_line_item_count += items.len();
6400 snapshot.payroll_line_items.extend(items);
6401 }
6402 }
6403
6404 if self.config.hr.time_attendance.enabled {
6406 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6407 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6408 let entries = time_gen.generate(
6409 &employee_ids,
6410 start_date,
6411 end_date,
6412 &self.config.hr.time_attendance,
6413 );
6414 snapshot.time_entry_count = entries.len();
6415 snapshot.time_entries = entries;
6416 }
6417
6418 if self.config.hr.expenses.enabled {
6420 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6421 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6422 expense_gen.set_country_pack(self.primary_pack().clone());
6423 let company_currency = self
6424 .config
6425 .companies
6426 .first()
6427 .map(|c| c.currency.as_str())
6428 .unwrap_or("USD");
6429 let reports = expense_gen.generate_with_currency(
6430 &employee_ids,
6431 start_date,
6432 end_date,
6433 &self.config.hr.expenses,
6434 company_currency,
6435 );
6436 snapshot.expense_report_count = reports.len();
6437 snapshot.expense_reports = reports;
6438 }
6439
6440 if self.config.hr.payroll.enabled {
6442 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6443 let employee_pairs: Vec<(String, String)> = self
6444 .master_data
6445 .employees
6446 .iter()
6447 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6448 .collect();
6449 let enrollments =
6450 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6451 snapshot.benefit_enrollment_count = enrollments.len();
6452 snapshot.benefit_enrollments = enrollments;
6453 }
6454
6455 if self.phase_config.generate_hr {
6457 let entity_name = self
6458 .config
6459 .companies
6460 .first()
6461 .map(|c| c.name.as_str())
6462 .unwrap_or("Entity");
6463 let period_months = self.config.global.period_months;
6464 let period_label = {
6465 let y = start_date.year();
6466 let m = start_date.month();
6467 if period_months >= 12 {
6468 format!("FY{y}")
6469 } else {
6470 format!("{y}-{m:02}")
6471 }
6472 };
6473 let reporting_date =
6474 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6475
6476 let avg_salary: Option<rust_decimal::Decimal> = {
6481 let employee_count = employee_ids.len();
6482 if self.config.hr.payroll.enabled
6483 && employee_count > 0
6484 && !snapshot.payroll_runs.is_empty()
6485 {
6486 let total_gross: rust_decimal::Decimal = snapshot
6488 .payroll_runs
6489 .iter()
6490 .filter(|r| r.company_code == company_code)
6491 .map(|r| r.total_gross)
6492 .sum();
6493 if total_gross > rust_decimal::Decimal::ZERO {
6494 let annual_total = if period_months > 0 && period_months < 12 {
6496 total_gross * rust_decimal::Decimal::from(12u32)
6497 / rust_decimal::Decimal::from(period_months)
6498 } else {
6499 total_gross
6500 };
6501 Some(
6502 (annual_total / rust_decimal::Decimal::from(employee_count))
6503 .round_dp(2),
6504 )
6505 } else {
6506 None
6507 }
6508 } else {
6509 None
6510 }
6511 };
6512
6513 let mut pension_gen =
6514 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6515 let pension_snap = pension_gen.generate(
6516 company_code,
6517 entity_name,
6518 &period_label,
6519 reporting_date,
6520 employee_ids.len(),
6521 currency,
6522 avg_salary,
6523 period_months,
6524 );
6525 snapshot.pension_plan_count = pension_snap.plans.len();
6526 snapshot.pension_plans = pension_snap.plans;
6527 snapshot.pension_obligations = pension_snap.obligations;
6528 snapshot.pension_plan_assets = pension_snap.plan_assets;
6529 snapshot.pension_disclosures = pension_snap.disclosures;
6530 snapshot.pension_journal_entries = pension_snap.journal_entries;
6535 }
6536
6537 if self.phase_config.generate_hr && !employee_ids.is_empty() {
6539 let period_months = self.config.global.period_months;
6540 let period_label = {
6541 let y = start_date.year();
6542 let m = start_date.month();
6543 if period_months >= 12 {
6544 format!("FY{y}")
6545 } else {
6546 format!("{y}-{m:02}")
6547 }
6548 };
6549 let reporting_date =
6550 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6551
6552 let mut stock_comp_gen =
6553 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
6554 let stock_snap = stock_comp_gen.generate(
6555 company_code,
6556 &employee_ids,
6557 start_date,
6558 &period_label,
6559 reporting_date,
6560 currency,
6561 );
6562 snapshot.stock_grant_count = stock_snap.grants.len();
6563 snapshot.stock_grants = stock_snap.grants;
6564 snapshot.stock_comp_expenses = stock_snap.expenses;
6565 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
6566 }
6567
6568 stats.payroll_run_count = snapshot.payroll_run_count;
6569 stats.time_entry_count = snapshot.time_entry_count;
6570 stats.expense_report_count = snapshot.expense_report_count;
6571 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
6572 stats.pension_plan_count = snapshot.pension_plan_count;
6573 stats.stock_grant_count = snapshot.stock_grant_count;
6574
6575 info!(
6576 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
6577 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
6578 snapshot.time_entry_count, snapshot.expense_report_count,
6579 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
6580 snapshot.stock_grant_count
6581 );
6582 self.check_resources_with_log("post-hr")?;
6583
6584 Ok(snapshot)
6585 }
6586
6587 fn phase_accounting_standards(
6589 &mut self,
6590 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
6591 journal_entries: &[JournalEntry],
6592 stats: &mut EnhancedGenerationStatistics,
6593 ) -> SynthResult<AccountingStandardsSnapshot> {
6594 if !self.phase_config.generate_accounting_standards {
6595 debug!("Phase 17: Skipped (accounting standards generation disabled)");
6596 return Ok(AccountingStandardsSnapshot::default());
6597 }
6598 info!("Phase 17: Generating Accounting Standards Data");
6599
6600 let seed = self.seed;
6601 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6602 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6603 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6604 let company_code = self
6605 .config
6606 .companies
6607 .first()
6608 .map(|c| c.code.as_str())
6609 .unwrap_or("1000");
6610 let currency = self
6611 .config
6612 .companies
6613 .first()
6614 .map(|c| c.currency.as_str())
6615 .unwrap_or("USD");
6616
6617 let framework = match self.config.accounting_standards.framework {
6622 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
6623 datasynth_standards::framework::AccountingFramework::UsGaap
6624 }
6625 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
6626 datasynth_standards::framework::AccountingFramework::Ifrs
6627 }
6628 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
6629 datasynth_standards::framework::AccountingFramework::DualReporting
6630 }
6631 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
6632 datasynth_standards::framework::AccountingFramework::FrenchGaap
6633 }
6634 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
6635 datasynth_standards::framework::AccountingFramework::GermanGaap
6636 }
6637 None => {
6638 let pack = self.primary_pack();
6640 let pack_fw = pack.accounting.framework.as_str();
6641 match pack_fw {
6642 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
6643 "dual_reporting" => {
6644 datasynth_standards::framework::AccountingFramework::DualReporting
6645 }
6646 "french_gaap" => {
6647 datasynth_standards::framework::AccountingFramework::FrenchGaap
6648 }
6649 "german_gaap" | "hgb" => {
6650 datasynth_standards::framework::AccountingFramework::GermanGaap
6651 }
6652 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
6654 }
6655 }
6656 };
6657
6658 let mut snapshot = AccountingStandardsSnapshot::default();
6659
6660 if self.config.accounting_standards.revenue_recognition.enabled {
6662 let customer_ids: Vec<String> = self
6663 .master_data
6664 .customers
6665 .iter()
6666 .map(|c| c.customer_id.clone())
6667 .collect();
6668
6669 if !customer_ids.is_empty() {
6670 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
6671 let contracts = rev_gen.generate(
6672 company_code,
6673 &customer_ids,
6674 start_date,
6675 end_date,
6676 currency,
6677 &self.config.accounting_standards.revenue_recognition,
6678 framework,
6679 );
6680 snapshot.revenue_contract_count = contracts.len();
6681 snapshot.contracts = contracts;
6682 }
6683 }
6684
6685 if self.config.accounting_standards.impairment.enabled {
6687 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
6688 .master_data
6689 .assets
6690 .iter()
6691 .map(|a| {
6692 (
6693 a.asset_id.clone(),
6694 a.description.clone(),
6695 a.acquisition_cost,
6696 )
6697 })
6698 .collect();
6699
6700 if !asset_data.is_empty() {
6701 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
6702 let tests = imp_gen.generate(
6703 company_code,
6704 &asset_data,
6705 end_date,
6706 &self.config.accounting_standards.impairment,
6707 framework,
6708 );
6709 snapshot.impairment_test_count = tests.len();
6710 snapshot.impairment_tests = tests;
6711 }
6712 }
6713
6714 if self
6716 .config
6717 .accounting_standards
6718 .business_combinations
6719 .enabled
6720 {
6721 let bc_config = &self.config.accounting_standards.business_combinations;
6722 let framework_str = match framework {
6723 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6724 _ => "US_GAAP",
6725 };
6726 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
6727 let bc_snap = bc_gen.generate(
6728 company_code,
6729 currency,
6730 start_date,
6731 end_date,
6732 bc_config.acquisition_count,
6733 framework_str,
6734 );
6735 snapshot.business_combination_count = bc_snap.combinations.len();
6736 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6737 snapshot.business_combinations = bc_snap.combinations;
6738 }
6739
6740 if self
6742 .config
6743 .accounting_standards
6744 .expected_credit_loss
6745 .enabled
6746 {
6747 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6748 let framework_str = match framework {
6749 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6750 _ => "ASC_326",
6751 };
6752
6753 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6756
6757 let mut ecl_gen = EclGenerator::new(seed + 43);
6758
6759 let bucket_exposures: Vec<(
6761 datasynth_core::models::subledger::ar::AgingBucket,
6762 rust_decimal::Decimal,
6763 )> = if ar_aging_reports.is_empty() {
6764 use datasynth_core::models::subledger::ar::AgingBucket;
6766 vec![
6767 (
6768 AgingBucket::Current,
6769 rust_decimal::Decimal::from(500_000_u32),
6770 ),
6771 (
6772 AgingBucket::Days1To30,
6773 rust_decimal::Decimal::from(120_000_u32),
6774 ),
6775 (
6776 AgingBucket::Days31To60,
6777 rust_decimal::Decimal::from(45_000_u32),
6778 ),
6779 (
6780 AgingBucket::Days61To90,
6781 rust_decimal::Decimal::from(15_000_u32),
6782 ),
6783 (
6784 AgingBucket::Over90Days,
6785 rust_decimal::Decimal::from(8_000_u32),
6786 ),
6787 ]
6788 } else {
6789 use datasynth_core::models::subledger::ar::AgingBucket;
6790 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6792 std::collections::HashMap::new();
6793 for report in ar_aging_reports {
6794 for (bucket, amount) in &report.bucket_totals {
6795 *totals.entry(*bucket).or_default() += amount;
6796 }
6797 }
6798 AgingBucket::all()
6799 .into_iter()
6800 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6801 .collect()
6802 };
6803
6804 let ecl_snap = ecl_gen.generate(
6805 company_code,
6806 end_date,
6807 &bucket_exposures,
6808 ecl_config,
6809 &period_label,
6810 framework_str,
6811 );
6812
6813 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
6814 snapshot.ecl_models = ecl_snap.ecl_models;
6815 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
6816 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
6817 }
6818
6819 {
6821 let framework_str = match framework {
6822 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6823 _ => "US_GAAP",
6824 };
6825
6826 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
6831 .max(rust_decimal::Decimal::from(100_000_u32));
6832
6833 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6834
6835 let mut prov_gen = ProvisionGenerator::new(seed + 44);
6836 let prov_snap = prov_gen.generate(
6837 company_code,
6838 currency,
6839 revenue_proxy,
6840 end_date,
6841 &period_label,
6842 framework_str,
6843 None, );
6845
6846 snapshot.provision_count = prov_snap.provisions.len();
6847 snapshot.provisions = prov_snap.provisions;
6848 snapshot.provision_movements = prov_snap.movements;
6849 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
6850 snapshot.provision_journal_entries = prov_snap.journal_entries;
6851 }
6852
6853 {
6857 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6858
6859 let presentation_currency = self
6860 .config
6861 .global
6862 .presentation_currency
6863 .clone()
6864 .unwrap_or_else(|| self.config.global.group_currency.clone());
6865
6866 let mut rate_table = FxRateTable::new(&presentation_currency);
6869
6870 let base_rates = base_rates_usd();
6874 for (ccy, rate) in &base_rates {
6875 rate_table.add_rate(FxRate::new(
6876 ccy,
6877 "USD",
6878 RateType::Closing,
6879 end_date,
6880 *rate,
6881 "SYNTHETIC",
6882 ));
6883 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
6886 rate_table.add_rate(FxRate::new(
6887 ccy,
6888 "USD",
6889 RateType::Average,
6890 end_date,
6891 avg,
6892 "SYNTHETIC",
6893 ));
6894 }
6895
6896 let mut translation_results = Vec::new();
6897 for company in &self.config.companies {
6898 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
6901 .max(rust_decimal::Decimal::from(100_000_u32));
6902
6903 let func_ccy = company
6904 .functional_currency
6905 .clone()
6906 .unwrap_or_else(|| company.currency.clone());
6907
6908 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
6909 &company.code,
6910 &func_ccy,
6911 &presentation_currency,
6912 &ias21_period_label,
6913 end_date,
6914 company_revenue,
6915 &rate_table,
6916 );
6917 translation_results.push(result);
6918 }
6919
6920 snapshot.currency_translation_count = translation_results.len();
6921 snapshot.currency_translation_results = translation_results;
6922 }
6923
6924 stats.revenue_contract_count = snapshot.revenue_contract_count;
6925 stats.impairment_test_count = snapshot.impairment_test_count;
6926 stats.business_combination_count = snapshot.business_combination_count;
6927 stats.ecl_model_count = snapshot.ecl_model_count;
6928 stats.provision_count = snapshot.provision_count;
6929
6930 info!(
6931 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
6932 snapshot.revenue_contract_count,
6933 snapshot.impairment_test_count,
6934 snapshot.business_combination_count,
6935 snapshot.ecl_model_count,
6936 snapshot.provision_count,
6937 snapshot.currency_translation_count
6938 );
6939 self.check_resources_with_log("post-accounting-standards")?;
6940
6941 Ok(snapshot)
6942 }
6943
6944 fn phase_manufacturing(
6946 &mut self,
6947 stats: &mut EnhancedGenerationStatistics,
6948 ) -> SynthResult<ManufacturingSnapshot> {
6949 if !self.phase_config.generate_manufacturing {
6950 debug!("Phase 18: Skipped (manufacturing generation disabled)");
6951 return Ok(ManufacturingSnapshot::default());
6952 }
6953 info!("Phase 18: Generating Manufacturing Data");
6954
6955 let seed = self.seed;
6956 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6957 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6958 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6959 let company_code = self
6960 .config
6961 .companies
6962 .first()
6963 .map(|c| c.code.as_str())
6964 .unwrap_or("1000");
6965
6966 let material_data: Vec<(String, String)> = self
6967 .master_data
6968 .materials
6969 .iter()
6970 .map(|m| (m.material_id.clone(), m.description.clone()))
6971 .collect();
6972
6973 if material_data.is_empty() {
6974 debug!("Phase 18: Skipped (no materials available)");
6975 return Ok(ManufacturingSnapshot::default());
6976 }
6977
6978 let mut snapshot = ManufacturingSnapshot::default();
6979
6980 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
6982 let production_orders = prod_gen.generate(
6983 company_code,
6984 &material_data,
6985 start_date,
6986 end_date,
6987 &self.config.manufacturing.production_orders,
6988 &self.config.manufacturing.costing,
6989 &self.config.manufacturing.routing,
6990 );
6991 snapshot.production_order_count = production_orders.len();
6992
6993 let inspection_data: Vec<(String, String, String)> = production_orders
6995 .iter()
6996 .map(|po| {
6997 (
6998 po.order_id.clone(),
6999 po.material_id.clone(),
7000 po.material_description.clone(),
7001 )
7002 })
7003 .collect();
7004
7005 snapshot.production_orders = production_orders;
7006
7007 if !inspection_data.is_empty() {
7008 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
7009 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
7010 snapshot.quality_inspection_count = inspections.len();
7011 snapshot.quality_inspections = inspections;
7012 }
7013
7014 let storage_locations: Vec<(String, String)> = material_data
7016 .iter()
7017 .enumerate()
7018 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
7019 .collect();
7020
7021 let employee_ids: Vec<String> = self
7022 .master_data
7023 .employees
7024 .iter()
7025 .map(|e| e.employee_id.clone())
7026 .collect();
7027 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
7028 .with_employee_pool(employee_ids);
7029 let mut cycle_count_total = 0usize;
7030 for month in 0..self.config.global.period_months {
7031 let count_date = start_date + chrono::Months::new(month);
7032 let items_per_count = storage_locations.len().clamp(10, 50);
7033 let cc = cc_gen.generate(
7034 company_code,
7035 &storage_locations,
7036 count_date,
7037 items_per_count,
7038 );
7039 snapshot.cycle_counts.push(cc);
7040 cycle_count_total += 1;
7041 }
7042 snapshot.cycle_count_count = cycle_count_total;
7043
7044 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
7046 let bom_components = bom_gen.generate(company_code, &material_data);
7047 snapshot.bom_component_count = bom_components.len();
7048 snapshot.bom_components = bom_components;
7049
7050 let currency = self
7052 .config
7053 .companies
7054 .first()
7055 .map(|c| c.currency.as_str())
7056 .unwrap_or("USD");
7057 let production_order_ids: Vec<String> = snapshot
7058 .production_orders
7059 .iter()
7060 .map(|po| po.order_id.clone())
7061 .collect();
7062 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
7063 let inventory_movements = inv_mov_gen.generate_with_production_orders(
7064 company_code,
7065 &material_data,
7066 start_date,
7067 end_date,
7068 2,
7069 currency,
7070 &production_order_ids,
7071 );
7072 snapshot.inventory_movement_count = inventory_movements.len();
7073 snapshot.inventory_movements = inventory_movements;
7074
7075 stats.production_order_count = snapshot.production_order_count;
7076 stats.quality_inspection_count = snapshot.quality_inspection_count;
7077 stats.cycle_count_count = snapshot.cycle_count_count;
7078 stats.bom_component_count = snapshot.bom_component_count;
7079 stats.inventory_movement_count = snapshot.inventory_movement_count;
7080
7081 info!(
7082 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
7083 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
7084 snapshot.bom_component_count, snapshot.inventory_movement_count
7085 );
7086 self.check_resources_with_log("post-manufacturing")?;
7087
7088 Ok(snapshot)
7089 }
7090
7091 fn phase_sales_kpi_budgets(
7093 &mut self,
7094 coa: &Arc<ChartOfAccounts>,
7095 financial_reporting: &FinancialReportingSnapshot,
7096 stats: &mut EnhancedGenerationStatistics,
7097 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
7098 if !self.phase_config.generate_sales_kpi_budgets {
7099 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
7100 return Ok(SalesKpiBudgetsSnapshot::default());
7101 }
7102 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
7103
7104 let seed = self.seed;
7105 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7106 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7107 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7108 let company_code = self
7109 .config
7110 .companies
7111 .first()
7112 .map(|c| c.code.as_str())
7113 .unwrap_or("1000");
7114
7115 let mut snapshot = SalesKpiBudgetsSnapshot::default();
7116
7117 if self.config.sales_quotes.enabled {
7119 let customer_data: Vec<(String, String)> = self
7120 .master_data
7121 .customers
7122 .iter()
7123 .map(|c| (c.customer_id.clone(), c.name.clone()))
7124 .collect();
7125 let material_data: Vec<(String, String)> = self
7126 .master_data
7127 .materials
7128 .iter()
7129 .map(|m| (m.material_id.clone(), m.description.clone()))
7130 .collect();
7131
7132 if !customer_data.is_empty() && !material_data.is_empty() {
7133 let employee_ids: Vec<String> = self
7134 .master_data
7135 .employees
7136 .iter()
7137 .map(|e| e.employee_id.clone())
7138 .collect();
7139 let customer_ids: Vec<String> = self
7140 .master_data
7141 .customers
7142 .iter()
7143 .map(|c| c.customer_id.clone())
7144 .collect();
7145 let company_currency = self
7146 .config
7147 .companies
7148 .first()
7149 .map(|c| c.currency.as_str())
7150 .unwrap_or("USD");
7151
7152 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
7153 .with_pools(employee_ids, customer_ids);
7154 let quotes = quote_gen.generate_with_currency(
7155 company_code,
7156 &customer_data,
7157 &material_data,
7158 start_date,
7159 end_date,
7160 &self.config.sales_quotes,
7161 company_currency,
7162 );
7163 snapshot.sales_quote_count = quotes.len();
7164 snapshot.sales_quotes = quotes;
7165 }
7166 }
7167
7168 if self.config.financial_reporting.management_kpis.enabled {
7170 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
7171 let mut kpis = kpi_gen.generate(
7172 company_code,
7173 start_date,
7174 end_date,
7175 &self.config.financial_reporting.management_kpis,
7176 );
7177
7178 {
7180 use rust_decimal::Decimal;
7181
7182 if let Some(income_stmt) =
7183 financial_reporting.financial_statements.iter().find(|fs| {
7184 fs.statement_type == StatementType::IncomeStatement
7185 && fs.company_code == company_code
7186 })
7187 {
7188 let total_revenue: Decimal = income_stmt
7190 .line_items
7191 .iter()
7192 .filter(|li| li.section.contains("Revenue") && !li.is_total)
7193 .map(|li| li.amount)
7194 .sum();
7195 let total_cogs: Decimal = income_stmt
7196 .line_items
7197 .iter()
7198 .filter(|li| {
7199 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
7200 && !li.is_total
7201 })
7202 .map(|li| li.amount.abs())
7203 .sum();
7204 let total_opex: Decimal = income_stmt
7205 .line_items
7206 .iter()
7207 .filter(|li| {
7208 li.section.contains("Expense")
7209 && !li.is_total
7210 && !li.section.contains("Cost")
7211 })
7212 .map(|li| li.amount.abs())
7213 .sum();
7214
7215 if total_revenue > Decimal::ZERO {
7216 let hundred = Decimal::from(100);
7217 let gross_margin_pct =
7218 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
7219 let operating_income = total_revenue - total_cogs - total_opex;
7220 let op_margin_pct =
7221 (operating_income * hundred / total_revenue).round_dp(2);
7222
7223 for kpi in &mut kpis {
7225 if kpi.name == "Gross Margin" {
7226 kpi.value = gross_margin_pct;
7227 } else if kpi.name == "Operating Margin" {
7228 kpi.value = op_margin_pct;
7229 }
7230 }
7231 }
7232 }
7233
7234 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
7236 fs.statement_type == StatementType::BalanceSheet
7237 && fs.company_code == company_code
7238 }) {
7239 let current_assets: Decimal = bs
7240 .line_items
7241 .iter()
7242 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
7243 .map(|li| li.amount)
7244 .sum();
7245 let current_liabilities: Decimal = bs
7246 .line_items
7247 .iter()
7248 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
7249 .map(|li| li.amount.abs())
7250 .sum();
7251
7252 if current_liabilities > Decimal::ZERO {
7253 let current_ratio = (current_assets / current_liabilities).round_dp(2);
7254 for kpi in &mut kpis {
7255 if kpi.name == "Current Ratio" {
7256 kpi.value = current_ratio;
7257 }
7258 }
7259 }
7260 }
7261 }
7262
7263 snapshot.kpi_count = kpis.len();
7264 snapshot.kpis = kpis;
7265 }
7266
7267 if self.config.financial_reporting.budgets.enabled {
7269 let account_data: Vec<(String, String)> = coa
7270 .accounts
7271 .iter()
7272 .map(|a| (a.account_number.clone(), a.short_description.clone()))
7273 .collect();
7274
7275 if !account_data.is_empty() {
7276 let fiscal_year = start_date.year() as u32;
7277 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
7278 let budget = budget_gen.generate(
7279 company_code,
7280 fiscal_year,
7281 &account_data,
7282 &self.config.financial_reporting.budgets,
7283 );
7284 snapshot.budget_line_count = budget.line_items.len();
7285 snapshot.budgets.push(budget);
7286 }
7287 }
7288
7289 stats.sales_quote_count = snapshot.sales_quote_count;
7290 stats.kpi_count = snapshot.kpi_count;
7291 stats.budget_line_count = snapshot.budget_line_count;
7292
7293 info!(
7294 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
7295 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
7296 );
7297 self.check_resources_with_log("post-sales-kpi-budgets")?;
7298
7299 Ok(snapshot)
7300 }
7301
7302 fn compute_pre_tax_income(
7309 company_code: &str,
7310 journal_entries: &[JournalEntry],
7311 ) -> rust_decimal::Decimal {
7312 use datasynth_core::accounts::AccountCategory;
7313 use rust_decimal::Decimal;
7314
7315 let mut total_revenue = Decimal::ZERO;
7316 let mut total_expenses = Decimal::ZERO;
7317
7318 for je in journal_entries {
7319 if je.header.company_code != company_code {
7320 continue;
7321 }
7322 for line in &je.lines {
7323 let cat = AccountCategory::from_account(&line.gl_account);
7324 match cat {
7325 AccountCategory::Revenue => {
7326 total_revenue += line.credit_amount - line.debit_amount;
7327 }
7328 AccountCategory::Cogs
7329 | AccountCategory::OperatingExpense
7330 | AccountCategory::OtherIncomeExpense => {
7331 total_expenses += line.debit_amount - line.credit_amount;
7332 }
7333 _ => {}
7334 }
7335 }
7336 }
7337
7338 let pti = (total_revenue - total_expenses).round_dp(2);
7339 if pti == rust_decimal::Decimal::ZERO {
7340 rust_decimal::Decimal::from(1_000_000u32)
7343 } else {
7344 pti
7345 }
7346 }
7347
7348 fn phase_tax_generation(
7350 &mut self,
7351 document_flows: &DocumentFlowSnapshot,
7352 journal_entries: &[JournalEntry],
7353 stats: &mut EnhancedGenerationStatistics,
7354 ) -> SynthResult<TaxSnapshot> {
7355 if !self.phase_config.generate_tax {
7356 debug!("Phase 20: Skipped (tax generation disabled)");
7357 return Ok(TaxSnapshot::default());
7358 }
7359 info!("Phase 20: Generating Tax Data");
7360
7361 let seed = self.seed;
7362 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7363 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7364 let fiscal_year = start_date.year();
7365 let company_code = self
7366 .config
7367 .companies
7368 .first()
7369 .map(|c| c.code.as_str())
7370 .unwrap_or("1000");
7371
7372 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7373 seed + 370,
7374 self.config.tax.clone(),
7375 );
7376
7377 let pack = self.primary_pack().clone();
7378 let (jurisdictions, codes) =
7379 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7380
7381 let mut provisions = Vec::new();
7383 if self.config.tax.provisions.enabled {
7384 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7385 for company in &self.config.companies {
7386 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7387 let statutory_rate = rust_decimal::Decimal::new(
7388 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7389 2,
7390 );
7391 let provision = provision_gen.generate(
7392 &company.code,
7393 start_date,
7394 pre_tax_income,
7395 statutory_rate,
7396 );
7397 provisions.push(provision);
7398 }
7399 }
7400
7401 let mut tax_lines = Vec::new();
7403 if !codes.is_empty() {
7404 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7405 datasynth_generators::TaxLineGeneratorConfig::default(),
7406 codes.clone(),
7407 seed + 372,
7408 );
7409
7410 let buyer_country = self
7413 .config
7414 .companies
7415 .first()
7416 .map(|c| c.country.as_str())
7417 .unwrap_or("US");
7418 for vi in &document_flows.vendor_invoices {
7419 let lines = tax_line_gen.generate_for_document(
7420 datasynth_core::models::TaxableDocumentType::VendorInvoice,
7421 &vi.header.document_id,
7422 buyer_country, buyer_country,
7424 vi.payable_amount,
7425 vi.header.document_date,
7426 None,
7427 );
7428 tax_lines.extend(lines);
7429 }
7430
7431 for ci in &document_flows.customer_invoices {
7433 let lines = tax_line_gen.generate_for_document(
7434 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
7435 &ci.header.document_id,
7436 buyer_country, buyer_country,
7438 ci.total_gross_amount,
7439 ci.header.document_date,
7440 None,
7441 );
7442 tax_lines.extend(lines);
7443 }
7444 }
7445
7446 let deferred_tax = {
7448 let companies: Vec<(&str, &str)> = self
7449 .config
7450 .companies
7451 .iter()
7452 .map(|c| (c.code.as_str(), c.country.as_str()))
7453 .collect();
7454 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
7455 deferred_gen.generate(&companies, start_date, journal_entries)
7456 };
7457
7458 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
7461 std::collections::HashMap::new();
7462 for vi in &document_flows.vendor_invoices {
7463 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
7464 }
7465 for ci in &document_flows.customer_invoices {
7466 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
7467 }
7468
7469 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7471 let tax_posting_journal_entries = if !tax_lines.is_empty() {
7472 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
7473 &tax_lines,
7474 company_code,
7475 &doc_dates,
7476 end_date,
7477 );
7478 debug!("Generated {} tax posting JEs", jes.len());
7479 jes
7480 } else {
7481 Vec::new()
7482 };
7483
7484 let snapshot = TaxSnapshot {
7485 jurisdiction_count: jurisdictions.len(),
7486 code_count: codes.len(),
7487 jurisdictions,
7488 codes,
7489 tax_provisions: provisions,
7490 tax_lines,
7491 tax_returns: Vec::new(),
7492 withholding_records: Vec::new(),
7493 tax_anomaly_labels: Vec::new(),
7494 deferred_tax,
7495 tax_posting_journal_entries,
7496 };
7497
7498 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
7499 stats.tax_code_count = snapshot.code_count;
7500 stats.tax_provision_count = snapshot.tax_provisions.len();
7501 stats.tax_line_count = snapshot.tax_lines.len();
7502
7503 info!(
7504 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
7505 snapshot.jurisdiction_count,
7506 snapshot.code_count,
7507 snapshot.tax_provisions.len(),
7508 snapshot.deferred_tax.temporary_differences.len(),
7509 snapshot.deferred_tax.journal_entries.len(),
7510 snapshot.tax_posting_journal_entries.len(),
7511 );
7512 self.check_resources_with_log("post-tax")?;
7513
7514 Ok(snapshot)
7515 }
7516
7517 fn phase_esg_generation(
7519 &mut self,
7520 document_flows: &DocumentFlowSnapshot,
7521 manufacturing: &ManufacturingSnapshot,
7522 stats: &mut EnhancedGenerationStatistics,
7523 ) -> SynthResult<EsgSnapshot> {
7524 if !self.phase_config.generate_esg {
7525 debug!("Phase 21: Skipped (ESG generation disabled)");
7526 return Ok(EsgSnapshot::default());
7527 }
7528 let degradation = self.check_resources()?;
7529 if degradation >= DegradationLevel::Reduced {
7530 debug!(
7531 "Phase skipped due to resource pressure (degradation: {:?})",
7532 degradation
7533 );
7534 return Ok(EsgSnapshot::default());
7535 }
7536 info!("Phase 21: Generating ESG Data");
7537
7538 let seed = self.seed;
7539 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7540 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7541 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7542 let entity_id = self
7543 .config
7544 .companies
7545 .first()
7546 .map(|c| c.code.as_str())
7547 .unwrap_or("1000");
7548
7549 let esg_cfg = &self.config.esg;
7550 let mut snapshot = EsgSnapshot::default();
7551
7552 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
7554 esg_cfg.environmental.energy.clone(),
7555 seed + 80,
7556 );
7557 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
7558
7559 let facility_count = esg_cfg.environmental.energy.facility_count;
7561 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
7562 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
7563
7564 let mut waste_gen = datasynth_generators::WasteGenerator::new(
7566 seed + 82,
7567 esg_cfg.environmental.waste.diversion_target,
7568 facility_count,
7569 );
7570 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
7571
7572 let mut emission_gen =
7574 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
7575
7576 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
7578 .iter()
7579 .map(|e| datasynth_generators::EnergyInput {
7580 facility_id: e.facility_id.clone(),
7581 energy_type: match e.energy_source {
7582 EnergySourceType::NaturalGas => {
7583 datasynth_generators::EnergyInputType::NaturalGas
7584 }
7585 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
7586 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
7587 _ => datasynth_generators::EnergyInputType::Electricity,
7588 },
7589 consumption_kwh: e.consumption_kwh,
7590 period: e.period,
7591 })
7592 .collect();
7593
7594 if !manufacturing.production_orders.is_empty() {
7596 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
7597 &manufacturing.production_orders,
7598 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
7601 if !mfg_energy.is_empty() {
7602 info!(
7603 "ESG: {} energy inputs derived from {} production orders",
7604 mfg_energy.len(),
7605 manufacturing.production_orders.len(),
7606 );
7607 energy_inputs.extend(mfg_energy);
7608 }
7609 }
7610
7611 let mut emissions = Vec::new();
7612 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
7613 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
7614
7615 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
7617 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7618 for payment in &document_flows.payments {
7619 if payment.is_vendor {
7620 *totals
7621 .entry(payment.business_partner_id.clone())
7622 .or_default() += payment.amount;
7623 }
7624 }
7625 totals
7626 };
7627 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
7628 .master_data
7629 .vendors
7630 .iter()
7631 .map(|v| {
7632 let spend = vendor_payment_totals
7633 .get(&v.vendor_id)
7634 .copied()
7635 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
7636 datasynth_generators::VendorSpendInput {
7637 vendor_id: v.vendor_id.clone(),
7638 category: format!("{:?}", v.vendor_type).to_lowercase(),
7639 spend,
7640 country: v.country.clone(),
7641 }
7642 })
7643 .collect();
7644 if !vendor_spend.is_empty() {
7645 emissions.extend(emission_gen.generate_scope3_purchased_goods(
7646 entity_id,
7647 &vendor_spend,
7648 start_date,
7649 end_date,
7650 ));
7651 }
7652
7653 let headcount = self.master_data.employees.len() as u32;
7655 if headcount > 0 {
7656 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
7657 emissions.extend(emission_gen.generate_scope3_business_travel(
7658 entity_id,
7659 travel_spend,
7660 start_date,
7661 ));
7662 emissions
7663 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
7664 }
7665
7666 snapshot.emission_count = emissions.len();
7667 snapshot.emissions = emissions;
7668 snapshot.energy = energy_records;
7669
7670 let mut workforce_gen =
7672 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
7673 let total_headcount = headcount.max(100);
7674 snapshot.diversity =
7675 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
7676 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
7677
7678 if !self.master_data.employees.is_empty() {
7680 let hr_diversity = workforce_gen.generate_diversity_from_employees(
7681 entity_id,
7682 &self.master_data.employees,
7683 end_date,
7684 );
7685 if !hr_diversity.is_empty() {
7686 info!(
7687 "ESG: {} diversity metrics derived from {} actual employees",
7688 hr_diversity.len(),
7689 self.master_data.employees.len(),
7690 );
7691 snapshot.diversity.extend(hr_diversity);
7692 }
7693 }
7694
7695 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
7696 entity_id,
7697 facility_count,
7698 start_date,
7699 end_date,
7700 );
7701
7702 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
7705 entity_id,
7706 &snapshot.safety_incidents,
7707 total_hours,
7708 start_date,
7709 );
7710 snapshot.safety_metrics = vec![safety_metric];
7711
7712 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
7714 seed + 85,
7715 esg_cfg.governance.board_size,
7716 esg_cfg.governance.independence_target,
7717 );
7718 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
7719
7720 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
7722 esg_cfg.supply_chain_esg.clone(),
7723 seed + 86,
7724 );
7725 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
7726 .master_data
7727 .vendors
7728 .iter()
7729 .map(|v| datasynth_generators::VendorInput {
7730 vendor_id: v.vendor_id.clone(),
7731 country: v.country.clone(),
7732 industry: format!("{:?}", v.vendor_type).to_lowercase(),
7733 quality_score: None,
7734 })
7735 .collect();
7736 snapshot.supplier_assessments =
7737 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
7738
7739 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
7741 seed + 87,
7742 esg_cfg.reporting.clone(),
7743 esg_cfg.climate_scenarios.clone(),
7744 );
7745 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
7746 snapshot.disclosures = disclosure_gen.generate_disclosures(
7747 entity_id,
7748 &snapshot.materiality,
7749 start_date,
7750 end_date,
7751 );
7752 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
7753 snapshot.disclosure_count = snapshot.disclosures.len();
7754
7755 if esg_cfg.anomaly_rate > 0.0 {
7757 let mut anomaly_injector =
7758 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
7759 let mut labels = Vec::new();
7760 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
7761 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
7762 labels.extend(
7763 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
7764 );
7765 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
7766 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
7767 snapshot.anomaly_labels = labels;
7768 }
7769
7770 stats.esg_emission_count = snapshot.emission_count;
7771 stats.esg_disclosure_count = snapshot.disclosure_count;
7772
7773 info!(
7774 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
7775 snapshot.emission_count,
7776 snapshot.disclosure_count,
7777 snapshot.supplier_assessments.len()
7778 );
7779 self.check_resources_with_log("post-esg")?;
7780
7781 Ok(snapshot)
7782 }
7783
7784 fn phase_treasury_data(
7786 &mut self,
7787 document_flows: &DocumentFlowSnapshot,
7788 subledger: &SubledgerSnapshot,
7789 intercompany: &IntercompanySnapshot,
7790 stats: &mut EnhancedGenerationStatistics,
7791 ) -> SynthResult<TreasurySnapshot> {
7792 if !self.phase_config.generate_treasury {
7793 debug!("Phase 22: Skipped (treasury generation disabled)");
7794 return Ok(TreasurySnapshot::default());
7795 }
7796 let degradation = self.check_resources()?;
7797 if degradation >= DegradationLevel::Reduced {
7798 debug!(
7799 "Phase skipped due to resource pressure (degradation: {:?})",
7800 degradation
7801 );
7802 return Ok(TreasurySnapshot::default());
7803 }
7804 info!("Phase 22: Generating Treasury Data");
7805
7806 let seed = self.seed;
7807 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7808 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7809 let currency = self
7810 .config
7811 .companies
7812 .first()
7813 .map(|c| c.currency.as_str())
7814 .unwrap_or("USD");
7815 let entity_id = self
7816 .config
7817 .companies
7818 .first()
7819 .map(|c| c.code.as_str())
7820 .unwrap_or("1000");
7821
7822 let mut snapshot = TreasurySnapshot::default();
7823
7824 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
7826 self.config.treasury.debt.clone(),
7827 seed + 90,
7828 );
7829 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
7830
7831 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
7833 self.config.treasury.hedging.clone(),
7834 seed + 91,
7835 );
7836 for debt in &snapshot.debt_instruments {
7837 if debt.rate_type == InterestRateType::Variable {
7838 let swap = hedge_gen.generate_ir_swap(
7839 currency,
7840 debt.principal,
7841 debt.origination_date,
7842 debt.maturity_date,
7843 );
7844 snapshot.hedging_instruments.push(swap);
7845 }
7846 }
7847
7848 {
7851 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
7852 for payment in &document_flows.payments {
7853 if payment.currency != currency {
7854 let entry = fx_map
7855 .entry(payment.currency.clone())
7856 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
7857 entry.0 += payment.amount;
7858 if payment.header.document_date > entry.1 {
7860 entry.1 = payment.header.document_date;
7861 }
7862 }
7863 }
7864 if !fx_map.is_empty() {
7865 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
7866 .into_iter()
7867 .map(|(foreign_ccy, (net_amount, settlement_date))| {
7868 datasynth_generators::treasury::FxExposure {
7869 currency_pair: format!("{foreign_ccy}/{currency}"),
7870 foreign_currency: foreign_ccy,
7871 net_amount,
7872 settlement_date,
7873 description: "AP payment FX exposure".to_string(),
7874 }
7875 })
7876 .collect();
7877 let (fx_instruments, fx_relationships) =
7878 hedge_gen.generate(start_date, &fx_exposures);
7879 snapshot.hedging_instruments.extend(fx_instruments);
7880 snapshot.hedge_relationships.extend(fx_relationships);
7881 }
7882 }
7883
7884 if self.config.treasury.anomaly_rate > 0.0 {
7886 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
7887 seed + 92,
7888 self.config.treasury.anomaly_rate,
7889 );
7890 let mut labels = Vec::new();
7891 labels.extend(
7892 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
7893 );
7894 snapshot.treasury_anomaly_labels = labels;
7895 }
7896
7897 if self.config.treasury.cash_positioning.enabled {
7899 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
7900
7901 for payment in &document_flows.payments {
7903 cash_flows.push(datasynth_generators::treasury::CashFlow {
7904 date: payment.header.document_date,
7905 account_id: format!("{entity_id}-MAIN"),
7906 amount: payment.amount,
7907 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
7908 });
7909 }
7910
7911 for chain in &document_flows.o2c_chains {
7913 if let Some(ref receipt) = chain.customer_receipt {
7914 cash_flows.push(datasynth_generators::treasury::CashFlow {
7915 date: receipt.header.document_date,
7916 account_id: format!("{entity_id}-MAIN"),
7917 amount: receipt.amount,
7918 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7919 });
7920 }
7921 for receipt in &chain.remainder_receipts {
7923 cash_flows.push(datasynth_generators::treasury::CashFlow {
7924 date: receipt.header.document_date,
7925 account_id: format!("{entity_id}-MAIN"),
7926 amount: receipt.amount,
7927 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7928 });
7929 }
7930 }
7931
7932 if !cash_flows.is_empty() {
7933 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
7934 self.config.treasury.cash_positioning.clone(),
7935 seed + 93,
7936 );
7937 let account_id = format!("{entity_id}-MAIN");
7938 snapshot.cash_positions = cash_gen.generate(
7939 entity_id,
7940 &account_id,
7941 currency,
7942 &cash_flows,
7943 start_date,
7944 start_date + chrono::Months::new(self.config.global.period_months),
7945 rust_decimal::Decimal::new(1_000_000, 0), );
7947 }
7948 }
7949
7950 if self.config.treasury.cash_forecasting.enabled {
7952 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7953
7954 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
7956 .ar_invoices
7957 .iter()
7958 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7959 .map(|inv| {
7960 let days_past_due = if inv.due_date < end_date {
7961 (end_date - inv.due_date).num_days().max(0) as u32
7962 } else {
7963 0
7964 };
7965 datasynth_generators::treasury::ArAgingItem {
7966 expected_date: inv.due_date,
7967 amount: inv.amount_remaining,
7968 days_past_due,
7969 document_id: inv.invoice_number.clone(),
7970 }
7971 })
7972 .collect();
7973
7974 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
7976 .ap_invoices
7977 .iter()
7978 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7979 .map(|inv| datasynth_generators::treasury::ApAgingItem {
7980 payment_date: inv.due_date,
7981 amount: inv.amount_remaining,
7982 document_id: inv.invoice_number.clone(),
7983 })
7984 .collect();
7985
7986 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
7987 self.config.treasury.cash_forecasting.clone(),
7988 seed + 94,
7989 );
7990 let forecast = forecast_gen.generate(
7991 entity_id,
7992 currency,
7993 end_date,
7994 &ar_items,
7995 &ap_items,
7996 &[], );
7998 snapshot.cash_forecasts.push(forecast);
7999 }
8000
8001 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
8003 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8004 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
8005 self.config.treasury.cash_pooling.clone(),
8006 seed + 95,
8007 );
8008
8009 let account_ids: Vec<String> = snapshot
8011 .cash_positions
8012 .iter()
8013 .map(|cp| cp.bank_account_id.clone())
8014 .collect::<std::collections::HashSet<_>>()
8015 .into_iter()
8016 .collect();
8017
8018 if let Some(pool) =
8019 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
8020 {
8021 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8023 for cp in &snapshot.cash_positions {
8024 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
8025 }
8026
8027 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
8028 latest_balances
8029 .into_iter()
8030 .filter(|(id, _)| pool.participant_accounts.contains(id))
8031 .map(
8032 |(id, balance)| datasynth_generators::treasury::AccountBalance {
8033 account_id: id,
8034 balance,
8035 },
8036 )
8037 .collect();
8038
8039 let sweeps =
8040 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
8041 snapshot.cash_pool_sweeps = sweeps;
8042 snapshot.cash_pools.push(pool);
8043 }
8044 }
8045
8046 if self.config.treasury.bank_guarantees.enabled {
8048 let vendor_names: Vec<String> = self
8049 .master_data
8050 .vendors
8051 .iter()
8052 .map(|v| v.name.clone())
8053 .collect();
8054 if !vendor_names.is_empty() {
8055 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
8056 self.config.treasury.bank_guarantees.clone(),
8057 seed + 96,
8058 );
8059 snapshot.bank_guarantees =
8060 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
8061 }
8062 }
8063
8064 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
8066 let entity_ids: Vec<String> = self
8067 .config
8068 .companies
8069 .iter()
8070 .map(|c| c.code.clone())
8071 .collect();
8072 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
8073 .matched_pairs
8074 .iter()
8075 .map(|mp| {
8076 (
8077 mp.seller_company.clone(),
8078 mp.buyer_company.clone(),
8079 mp.amount,
8080 )
8081 })
8082 .collect();
8083 if entity_ids.len() >= 2 {
8084 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
8085 self.config.treasury.netting.clone(),
8086 seed + 97,
8087 );
8088 snapshot.netting_runs = netting_gen.generate(
8089 &entity_ids,
8090 currency,
8091 start_date,
8092 self.config.global.period_months,
8093 &ic_amounts,
8094 );
8095 }
8096 }
8097
8098 {
8100 use datasynth_generators::treasury::TreasuryAccounting;
8101
8102 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8103 let mut treasury_jes = Vec::new();
8104
8105 if !snapshot.debt_instruments.is_empty() {
8107 let debt_jes =
8108 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
8109 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
8110 treasury_jes.extend(debt_jes);
8111 }
8112
8113 if !snapshot.hedging_instruments.is_empty() {
8115 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
8116 &snapshot.hedging_instruments,
8117 &snapshot.hedge_relationships,
8118 end_date,
8119 entity_id,
8120 );
8121 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
8122 treasury_jes.extend(hedge_jes);
8123 }
8124
8125 if !snapshot.cash_pool_sweeps.is_empty() {
8127 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
8128 &snapshot.cash_pool_sweeps,
8129 entity_id,
8130 );
8131 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
8132 treasury_jes.extend(sweep_jes);
8133 }
8134
8135 if !treasury_jes.is_empty() {
8136 debug!("Total treasury journal entries: {}", treasury_jes.len());
8137 }
8138 snapshot.journal_entries = treasury_jes;
8139 }
8140
8141 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
8142 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
8143 stats.cash_position_count = snapshot.cash_positions.len();
8144 stats.cash_forecast_count = snapshot.cash_forecasts.len();
8145 stats.cash_pool_count = snapshot.cash_pools.len();
8146
8147 info!(
8148 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
8149 snapshot.debt_instruments.len(),
8150 snapshot.hedging_instruments.len(),
8151 snapshot.cash_positions.len(),
8152 snapshot.cash_forecasts.len(),
8153 snapshot.cash_pools.len(),
8154 snapshot.bank_guarantees.len(),
8155 snapshot.netting_runs.len(),
8156 snapshot.journal_entries.len(),
8157 );
8158 self.check_resources_with_log("post-treasury")?;
8159
8160 Ok(snapshot)
8161 }
8162
8163 fn phase_project_accounting(
8165 &mut self,
8166 document_flows: &DocumentFlowSnapshot,
8167 hr: &HrSnapshot,
8168 stats: &mut EnhancedGenerationStatistics,
8169 ) -> SynthResult<ProjectAccountingSnapshot> {
8170 if !self.phase_config.generate_project_accounting {
8171 debug!("Phase 23: Skipped (project accounting disabled)");
8172 return Ok(ProjectAccountingSnapshot::default());
8173 }
8174 let degradation = self.check_resources()?;
8175 if degradation >= DegradationLevel::Reduced {
8176 debug!(
8177 "Phase skipped due to resource pressure (degradation: {:?})",
8178 degradation
8179 );
8180 return Ok(ProjectAccountingSnapshot::default());
8181 }
8182 info!("Phase 23: Generating Project Accounting Data");
8183
8184 let seed = self.seed;
8185 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8186 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8187 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8188 let company_code = self
8189 .config
8190 .companies
8191 .first()
8192 .map(|c| c.code.as_str())
8193 .unwrap_or("1000");
8194
8195 let mut snapshot = ProjectAccountingSnapshot::default();
8196
8197 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
8199 self.config.project_accounting.clone(),
8200 seed + 95,
8201 );
8202 let pool = project_gen.generate(company_code, start_date, end_date);
8203 snapshot.projects = pool.projects.clone();
8204
8205 {
8207 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
8208 Vec::new();
8209
8210 for te in &hr.time_entries {
8212 let total_hours = te.hours_regular + te.hours_overtime;
8213 if total_hours > 0.0 {
8214 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8215 id: te.entry_id.clone(),
8216 entity_id: company_code.to_string(),
8217 date: te.date,
8218 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
8219 .unwrap_or(rust_decimal::Decimal::ZERO),
8220 source_type: CostSourceType::TimeEntry,
8221 hours: Some(
8222 rust_decimal::Decimal::from_f64_retain(total_hours)
8223 .unwrap_or(rust_decimal::Decimal::ZERO),
8224 ),
8225 });
8226 }
8227 }
8228
8229 for er in &hr.expense_reports {
8231 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8232 id: er.report_id.clone(),
8233 entity_id: company_code.to_string(),
8234 date: er.submission_date,
8235 amount: er.total_amount,
8236 source_type: CostSourceType::ExpenseReport,
8237 hours: None,
8238 });
8239 }
8240
8241 for po in &document_flows.purchase_orders {
8243 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8244 id: po.header.document_id.clone(),
8245 entity_id: company_code.to_string(),
8246 date: po.header.document_date,
8247 amount: po.total_net_amount,
8248 source_type: CostSourceType::PurchaseOrder,
8249 hours: None,
8250 });
8251 }
8252
8253 for vi in &document_flows.vendor_invoices {
8255 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8256 id: vi.header.document_id.clone(),
8257 entity_id: company_code.to_string(),
8258 date: vi.header.document_date,
8259 amount: vi.payable_amount,
8260 source_type: CostSourceType::VendorInvoice,
8261 hours: None,
8262 });
8263 }
8264
8265 if !source_docs.is_empty() && !pool.projects.is_empty() {
8266 let mut cost_gen =
8267 datasynth_generators::project_accounting::ProjectCostGenerator::new(
8268 self.config.project_accounting.cost_allocation.clone(),
8269 seed + 99,
8270 );
8271 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
8272 }
8273 }
8274
8275 if self.config.project_accounting.change_orders.enabled {
8277 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
8278 self.config.project_accounting.change_orders.clone(),
8279 seed + 96,
8280 );
8281 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
8282 }
8283
8284 if self.config.project_accounting.milestones.enabled {
8286 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
8287 self.config.project_accounting.milestones.clone(),
8288 seed + 97,
8289 );
8290 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
8291 }
8292
8293 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
8295 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
8296 self.config.project_accounting.earned_value.clone(),
8297 seed + 98,
8298 );
8299 snapshot.earned_value_metrics =
8300 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
8301 }
8302
8303 if self.config.project_accounting.revenue_recognition.enabled
8305 && !snapshot.projects.is_empty()
8306 && !snapshot.cost_lines.is_empty()
8307 {
8308 use datasynth_generators::project_accounting::RevenueGenerator;
8309 let rev_config = self.config.project_accounting.revenue_recognition.clone();
8310 let avg_contract_value =
8311 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8312 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8313
8314 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8317 snapshot
8318 .projects
8319 .iter()
8320 .filter(|p| {
8321 matches!(
8322 p.project_type,
8323 datasynth_core::models::ProjectType::Customer
8324 )
8325 })
8326 .map(|p| {
8327 let cv = if p.budget > rust_decimal::Decimal::ZERO {
8328 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8329 } else {
8331 avg_contract_value
8332 };
8333 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
8335 })
8336 .collect();
8337
8338 if !contract_values.is_empty() {
8339 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8340 snapshot.revenue_records = rev_gen.generate(
8341 &snapshot.projects,
8342 &snapshot.cost_lines,
8343 &contract_values,
8344 start_date,
8345 end_date,
8346 );
8347 debug!(
8348 "Generated {} revenue recognition records for {} customer projects",
8349 snapshot.revenue_records.len(),
8350 contract_values.len()
8351 );
8352 }
8353 }
8354
8355 stats.project_count = snapshot.projects.len();
8356 stats.project_change_order_count = snapshot.change_orders.len();
8357 stats.project_cost_line_count = snapshot.cost_lines.len();
8358
8359 info!(
8360 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8361 snapshot.projects.len(),
8362 snapshot.change_orders.len(),
8363 snapshot.milestones.len(),
8364 snapshot.earned_value_metrics.len()
8365 );
8366 self.check_resources_with_log("post-project-accounting")?;
8367
8368 Ok(snapshot)
8369 }
8370
8371 fn phase_evolution_events(
8373 &mut self,
8374 stats: &mut EnhancedGenerationStatistics,
8375 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8376 if !self.phase_config.generate_evolution_events {
8377 debug!("Phase 24: Skipped (evolution events disabled)");
8378 return Ok((Vec::new(), Vec::new()));
8379 }
8380 info!("Phase 24: Generating Process Evolution + Organizational Events");
8381
8382 let seed = self.seed;
8383 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8384 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8385 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8386
8387 let mut proc_gen =
8389 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8390 seed + 100,
8391 );
8392 let process_events = proc_gen.generate_events(start_date, end_date);
8393
8394 let company_codes: Vec<String> = self
8396 .config
8397 .companies
8398 .iter()
8399 .map(|c| c.code.clone())
8400 .collect();
8401 let mut org_gen =
8402 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8403 seed + 101,
8404 );
8405 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8406
8407 stats.process_evolution_event_count = process_events.len();
8408 stats.organizational_event_count = org_events.len();
8409
8410 info!(
8411 "Evolution events generated: {} process evolution, {} organizational",
8412 process_events.len(),
8413 org_events.len()
8414 );
8415 self.check_resources_with_log("post-evolution-events")?;
8416
8417 Ok((process_events, org_events))
8418 }
8419
8420 fn phase_disruption_events(
8423 &self,
8424 stats: &mut EnhancedGenerationStatistics,
8425 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
8426 if !self.config.organizational_events.enabled {
8427 debug!("Phase 24b: Skipped (organizational events disabled)");
8428 return Ok(Vec::new());
8429 }
8430 info!("Phase 24b: Generating Disruption Events");
8431
8432 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8433 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8434 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8435
8436 let company_codes: Vec<String> = self
8437 .config
8438 .companies
8439 .iter()
8440 .map(|c| c.code.clone())
8441 .collect();
8442
8443 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
8444 let events = gen.generate(start_date, end_date, &company_codes);
8445
8446 stats.disruption_event_count = events.len();
8447 info!("Disruption events generated: {} events", events.len());
8448 self.check_resources_with_log("post-disruption-events")?;
8449
8450 Ok(events)
8451 }
8452
8453 fn phase_counterfactuals(
8460 &self,
8461 journal_entries: &[JournalEntry],
8462 stats: &mut EnhancedGenerationStatistics,
8463 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
8464 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
8465 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
8466 return Ok(Vec::new());
8467 }
8468 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
8469
8470 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
8471
8472 let mut gen = CounterfactualGenerator::new(self.seed + 110);
8473
8474 let specs = [
8476 CounterfactualSpec::ScaleAmount { factor: 2.5 },
8477 CounterfactualSpec::ShiftDate { days: -14 },
8478 CounterfactualSpec::SelfApprove,
8479 CounterfactualSpec::SplitTransaction { split_count: 3 },
8480 ];
8481
8482 let pairs: Vec<_> = journal_entries
8483 .iter()
8484 .enumerate()
8485 .map(|(i, je)| {
8486 let spec = &specs[i % specs.len()];
8487 gen.generate(je, spec)
8488 })
8489 .collect();
8490
8491 stats.counterfactual_pair_count = pairs.len();
8492 info!(
8493 "Counterfactual pairs generated: {} pairs from {} journal entries",
8494 pairs.len(),
8495 journal_entries.len()
8496 );
8497 self.check_resources_with_log("post-counterfactuals")?;
8498
8499 Ok(pairs)
8500 }
8501
8502 fn phase_red_flags(
8509 &self,
8510 anomaly_labels: &AnomalyLabels,
8511 document_flows: &DocumentFlowSnapshot,
8512 stats: &mut EnhancedGenerationStatistics,
8513 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
8514 if !self.config.fraud.enabled {
8515 debug!("Phase 26: Skipped (fraud generation disabled)");
8516 return Ok(Vec::new());
8517 }
8518 info!("Phase 26: Generating Fraud Red-Flag Indicators");
8519
8520 use datasynth_generators::fraud::RedFlagGenerator;
8521
8522 let generator = RedFlagGenerator::new();
8523 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
8524
8525 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
8527 .labels
8528 .iter()
8529 .filter(|label| label.anomaly_type.is_intentional())
8530 .map(|label| label.document_id.as_str())
8531 .collect();
8532
8533 let mut flags = Vec::new();
8534
8535 for chain in &document_flows.p2p_chains {
8537 let doc_id = &chain.purchase_order.header.document_id;
8538 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8539 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8540 }
8541
8542 for chain in &document_flows.o2c_chains {
8544 let doc_id = &chain.sales_order.header.document_id;
8545 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8546 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8547 }
8548
8549 stats.red_flag_count = flags.len();
8550 info!(
8551 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
8552 flags.len(),
8553 document_flows.p2p_chains.len(),
8554 document_flows.o2c_chains.len(),
8555 fraud_doc_ids.len()
8556 );
8557 self.check_resources_with_log("post-red-flags")?;
8558
8559 Ok(flags)
8560 }
8561
8562 fn phase_collusion_rings(
8568 &mut self,
8569 stats: &mut EnhancedGenerationStatistics,
8570 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
8571 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
8572 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
8573 return Ok(Vec::new());
8574 }
8575 info!("Phase 26b: Generating Collusion Rings");
8576
8577 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8578 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8579 let months = self.config.global.period_months;
8580
8581 let employee_ids: Vec<String> = self
8582 .master_data
8583 .employees
8584 .iter()
8585 .map(|e| e.employee_id.clone())
8586 .collect();
8587 let vendor_ids: Vec<String> = self
8588 .master_data
8589 .vendors
8590 .iter()
8591 .map(|v| v.vendor_id.clone())
8592 .collect();
8593
8594 let mut generator =
8595 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
8596 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
8597
8598 stats.collusion_ring_count = rings.len();
8599 info!(
8600 "Collusion rings generated: {} rings, total members: {}",
8601 rings.len(),
8602 rings
8603 .iter()
8604 .map(datasynth_generators::fraud::CollusionRing::size)
8605 .sum::<usize>()
8606 );
8607 self.check_resources_with_log("post-collusion-rings")?;
8608
8609 Ok(rings)
8610 }
8611
8612 fn phase_temporal_attributes(
8617 &mut self,
8618 stats: &mut EnhancedGenerationStatistics,
8619 ) -> SynthResult<
8620 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
8621 > {
8622 if !self.config.temporal_attributes.enabled {
8623 debug!("Phase 27: Skipped (temporal attributes disabled)");
8624 return Ok(Vec::new());
8625 }
8626 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
8627
8628 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8629 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8630
8631 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
8635 || self.config.temporal_attributes.enabled;
8636 let temporal_config = {
8637 let ta = &self.config.temporal_attributes;
8638 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
8639 .enabled(ta.enabled)
8640 .closed_probability(ta.valid_time.closed_probability)
8641 .avg_validity_days(ta.valid_time.avg_validity_days)
8642 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
8643 .with_version_chains(if generate_version_chains {
8644 ta.avg_versions_per_entity
8645 } else {
8646 1.0
8647 })
8648 .build()
8649 };
8650 let temporal_config = if self
8652 .config
8653 .temporal_attributes
8654 .transaction_time
8655 .allow_backdating
8656 {
8657 let mut c = temporal_config;
8658 c.transaction_time.allow_backdating = true;
8659 c.transaction_time.backdating_probability = self
8660 .config
8661 .temporal_attributes
8662 .transaction_time
8663 .backdating_probability;
8664 c.transaction_time.max_backdate_days = self
8665 .config
8666 .temporal_attributes
8667 .transaction_time
8668 .max_backdate_days;
8669 c
8670 } else {
8671 temporal_config
8672 };
8673 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
8674 temporal_config,
8675 self.seed + 130,
8676 start_date,
8677 );
8678
8679 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
8680 self.seed + 130,
8681 datasynth_core::GeneratorType::Vendor,
8682 );
8683
8684 let chains: Vec<_> = self
8685 .master_data
8686 .vendors
8687 .iter()
8688 .map(|vendor| {
8689 let id = uuid_factory.next();
8690 gen.generate_version_chain(vendor.clone(), id)
8691 })
8692 .collect();
8693
8694 stats.temporal_version_chain_count = chains.len();
8695 info!("Temporal version chains generated: {} chains", chains.len());
8696 self.check_resources_with_log("post-temporal-attributes")?;
8697
8698 Ok(chains)
8699 }
8700
8701 fn phase_entity_relationships(
8711 &self,
8712 journal_entries: &[JournalEntry],
8713 document_flows: &DocumentFlowSnapshot,
8714 stats: &mut EnhancedGenerationStatistics,
8715 ) -> SynthResult<(
8716 Option<datasynth_core::models::EntityGraph>,
8717 Vec<datasynth_core::models::CrossProcessLink>,
8718 )> {
8719 use datasynth_generators::relationships::{
8720 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
8721 TransactionSummary,
8722 };
8723
8724 let rs_enabled = self.config.relationship_strength.enabled;
8725 let cpl_enabled = self.config.cross_process_links.enabled
8726 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
8727
8728 if !rs_enabled && !cpl_enabled {
8729 debug!(
8730 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
8731 );
8732 return Ok((None, Vec::new()));
8733 }
8734
8735 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
8736
8737 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8738 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8739
8740 let company_code = self
8741 .config
8742 .companies
8743 .first()
8744 .map(|c| c.code.as_str())
8745 .unwrap_or("1000");
8746
8747 let gen_config = EntityGraphConfig {
8749 enabled: rs_enabled,
8750 cross_process: datasynth_generators::relationships::CrossProcessConfig {
8751 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
8752 enable_return_flows: false,
8753 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
8754 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
8755 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
8757 1.0
8758 } else {
8759 0.30
8760 },
8761 ..Default::default()
8762 },
8763 strength_config: datasynth_generators::relationships::StrengthConfig {
8764 transaction_volume_weight: self
8765 .config
8766 .relationship_strength
8767 .calculation
8768 .transaction_volume_weight,
8769 transaction_count_weight: self
8770 .config
8771 .relationship_strength
8772 .calculation
8773 .transaction_count_weight,
8774 duration_weight: self
8775 .config
8776 .relationship_strength
8777 .calculation
8778 .relationship_duration_weight,
8779 recency_weight: self.config.relationship_strength.calculation.recency_weight,
8780 mutual_connections_weight: self
8781 .config
8782 .relationship_strength
8783 .calculation
8784 .mutual_connections_weight,
8785 recency_half_life_days: self
8786 .config
8787 .relationship_strength
8788 .calculation
8789 .recency_half_life_days,
8790 },
8791 ..Default::default()
8792 };
8793
8794 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
8795
8796 let entity_graph = if rs_enabled {
8798 let vendor_summaries: Vec<EntitySummary> = self
8800 .master_data
8801 .vendors
8802 .iter()
8803 .map(|v| {
8804 EntitySummary::new(
8805 &v.vendor_id,
8806 &v.name,
8807 datasynth_core::models::GraphEntityType::Vendor,
8808 start_date,
8809 )
8810 })
8811 .collect();
8812
8813 let customer_summaries: Vec<EntitySummary> = self
8814 .master_data
8815 .customers
8816 .iter()
8817 .map(|c| {
8818 EntitySummary::new(
8819 &c.customer_id,
8820 &c.name,
8821 datasynth_core::models::GraphEntityType::Customer,
8822 start_date,
8823 )
8824 })
8825 .collect();
8826
8827 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
8832 std::collections::HashMap::new();
8833
8834 for je in journal_entries {
8835 let cc = je.header.company_code.clone();
8836 let posting_date = je.header.posting_date;
8837 for line in &je.lines {
8838 if let Some(ref tp) = line.trading_partner {
8839 let amount = if line.debit_amount > line.credit_amount {
8840 line.debit_amount
8841 } else {
8842 line.credit_amount
8843 };
8844 let entry = txn_summaries
8845 .entry((cc.clone(), tp.clone()))
8846 .or_insert_with(|| TransactionSummary {
8847 total_volume: rust_decimal::Decimal::ZERO,
8848 transaction_count: 0,
8849 first_transaction_date: posting_date,
8850 last_transaction_date: posting_date,
8851 related_entities: std::collections::HashSet::new(),
8852 });
8853 entry.total_volume += amount;
8854 entry.transaction_count += 1;
8855 if posting_date < entry.first_transaction_date {
8856 entry.first_transaction_date = posting_date;
8857 }
8858 if posting_date > entry.last_transaction_date {
8859 entry.last_transaction_date = posting_date;
8860 }
8861 entry.related_entities.insert(cc.clone());
8862 }
8863 }
8864 }
8865
8866 for chain in &document_flows.p2p_chains {
8869 let cc = chain.purchase_order.header.company_code.clone();
8870 let vendor_id = chain.purchase_order.vendor_id.clone();
8871 let po_date = chain.purchase_order.header.document_date;
8872 let amount = chain.purchase_order.total_net_amount;
8873
8874 let entry = txn_summaries
8875 .entry((cc.clone(), vendor_id))
8876 .or_insert_with(|| TransactionSummary {
8877 total_volume: rust_decimal::Decimal::ZERO,
8878 transaction_count: 0,
8879 first_transaction_date: po_date,
8880 last_transaction_date: po_date,
8881 related_entities: std::collections::HashSet::new(),
8882 });
8883 entry.total_volume += amount;
8884 entry.transaction_count += 1;
8885 if po_date < entry.first_transaction_date {
8886 entry.first_transaction_date = po_date;
8887 }
8888 if po_date > entry.last_transaction_date {
8889 entry.last_transaction_date = po_date;
8890 }
8891 entry.related_entities.insert(cc);
8892 }
8893
8894 for chain in &document_flows.o2c_chains {
8896 let cc = chain.sales_order.header.company_code.clone();
8897 let customer_id = chain.sales_order.customer_id.clone();
8898 let so_date = chain.sales_order.header.document_date;
8899 let amount = chain.sales_order.total_net_amount;
8900
8901 let entry = txn_summaries
8902 .entry((cc.clone(), customer_id))
8903 .or_insert_with(|| TransactionSummary {
8904 total_volume: rust_decimal::Decimal::ZERO,
8905 transaction_count: 0,
8906 first_transaction_date: so_date,
8907 last_transaction_date: so_date,
8908 related_entities: std::collections::HashSet::new(),
8909 });
8910 entry.total_volume += amount;
8911 entry.transaction_count += 1;
8912 if so_date < entry.first_transaction_date {
8913 entry.first_transaction_date = so_date;
8914 }
8915 if so_date > entry.last_transaction_date {
8916 entry.last_transaction_date = so_date;
8917 }
8918 entry.related_entities.insert(cc);
8919 }
8920
8921 let as_of_date = journal_entries
8922 .last()
8923 .map(|je| je.header.posting_date)
8924 .unwrap_or(start_date);
8925
8926 let graph = gen.generate_entity_graph(
8927 company_code,
8928 as_of_date,
8929 &vendor_summaries,
8930 &customer_summaries,
8931 &txn_summaries,
8932 );
8933
8934 info!(
8935 "Entity relationship graph: {} nodes, {} edges",
8936 graph.nodes.len(),
8937 graph.edges.len()
8938 );
8939 stats.entity_relationship_node_count = graph.nodes.len();
8940 stats.entity_relationship_edge_count = graph.edges.len();
8941 Some(graph)
8942 } else {
8943 None
8944 };
8945
8946 let cross_process_links = if cpl_enabled {
8948 let gr_refs: Vec<GoodsReceiptRef> = document_flows
8950 .p2p_chains
8951 .iter()
8952 .flat_map(|chain| {
8953 let vendor_id = chain.purchase_order.vendor_id.clone();
8954 let cc = chain.purchase_order.header.company_code.clone();
8955 chain.goods_receipts.iter().flat_map(move |gr| {
8956 gr.items.iter().filter_map({
8957 let doc_id = gr.header.document_id.clone();
8958 let v_id = vendor_id.clone();
8959 let company = cc.clone();
8960 let receipt_date = gr.header.document_date;
8961 move |item| {
8962 item.base
8963 .material_id
8964 .as_ref()
8965 .map(|mat_id| GoodsReceiptRef {
8966 document_id: doc_id.clone(),
8967 material_id: mat_id.clone(),
8968 quantity: item.base.quantity,
8969 receipt_date,
8970 vendor_id: v_id.clone(),
8971 company_code: company.clone(),
8972 })
8973 }
8974 })
8975 })
8976 })
8977 .collect();
8978
8979 let del_refs: Vec<DeliveryRef> = document_flows
8981 .o2c_chains
8982 .iter()
8983 .flat_map(|chain| {
8984 let customer_id = chain.sales_order.customer_id.clone();
8985 let cc = chain.sales_order.header.company_code.clone();
8986 chain.deliveries.iter().flat_map(move |del| {
8987 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
8988 del.items.iter().filter_map({
8989 let doc_id = del.header.document_id.clone();
8990 let c_id = customer_id.clone();
8991 let company = cc.clone();
8992 move |item| {
8993 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
8994 document_id: doc_id.clone(),
8995 material_id: mat_id.clone(),
8996 quantity: item.base.quantity,
8997 delivery_date,
8998 customer_id: c_id.clone(),
8999 company_code: company.clone(),
9000 })
9001 }
9002 })
9003 })
9004 })
9005 .collect();
9006
9007 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
9008 info!("Cross-process links generated: {} links", links.len());
9009 stats.cross_process_link_count = links.len();
9010 links
9011 } else {
9012 Vec::new()
9013 };
9014
9015 self.check_resources_with_log("post-entity-relationships")?;
9016 Ok((entity_graph, cross_process_links))
9017 }
9018
9019 fn phase_industry_data(
9021 &self,
9022 stats: &mut EnhancedGenerationStatistics,
9023 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
9024 if !self.config.industry_specific.enabled {
9025 return None;
9026 }
9027 info!("Phase 29: Generating industry-specific data");
9028 let output = datasynth_generators::industry::factory::generate_industry_output(
9029 self.config.global.industry,
9030 );
9031 stats.industry_gl_account_count = output.gl_accounts.len();
9032 info!(
9033 "Industry data generated: {} GL accounts for {:?}",
9034 output.gl_accounts.len(),
9035 self.config.global.industry
9036 );
9037 Some(output)
9038 }
9039
9040 fn phase_opening_balances(
9042 &mut self,
9043 coa: &Arc<ChartOfAccounts>,
9044 stats: &mut EnhancedGenerationStatistics,
9045 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
9046 if !self.config.balance.generate_opening_balances {
9047 debug!("Phase 3b: Skipped (opening balance generation disabled)");
9048 return Ok(Vec::new());
9049 }
9050 info!("Phase 3b: Generating Opening Balances");
9051
9052 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9053 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9054 let fiscal_year = start_date.year();
9055
9056 let industry = match self.config.global.industry {
9057 IndustrySector::Manufacturing => IndustryType::Manufacturing,
9058 IndustrySector::Retail => IndustryType::Retail,
9059 IndustrySector::FinancialServices => IndustryType::Financial,
9060 IndustrySector::Healthcare => IndustryType::Healthcare,
9061 IndustrySector::Technology => IndustryType::Technology,
9062 _ => IndustryType::Manufacturing,
9063 };
9064
9065 let config = datasynth_generators::OpeningBalanceConfig {
9066 industry,
9067 ..Default::default()
9068 };
9069 let mut gen =
9070 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
9071
9072 let mut results = Vec::new();
9073 for company in &self.config.companies {
9074 let spec = OpeningBalanceSpec::new(
9075 company.code.clone(),
9076 start_date,
9077 fiscal_year,
9078 company.currency.clone(),
9079 rust_decimal::Decimal::new(10_000_000, 0),
9080 industry,
9081 );
9082 let ob = gen.generate(&spec, coa, start_date, &company.code);
9083 results.push(ob);
9084 }
9085
9086 stats.opening_balance_count = results.len();
9087 info!("Opening balances generated: {} companies", results.len());
9088 self.check_resources_with_log("post-opening-balances")?;
9089
9090 Ok(results)
9091 }
9092
9093 fn phase_subledger_reconciliation(
9095 &mut self,
9096 subledger: &SubledgerSnapshot,
9097 entries: &[JournalEntry],
9098 stats: &mut EnhancedGenerationStatistics,
9099 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
9100 if !self.config.balance.reconcile_subledgers {
9101 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
9102 return Ok(Vec::new());
9103 }
9104 info!("Phase 9b: Reconciling GL to subledger balances");
9105
9106 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9107 .map(|d| d + chrono::Months::new(self.config.global.period_months))
9108 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9109
9110 let tracker_config = BalanceTrackerConfig {
9112 validate_on_each_entry: false,
9113 track_history: false,
9114 fail_on_validation_error: false,
9115 ..Default::default()
9116 };
9117 let recon_currency = self
9118 .config
9119 .companies
9120 .first()
9121 .map(|c| c.currency.clone())
9122 .unwrap_or_else(|| "USD".to_string());
9123 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
9124 let validation_errors = tracker.apply_entries(entries);
9125 if !validation_errors.is_empty() {
9126 warn!(
9127 error_count = validation_errors.len(),
9128 "Balance tracker encountered validation errors during subledger reconciliation"
9129 );
9130 for err in &validation_errors {
9131 debug!("Balance validation error: {:?}", err);
9132 }
9133 }
9134
9135 let mut engine = datasynth_generators::ReconciliationEngine::new(
9136 datasynth_generators::ReconciliationConfig::default(),
9137 );
9138
9139 let mut results = Vec::new();
9140 let company_code = self
9141 .config
9142 .companies
9143 .first()
9144 .map(|c| c.code.as_str())
9145 .unwrap_or("1000");
9146
9147 if !subledger.ar_invoices.is_empty() {
9149 let gl_balance = tracker
9150 .get_account_balance(
9151 company_code,
9152 datasynth_core::accounts::control_accounts::AR_CONTROL,
9153 )
9154 .map(|b| b.closing_balance)
9155 .unwrap_or_default();
9156 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
9157 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
9158 }
9159
9160 if !subledger.ap_invoices.is_empty() {
9162 let gl_balance = tracker
9163 .get_account_balance(
9164 company_code,
9165 datasynth_core::accounts::control_accounts::AP_CONTROL,
9166 )
9167 .map(|b| b.closing_balance)
9168 .unwrap_or_default();
9169 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
9170 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
9171 }
9172
9173 if !subledger.fa_records.is_empty() {
9175 let gl_asset_balance = tracker
9176 .get_account_balance(
9177 company_code,
9178 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
9179 )
9180 .map(|b| b.closing_balance)
9181 .unwrap_or_default();
9182 let gl_accum_depr_balance = tracker
9183 .get_account_balance(
9184 company_code,
9185 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
9186 )
9187 .map(|b| b.closing_balance)
9188 .unwrap_or_default();
9189 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
9190 subledger.fa_records.iter().collect();
9191 let (asset_recon, depr_recon) = engine.reconcile_fa(
9192 company_code,
9193 end_date,
9194 gl_asset_balance,
9195 gl_accum_depr_balance,
9196 &fa_refs,
9197 );
9198 results.push(asset_recon);
9199 results.push(depr_recon);
9200 }
9201
9202 if !subledger.inventory_positions.is_empty() {
9204 let gl_balance = tracker
9205 .get_account_balance(
9206 company_code,
9207 datasynth_core::accounts::control_accounts::INVENTORY,
9208 )
9209 .map(|b| b.closing_balance)
9210 .unwrap_or_default();
9211 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
9212 subledger.inventory_positions.iter().collect();
9213 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
9214 }
9215
9216 stats.subledger_reconciliation_count = results.len();
9217 let passed = results.iter().filter(|r| r.is_balanced()).count();
9218 let failed = results.len() - passed;
9219 info!(
9220 "Subledger reconciliation: {} checks, {} passed, {} failed",
9221 results.len(),
9222 passed,
9223 failed
9224 );
9225 self.check_resources_with_log("post-subledger-reconciliation")?;
9226
9227 Ok(results)
9228 }
9229
9230 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
9232 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
9233
9234 let coa_framework = self.resolve_coa_framework();
9235
9236 let mut gen = ChartOfAccountsGenerator::new(
9237 self.config.chart_of_accounts.complexity,
9238 self.config.global.industry,
9239 self.seed,
9240 )
9241 .with_coa_framework(coa_framework);
9242
9243 let coa = Arc::new(gen.generate());
9244 self.coa = Some(Arc::clone(&coa));
9245
9246 if let Some(pb) = pb {
9247 pb.finish_with_message("Chart of Accounts complete");
9248 }
9249
9250 Ok(coa)
9251 }
9252
9253 fn generate_master_data(&mut self) -> SynthResult<()> {
9255 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9256 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9257 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9258
9259 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
9261
9262 let pack = self.primary_pack().clone();
9264
9265 let vendors_per_company = self.phase_config.vendors_per_company;
9267 let customers_per_company = self.phase_config.customers_per_company;
9268 let materials_per_company = self.phase_config.materials_per_company;
9269 let assets_per_company = self.phase_config.assets_per_company;
9270 let coa_framework = self.resolve_coa_framework();
9271
9272 let per_company_results: Vec<_> = self
9275 .config
9276 .companies
9277 .par_iter()
9278 .enumerate()
9279 .map(|(i, company)| {
9280 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
9281 let pack = pack.clone();
9282
9283 let mut vendor_gen = VendorGenerator::new(company_seed);
9285 vendor_gen.set_country_pack(pack.clone());
9286 vendor_gen.set_coa_framework(coa_framework);
9287 vendor_gen.set_counter_offset(i * vendors_per_company);
9288 if self.config.vendor_network.enabled {
9290 let vn = &self.config.vendor_network;
9291 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
9292 enabled: true,
9293 depth: vn.depth,
9294 tier1_count: datasynth_generators::TierCountConfig::new(
9295 vn.tier1.min,
9296 vn.tier1.max,
9297 ),
9298 tier2_per_parent: datasynth_generators::TierCountConfig::new(
9299 vn.tier2_per_parent.min,
9300 vn.tier2_per_parent.max,
9301 ),
9302 tier3_per_parent: datasynth_generators::TierCountConfig::new(
9303 vn.tier3_per_parent.min,
9304 vn.tier3_per_parent.max,
9305 ),
9306 cluster_distribution: datasynth_generators::ClusterDistribution {
9307 reliable_strategic: vn.clusters.reliable_strategic,
9308 standard_operational: vn.clusters.standard_operational,
9309 transactional: vn.clusters.transactional,
9310 problematic: vn.clusters.problematic,
9311 },
9312 concentration_limits: datasynth_generators::ConcentrationLimits {
9313 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9314 max_top5: vn.dependencies.top_5_concentration,
9315 },
9316 ..datasynth_generators::VendorNetworkConfig::default()
9317 });
9318 }
9319 let vendor_pool =
9320 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9321
9322 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9324 customer_gen.set_country_pack(pack.clone());
9325 customer_gen.set_coa_framework(coa_framework);
9326 customer_gen.set_counter_offset(i * customers_per_company);
9327 if self.config.customer_segmentation.enabled {
9329 let cs = &self.config.customer_segmentation;
9330 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9331 enabled: true,
9332 segment_distribution: datasynth_generators::SegmentDistribution {
9333 enterprise: cs.value_segments.enterprise.customer_share,
9334 mid_market: cs.value_segments.mid_market.customer_share,
9335 smb: cs.value_segments.smb.customer_share,
9336 consumer: cs.value_segments.consumer.customer_share,
9337 },
9338 referral_config: datasynth_generators::ReferralConfig {
9339 enabled: cs.networks.referrals.enabled,
9340 referral_rate: cs.networks.referrals.referral_rate,
9341 ..Default::default()
9342 },
9343 hierarchy_config: datasynth_generators::HierarchyConfig {
9344 enabled: cs.networks.corporate_hierarchies.enabled,
9345 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9346 ..Default::default()
9347 },
9348 ..Default::default()
9349 };
9350 customer_gen.set_segmentation_config(seg_cfg);
9351 }
9352 let customer_pool = customer_gen.generate_customer_pool(
9353 customers_per_company,
9354 &company.code,
9355 start_date,
9356 );
9357
9358 let mut material_gen = MaterialGenerator::new(company_seed + 200);
9360 material_gen.set_country_pack(pack.clone());
9361 material_gen.set_counter_offset(i * materials_per_company);
9362 let material_pool = material_gen.generate_material_pool(
9363 materials_per_company,
9364 &company.code,
9365 start_date,
9366 );
9367
9368 let mut asset_gen = AssetGenerator::new(company_seed + 300);
9370 let asset_pool = asset_gen.generate_asset_pool(
9371 assets_per_company,
9372 &company.code,
9373 (start_date, end_date),
9374 );
9375
9376 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9378 employee_gen.set_country_pack(pack);
9379 let employee_pool =
9380 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9381
9382 let employee_change_history =
9384 employee_gen.generate_all_change_history(&employee_pool, end_date);
9385
9386 let employee_ids: Vec<String> = employee_pool
9388 .employees
9389 .iter()
9390 .map(|e| e.employee_id.clone())
9391 .collect();
9392 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9393 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9394
9395 (
9396 vendor_pool.vendors,
9397 customer_pool.customers,
9398 material_pool.materials,
9399 asset_pool.assets,
9400 employee_pool.employees,
9401 employee_change_history,
9402 cost_centers,
9403 )
9404 })
9405 .collect();
9406
9407 for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
9409 per_company_results
9410 {
9411 self.master_data.vendors.extend(vendors);
9412 self.master_data.customers.extend(customers);
9413 self.master_data.materials.extend(materials);
9414 self.master_data.assets.extend(assets);
9415 self.master_data.employees.extend(employees);
9416 self.master_data.cost_centers.extend(cost_centers);
9417 self.master_data
9418 .employee_change_history
9419 .extend(change_history);
9420 }
9421
9422 if let Some(pb) = &pb {
9423 pb.inc(total);
9424 }
9425 if let Some(pb) = pb {
9426 pb.finish_with_message("Master data generation complete");
9427 }
9428
9429 Ok(())
9430 }
9431
9432 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
9434 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9435 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9436
9437 let months = (self.config.global.period_months as usize).max(1);
9440 let p2p_count = self
9441 .phase_config
9442 .p2p_chains
9443 .min(self.master_data.vendors.len() * 2 * months);
9444 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
9445
9446 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
9448 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
9449 p2p_gen.set_country_pack(self.primary_pack().clone());
9450
9451 for i in 0..p2p_count {
9452 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
9453 let materials: Vec<&Material> = self
9454 .master_data
9455 .materials
9456 .iter()
9457 .skip(i % self.master_data.materials.len().max(1))
9458 .take(2.min(self.master_data.materials.len()))
9459 .collect();
9460
9461 if materials.is_empty() {
9462 continue;
9463 }
9464
9465 let company = &self.config.companies[i % self.config.companies.len()];
9466 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
9467 let fiscal_period = po_date.month() as u8;
9468 let created_by = if self.master_data.employees.is_empty() {
9469 "SYSTEM"
9470 } else {
9471 self.master_data.employees[i % self.master_data.employees.len()]
9472 .user_id
9473 .as_str()
9474 };
9475
9476 let chain = p2p_gen.generate_chain(
9477 &company.code,
9478 vendor,
9479 &materials,
9480 po_date,
9481 start_date.year() as u16,
9482 fiscal_period,
9483 created_by,
9484 );
9485
9486 flows.purchase_orders.push(chain.purchase_order.clone());
9488 flows.goods_receipts.extend(chain.goods_receipts.clone());
9489 if let Some(vi) = &chain.vendor_invoice {
9490 flows.vendor_invoices.push(vi.clone());
9491 }
9492 if let Some(payment) = &chain.payment {
9493 flows.payments.push(payment.clone());
9494 }
9495 for remainder in &chain.remainder_payments {
9496 flows.payments.push(remainder.clone());
9497 }
9498 flows.p2p_chains.push(chain);
9499
9500 if let Some(pb) = &pb {
9501 pb.inc(1);
9502 }
9503 }
9504
9505 if let Some(pb) = pb {
9506 pb.finish_with_message("P2P document flows complete");
9507 }
9508
9509 let o2c_count = self
9512 .phase_config
9513 .o2c_chains
9514 .min(self.master_data.customers.len() * 2 * months);
9515 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
9516
9517 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
9519 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
9520 o2c_gen.set_country_pack(self.primary_pack().clone());
9521
9522 for i in 0..o2c_count {
9523 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
9524 let materials: Vec<&Material> = self
9525 .master_data
9526 .materials
9527 .iter()
9528 .skip(i % self.master_data.materials.len().max(1))
9529 .take(2.min(self.master_data.materials.len()))
9530 .collect();
9531
9532 if materials.is_empty() {
9533 continue;
9534 }
9535
9536 let company = &self.config.companies[i % self.config.companies.len()];
9537 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
9538 let fiscal_period = so_date.month() as u8;
9539 let created_by = if self.master_data.employees.is_empty() {
9540 "SYSTEM"
9541 } else {
9542 self.master_data.employees[i % self.master_data.employees.len()]
9543 .user_id
9544 .as_str()
9545 };
9546
9547 let chain = o2c_gen.generate_chain(
9548 &company.code,
9549 customer,
9550 &materials,
9551 so_date,
9552 start_date.year() as u16,
9553 fiscal_period,
9554 created_by,
9555 );
9556
9557 flows.sales_orders.push(chain.sales_order.clone());
9559 flows.deliveries.extend(chain.deliveries.clone());
9560 if let Some(ci) = &chain.customer_invoice {
9561 flows.customer_invoices.push(ci.clone());
9562 }
9563 if let Some(receipt) = &chain.customer_receipt {
9564 flows.payments.push(receipt.clone());
9565 }
9566 for receipt in &chain.remainder_receipts {
9568 flows.payments.push(receipt.clone());
9569 }
9570 flows.o2c_chains.push(chain);
9571
9572 if let Some(pb) = &pb {
9573 pb.inc(1);
9574 }
9575 }
9576
9577 if let Some(pb) = pb {
9578 pb.finish_with_message("O2C document flows complete");
9579 }
9580
9581 {
9585 let mut refs = Vec::new();
9586 for doc in &flows.purchase_orders {
9587 refs.extend(doc.header.document_references.iter().cloned());
9588 }
9589 for doc in &flows.goods_receipts {
9590 refs.extend(doc.header.document_references.iter().cloned());
9591 }
9592 for doc in &flows.vendor_invoices {
9593 refs.extend(doc.header.document_references.iter().cloned());
9594 }
9595 for doc in &flows.sales_orders {
9596 refs.extend(doc.header.document_references.iter().cloned());
9597 }
9598 for doc in &flows.deliveries {
9599 refs.extend(doc.header.document_references.iter().cloned());
9600 }
9601 for doc in &flows.customer_invoices {
9602 refs.extend(doc.header.document_references.iter().cloned());
9603 }
9604 for doc in &flows.payments {
9605 refs.extend(doc.header.document_references.iter().cloned());
9606 }
9607 debug!(
9608 "Collected {} document cross-references from document headers",
9609 refs.len()
9610 );
9611 flows.document_references = refs;
9612 }
9613
9614 Ok(())
9615 }
9616
9617 fn generate_journal_entries(
9619 &mut self,
9620 coa: &Arc<ChartOfAccounts>,
9621 ) -> SynthResult<Vec<JournalEntry>> {
9622 use datasynth_core::traits::ParallelGenerator;
9623
9624 let total = self.calculate_total_transactions();
9625 let pb = self.create_progress_bar(total, "Generating Journal Entries");
9626
9627 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9628 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9629 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9630
9631 let company_codes: Vec<String> = self
9632 .config
9633 .companies
9634 .iter()
9635 .map(|c| c.code.clone())
9636 .collect();
9637
9638 let generator = JournalEntryGenerator::new_with_params(
9639 self.config.transactions.clone(),
9640 Arc::clone(coa),
9641 company_codes,
9642 start_date,
9643 end_date,
9644 self.seed,
9645 );
9646
9647 let je_pack = self.primary_pack();
9651
9652 let mut generator = generator
9653 .with_master_data(
9654 &self.master_data.vendors,
9655 &self.master_data.customers,
9656 &self.master_data.materials,
9657 )
9658 .with_country_pack_names(je_pack)
9659 .with_country_pack_temporal(
9660 self.config.temporal_patterns.clone(),
9661 self.seed + 200,
9662 je_pack,
9663 )
9664 .with_persona_errors(true)
9665 .with_fraud_config(self.config.fraud.clone());
9666
9667 if self.config.temporal.enabled {
9669 let drift_config = self.config.temporal.to_core_config();
9670 generator = generator.with_drift_config(drift_config, self.seed + 100);
9671 }
9672
9673 self.check_memory_limit()?;
9675
9676 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
9678
9679 let entries = if total >= 10_000 && num_threads > 1 {
9683 let sub_generators = generator.split(num_threads);
9686 let entries_per_thread = total as usize / num_threads;
9687 let remainder = total as usize % num_threads;
9688
9689 let batches: Vec<Vec<JournalEntry>> = sub_generators
9690 .into_par_iter()
9691 .enumerate()
9692 .map(|(i, mut gen)| {
9693 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
9694 gen.generate_batch(count)
9695 })
9696 .collect();
9697
9698 let entries = JournalEntryGenerator::merge_results(batches);
9700
9701 if let Some(pb) = &pb {
9702 pb.inc(total);
9703 }
9704 entries
9705 } else {
9706 let mut entries = Vec::with_capacity(total as usize);
9708 for _ in 0..total {
9709 let entry = generator.generate();
9710 entries.push(entry);
9711 if let Some(pb) = &pb {
9712 pb.inc(1);
9713 }
9714 }
9715 entries
9716 };
9717
9718 if let Some(pb) = pb {
9719 pb.finish_with_message("Journal entries complete");
9720 }
9721
9722 Ok(entries)
9723 }
9724
9725 fn generate_jes_from_document_flows(
9730 &mut self,
9731 flows: &DocumentFlowSnapshot,
9732 ) -> SynthResult<Vec<JournalEntry>> {
9733 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
9734 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
9735
9736 let je_config = match self.resolve_coa_framework() {
9737 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
9738 CoAFramework::GermanSkr04 => {
9739 let fa = datasynth_core::FrameworkAccounts::german_gaap();
9740 DocumentFlowJeConfig::from(&fa)
9741 }
9742 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
9743 };
9744
9745 let populate_fec = je_config.populate_fec_fields;
9746 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
9747
9748 if populate_fec {
9752 let mut aux_lookup = std::collections::HashMap::new();
9753 for vendor in &self.master_data.vendors {
9754 if let Some(ref aux) = vendor.auxiliary_gl_account {
9755 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
9756 }
9757 }
9758 for customer in &self.master_data.customers {
9759 if let Some(ref aux) = customer.auxiliary_gl_account {
9760 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
9761 }
9762 }
9763 if !aux_lookup.is_empty() {
9764 generator.set_auxiliary_account_lookup(aux_lookup);
9765 }
9766 }
9767
9768 let mut entries = Vec::new();
9769
9770 for chain in &flows.p2p_chains {
9772 let chain_entries = generator.generate_from_p2p_chain(chain);
9773 entries.extend(chain_entries);
9774 if let Some(pb) = &pb {
9775 pb.inc(1);
9776 }
9777 }
9778
9779 for chain in &flows.o2c_chains {
9781 let chain_entries = generator.generate_from_o2c_chain(chain);
9782 entries.extend(chain_entries);
9783 if let Some(pb) = &pb {
9784 pb.inc(1);
9785 }
9786 }
9787
9788 if let Some(pb) = pb {
9789 pb.finish_with_message(format!(
9790 "Generated {} JEs from document flows",
9791 entries.len()
9792 ));
9793 }
9794
9795 Ok(entries)
9796 }
9797
9798 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
9804 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
9805
9806 let mut jes = Vec::with_capacity(payroll_runs.len());
9807
9808 for run in payroll_runs {
9809 let mut je = JournalEntry::new_simple(
9810 format!("JE-PAYROLL-{}", run.payroll_id),
9811 run.company_code.clone(),
9812 run.run_date,
9813 format!("Payroll {}", run.payroll_id),
9814 );
9815
9816 je.add_line(JournalEntryLine {
9818 line_number: 1,
9819 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
9820 debit_amount: run.total_gross,
9821 reference: Some(run.payroll_id.clone()),
9822 text: Some(format!(
9823 "Payroll {} ({} employees)",
9824 run.payroll_id, run.employee_count
9825 )),
9826 ..Default::default()
9827 });
9828
9829 je.add_line(JournalEntryLine {
9831 line_number: 2,
9832 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
9833 credit_amount: run.total_gross,
9834 reference: Some(run.payroll_id.clone()),
9835 ..Default::default()
9836 });
9837
9838 jes.push(je);
9839 }
9840
9841 jes
9842 }
9843
9844 fn link_document_flows_to_subledgers(
9849 &mut self,
9850 flows: &DocumentFlowSnapshot,
9851 ) -> SynthResult<SubledgerSnapshot> {
9852 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
9853 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
9854
9855 let vendor_names: std::collections::HashMap<String, String> = self
9857 .master_data
9858 .vendors
9859 .iter()
9860 .map(|v| (v.vendor_id.clone(), v.name.clone()))
9861 .collect();
9862 let customer_names: std::collections::HashMap<String, String> = self
9863 .master_data
9864 .customers
9865 .iter()
9866 .map(|c| (c.customer_id.clone(), c.name.clone()))
9867 .collect();
9868
9869 let mut linker = DocumentFlowLinker::new()
9870 .with_vendor_names(vendor_names)
9871 .with_customer_names(customer_names);
9872
9873 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
9875 if let Some(pb) = &pb {
9876 pb.inc(flows.vendor_invoices.len() as u64);
9877 }
9878
9879 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
9881 if let Some(pb) = &pb {
9882 pb.inc(flows.customer_invoices.len() as u64);
9883 }
9884
9885 if let Some(pb) = pb {
9886 pb.finish_with_message(format!(
9887 "Linked {} AP and {} AR invoices",
9888 ap_invoices.len(),
9889 ar_invoices.len()
9890 ));
9891 }
9892
9893 Ok(SubledgerSnapshot {
9894 ap_invoices,
9895 ar_invoices,
9896 fa_records: Vec::new(),
9897 inventory_positions: Vec::new(),
9898 inventory_movements: Vec::new(),
9899 ar_aging_reports: Vec::new(),
9901 ap_aging_reports: Vec::new(),
9902 depreciation_runs: Vec::new(),
9904 inventory_valuations: Vec::new(),
9905 dunning_runs: Vec::new(),
9907 dunning_letters: Vec::new(),
9908 })
9909 }
9910
9911 #[allow(clippy::too_many_arguments)]
9916 fn generate_ocpm_events(
9917 &mut self,
9918 flows: &DocumentFlowSnapshot,
9919 sourcing: &SourcingSnapshot,
9920 hr: &HrSnapshot,
9921 manufacturing: &ManufacturingSnapshot,
9922 banking: &BankingSnapshot,
9923 audit: &AuditSnapshot,
9924 financial_reporting: &FinancialReportingSnapshot,
9925 ) -> SynthResult<OcpmSnapshot> {
9926 let total_chains = flows.p2p_chains.len()
9927 + flows.o2c_chains.len()
9928 + sourcing.sourcing_projects.len()
9929 + hr.payroll_runs.len()
9930 + manufacturing.production_orders.len()
9931 + banking.customers.len()
9932 + audit.engagements.len()
9933 + financial_reporting.bank_reconciliations.len();
9934 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
9935
9936 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
9938 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
9939
9940 let ocpm_config = OcpmGeneratorConfig {
9942 generate_p2p: true,
9943 generate_o2c: true,
9944 generate_s2c: !sourcing.sourcing_projects.is_empty(),
9945 generate_h2r: !hr.payroll_runs.is_empty(),
9946 generate_mfg: !manufacturing.production_orders.is_empty(),
9947 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
9948 generate_bank: !banking.customers.is_empty(),
9949 generate_audit: !audit.engagements.is_empty(),
9950 happy_path_rate: 0.75,
9951 exception_path_rate: 0.20,
9952 error_path_rate: 0.05,
9953 add_duration_variability: true,
9954 duration_std_dev_factor: 0.3,
9955 };
9956 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
9957 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
9958
9959 let available_users: Vec<String> = self
9961 .master_data
9962 .employees
9963 .iter()
9964 .take(20)
9965 .map(|e| e.user_id.clone())
9966 .collect();
9967
9968 let fallback_date =
9970 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
9971 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9972 .unwrap_or(fallback_date);
9973 let base_midnight = base_date
9974 .and_hms_opt(0, 0, 0)
9975 .expect("midnight is always valid");
9976 let base_datetime =
9977 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
9978
9979 let add_result = |event_log: &mut OcpmEventLog,
9981 result: datasynth_ocpm::CaseGenerationResult| {
9982 for event in result.events {
9983 event_log.add_event(event);
9984 }
9985 for object in result.objects {
9986 event_log.add_object(object);
9987 }
9988 for relationship in result.relationships {
9989 event_log.add_relationship(relationship);
9990 }
9991 for corr in result.correlation_events {
9992 event_log.add_correlation_event(corr);
9993 }
9994 event_log.add_case(result.case_trace);
9995 };
9996
9997 for chain in &flows.p2p_chains {
9999 let po = &chain.purchase_order;
10000 let documents = P2pDocuments::new(
10001 &po.header.document_id,
10002 &po.vendor_id,
10003 &po.header.company_code,
10004 po.total_net_amount,
10005 &po.header.currency,
10006 &ocpm_uuid_factory,
10007 )
10008 .with_goods_receipt(
10009 chain
10010 .goods_receipts
10011 .first()
10012 .map(|gr| gr.header.document_id.as_str())
10013 .unwrap_or(""),
10014 &ocpm_uuid_factory,
10015 )
10016 .with_invoice(
10017 chain
10018 .vendor_invoice
10019 .as_ref()
10020 .map(|vi| vi.header.document_id.as_str())
10021 .unwrap_or(""),
10022 &ocpm_uuid_factory,
10023 )
10024 .with_payment(
10025 chain
10026 .payment
10027 .as_ref()
10028 .map(|p| p.header.document_id.as_str())
10029 .unwrap_or(""),
10030 &ocpm_uuid_factory,
10031 );
10032
10033 let start_time =
10034 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
10035 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
10036 add_result(&mut event_log, result);
10037
10038 if let Some(pb) = &pb {
10039 pb.inc(1);
10040 }
10041 }
10042
10043 for chain in &flows.o2c_chains {
10045 let so = &chain.sales_order;
10046 let documents = O2cDocuments::new(
10047 &so.header.document_id,
10048 &so.customer_id,
10049 &so.header.company_code,
10050 so.total_net_amount,
10051 &so.header.currency,
10052 &ocpm_uuid_factory,
10053 )
10054 .with_delivery(
10055 chain
10056 .deliveries
10057 .first()
10058 .map(|d| d.header.document_id.as_str())
10059 .unwrap_or(""),
10060 &ocpm_uuid_factory,
10061 )
10062 .with_invoice(
10063 chain
10064 .customer_invoice
10065 .as_ref()
10066 .map(|ci| ci.header.document_id.as_str())
10067 .unwrap_or(""),
10068 &ocpm_uuid_factory,
10069 )
10070 .with_receipt(
10071 chain
10072 .customer_receipt
10073 .as_ref()
10074 .map(|r| r.header.document_id.as_str())
10075 .unwrap_or(""),
10076 &ocpm_uuid_factory,
10077 );
10078
10079 let start_time =
10080 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
10081 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
10082 add_result(&mut event_log, result);
10083
10084 if let Some(pb) = &pb {
10085 pb.inc(1);
10086 }
10087 }
10088
10089 for project in &sourcing.sourcing_projects {
10091 let vendor_id = sourcing
10093 .contracts
10094 .iter()
10095 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10096 .map(|c| c.vendor_id.clone())
10097 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
10098 .or_else(|| {
10099 self.master_data
10100 .vendors
10101 .first()
10102 .map(|v| v.vendor_id.clone())
10103 })
10104 .unwrap_or_else(|| "V000".to_string());
10105 let mut docs = S2cDocuments::new(
10106 &project.project_id,
10107 &vendor_id,
10108 &project.company_code,
10109 project.estimated_annual_spend,
10110 &ocpm_uuid_factory,
10111 );
10112 if let Some(rfx) = sourcing
10114 .rfx_events
10115 .iter()
10116 .find(|r| r.sourcing_project_id == project.project_id)
10117 {
10118 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
10119 if let Some(bid) = sourcing.bids.iter().find(|b| {
10121 b.rfx_id == rfx.rfx_id
10122 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
10123 }) {
10124 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
10125 }
10126 }
10127 if let Some(contract) = sourcing
10129 .contracts
10130 .iter()
10131 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10132 {
10133 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
10134 }
10135 let start_time = base_datetime - chrono::Duration::days(90);
10136 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
10137 add_result(&mut event_log, result);
10138
10139 if let Some(pb) = &pb {
10140 pb.inc(1);
10141 }
10142 }
10143
10144 for run in &hr.payroll_runs {
10146 let employee_id = hr
10148 .payroll_line_items
10149 .iter()
10150 .find(|li| li.payroll_id == run.payroll_id)
10151 .map(|li| li.employee_id.as_str())
10152 .unwrap_or("EMP000");
10153 let docs = H2rDocuments::new(
10154 &run.payroll_id,
10155 employee_id,
10156 &run.company_code,
10157 run.total_gross,
10158 &ocpm_uuid_factory,
10159 )
10160 .with_time_entries(
10161 hr.time_entries
10162 .iter()
10163 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
10164 .take(5)
10165 .map(|t| t.entry_id.as_str())
10166 .collect(),
10167 );
10168 let start_time = base_datetime - chrono::Duration::days(30);
10169 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
10170 add_result(&mut event_log, result);
10171
10172 if let Some(pb) = &pb {
10173 pb.inc(1);
10174 }
10175 }
10176
10177 for order in &manufacturing.production_orders {
10179 let mut docs = MfgDocuments::new(
10180 &order.order_id,
10181 &order.material_id,
10182 &order.company_code,
10183 order.planned_quantity,
10184 &ocpm_uuid_factory,
10185 )
10186 .with_operations(
10187 order
10188 .operations
10189 .iter()
10190 .map(|o| format!("OP-{:04}", o.operation_number))
10191 .collect::<Vec<_>>()
10192 .iter()
10193 .map(std::string::String::as_str)
10194 .collect(),
10195 );
10196 if let Some(insp) = manufacturing
10198 .quality_inspections
10199 .iter()
10200 .find(|i| i.reference_id == order.order_id)
10201 {
10202 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
10203 }
10204 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
10206 cc.items
10207 .iter()
10208 .any(|item| item.material_id == order.material_id)
10209 }) {
10210 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
10211 }
10212 let start_time = base_datetime - chrono::Duration::days(60);
10213 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
10214 add_result(&mut event_log, result);
10215
10216 if let Some(pb) = &pb {
10217 pb.inc(1);
10218 }
10219 }
10220
10221 for customer in &banking.customers {
10223 let customer_id_str = customer.customer_id.to_string();
10224 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
10225 if let Some(account) = banking
10227 .accounts
10228 .iter()
10229 .find(|a| a.primary_owner_id == customer.customer_id)
10230 {
10231 let account_id_str = account.account_id.to_string();
10232 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
10233 let txn_strs: Vec<String> = banking
10235 .transactions
10236 .iter()
10237 .filter(|t| t.account_id == account.account_id)
10238 .take(10)
10239 .map(|t| t.transaction_id.to_string())
10240 .collect();
10241 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
10242 let txn_amounts: Vec<rust_decimal::Decimal> = banking
10243 .transactions
10244 .iter()
10245 .filter(|t| t.account_id == account.account_id)
10246 .take(10)
10247 .map(|t| t.amount)
10248 .collect();
10249 if !txn_ids.is_empty() {
10250 docs = docs.with_transactions(txn_ids, txn_amounts);
10251 }
10252 }
10253 let start_time = base_datetime - chrono::Duration::days(180);
10254 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
10255 add_result(&mut event_log, result);
10256
10257 if let Some(pb) = &pb {
10258 pb.inc(1);
10259 }
10260 }
10261
10262 for engagement in &audit.engagements {
10264 let engagement_id_str = engagement.engagement_id.to_string();
10265 let docs = AuditDocuments::new(
10266 &engagement_id_str,
10267 &engagement.client_entity_id,
10268 &ocpm_uuid_factory,
10269 )
10270 .with_workpapers(
10271 audit
10272 .workpapers
10273 .iter()
10274 .filter(|w| w.engagement_id == engagement.engagement_id)
10275 .take(10)
10276 .map(|w| w.workpaper_id.to_string())
10277 .collect::<Vec<_>>()
10278 .iter()
10279 .map(std::string::String::as_str)
10280 .collect(),
10281 )
10282 .with_evidence(
10283 audit
10284 .evidence
10285 .iter()
10286 .filter(|e| e.engagement_id == engagement.engagement_id)
10287 .take(10)
10288 .map(|e| e.evidence_id.to_string())
10289 .collect::<Vec<_>>()
10290 .iter()
10291 .map(std::string::String::as_str)
10292 .collect(),
10293 )
10294 .with_risks(
10295 audit
10296 .risk_assessments
10297 .iter()
10298 .filter(|r| r.engagement_id == engagement.engagement_id)
10299 .take(5)
10300 .map(|r| r.risk_id.to_string())
10301 .collect::<Vec<_>>()
10302 .iter()
10303 .map(std::string::String::as_str)
10304 .collect(),
10305 )
10306 .with_findings(
10307 audit
10308 .findings
10309 .iter()
10310 .filter(|f| f.engagement_id == engagement.engagement_id)
10311 .take(5)
10312 .map(|f| f.finding_id.to_string())
10313 .collect::<Vec<_>>()
10314 .iter()
10315 .map(std::string::String::as_str)
10316 .collect(),
10317 )
10318 .with_judgments(
10319 audit
10320 .judgments
10321 .iter()
10322 .filter(|j| j.engagement_id == engagement.engagement_id)
10323 .take(5)
10324 .map(|j| j.judgment_id.to_string())
10325 .collect::<Vec<_>>()
10326 .iter()
10327 .map(std::string::String::as_str)
10328 .collect(),
10329 );
10330 let start_time = base_datetime - chrono::Duration::days(120);
10331 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
10332 add_result(&mut event_log, result);
10333
10334 if let Some(pb) = &pb {
10335 pb.inc(1);
10336 }
10337 }
10338
10339 for recon in &financial_reporting.bank_reconciliations {
10341 let docs = BankReconDocuments::new(
10342 &recon.reconciliation_id,
10343 &recon.bank_account_id,
10344 &recon.company_code,
10345 recon.bank_ending_balance,
10346 &ocpm_uuid_factory,
10347 )
10348 .with_statement_lines(
10349 recon
10350 .statement_lines
10351 .iter()
10352 .take(20)
10353 .map(|l| l.line_id.as_str())
10354 .collect(),
10355 )
10356 .with_reconciling_items(
10357 recon
10358 .reconciling_items
10359 .iter()
10360 .take(10)
10361 .map(|i| i.item_id.as_str())
10362 .collect(),
10363 );
10364 let start_time = base_datetime - chrono::Duration::days(30);
10365 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
10366 add_result(&mut event_log, result);
10367
10368 if let Some(pb) = &pb {
10369 pb.inc(1);
10370 }
10371 }
10372
10373 event_log.compute_variants();
10375
10376 let summary = event_log.summary();
10377
10378 if let Some(pb) = pb {
10379 pb.finish_with_message(format!(
10380 "Generated {} OCPM events, {} objects",
10381 summary.event_count, summary.object_count
10382 ));
10383 }
10384
10385 Ok(OcpmSnapshot {
10386 event_count: summary.event_count,
10387 object_count: summary.object_count,
10388 case_count: summary.case_count,
10389 event_log: Some(event_log),
10390 })
10391 }
10392
10393 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
10395 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
10396
10397 let total_rate = if self.config.anomaly_injection.enabled {
10400 self.config.anomaly_injection.rates.total_rate
10401 } else if self.config.fraud.enabled {
10402 self.config.fraud.fraud_rate
10403 } else {
10404 0.02
10405 };
10406
10407 let fraud_rate = if self.config.anomaly_injection.enabled {
10408 self.config.anomaly_injection.rates.fraud_rate
10409 } else {
10410 AnomalyRateConfig::default().fraud_rate
10411 };
10412
10413 let error_rate = if self.config.anomaly_injection.enabled {
10414 self.config.anomaly_injection.rates.error_rate
10415 } else {
10416 AnomalyRateConfig::default().error_rate
10417 };
10418
10419 let process_issue_rate = if self.config.anomaly_injection.enabled {
10420 self.config.anomaly_injection.rates.process_rate
10421 } else {
10422 AnomalyRateConfig::default().process_issue_rate
10423 };
10424
10425 let anomaly_config = AnomalyInjectorConfig {
10426 rates: AnomalyRateConfig {
10427 total_rate,
10428 fraud_rate,
10429 error_rate,
10430 process_issue_rate,
10431 ..Default::default()
10432 },
10433 seed: self.seed + 5000,
10434 ..Default::default()
10435 };
10436
10437 let mut injector = AnomalyInjector::new(anomaly_config);
10438 let result = injector.process_entries(entries);
10439
10440 if let Some(pb) = &pb {
10441 pb.inc(entries.len() as u64);
10442 pb.finish_with_message("Anomaly injection complete");
10443 }
10444
10445 let mut by_type = HashMap::new();
10446 for label in &result.labels {
10447 *by_type
10448 .entry(format!("{:?}", label.anomaly_type))
10449 .or_insert(0) += 1;
10450 }
10451
10452 Ok(AnomalyLabels {
10453 labels: result.labels,
10454 summary: Some(result.summary),
10455 by_type,
10456 })
10457 }
10458
10459 fn validate_journal_entries(
10468 &mut self,
10469 entries: &[JournalEntry],
10470 ) -> SynthResult<BalanceValidationResult> {
10471 let clean_entries: Vec<&JournalEntry> = entries
10473 .iter()
10474 .filter(|e| {
10475 e.header
10476 .header_text
10477 .as_ref()
10478 .map(|t| !t.contains("[HUMAN_ERROR:"))
10479 .unwrap_or(true)
10480 })
10481 .collect();
10482
10483 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
10484
10485 let config = BalanceTrackerConfig {
10487 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
10491 };
10492 let validation_currency = self
10493 .config
10494 .companies
10495 .first()
10496 .map(|c| c.currency.clone())
10497 .unwrap_or_else(|| "USD".to_string());
10498
10499 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
10500
10501 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
10503 let errors = tracker.apply_entries(&clean_refs);
10504
10505 if let Some(pb) = &pb {
10506 pb.inc(entries.len() as u64);
10507 }
10508
10509 let has_unbalanced = tracker
10512 .get_validation_errors()
10513 .iter()
10514 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
10515
10516 let mut all_errors = errors;
10519 all_errors.extend(tracker.get_validation_errors().iter().cloned());
10520 let company_codes: Vec<String> = self
10521 .config
10522 .companies
10523 .iter()
10524 .map(|c| c.code.clone())
10525 .collect();
10526
10527 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10528 .map(|d| d + chrono::Months::new(self.config.global.period_months))
10529 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10530
10531 for company_code in &company_codes {
10532 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
10533 all_errors.push(e);
10534 }
10535 }
10536
10537 let stats = tracker.get_statistics();
10539
10540 let is_balanced = all_errors.is_empty();
10542
10543 if let Some(pb) = pb {
10544 let msg = if is_balanced {
10545 "Balance validation passed"
10546 } else {
10547 "Balance validation completed with errors"
10548 };
10549 pb.finish_with_message(msg);
10550 }
10551
10552 Ok(BalanceValidationResult {
10553 validated: true,
10554 is_balanced,
10555 entries_processed: stats.entries_processed,
10556 total_debits: stats.total_debits,
10557 total_credits: stats.total_credits,
10558 accounts_tracked: stats.accounts_tracked,
10559 companies_tracked: stats.companies_tracked,
10560 validation_errors: all_errors,
10561 has_unbalanced_entries: has_unbalanced,
10562 })
10563 }
10564
10565 fn inject_data_quality(
10570 &mut self,
10571 entries: &mut [JournalEntry],
10572 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
10573 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
10574
10575 let config = if self.config.data_quality.enabled {
10578 let dq = &self.config.data_quality;
10579 DataQualityConfig {
10580 enable_missing_values: dq.missing_values.enabled,
10581 missing_values: datasynth_generators::MissingValueConfig {
10582 global_rate: dq.effective_missing_rate(),
10583 ..Default::default()
10584 },
10585 enable_format_variations: dq.format_variations.enabled,
10586 format_variations: datasynth_generators::FormatVariationConfig {
10587 date_variation_rate: dq.format_variations.dates.rate,
10588 amount_variation_rate: dq.format_variations.amounts.rate,
10589 identifier_variation_rate: dq.format_variations.identifiers.rate,
10590 ..Default::default()
10591 },
10592 enable_duplicates: dq.duplicates.enabled,
10593 duplicates: datasynth_generators::DuplicateConfig {
10594 duplicate_rate: dq.effective_duplicate_rate(),
10595 ..Default::default()
10596 },
10597 enable_typos: dq.typos.enabled,
10598 typos: datasynth_generators::TypoConfig {
10599 char_error_rate: dq.effective_typo_rate(),
10600 ..Default::default()
10601 },
10602 enable_encoding_issues: dq.encoding_issues.enabled,
10603 encoding_issue_rate: dq.encoding_issues.rate,
10604 seed: self.seed.wrapping_add(77), track_statistics: true,
10606 }
10607 } else {
10608 DataQualityConfig::minimal()
10609 };
10610 let mut injector = DataQualityInjector::new(config);
10611
10612 injector.set_country_pack(self.primary_pack().clone());
10614
10615 let context = HashMap::new();
10617
10618 for entry in entries.iter_mut() {
10619 if let Some(text) = &entry.header.header_text {
10621 let processed = injector.process_text_field(
10622 "header_text",
10623 text,
10624 &entry.header.document_id.to_string(),
10625 &context,
10626 );
10627 match processed {
10628 Some(new_text) if new_text != *text => {
10629 entry.header.header_text = Some(new_text);
10630 }
10631 None => {
10632 entry.header.header_text = None; }
10634 _ => {}
10635 }
10636 }
10637
10638 if let Some(ref_text) = &entry.header.reference {
10640 let processed = injector.process_text_field(
10641 "reference",
10642 ref_text,
10643 &entry.header.document_id.to_string(),
10644 &context,
10645 );
10646 match processed {
10647 Some(new_text) if new_text != *ref_text => {
10648 entry.header.reference = Some(new_text);
10649 }
10650 None => {
10651 entry.header.reference = None;
10652 }
10653 _ => {}
10654 }
10655 }
10656
10657 let user_persona = entry.header.user_persona.clone();
10659 if let Some(processed) = injector.process_text_field(
10660 "user_persona",
10661 &user_persona,
10662 &entry.header.document_id.to_string(),
10663 &context,
10664 ) {
10665 if processed != user_persona {
10666 entry.header.user_persona = processed;
10667 }
10668 }
10669
10670 for line in &mut entry.lines {
10672 if let Some(ref text) = line.line_text {
10674 let processed = injector.process_text_field(
10675 "line_text",
10676 text,
10677 &entry.header.document_id.to_string(),
10678 &context,
10679 );
10680 match processed {
10681 Some(new_text) if new_text != *text => {
10682 line.line_text = Some(new_text);
10683 }
10684 None => {
10685 line.line_text = None;
10686 }
10687 _ => {}
10688 }
10689 }
10690
10691 if let Some(cc) = &line.cost_center {
10693 let processed = injector.process_text_field(
10694 "cost_center",
10695 cc,
10696 &entry.header.document_id.to_string(),
10697 &context,
10698 );
10699 match processed {
10700 Some(new_cc) if new_cc != *cc => {
10701 line.cost_center = Some(new_cc);
10702 }
10703 None => {
10704 line.cost_center = None;
10705 }
10706 _ => {}
10707 }
10708 }
10709 }
10710
10711 if let Some(pb) = &pb {
10712 pb.inc(1);
10713 }
10714 }
10715
10716 if let Some(pb) = pb {
10717 pb.finish_with_message("Data quality injection complete");
10718 }
10719
10720 let quality_issues = injector.issues().to_vec();
10721 Ok((injector.stats().clone(), quality_issues))
10722 }
10723
10724 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
10735 let use_fsm = self
10737 .config
10738 .audit
10739 .fsm
10740 .as_ref()
10741 .map(|f| f.enabled)
10742 .unwrap_or(false);
10743
10744 if use_fsm {
10745 return self.generate_audit_data_with_fsm(entries);
10746 }
10747
10748 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10750 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10751 let fiscal_year = start_date.year() as u16;
10752 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
10753
10754 let total_revenue: rust_decimal::Decimal = entries
10756 .iter()
10757 .flat_map(|e| e.lines.iter())
10758 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
10759 .map(|l| l.credit_amount)
10760 .sum();
10761
10762 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
10764
10765 let mut snapshot = AuditSnapshot::default();
10766
10767 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
10769 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
10770 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
10771 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
10772 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
10773 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
10774 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
10775 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
10776 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
10777 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
10778 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
10779 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
10780
10781 let accounts: Vec<String> = self
10783 .coa
10784 .as_ref()
10785 .map(|coa| {
10786 coa.get_postable_accounts()
10787 .iter()
10788 .map(|acc| acc.account_code().to_string())
10789 .collect()
10790 })
10791 .unwrap_or_default();
10792
10793 for (i, company) in self.config.companies.iter().enumerate() {
10795 let company_revenue = total_revenue
10797 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
10798
10799 let engagements_for_company =
10801 self.phase_config.audit_engagements / self.config.companies.len().max(1);
10802 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
10803 1
10804 } else {
10805 0
10806 };
10807
10808 for _eng_idx in 0..(engagements_for_company + extra) {
10809 let mut engagement = engagement_gen.generate_engagement(
10811 &company.code,
10812 &company.name,
10813 fiscal_year,
10814 period_end,
10815 company_revenue,
10816 None, );
10818
10819 if !self.master_data.employees.is_empty() {
10821 let emp_count = self.master_data.employees.len();
10822 let base = (i * 10 + _eng_idx) % emp_count;
10824 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
10825 .employee_id
10826 .clone();
10827 engagement.engagement_manager_id = self.master_data.employees
10828 [(base + 1) % emp_count]
10829 .employee_id
10830 .clone();
10831 let real_team: Vec<String> = engagement
10832 .team_member_ids
10833 .iter()
10834 .enumerate()
10835 .map(|(j, _)| {
10836 self.master_data.employees[(base + 2 + j) % emp_count]
10837 .employee_id
10838 .clone()
10839 })
10840 .collect();
10841 engagement.team_member_ids = real_team;
10842 }
10843
10844 if let Some(pb) = &pb {
10845 pb.inc(1);
10846 }
10847
10848 let team_members: Vec<String> = engagement.team_member_ids.clone();
10850
10851 let workpapers =
10853 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
10854
10855 for wp in &workpapers {
10856 if let Some(pb) = &pb {
10857 pb.inc(1);
10858 }
10859
10860 let evidence = evidence_gen.generate_evidence_for_workpaper(
10862 wp,
10863 &team_members,
10864 wp.preparer_date,
10865 );
10866
10867 for _ in &evidence {
10868 if let Some(pb) = &pb {
10869 pb.inc(1);
10870 }
10871 }
10872
10873 snapshot.evidence.extend(evidence);
10874 }
10875
10876 let risks =
10878 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
10879
10880 for _ in &risks {
10881 if let Some(pb) = &pb {
10882 pb.inc(1);
10883 }
10884 }
10885 snapshot.risk_assessments.extend(risks);
10886
10887 let findings = finding_gen.generate_findings_for_engagement(
10889 &engagement,
10890 &workpapers,
10891 &team_members,
10892 );
10893
10894 for _ in &findings {
10895 if let Some(pb) = &pb {
10896 pb.inc(1);
10897 }
10898 }
10899 snapshot.findings.extend(findings);
10900
10901 let judgments =
10903 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
10904
10905 for _ in &judgments {
10906 if let Some(pb) = &pb {
10907 pb.inc(1);
10908 }
10909 }
10910 snapshot.judgments.extend(judgments);
10911
10912 let (confs, resps) =
10914 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
10915 snapshot.confirmations.extend(confs);
10916 snapshot.confirmation_responses.extend(resps);
10917
10918 let team_pairs: Vec<(String, String)> = team_members
10920 .iter()
10921 .map(|id| {
10922 let name = self
10923 .master_data
10924 .employees
10925 .iter()
10926 .find(|e| e.employee_id == *id)
10927 .map(|e| e.display_name.clone())
10928 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
10929 (id.clone(), name)
10930 })
10931 .collect();
10932 for wp in &workpapers {
10933 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
10934 snapshot.procedure_steps.extend(steps);
10935 }
10936
10937 for wp in &workpapers {
10939 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
10940 snapshot.samples.push(sample);
10941 }
10942 }
10943
10944 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
10946 snapshot.analytical_results.extend(analytical);
10947
10948 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
10950 snapshot.ia_functions.push(ia_func);
10951 snapshot.ia_reports.extend(ia_reports);
10952
10953 let vendor_names: Vec<String> = self
10955 .master_data
10956 .vendors
10957 .iter()
10958 .map(|v| v.name.clone())
10959 .collect();
10960 let customer_names: Vec<String> = self
10961 .master_data
10962 .customers
10963 .iter()
10964 .map(|c| c.name.clone())
10965 .collect();
10966 let (parties, rp_txns) =
10967 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
10968 snapshot.related_parties.extend(parties);
10969 snapshot.related_party_transactions.extend(rp_txns);
10970
10971 snapshot.workpapers.extend(workpapers);
10973
10974 {
10976 let scope_id = format!(
10977 "SCOPE-{}-{}",
10978 engagement.engagement_id.simple(),
10979 &engagement.client_entity_id
10980 );
10981 let scope = datasynth_core::models::audit::AuditScope::new(
10982 scope_id.clone(),
10983 engagement.engagement_id.to_string(),
10984 engagement.client_entity_id.clone(),
10985 engagement.materiality,
10986 );
10987 let mut eng = engagement;
10989 eng.scope_id = Some(scope_id);
10990 snapshot.audit_scopes.push(scope);
10991 snapshot.engagements.push(eng);
10992 }
10993 }
10994 }
10995
10996 if self.config.companies.len() > 1 {
11000 let group_materiality = snapshot
11003 .engagements
11004 .first()
11005 .map(|e| e.materiality)
11006 .unwrap_or_else(|| {
11007 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
11008 total_revenue * pct
11009 });
11010
11011 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
11012 let group_engagement_id = snapshot
11013 .engagements
11014 .first()
11015 .map(|e| e.engagement_id.to_string())
11016 .unwrap_or_else(|| "GROUP-ENG".to_string());
11017
11018 let component_snapshot = component_gen.generate(
11019 &self.config.companies,
11020 group_materiality,
11021 &group_engagement_id,
11022 period_end,
11023 );
11024
11025 snapshot.component_auditors = component_snapshot.component_auditors;
11026 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
11027 snapshot.component_instructions = component_snapshot.component_instructions;
11028 snapshot.component_reports = component_snapshot.component_reports;
11029
11030 info!(
11031 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
11032 snapshot.component_auditors.len(),
11033 snapshot.component_instructions.len(),
11034 snapshot.component_reports.len(),
11035 );
11036 }
11037
11038 {
11042 let applicable_framework = self
11043 .config
11044 .accounting_standards
11045 .framework
11046 .as_ref()
11047 .map(|f| format!("{f:?}"))
11048 .unwrap_or_else(|| "IFRS".to_string());
11049
11050 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
11051 let entity_count = self.config.companies.len();
11052
11053 for engagement in &snapshot.engagements {
11054 let company = self
11055 .config
11056 .companies
11057 .iter()
11058 .find(|c| c.code == engagement.client_entity_id);
11059 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
11060 let letter_date = engagement.planning_start;
11061 let letter = letter_gen.generate(
11062 &engagement.engagement_id.to_string(),
11063 &engagement.client_name,
11064 entity_count,
11065 engagement.period_end_date,
11066 currency,
11067 &applicable_framework,
11068 letter_date,
11069 );
11070 snapshot.engagement_letters.push(letter);
11071 }
11072
11073 info!(
11074 "ISA 210 engagement letters: {} generated",
11075 snapshot.engagement_letters.len()
11076 );
11077 }
11078
11079 {
11083 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
11084 let entity_codes: Vec<String> = self
11085 .config
11086 .companies
11087 .iter()
11088 .map(|c| c.code.clone())
11089 .collect();
11090 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
11091 info!(
11092 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
11093 subsequent.len(),
11094 subsequent
11095 .iter()
11096 .filter(|e| matches!(
11097 e.classification,
11098 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
11099 ))
11100 .count(),
11101 subsequent
11102 .iter()
11103 .filter(|e| matches!(
11104 e.classification,
11105 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
11106 ))
11107 .count(),
11108 );
11109 snapshot.subsequent_events = subsequent;
11110 }
11111
11112 {
11116 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
11117 let entity_codes: Vec<String> = self
11118 .config
11119 .companies
11120 .iter()
11121 .map(|c| c.code.clone())
11122 .collect();
11123 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
11124 info!(
11125 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
11126 soc_snapshot.service_organizations.len(),
11127 soc_snapshot.soc_reports.len(),
11128 soc_snapshot.user_entity_controls.len(),
11129 );
11130 snapshot.service_organizations = soc_snapshot.service_organizations;
11131 snapshot.soc_reports = soc_snapshot.soc_reports;
11132 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
11133 }
11134
11135 {
11139 use datasynth_generators::audit::going_concern_generator::{
11140 GoingConcernGenerator, GoingConcernInput,
11141 };
11142 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
11143 let entity_codes: Vec<String> = self
11144 .config
11145 .companies
11146 .iter()
11147 .map(|c| c.code.clone())
11148 .collect();
11149 let assessment_date = period_end + chrono::Duration::days(75);
11151 let period_label = format!("FY{}", period_end.year());
11152
11153 let gc_inputs: Vec<GoingConcernInput> = self
11164 .config
11165 .companies
11166 .iter()
11167 .map(|company| {
11168 let code = &company.code;
11169 let mut revenue = rust_decimal::Decimal::ZERO;
11170 let mut expenses = rust_decimal::Decimal::ZERO;
11171 let mut current_assets = rust_decimal::Decimal::ZERO;
11172 let mut current_liabs = rust_decimal::Decimal::ZERO;
11173 let mut total_debt = rust_decimal::Decimal::ZERO;
11174
11175 for je in entries.iter().filter(|je| &je.header.company_code == code) {
11176 for line in &je.lines {
11177 let acct = line.gl_account.as_str();
11178 let net = line.debit_amount - line.credit_amount;
11179 if acct.starts_with('4') {
11180 revenue -= net;
11182 } else if acct.starts_with('6') {
11183 expenses += net;
11185 }
11186 if acct.starts_with('1') {
11188 if let Ok(n) = acct.parse::<u32>() {
11190 if (1000..=1499).contains(&n) {
11191 current_assets += net;
11192 }
11193 }
11194 } else if acct.starts_with('2') {
11195 if let Ok(n) = acct.parse::<u32>() {
11196 if (2000..=2499).contains(&n) {
11197 current_liabs -= net; } else if (2500..=2999).contains(&n) {
11200 total_debt -= net;
11202 }
11203 }
11204 }
11205 }
11206 }
11207
11208 let net_income = revenue - expenses;
11209 let working_capital = current_assets - current_liabs;
11210 let operating_cash_flow = net_income;
11213
11214 GoingConcernInput {
11215 entity_code: code.clone(),
11216 net_income,
11217 working_capital,
11218 operating_cash_flow,
11219 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
11220 assessment_date,
11221 }
11222 })
11223 .collect();
11224
11225 let assessments = if gc_inputs.is_empty() {
11226 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
11227 } else {
11228 gc_gen.generate_for_entities_with_inputs(
11229 &entity_codes,
11230 &gc_inputs,
11231 assessment_date,
11232 &period_label,
11233 )
11234 };
11235 info!(
11236 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
11237 assessments.len(),
11238 assessments.iter().filter(|a| matches!(
11239 a.auditor_conclusion,
11240 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
11241 )).count(),
11242 assessments.iter().filter(|a| matches!(
11243 a.auditor_conclusion,
11244 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
11245 )).count(),
11246 assessments.iter().filter(|a| matches!(
11247 a.auditor_conclusion,
11248 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
11249 )).count(),
11250 );
11251 snapshot.going_concern_assessments = assessments;
11252 }
11253
11254 {
11258 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
11259 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
11260 let entity_codes: Vec<String> = self
11261 .config
11262 .companies
11263 .iter()
11264 .map(|c| c.code.clone())
11265 .collect();
11266 let estimates = est_gen.generate_for_entities(&entity_codes);
11267 info!(
11268 "ISA 540 accounting estimates: {} estimates across {} entities \
11269 ({} with retrospective reviews, {} with auditor point estimates)",
11270 estimates.len(),
11271 entity_codes.len(),
11272 estimates
11273 .iter()
11274 .filter(|e| e.retrospective_review.is_some())
11275 .count(),
11276 estimates
11277 .iter()
11278 .filter(|e| e.auditor_point_estimate.is_some())
11279 .count(),
11280 );
11281 snapshot.accounting_estimates = estimates;
11282 }
11283
11284 {
11288 use datasynth_generators::audit::audit_opinion_generator::{
11289 AuditOpinionGenerator, AuditOpinionInput,
11290 };
11291
11292 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
11293
11294 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
11296 .engagements
11297 .iter()
11298 .map(|eng| {
11299 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11301 .findings
11302 .iter()
11303 .filter(|f| f.engagement_id == eng.engagement_id)
11304 .cloned()
11305 .collect();
11306
11307 let gc = snapshot
11309 .going_concern_assessments
11310 .iter()
11311 .find(|g| g.entity_code == eng.client_entity_id)
11312 .cloned();
11313
11314 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
11316 snapshot.component_reports.clone();
11317
11318 let auditor = self
11319 .master_data
11320 .employees
11321 .first()
11322 .map(|e| e.display_name.clone())
11323 .unwrap_or_else(|| "Global Audit LLP".into());
11324
11325 let partner = self
11326 .master_data
11327 .employees
11328 .get(1)
11329 .map(|e| e.display_name.clone())
11330 .unwrap_or_else(|| eng.engagement_partner_id.clone());
11331
11332 AuditOpinionInput {
11333 entity_code: eng.client_entity_id.clone(),
11334 entity_name: eng.client_name.clone(),
11335 engagement_id: eng.engagement_id,
11336 period_end: eng.period_end_date,
11337 findings: eng_findings,
11338 going_concern: gc,
11339 component_reports: comp_reports,
11340 is_us_listed: {
11342 let fw = &self.config.audit_standards.isa_compliance.framework;
11343 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
11344 },
11345 auditor_name: auditor,
11346 engagement_partner: partner,
11347 }
11348 })
11349 .collect();
11350
11351 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
11352
11353 for go in &generated_opinions {
11354 snapshot
11355 .key_audit_matters
11356 .extend(go.key_audit_matters.clone());
11357 }
11358 snapshot.audit_opinions = generated_opinions
11359 .into_iter()
11360 .map(|go| go.opinion)
11361 .collect();
11362
11363 info!(
11364 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
11365 snapshot.audit_opinions.len(),
11366 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
11367 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
11368 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
11369 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
11370 );
11371 }
11372
11373 {
11377 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
11378
11379 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
11380
11381 for (i, company) in self.config.companies.iter().enumerate() {
11382 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
11384 .engagements
11385 .iter()
11386 .filter(|e| e.client_entity_id == company.code)
11387 .map(|e| e.engagement_id)
11388 .collect();
11389
11390 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11391 .findings
11392 .iter()
11393 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
11394 .cloned()
11395 .collect();
11396
11397 let emp_count = self.master_data.employees.len();
11399 let ceo_name = if emp_count > 0 {
11400 self.master_data.employees[i % emp_count]
11401 .display_name
11402 .clone()
11403 } else {
11404 format!("CEO of {}", company.name)
11405 };
11406 let cfo_name = if emp_count > 1 {
11407 self.master_data.employees[(i + 1) % emp_count]
11408 .display_name
11409 .clone()
11410 } else {
11411 format!("CFO of {}", company.name)
11412 };
11413
11414 let materiality = snapshot
11416 .engagements
11417 .iter()
11418 .find(|e| e.client_entity_id == company.code)
11419 .map(|e| e.materiality)
11420 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
11421
11422 let input = SoxGeneratorInput {
11423 company_code: company.code.clone(),
11424 company_name: company.name.clone(),
11425 fiscal_year,
11426 period_end,
11427 findings: company_findings,
11428 ceo_name,
11429 cfo_name,
11430 materiality_threshold: materiality,
11431 revenue_percent: rust_decimal::Decimal::from(100),
11432 assets_percent: rust_decimal::Decimal::from(100),
11433 significant_accounts: vec![
11434 "Revenue".into(),
11435 "Accounts Receivable".into(),
11436 "Inventory".into(),
11437 "Fixed Assets".into(),
11438 "Accounts Payable".into(),
11439 ],
11440 };
11441
11442 let (certs, assessment) = sox_gen.generate(&input);
11443 snapshot.sox_302_certifications.extend(certs);
11444 snapshot.sox_404_assessments.push(assessment);
11445 }
11446
11447 info!(
11448 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
11449 snapshot.sox_302_certifications.len(),
11450 snapshot.sox_404_assessments.len(),
11451 snapshot
11452 .sox_404_assessments
11453 .iter()
11454 .filter(|a| a.icfr_effective)
11455 .count(),
11456 snapshot
11457 .sox_404_assessments
11458 .iter()
11459 .filter(|a| !a.icfr_effective)
11460 .count(),
11461 );
11462 }
11463
11464 {
11468 use datasynth_generators::audit::materiality_generator::{
11469 MaterialityGenerator, MaterialityInput,
11470 };
11471
11472 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
11473
11474 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
11478
11479 for company in &self.config.companies {
11480 let company_code = company.code.clone();
11481
11482 let company_revenue: rust_decimal::Decimal = entries
11484 .iter()
11485 .filter(|e| e.company_code() == company_code)
11486 .flat_map(|e| e.lines.iter())
11487 .filter(|l| l.account_code.starts_with('4'))
11488 .map(|l| l.credit_amount)
11489 .sum();
11490
11491 let total_assets: rust_decimal::Decimal = entries
11493 .iter()
11494 .filter(|e| e.company_code() == company_code)
11495 .flat_map(|e| e.lines.iter())
11496 .filter(|l| l.account_code.starts_with('1'))
11497 .map(|l| l.debit_amount)
11498 .sum();
11499
11500 let total_expenses: rust_decimal::Decimal = entries
11502 .iter()
11503 .filter(|e| e.company_code() == company_code)
11504 .flat_map(|e| e.lines.iter())
11505 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11506 .map(|l| l.debit_amount)
11507 .sum();
11508
11509 let equity: rust_decimal::Decimal = entries
11511 .iter()
11512 .filter(|e| e.company_code() == company_code)
11513 .flat_map(|e| e.lines.iter())
11514 .filter(|l| l.account_code.starts_with('3'))
11515 .map(|l| l.credit_amount)
11516 .sum();
11517
11518 let pretax_income = company_revenue - total_expenses;
11519
11520 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
11522 let w = rust_decimal::Decimal::try_from(company.volume_weight)
11523 .unwrap_or(rust_decimal::Decimal::ONE);
11524 (
11525 total_revenue * w,
11526 total_revenue * w * rust_decimal::Decimal::from(3),
11527 total_revenue * w * rust_decimal::Decimal::new(1, 1),
11528 total_revenue * w * rust_decimal::Decimal::from(2),
11529 )
11530 } else {
11531 (company_revenue, total_assets, pretax_income, equity)
11532 };
11533
11534 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
11537 entity_code: company_code,
11538 period: format!("FY{}", fiscal_year),
11539 revenue: rev,
11540 pretax_income: pti,
11541 total_assets: assets,
11542 equity: eq,
11543 gross_profit,
11544 });
11545 }
11546
11547 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
11548
11549 info!(
11550 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
11551 {} total assets, {} equity benchmarks)",
11552 snapshot.materiality_calculations.len(),
11553 snapshot
11554 .materiality_calculations
11555 .iter()
11556 .filter(|m| matches!(
11557 m.benchmark,
11558 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
11559 ))
11560 .count(),
11561 snapshot
11562 .materiality_calculations
11563 .iter()
11564 .filter(|m| matches!(
11565 m.benchmark,
11566 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
11567 ))
11568 .count(),
11569 snapshot
11570 .materiality_calculations
11571 .iter()
11572 .filter(|m| matches!(
11573 m.benchmark,
11574 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
11575 ))
11576 .count(),
11577 snapshot
11578 .materiality_calculations
11579 .iter()
11580 .filter(|m| matches!(
11581 m.benchmark,
11582 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
11583 ))
11584 .count(),
11585 );
11586 }
11587
11588 {
11592 use datasynth_generators::audit::cra_generator::CraGenerator;
11593
11594 let mut cra_gen = CraGenerator::new(self.seed + 8315);
11595
11596 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
11598 .audit_scopes
11599 .iter()
11600 .map(|s| (s.entity_code.clone(), s.id.clone()))
11601 .collect();
11602
11603 for company in &self.config.companies {
11604 let cras = cra_gen.generate_for_entity(&company.code, None);
11605 let scope_id = entity_scope_map.get(&company.code).cloned();
11606 let cras_with_scope: Vec<_> = cras
11607 .into_iter()
11608 .map(|mut cra| {
11609 cra.scope_id = scope_id.clone();
11610 cra
11611 })
11612 .collect();
11613 snapshot.combined_risk_assessments.extend(cras_with_scope);
11614 }
11615
11616 let significant_count = snapshot
11617 .combined_risk_assessments
11618 .iter()
11619 .filter(|c| c.significant_risk)
11620 .count();
11621 let high_cra_count = snapshot
11622 .combined_risk_assessments
11623 .iter()
11624 .filter(|c| {
11625 matches!(
11626 c.combined_risk,
11627 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
11628 )
11629 })
11630 .count();
11631
11632 info!(
11633 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
11634 snapshot.combined_risk_assessments.len(),
11635 significant_count,
11636 high_cra_count,
11637 );
11638 }
11639
11640 {
11644 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
11645
11646 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
11647
11648 for company in &self.config.companies {
11650 let entity_code = company.code.clone();
11651
11652 let tolerable_error = snapshot
11654 .materiality_calculations
11655 .iter()
11656 .find(|m| m.entity_code == entity_code)
11657 .map(|m| m.tolerable_error);
11658
11659 let entity_cras: Vec<_> = snapshot
11661 .combined_risk_assessments
11662 .iter()
11663 .filter(|c| c.entity_code == entity_code)
11664 .cloned()
11665 .collect();
11666
11667 if !entity_cras.is_empty() {
11668 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
11669 snapshot.sampling_plans.extend(plans);
11670 snapshot.sampled_items.extend(items);
11671 }
11672 }
11673
11674 let misstatement_count = snapshot
11675 .sampled_items
11676 .iter()
11677 .filter(|i| i.misstatement_found)
11678 .count();
11679
11680 info!(
11681 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
11682 snapshot.sampling_plans.len(),
11683 snapshot.sampled_items.len(),
11684 misstatement_count,
11685 );
11686 }
11687
11688 {
11692 use datasynth_generators::audit::scots_generator::{
11693 ScotsGenerator, ScotsGeneratorConfig,
11694 };
11695
11696 let ic_enabled = self.config.intercompany.enabled;
11697
11698 let config = ScotsGeneratorConfig {
11699 intercompany_enabled: ic_enabled,
11700 ..ScotsGeneratorConfig::default()
11701 };
11702 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
11703
11704 for company in &self.config.companies {
11705 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
11706 snapshot
11707 .significant_transaction_classes
11708 .extend(entity_scots);
11709 }
11710
11711 let estimation_count = snapshot
11712 .significant_transaction_classes
11713 .iter()
11714 .filter(|s| {
11715 matches!(
11716 s.transaction_type,
11717 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
11718 )
11719 })
11720 .count();
11721
11722 info!(
11723 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
11724 snapshot.significant_transaction_classes.len(),
11725 estimation_count,
11726 );
11727 }
11728
11729 {
11733 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
11734
11735 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
11736 let entity_codes: Vec<String> = self
11737 .config
11738 .companies
11739 .iter()
11740 .map(|c| c.code.clone())
11741 .collect();
11742 let unusual_flags =
11743 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
11744 info!(
11745 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
11746 unusual_flags.len(),
11747 unusual_flags
11748 .iter()
11749 .filter(|f| matches!(
11750 f.severity,
11751 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
11752 ))
11753 .count(),
11754 unusual_flags
11755 .iter()
11756 .filter(|f| matches!(
11757 f.severity,
11758 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
11759 ))
11760 .count(),
11761 unusual_flags
11762 .iter()
11763 .filter(|f| matches!(
11764 f.severity,
11765 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
11766 ))
11767 .count(),
11768 );
11769 snapshot.unusual_items = unusual_flags;
11770 }
11771
11772 {
11776 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
11777
11778 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
11779 let entity_codes: Vec<String> = self
11780 .config
11781 .companies
11782 .iter()
11783 .map(|c| c.code.clone())
11784 .collect();
11785 let current_period_label = format!("FY{fiscal_year}");
11786 let prior_period_label = format!("FY{}", fiscal_year - 1);
11787 let analytical_rels = ar_gen.generate_for_entities(
11788 &entity_codes,
11789 entries,
11790 ¤t_period_label,
11791 &prior_period_label,
11792 );
11793 let out_of_range = analytical_rels
11794 .iter()
11795 .filter(|r| !r.within_expected_range)
11796 .count();
11797 info!(
11798 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
11799 analytical_rels.len(),
11800 out_of_range,
11801 );
11802 snapshot.analytical_relationships = analytical_rels;
11803 }
11804
11805 if let Some(pb) = pb {
11806 pb.finish_with_message(format!(
11807 "Audit data: {} engagements, {} workpapers, {} evidence, \
11808 {} confirmations, {} procedure steps, {} samples, \
11809 {} analytical, {} IA funcs, {} related parties, \
11810 {} component auditors, {} letters, {} subsequent events, \
11811 {} service orgs, {} going concern, {} accounting estimates, \
11812 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
11813 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
11814 {} unusual items, {} analytical relationships",
11815 snapshot.engagements.len(),
11816 snapshot.workpapers.len(),
11817 snapshot.evidence.len(),
11818 snapshot.confirmations.len(),
11819 snapshot.procedure_steps.len(),
11820 snapshot.samples.len(),
11821 snapshot.analytical_results.len(),
11822 snapshot.ia_functions.len(),
11823 snapshot.related_parties.len(),
11824 snapshot.component_auditors.len(),
11825 snapshot.engagement_letters.len(),
11826 snapshot.subsequent_events.len(),
11827 snapshot.service_organizations.len(),
11828 snapshot.going_concern_assessments.len(),
11829 snapshot.accounting_estimates.len(),
11830 snapshot.audit_opinions.len(),
11831 snapshot.key_audit_matters.len(),
11832 snapshot.sox_302_certifications.len(),
11833 snapshot.sox_404_assessments.len(),
11834 snapshot.materiality_calculations.len(),
11835 snapshot.combined_risk_assessments.len(),
11836 snapshot.sampling_plans.len(),
11837 snapshot.significant_transaction_classes.len(),
11838 snapshot.unusual_items.len(),
11839 snapshot.analytical_relationships.len(),
11840 ));
11841 }
11842
11843 {
11850 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11851 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11852 debug!(
11853 "PCAOB-ISA mappings generated: {} mappings",
11854 snapshot.isa_pcaob_mappings.len()
11855 );
11856 }
11857
11858 {
11865 use datasynth_standards::audit::isa_reference::IsaStandard;
11866 snapshot.isa_mappings = IsaStandard::standard_entries();
11867 debug!(
11868 "ISA standard entries generated: {} standards",
11869 snapshot.isa_mappings.len()
11870 );
11871 }
11872
11873 {
11876 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
11877 .engagements
11878 .iter()
11879 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
11880 .collect();
11881
11882 for rpt in &mut snapshot.related_party_transactions {
11883 if rpt.journal_entry_id.is_some() {
11884 continue; }
11886 let entity = engagement_by_id
11887 .get(&rpt.engagement_id.to_string())
11888 .copied()
11889 .unwrap_or("");
11890
11891 let best_je = entries
11893 .iter()
11894 .filter(|je| je.header.company_code == entity)
11895 .min_by_key(|je| {
11896 (je.header.posting_date - rpt.transaction_date)
11897 .num_days()
11898 .abs()
11899 });
11900
11901 if let Some(je) = best_je {
11902 rpt.journal_entry_id = Some(je.header.document_id.to_string());
11903 }
11904 }
11905
11906 let linked = snapshot
11907 .related_party_transactions
11908 .iter()
11909 .filter(|t| t.journal_entry_id.is_some())
11910 .count();
11911 debug!(
11912 "Linked {}/{} related party transactions to journal entries",
11913 linked,
11914 snapshot.related_party_transactions.len()
11915 );
11916 }
11917
11918 Ok(snapshot)
11919 }
11920
11921 fn generate_audit_data_with_fsm(
11928 &mut self,
11929 entries: &[JournalEntry],
11930 ) -> SynthResult<AuditSnapshot> {
11931 use datasynth_audit_fsm::{
11932 context::EngagementContext,
11933 engine::AuditFsmEngine,
11934 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
11935 };
11936 use rand::SeedableRng;
11937 use rand_chacha::ChaCha8Rng;
11938
11939 info!("Audit FSM: generating audit data via FSM engine");
11940
11941 let fsm_config = self
11942 .config
11943 .audit
11944 .fsm
11945 .as_ref()
11946 .expect("FSM config must be present when FSM is enabled");
11947
11948 let bwp = match fsm_config.blueprint.as_str() {
11950 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
11951 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
11952 _ => {
11953 warn!(
11954 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
11955 fsm_config.blueprint
11956 );
11957 BlueprintWithPreconditions::load_builtin_fsa()
11958 }
11959 }
11960 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
11961
11962 let overlay = match fsm_config.overlay.as_str() {
11964 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
11965 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
11966 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
11967 _ => {
11968 warn!(
11969 "Unknown FSM overlay '{}', falling back to builtin:default",
11970 fsm_config.overlay
11971 );
11972 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
11973 }
11974 }
11975 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
11976
11977 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11979 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11980 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11981
11982 let company = self.config.companies.first();
11984 let company_code = company
11985 .map(|c| c.code.clone())
11986 .unwrap_or_else(|| "UNKNOWN".to_string());
11987 let company_name = company
11988 .map(|c| c.name.clone())
11989 .unwrap_or_else(|| "Unknown Company".to_string());
11990 let currency = company
11991 .map(|c| c.currency.clone())
11992 .unwrap_or_else(|| "USD".to_string());
11993
11994 let entity_entries: Vec<_> = entries
11996 .iter()
11997 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
11998 .cloned()
11999 .collect();
12000 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
12004 .iter()
12005 .flat_map(|e| e.lines.iter())
12006 .filter(|l| l.account_code.starts_with('4'))
12007 .map(|l| l.credit_amount - l.debit_amount)
12008 .sum();
12009
12010 let total_assets: rust_decimal::Decimal = entries
12011 .iter()
12012 .flat_map(|e| e.lines.iter())
12013 .filter(|l| l.account_code.starts_with('1'))
12014 .map(|l| l.debit_amount - l.credit_amount)
12015 .sum();
12016
12017 let total_expenses: rust_decimal::Decimal = entries
12018 .iter()
12019 .flat_map(|e| e.lines.iter())
12020 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12021 .map(|l| l.debit_amount)
12022 .sum();
12023
12024 let equity: rust_decimal::Decimal = entries
12025 .iter()
12026 .flat_map(|e| e.lines.iter())
12027 .filter(|l| l.account_code.starts_with('3'))
12028 .map(|l| l.credit_amount - l.debit_amount)
12029 .sum();
12030
12031 let total_debt: rust_decimal::Decimal = entries
12032 .iter()
12033 .flat_map(|e| e.lines.iter())
12034 .filter(|l| l.account_code.starts_with('2'))
12035 .map(|l| l.credit_amount - l.debit_amount)
12036 .sum();
12037
12038 let pretax_income = total_revenue - total_expenses;
12039
12040 let cogs: rust_decimal::Decimal = entries
12041 .iter()
12042 .flat_map(|e| e.lines.iter())
12043 .filter(|l| l.account_code.starts_with('5'))
12044 .map(|l| l.debit_amount)
12045 .sum();
12046 let gross_profit = total_revenue - cogs;
12047
12048 let current_assets: rust_decimal::Decimal = entries
12049 .iter()
12050 .flat_map(|e| e.lines.iter())
12051 .filter(|l| {
12052 l.account_code.starts_with("10")
12053 || l.account_code.starts_with("11")
12054 || l.account_code.starts_with("12")
12055 || l.account_code.starts_with("13")
12056 })
12057 .map(|l| l.debit_amount - l.credit_amount)
12058 .sum();
12059 let current_liabilities: rust_decimal::Decimal = entries
12060 .iter()
12061 .flat_map(|e| e.lines.iter())
12062 .filter(|l| {
12063 l.account_code.starts_with("20")
12064 || l.account_code.starts_with("21")
12065 || l.account_code.starts_with("22")
12066 })
12067 .map(|l| l.credit_amount - l.debit_amount)
12068 .sum();
12069 let working_capital = current_assets - current_liabilities;
12070
12071 let depreciation: rust_decimal::Decimal = entries
12072 .iter()
12073 .flat_map(|e| e.lines.iter())
12074 .filter(|l| l.account_code.starts_with("60"))
12075 .map(|l| l.debit_amount)
12076 .sum();
12077 let operating_cash_flow = pretax_income + depreciation;
12078
12079 let accounts: Vec<String> = self
12081 .coa
12082 .as_ref()
12083 .map(|coa| {
12084 coa.get_postable_accounts()
12085 .iter()
12086 .map(|acc| acc.account_code().to_string())
12087 .collect()
12088 })
12089 .unwrap_or_default();
12090
12091 let team_member_ids: Vec<String> = self
12093 .master_data
12094 .employees
12095 .iter()
12096 .take(8) .map(|e| e.employee_id.clone())
12098 .collect();
12099 let team_member_pairs: Vec<(String, String)> = self
12100 .master_data
12101 .employees
12102 .iter()
12103 .take(8)
12104 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12105 .collect();
12106
12107 let vendor_names: Vec<String> = self
12108 .master_data
12109 .vendors
12110 .iter()
12111 .map(|v| v.name.clone())
12112 .collect();
12113 let customer_names: Vec<String> = self
12114 .master_data
12115 .customers
12116 .iter()
12117 .map(|c| c.name.clone())
12118 .collect();
12119
12120 let entity_codes: Vec<String> = self
12121 .config
12122 .companies
12123 .iter()
12124 .map(|c| c.code.clone())
12125 .collect();
12126
12127 let journal_entry_ids: Vec<String> = entries
12129 .iter()
12130 .take(50)
12131 .map(|e| e.header.document_id.to_string())
12132 .collect();
12133
12134 let mut account_balances = std::collections::HashMap::<String, f64>::new();
12136 for entry in entries {
12137 for line in &entry.lines {
12138 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
12139 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
12140 *account_balances
12141 .entry(line.account_code.clone())
12142 .or_insert(0.0) += debit_f64 - credit_f64;
12143 }
12144 }
12145
12146 let control_ids: Vec<String> = Vec::new();
12151 let anomaly_refs: Vec<String> = Vec::new();
12152
12153 let mut context = EngagementContext {
12154 company_code,
12155 company_name,
12156 fiscal_year: start_date.year(),
12157 currency,
12158 total_revenue,
12159 total_assets,
12160 engagement_start: start_date,
12161 report_date: period_end,
12162 pretax_income,
12163 equity,
12164 gross_profit,
12165 working_capital,
12166 operating_cash_flow,
12167 total_debt,
12168 team_member_ids,
12169 team_member_pairs,
12170 accounts,
12171 vendor_names,
12172 customer_names,
12173 journal_entry_ids,
12174 account_balances,
12175 control_ids,
12176 anomaly_refs,
12177 journal_entries: entries.to_vec(),
12178 is_us_listed: false,
12179 entity_codes,
12180 auditor_firm_name: "DataSynth Audit LLP".into(),
12181 accounting_framework: self
12182 .config
12183 .accounting_standards
12184 .framework
12185 .map(|f| match f {
12186 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
12187 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
12188 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
12189 "French GAAP"
12190 }
12191 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
12192 "German GAAP"
12193 }
12194 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
12195 "Dual Reporting"
12196 }
12197 })
12198 .unwrap_or("IFRS")
12199 .into(),
12200 };
12201
12202 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
12204 let rng = ChaCha8Rng::seed_from_u64(seed);
12205 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
12206
12207 let mut result = engine
12208 .run_engagement(&context)
12209 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
12210
12211 info!(
12212 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
12213 {} phases completed, duration {:.1}h",
12214 result.event_log.len(),
12215 result.artifacts.total_artifacts(),
12216 result.anomalies.len(),
12217 result.phases_completed.len(),
12218 result.total_duration_hours,
12219 );
12220
12221 let tb_entity = context.company_code.clone();
12223 let tb_fy = context.fiscal_year;
12224 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
12225 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
12226 entries,
12227 &tb_entity,
12228 tb_fy,
12229 self.coa.as_ref().map(|c| c.as_ref()),
12230 );
12231
12232 let bag = result.artifacts;
12234 let mut snapshot = AuditSnapshot {
12235 engagements: bag.engagements,
12236 engagement_letters: bag.engagement_letters,
12237 materiality_calculations: bag.materiality_calculations,
12238 risk_assessments: bag.risk_assessments,
12239 combined_risk_assessments: bag.combined_risk_assessments,
12240 workpapers: bag.workpapers,
12241 evidence: bag.evidence,
12242 findings: bag.findings,
12243 judgments: bag.judgments,
12244 sampling_plans: bag.sampling_plans,
12245 sampled_items: bag.sampled_items,
12246 analytical_results: bag.analytical_results,
12247 going_concern_assessments: bag.going_concern_assessments,
12248 subsequent_events: bag.subsequent_events,
12249 audit_opinions: bag.audit_opinions,
12250 key_audit_matters: bag.key_audit_matters,
12251 procedure_steps: bag.procedure_steps,
12252 samples: bag.samples,
12253 confirmations: bag.confirmations,
12254 confirmation_responses: bag.confirmation_responses,
12255 fsm_event_trail: Some(result.event_log),
12257 ..Default::default()
12259 };
12260
12261 {
12263 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12264 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12265 }
12266 {
12267 use datasynth_standards::audit::isa_reference::IsaStandard;
12268 snapshot.isa_mappings = IsaStandard::standard_entries();
12269 }
12270
12271 info!(
12272 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
12273 {} risk assessments, {} findings, {} materiality calcs",
12274 snapshot.engagements.len(),
12275 snapshot.workpapers.len(),
12276 snapshot.evidence.len(),
12277 snapshot.risk_assessments.len(),
12278 snapshot.findings.len(),
12279 snapshot.materiality_calculations.len(),
12280 );
12281
12282 Ok(snapshot)
12283 }
12284
12285 fn export_graphs(
12292 &mut self,
12293 entries: &[JournalEntry],
12294 _coa: &Arc<ChartOfAccounts>,
12295 stats: &mut EnhancedGenerationStatistics,
12296 ) -> SynthResult<GraphExportSnapshot> {
12297 let pb = self.create_progress_bar(100, "Exporting Graphs");
12298
12299 let mut snapshot = GraphExportSnapshot::default();
12300
12301 let output_dir = self
12303 .output_path
12304 .clone()
12305 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12306 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12307
12308 for graph_type in &self.config.graph_export.graph_types {
12310 if let Some(pb) = &pb {
12311 pb.inc(10);
12312 }
12313
12314 let graph_config = TransactionGraphConfig {
12316 include_vendors: false,
12317 include_customers: false,
12318 create_debit_credit_edges: true,
12319 include_document_nodes: graph_type.include_document_nodes,
12320 min_edge_weight: graph_type.min_edge_weight,
12321 aggregate_parallel_edges: graph_type.aggregate_edges,
12322 framework: None,
12323 };
12324
12325 let mut builder = TransactionGraphBuilder::new(graph_config);
12326 builder.add_journal_entries(entries);
12327 let graph = builder.build();
12328
12329 stats.graph_node_count += graph.node_count();
12331 stats.graph_edge_count += graph.edge_count();
12332
12333 if let Some(pb) = &pb {
12334 pb.inc(40);
12335 }
12336
12337 for format in &self.config.graph_export.formats {
12339 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
12340
12341 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12343 warn!("Failed to create graph output directory: {}", e);
12344 continue;
12345 }
12346
12347 match format {
12348 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
12349 let pyg_config = PyGExportConfig {
12350 common: datasynth_graph::CommonExportConfig {
12351 export_node_features: true,
12352 export_edge_features: true,
12353 export_node_labels: true,
12354 export_edge_labels: true,
12355 export_masks: true,
12356 train_ratio: self.config.graph_export.train_ratio,
12357 val_ratio: self.config.graph_export.validation_ratio,
12358 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12359 },
12360 one_hot_categoricals: false,
12361 };
12362
12363 let exporter = PyGExporter::new(pyg_config);
12364 match exporter.export(&graph, &format_dir) {
12365 Ok(metadata) => {
12366 snapshot.exports.insert(
12367 format!("{}_{}", graph_type.name, "pytorch_geometric"),
12368 GraphExportInfo {
12369 name: graph_type.name.clone(),
12370 format: "pytorch_geometric".to_string(),
12371 output_path: format_dir.clone(),
12372 node_count: metadata.num_nodes,
12373 edge_count: metadata.num_edges,
12374 },
12375 );
12376 snapshot.graph_count += 1;
12377 }
12378 Err(e) => {
12379 warn!("Failed to export PyTorch Geometric graph: {}", e);
12380 }
12381 }
12382 }
12383 datasynth_config::schema::GraphExportFormat::Neo4j => {
12384 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
12385
12386 let neo4j_config = Neo4jExportConfig {
12387 export_node_properties: true,
12388 export_edge_properties: true,
12389 export_features: true,
12390 generate_cypher: true,
12391 generate_admin_import: true,
12392 database_name: "synth".to_string(),
12393 cypher_batch_size: 1000,
12394 };
12395
12396 let exporter = Neo4jExporter::new(neo4j_config);
12397 match exporter.export(&graph, &format_dir) {
12398 Ok(metadata) => {
12399 snapshot.exports.insert(
12400 format!("{}_{}", graph_type.name, "neo4j"),
12401 GraphExportInfo {
12402 name: graph_type.name.clone(),
12403 format: "neo4j".to_string(),
12404 output_path: format_dir.clone(),
12405 node_count: metadata.num_nodes,
12406 edge_count: metadata.num_edges,
12407 },
12408 );
12409 snapshot.graph_count += 1;
12410 }
12411 Err(e) => {
12412 warn!("Failed to export Neo4j graph: {}", e);
12413 }
12414 }
12415 }
12416 datasynth_config::schema::GraphExportFormat::Dgl => {
12417 use datasynth_graph::{DGLExportConfig, DGLExporter};
12418
12419 let dgl_config = DGLExportConfig {
12420 common: datasynth_graph::CommonExportConfig {
12421 export_node_features: true,
12422 export_edge_features: true,
12423 export_node_labels: true,
12424 export_edge_labels: true,
12425 export_masks: true,
12426 train_ratio: self.config.graph_export.train_ratio,
12427 val_ratio: self.config.graph_export.validation_ratio,
12428 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12429 },
12430 heterogeneous: self.config.graph_export.dgl.heterogeneous,
12431 include_pickle_script: true, };
12433
12434 let exporter = DGLExporter::new(dgl_config);
12435 match exporter.export(&graph, &format_dir) {
12436 Ok(metadata) => {
12437 snapshot.exports.insert(
12438 format!("{}_{}", graph_type.name, "dgl"),
12439 GraphExportInfo {
12440 name: graph_type.name.clone(),
12441 format: "dgl".to_string(),
12442 output_path: format_dir.clone(),
12443 node_count: metadata.common.num_nodes,
12444 edge_count: metadata.common.num_edges,
12445 },
12446 );
12447 snapshot.graph_count += 1;
12448 }
12449 Err(e) => {
12450 warn!("Failed to export DGL graph: {}", e);
12451 }
12452 }
12453 }
12454 datasynth_config::schema::GraphExportFormat::RustGraph => {
12455 use datasynth_graph::{
12456 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
12457 };
12458
12459 let rustgraph_config = RustGraphExportConfig {
12460 include_features: true,
12461 include_temporal: true,
12462 include_labels: true,
12463 source_name: "datasynth".to_string(),
12464 batch_id: None,
12465 output_format: RustGraphOutputFormat::JsonLines,
12466 export_node_properties: true,
12467 export_edge_properties: true,
12468 pretty_print: false,
12469 };
12470
12471 let exporter = RustGraphExporter::new(rustgraph_config);
12472 match exporter.export(&graph, &format_dir) {
12473 Ok(metadata) => {
12474 snapshot.exports.insert(
12475 format!("{}_{}", graph_type.name, "rustgraph"),
12476 GraphExportInfo {
12477 name: graph_type.name.clone(),
12478 format: "rustgraph".to_string(),
12479 output_path: format_dir.clone(),
12480 node_count: metadata.num_nodes,
12481 edge_count: metadata.num_edges,
12482 },
12483 );
12484 snapshot.graph_count += 1;
12485 }
12486 Err(e) => {
12487 warn!("Failed to export RustGraph: {}", e);
12488 }
12489 }
12490 }
12491 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
12492 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
12494 }
12495 }
12496 }
12497
12498 if let Some(pb) = &pb {
12499 pb.inc(40);
12500 }
12501 }
12502
12503 stats.graph_export_count = snapshot.graph_count;
12504 snapshot.exported = snapshot.graph_count > 0;
12505
12506 if let Some(pb) = pb {
12507 pb.finish_with_message(format!(
12508 "Graphs exported: {} graphs ({} nodes, {} edges)",
12509 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
12510 ));
12511 }
12512
12513 Ok(snapshot)
12514 }
12515
12516 fn build_additional_graphs(
12521 &self,
12522 banking: &BankingSnapshot,
12523 intercompany: &IntercompanySnapshot,
12524 entries: &[JournalEntry],
12525 stats: &mut EnhancedGenerationStatistics,
12526 ) {
12527 let output_dir = self
12528 .output_path
12529 .clone()
12530 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12531 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12532
12533 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
12535 info!("Phase 10c: Building banking network graph");
12536 let config = BankingGraphConfig::default();
12537 let mut builder = BankingGraphBuilder::new(config);
12538 builder.add_customers(&banking.customers);
12539 builder.add_accounts(&banking.accounts, &banking.customers);
12540 builder.add_transactions(&banking.transactions);
12541 let graph = builder.build();
12542
12543 let node_count = graph.node_count();
12544 let edge_count = graph.edge_count();
12545 stats.graph_node_count += node_count;
12546 stats.graph_edge_count += edge_count;
12547
12548 for format in &self.config.graph_export.formats {
12550 if matches!(
12551 format,
12552 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12553 ) {
12554 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
12555 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12556 warn!("Failed to create banking graph output dir: {}", e);
12557 continue;
12558 }
12559 let pyg_config = PyGExportConfig::default();
12560 let exporter = PyGExporter::new(pyg_config);
12561 if let Err(e) = exporter.export(&graph, &format_dir) {
12562 warn!("Failed to export banking graph as PyG: {}", e);
12563 } else {
12564 info!(
12565 "Banking network graph exported: {} nodes, {} edges",
12566 node_count, edge_count
12567 );
12568 }
12569 }
12570 }
12571 }
12572
12573 let approval_entries: Vec<_> = entries
12575 .iter()
12576 .filter(|je| je.header.approval_workflow.is_some())
12577 .collect();
12578
12579 if !approval_entries.is_empty() {
12580 info!(
12581 "Phase 10c: Building approval network graph ({} entries with approvals)",
12582 approval_entries.len()
12583 );
12584 let config = ApprovalGraphConfig::default();
12585 let mut builder = ApprovalGraphBuilder::new(config);
12586
12587 for je in &approval_entries {
12588 if let Some(ref wf) = je.header.approval_workflow {
12589 for action in &wf.actions {
12590 let record = datasynth_core::models::ApprovalRecord {
12591 approval_id: format!(
12592 "APR-{}-{}",
12593 je.header.document_id, action.approval_level
12594 ),
12595 document_number: je.header.document_id.to_string(),
12596 document_type: "JE".to_string(),
12597 company_code: je.company_code().to_string(),
12598 requester_id: wf.preparer_id.clone(),
12599 requester_name: Some(wf.preparer_name.clone()),
12600 approver_id: action.actor_id.clone(),
12601 approver_name: action.actor_name.clone(),
12602 approval_date: je.posting_date(),
12603 action: format!("{:?}", action.action),
12604 amount: wf.amount,
12605 approval_limit: None,
12606 comments: action.comments.clone(),
12607 delegation_from: None,
12608 is_auto_approved: false,
12609 };
12610 builder.add_approval(&record);
12611 }
12612 }
12613 }
12614
12615 let graph = builder.build();
12616 let node_count = graph.node_count();
12617 let edge_count = graph.edge_count();
12618 stats.graph_node_count += node_count;
12619 stats.graph_edge_count += edge_count;
12620
12621 for format in &self.config.graph_export.formats {
12623 if matches!(
12624 format,
12625 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12626 ) {
12627 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
12628 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12629 warn!("Failed to create approval graph output dir: {}", e);
12630 continue;
12631 }
12632 let pyg_config = PyGExportConfig::default();
12633 let exporter = PyGExporter::new(pyg_config);
12634 if let Err(e) = exporter.export(&graph, &format_dir) {
12635 warn!("Failed to export approval graph as PyG: {}", e);
12636 } else {
12637 info!(
12638 "Approval network graph exported: {} nodes, {} edges",
12639 node_count, edge_count
12640 );
12641 }
12642 }
12643 }
12644 }
12645
12646 if self.config.companies.len() >= 2 {
12648 info!(
12649 "Phase 10c: Building entity relationship graph ({} companies)",
12650 self.config.companies.len()
12651 );
12652
12653 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12654 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
12655
12656 let parent_code = &self.config.companies[0].code;
12658 let mut companies: Vec<datasynth_core::models::Company> =
12659 Vec::with_capacity(self.config.companies.len());
12660
12661 let first = &self.config.companies[0];
12663 companies.push(datasynth_core::models::Company::parent(
12664 &first.code,
12665 &first.name,
12666 &first.country,
12667 &first.currency,
12668 ));
12669
12670 for cc in self.config.companies.iter().skip(1) {
12672 companies.push(datasynth_core::models::Company::subsidiary(
12673 &cc.code,
12674 &cc.name,
12675 &cc.country,
12676 &cc.currency,
12677 parent_code,
12678 rust_decimal::Decimal::from(100),
12679 ));
12680 }
12681
12682 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
12684 self.config
12685 .companies
12686 .iter()
12687 .skip(1)
12688 .enumerate()
12689 .map(|(i, cc)| {
12690 let mut rel =
12691 datasynth_core::models::intercompany::IntercompanyRelationship::new(
12692 format!("REL{:03}", i + 1),
12693 parent_code.clone(),
12694 cc.code.clone(),
12695 rust_decimal::Decimal::from(100),
12696 start_date,
12697 );
12698 rel.functional_currency = cc.currency.clone();
12699 rel
12700 })
12701 .collect();
12702
12703 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
12704 builder.add_companies(&companies);
12705 builder.add_ownership_relationships(&relationships);
12706
12707 for pair in &intercompany.matched_pairs {
12709 builder.add_intercompany_edge(
12710 &pair.seller_company,
12711 &pair.buyer_company,
12712 pair.amount,
12713 &format!("{:?}", pair.transaction_type),
12714 );
12715 }
12716
12717 let graph = builder.build();
12718 let node_count = graph.node_count();
12719 let edge_count = graph.edge_count();
12720 stats.graph_node_count += node_count;
12721 stats.graph_edge_count += edge_count;
12722
12723 for format in &self.config.graph_export.formats {
12725 if matches!(
12726 format,
12727 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12728 ) {
12729 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
12730 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12731 warn!("Failed to create entity graph output dir: {}", e);
12732 continue;
12733 }
12734 let pyg_config = PyGExportConfig::default();
12735 let exporter = PyGExporter::new(pyg_config);
12736 if let Err(e) = exporter.export(&graph, &format_dir) {
12737 warn!("Failed to export entity graph as PyG: {}", e);
12738 } else {
12739 info!(
12740 "Entity relationship graph exported: {} nodes, {} edges",
12741 node_count, edge_count
12742 );
12743 }
12744 }
12745 }
12746 } else {
12747 debug!(
12748 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
12749 self.config.companies.len()
12750 );
12751 }
12752 }
12753
12754 #[allow(clippy::too_many_arguments)]
12761 fn export_hypergraph(
12762 &self,
12763 coa: &Arc<ChartOfAccounts>,
12764 entries: &[JournalEntry],
12765 document_flows: &DocumentFlowSnapshot,
12766 sourcing: &SourcingSnapshot,
12767 hr: &HrSnapshot,
12768 manufacturing: &ManufacturingSnapshot,
12769 banking: &BankingSnapshot,
12770 audit: &AuditSnapshot,
12771 financial_reporting: &FinancialReportingSnapshot,
12772 ocpm: &OcpmSnapshot,
12773 compliance: &ComplianceRegulationsSnapshot,
12774 stats: &mut EnhancedGenerationStatistics,
12775 ) -> SynthResult<HypergraphExportInfo> {
12776 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
12777 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
12778 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
12779 use datasynth_graph::models::hypergraph::AggregationStrategy;
12780
12781 let hg_settings = &self.config.graph_export.hypergraph;
12782
12783 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
12785 "truncate" => AggregationStrategy::Truncate,
12786 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
12787 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
12788 "importance_sample" => AggregationStrategy::ImportanceSample,
12789 _ => AggregationStrategy::PoolByCounterparty,
12790 };
12791
12792 let builder_config = HypergraphConfig {
12793 max_nodes: hg_settings.max_nodes,
12794 aggregation_strategy,
12795 include_coso: hg_settings.governance_layer.include_coso,
12796 include_controls: hg_settings.governance_layer.include_controls,
12797 include_sox: hg_settings.governance_layer.include_sox,
12798 include_vendors: hg_settings.governance_layer.include_vendors,
12799 include_customers: hg_settings.governance_layer.include_customers,
12800 include_employees: hg_settings.governance_layer.include_employees,
12801 include_p2p: hg_settings.process_layer.include_p2p,
12802 include_o2c: hg_settings.process_layer.include_o2c,
12803 include_s2c: hg_settings.process_layer.include_s2c,
12804 include_h2r: hg_settings.process_layer.include_h2r,
12805 include_mfg: hg_settings.process_layer.include_mfg,
12806 include_bank: hg_settings.process_layer.include_bank,
12807 include_audit: hg_settings.process_layer.include_audit,
12808 include_r2r: hg_settings.process_layer.include_r2r,
12809 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
12810 docs_per_counterparty_threshold: hg_settings
12811 .process_layer
12812 .docs_per_counterparty_threshold,
12813 include_accounts: hg_settings.accounting_layer.include_accounts,
12814 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
12815 include_cross_layer_edges: hg_settings.cross_layer.enabled,
12816 include_compliance: self.config.compliance_regulations.enabled,
12817 include_tax: true,
12818 include_treasury: true,
12819 include_esg: true,
12820 include_project: true,
12821 include_intercompany: true,
12822 include_temporal_events: true,
12823 };
12824
12825 let mut builder = HypergraphBuilder::new(builder_config);
12826
12827 builder.add_coso_framework();
12829
12830 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
12833 let controls = InternalControl::standard_controls();
12834 builder.add_controls(&controls);
12835 }
12836
12837 builder.add_vendors(&self.master_data.vendors);
12839 builder.add_customers(&self.master_data.customers);
12840 builder.add_employees(&self.master_data.employees);
12841
12842 builder.add_p2p_documents(
12844 &document_flows.purchase_orders,
12845 &document_flows.goods_receipts,
12846 &document_flows.vendor_invoices,
12847 &document_flows.payments,
12848 );
12849 builder.add_o2c_documents(
12850 &document_flows.sales_orders,
12851 &document_flows.deliveries,
12852 &document_flows.customer_invoices,
12853 );
12854 builder.add_s2c_documents(
12855 &sourcing.sourcing_projects,
12856 &sourcing.qualifications,
12857 &sourcing.rfx_events,
12858 &sourcing.bids,
12859 &sourcing.bid_evaluations,
12860 &sourcing.contracts,
12861 );
12862 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
12863 builder.add_mfg_documents(
12864 &manufacturing.production_orders,
12865 &manufacturing.quality_inspections,
12866 &manufacturing.cycle_counts,
12867 );
12868 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
12869 builder.add_audit_documents(
12870 &audit.engagements,
12871 &audit.workpapers,
12872 &audit.findings,
12873 &audit.evidence,
12874 &audit.risk_assessments,
12875 &audit.judgments,
12876 &audit.materiality_calculations,
12877 &audit.audit_opinions,
12878 &audit.going_concern_assessments,
12879 );
12880 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
12881
12882 if let Some(ref event_log) = ocpm.event_log {
12884 builder.add_ocpm_events(event_log);
12885 }
12886
12887 if self.config.compliance_regulations.enabled
12889 && hg_settings.governance_layer.include_controls
12890 {
12891 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
12893 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
12894 .standard_records
12895 .iter()
12896 .filter_map(|r| {
12897 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
12898 registry.get(&sid).cloned()
12899 })
12900 .collect();
12901
12902 builder.add_compliance_regulations(
12903 &standards,
12904 &compliance.findings,
12905 &compliance.filings,
12906 );
12907 }
12908
12909 builder.add_accounts(coa);
12911 builder.add_journal_entries_as_hyperedges(entries);
12912
12913 let hypergraph = builder.build();
12915
12916 let output_dir = self
12918 .output_path
12919 .clone()
12920 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12921 let hg_dir = output_dir
12922 .join(&self.config.graph_export.output_subdirectory)
12923 .join(&hg_settings.output_subdirectory);
12924
12925 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
12927 "unified" => {
12928 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12929 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12930 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
12931 })?;
12932 (
12933 metadata.num_nodes,
12934 metadata.num_edges,
12935 metadata.num_hyperedges,
12936 )
12937 }
12938 _ => {
12939 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
12941 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12942 SynthError::generation(format!("Hypergraph export failed: {e}"))
12943 })?;
12944 (
12945 metadata.num_nodes,
12946 metadata.num_edges,
12947 metadata.num_hyperedges,
12948 )
12949 }
12950 };
12951
12952 #[cfg(feature = "streaming")]
12954 if let Some(ref target_url) = hg_settings.stream_target {
12955 use crate::stream_client::{StreamClient, StreamConfig};
12956 use std::io::Write as _;
12957
12958 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
12959 let stream_config = StreamConfig {
12960 target_url: target_url.clone(),
12961 batch_size: hg_settings.stream_batch_size,
12962 api_key,
12963 ..StreamConfig::default()
12964 };
12965
12966 match StreamClient::new(stream_config) {
12967 Ok(mut client) => {
12968 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12969 match exporter.export_to_writer(&hypergraph, &mut client) {
12970 Ok(_) => {
12971 if let Err(e) = client.flush() {
12972 warn!("Failed to flush stream client: {}", e);
12973 } else {
12974 info!("Streamed {} records to {}", client.total_sent(), target_url);
12975 }
12976 }
12977 Err(e) => {
12978 warn!("Streaming export failed: {}", e);
12979 }
12980 }
12981 }
12982 Err(e) => {
12983 warn!("Failed to create stream client: {}", e);
12984 }
12985 }
12986 }
12987
12988 stats.graph_node_count += num_nodes;
12990 stats.graph_edge_count += num_edges;
12991 stats.graph_export_count += 1;
12992
12993 Ok(HypergraphExportInfo {
12994 node_count: num_nodes,
12995 edge_count: num_edges,
12996 hyperedge_count: num_hyperedges,
12997 output_path: hg_dir,
12998 })
12999 }
13000
13001 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
13006 let pb = self.create_progress_bar(100, "Generating Banking Data");
13007
13008 let orchestrator = BankingOrchestratorBuilder::new()
13010 .config(self.config.banking.clone())
13011 .seed(self.seed + 9000)
13012 .country_pack(self.primary_pack().clone())
13013 .build();
13014
13015 if let Some(pb) = &pb {
13016 pb.inc(10);
13017 }
13018
13019 let result = orchestrator.generate();
13021
13022 if let Some(pb) = &pb {
13023 pb.inc(90);
13024 pb.finish_with_message(format!(
13025 "Banking: {} customers, {} transactions",
13026 result.customers.len(),
13027 result.transactions.len()
13028 ));
13029 }
13030
13031 let mut banking_customers = result.customers;
13036 let core_customers = &self.master_data.customers;
13037 if !core_customers.is_empty() {
13038 for (i, bc) in banking_customers.iter_mut().enumerate() {
13039 let core = &core_customers[i % core_customers.len()];
13040 bc.name = CustomerName::business(&core.name);
13041 bc.residence_country = core.country.clone();
13042 bc.enterprise_customer_id = Some(core.customer_id.clone());
13043 }
13044 debug!(
13045 "Cross-referenced {} banking customers with {} core customers",
13046 banking_customers.len(),
13047 core_customers.len()
13048 );
13049 }
13050
13051 Ok(BankingSnapshot {
13052 customers: banking_customers,
13053 accounts: result.accounts,
13054 transactions: result.transactions,
13055 transaction_labels: result.transaction_labels,
13056 customer_labels: result.customer_labels,
13057 account_labels: result.account_labels,
13058 relationship_labels: result.relationship_labels,
13059 narratives: result.narratives,
13060 suspicious_count: result.stats.suspicious_count,
13061 scenario_count: result.scenarios.len(),
13062 })
13063 }
13064
13065 fn calculate_total_transactions(&self) -> u64 {
13067 let months = self.config.global.period_months as f64;
13068 self.config
13069 .companies
13070 .iter()
13071 .map(|c| {
13072 let annual = c.annual_transaction_volume.count() as f64;
13073 let weighted = annual * c.volume_weight;
13074 (weighted * months / 12.0) as u64
13075 })
13076 .sum()
13077 }
13078
13079 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
13081 if !self.phase_config.show_progress {
13082 return None;
13083 }
13084
13085 let pb = if let Some(mp) = &self.multi_progress {
13086 mp.add(ProgressBar::new(total))
13087 } else {
13088 ProgressBar::new(total)
13089 };
13090
13091 pb.set_style(
13092 ProgressStyle::default_bar()
13093 .template(&format!(
13094 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
13095 ))
13096 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
13097 .progress_chars("#>-"),
13098 );
13099
13100 Some(pb)
13101 }
13102
13103 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
13105 self.coa.clone()
13106 }
13107
/// Returns a reference to the master data snapshot held by this instance.
pub fn get_master_data(&self) -> &MasterDataSnapshot {
    &self.master_data
}
13112
13113 fn phase_compliance_regulations(
13115 &mut self,
13116 _stats: &mut EnhancedGenerationStatistics,
13117 ) -> SynthResult<ComplianceRegulationsSnapshot> {
13118 if !self.phase_config.generate_compliance_regulations {
13119 return Ok(ComplianceRegulationsSnapshot::default());
13120 }
13121
13122 info!("Phase: Generating Compliance Regulations Data");
13123
13124 let cr_config = &self.config.compliance_regulations;
13125
13126 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
13128 self.config
13129 .companies
13130 .iter()
13131 .map(|c| c.country.clone())
13132 .collect::<std::collections::HashSet<_>>()
13133 .into_iter()
13134 .collect()
13135 } else {
13136 cr_config.jurisdictions.clone()
13137 };
13138
13139 let fallback_date =
13141 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
13142 let reference_date = cr_config
13143 .reference_date
13144 .as_ref()
13145 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
13146 .unwrap_or_else(|| {
13147 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13148 .unwrap_or(fallback_date)
13149 });
13150
13151 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
13153 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
13154 let cross_reference_records = reg_gen.generate_cross_reference_records();
13155 let jurisdiction_records =
13156 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
13157
13158 info!(
13159 " Standards: {} records, {} cross-references, {} jurisdictions",
13160 standard_records.len(),
13161 cross_reference_records.len(),
13162 jurisdiction_records.len()
13163 );
13164
13165 let audit_procedures = if cr_config.audit_procedures.enabled {
13167 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
13168 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
13169 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
13170 confidence_level: cr_config.audit_procedures.confidence_level,
13171 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
13172 };
13173 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
13174 self.seed + 9000,
13175 proc_config,
13176 );
13177 let registry = reg_gen.registry();
13178 let mut all_procs = Vec::new();
13179 for jurisdiction in &jurisdictions {
13180 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
13181 all_procs.extend(procs);
13182 }
13183 info!(" Audit procedures: {}", all_procs.len());
13184 all_procs
13185 } else {
13186 Vec::new()
13187 };
13188
13189 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
13191 let finding_config =
13192 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
13193 finding_rate: cr_config.findings.finding_rate,
13194 material_weakness_rate: cr_config.findings.material_weakness_rate,
13195 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
13196 generate_remediation: cr_config.findings.generate_remediation,
13197 };
13198 let mut finding_gen =
13199 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
13200 self.seed + 9100,
13201 finding_config,
13202 );
13203 let mut all_findings = Vec::new();
13204 for company in &self.config.companies {
13205 let company_findings =
13206 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
13207 all_findings.extend(company_findings);
13208 }
13209 info!(" Compliance findings: {}", all_findings.len());
13210 all_findings
13211 } else {
13212 Vec::new()
13213 };
13214
13215 let filings = if cr_config.filings.enabled {
13217 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
13218 filing_types: cr_config.filings.filing_types.clone(),
13219 generate_status_progression: cr_config.filings.generate_status_progression,
13220 };
13221 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
13222 self.seed + 9200,
13223 filing_config,
13224 );
13225 let company_codes: Vec<String> = self
13226 .config
13227 .companies
13228 .iter()
13229 .map(|c| c.code.clone())
13230 .collect();
13231 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13232 .unwrap_or(fallback_date);
13233 let filings = filing_gen.generate_filings(
13234 &company_codes,
13235 &jurisdictions,
13236 start_date,
13237 self.config.global.period_months,
13238 );
13239 info!(" Regulatory filings: {}", filings.len());
13240 filings
13241 } else {
13242 Vec::new()
13243 };
13244
13245 let compliance_graph = if cr_config.graph.enabled {
13247 let graph_config = datasynth_graph::ComplianceGraphConfig {
13248 include_standard_nodes: cr_config.graph.include_compliance_nodes,
13249 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
13250 include_cross_references: cr_config.graph.include_cross_references,
13251 include_supersession_edges: cr_config.graph.include_supersession_edges,
13252 include_account_links: cr_config.graph.include_account_links,
13253 include_control_links: cr_config.graph.include_control_links,
13254 include_company_links: cr_config.graph.include_company_links,
13255 };
13256 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
13257
13258 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
13260 .iter()
13261 .map(|r| datasynth_graph::StandardNodeInput {
13262 standard_id: r.standard_id.clone(),
13263 title: r.title.clone(),
13264 category: r.category.clone(),
13265 domain: r.domain.clone(),
13266 is_active: r.is_active,
13267 features: vec![if r.is_active { 1.0 } else { 0.0 }],
13268 applicable_account_types: r.applicable_account_types.clone(),
13269 applicable_processes: r.applicable_processes.clone(),
13270 })
13271 .collect();
13272 builder.add_standards(&standard_inputs);
13273
13274 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
13276 jurisdiction_records
13277 .iter()
13278 .map(|r| datasynth_graph::JurisdictionNodeInput {
13279 country_code: r.country_code.clone(),
13280 country_name: r.country_name.clone(),
13281 framework: r.accounting_framework.clone(),
13282 standard_count: r.standard_count,
13283 tax_rate: r.statutory_tax_rate,
13284 })
13285 .collect();
13286 builder.add_jurisdictions(&jurisdiction_inputs);
13287
13288 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
13290 cross_reference_records
13291 .iter()
13292 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
13293 from_standard: r.from_standard.clone(),
13294 to_standard: r.to_standard.clone(),
13295 relationship: r.relationship.clone(),
13296 convergence_level: r.convergence_level,
13297 })
13298 .collect();
13299 builder.add_cross_references(&xref_inputs);
13300
13301 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
13303 .iter()
13304 .map(|r| datasynth_graph::JurisdictionMappingInput {
13305 country_code: r.jurisdiction.clone(),
13306 standard_id: r.standard_id.clone(),
13307 })
13308 .collect();
13309 builder.add_jurisdiction_mappings(&mapping_inputs);
13310
13311 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
13313 .iter()
13314 .map(|p| datasynth_graph::ProcedureNodeInput {
13315 procedure_id: p.procedure_id.clone(),
13316 standard_id: p.standard_id.clone(),
13317 procedure_type: p.procedure_type.clone(),
13318 sample_size: p.sample_size,
13319 confidence_level: p.confidence_level,
13320 })
13321 .collect();
13322 builder.add_procedures(&proc_inputs);
13323
13324 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
13326 .iter()
13327 .map(|f| datasynth_graph::FindingNodeInput {
13328 finding_id: f.finding_id.to_string(),
13329 standard_id: f
13330 .related_standards
13331 .first()
13332 .map(|s| s.as_str().to_string())
13333 .unwrap_or_default(),
13334 severity: f.severity.to_string(),
13335 deficiency_level: f.deficiency_level.to_string(),
13336 severity_score: f.deficiency_level.severity_score(),
13337 control_id: f.control_id.clone(),
13338 affected_accounts: f.affected_accounts.clone(),
13339 })
13340 .collect();
13341 builder.add_findings(&finding_inputs);
13342
13343 if cr_config.graph.include_account_links {
13345 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13346 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
13347 for std_record in &standard_records {
13348 if let Some(std_obj) =
13349 registry.get(&datasynth_core::models::compliance::StandardId::parse(
13350 &std_record.standard_id,
13351 ))
13352 {
13353 for acct_type in &std_obj.applicable_account_types {
13354 account_links.push(datasynth_graph::AccountLinkInput {
13355 standard_id: std_record.standard_id.clone(),
13356 account_code: acct_type.clone(),
13357 account_name: acct_type.clone(),
13358 });
13359 }
13360 }
13361 }
13362 builder.add_account_links(&account_links);
13363 }
13364
13365 if cr_config.graph.include_control_links {
13367 let mut control_links = Vec::new();
13368 let sox_like_ids: Vec<String> = standard_records
13370 .iter()
13371 .filter(|r| {
13372 r.standard_id.starts_with("SOX")
13373 || r.standard_id.starts_with("PCAOB-AS-2201")
13374 })
13375 .map(|r| r.standard_id.clone())
13376 .collect();
13377 let control_ids = [
13379 ("C001", "Cash Controls"),
13380 ("C002", "Large Transaction Approval"),
13381 ("C010", "PO Approval"),
13382 ("C011", "Three-Way Match"),
13383 ("C020", "Revenue Recognition"),
13384 ("C021", "Credit Check"),
13385 ("C030", "Manual JE Approval"),
13386 ("C031", "Period Close Review"),
13387 ("C032", "Account Reconciliation"),
13388 ("C040", "Payroll Processing"),
13389 ("C050", "Fixed Asset Capitalization"),
13390 ("C060", "Intercompany Elimination"),
13391 ];
13392 for sox_id in &sox_like_ids {
13393 for (ctrl_id, ctrl_name) in &control_ids {
13394 control_links.push(datasynth_graph::ControlLinkInput {
13395 standard_id: sox_id.clone(),
13396 control_id: ctrl_id.to_string(),
13397 control_name: ctrl_name.to_string(),
13398 });
13399 }
13400 }
13401 builder.add_control_links(&control_links);
13402 }
13403
13404 if cr_config.graph.include_company_links {
13406 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
13407 .iter()
13408 .enumerate()
13409 .map(|(i, f)| datasynth_graph::FilingNodeInput {
13410 filing_id: format!("F{:04}", i + 1),
13411 filing_type: f.filing_type.to_string(),
13412 company_code: f.company_code.clone(),
13413 jurisdiction: f.jurisdiction.clone(),
13414 status: format!("{:?}", f.status),
13415 })
13416 .collect();
13417 builder.add_filings(&filing_inputs);
13418 }
13419
13420 let graph = builder.build();
13421 info!(
13422 " Compliance graph: {} nodes, {} edges",
13423 graph.nodes.len(),
13424 graph.edges.len()
13425 );
13426 Some(graph)
13427 } else {
13428 None
13429 };
13430
13431 self.check_resources_with_log("post-compliance-regulations")?;
13432
13433 Ok(ComplianceRegulationsSnapshot {
13434 standard_records,
13435 cross_reference_records,
13436 jurisdiction_records,
13437 audit_procedures,
13438 findings,
13439 filings,
13440 compliance_graph,
13441 })
13442 }
13443
13444 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
13446 use super::lineage::LineageGraphBuilder;
13447
13448 let mut builder = LineageGraphBuilder::new();
13449
13450 builder.add_config_section("config:global", "Global Config");
13452 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
13453 builder.add_config_section("config:transactions", "Transaction Config");
13454
13455 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
13457 builder.add_generator_phase("phase:je", "Journal Entry Generation");
13458
13459 builder.configured_by("phase:coa", "config:chart_of_accounts");
13461 builder.configured_by("phase:je", "config:transactions");
13462
13463 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
13465 builder.produced_by("output:je", "phase:je");
13466
13467 if self.phase_config.generate_master_data {
13469 builder.add_config_section("config:master_data", "Master Data Config");
13470 builder.add_generator_phase("phase:master_data", "Master Data Generation");
13471 builder.configured_by("phase:master_data", "config:master_data");
13472 builder.input_to("phase:master_data", "phase:je");
13473 }
13474
13475 if self.phase_config.generate_document_flows {
13476 builder.add_config_section("config:document_flows", "Document Flow Config");
13477 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
13478 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
13479 builder.configured_by("phase:p2p", "config:document_flows");
13480 builder.configured_by("phase:o2c", "config:document_flows");
13481
13482 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
13483 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
13484 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
13485 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
13486 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
13487
13488 builder.produced_by("output:po", "phase:p2p");
13489 builder.produced_by("output:gr", "phase:p2p");
13490 builder.produced_by("output:vi", "phase:p2p");
13491 builder.produced_by("output:so", "phase:o2c");
13492 builder.produced_by("output:ci", "phase:o2c");
13493 }
13494
13495 if self.phase_config.inject_anomalies {
13496 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
13497 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
13498 builder.configured_by("phase:anomaly", "config:fraud");
13499 builder.add_output_file(
13500 "output:labels",
13501 "Anomaly Labels",
13502 "labels/anomaly_labels.csv",
13503 );
13504 builder.produced_by("output:labels", "phase:anomaly");
13505 }
13506
13507 if self.phase_config.generate_audit {
13508 builder.add_config_section("config:audit", "Audit Config");
13509 builder.add_generator_phase("phase:audit", "Audit Data Generation");
13510 builder.configured_by("phase:audit", "config:audit");
13511 }
13512
13513 if self.phase_config.generate_banking {
13514 builder.add_config_section("config:banking", "Banking Config");
13515 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
13516 builder.configured_by("phase:banking", "config:banking");
13517 }
13518
13519 if self.config.llm.enabled {
13520 builder.add_config_section("config:llm", "LLM Enrichment Config");
13521 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
13522 builder.configured_by("phase:llm_enrichment", "config:llm");
13523 }
13524
13525 if self.config.diffusion.enabled {
13526 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
13527 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
13528 builder.configured_by("phase:diffusion", "config:diffusion");
13529 }
13530
13531 if self.config.causal.enabled {
13532 builder.add_config_section("config:causal", "Causal Generation Config");
13533 builder.add_generator_phase("phase:causal", "Causal Overlay");
13534 builder.configured_by("phase:causal", "config:causal");
13535 }
13536
13537 builder.build()
13538 }
13539
13540 fn compute_company_revenue(
13549 entries: &[JournalEntry],
13550 company_code: &str,
13551 ) -> rust_decimal::Decimal {
13552 use rust_decimal::Decimal;
13553 let mut revenue = Decimal::ZERO;
13554 for je in entries {
13555 if je.header.company_code != company_code {
13556 continue;
13557 }
13558 for line in &je.lines {
13559 if line.gl_account.starts_with('4') {
13560 revenue += line.credit_amount - line.debit_amount;
13562 }
13563 }
13564 }
13565 revenue.max(Decimal::ZERO)
13566 }
13567
13568 fn compute_entity_net_assets(
13572 entries: &[JournalEntry],
13573 entity_code: &str,
13574 ) -> rust_decimal::Decimal {
13575 use rust_decimal::Decimal;
13576 let mut asset_net = Decimal::ZERO;
13577 let mut liability_net = Decimal::ZERO;
13578 for je in entries {
13579 if je.header.company_code != entity_code {
13580 continue;
13581 }
13582 for line in &je.lines {
13583 if line.gl_account.starts_with('1') {
13584 asset_net += line.debit_amount - line.credit_amount;
13585 } else if line.gl_account.starts_with('2') {
13586 liability_net += line.credit_amount - line.debit_amount;
13587 }
13588 }
13589 }
13590 asset_net - liability_net
13591 }
13592}
13593
13594fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
13596 match format {
13597 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
13598 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
13599 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
13600 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
13601 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
13602 }
13603}
13604
13605fn compute_trial_balance_entries(
13610 entries: &[JournalEntry],
13611 entity_code: &str,
13612 fiscal_year: i32,
13613 coa: Option<&ChartOfAccounts>,
13614) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
13615 use std::collections::BTreeMap;
13616
13617 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
13618 BTreeMap::new();
13619
13620 for je in entries {
13621 for line in &je.lines {
13622 let entry = balances.entry(line.account_code.clone()).or_default();
13623 entry.0 += line.debit_amount;
13624 entry.1 += line.credit_amount;
13625 }
13626 }
13627
13628 balances
13629 .into_iter()
13630 .map(
13631 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
13632 account_description: coa
13633 .and_then(|c| c.get_account(&account_code))
13634 .map(|a| a.description().to_string())
13635 .unwrap_or_else(|| account_code.clone()),
13636 account_code,
13637 debit_balance: debit,
13638 credit_balance: credit,
13639 net_balance: debit - credit,
13640 entity_code: entity_code.to_string(),
13641 period: format!("FY{}", fiscal_year),
13642 },
13643 )
13644 .collect()
13645}
13646
13647#[cfg(test)]
13648#[allow(clippy::unwrap_used)]
13649mod tests {
13650 use super::*;
13651 use datasynth_config::schema::*;
13652
13653 fn create_test_config() -> GeneratorConfig {
13654 GeneratorConfig {
13655 global: GlobalConfig {
13656 industry: IndustrySector::Manufacturing,
13657 start_date: "2024-01-01".to_string(),
13658 period_months: 1,
13659 seed: Some(42),
13660 parallel: false,
13661 group_currency: "USD".to_string(),
13662 presentation_currency: None,
13663 worker_threads: 0,
13664 memory_limit_mb: 0,
13665 fiscal_year_months: None,
13666 },
13667 companies: vec![CompanyConfig {
13668 code: "1000".to_string(),
13669 name: "Test Company".to_string(),
13670 currency: "USD".to_string(),
13671 functional_currency: None,
13672 country: "US".to_string(),
13673 annual_transaction_volume: TransactionVolume::TenK,
13674 volume_weight: 1.0,
13675 fiscal_year_variant: "K4".to_string(),
13676 }],
13677 chart_of_accounts: ChartOfAccountsConfig {
13678 complexity: CoAComplexity::Small,
13679 industry_specific: true,
13680 custom_accounts: None,
13681 min_hierarchy_depth: 2,
13682 max_hierarchy_depth: 4,
13683 },
13684 transactions: TransactionConfig::default(),
13685 output: OutputConfig::default(),
13686 fraud: FraudConfig::default(),
13687 internal_controls: InternalControlsConfig::default(),
13688 business_processes: BusinessProcessConfig::default(),
13689 user_personas: UserPersonaConfig::default(),
13690 templates: TemplateConfig::default(),
13691 approval: ApprovalConfig::default(),
13692 departments: DepartmentConfig::default(),
13693 master_data: MasterDataConfig::default(),
13694 document_flows: DocumentFlowConfig::default(),
13695 intercompany: IntercompanyConfig::default(),
13696 balance: BalanceConfig::default(),
13697 ocpm: OcpmConfig::default(),
13698 audit: AuditGenerationConfig::default(),
13699 banking: datasynth_banking::BankingConfig::default(),
13700 data_quality: DataQualitySchemaConfig::default(),
13701 scenario: ScenarioConfig::default(),
13702 temporal: TemporalDriftConfig::default(),
13703 graph_export: GraphExportConfig::default(),
13704 streaming: StreamingSchemaConfig::default(),
13705 rate_limit: RateLimitSchemaConfig::default(),
13706 temporal_attributes: TemporalAttributeSchemaConfig::default(),
13707 relationships: RelationshipSchemaConfig::default(),
13708 accounting_standards: AccountingStandardsConfig::default(),
13709 audit_standards: AuditStandardsConfig::default(),
13710 distributions: Default::default(),
13711 temporal_patterns: Default::default(),
13712 vendor_network: VendorNetworkSchemaConfig::default(),
13713 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
13714 relationship_strength: RelationshipStrengthSchemaConfig::default(),
13715 cross_process_links: CrossProcessLinksSchemaConfig::default(),
13716 organizational_events: OrganizationalEventsSchemaConfig::default(),
13717 behavioral_drift: BehavioralDriftSchemaConfig::default(),
13718 market_drift: MarketDriftSchemaConfig::default(),
13719 drift_labeling: DriftLabelingSchemaConfig::default(),
13720 anomaly_injection: Default::default(),
13721 industry_specific: Default::default(),
13722 fingerprint_privacy: Default::default(),
13723 quality_gates: Default::default(),
13724 compliance: Default::default(),
13725 webhooks: Default::default(),
13726 llm: Default::default(),
13727 diffusion: Default::default(),
13728 causal: Default::default(),
13729 source_to_pay: Default::default(),
13730 financial_reporting: Default::default(),
13731 hr: Default::default(),
13732 manufacturing: Default::default(),
13733 sales_quotes: Default::default(),
13734 tax: Default::default(),
13735 treasury: Default::default(),
13736 project_accounting: Default::default(),
13737 esg: Default::default(),
13738 country_packs: None,
13739 scenarios: Default::default(),
13740 session: Default::default(),
13741 compliance_regulations: Default::default(),
13742 }
13743 }
13744
/// Constructing an orchestrator with default phases from the baseline config
/// must succeed.
#[test]
fn test_enhanced_orchestrator_creation() {
    let created = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(created.is_ok());
}
13751
/// With only journal-entry generation switched on, a run succeeds and
/// produces at least one journal entry.
#[test]
fn test_minimal_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let outcome = orch.generate();
    assert!(outcome.is_ok());

    let generated = outcome.unwrap();
    assert!(!generated.journal_entries.is_empty());
}
13771
/// With only master-data generation enabled, vendors, customers and materials
/// are all populated.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();
    let master = &generated.master_data;

    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
13796
/// With master data and document flows enabled, both P2P and O2C chains are
/// produced, along with purchase orders and sales orders.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();
    let flows = &generated.document_flows;

    // Chains exist for both processes.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // The flattened document collections are populated as well.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
13830
/// With anomaly injection enabled on top of journal entries, entries are
/// generated and the anomaly-label summary is present.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();

    assert!(!generated.journal_entries.is_empty());
    assert!(generated.anomaly_labels.summary.is_some());
}
13853
/// Runs master data, document flows, journal entries and balance validation
/// together and checks that each stage left output behind.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();

    // Master data, document flows, journal entries, chart of accounts.
    assert!(!generated.master_data.vendors.is_empty());
    assert!(!generated.master_data.customers.is_empty());
    assert!(!generated.document_flows.p2p_chains.is_empty());
    assert!(!generated.document_flows.o2c_chains.is_empty());
    assert!(!generated.journal_entries.is_empty());
    assert!(generated.statistics.accounts_count > 0);

    // Subledger invoices on both the payable and receivable side.
    let subledger = &generated.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation ran and saw entries.
    let validation = &generated.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
13895
/// Checks that subledger invoices line up one-to-one (by count) with the
/// vendor/customer invoices from document flows, and that the statistics
/// report the same counts.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();
    let flows = &generated.document_flows;
    let subledger = &generated.subledger;
    let stats = &generated.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Counts match between subledger and source documents.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Statistics mirror the subledger counts.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
13951
/// With balance validation enabled on a journal-entry-only run, validation
/// completes, finds no unbalanced entries, and total debits equal total
/// credits.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();
    let validation = &generated.balance_validation;

    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
    assert!(!validation.has_unbalanced_entries);
    assert_eq!(validation.total_debits, validation.total_credits);
}
13981
/// The reported vendor, customer, material and entry counts must equal the
/// lengths of the corresponding generated collections.
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();
    let stats = &generated.statistics;
    let master = &generated.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(
        stats.total_entries as usize,
        generated.journal_entries.len()
    );
}
14020
/// Pins the default `PhaseConfig`: core generation phases, balance validation
/// and progress display are on, anomaly injection is off, and the per-company
/// vendor/customer counts are positive.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Enabled by default.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);
    // Disabled by default.
    assert!(!defaults.inject_anomalies);
    // Enabled by default.
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);
    // Non-zero sizing.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
14033
/// A freshly constructed orchestrator exposes no chart of accounts until
/// `generate` has been called.
#[test]
fn test_get_coa_before_generation() {
    let orch = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    assert!(orch.get_coa().is_none());
}
14042
/// After a successful journal-entry run the orchestrator exposes a chart of
/// accounts.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    // Only the side effect on the orchestrator matters here.
    let _ = orch.generate().unwrap();

    assert!(orch.get_coa().is_some());
}
14061
/// A master-data-only run returns a non-empty vendor list in the result.
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();

    assert!(!generated.master_data.vendors.is_empty());
}
14085
/// `with_progress(false)` must clear `show_progress` on the orchestrator's
/// phase configuration.
#[test]
fn test_with_progress_builder() {
    let base = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let quiet = base.with_progress(false);

    assert!(!quiet.phase_config.show_progress);
}
14096
/// Adds a second company ("2000", EUR/DE) to the baseline config and checks
/// that at least 10 vendors and 10 customers are generated in total and that
/// two companies are counted.
#[test]
fn test_multi_company_generation() {
    let subsidiary = CompanyConfig {
        code: String::from("2000"),
        name: String::from("Subsidiary"),
        currency: String::from("EUR"),
        functional_currency: None,
        country: String::from("DE"),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: String::from("K4"),
    };
    let mut cfg = create_test_config();
    cfg.companies.push(subsidiary);

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();

    let generated = orch.generate().unwrap();
    let stats = &generated.statistics;

    // 5 per company across 2 companies.
    assert!(stats.vendor_count >= 10);
    assert!(stats.customer_count >= 10);
    assert!(stats.companies_count == 2);
}
14133
/// Requesting document flows without master data must still succeed, but
/// yields no P2P or O2C chains.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();
    let flows = &generated.document_flows;

    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
14153
/// `statistics.total_line_items` must equal the sum of `line_count()` over
/// all generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();

    let summed_lines = generated
        .journal_entries
        .iter()
        .map(|entry| entry.line_count() as u64)
        .sum::<u64>();

    assert_eq!(generated.statistics.total_line_items, summed_lines);
}
14177
/// Enables audit generation for two engagements and checks that the audit
/// artifact collections are populated and that every audit statistic equals
/// the length of its collection.
#[test]
fn test_audit_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();
    let audit = &generated.audit;
    let stats = &generated.statistics;

    // Core audit artifacts.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // ISA-specific artifacts.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Statistics must mirror the collection sizes.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(
        stats.audit_procedure_step_count,
        audit.procedure_steps.len()
    );
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(
        stats.audit_related_party_count,
        audit.related_parties.len()
    );
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
14297
/// The baseline config leaves LLM, diffusion and causal phases disabled, and a
/// run under that config reports zeroed statistics for them and produces no
/// counterfactual pairs.
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();

    // Flags are off in the baseline config.
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();

    let generated = orch.generate().unwrap();
    let stats = &generated.statistics;

    // Nothing was recorded for the disabled phases.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(generated.counterfactual_pairs.is_empty());
}
14329
/// With counterfactual generation on (and period close off), there is exactly
/// one counterfactual pair per journal entry, the statistic agrees, and all
/// pair ids are distinct. Nothing is asserted if no entries were generated.
#[test]
fn test_counterfactual_generation_enabled() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_counterfactuals: true,
        generate_period_close: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();

    if generated.journal_entries.is_empty() {
        return;
    }

    let entry_count = generated.journal_entries.len();
    let pairs = &generated.counterfactual_pairs;

    assert_eq!(pairs.len(), entry_count);
    assert_eq!(generated.statistics.counterfactual_pair_count, entry_count);

    // Collecting ids into a set drops duplicates, so equal sizes mean unique ids.
    let unique_ids: std::collections::HashSet<_> =
        pairs.iter().map(|pair| pair.pair_id.clone()).collect();
    assert_eq!(unique_ids.len(), pairs.len());
}
14366
/// With LLM enrichment enabled and capped at 3 vendors, at least one and at
/// most three vendors are reported as enriched.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();

    let generated = orch.generate().unwrap();
    let enriched = generated.statistics.llm_vendors_enriched;

    assert!(enriched > 0);
    assert!(enriched <= 3);
}
14394
/// With diffusion enabled and `sample_size` set to 20, exactly 20 diffusion
/// samples are reported.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();

    let generated = orch.generate().unwrap();

    assert_eq!(generated.statistics.diffusion_samples_generated, 20);
}
14417
/// With the causal overlay enabled ("fraud_detection" template, 100 samples,
/// validation on), 100 samples are reported and a validation verdict is
/// recorded.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = String::from("fraud_detection");
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();

    let generated = orch.generate().unwrap();
    let stats = &generated.statistics;

    assert_eq!(stats.causal_samples_generated, 100);
    assert!(stats.causal_validation_passed.is_some());
}
14443
/// With the "revenue_cycle" causal template, 50 samples and validation off,
/// 50 samples are reported and no validation verdict is recorded.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = String::from("revenue_cycle");
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();

    let generated = orch.generate().unwrap();
    let stats = &generated.statistics;

    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_none());
}
14469
/// Enables LLM enrichment, diffusion and the causal overlay in a single run
/// and checks that each phase reports its expected statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut cfg = create_test_config();
    // LLM enrichment.
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 2;
    // Diffusion enhancement.
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.sample_size = 10;
    // Causal overlay.
    cfg.causal.enabled = true;
    cfg.causal.sample_size = 50;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();

    let generated = orch.generate().unwrap();
    let stats = &generated.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
14505
/// Round-trips an `EnhancedGenerationStatistics` value through JSON and checks
/// that the LLM, diffusion, and causal fields survive unchanged.
///
/// The pre-existing `total_entries` / `total_line_items` values set on the
/// input are verified as well, so the test also catches a regression where
/// the newer fields interfere with the older ones during (de)serialization.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let stats = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let json = serde_json::to_string(&stats).unwrap();
    let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();

    // Fields that existed before the LLM/diffusion/causal additions.
    assert_eq!(deserialized.total_entries, 100);
    assert_eq!(deserialized.total_line_items, 500);

    // Newly added fields.
    assert_eq!(deserialized.llm_enrichment_ms, 42);
    assert_eq!(deserialized.llm_vendors_enriched, 10);
    assert_eq!(deserialized.diffusion_enhancement_ms, 100);
    assert_eq!(deserialized.diffusion_samples_generated, 50);
    assert_eq!(deserialized.causal_generation_ms, 200);
    assert_eq!(deserialized.causal_samples_generated, 100);
    assert_eq!(deserialized.causal_validation_passed, Some(true));
}
14532
/// Deserializes a statistics payload that contains none of the LLM,
/// diffusion, or causal keys and checks that:
///
/// * the values that *are* present in the payload are preserved, and
/// * every missing newer field comes back as `0` / `None`.
///
/// NOTE(review): this presumably relies on serde defaults being declared on
/// the newer fields of `EnhancedGenerationStatistics` — confirm against the
/// struct definition.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Legacy payload: deliberately omits every llm_*, diffusion_*, and causal_* key.
    let old_json = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();

    // Values supplied by the legacy payload must be carried over; without
    // these checks an all-default struct would satisfy the test.
    assert_eq!(stats.total_entries, 100);
    assert_eq!(stats.total_line_items, 500);

    // Fields absent from the legacy payload must fall back to their defaults.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
}
14582}