1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 MaterialGenerator,
117 O2CDocumentChain,
118 O2CGenerator,
119 O2CGeneratorConfig,
120 O2CPaymentBehavior,
121 P2PDocumentChain,
122 P2PGenerator,
124 P2PGeneratorConfig,
125 P2PPaymentBehavior,
126 PaymentReference,
127 ProvisionGenerator,
129 QualificationGenerator,
130 RfxGenerator,
131 RiskAssessmentGenerator,
132 RunningBalanceTracker,
134 ScorecardGenerator,
135 SegmentGenerator,
137 SegmentSeed,
138 SourcingProjectGenerator,
139 SpendAnalysisGenerator,
140 ValidationError,
141 VendorGenerator,
143 WorkpaperGenerator,
144};
145use datasynth_graph::{
146 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
147 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
148 TransactionGraphConfig,
149};
150use datasynth_ocpm::{
151 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
152 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
153 OcpmUuidFactory, P2pDocuments, S2cDocuments,
154};
155
156use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
157use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
158use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
159use datasynth_core::llm::MockLlmProvider;
160use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
161use datasynth_core::models::documents::PaymentMethod;
162use datasynth_core::models::IndustrySector;
163use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
164use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
165use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
166use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
167use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
168use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
169use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
170use datasynth_generators::audit::sample_generator::SampleGenerator;
171use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
172use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
173use datasynth_generators::coa_generator::CoAFramework;
174use datasynth_generators::llm_enrichment::VendorLlmEnricher;
175use rayon::prelude::*;
176
177fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
183 let payment_behavior = &schema_config.payment_behavior;
184 let late_dist = &payment_behavior.late_payment_days_distribution;
185
186 P2PGeneratorConfig {
187 three_way_match_rate: schema_config.three_way_match_rate,
188 partial_delivery_rate: schema_config.partial_delivery_rate,
189 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
190 price_variance_rate: schema_config.price_variance_rate,
191 max_price_variance_percent: schema_config.max_price_variance_percent,
192 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
193 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
194 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
195 payment_method_distribution: vec![
196 (PaymentMethod::BankTransfer, 0.60),
197 (PaymentMethod::Check, 0.25),
198 (PaymentMethod::Wire, 0.10),
199 (PaymentMethod::CreditCard, 0.05),
200 ],
201 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
202 payment_behavior: P2PPaymentBehavior {
203 late_payment_rate: payment_behavior.late_payment_rate,
204 late_payment_distribution: LatePaymentDistribution {
205 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
206 late_8_to_14: late_dist.late_8_to_14,
207 very_late_15_to_30: late_dist.very_late_15_to_30,
208 severely_late_31_to_60: late_dist.severely_late_31_to_60,
209 extremely_late_over_60: late_dist.extremely_late_over_60,
210 },
211 partial_payment_rate: payment_behavior.partial_payment_rate,
212 payment_correction_rate: payment_behavior.payment_correction_rate,
213 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
214 },
215 }
216}
217
218fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
220 let payment_behavior = &schema_config.payment_behavior;
221
222 O2CGeneratorConfig {
223 credit_check_failure_rate: schema_config.credit_check_failure_rate,
224 partial_shipment_rate: schema_config.partial_shipment_rate,
225 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
226 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
227 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
228 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
229 bad_debt_rate: schema_config.bad_debt_rate,
230 returns_rate: schema_config.return_rate,
231 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
232 payment_method_distribution: vec![
233 (PaymentMethod::BankTransfer, 0.50),
234 (PaymentMethod::Check, 0.30),
235 (PaymentMethod::Wire, 0.15),
236 (PaymentMethod::CreditCard, 0.05),
237 ],
238 payment_behavior: O2CPaymentBehavior {
239 partial_payment_rate: payment_behavior.partial_payments.rate,
240 short_payment_rate: payment_behavior.short_payments.rate,
241 max_short_percent: payment_behavior.short_payments.max_short_percent,
242 on_account_rate: payment_behavior.on_account_payments.rate,
243 payment_correction_rate: payment_behavior.payment_corrections.rate,
244 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
245 },
246 }
247}
248
/// Switches and volume settings controlling which generation phases run.
///
/// Boolean `generate_*` / `inject_*` / `validate_*` fields toggle individual
/// phases; the `usize` fields set how many records to produce for the
/// corresponding phase.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Enables master-data generation.
    pub generate_master_data: bool,
    /// Enables document-flow generation.
    pub generate_document_flows: bool,
    /// Enables OCPM event generation.
    pub generate_ocpm_events: bool,
    /// Enables journal-entry generation.
    pub generate_journal_entries: bool,
    /// Enables anomaly injection.
    pub inject_anomalies: bool,
    /// Enables data-quality issue injection.
    pub inject_data_quality: bool,
    /// Enables balance validation.
    pub validate_balances: bool,
    /// Enables progress display.
    pub show_progress: bool,
    /// Vendor count per company.
    pub vendors_per_company: usize,
    /// Customer count per company.
    pub customers_per_company: usize,
    /// Material count per company.
    pub materials_per_company: usize,
    /// Asset count per company.
    pub assets_per_company: usize,
    /// Employee count per company.
    pub employees_per_company: usize,
    /// Number of P2P document chains.
    pub p2p_chains: usize,
    /// Number of O2C document chains.
    pub o2c_chains: usize,
    /// Enables audit data generation.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Workpaper count per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence item count per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessment count per engagement.
    pub risks_per_engagement: usize,
    /// Finding count per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgment count per engagement.
    pub judgments_per_engagement: usize,
    /// Enables banking data generation.
    pub generate_banking: bool,
    /// Enables graph export.
    pub generate_graph_export: bool,
    /// Enables sourcing data generation.
    pub generate_sourcing: bool,
    /// Enables bank reconciliation generation.
    pub generate_bank_reconciliation: bool,
    /// Enables financial statement generation.
    pub generate_financial_statements: bool,
    /// Enables accounting-standards data generation.
    pub generate_accounting_standards: bool,
    /// Enables manufacturing data generation.
    pub generate_manufacturing: bool,
    /// Enables sales quote, KPI and budget generation.
    pub generate_sales_kpi_budgets: bool,
    /// Enables tax data generation.
    pub generate_tax: bool,
    /// Enables ESG data generation.
    pub generate_esg: bool,
    /// Enables intercompany data generation.
    pub generate_intercompany: bool,
    /// Enables evolution event generation.
    pub generate_evolution_events: bool,
    /// Enables counterfactual generation.
    pub generate_counterfactuals: bool,
    /// Enables compliance-regulations data generation.
    pub generate_compliance_regulations: bool,
    /// Enables period-close generation.
    pub generate_period_close: bool,
    /// Enables HR data generation.
    pub generate_hr: bool,
    /// Enables treasury data generation.
    pub generate_treasury: bool,
    /// Enables project-accounting data generation.
    pub generate_project_accounting: bool,
}
333
334impl Default for PhaseConfig {
335 fn default() -> Self {
336 Self {
337 generate_master_data: true,
338 generate_document_flows: true,
339 generate_ocpm_events: false, generate_journal_entries: true,
341 inject_anomalies: false,
342 inject_data_quality: false, validate_balances: true,
344 show_progress: true,
345 vendors_per_company: 50,
346 customers_per_company: 100,
347 materials_per_company: 200,
348 assets_per_company: 50,
349 employees_per_company: 100,
350 p2p_chains: 100,
351 o2c_chains: 100,
352 generate_audit: false, audit_engagements: 5,
354 workpapers_per_engagement: 20,
355 evidence_per_workpaper: 5,
356 risks_per_engagement: 15,
357 findings_per_engagement: 8,
358 judgments_per_engagement: 10,
359 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, generate_evolution_events: true, generate_counterfactuals: false, generate_compliance_regulations: false, generate_period_close: true, generate_hr: false, generate_treasury: false, generate_project_accounting: false, }
378 }
379}
380
381impl PhaseConfig {
382 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
387 Self {
388 generate_master_data: true,
390 generate_document_flows: true,
391 generate_journal_entries: true,
392 validate_balances: true,
393 generate_period_close: true,
394 generate_evolution_events: true,
395 show_progress: true,
396
397 generate_audit: cfg.audit.enabled,
399 generate_banking: cfg.banking.enabled,
400 generate_graph_export: cfg.graph_export.enabled,
401 generate_sourcing: cfg.source_to_pay.enabled,
402 generate_intercompany: cfg.intercompany.enabled,
403 generate_financial_statements: cfg.financial_reporting.enabled,
404 generate_bank_reconciliation: cfg.financial_reporting.enabled,
405 generate_accounting_standards: cfg.accounting_standards.enabled,
406 generate_manufacturing: cfg.manufacturing.enabled,
407 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
408 generate_tax: cfg.tax.enabled,
409 generate_esg: cfg.esg.enabled,
410 generate_ocpm_events: cfg.ocpm.enabled,
411 generate_compliance_regulations: cfg.compliance_regulations.enabled,
412 generate_hr: cfg.hr.enabled,
413 generate_treasury: cfg.treasury.enabled,
414 generate_project_accounting: cfg.project_accounting.enabled,
415
416 generate_counterfactuals: false,
418
419 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
420 inject_data_quality: cfg.data_quality.enabled,
421
422 vendors_per_company: 50,
424 customers_per_company: 100,
425 materials_per_company: 200,
426 assets_per_company: 50,
427 employees_per_company: 100,
428 p2p_chains: 100,
429 o2c_chains: 100,
430 audit_engagements: 5,
431 workpapers_per_engagement: 20,
432 evidence_per_workpaper: 5,
433 risks_per_engagement: 15,
434 findings_per_engagement: 8,
435 judgments_per_engagement: 10,
436 }
437 }
438}
439
/// Collection of master-data records, one vector per entity kind.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed-asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
}
454
/// Size figures and output location describing a hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
467
/// Collection of document-flow data: P2P/O2C chains plus per-type document lists.
///
/// NOTE(review): the per-type vectors presumably flatten the documents contained
/// in the chains; that relationship is not visible in this struct — confirm
/// against the code that fills it.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
}
490
/// Collection of subledger data covering AP, AR, fixed assets and inventory.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP subledger invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR subledger invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed-asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Fixed-asset depreciation runs.
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results.
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
}
513
/// OCPM event log together with summary counts.
///
/// NOTE(review): the counts are stored separately from `event_log`; whether
/// they are always derived from it is not visible here.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, if one is present.
    pub event_log: Option<OcpmEventLog>,
    /// Number of events.
    pub event_count: usize,
    /// Number of objects.
    pub object_count: usize,
    /// Number of cases.
    pub case_count: usize,
}
526
/// Collection of audit-related data, one field per artifact type.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
    /// External confirmations.
    pub confirmations: Vec<ExternalConfirmation>,
    /// Responses to confirmations.
    pub confirmation_responses: Vec<ConfirmationResponse>,
    /// Audit procedure steps.
    pub procedure_steps: Vec<AuditProcedureStep>,
    /// Audit samples.
    pub samples: Vec<AuditSample>,
    /// Analytical procedure results.
    pub analytical_results: Vec<AnalyticalProcedureResult>,
    /// Internal audit functions.
    pub ia_functions: Vec<InternalAuditFunction>,
    /// Internal audit reports.
    pub ia_reports: Vec<InternalAuditReport>,
    /// Related parties.
    pub related_parties: Vec<RelatedParty>,
    /// Related-party transactions.
    pub related_party_transactions: Vec<RelatedPartyTransaction>,
    /// Component auditors.
    pub component_auditors: Vec<ComponentAuditor>,
    /// Group audit plan, if one is present.
    pub group_audit_plan: Option<GroupAuditPlan>,
    /// Instructions to component auditors.
    pub component_instructions: Vec<ComponentInstruction>,
    /// Component auditor reports.
    pub component_reports: Vec<ComponentAuditorReport>,
    /// Engagement letters.
    pub engagement_letters: Vec<EngagementLetter>,
    /// Subsequent events.
    pub subsequent_events: Vec<SubsequentEvent>,
    /// Service organizations.
    pub service_organizations: Vec<ServiceOrganization>,
    /// SOC reports.
    pub soc_reports: Vec<SocReport>,
    /// User entity controls.
    pub user_entity_controls: Vec<UserEntityControl>,
    /// Going-concern assessments.
    pub going_concern_assessments:
        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
    /// Accounting estimates.
    pub accounting_estimates:
        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
    /// Audit opinions.
    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
    /// Key audit matters.
    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
    /// SOX 302 certifications.
    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
    /// SOX 404 assessments.
    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
    /// Materiality calculations.
    pub materiality_calculations:
        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
    /// Combined risk assessments.
    pub combined_risk_assessments:
        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
    /// Sampling plans.
    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
    /// Sampled items.
    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
    /// Significant classes of transactions.
    pub significant_transaction_classes:
        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
    /// Unusual item flags.
    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
    /// Analytical relationships.
    pub analytical_relationships:
        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
}
625
/// Collection of banking data: entities, label sets, narratives and summary counts.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level labels.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Suspicious-item count (presumably suspicious transactions — confirm against the producer).
    pub suspicious_count: usize,
    /// Scenario count.
    pub scenario_count: usize,
}
650
/// Summary of graph exports; serializable via `serde`.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Flag recording whether an export took place.
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Per-export details keyed by a string (presumably the export name — confirm).
    pub exports: HashMap<String, GraphExportInfo>,
}
661
/// Details of a single graph export; serializable via `serde`.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Export name.
    pub name: String,
    /// Export format, as a free-form string.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
676
/// Collection of sourcing data, one vector per artifact type.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
699
/// Trial balance for one fiscal period; serializable and deserializable via `serde`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period within the year.
    pub fiscal_period: u8,
    /// First date of the period.
    pub period_start: NaiveDate,
    /// Last date of the period.
    pub period_end: NaiveDate,
    /// Trial balance entries for the period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
714
/// Collection of financial-reporting data: statements, consolidation data,
/// bank reconciliations, trial balances, segment data and notes.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Standalone statements grouped under a string key
    /// (presumably an entity/company identifier — confirm against the producer).
    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
    /// Consolidated statements.
    pub consolidated_statements: Vec<FinancialStatement>,
    /// Consolidation schedules.
    pub consolidation_schedules: Vec<ConsolidationSchedule>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
    /// Operating segments.
    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
    /// Segment reconciliations.
    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
    /// Notes to the financial statements.
    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
}
739
/// Collection of HR data: payroll, time, expenses, benefits, pensions and
/// stock compensation, plus summary counts.
///
/// NOTE(review): the `*_count` fields are stored separately from the vectors;
/// whether they always equal the vector lengths is not visible here.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments.
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    /// Defined-benefit pension plans.
    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
    /// Pension obligations.
    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
    /// Pension plan assets.
    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
    /// Pension disclosures.
    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
    /// Journal entries related to pensions.
    pub pension_journal_entries: Vec<JournalEntry>,
    /// Stock grants.
    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
    /// Stock compensation expense records.
    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
    /// Journal entries related to stock compensation.
    pub stock_comp_journal_entries: Vec<JournalEntry>,
    /// Payroll run count.
    pub payroll_run_count: usize,
    /// Payroll line item count.
    pub payroll_line_item_count: usize,
    /// Time entry count.
    pub time_entry_count: usize,
    /// Expense report count.
    pub expense_report_count: usize,
    /// Benefit enrollment count.
    pub benefit_enrollment_count: usize,
    /// Pension plan count.
    pub pension_plan_count: usize,
    /// Stock grant count.
    pub stock_grant_count: usize,
}
784
/// Collection of accounting-standards data: revenue contracts, impairment
/// tests, business combinations, ECL, provisions and currency translation,
/// plus summary counts.
///
/// NOTE(review): the `*_count` fields are stored separately from the vectors;
/// whether they always equal the vector lengths is not visible here.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer contracts (revenue).
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Business combinations.
    pub business_combinations:
        Vec<datasynth_core::models::business_combination::BusinessCombination>,
    /// Journal entries related to business combinations.
    pub business_combination_journal_entries: Vec<JournalEntry>,
    /// Expected-credit-loss models.
    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
    /// ECL provision movements.
    pub ecl_provision_movements:
        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
    /// Journal entries related to ECL.
    pub ecl_journal_entries: Vec<JournalEntry>,
    /// Provisions.
    pub provisions: Vec<datasynth_core::models::provision::Provision>,
    /// Provision movements.
    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
    /// Contingent liabilities.
    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
    /// Journal entries related to provisions.
    pub provision_journal_entries: Vec<JournalEntry>,
    /// Currency translation results.
    pub currency_translation_results:
        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
    /// Revenue contract count.
    pub revenue_contract_count: usize,
    /// Impairment test count.
    pub impairment_test_count: usize,
    /// Business combination count.
    pub business_combination_count: usize,
    /// ECL model count.
    pub ecl_model_count: usize,
    /// Provision count.
    pub provision_count: usize,
    /// Currency translation count.
    pub currency_translation_count: usize,
}
828
/// Collection of compliance and regulatory data, with an optional graph.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Compliance standard records.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Audit procedure records.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// Compliance graph, if one is present.
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
847
/// Collection of manufacturing data plus summary counts.
///
/// NOTE(review): the `*_count` fields are stored separately from the vectors;
/// whether they always equal the vector lengths is not visible here.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// BOM components.
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements.
    pub inventory_movements: Vec<InventoryMovement>,
    /// Production order count.
    pub production_order_count: usize,
    /// Quality inspection count.
    pub quality_inspection_count: usize,
    /// Number of cycle counts.
    pub cycle_count_count: usize,
    /// BOM component count.
    pub bom_component_count: usize,
    /// Inventory movement count.
    pub inventory_movement_count: usize,
}
872
/// Collection of sales quotes, management KPIs and budgets, plus summary counts.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Sales quote count.
    pub sales_quote_count: usize,
    /// KPI count.
    pub kpi_count: usize,
    /// Budget line count (named for lines, not budgets, so it may differ from `budgets.len()`).
    pub budget_line_count: usize,
}
889
/// Anomaly labels with an optional summary and a per-type tally.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// Individual labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Anomaly summary, if one is present.
    pub summary: Option<AnomalySummary>,
    /// Counts keyed by a string (presumably the anomaly type name — confirm).
    pub by_type: HashMap<String, usize>,
}
900
/// Result record for balance validation.
///
/// NOTE(review): the exact meaning of each flag is defined by the code that
/// fills this struct, which is not visible here; the field notes below restate
/// the names only.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Flag recording whether validation took place (presumably — confirm).
    pub validated: bool,
    /// Flag recording whether the result is balanced.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Total debits.
    pub total_debits: rust_decimal::Decimal,
    /// Total credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors collected.
    pub validation_errors: Vec<ValidationError>,
    /// Flag recording whether unbalanced entries exist.
    pub has_unbalanced_entries: bool,
}
923
/// Collection of tax data, plus summary counts and deferred-tax data.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Jurisdiction count.
    pub jurisdiction_count: usize,
    /// Tax code count.
    pub code_count: usize,
    /// Deferred-tax data.
    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
}
948
/// Collection of intercompany data; serializable and deserializable via `serde`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Group structure, if one is present.
    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Seller-side journal entries.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Buyer-side journal entries.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// NCI measurements.
    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
    /// Matched pair count.
    pub matched_pair_count: usize,
    /// Elimination entry count.
    pub elimination_entry_count: usize,
    /// Match rate (scale not visible here; presumably a 0.0-1.0 fraction — confirm).
    pub match_rate: f64,
}
971
/// Collection of ESG data, one vector per record type, plus summary counts.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Emission record count.
    pub emission_count: usize,
    /// Disclosure count.
    pub disclosure_count: usize,
}
1008
/// Collection of treasury data, one vector per record type.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Treasury anomaly labels.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
}
1033
/// Collection of project-accounting data, one vector per record type.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1050
/// Top-level aggregate holding the chart of accounts, every per-domain
/// snapshot, journal entries, labels, validation results and statistics.
///
/// Derives `Debug` and `Default` only (no `Clone`).
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master-data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document-flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot.
    pub subledger: SubledgerSnapshot,
    /// OCPM snapshot.
    pub ocpm: OcpmSnapshot,
    /// Audit snapshot.
    pub audit: AuditSnapshot,
    /// Banking snapshot.
    pub banking: BankingSnapshot,
    /// Graph export snapshot.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing snapshot.
    pub sourcing: SourcingSnapshot,
    /// Financial-reporting snapshot.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR snapshot.
    pub hr: HrSnapshot,
    /// Accounting-standards snapshot.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales quotes, KPIs and budgets snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax snapshot.
    pub tax: TaxSnapshot,
    /// ESG snapshot.
    pub esg: EsgSnapshot,
    /// Treasury snapshot.
    pub treasury: TreasurySnapshot,
    /// Project-accounting snapshot.
    pub project_accounting: ProjectAccountingSnapshot,
    /// Process evolution events.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events.
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events.
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany snapshot.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation result.
    pub balance_validation: BalanceValidationResult,
    /// Data-quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Generation statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, if one is present.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality-gate result, if one is present.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// Opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual pairs.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red flags.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Collusion rings.
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    /// Temporal version chains of vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph, if one is present.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links.
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific output, if one is present.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// Compliance-regulations snapshot.
    pub compliance_regulations: ComplianceRegulationsSnapshot,
}
1136
1137#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1139pub struct EnhancedGenerationStatistics {
1140 pub total_entries: u64,
1142 pub total_line_items: u64,
1144 pub accounts_count: usize,
1146 pub companies_count: usize,
1148 pub period_months: u32,
1150 pub vendor_count: usize,
1152 pub customer_count: usize,
1153 pub material_count: usize,
1154 pub asset_count: usize,
1155 pub employee_count: usize,
1156 pub p2p_chain_count: usize,
1158 pub o2c_chain_count: usize,
1159 pub ap_invoice_count: usize,
1161 pub ar_invoice_count: usize,
1162 pub ocpm_event_count: usize,
1164 pub ocpm_object_count: usize,
1165 pub ocpm_case_count: usize,
1166 pub audit_engagement_count: usize,
1168 pub audit_workpaper_count: usize,
1169 pub audit_evidence_count: usize,
1170 pub audit_risk_count: usize,
1171 pub audit_finding_count: usize,
1172 pub audit_judgment_count: usize,
1173 #[serde(default)]
1175 pub audit_confirmation_count: usize,
1176 #[serde(default)]
1177 pub audit_confirmation_response_count: usize,
1178 #[serde(default)]
1180 pub audit_procedure_step_count: usize,
1181 #[serde(default)]
1182 pub audit_sample_count: usize,
1183 #[serde(default)]
1185 pub audit_analytical_result_count: usize,
1186 #[serde(default)]
1188 pub audit_ia_function_count: usize,
1189 #[serde(default)]
1190 pub audit_ia_report_count: usize,
1191 #[serde(default)]
1193 pub audit_related_party_count: usize,
1194 #[serde(default)]
1195 pub audit_related_party_transaction_count: usize,
1196 pub anomalies_injected: usize,
1198 pub data_quality_issues: usize,
1200 pub banking_customer_count: usize,
1202 pub banking_account_count: usize,
1203 pub banking_transaction_count: usize,
1204 pub banking_suspicious_count: usize,
1205 pub graph_export_count: usize,
1207 pub graph_node_count: usize,
1208 pub graph_edge_count: usize,
1209 #[serde(default)]
1211 pub llm_enrichment_ms: u64,
1212 #[serde(default)]
1214 pub llm_vendors_enriched: usize,
1215 #[serde(default)]
1217 pub diffusion_enhancement_ms: u64,
1218 #[serde(default)]
1220 pub diffusion_samples_generated: usize,
1221 #[serde(default)]
1223 pub causal_generation_ms: u64,
1224 #[serde(default)]
1226 pub causal_samples_generated: usize,
1227 #[serde(default)]
1229 pub causal_validation_passed: Option<bool>,
1230 #[serde(default)]
1232 pub sourcing_project_count: usize,
1233 #[serde(default)]
1234 pub rfx_event_count: usize,
1235 #[serde(default)]
1236 pub bid_count: usize,
1237 #[serde(default)]
1238 pub contract_count: usize,
1239 #[serde(default)]
1240 pub catalog_item_count: usize,
1241 #[serde(default)]
1242 pub scorecard_count: usize,
1243 #[serde(default)]
1245 pub financial_statement_count: usize,
1246 #[serde(default)]
1247 pub bank_reconciliation_count: usize,
1248 #[serde(default)]
1250 pub payroll_run_count: usize,
1251 #[serde(default)]
1252 pub time_entry_count: usize,
1253 #[serde(default)]
1254 pub expense_report_count: usize,
1255 #[serde(default)]
1256 pub benefit_enrollment_count: usize,
1257 #[serde(default)]
1258 pub pension_plan_count: usize,
1259 #[serde(default)]
1260 pub stock_grant_count: usize,
1261 #[serde(default)]
1263 pub revenue_contract_count: usize,
1264 #[serde(default)]
1265 pub impairment_test_count: usize,
1266 #[serde(default)]
1267 pub business_combination_count: usize,
1268 #[serde(default)]
1269 pub ecl_model_count: usize,
1270 #[serde(default)]
1271 pub provision_count: usize,
1272 #[serde(default)]
1274 pub production_order_count: usize,
1275 #[serde(default)]
1276 pub quality_inspection_count: usize,
1277 #[serde(default)]
1278 pub cycle_count_count: usize,
1279 #[serde(default)]
1280 pub bom_component_count: usize,
1281 #[serde(default)]
1282 pub inventory_movement_count: usize,
1283 #[serde(default)]
1285 pub sales_quote_count: usize,
1286 #[serde(default)]
1287 pub kpi_count: usize,
1288 #[serde(default)]
1289 pub budget_line_count: usize,
1290 #[serde(default)]
1292 pub tax_jurisdiction_count: usize,
1293 #[serde(default)]
1294 pub tax_code_count: usize,
1295 #[serde(default)]
1297 pub esg_emission_count: usize,
1298 #[serde(default)]
1299 pub esg_disclosure_count: usize,
1300 #[serde(default)]
1302 pub ic_matched_pair_count: usize,
1303 #[serde(default)]
1304 pub ic_elimination_count: usize,
1305 #[serde(default)]
1307 pub ic_transaction_count: usize,
1308 #[serde(default)]
1310 pub fa_subledger_count: usize,
1311 #[serde(default)]
1313 pub inventory_subledger_count: usize,
1314 #[serde(default)]
1316 pub treasury_debt_instrument_count: usize,
1317 #[serde(default)]
1319 pub treasury_hedging_instrument_count: usize,
1320 #[serde(default)]
1322 pub project_count: usize,
1323 #[serde(default)]
1325 pub project_change_order_count: usize,
1326 #[serde(default)]
1328 pub tax_provision_count: usize,
1329 #[serde(default)]
1331 pub opening_balance_count: usize,
1332 #[serde(default)]
1334 pub subledger_reconciliation_count: usize,
1335 #[serde(default)]
1337 pub tax_line_count: usize,
1338 #[serde(default)]
1340 pub project_cost_line_count: usize,
1341 #[serde(default)]
1343 pub cash_position_count: usize,
1344 #[serde(default)]
1346 pub cash_forecast_count: usize,
1347 #[serde(default)]
1349 pub cash_pool_count: usize,
1350 #[serde(default)]
1352 pub process_evolution_event_count: usize,
1353 #[serde(default)]
1355 pub organizational_event_count: usize,
1356 #[serde(default)]
1358 pub counterfactual_pair_count: usize,
1359 #[serde(default)]
1361 pub red_flag_count: usize,
1362 #[serde(default)]
1364 pub collusion_ring_count: usize,
1365 #[serde(default)]
1367 pub temporal_version_chain_count: usize,
1368 #[serde(default)]
1370 pub entity_relationship_node_count: usize,
1371 #[serde(default)]
1373 pub entity_relationship_edge_count: usize,
1374 #[serde(default)]
1376 pub cross_process_link_count: usize,
1377 #[serde(default)]
1379 pub disruption_event_count: usize,
1380 #[serde(default)]
1382 pub industry_gl_account_count: usize,
1383 #[serde(default)]
1385 pub period_close_je_count: usize,
1386}
1387
/// Orchestrator state shared by the generation phases implemented on this type.
///
/// Instances are created through `new`, `with_defaults`, `from_fingerprint` or
/// `from_fingerprint_data` and then customised with the `with_*` builder methods.
pub struct EnhancedOrchestrator {
    /// Generator configuration supplied at construction time.
    config: GeneratorConfig,
    /// Per-phase switches (for example `show_progress` and `generate_master_data`).
    phase_config: PhaseConfig,
    /// Shared chart of accounts; `None` right after construction.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts as `MasterDataSnapshot::default()` and is moved
    /// into the result by `generate`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or a random value when that is unset.
    seed: u64,
    /// Progress-bar container; only populated by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard consulted between phases for degradation levels.
    resource_guard: ResourceGuard,
    /// Output directory, set by `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generator specs, filled in by `from_fingerprint_data`.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry built from `config.country_packs` (or builtin-only).
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink that receives items as phases complete.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1407
1408impl EnhancedOrchestrator {
1409 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1411 datasynth_config::validate_config(&config)?;
1412
1413 let seed = config.global.seed.unwrap_or_else(rand::random);
1414
1415 let resource_guard = Self::build_resource_guard(&config, None);
1417
1418 let country_pack_registry = match &config.country_packs {
1420 Some(cp) => {
1421 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1422 .map_err(|e| SynthError::config(e.to_string()))?
1423 }
1424 None => datasynth_core::CountryPackRegistry::builtin_only()
1425 .map_err(|e| SynthError::config(e.to_string()))?,
1426 };
1427
1428 Ok(Self {
1429 config,
1430 phase_config,
1431 coa: None,
1432 master_data: MasterDataSnapshot::default(),
1433 seed,
1434 multi_progress: None,
1435 resource_guard,
1436 output_path: None,
1437 copula_generators: Vec::new(),
1438 country_pack_registry,
1439 phase_sink: None,
1440 })
1441 }
1442
1443 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1445 Self::new(config, PhaseConfig::default())
1446 }
1447
1448 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1450 self.phase_sink = Some(sink);
1451 self
1452 }
1453
1454 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1456 if let Some(ref sink) = self.phase_sink {
1457 for item in items {
1458 if let Ok(value) = serde_json::to_value(item) {
1459 if let Err(e) = sink.emit(phase, type_name, &value) {
1460 warn!(
1461 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1462 );
1463 }
1464 }
1465 }
1466 if let Err(e) = sink.phase_complete(phase) {
1467 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1468 }
1469 }
1470 }
1471
1472 pub fn with_progress(mut self, show: bool) -> Self {
1474 self.phase_config.show_progress = show;
1475 if show {
1476 self.multi_progress = Some(MultiProgress::new());
1477 }
1478 self
1479 }
1480
1481 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1483 let path = path.into();
1484 self.output_path = Some(path.clone());
1485 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1487 self
1488 }
1489
1490 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1492 &self.country_pack_registry
1493 }
1494
1495 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1497 self.country_pack_registry.get_by_str(country)
1498 }
1499
1500 fn primary_country_code(&self) -> &str {
1503 self.config
1504 .companies
1505 .first()
1506 .map(|c| c.country.as_str())
1507 .unwrap_or("US")
1508 }
1509
1510 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1512 self.country_pack_for(self.primary_country_code())
1513 }
1514
1515 fn resolve_coa_framework(&self) -> CoAFramework {
1517 if self.config.accounting_standards.enabled {
1518 match self.config.accounting_standards.framework {
1519 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1520 return CoAFramework::FrenchPcg;
1521 }
1522 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1523 return CoAFramework::GermanSkr04;
1524 }
1525 _ => {}
1526 }
1527 }
1528 let pack = self.primary_pack();
1530 match pack.accounting.framework.as_str() {
1531 "french_gaap" => CoAFramework::FrenchPcg,
1532 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1533 _ => CoAFramework::UsGaap,
1534 }
1535 }
1536
1537 pub fn has_copulas(&self) -> bool {
1542 !self.copula_generators.is_empty()
1543 }
1544
1545 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1551 &self.copula_generators
1552 }
1553
1554 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1558 &mut self.copula_generators
1559 }
1560
1561 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1565 self.copula_generators
1566 .iter_mut()
1567 .find(|c| c.name == copula_name)
1568 .map(|c| c.generator.sample())
1569 }
1570
1571 pub fn from_fingerprint(
1594 fingerprint_path: &std::path::Path,
1595 phase_config: PhaseConfig,
1596 scale: f64,
1597 ) -> SynthResult<Self> {
1598 info!("Loading fingerprint from: {}", fingerprint_path.display());
1599
1600 let reader = FingerprintReader::new();
1602 let fingerprint = reader
1603 .read_from_file(fingerprint_path)
1604 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1605
1606 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1607 }
1608
1609 pub fn from_fingerprint_data(
1616 fingerprint: Fingerprint,
1617 phase_config: PhaseConfig,
1618 scale: f64,
1619 ) -> SynthResult<Self> {
1620 info!(
1621 "Synthesizing config from fingerprint (version: {}, tables: {})",
1622 fingerprint.manifest.version,
1623 fingerprint.schema.tables.len()
1624 );
1625
1626 let seed: u64 = rand::random();
1628
1629 let options = SynthesisOptions {
1631 scale,
1632 seed: Some(seed),
1633 preserve_correlations: true,
1634 inject_anomalies: true,
1635 };
1636 let synthesizer = ConfigSynthesizer::with_options(options);
1637
1638 let synthesis_result = synthesizer
1640 .synthesize_full(&fingerprint, seed)
1641 .map_err(|e| {
1642 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1643 })?;
1644
1645 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1647 Self::base_config_for_industry(industry)
1648 } else {
1649 Self::base_config_for_industry("manufacturing")
1650 };
1651
1652 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1654
1655 info!(
1657 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1658 fingerprint.schema.tables.len(),
1659 scale,
1660 synthesis_result.copula_generators.len()
1661 );
1662
1663 if !synthesis_result.copula_generators.is_empty() {
1664 for spec in &synthesis_result.copula_generators {
1665 info!(
1666 " Copula '{}' for table '{}': {} columns",
1667 spec.name,
1668 spec.table,
1669 spec.columns.len()
1670 );
1671 }
1672 }
1673
1674 let mut orchestrator = Self::new(config, phase_config)?;
1676
1677 orchestrator.copula_generators = synthesis_result.copula_generators;
1679
1680 Ok(orchestrator)
1681 }
1682
1683 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1685 use datasynth_config::presets::create_preset;
1686 use datasynth_config::TransactionVolume;
1687 use datasynth_core::models::{CoAComplexity, IndustrySector};
1688
1689 let sector = match industry.to_lowercase().as_str() {
1690 "manufacturing" => IndustrySector::Manufacturing,
1691 "retail" => IndustrySector::Retail,
1692 "financial" | "financial_services" => IndustrySector::FinancialServices,
1693 "healthcare" => IndustrySector::Healthcare,
1694 "technology" | "tech" => IndustrySector::Technology,
1695 _ => IndustrySector::Manufacturing,
1696 };
1697
1698 create_preset(
1700 sector,
1701 1, 12, CoAComplexity::Medium,
1704 TransactionVolume::TenK,
1705 )
1706 }
1707
1708 fn apply_config_patch(
1710 mut config: GeneratorConfig,
1711 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1712 ) -> GeneratorConfig {
1713 use datasynth_fingerprint::synthesis::ConfigValue;
1714
1715 for (key, value) in patch.values() {
1716 match (key.as_str(), value) {
1717 ("transactions.count", ConfigValue::Integer(n)) => {
1720 info!(
1721 "Fingerprint suggests {} transactions (apply via company volumes)",
1722 n
1723 );
1724 }
1725 ("global.period_months", ConfigValue::Integer(n)) => {
1726 config.global.period_months = (*n).clamp(1, 120) as u32;
1727 }
1728 ("global.start_date", ConfigValue::String(s)) => {
1729 config.global.start_date = s.clone();
1730 }
1731 ("global.seed", ConfigValue::Integer(n)) => {
1732 config.global.seed = Some(*n as u64);
1733 }
1734 ("fraud.enabled", ConfigValue::Bool(b)) => {
1735 config.fraud.enabled = *b;
1736 }
1737 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1738 config.fraud.fraud_rate = *f;
1739 }
1740 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1741 config.data_quality.enabled = *b;
1742 }
1743 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1745 config.fraud.enabled = *b;
1746 }
1747 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1748 config.fraud.fraud_rate = *f;
1749 }
1750 _ => {
1751 debug!("Ignoring unknown config patch key: {}", key);
1752 }
1753 }
1754 }
1755
1756 config
1757 }
1758
1759 fn build_resource_guard(
1761 config: &GeneratorConfig,
1762 output_path: Option<PathBuf>,
1763 ) -> ResourceGuard {
1764 let mut builder = ResourceGuardBuilder::new();
1765
1766 if config.global.memory_limit_mb > 0 {
1768 builder = builder.memory_limit(config.global.memory_limit_mb);
1769 }
1770
1771 if let Some(path) = output_path {
1773 builder = builder.output_path(path).min_free_disk(100); }
1775
1776 builder = builder.conservative();
1778
1779 builder.build()
1780 }
1781
1782 fn check_resources(&self) -> SynthResult<DegradationLevel> {
1787 self.resource_guard.check()
1788 }
1789
1790 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1792 let level = self.resource_guard.check()?;
1793
1794 if level != DegradationLevel::Normal {
1795 warn!(
1796 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1797 phase,
1798 level,
1799 self.resource_guard.current_memory_mb(),
1800 self.resource_guard.available_disk_mb()
1801 );
1802 }
1803
1804 Ok(level)
1805 }
1806
1807 fn get_degradation_actions(&self) -> DegradationActions {
1809 self.resource_guard.get_actions()
1810 }
1811
1812 fn check_memory_limit(&self) -> SynthResult<()> {
1814 self.check_resources()?;
1815 Ok(())
1816 }
1817
    /// Runs the complete generation workflow and assembles an
    /// `EnhancedGenerationResult`.
    ///
    /// The phases run strictly in the order written below. Several of them
    /// append to the shared `entries` vector, so a phase only sees the journal
    /// entries produced by the phases that ran before it. When a phase sink is
    /// installed, selected outputs are streamed to it as they become available.
    ///
    /// `self.master_data` is moved into the result (via `std::mem::take`), which
    /// leaves a default snapshot behind in the orchestrator.
    ///
    /// # Errors
    ///
    /// Returns `SynthError::resource` when the initial resource check reports
    /// `DegradationLevel::Emergency`, and propagates the first error returned by
    /// any phase invoked with `?`.
    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
        info!("Starting enhanced generation workflow");
        info!(
            "Config: industry={:?}, period_months={}, companies={}",
            self.config.global.industry,
            self.config.global.period_months,
            self.config.companies.len()
        );

        // Refuse to start when the guard already reports an emergency.
        let initial_level = self.check_resources_with_log("initial")?;
        if initial_level == DegradationLevel::Emergency {
            return Err(SynthError::resource(
                "Insufficient resources to start generation",
            ));
        }

        // Statistics accumulator handed to every phase.
        let mut stats = EnhancedGenerationStatistics {
            companies_count: self.config.companies.len(),
            period_months: self.config.global.period_months,
            ..Default::default()
        };

        // Chart of accounts.
        let coa = self.phase_chart_of_accounts(&mut stats)?;

        // Master data (stored on `self.master_data`).
        self.phase_master_data(&mut stats)?;

        // Stream master data. Each call also signals `phase_complete` for
        // "master_data", so the sink sees that signal three times.
        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
        self.emit_phase_items("master_data", "Material", &self.master_data.materials);

        // Document flows, the linked subledger, and FA acquisition journal entries.
        let (mut document_flows, subledger, fa_journal_entries) =
            self.phase_document_flows(&mut stats)?;

        // Stream the document-flow documents.
        self.emit_phase_items(
            "document_flows",
            "PurchaseOrder",
            &document_flows.purchase_orders,
        );
        self.emit_phase_items(
            "document_flows",
            "GoodsReceipt",
            &document_flows.goods_receipts,
        );
        self.emit_phase_items(
            "document_flows",
            "VendorInvoice",
            &document_flows.vendor_invoices,
        );
        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);

        // Opening balances.
        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;

        // Convert the opening balances into journal entries. They are merged in
        // front of the regular entries further down; the message below is
        // logged before that merge actually happens.
        let opening_balance_jes: Vec<JournalEntry> = opening_balances
            .iter()
            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
            .collect();
        if !opening_balance_jes.is_empty() {
            debug!(
                "Prepending {} opening balance JEs to entries",
                opening_balance_jes.len()
            );
        }

        // Main journal entries.
        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;

        // Put the opening balance entries first, followed by the generated entries.
        if !opening_balance_jes.is_empty() {
            let mut combined = opening_balance_jes;
            combined.extend(entries);
            entries = combined;
        }

        // Append the fixed-asset acquisition entries returned by the document-flow phase.
        if !fa_journal_entries.is_empty() {
            debug!(
                "Appending {} FA acquisition JEs to main entries",
                fa_journal_entries.len()
            );
            entries.extend(fa_journal_entries);
        }

        // Counterfactual pairs are derived from the entries collected so far.
        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;

        // Degradation actions are captured once here and reused by the anomaly
        // and data-quality injection phases below.
        let actions = self.get_degradation_actions();

        // Sourcing data.
        let sourcing = self.phase_sourcing_data(&mut stats)?;

        // Give every purchase order without a contract the first sourcing
        // contract that has the same vendor id.
        if !sourcing.contracts.is_empty() {
            let mut linked_count = 0usize;
            for chain in &mut document_flows.p2p_chains {
                if chain.purchase_order.contract_id.is_none() {
                    if let Some(contract) = sourcing
                        .contracts
                        .iter()
                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
                    {
                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
                        linked_count += 1;
                    }
                }
            }
            if linked_count > 0 {
                debug!(
                    "Linked {} purchase orders to S2C contracts by vendor match",
                    linked_count
                );
            }
        }

        // Intercompany data.
        let intercompany = self.phase_intercompany(&entries, &mut stats)?;

        // Copy the seller- and buyer-side IC journal entries into the main list;
        // the originals stay inside `intercompany`.
        if !intercompany.seller_journal_entries.is_empty()
            || !intercompany.buyer_journal_entries.is_empty()
        {
            let ic_je_count = intercompany.seller_journal_entries.len()
                + intercompany.buyer_journal_entries.len();
            entries.extend(intercompany.seller_journal_entries.iter().cloned());
            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
            debug!(
                "Appended {} IC journal entries to main entries",
                ic_je_count
            );
        }

        // Convert the elimination entries into journal entries and append them.
        if !intercompany.elimination_entries.is_empty() {
            let elim_jes = datasynth_generators::elimination_to_journal_entries(
                &intercompany.elimination_entries,
            );
            if !elim_jes.is_empty() {
                debug!(
                    "Appended {} elimination journal entries to main entries",
                    elim_jes.len()
                );
                entries.extend(elim_jes);
            }
        }

        // HR data.
        let hr = self.phase_hr_data(&mut stats)?;

        // Journal entries derived from payroll runs.
        if !hr.payroll_runs.is_empty() {
            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
            entries.extend(payroll_jes);
        }

        // Pension journal entries (copied; originals stay in `hr`).
        if !hr.pension_journal_entries.is_empty() {
            debug!(
                "Generated {} JEs from pension plans",
                hr.pension_journal_entries.len()
            );
            entries.extend(hr.pension_journal_entries.iter().cloned());
        }

        // Stock-based compensation journal entries (copied; originals stay in `hr`).
        if !hr.stock_comp_journal_entries.is_empty() {
            debug!(
                "Generated {} JEs from stock-based compensation",
                hr.stock_comp_journal_entries.len()
            );
            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
        }

        // Manufacturing data.
        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;

        // Journal entries derived from production orders.
        if !manufacturing_snap.production_orders.is_empty() {
            let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
            debug!("Generated {} JEs from production orders", mfg_jes.len());
            entries.extend(mfg_jes);
        }

        // Refresh the entry and line-item totals.
        // NOTE(review): `entries` is still passed mutably to later phases and is
        // extended with ECL / provision JEs further down; this function does not
        // recompute these totals afterwards — confirm whether those phases
        // update `stats` themselves or whether the totals are meant to exclude them.
        if !entries.is_empty() {
            stats.total_entries = entries.len() as u64;
            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
            debug!(
                "Final entry count: {}, line items: {} (after all JE-generating phases)",
                stats.total_entries, stats.total_line_items
            );
        }

        // Apply internal controls to every entry collected so far.
        if self.config.internal_controls.enabled && !entries.is_empty() {
            info!("Phase 7b: Applying internal controls to journal entries");
            let control_config = ControlGeneratorConfig {
                exception_rate: self.config.internal_controls.exception_rate,
                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
                enable_sox_marking: true,
                // Falls back to 10000 when the configured value cannot be
                // represented as a `Decimal`.
                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
                    self.config.internal_controls.sox_materiality_threshold,
                )
                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
                ..Default::default()
            };
            // NOTE(review): `self.seed + 99` is unchecked u64 arithmetic; a seed
            // within 99 of `u64::MAX` overflows (a panic in builds with overflow
            // checks enabled) — consider `wrapping_add`.
            let mut control_gen = ControlGenerator::with_config(self.seed + 99, control_config);
            for entry in &mut entries {
                control_gen.apply_controls(entry, &coa);
            }
            let with_controls = entries
                .iter()
                .filter(|e| !e.header.control_ids.is_empty())
                .count();
            info!(
                "Applied controls to {} entries ({} with control IDs assigned)",
                entries.len(),
                with_controls
            );
        }

        // Stream the journal entries as they stand now, i.e. before anomaly
        // injection and before any entries added by later phases.
        self.emit_phase_items("journal_entries", "JournalEntry", &entries);

        // Anomaly injection receives mutable access to the entries.
        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;

        self.emit_phase_items(
            "anomaly_injection",
            "LabeledAnomaly",
            &anomaly_labels.labels,
        );

        // Red flags.
        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;

        self.emit_phase_items("red_flags", "RedFlag", &red_flags);

        // Collusion rings.
        let collusion_rings = self.phase_collusion_rings(&mut stats)?;

        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);

        // Balance validation runs on the entries as they are at this point; the
        // data-quality, period-close and ECL / provision steps below come later.
        let balance_validation = self.phase_balance_validation(&entries)?;

        // Subledger reconciliation.
        let subledger_reconciliation =
            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;

        // Data quality injection receives mutable access to the entries.
        let data_quality_stats =
            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;

        // Period close receives mutable access to the entries.
        self.phase_period_close(&mut entries, &subledger, &mut stats)?;

        // Audit data.
        let audit = self.phase_audit_data(&entries, &mut stats)?;

        // Banking data.
        let banking = self.phase_banking_data(&mut stats)?;

        // Graph export.
        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;

        // The next three phases return nothing that is checked here, so they
        // cannot abort the workflow from this function.
        self.phase_llm_enrichment(&mut stats);

        self.phase_diffusion_enhancement(&mut stats);

        self.phase_causal_overlay(&mut stats);

        // Financial reporting; kept mutable because the notes step further down
        // takes it by mutable reference.
        let mut financial_reporting = self.phase_financial_reporting(
            &document_flows,
            &entries,
            &coa,
            &hr,
            &audit,
            &mut stats,
        )?;

        // Accounting standards, fed with the AR aging reports and current entries.
        let accounting_standards =
            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;

        // Append ECL journal entries. These are added after balance validation
        // and financial reporting have already run.
        if !accounting_standards.ecl_journal_entries.is_empty() {
            debug!(
                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
                accounting_standards.ecl_journal_entries.len()
            );
            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
        }

        // Append provision journal entries (same ordering caveat as above).
        if !accounting_standards.provision_journal_entries.is_empty() {
            debug!(
                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
                accounting_standards.provision_journal_entries.len()
            );
            entries.extend(
                accounting_standards
                    .provision_journal_entries
                    .iter()
                    .cloned(),
            );
        }

        // OCPM events built from the snapshots of the earlier phases.
        let ocpm = self.phase_ocpm_events(
            &document_flows,
            &sourcing,
            &hr,
            &manufacturing_snap,
            &banking,
            &audit,
            &financial_reporting,
            &mut stats,
        )?;

        // Stream the OCPM events when an event log was produced.
        if let Some(ref event_log) = ocpm.event_log {
            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
        }

        // Sales quotes, KPIs and budgets.
        let sales_kpi_budgets =
            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;

        // Tax data.
        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;

        // Notes to the financial statements are written into `financial_reporting`.
        self.generate_notes_to_financial_statements(
            &mut financial_reporting,
            &accounting_standards,
            &tax,
            &hr,
            &audit,
        );

        // ESG data.
        let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;

        // Treasury data.
        let treasury =
            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;

        // Project accounting.
        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;

        // Process evolution and organizational events.
        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;

        // Disruption events.
        let disruption_events = self.phase_disruption_events(&mut stats)?;

        // Temporal attributes.
        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;

        // Entity relationship graph and cross-process links.
        let (entity_relationship_graph, cross_process_links) =
            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;

        // Industry-specific output (the result is used as returned, without `?`).
        let industry_output = self.phase_industry_data(&mut stats);

        // Compliance regulations.
        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;

        // Hypergraph export over the outputs collected so far.
        self.phase_hypergraph_export(
            &coa,
            &entries,
            &document_flows,
            &sourcing,
            &hr,
            &manufacturing_snap,
            &banking,
            &audit,
            &financial_reporting,
            &ocpm,
            &compliance_regulations,
            &mut stats,
        )?;

        // Additional graphs are built only when graph export is switched on.
        if self.phase_config.generate_graph_export {
            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
        }

        // Informational notes about config sections this workflow only partly uses.
        if self.config.streaming.enabled {
            info!("Note: streaming config is enabled but batch mode does not use it");
        }
        if self.config.vendor_network.enabled {
            debug!("Vendor network config available; relationship graph generation is partial");
        }
        if self.config.customer_segmentation.enabled {
            debug!("Customer segmentation config available; segment-aware generation is partial");
        }

        // Log the resource usage summary.
        let resource_stats = self.resource_guard.stats();
        info!(
            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
            resource_stats.disk.estimated_bytes_written,
            resource_stats.degradation_level
        );

        // Flush the stream sink; a failure is logged and does not fail the run.
        if let Some(ref sink) = self.phase_sink {
            if let Err(e) = sink.flush() {
                warn!("Stream sink flush failed: {e}");
            }
        }

        // Lineage graph.
        let lineage = self.build_lineage_graph();

        // Quality gates: `None` when disabled or when the named profile is unknown.
        let gate_result = if self.config.quality_gates.enabled {
            let profile_name = &self.config.quality_gates.profile;
            match datasynth_eval::gates::get_profile(profile_name) {
                Some(profile) => {
                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();

                    // The balance evaluation is filled in only when validation
                    // actually ran; it is reported as a single period.
                    if balance_validation.validated {
                        eval.coherence.balance =
                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
                                equation_balanced: balance_validation.is_balanced,
                                max_imbalance: (balance_validation.total_debits
                                    - balance_validation.total_credits)
                                    .abs(),
                                periods_evaluated: 1,
                                periods_imbalanced: if balance_validation.is_balanced {
                                    0
                                } else {
                                    1
                                },
                                period_results: Vec::new(),
                                companies_evaluated: self.config.companies.len(),
                            });
                    }

                    // Coherence passes exactly when the balance check passed.
                    eval.coherence.passes = balance_validation.is_balanced;
                    if !balance_validation.is_balanced {
                        eval.coherence
                            .failures
                            .push("Balance sheet equation not satisfied".to_string());
                    }

                    // The statistical score is a fixed value chosen by entry
                    // count (more than 10 entries gives 0.9, otherwise 0.5).
                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
                    eval.statistical.passes = !entries.is_empty();

                    // The quality score and pass flag are hard-coded here.
                    eval.quality.overall_score = 0.9; eval.quality.passes = true;

                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
                    info!(
                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
                        profile_name, result.gates_passed, result.gates_total, result.summary
                    );
                    Some(result)
                }
                None => {
                    warn!(
                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
                        profile_name
                    );
                    None
                }
            }
        } else {
            None
        };

        // The standard control catalogue is included only when internal
        // controls are enabled.
        let internal_controls = if self.config.internal_controls.enabled {
            InternalControl::standard_controls()
        } else {
            Vec::new()
        };

        Ok(EnhancedGenerationResult {
            // Take the chart out of the `Arc` when this is the only reference;
            // otherwise clone it.
            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
            // Moves the master data into the result, leaving a default snapshot on `self`.
            master_data: std::mem::take(&mut self.master_data),
            document_flows,
            subledger,
            ocpm,
            audit,
            banking,
            graph_export,
            sourcing,
            financial_reporting,
            hr,
            accounting_standards,
            manufacturing: manufacturing_snap,
            sales_kpi_budgets,
            tax,
            esg: esg_snap,
            treasury,
            project_accounting,
            process_evolution,
            organizational_events,
            disruption_events,
            intercompany,
            journal_entries: entries,
            anomaly_labels,
            balance_validation,
            data_quality_stats,
            statistics: stats,
            lineage: Some(lineage),
            gate_result,
            internal_controls,
            opening_balances,
            subledger_reconciliation,
            counterfactual_pairs,
            red_flags,
            collusion_rings,
            temporal_vendor_chains,
            entity_relationship_graph,
            cross_process_links,
            industry_output,
            compliance_regulations,
        })
    }
2377
2378 fn phase_chart_of_accounts(
2384 &mut self,
2385 stats: &mut EnhancedGenerationStatistics,
2386 ) -> SynthResult<Arc<ChartOfAccounts>> {
2387 info!("Phase 1: Generating Chart of Accounts");
2388 let coa = self.generate_coa()?;
2389 stats.accounts_count = coa.account_count();
2390 info!(
2391 "Chart of Accounts generated: {} accounts",
2392 stats.accounts_count
2393 );
2394 self.check_resources_with_log("post-coa")?;
2395 Ok(coa)
2396 }
2397
2398 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
2400 if self.phase_config.generate_master_data {
2401 info!("Phase 2: Generating Master Data");
2402 self.generate_master_data()?;
2403 stats.vendor_count = self.master_data.vendors.len();
2404 stats.customer_count = self.master_data.customers.len();
2405 stats.material_count = self.master_data.materials.len();
2406 stats.asset_count = self.master_data.assets.len();
2407 stats.employee_count = self.master_data.employees.len();
2408 info!(
2409 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
2410 stats.vendor_count, stats.customer_count, stats.material_count,
2411 stats.asset_count, stats.employee_count
2412 );
2413 self.check_resources_with_log("post-master-data")?;
2414 } else {
2415 debug!("Phase 2: Skipped (master data generation disabled)");
2416 }
2417 Ok(())
2418 }
2419
    /// Phase 3: builds the document flows and the subledger snapshot derived from them.
    ///
    /// Steps, in order:
    /// 1. When document-flow generation is enabled and vendor master data exists:
    ///    generate the P2P/O2C chains, link them into AP/AR invoices, apply payment
    ///    settlements, and build one AR and one AP aging report per configured company.
    /// 2. For every asset in master data, generate an FA subledger record plus its
    ///    acquisition journal entry.
    /// 3. For every material in master data, generate an inventory position.
    /// 4. Generate depreciation runs from the FA records and one inventory valuation
    ///    per configured company from the inventory positions.
    ///
    /// Steps 2-4 are not gated by the document-flow flag; they only depend on the
    /// corresponding master data / subledger records being non-empty.
    ///
    /// Returns `(document_flows, subledger, fa_journal_entries)`.
    ///
    /// # Errors
    /// Propagates errors from `generate_document_flows`,
    /// `link_document_flows_to_subledgers` and `check_resources_with_log`.
    ///
    /// NOTE(review): an unparsable `config.global.start_date` does not produce an error
    /// here; the aging, depreciation and valuation steps are silently skipped instead.
    fn phase_document_flows(
        &mut self,
        stats: &mut EnhancedGenerationStatistics,
    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
        let mut document_flows = DocumentFlowSnapshot::default();
        let mut subledger = SubledgerSnapshot::default();

        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
            info!("Phase 3: Generating Document Flows");
            self.generate_document_flows(&mut document_flows)?;
            stats.p2p_chain_count = document_flows.p2p_chains.len();
            stats.o2c_chain_count = document_flows.o2c_chains.len();
            info!(
                "Document flows generated: {} P2P chains, {} O2C chains",
                stats.p2p_chain_count, stats.o2c_chain_count
            );

            // Replace the default snapshot with the one derived from the generated flows.
            debug!("Phase 3b: Linking document flows to subledgers");
            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
            stats.ap_invoice_count = subledger.ap_invoices.len();
            stats.ar_invoice_count = subledger.ar_invoices.len();
            debug!(
                "Subledgers linked: {} AP invoices, {} AR invoices",
                stats.ap_invoice_count, stats.ar_invoice_count
            );

            // Settlements mutate the invoices in place, so they must run before the
            // aging reports below are built from those invoices.
            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
            debug!("Payment settlements applied to AP and AR subledgers");

            // Aging is only built when the configured start date parses.
            if let Ok(start_date) =
                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
            {
                // Last day of the generation period: start + period_months - 1 day.
                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
                    - chrono::Days::new(1);
                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
                for company in &self.config.companies {
                    // NOTE(review): the full invoice list is passed for every company;
                    // presumably `from_invoices` filters by company code — confirm.
                    let ar_report = ARAgingReport::from_invoices(
                        company.code.clone(),
                        &subledger.ar_invoices,
                        as_of_date,
                    );
                    subledger.ar_aging_reports.push(ar_report);

                    let ap_report = APAgingReport::from_invoices(
                        company.code.clone(),
                        &subledger.ap_invoices,
                        as_of_date,
                    );
                    subledger.ap_aging_reports.push(ap_report);
                }
                debug!(
                    "AR/AP aging reports built: {} AR, {} AP",
                    subledger.ar_aging_reports.len(),
                    subledger.ap_aging_reports.len()
                );
            }

            self.check_resources_with_log("post-document-flows")?;
        } else {
            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
        }

        // Acquisition JEs are collected separately and handed back to the caller.
        let mut fa_journal_entries = Vec::new();
        if !self.master_data.assets.is_empty() {
            debug!("Generating FA subledger records");
            // All FA records are booked against the first configured company,
            // falling back to "1000" / "USD" when no company is configured.
            let company_code = self
                .config
                .companies
                .first()
                .map(|c| c.code.as_str())
                .unwrap_or("1000");
            let currency = self
                .config
                .companies
                .first()
                .map(|c| c.currency.as_str())
                .unwrap_or("USD");

            // Dedicated RNG stream for FA generation, derived from the run seed.
            let mut fa_gen = datasynth_generators::FAGenerator::new(
                datasynth_generators::FAGeneratorConfig::default(),
                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
            );

            for asset in &self.master_data.assets {
                let (record, je) = fa_gen.generate_asset_acquisition(
                    company_code,
                    // The asset class is passed as its `Debug` rendering.
                    &format!("{:?}", asset.asset_class),
                    &asset.description,
                    asset.acquisition_date,
                    currency,
                    asset.cost_center.as_deref(),
                );
                subledger.fa_records.push(record);
                fa_journal_entries.push(je);
            }

            stats.fa_subledger_count = subledger.fa_records.len();
            debug!(
                "FA subledger records generated: {} (with {} acquisition JEs)",
                stats.fa_subledger_count,
                fa_journal_entries.len()
            );
        }

        if !self.master_data.materials.is_empty() {
            debug!("Generating Inventory subledger records");
            // Same first-company convention (and fallbacks) as the FA block above.
            let first_company = self.config.companies.first();
            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
            let inv_currency = first_company
                .map(|c| c.currency.clone())
                .unwrap_or_else(|| "USD".to_string());

            // Dedicated RNG stream for inventory generation, derived from the run seed.
            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
                datasynth_generators::InventoryGeneratorConfig::default(),
                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
                inv_currency.clone(),
            );

            for (i, material) in self.master_data.materials.iter().enumerate() {
                // Materials are spread round-robin over PLANT01..PLANT03 and
                // SL-001..SL-010 based on their index.
                let plant = format!("PLANT{:02}", (i % 3) + 1);
                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
                // The safety stock is round-tripped through its string form; whenever
                // that string does not parse as an `i64`, the quantity defaults to 100.
                let initial_qty = rust_decimal::Decimal::from(
                    material
                        .safety_stock
                        .to_string()
                        .parse::<i64>()
                        .unwrap_or(100),
                );

                let position = inv_gen.generate_position(
                    company_code,
                    &plant,
                    &storage_loc,
                    &material.material_id,
                    &material.description,
                    initial_qty,
                    Some(material.standard_cost),
                    &inv_currency,
                );
                subledger.inventory_positions.push(position);
            }

            stats.inventory_subledger_count = subledger.inventory_positions.len();
            debug!(
                "Inventory subledger records generated: {}",
                stats.inventory_subledger_count
            );
        }

        if !subledger.fa_records.is_empty() {
            // Depreciation runs are only produced when the start date parses.
            if let Ok(start_date) =
                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
            {
                let company_code = self
                    .config
                    .companies
                    .first()
                    .map(|c| c.code.as_str())
                    .unwrap_or("1000");
                let fiscal_year = start_date.year();
                let start_period = start_date.month();
                // Last period covered by the run, capped at period 12.
                let end_period =
                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);

                let depr_cfg = FaDepreciationScheduleConfig {
                    fiscal_year,
                    start_period,
                    end_period,
                    seed_offset: 800,
                };
                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
                let runs = depr_gen.generate(company_code, &subledger.fa_records);
                let run_count = runs.len();
                subledger.depreciation_runs = runs;
                debug!(
                    "Depreciation runs generated: {} runs for {} periods",
                    run_count, self.config.global.period_months
                );
            }
        }

        if !subledger.inventory_positions.is_empty() {
            // Valuations are only produced when the start date parses.
            if let Ok(start_date) =
                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
            {
                // Last day of the generation period (same formula as the aging date).
                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
                    - chrono::Days::new(1);

                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);

                // One valuation result per configured company.
                // NOTE(review): every company receives the full position list —
                // presumably the generator filters by company code; confirm.
                for company in &self.config.companies {
                    let result = inv_val_gen.generate(
                        &company.code,
                        &subledger.inventory_positions,
                        as_of_date,
                    );
                    subledger.inventory_valuations.push(result);
                }
                debug!(
                    "Inventory valuations generated: {} company reports",
                    subledger.inventory_valuations.len()
                );
            }
        }

        Ok((document_flows, subledger, fa_journal_entries))
    }
2648
2649 #[allow(clippy::too_many_arguments)]
2651 fn phase_ocpm_events(
2652 &mut self,
2653 document_flows: &DocumentFlowSnapshot,
2654 sourcing: &SourcingSnapshot,
2655 hr: &HrSnapshot,
2656 manufacturing: &ManufacturingSnapshot,
2657 banking: &BankingSnapshot,
2658 audit: &AuditSnapshot,
2659 financial_reporting: &FinancialReportingSnapshot,
2660 stats: &mut EnhancedGenerationStatistics,
2661 ) -> SynthResult<OcpmSnapshot> {
2662 if self.phase_config.generate_ocpm_events {
2663 info!("Phase 3c: Generating OCPM Events");
2664 let ocpm_snapshot = self.generate_ocpm_events(
2665 document_flows,
2666 sourcing,
2667 hr,
2668 manufacturing,
2669 banking,
2670 audit,
2671 financial_reporting,
2672 )?;
2673 stats.ocpm_event_count = ocpm_snapshot.event_count;
2674 stats.ocpm_object_count = ocpm_snapshot.object_count;
2675 stats.ocpm_case_count = ocpm_snapshot.case_count;
2676 info!(
2677 "OCPM events generated: {} events, {} objects, {} cases",
2678 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
2679 );
2680 self.check_resources_with_log("post-ocpm")?;
2681 Ok(ocpm_snapshot)
2682 } else {
2683 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
2684 Ok(OcpmSnapshot::default())
2685 }
2686 }
2687
2688 fn phase_journal_entries(
2690 &mut self,
2691 coa: &Arc<ChartOfAccounts>,
2692 document_flows: &DocumentFlowSnapshot,
2693 _stats: &mut EnhancedGenerationStatistics,
2694 ) -> SynthResult<Vec<JournalEntry>> {
2695 let mut entries = Vec::new();
2696
2697 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
2699 debug!("Phase 4a: Generating JEs from document flows");
2700 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
2701 debug!("Generated {} JEs from document flows", flow_entries.len());
2702 entries.extend(flow_entries);
2703 }
2704
2705 if self.phase_config.generate_journal_entries {
2707 info!("Phase 4: Generating Journal Entries");
2708 let je_entries = self.generate_journal_entries(coa)?;
2709 info!("Generated {} standalone journal entries", je_entries.len());
2710 entries.extend(je_entries);
2711 } else {
2712 debug!("Phase 4: Skipped (journal entry generation disabled)");
2713 }
2714
2715 if !entries.is_empty() {
2716 self.check_resources_with_log("post-journal-entries")?;
2719 }
2720
2721 Ok(entries)
2722 }
2723
2724 fn phase_anomaly_injection(
2726 &mut self,
2727 entries: &mut [JournalEntry],
2728 actions: &DegradationActions,
2729 stats: &mut EnhancedGenerationStatistics,
2730 ) -> SynthResult<AnomalyLabels> {
2731 if self.phase_config.inject_anomalies
2732 && !entries.is_empty()
2733 && !actions.skip_anomaly_injection
2734 {
2735 info!("Phase 5: Injecting Anomalies");
2736 let result = self.inject_anomalies(entries)?;
2737 stats.anomalies_injected = result.labels.len();
2738 info!("Injected {} anomalies", stats.anomalies_injected);
2739 self.check_resources_with_log("post-anomaly-injection")?;
2740 Ok(result)
2741 } else if actions.skip_anomaly_injection {
2742 warn!("Phase 5: Skipped due to resource degradation");
2743 Ok(AnomalyLabels::default())
2744 } else {
2745 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
2746 Ok(AnomalyLabels::default())
2747 }
2748 }
2749
2750 fn phase_balance_validation(
2752 &mut self,
2753 entries: &[JournalEntry],
2754 ) -> SynthResult<BalanceValidationResult> {
2755 if self.phase_config.validate_balances && !entries.is_empty() {
2756 debug!("Phase 6: Validating Balances");
2757 let balance_validation = self.validate_journal_entries(entries)?;
2758 if balance_validation.is_balanced {
2759 debug!("Balance validation passed");
2760 } else {
2761 warn!(
2762 "Balance validation found {} errors",
2763 balance_validation.validation_errors.len()
2764 );
2765 }
2766 Ok(balance_validation)
2767 } else {
2768 Ok(BalanceValidationResult::default())
2769 }
2770 }
2771
2772 fn phase_data_quality_injection(
2774 &mut self,
2775 entries: &mut [JournalEntry],
2776 actions: &DegradationActions,
2777 stats: &mut EnhancedGenerationStatistics,
2778 ) -> SynthResult<DataQualityStats> {
2779 if self.phase_config.inject_data_quality
2780 && !entries.is_empty()
2781 && !actions.skip_data_quality
2782 {
2783 info!("Phase 7: Injecting Data Quality Variations");
2784 let dq_stats = self.inject_data_quality(entries)?;
2785 stats.data_quality_issues = dq_stats.records_with_issues;
2786 info!("Injected {} data quality issues", stats.data_quality_issues);
2787 self.check_resources_with_log("post-data-quality")?;
2788 Ok(dq_stats)
2789 } else if actions.skip_data_quality {
2790 warn!("Phase 7: Skipped due to resource degradation");
2791 Ok(DataQualityStats::default())
2792 } else {
2793 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2794 Ok(DataQualityStats::default())
2795 }
2796 }
2797
    /// Phase 10b: appends period-close journal entries to `entries`.
    ///
    /// Three kinds of closing entries are generated, all dated on the last day of the
    /// generation period and tagged document type `"CL"`, created by `"CLOSE_ENGINE"`:
    /// 1. One depreciation entry per eligible FA subledger record.
    /// 2. Per company, a tax provision entry when pre-tax income is positive.
    /// 3. Per company, an entry moving net income between income summary and
    ///    retained earnings.
    ///
    /// The new entries are emitted via `emit_phase_items`, appended to `entries`, and
    /// `stats` (`period_close_je_count`, `total_entries`, `total_line_items`) is
    /// refreshed. The phase is a no-op when period close is disabled or `entries` is
    /// empty.
    ///
    /// # Errors
    /// Returns a config error when `config.global.start_date` is not a valid
    /// `%Y-%m-%d` date.
    fn phase_period_close(
        &mut self,
        entries: &mut Vec<JournalEntry>,
        subledger: &SubledgerSnapshot,
        stats: &mut EnhancedGenerationStatistics,
    ) -> SynthResult<()> {
        if !self.phase_config.generate_period_close || entries.is_empty() {
            debug!("Phase 10b: Skipped (period close disabled or no entries)");
            return Ok(());
        }

        info!("Phase 10b: Generating period-close journal entries");

        use datasynth_core::accounts::{
            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
        };
        use rust_decimal::Decimal;

        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
        // Closing entries are posted on the last day of the period.
        let close_date = end_date - chrono::Days::new(1);

        // 21 with scale 2, i.e. a flat 0.21 rate applied to positive pre-tax income.
        let tax_rate = Decimal::new(21, 2);
        let company_codes: Vec<String> = self
            .config
            .companies
            .iter()
            .map(|c| c.code.clone())
            .collect();

        let mut close_jes: Vec<JournalEntry> = Vec::new();

        // --- Depreciation: one entry per eligible fixed asset ---
        let period_months = self.config.global.period_months;
        for asset in &subledger.fa_records {
            use datasynth_core::models::subledger::fa::AssetStatus;
            // Only active assets that still have value left to depreciate.
            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
                continue;
            }
            let useful_life_months = asset.useful_life_months();
            if useful_life_months == 0 {
                // Avoids a division by zero below.
                continue;
            }
            let salvage_value = asset.salvage_value();
            // Clamp at zero in case the salvage value exceeds the acquisition cost.
            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
            if depreciable_base == Decimal::ZERO {
                continue;
            }
            // Even monthly charge times the number of months in the period,
            // rounded to two decimal places.
            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
                * Decimal::from(period_months))
            .round_dp(2);
            if period_depr <= Decimal::ZERO {
                continue;
            }

            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
            depr_header.document_type = "CL".to_string();
            depr_header.header_text = Some(format!(
                "Depreciation - {} {}",
                asset.asset_number, asset.description
            ));
            depr_header.created_by = "CLOSE_ENGINE".to_string();
            depr_header.source = TransactionSource::Automated;
            depr_header.business_process = Some(BusinessProcess::R2R);

            // Read the id before the header is moved into the journal entry.
            let doc_id = depr_header.document_id;
            let mut depr_je = JournalEntry::new(depr_header);

            // Dr depreciation expense / Cr accumulated depreciation.
            depr_je.add_line(JournalEntryLine::debit(
                doc_id,
                1,
                expense_accounts::DEPRECIATION.to_string(),
                period_depr,
            ));
            depr_je.add_line(JournalEntryLine::credit(
                doc_id,
                2,
                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
                period_depr,
            ));

            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
            close_jes.push(depr_je);
        }

        if !subledger.fa_records.is_empty() {
            // At this point `close_jes` holds only depreciation entries.
            debug!(
                "Generated {} depreciation JEs from {} FA records",
                close_jes.len(),
                subledger.fa_records.len()
            );
        }

        // --- Tax provision and income-statement close, per company ---
        for company_code in &company_codes {
            let mut total_revenue = Decimal::ZERO;
            let mut total_expenses = Decimal::ZERO;

            // NOTE(review): only the pre-existing `entries` are tallied; the
            // depreciation entries just pushed to `close_jes` are not part of the
            // income figure — confirm this is intended.
            for entry in entries.iter() {
                if entry.header.company_code != *company_code {
                    continue;
                }
                for line in &entry.lines {
                    let category = AccountCategory::from_account(&line.gl_account);
                    match category {
                        AccountCategory::Revenue => {
                            // Revenue is accumulated as credits minus debits.
                            total_revenue += line.credit_amount - line.debit_amount;
                        }
                        AccountCategory::Cogs
                        | AccountCategory::OperatingExpense
                        | AccountCategory::OtherIncomeExpense
                        | AccountCategory::Tax => {
                            // Expenses are accumulated as debits minus credits.
                            total_expenses += line.debit_amount - line.credit_amount;
                        }
                        _ => {}
                    }
                }
            }

            let pre_tax_income = total_revenue - total_expenses;

            if pre_tax_income == Decimal::ZERO {
                debug!(
                    "Company {}: no pre-tax income, skipping period close",
                    company_code
                );
                continue;
            }

            // No tax is provided for on a loss.
            let tax_amount = if pre_tax_income > Decimal::ZERO {
                (pre_tax_income * tax_rate).round_dp(2)
            } else {
                Decimal::ZERO
            };

            if tax_amount > Decimal::ZERO {
                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
                tax_header.document_type = "CL".to_string();
                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
                tax_header.created_by = "CLOSE_ENGINE".to_string();
                tax_header.source = TransactionSource::Automated;
                tax_header.business_process = Some(BusinessProcess::R2R);

                let doc_id = tax_header.document_id;
                let mut tax_je = JournalEntry::new(tax_header);

                // Dr tax expense / Cr tax payable.
                tax_je.add_line(JournalEntryLine::debit(
                    doc_id,
                    1,
                    tax_accounts::TAX_EXPENSE.to_string(),
                    tax_amount,
                ));
                // NOTE(review): the provision is credited to `SALES_TAX_PAYABLE` —
                // confirm this is the intended liability account for income tax.
                tax_je.add_line(JournalEntryLine::credit(
                    doc_id,
                    2,
                    tax_accounts::SALES_TAX_PAYABLE.to_string(),
                    tax_amount,
                ));

                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
                close_jes.push(tax_je);
            }

            let net_income = pre_tax_income - tax_amount;

            if net_income != Decimal::ZERO {
                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
                close_header.document_type = "CL".to_string();
                close_header.header_text =
                    Some(format!("Income statement close - {}", company_code));
                close_header.created_by = "CLOSE_ENGINE".to_string();
                close_header.source = TransactionSource::Automated;
                close_header.business_process = Some(BusinessProcess::R2R);

                let doc_id = close_header.document_id;
                let mut close_je = JournalEntry::new(close_header);

                // Both lines post the absolute amount; the sign of `net_income`
                // only decides which account is debited and which is credited.
                let abs_net_income = net_income.abs();

                if net_income > Decimal::ZERO {
                    // Profit: Dr income summary / Cr retained earnings.
                    close_je.add_line(JournalEntryLine::debit(
                        doc_id,
                        1,
                        equity_accounts::INCOME_SUMMARY.to_string(),
                        abs_net_income,
                    ));
                    close_je.add_line(JournalEntryLine::credit(
                        doc_id,
                        2,
                        equity_accounts::RETAINED_EARNINGS.to_string(),
                        abs_net_income,
                    ));
                } else {
                    // Loss: Dr retained earnings / Cr income summary.
                    close_je.add_line(JournalEntryLine::debit(
                        doc_id,
                        1,
                        equity_accounts::RETAINED_EARNINGS.to_string(),
                        abs_net_income,
                    ));
                    close_je.add_line(JournalEntryLine::credit(
                        doc_id,
                        2,
                        equity_accounts::INCOME_SUMMARY.to_string(),
                        abs_net_income,
                    ));
                }

                debug_assert!(
                    close_je.is_balanced(),
                    "Income statement closing JE must be balanced"
                );
                close_jes.push(close_je);
            }
        }

        let close_count = close_jes.len();
        if close_count > 0 {
            info!("Generated {} period-close journal entries", close_count);
            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
            entries.extend(close_jes);
            stats.period_close_je_count = close_count;

            // Totals are recomputed because `entries` just grew.
            stats.total_entries = entries.len() as u64;
            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
        } else {
            debug!("No period-close entries generated (no income statement activity)");
        }

        Ok(())
    }
3063
3064 fn phase_audit_data(
3066 &mut self,
3067 entries: &[JournalEntry],
3068 stats: &mut EnhancedGenerationStatistics,
3069 ) -> SynthResult<AuditSnapshot> {
3070 if self.phase_config.generate_audit {
3071 info!("Phase 8: Generating Audit Data");
3072 let audit_snapshot = self.generate_audit_data(entries)?;
3073 stats.audit_engagement_count = audit_snapshot.engagements.len();
3074 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
3075 stats.audit_evidence_count = audit_snapshot.evidence.len();
3076 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
3077 stats.audit_finding_count = audit_snapshot.findings.len();
3078 stats.audit_judgment_count = audit_snapshot.judgments.len();
3079 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
3080 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
3081 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
3082 stats.audit_sample_count = audit_snapshot.samples.len();
3083 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
3084 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
3085 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
3086 stats.audit_related_party_count = audit_snapshot.related_parties.len();
3087 stats.audit_related_party_transaction_count =
3088 audit_snapshot.related_party_transactions.len();
3089 info!(
3090 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
3091 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
3092 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
3093 {} RP transactions",
3094 stats.audit_engagement_count,
3095 stats.audit_workpaper_count,
3096 stats.audit_evidence_count,
3097 stats.audit_risk_count,
3098 stats.audit_finding_count,
3099 stats.audit_judgment_count,
3100 stats.audit_confirmation_count,
3101 stats.audit_procedure_step_count,
3102 stats.audit_sample_count,
3103 stats.audit_analytical_result_count,
3104 stats.audit_ia_function_count,
3105 stats.audit_ia_report_count,
3106 stats.audit_related_party_count,
3107 stats.audit_related_party_transaction_count,
3108 );
3109 self.check_resources_with_log("post-audit")?;
3110 Ok(audit_snapshot)
3111 } else {
3112 debug!("Phase 8: Skipped (audit generation disabled)");
3113 Ok(AuditSnapshot::default())
3114 }
3115 }
3116
3117 fn phase_banking_data(
3119 &mut self,
3120 stats: &mut EnhancedGenerationStatistics,
3121 ) -> SynthResult<BankingSnapshot> {
3122 if self.phase_config.generate_banking {
3123 info!("Phase 9: Generating Banking KYC/AML Data");
3124 let banking_snapshot = self.generate_banking_data()?;
3125 stats.banking_customer_count = banking_snapshot.customers.len();
3126 stats.banking_account_count = banking_snapshot.accounts.len();
3127 stats.banking_transaction_count = banking_snapshot.transactions.len();
3128 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
3129 info!(
3130 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
3131 stats.banking_customer_count, stats.banking_account_count,
3132 stats.banking_transaction_count, stats.banking_suspicious_count
3133 );
3134 self.check_resources_with_log("post-banking")?;
3135 Ok(banking_snapshot)
3136 } else {
3137 debug!("Phase 9: Skipped (banking generation disabled)");
3138 Ok(BankingSnapshot::default())
3139 }
3140 }
3141
3142 fn phase_graph_export(
3144 &mut self,
3145 entries: &[JournalEntry],
3146 coa: &Arc<ChartOfAccounts>,
3147 stats: &mut EnhancedGenerationStatistics,
3148 ) -> SynthResult<GraphExportSnapshot> {
3149 if self.phase_config.generate_graph_export && !entries.is_empty() {
3150 info!("Phase 10: Exporting Accounting Network Graphs");
3151 match self.export_graphs(entries, coa, stats) {
3152 Ok(snapshot) => {
3153 info!(
3154 "Graph export complete: {} graphs ({} nodes, {} edges)",
3155 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
3156 );
3157 Ok(snapshot)
3158 }
3159 Err(e) => {
3160 warn!("Phase 10: Graph export failed: {}", e);
3161 Ok(GraphExportSnapshot::default())
3162 }
3163 }
3164 } else {
3165 debug!("Phase 10: Skipped (graph export disabled or no entries)");
3166 Ok(GraphExportSnapshot::default())
3167 }
3168 }
3169
3170 #[allow(clippy::too_many_arguments)]
3172 fn phase_hypergraph_export(
3173 &self,
3174 coa: &Arc<ChartOfAccounts>,
3175 entries: &[JournalEntry],
3176 document_flows: &DocumentFlowSnapshot,
3177 sourcing: &SourcingSnapshot,
3178 hr: &HrSnapshot,
3179 manufacturing: &ManufacturingSnapshot,
3180 banking: &BankingSnapshot,
3181 audit: &AuditSnapshot,
3182 financial_reporting: &FinancialReportingSnapshot,
3183 ocpm: &OcpmSnapshot,
3184 compliance: &ComplianceRegulationsSnapshot,
3185 stats: &mut EnhancedGenerationStatistics,
3186 ) -> SynthResult<()> {
3187 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
3188 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
3189 match self.export_hypergraph(
3190 coa,
3191 entries,
3192 document_flows,
3193 sourcing,
3194 hr,
3195 manufacturing,
3196 banking,
3197 audit,
3198 financial_reporting,
3199 ocpm,
3200 compliance,
3201 stats,
3202 ) {
3203 Ok(info) => {
3204 info!(
3205 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
3206 info.node_count, info.edge_count, info.hyperedge_count
3207 );
3208 }
3209 Err(e) => {
3210 warn!("Phase 10b: Hypergraph export failed: {}", e);
3211 }
3212 }
3213 } else {
3214 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
3215 }
3216 Ok(())
3217 }
3218
3219 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
3225 if !self.config.llm.enabled {
3226 debug!("Phase 11: Skipped (LLM enrichment disabled)");
3227 return;
3228 }
3229
3230 info!("Phase 11: Starting LLM Enrichment");
3231 let start = std::time::Instant::now();
3232
3233 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3234 let provider = Arc::new(MockLlmProvider::new(self.seed));
3235 let enricher = VendorLlmEnricher::new(provider);
3236
3237 let industry = format!("{:?}", self.config.global.industry);
3238 let max_enrichments = self
3239 .config
3240 .llm
3241 .max_vendor_enrichments
3242 .min(self.master_data.vendors.len());
3243
3244 let mut enriched_count = 0usize;
3245 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
3246 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
3247 Ok(name) => {
3248 vendor.name = name;
3249 enriched_count += 1;
3250 }
3251 Err(e) => {
3252 warn!(
3253 "LLM vendor enrichment failed for {}: {}",
3254 vendor.vendor_id, e
3255 );
3256 }
3257 }
3258 }
3259
3260 enriched_count
3261 }));
3262
3263 match result {
3264 Ok(enriched_count) => {
3265 stats.llm_vendors_enriched = enriched_count;
3266 let elapsed = start.elapsed();
3267 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
3268 info!(
3269 "Phase 11 complete: {} vendors enriched in {}ms",
3270 enriched_count, stats.llm_enrichment_ms
3271 );
3272 }
3273 Err(_) => {
3274 let elapsed = start.elapsed();
3275 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
3276 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
3277 }
3278 }
3279 }
3280
3281 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
3287 if !self.config.diffusion.enabled {
3288 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
3289 return;
3290 }
3291
3292 info!("Phase 12: Starting Diffusion Enhancement");
3293 let start = std::time::Instant::now();
3294
3295 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3296 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
3299
3300 let diffusion_config = DiffusionConfig {
3301 n_steps: self.config.diffusion.n_steps,
3302 seed: self.seed,
3303 ..Default::default()
3304 };
3305
3306 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
3307
3308 let n_samples = self.config.diffusion.sample_size;
3309 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
3311
3312 samples.len()
3313 }));
3314
3315 match result {
3316 Ok(sample_count) => {
3317 stats.diffusion_samples_generated = sample_count;
3318 let elapsed = start.elapsed();
3319 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
3320 info!(
3321 "Phase 12 complete: {} diffusion samples generated in {}ms",
3322 sample_count, stats.diffusion_enhancement_ms
3323 );
3324 }
3325 Err(_) => {
3326 let elapsed = start.elapsed();
3327 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
3328 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
3329 }
3330 }
3331 }
3332
3333 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
3340 if !self.config.causal.enabled {
3341 debug!("Phase 13: Skipped (causal generation disabled)");
3342 return;
3343 }
3344
3345 info!("Phase 13: Starting Causal Overlay");
3346 let start = std::time::Instant::now();
3347
3348 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3349 let graph = match self.config.causal.template.as_str() {
3351 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
3352 _ => CausalGraph::fraud_detection_template(),
3353 };
3354
3355 let scm = StructuralCausalModel::new(graph.clone())
3356 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
3357
3358 let n_samples = self.config.causal.sample_size;
3359 let samples = scm
3360 .generate(n_samples, self.seed)
3361 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
3362
3363 let validation_passed = if self.config.causal.validate {
3365 let report = CausalValidator::validate_causal_structure(&samples, &graph);
3366 if report.valid {
3367 info!(
3368 "Causal validation passed: all {} checks OK",
3369 report.checks.len()
3370 );
3371 } else {
3372 warn!(
3373 "Causal validation: {} violations detected: {:?}",
3374 report.violations.len(),
3375 report.violations
3376 );
3377 }
3378 Some(report.valid)
3379 } else {
3380 None
3381 };
3382
3383 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
3384 }));
3385
3386 match result {
3387 Ok(Ok((sample_count, validation_passed))) => {
3388 stats.causal_samples_generated = sample_count;
3389 stats.causal_validation_passed = validation_passed;
3390 let elapsed = start.elapsed();
3391 stats.causal_generation_ms = elapsed.as_millis() as u64;
3392 info!(
3393 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
3394 sample_count, stats.causal_generation_ms, validation_passed,
3395 );
3396 }
3397 Ok(Err(e)) => {
3398 let elapsed = start.elapsed();
3399 stats.causal_generation_ms = elapsed.as_millis() as u64;
3400 warn!("Phase 13: Causal generation failed: {}", e);
3401 }
3402 Err(_) => {
3403 let elapsed = start.elapsed();
3404 stats.causal_generation_ms = elapsed.as_millis() as u64;
3405 warn!("Phase 13: Causal generation failed (panic caught), continuing");
3406 }
3407 }
3408 }
3409
3410 fn phase_sourcing_data(
3412 &mut self,
3413 stats: &mut EnhancedGenerationStatistics,
3414 ) -> SynthResult<SourcingSnapshot> {
3415 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
3416 debug!("Phase 14: Skipped (sourcing generation disabled)");
3417 return Ok(SourcingSnapshot::default());
3418 }
3419
3420 info!("Phase 14: Generating S2C Sourcing Data");
3421 let seed = self.seed;
3422
3423 let vendor_ids: Vec<String> = self
3425 .master_data
3426 .vendors
3427 .iter()
3428 .map(|v| v.vendor_id.clone())
3429 .collect();
3430 if vendor_ids.is_empty() {
3431 debug!("Phase 14: Skipped (no vendors available)");
3432 return Ok(SourcingSnapshot::default());
3433 }
3434
3435 let categories: Vec<(String, String)> = vec![
3436 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
3437 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
3438 ("CAT-IT".to_string(), "IT Equipment".to_string()),
3439 ("CAT-SVC".to_string(), "Professional Services".to_string()),
3440 ("CAT-LOG".to_string(), "Logistics".to_string()),
3441 ];
3442 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
3443 .iter()
3444 .map(|(id, name)| {
3445 (
3446 id.clone(),
3447 name.clone(),
3448 rust_decimal::Decimal::from(100_000),
3449 )
3450 })
3451 .collect();
3452
3453 let company_code = self
3454 .config
3455 .companies
3456 .first()
3457 .map(|c| c.code.as_str())
3458 .unwrap_or("1000");
3459 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3460 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3461 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3462 let fiscal_year = start_date.year() as u16;
3463 let owner_ids: Vec<String> = self
3464 .master_data
3465 .employees
3466 .iter()
3467 .take(5)
3468 .map(|e| e.employee_id.clone())
3469 .collect();
3470 let owner_id = owner_ids
3471 .first()
3472 .map(std::string::String::as_str)
3473 .unwrap_or("BUYER-001");
3474
3475 let mut spend_gen = SpendAnalysisGenerator::new(seed);
3477 let spend_analyses =
3478 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
3479
3480 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
3482 let sourcing_projects = if owner_ids.is_empty() {
3483 Vec::new()
3484 } else {
3485 project_gen.generate(
3486 company_code,
3487 &categories_with_spend,
3488 &owner_ids,
3489 start_date,
3490 self.config.global.period_months,
3491 )
3492 };
3493 stats.sourcing_project_count = sourcing_projects.len();
3494
3495 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
3497 let mut qual_gen = QualificationGenerator::new(seed + 2);
3498 let qualifications = qual_gen.generate(
3499 company_code,
3500 &qual_vendor_ids,
3501 sourcing_projects.first().map(|p| p.project_id.as_str()),
3502 owner_id,
3503 start_date,
3504 );
3505
3506 let mut rfx_gen = RfxGenerator::new(seed + 3);
3508 let rfx_events: Vec<RfxEvent> = sourcing_projects
3509 .iter()
3510 .map(|proj| {
3511 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
3512 rfx_gen.generate(
3513 company_code,
3514 &proj.project_id,
3515 &proj.category_id,
3516 &qualified_vids,
3517 owner_id,
3518 start_date,
3519 50000.0,
3520 )
3521 })
3522 .collect();
3523 stats.rfx_event_count = rfx_events.len();
3524
3525 let mut bid_gen = BidGenerator::new(seed + 4);
3527 let mut all_bids = Vec::new();
3528 for rfx in &rfx_events {
3529 let bidder_count = vendor_ids.len().clamp(2, 5);
3530 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
3531 let bids = bid_gen.generate(rfx, &responding, start_date);
3532 all_bids.extend(bids);
3533 }
3534 stats.bid_count = all_bids.len();
3535
3536 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
3538 let bid_evaluations: Vec<BidEvaluation> = rfx_events
3539 .iter()
3540 .map(|rfx| {
3541 let rfx_bids: Vec<SupplierBid> = all_bids
3542 .iter()
3543 .filter(|b| b.rfx_id == rfx.rfx_id)
3544 .cloned()
3545 .collect();
3546 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
3547 })
3548 .collect();
3549
3550 let mut contract_gen = ContractGenerator::new(seed + 6);
3552 let contracts: Vec<ProcurementContract> = bid_evaluations
3553 .iter()
3554 .zip(rfx_events.iter())
3555 .filter_map(|(eval, rfx)| {
3556 eval.ranked_bids.first().and_then(|winner| {
3557 all_bids
3558 .iter()
3559 .find(|b| b.bid_id == winner.bid_id)
3560 .map(|winning_bid| {
3561 contract_gen.generate_from_bid(
3562 winning_bid,
3563 Some(&rfx.sourcing_project_id),
3564 &rfx.category_id,
3565 owner_id,
3566 start_date,
3567 )
3568 })
3569 })
3570 })
3571 .collect();
3572 stats.contract_count = contracts.len();
3573
3574 let mut catalog_gen = CatalogGenerator::new(seed + 7);
3576 let catalog_items = catalog_gen.generate(&contracts);
3577 stats.catalog_item_count = catalog_items.len();
3578
3579 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
3581 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
3582 .iter()
3583 .fold(
3584 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
3585 |mut acc, c| {
3586 acc.entry(c.vendor_id.clone()).or_default().push(c);
3587 acc
3588 },
3589 )
3590 .into_iter()
3591 .collect();
3592 let scorecards = scorecard_gen.generate(
3593 company_code,
3594 &vendor_contracts,
3595 start_date,
3596 end_date,
3597 owner_id,
3598 );
3599 stats.scorecard_count = scorecards.len();
3600
3601 let mut sourcing_projects = sourcing_projects;
3604 for project in &mut sourcing_projects {
3605 project.rfx_ids = rfx_events
3607 .iter()
3608 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
3609 .map(|rfx| rfx.rfx_id.clone())
3610 .collect();
3611
3612 project.contract_id = contracts
3614 .iter()
3615 .find(|c| {
3616 c.sourcing_project_id
3617 .as_deref()
3618 .is_some_and(|sp| sp == project.project_id)
3619 })
3620 .map(|c| c.contract_id.clone());
3621
3622 project.spend_analysis_id = spend_analyses
3624 .iter()
3625 .find(|sa| sa.category_id == project.category_id)
3626 .map(|sa| sa.category_id.clone());
3627 }
3628
3629 info!(
3630 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
3631 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
3632 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
3633 );
3634 self.check_resources_with_log("post-sourcing")?;
3635
3636 Ok(SourcingSnapshot {
3637 spend_analyses,
3638 sourcing_projects,
3639 qualifications,
3640 rfx_events,
3641 bids: all_bids,
3642 bid_evaluations,
3643 contracts,
3644 catalog_items,
3645 scorecards,
3646 })
3647 }
3648
3649 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
3655 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
3656
3657 let parent_code = self
3658 .config
3659 .companies
3660 .first()
3661 .map(|c| c.code.clone())
3662 .unwrap_or_else(|| "PARENT".to_string());
3663
3664 let mut group = GroupStructure::new(parent_code);
3665
3666 for company in self.config.companies.iter().skip(1) {
3667 let sub =
3668 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
3669 group.add_subsidiary(sub);
3670 }
3671
3672 group
3673 }
3674
3675 fn phase_intercompany(
3677 &mut self,
3678 journal_entries: &[JournalEntry],
3679 stats: &mut EnhancedGenerationStatistics,
3680 ) -> SynthResult<IntercompanySnapshot> {
3681 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
3683 debug!("Phase 14b: Skipped (intercompany generation disabled)");
3684 return Ok(IntercompanySnapshot::default());
3685 }
3686
3687 if self.config.companies.len() < 2 {
3689 debug!(
3690 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
3691 self.config.companies.len()
3692 );
3693 return Ok(IntercompanySnapshot::default());
3694 }
3695
3696 info!("Phase 14b: Generating Intercompany Transactions");
3697
3698 let group_structure = self.build_group_structure();
3701 debug!(
3702 "Group structure built: parent={}, subsidiaries={}",
3703 group_structure.parent_entity,
3704 group_structure.subsidiaries.len()
3705 );
3706
3707 let seed = self.seed;
3708 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3709 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3710 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3711
3712 let parent_code = self.config.companies[0].code.clone();
3715 let mut ownership_structure =
3716 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
3717
3718 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
3719 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
3720 format!("REL{:03}", i + 1),
3721 parent_code.clone(),
3722 company.code.clone(),
3723 rust_decimal::Decimal::from(100), start_date,
3725 );
3726 ownership_structure.add_relationship(relationship);
3727 }
3728
3729 let tp_method = match self.config.intercompany.transfer_pricing_method {
3731 datasynth_config::schema::TransferPricingMethod::CostPlus => {
3732 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
3733 }
3734 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
3735 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
3736 }
3737 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
3738 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
3739 }
3740 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
3741 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
3742 }
3743 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
3744 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
3745 }
3746 };
3747
3748 let ic_currency = self
3750 .config
3751 .companies
3752 .first()
3753 .map(|c| c.currency.clone())
3754 .unwrap_or_else(|| "USD".to_string());
3755 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
3756 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
3757 transfer_pricing_method: tp_method,
3758 markup_percent: rust_decimal::Decimal::from_f64_retain(
3759 self.config.intercompany.markup_percent,
3760 )
3761 .unwrap_or(rust_decimal::Decimal::from(5)),
3762 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
3763 default_currency: ic_currency,
3764 ..Default::default()
3765 };
3766
3767 let mut ic_generator = datasynth_generators::ICGenerator::new(
3769 ic_gen_config,
3770 ownership_structure.clone(),
3771 seed + 50,
3772 );
3773
3774 let transactions_per_day = 3;
3777 let matched_pairs = ic_generator.generate_transactions_for_period(
3778 start_date,
3779 end_date,
3780 transactions_per_day,
3781 );
3782
3783 let mut seller_entries = Vec::new();
3785 let mut buyer_entries = Vec::new();
3786 let fiscal_year = start_date.year();
3787
3788 for pair in &matched_pairs {
3789 let fiscal_period = pair.posting_date.month();
3790 let (seller_je, buyer_je) =
3791 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
3792 seller_entries.push(seller_je);
3793 buyer_entries.push(buyer_je);
3794 }
3795
3796 let matching_config = datasynth_generators::ICMatchingConfig {
3798 base_currency: self
3799 .config
3800 .companies
3801 .first()
3802 .map(|c| c.currency.clone())
3803 .unwrap_or_else(|| "USD".to_string()),
3804 ..Default::default()
3805 };
3806 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
3807 matching_engine.load_matched_pairs(&matched_pairs);
3808 let matching_result = matching_engine.run_matching(end_date);
3809
3810 let mut elimination_entries = Vec::new();
3812 if self.config.intercompany.generate_eliminations {
3813 let elim_config = datasynth_generators::EliminationConfig {
3814 consolidation_entity: "GROUP".to_string(),
3815 base_currency: self
3816 .config
3817 .companies
3818 .first()
3819 .map(|c| c.currency.clone())
3820 .unwrap_or_else(|| "USD".to_string()),
3821 ..Default::default()
3822 };
3823
3824 let mut elim_generator =
3825 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
3826
3827 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
3828 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
3829 matching_result
3830 .matched_balances
3831 .iter()
3832 .chain(matching_result.unmatched_balances.iter())
3833 .cloned()
3834 .collect();
3835
3836 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
3848 std::collections::HashMap::new();
3849 let mut equity_amounts: std::collections::HashMap<
3850 String,
3851 std::collections::HashMap<String, rust_decimal::Decimal>,
3852 > = std::collections::HashMap::new();
3853 {
3854 use rust_decimal::Decimal;
3855 let hundred = Decimal::from(100u32);
3856 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
3860 for sub in &group_structure.subsidiaries {
3861 let net_assets = {
3862 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
3863 if na > Decimal::ZERO {
3864 na
3865 } else {
3866 Decimal::from(1_000_000u64)
3867 }
3868 };
3869 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
3871 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
3872
3873 let mut eq_map = std::collections::HashMap::new();
3876 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
3877 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
3878 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
3879 equity_amounts.insert(sub.entity_code.clone(), eq_map);
3880 }
3881 }
3882
3883 let journal = elim_generator.generate_eliminations(
3884 &fiscal_period,
3885 end_date,
3886 &all_balances,
3887 &matched_pairs,
3888 &investment_amounts,
3889 &equity_amounts,
3890 );
3891
3892 elimination_entries = journal.entries.clone();
3893 }
3894
3895 let matched_pair_count = matched_pairs.len();
3896 let elimination_entry_count = elimination_entries.len();
3897 let match_rate = matching_result.match_rate;
3898
3899 stats.ic_matched_pair_count = matched_pair_count;
3900 stats.ic_elimination_count = elimination_entry_count;
3901 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
3902
3903 info!(
3904 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
3905 matched_pair_count,
3906 stats.ic_transaction_count,
3907 seller_entries.len(),
3908 buyer_entries.len(),
3909 elimination_entry_count,
3910 match_rate * 100.0
3911 );
3912 self.check_resources_with_log("post-intercompany")?;
3913
3914 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
3918 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
3919 use rust_decimal::Decimal;
3920
3921 let eight_pct = Decimal::new(8, 2); group_structure
3924 .subsidiaries
3925 .iter()
3926 .filter(|sub| {
3927 sub.nci_percentage > Decimal::ZERO
3928 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
3929 })
3930 .map(|sub| {
3931 let net_assets_from_jes =
3935 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
3936
3937 let net_assets = if net_assets_from_jes > Decimal::ZERO {
3938 net_assets_from_jes.round_dp(2)
3939 } else {
3940 Decimal::from(1_000_000u64)
3942 };
3943
3944 let net_income = (net_assets * eight_pct).round_dp(2);
3946
3947 NciMeasurement::compute(
3948 sub.entity_code.clone(),
3949 sub.nci_percentage,
3950 net_assets,
3951 net_income,
3952 )
3953 })
3954 .collect()
3955 };
3956
3957 if !nci_measurements.is_empty() {
3958 info!(
3959 "NCI measurements: {} subsidiaries with non-controlling interests",
3960 nci_measurements.len()
3961 );
3962 }
3963
3964 Ok(IntercompanySnapshot {
3965 group_structure: Some(group_structure),
3966 matched_pairs,
3967 seller_journal_entries: seller_entries,
3968 buyer_journal_entries: buyer_entries,
3969 elimination_entries,
3970 nci_measurements,
3971 matched_pair_count,
3972 elimination_entry_count,
3973 match_rate,
3974 })
3975 }
3976
3977 fn phase_financial_reporting(
3979 &mut self,
3980 document_flows: &DocumentFlowSnapshot,
3981 journal_entries: &[JournalEntry],
3982 coa: &Arc<ChartOfAccounts>,
3983 _hr: &HrSnapshot,
3984 _audit: &AuditSnapshot,
3985 stats: &mut EnhancedGenerationStatistics,
3986 ) -> SynthResult<FinancialReportingSnapshot> {
3987 let fs_enabled = self.phase_config.generate_financial_statements
3988 || self.config.financial_reporting.enabled;
3989 let br_enabled = self.phase_config.generate_bank_reconciliation;
3990
3991 if !fs_enabled && !br_enabled {
3992 debug!("Phase 15: Skipped (financial reporting disabled)");
3993 return Ok(FinancialReportingSnapshot::default());
3994 }
3995
3996 info!("Phase 15: Generating Financial Reporting Data");
3997
3998 let seed = self.seed;
3999 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4000 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4001
4002 let mut financial_statements = Vec::new();
4003 let mut bank_reconciliations = Vec::new();
4004 let mut trial_balances = Vec::new();
4005 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
4006 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
4007 Vec::new();
4008 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
4010 std::collections::HashMap::new();
4011 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
4013 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
4015
4016 if fs_enabled {
4024 let has_journal_entries = !journal_entries.is_empty();
4025
4026 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
4029 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
4031
4032 let elimination_entries: Vec<&JournalEntry> = journal_entries
4034 .iter()
4035 .filter(|je| je.header.is_elimination)
4036 .collect();
4037
4038 for period in 0..self.config.global.period_months {
4040 let period_start = start_date + chrono::Months::new(period);
4041 let period_end =
4042 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4043 let fiscal_year = period_end.year() as u16;
4044 let fiscal_period = period_end.month() as u8;
4045 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4046
4047 let mut entity_tb_map: std::collections::HashMap<
4050 String,
4051 std::collections::HashMap<String, rust_decimal::Decimal>,
4052 > = std::collections::HashMap::new();
4053
4054 for (company_idx, company) in self.config.companies.iter().enumerate() {
4056 let company_code = company.code.as_str();
4057 let currency = company.currency.as_str();
4058 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
4061 let mut company_fs_gen =
4062 FinancialStatementGenerator::new(seed + company_seed_offset);
4063
4064 if has_journal_entries {
4065 let tb_entries = Self::build_cumulative_trial_balance(
4066 journal_entries,
4067 coa,
4068 company_code,
4069 start_date,
4070 period_end,
4071 fiscal_year,
4072 fiscal_period,
4073 );
4074
4075 let entity_cat_map =
4077 entity_tb_map.entry(company_code.to_string()).or_default();
4078 for tb_entry in &tb_entries {
4079 let net = tb_entry.debit_balance - tb_entry.credit_balance;
4080 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
4081 }
4082
4083 let stmts = company_fs_gen.generate(
4084 company_code,
4085 currency,
4086 &tb_entries,
4087 period_start,
4088 period_end,
4089 fiscal_year,
4090 fiscal_period,
4091 None,
4092 "SYS-AUTOCLOSE",
4093 );
4094
4095 let mut entity_stmts = Vec::new();
4096 for stmt in stmts {
4097 if stmt.statement_type == StatementType::CashFlowStatement {
4098 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
4099 let cf_items = Self::build_cash_flow_from_trial_balances(
4100 &tb_entries,
4101 None,
4102 net_income,
4103 );
4104 entity_stmts.push(FinancialStatement {
4105 cash_flow_items: cf_items,
4106 ..stmt
4107 });
4108 } else {
4109 entity_stmts.push(stmt);
4110 }
4111 }
4112
4113 financial_statements.extend(entity_stmts.clone());
4115
4116 standalone_statements
4118 .entry(company_code.to_string())
4119 .or_default()
4120 .extend(entity_stmts);
4121
4122 if company_idx == 0 {
4125 trial_balances.push(PeriodTrialBalance {
4126 fiscal_year,
4127 fiscal_period,
4128 period_start,
4129 period_end,
4130 entries: tb_entries,
4131 });
4132 }
4133 } else {
4134 let tb_entries = Self::build_trial_balance_from_entries(
4136 journal_entries,
4137 coa,
4138 company_code,
4139 fiscal_year,
4140 fiscal_period,
4141 );
4142
4143 let stmts = company_fs_gen.generate(
4144 company_code,
4145 currency,
4146 &tb_entries,
4147 period_start,
4148 period_end,
4149 fiscal_year,
4150 fiscal_period,
4151 None,
4152 "SYS-AUTOCLOSE",
4153 );
4154 financial_statements.extend(stmts.clone());
4155 standalone_statements
4156 .entry(company_code.to_string())
4157 .or_default()
4158 .extend(stmts);
4159
4160 if company_idx == 0 && !tb_entries.is_empty() {
4161 trial_balances.push(PeriodTrialBalance {
4162 fiscal_year,
4163 fiscal_period,
4164 period_start,
4165 period_end,
4166 entries: tb_entries,
4167 });
4168 }
4169 }
4170 }
4171
4172 let group_currency = self
4175 .config
4176 .companies
4177 .first()
4178 .map(|c| c.currency.as_str())
4179 .unwrap_or("USD");
4180
4181 let period_eliminations: Vec<JournalEntry> = elimination_entries
4183 .iter()
4184 .filter(|je| {
4185 je.header.fiscal_year == fiscal_year
4186 && je.header.fiscal_period == fiscal_period
4187 })
4188 .map(|je| (*je).clone())
4189 .collect();
4190
4191 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
4192 &entity_tb_map,
4193 &period_eliminations,
4194 &period_label,
4195 );
4196
4197 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
4200 .line_items
4201 .iter()
4202 .map(|li| {
4203 let net = li.post_elimination_total;
4204 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
4205 (net, rust_decimal::Decimal::ZERO)
4206 } else {
4207 (rust_decimal::Decimal::ZERO, -net)
4208 };
4209 datasynth_generators::TrialBalanceEntry {
4210 account_code: li.account_category.clone(),
4211 account_name: li.account_category.clone(),
4212 category: li.account_category.clone(),
4213 debit_balance: debit,
4214 credit_balance: credit,
4215 }
4216 })
4217 .collect();
4218
4219 let mut cons_stmts = cons_gen.generate(
4220 "GROUP",
4221 group_currency,
4222 &cons_tb,
4223 period_start,
4224 period_end,
4225 fiscal_year,
4226 fiscal_period,
4227 None,
4228 "SYS-AUTOCLOSE",
4229 );
4230
4231 let bs_categories: &[&str] = &[
4235 "CASH",
4236 "RECEIVABLES",
4237 "INVENTORY",
4238 "FIXEDASSETS",
4239 "PAYABLES",
4240 "ACCRUEDLIABILITIES",
4241 "LONGTERMDEBT",
4242 "EQUITY",
4243 ];
4244 let (bs_items, is_items): (Vec<_>, Vec<_>) =
4245 cons_line_items.into_iter().partition(|li| {
4246 let upper = li.label.to_uppercase();
4247 bs_categories.iter().any(|c| upper == *c)
4248 });
4249
4250 for stmt in &mut cons_stmts {
4251 stmt.is_consolidated = true;
4252 match stmt.statement_type {
4253 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
4254 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
4255 _ => {} }
4257 }
4258
4259 consolidated_statements.extend(cons_stmts);
4260 consolidation_schedules.push(schedule);
4261 }
4262
4263 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
4269 info!(
4270 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
4271 stats.financial_statement_count,
4272 consolidated_statements.len(),
4273 has_journal_entries
4274 );
4275
4276 let entity_seeds: Vec<SegmentSeed> = self
4281 .config
4282 .companies
4283 .iter()
4284 .map(|c| SegmentSeed {
4285 code: c.code.clone(),
4286 name: c.name.clone(),
4287 currency: c.currency.clone(),
4288 })
4289 .collect();
4290
4291 let mut seg_gen = SegmentGenerator::new(seed + 30);
4292
4293 for period in 0..self.config.global.period_months {
4298 let period_end =
4299 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4300 let fiscal_year = period_end.year() as u16;
4301 let fiscal_period = period_end.month() as u8;
4302 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4303
4304 use datasynth_core::models::StatementType;
4305
4306 let cons_is = consolidated_statements.iter().find(|s| {
4308 s.fiscal_year == fiscal_year
4309 && s.fiscal_period == fiscal_period
4310 && s.statement_type == StatementType::IncomeStatement
4311 });
4312 let cons_bs = consolidated_statements.iter().find(|s| {
4313 s.fiscal_year == fiscal_year
4314 && s.fiscal_period == fiscal_period
4315 && s.statement_type == StatementType::BalanceSheet
4316 });
4317
4318 let is_stmt = cons_is.or_else(|| {
4320 financial_statements.iter().find(|s| {
4321 s.fiscal_year == fiscal_year
4322 && s.fiscal_period == fiscal_period
4323 && s.statement_type == StatementType::IncomeStatement
4324 })
4325 });
4326 let bs_stmt = cons_bs.or_else(|| {
4327 financial_statements.iter().find(|s| {
4328 s.fiscal_year == fiscal_year
4329 && s.fiscal_period == fiscal_period
4330 && s.statement_type == StatementType::BalanceSheet
4331 })
4332 });
4333
4334 let consolidated_revenue = is_stmt
4335 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
4336 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
4338
4339 let consolidated_profit = is_stmt
4340 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
4341 .map(|li| li.amount)
4342 .unwrap_or(rust_decimal::Decimal::ZERO);
4343
4344 let consolidated_assets = bs_stmt
4345 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
4346 .map(|li| li.amount)
4347 .unwrap_or(rust_decimal::Decimal::ZERO);
4348
4349 if consolidated_revenue == rust_decimal::Decimal::ZERO
4351 && consolidated_assets == rust_decimal::Decimal::ZERO
4352 {
4353 continue;
4354 }
4355
4356 let group_code = self
4357 .config
4358 .companies
4359 .first()
4360 .map(|c| c.code.as_str())
4361 .unwrap_or("GROUP");
4362
4363 let (segs, recon) = seg_gen.generate(
4364 group_code,
4365 &period_label,
4366 consolidated_revenue,
4367 consolidated_profit,
4368 consolidated_assets,
4369 &entity_seeds,
4370 );
4371 segment_reports.extend(segs);
4372 segment_reconciliations.push(recon);
4373 }
4374
4375 info!(
4376 "Segment reports generated: {} segments, {} reconciliations",
4377 segment_reports.len(),
4378 segment_reconciliations.len()
4379 );
4380 }
4381
4382 if br_enabled && !document_flows.payments.is_empty() {
4384 let employee_ids: Vec<String> = self
4385 .master_data
4386 .employees
4387 .iter()
4388 .map(|e| e.employee_id.clone())
4389 .collect();
4390 let mut br_gen =
4391 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
4392
4393 for company in &self.config.companies {
4395 let company_payments: Vec<PaymentReference> = document_flows
4396 .payments
4397 .iter()
4398 .filter(|p| p.header.company_code == company.code)
4399 .map(|p| PaymentReference {
4400 id: p.header.document_id.clone(),
4401 amount: if p.is_vendor { p.amount } else { -p.amount },
4402 date: p.header.document_date,
4403 reference: p
4404 .check_number
4405 .clone()
4406 .or_else(|| p.wire_reference.clone())
4407 .unwrap_or_else(|| p.header.document_id.clone()),
4408 })
4409 .collect();
4410
4411 if company_payments.is_empty() {
4412 continue;
4413 }
4414
4415 let bank_account_id = format!("{}-MAIN", company.code);
4416
4417 for period in 0..self.config.global.period_months {
4419 let period_start = start_date + chrono::Months::new(period);
4420 let period_end =
4421 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4422
4423 let period_payments: Vec<PaymentReference> = company_payments
4424 .iter()
4425 .filter(|p| p.date >= period_start && p.date <= period_end)
4426 .cloned()
4427 .collect();
4428
4429 let recon = br_gen.generate(
4430 &company.code,
4431 &bank_account_id,
4432 period_start,
4433 period_end,
4434 &company.currency,
4435 &period_payments,
4436 );
4437 bank_reconciliations.push(recon);
4438 }
4439 }
4440 info!(
4441 "Bank reconciliations generated: {} reconciliations",
4442 bank_reconciliations.len()
4443 );
4444 }
4445
4446 stats.bank_reconciliation_count = bank_reconciliations.len();
4447 self.check_resources_with_log("post-financial-reporting")?;
4448
4449 if !trial_balances.is_empty() {
4450 info!(
4451 "Period-close trial balances captured: {} periods",
4452 trial_balances.len()
4453 );
4454 }
4455
4456 let notes_to_financial_statements = Vec::new();
4460
4461 Ok(FinancialReportingSnapshot {
4462 financial_statements,
4463 standalone_statements,
4464 consolidated_statements,
4465 consolidation_schedules,
4466 bank_reconciliations,
4467 trial_balances,
4468 segment_reports,
4469 segment_reconciliations,
4470 notes_to_financial_statements,
4471 })
4472 }
4473
4474 fn generate_notes_to_financial_statements(
4481 &self,
4482 financial_reporting: &mut FinancialReportingSnapshot,
4483 accounting_standards: &AccountingStandardsSnapshot,
4484 tax: &TaxSnapshot,
4485 hr: &HrSnapshot,
4486 audit: &AuditSnapshot,
4487 ) {
4488 use datasynth_config::schema::AccountingFrameworkConfig;
4489 use datasynth_core::models::StatementType;
4490 use datasynth_generators::period_close::notes_generator::{
4491 NotesGenerator, NotesGeneratorContext,
4492 };
4493
4494 let seed = self.seed;
4495 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4496 {
4497 Ok(d) => d,
4498 Err(_) => return,
4499 };
4500
4501 let mut notes_gen = NotesGenerator::new(seed + 4235);
4502
4503 for company in &self.config.companies {
4504 let last_period_end = start_date
4505 + chrono::Months::new(self.config.global.period_months)
4506 - chrono::Days::new(1);
4507 let fiscal_year = last_period_end.year() as u16;
4508
4509 let entity_is = financial_reporting
4511 .standalone_statements
4512 .get(&company.code)
4513 .and_then(|stmts| {
4514 stmts.iter().find(|s| {
4515 s.fiscal_year == fiscal_year
4516 && s.statement_type == StatementType::IncomeStatement
4517 })
4518 });
4519 let entity_bs = financial_reporting
4520 .standalone_statements
4521 .get(&company.code)
4522 .and_then(|stmts| {
4523 stmts.iter().find(|s| {
4524 s.fiscal_year == fiscal_year
4525 && s.statement_type == StatementType::BalanceSheet
4526 })
4527 });
4528
4529 let revenue_amount = entity_is
4531 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
4532 .map(|li| li.amount);
4533 let ppe_gross = entity_bs
4534 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
4535 .map(|li| li.amount);
4536
4537 let framework = match self
4538 .config
4539 .accounting_standards
4540 .framework
4541 .unwrap_or_default()
4542 {
4543 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
4544 "IFRS".to_string()
4545 }
4546 _ => "US GAAP".to_string(),
4547 };
4548
4549 let (entity_dta, entity_dtl) = {
4552 let mut dta = rust_decimal::Decimal::ZERO;
4553 let mut dtl = rust_decimal::Decimal::ZERO;
4554 for rf in &tax.deferred_tax.rollforwards {
4555 if rf.entity_code == company.code {
4556 dta += rf.closing_dta;
4557 dtl += rf.closing_dtl;
4558 }
4559 }
4560 (
4561 if dta > rust_decimal::Decimal::ZERO {
4562 Some(dta)
4563 } else {
4564 None
4565 },
4566 if dtl > rust_decimal::Decimal::ZERO {
4567 Some(dtl)
4568 } else {
4569 None
4570 },
4571 )
4572 };
4573
4574 let entity_provisions: Vec<_> = accounting_standards
4577 .provisions
4578 .iter()
4579 .filter(|p| p.entity_code == company.code)
4580 .collect();
4581 let provision_count = entity_provisions.len();
4582 let total_provisions = if provision_count > 0 {
4583 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
4584 } else {
4585 None
4586 };
4587
4588 let entity_pension_plan_count = hr
4590 .pension_plans
4591 .iter()
4592 .filter(|p| p.entity_code == company.code)
4593 .count();
4594 let entity_total_dbo: Option<rust_decimal::Decimal> = {
4595 let sum: rust_decimal::Decimal = hr
4596 .pension_disclosures
4597 .iter()
4598 .filter(|d| {
4599 hr.pension_plans
4600 .iter()
4601 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
4602 })
4603 .map(|d| d.net_pension_liability)
4604 .sum();
4605 let plan_assets_sum: rust_decimal::Decimal = hr
4606 .pension_plan_assets
4607 .iter()
4608 .filter(|a| {
4609 hr.pension_plans
4610 .iter()
4611 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
4612 })
4613 .map(|a| a.fair_value_closing)
4614 .sum();
4615 if entity_pension_plan_count > 0 {
4616 Some(sum + plan_assets_sum)
4617 } else {
4618 None
4619 }
4620 };
4621 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
4622 let sum: rust_decimal::Decimal = hr
4623 .pension_plan_assets
4624 .iter()
4625 .filter(|a| {
4626 hr.pension_plans
4627 .iter()
4628 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
4629 })
4630 .map(|a| a.fair_value_closing)
4631 .sum();
4632 if entity_pension_plan_count > 0 {
4633 Some(sum)
4634 } else {
4635 None
4636 }
4637 };
4638
4639 let rp_count = audit.related_party_transactions.len();
4642 let se_count = audit.subsequent_events.len();
4643 let adjusting_count = audit
4644 .subsequent_events
4645 .iter()
4646 .filter(|e| {
4647 matches!(
4648 e.classification,
4649 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
4650 )
4651 })
4652 .count();
4653
4654 let ctx = NotesGeneratorContext {
4655 entity_code: company.code.clone(),
4656 framework,
4657 period: format!("FY{}", fiscal_year),
4658 period_end: last_period_end,
4659 currency: company.currency.clone(),
4660 revenue_amount,
4661 total_ppe_gross: ppe_gross,
4662 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
4663 deferred_tax_asset: entity_dta,
4665 deferred_tax_liability: entity_dtl,
4666 provision_count,
4668 total_provisions,
4669 pension_plan_count: entity_pension_plan_count,
4671 total_dbo: entity_total_dbo,
4672 total_plan_assets: entity_total_plan_assets,
4673 related_party_transaction_count: rp_count,
4675 subsequent_event_count: se_count,
4676 adjusting_event_count: adjusting_count,
4677 ..NotesGeneratorContext::default()
4678 };
4679
4680 let entity_notes = notes_gen.generate(&ctx);
4681 info!(
4682 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
4683 company.code,
4684 entity_notes.len(),
4685 entity_dta,
4686 entity_dtl,
4687 provision_count,
4688 );
4689 financial_reporting
4690 .notes_to_financial_statements
4691 .extend(entity_notes);
4692 }
4693 }
4694
4695 fn build_trial_balance_from_entries(
4701 journal_entries: &[JournalEntry],
4702 coa: &ChartOfAccounts,
4703 company_code: &str,
4704 fiscal_year: u16,
4705 fiscal_period: u8,
4706 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
4707 use rust_decimal::Decimal;
4708
4709 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
4711 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
4712
4713 for je in journal_entries {
4714 if je.header.company_code != company_code
4716 || je.header.fiscal_year != fiscal_year
4717 || je.header.fiscal_period != fiscal_period
4718 {
4719 continue;
4720 }
4721
4722 for line in &je.lines {
4723 let acct = &line.gl_account;
4724 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
4725 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
4726 }
4727 }
4728
4729 let mut all_accounts: Vec<&String> = account_debits
4731 .keys()
4732 .chain(account_credits.keys())
4733 .collect::<std::collections::HashSet<_>>()
4734 .into_iter()
4735 .collect();
4736 all_accounts.sort();
4737
4738 let mut entries = Vec::new();
4739
4740 for acct_number in all_accounts {
4741 let debit = account_debits
4742 .get(acct_number)
4743 .copied()
4744 .unwrap_or(Decimal::ZERO);
4745 let credit = account_credits
4746 .get(acct_number)
4747 .copied()
4748 .unwrap_or(Decimal::ZERO);
4749
4750 if debit.is_zero() && credit.is_zero() {
4751 continue;
4752 }
4753
4754 let account_name = coa
4756 .get_account(acct_number)
4757 .map(|gl| gl.short_description.clone())
4758 .unwrap_or_else(|| format!("Account {acct_number}"));
4759
4760 let category = Self::category_from_account_code(acct_number);
4765
4766 entries.push(datasynth_generators::TrialBalanceEntry {
4767 account_code: acct_number.clone(),
4768 account_name,
4769 category,
4770 debit_balance: debit,
4771 credit_balance: credit,
4772 });
4773 }
4774
4775 entries
4776 }
4777
4778 fn build_cumulative_trial_balance(
4785 journal_entries: &[JournalEntry],
4786 coa: &ChartOfAccounts,
4787 company_code: &str,
4788 start_date: NaiveDate,
4789 period_end: NaiveDate,
4790 fiscal_year: u16,
4791 fiscal_period: u8,
4792 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
4793 use rust_decimal::Decimal;
4794
4795 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
4797 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
4798
4799 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
4801 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
4802
4803 for je in journal_entries {
4804 if je.header.company_code != company_code {
4805 continue;
4806 }
4807
4808 for line in &je.lines {
4809 let acct = &line.gl_account;
4810 let category = Self::category_from_account_code(acct);
4811 let is_bs_account = matches!(
4812 category.as_str(),
4813 "Cash"
4814 | "Receivables"
4815 | "Inventory"
4816 | "FixedAssets"
4817 | "Payables"
4818 | "AccruedLiabilities"
4819 | "LongTermDebt"
4820 | "Equity"
4821 );
4822
4823 if is_bs_account {
4824 if je.header.document_date <= period_end
4826 && je.header.document_date >= start_date
4827 {
4828 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
4829 line.debit_amount;
4830 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
4831 line.credit_amount;
4832 }
4833 } else {
4834 if je.header.fiscal_year == fiscal_year
4836 && je.header.fiscal_period == fiscal_period
4837 {
4838 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
4839 line.debit_amount;
4840 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
4841 line.credit_amount;
4842 }
4843 }
4844 }
4845 }
4846
4847 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
4849 all_accounts.extend(bs_debits.keys().cloned());
4850 all_accounts.extend(bs_credits.keys().cloned());
4851 all_accounts.extend(is_debits.keys().cloned());
4852 all_accounts.extend(is_credits.keys().cloned());
4853
4854 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
4855 sorted_accounts.sort();
4856
4857 let mut entries = Vec::new();
4858
4859 for acct_number in &sorted_accounts {
4860 let category = Self::category_from_account_code(acct_number);
4861 let is_bs_account = matches!(
4862 category.as_str(),
4863 "Cash"
4864 | "Receivables"
4865 | "Inventory"
4866 | "FixedAssets"
4867 | "Payables"
4868 | "AccruedLiabilities"
4869 | "LongTermDebt"
4870 | "Equity"
4871 );
4872
4873 let (debit, credit) = if is_bs_account {
4874 (
4875 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
4876 bs_credits
4877 .get(acct_number)
4878 .copied()
4879 .unwrap_or(Decimal::ZERO),
4880 )
4881 } else {
4882 (
4883 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
4884 is_credits
4885 .get(acct_number)
4886 .copied()
4887 .unwrap_or(Decimal::ZERO),
4888 )
4889 };
4890
4891 if debit.is_zero() && credit.is_zero() {
4892 continue;
4893 }
4894
4895 let account_name = coa
4896 .get_account(acct_number)
4897 .map(|gl| gl.short_description.clone())
4898 .unwrap_or_else(|| format!("Account {acct_number}"));
4899
4900 entries.push(datasynth_generators::TrialBalanceEntry {
4901 account_code: acct_number.clone(),
4902 account_name,
4903 category,
4904 debit_balance: debit,
4905 credit_balance: credit,
4906 });
4907 }
4908
4909 entries
4910 }
4911
4912 fn build_cash_flow_from_trial_balances(
4917 current_tb: &[datasynth_generators::TrialBalanceEntry],
4918 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
4919 net_income: rust_decimal::Decimal,
4920 ) -> Vec<CashFlowItem> {
4921 use rust_decimal::Decimal;
4922
4923 let aggregate =
4925 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
4926 let mut map: HashMap<String, Decimal> = HashMap::new();
4927 for entry in tb {
4928 let net = entry.debit_balance - entry.credit_balance;
4929 *map.entry(entry.category.clone()).or_default() += net;
4930 }
4931 map
4932 };
4933
4934 let current = aggregate(current_tb);
4935 let prior = prior_tb.map(aggregate);
4936
4937 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
4939 *map.get(key).unwrap_or(&Decimal::ZERO)
4940 };
4941
4942 let change = |key: &str| -> Decimal {
4944 let curr = get(¤t, key);
4945 match &prior {
4946 Some(p) => curr - get(p, key),
4947 None => curr,
4948 }
4949 };
4950
4951 let fixed_asset_change = change("FixedAssets");
4954 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
4955 -fixed_asset_change
4956 } else {
4957 Decimal::ZERO
4958 };
4959
4960 let ar_change = change("Receivables");
4962 let inventory_change = change("Inventory");
4963 let ap_change = change("Payables");
4965 let accrued_change = change("AccruedLiabilities");
4966
4967 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
4968 + (-ap_change)
4969 + (-accrued_change);
4970
4971 let capex = if fixed_asset_change > Decimal::ZERO {
4973 -fixed_asset_change
4974 } else {
4975 Decimal::ZERO
4976 };
4977 let investing_cf = capex;
4978
4979 let debt_change = -change("LongTermDebt");
4981 let equity_change = -change("Equity");
4982 let financing_cf = debt_change + equity_change;
4983
4984 let net_change = operating_cf + investing_cf + financing_cf;
4985
4986 vec![
4987 CashFlowItem {
4988 item_code: "CF-NI".to_string(),
4989 label: "Net Income".to_string(),
4990 category: CashFlowCategory::Operating,
4991 amount: net_income,
4992 amount_prior: None,
4993 sort_order: 1,
4994 is_total: false,
4995 },
4996 CashFlowItem {
4997 item_code: "CF-DEP".to_string(),
4998 label: "Depreciation & Amortization".to_string(),
4999 category: CashFlowCategory::Operating,
5000 amount: depreciation_addback,
5001 amount_prior: None,
5002 sort_order: 2,
5003 is_total: false,
5004 },
5005 CashFlowItem {
5006 item_code: "CF-AR".to_string(),
5007 label: "Change in Accounts Receivable".to_string(),
5008 category: CashFlowCategory::Operating,
5009 amount: -ar_change,
5010 amount_prior: None,
5011 sort_order: 3,
5012 is_total: false,
5013 },
5014 CashFlowItem {
5015 item_code: "CF-AP".to_string(),
5016 label: "Change in Accounts Payable".to_string(),
5017 category: CashFlowCategory::Operating,
5018 amount: -ap_change,
5019 amount_prior: None,
5020 sort_order: 4,
5021 is_total: false,
5022 },
5023 CashFlowItem {
5024 item_code: "CF-INV".to_string(),
5025 label: "Change in Inventory".to_string(),
5026 category: CashFlowCategory::Operating,
5027 amount: -inventory_change,
5028 amount_prior: None,
5029 sort_order: 5,
5030 is_total: false,
5031 },
5032 CashFlowItem {
5033 item_code: "CF-OP".to_string(),
5034 label: "Net Cash from Operating Activities".to_string(),
5035 category: CashFlowCategory::Operating,
5036 amount: operating_cf,
5037 amount_prior: None,
5038 sort_order: 6,
5039 is_total: true,
5040 },
5041 CashFlowItem {
5042 item_code: "CF-CAPEX".to_string(),
5043 label: "Capital Expenditures".to_string(),
5044 category: CashFlowCategory::Investing,
5045 amount: capex,
5046 amount_prior: None,
5047 sort_order: 7,
5048 is_total: false,
5049 },
5050 CashFlowItem {
5051 item_code: "CF-INV-T".to_string(),
5052 label: "Net Cash from Investing Activities".to_string(),
5053 category: CashFlowCategory::Investing,
5054 amount: investing_cf,
5055 amount_prior: None,
5056 sort_order: 8,
5057 is_total: true,
5058 },
5059 CashFlowItem {
5060 item_code: "CF-DEBT".to_string(),
5061 label: "Net Borrowings / (Repayments)".to_string(),
5062 category: CashFlowCategory::Financing,
5063 amount: debt_change,
5064 amount_prior: None,
5065 sort_order: 9,
5066 is_total: false,
5067 },
5068 CashFlowItem {
5069 item_code: "CF-EQ".to_string(),
5070 label: "Equity Changes".to_string(),
5071 category: CashFlowCategory::Financing,
5072 amount: equity_change,
5073 amount_prior: None,
5074 sort_order: 10,
5075 is_total: false,
5076 },
5077 CashFlowItem {
5078 item_code: "CF-FIN-T".to_string(),
5079 label: "Net Cash from Financing Activities".to_string(),
5080 category: CashFlowCategory::Financing,
5081 amount: financing_cf,
5082 amount_prior: None,
5083 sort_order: 11,
5084 is_total: true,
5085 },
5086 CashFlowItem {
5087 item_code: "CF-NET".to_string(),
5088 label: "Net Change in Cash".to_string(),
5089 category: CashFlowCategory::Operating,
5090 amount: net_change,
5091 amount_prior: None,
5092 sort_order: 12,
5093 is_total: true,
5094 },
5095 ]
5096 }
5097
5098 fn calculate_net_income_from_tb(
5102 tb: &[datasynth_generators::TrialBalanceEntry],
5103 ) -> rust_decimal::Decimal {
5104 use rust_decimal::Decimal;
5105
5106 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
5107 for entry in tb {
5108 let net = entry.debit_balance - entry.credit_balance;
5109 *aggregated.entry(entry.category.clone()).or_default() += net;
5110 }
5111
5112 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
5113 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
5114 let opex = *aggregated
5115 .get("OperatingExpenses")
5116 .unwrap_or(&Decimal::ZERO);
5117 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
5118 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
5119
5120 let operating_income = revenue - cogs - opex - other_expenses - other_income;
5123 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
5125 operating_income - tax
5126 }
5127
/// Maps a GL account code to a reporting category using its first two
/// characters.
///
/// | Prefix  | Category             |
/// |---------|----------------------|
/// | 10      | `Cash`               |
/// | 11      | `Receivables`        |
/// | 12–14   | `Inventory`          |
/// | 15–19   | `FixedAssets`        |
/// | 20      | `Payables`           |
/// | 21–24   | `AccruedLiabilities` |
/// | 25–29   | `LongTermDebt`       |
/// | 30–39   | `Equity`             |
/// | 40–44   | `Revenue`            |
/// | 50–52   | `CostOfSales`        |
/// | 60–69   | `OperatingExpenses`  |
/// | 70–74   | `OtherIncome`        |
/// | 80–89   | `OtherExpenses`      |
///
/// Any other prefix, including codes shorter than two characters, falls back
/// to `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let label = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Unmapped prefixes and too-short codes share the expense fallback.
        _ => "OperatingExpenses",
    };
    label.to_owned()
}
5156
5157 fn phase_hr_data(
5159 &mut self,
5160 stats: &mut EnhancedGenerationStatistics,
5161 ) -> SynthResult<HrSnapshot> {
5162 if !self.phase_config.generate_hr {
5163 debug!("Phase 16: Skipped (HR generation disabled)");
5164 return Ok(HrSnapshot::default());
5165 }
5166
5167 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
5168
5169 let seed = self.seed;
5170 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5171 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5172 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5173 let company_code = self
5174 .config
5175 .companies
5176 .first()
5177 .map(|c| c.code.as_str())
5178 .unwrap_or("1000");
5179 let currency = self
5180 .config
5181 .companies
5182 .first()
5183 .map(|c| c.currency.as_str())
5184 .unwrap_or("USD");
5185
5186 let employee_ids: Vec<String> = self
5187 .master_data
5188 .employees
5189 .iter()
5190 .map(|e| e.employee_id.clone())
5191 .collect();
5192
5193 if employee_ids.is_empty() {
5194 debug!("Phase 16: Skipped (no employees available)");
5195 return Ok(HrSnapshot::default());
5196 }
5197
5198 let cost_center_ids: Vec<String> = self
5201 .master_data
5202 .employees
5203 .iter()
5204 .filter_map(|e| e.cost_center.clone())
5205 .collect::<std::collections::HashSet<_>>()
5206 .into_iter()
5207 .collect();
5208
5209 let mut snapshot = HrSnapshot::default();
5210
5211 if self.config.hr.payroll.enabled {
5213 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
5214 .with_pools(employee_ids.clone(), cost_center_ids.clone());
5215
5216 let payroll_pack = self.primary_pack();
5218
5219 payroll_gen.set_country_pack(payroll_pack.clone());
5222
5223 let employees_with_salary: Vec<(
5224 String,
5225 rust_decimal::Decimal,
5226 Option<String>,
5227 Option<String>,
5228 )> = self
5229 .master_data
5230 .employees
5231 .iter()
5232 .map(|e| {
5233 (
5234 e.employee_id.clone(),
5235 rust_decimal::Decimal::from(5000), e.cost_center.clone(),
5237 e.department_id.clone(),
5238 )
5239 })
5240 .collect();
5241
5242 for month in 0..self.config.global.period_months {
5243 let period_start = start_date + chrono::Months::new(month);
5244 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
5245 let (run, items) = payroll_gen.generate(
5246 company_code,
5247 &employees_with_salary,
5248 period_start,
5249 period_end,
5250 currency,
5251 );
5252 snapshot.payroll_runs.push(run);
5253 snapshot.payroll_run_count += 1;
5254 snapshot.payroll_line_item_count += items.len();
5255 snapshot.payroll_line_items.extend(items);
5256 }
5257 }
5258
5259 if self.config.hr.time_attendance.enabled {
5261 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
5262 .with_pools(employee_ids.clone(), cost_center_ids.clone());
5263 let entries = time_gen.generate(
5264 &employee_ids,
5265 start_date,
5266 end_date,
5267 &self.config.hr.time_attendance,
5268 );
5269 snapshot.time_entry_count = entries.len();
5270 snapshot.time_entries = entries;
5271 }
5272
5273 if self.config.hr.expenses.enabled {
5275 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
5276 .with_pools(employee_ids.clone(), cost_center_ids.clone());
5277 expense_gen.set_country_pack(self.primary_pack().clone());
5278 let company_currency = self
5279 .config
5280 .companies
5281 .first()
5282 .map(|c| c.currency.as_str())
5283 .unwrap_or("USD");
5284 let reports = expense_gen.generate_with_currency(
5285 &employee_ids,
5286 start_date,
5287 end_date,
5288 &self.config.hr.expenses,
5289 company_currency,
5290 );
5291 snapshot.expense_report_count = reports.len();
5292 snapshot.expense_reports = reports;
5293 }
5294
5295 if self.config.hr.payroll.enabled {
5297 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
5298 let employee_pairs: Vec<(String, String)> = self
5299 .master_data
5300 .employees
5301 .iter()
5302 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
5303 .collect();
5304 let enrollments =
5305 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
5306 snapshot.benefit_enrollment_count = enrollments.len();
5307 snapshot.benefit_enrollments = enrollments;
5308 }
5309
5310 if self.phase_config.generate_hr {
5312 let entity_name = self
5313 .config
5314 .companies
5315 .first()
5316 .map(|c| c.name.as_str())
5317 .unwrap_or("Entity");
5318 let period_months = self.config.global.period_months;
5319 let period_label = {
5320 let y = start_date.year();
5321 let m = start_date.month();
5322 if period_months >= 12 {
5323 format!("FY{y}")
5324 } else {
5325 format!("{y}-{m:02}")
5326 }
5327 };
5328 let reporting_date =
5329 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
5330
5331 let avg_salary: Option<rust_decimal::Decimal> = {
5336 let employee_count = employee_ids.len();
5337 if self.config.hr.payroll.enabled
5338 && employee_count > 0
5339 && !snapshot.payroll_runs.is_empty()
5340 {
5341 let total_gross: rust_decimal::Decimal = snapshot
5343 .payroll_runs
5344 .iter()
5345 .filter(|r| r.company_code == company_code)
5346 .map(|r| r.total_gross)
5347 .sum();
5348 if total_gross > rust_decimal::Decimal::ZERO {
5349 let annual_total = if period_months > 0 && period_months < 12 {
5351 total_gross * rust_decimal::Decimal::from(12u32)
5352 / rust_decimal::Decimal::from(period_months)
5353 } else {
5354 total_gross
5355 };
5356 Some(
5357 (annual_total / rust_decimal::Decimal::from(employee_count))
5358 .round_dp(2),
5359 )
5360 } else {
5361 None
5362 }
5363 } else {
5364 None
5365 }
5366 };
5367
5368 let mut pension_gen =
5369 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
5370 let pension_snap = pension_gen.generate(
5371 company_code,
5372 entity_name,
5373 &period_label,
5374 reporting_date,
5375 employee_ids.len(),
5376 currency,
5377 avg_salary,
5378 period_months,
5379 );
5380 snapshot.pension_plan_count = pension_snap.plans.len();
5381 snapshot.pension_plans = pension_snap.plans;
5382 snapshot.pension_obligations = pension_snap.obligations;
5383 snapshot.pension_plan_assets = pension_snap.plan_assets;
5384 snapshot.pension_disclosures = pension_snap.disclosures;
5385 snapshot.pension_journal_entries = pension_snap.journal_entries;
5390 }
5391
5392 if self.phase_config.generate_hr && !employee_ids.is_empty() {
5394 let period_months = self.config.global.period_months;
5395 let period_label = {
5396 let y = start_date.year();
5397 let m = start_date.month();
5398 if period_months >= 12 {
5399 format!("FY{y}")
5400 } else {
5401 format!("{y}-{m:02}")
5402 }
5403 };
5404 let reporting_date =
5405 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
5406
5407 let mut stock_comp_gen =
5408 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
5409 let stock_snap = stock_comp_gen.generate(
5410 company_code,
5411 &employee_ids,
5412 start_date,
5413 &period_label,
5414 reporting_date,
5415 currency,
5416 );
5417 snapshot.stock_grant_count = stock_snap.grants.len();
5418 snapshot.stock_grants = stock_snap.grants;
5419 snapshot.stock_comp_expenses = stock_snap.expenses;
5420 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
5421 }
5422
5423 stats.payroll_run_count = snapshot.payroll_run_count;
5424 stats.time_entry_count = snapshot.time_entry_count;
5425 stats.expense_report_count = snapshot.expense_report_count;
5426 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
5427 stats.pension_plan_count = snapshot.pension_plan_count;
5428 stats.stock_grant_count = snapshot.stock_grant_count;
5429
5430 info!(
5431 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
5432 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
5433 snapshot.time_entry_count, snapshot.expense_report_count,
5434 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
5435 snapshot.stock_grant_count
5436 );
5437 self.check_resources_with_log("post-hr")?;
5438
5439 Ok(snapshot)
5440 }
5441
5442 fn phase_accounting_standards(
5444 &mut self,
5445 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
5446 journal_entries: &[JournalEntry],
5447 stats: &mut EnhancedGenerationStatistics,
5448 ) -> SynthResult<AccountingStandardsSnapshot> {
5449 if !self.phase_config.generate_accounting_standards {
5450 debug!("Phase 17: Skipped (accounting standards generation disabled)");
5451 return Ok(AccountingStandardsSnapshot::default());
5452 }
5453 info!("Phase 17: Generating Accounting Standards Data");
5454
5455 let seed = self.seed;
5456 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5457 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5458 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5459 let company_code = self
5460 .config
5461 .companies
5462 .first()
5463 .map(|c| c.code.as_str())
5464 .unwrap_or("1000");
5465 let currency = self
5466 .config
5467 .companies
5468 .first()
5469 .map(|c| c.currency.as_str())
5470 .unwrap_or("USD");
5471
5472 let framework = match self.config.accounting_standards.framework {
5477 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
5478 datasynth_standards::framework::AccountingFramework::UsGaap
5479 }
5480 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
5481 datasynth_standards::framework::AccountingFramework::Ifrs
5482 }
5483 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
5484 datasynth_standards::framework::AccountingFramework::DualReporting
5485 }
5486 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
5487 datasynth_standards::framework::AccountingFramework::FrenchGaap
5488 }
5489 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
5490 datasynth_standards::framework::AccountingFramework::GermanGaap
5491 }
5492 None => {
5493 let pack = self.primary_pack();
5495 let pack_fw = pack.accounting.framework.as_str();
5496 match pack_fw {
5497 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
5498 "dual_reporting" => {
5499 datasynth_standards::framework::AccountingFramework::DualReporting
5500 }
5501 "french_gaap" => {
5502 datasynth_standards::framework::AccountingFramework::FrenchGaap
5503 }
5504 "german_gaap" | "hgb" => {
5505 datasynth_standards::framework::AccountingFramework::GermanGaap
5506 }
5507 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
5509 }
5510 }
5511 };
5512
5513 let mut snapshot = AccountingStandardsSnapshot::default();
5514
5515 if self.config.accounting_standards.revenue_recognition.enabled {
5517 let customer_ids: Vec<String> = self
5518 .master_data
5519 .customers
5520 .iter()
5521 .map(|c| c.customer_id.clone())
5522 .collect();
5523
5524 if !customer_ids.is_empty() {
5525 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
5526 let contracts = rev_gen.generate(
5527 company_code,
5528 &customer_ids,
5529 start_date,
5530 end_date,
5531 currency,
5532 &self.config.accounting_standards.revenue_recognition,
5533 framework,
5534 );
5535 snapshot.revenue_contract_count = contracts.len();
5536 snapshot.contracts = contracts;
5537 }
5538 }
5539
5540 if self.config.accounting_standards.impairment.enabled {
5542 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
5543 .master_data
5544 .assets
5545 .iter()
5546 .map(|a| {
5547 (
5548 a.asset_id.clone(),
5549 a.description.clone(),
5550 a.acquisition_cost,
5551 )
5552 })
5553 .collect();
5554
5555 if !asset_data.is_empty() {
5556 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
5557 let tests = imp_gen.generate(
5558 company_code,
5559 &asset_data,
5560 end_date,
5561 &self.config.accounting_standards.impairment,
5562 framework,
5563 );
5564 snapshot.impairment_test_count = tests.len();
5565 snapshot.impairment_tests = tests;
5566 }
5567 }
5568
5569 if self
5571 .config
5572 .accounting_standards
5573 .business_combinations
5574 .enabled
5575 {
5576 let bc_config = &self.config.accounting_standards.business_combinations;
5577 let framework_str = match framework {
5578 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
5579 _ => "US_GAAP",
5580 };
5581 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
5582 let bc_snap = bc_gen.generate(
5583 company_code,
5584 currency,
5585 start_date,
5586 end_date,
5587 bc_config.acquisition_count,
5588 framework_str,
5589 );
5590 snapshot.business_combination_count = bc_snap.combinations.len();
5591 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
5592 snapshot.business_combinations = bc_snap.combinations;
5593 }
5594
5595 if self
5597 .config
5598 .accounting_standards
5599 .expected_credit_loss
5600 .enabled
5601 {
5602 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
5603 let framework_str = match framework {
5604 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
5605 _ => "ASC_326",
5606 };
5607
5608 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
5611
5612 let mut ecl_gen = EclGenerator::new(seed + 43);
5613
5614 let bucket_exposures: Vec<(
5616 datasynth_core::models::subledger::ar::AgingBucket,
5617 rust_decimal::Decimal,
5618 )> = if ar_aging_reports.is_empty() {
5619 use datasynth_core::models::subledger::ar::AgingBucket;
5621 vec![
5622 (
5623 AgingBucket::Current,
5624 rust_decimal::Decimal::from(500_000_u32),
5625 ),
5626 (
5627 AgingBucket::Days1To30,
5628 rust_decimal::Decimal::from(120_000_u32),
5629 ),
5630 (
5631 AgingBucket::Days31To60,
5632 rust_decimal::Decimal::from(45_000_u32),
5633 ),
5634 (
5635 AgingBucket::Days61To90,
5636 rust_decimal::Decimal::from(15_000_u32),
5637 ),
5638 (
5639 AgingBucket::Over90Days,
5640 rust_decimal::Decimal::from(8_000_u32),
5641 ),
5642 ]
5643 } else {
5644 use datasynth_core::models::subledger::ar::AgingBucket;
5645 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
5647 std::collections::HashMap::new();
5648 for report in ar_aging_reports {
5649 for (bucket, amount) in &report.bucket_totals {
5650 *totals.entry(*bucket).or_default() += amount;
5651 }
5652 }
5653 AgingBucket::all()
5654 .into_iter()
5655 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
5656 .collect()
5657 };
5658
5659 let ecl_snap = ecl_gen.generate(
5660 company_code,
5661 end_date,
5662 &bucket_exposures,
5663 ecl_config,
5664 &period_label,
5665 framework_str,
5666 );
5667
5668 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
5669 snapshot.ecl_models = ecl_snap.ecl_models;
5670 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
5671 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
5672 }
5673
5674 {
5676 let framework_str = match framework {
5677 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
5678 _ => "US_GAAP",
5679 };
5680
5681 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
5686 .max(rust_decimal::Decimal::from(100_000_u32));
5687
5688 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
5689
5690 let mut prov_gen = ProvisionGenerator::new(seed + 44);
5691 let prov_snap = prov_gen.generate(
5692 company_code,
5693 currency,
5694 revenue_proxy,
5695 end_date,
5696 &period_label,
5697 framework_str,
5698 );
5699
5700 snapshot.provision_count = prov_snap.provisions.len();
5701 snapshot.provisions = prov_snap.provisions;
5702 snapshot.provision_movements = prov_snap.movements;
5703 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
5704 snapshot.provision_journal_entries = prov_snap.journal_entries;
5705 }
5706
5707 {
5711 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
5712
5713 let presentation_currency = self
5714 .config
5715 .global
5716 .presentation_currency
5717 .clone()
5718 .unwrap_or_else(|| self.config.global.group_currency.clone());
5719
5720 let mut rate_table = FxRateTable::new(&presentation_currency);
5723
5724 let base_rates = base_rates_usd();
5728 for (ccy, rate) in &base_rates {
5729 rate_table.add_rate(FxRate::new(
5730 ccy,
5731 "USD",
5732 RateType::Closing,
5733 end_date,
5734 *rate,
5735 "SYNTHETIC",
5736 ));
5737 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
5740 rate_table.add_rate(FxRate::new(
5741 ccy,
5742 "USD",
5743 RateType::Average,
5744 end_date,
5745 avg,
5746 "SYNTHETIC",
5747 ));
5748 }
5749
5750 let mut translation_results = Vec::new();
5751 for company in &self.config.companies {
5752 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
5755 .max(rust_decimal::Decimal::from(100_000_u32));
5756
5757 let func_ccy = company
5758 .functional_currency
5759 .clone()
5760 .unwrap_or_else(|| company.currency.clone());
5761
5762 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
5763 &company.code,
5764 &func_ccy,
5765 &presentation_currency,
5766 &ias21_period_label,
5767 end_date,
5768 company_revenue,
5769 &rate_table,
5770 );
5771 translation_results.push(result);
5772 }
5773
5774 snapshot.currency_translation_count = translation_results.len();
5775 snapshot.currency_translation_results = translation_results;
5776 }
5777
5778 stats.revenue_contract_count = snapshot.revenue_contract_count;
5779 stats.impairment_test_count = snapshot.impairment_test_count;
5780 stats.business_combination_count = snapshot.business_combination_count;
5781 stats.ecl_model_count = snapshot.ecl_model_count;
5782 stats.provision_count = snapshot.provision_count;
5783
5784 info!(
5785 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
5786 snapshot.revenue_contract_count,
5787 snapshot.impairment_test_count,
5788 snapshot.business_combination_count,
5789 snapshot.ecl_model_count,
5790 snapshot.provision_count,
5791 snapshot.currency_translation_count
5792 );
5793 self.check_resources_with_log("post-accounting-standards")?;
5794
5795 Ok(snapshot)
5796 }
5797
5798 fn phase_manufacturing(
5800 &mut self,
5801 stats: &mut EnhancedGenerationStatistics,
5802 ) -> SynthResult<ManufacturingSnapshot> {
5803 if !self.phase_config.generate_manufacturing {
5804 debug!("Phase 18: Skipped (manufacturing generation disabled)");
5805 return Ok(ManufacturingSnapshot::default());
5806 }
5807 info!("Phase 18: Generating Manufacturing Data");
5808
5809 let seed = self.seed;
5810 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5811 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5812 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5813 let company_code = self
5814 .config
5815 .companies
5816 .first()
5817 .map(|c| c.code.as_str())
5818 .unwrap_or("1000");
5819
5820 let material_data: Vec<(String, String)> = self
5821 .master_data
5822 .materials
5823 .iter()
5824 .map(|m| (m.material_id.clone(), m.description.clone()))
5825 .collect();
5826
5827 if material_data.is_empty() {
5828 debug!("Phase 18: Skipped (no materials available)");
5829 return Ok(ManufacturingSnapshot::default());
5830 }
5831
5832 let mut snapshot = ManufacturingSnapshot::default();
5833
5834 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
5836 let production_orders = prod_gen.generate(
5837 company_code,
5838 &material_data,
5839 start_date,
5840 end_date,
5841 &self.config.manufacturing.production_orders,
5842 &self.config.manufacturing.costing,
5843 &self.config.manufacturing.routing,
5844 );
5845 snapshot.production_order_count = production_orders.len();
5846
5847 let inspection_data: Vec<(String, String, String)> = production_orders
5849 .iter()
5850 .map(|po| {
5851 (
5852 po.order_id.clone(),
5853 po.material_id.clone(),
5854 po.material_description.clone(),
5855 )
5856 })
5857 .collect();
5858
5859 snapshot.production_orders = production_orders;
5860
5861 if !inspection_data.is_empty() {
5862 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
5863 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
5864 snapshot.quality_inspection_count = inspections.len();
5865 snapshot.quality_inspections = inspections;
5866 }
5867
5868 let storage_locations: Vec<(String, String)> = material_data
5870 .iter()
5871 .enumerate()
5872 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
5873 .collect();
5874
5875 let employee_ids: Vec<String> = self
5876 .master_data
5877 .employees
5878 .iter()
5879 .map(|e| e.employee_id.clone())
5880 .collect();
5881 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
5882 .with_employee_pool(employee_ids);
5883 let mut cycle_count_total = 0usize;
5884 for month in 0..self.config.global.period_months {
5885 let count_date = start_date + chrono::Months::new(month);
5886 let items_per_count = storage_locations.len().clamp(10, 50);
5887 let cc = cc_gen.generate(
5888 company_code,
5889 &storage_locations,
5890 count_date,
5891 items_per_count,
5892 );
5893 snapshot.cycle_counts.push(cc);
5894 cycle_count_total += 1;
5895 }
5896 snapshot.cycle_count_count = cycle_count_total;
5897
5898 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 53);
5900 let bom_components = bom_gen.generate(company_code, &material_data);
5901 snapshot.bom_component_count = bom_components.len();
5902 snapshot.bom_components = bom_components;
5903
5904 let currency = self
5906 .config
5907 .companies
5908 .first()
5909 .map(|c| c.currency.as_str())
5910 .unwrap_or("USD");
5911 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 54);
5912 let inventory_movements = inv_mov_gen.generate(
5913 company_code,
5914 &material_data,
5915 start_date,
5916 end_date,
5917 2,
5918 currency,
5919 );
5920 snapshot.inventory_movement_count = inventory_movements.len();
5921 snapshot.inventory_movements = inventory_movements;
5922
5923 stats.production_order_count = snapshot.production_order_count;
5924 stats.quality_inspection_count = snapshot.quality_inspection_count;
5925 stats.cycle_count_count = snapshot.cycle_count_count;
5926 stats.bom_component_count = snapshot.bom_component_count;
5927 stats.inventory_movement_count = snapshot.inventory_movement_count;
5928
5929 info!(
5930 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
5931 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
5932 snapshot.bom_component_count, snapshot.inventory_movement_count
5933 );
5934 self.check_resources_with_log("post-manufacturing")?;
5935
5936 Ok(snapshot)
5937 }
5938
5939 fn phase_sales_kpi_budgets(
5941 &mut self,
5942 coa: &Arc<ChartOfAccounts>,
5943 financial_reporting: &FinancialReportingSnapshot,
5944 stats: &mut EnhancedGenerationStatistics,
5945 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
5946 if !self.phase_config.generate_sales_kpi_budgets {
5947 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
5948 return Ok(SalesKpiBudgetsSnapshot::default());
5949 }
5950 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
5951
5952 let seed = self.seed;
5953 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5954 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5955 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5956 let company_code = self
5957 .config
5958 .companies
5959 .first()
5960 .map(|c| c.code.as_str())
5961 .unwrap_or("1000");
5962
5963 let mut snapshot = SalesKpiBudgetsSnapshot::default();
5964
5965 if self.config.sales_quotes.enabled {
5967 let customer_data: Vec<(String, String)> = self
5968 .master_data
5969 .customers
5970 .iter()
5971 .map(|c| (c.customer_id.clone(), c.name.clone()))
5972 .collect();
5973 let material_data: Vec<(String, String)> = self
5974 .master_data
5975 .materials
5976 .iter()
5977 .map(|m| (m.material_id.clone(), m.description.clone()))
5978 .collect();
5979
5980 if !customer_data.is_empty() && !material_data.is_empty() {
5981 let employee_ids: Vec<String> = self
5982 .master_data
5983 .employees
5984 .iter()
5985 .map(|e| e.employee_id.clone())
5986 .collect();
5987 let customer_ids: Vec<String> = self
5988 .master_data
5989 .customers
5990 .iter()
5991 .map(|c| c.customer_id.clone())
5992 .collect();
5993 let company_currency = self
5994 .config
5995 .companies
5996 .first()
5997 .map(|c| c.currency.as_str())
5998 .unwrap_or("USD");
5999
6000 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
6001 .with_pools(employee_ids, customer_ids);
6002 let quotes = quote_gen.generate_with_currency(
6003 company_code,
6004 &customer_data,
6005 &material_data,
6006 start_date,
6007 end_date,
6008 &self.config.sales_quotes,
6009 company_currency,
6010 );
6011 snapshot.sales_quote_count = quotes.len();
6012 snapshot.sales_quotes = quotes;
6013 }
6014 }
6015
6016 if self.config.financial_reporting.management_kpis.enabled {
6018 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
6019 let mut kpis = kpi_gen.generate(
6020 company_code,
6021 start_date,
6022 end_date,
6023 &self.config.financial_reporting.management_kpis,
6024 );
6025
6026 {
6028 use rust_decimal::Decimal;
6029
6030 if let Some(income_stmt) =
6031 financial_reporting.financial_statements.iter().find(|fs| {
6032 fs.statement_type == StatementType::IncomeStatement
6033 && fs.company_code == company_code
6034 })
6035 {
6036 let total_revenue: Decimal = income_stmt
6038 .line_items
6039 .iter()
6040 .filter(|li| li.section.contains("Revenue") && !li.is_total)
6041 .map(|li| li.amount)
6042 .sum();
6043 let total_cogs: Decimal = income_stmt
6044 .line_items
6045 .iter()
6046 .filter(|li| {
6047 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
6048 && !li.is_total
6049 })
6050 .map(|li| li.amount.abs())
6051 .sum();
6052 let total_opex: Decimal = income_stmt
6053 .line_items
6054 .iter()
6055 .filter(|li| {
6056 li.section.contains("Expense")
6057 && !li.is_total
6058 && !li.section.contains("Cost")
6059 })
6060 .map(|li| li.amount.abs())
6061 .sum();
6062
6063 if total_revenue > Decimal::ZERO {
6064 let hundred = Decimal::from(100);
6065 let gross_margin_pct =
6066 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
6067 let operating_income = total_revenue - total_cogs - total_opex;
6068 let op_margin_pct =
6069 (operating_income * hundred / total_revenue).round_dp(2);
6070
6071 for kpi in &mut kpis {
6073 if kpi.name == "Gross Margin" {
6074 kpi.value = gross_margin_pct;
6075 } else if kpi.name == "Operating Margin" {
6076 kpi.value = op_margin_pct;
6077 }
6078 }
6079 }
6080 }
6081
6082 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
6084 fs.statement_type == StatementType::BalanceSheet
6085 && fs.company_code == company_code
6086 }) {
6087 let current_assets: Decimal = bs
6088 .line_items
6089 .iter()
6090 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
6091 .map(|li| li.amount)
6092 .sum();
6093 let current_liabilities: Decimal = bs
6094 .line_items
6095 .iter()
6096 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
6097 .map(|li| li.amount.abs())
6098 .sum();
6099
6100 if current_liabilities > Decimal::ZERO {
6101 let current_ratio = (current_assets / current_liabilities).round_dp(2);
6102 for kpi in &mut kpis {
6103 if kpi.name == "Current Ratio" {
6104 kpi.value = current_ratio;
6105 }
6106 }
6107 }
6108 }
6109 }
6110
6111 snapshot.kpi_count = kpis.len();
6112 snapshot.kpis = kpis;
6113 }
6114
6115 if self.config.financial_reporting.budgets.enabled {
6117 let account_data: Vec<(String, String)> = coa
6118 .accounts
6119 .iter()
6120 .map(|a| (a.account_number.clone(), a.short_description.clone()))
6121 .collect();
6122
6123 if !account_data.is_empty() {
6124 let fiscal_year = start_date.year() as u32;
6125 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
6126 let budget = budget_gen.generate(
6127 company_code,
6128 fiscal_year,
6129 &account_data,
6130 &self.config.financial_reporting.budgets,
6131 );
6132 snapshot.budget_line_count = budget.line_items.len();
6133 snapshot.budgets.push(budget);
6134 }
6135 }
6136
6137 stats.sales_quote_count = snapshot.sales_quote_count;
6138 stats.kpi_count = snapshot.kpi_count;
6139 stats.budget_line_count = snapshot.budget_line_count;
6140
6141 info!(
6142 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
6143 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
6144 );
6145 self.check_resources_with_log("post-sales-kpi-budgets")?;
6146
6147 Ok(snapshot)
6148 }
6149
6150 fn phase_tax_generation(
6152 &mut self,
6153 document_flows: &DocumentFlowSnapshot,
6154 journal_entries: &[JournalEntry],
6155 stats: &mut EnhancedGenerationStatistics,
6156 ) -> SynthResult<TaxSnapshot> {
6157 if !self.phase_config.generate_tax {
6158 debug!("Phase 20: Skipped (tax generation disabled)");
6159 return Ok(TaxSnapshot::default());
6160 }
6161 info!("Phase 20: Generating Tax Data");
6162
6163 let seed = self.seed;
6164 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6165 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6166 let fiscal_year = start_date.year();
6167 let company_code = self
6168 .config
6169 .companies
6170 .first()
6171 .map(|c| c.code.as_str())
6172 .unwrap_or("1000");
6173
6174 let mut gen =
6175 datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
6176
6177 let pack = self.primary_pack().clone();
6178 let (jurisdictions, codes) =
6179 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
6180
6181 let mut provisions = Vec::new();
6183 if self.config.tax.provisions.enabled {
6184 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
6185 for company in &self.config.companies {
6186 let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
6187 let statutory_rate = rust_decimal::Decimal::new(
6188 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
6189 2,
6190 );
6191 let provision = provision_gen.generate(
6192 &company.code,
6193 start_date,
6194 pre_tax_income,
6195 statutory_rate,
6196 );
6197 provisions.push(provision);
6198 }
6199 }
6200
6201 let mut tax_lines = Vec::new();
6203 if !codes.is_empty() {
6204 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
6205 datasynth_generators::TaxLineGeneratorConfig::default(),
6206 codes.clone(),
6207 seed + 72,
6208 );
6209
6210 let buyer_country = self
6213 .config
6214 .companies
6215 .first()
6216 .map(|c| c.country.as_str())
6217 .unwrap_or("US");
6218 for vi in &document_flows.vendor_invoices {
6219 let lines = tax_line_gen.generate_for_document(
6220 datasynth_core::models::TaxableDocumentType::VendorInvoice,
6221 &vi.header.document_id,
6222 buyer_country, buyer_country,
6224 vi.payable_amount,
6225 vi.header.document_date,
6226 None,
6227 );
6228 tax_lines.extend(lines);
6229 }
6230
6231 for ci in &document_flows.customer_invoices {
6233 let lines = tax_line_gen.generate_for_document(
6234 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
6235 &ci.header.document_id,
6236 buyer_country, buyer_country,
6238 ci.total_gross_amount,
6239 ci.header.document_date,
6240 None,
6241 );
6242 tax_lines.extend(lines);
6243 }
6244 }
6245
6246 let deferred_tax = {
6248 let companies: Vec<(&str, &str)> = self
6249 .config
6250 .companies
6251 .iter()
6252 .map(|c| (c.code.as_str(), c.country.as_str()))
6253 .collect();
6254 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 73);
6255 deferred_gen.generate(&companies, start_date, journal_entries)
6256 };
6257
6258 let snapshot = TaxSnapshot {
6259 jurisdiction_count: jurisdictions.len(),
6260 code_count: codes.len(),
6261 jurisdictions,
6262 codes,
6263 tax_provisions: provisions,
6264 tax_lines,
6265 tax_returns: Vec::new(),
6266 withholding_records: Vec::new(),
6267 tax_anomaly_labels: Vec::new(),
6268 deferred_tax,
6269 };
6270
6271 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
6272 stats.tax_code_count = snapshot.code_count;
6273 stats.tax_provision_count = snapshot.tax_provisions.len();
6274 stats.tax_line_count = snapshot.tax_lines.len();
6275
6276 info!(
6277 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs",
6278 snapshot.jurisdiction_count,
6279 snapshot.code_count,
6280 snapshot.tax_provisions.len(),
6281 snapshot.deferred_tax.temporary_differences.len(),
6282 snapshot.deferred_tax.journal_entries.len(),
6283 );
6284 self.check_resources_with_log("post-tax")?;
6285
6286 Ok(snapshot)
6287 }
6288
6289 fn phase_esg_generation(
6291 &mut self,
6292 document_flows: &DocumentFlowSnapshot,
6293 stats: &mut EnhancedGenerationStatistics,
6294 ) -> SynthResult<EsgSnapshot> {
6295 if !self.phase_config.generate_esg {
6296 debug!("Phase 21: Skipped (ESG generation disabled)");
6297 return Ok(EsgSnapshot::default());
6298 }
6299 info!("Phase 21: Generating ESG Data");
6300
6301 let seed = self.seed;
6302 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6303 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6304 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6305 let entity_id = self
6306 .config
6307 .companies
6308 .first()
6309 .map(|c| c.code.as_str())
6310 .unwrap_or("1000");
6311
6312 let esg_cfg = &self.config.esg;
6313 let mut snapshot = EsgSnapshot::default();
6314
6315 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
6317 esg_cfg.environmental.energy.clone(),
6318 seed + 80,
6319 );
6320 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
6321
6322 let facility_count = esg_cfg.environmental.energy.facility_count;
6324 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
6325 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
6326
6327 let mut waste_gen = datasynth_generators::WasteGenerator::new(
6329 seed + 82,
6330 esg_cfg.environmental.waste.diversion_target,
6331 facility_count,
6332 );
6333 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
6334
6335 let mut emission_gen =
6337 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
6338
6339 let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
6341 .iter()
6342 .map(|e| datasynth_generators::EnergyInput {
6343 facility_id: e.facility_id.clone(),
6344 energy_type: match e.energy_source {
6345 EnergySourceType::NaturalGas => {
6346 datasynth_generators::EnergyInputType::NaturalGas
6347 }
6348 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
6349 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
6350 _ => datasynth_generators::EnergyInputType::Electricity,
6351 },
6352 consumption_kwh: e.consumption_kwh,
6353 period: e.period,
6354 })
6355 .collect();
6356
6357 let mut emissions = Vec::new();
6358 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
6359 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
6360
6361 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
6363 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
6364 for payment in &document_flows.payments {
6365 if payment.is_vendor {
6366 *totals
6367 .entry(payment.business_partner_id.clone())
6368 .or_default() += payment.amount;
6369 }
6370 }
6371 totals
6372 };
6373 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
6374 .master_data
6375 .vendors
6376 .iter()
6377 .map(|v| {
6378 let spend = vendor_payment_totals
6379 .get(&v.vendor_id)
6380 .copied()
6381 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
6382 datasynth_generators::VendorSpendInput {
6383 vendor_id: v.vendor_id.clone(),
6384 category: format!("{:?}", v.vendor_type).to_lowercase(),
6385 spend,
6386 country: v.country.clone(),
6387 }
6388 })
6389 .collect();
6390 if !vendor_spend.is_empty() {
6391 emissions.extend(emission_gen.generate_scope3_purchased_goods(
6392 entity_id,
6393 &vendor_spend,
6394 start_date,
6395 end_date,
6396 ));
6397 }
6398
6399 let headcount = self.master_data.employees.len() as u32;
6401 if headcount > 0 {
6402 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
6403 emissions.extend(emission_gen.generate_scope3_business_travel(
6404 entity_id,
6405 travel_spend,
6406 start_date,
6407 ));
6408 emissions
6409 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
6410 }
6411
6412 snapshot.emission_count = emissions.len();
6413 snapshot.emissions = emissions;
6414 snapshot.energy = energy_records;
6415
6416 let mut workforce_gen =
6418 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
6419 let total_headcount = headcount.max(100);
6420 snapshot.diversity =
6421 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
6422 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
6423 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
6424 entity_id,
6425 facility_count,
6426 start_date,
6427 end_date,
6428 );
6429
6430 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
6433 entity_id,
6434 &snapshot.safety_incidents,
6435 total_hours,
6436 start_date,
6437 );
6438 snapshot.safety_metrics = vec![safety_metric];
6439
6440 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
6442 seed + 85,
6443 esg_cfg.governance.board_size,
6444 esg_cfg.governance.independence_target,
6445 );
6446 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
6447
6448 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
6450 esg_cfg.supply_chain_esg.clone(),
6451 seed + 86,
6452 );
6453 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
6454 .master_data
6455 .vendors
6456 .iter()
6457 .map(|v| datasynth_generators::VendorInput {
6458 vendor_id: v.vendor_id.clone(),
6459 country: v.country.clone(),
6460 industry: format!("{:?}", v.vendor_type).to_lowercase(),
6461 quality_score: None,
6462 })
6463 .collect();
6464 snapshot.supplier_assessments =
6465 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
6466
6467 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
6469 seed + 87,
6470 esg_cfg.reporting.clone(),
6471 esg_cfg.climate_scenarios.clone(),
6472 );
6473 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
6474 snapshot.disclosures = disclosure_gen.generate_disclosures(
6475 entity_id,
6476 &snapshot.materiality,
6477 start_date,
6478 end_date,
6479 );
6480 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
6481 snapshot.disclosure_count = snapshot.disclosures.len();
6482
6483 if esg_cfg.anomaly_rate > 0.0 {
6485 let mut anomaly_injector =
6486 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
6487 let mut labels = Vec::new();
6488 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
6489 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
6490 labels.extend(
6491 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
6492 );
6493 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
6494 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
6495 snapshot.anomaly_labels = labels;
6496 }
6497
6498 stats.esg_emission_count = snapshot.emission_count;
6499 stats.esg_disclosure_count = snapshot.disclosure_count;
6500
6501 info!(
6502 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
6503 snapshot.emission_count,
6504 snapshot.disclosure_count,
6505 snapshot.supplier_assessments.len()
6506 );
6507 self.check_resources_with_log("post-esg")?;
6508
6509 Ok(snapshot)
6510 }
6511
6512 fn phase_treasury_data(
6514 &mut self,
6515 document_flows: &DocumentFlowSnapshot,
6516 subledger: &SubledgerSnapshot,
6517 intercompany: &IntercompanySnapshot,
6518 stats: &mut EnhancedGenerationStatistics,
6519 ) -> SynthResult<TreasurySnapshot> {
6520 if !self.phase_config.generate_treasury {
6521 debug!("Phase 22: Skipped (treasury generation disabled)");
6522 return Ok(TreasurySnapshot::default());
6523 }
6524 info!("Phase 22: Generating Treasury Data");
6525
6526 let seed = self.seed;
6527 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6528 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6529 let currency = self
6530 .config
6531 .companies
6532 .first()
6533 .map(|c| c.currency.as_str())
6534 .unwrap_or("USD");
6535 let entity_id = self
6536 .config
6537 .companies
6538 .first()
6539 .map(|c| c.code.as_str())
6540 .unwrap_or("1000");
6541
6542 let mut snapshot = TreasurySnapshot::default();
6543
6544 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
6546 self.config.treasury.debt.clone(),
6547 seed + 90,
6548 );
6549 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
6550
6551 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
6553 self.config.treasury.hedging.clone(),
6554 seed + 91,
6555 );
6556 for debt in &snapshot.debt_instruments {
6557 if debt.rate_type == InterestRateType::Variable {
6558 let swap = hedge_gen.generate_ir_swap(
6559 currency,
6560 debt.principal,
6561 debt.origination_date,
6562 debt.maturity_date,
6563 );
6564 snapshot.hedging_instruments.push(swap);
6565 }
6566 }
6567
6568 {
6571 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
6572 for payment in &document_flows.payments {
6573 if payment.currency != currency {
6574 let entry = fx_map
6575 .entry(payment.currency.clone())
6576 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
6577 entry.0 += payment.amount;
6578 if payment.header.document_date > entry.1 {
6580 entry.1 = payment.header.document_date;
6581 }
6582 }
6583 }
6584 if !fx_map.is_empty() {
6585 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
6586 .into_iter()
6587 .map(|(foreign_ccy, (net_amount, settlement_date))| {
6588 datasynth_generators::treasury::FxExposure {
6589 currency_pair: format!("{foreign_ccy}/{currency}"),
6590 foreign_currency: foreign_ccy,
6591 net_amount,
6592 settlement_date,
6593 description: "AP payment FX exposure".to_string(),
6594 }
6595 })
6596 .collect();
6597 let (fx_instruments, fx_relationships) =
6598 hedge_gen.generate(start_date, &fx_exposures);
6599 snapshot.hedging_instruments.extend(fx_instruments);
6600 snapshot.hedge_relationships.extend(fx_relationships);
6601 }
6602 }
6603
6604 if self.config.treasury.anomaly_rate > 0.0 {
6606 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
6607 seed + 92,
6608 self.config.treasury.anomaly_rate,
6609 );
6610 let mut labels = Vec::new();
6611 labels.extend(
6612 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
6613 );
6614 snapshot.treasury_anomaly_labels = labels;
6615 }
6616
6617 if self.config.treasury.cash_positioning.enabled {
6619 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
6620
6621 for payment in &document_flows.payments {
6623 cash_flows.push(datasynth_generators::treasury::CashFlow {
6624 date: payment.header.document_date,
6625 account_id: format!("{entity_id}-MAIN"),
6626 amount: payment.amount,
6627 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
6628 });
6629 }
6630
6631 for chain in &document_flows.o2c_chains {
6633 if let Some(ref receipt) = chain.customer_receipt {
6634 cash_flows.push(datasynth_generators::treasury::CashFlow {
6635 date: receipt.header.document_date,
6636 account_id: format!("{entity_id}-MAIN"),
6637 amount: receipt.amount,
6638 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
6639 });
6640 }
6641 for receipt in &chain.remainder_receipts {
6643 cash_flows.push(datasynth_generators::treasury::CashFlow {
6644 date: receipt.header.document_date,
6645 account_id: format!("{entity_id}-MAIN"),
6646 amount: receipt.amount,
6647 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
6648 });
6649 }
6650 }
6651
6652 if !cash_flows.is_empty() {
6653 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
6654 self.config.treasury.cash_positioning.clone(),
6655 seed + 93,
6656 );
6657 let account_id = format!("{entity_id}-MAIN");
6658 snapshot.cash_positions = cash_gen.generate(
6659 entity_id,
6660 &account_id,
6661 currency,
6662 &cash_flows,
6663 start_date,
6664 start_date + chrono::Months::new(self.config.global.period_months),
6665 rust_decimal::Decimal::new(1_000_000, 0), );
6667 }
6668 }
6669
6670 if self.config.treasury.cash_forecasting.enabled {
6672 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6673
6674 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
6676 .ar_invoices
6677 .iter()
6678 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
6679 .map(|inv| {
6680 let days_past_due = if inv.due_date < end_date {
6681 (end_date - inv.due_date).num_days().max(0) as u32
6682 } else {
6683 0
6684 };
6685 datasynth_generators::treasury::ArAgingItem {
6686 expected_date: inv.due_date,
6687 amount: inv.amount_remaining,
6688 days_past_due,
6689 document_id: inv.invoice_number.clone(),
6690 }
6691 })
6692 .collect();
6693
6694 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
6696 .ap_invoices
6697 .iter()
6698 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
6699 .map(|inv| datasynth_generators::treasury::ApAgingItem {
6700 payment_date: inv.due_date,
6701 amount: inv.amount_remaining,
6702 document_id: inv.invoice_number.clone(),
6703 })
6704 .collect();
6705
6706 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
6707 self.config.treasury.cash_forecasting.clone(),
6708 seed + 94,
6709 );
6710 let forecast = forecast_gen.generate(
6711 entity_id,
6712 currency,
6713 end_date,
6714 &ar_items,
6715 &ap_items,
6716 &[], );
6718 snapshot.cash_forecasts.push(forecast);
6719 }
6720
6721 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
6723 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6724 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
6725 self.config.treasury.cash_pooling.clone(),
6726 seed + 95,
6727 );
6728
6729 let account_ids: Vec<String> = snapshot
6731 .cash_positions
6732 .iter()
6733 .map(|cp| cp.bank_account_id.clone())
6734 .collect::<std::collections::HashSet<_>>()
6735 .into_iter()
6736 .collect();
6737
6738 if let Some(pool) =
6739 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
6740 {
6741 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
6743 for cp in &snapshot.cash_positions {
6744 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
6745 }
6746
6747 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
6748 latest_balances
6749 .into_iter()
6750 .filter(|(id, _)| pool.participant_accounts.contains(id))
6751 .map(
6752 |(id, balance)| datasynth_generators::treasury::AccountBalance {
6753 account_id: id,
6754 balance,
6755 },
6756 )
6757 .collect();
6758
6759 let sweeps =
6760 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
6761 snapshot.cash_pool_sweeps = sweeps;
6762 snapshot.cash_pools.push(pool);
6763 }
6764 }
6765
6766 if self.config.treasury.bank_guarantees.enabled {
6768 let vendor_names: Vec<String> = self
6769 .master_data
6770 .vendors
6771 .iter()
6772 .map(|v| v.name.clone())
6773 .collect();
6774 if !vendor_names.is_empty() {
6775 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
6776 self.config.treasury.bank_guarantees.clone(),
6777 seed + 96,
6778 );
6779 snapshot.bank_guarantees =
6780 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
6781 }
6782 }
6783
6784 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
6786 let entity_ids: Vec<String> = self
6787 .config
6788 .companies
6789 .iter()
6790 .map(|c| c.code.clone())
6791 .collect();
6792 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
6793 .matched_pairs
6794 .iter()
6795 .map(|mp| {
6796 (
6797 mp.seller_company.clone(),
6798 mp.buyer_company.clone(),
6799 mp.amount,
6800 )
6801 })
6802 .collect();
6803 if entity_ids.len() >= 2 {
6804 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
6805 self.config.treasury.netting.clone(),
6806 seed + 97,
6807 );
6808 snapshot.netting_runs = netting_gen.generate(
6809 &entity_ids,
6810 currency,
6811 start_date,
6812 self.config.global.period_months,
6813 &ic_amounts,
6814 );
6815 }
6816 }
6817
6818 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
6819 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
6820 stats.cash_position_count = snapshot.cash_positions.len();
6821 stats.cash_forecast_count = snapshot.cash_forecasts.len();
6822 stats.cash_pool_count = snapshot.cash_pools.len();
6823
6824 info!(
6825 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs",
6826 snapshot.debt_instruments.len(),
6827 snapshot.hedging_instruments.len(),
6828 snapshot.cash_positions.len(),
6829 snapshot.cash_forecasts.len(),
6830 snapshot.cash_pools.len(),
6831 snapshot.bank_guarantees.len(),
6832 snapshot.netting_runs.len(),
6833 );
6834 self.check_resources_with_log("post-treasury")?;
6835
6836 Ok(snapshot)
6837 }
6838
6839 fn phase_project_accounting(
6841 &mut self,
6842 document_flows: &DocumentFlowSnapshot,
6843 hr: &HrSnapshot,
6844 stats: &mut EnhancedGenerationStatistics,
6845 ) -> SynthResult<ProjectAccountingSnapshot> {
6846 if !self.phase_config.generate_project_accounting {
6847 debug!("Phase 23: Skipped (project accounting disabled)");
6848 return Ok(ProjectAccountingSnapshot::default());
6849 }
6850 info!("Phase 23: Generating Project Accounting Data");
6851
6852 let seed = self.seed;
6853 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6854 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6855 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6856 let company_code = self
6857 .config
6858 .companies
6859 .first()
6860 .map(|c| c.code.as_str())
6861 .unwrap_or("1000");
6862
6863 let mut snapshot = ProjectAccountingSnapshot::default();
6864
6865 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
6867 self.config.project_accounting.clone(),
6868 seed + 95,
6869 );
6870 let pool = project_gen.generate(company_code, start_date, end_date);
6871 snapshot.projects = pool.projects.clone();
6872
6873 {
6875 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
6876 Vec::new();
6877
6878 for te in &hr.time_entries {
6880 let total_hours = te.hours_regular + te.hours_overtime;
6881 if total_hours > 0.0 {
6882 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
6883 id: te.entry_id.clone(),
6884 entity_id: company_code.to_string(),
6885 date: te.date,
6886 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
6887 .unwrap_or(rust_decimal::Decimal::ZERO),
6888 source_type: CostSourceType::TimeEntry,
6889 hours: Some(
6890 rust_decimal::Decimal::from_f64_retain(total_hours)
6891 .unwrap_or(rust_decimal::Decimal::ZERO),
6892 ),
6893 });
6894 }
6895 }
6896
6897 for er in &hr.expense_reports {
6899 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
6900 id: er.report_id.clone(),
6901 entity_id: company_code.to_string(),
6902 date: er.submission_date,
6903 amount: er.total_amount,
6904 source_type: CostSourceType::ExpenseReport,
6905 hours: None,
6906 });
6907 }
6908
6909 for po in &document_flows.purchase_orders {
6911 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
6912 id: po.header.document_id.clone(),
6913 entity_id: company_code.to_string(),
6914 date: po.header.document_date,
6915 amount: po.total_net_amount,
6916 source_type: CostSourceType::PurchaseOrder,
6917 hours: None,
6918 });
6919 }
6920
6921 for vi in &document_flows.vendor_invoices {
6923 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
6924 id: vi.header.document_id.clone(),
6925 entity_id: company_code.to_string(),
6926 date: vi.header.document_date,
6927 amount: vi.payable_amount,
6928 source_type: CostSourceType::VendorInvoice,
6929 hours: None,
6930 });
6931 }
6932
6933 if !source_docs.is_empty() && !pool.projects.is_empty() {
6934 let mut cost_gen =
6935 datasynth_generators::project_accounting::ProjectCostGenerator::new(
6936 self.config.project_accounting.cost_allocation.clone(),
6937 seed + 99,
6938 );
6939 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
6940 }
6941 }
6942
6943 if self.config.project_accounting.change_orders.enabled {
6945 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
6946 self.config.project_accounting.change_orders.clone(),
6947 seed + 96,
6948 );
6949 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
6950 }
6951
6952 if self.config.project_accounting.milestones.enabled {
6954 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
6955 self.config.project_accounting.milestones.clone(),
6956 seed + 97,
6957 );
6958 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
6959 }
6960
6961 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
6963 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
6964 self.config.project_accounting.earned_value.clone(),
6965 seed + 98,
6966 );
6967 snapshot.earned_value_metrics =
6968 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
6969 }
6970
6971 stats.project_count = snapshot.projects.len();
6972 stats.project_change_order_count = snapshot.change_orders.len();
6973 stats.project_cost_line_count = snapshot.cost_lines.len();
6974
6975 info!(
6976 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
6977 snapshot.projects.len(),
6978 snapshot.change_orders.len(),
6979 snapshot.milestones.len(),
6980 snapshot.earned_value_metrics.len()
6981 );
6982 self.check_resources_with_log("post-project-accounting")?;
6983
6984 Ok(snapshot)
6985 }
6986
6987 fn phase_evolution_events(
6989 &mut self,
6990 stats: &mut EnhancedGenerationStatistics,
6991 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
6992 if !self.phase_config.generate_evolution_events {
6993 debug!("Phase 24: Skipped (evolution events disabled)");
6994 return Ok((Vec::new(), Vec::new()));
6995 }
6996 info!("Phase 24: Generating Process Evolution + Organizational Events");
6997
6998 let seed = self.seed;
6999 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7000 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7001 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7002
7003 let mut proc_gen =
7005 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
7006 seed + 100,
7007 );
7008 let process_events = proc_gen.generate_events(start_date, end_date);
7009
7010 let company_codes: Vec<String> = self
7012 .config
7013 .companies
7014 .iter()
7015 .map(|c| c.code.clone())
7016 .collect();
7017 let mut org_gen =
7018 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
7019 seed + 101,
7020 );
7021 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
7022
7023 stats.process_evolution_event_count = process_events.len();
7024 stats.organizational_event_count = org_events.len();
7025
7026 info!(
7027 "Evolution events generated: {} process evolution, {} organizational",
7028 process_events.len(),
7029 org_events.len()
7030 );
7031 self.check_resources_with_log("post-evolution-events")?;
7032
7033 Ok((process_events, org_events))
7034 }
7035
7036 fn phase_disruption_events(
7039 &self,
7040 stats: &mut EnhancedGenerationStatistics,
7041 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
7042 if !self.config.organizational_events.enabled {
7043 debug!("Phase 24b: Skipped (organizational events disabled)");
7044 return Ok(Vec::new());
7045 }
7046 info!("Phase 24b: Generating Disruption Events");
7047
7048 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7049 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7050 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7051
7052 let company_codes: Vec<String> = self
7053 .config
7054 .companies
7055 .iter()
7056 .map(|c| c.code.clone())
7057 .collect();
7058
7059 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
7060 let events = gen.generate(start_date, end_date, &company_codes);
7061
7062 stats.disruption_event_count = events.len();
7063 info!("Disruption events generated: {} events", events.len());
7064 self.check_resources_with_log("post-disruption-events")?;
7065
7066 Ok(events)
7067 }
7068
7069 fn phase_counterfactuals(
7076 &self,
7077 journal_entries: &[JournalEntry],
7078 stats: &mut EnhancedGenerationStatistics,
7079 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
7080 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
7081 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
7082 return Ok(Vec::new());
7083 }
7084 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
7085
7086 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
7087
7088 let mut gen = CounterfactualGenerator::new(self.seed + 110);
7089
7090 let specs = [
7092 CounterfactualSpec::ScaleAmount { factor: 2.5 },
7093 CounterfactualSpec::ShiftDate { days: -14 },
7094 CounterfactualSpec::SelfApprove,
7095 CounterfactualSpec::SplitTransaction { split_count: 3 },
7096 ];
7097
7098 let pairs: Vec<_> = journal_entries
7099 .iter()
7100 .enumerate()
7101 .map(|(i, je)| {
7102 let spec = &specs[i % specs.len()];
7103 gen.generate(je, spec)
7104 })
7105 .collect();
7106
7107 stats.counterfactual_pair_count = pairs.len();
7108 info!(
7109 "Counterfactual pairs generated: {} pairs from {} journal entries",
7110 pairs.len(),
7111 journal_entries.len()
7112 );
7113 self.check_resources_with_log("post-counterfactuals")?;
7114
7115 Ok(pairs)
7116 }
7117
7118 fn phase_red_flags(
7125 &self,
7126 anomaly_labels: &AnomalyLabels,
7127 document_flows: &DocumentFlowSnapshot,
7128 stats: &mut EnhancedGenerationStatistics,
7129 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
7130 if !self.config.fraud.enabled {
7131 debug!("Phase 26: Skipped (fraud generation disabled)");
7132 return Ok(Vec::new());
7133 }
7134 info!("Phase 26: Generating Fraud Red-Flag Indicators");
7135
7136 use datasynth_generators::fraud::RedFlagGenerator;
7137
7138 let generator = RedFlagGenerator::new();
7139 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
7140
7141 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
7143 .labels
7144 .iter()
7145 .filter(|label| label.anomaly_type.is_intentional())
7146 .map(|label| label.document_id.as_str())
7147 .collect();
7148
7149 let mut flags = Vec::new();
7150
7151 for chain in &document_flows.p2p_chains {
7153 let doc_id = &chain.purchase_order.header.document_id;
7154 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
7155 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
7156 }
7157
7158 for chain in &document_flows.o2c_chains {
7160 let doc_id = &chain.sales_order.header.document_id;
7161 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
7162 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
7163 }
7164
7165 stats.red_flag_count = flags.len();
7166 info!(
7167 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
7168 flags.len(),
7169 document_flows.p2p_chains.len(),
7170 document_flows.o2c_chains.len(),
7171 fraud_doc_ids.len()
7172 );
7173 self.check_resources_with_log("post-red-flags")?;
7174
7175 Ok(flags)
7176 }
7177
7178 fn phase_collusion_rings(
7184 &mut self,
7185 stats: &mut EnhancedGenerationStatistics,
7186 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
7187 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
7188 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
7189 return Ok(Vec::new());
7190 }
7191 info!("Phase 26b: Generating Collusion Rings");
7192
7193 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7194 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7195 let months = self.config.global.period_months;
7196
7197 let employee_ids: Vec<String> = self
7198 .master_data
7199 .employees
7200 .iter()
7201 .map(|e| e.employee_id.clone())
7202 .collect();
7203 let vendor_ids: Vec<String> = self
7204 .master_data
7205 .vendors
7206 .iter()
7207 .map(|v| v.vendor_id.clone())
7208 .collect();
7209
7210 let mut generator =
7211 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
7212 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
7213
7214 stats.collusion_ring_count = rings.len();
7215 info!(
7216 "Collusion rings generated: {} rings, total members: {}",
7217 rings.len(),
7218 rings
7219 .iter()
7220 .map(datasynth_generators::fraud::CollusionRing::size)
7221 .sum::<usize>()
7222 );
7223 self.check_resources_with_log("post-collusion-rings")?;
7224
7225 Ok(rings)
7226 }
7227
7228 fn phase_temporal_attributes(
7233 &mut self,
7234 stats: &mut EnhancedGenerationStatistics,
7235 ) -> SynthResult<
7236 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
7237 > {
7238 if !self.config.temporal_attributes.enabled {
7239 debug!("Phase 27: Skipped (temporal attributes disabled)");
7240 return Ok(Vec::new());
7241 }
7242 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
7243
7244 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7245 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7246
7247 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
7251 || self.config.temporal_attributes.enabled;
7252 let temporal_config = {
7253 let ta = &self.config.temporal_attributes;
7254 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
7255 .enabled(ta.enabled)
7256 .closed_probability(ta.valid_time.closed_probability)
7257 .avg_validity_days(ta.valid_time.avg_validity_days)
7258 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
7259 .with_version_chains(if generate_version_chains {
7260 ta.avg_versions_per_entity
7261 } else {
7262 1.0
7263 })
7264 .build()
7265 };
7266 let temporal_config = if self
7268 .config
7269 .temporal_attributes
7270 .transaction_time
7271 .allow_backdating
7272 {
7273 let mut c = temporal_config;
7274 c.transaction_time.allow_backdating = true;
7275 c.transaction_time.backdating_probability = self
7276 .config
7277 .temporal_attributes
7278 .transaction_time
7279 .backdating_probability;
7280 c.transaction_time.max_backdate_days = self
7281 .config
7282 .temporal_attributes
7283 .transaction_time
7284 .max_backdate_days;
7285 c
7286 } else {
7287 temporal_config
7288 };
7289 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
7290 temporal_config,
7291 self.seed + 130,
7292 start_date,
7293 );
7294
7295 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
7296 self.seed + 130,
7297 datasynth_core::GeneratorType::Vendor,
7298 );
7299
7300 let chains: Vec<_> = self
7301 .master_data
7302 .vendors
7303 .iter()
7304 .map(|vendor| {
7305 let id = uuid_factory.next();
7306 gen.generate_version_chain(vendor.clone(), id)
7307 })
7308 .collect();
7309
7310 stats.temporal_version_chain_count = chains.len();
7311 info!("Temporal version chains generated: {} chains", chains.len());
7312 self.check_resources_with_log("post-temporal-attributes")?;
7313
7314 Ok(chains)
7315 }
7316
7317 fn phase_entity_relationships(
7327 &self,
7328 journal_entries: &[JournalEntry],
7329 document_flows: &DocumentFlowSnapshot,
7330 stats: &mut EnhancedGenerationStatistics,
7331 ) -> SynthResult<(
7332 Option<datasynth_core::models::EntityGraph>,
7333 Vec<datasynth_core::models::CrossProcessLink>,
7334 )> {
7335 use datasynth_generators::relationships::{
7336 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
7337 TransactionSummary,
7338 };
7339
7340 let rs_enabled = self.config.relationship_strength.enabled;
7341 let cpl_enabled = self.config.cross_process_links.enabled
7342 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
7343
7344 if !rs_enabled && !cpl_enabled {
7345 debug!(
7346 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
7347 );
7348 return Ok((None, Vec::new()));
7349 }
7350
7351 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
7352
7353 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7354 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7355
7356 let company_code = self
7357 .config
7358 .companies
7359 .first()
7360 .map(|c| c.code.as_str())
7361 .unwrap_or("1000");
7362
7363 let gen_config = EntityGraphConfig {
7365 enabled: rs_enabled,
7366 cross_process: datasynth_generators::relationships::CrossProcessConfig {
7367 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
7368 enable_return_flows: false,
7369 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
7370 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
7371 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
7373 1.0
7374 } else {
7375 0.30
7376 },
7377 ..Default::default()
7378 },
7379 strength_config: datasynth_generators::relationships::StrengthConfig {
7380 transaction_volume_weight: self
7381 .config
7382 .relationship_strength
7383 .calculation
7384 .transaction_volume_weight,
7385 transaction_count_weight: self
7386 .config
7387 .relationship_strength
7388 .calculation
7389 .transaction_count_weight,
7390 duration_weight: self
7391 .config
7392 .relationship_strength
7393 .calculation
7394 .relationship_duration_weight,
7395 recency_weight: self.config.relationship_strength.calculation.recency_weight,
7396 mutual_connections_weight: self
7397 .config
7398 .relationship_strength
7399 .calculation
7400 .mutual_connections_weight,
7401 recency_half_life_days: self
7402 .config
7403 .relationship_strength
7404 .calculation
7405 .recency_half_life_days,
7406 },
7407 ..Default::default()
7408 };
7409
7410 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
7411
7412 let entity_graph = if rs_enabled {
7414 let vendor_summaries: Vec<EntitySummary> = self
7416 .master_data
7417 .vendors
7418 .iter()
7419 .map(|v| {
7420 EntitySummary::new(
7421 &v.vendor_id,
7422 &v.name,
7423 datasynth_core::models::GraphEntityType::Vendor,
7424 start_date,
7425 )
7426 })
7427 .collect();
7428
7429 let customer_summaries: Vec<EntitySummary> = self
7430 .master_data
7431 .customers
7432 .iter()
7433 .map(|c| {
7434 EntitySummary::new(
7435 &c.customer_id,
7436 &c.name,
7437 datasynth_core::models::GraphEntityType::Customer,
7438 start_date,
7439 )
7440 })
7441 .collect();
7442
7443 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
7448 std::collections::HashMap::new();
7449
7450 for je in journal_entries {
7451 let cc = je.header.company_code.clone();
7452 let posting_date = je.header.posting_date;
7453 for line in &je.lines {
7454 if let Some(ref tp) = line.trading_partner {
7455 let amount = if line.debit_amount > line.credit_amount {
7456 line.debit_amount
7457 } else {
7458 line.credit_amount
7459 };
7460 let entry = txn_summaries
7461 .entry((cc.clone(), tp.clone()))
7462 .or_insert_with(|| TransactionSummary {
7463 total_volume: rust_decimal::Decimal::ZERO,
7464 transaction_count: 0,
7465 first_transaction_date: posting_date,
7466 last_transaction_date: posting_date,
7467 related_entities: std::collections::HashSet::new(),
7468 });
7469 entry.total_volume += amount;
7470 entry.transaction_count += 1;
7471 if posting_date < entry.first_transaction_date {
7472 entry.first_transaction_date = posting_date;
7473 }
7474 if posting_date > entry.last_transaction_date {
7475 entry.last_transaction_date = posting_date;
7476 }
7477 entry.related_entities.insert(cc.clone());
7478 }
7479 }
7480 }
7481
7482 for chain in &document_flows.p2p_chains {
7485 let cc = chain.purchase_order.header.company_code.clone();
7486 let vendor_id = chain.purchase_order.vendor_id.clone();
7487 let po_date = chain.purchase_order.header.document_date;
7488 let amount = chain.purchase_order.total_net_amount;
7489
7490 let entry = txn_summaries
7491 .entry((cc.clone(), vendor_id))
7492 .or_insert_with(|| TransactionSummary {
7493 total_volume: rust_decimal::Decimal::ZERO,
7494 transaction_count: 0,
7495 first_transaction_date: po_date,
7496 last_transaction_date: po_date,
7497 related_entities: std::collections::HashSet::new(),
7498 });
7499 entry.total_volume += amount;
7500 entry.transaction_count += 1;
7501 if po_date < entry.first_transaction_date {
7502 entry.first_transaction_date = po_date;
7503 }
7504 if po_date > entry.last_transaction_date {
7505 entry.last_transaction_date = po_date;
7506 }
7507 entry.related_entities.insert(cc);
7508 }
7509
7510 for chain in &document_flows.o2c_chains {
7512 let cc = chain.sales_order.header.company_code.clone();
7513 let customer_id = chain.sales_order.customer_id.clone();
7514 let so_date = chain.sales_order.header.document_date;
7515 let amount = chain.sales_order.total_net_amount;
7516
7517 let entry = txn_summaries
7518 .entry((cc.clone(), customer_id))
7519 .or_insert_with(|| TransactionSummary {
7520 total_volume: rust_decimal::Decimal::ZERO,
7521 transaction_count: 0,
7522 first_transaction_date: so_date,
7523 last_transaction_date: so_date,
7524 related_entities: std::collections::HashSet::new(),
7525 });
7526 entry.total_volume += amount;
7527 entry.transaction_count += 1;
7528 if so_date < entry.first_transaction_date {
7529 entry.first_transaction_date = so_date;
7530 }
7531 if so_date > entry.last_transaction_date {
7532 entry.last_transaction_date = so_date;
7533 }
7534 entry.related_entities.insert(cc);
7535 }
7536
7537 let as_of_date = journal_entries
7538 .last()
7539 .map(|je| je.header.posting_date)
7540 .unwrap_or(start_date);
7541
7542 let graph = gen.generate_entity_graph(
7543 company_code,
7544 as_of_date,
7545 &vendor_summaries,
7546 &customer_summaries,
7547 &txn_summaries,
7548 );
7549
7550 info!(
7551 "Entity relationship graph: {} nodes, {} edges",
7552 graph.nodes.len(),
7553 graph.edges.len()
7554 );
7555 stats.entity_relationship_node_count = graph.nodes.len();
7556 stats.entity_relationship_edge_count = graph.edges.len();
7557 Some(graph)
7558 } else {
7559 None
7560 };
7561
7562 let cross_process_links = if cpl_enabled {
7564 let gr_refs: Vec<GoodsReceiptRef> = document_flows
7566 .p2p_chains
7567 .iter()
7568 .flat_map(|chain| {
7569 let vendor_id = chain.purchase_order.vendor_id.clone();
7570 let cc = chain.purchase_order.header.company_code.clone();
7571 chain.goods_receipts.iter().flat_map(move |gr| {
7572 gr.items.iter().filter_map({
7573 let doc_id = gr.header.document_id.clone();
7574 let v_id = vendor_id.clone();
7575 let company = cc.clone();
7576 let receipt_date = gr.header.document_date;
7577 move |item| {
7578 item.base
7579 .material_id
7580 .as_ref()
7581 .map(|mat_id| GoodsReceiptRef {
7582 document_id: doc_id.clone(),
7583 material_id: mat_id.clone(),
7584 quantity: item.base.quantity,
7585 receipt_date,
7586 vendor_id: v_id.clone(),
7587 company_code: company.clone(),
7588 })
7589 }
7590 })
7591 })
7592 })
7593 .collect();
7594
7595 let del_refs: Vec<DeliveryRef> = document_flows
7597 .o2c_chains
7598 .iter()
7599 .flat_map(|chain| {
7600 let customer_id = chain.sales_order.customer_id.clone();
7601 let cc = chain.sales_order.header.company_code.clone();
7602 chain.deliveries.iter().flat_map(move |del| {
7603 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
7604 del.items.iter().filter_map({
7605 let doc_id = del.header.document_id.clone();
7606 let c_id = customer_id.clone();
7607 let company = cc.clone();
7608 move |item| {
7609 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
7610 document_id: doc_id.clone(),
7611 material_id: mat_id.clone(),
7612 quantity: item.base.quantity,
7613 delivery_date,
7614 customer_id: c_id.clone(),
7615 company_code: company.clone(),
7616 })
7617 }
7618 })
7619 })
7620 })
7621 .collect();
7622
7623 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
7624 info!("Cross-process links generated: {} links", links.len());
7625 stats.cross_process_link_count = links.len();
7626 links
7627 } else {
7628 Vec::new()
7629 };
7630
7631 self.check_resources_with_log("post-entity-relationships")?;
7632 Ok((entity_graph, cross_process_links))
7633 }
7634
7635 fn phase_industry_data(
7637 &self,
7638 stats: &mut EnhancedGenerationStatistics,
7639 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
7640 if !self.config.industry_specific.enabled {
7641 return None;
7642 }
7643 info!("Phase 29: Generating industry-specific data");
7644 let output = datasynth_generators::industry::factory::generate_industry_output(
7645 self.config.global.industry,
7646 );
7647 stats.industry_gl_account_count = output.gl_accounts.len();
7648 info!(
7649 "Industry data generated: {} GL accounts for {:?}",
7650 output.gl_accounts.len(),
7651 self.config.global.industry
7652 );
7653 Some(output)
7654 }
7655
7656 fn phase_opening_balances(
7658 &mut self,
7659 coa: &Arc<ChartOfAccounts>,
7660 stats: &mut EnhancedGenerationStatistics,
7661 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
7662 if !self.config.balance.generate_opening_balances {
7663 debug!("Phase 3b: Skipped (opening balance generation disabled)");
7664 return Ok(Vec::new());
7665 }
7666 info!("Phase 3b: Generating Opening Balances");
7667
7668 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7669 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7670 let fiscal_year = start_date.year();
7671
7672 let industry = match self.config.global.industry {
7673 IndustrySector::Manufacturing => IndustryType::Manufacturing,
7674 IndustrySector::Retail => IndustryType::Retail,
7675 IndustrySector::FinancialServices => IndustryType::Financial,
7676 IndustrySector::Healthcare => IndustryType::Healthcare,
7677 IndustrySector::Technology => IndustryType::Technology,
7678 _ => IndustryType::Manufacturing,
7679 };
7680
7681 let config = datasynth_generators::OpeningBalanceConfig {
7682 industry,
7683 ..Default::default()
7684 };
7685 let mut gen =
7686 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
7687
7688 let mut results = Vec::new();
7689 for company in &self.config.companies {
7690 let spec = OpeningBalanceSpec::new(
7691 company.code.clone(),
7692 start_date,
7693 fiscal_year,
7694 company.currency.clone(),
7695 rust_decimal::Decimal::new(10_000_000, 0),
7696 industry,
7697 );
7698 let ob = gen.generate(&spec, coa, start_date, &company.code);
7699 results.push(ob);
7700 }
7701
7702 stats.opening_balance_count = results.len();
7703 info!("Opening balances generated: {} companies", results.len());
7704 self.check_resources_with_log("post-opening-balances")?;
7705
7706 Ok(results)
7707 }
7708
7709 fn phase_subledger_reconciliation(
7711 &mut self,
7712 subledger: &SubledgerSnapshot,
7713 entries: &[JournalEntry],
7714 stats: &mut EnhancedGenerationStatistics,
7715 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
7716 if !self.config.balance.reconcile_subledgers {
7717 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
7718 return Ok(Vec::new());
7719 }
7720 info!("Phase 9b: Reconciling GL to subledger balances");
7721
7722 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7723 .map(|d| d + chrono::Months::new(self.config.global.period_months))
7724 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7725
7726 let tracker_config = BalanceTrackerConfig {
7728 validate_on_each_entry: false,
7729 track_history: false,
7730 fail_on_validation_error: false,
7731 ..Default::default()
7732 };
7733 let recon_currency = self
7734 .config
7735 .companies
7736 .first()
7737 .map(|c| c.currency.clone())
7738 .unwrap_or_else(|| "USD".to_string());
7739 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
7740 let validation_errors = tracker.apply_entries(entries);
7741 if !validation_errors.is_empty() {
7742 warn!(
7743 error_count = validation_errors.len(),
7744 "Balance tracker encountered validation errors during subledger reconciliation"
7745 );
7746 for err in &validation_errors {
7747 debug!("Balance validation error: {:?}", err);
7748 }
7749 }
7750
7751 let mut engine = datasynth_generators::ReconciliationEngine::new(
7752 datasynth_generators::ReconciliationConfig::default(),
7753 );
7754
7755 let mut results = Vec::new();
7756 let company_code = self
7757 .config
7758 .companies
7759 .first()
7760 .map(|c| c.code.as_str())
7761 .unwrap_or("1000");
7762
7763 if !subledger.ar_invoices.is_empty() {
7765 let gl_balance = tracker
7766 .get_account_balance(
7767 company_code,
7768 datasynth_core::accounts::control_accounts::AR_CONTROL,
7769 )
7770 .map(|b| b.closing_balance)
7771 .unwrap_or_default();
7772 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
7773 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
7774 }
7775
7776 if !subledger.ap_invoices.is_empty() {
7778 let gl_balance = tracker
7779 .get_account_balance(
7780 company_code,
7781 datasynth_core::accounts::control_accounts::AP_CONTROL,
7782 )
7783 .map(|b| b.closing_balance)
7784 .unwrap_or_default();
7785 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
7786 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
7787 }
7788
7789 if !subledger.fa_records.is_empty() {
7791 let gl_asset_balance = tracker
7792 .get_account_balance(
7793 company_code,
7794 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
7795 )
7796 .map(|b| b.closing_balance)
7797 .unwrap_or_default();
7798 let gl_accum_depr_balance = tracker
7799 .get_account_balance(
7800 company_code,
7801 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
7802 )
7803 .map(|b| b.closing_balance)
7804 .unwrap_or_default();
7805 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
7806 subledger.fa_records.iter().collect();
7807 let (asset_recon, depr_recon) = engine.reconcile_fa(
7808 company_code,
7809 end_date,
7810 gl_asset_balance,
7811 gl_accum_depr_balance,
7812 &fa_refs,
7813 );
7814 results.push(asset_recon);
7815 results.push(depr_recon);
7816 }
7817
7818 if !subledger.inventory_positions.is_empty() {
7820 let gl_balance = tracker
7821 .get_account_balance(
7822 company_code,
7823 datasynth_core::accounts::control_accounts::INVENTORY,
7824 )
7825 .map(|b| b.closing_balance)
7826 .unwrap_or_default();
7827 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
7828 subledger.inventory_positions.iter().collect();
7829 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
7830 }
7831
7832 stats.subledger_reconciliation_count = results.len();
7833 info!(
7834 "Subledger reconciliation complete: {} reconciliations",
7835 results.len()
7836 );
7837 self.check_resources_with_log("post-subledger-reconciliation")?;
7838
7839 Ok(results)
7840 }
7841
7842 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
7844 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
7845
7846 let coa_framework = self.resolve_coa_framework();
7847
7848 let mut gen = ChartOfAccountsGenerator::new(
7849 self.config.chart_of_accounts.complexity,
7850 self.config.global.industry,
7851 self.seed,
7852 )
7853 .with_coa_framework(coa_framework);
7854
7855 let coa = Arc::new(gen.generate());
7856 self.coa = Some(Arc::clone(&coa));
7857
7858 if let Some(pb) = pb {
7859 pb.finish_with_message("Chart of Accounts complete");
7860 }
7861
7862 Ok(coa)
7863 }
7864
7865 fn generate_master_data(&mut self) -> SynthResult<()> {
7867 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7868 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7869 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7870
7871 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
7873
7874 let pack = self.primary_pack().clone();
7876
7877 let vendors_per_company = self.phase_config.vendors_per_company;
7879 let customers_per_company = self.phase_config.customers_per_company;
7880 let materials_per_company = self.phase_config.materials_per_company;
7881 let assets_per_company = self.phase_config.assets_per_company;
7882 let coa_framework = self.resolve_coa_framework();
7883
7884 let per_company_results: Vec<_> = self
7887 .config
7888 .companies
7889 .par_iter()
7890 .enumerate()
7891 .map(|(i, company)| {
7892 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
7893 let pack = pack.clone();
7894
7895 let mut vendor_gen = VendorGenerator::new(company_seed);
7897 vendor_gen.set_country_pack(pack.clone());
7898 vendor_gen.set_coa_framework(coa_framework);
7899 vendor_gen.set_counter_offset(i * vendors_per_company);
7900 let vendor_pool =
7901 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
7902
7903 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
7905 customer_gen.set_country_pack(pack.clone());
7906 customer_gen.set_coa_framework(coa_framework);
7907 customer_gen.set_counter_offset(i * customers_per_company);
7908 let customer_pool = customer_gen.generate_customer_pool(
7909 customers_per_company,
7910 &company.code,
7911 start_date,
7912 );
7913
7914 let mut material_gen = MaterialGenerator::new(company_seed + 200);
7916 material_gen.set_country_pack(pack.clone());
7917 material_gen.set_counter_offset(i * materials_per_company);
7918 let material_pool = material_gen.generate_material_pool(
7919 materials_per_company,
7920 &company.code,
7921 start_date,
7922 );
7923
7924 let mut asset_gen = AssetGenerator::new(company_seed + 300);
7926 let asset_pool = asset_gen.generate_asset_pool(
7927 assets_per_company,
7928 &company.code,
7929 (start_date, end_date),
7930 );
7931
7932 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
7934 employee_gen.set_country_pack(pack);
7935 let employee_pool =
7936 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
7937
7938 (
7939 vendor_pool.vendors,
7940 customer_pool.customers,
7941 material_pool.materials,
7942 asset_pool.assets,
7943 employee_pool.employees,
7944 )
7945 })
7946 .collect();
7947
7948 for (vendors, customers, materials, assets, employees) in per_company_results {
7950 self.master_data.vendors.extend(vendors);
7951 self.master_data.customers.extend(customers);
7952 self.master_data.materials.extend(materials);
7953 self.master_data.assets.extend(assets);
7954 self.master_data.employees.extend(employees);
7955 }
7956
7957 if let Some(pb) = &pb {
7958 pb.inc(total);
7959 }
7960 if let Some(pb) = pb {
7961 pb.finish_with_message("Master data generation complete");
7962 }
7963
7964 Ok(())
7965 }
7966
7967 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
7969 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7970 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7971
7972 let months = (self.config.global.period_months as usize).max(1);
7975 let p2p_count = self
7976 .phase_config
7977 .p2p_chains
7978 .min(self.master_data.vendors.len() * 2 * months);
7979 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
7980
7981 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
7983 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
7984 p2p_gen.set_country_pack(self.primary_pack().clone());
7985
7986 for i in 0..p2p_count {
7987 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
7988 let materials: Vec<&Material> = self
7989 .master_data
7990 .materials
7991 .iter()
7992 .skip(i % self.master_data.materials.len().max(1))
7993 .take(2.min(self.master_data.materials.len()))
7994 .collect();
7995
7996 if materials.is_empty() {
7997 continue;
7998 }
7999
8000 let company = &self.config.companies[i % self.config.companies.len()];
8001 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
8002 let fiscal_period = po_date.month() as u8;
8003 let created_by = if self.master_data.employees.is_empty() {
8004 "SYSTEM"
8005 } else {
8006 self.master_data.employees[i % self.master_data.employees.len()]
8007 .user_id
8008 .as_str()
8009 };
8010
8011 let chain = p2p_gen.generate_chain(
8012 &company.code,
8013 vendor,
8014 &materials,
8015 po_date,
8016 start_date.year() as u16,
8017 fiscal_period,
8018 created_by,
8019 );
8020
8021 flows.purchase_orders.push(chain.purchase_order.clone());
8023 flows.goods_receipts.extend(chain.goods_receipts.clone());
8024 if let Some(vi) = &chain.vendor_invoice {
8025 flows.vendor_invoices.push(vi.clone());
8026 }
8027 if let Some(payment) = &chain.payment {
8028 flows.payments.push(payment.clone());
8029 }
8030 for remainder in &chain.remainder_payments {
8031 flows.payments.push(remainder.clone());
8032 }
8033 flows.p2p_chains.push(chain);
8034
8035 if let Some(pb) = &pb {
8036 pb.inc(1);
8037 }
8038 }
8039
8040 if let Some(pb) = pb {
8041 pb.finish_with_message("P2P document flows complete");
8042 }
8043
8044 let o2c_count = self
8047 .phase_config
8048 .o2c_chains
8049 .min(self.master_data.customers.len() * 2 * months);
8050 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
8051
8052 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
8054 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
8055 o2c_gen.set_country_pack(self.primary_pack().clone());
8056
8057 for i in 0..o2c_count {
8058 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
8059 let materials: Vec<&Material> = self
8060 .master_data
8061 .materials
8062 .iter()
8063 .skip(i % self.master_data.materials.len().max(1))
8064 .take(2.min(self.master_data.materials.len()))
8065 .collect();
8066
8067 if materials.is_empty() {
8068 continue;
8069 }
8070
8071 let company = &self.config.companies[i % self.config.companies.len()];
8072 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
8073 let fiscal_period = so_date.month() as u8;
8074 let created_by = if self.master_data.employees.is_empty() {
8075 "SYSTEM"
8076 } else {
8077 self.master_data.employees[i % self.master_data.employees.len()]
8078 .user_id
8079 .as_str()
8080 };
8081
8082 let chain = o2c_gen.generate_chain(
8083 &company.code,
8084 customer,
8085 &materials,
8086 so_date,
8087 start_date.year() as u16,
8088 fiscal_period,
8089 created_by,
8090 );
8091
8092 flows.sales_orders.push(chain.sales_order.clone());
8094 flows.deliveries.extend(chain.deliveries.clone());
8095 if let Some(ci) = &chain.customer_invoice {
8096 flows.customer_invoices.push(ci.clone());
8097 }
8098 if let Some(receipt) = &chain.customer_receipt {
8099 flows.payments.push(receipt.clone());
8100 }
8101 for receipt in &chain.remainder_receipts {
8103 flows.payments.push(receipt.clone());
8104 }
8105 flows.o2c_chains.push(chain);
8106
8107 if let Some(pb) = &pb {
8108 pb.inc(1);
8109 }
8110 }
8111
8112 if let Some(pb) = pb {
8113 pb.finish_with_message("O2C document flows complete");
8114 }
8115
8116 Ok(())
8117 }
8118
8119 fn generate_journal_entries(
8121 &mut self,
8122 coa: &Arc<ChartOfAccounts>,
8123 ) -> SynthResult<Vec<JournalEntry>> {
8124 use datasynth_core::traits::ParallelGenerator;
8125
8126 let total = self.calculate_total_transactions();
8127 let pb = self.create_progress_bar(total, "Generating Journal Entries");
8128
8129 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8130 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8131 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8132
8133 let company_codes: Vec<String> = self
8134 .config
8135 .companies
8136 .iter()
8137 .map(|c| c.code.clone())
8138 .collect();
8139
8140 let generator = JournalEntryGenerator::new_with_params(
8141 self.config.transactions.clone(),
8142 Arc::clone(coa),
8143 company_codes,
8144 start_date,
8145 end_date,
8146 self.seed,
8147 );
8148
8149 let je_pack = self.primary_pack();
8153
8154 let mut generator = generator
8155 .with_master_data(
8156 &self.master_data.vendors,
8157 &self.master_data.customers,
8158 &self.master_data.materials,
8159 )
8160 .with_country_pack_names(je_pack)
8161 .with_country_pack_temporal(
8162 self.config.temporal_patterns.clone(),
8163 self.seed + 200,
8164 je_pack,
8165 )
8166 .with_persona_errors(true)
8167 .with_fraud_config(self.config.fraud.clone());
8168
8169 if self.config.temporal.enabled {
8171 let drift_config = self.config.temporal.to_core_config();
8172 generator = generator.with_drift_config(drift_config, self.seed + 100);
8173 }
8174
8175 self.check_memory_limit()?;
8177
8178 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
8180
8181 let entries = if total >= 10_000 && num_threads > 1 {
8185 let sub_generators = generator.split(num_threads);
8188 let entries_per_thread = total as usize / num_threads;
8189 let remainder = total as usize % num_threads;
8190
8191 let batches: Vec<Vec<JournalEntry>> = sub_generators
8192 .into_par_iter()
8193 .enumerate()
8194 .map(|(i, mut gen)| {
8195 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
8196 gen.generate_batch(count)
8197 })
8198 .collect();
8199
8200 let entries = JournalEntryGenerator::merge_results(batches);
8202
8203 if let Some(pb) = &pb {
8204 pb.inc(total);
8205 }
8206 entries
8207 } else {
8208 let mut entries = Vec::with_capacity(total as usize);
8210 for _ in 0..total {
8211 let entry = generator.generate();
8212 entries.push(entry);
8213 if let Some(pb) = &pb {
8214 pb.inc(1);
8215 }
8216 }
8217 entries
8218 };
8219
8220 if let Some(pb) = pb {
8221 pb.finish_with_message("Journal entries complete");
8222 }
8223
8224 Ok(entries)
8225 }
8226
8227 fn generate_jes_from_document_flows(
8232 &mut self,
8233 flows: &DocumentFlowSnapshot,
8234 ) -> SynthResult<Vec<JournalEntry>> {
8235 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
8236 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
8237
8238 let je_config = match self.resolve_coa_framework() {
8239 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
8240 CoAFramework::GermanSkr04 => {
8241 let fa = datasynth_core::FrameworkAccounts::german_gaap();
8242 DocumentFlowJeConfig::from(&fa)
8243 }
8244 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
8245 };
8246
8247 let populate_fec = je_config.populate_fec_fields;
8248 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
8249
8250 if populate_fec {
8254 let mut aux_lookup = std::collections::HashMap::new();
8255 for vendor in &self.master_data.vendors {
8256 if let Some(ref aux) = vendor.auxiliary_gl_account {
8257 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
8258 }
8259 }
8260 for customer in &self.master_data.customers {
8261 if let Some(ref aux) = customer.auxiliary_gl_account {
8262 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
8263 }
8264 }
8265 if !aux_lookup.is_empty() {
8266 generator.set_auxiliary_account_lookup(aux_lookup);
8267 }
8268 }
8269
8270 let mut entries = Vec::new();
8271
8272 for chain in &flows.p2p_chains {
8274 let chain_entries = generator.generate_from_p2p_chain(chain);
8275 entries.extend(chain_entries);
8276 if let Some(pb) = &pb {
8277 pb.inc(1);
8278 }
8279 }
8280
8281 for chain in &flows.o2c_chains {
8283 let chain_entries = generator.generate_from_o2c_chain(chain);
8284 entries.extend(chain_entries);
8285 if let Some(pb) = &pb {
8286 pb.inc(1);
8287 }
8288 }
8289
8290 if let Some(pb) = pb {
8291 pb.finish_with_message(format!(
8292 "Generated {} JEs from document flows",
8293 entries.len()
8294 ));
8295 }
8296
8297 Ok(entries)
8298 }
8299
8300 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
8306 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
8307
8308 let mut jes = Vec::with_capacity(payroll_runs.len());
8309
8310 for run in payroll_runs {
8311 let mut je = JournalEntry::new_simple(
8312 format!("JE-PAYROLL-{}", run.payroll_id),
8313 run.company_code.clone(),
8314 run.run_date,
8315 format!("Payroll {}", run.payroll_id),
8316 );
8317
8318 je.add_line(JournalEntryLine {
8320 line_number: 1,
8321 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
8322 debit_amount: run.total_gross,
8323 reference: Some(run.payroll_id.clone()),
8324 text: Some(format!(
8325 "Payroll {} ({} employees)",
8326 run.payroll_id, run.employee_count
8327 )),
8328 ..Default::default()
8329 });
8330
8331 je.add_line(JournalEntryLine {
8333 line_number: 2,
8334 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
8335 credit_amount: run.total_gross,
8336 reference: Some(run.payroll_id.clone()),
8337 ..Default::default()
8338 });
8339
8340 jes.push(je);
8341 }
8342
8343 jes
8344 }
8345
8346 fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
8352 use datasynth_core::accounts::{control_accounts, expense_accounts};
8353 use datasynth_core::models::ProductionOrderStatus;
8354
8355 let mut jes = Vec::new();
8356
8357 for order in production_orders {
8358 if !matches!(
8360 order.status,
8361 ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
8362 ) {
8363 continue;
8364 }
8365
8366 let mut je = JournalEntry::new_simple(
8367 format!("JE-MFG-{}", order.order_id),
8368 order.company_code.clone(),
8369 order.actual_end.unwrap_or(order.planned_end),
8370 format!(
8371 "Production Order {} - {}",
8372 order.order_id, order.material_description
8373 ),
8374 );
8375
8376 je.add_line(JournalEntryLine {
8378 line_number: 1,
8379 gl_account: expense_accounts::RAW_MATERIALS.to_string(),
8380 debit_amount: order.actual_cost,
8381 reference: Some(order.order_id.clone()),
8382 text: Some(format!(
8383 "Material consumption for {}",
8384 order.material_description
8385 )),
8386 quantity: Some(order.actual_quantity),
8387 unit: Some("EA".to_string()),
8388 ..Default::default()
8389 });
8390
8391 je.add_line(JournalEntryLine {
8393 line_number: 2,
8394 gl_account: control_accounts::INVENTORY.to_string(),
8395 credit_amount: order.actual_cost,
8396 reference: Some(order.order_id.clone()),
8397 ..Default::default()
8398 });
8399
8400 jes.push(je);
8401 }
8402
8403 jes
8404 }
8405
8406 fn link_document_flows_to_subledgers(
8411 &mut self,
8412 flows: &DocumentFlowSnapshot,
8413 ) -> SynthResult<SubledgerSnapshot> {
8414 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
8415 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
8416
8417 let vendor_names: std::collections::HashMap<String, String> = self
8419 .master_data
8420 .vendors
8421 .iter()
8422 .map(|v| (v.vendor_id.clone(), v.name.clone()))
8423 .collect();
8424 let customer_names: std::collections::HashMap<String, String> = self
8425 .master_data
8426 .customers
8427 .iter()
8428 .map(|c| (c.customer_id.clone(), c.name.clone()))
8429 .collect();
8430
8431 let mut linker = DocumentFlowLinker::new()
8432 .with_vendor_names(vendor_names)
8433 .with_customer_names(customer_names);
8434
8435 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
8437 if let Some(pb) = &pb {
8438 pb.inc(flows.vendor_invoices.len() as u64);
8439 }
8440
8441 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
8443 if let Some(pb) = &pb {
8444 pb.inc(flows.customer_invoices.len() as u64);
8445 }
8446
8447 if let Some(pb) = pb {
8448 pb.finish_with_message(format!(
8449 "Linked {} AP and {} AR invoices",
8450 ap_invoices.len(),
8451 ar_invoices.len()
8452 ));
8453 }
8454
8455 Ok(SubledgerSnapshot {
8456 ap_invoices,
8457 ar_invoices,
8458 fa_records: Vec::new(),
8459 inventory_positions: Vec::new(),
8460 inventory_movements: Vec::new(),
8461 ar_aging_reports: Vec::new(),
8463 ap_aging_reports: Vec::new(),
8464 depreciation_runs: Vec::new(),
8466 inventory_valuations: Vec::new(),
8467 })
8468 }
8469
8470 #[allow(clippy::too_many_arguments)]
8475 fn generate_ocpm_events(
8476 &mut self,
8477 flows: &DocumentFlowSnapshot,
8478 sourcing: &SourcingSnapshot,
8479 hr: &HrSnapshot,
8480 manufacturing: &ManufacturingSnapshot,
8481 banking: &BankingSnapshot,
8482 audit: &AuditSnapshot,
8483 financial_reporting: &FinancialReportingSnapshot,
8484 ) -> SynthResult<OcpmSnapshot> {
8485 let total_chains = flows.p2p_chains.len()
8486 + flows.o2c_chains.len()
8487 + sourcing.sourcing_projects.len()
8488 + hr.payroll_runs.len()
8489 + manufacturing.production_orders.len()
8490 + banking.customers.len()
8491 + audit.engagements.len()
8492 + financial_reporting.bank_reconciliations.len();
8493 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
8494
8495 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
8497 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
8498
8499 let ocpm_config = OcpmGeneratorConfig {
8501 generate_p2p: true,
8502 generate_o2c: true,
8503 generate_s2c: !sourcing.sourcing_projects.is_empty(),
8504 generate_h2r: !hr.payroll_runs.is_empty(),
8505 generate_mfg: !manufacturing.production_orders.is_empty(),
8506 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
8507 generate_bank: !banking.customers.is_empty(),
8508 generate_audit: !audit.engagements.is_empty(),
8509 happy_path_rate: 0.75,
8510 exception_path_rate: 0.20,
8511 error_path_rate: 0.05,
8512 add_duration_variability: true,
8513 duration_std_dev_factor: 0.3,
8514 };
8515 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
8516 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
8517
8518 let available_users: Vec<String> = self
8520 .master_data
8521 .employees
8522 .iter()
8523 .take(20)
8524 .map(|e| e.user_id.clone())
8525 .collect();
8526
8527 let fallback_date =
8529 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
8530 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8531 .unwrap_or(fallback_date);
8532 let base_midnight = base_date
8533 .and_hms_opt(0, 0, 0)
8534 .expect("midnight is always valid");
8535 let base_datetime =
8536 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
8537
8538 let add_result = |event_log: &mut OcpmEventLog,
8540 result: datasynth_ocpm::CaseGenerationResult| {
8541 for event in result.events {
8542 event_log.add_event(event);
8543 }
8544 for object in result.objects {
8545 event_log.add_object(object);
8546 }
8547 for relationship in result.relationships {
8548 event_log.add_relationship(relationship);
8549 }
8550 for corr in result.correlation_events {
8551 event_log.add_correlation_event(corr);
8552 }
8553 event_log.add_case(result.case_trace);
8554 };
8555
8556 for chain in &flows.p2p_chains {
8558 let po = &chain.purchase_order;
8559 let documents = P2pDocuments::new(
8560 &po.header.document_id,
8561 &po.vendor_id,
8562 &po.header.company_code,
8563 po.total_net_amount,
8564 &po.header.currency,
8565 &ocpm_uuid_factory,
8566 )
8567 .with_goods_receipt(
8568 chain
8569 .goods_receipts
8570 .first()
8571 .map(|gr| gr.header.document_id.as_str())
8572 .unwrap_or(""),
8573 &ocpm_uuid_factory,
8574 )
8575 .with_invoice(
8576 chain
8577 .vendor_invoice
8578 .as_ref()
8579 .map(|vi| vi.header.document_id.as_str())
8580 .unwrap_or(""),
8581 &ocpm_uuid_factory,
8582 )
8583 .with_payment(
8584 chain
8585 .payment
8586 .as_ref()
8587 .map(|p| p.header.document_id.as_str())
8588 .unwrap_or(""),
8589 &ocpm_uuid_factory,
8590 );
8591
8592 let start_time =
8593 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
8594 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
8595 add_result(&mut event_log, result);
8596
8597 if let Some(pb) = &pb {
8598 pb.inc(1);
8599 }
8600 }
8601
8602 for chain in &flows.o2c_chains {
8604 let so = &chain.sales_order;
8605 let documents = O2cDocuments::new(
8606 &so.header.document_id,
8607 &so.customer_id,
8608 &so.header.company_code,
8609 so.total_net_amount,
8610 &so.header.currency,
8611 &ocpm_uuid_factory,
8612 )
8613 .with_delivery(
8614 chain
8615 .deliveries
8616 .first()
8617 .map(|d| d.header.document_id.as_str())
8618 .unwrap_or(""),
8619 &ocpm_uuid_factory,
8620 )
8621 .with_invoice(
8622 chain
8623 .customer_invoice
8624 .as_ref()
8625 .map(|ci| ci.header.document_id.as_str())
8626 .unwrap_or(""),
8627 &ocpm_uuid_factory,
8628 )
8629 .with_receipt(
8630 chain
8631 .customer_receipt
8632 .as_ref()
8633 .map(|r| r.header.document_id.as_str())
8634 .unwrap_or(""),
8635 &ocpm_uuid_factory,
8636 );
8637
8638 let start_time =
8639 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
8640 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
8641 add_result(&mut event_log, result);
8642
8643 if let Some(pb) = &pb {
8644 pb.inc(1);
8645 }
8646 }
8647
8648 for project in &sourcing.sourcing_projects {
8650 let vendor_id = sourcing
8652 .contracts
8653 .iter()
8654 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
8655 .map(|c| c.vendor_id.clone())
8656 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
8657 .or_else(|| {
8658 self.master_data
8659 .vendors
8660 .first()
8661 .map(|v| v.vendor_id.clone())
8662 })
8663 .unwrap_or_else(|| "V000".to_string());
8664 let mut docs = S2cDocuments::new(
8665 &project.project_id,
8666 &vendor_id,
8667 &project.company_code,
8668 project.estimated_annual_spend,
8669 &ocpm_uuid_factory,
8670 );
8671 if let Some(rfx) = sourcing
8673 .rfx_events
8674 .iter()
8675 .find(|r| r.sourcing_project_id == project.project_id)
8676 {
8677 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
8678 if let Some(bid) = sourcing.bids.iter().find(|b| {
8680 b.rfx_id == rfx.rfx_id
8681 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
8682 }) {
8683 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
8684 }
8685 }
8686 if let Some(contract) = sourcing
8688 .contracts
8689 .iter()
8690 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
8691 {
8692 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
8693 }
8694 let start_time = base_datetime - chrono::Duration::days(90);
8695 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
8696 add_result(&mut event_log, result);
8697
8698 if let Some(pb) = &pb {
8699 pb.inc(1);
8700 }
8701 }
8702
8703 for run in &hr.payroll_runs {
8705 let employee_id = hr
8707 .payroll_line_items
8708 .iter()
8709 .find(|li| li.payroll_id == run.payroll_id)
8710 .map(|li| li.employee_id.as_str())
8711 .unwrap_or("EMP000");
8712 let docs = H2rDocuments::new(
8713 &run.payroll_id,
8714 employee_id,
8715 &run.company_code,
8716 run.total_gross,
8717 &ocpm_uuid_factory,
8718 )
8719 .with_time_entries(
8720 hr.time_entries
8721 .iter()
8722 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
8723 .take(5)
8724 .map(|t| t.entry_id.as_str())
8725 .collect(),
8726 );
8727 let start_time = base_datetime - chrono::Duration::days(30);
8728 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
8729 add_result(&mut event_log, result);
8730
8731 if let Some(pb) = &pb {
8732 pb.inc(1);
8733 }
8734 }
8735
8736 for order in &manufacturing.production_orders {
8738 let mut docs = MfgDocuments::new(
8739 &order.order_id,
8740 &order.material_id,
8741 &order.company_code,
8742 order.planned_quantity,
8743 &ocpm_uuid_factory,
8744 )
8745 .with_operations(
8746 order
8747 .operations
8748 .iter()
8749 .map(|o| format!("OP-{:04}", o.operation_number))
8750 .collect::<Vec<_>>()
8751 .iter()
8752 .map(std::string::String::as_str)
8753 .collect(),
8754 );
8755 if let Some(insp) = manufacturing
8757 .quality_inspections
8758 .iter()
8759 .find(|i| i.reference_id == order.order_id)
8760 {
8761 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
8762 }
8763 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
8765 cc.items
8766 .iter()
8767 .any(|item| item.material_id == order.material_id)
8768 }) {
8769 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
8770 }
8771 let start_time = base_datetime - chrono::Duration::days(60);
8772 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
8773 add_result(&mut event_log, result);
8774
8775 if let Some(pb) = &pb {
8776 pb.inc(1);
8777 }
8778 }
8779
8780 for customer in &banking.customers {
8782 let customer_id_str = customer.customer_id.to_string();
8783 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
8784 if let Some(account) = banking
8786 .accounts
8787 .iter()
8788 .find(|a| a.primary_owner_id == customer.customer_id)
8789 {
8790 let account_id_str = account.account_id.to_string();
8791 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
8792 let txn_strs: Vec<String> = banking
8794 .transactions
8795 .iter()
8796 .filter(|t| t.account_id == account.account_id)
8797 .take(10)
8798 .map(|t| t.transaction_id.to_string())
8799 .collect();
8800 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
8801 let txn_amounts: Vec<rust_decimal::Decimal> = banking
8802 .transactions
8803 .iter()
8804 .filter(|t| t.account_id == account.account_id)
8805 .take(10)
8806 .map(|t| t.amount)
8807 .collect();
8808 if !txn_ids.is_empty() {
8809 docs = docs.with_transactions(txn_ids, txn_amounts);
8810 }
8811 }
8812 let start_time = base_datetime - chrono::Duration::days(180);
8813 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
8814 add_result(&mut event_log, result);
8815
8816 if let Some(pb) = &pb {
8817 pb.inc(1);
8818 }
8819 }
8820
8821 for engagement in &audit.engagements {
8823 let engagement_id_str = engagement.engagement_id.to_string();
8824 let docs = AuditDocuments::new(
8825 &engagement_id_str,
8826 &engagement.client_entity_id,
8827 &ocpm_uuid_factory,
8828 )
8829 .with_workpapers(
8830 audit
8831 .workpapers
8832 .iter()
8833 .filter(|w| w.engagement_id == engagement.engagement_id)
8834 .take(10)
8835 .map(|w| w.workpaper_id.to_string())
8836 .collect::<Vec<_>>()
8837 .iter()
8838 .map(std::string::String::as_str)
8839 .collect(),
8840 )
8841 .with_evidence(
8842 audit
8843 .evidence
8844 .iter()
8845 .filter(|e| e.engagement_id == engagement.engagement_id)
8846 .take(10)
8847 .map(|e| e.evidence_id.to_string())
8848 .collect::<Vec<_>>()
8849 .iter()
8850 .map(std::string::String::as_str)
8851 .collect(),
8852 )
8853 .with_risks(
8854 audit
8855 .risk_assessments
8856 .iter()
8857 .filter(|r| r.engagement_id == engagement.engagement_id)
8858 .take(5)
8859 .map(|r| r.risk_id.to_string())
8860 .collect::<Vec<_>>()
8861 .iter()
8862 .map(std::string::String::as_str)
8863 .collect(),
8864 )
8865 .with_findings(
8866 audit
8867 .findings
8868 .iter()
8869 .filter(|f| f.engagement_id == engagement.engagement_id)
8870 .take(5)
8871 .map(|f| f.finding_id.to_string())
8872 .collect::<Vec<_>>()
8873 .iter()
8874 .map(std::string::String::as_str)
8875 .collect(),
8876 )
8877 .with_judgments(
8878 audit
8879 .judgments
8880 .iter()
8881 .filter(|j| j.engagement_id == engagement.engagement_id)
8882 .take(5)
8883 .map(|j| j.judgment_id.to_string())
8884 .collect::<Vec<_>>()
8885 .iter()
8886 .map(std::string::String::as_str)
8887 .collect(),
8888 );
8889 let start_time = base_datetime - chrono::Duration::days(120);
8890 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
8891 add_result(&mut event_log, result);
8892
8893 if let Some(pb) = &pb {
8894 pb.inc(1);
8895 }
8896 }
8897
8898 for recon in &financial_reporting.bank_reconciliations {
8900 let docs = BankReconDocuments::new(
8901 &recon.reconciliation_id,
8902 &recon.bank_account_id,
8903 &recon.company_code,
8904 recon.bank_ending_balance,
8905 &ocpm_uuid_factory,
8906 )
8907 .with_statement_lines(
8908 recon
8909 .statement_lines
8910 .iter()
8911 .take(20)
8912 .map(|l| l.line_id.as_str())
8913 .collect(),
8914 )
8915 .with_reconciling_items(
8916 recon
8917 .reconciling_items
8918 .iter()
8919 .take(10)
8920 .map(|i| i.item_id.as_str())
8921 .collect(),
8922 );
8923 let start_time = base_datetime - chrono::Duration::days(30);
8924 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
8925 add_result(&mut event_log, result);
8926
8927 if let Some(pb) = &pb {
8928 pb.inc(1);
8929 }
8930 }
8931
8932 event_log.compute_variants();
8934
8935 let summary = event_log.summary();
8936
8937 if let Some(pb) = pb {
8938 pb.finish_with_message(format!(
8939 "Generated {} OCPM events, {} objects",
8940 summary.event_count, summary.object_count
8941 ));
8942 }
8943
8944 Ok(OcpmSnapshot {
8945 event_count: summary.event_count,
8946 object_count: summary.object_count,
8947 case_count: summary.case_count,
8948 event_log: Some(event_log),
8949 })
8950 }
8951
8952 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
8954 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
8955
8956 let total_rate = if self.config.anomaly_injection.enabled {
8959 self.config.anomaly_injection.rates.total_rate
8960 } else if self.config.fraud.enabled {
8961 self.config.fraud.fraud_rate
8962 } else {
8963 0.02
8964 };
8965
8966 let fraud_rate = if self.config.anomaly_injection.enabled {
8967 self.config.anomaly_injection.rates.fraud_rate
8968 } else {
8969 AnomalyRateConfig::default().fraud_rate
8970 };
8971
8972 let error_rate = if self.config.anomaly_injection.enabled {
8973 self.config.anomaly_injection.rates.error_rate
8974 } else {
8975 AnomalyRateConfig::default().error_rate
8976 };
8977
8978 let process_issue_rate = if self.config.anomaly_injection.enabled {
8979 self.config.anomaly_injection.rates.process_rate
8980 } else {
8981 AnomalyRateConfig::default().process_issue_rate
8982 };
8983
8984 let anomaly_config = AnomalyInjectorConfig {
8985 rates: AnomalyRateConfig {
8986 total_rate,
8987 fraud_rate,
8988 error_rate,
8989 process_issue_rate,
8990 ..Default::default()
8991 },
8992 seed: self.seed + 5000,
8993 ..Default::default()
8994 };
8995
8996 let mut injector = AnomalyInjector::new(anomaly_config);
8997 let result = injector.process_entries(entries);
8998
8999 if let Some(pb) = &pb {
9000 pb.inc(entries.len() as u64);
9001 pb.finish_with_message("Anomaly injection complete");
9002 }
9003
9004 let mut by_type = HashMap::new();
9005 for label in &result.labels {
9006 *by_type
9007 .entry(format!("{:?}", label.anomaly_type))
9008 .or_insert(0) += 1;
9009 }
9010
9011 Ok(AnomalyLabels {
9012 labels: result.labels,
9013 summary: Some(result.summary),
9014 by_type,
9015 })
9016 }
9017
9018 fn validate_journal_entries(
9027 &mut self,
9028 entries: &[JournalEntry],
9029 ) -> SynthResult<BalanceValidationResult> {
9030 let clean_entries: Vec<&JournalEntry> = entries
9032 .iter()
9033 .filter(|e| {
9034 e.header
9035 .header_text
9036 .as_ref()
9037 .map(|t| !t.contains("[HUMAN_ERROR:"))
9038 .unwrap_or(true)
9039 })
9040 .collect();
9041
9042 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
9043
9044 let config = BalanceTrackerConfig {
9046 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
9050 };
9051 let validation_currency = self
9052 .config
9053 .companies
9054 .first()
9055 .map(|c| c.currency.clone())
9056 .unwrap_or_else(|| "USD".to_string());
9057
9058 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
9059
9060 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
9062 let errors = tracker.apply_entries(&clean_refs);
9063
9064 if let Some(pb) = &pb {
9065 pb.inc(entries.len() as u64);
9066 }
9067
9068 let has_unbalanced = tracker
9071 .get_validation_errors()
9072 .iter()
9073 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
9074
9075 let mut all_errors = errors;
9078 all_errors.extend(tracker.get_validation_errors().iter().cloned());
9079 let company_codes: Vec<String> = self
9080 .config
9081 .companies
9082 .iter()
9083 .map(|c| c.code.clone())
9084 .collect();
9085
9086 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9087 .map(|d| d + chrono::Months::new(self.config.global.period_months))
9088 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9089
9090 for company_code in &company_codes {
9091 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
9092 all_errors.push(e);
9093 }
9094 }
9095
9096 let stats = tracker.get_statistics();
9098
9099 let is_balanced = all_errors.is_empty();
9101
9102 if let Some(pb) = pb {
9103 let msg = if is_balanced {
9104 "Balance validation passed"
9105 } else {
9106 "Balance validation completed with errors"
9107 };
9108 pb.finish_with_message(msg);
9109 }
9110
9111 Ok(BalanceValidationResult {
9112 validated: true,
9113 is_balanced,
9114 entries_processed: stats.entries_processed,
9115 total_debits: stats.total_debits,
9116 total_credits: stats.total_credits,
9117 accounts_tracked: stats.accounts_tracked,
9118 companies_tracked: stats.companies_tracked,
9119 validation_errors: all_errors,
9120 has_unbalanced_entries: has_unbalanced,
9121 })
9122 }
9123
9124 fn inject_data_quality(
9129 &mut self,
9130 entries: &mut [JournalEntry],
9131 ) -> SynthResult<DataQualityStats> {
9132 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
9133
9134 let config = if self.config.data_quality.enabled {
9137 let dq = &self.config.data_quality;
9138 DataQualityConfig {
9139 enable_missing_values: dq.missing_values.enabled,
9140 missing_values: datasynth_generators::MissingValueConfig {
9141 global_rate: dq.effective_missing_rate(),
9142 ..Default::default()
9143 },
9144 enable_format_variations: dq.format_variations.enabled,
9145 format_variations: datasynth_generators::FormatVariationConfig {
9146 date_variation_rate: dq.format_variations.dates.rate,
9147 amount_variation_rate: dq.format_variations.amounts.rate,
9148 identifier_variation_rate: dq.format_variations.identifiers.rate,
9149 ..Default::default()
9150 },
9151 enable_duplicates: dq.duplicates.enabled,
9152 duplicates: datasynth_generators::DuplicateConfig {
9153 duplicate_rate: dq.effective_duplicate_rate(),
9154 ..Default::default()
9155 },
9156 enable_typos: dq.typos.enabled,
9157 typos: datasynth_generators::TypoConfig {
9158 char_error_rate: dq.effective_typo_rate(),
9159 ..Default::default()
9160 },
9161 enable_encoding_issues: dq.encoding_issues.enabled,
9162 encoding_issue_rate: dq.encoding_issues.rate,
9163 seed: self.seed.wrapping_add(77), track_statistics: true,
9165 }
9166 } else {
9167 DataQualityConfig::minimal()
9168 };
9169 let mut injector = DataQualityInjector::new(config);
9170
9171 injector.set_country_pack(self.primary_pack().clone());
9173
9174 let context = HashMap::new();
9176
9177 for entry in entries.iter_mut() {
9178 if let Some(text) = &entry.header.header_text {
9180 let processed = injector.process_text_field(
9181 "header_text",
9182 text,
9183 &entry.header.document_id.to_string(),
9184 &context,
9185 );
9186 match processed {
9187 Some(new_text) if new_text != *text => {
9188 entry.header.header_text = Some(new_text);
9189 }
9190 None => {
9191 entry.header.header_text = None; }
9193 _ => {}
9194 }
9195 }
9196
9197 if let Some(ref_text) = &entry.header.reference {
9199 let processed = injector.process_text_field(
9200 "reference",
9201 ref_text,
9202 &entry.header.document_id.to_string(),
9203 &context,
9204 );
9205 match processed {
9206 Some(new_text) if new_text != *ref_text => {
9207 entry.header.reference = Some(new_text);
9208 }
9209 None => {
9210 entry.header.reference = None;
9211 }
9212 _ => {}
9213 }
9214 }
9215
9216 let user_persona = entry.header.user_persona.clone();
9218 if let Some(processed) = injector.process_text_field(
9219 "user_persona",
9220 &user_persona,
9221 &entry.header.document_id.to_string(),
9222 &context,
9223 ) {
9224 if processed != user_persona {
9225 entry.header.user_persona = processed;
9226 }
9227 }
9228
9229 for line in &mut entry.lines {
9231 if let Some(ref text) = line.line_text {
9233 let processed = injector.process_text_field(
9234 "line_text",
9235 text,
9236 &entry.header.document_id.to_string(),
9237 &context,
9238 );
9239 match processed {
9240 Some(new_text) if new_text != *text => {
9241 line.line_text = Some(new_text);
9242 }
9243 None => {
9244 line.line_text = None;
9245 }
9246 _ => {}
9247 }
9248 }
9249
9250 if let Some(cc) = &line.cost_center {
9252 let processed = injector.process_text_field(
9253 "cost_center",
9254 cc,
9255 &entry.header.document_id.to_string(),
9256 &context,
9257 );
9258 match processed {
9259 Some(new_cc) if new_cc != *cc => {
9260 line.cost_center = Some(new_cc);
9261 }
9262 None => {
9263 line.cost_center = None;
9264 }
9265 _ => {}
9266 }
9267 }
9268 }
9269
9270 if let Some(pb) = &pb {
9271 pb.inc(1);
9272 }
9273 }
9274
9275 if let Some(pb) = pb {
9276 pb.finish_with_message("Data quality injection complete");
9277 }
9278
9279 Ok(injector.stats().clone())
9280 }
9281
9282 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
9293 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9294 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9295 let fiscal_year = start_date.year() as u16;
9296 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
9297
9298 let total_revenue: rust_decimal::Decimal = entries
9300 .iter()
9301 .flat_map(|e| e.lines.iter())
9302 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
9303 .map(|l| l.credit_amount)
9304 .sum();
9305
9306 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
9308
9309 let mut snapshot = AuditSnapshot::default();
9310
9311 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
9313 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
9314 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
9315 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
9316 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
9317 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
9318 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
9319 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
9320 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
9321 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
9322 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
9323 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
9324
9325 let accounts: Vec<String> = self
9327 .coa
9328 .as_ref()
9329 .map(|coa| {
9330 coa.get_postable_accounts()
9331 .iter()
9332 .map(|acc| acc.account_code().to_string())
9333 .collect()
9334 })
9335 .unwrap_or_default();
9336
9337 for (i, company) in self.config.companies.iter().enumerate() {
9339 let company_revenue = total_revenue
9341 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
9342
9343 let engagements_for_company =
9345 self.phase_config.audit_engagements / self.config.companies.len().max(1);
9346 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
9347 1
9348 } else {
9349 0
9350 };
9351
9352 for _eng_idx in 0..(engagements_for_company + extra) {
9353 let mut engagement = engagement_gen.generate_engagement(
9355 &company.code,
9356 &company.name,
9357 fiscal_year,
9358 period_end,
9359 company_revenue,
9360 None, );
9362
9363 if !self.master_data.employees.is_empty() {
9365 let emp_count = self.master_data.employees.len();
9366 let base = (i * 10 + _eng_idx) % emp_count;
9368 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
9369 .employee_id
9370 .clone();
9371 engagement.engagement_manager_id = self.master_data.employees
9372 [(base + 1) % emp_count]
9373 .employee_id
9374 .clone();
9375 let real_team: Vec<String> = engagement
9376 .team_member_ids
9377 .iter()
9378 .enumerate()
9379 .map(|(j, _)| {
9380 self.master_data.employees[(base + 2 + j) % emp_count]
9381 .employee_id
9382 .clone()
9383 })
9384 .collect();
9385 engagement.team_member_ids = real_team;
9386 }
9387
9388 if let Some(pb) = &pb {
9389 pb.inc(1);
9390 }
9391
9392 let team_members: Vec<String> = engagement.team_member_ids.clone();
9394
9395 let workpapers =
9397 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
9398
9399 for wp in &workpapers {
9400 if let Some(pb) = &pb {
9401 pb.inc(1);
9402 }
9403
9404 let evidence = evidence_gen.generate_evidence_for_workpaper(
9406 wp,
9407 &team_members,
9408 wp.preparer_date,
9409 );
9410
9411 for _ in &evidence {
9412 if let Some(pb) = &pb {
9413 pb.inc(1);
9414 }
9415 }
9416
9417 snapshot.evidence.extend(evidence);
9418 }
9419
9420 let risks =
9422 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
9423
9424 for _ in &risks {
9425 if let Some(pb) = &pb {
9426 pb.inc(1);
9427 }
9428 }
9429 snapshot.risk_assessments.extend(risks);
9430
9431 let findings = finding_gen.generate_findings_for_engagement(
9433 &engagement,
9434 &workpapers,
9435 &team_members,
9436 );
9437
9438 for _ in &findings {
9439 if let Some(pb) = &pb {
9440 pb.inc(1);
9441 }
9442 }
9443 snapshot.findings.extend(findings);
9444
9445 let judgments =
9447 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
9448
9449 for _ in &judgments {
9450 if let Some(pb) = &pb {
9451 pb.inc(1);
9452 }
9453 }
9454 snapshot.judgments.extend(judgments);
9455
9456 let (confs, resps) =
9458 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
9459 snapshot.confirmations.extend(confs);
9460 snapshot.confirmation_responses.extend(resps);
9461
9462 let team_pairs: Vec<(String, String)> = team_members
9464 .iter()
9465 .map(|id| {
9466 let name = self
9467 .master_data
9468 .employees
9469 .iter()
9470 .find(|e| e.employee_id == *id)
9471 .map(|e| e.display_name.clone())
9472 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
9473 (id.clone(), name)
9474 })
9475 .collect();
9476 for wp in &workpapers {
9477 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
9478 snapshot.procedure_steps.extend(steps);
9479 }
9480
9481 for wp in &workpapers {
9483 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
9484 snapshot.samples.push(sample);
9485 }
9486 }
9487
9488 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
9490 snapshot.analytical_results.extend(analytical);
9491
9492 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
9494 snapshot.ia_functions.push(ia_func);
9495 snapshot.ia_reports.extend(ia_reports);
9496
9497 let vendor_names: Vec<String> = self
9499 .master_data
9500 .vendors
9501 .iter()
9502 .map(|v| v.name.clone())
9503 .collect();
9504 let customer_names: Vec<String> = self
9505 .master_data
9506 .customers
9507 .iter()
9508 .map(|c| c.name.clone())
9509 .collect();
9510 let (parties, rp_txns) =
9511 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
9512 snapshot.related_parties.extend(parties);
9513 snapshot.related_party_transactions.extend(rp_txns);
9514
9515 snapshot.workpapers.extend(workpapers);
9517 snapshot.engagements.push(engagement);
9518 }
9519 }
9520
9521 if self.config.companies.len() > 1 {
9525 let group_materiality = snapshot
9528 .engagements
9529 .first()
9530 .map(|e| e.materiality)
9531 .unwrap_or_else(|| {
9532 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
9533 total_revenue * pct
9534 });
9535
9536 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
9537 let group_engagement_id = snapshot
9538 .engagements
9539 .first()
9540 .map(|e| e.engagement_id.to_string())
9541 .unwrap_or_else(|| "GROUP-ENG".to_string());
9542
9543 let component_snapshot = component_gen.generate(
9544 &self.config.companies,
9545 group_materiality,
9546 &group_engagement_id,
9547 period_end,
9548 );
9549
9550 snapshot.component_auditors = component_snapshot.component_auditors;
9551 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
9552 snapshot.component_instructions = component_snapshot.component_instructions;
9553 snapshot.component_reports = component_snapshot.component_reports;
9554
9555 info!(
9556 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
9557 snapshot.component_auditors.len(),
9558 snapshot.component_instructions.len(),
9559 snapshot.component_reports.len(),
9560 );
9561 }
9562
9563 {
9567 let applicable_framework = self
9568 .config
9569 .accounting_standards
9570 .framework
9571 .as_ref()
9572 .map(|f| format!("{f:?}"))
9573 .unwrap_or_else(|| "IFRS".to_string());
9574
9575 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
9576 let entity_count = self.config.companies.len();
9577
9578 for engagement in &snapshot.engagements {
9579 let company = self
9580 .config
9581 .companies
9582 .iter()
9583 .find(|c| c.code == engagement.client_entity_id);
9584 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
9585 let letter_date = engagement.planning_start;
9586 let letter = letter_gen.generate(
9587 &engagement.engagement_id.to_string(),
9588 &engagement.client_name,
9589 entity_count,
9590 engagement.period_end_date,
9591 currency,
9592 &applicable_framework,
9593 letter_date,
9594 );
9595 snapshot.engagement_letters.push(letter);
9596 }
9597
9598 info!(
9599 "ISA 210 engagement letters: {} generated",
9600 snapshot.engagement_letters.len()
9601 );
9602 }
9603
9604 {
9608 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
9609 let entity_codes: Vec<String> = self
9610 .config
9611 .companies
9612 .iter()
9613 .map(|c| c.code.clone())
9614 .collect();
9615 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
9616 info!(
9617 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
9618 subsequent.len(),
9619 subsequent
9620 .iter()
9621 .filter(|e| matches!(
9622 e.classification,
9623 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
9624 ))
9625 .count(),
9626 subsequent
9627 .iter()
9628 .filter(|e| matches!(
9629 e.classification,
9630 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
9631 ))
9632 .count(),
9633 );
9634 snapshot.subsequent_events = subsequent;
9635 }
9636
9637 {
9641 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
9642 let entity_codes: Vec<String> = self
9643 .config
9644 .companies
9645 .iter()
9646 .map(|c| c.code.clone())
9647 .collect();
9648 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
9649 info!(
9650 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
9651 soc_snapshot.service_organizations.len(),
9652 soc_snapshot.soc_reports.len(),
9653 soc_snapshot.user_entity_controls.len(),
9654 );
9655 snapshot.service_organizations = soc_snapshot.service_organizations;
9656 snapshot.soc_reports = soc_snapshot.soc_reports;
9657 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
9658 }
9659
9660 {
9664 use datasynth_generators::audit::going_concern_generator::{
9665 GoingConcernGenerator, GoingConcernInput,
9666 };
9667 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
9668 let entity_codes: Vec<String> = self
9669 .config
9670 .companies
9671 .iter()
9672 .map(|c| c.code.clone())
9673 .collect();
9674 let assessment_date = period_end + chrono::Duration::days(75);
9676 let period_label = format!("FY{}", period_end.year());
9677
9678 let gc_inputs: Vec<GoingConcernInput> = self
9689 .config
9690 .companies
9691 .iter()
9692 .map(|company| {
9693 let code = &company.code;
9694 let mut revenue = rust_decimal::Decimal::ZERO;
9695 let mut expenses = rust_decimal::Decimal::ZERO;
9696 let mut current_assets = rust_decimal::Decimal::ZERO;
9697 let mut current_liabs = rust_decimal::Decimal::ZERO;
9698 let mut total_debt = rust_decimal::Decimal::ZERO;
9699
9700 for je in entries.iter().filter(|je| &je.header.company_code == code) {
9701 for line in &je.lines {
9702 let acct = line.gl_account.as_str();
9703 let net = line.debit_amount - line.credit_amount;
9704 if acct.starts_with('4') {
9705 revenue -= net;
9707 } else if acct.starts_with('6') {
9708 expenses += net;
9710 }
9711 if acct.starts_with('1') {
9713 if let Ok(n) = acct.parse::<u32>() {
9715 if (1000..=1499).contains(&n) {
9716 current_assets += net;
9717 }
9718 }
9719 } else if acct.starts_with('2') {
9720 if let Ok(n) = acct.parse::<u32>() {
9721 if (2000..=2499).contains(&n) {
9722 current_liabs -= net; } else if (2500..=2999).contains(&n) {
9725 total_debt -= net;
9727 }
9728 }
9729 }
9730 }
9731 }
9732
9733 let net_income = revenue - expenses;
9734 let working_capital = current_assets - current_liabs;
9735 let operating_cash_flow = net_income;
9738
9739 GoingConcernInput {
9740 entity_code: code.clone(),
9741 net_income,
9742 working_capital,
9743 operating_cash_flow,
9744 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
9745 assessment_date,
9746 }
9747 })
9748 .collect();
9749
9750 let assessments = if gc_inputs.is_empty() {
9751 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
9752 } else {
9753 gc_gen.generate_for_entities_with_inputs(
9754 &entity_codes,
9755 &gc_inputs,
9756 assessment_date,
9757 &period_label,
9758 )
9759 };
9760 info!(
9761 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
9762 assessments.len(),
9763 assessments.iter().filter(|a| matches!(
9764 a.auditor_conclusion,
9765 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
9766 )).count(),
9767 assessments.iter().filter(|a| matches!(
9768 a.auditor_conclusion,
9769 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
9770 )).count(),
9771 assessments.iter().filter(|a| matches!(
9772 a.auditor_conclusion,
9773 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
9774 )).count(),
9775 );
9776 snapshot.going_concern_assessments = assessments;
9777 }
9778
9779 {
9783 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
9784 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
9785 let entity_codes: Vec<String> = self
9786 .config
9787 .companies
9788 .iter()
9789 .map(|c| c.code.clone())
9790 .collect();
9791 let estimates = est_gen.generate_for_entities(&entity_codes);
9792 info!(
9793 "ISA 540 accounting estimates: {} estimates across {} entities \
9794 ({} with retrospective reviews, {} with auditor point estimates)",
9795 estimates.len(),
9796 entity_codes.len(),
9797 estimates
9798 .iter()
9799 .filter(|e| e.retrospective_review.is_some())
9800 .count(),
9801 estimates
9802 .iter()
9803 .filter(|e| e.auditor_point_estimate.is_some())
9804 .count(),
9805 );
9806 snapshot.accounting_estimates = estimates;
9807 }
9808
9809 {
9813 use datasynth_generators::audit::audit_opinion_generator::{
9814 AuditOpinionGenerator, AuditOpinionInput,
9815 };
9816
9817 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
9818
9819 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
9821 .engagements
9822 .iter()
9823 .map(|eng| {
9824 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
9826 .findings
9827 .iter()
9828 .filter(|f| f.engagement_id == eng.engagement_id)
9829 .cloned()
9830 .collect();
9831
9832 let gc = snapshot
9834 .going_concern_assessments
9835 .iter()
9836 .find(|g| g.entity_code == eng.client_entity_id)
9837 .cloned();
9838
9839 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
9841 snapshot.component_reports.clone();
9842
9843 let auditor = self
9844 .master_data
9845 .employees
9846 .first()
9847 .map(|e| e.display_name.clone())
9848 .unwrap_or_else(|| "Global Audit LLP".into());
9849
9850 let partner = self
9851 .master_data
9852 .employees
9853 .get(1)
9854 .map(|e| e.display_name.clone())
9855 .unwrap_or_else(|| eng.engagement_partner_id.clone());
9856
9857 AuditOpinionInput {
9858 entity_code: eng.client_entity_id.clone(),
9859 entity_name: eng.client_name.clone(),
9860 engagement_id: eng.engagement_id,
9861 period_end: eng.period_end_date,
9862 findings: eng_findings,
9863 going_concern: gc,
9864 component_reports: comp_reports,
9865 is_us_listed: {
9867 let fw = &self.config.audit_standards.isa_compliance.framework;
9868 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
9869 },
9870 auditor_name: auditor,
9871 engagement_partner: partner,
9872 }
9873 })
9874 .collect();
9875
9876 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
9877
9878 for go in &generated_opinions {
9879 snapshot
9880 .key_audit_matters
9881 .extend(go.key_audit_matters.clone());
9882 }
9883 snapshot.audit_opinions = generated_opinions
9884 .into_iter()
9885 .map(|go| go.opinion)
9886 .collect();
9887
9888 info!(
9889 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
9890 snapshot.audit_opinions.len(),
9891 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
9892 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
9893 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
9894 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
9895 );
9896 }
9897
9898 {
9902 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
9903
9904 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
9905
9906 for (i, company) in self.config.companies.iter().enumerate() {
9907 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
9909 .engagements
9910 .iter()
9911 .filter(|e| e.client_entity_id == company.code)
9912 .map(|e| e.engagement_id)
9913 .collect();
9914
9915 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
9916 .findings
9917 .iter()
9918 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
9919 .cloned()
9920 .collect();
9921
9922 let emp_count = self.master_data.employees.len();
9924 let ceo_name = if emp_count > 0 {
9925 self.master_data.employees[i % emp_count]
9926 .display_name
9927 .clone()
9928 } else {
9929 format!("CEO of {}", company.name)
9930 };
9931 let cfo_name = if emp_count > 1 {
9932 self.master_data.employees[(i + 1) % emp_count]
9933 .display_name
9934 .clone()
9935 } else {
9936 format!("CFO of {}", company.name)
9937 };
9938
9939 let materiality = snapshot
9941 .engagements
9942 .iter()
9943 .find(|e| e.client_entity_id == company.code)
9944 .map(|e| e.materiality)
9945 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
9946
9947 let input = SoxGeneratorInput {
9948 company_code: company.code.clone(),
9949 company_name: company.name.clone(),
9950 fiscal_year,
9951 period_end,
9952 findings: company_findings,
9953 ceo_name,
9954 cfo_name,
9955 materiality_threshold: materiality,
9956 revenue_percent: rust_decimal::Decimal::from(100),
9957 assets_percent: rust_decimal::Decimal::from(100),
9958 significant_accounts: vec![
9959 "Revenue".into(),
9960 "Accounts Receivable".into(),
9961 "Inventory".into(),
9962 "Fixed Assets".into(),
9963 "Accounts Payable".into(),
9964 ],
9965 };
9966
9967 let (certs, assessment) = sox_gen.generate(&input);
9968 snapshot.sox_302_certifications.extend(certs);
9969 snapshot.sox_404_assessments.push(assessment);
9970 }
9971
9972 info!(
9973 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
9974 snapshot.sox_302_certifications.len(),
9975 snapshot.sox_404_assessments.len(),
9976 snapshot
9977 .sox_404_assessments
9978 .iter()
9979 .filter(|a| a.icfr_effective)
9980 .count(),
9981 snapshot
9982 .sox_404_assessments
9983 .iter()
9984 .filter(|a| !a.icfr_effective)
9985 .count(),
9986 );
9987 }
9988
9989 {
9993 use datasynth_generators::audit::materiality_generator::{
9994 MaterialityGenerator, MaterialityInput,
9995 };
9996
9997 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
9998
9999 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
10003
10004 for company in &self.config.companies {
10005 let company_code = company.code.clone();
10006
10007 let company_revenue: rust_decimal::Decimal = entries
10009 .iter()
10010 .filter(|e| e.company_code() == company_code)
10011 .flat_map(|e| e.lines.iter())
10012 .filter(|l| l.account_code.starts_with('4'))
10013 .map(|l| l.credit_amount)
10014 .sum();
10015
10016 let total_assets: rust_decimal::Decimal = entries
10018 .iter()
10019 .filter(|e| e.company_code() == company_code)
10020 .flat_map(|e| e.lines.iter())
10021 .filter(|l| l.account_code.starts_with('1'))
10022 .map(|l| l.debit_amount)
10023 .sum();
10024
10025 let total_expenses: rust_decimal::Decimal = entries
10027 .iter()
10028 .filter(|e| e.company_code() == company_code)
10029 .flat_map(|e| e.lines.iter())
10030 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
10031 .map(|l| l.debit_amount)
10032 .sum();
10033
10034 let equity: rust_decimal::Decimal = entries
10036 .iter()
10037 .filter(|e| e.company_code() == company_code)
10038 .flat_map(|e| e.lines.iter())
10039 .filter(|l| l.account_code.starts_with('3'))
10040 .map(|l| l.credit_amount)
10041 .sum();
10042
10043 let pretax_income = company_revenue - total_expenses;
10044
10045 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
10047 let w = rust_decimal::Decimal::try_from(company.volume_weight)
10048 .unwrap_or(rust_decimal::Decimal::ONE);
10049 (
10050 total_revenue * w,
10051 total_revenue * w * rust_decimal::Decimal::from(3),
10052 total_revenue * w * rust_decimal::Decimal::new(1, 1),
10053 total_revenue * w * rust_decimal::Decimal::from(2),
10054 )
10055 } else {
10056 (company_revenue, total_assets, pretax_income, equity)
10057 };
10058
10059 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
10062 entity_code: company_code,
10063 period: format!("FY{}", fiscal_year),
10064 revenue: rev,
10065 pretax_income: pti,
10066 total_assets: assets,
10067 equity: eq,
10068 gross_profit,
10069 });
10070 }
10071
10072 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
10073
10074 info!(
10075 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
10076 {} total assets, {} equity benchmarks)",
10077 snapshot.materiality_calculations.len(),
10078 snapshot
10079 .materiality_calculations
10080 .iter()
10081 .filter(|m| matches!(
10082 m.benchmark,
10083 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
10084 ))
10085 .count(),
10086 snapshot
10087 .materiality_calculations
10088 .iter()
10089 .filter(|m| matches!(
10090 m.benchmark,
10091 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
10092 ))
10093 .count(),
10094 snapshot
10095 .materiality_calculations
10096 .iter()
10097 .filter(|m| matches!(
10098 m.benchmark,
10099 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
10100 ))
10101 .count(),
10102 snapshot
10103 .materiality_calculations
10104 .iter()
10105 .filter(|m| matches!(
10106 m.benchmark,
10107 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
10108 ))
10109 .count(),
10110 );
10111 }
10112
10113 {
10117 use datasynth_generators::audit::cra_generator::CraGenerator;
10118
10119 let mut cra_gen = CraGenerator::new(self.seed + 8315);
10120
10121 for company in &self.config.companies {
10122 let cras = cra_gen.generate_for_entity(&company.code, None);
10123 snapshot.combined_risk_assessments.extend(cras);
10124 }
10125
10126 let significant_count = snapshot
10127 .combined_risk_assessments
10128 .iter()
10129 .filter(|c| c.significant_risk)
10130 .count();
10131 let high_cra_count = snapshot
10132 .combined_risk_assessments
10133 .iter()
10134 .filter(|c| {
10135 matches!(
10136 c.combined_risk,
10137 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
10138 )
10139 })
10140 .count();
10141
10142 info!(
10143 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
10144 snapshot.combined_risk_assessments.len(),
10145 significant_count,
10146 high_cra_count,
10147 );
10148 }
10149
10150 {
10154 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
10155
10156 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
10157
10158 for company in &self.config.companies {
10160 let entity_code = company.code.clone();
10161
10162 let tolerable_error = snapshot
10164 .materiality_calculations
10165 .iter()
10166 .find(|m| m.entity_code == entity_code)
10167 .map(|m| m.tolerable_error);
10168
10169 let entity_cras: Vec<_> = snapshot
10171 .combined_risk_assessments
10172 .iter()
10173 .filter(|c| c.entity_code == entity_code)
10174 .cloned()
10175 .collect();
10176
10177 if !entity_cras.is_empty() {
10178 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
10179 snapshot.sampling_plans.extend(plans);
10180 snapshot.sampled_items.extend(items);
10181 }
10182 }
10183
10184 let misstatement_count = snapshot
10185 .sampled_items
10186 .iter()
10187 .filter(|i| i.misstatement_found)
10188 .count();
10189
10190 info!(
10191 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
10192 snapshot.sampling_plans.len(),
10193 snapshot.sampled_items.len(),
10194 misstatement_count,
10195 );
10196 }
10197
10198 {
10202 use datasynth_generators::audit::scots_generator::{
10203 ScotsGenerator, ScotsGeneratorConfig,
10204 };
10205
10206 let ic_enabled = self.config.intercompany.enabled;
10207
10208 let config = ScotsGeneratorConfig {
10209 intercompany_enabled: ic_enabled,
10210 ..ScotsGeneratorConfig::default()
10211 };
10212 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
10213
10214 for company in &self.config.companies {
10215 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
10216 snapshot
10217 .significant_transaction_classes
10218 .extend(entity_scots);
10219 }
10220
10221 let estimation_count = snapshot
10222 .significant_transaction_classes
10223 .iter()
10224 .filter(|s| {
10225 matches!(
10226 s.transaction_type,
10227 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
10228 )
10229 })
10230 .count();
10231
10232 info!(
10233 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
10234 snapshot.significant_transaction_classes.len(),
10235 estimation_count,
10236 );
10237 }
10238
10239 {
10243 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
10244
10245 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
10246 let entity_codes: Vec<String> = self
10247 .config
10248 .companies
10249 .iter()
10250 .map(|c| c.code.clone())
10251 .collect();
10252 let unusual_flags =
10253 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
10254 info!(
10255 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
10256 unusual_flags.len(),
10257 unusual_flags
10258 .iter()
10259 .filter(|f| matches!(
10260 f.severity,
10261 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
10262 ))
10263 .count(),
10264 unusual_flags
10265 .iter()
10266 .filter(|f| matches!(
10267 f.severity,
10268 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
10269 ))
10270 .count(),
10271 unusual_flags
10272 .iter()
10273 .filter(|f| matches!(
10274 f.severity,
10275 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
10276 ))
10277 .count(),
10278 );
10279 snapshot.unusual_items = unusual_flags;
10280 }
10281
10282 {
10286 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
10287
10288 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
10289 let entity_codes: Vec<String> = self
10290 .config
10291 .companies
10292 .iter()
10293 .map(|c| c.code.clone())
10294 .collect();
10295 let current_period_label = format!("FY{fiscal_year}");
10296 let prior_period_label = format!("FY{}", fiscal_year - 1);
10297 let analytical_rels = ar_gen.generate_for_entities(
10298 &entity_codes,
10299 entries,
10300 ¤t_period_label,
10301 &prior_period_label,
10302 );
10303 let out_of_range = analytical_rels
10304 .iter()
10305 .filter(|r| !r.within_expected_range)
10306 .count();
10307 info!(
10308 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
10309 analytical_rels.len(),
10310 out_of_range,
10311 );
10312 snapshot.analytical_relationships = analytical_rels;
10313 }
10314
10315 if let Some(pb) = pb {
10316 pb.finish_with_message(format!(
10317 "Audit data: {} engagements, {} workpapers, {} evidence, \
10318 {} confirmations, {} procedure steps, {} samples, \
10319 {} analytical, {} IA funcs, {} related parties, \
10320 {} component auditors, {} letters, {} subsequent events, \
10321 {} service orgs, {} going concern, {} accounting estimates, \
10322 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
10323 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
10324 {} unusual items, {} analytical relationships",
10325 snapshot.engagements.len(),
10326 snapshot.workpapers.len(),
10327 snapshot.evidence.len(),
10328 snapshot.confirmations.len(),
10329 snapshot.procedure_steps.len(),
10330 snapshot.samples.len(),
10331 snapshot.analytical_results.len(),
10332 snapshot.ia_functions.len(),
10333 snapshot.related_parties.len(),
10334 snapshot.component_auditors.len(),
10335 snapshot.engagement_letters.len(),
10336 snapshot.subsequent_events.len(),
10337 snapshot.service_organizations.len(),
10338 snapshot.going_concern_assessments.len(),
10339 snapshot.accounting_estimates.len(),
10340 snapshot.audit_opinions.len(),
10341 snapshot.key_audit_matters.len(),
10342 snapshot.sox_302_certifications.len(),
10343 snapshot.sox_404_assessments.len(),
10344 snapshot.materiality_calculations.len(),
10345 snapshot.combined_risk_assessments.len(),
10346 snapshot.sampling_plans.len(),
10347 snapshot.significant_transaction_classes.len(),
10348 snapshot.unusual_items.len(),
10349 snapshot.analytical_relationships.len(),
10350 ));
10351 }
10352
10353 Ok(snapshot)
10354 }
10355
10356 fn export_graphs(
10363 &mut self,
10364 entries: &[JournalEntry],
10365 _coa: &Arc<ChartOfAccounts>,
10366 stats: &mut EnhancedGenerationStatistics,
10367 ) -> SynthResult<GraphExportSnapshot> {
10368 let pb = self.create_progress_bar(100, "Exporting Graphs");
10369
10370 let mut snapshot = GraphExportSnapshot::default();
10371
10372 let output_dir = self
10374 .output_path
10375 .clone()
10376 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
10377 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
10378
10379 for graph_type in &self.config.graph_export.graph_types {
10381 if let Some(pb) = &pb {
10382 pb.inc(10);
10383 }
10384
10385 let graph_config = TransactionGraphConfig {
10387 include_vendors: false,
10388 include_customers: false,
10389 create_debit_credit_edges: true,
10390 include_document_nodes: graph_type.include_document_nodes,
10391 min_edge_weight: graph_type.min_edge_weight,
10392 aggregate_parallel_edges: graph_type.aggregate_edges,
10393 framework: None,
10394 };
10395
10396 let mut builder = TransactionGraphBuilder::new(graph_config);
10397 builder.add_journal_entries(entries);
10398 let graph = builder.build();
10399
10400 stats.graph_node_count += graph.node_count();
10402 stats.graph_edge_count += graph.edge_count();
10403
10404 if let Some(pb) = &pb {
10405 pb.inc(40);
10406 }
10407
10408 for format in &self.config.graph_export.formats {
10410 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
10411
10412 if let Err(e) = std::fs::create_dir_all(&format_dir) {
10414 warn!("Failed to create graph output directory: {}", e);
10415 continue;
10416 }
10417
10418 match format {
10419 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
10420 let pyg_config = PyGExportConfig {
10421 common: datasynth_graph::CommonExportConfig {
10422 export_node_features: true,
10423 export_edge_features: true,
10424 export_node_labels: true,
10425 export_edge_labels: true,
10426 export_masks: true,
10427 train_ratio: self.config.graph_export.train_ratio,
10428 val_ratio: self.config.graph_export.validation_ratio,
10429 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
10430 },
10431 one_hot_categoricals: false,
10432 };
10433
10434 let exporter = PyGExporter::new(pyg_config);
10435 match exporter.export(&graph, &format_dir) {
10436 Ok(metadata) => {
10437 snapshot.exports.insert(
10438 format!("{}_{}", graph_type.name, "pytorch_geometric"),
10439 GraphExportInfo {
10440 name: graph_type.name.clone(),
10441 format: "pytorch_geometric".to_string(),
10442 output_path: format_dir.clone(),
10443 node_count: metadata.num_nodes,
10444 edge_count: metadata.num_edges,
10445 },
10446 );
10447 snapshot.graph_count += 1;
10448 }
10449 Err(e) => {
10450 warn!("Failed to export PyTorch Geometric graph: {}", e);
10451 }
10452 }
10453 }
10454 datasynth_config::schema::GraphExportFormat::Neo4j => {
10455 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
10456
10457 let neo4j_config = Neo4jExportConfig {
10458 export_node_properties: true,
10459 export_edge_properties: true,
10460 export_features: true,
10461 generate_cypher: true,
10462 generate_admin_import: true,
10463 database_name: "synth".to_string(),
10464 cypher_batch_size: 1000,
10465 };
10466
10467 let exporter = Neo4jExporter::new(neo4j_config);
10468 match exporter.export(&graph, &format_dir) {
10469 Ok(metadata) => {
10470 snapshot.exports.insert(
10471 format!("{}_{}", graph_type.name, "neo4j"),
10472 GraphExportInfo {
10473 name: graph_type.name.clone(),
10474 format: "neo4j".to_string(),
10475 output_path: format_dir.clone(),
10476 node_count: metadata.num_nodes,
10477 edge_count: metadata.num_edges,
10478 },
10479 );
10480 snapshot.graph_count += 1;
10481 }
10482 Err(e) => {
10483 warn!("Failed to export Neo4j graph: {}", e);
10484 }
10485 }
10486 }
10487 datasynth_config::schema::GraphExportFormat::Dgl => {
10488 use datasynth_graph::{DGLExportConfig, DGLExporter};
10489
10490 let dgl_config = DGLExportConfig {
10491 common: datasynth_graph::CommonExportConfig {
10492 export_node_features: true,
10493 export_edge_features: true,
10494 export_node_labels: true,
10495 export_edge_labels: true,
10496 export_masks: true,
10497 train_ratio: self.config.graph_export.train_ratio,
10498 val_ratio: self.config.graph_export.validation_ratio,
10499 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
10500 },
10501 heterogeneous: false,
10502 include_pickle_script: true, };
10504
10505 let exporter = DGLExporter::new(dgl_config);
10506 match exporter.export(&graph, &format_dir) {
10507 Ok(metadata) => {
10508 snapshot.exports.insert(
10509 format!("{}_{}", graph_type.name, "dgl"),
10510 GraphExportInfo {
10511 name: graph_type.name.clone(),
10512 format: "dgl".to_string(),
10513 output_path: format_dir.clone(),
10514 node_count: metadata.common.num_nodes,
10515 edge_count: metadata.common.num_edges,
10516 },
10517 );
10518 snapshot.graph_count += 1;
10519 }
10520 Err(e) => {
10521 warn!("Failed to export DGL graph: {}", e);
10522 }
10523 }
10524 }
10525 datasynth_config::schema::GraphExportFormat::RustGraph => {
10526 use datasynth_graph::{
10527 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
10528 };
10529
10530 let rustgraph_config = RustGraphExportConfig {
10531 include_features: true,
10532 include_temporal: true,
10533 include_labels: true,
10534 source_name: "datasynth".to_string(),
10535 batch_id: None,
10536 output_format: RustGraphOutputFormat::JsonLines,
10537 export_node_properties: true,
10538 export_edge_properties: true,
10539 pretty_print: false,
10540 };
10541
10542 let exporter = RustGraphExporter::new(rustgraph_config);
10543 match exporter.export(&graph, &format_dir) {
10544 Ok(metadata) => {
10545 snapshot.exports.insert(
10546 format!("{}_{}", graph_type.name, "rustgraph"),
10547 GraphExportInfo {
10548 name: graph_type.name.clone(),
10549 format: "rustgraph".to_string(),
10550 output_path: format_dir.clone(),
10551 node_count: metadata.num_nodes,
10552 edge_count: metadata.num_edges,
10553 },
10554 );
10555 snapshot.graph_count += 1;
10556 }
10557 Err(e) => {
10558 warn!("Failed to export RustGraph: {}", e);
10559 }
10560 }
10561 }
10562 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
10563 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
10565 }
10566 }
10567 }
10568
10569 if let Some(pb) = &pb {
10570 pb.inc(40);
10571 }
10572 }
10573
10574 stats.graph_export_count = snapshot.graph_count;
10575 snapshot.exported = snapshot.graph_count > 0;
10576
10577 if let Some(pb) = pb {
10578 pb.finish_with_message(format!(
10579 "Graphs exported: {} graphs ({} nodes, {} edges)",
10580 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
10581 ));
10582 }
10583
10584 Ok(snapshot)
10585 }
10586
10587 fn build_additional_graphs(
10592 &self,
10593 banking: &BankingSnapshot,
10594 intercompany: &IntercompanySnapshot,
10595 entries: &[JournalEntry],
10596 stats: &mut EnhancedGenerationStatistics,
10597 ) {
10598 let output_dir = self
10599 .output_path
10600 .clone()
10601 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
10602 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
10603
10604 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
10606 info!("Phase 10c: Building banking network graph");
10607 let config = BankingGraphConfig::default();
10608 let mut builder = BankingGraphBuilder::new(config);
10609 builder.add_customers(&banking.customers);
10610 builder.add_accounts(&banking.accounts, &banking.customers);
10611 builder.add_transactions(&banking.transactions);
10612 let graph = builder.build();
10613
10614 let node_count = graph.node_count();
10615 let edge_count = graph.edge_count();
10616 stats.graph_node_count += node_count;
10617 stats.graph_edge_count += edge_count;
10618
10619 for format in &self.config.graph_export.formats {
10621 if matches!(
10622 format,
10623 datasynth_config::schema::GraphExportFormat::PytorchGeometric
10624 ) {
10625 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
10626 if let Err(e) = std::fs::create_dir_all(&format_dir) {
10627 warn!("Failed to create banking graph output dir: {}", e);
10628 continue;
10629 }
10630 let pyg_config = PyGExportConfig::default();
10631 let exporter = PyGExporter::new(pyg_config);
10632 if let Err(e) = exporter.export(&graph, &format_dir) {
10633 warn!("Failed to export banking graph as PyG: {}", e);
10634 } else {
10635 info!(
10636 "Banking network graph exported: {} nodes, {} edges",
10637 node_count, edge_count
10638 );
10639 }
10640 }
10641 }
10642 }
10643
10644 let approval_entries: Vec<_> = entries
10646 .iter()
10647 .filter(|je| je.header.approval_workflow.is_some())
10648 .collect();
10649
10650 if !approval_entries.is_empty() {
10651 info!(
10652 "Phase 10c: Building approval network graph ({} entries with approvals)",
10653 approval_entries.len()
10654 );
10655 let config = ApprovalGraphConfig::default();
10656 let mut builder = ApprovalGraphBuilder::new(config);
10657
10658 for je in &approval_entries {
10659 if let Some(ref wf) = je.header.approval_workflow {
10660 for action in &wf.actions {
10661 let record = datasynth_core::models::ApprovalRecord {
10662 approval_id: format!(
10663 "APR-{}-{}",
10664 je.header.document_id, action.approval_level
10665 ),
10666 document_number: je.header.document_id.to_string(),
10667 document_type: "JE".to_string(),
10668 company_code: je.company_code().to_string(),
10669 requester_id: wf.preparer_id.clone(),
10670 requester_name: Some(wf.preparer_name.clone()),
10671 approver_id: action.actor_id.clone(),
10672 approver_name: action.actor_name.clone(),
10673 approval_date: je.posting_date(),
10674 action: format!("{:?}", action.action),
10675 amount: wf.amount,
10676 approval_limit: None,
10677 comments: action.comments.clone(),
10678 delegation_from: None,
10679 is_auto_approved: false,
10680 };
10681 builder.add_approval(&record);
10682 }
10683 }
10684 }
10685
10686 let graph = builder.build();
10687 let node_count = graph.node_count();
10688 let edge_count = graph.edge_count();
10689 stats.graph_node_count += node_count;
10690 stats.graph_edge_count += edge_count;
10691
10692 for format in &self.config.graph_export.formats {
10694 if matches!(
10695 format,
10696 datasynth_config::schema::GraphExportFormat::PytorchGeometric
10697 ) {
10698 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
10699 if let Err(e) = std::fs::create_dir_all(&format_dir) {
10700 warn!("Failed to create approval graph output dir: {}", e);
10701 continue;
10702 }
10703 let pyg_config = PyGExportConfig::default();
10704 let exporter = PyGExporter::new(pyg_config);
10705 if let Err(e) = exporter.export(&graph, &format_dir) {
10706 warn!("Failed to export approval graph as PyG: {}", e);
10707 } else {
10708 info!(
10709 "Approval network graph exported: {} nodes, {} edges",
10710 node_count, edge_count
10711 );
10712 }
10713 }
10714 }
10715 }
10716
10717 if self.config.companies.len() >= 2 {
10719 info!(
10720 "Phase 10c: Building entity relationship graph ({} companies)",
10721 self.config.companies.len()
10722 );
10723
10724 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10725 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
10726
10727 let parent_code = &self.config.companies[0].code;
10729 let mut companies: Vec<datasynth_core::models::Company> =
10730 Vec::with_capacity(self.config.companies.len());
10731
10732 let first = &self.config.companies[0];
10734 companies.push(datasynth_core::models::Company::parent(
10735 &first.code,
10736 &first.name,
10737 &first.country,
10738 &first.currency,
10739 ));
10740
10741 for cc in self.config.companies.iter().skip(1) {
10743 companies.push(datasynth_core::models::Company::subsidiary(
10744 &cc.code,
10745 &cc.name,
10746 &cc.country,
10747 &cc.currency,
10748 parent_code,
10749 rust_decimal::Decimal::from(100),
10750 ));
10751 }
10752
10753 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
10755 self.config
10756 .companies
10757 .iter()
10758 .skip(1)
10759 .enumerate()
10760 .map(|(i, cc)| {
10761 let mut rel =
10762 datasynth_core::models::intercompany::IntercompanyRelationship::new(
10763 format!("REL{:03}", i + 1),
10764 parent_code.clone(),
10765 cc.code.clone(),
10766 rust_decimal::Decimal::from(100),
10767 start_date,
10768 );
10769 rel.functional_currency = cc.currency.clone();
10770 rel
10771 })
10772 .collect();
10773
10774 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
10775 builder.add_companies(&companies);
10776 builder.add_ownership_relationships(&relationships);
10777
10778 for pair in &intercompany.matched_pairs {
10780 builder.add_intercompany_edge(
10781 &pair.seller_company,
10782 &pair.buyer_company,
10783 pair.amount,
10784 &format!("{:?}", pair.transaction_type),
10785 );
10786 }
10787
10788 let graph = builder.build();
10789 let node_count = graph.node_count();
10790 let edge_count = graph.edge_count();
10791 stats.graph_node_count += node_count;
10792 stats.graph_edge_count += edge_count;
10793
10794 for format in &self.config.graph_export.formats {
10796 if matches!(
10797 format,
10798 datasynth_config::schema::GraphExportFormat::PytorchGeometric
10799 ) {
10800 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
10801 if let Err(e) = std::fs::create_dir_all(&format_dir) {
10802 warn!("Failed to create entity graph output dir: {}", e);
10803 continue;
10804 }
10805 let pyg_config = PyGExportConfig::default();
10806 let exporter = PyGExporter::new(pyg_config);
10807 if let Err(e) = exporter.export(&graph, &format_dir) {
10808 warn!("Failed to export entity graph as PyG: {}", e);
10809 } else {
10810 info!(
10811 "Entity relationship graph exported: {} nodes, {} edges",
10812 node_count, edge_count
10813 );
10814 }
10815 }
10816 }
10817 } else {
10818 debug!(
10819 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
10820 self.config.companies.len()
10821 );
10822 }
10823 }
10824
10825 #[allow(clippy::too_many_arguments)]
10832 fn export_hypergraph(
10833 &self,
10834 coa: &Arc<ChartOfAccounts>,
10835 entries: &[JournalEntry],
10836 document_flows: &DocumentFlowSnapshot,
10837 sourcing: &SourcingSnapshot,
10838 hr: &HrSnapshot,
10839 manufacturing: &ManufacturingSnapshot,
10840 banking: &BankingSnapshot,
10841 audit: &AuditSnapshot,
10842 financial_reporting: &FinancialReportingSnapshot,
10843 ocpm: &OcpmSnapshot,
10844 compliance: &ComplianceRegulationsSnapshot,
10845 stats: &mut EnhancedGenerationStatistics,
10846 ) -> SynthResult<HypergraphExportInfo> {
10847 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
10848 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
10849 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
10850 use datasynth_graph::models::hypergraph::AggregationStrategy;
10851
10852 let hg_settings = &self.config.graph_export.hypergraph;
10853
10854 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
10856 "truncate" => AggregationStrategy::Truncate,
10857 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
10858 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
10859 "importance_sample" => AggregationStrategy::ImportanceSample,
10860 _ => AggregationStrategy::PoolByCounterparty,
10861 };
10862
10863 let builder_config = HypergraphConfig {
10864 max_nodes: hg_settings.max_nodes,
10865 aggregation_strategy,
10866 include_coso: hg_settings.governance_layer.include_coso,
10867 include_controls: hg_settings.governance_layer.include_controls,
10868 include_sox: hg_settings.governance_layer.include_sox,
10869 include_vendors: hg_settings.governance_layer.include_vendors,
10870 include_customers: hg_settings.governance_layer.include_customers,
10871 include_employees: hg_settings.governance_layer.include_employees,
10872 include_p2p: hg_settings.process_layer.include_p2p,
10873 include_o2c: hg_settings.process_layer.include_o2c,
10874 include_s2c: hg_settings.process_layer.include_s2c,
10875 include_h2r: hg_settings.process_layer.include_h2r,
10876 include_mfg: hg_settings.process_layer.include_mfg,
10877 include_bank: hg_settings.process_layer.include_bank,
10878 include_audit: hg_settings.process_layer.include_audit,
10879 include_r2r: hg_settings.process_layer.include_r2r,
10880 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
10881 docs_per_counterparty_threshold: hg_settings
10882 .process_layer
10883 .docs_per_counterparty_threshold,
10884 include_accounts: hg_settings.accounting_layer.include_accounts,
10885 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
10886 include_cross_layer_edges: hg_settings.cross_layer.enabled,
10887 include_compliance: self.config.compliance_regulations.enabled,
10888 include_tax: true,
10889 include_treasury: true,
10890 include_esg: true,
10891 include_project: true,
10892 include_intercompany: true,
10893 include_temporal_events: true,
10894 };
10895
10896 let mut builder = HypergraphBuilder::new(builder_config);
10897
10898 builder.add_coso_framework();
10900
10901 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
10904 let controls = InternalControl::standard_controls();
10905 builder.add_controls(&controls);
10906 }
10907
10908 builder.add_vendors(&self.master_data.vendors);
10910 builder.add_customers(&self.master_data.customers);
10911 builder.add_employees(&self.master_data.employees);
10912
10913 builder.add_p2p_documents(
10915 &document_flows.purchase_orders,
10916 &document_flows.goods_receipts,
10917 &document_flows.vendor_invoices,
10918 &document_flows.payments,
10919 );
10920 builder.add_o2c_documents(
10921 &document_flows.sales_orders,
10922 &document_flows.deliveries,
10923 &document_flows.customer_invoices,
10924 );
10925 builder.add_s2c_documents(
10926 &sourcing.sourcing_projects,
10927 &sourcing.qualifications,
10928 &sourcing.rfx_events,
10929 &sourcing.bids,
10930 &sourcing.bid_evaluations,
10931 &sourcing.contracts,
10932 );
10933 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
10934 builder.add_mfg_documents(
10935 &manufacturing.production_orders,
10936 &manufacturing.quality_inspections,
10937 &manufacturing.cycle_counts,
10938 );
10939 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
10940 builder.add_audit_documents(
10941 &audit.engagements,
10942 &audit.workpapers,
10943 &audit.findings,
10944 &audit.evidence,
10945 &audit.risk_assessments,
10946 &audit.judgments,
10947 );
10948 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
10949
10950 if let Some(ref event_log) = ocpm.event_log {
10952 builder.add_ocpm_events(event_log);
10953 }
10954
10955 if self.config.compliance_regulations.enabled
10957 && hg_settings.governance_layer.include_controls
10958 {
10959 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
10961 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
10962 .standard_records
10963 .iter()
10964 .filter_map(|r| {
10965 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
10966 registry.get(&sid).cloned()
10967 })
10968 .collect();
10969
10970 builder.add_compliance_regulations(
10971 &standards,
10972 &compliance.findings,
10973 &compliance.filings,
10974 );
10975 }
10976
10977 builder.add_accounts(coa);
10979 builder.add_journal_entries_as_hyperedges(entries);
10980
10981 let hypergraph = builder.build();
10983
10984 let output_dir = self
10986 .output_path
10987 .clone()
10988 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
10989 let hg_dir = output_dir
10990 .join(&self.config.graph_export.output_subdirectory)
10991 .join(&hg_settings.output_subdirectory);
10992
10993 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
10995 "unified" => {
10996 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
10997 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
10998 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
10999 })?;
11000 (
11001 metadata.num_nodes,
11002 metadata.num_edges,
11003 metadata.num_hyperedges,
11004 )
11005 }
11006 _ => {
11007 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
11009 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
11010 SynthError::generation(format!("Hypergraph export failed: {e}"))
11011 })?;
11012 (
11013 metadata.num_nodes,
11014 metadata.num_edges,
11015 metadata.num_hyperedges,
11016 )
11017 }
11018 };
11019
11020 #[cfg(feature = "streaming")]
11022 if let Some(ref target_url) = hg_settings.stream_target {
11023 use crate::stream_client::{StreamClient, StreamConfig};
11024 use std::io::Write as _;
11025
11026 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
11027 let stream_config = StreamConfig {
11028 target_url: target_url.clone(),
11029 batch_size: hg_settings.stream_batch_size,
11030 api_key,
11031 ..StreamConfig::default()
11032 };
11033
11034 match StreamClient::new(stream_config) {
11035 Ok(mut client) => {
11036 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
11037 match exporter.export_to_writer(&hypergraph, &mut client) {
11038 Ok(_) => {
11039 if let Err(e) = client.flush() {
11040 warn!("Failed to flush stream client: {}", e);
11041 } else {
11042 info!("Streamed {} records to {}", client.total_sent(), target_url);
11043 }
11044 }
11045 Err(e) => {
11046 warn!("Streaming export failed: {}", e);
11047 }
11048 }
11049 }
11050 Err(e) => {
11051 warn!("Failed to create stream client: {}", e);
11052 }
11053 }
11054 }
11055
11056 stats.graph_node_count += num_nodes;
11058 stats.graph_edge_count += num_edges;
11059 stats.graph_export_count += 1;
11060
11061 Ok(HypergraphExportInfo {
11062 node_count: num_nodes,
11063 edge_count: num_edges,
11064 hyperedge_count: num_hyperedges,
11065 output_path: hg_dir,
11066 })
11067 }
11068
11069 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
11074 let pb = self.create_progress_bar(100, "Generating Banking Data");
11075
11076 let orchestrator = BankingOrchestratorBuilder::new()
11078 .config(self.config.banking.clone())
11079 .seed(self.seed + 9000)
11080 .country_pack(self.primary_pack().clone())
11081 .build();
11082
11083 if let Some(pb) = &pb {
11084 pb.inc(10);
11085 }
11086
11087 let result = orchestrator.generate();
11089
11090 if let Some(pb) = &pb {
11091 pb.inc(90);
11092 pb.finish_with_message(format!(
11093 "Banking: {} customers, {} transactions",
11094 result.customers.len(),
11095 result.transactions.len()
11096 ));
11097 }
11098
11099 let mut banking_customers = result.customers;
11104 let core_customers = &self.master_data.customers;
11105 if !core_customers.is_empty() {
11106 for (i, bc) in banking_customers.iter_mut().enumerate() {
11107 let core = &core_customers[i % core_customers.len()];
11108 bc.name = CustomerName::business(&core.name);
11109 bc.residence_country = core.country.clone();
11110 bc.enterprise_customer_id = Some(core.customer_id.clone());
11111 }
11112 debug!(
11113 "Cross-referenced {} banking customers with {} core customers",
11114 banking_customers.len(),
11115 core_customers.len()
11116 );
11117 }
11118
11119 Ok(BankingSnapshot {
11120 customers: banking_customers,
11121 accounts: result.accounts,
11122 transactions: result.transactions,
11123 transaction_labels: result.transaction_labels,
11124 customer_labels: result.customer_labels,
11125 account_labels: result.account_labels,
11126 relationship_labels: result.relationship_labels,
11127 narratives: result.narratives,
11128 suspicious_count: result.stats.suspicious_count,
11129 scenario_count: result.scenarios.len(),
11130 })
11131 }
11132
11133 fn calculate_total_transactions(&self) -> u64 {
11135 let months = self.config.global.period_months as f64;
11136 self.config
11137 .companies
11138 .iter()
11139 .map(|c| {
11140 let annual = c.annual_transaction_volume.count() as f64;
11141 let weighted = annual * c.volume_weight;
11142 (weighted * months / 12.0) as u64
11143 })
11144 .sum()
11145 }
11146
11147 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
11149 if !self.phase_config.show_progress {
11150 return None;
11151 }
11152
11153 let pb = if let Some(mp) = &self.multi_progress {
11154 mp.add(ProgressBar::new(total))
11155 } else {
11156 ProgressBar::new(total)
11157 };
11158
11159 pb.set_style(
11160 ProgressStyle::default_bar()
11161 .template(&format!(
11162 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
11163 ))
11164 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
11165 .progress_chars("#>-"),
11166 );
11167
11168 Some(pb)
11169 }
11170
11171 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
11173 self.coa.clone()
11174 }
11175
11176 pub fn get_master_data(&self) -> &MasterDataSnapshot {
11178 &self.master_data
11179 }
11180
11181 fn phase_compliance_regulations(
11183 &mut self,
11184 _stats: &mut EnhancedGenerationStatistics,
11185 ) -> SynthResult<ComplianceRegulationsSnapshot> {
11186 if !self.phase_config.generate_compliance_regulations {
11187 return Ok(ComplianceRegulationsSnapshot::default());
11188 }
11189
11190 info!("Phase: Generating Compliance Regulations Data");
11191
11192 let cr_config = &self.config.compliance_regulations;
11193
11194 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
11196 self.config
11197 .companies
11198 .iter()
11199 .map(|c| c.country.clone())
11200 .collect::<std::collections::HashSet<_>>()
11201 .into_iter()
11202 .collect()
11203 } else {
11204 cr_config.jurisdictions.clone()
11205 };
11206
11207 let fallback_date =
11209 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
11210 let reference_date = cr_config
11211 .reference_date
11212 .as_ref()
11213 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
11214 .unwrap_or_else(|| {
11215 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11216 .unwrap_or(fallback_date)
11217 });
11218
11219 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
11221 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
11222 let cross_reference_records = reg_gen.generate_cross_reference_records();
11223 let jurisdiction_records =
11224 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
11225
11226 info!(
11227 " Standards: {} records, {} cross-references, {} jurisdictions",
11228 standard_records.len(),
11229 cross_reference_records.len(),
11230 jurisdiction_records.len()
11231 );
11232
11233 let audit_procedures = if cr_config.audit_procedures.enabled {
11235 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
11236 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
11237 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
11238 confidence_level: cr_config.audit_procedures.confidence_level,
11239 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
11240 };
11241 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
11242 self.seed + 9000,
11243 proc_config,
11244 );
11245 let registry = reg_gen.registry();
11246 let mut all_procs = Vec::new();
11247 for jurisdiction in &jurisdictions {
11248 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
11249 all_procs.extend(procs);
11250 }
11251 info!(" Audit procedures: {}", all_procs.len());
11252 all_procs
11253 } else {
11254 Vec::new()
11255 };
11256
11257 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
11259 let finding_config =
11260 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
11261 finding_rate: cr_config.findings.finding_rate,
11262 material_weakness_rate: cr_config.findings.material_weakness_rate,
11263 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
11264 generate_remediation: cr_config.findings.generate_remediation,
11265 };
11266 let mut finding_gen =
11267 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
11268 self.seed + 9100,
11269 finding_config,
11270 );
11271 let mut all_findings = Vec::new();
11272 for company in &self.config.companies {
11273 let company_findings =
11274 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
11275 all_findings.extend(company_findings);
11276 }
11277 info!(" Compliance findings: {}", all_findings.len());
11278 all_findings
11279 } else {
11280 Vec::new()
11281 };
11282
11283 let filings = if cr_config.filings.enabled {
11285 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
11286 filing_types: cr_config.filings.filing_types.clone(),
11287 generate_status_progression: cr_config.filings.generate_status_progression,
11288 };
11289 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
11290 self.seed + 9200,
11291 filing_config,
11292 );
11293 let company_codes: Vec<String> = self
11294 .config
11295 .companies
11296 .iter()
11297 .map(|c| c.code.clone())
11298 .collect();
11299 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11300 .unwrap_or(fallback_date);
11301 let filings = filing_gen.generate_filings(
11302 &company_codes,
11303 &jurisdictions,
11304 start_date,
11305 self.config.global.period_months,
11306 );
11307 info!(" Regulatory filings: {}", filings.len());
11308 filings
11309 } else {
11310 Vec::new()
11311 };
11312
11313 let compliance_graph = if cr_config.graph.enabled {
11315 let graph_config = datasynth_graph::ComplianceGraphConfig {
11316 include_standard_nodes: cr_config.graph.include_compliance_nodes,
11317 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
11318 include_cross_references: cr_config.graph.include_cross_references,
11319 include_supersession_edges: cr_config.graph.include_supersession_edges,
11320 include_account_links: cr_config.graph.include_account_links,
11321 include_control_links: cr_config.graph.include_control_links,
11322 include_company_links: cr_config.graph.include_company_links,
11323 };
11324 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
11325
11326 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
11328 .iter()
11329 .map(|r| datasynth_graph::StandardNodeInput {
11330 standard_id: r.standard_id.clone(),
11331 title: r.title.clone(),
11332 category: r.category.clone(),
11333 domain: r.domain.clone(),
11334 is_active: r.is_active,
11335 features: vec![if r.is_active { 1.0 } else { 0.0 }],
11336 applicable_account_types: r.applicable_account_types.clone(),
11337 applicable_processes: r.applicable_processes.clone(),
11338 })
11339 .collect();
11340 builder.add_standards(&standard_inputs);
11341
11342 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
11344 jurisdiction_records
11345 .iter()
11346 .map(|r| datasynth_graph::JurisdictionNodeInput {
11347 country_code: r.country_code.clone(),
11348 country_name: r.country_name.clone(),
11349 framework: r.accounting_framework.clone(),
11350 standard_count: r.standard_count,
11351 tax_rate: r.statutory_tax_rate,
11352 })
11353 .collect();
11354 builder.add_jurisdictions(&jurisdiction_inputs);
11355
11356 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
11358 cross_reference_records
11359 .iter()
11360 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
11361 from_standard: r.from_standard.clone(),
11362 to_standard: r.to_standard.clone(),
11363 relationship: r.relationship.clone(),
11364 convergence_level: r.convergence_level,
11365 })
11366 .collect();
11367 builder.add_cross_references(&xref_inputs);
11368
11369 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
11371 .iter()
11372 .map(|r| datasynth_graph::JurisdictionMappingInput {
11373 country_code: r.jurisdiction.clone(),
11374 standard_id: r.standard_id.clone(),
11375 })
11376 .collect();
11377 builder.add_jurisdiction_mappings(&mapping_inputs);
11378
11379 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
11381 .iter()
11382 .map(|p| datasynth_graph::ProcedureNodeInput {
11383 procedure_id: p.procedure_id.clone(),
11384 standard_id: p.standard_id.clone(),
11385 procedure_type: p.procedure_type.clone(),
11386 sample_size: p.sample_size,
11387 confidence_level: p.confidence_level,
11388 })
11389 .collect();
11390 builder.add_procedures(&proc_inputs);
11391
11392 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
11394 .iter()
11395 .map(|f| datasynth_graph::FindingNodeInput {
11396 finding_id: f.finding_id.to_string(),
11397 standard_id: f
11398 .related_standards
11399 .first()
11400 .map(|s| s.as_str().to_string())
11401 .unwrap_or_default(),
11402 severity: f.severity.to_string(),
11403 deficiency_level: f.deficiency_level.to_string(),
11404 severity_score: f.deficiency_level.severity_score(),
11405 control_id: f.control_id.clone(),
11406 affected_accounts: f.affected_accounts.clone(),
11407 })
11408 .collect();
11409 builder.add_findings(&finding_inputs);
11410
11411 if cr_config.graph.include_account_links {
11413 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
11414 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
11415 for std_record in &standard_records {
11416 if let Some(std_obj) =
11417 registry.get(&datasynth_core::models::compliance::StandardId::parse(
11418 &std_record.standard_id,
11419 ))
11420 {
11421 for acct_type in &std_obj.applicable_account_types {
11422 account_links.push(datasynth_graph::AccountLinkInput {
11423 standard_id: std_record.standard_id.clone(),
11424 account_code: acct_type.clone(),
11425 account_name: acct_type.clone(),
11426 });
11427 }
11428 }
11429 }
11430 builder.add_account_links(&account_links);
11431 }
11432
11433 if cr_config.graph.include_control_links {
11435 let mut control_links = Vec::new();
11436 let sox_like_ids: Vec<String> = standard_records
11438 .iter()
11439 .filter(|r| {
11440 r.standard_id.starts_with("SOX")
11441 || r.standard_id.starts_with("PCAOB-AS-2201")
11442 })
11443 .map(|r| r.standard_id.clone())
11444 .collect();
11445 let control_ids = [
11447 ("C001", "Cash Controls"),
11448 ("C002", "Large Transaction Approval"),
11449 ("C010", "PO Approval"),
11450 ("C011", "Three-Way Match"),
11451 ("C020", "Revenue Recognition"),
11452 ("C021", "Credit Check"),
11453 ("C030", "Manual JE Approval"),
11454 ("C031", "Period Close Review"),
11455 ("C032", "Account Reconciliation"),
11456 ("C040", "Payroll Processing"),
11457 ("C050", "Fixed Asset Capitalization"),
11458 ("C060", "Intercompany Elimination"),
11459 ];
11460 for sox_id in &sox_like_ids {
11461 for (ctrl_id, ctrl_name) in &control_ids {
11462 control_links.push(datasynth_graph::ControlLinkInput {
11463 standard_id: sox_id.clone(),
11464 control_id: ctrl_id.to_string(),
11465 control_name: ctrl_name.to_string(),
11466 });
11467 }
11468 }
11469 builder.add_control_links(&control_links);
11470 }
11471
11472 if cr_config.graph.include_company_links {
11474 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
11475 .iter()
11476 .enumerate()
11477 .map(|(i, f)| datasynth_graph::FilingNodeInput {
11478 filing_id: format!("F{:04}", i + 1),
11479 filing_type: f.filing_type.to_string(),
11480 company_code: f.company_code.clone(),
11481 jurisdiction: f.jurisdiction.clone(),
11482 status: format!("{:?}", f.status),
11483 })
11484 .collect();
11485 builder.add_filings(&filing_inputs);
11486 }
11487
11488 let graph = builder.build();
11489 info!(
11490 " Compliance graph: {} nodes, {} edges",
11491 graph.nodes.len(),
11492 graph.edges.len()
11493 );
11494 Some(graph)
11495 } else {
11496 None
11497 };
11498
11499 self.check_resources_with_log("post-compliance-regulations")?;
11500
11501 Ok(ComplianceRegulationsSnapshot {
11502 standard_records,
11503 cross_reference_records,
11504 jurisdiction_records,
11505 audit_procedures,
11506 findings,
11507 filings,
11508 compliance_graph,
11509 })
11510 }
11511
11512 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
11514 use super::lineage::LineageGraphBuilder;
11515
11516 let mut builder = LineageGraphBuilder::new();
11517
11518 builder.add_config_section("config:global", "Global Config");
11520 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
11521 builder.add_config_section("config:transactions", "Transaction Config");
11522
11523 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
11525 builder.add_generator_phase("phase:je", "Journal Entry Generation");
11526
11527 builder.configured_by("phase:coa", "config:chart_of_accounts");
11529 builder.configured_by("phase:je", "config:transactions");
11530
11531 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
11533 builder.produced_by("output:je", "phase:je");
11534
11535 if self.phase_config.generate_master_data {
11537 builder.add_config_section("config:master_data", "Master Data Config");
11538 builder.add_generator_phase("phase:master_data", "Master Data Generation");
11539 builder.configured_by("phase:master_data", "config:master_data");
11540 builder.input_to("phase:master_data", "phase:je");
11541 }
11542
11543 if self.phase_config.generate_document_flows {
11544 builder.add_config_section("config:document_flows", "Document Flow Config");
11545 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
11546 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
11547 builder.configured_by("phase:p2p", "config:document_flows");
11548 builder.configured_by("phase:o2c", "config:document_flows");
11549
11550 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
11551 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
11552 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
11553 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
11554 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
11555
11556 builder.produced_by("output:po", "phase:p2p");
11557 builder.produced_by("output:gr", "phase:p2p");
11558 builder.produced_by("output:vi", "phase:p2p");
11559 builder.produced_by("output:so", "phase:o2c");
11560 builder.produced_by("output:ci", "phase:o2c");
11561 }
11562
11563 if self.phase_config.inject_anomalies {
11564 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
11565 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
11566 builder.configured_by("phase:anomaly", "config:fraud");
11567 builder.add_output_file(
11568 "output:labels",
11569 "Anomaly Labels",
11570 "labels/anomaly_labels.csv",
11571 );
11572 builder.produced_by("output:labels", "phase:anomaly");
11573 }
11574
11575 if self.phase_config.generate_audit {
11576 builder.add_config_section("config:audit", "Audit Config");
11577 builder.add_generator_phase("phase:audit", "Audit Data Generation");
11578 builder.configured_by("phase:audit", "config:audit");
11579 }
11580
11581 if self.phase_config.generate_banking {
11582 builder.add_config_section("config:banking", "Banking Config");
11583 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
11584 builder.configured_by("phase:banking", "config:banking");
11585 }
11586
11587 if self.config.llm.enabled {
11588 builder.add_config_section("config:llm", "LLM Enrichment Config");
11589 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
11590 builder.configured_by("phase:llm_enrichment", "config:llm");
11591 }
11592
11593 if self.config.diffusion.enabled {
11594 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
11595 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
11596 builder.configured_by("phase:diffusion", "config:diffusion");
11597 }
11598
11599 if self.config.causal.enabled {
11600 builder.add_config_section("config:causal", "Causal Generation Config");
11601 builder.add_generator_phase("phase:causal", "Causal Overlay");
11602 builder.configured_by("phase:causal", "config:causal");
11603 }
11604
11605 builder.build()
11606 }
11607
11608 fn compute_company_revenue(
11617 entries: &[JournalEntry],
11618 company_code: &str,
11619 ) -> rust_decimal::Decimal {
11620 use rust_decimal::Decimal;
11621 let mut revenue = Decimal::ZERO;
11622 for je in entries {
11623 if je.header.company_code != company_code {
11624 continue;
11625 }
11626 for line in &je.lines {
11627 if line.gl_account.starts_with('4') {
11628 revenue += line.credit_amount - line.debit_amount;
11630 }
11631 }
11632 }
11633 revenue.max(Decimal::ZERO)
11634 }
11635
11636 fn compute_entity_net_assets(
11640 entries: &[JournalEntry],
11641 entity_code: &str,
11642 ) -> rust_decimal::Decimal {
11643 use rust_decimal::Decimal;
11644 let mut asset_net = Decimal::ZERO;
11645 let mut liability_net = Decimal::ZERO;
11646 for je in entries {
11647 if je.header.company_code != entity_code {
11648 continue;
11649 }
11650 for line in &je.lines {
11651 if line.gl_account.starts_with('1') {
11652 asset_net += line.debit_amount - line.credit_amount;
11653 } else if line.gl_account.starts_with('2') {
11654 liability_net += line.credit_amount - line.debit_amount;
11655 }
11656 }
11657 }
11658 asset_net - liability_net
11659 }
11660}
11661
11662fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
11664 match format {
11665 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
11666 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
11667 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
11668 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
11669 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
11670 }
11671}
11672
11673#[cfg(test)]
11674#[allow(clippy::unwrap_used)]
11675mod tests {
11676 use super::*;
11677 use datasynth_config::schema::*;
11678
11679 fn create_test_config() -> GeneratorConfig {
11680 GeneratorConfig {
11681 global: GlobalConfig {
11682 industry: IndustrySector::Manufacturing,
11683 start_date: "2024-01-01".to_string(),
11684 period_months: 1,
11685 seed: Some(42),
11686 parallel: false,
11687 group_currency: "USD".to_string(),
11688 presentation_currency: None,
11689 worker_threads: 0,
11690 memory_limit_mb: 0,
11691 fiscal_year_months: None,
11692 },
11693 companies: vec![CompanyConfig {
11694 code: "1000".to_string(),
11695 name: "Test Company".to_string(),
11696 currency: "USD".to_string(),
11697 functional_currency: None,
11698 country: "US".to_string(),
11699 annual_transaction_volume: TransactionVolume::TenK,
11700 volume_weight: 1.0,
11701 fiscal_year_variant: "K4".to_string(),
11702 }],
11703 chart_of_accounts: ChartOfAccountsConfig {
11704 complexity: CoAComplexity::Small,
11705 industry_specific: true,
11706 custom_accounts: None,
11707 min_hierarchy_depth: 2,
11708 max_hierarchy_depth: 4,
11709 },
11710 transactions: TransactionConfig::default(),
11711 output: OutputConfig::default(),
11712 fraud: FraudConfig::default(),
11713 internal_controls: InternalControlsConfig::default(),
11714 business_processes: BusinessProcessConfig::default(),
11715 user_personas: UserPersonaConfig::default(),
11716 templates: TemplateConfig::default(),
11717 approval: ApprovalConfig::default(),
11718 departments: DepartmentConfig::default(),
11719 master_data: MasterDataConfig::default(),
11720 document_flows: DocumentFlowConfig::default(),
11721 intercompany: IntercompanyConfig::default(),
11722 balance: BalanceConfig::default(),
11723 ocpm: OcpmConfig::default(),
11724 audit: AuditGenerationConfig::default(),
11725 banking: datasynth_banking::BankingConfig::default(),
11726 data_quality: DataQualitySchemaConfig::default(),
11727 scenario: ScenarioConfig::default(),
11728 temporal: TemporalDriftConfig::default(),
11729 graph_export: GraphExportConfig::default(),
11730 streaming: StreamingSchemaConfig::default(),
11731 rate_limit: RateLimitSchemaConfig::default(),
11732 temporal_attributes: TemporalAttributeSchemaConfig::default(),
11733 relationships: RelationshipSchemaConfig::default(),
11734 accounting_standards: AccountingStandardsConfig::default(),
11735 audit_standards: AuditStandardsConfig::default(),
11736 distributions: Default::default(),
11737 temporal_patterns: Default::default(),
11738 vendor_network: VendorNetworkSchemaConfig::default(),
11739 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
11740 relationship_strength: RelationshipStrengthSchemaConfig::default(),
11741 cross_process_links: CrossProcessLinksSchemaConfig::default(),
11742 organizational_events: OrganizationalEventsSchemaConfig::default(),
11743 behavioral_drift: BehavioralDriftSchemaConfig::default(),
11744 market_drift: MarketDriftSchemaConfig::default(),
11745 drift_labeling: DriftLabelingSchemaConfig::default(),
11746 anomaly_injection: Default::default(),
11747 industry_specific: Default::default(),
11748 fingerprint_privacy: Default::default(),
11749 quality_gates: Default::default(),
11750 compliance: Default::default(),
11751 webhooks: Default::default(),
11752 llm: Default::default(),
11753 diffusion: Default::default(),
11754 causal: Default::default(),
11755 source_to_pay: Default::default(),
11756 financial_reporting: Default::default(),
11757 hr: Default::default(),
11758 manufacturing: Default::default(),
11759 sales_quotes: Default::default(),
11760 tax: Default::default(),
11761 treasury: Default::default(),
11762 project_accounting: Default::default(),
11763 esg: Default::default(),
11764 country_packs: None,
11765 scenarios: Default::default(),
11766 session: Default::default(),
11767 compliance_regulations: Default::default(),
11768 }
11769 }
11770
/// An orchestrator can be constructed from the baseline config with default phases.
#[test]
fn test_enhanced_orchestrator_creation() {
    let creation = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(creation.is_ok());
}
11777
/// With only journal-entry generation switched on, a run succeeds and yields at least
/// one journal entry.
#[test]
fn test_minimal_generation() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    // Unwrap directly (as the sibling tests do): a failing run then reports the
    // underlying error instead of a bare "assertion failed: result.is_ok()".
    let result = orchestrator.generate().unwrap();

    assert!(!result.journal_entries.is_empty());
}
11797
/// With only master-data generation switched on, vendors, customers and materials
/// are all produced.
#[test]
fn test_master_data_generation() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        // Master-data volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let master = &output.master_data;

    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
11822
/// With master data and document flows switched on, both P2P and O2C chains are
/// produced, along with purchase orders and sales orders.
#[test]
fn test_document_flow_generation() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Master-data volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        // Document-flow volumes.
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let flows = &output.document_flows;

    // Chains exist for both processes.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // The flattened document collections are populated as well.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
11856
/// With journal entries and anomaly injection switched on, entries are produced and
/// the anomaly label summary is populated.
#[test]
fn test_anomaly_injection() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        inject_anomalies: true,
        generate_master_data: false,
        generate_document_flows: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();

    assert!(!output.journal_entries.is_empty());

    assert!(output.anomaly_labels.summary.is_some());
}
11879
/// Runs master data, document flows, journal entries and balance validation together
/// and checks that each stage produced output.
#[test]
fn test_full_generation_pipeline() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        generate_ocpm_events: false,
        show_progress: false,
        // Master-data volumes.
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        // Document-flow volumes.
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();

    // Master data, document flows, journal entries and chart of accounts.
    assert!(!output.master_data.vendors.is_empty());
    assert!(!output.master_data.customers.is_empty());
    assert!(!output.document_flows.p2p_chains.is_empty());
    assert!(!output.document_flows.o2c_chains.is_empty());
    assert!(!output.journal_entries.is_empty());
    assert!(output.statistics.accounts_count > 0);

    // Subledger invoices.
    assert!(!output.subledger.ap_invoices.is_empty());
    assert!(!output.subledger.ar_invoices.is_empty());

    // Balance validation ran over at least one entry.
    let validation = &output.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
11921
/// Subledger AP/AR invoices match the vendor/customer invoices from the document
/// flows one-for-one, and the statistics report the same counts.
#[test]
fn test_subledger_linking() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Master-data volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        // Document-flow volumes.
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();

    let flows = &output.document_flows;
    let subledger = &output.subledger;
    let stats = &output.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // One AR invoice per customer invoice.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Statistics agree with the subledger contents.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
11977
/// With balance validation switched on, generated journal entries validate cleanly:
/// no unbalanced entries and total debits equal to total credits.
#[test]
fn test_balance_validation() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        validate_balances: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let validation = &output.balance_validation;

    // Validation ran and saw entries.
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    // Nothing was flagged as unbalanced.
    assert!(!validation.has_unbalanced_entries);

    // Debits and credits agree in total.
    assert_eq!(validation.total_debits, validation.total_credits);
}
12007
/// The reported statistics match the sizes of the generated collections.
#[test]
fn test_statistics_accuracy() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        // Master-data volumes.
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();

    let stats = &output.statistics;
    let master = &output.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(stats.total_entries as usize, output.journal_entries.len());
}
12046
/// Pins the defaults of `PhaseConfig`: core phases, balance validation and progress
/// display are on, anomaly injection is off, and vendor/customer volumes are non-zero.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Switches that default to on.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);
    // Anomaly injection defaults to off.
    assert!(!defaults.inject_anomalies);
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);
    // Volumes default to something positive.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
12059
/// Before `generate` has been called, `get_coa` returns `None`.
#[test]
fn test_get_coa_before_generation() {
    let engine = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    assert!(engine.get_coa().is_none());
}
12068
/// After a successful `generate` call, `get_coa` returns `Some`.
#[test]
fn test_get_coa_after_generation() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    // Only the side effect on the orchestrator matters here; the output is discarded.
    let _ = engine.generate().unwrap();

    assert!(engine.get_coa().is_some());
}
12087
/// With only master-data generation switched on, the result carries vendors.
#[test]
fn test_get_master_data() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        // Master-data volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();

    assert!(!output.master_data.vendors.is_empty());
}
12111
/// `with_progress(false)` clears `phase_config.show_progress` on the orchestrator.
#[test]
fn test_with_progress_builder() {
    let default_engine = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let quiet_engine = default_engine.with_progress(false);

    assert!(!quiet_engine.phase_config.show_progress);
}
12122
/// Adds a second company to the baseline config and checks that master data is
/// generated for both: at least 10 vendors and 10 customers overall (5 per company
/// requested), and a reported company count of exactly 2.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        functional_currency: None,
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // Two companies at 5 vendors / 5 customers each.
    assert!(result.statistics.vendor_count >= 10);
    assert!(result.statistics.customer_count >= 10);
    // `assert_eq!` so that a mismatch prints the actual company count.
    assert_eq!(result.statistics.companies_count, 2);
}
12159
/// Requesting document flows without master data produces no P2P or O2C chains,
/// and the run still succeeds.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        // Document flows are requested, but master data is not generated.
        generate_document_flows: true,
        generate_master_data: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let flows = &output.document_flows;

    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
12179
/// `statistics.total_line_items` equals the sum of `line_count()` over all generated
/// journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();

    // Recount the line items independently of the statistics.
    let recounted = output
        .journal_entries
        .iter()
        .fold(0u64, |running, entry| running + entry.line_count() as u64);

    assert_eq!(output.statistics.total_line_items, recounted);
}
12203
/// With audit generation switched on for two engagements, every audit artefact
/// collection checked below is populated and the statistics mirror the collection
/// sizes.
#[test]
fn test_audit_generation() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        // Phase switches.
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        // Audit volumes.
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();

    let audit = &output.audit;
    let stats = &output.statistics;

    // Core audit artefacts.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Confirmations and procedure steps.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );

    // Analytical procedures, internal audit functions and related parties.
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Statistics mirror the collection sizes.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(stats.audit_procedure_step_count, audit.procedure_steps.len());
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(stats.audit_related_party_count, audit.related_parties.len());
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
12323
12324 #[test]
12325 fn test_new_phases_disabled_by_default() {
12326 let config = create_test_config();
12327 assert!(!config.llm.enabled);
12329 assert!(!config.diffusion.enabled);
12330 assert!(!config.causal.enabled);
12331
12332 let phase_config = PhaseConfig {
12333 generate_master_data: false,
12334 generate_document_flows: false,
12335 generate_journal_entries: true,
12336 inject_anomalies: false,
12337 show_progress: false,
12338 ..Default::default()
12339 };
12340
12341 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12342 let result = orchestrator.generate().unwrap();
12343
12344 assert_eq!(result.statistics.llm_enrichment_ms, 0);
12346 assert_eq!(result.statistics.llm_vendors_enriched, 0);
12347 assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
12348 assert_eq!(result.statistics.diffusion_samples_generated, 0);
12349 assert_eq!(result.statistics.causal_generation_ms, 0);
12350 assert_eq!(result.statistics.causal_samples_generated, 0);
12351 assert!(result.statistics.causal_validation_passed.is_none());
12352 assert_eq!(result.statistics.counterfactual_pair_count, 0);
12353 assert!(result.counterfactual_pairs.is_empty());
12354 }
12355
12356 #[test]
12357 fn test_counterfactual_generation_enabled() {
12358 let config = create_test_config();
12359 let phase_config = PhaseConfig {
12360 generate_master_data: false,
12361 generate_document_flows: false,
12362 generate_journal_entries: true,
12363 inject_anomalies: false,
12364 show_progress: false,
12365 generate_counterfactuals: true,
12366 generate_period_close: false, ..Default::default()
12368 };
12369
12370 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12371 let result = orchestrator.generate().unwrap();
12372
12373 if !result.journal_entries.is_empty() {
12375 assert_eq!(
12376 result.counterfactual_pairs.len(),
12377 result.journal_entries.len()
12378 );
12379 assert_eq!(
12380 result.statistics.counterfactual_pair_count,
12381 result.journal_entries.len()
12382 );
12383 let ids: std::collections::HashSet<_> = result
12385 .counterfactual_pairs
12386 .iter()
12387 .map(|p| p.pair_id.clone())
12388 .collect();
12389 assert_eq!(ids.len(), result.counterfactual_pairs.len());
12390 }
12391 }
12392
12393 #[test]
12394 fn test_llm_enrichment_enabled() {
12395 let mut config = create_test_config();
12396 config.llm.enabled = true;
12397 config.llm.max_vendor_enrichments = 3;
12398
12399 let phase_config = PhaseConfig {
12400 generate_master_data: true,
12401 generate_document_flows: false,
12402 generate_journal_entries: false,
12403 inject_anomalies: false,
12404 show_progress: false,
12405 vendors_per_company: 5,
12406 customers_per_company: 3,
12407 materials_per_company: 3,
12408 assets_per_company: 3,
12409 employees_per_company: 3,
12410 ..Default::default()
12411 };
12412
12413 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12414 let result = orchestrator.generate().unwrap();
12415
12416 assert!(result.statistics.llm_vendors_enriched > 0);
12418 assert!(result.statistics.llm_vendors_enriched <= 3);
12419 }
12420
12421 #[test]
12422 fn test_diffusion_enhancement_enabled() {
12423 let mut config = create_test_config();
12424 config.diffusion.enabled = true;
12425 config.diffusion.n_steps = 50;
12426 config.diffusion.sample_size = 20;
12427
12428 let phase_config = PhaseConfig {
12429 generate_master_data: false,
12430 generate_document_flows: false,
12431 generate_journal_entries: true,
12432 inject_anomalies: false,
12433 show_progress: false,
12434 ..Default::default()
12435 };
12436
12437 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12438 let result = orchestrator.generate().unwrap();
12439
12440 assert_eq!(result.statistics.diffusion_samples_generated, 20);
12442 }
12443
12444 #[test]
12445 fn test_causal_overlay_enabled() {
12446 let mut config = create_test_config();
12447 config.causal.enabled = true;
12448 config.causal.template = "fraud_detection".to_string();
12449 config.causal.sample_size = 100;
12450 config.causal.validate = true;
12451
12452 let phase_config = PhaseConfig {
12453 generate_master_data: false,
12454 generate_document_flows: false,
12455 generate_journal_entries: true,
12456 inject_anomalies: false,
12457 show_progress: false,
12458 ..Default::default()
12459 };
12460
12461 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12462 let result = orchestrator.generate().unwrap();
12463
12464 assert_eq!(result.statistics.causal_samples_generated, 100);
12466 assert!(result.statistics.causal_validation_passed.is_some());
12468 }
12469
12470 #[test]
12471 fn test_causal_overlay_revenue_cycle_template() {
12472 let mut config = create_test_config();
12473 config.causal.enabled = true;
12474 config.causal.template = "revenue_cycle".to_string();
12475 config.causal.sample_size = 50;
12476 config.causal.validate = false;
12477
12478 let phase_config = PhaseConfig {
12479 generate_master_data: false,
12480 generate_document_flows: false,
12481 generate_journal_entries: true,
12482 inject_anomalies: false,
12483 show_progress: false,
12484 ..Default::default()
12485 };
12486
12487 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12488 let result = orchestrator.generate().unwrap();
12489
12490 assert_eq!(result.statistics.causal_samples_generated, 50);
12492 assert!(result.statistics.causal_validation_passed.is_none());
12494 }
12495
12496 #[test]
12497 fn test_all_new_phases_enabled_together() {
12498 let mut config = create_test_config();
12499 config.llm.enabled = true;
12500 config.llm.max_vendor_enrichments = 2;
12501 config.diffusion.enabled = true;
12502 config.diffusion.n_steps = 20;
12503 config.diffusion.sample_size = 10;
12504 config.causal.enabled = true;
12505 config.causal.sample_size = 50;
12506 config.causal.validate = true;
12507
12508 let phase_config = PhaseConfig {
12509 generate_master_data: true,
12510 generate_document_flows: false,
12511 generate_journal_entries: true,
12512 inject_anomalies: false,
12513 show_progress: false,
12514 vendors_per_company: 5,
12515 customers_per_company: 3,
12516 materials_per_company: 3,
12517 assets_per_company: 3,
12518 employees_per_company: 3,
12519 ..Default::default()
12520 };
12521
12522 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12523 let result = orchestrator.generate().unwrap();
12524
12525 assert!(result.statistics.llm_vendors_enriched > 0);
12527 assert_eq!(result.statistics.diffusion_samples_generated, 10);
12528 assert_eq!(result.statistics.causal_samples_generated, 50);
12529 assert!(result.statistics.causal_validation_passed.is_some());
12530 }
12531
12532 #[test]
12533 fn test_statistics_serialization_with_new_fields() {
12534 let stats = EnhancedGenerationStatistics {
12535 total_entries: 100,
12536 total_line_items: 500,
12537 llm_enrichment_ms: 42,
12538 llm_vendors_enriched: 10,
12539 diffusion_enhancement_ms: 100,
12540 diffusion_samples_generated: 50,
12541 causal_generation_ms: 200,
12542 causal_samples_generated: 100,
12543 causal_validation_passed: Some(true),
12544 ..Default::default()
12545 };
12546
12547 let json = serde_json::to_string(&stats).unwrap();
12548 let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
12549
12550 assert_eq!(deserialized.llm_enrichment_ms, 42);
12551 assert_eq!(deserialized.llm_vendors_enriched, 10);
12552 assert_eq!(deserialized.diffusion_enhancement_ms, 100);
12553 assert_eq!(deserialized.diffusion_samples_generated, 50);
12554 assert_eq!(deserialized.causal_generation_ms, 200);
12555 assert_eq!(deserialized.causal_samples_generated, 100);
12556 assert_eq!(deserialized.causal_validation_passed, Some(true));
12557 }
12558
12559 #[test]
12560 fn test_statistics_backward_compat_deserialization() {
12561 let old_json = r#"{
12563 "total_entries": 100,
12564 "total_line_items": 500,
12565 "accounts_count": 50,
12566 "companies_count": 1,
12567 "period_months": 12,
12568 "vendor_count": 10,
12569 "customer_count": 20,
12570 "material_count": 15,
12571 "asset_count": 5,
12572 "employee_count": 8,
12573 "p2p_chain_count": 5,
12574 "o2c_chain_count": 5,
12575 "ap_invoice_count": 5,
12576 "ar_invoice_count": 5,
12577 "ocpm_event_count": 0,
12578 "ocpm_object_count": 0,
12579 "ocpm_case_count": 0,
12580 "audit_engagement_count": 0,
12581 "audit_workpaper_count": 0,
12582 "audit_evidence_count": 0,
12583 "audit_risk_count": 0,
12584 "audit_finding_count": 0,
12585 "audit_judgment_count": 0,
12586 "anomalies_injected": 0,
12587 "data_quality_issues": 0,
12588 "banking_customer_count": 0,
12589 "banking_account_count": 0,
12590 "banking_transaction_count": 0,
12591 "banking_suspicious_count": 0,
12592 "graph_export_count": 0,
12593 "graph_node_count": 0,
12594 "graph_edge_count": 0
12595 }"#;
12596
12597 let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
12598
12599 assert_eq!(stats.llm_enrichment_ms, 0);
12601 assert_eq!(stats.llm_vendors_enriched, 0);
12602 assert_eq!(stats.diffusion_enhancement_ms, 0);
12603 assert_eq!(stats.diffusion_samples_generated, 0);
12604 assert_eq!(stats.causal_generation_ms, 0);
12605 assert_eq!(stats.causal_samples_generated, 0);
12606 assert!(stats.causal_validation_passed.is_none());
12607 }
12608}