1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 MaterialGenerator,
117 O2CDocumentChain,
118 O2CGenerator,
119 O2CGeneratorConfig,
120 O2CPaymentBehavior,
121 P2PDocumentChain,
122 P2PGenerator,
124 P2PGeneratorConfig,
125 P2PPaymentBehavior,
126 PaymentReference,
127 ProvisionGenerator,
129 QualificationGenerator,
130 RfxGenerator,
131 RiskAssessmentGenerator,
132 RunningBalanceTracker,
134 ScorecardGenerator,
135 SegmentGenerator,
137 SegmentSeed,
138 SourcingProjectGenerator,
139 SpendAnalysisGenerator,
140 ValidationError,
141 VendorGenerator,
143 WorkpaperGenerator,
144};
145use datasynth_graph::{
146 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
147 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
148 TransactionGraphConfig,
149};
150use datasynth_ocpm::{
151 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
152 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
153 OcpmUuidFactory, P2pDocuments, S2cDocuments,
154};
155
156use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
157use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
158use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
159use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
160use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
161use datasynth_core::models::documents::PaymentMethod;
162use datasynth_core::models::IndustrySector;
163use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
164use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
165use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
166use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
167use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
168use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
169use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
170use datasynth_generators::audit::sample_generator::SampleGenerator;
171use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
172use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
173use datasynth_generators::coa_generator::CoAFramework;
174use datasynth_generators::llm_enrichment::VendorLlmEnricher;
175use rayon::prelude::*;
176
177fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
183 let payment_behavior = &schema_config.payment_behavior;
184 let late_dist = &payment_behavior.late_payment_days_distribution;
185
186 P2PGeneratorConfig {
187 three_way_match_rate: schema_config.three_way_match_rate,
188 partial_delivery_rate: schema_config.partial_delivery_rate,
189 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
190 price_variance_rate: schema_config.price_variance_rate,
191 max_price_variance_percent: schema_config.max_price_variance_percent,
192 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
193 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
194 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
195 payment_method_distribution: vec![
196 (PaymentMethod::BankTransfer, 0.60),
197 (PaymentMethod::Check, 0.25),
198 (PaymentMethod::Wire, 0.10),
199 (PaymentMethod::CreditCard, 0.05),
200 ],
201 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
202 payment_behavior: P2PPaymentBehavior {
203 late_payment_rate: payment_behavior.late_payment_rate,
204 late_payment_distribution: LatePaymentDistribution {
205 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
206 late_8_to_14: late_dist.late_8_to_14,
207 very_late_15_to_30: late_dist.very_late_15_to_30,
208 severely_late_31_to_60: late_dist.severely_late_31_to_60,
209 extremely_late_over_60: late_dist.extremely_late_over_60,
210 },
211 partial_payment_rate: payment_behavior.partial_payment_rate,
212 payment_correction_rate: payment_behavior.payment_correction_rate,
213 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
214 },
215 }
216}
217
218fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
220 let payment_behavior = &schema_config.payment_behavior;
221
222 O2CGeneratorConfig {
223 credit_check_failure_rate: schema_config.credit_check_failure_rate,
224 partial_shipment_rate: schema_config.partial_shipment_rate,
225 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
226 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
227 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
228 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
229 bad_debt_rate: schema_config.bad_debt_rate,
230 returns_rate: schema_config.return_rate,
231 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
232 payment_method_distribution: vec![
233 (PaymentMethod::BankTransfer, 0.50),
234 (PaymentMethod::Check, 0.30),
235 (PaymentMethod::Wire, 0.15),
236 (PaymentMethod::CreditCard, 0.05),
237 ],
238 payment_behavior: O2CPaymentBehavior {
239 partial_payment_rate: payment_behavior.partial_payments.rate,
240 short_payment_rate: payment_behavior.short_payments.rate,
241 max_short_percent: payment_behavior.short_payments.max_short_percent,
242 on_account_rate: payment_behavior.on_account_payments.rate,
243 payment_correction_rate: payment_behavior.payment_corrections.rate,
244 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
245 },
246 }
247}
248
/// Switches and volume settings for the generation phases.
///
/// Every `generate_*` / `inject_*` / `validate_*` field is a plain on/off
/// flag and every `*_per_*` / `*_chains` / `audit_engagements` field is a
/// count. How each one is consumed is not visible in this part of the file;
/// the per-field notes below are read off the field names.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Flag for the master-data phase.
    pub generate_master_data: bool,
    /// Flag for the document-flow phase.
    pub generate_document_flows: bool,
    /// Flag for OCPM event generation.
    pub generate_ocpm_events: bool,
    /// Flag for journal-entry generation.
    pub generate_journal_entries: bool,
    /// Flag for anomaly injection.
    pub inject_anomalies: bool,
    /// Flag for data-quality injection.
    pub inject_data_quality: bool,
    /// Flag for balance validation.
    pub validate_balances: bool,
    /// Flag for progress display.
    pub show_progress: bool,
    /// Vendor count per company.
    pub vendors_per_company: usize,
    /// Customer count per company.
    pub customers_per_company: usize,
    /// Material count per company.
    pub materials_per_company: usize,
    /// Asset count per company.
    pub assets_per_company: usize,
    /// Employee count per company.
    pub employees_per_company: usize,
    /// Number of P2P chains.
    pub p2p_chains: usize,
    /// Number of O2C chains.
    pub o2c_chains: usize,
    /// Flag for audit data generation.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Workpaper count per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence count per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk count per engagement.
    pub risks_per_engagement: usize,
    /// Finding count per engagement.
    pub findings_per_engagement: usize,
    /// Judgment count per engagement.
    pub judgments_per_engagement: usize,
    /// Flag for banking data generation.
    pub generate_banking: bool,
    /// Flag for graph export.
    pub generate_graph_export: bool,
    /// Flag for sourcing data generation.
    pub generate_sourcing: bool,
    /// Flag for bank reconciliation generation.
    pub generate_bank_reconciliation: bool,
    /// Flag for financial statement generation.
    pub generate_financial_statements: bool,
    /// Flag for accounting-standards data generation.
    pub generate_accounting_standards: bool,
    /// Flag for manufacturing data generation.
    pub generate_manufacturing: bool,
    /// Flag for sales quote / KPI / budget generation.
    pub generate_sales_kpi_budgets: bool,
    /// Flag for tax data generation.
    pub generate_tax: bool,
    /// Flag for ESG data generation.
    pub generate_esg: bool,
    /// Flag for intercompany data generation.
    pub generate_intercompany: bool,
    /// Flag for evolution event generation.
    pub generate_evolution_events: bool,
    /// Flag for counterfactual generation.
    pub generate_counterfactuals: bool,
    /// Flag for compliance-regulations data generation.
    pub generate_compliance_regulations: bool,
    /// Flag for period-close generation.
    pub generate_period_close: bool,
    /// Flag for HR data generation.
    pub generate_hr: bool,
    /// Flag for treasury data generation.
    pub generate_treasury: bool,
    /// Flag for project-accounting data generation.
    pub generate_project_accounting: bool,
}

impl Default for PhaseConfig {
    /// Returns the baseline configuration.
    ///
    /// Enabled by default: master data, document flows, journal entries,
    /// balance validation, progress display, evolution events and period
    /// close. Every other flag starts out `false`.
    fn default() -> Self {
        PhaseConfig {
            // Flags, listed in declaration order.
            generate_master_data: true,
            generate_document_flows: true,
            generate_ocpm_events: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: true,
            show_progress: true,

            // Master-data and document-chain volumes.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit switch and volumes.
            generate_audit: false,
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,

            // Remaining feature flags.
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            generate_evolution_events: true,
            generate_counterfactuals: false,
            generate_compliance_regulations: false,
            generate_period_close: true,
            generate_hr: false,
            generate_treasury: false,
            generate_project_accounting: false,
        }
    }
}
380
381impl PhaseConfig {
382 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
387 Self {
388 generate_master_data: true,
390 generate_document_flows: true,
391 generate_journal_entries: true,
392 validate_balances: true,
393 generate_period_close: true,
394 generate_evolution_events: true,
395 show_progress: true,
396
397 generate_audit: cfg.audit.enabled,
399 generate_banking: cfg.banking.enabled,
400 generate_graph_export: cfg.graph_export.enabled,
401 generate_sourcing: cfg.source_to_pay.enabled,
402 generate_intercompany: cfg.intercompany.enabled,
403 generate_financial_statements: cfg.financial_reporting.enabled,
404 generate_bank_reconciliation: cfg.financial_reporting.enabled,
405 generate_accounting_standards: cfg.accounting_standards.enabled,
406 generate_manufacturing: cfg.manufacturing.enabled,
407 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
408 generate_tax: cfg.tax.enabled,
409 generate_esg: cfg.esg.enabled,
410 generate_ocpm_events: cfg.ocpm.enabled,
411 generate_compliance_regulations: cfg.compliance_regulations.enabled,
412 generate_hr: cfg.hr.enabled,
413 generate_treasury: cfg.treasury.enabled,
414 generate_project_accounting: cfg.project_accounting.enabled,
415
416 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
418
419 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
420 inject_data_quality: cfg.data_quality.enabled,
421
422 vendors_per_company: 50,
424 customers_per_company: 100,
425 materials_per_company: 200,
426 assets_per_company: 50,
427 employees_per_company: 100,
428 p2p_chains: 100,
429 o2c_chains: 100,
430 audit_engagements: 5,
431 workpapers_per_engagement: 20,
432 evidence_per_workpaper: 5,
433 risks_per_engagement: 15,
434 findings_per_engagement: 8,
435 judgments_per_engagement: 10,
436 }
437 }
438}
439
/// Master-data records grouped by entity kind.
///
/// All fields default to empty vectors. The code that fills this snapshot is
/// not part of this section of the file.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed-asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
    /// Cost-center records.
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// Employee change events.
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
}
458
/// Size figures and output location for a hypergraph export.
///
/// Unlike most snapshot types in this file it does not derive `Default`.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
471
/// Document-flow output: the P2P/O2C chains plus per-type document lists.
///
/// NOTE(review): the per-type vectors presumably hold the same documents that
/// appear inside the chains, flattened by type. Confirm against the code that
/// populates this struct.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
    /// Document-to-document references.
    pub document_references: Vec<documents::DocumentReference>,
}
497
/// Subledger output covering AP, AR, fixed assets and inventory.
///
/// All fields default to empty vectors.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP subledger invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR subledger invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed-asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Fixed-asset depreciation runs.
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results.
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// AR dunning runs.
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// AR dunning letters.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
524
/// OCPM output: an optional event log plus three counters.
///
/// NOTE(review): the counters presumably summarise `event_log`; confirm
/// whether they are still set when `event_log` is `None`.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, if one is present.
    pub event_log: Option<OcpmEventLog>,
    /// Number of events.
    pub event_count: usize,
    /// Number of objects.
    pub object_count: usize,
    /// Number of cases.
    pub case_count: usize,
}
537
/// Audit-related output, one field per artifact type.
///
/// Everything defaults to empty (`Vec::new()` / `None`). Field notes are
/// taken from the field names and element types; the generators that fill
/// them are not visible in this section of the file.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
    /// External confirmations.
    pub confirmations: Vec<ExternalConfirmation>,
    /// Responses to confirmations.
    pub confirmation_responses: Vec<ConfirmationResponse>,
    /// Audit procedure steps.
    pub procedure_steps: Vec<AuditProcedureStep>,
    /// Audit samples.
    pub samples: Vec<AuditSample>,
    /// Analytical procedure results.
    pub analytical_results: Vec<AnalyticalProcedureResult>,
    /// Internal audit functions.
    pub ia_functions: Vec<InternalAuditFunction>,
    /// Internal audit reports.
    pub ia_reports: Vec<InternalAuditReport>,
    /// Related parties.
    pub related_parties: Vec<RelatedParty>,
    /// Related-party transactions.
    pub related_party_transactions: Vec<RelatedPartyTransaction>,
    /// Component auditors.
    pub component_auditors: Vec<ComponentAuditor>,
    /// Group audit plan, if one is present.
    pub group_audit_plan: Option<GroupAuditPlan>,
    /// Component instructions.
    pub component_instructions: Vec<ComponentInstruction>,
    /// Component auditor reports.
    pub component_reports: Vec<ComponentAuditorReport>,
    /// Engagement letters.
    pub engagement_letters: Vec<EngagementLetter>,
    /// Subsequent events.
    pub subsequent_events: Vec<SubsequentEvent>,
    /// Service organizations.
    pub service_organizations: Vec<ServiceOrganization>,
    /// SOC reports.
    pub soc_reports: Vec<SocReport>,
    /// User entity controls.
    pub user_entity_controls: Vec<UserEntityControl>,
    /// Going-concern assessments.
    pub going_concern_assessments:
        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
    /// Accounting estimates.
    pub accounting_estimates:
        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
    /// Audit opinions.
    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
    /// Key audit matters.
    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
    /// SOX 302 certifications.
    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
    /// SOX 404 assessments.
    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
    /// Materiality calculations.
    pub materiality_calculations:
        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
    /// Combined risk assessments.
    pub combined_risk_assessments:
        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
    /// Sampling plans.
    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
    /// Sampled items.
    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
    /// Significant classes of transactions.
    pub significant_transaction_classes:
        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
    /// Unusual-item flags.
    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
    /// Analytical relationships.
    pub analytical_relationships:
        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
    /// PCAOB-to-ISA mappings.
    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
    /// ISA standard entries.
    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
    /// Audit scopes.
    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
    /// Audit events from `datasynth_audit_fsm`, when present.
    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
}
650
/// Banking output: customers, accounts, transactions, labels and two counters.
///
/// NOTE(review): `suspicious_count` and `scenario_count` are stored
/// separately from the vectors; what they are derived from is not visible
/// here.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level labels.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Suspicious count.
    pub suspicious_count: usize,
    /// Scenario count.
    pub scenario_count: usize,
}
675
/// Summary of graph exports. Serializable via `serde`.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Export flag; defaults to `false`.
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Per-export details in a string-keyed map.
    /// NOTE(review): the key is presumably the export name; confirm.
    pub exports: HashMap<String, GraphExportInfo>,
}
686
/// Details of a single graph export. Serializable via `serde`.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Export name.
    pub name: String,
    /// Export format, as a free-form string.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
701
/// Sourcing output, one vector per artifact type.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
724
/// Trial balance for one fiscal period. Round-trips through `serde`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period number.
    pub fiscal_period: u8,
    /// Period start date.
    pub period_start: NaiveDate,
    /// Period end date.
    pub period_end: NaiveDate,
    /// Trial balance entries for the period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
739
/// Financial-reporting output: statements, consolidation data, bank
/// reconciliations, trial balances, segment data and notes.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Standalone statements in a string-keyed map.
    /// NOTE(review): the key is presumably an entity/company code; confirm.
    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
    /// Consolidated statements.
    pub consolidated_statements: Vec<FinancialStatement>,
    /// Consolidation schedules.
    pub consolidation_schedules: Vec<ConsolidationSchedule>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
    /// Operating segments.
    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
    /// Segment reconciliations.
    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
    /// Notes to the financial statements.
    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
}
764
/// HR output: payroll, time, expenses, benefits, pensions and stock
/// compensation, followed by a set of counters.
///
/// NOTE(review): each `*_count` field presumably mirrors the length of the
/// matching vector; they are stored separately, so confirm they are kept in
/// sync where this struct is built.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments.
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    /// Defined benefit pension plans.
    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
    /// Pension obligations.
    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
    /// Pension plan assets.
    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
    /// Pension disclosures.
    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
    /// Journal entries for pensions.
    pub pension_journal_entries: Vec<JournalEntry>,
    /// Stock grants.
    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
    /// Stock compensation expenses.
    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
    /// Journal entries for stock compensation.
    pub stock_comp_journal_entries: Vec<JournalEntry>,
    /// Payroll run count.
    pub payroll_run_count: usize,
    /// Payroll line item count.
    pub payroll_line_item_count: usize,
    /// Time entry count.
    pub time_entry_count: usize,
    /// Expense report count.
    pub expense_report_count: usize,
    /// Benefit enrollment count.
    pub benefit_enrollment_count: usize,
    /// Pension plan count.
    pub pension_plan_count: usize,
    /// Stock grant count.
    pub stock_grant_count: usize,
}
809
/// Accounting-standards output: revenue contracts, impairment tests,
/// business combinations, expected credit loss, provisions and currency
/// translation, followed by a set of counters.
///
/// NOTE(review): each `*_count` field presumably mirrors the length of the
/// matching vector; confirm where this struct is built.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer contracts (revenue module).
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Business combinations.
    pub business_combinations:
        Vec<datasynth_core::models::business_combination::BusinessCombination>,
    /// Journal entries for business combinations.
    pub business_combination_journal_entries: Vec<JournalEntry>,
    /// Expected credit loss models.
    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
    /// Expected credit loss provision movements.
    pub ecl_provision_movements:
        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
    /// Journal entries for expected credit loss.
    pub ecl_journal_entries: Vec<JournalEntry>,
    /// Provisions.
    pub provisions: Vec<datasynth_core::models::provision::Provision>,
    /// Provision movements.
    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
    /// Contingent liabilities.
    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
    /// Journal entries for provisions.
    pub provision_journal_entries: Vec<JournalEntry>,
    /// Currency translation results.
    pub currency_translation_results:
        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
    /// Revenue contract count.
    pub revenue_contract_count: usize,
    /// Impairment test count.
    pub impairment_test_count: usize,
    /// Business combination count.
    pub business_combination_count: usize,
    /// ECL model count.
    pub ecl_model_count: usize,
    /// Provision count.
    pub provision_count: usize,
    /// Currency translation count.
    pub currency_translation_count: usize,
}
853
/// Compliance-regulations output: record lists plus an optional graph.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Compliance standard records.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Audit procedure records.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// Compliance graph, if one is present.
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
872
/// Manufacturing output: five record lists followed by five counters.
///
/// NOTE(review): each `*_count` field presumably mirrors the length of the
/// matching vector; confirm where this struct is built.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// BOM components.
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements.
    pub inventory_movements: Vec<InventoryMovement>,
    /// Production order count.
    pub production_order_count: usize,
    /// Quality inspection count.
    pub quality_inspection_count: usize,
    /// Number of cycle counts.
    pub cycle_count_count: usize,
    /// BOM component count.
    pub bom_component_count: usize,
    /// Inventory movement count.
    pub inventory_movement_count: usize,
}
897
/// Sales quotes, management KPIs and budgets, followed by three counters.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Sales quote count.
    pub sales_quote_count: usize,
    /// KPI count.
    pub kpi_count: usize,
    /// Budget line count.
    /// NOTE(review): the name says budget *lines*, so this is presumably not
    /// `budgets.len()`; confirm how it is computed.
    pub budget_line_count: usize,
}
914
/// Anomaly labels with an optional summary and a per-type tally.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// The labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary, if one is present.
    pub summary: Option<AnomalySummary>,
    /// Tally in a string-keyed map.
    /// NOTE(review): the key is presumably the anomaly type name; confirm.
    pub by_type: HashMap<String, usize>,
}
925
/// Result of balance validation.
///
/// With `Default`, every flag is `false`, every number is zero and the error
/// list is empty.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Flag recording validation status.
    /// NOTE(review): presumably "validation was run"; confirm.
    pub validated: bool,
    /// Balanced flag.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Debit total.
    pub total_debits: rust_decimal::Decimal,
    /// Credit total.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors.
    pub validation_errors: Vec<ValidationError>,
    /// Unbalanced-entries flag.
    pub has_unbalanced_entries: bool,
}
948
/// Tax output: jurisdictions, codes, lines, returns, provisions, withholding
/// records, anomaly labels, two counters and a deferred-tax snapshot.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Jurisdiction count.
    pub jurisdiction_count: usize,
    /// Tax code count.
    pub code_count: usize,
    /// Deferred-tax snapshot.
    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
}
973
/// Intercompany output. Round-trips through `serde`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Group structure, if one is present.
    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Seller-side journal entries.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Buyer-side journal entries.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// NCI measurements.
    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
    /// Matched pair count.
    pub matched_pair_count: usize,
    /// Elimination entry count.
    pub elimination_entry_count: usize,
    /// Match rate.
    /// NOTE(review): presumably a fraction in 0.0..=1.0; confirm the scale.
    pub match_rate: f64,
}
996
/// ESG output, one vector per record type, followed by two counters.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Emission count.
    pub emission_count: usize,
    /// Disclosure count.
    pub disclosure_count: usize,
}
1033
/// Treasury output, one vector per record type.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Treasury anomaly labels.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
}
1058
/// Project-accounting output, one vector per record type.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1075
/// Aggregate result type bundling every snapshot declared in this file with
/// the journal entries, labels, validation output and statistics.
///
/// Derives `Debug` and `Default` only; it is not `Clone`. Field notes are
/// taken from the field names and types; the code that assembles this struct
/// is not visible in this section of the file.
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master-data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document-flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot.
    pub subledger: SubledgerSnapshot,
    /// OCPM snapshot.
    pub ocpm: OcpmSnapshot,
    /// Audit snapshot.
    pub audit: AuditSnapshot,
    /// Banking snapshot.
    pub banking: BankingSnapshot,
    /// Graph-export snapshot.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing snapshot.
    pub sourcing: SourcingSnapshot,
    /// Financial-reporting snapshot.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR snapshot.
    pub hr: HrSnapshot,
    /// Accounting-standards snapshot.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales quote / KPI / budget snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax snapshot.
    pub tax: TaxSnapshot,
    /// ESG snapshot.
    pub esg: EsgSnapshot,
    /// Treasury snapshot.
    pub treasury: TreasurySnapshot,
    /// Project-accounting snapshot.
    pub project_accounting: ProjectAccountingSnapshot,
    /// Process evolution events.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events.
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events.
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany snapshot.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation result.
    pub balance_validation: BalanceValidationResult,
    /// Data-quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Quality issues.
    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
    /// Generation statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, if one is present.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Gate result from `datasynth_eval`, if one is present.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// SoD violations.
    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
    /// Generated opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual pairs.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red flags.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Fraud collusion rings.
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    /// Temporal version chains over vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity graph, if one is present.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links.
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific output, if present.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// Compliance-regulations snapshot.
    pub compliance_regulations: ComplianceRegulationsSnapshot,
}
1167
/// Counters and timings gathered during an enhanced generation run.
///
/// `EnhancedOrchestrator::generate` initialises `companies_count` and
/// `period_months` from the configuration, sets `total_entries` and
/// `total_line_items` from the journal-entry vector, and passes the struct as
/// `&mut` to each phase method so the phase can record its own figures.
///
/// Fields carrying `#[serde(default)]` may be missing from serialized input;
/// they then deserialize to their `Default` value (`0`, or `None` for the
/// `Option`). Fields without the attribute must be present when deserializing.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // Journal-entry totals; `generate` derives both from the entries vector.
    pub total_entries: u64,
    pub total_line_items: u64,

    // NOTE(review): presumably written by the chart-of-accounts phase — confirm.
    pub accounts_count: usize,

    // Copied from the configuration at the start of `generate`.
    pub companies_count: usize,
    pub period_months: u32,

    // Master-data counters (written by phases outside this excerpt).
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,

    // Document-flow and subledger invoice counters.
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,

    // OCPM counters.
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,

    // Audit counters. The first group is required on deserialization; the
    // later additions are optional via `#[serde(default)]`.
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,

    // Anomaly and data-quality injection counters.
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,

    // Banking counters.
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,

    // Graph export counters.
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,

    // LLM enrichment; the `_ms` suffix suggests milliseconds — TODO confirm.
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,

    // Diffusion enhancement.
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,

    // Causal overlay. `causal_validation_passed` is `None` by default.
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // Sourcing counters.
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,

    // Financial reporting counters.
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // HR counters.
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    #[serde(default)]
    pub pension_plan_count: usize,
    #[serde(default)]
    pub stock_grant_count: usize,

    // Accounting-standards counters.
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    #[serde(default)]
    pub business_combination_count: usize,
    #[serde(default)]
    pub ecl_model_count: usize,
    #[serde(default)]
    pub provision_count: usize,

    // Manufacturing counters.
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,

    // Sales quote, KPI and budget counters.
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,

    // Tax master-data counters.
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,

    // ESG counters.
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // Intercompany counters.
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,

    // Fixed-asset and inventory subledger counters.
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // Treasury instrument counters.
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,

    // Project accounting counters.
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,

    // Miscellaneous counters added later.
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,

    // Treasury cash-management counters.
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,

    // Evolution event counters.
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,

    // Counterfactual and fraud-pattern counters.
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,

    // Temporal attribute and entity relationship counters.
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,

    // Disruption, industry-specific and period-close counters.
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
    #[serde(default)]
    pub period_close_je_count: usize,
}
1418
/// Drives the multi-phase generation workflow (see `generate`).
///
/// Built with `new` / `with_defaults` / `from_fingerprint*` and then customised
/// through the consuming `with_*` builder methods.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated by `new` before the orchestrator is built.
    config: GeneratorConfig,
    /// Phase toggles (e.g. `show_progress`, `generate_graph_export`).
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` at construction time.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts as `Default` and is moved into the result
    /// at the end of `generate`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or a random value when unset.
    seed: u64,
    /// Progress-bar container created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard queried for degradation levels; rebuilt when the output
    /// path changes.
    resource_guard: ResourceGuard,
    /// Output location supplied through `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generator specs; populated by `from_fingerprint_data`, otherwise empty.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry built from `config.country_packs`, or the built-in
    /// packs when that section is absent.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional sink that receives JSON-serialized items as phases finish.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1438
1439impl EnhancedOrchestrator {
1440 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1442 datasynth_config::validate_config(&config)?;
1443
1444 let seed = config.global.seed.unwrap_or_else(rand::random);
1445
1446 let resource_guard = Self::build_resource_guard(&config, None);
1448
1449 let country_pack_registry = match &config.country_packs {
1451 Some(cp) => {
1452 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1453 .map_err(|e| SynthError::config(e.to_string()))?
1454 }
1455 None => datasynth_core::CountryPackRegistry::builtin_only()
1456 .map_err(|e| SynthError::config(e.to_string()))?,
1457 };
1458
1459 Ok(Self {
1460 config,
1461 phase_config,
1462 coa: None,
1463 master_data: MasterDataSnapshot::default(),
1464 seed,
1465 multi_progress: None,
1466 resource_guard,
1467 output_path: None,
1468 copula_generators: Vec::new(),
1469 country_pack_registry,
1470 phase_sink: None,
1471 })
1472 }
1473
1474 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1476 Self::new(config, PhaseConfig::default())
1477 }
1478
1479 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1481 self.phase_sink = Some(sink);
1482 self
1483 }
1484
1485 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1487 if let Some(ref sink) = self.phase_sink {
1488 for item in items {
1489 if let Ok(value) = serde_json::to_value(item) {
1490 if let Err(e) = sink.emit(phase, type_name, &value) {
1491 warn!(
1492 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1493 );
1494 }
1495 }
1496 }
1497 if let Err(e) = sink.phase_complete(phase) {
1498 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1499 }
1500 }
1501 }
1502
1503 pub fn with_progress(mut self, show: bool) -> Self {
1505 self.phase_config.show_progress = show;
1506 if show {
1507 self.multi_progress = Some(MultiProgress::new());
1508 }
1509 self
1510 }
1511
1512 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1514 let path = path.into();
1515 self.output_path = Some(path.clone());
1516 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1518 self
1519 }
1520
1521 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1523 &self.country_pack_registry
1524 }
1525
1526 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1528 self.country_pack_registry.get_by_str(country)
1529 }
1530
1531 fn primary_country_code(&self) -> &str {
1534 self.config
1535 .companies
1536 .first()
1537 .map(|c| c.country.as_str())
1538 .unwrap_or("US")
1539 }
1540
1541 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1543 self.country_pack_for(self.primary_country_code())
1544 }
1545
1546 fn resolve_coa_framework(&self) -> CoAFramework {
1548 if self.config.accounting_standards.enabled {
1549 match self.config.accounting_standards.framework {
1550 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1551 return CoAFramework::FrenchPcg;
1552 }
1553 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1554 return CoAFramework::GermanSkr04;
1555 }
1556 _ => {}
1557 }
1558 }
1559 let pack = self.primary_pack();
1561 match pack.accounting.framework.as_str() {
1562 "french_gaap" => CoAFramework::FrenchPcg,
1563 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1564 _ => CoAFramework::UsGaap,
1565 }
1566 }
1567
1568 pub fn has_copulas(&self) -> bool {
1573 !self.copula_generators.is_empty()
1574 }
1575
1576 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1582 &self.copula_generators
1583 }
1584
1585 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1589 &mut self.copula_generators
1590 }
1591
1592 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1596 self.copula_generators
1597 .iter_mut()
1598 .find(|c| c.name == copula_name)
1599 .map(|c| c.generator.sample())
1600 }
1601
1602 pub fn from_fingerprint(
1625 fingerprint_path: &std::path::Path,
1626 phase_config: PhaseConfig,
1627 scale: f64,
1628 ) -> SynthResult<Self> {
1629 info!("Loading fingerprint from: {}", fingerprint_path.display());
1630
1631 let reader = FingerprintReader::new();
1633 let fingerprint = reader
1634 .read_from_file(fingerprint_path)
1635 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1636
1637 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1638 }
1639
1640 pub fn from_fingerprint_data(
1647 fingerprint: Fingerprint,
1648 phase_config: PhaseConfig,
1649 scale: f64,
1650 ) -> SynthResult<Self> {
1651 info!(
1652 "Synthesizing config from fingerprint (version: {}, tables: {})",
1653 fingerprint.manifest.version,
1654 fingerprint.schema.tables.len()
1655 );
1656
1657 let seed: u64 = rand::random();
1659 info!("Fingerprint synthesis seed: {}", seed);
1660
1661 let options = SynthesisOptions {
1663 scale,
1664 seed: Some(seed),
1665 preserve_correlations: true,
1666 inject_anomalies: true,
1667 };
1668 let synthesizer = ConfigSynthesizer::with_options(options);
1669
1670 let synthesis_result = synthesizer
1672 .synthesize_full(&fingerprint, seed)
1673 .map_err(|e| {
1674 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1675 })?;
1676
1677 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1679 Self::base_config_for_industry(industry)
1680 } else {
1681 Self::base_config_for_industry("manufacturing")
1682 };
1683
1684 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1686
1687 info!(
1689 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1690 fingerprint.schema.tables.len(),
1691 scale,
1692 synthesis_result.copula_generators.len()
1693 );
1694
1695 if !synthesis_result.copula_generators.is_empty() {
1696 for spec in &synthesis_result.copula_generators {
1697 info!(
1698 " Copula '{}' for table '{}': {} columns",
1699 spec.name,
1700 spec.table,
1701 spec.columns.len()
1702 );
1703 }
1704 }
1705
1706 let mut orchestrator = Self::new(config, phase_config)?;
1708
1709 orchestrator.copula_generators = synthesis_result.copula_generators;
1711
1712 Ok(orchestrator)
1713 }
1714
1715 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1717 use datasynth_config::presets::create_preset;
1718 use datasynth_config::TransactionVolume;
1719 use datasynth_core::models::{CoAComplexity, IndustrySector};
1720
1721 let sector = match industry.to_lowercase().as_str() {
1722 "manufacturing" => IndustrySector::Manufacturing,
1723 "retail" => IndustrySector::Retail,
1724 "financial" | "financial_services" => IndustrySector::FinancialServices,
1725 "healthcare" => IndustrySector::Healthcare,
1726 "technology" | "tech" => IndustrySector::Technology,
1727 _ => IndustrySector::Manufacturing,
1728 };
1729
1730 create_preset(
1732 sector,
1733 1, 12, CoAComplexity::Medium,
1736 TransactionVolume::TenK,
1737 )
1738 }
1739
1740 fn apply_config_patch(
1742 mut config: GeneratorConfig,
1743 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1744 ) -> GeneratorConfig {
1745 use datasynth_fingerprint::synthesis::ConfigValue;
1746
1747 for (key, value) in patch.values() {
1748 match (key.as_str(), value) {
1749 ("transactions.count", ConfigValue::Integer(n)) => {
1752 info!(
1753 "Fingerprint suggests {} transactions (apply via company volumes)",
1754 n
1755 );
1756 }
1757 ("global.period_months", ConfigValue::Integer(n)) => {
1758 config.global.period_months = (*n).clamp(1, 120) as u32;
1759 }
1760 ("global.start_date", ConfigValue::String(s)) => {
1761 config.global.start_date = s.clone();
1762 }
1763 ("global.seed", ConfigValue::Integer(n)) => {
1764 config.global.seed = Some(*n as u64);
1765 }
1766 ("fraud.enabled", ConfigValue::Bool(b)) => {
1767 config.fraud.enabled = *b;
1768 }
1769 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1770 config.fraud.fraud_rate = *f;
1771 }
1772 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1773 config.data_quality.enabled = *b;
1774 }
1775 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1777 config.fraud.enabled = *b;
1778 }
1779 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1780 config.fraud.fraud_rate = *f;
1781 }
1782 _ => {
1783 debug!("Ignoring unknown config patch key: {}", key);
1784 }
1785 }
1786 }
1787
1788 config
1789 }
1790
1791 fn build_resource_guard(
1793 config: &GeneratorConfig,
1794 output_path: Option<PathBuf>,
1795 ) -> ResourceGuard {
1796 let mut builder = ResourceGuardBuilder::new();
1797
1798 if config.global.memory_limit_mb > 0 {
1800 builder = builder.memory_limit(config.global.memory_limit_mb);
1801 }
1802
1803 if let Some(path) = output_path {
1805 builder = builder.output_path(path).min_free_disk(100); }
1807
1808 builder = builder.conservative();
1810
1811 builder.build()
1812 }
1813
1814 fn check_resources(&self) -> SynthResult<DegradationLevel> {
1819 self.resource_guard.check()
1820 }
1821
1822 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1824 let level = self.resource_guard.check()?;
1825
1826 if level != DegradationLevel::Normal {
1827 warn!(
1828 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1829 phase,
1830 level,
1831 self.resource_guard.current_memory_mb(),
1832 self.resource_guard.available_disk_mb()
1833 );
1834 }
1835
1836 Ok(level)
1837 }
1838
1839 fn get_degradation_actions(&self) -> DegradationActions {
1841 self.resource_guard.get_actions()
1842 }
1843
1844 fn check_memory_limit(&self) -> SynthResult<()> {
1846 self.check_resources()?;
1847 Ok(())
1848 }
1849
1850 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1852 info!("Starting enhanced generation workflow");
1853 info!(
1854 "Config: industry={:?}, period_months={}, companies={}",
1855 self.config.global.industry,
1856 self.config.global.period_months,
1857 self.config.companies.len()
1858 );
1859
1860 let initial_level = self.check_resources_with_log("initial")?;
1862 if initial_level == DegradationLevel::Emergency {
1863 return Err(SynthError::resource(
1864 "Insufficient resources to start generation",
1865 ));
1866 }
1867
1868 let mut stats = EnhancedGenerationStatistics {
1869 companies_count: self.config.companies.len(),
1870 period_months: self.config.global.period_months,
1871 ..Default::default()
1872 };
1873
1874 let coa = self.phase_chart_of_accounts(&mut stats)?;
1876
1877 self.phase_master_data(&mut stats)?;
1879
1880 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1882 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1883 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1884
1885 let (mut document_flows, mut subledger, fa_journal_entries) =
1887 self.phase_document_flows(&mut stats)?;
1888
1889 self.emit_phase_items(
1891 "document_flows",
1892 "PurchaseOrder",
1893 &document_flows.purchase_orders,
1894 );
1895 self.emit_phase_items(
1896 "document_flows",
1897 "GoodsReceipt",
1898 &document_flows.goods_receipts,
1899 );
1900 self.emit_phase_items(
1901 "document_flows",
1902 "VendorInvoice",
1903 &document_flows.vendor_invoices,
1904 );
1905 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1906 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1907
1908 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1910
1911 let opening_balance_jes: Vec<JournalEntry> = opening_balances
1916 .iter()
1917 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1918 .collect();
1919 if !opening_balance_jes.is_empty() {
1920 debug!(
1921 "Prepending {} opening balance JEs to entries",
1922 opening_balance_jes.len()
1923 );
1924 }
1925
1926 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1928
1929 if !opening_balance_jes.is_empty() {
1932 let mut combined = opening_balance_jes;
1933 combined.extend(entries);
1934 entries = combined;
1935 }
1936
1937 if !fa_journal_entries.is_empty() {
1939 debug!(
1940 "Appending {} FA acquisition JEs to main entries",
1941 fa_journal_entries.len()
1942 );
1943 entries.extend(fa_journal_entries);
1944 }
1945
1946 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1948
1949 let actions = self.get_degradation_actions();
1951
1952 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1954
1955 if !sourcing.contracts.is_empty() {
1958 let mut linked_count = 0usize;
1959 let po_vendor_pairs: Vec<(String, String)> = document_flows
1961 .p2p_chains
1962 .iter()
1963 .map(|chain| {
1964 (
1965 chain.purchase_order.vendor_id.clone(),
1966 chain.purchase_order.header.document_id.clone(),
1967 )
1968 })
1969 .collect();
1970
1971 for chain in &mut document_flows.p2p_chains {
1972 if chain.purchase_order.contract_id.is_none() {
1973 if let Some(contract) = sourcing
1974 .contracts
1975 .iter()
1976 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1977 {
1978 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1979 linked_count += 1;
1980 }
1981 }
1982 }
1983
1984 for contract in &mut sourcing.contracts {
1986 let po_ids: Vec<String> = po_vendor_pairs
1987 .iter()
1988 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
1989 .map(|(_, po_id)| po_id.clone())
1990 .collect();
1991 if !po_ids.is_empty() {
1992 contract.purchase_order_ids = po_ids;
1993 }
1994 }
1995
1996 if linked_count > 0 {
1997 debug!(
1998 "Linked {} purchase orders to S2C contracts by vendor match",
1999 linked_count
2000 );
2001 }
2002 }
2003
2004 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2006
2007 if !intercompany.seller_journal_entries.is_empty()
2009 || !intercompany.buyer_journal_entries.is_empty()
2010 {
2011 let ic_je_count = intercompany.seller_journal_entries.len()
2012 + intercompany.buyer_journal_entries.len();
2013 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2014 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2015 debug!(
2016 "Appended {} IC journal entries to main entries",
2017 ic_je_count
2018 );
2019 }
2020
2021 if !intercompany.elimination_entries.is_empty() {
2023 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2024 &intercompany.elimination_entries,
2025 );
2026 if !elim_jes.is_empty() {
2027 debug!(
2028 "Appended {} elimination journal entries to main entries",
2029 elim_jes.len()
2030 );
2031 let elim_debit: rust_decimal::Decimal =
2033 elim_jes.iter().map(|je| je.total_debit()).sum();
2034 let elim_credit: rust_decimal::Decimal =
2035 elim_jes.iter().map(|je| je.total_credit()).sum();
2036 if elim_debit != elim_credit {
2037 warn!(
2038 "IC elimination entries not balanced: debits={}, credits={}, diff={}",
2039 elim_debit,
2040 elim_credit,
2041 elim_debit - elim_credit
2042 );
2043 }
2044 entries.extend(elim_jes);
2045 }
2046 }
2047
2048 let hr = self.phase_hr_data(&mut stats)?;
2050
2051 if !hr.payroll_runs.is_empty() {
2053 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2054 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2055 entries.extend(payroll_jes);
2056 }
2057
2058 if !hr.pension_journal_entries.is_empty() {
2060 debug!(
2061 "Generated {} JEs from pension plans",
2062 hr.pension_journal_entries.len()
2063 );
2064 entries.extend(hr.pension_journal_entries.iter().cloned());
2065 }
2066
2067 if !hr.stock_comp_journal_entries.is_empty() {
2069 debug!(
2070 "Generated {} JEs from stock-based compensation",
2071 hr.stock_comp_journal_entries.len()
2072 );
2073 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2074 }
2075
2076 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2078
2079 if !manufacturing_snap.production_orders.is_empty() {
2081 let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
2082 debug!("Generated {} JEs from production orders", mfg_jes.len());
2083 entries.extend(mfg_jes);
2084 }
2085
2086 if !manufacturing_snap.inventory_movements.is_empty()
2092 && !subledger.inventory_positions.is_empty()
2093 {
2094 use datasynth_core::models::MovementType as MfgMovementType;
2095 let mut receipt_count = 0usize;
2096 let mut issue_count = 0usize;
2097 for movement in &manufacturing_snap.inventory_movements {
2098 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2100 p.material_id == movement.material_code
2101 && p.company_code == movement.entity_code
2102 }) {
2103 match movement.movement_type {
2104 MfgMovementType::GoodsReceipt => {
2105 pos.add_quantity(
2107 movement.quantity,
2108 movement.value,
2109 movement.movement_date,
2110 );
2111 receipt_count += 1;
2112 }
2113 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2114 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2116 issue_count += 1;
2117 }
2118 _ => {}
2119 }
2120 }
2121 }
2122 debug!(
2123 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2124 manufacturing_snap.inventory_movements.len(),
2125 receipt_count,
2126 issue_count,
2127 );
2128 }
2129
2130 if !entries.is_empty() {
2133 stats.total_entries = entries.len() as u64;
2134 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2135 debug!(
2136 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2137 stats.total_entries, stats.total_line_items
2138 );
2139 }
2140
2141 if self.config.internal_controls.enabled && !entries.is_empty() {
2143 info!("Phase 7b: Applying internal controls to journal entries");
2144 let control_config = ControlGeneratorConfig {
2145 exception_rate: self.config.internal_controls.exception_rate,
2146 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2147 enable_sox_marking: true,
2148 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2149 self.config.internal_controls.sox_materiality_threshold,
2150 )
2151 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2152 ..Default::default()
2153 };
2154 let mut control_gen = ControlGenerator::with_config(self.seed + 99, control_config);
2155 for entry in &mut entries {
2156 control_gen.apply_controls(entry, &coa);
2157 }
2158 let with_controls = entries
2159 .iter()
2160 .filter(|e| !e.header.control_ids.is_empty())
2161 .count();
2162 info!(
2163 "Applied controls to {} entries ({} with control IDs assigned)",
2164 entries.len(),
2165 with_controls
2166 );
2167 }
2168
2169 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2173 .iter()
2174 .filter(|e| e.header.sod_violation)
2175 .filter_map(|e| {
2176 e.header.sod_conflict_type.map(|ct| {
2177 use datasynth_core::models::{RiskLevel, SodViolation};
2178 let severity = match ct {
2179 datasynth_core::models::SodConflictType::PaymentReleaser
2180 | datasynth_core::models::SodConflictType::RequesterApprover => {
2181 RiskLevel::Critical
2182 }
2183 datasynth_core::models::SodConflictType::PreparerApprover
2184 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2185 | datasynth_core::models::SodConflictType::JournalEntryPoster
2186 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2187 RiskLevel::High
2188 }
2189 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2190 RiskLevel::Medium
2191 }
2192 };
2193 let action = format!(
2194 "SoD conflict {:?} on entry {} ({})",
2195 ct, e.header.document_id, e.header.company_code
2196 );
2197 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2198 })
2199 })
2200 .collect();
2201 if !sod_violations.is_empty() {
2202 info!(
2203 "Phase 7c: Extracted {} SoD violations from {} entries",
2204 sod_violations.len(),
2205 entries.len()
2206 );
2207 }
2208
2209 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2211
2212 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2214
2215 self.emit_phase_items(
2217 "anomaly_injection",
2218 "LabeledAnomaly",
2219 &anomaly_labels.labels,
2220 );
2221
2222 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2224
2225 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2227
2228 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2230
2231 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2233
2234 let balance_validation = self.phase_balance_validation(&entries)?;
2236
2237 let subledger_reconciliation =
2239 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2240
2241 let (data_quality_stats, quality_issues) =
2243 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2244
2245 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2247
2248 let audit = self.phase_audit_data(&entries, &mut stats)?;
2250
2251 let banking = self.phase_banking_data(&mut stats)?;
2253
2254 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2256
2257 self.phase_llm_enrichment(&mut stats);
2259
2260 self.phase_diffusion_enhancement(&mut stats);
2262
2263 self.phase_causal_overlay(&mut stats);
2265
2266 let mut financial_reporting = self.phase_financial_reporting(
2270 &document_flows,
2271 &entries,
2272 &coa,
2273 &hr,
2274 &audit,
2275 &mut stats,
2276 )?;
2277
2278 {
2280 use datasynth_core::models::StatementType;
2281 for stmt in &financial_reporting.consolidated_statements {
2282 if stmt.statement_type == StatementType::BalanceSheet {
2283 let total_assets: rust_decimal::Decimal = stmt
2284 .line_items
2285 .iter()
2286 .filter(|li| li.section.to_uppercase().contains("ASSET"))
2287 .map(|li| li.amount)
2288 .sum();
2289 let total_le: rust_decimal::Decimal = stmt
2290 .line_items
2291 .iter()
2292 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2293 .map(|li| li.amount)
2294 .sum();
2295 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2296 warn!(
2297 "BS equation imbalance: assets={}, L+E={}",
2298 total_assets, total_le
2299 );
2300 }
2301 }
2302 }
2303 }
2304
2305 let accounting_standards =
2307 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2308
2309 if !accounting_standards.ecl_journal_entries.is_empty() {
2311 debug!(
2312 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2313 accounting_standards.ecl_journal_entries.len()
2314 );
2315 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2316 }
2317
2318 if !accounting_standards.provision_journal_entries.is_empty() {
2320 debug!(
2321 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2322 accounting_standards.provision_journal_entries.len()
2323 );
2324 entries.extend(
2325 accounting_standards
2326 .provision_journal_entries
2327 .iter()
2328 .cloned(),
2329 );
2330 }
2331
2332 let ocpm = self.phase_ocpm_events(
2334 &document_flows,
2335 &sourcing,
2336 &hr,
2337 &manufacturing_snap,
2338 &banking,
2339 &audit,
2340 &financial_reporting,
2341 &mut stats,
2342 )?;
2343
2344 if let Some(ref event_log) = ocpm.event_log {
2346 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2347 }
2348
2349 let sales_kpi_budgets =
2351 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2352
2353 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2355
2356 self.generate_notes_to_financial_statements(
2359 &mut financial_reporting,
2360 &accounting_standards,
2361 &tax,
2362 &hr,
2363 &audit,
2364 );
2365
2366 let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;
2368
2369 let treasury =
2371 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2372
2373 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
2375
2376 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
2378
2379 let disruption_events = self.phase_disruption_events(&mut stats)?;
2381
2382 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
2384
2385 let (entity_relationship_graph, cross_process_links) =
2387 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
2388
2389 let industry_output = self.phase_industry_data(&mut stats);
2391
2392 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
2394
2395 self.phase_hypergraph_export(
2397 &coa,
2398 &entries,
2399 &document_flows,
2400 &sourcing,
2401 &hr,
2402 &manufacturing_snap,
2403 &banking,
2404 &audit,
2405 &financial_reporting,
2406 &ocpm,
2407 &compliance_regulations,
2408 &mut stats,
2409 )?;
2410
2411 if self.phase_config.generate_graph_export {
2414 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
2415 }
2416
2417 if self.config.streaming.enabled {
2419 info!("Note: streaming config is enabled but batch mode does not use it");
2420 }
2421 if self.config.vendor_network.enabled {
2422 debug!("Vendor network config available; relationship graph generation is partial");
2423 }
2424 if self.config.customer_segmentation.enabled {
2425 debug!("Customer segmentation config available; segment-aware generation is partial");
2426 }
2427
2428 let resource_stats = self.resource_guard.stats();
2430 info!(
2431 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
2432 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
2433 resource_stats.disk.estimated_bytes_written,
2434 resource_stats.degradation_level
2435 );
2436
2437 if let Some(ref sink) = self.phase_sink {
2439 if let Err(e) = sink.flush() {
2440 warn!("Stream sink flush failed: {e}");
2441 }
2442 }
2443
2444 let lineage = self.build_lineage_graph();
2446
2447 let gate_result = if self.config.quality_gates.enabled {
2449 let profile_name = &self.config.quality_gates.profile;
2450 match datasynth_eval::gates::get_profile(profile_name) {
2451 Some(profile) => {
2452 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
2454
2455 if balance_validation.validated {
2457 eval.coherence.balance =
2458 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
2459 equation_balanced: balance_validation.is_balanced,
2460 max_imbalance: (balance_validation.total_debits
2461 - balance_validation.total_credits)
2462 .abs(),
2463 periods_evaluated: 1,
2464 periods_imbalanced: if balance_validation.is_balanced {
2465 0
2466 } else {
2467 1
2468 },
2469 period_results: Vec::new(),
2470 companies_evaluated: self.config.companies.len(),
2471 });
2472 }
2473
2474 eval.coherence.passes = balance_validation.is_balanced;
2476 if !balance_validation.is_balanced {
2477 eval.coherence
2478 .failures
2479 .push("Balance sheet equation not satisfied".to_string());
2480 }
2481
2482 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
2484 eval.statistical.passes = !entries.is_empty();
2485
2486 eval.quality.overall_score = 0.9; eval.quality.passes = true;
2489
2490 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
2491 info!(
2492 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
2493 profile_name, result.gates_passed, result.gates_total, result.summary
2494 );
2495 Some(result)
2496 }
2497 None => {
2498 warn!(
2499 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
2500 profile_name
2501 );
2502 None
2503 }
2504 }
2505 } else {
2506 None
2507 };
2508
2509 let internal_controls = if self.config.internal_controls.enabled {
2511 InternalControl::standard_controls()
2512 } else {
2513 Vec::new()
2514 };
2515
2516 Ok(EnhancedGenerationResult {
2517 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
2518 master_data: std::mem::take(&mut self.master_data),
2519 document_flows,
2520 subledger,
2521 ocpm,
2522 audit,
2523 banking,
2524 graph_export,
2525 sourcing,
2526 financial_reporting,
2527 hr,
2528 accounting_standards,
2529 manufacturing: manufacturing_snap,
2530 sales_kpi_budgets,
2531 tax,
2532 esg: esg_snap,
2533 treasury,
2534 project_accounting,
2535 process_evolution,
2536 organizational_events,
2537 disruption_events,
2538 intercompany,
2539 journal_entries: entries,
2540 anomaly_labels,
2541 balance_validation,
2542 data_quality_stats,
2543 quality_issues,
2544 statistics: stats,
2545 lineage: Some(lineage),
2546 gate_result,
2547 internal_controls,
2548 sod_violations,
2549 opening_balances,
2550 subledger_reconciliation,
2551 counterfactual_pairs,
2552 red_flags,
2553 collusion_rings,
2554 temporal_vendor_chains,
2555 entity_relationship_graph,
2556 cross_process_links,
2557 industry_output,
2558 compliance_regulations,
2559 })
2560 }
2561
2562 fn phase_chart_of_accounts(
2568 &mut self,
2569 stats: &mut EnhancedGenerationStatistics,
2570 ) -> SynthResult<Arc<ChartOfAccounts>> {
2571 info!("Phase 1: Generating Chart of Accounts");
2572 let coa = self.generate_coa()?;
2573 stats.accounts_count = coa.account_count();
2574 info!(
2575 "Chart of Accounts generated: {} accounts",
2576 stats.accounts_count
2577 );
2578 self.check_resources_with_log("post-coa")?;
2579 Ok(coa)
2580 }
2581
2582 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
2584 if self.phase_config.generate_master_data {
2585 info!("Phase 2: Generating Master Data");
2586 self.generate_master_data()?;
2587 stats.vendor_count = self.master_data.vendors.len();
2588 stats.customer_count = self.master_data.customers.len();
2589 stats.material_count = self.master_data.materials.len();
2590 stats.asset_count = self.master_data.assets.len();
2591 stats.employee_count = self.master_data.employees.len();
2592 info!(
2593 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
2594 stats.vendor_count, stats.customer_count, stats.material_count,
2595 stats.asset_count, stats.employee_count
2596 );
2597 self.check_resources_with_log("post-master-data")?;
2598 } else {
2599 debug!("Phase 2: Skipped (master data generation disabled)");
2600 }
2601 Ok(())
2602 }
2603
2604 fn phase_document_flows(
2606 &mut self,
2607 stats: &mut EnhancedGenerationStatistics,
2608 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
2609 let mut document_flows = DocumentFlowSnapshot::default();
2610 let mut subledger = SubledgerSnapshot::default();
2611 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
2614
2615 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
2616 info!("Phase 3: Generating Document Flows");
2617 self.generate_document_flows(&mut document_flows)?;
2618 stats.p2p_chain_count = document_flows.p2p_chains.len();
2619 stats.o2c_chain_count = document_flows.o2c_chains.len();
2620 info!(
2621 "Document flows generated: {} P2P chains, {} O2C chains",
2622 stats.p2p_chain_count, stats.o2c_chain_count
2623 );
2624
2625 debug!("Phase 3b: Linking document flows to subledgers");
2627 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
2628 stats.ap_invoice_count = subledger.ap_invoices.len();
2629 stats.ar_invoice_count = subledger.ar_invoices.len();
2630 debug!(
2631 "Subledgers linked: {} AP invoices, {} AR invoices",
2632 stats.ap_invoice_count, stats.ar_invoice_count
2633 );
2634
2635 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
2640 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
2641 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
2642 debug!("Payment settlements applied to AP and AR subledgers");
2643
2644 if let Ok(start_date) =
2647 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2648 {
2649 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
2650 - chrono::Days::new(1);
2651 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
2652 for company in &self.config.companies {
2659 let ar_report = ARAgingReport::from_invoices(
2660 company.code.clone(),
2661 &subledger.ar_invoices,
2662 as_of_date,
2663 );
2664 subledger.ar_aging_reports.push(ar_report);
2665
2666 let ap_report = APAgingReport::from_invoices(
2667 company.code.clone(),
2668 &subledger.ap_invoices,
2669 as_of_date,
2670 );
2671 subledger.ap_aging_reports.push(ap_report);
2672 }
2673 debug!(
2674 "AR/AP aging reports built: {} AR, {} AP",
2675 subledger.ar_aging_reports.len(),
2676 subledger.ap_aging_reports.len()
2677 );
2678
2679 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
2681 {
2682 use datasynth_generators::DunningGenerator;
2683 let mut dunning_gen = DunningGenerator::new(self.seed + 2000);
2684 for company in &self.config.companies {
2685 let currency = company.currency.as_str();
2686 let mut company_invoices: Vec<
2689 datasynth_core::models::subledger::ar::ARInvoice,
2690 > = subledger
2691 .ar_invoices
2692 .iter()
2693 .filter(|inv| inv.company_code == company.code)
2694 .cloned()
2695 .collect();
2696
2697 if company_invoices.is_empty() {
2698 continue;
2699 }
2700
2701 let result = dunning_gen.execute_dunning_run(
2702 &company.code,
2703 as_of_date,
2704 &mut company_invoices,
2705 currency,
2706 );
2707
2708 for updated in &company_invoices {
2710 if let Some(orig) = subledger
2711 .ar_invoices
2712 .iter_mut()
2713 .find(|i| i.invoice_number == updated.invoice_number)
2714 {
2715 orig.dunning_info = updated.dunning_info.clone();
2716 }
2717 }
2718
2719 subledger.dunning_runs.push(result.dunning_run);
2720 subledger.dunning_letters.extend(result.letters);
2721 dunning_journal_entries.extend(result.journal_entries);
2723 }
2724 debug!(
2725 "Dunning runs complete: {} runs, {} letters",
2726 subledger.dunning_runs.len(),
2727 subledger.dunning_letters.len()
2728 );
2729 }
2730 }
2731
2732 self.check_resources_with_log("post-document-flows")?;
2733 } else {
2734 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
2735 }
2736
2737 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
2739 if !self.master_data.assets.is_empty() {
2740 debug!("Generating FA subledger records");
2741 let company_code = self
2742 .config
2743 .companies
2744 .first()
2745 .map(|c| c.code.as_str())
2746 .unwrap_or("1000");
2747 let currency = self
2748 .config
2749 .companies
2750 .first()
2751 .map(|c| c.currency.as_str())
2752 .unwrap_or("USD");
2753
2754 let mut fa_gen = datasynth_generators::FAGenerator::new(
2755 datasynth_generators::FAGeneratorConfig::default(),
2756 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
2757 );
2758
2759 for asset in &self.master_data.assets {
2760 let (record, je) = fa_gen.generate_asset_acquisition(
2761 company_code,
2762 &format!("{:?}", asset.asset_class),
2763 &asset.description,
2764 asset.acquisition_date,
2765 currency,
2766 asset.cost_center.as_deref(),
2767 );
2768 subledger.fa_records.push(record);
2769 fa_journal_entries.push(je);
2770 }
2771
2772 stats.fa_subledger_count = subledger.fa_records.len();
2773 debug!(
2774 "FA subledger records generated: {} (with {} acquisition JEs)",
2775 stats.fa_subledger_count,
2776 fa_journal_entries.len()
2777 );
2778 }
2779
2780 if !self.master_data.materials.is_empty() {
2782 debug!("Generating Inventory subledger records");
2783 let first_company = self.config.companies.first();
2784 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
2785 let inv_currency = first_company
2786 .map(|c| c.currency.clone())
2787 .unwrap_or_else(|| "USD".to_string());
2788
2789 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
2790 datasynth_generators::InventoryGeneratorConfig::default(),
2791 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
2792 inv_currency.clone(),
2793 );
2794
2795 for (i, material) in self.master_data.materials.iter().enumerate() {
2796 let plant = format!("PLANT{:02}", (i % 3) + 1);
2797 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
2798 let initial_qty = rust_decimal::Decimal::from(
2799 material
2800 .safety_stock
2801 .to_string()
2802 .parse::<i64>()
2803 .unwrap_or(100),
2804 );
2805
2806 let position = inv_gen.generate_position(
2807 company_code,
2808 &plant,
2809 &storage_loc,
2810 &material.material_id,
2811 &material.description,
2812 initial_qty,
2813 Some(material.standard_cost),
2814 &inv_currency,
2815 );
2816 subledger.inventory_positions.push(position);
2817 }
2818
2819 stats.inventory_subledger_count = subledger.inventory_positions.len();
2820 debug!(
2821 "Inventory subledger records generated: {}",
2822 stats.inventory_subledger_count
2823 );
2824 }
2825
2826 if !subledger.fa_records.is_empty() {
2828 if let Ok(start_date) =
2829 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2830 {
2831 let company_code = self
2832 .config
2833 .companies
2834 .first()
2835 .map(|c| c.code.as_str())
2836 .unwrap_or("1000");
2837 let fiscal_year = start_date.year();
2838 let start_period = start_date.month();
2839 let end_period =
2840 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
2841
2842 let depr_cfg = FaDepreciationScheduleConfig {
2843 fiscal_year,
2844 start_period,
2845 end_period,
2846 seed_offset: 800,
2847 };
2848 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
2849 let runs = depr_gen.generate(company_code, &subledger.fa_records);
2850 let run_count = runs.len();
2851 subledger.depreciation_runs = runs;
2852 debug!(
2853 "Depreciation runs generated: {} runs for {} periods",
2854 run_count, self.config.global.period_months
2855 );
2856 }
2857 }
2858
2859 if !subledger.inventory_positions.is_empty() {
2861 if let Ok(start_date) =
2862 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2863 {
2864 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
2865 - chrono::Days::new(1);
2866
2867 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
2868 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
2869
2870 for company in &self.config.companies {
2871 let result = inv_val_gen.generate(
2872 &company.code,
2873 &subledger.inventory_positions,
2874 as_of_date,
2875 );
2876 subledger.inventory_valuations.push(result);
2877 }
2878 debug!(
2879 "Inventory valuations generated: {} company reports",
2880 subledger.inventory_valuations.len()
2881 );
2882 }
2883 }
2884
2885 Ok((document_flows, subledger, fa_journal_entries))
2886 }
2887
2888 #[allow(clippy::too_many_arguments)]
2890 fn phase_ocpm_events(
2891 &mut self,
2892 document_flows: &DocumentFlowSnapshot,
2893 sourcing: &SourcingSnapshot,
2894 hr: &HrSnapshot,
2895 manufacturing: &ManufacturingSnapshot,
2896 banking: &BankingSnapshot,
2897 audit: &AuditSnapshot,
2898 financial_reporting: &FinancialReportingSnapshot,
2899 stats: &mut EnhancedGenerationStatistics,
2900 ) -> SynthResult<OcpmSnapshot> {
2901 let degradation = self.check_resources()?;
2902 if degradation >= DegradationLevel::Reduced {
2903 debug!(
2904 "Phase skipped due to resource pressure (degradation: {:?})",
2905 degradation
2906 );
2907 return Ok(OcpmSnapshot::default());
2908 }
2909 if self.phase_config.generate_ocpm_events {
2910 info!("Phase 3c: Generating OCPM Events");
2911 let ocpm_snapshot = self.generate_ocpm_events(
2912 document_flows,
2913 sourcing,
2914 hr,
2915 manufacturing,
2916 banking,
2917 audit,
2918 financial_reporting,
2919 )?;
2920 stats.ocpm_event_count = ocpm_snapshot.event_count;
2921 stats.ocpm_object_count = ocpm_snapshot.object_count;
2922 stats.ocpm_case_count = ocpm_snapshot.case_count;
2923 info!(
2924 "OCPM events generated: {} events, {} objects, {} cases",
2925 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
2926 );
2927 self.check_resources_with_log("post-ocpm")?;
2928 Ok(ocpm_snapshot)
2929 } else {
2930 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
2931 Ok(OcpmSnapshot::default())
2932 }
2933 }
2934
2935 fn phase_journal_entries(
2937 &mut self,
2938 coa: &Arc<ChartOfAccounts>,
2939 document_flows: &DocumentFlowSnapshot,
2940 _stats: &mut EnhancedGenerationStatistics,
2941 ) -> SynthResult<Vec<JournalEntry>> {
2942 let mut entries = Vec::new();
2943
2944 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
2946 debug!("Phase 4a: Generating JEs from document flows");
2947 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
2948 debug!("Generated {} JEs from document flows", flow_entries.len());
2949 entries.extend(flow_entries);
2950 }
2951
2952 if self.phase_config.generate_journal_entries {
2954 info!("Phase 4: Generating Journal Entries");
2955 let je_entries = self.generate_journal_entries(coa)?;
2956 info!("Generated {} standalone journal entries", je_entries.len());
2957 entries.extend(je_entries);
2958 } else {
2959 debug!("Phase 4: Skipped (journal entry generation disabled)");
2960 }
2961
2962 if !entries.is_empty() {
2963 self.check_resources_with_log("post-journal-entries")?;
2966 }
2967
2968 Ok(entries)
2969 }
2970
2971 fn phase_anomaly_injection(
2973 &mut self,
2974 entries: &mut [JournalEntry],
2975 actions: &DegradationActions,
2976 stats: &mut EnhancedGenerationStatistics,
2977 ) -> SynthResult<AnomalyLabels> {
2978 if self.phase_config.inject_anomalies
2979 && !entries.is_empty()
2980 && !actions.skip_anomaly_injection
2981 {
2982 info!("Phase 5: Injecting Anomalies");
2983 let result = self.inject_anomalies(entries)?;
2984 stats.anomalies_injected = result.labels.len();
2985 info!("Injected {} anomalies", stats.anomalies_injected);
2986 self.check_resources_with_log("post-anomaly-injection")?;
2987 Ok(result)
2988 } else if actions.skip_anomaly_injection {
2989 warn!("Phase 5: Skipped due to resource degradation");
2990 Ok(AnomalyLabels::default())
2991 } else {
2992 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
2993 Ok(AnomalyLabels::default())
2994 }
2995 }
2996
2997 fn phase_balance_validation(
2999 &mut self,
3000 entries: &[JournalEntry],
3001 ) -> SynthResult<BalanceValidationResult> {
3002 if self.phase_config.validate_balances && !entries.is_empty() {
3003 debug!("Phase 6: Validating Balances");
3004 let balance_validation = self.validate_journal_entries(entries)?;
3005 if balance_validation.is_balanced {
3006 debug!("Balance validation passed");
3007 } else {
3008 warn!(
3009 "Balance validation found {} errors",
3010 balance_validation.validation_errors.len()
3011 );
3012 }
3013 Ok(balance_validation)
3014 } else {
3015 Ok(BalanceValidationResult::default())
3016 }
3017 }
3018
3019 fn phase_data_quality_injection(
3021 &mut self,
3022 entries: &mut [JournalEntry],
3023 actions: &DegradationActions,
3024 stats: &mut EnhancedGenerationStatistics,
3025 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3026 if self.phase_config.inject_data_quality
3027 && !entries.is_empty()
3028 && !actions.skip_data_quality
3029 {
3030 info!("Phase 7: Injecting Data Quality Variations");
3031 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3032 stats.data_quality_issues = dq_stats.records_with_issues;
3033 info!("Injected {} data quality issues", stats.data_quality_issues);
3034 self.check_resources_with_log("post-data-quality")?;
3035 Ok((dq_stats, quality_issues))
3036 } else if actions.skip_data_quality {
3037 warn!("Phase 7: Skipped due to resource degradation");
3038 Ok((DataQualityStats::default(), Vec::new()))
3039 } else {
3040 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3041 Ok((DataQualityStats::default(), Vec::new()))
3042 }
3043 }
3044
3045 fn phase_period_close(
3055 &mut self,
3056 entries: &mut Vec<JournalEntry>,
3057 subledger: &SubledgerSnapshot,
3058 stats: &mut EnhancedGenerationStatistics,
3059 ) -> SynthResult<()> {
3060 if !self.phase_config.generate_period_close || entries.is_empty() {
3061 debug!("Phase 10b: Skipped (period close disabled or no entries)");
3062 return Ok(());
3063 }
3064
3065 info!("Phase 10b: Generating period-close journal entries");
3066
3067 use datasynth_core::accounts::{
3068 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3069 };
3070 use rust_decimal::Decimal;
3071
3072 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3073 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3074 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3075 let close_date = end_date - chrono::Days::new(1);
3077
3078 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
3083 .config
3084 .companies
3085 .iter()
3086 .map(|c| c.code.clone())
3087 .collect();
3088
3089 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3091 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3092
3093 let period_months = self.config.global.period_months;
3097 for asset in &subledger.fa_records {
3098 use datasynth_core::models::subledger::fa::AssetStatus;
3100 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3101 continue;
3102 }
3103 let useful_life_months = asset.useful_life_months();
3104 if useful_life_months == 0 {
3105 continue;
3107 }
3108 let salvage_value = asset.salvage_value();
3109 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3110 if depreciable_base == Decimal::ZERO {
3111 continue;
3112 }
3113 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3114 * Decimal::from(period_months))
3115 .round_dp(2);
3116 if period_depr <= Decimal::ZERO {
3117 continue;
3118 }
3119
3120 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3121 depr_header.document_type = "CL".to_string();
3122 depr_header.header_text = Some(format!(
3123 "Depreciation - {} {}",
3124 asset.asset_number, asset.description
3125 ));
3126 depr_header.created_by = "CLOSE_ENGINE".to_string();
3127 depr_header.source = TransactionSource::Automated;
3128 depr_header.business_process = Some(BusinessProcess::R2R);
3129
3130 let doc_id = depr_header.document_id;
3131 let mut depr_je = JournalEntry::new(depr_header);
3132
3133 depr_je.add_line(JournalEntryLine::debit(
3135 doc_id,
3136 1,
3137 expense_accounts::DEPRECIATION.to_string(),
3138 period_depr,
3139 ));
3140 depr_je.add_line(JournalEntryLine::credit(
3142 doc_id,
3143 2,
3144 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3145 period_depr,
3146 ));
3147
3148 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3149 close_jes.push(depr_je);
3150 }
3151
3152 if !subledger.fa_records.is_empty() {
3153 debug!(
3154 "Generated {} depreciation JEs from {} FA records",
3155 close_jes.len(),
3156 subledger.fa_records.len()
3157 );
3158 }
3159
3160 {
3164 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3165 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3166
3167 let accrual_items: &[(&str, &str, &str)] = &[
3169 ("Accrued Utilities", "6200", "2100"),
3170 ("Accrued Rent", "6300", "2100"),
3171 ("Accrued Interest", "6100", "2150"),
3172 ];
3173
3174 for company_code in &company_codes {
3175 let company_revenue: Decimal = entries
3177 .iter()
3178 .filter(|e| e.header.company_code == *company_code)
3179 .flat_map(|e| e.lines.iter())
3180 .filter(|l| l.gl_account.starts_with('4'))
3181 .map(|l| l.credit_amount - l.debit_amount)
3182 .fold(Decimal::ZERO, |acc, v| acc + v);
3183
3184 if company_revenue <= Decimal::ZERO {
3185 continue;
3186 }
3187
3188 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
3190 if accrual_base <= Decimal::ZERO {
3191 continue;
3192 }
3193
3194 for (description, expense_acct, liability_acct) in accrual_items {
3195 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
3196 company_code,
3197 description,
3198 accrual_base,
3199 expense_acct,
3200 liability_acct,
3201 close_date,
3202 None,
3203 );
3204 close_jes.push(accrual_je);
3205 if let Some(rev_je) = reversal_je {
3206 close_jes.push(rev_je);
3207 }
3208 }
3209 }
3210
3211 debug!(
3212 "Generated accrual entries for {} companies",
3213 company_codes.len()
3214 );
3215 }
3216
3217 for company_code in &company_codes {
3218 let mut total_revenue = Decimal::ZERO;
3223 let mut total_expenses = Decimal::ZERO;
3224
3225 for entry in entries.iter() {
3226 if entry.header.company_code != *company_code {
3227 continue;
3228 }
3229 for line in &entry.lines {
3230 let category = AccountCategory::from_account(&line.gl_account);
3231 match category {
3232 AccountCategory::Revenue => {
3233 total_revenue += line.credit_amount - line.debit_amount;
3235 }
3236 AccountCategory::Cogs
3237 | AccountCategory::OperatingExpense
3238 | AccountCategory::OtherIncomeExpense
3239 | AccountCategory::Tax => {
3240 total_expenses += line.debit_amount - line.credit_amount;
3242 }
3243 _ => {}
3244 }
3245 }
3246 }
3247
3248 let pre_tax_income = total_revenue - total_expenses;
3249
3250 if pre_tax_income == Decimal::ZERO {
3252 debug!(
3253 "Company {}: no pre-tax income, skipping period close",
3254 company_code
3255 );
3256 continue;
3257 }
3258
3259 if pre_tax_income > Decimal::ZERO {
3261 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
3263
3264 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
3265 tax_header.document_type = "CL".to_string();
3266 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
3267 tax_header.created_by = "CLOSE_ENGINE".to_string();
3268 tax_header.source = TransactionSource::Automated;
3269 tax_header.business_process = Some(BusinessProcess::R2R);
3270
3271 let doc_id = tax_header.document_id;
3272 let mut tax_je = JournalEntry::new(tax_header);
3273
3274 tax_je.add_line(JournalEntryLine::debit(
3276 doc_id,
3277 1,
3278 tax_accounts::TAX_EXPENSE.to_string(),
3279 tax_amount,
3280 ));
3281 tax_je.add_line(JournalEntryLine::credit(
3283 doc_id,
3284 2,
3285 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
3286 tax_amount,
3287 ));
3288
3289 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
3290 close_jes.push(tax_je);
3291 } else {
3292 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
3295 if dta_amount > Decimal::ZERO {
3296 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
3297 dta_header.document_type = "CL".to_string();
3298 dta_header.header_text =
3299 Some(format!("Deferred tax asset (DTA) - {}", company_code));
3300 dta_header.created_by = "CLOSE_ENGINE".to_string();
3301 dta_header.source = TransactionSource::Automated;
3302 dta_header.business_process = Some(BusinessProcess::R2R);
3303
3304 let doc_id = dta_header.document_id;
3305 let mut dta_je = JournalEntry::new(dta_header);
3306
3307 dta_je.add_line(JournalEntryLine::debit(
3309 doc_id,
3310 1,
3311 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
3312 dta_amount,
3313 ));
3314 dta_je.add_line(JournalEntryLine::credit(
3317 doc_id,
3318 2,
3319 tax_accounts::TAX_EXPENSE.to_string(),
3320 dta_amount,
3321 ));
3322
3323 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
3324 close_jes.push(dta_je);
3325 debug!(
3326 "Company {}: loss year — recognised DTA of {}",
3327 company_code, dta_amount
3328 );
3329 }
3330 }
3331
3332 let tax_provision = if pre_tax_income > Decimal::ZERO {
3337 (pre_tax_income * tax_rate).round_dp(2)
3338 } else {
3339 Decimal::ZERO
3340 };
3341 let net_income = pre_tax_income - tax_provision;
3342
3343 if net_income != Decimal::ZERO {
3344 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
3345 close_header.document_type = "CL".to_string();
3346 close_header.header_text =
3347 Some(format!("Income statement close - {}", company_code));
3348 close_header.created_by = "CLOSE_ENGINE".to_string();
3349 close_header.source = TransactionSource::Automated;
3350 close_header.business_process = Some(BusinessProcess::R2R);
3351
3352 let doc_id = close_header.document_id;
3353 let mut close_je = JournalEntry::new(close_header);
3354
3355 let abs_net_income = net_income.abs();
3356
3357 if net_income > Decimal::ZERO {
3358 close_je.add_line(JournalEntryLine::debit(
3360 doc_id,
3361 1,
3362 equity_accounts::INCOME_SUMMARY.to_string(),
3363 abs_net_income,
3364 ));
3365 close_je.add_line(JournalEntryLine::credit(
3366 doc_id,
3367 2,
3368 equity_accounts::RETAINED_EARNINGS.to_string(),
3369 abs_net_income,
3370 ));
3371 } else {
3372 close_je.add_line(JournalEntryLine::debit(
3374 doc_id,
3375 1,
3376 equity_accounts::RETAINED_EARNINGS.to_string(),
3377 abs_net_income,
3378 ));
3379 close_je.add_line(JournalEntryLine::credit(
3380 doc_id,
3381 2,
3382 equity_accounts::INCOME_SUMMARY.to_string(),
3383 abs_net_income,
3384 ));
3385 }
3386
3387 debug_assert!(
3388 close_je.is_balanced(),
3389 "Income statement closing JE must be balanced"
3390 );
3391 close_jes.push(close_je);
3392 }
3393 }
3394
3395 let close_count = close_jes.len();
3396 if close_count > 0 {
3397 info!("Generated {} period-close journal entries", close_count);
3398 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
3399 entries.extend(close_jes);
3400 stats.period_close_je_count = close_count;
3401
3402 stats.total_entries = entries.len() as u64;
3404 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
3405 } else {
3406 debug!("No period-close entries generated (no income statement activity)");
3407 }
3408
3409 Ok(())
3410 }
3411
3412 fn phase_audit_data(
3414 &mut self,
3415 entries: &[JournalEntry],
3416 stats: &mut EnhancedGenerationStatistics,
3417 ) -> SynthResult<AuditSnapshot> {
3418 if self.phase_config.generate_audit {
3419 info!("Phase 8: Generating Audit Data");
3420 let audit_snapshot = self.generate_audit_data(entries)?;
3421 stats.audit_engagement_count = audit_snapshot.engagements.len();
3422 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
3423 stats.audit_evidence_count = audit_snapshot.evidence.len();
3424 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
3425 stats.audit_finding_count = audit_snapshot.findings.len();
3426 stats.audit_judgment_count = audit_snapshot.judgments.len();
3427 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
3428 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
3429 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
3430 stats.audit_sample_count = audit_snapshot.samples.len();
3431 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
3432 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
3433 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
3434 stats.audit_related_party_count = audit_snapshot.related_parties.len();
3435 stats.audit_related_party_transaction_count =
3436 audit_snapshot.related_party_transactions.len();
3437 info!(
3438 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
3439 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
3440 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
3441 {} RP transactions",
3442 stats.audit_engagement_count,
3443 stats.audit_workpaper_count,
3444 stats.audit_evidence_count,
3445 stats.audit_risk_count,
3446 stats.audit_finding_count,
3447 stats.audit_judgment_count,
3448 stats.audit_confirmation_count,
3449 stats.audit_procedure_step_count,
3450 stats.audit_sample_count,
3451 stats.audit_analytical_result_count,
3452 stats.audit_ia_function_count,
3453 stats.audit_ia_report_count,
3454 stats.audit_related_party_count,
3455 stats.audit_related_party_transaction_count,
3456 );
3457 self.check_resources_with_log("post-audit")?;
3458 Ok(audit_snapshot)
3459 } else {
3460 debug!("Phase 8: Skipped (audit generation disabled)");
3461 Ok(AuditSnapshot::default())
3462 }
3463 }
3464
3465 fn phase_banking_data(
3467 &mut self,
3468 stats: &mut EnhancedGenerationStatistics,
3469 ) -> SynthResult<BankingSnapshot> {
3470 if self.phase_config.generate_banking {
3471 info!("Phase 9: Generating Banking KYC/AML Data");
3472 let banking_snapshot = self.generate_banking_data()?;
3473 stats.banking_customer_count = banking_snapshot.customers.len();
3474 stats.banking_account_count = banking_snapshot.accounts.len();
3475 stats.banking_transaction_count = banking_snapshot.transactions.len();
3476 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
3477 info!(
3478 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
3479 stats.banking_customer_count, stats.banking_account_count,
3480 stats.banking_transaction_count, stats.banking_suspicious_count
3481 );
3482 self.check_resources_with_log("post-banking")?;
3483 Ok(banking_snapshot)
3484 } else {
3485 debug!("Phase 9: Skipped (banking generation disabled)");
3486 Ok(BankingSnapshot::default())
3487 }
3488 }
3489
3490 fn phase_graph_export(
3492 &mut self,
3493 entries: &[JournalEntry],
3494 coa: &Arc<ChartOfAccounts>,
3495 stats: &mut EnhancedGenerationStatistics,
3496 ) -> SynthResult<GraphExportSnapshot> {
3497 if self.phase_config.generate_graph_export && !entries.is_empty() {
3498 info!("Phase 10: Exporting Accounting Network Graphs");
3499 match self.export_graphs(entries, coa, stats) {
3500 Ok(snapshot) => {
3501 info!(
3502 "Graph export complete: {} graphs ({} nodes, {} edges)",
3503 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
3504 );
3505 Ok(snapshot)
3506 }
3507 Err(e) => {
3508 warn!("Phase 10: Graph export failed: {}", e);
3509 Ok(GraphExportSnapshot::default())
3510 }
3511 }
3512 } else {
3513 debug!("Phase 10: Skipped (graph export disabled or no entries)");
3514 Ok(GraphExportSnapshot::default())
3515 }
3516 }
3517
3518 #[allow(clippy::too_many_arguments)]
3520 fn phase_hypergraph_export(
3521 &self,
3522 coa: &Arc<ChartOfAccounts>,
3523 entries: &[JournalEntry],
3524 document_flows: &DocumentFlowSnapshot,
3525 sourcing: &SourcingSnapshot,
3526 hr: &HrSnapshot,
3527 manufacturing: &ManufacturingSnapshot,
3528 banking: &BankingSnapshot,
3529 audit: &AuditSnapshot,
3530 financial_reporting: &FinancialReportingSnapshot,
3531 ocpm: &OcpmSnapshot,
3532 compliance: &ComplianceRegulationsSnapshot,
3533 stats: &mut EnhancedGenerationStatistics,
3534 ) -> SynthResult<()> {
3535 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
3536 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
3537 match self.export_hypergraph(
3538 coa,
3539 entries,
3540 document_flows,
3541 sourcing,
3542 hr,
3543 manufacturing,
3544 banking,
3545 audit,
3546 financial_reporting,
3547 ocpm,
3548 compliance,
3549 stats,
3550 ) {
3551 Ok(info) => {
3552 info!(
3553 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
3554 info.node_count, info.edge_count, info.hyperedge_count
3555 );
3556 }
3557 Err(e) => {
3558 warn!("Phase 10b: Hypergraph export failed: {}", e);
3559 }
3560 }
3561 } else {
3562 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
3563 }
3564 Ok(())
3565 }
3566
3567 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
3573 if !self.config.llm.enabled {
3574 debug!("Phase 11: Skipped (LLM enrichment disabled)");
3575 return;
3576 }
3577
3578 info!("Phase 11: Starting LLM Enrichment");
3579 let start = std::time::Instant::now();
3580
3581 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3582 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
3585 let schema_provider = &self.config.llm.provider;
3586 let api_key_env = match schema_provider.as_str() {
3587 "openai" => Some("OPENAI_API_KEY"),
3588 "anthropic" => Some("ANTHROPIC_API_KEY"),
3589 "custom" => Some("LLM_API_KEY"),
3590 _ => None,
3591 };
3592 if let Some(key_env) = api_key_env {
3593 if std::env::var(key_env).is_ok() {
3594 let llm_config = datasynth_core::llm::LlmConfig {
3595 model: self.config.llm.model.clone(),
3596 api_key_env: key_env.to_string(),
3597 ..datasynth_core::llm::LlmConfig::default()
3598 };
3599 match HttpLlmProvider::new(llm_config) {
3600 Ok(p) => Arc::new(p),
3601 Err(e) => {
3602 warn!(
3603 "Failed to create HttpLlmProvider: {}; falling back to mock",
3604 e
3605 );
3606 Arc::new(MockLlmProvider::new(self.seed))
3607 }
3608 }
3609 } else {
3610 Arc::new(MockLlmProvider::new(self.seed))
3611 }
3612 } else {
3613 Arc::new(MockLlmProvider::new(self.seed))
3614 }
3615 };
3616 let enricher = VendorLlmEnricher::new(provider);
3617
3618 let industry = format!("{:?}", self.config.global.industry);
3619 let max_enrichments = self
3620 .config
3621 .llm
3622 .max_vendor_enrichments
3623 .min(self.master_data.vendors.len());
3624
3625 let mut enriched_count = 0usize;
3626 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
3627 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
3628 Ok(name) => {
3629 vendor.name = name;
3630 enriched_count += 1;
3631 }
3632 Err(e) => {
3633 warn!(
3634 "LLM vendor enrichment failed for {}: {}",
3635 vendor.vendor_id, e
3636 );
3637 }
3638 }
3639 }
3640
3641 enriched_count
3642 }));
3643
3644 match result {
3645 Ok(enriched_count) => {
3646 stats.llm_vendors_enriched = enriched_count;
3647 let elapsed = start.elapsed();
3648 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
3649 info!(
3650 "Phase 11 complete: {} vendors enriched in {}ms",
3651 enriched_count, stats.llm_enrichment_ms
3652 );
3653 }
3654 Err(_) => {
3655 let elapsed = start.elapsed();
3656 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
3657 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
3658 }
3659 }
3660 }
3661
3662 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
3668 if !self.config.diffusion.enabled {
3669 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
3670 return;
3671 }
3672
3673 info!("Phase 12: Starting Diffusion Enhancement");
3674 let start = std::time::Instant::now();
3675
3676 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3677 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
3680
3681 let diffusion_config = DiffusionConfig {
3682 n_steps: self.config.diffusion.n_steps,
3683 seed: self.seed,
3684 ..Default::default()
3685 };
3686
3687 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
3688
3689 let n_samples = self.config.diffusion.sample_size;
3690 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
3692
3693 samples.len()
3694 }));
3695
3696 match result {
3697 Ok(sample_count) => {
3698 stats.diffusion_samples_generated = sample_count;
3699 let elapsed = start.elapsed();
3700 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
3701 info!(
3702 "Phase 12 complete: {} diffusion samples generated in {}ms",
3703 sample_count, stats.diffusion_enhancement_ms
3704 );
3705 }
3706 Err(_) => {
3707 let elapsed = start.elapsed();
3708 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
3709 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
3710 }
3711 }
3712 }
3713
3714 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
3721 if !self.config.causal.enabled {
3722 debug!("Phase 13: Skipped (causal generation disabled)");
3723 return;
3724 }
3725
3726 info!("Phase 13: Starting Causal Overlay");
3727 let start = std::time::Instant::now();
3728
3729 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3730 let graph = match self.config.causal.template.as_str() {
3732 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
3733 _ => CausalGraph::fraud_detection_template(),
3734 };
3735
3736 let scm = StructuralCausalModel::new(graph.clone())
3737 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
3738
3739 let n_samples = self.config.causal.sample_size;
3740 let samples = scm
3741 .generate(n_samples, self.seed)
3742 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
3743
3744 let validation_passed = if self.config.causal.validate {
3746 let report = CausalValidator::validate_causal_structure(&samples, &graph);
3747 if report.valid {
3748 info!(
3749 "Causal validation passed: all {} checks OK",
3750 report.checks.len()
3751 );
3752 } else {
3753 warn!(
3754 "Causal validation: {} violations detected: {:?}",
3755 report.violations.len(),
3756 report.violations
3757 );
3758 }
3759 Some(report.valid)
3760 } else {
3761 None
3762 };
3763
3764 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
3765 }));
3766
3767 match result {
3768 Ok(Ok((sample_count, validation_passed))) => {
3769 stats.causal_samples_generated = sample_count;
3770 stats.causal_validation_passed = validation_passed;
3771 let elapsed = start.elapsed();
3772 stats.causal_generation_ms = elapsed.as_millis() as u64;
3773 info!(
3774 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
3775 sample_count, stats.causal_generation_ms, validation_passed,
3776 );
3777 }
3778 Ok(Err(e)) => {
3779 let elapsed = start.elapsed();
3780 stats.causal_generation_ms = elapsed.as_millis() as u64;
3781 warn!("Phase 13: Causal generation failed: {}", e);
3782 }
3783 Err(_) => {
3784 let elapsed = start.elapsed();
3785 stats.causal_generation_ms = elapsed.as_millis() as u64;
3786 warn!("Phase 13: Causal generation failed (panic caught), continuing");
3787 }
3788 }
3789 }
3790
3791 fn phase_sourcing_data(
3793 &mut self,
3794 stats: &mut EnhancedGenerationStatistics,
3795 ) -> SynthResult<SourcingSnapshot> {
3796 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
3797 debug!("Phase 14: Skipped (sourcing generation disabled)");
3798 return Ok(SourcingSnapshot::default());
3799 }
3800 let degradation = self.check_resources()?;
3801 if degradation >= DegradationLevel::Reduced {
3802 debug!(
3803 "Phase skipped due to resource pressure (degradation: {:?})",
3804 degradation
3805 );
3806 return Ok(SourcingSnapshot::default());
3807 }
3808
3809 info!("Phase 14: Generating S2C Sourcing Data");
3810 let seed = self.seed;
3811
3812 let vendor_ids: Vec<String> = self
3814 .master_data
3815 .vendors
3816 .iter()
3817 .map(|v| v.vendor_id.clone())
3818 .collect();
3819 if vendor_ids.is_empty() {
3820 debug!("Phase 14: Skipped (no vendors available)");
3821 return Ok(SourcingSnapshot::default());
3822 }
3823
3824 let categories: Vec<(String, String)> = vec![
3825 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
3826 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
3827 ("CAT-IT".to_string(), "IT Equipment".to_string()),
3828 ("CAT-SVC".to_string(), "Professional Services".to_string()),
3829 ("CAT-LOG".to_string(), "Logistics".to_string()),
3830 ];
3831 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
3832 .iter()
3833 .map(|(id, name)| {
3834 (
3835 id.clone(),
3836 name.clone(),
3837 rust_decimal::Decimal::from(100_000),
3838 )
3839 })
3840 .collect();
3841
3842 let company_code = self
3843 .config
3844 .companies
3845 .first()
3846 .map(|c| c.code.as_str())
3847 .unwrap_or("1000");
3848 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3849 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3850 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3851 let fiscal_year = start_date.year() as u16;
3852 let owner_ids: Vec<String> = self
3853 .master_data
3854 .employees
3855 .iter()
3856 .take(5)
3857 .map(|e| e.employee_id.clone())
3858 .collect();
3859 let owner_id = owner_ids
3860 .first()
3861 .map(std::string::String::as_str)
3862 .unwrap_or("BUYER-001");
3863
3864 let mut spend_gen = SpendAnalysisGenerator::new(seed);
3866 let spend_analyses =
3867 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
3868
3869 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
3871 let sourcing_projects = if owner_ids.is_empty() {
3872 Vec::new()
3873 } else {
3874 project_gen.generate(
3875 company_code,
3876 &categories_with_spend,
3877 &owner_ids,
3878 start_date,
3879 self.config.global.period_months,
3880 )
3881 };
3882 stats.sourcing_project_count = sourcing_projects.len();
3883
3884 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
3886 let mut qual_gen = QualificationGenerator::new(seed + 2);
3887 let qualifications = qual_gen.generate(
3888 company_code,
3889 &qual_vendor_ids,
3890 sourcing_projects.first().map(|p| p.project_id.as_str()),
3891 owner_id,
3892 start_date,
3893 );
3894
3895 let mut rfx_gen = RfxGenerator::new(seed + 3);
3897 let rfx_events: Vec<RfxEvent> = sourcing_projects
3898 .iter()
3899 .map(|proj| {
3900 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
3901 rfx_gen.generate(
3902 company_code,
3903 &proj.project_id,
3904 &proj.category_id,
3905 &qualified_vids,
3906 owner_id,
3907 start_date,
3908 50000.0,
3909 )
3910 })
3911 .collect();
3912 stats.rfx_event_count = rfx_events.len();
3913
3914 let mut bid_gen = BidGenerator::new(seed + 4);
3916 let mut all_bids = Vec::new();
3917 for rfx in &rfx_events {
3918 let bidder_count = vendor_ids.len().clamp(2, 5);
3919 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
3920 let bids = bid_gen.generate(rfx, &responding, start_date);
3921 all_bids.extend(bids);
3922 }
3923 stats.bid_count = all_bids.len();
3924
3925 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
3927 let bid_evaluations: Vec<BidEvaluation> = rfx_events
3928 .iter()
3929 .map(|rfx| {
3930 let rfx_bids: Vec<SupplierBid> = all_bids
3931 .iter()
3932 .filter(|b| b.rfx_id == rfx.rfx_id)
3933 .cloned()
3934 .collect();
3935 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
3936 })
3937 .collect();
3938
3939 let mut contract_gen = ContractGenerator::new(seed + 6);
3941 let contracts: Vec<ProcurementContract> = bid_evaluations
3942 .iter()
3943 .zip(rfx_events.iter())
3944 .filter_map(|(eval, rfx)| {
3945 eval.ranked_bids.first().and_then(|winner| {
3946 all_bids
3947 .iter()
3948 .find(|b| b.bid_id == winner.bid_id)
3949 .map(|winning_bid| {
3950 contract_gen.generate_from_bid(
3951 winning_bid,
3952 Some(&rfx.sourcing_project_id),
3953 &rfx.category_id,
3954 owner_id,
3955 start_date,
3956 )
3957 })
3958 })
3959 })
3960 .collect();
3961 stats.contract_count = contracts.len();
3962
3963 let mut catalog_gen = CatalogGenerator::new(seed + 7);
3965 let catalog_items = catalog_gen.generate(&contracts);
3966 stats.catalog_item_count = catalog_items.len();
3967
3968 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
3970 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
3971 .iter()
3972 .fold(
3973 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
3974 |mut acc, c| {
3975 acc.entry(c.vendor_id.clone()).or_default().push(c);
3976 acc
3977 },
3978 )
3979 .into_iter()
3980 .collect();
3981 let scorecards = scorecard_gen.generate(
3982 company_code,
3983 &vendor_contracts,
3984 start_date,
3985 end_date,
3986 owner_id,
3987 );
3988 stats.scorecard_count = scorecards.len();
3989
3990 let mut sourcing_projects = sourcing_projects;
3993 for project in &mut sourcing_projects {
3994 project.rfx_ids = rfx_events
3996 .iter()
3997 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
3998 .map(|rfx| rfx.rfx_id.clone())
3999 .collect();
4000
4001 project.contract_id = contracts
4003 .iter()
4004 .find(|c| {
4005 c.sourcing_project_id
4006 .as_deref()
4007 .is_some_and(|sp| sp == project.project_id)
4008 })
4009 .map(|c| c.contract_id.clone());
4010
4011 project.spend_analysis_id = spend_analyses
4013 .iter()
4014 .find(|sa| sa.category_id == project.category_id)
4015 .map(|sa| sa.category_id.clone());
4016 }
4017
4018 info!(
4019 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4020 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4021 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4022 );
4023 self.check_resources_with_log("post-sourcing")?;
4024
4025 Ok(SourcingSnapshot {
4026 spend_analyses,
4027 sourcing_projects,
4028 qualifications,
4029 rfx_events,
4030 bids: all_bids,
4031 bid_evaluations,
4032 contracts,
4033 catalog_items,
4034 scorecards,
4035 })
4036 }
4037
4038 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4044 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4045
4046 let parent_code = self
4047 .config
4048 .companies
4049 .first()
4050 .map(|c| c.code.clone())
4051 .unwrap_or_else(|| "PARENT".to_string());
4052
4053 let mut group = GroupStructure::new(parent_code);
4054
4055 for company in self.config.companies.iter().skip(1) {
4056 let sub =
4057 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4058 group.add_subsidiary(sub);
4059 }
4060
4061 group
4062 }
4063
4064 fn phase_intercompany(
4066 &mut self,
4067 journal_entries: &[JournalEntry],
4068 stats: &mut EnhancedGenerationStatistics,
4069 ) -> SynthResult<IntercompanySnapshot> {
4070 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4072 debug!("Phase 14b: Skipped (intercompany generation disabled)");
4073 return Ok(IntercompanySnapshot::default());
4074 }
4075
4076 if self.config.companies.len() < 2 {
4078 debug!(
4079 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4080 self.config.companies.len()
4081 );
4082 return Ok(IntercompanySnapshot::default());
4083 }
4084
4085 info!("Phase 14b: Generating Intercompany Transactions");
4086
4087 let group_structure = self.build_group_structure();
4090 debug!(
4091 "Group structure built: parent={}, subsidiaries={}",
4092 group_structure.parent_entity,
4093 group_structure.subsidiaries.len()
4094 );
4095
4096 let seed = self.seed;
4097 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4098 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4099 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4100
4101 let parent_code = self.config.companies[0].code.clone();
4104 let mut ownership_structure =
4105 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4106
4107 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4108 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4109 format!("REL{:03}", i + 1),
4110 parent_code.clone(),
4111 company.code.clone(),
4112 rust_decimal::Decimal::from(100), start_date,
4114 );
4115 ownership_structure.add_relationship(relationship);
4116 }
4117
4118 let tp_method = match self.config.intercompany.transfer_pricing_method {
4120 datasynth_config::schema::TransferPricingMethod::CostPlus => {
4121 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4122 }
4123 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4124 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4125 }
4126 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4127 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4128 }
4129 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4130 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4131 }
4132 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4133 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4134 }
4135 };
4136
4137 let ic_currency = self
4139 .config
4140 .companies
4141 .first()
4142 .map(|c| c.currency.clone())
4143 .unwrap_or_else(|| "USD".to_string());
4144 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4145 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4146 transfer_pricing_method: tp_method,
4147 markup_percent: rust_decimal::Decimal::from_f64_retain(
4148 self.config.intercompany.markup_percent,
4149 )
4150 .unwrap_or(rust_decimal::Decimal::from(5)),
4151 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4152 default_currency: ic_currency,
4153 ..Default::default()
4154 };
4155
4156 let mut ic_generator = datasynth_generators::ICGenerator::new(
4158 ic_gen_config,
4159 ownership_structure.clone(),
4160 seed + 50,
4161 );
4162
4163 let transactions_per_day = 3;
4166 let matched_pairs = ic_generator.generate_transactions_for_period(
4167 start_date,
4168 end_date,
4169 transactions_per_day,
4170 );
4171
4172 let mut seller_entries = Vec::new();
4174 let mut buyer_entries = Vec::new();
4175 let fiscal_year = start_date.year();
4176
4177 for pair in &matched_pairs {
4178 let fiscal_period = pair.posting_date.month();
4179 let (seller_je, buyer_je) =
4180 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4181 seller_entries.push(seller_je);
4182 buyer_entries.push(buyer_je);
4183 }
4184
4185 let matching_config = datasynth_generators::ICMatchingConfig {
4187 base_currency: self
4188 .config
4189 .companies
4190 .first()
4191 .map(|c| c.currency.clone())
4192 .unwrap_or_else(|| "USD".to_string()),
4193 ..Default::default()
4194 };
4195 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
4196 matching_engine.load_matched_pairs(&matched_pairs);
4197 let matching_result = matching_engine.run_matching(end_date);
4198
4199 let mut elimination_entries = Vec::new();
4201 if self.config.intercompany.generate_eliminations {
4202 let elim_config = datasynth_generators::EliminationConfig {
4203 consolidation_entity: "GROUP".to_string(),
4204 base_currency: self
4205 .config
4206 .companies
4207 .first()
4208 .map(|c| c.currency.clone())
4209 .unwrap_or_else(|| "USD".to_string()),
4210 ..Default::default()
4211 };
4212
4213 let mut elim_generator =
4214 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
4215
4216 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
4217 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
4218 matching_result
4219 .matched_balances
4220 .iter()
4221 .chain(matching_result.unmatched_balances.iter())
4222 .cloned()
4223 .collect();
4224
4225 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
4237 std::collections::HashMap::new();
4238 let mut equity_amounts: std::collections::HashMap<
4239 String,
4240 std::collections::HashMap<String, rust_decimal::Decimal>,
4241 > = std::collections::HashMap::new();
4242 {
4243 use rust_decimal::Decimal;
4244 let hundred = Decimal::from(100u32);
4245 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
4249 for sub in &group_structure.subsidiaries {
4250 let net_assets = {
4251 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4252 if na > Decimal::ZERO {
4253 na
4254 } else {
4255 Decimal::from(1_000_000u64)
4256 }
4257 };
4258 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
4260 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
4261
4262 let mut eq_map = std::collections::HashMap::new();
4265 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
4266 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
4267 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
4268 equity_amounts.insert(sub.entity_code.clone(), eq_map);
4269 }
4270 }
4271
4272 let journal = elim_generator.generate_eliminations(
4273 &fiscal_period,
4274 end_date,
4275 &all_balances,
4276 &matched_pairs,
4277 &investment_amounts,
4278 &equity_amounts,
4279 );
4280
4281 elimination_entries = journal.entries.clone();
4282 }
4283
4284 let matched_pair_count = matched_pairs.len();
4285 let elimination_entry_count = elimination_entries.len();
4286 let match_rate = matching_result.match_rate;
4287
4288 stats.ic_matched_pair_count = matched_pair_count;
4289 stats.ic_elimination_count = elimination_entry_count;
4290 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
4291
4292 info!(
4293 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
4294 matched_pair_count,
4295 stats.ic_transaction_count,
4296 seller_entries.len(),
4297 buyer_entries.len(),
4298 elimination_entry_count,
4299 match_rate * 100.0
4300 );
4301 self.check_resources_with_log("post-intercompany")?;
4302
4303 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
4307 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
4308 use rust_decimal::Decimal;
4309
4310 let eight_pct = Decimal::new(8, 2); group_structure
4313 .subsidiaries
4314 .iter()
4315 .filter(|sub| {
4316 sub.nci_percentage > Decimal::ZERO
4317 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
4318 })
4319 .map(|sub| {
4320 let net_assets_from_jes =
4324 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4325
4326 let net_assets = if net_assets_from_jes > Decimal::ZERO {
4327 net_assets_from_jes.round_dp(2)
4328 } else {
4329 Decimal::from(1_000_000u64)
4331 };
4332
4333 let net_income = (net_assets * eight_pct).round_dp(2);
4335
4336 NciMeasurement::compute(
4337 sub.entity_code.clone(),
4338 sub.nci_percentage,
4339 net_assets,
4340 net_income,
4341 )
4342 })
4343 .collect()
4344 };
4345
4346 if !nci_measurements.is_empty() {
4347 info!(
4348 "NCI measurements: {} subsidiaries with non-controlling interests",
4349 nci_measurements.len()
4350 );
4351 }
4352
4353 Ok(IntercompanySnapshot {
4354 group_structure: Some(group_structure),
4355 matched_pairs,
4356 seller_journal_entries: seller_entries,
4357 buyer_journal_entries: buyer_entries,
4358 elimination_entries,
4359 nci_measurements,
4360 matched_pair_count,
4361 elimination_entry_count,
4362 match_rate,
4363 })
4364 }
4365
4366 fn phase_financial_reporting(
4368 &mut self,
4369 document_flows: &DocumentFlowSnapshot,
4370 journal_entries: &[JournalEntry],
4371 coa: &Arc<ChartOfAccounts>,
4372 _hr: &HrSnapshot,
4373 _audit: &AuditSnapshot,
4374 stats: &mut EnhancedGenerationStatistics,
4375 ) -> SynthResult<FinancialReportingSnapshot> {
4376 let fs_enabled = self.phase_config.generate_financial_statements
4377 || self.config.financial_reporting.enabled;
4378 let br_enabled = self.phase_config.generate_bank_reconciliation;
4379
4380 if !fs_enabled && !br_enabled {
4381 debug!("Phase 15: Skipped (financial reporting disabled)");
4382 return Ok(FinancialReportingSnapshot::default());
4383 }
4384
4385 info!("Phase 15: Generating Financial Reporting Data");
4386
4387 let seed = self.seed;
4388 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4389 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4390
4391 let mut financial_statements = Vec::new();
4392 let mut bank_reconciliations = Vec::new();
4393 let mut trial_balances = Vec::new();
4394 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
4395 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
4396 Vec::new();
4397 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
4399 std::collections::HashMap::new();
4400 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
4402 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
4404
4405 if fs_enabled {
4413 let has_journal_entries = !journal_entries.is_empty();
4414
4415 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
4418 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
4420
4421 let elimination_entries: Vec<&JournalEntry> = journal_entries
4423 .iter()
4424 .filter(|je| je.header.is_elimination)
4425 .collect();
4426
4427 for period in 0..self.config.global.period_months {
4429 let period_start = start_date + chrono::Months::new(period);
4430 let period_end =
4431 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4432 let fiscal_year = period_end.year() as u16;
4433 let fiscal_period = period_end.month() as u8;
4434 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4435
4436 let mut entity_tb_map: std::collections::HashMap<
4439 String,
4440 std::collections::HashMap<String, rust_decimal::Decimal>,
4441 > = std::collections::HashMap::new();
4442
4443 for (company_idx, company) in self.config.companies.iter().enumerate() {
4445 let company_code = company.code.as_str();
4446 let currency = company.currency.as_str();
4447 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
4450 let mut company_fs_gen =
4451 FinancialStatementGenerator::new(seed + company_seed_offset);
4452
4453 if has_journal_entries {
4454 let tb_entries = Self::build_cumulative_trial_balance(
4455 journal_entries,
4456 coa,
4457 company_code,
4458 start_date,
4459 period_end,
4460 fiscal_year,
4461 fiscal_period,
4462 );
4463
4464 let entity_cat_map =
4466 entity_tb_map.entry(company_code.to_string()).or_default();
4467 for tb_entry in &tb_entries {
4468 let net = tb_entry.debit_balance - tb_entry.credit_balance;
4469 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
4470 }
4471
4472 let stmts = company_fs_gen.generate(
4473 company_code,
4474 currency,
4475 &tb_entries,
4476 period_start,
4477 period_end,
4478 fiscal_year,
4479 fiscal_period,
4480 None,
4481 "SYS-AUTOCLOSE",
4482 );
4483
4484 let mut entity_stmts = Vec::new();
4485 for stmt in stmts {
4486 if stmt.statement_type == StatementType::CashFlowStatement {
4487 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
4488 let cf_items = Self::build_cash_flow_from_trial_balances(
4489 &tb_entries,
4490 None,
4491 net_income,
4492 );
4493 entity_stmts.push(FinancialStatement {
4494 cash_flow_items: cf_items,
4495 ..stmt
4496 });
4497 } else {
4498 entity_stmts.push(stmt);
4499 }
4500 }
4501
4502 financial_statements.extend(entity_stmts.clone());
4504
4505 standalone_statements
4507 .entry(company_code.to_string())
4508 .or_default()
4509 .extend(entity_stmts);
4510
4511 if company_idx == 0 {
4514 trial_balances.push(PeriodTrialBalance {
4515 fiscal_year,
4516 fiscal_period,
4517 period_start,
4518 period_end,
4519 entries: tb_entries,
4520 });
4521 }
4522 } else {
4523 let tb_entries = Self::build_trial_balance_from_entries(
4525 journal_entries,
4526 coa,
4527 company_code,
4528 fiscal_year,
4529 fiscal_period,
4530 );
4531
4532 let stmts = company_fs_gen.generate(
4533 company_code,
4534 currency,
4535 &tb_entries,
4536 period_start,
4537 period_end,
4538 fiscal_year,
4539 fiscal_period,
4540 None,
4541 "SYS-AUTOCLOSE",
4542 );
4543 financial_statements.extend(stmts.clone());
4544 standalone_statements
4545 .entry(company_code.to_string())
4546 .or_default()
4547 .extend(stmts);
4548
4549 if company_idx == 0 && !tb_entries.is_empty() {
4550 trial_balances.push(PeriodTrialBalance {
4551 fiscal_year,
4552 fiscal_period,
4553 period_start,
4554 period_end,
4555 entries: tb_entries,
4556 });
4557 }
4558 }
4559 }
4560
4561 let group_currency = self
4564 .config
4565 .companies
4566 .first()
4567 .map(|c| c.currency.as_str())
4568 .unwrap_or("USD");
4569
4570 let period_eliminations: Vec<JournalEntry> = elimination_entries
4572 .iter()
4573 .filter(|je| {
4574 je.header.fiscal_year == fiscal_year
4575 && je.header.fiscal_period == fiscal_period
4576 })
4577 .map(|je| (*je).clone())
4578 .collect();
4579
4580 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
4581 &entity_tb_map,
4582 &period_eliminations,
4583 &period_label,
4584 );
4585
4586 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
4589 .line_items
4590 .iter()
4591 .map(|li| {
4592 let net = li.post_elimination_total;
4593 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
4594 (net, rust_decimal::Decimal::ZERO)
4595 } else {
4596 (rust_decimal::Decimal::ZERO, -net)
4597 };
4598 datasynth_generators::TrialBalanceEntry {
4599 account_code: li.account_category.clone(),
4600 account_name: li.account_category.clone(),
4601 category: li.account_category.clone(),
4602 debit_balance: debit,
4603 credit_balance: credit,
4604 }
4605 })
4606 .collect();
4607
4608 let mut cons_stmts = cons_gen.generate(
4609 "GROUP",
4610 group_currency,
4611 &cons_tb,
4612 period_start,
4613 period_end,
4614 fiscal_year,
4615 fiscal_period,
4616 None,
4617 "SYS-AUTOCLOSE",
4618 );
4619
4620 let bs_categories: &[&str] = &[
4624 "CASH",
4625 "RECEIVABLES",
4626 "INVENTORY",
4627 "FIXEDASSETS",
4628 "PAYABLES",
4629 "ACCRUEDLIABILITIES",
4630 "LONGTERMDEBT",
4631 "EQUITY",
4632 ];
4633 let (bs_items, is_items): (Vec<_>, Vec<_>) =
4634 cons_line_items.into_iter().partition(|li| {
4635 let upper = li.label.to_uppercase();
4636 bs_categories.iter().any(|c| upper == *c)
4637 });
4638
4639 for stmt in &mut cons_stmts {
4640 stmt.is_consolidated = true;
4641 match stmt.statement_type {
4642 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
4643 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
4644 _ => {} }
4646 }
4647
4648 consolidated_statements.extend(cons_stmts);
4649 consolidation_schedules.push(schedule);
4650 }
4651
4652 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
4658 info!(
4659 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
4660 stats.financial_statement_count,
4661 consolidated_statements.len(),
4662 has_journal_entries
4663 );
4664
4665 let entity_seeds: Vec<SegmentSeed> = self
4670 .config
4671 .companies
4672 .iter()
4673 .map(|c| SegmentSeed {
4674 code: c.code.clone(),
4675 name: c.name.clone(),
4676 currency: c.currency.clone(),
4677 })
4678 .collect();
4679
4680 let mut seg_gen = SegmentGenerator::new(seed + 30);
4681
4682 for period in 0..self.config.global.period_months {
4687 let period_end =
4688 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4689 let fiscal_year = period_end.year() as u16;
4690 let fiscal_period = period_end.month() as u8;
4691 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4692
4693 use datasynth_core::models::StatementType;
4694
4695 let cons_is = consolidated_statements.iter().find(|s| {
4697 s.fiscal_year == fiscal_year
4698 && s.fiscal_period == fiscal_period
4699 && s.statement_type == StatementType::IncomeStatement
4700 });
4701 let cons_bs = consolidated_statements.iter().find(|s| {
4702 s.fiscal_year == fiscal_year
4703 && s.fiscal_period == fiscal_period
4704 && s.statement_type == StatementType::BalanceSheet
4705 });
4706
4707 let is_stmt = cons_is.or_else(|| {
4709 financial_statements.iter().find(|s| {
4710 s.fiscal_year == fiscal_year
4711 && s.fiscal_period == fiscal_period
4712 && s.statement_type == StatementType::IncomeStatement
4713 })
4714 });
4715 let bs_stmt = cons_bs.or_else(|| {
4716 financial_statements.iter().find(|s| {
4717 s.fiscal_year == fiscal_year
4718 && s.fiscal_period == fiscal_period
4719 && s.statement_type == StatementType::BalanceSheet
4720 })
4721 });
4722
4723 let consolidated_revenue = is_stmt
4724 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
4725 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
4727
4728 let consolidated_profit = is_stmt
4729 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
4730 .map(|li| li.amount)
4731 .unwrap_or(rust_decimal::Decimal::ZERO);
4732
4733 let consolidated_assets = bs_stmt
4734 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
4735 .map(|li| li.amount)
4736 .unwrap_or(rust_decimal::Decimal::ZERO);
4737
4738 if consolidated_revenue == rust_decimal::Decimal::ZERO
4740 && consolidated_assets == rust_decimal::Decimal::ZERO
4741 {
4742 continue;
4743 }
4744
4745 let group_code = self
4746 .config
4747 .companies
4748 .first()
4749 .map(|c| c.code.as_str())
4750 .unwrap_or("GROUP");
4751
4752 let total_depr: rust_decimal::Decimal = journal_entries
4755 .iter()
4756 .filter(|je| je.header.document_type == "CL")
4757 .flat_map(|je| je.lines.iter())
4758 .filter(|l| l.gl_account.starts_with("6000"))
4759 .map(|l| l.debit_amount)
4760 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
4761 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
4762 Some(total_depr)
4763 } else {
4764 None
4765 };
4766
4767 let (segs, recon) = seg_gen.generate(
4768 group_code,
4769 &period_label,
4770 consolidated_revenue,
4771 consolidated_profit,
4772 consolidated_assets,
4773 &entity_seeds,
4774 depr_param,
4775 );
4776 segment_reports.extend(segs);
4777 segment_reconciliations.push(recon);
4778 }
4779
4780 info!(
4781 "Segment reports generated: {} segments, {} reconciliations",
4782 segment_reports.len(),
4783 segment_reconciliations.len()
4784 );
4785 }
4786
4787 if br_enabled && !document_flows.payments.is_empty() {
4789 let employee_ids: Vec<String> = self
4790 .master_data
4791 .employees
4792 .iter()
4793 .map(|e| e.employee_id.clone())
4794 .collect();
4795 let mut br_gen =
4796 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
4797
4798 for company in &self.config.companies {
4800 let company_payments: Vec<PaymentReference> = document_flows
4801 .payments
4802 .iter()
4803 .filter(|p| p.header.company_code == company.code)
4804 .map(|p| PaymentReference {
4805 id: p.header.document_id.clone(),
4806 amount: if p.is_vendor { p.amount } else { -p.amount },
4807 date: p.header.document_date,
4808 reference: p
4809 .check_number
4810 .clone()
4811 .or_else(|| p.wire_reference.clone())
4812 .unwrap_or_else(|| p.header.document_id.clone()),
4813 })
4814 .collect();
4815
4816 if company_payments.is_empty() {
4817 continue;
4818 }
4819
4820 let bank_account_id = format!("{}-MAIN", company.code);
4821
4822 for period in 0..self.config.global.period_months {
4824 let period_start = start_date + chrono::Months::new(period);
4825 let period_end =
4826 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4827
4828 let period_payments: Vec<PaymentReference> = company_payments
4829 .iter()
4830 .filter(|p| p.date >= period_start && p.date <= period_end)
4831 .cloned()
4832 .collect();
4833
4834 let recon = br_gen.generate(
4835 &company.code,
4836 &bank_account_id,
4837 period_start,
4838 period_end,
4839 &company.currency,
4840 &period_payments,
4841 );
4842 bank_reconciliations.push(recon);
4843 }
4844 }
4845 info!(
4846 "Bank reconciliations generated: {} reconciliations",
4847 bank_reconciliations.len()
4848 );
4849 }
4850
4851 stats.bank_reconciliation_count = bank_reconciliations.len();
4852 self.check_resources_with_log("post-financial-reporting")?;
4853
4854 if !trial_balances.is_empty() {
4855 info!(
4856 "Period-close trial balances captured: {} periods",
4857 trial_balances.len()
4858 );
4859 }
4860
4861 let notes_to_financial_statements = Vec::new();
4865
4866 Ok(FinancialReportingSnapshot {
4867 financial_statements,
4868 standalone_statements,
4869 consolidated_statements,
4870 consolidation_schedules,
4871 bank_reconciliations,
4872 trial_balances,
4873 segment_reports,
4874 segment_reconciliations,
4875 notes_to_financial_statements,
4876 })
4877 }
4878
4879 fn generate_notes_to_financial_statements(
4886 &self,
4887 financial_reporting: &mut FinancialReportingSnapshot,
4888 accounting_standards: &AccountingStandardsSnapshot,
4889 tax: &TaxSnapshot,
4890 hr: &HrSnapshot,
4891 audit: &AuditSnapshot,
4892 ) {
4893 use datasynth_config::schema::AccountingFrameworkConfig;
4894 use datasynth_core::models::StatementType;
4895 use datasynth_generators::period_close::notes_generator::{
4896 NotesGenerator, NotesGeneratorContext,
4897 };
4898
4899 let seed = self.seed;
4900 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4901 {
4902 Ok(d) => d,
4903 Err(_) => return,
4904 };
4905
4906 let mut notes_gen = NotesGenerator::new(seed + 4235);
4907
4908 for company in &self.config.companies {
4909 let last_period_end = start_date
4910 + chrono::Months::new(self.config.global.period_months)
4911 - chrono::Days::new(1);
4912 let fiscal_year = last_period_end.year() as u16;
4913
4914 let entity_is = financial_reporting
4916 .standalone_statements
4917 .get(&company.code)
4918 .and_then(|stmts| {
4919 stmts.iter().find(|s| {
4920 s.fiscal_year == fiscal_year
4921 && s.statement_type == StatementType::IncomeStatement
4922 })
4923 });
4924 let entity_bs = financial_reporting
4925 .standalone_statements
4926 .get(&company.code)
4927 .and_then(|stmts| {
4928 stmts.iter().find(|s| {
4929 s.fiscal_year == fiscal_year
4930 && s.statement_type == StatementType::BalanceSheet
4931 })
4932 });
4933
4934 let revenue_amount = entity_is
4936 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
4937 .map(|li| li.amount);
4938 let ppe_gross = entity_bs
4939 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
4940 .map(|li| li.amount);
4941
4942 let framework = match self
4943 .config
4944 .accounting_standards
4945 .framework
4946 .unwrap_or_default()
4947 {
4948 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
4949 "IFRS".to_string()
4950 }
4951 _ => "US GAAP".to_string(),
4952 };
4953
4954 let (entity_dta, entity_dtl) = {
4957 let mut dta = rust_decimal::Decimal::ZERO;
4958 let mut dtl = rust_decimal::Decimal::ZERO;
4959 for rf in &tax.deferred_tax.rollforwards {
4960 if rf.entity_code == company.code {
4961 dta += rf.closing_dta;
4962 dtl += rf.closing_dtl;
4963 }
4964 }
4965 (
4966 if dta > rust_decimal::Decimal::ZERO {
4967 Some(dta)
4968 } else {
4969 None
4970 },
4971 if dtl > rust_decimal::Decimal::ZERO {
4972 Some(dtl)
4973 } else {
4974 None
4975 },
4976 )
4977 };
4978
4979 let entity_provisions: Vec<_> = accounting_standards
4982 .provisions
4983 .iter()
4984 .filter(|p| p.entity_code == company.code)
4985 .collect();
4986 let provision_count = entity_provisions.len();
4987 let total_provisions = if provision_count > 0 {
4988 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
4989 } else {
4990 None
4991 };
4992
4993 let entity_pension_plan_count = hr
4995 .pension_plans
4996 .iter()
4997 .filter(|p| p.entity_code == company.code)
4998 .count();
4999 let entity_total_dbo: Option<rust_decimal::Decimal> = {
5000 let sum: rust_decimal::Decimal = hr
5001 .pension_disclosures
5002 .iter()
5003 .filter(|d| {
5004 hr.pension_plans
5005 .iter()
5006 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5007 })
5008 .map(|d| d.net_pension_liability)
5009 .sum();
5010 let plan_assets_sum: rust_decimal::Decimal = hr
5011 .pension_plan_assets
5012 .iter()
5013 .filter(|a| {
5014 hr.pension_plans
5015 .iter()
5016 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5017 })
5018 .map(|a| a.fair_value_closing)
5019 .sum();
5020 if entity_pension_plan_count > 0 {
5021 Some(sum + plan_assets_sum)
5022 } else {
5023 None
5024 }
5025 };
5026 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5027 let sum: rust_decimal::Decimal = hr
5028 .pension_plan_assets
5029 .iter()
5030 .filter(|a| {
5031 hr.pension_plans
5032 .iter()
5033 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5034 })
5035 .map(|a| a.fair_value_closing)
5036 .sum();
5037 if entity_pension_plan_count > 0 {
5038 Some(sum)
5039 } else {
5040 None
5041 }
5042 };
5043
5044 let rp_count = audit.related_party_transactions.len();
5047 let se_count = audit.subsequent_events.len();
5048 let adjusting_count = audit
5049 .subsequent_events
5050 .iter()
5051 .filter(|e| {
5052 matches!(
5053 e.classification,
5054 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5055 )
5056 })
5057 .count();
5058
5059 let ctx = NotesGeneratorContext {
5060 entity_code: company.code.clone(),
5061 framework,
5062 period: format!("FY{}", fiscal_year),
5063 period_end: last_period_end,
5064 currency: company.currency.clone(),
5065 revenue_amount,
5066 total_ppe_gross: ppe_gross,
5067 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5068 deferred_tax_asset: entity_dta,
5070 deferred_tax_liability: entity_dtl,
5071 provision_count,
5073 total_provisions,
5074 pension_plan_count: entity_pension_plan_count,
5076 total_dbo: entity_total_dbo,
5077 total_plan_assets: entity_total_plan_assets,
5078 related_party_transaction_count: rp_count,
5080 subsequent_event_count: se_count,
5081 adjusting_event_count: adjusting_count,
5082 ..NotesGeneratorContext::default()
5083 };
5084
5085 let entity_notes = notes_gen.generate(&ctx);
5086 info!(
5087 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5088 company.code,
5089 entity_notes.len(),
5090 entity_dta,
5091 entity_dtl,
5092 provision_count,
5093 );
5094 financial_reporting
5095 .notes_to_financial_statements
5096 .extend(entity_notes);
5097 }
5098 }
5099
5100 fn build_trial_balance_from_entries(
5106 journal_entries: &[JournalEntry],
5107 coa: &ChartOfAccounts,
5108 company_code: &str,
5109 fiscal_year: u16,
5110 fiscal_period: u8,
5111 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5112 use rust_decimal::Decimal;
5113
5114 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
5116 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
5117
5118 for je in journal_entries {
5119 if je.header.company_code != company_code
5121 || je.header.fiscal_year != fiscal_year
5122 || je.header.fiscal_period != fiscal_period
5123 {
5124 continue;
5125 }
5126
5127 for line in &je.lines {
5128 let acct = &line.gl_account;
5129 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
5130 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
5131 }
5132 }
5133
5134 let mut all_accounts: Vec<&String> = account_debits
5136 .keys()
5137 .chain(account_credits.keys())
5138 .collect::<std::collections::HashSet<_>>()
5139 .into_iter()
5140 .collect();
5141 all_accounts.sort();
5142
5143 let mut entries = Vec::new();
5144
5145 for acct_number in all_accounts {
5146 let debit = account_debits
5147 .get(acct_number)
5148 .copied()
5149 .unwrap_or(Decimal::ZERO);
5150 let credit = account_credits
5151 .get(acct_number)
5152 .copied()
5153 .unwrap_or(Decimal::ZERO);
5154
5155 if debit.is_zero() && credit.is_zero() {
5156 continue;
5157 }
5158
5159 let account_name = coa
5161 .get_account(acct_number)
5162 .map(|gl| gl.short_description.clone())
5163 .unwrap_or_else(|| format!("Account {acct_number}"));
5164
5165 let category = Self::category_from_account_code(acct_number);
5170
5171 entries.push(datasynth_generators::TrialBalanceEntry {
5172 account_code: acct_number.clone(),
5173 account_name,
5174 category,
5175 debit_balance: debit,
5176 credit_balance: credit,
5177 });
5178 }
5179
5180 entries
5181 }
5182
5183 fn build_cumulative_trial_balance(
5190 journal_entries: &[JournalEntry],
5191 coa: &ChartOfAccounts,
5192 company_code: &str,
5193 start_date: NaiveDate,
5194 period_end: NaiveDate,
5195 fiscal_year: u16,
5196 fiscal_period: u8,
5197 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5198 use rust_decimal::Decimal;
5199
5200 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
5202 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
5203
5204 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
5206 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
5207
5208 for je in journal_entries {
5209 if je.header.company_code != company_code {
5210 continue;
5211 }
5212
5213 for line in &je.lines {
5214 let acct = &line.gl_account;
5215 let category = Self::category_from_account_code(acct);
5216 let is_bs_account = matches!(
5217 category.as_str(),
5218 "Cash"
5219 | "Receivables"
5220 | "Inventory"
5221 | "FixedAssets"
5222 | "Payables"
5223 | "AccruedLiabilities"
5224 | "LongTermDebt"
5225 | "Equity"
5226 );
5227
5228 if is_bs_account {
5229 if je.header.document_date <= period_end
5231 && je.header.document_date >= start_date
5232 {
5233 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5234 line.debit_amount;
5235 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5236 line.credit_amount;
5237 }
5238 } else {
5239 if je.header.fiscal_year == fiscal_year
5241 && je.header.fiscal_period == fiscal_period
5242 {
5243 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5244 line.debit_amount;
5245 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5246 line.credit_amount;
5247 }
5248 }
5249 }
5250 }
5251
5252 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
5254 all_accounts.extend(bs_debits.keys().cloned());
5255 all_accounts.extend(bs_credits.keys().cloned());
5256 all_accounts.extend(is_debits.keys().cloned());
5257 all_accounts.extend(is_credits.keys().cloned());
5258
5259 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
5260 sorted_accounts.sort();
5261
5262 let mut entries = Vec::new();
5263
5264 for acct_number in &sorted_accounts {
5265 let category = Self::category_from_account_code(acct_number);
5266 let is_bs_account = matches!(
5267 category.as_str(),
5268 "Cash"
5269 | "Receivables"
5270 | "Inventory"
5271 | "FixedAssets"
5272 | "Payables"
5273 | "AccruedLiabilities"
5274 | "LongTermDebt"
5275 | "Equity"
5276 );
5277
5278 let (debit, credit) = if is_bs_account {
5279 (
5280 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5281 bs_credits
5282 .get(acct_number)
5283 .copied()
5284 .unwrap_or(Decimal::ZERO),
5285 )
5286 } else {
5287 (
5288 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5289 is_credits
5290 .get(acct_number)
5291 .copied()
5292 .unwrap_or(Decimal::ZERO),
5293 )
5294 };
5295
5296 if debit.is_zero() && credit.is_zero() {
5297 continue;
5298 }
5299
5300 let account_name = coa
5301 .get_account(acct_number)
5302 .map(|gl| gl.short_description.clone())
5303 .unwrap_or_else(|| format!("Account {acct_number}"));
5304
5305 entries.push(datasynth_generators::TrialBalanceEntry {
5306 account_code: acct_number.clone(),
5307 account_name,
5308 category,
5309 debit_balance: debit,
5310 credit_balance: credit,
5311 });
5312 }
5313
5314 entries
5315 }
5316
5317 fn build_cash_flow_from_trial_balances(
5322 current_tb: &[datasynth_generators::TrialBalanceEntry],
5323 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
5324 net_income: rust_decimal::Decimal,
5325 ) -> Vec<CashFlowItem> {
5326 use rust_decimal::Decimal;
5327
5328 let aggregate =
5330 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
5331 let mut map: HashMap<String, Decimal> = HashMap::new();
5332 for entry in tb {
5333 let net = entry.debit_balance - entry.credit_balance;
5334 *map.entry(entry.category.clone()).or_default() += net;
5335 }
5336 map
5337 };
5338
5339 let current = aggregate(current_tb);
5340 let prior = prior_tb.map(aggregate);
5341
5342 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
5344 *map.get(key).unwrap_or(&Decimal::ZERO)
5345 };
5346
5347 let change = |key: &str| -> Decimal {
5349 let curr = get(¤t, key);
5350 match &prior {
5351 Some(p) => curr - get(p, key),
5352 None => curr,
5353 }
5354 };
5355
5356 let fixed_asset_change = change("FixedAssets");
5359 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
5360 -fixed_asset_change
5361 } else {
5362 Decimal::ZERO
5363 };
5364
5365 let ar_change = change("Receivables");
5367 let inventory_change = change("Inventory");
5368 let ap_change = change("Payables");
5370 let accrued_change = change("AccruedLiabilities");
5371
5372 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
5373 + (-ap_change)
5374 + (-accrued_change);
5375
5376 let capex = if fixed_asset_change > Decimal::ZERO {
5378 -fixed_asset_change
5379 } else {
5380 Decimal::ZERO
5381 };
5382 let investing_cf = capex;
5383
5384 let debt_change = -change("LongTermDebt");
5386 let equity_change = -change("Equity");
5387 let financing_cf = debt_change + equity_change;
5388
5389 let net_change = operating_cf + investing_cf + financing_cf;
5390
5391 vec![
5392 CashFlowItem {
5393 item_code: "CF-NI".to_string(),
5394 label: "Net Income".to_string(),
5395 category: CashFlowCategory::Operating,
5396 amount: net_income,
5397 amount_prior: None,
5398 sort_order: 1,
5399 is_total: false,
5400 },
5401 CashFlowItem {
5402 item_code: "CF-DEP".to_string(),
5403 label: "Depreciation & Amortization".to_string(),
5404 category: CashFlowCategory::Operating,
5405 amount: depreciation_addback,
5406 amount_prior: None,
5407 sort_order: 2,
5408 is_total: false,
5409 },
5410 CashFlowItem {
5411 item_code: "CF-AR".to_string(),
5412 label: "Change in Accounts Receivable".to_string(),
5413 category: CashFlowCategory::Operating,
5414 amount: -ar_change,
5415 amount_prior: None,
5416 sort_order: 3,
5417 is_total: false,
5418 },
5419 CashFlowItem {
5420 item_code: "CF-AP".to_string(),
5421 label: "Change in Accounts Payable".to_string(),
5422 category: CashFlowCategory::Operating,
5423 amount: -ap_change,
5424 amount_prior: None,
5425 sort_order: 4,
5426 is_total: false,
5427 },
5428 CashFlowItem {
5429 item_code: "CF-INV".to_string(),
5430 label: "Change in Inventory".to_string(),
5431 category: CashFlowCategory::Operating,
5432 amount: -inventory_change,
5433 amount_prior: None,
5434 sort_order: 5,
5435 is_total: false,
5436 },
5437 CashFlowItem {
5438 item_code: "CF-OP".to_string(),
5439 label: "Net Cash from Operating Activities".to_string(),
5440 category: CashFlowCategory::Operating,
5441 amount: operating_cf,
5442 amount_prior: None,
5443 sort_order: 6,
5444 is_total: true,
5445 },
5446 CashFlowItem {
5447 item_code: "CF-CAPEX".to_string(),
5448 label: "Capital Expenditures".to_string(),
5449 category: CashFlowCategory::Investing,
5450 amount: capex,
5451 amount_prior: None,
5452 sort_order: 7,
5453 is_total: false,
5454 },
5455 CashFlowItem {
5456 item_code: "CF-INV-T".to_string(),
5457 label: "Net Cash from Investing Activities".to_string(),
5458 category: CashFlowCategory::Investing,
5459 amount: investing_cf,
5460 amount_prior: None,
5461 sort_order: 8,
5462 is_total: true,
5463 },
5464 CashFlowItem {
5465 item_code: "CF-DEBT".to_string(),
5466 label: "Net Borrowings / (Repayments)".to_string(),
5467 category: CashFlowCategory::Financing,
5468 amount: debt_change,
5469 amount_prior: None,
5470 sort_order: 9,
5471 is_total: false,
5472 },
5473 CashFlowItem {
5474 item_code: "CF-EQ".to_string(),
5475 label: "Equity Changes".to_string(),
5476 category: CashFlowCategory::Financing,
5477 amount: equity_change,
5478 amount_prior: None,
5479 sort_order: 10,
5480 is_total: false,
5481 },
5482 CashFlowItem {
5483 item_code: "CF-FIN-T".to_string(),
5484 label: "Net Cash from Financing Activities".to_string(),
5485 category: CashFlowCategory::Financing,
5486 amount: financing_cf,
5487 amount_prior: None,
5488 sort_order: 11,
5489 is_total: true,
5490 },
5491 CashFlowItem {
5492 item_code: "CF-NET".to_string(),
5493 label: "Net Change in Cash".to_string(),
5494 category: CashFlowCategory::Operating,
5495 amount: net_change,
5496 amount_prior: None,
5497 sort_order: 12,
5498 is_total: true,
5499 },
5500 ]
5501 }
5502
5503 fn calculate_net_income_from_tb(
5507 tb: &[datasynth_generators::TrialBalanceEntry],
5508 ) -> rust_decimal::Decimal {
5509 use rust_decimal::Decimal;
5510
5511 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
5512 for entry in tb {
5513 let net = entry.debit_balance - entry.credit_balance;
5514 *aggregated.entry(entry.category.clone()).or_default() += net;
5515 }
5516
5517 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
5518 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
5519 let opex = *aggregated
5520 .get("OperatingExpenses")
5521 .unwrap_or(&Decimal::ZERO);
5522 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
5523 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
5524
5525 let operating_income = revenue - cogs - opex - other_expenses - other_income;
5528 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
5530 operating_income - tax
5531 }
5532
/// Maps a GL account code to its reporting category using the first two characters.
///
/// | Prefix   | Category             |
/// |----------|----------------------|
/// | 10       | `Cash`               |
/// | 11       | `Receivables`        |
/// | 12–14    | `Inventory`          |
/// | 15–19    | `FixedAssets`        |
/// | 20       | `Payables`           |
/// | 21–24    | `AccruedLiabilities` |
/// | 25–29    | `LongTermDebt`       |
/// | 30–39    | `Equity`             |
/// | 40–44    | `Revenue`            |
/// | 50–52    | `CostOfSales`        |
/// | 60–69    | `OperatingExpenses`  |
/// | 70–74    | `OtherIncome`        |
/// | 80–89    | `OtherExpenses`      |
///
/// Any other prefix — including codes shorter than two characters — yields
/// `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Unmapped or too-short prefixes are treated as operating expenses.
        _ => "OperatingExpenses",
    };
    category.to_string()
}
5561
5562 fn phase_hr_data(
5564 &mut self,
5565 stats: &mut EnhancedGenerationStatistics,
5566 ) -> SynthResult<HrSnapshot> {
5567 if !self.phase_config.generate_hr {
5568 debug!("Phase 16: Skipped (HR generation disabled)");
5569 return Ok(HrSnapshot::default());
5570 }
5571
5572 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
5573
5574 let seed = self.seed;
5575 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5576 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5577 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5578 let company_code = self
5579 .config
5580 .companies
5581 .first()
5582 .map(|c| c.code.as_str())
5583 .unwrap_or("1000");
5584 let currency = self
5585 .config
5586 .companies
5587 .first()
5588 .map(|c| c.currency.as_str())
5589 .unwrap_or("USD");
5590
5591 let employee_ids: Vec<String> = self
5592 .master_data
5593 .employees
5594 .iter()
5595 .map(|e| e.employee_id.clone())
5596 .collect();
5597
5598 if employee_ids.is_empty() {
5599 debug!("Phase 16: Skipped (no employees available)");
5600 return Ok(HrSnapshot::default());
5601 }
5602
5603 let cost_center_ids: Vec<String> = self
5606 .master_data
5607 .employees
5608 .iter()
5609 .filter_map(|e| e.cost_center.clone())
5610 .collect::<std::collections::HashSet<_>>()
5611 .into_iter()
5612 .collect();
5613
5614 let mut snapshot = HrSnapshot::default();
5615
5616 if self.config.hr.payroll.enabled {
5618 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
5619 .with_pools(employee_ids.clone(), cost_center_ids.clone());
5620
5621 let payroll_pack = self.primary_pack();
5623
5624 payroll_gen.set_country_pack(payroll_pack.clone());
5627
5628 let employees_with_salary: Vec<(
5629 String,
5630 rust_decimal::Decimal,
5631 Option<String>,
5632 Option<String>,
5633 )> = self
5634 .master_data
5635 .employees
5636 .iter()
5637 .map(|e| {
5638 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
5641 e.base_salary
5642 } else {
5643 rust_decimal::Decimal::from(60_000)
5644 };
5645 (
5646 e.employee_id.clone(),
5647 annual, e.cost_center.clone(),
5649 e.department_id.clone(),
5650 )
5651 })
5652 .collect();
5653
5654 for month in 0..self.config.global.period_months {
5655 let period_start = start_date + chrono::Months::new(month);
5656 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
5657 let (run, items) = payroll_gen.generate(
5658 company_code,
5659 &employees_with_salary,
5660 period_start,
5661 period_end,
5662 currency,
5663 );
5664 snapshot.payroll_runs.push(run);
5665 snapshot.payroll_run_count += 1;
5666 snapshot.payroll_line_item_count += items.len();
5667 snapshot.payroll_line_items.extend(items);
5668 }
5669 }
5670
5671 if self.config.hr.time_attendance.enabled {
5673 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
5674 .with_pools(employee_ids.clone(), cost_center_ids.clone());
5675 let entries = time_gen.generate(
5676 &employee_ids,
5677 start_date,
5678 end_date,
5679 &self.config.hr.time_attendance,
5680 );
5681 snapshot.time_entry_count = entries.len();
5682 snapshot.time_entries = entries;
5683 }
5684
5685 if self.config.hr.expenses.enabled {
5687 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
5688 .with_pools(employee_ids.clone(), cost_center_ids.clone());
5689 expense_gen.set_country_pack(self.primary_pack().clone());
5690 let company_currency = self
5691 .config
5692 .companies
5693 .first()
5694 .map(|c| c.currency.as_str())
5695 .unwrap_or("USD");
5696 let reports = expense_gen.generate_with_currency(
5697 &employee_ids,
5698 start_date,
5699 end_date,
5700 &self.config.hr.expenses,
5701 company_currency,
5702 );
5703 snapshot.expense_report_count = reports.len();
5704 snapshot.expense_reports = reports;
5705 }
5706
5707 if self.config.hr.payroll.enabled {
5709 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
5710 let employee_pairs: Vec<(String, String)> = self
5711 .master_data
5712 .employees
5713 .iter()
5714 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
5715 .collect();
5716 let enrollments =
5717 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
5718 snapshot.benefit_enrollment_count = enrollments.len();
5719 snapshot.benefit_enrollments = enrollments;
5720 }
5721
5722 if self.phase_config.generate_hr {
5724 let entity_name = self
5725 .config
5726 .companies
5727 .first()
5728 .map(|c| c.name.as_str())
5729 .unwrap_or("Entity");
5730 let period_months = self.config.global.period_months;
5731 let period_label = {
5732 let y = start_date.year();
5733 let m = start_date.month();
5734 if period_months >= 12 {
5735 format!("FY{y}")
5736 } else {
5737 format!("{y}-{m:02}")
5738 }
5739 };
5740 let reporting_date =
5741 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
5742
5743 let avg_salary: Option<rust_decimal::Decimal> = {
5748 let employee_count = employee_ids.len();
5749 if self.config.hr.payroll.enabled
5750 && employee_count > 0
5751 && !snapshot.payroll_runs.is_empty()
5752 {
5753 let total_gross: rust_decimal::Decimal = snapshot
5755 .payroll_runs
5756 .iter()
5757 .filter(|r| r.company_code == company_code)
5758 .map(|r| r.total_gross)
5759 .sum();
5760 if total_gross > rust_decimal::Decimal::ZERO {
5761 let annual_total = if period_months > 0 && period_months < 12 {
5763 total_gross * rust_decimal::Decimal::from(12u32)
5764 / rust_decimal::Decimal::from(period_months)
5765 } else {
5766 total_gross
5767 };
5768 Some(
5769 (annual_total / rust_decimal::Decimal::from(employee_count))
5770 .round_dp(2),
5771 )
5772 } else {
5773 None
5774 }
5775 } else {
5776 None
5777 }
5778 };
5779
5780 let mut pension_gen =
5781 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
5782 let pension_snap = pension_gen.generate(
5783 company_code,
5784 entity_name,
5785 &period_label,
5786 reporting_date,
5787 employee_ids.len(),
5788 currency,
5789 avg_salary,
5790 period_months,
5791 );
5792 snapshot.pension_plan_count = pension_snap.plans.len();
5793 snapshot.pension_plans = pension_snap.plans;
5794 snapshot.pension_obligations = pension_snap.obligations;
5795 snapshot.pension_plan_assets = pension_snap.plan_assets;
5796 snapshot.pension_disclosures = pension_snap.disclosures;
5797 snapshot.pension_journal_entries = pension_snap.journal_entries;
5802 }
5803
5804 if self.phase_config.generate_hr && !employee_ids.is_empty() {
5806 let period_months = self.config.global.period_months;
5807 let period_label = {
5808 let y = start_date.year();
5809 let m = start_date.month();
5810 if period_months >= 12 {
5811 format!("FY{y}")
5812 } else {
5813 format!("{y}-{m:02}")
5814 }
5815 };
5816 let reporting_date =
5817 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
5818
5819 let mut stock_comp_gen =
5820 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
5821 let stock_snap = stock_comp_gen.generate(
5822 company_code,
5823 &employee_ids,
5824 start_date,
5825 &period_label,
5826 reporting_date,
5827 currency,
5828 );
5829 snapshot.stock_grant_count = stock_snap.grants.len();
5830 snapshot.stock_grants = stock_snap.grants;
5831 snapshot.stock_comp_expenses = stock_snap.expenses;
5832 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
5833 }
5834
5835 stats.payroll_run_count = snapshot.payroll_run_count;
5836 stats.time_entry_count = snapshot.time_entry_count;
5837 stats.expense_report_count = snapshot.expense_report_count;
5838 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
5839 stats.pension_plan_count = snapshot.pension_plan_count;
5840 stats.stock_grant_count = snapshot.stock_grant_count;
5841
5842 info!(
5843 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
5844 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
5845 snapshot.time_entry_count, snapshot.expense_report_count,
5846 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
5847 snapshot.stock_grant_count
5848 );
5849 self.check_resources_with_log("post-hr")?;
5850
5851 Ok(snapshot)
5852 }
5853
5854 fn phase_accounting_standards(
5856 &mut self,
5857 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
5858 journal_entries: &[JournalEntry],
5859 stats: &mut EnhancedGenerationStatistics,
5860 ) -> SynthResult<AccountingStandardsSnapshot> {
5861 if !self.phase_config.generate_accounting_standards {
5862 debug!("Phase 17: Skipped (accounting standards generation disabled)");
5863 return Ok(AccountingStandardsSnapshot::default());
5864 }
5865 info!("Phase 17: Generating Accounting Standards Data");
5866
5867 let seed = self.seed;
5868 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5869 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5870 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5871 let company_code = self
5872 .config
5873 .companies
5874 .first()
5875 .map(|c| c.code.as_str())
5876 .unwrap_or("1000");
5877 let currency = self
5878 .config
5879 .companies
5880 .first()
5881 .map(|c| c.currency.as_str())
5882 .unwrap_or("USD");
5883
5884 let framework = match self.config.accounting_standards.framework {
5889 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
5890 datasynth_standards::framework::AccountingFramework::UsGaap
5891 }
5892 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
5893 datasynth_standards::framework::AccountingFramework::Ifrs
5894 }
5895 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
5896 datasynth_standards::framework::AccountingFramework::DualReporting
5897 }
5898 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
5899 datasynth_standards::framework::AccountingFramework::FrenchGaap
5900 }
5901 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
5902 datasynth_standards::framework::AccountingFramework::GermanGaap
5903 }
5904 None => {
5905 let pack = self.primary_pack();
5907 let pack_fw = pack.accounting.framework.as_str();
5908 match pack_fw {
5909 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
5910 "dual_reporting" => {
5911 datasynth_standards::framework::AccountingFramework::DualReporting
5912 }
5913 "french_gaap" => {
5914 datasynth_standards::framework::AccountingFramework::FrenchGaap
5915 }
5916 "german_gaap" | "hgb" => {
5917 datasynth_standards::framework::AccountingFramework::GermanGaap
5918 }
5919 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
5921 }
5922 }
5923 };
5924
5925 let mut snapshot = AccountingStandardsSnapshot::default();
5926
5927 if self.config.accounting_standards.revenue_recognition.enabled {
5929 let customer_ids: Vec<String> = self
5930 .master_data
5931 .customers
5932 .iter()
5933 .map(|c| c.customer_id.clone())
5934 .collect();
5935
5936 if !customer_ids.is_empty() {
5937 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
5938 let contracts = rev_gen.generate(
5939 company_code,
5940 &customer_ids,
5941 start_date,
5942 end_date,
5943 currency,
5944 &self.config.accounting_standards.revenue_recognition,
5945 framework,
5946 );
5947 snapshot.revenue_contract_count = contracts.len();
5948 snapshot.contracts = contracts;
5949 }
5950 }
5951
5952 if self.config.accounting_standards.impairment.enabled {
5954 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
5955 .master_data
5956 .assets
5957 .iter()
5958 .map(|a| {
5959 (
5960 a.asset_id.clone(),
5961 a.description.clone(),
5962 a.acquisition_cost,
5963 )
5964 })
5965 .collect();
5966
5967 if !asset_data.is_empty() {
5968 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
5969 let tests = imp_gen.generate(
5970 company_code,
5971 &asset_data,
5972 end_date,
5973 &self.config.accounting_standards.impairment,
5974 framework,
5975 );
5976 snapshot.impairment_test_count = tests.len();
5977 snapshot.impairment_tests = tests;
5978 }
5979 }
5980
5981 if self
5983 .config
5984 .accounting_standards
5985 .business_combinations
5986 .enabled
5987 {
5988 let bc_config = &self.config.accounting_standards.business_combinations;
5989 let framework_str = match framework {
5990 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
5991 _ => "US_GAAP",
5992 };
5993 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
5994 let bc_snap = bc_gen.generate(
5995 company_code,
5996 currency,
5997 start_date,
5998 end_date,
5999 bc_config.acquisition_count,
6000 framework_str,
6001 );
6002 snapshot.business_combination_count = bc_snap.combinations.len();
6003 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6004 snapshot.business_combinations = bc_snap.combinations;
6005 }
6006
6007 if self
6009 .config
6010 .accounting_standards
6011 .expected_credit_loss
6012 .enabled
6013 {
6014 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6015 let framework_str = match framework {
6016 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6017 _ => "ASC_326",
6018 };
6019
6020 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6023
6024 let mut ecl_gen = EclGenerator::new(seed + 43);
6025
6026 let bucket_exposures: Vec<(
6028 datasynth_core::models::subledger::ar::AgingBucket,
6029 rust_decimal::Decimal,
6030 )> = if ar_aging_reports.is_empty() {
6031 use datasynth_core::models::subledger::ar::AgingBucket;
6033 vec![
6034 (
6035 AgingBucket::Current,
6036 rust_decimal::Decimal::from(500_000_u32),
6037 ),
6038 (
6039 AgingBucket::Days1To30,
6040 rust_decimal::Decimal::from(120_000_u32),
6041 ),
6042 (
6043 AgingBucket::Days31To60,
6044 rust_decimal::Decimal::from(45_000_u32),
6045 ),
6046 (
6047 AgingBucket::Days61To90,
6048 rust_decimal::Decimal::from(15_000_u32),
6049 ),
6050 (
6051 AgingBucket::Over90Days,
6052 rust_decimal::Decimal::from(8_000_u32),
6053 ),
6054 ]
6055 } else {
6056 use datasynth_core::models::subledger::ar::AgingBucket;
6057 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6059 std::collections::HashMap::new();
6060 for report in ar_aging_reports {
6061 for (bucket, amount) in &report.bucket_totals {
6062 *totals.entry(*bucket).or_default() += amount;
6063 }
6064 }
6065 AgingBucket::all()
6066 .into_iter()
6067 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6068 .collect()
6069 };
6070
6071 let ecl_snap = ecl_gen.generate(
6072 company_code,
6073 end_date,
6074 &bucket_exposures,
6075 ecl_config,
6076 &period_label,
6077 framework_str,
6078 );
6079
6080 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
6081 snapshot.ecl_models = ecl_snap.ecl_models;
6082 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
6083 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
6084 }
6085
6086 {
6088 let framework_str = match framework {
6089 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6090 _ => "US_GAAP",
6091 };
6092
6093 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
6098 .max(rust_decimal::Decimal::from(100_000_u32));
6099
6100 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6101
6102 let mut prov_gen = ProvisionGenerator::new(seed + 44);
6103 let prov_snap = prov_gen.generate(
6104 company_code,
6105 currency,
6106 revenue_proxy,
6107 end_date,
6108 &period_label,
6109 framework_str,
6110 None, );
6112
6113 snapshot.provision_count = prov_snap.provisions.len();
6114 snapshot.provisions = prov_snap.provisions;
6115 snapshot.provision_movements = prov_snap.movements;
6116 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
6117 snapshot.provision_journal_entries = prov_snap.journal_entries;
6118 }
6119
6120 {
6124 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6125
6126 let presentation_currency = self
6127 .config
6128 .global
6129 .presentation_currency
6130 .clone()
6131 .unwrap_or_else(|| self.config.global.group_currency.clone());
6132
6133 let mut rate_table = FxRateTable::new(&presentation_currency);
6136
6137 let base_rates = base_rates_usd();
6141 for (ccy, rate) in &base_rates {
6142 rate_table.add_rate(FxRate::new(
6143 ccy,
6144 "USD",
6145 RateType::Closing,
6146 end_date,
6147 *rate,
6148 "SYNTHETIC",
6149 ));
6150 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
6153 rate_table.add_rate(FxRate::new(
6154 ccy,
6155 "USD",
6156 RateType::Average,
6157 end_date,
6158 avg,
6159 "SYNTHETIC",
6160 ));
6161 }
6162
6163 let mut translation_results = Vec::new();
6164 for company in &self.config.companies {
6165 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
6168 .max(rust_decimal::Decimal::from(100_000_u32));
6169
6170 let func_ccy = company
6171 .functional_currency
6172 .clone()
6173 .unwrap_or_else(|| company.currency.clone());
6174
6175 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
6176 &company.code,
6177 &func_ccy,
6178 &presentation_currency,
6179 &ias21_period_label,
6180 end_date,
6181 company_revenue,
6182 &rate_table,
6183 );
6184 translation_results.push(result);
6185 }
6186
6187 snapshot.currency_translation_count = translation_results.len();
6188 snapshot.currency_translation_results = translation_results;
6189 }
6190
6191 stats.revenue_contract_count = snapshot.revenue_contract_count;
6192 stats.impairment_test_count = snapshot.impairment_test_count;
6193 stats.business_combination_count = snapshot.business_combination_count;
6194 stats.ecl_model_count = snapshot.ecl_model_count;
6195 stats.provision_count = snapshot.provision_count;
6196
6197 info!(
6198 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
6199 snapshot.revenue_contract_count,
6200 snapshot.impairment_test_count,
6201 snapshot.business_combination_count,
6202 snapshot.ecl_model_count,
6203 snapshot.provision_count,
6204 snapshot.currency_translation_count
6205 );
6206 self.check_resources_with_log("post-accounting-standards")?;
6207
6208 Ok(snapshot)
6209 }
6210
6211 fn phase_manufacturing(
6213 &mut self,
6214 stats: &mut EnhancedGenerationStatistics,
6215 ) -> SynthResult<ManufacturingSnapshot> {
6216 if !self.phase_config.generate_manufacturing {
6217 debug!("Phase 18: Skipped (manufacturing generation disabled)");
6218 return Ok(ManufacturingSnapshot::default());
6219 }
6220 info!("Phase 18: Generating Manufacturing Data");
6221
6222 let seed = self.seed;
6223 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6224 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6225 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6226 let company_code = self
6227 .config
6228 .companies
6229 .first()
6230 .map(|c| c.code.as_str())
6231 .unwrap_or("1000");
6232
6233 let material_data: Vec<(String, String)> = self
6234 .master_data
6235 .materials
6236 .iter()
6237 .map(|m| (m.material_id.clone(), m.description.clone()))
6238 .collect();
6239
6240 if material_data.is_empty() {
6241 debug!("Phase 18: Skipped (no materials available)");
6242 return Ok(ManufacturingSnapshot::default());
6243 }
6244
6245 let mut snapshot = ManufacturingSnapshot::default();
6246
6247 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
6249 let production_orders = prod_gen.generate(
6250 company_code,
6251 &material_data,
6252 start_date,
6253 end_date,
6254 &self.config.manufacturing.production_orders,
6255 &self.config.manufacturing.costing,
6256 &self.config.manufacturing.routing,
6257 );
6258 snapshot.production_order_count = production_orders.len();
6259
6260 let inspection_data: Vec<(String, String, String)> = production_orders
6262 .iter()
6263 .map(|po| {
6264 (
6265 po.order_id.clone(),
6266 po.material_id.clone(),
6267 po.material_description.clone(),
6268 )
6269 })
6270 .collect();
6271
6272 snapshot.production_orders = production_orders;
6273
6274 if !inspection_data.is_empty() {
6275 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
6276 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
6277 snapshot.quality_inspection_count = inspections.len();
6278 snapshot.quality_inspections = inspections;
6279 }
6280
6281 let storage_locations: Vec<(String, String)> = material_data
6283 .iter()
6284 .enumerate()
6285 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
6286 .collect();
6287
6288 let employee_ids: Vec<String> = self
6289 .master_data
6290 .employees
6291 .iter()
6292 .map(|e| e.employee_id.clone())
6293 .collect();
6294 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
6295 .with_employee_pool(employee_ids);
6296 let mut cycle_count_total = 0usize;
6297 for month in 0..self.config.global.period_months {
6298 let count_date = start_date + chrono::Months::new(month);
6299 let items_per_count = storage_locations.len().clamp(10, 50);
6300 let cc = cc_gen.generate(
6301 company_code,
6302 &storage_locations,
6303 count_date,
6304 items_per_count,
6305 );
6306 snapshot.cycle_counts.push(cc);
6307 cycle_count_total += 1;
6308 }
6309 snapshot.cycle_count_count = cycle_count_total;
6310
6311 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 53);
6313 let bom_components = bom_gen.generate(company_code, &material_data);
6314 snapshot.bom_component_count = bom_components.len();
6315 snapshot.bom_components = bom_components;
6316
6317 let currency = self
6319 .config
6320 .companies
6321 .first()
6322 .map(|c| c.currency.as_str())
6323 .unwrap_or("USD");
6324 let production_order_ids: Vec<String> = snapshot
6325 .production_orders
6326 .iter()
6327 .map(|po| po.order_id.clone())
6328 .collect();
6329 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 54);
6330 let inventory_movements = inv_mov_gen.generate_with_production_orders(
6331 company_code,
6332 &material_data,
6333 start_date,
6334 end_date,
6335 2,
6336 currency,
6337 &production_order_ids,
6338 );
6339 snapshot.inventory_movement_count = inventory_movements.len();
6340 snapshot.inventory_movements = inventory_movements;
6341
6342 stats.production_order_count = snapshot.production_order_count;
6343 stats.quality_inspection_count = snapshot.quality_inspection_count;
6344 stats.cycle_count_count = snapshot.cycle_count_count;
6345 stats.bom_component_count = snapshot.bom_component_count;
6346 stats.inventory_movement_count = snapshot.inventory_movement_count;
6347
6348 info!(
6349 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
6350 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
6351 snapshot.bom_component_count, snapshot.inventory_movement_count
6352 );
6353 self.check_resources_with_log("post-manufacturing")?;
6354
6355 Ok(snapshot)
6356 }
6357
6358 fn phase_sales_kpi_budgets(
6360 &mut self,
6361 coa: &Arc<ChartOfAccounts>,
6362 financial_reporting: &FinancialReportingSnapshot,
6363 stats: &mut EnhancedGenerationStatistics,
6364 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
6365 if !self.phase_config.generate_sales_kpi_budgets {
6366 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
6367 return Ok(SalesKpiBudgetsSnapshot::default());
6368 }
6369 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
6370
6371 let seed = self.seed;
6372 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6373 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6374 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6375 let company_code = self
6376 .config
6377 .companies
6378 .first()
6379 .map(|c| c.code.as_str())
6380 .unwrap_or("1000");
6381
6382 let mut snapshot = SalesKpiBudgetsSnapshot::default();
6383
6384 if self.config.sales_quotes.enabled {
6386 let customer_data: Vec<(String, String)> = self
6387 .master_data
6388 .customers
6389 .iter()
6390 .map(|c| (c.customer_id.clone(), c.name.clone()))
6391 .collect();
6392 let material_data: Vec<(String, String)> = self
6393 .master_data
6394 .materials
6395 .iter()
6396 .map(|m| (m.material_id.clone(), m.description.clone()))
6397 .collect();
6398
6399 if !customer_data.is_empty() && !material_data.is_empty() {
6400 let employee_ids: Vec<String> = self
6401 .master_data
6402 .employees
6403 .iter()
6404 .map(|e| e.employee_id.clone())
6405 .collect();
6406 let customer_ids: Vec<String> = self
6407 .master_data
6408 .customers
6409 .iter()
6410 .map(|c| c.customer_id.clone())
6411 .collect();
6412 let company_currency = self
6413 .config
6414 .companies
6415 .first()
6416 .map(|c| c.currency.as_str())
6417 .unwrap_or("USD");
6418
6419 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
6420 .with_pools(employee_ids, customer_ids);
6421 let quotes = quote_gen.generate_with_currency(
6422 company_code,
6423 &customer_data,
6424 &material_data,
6425 start_date,
6426 end_date,
6427 &self.config.sales_quotes,
6428 company_currency,
6429 );
6430 snapshot.sales_quote_count = quotes.len();
6431 snapshot.sales_quotes = quotes;
6432 }
6433 }
6434
6435 if self.config.financial_reporting.management_kpis.enabled {
6437 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
6438 let mut kpis = kpi_gen.generate(
6439 company_code,
6440 start_date,
6441 end_date,
6442 &self.config.financial_reporting.management_kpis,
6443 );
6444
6445 {
6447 use rust_decimal::Decimal;
6448
6449 if let Some(income_stmt) =
6450 financial_reporting.financial_statements.iter().find(|fs| {
6451 fs.statement_type == StatementType::IncomeStatement
6452 && fs.company_code == company_code
6453 })
6454 {
6455 let total_revenue: Decimal = income_stmt
6457 .line_items
6458 .iter()
6459 .filter(|li| li.section.contains("Revenue") && !li.is_total)
6460 .map(|li| li.amount)
6461 .sum();
6462 let total_cogs: Decimal = income_stmt
6463 .line_items
6464 .iter()
6465 .filter(|li| {
6466 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
6467 && !li.is_total
6468 })
6469 .map(|li| li.amount.abs())
6470 .sum();
6471 let total_opex: Decimal = income_stmt
6472 .line_items
6473 .iter()
6474 .filter(|li| {
6475 li.section.contains("Expense")
6476 && !li.is_total
6477 && !li.section.contains("Cost")
6478 })
6479 .map(|li| li.amount.abs())
6480 .sum();
6481
6482 if total_revenue > Decimal::ZERO {
6483 let hundred = Decimal::from(100);
6484 let gross_margin_pct =
6485 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
6486 let operating_income = total_revenue - total_cogs - total_opex;
6487 let op_margin_pct =
6488 (operating_income * hundred / total_revenue).round_dp(2);
6489
6490 for kpi in &mut kpis {
6492 if kpi.name == "Gross Margin" {
6493 kpi.value = gross_margin_pct;
6494 } else if kpi.name == "Operating Margin" {
6495 kpi.value = op_margin_pct;
6496 }
6497 }
6498 }
6499 }
6500
6501 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
6503 fs.statement_type == StatementType::BalanceSheet
6504 && fs.company_code == company_code
6505 }) {
6506 let current_assets: Decimal = bs
6507 .line_items
6508 .iter()
6509 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
6510 .map(|li| li.amount)
6511 .sum();
6512 let current_liabilities: Decimal = bs
6513 .line_items
6514 .iter()
6515 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
6516 .map(|li| li.amount.abs())
6517 .sum();
6518
6519 if current_liabilities > Decimal::ZERO {
6520 let current_ratio = (current_assets / current_liabilities).round_dp(2);
6521 for kpi in &mut kpis {
6522 if kpi.name == "Current Ratio" {
6523 kpi.value = current_ratio;
6524 }
6525 }
6526 }
6527 }
6528 }
6529
6530 snapshot.kpi_count = kpis.len();
6531 snapshot.kpis = kpis;
6532 }
6533
6534 if self.config.financial_reporting.budgets.enabled {
6536 let account_data: Vec<(String, String)> = coa
6537 .accounts
6538 .iter()
6539 .map(|a| (a.account_number.clone(), a.short_description.clone()))
6540 .collect();
6541
6542 if !account_data.is_empty() {
6543 let fiscal_year = start_date.year() as u32;
6544 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
6545 let budget = budget_gen.generate(
6546 company_code,
6547 fiscal_year,
6548 &account_data,
6549 &self.config.financial_reporting.budgets,
6550 );
6551 snapshot.budget_line_count = budget.line_items.len();
6552 snapshot.budgets.push(budget);
6553 }
6554 }
6555
6556 stats.sales_quote_count = snapshot.sales_quote_count;
6557 stats.kpi_count = snapshot.kpi_count;
6558 stats.budget_line_count = snapshot.budget_line_count;
6559
6560 info!(
6561 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
6562 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
6563 );
6564 self.check_resources_with_log("post-sales-kpi-budgets")?;
6565
6566 Ok(snapshot)
6567 }
6568
6569 fn phase_tax_generation(
6571 &mut self,
6572 document_flows: &DocumentFlowSnapshot,
6573 journal_entries: &[JournalEntry],
6574 stats: &mut EnhancedGenerationStatistics,
6575 ) -> SynthResult<TaxSnapshot> {
6576 if !self.phase_config.generate_tax {
6577 debug!("Phase 20: Skipped (tax generation disabled)");
6578 return Ok(TaxSnapshot::default());
6579 }
6580 info!("Phase 20: Generating Tax Data");
6581
6582 let seed = self.seed;
6583 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6584 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6585 let fiscal_year = start_date.year();
6586 let company_code = self
6587 .config
6588 .companies
6589 .first()
6590 .map(|c| c.code.as_str())
6591 .unwrap_or("1000");
6592
6593 let mut gen =
6594 datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
6595
6596 let pack = self.primary_pack().clone();
6597 let (jurisdictions, codes) =
6598 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
6599
6600 let mut provisions = Vec::new();
6602 if self.config.tax.provisions.enabled {
6603 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
6604 for company in &self.config.companies {
6605 let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
6606 let statutory_rate = rust_decimal::Decimal::new(
6607 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
6608 2,
6609 );
6610 let provision = provision_gen.generate(
6611 &company.code,
6612 start_date,
6613 pre_tax_income,
6614 statutory_rate,
6615 );
6616 provisions.push(provision);
6617 }
6618 }
6619
6620 let mut tax_lines = Vec::new();
6622 if !codes.is_empty() {
6623 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
6624 datasynth_generators::TaxLineGeneratorConfig::default(),
6625 codes.clone(),
6626 seed + 72,
6627 );
6628
6629 let buyer_country = self
6632 .config
6633 .companies
6634 .first()
6635 .map(|c| c.country.as_str())
6636 .unwrap_or("US");
6637 for vi in &document_flows.vendor_invoices {
6638 let lines = tax_line_gen.generate_for_document(
6639 datasynth_core::models::TaxableDocumentType::VendorInvoice,
6640 &vi.header.document_id,
6641 buyer_country, buyer_country,
6643 vi.payable_amount,
6644 vi.header.document_date,
6645 None,
6646 );
6647 tax_lines.extend(lines);
6648 }
6649
6650 for ci in &document_flows.customer_invoices {
6652 let lines = tax_line_gen.generate_for_document(
6653 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
6654 &ci.header.document_id,
6655 buyer_country, buyer_country,
6657 ci.total_gross_amount,
6658 ci.header.document_date,
6659 None,
6660 );
6661 tax_lines.extend(lines);
6662 }
6663 }
6664
6665 let deferred_tax = {
6667 let companies: Vec<(&str, &str)> = self
6668 .config
6669 .companies
6670 .iter()
6671 .map(|c| (c.code.as_str(), c.country.as_str()))
6672 .collect();
6673 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 73);
6674 deferred_gen.generate(&companies, start_date, journal_entries)
6675 };
6676
6677 let snapshot = TaxSnapshot {
6678 jurisdiction_count: jurisdictions.len(),
6679 code_count: codes.len(),
6680 jurisdictions,
6681 codes,
6682 tax_provisions: provisions,
6683 tax_lines,
6684 tax_returns: Vec::new(),
6685 withholding_records: Vec::new(),
6686 tax_anomaly_labels: Vec::new(),
6687 deferred_tax,
6688 };
6689
6690 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
6691 stats.tax_code_count = snapshot.code_count;
6692 stats.tax_provision_count = snapshot.tax_provisions.len();
6693 stats.tax_line_count = snapshot.tax_lines.len();
6694
6695 info!(
6696 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs",
6697 snapshot.jurisdiction_count,
6698 snapshot.code_count,
6699 snapshot.tax_provisions.len(),
6700 snapshot.deferred_tax.temporary_differences.len(),
6701 snapshot.deferred_tax.journal_entries.len(),
6702 );
6703 self.check_resources_with_log("post-tax")?;
6704
6705 Ok(snapshot)
6706 }
6707
6708 fn phase_esg_generation(
6710 &mut self,
6711 document_flows: &DocumentFlowSnapshot,
6712 stats: &mut EnhancedGenerationStatistics,
6713 ) -> SynthResult<EsgSnapshot> {
6714 if !self.phase_config.generate_esg {
6715 debug!("Phase 21: Skipped (ESG generation disabled)");
6716 return Ok(EsgSnapshot::default());
6717 }
6718 let degradation = self.check_resources()?;
6719 if degradation >= DegradationLevel::Reduced {
6720 debug!(
6721 "Phase skipped due to resource pressure (degradation: {:?})",
6722 degradation
6723 );
6724 return Ok(EsgSnapshot::default());
6725 }
6726 info!("Phase 21: Generating ESG Data");
6727
6728 let seed = self.seed;
6729 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6730 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6731 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6732 let entity_id = self
6733 .config
6734 .companies
6735 .first()
6736 .map(|c| c.code.as_str())
6737 .unwrap_or("1000");
6738
6739 let esg_cfg = &self.config.esg;
6740 let mut snapshot = EsgSnapshot::default();
6741
6742 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
6744 esg_cfg.environmental.energy.clone(),
6745 seed + 80,
6746 );
6747 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
6748
6749 let facility_count = esg_cfg.environmental.energy.facility_count;
6751 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
6752 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
6753
6754 let mut waste_gen = datasynth_generators::WasteGenerator::new(
6756 seed + 82,
6757 esg_cfg.environmental.waste.diversion_target,
6758 facility_count,
6759 );
6760 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
6761
6762 let mut emission_gen =
6764 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
6765
6766 let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
6768 .iter()
6769 .map(|e| datasynth_generators::EnergyInput {
6770 facility_id: e.facility_id.clone(),
6771 energy_type: match e.energy_source {
6772 EnergySourceType::NaturalGas => {
6773 datasynth_generators::EnergyInputType::NaturalGas
6774 }
6775 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
6776 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
6777 _ => datasynth_generators::EnergyInputType::Electricity,
6778 },
6779 consumption_kwh: e.consumption_kwh,
6780 period: e.period,
6781 })
6782 .collect();
6783
6784 let mut emissions = Vec::new();
6785 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
6786 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
6787
6788 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
6790 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
6791 for payment in &document_flows.payments {
6792 if payment.is_vendor {
6793 *totals
6794 .entry(payment.business_partner_id.clone())
6795 .or_default() += payment.amount;
6796 }
6797 }
6798 totals
6799 };
6800 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
6801 .master_data
6802 .vendors
6803 .iter()
6804 .map(|v| {
6805 let spend = vendor_payment_totals
6806 .get(&v.vendor_id)
6807 .copied()
6808 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
6809 datasynth_generators::VendorSpendInput {
6810 vendor_id: v.vendor_id.clone(),
6811 category: format!("{:?}", v.vendor_type).to_lowercase(),
6812 spend,
6813 country: v.country.clone(),
6814 }
6815 })
6816 .collect();
6817 if !vendor_spend.is_empty() {
6818 emissions.extend(emission_gen.generate_scope3_purchased_goods(
6819 entity_id,
6820 &vendor_spend,
6821 start_date,
6822 end_date,
6823 ));
6824 }
6825
6826 let headcount = self.master_data.employees.len() as u32;
6828 if headcount > 0 {
6829 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
6830 emissions.extend(emission_gen.generate_scope3_business_travel(
6831 entity_id,
6832 travel_spend,
6833 start_date,
6834 ));
6835 emissions
6836 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
6837 }
6838
6839 snapshot.emission_count = emissions.len();
6840 snapshot.emissions = emissions;
6841 snapshot.energy = energy_records;
6842
6843 let mut workforce_gen =
6845 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
6846 let total_headcount = headcount.max(100);
6847 snapshot.diversity =
6848 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
6849 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
6850 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
6851 entity_id,
6852 facility_count,
6853 start_date,
6854 end_date,
6855 );
6856
6857 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
6860 entity_id,
6861 &snapshot.safety_incidents,
6862 total_hours,
6863 start_date,
6864 );
6865 snapshot.safety_metrics = vec![safety_metric];
6866
6867 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
6869 seed + 85,
6870 esg_cfg.governance.board_size,
6871 esg_cfg.governance.independence_target,
6872 );
6873 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
6874
6875 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
6877 esg_cfg.supply_chain_esg.clone(),
6878 seed + 86,
6879 );
6880 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
6881 .master_data
6882 .vendors
6883 .iter()
6884 .map(|v| datasynth_generators::VendorInput {
6885 vendor_id: v.vendor_id.clone(),
6886 country: v.country.clone(),
6887 industry: format!("{:?}", v.vendor_type).to_lowercase(),
6888 quality_score: None,
6889 })
6890 .collect();
6891 snapshot.supplier_assessments =
6892 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
6893
6894 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
6896 seed + 87,
6897 esg_cfg.reporting.clone(),
6898 esg_cfg.climate_scenarios.clone(),
6899 );
6900 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
6901 snapshot.disclosures = disclosure_gen.generate_disclosures(
6902 entity_id,
6903 &snapshot.materiality,
6904 start_date,
6905 end_date,
6906 );
6907 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
6908 snapshot.disclosure_count = snapshot.disclosures.len();
6909
6910 if esg_cfg.anomaly_rate > 0.0 {
6912 let mut anomaly_injector =
6913 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
6914 let mut labels = Vec::new();
6915 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
6916 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
6917 labels.extend(
6918 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
6919 );
6920 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
6921 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
6922 snapshot.anomaly_labels = labels;
6923 }
6924
6925 stats.esg_emission_count = snapshot.emission_count;
6926 stats.esg_disclosure_count = snapshot.disclosure_count;
6927
6928 info!(
6929 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
6930 snapshot.emission_count,
6931 snapshot.disclosure_count,
6932 snapshot.supplier_assessments.len()
6933 );
6934 self.check_resources_with_log("post-esg")?;
6935
6936 Ok(snapshot)
6937 }
6938
6939 fn phase_treasury_data(
6941 &mut self,
6942 document_flows: &DocumentFlowSnapshot,
6943 subledger: &SubledgerSnapshot,
6944 intercompany: &IntercompanySnapshot,
6945 stats: &mut EnhancedGenerationStatistics,
6946 ) -> SynthResult<TreasurySnapshot> {
6947 if !self.phase_config.generate_treasury {
6948 debug!("Phase 22: Skipped (treasury generation disabled)");
6949 return Ok(TreasurySnapshot::default());
6950 }
6951 let degradation = self.check_resources()?;
6952 if degradation >= DegradationLevel::Reduced {
6953 debug!(
6954 "Phase skipped due to resource pressure (degradation: {:?})",
6955 degradation
6956 );
6957 return Ok(TreasurySnapshot::default());
6958 }
6959 info!("Phase 22: Generating Treasury Data");
6960
6961 let seed = self.seed;
6962 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6963 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6964 let currency = self
6965 .config
6966 .companies
6967 .first()
6968 .map(|c| c.currency.as_str())
6969 .unwrap_or("USD");
6970 let entity_id = self
6971 .config
6972 .companies
6973 .first()
6974 .map(|c| c.code.as_str())
6975 .unwrap_or("1000");
6976
6977 let mut snapshot = TreasurySnapshot::default();
6978
6979 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
6981 self.config.treasury.debt.clone(),
6982 seed + 90,
6983 );
6984 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
6985
6986 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
6988 self.config.treasury.hedging.clone(),
6989 seed + 91,
6990 );
6991 for debt in &snapshot.debt_instruments {
6992 if debt.rate_type == InterestRateType::Variable {
6993 let swap = hedge_gen.generate_ir_swap(
6994 currency,
6995 debt.principal,
6996 debt.origination_date,
6997 debt.maturity_date,
6998 );
6999 snapshot.hedging_instruments.push(swap);
7000 }
7001 }
7002
7003 {
7006 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
7007 for payment in &document_flows.payments {
7008 if payment.currency != currency {
7009 let entry = fx_map
7010 .entry(payment.currency.clone())
7011 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
7012 entry.0 += payment.amount;
7013 if payment.header.document_date > entry.1 {
7015 entry.1 = payment.header.document_date;
7016 }
7017 }
7018 }
7019 if !fx_map.is_empty() {
7020 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
7021 .into_iter()
7022 .map(|(foreign_ccy, (net_amount, settlement_date))| {
7023 datasynth_generators::treasury::FxExposure {
7024 currency_pair: format!("{foreign_ccy}/{currency}"),
7025 foreign_currency: foreign_ccy,
7026 net_amount,
7027 settlement_date,
7028 description: "AP payment FX exposure".to_string(),
7029 }
7030 })
7031 .collect();
7032 let (fx_instruments, fx_relationships) =
7033 hedge_gen.generate(start_date, &fx_exposures);
7034 snapshot.hedging_instruments.extend(fx_instruments);
7035 snapshot.hedge_relationships.extend(fx_relationships);
7036 }
7037 }
7038
7039 if self.config.treasury.anomaly_rate > 0.0 {
7041 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
7042 seed + 92,
7043 self.config.treasury.anomaly_rate,
7044 );
7045 let mut labels = Vec::new();
7046 labels.extend(
7047 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
7048 );
7049 snapshot.treasury_anomaly_labels = labels;
7050 }
7051
7052 if self.config.treasury.cash_positioning.enabled {
7054 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
7055
7056 for payment in &document_flows.payments {
7058 cash_flows.push(datasynth_generators::treasury::CashFlow {
7059 date: payment.header.document_date,
7060 account_id: format!("{entity_id}-MAIN"),
7061 amount: payment.amount,
7062 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
7063 });
7064 }
7065
7066 for chain in &document_flows.o2c_chains {
7068 if let Some(ref receipt) = chain.customer_receipt {
7069 cash_flows.push(datasynth_generators::treasury::CashFlow {
7070 date: receipt.header.document_date,
7071 account_id: format!("{entity_id}-MAIN"),
7072 amount: receipt.amount,
7073 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7074 });
7075 }
7076 for receipt in &chain.remainder_receipts {
7078 cash_flows.push(datasynth_generators::treasury::CashFlow {
7079 date: receipt.header.document_date,
7080 account_id: format!("{entity_id}-MAIN"),
7081 amount: receipt.amount,
7082 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7083 });
7084 }
7085 }
7086
7087 if !cash_flows.is_empty() {
7088 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
7089 self.config.treasury.cash_positioning.clone(),
7090 seed + 93,
7091 );
7092 let account_id = format!("{entity_id}-MAIN");
7093 snapshot.cash_positions = cash_gen.generate(
7094 entity_id,
7095 &account_id,
7096 currency,
7097 &cash_flows,
7098 start_date,
7099 start_date + chrono::Months::new(self.config.global.period_months),
7100 rust_decimal::Decimal::new(1_000_000, 0), );
7102 }
7103 }
7104
7105 if self.config.treasury.cash_forecasting.enabled {
7107 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7108
7109 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
7111 .ar_invoices
7112 .iter()
7113 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7114 .map(|inv| {
7115 let days_past_due = if inv.due_date < end_date {
7116 (end_date - inv.due_date).num_days().max(0) as u32
7117 } else {
7118 0
7119 };
7120 datasynth_generators::treasury::ArAgingItem {
7121 expected_date: inv.due_date,
7122 amount: inv.amount_remaining,
7123 days_past_due,
7124 document_id: inv.invoice_number.clone(),
7125 }
7126 })
7127 .collect();
7128
7129 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
7131 .ap_invoices
7132 .iter()
7133 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7134 .map(|inv| datasynth_generators::treasury::ApAgingItem {
7135 payment_date: inv.due_date,
7136 amount: inv.amount_remaining,
7137 document_id: inv.invoice_number.clone(),
7138 })
7139 .collect();
7140
7141 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
7142 self.config.treasury.cash_forecasting.clone(),
7143 seed + 94,
7144 );
7145 let forecast = forecast_gen.generate(
7146 entity_id,
7147 currency,
7148 end_date,
7149 &ar_items,
7150 &ap_items,
7151 &[], );
7153 snapshot.cash_forecasts.push(forecast);
7154 }
7155
7156 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
7158 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7159 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
7160 self.config.treasury.cash_pooling.clone(),
7161 seed + 95,
7162 );
7163
7164 let account_ids: Vec<String> = snapshot
7166 .cash_positions
7167 .iter()
7168 .map(|cp| cp.bank_account_id.clone())
7169 .collect::<std::collections::HashSet<_>>()
7170 .into_iter()
7171 .collect();
7172
7173 if let Some(pool) =
7174 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
7175 {
7176 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7178 for cp in &snapshot.cash_positions {
7179 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
7180 }
7181
7182 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
7183 latest_balances
7184 .into_iter()
7185 .filter(|(id, _)| pool.participant_accounts.contains(id))
7186 .map(
7187 |(id, balance)| datasynth_generators::treasury::AccountBalance {
7188 account_id: id,
7189 balance,
7190 },
7191 )
7192 .collect();
7193
7194 let sweeps =
7195 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
7196 snapshot.cash_pool_sweeps = sweeps;
7197 snapshot.cash_pools.push(pool);
7198 }
7199 }
7200
7201 if self.config.treasury.bank_guarantees.enabled {
7203 let vendor_names: Vec<String> = self
7204 .master_data
7205 .vendors
7206 .iter()
7207 .map(|v| v.name.clone())
7208 .collect();
7209 if !vendor_names.is_empty() {
7210 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
7211 self.config.treasury.bank_guarantees.clone(),
7212 seed + 96,
7213 );
7214 snapshot.bank_guarantees =
7215 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
7216 }
7217 }
7218
7219 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
7221 let entity_ids: Vec<String> = self
7222 .config
7223 .companies
7224 .iter()
7225 .map(|c| c.code.clone())
7226 .collect();
7227 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
7228 .matched_pairs
7229 .iter()
7230 .map(|mp| {
7231 (
7232 mp.seller_company.clone(),
7233 mp.buyer_company.clone(),
7234 mp.amount,
7235 )
7236 })
7237 .collect();
7238 if entity_ids.len() >= 2 {
7239 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
7240 self.config.treasury.netting.clone(),
7241 seed + 97,
7242 );
7243 snapshot.netting_runs = netting_gen.generate(
7244 &entity_ids,
7245 currency,
7246 start_date,
7247 self.config.global.period_months,
7248 &ic_amounts,
7249 );
7250 }
7251 }
7252
7253 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
7254 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
7255 stats.cash_position_count = snapshot.cash_positions.len();
7256 stats.cash_forecast_count = snapshot.cash_forecasts.len();
7257 stats.cash_pool_count = snapshot.cash_pools.len();
7258
7259 info!(
7260 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs",
7261 snapshot.debt_instruments.len(),
7262 snapshot.hedging_instruments.len(),
7263 snapshot.cash_positions.len(),
7264 snapshot.cash_forecasts.len(),
7265 snapshot.cash_pools.len(),
7266 snapshot.bank_guarantees.len(),
7267 snapshot.netting_runs.len(),
7268 );
7269 self.check_resources_with_log("post-treasury")?;
7270
7271 Ok(snapshot)
7272 }
7273
7274 fn phase_project_accounting(
7276 &mut self,
7277 document_flows: &DocumentFlowSnapshot,
7278 hr: &HrSnapshot,
7279 stats: &mut EnhancedGenerationStatistics,
7280 ) -> SynthResult<ProjectAccountingSnapshot> {
7281 if !self.phase_config.generate_project_accounting {
7282 debug!("Phase 23: Skipped (project accounting disabled)");
7283 return Ok(ProjectAccountingSnapshot::default());
7284 }
7285 let degradation = self.check_resources()?;
7286 if degradation >= DegradationLevel::Reduced {
7287 debug!(
7288 "Phase skipped due to resource pressure (degradation: {:?})",
7289 degradation
7290 );
7291 return Ok(ProjectAccountingSnapshot::default());
7292 }
7293 info!("Phase 23: Generating Project Accounting Data");
7294
7295 let seed = self.seed;
7296 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7297 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7298 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7299 let company_code = self
7300 .config
7301 .companies
7302 .first()
7303 .map(|c| c.code.as_str())
7304 .unwrap_or("1000");
7305
7306 let mut snapshot = ProjectAccountingSnapshot::default();
7307
7308 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
7310 self.config.project_accounting.clone(),
7311 seed + 95,
7312 );
7313 let pool = project_gen.generate(company_code, start_date, end_date);
7314 snapshot.projects = pool.projects.clone();
7315
7316 {
7318 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
7319 Vec::new();
7320
7321 for te in &hr.time_entries {
7323 let total_hours = te.hours_regular + te.hours_overtime;
7324 if total_hours > 0.0 {
7325 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7326 id: te.entry_id.clone(),
7327 entity_id: company_code.to_string(),
7328 date: te.date,
7329 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
7330 .unwrap_or(rust_decimal::Decimal::ZERO),
7331 source_type: CostSourceType::TimeEntry,
7332 hours: Some(
7333 rust_decimal::Decimal::from_f64_retain(total_hours)
7334 .unwrap_or(rust_decimal::Decimal::ZERO),
7335 ),
7336 });
7337 }
7338 }
7339
7340 for er in &hr.expense_reports {
7342 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7343 id: er.report_id.clone(),
7344 entity_id: company_code.to_string(),
7345 date: er.submission_date,
7346 amount: er.total_amount,
7347 source_type: CostSourceType::ExpenseReport,
7348 hours: None,
7349 });
7350 }
7351
7352 for po in &document_flows.purchase_orders {
7354 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7355 id: po.header.document_id.clone(),
7356 entity_id: company_code.to_string(),
7357 date: po.header.document_date,
7358 amount: po.total_net_amount,
7359 source_type: CostSourceType::PurchaseOrder,
7360 hours: None,
7361 });
7362 }
7363
7364 for vi in &document_flows.vendor_invoices {
7366 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7367 id: vi.header.document_id.clone(),
7368 entity_id: company_code.to_string(),
7369 date: vi.header.document_date,
7370 amount: vi.payable_amount,
7371 source_type: CostSourceType::VendorInvoice,
7372 hours: None,
7373 });
7374 }
7375
7376 if !source_docs.is_empty() && !pool.projects.is_empty() {
7377 let mut cost_gen =
7378 datasynth_generators::project_accounting::ProjectCostGenerator::new(
7379 self.config.project_accounting.cost_allocation.clone(),
7380 seed + 99,
7381 );
7382 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
7383 }
7384 }
7385
7386 if self.config.project_accounting.change_orders.enabled {
7388 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
7389 self.config.project_accounting.change_orders.clone(),
7390 seed + 96,
7391 );
7392 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
7393 }
7394
7395 if self.config.project_accounting.milestones.enabled {
7397 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
7398 self.config.project_accounting.milestones.clone(),
7399 seed + 97,
7400 );
7401 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
7402 }
7403
7404 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
7406 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
7407 self.config.project_accounting.earned_value.clone(),
7408 seed + 98,
7409 );
7410 snapshot.earned_value_metrics =
7411 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
7412 }
7413
7414 if self.config.project_accounting.revenue_recognition.enabled
7416 && !snapshot.projects.is_empty()
7417 && !snapshot.cost_lines.is_empty()
7418 {
7419 use datasynth_generators::project_accounting::RevenueGenerator;
7420 let rev_config = self.config.project_accounting.revenue_recognition.clone();
7421 let avg_contract_value =
7422 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
7423 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
7424
7425 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
7428 snapshot
7429 .projects
7430 .iter()
7431 .filter(|p| {
7432 matches!(
7433 p.project_type,
7434 datasynth_core::models::ProjectType::Customer
7435 )
7436 })
7437 .map(|p| {
7438 let cv = if p.budget > rust_decimal::Decimal::ZERO {
7439 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
7440 } else {
7442 avg_contract_value
7443 };
7444 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
7446 })
7447 .collect();
7448
7449 if !contract_values.is_empty() {
7450 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
7451 snapshot.revenue_records = rev_gen.generate(
7452 &snapshot.projects,
7453 &snapshot.cost_lines,
7454 &contract_values,
7455 start_date,
7456 end_date,
7457 );
7458 debug!(
7459 "Generated {} revenue recognition records for {} customer projects",
7460 snapshot.revenue_records.len(),
7461 contract_values.len()
7462 );
7463 }
7464 }
7465
7466 stats.project_count = snapshot.projects.len();
7467 stats.project_change_order_count = snapshot.change_orders.len();
7468 stats.project_cost_line_count = snapshot.cost_lines.len();
7469
7470 info!(
7471 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
7472 snapshot.projects.len(),
7473 snapshot.change_orders.len(),
7474 snapshot.milestones.len(),
7475 snapshot.earned_value_metrics.len()
7476 );
7477 self.check_resources_with_log("post-project-accounting")?;
7478
7479 Ok(snapshot)
7480 }
7481
7482 fn phase_evolution_events(
7484 &mut self,
7485 stats: &mut EnhancedGenerationStatistics,
7486 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
7487 if !self.phase_config.generate_evolution_events {
7488 debug!("Phase 24: Skipped (evolution events disabled)");
7489 return Ok((Vec::new(), Vec::new()));
7490 }
7491 info!("Phase 24: Generating Process Evolution + Organizational Events");
7492
7493 let seed = self.seed;
7494 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7495 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7496 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7497
7498 let mut proc_gen =
7500 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
7501 seed + 100,
7502 );
7503 let process_events = proc_gen.generate_events(start_date, end_date);
7504
7505 let company_codes: Vec<String> = self
7507 .config
7508 .companies
7509 .iter()
7510 .map(|c| c.code.clone())
7511 .collect();
7512 let mut org_gen =
7513 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
7514 seed + 101,
7515 );
7516 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
7517
7518 stats.process_evolution_event_count = process_events.len();
7519 stats.organizational_event_count = org_events.len();
7520
7521 info!(
7522 "Evolution events generated: {} process evolution, {} organizational",
7523 process_events.len(),
7524 org_events.len()
7525 );
7526 self.check_resources_with_log("post-evolution-events")?;
7527
7528 Ok((process_events, org_events))
7529 }
7530
7531 fn phase_disruption_events(
7534 &self,
7535 stats: &mut EnhancedGenerationStatistics,
7536 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
7537 if !self.config.organizational_events.enabled {
7538 debug!("Phase 24b: Skipped (organizational events disabled)");
7539 return Ok(Vec::new());
7540 }
7541 info!("Phase 24b: Generating Disruption Events");
7542
7543 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7544 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7545 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7546
7547 let company_codes: Vec<String> = self
7548 .config
7549 .companies
7550 .iter()
7551 .map(|c| c.code.clone())
7552 .collect();
7553
7554 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
7555 let events = gen.generate(start_date, end_date, &company_codes);
7556
7557 stats.disruption_event_count = events.len();
7558 info!("Disruption events generated: {} events", events.len());
7559 self.check_resources_with_log("post-disruption-events")?;
7560
7561 Ok(events)
7562 }
7563
7564 fn phase_counterfactuals(
7571 &self,
7572 journal_entries: &[JournalEntry],
7573 stats: &mut EnhancedGenerationStatistics,
7574 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
7575 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
7576 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
7577 return Ok(Vec::new());
7578 }
7579 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
7580
7581 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
7582
7583 let mut gen = CounterfactualGenerator::new(self.seed + 110);
7584
7585 let specs = [
7587 CounterfactualSpec::ScaleAmount { factor: 2.5 },
7588 CounterfactualSpec::ShiftDate { days: -14 },
7589 CounterfactualSpec::SelfApprove,
7590 CounterfactualSpec::SplitTransaction { split_count: 3 },
7591 ];
7592
7593 let pairs: Vec<_> = journal_entries
7594 .iter()
7595 .enumerate()
7596 .map(|(i, je)| {
7597 let spec = &specs[i % specs.len()];
7598 gen.generate(je, spec)
7599 })
7600 .collect();
7601
7602 stats.counterfactual_pair_count = pairs.len();
7603 info!(
7604 "Counterfactual pairs generated: {} pairs from {} journal entries",
7605 pairs.len(),
7606 journal_entries.len()
7607 );
7608 self.check_resources_with_log("post-counterfactuals")?;
7609
7610 Ok(pairs)
7611 }
7612
7613 fn phase_red_flags(
7620 &self,
7621 anomaly_labels: &AnomalyLabels,
7622 document_flows: &DocumentFlowSnapshot,
7623 stats: &mut EnhancedGenerationStatistics,
7624 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
7625 if !self.config.fraud.enabled {
7626 debug!("Phase 26: Skipped (fraud generation disabled)");
7627 return Ok(Vec::new());
7628 }
7629 info!("Phase 26: Generating Fraud Red-Flag Indicators");
7630
7631 use datasynth_generators::fraud::RedFlagGenerator;
7632
7633 let generator = RedFlagGenerator::new();
7634 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
7635
7636 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
7638 .labels
7639 .iter()
7640 .filter(|label| label.anomaly_type.is_intentional())
7641 .map(|label| label.document_id.as_str())
7642 .collect();
7643
7644 let mut flags = Vec::new();
7645
7646 for chain in &document_flows.p2p_chains {
7648 let doc_id = &chain.purchase_order.header.document_id;
7649 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
7650 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
7651 }
7652
7653 for chain in &document_flows.o2c_chains {
7655 let doc_id = &chain.sales_order.header.document_id;
7656 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
7657 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
7658 }
7659
7660 stats.red_flag_count = flags.len();
7661 info!(
7662 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
7663 flags.len(),
7664 document_flows.p2p_chains.len(),
7665 document_flows.o2c_chains.len(),
7666 fraud_doc_ids.len()
7667 );
7668 self.check_resources_with_log("post-red-flags")?;
7669
7670 Ok(flags)
7671 }
7672
7673 fn phase_collusion_rings(
7679 &mut self,
7680 stats: &mut EnhancedGenerationStatistics,
7681 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
7682 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
7683 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
7684 return Ok(Vec::new());
7685 }
7686 info!("Phase 26b: Generating Collusion Rings");
7687
7688 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7689 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7690 let months = self.config.global.period_months;
7691
7692 let employee_ids: Vec<String> = self
7693 .master_data
7694 .employees
7695 .iter()
7696 .map(|e| e.employee_id.clone())
7697 .collect();
7698 let vendor_ids: Vec<String> = self
7699 .master_data
7700 .vendors
7701 .iter()
7702 .map(|v| v.vendor_id.clone())
7703 .collect();
7704
7705 let mut generator =
7706 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
7707 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
7708
7709 stats.collusion_ring_count = rings.len();
7710 info!(
7711 "Collusion rings generated: {} rings, total members: {}",
7712 rings.len(),
7713 rings
7714 .iter()
7715 .map(datasynth_generators::fraud::CollusionRing::size)
7716 .sum::<usize>()
7717 );
7718 self.check_resources_with_log("post-collusion-rings")?;
7719
7720 Ok(rings)
7721 }
7722
7723 fn phase_temporal_attributes(
7728 &mut self,
7729 stats: &mut EnhancedGenerationStatistics,
7730 ) -> SynthResult<
7731 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
7732 > {
7733 if !self.config.temporal_attributes.enabled {
7734 debug!("Phase 27: Skipped (temporal attributes disabled)");
7735 return Ok(Vec::new());
7736 }
7737 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
7738
7739 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7740 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7741
7742 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
7746 || self.config.temporal_attributes.enabled;
7747 let temporal_config = {
7748 let ta = &self.config.temporal_attributes;
7749 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
7750 .enabled(ta.enabled)
7751 .closed_probability(ta.valid_time.closed_probability)
7752 .avg_validity_days(ta.valid_time.avg_validity_days)
7753 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
7754 .with_version_chains(if generate_version_chains {
7755 ta.avg_versions_per_entity
7756 } else {
7757 1.0
7758 })
7759 .build()
7760 };
7761 let temporal_config = if self
7763 .config
7764 .temporal_attributes
7765 .transaction_time
7766 .allow_backdating
7767 {
7768 let mut c = temporal_config;
7769 c.transaction_time.allow_backdating = true;
7770 c.transaction_time.backdating_probability = self
7771 .config
7772 .temporal_attributes
7773 .transaction_time
7774 .backdating_probability;
7775 c.transaction_time.max_backdate_days = self
7776 .config
7777 .temporal_attributes
7778 .transaction_time
7779 .max_backdate_days;
7780 c
7781 } else {
7782 temporal_config
7783 };
7784 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
7785 temporal_config,
7786 self.seed + 130,
7787 start_date,
7788 );
7789
7790 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
7791 self.seed + 130,
7792 datasynth_core::GeneratorType::Vendor,
7793 );
7794
7795 let chains: Vec<_> = self
7796 .master_data
7797 .vendors
7798 .iter()
7799 .map(|vendor| {
7800 let id = uuid_factory.next();
7801 gen.generate_version_chain(vendor.clone(), id)
7802 })
7803 .collect();
7804
7805 stats.temporal_version_chain_count = chains.len();
7806 info!("Temporal version chains generated: {} chains", chains.len());
7807 self.check_resources_with_log("post-temporal-attributes")?;
7808
7809 Ok(chains)
7810 }
7811
7812 fn phase_entity_relationships(
7822 &self,
7823 journal_entries: &[JournalEntry],
7824 document_flows: &DocumentFlowSnapshot,
7825 stats: &mut EnhancedGenerationStatistics,
7826 ) -> SynthResult<(
7827 Option<datasynth_core::models::EntityGraph>,
7828 Vec<datasynth_core::models::CrossProcessLink>,
7829 )> {
7830 use datasynth_generators::relationships::{
7831 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
7832 TransactionSummary,
7833 };
7834
7835 let rs_enabled = self.config.relationship_strength.enabled;
7836 let cpl_enabled = self.config.cross_process_links.enabled
7837 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
7838
7839 if !rs_enabled && !cpl_enabled {
7840 debug!(
7841 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
7842 );
7843 return Ok((None, Vec::new()));
7844 }
7845
7846 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
7847
7848 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7849 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7850
7851 let company_code = self
7852 .config
7853 .companies
7854 .first()
7855 .map(|c| c.code.as_str())
7856 .unwrap_or("1000");
7857
7858 let gen_config = EntityGraphConfig {
7860 enabled: rs_enabled,
7861 cross_process: datasynth_generators::relationships::CrossProcessConfig {
7862 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
7863 enable_return_flows: false,
7864 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
7865 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
7866 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
7868 1.0
7869 } else {
7870 0.30
7871 },
7872 ..Default::default()
7873 },
7874 strength_config: datasynth_generators::relationships::StrengthConfig {
7875 transaction_volume_weight: self
7876 .config
7877 .relationship_strength
7878 .calculation
7879 .transaction_volume_weight,
7880 transaction_count_weight: self
7881 .config
7882 .relationship_strength
7883 .calculation
7884 .transaction_count_weight,
7885 duration_weight: self
7886 .config
7887 .relationship_strength
7888 .calculation
7889 .relationship_duration_weight,
7890 recency_weight: self.config.relationship_strength.calculation.recency_weight,
7891 mutual_connections_weight: self
7892 .config
7893 .relationship_strength
7894 .calculation
7895 .mutual_connections_weight,
7896 recency_half_life_days: self
7897 .config
7898 .relationship_strength
7899 .calculation
7900 .recency_half_life_days,
7901 },
7902 ..Default::default()
7903 };
7904
7905 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
7906
7907 let entity_graph = if rs_enabled {
7909 let vendor_summaries: Vec<EntitySummary> = self
7911 .master_data
7912 .vendors
7913 .iter()
7914 .map(|v| {
7915 EntitySummary::new(
7916 &v.vendor_id,
7917 &v.name,
7918 datasynth_core::models::GraphEntityType::Vendor,
7919 start_date,
7920 )
7921 })
7922 .collect();
7923
7924 let customer_summaries: Vec<EntitySummary> = self
7925 .master_data
7926 .customers
7927 .iter()
7928 .map(|c| {
7929 EntitySummary::new(
7930 &c.customer_id,
7931 &c.name,
7932 datasynth_core::models::GraphEntityType::Customer,
7933 start_date,
7934 )
7935 })
7936 .collect();
7937
7938 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
7943 std::collections::HashMap::new();
7944
7945 for je in journal_entries {
7946 let cc = je.header.company_code.clone();
7947 let posting_date = je.header.posting_date;
7948 for line in &je.lines {
7949 if let Some(ref tp) = line.trading_partner {
7950 let amount = if line.debit_amount > line.credit_amount {
7951 line.debit_amount
7952 } else {
7953 line.credit_amount
7954 };
7955 let entry = txn_summaries
7956 .entry((cc.clone(), tp.clone()))
7957 .or_insert_with(|| TransactionSummary {
7958 total_volume: rust_decimal::Decimal::ZERO,
7959 transaction_count: 0,
7960 first_transaction_date: posting_date,
7961 last_transaction_date: posting_date,
7962 related_entities: std::collections::HashSet::new(),
7963 });
7964 entry.total_volume += amount;
7965 entry.transaction_count += 1;
7966 if posting_date < entry.first_transaction_date {
7967 entry.first_transaction_date = posting_date;
7968 }
7969 if posting_date > entry.last_transaction_date {
7970 entry.last_transaction_date = posting_date;
7971 }
7972 entry.related_entities.insert(cc.clone());
7973 }
7974 }
7975 }
7976
7977 for chain in &document_flows.p2p_chains {
7980 let cc = chain.purchase_order.header.company_code.clone();
7981 let vendor_id = chain.purchase_order.vendor_id.clone();
7982 let po_date = chain.purchase_order.header.document_date;
7983 let amount = chain.purchase_order.total_net_amount;
7984
7985 let entry = txn_summaries
7986 .entry((cc.clone(), vendor_id))
7987 .or_insert_with(|| TransactionSummary {
7988 total_volume: rust_decimal::Decimal::ZERO,
7989 transaction_count: 0,
7990 first_transaction_date: po_date,
7991 last_transaction_date: po_date,
7992 related_entities: std::collections::HashSet::new(),
7993 });
7994 entry.total_volume += amount;
7995 entry.transaction_count += 1;
7996 if po_date < entry.first_transaction_date {
7997 entry.first_transaction_date = po_date;
7998 }
7999 if po_date > entry.last_transaction_date {
8000 entry.last_transaction_date = po_date;
8001 }
8002 entry.related_entities.insert(cc);
8003 }
8004
8005 for chain in &document_flows.o2c_chains {
8007 let cc = chain.sales_order.header.company_code.clone();
8008 let customer_id = chain.sales_order.customer_id.clone();
8009 let so_date = chain.sales_order.header.document_date;
8010 let amount = chain.sales_order.total_net_amount;
8011
8012 let entry = txn_summaries
8013 .entry((cc.clone(), customer_id))
8014 .or_insert_with(|| TransactionSummary {
8015 total_volume: rust_decimal::Decimal::ZERO,
8016 transaction_count: 0,
8017 first_transaction_date: so_date,
8018 last_transaction_date: so_date,
8019 related_entities: std::collections::HashSet::new(),
8020 });
8021 entry.total_volume += amount;
8022 entry.transaction_count += 1;
8023 if so_date < entry.first_transaction_date {
8024 entry.first_transaction_date = so_date;
8025 }
8026 if so_date > entry.last_transaction_date {
8027 entry.last_transaction_date = so_date;
8028 }
8029 entry.related_entities.insert(cc);
8030 }
8031
8032 let as_of_date = journal_entries
8033 .last()
8034 .map(|je| je.header.posting_date)
8035 .unwrap_or(start_date);
8036
8037 let graph = gen.generate_entity_graph(
8038 company_code,
8039 as_of_date,
8040 &vendor_summaries,
8041 &customer_summaries,
8042 &txn_summaries,
8043 );
8044
8045 info!(
8046 "Entity relationship graph: {} nodes, {} edges",
8047 graph.nodes.len(),
8048 graph.edges.len()
8049 );
8050 stats.entity_relationship_node_count = graph.nodes.len();
8051 stats.entity_relationship_edge_count = graph.edges.len();
8052 Some(graph)
8053 } else {
8054 None
8055 };
8056
8057 let cross_process_links = if cpl_enabled {
8059 let gr_refs: Vec<GoodsReceiptRef> = document_flows
8061 .p2p_chains
8062 .iter()
8063 .flat_map(|chain| {
8064 let vendor_id = chain.purchase_order.vendor_id.clone();
8065 let cc = chain.purchase_order.header.company_code.clone();
8066 chain.goods_receipts.iter().flat_map(move |gr| {
8067 gr.items.iter().filter_map({
8068 let doc_id = gr.header.document_id.clone();
8069 let v_id = vendor_id.clone();
8070 let company = cc.clone();
8071 let receipt_date = gr.header.document_date;
8072 move |item| {
8073 item.base
8074 .material_id
8075 .as_ref()
8076 .map(|mat_id| GoodsReceiptRef {
8077 document_id: doc_id.clone(),
8078 material_id: mat_id.clone(),
8079 quantity: item.base.quantity,
8080 receipt_date,
8081 vendor_id: v_id.clone(),
8082 company_code: company.clone(),
8083 })
8084 }
8085 })
8086 })
8087 })
8088 .collect();
8089
8090 let del_refs: Vec<DeliveryRef> = document_flows
8092 .o2c_chains
8093 .iter()
8094 .flat_map(|chain| {
8095 let customer_id = chain.sales_order.customer_id.clone();
8096 let cc = chain.sales_order.header.company_code.clone();
8097 chain.deliveries.iter().flat_map(move |del| {
8098 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
8099 del.items.iter().filter_map({
8100 let doc_id = del.header.document_id.clone();
8101 let c_id = customer_id.clone();
8102 let company = cc.clone();
8103 move |item| {
8104 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
8105 document_id: doc_id.clone(),
8106 material_id: mat_id.clone(),
8107 quantity: item.base.quantity,
8108 delivery_date,
8109 customer_id: c_id.clone(),
8110 company_code: company.clone(),
8111 })
8112 }
8113 })
8114 })
8115 })
8116 .collect();
8117
8118 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
8119 info!("Cross-process links generated: {} links", links.len());
8120 stats.cross_process_link_count = links.len();
8121 links
8122 } else {
8123 Vec::new()
8124 };
8125
8126 self.check_resources_with_log("post-entity-relationships")?;
8127 Ok((entity_graph, cross_process_links))
8128 }
8129
8130 fn phase_industry_data(
8132 &self,
8133 stats: &mut EnhancedGenerationStatistics,
8134 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
8135 if !self.config.industry_specific.enabled {
8136 return None;
8137 }
8138 info!("Phase 29: Generating industry-specific data");
8139 let output = datasynth_generators::industry::factory::generate_industry_output(
8140 self.config.global.industry,
8141 );
8142 stats.industry_gl_account_count = output.gl_accounts.len();
8143 info!(
8144 "Industry data generated: {} GL accounts for {:?}",
8145 output.gl_accounts.len(),
8146 self.config.global.industry
8147 );
8148 Some(output)
8149 }
8150
8151 fn phase_opening_balances(
8153 &mut self,
8154 coa: &Arc<ChartOfAccounts>,
8155 stats: &mut EnhancedGenerationStatistics,
8156 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
8157 if !self.config.balance.generate_opening_balances {
8158 debug!("Phase 3b: Skipped (opening balance generation disabled)");
8159 return Ok(Vec::new());
8160 }
8161 info!("Phase 3b: Generating Opening Balances");
8162
8163 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8164 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8165 let fiscal_year = start_date.year();
8166
8167 let industry = match self.config.global.industry {
8168 IndustrySector::Manufacturing => IndustryType::Manufacturing,
8169 IndustrySector::Retail => IndustryType::Retail,
8170 IndustrySector::FinancialServices => IndustryType::Financial,
8171 IndustrySector::Healthcare => IndustryType::Healthcare,
8172 IndustrySector::Technology => IndustryType::Technology,
8173 _ => IndustryType::Manufacturing,
8174 };
8175
8176 let config = datasynth_generators::OpeningBalanceConfig {
8177 industry,
8178 ..Default::default()
8179 };
8180 let mut gen =
8181 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
8182
8183 let mut results = Vec::new();
8184 for company in &self.config.companies {
8185 let spec = OpeningBalanceSpec::new(
8186 company.code.clone(),
8187 start_date,
8188 fiscal_year,
8189 company.currency.clone(),
8190 rust_decimal::Decimal::new(10_000_000, 0),
8191 industry,
8192 );
8193 let ob = gen.generate(&spec, coa, start_date, &company.code);
8194 results.push(ob);
8195 }
8196
8197 stats.opening_balance_count = results.len();
8198 info!("Opening balances generated: {} companies", results.len());
8199 self.check_resources_with_log("post-opening-balances")?;
8200
8201 Ok(results)
8202 }
8203
8204 fn phase_subledger_reconciliation(
8206 &mut self,
8207 subledger: &SubledgerSnapshot,
8208 entries: &[JournalEntry],
8209 stats: &mut EnhancedGenerationStatistics,
8210 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
8211 if !self.config.balance.reconcile_subledgers {
8212 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
8213 return Ok(Vec::new());
8214 }
8215 info!("Phase 9b: Reconciling GL to subledger balances");
8216
8217 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8218 .map(|d| d + chrono::Months::new(self.config.global.period_months))
8219 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8220
8221 let tracker_config = BalanceTrackerConfig {
8223 validate_on_each_entry: false,
8224 track_history: false,
8225 fail_on_validation_error: false,
8226 ..Default::default()
8227 };
8228 let recon_currency = self
8229 .config
8230 .companies
8231 .first()
8232 .map(|c| c.currency.clone())
8233 .unwrap_or_else(|| "USD".to_string());
8234 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
8235 let validation_errors = tracker.apply_entries(entries);
8236 if !validation_errors.is_empty() {
8237 warn!(
8238 error_count = validation_errors.len(),
8239 "Balance tracker encountered validation errors during subledger reconciliation"
8240 );
8241 for err in &validation_errors {
8242 debug!("Balance validation error: {:?}", err);
8243 }
8244 }
8245
8246 let mut engine = datasynth_generators::ReconciliationEngine::new(
8247 datasynth_generators::ReconciliationConfig::default(),
8248 );
8249
8250 let mut results = Vec::new();
8251 let company_code = self
8252 .config
8253 .companies
8254 .first()
8255 .map(|c| c.code.as_str())
8256 .unwrap_or("1000");
8257
8258 if !subledger.ar_invoices.is_empty() {
8260 let gl_balance = tracker
8261 .get_account_balance(
8262 company_code,
8263 datasynth_core::accounts::control_accounts::AR_CONTROL,
8264 )
8265 .map(|b| b.closing_balance)
8266 .unwrap_or_default();
8267 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
8268 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
8269 }
8270
8271 if !subledger.ap_invoices.is_empty() {
8273 let gl_balance = tracker
8274 .get_account_balance(
8275 company_code,
8276 datasynth_core::accounts::control_accounts::AP_CONTROL,
8277 )
8278 .map(|b| b.closing_balance)
8279 .unwrap_or_default();
8280 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
8281 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
8282 }
8283
8284 if !subledger.fa_records.is_empty() {
8286 let gl_asset_balance = tracker
8287 .get_account_balance(
8288 company_code,
8289 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
8290 )
8291 .map(|b| b.closing_balance)
8292 .unwrap_or_default();
8293 let gl_accum_depr_balance = tracker
8294 .get_account_balance(
8295 company_code,
8296 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
8297 )
8298 .map(|b| b.closing_balance)
8299 .unwrap_or_default();
8300 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
8301 subledger.fa_records.iter().collect();
8302 let (asset_recon, depr_recon) = engine.reconcile_fa(
8303 company_code,
8304 end_date,
8305 gl_asset_balance,
8306 gl_accum_depr_balance,
8307 &fa_refs,
8308 );
8309 results.push(asset_recon);
8310 results.push(depr_recon);
8311 }
8312
8313 if !subledger.inventory_positions.is_empty() {
8315 let gl_balance = tracker
8316 .get_account_balance(
8317 company_code,
8318 datasynth_core::accounts::control_accounts::INVENTORY,
8319 )
8320 .map(|b| b.closing_balance)
8321 .unwrap_or_default();
8322 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
8323 subledger.inventory_positions.iter().collect();
8324 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
8325 }
8326
8327 stats.subledger_reconciliation_count = results.len();
8328 let passed = results.iter().filter(|r| r.is_balanced()).count();
8329 let failed = results.len() - passed;
8330 info!(
8331 "Subledger reconciliation: {} checks, {} passed, {} failed",
8332 results.len(),
8333 passed,
8334 failed
8335 );
8336 self.check_resources_with_log("post-subledger-reconciliation")?;
8337
8338 Ok(results)
8339 }
8340
8341 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
8343 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
8344
8345 let coa_framework = self.resolve_coa_framework();
8346
8347 let mut gen = ChartOfAccountsGenerator::new(
8348 self.config.chart_of_accounts.complexity,
8349 self.config.global.industry,
8350 self.seed,
8351 )
8352 .with_coa_framework(coa_framework);
8353
8354 let coa = Arc::new(gen.generate());
8355 self.coa = Some(Arc::clone(&coa));
8356
8357 if let Some(pb) = pb {
8358 pb.finish_with_message("Chart of Accounts complete");
8359 }
8360
8361 Ok(coa)
8362 }
8363
8364 fn generate_master_data(&mut self) -> SynthResult<()> {
8366 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8367 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8368 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8369
8370 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
8372
8373 let pack = self.primary_pack().clone();
8375
8376 let vendors_per_company = self.phase_config.vendors_per_company;
8378 let customers_per_company = self.phase_config.customers_per_company;
8379 let materials_per_company = self.phase_config.materials_per_company;
8380 let assets_per_company = self.phase_config.assets_per_company;
8381 let coa_framework = self.resolve_coa_framework();
8382
8383 let per_company_results: Vec<_> = self
8386 .config
8387 .companies
8388 .par_iter()
8389 .enumerate()
8390 .map(|(i, company)| {
8391 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
8392 let pack = pack.clone();
8393
8394 let mut vendor_gen = VendorGenerator::new(company_seed);
8396 vendor_gen.set_country_pack(pack.clone());
8397 vendor_gen.set_coa_framework(coa_framework);
8398 vendor_gen.set_counter_offset(i * vendors_per_company);
8399 if self.config.vendor_network.enabled {
8401 let vn = &self.config.vendor_network;
8402 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
8403 enabled: true,
8404 depth: vn.depth,
8405 tier1_count: datasynth_generators::TierCountConfig::new(
8406 vn.tier1.min,
8407 vn.tier1.max,
8408 ),
8409 tier2_per_parent: datasynth_generators::TierCountConfig::new(
8410 vn.tier2_per_parent.min,
8411 vn.tier2_per_parent.max,
8412 ),
8413 tier3_per_parent: datasynth_generators::TierCountConfig::new(
8414 vn.tier3_per_parent.min,
8415 vn.tier3_per_parent.max,
8416 ),
8417 cluster_distribution: datasynth_generators::ClusterDistribution {
8418 reliable_strategic: vn.clusters.reliable_strategic,
8419 standard_operational: vn.clusters.standard_operational,
8420 transactional: vn.clusters.transactional,
8421 problematic: vn.clusters.problematic,
8422 },
8423 concentration_limits: datasynth_generators::ConcentrationLimits {
8424 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
8425 max_top5: vn.dependencies.top_5_concentration,
8426 },
8427 ..datasynth_generators::VendorNetworkConfig::default()
8428 });
8429 }
8430 let vendor_pool =
8431 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
8432
8433 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
8435 customer_gen.set_country_pack(pack.clone());
8436 customer_gen.set_coa_framework(coa_framework);
8437 customer_gen.set_counter_offset(i * customers_per_company);
8438 if self.config.customer_segmentation.enabled {
8440 let cs = &self.config.customer_segmentation;
8441 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
8442 enabled: true,
8443 segment_distribution: datasynth_generators::SegmentDistribution {
8444 enterprise: cs.value_segments.enterprise.customer_share,
8445 mid_market: cs.value_segments.mid_market.customer_share,
8446 smb: cs.value_segments.smb.customer_share,
8447 consumer: cs.value_segments.consumer.customer_share,
8448 },
8449 referral_config: datasynth_generators::ReferralConfig {
8450 enabled: cs.networks.referrals.enabled,
8451 referral_rate: cs.networks.referrals.referral_rate,
8452 ..Default::default()
8453 },
8454 hierarchy_config: datasynth_generators::HierarchyConfig {
8455 enabled: cs.networks.corporate_hierarchies.enabled,
8456 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
8457 ..Default::default()
8458 },
8459 ..Default::default()
8460 };
8461 customer_gen.set_segmentation_config(seg_cfg);
8462 }
8463 let customer_pool = customer_gen.generate_customer_pool(
8464 customers_per_company,
8465 &company.code,
8466 start_date,
8467 );
8468
8469 let mut material_gen = MaterialGenerator::new(company_seed + 200);
8471 material_gen.set_country_pack(pack.clone());
8472 material_gen.set_counter_offset(i * materials_per_company);
8473 let material_pool = material_gen.generate_material_pool(
8474 materials_per_company,
8475 &company.code,
8476 start_date,
8477 );
8478
8479 let mut asset_gen = AssetGenerator::new(company_seed + 300);
8481 let asset_pool = asset_gen.generate_asset_pool(
8482 assets_per_company,
8483 &company.code,
8484 (start_date, end_date),
8485 );
8486
8487 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
8489 employee_gen.set_country_pack(pack);
8490 let employee_pool =
8491 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
8492
8493 let employee_change_history =
8495 employee_gen.generate_all_change_history(&employee_pool, end_date);
8496
8497 let employee_ids: Vec<String> = employee_pool
8499 .employees
8500 .iter()
8501 .map(|e| e.employee_id.clone())
8502 .collect();
8503 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
8504 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
8505
8506 (
8507 vendor_pool.vendors,
8508 customer_pool.customers,
8509 material_pool.materials,
8510 asset_pool.assets,
8511 employee_pool.employees,
8512 employee_change_history,
8513 cost_centers,
8514 )
8515 })
8516 .collect();
8517
8518 for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
8520 per_company_results
8521 {
8522 self.master_data.vendors.extend(vendors);
8523 self.master_data.customers.extend(customers);
8524 self.master_data.materials.extend(materials);
8525 self.master_data.assets.extend(assets);
8526 self.master_data.employees.extend(employees);
8527 self.master_data.cost_centers.extend(cost_centers);
8528 self.master_data
8529 .employee_change_history
8530 .extend(change_history);
8531 }
8532
8533 if let Some(pb) = &pb {
8534 pb.inc(total);
8535 }
8536 if let Some(pb) = pb {
8537 pb.finish_with_message("Master data generation complete");
8538 }
8539
8540 Ok(())
8541 }
8542
8543 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
8545 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8546 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8547
8548 let months = (self.config.global.period_months as usize).max(1);
8551 let p2p_count = self
8552 .phase_config
8553 .p2p_chains
8554 .min(self.master_data.vendors.len() * 2 * months);
8555 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
8556
8557 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
8559 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
8560 p2p_gen.set_country_pack(self.primary_pack().clone());
8561
8562 for i in 0..p2p_count {
8563 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
8564 let materials: Vec<&Material> = self
8565 .master_data
8566 .materials
8567 .iter()
8568 .skip(i % self.master_data.materials.len().max(1))
8569 .take(2.min(self.master_data.materials.len()))
8570 .collect();
8571
8572 if materials.is_empty() {
8573 continue;
8574 }
8575
8576 let company = &self.config.companies[i % self.config.companies.len()];
8577 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
8578 let fiscal_period = po_date.month() as u8;
8579 let created_by = if self.master_data.employees.is_empty() {
8580 "SYSTEM"
8581 } else {
8582 self.master_data.employees[i % self.master_data.employees.len()]
8583 .user_id
8584 .as_str()
8585 };
8586
8587 let chain = p2p_gen.generate_chain(
8588 &company.code,
8589 vendor,
8590 &materials,
8591 po_date,
8592 start_date.year() as u16,
8593 fiscal_period,
8594 created_by,
8595 );
8596
8597 flows.purchase_orders.push(chain.purchase_order.clone());
8599 flows.goods_receipts.extend(chain.goods_receipts.clone());
8600 if let Some(vi) = &chain.vendor_invoice {
8601 flows.vendor_invoices.push(vi.clone());
8602 }
8603 if let Some(payment) = &chain.payment {
8604 flows.payments.push(payment.clone());
8605 }
8606 for remainder in &chain.remainder_payments {
8607 flows.payments.push(remainder.clone());
8608 }
8609 flows.p2p_chains.push(chain);
8610
8611 if let Some(pb) = &pb {
8612 pb.inc(1);
8613 }
8614 }
8615
8616 if let Some(pb) = pb {
8617 pb.finish_with_message("P2P document flows complete");
8618 }
8619
8620 let o2c_count = self
8623 .phase_config
8624 .o2c_chains
8625 .min(self.master_data.customers.len() * 2 * months);
8626 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
8627
8628 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
8630 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
8631 o2c_gen.set_country_pack(self.primary_pack().clone());
8632
8633 for i in 0..o2c_count {
8634 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
8635 let materials: Vec<&Material> = self
8636 .master_data
8637 .materials
8638 .iter()
8639 .skip(i % self.master_data.materials.len().max(1))
8640 .take(2.min(self.master_data.materials.len()))
8641 .collect();
8642
8643 if materials.is_empty() {
8644 continue;
8645 }
8646
8647 let company = &self.config.companies[i % self.config.companies.len()];
8648 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
8649 let fiscal_period = so_date.month() as u8;
8650 let created_by = if self.master_data.employees.is_empty() {
8651 "SYSTEM"
8652 } else {
8653 self.master_data.employees[i % self.master_data.employees.len()]
8654 .user_id
8655 .as_str()
8656 };
8657
8658 let chain = o2c_gen.generate_chain(
8659 &company.code,
8660 customer,
8661 &materials,
8662 so_date,
8663 start_date.year() as u16,
8664 fiscal_period,
8665 created_by,
8666 );
8667
8668 flows.sales_orders.push(chain.sales_order.clone());
8670 flows.deliveries.extend(chain.deliveries.clone());
8671 if let Some(ci) = &chain.customer_invoice {
8672 flows.customer_invoices.push(ci.clone());
8673 }
8674 if let Some(receipt) = &chain.customer_receipt {
8675 flows.payments.push(receipt.clone());
8676 }
8677 for receipt in &chain.remainder_receipts {
8679 flows.payments.push(receipt.clone());
8680 }
8681 flows.o2c_chains.push(chain);
8682
8683 if let Some(pb) = &pb {
8684 pb.inc(1);
8685 }
8686 }
8687
8688 if let Some(pb) = pb {
8689 pb.finish_with_message("O2C document flows complete");
8690 }
8691
8692 {
8696 let mut refs = Vec::new();
8697 for doc in &flows.purchase_orders {
8698 refs.extend(doc.header.document_references.iter().cloned());
8699 }
8700 for doc in &flows.goods_receipts {
8701 refs.extend(doc.header.document_references.iter().cloned());
8702 }
8703 for doc in &flows.vendor_invoices {
8704 refs.extend(doc.header.document_references.iter().cloned());
8705 }
8706 for doc in &flows.sales_orders {
8707 refs.extend(doc.header.document_references.iter().cloned());
8708 }
8709 for doc in &flows.deliveries {
8710 refs.extend(doc.header.document_references.iter().cloned());
8711 }
8712 for doc in &flows.customer_invoices {
8713 refs.extend(doc.header.document_references.iter().cloned());
8714 }
8715 for doc in &flows.payments {
8716 refs.extend(doc.header.document_references.iter().cloned());
8717 }
8718 debug!(
8719 "Collected {} document cross-references from document headers",
8720 refs.len()
8721 );
8722 flows.document_references = refs;
8723 }
8724
8725 Ok(())
8726 }
8727
8728 fn generate_journal_entries(
8730 &mut self,
8731 coa: &Arc<ChartOfAccounts>,
8732 ) -> SynthResult<Vec<JournalEntry>> {
8733 use datasynth_core::traits::ParallelGenerator;
8734
8735 let total = self.calculate_total_transactions();
8736 let pb = self.create_progress_bar(total, "Generating Journal Entries");
8737
8738 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8739 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8740 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8741
8742 let company_codes: Vec<String> = self
8743 .config
8744 .companies
8745 .iter()
8746 .map(|c| c.code.clone())
8747 .collect();
8748
8749 let generator = JournalEntryGenerator::new_with_params(
8750 self.config.transactions.clone(),
8751 Arc::clone(coa),
8752 company_codes,
8753 start_date,
8754 end_date,
8755 self.seed,
8756 );
8757
8758 let je_pack = self.primary_pack();
8762
8763 let mut generator = generator
8764 .with_master_data(
8765 &self.master_data.vendors,
8766 &self.master_data.customers,
8767 &self.master_data.materials,
8768 )
8769 .with_country_pack_names(je_pack)
8770 .with_country_pack_temporal(
8771 self.config.temporal_patterns.clone(),
8772 self.seed + 200,
8773 je_pack,
8774 )
8775 .with_persona_errors(true)
8776 .with_fraud_config(self.config.fraud.clone());
8777
8778 if self.config.temporal.enabled {
8780 let drift_config = self.config.temporal.to_core_config();
8781 generator = generator.with_drift_config(drift_config, self.seed + 100);
8782 }
8783
8784 self.check_memory_limit()?;
8786
8787 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
8789
8790 let entries = if total >= 10_000 && num_threads > 1 {
8794 let sub_generators = generator.split(num_threads);
8797 let entries_per_thread = total as usize / num_threads;
8798 let remainder = total as usize % num_threads;
8799
8800 let batches: Vec<Vec<JournalEntry>> = sub_generators
8801 .into_par_iter()
8802 .enumerate()
8803 .map(|(i, mut gen)| {
8804 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
8805 gen.generate_batch(count)
8806 })
8807 .collect();
8808
8809 let entries = JournalEntryGenerator::merge_results(batches);
8811
8812 if let Some(pb) = &pb {
8813 pb.inc(total);
8814 }
8815 entries
8816 } else {
8817 let mut entries = Vec::with_capacity(total as usize);
8819 for _ in 0..total {
8820 let entry = generator.generate();
8821 entries.push(entry);
8822 if let Some(pb) = &pb {
8823 pb.inc(1);
8824 }
8825 }
8826 entries
8827 };
8828
8829 if let Some(pb) = pb {
8830 pb.finish_with_message("Journal entries complete");
8831 }
8832
8833 Ok(entries)
8834 }
8835
8836 fn generate_jes_from_document_flows(
8841 &mut self,
8842 flows: &DocumentFlowSnapshot,
8843 ) -> SynthResult<Vec<JournalEntry>> {
8844 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
8845 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
8846
8847 let je_config = match self.resolve_coa_framework() {
8848 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
8849 CoAFramework::GermanSkr04 => {
8850 let fa = datasynth_core::FrameworkAccounts::german_gaap();
8851 DocumentFlowJeConfig::from(&fa)
8852 }
8853 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
8854 };
8855
8856 let populate_fec = je_config.populate_fec_fields;
8857 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
8858
8859 if populate_fec {
8863 let mut aux_lookup = std::collections::HashMap::new();
8864 for vendor in &self.master_data.vendors {
8865 if let Some(ref aux) = vendor.auxiliary_gl_account {
8866 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
8867 }
8868 }
8869 for customer in &self.master_data.customers {
8870 if let Some(ref aux) = customer.auxiliary_gl_account {
8871 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
8872 }
8873 }
8874 if !aux_lookup.is_empty() {
8875 generator.set_auxiliary_account_lookup(aux_lookup);
8876 }
8877 }
8878
8879 let mut entries = Vec::new();
8880
8881 for chain in &flows.p2p_chains {
8883 let chain_entries = generator.generate_from_p2p_chain(chain);
8884 entries.extend(chain_entries);
8885 if let Some(pb) = &pb {
8886 pb.inc(1);
8887 }
8888 }
8889
8890 for chain in &flows.o2c_chains {
8892 let chain_entries = generator.generate_from_o2c_chain(chain);
8893 entries.extend(chain_entries);
8894 if let Some(pb) = &pb {
8895 pb.inc(1);
8896 }
8897 }
8898
8899 if let Some(pb) = pb {
8900 pb.finish_with_message(format!(
8901 "Generated {} JEs from document flows",
8902 entries.len()
8903 ));
8904 }
8905
8906 Ok(entries)
8907 }
8908
8909 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
8915 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
8916
8917 let mut jes = Vec::with_capacity(payroll_runs.len());
8918
8919 for run in payroll_runs {
8920 let mut je = JournalEntry::new_simple(
8921 format!("JE-PAYROLL-{}", run.payroll_id),
8922 run.company_code.clone(),
8923 run.run_date,
8924 format!("Payroll {}", run.payroll_id),
8925 );
8926
8927 je.add_line(JournalEntryLine {
8929 line_number: 1,
8930 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
8931 debit_amount: run.total_gross,
8932 reference: Some(run.payroll_id.clone()),
8933 text: Some(format!(
8934 "Payroll {} ({} employees)",
8935 run.payroll_id, run.employee_count
8936 )),
8937 ..Default::default()
8938 });
8939
8940 je.add_line(JournalEntryLine {
8942 line_number: 2,
8943 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
8944 credit_amount: run.total_gross,
8945 reference: Some(run.payroll_id.clone()),
8946 ..Default::default()
8947 });
8948
8949 jes.push(je);
8950 }
8951
8952 jes
8953 }
8954
8955 fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
8961 use datasynth_core::accounts::{control_accounts, expense_accounts};
8962 use datasynth_core::models::ProductionOrderStatus;
8963
8964 let mut jes = Vec::new();
8965
8966 for order in production_orders {
8967 if !matches!(
8969 order.status,
8970 ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
8971 ) {
8972 continue;
8973 }
8974
8975 let mut je = JournalEntry::new_simple(
8976 format!("JE-MFG-{}", order.order_id),
8977 order.company_code.clone(),
8978 order.actual_end.unwrap_or(order.planned_end),
8979 format!(
8980 "Production Order {} - {}",
8981 order.order_id, order.material_description
8982 ),
8983 );
8984
8985 je.add_line(JournalEntryLine {
8987 line_number: 1,
8988 gl_account: expense_accounts::RAW_MATERIALS.to_string(),
8989 debit_amount: order.actual_cost,
8990 reference: Some(order.order_id.clone()),
8991 text: Some(format!(
8992 "Material consumption for {}",
8993 order.material_description
8994 )),
8995 quantity: Some(order.actual_quantity),
8996 unit: Some("EA".to_string()),
8997 ..Default::default()
8998 });
8999
9000 je.add_line(JournalEntryLine {
9002 line_number: 2,
9003 gl_account: control_accounts::INVENTORY.to_string(),
9004 credit_amount: order.actual_cost,
9005 reference: Some(order.order_id.clone()),
9006 ..Default::default()
9007 });
9008
9009 jes.push(je);
9010 }
9011
9012 jes
9013 }
9014
9015 fn link_document_flows_to_subledgers(
9020 &mut self,
9021 flows: &DocumentFlowSnapshot,
9022 ) -> SynthResult<SubledgerSnapshot> {
9023 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
9024 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
9025
9026 let vendor_names: std::collections::HashMap<String, String> = self
9028 .master_data
9029 .vendors
9030 .iter()
9031 .map(|v| (v.vendor_id.clone(), v.name.clone()))
9032 .collect();
9033 let customer_names: std::collections::HashMap<String, String> = self
9034 .master_data
9035 .customers
9036 .iter()
9037 .map(|c| (c.customer_id.clone(), c.name.clone()))
9038 .collect();
9039
9040 let mut linker = DocumentFlowLinker::new()
9041 .with_vendor_names(vendor_names)
9042 .with_customer_names(customer_names);
9043
9044 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
9046 if let Some(pb) = &pb {
9047 pb.inc(flows.vendor_invoices.len() as u64);
9048 }
9049
9050 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
9052 if let Some(pb) = &pb {
9053 pb.inc(flows.customer_invoices.len() as u64);
9054 }
9055
9056 if let Some(pb) = pb {
9057 pb.finish_with_message(format!(
9058 "Linked {} AP and {} AR invoices",
9059 ap_invoices.len(),
9060 ar_invoices.len()
9061 ));
9062 }
9063
9064 Ok(SubledgerSnapshot {
9065 ap_invoices,
9066 ar_invoices,
9067 fa_records: Vec::new(),
9068 inventory_positions: Vec::new(),
9069 inventory_movements: Vec::new(),
9070 ar_aging_reports: Vec::new(),
9072 ap_aging_reports: Vec::new(),
9073 depreciation_runs: Vec::new(),
9075 inventory_valuations: Vec::new(),
9076 dunning_runs: Vec::new(),
9078 dunning_letters: Vec::new(),
9079 })
9080 }
9081
9082 #[allow(clippy::too_many_arguments)]
9087 fn generate_ocpm_events(
9088 &mut self,
9089 flows: &DocumentFlowSnapshot,
9090 sourcing: &SourcingSnapshot,
9091 hr: &HrSnapshot,
9092 manufacturing: &ManufacturingSnapshot,
9093 banking: &BankingSnapshot,
9094 audit: &AuditSnapshot,
9095 financial_reporting: &FinancialReportingSnapshot,
9096 ) -> SynthResult<OcpmSnapshot> {
9097 let total_chains = flows.p2p_chains.len()
9098 + flows.o2c_chains.len()
9099 + sourcing.sourcing_projects.len()
9100 + hr.payroll_runs.len()
9101 + manufacturing.production_orders.len()
9102 + banking.customers.len()
9103 + audit.engagements.len()
9104 + financial_reporting.bank_reconciliations.len();
9105 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
9106
9107 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
9109 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
9110
9111 let ocpm_config = OcpmGeneratorConfig {
9113 generate_p2p: true,
9114 generate_o2c: true,
9115 generate_s2c: !sourcing.sourcing_projects.is_empty(),
9116 generate_h2r: !hr.payroll_runs.is_empty(),
9117 generate_mfg: !manufacturing.production_orders.is_empty(),
9118 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
9119 generate_bank: !banking.customers.is_empty(),
9120 generate_audit: !audit.engagements.is_empty(),
9121 happy_path_rate: 0.75,
9122 exception_path_rate: 0.20,
9123 error_path_rate: 0.05,
9124 add_duration_variability: true,
9125 duration_std_dev_factor: 0.3,
9126 };
9127 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
9128 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
9129
9130 let available_users: Vec<String> = self
9132 .master_data
9133 .employees
9134 .iter()
9135 .take(20)
9136 .map(|e| e.user_id.clone())
9137 .collect();
9138
9139 let fallback_date =
9141 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
9142 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9143 .unwrap_or(fallback_date);
9144 let base_midnight = base_date
9145 .and_hms_opt(0, 0, 0)
9146 .expect("midnight is always valid");
9147 let base_datetime =
9148 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
9149
9150 let add_result = |event_log: &mut OcpmEventLog,
9152 result: datasynth_ocpm::CaseGenerationResult| {
9153 for event in result.events {
9154 event_log.add_event(event);
9155 }
9156 for object in result.objects {
9157 event_log.add_object(object);
9158 }
9159 for relationship in result.relationships {
9160 event_log.add_relationship(relationship);
9161 }
9162 for corr in result.correlation_events {
9163 event_log.add_correlation_event(corr);
9164 }
9165 event_log.add_case(result.case_trace);
9166 };
9167
9168 for chain in &flows.p2p_chains {
9170 let po = &chain.purchase_order;
9171 let documents = P2pDocuments::new(
9172 &po.header.document_id,
9173 &po.vendor_id,
9174 &po.header.company_code,
9175 po.total_net_amount,
9176 &po.header.currency,
9177 &ocpm_uuid_factory,
9178 )
9179 .with_goods_receipt(
9180 chain
9181 .goods_receipts
9182 .first()
9183 .map(|gr| gr.header.document_id.as_str())
9184 .unwrap_or(""),
9185 &ocpm_uuid_factory,
9186 )
9187 .with_invoice(
9188 chain
9189 .vendor_invoice
9190 .as_ref()
9191 .map(|vi| vi.header.document_id.as_str())
9192 .unwrap_or(""),
9193 &ocpm_uuid_factory,
9194 )
9195 .with_payment(
9196 chain
9197 .payment
9198 .as_ref()
9199 .map(|p| p.header.document_id.as_str())
9200 .unwrap_or(""),
9201 &ocpm_uuid_factory,
9202 );
9203
9204 let start_time =
9205 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
9206 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
9207 add_result(&mut event_log, result);
9208
9209 if let Some(pb) = &pb {
9210 pb.inc(1);
9211 }
9212 }
9213
9214 for chain in &flows.o2c_chains {
9216 let so = &chain.sales_order;
9217 let documents = O2cDocuments::new(
9218 &so.header.document_id,
9219 &so.customer_id,
9220 &so.header.company_code,
9221 so.total_net_amount,
9222 &so.header.currency,
9223 &ocpm_uuid_factory,
9224 )
9225 .with_delivery(
9226 chain
9227 .deliveries
9228 .first()
9229 .map(|d| d.header.document_id.as_str())
9230 .unwrap_or(""),
9231 &ocpm_uuid_factory,
9232 )
9233 .with_invoice(
9234 chain
9235 .customer_invoice
9236 .as_ref()
9237 .map(|ci| ci.header.document_id.as_str())
9238 .unwrap_or(""),
9239 &ocpm_uuid_factory,
9240 )
9241 .with_receipt(
9242 chain
9243 .customer_receipt
9244 .as_ref()
9245 .map(|r| r.header.document_id.as_str())
9246 .unwrap_or(""),
9247 &ocpm_uuid_factory,
9248 );
9249
9250 let start_time =
9251 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
9252 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
9253 add_result(&mut event_log, result);
9254
9255 if let Some(pb) = &pb {
9256 pb.inc(1);
9257 }
9258 }
9259
9260 for project in &sourcing.sourcing_projects {
9262 let vendor_id = sourcing
9264 .contracts
9265 .iter()
9266 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9267 .map(|c| c.vendor_id.clone())
9268 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
9269 .or_else(|| {
9270 self.master_data
9271 .vendors
9272 .first()
9273 .map(|v| v.vendor_id.clone())
9274 })
9275 .unwrap_or_else(|| "V000".to_string());
9276 let mut docs = S2cDocuments::new(
9277 &project.project_id,
9278 &vendor_id,
9279 &project.company_code,
9280 project.estimated_annual_spend,
9281 &ocpm_uuid_factory,
9282 );
9283 if let Some(rfx) = sourcing
9285 .rfx_events
9286 .iter()
9287 .find(|r| r.sourcing_project_id == project.project_id)
9288 {
9289 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
9290 if let Some(bid) = sourcing.bids.iter().find(|b| {
9292 b.rfx_id == rfx.rfx_id
9293 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
9294 }) {
9295 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
9296 }
9297 }
9298 if let Some(contract) = sourcing
9300 .contracts
9301 .iter()
9302 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9303 {
9304 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
9305 }
9306 let start_time = base_datetime - chrono::Duration::days(90);
9307 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
9308 add_result(&mut event_log, result);
9309
9310 if let Some(pb) = &pb {
9311 pb.inc(1);
9312 }
9313 }
9314
9315 for run in &hr.payroll_runs {
9317 let employee_id = hr
9319 .payroll_line_items
9320 .iter()
9321 .find(|li| li.payroll_id == run.payroll_id)
9322 .map(|li| li.employee_id.as_str())
9323 .unwrap_or("EMP000");
9324 let docs = H2rDocuments::new(
9325 &run.payroll_id,
9326 employee_id,
9327 &run.company_code,
9328 run.total_gross,
9329 &ocpm_uuid_factory,
9330 )
9331 .with_time_entries(
9332 hr.time_entries
9333 .iter()
9334 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
9335 .take(5)
9336 .map(|t| t.entry_id.as_str())
9337 .collect(),
9338 );
9339 let start_time = base_datetime - chrono::Duration::days(30);
9340 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
9341 add_result(&mut event_log, result);
9342
9343 if let Some(pb) = &pb {
9344 pb.inc(1);
9345 }
9346 }
9347
9348 for order in &manufacturing.production_orders {
9350 let mut docs = MfgDocuments::new(
9351 &order.order_id,
9352 &order.material_id,
9353 &order.company_code,
9354 order.planned_quantity,
9355 &ocpm_uuid_factory,
9356 )
9357 .with_operations(
9358 order
9359 .operations
9360 .iter()
9361 .map(|o| format!("OP-{:04}", o.operation_number))
9362 .collect::<Vec<_>>()
9363 .iter()
9364 .map(std::string::String::as_str)
9365 .collect(),
9366 );
9367 if let Some(insp) = manufacturing
9369 .quality_inspections
9370 .iter()
9371 .find(|i| i.reference_id == order.order_id)
9372 {
9373 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
9374 }
9375 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
9377 cc.items
9378 .iter()
9379 .any(|item| item.material_id == order.material_id)
9380 }) {
9381 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
9382 }
9383 let start_time = base_datetime - chrono::Duration::days(60);
9384 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
9385 add_result(&mut event_log, result);
9386
9387 if let Some(pb) = &pb {
9388 pb.inc(1);
9389 }
9390 }
9391
9392 for customer in &banking.customers {
9394 let customer_id_str = customer.customer_id.to_string();
9395 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
9396 if let Some(account) = banking
9398 .accounts
9399 .iter()
9400 .find(|a| a.primary_owner_id == customer.customer_id)
9401 {
9402 let account_id_str = account.account_id.to_string();
9403 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
9404 let txn_strs: Vec<String> = banking
9406 .transactions
9407 .iter()
9408 .filter(|t| t.account_id == account.account_id)
9409 .take(10)
9410 .map(|t| t.transaction_id.to_string())
9411 .collect();
9412 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
9413 let txn_amounts: Vec<rust_decimal::Decimal> = banking
9414 .transactions
9415 .iter()
9416 .filter(|t| t.account_id == account.account_id)
9417 .take(10)
9418 .map(|t| t.amount)
9419 .collect();
9420 if !txn_ids.is_empty() {
9421 docs = docs.with_transactions(txn_ids, txn_amounts);
9422 }
9423 }
9424 let start_time = base_datetime - chrono::Duration::days(180);
9425 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
9426 add_result(&mut event_log, result);
9427
9428 if let Some(pb) = &pb {
9429 pb.inc(1);
9430 }
9431 }
9432
9433 for engagement in &audit.engagements {
9435 let engagement_id_str = engagement.engagement_id.to_string();
9436 let docs = AuditDocuments::new(
9437 &engagement_id_str,
9438 &engagement.client_entity_id,
9439 &ocpm_uuid_factory,
9440 )
9441 .with_workpapers(
9442 audit
9443 .workpapers
9444 .iter()
9445 .filter(|w| w.engagement_id == engagement.engagement_id)
9446 .take(10)
9447 .map(|w| w.workpaper_id.to_string())
9448 .collect::<Vec<_>>()
9449 .iter()
9450 .map(std::string::String::as_str)
9451 .collect(),
9452 )
9453 .with_evidence(
9454 audit
9455 .evidence
9456 .iter()
9457 .filter(|e| e.engagement_id == engagement.engagement_id)
9458 .take(10)
9459 .map(|e| e.evidence_id.to_string())
9460 .collect::<Vec<_>>()
9461 .iter()
9462 .map(std::string::String::as_str)
9463 .collect(),
9464 )
9465 .with_risks(
9466 audit
9467 .risk_assessments
9468 .iter()
9469 .filter(|r| r.engagement_id == engagement.engagement_id)
9470 .take(5)
9471 .map(|r| r.risk_id.to_string())
9472 .collect::<Vec<_>>()
9473 .iter()
9474 .map(std::string::String::as_str)
9475 .collect(),
9476 )
9477 .with_findings(
9478 audit
9479 .findings
9480 .iter()
9481 .filter(|f| f.engagement_id == engagement.engagement_id)
9482 .take(5)
9483 .map(|f| f.finding_id.to_string())
9484 .collect::<Vec<_>>()
9485 .iter()
9486 .map(std::string::String::as_str)
9487 .collect(),
9488 )
9489 .with_judgments(
9490 audit
9491 .judgments
9492 .iter()
9493 .filter(|j| j.engagement_id == engagement.engagement_id)
9494 .take(5)
9495 .map(|j| j.judgment_id.to_string())
9496 .collect::<Vec<_>>()
9497 .iter()
9498 .map(std::string::String::as_str)
9499 .collect(),
9500 );
9501 let start_time = base_datetime - chrono::Duration::days(120);
9502 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
9503 add_result(&mut event_log, result);
9504
9505 if let Some(pb) = &pb {
9506 pb.inc(1);
9507 }
9508 }
9509
9510 for recon in &financial_reporting.bank_reconciliations {
9512 let docs = BankReconDocuments::new(
9513 &recon.reconciliation_id,
9514 &recon.bank_account_id,
9515 &recon.company_code,
9516 recon.bank_ending_balance,
9517 &ocpm_uuid_factory,
9518 )
9519 .with_statement_lines(
9520 recon
9521 .statement_lines
9522 .iter()
9523 .take(20)
9524 .map(|l| l.line_id.as_str())
9525 .collect(),
9526 )
9527 .with_reconciling_items(
9528 recon
9529 .reconciling_items
9530 .iter()
9531 .take(10)
9532 .map(|i| i.item_id.as_str())
9533 .collect(),
9534 );
9535 let start_time = base_datetime - chrono::Duration::days(30);
9536 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
9537 add_result(&mut event_log, result);
9538
9539 if let Some(pb) = &pb {
9540 pb.inc(1);
9541 }
9542 }
9543
9544 event_log.compute_variants();
9546
9547 let summary = event_log.summary();
9548
9549 if let Some(pb) = pb {
9550 pb.finish_with_message(format!(
9551 "Generated {} OCPM events, {} objects",
9552 summary.event_count, summary.object_count
9553 ));
9554 }
9555
9556 Ok(OcpmSnapshot {
9557 event_count: summary.event_count,
9558 object_count: summary.object_count,
9559 case_count: summary.case_count,
9560 event_log: Some(event_log),
9561 })
9562 }
9563
9564 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
9566 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
9567
9568 let total_rate = if self.config.anomaly_injection.enabled {
9571 self.config.anomaly_injection.rates.total_rate
9572 } else if self.config.fraud.enabled {
9573 self.config.fraud.fraud_rate
9574 } else {
9575 0.02
9576 };
9577
9578 let fraud_rate = if self.config.anomaly_injection.enabled {
9579 self.config.anomaly_injection.rates.fraud_rate
9580 } else {
9581 AnomalyRateConfig::default().fraud_rate
9582 };
9583
9584 let error_rate = if self.config.anomaly_injection.enabled {
9585 self.config.anomaly_injection.rates.error_rate
9586 } else {
9587 AnomalyRateConfig::default().error_rate
9588 };
9589
9590 let process_issue_rate = if self.config.anomaly_injection.enabled {
9591 self.config.anomaly_injection.rates.process_rate
9592 } else {
9593 AnomalyRateConfig::default().process_issue_rate
9594 };
9595
9596 let anomaly_config = AnomalyInjectorConfig {
9597 rates: AnomalyRateConfig {
9598 total_rate,
9599 fraud_rate,
9600 error_rate,
9601 process_issue_rate,
9602 ..Default::default()
9603 },
9604 seed: self.seed + 5000,
9605 ..Default::default()
9606 };
9607
9608 let mut injector = AnomalyInjector::new(anomaly_config);
9609 let result = injector.process_entries(entries);
9610
9611 if let Some(pb) = &pb {
9612 pb.inc(entries.len() as u64);
9613 pb.finish_with_message("Anomaly injection complete");
9614 }
9615
9616 let mut by_type = HashMap::new();
9617 for label in &result.labels {
9618 *by_type
9619 .entry(format!("{:?}", label.anomaly_type))
9620 .or_insert(0) += 1;
9621 }
9622
9623 Ok(AnomalyLabels {
9624 labels: result.labels,
9625 summary: Some(result.summary),
9626 by_type,
9627 })
9628 }
9629
9630 fn validate_journal_entries(
9639 &mut self,
9640 entries: &[JournalEntry],
9641 ) -> SynthResult<BalanceValidationResult> {
9642 let clean_entries: Vec<&JournalEntry> = entries
9644 .iter()
9645 .filter(|e| {
9646 e.header
9647 .header_text
9648 .as_ref()
9649 .map(|t| !t.contains("[HUMAN_ERROR:"))
9650 .unwrap_or(true)
9651 })
9652 .collect();
9653
9654 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
9655
9656 let config = BalanceTrackerConfig {
9658 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
9662 };
9663 let validation_currency = self
9664 .config
9665 .companies
9666 .first()
9667 .map(|c| c.currency.clone())
9668 .unwrap_or_else(|| "USD".to_string());
9669
9670 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
9671
9672 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
9674 let errors = tracker.apply_entries(&clean_refs);
9675
9676 if let Some(pb) = &pb {
9677 pb.inc(entries.len() as u64);
9678 }
9679
9680 let has_unbalanced = tracker
9683 .get_validation_errors()
9684 .iter()
9685 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
9686
9687 let mut all_errors = errors;
9690 all_errors.extend(tracker.get_validation_errors().iter().cloned());
9691 let company_codes: Vec<String> = self
9692 .config
9693 .companies
9694 .iter()
9695 .map(|c| c.code.clone())
9696 .collect();
9697
9698 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9699 .map(|d| d + chrono::Months::new(self.config.global.period_months))
9700 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9701
9702 for company_code in &company_codes {
9703 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
9704 all_errors.push(e);
9705 }
9706 }
9707
9708 let stats = tracker.get_statistics();
9710
9711 let is_balanced = all_errors.is_empty();
9713
9714 if let Some(pb) = pb {
9715 let msg = if is_balanced {
9716 "Balance validation passed"
9717 } else {
9718 "Balance validation completed with errors"
9719 };
9720 pb.finish_with_message(msg);
9721 }
9722
9723 Ok(BalanceValidationResult {
9724 validated: true,
9725 is_balanced,
9726 entries_processed: stats.entries_processed,
9727 total_debits: stats.total_debits,
9728 total_credits: stats.total_credits,
9729 accounts_tracked: stats.accounts_tracked,
9730 companies_tracked: stats.companies_tracked,
9731 validation_errors: all_errors,
9732 has_unbalanced_entries: has_unbalanced,
9733 })
9734 }
9735
9736 fn inject_data_quality(
9741 &mut self,
9742 entries: &mut [JournalEntry],
9743 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
9744 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
9745
9746 let config = if self.config.data_quality.enabled {
9749 let dq = &self.config.data_quality;
9750 DataQualityConfig {
9751 enable_missing_values: dq.missing_values.enabled,
9752 missing_values: datasynth_generators::MissingValueConfig {
9753 global_rate: dq.effective_missing_rate(),
9754 ..Default::default()
9755 },
9756 enable_format_variations: dq.format_variations.enabled,
9757 format_variations: datasynth_generators::FormatVariationConfig {
9758 date_variation_rate: dq.format_variations.dates.rate,
9759 amount_variation_rate: dq.format_variations.amounts.rate,
9760 identifier_variation_rate: dq.format_variations.identifiers.rate,
9761 ..Default::default()
9762 },
9763 enable_duplicates: dq.duplicates.enabled,
9764 duplicates: datasynth_generators::DuplicateConfig {
9765 duplicate_rate: dq.effective_duplicate_rate(),
9766 ..Default::default()
9767 },
9768 enable_typos: dq.typos.enabled,
9769 typos: datasynth_generators::TypoConfig {
9770 char_error_rate: dq.effective_typo_rate(),
9771 ..Default::default()
9772 },
9773 enable_encoding_issues: dq.encoding_issues.enabled,
9774 encoding_issue_rate: dq.encoding_issues.rate,
9775 seed: self.seed.wrapping_add(77), track_statistics: true,
9777 }
9778 } else {
9779 DataQualityConfig::minimal()
9780 };
9781 let mut injector = DataQualityInjector::new(config);
9782
9783 injector.set_country_pack(self.primary_pack().clone());
9785
9786 let context = HashMap::new();
9788
9789 for entry in entries.iter_mut() {
9790 if let Some(text) = &entry.header.header_text {
9792 let processed = injector.process_text_field(
9793 "header_text",
9794 text,
9795 &entry.header.document_id.to_string(),
9796 &context,
9797 );
9798 match processed {
9799 Some(new_text) if new_text != *text => {
9800 entry.header.header_text = Some(new_text);
9801 }
9802 None => {
9803 entry.header.header_text = None; }
9805 _ => {}
9806 }
9807 }
9808
9809 if let Some(ref_text) = &entry.header.reference {
9811 let processed = injector.process_text_field(
9812 "reference",
9813 ref_text,
9814 &entry.header.document_id.to_string(),
9815 &context,
9816 );
9817 match processed {
9818 Some(new_text) if new_text != *ref_text => {
9819 entry.header.reference = Some(new_text);
9820 }
9821 None => {
9822 entry.header.reference = None;
9823 }
9824 _ => {}
9825 }
9826 }
9827
9828 let user_persona = entry.header.user_persona.clone();
9830 if let Some(processed) = injector.process_text_field(
9831 "user_persona",
9832 &user_persona,
9833 &entry.header.document_id.to_string(),
9834 &context,
9835 ) {
9836 if processed != user_persona {
9837 entry.header.user_persona = processed;
9838 }
9839 }
9840
9841 for line in &mut entry.lines {
9843 if let Some(ref text) = line.line_text {
9845 let processed = injector.process_text_field(
9846 "line_text",
9847 text,
9848 &entry.header.document_id.to_string(),
9849 &context,
9850 );
9851 match processed {
9852 Some(new_text) if new_text != *text => {
9853 line.line_text = Some(new_text);
9854 }
9855 None => {
9856 line.line_text = None;
9857 }
9858 _ => {}
9859 }
9860 }
9861
9862 if let Some(cc) = &line.cost_center {
9864 let processed = injector.process_text_field(
9865 "cost_center",
9866 cc,
9867 &entry.header.document_id.to_string(),
9868 &context,
9869 );
9870 match processed {
9871 Some(new_cc) if new_cc != *cc => {
9872 line.cost_center = Some(new_cc);
9873 }
9874 None => {
9875 line.cost_center = None;
9876 }
9877 _ => {}
9878 }
9879 }
9880 }
9881
9882 if let Some(pb) = &pb {
9883 pb.inc(1);
9884 }
9885 }
9886
9887 if let Some(pb) = pb {
9888 pb.finish_with_message("Data quality injection complete");
9889 }
9890
9891 let quality_issues = injector.issues().to_vec();
9892 Ok((injector.stats().clone(), quality_issues))
9893 }
9894
9895 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
9906 let use_fsm = self
9908 .config
9909 .audit
9910 .fsm
9911 .as_ref()
9912 .map(|f| f.enabled)
9913 .unwrap_or(false);
9914
9915 if use_fsm {
9916 return self.generate_audit_data_with_fsm(entries);
9917 }
9918
9919 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9921 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9922 let fiscal_year = start_date.year() as u16;
9923 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
9924
9925 let total_revenue: rust_decimal::Decimal = entries
9927 .iter()
9928 .flat_map(|e| e.lines.iter())
9929 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
9930 .map(|l| l.credit_amount)
9931 .sum();
9932
9933 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
9935
9936 let mut snapshot = AuditSnapshot::default();
9937
9938 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
9940 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
9941 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
9942 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
9943 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
9944 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
9945 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
9946 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
9947 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
9948 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
9949 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
9950 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
9951
9952 let accounts: Vec<String> = self
9954 .coa
9955 .as_ref()
9956 .map(|coa| {
9957 coa.get_postable_accounts()
9958 .iter()
9959 .map(|acc| acc.account_code().to_string())
9960 .collect()
9961 })
9962 .unwrap_or_default();
9963
9964 for (i, company) in self.config.companies.iter().enumerate() {
9966 let company_revenue = total_revenue
9968 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
9969
9970 let engagements_for_company =
9972 self.phase_config.audit_engagements / self.config.companies.len().max(1);
9973 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
9974 1
9975 } else {
9976 0
9977 };
9978
9979 for _eng_idx in 0..(engagements_for_company + extra) {
9980 let mut engagement = engagement_gen.generate_engagement(
9982 &company.code,
9983 &company.name,
9984 fiscal_year,
9985 period_end,
9986 company_revenue,
9987 None, );
9989
9990 if !self.master_data.employees.is_empty() {
9992 let emp_count = self.master_data.employees.len();
9993 let base = (i * 10 + _eng_idx) % emp_count;
9995 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
9996 .employee_id
9997 .clone();
9998 engagement.engagement_manager_id = self.master_data.employees
9999 [(base + 1) % emp_count]
10000 .employee_id
10001 .clone();
10002 let real_team: Vec<String> = engagement
10003 .team_member_ids
10004 .iter()
10005 .enumerate()
10006 .map(|(j, _)| {
10007 self.master_data.employees[(base + 2 + j) % emp_count]
10008 .employee_id
10009 .clone()
10010 })
10011 .collect();
10012 engagement.team_member_ids = real_team;
10013 }
10014
10015 if let Some(pb) = &pb {
10016 pb.inc(1);
10017 }
10018
10019 let team_members: Vec<String> = engagement.team_member_ids.clone();
10021
10022 let workpapers =
10024 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
10025
10026 for wp in &workpapers {
10027 if let Some(pb) = &pb {
10028 pb.inc(1);
10029 }
10030
10031 let evidence = evidence_gen.generate_evidence_for_workpaper(
10033 wp,
10034 &team_members,
10035 wp.preparer_date,
10036 );
10037
10038 for _ in &evidence {
10039 if let Some(pb) = &pb {
10040 pb.inc(1);
10041 }
10042 }
10043
10044 snapshot.evidence.extend(evidence);
10045 }
10046
10047 let risks =
10049 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
10050
10051 for _ in &risks {
10052 if let Some(pb) = &pb {
10053 pb.inc(1);
10054 }
10055 }
10056 snapshot.risk_assessments.extend(risks);
10057
10058 let findings = finding_gen.generate_findings_for_engagement(
10060 &engagement,
10061 &workpapers,
10062 &team_members,
10063 );
10064
10065 for _ in &findings {
10066 if let Some(pb) = &pb {
10067 pb.inc(1);
10068 }
10069 }
10070 snapshot.findings.extend(findings);
10071
10072 let judgments =
10074 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
10075
10076 for _ in &judgments {
10077 if let Some(pb) = &pb {
10078 pb.inc(1);
10079 }
10080 }
10081 snapshot.judgments.extend(judgments);
10082
10083 let (confs, resps) =
10085 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
10086 snapshot.confirmations.extend(confs);
10087 snapshot.confirmation_responses.extend(resps);
10088
10089 let team_pairs: Vec<(String, String)> = team_members
10091 .iter()
10092 .map(|id| {
10093 let name = self
10094 .master_data
10095 .employees
10096 .iter()
10097 .find(|e| e.employee_id == *id)
10098 .map(|e| e.display_name.clone())
10099 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
10100 (id.clone(), name)
10101 })
10102 .collect();
10103 for wp in &workpapers {
10104 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
10105 snapshot.procedure_steps.extend(steps);
10106 }
10107
10108 for wp in &workpapers {
10110 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
10111 snapshot.samples.push(sample);
10112 }
10113 }
10114
10115 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
10117 snapshot.analytical_results.extend(analytical);
10118
10119 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
10121 snapshot.ia_functions.push(ia_func);
10122 snapshot.ia_reports.extend(ia_reports);
10123
10124 let vendor_names: Vec<String> = self
10126 .master_data
10127 .vendors
10128 .iter()
10129 .map(|v| v.name.clone())
10130 .collect();
10131 let customer_names: Vec<String> = self
10132 .master_data
10133 .customers
10134 .iter()
10135 .map(|c| c.name.clone())
10136 .collect();
10137 let (parties, rp_txns) =
10138 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
10139 snapshot.related_parties.extend(parties);
10140 snapshot.related_party_transactions.extend(rp_txns);
10141
10142 snapshot.workpapers.extend(workpapers);
10144
10145 {
10147 let scope_id = format!(
10148 "SCOPE-{}-{}",
10149 engagement.engagement_id.simple(),
10150 &engagement.client_entity_id
10151 );
10152 let scope = datasynth_core::models::audit::AuditScope::new(
10153 scope_id.clone(),
10154 engagement.engagement_id.to_string(),
10155 engagement.client_entity_id.clone(),
10156 engagement.materiality,
10157 );
10158 let mut eng = engagement;
10160 eng.scope_id = Some(scope_id);
10161 snapshot.audit_scopes.push(scope);
10162 snapshot.engagements.push(eng);
10163 }
10164 }
10165 }
10166
10167 if self.config.companies.len() > 1 {
10171 let group_materiality = snapshot
10174 .engagements
10175 .first()
10176 .map(|e| e.materiality)
10177 .unwrap_or_else(|| {
10178 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
10179 total_revenue * pct
10180 });
10181
10182 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
10183 let group_engagement_id = snapshot
10184 .engagements
10185 .first()
10186 .map(|e| e.engagement_id.to_string())
10187 .unwrap_or_else(|| "GROUP-ENG".to_string());
10188
10189 let component_snapshot = component_gen.generate(
10190 &self.config.companies,
10191 group_materiality,
10192 &group_engagement_id,
10193 period_end,
10194 );
10195
10196 snapshot.component_auditors = component_snapshot.component_auditors;
10197 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
10198 snapshot.component_instructions = component_snapshot.component_instructions;
10199 snapshot.component_reports = component_snapshot.component_reports;
10200
10201 info!(
10202 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
10203 snapshot.component_auditors.len(),
10204 snapshot.component_instructions.len(),
10205 snapshot.component_reports.len(),
10206 );
10207 }
10208
10209 {
10213 let applicable_framework = self
10214 .config
10215 .accounting_standards
10216 .framework
10217 .as_ref()
10218 .map(|f| format!("{f:?}"))
10219 .unwrap_or_else(|| "IFRS".to_string());
10220
10221 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
10222 let entity_count = self.config.companies.len();
10223
10224 for engagement in &snapshot.engagements {
10225 let company = self
10226 .config
10227 .companies
10228 .iter()
10229 .find(|c| c.code == engagement.client_entity_id);
10230 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
10231 let letter_date = engagement.planning_start;
10232 let letter = letter_gen.generate(
10233 &engagement.engagement_id.to_string(),
10234 &engagement.client_name,
10235 entity_count,
10236 engagement.period_end_date,
10237 currency,
10238 &applicable_framework,
10239 letter_date,
10240 );
10241 snapshot.engagement_letters.push(letter);
10242 }
10243
10244 info!(
10245 "ISA 210 engagement letters: {} generated",
10246 snapshot.engagement_letters.len()
10247 );
10248 }
10249
10250 {
10254 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
10255 let entity_codes: Vec<String> = self
10256 .config
10257 .companies
10258 .iter()
10259 .map(|c| c.code.clone())
10260 .collect();
10261 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
10262 info!(
10263 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
10264 subsequent.len(),
10265 subsequent
10266 .iter()
10267 .filter(|e| matches!(
10268 e.classification,
10269 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
10270 ))
10271 .count(),
10272 subsequent
10273 .iter()
10274 .filter(|e| matches!(
10275 e.classification,
10276 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
10277 ))
10278 .count(),
10279 );
10280 snapshot.subsequent_events = subsequent;
10281 }
10282
10283 {
10287 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
10288 let entity_codes: Vec<String> = self
10289 .config
10290 .companies
10291 .iter()
10292 .map(|c| c.code.clone())
10293 .collect();
10294 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
10295 info!(
10296 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
10297 soc_snapshot.service_organizations.len(),
10298 soc_snapshot.soc_reports.len(),
10299 soc_snapshot.user_entity_controls.len(),
10300 );
10301 snapshot.service_organizations = soc_snapshot.service_organizations;
10302 snapshot.soc_reports = soc_snapshot.soc_reports;
10303 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
10304 }
10305
10306 {
10310 use datasynth_generators::audit::going_concern_generator::{
10311 GoingConcernGenerator, GoingConcernInput,
10312 };
10313 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
10314 let entity_codes: Vec<String> = self
10315 .config
10316 .companies
10317 .iter()
10318 .map(|c| c.code.clone())
10319 .collect();
10320 let assessment_date = period_end + chrono::Duration::days(75);
10322 let period_label = format!("FY{}", period_end.year());
10323
10324 let gc_inputs: Vec<GoingConcernInput> = self
10335 .config
10336 .companies
10337 .iter()
10338 .map(|company| {
10339 let code = &company.code;
10340 let mut revenue = rust_decimal::Decimal::ZERO;
10341 let mut expenses = rust_decimal::Decimal::ZERO;
10342 let mut current_assets = rust_decimal::Decimal::ZERO;
10343 let mut current_liabs = rust_decimal::Decimal::ZERO;
10344 let mut total_debt = rust_decimal::Decimal::ZERO;
10345
10346 for je in entries.iter().filter(|je| &je.header.company_code == code) {
10347 for line in &je.lines {
10348 let acct = line.gl_account.as_str();
10349 let net = line.debit_amount - line.credit_amount;
10350 if acct.starts_with('4') {
10351 revenue -= net;
10353 } else if acct.starts_with('6') {
10354 expenses += net;
10356 }
10357 if acct.starts_with('1') {
10359 if let Ok(n) = acct.parse::<u32>() {
10361 if (1000..=1499).contains(&n) {
10362 current_assets += net;
10363 }
10364 }
10365 } else if acct.starts_with('2') {
10366 if let Ok(n) = acct.parse::<u32>() {
10367 if (2000..=2499).contains(&n) {
10368 current_liabs -= net; } else if (2500..=2999).contains(&n) {
10371 total_debt -= net;
10373 }
10374 }
10375 }
10376 }
10377 }
10378
10379 let net_income = revenue - expenses;
10380 let working_capital = current_assets - current_liabs;
10381 let operating_cash_flow = net_income;
10384
10385 GoingConcernInput {
10386 entity_code: code.clone(),
10387 net_income,
10388 working_capital,
10389 operating_cash_flow,
10390 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
10391 assessment_date,
10392 }
10393 })
10394 .collect();
10395
10396 let assessments = if gc_inputs.is_empty() {
10397 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
10398 } else {
10399 gc_gen.generate_for_entities_with_inputs(
10400 &entity_codes,
10401 &gc_inputs,
10402 assessment_date,
10403 &period_label,
10404 )
10405 };
10406 info!(
10407 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
10408 assessments.len(),
10409 assessments.iter().filter(|a| matches!(
10410 a.auditor_conclusion,
10411 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
10412 )).count(),
10413 assessments.iter().filter(|a| matches!(
10414 a.auditor_conclusion,
10415 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
10416 )).count(),
10417 assessments.iter().filter(|a| matches!(
10418 a.auditor_conclusion,
10419 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
10420 )).count(),
10421 );
10422 snapshot.going_concern_assessments = assessments;
10423 }
10424
10425 {
10429 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
10430 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
10431 let entity_codes: Vec<String> = self
10432 .config
10433 .companies
10434 .iter()
10435 .map(|c| c.code.clone())
10436 .collect();
10437 let estimates = est_gen.generate_for_entities(&entity_codes);
10438 info!(
10439 "ISA 540 accounting estimates: {} estimates across {} entities \
10440 ({} with retrospective reviews, {} with auditor point estimates)",
10441 estimates.len(),
10442 entity_codes.len(),
10443 estimates
10444 .iter()
10445 .filter(|e| e.retrospective_review.is_some())
10446 .count(),
10447 estimates
10448 .iter()
10449 .filter(|e| e.auditor_point_estimate.is_some())
10450 .count(),
10451 );
10452 snapshot.accounting_estimates = estimates;
10453 }
10454
10455 {
10459 use datasynth_generators::audit::audit_opinion_generator::{
10460 AuditOpinionGenerator, AuditOpinionInput,
10461 };
10462
10463 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
10464
10465 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
10467 .engagements
10468 .iter()
10469 .map(|eng| {
10470 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
10472 .findings
10473 .iter()
10474 .filter(|f| f.engagement_id == eng.engagement_id)
10475 .cloned()
10476 .collect();
10477
10478 let gc = snapshot
10480 .going_concern_assessments
10481 .iter()
10482 .find(|g| g.entity_code == eng.client_entity_id)
10483 .cloned();
10484
10485 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
10487 snapshot.component_reports.clone();
10488
10489 let auditor = self
10490 .master_data
10491 .employees
10492 .first()
10493 .map(|e| e.display_name.clone())
10494 .unwrap_or_else(|| "Global Audit LLP".into());
10495
10496 let partner = self
10497 .master_data
10498 .employees
10499 .get(1)
10500 .map(|e| e.display_name.clone())
10501 .unwrap_or_else(|| eng.engagement_partner_id.clone());
10502
10503 AuditOpinionInput {
10504 entity_code: eng.client_entity_id.clone(),
10505 entity_name: eng.client_name.clone(),
10506 engagement_id: eng.engagement_id,
10507 period_end: eng.period_end_date,
10508 findings: eng_findings,
10509 going_concern: gc,
10510 component_reports: comp_reports,
10511 is_us_listed: {
10513 let fw = &self.config.audit_standards.isa_compliance.framework;
10514 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
10515 },
10516 auditor_name: auditor,
10517 engagement_partner: partner,
10518 }
10519 })
10520 .collect();
10521
10522 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
10523
10524 for go in &generated_opinions {
10525 snapshot
10526 .key_audit_matters
10527 .extend(go.key_audit_matters.clone());
10528 }
10529 snapshot.audit_opinions = generated_opinions
10530 .into_iter()
10531 .map(|go| go.opinion)
10532 .collect();
10533
10534 info!(
10535 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
10536 snapshot.audit_opinions.len(),
10537 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
10538 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
10539 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
10540 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
10541 );
10542 }
10543
10544 {
10548 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
10549
10550 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
10551
10552 for (i, company) in self.config.companies.iter().enumerate() {
10553 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
10555 .engagements
10556 .iter()
10557 .filter(|e| e.client_entity_id == company.code)
10558 .map(|e| e.engagement_id)
10559 .collect();
10560
10561 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
10562 .findings
10563 .iter()
10564 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
10565 .cloned()
10566 .collect();
10567
10568 let emp_count = self.master_data.employees.len();
10570 let ceo_name = if emp_count > 0 {
10571 self.master_data.employees[i % emp_count]
10572 .display_name
10573 .clone()
10574 } else {
10575 format!("CEO of {}", company.name)
10576 };
10577 let cfo_name = if emp_count > 1 {
10578 self.master_data.employees[(i + 1) % emp_count]
10579 .display_name
10580 .clone()
10581 } else {
10582 format!("CFO of {}", company.name)
10583 };
10584
10585 let materiality = snapshot
10587 .engagements
10588 .iter()
10589 .find(|e| e.client_entity_id == company.code)
10590 .map(|e| e.materiality)
10591 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
10592
10593 let input = SoxGeneratorInput {
10594 company_code: company.code.clone(),
10595 company_name: company.name.clone(),
10596 fiscal_year,
10597 period_end,
10598 findings: company_findings,
10599 ceo_name,
10600 cfo_name,
10601 materiality_threshold: materiality,
10602 revenue_percent: rust_decimal::Decimal::from(100),
10603 assets_percent: rust_decimal::Decimal::from(100),
10604 significant_accounts: vec![
10605 "Revenue".into(),
10606 "Accounts Receivable".into(),
10607 "Inventory".into(),
10608 "Fixed Assets".into(),
10609 "Accounts Payable".into(),
10610 ],
10611 };
10612
10613 let (certs, assessment) = sox_gen.generate(&input);
10614 snapshot.sox_302_certifications.extend(certs);
10615 snapshot.sox_404_assessments.push(assessment);
10616 }
10617
10618 info!(
10619 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
10620 snapshot.sox_302_certifications.len(),
10621 snapshot.sox_404_assessments.len(),
10622 snapshot
10623 .sox_404_assessments
10624 .iter()
10625 .filter(|a| a.icfr_effective)
10626 .count(),
10627 snapshot
10628 .sox_404_assessments
10629 .iter()
10630 .filter(|a| !a.icfr_effective)
10631 .count(),
10632 );
10633 }
10634
10635 {
10639 use datasynth_generators::audit::materiality_generator::{
10640 MaterialityGenerator, MaterialityInput,
10641 };
10642
10643 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
10644
10645 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
10649
10650 for company in &self.config.companies {
10651 let company_code = company.code.clone();
10652
10653 let company_revenue: rust_decimal::Decimal = entries
10655 .iter()
10656 .filter(|e| e.company_code() == company_code)
10657 .flat_map(|e| e.lines.iter())
10658 .filter(|l| l.account_code.starts_with('4'))
10659 .map(|l| l.credit_amount)
10660 .sum();
10661
10662 let total_assets: rust_decimal::Decimal = entries
10664 .iter()
10665 .filter(|e| e.company_code() == company_code)
10666 .flat_map(|e| e.lines.iter())
10667 .filter(|l| l.account_code.starts_with('1'))
10668 .map(|l| l.debit_amount)
10669 .sum();
10670
10671 let total_expenses: rust_decimal::Decimal = entries
10673 .iter()
10674 .filter(|e| e.company_code() == company_code)
10675 .flat_map(|e| e.lines.iter())
10676 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
10677 .map(|l| l.debit_amount)
10678 .sum();
10679
10680 let equity: rust_decimal::Decimal = entries
10682 .iter()
10683 .filter(|e| e.company_code() == company_code)
10684 .flat_map(|e| e.lines.iter())
10685 .filter(|l| l.account_code.starts_with('3'))
10686 .map(|l| l.credit_amount)
10687 .sum();
10688
10689 let pretax_income = company_revenue - total_expenses;
10690
10691 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
10693 let w = rust_decimal::Decimal::try_from(company.volume_weight)
10694 .unwrap_or(rust_decimal::Decimal::ONE);
10695 (
10696 total_revenue * w,
10697 total_revenue * w * rust_decimal::Decimal::from(3),
10698 total_revenue * w * rust_decimal::Decimal::new(1, 1),
10699 total_revenue * w * rust_decimal::Decimal::from(2),
10700 )
10701 } else {
10702 (company_revenue, total_assets, pretax_income, equity)
10703 };
10704
10705 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
10708 entity_code: company_code,
10709 period: format!("FY{}", fiscal_year),
10710 revenue: rev,
10711 pretax_income: pti,
10712 total_assets: assets,
10713 equity: eq,
10714 gross_profit,
10715 });
10716 }
10717
10718 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
10719
10720 info!(
10721 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
10722 {} total assets, {} equity benchmarks)",
10723 snapshot.materiality_calculations.len(),
10724 snapshot
10725 .materiality_calculations
10726 .iter()
10727 .filter(|m| matches!(
10728 m.benchmark,
10729 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
10730 ))
10731 .count(),
10732 snapshot
10733 .materiality_calculations
10734 .iter()
10735 .filter(|m| matches!(
10736 m.benchmark,
10737 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
10738 ))
10739 .count(),
10740 snapshot
10741 .materiality_calculations
10742 .iter()
10743 .filter(|m| matches!(
10744 m.benchmark,
10745 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
10746 ))
10747 .count(),
10748 snapshot
10749 .materiality_calculations
10750 .iter()
10751 .filter(|m| matches!(
10752 m.benchmark,
10753 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
10754 ))
10755 .count(),
10756 );
10757 }
10758
10759 {
10763 use datasynth_generators::audit::cra_generator::CraGenerator;
10764
10765 let mut cra_gen = CraGenerator::new(self.seed + 8315);
10766
10767 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
10769 .audit_scopes
10770 .iter()
10771 .map(|s| (s.entity_code.clone(), s.id.clone()))
10772 .collect();
10773
10774 for company in &self.config.companies {
10775 let cras = cra_gen.generate_for_entity(&company.code, None);
10776 let scope_id = entity_scope_map.get(&company.code).cloned();
10777 let cras_with_scope: Vec<_> = cras
10778 .into_iter()
10779 .map(|mut cra| {
10780 cra.scope_id = scope_id.clone();
10781 cra
10782 })
10783 .collect();
10784 snapshot.combined_risk_assessments.extend(cras_with_scope);
10785 }
10786
10787 let significant_count = snapshot
10788 .combined_risk_assessments
10789 .iter()
10790 .filter(|c| c.significant_risk)
10791 .count();
10792 let high_cra_count = snapshot
10793 .combined_risk_assessments
10794 .iter()
10795 .filter(|c| {
10796 matches!(
10797 c.combined_risk,
10798 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
10799 )
10800 })
10801 .count();
10802
10803 info!(
10804 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
10805 snapshot.combined_risk_assessments.len(),
10806 significant_count,
10807 high_cra_count,
10808 );
10809 }
10810
10811 {
10815 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
10816
10817 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
10818
10819 for company in &self.config.companies {
10821 let entity_code = company.code.clone();
10822
10823 let tolerable_error = snapshot
10825 .materiality_calculations
10826 .iter()
10827 .find(|m| m.entity_code == entity_code)
10828 .map(|m| m.tolerable_error);
10829
10830 let entity_cras: Vec<_> = snapshot
10832 .combined_risk_assessments
10833 .iter()
10834 .filter(|c| c.entity_code == entity_code)
10835 .cloned()
10836 .collect();
10837
10838 if !entity_cras.is_empty() {
10839 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
10840 snapshot.sampling_plans.extend(plans);
10841 snapshot.sampled_items.extend(items);
10842 }
10843 }
10844
10845 let misstatement_count = snapshot
10846 .sampled_items
10847 .iter()
10848 .filter(|i| i.misstatement_found)
10849 .count();
10850
10851 info!(
10852 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
10853 snapshot.sampling_plans.len(),
10854 snapshot.sampled_items.len(),
10855 misstatement_count,
10856 );
10857 }
10858
10859 {
10863 use datasynth_generators::audit::scots_generator::{
10864 ScotsGenerator, ScotsGeneratorConfig,
10865 };
10866
10867 let ic_enabled = self.config.intercompany.enabled;
10868
10869 let config = ScotsGeneratorConfig {
10870 intercompany_enabled: ic_enabled,
10871 ..ScotsGeneratorConfig::default()
10872 };
10873 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
10874
10875 for company in &self.config.companies {
10876 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
10877 snapshot
10878 .significant_transaction_classes
10879 .extend(entity_scots);
10880 }
10881
10882 let estimation_count = snapshot
10883 .significant_transaction_classes
10884 .iter()
10885 .filter(|s| {
10886 matches!(
10887 s.transaction_type,
10888 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
10889 )
10890 })
10891 .count();
10892
10893 info!(
10894 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
10895 snapshot.significant_transaction_classes.len(),
10896 estimation_count,
10897 );
10898 }
10899
10900 {
10904 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
10905
10906 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
10907 let entity_codes: Vec<String> = self
10908 .config
10909 .companies
10910 .iter()
10911 .map(|c| c.code.clone())
10912 .collect();
10913 let unusual_flags =
10914 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
10915 info!(
10916 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
10917 unusual_flags.len(),
10918 unusual_flags
10919 .iter()
10920 .filter(|f| matches!(
10921 f.severity,
10922 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
10923 ))
10924 .count(),
10925 unusual_flags
10926 .iter()
10927 .filter(|f| matches!(
10928 f.severity,
10929 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
10930 ))
10931 .count(),
10932 unusual_flags
10933 .iter()
10934 .filter(|f| matches!(
10935 f.severity,
10936 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
10937 ))
10938 .count(),
10939 );
10940 snapshot.unusual_items = unusual_flags;
10941 }
10942
10943 {
10947 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
10948
10949 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
10950 let entity_codes: Vec<String> = self
10951 .config
10952 .companies
10953 .iter()
10954 .map(|c| c.code.clone())
10955 .collect();
10956 let current_period_label = format!("FY{fiscal_year}");
10957 let prior_period_label = format!("FY{}", fiscal_year - 1);
10958 let analytical_rels = ar_gen.generate_for_entities(
10959 &entity_codes,
10960 entries,
10961 ¤t_period_label,
10962 &prior_period_label,
10963 );
10964 let out_of_range = analytical_rels
10965 .iter()
10966 .filter(|r| !r.within_expected_range)
10967 .count();
10968 info!(
10969 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
10970 analytical_rels.len(),
10971 out_of_range,
10972 );
10973 snapshot.analytical_relationships = analytical_rels;
10974 }
10975
10976 if let Some(pb) = pb {
10977 pb.finish_with_message(format!(
10978 "Audit data: {} engagements, {} workpapers, {} evidence, \
10979 {} confirmations, {} procedure steps, {} samples, \
10980 {} analytical, {} IA funcs, {} related parties, \
10981 {} component auditors, {} letters, {} subsequent events, \
10982 {} service orgs, {} going concern, {} accounting estimates, \
10983 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
10984 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
10985 {} unusual items, {} analytical relationships",
10986 snapshot.engagements.len(),
10987 snapshot.workpapers.len(),
10988 snapshot.evidence.len(),
10989 snapshot.confirmations.len(),
10990 snapshot.procedure_steps.len(),
10991 snapshot.samples.len(),
10992 snapshot.analytical_results.len(),
10993 snapshot.ia_functions.len(),
10994 snapshot.related_parties.len(),
10995 snapshot.component_auditors.len(),
10996 snapshot.engagement_letters.len(),
10997 snapshot.subsequent_events.len(),
10998 snapshot.service_organizations.len(),
10999 snapshot.going_concern_assessments.len(),
11000 snapshot.accounting_estimates.len(),
11001 snapshot.audit_opinions.len(),
11002 snapshot.key_audit_matters.len(),
11003 snapshot.sox_302_certifications.len(),
11004 snapshot.sox_404_assessments.len(),
11005 snapshot.materiality_calculations.len(),
11006 snapshot.combined_risk_assessments.len(),
11007 snapshot.sampling_plans.len(),
11008 snapshot.significant_transaction_classes.len(),
11009 snapshot.unusual_items.len(),
11010 snapshot.analytical_relationships.len(),
11011 ));
11012 }
11013
11014 {
11021 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11022 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11023 debug!(
11024 "PCAOB-ISA mappings generated: {} mappings",
11025 snapshot.isa_pcaob_mappings.len()
11026 );
11027 }
11028
11029 {
11036 use datasynth_standards::audit::isa_reference::IsaStandard;
11037 snapshot.isa_mappings = IsaStandard::standard_entries();
11038 debug!(
11039 "ISA standard entries generated: {} standards",
11040 snapshot.isa_mappings.len()
11041 );
11042 }
11043
11044 {
11047 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
11048 .engagements
11049 .iter()
11050 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
11051 .collect();
11052
11053 for rpt in &mut snapshot.related_party_transactions {
11054 if rpt.journal_entry_id.is_some() {
11055 continue; }
11057 let entity = engagement_by_id
11058 .get(&rpt.engagement_id.to_string())
11059 .copied()
11060 .unwrap_or("");
11061
11062 let best_je = entries
11064 .iter()
11065 .filter(|je| je.header.company_code == entity)
11066 .min_by_key(|je| {
11067 (je.header.posting_date - rpt.transaction_date)
11068 .num_days()
11069 .abs()
11070 });
11071
11072 if let Some(je) = best_je {
11073 rpt.journal_entry_id = Some(je.header.document_id.to_string());
11074 }
11075 }
11076
11077 let linked = snapshot
11078 .related_party_transactions
11079 .iter()
11080 .filter(|t| t.journal_entry_id.is_some())
11081 .count();
11082 debug!(
11083 "Linked {}/{} related party transactions to journal entries",
11084 linked,
11085 snapshot.related_party_transactions.len()
11086 );
11087 }
11088
11089 Ok(snapshot)
11090 }
11091
11092 fn generate_audit_data_with_fsm(
11099 &mut self,
11100 entries: &[JournalEntry],
11101 ) -> SynthResult<AuditSnapshot> {
11102 use datasynth_audit_fsm::{
11103 context::EngagementContext,
11104 engine::AuditFsmEngine,
11105 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
11106 };
11107 use rand::SeedableRng;
11108 use rand_chacha::ChaCha8Rng;
11109
11110 info!("Audit FSM: generating audit data via FSM engine");
11111
11112 let fsm_config = self
11113 .config
11114 .audit
11115 .fsm
11116 .as_ref()
11117 .expect("FSM config must be present when FSM is enabled");
11118
11119 let bwp = match fsm_config.blueprint.as_str() {
11121 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
11122 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
11123 _ => {
11124 warn!(
11125 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
11126 fsm_config.blueprint
11127 );
11128 BlueprintWithPreconditions::load_builtin_fsa()
11129 }
11130 }
11131 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
11132
11133 let overlay = match fsm_config.overlay.as_str() {
11135 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
11136 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
11137 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
11138 _ => {
11139 warn!(
11140 "Unknown FSM overlay '{}', falling back to builtin:default",
11141 fsm_config.overlay
11142 );
11143 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
11144 }
11145 }
11146 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
11147
11148 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11150 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11151 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11152
11153 let company = self.config.companies.first();
11155 let company_code = company
11156 .map(|c| c.code.clone())
11157 .unwrap_or_else(|| "UNKNOWN".to_string());
11158 let company_name = company
11159 .map(|c| c.name.clone())
11160 .unwrap_or_else(|| "Unknown Company".to_string());
11161 let currency = company
11162 .map(|c| c.currency.clone())
11163 .unwrap_or_else(|| "USD".to_string());
11164
11165 let entity_entries: Vec<_> = entries
11167 .iter()
11168 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
11169 .cloned()
11170 .collect();
11171 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
11175 .iter()
11176 .flat_map(|e| e.lines.iter())
11177 .filter(|l| l.account_code.starts_with('4'))
11178 .map(|l| l.credit_amount - l.debit_amount)
11179 .sum();
11180
11181 let total_assets: rust_decimal::Decimal = entries
11182 .iter()
11183 .flat_map(|e| e.lines.iter())
11184 .filter(|l| l.account_code.starts_with('1'))
11185 .map(|l| l.debit_amount - l.credit_amount)
11186 .sum();
11187
11188 let total_expenses: rust_decimal::Decimal = entries
11189 .iter()
11190 .flat_map(|e| e.lines.iter())
11191 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11192 .map(|l| l.debit_amount)
11193 .sum();
11194
11195 let equity: rust_decimal::Decimal = entries
11196 .iter()
11197 .flat_map(|e| e.lines.iter())
11198 .filter(|l| l.account_code.starts_with('3'))
11199 .map(|l| l.credit_amount - l.debit_amount)
11200 .sum();
11201
11202 let total_debt: rust_decimal::Decimal = entries
11203 .iter()
11204 .flat_map(|e| e.lines.iter())
11205 .filter(|l| l.account_code.starts_with('2'))
11206 .map(|l| l.credit_amount - l.debit_amount)
11207 .sum();
11208
11209 let pretax_income = total_revenue - total_expenses;
11210
11211 let cogs: rust_decimal::Decimal = entries
11212 .iter()
11213 .flat_map(|e| e.lines.iter())
11214 .filter(|l| l.account_code.starts_with('5'))
11215 .map(|l| l.debit_amount)
11216 .sum();
11217 let gross_profit = total_revenue - cogs;
11218
11219 let current_assets: rust_decimal::Decimal = entries
11220 .iter()
11221 .flat_map(|e| e.lines.iter())
11222 .filter(|l| {
11223 l.account_code.starts_with("10")
11224 || l.account_code.starts_with("11")
11225 || l.account_code.starts_with("12")
11226 || l.account_code.starts_with("13")
11227 })
11228 .map(|l| l.debit_amount - l.credit_amount)
11229 .sum();
11230 let current_liabilities: rust_decimal::Decimal = entries
11231 .iter()
11232 .flat_map(|e| e.lines.iter())
11233 .filter(|l| {
11234 l.account_code.starts_with("20")
11235 || l.account_code.starts_with("21")
11236 || l.account_code.starts_with("22")
11237 })
11238 .map(|l| l.credit_amount - l.debit_amount)
11239 .sum();
11240 let working_capital = current_assets - current_liabilities;
11241
11242 let depreciation: rust_decimal::Decimal = entries
11243 .iter()
11244 .flat_map(|e| e.lines.iter())
11245 .filter(|l| l.account_code.starts_with("60"))
11246 .map(|l| l.debit_amount)
11247 .sum();
11248 let operating_cash_flow = pretax_income + depreciation;
11249
11250 let accounts: Vec<String> = self
11252 .coa
11253 .as_ref()
11254 .map(|coa| {
11255 coa.get_postable_accounts()
11256 .iter()
11257 .map(|acc| acc.account_code().to_string())
11258 .collect()
11259 })
11260 .unwrap_or_default();
11261
11262 let team_member_ids: Vec<String> = self
11264 .master_data
11265 .employees
11266 .iter()
11267 .take(8) .map(|e| e.employee_id.clone())
11269 .collect();
11270 let team_member_pairs: Vec<(String, String)> = self
11271 .master_data
11272 .employees
11273 .iter()
11274 .take(8)
11275 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
11276 .collect();
11277
11278 let vendor_names: Vec<String> = self
11279 .master_data
11280 .vendors
11281 .iter()
11282 .map(|v| v.name.clone())
11283 .collect();
11284 let customer_names: Vec<String> = self
11285 .master_data
11286 .customers
11287 .iter()
11288 .map(|c| c.name.clone())
11289 .collect();
11290
11291 let entity_codes: Vec<String> = self
11292 .config
11293 .companies
11294 .iter()
11295 .map(|c| c.code.clone())
11296 .collect();
11297
11298 let journal_entry_ids: Vec<String> = entries
11300 .iter()
11301 .take(50)
11302 .map(|e| e.header.document_id.to_string())
11303 .collect();
11304
11305 let mut account_balances = std::collections::HashMap::<String, f64>::new();
11307 for entry in entries {
11308 for line in &entry.lines {
11309 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
11310 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
11311 *account_balances
11312 .entry(line.account_code.clone())
11313 .or_insert(0.0) += debit_f64 - credit_f64;
11314 }
11315 }
11316
11317 let control_ids: Vec<String> = Vec::new();
11322 let anomaly_refs: Vec<String> = Vec::new();
11323
11324 let mut context = EngagementContext {
11325 company_code,
11326 company_name,
11327 fiscal_year: start_date.year(),
11328 currency,
11329 total_revenue,
11330 total_assets,
11331 engagement_start: start_date,
11332 report_date: period_end,
11333 pretax_income,
11334 equity,
11335 gross_profit,
11336 working_capital,
11337 operating_cash_flow,
11338 total_debt,
11339 team_member_ids,
11340 team_member_pairs,
11341 accounts,
11342 vendor_names,
11343 customer_names,
11344 journal_entry_ids,
11345 account_balances,
11346 control_ids,
11347 anomaly_refs,
11348 journal_entries: entries.to_vec(),
11349 is_us_listed: false,
11350 entity_codes,
11351 auditor_firm_name: "DataSynth Audit LLP".into(),
11352 accounting_framework: self
11353 .config
11354 .accounting_standards
11355 .framework
11356 .map(|f| match f {
11357 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
11358 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
11359 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
11360 "French GAAP"
11361 }
11362 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
11363 "German GAAP"
11364 }
11365 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
11366 "Dual Reporting"
11367 }
11368 })
11369 .unwrap_or("IFRS")
11370 .into(),
11371 };
11372
11373 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
11375 let rng = ChaCha8Rng::seed_from_u64(seed);
11376 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
11377
11378 let mut result = engine
11379 .run_engagement(&context)
11380 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
11381
11382 info!(
11383 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
11384 {} phases completed, duration {:.1}h",
11385 result.event_log.len(),
11386 result.artifacts.total_artifacts(),
11387 result.anomalies.len(),
11388 result.phases_completed.len(),
11389 result.total_duration_hours,
11390 );
11391
11392 let tb_entity = context.company_code.clone();
11394 let tb_fy = context.fiscal_year;
11395 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
11396 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
11397 entries,
11398 &tb_entity,
11399 tb_fy,
11400 self.coa.as_ref().map(|c| c.as_ref()),
11401 );
11402
11403 let bag = result.artifacts;
11405 let mut snapshot = AuditSnapshot {
11406 engagements: bag.engagements,
11407 engagement_letters: bag.engagement_letters,
11408 materiality_calculations: bag.materiality_calculations,
11409 risk_assessments: bag.risk_assessments,
11410 combined_risk_assessments: bag.combined_risk_assessments,
11411 workpapers: bag.workpapers,
11412 evidence: bag.evidence,
11413 findings: bag.findings,
11414 judgments: bag.judgments,
11415 sampling_plans: bag.sampling_plans,
11416 sampled_items: bag.sampled_items,
11417 analytical_results: bag.analytical_results,
11418 going_concern_assessments: bag.going_concern_assessments,
11419 subsequent_events: bag.subsequent_events,
11420 audit_opinions: bag.audit_opinions,
11421 key_audit_matters: bag.key_audit_matters,
11422 procedure_steps: bag.procedure_steps,
11423 samples: bag.samples,
11424 confirmations: bag.confirmations,
11425 confirmation_responses: bag.confirmation_responses,
11426 fsm_event_trail: Some(result.event_log),
11428 ..Default::default()
11430 };
11431
11432 {
11434 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11435 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11436 }
11437 {
11438 use datasynth_standards::audit::isa_reference::IsaStandard;
11439 snapshot.isa_mappings = IsaStandard::standard_entries();
11440 }
11441
11442 info!(
11443 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
11444 {} risk assessments, {} findings, {} materiality calcs",
11445 snapshot.engagements.len(),
11446 snapshot.workpapers.len(),
11447 snapshot.evidence.len(),
11448 snapshot.risk_assessments.len(),
11449 snapshot.findings.len(),
11450 snapshot.materiality_calculations.len(),
11451 );
11452
11453 Ok(snapshot)
11454 }
11455
11456 fn export_graphs(
11463 &mut self,
11464 entries: &[JournalEntry],
11465 _coa: &Arc<ChartOfAccounts>,
11466 stats: &mut EnhancedGenerationStatistics,
11467 ) -> SynthResult<GraphExportSnapshot> {
11468 let pb = self.create_progress_bar(100, "Exporting Graphs");
11469
11470 let mut snapshot = GraphExportSnapshot::default();
11471
11472 let output_dir = self
11474 .output_path
11475 .clone()
11476 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
11477 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
11478
11479 for graph_type in &self.config.graph_export.graph_types {
11481 if let Some(pb) = &pb {
11482 pb.inc(10);
11483 }
11484
11485 let graph_config = TransactionGraphConfig {
11487 include_vendors: false,
11488 include_customers: false,
11489 create_debit_credit_edges: true,
11490 include_document_nodes: graph_type.include_document_nodes,
11491 min_edge_weight: graph_type.min_edge_weight,
11492 aggregate_parallel_edges: graph_type.aggregate_edges,
11493 framework: None,
11494 };
11495
11496 let mut builder = TransactionGraphBuilder::new(graph_config);
11497 builder.add_journal_entries(entries);
11498 let graph = builder.build();
11499
11500 stats.graph_node_count += graph.node_count();
11502 stats.graph_edge_count += graph.edge_count();
11503
11504 if let Some(pb) = &pb {
11505 pb.inc(40);
11506 }
11507
11508 for format in &self.config.graph_export.formats {
11510 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
11511
11512 if let Err(e) = std::fs::create_dir_all(&format_dir) {
11514 warn!("Failed to create graph output directory: {}", e);
11515 continue;
11516 }
11517
11518 match format {
11519 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
11520 let pyg_config = PyGExportConfig {
11521 common: datasynth_graph::CommonExportConfig {
11522 export_node_features: true,
11523 export_edge_features: true,
11524 export_node_labels: true,
11525 export_edge_labels: true,
11526 export_masks: true,
11527 train_ratio: self.config.graph_export.train_ratio,
11528 val_ratio: self.config.graph_export.validation_ratio,
11529 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
11530 },
11531 one_hot_categoricals: false,
11532 };
11533
11534 let exporter = PyGExporter::new(pyg_config);
11535 match exporter.export(&graph, &format_dir) {
11536 Ok(metadata) => {
11537 snapshot.exports.insert(
11538 format!("{}_{}", graph_type.name, "pytorch_geometric"),
11539 GraphExportInfo {
11540 name: graph_type.name.clone(),
11541 format: "pytorch_geometric".to_string(),
11542 output_path: format_dir.clone(),
11543 node_count: metadata.num_nodes,
11544 edge_count: metadata.num_edges,
11545 },
11546 );
11547 snapshot.graph_count += 1;
11548 }
11549 Err(e) => {
11550 warn!("Failed to export PyTorch Geometric graph: {}", e);
11551 }
11552 }
11553 }
11554 datasynth_config::schema::GraphExportFormat::Neo4j => {
11555 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
11556
11557 let neo4j_config = Neo4jExportConfig {
11558 export_node_properties: true,
11559 export_edge_properties: true,
11560 export_features: true,
11561 generate_cypher: true,
11562 generate_admin_import: true,
11563 database_name: "synth".to_string(),
11564 cypher_batch_size: 1000,
11565 };
11566
11567 let exporter = Neo4jExporter::new(neo4j_config);
11568 match exporter.export(&graph, &format_dir) {
11569 Ok(metadata) => {
11570 snapshot.exports.insert(
11571 format!("{}_{}", graph_type.name, "neo4j"),
11572 GraphExportInfo {
11573 name: graph_type.name.clone(),
11574 format: "neo4j".to_string(),
11575 output_path: format_dir.clone(),
11576 node_count: metadata.num_nodes,
11577 edge_count: metadata.num_edges,
11578 },
11579 );
11580 snapshot.graph_count += 1;
11581 }
11582 Err(e) => {
11583 warn!("Failed to export Neo4j graph: {}", e);
11584 }
11585 }
11586 }
11587 datasynth_config::schema::GraphExportFormat::Dgl => {
11588 use datasynth_graph::{DGLExportConfig, DGLExporter};
11589
11590 let dgl_config = DGLExportConfig {
11591 common: datasynth_graph::CommonExportConfig {
11592 export_node_features: true,
11593 export_edge_features: true,
11594 export_node_labels: true,
11595 export_edge_labels: true,
11596 export_masks: true,
11597 train_ratio: self.config.graph_export.train_ratio,
11598 val_ratio: self.config.graph_export.validation_ratio,
11599 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
11600 },
11601 heterogeneous: self.config.graph_export.dgl.heterogeneous,
11602 include_pickle_script: true, };
11604
11605 let exporter = DGLExporter::new(dgl_config);
11606 match exporter.export(&graph, &format_dir) {
11607 Ok(metadata) => {
11608 snapshot.exports.insert(
11609 format!("{}_{}", graph_type.name, "dgl"),
11610 GraphExportInfo {
11611 name: graph_type.name.clone(),
11612 format: "dgl".to_string(),
11613 output_path: format_dir.clone(),
11614 node_count: metadata.common.num_nodes,
11615 edge_count: metadata.common.num_edges,
11616 },
11617 );
11618 snapshot.graph_count += 1;
11619 }
11620 Err(e) => {
11621 warn!("Failed to export DGL graph: {}", e);
11622 }
11623 }
11624 }
11625 datasynth_config::schema::GraphExportFormat::RustGraph => {
11626 use datasynth_graph::{
11627 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
11628 };
11629
11630 let rustgraph_config = RustGraphExportConfig {
11631 include_features: true,
11632 include_temporal: true,
11633 include_labels: true,
11634 source_name: "datasynth".to_string(),
11635 batch_id: None,
11636 output_format: RustGraphOutputFormat::JsonLines,
11637 export_node_properties: true,
11638 export_edge_properties: true,
11639 pretty_print: false,
11640 };
11641
11642 let exporter = RustGraphExporter::new(rustgraph_config);
11643 match exporter.export(&graph, &format_dir) {
11644 Ok(metadata) => {
11645 snapshot.exports.insert(
11646 format!("{}_{}", graph_type.name, "rustgraph"),
11647 GraphExportInfo {
11648 name: graph_type.name.clone(),
11649 format: "rustgraph".to_string(),
11650 output_path: format_dir.clone(),
11651 node_count: metadata.num_nodes,
11652 edge_count: metadata.num_edges,
11653 },
11654 );
11655 snapshot.graph_count += 1;
11656 }
11657 Err(e) => {
11658 warn!("Failed to export RustGraph: {}", e);
11659 }
11660 }
11661 }
11662 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
11663 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
11665 }
11666 }
11667 }
11668
11669 if let Some(pb) = &pb {
11670 pb.inc(40);
11671 }
11672 }
11673
11674 stats.graph_export_count = snapshot.graph_count;
11675 snapshot.exported = snapshot.graph_count > 0;
11676
11677 if let Some(pb) = pb {
11678 pb.finish_with_message(format!(
11679 "Graphs exported: {} graphs ({} nodes, {} edges)",
11680 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
11681 ));
11682 }
11683
11684 Ok(snapshot)
11685 }
11686
11687 fn build_additional_graphs(
11692 &self,
11693 banking: &BankingSnapshot,
11694 intercompany: &IntercompanySnapshot,
11695 entries: &[JournalEntry],
11696 stats: &mut EnhancedGenerationStatistics,
11697 ) {
11698 let output_dir = self
11699 .output_path
11700 .clone()
11701 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
11702 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
11703
11704 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
11706 info!("Phase 10c: Building banking network graph");
11707 let config = BankingGraphConfig::default();
11708 let mut builder = BankingGraphBuilder::new(config);
11709 builder.add_customers(&banking.customers);
11710 builder.add_accounts(&banking.accounts, &banking.customers);
11711 builder.add_transactions(&banking.transactions);
11712 let graph = builder.build();
11713
11714 let node_count = graph.node_count();
11715 let edge_count = graph.edge_count();
11716 stats.graph_node_count += node_count;
11717 stats.graph_edge_count += edge_count;
11718
11719 for format in &self.config.graph_export.formats {
11721 if matches!(
11722 format,
11723 datasynth_config::schema::GraphExportFormat::PytorchGeometric
11724 ) {
11725 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
11726 if let Err(e) = std::fs::create_dir_all(&format_dir) {
11727 warn!("Failed to create banking graph output dir: {}", e);
11728 continue;
11729 }
11730 let pyg_config = PyGExportConfig::default();
11731 let exporter = PyGExporter::new(pyg_config);
11732 if let Err(e) = exporter.export(&graph, &format_dir) {
11733 warn!("Failed to export banking graph as PyG: {}", e);
11734 } else {
11735 info!(
11736 "Banking network graph exported: {} nodes, {} edges",
11737 node_count, edge_count
11738 );
11739 }
11740 }
11741 }
11742 }
11743
11744 let approval_entries: Vec<_> = entries
11746 .iter()
11747 .filter(|je| je.header.approval_workflow.is_some())
11748 .collect();
11749
11750 if !approval_entries.is_empty() {
11751 info!(
11752 "Phase 10c: Building approval network graph ({} entries with approvals)",
11753 approval_entries.len()
11754 );
11755 let config = ApprovalGraphConfig::default();
11756 let mut builder = ApprovalGraphBuilder::new(config);
11757
11758 for je in &approval_entries {
11759 if let Some(ref wf) = je.header.approval_workflow {
11760 for action in &wf.actions {
11761 let record = datasynth_core::models::ApprovalRecord {
11762 approval_id: format!(
11763 "APR-{}-{}",
11764 je.header.document_id, action.approval_level
11765 ),
11766 document_number: je.header.document_id.to_string(),
11767 document_type: "JE".to_string(),
11768 company_code: je.company_code().to_string(),
11769 requester_id: wf.preparer_id.clone(),
11770 requester_name: Some(wf.preparer_name.clone()),
11771 approver_id: action.actor_id.clone(),
11772 approver_name: action.actor_name.clone(),
11773 approval_date: je.posting_date(),
11774 action: format!("{:?}", action.action),
11775 amount: wf.amount,
11776 approval_limit: None,
11777 comments: action.comments.clone(),
11778 delegation_from: None,
11779 is_auto_approved: false,
11780 };
11781 builder.add_approval(&record);
11782 }
11783 }
11784 }
11785
11786 let graph = builder.build();
11787 let node_count = graph.node_count();
11788 let edge_count = graph.edge_count();
11789 stats.graph_node_count += node_count;
11790 stats.graph_edge_count += edge_count;
11791
11792 for format in &self.config.graph_export.formats {
11794 if matches!(
11795 format,
11796 datasynth_config::schema::GraphExportFormat::PytorchGeometric
11797 ) {
11798 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
11799 if let Err(e) = std::fs::create_dir_all(&format_dir) {
11800 warn!("Failed to create approval graph output dir: {}", e);
11801 continue;
11802 }
11803 let pyg_config = PyGExportConfig::default();
11804 let exporter = PyGExporter::new(pyg_config);
11805 if let Err(e) = exporter.export(&graph, &format_dir) {
11806 warn!("Failed to export approval graph as PyG: {}", e);
11807 } else {
11808 info!(
11809 "Approval network graph exported: {} nodes, {} edges",
11810 node_count, edge_count
11811 );
11812 }
11813 }
11814 }
11815 }
11816
11817 if self.config.companies.len() >= 2 {
11819 info!(
11820 "Phase 10c: Building entity relationship graph ({} companies)",
11821 self.config.companies.len()
11822 );
11823
11824 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11825 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
11826
11827 let parent_code = &self.config.companies[0].code;
11829 let mut companies: Vec<datasynth_core::models::Company> =
11830 Vec::with_capacity(self.config.companies.len());
11831
11832 let first = &self.config.companies[0];
11834 companies.push(datasynth_core::models::Company::parent(
11835 &first.code,
11836 &first.name,
11837 &first.country,
11838 &first.currency,
11839 ));
11840
11841 for cc in self.config.companies.iter().skip(1) {
11843 companies.push(datasynth_core::models::Company::subsidiary(
11844 &cc.code,
11845 &cc.name,
11846 &cc.country,
11847 &cc.currency,
11848 parent_code,
11849 rust_decimal::Decimal::from(100),
11850 ));
11851 }
11852
11853 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
11855 self.config
11856 .companies
11857 .iter()
11858 .skip(1)
11859 .enumerate()
11860 .map(|(i, cc)| {
11861 let mut rel =
11862 datasynth_core::models::intercompany::IntercompanyRelationship::new(
11863 format!("REL{:03}", i + 1),
11864 parent_code.clone(),
11865 cc.code.clone(),
11866 rust_decimal::Decimal::from(100),
11867 start_date,
11868 );
11869 rel.functional_currency = cc.currency.clone();
11870 rel
11871 })
11872 .collect();
11873
11874 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
11875 builder.add_companies(&companies);
11876 builder.add_ownership_relationships(&relationships);
11877
11878 for pair in &intercompany.matched_pairs {
11880 builder.add_intercompany_edge(
11881 &pair.seller_company,
11882 &pair.buyer_company,
11883 pair.amount,
11884 &format!("{:?}", pair.transaction_type),
11885 );
11886 }
11887
11888 let graph = builder.build();
11889 let node_count = graph.node_count();
11890 let edge_count = graph.edge_count();
11891 stats.graph_node_count += node_count;
11892 stats.graph_edge_count += edge_count;
11893
11894 for format in &self.config.graph_export.formats {
11896 if matches!(
11897 format,
11898 datasynth_config::schema::GraphExportFormat::PytorchGeometric
11899 ) {
11900 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
11901 if let Err(e) = std::fs::create_dir_all(&format_dir) {
11902 warn!("Failed to create entity graph output dir: {}", e);
11903 continue;
11904 }
11905 let pyg_config = PyGExportConfig::default();
11906 let exporter = PyGExporter::new(pyg_config);
11907 if let Err(e) = exporter.export(&graph, &format_dir) {
11908 warn!("Failed to export entity graph as PyG: {}", e);
11909 } else {
11910 info!(
11911 "Entity relationship graph exported: {} nodes, {} edges",
11912 node_count, edge_count
11913 );
11914 }
11915 }
11916 }
11917 } else {
11918 debug!(
11919 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
11920 self.config.companies.len()
11921 );
11922 }
11923 }
11924
11925 #[allow(clippy::too_many_arguments)]
11932 fn export_hypergraph(
11933 &self,
11934 coa: &Arc<ChartOfAccounts>,
11935 entries: &[JournalEntry],
11936 document_flows: &DocumentFlowSnapshot,
11937 sourcing: &SourcingSnapshot,
11938 hr: &HrSnapshot,
11939 manufacturing: &ManufacturingSnapshot,
11940 banking: &BankingSnapshot,
11941 audit: &AuditSnapshot,
11942 financial_reporting: &FinancialReportingSnapshot,
11943 ocpm: &OcpmSnapshot,
11944 compliance: &ComplianceRegulationsSnapshot,
11945 stats: &mut EnhancedGenerationStatistics,
11946 ) -> SynthResult<HypergraphExportInfo> {
11947 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
11948 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
11949 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
11950 use datasynth_graph::models::hypergraph::AggregationStrategy;
11951
11952 let hg_settings = &self.config.graph_export.hypergraph;
11953
11954 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
11956 "truncate" => AggregationStrategy::Truncate,
11957 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
11958 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
11959 "importance_sample" => AggregationStrategy::ImportanceSample,
11960 _ => AggregationStrategy::PoolByCounterparty,
11961 };
11962
11963 let builder_config = HypergraphConfig {
11964 max_nodes: hg_settings.max_nodes,
11965 aggregation_strategy,
11966 include_coso: hg_settings.governance_layer.include_coso,
11967 include_controls: hg_settings.governance_layer.include_controls,
11968 include_sox: hg_settings.governance_layer.include_sox,
11969 include_vendors: hg_settings.governance_layer.include_vendors,
11970 include_customers: hg_settings.governance_layer.include_customers,
11971 include_employees: hg_settings.governance_layer.include_employees,
11972 include_p2p: hg_settings.process_layer.include_p2p,
11973 include_o2c: hg_settings.process_layer.include_o2c,
11974 include_s2c: hg_settings.process_layer.include_s2c,
11975 include_h2r: hg_settings.process_layer.include_h2r,
11976 include_mfg: hg_settings.process_layer.include_mfg,
11977 include_bank: hg_settings.process_layer.include_bank,
11978 include_audit: hg_settings.process_layer.include_audit,
11979 include_r2r: hg_settings.process_layer.include_r2r,
11980 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
11981 docs_per_counterparty_threshold: hg_settings
11982 .process_layer
11983 .docs_per_counterparty_threshold,
11984 include_accounts: hg_settings.accounting_layer.include_accounts,
11985 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
11986 include_cross_layer_edges: hg_settings.cross_layer.enabled,
11987 include_compliance: self.config.compliance_regulations.enabled,
11988 include_tax: true,
11989 include_treasury: true,
11990 include_esg: true,
11991 include_project: true,
11992 include_intercompany: true,
11993 include_temporal_events: true,
11994 };
11995
11996 let mut builder = HypergraphBuilder::new(builder_config);
11997
11998 builder.add_coso_framework();
12000
12001 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
12004 let controls = InternalControl::standard_controls();
12005 builder.add_controls(&controls);
12006 }
12007
12008 builder.add_vendors(&self.master_data.vendors);
12010 builder.add_customers(&self.master_data.customers);
12011 builder.add_employees(&self.master_data.employees);
12012
12013 builder.add_p2p_documents(
12015 &document_flows.purchase_orders,
12016 &document_flows.goods_receipts,
12017 &document_flows.vendor_invoices,
12018 &document_flows.payments,
12019 );
12020 builder.add_o2c_documents(
12021 &document_flows.sales_orders,
12022 &document_flows.deliveries,
12023 &document_flows.customer_invoices,
12024 );
12025 builder.add_s2c_documents(
12026 &sourcing.sourcing_projects,
12027 &sourcing.qualifications,
12028 &sourcing.rfx_events,
12029 &sourcing.bids,
12030 &sourcing.bid_evaluations,
12031 &sourcing.contracts,
12032 );
12033 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
12034 builder.add_mfg_documents(
12035 &manufacturing.production_orders,
12036 &manufacturing.quality_inspections,
12037 &manufacturing.cycle_counts,
12038 );
12039 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
12040 builder.add_audit_documents(
12041 &audit.engagements,
12042 &audit.workpapers,
12043 &audit.findings,
12044 &audit.evidence,
12045 &audit.risk_assessments,
12046 &audit.judgments,
12047 &audit.materiality_calculations,
12048 &audit.audit_opinions,
12049 &audit.going_concern_assessments,
12050 );
12051 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
12052
12053 if let Some(ref event_log) = ocpm.event_log {
12055 builder.add_ocpm_events(event_log);
12056 }
12057
12058 if self.config.compliance_regulations.enabled
12060 && hg_settings.governance_layer.include_controls
12061 {
12062 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
12064 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
12065 .standard_records
12066 .iter()
12067 .filter_map(|r| {
12068 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
12069 registry.get(&sid).cloned()
12070 })
12071 .collect();
12072
12073 builder.add_compliance_regulations(
12074 &standards,
12075 &compliance.findings,
12076 &compliance.filings,
12077 );
12078 }
12079
12080 builder.add_accounts(coa);
12082 builder.add_journal_entries_as_hyperedges(entries);
12083
12084 let hypergraph = builder.build();
12086
12087 let output_dir = self
12089 .output_path
12090 .clone()
12091 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12092 let hg_dir = output_dir
12093 .join(&self.config.graph_export.output_subdirectory)
12094 .join(&hg_settings.output_subdirectory);
12095
12096 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
12098 "unified" => {
12099 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12100 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12101 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
12102 })?;
12103 (
12104 metadata.num_nodes,
12105 metadata.num_edges,
12106 metadata.num_hyperedges,
12107 )
12108 }
12109 _ => {
12110 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
12112 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12113 SynthError::generation(format!("Hypergraph export failed: {e}"))
12114 })?;
12115 (
12116 metadata.num_nodes,
12117 metadata.num_edges,
12118 metadata.num_hyperedges,
12119 )
12120 }
12121 };
12122
12123 #[cfg(feature = "streaming")]
12125 if let Some(ref target_url) = hg_settings.stream_target {
12126 use crate::stream_client::{StreamClient, StreamConfig};
12127 use std::io::Write as _;
12128
12129 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
12130 let stream_config = StreamConfig {
12131 target_url: target_url.clone(),
12132 batch_size: hg_settings.stream_batch_size,
12133 api_key,
12134 ..StreamConfig::default()
12135 };
12136
12137 match StreamClient::new(stream_config) {
12138 Ok(mut client) => {
12139 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12140 match exporter.export_to_writer(&hypergraph, &mut client) {
12141 Ok(_) => {
12142 if let Err(e) = client.flush() {
12143 warn!("Failed to flush stream client: {}", e);
12144 } else {
12145 info!("Streamed {} records to {}", client.total_sent(), target_url);
12146 }
12147 }
12148 Err(e) => {
12149 warn!("Streaming export failed: {}", e);
12150 }
12151 }
12152 }
12153 Err(e) => {
12154 warn!("Failed to create stream client: {}", e);
12155 }
12156 }
12157 }
12158
12159 stats.graph_node_count += num_nodes;
12161 stats.graph_edge_count += num_edges;
12162 stats.graph_export_count += 1;
12163
12164 Ok(HypergraphExportInfo {
12165 node_count: num_nodes,
12166 edge_count: num_edges,
12167 hyperedge_count: num_hyperedges,
12168 output_path: hg_dir,
12169 })
12170 }
12171
12172 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
12177 let pb = self.create_progress_bar(100, "Generating Banking Data");
12178
12179 let orchestrator = BankingOrchestratorBuilder::new()
12181 .config(self.config.banking.clone())
12182 .seed(self.seed + 9000)
12183 .country_pack(self.primary_pack().clone())
12184 .build();
12185
12186 if let Some(pb) = &pb {
12187 pb.inc(10);
12188 }
12189
12190 let result = orchestrator.generate();
12192
12193 if let Some(pb) = &pb {
12194 pb.inc(90);
12195 pb.finish_with_message(format!(
12196 "Banking: {} customers, {} transactions",
12197 result.customers.len(),
12198 result.transactions.len()
12199 ));
12200 }
12201
12202 let mut banking_customers = result.customers;
12207 let core_customers = &self.master_data.customers;
12208 if !core_customers.is_empty() {
12209 for (i, bc) in banking_customers.iter_mut().enumerate() {
12210 let core = &core_customers[i % core_customers.len()];
12211 bc.name = CustomerName::business(&core.name);
12212 bc.residence_country = core.country.clone();
12213 bc.enterprise_customer_id = Some(core.customer_id.clone());
12214 }
12215 debug!(
12216 "Cross-referenced {} banking customers with {} core customers",
12217 banking_customers.len(),
12218 core_customers.len()
12219 );
12220 }
12221
12222 Ok(BankingSnapshot {
12223 customers: banking_customers,
12224 accounts: result.accounts,
12225 transactions: result.transactions,
12226 transaction_labels: result.transaction_labels,
12227 customer_labels: result.customer_labels,
12228 account_labels: result.account_labels,
12229 relationship_labels: result.relationship_labels,
12230 narratives: result.narratives,
12231 suspicious_count: result.stats.suspicious_count,
12232 scenario_count: result.scenarios.len(),
12233 })
12234 }
12235
12236 fn calculate_total_transactions(&self) -> u64 {
12238 let months = self.config.global.period_months as f64;
12239 self.config
12240 .companies
12241 .iter()
12242 .map(|c| {
12243 let annual = c.annual_transaction_volume.count() as f64;
12244 let weighted = annual * c.volume_weight;
12245 (weighted * months / 12.0) as u64
12246 })
12247 .sum()
12248 }
12249
12250 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
12252 if !self.phase_config.show_progress {
12253 return None;
12254 }
12255
12256 let pb = if let Some(mp) = &self.multi_progress {
12257 mp.add(ProgressBar::new(total))
12258 } else {
12259 ProgressBar::new(total)
12260 };
12261
12262 pb.set_style(
12263 ProgressStyle::default_bar()
12264 .template(&format!(
12265 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
12266 ))
12267 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
12268 .progress_chars("#>-"),
12269 );
12270
12271 Some(pb)
12272 }
12273
12274 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
12276 self.coa.clone()
12277 }
12278
12279 pub fn get_master_data(&self) -> &MasterDataSnapshot {
12281 &self.master_data
12282 }
12283
12284 fn phase_compliance_regulations(
12286 &mut self,
12287 _stats: &mut EnhancedGenerationStatistics,
12288 ) -> SynthResult<ComplianceRegulationsSnapshot> {
12289 if !self.phase_config.generate_compliance_regulations {
12290 return Ok(ComplianceRegulationsSnapshot::default());
12291 }
12292
12293 info!("Phase: Generating Compliance Regulations Data");
12294
12295 let cr_config = &self.config.compliance_regulations;
12296
12297 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
12299 self.config
12300 .companies
12301 .iter()
12302 .map(|c| c.country.clone())
12303 .collect::<std::collections::HashSet<_>>()
12304 .into_iter()
12305 .collect()
12306 } else {
12307 cr_config.jurisdictions.clone()
12308 };
12309
12310 let fallback_date =
12312 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
12313 let reference_date = cr_config
12314 .reference_date
12315 .as_ref()
12316 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
12317 .unwrap_or_else(|| {
12318 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12319 .unwrap_or(fallback_date)
12320 });
12321
12322 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
12324 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
12325 let cross_reference_records = reg_gen.generate_cross_reference_records();
12326 let jurisdiction_records =
12327 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
12328
12329 info!(
12330 " Standards: {} records, {} cross-references, {} jurisdictions",
12331 standard_records.len(),
12332 cross_reference_records.len(),
12333 jurisdiction_records.len()
12334 );
12335
12336 let audit_procedures = if cr_config.audit_procedures.enabled {
12338 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
12339 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
12340 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
12341 confidence_level: cr_config.audit_procedures.confidence_level,
12342 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
12343 };
12344 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
12345 self.seed + 9000,
12346 proc_config,
12347 );
12348 let registry = reg_gen.registry();
12349 let mut all_procs = Vec::new();
12350 for jurisdiction in &jurisdictions {
12351 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
12352 all_procs.extend(procs);
12353 }
12354 info!(" Audit procedures: {}", all_procs.len());
12355 all_procs
12356 } else {
12357 Vec::new()
12358 };
12359
12360 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
12362 let finding_config =
12363 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
12364 finding_rate: cr_config.findings.finding_rate,
12365 material_weakness_rate: cr_config.findings.material_weakness_rate,
12366 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
12367 generate_remediation: cr_config.findings.generate_remediation,
12368 };
12369 let mut finding_gen =
12370 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
12371 self.seed + 9100,
12372 finding_config,
12373 );
12374 let mut all_findings = Vec::new();
12375 for company in &self.config.companies {
12376 let company_findings =
12377 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
12378 all_findings.extend(company_findings);
12379 }
12380 info!(" Compliance findings: {}", all_findings.len());
12381 all_findings
12382 } else {
12383 Vec::new()
12384 };
12385
12386 let filings = if cr_config.filings.enabled {
12388 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
12389 filing_types: cr_config.filings.filing_types.clone(),
12390 generate_status_progression: cr_config.filings.generate_status_progression,
12391 };
12392 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
12393 self.seed + 9200,
12394 filing_config,
12395 );
12396 let company_codes: Vec<String> = self
12397 .config
12398 .companies
12399 .iter()
12400 .map(|c| c.code.clone())
12401 .collect();
12402 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12403 .unwrap_or(fallback_date);
12404 let filings = filing_gen.generate_filings(
12405 &company_codes,
12406 &jurisdictions,
12407 start_date,
12408 self.config.global.period_months,
12409 );
12410 info!(" Regulatory filings: {}", filings.len());
12411 filings
12412 } else {
12413 Vec::new()
12414 };
12415
12416 let compliance_graph = if cr_config.graph.enabled {
12418 let graph_config = datasynth_graph::ComplianceGraphConfig {
12419 include_standard_nodes: cr_config.graph.include_compliance_nodes,
12420 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
12421 include_cross_references: cr_config.graph.include_cross_references,
12422 include_supersession_edges: cr_config.graph.include_supersession_edges,
12423 include_account_links: cr_config.graph.include_account_links,
12424 include_control_links: cr_config.graph.include_control_links,
12425 include_company_links: cr_config.graph.include_company_links,
12426 };
12427 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
12428
12429 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
12431 .iter()
12432 .map(|r| datasynth_graph::StandardNodeInput {
12433 standard_id: r.standard_id.clone(),
12434 title: r.title.clone(),
12435 category: r.category.clone(),
12436 domain: r.domain.clone(),
12437 is_active: r.is_active,
12438 features: vec![if r.is_active { 1.0 } else { 0.0 }],
12439 applicable_account_types: r.applicable_account_types.clone(),
12440 applicable_processes: r.applicable_processes.clone(),
12441 })
12442 .collect();
12443 builder.add_standards(&standard_inputs);
12444
12445 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
12447 jurisdiction_records
12448 .iter()
12449 .map(|r| datasynth_graph::JurisdictionNodeInput {
12450 country_code: r.country_code.clone(),
12451 country_name: r.country_name.clone(),
12452 framework: r.accounting_framework.clone(),
12453 standard_count: r.standard_count,
12454 tax_rate: r.statutory_tax_rate,
12455 })
12456 .collect();
12457 builder.add_jurisdictions(&jurisdiction_inputs);
12458
12459 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
12461 cross_reference_records
12462 .iter()
12463 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
12464 from_standard: r.from_standard.clone(),
12465 to_standard: r.to_standard.clone(),
12466 relationship: r.relationship.clone(),
12467 convergence_level: r.convergence_level,
12468 })
12469 .collect();
12470 builder.add_cross_references(&xref_inputs);
12471
12472 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
12474 .iter()
12475 .map(|r| datasynth_graph::JurisdictionMappingInput {
12476 country_code: r.jurisdiction.clone(),
12477 standard_id: r.standard_id.clone(),
12478 })
12479 .collect();
12480 builder.add_jurisdiction_mappings(&mapping_inputs);
12481
12482 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
12484 .iter()
12485 .map(|p| datasynth_graph::ProcedureNodeInput {
12486 procedure_id: p.procedure_id.clone(),
12487 standard_id: p.standard_id.clone(),
12488 procedure_type: p.procedure_type.clone(),
12489 sample_size: p.sample_size,
12490 confidence_level: p.confidence_level,
12491 })
12492 .collect();
12493 builder.add_procedures(&proc_inputs);
12494
12495 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
12497 .iter()
12498 .map(|f| datasynth_graph::FindingNodeInput {
12499 finding_id: f.finding_id.to_string(),
12500 standard_id: f
12501 .related_standards
12502 .first()
12503 .map(|s| s.as_str().to_string())
12504 .unwrap_or_default(),
12505 severity: f.severity.to_string(),
12506 deficiency_level: f.deficiency_level.to_string(),
12507 severity_score: f.deficiency_level.severity_score(),
12508 control_id: f.control_id.clone(),
12509 affected_accounts: f.affected_accounts.clone(),
12510 })
12511 .collect();
12512 builder.add_findings(&finding_inputs);
12513
12514 if cr_config.graph.include_account_links {
12516 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
12517 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
12518 for std_record in &standard_records {
12519 if let Some(std_obj) =
12520 registry.get(&datasynth_core::models::compliance::StandardId::parse(
12521 &std_record.standard_id,
12522 ))
12523 {
12524 for acct_type in &std_obj.applicable_account_types {
12525 account_links.push(datasynth_graph::AccountLinkInput {
12526 standard_id: std_record.standard_id.clone(),
12527 account_code: acct_type.clone(),
12528 account_name: acct_type.clone(),
12529 });
12530 }
12531 }
12532 }
12533 builder.add_account_links(&account_links);
12534 }
12535
12536 if cr_config.graph.include_control_links {
12538 let mut control_links = Vec::new();
12539 let sox_like_ids: Vec<String> = standard_records
12541 .iter()
12542 .filter(|r| {
12543 r.standard_id.starts_with("SOX")
12544 || r.standard_id.starts_with("PCAOB-AS-2201")
12545 })
12546 .map(|r| r.standard_id.clone())
12547 .collect();
12548 let control_ids = [
12550 ("C001", "Cash Controls"),
12551 ("C002", "Large Transaction Approval"),
12552 ("C010", "PO Approval"),
12553 ("C011", "Three-Way Match"),
12554 ("C020", "Revenue Recognition"),
12555 ("C021", "Credit Check"),
12556 ("C030", "Manual JE Approval"),
12557 ("C031", "Period Close Review"),
12558 ("C032", "Account Reconciliation"),
12559 ("C040", "Payroll Processing"),
12560 ("C050", "Fixed Asset Capitalization"),
12561 ("C060", "Intercompany Elimination"),
12562 ];
12563 for sox_id in &sox_like_ids {
12564 for (ctrl_id, ctrl_name) in &control_ids {
12565 control_links.push(datasynth_graph::ControlLinkInput {
12566 standard_id: sox_id.clone(),
12567 control_id: ctrl_id.to_string(),
12568 control_name: ctrl_name.to_string(),
12569 });
12570 }
12571 }
12572 builder.add_control_links(&control_links);
12573 }
12574
12575 if cr_config.graph.include_company_links {
12577 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
12578 .iter()
12579 .enumerate()
12580 .map(|(i, f)| datasynth_graph::FilingNodeInput {
12581 filing_id: format!("F{:04}", i + 1),
12582 filing_type: f.filing_type.to_string(),
12583 company_code: f.company_code.clone(),
12584 jurisdiction: f.jurisdiction.clone(),
12585 status: format!("{:?}", f.status),
12586 })
12587 .collect();
12588 builder.add_filings(&filing_inputs);
12589 }
12590
12591 let graph = builder.build();
12592 info!(
12593 " Compliance graph: {} nodes, {} edges",
12594 graph.nodes.len(),
12595 graph.edges.len()
12596 );
12597 Some(graph)
12598 } else {
12599 None
12600 };
12601
12602 self.check_resources_with_log("post-compliance-regulations")?;
12603
12604 Ok(ComplianceRegulationsSnapshot {
12605 standard_records,
12606 cross_reference_records,
12607 jurisdiction_records,
12608 audit_procedures,
12609 findings,
12610 filings,
12611 compliance_graph,
12612 })
12613 }
12614
12615 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
12617 use super::lineage::LineageGraphBuilder;
12618
12619 let mut builder = LineageGraphBuilder::new();
12620
12621 builder.add_config_section("config:global", "Global Config");
12623 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
12624 builder.add_config_section("config:transactions", "Transaction Config");
12625
12626 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
12628 builder.add_generator_phase("phase:je", "Journal Entry Generation");
12629
12630 builder.configured_by("phase:coa", "config:chart_of_accounts");
12632 builder.configured_by("phase:je", "config:transactions");
12633
12634 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
12636 builder.produced_by("output:je", "phase:je");
12637
12638 if self.phase_config.generate_master_data {
12640 builder.add_config_section("config:master_data", "Master Data Config");
12641 builder.add_generator_phase("phase:master_data", "Master Data Generation");
12642 builder.configured_by("phase:master_data", "config:master_data");
12643 builder.input_to("phase:master_data", "phase:je");
12644 }
12645
12646 if self.phase_config.generate_document_flows {
12647 builder.add_config_section("config:document_flows", "Document Flow Config");
12648 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
12649 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
12650 builder.configured_by("phase:p2p", "config:document_flows");
12651 builder.configured_by("phase:o2c", "config:document_flows");
12652
12653 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
12654 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
12655 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
12656 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
12657 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
12658
12659 builder.produced_by("output:po", "phase:p2p");
12660 builder.produced_by("output:gr", "phase:p2p");
12661 builder.produced_by("output:vi", "phase:p2p");
12662 builder.produced_by("output:so", "phase:o2c");
12663 builder.produced_by("output:ci", "phase:o2c");
12664 }
12665
12666 if self.phase_config.inject_anomalies {
12667 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
12668 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
12669 builder.configured_by("phase:anomaly", "config:fraud");
12670 builder.add_output_file(
12671 "output:labels",
12672 "Anomaly Labels",
12673 "labels/anomaly_labels.csv",
12674 );
12675 builder.produced_by("output:labels", "phase:anomaly");
12676 }
12677
12678 if self.phase_config.generate_audit {
12679 builder.add_config_section("config:audit", "Audit Config");
12680 builder.add_generator_phase("phase:audit", "Audit Data Generation");
12681 builder.configured_by("phase:audit", "config:audit");
12682 }
12683
12684 if self.phase_config.generate_banking {
12685 builder.add_config_section("config:banking", "Banking Config");
12686 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
12687 builder.configured_by("phase:banking", "config:banking");
12688 }
12689
12690 if self.config.llm.enabled {
12691 builder.add_config_section("config:llm", "LLM Enrichment Config");
12692 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
12693 builder.configured_by("phase:llm_enrichment", "config:llm");
12694 }
12695
12696 if self.config.diffusion.enabled {
12697 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
12698 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
12699 builder.configured_by("phase:diffusion", "config:diffusion");
12700 }
12701
12702 if self.config.causal.enabled {
12703 builder.add_config_section("config:causal", "Causal Generation Config");
12704 builder.add_generator_phase("phase:causal", "Causal Overlay");
12705 builder.configured_by("phase:causal", "config:causal");
12706 }
12707
12708 builder.build()
12709 }
12710
12711 fn compute_company_revenue(
12720 entries: &[JournalEntry],
12721 company_code: &str,
12722 ) -> rust_decimal::Decimal {
12723 use rust_decimal::Decimal;
12724 let mut revenue = Decimal::ZERO;
12725 for je in entries {
12726 if je.header.company_code != company_code {
12727 continue;
12728 }
12729 for line in &je.lines {
12730 if line.gl_account.starts_with('4') {
12731 revenue += line.credit_amount - line.debit_amount;
12733 }
12734 }
12735 }
12736 revenue.max(Decimal::ZERO)
12737 }
12738
12739 fn compute_entity_net_assets(
12743 entries: &[JournalEntry],
12744 entity_code: &str,
12745 ) -> rust_decimal::Decimal {
12746 use rust_decimal::Decimal;
12747 let mut asset_net = Decimal::ZERO;
12748 let mut liability_net = Decimal::ZERO;
12749 for je in entries {
12750 if je.header.company_code != entity_code {
12751 continue;
12752 }
12753 for line in &je.lines {
12754 if line.gl_account.starts_with('1') {
12755 asset_net += line.debit_amount - line.credit_amount;
12756 } else if line.gl_account.starts_with('2') {
12757 liability_net += line.credit_amount - line.debit_amount;
12758 }
12759 }
12760 }
12761 asset_net - liability_net
12762 }
12763}
12764
12765fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
12767 match format {
12768 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
12769 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
12770 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
12771 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
12772 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
12773 }
12774}
12775
12776fn compute_trial_balance_entries(
12781 entries: &[JournalEntry],
12782 entity_code: &str,
12783 fiscal_year: i32,
12784 coa: Option<&ChartOfAccounts>,
12785) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
12786 use std::collections::BTreeMap;
12787
12788 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
12789 BTreeMap::new();
12790
12791 for je in entries {
12792 for line in &je.lines {
12793 let entry = balances.entry(line.account_code.clone()).or_default();
12794 entry.0 += line.debit_amount;
12795 entry.1 += line.credit_amount;
12796 }
12797 }
12798
12799 balances
12800 .into_iter()
12801 .map(
12802 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
12803 account_description: coa
12804 .and_then(|c| c.get_account(&account_code))
12805 .map(|a| a.description().to_string())
12806 .unwrap_or_else(|| account_code.clone()),
12807 account_code,
12808 debit_balance: debit,
12809 credit_balance: credit,
12810 net_balance: debit - credit,
12811 entity_code: entity_code.to_string(),
12812 period: format!("FY{}", fiscal_year),
12813 },
12814 )
12815 .collect()
12816}
12817
12818#[cfg(test)]
12819#[allow(clippy::unwrap_used)]
12820mod tests {
12821 use super::*;
12822 use datasynth_config::schema::*;
12823
12824 fn create_test_config() -> GeneratorConfig {
12825 GeneratorConfig {
12826 global: GlobalConfig {
12827 industry: IndustrySector::Manufacturing,
12828 start_date: "2024-01-01".to_string(),
12829 period_months: 1,
12830 seed: Some(42),
12831 parallel: false,
12832 group_currency: "USD".to_string(),
12833 presentation_currency: None,
12834 worker_threads: 0,
12835 memory_limit_mb: 0,
12836 fiscal_year_months: None,
12837 },
12838 companies: vec![CompanyConfig {
12839 code: "1000".to_string(),
12840 name: "Test Company".to_string(),
12841 currency: "USD".to_string(),
12842 functional_currency: None,
12843 country: "US".to_string(),
12844 annual_transaction_volume: TransactionVolume::TenK,
12845 volume_weight: 1.0,
12846 fiscal_year_variant: "K4".to_string(),
12847 }],
12848 chart_of_accounts: ChartOfAccountsConfig {
12849 complexity: CoAComplexity::Small,
12850 industry_specific: true,
12851 custom_accounts: None,
12852 min_hierarchy_depth: 2,
12853 max_hierarchy_depth: 4,
12854 },
12855 transactions: TransactionConfig::default(),
12856 output: OutputConfig::default(),
12857 fraud: FraudConfig::default(),
12858 internal_controls: InternalControlsConfig::default(),
12859 business_processes: BusinessProcessConfig::default(),
12860 user_personas: UserPersonaConfig::default(),
12861 templates: TemplateConfig::default(),
12862 approval: ApprovalConfig::default(),
12863 departments: DepartmentConfig::default(),
12864 master_data: MasterDataConfig::default(),
12865 document_flows: DocumentFlowConfig::default(),
12866 intercompany: IntercompanyConfig::default(),
12867 balance: BalanceConfig::default(),
12868 ocpm: OcpmConfig::default(),
12869 audit: AuditGenerationConfig::default(),
12870 banking: datasynth_banking::BankingConfig::default(),
12871 data_quality: DataQualitySchemaConfig::default(),
12872 scenario: ScenarioConfig::default(),
12873 temporal: TemporalDriftConfig::default(),
12874 graph_export: GraphExportConfig::default(),
12875 streaming: StreamingSchemaConfig::default(),
12876 rate_limit: RateLimitSchemaConfig::default(),
12877 temporal_attributes: TemporalAttributeSchemaConfig::default(),
12878 relationships: RelationshipSchemaConfig::default(),
12879 accounting_standards: AccountingStandardsConfig::default(),
12880 audit_standards: AuditStandardsConfig::default(),
12881 distributions: Default::default(),
12882 temporal_patterns: Default::default(),
12883 vendor_network: VendorNetworkSchemaConfig::default(),
12884 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
12885 relationship_strength: RelationshipStrengthSchemaConfig::default(),
12886 cross_process_links: CrossProcessLinksSchemaConfig::default(),
12887 organizational_events: OrganizationalEventsSchemaConfig::default(),
12888 behavioral_drift: BehavioralDriftSchemaConfig::default(),
12889 market_drift: MarketDriftSchemaConfig::default(),
12890 drift_labeling: DriftLabelingSchemaConfig::default(),
12891 anomaly_injection: Default::default(),
12892 industry_specific: Default::default(),
12893 fingerprint_privacy: Default::default(),
12894 quality_gates: Default::default(),
12895 compliance: Default::default(),
12896 webhooks: Default::default(),
12897 llm: Default::default(),
12898 diffusion: Default::default(),
12899 causal: Default::default(),
12900 source_to_pay: Default::default(),
12901 financial_reporting: Default::default(),
12902 hr: Default::default(),
12903 manufacturing: Default::default(),
12904 sales_quotes: Default::default(),
12905 tax: Default::default(),
12906 treasury: Default::default(),
12907 project_accounting: Default::default(),
12908 esg: Default::default(),
12909 country_packs: None,
12910 scenarios: Default::default(),
12911 session: Default::default(),
12912 compliance_regulations: Default::default(),
12913 }
12914 }
12915
/// Constructing an orchestrator with default phases from the test config succeeds.
#[test]
fn test_enhanced_orchestrator_creation() {
    let created = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(created.is_ok());
}
12922
/// A journal-entries-only run succeeds and yields at least one journal entry.
#[test]
fn test_minimal_generation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();

    let outcome = orch.generate();
    assert!(outcome.is_ok());

    let generated = outcome.unwrap();
    assert!(!generated.journal_entries.is_empty());
}
12942
/// With only the master-data phase enabled, vendors, customers and materials are produced.
#[test]
fn test_master_data_generation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Entity counts per company.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = orch.generate().unwrap();

    let master = &generated.master_data;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
12967
/// Master data plus document flows yields P2P/O2C chains, purchase orders and sales orders.
#[test]
fn test_document_flow_generation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Entity and chain counts.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = orch.generate().unwrap();
    let flows = &generated.document_flows;

    // Both document chains were built.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // The flattened document collections are populated as well.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
13001
/// Journal entries with anomaly injection enabled produce entries and a label summary.
#[test]
fn test_anomaly_injection() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        inject_anomalies: true,
        generate_master_data: false,
        generate_document_flows: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = orch.generate().unwrap();

    assert!(!generated.journal_entries.is_empty());

    // The anomaly label summary must be present after injection.
    assert!(generated.anomaly_labels.summary.is_some());
}
13024
/// Runs master data, document flows, journal entries and balance validation together
/// and checks that every stage left output behind.
#[test]
fn test_full_generation_pipeline() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Entity and chain counts.
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        generate_ocpm_events: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = orch.generate().unwrap();

    // Master data, document chains, journal entries and accounts.
    assert!(!generated.master_data.vendors.is_empty());
    assert!(!generated.master_data.customers.is_empty());
    assert!(!generated.document_flows.p2p_chains.is_empty());
    assert!(!generated.document_flows.o2c_chains.is_empty());
    assert!(!generated.journal_entries.is_empty());
    assert!(generated.statistics.accounts_count > 0);

    // Subledger invoices.
    assert!(!generated.subledger.ap_invoices.is_empty());
    assert!(!generated.subledger.ar_invoices.is_empty());

    // Balance validation ran over at least one entry.
    assert!(generated.balance_validation.validated);
    assert!(generated.balance_validation.entries_processed > 0);
}
13066
/// Subledger AP/AR invoice counts match the vendor/customer invoice counts from the
/// document flows, and the statistics report the same subledger counts.
#[test]
fn test_subledger_linking() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Entity and chain counts.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = orch.generate().unwrap();

    let flows = &generated.document_flows;
    let subledger = &generated.subledger;
    let stats = &generated.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Same number of AP invoices as vendor invoices.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // Same number of AR invoices as customer invoices.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Statistics agree with the subledger contents.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
13122
/// Balance validation over generated journal entries reports no unbalanced entries and
/// equal debit and credit totals.
#[test]
fn test_balance_validation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        validate_balances: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = orch.generate().unwrap();
    let validation = &generated.balance_validation;

    // Validation ran and saw entries.
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    // No entry is flagged as unbalanced.
    assert!(!validation.has_unbalanced_entries);

    // Overall debits equal overall credits.
    assert_eq!(validation.total_debits, validation.total_credits);
}
13152
/// Reported statistics counts equal the sizes of the generated collections.
#[test]
fn test_statistics_accuracy() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Entity counts per company.
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = orch.generate().unwrap();

    let stats = &generated.statistics;
    let master = &generated.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(
        stats.total_entries as usize,
        generated.journal_entries.len()
    );
}
13191
/// Pins the default `PhaseConfig`: core phases, balance validation and progress are on,
/// anomaly injection is off, and vendor/customer counts are positive.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Phase switches.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);
    assert!(!defaults.inject_anomalies);
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);

    // Entity counts.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
13204
/// A freshly constructed orchestrator has no chart of accounts yet.
#[test]
fn test_get_coa_before_generation() {
    let orch = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    assert!(orch.get_coa().is_none());
}
13213
/// After a journal-entry run, the orchestrator exposes a chart of accounts.
#[test]
fn test_get_coa_after_generation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    // Only the orchestrator's state after the run matters here, not the result.
    let _ = orch.generate().unwrap();

    assert!(orch.get_coa().is_some());
}
13232
/// A master-data-only run returns a non-empty vendor list in the result.
#[test]
fn test_get_master_data() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Entity counts per company.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = orch.generate().unwrap();

    assert!(!generated.master_data.vendors.is_empty());
}
13256
/// `with_progress(false)` clears `show_progress` on the orchestrator's phase config.
#[test]
fn test_with_progress_builder() {
    let base = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let orch = base.with_progress(false);

    assert!(!orch.phase_config.show_progress);
}
13267
/// With a second company added, statistics report two companies and at least ten
/// vendors and ten customers.
#[test]
fn test_multi_company_generation() {
    let subsidiary = CompanyConfig {
        code: String::from("2000"),
        name: String::from("Subsidiary"),
        currency: String::from("EUR"),
        functional_currency: None,
        country: String::from("DE"),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: String::from("K4"),
    };
    let mut cfg = create_test_config();
    cfg.companies.push(subsidiary);

    let phases = PhaseConfig {
        // Entity counts per company.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = orch.generate().unwrap();
    let stats = &generated.statistics;

    assert!(stats.vendor_count >= 10);
    assert!(stats.customer_count >= 10);
    assert!(stats.companies_count == 2);
}
13304
/// Requesting document flows without master data leaves both chain lists empty.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Document flows are requested while master data generation is off.
        generate_document_flows: true,
        generate_master_data: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = orch.generate().unwrap();
    let flows = &generated.document_flows;

    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
13324
/// `statistics.total_line_items` equals the sum of `line_count()` over all generated
/// journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = orch.generate().unwrap();

    // Recount the line items independently of the statistics.
    let mut recounted: u64 = 0;
    for entry in generated.journal_entries.iter() {
        recounted += entry.line_count() as u64;
    }

    assert_eq!(generated.statistics.total_line_items, recounted);
}
13348
/// Runs the orchestrator with audit generation enabled for two engagements
/// and checks that the audit artifacts are produced and that every audit
/// statistic matches the length of the collection it describes.
///
/// Samples, IA reports and related-party transactions are only checked for
/// count consistency, not for being non-empty.
#[test]
fn test_audit_generation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Audit-specific sizing.
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        // Everything else except journal entries is switched off.
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let audit = &output.audit;
    let stats = &output.statistics;

    // Core audit artifacts.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Standard-specific artifacts.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Each reported count must agree with the collection it summarises.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(stats.audit_procedure_step_count, audit.procedure_steps.len());
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(stats.audit_related_party_count, audit.related_parties.len());
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
13468
/// Confirms that the LLM, diffusion and causal flags are off in the test
/// config, and that a journal-entry-only run leaves their statistics at
/// zero / `None` and produces no counterfactual pairs.
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);

    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // LLM enrichment
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    // Diffusion enhancement
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    // Causal overlay
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
    // Counterfactuals
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(output.counterfactual_pairs.is_empty());
}
13500
/// With counterfactual generation switched on, checks that there is one
/// counterfactual pair per journal entry, that the statistic agrees, and
/// that all `pair_id`s are distinct.
///
/// The checks are skipped entirely when the run yields no journal entries.
#[test]
fn test_counterfactual_generation_enabled() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_counterfactuals: true,
        generate_period_close: false,
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // Nothing to compare against when no journal entries were produced.
    if output.journal_entries.is_empty() {
        return;
    }

    assert_eq!(
        output.counterfactual_pairs.len(),
        output.journal_entries.len()
    );
    assert_eq!(
        output.statistics.counterfactual_pair_count,
        output.journal_entries.len()
    );

    // A set drops duplicates, so equal sizes mean every pair id is unique.
    let mut distinct_ids = std::collections::HashSet::new();
    for pair in output.counterfactual_pairs.iter() {
        distinct_ids.insert(pair.pair_id.clone());
    }
    assert_eq!(distinct_ids.len(), output.counterfactual_pairs.len());
}
13537
/// Enables LLM enrichment with a cap of three vendors and checks that at
/// least one, and no more than three, vendors are reported as enriched.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    // Master data only, sized so that there are more vendors than the cap.
    let phases = PhaseConfig {
        generate_master_data: true,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert!(stats.llm_vendors_enriched <= 3);
}
13565
/// Enables diffusion enhancement with `sample_size = 20` and checks that
/// exactly that many samples are reported.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // The reported count must match the configured sample size.
    assert_eq!(output.statistics.diffusion_samples_generated, 20);
}
13588
/// Enables the causal overlay with the `fraud_detection` template and
/// validation turned on; expects 100 samples and a recorded validation
/// outcome.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "fraud_detection".to_string();
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 100);
    // Only the presence of a validation result is checked, not its value.
    assert!(stats.causal_validation_passed.is_some());
}
13614
/// Enables the causal overlay with the `revenue_cycle` template and
/// validation turned off; expects 50 samples and no validation outcome.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "revenue_cycle".to_string();
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 50);
    // With `validate = false` no validation result should be recorded.
    assert!(stats.causal_validation_passed.is_none());
}
13640
/// Enables LLM enrichment, diffusion enhancement and the causal overlay in
/// a single run and checks that each phase reports its expected output.
///
/// The LLM check covers both bounds: at least one vendor is enriched, and
/// the count does not exceed the configured `max_vendor_enrichments`. This
/// matches the upper-bound check in `test_llm_enrichment_enabled`.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut config = create_test_config();
    config.llm.enabled = true;
    config.llm.max_vendor_enrichments = 2;
    config.diffusion.enabled = true;
    config.diffusion.n_steps = 20;
    config.diffusion.sample_size = 10;
    config.causal.enabled = true;
    config.causal.sample_size = 50;
    config.causal.validate = true;

    // Master data is needed so that there are vendors to enrich; five
    // vendors per company is more than the enrichment cap of two.
    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // LLM enrichment ran and respected the configured cap.
    assert!(result.statistics.llm_vendors_enriched > 0);
    assert!(result.statistics.llm_vendors_enriched <= 2);
    // Diffusion and causal phases produced exactly the configured sample sizes.
    assert_eq!(result.statistics.diffusion_samples_generated, 10);
    assert_eq!(result.statistics.causal_samples_generated, 50);
    // Validation was requested, so an outcome must be recorded.
    assert!(result.statistics.causal_validation_passed.is_some());
}
13676
/// Round-trips `EnhancedGenerationStatistics` through JSON and checks that
/// every field set here comes back unchanged.
///
/// Both the baseline counters (`total_entries`, `total_line_items`) and the
/// LLM / diffusion / causal fields are verified, so a serialization change
/// that drops either group is caught.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let stats = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let json = serde_json::to_string(&stats).unwrap();
    let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();

    // Baseline counters must survive the round trip as well.
    assert_eq!(deserialized.total_entries, 100);
    assert_eq!(deserialized.total_line_items, 500);

    // LLM / diffusion / causal fields.
    assert_eq!(deserialized.llm_enrichment_ms, 42);
    assert_eq!(deserialized.llm_vendors_enriched, 10);
    assert_eq!(deserialized.diffusion_enhancement_ms, 100);
    assert_eq!(deserialized.diffusion_samples_generated, 50);
    assert_eq!(deserialized.causal_generation_ms, 200);
    assert_eq!(deserialized.causal_samples_generated, 100);
    assert_eq!(deserialized.causal_validation_passed, Some(true));
}
13703
/// Deserializes a statistics payload that has none of the LLM, diffusion
/// or causal keys and checks that those fields come back as zero / `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload containing only the keys listed below; the fields asserted
    // at the end of this test are deliberately absent.
    let legacy_payload = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let parsed: EnhancedGenerationStatistics = serde_json::from_str(legacy_payload).unwrap();

    // Missing keys must deserialize to zero / `None`.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
13753}