1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 ManufacturingCostAccounting,
118 MaterialGenerator,
119 O2CDocumentChain,
120 O2CGenerator,
121 O2CGeneratorConfig,
122 O2CPaymentBehavior,
123 P2PDocumentChain,
124 P2PGenerator,
126 P2PGeneratorConfig,
127 P2PPaymentBehavior,
128 PaymentReference,
129 ProvisionGenerator,
131 QualificationGenerator,
132 RfxGenerator,
133 RiskAssessmentGenerator,
134 RunningBalanceTracker,
136 ScorecardGenerator,
137 SegmentGenerator,
139 SegmentSeed,
140 SourcingProjectGenerator,
141 SpendAnalysisGenerator,
142 ValidationError,
143 VendorGenerator,
145 WarrantyProvisionGenerator,
146 WorkpaperGenerator,
147};
148use datasynth_graph::{
149 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151 TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156 OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use rayon::prelude::*;
178
179fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
185 let payment_behavior = &schema_config.payment_behavior;
186 let late_dist = &payment_behavior.late_payment_days_distribution;
187
188 P2PGeneratorConfig {
189 three_way_match_rate: schema_config.three_way_match_rate,
190 partial_delivery_rate: schema_config.partial_delivery_rate,
191 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
192 price_variance_rate: schema_config.price_variance_rate,
193 max_price_variance_percent: schema_config.max_price_variance_percent,
194 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
195 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
196 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
197 payment_method_distribution: vec![
198 (PaymentMethod::BankTransfer, 0.60),
199 (PaymentMethod::Check, 0.25),
200 (PaymentMethod::Wire, 0.10),
201 (PaymentMethod::CreditCard, 0.05),
202 ],
203 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
204 payment_behavior: P2PPaymentBehavior {
205 late_payment_rate: payment_behavior.late_payment_rate,
206 late_payment_distribution: LatePaymentDistribution {
207 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
208 late_8_to_14: late_dist.late_8_to_14,
209 very_late_15_to_30: late_dist.very_late_15_to_30,
210 severely_late_31_to_60: late_dist.severely_late_31_to_60,
211 extremely_late_over_60: late_dist.extremely_late_over_60,
212 },
213 partial_payment_rate: payment_behavior.partial_payment_rate,
214 payment_correction_rate: payment_behavior.payment_correction_rate,
215 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
216 },
217 }
218}
219
220fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
222 let payment_behavior = &schema_config.payment_behavior;
223
224 O2CGeneratorConfig {
225 credit_check_failure_rate: schema_config.credit_check_failure_rate,
226 partial_shipment_rate: schema_config.partial_shipment_rate,
227 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
228 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
229 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
230 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
231 bad_debt_rate: schema_config.bad_debt_rate,
232 returns_rate: schema_config.return_rate,
233 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
234 payment_method_distribution: vec![
235 (PaymentMethod::BankTransfer, 0.50),
236 (PaymentMethod::Check, 0.30),
237 (PaymentMethod::Wire, 0.15),
238 (PaymentMethod::CreditCard, 0.05),
239 ],
240 payment_behavior: O2CPaymentBehavior {
241 partial_payment_rate: payment_behavior.partial_payments.rate,
242 short_payment_rate: payment_behavior.short_payments.rate,
243 max_short_percent: payment_behavior.short_payments.max_short_percent,
244 on_account_rate: payment_behavior.on_account_payments.rate,
245 payment_correction_rate: payment_behavior.payment_corrections.rate,
246 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
247 },
248 }
249}
250
/// Per-phase on/off switches and volume settings.
///
/// Instances are produced by the `Default` impl and by
/// `PhaseConfig::from_config` in this file. The field notes below describe
/// how `from_config` fills each field; the code that consumes these values is
/// outside this excerpt.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Master-data phase switch. `from_config` always sets this to `true`.
    pub generate_master_data: bool,
    /// Document-flow phase switch. `from_config` always sets this to `true`.
    pub generate_document_flows: bool,
    /// OCPM event phase switch. `from_config` copies `cfg.ocpm.enabled`.
    pub generate_ocpm_events: bool,
    /// Journal-entry phase switch. `from_config` always sets this to `true`.
    pub generate_journal_entries: bool,
    /// Anomaly injection switch. `from_config` sets it when either
    /// `cfg.fraud.enabled` or `cfg.anomaly_injection.enabled` is true.
    pub inject_anomalies: bool,
    /// Data-quality injection switch. `from_config` copies `cfg.data_quality.enabled`.
    pub inject_data_quality: bool,
    /// Balance validation switch. `from_config` always sets this to `true`.
    pub validate_balances: bool,
    /// Progress display switch. `from_config` always sets this to `true`.
    pub show_progress: bool,
    /// Vendor count per company (`50` in both constructors).
    pub vendors_per_company: usize,
    /// Customer count per company (`100` in both constructors).
    pub customers_per_company: usize,
    /// Material count per company (`200` in both constructors).
    pub materials_per_company: usize,
    /// Asset count per company (`50` in both constructors).
    pub assets_per_company: usize,
    /// Employee count per company (`100` in both constructors).
    pub employees_per_company: usize,
    /// Number of P2P chains (`100` in both constructors).
    pub p2p_chains: usize,
    /// Number of O2C chains (`100` in both constructors).
    pub o2c_chains: usize,
    /// Audit phase switch. `from_config` copies `cfg.audit.enabled`.
    pub generate_audit: bool,
    /// Number of audit engagements (`5` in both constructors).
    pub audit_engagements: usize,
    /// Workpapers per engagement (`20` in both constructors).
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper (`5` in both constructors).
    pub evidence_per_workpaper: usize,
    /// Risks per engagement (`15` in both constructors).
    pub risks_per_engagement: usize,
    /// Findings per engagement (`8` in both constructors).
    pub findings_per_engagement: usize,
    /// Judgments per engagement (`10` in both constructors).
    pub judgments_per_engagement: usize,
    /// Banking phase switch. `from_config` copies `cfg.banking.enabled`.
    pub generate_banking: bool,
    /// Graph export switch. `from_config` copies `cfg.graph_export.enabled`.
    pub generate_graph_export: bool,
    /// Sourcing phase switch. `from_config` copies `cfg.source_to_pay.enabled`.
    pub generate_sourcing: bool,
    /// Bank reconciliation switch. `from_config` copies
    /// `cfg.financial_reporting.enabled` (the same source as
    /// `generate_financial_statements`).
    pub generate_bank_reconciliation: bool,
    /// Financial statements switch. `from_config` copies `cfg.financial_reporting.enabled`.
    pub generate_financial_statements: bool,
    /// Accounting standards switch. `from_config` copies `cfg.accounting_standards.enabled`.
    pub generate_accounting_standards: bool,
    /// Manufacturing switch. `from_config` copies `cfg.manufacturing.enabled`.
    pub generate_manufacturing: bool,
    /// Sales quotes / KPI / budgets switch. `from_config` copies `cfg.sales_quotes.enabled`.
    pub generate_sales_kpi_budgets: bool,
    /// Tax switch. `from_config` copies `cfg.tax.enabled`.
    pub generate_tax: bool,
    /// ESG switch. `from_config` copies `cfg.esg.enabled`.
    pub generate_esg: bool,
    /// Intercompany switch. `from_config` copies `cfg.intercompany.enabled`.
    pub generate_intercompany: bool,
    /// Evolution events switch. `from_config` always sets this to `true`.
    pub generate_evolution_events: bool,
    /// Counterfactuals switch. `from_config` copies `cfg.scenarios.generate_counterfactuals`.
    pub generate_counterfactuals: bool,
    /// Compliance regulations switch. `from_config` copies `cfg.compliance_regulations.enabled`.
    pub generate_compliance_regulations: bool,
    /// Period close switch. `from_config` always sets this to `true`.
    pub generate_period_close: bool,
    /// HR switch. `from_config` copies `cfg.hr.enabled`.
    pub generate_hr: bool,
    /// Treasury switch. `from_config` copies `cfg.treasury.enabled`.
    pub generate_treasury: bool,
    /// Project accounting switch. `from_config` copies `cfg.project_accounting.enabled`.
    pub generate_project_accounting: bool,
    /// Legal documents switch. `from_config` requires both
    /// `cfg.compliance_regulations.enabled` and
    /// `cfg.compliance_regulations.legal_documents.enabled`.
    pub generate_legal_documents: bool,
    /// IT controls switch. `from_config` requires both `cfg.audit.enabled`
    /// and `cfg.audit.it_controls.enabled`.
    pub generate_it_controls: bool,
    /// Analytics metadata switch. `from_config` copies `cfg.analytics_metadata.enabled`.
    pub generate_analytics_metadata: bool,
}
348
349impl Default for PhaseConfig {
350 fn default() -> Self {
351 Self {
352 generate_master_data: true,
353 generate_document_flows: true,
354 generate_ocpm_events: false, generate_journal_entries: true,
356 inject_anomalies: false,
357 inject_data_quality: false, validate_balances: true,
359 show_progress: true,
360 vendors_per_company: 50,
361 customers_per_company: 100,
362 materials_per_company: 200,
363 assets_per_company: 50,
364 employees_per_company: 100,
365 p2p_chains: 100,
366 o2c_chains: 100,
367 generate_audit: false, audit_engagements: 5,
369 workpapers_per_engagement: 20,
370 evidence_per_workpaper: 5,
371 risks_per_engagement: 15,
372 findings_per_engagement: 8,
373 judgments_per_engagement: 10,
374 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, generate_evolution_events: true, generate_counterfactuals: false, generate_compliance_regulations: false, generate_period_close: true, generate_hr: false, generate_treasury: false, generate_project_accounting: false, generate_legal_documents: false, generate_it_controls: false, generate_analytics_metadata: false, }
396 }
397}
398
399impl PhaseConfig {
400 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
405 Self {
406 generate_master_data: true,
408 generate_document_flows: true,
409 generate_journal_entries: true,
410 validate_balances: true,
411 generate_period_close: true,
412 generate_evolution_events: true,
413 show_progress: true,
414
415 generate_audit: cfg.audit.enabled,
417 generate_banking: cfg.banking.enabled,
418 generate_graph_export: cfg.graph_export.enabled,
419 generate_sourcing: cfg.source_to_pay.enabled,
420 generate_intercompany: cfg.intercompany.enabled,
421 generate_financial_statements: cfg.financial_reporting.enabled,
422 generate_bank_reconciliation: cfg.financial_reporting.enabled,
423 generate_accounting_standards: cfg.accounting_standards.enabled,
424 generate_manufacturing: cfg.manufacturing.enabled,
425 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
426 generate_tax: cfg.tax.enabled,
427 generate_esg: cfg.esg.enabled,
428 generate_ocpm_events: cfg.ocpm.enabled,
429 generate_compliance_regulations: cfg.compliance_regulations.enabled,
430 generate_hr: cfg.hr.enabled,
431 generate_treasury: cfg.treasury.enabled,
432 generate_project_accounting: cfg.project_accounting.enabled,
433
434 generate_legal_documents: cfg.compliance_regulations.enabled
438 && cfg.compliance_regulations.legal_documents.enabled,
439 generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
442 generate_analytics_metadata: cfg.analytics_metadata.enabled,
445
446 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
448
449 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
450 inject_data_quality: cfg.data_quality.enabled,
451
452 vendors_per_company: 50,
454 customers_per_company: 100,
455 materials_per_company: 200,
456 assets_per_company: 50,
457 employees_per_company: 100,
458 p2p_chains: 100,
459 o2c_chains: 100,
460 audit_engagements: 5,
461 workpapers_per_engagement: 20,
462 evidence_per_workpaper: 5,
463 risks_per_engagement: 15,
464 findings_per_engagement: 8,
465 judgments_per_engagement: 10,
466 }
467 }
468}
469
/// Master-data collections bundled into a single value.
///
/// `Default` yields empty collections for every field.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed-asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
    /// Cost-center records.
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// Employee change events.
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
    /// Organizational profile records.
    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
}
492
/// Summary figures and output location for a hypergraph export.
///
/// Unlike most snapshot types in this file, this one does not derive `Default`.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
505
/// Document-flow data: P2P/O2C chains plus per-document-type lists.
///
/// NOTE(review): the per-type lists presumably hold the documents that make
/// up the chains, kept flat for convenience — confirm where this is populated.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
    /// Document references.
    pub document_references: Vec<documents::DocumentReference>,
}
531
/// Subledger data: AP, AR, fixed assets and inventory, plus related reports
/// and runs.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP subledger invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR subledger invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed-asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs.
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results.
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// Dunning runs.
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// Dunning letters.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
558
/// OCPM event log together with summary counts.
///
/// NOTE(review): the counts presumably summarize `event_log`; nothing in this
/// type enforces that they agree — confirm at the population site.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, or `None` when no log is attached.
    pub event_log: Option<OcpmEventLog>,
    /// Number of events.
    pub event_count: usize,
    /// Number of objects.
    pub object_count: usize,
    /// Number of cases.
    pub case_count: usize,
}
571
/// Audit-related data bundled into a single value.
///
/// Every field is a plain collection (or `Option`) of a model type; `Default`
/// yields empty collections and `None`.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
    /// External confirmations.
    pub confirmations: Vec<ExternalConfirmation>,
    /// Confirmation responses.
    pub confirmation_responses: Vec<ConfirmationResponse>,
    /// Audit procedure steps.
    pub procedure_steps: Vec<AuditProcedureStep>,
    /// Audit samples.
    pub samples: Vec<AuditSample>,
    /// Analytical procedure results.
    pub analytical_results: Vec<AnalyticalProcedureResult>,
    /// Internal audit functions.
    pub ia_functions: Vec<InternalAuditFunction>,
    /// Internal audit reports.
    pub ia_reports: Vec<InternalAuditReport>,
    /// Related parties.
    pub related_parties: Vec<RelatedParty>,
    /// Related-party transactions.
    pub related_party_transactions: Vec<RelatedPartyTransaction>,
    /// Component auditors.
    pub component_auditors: Vec<ComponentAuditor>,
    /// Group audit plan, or `None` when there is none.
    pub group_audit_plan: Option<GroupAuditPlan>,
    /// Component instructions.
    pub component_instructions: Vec<ComponentInstruction>,
    /// Component auditor reports.
    pub component_reports: Vec<ComponentAuditorReport>,
    /// Engagement letters.
    pub engagement_letters: Vec<EngagementLetter>,
    /// Subsequent events.
    pub subsequent_events: Vec<SubsequentEvent>,
    /// Service organizations.
    pub service_organizations: Vec<ServiceOrganization>,
    /// SOC reports.
    pub soc_reports: Vec<SocReport>,
    /// User entity controls.
    pub user_entity_controls: Vec<UserEntityControl>,
    /// Going-concern assessments.
    pub going_concern_assessments:
        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
    /// Accounting estimates.
    pub accounting_estimates:
        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
    /// Audit opinions.
    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
    /// Key audit matters.
    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
    /// SOX 302 certifications.
    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
    /// SOX 404 assessments.
    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
    /// Materiality calculations.
    pub materiality_calculations:
        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
    /// Combined risk assessments.
    pub combined_risk_assessments:
        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
    /// Sampling plans.
    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
    /// Sampled items.
    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
    /// Significant classes of transactions.
    pub significant_transaction_classes:
        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
    /// Unusual item flags.
    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
    /// Analytical relationships.
    pub analytical_relationships:
        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
    /// PCAOB-to-ISA mapping entries.
    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
    /// ISA standard entries.
    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
    /// Audit scopes.
    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
    /// Audit FSM event trail, or `None` when no trail is attached.
    /// NOTE(review): presumably only set when the FSM-based audit path runs — confirm.
    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
    /// Legal documents.
    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
    /// IT-controls access logs.
    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
    /// IT-controls change-management records.
    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
}
697
/// Banking data: customers, accounts, transactions, their labels, and
/// summary counts.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level labels.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Suspicious-item count.
    /// NOTE(review): presumably counts suspicious transactions — confirm.
    pub suspicious_count: usize,
    /// Scenario count.
    pub scenario_count: usize,
}
722
/// Overall graph-export status plus one [`GraphExportInfo`] per export.
///
/// Serializable via serde (`Serialize` only).
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether an export took place.
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Export details keyed by a string.
    /// NOTE(review): the key is presumably the export name — confirm.
    pub exports: HashMap<String, GraphExportInfo>,
}
733
/// Details of a single graph export.
///
/// Serializable via serde (`Serialize` only); does not derive `Default`.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Export name.
    pub name: String,
    /// Export format, as a free-form string.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
748
/// Sourcing data bundled into a single value.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
771
/// Trial balance entries for one fiscal period.
///
/// Serializable and deserializable via serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period number within the year.
    pub fiscal_period: u8,
    /// Start date of the period.
    pub period_start: NaiveDate,
    /// End date of the period.
    pub period_end: NaiveDate,
    /// Trial balance entries for the period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
786
/// Financial-reporting data: statements, consolidation, reconciliations,
/// trial balances, segments and notes.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Standalone statements grouped under a string key.
    /// NOTE(review): the key is presumably an entity/company code — confirm.
    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
    /// Consolidated statements.
    pub consolidated_statements: Vec<FinancialStatement>,
    /// Consolidation schedules.
    pub consolidation_schedules: Vec<ConsolidationSchedule>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
    /// Operating segments.
    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
    /// Segment reconciliations.
    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
    /// Notes to the financial statements.
    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
}
811
/// HR data: payroll, time, expenses, benefits, pensions and stock
/// compensation, followed by summary counts.
///
/// NOTE(review): the `*_count` fields presumably mirror the lengths of the
/// matching collections; nothing in this type enforces that — confirm.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments.
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    /// Defined benefit pension plans.
    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
    /// Pension obligations.
    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
    /// Pension plan assets.
    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
    /// Pension disclosures.
    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
    /// Pension-related journal entries.
    pub pension_journal_entries: Vec<JournalEntry>,
    /// Stock grants.
    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
    /// Stock compensation expenses.
    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
    /// Stock-compensation journal entries.
    pub stock_comp_journal_entries: Vec<JournalEntry>,
    /// Payroll run count.
    pub payroll_run_count: usize,
    /// Payroll line item count.
    pub payroll_line_item_count: usize,
    /// Time entry count.
    pub time_entry_count: usize,
    /// Expense report count.
    pub expense_report_count: usize,
    /// Benefit enrollment count.
    pub benefit_enrollment_count: usize,
    /// Pension plan count.
    pub pension_plan_count: usize,
    /// Stock grant count.
    pub stock_grant_count: usize,
}
856
/// Accounting-standards data: revenue contracts, impairment, business
/// combinations, ECL, provisions, currency translation, leases, fair value
/// and framework differences, with summary counts.
///
/// NOTE(review): the `*_count` fields presumably mirror the lengths of the
/// matching collections; nothing in this type enforces that — confirm.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer contracts (revenue module).
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Business combinations.
    pub business_combinations:
        Vec<datasynth_core::models::business_combination::BusinessCombination>,
    /// Journal entries for business combinations.
    pub business_combination_journal_entries: Vec<JournalEntry>,
    /// Expected credit loss models.
    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
    /// ECL provision movements.
    pub ecl_provision_movements:
        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
    /// Journal entries for ECL.
    pub ecl_journal_entries: Vec<JournalEntry>,
    /// Provisions.
    pub provisions: Vec<datasynth_core::models::provision::Provision>,
    /// Provision movements.
    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
    /// Contingent liabilities.
    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
    /// Journal entries for provisions.
    pub provision_journal_entries: Vec<JournalEntry>,
    /// Currency translation results.
    pub currency_translation_results:
        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
    /// Revenue contract count.
    pub revenue_contract_count: usize,
    /// Impairment test count.
    pub impairment_test_count: usize,
    /// Business combination count.
    pub business_combination_count: usize,
    /// ECL model count.
    pub ecl_model_count: usize,
    /// Provision count.
    pub provision_count: usize,
    /// Currency translation count.
    pub currency_translation_count: usize,
    /// Leases.
    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
    /// Fair value measurements.
    pub fair_value_measurements:
        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
    /// Framework difference records.
    pub framework_differences:
        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
    /// Framework reconciliations.
    pub framework_reconciliations:
        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
    /// Lease count.
    pub lease_count: usize,
    /// Fair value measurement count.
    pub fair_value_measurement_count: usize,
    /// Framework difference count.
    pub framework_difference_count: usize,
}
917
/// Compliance and regulatory data bundled into a single value.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Compliance standard records.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Audit procedure records.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// Compliance graph, or `None` when no graph is attached.
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
936
/// Manufacturing data with summary counts.
///
/// NOTE(review): the `*_count` fields presumably mirror the lengths of the
/// matching collections; nothing in this type enforces that — confirm.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// Bill-of-materials components.
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements.
    pub inventory_movements: Vec<InventoryMovement>,
    /// Production order count.
    pub production_order_count: usize,
    /// Quality inspection count.
    pub quality_inspection_count: usize,
    /// Cycle count count.
    pub cycle_count_count: usize,
    /// BOM component count.
    pub bom_component_count: usize,
    /// Inventory movement count.
    pub inventory_movement_count: usize,
}
961
/// Sales quotes, management KPIs and budgets, with summary counts.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Sales quote count.
    pub sales_quote_count: usize,
    /// KPI count.
    pub kpi_count: usize,
    /// Budget line count.
    /// NOTE(review): named for budget *lines*, so this may differ from
    /// `budgets.len()` — confirm at the population site.
    pub budget_line_count: usize,
}
978
/// Anomaly labels with an optional summary and a per-type tally.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// Labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Anomaly summary, or `None` when no summary is attached.
    pub summary: Option<AnomalySummary>,
    /// Counts keyed by a string.
    /// NOTE(review): the key is presumably the anomaly type name — confirm.
    pub by_type: HashMap<String, usize>,
}
989
/// Outcome of balance validation.
///
/// `Default` yields `false` for every flag, zero for every number and an
/// empty error list.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation took place.
    pub validated: bool,
    /// Whether the result is balanced.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Sum of debits.
    pub total_debits: rust_decimal::Decimal,
    /// Sum of credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were seen.
    pub has_unbalanced_entries: bool,
}
1012
/// Tax data bundled into a single value.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Jurisdiction count.
    pub jurisdiction_count: usize,
    /// Tax code count.
    pub code_count: usize,
    /// Deferred tax data.
    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
    /// Journal entries for tax postings.
    pub tax_posting_journal_entries: Vec<JournalEntry>,
}
1039
/// Intercompany data: group structure, matched pairs, journal entries on both
/// sides, eliminations and summary figures.
///
/// Serializable and deserializable via serde, except for
/// `ic_document_chains`, which is skipped.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Group structure, or `None` when none is attached.
    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Seller-side journal entries.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Buyer-side journal entries.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// NCI measurements.
    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
    /// Intercompany document chains; excluded from serde via `#[serde(skip)]`.
    #[serde(skip)]
    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
    /// Matched pair count.
    pub matched_pair_count: usize,
    /// Elimination entry count.
    pub elimination_entry_count: usize,
    /// Match rate.
    /// NOTE(review): scale (fraction vs. percent) is not visible here — confirm.
    pub match_rate: f64,
}
1065
/// ESG data bundled into a single value, with two summary counts.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Emission count.
    pub emission_count: usize,
    /// Disclosure count.
    pub disclosure_count: usize,
}
1102
/// Treasury data bundled into a single value.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Treasury anomaly labels.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
    /// Treasury journal entries.
    pub journal_entries: Vec<JournalEntry>,
}
1130
/// Project-accounting data bundled into a single value.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1147
/// Top-level result aggregating every snapshot and collection defined in this
/// module.
///
/// Derives `Debug` and `Default` only; it is not `Clone`.
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data.
    pub master_data: MasterDataSnapshot,
    /// Document flows.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger data.
    pub subledger: SubledgerSnapshot,
    /// OCPM event data.
    pub ocpm: OcpmSnapshot,
    /// Audit data.
    pub audit: AuditSnapshot,
    /// Banking data.
    pub banking: BankingSnapshot,
    /// Graph export status.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing data.
    pub sourcing: SourcingSnapshot,
    /// Financial reporting data.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR data.
    pub hr: HrSnapshot,
    /// Accounting standards data.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing data.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales quotes, KPIs and budgets.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax data.
    pub tax: TaxSnapshot,
    /// ESG data.
    pub esg: EsgSnapshot,
    /// Treasury data.
    pub treasury: TreasurySnapshot,
    /// Project accounting data.
    pub project_accounting: ProjectAccountingSnapshot,
    /// Process evolution events.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events.
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events.
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany data.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation outcome.
    pub balance_validation: BalanceValidationResult,
    /// Data quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Quality issues.
    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
    /// Summary statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, or `None` when none is attached.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Gate result, or `None` when none is attached.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// Segregation-of-duties violations.
    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
    /// Opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual pairs.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red flags.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Collusion rings.
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    /// Temporal version chains for vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph, or `None` when none is attached.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links.
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific output, or `None` when none is attached.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// Compliance regulations data.
    pub compliance_regulations: ComplianceRegulationsSnapshot,
    /// Analytics metadata.
    pub analytics_metadata: AnalyticsMetadataSnapshot,
    /// Statistical validation report, or `None` when none is attached.
    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
    /// Interconnectivity data.
    pub interconnectivity: InterconnectivitySnapshot,
}
1253
/// Plain-data snapshot of interconnectivity output.
///
/// Every collection starts empty through the derived `Default`. The pair
/// layouts below are not established by this chunk; the notes are assumptions
/// to be confirmed against whichever phase fills the snapshot.
#[derive(Debug, Clone, Default)]
pub struct InterconnectivitySnapshot {
    /// `(String, u8)` pairs — presumably (vendor id, tier number); TODO confirm.
    pub vendor_tiers: Vec<(String, u8)>,
    /// `(String, String)` pairs — presumably (vendor id, cluster label); TODO confirm.
    pub vendor_clusters: Vec<(String, String)>,
    /// `(String, String)` pairs — presumably (customer id, value segment); TODO confirm.
    pub customer_value_segments: Vec<(String, String)>,
    /// `(String, String)` pairs — presumably (customer id, lifecycle stage); TODO confirm.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Free-form industry metadata strings; format not visible here.
    pub industry_metadata: Vec<String>,
}
1279
/// Plain-data snapshot grouping analytics metadata collections.
///
/// Every collection starts empty through the derived `Default`; the element
/// types all come from `datasynth_core::models`.
#[derive(Debug, Clone, Default)]
pub struct AnalyticsMetadataSnapshot {
    /// Collected `PriorYearComparative` records.
    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
    /// Collected `IndustryBenchmark` records.
    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
    /// Collected `ManagementReport` records.
    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
    /// Collected `LabeledDriftEvent` records.
    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
}
1292
/// Serializable counters and timings describing one generation run.
///
/// All fields default to zero / `None` through the derived `Default`. Fields
/// marked `#[serde(default)]` may be absent from serialized input and then
/// take their default value; fields without the attribute are required when
/// deserializing.
///
/// Most counters are filled in by phases outside this chunk, so the section
/// comments below only group fields by name prefix and do not describe how
/// each value is computed.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // --- Journal entry totals and run dimensions ---
    /// `generate` sets this to the length of the combined journal entry list
    /// once the JE-producing phases have run (when that list is non-empty).
    pub total_entries: u64,
    /// `generate` sets this to the sum of `line_count()` over the combined
    /// journal entry list (when that list is non-empty).
    pub total_line_items: u64,
    pub accounts_count: usize,
    /// Initialised by `generate` from `config.companies.len()`.
    pub companies_count: usize,
    /// Initialised by `generate` from `config.global.period_months`.
    pub period_months: u32,

    // --- Master data counters ---
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,

    // --- Document flow and subledger counters ---
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,

    // --- OCPM counters ---
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,

    // --- Audit counters (required on deserialize) ---
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,

    // --- Audit counters (optional on deserialize) ---
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,

    // --- Anomaly and data quality counters ---
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,

    // --- Banking counters ---
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,

    // --- Graph export counters ---
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,

    // --- LLM enrichment (the `_ms` suffix suggests milliseconds; TODO confirm) ---
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    #[serde(default)]
    pub llm_customers_enriched: usize,
    #[serde(default)]
    pub llm_materials_enriched: usize,
    #[serde(default)]
    pub llm_findings_enriched: usize,

    // --- Diffusion enhancement ---
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,

    // --- Neural hybrid settings: omitted from serialized output when `None` ---
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_weight: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_strategy: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_routed_column_count: Option<usize>,

    // --- Causal generation ---
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    // NOTE(review): unlike the neural `Option` fields above, this one has no
    // `skip_serializing_if`, so a `None` is still written out.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // --- Sourcing counters ---
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,

    // --- Financial reporting counters ---
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // --- HR counters ---
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    #[serde(default)]
    pub pension_plan_count: usize,
    #[serde(default)]
    pub stock_grant_count: usize,

    // --- Accounting standards counters ---
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    #[serde(default)]
    pub business_combination_count: usize,
    #[serde(default)]
    pub ecl_model_count: usize,
    #[serde(default)]
    pub provision_count: usize,

    // --- Manufacturing counters ---
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,

    // --- Sales, KPI and budget counters ---
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,

    // --- Tax counters ---
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,

    // --- ESG counters ---
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // --- Intercompany counters ---
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,

    // --- Additional subledger counters ---
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // --- Treasury instrument counters ---
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,

    // --- Project accounting counters ---
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,

    // --- Miscellaneous later additions ---
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
    #[serde(default)]
    pub period_close_je_count: usize,
}
1562
/// Orchestrator that owns the configuration and shared state used by the
/// generation workflow (`generate`).
///
/// Instances are created through `new`, `with_defaults`, `from_fingerprint`
/// or `from_fingerprint_data` and then customised with the `with_*` builders.
pub struct EnhancedOrchestrator {
    /// Validated generator configuration (checked by `new`).
    config: GeneratorConfig,
    /// Phase switches and options; `with_progress` updates `show_progress`.
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` when the orchestrator is constructed.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts as `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or drawn randomly when unset.
    seed: u64,
    /// Progress container created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard built by `build_resource_guard`.
    resource_guard: ResourceGuard,
    /// Output location supplied through `with_output_path`, if any.
    output_path: Option<PathBuf>,
    /// Copula generators attached by `from_fingerprint_data`; otherwise empty.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry: built-in only, or configured from `config.country_packs`.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional sink that receives serialized items from `emit_phase_items`.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
    /// Template provider built by `build_template_provider`.
    template_provider: datasynth_core::templates::SharedTemplateProvider,
    /// Shared temporal context; `None` unless `build_temporal_context` produced one.
    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
}
1596
1597impl EnhancedOrchestrator {
1598 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1600 datasynth_config::validate_config(&config)?;
1601
1602 let seed = config.global.seed.unwrap_or_else(rand::random);
1603
1604 let resource_guard = Self::build_resource_guard(&config, None);
1606
1607 let country_pack_registry = match &config.country_packs {
1609 Some(cp) => {
1610 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1611 .map_err(|e| SynthError::config(e.to_string()))?
1612 }
1613 None => datasynth_core::CountryPackRegistry::builtin_only()
1614 .map_err(|e| SynthError::config(e.to_string()))?,
1615 };
1616
1617 let template_provider = Self::build_template_provider(&config)?;
1621
1622 let temporal_context = Self::build_temporal_context(&config)?;
1626
1627 Ok(Self {
1628 config,
1629 phase_config,
1630 coa: None,
1631 master_data: MasterDataSnapshot::default(),
1632 seed,
1633 multi_progress: None,
1634 resource_guard,
1635 output_path: None,
1636 copula_generators: Vec::new(),
1637 country_pack_registry,
1638 phase_sink: None,
1639 template_provider,
1640 temporal_context,
1641 })
1642 }
1643
1644 fn build_temporal_context(
1650 config: &GeneratorConfig,
1651 ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1652 use datasynth_core::distributions::{parse_region_code, TemporalContext};
1653
1654 let tp = &config.temporal_patterns;
1655 if !tp.enabled || !tp.business_days.enabled {
1656 return Ok(None);
1657 }
1658
1659 let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1660 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1661 let end_date = start_date + chrono::Months::new(config.global.period_months);
1662
1663 let region_code = tp
1664 .calendars
1665 .regions
1666 .first()
1667 .cloned()
1668 .unwrap_or_else(|| "US".to_string());
1669 let region = parse_region_code(®ion_code);
1670
1671 Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1672 }
1673
1674 fn build_template_provider(
1682 config: &GeneratorConfig,
1683 ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1684 use datasynth_core::templates::{
1685 loader::{MergeStrategy, TemplateLoader},
1686 DefaultTemplateProvider,
1687 };
1688 use std::sync::Arc;
1689
1690 let provider = match &config.templates.path {
1691 None => DefaultTemplateProvider::new(),
1692 Some(path) => {
1693 let data = if path.is_dir() {
1694 TemplateLoader::load_from_directory(path)
1695 } else {
1696 TemplateLoader::load_from_file(path)
1697 }
1698 .map_err(|e| {
1699 SynthError::config(format!(
1700 "Failed to load templates from {}: {e}",
1701 path.display()
1702 ))
1703 })?;
1704 let strategy = match config.templates.merge_strategy {
1705 datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1706 datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1707 datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1708 MergeStrategy::MergePreferFile
1709 }
1710 };
1711 DefaultTemplateProvider::with_templates(data, strategy)
1712 }
1713 };
1714 Ok(Arc::new(provider))
1715 }
1716
    /// Creates an orchestrator using `PhaseConfig::default()`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `new`.
    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
        Self::new(config, PhaseConfig::default())
    }
1721
1722 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1724 self.phase_sink = Some(sink);
1725 self
1726 }
1727
    /// Installs `sink` as the phase sink, replacing any previous one.
    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
        self.phase_sink = Some(sink);
    }
1732
1733 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1735 if let Some(ref sink) = self.phase_sink {
1736 for item in items {
1737 if let Ok(value) = serde_json::to_value(item) {
1738 if let Err(e) = sink.emit(phase, type_name, &value) {
1739 warn!(
1740 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1741 );
1742 }
1743 }
1744 }
1745 if let Err(e) = sink.phase_complete(phase) {
1746 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1747 }
1748 }
1749 }
1750
1751 pub fn with_progress(mut self, show: bool) -> Self {
1753 self.phase_config.show_progress = show;
1754 if show {
1755 self.multi_progress = Some(MultiProgress::new());
1756 }
1757 self
1758 }
1759
1760 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1762 let path = path.into();
1763 self.output_path = Some(path.clone());
1764 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1766 self
1767 }
1768
    /// Returns the country pack registry built when the orchestrator was created.
    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
        &self.country_pack_registry
    }
1773
    /// Looks up the country pack for `country` via the registry's `get_by_str`.
    ///
    /// NOTE(review): the return type is not optional, so the registry
    /// presumably falls back to some default pack for unknown codes — confirm
    /// against `CountryPackRegistry::get_by_str`.
    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
        self.country_pack_registry.get_by_str(country)
    }
1778
1779 fn primary_country_code(&self) -> &str {
1782 self.config
1783 .companies
1784 .first()
1785 .map(|c| c.country.as_str())
1786 .unwrap_or("US")
1787 }
1788
    /// Country pack for the primary country code (see `primary_country_code`).
    fn primary_pack(&self) -> &datasynth_core::CountryPack {
        self.country_pack_for(self.primary_country_code())
    }
1793
1794 fn resolve_coa_framework(&self) -> CoAFramework {
1796 if self.config.accounting_standards.enabled {
1797 match self.config.accounting_standards.framework {
1798 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1799 return CoAFramework::FrenchPcg;
1800 }
1801 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1802 return CoAFramework::GermanSkr04;
1803 }
1804 _ => {}
1805 }
1806 }
1807 let pack = self.primary_pack();
1809 match pack.accounting.framework.as_str() {
1810 "french_gaap" => CoAFramework::FrenchPcg,
1811 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1812 _ => CoAFramework::UsGaap,
1813 }
1814 }
1815
    /// Returns `true` when at least one copula generator is attached.
    pub fn has_copulas(&self) -> bool {
        !self.copula_generators.is_empty()
    }
1823
    /// Returns the attached copula generator specs as a shared slice.
    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
        &self.copula_generators
    }
1832
    /// Returns the attached copula generator specs as a mutable slice.
    ///
    /// Individual specs can be modified, but the slice type does not allow
    /// adding or removing entries.
    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
        &mut self.copula_generators
    }
1839
1840 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1844 self.copula_generators
1845 .iter_mut()
1846 .find(|c| c.name == copula_name)
1847 .map(|c| c.generator.sample())
1848 }
1849
1850 pub fn from_fingerprint(
1873 fingerprint_path: &std::path::Path,
1874 phase_config: PhaseConfig,
1875 scale: f64,
1876 ) -> SynthResult<Self> {
1877 info!("Loading fingerprint from: {}", fingerprint_path.display());
1878
1879 let reader = FingerprintReader::new();
1881 let fingerprint = reader
1882 .read_from_file(fingerprint_path)
1883 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1884
1885 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1886 }
1887
1888 pub fn from_fingerprint_data(
1895 fingerprint: Fingerprint,
1896 phase_config: PhaseConfig,
1897 scale: f64,
1898 ) -> SynthResult<Self> {
1899 info!(
1900 "Synthesizing config from fingerprint (version: {}, tables: {})",
1901 fingerprint.manifest.version,
1902 fingerprint.schema.tables.len()
1903 );
1904
1905 let seed: u64 = rand::random();
1907 info!("Fingerprint synthesis seed: {}", seed);
1908
1909 let options = SynthesisOptions {
1911 scale,
1912 seed: Some(seed),
1913 preserve_correlations: true,
1914 inject_anomalies: true,
1915 };
1916 let synthesizer = ConfigSynthesizer::with_options(options);
1917
1918 let synthesis_result = synthesizer
1920 .synthesize_full(&fingerprint, seed)
1921 .map_err(|e| {
1922 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1923 })?;
1924
1925 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1927 Self::base_config_for_industry(industry)
1928 } else {
1929 Self::base_config_for_industry("manufacturing")
1930 };
1931
1932 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1934
1935 info!(
1937 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1938 fingerprint.schema.tables.len(),
1939 scale,
1940 synthesis_result.copula_generators.len()
1941 );
1942
1943 if !synthesis_result.copula_generators.is_empty() {
1944 for spec in &synthesis_result.copula_generators {
1945 info!(
1946 " Copula '{}' for table '{}': {} columns",
1947 spec.name,
1948 spec.table,
1949 spec.columns.len()
1950 );
1951 }
1952 }
1953
1954 let mut orchestrator = Self::new(config, phase_config)?;
1956
1957 orchestrator.copula_generators = synthesis_result.copula_generators;
1959
1960 Ok(orchestrator)
1961 }
1962
1963 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1965 use datasynth_config::presets::create_preset;
1966 use datasynth_config::TransactionVolume;
1967 use datasynth_core::models::{CoAComplexity, IndustrySector};
1968
1969 let sector = match industry.to_lowercase().as_str() {
1970 "manufacturing" => IndustrySector::Manufacturing,
1971 "retail" => IndustrySector::Retail,
1972 "financial" | "financial_services" => IndustrySector::FinancialServices,
1973 "healthcare" => IndustrySector::Healthcare,
1974 "technology" | "tech" => IndustrySector::Technology,
1975 _ => IndustrySector::Manufacturing,
1976 };
1977
1978 create_preset(
1980 sector,
1981 1, 12, CoAComplexity::Medium,
1984 TransactionVolume::TenK,
1985 )
1986 }
1987
1988 fn apply_config_patch(
1990 mut config: GeneratorConfig,
1991 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1992 ) -> GeneratorConfig {
1993 use datasynth_fingerprint::synthesis::ConfigValue;
1994
1995 for (key, value) in patch.values() {
1996 match (key.as_str(), value) {
1997 ("transactions.count", ConfigValue::Integer(n)) => {
2000 info!(
2001 "Fingerprint suggests {} transactions (apply via company volumes)",
2002 n
2003 );
2004 }
2005 ("global.period_months", ConfigValue::Integer(n)) => {
2006 config.global.period_months = (*n).clamp(1, 120) as u32;
2007 }
2008 ("global.start_date", ConfigValue::String(s)) => {
2009 config.global.start_date = s.clone();
2010 }
2011 ("global.seed", ConfigValue::Integer(n)) => {
2012 config.global.seed = Some(*n as u64);
2013 }
2014 ("fraud.enabled", ConfigValue::Bool(b)) => {
2015 config.fraud.enabled = *b;
2016 }
2017 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2018 config.fraud.fraud_rate = *f;
2019 }
2020 ("data_quality.enabled", ConfigValue::Bool(b)) => {
2021 config.data_quality.enabled = *b;
2022 }
2023 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2025 config.fraud.enabled = *b;
2026 }
2027 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2028 config.fraud.fraud_rate = *f;
2029 }
2030 _ => {
2031 debug!("Ignoring unknown config patch key: {}", key);
2032 }
2033 }
2034 }
2035
2036 config
2037 }
2038
2039 fn build_resource_guard(
2041 config: &GeneratorConfig,
2042 output_path: Option<PathBuf>,
2043 ) -> ResourceGuard {
2044 let mut builder = ResourceGuardBuilder::new();
2045
2046 if config.global.memory_limit_mb > 0 {
2048 builder = builder.memory_limit(config.global.memory_limit_mb);
2049 }
2050
2051 if let Some(path) = output_path {
2053 builder = builder.output_path(path).min_free_disk(100); }
2055
2056 builder = builder.conservative();
2058
2059 builder.build()
2060 }
2061
    /// Runs the resource guard's check and returns the degradation level.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the guard.
    fn check_resources(&self) -> SynthResult<DegradationLevel> {
        self.resource_guard.check()
    }
2069
2070 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2072 let level = self.resource_guard.check()?;
2073
2074 if level != DegradationLevel::Normal {
2075 warn!(
2076 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2077 phase,
2078 level,
2079 self.resource_guard.current_memory_mb(),
2080 self.resource_guard.available_disk_mb()
2081 );
2082 }
2083
2084 Ok(level)
2085 }
2086
    /// Returns the degradation actions currently reported by the resource guard.
    fn get_degradation_actions(&self) -> DegradationActions {
        self.resource_guard.get_actions()
    }
2091
2092 fn check_memory_limit(&self) -> SynthResult<()> {
2094 self.check_resources()?;
2095 Ok(())
2096 }
2097
2098 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2100 info!("Starting enhanced generation workflow");
2101 info!(
2102 "Config: industry={:?}, period_months={}, companies={}",
2103 self.config.global.industry,
2104 self.config.global.period_months,
2105 self.config.companies.len()
2106 );
2107
2108 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2111 datasynth_core::serde_decimal::set_numeric_native(is_native);
2112 struct NumericModeGuard;
2113 impl Drop for NumericModeGuard {
2114 fn drop(&mut self) {
2115 datasynth_core::serde_decimal::set_numeric_native(false);
2116 }
2117 }
2118 let _numeric_guard = if is_native {
2119 Some(NumericModeGuard)
2120 } else {
2121 None
2122 };
2123
2124 let initial_level = self.check_resources_with_log("initial")?;
2126 if initial_level == DegradationLevel::Emergency {
2127 return Err(SynthError::resource(
2128 "Insufficient resources to start generation",
2129 ));
2130 }
2131
2132 let mut stats = EnhancedGenerationStatistics {
2133 companies_count: self.config.companies.len(),
2134 period_months: self.config.global.period_months,
2135 ..Default::default()
2136 };
2137
2138 let coa = self.phase_chart_of_accounts(&mut stats)?;
2140
2141 self.phase_master_data(&mut stats)?;
2143
2144 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2146 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2147 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2148
2149 let (mut document_flows, mut subledger, fa_journal_entries) =
2151 self.phase_document_flows(&mut stats)?;
2152
2153 self.emit_phase_items(
2155 "document_flows",
2156 "PurchaseOrder",
2157 &document_flows.purchase_orders,
2158 );
2159 self.emit_phase_items(
2160 "document_flows",
2161 "GoodsReceipt",
2162 &document_flows.goods_receipts,
2163 );
2164 self.emit_phase_items(
2165 "document_flows",
2166 "VendorInvoice",
2167 &document_flows.vendor_invoices,
2168 );
2169 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2170 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2171
2172 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2174
2175 let opening_balance_jes: Vec<JournalEntry> = opening_balances
2180 .iter()
2181 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2182 .collect();
2183 if !opening_balance_jes.is_empty() {
2184 debug!(
2185 "Prepending {} opening balance JEs to entries",
2186 opening_balance_jes.len()
2187 );
2188 }
2189
2190 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2192
2193 if !opening_balance_jes.is_empty() {
2196 let mut combined = opening_balance_jes;
2197 combined.extend(entries);
2198 entries = combined;
2199 }
2200
2201 if !fa_journal_entries.is_empty() {
2203 debug!(
2204 "Appending {} FA acquisition JEs to main entries",
2205 fa_journal_entries.len()
2206 );
2207 entries.extend(fa_journal_entries);
2208 }
2209
2210 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2212
2213 let actions = self.get_degradation_actions();
2215
2216 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2218
2219 if !sourcing.contracts.is_empty() {
2222 let mut linked_count = 0usize;
2223 let po_vendor_pairs: Vec<(String, String)> = document_flows
2225 .p2p_chains
2226 .iter()
2227 .map(|chain| {
2228 (
2229 chain.purchase_order.vendor_id.clone(),
2230 chain.purchase_order.header.document_id.clone(),
2231 )
2232 })
2233 .collect();
2234
2235 for chain in &mut document_flows.p2p_chains {
2236 if chain.purchase_order.contract_id.is_none() {
2237 if let Some(contract) = sourcing
2238 .contracts
2239 .iter()
2240 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2241 {
2242 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2243 linked_count += 1;
2244 }
2245 }
2246 }
2247
2248 for contract in &mut sourcing.contracts {
2250 let po_ids: Vec<String> = po_vendor_pairs
2251 .iter()
2252 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2253 .map(|(_, po_id)| po_id.clone())
2254 .collect();
2255 if !po_ids.is_empty() {
2256 contract.purchase_order_ids = po_ids;
2257 }
2258 }
2259
2260 if linked_count > 0 {
2261 debug!(
2262 "Linked {} purchase orders to S2C contracts by vendor match",
2263 linked_count
2264 );
2265 }
2266 }
2267
2268 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2270
2271 if !intercompany.seller_journal_entries.is_empty()
2273 || !intercompany.buyer_journal_entries.is_empty()
2274 {
2275 let ic_je_count = intercompany.seller_journal_entries.len()
2276 + intercompany.buyer_journal_entries.len();
2277 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2278 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2279 debug!(
2280 "Appended {} IC journal entries to main entries",
2281 ic_je_count
2282 );
2283 }
2284
2285 if !intercompany.elimination_entries.is_empty() {
2287 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2288 &intercompany.elimination_entries,
2289 );
2290 if !elim_jes.is_empty() {
2291 debug!(
2292 "Appended {} elimination journal entries to main entries",
2293 elim_jes.len()
2294 );
2295 let elim_debit: rust_decimal::Decimal =
2297 elim_jes.iter().map(|je| je.total_debit()).sum();
2298 let elim_credit: rust_decimal::Decimal =
2299 elim_jes.iter().map(|je| je.total_credit()).sum();
2300 let elim_diff = (elim_debit - elim_credit).abs();
2301 let tolerance = rust_decimal::Decimal::new(1, 2); if elim_diff > tolerance {
2303 return Err(datasynth_core::error::SynthError::generation(format!(
2304 "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2305 elim_debit, elim_credit, elim_diff, tolerance
2306 )));
2307 }
2308 debug!(
2309 "IC elimination balance verified: debits={}, credits={} (diff={})",
2310 elim_debit, elim_credit, elim_diff
2311 );
2312 entries.extend(elim_jes);
2313 }
2314 }
2315
2316 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2318 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2319 document_flows
2320 .customer_invoices
2321 .extend(ic_docs.seller_invoices.iter().cloned());
2322 document_flows
2323 .purchase_orders
2324 .extend(ic_docs.buyer_orders.iter().cloned());
2325 document_flows
2326 .goods_receipts
2327 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2328 document_flows
2329 .vendor_invoices
2330 .extend(ic_docs.buyer_invoices.iter().cloned());
2331 debug!(
2332 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2333 ic_docs.seller_invoices.len(),
2334 ic_docs.buyer_orders.len(),
2335 ic_docs.buyer_goods_receipts.len(),
2336 ic_docs.buyer_invoices.len(),
2337 );
2338 }
2339 }
2340
2341 let hr = self.phase_hr_data(&mut stats)?;
2343
2344 if !hr.payroll_runs.is_empty() {
2346 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2347 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2348 entries.extend(payroll_jes);
2349 }
2350
2351 if !hr.pension_journal_entries.is_empty() {
2353 debug!(
2354 "Generated {} JEs from pension plans",
2355 hr.pension_journal_entries.len()
2356 );
2357 entries.extend(hr.pension_journal_entries.iter().cloned());
2358 }
2359
2360 if !hr.stock_comp_journal_entries.is_empty() {
2362 debug!(
2363 "Generated {} JEs from stock-based compensation",
2364 hr.stock_comp_journal_entries.len()
2365 );
2366 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2367 }
2368
2369 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2371
2372 if !manufacturing_snap.production_orders.is_empty() {
2374 let currency = self
2375 .config
2376 .companies
2377 .first()
2378 .map(|c| c.currency.as_str())
2379 .unwrap_or("USD");
2380 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2381 &manufacturing_snap.production_orders,
2382 &manufacturing_snap.quality_inspections,
2383 currency,
2384 );
2385 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2386 entries.extend(mfg_jes);
2387 }
2388
2389 if !manufacturing_snap.quality_inspections.is_empty() {
2391 let framework = match self.config.accounting_standards.framework {
2392 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2393 _ => "US_GAAP",
2394 };
2395 for company in &self.config.companies {
2396 let company_orders: Vec<_> = manufacturing_snap
2397 .production_orders
2398 .iter()
2399 .filter(|o| o.company_code == company.code)
2400 .cloned()
2401 .collect();
2402 let company_inspections: Vec<_> = manufacturing_snap
2403 .quality_inspections
2404 .iter()
2405 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2406 .cloned()
2407 .collect();
2408 if company_inspections.is_empty() {
2409 continue;
2410 }
2411 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2412 let warranty_result = warranty_gen.generate(
2413 &company.code,
2414 &company_orders,
2415 &company_inspections,
2416 &company.currency,
2417 framework,
2418 );
2419 if !warranty_result.journal_entries.is_empty() {
2420 debug!(
2421 "Generated {} warranty provision JEs for {}",
2422 warranty_result.journal_entries.len(),
2423 company.code
2424 );
2425 entries.extend(warranty_result.journal_entries);
2426 }
2427 }
2428 }
2429
2430 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2432 {
2433 let cogs_currency = self
2434 .config
2435 .companies
2436 .first()
2437 .map(|c| c.currency.as_str())
2438 .unwrap_or("USD");
2439 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2440 &document_flows.deliveries,
2441 &manufacturing_snap.production_orders,
2442 cogs_currency,
2443 );
2444 if !cogs_jes.is_empty() {
2445 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2446 entries.extend(cogs_jes);
2447 }
2448 }
2449
2450 if !manufacturing_snap.inventory_movements.is_empty()
2456 && !subledger.inventory_positions.is_empty()
2457 {
2458 use datasynth_core::models::MovementType as MfgMovementType;
2459 let mut receipt_count = 0usize;
2460 let mut issue_count = 0usize;
2461 for movement in &manufacturing_snap.inventory_movements {
2462 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2464 p.material_id == movement.material_code
2465 && p.company_code == movement.entity_code
2466 }) {
2467 match movement.movement_type {
2468 MfgMovementType::GoodsReceipt => {
2469 pos.add_quantity(
2471 movement.quantity,
2472 movement.value,
2473 movement.movement_date,
2474 );
2475 receipt_count += 1;
2476 }
2477 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2478 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2480 issue_count += 1;
2481 }
2482 _ => {}
2483 }
2484 }
2485 }
2486 debug!(
2487 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2488 manufacturing_snap.inventory_movements.len(),
2489 receipt_count,
2490 issue_count,
2491 );
2492 }
2493
2494 if !entries.is_empty() {
2497 stats.total_entries = entries.len() as u64;
2498 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2499 debug!(
2500 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2501 stats.total_entries, stats.total_line_items
2502 );
2503 }
2504
2505 if self.config.internal_controls.enabled && !entries.is_empty() {
2507 info!("Phase 7b: Applying internal controls to journal entries");
2508 let control_config = ControlGeneratorConfig {
2509 exception_rate: self.config.internal_controls.exception_rate,
2510 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2511 enable_sox_marking: true,
2512 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2513 self.config.internal_controls.sox_materiality_threshold,
2514 )
2515 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2516 ..Default::default()
2517 };
2518 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2519 for entry in &mut entries {
2520 control_gen.apply_controls(entry, &coa);
2521 }
2522 let with_controls = entries
2523 .iter()
2524 .filter(|e| !e.header.control_ids.is_empty())
2525 .count();
2526 info!(
2527 "Applied controls to {} entries ({} with control IDs assigned)",
2528 entries.len(),
2529 with_controls
2530 );
2531 }
2532
2533 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2537 .iter()
2538 .filter(|e| e.header.sod_violation)
2539 .filter_map(|e| {
2540 e.header.sod_conflict_type.map(|ct| {
2541 use datasynth_core::models::{RiskLevel, SodViolation};
2542 let severity = match ct {
2543 datasynth_core::models::SodConflictType::PaymentReleaser
2544 | datasynth_core::models::SodConflictType::RequesterApprover => {
2545 RiskLevel::Critical
2546 }
2547 datasynth_core::models::SodConflictType::PreparerApprover
2548 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2549 | datasynth_core::models::SodConflictType::JournalEntryPoster
2550 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2551 RiskLevel::High
2552 }
2553 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2554 RiskLevel::Medium
2555 }
2556 };
2557 let action = format!(
2558 "SoD conflict {:?} on entry {} ({})",
2559 ct, e.header.document_id, e.header.company_code
2560 );
2561 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2562 })
2563 })
2564 .collect();
2565 if !sod_violations.is_empty() {
2566 info!(
2567 "Phase 7c: Extracted {} SoD violations from {} entries",
2568 sod_violations.len(),
2569 entries.len()
2570 );
2571 }
2572
2573 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2575
2576 {
2584 let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2585 if self.config.fraud.enabled && doc_rate > 0.0 {
2586 use datasynth_core::fraud_propagation::{
2587 inject_document_fraud, propagate_documents_to_entries,
2588 };
2589 use datasynth_core::utils::weighted_select;
2590 use datasynth_core::FraudType;
2591 use rand_chacha::rand_core::SeedableRng;
2592
2593 let dist = &self.config.fraud.fraud_type_distribution;
2594 let fraud_type_weights: [(FraudType, f64); 8] = [
2595 (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2596 (FraudType::FictitiousEntry, dist.fictitious_transaction),
2597 (FraudType::RevenueManipulation, dist.revenue_manipulation),
2598 (
2599 FraudType::ImproperCapitalization,
2600 dist.expense_capitalization,
2601 ),
2602 (FraudType::SplitTransaction, dist.split_transaction),
2603 (FraudType::TimingAnomaly, dist.timing_anomaly),
2604 (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2605 (FraudType::DuplicatePayment, dist.duplicate_payment),
2606 ];
2607 let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2608 let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2609 if weights_sum <= 0.0 {
2610 FraudType::FictitiousEntry
2611 } else {
2612 *weighted_select(rng, &fraud_type_weights)
2613 }
2614 };
2615
2616 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2617 let mut doc_tagged = 0usize;
2618 macro_rules! inject_into {
2619 ($collection:expr) => {{
2620 let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2621 $collection.iter_mut().map(|d| &mut d.header).collect();
2622 doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2623 }};
2624 }
2625 inject_into!(document_flows.purchase_orders);
2626 inject_into!(document_flows.goods_receipts);
2627 inject_into!(document_flows.vendor_invoices);
2628 inject_into!(document_flows.payments);
2629 inject_into!(document_flows.sales_orders);
2630 inject_into!(document_flows.deliveries);
2631 inject_into!(document_flows.customer_invoices);
2632 if doc_tagged > 0 {
2633 info!(
2634 "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2635 );
2636 }
2637
2638 if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2639 let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2640 Vec::new();
2641 headers.extend(
2642 document_flows
2643 .purchase_orders
2644 .iter()
2645 .map(|d| d.header.clone()),
2646 );
2647 headers.extend(
2648 document_flows
2649 .goods_receipts
2650 .iter()
2651 .map(|d| d.header.clone()),
2652 );
2653 headers.extend(
2654 document_flows
2655 .vendor_invoices
2656 .iter()
2657 .map(|d| d.header.clone()),
2658 );
2659 headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2660 headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2661 headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2662 headers.extend(
2663 document_flows
2664 .customer_invoices
2665 .iter()
2666 .map(|d| d.header.clone()),
2667 );
2668 let propagated = propagate_documents_to_entries(&headers, &mut entries);
2669 if propagated > 0 {
2670 info!(
2671 "Propagated document-level fraud to {propagated} derived journal entries"
2672 );
2673 }
2674 }
2675 }
2676 }
2677
2678 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2680
2681 {
2699 use datasynth_core::fraud_bias::{
2700 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2701 };
2702 use rand_chacha::rand_core::SeedableRng;
2703 let cfg = FraudBehavioralBiasConfig::default();
2704 if cfg.enabled {
2705 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2706 let mut swept = 0usize;
2707 for entry in entries.iter_mut() {
2708 if entry.header.is_fraud && !entry.header.is_anomaly {
2709 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2710 swept += 1;
2711 }
2712 }
2713 if swept > 0 {
2714 info!(
2715 "Applied behavioral biases to {swept} non-anomaly fraud entries \
2716 (doc-propagated + je_generator intrinsic fraud)"
2717 );
2718 }
2719 }
2720 }
2721
2722 self.emit_phase_items(
2724 "anomaly_injection",
2725 "LabeledAnomaly",
2726 &anomaly_labels.labels,
2727 );
2728
2729 if self.config.fraud.propagate_to_document {
2737 use std::collections::HashMap;
2738 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2751 for je in &entries {
2752 if je.header.is_fraud {
2753 if let Some(ref fraud_type) = je.header.fraud_type {
2754 if let Some(ref reference) = je.header.reference {
2755 fraud_map.insert(reference.clone(), *fraud_type);
2757 if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2760 if !bare.is_empty() {
2761 fraud_map.insert(bare.to_string(), *fraud_type);
2762 }
2763 }
2764 }
2765 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2767 }
2768 }
2769 }
2770 if !fraud_map.is_empty() {
2771 let mut propagated = 0usize;
2772 macro_rules! propagate_to {
2774 ($collection:expr) => {
2775 for doc in &mut $collection {
2776 if doc.header.propagate_fraud(&fraud_map) {
2777 propagated += 1;
2778 }
2779 }
2780 };
2781 }
2782 propagate_to!(document_flows.purchase_orders);
2783 propagate_to!(document_flows.goods_receipts);
2784 propagate_to!(document_flows.vendor_invoices);
2785 propagate_to!(document_flows.payments);
2786 propagate_to!(document_flows.sales_orders);
2787 propagate_to!(document_flows.deliveries);
2788 propagate_to!(document_flows.customer_invoices);
2789 if propagated > 0 {
2790 info!(
2791 "Propagated fraud labels to {} document flow records",
2792 propagated
2793 );
2794 }
2795 }
2796 }
2797
2798 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2800
2801 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2803
2804 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2806
2807 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2809
2810 let balance_validation = self.phase_balance_validation(&entries)?;
2812
2813 let subledger_reconciliation =
2815 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2816
2817 let (data_quality_stats, quality_issues) =
2819 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2820
2821 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2823
2824 {
2826 let tolerance = rust_decimal::Decimal::new(1, 2); let mut unbalanced_clean = 0usize;
2831 for je in &entries {
2832 if je.header.is_fraud || je.header.is_anomaly {
2833 continue;
2834 }
2835 let diff = (je.total_debit() - je.total_credit()).abs();
2836 if diff > tolerance {
2837 unbalanced_clean += 1;
2838 if unbalanced_clean <= 3 {
2839 warn!(
2840 "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2841 je.header.document_id,
2842 je.total_debit(),
2843 je.total_credit(),
2844 diff
2845 );
2846 }
2847 }
2848 }
2849 if unbalanced_clean > 0 {
2850 return Err(datasynth_core::error::SynthError::generation(format!(
2851 "{} non-anomaly JEs are unbalanced (debits != credits). \
2852 First few logged above. Tolerance={}",
2853 unbalanced_clean, tolerance
2854 )));
2855 }
2856 debug!(
2857 "Phase 10c: All {} non-anomaly JEs individually balanced",
2858 entries
2859 .iter()
2860 .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2861 .count()
2862 );
2863
2864 let company_codes: Vec<String> = self
2866 .config
2867 .companies
2868 .iter()
2869 .map(|c| c.code.clone())
2870 .collect();
2871 for company_code in &company_codes {
2872 let mut assets = rust_decimal::Decimal::ZERO;
2873 let mut liab_equity = rust_decimal::Decimal::ZERO;
2874
2875 for entry in &entries {
2876 if entry.header.company_code != *company_code {
2877 continue;
2878 }
2879 for line in &entry.lines {
2880 let acct = &line.gl_account;
2881 let net = line.debit_amount - line.credit_amount;
2882 if acct.starts_with('1') {
2884 assets += net;
2885 }
2886 else if acct.starts_with('2') || acct.starts_with('3') {
2888 liab_equity -= net; }
2890 }
2893 }
2894
2895 let bs_diff = (assets - liab_equity).abs();
2896 if bs_diff > tolerance {
2897 warn!(
2898 "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
2899 revenue/expense closing entries may not fully offset",
2900 company_code, assets, liab_equity, bs_diff
2901 );
2902 } else {
2906 debug!(
2907 "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
2908 company_code, assets, liab_equity, bs_diff
2909 );
2910 }
2911 }
2912
2913 info!("Phase 10c: All generation-time accounting assertions passed");
2914 }
2915
2916 let audit = self.phase_audit_data(&entries, &mut stats)?;
2918
2919 let mut banking = self.phase_banking_data(&mut stats)?;
2921
2922 if self.phase_config.generate_banking
2927 && !document_flows.payments.is_empty()
2928 && !banking.accounts.is_empty()
2929 {
2930 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2931 if bridge_rate > 0.0 {
2932 let mut bridge =
2933 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2934 self.seed,
2935 );
2936 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2937 &document_flows.payments,
2938 &banking.customers,
2939 &banking.accounts,
2940 bridge_rate,
2941 );
2942 info!(
2943 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2944 bridge_stats.bridged_count,
2945 bridge_stats.transactions_emitted,
2946 bridge_stats.fraud_propagated,
2947 );
2948 let bridged_count = bridged_txns.len();
2949 banking.transactions.extend(bridged_txns);
2950
2951 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2954 datasynth_banking::generators::velocity_computer::compute_velocity_features(
2955 &mut banking.transactions,
2956 );
2957 }
2958
2959 banking.suspicious_count = banking
2961 .transactions
2962 .iter()
2963 .filter(|t| t.is_suspicious)
2964 .count();
2965 stats.banking_transaction_count = banking.transactions.len();
2966 stats.banking_suspicious_count = banking.suspicious_count;
2967 }
2968 }
2969
2970 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2972
2973 self.phase_llm_enrichment(&mut stats);
2975
2976 self.phase_diffusion_enhancement(&mut stats);
2978
2979 self.phase_causal_overlay(&mut stats);
2981
2982 let mut financial_reporting = self.phase_financial_reporting(
2986 &document_flows,
2987 &entries,
2988 &coa,
2989 &hr,
2990 &audit,
2991 &mut stats,
2992 )?;
2993
2994 {
2996 use datasynth_core::models::StatementType;
2997 for stmt in &financial_reporting.consolidated_statements {
2998 if stmt.statement_type == StatementType::BalanceSheet {
2999 let total_assets: rust_decimal::Decimal = stmt
3000 .line_items
3001 .iter()
3002 .filter(|li| li.section.to_uppercase().contains("ASSET"))
3003 .map(|li| li.amount)
3004 .sum();
3005 let total_le: rust_decimal::Decimal = stmt
3006 .line_items
3007 .iter()
3008 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3009 .map(|li| li.amount)
3010 .sum();
3011 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3012 warn!(
3013 "BS equation imbalance: assets={}, L+E={}",
3014 total_assets, total_le
3015 );
3016 }
3017 }
3018 }
3019 }
3020
3021 let accounting_standards =
3023 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3024
3025 if !accounting_standards.ecl_journal_entries.is_empty() {
3027 debug!(
3028 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3029 accounting_standards.ecl_journal_entries.len()
3030 );
3031 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3032 }
3033
3034 if !accounting_standards.provision_journal_entries.is_empty() {
3036 debug!(
3037 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3038 accounting_standards.provision_journal_entries.len()
3039 );
3040 entries.extend(
3041 accounting_standards
3042 .provision_journal_entries
3043 .iter()
3044 .cloned(),
3045 );
3046 }
3047
3048 let mut ocpm = self.phase_ocpm_events(
3050 &document_flows,
3051 &sourcing,
3052 &hr,
3053 &manufacturing_snap,
3054 &banking,
3055 &audit,
3056 &financial_reporting,
3057 &mut stats,
3058 )?;
3059
3060 if let Some(ref event_log) = ocpm.event_log {
3062 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3063 }
3064
3065 if let Some(ref event_log) = ocpm.event_log {
3067 let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3069 std::collections::HashMap::new();
3070 for (idx, event) in event_log.events.iter().enumerate() {
3071 if let Some(ref doc_ref) = event.document_ref {
3072 doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3073 }
3074 }
3075
3076 if !doc_index.is_empty() {
3077 let mut annotated = 0usize;
3078 for entry in &mut entries {
3079 let doc_id_str = entry.header.document_id.to_string();
3080 let mut matched_indices: Vec<usize> = Vec::new();
3082 if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3083 matched_indices.extend(indices);
3084 }
3085 if let Some(ref reference) = entry.header.reference {
3086 let bare_ref = reference
3087 .find(':')
3088 .map(|i| &reference[i + 1..])
3089 .unwrap_or(reference.as_str());
3090 if let Some(indices) = doc_index.get(bare_ref) {
3091 for &idx in indices {
3092 if !matched_indices.contains(&idx) {
3093 matched_indices.push(idx);
3094 }
3095 }
3096 }
3097 }
3098 if !matched_indices.is_empty() {
3100 for &idx in &matched_indices {
3101 let event = &event_log.events[idx];
3102 if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3103 entry.header.ocpm_event_ids.push(event.event_id);
3104 }
3105 for obj_ref in &event.object_refs {
3106 if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3107 entry.header.ocpm_object_ids.push(obj_ref.object_id);
3108 }
3109 }
3110 if entry.header.ocpm_case_id.is_none() {
3111 entry.header.ocpm_case_id = event.case_id;
3112 }
3113 }
3114 annotated += 1;
3115 }
3116 }
3117 debug!(
3118 "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3119 annotated
3120 );
3121 }
3122 }
3123
3124 if let Some(ref mut event_log) = ocpm.event_log {
3128 let synthesized =
3129 datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3130 if synthesized > 0 {
3131 info!(
3132 "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3133 );
3134 }
3135
3136 let anomaly_events =
3141 datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3142 if anomaly_events > 0 {
3143 info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3144 }
3145
3146 let p2p_cfg = &self.config.ocpm.p2p_process;
3151 let any_imperfection = p2p_cfg.rework_probability > 0.0
3152 || p2p_cfg.skip_step_probability > 0.0
3153 || p2p_cfg.out_of_order_probability > 0.0;
3154 if any_imperfection {
3155 use rand_chacha::rand_core::SeedableRng;
3156 let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3157 rework_rate: p2p_cfg.rework_probability,
3158 skip_rate: p2p_cfg.skip_step_probability,
3159 out_of_order_rate: p2p_cfg.out_of_order_probability,
3160 };
3161 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3162 let stats =
3163 datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3164 if stats.rework + stats.skipped + stats.out_of_order > 0 {
3165 info!(
3166 "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3167 stats.rework, stats.skipped, stats.out_of_order
3168 );
3169 }
3170 }
3171 }
3172
3173 let sales_kpi_budgets =
3175 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3176
3177 let treasury =
3181 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3182
3183 if !treasury.journal_entries.is_empty() {
3185 debug!(
3186 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3187 treasury.journal_entries.len()
3188 );
3189 entries.extend(treasury.journal_entries.iter().cloned());
3190 }
3191
3192 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3194
3195 if !tax.tax_posting_journal_entries.is_empty() {
3197 debug!(
3198 "Merging {} tax posting JEs into GL",
3199 tax.tax_posting_journal_entries.len()
3200 );
3201 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3202 }
3203
3204 {
3222 use datasynth_core::fraud_bias::{
3223 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3224 };
3225 use rand_chacha::rand_core::SeedableRng;
3226 let cfg = FraudBehavioralBiasConfig::default();
3227 if cfg.enabled {
3228 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3229 let mut swept = 0usize;
3230 for entry in entries.iter_mut() {
3231 if entry.header.is_fraud && !entry.header.is_anomaly {
3232 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3233 swept += 1;
3234 }
3235 }
3236 if swept > 0 {
3237 info!(
3238 "Phase 20b: final behavioral-bias sweep applied to {swept} \
3239 non-anomaly fraud entries (covers late-added JEs from \
3240 ECL / provisions / treasury / tax / period-close)"
3241 );
3242 }
3243 }
3244 }
3245
3246 {
3250 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3251
3252 let framework_str = {
3253 use datasynth_config::schema::AccountingFrameworkConfig;
3254 match self
3255 .config
3256 .accounting_standards
3257 .framework
3258 .unwrap_or_default()
3259 {
3260 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3261 "IFRS"
3262 }
3263 _ => "US_GAAP",
3264 }
3265 };
3266
3267 let depreciation_total: rust_decimal::Decimal = entries
3269 .iter()
3270 .filter(|je| je.header.document_type == "CL")
3271 .flat_map(|je| je.lines.iter())
3272 .filter(|l| l.gl_account.starts_with("6000"))
3273 .map(|l| l.debit_amount)
3274 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3275
3276 let interest_paid: rust_decimal::Decimal = entries
3278 .iter()
3279 .flat_map(|je| je.lines.iter())
3280 .filter(|l| l.gl_account.starts_with("7100"))
3281 .map(|l| l.debit_amount)
3282 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3283
3284 let tax_paid: rust_decimal::Decimal = entries
3286 .iter()
3287 .flat_map(|je| je.lines.iter())
3288 .filter(|l| l.gl_account.starts_with("8000"))
3289 .map(|l| l.debit_amount)
3290 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3291
3292 let capex: rust_decimal::Decimal = entries
3294 .iter()
3295 .flat_map(|je| je.lines.iter())
3296 .filter(|l| l.gl_account.starts_with("1500"))
3297 .map(|l| l.debit_amount)
3298 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3299
3300 let dividends_paid: rust_decimal::Decimal = entries
3302 .iter()
3303 .flat_map(|je| je.lines.iter())
3304 .filter(|l| l.gl_account == "2170")
3305 .map(|l| l.debit_amount)
3306 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3307
3308 let cf_data = CashFlowSourceData {
3309 depreciation_total,
3310 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
3312 delta_ap: rust_decimal::Decimal::ZERO,
3313 delta_inventory: rust_decimal::Decimal::ZERO,
3314 capex,
3315 debt_issuance: rust_decimal::Decimal::ZERO,
3316 debt_repayment: rust_decimal::Decimal::ZERO,
3317 interest_paid,
3318 tax_paid,
3319 dividends_paid,
3320 framework: framework_str.to_string(),
3321 };
3322
3323 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3324 if !enhanced_cf_items.is_empty() {
3325 use datasynth_core::models::StatementType;
3327 let merge_count = enhanced_cf_items.len();
3328 for stmt in financial_reporting
3329 .financial_statements
3330 .iter_mut()
3331 .chain(financial_reporting.consolidated_statements.iter_mut())
3332 .chain(
3333 financial_reporting
3334 .standalone_statements
3335 .values_mut()
3336 .flat_map(|v| v.iter_mut()),
3337 )
3338 {
3339 if stmt.statement_type == StatementType::CashFlowStatement {
3340 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3341 }
3342 }
3343 info!(
3344 "Enhanced cash flow: {} supplementary items merged into CF statements",
3345 merge_count
3346 );
3347 }
3348 }
3349
3350 self.generate_notes_to_financial_statements(
3353 &mut financial_reporting,
3354 &accounting_standards,
3355 &tax,
3356 &hr,
3357 &audit,
3358 &treasury,
3359 );
3360
3361 if self.config.companies.len() >= 2 && !entries.is_empty() {
3365 let companies: Vec<(String, String)> = self
3366 .config
3367 .companies
3368 .iter()
3369 .map(|c| (c.code.clone(), c.name.clone()))
3370 .collect();
3371 let ic_elim: rust_decimal::Decimal =
3372 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3373 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3374 .unwrap_or(NaiveDate::MIN);
3375 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3376 let period_label = format!(
3377 "{}-{:02}",
3378 end_date.year(),
3379 (end_date - chrono::Days::new(1)).month()
3380 );
3381
3382 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3383 let (je_segments, je_recon) =
3384 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3385 if !je_segments.is_empty() {
3386 info!(
3387 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3388 je_segments.len(),
3389 ic_elim,
3390 );
3391 if financial_reporting.segment_reports.is_empty() {
3393 financial_reporting.segment_reports = je_segments;
3394 financial_reporting.segment_reconciliations = vec![je_recon];
3395 } else {
3396 financial_reporting.segment_reports.extend(je_segments);
3397 financial_reporting.segment_reconciliations.push(je_recon);
3398 }
3399 }
3400 }
3401
3402 let esg_snap =
3404 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3405
3406 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3408
3409 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3411
3412 let disruption_events = self.phase_disruption_events(&mut stats)?;
3414
3415 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3417
3418 let (entity_relationship_graph, cross_process_links) =
3420 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3421
3422 let industry_output = self.phase_industry_data(&mut stats);
3424
3425 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3427
3428 if self.config.diffusion.enabled
3446 && (self.config.diffusion.backend == "neural"
3447 || self.config.diffusion.backend == "hybrid")
3448 {
3449 let neural = &self.config.diffusion.neural;
3450 let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3451 stats.neural_hybrid_weight = Some(weight);
3452 stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3453 stats.neural_routed_column_count = Some(neural.neural_columns.len());
3454 warn!(
3455 "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3456 the neural/hybrid training path is not yet shipped. Config \
3457 is captured in stats (weight={weight:.2}, strategy={}, \
3458 columns={}) but no neural training runs. Statistical \
3459 diffusion (backend='statistical') continues to work.",
3460 self.config.diffusion.backend,
3461 neural.hybrid_strategy,
3462 neural.neural_columns.len(),
3463 );
3464 }
3465
3466 self.phase_hypergraph_export(
3468 &coa,
3469 &entries,
3470 &document_flows,
3471 &sourcing,
3472 &hr,
3473 &manufacturing_snap,
3474 &banking,
3475 &audit,
3476 &financial_reporting,
3477 &ocpm,
3478 &compliance_regulations,
3479 &mut stats,
3480 )?;
3481
3482 if self.phase_config.generate_graph_export {
3485 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3486 }
3487
3488 if self.config.streaming.enabled {
3490 info!("Note: streaming config is enabled but batch mode does not use it");
3491 }
3492 if self.config.vendor_network.enabled {
3493 debug!("Vendor network config available; relationship graph generation is partial");
3494 }
3495 if self.config.customer_segmentation.enabled {
3496 debug!("Customer segmentation config available; segment-aware generation is partial");
3497 }
3498
3499 let resource_stats = self.resource_guard.stats();
3501 info!(
3502 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3503 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3504 resource_stats.disk.estimated_bytes_written,
3505 resource_stats.degradation_level
3506 );
3507
3508 if let Some(ref sink) = self.phase_sink {
3510 if let Err(e) = sink.flush() {
3511 warn!("Stream sink flush failed: {e}");
3512 }
3513 }
3514
3515 let lineage = self.build_lineage_graph();
3517
3518 let gate_result = if self.config.quality_gates.enabled {
3520 let profile_name = &self.config.quality_gates.profile;
3521 match datasynth_eval::gates::get_profile(profile_name) {
3522 Some(profile) => {
3523 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3525
3526 if balance_validation.validated {
3528 eval.coherence.balance =
3529 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3530 equation_balanced: balance_validation.is_balanced,
3531 max_imbalance: (balance_validation.total_debits
3532 - balance_validation.total_credits)
3533 .abs(),
3534 periods_evaluated: 1,
3535 periods_imbalanced: if balance_validation.is_balanced {
3536 0
3537 } else {
3538 1
3539 },
3540 period_results: Vec::new(),
3541 companies_evaluated: self.config.companies.len(),
3542 });
3543 }
3544
3545 eval.coherence.passes = balance_validation.is_balanced;
3547 if !balance_validation.is_balanced {
3548 eval.coherence
3549 .failures
3550 .push("Balance sheet equation not satisfied".to_string());
3551 }
3552
3553 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3555 eval.statistical.passes = !entries.is_empty();
3556
3557 eval.quality.overall_score = 0.9; eval.quality.passes = true;
3560
3561 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3562 info!(
3563 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3564 profile_name, result.gates_passed, result.gates_total, result.summary
3565 );
3566 Some(result)
3567 }
3568 None => {
3569 warn!(
3570 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3571 profile_name
3572 );
3573 None
3574 }
3575 }
3576 } else {
3577 None
3578 };
3579
3580 let internal_controls = if self.config.internal_controls.enabled {
3582 InternalControl::standard_controls()
3583 } else {
3584 Vec::new()
3585 };
3586
3587 let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3591
3592 let statistical_validation = self.phase_statistical_validation(&entries)?;
3597
3598 let interconnectivity = self.phase_interconnectivity();
3602
3603 Ok(EnhancedGenerationResult {
3604 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3605 master_data: std::mem::take(&mut self.master_data),
3606 document_flows,
3607 subledger,
3608 ocpm,
3609 audit,
3610 banking,
3611 graph_export,
3612 sourcing,
3613 financial_reporting,
3614 hr,
3615 accounting_standards,
3616 manufacturing: manufacturing_snap,
3617 sales_kpi_budgets,
3618 tax,
3619 esg: esg_snap,
3620 treasury,
3621 project_accounting,
3622 process_evolution,
3623 organizational_events,
3624 disruption_events,
3625 intercompany,
3626 journal_entries: entries,
3627 anomaly_labels,
3628 balance_validation,
3629 data_quality_stats,
3630 quality_issues,
3631 statistics: stats,
3632 lineage: Some(lineage),
3633 gate_result,
3634 internal_controls,
3635 sod_violations,
3636 opening_balances,
3637 subledger_reconciliation,
3638 counterfactual_pairs,
3639 red_flags,
3640 collusion_rings,
3641 temporal_vendor_chains,
3642 entity_relationship_graph,
3643 cross_process_links,
3644 industry_output,
3645 compliance_regulations,
3646 analytics_metadata,
3647 statistical_validation,
3648 interconnectivity,
3649 })
3650 }
3651
3652 fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3656 use rand::{RngExt, SeedableRng};
3657 use rand_chacha::ChaCha8Rng;
3658
3659 let mut snap = InterconnectivitySnapshot::default();
3660 let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3661
3662 let vn = &self.config.vendor_network;
3664 if vn.enabled {
3665 let total = self.master_data.vendors.len();
3666 if total > 0 {
3667 let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3668 let remaining_after_t1 = total.saturating_sub(tier1_count);
3669 let depth = vn.depth.clamp(1, 3);
3670 let tier2_count = if depth >= 2 {
3671 let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3672 (tier1_count * avg).min(remaining_after_t1)
3673 } else {
3674 0
3675 };
3676 let tier3_count = total
3677 .saturating_sub(tier1_count)
3678 .saturating_sub(tier2_count);
3679
3680 for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3681 let tier = if idx < tier1_count {
3682 1
3683 } else if idx < tier1_count + tier2_count {
3684 2
3685 } else {
3686 3
3687 };
3688 snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3689
3690 let cl = &vn.clusters;
3692 let roll: f64 = rng.random();
3693 let cluster = if roll < cl.reliable_strategic {
3694 "reliable_strategic"
3695 } else if roll < cl.reliable_strategic + cl.standard_operational {
3696 "standard_operational"
3697 } else if roll
3698 < cl.reliable_strategic + cl.standard_operational + cl.transactional
3699 {
3700 "transactional"
3701 } else {
3702 "problematic"
3703 };
3704 snap.vendor_clusters
3705 .push((vendor.vendor_id.clone(), cluster.to_string()));
3706 }
3707 let _ = tier3_count; }
3709 }
3710
3711 let cs = &self.config.customer_segmentation;
3713 if cs.enabled {
3714 let seg = &cs.value_segments;
3715 for customer in &self.master_data.customers {
3716 let roll: f64 = rng.random();
3717 let value_segment = if roll < seg.enterprise.customer_share {
3718 "enterprise"
3719 } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3720 "mid_market"
3721 } else if roll
3722 < seg.enterprise.customer_share
3723 + seg.mid_market.customer_share
3724 + seg.smb.customer_share
3725 {
3726 "smb"
3727 } else {
3728 "consumer"
3729 };
3730 snap.customer_value_segments
3731 .push((customer.customer_id.clone(), value_segment.to_string()));
3732
3733 let roll2: f64 = rng.random();
3734 let life = &cs.lifecycle;
3735 let lifecycle = if roll2 < life.prospect_rate {
3736 "prospect"
3737 } else if roll2 < life.prospect_rate + life.new_rate {
3738 "new"
3739 } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3740 "growth"
3741 } else if roll2
3742 < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3743 {
3744 "mature"
3745 } else if roll2
3746 < life.prospect_rate
3747 + life.new_rate
3748 + life.growth_rate
3749 + life.mature_rate
3750 + life.at_risk_rate
3751 {
3752 "at_risk"
3753 } else if roll2
3754 < life.prospect_rate
3755 + life.new_rate
3756 + life.growth_rate
3757 + life.mature_rate
3758 + life.at_risk_rate
3759 + life.churned_rate
3760 {
3761 "churned"
3762 } else {
3763 "won_back"
3764 };
3765 snap.customer_lifecycle_stages
3766 .push((customer.customer_id.clone(), lifecycle.to_string()));
3767 }
3768 }
3769
3770 let is = &self.config.industry_specific;
3772 if is.enabled {
3773 snap.industry_metadata.push(format!(
3774 "industry_specific.enabled=true (industry={:?})",
3775 self.config.global.industry
3776 ));
3777 }
3778
3779 snap
3780 }
3781
3782 fn phase_chart_of_accounts(
3788 &mut self,
3789 stats: &mut EnhancedGenerationStatistics,
3790 ) -> SynthResult<Arc<ChartOfAccounts>> {
3791 info!("Phase 1: Generating Chart of Accounts");
3792 let coa = self.generate_coa()?;
3793 stats.accounts_count = coa.account_count();
3794 info!(
3795 "Chart of Accounts generated: {} accounts",
3796 stats.accounts_count
3797 );
3798 self.check_resources_with_log("post-coa")?;
3799 Ok(coa)
3800 }
3801
3802 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3804 if self.phase_config.generate_master_data {
3805 info!("Phase 2: Generating Master Data");
3806 self.generate_master_data()?;
3807 stats.vendor_count = self.master_data.vendors.len();
3808 stats.customer_count = self.master_data.customers.len();
3809 stats.material_count = self.master_data.materials.len();
3810 stats.asset_count = self.master_data.assets.len();
3811 stats.employee_count = self.master_data.employees.len();
3812 info!(
3813 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3814 stats.vendor_count, stats.customer_count, stats.material_count,
3815 stats.asset_count, stats.employee_count
3816 );
3817 self.check_resources_with_log("post-master-data")?;
3818 } else {
3819 debug!("Phase 2: Skipped (master data generation disabled)");
3820 }
3821 Ok(())
3822 }
3823
3824 fn phase_document_flows(
3826 &mut self,
3827 stats: &mut EnhancedGenerationStatistics,
3828 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3829 let mut document_flows = DocumentFlowSnapshot::default();
3830 let mut subledger = SubledgerSnapshot::default();
3831 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3834
3835 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3836 info!("Phase 3: Generating Document Flows");
3837 self.generate_document_flows(&mut document_flows)?;
3838 stats.p2p_chain_count = document_flows.p2p_chains.len();
3839 stats.o2c_chain_count = document_flows.o2c_chains.len();
3840 info!(
3841 "Document flows generated: {} P2P chains, {} O2C chains",
3842 stats.p2p_chain_count, stats.o2c_chain_count
3843 );
3844
3845 debug!("Phase 3b: Linking document flows to subledgers");
3847 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3848 stats.ap_invoice_count = subledger.ap_invoices.len();
3849 stats.ar_invoice_count = subledger.ar_invoices.len();
3850 debug!(
3851 "Subledgers linked: {} AP invoices, {} AR invoices",
3852 stats.ap_invoice_count, stats.ar_invoice_count
3853 );
3854
3855 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3860 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3861 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3862 debug!("Payment settlements applied to AP and AR subledgers");
3863
3864 if let Ok(start_date) =
3867 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3868 {
3869 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3870 - chrono::Days::new(1);
3871 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3872 for company in &self.config.companies {
3879 let ar_report = ARAgingReport::from_invoices(
3880 company.code.clone(),
3881 &subledger.ar_invoices,
3882 as_of_date,
3883 );
3884 subledger.ar_aging_reports.push(ar_report);
3885
3886 let ap_report = APAgingReport::from_invoices(
3887 company.code.clone(),
3888 &subledger.ap_invoices,
3889 as_of_date,
3890 );
3891 subledger.ap_aging_reports.push(ap_report);
3892 }
3893 debug!(
3894 "AR/AP aging reports built: {} AR, {} AP",
3895 subledger.ar_aging_reports.len(),
3896 subledger.ap_aging_reports.len()
3897 );
3898
3899 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3901 {
3902 use datasynth_generators::DunningGenerator;
3903 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3904 for company in &self.config.companies {
3905 let currency = company.currency.as_str();
3906 let mut company_invoices: Vec<
3909 datasynth_core::models::subledger::ar::ARInvoice,
3910 > = subledger
3911 .ar_invoices
3912 .iter()
3913 .filter(|inv| inv.company_code == company.code)
3914 .cloned()
3915 .collect();
3916
3917 if company_invoices.is_empty() {
3918 continue;
3919 }
3920
3921 let result = dunning_gen.execute_dunning_run(
3922 &company.code,
3923 as_of_date,
3924 &mut company_invoices,
3925 currency,
3926 );
3927
3928 for updated in &company_invoices {
3930 if let Some(orig) = subledger
3931 .ar_invoices
3932 .iter_mut()
3933 .find(|i| i.invoice_number == updated.invoice_number)
3934 {
3935 orig.dunning_info = updated.dunning_info.clone();
3936 }
3937 }
3938
3939 subledger.dunning_runs.push(result.dunning_run);
3940 subledger.dunning_letters.extend(result.letters);
3941 dunning_journal_entries.extend(result.journal_entries);
3943 }
3944 debug!(
3945 "Dunning runs complete: {} runs, {} letters",
3946 subledger.dunning_runs.len(),
3947 subledger.dunning_letters.len()
3948 );
3949 }
3950 }
3951
3952 self.check_resources_with_log("post-document-flows")?;
3953 } else {
3954 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3955 }
3956
3957 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3959 if !self.master_data.assets.is_empty() {
3960 debug!("Generating FA subledger records");
3961 let company_code = self
3962 .config
3963 .companies
3964 .first()
3965 .map(|c| c.code.as_str())
3966 .unwrap_or("1000");
3967 let currency = self
3968 .config
3969 .companies
3970 .first()
3971 .map(|c| c.currency.as_str())
3972 .unwrap_or("USD");
3973
3974 let mut fa_gen = datasynth_generators::FAGenerator::new(
3975 datasynth_generators::FAGeneratorConfig::default(),
3976 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3977 );
3978
3979 for asset in &self.master_data.assets {
3980 let (record, je) = fa_gen.generate_asset_acquisition(
3981 company_code,
3982 &format!("{:?}", asset.asset_class),
3983 &asset.description,
3984 asset.acquisition_date,
3985 currency,
3986 asset.cost_center.as_deref(),
3987 );
3988 subledger.fa_records.push(record);
3989 fa_journal_entries.push(je);
3990 }
3991
3992 stats.fa_subledger_count = subledger.fa_records.len();
3993 debug!(
3994 "FA subledger records generated: {} (with {} acquisition JEs)",
3995 stats.fa_subledger_count,
3996 fa_journal_entries.len()
3997 );
3998 }
3999
4000 if !self.master_data.materials.is_empty() {
4002 debug!("Generating Inventory subledger records");
4003 let first_company = self.config.companies.first();
4004 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4005 let inv_currency = first_company
4006 .map(|c| c.currency.clone())
4007 .unwrap_or_else(|| "USD".to_string());
4008
4009 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4010 datasynth_generators::InventoryGeneratorConfig::default(),
4011 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4012 inv_currency.clone(),
4013 );
4014
4015 for (i, material) in self.master_data.materials.iter().enumerate() {
4016 let plant = format!("PLANT{:02}", (i % 3) + 1);
4017 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4018 let initial_qty = rust_decimal::Decimal::from(
4019 material
4020 .safety_stock
4021 .to_string()
4022 .parse::<i64>()
4023 .unwrap_or(100),
4024 );
4025
4026 let position = inv_gen.generate_position(
4027 company_code,
4028 &plant,
4029 &storage_loc,
4030 &material.material_id,
4031 &material.description,
4032 initial_qty,
4033 Some(material.standard_cost),
4034 &inv_currency,
4035 );
4036 subledger.inventory_positions.push(position);
4037 }
4038
4039 stats.inventory_subledger_count = subledger.inventory_positions.len();
4040 debug!(
4041 "Inventory subledger records generated: {}",
4042 stats.inventory_subledger_count
4043 );
4044 }
4045
4046 if !subledger.fa_records.is_empty() {
4048 if let Ok(start_date) =
4049 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4050 {
4051 let company_code = self
4052 .config
4053 .companies
4054 .first()
4055 .map(|c| c.code.as_str())
4056 .unwrap_or("1000");
4057 let fiscal_year = start_date.year();
4058 let start_period = start_date.month();
4059 let end_period =
4060 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4061
4062 let depr_cfg = FaDepreciationScheduleConfig {
4063 fiscal_year,
4064 start_period,
4065 end_period,
4066 seed_offset: 800,
4067 };
4068 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4069 let runs = depr_gen.generate(company_code, &subledger.fa_records);
4070 let run_count = runs.len();
4071 subledger.depreciation_runs = runs;
4072 debug!(
4073 "Depreciation runs generated: {} runs for {} periods",
4074 run_count, self.config.global.period_months
4075 );
4076 }
4077 }
4078
4079 if !subledger.inventory_positions.is_empty() {
4081 if let Ok(start_date) =
4082 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4083 {
4084 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4085 - chrono::Days::new(1);
4086
4087 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4088 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4089
4090 for company in &self.config.companies {
4091 let result = inv_val_gen.generate(
4092 &company.code,
4093 &subledger.inventory_positions,
4094 as_of_date,
4095 );
4096 subledger.inventory_valuations.push(result);
4097 }
4098 debug!(
4099 "Inventory valuations generated: {} company reports",
4100 subledger.inventory_valuations.len()
4101 );
4102 }
4103 }
4104
4105 Ok((document_flows, subledger, fa_journal_entries))
4106 }
4107
4108 #[allow(clippy::too_many_arguments)]
4110 fn phase_ocpm_events(
4111 &mut self,
4112 document_flows: &DocumentFlowSnapshot,
4113 sourcing: &SourcingSnapshot,
4114 hr: &HrSnapshot,
4115 manufacturing: &ManufacturingSnapshot,
4116 banking: &BankingSnapshot,
4117 audit: &AuditSnapshot,
4118 financial_reporting: &FinancialReportingSnapshot,
4119 stats: &mut EnhancedGenerationStatistics,
4120 ) -> SynthResult<OcpmSnapshot> {
4121 let degradation = self.check_resources()?;
4122 if degradation >= DegradationLevel::Reduced {
4123 debug!(
4124 "Phase skipped due to resource pressure (degradation: {:?})",
4125 degradation
4126 );
4127 return Ok(OcpmSnapshot::default());
4128 }
4129 if self.phase_config.generate_ocpm_events {
4130 info!("Phase 3c: Generating OCPM Events");
4131 let ocpm_snapshot = self.generate_ocpm_events(
4132 document_flows,
4133 sourcing,
4134 hr,
4135 manufacturing,
4136 banking,
4137 audit,
4138 financial_reporting,
4139 )?;
4140 stats.ocpm_event_count = ocpm_snapshot.event_count;
4141 stats.ocpm_object_count = ocpm_snapshot.object_count;
4142 stats.ocpm_case_count = ocpm_snapshot.case_count;
4143 info!(
4144 "OCPM events generated: {} events, {} objects, {} cases",
4145 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4146 );
4147 self.check_resources_with_log("post-ocpm")?;
4148 Ok(ocpm_snapshot)
4149 } else {
4150 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4151 Ok(OcpmSnapshot::default())
4152 }
4153 }
4154
4155 fn phase_journal_entries(
4157 &mut self,
4158 coa: &Arc<ChartOfAccounts>,
4159 document_flows: &DocumentFlowSnapshot,
4160 _stats: &mut EnhancedGenerationStatistics,
4161 ) -> SynthResult<Vec<JournalEntry>> {
4162 let mut entries = Vec::new();
4163
4164 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4166 debug!("Phase 4a: Generating JEs from document flows");
4167 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4168 debug!("Generated {} JEs from document flows", flow_entries.len());
4169 entries.extend(flow_entries);
4170 }
4171
4172 if self.phase_config.generate_journal_entries {
4174 info!("Phase 4: Generating Journal Entries");
4175 let je_entries = self.generate_journal_entries(coa)?;
4176 info!("Generated {} standalone journal entries", je_entries.len());
4177 entries.extend(je_entries);
4178 } else {
4179 debug!("Phase 4: Skipped (journal entry generation disabled)");
4180 }
4181
4182 if !entries.is_empty() {
4183 self.check_resources_with_log("post-journal-entries")?;
4186 }
4187
4188 Ok(entries)
4189 }
4190
4191 fn phase_anomaly_injection(
4193 &mut self,
4194 entries: &mut [JournalEntry],
4195 actions: &DegradationActions,
4196 stats: &mut EnhancedGenerationStatistics,
4197 ) -> SynthResult<AnomalyLabels> {
4198 if self.phase_config.inject_anomalies
4199 && !entries.is_empty()
4200 && !actions.skip_anomaly_injection
4201 {
4202 info!("Phase 5: Injecting Anomalies");
4203 let result = self.inject_anomalies(entries)?;
4204 stats.anomalies_injected = result.labels.len();
4205 info!("Injected {} anomalies", stats.anomalies_injected);
4206 self.check_resources_with_log("post-anomaly-injection")?;
4207 Ok(result)
4208 } else if actions.skip_anomaly_injection {
4209 warn!("Phase 5: Skipped due to resource degradation");
4210 Ok(AnomalyLabels::default())
4211 } else {
4212 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4213 Ok(AnomalyLabels::default())
4214 }
4215 }
4216
4217 fn phase_balance_validation(
4219 &mut self,
4220 entries: &[JournalEntry],
4221 ) -> SynthResult<BalanceValidationResult> {
4222 if self.phase_config.validate_balances && !entries.is_empty() {
4223 debug!("Phase 6: Validating Balances");
4224 let balance_validation = self.validate_journal_entries(entries)?;
4225 if balance_validation.is_balanced {
4226 debug!("Balance validation passed");
4227 } else {
4228 warn!(
4229 "Balance validation found {} errors",
4230 balance_validation.validation_errors.len()
4231 );
4232 }
4233 Ok(balance_validation)
4234 } else {
4235 Ok(BalanceValidationResult::default())
4236 }
4237 }
4238
4239 fn phase_data_quality_injection(
4241 &mut self,
4242 entries: &mut [JournalEntry],
4243 actions: &DegradationActions,
4244 stats: &mut EnhancedGenerationStatistics,
4245 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4246 if self.phase_config.inject_data_quality
4247 && !entries.is_empty()
4248 && !actions.skip_data_quality
4249 {
4250 info!("Phase 7: Injecting Data Quality Variations");
4251 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4252 stats.data_quality_issues = dq_stats.records_with_issues;
4253 info!("Injected {} data quality issues", stats.data_quality_issues);
4254 self.check_resources_with_log("post-data-quality")?;
4255 Ok((dq_stats, quality_issues))
4256 } else if actions.skip_data_quality {
4257 warn!("Phase 7: Skipped due to resource degradation");
4258 Ok((DataQualityStats::default(), Vec::new()))
4259 } else {
4260 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4261 Ok((DataQualityStats::default(), Vec::new()))
4262 }
4263 }
4264
4265 fn phase_period_close(
4275 &mut self,
4276 entries: &mut Vec<JournalEntry>,
4277 subledger: &SubledgerSnapshot,
4278 stats: &mut EnhancedGenerationStatistics,
4279 ) -> SynthResult<()> {
4280 if !self.phase_config.generate_period_close || entries.is_empty() {
4281 debug!("Phase 10b: Skipped (period close disabled or no entries)");
4282 return Ok(());
4283 }
4284
4285 info!("Phase 10b: Generating period-close journal entries");
4286
4287 use datasynth_core::accounts::{
4288 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4289 };
4290 use rust_decimal::Decimal;
4291
4292 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4293 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4294 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4295 let close_date = end_date - chrono::Days::new(1);
4297
4298 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
4303 .config
4304 .companies
4305 .iter()
4306 .map(|c| c.code.clone())
4307 .collect();
4308
4309 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4311 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4312
4313 let period_months = self.config.global.period_months;
4317 for asset in &subledger.fa_records {
4318 use datasynth_core::models::subledger::fa::AssetStatus;
4320 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4321 continue;
4322 }
4323 let useful_life_months = asset.useful_life_months();
4324 if useful_life_months == 0 {
4325 continue;
4327 }
4328 let salvage_value = asset.salvage_value();
4329 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4330 if depreciable_base == Decimal::ZERO {
4331 continue;
4332 }
4333 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4334 * Decimal::from(period_months))
4335 .round_dp(2);
4336 if period_depr <= Decimal::ZERO {
4337 continue;
4338 }
4339
4340 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4341 depr_header.document_type = "CL".to_string();
4342 depr_header.header_text = Some(format!(
4343 "Depreciation - {} {}",
4344 asset.asset_number, asset.description
4345 ));
4346 depr_header.created_by = "CLOSE_ENGINE".to_string();
4347 depr_header.source = TransactionSource::Automated;
4348 depr_header.business_process = Some(BusinessProcess::R2R);
4349
4350 let doc_id = depr_header.document_id;
4351 let mut depr_je = JournalEntry::new(depr_header);
4352
4353 depr_je.add_line(JournalEntryLine::debit(
4355 doc_id,
4356 1,
4357 expense_accounts::DEPRECIATION.to_string(),
4358 period_depr,
4359 ));
4360 depr_je.add_line(JournalEntryLine::credit(
4362 doc_id,
4363 2,
4364 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4365 period_depr,
4366 ));
4367
4368 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4369 close_jes.push(depr_je);
4370 }
4371
4372 if !subledger.fa_records.is_empty() {
4373 debug!(
4374 "Generated {} depreciation JEs from {} FA records",
4375 close_jes.len(),
4376 subledger.fa_records.len()
4377 );
4378 }
4379
4380 {
4384 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4385 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4386 if let Some(ctx) = &self.temporal_context {
4389 accrual_gen.set_temporal_context(Arc::clone(ctx));
4390 }
4391
4392 let accrual_items: &[(&str, &str, &str)] = &[
4394 ("Accrued Utilities", "6200", "2100"),
4395 ("Accrued Rent", "6300", "2100"),
4396 ("Accrued Interest", "6100", "2150"),
4397 ];
4398
4399 for company_code in &company_codes {
4400 let company_revenue: Decimal = entries
4402 .iter()
4403 .filter(|e| e.header.company_code == *company_code)
4404 .flat_map(|e| e.lines.iter())
4405 .filter(|l| l.gl_account.starts_with('4'))
4406 .map(|l| l.credit_amount - l.debit_amount)
4407 .fold(Decimal::ZERO, |acc, v| acc + v);
4408
4409 if company_revenue <= Decimal::ZERO {
4410 continue;
4411 }
4412
4413 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4415 if accrual_base <= Decimal::ZERO {
4416 continue;
4417 }
4418
4419 for (description, expense_acct, liability_acct) in accrual_items {
4420 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4421 company_code,
4422 description,
4423 accrual_base,
4424 expense_acct,
4425 liability_acct,
4426 close_date,
4427 None,
4428 );
4429 close_jes.push(accrual_je);
4430 if let Some(rev_je) = reversal_je {
4431 close_jes.push(rev_je);
4432 }
4433 }
4434 }
4435
4436 debug!(
4437 "Generated accrual entries for {} companies",
4438 company_codes.len()
4439 );
4440 }
4441
4442 for company_code in &company_codes {
4443 let mut total_revenue = Decimal::ZERO;
4448 let mut total_expenses = Decimal::ZERO;
4449
4450 for entry in entries.iter() {
4451 if entry.header.company_code != *company_code {
4452 continue;
4453 }
4454 for line in &entry.lines {
4455 let category = AccountCategory::from_account(&line.gl_account);
4456 match category {
4457 AccountCategory::Revenue => {
4458 total_revenue += line.credit_amount - line.debit_amount;
4460 }
4461 AccountCategory::Cogs
4462 | AccountCategory::OperatingExpense
4463 | AccountCategory::OtherIncomeExpense
4464 | AccountCategory::Tax => {
4465 total_expenses += line.debit_amount - line.credit_amount;
4467 }
4468 _ => {}
4469 }
4470 }
4471 }
4472
4473 let pre_tax_income = total_revenue - total_expenses;
4474
4475 if pre_tax_income == Decimal::ZERO {
4477 debug!(
4478 "Company {}: no pre-tax income, skipping period close",
4479 company_code
4480 );
4481 continue;
4482 }
4483
4484 if pre_tax_income > Decimal::ZERO {
4486 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4488
4489 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4490 tax_header.document_type = "CL".to_string();
4491 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4492 tax_header.created_by = "CLOSE_ENGINE".to_string();
4493 tax_header.source = TransactionSource::Automated;
4494 tax_header.business_process = Some(BusinessProcess::R2R);
4495
4496 let doc_id = tax_header.document_id;
4497 let mut tax_je = JournalEntry::new(tax_header);
4498
4499 tax_je.add_line(JournalEntryLine::debit(
4501 doc_id,
4502 1,
4503 tax_accounts::TAX_EXPENSE.to_string(),
4504 tax_amount,
4505 ));
4506 tax_je.add_line(JournalEntryLine::credit(
4508 doc_id,
4509 2,
4510 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4511 tax_amount,
4512 ));
4513
4514 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4515 close_jes.push(tax_je);
4516 } else {
4517 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4520 if dta_amount > Decimal::ZERO {
4521 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4522 dta_header.document_type = "CL".to_string();
4523 dta_header.header_text =
4524 Some(format!("Deferred tax asset (DTA) - {}", company_code));
4525 dta_header.created_by = "CLOSE_ENGINE".to_string();
4526 dta_header.source = TransactionSource::Automated;
4527 dta_header.business_process = Some(BusinessProcess::R2R);
4528
4529 let doc_id = dta_header.document_id;
4530 let mut dta_je = JournalEntry::new(dta_header);
4531
4532 dta_je.add_line(JournalEntryLine::debit(
4534 doc_id,
4535 1,
4536 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4537 dta_amount,
4538 ));
4539 dta_je.add_line(JournalEntryLine::credit(
4542 doc_id,
4543 2,
4544 tax_accounts::TAX_EXPENSE.to_string(),
4545 dta_amount,
4546 ));
4547
4548 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4549 close_jes.push(dta_je);
4550 debug!(
4551 "Company {}: loss year — recognised DTA of {}",
4552 company_code, dta_amount
4553 );
4554 }
4555 }
4556
4557 let tax_provision = if pre_tax_income > Decimal::ZERO {
4563 (pre_tax_income * tax_rate).round_dp(2)
4564 } else {
4565 Decimal::ZERO
4566 };
4567 let net_income = pre_tax_income - tax_provision;
4568
4569 if net_income > Decimal::ZERO {
4570 use datasynth_generators::DividendGenerator;
4571 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
4573 let currency_str = self
4574 .config
4575 .companies
4576 .iter()
4577 .find(|c| c.code == *company_code)
4578 .map(|c| c.currency.as_str())
4579 .unwrap_or("USD");
4580 let div_result = div_gen.generate(
4581 company_code,
4582 close_date,
4583 Decimal::new(1, 0), dividend_amount,
4585 currency_str,
4586 );
4587 let div_je_count = div_result.journal_entries.len();
4588 close_jes.extend(div_result.journal_entries);
4589 debug!(
4590 "Company {}: declared dividend of {} ({} JEs)",
4591 company_code, dividend_amount, div_je_count
4592 );
4593 }
4594
4595 if net_income != Decimal::ZERO {
4600 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4601 close_header.document_type = "CL".to_string();
4602 close_header.header_text =
4603 Some(format!("Income statement close - {}", company_code));
4604 close_header.created_by = "CLOSE_ENGINE".to_string();
4605 close_header.source = TransactionSource::Automated;
4606 close_header.business_process = Some(BusinessProcess::R2R);
4607
4608 let doc_id = close_header.document_id;
4609 let mut close_je = JournalEntry::new(close_header);
4610
4611 let abs_net_income = net_income.abs();
4612
4613 if net_income > Decimal::ZERO {
4614 close_je.add_line(JournalEntryLine::debit(
4616 doc_id,
4617 1,
4618 equity_accounts::INCOME_SUMMARY.to_string(),
4619 abs_net_income,
4620 ));
4621 close_je.add_line(JournalEntryLine::credit(
4622 doc_id,
4623 2,
4624 equity_accounts::RETAINED_EARNINGS.to_string(),
4625 abs_net_income,
4626 ));
4627 } else {
4628 close_je.add_line(JournalEntryLine::debit(
4630 doc_id,
4631 1,
4632 equity_accounts::RETAINED_EARNINGS.to_string(),
4633 abs_net_income,
4634 ));
4635 close_je.add_line(JournalEntryLine::credit(
4636 doc_id,
4637 2,
4638 equity_accounts::INCOME_SUMMARY.to_string(),
4639 abs_net_income,
4640 ));
4641 }
4642
4643 debug_assert!(
4644 close_je.is_balanced(),
4645 "Income statement closing JE must be balanced"
4646 );
4647 close_jes.push(close_je);
4648 }
4649 }
4650
4651 let close_count = close_jes.len();
4652 if close_count > 0 {
4653 info!("Generated {} period-close journal entries", close_count);
4654 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4655 entries.extend(close_jes);
4656 stats.period_close_je_count = close_count;
4657
4658 stats.total_entries = entries.len() as u64;
4660 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4661 } else {
4662 debug!("No period-close entries generated (no income statement activity)");
4663 }
4664
4665 Ok(())
4666 }
4667
4668 fn phase_audit_data(
4670 &mut self,
4671 entries: &[JournalEntry],
4672 stats: &mut EnhancedGenerationStatistics,
4673 ) -> SynthResult<AuditSnapshot> {
4674 if self.phase_config.generate_audit {
4675 info!("Phase 8: Generating Audit Data");
4676 let audit_snapshot = self.generate_audit_data(entries)?;
4677 stats.audit_engagement_count = audit_snapshot.engagements.len();
4678 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4679 stats.audit_evidence_count = audit_snapshot.evidence.len();
4680 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4681 stats.audit_finding_count = audit_snapshot.findings.len();
4682 stats.audit_judgment_count = audit_snapshot.judgments.len();
4683 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4684 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4685 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4686 stats.audit_sample_count = audit_snapshot.samples.len();
4687 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4688 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4689 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4690 stats.audit_related_party_count = audit_snapshot.related_parties.len();
4691 stats.audit_related_party_transaction_count =
4692 audit_snapshot.related_party_transactions.len();
4693 info!(
4694 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4695 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4696 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4697 {} RP transactions",
4698 stats.audit_engagement_count,
4699 stats.audit_workpaper_count,
4700 stats.audit_evidence_count,
4701 stats.audit_risk_count,
4702 stats.audit_finding_count,
4703 stats.audit_judgment_count,
4704 stats.audit_confirmation_count,
4705 stats.audit_procedure_step_count,
4706 stats.audit_sample_count,
4707 stats.audit_analytical_result_count,
4708 stats.audit_ia_function_count,
4709 stats.audit_ia_report_count,
4710 stats.audit_related_party_count,
4711 stats.audit_related_party_transaction_count,
4712 );
4713 self.check_resources_with_log("post-audit")?;
4714 Ok(audit_snapshot)
4715 } else {
4716 debug!("Phase 8: Skipped (audit generation disabled)");
4717 Ok(AuditSnapshot::default())
4718 }
4719 }
4720
4721 fn phase_banking_data(
4723 &mut self,
4724 stats: &mut EnhancedGenerationStatistics,
4725 ) -> SynthResult<BankingSnapshot> {
4726 if self.phase_config.generate_banking {
4727 info!("Phase 9: Generating Banking KYC/AML Data");
4728 let banking_snapshot = self.generate_banking_data()?;
4729 stats.banking_customer_count = banking_snapshot.customers.len();
4730 stats.banking_account_count = banking_snapshot.accounts.len();
4731 stats.banking_transaction_count = banking_snapshot.transactions.len();
4732 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4733 info!(
4734 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4735 stats.banking_customer_count, stats.banking_account_count,
4736 stats.banking_transaction_count, stats.banking_suspicious_count
4737 );
4738 self.check_resources_with_log("post-banking")?;
4739 Ok(banking_snapshot)
4740 } else {
4741 debug!("Phase 9: Skipped (banking generation disabled)");
4742 Ok(BankingSnapshot::default())
4743 }
4744 }
4745
4746 fn phase_graph_export(
4748 &mut self,
4749 entries: &[JournalEntry],
4750 coa: &Arc<ChartOfAccounts>,
4751 stats: &mut EnhancedGenerationStatistics,
4752 ) -> SynthResult<GraphExportSnapshot> {
4753 if self.phase_config.generate_graph_export && !entries.is_empty() {
4754 info!("Phase 10: Exporting Accounting Network Graphs");
4755 match self.export_graphs(entries, coa, stats) {
4756 Ok(snapshot) => {
4757 info!(
4758 "Graph export complete: {} graphs ({} nodes, {} edges)",
4759 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4760 );
4761 Ok(snapshot)
4762 }
4763 Err(e) => {
4764 warn!("Phase 10: Graph export failed: {}", e);
4765 Ok(GraphExportSnapshot::default())
4766 }
4767 }
4768 } else {
4769 debug!("Phase 10: Skipped (graph export disabled or no entries)");
4770 Ok(GraphExportSnapshot::default())
4771 }
4772 }
4773
4774 #[allow(clippy::too_many_arguments)]
4776 fn phase_hypergraph_export(
4777 &self,
4778 coa: &Arc<ChartOfAccounts>,
4779 entries: &[JournalEntry],
4780 document_flows: &DocumentFlowSnapshot,
4781 sourcing: &SourcingSnapshot,
4782 hr: &HrSnapshot,
4783 manufacturing: &ManufacturingSnapshot,
4784 banking: &BankingSnapshot,
4785 audit: &AuditSnapshot,
4786 financial_reporting: &FinancialReportingSnapshot,
4787 ocpm: &OcpmSnapshot,
4788 compliance: &ComplianceRegulationsSnapshot,
4789 stats: &mut EnhancedGenerationStatistics,
4790 ) -> SynthResult<()> {
4791 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4792 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4793 match self.export_hypergraph(
4794 coa,
4795 entries,
4796 document_flows,
4797 sourcing,
4798 hr,
4799 manufacturing,
4800 banking,
4801 audit,
4802 financial_reporting,
4803 ocpm,
4804 compliance,
4805 stats,
4806 ) {
4807 Ok(info) => {
4808 info!(
4809 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4810 info.node_count, info.edge_count, info.hyperedge_count
4811 );
4812 }
4813 Err(e) => {
4814 warn!("Phase 10b: Hypergraph export failed: {}", e);
4815 }
4816 }
4817 } else {
4818 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4819 }
4820 Ok(())
4821 }
4822
4823 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4829 if !self.config.llm.enabled {
4830 debug!("Phase 11: Skipped (LLM enrichment disabled)");
4831 return;
4832 }
4833
4834 info!("Phase 11: Starting LLM Enrichment");
4835 let start = std::time::Instant::now();
4836
4837 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4838 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4841 let schema_provider = &self.config.llm.provider;
4842 let api_key_env = match schema_provider.as_str() {
4843 "openai" => Some("OPENAI_API_KEY"),
4844 "anthropic" => Some("ANTHROPIC_API_KEY"),
4845 "custom" => Some("LLM_API_KEY"),
4846 _ => None,
4847 };
4848 if let Some(key_env) = api_key_env {
4849 if std::env::var(key_env).is_ok() {
4850 let llm_config = datasynth_core::llm::LlmConfig {
4851 model: self.config.llm.model.clone(),
4852 api_key_env: key_env.to_string(),
4853 ..datasynth_core::llm::LlmConfig::default()
4854 };
4855 match HttpLlmProvider::new(llm_config) {
4856 Ok(p) => Arc::new(p),
4857 Err(e) => {
4858 warn!(
4859 "Failed to create HttpLlmProvider: {}; falling back to mock",
4860 e
4861 );
4862 Arc::new(MockLlmProvider::new(self.seed))
4863 }
4864 }
4865 } else {
4866 Arc::new(MockLlmProvider::new(self.seed))
4867 }
4868 } else {
4869 Arc::new(MockLlmProvider::new(self.seed))
4870 }
4871 };
4872 let industry = format!("{:?}", self.config.global.industry);
4876
4877 let vendor_enricher =
4878 datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
4879 let max_vendors = self
4880 .config
4881 .llm
4882 .max_vendor_enrichments
4883 .min(self.master_data.vendors.len());
4884 let mut vendors_enriched = 0usize;
4885 for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
4886 match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4887 Ok(name) => {
4888 vendor.name = name;
4889 vendors_enriched += 1;
4890 }
4891 Err(e) => warn!(
4892 "LLM vendor enrichment failed for {}: {}",
4893 vendor.vendor_id, e
4894 ),
4895 }
4896 }
4897
4898 let mut customers_enriched = 0usize;
4899 if self.config.llm.enrich_customers {
4900 let customer_enricher =
4901 datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
4902 &provider,
4903 ));
4904 let max_customers = self
4905 .config
4906 .llm
4907 .max_customer_enrichments
4908 .min(self.master_data.customers.len());
4909 for customer in self.master_data.customers.iter_mut().take(max_customers) {
4910 match customer_enricher.enrich_customer_name(
4911 &industry,
4912 "general",
4913 &customer.country,
4914 ) {
4915 Ok(name) => {
4916 customer.name = name;
4917 customers_enriched += 1;
4918 }
4919 Err(e) => warn!(
4920 "LLM customer enrichment failed for {}: {}",
4921 customer.customer_id, e
4922 ),
4923 }
4924 }
4925 }
4926
4927 let mut materials_enriched = 0usize;
4928 if self.config.llm.enrich_materials {
4929 let material_enricher =
4930 datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
4931 &provider,
4932 ));
4933 let max_materials = self
4934 .config
4935 .llm
4936 .max_material_enrichments
4937 .min(self.master_data.materials.len());
4938 for material in self.master_data.materials.iter_mut().take(max_materials) {
4939 let material_type = format!("{:?}", material.material_type);
4940 match material_enricher.enrich_material_description(&material_type, &industry) {
4941 Ok(desc) => {
4942 material.description = desc;
4943 materials_enriched += 1;
4944 }
4945 Err(e) => warn!(
4946 "LLM material enrichment failed for {}: {}",
4947 material.material_id, e
4948 ),
4949 }
4950 }
4951 }
4952
4953 (vendors_enriched, customers_enriched, materials_enriched)
4954 }));
4955
4956 match result {
4957 Ok((v, c, m)) => {
4958 stats.llm_vendors_enriched = v;
4959 stats.llm_customers_enriched = c;
4960 stats.llm_materials_enriched = m;
4961 let elapsed = start.elapsed();
4962 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4963 info!(
4964 "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
4965 v, c, m, stats.llm_enrichment_ms
4966 );
4967 }
4968 Err(_) => {
4969 let elapsed = start.elapsed();
4970 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4971 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4972 }
4973 }
4974 }
4975
4976 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4982 if !self.config.diffusion.enabled {
4983 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4984 return;
4985 }
4986
4987 info!("Phase 12: Starting Diffusion Enhancement");
4988 let start = std::time::Instant::now();
4989
4990 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4991 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
4994
4995 let diffusion_config = DiffusionConfig {
4996 n_steps: self.config.diffusion.n_steps,
4997 seed: self.seed,
4998 ..Default::default()
4999 };
5000
5001 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5002
5003 let n_samples = self.config.diffusion.sample_size;
5004 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
5006
5007 samples.len()
5008 }));
5009
5010 match result {
5011 Ok(sample_count) => {
5012 stats.diffusion_samples_generated = sample_count;
5013 let elapsed = start.elapsed();
5014 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5015 info!(
5016 "Phase 12 complete: {} diffusion samples generated in {}ms",
5017 sample_count, stats.diffusion_enhancement_ms
5018 );
5019 }
5020 Err(_) => {
5021 let elapsed = start.elapsed();
5022 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5023 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5024 }
5025 }
5026 }
5027
5028 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5035 if !self.config.causal.enabled {
5036 debug!("Phase 13: Skipped (causal generation disabled)");
5037 return;
5038 }
5039
5040 info!("Phase 13: Starting Causal Overlay");
5041 let start = std::time::Instant::now();
5042
5043 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5044 let graph = match self.config.causal.template.as_str() {
5046 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5047 _ => CausalGraph::fraud_detection_template(),
5048 };
5049
5050 let scm = StructuralCausalModel::new(graph.clone())
5051 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5052
5053 let n_samples = self.config.causal.sample_size;
5054 let samples = scm
5055 .generate(n_samples, self.seed)
5056 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5057
5058 let validation_passed = if self.config.causal.validate {
5060 let report = CausalValidator::validate_causal_structure(&samples, &graph);
5061 if report.valid {
5062 info!(
5063 "Causal validation passed: all {} checks OK",
5064 report.checks.len()
5065 );
5066 } else {
5067 warn!(
5068 "Causal validation: {} violations detected: {:?}",
5069 report.violations.len(),
5070 report.violations
5071 );
5072 }
5073 Some(report.valid)
5074 } else {
5075 None
5076 };
5077
5078 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5079 }));
5080
5081 match result {
5082 Ok(Ok((sample_count, validation_passed))) => {
5083 stats.causal_samples_generated = sample_count;
5084 stats.causal_validation_passed = validation_passed;
5085 let elapsed = start.elapsed();
5086 stats.causal_generation_ms = elapsed.as_millis() as u64;
5087 info!(
5088 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5089 sample_count, stats.causal_generation_ms, validation_passed,
5090 );
5091 }
5092 Ok(Err(e)) => {
5093 let elapsed = start.elapsed();
5094 stats.causal_generation_ms = elapsed.as_millis() as u64;
5095 warn!("Phase 13: Causal generation failed: {}", e);
5096 }
5097 Err(_) => {
5098 let elapsed = start.elapsed();
5099 stats.causal_generation_ms = elapsed.as_millis() as u64;
5100 warn!("Phase 13: Causal generation failed (panic caught), continuing");
5101 }
5102 }
5103 }
5104
5105 fn phase_sourcing_data(
5107 &mut self,
5108 stats: &mut EnhancedGenerationStatistics,
5109 ) -> SynthResult<SourcingSnapshot> {
5110 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5111 debug!("Phase 14: Skipped (sourcing generation disabled)");
5112 return Ok(SourcingSnapshot::default());
5113 }
5114 let degradation = self.check_resources()?;
5115 if degradation >= DegradationLevel::Reduced {
5116 debug!(
5117 "Phase skipped due to resource pressure (degradation: {:?})",
5118 degradation
5119 );
5120 return Ok(SourcingSnapshot::default());
5121 }
5122
5123 info!("Phase 14: Generating S2C Sourcing Data");
5124 let seed = self.seed;
5125
5126 let vendor_ids: Vec<String> = self
5128 .master_data
5129 .vendors
5130 .iter()
5131 .map(|v| v.vendor_id.clone())
5132 .collect();
5133 if vendor_ids.is_empty() {
5134 debug!("Phase 14: Skipped (no vendors available)");
5135 return Ok(SourcingSnapshot::default());
5136 }
5137
5138 let categories: Vec<(String, String)> = vec![
5139 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5140 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5141 ("CAT-IT".to_string(), "IT Equipment".to_string()),
5142 ("CAT-SVC".to_string(), "Professional Services".to_string()),
5143 ("CAT-LOG".to_string(), "Logistics".to_string()),
5144 ];
5145 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5146 .iter()
5147 .map(|(id, name)| {
5148 (
5149 id.clone(),
5150 name.clone(),
5151 rust_decimal::Decimal::from(100_000),
5152 )
5153 })
5154 .collect();
5155
5156 let company_code = self
5157 .config
5158 .companies
5159 .first()
5160 .map(|c| c.code.as_str())
5161 .unwrap_or("1000");
5162 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5163 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5164 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5165 let fiscal_year = start_date.year() as u16;
5166 let owner_ids: Vec<String> = self
5167 .master_data
5168 .employees
5169 .iter()
5170 .take(5)
5171 .map(|e| e.employee_id.clone())
5172 .collect();
5173 let owner_id = owner_ids
5174 .first()
5175 .map(std::string::String::as_str)
5176 .unwrap_or("BUYER-001");
5177
5178 let mut spend_gen = SpendAnalysisGenerator::new(seed);
5180 let spend_analyses =
5181 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5182
5183 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5185 let sourcing_projects = if owner_ids.is_empty() {
5186 Vec::new()
5187 } else {
5188 project_gen.generate(
5189 company_code,
5190 &categories_with_spend,
5191 &owner_ids,
5192 start_date,
5193 self.config.global.period_months,
5194 )
5195 };
5196 stats.sourcing_project_count = sourcing_projects.len();
5197
5198 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5200 let mut qual_gen = QualificationGenerator::new(seed + 2);
5201 let qualifications = qual_gen.generate(
5202 company_code,
5203 &qual_vendor_ids,
5204 sourcing_projects.first().map(|p| p.project_id.as_str()),
5205 owner_id,
5206 start_date,
5207 );
5208
5209 let mut rfx_gen = RfxGenerator::new(seed + 3);
5211 let rfx_events: Vec<RfxEvent> = sourcing_projects
5212 .iter()
5213 .map(|proj| {
5214 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5215 rfx_gen.generate(
5216 company_code,
5217 &proj.project_id,
5218 &proj.category_id,
5219 &qualified_vids,
5220 owner_id,
5221 start_date,
5222 50000.0,
5223 )
5224 })
5225 .collect();
5226 stats.rfx_event_count = rfx_events.len();
5227
5228 let mut bid_gen = BidGenerator::new(seed + 4);
5230 let mut all_bids = Vec::new();
5231 for rfx in &rfx_events {
5232 let bidder_count = vendor_ids.len().clamp(2, 5);
5233 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5234 let bids = bid_gen.generate(rfx, &responding, start_date);
5235 all_bids.extend(bids);
5236 }
5237 stats.bid_count = all_bids.len();
5238
5239 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5241 let bid_evaluations: Vec<BidEvaluation> = rfx_events
5242 .iter()
5243 .map(|rfx| {
5244 let rfx_bids: Vec<SupplierBid> = all_bids
5245 .iter()
5246 .filter(|b| b.rfx_id == rfx.rfx_id)
5247 .cloned()
5248 .collect();
5249 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5250 })
5251 .collect();
5252
5253 let mut contract_gen = ContractGenerator::new(seed + 6);
5255 let contracts: Vec<ProcurementContract> = bid_evaluations
5256 .iter()
5257 .zip(rfx_events.iter())
5258 .filter_map(|(eval, rfx)| {
5259 eval.ranked_bids.first().and_then(|winner| {
5260 all_bids
5261 .iter()
5262 .find(|b| b.bid_id == winner.bid_id)
5263 .map(|winning_bid| {
5264 contract_gen.generate_from_bid(
5265 winning_bid,
5266 Some(&rfx.sourcing_project_id),
5267 &rfx.category_id,
5268 owner_id,
5269 start_date,
5270 )
5271 })
5272 })
5273 })
5274 .collect();
5275 stats.contract_count = contracts.len();
5276
5277 let mut catalog_gen = CatalogGenerator::new(seed + 7);
5279 let catalog_items = catalog_gen.generate(&contracts);
5280 stats.catalog_item_count = catalog_items.len();
5281
5282 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5284 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5285 .iter()
5286 .fold(
5287 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5288 |mut acc, c| {
5289 acc.entry(c.vendor_id.clone()).or_default().push(c);
5290 acc
5291 },
5292 )
5293 .into_iter()
5294 .collect();
5295 let scorecards = scorecard_gen.generate(
5296 company_code,
5297 &vendor_contracts,
5298 start_date,
5299 end_date,
5300 owner_id,
5301 );
5302 stats.scorecard_count = scorecards.len();
5303
5304 let mut sourcing_projects = sourcing_projects;
5307 for project in &mut sourcing_projects {
5308 project.rfx_ids = rfx_events
5310 .iter()
5311 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5312 .map(|rfx| rfx.rfx_id.clone())
5313 .collect();
5314
5315 project.contract_id = contracts
5317 .iter()
5318 .find(|c| {
5319 c.sourcing_project_id
5320 .as_deref()
5321 .is_some_and(|sp| sp == project.project_id)
5322 })
5323 .map(|c| c.contract_id.clone());
5324
5325 project.spend_analysis_id = spend_analyses
5327 .iter()
5328 .find(|sa| sa.category_id == project.category_id)
5329 .map(|sa| sa.category_id.clone());
5330 }
5331
5332 info!(
5333 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5334 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5335 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5336 );
5337 self.check_resources_with_log("post-sourcing")?;
5338
5339 Ok(SourcingSnapshot {
5340 spend_analyses,
5341 sourcing_projects,
5342 qualifications,
5343 rfx_events,
5344 bids: all_bids,
5345 bid_evaluations,
5346 contracts,
5347 catalog_items,
5348 scorecards,
5349 })
5350 }
5351
5352 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5358 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5359
5360 let parent_code = self
5361 .config
5362 .companies
5363 .first()
5364 .map(|c| c.code.clone())
5365 .unwrap_or_else(|| "PARENT".to_string());
5366
5367 let mut group = GroupStructure::new(parent_code);
5368
5369 for company in self.config.companies.iter().skip(1) {
5370 let sub =
5371 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5372 group.add_subsidiary(sub);
5373 }
5374
5375 group
5376 }
5377
5378 fn phase_intercompany(
5380 &mut self,
5381 journal_entries: &[JournalEntry],
5382 stats: &mut EnhancedGenerationStatistics,
5383 ) -> SynthResult<IntercompanySnapshot> {
5384 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5386 debug!("Phase 14b: Skipped (intercompany generation disabled)");
5387 return Ok(IntercompanySnapshot::default());
5388 }
5389
5390 if self.config.companies.len() < 2 {
5392 debug!(
5393 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5394 self.config.companies.len()
5395 );
5396 return Ok(IntercompanySnapshot::default());
5397 }
5398
5399 info!("Phase 14b: Generating Intercompany Transactions");
5400
5401 let group_structure = self.build_group_structure();
5404 debug!(
5405 "Group structure built: parent={}, subsidiaries={}",
5406 group_structure.parent_entity,
5407 group_structure.subsidiaries.len()
5408 );
5409
5410 let seed = self.seed;
5411 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5412 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5413 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5414
5415 let parent_code = self.config.companies[0].code.clone();
5418 let mut ownership_structure =
5419 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5420
5421 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5422 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5423 format!("REL{:03}", i + 1),
5424 parent_code.clone(),
5425 company.code.clone(),
5426 rust_decimal::Decimal::from(100), start_date,
5428 );
5429 ownership_structure.add_relationship(relationship);
5430 }
5431
5432 let tp_method = match self.config.intercompany.transfer_pricing_method {
5434 datasynth_config::schema::TransferPricingMethod::CostPlus => {
5435 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
5436 }
5437 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
5438 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
5439 }
5440 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
5441 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
5442 }
5443 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
5444 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
5445 }
5446 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
5447 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
5448 }
5449 };
5450
5451 let ic_currency = self
5453 .config
5454 .companies
5455 .first()
5456 .map(|c| c.currency.clone())
5457 .unwrap_or_else(|| "USD".to_string());
5458 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
5459 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
5460 transfer_pricing_method: tp_method,
5461 markup_percent: rust_decimal::Decimal::from_f64_retain(
5462 self.config.intercompany.markup_percent,
5463 )
5464 .unwrap_or(rust_decimal::Decimal::from(5)),
5465 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
5466 default_currency: ic_currency,
5467 ..Default::default()
5468 };
5469
5470 let mut ic_generator = datasynth_generators::ICGenerator::new(
5472 ic_gen_config,
5473 ownership_structure.clone(),
5474 seed + 50,
5475 );
5476
5477 let transactions_per_day = 3;
5480 let matched_pairs = ic_generator.generate_transactions_for_period(
5481 start_date,
5482 end_date,
5483 transactions_per_day,
5484 );
5485
5486 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5488 debug!(
5489 "Generated {} IC seller invoices, {} IC buyer POs",
5490 ic_doc_chains.seller_invoices.len(),
5491 ic_doc_chains.buyer_orders.len()
5492 );
5493
5494 let mut seller_entries = Vec::new();
5496 let mut buyer_entries = Vec::new();
5497 let fiscal_year = start_date.year();
5498
5499 for pair in &matched_pairs {
5500 let fiscal_period = pair.posting_date.month();
5501 let (seller_je, buyer_je) =
5502 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5503 seller_entries.push(seller_je);
5504 buyer_entries.push(buyer_je);
5505 }
5506
5507 let matching_config = datasynth_generators::ICMatchingConfig {
5509 base_currency: self
5510 .config
5511 .companies
5512 .first()
5513 .map(|c| c.currency.clone())
5514 .unwrap_or_else(|| "USD".to_string()),
5515 ..Default::default()
5516 };
5517 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5518 matching_engine.load_matched_pairs(&matched_pairs);
5519 let matching_result = matching_engine.run_matching(end_date);
5520
5521 let mut elimination_entries = Vec::new();
5523 if self.config.intercompany.generate_eliminations {
5524 let elim_config = datasynth_generators::EliminationConfig {
5525 consolidation_entity: "GROUP".to_string(),
5526 base_currency: self
5527 .config
5528 .companies
5529 .first()
5530 .map(|c| c.currency.clone())
5531 .unwrap_or_else(|| "USD".to_string()),
5532 ..Default::default()
5533 };
5534
5535 let mut elim_generator =
5536 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5537
5538 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5539 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5540 matching_result
5541 .matched_balances
5542 .iter()
5543 .chain(matching_result.unmatched_balances.iter())
5544 .cloned()
5545 .collect();
5546
5547 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5559 std::collections::HashMap::new();
5560 let mut equity_amounts: std::collections::HashMap<
5561 String,
5562 std::collections::HashMap<String, rust_decimal::Decimal>,
5563 > = std::collections::HashMap::new();
5564 {
5565 use rust_decimal::Decimal;
5566 let hundred = Decimal::from(100u32);
5567 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
5571 for sub in &group_structure.subsidiaries {
5572 let net_assets = {
5573 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5574 if na > Decimal::ZERO {
5575 na
5576 } else {
5577 Decimal::from(1_000_000u64)
5578 }
5579 };
5580 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5582 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5583
5584 let mut eq_map = std::collections::HashMap::new();
5587 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5588 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5589 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5590 equity_amounts.insert(sub.entity_code.clone(), eq_map);
5591 }
5592 }
5593
5594 let journal = elim_generator.generate_eliminations(
5595 &fiscal_period,
5596 end_date,
5597 &all_balances,
5598 &matched_pairs,
5599 &investment_amounts,
5600 &equity_amounts,
5601 );
5602
5603 elimination_entries = journal.entries.clone();
5604 }
5605
5606 let matched_pair_count = matched_pairs.len();
5607 let elimination_entry_count = elimination_entries.len();
5608 let match_rate = matching_result.match_rate;
5609
5610 stats.ic_matched_pair_count = matched_pair_count;
5611 stats.ic_elimination_count = elimination_entry_count;
5612 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5613
5614 info!(
5615 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5616 matched_pair_count,
5617 stats.ic_transaction_count,
5618 seller_entries.len(),
5619 buyer_entries.len(),
5620 elimination_entry_count,
5621 match_rate * 100.0
5622 );
5623 self.check_resources_with_log("post-intercompany")?;
5624
5625 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5629 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5630 use rust_decimal::Decimal;
5631
5632 let eight_pct = Decimal::new(8, 2); group_structure
5635 .subsidiaries
5636 .iter()
5637 .filter(|sub| {
5638 sub.nci_percentage > Decimal::ZERO
5639 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5640 })
5641 .map(|sub| {
5642 let net_assets_from_jes =
5646 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5647
5648 let net_assets = if net_assets_from_jes > Decimal::ZERO {
5649 net_assets_from_jes.round_dp(2)
5650 } else {
5651 Decimal::from(1_000_000u64)
5653 };
5654
5655 let net_income = (net_assets * eight_pct).round_dp(2);
5657
5658 NciMeasurement::compute(
5659 sub.entity_code.clone(),
5660 sub.nci_percentage,
5661 net_assets,
5662 net_income,
5663 )
5664 })
5665 .collect()
5666 };
5667
5668 if !nci_measurements.is_empty() {
5669 info!(
5670 "NCI measurements: {} subsidiaries with non-controlling interests",
5671 nci_measurements.len()
5672 );
5673 }
5674
5675 Ok(IntercompanySnapshot {
5676 group_structure: Some(group_structure),
5677 matched_pairs,
5678 seller_journal_entries: seller_entries,
5679 buyer_journal_entries: buyer_entries,
5680 elimination_entries,
5681 nci_measurements,
5682 ic_document_chains: Some(ic_doc_chains),
5683 matched_pair_count,
5684 elimination_entry_count,
5685 match_rate,
5686 })
5687 }
5688
5689 fn phase_financial_reporting(
5691 &mut self,
5692 document_flows: &DocumentFlowSnapshot,
5693 journal_entries: &[JournalEntry],
5694 coa: &Arc<ChartOfAccounts>,
5695 _hr: &HrSnapshot,
5696 _audit: &AuditSnapshot,
5697 stats: &mut EnhancedGenerationStatistics,
5698 ) -> SynthResult<FinancialReportingSnapshot> {
5699 let fs_enabled = self.phase_config.generate_financial_statements
5700 || self.config.financial_reporting.enabled;
5701 let br_enabled = self.phase_config.generate_bank_reconciliation;
5702
5703 if !fs_enabled && !br_enabled {
5704 debug!("Phase 15: Skipped (financial reporting disabled)");
5705 return Ok(FinancialReportingSnapshot::default());
5706 }
5707
5708 info!("Phase 15: Generating Financial Reporting Data");
5709
5710 let seed = self.seed;
5711 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5712 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5713
5714 let mut financial_statements = Vec::new();
5715 let mut bank_reconciliations = Vec::new();
5716 let mut trial_balances = Vec::new();
5717 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
5718 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
5719 Vec::new();
5720 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
5722 std::collections::HashMap::new();
5723 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
5725 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
5727
5728 if fs_enabled {
5736 let has_journal_entries = !journal_entries.is_empty();
5737
5738 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
5741 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
5743
5744 let elimination_entries: Vec<&JournalEntry> = journal_entries
5746 .iter()
5747 .filter(|je| je.header.is_elimination)
5748 .collect();
5749
5750 for period in 0..self.config.global.period_months {
5752 let period_start = start_date + chrono::Months::new(period);
5753 let period_end =
5754 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5755 let fiscal_year = period_end.year() as u16;
5756 let fiscal_period = period_end.month() as u8;
5757 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5758
5759 let mut entity_tb_map: std::collections::HashMap<
5762 String,
5763 std::collections::HashMap<String, rust_decimal::Decimal>,
5764 > = std::collections::HashMap::new();
5765
5766 for (company_idx, company) in self.config.companies.iter().enumerate() {
5768 let company_code = company.code.as_str();
5769 let currency = company.currency.as_str();
5770 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
5773 let mut company_fs_gen =
5774 FinancialStatementGenerator::new(seed + company_seed_offset);
5775
5776 if has_journal_entries {
5777 let tb_entries = Self::build_cumulative_trial_balance(
5778 journal_entries,
5779 coa,
5780 company_code,
5781 start_date,
5782 period_end,
5783 fiscal_year,
5784 fiscal_period,
5785 );
5786
5787 let entity_cat_map =
5789 entity_tb_map.entry(company_code.to_string()).or_default();
5790 for tb_entry in &tb_entries {
5791 let net = tb_entry.debit_balance - tb_entry.credit_balance;
5792 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
5793 }
5794
5795 let stmts = company_fs_gen.generate(
5796 company_code,
5797 currency,
5798 &tb_entries,
5799 period_start,
5800 period_end,
5801 fiscal_year,
5802 fiscal_period,
5803 None,
5804 "SYS-AUTOCLOSE",
5805 );
5806
5807 let mut entity_stmts = Vec::new();
5808 for stmt in stmts {
5809 if stmt.statement_type == StatementType::CashFlowStatement {
5810 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
5811 let cf_items = Self::build_cash_flow_from_trial_balances(
5812 &tb_entries,
5813 None,
5814 net_income,
5815 );
5816 entity_stmts.push(FinancialStatement {
5817 cash_flow_items: cf_items,
5818 ..stmt
5819 });
5820 } else {
5821 entity_stmts.push(stmt);
5822 }
5823 }
5824
5825 financial_statements.extend(entity_stmts.clone());
5827
5828 standalone_statements
5830 .entry(company_code.to_string())
5831 .or_default()
5832 .extend(entity_stmts);
5833
5834 if company_idx == 0 {
5837 trial_balances.push(PeriodTrialBalance {
5838 fiscal_year,
5839 fiscal_period,
5840 period_start,
5841 period_end,
5842 entries: tb_entries,
5843 });
5844 }
5845 } else {
5846 let tb_entries = Self::build_trial_balance_from_entries(
5848 journal_entries,
5849 coa,
5850 company_code,
5851 fiscal_year,
5852 fiscal_period,
5853 );
5854
5855 let stmts = company_fs_gen.generate(
5856 company_code,
5857 currency,
5858 &tb_entries,
5859 period_start,
5860 period_end,
5861 fiscal_year,
5862 fiscal_period,
5863 None,
5864 "SYS-AUTOCLOSE",
5865 );
5866 financial_statements.extend(stmts.clone());
5867 standalone_statements
5868 .entry(company_code.to_string())
5869 .or_default()
5870 .extend(stmts);
5871
5872 if company_idx == 0 && !tb_entries.is_empty() {
5873 trial_balances.push(PeriodTrialBalance {
5874 fiscal_year,
5875 fiscal_period,
5876 period_start,
5877 period_end,
5878 entries: tb_entries,
5879 });
5880 }
5881 }
5882 }
5883
5884 let group_currency = self
5887 .config
5888 .companies
5889 .first()
5890 .map(|c| c.currency.as_str())
5891 .unwrap_or("USD");
5892
5893 let period_eliminations: Vec<JournalEntry> = elimination_entries
5895 .iter()
5896 .filter(|je| {
5897 je.header.fiscal_year == fiscal_year
5898 && je.header.fiscal_period == fiscal_period
5899 })
5900 .map(|je| (*je).clone())
5901 .collect();
5902
5903 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5904 &entity_tb_map,
5905 &period_eliminations,
5906 &period_label,
5907 );
5908
5909 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5912 .line_items
5913 .iter()
5914 .map(|li| {
5915 let net = li.post_elimination_total;
5916 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5917 (net, rust_decimal::Decimal::ZERO)
5918 } else {
5919 (rust_decimal::Decimal::ZERO, -net)
5920 };
5921 datasynth_generators::TrialBalanceEntry {
5922 account_code: li.account_category.clone(),
5923 account_name: li.account_category.clone(),
5924 category: li.account_category.clone(),
5925 debit_balance: debit,
5926 credit_balance: credit,
5927 }
5928 })
5929 .collect();
5930
5931 let mut cons_stmts = cons_gen.generate(
5932 "GROUP",
5933 group_currency,
5934 &cons_tb,
5935 period_start,
5936 period_end,
5937 fiscal_year,
5938 fiscal_period,
5939 None,
5940 "SYS-AUTOCLOSE",
5941 );
5942
5943 let bs_categories: &[&str] = &[
5947 "CASH",
5948 "RECEIVABLES",
5949 "INVENTORY",
5950 "FIXEDASSETS",
5951 "PAYABLES",
5952 "ACCRUEDLIABILITIES",
5953 "LONGTERMDEBT",
5954 "EQUITY",
5955 ];
5956 let (bs_items, is_items): (Vec<_>, Vec<_>) =
5957 cons_line_items.into_iter().partition(|li| {
5958 let upper = li.label.to_uppercase();
5959 bs_categories.iter().any(|c| upper == *c)
5960 });
5961
5962 for stmt in &mut cons_stmts {
5963 stmt.is_consolidated = true;
5964 match stmt.statement_type {
5965 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5966 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5967 _ => {} }
5969 }
5970
5971 consolidated_statements.extend(cons_stmts);
5972 consolidation_schedules.push(schedule);
5973 }
5974
5975 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
5981 info!(
5982 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5983 stats.financial_statement_count,
5984 consolidated_statements.len(),
5985 has_journal_entries
5986 );
5987
5988 let entity_seeds: Vec<SegmentSeed> = self
5993 .config
5994 .companies
5995 .iter()
5996 .map(|c| SegmentSeed {
5997 code: c.code.clone(),
5998 name: c.name.clone(),
5999 currency: c.currency.clone(),
6000 })
6001 .collect();
6002
6003 let mut seg_gen = SegmentGenerator::new(seed + 30);
6004
6005 for period in 0..self.config.global.period_months {
6010 let period_end =
6011 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6012 let fiscal_year = period_end.year() as u16;
6013 let fiscal_period = period_end.month() as u8;
6014 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6015
6016 use datasynth_core::models::StatementType;
6017
6018 let cons_is = consolidated_statements.iter().find(|s| {
6020 s.fiscal_year == fiscal_year
6021 && s.fiscal_period == fiscal_period
6022 && s.statement_type == StatementType::IncomeStatement
6023 });
6024 let cons_bs = consolidated_statements.iter().find(|s| {
6025 s.fiscal_year == fiscal_year
6026 && s.fiscal_period == fiscal_period
6027 && s.statement_type == StatementType::BalanceSheet
6028 });
6029
6030 let is_stmt = cons_is.or_else(|| {
6032 financial_statements.iter().find(|s| {
6033 s.fiscal_year == fiscal_year
6034 && s.fiscal_period == fiscal_period
6035 && s.statement_type == StatementType::IncomeStatement
6036 })
6037 });
6038 let bs_stmt = cons_bs.or_else(|| {
6039 financial_statements.iter().find(|s| {
6040 s.fiscal_year == fiscal_year
6041 && s.fiscal_period == fiscal_period
6042 && s.statement_type == StatementType::BalanceSheet
6043 })
6044 });
6045
6046 let consolidated_revenue = is_stmt
6047 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6048 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
6050
6051 let consolidated_profit = is_stmt
6052 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6053 .map(|li| li.amount)
6054 .unwrap_or(rust_decimal::Decimal::ZERO);
6055
6056 let consolidated_assets = bs_stmt
6057 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6058 .map(|li| li.amount)
6059 .unwrap_or(rust_decimal::Decimal::ZERO);
6060
6061 if consolidated_revenue == rust_decimal::Decimal::ZERO
6063 && consolidated_assets == rust_decimal::Decimal::ZERO
6064 {
6065 continue;
6066 }
6067
6068 let group_code = self
6069 .config
6070 .companies
6071 .first()
6072 .map(|c| c.code.as_str())
6073 .unwrap_or("GROUP");
6074
6075 let total_depr: rust_decimal::Decimal = journal_entries
6078 .iter()
6079 .filter(|je| je.header.document_type == "CL")
6080 .flat_map(|je| je.lines.iter())
6081 .filter(|l| l.gl_account.starts_with("6000"))
6082 .map(|l| l.debit_amount)
6083 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6084 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6085 Some(total_depr)
6086 } else {
6087 None
6088 };
6089
6090 let (segs, recon) = seg_gen.generate(
6091 group_code,
6092 &period_label,
6093 consolidated_revenue,
6094 consolidated_profit,
6095 consolidated_assets,
6096 &entity_seeds,
6097 depr_param,
6098 );
6099 segment_reports.extend(segs);
6100 segment_reconciliations.push(recon);
6101 }
6102
6103 info!(
6104 "Segment reports generated: {} segments, {} reconciliations",
6105 segment_reports.len(),
6106 segment_reconciliations.len()
6107 );
6108 }
6109
6110 if br_enabled && !document_flows.payments.is_empty() {
6112 let employee_ids: Vec<String> = self
6113 .master_data
6114 .employees
6115 .iter()
6116 .map(|e| e.employee_id.clone())
6117 .collect();
6118 let mut br_gen =
6119 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6120
6121 for company in &self.config.companies {
6123 let company_payments: Vec<PaymentReference> = document_flows
6124 .payments
6125 .iter()
6126 .filter(|p| p.header.company_code == company.code)
6127 .map(|p| PaymentReference {
6128 id: p.header.document_id.clone(),
6129 amount: if p.is_vendor { p.amount } else { -p.amount },
6130 date: p.header.document_date,
6131 reference: p
6132 .check_number
6133 .clone()
6134 .or_else(|| p.wire_reference.clone())
6135 .unwrap_or_else(|| p.header.document_id.clone()),
6136 })
6137 .collect();
6138
6139 if company_payments.is_empty() {
6140 continue;
6141 }
6142
6143 let bank_account_id = format!("{}-MAIN", company.code);
6144
6145 for period in 0..self.config.global.period_months {
6147 let period_start = start_date + chrono::Months::new(period);
6148 let period_end =
6149 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6150
6151 let period_payments: Vec<PaymentReference> = company_payments
6152 .iter()
6153 .filter(|p| p.date >= period_start && p.date <= period_end)
6154 .cloned()
6155 .collect();
6156
6157 let recon = br_gen.generate(
6158 &company.code,
6159 &bank_account_id,
6160 period_start,
6161 period_end,
6162 &company.currency,
6163 &period_payments,
6164 );
6165 bank_reconciliations.push(recon);
6166 }
6167 }
6168 info!(
6169 "Bank reconciliations generated: {} reconciliations",
6170 bank_reconciliations.len()
6171 );
6172 }
6173
6174 stats.bank_reconciliation_count = bank_reconciliations.len();
6175 self.check_resources_with_log("post-financial-reporting")?;
6176
6177 if !trial_balances.is_empty() {
6178 info!(
6179 "Period-close trial balances captured: {} periods",
6180 trial_balances.len()
6181 );
6182 }
6183
6184 let notes_to_financial_statements = Vec::new();
6188
6189 Ok(FinancialReportingSnapshot {
6190 financial_statements,
6191 standalone_statements,
6192 consolidated_statements,
6193 consolidation_schedules,
6194 bank_reconciliations,
6195 trial_balances,
6196 segment_reports,
6197 segment_reconciliations,
6198 notes_to_financial_statements,
6199 })
6200 }
6201
6202 fn generate_notes_to_financial_statements(
6209 &self,
6210 financial_reporting: &mut FinancialReportingSnapshot,
6211 accounting_standards: &AccountingStandardsSnapshot,
6212 tax: &TaxSnapshot,
6213 hr: &HrSnapshot,
6214 audit: &AuditSnapshot,
6215 treasury: &TreasurySnapshot,
6216 ) {
6217 use datasynth_config::schema::AccountingFrameworkConfig;
6218 use datasynth_core::models::StatementType;
6219 use datasynth_generators::period_close::notes_generator::{
6220 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6221 };
6222
6223 let seed = self.seed;
6224 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6225 {
6226 Ok(d) => d,
6227 Err(_) => return,
6228 };
6229
6230 let mut notes_gen = NotesGenerator::new(seed + 4235);
6231
6232 for company in &self.config.companies {
6233 let last_period_end = start_date
6234 + chrono::Months::new(self.config.global.period_months)
6235 - chrono::Days::new(1);
6236 let fiscal_year = last_period_end.year() as u16;
6237
6238 let entity_is = financial_reporting
6240 .standalone_statements
6241 .get(&company.code)
6242 .and_then(|stmts| {
6243 stmts.iter().find(|s| {
6244 s.fiscal_year == fiscal_year
6245 && s.statement_type == StatementType::IncomeStatement
6246 })
6247 });
6248 let entity_bs = financial_reporting
6249 .standalone_statements
6250 .get(&company.code)
6251 .and_then(|stmts| {
6252 stmts.iter().find(|s| {
6253 s.fiscal_year == fiscal_year
6254 && s.statement_type == StatementType::BalanceSheet
6255 })
6256 });
6257
6258 let revenue_amount = entity_is
6260 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6261 .map(|li| li.amount);
6262 let ppe_gross = entity_bs
6263 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6264 .map(|li| li.amount);
6265
6266 let framework = match self
6267 .config
6268 .accounting_standards
6269 .framework
6270 .unwrap_or_default()
6271 {
6272 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6273 "IFRS".to_string()
6274 }
6275 _ => "US GAAP".to_string(),
6276 };
6277
6278 let (entity_dta, entity_dtl) = {
6281 let mut dta = rust_decimal::Decimal::ZERO;
6282 let mut dtl = rust_decimal::Decimal::ZERO;
6283 for rf in &tax.deferred_tax.rollforwards {
6284 if rf.entity_code == company.code {
6285 dta += rf.closing_dta;
6286 dtl += rf.closing_dtl;
6287 }
6288 }
6289 (
6290 if dta > rust_decimal::Decimal::ZERO {
6291 Some(dta)
6292 } else {
6293 None
6294 },
6295 if dtl > rust_decimal::Decimal::ZERO {
6296 Some(dtl)
6297 } else {
6298 None
6299 },
6300 )
6301 };
6302
6303 let entity_provisions: Vec<_> = accounting_standards
6306 .provisions
6307 .iter()
6308 .filter(|p| p.entity_code == company.code)
6309 .collect();
6310 let provision_count = entity_provisions.len();
6311 let total_provisions = if provision_count > 0 {
6312 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6313 } else {
6314 None
6315 };
6316
6317 let entity_pension_plan_count = hr
6319 .pension_plans
6320 .iter()
6321 .filter(|p| p.entity_code == company.code)
6322 .count();
6323 let entity_total_dbo: Option<rust_decimal::Decimal> = {
6324 let sum: rust_decimal::Decimal = hr
6325 .pension_disclosures
6326 .iter()
6327 .filter(|d| {
6328 hr.pension_plans
6329 .iter()
6330 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6331 })
6332 .map(|d| d.net_pension_liability)
6333 .sum();
6334 let plan_assets_sum: rust_decimal::Decimal = hr
6335 .pension_plan_assets
6336 .iter()
6337 .filter(|a| {
6338 hr.pension_plans
6339 .iter()
6340 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6341 })
6342 .map(|a| a.fair_value_closing)
6343 .sum();
6344 if entity_pension_plan_count > 0 {
6345 Some(sum + plan_assets_sum)
6346 } else {
6347 None
6348 }
6349 };
6350 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6351 let sum: rust_decimal::Decimal = hr
6352 .pension_plan_assets
6353 .iter()
6354 .filter(|a| {
6355 hr.pension_plans
6356 .iter()
6357 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6358 })
6359 .map(|a| a.fair_value_closing)
6360 .sum();
6361 if entity_pension_plan_count > 0 {
6362 Some(sum)
6363 } else {
6364 None
6365 }
6366 };
6367
6368 let rp_count = audit.related_party_transactions.len();
6371 let se_count = audit.subsequent_events.len();
6372 let adjusting_count = audit
6373 .subsequent_events
6374 .iter()
6375 .filter(|e| {
6376 matches!(
6377 e.classification,
6378 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6379 )
6380 })
6381 .count();
6382
6383 let ctx = NotesGeneratorContext {
6384 entity_code: company.code.clone(),
6385 framework,
6386 period: format!("FY{}", fiscal_year),
6387 period_end: last_period_end,
6388 currency: company.currency.clone(),
6389 revenue_amount,
6390 total_ppe_gross: ppe_gross,
6391 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6392 deferred_tax_asset: entity_dta,
6394 deferred_tax_liability: entity_dtl,
6395 provision_count,
6397 total_provisions,
6398 pension_plan_count: entity_pension_plan_count,
6400 total_dbo: entity_total_dbo,
6401 total_plan_assets: entity_total_plan_assets,
6402 related_party_transaction_count: rp_count,
6404 subsequent_event_count: se_count,
6405 adjusting_event_count: adjusting_count,
6406 ..NotesGeneratorContext::default()
6407 };
6408
6409 let entity_notes = notes_gen.generate(&ctx);
6410 let standard_note_count = entity_notes.len() as u32;
6411 info!(
6412 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6413 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6414 );
6415 financial_reporting
6416 .notes_to_financial_statements
6417 .extend(entity_notes);
6418
6419 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6421 .debt_instruments
6422 .iter()
6423 .filter(|d| d.entity_id == company.code)
6424 .map(|d| {
6425 (
6426 format!("{:?}", d.instrument_type),
6427 d.principal,
6428 d.maturity_date.to_string(),
6429 )
6430 })
6431 .collect();
6432
6433 let hedge_count = treasury.hedge_relationships.len();
6434 let effective_hedges = treasury
6435 .hedge_relationships
6436 .iter()
6437 .filter(|h| h.is_effective)
6438 .count();
6439 let total_notional: rust_decimal::Decimal = treasury
6440 .hedging_instruments
6441 .iter()
6442 .map(|h| h.notional_amount)
6443 .sum();
6444 let total_fair_value: rust_decimal::Decimal = treasury
6445 .hedging_instruments
6446 .iter()
6447 .map(|h| h.fair_value)
6448 .sum();
6449
6450 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
6452 .provisions
6453 .iter()
6454 .filter(|p| p.entity_code == company.code)
6455 .map(|p| p.id.as_str())
6456 .collect();
6457 let provision_movements: Vec<(
6458 String,
6459 rust_decimal::Decimal,
6460 rust_decimal::Decimal,
6461 rust_decimal::Decimal,
6462 )> = accounting_standards
6463 .provision_movements
6464 .iter()
6465 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
6466 .map(|m| {
6467 let prov_type = accounting_standards
6468 .provisions
6469 .iter()
6470 .find(|p| p.id == m.provision_id)
6471 .map(|p| format!("{:?}", p.provision_type))
6472 .unwrap_or_else(|| "Unknown".to_string());
6473 (prov_type, m.opening, m.additions, m.closing)
6474 })
6475 .collect();
6476
6477 let enhanced_ctx = EnhancedNotesContext {
6478 entity_code: company.code.clone(),
6479 period: format!("FY{}", fiscal_year),
6480 currency: company.currency.clone(),
6481 finished_goods_value: rust_decimal::Decimal::ZERO,
6483 wip_value: rust_decimal::Decimal::ZERO,
6484 raw_materials_value: rust_decimal::Decimal::ZERO,
6485 debt_instruments,
6486 hedge_count,
6487 effective_hedges,
6488 total_notional,
6489 total_fair_value,
6490 provision_movements,
6491 };
6492
6493 let enhanced_notes =
6494 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6495 if !enhanced_notes.is_empty() {
6496 info!(
6497 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6498 company.code,
6499 enhanced_notes.len(),
6500 enhanced_ctx.debt_instruments.len(),
6501 hedge_count,
6502 enhanced_ctx.provision_movements.len(),
6503 );
6504 financial_reporting
6505 .notes_to_financial_statements
6506 .extend(enhanced_notes);
6507 }
6508 }
6509 }
6510
6511 fn build_trial_balance_from_entries(
6517 journal_entries: &[JournalEntry],
6518 coa: &ChartOfAccounts,
6519 company_code: &str,
6520 fiscal_year: u16,
6521 fiscal_period: u8,
6522 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6523 use rust_decimal::Decimal;
6524
6525 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6527 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6528
6529 for je in journal_entries {
6530 if je.header.company_code != company_code
6532 || je.header.fiscal_year != fiscal_year
6533 || je.header.fiscal_period != fiscal_period
6534 {
6535 continue;
6536 }
6537
6538 for line in &je.lines {
6539 let acct = &line.gl_account;
6540 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6541 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6542 }
6543 }
6544
6545 let mut all_accounts: Vec<&String> = account_debits
6547 .keys()
6548 .chain(account_credits.keys())
6549 .collect::<std::collections::HashSet<_>>()
6550 .into_iter()
6551 .collect();
6552 all_accounts.sort();
6553
6554 let mut entries = Vec::new();
6555
6556 for acct_number in all_accounts {
6557 let debit = account_debits
6558 .get(acct_number)
6559 .copied()
6560 .unwrap_or(Decimal::ZERO);
6561 let credit = account_credits
6562 .get(acct_number)
6563 .copied()
6564 .unwrap_or(Decimal::ZERO);
6565
6566 if debit.is_zero() && credit.is_zero() {
6567 continue;
6568 }
6569
6570 let account_name = coa
6572 .get_account(acct_number)
6573 .map(|gl| gl.short_description.clone())
6574 .unwrap_or_else(|| format!("Account {acct_number}"));
6575
6576 let category = Self::category_from_account_code(acct_number);
6581
6582 entries.push(datasynth_generators::TrialBalanceEntry {
6583 account_code: acct_number.clone(),
6584 account_name,
6585 category,
6586 debit_balance: debit,
6587 credit_balance: credit,
6588 });
6589 }
6590
6591 entries
6592 }
6593
6594 fn build_cumulative_trial_balance(
6601 journal_entries: &[JournalEntry],
6602 coa: &ChartOfAccounts,
6603 company_code: &str,
6604 start_date: NaiveDate,
6605 period_end: NaiveDate,
6606 fiscal_year: u16,
6607 fiscal_period: u8,
6608 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6609 use rust_decimal::Decimal;
6610
6611 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6613 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6614
6615 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6617 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6618
6619 for je in journal_entries {
6620 if je.header.company_code != company_code {
6621 continue;
6622 }
6623
6624 for line in &je.lines {
6625 let acct = &line.gl_account;
6626 let category = Self::category_from_account_code(acct);
6627 let is_bs_account = matches!(
6628 category.as_str(),
6629 "Cash"
6630 | "Receivables"
6631 | "Inventory"
6632 | "FixedAssets"
6633 | "Payables"
6634 | "AccruedLiabilities"
6635 | "LongTermDebt"
6636 | "Equity"
6637 );
6638
6639 if is_bs_account {
6640 if je.header.document_date <= period_end
6642 && je.header.document_date >= start_date
6643 {
6644 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6645 line.debit_amount;
6646 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6647 line.credit_amount;
6648 }
6649 } else {
6650 if je.header.fiscal_year == fiscal_year
6652 && je.header.fiscal_period == fiscal_period
6653 {
6654 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6655 line.debit_amount;
6656 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6657 line.credit_amount;
6658 }
6659 }
6660 }
6661 }
6662
6663 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6665 all_accounts.extend(bs_debits.keys().cloned());
6666 all_accounts.extend(bs_credits.keys().cloned());
6667 all_accounts.extend(is_debits.keys().cloned());
6668 all_accounts.extend(is_credits.keys().cloned());
6669
6670 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6671 sorted_accounts.sort();
6672
6673 let mut entries = Vec::new();
6674
6675 for acct_number in &sorted_accounts {
6676 let category = Self::category_from_account_code(acct_number);
6677 let is_bs_account = matches!(
6678 category.as_str(),
6679 "Cash"
6680 | "Receivables"
6681 | "Inventory"
6682 | "FixedAssets"
6683 | "Payables"
6684 | "AccruedLiabilities"
6685 | "LongTermDebt"
6686 | "Equity"
6687 );
6688
6689 let (debit, credit) = if is_bs_account {
6690 (
6691 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6692 bs_credits
6693 .get(acct_number)
6694 .copied()
6695 .unwrap_or(Decimal::ZERO),
6696 )
6697 } else {
6698 (
6699 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6700 is_credits
6701 .get(acct_number)
6702 .copied()
6703 .unwrap_or(Decimal::ZERO),
6704 )
6705 };
6706
6707 if debit.is_zero() && credit.is_zero() {
6708 continue;
6709 }
6710
6711 let account_name = coa
6712 .get_account(acct_number)
6713 .map(|gl| gl.short_description.clone())
6714 .unwrap_or_else(|| format!("Account {acct_number}"));
6715
6716 entries.push(datasynth_generators::TrialBalanceEntry {
6717 account_code: acct_number.clone(),
6718 account_name,
6719 category,
6720 debit_balance: debit,
6721 credit_balance: credit,
6722 });
6723 }
6724
6725 entries
6726 }
6727
6728 fn build_cash_flow_from_trial_balances(
6733 current_tb: &[datasynth_generators::TrialBalanceEntry],
6734 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
6735 net_income: rust_decimal::Decimal,
6736 ) -> Vec<CashFlowItem> {
6737 use rust_decimal::Decimal;
6738
6739 let aggregate =
6741 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
6742 let mut map: HashMap<String, Decimal> = HashMap::new();
6743 for entry in tb {
6744 let net = entry.debit_balance - entry.credit_balance;
6745 *map.entry(entry.category.clone()).or_default() += net;
6746 }
6747 map
6748 };
6749
6750 let current = aggregate(current_tb);
6751 let prior = prior_tb.map(aggregate);
6752
6753 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
6755 *map.get(key).unwrap_or(&Decimal::ZERO)
6756 };
6757
6758 let change = |key: &str| -> Decimal {
6760 let curr = get(¤t, key);
6761 match &prior {
6762 Some(p) => curr - get(p, key),
6763 None => curr,
6764 }
6765 };
6766
6767 let fixed_asset_change = change("FixedAssets");
6770 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
6771 -fixed_asset_change
6772 } else {
6773 Decimal::ZERO
6774 };
6775
6776 let ar_change = change("Receivables");
6778 let inventory_change = change("Inventory");
6779 let ap_change = change("Payables");
6781 let accrued_change = change("AccruedLiabilities");
6782
6783 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
6784 + (-ap_change)
6785 + (-accrued_change);
6786
6787 let capex = if fixed_asset_change > Decimal::ZERO {
6789 -fixed_asset_change
6790 } else {
6791 Decimal::ZERO
6792 };
6793 let investing_cf = capex;
6794
6795 let debt_change = -change("LongTermDebt");
6797 let equity_change = -change("Equity");
6798 let financing_cf = debt_change + equity_change;
6799
6800 let net_change = operating_cf + investing_cf + financing_cf;
6801
6802 vec![
6803 CashFlowItem {
6804 item_code: "CF-NI".to_string(),
6805 label: "Net Income".to_string(),
6806 category: CashFlowCategory::Operating,
6807 amount: net_income,
6808 amount_prior: None,
6809 sort_order: 1,
6810 is_total: false,
6811 },
6812 CashFlowItem {
6813 item_code: "CF-DEP".to_string(),
6814 label: "Depreciation & Amortization".to_string(),
6815 category: CashFlowCategory::Operating,
6816 amount: depreciation_addback,
6817 amount_prior: None,
6818 sort_order: 2,
6819 is_total: false,
6820 },
6821 CashFlowItem {
6822 item_code: "CF-AR".to_string(),
6823 label: "Change in Accounts Receivable".to_string(),
6824 category: CashFlowCategory::Operating,
6825 amount: -ar_change,
6826 amount_prior: None,
6827 sort_order: 3,
6828 is_total: false,
6829 },
6830 CashFlowItem {
6831 item_code: "CF-AP".to_string(),
6832 label: "Change in Accounts Payable".to_string(),
6833 category: CashFlowCategory::Operating,
6834 amount: -ap_change,
6835 amount_prior: None,
6836 sort_order: 4,
6837 is_total: false,
6838 },
6839 CashFlowItem {
6840 item_code: "CF-INV".to_string(),
6841 label: "Change in Inventory".to_string(),
6842 category: CashFlowCategory::Operating,
6843 amount: -inventory_change,
6844 amount_prior: None,
6845 sort_order: 5,
6846 is_total: false,
6847 },
6848 CashFlowItem {
6849 item_code: "CF-OP".to_string(),
6850 label: "Net Cash from Operating Activities".to_string(),
6851 category: CashFlowCategory::Operating,
6852 amount: operating_cf,
6853 amount_prior: None,
6854 sort_order: 6,
6855 is_total: true,
6856 },
6857 CashFlowItem {
6858 item_code: "CF-CAPEX".to_string(),
6859 label: "Capital Expenditures".to_string(),
6860 category: CashFlowCategory::Investing,
6861 amount: capex,
6862 amount_prior: None,
6863 sort_order: 7,
6864 is_total: false,
6865 },
6866 CashFlowItem {
6867 item_code: "CF-INV-T".to_string(),
6868 label: "Net Cash from Investing Activities".to_string(),
6869 category: CashFlowCategory::Investing,
6870 amount: investing_cf,
6871 amount_prior: None,
6872 sort_order: 8,
6873 is_total: true,
6874 },
6875 CashFlowItem {
6876 item_code: "CF-DEBT".to_string(),
6877 label: "Net Borrowings / (Repayments)".to_string(),
6878 category: CashFlowCategory::Financing,
6879 amount: debt_change,
6880 amount_prior: None,
6881 sort_order: 9,
6882 is_total: false,
6883 },
6884 CashFlowItem {
6885 item_code: "CF-EQ".to_string(),
6886 label: "Equity Changes".to_string(),
6887 category: CashFlowCategory::Financing,
6888 amount: equity_change,
6889 amount_prior: None,
6890 sort_order: 10,
6891 is_total: false,
6892 },
6893 CashFlowItem {
6894 item_code: "CF-FIN-T".to_string(),
6895 label: "Net Cash from Financing Activities".to_string(),
6896 category: CashFlowCategory::Financing,
6897 amount: financing_cf,
6898 amount_prior: None,
6899 sort_order: 11,
6900 is_total: true,
6901 },
6902 CashFlowItem {
6903 item_code: "CF-NET".to_string(),
6904 label: "Net Change in Cash".to_string(),
6905 category: CashFlowCategory::Operating,
6906 amount: net_change,
6907 amount_prior: None,
6908 sort_order: 12,
6909 is_total: true,
6910 },
6911 ]
6912 }
6913
6914 fn calculate_net_income_from_tb(
6918 tb: &[datasynth_generators::TrialBalanceEntry],
6919 ) -> rust_decimal::Decimal {
6920 use rust_decimal::Decimal;
6921
6922 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6923 for entry in tb {
6924 let net = entry.debit_balance - entry.credit_balance;
6925 *aggregated.entry(entry.category.clone()).or_default() += net;
6926 }
6927
6928 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6929 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6930 let opex = *aggregated
6931 .get("OperatingExpenses")
6932 .unwrap_or(&Decimal::ZERO);
6933 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6934 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6935
6936 let operating_income = revenue - cogs - opex - other_expenses - other_income;
6939 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
6941 operating_income - tax
6942 }
6943
/// Maps a GL account code to its reporting category using the first two
/// characters of the code.
///
/// Codes that are shorter than two characters, or whose two-character prefix is
/// not listed below, are classified as `"OperatingExpenses"`.
fn category_from_account_code(code: &str) -> String {
    // Every recognised prefix is a pair of ASCII digits, so matching on the
    // leading bytes is equivalent to matching on the leading two characters.
    let category = match code.as_bytes() {
        // 1x: assets
        [b'1', b'0', ..] => "Cash",
        [b'1', b'1', ..] => "Receivables",
        [b'1', b'2'..=b'4', ..] => "Inventory",
        [b'1', b'5'..=b'9', ..] => "FixedAssets",
        // 2x: liabilities
        [b'2', b'0', ..] => "Payables",
        [b'2', b'1'..=b'4', ..] => "AccruedLiabilities",
        [b'2', b'5'..=b'9', ..] => "LongTermDebt",
        // 3x: equity
        [b'3', b'0'..=b'9', ..] => "Equity",
        // 4x-8x: income statement
        [b'4', b'0'..=b'4', ..] => "Revenue",
        [b'5', b'0'..=b'2', ..] => "CostOfSales",
        [b'6', b'0'..=b'9', ..] => "OperatingExpenses",
        [b'7', b'0'..=b'4', ..] => "OtherIncome",
        [b'8', b'0'..=b'9', ..] => "OtherExpenses",
        // Anything else (including gaps such as 45-49, 53-59, 75-79, 9x).
        _ => "OperatingExpenses",
    };
    category.to_owned()
}
6972
6973 fn phase_hr_data(
6975 &mut self,
6976 stats: &mut EnhancedGenerationStatistics,
6977 ) -> SynthResult<HrSnapshot> {
6978 if !self.phase_config.generate_hr {
6979 debug!("Phase 16: Skipped (HR generation disabled)");
6980 return Ok(HrSnapshot::default());
6981 }
6982
6983 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6984
6985 let seed = self.seed;
6986 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6987 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6988 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6989 let company_code = self
6990 .config
6991 .companies
6992 .first()
6993 .map(|c| c.code.as_str())
6994 .unwrap_or("1000");
6995 let currency = self
6996 .config
6997 .companies
6998 .first()
6999 .map(|c| c.currency.as_str())
7000 .unwrap_or("USD");
7001
7002 let employee_ids: Vec<String> = self
7003 .master_data
7004 .employees
7005 .iter()
7006 .map(|e| e.employee_id.clone())
7007 .collect();
7008
7009 if employee_ids.is_empty() {
7010 debug!("Phase 16: Skipped (no employees available)");
7011 return Ok(HrSnapshot::default());
7012 }
7013
7014 let cost_center_ids: Vec<String> = self
7017 .master_data
7018 .employees
7019 .iter()
7020 .filter_map(|e| e.cost_center.clone())
7021 .collect::<std::collections::HashSet<_>>()
7022 .into_iter()
7023 .collect();
7024
7025 let mut snapshot = HrSnapshot::default();
7026
7027 if self.config.hr.payroll.enabled {
7029 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7030 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7031
7032 let payroll_pack = self.primary_pack();
7034
7035 payroll_gen.set_country_pack(payroll_pack.clone());
7038
7039 let employees_with_salary: Vec<(
7040 String,
7041 rust_decimal::Decimal,
7042 Option<String>,
7043 Option<String>,
7044 )> = self
7045 .master_data
7046 .employees
7047 .iter()
7048 .map(|e| {
7049 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7052 e.base_salary
7053 } else {
7054 rust_decimal::Decimal::from(60_000)
7055 };
7056 (
7057 e.employee_id.clone(),
7058 annual, e.cost_center.clone(),
7060 e.department_id.clone(),
7061 )
7062 })
7063 .collect();
7064
7065 let change_history = &self.master_data.employee_change_history;
7068 let has_changes = !change_history.is_empty();
7069 if has_changes {
7070 debug!(
7071 "Payroll will incorporate {} employee change events",
7072 change_history.len()
7073 );
7074 }
7075
7076 for month in 0..self.config.global.period_months {
7077 let period_start = start_date + chrono::Months::new(month);
7078 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7079 let (run, items) = if has_changes {
7080 payroll_gen.generate_with_changes(
7081 company_code,
7082 &employees_with_salary,
7083 period_start,
7084 period_end,
7085 currency,
7086 change_history,
7087 )
7088 } else {
7089 payroll_gen.generate(
7090 company_code,
7091 &employees_with_salary,
7092 period_start,
7093 period_end,
7094 currency,
7095 )
7096 };
7097 snapshot.payroll_runs.push(run);
7098 snapshot.payroll_run_count += 1;
7099 snapshot.payroll_line_item_count += items.len();
7100 snapshot.payroll_line_items.extend(items);
7101 }
7102 }
7103
7104 if self.config.hr.time_attendance.enabled {
7106 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7107 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7108 if let Some(ctx) = &self.temporal_context {
7112 time_gen.set_temporal_context(Arc::clone(ctx));
7113 }
7114 let entries = time_gen.generate(
7115 &employee_ids,
7116 start_date,
7117 end_date,
7118 &self.config.hr.time_attendance,
7119 );
7120 snapshot.time_entry_count = entries.len();
7121 snapshot.time_entries = entries;
7122 }
7123
7124 if self.config.hr.expenses.enabled {
7126 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7127 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7128 expense_gen.set_country_pack(self.primary_pack().clone());
7129 if let Some(ctx) = &self.temporal_context {
7132 expense_gen.set_temporal_context(Arc::clone(ctx));
7133 }
7134 let company_currency = self
7135 .config
7136 .companies
7137 .first()
7138 .map(|c| c.currency.as_str())
7139 .unwrap_or("USD");
7140 let reports = expense_gen.generate_with_currency(
7141 &employee_ids,
7142 start_date,
7143 end_date,
7144 &self.config.hr.expenses,
7145 company_currency,
7146 );
7147 snapshot.expense_report_count = reports.len();
7148 snapshot.expense_reports = reports;
7149 }
7150
7151 if self.config.hr.payroll.enabled {
7153 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7154 let employee_pairs: Vec<(String, String)> = self
7155 .master_data
7156 .employees
7157 .iter()
7158 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7159 .collect();
7160 let enrollments =
7161 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7162 snapshot.benefit_enrollment_count = enrollments.len();
7163 snapshot.benefit_enrollments = enrollments;
7164 }
7165
7166 if self.phase_config.generate_hr {
7168 let entity_name = self
7169 .config
7170 .companies
7171 .first()
7172 .map(|c| c.name.as_str())
7173 .unwrap_or("Entity");
7174 let period_months = self.config.global.period_months;
7175 let period_label = {
7176 let y = start_date.year();
7177 let m = start_date.month();
7178 if period_months >= 12 {
7179 format!("FY{y}")
7180 } else {
7181 format!("{y}-{m:02}")
7182 }
7183 };
7184 let reporting_date =
7185 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7186
7187 let avg_salary: Option<rust_decimal::Decimal> = {
7192 let employee_count = employee_ids.len();
7193 if self.config.hr.payroll.enabled
7194 && employee_count > 0
7195 && !snapshot.payroll_runs.is_empty()
7196 {
7197 let total_gross: rust_decimal::Decimal = snapshot
7199 .payroll_runs
7200 .iter()
7201 .filter(|r| r.company_code == company_code)
7202 .map(|r| r.total_gross)
7203 .sum();
7204 if total_gross > rust_decimal::Decimal::ZERO {
7205 let annual_total = if period_months > 0 && period_months < 12 {
7207 total_gross * rust_decimal::Decimal::from(12u32)
7208 / rust_decimal::Decimal::from(period_months)
7209 } else {
7210 total_gross
7211 };
7212 Some(
7213 (annual_total / rust_decimal::Decimal::from(employee_count))
7214 .round_dp(2),
7215 )
7216 } else {
7217 None
7218 }
7219 } else {
7220 None
7221 }
7222 };
7223
7224 let mut pension_gen =
7225 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7226 let pension_snap = pension_gen.generate(
7227 company_code,
7228 entity_name,
7229 &period_label,
7230 reporting_date,
7231 employee_ids.len(),
7232 currency,
7233 avg_salary,
7234 period_months,
7235 );
7236 snapshot.pension_plan_count = pension_snap.plans.len();
7237 snapshot.pension_plans = pension_snap.plans;
7238 snapshot.pension_obligations = pension_snap.obligations;
7239 snapshot.pension_plan_assets = pension_snap.plan_assets;
7240 snapshot.pension_disclosures = pension_snap.disclosures;
7241 snapshot.pension_journal_entries = pension_snap.journal_entries;
7246 }
7247
7248 if self.phase_config.generate_hr && !employee_ids.is_empty() {
7250 let period_months = self.config.global.period_months;
7251 let period_label = {
7252 let y = start_date.year();
7253 let m = start_date.month();
7254 if period_months >= 12 {
7255 format!("FY{y}")
7256 } else {
7257 format!("{y}-{m:02}")
7258 }
7259 };
7260 let reporting_date =
7261 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7262
7263 let mut stock_comp_gen =
7264 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7265 let stock_snap = stock_comp_gen.generate(
7266 company_code,
7267 &employee_ids,
7268 start_date,
7269 &period_label,
7270 reporting_date,
7271 currency,
7272 );
7273 snapshot.stock_grant_count = stock_snap.grants.len();
7274 snapshot.stock_grants = stock_snap.grants;
7275 snapshot.stock_comp_expenses = stock_snap.expenses;
7276 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7277 }
7278
7279 stats.payroll_run_count = snapshot.payroll_run_count;
7280 stats.time_entry_count = snapshot.time_entry_count;
7281 stats.expense_report_count = snapshot.expense_report_count;
7282 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7283 stats.pension_plan_count = snapshot.pension_plan_count;
7284 stats.stock_grant_count = snapshot.stock_grant_count;
7285
7286 info!(
7287 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7288 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7289 snapshot.time_entry_count, snapshot.expense_report_count,
7290 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7291 snapshot.stock_grant_count
7292 );
7293 self.check_resources_with_log("post-hr")?;
7294
7295 Ok(snapshot)
7296 }
7297
7298 fn phase_accounting_standards(
7300 &mut self,
7301 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7302 journal_entries: &[JournalEntry],
7303 stats: &mut EnhancedGenerationStatistics,
7304 ) -> SynthResult<AccountingStandardsSnapshot> {
7305 if !self.phase_config.generate_accounting_standards {
7306 debug!("Phase 17: Skipped (accounting standards generation disabled)");
7307 return Ok(AccountingStandardsSnapshot::default());
7308 }
7309 info!("Phase 17: Generating Accounting Standards Data");
7310
7311 let seed = self.seed;
7312 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7313 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7314 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7315 let company_code = self
7316 .config
7317 .companies
7318 .first()
7319 .map(|c| c.code.as_str())
7320 .unwrap_or("1000");
7321 let currency = self
7322 .config
7323 .companies
7324 .first()
7325 .map(|c| c.currency.as_str())
7326 .unwrap_or("USD");
7327
7328 let framework = match self.config.accounting_standards.framework {
7333 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
7334 datasynth_standards::framework::AccountingFramework::UsGaap
7335 }
7336 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
7337 datasynth_standards::framework::AccountingFramework::Ifrs
7338 }
7339 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
7340 datasynth_standards::framework::AccountingFramework::DualReporting
7341 }
7342 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
7343 datasynth_standards::framework::AccountingFramework::FrenchGaap
7344 }
7345 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
7346 datasynth_standards::framework::AccountingFramework::GermanGaap
7347 }
7348 None => {
7349 let pack = self.primary_pack();
7351 let pack_fw = pack.accounting.framework.as_str();
7352 match pack_fw {
7353 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
7354 "dual_reporting" => {
7355 datasynth_standards::framework::AccountingFramework::DualReporting
7356 }
7357 "french_gaap" => {
7358 datasynth_standards::framework::AccountingFramework::FrenchGaap
7359 }
7360 "german_gaap" | "hgb" => {
7361 datasynth_standards::framework::AccountingFramework::GermanGaap
7362 }
7363 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
7365 }
7366 }
7367 };
7368
7369 let mut snapshot = AccountingStandardsSnapshot::default();
7370
7371 if self.config.accounting_standards.revenue_recognition.enabled {
7373 let customer_ids: Vec<String> = self
7374 .master_data
7375 .customers
7376 .iter()
7377 .map(|c| c.customer_id.clone())
7378 .collect();
7379
7380 if !customer_ids.is_empty() {
7381 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
7382 let contracts = rev_gen.generate(
7383 company_code,
7384 &customer_ids,
7385 start_date,
7386 end_date,
7387 currency,
7388 &self.config.accounting_standards.revenue_recognition,
7389 framework,
7390 );
7391 snapshot.revenue_contract_count = contracts.len();
7392 snapshot.contracts = contracts;
7393 }
7394 }
7395
7396 if self.config.accounting_standards.impairment.enabled {
7398 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
7399 .master_data
7400 .assets
7401 .iter()
7402 .map(|a| {
7403 (
7404 a.asset_id.clone(),
7405 a.description.clone(),
7406 a.acquisition_cost,
7407 )
7408 })
7409 .collect();
7410
7411 if !asset_data.is_empty() {
7412 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
7413 let tests = imp_gen.generate(
7414 company_code,
7415 &asset_data,
7416 end_date,
7417 &self.config.accounting_standards.impairment,
7418 framework,
7419 );
7420 snapshot.impairment_test_count = tests.len();
7421 snapshot.impairment_tests = tests;
7422 }
7423 }
7424
7425 if self
7427 .config
7428 .accounting_standards
7429 .business_combinations
7430 .enabled
7431 {
7432 let bc_config = &self.config.accounting_standards.business_combinations;
7433 let framework_str = match framework {
7434 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7435 _ => "US_GAAP",
7436 };
7437 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
7438 let bc_snap = bc_gen.generate(
7439 company_code,
7440 currency,
7441 start_date,
7442 end_date,
7443 bc_config.acquisition_count,
7444 framework_str,
7445 );
7446 snapshot.business_combination_count = bc_snap.combinations.len();
7447 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
7448 snapshot.business_combinations = bc_snap.combinations;
7449 }
7450
7451 if self
7453 .config
7454 .accounting_standards
7455 .expected_credit_loss
7456 .enabled
7457 {
7458 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
7459 let framework_str = match framework {
7460 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
7461 _ => "ASC_326",
7462 };
7463
7464 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7467
7468 let mut ecl_gen = EclGenerator::new(seed + 43);
7469
7470 let bucket_exposures: Vec<(
7472 datasynth_core::models::subledger::ar::AgingBucket,
7473 rust_decimal::Decimal,
7474 )> = if ar_aging_reports.is_empty() {
7475 use datasynth_core::models::subledger::ar::AgingBucket;
7477 vec![
7478 (
7479 AgingBucket::Current,
7480 rust_decimal::Decimal::from(500_000_u32),
7481 ),
7482 (
7483 AgingBucket::Days1To30,
7484 rust_decimal::Decimal::from(120_000_u32),
7485 ),
7486 (
7487 AgingBucket::Days31To60,
7488 rust_decimal::Decimal::from(45_000_u32),
7489 ),
7490 (
7491 AgingBucket::Days61To90,
7492 rust_decimal::Decimal::from(15_000_u32),
7493 ),
7494 (
7495 AgingBucket::Over90Days,
7496 rust_decimal::Decimal::from(8_000_u32),
7497 ),
7498 ]
7499 } else {
7500 use datasynth_core::models::subledger::ar::AgingBucket;
7501 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7503 std::collections::HashMap::new();
7504 for report in ar_aging_reports {
7505 for (bucket, amount) in &report.bucket_totals {
7506 *totals.entry(*bucket).or_default() += amount;
7507 }
7508 }
7509 AgingBucket::all()
7510 .into_iter()
7511 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7512 .collect()
7513 };
7514
7515 let ecl_snap = ecl_gen.generate(
7516 company_code,
7517 end_date,
7518 &bucket_exposures,
7519 ecl_config,
7520 &period_label,
7521 framework_str,
7522 );
7523
7524 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7525 snapshot.ecl_models = ecl_snap.ecl_models;
7526 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7527 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7528 }
7529
7530 {
7532 let framework_str = match framework {
7533 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7534 _ => "US_GAAP",
7535 };
7536
7537 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7542 .max(rust_decimal::Decimal::from(100_000_u32));
7543
7544 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7545
7546 let mut prov_gen = ProvisionGenerator::new(seed + 44);
7547 let prov_snap = prov_gen.generate(
7548 company_code,
7549 currency,
7550 revenue_proxy,
7551 end_date,
7552 &period_label,
7553 framework_str,
7554 None, );
7556
7557 snapshot.provision_count = prov_snap.provisions.len();
7558 snapshot.provisions = prov_snap.provisions;
7559 snapshot.provision_movements = prov_snap.movements;
7560 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7561 snapshot.provision_journal_entries = prov_snap.journal_entries;
7562 }
7563
7564 {
7568 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7569
7570 let presentation_currency = self
7571 .config
7572 .global
7573 .presentation_currency
7574 .clone()
7575 .unwrap_or_else(|| self.config.global.group_currency.clone());
7576
7577 let mut rate_table = FxRateTable::new(&presentation_currency);
7580
7581 let base_rates = base_rates_usd();
7585 for (ccy, rate) in &base_rates {
7586 rate_table.add_rate(FxRate::new(
7587 ccy,
7588 "USD",
7589 RateType::Closing,
7590 end_date,
7591 *rate,
7592 "SYNTHETIC",
7593 ));
7594 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7597 rate_table.add_rate(FxRate::new(
7598 ccy,
7599 "USD",
7600 RateType::Average,
7601 end_date,
7602 avg,
7603 "SYNTHETIC",
7604 ));
7605 }
7606
7607 let mut translation_results = Vec::new();
7608 for company in &self.config.companies {
7609 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7612 .max(rust_decimal::Decimal::from(100_000_u32));
7613
7614 let func_ccy = company
7615 .functional_currency
7616 .clone()
7617 .unwrap_or_else(|| company.currency.clone());
7618
7619 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7620 &company.code,
7621 &func_ccy,
7622 &presentation_currency,
7623 &ias21_period_label,
7624 end_date,
7625 company_revenue,
7626 &rate_table,
7627 );
7628 translation_results.push(result);
7629 }
7630
7631 snapshot.currency_translation_count = translation_results.len();
7632 snapshot.currency_translation_results = translation_results;
7633 }
7634
7635 stats.revenue_contract_count = snapshot.revenue_contract_count;
7636 stats.impairment_test_count = snapshot.impairment_test_count;
7637 stats.business_combination_count = snapshot.business_combination_count;
7638 stats.ecl_model_count = snapshot.ecl_model_count;
7639 stats.provision_count = snapshot.provision_count;
7640
7641 if self.config.accounting_standards.leases.enabled {
7645 use datasynth_generators::standards::LeaseGenerator;
7646 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7647 .unwrap_or_else(|_| {
7648 NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
7649 });
7650 let framework =
7651 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7652 let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
7653 for company in &self.config.companies {
7654 let leases = lease_gen.generate(
7655 &company.code,
7656 start_date,
7657 &self.config.accounting_standards.leases,
7658 framework,
7659 );
7660 snapshot.lease_count += leases.len();
7661 snapshot.leases.extend(leases);
7662 }
7663 info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
7664 }
7665
7666 if self.config.accounting_standards.fair_value.enabled {
7670 use datasynth_generators::standards::FairValueGenerator;
7671 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7672 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7673 + chrono::Months::new(self.config.global.period_months);
7674 let framework =
7675 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7676 let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
7677 for company in &self.config.companies {
7678 let measurements = fv_gen.generate(
7679 &company.code,
7680 end_date,
7681 &company.currency,
7682 &self.config.accounting_standards.fair_value,
7683 framework,
7684 );
7685 snapshot.fair_value_measurement_count += measurements.len();
7686 snapshot.fair_value_measurements.extend(measurements);
7687 }
7688 info!(
7689 "v3.3.1 fair value measurements: {}",
7690 snapshot.fair_value_measurement_count
7691 );
7692 }
7693
7694 if self.config.accounting_standards.generate_differences
7698 && matches!(
7699 self.config.accounting_standards.framework,
7700 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
7701 )
7702 {
7703 use datasynth_generators::standards::FrameworkReconciliationGenerator;
7704 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7705 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7706 + chrono::Months::new(self.config.global.period_months);
7707 let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
7708 for company in &self.config.companies {
7709 let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
7710 snapshot.framework_difference_count += records.len();
7711 snapshot.framework_differences.extend(records);
7712 snapshot.framework_reconciliations.push(reconciliation);
7713 }
7714 info!(
7715 "v3.3.1 framework reconciliation: {} differences across {} entities",
7716 snapshot.framework_difference_count,
7717 snapshot.framework_reconciliations.len()
7718 );
7719 }
7720
7721 info!(
7722 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
7723 snapshot.revenue_contract_count,
7724 snapshot.impairment_test_count,
7725 snapshot.business_combination_count,
7726 snapshot.ecl_model_count,
7727 snapshot.provision_count,
7728 snapshot.currency_translation_count,
7729 snapshot.lease_count,
7730 snapshot.fair_value_measurement_count,
7731 snapshot.framework_difference_count,
7732 );
7733 self.check_resources_with_log("post-accounting-standards")?;
7734
7735 Ok(snapshot)
7736 }
7737
7738 fn resolve_accounting_framework(
7742 cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
7743 ) -> datasynth_standards::framework::AccountingFramework {
7744 use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
7745 use datasynth_standards::framework::AccountingFramework as Fw;
7746 match cfg {
7747 Some(Cfg::Ifrs) => Fw::Ifrs,
7748 Some(Cfg::DualReporting) => Fw::DualReporting,
7749 Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
7750 Some(Cfg::GermanGaap) => Fw::GermanGaap,
7751 _ => Fw::UsGaap,
7752 }
7753 }
7754
7755 fn phase_manufacturing(
7757 &mut self,
7758 stats: &mut EnhancedGenerationStatistics,
7759 ) -> SynthResult<ManufacturingSnapshot> {
7760 if !self.phase_config.generate_manufacturing {
7761 debug!("Phase 18: Skipped (manufacturing generation disabled)");
7762 return Ok(ManufacturingSnapshot::default());
7763 }
7764 info!("Phase 18: Generating Manufacturing Data");
7765
7766 let seed = self.seed;
7767 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7768 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7769 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7770 let company_code = self
7771 .config
7772 .companies
7773 .first()
7774 .map(|c| c.code.as_str())
7775 .unwrap_or("1000");
7776
7777 let material_data: Vec<(String, String)> = self
7778 .master_data
7779 .materials
7780 .iter()
7781 .map(|m| (m.material_id.clone(), m.description.clone()))
7782 .collect();
7783
7784 if material_data.is_empty() {
7785 debug!("Phase 18: Skipped (no materials available)");
7786 return Ok(ManufacturingSnapshot::default());
7787 }
7788
7789 let mut snapshot = ManufacturingSnapshot::default();
7790
7791 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
7793 if let Some(ctx) = &self.temporal_context {
7795 prod_gen.set_temporal_context(Arc::clone(ctx));
7796 }
7797 let production_orders = prod_gen.generate(
7798 company_code,
7799 &material_data,
7800 start_date,
7801 end_date,
7802 &self.config.manufacturing.production_orders,
7803 &self.config.manufacturing.costing,
7804 &self.config.manufacturing.routing,
7805 );
7806 snapshot.production_order_count = production_orders.len();
7807
7808 let inspection_data: Vec<(String, String, String)> = production_orders
7810 .iter()
7811 .map(|po| {
7812 (
7813 po.order_id.clone(),
7814 po.material_id.clone(),
7815 po.material_description.clone(),
7816 )
7817 })
7818 .collect();
7819
7820 snapshot.production_orders = production_orders;
7821
7822 if !inspection_data.is_empty() {
7823 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
7824 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
7825 snapshot.quality_inspection_count = inspections.len();
7826 snapshot.quality_inspections = inspections;
7827 }
7828
7829 let storage_locations: Vec<(String, String)> = material_data
7831 .iter()
7832 .enumerate()
7833 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
7834 .collect();
7835
7836 let employee_ids: Vec<String> = self
7837 .master_data
7838 .employees
7839 .iter()
7840 .map(|e| e.employee_id.clone())
7841 .collect();
7842 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
7843 .with_employee_pool(employee_ids);
7844 let mut cycle_count_total = 0usize;
7845 for month in 0..self.config.global.period_months {
7846 let count_date = start_date + chrono::Months::new(month);
7847 let items_per_count = storage_locations.len().clamp(10, 50);
7848 let cc = cc_gen.generate(
7849 company_code,
7850 &storage_locations,
7851 count_date,
7852 items_per_count,
7853 );
7854 snapshot.cycle_counts.push(cc);
7855 cycle_count_total += 1;
7856 }
7857 snapshot.cycle_count_count = cycle_count_total;
7858
7859 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
7861 let bom_components = bom_gen.generate(company_code, &material_data);
7862 snapshot.bom_component_count = bom_components.len();
7863 snapshot.bom_components = bom_components;
7864
7865 let currency = self
7867 .config
7868 .companies
7869 .first()
7870 .map(|c| c.currency.as_str())
7871 .unwrap_or("USD");
7872 let production_order_ids: Vec<String> = snapshot
7873 .production_orders
7874 .iter()
7875 .map(|po| po.order_id.clone())
7876 .collect();
7877 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
7878 let inventory_movements = inv_mov_gen.generate_with_production_orders(
7879 company_code,
7880 &material_data,
7881 start_date,
7882 end_date,
7883 2,
7884 currency,
7885 &production_order_ids,
7886 );
7887 snapshot.inventory_movement_count = inventory_movements.len();
7888 snapshot.inventory_movements = inventory_movements;
7889
7890 stats.production_order_count = snapshot.production_order_count;
7891 stats.quality_inspection_count = snapshot.quality_inspection_count;
7892 stats.cycle_count_count = snapshot.cycle_count_count;
7893 stats.bom_component_count = snapshot.bom_component_count;
7894 stats.inventory_movement_count = snapshot.inventory_movement_count;
7895
7896 info!(
7897 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
7898 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
7899 snapshot.bom_component_count, snapshot.inventory_movement_count
7900 );
7901 self.check_resources_with_log("post-manufacturing")?;
7902
7903 Ok(snapshot)
7904 }
7905
7906 fn phase_sales_kpi_budgets(
7908 &mut self,
7909 coa: &Arc<ChartOfAccounts>,
7910 financial_reporting: &FinancialReportingSnapshot,
7911 stats: &mut EnhancedGenerationStatistics,
7912 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
7913 if !self.phase_config.generate_sales_kpi_budgets {
7914 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
7915 return Ok(SalesKpiBudgetsSnapshot::default());
7916 }
7917 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
7918
7919 let seed = self.seed;
7920 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7921 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7922 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7923 let company_code = self
7924 .config
7925 .companies
7926 .first()
7927 .map(|c| c.code.as_str())
7928 .unwrap_or("1000");
7929
7930 let mut snapshot = SalesKpiBudgetsSnapshot::default();
7931
7932 if self.config.sales_quotes.enabled {
7934 let customer_data: Vec<(String, String)> = self
7935 .master_data
7936 .customers
7937 .iter()
7938 .map(|c| (c.customer_id.clone(), c.name.clone()))
7939 .collect();
7940 let material_data: Vec<(String, String)> = self
7941 .master_data
7942 .materials
7943 .iter()
7944 .map(|m| (m.material_id.clone(), m.description.clone()))
7945 .collect();
7946
7947 if !customer_data.is_empty() && !material_data.is_empty() {
7948 let employee_ids: Vec<String> = self
7949 .master_data
7950 .employees
7951 .iter()
7952 .map(|e| e.employee_id.clone())
7953 .collect();
7954 let customer_ids: Vec<String> = self
7955 .master_data
7956 .customers
7957 .iter()
7958 .map(|c| c.customer_id.clone())
7959 .collect();
7960 let company_currency = self
7961 .config
7962 .companies
7963 .first()
7964 .map(|c| c.currency.as_str())
7965 .unwrap_or("USD");
7966
7967 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
7968 .with_pools(employee_ids, customer_ids);
7969 let quotes = quote_gen.generate_with_currency(
7970 company_code,
7971 &customer_data,
7972 &material_data,
7973 start_date,
7974 end_date,
7975 &self.config.sales_quotes,
7976 company_currency,
7977 );
7978 snapshot.sales_quote_count = quotes.len();
7979 snapshot.sales_quotes = quotes;
7980 }
7981 }
7982
7983 if self.config.financial_reporting.management_kpis.enabled {
7985 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
7986 let mut kpis = kpi_gen.generate(
7987 company_code,
7988 start_date,
7989 end_date,
7990 &self.config.financial_reporting.management_kpis,
7991 );
7992
7993 {
7995 use rust_decimal::Decimal;
7996
7997 if let Some(income_stmt) =
7998 financial_reporting.financial_statements.iter().find(|fs| {
7999 fs.statement_type == StatementType::IncomeStatement
8000 && fs.company_code == company_code
8001 })
8002 {
8003 let total_revenue: Decimal = income_stmt
8005 .line_items
8006 .iter()
8007 .filter(|li| li.section.contains("Revenue") && !li.is_total)
8008 .map(|li| li.amount)
8009 .sum();
8010 let total_cogs: Decimal = income_stmt
8011 .line_items
8012 .iter()
8013 .filter(|li| {
8014 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8015 && !li.is_total
8016 })
8017 .map(|li| li.amount.abs())
8018 .sum();
8019 let total_opex: Decimal = income_stmt
8020 .line_items
8021 .iter()
8022 .filter(|li| {
8023 li.section.contains("Expense")
8024 && !li.is_total
8025 && !li.section.contains("Cost")
8026 })
8027 .map(|li| li.amount.abs())
8028 .sum();
8029
8030 if total_revenue > Decimal::ZERO {
8031 let hundred = Decimal::from(100);
8032 let gross_margin_pct =
8033 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8034 let operating_income = total_revenue - total_cogs - total_opex;
8035 let op_margin_pct =
8036 (operating_income * hundred / total_revenue).round_dp(2);
8037
8038 for kpi in &mut kpis {
8040 if kpi.name == "Gross Margin" {
8041 kpi.value = gross_margin_pct;
8042 } else if kpi.name == "Operating Margin" {
8043 kpi.value = op_margin_pct;
8044 }
8045 }
8046 }
8047 }
8048
8049 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8051 fs.statement_type == StatementType::BalanceSheet
8052 && fs.company_code == company_code
8053 }) {
8054 let current_assets: Decimal = bs
8055 .line_items
8056 .iter()
8057 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8058 .map(|li| li.amount)
8059 .sum();
8060 let current_liabilities: Decimal = bs
8061 .line_items
8062 .iter()
8063 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8064 .map(|li| li.amount.abs())
8065 .sum();
8066
8067 if current_liabilities > Decimal::ZERO {
8068 let current_ratio = (current_assets / current_liabilities).round_dp(2);
8069 for kpi in &mut kpis {
8070 if kpi.name == "Current Ratio" {
8071 kpi.value = current_ratio;
8072 }
8073 }
8074 }
8075 }
8076 }
8077
8078 snapshot.kpi_count = kpis.len();
8079 snapshot.kpis = kpis;
8080 }
8081
8082 if self.config.financial_reporting.budgets.enabled {
8084 let account_data: Vec<(String, String)> = coa
8085 .accounts
8086 .iter()
8087 .map(|a| (a.account_number.clone(), a.short_description.clone()))
8088 .collect();
8089
8090 if !account_data.is_empty() {
8091 let fiscal_year = start_date.year() as u32;
8092 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8093 let budget = budget_gen.generate(
8094 company_code,
8095 fiscal_year,
8096 &account_data,
8097 &self.config.financial_reporting.budgets,
8098 );
8099 snapshot.budget_line_count = budget.line_items.len();
8100 snapshot.budgets.push(budget);
8101 }
8102 }
8103
8104 stats.sales_quote_count = snapshot.sales_quote_count;
8105 stats.kpi_count = snapshot.kpi_count;
8106 stats.budget_line_count = snapshot.budget_line_count;
8107
8108 info!(
8109 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8110 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8111 );
8112 self.check_resources_with_log("post-sales-kpi-budgets")?;
8113
8114 Ok(snapshot)
8115 }
8116
8117 fn compute_pre_tax_income(
8124 company_code: &str,
8125 journal_entries: &[JournalEntry],
8126 ) -> rust_decimal::Decimal {
8127 use datasynth_core::accounts::AccountCategory;
8128 use rust_decimal::Decimal;
8129
8130 let mut total_revenue = Decimal::ZERO;
8131 let mut total_expenses = Decimal::ZERO;
8132
8133 for je in journal_entries {
8134 if je.header.company_code != company_code {
8135 continue;
8136 }
8137 for line in &je.lines {
8138 let cat = AccountCategory::from_account(&line.gl_account);
8139 match cat {
8140 AccountCategory::Revenue => {
8141 total_revenue += line.credit_amount - line.debit_amount;
8142 }
8143 AccountCategory::Cogs
8144 | AccountCategory::OperatingExpense
8145 | AccountCategory::OtherIncomeExpense => {
8146 total_expenses += line.debit_amount - line.credit_amount;
8147 }
8148 _ => {}
8149 }
8150 }
8151 }
8152
8153 let pti = (total_revenue - total_expenses).round_dp(2);
8154 if pti == rust_decimal::Decimal::ZERO {
8155 rust_decimal::Decimal::from(1_000_000u32)
8158 } else {
8159 pti
8160 }
8161 }
8162
8163 fn phase_tax_generation(
8165 &mut self,
8166 document_flows: &DocumentFlowSnapshot,
8167 journal_entries: &[JournalEntry],
8168 stats: &mut EnhancedGenerationStatistics,
8169 ) -> SynthResult<TaxSnapshot> {
8170 if !self.phase_config.generate_tax {
8171 debug!("Phase 20: Skipped (tax generation disabled)");
8172 return Ok(TaxSnapshot::default());
8173 }
8174 info!("Phase 20: Generating Tax Data");
8175
8176 let seed = self.seed;
8177 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8178 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8179 let fiscal_year = start_date.year();
8180 let company_code = self
8181 .config
8182 .companies
8183 .first()
8184 .map(|c| c.code.as_str())
8185 .unwrap_or("1000");
8186
8187 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8188 seed + 370,
8189 self.config.tax.clone(),
8190 );
8191
8192 let pack = self.primary_pack().clone();
8193 let (jurisdictions, codes) =
8194 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8195
8196 let mut provisions = Vec::new();
8198 if self.config.tax.provisions.enabled {
8199 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
8200 for company in &self.config.companies {
8201 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
8202 let statutory_rate = rust_decimal::Decimal::new(
8203 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
8204 2,
8205 );
8206 let provision = provision_gen.generate(
8207 &company.code,
8208 start_date,
8209 pre_tax_income,
8210 statutory_rate,
8211 );
8212 provisions.push(provision);
8213 }
8214 }
8215
8216 let mut tax_lines = Vec::new();
8218 if !codes.is_empty() {
8219 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
8220 datasynth_generators::TaxLineGeneratorConfig::default(),
8221 codes.clone(),
8222 seed + 372,
8223 );
8224
8225 let buyer_country = self
8228 .config
8229 .companies
8230 .first()
8231 .map(|c| c.country.as_str())
8232 .unwrap_or("US");
8233 for vi in &document_flows.vendor_invoices {
8234 let lines = tax_line_gen.generate_for_document(
8235 datasynth_core::models::TaxableDocumentType::VendorInvoice,
8236 &vi.header.document_id,
8237 buyer_country, buyer_country,
8239 vi.payable_amount,
8240 vi.header.document_date,
8241 None,
8242 );
8243 tax_lines.extend(lines);
8244 }
8245
8246 for ci in &document_flows.customer_invoices {
8248 let lines = tax_line_gen.generate_for_document(
8249 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8250 &ci.header.document_id,
8251 buyer_country, buyer_country,
8253 ci.total_gross_amount,
8254 ci.header.document_date,
8255 None,
8256 );
8257 tax_lines.extend(lines);
8258 }
8259 }
8260
8261 let deferred_tax = {
8263 let companies: Vec<(&str, &str)> = self
8264 .config
8265 .companies
8266 .iter()
8267 .map(|c| (c.code.as_str(), c.country.as_str()))
8268 .collect();
8269 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8270 deferred_gen.generate(&companies, start_date, journal_entries)
8271 };
8272
8273 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8276 std::collections::HashMap::new();
8277 for vi in &document_flows.vendor_invoices {
8278 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8279 }
8280 for ci in &document_flows.customer_invoices {
8281 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8282 }
8283
8284 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8286 let tax_posting_journal_entries = if !tax_lines.is_empty() {
8287 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8288 &tax_lines,
8289 company_code,
8290 &doc_dates,
8291 end_date,
8292 );
8293 debug!("Generated {} tax posting JEs", jes.len());
8294 jes
8295 } else {
8296 Vec::new()
8297 };
8298
8299 let snapshot = TaxSnapshot {
8300 jurisdiction_count: jurisdictions.len(),
8301 code_count: codes.len(),
8302 jurisdictions,
8303 codes,
8304 tax_provisions: provisions,
8305 tax_lines,
8306 tax_returns: Vec::new(),
8307 withholding_records: Vec::new(),
8308 tax_anomaly_labels: Vec::new(),
8309 deferred_tax,
8310 tax_posting_journal_entries,
8311 };
8312
8313 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8314 stats.tax_code_count = snapshot.code_count;
8315 stats.tax_provision_count = snapshot.tax_provisions.len();
8316 stats.tax_line_count = snapshot.tax_lines.len();
8317
8318 info!(
8319 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8320 snapshot.jurisdiction_count,
8321 snapshot.code_count,
8322 snapshot.tax_provisions.len(),
8323 snapshot.deferred_tax.temporary_differences.len(),
8324 snapshot.deferred_tax.journal_entries.len(),
8325 snapshot.tax_posting_journal_entries.len(),
8326 );
8327 self.check_resources_with_log("post-tax")?;
8328
8329 Ok(snapshot)
8330 }
8331
8332 fn phase_esg_generation(
8334 &mut self,
8335 document_flows: &DocumentFlowSnapshot,
8336 manufacturing: &ManufacturingSnapshot,
8337 stats: &mut EnhancedGenerationStatistics,
8338 ) -> SynthResult<EsgSnapshot> {
8339 if !self.phase_config.generate_esg {
8340 debug!("Phase 21: Skipped (ESG generation disabled)");
8341 return Ok(EsgSnapshot::default());
8342 }
8343 let degradation = self.check_resources()?;
8344 if degradation >= DegradationLevel::Reduced {
8345 debug!(
8346 "Phase skipped due to resource pressure (degradation: {:?})",
8347 degradation
8348 );
8349 return Ok(EsgSnapshot::default());
8350 }
8351 info!("Phase 21: Generating ESG Data");
8352
8353 let seed = self.seed;
8354 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8355 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8356 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8357 let entity_id = self
8358 .config
8359 .companies
8360 .first()
8361 .map(|c| c.code.as_str())
8362 .unwrap_or("1000");
8363
8364 let esg_cfg = &self.config.esg;
8365 let mut snapshot = EsgSnapshot::default();
8366
8367 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
8369 esg_cfg.environmental.energy.clone(),
8370 seed + 80,
8371 );
8372 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
8373
8374 let facility_count = esg_cfg.environmental.energy.facility_count;
8376 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
8377 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
8378
8379 let mut waste_gen = datasynth_generators::WasteGenerator::new(
8381 seed + 82,
8382 esg_cfg.environmental.waste.diversion_target,
8383 facility_count,
8384 );
8385 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
8386
8387 let mut emission_gen =
8389 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
8390
8391 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
8393 .iter()
8394 .map(|e| datasynth_generators::EnergyInput {
8395 facility_id: e.facility_id.clone(),
8396 energy_type: match e.energy_source {
8397 EnergySourceType::NaturalGas => {
8398 datasynth_generators::EnergyInputType::NaturalGas
8399 }
8400 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
8401 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
8402 _ => datasynth_generators::EnergyInputType::Electricity,
8403 },
8404 consumption_kwh: e.consumption_kwh,
8405 period: e.period,
8406 })
8407 .collect();
8408
8409 if !manufacturing.production_orders.is_empty() {
8411 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
8412 &manufacturing.production_orders,
8413 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
8416 if !mfg_energy.is_empty() {
8417 info!(
8418 "ESG: {} energy inputs derived from {} production orders",
8419 mfg_energy.len(),
8420 manufacturing.production_orders.len(),
8421 );
8422 energy_inputs.extend(mfg_energy);
8423 }
8424 }
8425
8426 let mut emissions = Vec::new();
8427 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
8428 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
8429
8430 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
8432 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8433 for payment in &document_flows.payments {
8434 if payment.is_vendor {
8435 *totals
8436 .entry(payment.business_partner_id.clone())
8437 .or_default() += payment.amount;
8438 }
8439 }
8440 totals
8441 };
8442 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
8443 .master_data
8444 .vendors
8445 .iter()
8446 .map(|v| {
8447 let spend = vendor_payment_totals
8448 .get(&v.vendor_id)
8449 .copied()
8450 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
8451 datasynth_generators::VendorSpendInput {
8452 vendor_id: v.vendor_id.clone(),
8453 category: format!("{:?}", v.vendor_type).to_lowercase(),
8454 spend,
8455 country: v.country.clone(),
8456 }
8457 })
8458 .collect();
8459 if !vendor_spend.is_empty() {
8460 emissions.extend(emission_gen.generate_scope3_purchased_goods(
8461 entity_id,
8462 &vendor_spend,
8463 start_date,
8464 end_date,
8465 ));
8466 }
8467
8468 let headcount = self.master_data.employees.len() as u32;
8470 if headcount > 0 {
8471 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
8472 emissions.extend(emission_gen.generate_scope3_business_travel(
8473 entity_id,
8474 travel_spend,
8475 start_date,
8476 ));
8477 emissions
8478 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
8479 }
8480
8481 snapshot.emission_count = emissions.len();
8482 snapshot.emissions = emissions;
8483 snapshot.energy = energy_records;
8484
8485 let mut workforce_gen =
8487 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
8488 let total_headcount = headcount.max(100);
8489 snapshot.diversity =
8490 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
8491 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
8492
8493 if !self.master_data.employees.is_empty() {
8495 let hr_diversity = workforce_gen.generate_diversity_from_employees(
8496 entity_id,
8497 &self.master_data.employees,
8498 end_date,
8499 );
8500 if !hr_diversity.is_empty() {
8501 info!(
8502 "ESG: {} diversity metrics derived from {} actual employees",
8503 hr_diversity.len(),
8504 self.master_data.employees.len(),
8505 );
8506 snapshot.diversity.extend(hr_diversity);
8507 }
8508 }
8509
8510 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
8511 entity_id,
8512 facility_count,
8513 start_date,
8514 end_date,
8515 );
8516
8517 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
8520 entity_id,
8521 &snapshot.safety_incidents,
8522 total_hours,
8523 start_date,
8524 );
8525 snapshot.safety_metrics = vec![safety_metric];
8526
8527 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
8529 seed + 85,
8530 esg_cfg.governance.board_size,
8531 esg_cfg.governance.independence_target,
8532 );
8533 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
8534
8535 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
8537 esg_cfg.supply_chain_esg.clone(),
8538 seed + 86,
8539 );
8540 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
8541 .master_data
8542 .vendors
8543 .iter()
8544 .map(|v| datasynth_generators::VendorInput {
8545 vendor_id: v.vendor_id.clone(),
8546 country: v.country.clone(),
8547 industry: format!("{:?}", v.vendor_type).to_lowercase(),
8548 quality_score: None,
8549 })
8550 .collect();
8551 snapshot.supplier_assessments =
8552 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
8553
8554 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
8556 seed + 87,
8557 esg_cfg.reporting.clone(),
8558 esg_cfg.climate_scenarios.clone(),
8559 );
8560 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
8561 snapshot.disclosures = disclosure_gen.generate_disclosures(
8562 entity_id,
8563 &snapshot.materiality,
8564 start_date,
8565 end_date,
8566 );
8567 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
8568 snapshot.disclosure_count = snapshot.disclosures.len();
8569
8570 if esg_cfg.anomaly_rate > 0.0 {
8572 let mut anomaly_injector =
8573 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
8574 let mut labels = Vec::new();
8575 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
8576 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
8577 labels.extend(
8578 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
8579 );
8580 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
8581 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
8582 snapshot.anomaly_labels = labels;
8583 }
8584
8585 stats.esg_emission_count = snapshot.emission_count;
8586 stats.esg_disclosure_count = snapshot.disclosure_count;
8587
8588 info!(
8589 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
8590 snapshot.emission_count,
8591 snapshot.disclosure_count,
8592 snapshot.supplier_assessments.len()
8593 );
8594 self.check_resources_with_log("post-esg")?;
8595
8596 Ok(snapshot)
8597 }
8598
8599 fn phase_treasury_data(
8601 &mut self,
8602 document_flows: &DocumentFlowSnapshot,
8603 subledger: &SubledgerSnapshot,
8604 intercompany: &IntercompanySnapshot,
8605 stats: &mut EnhancedGenerationStatistics,
8606 ) -> SynthResult<TreasurySnapshot> {
8607 if !self.phase_config.generate_treasury {
8608 debug!("Phase 22: Skipped (treasury generation disabled)");
8609 return Ok(TreasurySnapshot::default());
8610 }
8611 let degradation = self.check_resources()?;
8612 if degradation >= DegradationLevel::Reduced {
8613 debug!(
8614 "Phase skipped due to resource pressure (degradation: {:?})",
8615 degradation
8616 );
8617 return Ok(TreasurySnapshot::default());
8618 }
8619 info!("Phase 22: Generating Treasury Data");
8620
8621 let seed = self.seed;
8622 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8623 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8624 let currency = self
8625 .config
8626 .companies
8627 .first()
8628 .map(|c| c.currency.as_str())
8629 .unwrap_or("USD");
8630 let entity_id = self
8631 .config
8632 .companies
8633 .first()
8634 .map(|c| c.code.as_str())
8635 .unwrap_or("1000");
8636
8637 let mut snapshot = TreasurySnapshot::default();
8638
8639 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8641 self.config.treasury.debt.clone(),
8642 seed + 90,
8643 );
8644 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8645
8646 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8648 self.config.treasury.hedging.clone(),
8649 seed + 91,
8650 );
8651 for debt in &snapshot.debt_instruments {
8652 if debt.rate_type == InterestRateType::Variable {
8653 let swap = hedge_gen.generate_ir_swap(
8654 currency,
8655 debt.principal,
8656 debt.origination_date,
8657 debt.maturity_date,
8658 );
8659 snapshot.hedging_instruments.push(swap);
8660 }
8661 }
8662
8663 {
8666 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8667 for payment in &document_flows.payments {
8668 if payment.currency != currency {
8669 let entry = fx_map
8670 .entry(payment.currency.clone())
8671 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8672 entry.0 += payment.amount;
8673 if payment.header.document_date > entry.1 {
8675 entry.1 = payment.header.document_date;
8676 }
8677 }
8678 }
8679 if !fx_map.is_empty() {
8680 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
8681 .into_iter()
8682 .map(|(foreign_ccy, (net_amount, settlement_date))| {
8683 datasynth_generators::treasury::FxExposure {
8684 currency_pair: format!("{foreign_ccy}/{currency}"),
8685 foreign_currency: foreign_ccy,
8686 net_amount,
8687 settlement_date,
8688 description: "AP payment FX exposure".to_string(),
8689 }
8690 })
8691 .collect();
8692 let (fx_instruments, fx_relationships) =
8693 hedge_gen.generate(start_date, &fx_exposures);
8694 snapshot.hedging_instruments.extend(fx_instruments);
8695 snapshot.hedge_relationships.extend(fx_relationships);
8696 }
8697 }
8698
8699 if self.config.treasury.anomaly_rate > 0.0 {
8701 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
8702 seed + 92,
8703 self.config.treasury.anomaly_rate,
8704 );
8705 let mut labels = Vec::new();
8706 labels.extend(
8707 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
8708 );
8709 snapshot.treasury_anomaly_labels = labels;
8710 }
8711
8712 if self.config.treasury.cash_positioning.enabled {
8714 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
8715
8716 for payment in &document_flows.payments {
8718 cash_flows.push(datasynth_generators::treasury::CashFlow {
8719 date: payment.header.document_date,
8720 account_id: format!("{entity_id}-MAIN"),
8721 amount: payment.amount,
8722 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
8723 });
8724 }
8725
8726 for chain in &document_flows.o2c_chains {
8728 if let Some(ref receipt) = chain.customer_receipt {
8729 cash_flows.push(datasynth_generators::treasury::CashFlow {
8730 date: receipt.header.document_date,
8731 account_id: format!("{entity_id}-MAIN"),
8732 amount: receipt.amount,
8733 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8734 });
8735 }
8736 for receipt in &chain.remainder_receipts {
8738 cash_flows.push(datasynth_generators::treasury::CashFlow {
8739 date: receipt.header.document_date,
8740 account_id: format!("{entity_id}-MAIN"),
8741 amount: receipt.amount,
8742 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8743 });
8744 }
8745 }
8746
8747 if !cash_flows.is_empty() {
8748 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
8749 self.config.treasury.cash_positioning.clone(),
8750 seed + 93,
8751 );
8752 let account_id = format!("{entity_id}-MAIN");
8753 snapshot.cash_positions = cash_gen.generate(
8754 entity_id,
8755 &account_id,
8756 currency,
8757 &cash_flows,
8758 start_date,
8759 start_date + chrono::Months::new(self.config.global.period_months),
8760 rust_decimal::Decimal::new(1_000_000, 0), );
8762 }
8763 }
8764
8765 if self.config.treasury.cash_forecasting.enabled {
8767 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8768
8769 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
8771 .ar_invoices
8772 .iter()
8773 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8774 .map(|inv| {
8775 let days_past_due = if inv.due_date < end_date {
8776 (end_date - inv.due_date).num_days().max(0) as u32
8777 } else {
8778 0
8779 };
8780 datasynth_generators::treasury::ArAgingItem {
8781 expected_date: inv.due_date,
8782 amount: inv.amount_remaining,
8783 days_past_due,
8784 document_id: inv.invoice_number.clone(),
8785 }
8786 })
8787 .collect();
8788
8789 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
8791 .ap_invoices
8792 .iter()
8793 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8794 .map(|inv| datasynth_generators::treasury::ApAgingItem {
8795 payment_date: inv.due_date,
8796 amount: inv.amount_remaining,
8797 document_id: inv.invoice_number.clone(),
8798 })
8799 .collect();
8800
8801 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
8802 self.config.treasury.cash_forecasting.clone(),
8803 seed + 94,
8804 );
8805 let forecast = forecast_gen.generate(
8806 entity_id,
8807 currency,
8808 end_date,
8809 &ar_items,
8810 &ap_items,
8811 &[], );
8813 snapshot.cash_forecasts.push(forecast);
8814 }
8815
8816 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
8818 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8819 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
8820 self.config.treasury.cash_pooling.clone(),
8821 seed + 95,
8822 );
8823
8824 let account_ids: Vec<String> = snapshot
8826 .cash_positions
8827 .iter()
8828 .map(|cp| cp.bank_account_id.clone())
8829 .collect::<std::collections::HashSet<_>>()
8830 .into_iter()
8831 .collect();
8832
8833 if let Some(pool) =
8834 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
8835 {
8836 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8838 for cp in &snapshot.cash_positions {
8839 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
8840 }
8841
8842 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
8843 latest_balances
8844 .into_iter()
8845 .filter(|(id, _)| pool.participant_accounts.contains(id))
8846 .map(
8847 |(id, balance)| datasynth_generators::treasury::AccountBalance {
8848 account_id: id,
8849 balance,
8850 },
8851 )
8852 .collect();
8853
8854 let sweeps =
8855 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
8856 snapshot.cash_pool_sweeps = sweeps;
8857 snapshot.cash_pools.push(pool);
8858 }
8859 }
8860
8861 if self.config.treasury.bank_guarantees.enabled {
8863 let vendor_names: Vec<String> = self
8864 .master_data
8865 .vendors
8866 .iter()
8867 .map(|v| v.name.clone())
8868 .collect();
8869 if !vendor_names.is_empty() {
8870 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
8871 self.config.treasury.bank_guarantees.clone(),
8872 seed + 96,
8873 );
8874 snapshot.bank_guarantees =
8875 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
8876 }
8877 }
8878
8879 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
8881 let entity_ids: Vec<String> = self
8882 .config
8883 .companies
8884 .iter()
8885 .map(|c| c.code.clone())
8886 .collect();
8887 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
8888 .matched_pairs
8889 .iter()
8890 .map(|mp| {
8891 (
8892 mp.seller_company.clone(),
8893 mp.buyer_company.clone(),
8894 mp.amount,
8895 )
8896 })
8897 .collect();
8898 if entity_ids.len() >= 2 {
8899 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
8900 self.config.treasury.netting.clone(),
8901 seed + 97,
8902 );
8903 snapshot.netting_runs = netting_gen.generate(
8904 &entity_ids,
8905 currency,
8906 start_date,
8907 self.config.global.period_months,
8908 &ic_amounts,
8909 );
8910 }
8911 }
8912
8913 {
8915 use datasynth_generators::treasury::TreasuryAccounting;
8916
8917 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8918 let mut treasury_jes = Vec::new();
8919
8920 if !snapshot.debt_instruments.is_empty() {
8922 let debt_jes =
8923 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
8924 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
8925 treasury_jes.extend(debt_jes);
8926 }
8927
8928 if !snapshot.hedging_instruments.is_empty() {
8930 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
8931 &snapshot.hedging_instruments,
8932 &snapshot.hedge_relationships,
8933 end_date,
8934 entity_id,
8935 );
8936 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
8937 treasury_jes.extend(hedge_jes);
8938 }
8939
8940 if !snapshot.cash_pool_sweeps.is_empty() {
8942 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
8943 &snapshot.cash_pool_sweeps,
8944 entity_id,
8945 );
8946 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
8947 treasury_jes.extend(sweep_jes);
8948 }
8949
8950 if !treasury_jes.is_empty() {
8951 debug!("Total treasury journal entries: {}", treasury_jes.len());
8952 }
8953 snapshot.journal_entries = treasury_jes;
8954 }
8955
8956 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
8957 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
8958 stats.cash_position_count = snapshot.cash_positions.len();
8959 stats.cash_forecast_count = snapshot.cash_forecasts.len();
8960 stats.cash_pool_count = snapshot.cash_pools.len();
8961
8962 info!(
8963 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
8964 snapshot.debt_instruments.len(),
8965 snapshot.hedging_instruments.len(),
8966 snapshot.cash_positions.len(),
8967 snapshot.cash_forecasts.len(),
8968 snapshot.cash_pools.len(),
8969 snapshot.bank_guarantees.len(),
8970 snapshot.netting_runs.len(),
8971 snapshot.journal_entries.len(),
8972 );
8973 self.check_resources_with_log("post-treasury")?;
8974
8975 Ok(snapshot)
8976 }
8977
8978 fn phase_project_accounting(
8980 &mut self,
8981 document_flows: &DocumentFlowSnapshot,
8982 hr: &HrSnapshot,
8983 stats: &mut EnhancedGenerationStatistics,
8984 ) -> SynthResult<ProjectAccountingSnapshot> {
8985 if !self.phase_config.generate_project_accounting {
8986 debug!("Phase 23: Skipped (project accounting disabled)");
8987 return Ok(ProjectAccountingSnapshot::default());
8988 }
8989 let degradation = self.check_resources()?;
8990 if degradation >= DegradationLevel::Reduced {
8991 debug!(
8992 "Phase skipped due to resource pressure (degradation: {:?})",
8993 degradation
8994 );
8995 return Ok(ProjectAccountingSnapshot::default());
8996 }
8997 info!("Phase 23: Generating Project Accounting Data");
8998
8999 let seed = self.seed;
9000 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9001 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9002 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9003 let company_code = self
9004 .config
9005 .companies
9006 .first()
9007 .map(|c| c.code.as_str())
9008 .unwrap_or("1000");
9009
9010 let mut snapshot = ProjectAccountingSnapshot::default();
9011
9012 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9014 self.config.project_accounting.clone(),
9015 seed + 95,
9016 );
9017 let pool = project_gen.generate(company_code, start_date, end_date);
9018 snapshot.projects = pool.projects.clone();
9019
9020 {
9022 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9023 Vec::new();
9024
9025 for te in &hr.time_entries {
9027 let total_hours = te.hours_regular + te.hours_overtime;
9028 if total_hours > 0.0 {
9029 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9030 id: te.entry_id.clone(),
9031 entity_id: company_code.to_string(),
9032 date: te.date,
9033 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9034 .unwrap_or(rust_decimal::Decimal::ZERO),
9035 source_type: CostSourceType::TimeEntry,
9036 hours: Some(
9037 rust_decimal::Decimal::from_f64_retain(total_hours)
9038 .unwrap_or(rust_decimal::Decimal::ZERO),
9039 ),
9040 });
9041 }
9042 }
9043
9044 for er in &hr.expense_reports {
9046 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9047 id: er.report_id.clone(),
9048 entity_id: company_code.to_string(),
9049 date: er.submission_date,
9050 amount: er.total_amount,
9051 source_type: CostSourceType::ExpenseReport,
9052 hours: None,
9053 });
9054 }
9055
9056 for po in &document_flows.purchase_orders {
9058 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9059 id: po.header.document_id.clone(),
9060 entity_id: company_code.to_string(),
9061 date: po.header.document_date,
9062 amount: po.total_net_amount,
9063 source_type: CostSourceType::PurchaseOrder,
9064 hours: None,
9065 });
9066 }
9067
9068 for vi in &document_flows.vendor_invoices {
9070 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9071 id: vi.header.document_id.clone(),
9072 entity_id: company_code.to_string(),
9073 date: vi.header.document_date,
9074 amount: vi.payable_amount,
9075 source_type: CostSourceType::VendorInvoice,
9076 hours: None,
9077 });
9078 }
9079
9080 if !source_docs.is_empty() && !pool.projects.is_empty() {
9081 let mut cost_gen =
9082 datasynth_generators::project_accounting::ProjectCostGenerator::new(
9083 self.config.project_accounting.cost_allocation.clone(),
9084 seed + 99,
9085 );
9086 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9087 }
9088 }
9089
9090 if self.config.project_accounting.change_orders.enabled {
9092 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9093 self.config.project_accounting.change_orders.clone(),
9094 seed + 96,
9095 );
9096 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9097 }
9098
9099 if self.config.project_accounting.milestones.enabled {
9101 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9102 self.config.project_accounting.milestones.clone(),
9103 seed + 97,
9104 );
9105 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9106 }
9107
9108 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9110 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9111 self.config.project_accounting.earned_value.clone(),
9112 seed + 98,
9113 );
9114 snapshot.earned_value_metrics =
9115 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9116 }
9117
9118 if self.config.project_accounting.revenue_recognition.enabled
9120 && !snapshot.projects.is_empty()
9121 && !snapshot.cost_lines.is_empty()
9122 {
9123 use datasynth_generators::project_accounting::RevenueGenerator;
9124 let rev_config = self.config.project_accounting.revenue_recognition.clone();
9125 let avg_contract_value =
9126 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9127 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9128
9129 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9132 snapshot
9133 .projects
9134 .iter()
9135 .filter(|p| {
9136 matches!(
9137 p.project_type,
9138 datasynth_core::models::ProjectType::Customer
9139 )
9140 })
9141 .map(|p| {
9142 let cv = if p.budget > rust_decimal::Decimal::ZERO {
9143 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9144 } else {
9146 avg_contract_value
9147 };
9148 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
9150 })
9151 .collect();
9152
9153 if !contract_values.is_empty() {
9154 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9155 snapshot.revenue_records = rev_gen.generate(
9156 &snapshot.projects,
9157 &snapshot.cost_lines,
9158 &contract_values,
9159 start_date,
9160 end_date,
9161 );
9162 debug!(
9163 "Generated {} revenue recognition records for {} customer projects",
9164 snapshot.revenue_records.len(),
9165 contract_values.len()
9166 );
9167 }
9168 }
9169
9170 stats.project_count = snapshot.projects.len();
9171 stats.project_change_order_count = snapshot.change_orders.len();
9172 stats.project_cost_line_count = snapshot.cost_lines.len();
9173
9174 info!(
9175 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9176 snapshot.projects.len(),
9177 snapshot.change_orders.len(),
9178 snapshot.milestones.len(),
9179 snapshot.earned_value_metrics.len()
9180 );
9181 self.check_resources_with_log("post-project-accounting")?;
9182
9183 Ok(snapshot)
9184 }
9185
9186 fn phase_evolution_events(
9188 &mut self,
9189 stats: &mut EnhancedGenerationStatistics,
9190 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9191 if !self.phase_config.generate_evolution_events {
9192 debug!("Phase 24: Skipped (evolution events disabled)");
9193 return Ok((Vec::new(), Vec::new()));
9194 }
9195 info!("Phase 24: Generating Process Evolution + Organizational Events");
9196
9197 let seed = self.seed;
9198 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9199 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9200 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9201
9202 let mut proc_gen =
9204 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
9205 seed + 100,
9206 );
9207 let process_events = proc_gen.generate_events(start_date, end_date);
9208
9209 let company_codes: Vec<String> = self
9211 .config
9212 .companies
9213 .iter()
9214 .map(|c| c.code.clone())
9215 .collect();
9216 let mut org_gen =
9217 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
9218 seed + 101,
9219 );
9220 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
9221
9222 stats.process_evolution_event_count = process_events.len();
9223 stats.organizational_event_count = org_events.len();
9224
9225 info!(
9226 "Evolution events generated: {} process evolution, {} organizational",
9227 process_events.len(),
9228 org_events.len()
9229 );
9230 self.check_resources_with_log("post-evolution-events")?;
9231
9232 Ok((process_events, org_events))
9233 }
9234
9235 fn phase_disruption_events(
9238 &self,
9239 stats: &mut EnhancedGenerationStatistics,
9240 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9241 if !self.config.organizational_events.enabled {
9242 debug!("Phase 24b: Skipped (organizational events disabled)");
9243 return Ok(Vec::new());
9244 }
9245 info!("Phase 24b: Generating Disruption Events");
9246
9247 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9248 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9249 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9250
9251 let company_codes: Vec<String> = self
9252 .config
9253 .companies
9254 .iter()
9255 .map(|c| c.code.clone())
9256 .collect();
9257
9258 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9259 let events = gen.generate(start_date, end_date, &company_codes);
9260
9261 stats.disruption_event_count = events.len();
9262 info!("Disruption events generated: {} events", events.len());
9263 self.check_resources_with_log("post-disruption-events")?;
9264
9265 Ok(events)
9266 }
9267
9268 fn phase_counterfactuals(
9275 &self,
9276 journal_entries: &[JournalEntry],
9277 stats: &mut EnhancedGenerationStatistics,
9278 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9279 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9280 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9281 return Ok(Vec::new());
9282 }
9283 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9284
9285 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9286
9287 let mut gen = CounterfactualGenerator::new(self.seed + 110);
9288
9289 let specs = [
9291 CounterfactualSpec::ScaleAmount { factor: 2.5 },
9292 CounterfactualSpec::ShiftDate { days: -14 },
9293 CounterfactualSpec::SelfApprove,
9294 CounterfactualSpec::SplitTransaction { split_count: 3 },
9295 ];
9296
9297 let pairs: Vec<_> = journal_entries
9298 .iter()
9299 .enumerate()
9300 .map(|(i, je)| {
9301 let spec = &specs[i % specs.len()];
9302 gen.generate(je, spec)
9303 })
9304 .collect();
9305
9306 stats.counterfactual_pair_count = pairs.len();
9307 info!(
9308 "Counterfactual pairs generated: {} pairs from {} journal entries",
9309 pairs.len(),
9310 journal_entries.len()
9311 );
9312 self.check_resources_with_log("post-counterfactuals")?;
9313
9314 Ok(pairs)
9315 }
9316
9317 fn phase_red_flags(
9324 &self,
9325 anomaly_labels: &AnomalyLabels,
9326 document_flows: &DocumentFlowSnapshot,
9327 stats: &mut EnhancedGenerationStatistics,
9328 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
9329 if !self.config.fraud.enabled {
9330 debug!("Phase 26: Skipped (fraud generation disabled)");
9331 return Ok(Vec::new());
9332 }
9333 info!("Phase 26: Generating Fraud Red-Flag Indicators");
9334
9335 use datasynth_generators::fraud::RedFlagGenerator;
9336
9337 let generator = RedFlagGenerator::new();
9338 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
9339
9340 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
9342 .labels
9343 .iter()
9344 .filter(|label| label.anomaly_type.is_intentional())
9345 .map(|label| label.document_id.as_str())
9346 .collect();
9347
9348 let mut flags = Vec::new();
9349
9350 for chain in &document_flows.p2p_chains {
9352 let doc_id = &chain.purchase_order.header.document_id;
9353 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9354 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9355 }
9356
9357 for chain in &document_flows.o2c_chains {
9359 let doc_id = &chain.sales_order.header.document_id;
9360 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9361 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9362 }
9363
9364 stats.red_flag_count = flags.len();
9365 info!(
9366 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
9367 flags.len(),
9368 document_flows.p2p_chains.len(),
9369 document_flows.o2c_chains.len(),
9370 fraud_doc_ids.len()
9371 );
9372 self.check_resources_with_log("post-red-flags")?;
9373
9374 Ok(flags)
9375 }
9376
9377 fn phase_collusion_rings(
9383 &mut self,
9384 stats: &mut EnhancedGenerationStatistics,
9385 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
9386 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
9387 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
9388 return Ok(Vec::new());
9389 }
9390 info!("Phase 26b: Generating Collusion Rings");
9391
9392 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9393 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9394 let months = self.config.global.period_months;
9395
9396 let employee_ids: Vec<String> = self
9397 .master_data
9398 .employees
9399 .iter()
9400 .map(|e| e.employee_id.clone())
9401 .collect();
9402 let vendor_ids: Vec<String> = self
9403 .master_data
9404 .vendors
9405 .iter()
9406 .map(|v| v.vendor_id.clone())
9407 .collect();
9408
9409 let mut generator =
9410 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
9411 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
9412
9413 stats.collusion_ring_count = rings.len();
9414 info!(
9415 "Collusion rings generated: {} rings, total members: {}",
9416 rings.len(),
9417 rings
9418 .iter()
9419 .map(datasynth_generators::fraud::CollusionRing::size)
9420 .sum::<usize>()
9421 );
9422 self.check_resources_with_log("post-collusion-rings")?;
9423
9424 Ok(rings)
9425 }
9426
9427 fn phase_temporal_attributes(
9432 &mut self,
9433 stats: &mut EnhancedGenerationStatistics,
9434 ) -> SynthResult<
9435 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
9436 > {
9437 if !self.config.temporal_attributes.enabled {
9438 debug!("Phase 27: Skipped (temporal attributes disabled)");
9439 return Ok(Vec::new());
9440 }
9441 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
9442
9443 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9444 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9445
9446 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
9450 || self.config.temporal_attributes.enabled;
9451 let temporal_config = {
9452 let ta = &self.config.temporal_attributes;
9453 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
9454 .enabled(ta.enabled)
9455 .closed_probability(ta.valid_time.closed_probability)
9456 .avg_validity_days(ta.valid_time.avg_validity_days)
9457 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
9458 .with_version_chains(if generate_version_chains {
9459 ta.avg_versions_per_entity
9460 } else {
9461 1.0
9462 })
9463 .build()
9464 };
9465 let temporal_config = if self
9467 .config
9468 .temporal_attributes
9469 .transaction_time
9470 .allow_backdating
9471 {
9472 let mut c = temporal_config;
9473 c.transaction_time.allow_backdating = true;
9474 c.transaction_time.backdating_probability = self
9475 .config
9476 .temporal_attributes
9477 .transaction_time
9478 .backdating_probability;
9479 c.transaction_time.max_backdate_days = self
9480 .config
9481 .temporal_attributes
9482 .transaction_time
9483 .max_backdate_days;
9484 c
9485 } else {
9486 temporal_config
9487 };
9488 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
9489 temporal_config,
9490 self.seed + 130,
9491 start_date,
9492 );
9493
9494 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
9495 self.seed + 130,
9496 datasynth_core::GeneratorType::Vendor,
9497 );
9498
9499 let chains: Vec<_> = self
9500 .master_data
9501 .vendors
9502 .iter()
9503 .map(|vendor| {
9504 let id = uuid_factory.next();
9505 gen.generate_version_chain(vendor.clone(), id)
9506 })
9507 .collect();
9508
9509 stats.temporal_version_chain_count = chains.len();
9510 info!("Temporal version chains generated: {} chains", chains.len());
9511 self.check_resources_with_log("post-temporal-attributes")?;
9512
9513 Ok(chains)
9514 }
9515
9516 fn phase_entity_relationships(
9526 &self,
9527 journal_entries: &[JournalEntry],
9528 document_flows: &DocumentFlowSnapshot,
9529 stats: &mut EnhancedGenerationStatistics,
9530 ) -> SynthResult<(
9531 Option<datasynth_core::models::EntityGraph>,
9532 Vec<datasynth_core::models::CrossProcessLink>,
9533 )> {
9534 use datasynth_generators::relationships::{
9535 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
9536 TransactionSummary,
9537 };
9538
9539 let rs_enabled = self.config.relationship_strength.enabled;
9540 let cpl_enabled = self.config.cross_process_links.enabled
9541 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
9542
9543 if !rs_enabled && !cpl_enabled {
9544 debug!(
9545 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
9546 );
9547 return Ok((None, Vec::new()));
9548 }
9549
9550 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
9551
9552 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9553 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9554
9555 let company_code = self
9556 .config
9557 .companies
9558 .first()
9559 .map(|c| c.code.as_str())
9560 .unwrap_or("1000");
9561
9562 let gen_config = EntityGraphConfig {
9564 enabled: rs_enabled,
9565 cross_process: datasynth_generators::relationships::CrossProcessConfig {
9566 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
9567 enable_return_flows: false,
9568 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
9569 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
9570 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
9572 1.0
9573 } else {
9574 0.30
9575 },
9576 ..Default::default()
9577 },
9578 strength_config: datasynth_generators::relationships::StrengthConfig {
9579 transaction_volume_weight: self
9580 .config
9581 .relationship_strength
9582 .calculation
9583 .transaction_volume_weight,
9584 transaction_count_weight: self
9585 .config
9586 .relationship_strength
9587 .calculation
9588 .transaction_count_weight,
9589 duration_weight: self
9590 .config
9591 .relationship_strength
9592 .calculation
9593 .relationship_duration_weight,
9594 recency_weight: self.config.relationship_strength.calculation.recency_weight,
9595 mutual_connections_weight: self
9596 .config
9597 .relationship_strength
9598 .calculation
9599 .mutual_connections_weight,
9600 recency_half_life_days: self
9601 .config
9602 .relationship_strength
9603 .calculation
9604 .recency_half_life_days,
9605 },
9606 ..Default::default()
9607 };
9608
9609 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
9610
9611 let entity_graph = if rs_enabled {
9613 let vendor_summaries: Vec<EntitySummary> = self
9615 .master_data
9616 .vendors
9617 .iter()
9618 .map(|v| {
9619 EntitySummary::new(
9620 &v.vendor_id,
9621 &v.name,
9622 datasynth_core::models::GraphEntityType::Vendor,
9623 start_date,
9624 )
9625 })
9626 .collect();
9627
9628 let customer_summaries: Vec<EntitySummary> = self
9629 .master_data
9630 .customers
9631 .iter()
9632 .map(|c| {
9633 EntitySummary::new(
9634 &c.customer_id,
9635 &c.name,
9636 datasynth_core::models::GraphEntityType::Customer,
9637 start_date,
9638 )
9639 })
9640 .collect();
9641
9642 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9647 std::collections::HashMap::new();
9648
9649 for je in journal_entries {
9650 let cc = je.header.company_code.clone();
9651 let posting_date = je.header.posting_date;
9652 for line in &je.lines {
9653 if let Some(ref tp) = line.trading_partner {
9654 let amount = if line.debit_amount > line.credit_amount {
9655 line.debit_amount
9656 } else {
9657 line.credit_amount
9658 };
9659 let entry = txn_summaries
9660 .entry((cc.clone(), tp.clone()))
9661 .or_insert_with(|| TransactionSummary {
9662 total_volume: rust_decimal::Decimal::ZERO,
9663 transaction_count: 0,
9664 first_transaction_date: posting_date,
9665 last_transaction_date: posting_date,
9666 related_entities: std::collections::HashSet::new(),
9667 });
9668 entry.total_volume += amount;
9669 entry.transaction_count += 1;
9670 if posting_date < entry.first_transaction_date {
9671 entry.first_transaction_date = posting_date;
9672 }
9673 if posting_date > entry.last_transaction_date {
9674 entry.last_transaction_date = posting_date;
9675 }
9676 entry.related_entities.insert(cc.clone());
9677 }
9678 }
9679 }
9680
9681 for chain in &document_flows.p2p_chains {
9684 let cc = chain.purchase_order.header.company_code.clone();
9685 let vendor_id = chain.purchase_order.vendor_id.clone();
9686 let po_date = chain.purchase_order.header.document_date;
9687 let amount = chain.purchase_order.total_net_amount;
9688
9689 let entry = txn_summaries
9690 .entry((cc.clone(), vendor_id))
9691 .or_insert_with(|| TransactionSummary {
9692 total_volume: rust_decimal::Decimal::ZERO,
9693 transaction_count: 0,
9694 first_transaction_date: po_date,
9695 last_transaction_date: po_date,
9696 related_entities: std::collections::HashSet::new(),
9697 });
9698 entry.total_volume += amount;
9699 entry.transaction_count += 1;
9700 if po_date < entry.first_transaction_date {
9701 entry.first_transaction_date = po_date;
9702 }
9703 if po_date > entry.last_transaction_date {
9704 entry.last_transaction_date = po_date;
9705 }
9706 entry.related_entities.insert(cc);
9707 }
9708
9709 for chain in &document_flows.o2c_chains {
9711 let cc = chain.sales_order.header.company_code.clone();
9712 let customer_id = chain.sales_order.customer_id.clone();
9713 let so_date = chain.sales_order.header.document_date;
9714 let amount = chain.sales_order.total_net_amount;
9715
9716 let entry = txn_summaries
9717 .entry((cc.clone(), customer_id))
9718 .or_insert_with(|| TransactionSummary {
9719 total_volume: rust_decimal::Decimal::ZERO,
9720 transaction_count: 0,
9721 first_transaction_date: so_date,
9722 last_transaction_date: so_date,
9723 related_entities: std::collections::HashSet::new(),
9724 });
9725 entry.total_volume += amount;
9726 entry.transaction_count += 1;
9727 if so_date < entry.first_transaction_date {
9728 entry.first_transaction_date = so_date;
9729 }
9730 if so_date > entry.last_transaction_date {
9731 entry.last_transaction_date = so_date;
9732 }
9733 entry.related_entities.insert(cc);
9734 }
9735
9736 let as_of_date = journal_entries
9737 .last()
9738 .map(|je| je.header.posting_date)
9739 .unwrap_or(start_date);
9740
9741 let graph = gen.generate_entity_graph(
9742 company_code,
9743 as_of_date,
9744 &vendor_summaries,
9745 &customer_summaries,
9746 &txn_summaries,
9747 );
9748
9749 info!(
9750 "Entity relationship graph: {} nodes, {} edges",
9751 graph.nodes.len(),
9752 graph.edges.len()
9753 );
9754 stats.entity_relationship_node_count = graph.nodes.len();
9755 stats.entity_relationship_edge_count = graph.edges.len();
9756 Some(graph)
9757 } else {
9758 None
9759 };
9760
9761 let cross_process_links = if cpl_enabled {
9763 let gr_refs: Vec<GoodsReceiptRef> = document_flows
9765 .p2p_chains
9766 .iter()
9767 .flat_map(|chain| {
9768 let vendor_id = chain.purchase_order.vendor_id.clone();
9769 let cc = chain.purchase_order.header.company_code.clone();
9770 chain.goods_receipts.iter().flat_map(move |gr| {
9771 gr.items.iter().filter_map({
9772 let doc_id = gr.header.document_id.clone();
9773 let v_id = vendor_id.clone();
9774 let company = cc.clone();
9775 let receipt_date = gr.header.document_date;
9776 move |item| {
9777 item.base
9778 .material_id
9779 .as_ref()
9780 .map(|mat_id| GoodsReceiptRef {
9781 document_id: doc_id.clone(),
9782 material_id: mat_id.clone(),
9783 quantity: item.base.quantity,
9784 receipt_date,
9785 vendor_id: v_id.clone(),
9786 company_code: company.clone(),
9787 })
9788 }
9789 })
9790 })
9791 })
9792 .collect();
9793
9794 let del_refs: Vec<DeliveryRef> = document_flows
9796 .o2c_chains
9797 .iter()
9798 .flat_map(|chain| {
9799 let customer_id = chain.sales_order.customer_id.clone();
9800 let cc = chain.sales_order.header.company_code.clone();
9801 chain.deliveries.iter().flat_map(move |del| {
9802 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
9803 del.items.iter().filter_map({
9804 let doc_id = del.header.document_id.clone();
9805 let c_id = customer_id.clone();
9806 let company = cc.clone();
9807 move |item| {
9808 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
9809 document_id: doc_id.clone(),
9810 material_id: mat_id.clone(),
9811 quantity: item.base.quantity,
9812 delivery_date,
9813 customer_id: c_id.clone(),
9814 company_code: company.clone(),
9815 })
9816 }
9817 })
9818 })
9819 })
9820 .collect();
9821
9822 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
9823 info!("Cross-process links generated: {} links", links.len());
9824 stats.cross_process_link_count = links.len();
9825 links
9826 } else {
9827 Vec::new()
9828 };
9829
9830 self.check_resources_with_log("post-entity-relationships")?;
9831 Ok((entity_graph, cross_process_links))
9832 }
9833
9834 fn phase_industry_data(
9836 &self,
9837 stats: &mut EnhancedGenerationStatistics,
9838 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
9839 if !self.config.industry_specific.enabled {
9840 return None;
9841 }
9842 info!("Phase 29: Generating industry-specific data");
9843 let output = datasynth_generators::industry::factory::generate_industry_output(
9844 self.config.global.industry,
9845 );
9846 stats.industry_gl_account_count = output.gl_accounts.len();
9847 info!(
9848 "Industry data generated: {} GL accounts for {:?}",
9849 output.gl_accounts.len(),
9850 self.config.global.industry
9851 );
9852 Some(output)
9853 }
9854
9855 fn phase_opening_balances(
9857 &mut self,
9858 coa: &Arc<ChartOfAccounts>,
9859 stats: &mut EnhancedGenerationStatistics,
9860 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
9861 if !self.config.balance.generate_opening_balances {
9862 debug!("Phase 3b: Skipped (opening balance generation disabled)");
9863 return Ok(Vec::new());
9864 }
9865 info!("Phase 3b: Generating Opening Balances");
9866
9867 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9868 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9869 let fiscal_year = start_date.year();
9870
9871 let industry = match self.config.global.industry {
9872 IndustrySector::Manufacturing => IndustryType::Manufacturing,
9873 IndustrySector::Retail => IndustryType::Retail,
9874 IndustrySector::FinancialServices => IndustryType::Financial,
9875 IndustrySector::Healthcare => IndustryType::Healthcare,
9876 IndustrySector::Technology => IndustryType::Technology,
9877 _ => IndustryType::Manufacturing,
9878 };
9879
9880 let config = datasynth_generators::OpeningBalanceConfig {
9881 industry,
9882 ..Default::default()
9883 };
9884 let mut gen =
9885 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
9886
9887 let mut results = Vec::new();
9888 for company in &self.config.companies {
9889 let spec = OpeningBalanceSpec::new(
9890 company.code.clone(),
9891 start_date,
9892 fiscal_year,
9893 company.currency.clone(),
9894 rust_decimal::Decimal::new(10_000_000, 0),
9895 industry,
9896 );
9897 let ob = gen.generate(&spec, coa, start_date, &company.code);
9898 results.push(ob);
9899 }
9900
9901 stats.opening_balance_count = results.len();
9902 info!("Opening balances generated: {} companies", results.len());
9903 self.check_resources_with_log("post-opening-balances")?;
9904
9905 Ok(results)
9906 }
9907
9908 fn phase_subledger_reconciliation(
9910 &mut self,
9911 subledger: &SubledgerSnapshot,
9912 entries: &[JournalEntry],
9913 stats: &mut EnhancedGenerationStatistics,
9914 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
9915 if !self.config.balance.reconcile_subledgers {
9916 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
9917 return Ok(Vec::new());
9918 }
9919 info!("Phase 9b: Reconciling GL to subledger balances");
9920
9921 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9922 .map(|d| d + chrono::Months::new(self.config.global.period_months))
9923 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9924
9925 let tracker_config = BalanceTrackerConfig {
9927 validate_on_each_entry: false,
9928 track_history: false,
9929 fail_on_validation_error: false,
9930 ..Default::default()
9931 };
9932 let recon_currency = self
9933 .config
9934 .companies
9935 .first()
9936 .map(|c| c.currency.clone())
9937 .unwrap_or_else(|| "USD".to_string());
9938 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
9939 let validation_errors = tracker.apply_entries(entries);
9940 if !validation_errors.is_empty() {
9941 warn!(
9942 error_count = validation_errors.len(),
9943 "Balance tracker encountered validation errors during subledger reconciliation"
9944 );
9945 for err in &validation_errors {
9946 debug!("Balance validation error: {:?}", err);
9947 }
9948 }
9949
9950 let mut engine = datasynth_generators::ReconciliationEngine::new(
9951 datasynth_generators::ReconciliationConfig::default(),
9952 );
9953
9954 let mut results = Vec::new();
9955 let company_code = self
9956 .config
9957 .companies
9958 .first()
9959 .map(|c| c.code.as_str())
9960 .unwrap_or("1000");
9961
9962 if !subledger.ar_invoices.is_empty() {
9964 let gl_balance = tracker
9965 .get_account_balance(
9966 company_code,
9967 datasynth_core::accounts::control_accounts::AR_CONTROL,
9968 )
9969 .map(|b| b.closing_balance)
9970 .unwrap_or_default();
9971 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
9972 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
9973 }
9974
9975 if !subledger.ap_invoices.is_empty() {
9977 let gl_balance = tracker
9978 .get_account_balance(
9979 company_code,
9980 datasynth_core::accounts::control_accounts::AP_CONTROL,
9981 )
9982 .map(|b| b.closing_balance)
9983 .unwrap_or_default();
9984 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
9985 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
9986 }
9987
9988 if !subledger.fa_records.is_empty() {
9990 let gl_asset_balance = tracker
9991 .get_account_balance(
9992 company_code,
9993 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
9994 )
9995 .map(|b| b.closing_balance)
9996 .unwrap_or_default();
9997 let gl_accum_depr_balance = tracker
9998 .get_account_balance(
9999 company_code,
10000 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10001 )
10002 .map(|b| b.closing_balance)
10003 .unwrap_or_default();
10004 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10005 subledger.fa_records.iter().collect();
10006 let (asset_recon, depr_recon) = engine.reconcile_fa(
10007 company_code,
10008 end_date,
10009 gl_asset_balance,
10010 gl_accum_depr_balance,
10011 &fa_refs,
10012 );
10013 results.push(asset_recon);
10014 results.push(depr_recon);
10015 }
10016
10017 if !subledger.inventory_positions.is_empty() {
10019 let gl_balance = tracker
10020 .get_account_balance(
10021 company_code,
10022 datasynth_core::accounts::control_accounts::INVENTORY,
10023 )
10024 .map(|b| b.closing_balance)
10025 .unwrap_or_default();
10026 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10027 subledger.inventory_positions.iter().collect();
10028 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10029 }
10030
10031 stats.subledger_reconciliation_count = results.len();
10032 let passed = results.iter().filter(|r| r.is_balanced()).count();
10033 let failed = results.len() - passed;
10034 info!(
10035 "Subledger reconciliation: {} checks, {} passed, {} failed",
10036 results.len(),
10037 passed,
10038 failed
10039 );
10040 self.check_resources_with_log("post-subledger-reconciliation")?;
10041
10042 Ok(results)
10043 }
10044
10045 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10047 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10048
10049 let coa_framework = self.resolve_coa_framework();
10050
10051 let mut gen = ChartOfAccountsGenerator::new(
10052 self.config.chart_of_accounts.complexity,
10053 self.config.global.industry,
10054 self.seed,
10055 )
10056 .with_coa_framework(coa_framework);
10057
10058 let coa = Arc::new(gen.generate());
10059 self.coa = Some(Arc::clone(&coa));
10060
10061 if let Some(pb) = pb {
10062 pb.finish_with_message("Chart of Accounts complete");
10063 }
10064
10065 Ok(coa)
10066 }
10067
10068 fn generate_master_data(&mut self) -> SynthResult<()> {
10070 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10071 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10072 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10073
10074 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
10076
10077 let pack = self.primary_pack().clone();
10079
10080 let vendors_per_company = self.phase_config.vendors_per_company;
10082 let customers_per_company = self.phase_config.customers_per_company;
10083 let materials_per_company = self.phase_config.materials_per_company;
10084 let assets_per_company = self.phase_config.assets_per_company;
10085 let coa_framework = self.resolve_coa_framework();
10086
10087 let per_company_results: Vec<_> = self
10090 .config
10091 .companies
10092 .par_iter()
10093 .enumerate()
10094 .map(|(i, company)| {
10095 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
10096 let pack = pack.clone();
10097
10098 let mut vendor_gen = VendorGenerator::new(company_seed);
10100 vendor_gen.set_country_pack(pack.clone());
10101 vendor_gen.set_coa_framework(coa_framework);
10102 vendor_gen.set_counter_offset(i * vendors_per_company);
10103 vendor_gen.set_template_provider(self.template_provider.clone());
10106 if self.config.vendor_network.enabled {
10108 let vn = &self.config.vendor_network;
10109 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
10110 enabled: true,
10111 depth: vn.depth,
10112 tier1_count: datasynth_generators::TierCountConfig::new(
10113 vn.tier1.min,
10114 vn.tier1.max,
10115 ),
10116 tier2_per_parent: datasynth_generators::TierCountConfig::new(
10117 vn.tier2_per_parent.min,
10118 vn.tier2_per_parent.max,
10119 ),
10120 tier3_per_parent: datasynth_generators::TierCountConfig::new(
10121 vn.tier3_per_parent.min,
10122 vn.tier3_per_parent.max,
10123 ),
10124 cluster_distribution: datasynth_generators::ClusterDistribution {
10125 reliable_strategic: vn.clusters.reliable_strategic,
10126 standard_operational: vn.clusters.standard_operational,
10127 transactional: vn.clusters.transactional,
10128 problematic: vn.clusters.problematic,
10129 },
10130 concentration_limits: datasynth_generators::ConcentrationLimits {
10131 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
10132 max_top5: vn.dependencies.top_5_concentration,
10133 },
10134 ..datasynth_generators::VendorNetworkConfig::default()
10135 });
10136 }
10137 let vendor_pool =
10138 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
10139
10140 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
10142 customer_gen.set_country_pack(pack.clone());
10143 customer_gen.set_coa_framework(coa_framework);
10144 customer_gen.set_counter_offset(i * customers_per_company);
10145 customer_gen.set_template_provider(self.template_provider.clone());
10147 if self.config.customer_segmentation.enabled {
10149 let cs = &self.config.customer_segmentation;
10150 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
10151 enabled: true,
10152 segment_distribution: datasynth_generators::SegmentDistribution {
10153 enterprise: cs.value_segments.enterprise.customer_share,
10154 mid_market: cs.value_segments.mid_market.customer_share,
10155 smb: cs.value_segments.smb.customer_share,
10156 consumer: cs.value_segments.consumer.customer_share,
10157 },
10158 referral_config: datasynth_generators::ReferralConfig {
10159 enabled: cs.networks.referrals.enabled,
10160 referral_rate: cs.networks.referrals.referral_rate,
10161 ..Default::default()
10162 },
10163 hierarchy_config: datasynth_generators::HierarchyConfig {
10164 enabled: cs.networks.corporate_hierarchies.enabled,
10165 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
10166 ..Default::default()
10167 },
10168 ..Default::default()
10169 };
10170 customer_gen.set_segmentation_config(seg_cfg);
10171 }
10172 let customer_pool = customer_gen.generate_customer_pool(
10173 customers_per_company,
10174 &company.code,
10175 start_date,
10176 );
10177
10178 let mut material_gen = MaterialGenerator::new(company_seed + 200);
10180 material_gen.set_country_pack(pack.clone());
10181 material_gen.set_counter_offset(i * materials_per_company);
10182 material_gen.set_template_provider(self.template_provider.clone());
10184 let material_pool = material_gen.generate_material_pool(
10185 materials_per_company,
10186 &company.code,
10187 start_date,
10188 );
10189
10190 let mut asset_gen = AssetGenerator::new(company_seed + 300);
10192 asset_gen.set_template_provider(self.template_provider.clone());
10194 let asset_pool = asset_gen.generate_asset_pool(
10195 assets_per_company,
10196 &company.code,
10197 (start_date, end_date),
10198 );
10199
10200 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
10202 employee_gen.set_country_pack(pack);
10203 employee_gen.set_template_provider(self.template_provider.clone());
10205 let employee_pool =
10206 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
10207
10208 let employee_change_history =
10210 employee_gen.generate_all_change_history(&employee_pool, end_date);
10211
10212 let employee_ids: Vec<String> = employee_pool
10214 .employees
10215 .iter()
10216 .map(|e| e.employee_id.clone())
10217 .collect();
10218 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
10219 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
10220
10221 (
10222 vendor_pool.vendors,
10223 customer_pool.customers,
10224 material_pool.materials,
10225 asset_pool.assets,
10226 employee_pool.employees,
10227 employee_change_history,
10228 cost_centers,
10229 )
10230 })
10231 .collect();
10232
10233 for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
10235 per_company_results
10236 {
10237 self.master_data.vendors.extend(vendors);
10238 self.master_data.customers.extend(customers);
10239 self.master_data.materials.extend(materials);
10240 self.master_data.assets.extend(assets);
10241 self.master_data.employees.extend(employees);
10242 self.master_data.cost_centers.extend(cost_centers);
10243 self.master_data
10244 .employee_change_history
10245 .extend(change_history);
10246 }
10247
10248 {
10252 use datasynth_core::models::IndustrySector;
10253 use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
10254 let industry = match self.config.global.industry {
10255 IndustrySector::Manufacturing => "manufacturing",
10256 IndustrySector::Retail => "retail",
10257 IndustrySector::FinancialServices => "financial_services",
10258 IndustrySector::Technology => "technology",
10259 IndustrySector::Healthcare => "healthcare",
10260 _ => "other",
10261 };
10262 for (i, company) in self.config.companies.iter().enumerate() {
10263 let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
10264 let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
10265 let profile = profile_gen.generate(&company.code, industry);
10266 self.master_data.organizational_profiles.push(profile);
10267 }
10268 }
10269
10270 if let Some(pb) = &pb {
10271 pb.inc(total);
10272 }
10273 if let Some(pb) = pb {
10274 pb.finish_with_message("Master data generation complete");
10275 }
10276
10277 Ok(())
10278 }
10279
10280 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
10282 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10283 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10284
10285 let months = (self.config.global.period_months as usize).max(1);
10288 let p2p_count = self
10289 .phase_config
10290 .p2p_chains
10291 .min(self.master_data.vendors.len() * 2 * months);
10292 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
10293
10294 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
10296 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
10297 p2p_gen.set_country_pack(self.primary_pack().clone());
10298 if let Some(ctx) = &self.temporal_context {
10302 p2p_gen.set_temporal_context(Arc::clone(ctx));
10303 }
10304
10305 for i in 0..p2p_count {
10306 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
10307 let materials: Vec<&Material> = self
10308 .master_data
10309 .materials
10310 .iter()
10311 .skip(i % self.master_data.materials.len().max(1))
10312 .take(2.min(self.master_data.materials.len()))
10313 .collect();
10314
10315 if materials.is_empty() {
10316 continue;
10317 }
10318
10319 let company = &self.config.companies[i % self.config.companies.len()];
10320 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
10321 let fiscal_period = po_date.month() as u8;
10322 let created_by = if self.master_data.employees.is_empty() {
10323 "SYSTEM"
10324 } else {
10325 self.master_data.employees[i % self.master_data.employees.len()]
10326 .user_id
10327 .as_str()
10328 };
10329
10330 let chain = p2p_gen.generate_chain(
10331 &company.code,
10332 vendor,
10333 &materials,
10334 po_date,
10335 start_date.year() as u16,
10336 fiscal_period,
10337 created_by,
10338 );
10339
10340 flows.purchase_orders.push(chain.purchase_order.clone());
10342 flows.goods_receipts.extend(chain.goods_receipts.clone());
10343 if let Some(vi) = &chain.vendor_invoice {
10344 flows.vendor_invoices.push(vi.clone());
10345 }
10346 if let Some(payment) = &chain.payment {
10347 flows.payments.push(payment.clone());
10348 }
10349 for remainder in &chain.remainder_payments {
10350 flows.payments.push(remainder.clone());
10351 }
10352 flows.p2p_chains.push(chain);
10353
10354 if let Some(pb) = &pb {
10355 pb.inc(1);
10356 }
10357 }
10358
10359 if let Some(pb) = pb {
10360 pb.finish_with_message("P2P document flows complete");
10361 }
10362
10363 let o2c_count = self
10366 .phase_config
10367 .o2c_chains
10368 .min(self.master_data.customers.len() * 2 * months);
10369 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
10370
10371 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
10373 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
10374 o2c_gen.set_country_pack(self.primary_pack().clone());
10375 if let Some(ctx) = &self.temporal_context {
10377 o2c_gen.set_temporal_context(Arc::clone(ctx));
10378 }
10379
10380 for i in 0..o2c_count {
10381 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
10382 let materials: Vec<&Material> = self
10383 .master_data
10384 .materials
10385 .iter()
10386 .skip(i % self.master_data.materials.len().max(1))
10387 .take(2.min(self.master_data.materials.len()))
10388 .collect();
10389
10390 if materials.is_empty() {
10391 continue;
10392 }
10393
10394 let company = &self.config.companies[i % self.config.companies.len()];
10395 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
10396 let fiscal_period = so_date.month() as u8;
10397 let created_by = if self.master_data.employees.is_empty() {
10398 "SYSTEM"
10399 } else {
10400 self.master_data.employees[i % self.master_data.employees.len()]
10401 .user_id
10402 .as_str()
10403 };
10404
10405 let chain = o2c_gen.generate_chain(
10406 &company.code,
10407 customer,
10408 &materials,
10409 so_date,
10410 start_date.year() as u16,
10411 fiscal_period,
10412 created_by,
10413 );
10414
10415 flows.sales_orders.push(chain.sales_order.clone());
10417 flows.deliveries.extend(chain.deliveries.clone());
10418 if let Some(ci) = &chain.customer_invoice {
10419 flows.customer_invoices.push(ci.clone());
10420 }
10421 if let Some(receipt) = &chain.customer_receipt {
10422 flows.payments.push(receipt.clone());
10423 }
10424 for receipt in &chain.remainder_receipts {
10426 flows.payments.push(receipt.clone());
10427 }
10428 flows.o2c_chains.push(chain);
10429
10430 if let Some(pb) = &pb {
10431 pb.inc(1);
10432 }
10433 }
10434
10435 if let Some(pb) = pb {
10436 pb.finish_with_message("O2C document flows complete");
10437 }
10438
10439 {
10443 let mut refs = Vec::new();
10444 for doc in &flows.purchase_orders {
10445 refs.extend(doc.header.document_references.iter().cloned());
10446 }
10447 for doc in &flows.goods_receipts {
10448 refs.extend(doc.header.document_references.iter().cloned());
10449 }
10450 for doc in &flows.vendor_invoices {
10451 refs.extend(doc.header.document_references.iter().cloned());
10452 }
10453 for doc in &flows.sales_orders {
10454 refs.extend(doc.header.document_references.iter().cloned());
10455 }
10456 for doc in &flows.deliveries {
10457 refs.extend(doc.header.document_references.iter().cloned());
10458 }
10459 for doc in &flows.customer_invoices {
10460 refs.extend(doc.header.document_references.iter().cloned());
10461 }
10462 for doc in &flows.payments {
10463 refs.extend(doc.header.document_references.iter().cloned());
10464 }
10465 debug!(
10466 "Collected {} document cross-references from document headers",
10467 refs.len()
10468 );
10469 flows.document_references = refs;
10470 }
10471
10472 Ok(())
10473 }
10474
10475 fn generate_journal_entries(
10477 &mut self,
10478 coa: &Arc<ChartOfAccounts>,
10479 ) -> SynthResult<Vec<JournalEntry>> {
10480 use datasynth_core::traits::ParallelGenerator;
10481
10482 let total = self.calculate_total_transactions();
10483 let pb = self.create_progress_bar(total, "Generating Journal Entries");
10484
10485 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10486 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10487 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10488
10489 let company_codes: Vec<String> = self
10490 .config
10491 .companies
10492 .iter()
10493 .map(|c| c.code.clone())
10494 .collect();
10495
10496 let mut generator = JournalEntryGenerator::new_with_params(
10497 self.config.transactions.clone(),
10498 Arc::clone(coa),
10499 company_codes,
10500 start_date,
10501 end_date,
10502 self.seed,
10503 );
10504 let bp = &self.config.business_processes;
10507 generator.set_business_process_weights(
10508 bp.o2c_weight,
10509 bp.p2p_weight,
10510 bp.r2r_weight,
10511 bp.h2r_weight,
10512 bp.a2r_weight,
10513 );
10514 generator
10519 .set_advanced_distributions(&self.config.distributions, self.seed + 400)
10520 .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
10521 let generator = generator;
10522
10523 let je_pack = self.primary_pack();
10527
10528 let mut generator = generator
10529 .with_master_data(
10530 &self.master_data.vendors,
10531 &self.master_data.customers,
10532 &self.master_data.materials,
10533 )
10534 .with_country_pack_names(je_pack)
10535 .with_country_pack_temporal(
10536 self.config.temporal_patterns.clone(),
10537 self.seed + 200,
10538 je_pack,
10539 )
10540 .with_persona_errors(true)
10541 .with_fraud_config(self.config.fraud.clone());
10542
10543 let temporal_enabled = self.config.temporal.enabled;
10548 let regimes_enabled = self.config.distributions.regime_changes.enabled;
10549 if temporal_enabled || regimes_enabled {
10550 let mut drift_config = if temporal_enabled {
10551 self.config.temporal.to_core_config()
10552 } else {
10553 datasynth_core::distributions::DriftConfig::default()
10556 };
10557 if regimes_enabled {
10558 self.config
10559 .distributions
10560 .regime_changes
10561 .apply_to(&mut drift_config, start_date);
10562 }
10563 generator = generator.with_drift_config(drift_config, self.seed + 100);
10564 }
10565
10566 self.check_memory_limit()?;
10568
10569 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
10571
10572 let entries = if total >= 10_000 && num_threads > 1 {
10576 let sub_generators = generator.split(num_threads);
10579 let entries_per_thread = total as usize / num_threads;
10580 let remainder = total as usize % num_threads;
10581
10582 let batches: Vec<Vec<JournalEntry>> = sub_generators
10583 .into_par_iter()
10584 .enumerate()
10585 .map(|(i, mut gen)| {
10586 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
10587 gen.generate_batch(count)
10588 })
10589 .collect();
10590
10591 let entries = JournalEntryGenerator::merge_results(batches);
10593
10594 if let Some(pb) = &pb {
10595 pb.inc(total);
10596 }
10597 entries
10598 } else {
10599 let mut entries = Vec::with_capacity(total as usize);
10601 for _ in 0..total {
10602 let entry = generator.generate();
10603 entries.push(entry);
10604 if let Some(pb) = &pb {
10605 pb.inc(1);
10606 }
10607 }
10608 entries
10609 };
10610
10611 if let Some(pb) = pb {
10612 pb.finish_with_message("Journal entries complete");
10613 }
10614
10615 Ok(entries)
10616 }
10617
10618 fn generate_jes_from_document_flows(
10623 &mut self,
10624 flows: &DocumentFlowSnapshot,
10625 ) -> SynthResult<Vec<JournalEntry>> {
10626 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
10627 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
10628
10629 let je_config = match self.resolve_coa_framework() {
10630 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
10631 CoAFramework::GermanSkr04 => {
10632 let fa = datasynth_core::FrameworkAccounts::german_gaap();
10633 DocumentFlowJeConfig::from(&fa)
10634 }
10635 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
10636 };
10637
10638 let populate_fec = je_config.populate_fec_fields;
10639 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
10640
10641 if populate_fec {
10645 let mut aux_lookup = std::collections::HashMap::new();
10646 for vendor in &self.master_data.vendors {
10647 if let Some(ref aux) = vendor.auxiliary_gl_account {
10648 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
10649 }
10650 }
10651 for customer in &self.master_data.customers {
10652 if let Some(ref aux) = customer.auxiliary_gl_account {
10653 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
10654 }
10655 }
10656 if !aux_lookup.is_empty() {
10657 generator.set_auxiliary_account_lookup(aux_lookup);
10658 }
10659 }
10660
10661 let mut entries = Vec::new();
10662
10663 for chain in &flows.p2p_chains {
10665 let chain_entries = generator.generate_from_p2p_chain(chain);
10666 entries.extend(chain_entries);
10667 if let Some(pb) = &pb {
10668 pb.inc(1);
10669 }
10670 }
10671
10672 for chain in &flows.o2c_chains {
10674 let chain_entries = generator.generate_from_o2c_chain(chain);
10675 entries.extend(chain_entries);
10676 if let Some(pb) = &pb {
10677 pb.inc(1);
10678 }
10679 }
10680
10681 if let Some(pb) = pb {
10682 pb.finish_with_message(format!(
10683 "Generated {} JEs from document flows",
10684 entries.len()
10685 ));
10686 }
10687
10688 Ok(entries)
10689 }
10690
10691 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
10697 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
10698
10699 let mut jes = Vec::with_capacity(payroll_runs.len());
10700
10701 for run in payroll_runs {
10702 let mut je = JournalEntry::new_simple(
10703 format!("JE-PAYROLL-{}", run.payroll_id),
10704 run.company_code.clone(),
10705 run.run_date,
10706 format!("Payroll {}", run.payroll_id),
10707 );
10708
10709 je.add_line(JournalEntryLine {
10711 line_number: 1,
10712 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
10713 debit_amount: run.total_gross,
10714 reference: Some(run.payroll_id.clone()),
10715 text: Some(format!(
10716 "Payroll {} ({} employees)",
10717 run.payroll_id, run.employee_count
10718 )),
10719 ..Default::default()
10720 });
10721
10722 je.add_line(JournalEntryLine {
10724 line_number: 2,
10725 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
10726 credit_amount: run.total_gross,
10727 reference: Some(run.payroll_id.clone()),
10728 ..Default::default()
10729 });
10730
10731 jes.push(je);
10732 }
10733
10734 jes
10735 }
10736
10737 fn link_document_flows_to_subledgers(
10742 &mut self,
10743 flows: &DocumentFlowSnapshot,
10744 ) -> SynthResult<SubledgerSnapshot> {
10745 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
10746 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
10747
10748 let vendor_names: std::collections::HashMap<String, String> = self
10750 .master_data
10751 .vendors
10752 .iter()
10753 .map(|v| (v.vendor_id.clone(), v.name.clone()))
10754 .collect();
10755 let customer_names: std::collections::HashMap<String, String> = self
10756 .master_data
10757 .customers
10758 .iter()
10759 .map(|c| (c.customer_id.clone(), c.name.clone()))
10760 .collect();
10761
10762 let mut linker = DocumentFlowLinker::new()
10763 .with_vendor_names(vendor_names)
10764 .with_customer_names(customer_names);
10765
10766 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
10768 if let Some(pb) = &pb {
10769 pb.inc(flows.vendor_invoices.len() as u64);
10770 }
10771
10772 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
10774 if let Some(pb) = &pb {
10775 pb.inc(flows.customer_invoices.len() as u64);
10776 }
10777
10778 if let Some(pb) = pb {
10779 pb.finish_with_message(format!(
10780 "Linked {} AP and {} AR invoices",
10781 ap_invoices.len(),
10782 ar_invoices.len()
10783 ));
10784 }
10785
10786 Ok(SubledgerSnapshot {
10787 ap_invoices,
10788 ar_invoices,
10789 fa_records: Vec::new(),
10790 inventory_positions: Vec::new(),
10791 inventory_movements: Vec::new(),
10792 ar_aging_reports: Vec::new(),
10794 ap_aging_reports: Vec::new(),
10795 depreciation_runs: Vec::new(),
10797 inventory_valuations: Vec::new(),
10798 dunning_runs: Vec::new(),
10800 dunning_letters: Vec::new(),
10801 })
10802 }
10803
10804 #[allow(clippy::too_many_arguments)]
10809 fn generate_ocpm_events(
10810 &mut self,
10811 flows: &DocumentFlowSnapshot,
10812 sourcing: &SourcingSnapshot,
10813 hr: &HrSnapshot,
10814 manufacturing: &ManufacturingSnapshot,
10815 banking: &BankingSnapshot,
10816 audit: &AuditSnapshot,
10817 financial_reporting: &FinancialReportingSnapshot,
10818 ) -> SynthResult<OcpmSnapshot> {
10819 let total_chains = flows.p2p_chains.len()
10820 + flows.o2c_chains.len()
10821 + sourcing.sourcing_projects.len()
10822 + hr.payroll_runs.len()
10823 + manufacturing.production_orders.len()
10824 + banking.customers.len()
10825 + audit.engagements.len()
10826 + financial_reporting.bank_reconciliations.len();
10827 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
10828
10829 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
10831 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
10832
10833 let ocpm_config = OcpmGeneratorConfig {
10835 generate_p2p: true,
10836 generate_o2c: true,
10837 generate_s2c: !sourcing.sourcing_projects.is_empty(),
10838 generate_h2r: !hr.payroll_runs.is_empty(),
10839 generate_mfg: !manufacturing.production_orders.is_empty(),
10840 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
10841 generate_bank: !banking.customers.is_empty(),
10842 generate_audit: !audit.engagements.is_empty(),
10843 happy_path_rate: 0.75,
10844 exception_path_rate: 0.20,
10845 error_path_rate: 0.05,
10846 add_duration_variability: true,
10847 duration_std_dev_factor: 0.3,
10848 };
10849 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
10850 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
10851
10852 let available_users: Vec<String> = self
10854 .master_data
10855 .employees
10856 .iter()
10857 .take(20)
10858 .map(|e| e.user_id.clone())
10859 .collect();
10860
10861 let fallback_date =
10863 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
10864 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10865 .unwrap_or(fallback_date);
10866 let base_midnight = base_date
10867 .and_hms_opt(0, 0, 0)
10868 .expect("midnight is always valid");
10869 let base_datetime =
10870 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
10871
10872 let add_result = |event_log: &mut OcpmEventLog,
10874 result: datasynth_ocpm::CaseGenerationResult| {
10875 for event in result.events {
10876 event_log.add_event(event);
10877 }
10878 for object in result.objects {
10879 event_log.add_object(object);
10880 }
10881 for relationship in result.relationships {
10882 event_log.add_relationship(relationship);
10883 }
10884 for corr in result.correlation_events {
10885 event_log.add_correlation_event(corr);
10886 }
10887 event_log.add_case(result.case_trace);
10888 };
10889
10890 for chain in &flows.p2p_chains {
10892 let po = &chain.purchase_order;
10893 let documents = P2pDocuments::new(
10894 &po.header.document_id,
10895 &po.vendor_id,
10896 &po.header.company_code,
10897 po.total_net_amount,
10898 &po.header.currency,
10899 &ocpm_uuid_factory,
10900 )
10901 .with_goods_receipt(
10902 chain
10903 .goods_receipts
10904 .first()
10905 .map(|gr| gr.header.document_id.as_str())
10906 .unwrap_or(""),
10907 &ocpm_uuid_factory,
10908 )
10909 .with_invoice(
10910 chain
10911 .vendor_invoice
10912 .as_ref()
10913 .map(|vi| vi.header.document_id.as_str())
10914 .unwrap_or(""),
10915 &ocpm_uuid_factory,
10916 )
10917 .with_payment(
10918 chain
10919 .payment
10920 .as_ref()
10921 .map(|p| p.header.document_id.as_str())
10922 .unwrap_or(""),
10923 &ocpm_uuid_factory,
10924 );
10925
10926 let start_time =
10927 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
10928 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
10929 add_result(&mut event_log, result);
10930
10931 if let Some(pb) = &pb {
10932 pb.inc(1);
10933 }
10934 }
10935
10936 for chain in &flows.o2c_chains {
10938 let so = &chain.sales_order;
10939 let documents = O2cDocuments::new(
10940 &so.header.document_id,
10941 &so.customer_id,
10942 &so.header.company_code,
10943 so.total_net_amount,
10944 &so.header.currency,
10945 &ocpm_uuid_factory,
10946 )
10947 .with_delivery(
10948 chain
10949 .deliveries
10950 .first()
10951 .map(|d| d.header.document_id.as_str())
10952 .unwrap_or(""),
10953 &ocpm_uuid_factory,
10954 )
10955 .with_invoice(
10956 chain
10957 .customer_invoice
10958 .as_ref()
10959 .map(|ci| ci.header.document_id.as_str())
10960 .unwrap_or(""),
10961 &ocpm_uuid_factory,
10962 )
10963 .with_receipt(
10964 chain
10965 .customer_receipt
10966 .as_ref()
10967 .map(|r| r.header.document_id.as_str())
10968 .unwrap_or(""),
10969 &ocpm_uuid_factory,
10970 );
10971
10972 let start_time =
10973 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
10974 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
10975 add_result(&mut event_log, result);
10976
10977 if let Some(pb) = &pb {
10978 pb.inc(1);
10979 }
10980 }
10981
10982 for project in &sourcing.sourcing_projects {
10984 let vendor_id = sourcing
10986 .contracts
10987 .iter()
10988 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10989 .map(|c| c.vendor_id.clone())
10990 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
10991 .or_else(|| {
10992 self.master_data
10993 .vendors
10994 .first()
10995 .map(|v| v.vendor_id.clone())
10996 })
10997 .unwrap_or_else(|| "V000".to_string());
10998 let mut docs = S2cDocuments::new(
10999 &project.project_id,
11000 &vendor_id,
11001 &project.company_code,
11002 project.estimated_annual_spend,
11003 &ocpm_uuid_factory,
11004 );
11005 if let Some(rfx) = sourcing
11007 .rfx_events
11008 .iter()
11009 .find(|r| r.sourcing_project_id == project.project_id)
11010 {
11011 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
11012 if let Some(bid) = sourcing.bids.iter().find(|b| {
11014 b.rfx_id == rfx.rfx_id
11015 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
11016 }) {
11017 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
11018 }
11019 }
11020 if let Some(contract) = sourcing
11022 .contracts
11023 .iter()
11024 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11025 {
11026 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
11027 }
11028 let start_time = base_datetime - chrono::Duration::days(90);
11029 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
11030 add_result(&mut event_log, result);
11031
11032 if let Some(pb) = &pb {
11033 pb.inc(1);
11034 }
11035 }
11036
11037 for run in &hr.payroll_runs {
11039 let employee_id = hr
11041 .payroll_line_items
11042 .iter()
11043 .find(|li| li.payroll_id == run.payroll_id)
11044 .map(|li| li.employee_id.as_str())
11045 .unwrap_or("EMP000");
11046 let docs = H2rDocuments::new(
11047 &run.payroll_id,
11048 employee_id,
11049 &run.company_code,
11050 run.total_gross,
11051 &ocpm_uuid_factory,
11052 )
11053 .with_time_entries(
11054 hr.time_entries
11055 .iter()
11056 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
11057 .take(5)
11058 .map(|t| t.entry_id.as_str())
11059 .collect(),
11060 );
11061 let start_time = base_datetime - chrono::Duration::days(30);
11062 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
11063 add_result(&mut event_log, result);
11064
11065 if let Some(pb) = &pb {
11066 pb.inc(1);
11067 }
11068 }
11069
11070 for order in &manufacturing.production_orders {
11072 let mut docs = MfgDocuments::new(
11073 &order.order_id,
11074 &order.material_id,
11075 &order.company_code,
11076 order.planned_quantity,
11077 &ocpm_uuid_factory,
11078 )
11079 .with_operations(
11080 order
11081 .operations
11082 .iter()
11083 .map(|o| format!("OP-{:04}", o.operation_number))
11084 .collect::<Vec<_>>()
11085 .iter()
11086 .map(std::string::String::as_str)
11087 .collect(),
11088 );
11089 if let Some(insp) = manufacturing
11091 .quality_inspections
11092 .iter()
11093 .find(|i| i.reference_id == order.order_id)
11094 {
11095 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
11096 }
11097 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
11099 cc.items
11100 .iter()
11101 .any(|item| item.material_id == order.material_id)
11102 }) {
11103 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
11104 }
11105 let start_time = base_datetime - chrono::Duration::days(60);
11106 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
11107 add_result(&mut event_log, result);
11108
11109 if let Some(pb) = &pb {
11110 pb.inc(1);
11111 }
11112 }
11113
11114 for customer in &banking.customers {
11116 let customer_id_str = customer.customer_id.to_string();
11117 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
11118 if let Some(account) = banking
11120 .accounts
11121 .iter()
11122 .find(|a| a.primary_owner_id == customer.customer_id)
11123 {
11124 let account_id_str = account.account_id.to_string();
11125 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
11126 let txn_strs: Vec<String> = banking
11128 .transactions
11129 .iter()
11130 .filter(|t| t.account_id == account.account_id)
11131 .take(10)
11132 .map(|t| t.transaction_id.to_string())
11133 .collect();
11134 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
11135 let txn_amounts: Vec<rust_decimal::Decimal> = banking
11136 .transactions
11137 .iter()
11138 .filter(|t| t.account_id == account.account_id)
11139 .take(10)
11140 .map(|t| t.amount)
11141 .collect();
11142 if !txn_ids.is_empty() {
11143 docs = docs.with_transactions(txn_ids, txn_amounts);
11144 }
11145 }
11146 let start_time = base_datetime - chrono::Duration::days(180);
11147 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
11148 add_result(&mut event_log, result);
11149
11150 if let Some(pb) = &pb {
11151 pb.inc(1);
11152 }
11153 }
11154
11155 for engagement in &audit.engagements {
11157 let engagement_id_str = engagement.engagement_id.to_string();
11158 let docs = AuditDocuments::new(
11159 &engagement_id_str,
11160 &engagement.client_entity_id,
11161 &ocpm_uuid_factory,
11162 )
11163 .with_workpapers(
11164 audit
11165 .workpapers
11166 .iter()
11167 .filter(|w| w.engagement_id == engagement.engagement_id)
11168 .take(10)
11169 .map(|w| w.workpaper_id.to_string())
11170 .collect::<Vec<_>>()
11171 .iter()
11172 .map(std::string::String::as_str)
11173 .collect(),
11174 )
11175 .with_evidence(
11176 audit
11177 .evidence
11178 .iter()
11179 .filter(|e| e.engagement_id == engagement.engagement_id)
11180 .take(10)
11181 .map(|e| e.evidence_id.to_string())
11182 .collect::<Vec<_>>()
11183 .iter()
11184 .map(std::string::String::as_str)
11185 .collect(),
11186 )
11187 .with_risks(
11188 audit
11189 .risk_assessments
11190 .iter()
11191 .filter(|r| r.engagement_id == engagement.engagement_id)
11192 .take(5)
11193 .map(|r| r.risk_id.to_string())
11194 .collect::<Vec<_>>()
11195 .iter()
11196 .map(std::string::String::as_str)
11197 .collect(),
11198 )
11199 .with_findings(
11200 audit
11201 .findings
11202 .iter()
11203 .filter(|f| f.engagement_id == engagement.engagement_id)
11204 .take(5)
11205 .map(|f| f.finding_id.to_string())
11206 .collect::<Vec<_>>()
11207 .iter()
11208 .map(std::string::String::as_str)
11209 .collect(),
11210 )
11211 .with_judgments(
11212 audit
11213 .judgments
11214 .iter()
11215 .filter(|j| j.engagement_id == engagement.engagement_id)
11216 .take(5)
11217 .map(|j| j.judgment_id.to_string())
11218 .collect::<Vec<_>>()
11219 .iter()
11220 .map(std::string::String::as_str)
11221 .collect(),
11222 );
11223 let start_time = base_datetime - chrono::Duration::days(120);
11224 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
11225 add_result(&mut event_log, result);
11226
11227 if let Some(pb) = &pb {
11228 pb.inc(1);
11229 }
11230 }
11231
11232 for recon in &financial_reporting.bank_reconciliations {
11234 let docs = BankReconDocuments::new(
11235 &recon.reconciliation_id,
11236 &recon.bank_account_id,
11237 &recon.company_code,
11238 recon.bank_ending_balance,
11239 &ocpm_uuid_factory,
11240 )
11241 .with_statement_lines(
11242 recon
11243 .statement_lines
11244 .iter()
11245 .take(20)
11246 .map(|l| l.line_id.as_str())
11247 .collect(),
11248 )
11249 .with_reconciling_items(
11250 recon
11251 .reconciling_items
11252 .iter()
11253 .take(10)
11254 .map(|i| i.item_id.as_str())
11255 .collect(),
11256 );
11257 let start_time = base_datetime - chrono::Duration::days(30);
11258 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
11259 add_result(&mut event_log, result);
11260
11261 if let Some(pb) = &pb {
11262 pb.inc(1);
11263 }
11264 }
11265
11266 event_log.compute_variants();
11268
11269 let summary = event_log.summary();
11270
11271 if let Some(pb) = pb {
11272 pb.finish_with_message(format!(
11273 "Generated {} OCPM events, {} objects",
11274 summary.event_count, summary.object_count
11275 ));
11276 }
11277
11278 Ok(OcpmSnapshot {
11279 event_count: summary.event_count,
11280 object_count: summary.object_count,
11281 case_count: summary.case_count,
11282 event_log: Some(event_log),
11283 })
11284 }
11285
11286 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
11288 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
11289
11290 let total_rate = if self.config.anomaly_injection.enabled {
11293 self.config.anomaly_injection.rates.total_rate
11294 } else if self.config.fraud.enabled {
11295 self.config.fraud.fraud_rate
11296 } else {
11297 0.02
11298 };
11299
11300 let fraud_rate = if self.config.anomaly_injection.enabled {
11301 self.config.anomaly_injection.rates.fraud_rate
11302 } else {
11303 AnomalyRateConfig::default().fraud_rate
11304 };
11305
11306 let error_rate = if self.config.anomaly_injection.enabled {
11307 self.config.anomaly_injection.rates.error_rate
11308 } else {
11309 AnomalyRateConfig::default().error_rate
11310 };
11311
11312 let process_issue_rate = if self.config.anomaly_injection.enabled {
11313 self.config.anomaly_injection.rates.process_rate
11314 } else {
11315 AnomalyRateConfig::default().process_issue_rate
11316 };
11317
11318 let anomaly_config = AnomalyInjectorConfig {
11319 rates: AnomalyRateConfig {
11320 total_rate,
11321 fraud_rate,
11322 error_rate,
11323 process_issue_rate,
11324 ..Default::default()
11325 },
11326 seed: self.seed + 5000,
11327 ..Default::default()
11328 };
11329
11330 let mut injector = AnomalyInjector::new(anomaly_config);
11331 let result = injector.process_entries(entries);
11332
11333 if let Some(pb) = &pb {
11334 pb.inc(entries.len() as u64);
11335 pb.finish_with_message("Anomaly injection complete");
11336 }
11337
11338 let mut by_type = HashMap::new();
11339 for label in &result.labels {
11340 *by_type
11341 .entry(format!("{:?}", label.anomaly_type))
11342 .or_insert(0) += 1;
11343 }
11344
11345 Ok(AnomalyLabels {
11346 labels: result.labels,
11347 summary: Some(result.summary),
11348 by_type,
11349 })
11350 }
11351
11352 fn validate_journal_entries(
11361 &mut self,
11362 entries: &[JournalEntry],
11363 ) -> SynthResult<BalanceValidationResult> {
11364 let clean_entries: Vec<&JournalEntry> = entries
11366 .iter()
11367 .filter(|e| {
11368 e.header
11369 .header_text
11370 .as_ref()
11371 .map(|t| !t.contains("[HUMAN_ERROR:"))
11372 .unwrap_or(true)
11373 })
11374 .collect();
11375
11376 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
11377
11378 let config = BalanceTrackerConfig {
11380 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
11384 };
11385 let validation_currency = self
11386 .config
11387 .companies
11388 .first()
11389 .map(|c| c.currency.clone())
11390 .unwrap_or_else(|| "USD".to_string());
11391
11392 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
11393
11394 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
11396 let errors = tracker.apply_entries(&clean_refs);
11397
11398 if let Some(pb) = &pb {
11399 pb.inc(entries.len() as u64);
11400 }
11401
11402 let has_unbalanced = tracker
11405 .get_validation_errors()
11406 .iter()
11407 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
11408
11409 let mut all_errors = errors;
11412 all_errors.extend(tracker.get_validation_errors().iter().cloned());
11413 let company_codes: Vec<String> = self
11414 .config
11415 .companies
11416 .iter()
11417 .map(|c| c.code.clone())
11418 .collect();
11419
11420 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11421 .map(|d| d + chrono::Months::new(self.config.global.period_months))
11422 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11423
11424 for company_code in &company_codes {
11425 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
11426 all_errors.push(e);
11427 }
11428 }
11429
11430 let stats = tracker.get_statistics();
11432
11433 let is_balanced = all_errors.is_empty();
11435
11436 if let Some(pb) = pb {
11437 let msg = if is_balanced {
11438 "Balance validation passed"
11439 } else {
11440 "Balance validation completed with errors"
11441 };
11442 pb.finish_with_message(msg);
11443 }
11444
11445 Ok(BalanceValidationResult {
11446 validated: true,
11447 is_balanced,
11448 entries_processed: stats.entries_processed,
11449 total_debits: stats.total_debits,
11450 total_credits: stats.total_credits,
11451 accounts_tracked: stats.accounts_tracked,
11452 companies_tracked: stats.companies_tracked,
11453 validation_errors: all_errors,
11454 has_unbalanced_entries: has_unbalanced,
11455 })
11456 }
11457
11458 fn inject_data_quality(
11463 &mut self,
11464 entries: &mut [JournalEntry],
11465 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
11466 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
11467
11468 let config = if self.config.data_quality.enabled {
11471 let dq = &self.config.data_quality;
11472 DataQualityConfig {
11473 enable_missing_values: dq.missing_values.enabled,
11474 missing_values: datasynth_generators::MissingValueConfig {
11475 global_rate: dq.effective_missing_rate(),
11476 ..Default::default()
11477 },
11478 enable_format_variations: dq.format_variations.enabled,
11479 format_variations: datasynth_generators::FormatVariationConfig {
11480 date_variation_rate: dq.format_variations.dates.rate,
11481 amount_variation_rate: dq.format_variations.amounts.rate,
11482 identifier_variation_rate: dq.format_variations.identifiers.rate,
11483 ..Default::default()
11484 },
11485 enable_duplicates: dq.duplicates.enabled,
11486 duplicates: datasynth_generators::DuplicateConfig {
11487 duplicate_rate: dq.effective_duplicate_rate(),
11488 ..Default::default()
11489 },
11490 enable_typos: dq.typos.enabled,
11491 typos: datasynth_generators::TypoConfig {
11492 char_error_rate: dq.effective_typo_rate(),
11493 ..Default::default()
11494 },
11495 enable_encoding_issues: dq.encoding_issues.enabled,
11496 encoding_issue_rate: dq.encoding_issues.rate,
11497 seed: self.seed.wrapping_add(77), track_statistics: true,
11499 }
11500 } else {
11501 DataQualityConfig::minimal()
11502 };
11503 let mut injector = DataQualityInjector::new(config);
11504
11505 injector.set_country_pack(self.primary_pack().clone());
11507
11508 let context = HashMap::new();
11510
11511 for entry in entries.iter_mut() {
11512 if let Some(text) = &entry.header.header_text {
11514 let processed = injector.process_text_field(
11515 "header_text",
11516 text,
11517 &entry.header.document_id.to_string(),
11518 &context,
11519 );
11520 match processed {
11521 Some(new_text) if new_text != *text => {
11522 entry.header.header_text = Some(new_text);
11523 }
11524 None => {
11525 entry.header.header_text = None; }
11527 _ => {}
11528 }
11529 }
11530
11531 if let Some(ref_text) = &entry.header.reference {
11533 let processed = injector.process_text_field(
11534 "reference",
11535 ref_text,
11536 &entry.header.document_id.to_string(),
11537 &context,
11538 );
11539 match processed {
11540 Some(new_text) if new_text != *ref_text => {
11541 entry.header.reference = Some(new_text);
11542 }
11543 None => {
11544 entry.header.reference = None;
11545 }
11546 _ => {}
11547 }
11548 }
11549
11550 let user_persona = entry.header.user_persona.clone();
11552 if let Some(processed) = injector.process_text_field(
11553 "user_persona",
11554 &user_persona,
11555 &entry.header.document_id.to_string(),
11556 &context,
11557 ) {
11558 if processed != user_persona {
11559 entry.header.user_persona = processed;
11560 }
11561 }
11562
11563 for line in &mut entry.lines {
11565 if let Some(ref text) = line.line_text {
11567 let processed = injector.process_text_field(
11568 "line_text",
11569 text,
11570 &entry.header.document_id.to_string(),
11571 &context,
11572 );
11573 match processed {
11574 Some(new_text) if new_text != *text => {
11575 line.line_text = Some(new_text);
11576 }
11577 None => {
11578 line.line_text = None;
11579 }
11580 _ => {}
11581 }
11582 }
11583
11584 if let Some(cc) = &line.cost_center {
11586 let processed = injector.process_text_field(
11587 "cost_center",
11588 cc,
11589 &entry.header.document_id.to_string(),
11590 &context,
11591 );
11592 match processed {
11593 Some(new_cc) if new_cc != *cc => {
11594 line.cost_center = Some(new_cc);
11595 }
11596 None => {
11597 line.cost_center = None;
11598 }
11599 _ => {}
11600 }
11601 }
11602 }
11603
11604 if let Some(pb) = &pb {
11605 pb.inc(1);
11606 }
11607 }
11608
11609 if let Some(pb) = pb {
11610 pb.finish_with_message("Data quality injection complete");
11611 }
11612
11613 let quality_issues = injector.issues().to_vec();
11614 Ok((injector.stats().clone(), quality_issues))
11615 }
11616
11617 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
11628 let use_fsm = self
11630 .config
11631 .audit
11632 .fsm
11633 .as_ref()
11634 .map(|f| f.enabled)
11635 .unwrap_or(false);
11636
11637 if use_fsm {
11638 return self.generate_audit_data_with_fsm(entries);
11639 }
11640
11641 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11643 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11644 let fiscal_year = start_date.year() as u16;
11645 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11646
11647 let total_revenue: rust_decimal::Decimal = entries
11649 .iter()
11650 .flat_map(|e| e.lines.iter())
11651 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
11652 .map(|l| l.credit_amount)
11653 .sum();
11654
11655 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
11657
11658 let mut snapshot = AuditSnapshot::default();
11659
11660 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
11662 engagement_gen.set_team_config(&self.config.audit.team);
11665
11666 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
11667 workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
11671 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
11672 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
11673 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
11674 finding_gen.set_template_provider(self.template_provider.clone());
11676 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
11677 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
11678 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
11679 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
11680 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
11681 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
11682 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
11683
11684 let accounts: Vec<String> = self
11686 .coa
11687 .as_ref()
11688 .map(|coa| {
11689 coa.get_postable_accounts()
11690 .iter()
11691 .map(|acc| acc.account_code().to_string())
11692 .collect()
11693 })
11694 .unwrap_or_default();
11695
11696 for (i, company) in self.config.companies.iter().enumerate() {
11698 let company_revenue = total_revenue
11700 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
11701
11702 let engagements_for_company =
11704 self.phase_config.audit_engagements / self.config.companies.len().max(1);
11705 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
11706 1
11707 } else {
11708 0
11709 };
11710
11711 for _eng_idx in 0..(engagements_for_company + extra) {
11712 let eng_type =
11717 engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
11718
11719 let mut engagement = engagement_gen.generate_engagement(
11721 &company.code,
11722 &company.name,
11723 fiscal_year,
11724 period_end,
11725 company_revenue,
11726 Some(eng_type),
11727 );
11728
11729 if !self.master_data.employees.is_empty() {
11731 let emp_count = self.master_data.employees.len();
11732 let base = (i * 10 + _eng_idx) % emp_count;
11734 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
11735 .employee_id
11736 .clone();
11737 engagement.engagement_manager_id = self.master_data.employees
11738 [(base + 1) % emp_count]
11739 .employee_id
11740 .clone();
11741 let real_team: Vec<String> = engagement
11742 .team_member_ids
11743 .iter()
11744 .enumerate()
11745 .map(|(j, _)| {
11746 self.master_data.employees[(base + 2 + j) % emp_count]
11747 .employee_id
11748 .clone()
11749 })
11750 .collect();
11751 engagement.team_member_ids = real_team;
11752 }
11753
11754 if let Some(pb) = &pb {
11755 pb.inc(1);
11756 }
11757
11758 let team_members: Vec<String> = engagement.team_member_ids.clone();
11760
11761 let workpapers = if self.config.audit.generate_workpapers {
11767 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
11768 } else {
11769 Vec::new()
11770 };
11771
11772 for wp in &workpapers {
11773 if let Some(pb) = &pb {
11774 pb.inc(1);
11775 }
11776
11777 let evidence = evidence_gen.generate_evidence_for_workpaper(
11779 wp,
11780 &team_members,
11781 wp.preparer_date,
11782 );
11783
11784 for _ in &evidence {
11785 if let Some(pb) = &pb {
11786 pb.inc(1);
11787 }
11788 }
11789
11790 snapshot.evidence.extend(evidence);
11791 }
11792
11793 let risks =
11795 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
11796
11797 for _ in &risks {
11798 if let Some(pb) = &pb {
11799 pb.inc(1);
11800 }
11801 }
11802 snapshot.risk_assessments.extend(risks);
11803
11804 let findings = finding_gen.generate_findings_for_engagement(
11806 &engagement,
11807 &workpapers,
11808 &team_members,
11809 );
11810
11811 for _ in &findings {
11812 if let Some(pb) = &pb {
11813 pb.inc(1);
11814 }
11815 }
11816 snapshot.findings.extend(findings);
11817
11818 let judgments =
11820 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
11821
11822 for _ in &judgments {
11823 if let Some(pb) = &pb {
11824 pb.inc(1);
11825 }
11826 }
11827 snapshot.judgments.extend(judgments);
11828
11829 let (confs, resps) =
11831 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
11832 snapshot.confirmations.extend(confs);
11833 snapshot.confirmation_responses.extend(resps);
11834
11835 let team_pairs: Vec<(String, String)> = team_members
11837 .iter()
11838 .map(|id| {
11839 let name = self
11840 .master_data
11841 .employees
11842 .iter()
11843 .find(|e| e.employee_id == *id)
11844 .map(|e| e.display_name.clone())
11845 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
11846 (id.clone(), name)
11847 })
11848 .collect();
11849 for wp in &workpapers {
11850 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
11851 snapshot.procedure_steps.extend(steps);
11852 }
11853
11854 for wp in &workpapers {
11856 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
11857 snapshot.samples.push(sample);
11858 }
11859 }
11860
11861 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
11863 snapshot.analytical_results.extend(analytical);
11864
11865 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
11867 snapshot.ia_functions.push(ia_func);
11868 snapshot.ia_reports.extend(ia_reports);
11869
11870 let vendor_names: Vec<String> = self
11872 .master_data
11873 .vendors
11874 .iter()
11875 .map(|v| v.name.clone())
11876 .collect();
11877 let customer_names: Vec<String> = self
11878 .master_data
11879 .customers
11880 .iter()
11881 .map(|c| c.name.clone())
11882 .collect();
11883 let (parties, rp_txns) =
11884 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
11885 snapshot.related_parties.extend(parties);
11886 snapshot.related_party_transactions.extend(rp_txns);
11887
11888 snapshot.workpapers.extend(workpapers);
11890
11891 {
11893 let scope_id = format!(
11894 "SCOPE-{}-{}",
11895 engagement.engagement_id.simple(),
11896 &engagement.client_entity_id
11897 );
11898 let scope = datasynth_core::models::audit::AuditScope::new(
11899 scope_id.clone(),
11900 engagement.engagement_id.to_string(),
11901 engagement.client_entity_id.clone(),
11902 engagement.materiality,
11903 );
11904 let mut eng = engagement;
11906 eng.scope_id = Some(scope_id);
11907 snapshot.audit_scopes.push(scope);
11908 snapshot.engagements.push(eng);
11909 }
11910 }
11911 }
11912
11913 if self.config.companies.len() > 1 {
11917 let group_materiality = snapshot
11920 .engagements
11921 .first()
11922 .map(|e| e.materiality)
11923 .unwrap_or_else(|| {
11924 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
11925 total_revenue * pct
11926 });
11927
11928 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
11929 let group_engagement_id = snapshot
11930 .engagements
11931 .first()
11932 .map(|e| e.engagement_id.to_string())
11933 .unwrap_or_else(|| "GROUP-ENG".to_string());
11934
11935 let component_snapshot = component_gen.generate(
11936 &self.config.companies,
11937 group_materiality,
11938 &group_engagement_id,
11939 period_end,
11940 );
11941
11942 snapshot.component_auditors = component_snapshot.component_auditors;
11943 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
11944 snapshot.component_instructions = component_snapshot.component_instructions;
11945 snapshot.component_reports = component_snapshot.component_reports;
11946
11947 info!(
11948 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
11949 snapshot.component_auditors.len(),
11950 snapshot.component_instructions.len(),
11951 snapshot.component_reports.len(),
11952 );
11953 }
11954
11955 {
11959 let applicable_framework = self
11960 .config
11961 .accounting_standards
11962 .framework
11963 .as_ref()
11964 .map(|f| format!("{f:?}"))
11965 .unwrap_or_else(|| "IFRS".to_string());
11966
11967 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
11968 let entity_count = self.config.companies.len();
11969
11970 for engagement in &snapshot.engagements {
11971 let company = self
11972 .config
11973 .companies
11974 .iter()
11975 .find(|c| c.code == engagement.client_entity_id);
11976 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
11977 let letter_date = engagement.planning_start;
11978 let letter = letter_gen.generate(
11979 &engagement.engagement_id.to_string(),
11980 &engagement.client_name,
11981 entity_count,
11982 engagement.period_end_date,
11983 currency,
11984 &applicable_framework,
11985 letter_date,
11986 );
11987 snapshot.engagement_letters.push(letter);
11988 }
11989
11990 info!(
11991 "ISA 210 engagement letters: {} generated",
11992 snapshot.engagement_letters.len()
11993 );
11994 }
11995
11996 if self.phase_config.generate_legal_documents {
12000 use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
12001 let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
12002 for engagement in &snapshot.engagements {
12003 let employee_names: Vec<String> = self
12007 .master_data
12008 .employees
12009 .iter()
12010 .filter(|e| e.company_code == engagement.client_entity_id)
12011 .map(|e| e.display_name.clone())
12012 .collect();
12013 let names_to_use = if !employee_names.is_empty() {
12014 employee_names
12015 } else {
12016 self.master_data
12017 .employees
12018 .iter()
12019 .take(10)
12020 .map(|e| e.display_name.clone())
12021 .collect()
12022 };
12023 let docs = legal_gen.generate(
12024 &engagement.client_entity_id,
12025 engagement.fiscal_year as i32,
12026 &names_to_use,
12027 );
12028 snapshot.legal_documents.extend(docs);
12029 }
12030 info!(
12031 "v3.3.0 legal documents: {} emitted across {} engagements",
12032 snapshot.legal_documents.len(),
12033 snapshot.engagements.len()
12034 );
12035 }
12036
12037 if self.phase_config.generate_it_controls {
12047 use datasynth_generators::it_controls_generator::ItControlsGenerator;
12048 use std::collections::HashMap;
12049 let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
12050
12051 let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
12054 HashMap::new();
12055 for engagement in &snapshot.engagements {
12056 let entry = by_company
12057 .entry(engagement.client_entity_id.clone())
12058 .or_insert((engagement.planning_start, engagement.period_end_date));
12059 if engagement.planning_start < entry.0 {
12060 entry.0 = engagement.planning_start;
12061 }
12062 if engagement.period_end_date > entry.1 {
12063 entry.1 = engagement.period_end_date;
12064 }
12065 }
12066
12067 let systems: Vec<String> = vec![
12071 "SAP ECC",
12072 "SAP S/4 HANA",
12073 "Oracle EBS",
12074 "Workday",
12075 "NetSuite",
12076 "Active Directory",
12077 "SharePoint",
12078 "Salesforce",
12079 "ServiceNow",
12080 "Jira",
12081 "GitHub Enterprise",
12082 "AWS Console",
12083 "Okta",
12084 ]
12085 .into_iter()
12086 .map(String::from)
12087 .collect();
12088
12089 for (company_code, (start, end)) in by_company {
12090 let emps: Vec<(String, String)> = self
12091 .master_data
12092 .employees
12093 .iter()
12094 .filter(|e| e.company_code == company_code)
12095 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12096 .collect();
12097 if emps.is_empty() {
12098 continue;
12099 }
12100 let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
12103 let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
12104 let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
12105 snapshot.it_controls_access_logs.extend(access_logs);
12106 snapshot.it_controls_change_records.extend(change_records);
12107 }
12108
12109 info!(
12110 "v3.3.0 IT controls: {} access logs, {} change records",
12111 snapshot.it_controls_access_logs.len(),
12112 snapshot.it_controls_change_records.len()
12113 );
12114 }
12115
12116 {
12120 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
12121 let entity_codes: Vec<String> = self
12122 .config
12123 .companies
12124 .iter()
12125 .map(|c| c.code.clone())
12126 .collect();
12127 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
12128 info!(
12129 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
12130 subsequent.len(),
12131 subsequent
12132 .iter()
12133 .filter(|e| matches!(
12134 e.classification,
12135 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
12136 ))
12137 .count(),
12138 subsequent
12139 .iter()
12140 .filter(|e| matches!(
12141 e.classification,
12142 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
12143 ))
12144 .count(),
12145 );
12146 snapshot.subsequent_events = subsequent;
12147 }
12148
12149 {
12153 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
12154 let entity_codes: Vec<String> = self
12155 .config
12156 .companies
12157 .iter()
12158 .map(|c| c.code.clone())
12159 .collect();
12160 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
12161 info!(
12162 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
12163 soc_snapshot.service_organizations.len(),
12164 soc_snapshot.soc_reports.len(),
12165 soc_snapshot.user_entity_controls.len(),
12166 );
12167 snapshot.service_organizations = soc_snapshot.service_organizations;
12168 snapshot.soc_reports = soc_snapshot.soc_reports;
12169 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
12170 }
12171
12172 {
12176 use datasynth_generators::audit::going_concern_generator::{
12177 GoingConcernGenerator, GoingConcernInput,
12178 };
12179 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
12180 let entity_codes: Vec<String> = self
12181 .config
12182 .companies
12183 .iter()
12184 .map(|c| c.code.clone())
12185 .collect();
12186 let assessment_date = period_end + chrono::Duration::days(75);
12188 let period_label = format!("FY{}", period_end.year());
12189
12190 let gc_inputs: Vec<GoingConcernInput> = self
12201 .config
12202 .companies
12203 .iter()
12204 .map(|company| {
12205 let code = &company.code;
12206 let mut revenue = rust_decimal::Decimal::ZERO;
12207 let mut expenses = rust_decimal::Decimal::ZERO;
12208 let mut current_assets = rust_decimal::Decimal::ZERO;
12209 let mut current_liabs = rust_decimal::Decimal::ZERO;
12210 let mut total_debt = rust_decimal::Decimal::ZERO;
12211
12212 for je in entries.iter().filter(|je| &je.header.company_code == code) {
12213 for line in &je.lines {
12214 let acct = line.gl_account.as_str();
12215 let net = line.debit_amount - line.credit_amount;
12216 if acct.starts_with('4') {
12217 revenue -= net;
12219 } else if acct.starts_with('6') {
12220 expenses += net;
12222 }
12223 if acct.starts_with('1') {
12225 if let Ok(n) = acct.parse::<u32>() {
12227 if (1000..=1499).contains(&n) {
12228 current_assets += net;
12229 }
12230 }
12231 } else if acct.starts_with('2') {
12232 if let Ok(n) = acct.parse::<u32>() {
12233 if (2000..=2499).contains(&n) {
12234 current_liabs -= net; } else if (2500..=2999).contains(&n) {
12237 total_debt -= net;
12239 }
12240 }
12241 }
12242 }
12243 }
12244
12245 let net_income = revenue - expenses;
12246 let working_capital = current_assets - current_liabs;
12247 let operating_cash_flow = net_income;
12250
12251 GoingConcernInput {
12252 entity_code: code.clone(),
12253 net_income,
12254 working_capital,
12255 operating_cash_flow,
12256 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
12257 assessment_date,
12258 }
12259 })
12260 .collect();
12261
12262 let assessments = if gc_inputs.is_empty() {
12263 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
12264 } else {
12265 gc_gen.generate_for_entities_with_inputs(
12266 &entity_codes,
12267 &gc_inputs,
12268 assessment_date,
12269 &period_label,
12270 )
12271 };
12272 info!(
12273 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
12274 assessments.len(),
12275 assessments.iter().filter(|a| matches!(
12276 a.auditor_conclusion,
12277 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
12278 )).count(),
12279 assessments.iter().filter(|a| matches!(
12280 a.auditor_conclusion,
12281 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
12282 )).count(),
12283 assessments.iter().filter(|a| matches!(
12284 a.auditor_conclusion,
12285 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
12286 )).count(),
12287 );
12288 snapshot.going_concern_assessments = assessments;
12289 }
12290
12291 {
12295 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
12296 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
12297 let entity_codes: Vec<String> = self
12298 .config
12299 .companies
12300 .iter()
12301 .map(|c| c.code.clone())
12302 .collect();
12303 let estimates = est_gen.generate_for_entities(&entity_codes);
12304 info!(
12305 "ISA 540 accounting estimates: {} estimates across {} entities \
12306 ({} with retrospective reviews, {} with auditor point estimates)",
12307 estimates.len(),
12308 entity_codes.len(),
12309 estimates
12310 .iter()
12311 .filter(|e| e.retrospective_review.is_some())
12312 .count(),
12313 estimates
12314 .iter()
12315 .filter(|e| e.auditor_point_estimate.is_some())
12316 .count(),
12317 );
12318 snapshot.accounting_estimates = estimates;
12319 }
12320
12321 {
12325 use datasynth_generators::audit::audit_opinion_generator::{
12326 AuditOpinionGenerator, AuditOpinionInput,
12327 };
12328
12329 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
12330
12331 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
12333 .engagements
12334 .iter()
12335 .map(|eng| {
12336 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12338 .findings
12339 .iter()
12340 .filter(|f| f.engagement_id == eng.engagement_id)
12341 .cloned()
12342 .collect();
12343
12344 let gc = snapshot
12346 .going_concern_assessments
12347 .iter()
12348 .find(|g| g.entity_code == eng.client_entity_id)
12349 .cloned();
12350
12351 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
12353 snapshot.component_reports.clone();
12354
12355 let auditor = self
12356 .master_data
12357 .employees
12358 .first()
12359 .map(|e| e.display_name.clone())
12360 .unwrap_or_else(|| "Global Audit LLP".into());
12361
12362 let partner = self
12363 .master_data
12364 .employees
12365 .get(1)
12366 .map(|e| e.display_name.clone())
12367 .unwrap_or_else(|| eng.engagement_partner_id.clone());
12368
12369 AuditOpinionInput {
12370 entity_code: eng.client_entity_id.clone(),
12371 entity_name: eng.client_name.clone(),
12372 engagement_id: eng.engagement_id,
12373 period_end: eng.period_end_date,
12374 findings: eng_findings,
12375 going_concern: gc,
12376 component_reports: comp_reports,
12377 is_us_listed: {
12379 let fw = &self.config.audit_standards.isa_compliance.framework;
12380 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
12381 },
12382 auditor_name: auditor,
12383 engagement_partner: partner,
12384 }
12385 })
12386 .collect();
12387
12388 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
12389
12390 for go in &generated_opinions {
12391 snapshot
12392 .key_audit_matters
12393 .extend(go.key_audit_matters.clone());
12394 }
12395 snapshot.audit_opinions = generated_opinions
12396 .into_iter()
12397 .map(|go| go.opinion)
12398 .collect();
12399
12400 info!(
12401 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
12402 snapshot.audit_opinions.len(),
12403 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
12404 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
12405 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
12406 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
12407 );
12408 }
12409
12410 {
12414 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
12415
12416 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
12417
12418 for (i, company) in self.config.companies.iter().enumerate() {
12419 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
12421 .engagements
12422 .iter()
12423 .filter(|e| e.client_entity_id == company.code)
12424 .map(|e| e.engagement_id)
12425 .collect();
12426
12427 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12428 .findings
12429 .iter()
12430 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
12431 .cloned()
12432 .collect();
12433
12434 let emp_count = self.master_data.employees.len();
12436 let ceo_name = if emp_count > 0 {
12437 self.master_data.employees[i % emp_count]
12438 .display_name
12439 .clone()
12440 } else {
12441 format!("CEO of {}", company.name)
12442 };
12443 let cfo_name = if emp_count > 1 {
12444 self.master_data.employees[(i + 1) % emp_count]
12445 .display_name
12446 .clone()
12447 } else {
12448 format!("CFO of {}", company.name)
12449 };
12450
12451 let materiality = snapshot
12453 .engagements
12454 .iter()
12455 .find(|e| e.client_entity_id == company.code)
12456 .map(|e| e.materiality)
12457 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
12458
12459 let input = SoxGeneratorInput {
12460 company_code: company.code.clone(),
12461 company_name: company.name.clone(),
12462 fiscal_year,
12463 period_end,
12464 findings: company_findings,
12465 ceo_name,
12466 cfo_name,
12467 materiality_threshold: materiality,
12468 revenue_percent: rust_decimal::Decimal::from(100),
12469 assets_percent: rust_decimal::Decimal::from(100),
12470 significant_accounts: vec![
12471 "Revenue".into(),
12472 "Accounts Receivable".into(),
12473 "Inventory".into(),
12474 "Fixed Assets".into(),
12475 "Accounts Payable".into(),
12476 ],
12477 };
12478
12479 let (certs, assessment) = sox_gen.generate(&input);
12480 snapshot.sox_302_certifications.extend(certs);
12481 snapshot.sox_404_assessments.push(assessment);
12482 }
12483
12484 info!(
12485 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
12486 snapshot.sox_302_certifications.len(),
12487 snapshot.sox_404_assessments.len(),
12488 snapshot
12489 .sox_404_assessments
12490 .iter()
12491 .filter(|a| a.icfr_effective)
12492 .count(),
12493 snapshot
12494 .sox_404_assessments
12495 .iter()
12496 .filter(|a| !a.icfr_effective)
12497 .count(),
12498 );
12499 }
12500
12501 {
12505 use datasynth_generators::audit::materiality_generator::{
12506 MaterialityGenerator, MaterialityInput,
12507 };
12508
12509 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
12510
12511 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
12515
12516 for company in &self.config.companies {
12517 let company_code = company.code.clone();
12518
12519 let company_revenue: rust_decimal::Decimal = entries
12521 .iter()
12522 .filter(|e| e.company_code() == company_code)
12523 .flat_map(|e| e.lines.iter())
12524 .filter(|l| l.account_code.starts_with('4'))
12525 .map(|l| l.credit_amount)
12526 .sum();
12527
12528 let total_assets: rust_decimal::Decimal = entries
12530 .iter()
12531 .filter(|e| e.company_code() == company_code)
12532 .flat_map(|e| e.lines.iter())
12533 .filter(|l| l.account_code.starts_with('1'))
12534 .map(|l| l.debit_amount)
12535 .sum();
12536
12537 let total_expenses: rust_decimal::Decimal = entries
12539 .iter()
12540 .filter(|e| e.company_code() == company_code)
12541 .flat_map(|e| e.lines.iter())
12542 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12543 .map(|l| l.debit_amount)
12544 .sum();
12545
12546 let equity: rust_decimal::Decimal = entries
12548 .iter()
12549 .filter(|e| e.company_code() == company_code)
12550 .flat_map(|e| e.lines.iter())
12551 .filter(|l| l.account_code.starts_with('3'))
12552 .map(|l| l.credit_amount)
12553 .sum();
12554
12555 let pretax_income = company_revenue - total_expenses;
12556
12557 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
12559 let w = rust_decimal::Decimal::try_from(company.volume_weight)
12560 .unwrap_or(rust_decimal::Decimal::ONE);
12561 (
12562 total_revenue * w,
12563 total_revenue * w * rust_decimal::Decimal::from(3),
12564 total_revenue * w * rust_decimal::Decimal::new(1, 1),
12565 total_revenue * w * rust_decimal::Decimal::from(2),
12566 )
12567 } else {
12568 (company_revenue, total_assets, pretax_income, equity)
12569 };
12570
12571 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
12574 entity_code: company_code,
12575 period: format!("FY{}", fiscal_year),
12576 revenue: rev,
12577 pretax_income: pti,
12578 total_assets: assets,
12579 equity: eq,
12580 gross_profit,
12581 });
12582 }
12583
12584 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
12585
12586 info!(
12587 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
12588 {} total assets, {} equity benchmarks)",
12589 snapshot.materiality_calculations.len(),
12590 snapshot
12591 .materiality_calculations
12592 .iter()
12593 .filter(|m| matches!(
12594 m.benchmark,
12595 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
12596 ))
12597 .count(),
12598 snapshot
12599 .materiality_calculations
12600 .iter()
12601 .filter(|m| matches!(
12602 m.benchmark,
12603 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
12604 ))
12605 .count(),
12606 snapshot
12607 .materiality_calculations
12608 .iter()
12609 .filter(|m| matches!(
12610 m.benchmark,
12611 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
12612 ))
12613 .count(),
12614 snapshot
12615 .materiality_calculations
12616 .iter()
12617 .filter(|m| matches!(
12618 m.benchmark,
12619 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
12620 ))
12621 .count(),
12622 );
12623 }
12624
12625 {
12629 use datasynth_generators::audit::cra_generator::CraGenerator;
12630
12631 let mut cra_gen = CraGenerator::new(self.seed + 8315);
12632
12633 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
12635 .audit_scopes
12636 .iter()
12637 .map(|s| (s.entity_code.clone(), s.id.clone()))
12638 .collect();
12639
12640 for company in &self.config.companies {
12641 let cras = cra_gen.generate_for_entity(&company.code, None);
12642 let scope_id = entity_scope_map.get(&company.code).cloned();
12643 let cras_with_scope: Vec<_> = cras
12644 .into_iter()
12645 .map(|mut cra| {
12646 cra.scope_id = scope_id.clone();
12647 cra
12648 })
12649 .collect();
12650 snapshot.combined_risk_assessments.extend(cras_with_scope);
12651 }
12652
12653 let significant_count = snapshot
12654 .combined_risk_assessments
12655 .iter()
12656 .filter(|c| c.significant_risk)
12657 .count();
12658 let high_cra_count = snapshot
12659 .combined_risk_assessments
12660 .iter()
12661 .filter(|c| {
12662 matches!(
12663 c.combined_risk,
12664 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
12665 )
12666 })
12667 .count();
12668
12669 info!(
12670 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
12671 snapshot.combined_risk_assessments.len(),
12672 significant_count,
12673 high_cra_count,
12674 );
12675 }
12676
12677 {
12681 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
12682
12683 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
12684
12685 for company in &self.config.companies {
12687 let entity_code = company.code.clone();
12688
12689 let tolerable_error = snapshot
12691 .materiality_calculations
12692 .iter()
12693 .find(|m| m.entity_code == entity_code)
12694 .map(|m| m.tolerable_error);
12695
12696 let entity_cras: Vec<_> = snapshot
12698 .combined_risk_assessments
12699 .iter()
12700 .filter(|c| c.entity_code == entity_code)
12701 .cloned()
12702 .collect();
12703
12704 if !entity_cras.is_empty() {
12705 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
12706 snapshot.sampling_plans.extend(plans);
12707 snapshot.sampled_items.extend(items);
12708 }
12709 }
12710
12711 let misstatement_count = snapshot
12712 .sampled_items
12713 .iter()
12714 .filter(|i| i.misstatement_found)
12715 .count();
12716
12717 info!(
12718 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
12719 snapshot.sampling_plans.len(),
12720 snapshot.sampled_items.len(),
12721 misstatement_count,
12722 );
12723 }
12724
12725 {
12729 use datasynth_generators::audit::scots_generator::{
12730 ScotsGenerator, ScotsGeneratorConfig,
12731 };
12732
12733 let ic_enabled = self.config.intercompany.enabled;
12734
12735 let config = ScotsGeneratorConfig {
12736 intercompany_enabled: ic_enabled,
12737 ..ScotsGeneratorConfig::default()
12738 };
12739 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
12740
12741 for company in &self.config.companies {
12742 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
12743 snapshot
12744 .significant_transaction_classes
12745 .extend(entity_scots);
12746 }
12747
12748 let estimation_count = snapshot
12749 .significant_transaction_classes
12750 .iter()
12751 .filter(|s| {
12752 matches!(
12753 s.transaction_type,
12754 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
12755 )
12756 })
12757 .count();
12758
12759 info!(
12760 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
12761 snapshot.significant_transaction_classes.len(),
12762 estimation_count,
12763 );
12764 }
12765
12766 {
12770 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
12771
12772 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
12773 let entity_codes: Vec<String> = self
12774 .config
12775 .companies
12776 .iter()
12777 .map(|c| c.code.clone())
12778 .collect();
12779 let unusual_flags =
12780 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
12781 info!(
12782 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
12783 unusual_flags.len(),
12784 unusual_flags
12785 .iter()
12786 .filter(|f| matches!(
12787 f.severity,
12788 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
12789 ))
12790 .count(),
12791 unusual_flags
12792 .iter()
12793 .filter(|f| matches!(
12794 f.severity,
12795 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
12796 ))
12797 .count(),
12798 unusual_flags
12799 .iter()
12800 .filter(|f| matches!(
12801 f.severity,
12802 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
12803 ))
12804 .count(),
12805 );
12806 snapshot.unusual_items = unusual_flags;
12807 }
12808
12809 {
12813 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
12814
12815 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
12816 let entity_codes: Vec<String> = self
12817 .config
12818 .companies
12819 .iter()
12820 .map(|c| c.code.clone())
12821 .collect();
12822 let current_period_label = format!("FY{fiscal_year}");
12823 let prior_period_label = format!("FY{}", fiscal_year - 1);
12824 let analytical_rels = ar_gen.generate_for_entities(
12825 &entity_codes,
12826 entries,
12827 ¤t_period_label,
12828 &prior_period_label,
12829 );
12830 let out_of_range = analytical_rels
12831 .iter()
12832 .filter(|r| !r.within_expected_range)
12833 .count();
12834 info!(
12835 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
12836 analytical_rels.len(),
12837 out_of_range,
12838 );
12839 snapshot.analytical_relationships = analytical_rels;
12840 }
12841
12842 if let Some(pb) = pb {
12843 pb.finish_with_message(format!(
12844 "Audit data: {} engagements, {} workpapers, {} evidence, \
12845 {} confirmations, {} procedure steps, {} samples, \
12846 {} analytical, {} IA funcs, {} related parties, \
12847 {} component auditors, {} letters, {} subsequent events, \
12848 {} service orgs, {} going concern, {} accounting estimates, \
12849 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
12850 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
12851 {} unusual items, {} analytical relationships",
12852 snapshot.engagements.len(),
12853 snapshot.workpapers.len(),
12854 snapshot.evidence.len(),
12855 snapshot.confirmations.len(),
12856 snapshot.procedure_steps.len(),
12857 snapshot.samples.len(),
12858 snapshot.analytical_results.len(),
12859 snapshot.ia_functions.len(),
12860 snapshot.related_parties.len(),
12861 snapshot.component_auditors.len(),
12862 snapshot.engagement_letters.len(),
12863 snapshot.subsequent_events.len(),
12864 snapshot.service_organizations.len(),
12865 snapshot.going_concern_assessments.len(),
12866 snapshot.accounting_estimates.len(),
12867 snapshot.audit_opinions.len(),
12868 snapshot.key_audit_matters.len(),
12869 snapshot.sox_302_certifications.len(),
12870 snapshot.sox_404_assessments.len(),
12871 snapshot.materiality_calculations.len(),
12872 snapshot.combined_risk_assessments.len(),
12873 snapshot.sampling_plans.len(),
12874 snapshot.significant_transaction_classes.len(),
12875 snapshot.unusual_items.len(),
12876 snapshot.analytical_relationships.len(),
12877 ));
12878 }
12879
12880 {
12887 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12888 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12889 debug!(
12890 "PCAOB-ISA mappings generated: {} mappings",
12891 snapshot.isa_pcaob_mappings.len()
12892 );
12893 }
12894
12895 {
12902 use datasynth_standards::audit::isa_reference::IsaStandard;
12903 snapshot.isa_mappings = IsaStandard::standard_entries();
12904 debug!(
12905 "ISA standard entries generated: {} standards",
12906 snapshot.isa_mappings.len()
12907 );
12908 }
12909
12910 {
12913 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
12914 .engagements
12915 .iter()
12916 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
12917 .collect();
12918
12919 for rpt in &mut snapshot.related_party_transactions {
12920 if rpt.journal_entry_id.is_some() {
12921 continue; }
12923 let entity = engagement_by_id
12924 .get(&rpt.engagement_id.to_string())
12925 .copied()
12926 .unwrap_or("");
12927
12928 let best_je = entries
12930 .iter()
12931 .filter(|je| je.header.company_code == entity)
12932 .min_by_key(|je| {
12933 (je.header.posting_date - rpt.transaction_date)
12934 .num_days()
12935 .abs()
12936 });
12937
12938 if let Some(je) = best_je {
12939 rpt.journal_entry_id = Some(je.header.document_id.to_string());
12940 }
12941 }
12942
12943 let linked = snapshot
12944 .related_party_transactions
12945 .iter()
12946 .filter(|t| t.journal_entry_id.is_some())
12947 .count();
12948 debug!(
12949 "Linked {}/{} related party transactions to journal entries",
12950 linked,
12951 snapshot.related_party_transactions.len()
12952 );
12953 }
12954
12955 if !snapshot.engagements.is_empty() {
12961 use datasynth_generators::audit_opinion_generator::{
12962 AuditOpinionGenerator, AuditOpinionInput,
12963 };
12964
12965 let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
12966 let inputs: Vec<AuditOpinionInput> = snapshot
12967 .engagements
12968 .iter()
12969 .map(|eng| {
12970 let findings = snapshot
12971 .findings
12972 .iter()
12973 .filter(|f| f.engagement_id == eng.engagement_id)
12974 .cloned()
12975 .collect();
12976 let going_concern = snapshot
12977 .going_concern_assessments
12978 .iter()
12979 .find(|gc| gc.entity_code == eng.client_entity_id)
12980 .cloned();
12981 let component_reports = snapshot
12984 .component_reports
12985 .iter()
12986 .filter(|r| r.entity_code == eng.client_entity_id)
12987 .cloned()
12988 .collect();
12989
12990 AuditOpinionInput {
12991 entity_code: eng.client_entity_id.clone(),
12992 entity_name: eng.client_name.clone(),
12993 engagement_id: eng.engagement_id,
12994 period_end: eng.period_end_date,
12995 findings,
12996 going_concern,
12997 component_reports,
12998 is_us_listed: matches!(
12999 eng.engagement_type,
13000 datasynth_core::audit::EngagementType::IntegratedAudit
13001 | datasynth_core::audit::EngagementType::Sox404
13002 ),
13003 auditor_name: "DataSynth Audit LLP".to_string(),
13004 engagement_partner: "Engagement Partner".to_string(),
13005 }
13006 })
13007 .collect();
13008
13009 let generated = opinion_gen.generate_batch(&inputs);
13010 for g in generated {
13011 snapshot.key_audit_matters.extend(g.key_audit_matters);
13012 snapshot.audit_opinions.push(g.opinion);
13013 }
13014 debug!(
13015 "Generated {} audit opinions with {} key audit matters",
13016 snapshot.audit_opinions.len(),
13017 snapshot.key_audit_matters.len()
13018 );
13019 }
13020
13021 Ok(snapshot)
13022 }
13023
13024 fn generate_audit_data_with_fsm(
13031 &mut self,
13032 entries: &[JournalEntry],
13033 ) -> SynthResult<AuditSnapshot> {
13034 use datasynth_audit_fsm::{
13035 context::EngagementContext,
13036 engine::AuditFsmEngine,
13037 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
13038 };
13039 use rand::SeedableRng;
13040 use rand_chacha::ChaCha8Rng;
13041
13042 info!("Audit FSM: generating audit data via FSM engine");
13043
13044 let fsm_config = self
13045 .config
13046 .audit
13047 .fsm
13048 .as_ref()
13049 .expect("FSM config must be present when FSM is enabled");
13050
13051 let bwp = match fsm_config.blueprint.as_str() {
13053 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
13054 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
13055 _ => {
13056 warn!(
13057 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
13058 fsm_config.blueprint
13059 );
13060 BlueprintWithPreconditions::load_builtin_fsa()
13061 }
13062 }
13063 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
13064
13065 let overlay = match fsm_config.overlay.as_str() {
13067 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
13068 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
13069 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
13070 _ => {
13071 warn!(
13072 "Unknown FSM overlay '{}', falling back to builtin:default",
13073 fsm_config.overlay
13074 );
13075 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
13076 }
13077 }
13078 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
13079
13080 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13082 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
13083 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
13084
13085 let company = self.config.companies.first();
13087 let company_code = company
13088 .map(|c| c.code.clone())
13089 .unwrap_or_else(|| "UNKNOWN".to_string());
13090 let company_name = company
13091 .map(|c| c.name.clone())
13092 .unwrap_or_else(|| "Unknown Company".to_string());
13093 let currency = company
13094 .map(|c| c.currency.clone())
13095 .unwrap_or_else(|| "USD".to_string());
13096
13097 let entity_entries: Vec<_> = entries
13099 .iter()
13100 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
13101 .cloned()
13102 .collect();
13103 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
13107 .iter()
13108 .flat_map(|e| e.lines.iter())
13109 .filter(|l| l.account_code.starts_with('4'))
13110 .map(|l| l.credit_amount - l.debit_amount)
13111 .sum();
13112
13113 let total_assets: rust_decimal::Decimal = entries
13114 .iter()
13115 .flat_map(|e| e.lines.iter())
13116 .filter(|l| l.account_code.starts_with('1'))
13117 .map(|l| l.debit_amount - l.credit_amount)
13118 .sum();
13119
13120 let total_expenses: rust_decimal::Decimal = entries
13121 .iter()
13122 .flat_map(|e| e.lines.iter())
13123 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13124 .map(|l| l.debit_amount)
13125 .sum();
13126
13127 let equity: rust_decimal::Decimal = entries
13128 .iter()
13129 .flat_map(|e| e.lines.iter())
13130 .filter(|l| l.account_code.starts_with('3'))
13131 .map(|l| l.credit_amount - l.debit_amount)
13132 .sum();
13133
13134 let total_debt: rust_decimal::Decimal = entries
13135 .iter()
13136 .flat_map(|e| e.lines.iter())
13137 .filter(|l| l.account_code.starts_with('2'))
13138 .map(|l| l.credit_amount - l.debit_amount)
13139 .sum();
13140
13141 let pretax_income = total_revenue - total_expenses;
13142
13143 let cogs: rust_decimal::Decimal = entries
13144 .iter()
13145 .flat_map(|e| e.lines.iter())
13146 .filter(|l| l.account_code.starts_with('5'))
13147 .map(|l| l.debit_amount)
13148 .sum();
13149 let gross_profit = total_revenue - cogs;
13150
13151 let current_assets: rust_decimal::Decimal = entries
13152 .iter()
13153 .flat_map(|e| e.lines.iter())
13154 .filter(|l| {
13155 l.account_code.starts_with("10")
13156 || l.account_code.starts_with("11")
13157 || l.account_code.starts_with("12")
13158 || l.account_code.starts_with("13")
13159 })
13160 .map(|l| l.debit_amount - l.credit_amount)
13161 .sum();
13162 let current_liabilities: rust_decimal::Decimal = entries
13163 .iter()
13164 .flat_map(|e| e.lines.iter())
13165 .filter(|l| {
13166 l.account_code.starts_with("20")
13167 || l.account_code.starts_with("21")
13168 || l.account_code.starts_with("22")
13169 })
13170 .map(|l| l.credit_amount - l.debit_amount)
13171 .sum();
13172 let working_capital = current_assets - current_liabilities;
13173
13174 let depreciation: rust_decimal::Decimal = entries
13175 .iter()
13176 .flat_map(|e| e.lines.iter())
13177 .filter(|l| l.account_code.starts_with("60"))
13178 .map(|l| l.debit_amount)
13179 .sum();
13180 let operating_cash_flow = pretax_income + depreciation;
13181
13182 let accounts: Vec<String> = self
13184 .coa
13185 .as_ref()
13186 .map(|coa| {
13187 coa.get_postable_accounts()
13188 .iter()
13189 .map(|acc| acc.account_code().to_string())
13190 .collect()
13191 })
13192 .unwrap_or_default();
13193
13194 let team_member_ids: Vec<String> = self
13196 .master_data
13197 .employees
13198 .iter()
13199 .take(8) .map(|e| e.employee_id.clone())
13201 .collect();
13202 let team_member_pairs: Vec<(String, String)> = self
13203 .master_data
13204 .employees
13205 .iter()
13206 .take(8)
13207 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13208 .collect();
13209
13210 let vendor_names: Vec<String> = self
13211 .master_data
13212 .vendors
13213 .iter()
13214 .map(|v| v.name.clone())
13215 .collect();
13216 let customer_names: Vec<String> = self
13217 .master_data
13218 .customers
13219 .iter()
13220 .map(|c| c.name.clone())
13221 .collect();
13222
13223 let entity_codes: Vec<String> = self
13224 .config
13225 .companies
13226 .iter()
13227 .map(|c| c.code.clone())
13228 .collect();
13229
13230 let journal_entry_ids: Vec<String> = entries
13232 .iter()
13233 .take(50)
13234 .map(|e| e.header.document_id.to_string())
13235 .collect();
13236
13237 let mut account_balances = std::collections::HashMap::<String, f64>::new();
13239 for entry in entries {
13240 for line in &entry.lines {
13241 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
13242 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
13243 *account_balances
13244 .entry(line.account_code.clone())
13245 .or_insert(0.0) += debit_f64 - credit_f64;
13246 }
13247 }
13248
13249 let control_ids: Vec<String> = Vec::new();
13254 let anomaly_refs: Vec<String> = Vec::new();
13255
13256 let mut context = EngagementContext {
13257 company_code,
13258 company_name,
13259 fiscal_year: start_date.year(),
13260 currency,
13261 total_revenue,
13262 total_assets,
13263 engagement_start: start_date,
13264 report_date: period_end,
13265 pretax_income,
13266 equity,
13267 gross_profit,
13268 working_capital,
13269 operating_cash_flow,
13270 total_debt,
13271 team_member_ids,
13272 team_member_pairs,
13273 accounts,
13274 vendor_names,
13275 customer_names,
13276 journal_entry_ids,
13277 account_balances,
13278 control_ids,
13279 anomaly_refs,
13280 journal_entries: entries.to_vec(),
13281 is_us_listed: false,
13282 entity_codes,
13283 auditor_firm_name: "DataSynth Audit LLP".into(),
13284 accounting_framework: self
13285 .config
13286 .accounting_standards
13287 .framework
13288 .map(|f| match f {
13289 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
13290 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
13291 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
13292 "French GAAP"
13293 }
13294 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
13295 "German GAAP"
13296 }
13297 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
13298 "Dual Reporting"
13299 }
13300 })
13301 .unwrap_or("IFRS")
13302 .into(),
13303 };
13304
13305 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
13307 let rng = ChaCha8Rng::seed_from_u64(seed);
13308 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
13309
13310 let mut result = engine
13311 .run_engagement(&context)
13312 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
13313
13314 info!(
13315 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
13316 {} phases completed, duration {:.1}h",
13317 result.event_log.len(),
13318 result.artifacts.total_artifacts(),
13319 result.anomalies.len(),
13320 result.phases_completed.len(),
13321 result.total_duration_hours,
13322 );
13323
13324 let tb_entity = context.company_code.clone();
13326 let tb_fy = context.fiscal_year;
13327 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
13328 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
13329 entries,
13330 &tb_entity,
13331 tb_fy,
13332 self.coa.as_ref().map(|c| c.as_ref()),
13333 );
13334
13335 let bag = result.artifacts;
13337 let mut snapshot = AuditSnapshot {
13338 engagements: bag.engagements,
13339 engagement_letters: bag.engagement_letters,
13340 materiality_calculations: bag.materiality_calculations,
13341 risk_assessments: bag.risk_assessments,
13342 combined_risk_assessments: bag.combined_risk_assessments,
13343 workpapers: bag.workpapers,
13344 evidence: bag.evidence,
13345 findings: bag.findings,
13346 judgments: bag.judgments,
13347 sampling_plans: bag.sampling_plans,
13348 sampled_items: bag.sampled_items,
13349 analytical_results: bag.analytical_results,
13350 going_concern_assessments: bag.going_concern_assessments,
13351 subsequent_events: bag.subsequent_events,
13352 audit_opinions: bag.audit_opinions,
13353 key_audit_matters: bag.key_audit_matters,
13354 procedure_steps: bag.procedure_steps,
13355 samples: bag.samples,
13356 confirmations: bag.confirmations,
13357 confirmation_responses: bag.confirmation_responses,
13358 fsm_event_trail: Some(result.event_log),
13360 ..Default::default()
13362 };
13363
13364 {
13366 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13367 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13368 }
13369 {
13370 use datasynth_standards::audit::isa_reference::IsaStandard;
13371 snapshot.isa_mappings = IsaStandard::standard_entries();
13372 }
13373
13374 info!(
13375 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
13376 {} risk assessments, {} findings, {} materiality calcs",
13377 snapshot.engagements.len(),
13378 snapshot.workpapers.len(),
13379 snapshot.evidence.len(),
13380 snapshot.risk_assessments.len(),
13381 snapshot.findings.len(),
13382 snapshot.materiality_calculations.len(),
13383 );
13384
13385 Ok(snapshot)
13386 }
13387
13388 fn export_graphs(
13395 &mut self,
13396 entries: &[JournalEntry],
13397 _coa: &Arc<ChartOfAccounts>,
13398 stats: &mut EnhancedGenerationStatistics,
13399 ) -> SynthResult<GraphExportSnapshot> {
13400 let pb = self.create_progress_bar(100, "Exporting Graphs");
13401
13402 let mut snapshot = GraphExportSnapshot::default();
13403
13404 let output_dir = self
13406 .output_path
13407 .clone()
13408 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13409 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13410
13411 for graph_type in &self.config.graph_export.graph_types {
13413 if let Some(pb) = &pb {
13414 pb.inc(10);
13415 }
13416
13417 let graph_config = TransactionGraphConfig {
13419 include_vendors: false,
13420 include_customers: false,
13421 create_debit_credit_edges: true,
13422 include_document_nodes: graph_type.include_document_nodes,
13423 min_edge_weight: graph_type.min_edge_weight,
13424 aggregate_parallel_edges: graph_type.aggregate_edges,
13425 framework: None,
13426 };
13427
13428 let mut builder = TransactionGraphBuilder::new(graph_config);
13429 builder.add_journal_entries(entries);
13430 let graph = builder.build();
13431
13432 stats.graph_node_count += graph.node_count();
13434 stats.graph_edge_count += graph.edge_count();
13435
13436 if let Some(pb) = &pb {
13437 pb.inc(40);
13438 }
13439
13440 for format in &self.config.graph_export.formats {
13442 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
13443
13444 if let Err(e) = std::fs::create_dir_all(&format_dir) {
13446 warn!("Failed to create graph output directory: {}", e);
13447 continue;
13448 }
13449
13450 match format {
13451 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
13452 let pyg_config = PyGExportConfig {
13453 common: datasynth_graph::CommonExportConfig {
13454 export_node_features: true,
13455 export_edge_features: true,
13456 export_node_labels: true,
13457 export_edge_labels: true,
13458 export_masks: true,
13459 train_ratio: self.config.graph_export.train_ratio,
13460 val_ratio: self.config.graph_export.validation_ratio,
13461 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13462 },
13463 one_hot_categoricals: false,
13464 };
13465
13466 let exporter = PyGExporter::new(pyg_config);
13467 match exporter.export(&graph, &format_dir) {
13468 Ok(metadata) => {
13469 snapshot.exports.insert(
13470 format!("{}_{}", graph_type.name, "pytorch_geometric"),
13471 GraphExportInfo {
13472 name: graph_type.name.clone(),
13473 format: "pytorch_geometric".to_string(),
13474 output_path: format_dir.clone(),
13475 node_count: metadata.num_nodes,
13476 edge_count: metadata.num_edges,
13477 },
13478 );
13479 snapshot.graph_count += 1;
13480 }
13481 Err(e) => {
13482 warn!("Failed to export PyTorch Geometric graph: {}", e);
13483 }
13484 }
13485 }
13486 datasynth_config::schema::GraphExportFormat::Neo4j => {
13487 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
13488
13489 let neo4j_config = Neo4jExportConfig {
13490 export_node_properties: true,
13491 export_edge_properties: true,
13492 export_features: true,
13493 generate_cypher: true,
13494 generate_admin_import: true,
13495 database_name: "synth".to_string(),
13496 cypher_batch_size: 1000,
13497 };
13498
13499 let exporter = Neo4jExporter::new(neo4j_config);
13500 match exporter.export(&graph, &format_dir) {
13501 Ok(metadata) => {
13502 snapshot.exports.insert(
13503 format!("{}_{}", graph_type.name, "neo4j"),
13504 GraphExportInfo {
13505 name: graph_type.name.clone(),
13506 format: "neo4j".to_string(),
13507 output_path: format_dir.clone(),
13508 node_count: metadata.num_nodes,
13509 edge_count: metadata.num_edges,
13510 },
13511 );
13512 snapshot.graph_count += 1;
13513 }
13514 Err(e) => {
13515 warn!("Failed to export Neo4j graph: {}", e);
13516 }
13517 }
13518 }
13519 datasynth_config::schema::GraphExportFormat::Dgl => {
13520 use datasynth_graph::{DGLExportConfig, DGLExporter};
13521
13522 let dgl_config = DGLExportConfig {
13523 common: datasynth_graph::CommonExportConfig {
13524 export_node_features: true,
13525 export_edge_features: true,
13526 export_node_labels: true,
13527 export_edge_labels: true,
13528 export_masks: true,
13529 train_ratio: self.config.graph_export.train_ratio,
13530 val_ratio: self.config.graph_export.validation_ratio,
13531 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13532 },
13533 heterogeneous: self.config.graph_export.dgl.heterogeneous,
13534 include_pickle_script: true, };
13536
13537 let exporter = DGLExporter::new(dgl_config);
13538 match exporter.export(&graph, &format_dir) {
13539 Ok(metadata) => {
13540 snapshot.exports.insert(
13541 format!("{}_{}", graph_type.name, "dgl"),
13542 GraphExportInfo {
13543 name: graph_type.name.clone(),
13544 format: "dgl".to_string(),
13545 output_path: format_dir.clone(),
13546 node_count: metadata.common.num_nodes,
13547 edge_count: metadata.common.num_edges,
13548 },
13549 );
13550 snapshot.graph_count += 1;
13551 }
13552 Err(e) => {
13553 warn!("Failed to export DGL graph: {}", e);
13554 }
13555 }
13556 }
13557 datasynth_config::schema::GraphExportFormat::RustGraph => {
13558 use datasynth_graph::{
13559 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
13560 };
13561
13562 let rustgraph_config = RustGraphExportConfig {
13563 include_features: true,
13564 include_temporal: true,
13565 include_labels: true,
13566 source_name: "datasynth".to_string(),
13567 batch_id: None,
13568 output_format: RustGraphOutputFormat::JsonLines,
13569 export_node_properties: true,
13570 export_edge_properties: true,
13571 pretty_print: false,
13572 };
13573
13574 let exporter = RustGraphExporter::new(rustgraph_config);
13575 match exporter.export(&graph, &format_dir) {
13576 Ok(metadata) => {
13577 snapshot.exports.insert(
13578 format!("{}_{}", graph_type.name, "rustgraph"),
13579 GraphExportInfo {
13580 name: graph_type.name.clone(),
13581 format: "rustgraph".to_string(),
13582 output_path: format_dir.clone(),
13583 node_count: metadata.num_nodes,
13584 edge_count: metadata.num_edges,
13585 },
13586 );
13587 snapshot.graph_count += 1;
13588 }
13589 Err(e) => {
13590 warn!("Failed to export RustGraph: {}", e);
13591 }
13592 }
13593 }
13594 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
13595 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
13597 }
13598 }
13599 }
13600
13601 if let Some(pb) = &pb {
13602 pb.inc(40);
13603 }
13604 }
13605
13606 stats.graph_export_count = snapshot.graph_count;
13607 snapshot.exported = snapshot.graph_count > 0;
13608
13609 if let Some(pb) = pb {
13610 pb.finish_with_message(format!(
13611 "Graphs exported: {} graphs ({} nodes, {} edges)",
13612 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
13613 ));
13614 }
13615
13616 Ok(snapshot)
13617 }
13618
13619 fn build_additional_graphs(
13624 &self,
13625 banking: &BankingSnapshot,
13626 intercompany: &IntercompanySnapshot,
13627 entries: &[JournalEntry],
13628 stats: &mut EnhancedGenerationStatistics,
13629 ) {
13630 let output_dir = self
13631 .output_path
13632 .clone()
13633 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13634 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13635
13636 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
13638 info!("Phase 10c: Building banking network graph");
13639 let config = BankingGraphConfig::default();
13640 let mut builder = BankingGraphBuilder::new(config);
13641 builder.add_customers(&banking.customers);
13642 builder.add_accounts(&banking.accounts, &banking.customers);
13643 builder.add_transactions(&banking.transactions);
13644 let graph = builder.build();
13645
13646 let node_count = graph.node_count();
13647 let edge_count = graph.edge_count();
13648 stats.graph_node_count += node_count;
13649 stats.graph_edge_count += edge_count;
13650
13651 for format in &self.config.graph_export.formats {
13653 if matches!(
13654 format,
13655 datasynth_config::schema::GraphExportFormat::PytorchGeometric
13656 ) {
13657 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
13658 if let Err(e) = std::fs::create_dir_all(&format_dir) {
13659 warn!("Failed to create banking graph output dir: {}", e);
13660 continue;
13661 }
13662 let pyg_config = PyGExportConfig::default();
13663 let exporter = PyGExporter::new(pyg_config);
13664 if let Err(e) = exporter.export(&graph, &format_dir) {
13665 warn!("Failed to export banking graph as PyG: {}", e);
13666 } else {
13667 info!(
13668 "Banking network graph exported: {} nodes, {} edges",
13669 node_count, edge_count
13670 );
13671 }
13672 }
13673 }
13674 }
13675
13676 let approval_entries: Vec<_> = entries
13678 .iter()
13679 .filter(|je| je.header.approval_workflow.is_some())
13680 .collect();
13681
13682 if !approval_entries.is_empty() {
13683 info!(
13684 "Phase 10c: Building approval network graph ({} entries with approvals)",
13685 approval_entries.len()
13686 );
13687 let config = ApprovalGraphConfig::default();
13688 let mut builder = ApprovalGraphBuilder::new(config);
13689
13690 for je in &approval_entries {
13691 if let Some(ref wf) = je.header.approval_workflow {
13692 for action in &wf.actions {
13693 let record = datasynth_core::models::ApprovalRecord {
13694 approval_id: format!(
13695 "APR-{}-{}",
13696 je.header.document_id, action.approval_level
13697 ),
13698 document_number: je.header.document_id.to_string(),
13699 document_type: "JE".to_string(),
13700 company_code: je.company_code().to_string(),
13701 requester_id: wf.preparer_id.clone(),
13702 requester_name: Some(wf.preparer_name.clone()),
13703 approver_id: action.actor_id.clone(),
13704 approver_name: action.actor_name.clone(),
13705 approval_date: je.posting_date(),
13706 action: format!("{:?}", action.action),
13707 amount: wf.amount,
13708 approval_limit: None,
13709 comments: action.comments.clone(),
13710 delegation_from: None,
13711 is_auto_approved: false,
13712 };
13713 builder.add_approval(&record);
13714 }
13715 }
13716 }
13717
13718 let graph = builder.build();
13719 let node_count = graph.node_count();
13720 let edge_count = graph.edge_count();
13721 stats.graph_node_count += node_count;
13722 stats.graph_edge_count += edge_count;
13723
13724 for format in &self.config.graph_export.formats {
13726 if matches!(
13727 format,
13728 datasynth_config::schema::GraphExportFormat::PytorchGeometric
13729 ) {
13730 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
13731 if let Err(e) = std::fs::create_dir_all(&format_dir) {
13732 warn!("Failed to create approval graph output dir: {}", e);
13733 continue;
13734 }
13735 let pyg_config = PyGExportConfig::default();
13736 let exporter = PyGExporter::new(pyg_config);
13737 if let Err(e) = exporter.export(&graph, &format_dir) {
13738 warn!("Failed to export approval graph as PyG: {}", e);
13739 } else {
13740 info!(
13741 "Approval network graph exported: {} nodes, {} edges",
13742 node_count, edge_count
13743 );
13744 }
13745 }
13746 }
13747 }
13748
13749 if self.config.companies.len() >= 2 {
13751 info!(
13752 "Phase 10c: Building entity relationship graph ({} companies)",
13753 self.config.companies.len()
13754 );
13755
13756 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13757 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
13758
13759 let parent_code = &self.config.companies[0].code;
13761 let mut companies: Vec<datasynth_core::models::Company> =
13762 Vec::with_capacity(self.config.companies.len());
13763
13764 let first = &self.config.companies[0];
13766 companies.push(datasynth_core::models::Company::parent(
13767 &first.code,
13768 &first.name,
13769 &first.country,
13770 &first.currency,
13771 ));
13772
13773 for cc in self.config.companies.iter().skip(1) {
13775 companies.push(datasynth_core::models::Company::subsidiary(
13776 &cc.code,
13777 &cc.name,
13778 &cc.country,
13779 &cc.currency,
13780 parent_code,
13781 rust_decimal::Decimal::from(100),
13782 ));
13783 }
13784
13785 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
13787 self.config
13788 .companies
13789 .iter()
13790 .skip(1)
13791 .enumerate()
13792 .map(|(i, cc)| {
13793 let mut rel =
13794 datasynth_core::models::intercompany::IntercompanyRelationship::new(
13795 format!("REL{:03}", i + 1),
13796 parent_code.clone(),
13797 cc.code.clone(),
13798 rust_decimal::Decimal::from(100),
13799 start_date,
13800 );
13801 rel.functional_currency = cc.currency.clone();
13802 rel
13803 })
13804 .collect();
13805
13806 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
13807 builder.add_companies(&companies);
13808 builder.add_ownership_relationships(&relationships);
13809
13810 for pair in &intercompany.matched_pairs {
13812 builder.add_intercompany_edge(
13813 &pair.seller_company,
13814 &pair.buyer_company,
13815 pair.amount,
13816 &format!("{:?}", pair.transaction_type),
13817 );
13818 }
13819
13820 let graph = builder.build();
13821 let node_count = graph.node_count();
13822 let edge_count = graph.edge_count();
13823 stats.graph_node_count += node_count;
13824 stats.graph_edge_count += edge_count;
13825
13826 for format in &self.config.graph_export.formats {
13828 if matches!(
13829 format,
13830 datasynth_config::schema::GraphExportFormat::PytorchGeometric
13831 ) {
13832 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
13833 if let Err(e) = std::fs::create_dir_all(&format_dir) {
13834 warn!("Failed to create entity graph output dir: {}", e);
13835 continue;
13836 }
13837 let pyg_config = PyGExportConfig::default();
13838 let exporter = PyGExporter::new(pyg_config);
13839 if let Err(e) = exporter.export(&graph, &format_dir) {
13840 warn!("Failed to export entity graph as PyG: {}", e);
13841 } else {
13842 info!(
13843 "Entity relationship graph exported: {} nodes, {} edges",
13844 node_count, edge_count
13845 );
13846 }
13847 }
13848 }
13849 } else {
13850 debug!(
13851 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
13852 self.config.companies.len()
13853 );
13854 }
13855 }
13856
13857 #[allow(clippy::too_many_arguments)]
13864 fn export_hypergraph(
13865 &self,
13866 coa: &Arc<ChartOfAccounts>,
13867 entries: &[JournalEntry],
13868 document_flows: &DocumentFlowSnapshot,
13869 sourcing: &SourcingSnapshot,
13870 hr: &HrSnapshot,
13871 manufacturing: &ManufacturingSnapshot,
13872 banking: &BankingSnapshot,
13873 audit: &AuditSnapshot,
13874 financial_reporting: &FinancialReportingSnapshot,
13875 ocpm: &OcpmSnapshot,
13876 compliance: &ComplianceRegulationsSnapshot,
13877 stats: &mut EnhancedGenerationStatistics,
13878 ) -> SynthResult<HypergraphExportInfo> {
13879 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
13880 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
13881 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
13882 use datasynth_graph::models::hypergraph::AggregationStrategy;
13883
13884 let hg_settings = &self.config.graph_export.hypergraph;
13885
13886 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
13888 "truncate" => AggregationStrategy::Truncate,
13889 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
13890 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
13891 "importance_sample" => AggregationStrategy::ImportanceSample,
13892 _ => AggregationStrategy::PoolByCounterparty,
13893 };
13894
13895 let builder_config = HypergraphConfig {
13896 max_nodes: hg_settings.max_nodes,
13897 aggregation_strategy,
13898 include_coso: hg_settings.governance_layer.include_coso,
13899 include_controls: hg_settings.governance_layer.include_controls,
13900 include_sox: hg_settings.governance_layer.include_sox,
13901 include_vendors: hg_settings.governance_layer.include_vendors,
13902 include_customers: hg_settings.governance_layer.include_customers,
13903 include_employees: hg_settings.governance_layer.include_employees,
13904 include_p2p: hg_settings.process_layer.include_p2p,
13905 include_o2c: hg_settings.process_layer.include_o2c,
13906 include_s2c: hg_settings.process_layer.include_s2c,
13907 include_h2r: hg_settings.process_layer.include_h2r,
13908 include_mfg: hg_settings.process_layer.include_mfg,
13909 include_bank: hg_settings.process_layer.include_bank,
13910 include_audit: hg_settings.process_layer.include_audit,
13911 include_r2r: hg_settings.process_layer.include_r2r,
13912 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
13913 docs_per_counterparty_threshold: hg_settings
13914 .process_layer
13915 .docs_per_counterparty_threshold,
13916 include_accounts: hg_settings.accounting_layer.include_accounts,
13917 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
13918 include_cross_layer_edges: hg_settings.cross_layer.enabled,
13919 include_compliance: self.config.compliance_regulations.enabled,
13920 include_tax: true,
13921 include_treasury: true,
13922 include_esg: true,
13923 include_project: true,
13924 include_intercompany: true,
13925 include_temporal_events: true,
13926 };
13927
13928 let mut builder = HypergraphBuilder::new(builder_config);
13929
13930 builder.add_coso_framework();
13932
13933 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
13936 let controls = InternalControl::standard_controls();
13937 builder.add_controls(&controls);
13938 }
13939
13940 builder.add_vendors(&self.master_data.vendors);
13942 builder.add_customers(&self.master_data.customers);
13943 builder.add_employees(&self.master_data.employees);
13944
13945 builder.add_p2p_documents(
13947 &document_flows.purchase_orders,
13948 &document_flows.goods_receipts,
13949 &document_flows.vendor_invoices,
13950 &document_flows.payments,
13951 );
13952 builder.add_o2c_documents(
13953 &document_flows.sales_orders,
13954 &document_flows.deliveries,
13955 &document_flows.customer_invoices,
13956 );
13957 builder.add_s2c_documents(
13958 &sourcing.sourcing_projects,
13959 &sourcing.qualifications,
13960 &sourcing.rfx_events,
13961 &sourcing.bids,
13962 &sourcing.bid_evaluations,
13963 &sourcing.contracts,
13964 );
13965 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
13966 builder.add_mfg_documents(
13967 &manufacturing.production_orders,
13968 &manufacturing.quality_inspections,
13969 &manufacturing.cycle_counts,
13970 );
13971 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
13972 builder.add_audit_documents(
13973 &audit.engagements,
13974 &audit.workpapers,
13975 &audit.findings,
13976 &audit.evidence,
13977 &audit.risk_assessments,
13978 &audit.judgments,
13979 &audit.materiality_calculations,
13980 &audit.audit_opinions,
13981 &audit.going_concern_assessments,
13982 );
13983 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
13984
13985 if let Some(ref event_log) = ocpm.event_log {
13987 builder.add_ocpm_events(event_log);
13988 }
13989
13990 if self.config.compliance_regulations.enabled
13992 && hg_settings.governance_layer.include_controls
13993 {
13994 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13996 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
13997 .standard_records
13998 .iter()
13999 .filter_map(|r| {
14000 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
14001 registry.get(&sid).cloned()
14002 })
14003 .collect();
14004
14005 builder.add_compliance_regulations(
14006 &standards,
14007 &compliance.findings,
14008 &compliance.filings,
14009 );
14010 }
14011
14012 builder.add_accounts(coa);
14014 builder.add_journal_entries_as_hyperedges(entries);
14015
14016 let hypergraph = builder.build();
14018
14019 let output_dir = self
14021 .output_path
14022 .clone()
14023 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14024 let hg_dir = output_dir
14025 .join(&self.config.graph_export.output_subdirectory)
14026 .join(&hg_settings.output_subdirectory);
14027
14028 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
14030 "unified" => {
14031 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14032 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14033 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
14034 })?;
14035 (
14036 metadata.num_nodes,
14037 metadata.num_edges,
14038 metadata.num_hyperedges,
14039 )
14040 }
14041 _ => {
14042 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
14044 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14045 SynthError::generation(format!("Hypergraph export failed: {e}"))
14046 })?;
14047 (
14048 metadata.num_nodes,
14049 metadata.num_edges,
14050 metadata.num_hyperedges,
14051 )
14052 }
14053 };
14054
14055 #[cfg(feature = "streaming")]
14057 if let Some(ref target_url) = hg_settings.stream_target {
14058 use crate::stream_client::{StreamClient, StreamConfig};
14059 use std::io::Write as _;
14060
14061 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
14062 let stream_config = StreamConfig {
14063 target_url: target_url.clone(),
14064 batch_size: hg_settings.stream_batch_size,
14065 api_key,
14066 ..StreamConfig::default()
14067 };
14068
14069 match StreamClient::new(stream_config) {
14070 Ok(mut client) => {
14071 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14072 match exporter.export_to_writer(&hypergraph, &mut client) {
14073 Ok(_) => {
14074 if let Err(e) = client.flush() {
14075 warn!("Failed to flush stream client: {}", e);
14076 } else {
14077 info!("Streamed {} records to {}", client.total_sent(), target_url);
14078 }
14079 }
14080 Err(e) => {
14081 warn!("Streaming export failed: {}", e);
14082 }
14083 }
14084 }
14085 Err(e) => {
14086 warn!("Failed to create stream client: {}", e);
14087 }
14088 }
14089 }
14090
14091 stats.graph_node_count += num_nodes;
14093 stats.graph_edge_count += num_edges;
14094 stats.graph_export_count += 1;
14095
14096 Ok(HypergraphExportInfo {
14097 node_count: num_nodes,
14098 edge_count: num_edges,
14099 hyperedge_count: num_hyperedges,
14100 output_path: hg_dir,
14101 })
14102 }
14103
14104 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
14109 let pb = self.create_progress_bar(100, "Generating Banking Data");
14110
14111 let orchestrator = BankingOrchestratorBuilder::new()
14113 .config(self.config.banking.clone())
14114 .seed(self.seed + 9000)
14115 .country_pack(self.primary_pack().clone())
14116 .build();
14117
14118 if let Some(pb) = &pb {
14119 pb.inc(10);
14120 }
14121
14122 let result = orchestrator.generate();
14124
14125 if let Some(pb) = &pb {
14126 pb.inc(90);
14127 pb.finish_with_message(format!(
14128 "Banking: {} customers, {} transactions",
14129 result.customers.len(),
14130 result.transactions.len()
14131 ));
14132 }
14133
14134 let mut banking_customers = result.customers;
14139 let core_customers = &self.master_data.customers;
14140 if !core_customers.is_empty() {
14141 for (i, bc) in banking_customers.iter_mut().enumerate() {
14142 let core = &core_customers[i % core_customers.len()];
14143 bc.name = CustomerName::business(&core.name);
14144 bc.residence_country = core.country.clone();
14145 bc.enterprise_customer_id = Some(core.customer_id.clone());
14146 }
14147 debug!(
14148 "Cross-referenced {} banking customers with {} core customers",
14149 banking_customers.len(),
14150 core_customers.len()
14151 );
14152 }
14153
14154 Ok(BankingSnapshot {
14155 customers: banking_customers,
14156 accounts: result.accounts,
14157 transactions: result.transactions,
14158 transaction_labels: result.transaction_labels,
14159 customer_labels: result.customer_labels,
14160 account_labels: result.account_labels,
14161 relationship_labels: result.relationship_labels,
14162 narratives: result.narratives,
14163 suspicious_count: result.stats.suspicious_count,
14164 scenario_count: result.scenarios.len(),
14165 })
14166 }
14167
14168 fn calculate_total_transactions(&self) -> u64 {
14170 let months = self.config.global.period_months as f64;
14171 self.config
14172 .companies
14173 .iter()
14174 .map(|c| {
14175 let annual = c.annual_transaction_volume.count() as f64;
14176 let weighted = annual * c.volume_weight;
14177 (weighted * months / 12.0) as u64
14178 })
14179 .sum()
14180 }
14181
14182 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
14184 if !self.phase_config.show_progress {
14185 return None;
14186 }
14187
14188 let pb = if let Some(mp) = &self.multi_progress {
14189 mp.add(ProgressBar::new(total))
14190 } else {
14191 ProgressBar::new(total)
14192 };
14193
14194 pb.set_style(
14195 ProgressStyle::default_bar()
14196 .template(&format!(
14197 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
14198 ))
14199 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
14200 .progress_chars("#>-"),
14201 );
14202
14203 Some(pb)
14204 }
14205
14206 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
14208 self.coa.clone()
14209 }
14210
14211 pub fn get_master_data(&self) -> &MasterDataSnapshot {
14213 &self.master_data
14214 }
14215
14216 fn phase_compliance_regulations(
14218 &mut self,
14219 _stats: &mut EnhancedGenerationStatistics,
14220 ) -> SynthResult<ComplianceRegulationsSnapshot> {
14221 if !self.phase_config.generate_compliance_regulations {
14222 return Ok(ComplianceRegulationsSnapshot::default());
14223 }
14224
14225 info!("Phase: Generating Compliance Regulations Data");
14226
14227 let cr_config = &self.config.compliance_regulations;
14228
14229 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
14231 self.config
14232 .companies
14233 .iter()
14234 .map(|c| c.country.clone())
14235 .collect::<std::collections::HashSet<_>>()
14236 .into_iter()
14237 .collect()
14238 } else {
14239 cr_config.jurisdictions.clone()
14240 };
14241
14242 let fallback_date =
14244 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
14245 let reference_date = cr_config
14246 .reference_date
14247 .as_ref()
14248 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
14249 .unwrap_or_else(|| {
14250 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14251 .unwrap_or(fallback_date)
14252 });
14253
14254 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
14256 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
14257 let cross_reference_records = reg_gen.generate_cross_reference_records();
14258 let jurisdiction_records =
14259 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
14260
14261 info!(
14262 " Standards: {} records, {} cross-references, {} jurisdictions",
14263 standard_records.len(),
14264 cross_reference_records.len(),
14265 jurisdiction_records.len()
14266 );
14267
14268 let audit_procedures = if cr_config.audit_procedures.enabled {
14270 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
14271 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
14272 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
14273 confidence_level: cr_config.audit_procedures.confidence_level,
14274 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
14275 };
14276 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
14277 self.seed + 9000,
14278 proc_config,
14279 );
14280 let registry = reg_gen.registry();
14281 let mut all_procs = Vec::new();
14282 for jurisdiction in &jurisdictions {
14283 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
14284 all_procs.extend(procs);
14285 }
14286 info!(" Audit procedures: {}", all_procs.len());
14287 all_procs
14288 } else {
14289 Vec::new()
14290 };
14291
14292 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
14294 let finding_config =
14295 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
14296 finding_rate: cr_config.findings.finding_rate,
14297 material_weakness_rate: cr_config.findings.material_weakness_rate,
14298 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
14299 generate_remediation: cr_config.findings.generate_remediation,
14300 };
14301 let mut finding_gen =
14302 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
14303 self.seed + 9100,
14304 finding_config,
14305 );
14306 let mut all_findings = Vec::new();
14307 for company in &self.config.companies {
14308 let company_findings =
14309 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
14310 all_findings.extend(company_findings);
14311 }
14312 info!(" Compliance findings: {}", all_findings.len());
14313 all_findings
14314 } else {
14315 Vec::new()
14316 };
14317
14318 let filings = if cr_config.filings.enabled {
14320 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
14321 filing_types: cr_config.filings.filing_types.clone(),
14322 generate_status_progression: cr_config.filings.generate_status_progression,
14323 };
14324 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
14325 self.seed + 9200,
14326 filing_config,
14327 );
14328 let company_codes: Vec<String> = self
14329 .config
14330 .companies
14331 .iter()
14332 .map(|c| c.code.clone())
14333 .collect();
14334 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14335 .unwrap_or(fallback_date);
14336 let filings = filing_gen.generate_filings(
14337 &company_codes,
14338 &jurisdictions,
14339 start_date,
14340 self.config.global.period_months,
14341 );
14342 info!(" Regulatory filings: {}", filings.len());
14343 filings
14344 } else {
14345 Vec::new()
14346 };
14347
14348 let compliance_graph = if cr_config.graph.enabled {
14350 let graph_config = datasynth_graph::ComplianceGraphConfig {
14351 include_standard_nodes: cr_config.graph.include_compliance_nodes,
14352 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
14353 include_cross_references: cr_config.graph.include_cross_references,
14354 include_supersession_edges: cr_config.graph.include_supersession_edges,
14355 include_account_links: cr_config.graph.include_account_links,
14356 include_control_links: cr_config.graph.include_control_links,
14357 include_company_links: cr_config.graph.include_company_links,
14358 };
14359 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
14360
14361 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
14363 .iter()
14364 .map(|r| datasynth_graph::StandardNodeInput {
14365 standard_id: r.standard_id.clone(),
14366 title: r.title.clone(),
14367 category: r.category.clone(),
14368 domain: r.domain.clone(),
14369 is_active: r.is_active,
14370 features: vec![if r.is_active { 1.0 } else { 0.0 }],
14371 applicable_account_types: r.applicable_account_types.clone(),
14372 applicable_processes: r.applicable_processes.clone(),
14373 })
14374 .collect();
14375 builder.add_standards(&standard_inputs);
14376
14377 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
14379 jurisdiction_records
14380 .iter()
14381 .map(|r| datasynth_graph::JurisdictionNodeInput {
14382 country_code: r.country_code.clone(),
14383 country_name: r.country_name.clone(),
14384 framework: r.accounting_framework.clone(),
14385 standard_count: r.standard_count,
14386 tax_rate: r.statutory_tax_rate,
14387 })
14388 .collect();
14389 builder.add_jurisdictions(&jurisdiction_inputs);
14390
14391 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
14393 cross_reference_records
14394 .iter()
14395 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
14396 from_standard: r.from_standard.clone(),
14397 to_standard: r.to_standard.clone(),
14398 relationship: r.relationship.clone(),
14399 convergence_level: r.convergence_level,
14400 })
14401 .collect();
14402 builder.add_cross_references(&xref_inputs);
14403
14404 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
14406 .iter()
14407 .map(|r| datasynth_graph::JurisdictionMappingInput {
14408 country_code: r.jurisdiction.clone(),
14409 standard_id: r.standard_id.clone(),
14410 })
14411 .collect();
14412 builder.add_jurisdiction_mappings(&mapping_inputs);
14413
14414 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
14416 .iter()
14417 .map(|p| datasynth_graph::ProcedureNodeInput {
14418 procedure_id: p.procedure_id.clone(),
14419 standard_id: p.standard_id.clone(),
14420 procedure_type: p.procedure_type.clone(),
14421 sample_size: p.sample_size,
14422 confidence_level: p.confidence_level,
14423 })
14424 .collect();
14425 builder.add_procedures(&proc_inputs);
14426
14427 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
14429 .iter()
14430 .map(|f| datasynth_graph::FindingNodeInput {
14431 finding_id: f.finding_id.to_string(),
14432 standard_id: f
14433 .related_standards
14434 .first()
14435 .map(|s| s.as_str().to_string())
14436 .unwrap_or_default(),
14437 severity: f.severity.to_string(),
14438 deficiency_level: f.deficiency_level.to_string(),
14439 severity_score: f.deficiency_level.severity_score(),
14440 control_id: f.control_id.clone(),
14441 affected_accounts: f.affected_accounts.clone(),
14442 })
14443 .collect();
14444 builder.add_findings(&finding_inputs);
14445
14446 if cr_config.graph.include_account_links {
14448 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14449 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
14450 for std_record in &standard_records {
14451 if let Some(std_obj) =
14452 registry.get(&datasynth_core::models::compliance::StandardId::parse(
14453 &std_record.standard_id,
14454 ))
14455 {
14456 for acct_type in &std_obj.applicable_account_types {
14457 account_links.push(datasynth_graph::AccountLinkInput {
14458 standard_id: std_record.standard_id.clone(),
14459 account_code: acct_type.clone(),
14460 account_name: acct_type.clone(),
14461 });
14462 }
14463 }
14464 }
14465 builder.add_account_links(&account_links);
14466 }
14467
14468 if cr_config.graph.include_control_links {
14470 let mut control_links = Vec::new();
14471 let sox_like_ids: Vec<String> = standard_records
14473 .iter()
14474 .filter(|r| {
14475 r.standard_id.starts_with("SOX")
14476 || r.standard_id.starts_with("PCAOB-AS-2201")
14477 })
14478 .map(|r| r.standard_id.clone())
14479 .collect();
14480 let control_ids = [
14482 ("C001", "Cash Controls"),
14483 ("C002", "Large Transaction Approval"),
14484 ("C010", "PO Approval"),
14485 ("C011", "Three-Way Match"),
14486 ("C020", "Revenue Recognition"),
14487 ("C021", "Credit Check"),
14488 ("C030", "Manual JE Approval"),
14489 ("C031", "Period Close Review"),
14490 ("C032", "Account Reconciliation"),
14491 ("C040", "Payroll Processing"),
14492 ("C050", "Fixed Asset Capitalization"),
14493 ("C060", "Intercompany Elimination"),
14494 ];
14495 for sox_id in &sox_like_ids {
14496 for (ctrl_id, ctrl_name) in &control_ids {
14497 control_links.push(datasynth_graph::ControlLinkInput {
14498 standard_id: sox_id.clone(),
14499 control_id: ctrl_id.to_string(),
14500 control_name: ctrl_name.to_string(),
14501 });
14502 }
14503 }
14504 builder.add_control_links(&control_links);
14505 }
14506
14507 if cr_config.graph.include_company_links {
14509 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
14510 .iter()
14511 .enumerate()
14512 .map(|(i, f)| datasynth_graph::FilingNodeInput {
14513 filing_id: format!("F{:04}", i + 1),
14514 filing_type: f.filing_type.to_string(),
14515 company_code: f.company_code.clone(),
14516 jurisdiction: f.jurisdiction.clone(),
14517 status: format!("{:?}", f.status),
14518 })
14519 .collect();
14520 builder.add_filings(&filing_inputs);
14521 }
14522
14523 let graph = builder.build();
14524 info!(
14525 " Compliance graph: {} nodes, {} edges",
14526 graph.nodes.len(),
14527 graph.edges.len()
14528 );
14529 Some(graph)
14530 } else {
14531 None
14532 };
14533
14534 self.check_resources_with_log("post-compliance-regulations")?;
14535
14536 Ok(ComplianceRegulationsSnapshot {
14537 standard_records,
14538 cross_reference_records,
14539 jurisdiction_records,
14540 audit_procedures,
14541 findings,
14542 filings,
14543 compliance_graph,
14544 })
14545 }
14546
14547 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
14549 use super::lineage::LineageGraphBuilder;
14550
14551 let mut builder = LineageGraphBuilder::new();
14552
14553 builder.add_config_section("config:global", "Global Config");
14555 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
14556 builder.add_config_section("config:transactions", "Transaction Config");
14557
14558 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
14560 builder.add_generator_phase("phase:je", "Journal Entry Generation");
14561
14562 builder.configured_by("phase:coa", "config:chart_of_accounts");
14564 builder.configured_by("phase:je", "config:transactions");
14565
14566 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
14568 builder.produced_by("output:je", "phase:je");
14569
14570 if self.phase_config.generate_master_data {
14572 builder.add_config_section("config:master_data", "Master Data Config");
14573 builder.add_generator_phase("phase:master_data", "Master Data Generation");
14574 builder.configured_by("phase:master_data", "config:master_data");
14575 builder.input_to("phase:master_data", "phase:je");
14576 }
14577
14578 if self.phase_config.generate_document_flows {
14579 builder.add_config_section("config:document_flows", "Document Flow Config");
14580 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
14581 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
14582 builder.configured_by("phase:p2p", "config:document_flows");
14583 builder.configured_by("phase:o2c", "config:document_flows");
14584
14585 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
14586 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
14587 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
14588 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
14589 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
14590
14591 builder.produced_by("output:po", "phase:p2p");
14592 builder.produced_by("output:gr", "phase:p2p");
14593 builder.produced_by("output:vi", "phase:p2p");
14594 builder.produced_by("output:so", "phase:o2c");
14595 builder.produced_by("output:ci", "phase:o2c");
14596 }
14597
14598 if self.phase_config.inject_anomalies {
14599 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
14600 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
14601 builder.configured_by("phase:anomaly", "config:fraud");
14602 builder.add_output_file(
14603 "output:labels",
14604 "Anomaly Labels",
14605 "labels/anomaly_labels.csv",
14606 );
14607 builder.produced_by("output:labels", "phase:anomaly");
14608 }
14609
14610 if self.phase_config.generate_audit {
14611 builder.add_config_section("config:audit", "Audit Config");
14612 builder.add_generator_phase("phase:audit", "Audit Data Generation");
14613 builder.configured_by("phase:audit", "config:audit");
14614 }
14615
14616 if self.phase_config.generate_banking {
14617 builder.add_config_section("config:banking", "Banking Config");
14618 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
14619 builder.configured_by("phase:banking", "config:banking");
14620 }
14621
14622 if self.config.llm.enabled {
14623 builder.add_config_section("config:llm", "LLM Enrichment Config");
14624 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
14625 builder.configured_by("phase:llm_enrichment", "config:llm");
14626 }
14627
14628 if self.config.diffusion.enabled {
14629 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
14630 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
14631 builder.configured_by("phase:diffusion", "config:diffusion");
14632 }
14633
14634 if self.config.causal.enabled {
14635 builder.add_config_section("config:causal", "Causal Generation Config");
14636 builder.add_generator_phase("phase:causal", "Causal Overlay");
14637 builder.configured_by("phase:causal", "config:causal");
14638 }
14639
14640 builder.build()
14641 }
14642
14643 fn compute_company_revenue(
14652 entries: &[JournalEntry],
14653 company_code: &str,
14654 ) -> rust_decimal::Decimal {
14655 use rust_decimal::Decimal;
14656 let mut revenue = Decimal::ZERO;
14657 for je in entries {
14658 if je.header.company_code != company_code {
14659 continue;
14660 }
14661 for line in &je.lines {
14662 if line.gl_account.starts_with('4') {
14663 revenue += line.credit_amount - line.debit_amount;
14665 }
14666 }
14667 }
14668 revenue.max(Decimal::ZERO)
14669 }
14670
14671 fn compute_entity_net_assets(
14675 entries: &[JournalEntry],
14676 entity_code: &str,
14677 ) -> rust_decimal::Decimal {
14678 use rust_decimal::Decimal;
14679 let mut asset_net = Decimal::ZERO;
14680 let mut liability_net = Decimal::ZERO;
14681 for je in entries {
14682 if je.header.company_code != entity_code {
14683 continue;
14684 }
14685 for line in &je.lines {
14686 if line.gl_account.starts_with('1') {
14687 asset_net += line.debit_amount - line.credit_amount;
14688 } else if line.gl_account.starts_with('2') {
14689 liability_net += line.credit_amount - line.debit_amount;
14690 }
14691 }
14692 }
14693 asset_net - liability_net
14694 }
14695
14696 fn phase_statistical_validation(
14707 &self,
14708 entries: &[JournalEntry],
14709 ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
14710 use datasynth_config::schema::StatisticalTestConfig;
14711 use datasynth_core::distributions::{
14712 run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
14713 run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
14714 };
14715 use rust_decimal::prelude::ToPrimitive;
14716
14717 let cfg = &self.config.distributions.validation;
14718 if !cfg.enabled {
14719 return Ok(None);
14720 }
14721
14722 let amounts: Vec<rust_decimal::Decimal> = entries
14725 .iter()
14726 .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
14727 .filter(|a| *a > rust_decimal::Decimal::ZERO)
14728 .collect();
14729
14730 let paired_amount_linecount: Vec<(f64, f64)> = entries
14734 .iter()
14735 .filter_map(|je| {
14736 let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
14737 if amt > rust_decimal::Decimal::ZERO {
14738 amt.to_f64().map(|a| (a, je.lines.len() as f64))
14739 } else {
14740 None
14741 }
14742 })
14743 .collect();
14744
14745 let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
14746 for test_cfg in &cfg.tests {
14747 match test_cfg {
14748 StatisticalTestConfig::BenfordFirstDigit {
14749 threshold_mad,
14750 warning_mad,
14751 } => {
14752 results.push(run_benford_first_digit(
14753 &amounts,
14754 *threshold_mad,
14755 *warning_mad,
14756 ));
14757 }
14758 StatisticalTestConfig::ChiSquared { bins, significance } => {
14759 results.push(run_chi_squared(&amounts, *bins, *significance));
14760 }
14761 StatisticalTestConfig::DistributionFit {
14762 target: _,
14763 ks_significance,
14764 method: _,
14765 } => {
14766 results.push(run_ks_uniform_log(&amounts, *ks_significance));
14769 }
14770 StatisticalTestConfig::AndersonDarling {
14771 target: _,
14772 significance,
14773 } => {
14774 results.push(run_anderson_darling(&amounts, *significance));
14777 }
14778 StatisticalTestConfig::CorrelationCheck {
14779 expected_correlations,
14780 } => {
14781 if expected_correlations.is_empty() {
14785 results.push(StatisticalTestResult {
14786 name: "correlation_check".to_string(),
14787 outcome: TestOutcome::Skipped,
14788 statistic: 0.0,
14789 threshold: 0.0,
14790 message: "no expected correlations declared".to_string(),
14791 });
14792 } else {
14793 for ec in expected_correlations {
14794 let pair_key = format!("{}_{}", ec.field1, ec.field2);
14795 let is_amount_linecount = (ec.field1 == "amount"
14796 && ec.field2 == "line_count")
14797 || (ec.field1 == "line_count" && ec.field2 == "amount");
14798 if is_amount_linecount {
14799 let xs: Vec<f64> =
14800 paired_amount_linecount.iter().map(|(a, _)| *a).collect();
14801 let ys: Vec<f64> =
14802 paired_amount_linecount.iter().map(|(_, l)| *l).collect();
14803 results.push(run_correlation_check(
14804 &pair_key,
14805 &xs,
14806 &ys,
14807 ec.expected_r,
14808 ec.tolerance,
14809 ));
14810 } else {
14811 results.push(StatisticalTestResult {
14812 name: format!("correlation_check_{pair_key}"),
14813 outcome: TestOutcome::Skipped,
14814 statistic: 0.0,
14815 threshold: ec.tolerance,
14816 message: format!(
14817 "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
14818 ec.field1, ec.field2
14819 ),
14820 });
14821 }
14822 }
14823 }
14824 }
14825 }
14826 }
14827
14828 let report = StatisticalValidationReport {
14829 sample_count: amounts.len(),
14830 results,
14831 };
14832
14833 if cfg.reporting.fail_on_error && !report.all_passed() {
14834 let failed = report.failed_names().join(", ");
14835 return Err(SynthError::validation(format!(
14836 "statistical validation failed: {failed}"
14837 )));
14838 }
14839
14840 Ok(Some(report))
14841 }
14842
14843 fn phase_analytics_metadata(
14856 &mut self,
14857 entries: &[JournalEntry],
14858 ) -> SynthResult<AnalyticsMetadataSnapshot> {
14859 use datasynth_generators::drift_event_generator::DriftEventGenerator;
14860 use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
14861 use datasynth_generators::management_report_generator::ManagementReportGenerator;
14862 use datasynth_generators::prior_year_generator::PriorYearGenerator;
14863 use std::collections::BTreeMap;
14864
14865 let mut snap = AnalyticsMetadataSnapshot::default();
14866
14867 if !self.phase_config.generate_analytics_metadata {
14868 return Ok(snap);
14869 }
14870
14871 let cfg = &self.config.analytics_metadata;
14872 let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14873 .map(|d| d.year())
14874 .unwrap_or(2025);
14875
14876 if cfg.prior_year {
14878 let mut gen = PriorYearGenerator::new(self.seed + 9100);
14879 for company in &self.config.companies {
14880 let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
14883 BTreeMap::new();
14884 for je in entries {
14885 if je.header.company_code != company.code {
14886 continue;
14887 }
14888 for line in &je.lines {
14889 let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
14890 (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
14891 });
14892 entry.1 += line.debit_amount - line.credit_amount;
14893 }
14894 }
14895 let current: Vec<(String, String, rust_decimal::Decimal)> = balances
14896 .into_iter()
14897 .filter(|(_, (_, bal))| !bal.is_zero())
14898 .map(|(code, (name, bal))| (code, name, bal))
14899 .collect();
14900 if !current.is_empty() {
14901 let comparatives =
14902 gen.generate_comparatives(&company.code, fiscal_year, ¤t);
14903 snap.prior_year_comparatives.extend(comparatives);
14904 }
14905 }
14906 info!(
14907 "v3.3.0 analytics: {} prior-year comparatives across {} companies",
14908 snap.prior_year_comparatives.len(),
14909 self.config.companies.len()
14910 );
14911 }
14912
14913 if cfg.industry_benchmark {
14915 use datasynth_core::models::IndustrySector;
14916 let industry = match self.config.global.industry {
14917 IndustrySector::Manufacturing => "manufacturing",
14918 IndustrySector::Retail => "retail",
14919 IndustrySector::FinancialServices => "financial_services",
14920 IndustrySector::Technology => "technology",
14921 IndustrySector::Healthcare => "healthcare",
14922 _ => "other",
14923 };
14924 let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
14925 let benchmarks = gen.generate(industry, fiscal_year);
14926 info!(
14927 "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
14928 benchmarks.len()
14929 );
14930 snap.industry_benchmarks = benchmarks;
14931 }
14932
14933 if cfg.management_reports {
14935 let mut gen = ManagementReportGenerator::new(self.seed + 9300);
14936 let period_months = self.config.global.period_months;
14937 for company in &self.config.companies {
14938 let reports =
14939 gen.generate_reports(&company.code, fiscal_year as u32, period_months);
14940 snap.management_reports.extend(reports);
14941 }
14942 info!(
14943 "v3.3.0 analytics: {} management reports across {} companies",
14944 snap.management_reports.len(),
14945 self.config.companies.len()
14946 );
14947 }
14948
14949 if cfg.drift_events {
14951 let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
14952 .expect("hardcoded NaiveDate 2025-01-01 is valid");
14953 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14954 .unwrap_or(fallback_start);
14955 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
14956 let mut gen = DriftEventGenerator::new(self.seed + 9400);
14957 let drifts = gen.generate_standalone_drifts(start_date, end_date);
14958 info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
14959 snap.drift_events = drifts;
14960 }
14961 let _ = entries;
14963
14964 Ok(snap)
14965 }
14966}
14967
14968fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
14970 match format {
14971 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
14972 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
14973 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
14974 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
14975 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
14976 }
14977}
14978
14979fn compute_trial_balance_entries(
14984 entries: &[JournalEntry],
14985 entity_code: &str,
14986 fiscal_year: i32,
14987 coa: Option<&ChartOfAccounts>,
14988) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
14989 use std::collections::BTreeMap;
14990
14991 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
14992 BTreeMap::new();
14993
14994 for je in entries {
14995 for line in &je.lines {
14996 let entry = balances.entry(line.account_code.clone()).or_default();
14997 entry.0 += line.debit_amount;
14998 entry.1 += line.credit_amount;
14999 }
15000 }
15001
15002 balances
15003 .into_iter()
15004 .map(
15005 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
15006 account_description: coa
15007 .and_then(|c| c.get_account(&account_code))
15008 .map(|a| a.description().to_string())
15009 .unwrap_or_else(|| account_code.clone()),
15010 account_code,
15011 debit_balance: debit,
15012 credit_balance: credit,
15013 net_balance: debit - credit,
15014 entity_code: entity_code.to_string(),
15015 period: format!("FY{}", fiscal_year),
15016 },
15017 )
15018 .collect()
15019}
15020
15021#[cfg(test)]
15022#[allow(clippy::unwrap_used)]
15023mod tests {
15024 use super::*;
15025 use datasynth_config::schema::*;
15026
15027 fn create_test_config() -> GeneratorConfig {
15028 GeneratorConfig {
15029 global: GlobalConfig {
15030 industry: IndustrySector::Manufacturing,
15031 start_date: "2024-01-01".to_string(),
15032 period_months: 1,
15033 seed: Some(42),
15034 parallel: false,
15035 group_currency: "USD".to_string(),
15036 presentation_currency: None,
15037 worker_threads: 0,
15038 memory_limit_mb: 0,
15039 fiscal_year_months: None,
15040 },
15041 companies: vec![CompanyConfig {
15042 code: "1000".to_string(),
15043 name: "Test Company".to_string(),
15044 currency: "USD".to_string(),
15045 functional_currency: None,
15046 country: "US".to_string(),
15047 annual_transaction_volume: TransactionVolume::TenK,
15048 volume_weight: 1.0,
15049 fiscal_year_variant: "K4".to_string(),
15050 }],
15051 chart_of_accounts: ChartOfAccountsConfig {
15052 complexity: CoAComplexity::Small,
15053 industry_specific: true,
15054 custom_accounts: None,
15055 min_hierarchy_depth: 2,
15056 max_hierarchy_depth: 4,
15057 },
15058 transactions: TransactionConfig::default(),
15059 output: OutputConfig::default(),
15060 fraud: FraudConfig::default(),
15061 internal_controls: InternalControlsConfig::default(),
15062 business_processes: BusinessProcessConfig::default(),
15063 user_personas: UserPersonaConfig::default(),
15064 templates: TemplateConfig::default(),
15065 approval: ApprovalConfig::default(),
15066 departments: DepartmentConfig::default(),
15067 master_data: MasterDataConfig::default(),
15068 document_flows: DocumentFlowConfig::default(),
15069 intercompany: IntercompanyConfig::default(),
15070 balance: BalanceConfig::default(),
15071 ocpm: OcpmConfig::default(),
15072 audit: AuditGenerationConfig::default(),
15073 banking: datasynth_banking::BankingConfig::default(),
15074 data_quality: DataQualitySchemaConfig::default(),
15075 scenario: ScenarioConfig::default(),
15076 temporal: TemporalDriftConfig::default(),
15077 graph_export: GraphExportConfig::default(),
15078 streaming: StreamingSchemaConfig::default(),
15079 rate_limit: RateLimitSchemaConfig::default(),
15080 temporal_attributes: TemporalAttributeSchemaConfig::default(),
15081 relationships: RelationshipSchemaConfig::default(),
15082 accounting_standards: AccountingStandardsConfig::default(),
15083 audit_standards: AuditStandardsConfig::default(),
15084 distributions: Default::default(),
15085 temporal_patterns: Default::default(),
15086 vendor_network: VendorNetworkSchemaConfig::default(),
15087 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
15088 relationship_strength: RelationshipStrengthSchemaConfig::default(),
15089 cross_process_links: CrossProcessLinksSchemaConfig::default(),
15090 organizational_events: OrganizationalEventsSchemaConfig::default(),
15091 behavioral_drift: BehavioralDriftSchemaConfig::default(),
15092 market_drift: MarketDriftSchemaConfig::default(),
15093 drift_labeling: DriftLabelingSchemaConfig::default(),
15094 anomaly_injection: Default::default(),
15095 industry_specific: Default::default(),
15096 fingerprint_privacy: Default::default(),
15097 quality_gates: Default::default(),
15098 compliance: Default::default(),
15099 webhooks: Default::default(),
15100 llm: Default::default(),
15101 diffusion: Default::default(),
15102 causal: Default::default(),
15103 source_to_pay: Default::default(),
15104 financial_reporting: Default::default(),
15105 hr: Default::default(),
15106 manufacturing: Default::default(),
15107 sales_quotes: Default::default(),
15108 tax: Default::default(),
15109 treasury: Default::default(),
15110 project_accounting: Default::default(),
15111 esg: Default::default(),
15112 country_packs: None,
15113 scenarios: Default::default(),
15114 session: Default::default(),
15115 compliance_regulations: Default::default(),
15116 analytics_metadata: Default::default(),
15117 }
15118 }
15119
/// Building an orchestrator with default phase settings succeeds for the
/// baseline test configuration.
#[test]
fn test_enhanced_orchestrator_creation() {
    let built = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(built.is_ok());
}
15126
/// With only journal-entry generation enabled, a run succeeds and produces
/// at least one journal entry.
#[test]
fn test_minimal_generation() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let outcome = engine.generate();

    assert!(outcome.is_ok());
    let output = outcome.unwrap();
    assert!(!output.journal_entries.is_empty());
}
15146
/// With only master-data generation enabled, vendors, customers and materials
/// are all populated.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        // Small per-company volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let master = &output.master_data;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
15171
/// With master data and document flows enabled, both P2P and O2C chains are
/// produced, along with purchase orders and sales orders.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        // Everything else is switched off.
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let flows = &output.document_flows;
    // Chains exist for both processes ...
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());
    // ... and so do the flattened document collections.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
15205
/// With anomaly injection enabled on top of journal entries, entries are
/// produced and the anomaly label summary is present.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        inject_anomalies: true,
        generate_master_data: false,
        generate_document_flows: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    assert!(!output.journal_entries.is_empty());
    assert!(output.anomaly_labels.summary.is_some());
}
15228
/// End-to-end run with master data, document flows, journal entries and
/// balance validation enabled. Every stage must leave data behind and the
/// balance validation must have processed entries.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        validate_balances: true,
        inject_anomalies: false,
        inject_data_quality: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Small volumes.
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    // Core artefacts.
    assert!(!output.master_data.vendors.is_empty());
    assert!(!output.master_data.customers.is_empty());
    assert!(!output.document_flows.p2p_chains.is_empty());
    assert!(!output.document_flows.o2c_chains.is_empty());
    assert!(!output.journal_entries.is_empty());
    assert!(output.statistics.accounts_count > 0);

    // Subledger invoices.
    assert!(!output.subledger.ap_invoices.is_empty());
    assert!(!output.subledger.ar_invoices.is_empty());

    // Balance validation ran over at least one entry.
    assert!(output.balance_validation.validated);
    assert!(output.balance_validation.entries_processed > 0);
}
15270
/// AP/AR subledger invoices must exist, match the document-flow vendor and
/// customer invoices one-to-one by count, and agree with the reported
/// statistics.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let flows = &output.document_flows;
    let subledger = &output.subledger;
    let stats = &output.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Counts line up between subledger and document flows.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Statistics mirror the subledger sizes.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
15326
/// Balance validation over generated journal entries reports that it ran,
/// processed entries, found no unbalanced entries, and that total debits
/// equal total credits.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        validate_balances: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let validation = &output.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
    assert!(!validation.has_unbalanced_entries);
    assert_eq!(validation.total_debits, validation.total_credits);
}
15356
/// Reported statistics must match the sizes of the generated collections for
/// vendors, customers, materials and journal entries.
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_journal_entries: true,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let stats = &output.statistics;
    let master = &output.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(stats.total_entries as usize, output.journal_entries.len());
}
15395
/// Pins the default `PhaseConfig`: core generation phases, balance validation
/// and progress display are on, anomaly injection is off, and vendor/customer
/// volumes are positive.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Enabled by default.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);
    // Disabled by default.
    assert!(!defaults.inject_anomalies);
    // Enabled by default.
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);
    // Non-zero volumes.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
15408
/// A freshly constructed orchestrator has no chart of accounts yet.
#[test]
fn test_get_coa_before_generation() {
    let engine = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    assert!(engine.get_coa().is_none());
}
15417
/// After a generation run with journal entries enabled, the chart of accounts
/// is available from the orchestrator.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    // Only the side effect on the orchestrator matters here.
    let _ = engine.generate().unwrap();

    assert!(engine.get_coa().is_some());
}
15436
/// A master-data-only run exposes generated vendors on the result.
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    assert!(!output.master_data.vendors.is_empty());
}
15460
/// `with_progress(false)` turns off `show_progress` on the orchestrator's
/// phase configuration.
#[test]
fn test_with_progress_builder() {
    let built = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let engine = built.with_progress(false);

    assert!(!engine.phase_config.show_progress);
}
15471
/// With a second company added (5 vendors and 5 customers requested per
/// company), at least 10 vendors and 10 customers are reported and the
/// company count is 2.
#[test]
fn test_multi_company_generation() {
    let subsidiary = CompanyConfig {
        code: String::from("2000"),
        name: String::from("Subsidiary"),
        currency: String::from("EUR"),
        functional_currency: None,
        country: String::from("DE"),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: String::from("K4"),
    };
    let mut settings = create_test_config();
    settings.companies.push(subsidiary);

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_journal_entries: true,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(settings, phases).unwrap();
    let output = engine.generate().unwrap();

    let stats = &output.statistics;
    assert!(stats.vendor_count >= 10);
    assert!(stats.customer_count >= 10);
    assert!(stats.companies_count == 2);
}
15508
/// Requesting document flows without generating master data yields no P2P or
/// O2C chains.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let phases = PhaseConfig {
        // Document flows are requested, but no master data is generated.
        generate_document_flows: true,
        generate_master_data: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let flows = &output.document_flows;
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
15528
/// The reported `total_line_items` statistic equals the sum of
/// `line_count()` over all generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    // Recount the line items independently of the statistics.
    let mut recounted: u64 = 0;
    for entry in &output.journal_entries {
        recounted += entry.line_count() as u64;
    }

    assert_eq!(output.statistics.total_line_items, recounted);
}
15552
/// Audit generation with two engagements: exactly two engagements are
/// produced, the main audit artefact collections are non-empty, and every
/// audit statistic matches the size of its collection.
#[test]
fn test_audit_generation() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        // Audit sizing.
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let audit = &output.audit;
    let stats = &output.statistics;

    // Core audit artefacts.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Standard-specific artefacts.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Statistics mirror the collection sizes.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(stats.audit_procedure_step_count, audit.procedure_steps.len());
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(stats.audit_related_party_count, audit.related_parties.len());
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
15672
15673 #[test]
15674 fn test_new_phases_disabled_by_default() {
15675 let config = create_test_config();
15676 assert!(!config.llm.enabled);
15678 assert!(!config.diffusion.enabled);
15679 assert!(!config.causal.enabled);
15680
15681 let phase_config = PhaseConfig {
15682 generate_master_data: false,
15683 generate_document_flows: false,
15684 generate_journal_entries: true,
15685 inject_anomalies: false,
15686 show_progress: false,
15687 ..Default::default()
15688 };
15689
15690 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15691 let result = orchestrator.generate().unwrap();
15692
15693 assert_eq!(result.statistics.llm_enrichment_ms, 0);
15695 assert_eq!(result.statistics.llm_vendors_enriched, 0);
15696 assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
15697 assert_eq!(result.statistics.diffusion_samples_generated, 0);
15698 assert_eq!(result.statistics.causal_generation_ms, 0);
15699 assert_eq!(result.statistics.causal_samples_generated, 0);
15700 assert!(result.statistics.causal_validation_passed.is_none());
15701 assert_eq!(result.statistics.counterfactual_pair_count, 0);
15702 assert!(result.counterfactual_pairs.is_empty());
15703 }
15704
15705 #[test]
15706 fn test_counterfactual_generation_enabled() {
15707 let config = create_test_config();
15708 let phase_config = PhaseConfig {
15709 generate_master_data: false,
15710 generate_document_flows: false,
15711 generate_journal_entries: true,
15712 inject_anomalies: false,
15713 show_progress: false,
15714 generate_counterfactuals: true,
15715 generate_period_close: false, ..Default::default()
15717 };
15718
15719 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15720 let result = orchestrator.generate().unwrap();
15721
15722 if !result.journal_entries.is_empty() {
15724 assert_eq!(
15725 result.counterfactual_pairs.len(),
15726 result.journal_entries.len()
15727 );
15728 assert_eq!(
15729 result.statistics.counterfactual_pair_count,
15730 result.journal_entries.len()
15731 );
15732 let ids: std::collections::HashSet<_> = result
15734 .counterfactual_pairs
15735 .iter()
15736 .map(|p| p.pair_id.clone())
15737 .collect();
15738 assert_eq!(ids.len(), result.counterfactual_pairs.len());
15739 }
15740 }
15741
15742 #[test]
15743 fn test_llm_enrichment_enabled() {
15744 let mut config = create_test_config();
15745 config.llm.enabled = true;
15746 config.llm.max_vendor_enrichments = 3;
15747
15748 let phase_config = PhaseConfig {
15749 generate_master_data: true,
15750 generate_document_flows: false,
15751 generate_journal_entries: false,
15752 inject_anomalies: false,
15753 show_progress: false,
15754 vendors_per_company: 5,
15755 customers_per_company: 3,
15756 materials_per_company: 3,
15757 assets_per_company: 3,
15758 employees_per_company: 3,
15759 ..Default::default()
15760 };
15761
15762 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15763 let result = orchestrator.generate().unwrap();
15764
15765 assert!(result.statistics.llm_vendors_enriched > 0);
15767 assert!(result.statistics.llm_vendors_enriched <= 3);
15768 }
15769
15770 #[test]
15771 fn test_diffusion_enhancement_enabled() {
15772 let mut config = create_test_config();
15773 config.diffusion.enabled = true;
15774 config.diffusion.n_steps = 50;
15775 config.diffusion.sample_size = 20;
15776
15777 let phase_config = PhaseConfig {
15778 generate_master_data: false,
15779 generate_document_flows: false,
15780 generate_journal_entries: true,
15781 inject_anomalies: false,
15782 show_progress: false,
15783 ..Default::default()
15784 };
15785
15786 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15787 let result = orchestrator.generate().unwrap();
15788
15789 assert_eq!(result.statistics.diffusion_samples_generated, 20);
15791 }
15792
15793 #[test]
15794 fn test_causal_overlay_enabled() {
15795 let mut config = create_test_config();
15796 config.causal.enabled = true;
15797 config.causal.template = "fraud_detection".to_string();
15798 config.causal.sample_size = 100;
15799 config.causal.validate = true;
15800
15801 let phase_config = PhaseConfig {
15802 generate_master_data: false,
15803 generate_document_flows: false,
15804 generate_journal_entries: true,
15805 inject_anomalies: false,
15806 show_progress: false,
15807 ..Default::default()
15808 };
15809
15810 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15811 let result = orchestrator.generate().unwrap();
15812
15813 assert_eq!(result.statistics.causal_samples_generated, 100);
15815 assert!(result.statistics.causal_validation_passed.is_some());
15817 }
15818
15819 #[test]
15820 fn test_causal_overlay_revenue_cycle_template() {
15821 let mut config = create_test_config();
15822 config.causal.enabled = true;
15823 config.causal.template = "revenue_cycle".to_string();
15824 config.causal.sample_size = 50;
15825 config.causal.validate = false;
15826
15827 let phase_config = PhaseConfig {
15828 generate_master_data: false,
15829 generate_document_flows: false,
15830 generate_journal_entries: true,
15831 inject_anomalies: false,
15832 show_progress: false,
15833 ..Default::default()
15834 };
15835
15836 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15837 let result = orchestrator.generate().unwrap();
15838
15839 assert_eq!(result.statistics.causal_samples_generated, 50);
15841 assert!(result.statistics.causal_validation_passed.is_none());
15843 }
15844
15845 #[test]
15846 fn test_all_new_phases_enabled_together() {
15847 let mut config = create_test_config();
15848 config.llm.enabled = true;
15849 config.llm.max_vendor_enrichments = 2;
15850 config.diffusion.enabled = true;
15851 config.diffusion.n_steps = 20;
15852 config.diffusion.sample_size = 10;
15853 config.causal.enabled = true;
15854 config.causal.sample_size = 50;
15855 config.causal.validate = true;
15856
15857 let phase_config = PhaseConfig {
15858 generate_master_data: true,
15859 generate_document_flows: false,
15860 generate_journal_entries: true,
15861 inject_anomalies: false,
15862 show_progress: false,
15863 vendors_per_company: 5,
15864 customers_per_company: 3,
15865 materials_per_company: 3,
15866 assets_per_company: 3,
15867 employees_per_company: 3,
15868 ..Default::default()
15869 };
15870
15871 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15872 let result = orchestrator.generate().unwrap();
15873
15874 assert!(result.statistics.llm_vendors_enriched > 0);
15876 assert_eq!(result.statistics.diffusion_samples_generated, 10);
15877 assert_eq!(result.statistics.causal_samples_generated, 50);
15878 assert!(result.statistics.causal_validation_passed.is_some());
15879 }
15880
15881 #[test]
15882 fn test_statistics_serialization_with_new_fields() {
15883 let stats = EnhancedGenerationStatistics {
15884 total_entries: 100,
15885 total_line_items: 500,
15886 llm_enrichment_ms: 42,
15887 llm_vendors_enriched: 10,
15888 diffusion_enhancement_ms: 100,
15889 diffusion_samples_generated: 50,
15890 causal_generation_ms: 200,
15891 causal_samples_generated: 100,
15892 causal_validation_passed: Some(true),
15893 ..Default::default()
15894 };
15895
15896 let json = serde_json::to_string(&stats).unwrap();
15897 let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
15898
15899 assert_eq!(deserialized.llm_enrichment_ms, 42);
15900 assert_eq!(deserialized.llm_vendors_enriched, 10);
15901 assert_eq!(deserialized.diffusion_enhancement_ms, 100);
15902 assert_eq!(deserialized.diffusion_samples_generated, 50);
15903 assert_eq!(deserialized.causal_generation_ms, 200);
15904 assert_eq!(deserialized.causal_samples_generated, 100);
15905 assert_eq!(deserialized.causal_validation_passed, Some(true));
15906 }
15907
15908 #[test]
15909 fn test_statistics_backward_compat_deserialization() {
15910 let old_json = r#"{
15912 "total_entries": 100,
15913 "total_line_items": 500,
15914 "accounts_count": 50,
15915 "companies_count": 1,
15916 "period_months": 12,
15917 "vendor_count": 10,
15918 "customer_count": 20,
15919 "material_count": 15,
15920 "asset_count": 5,
15921 "employee_count": 8,
15922 "p2p_chain_count": 5,
15923 "o2c_chain_count": 5,
15924 "ap_invoice_count": 5,
15925 "ar_invoice_count": 5,
15926 "ocpm_event_count": 0,
15927 "ocpm_object_count": 0,
15928 "ocpm_case_count": 0,
15929 "audit_engagement_count": 0,
15930 "audit_workpaper_count": 0,
15931 "audit_evidence_count": 0,
15932 "audit_risk_count": 0,
15933 "audit_finding_count": 0,
15934 "audit_judgment_count": 0,
15935 "anomalies_injected": 0,
15936 "data_quality_issues": 0,
15937 "banking_customer_count": 0,
15938 "banking_account_count": 0,
15939 "banking_transaction_count": 0,
15940 "banking_suspicious_count": 0,
15941 "graph_export_count": 0,
15942 "graph_node_count": 0,
15943 "graph_edge_count": 0
15944 }"#;
15945
15946 let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
15947
15948 assert_eq!(stats.llm_enrichment_ms, 0);
15950 assert_eq!(stats.llm_vendors_enriched, 0);
15951 assert_eq!(stats.diffusion_enhancement_ms, 0);
15952 assert_eq!(stats.diffusion_samples_generated, 0);
15953 assert_eq!(stats.causal_generation_ms, 0);
15954 assert_eq!(stats.causal_samples_generated, 0);
15955 assert!(stats.causal_validation_passed.is_none());
15956 }
15957}