1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 MaterialGenerator,
117 O2CDocumentChain,
118 O2CGenerator,
119 O2CGeneratorConfig,
120 O2CPaymentBehavior,
121 P2PDocumentChain,
122 P2PGenerator,
124 P2PGeneratorConfig,
125 P2PPaymentBehavior,
126 PaymentReference,
127 ProvisionGenerator,
129 QualificationGenerator,
130 RfxGenerator,
131 RiskAssessmentGenerator,
132 RunningBalanceTracker,
134 ScorecardGenerator,
135 SegmentGenerator,
137 SegmentSeed,
138 SourcingProjectGenerator,
139 SpendAnalysisGenerator,
140 ValidationError,
141 VendorGenerator,
143 WorkpaperGenerator,
144};
145use datasynth_graph::{
146 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
147 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
148 TransactionGraphConfig,
149};
150use datasynth_ocpm::{
151 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
152 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
153 OcpmUuidFactory, P2pDocuments, S2cDocuments,
154};
155
156use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
157use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
158use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
159use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
160use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
161use datasynth_core::models::documents::PaymentMethod;
162use datasynth_core::models::IndustrySector;
163use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
164use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
165use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
166use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
167use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
168use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
169use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
170use datasynth_generators::audit::sample_generator::SampleGenerator;
171use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
172use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
173use datasynth_generators::coa_generator::CoAFramework;
174use datasynth_generators::llm_enrichment::VendorLlmEnricher;
175use rayon::prelude::*;
176
177fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
183 let payment_behavior = &schema_config.payment_behavior;
184 let late_dist = &payment_behavior.late_payment_days_distribution;
185
186 P2PGeneratorConfig {
187 three_way_match_rate: schema_config.three_way_match_rate,
188 partial_delivery_rate: schema_config.partial_delivery_rate,
189 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
190 price_variance_rate: schema_config.price_variance_rate,
191 max_price_variance_percent: schema_config.max_price_variance_percent,
192 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
193 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
194 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
195 payment_method_distribution: vec![
196 (PaymentMethod::BankTransfer, 0.60),
197 (PaymentMethod::Check, 0.25),
198 (PaymentMethod::Wire, 0.10),
199 (PaymentMethod::CreditCard, 0.05),
200 ],
201 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
202 payment_behavior: P2PPaymentBehavior {
203 late_payment_rate: payment_behavior.late_payment_rate,
204 late_payment_distribution: LatePaymentDistribution {
205 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
206 late_8_to_14: late_dist.late_8_to_14,
207 very_late_15_to_30: late_dist.very_late_15_to_30,
208 severely_late_31_to_60: late_dist.severely_late_31_to_60,
209 extremely_late_over_60: late_dist.extremely_late_over_60,
210 },
211 partial_payment_rate: payment_behavior.partial_payment_rate,
212 payment_correction_rate: payment_behavior.payment_correction_rate,
213 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
214 },
215 }
216}
217
218fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
220 let payment_behavior = &schema_config.payment_behavior;
221
222 O2CGeneratorConfig {
223 credit_check_failure_rate: schema_config.credit_check_failure_rate,
224 partial_shipment_rate: schema_config.partial_shipment_rate,
225 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
226 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
227 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
228 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
229 bad_debt_rate: schema_config.bad_debt_rate,
230 returns_rate: schema_config.return_rate,
231 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
232 payment_method_distribution: vec![
233 (PaymentMethod::BankTransfer, 0.50),
234 (PaymentMethod::Check, 0.30),
235 (PaymentMethod::Wire, 0.15),
236 (PaymentMethod::CreditCard, 0.05),
237 ],
238 payment_behavior: O2CPaymentBehavior {
239 partial_payment_rate: payment_behavior.partial_payments.rate,
240 short_payment_rate: payment_behavior.short_payments.rate,
241 max_short_percent: payment_behavior.short_payments.max_short_percent,
242 on_account_rate: payment_behavior.on_account_payments.rate,
243 payment_correction_rate: payment_behavior.payment_corrections.rate,
244 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
245 },
246 }
247}
248
/// Switches and volume settings that select which generation phases run and
/// how much data each one is asked to produce.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Generate master data.
    pub generate_master_data: bool,
    /// Generate document flows.
    pub generate_document_flows: bool,
    /// Generate OCPM events.
    pub generate_ocpm_events: bool,
    /// Generate journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies into the generated data.
    pub inject_anomalies: bool,
    /// Inject data-quality issues into the generated data.
    pub inject_data_quality: bool,
    /// Validate balances after generation.
    pub validate_balances: bool,
    /// Show progress output while generating.
    pub show_progress: bool,
    /// Number of vendors per company.
    pub vendors_per_company: usize,
    /// Number of customers per company.
    pub customers_per_company: usize,
    /// Number of materials per company.
    pub materials_per_company: usize,
    /// Number of assets per company.
    pub assets_per_company: usize,
    /// Number of employees per company.
    pub employees_per_company: usize,
    /// Number of P2P chains.
    pub p2p_chains: usize,
    /// Number of O2C chains.
    pub o2c_chains: usize,
    /// Generate audit data.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Number of workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Number of risks per engagement.
    pub risks_per_engagement: usize,
    /// Number of findings per engagement.
    pub findings_per_engagement: usize,
    /// Number of judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Generate banking data.
    pub generate_banking: bool,
    /// Generate graph exports.
    pub generate_graph_export: bool,
    /// Generate sourcing data.
    pub generate_sourcing: bool,
    /// Generate bank reconciliations.
    pub generate_bank_reconciliation: bool,
    /// Generate financial statements.
    pub generate_financial_statements: bool,
    /// Generate accounting-standards data.
    pub generate_accounting_standards: bool,
    /// Generate manufacturing data.
    pub generate_manufacturing: bool,
    /// Generate sales quotes, KPIs and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Generate tax data.
    pub generate_tax: bool,
    /// Generate ESG data.
    pub generate_esg: bool,
    /// Generate intercompany data.
    pub generate_intercompany: bool,
    /// Generate evolution events.
    pub generate_evolution_events: bool,
    /// Generate counterfactuals.
    pub generate_counterfactuals: bool,
    /// Generate compliance-regulations data.
    pub generate_compliance_regulations: bool,
    /// Generate period-close data.
    pub generate_period_close: bool,
    /// Generate HR data.
    pub generate_hr: bool,
    /// Generate treasury data.
    pub generate_treasury: bool,
    /// Generate project-accounting data.
    pub generate_project_accounting: bool,
}
333
334impl Default for PhaseConfig {
335 fn default() -> Self {
336 Self {
337 generate_master_data: true,
338 generate_document_flows: true,
339 generate_ocpm_events: false, generate_journal_entries: true,
341 inject_anomalies: false,
342 inject_data_quality: false, validate_balances: true,
344 show_progress: true,
345 vendors_per_company: 50,
346 customers_per_company: 100,
347 materials_per_company: 200,
348 assets_per_company: 50,
349 employees_per_company: 100,
350 p2p_chains: 100,
351 o2c_chains: 100,
352 generate_audit: false, audit_engagements: 5,
354 workpapers_per_engagement: 20,
355 evidence_per_workpaper: 5,
356 risks_per_engagement: 15,
357 findings_per_engagement: 8,
358 judgments_per_engagement: 10,
359 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, generate_evolution_events: true, generate_counterfactuals: false, generate_compliance_regulations: false, generate_period_close: true, generate_hr: false, generate_treasury: false, generate_project_accounting: false, }
378 }
379}
380
381impl PhaseConfig {
382 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
387 Self {
388 generate_master_data: true,
390 generate_document_flows: true,
391 generate_journal_entries: true,
392 validate_balances: true,
393 generate_period_close: true,
394 generate_evolution_events: true,
395 show_progress: true,
396
397 generate_audit: cfg.audit.enabled,
399 generate_banking: cfg.banking.enabled,
400 generate_graph_export: cfg.graph_export.enabled,
401 generate_sourcing: cfg.source_to_pay.enabled,
402 generate_intercompany: cfg.intercompany.enabled,
403 generate_financial_statements: cfg.financial_reporting.enabled,
404 generate_bank_reconciliation: cfg.financial_reporting.enabled,
405 generate_accounting_standards: cfg.accounting_standards.enabled,
406 generate_manufacturing: cfg.manufacturing.enabled,
407 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
408 generate_tax: cfg.tax.enabled,
409 generate_esg: cfg.esg.enabled,
410 generate_ocpm_events: cfg.ocpm.enabled,
411 generate_compliance_regulations: cfg.compliance_regulations.enabled,
412 generate_hr: cfg.hr.enabled,
413 generate_treasury: cfg.treasury.enabled,
414 generate_project_accounting: cfg.project_accounting.enabled,
415
416 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
418
419 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
420 inject_data_quality: cfg.data_quality.enabled,
421
422 vendors_per_company: 50,
424 customers_per_company: 100,
425 materials_per_company: 200,
426 assets_per_company: 50,
427 employees_per_company: 100,
428 p2p_chains: 100,
429 o2c_chains: 100,
430 audit_engagements: 5,
431 workpapers_per_engagement: 20,
432 evidence_per_workpaper: 5,
433 risks_per_engagement: 15,
434 findings_per_engagement: 8,
435 judgments_per_engagement: 10,
436 }
437 }
438}
439
440#[derive(Debug, Clone, Default)]
442pub struct MasterDataSnapshot {
443 pub vendors: Vec<Vendor>,
445 pub customers: Vec<Customer>,
447 pub materials: Vec<Material>,
449 pub assets: Vec<FixedAsset>,
451 pub employees: Vec<Employee>,
453 pub cost_centers: Vec<datasynth_core::models::CostCenter>,
455 pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
457}
458
/// Size and location details for a hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
471
472#[derive(Debug, Clone, Default)]
474pub struct DocumentFlowSnapshot {
475 pub p2p_chains: Vec<P2PDocumentChain>,
477 pub o2c_chains: Vec<O2CDocumentChain>,
479 pub purchase_orders: Vec<documents::PurchaseOrder>,
481 pub goods_receipts: Vec<documents::GoodsReceipt>,
483 pub vendor_invoices: Vec<documents::VendorInvoice>,
485 pub sales_orders: Vec<documents::SalesOrder>,
487 pub deliveries: Vec<documents::Delivery>,
489 pub customer_invoices: Vec<documents::CustomerInvoice>,
491 pub payments: Vec<documents::Payment>,
493 pub document_references: Vec<documents::DocumentReference>,
496}
497
498#[derive(Debug, Clone, Default)]
500pub struct SubledgerSnapshot {
501 pub ap_invoices: Vec<APInvoice>,
503 pub ar_invoices: Vec<ARInvoice>,
505 pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
507 pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
509 pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
511 pub ar_aging_reports: Vec<ARAgingReport>,
513 pub ap_aging_reports: Vec<APAgingReport>,
515 pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
517 pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
519 pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
521 pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
523}
524
525#[derive(Debug, Clone, Default)]
527pub struct OcpmSnapshot {
528 pub event_log: Option<OcpmEventLog>,
530 pub event_count: usize,
532 pub object_count: usize,
534 pub case_count: usize,
536}
537
538#[derive(Debug, Clone, Default)]
540pub struct AuditSnapshot {
541 pub engagements: Vec<AuditEngagement>,
543 pub workpapers: Vec<Workpaper>,
545 pub evidence: Vec<AuditEvidence>,
547 pub risk_assessments: Vec<RiskAssessment>,
549 pub findings: Vec<AuditFinding>,
551 pub judgments: Vec<ProfessionalJudgment>,
553 pub confirmations: Vec<ExternalConfirmation>,
555 pub confirmation_responses: Vec<ConfirmationResponse>,
557 pub procedure_steps: Vec<AuditProcedureStep>,
559 pub samples: Vec<AuditSample>,
561 pub analytical_results: Vec<AnalyticalProcedureResult>,
563 pub ia_functions: Vec<InternalAuditFunction>,
565 pub ia_reports: Vec<InternalAuditReport>,
567 pub related_parties: Vec<RelatedParty>,
569 pub related_party_transactions: Vec<RelatedPartyTransaction>,
571 pub component_auditors: Vec<ComponentAuditor>,
574 pub group_audit_plan: Option<GroupAuditPlan>,
576 pub component_instructions: Vec<ComponentInstruction>,
578 pub component_reports: Vec<ComponentAuditorReport>,
580 pub engagement_letters: Vec<EngagementLetter>,
583 pub subsequent_events: Vec<SubsequentEvent>,
586 pub service_organizations: Vec<ServiceOrganization>,
589 pub soc_reports: Vec<SocReport>,
591 pub user_entity_controls: Vec<UserEntityControl>,
593 pub going_concern_assessments:
596 Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
597 pub accounting_estimates:
600 Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
601 pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
604 pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
606 pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
609 pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
611 pub materiality_calculations:
614 Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
615 pub combined_risk_assessments:
618 Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
619 pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
622 pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
624 pub significant_transaction_classes:
627 Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
628 pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
631 pub analytical_relationships:
634 Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
635 pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
638 pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
641 pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
644}
645
646#[derive(Debug, Clone, Default)]
648pub struct BankingSnapshot {
649 pub customers: Vec<BankingCustomer>,
651 pub accounts: Vec<BankAccount>,
653 pub transactions: Vec<BankTransaction>,
655 pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
657 pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
659 pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
661 pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
663 pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
665 pub suspicious_count: usize,
667 pub scenario_count: usize,
669}
670
671#[derive(Debug, Clone, Default, Serialize)]
673pub struct GraphExportSnapshot {
674 pub exported: bool,
676 pub graph_count: usize,
678 pub exports: HashMap<String, GraphExportInfo>,
680}
681
682#[derive(Debug, Clone, Serialize)]
684pub struct GraphExportInfo {
685 pub name: String,
687 pub format: String,
689 pub output_path: PathBuf,
691 pub node_count: usize,
693 pub edge_count: usize,
695}
696
697#[derive(Debug, Clone, Default)]
699pub struct SourcingSnapshot {
700 pub spend_analyses: Vec<SpendAnalysis>,
702 pub sourcing_projects: Vec<SourcingProject>,
704 pub qualifications: Vec<SupplierQualification>,
706 pub rfx_events: Vec<RfxEvent>,
708 pub bids: Vec<SupplierBid>,
710 pub bid_evaluations: Vec<BidEvaluation>,
712 pub contracts: Vec<ProcurementContract>,
714 pub catalog_items: Vec<CatalogItem>,
716 pub scorecards: Vec<SupplierScorecard>,
718}
719
720#[derive(Debug, Clone, Serialize, Deserialize)]
722pub struct PeriodTrialBalance {
723 pub fiscal_year: u16,
725 pub fiscal_period: u8,
727 pub period_start: NaiveDate,
729 pub period_end: NaiveDate,
731 pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
733}
734
735#[derive(Debug, Clone, Default)]
737pub struct FinancialReportingSnapshot {
738 pub financial_statements: Vec<FinancialStatement>,
741 pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
744 pub consolidated_statements: Vec<FinancialStatement>,
746 pub consolidation_schedules: Vec<ConsolidationSchedule>,
748 pub bank_reconciliations: Vec<BankReconciliation>,
750 pub trial_balances: Vec<PeriodTrialBalance>,
752 pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
754 pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
756 pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
758}
759
760#[derive(Debug, Clone, Default)]
762pub struct HrSnapshot {
763 pub payroll_runs: Vec<PayrollRun>,
765 pub payroll_line_items: Vec<PayrollLineItem>,
767 pub time_entries: Vec<TimeEntry>,
769 pub expense_reports: Vec<ExpenseReport>,
771 pub benefit_enrollments: Vec<BenefitEnrollment>,
773 pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
775 pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
777 pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
779 pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
781 pub pension_journal_entries: Vec<JournalEntry>,
783 pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
785 pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
787 pub stock_comp_journal_entries: Vec<JournalEntry>,
789 pub payroll_run_count: usize,
791 pub payroll_line_item_count: usize,
793 pub time_entry_count: usize,
795 pub expense_report_count: usize,
797 pub benefit_enrollment_count: usize,
799 pub pension_plan_count: usize,
801 pub stock_grant_count: usize,
803}
804
805#[derive(Debug, Clone, Default)]
807pub struct AccountingStandardsSnapshot {
808 pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
810 pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
812 pub business_combinations:
814 Vec<datasynth_core::models::business_combination::BusinessCombination>,
815 pub business_combination_journal_entries: Vec<JournalEntry>,
817 pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
819 pub ecl_provision_movements:
821 Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
822 pub ecl_journal_entries: Vec<JournalEntry>,
824 pub provisions: Vec<datasynth_core::models::provision::Provision>,
826 pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
828 pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
830 pub provision_journal_entries: Vec<JournalEntry>,
832 pub currency_translation_results:
834 Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
835 pub revenue_contract_count: usize,
837 pub impairment_test_count: usize,
839 pub business_combination_count: usize,
841 pub ecl_model_count: usize,
843 pub provision_count: usize,
845 pub currency_translation_count: usize,
847}
848
849#[derive(Debug, Clone, Default)]
851pub struct ComplianceRegulationsSnapshot {
852 pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
854 pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
856 pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
858 pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
860 pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
862 pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
864 pub compliance_graph: Option<datasynth_graph::Graph>,
866}
867
868#[derive(Debug, Clone, Default)]
870pub struct ManufacturingSnapshot {
871 pub production_orders: Vec<ProductionOrder>,
873 pub quality_inspections: Vec<QualityInspection>,
875 pub cycle_counts: Vec<CycleCount>,
877 pub bom_components: Vec<BomComponent>,
879 pub inventory_movements: Vec<InventoryMovement>,
881 pub production_order_count: usize,
883 pub quality_inspection_count: usize,
885 pub cycle_count_count: usize,
887 pub bom_component_count: usize,
889 pub inventory_movement_count: usize,
891}
892
893#[derive(Debug, Clone, Default)]
895pub struct SalesKpiBudgetsSnapshot {
896 pub sales_quotes: Vec<SalesQuote>,
898 pub kpis: Vec<ManagementKpi>,
900 pub budgets: Vec<Budget>,
902 pub sales_quote_count: usize,
904 pub kpi_count: usize,
906 pub budget_line_count: usize,
908}
909
910#[derive(Debug, Clone, Default)]
912pub struct AnomalyLabels {
913 pub labels: Vec<LabeledAnomaly>,
915 pub summary: Option<AnomalySummary>,
917 pub by_type: HashMap<String, usize>,
919}
920
921#[derive(Debug, Clone, Default)]
923pub struct BalanceValidationResult {
924 pub validated: bool,
926 pub is_balanced: bool,
928 pub entries_processed: u64,
930 pub total_debits: rust_decimal::Decimal,
932 pub total_credits: rust_decimal::Decimal,
934 pub accounts_tracked: usize,
936 pub companies_tracked: usize,
938 pub validation_errors: Vec<ValidationError>,
940 pub has_unbalanced_entries: bool,
942}
943
944#[derive(Debug, Clone, Default)]
946pub struct TaxSnapshot {
947 pub jurisdictions: Vec<TaxJurisdiction>,
949 pub codes: Vec<TaxCode>,
951 pub tax_lines: Vec<TaxLine>,
953 pub tax_returns: Vec<TaxReturn>,
955 pub tax_provisions: Vec<TaxProvision>,
957 pub withholding_records: Vec<WithholdingTaxRecord>,
959 pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
961 pub jurisdiction_count: usize,
963 pub code_count: usize,
965 pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
967}
968
969#[derive(Debug, Clone, Default, Serialize, Deserialize)]
971pub struct IntercompanySnapshot {
972 pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
974 pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
976 pub seller_journal_entries: Vec<JournalEntry>,
978 pub buyer_journal_entries: Vec<JournalEntry>,
980 pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
982 pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
984 pub matched_pair_count: usize,
986 pub elimination_entry_count: usize,
988 pub match_rate: f64,
990}
991
992#[derive(Debug, Clone, Default)]
994pub struct EsgSnapshot {
995 pub emissions: Vec<EmissionRecord>,
997 pub energy: Vec<EnergyConsumption>,
999 pub water: Vec<WaterUsage>,
1001 pub waste: Vec<WasteRecord>,
1003 pub diversity: Vec<WorkforceDiversityMetric>,
1005 pub pay_equity: Vec<PayEquityMetric>,
1007 pub safety_incidents: Vec<SafetyIncident>,
1009 pub safety_metrics: Vec<SafetyMetric>,
1011 pub governance: Vec<GovernanceMetric>,
1013 pub supplier_assessments: Vec<SupplierEsgAssessment>,
1015 pub materiality: Vec<MaterialityAssessment>,
1017 pub disclosures: Vec<EsgDisclosure>,
1019 pub climate_scenarios: Vec<ClimateScenario>,
1021 pub anomaly_labels: Vec<EsgAnomalyLabel>,
1023 pub emission_count: usize,
1025 pub disclosure_count: usize,
1027}
1028
1029#[derive(Debug, Clone, Default)]
1031pub struct TreasurySnapshot {
1032 pub cash_positions: Vec<CashPosition>,
1034 pub cash_forecasts: Vec<CashForecast>,
1036 pub cash_pools: Vec<CashPool>,
1038 pub cash_pool_sweeps: Vec<CashPoolSweep>,
1040 pub hedging_instruments: Vec<HedgingInstrument>,
1042 pub hedge_relationships: Vec<HedgeRelationship>,
1044 pub debt_instruments: Vec<DebtInstrument>,
1046 pub bank_guarantees: Vec<BankGuarantee>,
1048 pub netting_runs: Vec<NettingRun>,
1050 pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1052}
1053
1054#[derive(Debug, Clone, Default)]
1056pub struct ProjectAccountingSnapshot {
1057 pub projects: Vec<Project>,
1059 pub cost_lines: Vec<ProjectCostLine>,
1061 pub revenue_records: Vec<ProjectRevenue>,
1063 pub earned_value_metrics: Vec<EarnedValueMetric>,
1065 pub change_orders: Vec<ChangeOrder>,
1067 pub milestones: Vec<ProjectMilestone>,
1069}
1070
1071#[derive(Debug, Default)]
1073pub struct EnhancedGenerationResult {
1074 pub chart_of_accounts: ChartOfAccounts,
1076 pub master_data: MasterDataSnapshot,
1078 pub document_flows: DocumentFlowSnapshot,
1080 pub subledger: SubledgerSnapshot,
1082 pub ocpm: OcpmSnapshot,
1084 pub audit: AuditSnapshot,
1086 pub banking: BankingSnapshot,
1088 pub graph_export: GraphExportSnapshot,
1090 pub sourcing: SourcingSnapshot,
1092 pub financial_reporting: FinancialReportingSnapshot,
1094 pub hr: HrSnapshot,
1096 pub accounting_standards: AccountingStandardsSnapshot,
1098 pub manufacturing: ManufacturingSnapshot,
1100 pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1102 pub tax: TaxSnapshot,
1104 pub esg: EsgSnapshot,
1106 pub treasury: TreasurySnapshot,
1108 pub project_accounting: ProjectAccountingSnapshot,
1110 pub process_evolution: Vec<ProcessEvolutionEvent>,
1112 pub organizational_events: Vec<OrganizationalEvent>,
1114 pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1116 pub intercompany: IntercompanySnapshot,
1118 pub journal_entries: Vec<JournalEntry>,
1120 pub anomaly_labels: AnomalyLabels,
1122 pub balance_validation: BalanceValidationResult,
1124 pub data_quality_stats: DataQualityStats,
1126 pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1128 pub statistics: EnhancedGenerationStatistics,
1130 pub lineage: Option<super::lineage::LineageGraph>,
1132 pub gate_result: Option<datasynth_eval::gates::GateResult>,
1134 pub internal_controls: Vec<InternalControl>,
1136 pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1140 pub opening_balances: Vec<GeneratedOpeningBalance>,
1142 pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1144 pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1146 pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1148 pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1150 pub temporal_vendor_chains:
1152 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1153 pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1155 pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1157 pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1159 pub compliance_regulations: ComplianceRegulationsSnapshot,
1161}
1162
/// Flat collection of counters and timings describing one generation run.
///
/// The struct derives `Default`, so a run can start from an all-zero value and
/// fill in only what it produces. Fields annotated with `#[serde(default)]`
/// fall back to their default when missing from deserialized input; fields
/// without the attribute are required when deserializing.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // --- Journal entry totals and run dimensions ---
    /// Number of journal entries.
    pub total_entries: u64,
    /// Number of journal entry line items, summed over all entries.
    pub total_line_items: u64,
    pub accounts_count: usize,
    /// Number of companies in the configuration.
    pub companies_count: usize,
    /// Length of the generation period, in months.
    pub period_months: u32,

    // --- Master data counters (by field name) ---
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,

    // --- Document flow and subledger invoice counters ---
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,

    // --- OCPM counters ---
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,

    // --- Audit counters ---
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,

    // --- Anomaly and data quality counters ---
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,

    // --- Banking counters ---
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,

    // --- Graph export counters ---
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,

    // --- LLM / diffusion / causal phases ---
    // NOTE(review): the `_ms` suffix suggests milliseconds — confirm at the
    // sites that populate these fields.
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// `None` by default; tri-state so "not evaluated" can be told apart from
    /// a failed validation.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // --- Sourcing counters ---
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,

    // --- Financial reporting counters ---
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // --- HR counters ---
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    #[serde(default)]
    pub pension_plan_count: usize,
    #[serde(default)]
    pub stock_grant_count: usize,

    // --- Accounting standards counters ---
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    #[serde(default)]
    pub business_combination_count: usize,
    #[serde(default)]
    pub ecl_model_count: usize,
    #[serde(default)]
    pub provision_count: usize,

    // --- Manufacturing counters ---
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,

    // --- Sales quotes, KPIs and budgets ---
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,

    // --- Tax master data counters ---
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,

    // --- ESG counters ---
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // --- Intercompany counters ---
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,

    // --- Additional subledger counters ---
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // --- Treasury instrument counters ---
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,

    // --- Project accounting counters ---
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,

    // --- Miscellaneous phase counters ---
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,

    // --- Cash management counters ---
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,

    // --- Evolution event counters ---
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,

    // --- Fraud-analytics related counters ---
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,

    // --- Temporal and relationship counters ---
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,

    // --- Disruption, industry and period-close counters ---
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
    #[serde(default)]
    pub period_close_je_count: usize,
}
1413
/// Drives the enhanced generation workflow: holds the configuration and the
/// state shared between generation phases.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated when the orchestrator is created.
    config: GeneratorConfig,
    /// Per-phase settings (for example `show_progress` and
    /// `generate_graph_export`).
    phase_config: PhaseConfig,
    /// Shared chart of accounts; `None` right after construction.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts out as the default (empty) snapshot and is
    /// moved into the result at the end of `generate`.
    master_data: MasterDataSnapshot,
    /// Base seed: `config.global.seed` if set, otherwise a random value.
    seed: u64,
    /// Progress bar container; set when progress display is switched on.
    multi_progress: Option<MultiProgress>,
    /// Resource guard queried for degradation levels during generation.
    resource_guard: ResourceGuard,
    /// Output location, if one was supplied via `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generator specs; empty unless built from a fingerprint.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Registry used to look up country packs by country string.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional sink that receives generated items phase by phase.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1433
1434impl EnhancedOrchestrator {
1435 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1437 datasynth_config::validate_config(&config)?;
1438
1439 let seed = config.global.seed.unwrap_or_else(rand::random);
1440
1441 let resource_guard = Self::build_resource_guard(&config, None);
1443
1444 let country_pack_registry = match &config.country_packs {
1446 Some(cp) => {
1447 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1448 .map_err(|e| SynthError::config(e.to_string()))?
1449 }
1450 None => datasynth_core::CountryPackRegistry::builtin_only()
1451 .map_err(|e| SynthError::config(e.to_string()))?,
1452 };
1453
1454 Ok(Self {
1455 config,
1456 phase_config,
1457 coa: None,
1458 master_data: MasterDataSnapshot::default(),
1459 seed,
1460 multi_progress: None,
1461 resource_guard,
1462 output_path: None,
1463 copula_generators: Vec::new(),
1464 country_pack_registry,
1465 phase_sink: None,
1466 })
1467 }
1468
1469 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1471 Self::new(config, PhaseConfig::default())
1472 }
1473
1474 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1476 self.phase_sink = Some(sink);
1477 self
1478 }
1479
1480 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1482 if let Some(ref sink) = self.phase_sink {
1483 for item in items {
1484 if let Ok(value) = serde_json::to_value(item) {
1485 if let Err(e) = sink.emit(phase, type_name, &value) {
1486 warn!(
1487 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1488 );
1489 }
1490 }
1491 }
1492 if let Err(e) = sink.phase_complete(phase) {
1493 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1494 }
1495 }
1496 }
1497
1498 pub fn with_progress(mut self, show: bool) -> Self {
1500 self.phase_config.show_progress = show;
1501 if show {
1502 self.multi_progress = Some(MultiProgress::new());
1503 }
1504 self
1505 }
1506
1507 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1509 let path = path.into();
1510 self.output_path = Some(path.clone());
1511 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1513 self
1514 }
1515
1516 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1518 &self.country_pack_registry
1519 }
1520
1521 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1523 self.country_pack_registry.get_by_str(country)
1524 }
1525
1526 fn primary_country_code(&self) -> &str {
1529 self.config
1530 .companies
1531 .first()
1532 .map(|c| c.country.as_str())
1533 .unwrap_or("US")
1534 }
1535
1536 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1538 self.country_pack_for(self.primary_country_code())
1539 }
1540
1541 fn resolve_coa_framework(&self) -> CoAFramework {
1543 if self.config.accounting_standards.enabled {
1544 match self.config.accounting_standards.framework {
1545 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1546 return CoAFramework::FrenchPcg;
1547 }
1548 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1549 return CoAFramework::GermanSkr04;
1550 }
1551 _ => {}
1552 }
1553 }
1554 let pack = self.primary_pack();
1556 match pack.accounting.framework.as_str() {
1557 "french_gaap" => CoAFramework::FrenchPcg,
1558 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1559 _ => CoAFramework::UsGaap,
1560 }
1561 }
1562
1563 pub fn has_copulas(&self) -> bool {
1568 !self.copula_generators.is_empty()
1569 }
1570
1571 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1577 &self.copula_generators
1578 }
1579
1580 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1584 &mut self.copula_generators
1585 }
1586
1587 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1591 self.copula_generators
1592 .iter_mut()
1593 .find(|c| c.name == copula_name)
1594 .map(|c| c.generator.sample())
1595 }
1596
1597 pub fn from_fingerprint(
1620 fingerprint_path: &std::path::Path,
1621 phase_config: PhaseConfig,
1622 scale: f64,
1623 ) -> SynthResult<Self> {
1624 info!("Loading fingerprint from: {}", fingerprint_path.display());
1625
1626 let reader = FingerprintReader::new();
1628 let fingerprint = reader
1629 .read_from_file(fingerprint_path)
1630 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1631
1632 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1633 }
1634
1635 pub fn from_fingerprint_data(
1642 fingerprint: Fingerprint,
1643 phase_config: PhaseConfig,
1644 scale: f64,
1645 ) -> SynthResult<Self> {
1646 info!(
1647 "Synthesizing config from fingerprint (version: {}, tables: {})",
1648 fingerprint.manifest.version,
1649 fingerprint.schema.tables.len()
1650 );
1651
1652 let seed: u64 = rand::random();
1654 info!("Fingerprint synthesis seed: {}", seed);
1655
1656 let options = SynthesisOptions {
1658 scale,
1659 seed: Some(seed),
1660 preserve_correlations: true,
1661 inject_anomalies: true,
1662 };
1663 let synthesizer = ConfigSynthesizer::with_options(options);
1664
1665 let synthesis_result = synthesizer
1667 .synthesize_full(&fingerprint, seed)
1668 .map_err(|e| {
1669 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1670 })?;
1671
1672 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1674 Self::base_config_for_industry(industry)
1675 } else {
1676 Self::base_config_for_industry("manufacturing")
1677 };
1678
1679 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1681
1682 info!(
1684 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1685 fingerprint.schema.tables.len(),
1686 scale,
1687 synthesis_result.copula_generators.len()
1688 );
1689
1690 if !synthesis_result.copula_generators.is_empty() {
1691 for spec in &synthesis_result.copula_generators {
1692 info!(
1693 " Copula '{}' for table '{}': {} columns",
1694 spec.name,
1695 spec.table,
1696 spec.columns.len()
1697 );
1698 }
1699 }
1700
1701 let mut orchestrator = Self::new(config, phase_config)?;
1703
1704 orchestrator.copula_generators = synthesis_result.copula_generators;
1706
1707 Ok(orchestrator)
1708 }
1709
1710 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1712 use datasynth_config::presets::create_preset;
1713 use datasynth_config::TransactionVolume;
1714 use datasynth_core::models::{CoAComplexity, IndustrySector};
1715
1716 let sector = match industry.to_lowercase().as_str() {
1717 "manufacturing" => IndustrySector::Manufacturing,
1718 "retail" => IndustrySector::Retail,
1719 "financial" | "financial_services" => IndustrySector::FinancialServices,
1720 "healthcare" => IndustrySector::Healthcare,
1721 "technology" | "tech" => IndustrySector::Technology,
1722 _ => IndustrySector::Manufacturing,
1723 };
1724
1725 create_preset(
1727 sector,
1728 1, 12, CoAComplexity::Medium,
1731 TransactionVolume::TenK,
1732 )
1733 }
1734
1735 fn apply_config_patch(
1737 mut config: GeneratorConfig,
1738 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1739 ) -> GeneratorConfig {
1740 use datasynth_fingerprint::synthesis::ConfigValue;
1741
1742 for (key, value) in patch.values() {
1743 match (key.as_str(), value) {
1744 ("transactions.count", ConfigValue::Integer(n)) => {
1747 info!(
1748 "Fingerprint suggests {} transactions (apply via company volumes)",
1749 n
1750 );
1751 }
1752 ("global.period_months", ConfigValue::Integer(n)) => {
1753 config.global.period_months = (*n).clamp(1, 120) as u32;
1754 }
1755 ("global.start_date", ConfigValue::String(s)) => {
1756 config.global.start_date = s.clone();
1757 }
1758 ("global.seed", ConfigValue::Integer(n)) => {
1759 config.global.seed = Some(*n as u64);
1760 }
1761 ("fraud.enabled", ConfigValue::Bool(b)) => {
1762 config.fraud.enabled = *b;
1763 }
1764 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1765 config.fraud.fraud_rate = *f;
1766 }
1767 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1768 config.data_quality.enabled = *b;
1769 }
1770 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1772 config.fraud.enabled = *b;
1773 }
1774 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1775 config.fraud.fraud_rate = *f;
1776 }
1777 _ => {
1778 debug!("Ignoring unknown config patch key: {}", key);
1779 }
1780 }
1781 }
1782
1783 config
1784 }
1785
1786 fn build_resource_guard(
1788 config: &GeneratorConfig,
1789 output_path: Option<PathBuf>,
1790 ) -> ResourceGuard {
1791 let mut builder = ResourceGuardBuilder::new();
1792
1793 if config.global.memory_limit_mb > 0 {
1795 builder = builder.memory_limit(config.global.memory_limit_mb);
1796 }
1797
1798 if let Some(path) = output_path {
1800 builder = builder.output_path(path).min_free_disk(100); }
1802
1803 builder = builder.conservative();
1805
1806 builder.build()
1807 }
1808
1809 fn check_resources(&self) -> SynthResult<DegradationLevel> {
1814 self.resource_guard.check()
1815 }
1816
1817 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1819 let level = self.resource_guard.check()?;
1820
1821 if level != DegradationLevel::Normal {
1822 warn!(
1823 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1824 phase,
1825 level,
1826 self.resource_guard.current_memory_mb(),
1827 self.resource_guard.available_disk_mb()
1828 );
1829 }
1830
1831 Ok(level)
1832 }
1833
1834 fn get_degradation_actions(&self) -> DegradationActions {
1836 self.resource_guard.get_actions()
1837 }
1838
1839 fn check_memory_limit(&self) -> SynthResult<()> {
1841 self.check_resources()?;
1842 Ok(())
1843 }
1844
1845 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1847 info!("Starting enhanced generation workflow");
1848 info!(
1849 "Config: industry={:?}, period_months={}, companies={}",
1850 self.config.global.industry,
1851 self.config.global.period_months,
1852 self.config.companies.len()
1853 );
1854
1855 let initial_level = self.check_resources_with_log("initial")?;
1857 if initial_level == DegradationLevel::Emergency {
1858 return Err(SynthError::resource(
1859 "Insufficient resources to start generation",
1860 ));
1861 }
1862
1863 let mut stats = EnhancedGenerationStatistics {
1864 companies_count: self.config.companies.len(),
1865 period_months: self.config.global.period_months,
1866 ..Default::default()
1867 };
1868
1869 let coa = self.phase_chart_of_accounts(&mut stats)?;
1871
1872 self.phase_master_data(&mut stats)?;
1874
1875 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1877 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1878 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1879
1880 let (mut document_flows, mut subledger, fa_journal_entries) =
1882 self.phase_document_flows(&mut stats)?;
1883
1884 self.emit_phase_items(
1886 "document_flows",
1887 "PurchaseOrder",
1888 &document_flows.purchase_orders,
1889 );
1890 self.emit_phase_items(
1891 "document_flows",
1892 "GoodsReceipt",
1893 &document_flows.goods_receipts,
1894 );
1895 self.emit_phase_items(
1896 "document_flows",
1897 "VendorInvoice",
1898 &document_flows.vendor_invoices,
1899 );
1900 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1901 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1902
1903 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1905
1906 let opening_balance_jes: Vec<JournalEntry> = opening_balances
1911 .iter()
1912 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1913 .collect();
1914 if !opening_balance_jes.is_empty() {
1915 debug!(
1916 "Prepending {} opening balance JEs to entries",
1917 opening_balance_jes.len()
1918 );
1919 }
1920
1921 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1923
1924 if !opening_balance_jes.is_empty() {
1927 let mut combined = opening_balance_jes;
1928 combined.extend(entries);
1929 entries = combined;
1930 }
1931
1932 if !fa_journal_entries.is_empty() {
1934 debug!(
1935 "Appending {} FA acquisition JEs to main entries",
1936 fa_journal_entries.len()
1937 );
1938 entries.extend(fa_journal_entries);
1939 }
1940
1941 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1943
1944 let actions = self.get_degradation_actions();
1946
1947 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1949
1950 if !sourcing.contracts.is_empty() {
1953 let mut linked_count = 0usize;
1954 let po_vendor_pairs: Vec<(String, String)> = document_flows
1956 .p2p_chains
1957 .iter()
1958 .map(|chain| {
1959 (
1960 chain.purchase_order.vendor_id.clone(),
1961 chain.purchase_order.header.document_id.clone(),
1962 )
1963 })
1964 .collect();
1965
1966 for chain in &mut document_flows.p2p_chains {
1967 if chain.purchase_order.contract_id.is_none() {
1968 if let Some(contract) = sourcing
1969 .contracts
1970 .iter()
1971 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1972 {
1973 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1974 linked_count += 1;
1975 }
1976 }
1977 }
1978
1979 for contract in &mut sourcing.contracts {
1981 let po_ids: Vec<String> = po_vendor_pairs
1982 .iter()
1983 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
1984 .map(|(_, po_id)| po_id.clone())
1985 .collect();
1986 if !po_ids.is_empty() {
1987 contract.purchase_order_ids = po_ids;
1988 }
1989 }
1990
1991 if linked_count > 0 {
1992 debug!(
1993 "Linked {} purchase orders to S2C contracts by vendor match",
1994 linked_count
1995 );
1996 }
1997 }
1998
1999 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2001
2002 if !intercompany.seller_journal_entries.is_empty()
2004 || !intercompany.buyer_journal_entries.is_empty()
2005 {
2006 let ic_je_count = intercompany.seller_journal_entries.len()
2007 + intercompany.buyer_journal_entries.len();
2008 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2009 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2010 debug!(
2011 "Appended {} IC journal entries to main entries",
2012 ic_je_count
2013 );
2014 }
2015
2016 if !intercompany.elimination_entries.is_empty() {
2018 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2019 &intercompany.elimination_entries,
2020 );
2021 if !elim_jes.is_empty() {
2022 debug!(
2023 "Appended {} elimination journal entries to main entries",
2024 elim_jes.len()
2025 );
2026 let elim_debit: rust_decimal::Decimal =
2028 elim_jes.iter().map(|je| je.total_debit()).sum();
2029 let elim_credit: rust_decimal::Decimal =
2030 elim_jes.iter().map(|je| je.total_credit()).sum();
2031 if elim_debit != elim_credit {
2032 warn!(
2033 "IC elimination entries not balanced: debits={}, credits={}, diff={}",
2034 elim_debit,
2035 elim_credit,
2036 elim_debit - elim_credit
2037 );
2038 }
2039 entries.extend(elim_jes);
2040 }
2041 }
2042
2043 let hr = self.phase_hr_data(&mut stats)?;
2045
2046 if !hr.payroll_runs.is_empty() {
2048 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2049 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2050 entries.extend(payroll_jes);
2051 }
2052
2053 if !hr.pension_journal_entries.is_empty() {
2055 debug!(
2056 "Generated {} JEs from pension plans",
2057 hr.pension_journal_entries.len()
2058 );
2059 entries.extend(hr.pension_journal_entries.iter().cloned());
2060 }
2061
2062 if !hr.stock_comp_journal_entries.is_empty() {
2064 debug!(
2065 "Generated {} JEs from stock-based compensation",
2066 hr.stock_comp_journal_entries.len()
2067 );
2068 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2069 }
2070
2071 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2073
2074 if !manufacturing_snap.production_orders.is_empty() {
2076 let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
2077 debug!("Generated {} JEs from production orders", mfg_jes.len());
2078 entries.extend(mfg_jes);
2079 }
2080
2081 if !manufacturing_snap.inventory_movements.is_empty()
2087 && !subledger.inventory_positions.is_empty()
2088 {
2089 use datasynth_core::models::MovementType as MfgMovementType;
2090 let mut receipt_count = 0usize;
2091 let mut issue_count = 0usize;
2092 for movement in &manufacturing_snap.inventory_movements {
2093 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2095 p.material_id == movement.material_code
2096 && p.company_code == movement.entity_code
2097 }) {
2098 match movement.movement_type {
2099 MfgMovementType::GoodsReceipt => {
2100 pos.add_quantity(
2102 movement.quantity,
2103 movement.value,
2104 movement.movement_date,
2105 );
2106 receipt_count += 1;
2107 }
2108 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2109 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2111 issue_count += 1;
2112 }
2113 _ => {}
2114 }
2115 }
2116 }
2117 debug!(
2118 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2119 manufacturing_snap.inventory_movements.len(),
2120 receipt_count,
2121 issue_count,
2122 );
2123 }
2124
2125 if !entries.is_empty() {
2128 stats.total_entries = entries.len() as u64;
2129 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2130 debug!(
2131 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2132 stats.total_entries, stats.total_line_items
2133 );
2134 }
2135
2136 if self.config.internal_controls.enabled && !entries.is_empty() {
2138 info!("Phase 7b: Applying internal controls to journal entries");
2139 let control_config = ControlGeneratorConfig {
2140 exception_rate: self.config.internal_controls.exception_rate,
2141 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2142 enable_sox_marking: true,
2143 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2144 self.config.internal_controls.sox_materiality_threshold,
2145 )
2146 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2147 ..Default::default()
2148 };
2149 let mut control_gen = ControlGenerator::with_config(self.seed + 99, control_config);
2150 for entry in &mut entries {
2151 control_gen.apply_controls(entry, &coa);
2152 }
2153 let with_controls = entries
2154 .iter()
2155 .filter(|e| !e.header.control_ids.is_empty())
2156 .count();
2157 info!(
2158 "Applied controls to {} entries ({} with control IDs assigned)",
2159 entries.len(),
2160 with_controls
2161 );
2162 }
2163
2164 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2168 .iter()
2169 .filter(|e| e.header.sod_violation)
2170 .filter_map(|e| {
2171 e.header.sod_conflict_type.map(|ct| {
2172 use datasynth_core::models::{RiskLevel, SodViolation};
2173 let severity = match ct {
2174 datasynth_core::models::SodConflictType::PaymentReleaser
2175 | datasynth_core::models::SodConflictType::RequesterApprover => {
2176 RiskLevel::Critical
2177 }
2178 datasynth_core::models::SodConflictType::PreparerApprover
2179 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2180 | datasynth_core::models::SodConflictType::JournalEntryPoster
2181 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2182 RiskLevel::High
2183 }
2184 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2185 RiskLevel::Medium
2186 }
2187 };
2188 let action = format!(
2189 "SoD conflict {:?} on entry {} ({})",
2190 ct, e.header.document_id, e.header.company_code
2191 );
2192 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2193 })
2194 })
2195 .collect();
2196 if !sod_violations.is_empty() {
2197 info!(
2198 "Phase 7c: Extracted {} SoD violations from {} entries",
2199 sod_violations.len(),
2200 entries.len()
2201 );
2202 }
2203
2204 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2206
2207 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2209
2210 self.emit_phase_items(
2212 "anomaly_injection",
2213 "LabeledAnomaly",
2214 &anomaly_labels.labels,
2215 );
2216
2217 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2219
2220 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2222
2223 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2225
2226 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2228
2229 let balance_validation = self.phase_balance_validation(&entries)?;
2231
2232 let subledger_reconciliation =
2234 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2235
2236 let (data_quality_stats, quality_issues) =
2238 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2239
2240 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2242
2243 let audit = self.phase_audit_data(&entries, &mut stats)?;
2245
2246 let banking = self.phase_banking_data(&mut stats)?;
2248
2249 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2251
2252 self.phase_llm_enrichment(&mut stats);
2254
2255 self.phase_diffusion_enhancement(&mut stats);
2257
2258 self.phase_causal_overlay(&mut stats);
2260
2261 let mut financial_reporting = self.phase_financial_reporting(
2265 &document_flows,
2266 &entries,
2267 &coa,
2268 &hr,
2269 &audit,
2270 &mut stats,
2271 )?;
2272
2273 {
2275 use datasynth_core::models::StatementType;
2276 for stmt in &financial_reporting.consolidated_statements {
2277 if stmt.statement_type == StatementType::BalanceSheet {
2278 let total_assets: rust_decimal::Decimal = stmt
2279 .line_items
2280 .iter()
2281 .filter(|li| li.section.to_uppercase().contains("ASSET"))
2282 .map(|li| li.amount)
2283 .sum();
2284 let total_le: rust_decimal::Decimal = stmt
2285 .line_items
2286 .iter()
2287 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2288 .map(|li| li.amount)
2289 .sum();
2290 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2291 warn!(
2292 "BS equation imbalance: assets={}, L+E={}",
2293 total_assets, total_le
2294 );
2295 }
2296 }
2297 }
2298 }
2299
2300 let accounting_standards =
2302 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2303
2304 if !accounting_standards.ecl_journal_entries.is_empty() {
2306 debug!(
2307 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2308 accounting_standards.ecl_journal_entries.len()
2309 );
2310 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2311 }
2312
2313 if !accounting_standards.provision_journal_entries.is_empty() {
2315 debug!(
2316 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2317 accounting_standards.provision_journal_entries.len()
2318 );
2319 entries.extend(
2320 accounting_standards
2321 .provision_journal_entries
2322 .iter()
2323 .cloned(),
2324 );
2325 }
2326
2327 let ocpm = self.phase_ocpm_events(
2329 &document_flows,
2330 &sourcing,
2331 &hr,
2332 &manufacturing_snap,
2333 &banking,
2334 &audit,
2335 &financial_reporting,
2336 &mut stats,
2337 )?;
2338
2339 if let Some(ref event_log) = ocpm.event_log {
2341 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2342 }
2343
2344 let sales_kpi_budgets =
2346 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2347
2348 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2350
2351 self.generate_notes_to_financial_statements(
2354 &mut financial_reporting,
2355 &accounting_standards,
2356 &tax,
2357 &hr,
2358 &audit,
2359 );
2360
2361 let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;
2363
2364 let treasury =
2366 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2367
2368 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
2370
2371 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
2373
2374 let disruption_events = self.phase_disruption_events(&mut stats)?;
2376
2377 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
2379
2380 let (entity_relationship_graph, cross_process_links) =
2382 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
2383
2384 let industry_output = self.phase_industry_data(&mut stats);
2386
2387 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
2389
2390 self.phase_hypergraph_export(
2392 &coa,
2393 &entries,
2394 &document_flows,
2395 &sourcing,
2396 &hr,
2397 &manufacturing_snap,
2398 &banking,
2399 &audit,
2400 &financial_reporting,
2401 &ocpm,
2402 &compliance_regulations,
2403 &mut stats,
2404 )?;
2405
2406 if self.phase_config.generate_graph_export {
2409 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
2410 }
2411
2412 if self.config.streaming.enabled {
2414 info!("Note: streaming config is enabled but batch mode does not use it");
2415 }
2416 if self.config.vendor_network.enabled {
2417 debug!("Vendor network config available; relationship graph generation is partial");
2418 }
2419 if self.config.customer_segmentation.enabled {
2420 debug!("Customer segmentation config available; segment-aware generation is partial");
2421 }
2422
2423 let resource_stats = self.resource_guard.stats();
2425 info!(
2426 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
2427 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
2428 resource_stats.disk.estimated_bytes_written,
2429 resource_stats.degradation_level
2430 );
2431
2432 if let Some(ref sink) = self.phase_sink {
2434 if let Err(e) = sink.flush() {
2435 warn!("Stream sink flush failed: {e}");
2436 }
2437 }
2438
2439 let lineage = self.build_lineage_graph();
2441
2442 let gate_result = if self.config.quality_gates.enabled {
2444 let profile_name = &self.config.quality_gates.profile;
2445 match datasynth_eval::gates::get_profile(profile_name) {
2446 Some(profile) => {
2447 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
2449
2450 if balance_validation.validated {
2452 eval.coherence.balance =
2453 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
2454 equation_balanced: balance_validation.is_balanced,
2455 max_imbalance: (balance_validation.total_debits
2456 - balance_validation.total_credits)
2457 .abs(),
2458 periods_evaluated: 1,
2459 periods_imbalanced: if balance_validation.is_balanced {
2460 0
2461 } else {
2462 1
2463 },
2464 period_results: Vec::new(),
2465 companies_evaluated: self.config.companies.len(),
2466 });
2467 }
2468
2469 eval.coherence.passes = balance_validation.is_balanced;
2471 if !balance_validation.is_balanced {
2472 eval.coherence
2473 .failures
2474 .push("Balance sheet equation not satisfied".to_string());
2475 }
2476
2477 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
2479 eval.statistical.passes = !entries.is_empty();
2480
2481 eval.quality.overall_score = 0.9; eval.quality.passes = true;
2484
2485 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
2486 info!(
2487 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
2488 profile_name, result.gates_passed, result.gates_total, result.summary
2489 );
2490 Some(result)
2491 }
2492 None => {
2493 warn!(
2494 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
2495 profile_name
2496 );
2497 None
2498 }
2499 }
2500 } else {
2501 None
2502 };
2503
2504 let internal_controls = if self.config.internal_controls.enabled {
2506 InternalControl::standard_controls()
2507 } else {
2508 Vec::new()
2509 };
2510
2511 Ok(EnhancedGenerationResult {
2512 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
2513 master_data: std::mem::take(&mut self.master_data),
2514 document_flows,
2515 subledger,
2516 ocpm,
2517 audit,
2518 banking,
2519 graph_export,
2520 sourcing,
2521 financial_reporting,
2522 hr,
2523 accounting_standards,
2524 manufacturing: manufacturing_snap,
2525 sales_kpi_budgets,
2526 tax,
2527 esg: esg_snap,
2528 treasury,
2529 project_accounting,
2530 process_evolution,
2531 organizational_events,
2532 disruption_events,
2533 intercompany,
2534 journal_entries: entries,
2535 anomaly_labels,
2536 balance_validation,
2537 data_quality_stats,
2538 quality_issues,
2539 statistics: stats,
2540 lineage: Some(lineage),
2541 gate_result,
2542 internal_controls,
2543 sod_violations,
2544 opening_balances,
2545 subledger_reconciliation,
2546 counterfactual_pairs,
2547 red_flags,
2548 collusion_rings,
2549 temporal_vendor_chains,
2550 entity_relationship_graph,
2551 cross_process_links,
2552 industry_output,
2553 compliance_regulations,
2554 })
2555 }
2556
2557 fn phase_chart_of_accounts(
2563 &mut self,
2564 stats: &mut EnhancedGenerationStatistics,
2565 ) -> SynthResult<Arc<ChartOfAccounts>> {
2566 info!("Phase 1: Generating Chart of Accounts");
2567 let coa = self.generate_coa()?;
2568 stats.accounts_count = coa.account_count();
2569 info!(
2570 "Chart of Accounts generated: {} accounts",
2571 stats.accounts_count
2572 );
2573 self.check_resources_with_log("post-coa")?;
2574 Ok(coa)
2575 }
2576
2577 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
2579 if self.phase_config.generate_master_data {
2580 info!("Phase 2: Generating Master Data");
2581 self.generate_master_data()?;
2582 stats.vendor_count = self.master_data.vendors.len();
2583 stats.customer_count = self.master_data.customers.len();
2584 stats.material_count = self.master_data.materials.len();
2585 stats.asset_count = self.master_data.assets.len();
2586 stats.employee_count = self.master_data.employees.len();
2587 info!(
2588 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
2589 stats.vendor_count, stats.customer_count, stats.material_count,
2590 stats.asset_count, stats.employee_count
2591 );
2592 self.check_resources_with_log("post-master-data")?;
2593 } else {
2594 debug!("Phase 2: Skipped (master data generation disabled)");
2595 }
2596 Ok(())
2597 }
2598
2599 fn phase_document_flows(
2601 &mut self,
2602 stats: &mut EnhancedGenerationStatistics,
2603 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
2604 let mut document_flows = DocumentFlowSnapshot::default();
2605 let mut subledger = SubledgerSnapshot::default();
2606 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
2609
2610 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
2611 info!("Phase 3: Generating Document Flows");
2612 self.generate_document_flows(&mut document_flows)?;
2613 stats.p2p_chain_count = document_flows.p2p_chains.len();
2614 stats.o2c_chain_count = document_flows.o2c_chains.len();
2615 info!(
2616 "Document flows generated: {} P2P chains, {} O2C chains",
2617 stats.p2p_chain_count, stats.o2c_chain_count
2618 );
2619
2620 debug!("Phase 3b: Linking document flows to subledgers");
2622 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
2623 stats.ap_invoice_count = subledger.ap_invoices.len();
2624 stats.ar_invoice_count = subledger.ar_invoices.len();
2625 debug!(
2626 "Subledgers linked: {} AP invoices, {} AR invoices",
2627 stats.ap_invoice_count, stats.ar_invoice_count
2628 );
2629
2630 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
2635 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
2636 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
2637 debug!("Payment settlements applied to AP and AR subledgers");
2638
2639 if let Ok(start_date) =
2642 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2643 {
2644 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
2645 - chrono::Days::new(1);
2646 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
2647 for company in &self.config.companies {
2654 let ar_report = ARAgingReport::from_invoices(
2655 company.code.clone(),
2656 &subledger.ar_invoices,
2657 as_of_date,
2658 );
2659 subledger.ar_aging_reports.push(ar_report);
2660
2661 let ap_report = APAgingReport::from_invoices(
2662 company.code.clone(),
2663 &subledger.ap_invoices,
2664 as_of_date,
2665 );
2666 subledger.ap_aging_reports.push(ap_report);
2667 }
2668 debug!(
2669 "AR/AP aging reports built: {} AR, {} AP",
2670 subledger.ar_aging_reports.len(),
2671 subledger.ap_aging_reports.len()
2672 );
2673
2674 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
2676 {
2677 use datasynth_generators::DunningGenerator;
2678 let mut dunning_gen = DunningGenerator::new(self.seed + 2000);
2679 for company in &self.config.companies {
2680 let currency = company.currency.as_str();
2681 let mut company_invoices: Vec<
2684 datasynth_core::models::subledger::ar::ARInvoice,
2685 > = subledger
2686 .ar_invoices
2687 .iter()
2688 .filter(|inv| inv.company_code == company.code)
2689 .cloned()
2690 .collect();
2691
2692 if company_invoices.is_empty() {
2693 continue;
2694 }
2695
2696 let result = dunning_gen.execute_dunning_run(
2697 &company.code,
2698 as_of_date,
2699 &mut company_invoices,
2700 currency,
2701 );
2702
2703 for updated in &company_invoices {
2705 if let Some(orig) = subledger
2706 .ar_invoices
2707 .iter_mut()
2708 .find(|i| i.invoice_number == updated.invoice_number)
2709 {
2710 orig.dunning_info = updated.dunning_info.clone();
2711 }
2712 }
2713
2714 subledger.dunning_runs.push(result.dunning_run);
2715 subledger.dunning_letters.extend(result.letters);
2716 dunning_journal_entries.extend(result.journal_entries);
2718 }
2719 debug!(
2720 "Dunning runs complete: {} runs, {} letters",
2721 subledger.dunning_runs.len(),
2722 subledger.dunning_letters.len()
2723 );
2724 }
2725 }
2726
2727 self.check_resources_with_log("post-document-flows")?;
2728 } else {
2729 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
2730 }
2731
2732 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
2734 if !self.master_data.assets.is_empty() {
2735 debug!("Generating FA subledger records");
2736 let company_code = self
2737 .config
2738 .companies
2739 .first()
2740 .map(|c| c.code.as_str())
2741 .unwrap_or("1000");
2742 let currency = self
2743 .config
2744 .companies
2745 .first()
2746 .map(|c| c.currency.as_str())
2747 .unwrap_or("USD");
2748
2749 let mut fa_gen = datasynth_generators::FAGenerator::new(
2750 datasynth_generators::FAGeneratorConfig::default(),
2751 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
2752 );
2753
2754 for asset in &self.master_data.assets {
2755 let (record, je) = fa_gen.generate_asset_acquisition(
2756 company_code,
2757 &format!("{:?}", asset.asset_class),
2758 &asset.description,
2759 asset.acquisition_date,
2760 currency,
2761 asset.cost_center.as_deref(),
2762 );
2763 subledger.fa_records.push(record);
2764 fa_journal_entries.push(je);
2765 }
2766
2767 stats.fa_subledger_count = subledger.fa_records.len();
2768 debug!(
2769 "FA subledger records generated: {} (with {} acquisition JEs)",
2770 stats.fa_subledger_count,
2771 fa_journal_entries.len()
2772 );
2773 }
2774
2775 if !self.master_data.materials.is_empty() {
2777 debug!("Generating Inventory subledger records");
2778 let first_company = self.config.companies.first();
2779 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
2780 let inv_currency = first_company
2781 .map(|c| c.currency.clone())
2782 .unwrap_or_else(|| "USD".to_string());
2783
2784 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
2785 datasynth_generators::InventoryGeneratorConfig::default(),
2786 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
2787 inv_currency.clone(),
2788 );
2789
2790 for (i, material) in self.master_data.materials.iter().enumerate() {
2791 let plant = format!("PLANT{:02}", (i % 3) + 1);
2792 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
2793 let initial_qty = rust_decimal::Decimal::from(
2794 material
2795 .safety_stock
2796 .to_string()
2797 .parse::<i64>()
2798 .unwrap_or(100),
2799 );
2800
2801 let position = inv_gen.generate_position(
2802 company_code,
2803 &plant,
2804 &storage_loc,
2805 &material.material_id,
2806 &material.description,
2807 initial_qty,
2808 Some(material.standard_cost),
2809 &inv_currency,
2810 );
2811 subledger.inventory_positions.push(position);
2812 }
2813
2814 stats.inventory_subledger_count = subledger.inventory_positions.len();
2815 debug!(
2816 "Inventory subledger records generated: {}",
2817 stats.inventory_subledger_count
2818 );
2819 }
2820
2821 if !subledger.fa_records.is_empty() {
2823 if let Ok(start_date) =
2824 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2825 {
2826 let company_code = self
2827 .config
2828 .companies
2829 .first()
2830 .map(|c| c.code.as_str())
2831 .unwrap_or("1000");
2832 let fiscal_year = start_date.year();
2833 let start_period = start_date.month();
2834 let end_period =
2835 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
2836
2837 let depr_cfg = FaDepreciationScheduleConfig {
2838 fiscal_year,
2839 start_period,
2840 end_period,
2841 seed_offset: 800,
2842 };
2843 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
2844 let runs = depr_gen.generate(company_code, &subledger.fa_records);
2845 let run_count = runs.len();
2846 subledger.depreciation_runs = runs;
2847 debug!(
2848 "Depreciation runs generated: {} runs for {} periods",
2849 run_count, self.config.global.period_months
2850 );
2851 }
2852 }
2853
2854 if !subledger.inventory_positions.is_empty() {
2856 if let Ok(start_date) =
2857 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2858 {
2859 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
2860 - chrono::Days::new(1);
2861
2862 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
2863 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
2864
2865 for company in &self.config.companies {
2866 let result = inv_val_gen.generate(
2867 &company.code,
2868 &subledger.inventory_positions,
2869 as_of_date,
2870 );
2871 subledger.inventory_valuations.push(result);
2872 }
2873 debug!(
2874 "Inventory valuations generated: {} company reports",
2875 subledger.inventory_valuations.len()
2876 );
2877 }
2878 }
2879
2880 Ok((document_flows, subledger, fa_journal_entries))
2881 }
2882
2883 #[allow(clippy::too_many_arguments)]
2885 fn phase_ocpm_events(
2886 &mut self,
2887 document_flows: &DocumentFlowSnapshot,
2888 sourcing: &SourcingSnapshot,
2889 hr: &HrSnapshot,
2890 manufacturing: &ManufacturingSnapshot,
2891 banking: &BankingSnapshot,
2892 audit: &AuditSnapshot,
2893 financial_reporting: &FinancialReportingSnapshot,
2894 stats: &mut EnhancedGenerationStatistics,
2895 ) -> SynthResult<OcpmSnapshot> {
2896 let degradation = self.check_resources()?;
2897 if degradation >= DegradationLevel::Reduced {
2898 debug!(
2899 "Phase skipped due to resource pressure (degradation: {:?})",
2900 degradation
2901 );
2902 return Ok(OcpmSnapshot::default());
2903 }
2904 if self.phase_config.generate_ocpm_events {
2905 info!("Phase 3c: Generating OCPM Events");
2906 let ocpm_snapshot = self.generate_ocpm_events(
2907 document_flows,
2908 sourcing,
2909 hr,
2910 manufacturing,
2911 banking,
2912 audit,
2913 financial_reporting,
2914 )?;
2915 stats.ocpm_event_count = ocpm_snapshot.event_count;
2916 stats.ocpm_object_count = ocpm_snapshot.object_count;
2917 stats.ocpm_case_count = ocpm_snapshot.case_count;
2918 info!(
2919 "OCPM events generated: {} events, {} objects, {} cases",
2920 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
2921 );
2922 self.check_resources_with_log("post-ocpm")?;
2923 Ok(ocpm_snapshot)
2924 } else {
2925 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
2926 Ok(OcpmSnapshot::default())
2927 }
2928 }
2929
2930 fn phase_journal_entries(
2932 &mut self,
2933 coa: &Arc<ChartOfAccounts>,
2934 document_flows: &DocumentFlowSnapshot,
2935 _stats: &mut EnhancedGenerationStatistics,
2936 ) -> SynthResult<Vec<JournalEntry>> {
2937 let mut entries = Vec::new();
2938
2939 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
2941 debug!("Phase 4a: Generating JEs from document flows");
2942 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
2943 debug!("Generated {} JEs from document flows", flow_entries.len());
2944 entries.extend(flow_entries);
2945 }
2946
2947 if self.phase_config.generate_journal_entries {
2949 info!("Phase 4: Generating Journal Entries");
2950 let je_entries = self.generate_journal_entries(coa)?;
2951 info!("Generated {} standalone journal entries", je_entries.len());
2952 entries.extend(je_entries);
2953 } else {
2954 debug!("Phase 4: Skipped (journal entry generation disabled)");
2955 }
2956
2957 if !entries.is_empty() {
2958 self.check_resources_with_log("post-journal-entries")?;
2961 }
2962
2963 Ok(entries)
2964 }
2965
2966 fn phase_anomaly_injection(
2968 &mut self,
2969 entries: &mut [JournalEntry],
2970 actions: &DegradationActions,
2971 stats: &mut EnhancedGenerationStatistics,
2972 ) -> SynthResult<AnomalyLabels> {
2973 if self.phase_config.inject_anomalies
2974 && !entries.is_empty()
2975 && !actions.skip_anomaly_injection
2976 {
2977 info!("Phase 5: Injecting Anomalies");
2978 let result = self.inject_anomalies(entries)?;
2979 stats.anomalies_injected = result.labels.len();
2980 info!("Injected {} anomalies", stats.anomalies_injected);
2981 self.check_resources_with_log("post-anomaly-injection")?;
2982 Ok(result)
2983 } else if actions.skip_anomaly_injection {
2984 warn!("Phase 5: Skipped due to resource degradation");
2985 Ok(AnomalyLabels::default())
2986 } else {
2987 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
2988 Ok(AnomalyLabels::default())
2989 }
2990 }
2991
2992 fn phase_balance_validation(
2994 &mut self,
2995 entries: &[JournalEntry],
2996 ) -> SynthResult<BalanceValidationResult> {
2997 if self.phase_config.validate_balances && !entries.is_empty() {
2998 debug!("Phase 6: Validating Balances");
2999 let balance_validation = self.validate_journal_entries(entries)?;
3000 if balance_validation.is_balanced {
3001 debug!("Balance validation passed");
3002 } else {
3003 warn!(
3004 "Balance validation found {} errors",
3005 balance_validation.validation_errors.len()
3006 );
3007 }
3008 Ok(balance_validation)
3009 } else {
3010 Ok(BalanceValidationResult::default())
3011 }
3012 }
3013
3014 fn phase_data_quality_injection(
3016 &mut self,
3017 entries: &mut [JournalEntry],
3018 actions: &DegradationActions,
3019 stats: &mut EnhancedGenerationStatistics,
3020 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3021 if self.phase_config.inject_data_quality
3022 && !entries.is_empty()
3023 && !actions.skip_data_quality
3024 {
3025 info!("Phase 7: Injecting Data Quality Variations");
3026 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3027 stats.data_quality_issues = dq_stats.records_with_issues;
3028 info!("Injected {} data quality issues", stats.data_quality_issues);
3029 self.check_resources_with_log("post-data-quality")?;
3030 Ok((dq_stats, quality_issues))
3031 } else if actions.skip_data_quality {
3032 warn!("Phase 7: Skipped due to resource degradation");
3033 Ok((DataQualityStats::default(), Vec::new()))
3034 } else {
3035 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3036 Ok((DataQualityStats::default(), Vec::new()))
3037 }
3038 }
3039
3040 fn phase_period_close(
3050 &mut self,
3051 entries: &mut Vec<JournalEntry>,
3052 subledger: &SubledgerSnapshot,
3053 stats: &mut EnhancedGenerationStatistics,
3054 ) -> SynthResult<()> {
3055 if !self.phase_config.generate_period_close || entries.is_empty() {
3056 debug!("Phase 10b: Skipped (period close disabled or no entries)");
3057 return Ok(());
3058 }
3059
3060 info!("Phase 10b: Generating period-close journal entries");
3061
3062 use datasynth_core::accounts::{
3063 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3064 };
3065 use rust_decimal::Decimal;
3066
3067 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3068 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3069 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3070 let close_date = end_date - chrono::Days::new(1);
3072
3073 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
3078 .config
3079 .companies
3080 .iter()
3081 .map(|c| c.code.clone())
3082 .collect();
3083
3084 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3086 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3087
3088 let period_months = self.config.global.period_months;
3092 for asset in &subledger.fa_records {
3093 use datasynth_core::models::subledger::fa::AssetStatus;
3095 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3096 continue;
3097 }
3098 let useful_life_months = asset.useful_life_months();
3099 if useful_life_months == 0 {
3100 continue;
3102 }
3103 let salvage_value = asset.salvage_value();
3104 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3105 if depreciable_base == Decimal::ZERO {
3106 continue;
3107 }
3108 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3109 * Decimal::from(period_months))
3110 .round_dp(2);
3111 if period_depr <= Decimal::ZERO {
3112 continue;
3113 }
3114
3115 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3116 depr_header.document_type = "CL".to_string();
3117 depr_header.header_text = Some(format!(
3118 "Depreciation - {} {}",
3119 asset.asset_number, asset.description
3120 ));
3121 depr_header.created_by = "CLOSE_ENGINE".to_string();
3122 depr_header.source = TransactionSource::Automated;
3123 depr_header.business_process = Some(BusinessProcess::R2R);
3124
3125 let doc_id = depr_header.document_id;
3126 let mut depr_je = JournalEntry::new(depr_header);
3127
3128 depr_je.add_line(JournalEntryLine::debit(
3130 doc_id,
3131 1,
3132 expense_accounts::DEPRECIATION.to_string(),
3133 period_depr,
3134 ));
3135 depr_je.add_line(JournalEntryLine::credit(
3137 doc_id,
3138 2,
3139 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3140 period_depr,
3141 ));
3142
3143 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3144 close_jes.push(depr_je);
3145 }
3146
3147 if !subledger.fa_records.is_empty() {
3148 debug!(
3149 "Generated {} depreciation JEs from {} FA records",
3150 close_jes.len(),
3151 subledger.fa_records.len()
3152 );
3153 }
3154
3155 {
3159 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3160 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3161
3162 let accrual_items: &[(&str, &str, &str)] = &[
3164 ("Accrued Utilities", "6200", "2100"),
3165 ("Accrued Rent", "6300", "2100"),
3166 ("Accrued Interest", "6100", "2150"),
3167 ];
3168
3169 for company_code in &company_codes {
3170 let company_revenue: Decimal = entries
3172 .iter()
3173 .filter(|e| e.header.company_code == *company_code)
3174 .flat_map(|e| e.lines.iter())
3175 .filter(|l| l.gl_account.starts_with('4'))
3176 .map(|l| l.credit_amount - l.debit_amount)
3177 .fold(Decimal::ZERO, |acc, v| acc + v);
3178
3179 if company_revenue <= Decimal::ZERO {
3180 continue;
3181 }
3182
3183 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
3185 if accrual_base <= Decimal::ZERO {
3186 continue;
3187 }
3188
3189 for (description, expense_acct, liability_acct) in accrual_items {
3190 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
3191 company_code,
3192 description,
3193 accrual_base,
3194 expense_acct,
3195 liability_acct,
3196 close_date,
3197 None,
3198 );
3199 close_jes.push(accrual_je);
3200 if let Some(rev_je) = reversal_je {
3201 close_jes.push(rev_je);
3202 }
3203 }
3204 }
3205
3206 debug!(
3207 "Generated accrual entries for {} companies",
3208 company_codes.len()
3209 );
3210 }
3211
3212 for company_code in &company_codes {
3213 let mut total_revenue = Decimal::ZERO;
3218 let mut total_expenses = Decimal::ZERO;
3219
3220 for entry in entries.iter() {
3221 if entry.header.company_code != *company_code {
3222 continue;
3223 }
3224 for line in &entry.lines {
3225 let category = AccountCategory::from_account(&line.gl_account);
3226 match category {
3227 AccountCategory::Revenue => {
3228 total_revenue += line.credit_amount - line.debit_amount;
3230 }
3231 AccountCategory::Cogs
3232 | AccountCategory::OperatingExpense
3233 | AccountCategory::OtherIncomeExpense
3234 | AccountCategory::Tax => {
3235 total_expenses += line.debit_amount - line.credit_amount;
3237 }
3238 _ => {}
3239 }
3240 }
3241 }
3242
3243 let pre_tax_income = total_revenue - total_expenses;
3244
3245 if pre_tax_income == Decimal::ZERO {
3247 debug!(
3248 "Company {}: no pre-tax income, skipping period close",
3249 company_code
3250 );
3251 continue;
3252 }
3253
3254 if pre_tax_income > Decimal::ZERO {
3256 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
3258
3259 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
3260 tax_header.document_type = "CL".to_string();
3261 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
3262 tax_header.created_by = "CLOSE_ENGINE".to_string();
3263 tax_header.source = TransactionSource::Automated;
3264 tax_header.business_process = Some(BusinessProcess::R2R);
3265
3266 let doc_id = tax_header.document_id;
3267 let mut tax_je = JournalEntry::new(tax_header);
3268
3269 tax_je.add_line(JournalEntryLine::debit(
3271 doc_id,
3272 1,
3273 tax_accounts::TAX_EXPENSE.to_string(),
3274 tax_amount,
3275 ));
3276 tax_je.add_line(JournalEntryLine::credit(
3278 doc_id,
3279 2,
3280 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
3281 tax_amount,
3282 ));
3283
3284 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
3285 close_jes.push(tax_je);
3286 } else {
3287 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
3290 if dta_amount > Decimal::ZERO {
3291 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
3292 dta_header.document_type = "CL".to_string();
3293 dta_header.header_text =
3294 Some(format!("Deferred tax asset (DTA) - {}", company_code));
3295 dta_header.created_by = "CLOSE_ENGINE".to_string();
3296 dta_header.source = TransactionSource::Automated;
3297 dta_header.business_process = Some(BusinessProcess::R2R);
3298
3299 let doc_id = dta_header.document_id;
3300 let mut dta_je = JournalEntry::new(dta_header);
3301
3302 dta_je.add_line(JournalEntryLine::debit(
3304 doc_id,
3305 1,
3306 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
3307 dta_amount,
3308 ));
3309 dta_je.add_line(JournalEntryLine::credit(
3312 doc_id,
3313 2,
3314 tax_accounts::TAX_EXPENSE.to_string(),
3315 dta_amount,
3316 ));
3317
3318 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
3319 close_jes.push(dta_je);
3320 debug!(
3321 "Company {}: loss year — recognised DTA of {}",
3322 company_code, dta_amount
3323 );
3324 }
3325 }
3326
3327 let tax_provision = if pre_tax_income > Decimal::ZERO {
3332 (pre_tax_income * tax_rate).round_dp(2)
3333 } else {
3334 Decimal::ZERO
3335 };
3336 let net_income = pre_tax_income - tax_provision;
3337
3338 if net_income != Decimal::ZERO {
3339 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
3340 close_header.document_type = "CL".to_string();
3341 close_header.header_text =
3342 Some(format!("Income statement close - {}", company_code));
3343 close_header.created_by = "CLOSE_ENGINE".to_string();
3344 close_header.source = TransactionSource::Automated;
3345 close_header.business_process = Some(BusinessProcess::R2R);
3346
3347 let doc_id = close_header.document_id;
3348 let mut close_je = JournalEntry::new(close_header);
3349
3350 let abs_net_income = net_income.abs();
3351
3352 if net_income > Decimal::ZERO {
3353 close_je.add_line(JournalEntryLine::debit(
3355 doc_id,
3356 1,
3357 equity_accounts::INCOME_SUMMARY.to_string(),
3358 abs_net_income,
3359 ));
3360 close_je.add_line(JournalEntryLine::credit(
3361 doc_id,
3362 2,
3363 equity_accounts::RETAINED_EARNINGS.to_string(),
3364 abs_net_income,
3365 ));
3366 } else {
3367 close_je.add_line(JournalEntryLine::debit(
3369 doc_id,
3370 1,
3371 equity_accounts::RETAINED_EARNINGS.to_string(),
3372 abs_net_income,
3373 ));
3374 close_je.add_line(JournalEntryLine::credit(
3375 doc_id,
3376 2,
3377 equity_accounts::INCOME_SUMMARY.to_string(),
3378 abs_net_income,
3379 ));
3380 }
3381
3382 debug_assert!(
3383 close_je.is_balanced(),
3384 "Income statement closing JE must be balanced"
3385 );
3386 close_jes.push(close_je);
3387 }
3388 }
3389
3390 let close_count = close_jes.len();
3391 if close_count > 0 {
3392 info!("Generated {} period-close journal entries", close_count);
3393 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
3394 entries.extend(close_jes);
3395 stats.period_close_je_count = close_count;
3396
3397 stats.total_entries = entries.len() as u64;
3399 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
3400 } else {
3401 debug!("No period-close entries generated (no income statement activity)");
3402 }
3403
3404 Ok(())
3405 }
3406
3407 fn phase_audit_data(
3409 &mut self,
3410 entries: &[JournalEntry],
3411 stats: &mut EnhancedGenerationStatistics,
3412 ) -> SynthResult<AuditSnapshot> {
3413 if self.phase_config.generate_audit {
3414 info!("Phase 8: Generating Audit Data");
3415 let audit_snapshot = self.generate_audit_data(entries)?;
3416 stats.audit_engagement_count = audit_snapshot.engagements.len();
3417 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
3418 stats.audit_evidence_count = audit_snapshot.evidence.len();
3419 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
3420 stats.audit_finding_count = audit_snapshot.findings.len();
3421 stats.audit_judgment_count = audit_snapshot.judgments.len();
3422 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
3423 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
3424 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
3425 stats.audit_sample_count = audit_snapshot.samples.len();
3426 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
3427 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
3428 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
3429 stats.audit_related_party_count = audit_snapshot.related_parties.len();
3430 stats.audit_related_party_transaction_count =
3431 audit_snapshot.related_party_transactions.len();
3432 info!(
3433 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
3434 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
3435 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
3436 {} RP transactions",
3437 stats.audit_engagement_count,
3438 stats.audit_workpaper_count,
3439 stats.audit_evidence_count,
3440 stats.audit_risk_count,
3441 stats.audit_finding_count,
3442 stats.audit_judgment_count,
3443 stats.audit_confirmation_count,
3444 stats.audit_procedure_step_count,
3445 stats.audit_sample_count,
3446 stats.audit_analytical_result_count,
3447 stats.audit_ia_function_count,
3448 stats.audit_ia_report_count,
3449 stats.audit_related_party_count,
3450 stats.audit_related_party_transaction_count,
3451 );
3452 self.check_resources_with_log("post-audit")?;
3453 Ok(audit_snapshot)
3454 } else {
3455 debug!("Phase 8: Skipped (audit generation disabled)");
3456 Ok(AuditSnapshot::default())
3457 }
3458 }
3459
3460 fn phase_banking_data(
3462 &mut self,
3463 stats: &mut EnhancedGenerationStatistics,
3464 ) -> SynthResult<BankingSnapshot> {
3465 if self.phase_config.generate_banking {
3466 info!("Phase 9: Generating Banking KYC/AML Data");
3467 let banking_snapshot = self.generate_banking_data()?;
3468 stats.banking_customer_count = banking_snapshot.customers.len();
3469 stats.banking_account_count = banking_snapshot.accounts.len();
3470 stats.banking_transaction_count = banking_snapshot.transactions.len();
3471 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
3472 info!(
3473 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
3474 stats.banking_customer_count, stats.banking_account_count,
3475 stats.banking_transaction_count, stats.banking_suspicious_count
3476 );
3477 self.check_resources_with_log("post-banking")?;
3478 Ok(banking_snapshot)
3479 } else {
3480 debug!("Phase 9: Skipped (banking generation disabled)");
3481 Ok(BankingSnapshot::default())
3482 }
3483 }
3484
3485 fn phase_graph_export(
3487 &mut self,
3488 entries: &[JournalEntry],
3489 coa: &Arc<ChartOfAccounts>,
3490 stats: &mut EnhancedGenerationStatistics,
3491 ) -> SynthResult<GraphExportSnapshot> {
3492 if self.phase_config.generate_graph_export && !entries.is_empty() {
3493 info!("Phase 10: Exporting Accounting Network Graphs");
3494 match self.export_graphs(entries, coa, stats) {
3495 Ok(snapshot) => {
3496 info!(
3497 "Graph export complete: {} graphs ({} nodes, {} edges)",
3498 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
3499 );
3500 Ok(snapshot)
3501 }
3502 Err(e) => {
3503 warn!("Phase 10: Graph export failed: {}", e);
3504 Ok(GraphExportSnapshot::default())
3505 }
3506 }
3507 } else {
3508 debug!("Phase 10: Skipped (graph export disabled or no entries)");
3509 Ok(GraphExportSnapshot::default())
3510 }
3511 }
3512
3513 #[allow(clippy::too_many_arguments)]
3515 fn phase_hypergraph_export(
3516 &self,
3517 coa: &Arc<ChartOfAccounts>,
3518 entries: &[JournalEntry],
3519 document_flows: &DocumentFlowSnapshot,
3520 sourcing: &SourcingSnapshot,
3521 hr: &HrSnapshot,
3522 manufacturing: &ManufacturingSnapshot,
3523 banking: &BankingSnapshot,
3524 audit: &AuditSnapshot,
3525 financial_reporting: &FinancialReportingSnapshot,
3526 ocpm: &OcpmSnapshot,
3527 compliance: &ComplianceRegulationsSnapshot,
3528 stats: &mut EnhancedGenerationStatistics,
3529 ) -> SynthResult<()> {
3530 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
3531 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
3532 match self.export_hypergraph(
3533 coa,
3534 entries,
3535 document_flows,
3536 sourcing,
3537 hr,
3538 manufacturing,
3539 banking,
3540 audit,
3541 financial_reporting,
3542 ocpm,
3543 compliance,
3544 stats,
3545 ) {
3546 Ok(info) => {
3547 info!(
3548 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
3549 info.node_count, info.edge_count, info.hyperedge_count
3550 );
3551 }
3552 Err(e) => {
3553 warn!("Phase 10b: Hypergraph export failed: {}", e);
3554 }
3555 }
3556 } else {
3557 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
3558 }
3559 Ok(())
3560 }
3561
3562 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
3568 if !self.config.llm.enabled {
3569 debug!("Phase 11: Skipped (LLM enrichment disabled)");
3570 return;
3571 }
3572
3573 info!("Phase 11: Starting LLM Enrichment");
3574 let start = std::time::Instant::now();
3575
3576 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3577 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
3580 let schema_provider = &self.config.llm.provider;
3581 let api_key_env = match schema_provider.as_str() {
3582 "openai" => Some("OPENAI_API_KEY"),
3583 "anthropic" => Some("ANTHROPIC_API_KEY"),
3584 "custom" => Some("LLM_API_KEY"),
3585 _ => None,
3586 };
3587 if let Some(key_env) = api_key_env {
3588 if std::env::var(key_env).is_ok() {
3589 let llm_config = datasynth_core::llm::LlmConfig {
3590 model: self.config.llm.model.clone(),
3591 api_key_env: key_env.to_string(),
3592 ..datasynth_core::llm::LlmConfig::default()
3593 };
3594 match HttpLlmProvider::new(llm_config) {
3595 Ok(p) => Arc::new(p),
3596 Err(e) => {
3597 warn!(
3598 "Failed to create HttpLlmProvider: {}; falling back to mock",
3599 e
3600 );
3601 Arc::new(MockLlmProvider::new(self.seed))
3602 }
3603 }
3604 } else {
3605 Arc::new(MockLlmProvider::new(self.seed))
3606 }
3607 } else {
3608 Arc::new(MockLlmProvider::new(self.seed))
3609 }
3610 };
3611 let enricher = VendorLlmEnricher::new(provider);
3612
3613 let industry = format!("{:?}", self.config.global.industry);
3614 let max_enrichments = self
3615 .config
3616 .llm
3617 .max_vendor_enrichments
3618 .min(self.master_data.vendors.len());
3619
3620 let mut enriched_count = 0usize;
3621 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
3622 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
3623 Ok(name) => {
3624 vendor.name = name;
3625 enriched_count += 1;
3626 }
3627 Err(e) => {
3628 warn!(
3629 "LLM vendor enrichment failed for {}: {}",
3630 vendor.vendor_id, e
3631 );
3632 }
3633 }
3634 }
3635
3636 enriched_count
3637 }));
3638
3639 match result {
3640 Ok(enriched_count) => {
3641 stats.llm_vendors_enriched = enriched_count;
3642 let elapsed = start.elapsed();
3643 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
3644 info!(
3645 "Phase 11 complete: {} vendors enriched in {}ms",
3646 enriched_count, stats.llm_enrichment_ms
3647 );
3648 }
3649 Err(_) => {
3650 let elapsed = start.elapsed();
3651 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
3652 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
3653 }
3654 }
3655 }
3656
3657 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
3663 if !self.config.diffusion.enabled {
3664 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
3665 return;
3666 }
3667
3668 info!("Phase 12: Starting Diffusion Enhancement");
3669 let start = std::time::Instant::now();
3670
3671 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3672 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
3675
3676 let diffusion_config = DiffusionConfig {
3677 n_steps: self.config.diffusion.n_steps,
3678 seed: self.seed,
3679 ..Default::default()
3680 };
3681
3682 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
3683
3684 let n_samples = self.config.diffusion.sample_size;
3685 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
3687
3688 samples.len()
3689 }));
3690
3691 match result {
3692 Ok(sample_count) => {
3693 stats.diffusion_samples_generated = sample_count;
3694 let elapsed = start.elapsed();
3695 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
3696 info!(
3697 "Phase 12 complete: {} diffusion samples generated in {}ms",
3698 sample_count, stats.diffusion_enhancement_ms
3699 );
3700 }
3701 Err(_) => {
3702 let elapsed = start.elapsed();
3703 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
3704 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
3705 }
3706 }
3707 }
3708
3709 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
3716 if !self.config.causal.enabled {
3717 debug!("Phase 13: Skipped (causal generation disabled)");
3718 return;
3719 }
3720
3721 info!("Phase 13: Starting Causal Overlay");
3722 let start = std::time::Instant::now();
3723
3724 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3725 let graph = match self.config.causal.template.as_str() {
3727 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
3728 _ => CausalGraph::fraud_detection_template(),
3729 };
3730
3731 let scm = StructuralCausalModel::new(graph.clone())
3732 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
3733
3734 let n_samples = self.config.causal.sample_size;
3735 let samples = scm
3736 .generate(n_samples, self.seed)
3737 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
3738
3739 let validation_passed = if self.config.causal.validate {
3741 let report = CausalValidator::validate_causal_structure(&samples, &graph);
3742 if report.valid {
3743 info!(
3744 "Causal validation passed: all {} checks OK",
3745 report.checks.len()
3746 );
3747 } else {
3748 warn!(
3749 "Causal validation: {} violations detected: {:?}",
3750 report.violations.len(),
3751 report.violations
3752 );
3753 }
3754 Some(report.valid)
3755 } else {
3756 None
3757 };
3758
3759 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
3760 }));
3761
3762 match result {
3763 Ok(Ok((sample_count, validation_passed))) => {
3764 stats.causal_samples_generated = sample_count;
3765 stats.causal_validation_passed = validation_passed;
3766 let elapsed = start.elapsed();
3767 stats.causal_generation_ms = elapsed.as_millis() as u64;
3768 info!(
3769 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
3770 sample_count, stats.causal_generation_ms, validation_passed,
3771 );
3772 }
3773 Ok(Err(e)) => {
3774 let elapsed = start.elapsed();
3775 stats.causal_generation_ms = elapsed.as_millis() as u64;
3776 warn!("Phase 13: Causal generation failed: {}", e);
3777 }
3778 Err(_) => {
3779 let elapsed = start.elapsed();
3780 stats.causal_generation_ms = elapsed.as_millis() as u64;
3781 warn!("Phase 13: Causal generation failed (panic caught), continuing");
3782 }
3783 }
3784 }
3785
3786 fn phase_sourcing_data(
3788 &mut self,
3789 stats: &mut EnhancedGenerationStatistics,
3790 ) -> SynthResult<SourcingSnapshot> {
3791 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
3792 debug!("Phase 14: Skipped (sourcing generation disabled)");
3793 return Ok(SourcingSnapshot::default());
3794 }
3795 let degradation = self.check_resources()?;
3796 if degradation >= DegradationLevel::Reduced {
3797 debug!(
3798 "Phase skipped due to resource pressure (degradation: {:?})",
3799 degradation
3800 );
3801 return Ok(SourcingSnapshot::default());
3802 }
3803
3804 info!("Phase 14: Generating S2C Sourcing Data");
3805 let seed = self.seed;
3806
3807 let vendor_ids: Vec<String> = self
3809 .master_data
3810 .vendors
3811 .iter()
3812 .map(|v| v.vendor_id.clone())
3813 .collect();
3814 if vendor_ids.is_empty() {
3815 debug!("Phase 14: Skipped (no vendors available)");
3816 return Ok(SourcingSnapshot::default());
3817 }
3818
3819 let categories: Vec<(String, String)> = vec![
3820 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
3821 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
3822 ("CAT-IT".to_string(), "IT Equipment".to_string()),
3823 ("CAT-SVC".to_string(), "Professional Services".to_string()),
3824 ("CAT-LOG".to_string(), "Logistics".to_string()),
3825 ];
3826 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
3827 .iter()
3828 .map(|(id, name)| {
3829 (
3830 id.clone(),
3831 name.clone(),
3832 rust_decimal::Decimal::from(100_000),
3833 )
3834 })
3835 .collect();
3836
3837 let company_code = self
3838 .config
3839 .companies
3840 .first()
3841 .map(|c| c.code.as_str())
3842 .unwrap_or("1000");
3843 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3844 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3845 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3846 let fiscal_year = start_date.year() as u16;
3847 let owner_ids: Vec<String> = self
3848 .master_data
3849 .employees
3850 .iter()
3851 .take(5)
3852 .map(|e| e.employee_id.clone())
3853 .collect();
3854 let owner_id = owner_ids
3855 .first()
3856 .map(std::string::String::as_str)
3857 .unwrap_or("BUYER-001");
3858
3859 let mut spend_gen = SpendAnalysisGenerator::new(seed);
3861 let spend_analyses =
3862 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
3863
3864 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
3866 let sourcing_projects = if owner_ids.is_empty() {
3867 Vec::new()
3868 } else {
3869 project_gen.generate(
3870 company_code,
3871 &categories_with_spend,
3872 &owner_ids,
3873 start_date,
3874 self.config.global.period_months,
3875 )
3876 };
3877 stats.sourcing_project_count = sourcing_projects.len();
3878
3879 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
3881 let mut qual_gen = QualificationGenerator::new(seed + 2);
3882 let qualifications = qual_gen.generate(
3883 company_code,
3884 &qual_vendor_ids,
3885 sourcing_projects.first().map(|p| p.project_id.as_str()),
3886 owner_id,
3887 start_date,
3888 );
3889
3890 let mut rfx_gen = RfxGenerator::new(seed + 3);
3892 let rfx_events: Vec<RfxEvent> = sourcing_projects
3893 .iter()
3894 .map(|proj| {
3895 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
3896 rfx_gen.generate(
3897 company_code,
3898 &proj.project_id,
3899 &proj.category_id,
3900 &qualified_vids,
3901 owner_id,
3902 start_date,
3903 50000.0,
3904 )
3905 })
3906 .collect();
3907 stats.rfx_event_count = rfx_events.len();
3908
3909 let mut bid_gen = BidGenerator::new(seed + 4);
3911 let mut all_bids = Vec::new();
3912 for rfx in &rfx_events {
3913 let bidder_count = vendor_ids.len().clamp(2, 5);
3914 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
3915 let bids = bid_gen.generate(rfx, &responding, start_date);
3916 all_bids.extend(bids);
3917 }
3918 stats.bid_count = all_bids.len();
3919
3920 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
3922 let bid_evaluations: Vec<BidEvaluation> = rfx_events
3923 .iter()
3924 .map(|rfx| {
3925 let rfx_bids: Vec<SupplierBid> = all_bids
3926 .iter()
3927 .filter(|b| b.rfx_id == rfx.rfx_id)
3928 .cloned()
3929 .collect();
3930 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
3931 })
3932 .collect();
3933
3934 let mut contract_gen = ContractGenerator::new(seed + 6);
3936 let contracts: Vec<ProcurementContract> = bid_evaluations
3937 .iter()
3938 .zip(rfx_events.iter())
3939 .filter_map(|(eval, rfx)| {
3940 eval.ranked_bids.first().and_then(|winner| {
3941 all_bids
3942 .iter()
3943 .find(|b| b.bid_id == winner.bid_id)
3944 .map(|winning_bid| {
3945 contract_gen.generate_from_bid(
3946 winning_bid,
3947 Some(&rfx.sourcing_project_id),
3948 &rfx.category_id,
3949 owner_id,
3950 start_date,
3951 )
3952 })
3953 })
3954 })
3955 .collect();
3956 stats.contract_count = contracts.len();
3957
3958 let mut catalog_gen = CatalogGenerator::new(seed + 7);
3960 let catalog_items = catalog_gen.generate(&contracts);
3961 stats.catalog_item_count = catalog_items.len();
3962
3963 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
3965 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
3966 .iter()
3967 .fold(
3968 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
3969 |mut acc, c| {
3970 acc.entry(c.vendor_id.clone()).or_default().push(c);
3971 acc
3972 },
3973 )
3974 .into_iter()
3975 .collect();
3976 let scorecards = scorecard_gen.generate(
3977 company_code,
3978 &vendor_contracts,
3979 start_date,
3980 end_date,
3981 owner_id,
3982 );
3983 stats.scorecard_count = scorecards.len();
3984
3985 let mut sourcing_projects = sourcing_projects;
3988 for project in &mut sourcing_projects {
3989 project.rfx_ids = rfx_events
3991 .iter()
3992 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
3993 .map(|rfx| rfx.rfx_id.clone())
3994 .collect();
3995
3996 project.contract_id = contracts
3998 .iter()
3999 .find(|c| {
4000 c.sourcing_project_id
4001 .as_deref()
4002 .is_some_and(|sp| sp == project.project_id)
4003 })
4004 .map(|c| c.contract_id.clone());
4005
4006 project.spend_analysis_id = spend_analyses
4008 .iter()
4009 .find(|sa| sa.category_id == project.category_id)
4010 .map(|sa| sa.category_id.clone());
4011 }
4012
4013 info!(
4014 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4015 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4016 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4017 );
4018 self.check_resources_with_log("post-sourcing")?;
4019
4020 Ok(SourcingSnapshot {
4021 spend_analyses,
4022 sourcing_projects,
4023 qualifications,
4024 rfx_events,
4025 bids: all_bids,
4026 bid_evaluations,
4027 contracts,
4028 catalog_items,
4029 scorecards,
4030 })
4031 }
4032
4033 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4039 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4040
4041 let parent_code = self
4042 .config
4043 .companies
4044 .first()
4045 .map(|c| c.code.clone())
4046 .unwrap_or_else(|| "PARENT".to_string());
4047
4048 let mut group = GroupStructure::new(parent_code);
4049
4050 for company in self.config.companies.iter().skip(1) {
4051 let sub =
4052 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4053 group.add_subsidiary(sub);
4054 }
4055
4056 group
4057 }
4058
4059 fn phase_intercompany(
4061 &mut self,
4062 journal_entries: &[JournalEntry],
4063 stats: &mut EnhancedGenerationStatistics,
4064 ) -> SynthResult<IntercompanySnapshot> {
4065 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4067 debug!("Phase 14b: Skipped (intercompany generation disabled)");
4068 return Ok(IntercompanySnapshot::default());
4069 }
4070
4071 if self.config.companies.len() < 2 {
4073 debug!(
4074 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4075 self.config.companies.len()
4076 );
4077 return Ok(IntercompanySnapshot::default());
4078 }
4079
4080 info!("Phase 14b: Generating Intercompany Transactions");
4081
4082 let group_structure = self.build_group_structure();
4085 debug!(
4086 "Group structure built: parent={}, subsidiaries={}",
4087 group_structure.parent_entity,
4088 group_structure.subsidiaries.len()
4089 );
4090
4091 let seed = self.seed;
4092 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4093 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4094 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4095
4096 let parent_code = self.config.companies[0].code.clone();
4099 let mut ownership_structure =
4100 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4101
4102 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4103 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4104 format!("REL{:03}", i + 1),
4105 parent_code.clone(),
4106 company.code.clone(),
4107 rust_decimal::Decimal::from(100), start_date,
4109 );
4110 ownership_structure.add_relationship(relationship);
4111 }
4112
4113 let tp_method = match self.config.intercompany.transfer_pricing_method {
4115 datasynth_config::schema::TransferPricingMethod::CostPlus => {
4116 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4117 }
4118 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4119 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4120 }
4121 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4122 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4123 }
4124 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4125 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4126 }
4127 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4128 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4129 }
4130 };
4131
4132 let ic_currency = self
4134 .config
4135 .companies
4136 .first()
4137 .map(|c| c.currency.clone())
4138 .unwrap_or_else(|| "USD".to_string());
4139 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4140 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4141 transfer_pricing_method: tp_method,
4142 markup_percent: rust_decimal::Decimal::from_f64_retain(
4143 self.config.intercompany.markup_percent,
4144 )
4145 .unwrap_or(rust_decimal::Decimal::from(5)),
4146 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4147 default_currency: ic_currency,
4148 ..Default::default()
4149 };
4150
4151 let mut ic_generator = datasynth_generators::ICGenerator::new(
4153 ic_gen_config,
4154 ownership_structure.clone(),
4155 seed + 50,
4156 );
4157
4158 let transactions_per_day = 3;
4161 let matched_pairs = ic_generator.generate_transactions_for_period(
4162 start_date,
4163 end_date,
4164 transactions_per_day,
4165 );
4166
4167 let mut seller_entries = Vec::new();
4169 let mut buyer_entries = Vec::new();
4170 let fiscal_year = start_date.year();
4171
4172 for pair in &matched_pairs {
4173 let fiscal_period = pair.posting_date.month();
4174 let (seller_je, buyer_je) =
4175 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4176 seller_entries.push(seller_je);
4177 buyer_entries.push(buyer_je);
4178 }
4179
4180 let matching_config = datasynth_generators::ICMatchingConfig {
4182 base_currency: self
4183 .config
4184 .companies
4185 .first()
4186 .map(|c| c.currency.clone())
4187 .unwrap_or_else(|| "USD".to_string()),
4188 ..Default::default()
4189 };
4190 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
4191 matching_engine.load_matched_pairs(&matched_pairs);
4192 let matching_result = matching_engine.run_matching(end_date);
4193
4194 let mut elimination_entries = Vec::new();
4196 if self.config.intercompany.generate_eliminations {
4197 let elim_config = datasynth_generators::EliminationConfig {
4198 consolidation_entity: "GROUP".to_string(),
4199 base_currency: self
4200 .config
4201 .companies
4202 .first()
4203 .map(|c| c.currency.clone())
4204 .unwrap_or_else(|| "USD".to_string()),
4205 ..Default::default()
4206 };
4207
4208 let mut elim_generator =
4209 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
4210
4211 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
4212 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
4213 matching_result
4214 .matched_balances
4215 .iter()
4216 .chain(matching_result.unmatched_balances.iter())
4217 .cloned()
4218 .collect();
4219
4220 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
4232 std::collections::HashMap::new();
4233 let mut equity_amounts: std::collections::HashMap<
4234 String,
4235 std::collections::HashMap<String, rust_decimal::Decimal>,
4236 > = std::collections::HashMap::new();
4237 {
4238 use rust_decimal::Decimal;
4239 let hundred = Decimal::from(100u32);
4240 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
4244 for sub in &group_structure.subsidiaries {
4245 let net_assets = {
4246 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4247 if na > Decimal::ZERO {
4248 na
4249 } else {
4250 Decimal::from(1_000_000u64)
4251 }
4252 };
4253 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
4255 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
4256
4257 let mut eq_map = std::collections::HashMap::new();
4260 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
4261 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
4262 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
4263 equity_amounts.insert(sub.entity_code.clone(), eq_map);
4264 }
4265 }
4266
4267 let journal = elim_generator.generate_eliminations(
4268 &fiscal_period,
4269 end_date,
4270 &all_balances,
4271 &matched_pairs,
4272 &investment_amounts,
4273 &equity_amounts,
4274 );
4275
4276 elimination_entries = journal.entries.clone();
4277 }
4278
4279 let matched_pair_count = matched_pairs.len();
4280 let elimination_entry_count = elimination_entries.len();
4281 let match_rate = matching_result.match_rate;
4282
4283 stats.ic_matched_pair_count = matched_pair_count;
4284 stats.ic_elimination_count = elimination_entry_count;
4285 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
4286
4287 info!(
4288 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
4289 matched_pair_count,
4290 stats.ic_transaction_count,
4291 seller_entries.len(),
4292 buyer_entries.len(),
4293 elimination_entry_count,
4294 match_rate * 100.0
4295 );
4296 self.check_resources_with_log("post-intercompany")?;
4297
4298 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
4302 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
4303 use rust_decimal::Decimal;
4304
4305 let eight_pct = Decimal::new(8, 2); group_structure
4308 .subsidiaries
4309 .iter()
4310 .filter(|sub| {
4311 sub.nci_percentage > Decimal::ZERO
4312 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
4313 })
4314 .map(|sub| {
4315 let net_assets_from_jes =
4319 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4320
4321 let net_assets = if net_assets_from_jes > Decimal::ZERO {
4322 net_assets_from_jes.round_dp(2)
4323 } else {
4324 Decimal::from(1_000_000u64)
4326 };
4327
4328 let net_income = (net_assets * eight_pct).round_dp(2);
4330
4331 NciMeasurement::compute(
4332 sub.entity_code.clone(),
4333 sub.nci_percentage,
4334 net_assets,
4335 net_income,
4336 )
4337 })
4338 .collect()
4339 };
4340
4341 if !nci_measurements.is_empty() {
4342 info!(
4343 "NCI measurements: {} subsidiaries with non-controlling interests",
4344 nci_measurements.len()
4345 );
4346 }
4347
4348 Ok(IntercompanySnapshot {
4349 group_structure: Some(group_structure),
4350 matched_pairs,
4351 seller_journal_entries: seller_entries,
4352 buyer_journal_entries: buyer_entries,
4353 elimination_entries,
4354 nci_measurements,
4355 matched_pair_count,
4356 elimination_entry_count,
4357 match_rate,
4358 })
4359 }
4360
4361 fn phase_financial_reporting(
4363 &mut self,
4364 document_flows: &DocumentFlowSnapshot,
4365 journal_entries: &[JournalEntry],
4366 coa: &Arc<ChartOfAccounts>,
4367 _hr: &HrSnapshot,
4368 _audit: &AuditSnapshot,
4369 stats: &mut EnhancedGenerationStatistics,
4370 ) -> SynthResult<FinancialReportingSnapshot> {
4371 let fs_enabled = self.phase_config.generate_financial_statements
4372 || self.config.financial_reporting.enabled;
4373 let br_enabled = self.phase_config.generate_bank_reconciliation;
4374
4375 if !fs_enabled && !br_enabled {
4376 debug!("Phase 15: Skipped (financial reporting disabled)");
4377 return Ok(FinancialReportingSnapshot::default());
4378 }
4379
4380 info!("Phase 15: Generating Financial Reporting Data");
4381
4382 let seed = self.seed;
4383 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4384 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4385
4386 let mut financial_statements = Vec::new();
4387 let mut bank_reconciliations = Vec::new();
4388 let mut trial_balances = Vec::new();
4389 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
4390 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
4391 Vec::new();
4392 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
4394 std::collections::HashMap::new();
4395 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
4397 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
4399
4400 if fs_enabled {
4408 let has_journal_entries = !journal_entries.is_empty();
4409
4410 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
4413 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
4415
4416 let elimination_entries: Vec<&JournalEntry> = journal_entries
4418 .iter()
4419 .filter(|je| je.header.is_elimination)
4420 .collect();
4421
4422 for period in 0..self.config.global.period_months {
4424 let period_start = start_date + chrono::Months::new(period);
4425 let period_end =
4426 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4427 let fiscal_year = period_end.year() as u16;
4428 let fiscal_period = period_end.month() as u8;
4429 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4430
4431 let mut entity_tb_map: std::collections::HashMap<
4434 String,
4435 std::collections::HashMap<String, rust_decimal::Decimal>,
4436 > = std::collections::HashMap::new();
4437
4438 for (company_idx, company) in self.config.companies.iter().enumerate() {
4440 let company_code = company.code.as_str();
4441 let currency = company.currency.as_str();
4442 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
4445 let mut company_fs_gen =
4446 FinancialStatementGenerator::new(seed + company_seed_offset);
4447
4448 if has_journal_entries {
4449 let tb_entries = Self::build_cumulative_trial_balance(
4450 journal_entries,
4451 coa,
4452 company_code,
4453 start_date,
4454 period_end,
4455 fiscal_year,
4456 fiscal_period,
4457 );
4458
4459 let entity_cat_map =
4461 entity_tb_map.entry(company_code.to_string()).or_default();
4462 for tb_entry in &tb_entries {
4463 let net = tb_entry.debit_balance - tb_entry.credit_balance;
4464 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
4465 }
4466
4467 let stmts = company_fs_gen.generate(
4468 company_code,
4469 currency,
4470 &tb_entries,
4471 period_start,
4472 period_end,
4473 fiscal_year,
4474 fiscal_period,
4475 None,
4476 "SYS-AUTOCLOSE",
4477 );
4478
4479 let mut entity_stmts = Vec::new();
4480 for stmt in stmts {
4481 if stmt.statement_type == StatementType::CashFlowStatement {
4482 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
4483 let cf_items = Self::build_cash_flow_from_trial_balances(
4484 &tb_entries,
4485 None,
4486 net_income,
4487 );
4488 entity_stmts.push(FinancialStatement {
4489 cash_flow_items: cf_items,
4490 ..stmt
4491 });
4492 } else {
4493 entity_stmts.push(stmt);
4494 }
4495 }
4496
4497 financial_statements.extend(entity_stmts.clone());
4499
4500 standalone_statements
4502 .entry(company_code.to_string())
4503 .or_default()
4504 .extend(entity_stmts);
4505
4506 if company_idx == 0 {
4509 trial_balances.push(PeriodTrialBalance {
4510 fiscal_year,
4511 fiscal_period,
4512 period_start,
4513 period_end,
4514 entries: tb_entries,
4515 });
4516 }
4517 } else {
4518 let tb_entries = Self::build_trial_balance_from_entries(
4520 journal_entries,
4521 coa,
4522 company_code,
4523 fiscal_year,
4524 fiscal_period,
4525 );
4526
4527 let stmts = company_fs_gen.generate(
4528 company_code,
4529 currency,
4530 &tb_entries,
4531 period_start,
4532 period_end,
4533 fiscal_year,
4534 fiscal_period,
4535 None,
4536 "SYS-AUTOCLOSE",
4537 );
4538 financial_statements.extend(stmts.clone());
4539 standalone_statements
4540 .entry(company_code.to_string())
4541 .or_default()
4542 .extend(stmts);
4543
4544 if company_idx == 0 && !tb_entries.is_empty() {
4545 trial_balances.push(PeriodTrialBalance {
4546 fiscal_year,
4547 fiscal_period,
4548 period_start,
4549 period_end,
4550 entries: tb_entries,
4551 });
4552 }
4553 }
4554 }
4555
4556 let group_currency = self
4559 .config
4560 .companies
4561 .first()
4562 .map(|c| c.currency.as_str())
4563 .unwrap_or("USD");
4564
4565 let period_eliminations: Vec<JournalEntry> = elimination_entries
4567 .iter()
4568 .filter(|je| {
4569 je.header.fiscal_year == fiscal_year
4570 && je.header.fiscal_period == fiscal_period
4571 })
4572 .map(|je| (*je).clone())
4573 .collect();
4574
4575 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
4576 &entity_tb_map,
4577 &period_eliminations,
4578 &period_label,
4579 );
4580
4581 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
4584 .line_items
4585 .iter()
4586 .map(|li| {
4587 let net = li.post_elimination_total;
4588 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
4589 (net, rust_decimal::Decimal::ZERO)
4590 } else {
4591 (rust_decimal::Decimal::ZERO, -net)
4592 };
4593 datasynth_generators::TrialBalanceEntry {
4594 account_code: li.account_category.clone(),
4595 account_name: li.account_category.clone(),
4596 category: li.account_category.clone(),
4597 debit_balance: debit,
4598 credit_balance: credit,
4599 }
4600 })
4601 .collect();
4602
4603 let mut cons_stmts = cons_gen.generate(
4604 "GROUP",
4605 group_currency,
4606 &cons_tb,
4607 period_start,
4608 period_end,
4609 fiscal_year,
4610 fiscal_period,
4611 None,
4612 "SYS-AUTOCLOSE",
4613 );
4614
4615 let bs_categories: &[&str] = &[
4619 "CASH",
4620 "RECEIVABLES",
4621 "INVENTORY",
4622 "FIXEDASSETS",
4623 "PAYABLES",
4624 "ACCRUEDLIABILITIES",
4625 "LONGTERMDEBT",
4626 "EQUITY",
4627 ];
4628 let (bs_items, is_items): (Vec<_>, Vec<_>) =
4629 cons_line_items.into_iter().partition(|li| {
4630 let upper = li.label.to_uppercase();
4631 bs_categories.iter().any(|c| upper == *c)
4632 });
4633
4634 for stmt in &mut cons_stmts {
4635 stmt.is_consolidated = true;
4636 match stmt.statement_type {
4637 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
4638 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
4639 _ => {} }
4641 }
4642
4643 consolidated_statements.extend(cons_stmts);
4644 consolidation_schedules.push(schedule);
4645 }
4646
4647 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
4653 info!(
4654 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
4655 stats.financial_statement_count,
4656 consolidated_statements.len(),
4657 has_journal_entries
4658 );
4659
4660 let entity_seeds: Vec<SegmentSeed> = self
4665 .config
4666 .companies
4667 .iter()
4668 .map(|c| SegmentSeed {
4669 code: c.code.clone(),
4670 name: c.name.clone(),
4671 currency: c.currency.clone(),
4672 })
4673 .collect();
4674
4675 let mut seg_gen = SegmentGenerator::new(seed + 30);
4676
4677 for period in 0..self.config.global.period_months {
4682 let period_end =
4683 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4684 let fiscal_year = period_end.year() as u16;
4685 let fiscal_period = period_end.month() as u8;
4686 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4687
4688 use datasynth_core::models::StatementType;
4689
4690 let cons_is = consolidated_statements.iter().find(|s| {
4692 s.fiscal_year == fiscal_year
4693 && s.fiscal_period == fiscal_period
4694 && s.statement_type == StatementType::IncomeStatement
4695 });
4696 let cons_bs = consolidated_statements.iter().find(|s| {
4697 s.fiscal_year == fiscal_year
4698 && s.fiscal_period == fiscal_period
4699 && s.statement_type == StatementType::BalanceSheet
4700 });
4701
4702 let is_stmt = cons_is.or_else(|| {
4704 financial_statements.iter().find(|s| {
4705 s.fiscal_year == fiscal_year
4706 && s.fiscal_period == fiscal_period
4707 && s.statement_type == StatementType::IncomeStatement
4708 })
4709 });
4710 let bs_stmt = cons_bs.or_else(|| {
4711 financial_statements.iter().find(|s| {
4712 s.fiscal_year == fiscal_year
4713 && s.fiscal_period == fiscal_period
4714 && s.statement_type == StatementType::BalanceSheet
4715 })
4716 });
4717
4718 let consolidated_revenue = is_stmt
4719 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
4720 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
4722
4723 let consolidated_profit = is_stmt
4724 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
4725 .map(|li| li.amount)
4726 .unwrap_or(rust_decimal::Decimal::ZERO);
4727
4728 let consolidated_assets = bs_stmt
4729 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
4730 .map(|li| li.amount)
4731 .unwrap_or(rust_decimal::Decimal::ZERO);
4732
4733 if consolidated_revenue == rust_decimal::Decimal::ZERO
4735 && consolidated_assets == rust_decimal::Decimal::ZERO
4736 {
4737 continue;
4738 }
4739
4740 let group_code = self
4741 .config
4742 .companies
4743 .first()
4744 .map(|c| c.code.as_str())
4745 .unwrap_or("GROUP");
4746
4747 let total_depr: rust_decimal::Decimal = journal_entries
4750 .iter()
4751 .filter(|je| je.header.document_type == "CL")
4752 .flat_map(|je| je.lines.iter())
4753 .filter(|l| l.gl_account.starts_with("6000"))
4754 .map(|l| l.debit_amount)
4755 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
4756 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
4757 Some(total_depr)
4758 } else {
4759 None
4760 };
4761
4762 let (segs, recon) = seg_gen.generate(
4763 group_code,
4764 &period_label,
4765 consolidated_revenue,
4766 consolidated_profit,
4767 consolidated_assets,
4768 &entity_seeds,
4769 depr_param,
4770 );
4771 segment_reports.extend(segs);
4772 segment_reconciliations.push(recon);
4773 }
4774
4775 info!(
4776 "Segment reports generated: {} segments, {} reconciliations",
4777 segment_reports.len(),
4778 segment_reconciliations.len()
4779 );
4780 }
4781
4782 if br_enabled && !document_flows.payments.is_empty() {
4784 let employee_ids: Vec<String> = self
4785 .master_data
4786 .employees
4787 .iter()
4788 .map(|e| e.employee_id.clone())
4789 .collect();
4790 let mut br_gen =
4791 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
4792
4793 for company in &self.config.companies {
4795 let company_payments: Vec<PaymentReference> = document_flows
4796 .payments
4797 .iter()
4798 .filter(|p| p.header.company_code == company.code)
4799 .map(|p| PaymentReference {
4800 id: p.header.document_id.clone(),
4801 amount: if p.is_vendor { p.amount } else { -p.amount },
4802 date: p.header.document_date,
4803 reference: p
4804 .check_number
4805 .clone()
4806 .or_else(|| p.wire_reference.clone())
4807 .unwrap_or_else(|| p.header.document_id.clone()),
4808 })
4809 .collect();
4810
4811 if company_payments.is_empty() {
4812 continue;
4813 }
4814
4815 let bank_account_id = format!("{}-MAIN", company.code);
4816
4817 for period in 0..self.config.global.period_months {
4819 let period_start = start_date + chrono::Months::new(period);
4820 let period_end =
4821 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4822
4823 let period_payments: Vec<PaymentReference> = company_payments
4824 .iter()
4825 .filter(|p| p.date >= period_start && p.date <= period_end)
4826 .cloned()
4827 .collect();
4828
4829 let recon = br_gen.generate(
4830 &company.code,
4831 &bank_account_id,
4832 period_start,
4833 period_end,
4834 &company.currency,
4835 &period_payments,
4836 );
4837 bank_reconciliations.push(recon);
4838 }
4839 }
4840 info!(
4841 "Bank reconciliations generated: {} reconciliations",
4842 bank_reconciliations.len()
4843 );
4844 }
4845
4846 stats.bank_reconciliation_count = bank_reconciliations.len();
4847 self.check_resources_with_log("post-financial-reporting")?;
4848
4849 if !trial_balances.is_empty() {
4850 info!(
4851 "Period-close trial balances captured: {} periods",
4852 trial_balances.len()
4853 );
4854 }
4855
4856 let notes_to_financial_statements = Vec::new();
4860
4861 Ok(FinancialReportingSnapshot {
4862 financial_statements,
4863 standalone_statements,
4864 consolidated_statements,
4865 consolidation_schedules,
4866 bank_reconciliations,
4867 trial_balances,
4868 segment_reports,
4869 segment_reconciliations,
4870 notes_to_financial_statements,
4871 })
4872 }
4873
4874 fn generate_notes_to_financial_statements(
4881 &self,
4882 financial_reporting: &mut FinancialReportingSnapshot,
4883 accounting_standards: &AccountingStandardsSnapshot,
4884 tax: &TaxSnapshot,
4885 hr: &HrSnapshot,
4886 audit: &AuditSnapshot,
4887 ) {
4888 use datasynth_config::schema::AccountingFrameworkConfig;
4889 use datasynth_core::models::StatementType;
4890 use datasynth_generators::period_close::notes_generator::{
4891 NotesGenerator, NotesGeneratorContext,
4892 };
4893
4894 let seed = self.seed;
4895 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4896 {
4897 Ok(d) => d,
4898 Err(_) => return,
4899 };
4900
4901 let mut notes_gen = NotesGenerator::new(seed + 4235);
4902
4903 for company in &self.config.companies {
4904 let last_period_end = start_date
4905 + chrono::Months::new(self.config.global.period_months)
4906 - chrono::Days::new(1);
4907 let fiscal_year = last_period_end.year() as u16;
4908
4909 let entity_is = financial_reporting
4911 .standalone_statements
4912 .get(&company.code)
4913 .and_then(|stmts| {
4914 stmts.iter().find(|s| {
4915 s.fiscal_year == fiscal_year
4916 && s.statement_type == StatementType::IncomeStatement
4917 })
4918 });
4919 let entity_bs = financial_reporting
4920 .standalone_statements
4921 .get(&company.code)
4922 .and_then(|stmts| {
4923 stmts.iter().find(|s| {
4924 s.fiscal_year == fiscal_year
4925 && s.statement_type == StatementType::BalanceSheet
4926 })
4927 });
4928
4929 let revenue_amount = entity_is
4931 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
4932 .map(|li| li.amount);
4933 let ppe_gross = entity_bs
4934 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
4935 .map(|li| li.amount);
4936
4937 let framework = match self
4938 .config
4939 .accounting_standards
4940 .framework
4941 .unwrap_or_default()
4942 {
4943 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
4944 "IFRS".to_string()
4945 }
4946 _ => "US GAAP".to_string(),
4947 };
4948
4949 let (entity_dta, entity_dtl) = {
4952 let mut dta = rust_decimal::Decimal::ZERO;
4953 let mut dtl = rust_decimal::Decimal::ZERO;
4954 for rf in &tax.deferred_tax.rollforwards {
4955 if rf.entity_code == company.code {
4956 dta += rf.closing_dta;
4957 dtl += rf.closing_dtl;
4958 }
4959 }
4960 (
4961 if dta > rust_decimal::Decimal::ZERO {
4962 Some(dta)
4963 } else {
4964 None
4965 },
4966 if dtl > rust_decimal::Decimal::ZERO {
4967 Some(dtl)
4968 } else {
4969 None
4970 },
4971 )
4972 };
4973
4974 let entity_provisions: Vec<_> = accounting_standards
4977 .provisions
4978 .iter()
4979 .filter(|p| p.entity_code == company.code)
4980 .collect();
4981 let provision_count = entity_provisions.len();
4982 let total_provisions = if provision_count > 0 {
4983 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
4984 } else {
4985 None
4986 };
4987
4988 let entity_pension_plan_count = hr
4990 .pension_plans
4991 .iter()
4992 .filter(|p| p.entity_code == company.code)
4993 .count();
4994 let entity_total_dbo: Option<rust_decimal::Decimal> = {
4995 let sum: rust_decimal::Decimal = hr
4996 .pension_disclosures
4997 .iter()
4998 .filter(|d| {
4999 hr.pension_plans
5000 .iter()
5001 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5002 })
5003 .map(|d| d.net_pension_liability)
5004 .sum();
5005 let plan_assets_sum: rust_decimal::Decimal = hr
5006 .pension_plan_assets
5007 .iter()
5008 .filter(|a| {
5009 hr.pension_plans
5010 .iter()
5011 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5012 })
5013 .map(|a| a.fair_value_closing)
5014 .sum();
5015 if entity_pension_plan_count > 0 {
5016 Some(sum + plan_assets_sum)
5017 } else {
5018 None
5019 }
5020 };
5021 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5022 let sum: rust_decimal::Decimal = hr
5023 .pension_plan_assets
5024 .iter()
5025 .filter(|a| {
5026 hr.pension_plans
5027 .iter()
5028 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5029 })
5030 .map(|a| a.fair_value_closing)
5031 .sum();
5032 if entity_pension_plan_count > 0 {
5033 Some(sum)
5034 } else {
5035 None
5036 }
5037 };
5038
5039 let rp_count = audit.related_party_transactions.len();
5042 let se_count = audit.subsequent_events.len();
5043 let adjusting_count = audit
5044 .subsequent_events
5045 .iter()
5046 .filter(|e| {
5047 matches!(
5048 e.classification,
5049 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5050 )
5051 })
5052 .count();
5053
5054 let ctx = NotesGeneratorContext {
5055 entity_code: company.code.clone(),
5056 framework,
5057 period: format!("FY{}", fiscal_year),
5058 period_end: last_period_end,
5059 currency: company.currency.clone(),
5060 revenue_amount,
5061 total_ppe_gross: ppe_gross,
5062 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5063 deferred_tax_asset: entity_dta,
5065 deferred_tax_liability: entity_dtl,
5066 provision_count,
5068 total_provisions,
5069 pension_plan_count: entity_pension_plan_count,
5071 total_dbo: entity_total_dbo,
5072 total_plan_assets: entity_total_plan_assets,
5073 related_party_transaction_count: rp_count,
5075 subsequent_event_count: se_count,
5076 adjusting_event_count: adjusting_count,
5077 ..NotesGeneratorContext::default()
5078 };
5079
5080 let entity_notes = notes_gen.generate(&ctx);
5081 info!(
5082 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5083 company.code,
5084 entity_notes.len(),
5085 entity_dta,
5086 entity_dtl,
5087 provision_count,
5088 );
5089 financial_reporting
5090 .notes_to_financial_statements
5091 .extend(entity_notes);
5092 }
5093 }
5094
5095 fn build_trial_balance_from_entries(
5101 journal_entries: &[JournalEntry],
5102 coa: &ChartOfAccounts,
5103 company_code: &str,
5104 fiscal_year: u16,
5105 fiscal_period: u8,
5106 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5107 use rust_decimal::Decimal;
5108
5109 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
5111 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
5112
5113 for je in journal_entries {
5114 if je.header.company_code != company_code
5116 || je.header.fiscal_year != fiscal_year
5117 || je.header.fiscal_period != fiscal_period
5118 {
5119 continue;
5120 }
5121
5122 for line in &je.lines {
5123 let acct = &line.gl_account;
5124 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
5125 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
5126 }
5127 }
5128
5129 let mut all_accounts: Vec<&String> = account_debits
5131 .keys()
5132 .chain(account_credits.keys())
5133 .collect::<std::collections::HashSet<_>>()
5134 .into_iter()
5135 .collect();
5136 all_accounts.sort();
5137
5138 let mut entries = Vec::new();
5139
5140 for acct_number in all_accounts {
5141 let debit = account_debits
5142 .get(acct_number)
5143 .copied()
5144 .unwrap_or(Decimal::ZERO);
5145 let credit = account_credits
5146 .get(acct_number)
5147 .copied()
5148 .unwrap_or(Decimal::ZERO);
5149
5150 if debit.is_zero() && credit.is_zero() {
5151 continue;
5152 }
5153
5154 let account_name = coa
5156 .get_account(acct_number)
5157 .map(|gl| gl.short_description.clone())
5158 .unwrap_or_else(|| format!("Account {acct_number}"));
5159
5160 let category = Self::category_from_account_code(acct_number);
5165
5166 entries.push(datasynth_generators::TrialBalanceEntry {
5167 account_code: acct_number.clone(),
5168 account_name,
5169 category,
5170 debit_balance: debit,
5171 credit_balance: credit,
5172 });
5173 }
5174
5175 entries
5176 }
5177
5178 fn build_cumulative_trial_balance(
5185 journal_entries: &[JournalEntry],
5186 coa: &ChartOfAccounts,
5187 company_code: &str,
5188 start_date: NaiveDate,
5189 period_end: NaiveDate,
5190 fiscal_year: u16,
5191 fiscal_period: u8,
5192 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5193 use rust_decimal::Decimal;
5194
5195 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
5197 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
5198
5199 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
5201 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
5202
5203 for je in journal_entries {
5204 if je.header.company_code != company_code {
5205 continue;
5206 }
5207
5208 for line in &je.lines {
5209 let acct = &line.gl_account;
5210 let category = Self::category_from_account_code(acct);
5211 let is_bs_account = matches!(
5212 category.as_str(),
5213 "Cash"
5214 | "Receivables"
5215 | "Inventory"
5216 | "FixedAssets"
5217 | "Payables"
5218 | "AccruedLiabilities"
5219 | "LongTermDebt"
5220 | "Equity"
5221 );
5222
5223 if is_bs_account {
5224 if je.header.document_date <= period_end
5226 && je.header.document_date >= start_date
5227 {
5228 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5229 line.debit_amount;
5230 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5231 line.credit_amount;
5232 }
5233 } else {
5234 if je.header.fiscal_year == fiscal_year
5236 && je.header.fiscal_period == fiscal_period
5237 {
5238 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5239 line.debit_amount;
5240 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5241 line.credit_amount;
5242 }
5243 }
5244 }
5245 }
5246
5247 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
5249 all_accounts.extend(bs_debits.keys().cloned());
5250 all_accounts.extend(bs_credits.keys().cloned());
5251 all_accounts.extend(is_debits.keys().cloned());
5252 all_accounts.extend(is_credits.keys().cloned());
5253
5254 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
5255 sorted_accounts.sort();
5256
5257 let mut entries = Vec::new();
5258
5259 for acct_number in &sorted_accounts {
5260 let category = Self::category_from_account_code(acct_number);
5261 let is_bs_account = matches!(
5262 category.as_str(),
5263 "Cash"
5264 | "Receivables"
5265 | "Inventory"
5266 | "FixedAssets"
5267 | "Payables"
5268 | "AccruedLiabilities"
5269 | "LongTermDebt"
5270 | "Equity"
5271 );
5272
5273 let (debit, credit) = if is_bs_account {
5274 (
5275 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5276 bs_credits
5277 .get(acct_number)
5278 .copied()
5279 .unwrap_or(Decimal::ZERO),
5280 )
5281 } else {
5282 (
5283 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5284 is_credits
5285 .get(acct_number)
5286 .copied()
5287 .unwrap_or(Decimal::ZERO),
5288 )
5289 };
5290
5291 if debit.is_zero() && credit.is_zero() {
5292 continue;
5293 }
5294
5295 let account_name = coa
5296 .get_account(acct_number)
5297 .map(|gl| gl.short_description.clone())
5298 .unwrap_or_else(|| format!("Account {acct_number}"));
5299
5300 entries.push(datasynth_generators::TrialBalanceEntry {
5301 account_code: acct_number.clone(),
5302 account_name,
5303 category,
5304 debit_balance: debit,
5305 credit_balance: credit,
5306 });
5307 }
5308
5309 entries
5310 }
5311
5312 fn build_cash_flow_from_trial_balances(
5317 current_tb: &[datasynth_generators::TrialBalanceEntry],
5318 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
5319 net_income: rust_decimal::Decimal,
5320 ) -> Vec<CashFlowItem> {
5321 use rust_decimal::Decimal;
5322
5323 let aggregate =
5325 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
5326 let mut map: HashMap<String, Decimal> = HashMap::new();
5327 for entry in tb {
5328 let net = entry.debit_balance - entry.credit_balance;
5329 *map.entry(entry.category.clone()).or_default() += net;
5330 }
5331 map
5332 };
5333
5334 let current = aggregate(current_tb);
5335 let prior = prior_tb.map(aggregate);
5336
5337 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
5339 *map.get(key).unwrap_or(&Decimal::ZERO)
5340 };
5341
5342 let change = |key: &str| -> Decimal {
5344 let curr = get(¤t, key);
5345 match &prior {
5346 Some(p) => curr - get(p, key),
5347 None => curr,
5348 }
5349 };
5350
5351 let fixed_asset_change = change("FixedAssets");
5354 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
5355 -fixed_asset_change
5356 } else {
5357 Decimal::ZERO
5358 };
5359
5360 let ar_change = change("Receivables");
5362 let inventory_change = change("Inventory");
5363 let ap_change = change("Payables");
5365 let accrued_change = change("AccruedLiabilities");
5366
5367 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
5368 + (-ap_change)
5369 + (-accrued_change);
5370
5371 let capex = if fixed_asset_change > Decimal::ZERO {
5373 -fixed_asset_change
5374 } else {
5375 Decimal::ZERO
5376 };
5377 let investing_cf = capex;
5378
5379 let debt_change = -change("LongTermDebt");
5381 let equity_change = -change("Equity");
5382 let financing_cf = debt_change + equity_change;
5383
5384 let net_change = operating_cf + investing_cf + financing_cf;
5385
5386 vec![
5387 CashFlowItem {
5388 item_code: "CF-NI".to_string(),
5389 label: "Net Income".to_string(),
5390 category: CashFlowCategory::Operating,
5391 amount: net_income,
5392 amount_prior: None,
5393 sort_order: 1,
5394 is_total: false,
5395 },
5396 CashFlowItem {
5397 item_code: "CF-DEP".to_string(),
5398 label: "Depreciation & Amortization".to_string(),
5399 category: CashFlowCategory::Operating,
5400 amount: depreciation_addback,
5401 amount_prior: None,
5402 sort_order: 2,
5403 is_total: false,
5404 },
5405 CashFlowItem {
5406 item_code: "CF-AR".to_string(),
5407 label: "Change in Accounts Receivable".to_string(),
5408 category: CashFlowCategory::Operating,
5409 amount: -ar_change,
5410 amount_prior: None,
5411 sort_order: 3,
5412 is_total: false,
5413 },
5414 CashFlowItem {
5415 item_code: "CF-AP".to_string(),
5416 label: "Change in Accounts Payable".to_string(),
5417 category: CashFlowCategory::Operating,
5418 amount: -ap_change,
5419 amount_prior: None,
5420 sort_order: 4,
5421 is_total: false,
5422 },
5423 CashFlowItem {
5424 item_code: "CF-INV".to_string(),
5425 label: "Change in Inventory".to_string(),
5426 category: CashFlowCategory::Operating,
5427 amount: -inventory_change,
5428 amount_prior: None,
5429 sort_order: 5,
5430 is_total: false,
5431 },
5432 CashFlowItem {
5433 item_code: "CF-OP".to_string(),
5434 label: "Net Cash from Operating Activities".to_string(),
5435 category: CashFlowCategory::Operating,
5436 amount: operating_cf,
5437 amount_prior: None,
5438 sort_order: 6,
5439 is_total: true,
5440 },
5441 CashFlowItem {
5442 item_code: "CF-CAPEX".to_string(),
5443 label: "Capital Expenditures".to_string(),
5444 category: CashFlowCategory::Investing,
5445 amount: capex,
5446 amount_prior: None,
5447 sort_order: 7,
5448 is_total: false,
5449 },
5450 CashFlowItem {
5451 item_code: "CF-INV-T".to_string(),
5452 label: "Net Cash from Investing Activities".to_string(),
5453 category: CashFlowCategory::Investing,
5454 amount: investing_cf,
5455 amount_prior: None,
5456 sort_order: 8,
5457 is_total: true,
5458 },
5459 CashFlowItem {
5460 item_code: "CF-DEBT".to_string(),
5461 label: "Net Borrowings / (Repayments)".to_string(),
5462 category: CashFlowCategory::Financing,
5463 amount: debt_change,
5464 amount_prior: None,
5465 sort_order: 9,
5466 is_total: false,
5467 },
5468 CashFlowItem {
5469 item_code: "CF-EQ".to_string(),
5470 label: "Equity Changes".to_string(),
5471 category: CashFlowCategory::Financing,
5472 amount: equity_change,
5473 amount_prior: None,
5474 sort_order: 10,
5475 is_total: false,
5476 },
5477 CashFlowItem {
5478 item_code: "CF-FIN-T".to_string(),
5479 label: "Net Cash from Financing Activities".to_string(),
5480 category: CashFlowCategory::Financing,
5481 amount: financing_cf,
5482 amount_prior: None,
5483 sort_order: 11,
5484 is_total: true,
5485 },
5486 CashFlowItem {
5487 item_code: "CF-NET".to_string(),
5488 label: "Net Change in Cash".to_string(),
5489 category: CashFlowCategory::Operating,
5490 amount: net_change,
5491 amount_prior: None,
5492 sort_order: 12,
5493 is_total: true,
5494 },
5495 ]
5496 }
5497
5498 fn calculate_net_income_from_tb(
5502 tb: &[datasynth_generators::TrialBalanceEntry],
5503 ) -> rust_decimal::Decimal {
5504 use rust_decimal::Decimal;
5505
5506 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
5507 for entry in tb {
5508 let net = entry.debit_balance - entry.credit_balance;
5509 *aggregated.entry(entry.category.clone()).or_default() += net;
5510 }
5511
5512 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
5513 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
5514 let opex = *aggregated
5515 .get("OperatingExpenses")
5516 .unwrap_or(&Decimal::ZERO);
5517 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
5518 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
5519
5520 let operating_income = revenue - cogs - opex - other_expenses - other_income;
5523 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
5525 operating_income - tax
5526 }
5527
5528 fn category_from_account_code(code: &str) -> String {
5535 let prefix: String = code.chars().take(2).collect();
5536 match prefix.as_str() {
5537 "10" => "Cash",
5538 "11" => "Receivables",
5539 "12" | "13" | "14" => "Inventory",
5540 "15" | "16" | "17" | "18" | "19" => "FixedAssets",
5541 "20" => "Payables",
5542 "21" | "22" | "23" | "24" => "AccruedLiabilities",
5543 "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
5544 "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
5545 "40" | "41" | "42" | "43" | "44" => "Revenue",
5546 "50" | "51" | "52" => "CostOfSales",
5547 "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
5548 "OperatingExpenses"
5549 }
5550 "70" | "71" | "72" | "73" | "74" => "OtherIncome",
5551 "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
5552 _ => "OperatingExpenses",
5553 }
5554 .to_string()
5555 }
5556
5557 fn phase_hr_data(
5559 &mut self,
5560 stats: &mut EnhancedGenerationStatistics,
5561 ) -> SynthResult<HrSnapshot> {
5562 if !self.phase_config.generate_hr {
5563 debug!("Phase 16: Skipped (HR generation disabled)");
5564 return Ok(HrSnapshot::default());
5565 }
5566
5567 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
5568
5569 let seed = self.seed;
5570 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5571 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5572 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5573 let company_code = self
5574 .config
5575 .companies
5576 .first()
5577 .map(|c| c.code.as_str())
5578 .unwrap_or("1000");
5579 let currency = self
5580 .config
5581 .companies
5582 .first()
5583 .map(|c| c.currency.as_str())
5584 .unwrap_or("USD");
5585
5586 let employee_ids: Vec<String> = self
5587 .master_data
5588 .employees
5589 .iter()
5590 .map(|e| e.employee_id.clone())
5591 .collect();
5592
5593 if employee_ids.is_empty() {
5594 debug!("Phase 16: Skipped (no employees available)");
5595 return Ok(HrSnapshot::default());
5596 }
5597
5598 let cost_center_ids: Vec<String> = self
5601 .master_data
5602 .employees
5603 .iter()
5604 .filter_map(|e| e.cost_center.clone())
5605 .collect::<std::collections::HashSet<_>>()
5606 .into_iter()
5607 .collect();
5608
5609 let mut snapshot = HrSnapshot::default();
5610
5611 if self.config.hr.payroll.enabled {
5613 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
5614 .with_pools(employee_ids.clone(), cost_center_ids.clone());
5615
5616 let payroll_pack = self.primary_pack();
5618
5619 payroll_gen.set_country_pack(payroll_pack.clone());
5622
5623 let employees_with_salary: Vec<(
5624 String,
5625 rust_decimal::Decimal,
5626 Option<String>,
5627 Option<String>,
5628 )> = self
5629 .master_data
5630 .employees
5631 .iter()
5632 .map(|e| {
5633 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
5636 e.base_salary
5637 } else {
5638 rust_decimal::Decimal::from(60_000)
5639 };
5640 (
5641 e.employee_id.clone(),
5642 annual, e.cost_center.clone(),
5644 e.department_id.clone(),
5645 )
5646 })
5647 .collect();
5648
5649 for month in 0..self.config.global.period_months {
5650 let period_start = start_date + chrono::Months::new(month);
5651 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
5652 let (run, items) = payroll_gen.generate(
5653 company_code,
5654 &employees_with_salary,
5655 period_start,
5656 period_end,
5657 currency,
5658 );
5659 snapshot.payroll_runs.push(run);
5660 snapshot.payroll_run_count += 1;
5661 snapshot.payroll_line_item_count += items.len();
5662 snapshot.payroll_line_items.extend(items);
5663 }
5664 }
5665
5666 if self.config.hr.time_attendance.enabled {
5668 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
5669 .with_pools(employee_ids.clone(), cost_center_ids.clone());
5670 let entries = time_gen.generate(
5671 &employee_ids,
5672 start_date,
5673 end_date,
5674 &self.config.hr.time_attendance,
5675 );
5676 snapshot.time_entry_count = entries.len();
5677 snapshot.time_entries = entries;
5678 }
5679
5680 if self.config.hr.expenses.enabled {
5682 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
5683 .with_pools(employee_ids.clone(), cost_center_ids.clone());
5684 expense_gen.set_country_pack(self.primary_pack().clone());
5685 let company_currency = self
5686 .config
5687 .companies
5688 .first()
5689 .map(|c| c.currency.as_str())
5690 .unwrap_or("USD");
5691 let reports = expense_gen.generate_with_currency(
5692 &employee_ids,
5693 start_date,
5694 end_date,
5695 &self.config.hr.expenses,
5696 company_currency,
5697 );
5698 snapshot.expense_report_count = reports.len();
5699 snapshot.expense_reports = reports;
5700 }
5701
5702 if self.config.hr.payroll.enabled {
5704 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
5705 let employee_pairs: Vec<(String, String)> = self
5706 .master_data
5707 .employees
5708 .iter()
5709 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
5710 .collect();
5711 let enrollments =
5712 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
5713 snapshot.benefit_enrollment_count = enrollments.len();
5714 snapshot.benefit_enrollments = enrollments;
5715 }
5716
5717 if self.phase_config.generate_hr {
5719 let entity_name = self
5720 .config
5721 .companies
5722 .first()
5723 .map(|c| c.name.as_str())
5724 .unwrap_or("Entity");
5725 let period_months = self.config.global.period_months;
5726 let period_label = {
5727 let y = start_date.year();
5728 let m = start_date.month();
5729 if period_months >= 12 {
5730 format!("FY{y}")
5731 } else {
5732 format!("{y}-{m:02}")
5733 }
5734 };
5735 let reporting_date =
5736 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
5737
5738 let avg_salary: Option<rust_decimal::Decimal> = {
5743 let employee_count = employee_ids.len();
5744 if self.config.hr.payroll.enabled
5745 && employee_count > 0
5746 && !snapshot.payroll_runs.is_empty()
5747 {
5748 let total_gross: rust_decimal::Decimal = snapshot
5750 .payroll_runs
5751 .iter()
5752 .filter(|r| r.company_code == company_code)
5753 .map(|r| r.total_gross)
5754 .sum();
5755 if total_gross > rust_decimal::Decimal::ZERO {
5756 let annual_total = if period_months > 0 && period_months < 12 {
5758 total_gross * rust_decimal::Decimal::from(12u32)
5759 / rust_decimal::Decimal::from(period_months)
5760 } else {
5761 total_gross
5762 };
5763 Some(
5764 (annual_total / rust_decimal::Decimal::from(employee_count))
5765 .round_dp(2),
5766 )
5767 } else {
5768 None
5769 }
5770 } else {
5771 None
5772 }
5773 };
5774
5775 let mut pension_gen =
5776 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
5777 let pension_snap = pension_gen.generate(
5778 company_code,
5779 entity_name,
5780 &period_label,
5781 reporting_date,
5782 employee_ids.len(),
5783 currency,
5784 avg_salary,
5785 period_months,
5786 );
5787 snapshot.pension_plan_count = pension_snap.plans.len();
5788 snapshot.pension_plans = pension_snap.plans;
5789 snapshot.pension_obligations = pension_snap.obligations;
5790 snapshot.pension_plan_assets = pension_snap.plan_assets;
5791 snapshot.pension_disclosures = pension_snap.disclosures;
5792 snapshot.pension_journal_entries = pension_snap.journal_entries;
5797 }
5798
5799 if self.phase_config.generate_hr && !employee_ids.is_empty() {
5801 let period_months = self.config.global.period_months;
5802 let period_label = {
5803 let y = start_date.year();
5804 let m = start_date.month();
5805 if period_months >= 12 {
5806 format!("FY{y}")
5807 } else {
5808 format!("{y}-{m:02}")
5809 }
5810 };
5811 let reporting_date =
5812 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
5813
5814 let mut stock_comp_gen =
5815 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
5816 let stock_snap = stock_comp_gen.generate(
5817 company_code,
5818 &employee_ids,
5819 start_date,
5820 &period_label,
5821 reporting_date,
5822 currency,
5823 );
5824 snapshot.stock_grant_count = stock_snap.grants.len();
5825 snapshot.stock_grants = stock_snap.grants;
5826 snapshot.stock_comp_expenses = stock_snap.expenses;
5827 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
5828 }
5829
5830 stats.payroll_run_count = snapshot.payroll_run_count;
5831 stats.time_entry_count = snapshot.time_entry_count;
5832 stats.expense_report_count = snapshot.expense_report_count;
5833 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
5834 stats.pension_plan_count = snapshot.pension_plan_count;
5835 stats.stock_grant_count = snapshot.stock_grant_count;
5836
5837 info!(
5838 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
5839 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
5840 snapshot.time_entry_count, snapshot.expense_report_count,
5841 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
5842 snapshot.stock_grant_count
5843 );
5844 self.check_resources_with_log("post-hr")?;
5845
5846 Ok(snapshot)
5847 }
5848
5849 fn phase_accounting_standards(
5851 &mut self,
5852 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
5853 journal_entries: &[JournalEntry],
5854 stats: &mut EnhancedGenerationStatistics,
5855 ) -> SynthResult<AccountingStandardsSnapshot> {
5856 if !self.phase_config.generate_accounting_standards {
5857 debug!("Phase 17: Skipped (accounting standards generation disabled)");
5858 return Ok(AccountingStandardsSnapshot::default());
5859 }
5860 info!("Phase 17: Generating Accounting Standards Data");
5861
5862 let seed = self.seed;
5863 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5864 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5865 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5866 let company_code = self
5867 .config
5868 .companies
5869 .first()
5870 .map(|c| c.code.as_str())
5871 .unwrap_or("1000");
5872 let currency = self
5873 .config
5874 .companies
5875 .first()
5876 .map(|c| c.currency.as_str())
5877 .unwrap_or("USD");
5878
5879 let framework = match self.config.accounting_standards.framework {
5884 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
5885 datasynth_standards::framework::AccountingFramework::UsGaap
5886 }
5887 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
5888 datasynth_standards::framework::AccountingFramework::Ifrs
5889 }
5890 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
5891 datasynth_standards::framework::AccountingFramework::DualReporting
5892 }
5893 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
5894 datasynth_standards::framework::AccountingFramework::FrenchGaap
5895 }
5896 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
5897 datasynth_standards::framework::AccountingFramework::GermanGaap
5898 }
5899 None => {
5900 let pack = self.primary_pack();
5902 let pack_fw = pack.accounting.framework.as_str();
5903 match pack_fw {
5904 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
5905 "dual_reporting" => {
5906 datasynth_standards::framework::AccountingFramework::DualReporting
5907 }
5908 "french_gaap" => {
5909 datasynth_standards::framework::AccountingFramework::FrenchGaap
5910 }
5911 "german_gaap" | "hgb" => {
5912 datasynth_standards::framework::AccountingFramework::GermanGaap
5913 }
5914 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
5916 }
5917 }
5918 };
5919
5920 let mut snapshot = AccountingStandardsSnapshot::default();
5921
5922 if self.config.accounting_standards.revenue_recognition.enabled {
5924 let customer_ids: Vec<String> = self
5925 .master_data
5926 .customers
5927 .iter()
5928 .map(|c| c.customer_id.clone())
5929 .collect();
5930
5931 if !customer_ids.is_empty() {
5932 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
5933 let contracts = rev_gen.generate(
5934 company_code,
5935 &customer_ids,
5936 start_date,
5937 end_date,
5938 currency,
5939 &self.config.accounting_standards.revenue_recognition,
5940 framework,
5941 );
5942 snapshot.revenue_contract_count = contracts.len();
5943 snapshot.contracts = contracts;
5944 }
5945 }
5946
5947 if self.config.accounting_standards.impairment.enabled {
5949 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
5950 .master_data
5951 .assets
5952 .iter()
5953 .map(|a| {
5954 (
5955 a.asset_id.clone(),
5956 a.description.clone(),
5957 a.acquisition_cost,
5958 )
5959 })
5960 .collect();
5961
5962 if !asset_data.is_empty() {
5963 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
5964 let tests = imp_gen.generate(
5965 company_code,
5966 &asset_data,
5967 end_date,
5968 &self.config.accounting_standards.impairment,
5969 framework,
5970 );
5971 snapshot.impairment_test_count = tests.len();
5972 snapshot.impairment_tests = tests;
5973 }
5974 }
5975
5976 if self
5978 .config
5979 .accounting_standards
5980 .business_combinations
5981 .enabled
5982 {
5983 let bc_config = &self.config.accounting_standards.business_combinations;
5984 let framework_str = match framework {
5985 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
5986 _ => "US_GAAP",
5987 };
5988 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
5989 let bc_snap = bc_gen.generate(
5990 company_code,
5991 currency,
5992 start_date,
5993 end_date,
5994 bc_config.acquisition_count,
5995 framework_str,
5996 );
5997 snapshot.business_combination_count = bc_snap.combinations.len();
5998 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
5999 snapshot.business_combinations = bc_snap.combinations;
6000 }
6001
6002 if self
6004 .config
6005 .accounting_standards
6006 .expected_credit_loss
6007 .enabled
6008 {
6009 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6010 let framework_str = match framework {
6011 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6012 _ => "ASC_326",
6013 };
6014
6015 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6018
6019 let mut ecl_gen = EclGenerator::new(seed + 43);
6020
6021 let bucket_exposures: Vec<(
6023 datasynth_core::models::subledger::ar::AgingBucket,
6024 rust_decimal::Decimal,
6025 )> = if ar_aging_reports.is_empty() {
6026 use datasynth_core::models::subledger::ar::AgingBucket;
6028 vec![
6029 (
6030 AgingBucket::Current,
6031 rust_decimal::Decimal::from(500_000_u32),
6032 ),
6033 (
6034 AgingBucket::Days1To30,
6035 rust_decimal::Decimal::from(120_000_u32),
6036 ),
6037 (
6038 AgingBucket::Days31To60,
6039 rust_decimal::Decimal::from(45_000_u32),
6040 ),
6041 (
6042 AgingBucket::Days61To90,
6043 rust_decimal::Decimal::from(15_000_u32),
6044 ),
6045 (
6046 AgingBucket::Over90Days,
6047 rust_decimal::Decimal::from(8_000_u32),
6048 ),
6049 ]
6050 } else {
6051 use datasynth_core::models::subledger::ar::AgingBucket;
6052 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6054 std::collections::HashMap::new();
6055 for report in ar_aging_reports {
6056 for (bucket, amount) in &report.bucket_totals {
6057 *totals.entry(*bucket).or_default() += amount;
6058 }
6059 }
6060 AgingBucket::all()
6061 .into_iter()
6062 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6063 .collect()
6064 };
6065
6066 let ecl_snap = ecl_gen.generate(
6067 company_code,
6068 end_date,
6069 &bucket_exposures,
6070 ecl_config,
6071 &period_label,
6072 framework_str,
6073 );
6074
6075 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
6076 snapshot.ecl_models = ecl_snap.ecl_models;
6077 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
6078 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
6079 }
6080
6081 {
6083 let framework_str = match framework {
6084 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6085 _ => "US_GAAP",
6086 };
6087
6088 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
6093 .max(rust_decimal::Decimal::from(100_000_u32));
6094
6095 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6096
6097 let mut prov_gen = ProvisionGenerator::new(seed + 44);
6098 let prov_snap = prov_gen.generate(
6099 company_code,
6100 currency,
6101 revenue_proxy,
6102 end_date,
6103 &period_label,
6104 framework_str,
6105 None, );
6107
6108 snapshot.provision_count = prov_snap.provisions.len();
6109 snapshot.provisions = prov_snap.provisions;
6110 snapshot.provision_movements = prov_snap.movements;
6111 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
6112 snapshot.provision_journal_entries = prov_snap.journal_entries;
6113 }
6114
6115 {
6119 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6120
6121 let presentation_currency = self
6122 .config
6123 .global
6124 .presentation_currency
6125 .clone()
6126 .unwrap_or_else(|| self.config.global.group_currency.clone());
6127
6128 let mut rate_table = FxRateTable::new(&presentation_currency);
6131
6132 let base_rates = base_rates_usd();
6136 for (ccy, rate) in &base_rates {
6137 rate_table.add_rate(FxRate::new(
6138 ccy,
6139 "USD",
6140 RateType::Closing,
6141 end_date,
6142 *rate,
6143 "SYNTHETIC",
6144 ));
6145 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
6148 rate_table.add_rate(FxRate::new(
6149 ccy,
6150 "USD",
6151 RateType::Average,
6152 end_date,
6153 avg,
6154 "SYNTHETIC",
6155 ));
6156 }
6157
6158 let mut translation_results = Vec::new();
6159 for company in &self.config.companies {
6160 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
6163 .max(rust_decimal::Decimal::from(100_000_u32));
6164
6165 let func_ccy = company
6166 .functional_currency
6167 .clone()
6168 .unwrap_or_else(|| company.currency.clone());
6169
6170 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
6171 &company.code,
6172 &func_ccy,
6173 &presentation_currency,
6174 &ias21_period_label,
6175 end_date,
6176 company_revenue,
6177 &rate_table,
6178 );
6179 translation_results.push(result);
6180 }
6181
6182 snapshot.currency_translation_count = translation_results.len();
6183 snapshot.currency_translation_results = translation_results;
6184 }
6185
6186 stats.revenue_contract_count = snapshot.revenue_contract_count;
6187 stats.impairment_test_count = snapshot.impairment_test_count;
6188 stats.business_combination_count = snapshot.business_combination_count;
6189 stats.ecl_model_count = snapshot.ecl_model_count;
6190 stats.provision_count = snapshot.provision_count;
6191
6192 info!(
6193 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
6194 snapshot.revenue_contract_count,
6195 snapshot.impairment_test_count,
6196 snapshot.business_combination_count,
6197 snapshot.ecl_model_count,
6198 snapshot.provision_count,
6199 snapshot.currency_translation_count
6200 );
6201 self.check_resources_with_log("post-accounting-standards")?;
6202
6203 Ok(snapshot)
6204 }
6205
6206 fn phase_manufacturing(
6208 &mut self,
6209 stats: &mut EnhancedGenerationStatistics,
6210 ) -> SynthResult<ManufacturingSnapshot> {
6211 if !self.phase_config.generate_manufacturing {
6212 debug!("Phase 18: Skipped (manufacturing generation disabled)");
6213 return Ok(ManufacturingSnapshot::default());
6214 }
6215 info!("Phase 18: Generating Manufacturing Data");
6216
6217 let seed = self.seed;
6218 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6219 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6220 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6221 let company_code = self
6222 .config
6223 .companies
6224 .first()
6225 .map(|c| c.code.as_str())
6226 .unwrap_or("1000");
6227
6228 let material_data: Vec<(String, String)> = self
6229 .master_data
6230 .materials
6231 .iter()
6232 .map(|m| (m.material_id.clone(), m.description.clone()))
6233 .collect();
6234
6235 if material_data.is_empty() {
6236 debug!("Phase 18: Skipped (no materials available)");
6237 return Ok(ManufacturingSnapshot::default());
6238 }
6239
6240 let mut snapshot = ManufacturingSnapshot::default();
6241
6242 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
6244 let production_orders = prod_gen.generate(
6245 company_code,
6246 &material_data,
6247 start_date,
6248 end_date,
6249 &self.config.manufacturing.production_orders,
6250 &self.config.manufacturing.costing,
6251 &self.config.manufacturing.routing,
6252 );
6253 snapshot.production_order_count = production_orders.len();
6254
6255 let inspection_data: Vec<(String, String, String)> = production_orders
6257 .iter()
6258 .map(|po| {
6259 (
6260 po.order_id.clone(),
6261 po.material_id.clone(),
6262 po.material_description.clone(),
6263 )
6264 })
6265 .collect();
6266
6267 snapshot.production_orders = production_orders;
6268
6269 if !inspection_data.is_empty() {
6270 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
6271 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
6272 snapshot.quality_inspection_count = inspections.len();
6273 snapshot.quality_inspections = inspections;
6274 }
6275
6276 let storage_locations: Vec<(String, String)> = material_data
6278 .iter()
6279 .enumerate()
6280 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
6281 .collect();
6282
6283 let employee_ids: Vec<String> = self
6284 .master_data
6285 .employees
6286 .iter()
6287 .map(|e| e.employee_id.clone())
6288 .collect();
6289 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
6290 .with_employee_pool(employee_ids);
6291 let mut cycle_count_total = 0usize;
6292 for month in 0..self.config.global.period_months {
6293 let count_date = start_date + chrono::Months::new(month);
6294 let items_per_count = storage_locations.len().clamp(10, 50);
6295 let cc = cc_gen.generate(
6296 company_code,
6297 &storage_locations,
6298 count_date,
6299 items_per_count,
6300 );
6301 snapshot.cycle_counts.push(cc);
6302 cycle_count_total += 1;
6303 }
6304 snapshot.cycle_count_count = cycle_count_total;
6305
6306 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 53);
6308 let bom_components = bom_gen.generate(company_code, &material_data);
6309 snapshot.bom_component_count = bom_components.len();
6310 snapshot.bom_components = bom_components;
6311
6312 let currency = self
6314 .config
6315 .companies
6316 .first()
6317 .map(|c| c.currency.as_str())
6318 .unwrap_or("USD");
6319 let production_order_ids: Vec<String> = snapshot
6320 .production_orders
6321 .iter()
6322 .map(|po| po.order_id.clone())
6323 .collect();
6324 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 54);
6325 let inventory_movements = inv_mov_gen.generate_with_production_orders(
6326 company_code,
6327 &material_data,
6328 start_date,
6329 end_date,
6330 2,
6331 currency,
6332 &production_order_ids,
6333 );
6334 snapshot.inventory_movement_count = inventory_movements.len();
6335 snapshot.inventory_movements = inventory_movements;
6336
6337 stats.production_order_count = snapshot.production_order_count;
6338 stats.quality_inspection_count = snapshot.quality_inspection_count;
6339 stats.cycle_count_count = snapshot.cycle_count_count;
6340 stats.bom_component_count = snapshot.bom_component_count;
6341 stats.inventory_movement_count = snapshot.inventory_movement_count;
6342
6343 info!(
6344 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
6345 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
6346 snapshot.bom_component_count, snapshot.inventory_movement_count
6347 );
6348 self.check_resources_with_log("post-manufacturing")?;
6349
6350 Ok(snapshot)
6351 }
6352
6353 fn phase_sales_kpi_budgets(
6355 &mut self,
6356 coa: &Arc<ChartOfAccounts>,
6357 financial_reporting: &FinancialReportingSnapshot,
6358 stats: &mut EnhancedGenerationStatistics,
6359 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
6360 if !self.phase_config.generate_sales_kpi_budgets {
6361 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
6362 return Ok(SalesKpiBudgetsSnapshot::default());
6363 }
6364 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
6365
6366 let seed = self.seed;
6367 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6368 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6369 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6370 let company_code = self
6371 .config
6372 .companies
6373 .first()
6374 .map(|c| c.code.as_str())
6375 .unwrap_or("1000");
6376
6377 let mut snapshot = SalesKpiBudgetsSnapshot::default();
6378
6379 if self.config.sales_quotes.enabled {
6381 let customer_data: Vec<(String, String)> = self
6382 .master_data
6383 .customers
6384 .iter()
6385 .map(|c| (c.customer_id.clone(), c.name.clone()))
6386 .collect();
6387 let material_data: Vec<(String, String)> = self
6388 .master_data
6389 .materials
6390 .iter()
6391 .map(|m| (m.material_id.clone(), m.description.clone()))
6392 .collect();
6393
6394 if !customer_data.is_empty() && !material_data.is_empty() {
6395 let employee_ids: Vec<String> = self
6396 .master_data
6397 .employees
6398 .iter()
6399 .map(|e| e.employee_id.clone())
6400 .collect();
6401 let customer_ids: Vec<String> = self
6402 .master_data
6403 .customers
6404 .iter()
6405 .map(|c| c.customer_id.clone())
6406 .collect();
6407 let company_currency = self
6408 .config
6409 .companies
6410 .first()
6411 .map(|c| c.currency.as_str())
6412 .unwrap_or("USD");
6413
6414 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
6415 .with_pools(employee_ids, customer_ids);
6416 let quotes = quote_gen.generate_with_currency(
6417 company_code,
6418 &customer_data,
6419 &material_data,
6420 start_date,
6421 end_date,
6422 &self.config.sales_quotes,
6423 company_currency,
6424 );
6425 snapshot.sales_quote_count = quotes.len();
6426 snapshot.sales_quotes = quotes;
6427 }
6428 }
6429
6430 if self.config.financial_reporting.management_kpis.enabled {
6432 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
6433 let mut kpis = kpi_gen.generate(
6434 company_code,
6435 start_date,
6436 end_date,
6437 &self.config.financial_reporting.management_kpis,
6438 );
6439
6440 {
6442 use rust_decimal::Decimal;
6443
6444 if let Some(income_stmt) =
6445 financial_reporting.financial_statements.iter().find(|fs| {
6446 fs.statement_type == StatementType::IncomeStatement
6447 && fs.company_code == company_code
6448 })
6449 {
6450 let total_revenue: Decimal = income_stmt
6452 .line_items
6453 .iter()
6454 .filter(|li| li.section.contains("Revenue") && !li.is_total)
6455 .map(|li| li.amount)
6456 .sum();
6457 let total_cogs: Decimal = income_stmt
6458 .line_items
6459 .iter()
6460 .filter(|li| {
6461 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
6462 && !li.is_total
6463 })
6464 .map(|li| li.amount.abs())
6465 .sum();
6466 let total_opex: Decimal = income_stmt
6467 .line_items
6468 .iter()
6469 .filter(|li| {
6470 li.section.contains("Expense")
6471 && !li.is_total
6472 && !li.section.contains("Cost")
6473 })
6474 .map(|li| li.amount.abs())
6475 .sum();
6476
6477 if total_revenue > Decimal::ZERO {
6478 let hundred = Decimal::from(100);
6479 let gross_margin_pct =
6480 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
6481 let operating_income = total_revenue - total_cogs - total_opex;
6482 let op_margin_pct =
6483 (operating_income * hundred / total_revenue).round_dp(2);
6484
6485 for kpi in &mut kpis {
6487 if kpi.name == "Gross Margin" {
6488 kpi.value = gross_margin_pct;
6489 } else if kpi.name == "Operating Margin" {
6490 kpi.value = op_margin_pct;
6491 }
6492 }
6493 }
6494 }
6495
6496 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
6498 fs.statement_type == StatementType::BalanceSheet
6499 && fs.company_code == company_code
6500 }) {
6501 let current_assets: Decimal = bs
6502 .line_items
6503 .iter()
6504 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
6505 .map(|li| li.amount)
6506 .sum();
6507 let current_liabilities: Decimal = bs
6508 .line_items
6509 .iter()
6510 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
6511 .map(|li| li.amount.abs())
6512 .sum();
6513
6514 if current_liabilities > Decimal::ZERO {
6515 let current_ratio = (current_assets / current_liabilities).round_dp(2);
6516 for kpi in &mut kpis {
6517 if kpi.name == "Current Ratio" {
6518 kpi.value = current_ratio;
6519 }
6520 }
6521 }
6522 }
6523 }
6524
6525 snapshot.kpi_count = kpis.len();
6526 snapshot.kpis = kpis;
6527 }
6528
6529 if self.config.financial_reporting.budgets.enabled {
6531 let account_data: Vec<(String, String)> = coa
6532 .accounts
6533 .iter()
6534 .map(|a| (a.account_number.clone(), a.short_description.clone()))
6535 .collect();
6536
6537 if !account_data.is_empty() {
6538 let fiscal_year = start_date.year() as u32;
6539 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
6540 let budget = budget_gen.generate(
6541 company_code,
6542 fiscal_year,
6543 &account_data,
6544 &self.config.financial_reporting.budgets,
6545 );
6546 snapshot.budget_line_count = budget.line_items.len();
6547 snapshot.budgets.push(budget);
6548 }
6549 }
6550
6551 stats.sales_quote_count = snapshot.sales_quote_count;
6552 stats.kpi_count = snapshot.kpi_count;
6553 stats.budget_line_count = snapshot.budget_line_count;
6554
6555 info!(
6556 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
6557 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
6558 );
6559 self.check_resources_with_log("post-sales-kpi-budgets")?;
6560
6561 Ok(snapshot)
6562 }
6563
6564 fn phase_tax_generation(
6566 &mut self,
6567 document_flows: &DocumentFlowSnapshot,
6568 journal_entries: &[JournalEntry],
6569 stats: &mut EnhancedGenerationStatistics,
6570 ) -> SynthResult<TaxSnapshot> {
6571 if !self.phase_config.generate_tax {
6572 debug!("Phase 20: Skipped (tax generation disabled)");
6573 return Ok(TaxSnapshot::default());
6574 }
6575 info!("Phase 20: Generating Tax Data");
6576
6577 let seed = self.seed;
6578 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6579 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6580 let fiscal_year = start_date.year();
6581 let company_code = self
6582 .config
6583 .companies
6584 .first()
6585 .map(|c| c.code.as_str())
6586 .unwrap_or("1000");
6587
6588 let mut gen =
6589 datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
6590
6591 let pack = self.primary_pack().clone();
6592 let (jurisdictions, codes) =
6593 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
6594
6595 let mut provisions = Vec::new();
6597 if self.config.tax.provisions.enabled {
6598 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
6599 for company in &self.config.companies {
6600 let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
6601 let statutory_rate = rust_decimal::Decimal::new(
6602 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
6603 2,
6604 );
6605 let provision = provision_gen.generate(
6606 &company.code,
6607 start_date,
6608 pre_tax_income,
6609 statutory_rate,
6610 );
6611 provisions.push(provision);
6612 }
6613 }
6614
6615 let mut tax_lines = Vec::new();
6617 if !codes.is_empty() {
6618 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
6619 datasynth_generators::TaxLineGeneratorConfig::default(),
6620 codes.clone(),
6621 seed + 72,
6622 );
6623
6624 let buyer_country = self
6627 .config
6628 .companies
6629 .first()
6630 .map(|c| c.country.as_str())
6631 .unwrap_or("US");
6632 for vi in &document_flows.vendor_invoices {
6633 let lines = tax_line_gen.generate_for_document(
6634 datasynth_core::models::TaxableDocumentType::VendorInvoice,
6635 &vi.header.document_id,
6636 buyer_country, buyer_country,
6638 vi.payable_amount,
6639 vi.header.document_date,
6640 None,
6641 );
6642 tax_lines.extend(lines);
6643 }
6644
6645 for ci in &document_flows.customer_invoices {
6647 let lines = tax_line_gen.generate_for_document(
6648 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
6649 &ci.header.document_id,
6650 buyer_country, buyer_country,
6652 ci.total_gross_amount,
6653 ci.header.document_date,
6654 None,
6655 );
6656 tax_lines.extend(lines);
6657 }
6658 }
6659
6660 let deferred_tax = {
6662 let companies: Vec<(&str, &str)> = self
6663 .config
6664 .companies
6665 .iter()
6666 .map(|c| (c.code.as_str(), c.country.as_str()))
6667 .collect();
6668 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 73);
6669 deferred_gen.generate(&companies, start_date, journal_entries)
6670 };
6671
6672 let snapshot = TaxSnapshot {
6673 jurisdiction_count: jurisdictions.len(),
6674 code_count: codes.len(),
6675 jurisdictions,
6676 codes,
6677 tax_provisions: provisions,
6678 tax_lines,
6679 tax_returns: Vec::new(),
6680 withholding_records: Vec::new(),
6681 tax_anomaly_labels: Vec::new(),
6682 deferred_tax,
6683 };
6684
6685 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
6686 stats.tax_code_count = snapshot.code_count;
6687 stats.tax_provision_count = snapshot.tax_provisions.len();
6688 stats.tax_line_count = snapshot.tax_lines.len();
6689
6690 info!(
6691 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs",
6692 snapshot.jurisdiction_count,
6693 snapshot.code_count,
6694 snapshot.tax_provisions.len(),
6695 snapshot.deferred_tax.temporary_differences.len(),
6696 snapshot.deferred_tax.journal_entries.len(),
6697 );
6698 self.check_resources_with_log("post-tax")?;
6699
6700 Ok(snapshot)
6701 }
6702
6703 fn phase_esg_generation(
6705 &mut self,
6706 document_flows: &DocumentFlowSnapshot,
6707 stats: &mut EnhancedGenerationStatistics,
6708 ) -> SynthResult<EsgSnapshot> {
6709 if !self.phase_config.generate_esg {
6710 debug!("Phase 21: Skipped (ESG generation disabled)");
6711 return Ok(EsgSnapshot::default());
6712 }
6713 let degradation = self.check_resources()?;
6714 if degradation >= DegradationLevel::Reduced {
6715 debug!(
6716 "Phase skipped due to resource pressure (degradation: {:?})",
6717 degradation
6718 );
6719 return Ok(EsgSnapshot::default());
6720 }
6721 info!("Phase 21: Generating ESG Data");
6722
6723 let seed = self.seed;
6724 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6725 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6726 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6727 let entity_id = self
6728 .config
6729 .companies
6730 .first()
6731 .map(|c| c.code.as_str())
6732 .unwrap_or("1000");
6733
6734 let esg_cfg = &self.config.esg;
6735 let mut snapshot = EsgSnapshot::default();
6736
6737 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
6739 esg_cfg.environmental.energy.clone(),
6740 seed + 80,
6741 );
6742 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
6743
6744 let facility_count = esg_cfg.environmental.energy.facility_count;
6746 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
6747 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
6748
6749 let mut waste_gen = datasynth_generators::WasteGenerator::new(
6751 seed + 82,
6752 esg_cfg.environmental.waste.diversion_target,
6753 facility_count,
6754 );
6755 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
6756
6757 let mut emission_gen =
6759 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
6760
6761 let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
6763 .iter()
6764 .map(|e| datasynth_generators::EnergyInput {
6765 facility_id: e.facility_id.clone(),
6766 energy_type: match e.energy_source {
6767 EnergySourceType::NaturalGas => {
6768 datasynth_generators::EnergyInputType::NaturalGas
6769 }
6770 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
6771 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
6772 _ => datasynth_generators::EnergyInputType::Electricity,
6773 },
6774 consumption_kwh: e.consumption_kwh,
6775 period: e.period,
6776 })
6777 .collect();
6778
6779 let mut emissions = Vec::new();
6780 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
6781 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
6782
6783 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
6785 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
6786 for payment in &document_flows.payments {
6787 if payment.is_vendor {
6788 *totals
6789 .entry(payment.business_partner_id.clone())
6790 .or_default() += payment.amount;
6791 }
6792 }
6793 totals
6794 };
6795 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
6796 .master_data
6797 .vendors
6798 .iter()
6799 .map(|v| {
6800 let spend = vendor_payment_totals
6801 .get(&v.vendor_id)
6802 .copied()
6803 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
6804 datasynth_generators::VendorSpendInput {
6805 vendor_id: v.vendor_id.clone(),
6806 category: format!("{:?}", v.vendor_type).to_lowercase(),
6807 spend,
6808 country: v.country.clone(),
6809 }
6810 })
6811 .collect();
6812 if !vendor_spend.is_empty() {
6813 emissions.extend(emission_gen.generate_scope3_purchased_goods(
6814 entity_id,
6815 &vendor_spend,
6816 start_date,
6817 end_date,
6818 ));
6819 }
6820
6821 let headcount = self.master_data.employees.len() as u32;
6823 if headcount > 0 {
6824 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
6825 emissions.extend(emission_gen.generate_scope3_business_travel(
6826 entity_id,
6827 travel_spend,
6828 start_date,
6829 ));
6830 emissions
6831 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
6832 }
6833
6834 snapshot.emission_count = emissions.len();
6835 snapshot.emissions = emissions;
6836 snapshot.energy = energy_records;
6837
6838 let mut workforce_gen =
6840 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
6841 let total_headcount = headcount.max(100);
6842 snapshot.diversity =
6843 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
6844 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
6845 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
6846 entity_id,
6847 facility_count,
6848 start_date,
6849 end_date,
6850 );
6851
6852 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
6855 entity_id,
6856 &snapshot.safety_incidents,
6857 total_hours,
6858 start_date,
6859 );
6860 snapshot.safety_metrics = vec![safety_metric];
6861
6862 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
6864 seed + 85,
6865 esg_cfg.governance.board_size,
6866 esg_cfg.governance.independence_target,
6867 );
6868 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
6869
6870 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
6872 esg_cfg.supply_chain_esg.clone(),
6873 seed + 86,
6874 );
6875 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
6876 .master_data
6877 .vendors
6878 .iter()
6879 .map(|v| datasynth_generators::VendorInput {
6880 vendor_id: v.vendor_id.clone(),
6881 country: v.country.clone(),
6882 industry: format!("{:?}", v.vendor_type).to_lowercase(),
6883 quality_score: None,
6884 })
6885 .collect();
6886 snapshot.supplier_assessments =
6887 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
6888
6889 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
6891 seed + 87,
6892 esg_cfg.reporting.clone(),
6893 esg_cfg.climate_scenarios.clone(),
6894 );
6895 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
6896 snapshot.disclosures = disclosure_gen.generate_disclosures(
6897 entity_id,
6898 &snapshot.materiality,
6899 start_date,
6900 end_date,
6901 );
6902 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
6903 snapshot.disclosure_count = snapshot.disclosures.len();
6904
6905 if esg_cfg.anomaly_rate > 0.0 {
6907 let mut anomaly_injector =
6908 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
6909 let mut labels = Vec::new();
6910 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
6911 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
6912 labels.extend(
6913 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
6914 );
6915 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
6916 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
6917 snapshot.anomaly_labels = labels;
6918 }
6919
6920 stats.esg_emission_count = snapshot.emission_count;
6921 stats.esg_disclosure_count = snapshot.disclosure_count;
6922
6923 info!(
6924 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
6925 snapshot.emission_count,
6926 snapshot.disclosure_count,
6927 snapshot.supplier_assessments.len()
6928 );
6929 self.check_resources_with_log("post-esg")?;
6930
6931 Ok(snapshot)
6932 }
6933
6934 fn phase_treasury_data(
6936 &mut self,
6937 document_flows: &DocumentFlowSnapshot,
6938 subledger: &SubledgerSnapshot,
6939 intercompany: &IntercompanySnapshot,
6940 stats: &mut EnhancedGenerationStatistics,
6941 ) -> SynthResult<TreasurySnapshot> {
6942 if !self.phase_config.generate_treasury {
6943 debug!("Phase 22: Skipped (treasury generation disabled)");
6944 return Ok(TreasurySnapshot::default());
6945 }
6946 let degradation = self.check_resources()?;
6947 if degradation >= DegradationLevel::Reduced {
6948 debug!(
6949 "Phase skipped due to resource pressure (degradation: {:?})",
6950 degradation
6951 );
6952 return Ok(TreasurySnapshot::default());
6953 }
6954 info!("Phase 22: Generating Treasury Data");
6955
6956 let seed = self.seed;
6957 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6958 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6959 let currency = self
6960 .config
6961 .companies
6962 .first()
6963 .map(|c| c.currency.as_str())
6964 .unwrap_or("USD");
6965 let entity_id = self
6966 .config
6967 .companies
6968 .first()
6969 .map(|c| c.code.as_str())
6970 .unwrap_or("1000");
6971
6972 let mut snapshot = TreasurySnapshot::default();
6973
6974 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
6976 self.config.treasury.debt.clone(),
6977 seed + 90,
6978 );
6979 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
6980
6981 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
6983 self.config.treasury.hedging.clone(),
6984 seed + 91,
6985 );
6986 for debt in &snapshot.debt_instruments {
6987 if debt.rate_type == InterestRateType::Variable {
6988 let swap = hedge_gen.generate_ir_swap(
6989 currency,
6990 debt.principal,
6991 debt.origination_date,
6992 debt.maturity_date,
6993 );
6994 snapshot.hedging_instruments.push(swap);
6995 }
6996 }
6997
6998 {
7001 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
7002 for payment in &document_flows.payments {
7003 if payment.currency != currency {
7004 let entry = fx_map
7005 .entry(payment.currency.clone())
7006 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
7007 entry.0 += payment.amount;
7008 if payment.header.document_date > entry.1 {
7010 entry.1 = payment.header.document_date;
7011 }
7012 }
7013 }
7014 if !fx_map.is_empty() {
7015 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
7016 .into_iter()
7017 .map(|(foreign_ccy, (net_amount, settlement_date))| {
7018 datasynth_generators::treasury::FxExposure {
7019 currency_pair: format!("{foreign_ccy}/{currency}"),
7020 foreign_currency: foreign_ccy,
7021 net_amount,
7022 settlement_date,
7023 description: "AP payment FX exposure".to_string(),
7024 }
7025 })
7026 .collect();
7027 let (fx_instruments, fx_relationships) =
7028 hedge_gen.generate(start_date, &fx_exposures);
7029 snapshot.hedging_instruments.extend(fx_instruments);
7030 snapshot.hedge_relationships.extend(fx_relationships);
7031 }
7032 }
7033
7034 if self.config.treasury.anomaly_rate > 0.0 {
7036 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
7037 seed + 92,
7038 self.config.treasury.anomaly_rate,
7039 );
7040 let mut labels = Vec::new();
7041 labels.extend(
7042 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
7043 );
7044 snapshot.treasury_anomaly_labels = labels;
7045 }
7046
7047 if self.config.treasury.cash_positioning.enabled {
7049 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
7050
7051 for payment in &document_flows.payments {
7053 cash_flows.push(datasynth_generators::treasury::CashFlow {
7054 date: payment.header.document_date,
7055 account_id: format!("{entity_id}-MAIN"),
7056 amount: payment.amount,
7057 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
7058 });
7059 }
7060
7061 for chain in &document_flows.o2c_chains {
7063 if let Some(ref receipt) = chain.customer_receipt {
7064 cash_flows.push(datasynth_generators::treasury::CashFlow {
7065 date: receipt.header.document_date,
7066 account_id: format!("{entity_id}-MAIN"),
7067 amount: receipt.amount,
7068 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7069 });
7070 }
7071 for receipt in &chain.remainder_receipts {
7073 cash_flows.push(datasynth_generators::treasury::CashFlow {
7074 date: receipt.header.document_date,
7075 account_id: format!("{entity_id}-MAIN"),
7076 amount: receipt.amount,
7077 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7078 });
7079 }
7080 }
7081
7082 if !cash_flows.is_empty() {
7083 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
7084 self.config.treasury.cash_positioning.clone(),
7085 seed + 93,
7086 );
7087 let account_id = format!("{entity_id}-MAIN");
7088 snapshot.cash_positions = cash_gen.generate(
7089 entity_id,
7090 &account_id,
7091 currency,
7092 &cash_flows,
7093 start_date,
7094 start_date + chrono::Months::new(self.config.global.period_months),
7095 rust_decimal::Decimal::new(1_000_000, 0), );
7097 }
7098 }
7099
7100 if self.config.treasury.cash_forecasting.enabled {
7102 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7103
7104 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
7106 .ar_invoices
7107 .iter()
7108 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7109 .map(|inv| {
7110 let days_past_due = if inv.due_date < end_date {
7111 (end_date - inv.due_date).num_days().max(0) as u32
7112 } else {
7113 0
7114 };
7115 datasynth_generators::treasury::ArAgingItem {
7116 expected_date: inv.due_date,
7117 amount: inv.amount_remaining,
7118 days_past_due,
7119 document_id: inv.invoice_number.clone(),
7120 }
7121 })
7122 .collect();
7123
7124 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
7126 .ap_invoices
7127 .iter()
7128 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7129 .map(|inv| datasynth_generators::treasury::ApAgingItem {
7130 payment_date: inv.due_date,
7131 amount: inv.amount_remaining,
7132 document_id: inv.invoice_number.clone(),
7133 })
7134 .collect();
7135
7136 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
7137 self.config.treasury.cash_forecasting.clone(),
7138 seed + 94,
7139 );
7140 let forecast = forecast_gen.generate(
7141 entity_id,
7142 currency,
7143 end_date,
7144 &ar_items,
7145 &ap_items,
7146 &[], );
7148 snapshot.cash_forecasts.push(forecast);
7149 }
7150
7151 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
7153 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7154 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
7155 self.config.treasury.cash_pooling.clone(),
7156 seed + 95,
7157 );
7158
7159 let account_ids: Vec<String> = snapshot
7161 .cash_positions
7162 .iter()
7163 .map(|cp| cp.bank_account_id.clone())
7164 .collect::<std::collections::HashSet<_>>()
7165 .into_iter()
7166 .collect();
7167
7168 if let Some(pool) =
7169 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
7170 {
7171 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7173 for cp in &snapshot.cash_positions {
7174 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
7175 }
7176
7177 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
7178 latest_balances
7179 .into_iter()
7180 .filter(|(id, _)| pool.participant_accounts.contains(id))
7181 .map(
7182 |(id, balance)| datasynth_generators::treasury::AccountBalance {
7183 account_id: id,
7184 balance,
7185 },
7186 )
7187 .collect();
7188
7189 let sweeps =
7190 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
7191 snapshot.cash_pool_sweeps = sweeps;
7192 snapshot.cash_pools.push(pool);
7193 }
7194 }
7195
7196 if self.config.treasury.bank_guarantees.enabled {
7198 let vendor_names: Vec<String> = self
7199 .master_data
7200 .vendors
7201 .iter()
7202 .map(|v| v.name.clone())
7203 .collect();
7204 if !vendor_names.is_empty() {
7205 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
7206 self.config.treasury.bank_guarantees.clone(),
7207 seed + 96,
7208 );
7209 snapshot.bank_guarantees =
7210 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
7211 }
7212 }
7213
7214 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
7216 let entity_ids: Vec<String> = self
7217 .config
7218 .companies
7219 .iter()
7220 .map(|c| c.code.clone())
7221 .collect();
7222 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
7223 .matched_pairs
7224 .iter()
7225 .map(|mp| {
7226 (
7227 mp.seller_company.clone(),
7228 mp.buyer_company.clone(),
7229 mp.amount,
7230 )
7231 })
7232 .collect();
7233 if entity_ids.len() >= 2 {
7234 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
7235 self.config.treasury.netting.clone(),
7236 seed + 97,
7237 );
7238 snapshot.netting_runs = netting_gen.generate(
7239 &entity_ids,
7240 currency,
7241 start_date,
7242 self.config.global.period_months,
7243 &ic_amounts,
7244 );
7245 }
7246 }
7247
7248 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
7249 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
7250 stats.cash_position_count = snapshot.cash_positions.len();
7251 stats.cash_forecast_count = snapshot.cash_forecasts.len();
7252 stats.cash_pool_count = snapshot.cash_pools.len();
7253
7254 info!(
7255 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs",
7256 snapshot.debt_instruments.len(),
7257 snapshot.hedging_instruments.len(),
7258 snapshot.cash_positions.len(),
7259 snapshot.cash_forecasts.len(),
7260 snapshot.cash_pools.len(),
7261 snapshot.bank_guarantees.len(),
7262 snapshot.netting_runs.len(),
7263 );
7264 self.check_resources_with_log("post-treasury")?;
7265
7266 Ok(snapshot)
7267 }
7268
7269 fn phase_project_accounting(
7271 &mut self,
7272 document_flows: &DocumentFlowSnapshot,
7273 hr: &HrSnapshot,
7274 stats: &mut EnhancedGenerationStatistics,
7275 ) -> SynthResult<ProjectAccountingSnapshot> {
7276 if !self.phase_config.generate_project_accounting {
7277 debug!("Phase 23: Skipped (project accounting disabled)");
7278 return Ok(ProjectAccountingSnapshot::default());
7279 }
7280 let degradation = self.check_resources()?;
7281 if degradation >= DegradationLevel::Reduced {
7282 debug!(
7283 "Phase skipped due to resource pressure (degradation: {:?})",
7284 degradation
7285 );
7286 return Ok(ProjectAccountingSnapshot::default());
7287 }
7288 info!("Phase 23: Generating Project Accounting Data");
7289
7290 let seed = self.seed;
7291 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7292 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7293 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7294 let company_code = self
7295 .config
7296 .companies
7297 .first()
7298 .map(|c| c.code.as_str())
7299 .unwrap_or("1000");
7300
7301 let mut snapshot = ProjectAccountingSnapshot::default();
7302
7303 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
7305 self.config.project_accounting.clone(),
7306 seed + 95,
7307 );
7308 let pool = project_gen.generate(company_code, start_date, end_date);
7309 snapshot.projects = pool.projects.clone();
7310
7311 {
7313 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
7314 Vec::new();
7315
7316 for te in &hr.time_entries {
7318 let total_hours = te.hours_regular + te.hours_overtime;
7319 if total_hours > 0.0 {
7320 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7321 id: te.entry_id.clone(),
7322 entity_id: company_code.to_string(),
7323 date: te.date,
7324 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
7325 .unwrap_or(rust_decimal::Decimal::ZERO),
7326 source_type: CostSourceType::TimeEntry,
7327 hours: Some(
7328 rust_decimal::Decimal::from_f64_retain(total_hours)
7329 .unwrap_or(rust_decimal::Decimal::ZERO),
7330 ),
7331 });
7332 }
7333 }
7334
7335 for er in &hr.expense_reports {
7337 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7338 id: er.report_id.clone(),
7339 entity_id: company_code.to_string(),
7340 date: er.submission_date,
7341 amount: er.total_amount,
7342 source_type: CostSourceType::ExpenseReport,
7343 hours: None,
7344 });
7345 }
7346
7347 for po in &document_flows.purchase_orders {
7349 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7350 id: po.header.document_id.clone(),
7351 entity_id: company_code.to_string(),
7352 date: po.header.document_date,
7353 amount: po.total_net_amount,
7354 source_type: CostSourceType::PurchaseOrder,
7355 hours: None,
7356 });
7357 }
7358
7359 for vi in &document_flows.vendor_invoices {
7361 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7362 id: vi.header.document_id.clone(),
7363 entity_id: company_code.to_string(),
7364 date: vi.header.document_date,
7365 amount: vi.payable_amount,
7366 source_type: CostSourceType::VendorInvoice,
7367 hours: None,
7368 });
7369 }
7370
7371 if !source_docs.is_empty() && !pool.projects.is_empty() {
7372 let mut cost_gen =
7373 datasynth_generators::project_accounting::ProjectCostGenerator::new(
7374 self.config.project_accounting.cost_allocation.clone(),
7375 seed + 99,
7376 );
7377 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
7378 }
7379 }
7380
7381 if self.config.project_accounting.change_orders.enabled {
7383 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
7384 self.config.project_accounting.change_orders.clone(),
7385 seed + 96,
7386 );
7387 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
7388 }
7389
7390 if self.config.project_accounting.milestones.enabled {
7392 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
7393 self.config.project_accounting.milestones.clone(),
7394 seed + 97,
7395 );
7396 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
7397 }
7398
7399 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
7401 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
7402 self.config.project_accounting.earned_value.clone(),
7403 seed + 98,
7404 );
7405 snapshot.earned_value_metrics =
7406 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
7407 }
7408
7409 if self.config.project_accounting.revenue_recognition.enabled
7411 && !snapshot.projects.is_empty()
7412 && !snapshot.cost_lines.is_empty()
7413 {
7414 use datasynth_generators::project_accounting::RevenueGenerator;
7415 let rev_config = self.config.project_accounting.revenue_recognition.clone();
7416 let avg_contract_value =
7417 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
7418 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
7419
7420 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
7423 snapshot
7424 .projects
7425 .iter()
7426 .filter(|p| {
7427 matches!(
7428 p.project_type,
7429 datasynth_core::models::ProjectType::Customer
7430 )
7431 })
7432 .map(|p| {
7433 let cv = if p.budget > rust_decimal::Decimal::ZERO {
7434 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
7435 } else {
7437 avg_contract_value
7438 };
7439 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
7441 })
7442 .collect();
7443
7444 if !contract_values.is_empty() {
7445 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
7446 snapshot.revenue_records = rev_gen.generate(
7447 &snapshot.projects,
7448 &snapshot.cost_lines,
7449 &contract_values,
7450 start_date,
7451 end_date,
7452 );
7453 debug!(
7454 "Generated {} revenue recognition records for {} customer projects",
7455 snapshot.revenue_records.len(),
7456 contract_values.len()
7457 );
7458 }
7459 }
7460
7461 stats.project_count = snapshot.projects.len();
7462 stats.project_change_order_count = snapshot.change_orders.len();
7463 stats.project_cost_line_count = snapshot.cost_lines.len();
7464
7465 info!(
7466 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
7467 snapshot.projects.len(),
7468 snapshot.change_orders.len(),
7469 snapshot.milestones.len(),
7470 snapshot.earned_value_metrics.len()
7471 );
7472 self.check_resources_with_log("post-project-accounting")?;
7473
7474 Ok(snapshot)
7475 }
7476
7477 fn phase_evolution_events(
7479 &mut self,
7480 stats: &mut EnhancedGenerationStatistics,
7481 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
7482 if !self.phase_config.generate_evolution_events {
7483 debug!("Phase 24: Skipped (evolution events disabled)");
7484 return Ok((Vec::new(), Vec::new()));
7485 }
7486 info!("Phase 24: Generating Process Evolution + Organizational Events");
7487
7488 let seed = self.seed;
7489 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7490 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7491 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7492
7493 let mut proc_gen =
7495 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
7496 seed + 100,
7497 );
7498 let process_events = proc_gen.generate_events(start_date, end_date);
7499
7500 let company_codes: Vec<String> = self
7502 .config
7503 .companies
7504 .iter()
7505 .map(|c| c.code.clone())
7506 .collect();
7507 let mut org_gen =
7508 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
7509 seed + 101,
7510 );
7511 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
7512
7513 stats.process_evolution_event_count = process_events.len();
7514 stats.organizational_event_count = org_events.len();
7515
7516 info!(
7517 "Evolution events generated: {} process evolution, {} organizational",
7518 process_events.len(),
7519 org_events.len()
7520 );
7521 self.check_resources_with_log("post-evolution-events")?;
7522
7523 Ok((process_events, org_events))
7524 }
7525
7526 fn phase_disruption_events(
7529 &self,
7530 stats: &mut EnhancedGenerationStatistics,
7531 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
7532 if !self.config.organizational_events.enabled {
7533 debug!("Phase 24b: Skipped (organizational events disabled)");
7534 return Ok(Vec::new());
7535 }
7536 info!("Phase 24b: Generating Disruption Events");
7537
7538 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7539 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7540 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7541
7542 let company_codes: Vec<String> = self
7543 .config
7544 .companies
7545 .iter()
7546 .map(|c| c.code.clone())
7547 .collect();
7548
7549 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
7550 let events = gen.generate(start_date, end_date, &company_codes);
7551
7552 stats.disruption_event_count = events.len();
7553 info!("Disruption events generated: {} events", events.len());
7554 self.check_resources_with_log("post-disruption-events")?;
7555
7556 Ok(events)
7557 }
7558
7559 fn phase_counterfactuals(
7566 &self,
7567 journal_entries: &[JournalEntry],
7568 stats: &mut EnhancedGenerationStatistics,
7569 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
7570 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
7571 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
7572 return Ok(Vec::new());
7573 }
7574 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
7575
7576 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
7577
7578 let mut gen = CounterfactualGenerator::new(self.seed + 110);
7579
7580 let specs = [
7582 CounterfactualSpec::ScaleAmount { factor: 2.5 },
7583 CounterfactualSpec::ShiftDate { days: -14 },
7584 CounterfactualSpec::SelfApprove,
7585 CounterfactualSpec::SplitTransaction { split_count: 3 },
7586 ];
7587
7588 let pairs: Vec<_> = journal_entries
7589 .iter()
7590 .enumerate()
7591 .map(|(i, je)| {
7592 let spec = &specs[i % specs.len()];
7593 gen.generate(je, spec)
7594 })
7595 .collect();
7596
7597 stats.counterfactual_pair_count = pairs.len();
7598 info!(
7599 "Counterfactual pairs generated: {} pairs from {} journal entries",
7600 pairs.len(),
7601 journal_entries.len()
7602 );
7603 self.check_resources_with_log("post-counterfactuals")?;
7604
7605 Ok(pairs)
7606 }
7607
7608 fn phase_red_flags(
7615 &self,
7616 anomaly_labels: &AnomalyLabels,
7617 document_flows: &DocumentFlowSnapshot,
7618 stats: &mut EnhancedGenerationStatistics,
7619 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
7620 if !self.config.fraud.enabled {
7621 debug!("Phase 26: Skipped (fraud generation disabled)");
7622 return Ok(Vec::new());
7623 }
7624 info!("Phase 26: Generating Fraud Red-Flag Indicators");
7625
7626 use datasynth_generators::fraud::RedFlagGenerator;
7627
7628 let generator = RedFlagGenerator::new();
7629 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
7630
7631 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
7633 .labels
7634 .iter()
7635 .filter(|label| label.anomaly_type.is_intentional())
7636 .map(|label| label.document_id.as_str())
7637 .collect();
7638
7639 let mut flags = Vec::new();
7640
7641 for chain in &document_flows.p2p_chains {
7643 let doc_id = &chain.purchase_order.header.document_id;
7644 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
7645 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
7646 }
7647
7648 for chain in &document_flows.o2c_chains {
7650 let doc_id = &chain.sales_order.header.document_id;
7651 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
7652 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
7653 }
7654
7655 stats.red_flag_count = flags.len();
7656 info!(
7657 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
7658 flags.len(),
7659 document_flows.p2p_chains.len(),
7660 document_flows.o2c_chains.len(),
7661 fraud_doc_ids.len()
7662 );
7663 self.check_resources_with_log("post-red-flags")?;
7664
7665 Ok(flags)
7666 }
7667
7668 fn phase_collusion_rings(
7674 &mut self,
7675 stats: &mut EnhancedGenerationStatistics,
7676 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
7677 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
7678 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
7679 return Ok(Vec::new());
7680 }
7681 info!("Phase 26b: Generating Collusion Rings");
7682
7683 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7684 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7685 let months = self.config.global.period_months;
7686
7687 let employee_ids: Vec<String> = self
7688 .master_data
7689 .employees
7690 .iter()
7691 .map(|e| e.employee_id.clone())
7692 .collect();
7693 let vendor_ids: Vec<String> = self
7694 .master_data
7695 .vendors
7696 .iter()
7697 .map(|v| v.vendor_id.clone())
7698 .collect();
7699
7700 let mut generator =
7701 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
7702 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
7703
7704 stats.collusion_ring_count = rings.len();
7705 info!(
7706 "Collusion rings generated: {} rings, total members: {}",
7707 rings.len(),
7708 rings
7709 .iter()
7710 .map(datasynth_generators::fraud::CollusionRing::size)
7711 .sum::<usize>()
7712 );
7713 self.check_resources_with_log("post-collusion-rings")?;
7714
7715 Ok(rings)
7716 }
7717
7718 fn phase_temporal_attributes(
7723 &mut self,
7724 stats: &mut EnhancedGenerationStatistics,
7725 ) -> SynthResult<
7726 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
7727 > {
7728 if !self.config.temporal_attributes.enabled {
7729 debug!("Phase 27: Skipped (temporal attributes disabled)");
7730 return Ok(Vec::new());
7731 }
7732 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
7733
7734 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7735 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7736
7737 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
7741 || self.config.temporal_attributes.enabled;
7742 let temporal_config = {
7743 let ta = &self.config.temporal_attributes;
7744 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
7745 .enabled(ta.enabled)
7746 .closed_probability(ta.valid_time.closed_probability)
7747 .avg_validity_days(ta.valid_time.avg_validity_days)
7748 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
7749 .with_version_chains(if generate_version_chains {
7750 ta.avg_versions_per_entity
7751 } else {
7752 1.0
7753 })
7754 .build()
7755 };
7756 let temporal_config = if self
7758 .config
7759 .temporal_attributes
7760 .transaction_time
7761 .allow_backdating
7762 {
7763 let mut c = temporal_config;
7764 c.transaction_time.allow_backdating = true;
7765 c.transaction_time.backdating_probability = self
7766 .config
7767 .temporal_attributes
7768 .transaction_time
7769 .backdating_probability;
7770 c.transaction_time.max_backdate_days = self
7771 .config
7772 .temporal_attributes
7773 .transaction_time
7774 .max_backdate_days;
7775 c
7776 } else {
7777 temporal_config
7778 };
7779 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
7780 temporal_config,
7781 self.seed + 130,
7782 start_date,
7783 );
7784
7785 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
7786 self.seed + 130,
7787 datasynth_core::GeneratorType::Vendor,
7788 );
7789
7790 let chains: Vec<_> = self
7791 .master_data
7792 .vendors
7793 .iter()
7794 .map(|vendor| {
7795 let id = uuid_factory.next();
7796 gen.generate_version_chain(vendor.clone(), id)
7797 })
7798 .collect();
7799
7800 stats.temporal_version_chain_count = chains.len();
7801 info!("Temporal version chains generated: {} chains", chains.len());
7802 self.check_resources_with_log("post-temporal-attributes")?;
7803
7804 Ok(chains)
7805 }
7806
7807 fn phase_entity_relationships(
7817 &self,
7818 journal_entries: &[JournalEntry],
7819 document_flows: &DocumentFlowSnapshot,
7820 stats: &mut EnhancedGenerationStatistics,
7821 ) -> SynthResult<(
7822 Option<datasynth_core::models::EntityGraph>,
7823 Vec<datasynth_core::models::CrossProcessLink>,
7824 )> {
7825 use datasynth_generators::relationships::{
7826 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
7827 TransactionSummary,
7828 };
7829
7830 let rs_enabled = self.config.relationship_strength.enabled;
7831 let cpl_enabled = self.config.cross_process_links.enabled
7832 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
7833
7834 if !rs_enabled && !cpl_enabled {
7835 debug!(
7836 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
7837 );
7838 return Ok((None, Vec::new()));
7839 }
7840
7841 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
7842
7843 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7844 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7845
7846 let company_code = self
7847 .config
7848 .companies
7849 .first()
7850 .map(|c| c.code.as_str())
7851 .unwrap_or("1000");
7852
7853 let gen_config = EntityGraphConfig {
7855 enabled: rs_enabled,
7856 cross_process: datasynth_generators::relationships::CrossProcessConfig {
7857 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
7858 enable_return_flows: false,
7859 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
7860 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
7861 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
7863 1.0
7864 } else {
7865 0.30
7866 },
7867 ..Default::default()
7868 },
7869 strength_config: datasynth_generators::relationships::StrengthConfig {
7870 transaction_volume_weight: self
7871 .config
7872 .relationship_strength
7873 .calculation
7874 .transaction_volume_weight,
7875 transaction_count_weight: self
7876 .config
7877 .relationship_strength
7878 .calculation
7879 .transaction_count_weight,
7880 duration_weight: self
7881 .config
7882 .relationship_strength
7883 .calculation
7884 .relationship_duration_weight,
7885 recency_weight: self.config.relationship_strength.calculation.recency_weight,
7886 mutual_connections_weight: self
7887 .config
7888 .relationship_strength
7889 .calculation
7890 .mutual_connections_weight,
7891 recency_half_life_days: self
7892 .config
7893 .relationship_strength
7894 .calculation
7895 .recency_half_life_days,
7896 },
7897 ..Default::default()
7898 };
7899
7900 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
7901
7902 let entity_graph = if rs_enabled {
7904 let vendor_summaries: Vec<EntitySummary> = self
7906 .master_data
7907 .vendors
7908 .iter()
7909 .map(|v| {
7910 EntitySummary::new(
7911 &v.vendor_id,
7912 &v.name,
7913 datasynth_core::models::GraphEntityType::Vendor,
7914 start_date,
7915 )
7916 })
7917 .collect();
7918
7919 let customer_summaries: Vec<EntitySummary> = self
7920 .master_data
7921 .customers
7922 .iter()
7923 .map(|c| {
7924 EntitySummary::new(
7925 &c.customer_id,
7926 &c.name,
7927 datasynth_core::models::GraphEntityType::Customer,
7928 start_date,
7929 )
7930 })
7931 .collect();
7932
7933 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
7938 std::collections::HashMap::new();
7939
7940 for je in journal_entries {
7941 let cc = je.header.company_code.clone();
7942 let posting_date = je.header.posting_date;
7943 for line in &je.lines {
7944 if let Some(ref tp) = line.trading_partner {
7945 let amount = if line.debit_amount > line.credit_amount {
7946 line.debit_amount
7947 } else {
7948 line.credit_amount
7949 };
7950 let entry = txn_summaries
7951 .entry((cc.clone(), tp.clone()))
7952 .or_insert_with(|| TransactionSummary {
7953 total_volume: rust_decimal::Decimal::ZERO,
7954 transaction_count: 0,
7955 first_transaction_date: posting_date,
7956 last_transaction_date: posting_date,
7957 related_entities: std::collections::HashSet::new(),
7958 });
7959 entry.total_volume += amount;
7960 entry.transaction_count += 1;
7961 if posting_date < entry.first_transaction_date {
7962 entry.first_transaction_date = posting_date;
7963 }
7964 if posting_date > entry.last_transaction_date {
7965 entry.last_transaction_date = posting_date;
7966 }
7967 entry.related_entities.insert(cc.clone());
7968 }
7969 }
7970 }
7971
7972 for chain in &document_flows.p2p_chains {
7975 let cc = chain.purchase_order.header.company_code.clone();
7976 let vendor_id = chain.purchase_order.vendor_id.clone();
7977 let po_date = chain.purchase_order.header.document_date;
7978 let amount = chain.purchase_order.total_net_amount;
7979
7980 let entry = txn_summaries
7981 .entry((cc.clone(), vendor_id))
7982 .or_insert_with(|| TransactionSummary {
7983 total_volume: rust_decimal::Decimal::ZERO,
7984 transaction_count: 0,
7985 first_transaction_date: po_date,
7986 last_transaction_date: po_date,
7987 related_entities: std::collections::HashSet::new(),
7988 });
7989 entry.total_volume += amount;
7990 entry.transaction_count += 1;
7991 if po_date < entry.first_transaction_date {
7992 entry.first_transaction_date = po_date;
7993 }
7994 if po_date > entry.last_transaction_date {
7995 entry.last_transaction_date = po_date;
7996 }
7997 entry.related_entities.insert(cc);
7998 }
7999
8000 for chain in &document_flows.o2c_chains {
8002 let cc = chain.sales_order.header.company_code.clone();
8003 let customer_id = chain.sales_order.customer_id.clone();
8004 let so_date = chain.sales_order.header.document_date;
8005 let amount = chain.sales_order.total_net_amount;
8006
8007 let entry = txn_summaries
8008 .entry((cc.clone(), customer_id))
8009 .or_insert_with(|| TransactionSummary {
8010 total_volume: rust_decimal::Decimal::ZERO,
8011 transaction_count: 0,
8012 first_transaction_date: so_date,
8013 last_transaction_date: so_date,
8014 related_entities: std::collections::HashSet::new(),
8015 });
8016 entry.total_volume += amount;
8017 entry.transaction_count += 1;
8018 if so_date < entry.first_transaction_date {
8019 entry.first_transaction_date = so_date;
8020 }
8021 if so_date > entry.last_transaction_date {
8022 entry.last_transaction_date = so_date;
8023 }
8024 entry.related_entities.insert(cc);
8025 }
8026
8027 let as_of_date = journal_entries
8028 .last()
8029 .map(|je| je.header.posting_date)
8030 .unwrap_or(start_date);
8031
8032 let graph = gen.generate_entity_graph(
8033 company_code,
8034 as_of_date,
8035 &vendor_summaries,
8036 &customer_summaries,
8037 &txn_summaries,
8038 );
8039
8040 info!(
8041 "Entity relationship graph: {} nodes, {} edges",
8042 graph.nodes.len(),
8043 graph.edges.len()
8044 );
8045 stats.entity_relationship_node_count = graph.nodes.len();
8046 stats.entity_relationship_edge_count = graph.edges.len();
8047 Some(graph)
8048 } else {
8049 None
8050 };
8051
8052 let cross_process_links = if cpl_enabled {
8054 let gr_refs: Vec<GoodsReceiptRef> = document_flows
8056 .p2p_chains
8057 .iter()
8058 .flat_map(|chain| {
8059 let vendor_id = chain.purchase_order.vendor_id.clone();
8060 let cc = chain.purchase_order.header.company_code.clone();
8061 chain.goods_receipts.iter().flat_map(move |gr| {
8062 gr.items.iter().filter_map({
8063 let doc_id = gr.header.document_id.clone();
8064 let v_id = vendor_id.clone();
8065 let company = cc.clone();
8066 let receipt_date = gr.header.document_date;
8067 move |item| {
8068 item.base
8069 .material_id
8070 .as_ref()
8071 .map(|mat_id| GoodsReceiptRef {
8072 document_id: doc_id.clone(),
8073 material_id: mat_id.clone(),
8074 quantity: item.base.quantity,
8075 receipt_date,
8076 vendor_id: v_id.clone(),
8077 company_code: company.clone(),
8078 })
8079 }
8080 })
8081 })
8082 })
8083 .collect();
8084
8085 let del_refs: Vec<DeliveryRef> = document_flows
8087 .o2c_chains
8088 .iter()
8089 .flat_map(|chain| {
8090 let customer_id = chain.sales_order.customer_id.clone();
8091 let cc = chain.sales_order.header.company_code.clone();
8092 chain.deliveries.iter().flat_map(move |del| {
8093 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
8094 del.items.iter().filter_map({
8095 let doc_id = del.header.document_id.clone();
8096 let c_id = customer_id.clone();
8097 let company = cc.clone();
8098 move |item| {
8099 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
8100 document_id: doc_id.clone(),
8101 material_id: mat_id.clone(),
8102 quantity: item.base.quantity,
8103 delivery_date,
8104 customer_id: c_id.clone(),
8105 company_code: company.clone(),
8106 })
8107 }
8108 })
8109 })
8110 })
8111 .collect();
8112
8113 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
8114 info!("Cross-process links generated: {} links", links.len());
8115 stats.cross_process_link_count = links.len();
8116 links
8117 } else {
8118 Vec::new()
8119 };
8120
8121 self.check_resources_with_log("post-entity-relationships")?;
8122 Ok((entity_graph, cross_process_links))
8123 }
8124
8125 fn phase_industry_data(
8127 &self,
8128 stats: &mut EnhancedGenerationStatistics,
8129 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
8130 if !self.config.industry_specific.enabled {
8131 return None;
8132 }
8133 info!("Phase 29: Generating industry-specific data");
8134 let output = datasynth_generators::industry::factory::generate_industry_output(
8135 self.config.global.industry,
8136 );
8137 stats.industry_gl_account_count = output.gl_accounts.len();
8138 info!(
8139 "Industry data generated: {} GL accounts for {:?}",
8140 output.gl_accounts.len(),
8141 self.config.global.industry
8142 );
8143 Some(output)
8144 }
8145
8146 fn phase_opening_balances(
8148 &mut self,
8149 coa: &Arc<ChartOfAccounts>,
8150 stats: &mut EnhancedGenerationStatistics,
8151 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
8152 if !self.config.balance.generate_opening_balances {
8153 debug!("Phase 3b: Skipped (opening balance generation disabled)");
8154 return Ok(Vec::new());
8155 }
8156 info!("Phase 3b: Generating Opening Balances");
8157
8158 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8159 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8160 let fiscal_year = start_date.year();
8161
8162 let industry = match self.config.global.industry {
8163 IndustrySector::Manufacturing => IndustryType::Manufacturing,
8164 IndustrySector::Retail => IndustryType::Retail,
8165 IndustrySector::FinancialServices => IndustryType::Financial,
8166 IndustrySector::Healthcare => IndustryType::Healthcare,
8167 IndustrySector::Technology => IndustryType::Technology,
8168 _ => IndustryType::Manufacturing,
8169 };
8170
8171 let config = datasynth_generators::OpeningBalanceConfig {
8172 industry,
8173 ..Default::default()
8174 };
8175 let mut gen =
8176 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
8177
8178 let mut results = Vec::new();
8179 for company in &self.config.companies {
8180 let spec = OpeningBalanceSpec::new(
8181 company.code.clone(),
8182 start_date,
8183 fiscal_year,
8184 company.currency.clone(),
8185 rust_decimal::Decimal::new(10_000_000, 0),
8186 industry,
8187 );
8188 let ob = gen.generate(&spec, coa, start_date, &company.code);
8189 results.push(ob);
8190 }
8191
8192 stats.opening_balance_count = results.len();
8193 info!("Opening balances generated: {} companies", results.len());
8194 self.check_resources_with_log("post-opening-balances")?;
8195
8196 Ok(results)
8197 }
8198
8199 fn phase_subledger_reconciliation(
8201 &mut self,
8202 subledger: &SubledgerSnapshot,
8203 entries: &[JournalEntry],
8204 stats: &mut EnhancedGenerationStatistics,
8205 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
8206 if !self.config.balance.reconcile_subledgers {
8207 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
8208 return Ok(Vec::new());
8209 }
8210 info!("Phase 9b: Reconciling GL to subledger balances");
8211
8212 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8213 .map(|d| d + chrono::Months::new(self.config.global.period_months))
8214 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8215
8216 let tracker_config = BalanceTrackerConfig {
8218 validate_on_each_entry: false,
8219 track_history: false,
8220 fail_on_validation_error: false,
8221 ..Default::default()
8222 };
8223 let recon_currency = self
8224 .config
8225 .companies
8226 .first()
8227 .map(|c| c.currency.clone())
8228 .unwrap_or_else(|| "USD".to_string());
8229 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
8230 let validation_errors = tracker.apply_entries(entries);
8231 if !validation_errors.is_empty() {
8232 warn!(
8233 error_count = validation_errors.len(),
8234 "Balance tracker encountered validation errors during subledger reconciliation"
8235 );
8236 for err in &validation_errors {
8237 debug!("Balance validation error: {:?}", err);
8238 }
8239 }
8240
8241 let mut engine = datasynth_generators::ReconciliationEngine::new(
8242 datasynth_generators::ReconciliationConfig::default(),
8243 );
8244
8245 let mut results = Vec::new();
8246 let company_code = self
8247 .config
8248 .companies
8249 .first()
8250 .map(|c| c.code.as_str())
8251 .unwrap_or("1000");
8252
8253 if !subledger.ar_invoices.is_empty() {
8255 let gl_balance = tracker
8256 .get_account_balance(
8257 company_code,
8258 datasynth_core::accounts::control_accounts::AR_CONTROL,
8259 )
8260 .map(|b| b.closing_balance)
8261 .unwrap_or_default();
8262 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
8263 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
8264 }
8265
8266 if !subledger.ap_invoices.is_empty() {
8268 let gl_balance = tracker
8269 .get_account_balance(
8270 company_code,
8271 datasynth_core::accounts::control_accounts::AP_CONTROL,
8272 )
8273 .map(|b| b.closing_balance)
8274 .unwrap_or_default();
8275 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
8276 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
8277 }
8278
8279 if !subledger.fa_records.is_empty() {
8281 let gl_asset_balance = tracker
8282 .get_account_balance(
8283 company_code,
8284 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
8285 )
8286 .map(|b| b.closing_balance)
8287 .unwrap_or_default();
8288 let gl_accum_depr_balance = tracker
8289 .get_account_balance(
8290 company_code,
8291 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
8292 )
8293 .map(|b| b.closing_balance)
8294 .unwrap_or_default();
8295 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
8296 subledger.fa_records.iter().collect();
8297 let (asset_recon, depr_recon) = engine.reconcile_fa(
8298 company_code,
8299 end_date,
8300 gl_asset_balance,
8301 gl_accum_depr_balance,
8302 &fa_refs,
8303 );
8304 results.push(asset_recon);
8305 results.push(depr_recon);
8306 }
8307
8308 if !subledger.inventory_positions.is_empty() {
8310 let gl_balance = tracker
8311 .get_account_balance(
8312 company_code,
8313 datasynth_core::accounts::control_accounts::INVENTORY,
8314 )
8315 .map(|b| b.closing_balance)
8316 .unwrap_or_default();
8317 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
8318 subledger.inventory_positions.iter().collect();
8319 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
8320 }
8321
8322 stats.subledger_reconciliation_count = results.len();
8323 let passed = results.iter().filter(|r| r.is_balanced()).count();
8324 let failed = results.len() - passed;
8325 info!(
8326 "Subledger reconciliation: {} checks, {} passed, {} failed",
8327 results.len(),
8328 passed,
8329 failed
8330 );
8331 self.check_resources_with_log("post-subledger-reconciliation")?;
8332
8333 Ok(results)
8334 }
8335
8336 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
8338 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
8339
8340 let coa_framework = self.resolve_coa_framework();
8341
8342 let mut gen = ChartOfAccountsGenerator::new(
8343 self.config.chart_of_accounts.complexity,
8344 self.config.global.industry,
8345 self.seed,
8346 )
8347 .with_coa_framework(coa_framework);
8348
8349 let coa = Arc::new(gen.generate());
8350 self.coa = Some(Arc::clone(&coa));
8351
8352 if let Some(pb) = pb {
8353 pb.finish_with_message("Chart of Accounts complete");
8354 }
8355
8356 Ok(coa)
8357 }
8358
8359 fn generate_master_data(&mut self) -> SynthResult<()> {
8361 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8362 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8363 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8364
8365 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
8367
8368 let pack = self.primary_pack().clone();
8370
8371 let vendors_per_company = self.phase_config.vendors_per_company;
8373 let customers_per_company = self.phase_config.customers_per_company;
8374 let materials_per_company = self.phase_config.materials_per_company;
8375 let assets_per_company = self.phase_config.assets_per_company;
8376 let coa_framework = self.resolve_coa_framework();
8377
8378 let per_company_results: Vec<_> = self
8381 .config
8382 .companies
8383 .par_iter()
8384 .enumerate()
8385 .map(|(i, company)| {
8386 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
8387 let pack = pack.clone();
8388
8389 let mut vendor_gen = VendorGenerator::new(company_seed);
8391 vendor_gen.set_country_pack(pack.clone());
8392 vendor_gen.set_coa_framework(coa_framework);
8393 vendor_gen.set_counter_offset(i * vendors_per_company);
8394 if self.config.vendor_network.enabled {
8396 let vn = &self.config.vendor_network;
8397 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
8398 enabled: true,
8399 depth: vn.depth,
8400 tier1_count: datasynth_generators::TierCountConfig::new(
8401 vn.tier1.min,
8402 vn.tier1.max,
8403 ),
8404 tier2_per_parent: datasynth_generators::TierCountConfig::new(
8405 vn.tier2_per_parent.min,
8406 vn.tier2_per_parent.max,
8407 ),
8408 tier3_per_parent: datasynth_generators::TierCountConfig::new(
8409 vn.tier3_per_parent.min,
8410 vn.tier3_per_parent.max,
8411 ),
8412 cluster_distribution: datasynth_generators::ClusterDistribution {
8413 reliable_strategic: vn.clusters.reliable_strategic,
8414 standard_operational: vn.clusters.standard_operational,
8415 transactional: vn.clusters.transactional,
8416 problematic: vn.clusters.problematic,
8417 },
8418 concentration_limits: datasynth_generators::ConcentrationLimits {
8419 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
8420 max_top5: vn.dependencies.top_5_concentration,
8421 },
8422 ..datasynth_generators::VendorNetworkConfig::default()
8423 });
8424 }
8425 let vendor_pool =
8426 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
8427
8428 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
8430 customer_gen.set_country_pack(pack.clone());
8431 customer_gen.set_coa_framework(coa_framework);
8432 customer_gen.set_counter_offset(i * customers_per_company);
8433 if self.config.customer_segmentation.enabled {
8435 let cs = &self.config.customer_segmentation;
8436 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
8437 enabled: true,
8438 segment_distribution: datasynth_generators::SegmentDistribution {
8439 enterprise: cs.value_segments.enterprise.customer_share,
8440 mid_market: cs.value_segments.mid_market.customer_share,
8441 smb: cs.value_segments.smb.customer_share,
8442 consumer: cs.value_segments.consumer.customer_share,
8443 },
8444 referral_config: datasynth_generators::ReferralConfig {
8445 enabled: cs.networks.referrals.enabled,
8446 referral_rate: cs.networks.referrals.referral_rate,
8447 ..Default::default()
8448 },
8449 hierarchy_config: datasynth_generators::HierarchyConfig {
8450 enabled: cs.networks.corporate_hierarchies.enabled,
8451 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
8452 ..Default::default()
8453 },
8454 ..Default::default()
8455 };
8456 customer_gen.set_segmentation_config(seg_cfg);
8457 }
8458 let customer_pool = customer_gen.generate_customer_pool(
8459 customers_per_company,
8460 &company.code,
8461 start_date,
8462 );
8463
8464 let mut material_gen = MaterialGenerator::new(company_seed + 200);
8466 material_gen.set_country_pack(pack.clone());
8467 material_gen.set_counter_offset(i * materials_per_company);
8468 let material_pool = material_gen.generate_material_pool(
8469 materials_per_company,
8470 &company.code,
8471 start_date,
8472 );
8473
8474 let mut asset_gen = AssetGenerator::new(company_seed + 300);
8476 let asset_pool = asset_gen.generate_asset_pool(
8477 assets_per_company,
8478 &company.code,
8479 (start_date, end_date),
8480 );
8481
8482 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
8484 employee_gen.set_country_pack(pack);
8485 let employee_pool =
8486 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
8487
8488 let employee_change_history =
8490 employee_gen.generate_all_change_history(&employee_pool, end_date);
8491
8492 let employee_ids: Vec<String> = employee_pool
8494 .employees
8495 .iter()
8496 .map(|e| e.employee_id.clone())
8497 .collect();
8498 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
8499 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
8500
8501 (
8502 vendor_pool.vendors,
8503 customer_pool.customers,
8504 material_pool.materials,
8505 asset_pool.assets,
8506 employee_pool.employees,
8507 employee_change_history,
8508 cost_centers,
8509 )
8510 })
8511 .collect();
8512
8513 for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
8515 per_company_results
8516 {
8517 self.master_data.vendors.extend(vendors);
8518 self.master_data.customers.extend(customers);
8519 self.master_data.materials.extend(materials);
8520 self.master_data.assets.extend(assets);
8521 self.master_data.employees.extend(employees);
8522 self.master_data.cost_centers.extend(cost_centers);
8523 self.master_data
8524 .employee_change_history
8525 .extend(change_history);
8526 }
8527
8528 if let Some(pb) = &pb {
8529 pb.inc(total);
8530 }
8531 if let Some(pb) = pb {
8532 pb.finish_with_message("Master data generation complete");
8533 }
8534
8535 Ok(())
8536 }
8537
8538 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
8540 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8541 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8542
8543 let months = (self.config.global.period_months as usize).max(1);
8546 let p2p_count = self
8547 .phase_config
8548 .p2p_chains
8549 .min(self.master_data.vendors.len() * 2 * months);
8550 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
8551
8552 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
8554 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
8555 p2p_gen.set_country_pack(self.primary_pack().clone());
8556
8557 for i in 0..p2p_count {
8558 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
8559 let materials: Vec<&Material> = self
8560 .master_data
8561 .materials
8562 .iter()
8563 .skip(i % self.master_data.materials.len().max(1))
8564 .take(2.min(self.master_data.materials.len()))
8565 .collect();
8566
8567 if materials.is_empty() {
8568 continue;
8569 }
8570
8571 let company = &self.config.companies[i % self.config.companies.len()];
8572 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
8573 let fiscal_period = po_date.month() as u8;
8574 let created_by = if self.master_data.employees.is_empty() {
8575 "SYSTEM"
8576 } else {
8577 self.master_data.employees[i % self.master_data.employees.len()]
8578 .user_id
8579 .as_str()
8580 };
8581
8582 let chain = p2p_gen.generate_chain(
8583 &company.code,
8584 vendor,
8585 &materials,
8586 po_date,
8587 start_date.year() as u16,
8588 fiscal_period,
8589 created_by,
8590 );
8591
8592 flows.purchase_orders.push(chain.purchase_order.clone());
8594 flows.goods_receipts.extend(chain.goods_receipts.clone());
8595 if let Some(vi) = &chain.vendor_invoice {
8596 flows.vendor_invoices.push(vi.clone());
8597 }
8598 if let Some(payment) = &chain.payment {
8599 flows.payments.push(payment.clone());
8600 }
8601 for remainder in &chain.remainder_payments {
8602 flows.payments.push(remainder.clone());
8603 }
8604 flows.p2p_chains.push(chain);
8605
8606 if let Some(pb) = &pb {
8607 pb.inc(1);
8608 }
8609 }
8610
8611 if let Some(pb) = pb {
8612 pb.finish_with_message("P2P document flows complete");
8613 }
8614
8615 let o2c_count = self
8618 .phase_config
8619 .o2c_chains
8620 .min(self.master_data.customers.len() * 2 * months);
8621 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
8622
8623 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
8625 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
8626 o2c_gen.set_country_pack(self.primary_pack().clone());
8627
8628 for i in 0..o2c_count {
8629 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
8630 let materials: Vec<&Material> = self
8631 .master_data
8632 .materials
8633 .iter()
8634 .skip(i % self.master_data.materials.len().max(1))
8635 .take(2.min(self.master_data.materials.len()))
8636 .collect();
8637
8638 if materials.is_empty() {
8639 continue;
8640 }
8641
8642 let company = &self.config.companies[i % self.config.companies.len()];
8643 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
8644 let fiscal_period = so_date.month() as u8;
8645 let created_by = if self.master_data.employees.is_empty() {
8646 "SYSTEM"
8647 } else {
8648 self.master_data.employees[i % self.master_data.employees.len()]
8649 .user_id
8650 .as_str()
8651 };
8652
8653 let chain = o2c_gen.generate_chain(
8654 &company.code,
8655 customer,
8656 &materials,
8657 so_date,
8658 start_date.year() as u16,
8659 fiscal_period,
8660 created_by,
8661 );
8662
8663 flows.sales_orders.push(chain.sales_order.clone());
8665 flows.deliveries.extend(chain.deliveries.clone());
8666 if let Some(ci) = &chain.customer_invoice {
8667 flows.customer_invoices.push(ci.clone());
8668 }
8669 if let Some(receipt) = &chain.customer_receipt {
8670 flows.payments.push(receipt.clone());
8671 }
8672 for receipt in &chain.remainder_receipts {
8674 flows.payments.push(receipt.clone());
8675 }
8676 flows.o2c_chains.push(chain);
8677
8678 if let Some(pb) = &pb {
8679 pb.inc(1);
8680 }
8681 }
8682
8683 if let Some(pb) = pb {
8684 pb.finish_with_message("O2C document flows complete");
8685 }
8686
8687 {
8691 let mut refs = Vec::new();
8692 for doc in &flows.purchase_orders {
8693 refs.extend(doc.header.document_references.iter().cloned());
8694 }
8695 for doc in &flows.goods_receipts {
8696 refs.extend(doc.header.document_references.iter().cloned());
8697 }
8698 for doc in &flows.vendor_invoices {
8699 refs.extend(doc.header.document_references.iter().cloned());
8700 }
8701 for doc in &flows.sales_orders {
8702 refs.extend(doc.header.document_references.iter().cloned());
8703 }
8704 for doc in &flows.deliveries {
8705 refs.extend(doc.header.document_references.iter().cloned());
8706 }
8707 for doc in &flows.customer_invoices {
8708 refs.extend(doc.header.document_references.iter().cloned());
8709 }
8710 for doc in &flows.payments {
8711 refs.extend(doc.header.document_references.iter().cloned());
8712 }
8713 debug!(
8714 "Collected {} document cross-references from document headers",
8715 refs.len()
8716 );
8717 flows.document_references = refs;
8718 }
8719
8720 Ok(())
8721 }
8722
8723 fn generate_journal_entries(
8725 &mut self,
8726 coa: &Arc<ChartOfAccounts>,
8727 ) -> SynthResult<Vec<JournalEntry>> {
8728 use datasynth_core::traits::ParallelGenerator;
8729
8730 let total = self.calculate_total_transactions();
8731 let pb = self.create_progress_bar(total, "Generating Journal Entries");
8732
8733 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8734 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8735 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8736
8737 let company_codes: Vec<String> = self
8738 .config
8739 .companies
8740 .iter()
8741 .map(|c| c.code.clone())
8742 .collect();
8743
8744 let generator = JournalEntryGenerator::new_with_params(
8745 self.config.transactions.clone(),
8746 Arc::clone(coa),
8747 company_codes,
8748 start_date,
8749 end_date,
8750 self.seed,
8751 );
8752
8753 let je_pack = self.primary_pack();
8757
8758 let mut generator = generator
8759 .with_master_data(
8760 &self.master_data.vendors,
8761 &self.master_data.customers,
8762 &self.master_data.materials,
8763 )
8764 .with_country_pack_names(je_pack)
8765 .with_country_pack_temporal(
8766 self.config.temporal_patterns.clone(),
8767 self.seed + 200,
8768 je_pack,
8769 )
8770 .with_persona_errors(true)
8771 .with_fraud_config(self.config.fraud.clone());
8772
8773 if self.config.temporal.enabled {
8775 let drift_config = self.config.temporal.to_core_config();
8776 generator = generator.with_drift_config(drift_config, self.seed + 100);
8777 }
8778
8779 self.check_memory_limit()?;
8781
8782 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
8784
8785 let entries = if total >= 10_000 && num_threads > 1 {
8789 let sub_generators = generator.split(num_threads);
8792 let entries_per_thread = total as usize / num_threads;
8793 let remainder = total as usize % num_threads;
8794
8795 let batches: Vec<Vec<JournalEntry>> = sub_generators
8796 .into_par_iter()
8797 .enumerate()
8798 .map(|(i, mut gen)| {
8799 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
8800 gen.generate_batch(count)
8801 })
8802 .collect();
8803
8804 let entries = JournalEntryGenerator::merge_results(batches);
8806
8807 if let Some(pb) = &pb {
8808 pb.inc(total);
8809 }
8810 entries
8811 } else {
8812 let mut entries = Vec::with_capacity(total as usize);
8814 for _ in 0..total {
8815 let entry = generator.generate();
8816 entries.push(entry);
8817 if let Some(pb) = &pb {
8818 pb.inc(1);
8819 }
8820 }
8821 entries
8822 };
8823
8824 if let Some(pb) = pb {
8825 pb.finish_with_message("Journal entries complete");
8826 }
8827
8828 Ok(entries)
8829 }
8830
8831 fn generate_jes_from_document_flows(
8836 &mut self,
8837 flows: &DocumentFlowSnapshot,
8838 ) -> SynthResult<Vec<JournalEntry>> {
8839 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
8840 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
8841
8842 let je_config = match self.resolve_coa_framework() {
8843 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
8844 CoAFramework::GermanSkr04 => {
8845 let fa = datasynth_core::FrameworkAccounts::german_gaap();
8846 DocumentFlowJeConfig::from(&fa)
8847 }
8848 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
8849 };
8850
8851 let populate_fec = je_config.populate_fec_fields;
8852 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
8853
8854 if populate_fec {
8858 let mut aux_lookup = std::collections::HashMap::new();
8859 for vendor in &self.master_data.vendors {
8860 if let Some(ref aux) = vendor.auxiliary_gl_account {
8861 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
8862 }
8863 }
8864 for customer in &self.master_data.customers {
8865 if let Some(ref aux) = customer.auxiliary_gl_account {
8866 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
8867 }
8868 }
8869 if !aux_lookup.is_empty() {
8870 generator.set_auxiliary_account_lookup(aux_lookup);
8871 }
8872 }
8873
8874 let mut entries = Vec::new();
8875
8876 for chain in &flows.p2p_chains {
8878 let chain_entries = generator.generate_from_p2p_chain(chain);
8879 entries.extend(chain_entries);
8880 if let Some(pb) = &pb {
8881 pb.inc(1);
8882 }
8883 }
8884
8885 for chain in &flows.o2c_chains {
8887 let chain_entries = generator.generate_from_o2c_chain(chain);
8888 entries.extend(chain_entries);
8889 if let Some(pb) = &pb {
8890 pb.inc(1);
8891 }
8892 }
8893
8894 if let Some(pb) = pb {
8895 pb.finish_with_message(format!(
8896 "Generated {} JEs from document flows",
8897 entries.len()
8898 ));
8899 }
8900
8901 Ok(entries)
8902 }
8903
8904 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
8910 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
8911
8912 let mut jes = Vec::with_capacity(payroll_runs.len());
8913
8914 for run in payroll_runs {
8915 let mut je = JournalEntry::new_simple(
8916 format!("JE-PAYROLL-{}", run.payroll_id),
8917 run.company_code.clone(),
8918 run.run_date,
8919 format!("Payroll {}", run.payroll_id),
8920 );
8921
8922 je.add_line(JournalEntryLine {
8924 line_number: 1,
8925 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
8926 debit_amount: run.total_gross,
8927 reference: Some(run.payroll_id.clone()),
8928 text: Some(format!(
8929 "Payroll {} ({} employees)",
8930 run.payroll_id, run.employee_count
8931 )),
8932 ..Default::default()
8933 });
8934
8935 je.add_line(JournalEntryLine {
8937 line_number: 2,
8938 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
8939 credit_amount: run.total_gross,
8940 reference: Some(run.payroll_id.clone()),
8941 ..Default::default()
8942 });
8943
8944 jes.push(je);
8945 }
8946
8947 jes
8948 }
8949
8950 fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
8956 use datasynth_core::accounts::{control_accounts, expense_accounts};
8957 use datasynth_core::models::ProductionOrderStatus;
8958
8959 let mut jes = Vec::new();
8960
8961 for order in production_orders {
8962 if !matches!(
8964 order.status,
8965 ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
8966 ) {
8967 continue;
8968 }
8969
8970 let mut je = JournalEntry::new_simple(
8971 format!("JE-MFG-{}", order.order_id),
8972 order.company_code.clone(),
8973 order.actual_end.unwrap_or(order.planned_end),
8974 format!(
8975 "Production Order {} - {}",
8976 order.order_id, order.material_description
8977 ),
8978 );
8979
8980 je.add_line(JournalEntryLine {
8982 line_number: 1,
8983 gl_account: expense_accounts::RAW_MATERIALS.to_string(),
8984 debit_amount: order.actual_cost,
8985 reference: Some(order.order_id.clone()),
8986 text: Some(format!(
8987 "Material consumption for {}",
8988 order.material_description
8989 )),
8990 quantity: Some(order.actual_quantity),
8991 unit: Some("EA".to_string()),
8992 ..Default::default()
8993 });
8994
8995 je.add_line(JournalEntryLine {
8997 line_number: 2,
8998 gl_account: control_accounts::INVENTORY.to_string(),
8999 credit_amount: order.actual_cost,
9000 reference: Some(order.order_id.clone()),
9001 ..Default::default()
9002 });
9003
9004 jes.push(je);
9005 }
9006
9007 jes
9008 }
9009
9010 fn link_document_flows_to_subledgers(
9015 &mut self,
9016 flows: &DocumentFlowSnapshot,
9017 ) -> SynthResult<SubledgerSnapshot> {
9018 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
9019 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
9020
9021 let vendor_names: std::collections::HashMap<String, String> = self
9023 .master_data
9024 .vendors
9025 .iter()
9026 .map(|v| (v.vendor_id.clone(), v.name.clone()))
9027 .collect();
9028 let customer_names: std::collections::HashMap<String, String> = self
9029 .master_data
9030 .customers
9031 .iter()
9032 .map(|c| (c.customer_id.clone(), c.name.clone()))
9033 .collect();
9034
9035 let mut linker = DocumentFlowLinker::new()
9036 .with_vendor_names(vendor_names)
9037 .with_customer_names(customer_names);
9038
9039 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
9041 if let Some(pb) = &pb {
9042 pb.inc(flows.vendor_invoices.len() as u64);
9043 }
9044
9045 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
9047 if let Some(pb) = &pb {
9048 pb.inc(flows.customer_invoices.len() as u64);
9049 }
9050
9051 if let Some(pb) = pb {
9052 pb.finish_with_message(format!(
9053 "Linked {} AP and {} AR invoices",
9054 ap_invoices.len(),
9055 ar_invoices.len()
9056 ));
9057 }
9058
9059 Ok(SubledgerSnapshot {
9060 ap_invoices,
9061 ar_invoices,
9062 fa_records: Vec::new(),
9063 inventory_positions: Vec::new(),
9064 inventory_movements: Vec::new(),
9065 ar_aging_reports: Vec::new(),
9067 ap_aging_reports: Vec::new(),
9068 depreciation_runs: Vec::new(),
9070 inventory_valuations: Vec::new(),
9071 dunning_runs: Vec::new(),
9073 dunning_letters: Vec::new(),
9074 })
9075 }
9076
9077 #[allow(clippy::too_many_arguments)]
9082 fn generate_ocpm_events(
9083 &mut self,
9084 flows: &DocumentFlowSnapshot,
9085 sourcing: &SourcingSnapshot,
9086 hr: &HrSnapshot,
9087 manufacturing: &ManufacturingSnapshot,
9088 banking: &BankingSnapshot,
9089 audit: &AuditSnapshot,
9090 financial_reporting: &FinancialReportingSnapshot,
9091 ) -> SynthResult<OcpmSnapshot> {
9092 let total_chains = flows.p2p_chains.len()
9093 + flows.o2c_chains.len()
9094 + sourcing.sourcing_projects.len()
9095 + hr.payroll_runs.len()
9096 + manufacturing.production_orders.len()
9097 + banking.customers.len()
9098 + audit.engagements.len()
9099 + financial_reporting.bank_reconciliations.len();
9100 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
9101
9102 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
9104 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
9105
9106 let ocpm_config = OcpmGeneratorConfig {
9108 generate_p2p: true,
9109 generate_o2c: true,
9110 generate_s2c: !sourcing.sourcing_projects.is_empty(),
9111 generate_h2r: !hr.payroll_runs.is_empty(),
9112 generate_mfg: !manufacturing.production_orders.is_empty(),
9113 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
9114 generate_bank: !banking.customers.is_empty(),
9115 generate_audit: !audit.engagements.is_empty(),
9116 happy_path_rate: 0.75,
9117 exception_path_rate: 0.20,
9118 error_path_rate: 0.05,
9119 add_duration_variability: true,
9120 duration_std_dev_factor: 0.3,
9121 };
9122 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
9123 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
9124
9125 let available_users: Vec<String> = self
9127 .master_data
9128 .employees
9129 .iter()
9130 .take(20)
9131 .map(|e| e.user_id.clone())
9132 .collect();
9133
9134 let fallback_date =
9136 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
9137 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9138 .unwrap_or(fallback_date);
9139 let base_midnight = base_date
9140 .and_hms_opt(0, 0, 0)
9141 .expect("midnight is always valid");
9142 let base_datetime =
9143 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
9144
9145 let add_result = |event_log: &mut OcpmEventLog,
9147 result: datasynth_ocpm::CaseGenerationResult| {
9148 for event in result.events {
9149 event_log.add_event(event);
9150 }
9151 for object in result.objects {
9152 event_log.add_object(object);
9153 }
9154 for relationship in result.relationships {
9155 event_log.add_relationship(relationship);
9156 }
9157 for corr in result.correlation_events {
9158 event_log.add_correlation_event(corr);
9159 }
9160 event_log.add_case(result.case_trace);
9161 };
9162
9163 for chain in &flows.p2p_chains {
9165 let po = &chain.purchase_order;
9166 let documents = P2pDocuments::new(
9167 &po.header.document_id,
9168 &po.vendor_id,
9169 &po.header.company_code,
9170 po.total_net_amount,
9171 &po.header.currency,
9172 &ocpm_uuid_factory,
9173 )
9174 .with_goods_receipt(
9175 chain
9176 .goods_receipts
9177 .first()
9178 .map(|gr| gr.header.document_id.as_str())
9179 .unwrap_or(""),
9180 &ocpm_uuid_factory,
9181 )
9182 .with_invoice(
9183 chain
9184 .vendor_invoice
9185 .as_ref()
9186 .map(|vi| vi.header.document_id.as_str())
9187 .unwrap_or(""),
9188 &ocpm_uuid_factory,
9189 )
9190 .with_payment(
9191 chain
9192 .payment
9193 .as_ref()
9194 .map(|p| p.header.document_id.as_str())
9195 .unwrap_or(""),
9196 &ocpm_uuid_factory,
9197 );
9198
9199 let start_time =
9200 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
9201 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
9202 add_result(&mut event_log, result);
9203
9204 if let Some(pb) = &pb {
9205 pb.inc(1);
9206 }
9207 }
9208
9209 for chain in &flows.o2c_chains {
9211 let so = &chain.sales_order;
9212 let documents = O2cDocuments::new(
9213 &so.header.document_id,
9214 &so.customer_id,
9215 &so.header.company_code,
9216 so.total_net_amount,
9217 &so.header.currency,
9218 &ocpm_uuid_factory,
9219 )
9220 .with_delivery(
9221 chain
9222 .deliveries
9223 .first()
9224 .map(|d| d.header.document_id.as_str())
9225 .unwrap_or(""),
9226 &ocpm_uuid_factory,
9227 )
9228 .with_invoice(
9229 chain
9230 .customer_invoice
9231 .as_ref()
9232 .map(|ci| ci.header.document_id.as_str())
9233 .unwrap_or(""),
9234 &ocpm_uuid_factory,
9235 )
9236 .with_receipt(
9237 chain
9238 .customer_receipt
9239 .as_ref()
9240 .map(|r| r.header.document_id.as_str())
9241 .unwrap_or(""),
9242 &ocpm_uuid_factory,
9243 );
9244
9245 let start_time =
9246 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
9247 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
9248 add_result(&mut event_log, result);
9249
9250 if let Some(pb) = &pb {
9251 pb.inc(1);
9252 }
9253 }
9254
9255 for project in &sourcing.sourcing_projects {
9257 let vendor_id = sourcing
9259 .contracts
9260 .iter()
9261 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9262 .map(|c| c.vendor_id.clone())
9263 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
9264 .or_else(|| {
9265 self.master_data
9266 .vendors
9267 .first()
9268 .map(|v| v.vendor_id.clone())
9269 })
9270 .unwrap_or_else(|| "V000".to_string());
9271 let mut docs = S2cDocuments::new(
9272 &project.project_id,
9273 &vendor_id,
9274 &project.company_code,
9275 project.estimated_annual_spend,
9276 &ocpm_uuid_factory,
9277 );
9278 if let Some(rfx) = sourcing
9280 .rfx_events
9281 .iter()
9282 .find(|r| r.sourcing_project_id == project.project_id)
9283 {
9284 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
9285 if let Some(bid) = sourcing.bids.iter().find(|b| {
9287 b.rfx_id == rfx.rfx_id
9288 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
9289 }) {
9290 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
9291 }
9292 }
9293 if let Some(contract) = sourcing
9295 .contracts
9296 .iter()
9297 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9298 {
9299 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
9300 }
9301 let start_time = base_datetime - chrono::Duration::days(90);
9302 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
9303 add_result(&mut event_log, result);
9304
9305 if let Some(pb) = &pb {
9306 pb.inc(1);
9307 }
9308 }
9309
9310 for run in &hr.payroll_runs {
9312 let employee_id = hr
9314 .payroll_line_items
9315 .iter()
9316 .find(|li| li.payroll_id == run.payroll_id)
9317 .map(|li| li.employee_id.as_str())
9318 .unwrap_or("EMP000");
9319 let docs = H2rDocuments::new(
9320 &run.payroll_id,
9321 employee_id,
9322 &run.company_code,
9323 run.total_gross,
9324 &ocpm_uuid_factory,
9325 )
9326 .with_time_entries(
9327 hr.time_entries
9328 .iter()
9329 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
9330 .take(5)
9331 .map(|t| t.entry_id.as_str())
9332 .collect(),
9333 );
9334 let start_time = base_datetime - chrono::Duration::days(30);
9335 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
9336 add_result(&mut event_log, result);
9337
9338 if let Some(pb) = &pb {
9339 pb.inc(1);
9340 }
9341 }
9342
9343 for order in &manufacturing.production_orders {
9345 let mut docs = MfgDocuments::new(
9346 &order.order_id,
9347 &order.material_id,
9348 &order.company_code,
9349 order.planned_quantity,
9350 &ocpm_uuid_factory,
9351 )
9352 .with_operations(
9353 order
9354 .operations
9355 .iter()
9356 .map(|o| format!("OP-{:04}", o.operation_number))
9357 .collect::<Vec<_>>()
9358 .iter()
9359 .map(std::string::String::as_str)
9360 .collect(),
9361 );
9362 if let Some(insp) = manufacturing
9364 .quality_inspections
9365 .iter()
9366 .find(|i| i.reference_id == order.order_id)
9367 {
9368 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
9369 }
9370 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
9372 cc.items
9373 .iter()
9374 .any(|item| item.material_id == order.material_id)
9375 }) {
9376 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
9377 }
9378 let start_time = base_datetime - chrono::Duration::days(60);
9379 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
9380 add_result(&mut event_log, result);
9381
9382 if let Some(pb) = &pb {
9383 pb.inc(1);
9384 }
9385 }
9386
9387 for customer in &banking.customers {
9389 let customer_id_str = customer.customer_id.to_string();
9390 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
9391 if let Some(account) = banking
9393 .accounts
9394 .iter()
9395 .find(|a| a.primary_owner_id == customer.customer_id)
9396 {
9397 let account_id_str = account.account_id.to_string();
9398 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
9399 let txn_strs: Vec<String> = banking
9401 .transactions
9402 .iter()
9403 .filter(|t| t.account_id == account.account_id)
9404 .take(10)
9405 .map(|t| t.transaction_id.to_string())
9406 .collect();
9407 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
9408 let txn_amounts: Vec<rust_decimal::Decimal> = banking
9409 .transactions
9410 .iter()
9411 .filter(|t| t.account_id == account.account_id)
9412 .take(10)
9413 .map(|t| t.amount)
9414 .collect();
9415 if !txn_ids.is_empty() {
9416 docs = docs.with_transactions(txn_ids, txn_amounts);
9417 }
9418 }
9419 let start_time = base_datetime - chrono::Duration::days(180);
9420 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
9421 add_result(&mut event_log, result);
9422
9423 if let Some(pb) = &pb {
9424 pb.inc(1);
9425 }
9426 }
9427
9428 for engagement in &audit.engagements {
9430 let engagement_id_str = engagement.engagement_id.to_string();
9431 let docs = AuditDocuments::new(
9432 &engagement_id_str,
9433 &engagement.client_entity_id,
9434 &ocpm_uuid_factory,
9435 )
9436 .with_workpapers(
9437 audit
9438 .workpapers
9439 .iter()
9440 .filter(|w| w.engagement_id == engagement.engagement_id)
9441 .take(10)
9442 .map(|w| w.workpaper_id.to_string())
9443 .collect::<Vec<_>>()
9444 .iter()
9445 .map(std::string::String::as_str)
9446 .collect(),
9447 )
9448 .with_evidence(
9449 audit
9450 .evidence
9451 .iter()
9452 .filter(|e| e.engagement_id == engagement.engagement_id)
9453 .take(10)
9454 .map(|e| e.evidence_id.to_string())
9455 .collect::<Vec<_>>()
9456 .iter()
9457 .map(std::string::String::as_str)
9458 .collect(),
9459 )
9460 .with_risks(
9461 audit
9462 .risk_assessments
9463 .iter()
9464 .filter(|r| r.engagement_id == engagement.engagement_id)
9465 .take(5)
9466 .map(|r| r.risk_id.to_string())
9467 .collect::<Vec<_>>()
9468 .iter()
9469 .map(std::string::String::as_str)
9470 .collect(),
9471 )
9472 .with_findings(
9473 audit
9474 .findings
9475 .iter()
9476 .filter(|f| f.engagement_id == engagement.engagement_id)
9477 .take(5)
9478 .map(|f| f.finding_id.to_string())
9479 .collect::<Vec<_>>()
9480 .iter()
9481 .map(std::string::String::as_str)
9482 .collect(),
9483 )
9484 .with_judgments(
9485 audit
9486 .judgments
9487 .iter()
9488 .filter(|j| j.engagement_id == engagement.engagement_id)
9489 .take(5)
9490 .map(|j| j.judgment_id.to_string())
9491 .collect::<Vec<_>>()
9492 .iter()
9493 .map(std::string::String::as_str)
9494 .collect(),
9495 );
9496 let start_time = base_datetime - chrono::Duration::days(120);
9497 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
9498 add_result(&mut event_log, result);
9499
9500 if let Some(pb) = &pb {
9501 pb.inc(1);
9502 }
9503 }
9504
9505 for recon in &financial_reporting.bank_reconciliations {
9507 let docs = BankReconDocuments::new(
9508 &recon.reconciliation_id,
9509 &recon.bank_account_id,
9510 &recon.company_code,
9511 recon.bank_ending_balance,
9512 &ocpm_uuid_factory,
9513 )
9514 .with_statement_lines(
9515 recon
9516 .statement_lines
9517 .iter()
9518 .take(20)
9519 .map(|l| l.line_id.as_str())
9520 .collect(),
9521 )
9522 .with_reconciling_items(
9523 recon
9524 .reconciling_items
9525 .iter()
9526 .take(10)
9527 .map(|i| i.item_id.as_str())
9528 .collect(),
9529 );
9530 let start_time = base_datetime - chrono::Duration::days(30);
9531 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
9532 add_result(&mut event_log, result);
9533
9534 if let Some(pb) = &pb {
9535 pb.inc(1);
9536 }
9537 }
9538
9539 event_log.compute_variants();
9541
9542 let summary = event_log.summary();
9543
9544 if let Some(pb) = pb {
9545 pb.finish_with_message(format!(
9546 "Generated {} OCPM events, {} objects",
9547 summary.event_count, summary.object_count
9548 ));
9549 }
9550
9551 Ok(OcpmSnapshot {
9552 event_count: summary.event_count,
9553 object_count: summary.object_count,
9554 case_count: summary.case_count,
9555 event_log: Some(event_log),
9556 })
9557 }
9558
9559 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
9561 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
9562
9563 let total_rate = if self.config.anomaly_injection.enabled {
9566 self.config.anomaly_injection.rates.total_rate
9567 } else if self.config.fraud.enabled {
9568 self.config.fraud.fraud_rate
9569 } else {
9570 0.02
9571 };
9572
9573 let fraud_rate = if self.config.anomaly_injection.enabled {
9574 self.config.anomaly_injection.rates.fraud_rate
9575 } else {
9576 AnomalyRateConfig::default().fraud_rate
9577 };
9578
9579 let error_rate = if self.config.anomaly_injection.enabled {
9580 self.config.anomaly_injection.rates.error_rate
9581 } else {
9582 AnomalyRateConfig::default().error_rate
9583 };
9584
9585 let process_issue_rate = if self.config.anomaly_injection.enabled {
9586 self.config.anomaly_injection.rates.process_rate
9587 } else {
9588 AnomalyRateConfig::default().process_issue_rate
9589 };
9590
9591 let anomaly_config = AnomalyInjectorConfig {
9592 rates: AnomalyRateConfig {
9593 total_rate,
9594 fraud_rate,
9595 error_rate,
9596 process_issue_rate,
9597 ..Default::default()
9598 },
9599 seed: self.seed + 5000,
9600 ..Default::default()
9601 };
9602
9603 let mut injector = AnomalyInjector::new(anomaly_config);
9604 let result = injector.process_entries(entries);
9605
9606 if let Some(pb) = &pb {
9607 pb.inc(entries.len() as u64);
9608 pb.finish_with_message("Anomaly injection complete");
9609 }
9610
9611 let mut by_type = HashMap::new();
9612 for label in &result.labels {
9613 *by_type
9614 .entry(format!("{:?}", label.anomaly_type))
9615 .or_insert(0) += 1;
9616 }
9617
9618 Ok(AnomalyLabels {
9619 labels: result.labels,
9620 summary: Some(result.summary),
9621 by_type,
9622 })
9623 }
9624
9625 fn validate_journal_entries(
9634 &mut self,
9635 entries: &[JournalEntry],
9636 ) -> SynthResult<BalanceValidationResult> {
9637 let clean_entries: Vec<&JournalEntry> = entries
9639 .iter()
9640 .filter(|e| {
9641 e.header
9642 .header_text
9643 .as_ref()
9644 .map(|t| !t.contains("[HUMAN_ERROR:"))
9645 .unwrap_or(true)
9646 })
9647 .collect();
9648
9649 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
9650
9651 let config = BalanceTrackerConfig {
9653 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
9657 };
9658 let validation_currency = self
9659 .config
9660 .companies
9661 .first()
9662 .map(|c| c.currency.clone())
9663 .unwrap_or_else(|| "USD".to_string());
9664
9665 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
9666
9667 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
9669 let errors = tracker.apply_entries(&clean_refs);
9670
9671 if let Some(pb) = &pb {
9672 pb.inc(entries.len() as u64);
9673 }
9674
9675 let has_unbalanced = tracker
9678 .get_validation_errors()
9679 .iter()
9680 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
9681
9682 let mut all_errors = errors;
9685 all_errors.extend(tracker.get_validation_errors().iter().cloned());
9686 let company_codes: Vec<String> = self
9687 .config
9688 .companies
9689 .iter()
9690 .map(|c| c.code.clone())
9691 .collect();
9692
9693 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9694 .map(|d| d + chrono::Months::new(self.config.global.period_months))
9695 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9696
9697 for company_code in &company_codes {
9698 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
9699 all_errors.push(e);
9700 }
9701 }
9702
9703 let stats = tracker.get_statistics();
9705
9706 let is_balanced = all_errors.is_empty();
9708
9709 if let Some(pb) = pb {
9710 let msg = if is_balanced {
9711 "Balance validation passed"
9712 } else {
9713 "Balance validation completed with errors"
9714 };
9715 pb.finish_with_message(msg);
9716 }
9717
9718 Ok(BalanceValidationResult {
9719 validated: true,
9720 is_balanced,
9721 entries_processed: stats.entries_processed,
9722 total_debits: stats.total_debits,
9723 total_credits: stats.total_credits,
9724 accounts_tracked: stats.accounts_tracked,
9725 companies_tracked: stats.companies_tracked,
9726 validation_errors: all_errors,
9727 has_unbalanced_entries: has_unbalanced,
9728 })
9729 }
9730
9731 fn inject_data_quality(
9736 &mut self,
9737 entries: &mut [JournalEntry],
9738 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
9739 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
9740
9741 let config = if self.config.data_quality.enabled {
9744 let dq = &self.config.data_quality;
9745 DataQualityConfig {
9746 enable_missing_values: dq.missing_values.enabled,
9747 missing_values: datasynth_generators::MissingValueConfig {
9748 global_rate: dq.effective_missing_rate(),
9749 ..Default::default()
9750 },
9751 enable_format_variations: dq.format_variations.enabled,
9752 format_variations: datasynth_generators::FormatVariationConfig {
9753 date_variation_rate: dq.format_variations.dates.rate,
9754 amount_variation_rate: dq.format_variations.amounts.rate,
9755 identifier_variation_rate: dq.format_variations.identifiers.rate,
9756 ..Default::default()
9757 },
9758 enable_duplicates: dq.duplicates.enabled,
9759 duplicates: datasynth_generators::DuplicateConfig {
9760 duplicate_rate: dq.effective_duplicate_rate(),
9761 ..Default::default()
9762 },
9763 enable_typos: dq.typos.enabled,
9764 typos: datasynth_generators::TypoConfig {
9765 char_error_rate: dq.effective_typo_rate(),
9766 ..Default::default()
9767 },
9768 enable_encoding_issues: dq.encoding_issues.enabled,
9769 encoding_issue_rate: dq.encoding_issues.rate,
9770 seed: self.seed.wrapping_add(77), track_statistics: true,
9772 }
9773 } else {
9774 DataQualityConfig::minimal()
9775 };
9776 let mut injector = DataQualityInjector::new(config);
9777
9778 injector.set_country_pack(self.primary_pack().clone());
9780
9781 let context = HashMap::new();
9783
9784 for entry in entries.iter_mut() {
9785 if let Some(text) = &entry.header.header_text {
9787 let processed = injector.process_text_field(
9788 "header_text",
9789 text,
9790 &entry.header.document_id.to_string(),
9791 &context,
9792 );
9793 match processed {
9794 Some(new_text) if new_text != *text => {
9795 entry.header.header_text = Some(new_text);
9796 }
9797 None => {
9798 entry.header.header_text = None; }
9800 _ => {}
9801 }
9802 }
9803
9804 if let Some(ref_text) = &entry.header.reference {
9806 let processed = injector.process_text_field(
9807 "reference",
9808 ref_text,
9809 &entry.header.document_id.to_string(),
9810 &context,
9811 );
9812 match processed {
9813 Some(new_text) if new_text != *ref_text => {
9814 entry.header.reference = Some(new_text);
9815 }
9816 None => {
9817 entry.header.reference = None;
9818 }
9819 _ => {}
9820 }
9821 }
9822
9823 let user_persona = entry.header.user_persona.clone();
9825 if let Some(processed) = injector.process_text_field(
9826 "user_persona",
9827 &user_persona,
9828 &entry.header.document_id.to_string(),
9829 &context,
9830 ) {
9831 if processed != user_persona {
9832 entry.header.user_persona = processed;
9833 }
9834 }
9835
9836 for line in &mut entry.lines {
9838 if let Some(ref text) = line.line_text {
9840 let processed = injector.process_text_field(
9841 "line_text",
9842 text,
9843 &entry.header.document_id.to_string(),
9844 &context,
9845 );
9846 match processed {
9847 Some(new_text) if new_text != *text => {
9848 line.line_text = Some(new_text);
9849 }
9850 None => {
9851 line.line_text = None;
9852 }
9853 _ => {}
9854 }
9855 }
9856
9857 if let Some(cc) = &line.cost_center {
9859 let processed = injector.process_text_field(
9860 "cost_center",
9861 cc,
9862 &entry.header.document_id.to_string(),
9863 &context,
9864 );
9865 match processed {
9866 Some(new_cc) if new_cc != *cc => {
9867 line.cost_center = Some(new_cc);
9868 }
9869 None => {
9870 line.cost_center = None;
9871 }
9872 _ => {}
9873 }
9874 }
9875 }
9876
9877 if let Some(pb) = &pb {
9878 pb.inc(1);
9879 }
9880 }
9881
9882 if let Some(pb) = pb {
9883 pb.finish_with_message("Data quality injection complete");
9884 }
9885
9886 let quality_issues = injector.issues().to_vec();
9887 Ok((injector.stats().clone(), quality_issues))
9888 }
9889
9890 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
9901 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9902 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9903 let fiscal_year = start_date.year() as u16;
9904 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
9905
9906 let total_revenue: rust_decimal::Decimal = entries
9908 .iter()
9909 .flat_map(|e| e.lines.iter())
9910 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
9911 .map(|l| l.credit_amount)
9912 .sum();
9913
9914 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
9916
9917 let mut snapshot = AuditSnapshot::default();
9918
9919 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
9921 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
9922 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
9923 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
9924 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
9925 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
9926 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
9927 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
9928 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
9929 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
9930 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
9931 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
9932
9933 let accounts: Vec<String> = self
9935 .coa
9936 .as_ref()
9937 .map(|coa| {
9938 coa.get_postable_accounts()
9939 .iter()
9940 .map(|acc| acc.account_code().to_string())
9941 .collect()
9942 })
9943 .unwrap_or_default();
9944
9945 for (i, company) in self.config.companies.iter().enumerate() {
9947 let company_revenue = total_revenue
9949 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
9950
9951 let engagements_for_company =
9953 self.phase_config.audit_engagements / self.config.companies.len().max(1);
9954 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
9955 1
9956 } else {
9957 0
9958 };
9959
9960 for _eng_idx in 0..(engagements_for_company + extra) {
9961 let mut engagement = engagement_gen.generate_engagement(
9963 &company.code,
9964 &company.name,
9965 fiscal_year,
9966 period_end,
9967 company_revenue,
9968 None, );
9970
9971 if !self.master_data.employees.is_empty() {
9973 let emp_count = self.master_data.employees.len();
9974 let base = (i * 10 + _eng_idx) % emp_count;
9976 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
9977 .employee_id
9978 .clone();
9979 engagement.engagement_manager_id = self.master_data.employees
9980 [(base + 1) % emp_count]
9981 .employee_id
9982 .clone();
9983 let real_team: Vec<String> = engagement
9984 .team_member_ids
9985 .iter()
9986 .enumerate()
9987 .map(|(j, _)| {
9988 self.master_data.employees[(base + 2 + j) % emp_count]
9989 .employee_id
9990 .clone()
9991 })
9992 .collect();
9993 engagement.team_member_ids = real_team;
9994 }
9995
9996 if let Some(pb) = &pb {
9997 pb.inc(1);
9998 }
9999
10000 let team_members: Vec<String> = engagement.team_member_ids.clone();
10002
10003 let workpapers =
10005 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
10006
10007 for wp in &workpapers {
10008 if let Some(pb) = &pb {
10009 pb.inc(1);
10010 }
10011
10012 let evidence = evidence_gen.generate_evidence_for_workpaper(
10014 wp,
10015 &team_members,
10016 wp.preparer_date,
10017 );
10018
10019 for _ in &evidence {
10020 if let Some(pb) = &pb {
10021 pb.inc(1);
10022 }
10023 }
10024
10025 snapshot.evidence.extend(evidence);
10026 }
10027
10028 let risks =
10030 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
10031
10032 for _ in &risks {
10033 if let Some(pb) = &pb {
10034 pb.inc(1);
10035 }
10036 }
10037 snapshot.risk_assessments.extend(risks);
10038
10039 let findings = finding_gen.generate_findings_for_engagement(
10041 &engagement,
10042 &workpapers,
10043 &team_members,
10044 );
10045
10046 for _ in &findings {
10047 if let Some(pb) = &pb {
10048 pb.inc(1);
10049 }
10050 }
10051 snapshot.findings.extend(findings);
10052
10053 let judgments =
10055 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
10056
10057 for _ in &judgments {
10058 if let Some(pb) = &pb {
10059 pb.inc(1);
10060 }
10061 }
10062 snapshot.judgments.extend(judgments);
10063
10064 let (confs, resps) =
10066 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
10067 snapshot.confirmations.extend(confs);
10068 snapshot.confirmation_responses.extend(resps);
10069
10070 let team_pairs: Vec<(String, String)> = team_members
10072 .iter()
10073 .map(|id| {
10074 let name = self
10075 .master_data
10076 .employees
10077 .iter()
10078 .find(|e| e.employee_id == *id)
10079 .map(|e| e.display_name.clone())
10080 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
10081 (id.clone(), name)
10082 })
10083 .collect();
10084 for wp in &workpapers {
10085 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
10086 snapshot.procedure_steps.extend(steps);
10087 }
10088
10089 for wp in &workpapers {
10091 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
10092 snapshot.samples.push(sample);
10093 }
10094 }
10095
10096 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
10098 snapshot.analytical_results.extend(analytical);
10099
10100 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
10102 snapshot.ia_functions.push(ia_func);
10103 snapshot.ia_reports.extend(ia_reports);
10104
10105 let vendor_names: Vec<String> = self
10107 .master_data
10108 .vendors
10109 .iter()
10110 .map(|v| v.name.clone())
10111 .collect();
10112 let customer_names: Vec<String> = self
10113 .master_data
10114 .customers
10115 .iter()
10116 .map(|c| c.name.clone())
10117 .collect();
10118 let (parties, rp_txns) =
10119 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
10120 snapshot.related_parties.extend(parties);
10121 snapshot.related_party_transactions.extend(rp_txns);
10122
10123 snapshot.workpapers.extend(workpapers);
10125
10126 {
10128 let scope_id = format!(
10129 "SCOPE-{}-{}",
10130 engagement.engagement_id.simple(),
10131 &engagement.client_entity_id
10132 );
10133 let scope = datasynth_core::models::audit::AuditScope::new(
10134 scope_id.clone(),
10135 engagement.engagement_id.to_string(),
10136 engagement.client_entity_id.clone(),
10137 engagement.materiality,
10138 );
10139 let mut eng = engagement;
10141 eng.scope_id = Some(scope_id);
10142 snapshot.audit_scopes.push(scope);
10143 snapshot.engagements.push(eng);
10144 }
10145 }
10146 }
10147
10148 if self.config.companies.len() > 1 {
10152 let group_materiality = snapshot
10155 .engagements
10156 .first()
10157 .map(|e| e.materiality)
10158 .unwrap_or_else(|| {
10159 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
10160 total_revenue * pct
10161 });
10162
10163 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
10164 let group_engagement_id = snapshot
10165 .engagements
10166 .first()
10167 .map(|e| e.engagement_id.to_string())
10168 .unwrap_or_else(|| "GROUP-ENG".to_string());
10169
10170 let component_snapshot = component_gen.generate(
10171 &self.config.companies,
10172 group_materiality,
10173 &group_engagement_id,
10174 period_end,
10175 );
10176
10177 snapshot.component_auditors = component_snapshot.component_auditors;
10178 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
10179 snapshot.component_instructions = component_snapshot.component_instructions;
10180 snapshot.component_reports = component_snapshot.component_reports;
10181
10182 info!(
10183 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
10184 snapshot.component_auditors.len(),
10185 snapshot.component_instructions.len(),
10186 snapshot.component_reports.len(),
10187 );
10188 }
10189
10190 {
10194 let applicable_framework = self
10195 .config
10196 .accounting_standards
10197 .framework
10198 .as_ref()
10199 .map(|f| format!("{f:?}"))
10200 .unwrap_or_else(|| "IFRS".to_string());
10201
10202 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
10203 let entity_count = self.config.companies.len();
10204
10205 for engagement in &snapshot.engagements {
10206 let company = self
10207 .config
10208 .companies
10209 .iter()
10210 .find(|c| c.code == engagement.client_entity_id);
10211 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
10212 let letter_date = engagement.planning_start;
10213 let letter = letter_gen.generate(
10214 &engagement.engagement_id.to_string(),
10215 &engagement.client_name,
10216 entity_count,
10217 engagement.period_end_date,
10218 currency,
10219 &applicable_framework,
10220 letter_date,
10221 );
10222 snapshot.engagement_letters.push(letter);
10223 }
10224
10225 info!(
10226 "ISA 210 engagement letters: {} generated",
10227 snapshot.engagement_letters.len()
10228 );
10229 }
10230
10231 {
10235 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
10236 let entity_codes: Vec<String> = self
10237 .config
10238 .companies
10239 .iter()
10240 .map(|c| c.code.clone())
10241 .collect();
10242 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
10243 info!(
10244 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
10245 subsequent.len(),
10246 subsequent
10247 .iter()
10248 .filter(|e| matches!(
10249 e.classification,
10250 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
10251 ))
10252 .count(),
10253 subsequent
10254 .iter()
10255 .filter(|e| matches!(
10256 e.classification,
10257 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
10258 ))
10259 .count(),
10260 );
10261 snapshot.subsequent_events = subsequent;
10262 }
10263
10264 {
10268 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
10269 let entity_codes: Vec<String> = self
10270 .config
10271 .companies
10272 .iter()
10273 .map(|c| c.code.clone())
10274 .collect();
10275 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
10276 info!(
10277 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
10278 soc_snapshot.service_organizations.len(),
10279 soc_snapshot.soc_reports.len(),
10280 soc_snapshot.user_entity_controls.len(),
10281 );
10282 snapshot.service_organizations = soc_snapshot.service_organizations;
10283 snapshot.soc_reports = soc_snapshot.soc_reports;
10284 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
10285 }
10286
10287 {
10291 use datasynth_generators::audit::going_concern_generator::{
10292 GoingConcernGenerator, GoingConcernInput,
10293 };
10294 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
10295 let entity_codes: Vec<String> = self
10296 .config
10297 .companies
10298 .iter()
10299 .map(|c| c.code.clone())
10300 .collect();
10301 let assessment_date = period_end + chrono::Duration::days(75);
10303 let period_label = format!("FY{}", period_end.year());
10304
10305 let gc_inputs: Vec<GoingConcernInput> = self
10316 .config
10317 .companies
10318 .iter()
10319 .map(|company| {
10320 let code = &company.code;
10321 let mut revenue = rust_decimal::Decimal::ZERO;
10322 let mut expenses = rust_decimal::Decimal::ZERO;
10323 let mut current_assets = rust_decimal::Decimal::ZERO;
10324 let mut current_liabs = rust_decimal::Decimal::ZERO;
10325 let mut total_debt = rust_decimal::Decimal::ZERO;
10326
10327 for je in entries.iter().filter(|je| &je.header.company_code == code) {
10328 for line in &je.lines {
10329 let acct = line.gl_account.as_str();
10330 let net = line.debit_amount - line.credit_amount;
10331 if acct.starts_with('4') {
10332 revenue -= net;
10334 } else if acct.starts_with('6') {
10335 expenses += net;
10337 }
10338 if acct.starts_with('1') {
10340 if let Ok(n) = acct.parse::<u32>() {
10342 if (1000..=1499).contains(&n) {
10343 current_assets += net;
10344 }
10345 }
10346 } else if acct.starts_with('2') {
10347 if let Ok(n) = acct.parse::<u32>() {
10348 if (2000..=2499).contains(&n) {
10349 current_liabs -= net; } else if (2500..=2999).contains(&n) {
10352 total_debt -= net;
10354 }
10355 }
10356 }
10357 }
10358 }
10359
10360 let net_income = revenue - expenses;
10361 let working_capital = current_assets - current_liabs;
10362 let operating_cash_flow = net_income;
10365
10366 GoingConcernInput {
10367 entity_code: code.clone(),
10368 net_income,
10369 working_capital,
10370 operating_cash_flow,
10371 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
10372 assessment_date,
10373 }
10374 })
10375 .collect();
10376
10377 let assessments = if gc_inputs.is_empty() {
10378 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
10379 } else {
10380 gc_gen.generate_for_entities_with_inputs(
10381 &entity_codes,
10382 &gc_inputs,
10383 assessment_date,
10384 &period_label,
10385 )
10386 };
10387 info!(
10388 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
10389 assessments.len(),
10390 assessments.iter().filter(|a| matches!(
10391 a.auditor_conclusion,
10392 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
10393 )).count(),
10394 assessments.iter().filter(|a| matches!(
10395 a.auditor_conclusion,
10396 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
10397 )).count(),
10398 assessments.iter().filter(|a| matches!(
10399 a.auditor_conclusion,
10400 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
10401 )).count(),
10402 );
10403 snapshot.going_concern_assessments = assessments;
10404 }
10405
10406 {
10410 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
10411 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
10412 let entity_codes: Vec<String> = self
10413 .config
10414 .companies
10415 .iter()
10416 .map(|c| c.code.clone())
10417 .collect();
10418 let estimates = est_gen.generate_for_entities(&entity_codes);
10419 info!(
10420 "ISA 540 accounting estimates: {} estimates across {} entities \
10421 ({} with retrospective reviews, {} with auditor point estimates)",
10422 estimates.len(),
10423 entity_codes.len(),
10424 estimates
10425 .iter()
10426 .filter(|e| e.retrospective_review.is_some())
10427 .count(),
10428 estimates
10429 .iter()
10430 .filter(|e| e.auditor_point_estimate.is_some())
10431 .count(),
10432 );
10433 snapshot.accounting_estimates = estimates;
10434 }
10435
10436 {
10440 use datasynth_generators::audit::audit_opinion_generator::{
10441 AuditOpinionGenerator, AuditOpinionInput,
10442 };
10443
10444 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
10445
10446 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
10448 .engagements
10449 .iter()
10450 .map(|eng| {
10451 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
10453 .findings
10454 .iter()
10455 .filter(|f| f.engagement_id == eng.engagement_id)
10456 .cloned()
10457 .collect();
10458
10459 let gc = snapshot
10461 .going_concern_assessments
10462 .iter()
10463 .find(|g| g.entity_code == eng.client_entity_id)
10464 .cloned();
10465
10466 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
10468 snapshot.component_reports.clone();
10469
10470 let auditor = self
10471 .master_data
10472 .employees
10473 .first()
10474 .map(|e| e.display_name.clone())
10475 .unwrap_or_else(|| "Global Audit LLP".into());
10476
10477 let partner = self
10478 .master_data
10479 .employees
10480 .get(1)
10481 .map(|e| e.display_name.clone())
10482 .unwrap_or_else(|| eng.engagement_partner_id.clone());
10483
10484 AuditOpinionInput {
10485 entity_code: eng.client_entity_id.clone(),
10486 entity_name: eng.client_name.clone(),
10487 engagement_id: eng.engagement_id,
10488 period_end: eng.period_end_date,
10489 findings: eng_findings,
10490 going_concern: gc,
10491 component_reports: comp_reports,
10492 is_us_listed: {
10494 let fw = &self.config.audit_standards.isa_compliance.framework;
10495 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
10496 },
10497 auditor_name: auditor,
10498 engagement_partner: partner,
10499 }
10500 })
10501 .collect();
10502
10503 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
10504
10505 for go in &generated_opinions {
10506 snapshot
10507 .key_audit_matters
10508 .extend(go.key_audit_matters.clone());
10509 }
10510 snapshot.audit_opinions = generated_opinions
10511 .into_iter()
10512 .map(|go| go.opinion)
10513 .collect();
10514
10515 info!(
10516 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
10517 snapshot.audit_opinions.len(),
10518 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
10519 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
10520 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
10521 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
10522 );
10523 }
10524
10525 {
10529 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
10530
10531 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
10532
10533 for (i, company) in self.config.companies.iter().enumerate() {
10534 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
10536 .engagements
10537 .iter()
10538 .filter(|e| e.client_entity_id == company.code)
10539 .map(|e| e.engagement_id)
10540 .collect();
10541
10542 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
10543 .findings
10544 .iter()
10545 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
10546 .cloned()
10547 .collect();
10548
10549 let emp_count = self.master_data.employees.len();
10551 let ceo_name = if emp_count > 0 {
10552 self.master_data.employees[i % emp_count]
10553 .display_name
10554 .clone()
10555 } else {
10556 format!("CEO of {}", company.name)
10557 };
10558 let cfo_name = if emp_count > 1 {
10559 self.master_data.employees[(i + 1) % emp_count]
10560 .display_name
10561 .clone()
10562 } else {
10563 format!("CFO of {}", company.name)
10564 };
10565
10566 let materiality = snapshot
10568 .engagements
10569 .iter()
10570 .find(|e| e.client_entity_id == company.code)
10571 .map(|e| e.materiality)
10572 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
10573
10574 let input = SoxGeneratorInput {
10575 company_code: company.code.clone(),
10576 company_name: company.name.clone(),
10577 fiscal_year,
10578 period_end,
10579 findings: company_findings,
10580 ceo_name,
10581 cfo_name,
10582 materiality_threshold: materiality,
10583 revenue_percent: rust_decimal::Decimal::from(100),
10584 assets_percent: rust_decimal::Decimal::from(100),
10585 significant_accounts: vec![
10586 "Revenue".into(),
10587 "Accounts Receivable".into(),
10588 "Inventory".into(),
10589 "Fixed Assets".into(),
10590 "Accounts Payable".into(),
10591 ],
10592 };
10593
10594 let (certs, assessment) = sox_gen.generate(&input);
10595 snapshot.sox_302_certifications.extend(certs);
10596 snapshot.sox_404_assessments.push(assessment);
10597 }
10598
10599 info!(
10600 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
10601 snapshot.sox_302_certifications.len(),
10602 snapshot.sox_404_assessments.len(),
10603 snapshot
10604 .sox_404_assessments
10605 .iter()
10606 .filter(|a| a.icfr_effective)
10607 .count(),
10608 snapshot
10609 .sox_404_assessments
10610 .iter()
10611 .filter(|a| !a.icfr_effective)
10612 .count(),
10613 );
10614 }
10615
10616 {
10620 use datasynth_generators::audit::materiality_generator::{
10621 MaterialityGenerator, MaterialityInput,
10622 };
10623
10624 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
10625
10626 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
10630
10631 for company in &self.config.companies {
10632 let company_code = company.code.clone();
10633
10634 let company_revenue: rust_decimal::Decimal = entries
10636 .iter()
10637 .filter(|e| e.company_code() == company_code)
10638 .flat_map(|e| e.lines.iter())
10639 .filter(|l| l.account_code.starts_with('4'))
10640 .map(|l| l.credit_amount)
10641 .sum();
10642
10643 let total_assets: rust_decimal::Decimal = entries
10645 .iter()
10646 .filter(|e| e.company_code() == company_code)
10647 .flat_map(|e| e.lines.iter())
10648 .filter(|l| l.account_code.starts_with('1'))
10649 .map(|l| l.debit_amount)
10650 .sum();
10651
10652 let total_expenses: rust_decimal::Decimal = entries
10654 .iter()
10655 .filter(|e| e.company_code() == company_code)
10656 .flat_map(|e| e.lines.iter())
10657 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
10658 .map(|l| l.debit_amount)
10659 .sum();
10660
10661 let equity: rust_decimal::Decimal = entries
10663 .iter()
10664 .filter(|e| e.company_code() == company_code)
10665 .flat_map(|e| e.lines.iter())
10666 .filter(|l| l.account_code.starts_with('3'))
10667 .map(|l| l.credit_amount)
10668 .sum();
10669
10670 let pretax_income = company_revenue - total_expenses;
10671
10672 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
10674 let w = rust_decimal::Decimal::try_from(company.volume_weight)
10675 .unwrap_or(rust_decimal::Decimal::ONE);
10676 (
10677 total_revenue * w,
10678 total_revenue * w * rust_decimal::Decimal::from(3),
10679 total_revenue * w * rust_decimal::Decimal::new(1, 1),
10680 total_revenue * w * rust_decimal::Decimal::from(2),
10681 )
10682 } else {
10683 (company_revenue, total_assets, pretax_income, equity)
10684 };
10685
10686 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
10689 entity_code: company_code,
10690 period: format!("FY{}", fiscal_year),
10691 revenue: rev,
10692 pretax_income: pti,
10693 total_assets: assets,
10694 equity: eq,
10695 gross_profit,
10696 });
10697 }
10698
10699 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
10700
10701 info!(
10702 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
10703 {} total assets, {} equity benchmarks)",
10704 snapshot.materiality_calculations.len(),
10705 snapshot
10706 .materiality_calculations
10707 .iter()
10708 .filter(|m| matches!(
10709 m.benchmark,
10710 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
10711 ))
10712 .count(),
10713 snapshot
10714 .materiality_calculations
10715 .iter()
10716 .filter(|m| matches!(
10717 m.benchmark,
10718 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
10719 ))
10720 .count(),
10721 snapshot
10722 .materiality_calculations
10723 .iter()
10724 .filter(|m| matches!(
10725 m.benchmark,
10726 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
10727 ))
10728 .count(),
10729 snapshot
10730 .materiality_calculations
10731 .iter()
10732 .filter(|m| matches!(
10733 m.benchmark,
10734 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
10735 ))
10736 .count(),
10737 );
10738 }
10739
10740 {
10744 use datasynth_generators::audit::cra_generator::CraGenerator;
10745
10746 let mut cra_gen = CraGenerator::new(self.seed + 8315);
10747
10748 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
10750 .audit_scopes
10751 .iter()
10752 .map(|s| (s.entity_code.clone(), s.id.clone()))
10753 .collect();
10754
10755 for company in &self.config.companies {
10756 let cras = cra_gen.generate_for_entity(&company.code, None);
10757 let scope_id = entity_scope_map.get(&company.code).cloned();
10758 let cras_with_scope: Vec<_> = cras
10759 .into_iter()
10760 .map(|mut cra| {
10761 cra.scope_id = scope_id.clone();
10762 cra
10763 })
10764 .collect();
10765 snapshot.combined_risk_assessments.extend(cras_with_scope);
10766 }
10767
10768 let significant_count = snapshot
10769 .combined_risk_assessments
10770 .iter()
10771 .filter(|c| c.significant_risk)
10772 .count();
10773 let high_cra_count = snapshot
10774 .combined_risk_assessments
10775 .iter()
10776 .filter(|c| {
10777 matches!(
10778 c.combined_risk,
10779 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
10780 )
10781 })
10782 .count();
10783
10784 info!(
10785 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
10786 snapshot.combined_risk_assessments.len(),
10787 significant_count,
10788 high_cra_count,
10789 );
10790 }
10791
10792 {
10796 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
10797
10798 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
10799
10800 for company in &self.config.companies {
10802 let entity_code = company.code.clone();
10803
10804 let tolerable_error = snapshot
10806 .materiality_calculations
10807 .iter()
10808 .find(|m| m.entity_code == entity_code)
10809 .map(|m| m.tolerable_error);
10810
10811 let entity_cras: Vec<_> = snapshot
10813 .combined_risk_assessments
10814 .iter()
10815 .filter(|c| c.entity_code == entity_code)
10816 .cloned()
10817 .collect();
10818
10819 if !entity_cras.is_empty() {
10820 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
10821 snapshot.sampling_plans.extend(plans);
10822 snapshot.sampled_items.extend(items);
10823 }
10824 }
10825
10826 let misstatement_count = snapshot
10827 .sampled_items
10828 .iter()
10829 .filter(|i| i.misstatement_found)
10830 .count();
10831
10832 info!(
10833 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
10834 snapshot.sampling_plans.len(),
10835 snapshot.sampled_items.len(),
10836 misstatement_count,
10837 );
10838 }
10839
10840 {
10844 use datasynth_generators::audit::scots_generator::{
10845 ScotsGenerator, ScotsGeneratorConfig,
10846 };
10847
10848 let ic_enabled = self.config.intercompany.enabled;
10849
10850 let config = ScotsGeneratorConfig {
10851 intercompany_enabled: ic_enabled,
10852 ..ScotsGeneratorConfig::default()
10853 };
10854 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
10855
10856 for company in &self.config.companies {
10857 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
10858 snapshot
10859 .significant_transaction_classes
10860 .extend(entity_scots);
10861 }
10862
10863 let estimation_count = snapshot
10864 .significant_transaction_classes
10865 .iter()
10866 .filter(|s| {
10867 matches!(
10868 s.transaction_type,
10869 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
10870 )
10871 })
10872 .count();
10873
10874 info!(
10875 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
10876 snapshot.significant_transaction_classes.len(),
10877 estimation_count,
10878 );
10879 }
10880
10881 {
10885 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
10886
10887 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
10888 let entity_codes: Vec<String> = self
10889 .config
10890 .companies
10891 .iter()
10892 .map(|c| c.code.clone())
10893 .collect();
10894 let unusual_flags =
10895 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
10896 info!(
10897 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
10898 unusual_flags.len(),
10899 unusual_flags
10900 .iter()
10901 .filter(|f| matches!(
10902 f.severity,
10903 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
10904 ))
10905 .count(),
10906 unusual_flags
10907 .iter()
10908 .filter(|f| matches!(
10909 f.severity,
10910 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
10911 ))
10912 .count(),
10913 unusual_flags
10914 .iter()
10915 .filter(|f| matches!(
10916 f.severity,
10917 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
10918 ))
10919 .count(),
10920 );
10921 snapshot.unusual_items = unusual_flags;
10922 }
10923
10924 {
10928 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
10929
10930 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
10931 let entity_codes: Vec<String> = self
10932 .config
10933 .companies
10934 .iter()
10935 .map(|c| c.code.clone())
10936 .collect();
10937 let current_period_label = format!("FY{fiscal_year}");
10938 let prior_period_label = format!("FY{}", fiscal_year - 1);
10939 let analytical_rels = ar_gen.generate_for_entities(
10940 &entity_codes,
10941 entries,
10942 &current_period_label,
10943 &prior_period_label,
10944 );
10945 let out_of_range = analytical_rels
10946 .iter()
10947 .filter(|r| !r.within_expected_range)
10948 .count();
10949 info!(
10950 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
10951 analytical_rels.len(),
10952 out_of_range,
10953 );
10954 snapshot.analytical_relationships = analytical_rels;
10955 }
10956
10957 if let Some(pb) = pb {
10958 pb.finish_with_message(format!(
10959 "Audit data: {} engagements, {} workpapers, {} evidence, \
10960 {} confirmations, {} procedure steps, {} samples, \
10961 {} analytical, {} IA funcs, {} related parties, \
10962 {} component auditors, {} letters, {} subsequent events, \
10963 {} service orgs, {} going concern, {} accounting estimates, \
10964 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
10965 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
10966 {} unusual items, {} analytical relationships",
10967 snapshot.engagements.len(),
10968 snapshot.workpapers.len(),
10969 snapshot.evidence.len(),
10970 snapshot.confirmations.len(),
10971 snapshot.procedure_steps.len(),
10972 snapshot.samples.len(),
10973 snapshot.analytical_results.len(),
10974 snapshot.ia_functions.len(),
10975 snapshot.related_parties.len(),
10976 snapshot.component_auditors.len(),
10977 snapshot.engagement_letters.len(),
10978 snapshot.subsequent_events.len(),
10979 snapshot.service_organizations.len(),
10980 snapshot.going_concern_assessments.len(),
10981 snapshot.accounting_estimates.len(),
10982 snapshot.audit_opinions.len(),
10983 snapshot.key_audit_matters.len(),
10984 snapshot.sox_302_certifications.len(),
10985 snapshot.sox_404_assessments.len(),
10986 snapshot.materiality_calculations.len(),
10987 snapshot.combined_risk_assessments.len(),
10988 snapshot.sampling_plans.len(),
10989 snapshot.significant_transaction_classes.len(),
10990 snapshot.unusual_items.len(),
10991 snapshot.analytical_relationships.len(),
10992 ));
10993 }
10994
10995 {
11002 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11003 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11004 debug!(
11005 "PCAOB-ISA mappings generated: {} mappings",
11006 snapshot.isa_pcaob_mappings.len()
11007 );
11008 }
11009
11010 {
11017 use datasynth_standards::audit::isa_reference::IsaStandard;
11018 snapshot.isa_mappings = IsaStandard::standard_entries();
11019 debug!(
11020 "ISA standard entries generated: {} standards",
11021 snapshot.isa_mappings.len()
11022 );
11023 }
11024
11025 {
11028 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
11029 .engagements
11030 .iter()
11031 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
11032 .collect();
11033
11034 for rpt in &mut snapshot.related_party_transactions {
11035 if rpt.journal_entry_id.is_some() {
11036 continue; }
11038 let entity = engagement_by_id
11039 .get(&rpt.engagement_id.to_string())
11040 .copied()
11041 .unwrap_or("");
11042
11043 let best_je = entries
11045 .iter()
11046 .filter(|je| je.header.company_code == entity)
11047 .min_by_key(|je| {
11048 (je.header.posting_date - rpt.transaction_date)
11049 .num_days()
11050 .abs()
11051 });
11052
11053 if let Some(je) = best_je {
11054 rpt.journal_entry_id = Some(je.header.document_id.to_string());
11055 }
11056 }
11057
11058 let linked = snapshot
11059 .related_party_transactions
11060 .iter()
11061 .filter(|t| t.journal_entry_id.is_some())
11062 .count();
11063 debug!(
11064 "Linked {}/{} related party transactions to journal entries",
11065 linked,
11066 snapshot.related_party_transactions.len()
11067 );
11068 }
11069
11070 Ok(snapshot)
11071 }
11072
11073 fn export_graphs(
11080 &mut self,
11081 entries: &[JournalEntry],
11082 _coa: &Arc<ChartOfAccounts>,
11083 stats: &mut EnhancedGenerationStatistics,
11084 ) -> SynthResult<GraphExportSnapshot> {
11085 let pb = self.create_progress_bar(100, "Exporting Graphs");
11086
11087 let mut snapshot = GraphExportSnapshot::default();
11088
11089 let output_dir = self
11091 .output_path
11092 .clone()
11093 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
11094 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
11095
11096 for graph_type in &self.config.graph_export.graph_types {
11098 if let Some(pb) = &pb {
11099 pb.inc(10);
11100 }
11101
11102 let graph_config = TransactionGraphConfig {
11104 include_vendors: false,
11105 include_customers: false,
11106 create_debit_credit_edges: true,
11107 include_document_nodes: graph_type.include_document_nodes,
11108 min_edge_weight: graph_type.min_edge_weight,
11109 aggregate_parallel_edges: graph_type.aggregate_edges,
11110 framework: None,
11111 };
11112
11113 let mut builder = TransactionGraphBuilder::new(graph_config);
11114 builder.add_journal_entries(entries);
11115 let graph = builder.build();
11116
11117 stats.graph_node_count += graph.node_count();
11119 stats.graph_edge_count += graph.edge_count();
11120
11121 if let Some(pb) = &pb {
11122 pb.inc(40);
11123 }
11124
11125 for format in &self.config.graph_export.formats {
11127 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
11128
11129 if let Err(e) = std::fs::create_dir_all(&format_dir) {
11131 warn!("Failed to create graph output directory: {}", e);
11132 continue;
11133 }
11134
11135 match format {
11136 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
11137 let pyg_config = PyGExportConfig {
11138 common: datasynth_graph::CommonExportConfig {
11139 export_node_features: true,
11140 export_edge_features: true,
11141 export_node_labels: true,
11142 export_edge_labels: true,
11143 export_masks: true,
11144 train_ratio: self.config.graph_export.train_ratio,
11145 val_ratio: self.config.graph_export.validation_ratio,
11146 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
11147 },
11148 one_hot_categoricals: false,
11149 };
11150
11151 let exporter = PyGExporter::new(pyg_config);
11152 match exporter.export(&graph, &format_dir) {
11153 Ok(metadata) => {
11154 snapshot.exports.insert(
11155 format!("{}_{}", graph_type.name, "pytorch_geometric"),
11156 GraphExportInfo {
11157 name: graph_type.name.clone(),
11158 format: "pytorch_geometric".to_string(),
11159 output_path: format_dir.clone(),
11160 node_count: metadata.num_nodes,
11161 edge_count: metadata.num_edges,
11162 },
11163 );
11164 snapshot.graph_count += 1;
11165 }
11166 Err(e) => {
11167 warn!("Failed to export PyTorch Geometric graph: {}", e);
11168 }
11169 }
11170 }
11171 datasynth_config::schema::GraphExportFormat::Neo4j => {
11172 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
11173
11174 let neo4j_config = Neo4jExportConfig {
11175 export_node_properties: true,
11176 export_edge_properties: true,
11177 export_features: true,
11178 generate_cypher: true,
11179 generate_admin_import: true,
11180 database_name: "synth".to_string(),
11181 cypher_batch_size: 1000,
11182 };
11183
11184 let exporter = Neo4jExporter::new(neo4j_config);
11185 match exporter.export(&graph, &format_dir) {
11186 Ok(metadata) => {
11187 snapshot.exports.insert(
11188 format!("{}_{}", graph_type.name, "neo4j"),
11189 GraphExportInfo {
11190 name: graph_type.name.clone(),
11191 format: "neo4j".to_string(),
11192 output_path: format_dir.clone(),
11193 node_count: metadata.num_nodes,
11194 edge_count: metadata.num_edges,
11195 },
11196 );
11197 snapshot.graph_count += 1;
11198 }
11199 Err(e) => {
11200 warn!("Failed to export Neo4j graph: {}", e);
11201 }
11202 }
11203 }
11204 datasynth_config::schema::GraphExportFormat::Dgl => {
11205 use datasynth_graph::{DGLExportConfig, DGLExporter};
11206
11207 let dgl_config = DGLExportConfig {
11208 common: datasynth_graph::CommonExportConfig {
11209 export_node_features: true,
11210 export_edge_features: true,
11211 export_node_labels: true,
11212 export_edge_labels: true,
11213 export_masks: true,
11214 train_ratio: self.config.graph_export.train_ratio,
11215 val_ratio: self.config.graph_export.validation_ratio,
11216 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
11217 },
11218 heterogeneous: self.config.graph_export.dgl.heterogeneous,
11219 include_pickle_script: true, };
11221
11222 let exporter = DGLExporter::new(dgl_config);
11223 match exporter.export(&graph, &format_dir) {
11224 Ok(metadata) => {
11225 snapshot.exports.insert(
11226 format!("{}_{}", graph_type.name, "dgl"),
11227 GraphExportInfo {
11228 name: graph_type.name.clone(),
11229 format: "dgl".to_string(),
11230 output_path: format_dir.clone(),
11231 node_count: metadata.common.num_nodes,
11232 edge_count: metadata.common.num_edges,
11233 },
11234 );
11235 snapshot.graph_count += 1;
11236 }
11237 Err(e) => {
11238 warn!("Failed to export DGL graph: {}", e);
11239 }
11240 }
11241 }
11242 datasynth_config::schema::GraphExportFormat::RustGraph => {
11243 use datasynth_graph::{
11244 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
11245 };
11246
11247 let rustgraph_config = RustGraphExportConfig {
11248 include_features: true,
11249 include_temporal: true,
11250 include_labels: true,
11251 source_name: "datasynth".to_string(),
11252 batch_id: None,
11253 output_format: RustGraphOutputFormat::JsonLines,
11254 export_node_properties: true,
11255 export_edge_properties: true,
11256 pretty_print: false,
11257 };
11258
11259 let exporter = RustGraphExporter::new(rustgraph_config);
11260 match exporter.export(&graph, &format_dir) {
11261 Ok(metadata) => {
11262 snapshot.exports.insert(
11263 format!("{}_{}", graph_type.name, "rustgraph"),
11264 GraphExportInfo {
11265 name: graph_type.name.clone(),
11266 format: "rustgraph".to_string(),
11267 output_path: format_dir.clone(),
11268 node_count: metadata.num_nodes,
11269 edge_count: metadata.num_edges,
11270 },
11271 );
11272 snapshot.graph_count += 1;
11273 }
11274 Err(e) => {
11275 warn!("Failed to export RustGraph: {}", e);
11276 }
11277 }
11278 }
11279 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
11280 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
11282 }
11283 }
11284 }
11285
11286 if let Some(pb) = &pb {
11287 pb.inc(40);
11288 }
11289 }
11290
11291 stats.graph_export_count = snapshot.graph_count;
11292 snapshot.exported = snapshot.graph_count > 0;
11293
11294 if let Some(pb) = pb {
11295 pb.finish_with_message(format!(
11296 "Graphs exported: {} graphs ({} nodes, {} edges)",
11297 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
11298 ));
11299 }
11300
11301 Ok(snapshot)
11302 }
11303
11304 fn build_additional_graphs(
11309 &self,
11310 banking: &BankingSnapshot,
11311 intercompany: &IntercompanySnapshot,
11312 entries: &[JournalEntry],
11313 stats: &mut EnhancedGenerationStatistics,
11314 ) {
11315 let output_dir = self
11316 .output_path
11317 .clone()
11318 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
11319 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
11320
11321 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
11323 info!("Phase 10c: Building banking network graph");
11324 let config = BankingGraphConfig::default();
11325 let mut builder = BankingGraphBuilder::new(config);
11326 builder.add_customers(&banking.customers);
11327 builder.add_accounts(&banking.accounts, &banking.customers);
11328 builder.add_transactions(&banking.transactions);
11329 let graph = builder.build();
11330
11331 let node_count = graph.node_count();
11332 let edge_count = graph.edge_count();
11333 stats.graph_node_count += node_count;
11334 stats.graph_edge_count += edge_count;
11335
11336 for format in &self.config.graph_export.formats {
11338 if matches!(
11339 format,
11340 datasynth_config::schema::GraphExportFormat::PytorchGeometric
11341 ) {
11342 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
11343 if let Err(e) = std::fs::create_dir_all(&format_dir) {
11344 warn!("Failed to create banking graph output dir: {}", e);
11345 continue;
11346 }
11347 let pyg_config = PyGExportConfig::default();
11348 let exporter = PyGExporter::new(pyg_config);
11349 if let Err(e) = exporter.export(&graph, &format_dir) {
11350 warn!("Failed to export banking graph as PyG: {}", e);
11351 } else {
11352 info!(
11353 "Banking network graph exported: {} nodes, {} edges",
11354 node_count, edge_count
11355 );
11356 }
11357 }
11358 }
11359 }
11360
11361 let approval_entries: Vec<_> = entries
11363 .iter()
11364 .filter(|je| je.header.approval_workflow.is_some())
11365 .collect();
11366
11367 if !approval_entries.is_empty() {
11368 info!(
11369 "Phase 10c: Building approval network graph ({} entries with approvals)",
11370 approval_entries.len()
11371 );
11372 let config = ApprovalGraphConfig::default();
11373 let mut builder = ApprovalGraphBuilder::new(config);
11374
11375 for je in &approval_entries {
11376 if let Some(ref wf) = je.header.approval_workflow {
11377 for action in &wf.actions {
11378 let record = datasynth_core::models::ApprovalRecord {
11379 approval_id: format!(
11380 "APR-{}-{}",
11381 je.header.document_id, action.approval_level
11382 ),
11383 document_number: je.header.document_id.to_string(),
11384 document_type: "JE".to_string(),
11385 company_code: je.company_code().to_string(),
11386 requester_id: wf.preparer_id.clone(),
11387 requester_name: Some(wf.preparer_name.clone()),
11388 approver_id: action.actor_id.clone(),
11389 approver_name: action.actor_name.clone(),
11390 approval_date: je.posting_date(),
11391 action: format!("{:?}", action.action),
11392 amount: wf.amount,
11393 approval_limit: None,
11394 comments: action.comments.clone(),
11395 delegation_from: None,
11396 is_auto_approved: false,
11397 };
11398 builder.add_approval(&record);
11399 }
11400 }
11401 }
11402
11403 let graph = builder.build();
11404 let node_count = graph.node_count();
11405 let edge_count = graph.edge_count();
11406 stats.graph_node_count += node_count;
11407 stats.graph_edge_count += edge_count;
11408
11409 for format in &self.config.graph_export.formats {
11411 if matches!(
11412 format,
11413 datasynth_config::schema::GraphExportFormat::PytorchGeometric
11414 ) {
11415 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
11416 if let Err(e) = std::fs::create_dir_all(&format_dir) {
11417 warn!("Failed to create approval graph output dir: {}", e);
11418 continue;
11419 }
11420 let pyg_config = PyGExportConfig::default();
11421 let exporter = PyGExporter::new(pyg_config);
11422 if let Err(e) = exporter.export(&graph, &format_dir) {
11423 warn!("Failed to export approval graph as PyG: {}", e);
11424 } else {
11425 info!(
11426 "Approval network graph exported: {} nodes, {} edges",
11427 node_count, edge_count
11428 );
11429 }
11430 }
11431 }
11432 }
11433
11434 if self.config.companies.len() >= 2 {
11436 info!(
11437 "Phase 10c: Building entity relationship graph ({} companies)",
11438 self.config.companies.len()
11439 );
11440
11441 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11442 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
11443
11444 let parent_code = &self.config.companies[0].code;
11446 let mut companies: Vec<datasynth_core::models::Company> =
11447 Vec::with_capacity(self.config.companies.len());
11448
11449 let first = &self.config.companies[0];
11451 companies.push(datasynth_core::models::Company::parent(
11452 &first.code,
11453 &first.name,
11454 &first.country,
11455 &first.currency,
11456 ));
11457
11458 for cc in self.config.companies.iter().skip(1) {
11460 companies.push(datasynth_core::models::Company::subsidiary(
11461 &cc.code,
11462 &cc.name,
11463 &cc.country,
11464 &cc.currency,
11465 parent_code,
11466 rust_decimal::Decimal::from(100),
11467 ));
11468 }
11469
11470 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
11472 self.config
11473 .companies
11474 .iter()
11475 .skip(1)
11476 .enumerate()
11477 .map(|(i, cc)| {
11478 let mut rel =
11479 datasynth_core::models::intercompany::IntercompanyRelationship::new(
11480 format!("REL{:03}", i + 1),
11481 parent_code.clone(),
11482 cc.code.clone(),
11483 rust_decimal::Decimal::from(100),
11484 start_date,
11485 );
11486 rel.functional_currency = cc.currency.clone();
11487 rel
11488 })
11489 .collect();
11490
11491 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
11492 builder.add_companies(&companies);
11493 builder.add_ownership_relationships(&relationships);
11494
11495 for pair in &intercompany.matched_pairs {
11497 builder.add_intercompany_edge(
11498 &pair.seller_company,
11499 &pair.buyer_company,
11500 pair.amount,
11501 &format!("{:?}", pair.transaction_type),
11502 );
11503 }
11504
11505 let graph = builder.build();
11506 let node_count = graph.node_count();
11507 let edge_count = graph.edge_count();
11508 stats.graph_node_count += node_count;
11509 stats.graph_edge_count += edge_count;
11510
11511 for format in &self.config.graph_export.formats {
11513 if matches!(
11514 format,
11515 datasynth_config::schema::GraphExportFormat::PytorchGeometric
11516 ) {
11517 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
11518 if let Err(e) = std::fs::create_dir_all(&format_dir) {
11519 warn!("Failed to create entity graph output dir: {}", e);
11520 continue;
11521 }
11522 let pyg_config = PyGExportConfig::default();
11523 let exporter = PyGExporter::new(pyg_config);
11524 if let Err(e) = exporter.export(&graph, &format_dir) {
11525 warn!("Failed to export entity graph as PyG: {}", e);
11526 } else {
11527 info!(
11528 "Entity relationship graph exported: {} nodes, {} edges",
11529 node_count, edge_count
11530 );
11531 }
11532 }
11533 }
11534 } else {
11535 debug!(
11536 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
11537 self.config.companies.len()
11538 );
11539 }
11540 }
11541
11542 #[allow(clippy::too_many_arguments)]
11549 fn export_hypergraph(
11550 &self,
11551 coa: &Arc<ChartOfAccounts>,
11552 entries: &[JournalEntry],
11553 document_flows: &DocumentFlowSnapshot,
11554 sourcing: &SourcingSnapshot,
11555 hr: &HrSnapshot,
11556 manufacturing: &ManufacturingSnapshot,
11557 banking: &BankingSnapshot,
11558 audit: &AuditSnapshot,
11559 financial_reporting: &FinancialReportingSnapshot,
11560 ocpm: &OcpmSnapshot,
11561 compliance: &ComplianceRegulationsSnapshot,
11562 stats: &mut EnhancedGenerationStatistics,
11563 ) -> SynthResult<HypergraphExportInfo> {
11564 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
11565 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
11566 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
11567 use datasynth_graph::models::hypergraph::AggregationStrategy;
11568
11569 let hg_settings = &self.config.graph_export.hypergraph;
11570
11571 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
11573 "truncate" => AggregationStrategy::Truncate,
11574 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
11575 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
11576 "importance_sample" => AggregationStrategy::ImportanceSample,
11577 _ => AggregationStrategy::PoolByCounterparty,
11578 };
11579
11580 let builder_config = HypergraphConfig {
11581 max_nodes: hg_settings.max_nodes,
11582 aggregation_strategy,
11583 include_coso: hg_settings.governance_layer.include_coso,
11584 include_controls: hg_settings.governance_layer.include_controls,
11585 include_sox: hg_settings.governance_layer.include_sox,
11586 include_vendors: hg_settings.governance_layer.include_vendors,
11587 include_customers: hg_settings.governance_layer.include_customers,
11588 include_employees: hg_settings.governance_layer.include_employees,
11589 include_p2p: hg_settings.process_layer.include_p2p,
11590 include_o2c: hg_settings.process_layer.include_o2c,
11591 include_s2c: hg_settings.process_layer.include_s2c,
11592 include_h2r: hg_settings.process_layer.include_h2r,
11593 include_mfg: hg_settings.process_layer.include_mfg,
11594 include_bank: hg_settings.process_layer.include_bank,
11595 include_audit: hg_settings.process_layer.include_audit,
11596 include_r2r: hg_settings.process_layer.include_r2r,
11597 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
11598 docs_per_counterparty_threshold: hg_settings
11599 .process_layer
11600 .docs_per_counterparty_threshold,
11601 include_accounts: hg_settings.accounting_layer.include_accounts,
11602 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
11603 include_cross_layer_edges: hg_settings.cross_layer.enabled,
11604 include_compliance: self.config.compliance_regulations.enabled,
11605 include_tax: true,
11606 include_treasury: true,
11607 include_esg: true,
11608 include_project: true,
11609 include_intercompany: true,
11610 include_temporal_events: true,
11611 };
11612
11613 let mut builder = HypergraphBuilder::new(builder_config);
11614
11615 builder.add_coso_framework();
11617
11618 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
11621 let controls = InternalControl::standard_controls();
11622 builder.add_controls(&controls);
11623 }
11624
11625 builder.add_vendors(&self.master_data.vendors);
11627 builder.add_customers(&self.master_data.customers);
11628 builder.add_employees(&self.master_data.employees);
11629
11630 builder.add_p2p_documents(
11632 &document_flows.purchase_orders,
11633 &document_flows.goods_receipts,
11634 &document_flows.vendor_invoices,
11635 &document_flows.payments,
11636 );
11637 builder.add_o2c_documents(
11638 &document_flows.sales_orders,
11639 &document_flows.deliveries,
11640 &document_flows.customer_invoices,
11641 );
11642 builder.add_s2c_documents(
11643 &sourcing.sourcing_projects,
11644 &sourcing.qualifications,
11645 &sourcing.rfx_events,
11646 &sourcing.bids,
11647 &sourcing.bid_evaluations,
11648 &sourcing.contracts,
11649 );
11650 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
11651 builder.add_mfg_documents(
11652 &manufacturing.production_orders,
11653 &manufacturing.quality_inspections,
11654 &manufacturing.cycle_counts,
11655 );
11656 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
11657 builder.add_audit_documents(
11658 &audit.engagements,
11659 &audit.workpapers,
11660 &audit.findings,
11661 &audit.evidence,
11662 &audit.risk_assessments,
11663 &audit.judgments,
11664 );
11665 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
11666
11667 if let Some(ref event_log) = ocpm.event_log {
11669 builder.add_ocpm_events(event_log);
11670 }
11671
11672 if self.config.compliance_regulations.enabled
11674 && hg_settings.governance_layer.include_controls
11675 {
11676 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
11678 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
11679 .standard_records
11680 .iter()
11681 .filter_map(|r| {
11682 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
11683 registry.get(&sid).cloned()
11684 })
11685 .collect();
11686
11687 builder.add_compliance_regulations(
11688 &standards,
11689 &compliance.findings,
11690 &compliance.filings,
11691 );
11692 }
11693
11694 builder.add_accounts(coa);
11696 builder.add_journal_entries_as_hyperedges(entries);
11697
11698 let hypergraph = builder.build();
11700
11701 let output_dir = self
11703 .output_path
11704 .clone()
11705 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
11706 let hg_dir = output_dir
11707 .join(&self.config.graph_export.output_subdirectory)
11708 .join(&hg_settings.output_subdirectory);
11709
11710 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
11712 "unified" => {
11713 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
11714 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
11715 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
11716 })?;
11717 (
11718 metadata.num_nodes,
11719 metadata.num_edges,
11720 metadata.num_hyperedges,
11721 )
11722 }
11723 _ => {
11724 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
11726 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
11727 SynthError::generation(format!("Hypergraph export failed: {e}"))
11728 })?;
11729 (
11730 metadata.num_nodes,
11731 metadata.num_edges,
11732 metadata.num_hyperedges,
11733 )
11734 }
11735 };
11736
11737 #[cfg(feature = "streaming")]
11739 if let Some(ref target_url) = hg_settings.stream_target {
11740 use crate::stream_client::{StreamClient, StreamConfig};
11741 use std::io::Write as _;
11742
11743 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
11744 let stream_config = StreamConfig {
11745 target_url: target_url.clone(),
11746 batch_size: hg_settings.stream_batch_size,
11747 api_key,
11748 ..StreamConfig::default()
11749 };
11750
11751 match StreamClient::new(stream_config) {
11752 Ok(mut client) => {
11753 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
11754 match exporter.export_to_writer(&hypergraph, &mut client) {
11755 Ok(_) => {
11756 if let Err(e) = client.flush() {
11757 warn!("Failed to flush stream client: {}", e);
11758 } else {
11759 info!("Streamed {} records to {}", client.total_sent(), target_url);
11760 }
11761 }
11762 Err(e) => {
11763 warn!("Streaming export failed: {}", e);
11764 }
11765 }
11766 }
11767 Err(e) => {
11768 warn!("Failed to create stream client: {}", e);
11769 }
11770 }
11771 }
11772
11773 stats.graph_node_count += num_nodes;
11775 stats.graph_edge_count += num_edges;
11776 stats.graph_export_count += 1;
11777
11778 Ok(HypergraphExportInfo {
11779 node_count: num_nodes,
11780 edge_count: num_edges,
11781 hyperedge_count: num_hyperedges,
11782 output_path: hg_dir,
11783 })
11784 }
11785
11786 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
11791 let pb = self.create_progress_bar(100, "Generating Banking Data");
11792
11793 let orchestrator = BankingOrchestratorBuilder::new()
11795 .config(self.config.banking.clone())
11796 .seed(self.seed + 9000)
11797 .country_pack(self.primary_pack().clone())
11798 .build();
11799
11800 if let Some(pb) = &pb {
11801 pb.inc(10);
11802 }
11803
11804 let result = orchestrator.generate();
11806
11807 if let Some(pb) = &pb {
11808 pb.inc(90);
11809 pb.finish_with_message(format!(
11810 "Banking: {} customers, {} transactions",
11811 result.customers.len(),
11812 result.transactions.len()
11813 ));
11814 }
11815
11816 let mut banking_customers = result.customers;
11821 let core_customers = &self.master_data.customers;
11822 if !core_customers.is_empty() {
11823 for (i, bc) in banking_customers.iter_mut().enumerate() {
11824 let core = &core_customers[i % core_customers.len()];
11825 bc.name = CustomerName::business(&core.name);
11826 bc.residence_country = core.country.clone();
11827 bc.enterprise_customer_id = Some(core.customer_id.clone());
11828 }
11829 debug!(
11830 "Cross-referenced {} banking customers with {} core customers",
11831 banking_customers.len(),
11832 core_customers.len()
11833 );
11834 }
11835
11836 Ok(BankingSnapshot {
11837 customers: banking_customers,
11838 accounts: result.accounts,
11839 transactions: result.transactions,
11840 transaction_labels: result.transaction_labels,
11841 customer_labels: result.customer_labels,
11842 account_labels: result.account_labels,
11843 relationship_labels: result.relationship_labels,
11844 narratives: result.narratives,
11845 suspicious_count: result.stats.suspicious_count,
11846 scenario_count: result.scenarios.len(),
11847 })
11848 }
11849
11850 fn calculate_total_transactions(&self) -> u64 {
11852 let months = self.config.global.period_months as f64;
11853 self.config
11854 .companies
11855 .iter()
11856 .map(|c| {
11857 let annual = c.annual_transaction_volume.count() as f64;
11858 let weighted = annual * c.volume_weight;
11859 (weighted * months / 12.0) as u64
11860 })
11861 .sum()
11862 }
11863
11864 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
11866 if !self.phase_config.show_progress {
11867 return None;
11868 }
11869
11870 let pb = if let Some(mp) = &self.multi_progress {
11871 mp.add(ProgressBar::new(total))
11872 } else {
11873 ProgressBar::new(total)
11874 };
11875
11876 pb.set_style(
11877 ProgressStyle::default_bar()
11878 .template(&format!(
11879 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
11880 ))
11881 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
11882 .progress_chars("#>-"),
11883 );
11884
11885 Some(pb)
11886 }
11887
11888 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
11890 self.coa.clone()
11891 }
11892
11893 pub fn get_master_data(&self) -> &MasterDataSnapshot {
11895 &self.master_data
11896 }
11897
11898 fn phase_compliance_regulations(
11900 &mut self,
11901 _stats: &mut EnhancedGenerationStatistics,
11902 ) -> SynthResult<ComplianceRegulationsSnapshot> {
11903 if !self.phase_config.generate_compliance_regulations {
11904 return Ok(ComplianceRegulationsSnapshot::default());
11905 }
11906
11907 info!("Phase: Generating Compliance Regulations Data");
11908
11909 let cr_config = &self.config.compliance_regulations;
11910
11911 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
11913 self.config
11914 .companies
11915 .iter()
11916 .map(|c| c.country.clone())
11917 .collect::<std::collections::HashSet<_>>()
11918 .into_iter()
11919 .collect()
11920 } else {
11921 cr_config.jurisdictions.clone()
11922 };
11923
11924 let fallback_date =
11926 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
11927 let reference_date = cr_config
11928 .reference_date
11929 .as_ref()
11930 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
11931 .unwrap_or_else(|| {
11932 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11933 .unwrap_or(fallback_date)
11934 });
11935
11936 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
11938 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
11939 let cross_reference_records = reg_gen.generate_cross_reference_records();
11940 let jurisdiction_records =
11941 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
11942
11943 info!(
11944 " Standards: {} records, {} cross-references, {} jurisdictions",
11945 standard_records.len(),
11946 cross_reference_records.len(),
11947 jurisdiction_records.len()
11948 );
11949
11950 let audit_procedures = if cr_config.audit_procedures.enabled {
11952 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
11953 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
11954 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
11955 confidence_level: cr_config.audit_procedures.confidence_level,
11956 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
11957 };
11958 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
11959 self.seed + 9000,
11960 proc_config,
11961 );
11962 let registry = reg_gen.registry();
11963 let mut all_procs = Vec::new();
11964 for jurisdiction in &jurisdictions {
11965 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
11966 all_procs.extend(procs);
11967 }
11968 info!(" Audit procedures: {}", all_procs.len());
11969 all_procs
11970 } else {
11971 Vec::new()
11972 };
11973
11974 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
11976 let finding_config =
11977 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
11978 finding_rate: cr_config.findings.finding_rate,
11979 material_weakness_rate: cr_config.findings.material_weakness_rate,
11980 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
11981 generate_remediation: cr_config.findings.generate_remediation,
11982 };
11983 let mut finding_gen =
11984 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
11985 self.seed + 9100,
11986 finding_config,
11987 );
11988 let mut all_findings = Vec::new();
11989 for company in &self.config.companies {
11990 let company_findings =
11991 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
11992 all_findings.extend(company_findings);
11993 }
11994 info!(" Compliance findings: {}", all_findings.len());
11995 all_findings
11996 } else {
11997 Vec::new()
11998 };
11999
12000 let filings = if cr_config.filings.enabled {
12002 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
12003 filing_types: cr_config.filings.filing_types.clone(),
12004 generate_status_progression: cr_config.filings.generate_status_progression,
12005 };
12006 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
12007 self.seed + 9200,
12008 filing_config,
12009 );
12010 let company_codes: Vec<String> = self
12011 .config
12012 .companies
12013 .iter()
12014 .map(|c| c.code.clone())
12015 .collect();
12016 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12017 .unwrap_or(fallback_date);
12018 let filings = filing_gen.generate_filings(
12019 &company_codes,
12020 &jurisdictions,
12021 start_date,
12022 self.config.global.period_months,
12023 );
12024 info!(" Regulatory filings: {}", filings.len());
12025 filings
12026 } else {
12027 Vec::new()
12028 };
12029
12030 let compliance_graph = if cr_config.graph.enabled {
12032 let graph_config = datasynth_graph::ComplianceGraphConfig {
12033 include_standard_nodes: cr_config.graph.include_compliance_nodes,
12034 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
12035 include_cross_references: cr_config.graph.include_cross_references,
12036 include_supersession_edges: cr_config.graph.include_supersession_edges,
12037 include_account_links: cr_config.graph.include_account_links,
12038 include_control_links: cr_config.graph.include_control_links,
12039 include_company_links: cr_config.graph.include_company_links,
12040 };
12041 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
12042
12043 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
12045 .iter()
12046 .map(|r| datasynth_graph::StandardNodeInput {
12047 standard_id: r.standard_id.clone(),
12048 title: r.title.clone(),
12049 category: r.category.clone(),
12050 domain: r.domain.clone(),
12051 is_active: r.is_active,
12052 features: vec![if r.is_active { 1.0 } else { 0.0 }],
12053 applicable_account_types: r.applicable_account_types.clone(),
12054 applicable_processes: r.applicable_processes.clone(),
12055 })
12056 .collect();
12057 builder.add_standards(&standard_inputs);
12058
12059 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
12061 jurisdiction_records
12062 .iter()
12063 .map(|r| datasynth_graph::JurisdictionNodeInput {
12064 country_code: r.country_code.clone(),
12065 country_name: r.country_name.clone(),
12066 framework: r.accounting_framework.clone(),
12067 standard_count: r.standard_count,
12068 tax_rate: r.statutory_tax_rate,
12069 })
12070 .collect();
12071 builder.add_jurisdictions(&jurisdiction_inputs);
12072
12073 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
12075 cross_reference_records
12076 .iter()
12077 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
12078 from_standard: r.from_standard.clone(),
12079 to_standard: r.to_standard.clone(),
12080 relationship: r.relationship.clone(),
12081 convergence_level: r.convergence_level,
12082 })
12083 .collect();
12084 builder.add_cross_references(&xref_inputs);
12085
12086 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
12088 .iter()
12089 .map(|r| datasynth_graph::JurisdictionMappingInput {
12090 country_code: r.jurisdiction.clone(),
12091 standard_id: r.standard_id.clone(),
12092 })
12093 .collect();
12094 builder.add_jurisdiction_mappings(&mapping_inputs);
12095
12096 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
12098 .iter()
12099 .map(|p| datasynth_graph::ProcedureNodeInput {
12100 procedure_id: p.procedure_id.clone(),
12101 standard_id: p.standard_id.clone(),
12102 procedure_type: p.procedure_type.clone(),
12103 sample_size: p.sample_size,
12104 confidence_level: p.confidence_level,
12105 })
12106 .collect();
12107 builder.add_procedures(&proc_inputs);
12108
12109 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
12111 .iter()
12112 .map(|f| datasynth_graph::FindingNodeInput {
12113 finding_id: f.finding_id.to_string(),
12114 standard_id: f
12115 .related_standards
12116 .first()
12117 .map(|s| s.as_str().to_string())
12118 .unwrap_or_default(),
12119 severity: f.severity.to_string(),
12120 deficiency_level: f.deficiency_level.to_string(),
12121 severity_score: f.deficiency_level.severity_score(),
12122 control_id: f.control_id.clone(),
12123 affected_accounts: f.affected_accounts.clone(),
12124 })
12125 .collect();
12126 builder.add_findings(&finding_inputs);
12127
12128 if cr_config.graph.include_account_links {
12130 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
12131 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
12132 for std_record in &standard_records {
12133 if let Some(std_obj) =
12134 registry.get(&datasynth_core::models::compliance::StandardId::parse(
12135 &std_record.standard_id,
12136 ))
12137 {
12138 for acct_type in &std_obj.applicable_account_types {
12139 account_links.push(datasynth_graph::AccountLinkInput {
12140 standard_id: std_record.standard_id.clone(),
12141 account_code: acct_type.clone(),
12142 account_name: acct_type.clone(),
12143 });
12144 }
12145 }
12146 }
12147 builder.add_account_links(&account_links);
12148 }
12149
12150 if cr_config.graph.include_control_links {
12152 let mut control_links = Vec::new();
12153 let sox_like_ids: Vec<String> = standard_records
12155 .iter()
12156 .filter(|r| {
12157 r.standard_id.starts_with("SOX")
12158 || r.standard_id.starts_with("PCAOB-AS-2201")
12159 })
12160 .map(|r| r.standard_id.clone())
12161 .collect();
12162 let control_ids = [
12164 ("C001", "Cash Controls"),
12165 ("C002", "Large Transaction Approval"),
12166 ("C010", "PO Approval"),
12167 ("C011", "Three-Way Match"),
12168 ("C020", "Revenue Recognition"),
12169 ("C021", "Credit Check"),
12170 ("C030", "Manual JE Approval"),
12171 ("C031", "Period Close Review"),
12172 ("C032", "Account Reconciliation"),
12173 ("C040", "Payroll Processing"),
12174 ("C050", "Fixed Asset Capitalization"),
12175 ("C060", "Intercompany Elimination"),
12176 ];
12177 for sox_id in &sox_like_ids {
12178 for (ctrl_id, ctrl_name) in &control_ids {
12179 control_links.push(datasynth_graph::ControlLinkInput {
12180 standard_id: sox_id.clone(),
12181 control_id: ctrl_id.to_string(),
12182 control_name: ctrl_name.to_string(),
12183 });
12184 }
12185 }
12186 builder.add_control_links(&control_links);
12187 }
12188
12189 if cr_config.graph.include_company_links {
12191 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
12192 .iter()
12193 .enumerate()
12194 .map(|(i, f)| datasynth_graph::FilingNodeInput {
12195 filing_id: format!("F{:04}", i + 1),
12196 filing_type: f.filing_type.to_string(),
12197 company_code: f.company_code.clone(),
12198 jurisdiction: f.jurisdiction.clone(),
12199 status: format!("{:?}", f.status),
12200 })
12201 .collect();
12202 builder.add_filings(&filing_inputs);
12203 }
12204
12205 let graph = builder.build();
12206 info!(
12207 " Compliance graph: {} nodes, {} edges",
12208 graph.nodes.len(),
12209 graph.edges.len()
12210 );
12211 Some(graph)
12212 } else {
12213 None
12214 };
12215
12216 self.check_resources_with_log("post-compliance-regulations")?;
12217
12218 Ok(ComplianceRegulationsSnapshot {
12219 standard_records,
12220 cross_reference_records,
12221 jurisdiction_records,
12222 audit_procedures,
12223 findings,
12224 filings,
12225 compliance_graph,
12226 })
12227 }
12228
12229 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
12231 use super::lineage::LineageGraphBuilder;
12232
12233 let mut builder = LineageGraphBuilder::new();
12234
12235 builder.add_config_section("config:global", "Global Config");
12237 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
12238 builder.add_config_section("config:transactions", "Transaction Config");
12239
12240 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
12242 builder.add_generator_phase("phase:je", "Journal Entry Generation");
12243
12244 builder.configured_by("phase:coa", "config:chart_of_accounts");
12246 builder.configured_by("phase:je", "config:transactions");
12247
12248 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
12250 builder.produced_by("output:je", "phase:je");
12251
12252 if self.phase_config.generate_master_data {
12254 builder.add_config_section("config:master_data", "Master Data Config");
12255 builder.add_generator_phase("phase:master_data", "Master Data Generation");
12256 builder.configured_by("phase:master_data", "config:master_data");
12257 builder.input_to("phase:master_data", "phase:je");
12258 }
12259
12260 if self.phase_config.generate_document_flows {
12261 builder.add_config_section("config:document_flows", "Document Flow Config");
12262 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
12263 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
12264 builder.configured_by("phase:p2p", "config:document_flows");
12265 builder.configured_by("phase:o2c", "config:document_flows");
12266
12267 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
12268 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
12269 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
12270 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
12271 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
12272
12273 builder.produced_by("output:po", "phase:p2p");
12274 builder.produced_by("output:gr", "phase:p2p");
12275 builder.produced_by("output:vi", "phase:p2p");
12276 builder.produced_by("output:so", "phase:o2c");
12277 builder.produced_by("output:ci", "phase:o2c");
12278 }
12279
12280 if self.phase_config.inject_anomalies {
12281 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
12282 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
12283 builder.configured_by("phase:anomaly", "config:fraud");
12284 builder.add_output_file(
12285 "output:labels",
12286 "Anomaly Labels",
12287 "labels/anomaly_labels.csv",
12288 );
12289 builder.produced_by("output:labels", "phase:anomaly");
12290 }
12291
12292 if self.phase_config.generate_audit {
12293 builder.add_config_section("config:audit", "Audit Config");
12294 builder.add_generator_phase("phase:audit", "Audit Data Generation");
12295 builder.configured_by("phase:audit", "config:audit");
12296 }
12297
12298 if self.phase_config.generate_banking {
12299 builder.add_config_section("config:banking", "Banking Config");
12300 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
12301 builder.configured_by("phase:banking", "config:banking");
12302 }
12303
12304 if self.config.llm.enabled {
12305 builder.add_config_section("config:llm", "LLM Enrichment Config");
12306 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
12307 builder.configured_by("phase:llm_enrichment", "config:llm");
12308 }
12309
12310 if self.config.diffusion.enabled {
12311 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
12312 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
12313 builder.configured_by("phase:diffusion", "config:diffusion");
12314 }
12315
12316 if self.config.causal.enabled {
12317 builder.add_config_section("config:causal", "Causal Generation Config");
12318 builder.add_generator_phase("phase:causal", "Causal Overlay");
12319 builder.configured_by("phase:causal", "config:causal");
12320 }
12321
12322 builder.build()
12323 }
12324
12325 fn compute_company_revenue(
12334 entries: &[JournalEntry],
12335 company_code: &str,
12336 ) -> rust_decimal::Decimal {
12337 use rust_decimal::Decimal;
12338 let mut revenue = Decimal::ZERO;
12339 for je in entries {
12340 if je.header.company_code != company_code {
12341 continue;
12342 }
12343 for line in &je.lines {
12344 if line.gl_account.starts_with('4') {
12345 revenue += line.credit_amount - line.debit_amount;
12347 }
12348 }
12349 }
12350 revenue.max(Decimal::ZERO)
12351 }
12352
12353 fn compute_entity_net_assets(
12357 entries: &[JournalEntry],
12358 entity_code: &str,
12359 ) -> rust_decimal::Decimal {
12360 use rust_decimal::Decimal;
12361 let mut asset_net = Decimal::ZERO;
12362 let mut liability_net = Decimal::ZERO;
12363 for je in entries {
12364 if je.header.company_code != entity_code {
12365 continue;
12366 }
12367 for line in &je.lines {
12368 if line.gl_account.starts_with('1') {
12369 asset_net += line.debit_amount - line.credit_amount;
12370 } else if line.gl_account.starts_with('2') {
12371 liability_net += line.credit_amount - line.debit_amount;
12372 }
12373 }
12374 }
12375 asset_net - liability_net
12376 }
12377}
12378
12379fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
12381 match format {
12382 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
12383 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
12384 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
12385 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
12386 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
12387 }
12388}
12389
12390#[cfg(test)]
12391#[allow(clippy::unwrap_used)]
12392mod tests {
12393 use super::*;
12394 use datasynth_config::schema::*;
12395
12396 fn create_test_config() -> GeneratorConfig {
12397 GeneratorConfig {
12398 global: GlobalConfig {
12399 industry: IndustrySector::Manufacturing,
12400 start_date: "2024-01-01".to_string(),
12401 period_months: 1,
12402 seed: Some(42),
12403 parallel: false,
12404 group_currency: "USD".to_string(),
12405 presentation_currency: None,
12406 worker_threads: 0,
12407 memory_limit_mb: 0,
12408 fiscal_year_months: None,
12409 },
12410 companies: vec![CompanyConfig {
12411 code: "1000".to_string(),
12412 name: "Test Company".to_string(),
12413 currency: "USD".to_string(),
12414 functional_currency: None,
12415 country: "US".to_string(),
12416 annual_transaction_volume: TransactionVolume::TenK,
12417 volume_weight: 1.0,
12418 fiscal_year_variant: "K4".to_string(),
12419 }],
12420 chart_of_accounts: ChartOfAccountsConfig {
12421 complexity: CoAComplexity::Small,
12422 industry_specific: true,
12423 custom_accounts: None,
12424 min_hierarchy_depth: 2,
12425 max_hierarchy_depth: 4,
12426 },
12427 transactions: TransactionConfig::default(),
12428 output: OutputConfig::default(),
12429 fraud: FraudConfig::default(),
12430 internal_controls: InternalControlsConfig::default(),
12431 business_processes: BusinessProcessConfig::default(),
12432 user_personas: UserPersonaConfig::default(),
12433 templates: TemplateConfig::default(),
12434 approval: ApprovalConfig::default(),
12435 departments: DepartmentConfig::default(),
12436 master_data: MasterDataConfig::default(),
12437 document_flows: DocumentFlowConfig::default(),
12438 intercompany: IntercompanyConfig::default(),
12439 balance: BalanceConfig::default(),
12440 ocpm: OcpmConfig::default(),
12441 audit: AuditGenerationConfig::default(),
12442 banking: datasynth_banking::BankingConfig::default(),
12443 data_quality: DataQualitySchemaConfig::default(),
12444 scenario: ScenarioConfig::default(),
12445 temporal: TemporalDriftConfig::default(),
12446 graph_export: GraphExportConfig::default(),
12447 streaming: StreamingSchemaConfig::default(),
12448 rate_limit: RateLimitSchemaConfig::default(),
12449 temporal_attributes: TemporalAttributeSchemaConfig::default(),
12450 relationships: RelationshipSchemaConfig::default(),
12451 accounting_standards: AccountingStandardsConfig::default(),
12452 audit_standards: AuditStandardsConfig::default(),
12453 distributions: Default::default(),
12454 temporal_patterns: Default::default(),
12455 vendor_network: VendorNetworkSchemaConfig::default(),
12456 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
12457 relationship_strength: RelationshipStrengthSchemaConfig::default(),
12458 cross_process_links: CrossProcessLinksSchemaConfig::default(),
12459 organizational_events: OrganizationalEventsSchemaConfig::default(),
12460 behavioral_drift: BehavioralDriftSchemaConfig::default(),
12461 market_drift: MarketDriftSchemaConfig::default(),
12462 drift_labeling: DriftLabelingSchemaConfig::default(),
12463 anomaly_injection: Default::default(),
12464 industry_specific: Default::default(),
12465 fingerprint_privacy: Default::default(),
12466 quality_gates: Default::default(),
12467 compliance: Default::default(),
12468 webhooks: Default::default(),
12469 llm: Default::default(),
12470 diffusion: Default::default(),
12471 causal: Default::default(),
12472 source_to_pay: Default::default(),
12473 financial_reporting: Default::default(),
12474 hr: Default::default(),
12475 manufacturing: Default::default(),
12476 sales_quotes: Default::default(),
12477 tax: Default::default(),
12478 treasury: Default::default(),
12479 project_accounting: Default::default(),
12480 esg: Default::default(),
12481 country_packs: None,
12482 scenarios: Default::default(),
12483 session: Default::default(),
12484 compliance_regulations: Default::default(),
12485 }
12486 }
12487
/// `EnhancedOrchestrator::with_defaults` accepts the baseline test config.
#[test]
fn test_enhanced_orchestrator_creation() {
    let creation = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(creation.is_ok());
}
12494
/// Journal-entry-only generation (master data, document flows and anomaly
/// injection disabled) completes and yields at least one journal entry.
#[test]
fn test_minimal_generation() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();

    // Unwrap directly instead of asserting `is_ok()` first: if generation
    // fails, the panic then carries the error's `Debug` output rather than a
    // bare "assertion failed" message.
    let result = orchestrator.generate().unwrap();

    assert!(!result.journal_entries.is_empty());
}
12514
/// Master-data generation with journal entries and document flows disabled
/// yields non-empty vendor, customer and material lists.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let master = &generated.master_data;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
12539
/// Document-flow generation on top of master data produces P2P and O2C
/// chains as well as purchase orders and sales orders.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        generate_ocpm_events: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();
    let flows = &generated.document_flows;

    // Chains for both processes.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // Flattened documents taken from those chains.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
12573
/// With anomaly injection enabled on journal entries, entries are generated
/// and the anomaly label summary is populated.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    assert!(!generated.journal_entries.is_empty());

    assert!(generated.anomaly_labels.summary.is_some());
}
12596
/// Runs master data, document flows, journal entries and balance validation
/// together and checks that every stage left output behind.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        generate_ocpm_events: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    // Core generation output.
    assert!(!generated.master_data.vendors.is_empty());
    assert!(!generated.master_data.customers.is_empty());
    assert!(!generated.document_flows.p2p_chains.is_empty());
    assert!(!generated.document_flows.o2c_chains.is_empty());
    assert!(!generated.journal_entries.is_empty());
    assert!(generated.statistics.accounts_count > 0);

    // Subledger records.
    let subledger = &generated.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation actually ran over some entries.
    let validation = &generated.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
12638
/// Subledger AP/AR invoices match the vendor/customer invoices from the
/// document flows one-for-one by count, and the statistics report the same
/// counts.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        generate_ocpm_events: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let flows = &generated.document_flows;
    let subledger = &generated.subledger;
    let stats = &generated.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // One AR invoice per customer invoice.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Statistics agree with the subledger contents.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
12694
/// Balance validation over generated journal entries reports that it ran,
/// found no unbalanced entries, and that total debits equal total credits.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();
    let validation = &generated.balance_validation;

    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    assert!(!validation.has_unbalanced_entries);

    assert_eq!(validation.total_debits, validation.total_credits);
}
12724
/// The counts reported in `statistics` equal the lengths of the collections
/// actually returned (vendors, customers, materials, journal entries).
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let stats = &generated.statistics;
    let master = &generated.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(
        stats.total_entries as usize,
        generated.journal_entries.len()
    );
}
12763
/// Pins the default `PhaseConfig`: master data, document flows, journal
/// entries, balance validation and progress display are on; anomaly
/// injection is off; per-company vendor and customer counts are positive.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);
    assert!(!defaults.inject_anomalies);
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
12776
/// A freshly constructed orchestrator has no chart of accounts yet.
#[test]
fn test_get_coa_before_generation() {
    let engine = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    assert!(engine.get_coa().is_none());
}
12785
/// After a successful `generate()` run the chart of accounts is available
/// through `get_coa()`.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    // Only the orchestrator's state after the run matters here; the
    // generation result itself is discarded.
    let _ = engine.generate().unwrap();

    assert!(engine.get_coa().is_some());
}
12804
/// Master-data-only generation returns a non-empty vendor list in the
/// result's `master_data`.
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    assert!(!generated.master_data.vendors.is_empty());
}
12828
/// `with_progress(false)` turns off `show_progress` in the orchestrator's
/// phase configuration.
#[test]
fn test_with_progress_builder() {
    let built = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let engine = built.with_progress(false);

    assert!(!engine.phase_config.show_progress);
}
12839
/// Adds a second (EUR/DE) company to the baseline config and checks that the
/// statistics reflect two companies and at least ten vendors and customers.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        functional_currency: None,
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // Presumably 2 companies x 5 per company gives the lower bound of 10.
    assert!(result.statistics.vendor_count >= 10);
    assert!(result.statistics.customer_count >= 10);
    // `assert_eq!` reports both the actual and the expected value on failure,
    // unlike `assert!(a == b)`.
    assert_eq!(result.statistics.companies_count, 2);
}
12876
/// Requesting document flows while master data generation is disabled
/// produces no P2P or O2C chains (and does not fail).
#[test]
fn test_empty_master_data_skips_document_flows() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();
    let flows = &generated.document_flows;

    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
12896
/// `statistics.total_line_items` equals the sum of `line_count()` over all
/// generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let summed_line_items = generated
        .journal_entries
        .iter()
        .map(|entry| entry.line_count() as u64)
        .sum::<u64>();

    assert_eq!(generated.statistics.total_line_items, summed_line_items);
}
12920
/// Enables audit generation with two engagements and verifies that the
/// audit artefact collections are populated and that every audit counter in
/// `statistics` matches the length of its collection.
#[test]
fn test_audit_generation() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let audit = &generated.audit;
    let stats = &generated.statistics;

    // Core audit artefacts.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Standard-specific artefacts; the messages name the ISA concerned.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Every statistics counter mirrors its collection.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(
        stats.audit_procedure_step_count,
        audit.procedure_steps.len()
    );
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(
        stats.audit_related_party_count,
        audit.related_parties.len()
    );
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
13040
/// The LLM, diffusion and causal sections are disabled in the baseline
/// config, and a run with them disabled leaves all of their statistics at
/// zero / `None` and produces no counterfactual pairs.
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);

    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;

    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(generated.counterfactual_pairs.is_empty());
}
13072
/// With counterfactual generation on (and period close off), there is one
/// counterfactual pair per journal entry, the statistics agree, and all
/// pair IDs are distinct. The checks only apply when journal entries exist.
#[test]
fn test_counterfactual_generation_enabled() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        generate_counterfactuals: true,
        generate_period_close: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    if !generated.journal_entries.is_empty() {
        let entry_count = generated.journal_entries.len();
        let pairs = &generated.counterfactual_pairs;

        assert_eq!(pairs.len(), entry_count);
        assert_eq!(generated.statistics.counterfactual_pair_count, entry_count);

        // Collecting into a set drops duplicates, so equal sizes mean every
        // pair ID is unique.
        let unique_ids: std::collections::HashSet<_> =
            pairs.iter().map(|pair| pair.pair_id.clone()).collect();
        assert_eq!(unique_ids.len(), pairs.len());
    }
}
13109
/// With LLM enrichment enabled and capped at three vendor enrichments, at
/// least one and at most three vendors are reported as enriched.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();
    let enriched = generated.statistics.llm_vendors_enriched;

    assert!(enriched > 0);
    assert!(enriched <= 3);
}
13137
/// With diffusion enabled and `sample_size = 20`, exactly 20 diffusion
/// samples are reported.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();

    assert_eq!(generated.statistics.diffusion_samples_generated, 20);
}
13160
/// With the causal overlay enabled (`fraud_detection` template, 100 samples,
/// validation on), 100 samples are reported and a validation outcome is
/// recorded.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "fraud_detection".to_owned();
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;

    assert_eq!(stats.causal_samples_generated, 100);
    assert!(stats.causal_validation_passed.is_some());
}
13186
/// With the `revenue_cycle` causal template, 50 samples and validation
/// turned off, 50 samples are reported and no validation outcome is set.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "revenue_cycle".to_owned();
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;

    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_none());
}
13212
/// Enables LLM enrichment, diffusion and the causal overlay in the same run
/// and checks that each phase reports its expected statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut cfg = create_test_config();

    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 2;

    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.sample_size = 10;

    cfg.causal.enabled = true;
    cfg.causal.sample_size = 50;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
13248
/// The LLM, diffusion and causal statistics fields survive a JSON
/// serialize/deserialize round trip unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let round_tripped: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    assert_eq!(round_tripped.llm_enrichment_ms, 42);
    assert_eq!(round_tripped.llm_vendors_enriched, 10);
    assert_eq!(round_tripped.diffusion_enhancement_ms, 100);
    assert_eq!(round_tripped.diffusion_samples_generated, 50);
    assert_eq!(round_tripped.causal_generation_ms, 200);
    assert_eq!(round_tripped.causal_samples_generated, 100);
    assert_eq!(round_tripped.causal_validation_passed, Some(true));
}
13275
/// JSON that omits the LLM, diffusion and causal statistics fields still
/// deserializes, with those fields taking zero / `None` values.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload without any of the llm_* / diffusion_* / causal_* keys.
    let legacy_json = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let parsed: EnhancedGenerationStatistics = serde_json::from_str(legacy_json).unwrap();

    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
13325}