1use std::collections::HashMap;
12
13use chrono::Datelike;
14use serde_json::Value;
15
16use datasynth_banking::models::{BankAccount, BankTransaction, BankingCustomer};
17use datasynth_core::models::audit::going_concern::GoingConcernAssessment;
18use datasynth_core::models::audit::materiality_calculation::MaterialityCalculation;
19use datasynth_core::models::audit::{
20 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
21 AuditSample, ConfirmationResponse, ExternalConfirmation, InternalAuditFunction,
22 InternalAuditReport, ProfessionalJudgment, RelatedParty, RelatedPartyTransaction,
23 RiskAssessment, Workpaper,
24};
25use datasynth_core::models::compliance::{ComplianceFinding, ComplianceStandard, RegulatoryFiling};
26use datasynth_core::models::intercompany::{EliminationEntry, ICMatchedPair};
27use datasynth_core::models::sourcing::{
28 BidEvaluation, ProcurementContract, RfxEvent, SourcingProject, SupplierBid,
29 SupplierQualification,
30};
31use datasynth_core::models::ExpenseReport;
32use datasynth_core::models::{
33 BankReconciliation, CashForecast, CashPosition, ChartOfAccounts, ClimateScenario,
34 CosoComponent, CosoPrinciple, Customer, CycleCount, DebtInstrument, EarnedValueMetric,
35 EmissionRecord, Employee, EsgDisclosure, FixedAsset, HedgeRelationship, InternalControl,
36 JournalEntry, Material, OrganizationalEvent, PayrollRun, ProcessEvolutionEvent,
37 ProductionOrder, Project, ProjectMilestone, QualityInspection, SupplierEsgAssessment, TaxCode,
38 TaxJurisdiction, TaxLine, TaxProvision, TaxReturn, TimeEntry, Vendor, WithholdingTaxRecord,
39};
40use datasynth_generators::disruption::DisruptionEvent;
41use datasynth_standards::audit::opinion::AuditOpinion;
42
43use crate::models::hypergraph::{
44 AggregationStrategy, CrossLayerEdge, Hyperedge, HyperedgeParticipant, Hypergraph,
45 HypergraphLayer, HypergraphMetadata, HypergraphNode, NodeBudget, NodeBudgetReport,
46 NodeBudgetSuggestion,
47};
48
// Temporal feature constants. Their call sites are outside this part of the
// file, so the descriptions below are inferred from names and values.
// NOTE(review): confirm against the feature-extraction code before relying on them.

/// Day-of-month value treated as the month-end cut-off (presumably `day >= 28`).
const MONTH_END_DAY_THRESHOLD: u32 = 28;
/// Divisor presumably used to scale a zero-based weekday index (0..=6) into `[0, 1]`.
const WEEKDAY_NORMALIZER: f64 = 6.0;
/// Divisor presumably used to scale a day-of-month (1..=31) into `(0, 1]`.
const DAY_OF_MONTH_NORMALIZER: f64 = 31.0;
/// Divisor presumably used to scale a month number (1..=12) into `(0, 1]`.
const MONTH_NORMALIZER: f64 = 12.0;
57
/// Numeric codes attached to hypergraph nodes (`entity_type_code`) and
/// cross-layer edges (`edge_type_code`).
///
/// Codes >= 100 are used as node type codes and the small codes (40..=55) as
/// edge type codes by the builder methods in this file; the 138..=158 block is
/// presumably also edge codes (audit-procedure relationships) — confirm at the
/// call sites.
///
/// NOTE(review): `AML_ALERT` and `COMPLIANCE_STANDARD` are both `505`, so the
/// two entity types cannot be told apart by code alone. Renumbering changes
/// emitted data, so it is only flagged here; decide with downstream consumers.
// Not every code is referenced by the builder, hence the blanket allow.
#[allow(dead_code)]
mod type_codes {
    // Accounting entities (100-range).
    pub const ACCOUNT: u32 = 100;
    pub const JOURNAL_ENTRY: u32 = 101;
    pub const MATERIAL: u32 = 102;
    pub const FIXED_ASSET: u32 = 103;
    pub const COST_CENTER: u32 = 104;

    // Counterparties / people (200-range).
    pub const VENDOR: u32 = 200;
    pub const CUSTOMER: u32 = 201;
    pub const EMPLOYEE: u32 = 202;
    pub const BANKING_CUSTOMER: u32 = 203;

    // Process documents (300-range), grouped by process family.
    // Procure-to-pay.
    pub const PURCHASE_ORDER: u32 = 300;
    pub const GOODS_RECEIPT: u32 = 301;
    pub const VENDOR_INVOICE: u32 = 302;
    pub const PAYMENT: u32 = 303;
    // Order-to-cash.
    pub const SALES_ORDER: u32 = 310;
    pub const DELIVERY: u32 = 311;
    pub const CUSTOMER_INVOICE: u32 = 312;
    // Source-to-contract.
    pub const SOURCING_PROJECT: u32 = 320;
    pub const RFX_EVENT: u32 = 321;
    pub const SUPPLIER_BID: u32 = 322;
    pub const BID_EVALUATION: u32 = 323;
    pub const PROCUREMENT_CONTRACT: u32 = 324;
    pub const SUPPLIER_QUALIFICATION: u32 = 325;
    // Hire-to-retire.
    pub const PAYROLL_RUN: u32 = 330;
    pub const TIME_ENTRY: u32 = 331;
    pub const EXPENSE_REPORT: u32 = 332;
    pub const PAYROLL_LINE_ITEM: u32 = 333;
    // Manufacturing.
    pub const PRODUCTION_ORDER: u32 = 340;
    pub const QUALITY_INSPECTION: u32 = 341;
    pub const CYCLE_COUNT: u32 = 342;
    // Banking.
    pub const BANK_ACCOUNT: u32 = 350;
    pub const BANK_TRANSACTION: u32 = 351;
    pub const BANK_STATEMENT_LINE: u32 = 352;
    // Audit.
    pub const AUDIT_ENGAGEMENT: u32 = 360;
    pub const WORKPAPER: u32 = 361;
    pub const AUDIT_FINDING: u32 = 362;
    pub const AUDIT_EVIDENCE: u32 = 363;
    pub const RISK_ASSESSMENT: u32 = 364;
    pub const PROFESSIONAL_JUDGMENT: u32 = 365;
    // Bank reconciliation.
    pub const BANK_RECONCILIATION: u32 = 370;
    pub const RECONCILING_ITEM: u32 = 372;
    // OCPM events.
    pub const OCPM_EVENT: u32 = 400;
    // Pooled node (presumably the aggregate that stands in for many documents — confirm).
    pub const POOL_NODE: u32 = 399;

    // Governance, controls and compliance (500-range).
    pub const COSO_COMPONENT: u32 = 500;
    pub const COSO_PRINCIPLE: u32 = 501;
    pub const SOX_ASSERTION: u32 = 502;
    pub const INTERNAL_CONTROL: u32 = 503;
    pub const KYC_PROFILE: u32 = 504;
    pub const COMPLIANCE_STANDARD: u32 = 505;
    pub const JURISDICTION: u32 = 506;
    pub const REGULATORY_FILING: u32 = 507;
    pub const COMPLIANCE_FINDING: u32 = 508;

    // Tax (410-range).
    pub const TAX_JURISDICTION: u32 = 410;
    pub const TAX_CODE: u32 = 411;
    pub const TAX_LINE: u32 = 412;
    pub const TAX_RETURN: u32 = 413;
    pub const TAX_PROVISION: u32 = 414;
    pub const WITHHOLDING_TAX: u32 = 415;

    // Treasury (420-range).
    pub const CASH_POSITION: u32 = 420;
    pub const CASH_FORECAST: u32 = 421;
    pub const HEDGE_RELATIONSHIP: u32 = 422;
    pub const DEBT_INSTRUMENT: u32 = 423;

    // ESG (430-range).
    pub const EMISSION_RECORD: u32 = 430;
    pub const ESG_DISCLOSURE: u32 = 431;
    pub const SUPPLIER_ESG_ASSESSMENT: u32 = 432;
    pub const CLIMATE_SCENARIO: u32 = 433;

    // Project accounting (450-range).
    pub const PROJECT: u32 = 451;
    pub const EARNED_VALUE: u32 = 452;
    pub const PROJECT_MILESTONE: u32 = 454;

    // Intercompany (460-range).
    pub const IC_MATCHED_PAIR: u32 = 460;
    pub const ELIMINATION_ENTRY: u32 = 461;

    // Temporal events (470-range).
    pub const PROCESS_EVOLUTION: u32 = 470;
    pub const ORGANIZATIONAL_EVENT: u32 = 471;
    pub const DISRUPTION_EVENT: u32 = 472;

    // NOTE(review): same value as COMPLIANCE_STANDARD above.
    pub const AML_ALERT: u32 = 505;
    // Additional audit entities (366..=382).
    pub const EXTERNAL_CONFIRMATION: u32 = 366;
    pub const CONFIRMATION_RESPONSE: u32 = 367;
    pub const AUDIT_PROCEDURE_STEP: u32 = 368;
    pub const AUDIT_SAMPLE: u32 = 369;
    pub const ANALYTICAL_PROCEDURE_RESULT: u32 = 375;
    pub const INTERNAL_AUDIT_FUNCTION: u32 = 376;
    pub const INTERNAL_AUDIT_REPORT: u32 = 377;
    pub const RELATED_PARTY: u32 = 378;
    pub const RELATED_PARTY_TRANSACTION: u32 = 379;
    pub const MATERIALITY_CALCULATION: u32 = 380;
    pub const AUDIT_OPINION: u32 = 381;
    pub const GOING_CONCERN_ASSESSMENT: u32 = 382;

    // Edge type codes: governance / compliance relationships.
    pub const IMPLEMENTS_CONTROL: u32 = 40;
    pub const GOVERNED_BY_STANDARD: u32 = 41;
    pub const OWNS_CONTROL: u32 = 42;
    pub const OVERSEE_PROCESS: u32 = 43;
    pub const ENFORCES_ASSERTION: u32 = 44;
    pub const STANDARD_TO_CONTROL: u32 = 45;
    pub const FINDING_ON_CONTROL: u32 = 46;
    pub const STANDARD_TO_ACCOUNT: u32 = 47;
    pub const SUPPLIES_TO: u32 = 48;
    pub const FILED_BY_COMPANY: u32 = 49;
    pub const COVERS_COSO_PRINCIPLE: u32 = 54;
    pub const CONTAINS_ACCOUNT: u32 = 55;

    // Presumably edge codes for audit-procedure relationships — confirm at call sites.
    pub const CONFIRMATION_FOR_ACCOUNT: u32 = 138;
    pub const CONFIRMATION_RESPONSE_EDGE: u32 = 139;
    pub const CONFIRMATION_IN_WORKPAPER: u32 = 140;
    pub const STEP_IN_WORKPAPER: u32 = 141;
    pub const STEP_USES_SAMPLE: u32 = 142;
    pub const STEP_EVIDENCE: u32 = 143;
    pub const SAMPLE_FROM_WORKPAPER: u32 = 144;
    pub const AP_FOR_ACCOUNT: u32 = 145;
    pub const AP_IN_WORKPAPER: u32 = 146;
    pub const IAF_FOR_ENGAGEMENT: u32 = 147;
    pub const REPORT_FROM_IAF: u32 = 148;
    pub const IA_REPORT_FOR_ENGAGEMENT: u32 = 149;
    pub const RP_FOR_ENGAGEMENT: u32 = 150;
    pub const RPT_WITH_PARTY: u32 = 151;
    pub const RPT_JOURNAL_ENTRY: u32 = 152;

    // Presumably edge codes for core audit relationships — confirm at call sites.
    pub const DOCUMENTED_BY: u32 = 153;
    pub const IDENTIFIED_FROM: u32 = 154;
    pub const OPINION_BASED_ON: u32 = 155;
    pub const OPINION_FOR_ENGAGEMENT: u32 = 156;
    pub const MATERIALITY_FOR_ENGAGEMENT: u32 = 157;
    pub const GC_FOR_ENGAGEMENT: u32 = 158;
}
224
/// Settings that control what [`HypergraphBuilder`] puts into the graph.
///
/// The `include_*` flags gate whole entity families: a builder method returns
/// without adding anything when its flag is `false` (visible for COSO,
/// controls, vendors, customers and employees; the remaining flags are
/// presumably checked the same way by methods outside this view).
#[derive(Debug, Clone)]
pub struct HypergraphConfig {
    /// Total node capacity; passed to `NodeBudget::new` when the builder is created.
    pub max_nodes: usize,
    /// Strategy used to aggregate nodes (applied outside this view).
    pub aggregation_strategy: AggregationStrategy,
    /// Add the COSO component and principle nodes.
    pub include_coso: bool,
    /// Add internal-control nodes.
    pub include_controls: bool,
    /// Add SOX assertion nodes and `EnforcesAssertion` edges while adding controls.
    pub include_sox: bool,
    /// Add vendor nodes.
    pub include_vendors: bool,
    /// Add customer nodes.
    pub include_customers: bool,
    /// Add employee nodes.
    pub include_employees: bool,
    /// Include procure-to-pay documents.
    pub include_p2p: bool,
    /// Include order-to-cash documents.
    pub include_o2c: bool,
    /// Include source-to-contract documents.
    pub include_s2c: bool,
    /// Include hire-to-retire documents.
    pub include_h2r: bool,
    /// Include manufacturing documents.
    pub include_mfg: bool,
    /// Include banking documents.
    pub include_bank: bool,
    /// Include audit documents.
    pub include_audit: bool,
    /// Include compliance entities.
    pub include_compliance: bool,
    /// Include record-to-report documents (presumably bank reconciliations — confirm).
    pub include_r2r: bool,
    /// Include tax documents.
    pub include_tax: bool,
    /// Include treasury documents.
    pub include_treasury: bool,
    /// Include ESG documents.
    pub include_esg: bool,
    /// Include project-accounting documents.
    pub include_project: bool,
    /// Include intercompany documents.
    pub include_intercompany: bool,
    /// Include temporal (process/organizational/disruption) events.
    pub include_temporal_events: bool,
    /// Presumably emits events as hyperedges instead of nodes — confirm at the use site.
    pub events_as_hyperedges: bool,
    /// Per-counterparty document-count threshold (presumably the point at which
    /// documents are pooled — confirm at the use site).
    pub docs_per_counterparty_threshold: usize,
    /// Include chart-of-accounts nodes.
    pub include_accounts: bool,
    /// When `true`, journal entries become hyperedges; otherwise they become nodes.
    pub je_as_hyperedges: bool,
    /// Include cross-layer edges.
    pub include_cross_layer_edges: bool,
}
264
/// Defaults: a 50,000-node budget, pooling by counterparty with a threshold of
/// 20 documents, and every `include_*` / `*_as_hyperedges` switch enabled.
impl Default for HypergraphConfig {
    fn default() -> Self {
        Self {
            max_nodes: 50_000,
            aggregation_strategy: AggregationStrategy::PoolByCounterparty,
            include_coso: true,
            include_controls: true,
            include_sox: true,
            include_vendors: true,
            include_customers: true,
            include_employees: true,
            include_p2p: true,
            include_o2c: true,
            include_s2c: true,
            include_h2r: true,
            include_mfg: true,
            include_bank: true,
            include_audit: true,
            include_compliance: true,
            include_r2r: true,
            include_tax: true,
            include_treasury: true,
            include_esg: true,
            include_project: true,
            include_intercompany: true,
            include_temporal_events: true,
            events_as_hyperedges: true,
            docs_per_counterparty_threshold: 20,
            include_accounts: true,
            je_as_hyperedges: true,
            include_cross_layer_edges: true,
        }
    }
}
299
/// Number of entities that want a node in each of the three hypergraph layers.
///
/// Produced by `HypergraphBuilder::count_demand` and consumed by the budget
/// suggestion / rebalancing methods.
#[derive(Debug, Clone, Default)]
pub struct LayerDemand {
    /// Demand for layer 1 (COSO framework, controls, master data, standards, ESG).
    pub l1: usize,
    /// Demand for layer 2 (audit, process documents, banking, events, OCPM).
    pub l2: usize,
    /// Demand for layer 3 (accounts, journal entries, tax, treasury, projects).
    pub l3: usize,
}
314
/// Borrowed view over every data set the builder can ingest in one pass.
///
/// All fields are borrowed slices or optional references, so the derived
/// `Default` yields an input with empty slices and `None` options; callers can
/// fill in only the data they have via struct-update syntax.
#[derive(Default)]
pub struct BuilderInput<'a> {
    // --- Governance, master data, compliance and ESG ---
    // Counted as layer-1 demand by `count_demand`, except `compliance_findings`
    // and `regulatory_filings`, which it counts towards layer 2.
    pub controls: &'a [InternalControl],
    pub vendors: &'a [Vendor],
    pub customers: &'a [Customer],
    pub employees: &'a [Employee],
    pub materials: &'a [Material],
    pub fixed_assets: &'a [FixedAsset],
    pub compliance_standards: &'a [ComplianceStandard],
    pub compliance_findings: &'a [ComplianceFinding],
    pub regulatory_filings: &'a [RegulatoryFiling],
    pub emissions: &'a [EmissionRecord],
    pub esg_disclosures: &'a [EsgDisclosure],
    pub supplier_esg_assessments: &'a [SupplierEsgAssessment],
    pub climate_scenarios: &'a [ClimateScenario],

    // --- Audit data (layer-2 demand) ---
    pub audit_engagements: &'a [AuditEngagement],
    pub workpapers: &'a [Workpaper],
    pub audit_findings: &'a [AuditFinding],
    pub audit_evidence: &'a [AuditEvidence],
    pub risk_assessments: &'a [RiskAssessment],
    pub professional_judgments: &'a [ProfessionalJudgment],
    pub materiality_calculations: &'a [MaterialityCalculation],
    pub audit_opinions: &'a [AuditOpinion],
    pub going_concern_assessments: &'a [GoingConcernAssessment],
    pub external_confirmations: &'a [ExternalConfirmation],
    pub confirmation_responses: &'a [ConfirmationResponse],
    pub audit_procedure_steps: &'a [AuditProcedureStep],
    pub audit_samples: &'a [AuditSample],
    pub analytical_procedure_results: &'a [AnalyticalProcedureResult],
    pub internal_audit_functions: &'a [InternalAuditFunction],
    pub internal_audit_reports: &'a [InternalAuditReport],
    pub related_parties: &'a [RelatedParty],
    pub related_party_transactions: &'a [RelatedPartyTransaction],

    // --- Process documents, banking, events and OCPM (layer-2 demand) ---
    pub purchase_orders: &'a [datasynth_core::models::documents::PurchaseOrder],
    pub goods_receipts: &'a [datasynth_core::models::documents::GoodsReceipt],
    pub vendor_invoices: &'a [datasynth_core::models::documents::VendorInvoice],
    pub payments: &'a [datasynth_core::models::documents::Payment],
    pub sales_orders: &'a [datasynth_core::models::documents::SalesOrder],
    pub deliveries: &'a [datasynth_core::models::documents::Delivery],
    pub customer_invoices: &'a [datasynth_core::models::documents::CustomerInvoice],
    pub sourcing_projects: &'a [SourcingProject],
    pub supplier_qualifications: &'a [SupplierQualification],
    pub rfx_events: &'a [RfxEvent],
    pub supplier_bids: &'a [SupplierBid],
    pub bid_evaluations: &'a [BidEvaluation],
    pub procurement_contracts: &'a [ProcurementContract],
    pub payroll_runs: &'a [PayrollRun],
    pub time_entries: &'a [TimeEntry],
    pub expense_reports: &'a [ExpenseReport],
    pub production_orders: &'a [ProductionOrder],
    pub quality_inspections: &'a [QualityInspection],
    pub cycle_counts: &'a [CycleCount],
    pub banking_customers: &'a [BankingCustomer],
    pub bank_accounts: &'a [BankAccount],
    pub bank_transactions: &'a [BankTransaction],
    pub bank_reconciliations: &'a [BankReconciliation],
    pub process_evolution_events: &'a [ProcessEvolutionEvent],
    pub organizational_events: &'a [OrganizationalEvent],
    pub disruption_events: &'a [DisruptionEvent],
    pub ic_matched_pairs: &'a [ICMatchedPair],
    pub elimination_entries: &'a [EliminationEntry],
    /// Optional OCPM event log; each of its events counts as one unit of layer-2 demand.
    pub ocpm_event_log: Option<&'a datasynth_ocpm::OcpmEventLog>,

    // --- Accounting network (layer-3 demand) ---
    /// Optional chart of accounts; each account counts as one unit of layer-3 demand.
    pub chart_of_accounts: Option<&'a ChartOfAccounts>,
    pub journal_entries: &'a [JournalEntry],

    // --- Tax, treasury and project accounting (layer-3 demand) ---
    pub tax_jurisdictions: &'a [TaxJurisdiction],
    pub tax_codes: &'a [TaxCode],
    pub tax_lines: &'a [TaxLine],
    pub tax_returns: &'a [TaxReturn],
    pub tax_provisions: &'a [TaxProvision],
    pub withholding_records: &'a [WithholdingTaxRecord],
    pub cash_positions: &'a [CashPosition],
    pub cash_forecasts: &'a [CashForecast],
    pub hedge_relationships: &'a [HedgeRelationship],
    pub debt_instruments: &'a [DebtInstrument],
    pub projects: &'a [Project],
    pub earned_value_metrics: &'a [EarnedValueMetric],
    pub project_milestones: &'a [ProjectMilestone],
}
484
/// Incrementally assembles a hypergraph (nodes, cross-layer edges and
/// hyperedges) under a per-layer node budget.
pub struct HypergraphBuilder {
    /// Settings that gate which entity families are added.
    config: HypergraphConfig,
    /// Node budget, sized from `config.max_nodes`.
    budget: NodeBudget,
    /// Nodes accepted so far.
    nodes: Vec<HypergraphNode>,
    /// Pairwise edges emitted so far.
    edges: Vec<CrossLayerEdge>,
    /// Hyperedges emitted so far.
    hyperedges: Vec<Hyperedge>,
    /// Lookup keyed by node id (value is presumably the position in `nodes` — confirm).
    node_index: HashMap<String, usize>,
    /// Counter related to aggregate nodes (maintained outside this view).
    aggregate_count: usize,
    /// `control_id` -> node id, for controls that were added.
    control_node_ids: HashMap<String, String>,
    /// `Debug` name of a `CosoComponent` -> node id, for components that were added.
    coso_component_ids: HashMap<String, String>,
    /// Account lookup -> node id (populated outside this view).
    account_node_ids: HashMap<String, String>,
    /// `vendor_id` -> node id, for vendors that were added.
    vendor_node_ids: HashMap<String, String>,
    /// `customer_id` -> node id, for customers that were added.
    customer_node_ids: HashMap<String, String>,
    /// `employee_id` -> node id, for employees that were added.
    employee_node_ids: HashMap<String, String>,
    /// Pending document/counterparty link triples (populated outside this view;
    /// tuple layout not visible here).
    doc_counterparty_links: Vec<(String, String, String)>,
    /// Compliance-standard lookup -> node id (populated outside this view).
    standard_node_ids: HashMap<String, String>,
    /// Pending (finding, control) link pairs (populated outside this view).
    compliance_finding_control_links: Vec<(String, String)>,
    /// Pending (standard, account) link pairs; currently allowed to be unused.
    #[allow(dead_code)]
    standard_account_links: Vec<(String, String)>,
}
519
520impl HypergraphBuilder {
521 pub fn new(config: HypergraphConfig) -> Self {
523 let budget = NodeBudget::new(config.max_nodes);
524 Self {
525 config,
526 budget,
527 nodes: Vec::new(),
528 edges: Vec::new(),
529 hyperedges: Vec::new(),
530 node_index: HashMap::new(),
531 aggregate_count: 0,
532 control_node_ids: HashMap::new(),
533 coso_component_ids: HashMap::new(),
534 account_node_ids: HashMap::new(),
535 vendor_node_ids: HashMap::new(),
536 customer_node_ids: HashMap::new(),
537 employee_node_ids: HashMap::new(),
538 doc_counterparty_links: Vec::new(),
539 standard_node_ids: HashMap::new(),
540 compliance_finding_control_links: Vec::new(),
541 standard_account_links: Vec::new(),
542 }
543 }
544
545 pub fn rebalance_budget(&mut self, l1_demand: usize, l2_demand: usize, l3_demand: usize) {
551 self.budget.rebalance(l1_demand, l2_demand, l3_demand);
552 }
553
554 pub fn suggest_budget(&self, demand: &LayerDemand) -> NodeBudgetSuggestion {
559 self.budget.suggest(demand.l1, demand.l2, demand.l3)
560 }
561
562 pub fn rebalance_with_demand(&mut self, demand: &LayerDemand) {
567 self.budget.rebalance(demand.l1, demand.l2, demand.l3);
568 }
569
    /// Returns a shared reference to the builder's current node budget.
    pub fn budget(&self) -> &NodeBudget {
        &self.budget
    }
574
575 pub fn count_demand(input: &BuilderInput<'_>) -> LayerDemand {
581 let coso_count = 22;
583
584 let l1 = coso_count
586 + input.controls.len()
587 + input.vendors.len()
588 + input.customers.len()
589 + input.employees.len()
590 + input.materials.len()
591 + input.fixed_assets.len()
592 + input.compliance_standards.len()
593 + input.emissions.len()
594 + input.esg_disclosures.len()
595 + input.supplier_esg_assessments.len()
596 + input.climate_scenarios.len();
597
598 let ocpm_count = input
601 .ocpm_event_log
602 .map(|log| log.events.len())
603 .unwrap_or(0);
604 let l2 = input.audit_engagements.len()
605 + input.workpapers.len()
606 + input.audit_findings.len()
607 + input.audit_evidence.len()
608 + input.risk_assessments.len()
609 + input.professional_judgments.len()
610 + input.external_confirmations.len()
611 + input.confirmation_responses.len()
612 + input.audit_procedure_steps.len()
613 + input.audit_samples.len()
614 + input.analytical_procedure_results.len()
615 + input.internal_audit_functions.len()
616 + input.internal_audit_reports.len()
617 + input.related_parties.len()
618 + input.related_party_transactions.len()
619 + input.materiality_calculations.len()
620 + input.audit_opinions.len()
621 + input.going_concern_assessments.len()
622 + input.purchase_orders.len()
623 + input.goods_receipts.len()
624 + input.vendor_invoices.len()
625 + input.payments.len()
626 + input.sales_orders.len()
627 + input.deliveries.len()
628 + input.customer_invoices.len()
629 + input.sourcing_projects.len()
630 + input.supplier_qualifications.len()
631 + input.rfx_events.len()
632 + input.supplier_bids.len()
633 + input.bid_evaluations.len()
634 + input.procurement_contracts.len()
635 + input.payroll_runs.len()
636 + input.time_entries.len()
637 + input.expense_reports.len()
638 + input.production_orders.len()
639 + input.quality_inspections.len()
640 + input.cycle_counts.len()
641 + input.banking_customers.len()
642 + input.bank_accounts.len()
643 + input.bank_transactions.len()
644 + input.bank_reconciliations.len()
645 + input.compliance_findings.len()
646 + input.regulatory_filings.len()
647 + input.process_evolution_events.len()
648 + input.organizational_events.len()
649 + input.disruption_events.len()
650 + input.ic_matched_pairs.len()
651 + input.elimination_entries.len()
652 + ocpm_count;
653
654 let account_count = input
656 .chart_of_accounts
657 .map(|coa| coa.accounts.len())
658 .unwrap_or(0);
659 let l3 = account_count
660 + input.journal_entries.len()
661 + input.tax_jurisdictions.len()
662 + input.tax_codes.len()
663 + input.tax_lines.len()
664 + input.tax_returns.len()
665 + input.tax_provisions.len()
666 + input.withholding_records.len()
667 + input.cash_positions.len()
668 + input.cash_forecasts.len()
669 + input.hedge_relationships.len()
670 + input.debt_instruments.len()
671 + input.projects.len()
672 + input.earned_value_metrics.len()
673 + input.project_milestones.len();
674
675 LayerDemand { l1, l2, l3 }
676 }
677
    /// Feeds every data set in `input` into the builder in a fixed order.
    ///
    /// The order follows the layer grouping used by `count_demand`: first the
    /// governance / master-data / compliance / ESG entities, then audit data
    /// and process documents, then accounts, journal entries, tax, treasury
    /// and project data. `tag_process_family` runs last, after all data has
    /// been added. Several later steps look up ids registered by earlier ones
    /// (e.g. controls look up COSO component ids), so do not reorder the calls
    /// without checking those dependencies.
    pub fn add_all_ordered(&mut self, input: &BuilderInput<'_>) {
        // --- Governance, master data, compliance, ESG ---
        // COSO first: `add_controls` links each control to its COSO component.
        self.add_coso_framework();
        self.add_controls(input.controls);
        self.add_vendors(input.vendors);
        self.add_customers(input.customers);
        self.add_employees(input.employees);
        self.add_materials(input.materials);
        self.add_fixed_assets(input.fixed_assets);
        self.add_compliance_regulations(
            input.compliance_standards,
            input.compliance_findings,
            input.regulatory_filings,
        );
        self.add_esg_documents(
            input.emissions,
            input.esg_disclosures,
            input.supplier_esg_assessments,
            input.climate_scenarios,
        );

        // --- Audit data ---
        self.add_audit_documents(
            input.audit_engagements,
            input.workpapers,
            input.audit_findings,
            input.audit_evidence,
            input.risk_assessments,
            input.professional_judgments,
            input.materiality_calculations,
            input.audit_opinions,
            input.going_concern_assessments,
        );
        self.add_audit_procedure_entities(
            input.external_confirmations,
            input.confirmation_responses,
            input.audit_procedure_steps,
            input.audit_samples,
            input.analytical_procedure_results,
            input.internal_audit_functions,
            input.internal_audit_reports,
            input.related_parties,
            input.related_party_transactions,
        );

        // --- Process documents, banking, events, intercompany, OCPM ---
        self.add_p2p_documents(
            input.purchase_orders,
            input.goods_receipts,
            input.vendor_invoices,
            input.payments,
        );
        self.add_o2c_documents(
            input.sales_orders,
            input.deliveries,
            input.customer_invoices,
        );
        self.add_s2c_documents(
            input.sourcing_projects,
            input.supplier_qualifications,
            input.rfx_events,
            input.supplier_bids,
            input.bid_evaluations,
            input.procurement_contracts,
        );
        self.add_h2r_documents(
            input.payroll_runs,
            input.time_entries,
            input.expense_reports,
        );
        self.add_mfg_documents(
            input.production_orders,
            input.quality_inspections,
            input.cycle_counts,
        );
        self.add_bank_documents(
            input.banking_customers,
            input.bank_accounts,
            input.bank_transactions,
        );
        // AML alerts and KYC profiles are derived from the same banking inputs.
        self.add_aml_alerts(input.bank_transactions);
        self.add_kyc_profiles(input.banking_customers);
        self.add_bank_recon_documents(input.bank_reconciliations);
        self.add_temporal_events(
            input.process_evolution_events,
            input.organizational_events,
            input.disruption_events,
        );
        self.add_intercompany_documents(input.ic_matched_pairs, input.elimination_entries);
        // The OCPM event log is optional; skipped when absent.
        if let Some(ocpm) = input.ocpm_event_log {
            self.add_ocpm_events(ocpm);
        }

        // --- Accounting network ---
        // Accounts are only added when a chart of accounts was supplied.
        if let Some(coa) = input.chart_of_accounts {
            self.add_accounts(coa);
        }
        // Journal entries are represented either as hyperedges or as nodes,
        // depending on configuration.
        if self.config.je_as_hyperedges {
            self.add_journal_entries_as_hyperedges(input.journal_entries);
        } else {
            self.add_journal_entry_nodes(input.journal_entries);
        }

        // --- Tax, treasury, project accounting ---
        self.add_tax_documents(
            input.tax_jurisdictions,
            input.tax_codes,
            input.tax_lines,
            input.tax_returns,
            input.tax_provisions,
            input.withholding_records,
        );
        self.add_treasury_documents(
            input.cash_positions,
            input.cash_forecasts,
            input.hedge_relationships,
            input.debt_instruments,
        );
        self.add_project_documents(
            input.projects,
            input.earned_value_metrics,
            input.project_milestones,
        );

        // Final pass once every node is in place.
        self.tag_process_family();
    }
818
819 pub fn add_coso_framework(&mut self) {
821 if !self.config.include_coso {
822 return;
823 }
824
825 let components = [
826 (CosoComponent::ControlEnvironment, "Control Environment"),
827 (CosoComponent::RiskAssessment, "Risk Assessment"),
828 (CosoComponent::ControlActivities, "Control Activities"),
829 (
830 CosoComponent::InformationCommunication,
831 "Information & Communication",
832 ),
833 (CosoComponent::MonitoringActivities, "Monitoring Activities"),
834 ];
835
836 for (component, name) in &components {
837 let id = format!("coso_comp_{}", name.replace(' ', "_").replace('&', "and"));
838 if self.try_add_node(HypergraphNode {
839 id: id.clone(),
840 entity_type: "coso_component".to_string(),
841 entity_type_code: type_codes::COSO_COMPONENT,
842 layer: HypergraphLayer::GovernanceControls,
843 external_id: format!("{component:?}"),
844 label: name.to_string(),
845 properties: HashMap::new(),
846 features: vec![component_to_feature(component)],
847 is_anomaly: false,
848 anomaly_type: None,
849 is_aggregate: false,
850 aggregate_count: 0,
851 }) {
852 self.coso_component_ids.insert(format!("{component:?}"), id);
853 }
854 }
855
856 let principles = [
857 (
858 CosoPrinciple::IntegrityAndEthics,
859 "Integrity and Ethics",
860 CosoComponent::ControlEnvironment,
861 ),
862 (
863 CosoPrinciple::BoardOversight,
864 "Board Oversight",
865 CosoComponent::ControlEnvironment,
866 ),
867 (
868 CosoPrinciple::OrganizationalStructure,
869 "Organizational Structure",
870 CosoComponent::ControlEnvironment,
871 ),
872 (
873 CosoPrinciple::CommitmentToCompetence,
874 "Commitment to Competence",
875 CosoComponent::ControlEnvironment,
876 ),
877 (
878 CosoPrinciple::Accountability,
879 "Accountability",
880 CosoComponent::ControlEnvironment,
881 ),
882 (
883 CosoPrinciple::ClearObjectives,
884 "Clear Objectives",
885 CosoComponent::RiskAssessment,
886 ),
887 (
888 CosoPrinciple::IdentifyRisks,
889 "Identify Risks",
890 CosoComponent::RiskAssessment,
891 ),
892 (
893 CosoPrinciple::FraudRisk,
894 "Fraud Risk",
895 CosoComponent::RiskAssessment,
896 ),
897 (
898 CosoPrinciple::ChangeIdentification,
899 "Change Identification",
900 CosoComponent::RiskAssessment,
901 ),
902 (
903 CosoPrinciple::ControlActions,
904 "Control Actions",
905 CosoComponent::ControlActivities,
906 ),
907 (
908 CosoPrinciple::TechnologyControls,
909 "Technology Controls",
910 CosoComponent::ControlActivities,
911 ),
912 (
913 CosoPrinciple::PoliciesAndProcedures,
914 "Policies and Procedures",
915 CosoComponent::ControlActivities,
916 ),
917 (
918 CosoPrinciple::QualityInformation,
919 "Quality Information",
920 CosoComponent::InformationCommunication,
921 ),
922 (
923 CosoPrinciple::InternalCommunication,
924 "Internal Communication",
925 CosoComponent::InformationCommunication,
926 ),
927 (
928 CosoPrinciple::ExternalCommunication,
929 "External Communication",
930 CosoComponent::InformationCommunication,
931 ),
932 (
933 CosoPrinciple::OngoingMonitoring,
934 "Ongoing Monitoring",
935 CosoComponent::MonitoringActivities,
936 ),
937 (
938 CosoPrinciple::DeficiencyEvaluation,
939 "Deficiency Evaluation",
940 CosoComponent::MonitoringActivities,
941 ),
942 ];
943
944 for (principle, name, parent_component) in &principles {
945 let principle_id = format!("coso_prin_{}", name.replace(' ', "_").replace('&', "and"));
946 if self.try_add_node(HypergraphNode {
947 id: principle_id.clone(),
948 entity_type: "coso_principle".to_string(),
949 entity_type_code: type_codes::COSO_PRINCIPLE,
950 layer: HypergraphLayer::GovernanceControls,
951 external_id: format!("{principle:?}"),
952 label: name.to_string(),
953 properties: {
954 let mut p = HashMap::new();
955 p.insert(
956 "principle_number".to_string(),
957 Value::Number(principle.principle_number().into()),
958 );
959 p
960 },
961 features: vec![principle.principle_number() as f64],
962 is_anomaly: false,
963 anomaly_type: None,
964 is_aggregate: false,
965 aggregate_count: 0,
966 }) {
967 let comp_key = format!("{parent_component:?}");
969 if let Some(comp_id) = self.coso_component_ids.get(&comp_key) {
970 self.edges.push(CrossLayerEdge {
971 source_id: principle_id,
972 source_layer: HypergraphLayer::GovernanceControls,
973 target_id: comp_id.clone(),
974 target_layer: HypergraphLayer::GovernanceControls,
975 edge_type: "CoversCosoPrinciple".to_string(),
976 edge_type_code: type_codes::COVERS_COSO_PRINCIPLE,
977 properties: HashMap::new(),
978 });
979 }
980 }
981 }
982 }
983
984 pub fn add_controls(&mut self, controls: &[InternalControl]) {
986 if !self.config.include_controls {
987 return;
988 }
989
990 for control in controls {
991 let node_id = format!("ctrl_{}", control.control_id);
992 if self.try_add_node(HypergraphNode {
993 id: node_id.clone(),
994 entity_type: "internal_control".to_string(),
995 entity_type_code: type_codes::INTERNAL_CONTROL,
996 layer: HypergraphLayer::GovernanceControls,
997 external_id: control.control_id.clone(),
998 label: control.control_name.clone(),
999 properties: {
1000 let mut p = HashMap::new();
1001 p.insert(
1002 "control_type".to_string(),
1003 Value::String(format!("{:?}", control.control_type)),
1004 );
1005 p.insert(
1006 "risk_level".to_string(),
1007 Value::String(format!("{:?}", control.risk_level)),
1008 );
1009 p.insert(
1010 "is_key_control".to_string(),
1011 Value::Bool(control.is_key_control),
1012 );
1013 p.insert(
1014 "maturity_level".to_string(),
1015 Value::String(format!("{:?}", control.maturity_level)),
1016 );
1017 p.insert(
1018 "description".to_string(),
1019 Value::String(control.description.clone()),
1020 );
1021 p.insert(
1022 "objective".to_string(),
1023 Value::String(control.objective.clone()),
1024 );
1025 p.insert(
1026 "frequency".to_string(),
1027 Value::String(format!("{}", control.frequency).to_lowercase()),
1028 );
1029 p.insert(
1030 "owner".to_string(),
1031 Value::String(format!("{}", control.owner_role)),
1032 );
1033 p.insert(
1034 "coso_component".to_string(),
1035 Value::String(format!("{:?}", control.coso_component)),
1036 );
1037 p.insert(
1038 "sox_assertion".to_string(),
1039 Value::String(format!("{:?}", control.sox_assertion)),
1040 );
1041 p.insert(
1042 "control_scope".to_string(),
1043 Value::String(format!("{:?}", control.control_scope)),
1044 );
1045 p
1046 },
1047 features: vec![
1048 if control.is_key_control { 1.0 } else { 0.0 },
1049 control.maturity_level.level() as f64 / 5.0,
1050 ],
1051 is_anomaly: false,
1052 anomaly_type: None,
1053 is_aggregate: false,
1054 aggregate_count: 0,
1055 }) {
1056 self.control_node_ids
1057 .insert(control.control_id.clone(), node_id.clone());
1058
1059 let comp_key = format!("{:?}", control.coso_component);
1061 if let Some(comp_id) = self.coso_component_ids.get(&comp_key) {
1062 self.edges.push(CrossLayerEdge {
1063 source_id: node_id.clone(),
1064 source_layer: HypergraphLayer::GovernanceControls,
1065 target_id: comp_id.clone(),
1066 target_layer: HypergraphLayer::GovernanceControls,
1067 edge_type: "ImplementsControl".to_string(),
1068 edge_type_code: type_codes::IMPLEMENTS_CONTROL,
1069 properties: HashMap::new(),
1070 });
1071 }
1072
1073 if self.config.include_sox {
1075 let assertion_id = format!("sox_{:?}", control.sox_assertion).to_lowercase();
1076 if !self.node_index.contains_key(&assertion_id) {
1078 self.try_add_node(HypergraphNode {
1079 id: assertion_id.clone(),
1080 entity_type: "sox_assertion".to_string(),
1081 entity_type_code: type_codes::SOX_ASSERTION,
1082 layer: HypergraphLayer::GovernanceControls,
1083 external_id: format!("{:?}", control.sox_assertion),
1084 label: format!("{:?}", control.sox_assertion),
1085 properties: HashMap::new(),
1086 features: vec![],
1087 is_anomaly: false,
1088 anomaly_type: None,
1089 is_aggregate: false,
1090 aggregate_count: 0,
1091 });
1092 }
1093 self.edges.push(CrossLayerEdge {
1094 source_id: node_id,
1095 source_layer: HypergraphLayer::GovernanceControls,
1096 target_id: assertion_id,
1097 target_layer: HypergraphLayer::GovernanceControls,
1098 edge_type: "EnforcesAssertion".to_string(),
1099 edge_type_code: type_codes::ENFORCES_ASSERTION,
1100 properties: HashMap::new(),
1101 });
1102 }
1103 }
1104 }
1105 }
1106
1107 pub fn add_vendors(&mut self, vendors: &[Vendor]) {
1109 if !self.config.include_vendors {
1110 return;
1111 }
1112
1113 for vendor in vendors {
1114 let node_id = format!("vnd_{}", vendor.vendor_id);
1115 if self.try_add_node(HypergraphNode {
1116 id: node_id.clone(),
1117 entity_type: "vendor".to_string(),
1118 entity_type_code: type_codes::VENDOR,
1119 layer: HypergraphLayer::GovernanceControls,
1120 external_id: vendor.vendor_id.clone(),
1121 label: vendor.name.clone(),
1122 properties: {
1123 let mut p = HashMap::new();
1124 p.insert(
1125 "vendor_type".to_string(),
1126 Value::String(format!("{:?}", vendor.vendor_type)),
1127 );
1128 p.insert("country".to_string(), Value::String(vendor.country.clone()));
1129 p.insert("is_active".to_string(), Value::Bool(vendor.is_active));
1130 p
1131 },
1132 features: vec![if vendor.is_active { 1.0 } else { 0.0 }],
1133 is_anomaly: false,
1134 anomaly_type: None,
1135 is_aggregate: false,
1136 aggregate_count: 0,
1137 }) {
1138 self.vendor_node_ids
1139 .insert(vendor.vendor_id.clone(), node_id);
1140 }
1141 }
1142 }
1143
1144 pub fn add_customers(&mut self, customers: &[Customer]) {
1146 if !self.config.include_customers {
1147 return;
1148 }
1149
1150 for customer in customers {
1151 let node_id = format!("cust_{}", customer.customer_id);
1152 if self.try_add_node(HypergraphNode {
1153 id: node_id.clone(),
1154 entity_type: "customer".to_string(),
1155 entity_type_code: type_codes::CUSTOMER,
1156 layer: HypergraphLayer::GovernanceControls,
1157 external_id: customer.customer_id.clone(),
1158 label: customer.name.clone(),
1159 properties: {
1160 let mut p = HashMap::new();
1161 p.insert(
1162 "customer_type".to_string(),
1163 Value::String(format!("{:?}", customer.customer_type)),
1164 );
1165 p.insert(
1166 "country".to_string(),
1167 Value::String(customer.country.clone()),
1168 );
1169 p.insert(
1170 "credit_rating".to_string(),
1171 Value::String(format!("{:?}", customer.credit_rating)),
1172 );
1173 p
1174 },
1175 features: vec![if customer.is_active { 1.0 } else { 0.0 }],
1176 is_anomaly: false,
1177 anomaly_type: None,
1178 is_aggregate: false,
1179 aggregate_count: 0,
1180 }) {
1181 self.customer_node_ids
1182 .insert(customer.customer_id.clone(), node_id);
1183 }
1184 }
1185 }
1186
1187 pub fn add_employees(&mut self, employees: &[Employee]) {
1189 if !self.config.include_employees {
1190 return;
1191 }
1192
1193 for employee in employees {
1194 let node_id = format!("emp_{}", employee.employee_id);
1195 if self.try_add_node(HypergraphNode {
1196 id: node_id.clone(),
1197 entity_type: "employee".to_string(),
1198 entity_type_code: type_codes::EMPLOYEE,
1199 layer: HypergraphLayer::GovernanceControls,
1200 external_id: employee.employee_id.clone(),
1201 label: employee.display_name.clone(),
1202 properties: {
1203 let mut p = HashMap::new();
1204 p.insert(
1205 "persona".to_string(),
1206 Value::String(employee.persona.to_string()),
1207 );
1208 p.insert(
1209 "job_level".to_string(),
1210 Value::String(format!("{:?}", employee.job_level)),
1211 );
1212 p.insert(
1213 "company_code".to_string(),
1214 Value::String(employee.company_code.clone()),
1215 );
1216 p.insert("email".to_string(), Value::String(employee.email.clone()));
1217 p.insert(
1218 "department".to_string(),
1219 Value::String(employee.department_id.clone().unwrap_or_default()),
1220 );
1221 p.insert(
1222 "job_title".to_string(),
1223 Value::String(employee.job_title.clone()),
1224 );
1225 p.insert(
1226 "status".to_string(),
1227 Value::String(format!("{:?}", employee.status)),
1228 );
1229 p
1230 },
1231 features: vec![employee
1232 .approval_limit
1233 .to_string()
1234 .parse::<f64>()
1235 .unwrap_or(0.0)
1236 .ln_1p()],
1237 is_anomaly: false,
1238 anomaly_type: None,
1239 is_aggregate: false,
1240 aggregate_count: 0,
1241 }) {
1242 self.employee_node_ids
1243 .insert(employee.employee_id.clone(), node_id);
1244 }
1245 }
1246 }
1247
1248 pub fn add_materials(&mut self, materials: &[Material]) {
1250 for mat in materials {
1251 let node_id = format!("mat_{}", mat.material_id);
1252 self.try_add_node(HypergraphNode {
1253 id: node_id,
1254 entity_type: "material".to_string(),
1255 entity_type_code: type_codes::MATERIAL,
1256 layer: HypergraphLayer::AccountingNetwork,
1257 external_id: mat.material_id.clone(),
1258 label: format!("{} ({})", mat.description, mat.material_id),
1259 properties: {
1260 let mut p = HashMap::new();
1261 p.insert(
1262 "material_type".to_string(),
1263 Value::String(format!("{:?}", mat.material_type)),
1264 );
1265 p.insert(
1266 "material_group".to_string(),
1267 Value::String(format!("{:?}", mat.material_group)),
1268 );
1269 let cost: f64 = mat.standard_cost.to_string().parse().unwrap_or(0.0);
1270 p.insert("standard_cost".to_string(), serde_json::json!(cost));
1271 p
1272 },
1273 features: vec![mat
1274 .standard_cost
1275 .to_string()
1276 .parse::<f64>()
1277 .unwrap_or(0.0)
1278 .ln_1p()],
1279 is_anomaly: false,
1280 anomaly_type: None,
1281 is_aggregate: false,
1282 aggregate_count: 0,
1283 });
1284 }
1285 }
1286
1287 pub fn add_fixed_assets(&mut self, assets: &[FixedAsset]) {
1289 for asset in assets {
1290 let node_id = format!("fa_{}", asset.asset_id);
1291 self.try_add_node(HypergraphNode {
1292 id: node_id,
1293 entity_type: "fixed_asset".to_string(),
1294 entity_type_code: type_codes::FIXED_ASSET,
1295 layer: HypergraphLayer::AccountingNetwork,
1296 external_id: asset.asset_id.clone(),
1297 label: format!("{} ({})", asset.description, asset.asset_id),
1298 properties: {
1299 let mut p = HashMap::new();
1300 p.insert(
1301 "asset_class".to_string(),
1302 Value::String(format!("{:?}", asset.asset_class)),
1303 );
1304 p.insert(
1305 "company_code".to_string(),
1306 Value::String(asset.company_code.clone()),
1307 );
1308 if let Some(ref cc) = asset.cost_center {
1309 p.insert("cost_center".to_string(), Value::String(cc.clone()));
1310 }
1311 let cost: f64 = asset.acquisition_cost.to_string().parse().unwrap_or(0.0);
1312 p.insert("acquisition_cost".to_string(), serde_json::json!(cost));
1313 p
1314 },
1315 features: vec![asset
1316 .acquisition_cost
1317 .to_string()
1318 .parse::<f64>()
1319 .unwrap_or(0.0)
1320 .ln_1p()],
1321 is_anomaly: false,
1322 anomaly_type: None,
1323 is_aggregate: false,
1324 aggregate_count: 0,
1325 });
1326 }
1327 }
1328
1329 pub fn add_accounts(&mut self, coa: &ChartOfAccounts) {
1331 if !self.config.include_accounts {
1332 return;
1333 }
1334
1335 for account in &coa.accounts {
1336 let node_id = format!("acct_{}", account.account_number);
1337 if self.try_add_node(HypergraphNode {
1338 id: node_id.clone(),
1339 entity_type: "account".to_string(),
1340 entity_type_code: type_codes::ACCOUNT,
1341 layer: HypergraphLayer::AccountingNetwork,
1342 external_id: account.account_number.clone(),
1343 label: account.short_description.clone(),
1344 properties: {
1345 let mut p = HashMap::new();
1346 p.insert(
1347 "account_type".to_string(),
1348 Value::String(format!("{:?}", account.account_type)),
1349 );
1350 p.insert(
1351 "is_control_account".to_string(),
1352 Value::Bool(account.is_control_account),
1353 );
1354 p.insert("is_postable".to_string(), Value::Bool(account.is_postable));
1355 p
1356 },
1357 features: vec![
1358 account_type_feature(&account.account_type),
1359 if account.is_control_account { 1.0 } else { 0.0 },
1360 if account.normal_debit_balance {
1361 1.0
1362 } else {
1363 0.0
1364 },
1365 ],
1366 is_anomaly: false,
1367 anomaly_type: None,
1368 is_aggregate: false,
1369 aggregate_count: 0,
1370 }) {
1371 self.account_node_ids
1372 .insert(account.account_number.clone(), node_id);
1373 }
1374 }
1375 }
1376
1377 pub fn add_journal_entries_as_hyperedges(&mut self, entries: &[JournalEntry]) {
1381 if !self.config.je_as_hyperedges {
1382 return;
1383 }
1384
1385 for entry in entries {
1386 let mut participants = Vec::new();
1387
1388 for line in &entry.lines {
1389 let account_id = format!("acct_{}", line.gl_account);
1390
1391 if !self.node_index.contains_key(&account_id) {
1393 self.try_add_node(HypergraphNode {
1394 id: account_id.clone(),
1395 entity_type: "account".to_string(),
1396 entity_type_code: type_codes::ACCOUNT,
1397 layer: HypergraphLayer::AccountingNetwork,
1398 external_id: line.gl_account.clone(),
1399 label: line
1400 .account_description
1401 .clone()
1402 .unwrap_or_else(|| line.gl_account.clone()),
1403 properties: HashMap::new(),
1404 features: vec![],
1405 is_anomaly: false,
1406 anomaly_type: None,
1407 is_aggregate: false,
1408 aggregate_count: 0,
1409 });
1410 self.account_node_ids
1411 .insert(line.gl_account.clone(), account_id.clone());
1412 }
1413
1414 let amount: f64 = if !line.debit_amount.is_zero() {
1415 line.debit_amount.to_string().parse().unwrap_or(0.0)
1416 } else {
1417 line.credit_amount.to_string().parse().unwrap_or(0.0)
1418 };
1419
1420 let role = if !line.debit_amount.is_zero() {
1421 "debit"
1422 } else {
1423 "credit"
1424 };
1425
1426 participants.push(HyperedgeParticipant {
1427 node_id: account_id,
1428 role: role.to_string(),
1429 weight: Some(amount),
1430 });
1431 }
1432
1433 if participants.is_empty() {
1434 continue;
1435 }
1436
1437 let doc_id = entry.header.document_id.to_string();
1438 let subtype = entry
1439 .header
1440 .business_process
1441 .as_ref()
1442 .map(|bp| format!("{bp:?}"))
1443 .unwrap_or_else(|| "General".to_string());
1444
1445 self.hyperedges.push(Hyperedge {
1446 id: format!("je_{doc_id}"),
1447 hyperedge_type: "JournalEntry".to_string(),
1448 subtype,
1449 participants,
1450 layer: HypergraphLayer::AccountingNetwork,
1451 properties: {
1452 let mut p = HashMap::new();
1453 p.insert("document_id".to_string(), Value::String(doc_id));
1454 p.insert(
1455 "company_code".to_string(),
1456 Value::String(entry.header.company_code.clone()),
1457 );
1458 p.insert(
1459 "document_type".to_string(),
1460 Value::String(entry.header.document_type.clone()),
1461 );
1462 p.insert(
1463 "created_by".to_string(),
1464 Value::String(entry.header.created_by.clone()),
1465 );
1466 p
1467 },
1468 timestamp: Some(entry.header.posting_date),
1469 is_anomaly: entry.header.is_anomaly || entry.header.is_fraud,
1470 anomaly_type: entry
1471 .header
1472 .anomaly_type
1473 .clone()
1474 .or_else(|| entry.header.fraud_type.as_ref().map(|ft| format!("{ft:?}"))),
1475 features: compute_je_features(entry),
1476 });
1477 }
1478 }
1479
1480 pub fn add_journal_entry_nodes(&mut self, entries: &[JournalEntry]) {
1486 for entry in entries {
1487 let node_id = format!("je_{}", entry.header.document_id);
1488 let total_amount: f64 = entry
1489 .lines
1490 .iter()
1491 .map(|l| l.debit_amount.to_string().parse::<f64>().unwrap_or(0.0))
1492 .sum();
1493
1494 let is_anomaly = entry.header.is_anomaly || entry.header.is_fraud;
1495 let anomaly_type = entry
1496 .header
1497 .anomaly_type
1498 .clone()
1499 .or_else(|| entry.header.fraud_type.as_ref().map(|ft| format!("{ft:?}")));
1500
1501 self.try_add_node(HypergraphNode {
1502 id: node_id,
1503 entity_type: "journal_entry".to_string(),
1504 entity_type_code: type_codes::JOURNAL_ENTRY,
1505 layer: HypergraphLayer::AccountingNetwork,
1506 external_id: entry.header.document_id.to_string(),
1507 label: format!("JE-{}", entry.header.document_id),
1508 properties: {
1509 let mut p = HashMap::new();
1510 p.insert(
1511 "amount".into(),
1512 Value::Number(
1513 serde_json::Number::from_f64(total_amount)
1514 .unwrap_or_else(|| serde_json::Number::from(0)),
1515 ),
1516 );
1517 p.insert(
1518 "date".into(),
1519 Value::String(entry.header.posting_date.to_string()),
1520 );
1521 p.insert(
1522 "company_code".into(),
1523 Value::String(entry.header.company_code.clone()),
1524 );
1525 p.insert(
1526 "line_count".into(),
1527 Value::Number((entry.lines.len() as u64).into()),
1528 );
1529 p.insert("is_anomaly".into(), Value::Bool(is_anomaly));
1530 if let Some(ref at) = anomaly_type {
1531 p.insert("anomaly_type".into(), Value::String(at.clone()));
1532 }
1533 p
1534 },
1535 features: vec![total_amount / 100_000.0],
1536 is_anomaly,
1537 anomaly_type,
1538 is_aggregate: false,
1539 aggregate_count: 0,
1540 });
1541 }
1542 }
1543
1544 pub fn add_p2p_documents(
1548 &mut self,
1549 purchase_orders: &[datasynth_core::models::documents::PurchaseOrder],
1550 goods_receipts: &[datasynth_core::models::documents::GoodsReceipt],
1551 vendor_invoices: &[datasynth_core::models::documents::VendorInvoice],
1552 payments: &[datasynth_core::models::documents::Payment],
1553 ) {
1554 if !self.config.include_p2p {
1555 return;
1556 }
1557
1558 let mut vendor_doc_counts: HashMap<String, usize> = HashMap::new();
1560 for po in purchase_orders {
1561 *vendor_doc_counts.entry(po.vendor_id.clone()).or_insert(0) += 1;
1562 }
1563
1564 let threshold = self.config.docs_per_counterparty_threshold;
1565 let should_aggregate = matches!(
1566 self.config.aggregation_strategy,
1567 AggregationStrategy::PoolByCounterparty
1568 );
1569
1570 let vendors_needing_pools: Vec<String> = if should_aggregate {
1572 vendor_doc_counts
1573 .iter()
1574 .filter(|(_, count)| **count > threshold)
1575 .map(|(vid, _)| vid.clone())
1576 .collect()
1577 } else {
1578 Vec::new()
1579 };
1580
1581 for vendor_id in &vendors_needing_pools {
1583 let count = vendor_doc_counts[vendor_id];
1584 let pool_id = format!("pool_p2p_{vendor_id}");
1585 if self.try_add_node(HypergraphNode {
1586 id: pool_id.clone(),
1587 entity_type: "p2p_pool".to_string(),
1588 entity_type_code: type_codes::POOL_NODE,
1589 layer: HypergraphLayer::ProcessEvents,
1590 external_id: format!("pool_p2p_{vendor_id}"),
1591 label: format!("P2P Pool ({vendor_id}): {count} docs"),
1592 properties: {
1593 let mut p = HashMap::new();
1594 p.insert("vendor_id".to_string(), Value::String(vendor_id.clone()));
1595 p.insert("document_count".to_string(), Value::Number(count.into()));
1596 p
1597 },
1598 features: vec![count as f64],
1599 is_anomaly: false,
1600 anomaly_type: None,
1601 is_aggregate: true,
1602 aggregate_count: count,
1603 }) {
1604 self.doc_counterparty_links.push((
1605 pool_id,
1606 "vendor".to_string(),
1607 vendor_id.clone(),
1608 ));
1609 }
1610 self.aggregate_count += 1;
1611 }
1612
1613 for po in purchase_orders {
1615 if should_aggregate && vendors_needing_pools.contains(&po.vendor_id) {
1616 continue; }
1618
1619 let doc_id = &po.header.document_id;
1620 let node_id = format!("po_{doc_id}");
1621 if self.try_add_node(HypergraphNode {
1622 id: node_id.clone(),
1623 entity_type: "purchase_order".to_string(),
1624 entity_type_code: type_codes::PURCHASE_ORDER,
1625 layer: HypergraphLayer::ProcessEvents,
1626 external_id: doc_id.clone(),
1627 label: format!("PO {doc_id}"),
1628 properties: {
1629 let mut p = HashMap::new();
1630 p.insert("vendor_id".to_string(), Value::String(po.vendor_id.clone()));
1631 p.insert(
1632 "company_code".to_string(),
1633 Value::String(po.header.company_code.clone()),
1634 );
1635 p
1636 },
1637 features: vec![po
1638 .total_net_amount
1639 .to_string()
1640 .parse::<f64>()
1641 .unwrap_or(0.0)
1642 .ln_1p()],
1643 is_anomaly: false,
1644 anomaly_type: None,
1645 is_aggregate: false,
1646 aggregate_count: 0,
1647 }) {
1648 self.doc_counterparty_links.push((
1649 node_id,
1650 "vendor".to_string(),
1651 po.vendor_id.clone(),
1652 ));
1653 }
1654 }
1655
1656 for gr in goods_receipts {
1658 let vendor_id = gr.vendor_id.as_deref().unwrap_or("UNKNOWN");
1659 if should_aggregate && vendors_needing_pools.contains(&vendor_id.to_string()) {
1660 continue;
1661 }
1662 let doc_id = &gr.header.document_id;
1663 let node_id = format!("gr_{doc_id}");
1664 self.try_add_node(HypergraphNode {
1665 id: node_id,
1666 entity_type: "goods_receipt".to_string(),
1667 entity_type_code: type_codes::GOODS_RECEIPT,
1668 layer: HypergraphLayer::ProcessEvents,
1669 external_id: doc_id.clone(),
1670 label: format!("GR {doc_id}"),
1671 properties: {
1672 let mut p = HashMap::new();
1673 p.insert(
1674 "vendor_id".to_string(),
1675 Value::String(vendor_id.to_string()),
1676 );
1677 p
1678 },
1679 features: vec![gr
1680 .total_value
1681 .to_string()
1682 .parse::<f64>()
1683 .unwrap_or(0.0)
1684 .ln_1p()],
1685 is_anomaly: false,
1686 anomaly_type: None,
1687 is_aggregate: false,
1688 aggregate_count: 0,
1689 });
1690 }
1691
1692 for inv in vendor_invoices {
1694 if should_aggregate && vendors_needing_pools.contains(&inv.vendor_id) {
1695 continue;
1696 }
1697 let doc_id = &inv.header.document_id;
1698 let node_id = format!("vinv_{doc_id}");
1699 self.try_add_node(HypergraphNode {
1700 id: node_id,
1701 entity_type: "vendor_invoice".to_string(),
1702 entity_type_code: type_codes::VENDOR_INVOICE,
1703 layer: HypergraphLayer::ProcessEvents,
1704 external_id: doc_id.clone(),
1705 label: format!("VI {doc_id}"),
1706 properties: {
1707 let mut p = HashMap::new();
1708 p.insert(
1709 "vendor_id".to_string(),
1710 Value::String(inv.vendor_id.clone()),
1711 );
1712 p
1713 },
1714 features: vec![inv
1715 .payable_amount
1716 .to_string()
1717 .parse::<f64>()
1718 .unwrap_or(0.0)
1719 .ln_1p()],
1720 is_anomaly: false,
1721 anomaly_type: None,
1722 is_aggregate: false,
1723 aggregate_count: 0,
1724 });
1725 }
1726
1727 for pmt in payments {
1729 let doc_id = &pmt.header.document_id;
1730 let node_id = format!("pmt_{doc_id}");
1731 self.try_add_node(HypergraphNode {
1732 id: node_id,
1733 entity_type: "payment".to_string(),
1734 entity_type_code: type_codes::PAYMENT,
1735 layer: HypergraphLayer::ProcessEvents,
1736 external_id: doc_id.clone(),
1737 label: format!("PMT {doc_id}"),
1738 properties: HashMap::new(),
1739 features: vec![pmt.amount.to_string().parse::<f64>().unwrap_or(0.0).ln_1p()],
1740 is_anomaly: false,
1741 anomaly_type: None,
1742 is_aggregate: false,
1743 aggregate_count: 0,
1744 });
1745 }
1746 }
1747
1748 pub fn add_o2c_documents(
1750 &mut self,
1751 sales_orders: &[datasynth_core::models::documents::SalesOrder],
1752 deliveries: &[datasynth_core::models::documents::Delivery],
1753 customer_invoices: &[datasynth_core::models::documents::CustomerInvoice],
1754 ) {
1755 if !self.config.include_o2c {
1756 return;
1757 }
1758
1759 let mut customer_doc_counts: HashMap<String, usize> = HashMap::new();
1761 for so in sales_orders {
1762 *customer_doc_counts
1763 .entry(so.customer_id.clone())
1764 .or_insert(0) += 1;
1765 }
1766
1767 let threshold = self.config.docs_per_counterparty_threshold;
1768 let should_aggregate = matches!(
1769 self.config.aggregation_strategy,
1770 AggregationStrategy::PoolByCounterparty
1771 );
1772
1773 let customers_needing_pools: Vec<String> = if should_aggregate {
1774 customer_doc_counts
1775 .iter()
1776 .filter(|(_, count)| **count > threshold)
1777 .map(|(cid, _)| cid.clone())
1778 .collect()
1779 } else {
1780 Vec::new()
1781 };
1782
1783 for customer_id in &customers_needing_pools {
1785 let count = customer_doc_counts[customer_id];
1786 let pool_id = format!("pool_o2c_{customer_id}");
1787 if self.try_add_node(HypergraphNode {
1788 id: pool_id.clone(),
1789 entity_type: "o2c_pool".to_string(),
1790 entity_type_code: type_codes::POOL_NODE,
1791 layer: HypergraphLayer::ProcessEvents,
1792 external_id: format!("pool_o2c_{customer_id}"),
1793 label: format!("O2C Pool ({customer_id}): {count} docs"),
1794 properties: {
1795 let mut p = HashMap::new();
1796 p.insert(
1797 "customer_id".to_string(),
1798 Value::String(customer_id.clone()),
1799 );
1800 p.insert("document_count".to_string(), Value::Number(count.into()));
1801 p
1802 },
1803 features: vec![count as f64],
1804 is_anomaly: false,
1805 anomaly_type: None,
1806 is_aggregate: true,
1807 aggregate_count: count,
1808 }) {
1809 self.doc_counterparty_links.push((
1810 pool_id,
1811 "customer".to_string(),
1812 customer_id.clone(),
1813 ));
1814 }
1815 self.aggregate_count += 1;
1816 }
1817
1818 for so in sales_orders {
1819 if should_aggregate && customers_needing_pools.contains(&so.customer_id) {
1820 continue;
1821 }
1822 let doc_id = &so.header.document_id;
1823 let node_id = format!("so_{doc_id}");
1824 if self.try_add_node(HypergraphNode {
1825 id: node_id.clone(),
1826 entity_type: "sales_order".to_string(),
1827 entity_type_code: type_codes::SALES_ORDER,
1828 layer: HypergraphLayer::ProcessEvents,
1829 external_id: doc_id.clone(),
1830 label: format!("SO {doc_id}"),
1831 properties: {
1832 let mut p = HashMap::new();
1833 p.insert(
1834 "customer_id".to_string(),
1835 Value::String(so.customer_id.clone()),
1836 );
1837 p
1838 },
1839 features: vec![so
1840 .total_net_amount
1841 .to_string()
1842 .parse::<f64>()
1843 .unwrap_or(0.0)
1844 .ln_1p()],
1845 is_anomaly: false,
1846 anomaly_type: None,
1847 is_aggregate: false,
1848 aggregate_count: 0,
1849 }) {
1850 self.doc_counterparty_links.push((
1851 node_id,
1852 "customer".to_string(),
1853 so.customer_id.clone(),
1854 ));
1855 }
1856 }
1857
1858 for del in deliveries {
1859 if should_aggregate && customers_needing_pools.contains(&del.customer_id) {
1860 continue;
1861 }
1862 let doc_id = &del.header.document_id;
1863 let node_id = format!("del_{doc_id}");
1864 self.try_add_node(HypergraphNode {
1865 id: node_id,
1866 entity_type: "delivery".to_string(),
1867 entity_type_code: type_codes::DELIVERY,
1868 layer: HypergraphLayer::ProcessEvents,
1869 external_id: doc_id.clone(),
1870 label: format!("DEL {doc_id}"),
1871 properties: HashMap::new(),
1872 features: vec![],
1873 is_anomaly: false,
1874 anomaly_type: None,
1875 is_aggregate: false,
1876 aggregate_count: 0,
1877 });
1878 }
1879
1880 for inv in customer_invoices {
1881 if should_aggregate && customers_needing_pools.contains(&inv.customer_id) {
1882 continue;
1883 }
1884 let doc_id = &inv.header.document_id;
1885 let node_id = format!("cinv_{doc_id}");
1886 self.try_add_node(HypergraphNode {
1887 id: node_id,
1888 entity_type: "customer_invoice".to_string(),
1889 entity_type_code: type_codes::CUSTOMER_INVOICE,
1890 layer: HypergraphLayer::ProcessEvents,
1891 external_id: doc_id.clone(),
1892 label: format!("CI {doc_id}"),
1893 properties: HashMap::new(),
1894 features: vec![inv
1895 .total_gross_amount
1896 .to_string()
1897 .parse::<f64>()
1898 .unwrap_or(0.0)
1899 .ln_1p()],
1900 is_anomaly: false,
1901 anomaly_type: None,
1902 is_aggregate: false,
1903 aggregate_count: 0,
1904 });
1905 }
1906 }
1907
1908 pub fn add_s2c_documents(
1910 &mut self,
1911 projects: &[SourcingProject],
1912 qualifications: &[SupplierQualification],
1913 rfx_events: &[RfxEvent],
1914 bids: &[SupplierBid],
1915 evaluations: &[BidEvaluation],
1916 contracts: &[ProcurementContract],
1917 ) {
1918 if !self.config.include_s2c {
1919 return;
1920 }
1921 for p in projects {
1922 let node_id = format!("s2c_proj_{}", p.project_id);
1923 self.try_add_node(HypergraphNode {
1924 id: node_id,
1925 entity_type: "sourcing_project".into(),
1926 entity_type_code: type_codes::SOURCING_PROJECT,
1927 layer: HypergraphLayer::ProcessEvents,
1928 external_id: p.project_id.clone(),
1929 label: format!("SPRJ {}", p.project_id),
1930 properties: HashMap::new(),
1931 features: vec![p
1932 .estimated_annual_spend
1933 .to_string()
1934 .parse::<f64>()
1935 .unwrap_or(0.0)
1936 .ln_1p()],
1937 is_anomaly: false,
1938 anomaly_type: None,
1939 is_aggregate: false,
1940 aggregate_count: 0,
1941 });
1942 }
1943 for q in qualifications {
1944 let node_id = format!("s2c_qual_{}", q.qualification_id);
1945 self.try_add_node(HypergraphNode {
1946 id: node_id,
1947 entity_type: "supplier_qualification".into(),
1948 entity_type_code: type_codes::SUPPLIER_QUALIFICATION,
1949 layer: HypergraphLayer::ProcessEvents,
1950 external_id: q.qualification_id.clone(),
1951 label: format!("SQUAL {}", q.qualification_id),
1952 properties: HashMap::new(),
1953 features: vec![],
1954 is_anomaly: false,
1955 anomaly_type: None,
1956 is_aggregate: false,
1957 aggregate_count: 0,
1958 });
1959 }
1960 for r in rfx_events {
1961 let node_id = format!("s2c_rfx_{}", r.rfx_id);
1962 self.try_add_node(HypergraphNode {
1963 id: node_id,
1964 entity_type: "rfx_event".into(),
1965 entity_type_code: type_codes::RFX_EVENT,
1966 layer: HypergraphLayer::ProcessEvents,
1967 external_id: r.rfx_id.clone(),
1968 label: format!("RFX {}", r.rfx_id),
1969 properties: HashMap::new(),
1970 features: vec![],
1971 is_anomaly: false,
1972 anomaly_type: None,
1973 is_aggregate: false,
1974 aggregate_count: 0,
1975 });
1976 }
1977 for b in bids {
1978 let node_id = format!("s2c_bid_{}", b.bid_id);
1979 self.try_add_node(HypergraphNode {
1980 id: node_id,
1981 entity_type: "supplier_bid".into(),
1982 entity_type_code: type_codes::SUPPLIER_BID,
1983 layer: HypergraphLayer::ProcessEvents,
1984 external_id: b.bid_id.clone(),
1985 label: format!("BID {}", b.bid_id),
1986 properties: HashMap::new(),
1987 features: vec![b
1988 .total_amount
1989 .to_string()
1990 .parse::<f64>()
1991 .unwrap_or(0.0)
1992 .ln_1p()],
1993 is_anomaly: false,
1994 anomaly_type: None,
1995 is_aggregate: false,
1996 aggregate_count: 0,
1997 });
1998 }
1999 for e in evaluations {
2000 let node_id = format!("s2c_eval_{}", e.evaluation_id);
2001 self.try_add_node(HypergraphNode {
2002 id: node_id,
2003 entity_type: "bid_evaluation".into(),
2004 entity_type_code: type_codes::BID_EVALUATION,
2005 layer: HypergraphLayer::ProcessEvents,
2006 external_id: e.evaluation_id.clone(),
2007 label: format!("BEVAL {}", e.evaluation_id),
2008 properties: HashMap::new(),
2009 features: vec![],
2010 is_anomaly: false,
2011 anomaly_type: None,
2012 is_aggregate: false,
2013 aggregate_count: 0,
2014 });
2015 }
2016 for c in contracts {
2017 let node_id = format!("s2c_ctr_{}", c.contract_id);
2018 self.try_add_node(HypergraphNode {
2019 id: node_id,
2020 entity_type: "procurement_contract".into(),
2021 entity_type_code: type_codes::PROCUREMENT_CONTRACT,
2022 layer: HypergraphLayer::ProcessEvents,
2023 external_id: c.contract_id.clone(),
2024 label: format!("CTR {}", c.contract_id),
2025 properties: HashMap::new(),
2026 features: vec![c
2027 .total_value
2028 .to_string()
2029 .parse::<f64>()
2030 .unwrap_or(0.0)
2031 .ln_1p()],
2032 is_anomaly: false,
2033 anomaly_type: None,
2034 is_aggregate: false,
2035 aggregate_count: 0,
2036 });
2037 self.doc_counterparty_links.push((
2039 format!("s2c_ctr_{}", c.contract_id),
2040 "vendor".into(),
2041 c.vendor_id.clone(),
2042 ));
2043 }
2044 }
2045
2046 pub fn add_h2r_documents(
2048 &mut self,
2049 payroll_runs: &[PayrollRun],
2050 time_entries: &[TimeEntry],
2051 expense_reports: &[ExpenseReport],
2052 ) {
2053 if !self.config.include_h2r {
2054 return;
2055 }
2056 for pr in payroll_runs {
2057 let node_id = format!("h2r_pay_{}", pr.payroll_id);
2058 self.try_add_node(HypergraphNode {
2059 id: node_id,
2060 entity_type: "payroll_run".into(),
2061 entity_type_code: type_codes::PAYROLL_RUN,
2062 layer: HypergraphLayer::ProcessEvents,
2063 external_id: pr.payroll_id.clone(),
2064 label: format!("PAY {}", pr.payroll_id),
2065 properties: HashMap::new(),
2066 features: vec![pr
2067 .total_gross
2068 .to_string()
2069 .parse::<f64>()
2070 .unwrap_or(0.0)
2071 .ln_1p()],
2072 is_anomaly: false,
2073 anomaly_type: None,
2074 is_aggregate: false,
2075 aggregate_count: 0,
2076 });
2077 }
2078 for te in time_entries {
2079 let node_id = format!("h2r_time_{}", te.entry_id);
2080 self.try_add_node(HypergraphNode {
2081 id: node_id,
2082 entity_type: "time_entry".into(),
2083 entity_type_code: type_codes::TIME_ENTRY,
2084 layer: HypergraphLayer::ProcessEvents,
2085 external_id: te.entry_id.clone(),
2086 label: format!("TIME {}", te.entry_id),
2087 properties: HashMap::new(),
2088 features: vec![te.hours_regular + te.hours_overtime],
2089 is_anomaly: false,
2090 anomaly_type: None,
2091 is_aggregate: false,
2092 aggregate_count: 0,
2093 });
2094 }
2095 for er in expense_reports {
2096 let node_id = format!("h2r_exp_{}", er.report_id);
2097 self.try_add_node(HypergraphNode {
2098 id: node_id,
2099 entity_type: "expense_report".into(),
2100 entity_type_code: type_codes::EXPENSE_REPORT,
2101 layer: HypergraphLayer::ProcessEvents,
2102 external_id: er.report_id.clone(),
2103 label: format!("EXP {}", er.report_id),
2104 properties: HashMap::new(),
2105 features: vec![er
2106 .total_amount
2107 .to_string()
2108 .parse::<f64>()
2109 .unwrap_or(0.0)
2110 .ln_1p()],
2111 is_anomaly: false,
2112 anomaly_type: None,
2113 is_aggregate: false,
2114 aggregate_count: 0,
2115 });
2116 }
2117 }
2118
2119 pub fn add_mfg_documents(
2121 &mut self,
2122 production_orders: &[ProductionOrder],
2123 quality_inspections: &[QualityInspection],
2124 cycle_counts: &[CycleCount],
2125 ) {
2126 if !self.config.include_mfg {
2127 return;
2128 }
2129 for po in production_orders {
2130 let node_id = format!("mfg_po_{}", po.order_id);
2131 self.try_add_node(HypergraphNode {
2132 id: node_id,
2133 entity_type: "production_order".into(),
2134 entity_type_code: type_codes::PRODUCTION_ORDER,
2135 layer: HypergraphLayer::ProcessEvents,
2136 external_id: po.order_id.clone(),
2137 label: format!("PROD {}", po.order_id),
2138 properties: HashMap::new(),
2139 features: vec![po
2140 .planned_quantity
2141 .to_string()
2142 .parse::<f64>()
2143 .unwrap_or(0.0)
2144 .ln_1p()],
2145 is_anomaly: false,
2146 anomaly_type: None,
2147 is_aggregate: false,
2148 aggregate_count: 0,
2149 });
2150 }
2151 for qi in quality_inspections {
2152 let node_id = format!("mfg_qi_{}", qi.inspection_id);
2153 self.try_add_node(HypergraphNode {
2154 id: node_id,
2155 entity_type: "quality_inspection".into(),
2156 entity_type_code: type_codes::QUALITY_INSPECTION,
2157 layer: HypergraphLayer::ProcessEvents,
2158 external_id: qi.inspection_id.clone(),
2159 label: format!("QI {}", qi.inspection_id),
2160 properties: HashMap::new(),
2161 features: vec![qi.defect_rate],
2162 is_anomaly: false,
2163 anomaly_type: None,
2164 is_aggregate: false,
2165 aggregate_count: 0,
2166 });
2167 }
2168 for cc in cycle_counts {
2169 let node_id = format!("mfg_cc_{}", cc.count_id);
2170 self.try_add_node(HypergraphNode {
2171 id: node_id,
2172 entity_type: "cycle_count".into(),
2173 entity_type_code: type_codes::CYCLE_COUNT,
2174 layer: HypergraphLayer::ProcessEvents,
2175 external_id: cc.count_id.clone(),
2176 label: format!("CC {}", cc.count_id),
2177 properties: HashMap::new(),
2178 features: vec![cc.variance_rate],
2179 is_anomaly: false,
2180 anomaly_type: None,
2181 is_aggregate: false,
2182 aggregate_count: 0,
2183 });
2184 }
2185 }
2186
2187 pub fn add_bank_documents(
2189 &mut self,
2190 customers: &[BankingCustomer],
2191 accounts: &[BankAccount],
2192 transactions: &[BankTransaction],
2193 ) {
2194 if !self.config.include_bank {
2195 return;
2196 }
2197 for cust in customers {
2198 let cid = cust.customer_id.to_string();
2199 let node_id = format!("bank_cust_{cid}");
2200 self.try_add_node(HypergraphNode {
2201 id: node_id,
2202 entity_type: "banking_customer".into(),
2203 entity_type_code: type_codes::BANKING_CUSTOMER,
2204 layer: HypergraphLayer::ProcessEvents,
2205 external_id: cid,
2206 label: format!("BCUST {}", cust.customer_id),
2207 properties: {
2208 let mut p = HashMap::new();
2209 p.insert(
2210 "customer_type".into(),
2211 Value::String(format!("{:?}", cust.customer_type)),
2212 );
2213 p.insert("name".into(), Value::String(cust.name.legal_name.clone()));
2214 p.insert(
2215 "residence_country".into(),
2216 Value::String(cust.residence_country.clone()),
2217 );
2218 p.insert(
2219 "risk_tier".into(),
2220 Value::String(format!("{:?}", cust.risk_tier)),
2221 );
2222 p.insert("is_pep".into(), Value::Bool(cust.is_pep));
2223 p
2224 },
2225 features: vec![],
2226 is_anomaly: cust.is_mule,
2227 anomaly_type: if cust.is_mule {
2228 Some("mule_account".into())
2229 } else {
2230 None
2231 },
2232 is_aggregate: false,
2233 aggregate_count: 0,
2234 });
2235 }
2236 for acct in accounts {
2237 let aid = acct.account_id.to_string();
2238 let node_id = format!("bank_acct_{aid}");
2239 self.try_add_node(HypergraphNode {
2240 id: node_id,
2241 entity_type: "bank_account".into(),
2242 entity_type_code: type_codes::BANK_ACCOUNT,
2243 layer: HypergraphLayer::ProcessEvents,
2244 external_id: aid,
2245 label: format!("BACCT {}", acct.account_number),
2246 properties: {
2247 let mut p = HashMap::new();
2248 p.insert(
2249 "account_type".into(),
2250 Value::String(format!("{:?}", acct.account_type)),
2251 );
2252 p.insert("status".into(), Value::String(format!("{:?}", acct.status)));
2253 p.insert("currency".into(), Value::String(acct.currency.clone()));
2254 let balance: f64 = acct.current_balance.to_string().parse().unwrap_or(0.0);
2255 p.insert("balance".into(), serde_json::json!(balance));
2256 p.insert(
2257 "account_number".into(),
2258 Value::String(acct.account_number.clone()),
2259 );
2260 p
2261 },
2262 features: vec![acct
2263 .current_balance
2264 .to_string()
2265 .parse::<f64>()
2266 .unwrap_or(0.0)
2267 .ln_1p()],
2268 is_anomaly: acct.is_mule_account,
2269 anomaly_type: if acct.is_mule_account {
2270 Some("mule_account".into())
2271 } else {
2272 None
2273 },
2274 is_aggregate: false,
2275 aggregate_count: 0,
2276 });
2277 }
2278 for txn in transactions {
2279 let tid = txn.transaction_id.to_string();
2280 let node_id = format!("bank_txn_{tid}");
2281 self.try_add_node(HypergraphNode {
2282 id: node_id,
2283 entity_type: "bank_transaction".into(),
2284 entity_type_code: type_codes::BANK_TRANSACTION,
2285 layer: HypergraphLayer::ProcessEvents,
2286 external_id: tid,
2287 label: format!("BTXN {}", txn.reference),
2288 properties: {
2289 let mut p = HashMap::new();
2290 let amount: f64 = txn.amount.to_string().parse().unwrap_or(0.0);
2291 p.insert("amount".into(), serde_json::json!(amount));
2292 p.insert("currency".into(), Value::String(txn.currency.clone()));
2293 p.insert("reference".into(), Value::String(txn.reference.clone()));
2294 p.insert(
2295 "direction".into(),
2296 Value::String(format!("{:?}", txn.direction)),
2297 );
2298 p.insert(
2299 "channel".into(),
2300 Value::String(format!("{:?}", txn.channel)),
2301 );
2302 p.insert(
2303 "category".into(),
2304 Value::String(format!("{:?}", txn.category)),
2305 );
2306 p.insert(
2307 "transaction_type".into(),
2308 Value::String(txn.transaction_type.clone()),
2309 );
2310 p.insert("status".into(), Value::String(format!("{:?}", txn.status)));
2311 if txn.is_suspicious {
2312 p.insert("is_suspicious".into(), Value::Bool(true));
2313 if let Some(ref reason) = txn.suspicion_reason {
2314 p.insert(
2315 "suspicion_reason".into(),
2316 Value::String(format!("{reason:?}")),
2317 );
2318 }
2319 if let Some(ref stage) = txn.laundering_stage {
2320 p.insert(
2321 "laundering_stage".into(),
2322 Value::String(format!("{stage:?}")),
2323 );
2324 }
2325 }
2326 p
2327 },
2328 features: vec![txn
2329 .amount
2330 .to_string()
2331 .parse::<f64>()
2332 .unwrap_or(0.0)
2333 .abs()
2334 .ln_1p()],
2335 is_anomaly: txn.is_suspicious,
2336 anomaly_type: txn.suspicion_reason.as_ref().map(|r| format!("{r:?}")),
2337 is_aggregate: false,
2338 aggregate_count: 0,
2339 });
2340 }
2341 }
2342
2343 #[allow(clippy::too_many_arguments)]
2345 pub fn add_audit_documents(
2346 &mut self,
2347 engagements: &[AuditEngagement],
2348 workpapers: &[Workpaper],
2349 findings: &[AuditFinding],
2350 evidence: &[AuditEvidence],
2351 risks: &[RiskAssessment],
2352 judgments: &[ProfessionalJudgment],
2353 materiality: &[MaterialityCalculation],
2354 opinions: &[AuditOpinion],
2355 going_concern: &[GoingConcernAssessment],
2356 ) {
2357 if !self.config.include_audit {
2358 return;
2359 }
2360 for eng in engagements {
2361 let eid = eng.engagement_id.to_string();
2362 let node_id = format!("audit_eng_{eid}");
2363 self.try_add_node(HypergraphNode {
2364 id: node_id,
2365 entity_type: "audit_engagement".into(),
2366 entity_type_code: type_codes::AUDIT_ENGAGEMENT,
2367 layer: HypergraphLayer::ProcessEvents,
2368 external_id: eid,
2369 label: format!("AENG {}", eng.engagement_ref),
2370 properties: {
2371 let mut p = HashMap::new();
2372 p.insert(
2373 "engagement_ref".into(),
2374 Value::String(eng.engagement_ref.clone()),
2375 );
2376 p.insert("status".into(), Value::String(format!("{:?}", eng.status)));
2377 p.insert(
2378 "engagement_type".into(),
2379 Value::String(format!("{:?}", eng.engagement_type)),
2380 );
2381 p.insert("client_name".into(), Value::String(eng.client_name.clone()));
2382 p.insert("fiscal_year".into(), serde_json::json!(eng.fiscal_year));
2383 let mat: f64 = eng.materiality.to_string().parse().unwrap_or(0.0);
2384 p.insert("materiality".into(), serde_json::json!(mat));
2385 p.insert(
2386 "fieldwork_start".into(),
2387 Value::String(eng.fieldwork_start.to_string()),
2388 );
2389 p.insert(
2390 "fieldwork_end".into(),
2391 Value::String(eng.fieldwork_end.to_string()),
2392 );
2393 p
2394 },
2395 features: vec![eng
2396 .materiality
2397 .to_string()
2398 .parse::<f64>()
2399 .unwrap_or(0.0)
2400 .ln_1p()],
2401 is_anomaly: false,
2402 anomaly_type: None,
2403 is_aggregate: false,
2404 aggregate_count: 0,
2405 });
2406 }
2407 for wp in workpapers {
2408 let wid = wp.workpaper_id.to_string();
2409 let node_id = format!("audit_wp_{wid}");
2410 self.try_add_node(HypergraphNode {
2411 id: node_id,
2412 entity_type: "workpaper".into(),
2413 entity_type_code: type_codes::WORKPAPER,
2414 layer: HypergraphLayer::ProcessEvents,
2415 external_id: wid,
2416 label: format!("WP {}", wp.workpaper_ref),
2417 properties: {
2418 let mut p = HashMap::new();
2419 p.insert(
2420 "workpaper_ref".into(),
2421 Value::String(wp.workpaper_ref.clone()),
2422 );
2423 p.insert("title".into(), Value::String(wp.title.clone()));
2424 p.insert("status".into(), Value::String(format!("{:?}", wp.status)));
2425 p.insert("section".into(), Value::String(format!("{:?}", wp.section)));
2426 p
2427 },
2428 features: vec![],
2429 is_anomaly: false,
2430 anomaly_type: None,
2431 is_aggregate: false,
2432 aggregate_count: 0,
2433 });
2434 }
2435 for f in findings {
2436 let fid = f.finding_id.to_string();
2437 let node_id = format!("audit_find_{fid}");
2438 self.try_add_node(HypergraphNode {
2439 id: node_id,
2440 entity_type: "audit_finding".into(),
2441 entity_type_code: type_codes::AUDIT_FINDING,
2442 layer: HypergraphLayer::ProcessEvents,
2443 external_id: fid,
2444 label: format!("AFIND {}", f.finding_ref),
2445 properties: {
2446 let mut p = HashMap::new();
2447 p.insert("finding_ref".into(), Value::String(f.finding_ref.clone()));
2448 p.insert("title".into(), Value::String(f.title.clone()));
2449 p.insert("description".into(), Value::String(f.condition.clone()));
2450 p.insert(
2451 "severity".into(),
2452 Value::String(format!("{:?}", f.severity)),
2453 );
2454 p.insert("status".into(), Value::String(format!("{:?}", f.status)));
2455 p.insert(
2456 "finding_type".into(),
2457 Value::String(format!("{:?}", f.finding_type)),
2458 );
2459 p
2460 },
2461 features: vec![f.severity.score() as f64 / 5.0],
2462 is_anomaly: false,
2463 anomaly_type: None,
2464 is_aggregate: false,
2465 aggregate_count: 0,
2466 });
2467 }
2468 for ev in evidence {
2469 let evid = ev.evidence_id.to_string();
2470 let node_id = format!("audit_ev_{evid}");
2471 self.try_add_node(HypergraphNode {
2472 id: node_id,
2473 entity_type: "audit_evidence".into(),
2474 entity_type_code: type_codes::AUDIT_EVIDENCE,
2475 layer: HypergraphLayer::ProcessEvents,
2476 external_id: evid,
2477 label: format!("AEV {}", ev.evidence_id),
2478 properties: {
2479 let mut p = HashMap::new();
2480 p.insert(
2481 "evidence_type".into(),
2482 Value::String(format!("{:?}", ev.evidence_type)),
2483 );
2484 p.insert("description".into(), Value::String(ev.description.clone()));
2485 p.insert(
2486 "source_type".into(),
2487 Value::String(format!("{:?}", ev.source_type)),
2488 );
2489 p.insert(
2490 "reliability".into(),
2491 Value::String(format!(
2492 "{:?}",
2493 ev.reliability_assessment.overall_reliability
2494 )),
2495 );
2496 p
2497 },
2498 features: vec![ev.reliability_assessment.overall_reliability.score() as f64 / 3.0],
2499 is_anomaly: false,
2500 anomaly_type: None,
2501 is_aggregate: false,
2502 aggregate_count: 0,
2503 });
2504 }
2505 for r in risks {
2506 let rid = r.risk_id.to_string();
2507 let node_id = format!("audit_risk_{rid}");
2508 self.try_add_node(HypergraphNode {
2509 id: node_id,
2510 entity_type: "risk_assessment".into(),
2511 entity_type_code: type_codes::RISK_ASSESSMENT,
2512 layer: HypergraphLayer::ProcessEvents,
2513 external_id: rid,
2514 label: format!("ARISK {}", r.risk_ref),
2515 properties: {
2516 let mut p = HashMap::new();
2517 p.insert("risk_ref".into(), Value::String(r.risk_ref.clone()));
2518 p.insert(
2519 "account_or_process".into(),
2520 Value::String(r.account_or_process.clone()),
2521 );
2522 p.insert(
2523 "response_nature".into(),
2524 Value::String(format!("{:?}", r.response_nature)),
2525 );
2526 p
2527 },
2528 features: vec![
2529 r.inherent_risk.score() as f64 / 4.0,
2530 r.control_risk.score() as f64 / 4.0,
2531 if r.is_significant_risk { 1.0 } else { 0.0 },
2532 ],
2533 is_anomaly: false,
2534 anomaly_type: None,
2535 is_aggregate: false,
2536 aggregate_count: 0,
2537 });
2538 }
2539 for j in judgments {
2540 let jid = j.judgment_id.to_string();
2541 let node_id = format!("audit_judg_{jid}");
2542 self.try_add_node(HypergraphNode {
2543 id: node_id,
2544 entity_type: "professional_judgment".into(),
2545 entity_type_code: type_codes::PROFESSIONAL_JUDGMENT,
2546 layer: HypergraphLayer::ProcessEvents,
2547 external_id: jid,
2548 label: format!("AJUDG {}", j.judgment_id),
2549 properties: {
2550 let mut p = HashMap::new();
2551 p.insert("judgment_ref".into(), Value::String(j.judgment_ref.clone()));
2552 p.insert("subject".into(), Value::String(j.subject.clone()));
2553 p.insert(
2554 "description".into(),
2555 Value::String(j.issue_description.clone()),
2556 );
2557 p.insert("conclusion".into(), Value::String(j.conclusion.clone()));
2558 p.insert(
2559 "judgment_type".into(),
2560 Value::String(format!("{:?}", j.judgment_type)),
2561 );
2562 p
2563 },
2564 features: vec![],
2565 is_anomaly: false,
2566 anomaly_type: None,
2567 is_aggregate: false,
2568 aggregate_count: 0,
2569 });
2570 }
2571
2572 for m in materiality {
2574 let node_id = format!("audit_mat_{}_{}", m.entity_code, m.period);
2575 self.try_add_node(HypergraphNode {
2576 id: node_id.clone(),
2577 entity_type: "materiality_calculation".into(),
2578 entity_type_code: type_codes::MATERIALITY_CALCULATION,
2579 layer: HypergraphLayer::ProcessEvents,
2580 external_id: format!("{}_{}", m.entity_code, m.period),
2581 label: format!("MAT {} {}", m.entity_code, m.period),
2582 properties: {
2583 let mut p = HashMap::new();
2584 p.insert("entity_code".into(), Value::String(m.entity_code.clone()));
2585 p.insert("period".into(), Value::String(m.period.clone()));
2586 p.insert(
2587 "benchmark".into(),
2588 Value::String(format!("{:?}", m.benchmark)),
2589 );
2590 let mat: f64 = m.overall_materiality.to_string().parse().unwrap_or(0.0);
2591 p.insert("overall_materiality".into(), serde_json::json!(mat));
2592 let perf: f64 = m.performance_materiality.to_string().parse().unwrap_or(0.0);
2593 p.insert("performance_materiality".into(), serde_json::json!(perf));
2594 p
2595 },
2596 features: vec![m
2597 .overall_materiality
2598 .to_string()
2599 .parse::<f64>()
2600 .unwrap_or(0.0)
2601 .ln_1p()],
2602 is_anomaly: false,
2603 anomaly_type: None,
2604 is_aggregate: false,
2605 aggregate_count: 0,
2606 });
2607 }
2608
2609 for op in opinions {
2611 let oid = op.opinion_id.to_string();
2612 let node_id = format!("audit_op_{oid}");
2613 let added = self.try_add_node(HypergraphNode {
2614 id: node_id.clone(),
2615 entity_type: "audit_opinion".into(),
2616 entity_type_code: type_codes::AUDIT_OPINION,
2617 layer: HypergraphLayer::ProcessEvents,
2618 external_id: oid,
2619 label: format!("AOPN {}", op.opinion_type),
2620 properties: {
2621 let mut p = HashMap::new();
2622 p.insert(
2623 "opinion_type".into(),
2624 Value::String(format!("{}", op.opinion_type)),
2625 );
2626 p.insert("entity_name".into(), Value::String(op.entity_name.clone()));
2627 p.insert(
2628 "opinion_date".into(),
2629 Value::String(op.opinion_date.to_string()),
2630 );
2631 p.insert(
2632 "material_uncertainty_gc".into(),
2633 serde_json::json!(op.material_uncertainty_going_concern),
2634 );
2635 p.insert(
2636 "kam_count".into(),
2637 serde_json::json!(op.key_audit_matters.len()),
2638 );
2639 p
2640 },
2641 features: vec![if op.is_unmodified() { 0.0 } else { 1.0 }],
2642 is_anomaly: op.is_modified(),
2643 anomaly_type: if op.is_modified() {
2644 Some("modified_opinion".into())
2645 } else {
2646 None
2647 },
2648 is_aggregate: false,
2649 aggregate_count: 0,
2650 });
2651 if added {
2652 self.edges.push(CrossLayerEdge {
2654 source_id: node_id.clone(),
2655 source_layer: HypergraphLayer::ProcessEvents,
2656 target_id: format!("audit_eng_{}", op.engagement_id),
2657 target_layer: HypergraphLayer::ProcessEvents,
2658 edge_type: "OPINION_FOR_ENGAGEMENT".into(),
2659 edge_type_code: type_codes::OPINION_FOR_ENGAGEMENT,
2660 properties: HashMap::new(),
2661 });
2662 }
2663 }
2664
2665 for gc in going_concern {
2667 let node_id = format!("audit_gc_{}_{}", gc.entity_code, gc.assessment_period);
2668 self.try_add_node(HypergraphNode {
2669 id: node_id,
2670 entity_type: "going_concern_assessment".into(),
2671 entity_type_code: type_codes::GOING_CONCERN_ASSESSMENT,
2672 layer: HypergraphLayer::ProcessEvents,
2673 external_id: format!("{}_{}", gc.entity_code, gc.assessment_period),
2674 label: format!("GC {} {}", gc.entity_code, gc.assessment_period),
2675 properties: {
2676 let mut p = HashMap::new();
2677 p.insert("entity_code".into(), Value::String(gc.entity_code.clone()));
2678 p.insert(
2679 "assessment_period".into(),
2680 Value::String(gc.assessment_period.clone()),
2681 );
2682 p.insert(
2683 "conclusion".into(),
2684 Value::String(format!("{:?}", gc.auditor_conclusion)),
2685 );
2686 p.insert(
2687 "material_uncertainty".into(),
2688 serde_json::json!(gc.material_uncertainty_exists),
2689 );
2690 p.insert(
2691 "indicator_count".into(),
2692 serde_json::json!(gc.indicators.len()),
2693 );
2694 p
2695 },
2696 features: vec![
2697 gc.indicators.len() as f64,
2698 if gc.material_uncertainty_exists {
2699 1.0
2700 } else {
2701 0.0
2702 },
2703 ],
2704 is_anomaly: gc.material_uncertainty_exists,
2705 anomaly_type: if gc.material_uncertainty_exists {
2706 Some("going_concern_uncertainty".into())
2707 } else {
2708 None
2709 },
2710 is_aggregate: false,
2711 aggregate_count: 0,
2712 });
2713 }
2714
2715 for wp in workpapers {
2719 self.edges.push(CrossLayerEdge {
2720 source_id: format!("audit_eng_{}", wp.engagement_id),
2721 source_layer: HypergraphLayer::ProcessEvents,
2722 target_id: format!("audit_wp_{}", wp.workpaper_id),
2723 target_layer: HypergraphLayer::ProcessEvents,
2724 edge_type: "DOCUMENTED_BY".into(),
2725 edge_type_code: type_codes::DOCUMENTED_BY,
2726 properties: HashMap::new(),
2727 });
2728 }
2729
2730 for f in findings {
2732 if let Some(ref risk_id) = f.related_risk_id {
2733 self.edges.push(CrossLayerEdge {
2734 source_id: format!("audit_find_{}", f.finding_id),
2735 source_layer: HypergraphLayer::ProcessEvents,
2736 target_id: format!("audit_risk_{risk_id}"),
2737 target_layer: HypergraphLayer::ProcessEvents,
2738 edge_type: "IDENTIFIED_FROM".into(),
2739 edge_type_code: type_codes::IDENTIFIED_FROM,
2740 properties: HashMap::new(),
2741 });
2742 }
2743 }
2744
2745 for op in opinions {
2747 for f in findings {
2748 if f.engagement_id == op.engagement_id {
2749 self.edges.push(CrossLayerEdge {
2750 source_id: format!("audit_op_{}", op.opinion_id),
2751 source_layer: HypergraphLayer::ProcessEvents,
2752 target_id: format!("audit_find_{}", f.finding_id),
2753 target_layer: HypergraphLayer::ProcessEvents,
2754 edge_type: "OPINION_BASED_ON".into(),
2755 edge_type_code: type_codes::OPINION_BASED_ON,
2756 properties: HashMap::new(),
2757 });
2758 }
2759 }
2760 }
2761 }
2762
2763 #[allow(clippy::too_many_arguments)]
2772 pub fn add_audit_procedure_entities(
2773 &mut self,
2774 confirmations: &[ExternalConfirmation],
2775 responses: &[ConfirmationResponse],
2776 steps: &[AuditProcedureStep],
2777 samples: &[AuditSample],
2778 analytical_results: &[AnalyticalProcedureResult],
2779 ia_functions: &[InternalAuditFunction],
2780 ia_reports: &[InternalAuditReport],
2781 related_parties: &[RelatedParty],
2782 rp_transactions: &[RelatedPartyTransaction],
2783 ) {
2784 if !self.config.include_audit {
2785 return;
2786 }
2787
2788 for conf in confirmations {
2790 let ext_id = conf.confirmation_id.to_string();
2791 let node_id = format!("audit_conf_{ext_id}");
2792 let added = self.try_add_node(HypergraphNode {
2793 id: node_id.clone(),
2794 entity_type: "external_confirmation".into(),
2795 entity_type_code: type_codes::EXTERNAL_CONFIRMATION,
2796 layer: HypergraphLayer::GovernanceControls,
2797 external_id: ext_id.clone(),
2798 label: format!("CONF {}", conf.confirmation_ref),
2799 properties: {
2800 let mut p = HashMap::new();
2801 p.insert(
2802 "entity_id".into(),
2803 Value::String(conf.confirmation_ref.clone()),
2804 );
2805 p.insert("process_family".into(), Value::String("AUDIT".into()));
2806 p
2807 },
2808 features: vec![],
2809 is_anomaly: false,
2810 anomaly_type: None,
2811 is_aggregate: false,
2812 aggregate_count: 0,
2813 });
2814 if added {
2815 if let Some(wp_id) = &conf.workpaper_id {
2816 self.edges.push(CrossLayerEdge {
2817 source_id: node_id.clone(),
2818 source_layer: HypergraphLayer::GovernanceControls,
2819 target_id: format!("audit_wp_{wp_id}"),
2820 target_layer: HypergraphLayer::ProcessEvents,
2821 edge_type: "CONFIRMATION_IN_WORKPAPER".into(),
2822 edge_type_code: type_codes::CONFIRMATION_IN_WORKPAPER,
2823 properties: HashMap::new(),
2824 });
2825 }
2826 if let Some(acct_id) = &conf.account_id {
2827 self.edges.push(CrossLayerEdge {
2828 source_id: node_id,
2829 source_layer: HypergraphLayer::GovernanceControls,
2830 target_id: format!("acct_{acct_id}"),
2831 target_layer: HypergraphLayer::AccountingNetwork,
2832 edge_type: "CONFIRMATION_FOR_ACCOUNT".into(),
2833 edge_type_code: type_codes::CONFIRMATION_FOR_ACCOUNT,
2834 properties: HashMap::new(),
2835 });
2836 }
2837 }
2838 }
2839
2840 for resp in responses {
2842 let ext_id = resp.response_id.to_string();
2843 let node_id = format!("audit_resp_{ext_id}");
2844 let added = self.try_add_node(HypergraphNode {
2845 id: node_id.clone(),
2846 entity_type: "confirmation_response".into(),
2847 entity_type_code: type_codes::CONFIRMATION_RESPONSE,
2848 layer: HypergraphLayer::GovernanceControls,
2849 external_id: ext_id.clone(),
2850 label: format!("RESP {}", resp.response_ref),
2851 properties: {
2852 let mut p = HashMap::new();
2853 p.insert("entity_id".into(), Value::String(resp.response_ref.clone()));
2854 p.insert("process_family".into(), Value::String("AUDIT".into()));
2855 p
2856 },
2857 features: vec![],
2858 is_anomaly: false,
2859 anomaly_type: None,
2860 is_aggregate: false,
2861 aggregate_count: 0,
2862 });
2863 if added {
2864 self.edges.push(CrossLayerEdge {
2865 source_id: node_id,
2866 source_layer: HypergraphLayer::GovernanceControls,
2867 target_id: format!("audit_conf_{}", resp.confirmation_id),
2868 target_layer: HypergraphLayer::GovernanceControls,
2869 edge_type: "CONFIRMATION_RESPONSE".into(),
2870 edge_type_code: type_codes::CONFIRMATION_RESPONSE_EDGE,
2871 properties: HashMap::new(),
2872 });
2873 }
2874 }
2875
2876 for step in steps {
2878 let ext_id = step.step_id.to_string();
2879 let node_id = format!("audit_step_{ext_id}");
2880 let added = self.try_add_node(HypergraphNode {
2881 id: node_id.clone(),
2882 entity_type: "audit_procedure_step".into(),
2883 entity_type_code: type_codes::AUDIT_PROCEDURE_STEP,
2884 layer: HypergraphLayer::GovernanceControls,
2885 external_id: ext_id.clone(),
2886 label: format!("STEP {}", step.step_ref),
2887 properties: {
2888 let mut p = HashMap::new();
2889 p.insert("entity_id".into(), Value::String(step.step_ref.clone()));
2890 p.insert("process_family".into(), Value::String("AUDIT".into()));
2891 p
2892 },
2893 features: vec![],
2894 is_anomaly: false,
2895 anomaly_type: None,
2896 is_aggregate: false,
2897 aggregate_count: 0,
2898 });
2899 if added {
2900 self.edges.push(CrossLayerEdge {
2901 source_id: node_id.clone(),
2902 source_layer: HypergraphLayer::GovernanceControls,
2903 target_id: format!("audit_wp_{}", step.workpaper_id),
2904 target_layer: HypergraphLayer::ProcessEvents,
2905 edge_type: "STEP_IN_WORKPAPER".into(),
2906 edge_type_code: type_codes::STEP_IN_WORKPAPER,
2907 properties: HashMap::new(),
2908 });
2909 if let Some(sid) = &step.sample_id {
2910 self.edges.push(CrossLayerEdge {
2911 source_id: node_id.clone(),
2912 source_layer: HypergraphLayer::GovernanceControls,
2913 target_id: format!("audit_samp_{sid}"),
2914 target_layer: HypergraphLayer::GovernanceControls,
2915 edge_type: "STEP_USES_SAMPLE".into(),
2916 edge_type_code: type_codes::STEP_USES_SAMPLE,
2917 properties: HashMap::new(),
2918 });
2919 }
2920 for eid in &step.evidence_ids {
2921 self.edges.push(CrossLayerEdge {
2922 source_id: node_id.clone(),
2923 source_layer: HypergraphLayer::GovernanceControls,
2924 target_id: format!("audit_ev_{eid}"),
2925 target_layer: HypergraphLayer::ProcessEvents,
2926 edge_type: "STEP_EVIDENCE".into(),
2927 edge_type_code: type_codes::STEP_EVIDENCE,
2928 properties: HashMap::new(),
2929 });
2930 }
2931 }
2932 }
2933
2934 for sample in samples {
2936 let ext_id = sample.sample_id.to_string();
2937 let node_id = format!("audit_samp_{ext_id}");
2938 let added = self.try_add_node(HypergraphNode {
2939 id: node_id.clone(),
2940 entity_type: "audit_sample".into(),
2941 entity_type_code: type_codes::AUDIT_SAMPLE,
2942 layer: HypergraphLayer::GovernanceControls,
2943 external_id: ext_id.clone(),
2944 label: format!("SAMP {}", sample.sample_ref),
2945 properties: {
2946 let mut p = HashMap::new();
2947 p.insert("entity_id".into(), Value::String(sample.sample_ref.clone()));
2948 p.insert("process_family".into(), Value::String("AUDIT".into()));
2949 p
2950 },
2951 features: vec![],
2952 is_anomaly: false,
2953 anomaly_type: None,
2954 is_aggregate: false,
2955 aggregate_count: 0,
2956 });
2957 if added {
2958 self.edges.push(CrossLayerEdge {
2959 source_id: node_id,
2960 source_layer: HypergraphLayer::GovernanceControls,
2961 target_id: format!("audit_wp_{}", sample.workpaper_id),
2962 target_layer: HypergraphLayer::ProcessEvents,
2963 edge_type: "SAMPLE_FROM_WORKPAPER".into(),
2964 edge_type_code: type_codes::SAMPLE_FROM_WORKPAPER,
2965 properties: HashMap::new(),
2966 });
2967 }
2968 }
2969
2970 for ap in analytical_results {
2972 let ext_id = ap.result_id.to_string();
2973 let node_id = format!("audit_ap_{ext_id}");
2974 let added = self.try_add_node(HypergraphNode {
2975 id: node_id.clone(),
2976 entity_type: "analytical_procedure_result".into(),
2977 entity_type_code: type_codes::ANALYTICAL_PROCEDURE_RESULT,
2978 layer: HypergraphLayer::GovernanceControls,
2979 external_id: ext_id.clone(),
2980 label: format!("AP {}", ap.result_ref),
2981 properties: {
2982 let mut p = HashMap::new();
2983 p.insert("entity_id".into(), Value::String(ap.result_ref.clone()));
2984 p.insert("process_family".into(), Value::String("AUDIT".into()));
2985 p
2986 },
2987 features: vec![ap.variance_percentage.abs().ln_1p()],
2988 is_anomaly: ap.requires_investigation,
2989 anomaly_type: if ap.requires_investigation {
2990 Some("analytical_variance".into())
2991 } else {
2992 None
2993 },
2994 is_aggregate: false,
2995 aggregate_count: 0,
2996 });
2997 if added {
2998 if let Some(wp_id) = &ap.workpaper_id {
2999 self.edges.push(CrossLayerEdge {
3000 source_id: node_id.clone(),
3001 source_layer: HypergraphLayer::GovernanceControls,
3002 target_id: format!("audit_wp_{wp_id}"),
3003 target_layer: HypergraphLayer::ProcessEvents,
3004 edge_type: "AP_IN_WORKPAPER".into(),
3005 edge_type_code: type_codes::AP_IN_WORKPAPER,
3006 properties: HashMap::new(),
3007 });
3008 }
3009 if let Some(acct_id) = &ap.account_id {
3010 self.edges.push(CrossLayerEdge {
3011 source_id: node_id,
3012 source_layer: HypergraphLayer::GovernanceControls,
3013 target_id: format!("acct_{acct_id}"),
3014 target_layer: HypergraphLayer::AccountingNetwork,
3015 edge_type: "AP_FOR_ACCOUNT".into(),
3016 edge_type_code: type_codes::AP_FOR_ACCOUNT,
3017 properties: HashMap::new(),
3018 });
3019 }
3020 }
3021 }
3022
3023 for iaf in ia_functions {
3025 let ext_id = iaf.function_id.to_string();
3026 let node_id = format!("audit_iaf_{ext_id}");
3027 let added = self.try_add_node(HypergraphNode {
3028 id: node_id.clone(),
3029 entity_type: "internal_audit_function".into(),
3030 entity_type_code: type_codes::INTERNAL_AUDIT_FUNCTION,
3031 layer: HypergraphLayer::GovernanceControls,
3032 external_id: ext_id.clone(),
3033 label: format!("IAF {}", iaf.function_ref),
3034 properties: {
3035 let mut p = HashMap::new();
3036 p.insert("entity_id".into(), Value::String(iaf.function_ref.clone()));
3037 p.insert("process_family".into(), Value::String("AUDIT".into()));
3038 p
3039 },
3040 features: vec![iaf.annual_plan_coverage],
3041 is_anomaly: false,
3042 anomaly_type: None,
3043 is_aggregate: false,
3044 aggregate_count: 0,
3045 });
3046 if added {
3047 self.edges.push(CrossLayerEdge {
3048 source_id: node_id,
3049 source_layer: HypergraphLayer::GovernanceControls,
3050 target_id: format!("audit_eng_{}", iaf.engagement_id),
3051 target_layer: HypergraphLayer::ProcessEvents,
3052 edge_type: "IAF_FOR_ENGAGEMENT".into(),
3053 edge_type_code: type_codes::IAF_FOR_ENGAGEMENT,
3054 properties: HashMap::new(),
3055 });
3056 }
3057 }
3058
3059 for iar in ia_reports {
3061 let ext_id = iar.report_id.to_string();
3062 let node_id = format!("audit_iar_{ext_id}");
3063 let added = self.try_add_node(HypergraphNode {
3064 id: node_id.clone(),
3065 entity_type: "internal_audit_report".into(),
3066 entity_type_code: type_codes::INTERNAL_AUDIT_REPORT,
3067 layer: HypergraphLayer::GovernanceControls,
3068 external_id: ext_id.clone(),
3069 label: format!("IAR {}", iar.report_ref),
3070 properties: {
3071 let mut p = HashMap::new();
3072 p.insert("entity_id".into(), Value::String(iar.report_ref.clone()));
3073 p.insert("process_family".into(), Value::String("AUDIT".into()));
3074 p
3075 },
3076 features: vec![],
3077 is_anomaly: false,
3078 anomaly_type: None,
3079 is_aggregate: false,
3080 aggregate_count: 0,
3081 });
3082 if added {
3083 self.edges.push(CrossLayerEdge {
3084 source_id: node_id.clone(),
3085 source_layer: HypergraphLayer::GovernanceControls,
3086 target_id: format!("audit_iaf_{}", iar.ia_function_id),
3087 target_layer: HypergraphLayer::GovernanceControls,
3088 edge_type: "REPORT_FROM_IAF".into(),
3089 edge_type_code: type_codes::REPORT_FROM_IAF,
3090 properties: HashMap::new(),
3091 });
3092 self.edges.push(CrossLayerEdge {
3093 source_id: node_id,
3094 source_layer: HypergraphLayer::GovernanceControls,
3095 target_id: format!("audit_eng_{}", iar.engagement_id),
3096 target_layer: HypergraphLayer::ProcessEvents,
3097 edge_type: "IA_REPORT_FOR_ENGAGEMENT".into(),
3098 edge_type_code: type_codes::IA_REPORT_FOR_ENGAGEMENT,
3099 properties: HashMap::new(),
3100 });
3101 }
3102 }
3103
3104 for rp in related_parties {
3106 let ext_id = rp.party_id.to_string();
3107 let node_id = format!("audit_rp_{ext_id}");
3108 let added = self.try_add_node(HypergraphNode {
3109 id: node_id.clone(),
3110 entity_type: "related_party".into(),
3111 entity_type_code: type_codes::RELATED_PARTY,
3112 layer: HypergraphLayer::GovernanceControls,
3113 external_id: ext_id.clone(),
3114 label: format!("RP {}", rp.party_ref),
3115 properties: {
3116 let mut p = HashMap::new();
3117 p.insert("entity_id".into(), Value::String(rp.party_ref.clone()));
3118 p.insert("process_family".into(), Value::String("AUDIT".into()));
3119 p
3120 },
3121 features: vec![],
3122 is_anomaly: false,
3123 anomaly_type: None,
3124 is_aggregate: false,
3125 aggregate_count: 0,
3126 });
3127 if added {
3128 self.edges.push(CrossLayerEdge {
3129 source_id: node_id,
3130 source_layer: HypergraphLayer::GovernanceControls,
3131 target_id: format!("audit_eng_{}", rp.engagement_id),
3132 target_layer: HypergraphLayer::ProcessEvents,
3133 edge_type: "RP_FOR_ENGAGEMENT".into(),
3134 edge_type_code: type_codes::RP_FOR_ENGAGEMENT,
3135 properties: HashMap::new(),
3136 });
3137 }
3138 }
3139
3140 for rpt in rp_transactions {
3142 let ext_id = rpt.transaction_id.to_string();
3143 let node_id = format!("audit_rpt_{ext_id}");
3144 let added = self.try_add_node(HypergraphNode {
3145 id: node_id.clone(),
3146 entity_type: "related_party_transaction".into(),
3147 entity_type_code: type_codes::RELATED_PARTY_TRANSACTION,
3148 layer: HypergraphLayer::ProcessEvents,
3149 external_id: ext_id.clone(),
3150 label: format!("RPT {}", rpt.transaction_ref),
3151 properties: {
3152 let mut p = HashMap::new();
3153 p.insert(
3154 "entity_id".into(),
3155 Value::String(rpt.transaction_ref.clone()),
3156 );
3157 p.insert("process_family".into(), Value::String("AUDIT".into()));
3158 p
3159 },
3160 features: vec![rpt
3161 .amount
3162 .to_string()
3163 .parse::<f64>()
3164 .unwrap_or(0.0)
3165 .abs()
3166 .ln_1p()],
3167 is_anomaly: rpt.management_override_risk,
3168 anomaly_type: if rpt.management_override_risk {
3169 Some("management_override_risk".into())
3170 } else {
3171 None
3172 },
3173 is_aggregate: false,
3174 aggregate_count: 0,
3175 });
3176 if added {
3177 self.edges.push(CrossLayerEdge {
3178 source_id: node_id,
3179 source_layer: HypergraphLayer::ProcessEvents,
3180 target_id: format!("audit_rp_{}", rpt.related_party_id),
3181 target_layer: HypergraphLayer::GovernanceControls,
3182 edge_type: "RPT_WITH_PARTY".into(),
3183 edge_type_code: type_codes::RPT_WITH_PARTY,
3184 properties: HashMap::new(),
3185 });
3186 }
3187 }
3188 }
3189
3190 pub fn add_bank_recon_documents(&mut self, reconciliations: &[BankReconciliation]) {
3192 if !self.config.include_r2r {
3193 return;
3194 }
3195 for recon in reconciliations {
3196 let node_id = format!("recon_{}", recon.reconciliation_id);
3197 self.try_add_node(HypergraphNode {
3198 id: node_id,
3199 entity_type: "bank_reconciliation".into(),
3200 entity_type_code: type_codes::BANK_RECONCILIATION,
3201 layer: HypergraphLayer::ProcessEvents,
3202 external_id: recon.reconciliation_id.clone(),
3203 label: format!("RECON {}", recon.reconciliation_id),
3204 properties: HashMap::new(),
3205 features: vec![recon
3206 .bank_ending_balance
3207 .to_string()
3208 .parse::<f64>()
3209 .unwrap_or(0.0)
3210 .ln_1p()],
3211 is_anomaly: false,
3212 anomaly_type: None,
3213 is_aggregate: false,
3214 aggregate_count: 0,
3215 });
3216 for line in &recon.statement_lines {
3217 let node_id = format!("recon_line_{}", line.line_id);
3218 self.try_add_node(HypergraphNode {
3219 id: node_id,
3220 entity_type: "bank_statement_line".into(),
3221 entity_type_code: type_codes::BANK_STATEMENT_LINE,
3222 layer: HypergraphLayer::ProcessEvents,
3223 external_id: line.line_id.clone(),
3224 label: format!("BSL {}", line.line_id),
3225 properties: HashMap::new(),
3226 features: vec![line
3227 .amount
3228 .to_string()
3229 .parse::<f64>()
3230 .unwrap_or(0.0)
3231 .abs()
3232 .ln_1p()],
3233 is_anomaly: false,
3234 anomaly_type: None,
3235 is_aggregate: false,
3236 aggregate_count: 0,
3237 });
3238 }
3239 for item in &recon.reconciling_items {
3240 let node_id = format!("recon_item_{}", item.item_id);
3241 self.try_add_node(HypergraphNode {
3242 id: node_id,
3243 entity_type: "reconciling_item".into(),
3244 entity_type_code: type_codes::RECONCILING_ITEM,
3245 layer: HypergraphLayer::ProcessEvents,
3246 external_id: item.item_id.clone(),
3247 label: format!("RITEM {}", item.item_id),
3248 properties: HashMap::new(),
3249 features: vec![item
3250 .amount
3251 .to_string()
3252 .parse::<f64>()
3253 .unwrap_or(0.0)
3254 .abs()
3255 .ln_1p()],
3256 is_anomaly: false,
3257 anomaly_type: None,
3258 is_aggregate: false,
3259 aggregate_count: 0,
3260 });
3261 }
3262 }
3263 }
3264
3265 pub fn add_ocpm_events(&mut self, event_log: &datasynth_ocpm::OcpmEventLog) {
3267 if !self.config.events_as_hyperedges {
3268 return;
3269 }
3270 for event in &event_log.events {
3271 let participants: Vec<HyperedgeParticipant> = event
3272 .object_refs
3273 .iter()
3274 .map(|obj_ref| {
3275 let node_id = format!("ocpm_obj_{}", obj_ref.object_id);
3276 self.try_add_node(HypergraphNode {
3278 id: node_id.clone(),
3279 entity_type: "ocpm_object".into(),
3280 entity_type_code: type_codes::OCPM_EVENT,
3281 layer: HypergraphLayer::ProcessEvents,
3282 external_id: obj_ref.object_id.to_string(),
3283 label: format!("OBJ {}", obj_ref.object_type_id),
3284 properties: HashMap::new(),
3285 features: vec![],
3286 is_anomaly: false,
3287 anomaly_type: None,
3288 is_aggregate: false,
3289 aggregate_count: 0,
3290 });
3291 HyperedgeParticipant {
3292 node_id,
3293 role: format!("{:?}", obj_ref.qualifier),
3294 weight: None,
3295 }
3296 })
3297 .collect();
3298
3299 if !participants.is_empty() {
3300 let mut props = HashMap::new();
3301 props.insert(
3302 "activity_id".into(),
3303 Value::String(event.activity_id.clone()),
3304 );
3305 props.insert(
3306 "timestamp".into(),
3307 Value::String(event.timestamp.to_rfc3339()),
3308 );
3309 if !event.resource_id.is_empty() {
3310 props.insert("resource".into(), Value::String(event.resource_id.clone()));
3311 }
3312
3313 self.hyperedges.push(Hyperedge {
3314 id: format!("ocpm_evt_{}", event.event_id),
3315 hyperedge_type: "OcpmEvent".into(),
3316 subtype: event.activity_id.clone(),
3317 participants,
3318 layer: HypergraphLayer::ProcessEvents,
3319 properties: props,
3320 timestamp: Some(event.timestamp.date_naive()),
3321 is_anomaly: false,
3322 anomaly_type: None,
3323 features: vec![],
3324 });
3325 }
3326 }
3327 }
3328
3329 pub fn add_compliance_regulations(
3336 &mut self,
3337 standards: &[ComplianceStandard],
3338 findings: &[ComplianceFinding],
3339 filings: &[RegulatoryFiling],
3340 ) {
3341 if !self.config.include_compliance {
3342 return;
3343 }
3344
3345 for std in standards {
3347 if std.is_superseded() {
3348 continue;
3349 }
3350 let sid = std.id.as_str().to_string();
3351 let node_id = format!("cr_std_{sid}");
3352 if self.try_add_node(HypergraphNode {
3353 id: node_id.clone(),
3354 entity_type: "compliance_standard".into(),
3355 entity_type_code: type_codes::COMPLIANCE_STANDARD,
3356 layer: HypergraphLayer::GovernanceControls,
3357 external_id: sid.clone(),
3358 label: format!("{}: {}", sid, std.title),
3359 properties: {
3360 let mut p = HashMap::new();
3361 p.insert("title".into(), Value::String(std.title.clone()));
3362 p.insert("category".into(), Value::String(std.category.to_string()));
3363 p.insert("domain".into(), Value::String(std.domain.to_string()));
3364 p.insert(
3365 "issuingBody".into(),
3366 Value::String(std.issuing_body.to_string()),
3367 );
3368 if !std.applicable_account_types.is_empty() {
3369 p.insert(
3370 "applicableAccountTypes".into(),
3371 Value::Array(
3372 std.applicable_account_types
3373 .iter()
3374 .map(|s| Value::String(s.clone()))
3375 .collect(),
3376 ),
3377 );
3378 }
3379 if !std.applicable_processes.is_empty() {
3380 p.insert(
3381 "applicableProcesses".into(),
3382 Value::Array(
3383 std.applicable_processes
3384 .iter()
3385 .map(|s| Value::String(s.clone()))
3386 .collect(),
3387 ),
3388 );
3389 }
3390 p
3391 },
3392 features: vec![
3393 std.versions.len() as f64,
3394 std.requirements.len() as f64,
3395 std.mandatory_jurisdictions.len() as f64,
3396 ],
3397 is_anomaly: false,
3398 anomaly_type: None,
3399 is_aggregate: false,
3400 aggregate_count: 0,
3401 }) {
3402 self.standard_node_ids.insert(sid.clone(), node_id.clone());
3403
3404 for _acct_type in &std.applicable_account_types {
3406 }
3409 }
3410 }
3411
3412 for finding in findings {
3414 let fid = finding.finding_id.to_string();
3415 let node_id = format!("cr_find_{fid}");
3416 if self.try_add_node(HypergraphNode {
3417 id: node_id.clone(),
3418 entity_type: "compliance_finding".into(),
3419 entity_type_code: type_codes::COMPLIANCE_FINDING,
3420 layer: HypergraphLayer::ProcessEvents,
3421 external_id: fid,
3422 label: format!("CF {} [{}]", finding.deficiency_level, finding.company_code),
3423 properties: {
3424 let mut p = HashMap::new();
3425 p.insert("title".into(), Value::String(finding.title.clone()));
3426 p.insert(
3427 "severity".into(),
3428 Value::String(finding.severity.to_string()),
3429 );
3430 p.insert(
3431 "deficiencyLevel".into(),
3432 Value::String(finding.deficiency_level.to_string()),
3433 );
3434 p.insert(
3435 "companyCode".into(),
3436 Value::String(finding.company_code.clone()),
3437 );
3438 p.insert(
3439 "remediationStatus".into(),
3440 Value::String(finding.remediation_status.to_string()),
3441 );
3442 p.insert("isRepeat".into(), Value::Bool(finding.is_repeat));
3443 p.insert(
3444 "identifiedDate".into(),
3445 Value::String(finding.identified_date.to_string()),
3446 );
3447 p
3448 },
3449 features: vec![
3450 finding.severity.score(),
3451 finding.deficiency_level.severity_score(),
3452 if finding.is_repeat { 1.0 } else { 0.0 },
3453 ],
3454 is_anomaly: false,
3455 anomaly_type: None,
3456 is_aggregate: false,
3457 aggregate_count: 0,
3458 }) {
3459 for std_id in &finding.related_standards {
3461 let sid = std_id.as_str().to_string();
3462 if let Some(std_node) = self.standard_node_ids.get(&sid) {
3463 self.edges.push(CrossLayerEdge {
3464 source_id: node_id.clone(),
3465 source_layer: HypergraphLayer::ProcessEvents,
3466 target_id: std_node.clone(),
3467 target_layer: HypergraphLayer::GovernanceControls,
3468 edge_type: "FindingOnStandard".to_string(),
3469 edge_type_code: type_codes::GOVERNED_BY_STANDARD,
3470 properties: HashMap::new(),
3471 });
3472 }
3473 }
3474
3475 if let Some(ref ctrl_id) = finding.control_id {
3477 self.compliance_finding_control_links
3478 .push((node_id, ctrl_id.clone()));
3479 }
3480 }
3481 }
3482
3483 for filing in filings {
3485 let filing_key = format!(
3486 "{}_{}_{}_{}",
3487 filing.filing_type, filing.company_code, filing.jurisdiction, filing.period_end
3488 );
3489 let node_id = format!("cr_filing_{filing_key}");
3490 self.try_add_node(HypergraphNode {
3491 id: node_id,
3492 entity_type: "regulatory_filing".into(),
3493 entity_type_code: type_codes::REGULATORY_FILING,
3494 layer: HypergraphLayer::ProcessEvents,
3495 external_id: filing_key,
3496 label: format!("{} [{}]", filing.filing_type, filing.company_code),
3497 properties: {
3498 let mut p = HashMap::new();
3499 p.insert(
3500 "filingType".into(),
3501 Value::String(filing.filing_type.to_string()),
3502 );
3503 p.insert(
3504 "companyCode".into(),
3505 Value::String(filing.company_code.clone()),
3506 );
3507 p.insert(
3508 "jurisdiction".into(),
3509 Value::String(filing.jurisdiction.clone()),
3510 );
3511 p.insert(
3512 "status".into(),
3513 Value::String(format!("{:?}", filing.status)),
3514 );
3515 p.insert(
3516 "periodEnd".into(),
3517 Value::String(filing.period_end.to_string()),
3518 );
3519 p.insert(
3520 "deadline".into(),
3521 Value::String(filing.deadline.to_string()),
3522 );
3523 p
3524 },
3525 features: vec![],
3526 is_anomaly: false,
3527 anomaly_type: None,
3528 is_aggregate: false,
3529 aggregate_count: 0,
3530 });
3531 }
3532 }
3533
3534 #[allow(clippy::too_many_arguments)]
3543 pub fn add_tax_documents(
3544 &mut self,
3545 jurisdictions: &[TaxJurisdiction],
3546 codes: &[TaxCode],
3547 tax_lines: &[TaxLine],
3548 tax_returns: &[TaxReturn],
3549 tax_provisions: &[TaxProvision],
3550 withholding_records: &[WithholdingTaxRecord],
3551 ) {
3552 if !self.config.include_tax {
3553 return;
3554 }
3555
3556 for jur in jurisdictions {
3557 let node_id = format!("tax_jur_{}", jur.id);
3558 self.try_add_node(HypergraphNode {
3559 id: node_id,
3560 entity_type: "tax_jurisdiction".into(),
3561 entity_type_code: type_codes::TAX_JURISDICTION,
3562 layer: HypergraphLayer::AccountingNetwork,
3563 external_id: jur.id.clone(),
3564 label: jur.name.clone(),
3565 properties: {
3566 let mut p = HashMap::new();
3567 p.insert(
3568 "country_code".into(),
3569 Value::String(jur.country_code.clone()),
3570 );
3571 p.insert(
3572 "jurisdiction_type".into(),
3573 Value::String(format!("{:?}", jur.jurisdiction_type)),
3574 );
3575 p.insert("vat_registered".into(), Value::Bool(jur.vat_registered));
3576 if let Some(ref region) = jur.region_code {
3577 p.insert("region_code".into(), Value::String(region.clone()));
3578 }
3579 p
3580 },
3581 features: vec![if jur.vat_registered { 1.0 } else { 0.0 }],
3582 is_anomaly: false,
3583 anomaly_type: None,
3584 is_aggregate: false,
3585 aggregate_count: 0,
3586 });
3587 }
3588
3589 for code in codes {
3590 let node_id = format!("tax_code_{}", code.id);
3591 self.try_add_node(HypergraphNode {
3592 id: node_id,
3593 entity_type: "tax_code".into(),
3594 entity_type_code: type_codes::TAX_CODE,
3595 layer: HypergraphLayer::AccountingNetwork,
3596 external_id: code.id.clone(),
3597 label: format!("{} ({})", code.code, code.description),
3598 properties: {
3599 let mut p = HashMap::new();
3600 p.insert("code".into(), Value::String(code.code.clone()));
3601 p.insert(
3602 "tax_type".into(),
3603 Value::String(format!("{:?}", code.tax_type)),
3604 );
3605 let rate: f64 = code.rate.to_string().parse().unwrap_or(0.0);
3606 p.insert("rate".into(), serde_json::json!(rate));
3607 p.insert(
3608 "jurisdiction_id".into(),
3609 Value::String(code.jurisdiction_id.clone()),
3610 );
3611 p.insert("is_exempt".into(), Value::Bool(code.is_exempt));
3612 p.insert(
3613 "is_reverse_charge".into(),
3614 Value::Bool(code.is_reverse_charge),
3615 );
3616 p
3617 },
3618 features: vec![code.rate.to_string().parse::<f64>().unwrap_or(0.0)],
3619 is_anomaly: false,
3620 anomaly_type: None,
3621 is_aggregate: false,
3622 aggregate_count: 0,
3623 });
3624 }
3625
3626 for line in tax_lines {
3627 let node_id = format!("tax_line_{}", line.id);
3628 self.try_add_node(HypergraphNode {
3629 id: node_id,
3630 entity_type: "tax_line".into(),
3631 entity_type_code: type_codes::TAX_LINE,
3632 layer: HypergraphLayer::AccountingNetwork,
3633 external_id: line.id.clone(),
3634 label: format!("TAXL {} L{}", line.document_id, line.line_number),
3635 properties: {
3636 let mut p = HashMap::new();
3637 p.insert(
3638 "document_type".into(),
3639 Value::String(format!("{:?}", line.document_type)),
3640 );
3641 p.insert(
3642 "document_id".into(),
3643 Value::String(line.document_id.clone()),
3644 );
3645 p.insert(
3646 "tax_code_id".into(),
3647 Value::String(line.tax_code_id.clone()),
3648 );
3649 let amt: f64 = line.tax_amount.to_string().parse().unwrap_or(0.0);
3650 p.insert("tax_amount".into(), serde_json::json!(amt));
3651 p
3652 },
3653 features: vec![line
3654 .tax_amount
3655 .to_string()
3656 .parse::<f64>()
3657 .unwrap_or(0.0)
3658 .abs()
3659 .ln_1p()],
3660 is_anomaly: false,
3661 anomaly_type: None,
3662 is_aggregate: false,
3663 aggregate_count: 0,
3664 });
3665 }
3666
3667 for ret in tax_returns {
3668 let node_id = format!("tax_ret_{}", ret.id);
3669 self.try_add_node(HypergraphNode {
3670 id: node_id,
3671 entity_type: "tax_return".into(),
3672 entity_type_code: type_codes::TAX_RETURN,
3673 layer: HypergraphLayer::AccountingNetwork,
3674 external_id: ret.id.clone(),
3675 label: format!("TAXR {} [{:?}]", ret.entity_id, ret.return_type),
3676 properties: {
3677 let mut p = HashMap::new();
3678 p.insert("entity_id".into(), Value::String(ret.entity_id.clone()));
3679 p.insert(
3680 "jurisdiction_id".into(),
3681 Value::String(ret.jurisdiction_id.clone()),
3682 );
3683 p.insert(
3684 "return_type".into(),
3685 Value::String(format!("{:?}", ret.return_type)),
3686 );
3687 p.insert("status".into(), Value::String(format!("{:?}", ret.status)));
3688 p.insert(
3689 "period_start".into(),
3690 Value::String(ret.period_start.to_string()),
3691 );
3692 p.insert(
3693 "period_end".into(),
3694 Value::String(ret.period_end.to_string()),
3695 );
3696 p.insert("is_late".into(), Value::Bool(ret.is_late));
3697 let net: f64 = ret.net_payable.to_string().parse().unwrap_or(0.0);
3698 p.insert("net_payable".into(), serde_json::json!(net));
3699 p
3700 },
3701 features: vec![
3702 ret.net_payable
3703 .to_string()
3704 .parse::<f64>()
3705 .unwrap_or(0.0)
3706 .abs()
3707 .ln_1p(),
3708 if ret.is_late { 1.0 } else { 0.0 },
3709 ],
3710 is_anomaly: ret.is_late,
3711 anomaly_type: if ret.is_late {
3712 Some("late_filing".into())
3713 } else {
3714 None
3715 },
3716 is_aggregate: false,
3717 aggregate_count: 0,
3718 });
3719 }
3720
3721 for prov in tax_provisions {
3722 let node_id = format!("tax_prov_{}", prov.id);
3723 self.try_add_node(HypergraphNode {
3724 id: node_id,
3725 entity_type: "tax_provision".into(),
3726 entity_type_code: type_codes::TAX_PROVISION,
3727 layer: HypergraphLayer::AccountingNetwork,
3728 external_id: prov.id.clone(),
3729 label: format!("TAXPROV {} {}", prov.entity_id, prov.period),
3730 properties: {
3731 let mut p = HashMap::new();
3732 p.insert("entity_id".into(), Value::String(prov.entity_id.clone()));
3733 p.insert("period".into(), Value::String(prov.period.to_string()));
3734 let eff: f64 = prov.effective_rate.to_string().parse().unwrap_or(0.0);
3735 p.insert("effective_rate".into(), serde_json::json!(eff));
3736 let stat: f64 = prov.statutory_rate.to_string().parse().unwrap_or(0.0);
3737 p.insert("statutory_rate".into(), serde_json::json!(stat));
3738 let expense: f64 = prov.current_tax_expense.to_string().parse().unwrap_or(0.0);
3739 p.insert("current_tax_expense".into(), serde_json::json!(expense));
3740 p
3741 },
3742 features: vec![
3743 prov.effective_rate
3744 .to_string()
3745 .parse::<f64>()
3746 .unwrap_or(0.0),
3747 prov.current_tax_expense
3748 .to_string()
3749 .parse::<f64>()
3750 .unwrap_or(0.0)
3751 .abs()
3752 .ln_1p(),
3753 ],
3754 is_anomaly: false,
3755 anomaly_type: None,
3756 is_aggregate: false,
3757 aggregate_count: 0,
3758 });
3759 }
3760
3761 for wht in withholding_records {
3762 let node_id = format!("tax_wht_{}", wht.id);
3763 self.try_add_node(HypergraphNode {
3764 id: node_id,
3765 entity_type: "withholding_tax_record".into(),
3766 entity_type_code: type_codes::WITHHOLDING_TAX,
3767 layer: HypergraphLayer::AccountingNetwork,
3768 external_id: wht.id.clone(),
3769 label: format!("WHT {} → {}", wht.payment_id, wht.vendor_id),
3770 properties: {
3771 let mut p = HashMap::new();
3772 p.insert("payment_id".into(), Value::String(wht.payment_id.clone()));
3773 p.insert("vendor_id".into(), Value::String(wht.vendor_id.clone()));
3774 p.insert(
3775 "withholding_type".into(),
3776 Value::String(format!("{:?}", wht.withholding_type)),
3777 );
3778 let amt: f64 = wht.withheld_amount.to_string().parse().unwrap_or(0.0);
3779 p.insert("withheld_amount".into(), serde_json::json!(amt));
3780 let rate: f64 = wht.applied_rate.to_string().parse().unwrap_or(0.0);
3781 p.insert("applied_rate".into(), serde_json::json!(rate));
3782 p
3783 },
3784 features: vec![wht
3785 .withheld_amount
3786 .to_string()
3787 .parse::<f64>()
3788 .unwrap_or(0.0)
3789 .abs()
3790 .ln_1p()],
3791 is_anomaly: false,
3792 anomaly_type: None,
3793 is_aggregate: false,
3794 aggregate_count: 0,
3795 });
3796 }
3797 }
3798
3799 pub fn add_treasury_documents(
3804 &mut self,
3805 cash_positions: &[CashPosition],
3806 cash_forecasts: &[CashForecast],
3807 hedge_relationships: &[HedgeRelationship],
3808 debt_instruments: &[DebtInstrument],
3809 ) {
3810 if !self.config.include_treasury {
3811 return;
3812 }
3813
3814 for pos in cash_positions {
3815 let node_id = format!("treas_pos_{}", pos.id);
3816 self.try_add_node(HypergraphNode {
3817 id: node_id,
3818 entity_type: "cash_position".into(),
3819 entity_type_code: type_codes::CASH_POSITION,
3820 layer: HypergraphLayer::AccountingNetwork,
3821 external_id: pos.id.clone(),
3822 label: format!("CPOS {} {}", pos.bank_account_id, pos.date),
3823 properties: {
3824 let mut p = HashMap::new();
3825 p.insert("entity_id".into(), Value::String(pos.entity_id.clone()));
3826 p.insert(
3827 "bank_account_id".into(),
3828 Value::String(pos.bank_account_id.clone()),
3829 );
3830 p.insert("currency".into(), Value::String(pos.currency.clone()));
3831 p.insert("date".into(), Value::String(pos.date.to_string()));
3832 let closing: f64 = pos.closing_balance.to_string().parse().unwrap_or(0.0);
3833 p.insert("closing_balance".into(), serde_json::json!(closing));
3834 p
3835 },
3836 features: vec![pos
3837 .closing_balance
3838 .to_string()
3839 .parse::<f64>()
3840 .unwrap_or(0.0)
3841 .abs()
3842 .ln_1p()],
3843 is_anomaly: false,
3844 anomaly_type: None,
3845 is_aggregate: false,
3846 aggregate_count: 0,
3847 });
3848 }
3849
3850 for fc in cash_forecasts {
3851 let node_id = format!("treas_fc_{}", fc.id);
3852 self.try_add_node(HypergraphNode {
3853 id: node_id,
3854 entity_type: "cash_forecast".into(),
3855 entity_type_code: type_codes::CASH_FORECAST,
3856 layer: HypergraphLayer::AccountingNetwork,
3857 external_id: fc.id.clone(),
3858 label: format!("CFOR {} {}d", fc.entity_id, fc.horizon_days),
3859 properties: {
3860 let mut p = HashMap::new();
3861 p.insert("entity_id".into(), Value::String(fc.entity_id.clone()));
3862 p.insert("currency".into(), Value::String(fc.currency.clone()));
3863 p.insert(
3864 "forecast_date".into(),
3865 Value::String(fc.forecast_date.to_string()),
3866 );
3867 p.insert(
3868 "horizon_days".into(),
3869 Value::Number((fc.horizon_days as u64).into()),
3870 );
3871 let net: f64 = fc.net_position.to_string().parse().unwrap_or(0.0);
3872 p.insert("net_position".into(), serde_json::json!(net));
3873 let conf: f64 = fc.confidence_level.to_string().parse().unwrap_or(0.0);
3874 p.insert("confidence_level".into(), serde_json::json!(conf));
3875 p
3876 },
3877 features: vec![
3878 fc.net_position
3879 .to_string()
3880 .parse::<f64>()
3881 .unwrap_or(0.0)
3882 .abs()
3883 .ln_1p(),
3884 fc.confidence_level
3885 .to_string()
3886 .parse::<f64>()
3887 .unwrap_or(0.0),
3888 ],
3889 is_anomaly: false,
3890 anomaly_type: None,
3891 is_aggregate: false,
3892 aggregate_count: 0,
3893 });
3894 }
3895
3896 for hr in hedge_relationships {
3897 let node_id = format!("treas_hedge_{}", hr.id);
3898 self.try_add_node(HypergraphNode {
3899 id: node_id,
3900 entity_type: "hedge_relationship".into(),
3901 entity_type_code: type_codes::HEDGE_RELATIONSHIP,
3902 layer: HypergraphLayer::AccountingNetwork,
3903 external_id: hr.id.clone(),
3904 label: format!("HEDGE {:?} {}", hr.hedge_type, hr.hedged_item_description),
3905 properties: {
3906 let mut p = HashMap::new();
3907 p.insert(
3908 "hedged_item_type".into(),
3909 Value::String(format!("{:?}", hr.hedged_item_type)),
3910 );
3911 p.insert(
3912 "hedge_type".into(),
3913 Value::String(format!("{:?}", hr.hedge_type)),
3914 );
3915 p.insert(
3916 "designation_date".into(),
3917 Value::String(hr.designation_date.to_string()),
3918 );
3919 p.insert("is_effective".into(), Value::Bool(hr.is_effective));
3920 let ratio: f64 = hr.effectiveness_ratio.to_string().parse().unwrap_or(0.0);
3921 p.insert("effectiveness_ratio".into(), serde_json::json!(ratio));
3922 p
3923 },
3924 features: vec![
3925 hr.effectiveness_ratio
3926 .to_string()
3927 .parse::<f64>()
3928 .unwrap_or(0.0),
3929 if hr.is_effective { 1.0 } else { 0.0 },
3930 ],
3931 is_anomaly: !hr.is_effective,
3932 anomaly_type: if !hr.is_effective {
3933 Some("ineffective_hedge".into())
3934 } else {
3935 None
3936 },
3937 is_aggregate: false,
3938 aggregate_count: 0,
3939 });
3940 }
3941
3942 for debt in debt_instruments {
3943 let node_id = format!("treas_debt_{}", debt.id);
3944 self.try_add_node(HypergraphNode {
3945 id: node_id,
3946 entity_type: "debt_instrument".into(),
3947 entity_type_code: type_codes::DEBT_INSTRUMENT,
3948 layer: HypergraphLayer::AccountingNetwork,
3949 external_id: debt.id.clone(),
3950 label: format!("DEBT {:?} {}", debt.instrument_type, debt.lender),
3951 properties: {
3952 let mut p = HashMap::new();
3953 p.insert("entity_id".into(), Value::String(debt.entity_id.clone()));
3954 p.insert(
3955 "instrument_type".into(),
3956 Value::String(format!("{:?}", debt.instrument_type)),
3957 );
3958 p.insert("lender".into(), Value::String(debt.lender.clone()));
3959 p.insert("currency".into(), Value::String(debt.currency.clone()));
3960 let principal: f64 = debt.principal.to_string().parse().unwrap_or(0.0);
3961 p.insert("principal".into(), serde_json::json!(principal));
3962 let rate: f64 = debt.interest_rate.to_string().parse().unwrap_or(0.0);
3963 p.insert("interest_rate".into(), serde_json::json!(rate));
3964 p.insert(
3965 "maturity_date".into(),
3966 Value::String(debt.maturity_date.to_string()),
3967 );
3968 p.insert(
3969 "covenant_count".into(),
3970 Value::Number((debt.covenants.len() as u64).into()),
3971 );
3972 p
3973 },
3974 features: vec![
3975 debt.principal
3976 .to_string()
3977 .parse::<f64>()
3978 .unwrap_or(0.0)
3979 .ln_1p(),
3980 debt.interest_rate.to_string().parse::<f64>().unwrap_or(0.0),
3981 ],
3982 is_anomaly: false,
3983 anomaly_type: None,
3984 is_aggregate: false,
3985 aggregate_count: 0,
3986 });
3987 }
3988 }
3989
3990 pub fn add_esg_documents(
3995 &mut self,
3996 emissions: &[EmissionRecord],
3997 disclosures: &[EsgDisclosure],
3998 supplier_assessments: &[SupplierEsgAssessment],
3999 climate_scenarios: &[ClimateScenario],
4000 ) {
4001 if !self.config.include_esg {
4002 return;
4003 }
4004
4005 for em in emissions {
4006 let node_id = format!("esg_em_{}", em.id);
4007 self.try_add_node(HypergraphNode {
4008 id: node_id,
4009 entity_type: "emission_record".into(),
4010 entity_type_code: type_codes::EMISSION_RECORD,
4011 layer: HypergraphLayer::GovernanceControls,
4012 external_id: em.id.clone(),
4013 label: format!("EM {:?} {}", em.scope, em.period),
4014 properties: {
4015 let mut p = HashMap::new();
4016 p.insert("entity_id".into(), Value::String(em.entity_id.clone()));
4017 p.insert("scope".into(), Value::String(format!("{:?}", em.scope)));
4018 p.insert("period".into(), Value::String(em.period.to_string()));
4019 let co2e: f64 = em.co2e_tonnes.to_string().parse().unwrap_or(0.0);
4020 p.insert("co2e_tonnes".into(), serde_json::json!(co2e));
4021 p.insert(
4022 "estimation_method".into(),
4023 Value::String(format!("{:?}", em.estimation_method)),
4024 );
4025 if let Some(ref fid) = em.facility_id {
4026 p.insert("facility_id".into(), Value::String(String::clone(fid)));
4027 }
4028 p
4029 },
4030 features: vec![em
4031 .co2e_tonnes
4032 .to_string()
4033 .parse::<f64>()
4034 .unwrap_or(0.0)
4035 .ln_1p()],
4036 is_anomaly: false,
4037 anomaly_type: None,
4038 is_aggregate: false,
4039 aggregate_count: 0,
4040 });
4041 }
4042
4043 for disc in disclosures {
4044 let node_id = format!("esg_disc_{}", disc.id);
4045 self.try_add_node(HypergraphNode {
4046 id: node_id,
4047 entity_type: "esg_disclosure".into(),
4048 entity_type_code: type_codes::ESG_DISCLOSURE,
4049 layer: HypergraphLayer::GovernanceControls,
4050 external_id: disc.id.clone(),
4051 label: format!("{:?}: {}", disc.framework, disc.disclosure_topic),
4052 properties: {
4053 let mut p = HashMap::new();
4054 p.insert("entity_id".into(), Value::String(disc.entity_id.clone()));
4055 p.insert(
4056 "framework".into(),
4057 Value::String(format!("{:?}", disc.framework)),
4058 );
4059 p.insert(
4060 "disclosure_topic".into(),
4061 Value::String(disc.disclosure_topic.clone()),
4062 );
4063 p.insert(
4064 "assurance_level".into(),
4065 Value::String(format!("{:?}", disc.assurance_level)),
4066 );
4067 p.insert("is_assured".into(), Value::Bool(disc.is_assured));
4068 p.insert(
4069 "reporting_period_start".into(),
4070 Value::String(disc.reporting_period_start.to_string()),
4071 );
4072 p.insert(
4073 "reporting_period_end".into(),
4074 Value::String(disc.reporting_period_end.to_string()),
4075 );
4076 p
4077 },
4078 features: vec![if disc.is_assured { 1.0 } else { 0.0 }],
4079 is_anomaly: false,
4080 anomaly_type: None,
4081 is_aggregate: false,
4082 aggregate_count: 0,
4083 });
4084 }
4085
4086 for sa in supplier_assessments {
4087 let node_id = format!("esg_sa_{}", sa.id);
4088 self.try_add_node(HypergraphNode {
4089 id: node_id,
4090 entity_type: "supplier_esg_assessment".into(),
4091 entity_type_code: type_codes::SUPPLIER_ESG_ASSESSMENT,
4092 layer: HypergraphLayer::GovernanceControls,
4093 external_id: sa.id.clone(),
4094 label: format!("ESG-SA {} ({})", sa.vendor_id, sa.assessment_date),
4095 properties: {
4096 let mut p = HashMap::new();
4097 p.insert("entity_id".into(), Value::String(sa.entity_id.clone()));
4098 p.insert("vendor_id".into(), Value::String(sa.vendor_id.clone()));
4099 p.insert(
4100 "assessment_date".into(),
4101 Value::String(sa.assessment_date.to_string()),
4102 );
4103 let overall: f64 = sa.overall_score.to_string().parse().unwrap_or(0.0);
4104 p.insert("overall_score".into(), serde_json::json!(overall));
4105 p.insert(
4106 "risk_flag".into(),
4107 Value::String(format!("{:?}", sa.risk_flag)),
4108 );
4109 p
4110 },
4111 features: vec![sa.overall_score.to_string().parse::<f64>().unwrap_or(0.0)],
4112 is_anomaly: false,
4113 anomaly_type: None,
4114 is_aggregate: false,
4115 aggregate_count: 0,
4116 });
4117 }
4118
4119 for cs in climate_scenarios {
4120 let node_id = format!("esg_cs_{}", cs.id);
4121 self.try_add_node(HypergraphNode {
4122 id: node_id,
4123 entity_type: "climate_scenario".into(),
4124 entity_type_code: type_codes::CLIMATE_SCENARIO,
4125 layer: HypergraphLayer::GovernanceControls,
4126 external_id: cs.id.clone(),
4127 label: format!("{:?} {:?}", cs.scenario_type, cs.time_horizon),
4128 properties: {
4129 let mut p = HashMap::new();
4130 p.insert("entity_id".into(), Value::String(cs.entity_id.clone()));
4131 p.insert(
4132 "scenario_type".into(),
4133 Value::String(format!("{:?}", cs.scenario_type)),
4134 );
4135 p.insert(
4136 "time_horizon".into(),
4137 Value::String(format!("{:?}", cs.time_horizon)),
4138 );
4139 p.insert("description".into(), Value::String(cs.description.clone()));
4140 let temp: f64 = cs.temperature_rise_c.to_string().parse().unwrap_or(0.0);
4141 p.insert("temperature_rise_c".into(), serde_json::json!(temp));
4142 let fin: f64 = cs.financial_impact.to_string().parse().unwrap_or(0.0);
4143 p.insert("financial_impact".into(), serde_json::json!(fin));
4144 p
4145 },
4146 features: vec![
4147 cs.temperature_rise_c
4148 .to_string()
4149 .parse::<f64>()
4150 .unwrap_or(0.0),
4151 cs.financial_impact
4152 .to_string()
4153 .parse::<f64>()
4154 .unwrap_or(0.0)
4155 .abs()
4156 .ln_1p(),
4157 ],
4158 is_anomaly: false,
4159 anomaly_type: None,
4160 is_aggregate: false,
4161 aggregate_count: 0,
4162 });
4163 }
4164 }
4165
4166 pub fn add_project_documents(
4170 &mut self,
4171 projects: &[Project],
4172 earned_value_metrics: &[EarnedValueMetric],
4173 milestones: &[ProjectMilestone],
4174 ) {
4175 if !self.config.include_project {
4176 return;
4177 }
4178
4179 for proj in projects {
4180 let node_id = format!("proj_{}", proj.project_id);
4181 self.try_add_node(HypergraphNode {
4182 id: node_id,
4183 entity_type: "project".into(),
4184 entity_type_code: type_codes::PROJECT,
4185 layer: HypergraphLayer::AccountingNetwork,
4186 external_id: proj.project_id.clone(),
4187 label: format!("{} ({})", proj.name, proj.project_id),
4188 properties: {
4189 let mut p = HashMap::new();
4190 p.insert("name".into(), Value::String(proj.name.clone()));
4191 p.insert(
4192 "project_type".into(),
4193 Value::String(format!("{:?}", proj.project_type)),
4194 );
4195 p.insert("status".into(), Value::String(format!("{:?}", proj.status)));
4196 p.insert(
4197 "company_code".into(),
4198 Value::String(proj.company_code.clone()),
4199 );
4200 let budget: f64 = proj.budget.to_string().parse().unwrap_or(0.0);
4201 p.insert("budget".into(), serde_json::json!(budget));
4202 p
4203 },
4204 features: vec![proj
4205 .budget
4206 .to_string()
4207 .parse::<f64>()
4208 .unwrap_or(0.0)
4209 .ln_1p()],
4210 is_anomaly: false,
4211 anomaly_type: None,
4212 is_aggregate: false,
4213 aggregate_count: 0,
4214 });
4215 }
4216
4217 for evm in earned_value_metrics {
4218 let node_id = format!("proj_evm_{}", evm.id);
4219 let spi: f64 = evm.spi.to_string().parse().unwrap_or(1.0);
4220 let cpi: f64 = evm.cpi.to_string().parse().unwrap_or(1.0);
4221 let is_anomaly = spi < 0.8 || cpi < 0.8;
4223 self.try_add_node(HypergraphNode {
4224 id: node_id,
4225 entity_type: "earned_value_metric".into(),
4226 entity_type_code: type_codes::EARNED_VALUE,
4227 layer: HypergraphLayer::AccountingNetwork,
4228 external_id: evm.id.clone(),
4229 label: format!("EVM {} {}", evm.project_id, evm.measurement_date),
4230 properties: {
4231 let mut p = HashMap::new();
4232 p.insert("project_id".into(), Value::String(evm.project_id.clone()));
4233 p.insert(
4234 "measurement_date".into(),
4235 Value::String(evm.measurement_date.to_string()),
4236 );
4237 p.insert("spi".into(), serde_json::json!(spi));
4238 p.insert("cpi".into(), serde_json::json!(cpi));
4239 let eac: f64 = evm.eac.to_string().parse().unwrap_or(0.0);
4240 p.insert("eac".into(), serde_json::json!(eac));
4241 p
4242 },
4243 features: vec![spi, cpi],
4244 is_anomaly,
4245 anomaly_type: if is_anomaly {
4246 Some("poor_project_performance".into())
4247 } else {
4248 None
4249 },
4250 is_aggregate: false,
4251 aggregate_count: 0,
4252 });
4253 }
4254
4255 for ms in milestones {
4256 let node_id = format!("proj_ms_{}", ms.id);
4257 self.try_add_node(HypergraphNode {
4258 id: node_id,
4259 entity_type: "project_milestone".into(),
4260 entity_type_code: type_codes::PROJECT_MILESTONE,
4261 layer: HypergraphLayer::AccountingNetwork,
4262 external_id: ms.id.clone(),
4263 label: format!("MS {} ({})", ms.name, ms.project_id),
4264 properties: {
4265 let mut p = HashMap::new();
4266 p.insert("project_id".into(), Value::String(ms.project_id.clone()));
4267 p.insert("name".into(), Value::String(ms.name.clone()));
4268 p.insert(
4269 "planned_date".into(),
4270 Value::String(ms.planned_date.to_string()),
4271 );
4272 p.insert("status".into(), Value::String(format!("{:?}", ms.status)));
4273 p.insert(
4274 "sequence".into(),
4275 Value::Number((ms.sequence as u64).into()),
4276 );
4277 let amt: f64 = ms.payment_amount.to_string().parse().unwrap_or(0.0);
4278 p.insert("payment_amount".into(), serde_json::json!(amt));
4279 if let Some(ref actual) = ms.actual_date {
4280 p.insert("actual_date".into(), Value::String(actual.to_string()));
4281 }
4282 p
4283 },
4284 features: vec![ms
4285 .payment_amount
4286 .to_string()
4287 .parse::<f64>()
4288 .unwrap_or(0.0)
4289 .ln_1p()],
4290 is_anomaly: false,
4291 anomaly_type: None,
4292 is_aggregate: false,
4293 aggregate_count: 0,
4294 });
4295 }
4296 }
4297
4298 pub fn add_intercompany_documents(
4302 &mut self,
4303 matched_pairs: &[ICMatchedPair],
4304 elimination_entries: &[EliminationEntry],
4305 ) {
4306 if !self.config.include_intercompany {
4307 return;
4308 }
4309
4310 for pair in matched_pairs {
4311 let node_id = format!("ic_pair_{}", pair.ic_reference);
4312 self.try_add_node(HypergraphNode {
4313 id: node_id,
4314 entity_type: "ic_matched_pair".into(),
4315 entity_type_code: type_codes::IC_MATCHED_PAIR,
4316 layer: HypergraphLayer::AccountingNetwork,
4317 external_id: pair.ic_reference.clone(),
4318 label: format!("IC {} → {}", pair.seller_company, pair.buyer_company),
4319 properties: {
4320 let mut p = HashMap::new();
4321 p.insert(
4322 "transaction_type".into(),
4323 Value::String(format!("{:?}", pair.transaction_type)),
4324 );
4325 p.insert(
4326 "seller_company".into(),
4327 Value::String(pair.seller_company.clone()),
4328 );
4329 p.insert(
4330 "buyer_company".into(),
4331 Value::String(pair.buyer_company.clone()),
4332 );
4333 let amt: f64 = pair.amount.to_string().parse().unwrap_or(0.0);
4334 p.insert("amount".into(), serde_json::json!(amt));
4335 p.insert("currency".into(), Value::String(pair.currency.clone()));
4336 p.insert(
4337 "settlement_status".into(),
4338 Value::String(format!("{:?}", pair.settlement_status)),
4339 );
4340 p.insert(
4341 "transaction_date".into(),
4342 Value::String(pair.transaction_date.to_string()),
4343 );
4344 p
4345 },
4346 features: vec![pair
4347 .amount
4348 .to_string()
4349 .parse::<f64>()
4350 .unwrap_or(0.0)
4351 .abs()
4352 .ln_1p()],
4353 is_anomaly: false,
4354 anomaly_type: None,
4355 is_aggregate: false,
4356 aggregate_count: 0,
4357 });
4358 }
4359
4360 for elim in elimination_entries {
4361 let node_id = format!("ic_elim_{}", elim.entry_id);
4362 self.try_add_node(HypergraphNode {
4363 id: node_id,
4364 entity_type: "elimination_entry".into(),
4365 entity_type_code: type_codes::ELIMINATION_ENTRY,
4366 layer: HypergraphLayer::AccountingNetwork,
4367 external_id: elim.entry_id.clone(),
4368 label: format!(
4369 "ELIM {:?} {} {}",
4370 elim.elimination_type, elim.consolidation_entity, elim.fiscal_period
4371 ),
4372 properties: {
4373 let mut p = HashMap::new();
4374 p.insert(
4375 "elimination_type".into(),
4376 Value::String(format!("{:?}", elim.elimination_type)),
4377 );
4378 p.insert(
4379 "consolidation_entity".into(),
4380 Value::String(elim.consolidation_entity.clone()),
4381 );
4382 p.insert(
4383 "fiscal_period".into(),
4384 Value::String(elim.fiscal_period.clone()),
4385 );
4386 p.insert("currency".into(), Value::String(elim.currency.clone()));
4387 p.insert("is_permanent".into(), Value::Bool(elim.is_permanent));
4388 let debit: f64 = elim.total_debit.to_string().parse().unwrap_or(0.0);
4389 p.insert("total_debit".into(), serde_json::json!(debit));
4390 p
4391 },
4392 features: vec![elim
4393 .total_debit
4394 .to_string()
4395 .parse::<f64>()
4396 .unwrap_or(0.0)
4397 .abs()
4398 .ln_1p()],
4399 is_anomaly: false,
4400 anomaly_type: None,
4401 is_aggregate: false,
4402 aggregate_count: 0,
4403 });
4404 }
4405 }
4406
4407 pub fn add_temporal_events(
4412 &mut self,
4413 process_events: &[ProcessEvolutionEvent],
4414 organizational_events: &[OrganizationalEvent],
4415 disruption_events: &[DisruptionEvent],
4416 ) {
4417 if !self.config.include_temporal_events {
4418 return;
4419 }
4420
4421 for pe in process_events {
4422 let node_id = format!("tevt_proc_{}", pe.event_id);
4423 self.try_add_node(HypergraphNode {
4424 id: node_id,
4425 entity_type: "process_evolution".into(),
4426 entity_type_code: type_codes::PROCESS_EVOLUTION,
4427 layer: HypergraphLayer::ProcessEvents,
4428 external_id: pe.event_id.clone(),
4429 label: format!("PEVOL {} {}", pe.event_id, pe.effective_date),
4430 properties: {
4431 let mut p = HashMap::new();
4432 p.insert(
4433 "event_type".into(),
4434 Value::String(format!("{:?}", pe.event_type)),
4435 );
4436 p.insert(
4437 "effective_date".into(),
4438 Value::String(pe.effective_date.to_string()),
4439 );
4440 if let Some(ref desc) = pe.description {
4441 p.insert("description".into(), Value::String(desc.clone()));
4442 }
4443 if !pe.tags.is_empty() {
4444 p.insert(
4445 "tags".into(),
4446 Value::Array(
4447 pe.tags.iter().map(|t| Value::String(t.clone())).collect(),
4448 ),
4449 );
4450 }
4451 p
4452 },
4453 features: vec![],
4454 is_anomaly: false,
4455 anomaly_type: None,
4456 is_aggregate: false,
4457 aggregate_count: 0,
4458 });
4459 }
4460
4461 for oe in organizational_events {
4462 let node_id = format!("tevt_org_{}", oe.event_id);
4463 self.try_add_node(HypergraphNode {
4464 id: node_id,
4465 entity_type: "organizational_event".into(),
4466 entity_type_code: type_codes::ORGANIZATIONAL_EVENT,
4467 layer: HypergraphLayer::ProcessEvents,
4468 external_id: oe.event_id.clone(),
4469 label: format!("ORGEV {} {}", oe.event_id, oe.effective_date),
4470 properties: {
4471 let mut p = HashMap::new();
4472 p.insert(
4473 "event_type".into(),
4474 Value::String(format!("{:?}", oe.event_type)),
4475 );
4476 p.insert(
4477 "effective_date".into(),
4478 Value::String(oe.effective_date.to_string()),
4479 );
4480 if let Some(ref desc) = oe.description {
4481 p.insert("description".into(), Value::String(desc.clone()));
4482 }
4483 if !oe.tags.is_empty() {
4484 p.insert(
4485 "tags".into(),
4486 Value::Array(
4487 oe.tags.iter().map(|t| Value::String(t.clone())).collect(),
4488 ),
4489 );
4490 }
4491 p
4492 },
4493 features: vec![],
4494 is_anomaly: false,
4495 anomaly_type: None,
4496 is_aggregate: false,
4497 aggregate_count: 0,
4498 });
4499 }
4500
4501 for de in disruption_events {
4502 let node_id = format!("tevt_dis_{}", de.event_id);
4503 self.try_add_node(HypergraphNode {
4504 id: node_id,
4505 entity_type: "disruption_event".into(),
4506 entity_type_code: type_codes::DISRUPTION_EVENT,
4507 layer: HypergraphLayer::ProcessEvents,
4508 external_id: de.event_id.clone(),
4509 label: format!("DISRUPT {} sev={}", de.event_id, de.severity),
4510 properties: {
4511 let mut p = HashMap::new();
4512 p.insert(
4513 "disruption_type".into(),
4514 Value::String(format!("{:?}", de.disruption_type)),
4515 );
4516 p.insert("description".into(), Value::String(de.description.clone()));
4517 p.insert("severity".into(), Value::Number(de.severity.into()));
4518 if !de.affected_companies.is_empty() {
4519 p.insert(
4520 "affected_companies".into(),
4521 Value::Array(
4522 de.affected_companies
4523 .iter()
4524 .map(|c| Value::String(c.clone()))
4525 .collect(),
4526 ),
4527 );
4528 }
4529 p
4530 },
4531 features: vec![de.severity as f64 / 5.0],
4532 is_anomaly: de.severity >= 4,
4533 anomaly_type: if de.severity >= 4 {
4534 Some("high_severity_disruption".into())
4535 } else {
4536 None
4537 },
4538 is_aggregate: false,
4539 aggregate_count: 0,
4540 });
4541 }
4542 }
4543
4544 pub fn add_aml_alerts(&mut self, transactions: &[BankTransaction]) {
4549 let suspicious: Vec<&BankTransaction> =
4550 transactions.iter().filter(|t| t.is_suspicious).collect();
4551
4552 for txn in suspicious {
4553 let tid = txn.transaction_id.to_string();
4554 let node_id = format!("aml_alert_{tid}");
4555 self.try_add_node(HypergraphNode {
4556 id: node_id,
4557 entity_type: "aml_alert".into(),
4558 entity_type_code: type_codes::AML_ALERT,
4559 layer: HypergraphLayer::ProcessEvents,
4560 external_id: format!("AML-{tid}"),
4561 label: format!("AML {}", txn.reference),
4562 properties: {
4563 let mut p = HashMap::new();
4564 p.insert("transaction_id".into(), Value::String(tid.clone()));
4565 let amount: f64 = txn.amount.to_string().parse().unwrap_or(0.0);
4566 p.insert("amount".into(), serde_json::json!(amount));
4567 p.insert("currency".into(), Value::String(txn.currency.clone()));
4568 p.insert("reference".into(), Value::String(txn.reference.clone()));
4569 if let Some(ref reason) = txn.suspicion_reason {
4570 p.insert(
4571 "suspicion_reason".into(),
4572 Value::String(format!("{reason:?}")),
4573 );
4574 }
4575 if let Some(ref stage) = txn.laundering_stage {
4576 p.insert(
4577 "laundering_stage".into(),
4578 Value::String(format!("{stage:?}")),
4579 );
4580 }
4581 p
4582 },
4583 features: vec![txn
4584 .amount
4585 .to_string()
4586 .parse::<f64>()
4587 .unwrap_or(0.0)
4588 .abs()
4589 .ln_1p()],
4590 is_anomaly: true,
4591 anomaly_type: txn.suspicion_reason.as_ref().map(|r| format!("{r:?}")),
4592 is_aggregate: false,
4593 aggregate_count: 0,
4594 });
4595 }
4596 }
4597
4598 pub fn add_kyc_profiles(&mut self, customers: &[BankingCustomer]) {
4603 for cust in customers {
4604 let cid = cust.customer_id.to_string();
4605 let node_id = format!("kyc_{cid}");
4606 self.try_add_node(HypergraphNode {
4607 id: node_id,
4608 entity_type: "kyc_profile".into(),
4609 entity_type_code: type_codes::KYC_PROFILE,
4610 layer: HypergraphLayer::ProcessEvents,
4611 external_id: format!("KYC-{cid}"),
4612 label: format!("KYC {}", cust.name.legal_name),
4613 properties: {
4614 let mut p = HashMap::new();
4615 p.insert("customer_id".into(), Value::String(cid.clone()));
4616 p.insert("name".into(), Value::String(cust.name.legal_name.clone()));
4617 p.insert(
4618 "customer_type".into(),
4619 Value::String(format!("{:?}", cust.customer_type)),
4620 );
4621 p.insert(
4622 "risk_tier".into(),
4623 Value::String(format!("{:?}", cust.risk_tier)),
4624 );
4625 p.insert(
4626 "residence_country".into(),
4627 Value::String(cust.residence_country.clone()),
4628 );
4629 p.insert("is_pep".into(), Value::Bool(cust.is_pep));
4630 p.insert("is_mule".into(), Value::Bool(cust.is_mule));
4631 p
4632 },
4633 features: vec![
4634 if cust.is_pep { 1.0 } else { 0.0 },
4635 if cust.is_mule { 1.0 } else { 0.0 },
4636 ],
4637 is_anomaly: cust.is_mule,
4638 anomaly_type: if cust.is_mule {
4639 Some("mule_account".into())
4640 } else {
4641 None
4642 },
4643 is_aggregate: false,
4644 aggregate_count: 0,
4645 });
4646 }
4647 }
4648
4649 pub fn tag_process_family(&mut self) {
4654 for node in &mut self.nodes {
4655 let family = match node.entity_type.as_str() {
4656 "purchase_order" | "goods_receipt" | "vendor_invoice" | "payment" | "p2p_pool" => {
4658 "P2P"
4659 }
4660 "sales_order" | "delivery" | "customer_invoice" | "o2c_pool" => "O2C",
4662 "sourcing_project"
4664 | "supplier_qualification"
4665 | "rfx_event"
4666 | "supplier_bid"
4667 | "bid_evaluation"
4668 | "procurement_contract" => "S2C",
4669 "payroll_run" | "time_entry" | "expense_report" | "payroll_line_item" => "H2R",
4671 "production_order" | "quality_inspection" | "cycle_count" => "MFG",
4673 "banking_customer" | "bank_account" | "bank_transaction" | "aml_alert"
4675 | "kyc_profile" => "BANK",
4676 "audit_engagement"
4678 | "workpaper"
4679 | "audit_finding"
4680 | "audit_evidence"
4681 | "risk_assessment"
4682 | "professional_judgment" => "AUDIT",
4683 "bank_reconciliation" | "bank_statement_line" | "reconciling_item" => "R2R",
4685 "tax_jurisdiction"
4687 | "tax_code"
4688 | "tax_line"
4689 | "tax_return"
4690 | "tax_provision"
4691 | "withholding_tax_record" => "TAX",
4692 "cash_position" | "cash_forecast" | "hedge_relationship" | "debt_instrument" => {
4694 "TREASURY"
4695 }
4696 "emission_record"
4698 | "esg_disclosure"
4699 | "supplier_esg_assessment"
4700 | "climate_scenario" => "ESG",
4701 "project" | "earned_value_metric" | "project_milestone" => "PROJECT",
4703 "ic_matched_pair" | "elimination_entry" => "IC",
4705 "process_evolution" | "organizational_event" | "disruption_event" => "TEMPORAL",
4707 "compliance_standard" | "compliance_finding" | "regulatory_filing" => "COMPLIANCE",
4709 "coso_component" | "coso_principle" | "sox_assertion" | "internal_control" => {
4711 "GOVERNANCE"
4712 }
4713 "vendor" | "customer" | "employee" | "material" | "fixed_asset" => "MASTER_DATA",
4715 "account" | "journal_entry" => "ACCOUNTING",
4717 "ocpm_object" => "OCPM",
4719 _ => "OTHER",
4721 };
4722 node.properties
4723 .insert("process_family".into(), Value::String(family.to_string()));
4724 }
4725 }
4726
4727 pub fn build_cross_layer_edges(&mut self) {
4729 if !self.config.include_cross_layer_edges {
4730 return;
4731 }
4732
4733 let links = std::mem::take(&mut self.doc_counterparty_links);
4735 for (doc_node_id, counterparty_type, counterparty_id) in &links {
4736 let source_node_id = match counterparty_type.as_str() {
4737 "vendor" => self.vendor_node_ids.get(counterparty_id),
4738 "customer" => self.customer_node_ids.get(counterparty_id),
4739 _ => None,
4740 };
4741 if let Some(source_id) = source_node_id {
4742 self.edges.push(CrossLayerEdge {
4743 source_id: source_id.clone(),
4744 source_layer: HypergraphLayer::GovernanceControls,
4745 target_id: doc_node_id.clone(),
4746 target_layer: HypergraphLayer::ProcessEvents,
4747 edge_type: "SuppliesTo".to_string(),
4748 edge_type_code: type_codes::SUPPLIES_TO,
4749 properties: HashMap::new(),
4750 });
4751 }
4752 }
4753 self.doc_counterparty_links = links;
4754
4755 let finding_ctrl_links = std::mem::take(&mut self.compliance_finding_control_links);
4757 for (finding_node_id, ctrl_id) in &finding_ctrl_links {
4758 if let Some(ctrl_node_id) = self.control_node_ids.get(ctrl_id) {
4759 self.edges.push(CrossLayerEdge {
4760 source_id: finding_node_id.clone(),
4761 source_layer: HypergraphLayer::ProcessEvents,
4762 target_id: ctrl_node_id.clone(),
4763 target_layer: HypergraphLayer::GovernanceControls,
4764 edge_type: "FindingOnControl".to_string(),
4765 edge_type_code: type_codes::FINDING_ON_CONTROL,
4766 properties: HashMap::new(),
4767 });
4768 }
4769 }
4770 self.compliance_finding_control_links = finding_ctrl_links;
4771
4772 let std_ids: Vec<(String, String)> = self
4774 .standard_node_ids
4775 .iter()
4776 .map(|(k, v)| (k.clone(), v.clone()))
4777 .collect();
4778 for (std_id, std_node_id) in &std_ids {
4779 if let Some(&node_idx) = self.node_index.get(std_node_id) {
4781 if let Some(node) = self.nodes.get(node_idx) {
4782 if let Some(Value::Array(acct_types)) =
4783 node.properties.get("applicableAccountTypes")
4784 {
4785 let type_strings: Vec<String> = acct_types
4786 .iter()
4787 .filter_map(|v| v.as_str().map(|s| s.to_lowercase()))
4788 .collect();
4789
4790 for (acct_code, acct_node_id) in &self.account_node_ids {
4792 if let Some(&acct_idx) = self.node_index.get(acct_node_id) {
4794 if let Some(acct_node) = self.nodes.get(acct_idx) {
4795 let label_lower = acct_node.label.to_lowercase();
4796 let matches = type_strings.iter().any(|t| {
4797 label_lower.contains(t)
4798 || acct_code.to_lowercase().contains(t)
4799 });
4800 if matches {
4801 self.edges.push(CrossLayerEdge {
4802 source_id: std_node_id.clone(),
4803 source_layer: HypergraphLayer::GovernanceControls,
4804 target_id: acct_node_id.clone(),
4805 target_layer: HypergraphLayer::AccountingNetwork,
4806 edge_type: format!("GovernedByStandard:{}", std_id),
4807 edge_type_code: type_codes::STANDARD_TO_ACCOUNT,
4808 properties: HashMap::new(),
4809 });
4810 }
4811 }
4812 }
4813 }
4814 }
4815 }
4816 }
4817 }
4818
4819 for (_std_id, std_node_id) in &std_ids {
4821 if let Some(&node_idx) = self.node_index.get(std_node_id) {
4822 if let Some(node) = self.nodes.get(node_idx) {
4823 if let Some(Value::Array(processes)) =
4824 node.properties.get("applicableProcesses")
4825 {
4826 let proc_strings: Vec<String> = processes
4827 .iter()
4828 .filter_map(|v| v.as_str().map(|s| s.to_string()))
4829 .collect();
4830
4831 let is_universal = proc_strings.len() >= 5;
4833 if is_universal {
4834 for ctrl_node_id in self.control_node_ids.values() {
4836 self.edges.push(CrossLayerEdge {
4837 source_id: std_node_id.clone(),
4838 source_layer: HypergraphLayer::GovernanceControls,
4839 target_id: ctrl_node_id.clone(),
4840 target_layer: HypergraphLayer::GovernanceControls,
4841 edge_type: "StandardToControl".to_string(),
4842 edge_type_code: type_codes::STANDARD_TO_CONTROL,
4843 properties: HashMap::new(),
4844 });
4845 }
4846 }
4847 }
4848 }
4849 }
4850 }
4851 }
4852
4853 pub fn build(mut self) -> Hypergraph {
4855 self.build_cross_layer_edges();
4857
4858 let mut layer_node_counts: HashMap<String, usize> = HashMap::new();
4860 let mut node_type_counts: HashMap<String, usize> = HashMap::new();
4861 let mut anomalous_nodes = 0;
4862
4863 for node in &self.nodes {
4864 *layer_node_counts
4865 .entry(node.layer.name().to_string())
4866 .or_insert(0) += 1;
4867 *node_type_counts
4868 .entry(node.entity_type.clone())
4869 .or_insert(0) += 1;
4870 if node.is_anomaly {
4871 anomalous_nodes += 1;
4872 }
4873 }
4874
4875 let mut edge_type_counts: HashMap<String, usize> = HashMap::new();
4876 for edge in &self.edges {
4877 *edge_type_counts.entry(edge.edge_type.clone()).or_insert(0) += 1;
4878 }
4879
4880 let mut hyperedge_type_counts: HashMap<String, usize> = HashMap::new();
4881 let mut anomalous_hyperedges = 0;
4882 for he in &self.hyperedges {
4883 *hyperedge_type_counts
4884 .entry(he.hyperedge_type.clone())
4885 .or_insert(0) += 1;
4886 if he.is_anomaly {
4887 anomalous_hyperedges += 1;
4888 }
4889 }
4890
4891 let budget_report = NodeBudgetReport {
4892 total_budget: self.budget.total_max(),
4893 total_used: self.budget.total_count(),
4894 layer1_budget: self.budget.layer1_max,
4895 layer1_used: self.budget.layer1_count,
4896 layer2_budget: self.budget.layer2_max,
4897 layer2_used: self.budget.layer2_count,
4898 layer3_budget: self.budget.layer3_max,
4899 layer3_used: self.budget.layer3_count,
4900 aggregate_nodes_created: self.aggregate_count,
4901 aggregation_triggered: self.aggregate_count > 0,
4902 };
4903
4904 let metadata = HypergraphMetadata {
4905 name: "multi_layer_hypergraph".to_string(),
4906 num_nodes: self.nodes.len(),
4907 num_edges: self.edges.len(),
4908 num_hyperedges: self.hyperedges.len(),
4909 layer_node_counts,
4910 node_type_counts,
4911 edge_type_counts,
4912 hyperedge_type_counts,
4913 anomalous_nodes,
4914 anomalous_hyperedges,
4915 source: "datasynth".to_string(),
4916 generated_at: chrono::Utc::now().to_rfc3339(),
4917 budget_report: budget_report.clone(),
4918 files: vec![
4919 "nodes.jsonl".to_string(),
4920 "edges.jsonl".to_string(),
4921 "hyperedges.jsonl".to_string(),
4922 "metadata.json".to_string(),
4923 ],
4924 };
4925
4926 Hypergraph {
4927 nodes: self.nodes,
4928 edges: self.edges,
4929 hyperedges: self.hyperedges,
4930 metadata,
4931 budget_report,
4932 }
4933 }
4934
4935 fn try_add_node(&mut self, node: HypergraphNode) -> bool {
4937 if self.node_index.contains_key(&node.id) {
4938 return false; }
4940
4941 if !self.budget.can_add(node.layer) {
4942 return false; }
4944
4945 let id = node.id.clone();
4946 let layer = node.layer;
4947 self.nodes.push(node);
4948 let idx = self.nodes.len() - 1;
4949 self.node_index.insert(id, idx);
4950 self.budget.record_add(layer);
4951 true
4952 }
4953}
4954
4955fn component_to_feature(component: &CosoComponent) -> f64 {
4957 match component {
4958 CosoComponent::ControlEnvironment => 1.0,
4959 CosoComponent::RiskAssessment => 2.0,
4960 CosoComponent::ControlActivities => 3.0,
4961 CosoComponent::InformationCommunication => 4.0,
4962 CosoComponent::MonitoringActivities => 5.0,
4963 }
4964}
4965
4966fn account_type_feature(account_type: &datasynth_core::models::AccountType) -> f64 {
4968 use datasynth_core::models::AccountType;
4969 match account_type {
4970 AccountType::Asset => 1.0,
4971 AccountType::Liability => 2.0,
4972 AccountType::Equity => 3.0,
4973 AccountType::Revenue => 4.0,
4974 AccountType::Expense => 5.0,
4975 AccountType::Statistical => 6.0,
4976 }
4977}
4978
4979fn compute_je_features(entry: &JournalEntry) -> Vec<f64> {
4981 let total_debit: f64 = entry
4982 .lines
4983 .iter()
4984 .map(|l| l.debit_amount.to_string().parse::<f64>().unwrap_or(0.0))
4985 .sum();
4986
4987 let line_count = entry.lines.len() as f64;
4988 let posting_date = entry.header.posting_date;
4989 let weekday = posting_date.weekday().num_days_from_monday() as f64 / WEEKDAY_NORMALIZER;
4990 let day = posting_date.day() as f64 / DAY_OF_MONTH_NORMALIZER;
4991 let month = posting_date.month() as f64 / MONTH_NORMALIZER;
4992 let is_month_end = if posting_date.day() >= MONTH_END_DAY_THRESHOLD {
4993 1.0
4994 } else {
4995 0.0
4996 };
4997
4998 vec![
4999 (total_debit.abs() + 1.0).ln(), line_count, weekday, day, month, is_month_end, ]
5006}
5007
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use datasynth_core::models::{
        AccountSubType, AccountType, ChartOfAccounts, CoAComplexity, ControlFrequency, ControlType,
        CosoComponent, CosoMaturityLevel, GLAccount, InternalControl, RiskLevel, SoxAssertion,
        UserPersona,
    };

    /// Two-account chart (one cash asset, one accounts-payable liability).
    fn make_test_coa() -> ChartOfAccounts {
        let mut chart = ChartOfAccounts::new(
            String::from("TEST_COA"),
            String::from("Test Chart"),
            String::from("US"),
            datasynth_core::models::IndustrySector::Manufacturing,
            CoAComplexity::Small,
        );

        let accounts = [
            ("1000", "Cash", AccountType::Asset, AccountSubType::Cash),
            (
                "2000",
                "AP",
                AccountType::Liability,
                AccountSubType::AccountsPayable,
            ),
        ];
        for (number, name, account_type, sub_type) in accounts {
            chart.add_account(GLAccount::new(
                String::from(number),
                String::from(name),
                account_type,
                sub_type,
            ));
        }

        chart
    }

    /// A single key preventive control used by the control-related tests.
    fn make_test_control() -> InternalControl {
        use datasynth_core::models::internal_control::{ControlEffectiveness, TestResult};
        use datasynth_core::models::ControlScope;

        InternalControl {
            control_id: String::from("C001"),
            control_name: String::from("Three-Way Match"),
            control_type: ControlType::Preventive,
            objective: String::from("Ensure proper matching"),
            frequency: ControlFrequency::Transactional,
            owner_role: UserPersona::Controller,
            risk_level: RiskLevel::High,
            description: String::from("Test control"),
            is_key_control: true,
            sox_assertion: SoxAssertion::Existence,
            coso_component: CosoComponent::ControlActivities,
            coso_principles: vec![CosoPrinciple::ControlActions],
            control_scope: ControlScope::TransactionLevel,
            maturity_level: CosoMaturityLevel::Managed,
            owner_employee_id: None,
            owner_name: String::from("Test Controller"),
            test_count: 0,
            last_tested_date: None,
            test_result: TestResult::default(),
            effectiveness: ControlEffectiveness::default(),
            mitigates_risk_ids: Vec::new(),
            covers_account_classes: Vec::new(),
        }
    }

    /// The COSO framework alone yields 22 governance nodes and 17
    /// `CoversCosoPrinciple` edges.
    #[test]
    fn test_builder_coso_framework() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 1000,
            ..Default::default()
        });
        builder.add_coso_framework();
        let graph = builder.build();

        assert_eq!(graph.nodes.len(), 22);

        let all_governance = graph
            .nodes
            .iter()
            .all(|node| node.layer == HypergraphLayer::GovernanceControls);
        assert!(all_governance);

        let principle_edges = graph
            .edges
            .iter()
            .filter(|edge| edge.edge_type == "CoversCosoPrinciple")
            .count();
        assert_eq!(principle_edges, 17);
    }

    /// Adding one control on top of the COSO framework yields 24 nodes,
    /// including an `internal_control` and a `sox_assertion` node.
    #[test]
    fn test_builder_controls() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 1000,
            ..Default::default()
        });
        builder.add_coso_framework();
        builder.add_controls(&[make_test_control()]);
        let graph = builder.build();

        assert_eq!(graph.nodes.len(), 24);

        let has_type = |wanted: &str| graph.nodes.iter().any(|node| node.entity_type == wanted);
        assert!(has_type("internal_control"));
        assert!(has_type("sox_assertion"));
    }

    /// Both accounts of the test chart become accounting-layer nodes.
    #[test]
    fn test_builder_accounts() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 1000,
            ..Default::default()
        });
        builder.add_accounts(&make_test_coa());
        let graph = builder.build();

        assert_eq!(graph.nodes.len(), 2);

        let all_accounting = graph
            .nodes
            .iter()
            .all(|node| node.layer == HypergraphLayer::AccountingNetwork);
        assert!(all_accounting);
    }

    /// With a total budget of 10 nodes, at most one of the two accounts is
    /// admitted.
    #[test]
    fn test_budget_enforcement() {
        let tight_config = HypergraphConfig {
            max_nodes: 10,
            include_coso: false,
            include_controls: false,
            include_sox: false,
            include_vendors: false,
            include_customers: false,
            include_employees: false,
            include_p2p: false,
            include_o2c: false,
            ..Default::default()
        };
        let mut builder = HypergraphBuilder::new(tight_config);
        builder.add_accounts(&make_test_coa());
        let graph = builder.build();

        assert!(graph.nodes.len() <= 1);
    }

    /// A combined build produces nodes and edges, and the metadata counts
    /// agree with the actual collections.
    #[test]
    fn test_full_build() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 10000,
            ..Default::default()
        });
        builder.add_coso_framework();
        builder.add_controls(&[make_test_control()]);
        builder.add_accounts(&make_test_coa());
        let graph = builder.build();

        assert!(!graph.nodes.is_empty());
        assert!(!graph.edges.is_empty());
        assert_eq!(graph.metadata.num_nodes, graph.nodes.len());
        assert_eq!(graph.metadata.num_edges, graph.edges.len());
    }
}