1use std::collections::HashMap;
12
13use chrono::Datelike;
14use serde_json::Value;
15
16use datasynth_banking::models::{BankAccount, BankTransaction, BankingCustomer};
17use datasynth_core::models::audit::{
18 AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
19};
20use datasynth_core::models::sourcing::{
21 BidEvaluation, ProcurementContract, RfxEvent, SourcingProject, SupplierBid,
22 SupplierQualification,
23};
24use datasynth_core::models::ExpenseReport;
25use datasynth_core::models::{
26 BankReconciliation, ChartOfAccounts, CosoComponent, CosoPrinciple, Customer, CycleCount,
27 Employee, InternalControl, JournalEntry, PayrollRun, ProductionOrder, QualityInspection,
28 TimeEntry, Vendor,
29};
30
31use crate::models::hypergraph::{
32 AggregationStrategy, CrossLayerEdge, Hyperedge, HyperedgeParticipant, Hypergraph,
33 HypergraphLayer, HypergraphMetadata, HypergraphNode, NodeBudget, NodeBudgetReport,
34};
35
// Calendar constants. Their consumers are not visible in this part of the file;
// NOTE(review): presumably used by the journal-entry feature computation — confirm.

/// Day-of-month value that, judging by the name, marks the start of the month-end window.
const MONTH_END_DAY_THRESHOLD: u32 = 28_u32;
/// Divisor for weekday values (name suggests scaling a 0..=6 weekday index into [0, 1]).
const WEEKDAY_NORMALIZER: f64 = 6.0_f64;
/// Divisor for day-of-month values (name suggests scaling 1..=31 into (0, 1]).
const DAY_OF_MONTH_NORMALIZER: f64 = 31.0_f64;
/// Divisor for month values (name suggests scaling 1..=12 into (0, 1]).
const MONTH_NORMALIZER: f64 = 12.0_f64;
44
/// Numeric codes stored in `entity_type_code` (nodes) and `edge_type_code` (edges).
///
/// The values are part of the emitted graph and must not be renumbered.
// Not every code is referenced by the builder, hence the blanket allow.
#[allow(dead_code)]
mod type_codes {
    // ---- 1xx: accounting ------------------------------------------------
    pub const ACCOUNT: u32 = 100;
    pub const JOURNAL_ENTRY: u32 = 101;

    // ---- 2xx: master-data entities --------------------------------------
    pub const VENDOR: u32 = 200;
    pub const CUSTOMER: u32 = 201;
    pub const EMPLOYEE: u32 = 202;
    pub const BANKING_CUSTOMER: u32 = 203;

    // ---- 30x: procure-to-pay documents ----------------------------------
    pub const PURCHASE_ORDER: u32 = 300;
    pub const GOODS_RECEIPT: u32 = 301;
    pub const VENDOR_INVOICE: u32 = 302;
    pub const PAYMENT: u32 = 303;

    // ---- 31x: order-to-cash documents -----------------------------------
    pub const SALES_ORDER: u32 = 310;
    pub const DELIVERY: u32 = 311;
    pub const CUSTOMER_INVOICE: u32 = 312;

    // ---- 32x: sourcing documents ----------------------------------------
    pub const SOURCING_PROJECT: u32 = 320;
    pub const RFX_EVENT: u32 = 321;
    pub const SUPPLIER_BID: u32 = 322;
    pub const BID_EVALUATION: u32 = 323;
    pub const PROCUREMENT_CONTRACT: u32 = 324;
    pub const SUPPLIER_QUALIFICATION: u32 = 325;

    // ---- 33x: payroll / time / expenses ---------------------------------
    pub const PAYROLL_RUN: u32 = 330;
    pub const TIME_ENTRY: u32 = 331;
    pub const EXPENSE_REPORT: u32 = 332;
    pub const PAYROLL_LINE_ITEM: u32 = 333;

    // ---- 34x: manufacturing ---------------------------------------------
    pub const PRODUCTION_ORDER: u32 = 340;
    pub const QUALITY_INSPECTION: u32 = 341;
    pub const CYCLE_COUNT: u32 = 342;

    // ---- 35x: banking ---------------------------------------------------
    pub const BANK_ACCOUNT: u32 = 350;
    pub const BANK_TRANSACTION: u32 = 351;
    pub const BANK_STATEMENT_LINE: u32 = 352;

    // ---- 36x: audit -----------------------------------------------------
    pub const AUDIT_ENGAGEMENT: u32 = 360;
    pub const WORKPAPER: u32 = 361;
    pub const AUDIT_FINDING: u32 = 362;
    pub const AUDIT_EVIDENCE: u32 = 363;
    pub const RISK_ASSESSMENT: u32 = 364;
    pub const PROFESSIONAL_JUDGMENT: u32 = 365;

    // ---- 37x: bank reconciliation (371 is intentionally not assigned here)
    pub const BANK_RECONCILIATION: u32 = 370;
    pub const RECONCILING_ITEM: u32 = 372;

    // ---- events and aggregate (pool) nodes ------------------------------
    pub const OCPM_EVENT: u32 = 400;
    pub const POOL_NODE: u32 = 399;

    // ---- 5xx: governance / controls -------------------------------------
    pub const COSO_COMPONENT: u32 = 500;
    pub const COSO_PRINCIPLE: u32 = 501;
    pub const SOX_ASSERTION: u32 = 502;
    pub const INTERNAL_CONTROL: u32 = 503;
    pub const KYC_PROFILE: u32 = 504;

    // ---- edge type codes ------------------------------------------------
    pub const IMPLEMENTS_CONTROL: u32 = 40;
    pub const GOVERNED_BY_STANDARD: u32 = 41;
    pub const OWNS_CONTROL: u32 = 42;
    pub const OVERSEE_PROCESS: u32 = 43;
    pub const ENFORCES_ASSERTION: u32 = 44;
    pub const SUPPLIES_TO: u32 = 48;
    pub const COVERS_COSO_PRINCIPLE: u32 = 54;
    pub const CONTAINS_ACCOUNT: u32 = 55;
}
119
120#[derive(Debug, Clone)]
122pub struct HypergraphConfig {
123 pub max_nodes: usize,
125 pub aggregation_strategy: AggregationStrategy,
127 pub include_coso: bool,
129 pub include_controls: bool,
130 pub include_sox: bool,
131 pub include_vendors: bool,
132 pub include_customers: bool,
133 pub include_employees: bool,
134 pub include_p2p: bool,
136 pub include_o2c: bool,
137 pub include_s2c: bool,
138 pub include_h2r: bool,
139 pub include_mfg: bool,
140 pub include_bank: bool,
141 pub include_audit: bool,
142 pub include_r2r: bool,
143 pub events_as_hyperedges: bool,
144 pub docs_per_counterparty_threshold: usize,
146 pub include_accounts: bool,
148 pub je_as_hyperedges: bool,
149 pub include_cross_layer_edges: bool,
151}
152
153impl Default for HypergraphConfig {
154 fn default() -> Self {
155 Self {
156 max_nodes: 50_000,
157 aggregation_strategy: AggregationStrategy::PoolByCounterparty,
158 include_coso: true,
159 include_controls: true,
160 include_sox: true,
161 include_vendors: true,
162 include_customers: true,
163 include_employees: true,
164 include_p2p: true,
165 include_o2c: true,
166 include_s2c: true,
167 include_h2r: true,
168 include_mfg: true,
169 include_bank: true,
170 include_audit: true,
171 include_r2r: true,
172 events_as_hyperedges: true,
173 docs_per_counterparty_threshold: 20,
174 include_accounts: true,
175 je_as_hyperedges: true,
176 include_cross_layer_edges: true,
177 }
178 }
179}
180
181pub struct HypergraphBuilder {
183 config: HypergraphConfig,
184 budget: NodeBudget,
185 nodes: Vec<HypergraphNode>,
186 edges: Vec<CrossLayerEdge>,
187 hyperedges: Vec<Hyperedge>,
188 node_index: HashMap<String, usize>,
190 aggregate_count: usize,
192 control_node_ids: HashMap<String, String>,
194 coso_component_ids: HashMap<String, String>,
196 account_node_ids: HashMap<String, String>,
198 vendor_node_ids: HashMap<String, String>,
200 customer_node_ids: HashMap<String, String>,
202 employee_node_ids: HashMap<String, String>,
204 doc_counterparty_links: Vec<(String, String, String)>, }
208
209impl HypergraphBuilder {
210 pub fn new(config: HypergraphConfig) -> Self {
212 let budget = NodeBudget::new(config.max_nodes);
213 Self {
214 config,
215 budget,
216 nodes: Vec::new(),
217 edges: Vec::new(),
218 hyperedges: Vec::new(),
219 node_index: HashMap::new(),
220 aggregate_count: 0,
221 control_node_ids: HashMap::new(),
222 coso_component_ids: HashMap::new(),
223 account_node_ids: HashMap::new(),
224 vendor_node_ids: HashMap::new(),
225 customer_node_ids: HashMap::new(),
226 employee_node_ids: HashMap::new(),
227 doc_counterparty_links: Vec::new(),
228 }
229 }
230
231 pub fn add_coso_framework(&mut self) {
233 if !self.config.include_coso {
234 return;
235 }
236
237 let components = [
238 (CosoComponent::ControlEnvironment, "Control Environment"),
239 (CosoComponent::RiskAssessment, "Risk Assessment"),
240 (CosoComponent::ControlActivities, "Control Activities"),
241 (
242 CosoComponent::InformationCommunication,
243 "Information & Communication",
244 ),
245 (CosoComponent::MonitoringActivities, "Monitoring Activities"),
246 ];
247
248 for (component, name) in &components {
249 let id = format!("coso_comp_{}", name.replace(' ', "_").replace('&', "and"));
250 if self.try_add_node(HypergraphNode {
251 id: id.clone(),
252 entity_type: "CosoComponent".to_string(),
253 entity_type_code: type_codes::COSO_COMPONENT,
254 layer: HypergraphLayer::GovernanceControls,
255 external_id: format!("{:?}", component),
256 label: name.to_string(),
257 properties: HashMap::new(),
258 features: vec![component_to_feature(component)],
259 is_anomaly: false,
260 anomaly_type: None,
261 is_aggregate: false,
262 aggregate_count: 0,
263 }) {
264 self.coso_component_ids
265 .insert(format!("{:?}", component), id);
266 }
267 }
268
269 let principles = [
270 (
271 CosoPrinciple::IntegrityAndEthics,
272 "Integrity and Ethics",
273 CosoComponent::ControlEnvironment,
274 ),
275 (
276 CosoPrinciple::BoardOversight,
277 "Board Oversight",
278 CosoComponent::ControlEnvironment,
279 ),
280 (
281 CosoPrinciple::OrganizationalStructure,
282 "Organizational Structure",
283 CosoComponent::ControlEnvironment,
284 ),
285 (
286 CosoPrinciple::CommitmentToCompetence,
287 "Commitment to Competence",
288 CosoComponent::ControlEnvironment,
289 ),
290 (
291 CosoPrinciple::Accountability,
292 "Accountability",
293 CosoComponent::ControlEnvironment,
294 ),
295 (
296 CosoPrinciple::ClearObjectives,
297 "Clear Objectives",
298 CosoComponent::RiskAssessment,
299 ),
300 (
301 CosoPrinciple::IdentifyRisks,
302 "Identify Risks",
303 CosoComponent::RiskAssessment,
304 ),
305 (
306 CosoPrinciple::FraudRisk,
307 "Fraud Risk",
308 CosoComponent::RiskAssessment,
309 ),
310 (
311 CosoPrinciple::ChangeIdentification,
312 "Change Identification",
313 CosoComponent::RiskAssessment,
314 ),
315 (
316 CosoPrinciple::ControlActions,
317 "Control Actions",
318 CosoComponent::ControlActivities,
319 ),
320 (
321 CosoPrinciple::TechnologyControls,
322 "Technology Controls",
323 CosoComponent::ControlActivities,
324 ),
325 (
326 CosoPrinciple::PoliciesAndProcedures,
327 "Policies and Procedures",
328 CosoComponent::ControlActivities,
329 ),
330 (
331 CosoPrinciple::QualityInformation,
332 "Quality Information",
333 CosoComponent::InformationCommunication,
334 ),
335 (
336 CosoPrinciple::InternalCommunication,
337 "Internal Communication",
338 CosoComponent::InformationCommunication,
339 ),
340 (
341 CosoPrinciple::ExternalCommunication,
342 "External Communication",
343 CosoComponent::InformationCommunication,
344 ),
345 (
346 CosoPrinciple::OngoingMonitoring,
347 "Ongoing Monitoring",
348 CosoComponent::MonitoringActivities,
349 ),
350 (
351 CosoPrinciple::DeficiencyEvaluation,
352 "Deficiency Evaluation",
353 CosoComponent::MonitoringActivities,
354 ),
355 ];
356
357 for (principle, name, parent_component) in &principles {
358 let principle_id = format!("coso_prin_{}", name.replace(' ', "_").replace('&', "and"));
359 if self.try_add_node(HypergraphNode {
360 id: principle_id.clone(),
361 entity_type: "CosoPrinciple".to_string(),
362 entity_type_code: type_codes::COSO_PRINCIPLE,
363 layer: HypergraphLayer::GovernanceControls,
364 external_id: format!("{:?}", principle),
365 label: name.to_string(),
366 properties: {
367 let mut p = HashMap::new();
368 p.insert(
369 "principle_number".to_string(),
370 Value::Number(principle.principle_number().into()),
371 );
372 p
373 },
374 features: vec![principle.principle_number() as f64],
375 is_anomaly: false,
376 anomaly_type: None,
377 is_aggregate: false,
378 aggregate_count: 0,
379 }) {
380 let comp_key = format!("{:?}", parent_component);
382 if let Some(comp_id) = self.coso_component_ids.get(&comp_key) {
383 self.edges.push(CrossLayerEdge {
384 source_id: principle_id,
385 source_layer: HypergraphLayer::GovernanceControls,
386 target_id: comp_id.clone(),
387 target_layer: HypergraphLayer::GovernanceControls,
388 edge_type: "CoversCosoPrinciple".to_string(),
389 edge_type_code: type_codes::COVERS_COSO_PRINCIPLE,
390 properties: HashMap::new(),
391 });
392 }
393 }
394 }
395 }
396
397 pub fn add_controls(&mut self, controls: &[InternalControl]) {
399 if !self.config.include_controls {
400 return;
401 }
402
403 for control in controls {
404 let node_id = format!("ctrl_{}", control.control_id);
405 if self.try_add_node(HypergraphNode {
406 id: node_id.clone(),
407 entity_type: "InternalControl".to_string(),
408 entity_type_code: type_codes::INTERNAL_CONTROL,
409 layer: HypergraphLayer::GovernanceControls,
410 external_id: control.control_id.clone(),
411 label: control.control_name.clone(),
412 properties: {
413 let mut p = HashMap::new();
414 p.insert(
415 "control_type".to_string(),
416 Value::String(format!("{:?}", control.control_type)),
417 );
418 p.insert(
419 "risk_level".to_string(),
420 Value::String(format!("{:?}", control.risk_level)),
421 );
422 p.insert(
423 "is_key_control".to_string(),
424 Value::Bool(control.is_key_control),
425 );
426 p.insert(
427 "maturity_level".to_string(),
428 Value::String(format!("{:?}", control.maturity_level)),
429 );
430 p
431 },
432 features: vec![
433 if control.is_key_control { 1.0 } else { 0.0 },
434 control.maturity_level.level() as f64 / 5.0,
435 ],
436 is_anomaly: false,
437 anomaly_type: None,
438 is_aggregate: false,
439 aggregate_count: 0,
440 }) {
441 self.control_node_ids
442 .insert(control.control_id.clone(), node_id.clone());
443
444 let comp_key = format!("{:?}", control.coso_component);
446 if let Some(comp_id) = self.coso_component_ids.get(&comp_key) {
447 self.edges.push(CrossLayerEdge {
448 source_id: node_id.clone(),
449 source_layer: HypergraphLayer::GovernanceControls,
450 target_id: comp_id.clone(),
451 target_layer: HypergraphLayer::GovernanceControls,
452 edge_type: "ImplementsControl".to_string(),
453 edge_type_code: type_codes::IMPLEMENTS_CONTROL,
454 properties: HashMap::new(),
455 });
456 }
457
458 if self.config.include_sox {
460 let assertion_id = format!("sox_{:?}", control.sox_assertion).to_lowercase();
461 if !self.node_index.contains_key(&assertion_id) {
463 self.try_add_node(HypergraphNode {
464 id: assertion_id.clone(),
465 entity_type: "SoxAssertion".to_string(),
466 entity_type_code: type_codes::SOX_ASSERTION,
467 layer: HypergraphLayer::GovernanceControls,
468 external_id: format!("{:?}", control.sox_assertion),
469 label: format!("{:?}", control.sox_assertion),
470 properties: HashMap::new(),
471 features: vec![],
472 is_anomaly: false,
473 anomaly_type: None,
474 is_aggregate: false,
475 aggregate_count: 0,
476 });
477 }
478 self.edges.push(CrossLayerEdge {
479 source_id: node_id,
480 source_layer: HypergraphLayer::GovernanceControls,
481 target_id: assertion_id,
482 target_layer: HypergraphLayer::GovernanceControls,
483 edge_type: "EnforcesAssertion".to_string(),
484 edge_type_code: type_codes::ENFORCES_ASSERTION,
485 properties: HashMap::new(),
486 });
487 }
488 }
489 }
490 }
491
492 pub fn add_vendors(&mut self, vendors: &[Vendor]) {
494 if !self.config.include_vendors {
495 return;
496 }
497
498 for vendor in vendors {
499 let node_id = format!("vnd_{}", vendor.vendor_id);
500 if self.try_add_node(HypergraphNode {
501 id: node_id.clone(),
502 entity_type: "Vendor".to_string(),
503 entity_type_code: type_codes::VENDOR,
504 layer: HypergraphLayer::GovernanceControls,
505 external_id: vendor.vendor_id.clone(),
506 label: vendor.name.clone(),
507 properties: {
508 let mut p = HashMap::new();
509 p.insert(
510 "vendor_type".to_string(),
511 Value::String(format!("{:?}", vendor.vendor_type)),
512 );
513 p.insert("country".to_string(), Value::String(vendor.country.clone()));
514 p.insert("is_active".to_string(), Value::Bool(vendor.is_active));
515 p
516 },
517 features: vec![if vendor.is_active { 1.0 } else { 0.0 }],
518 is_anomaly: false,
519 anomaly_type: None,
520 is_aggregate: false,
521 aggregate_count: 0,
522 }) {
523 self.vendor_node_ids
524 .insert(vendor.vendor_id.clone(), node_id);
525 }
526 }
527 }
528
529 pub fn add_customers(&mut self, customers: &[Customer]) {
531 if !self.config.include_customers {
532 return;
533 }
534
535 for customer in customers {
536 let node_id = format!("cust_{}", customer.customer_id);
537 if self.try_add_node(HypergraphNode {
538 id: node_id.clone(),
539 entity_type: "Customer".to_string(),
540 entity_type_code: type_codes::CUSTOMER,
541 layer: HypergraphLayer::GovernanceControls,
542 external_id: customer.customer_id.clone(),
543 label: customer.name.clone(),
544 properties: {
545 let mut p = HashMap::new();
546 p.insert(
547 "customer_type".to_string(),
548 Value::String(format!("{:?}", customer.customer_type)),
549 );
550 p.insert(
551 "country".to_string(),
552 Value::String(customer.country.clone()),
553 );
554 p.insert(
555 "credit_rating".to_string(),
556 Value::String(format!("{:?}", customer.credit_rating)),
557 );
558 p
559 },
560 features: vec![if customer.is_active { 1.0 } else { 0.0 }],
561 is_anomaly: false,
562 anomaly_type: None,
563 is_aggregate: false,
564 aggregate_count: 0,
565 }) {
566 self.customer_node_ids
567 .insert(customer.customer_id.clone(), node_id);
568 }
569 }
570 }
571
572 pub fn add_employees(&mut self, employees: &[Employee]) {
574 if !self.config.include_employees {
575 return;
576 }
577
578 for employee in employees {
579 let node_id = format!("emp_{}", employee.employee_id);
580 if self.try_add_node(HypergraphNode {
581 id: node_id.clone(),
582 entity_type: "Employee".to_string(),
583 entity_type_code: type_codes::EMPLOYEE,
584 layer: HypergraphLayer::GovernanceControls,
585 external_id: employee.employee_id.clone(),
586 label: employee.display_name.clone(),
587 properties: {
588 let mut p = HashMap::new();
589 p.insert(
590 "persona".to_string(),
591 Value::String(employee.persona.to_string()),
592 );
593 p.insert(
594 "job_level".to_string(),
595 Value::String(format!("{:?}", employee.job_level)),
596 );
597 p.insert(
598 "company_code".to_string(),
599 Value::String(employee.company_code.clone()),
600 );
601 p
602 },
603 features: vec![employee
604 .approval_limit
605 .to_string()
606 .parse::<f64>()
607 .unwrap_or(0.0)
608 .ln_1p()],
609 is_anomaly: false,
610 anomaly_type: None,
611 is_aggregate: false,
612 aggregate_count: 0,
613 }) {
614 self.employee_node_ids
615 .insert(employee.employee_id.clone(), node_id);
616 }
617 }
618 }
619
620 pub fn add_accounts(&mut self, coa: &ChartOfAccounts) {
622 if !self.config.include_accounts {
623 return;
624 }
625
626 for account in &coa.accounts {
627 let node_id = format!("acct_{}", account.account_number);
628 if self.try_add_node(HypergraphNode {
629 id: node_id.clone(),
630 entity_type: "Account".to_string(),
631 entity_type_code: type_codes::ACCOUNT,
632 layer: HypergraphLayer::AccountingNetwork,
633 external_id: account.account_number.clone(),
634 label: account.short_description.clone(),
635 properties: {
636 let mut p = HashMap::new();
637 p.insert(
638 "account_type".to_string(),
639 Value::String(format!("{:?}", account.account_type)),
640 );
641 p.insert(
642 "is_control_account".to_string(),
643 Value::Bool(account.is_control_account),
644 );
645 p.insert("is_postable".to_string(), Value::Bool(account.is_postable));
646 p
647 },
648 features: vec![
649 account_type_feature(&account.account_type),
650 if account.is_control_account { 1.0 } else { 0.0 },
651 if account.normal_debit_balance {
652 1.0
653 } else {
654 0.0
655 },
656 ],
657 is_anomaly: false,
658 anomaly_type: None,
659 is_aggregate: false,
660 aggregate_count: 0,
661 }) {
662 self.account_node_ids
663 .insert(account.account_number.clone(), node_id);
664 }
665 }
666 }
667
668 pub fn add_journal_entries_as_hyperedges(&mut self, entries: &[JournalEntry]) {
672 if !self.config.je_as_hyperedges {
673 return;
674 }
675
676 for entry in entries {
677 let mut participants = Vec::new();
678
679 for line in &entry.lines {
680 let account_id = format!("acct_{}", line.gl_account);
681
682 if !self.node_index.contains_key(&account_id) {
684 self.try_add_node(HypergraphNode {
685 id: account_id.clone(),
686 entity_type: "Account".to_string(),
687 entity_type_code: type_codes::ACCOUNT,
688 layer: HypergraphLayer::AccountingNetwork,
689 external_id: line.gl_account.clone(),
690 label: line
691 .account_description
692 .clone()
693 .unwrap_or_else(|| line.gl_account.clone()),
694 properties: HashMap::new(),
695 features: vec![],
696 is_anomaly: false,
697 anomaly_type: None,
698 is_aggregate: false,
699 aggregate_count: 0,
700 });
701 self.account_node_ids
702 .insert(line.gl_account.clone(), account_id.clone());
703 }
704
705 let amount: f64 = if !line.debit_amount.is_zero() {
706 line.debit_amount.to_string().parse().unwrap_or(0.0)
707 } else {
708 line.credit_amount.to_string().parse().unwrap_or(0.0)
709 };
710
711 let role = if !line.debit_amount.is_zero() {
712 "debit"
713 } else {
714 "credit"
715 };
716
717 participants.push(HyperedgeParticipant {
718 node_id: account_id,
719 role: role.to_string(),
720 weight: Some(amount),
721 });
722 }
723
724 if participants.is_empty() {
725 continue;
726 }
727
728 let doc_id = entry.header.document_id.to_string();
729 let subtype = entry
730 .header
731 .business_process
732 .as_ref()
733 .map(|bp| format!("{:?}", bp))
734 .unwrap_or_else(|| "General".to_string());
735
736 self.hyperedges.push(Hyperedge {
737 id: format!("je_{}", doc_id),
738 hyperedge_type: "JournalEntry".to_string(),
739 subtype,
740 participants,
741 layer: HypergraphLayer::AccountingNetwork,
742 properties: {
743 let mut p = HashMap::new();
744 p.insert("document_id".to_string(), Value::String(doc_id));
745 p.insert(
746 "company_code".to_string(),
747 Value::String(entry.header.company_code.clone()),
748 );
749 p.insert(
750 "document_type".to_string(),
751 Value::String(entry.header.document_type.clone()),
752 );
753 p.insert(
754 "created_by".to_string(),
755 Value::String(entry.header.created_by.clone()),
756 );
757 p
758 },
759 timestamp: Some(entry.header.posting_date),
760 is_anomaly: entry.header.is_anomaly || entry.header.is_fraud,
761 anomaly_type: entry.header.anomaly_type.clone().or_else(|| {
762 entry
763 .header
764 .fraud_type
765 .as_ref()
766 .map(|ft| format!("{:?}", ft))
767 }),
768 features: compute_je_features(entry),
769 });
770 }
771 }
772
773 pub fn add_journal_entry_nodes(&mut self, entries: &[JournalEntry]) {
779 for entry in entries {
780 let node_id = format!("je_{}", entry.header.document_id);
781 let total_amount: f64 = entry
782 .lines
783 .iter()
784 .map(|l| l.debit_amount.to_string().parse::<f64>().unwrap_or(0.0))
785 .sum();
786
787 let is_anomaly = entry.header.is_anomaly || entry.header.is_fraud;
788 let anomaly_type = entry.header.anomaly_type.clone().or_else(|| {
789 entry
790 .header
791 .fraud_type
792 .as_ref()
793 .map(|ft| format!("{:?}", ft))
794 });
795
796 self.try_add_node(HypergraphNode {
797 id: node_id,
798 entity_type: "JournalEntry".to_string(),
799 entity_type_code: type_codes::JOURNAL_ENTRY,
800 layer: HypergraphLayer::AccountingNetwork,
801 external_id: entry.header.document_id.to_string(),
802 label: format!("JE-{}", entry.header.document_id),
803 properties: {
804 let mut p = HashMap::new();
805 p.insert(
806 "amount".into(),
807 Value::Number(
808 serde_json::Number::from_f64(total_amount)
809 .unwrap_or_else(|| serde_json::Number::from(0)),
810 ),
811 );
812 p.insert(
813 "date".into(),
814 Value::String(entry.header.posting_date.to_string()),
815 );
816 p.insert(
817 "company_code".into(),
818 Value::String(entry.header.company_code.clone()),
819 );
820 p.insert(
821 "line_count".into(),
822 Value::Number((entry.lines.len() as u64).into()),
823 );
824 p.insert("is_anomaly".into(), Value::Bool(is_anomaly));
825 if let Some(ref at) = anomaly_type {
826 p.insert("anomaly_type".into(), Value::String(at.clone()));
827 }
828 p
829 },
830 features: vec![total_amount / 100_000.0],
831 is_anomaly,
832 anomaly_type,
833 is_aggregate: false,
834 aggregate_count: 0,
835 });
836 }
837 }
838
839 pub fn add_p2p_documents(
843 &mut self,
844 purchase_orders: &[datasynth_core::models::documents::PurchaseOrder],
845 goods_receipts: &[datasynth_core::models::documents::GoodsReceipt],
846 vendor_invoices: &[datasynth_core::models::documents::VendorInvoice],
847 payments: &[datasynth_core::models::documents::Payment],
848 ) {
849 if !self.config.include_p2p {
850 return;
851 }
852
853 let mut vendor_doc_counts: HashMap<String, usize> = HashMap::new();
855 for po in purchase_orders {
856 *vendor_doc_counts.entry(po.vendor_id.clone()).or_insert(0) += 1;
857 }
858
859 let threshold = self.config.docs_per_counterparty_threshold;
860 let should_aggregate = matches!(
861 self.config.aggregation_strategy,
862 AggregationStrategy::PoolByCounterparty
863 );
864
865 let vendors_needing_pools: Vec<String> = if should_aggregate {
867 vendor_doc_counts
868 .iter()
869 .filter(|(_, count)| **count > threshold)
870 .map(|(vid, _)| vid.clone())
871 .collect()
872 } else {
873 Vec::new()
874 };
875
876 for vendor_id in &vendors_needing_pools {
878 let count = vendor_doc_counts[vendor_id];
879 let pool_id = format!("pool_p2p_{}", vendor_id);
880 if self.try_add_node(HypergraphNode {
881 id: pool_id.clone(),
882 entity_type: "P2PPool".to_string(),
883 entity_type_code: type_codes::POOL_NODE,
884 layer: HypergraphLayer::ProcessEvents,
885 external_id: format!("pool_p2p_{}", vendor_id),
886 label: format!("P2P Pool ({}): {} docs", vendor_id, count),
887 properties: {
888 let mut p = HashMap::new();
889 p.insert("vendor_id".to_string(), Value::String(vendor_id.clone()));
890 p.insert("document_count".to_string(), Value::Number(count.into()));
891 p
892 },
893 features: vec![count as f64],
894 is_anomaly: false,
895 anomaly_type: None,
896 is_aggregate: true,
897 aggregate_count: count,
898 }) {
899 self.doc_counterparty_links.push((
900 pool_id,
901 "vendor".to_string(),
902 vendor_id.clone(),
903 ));
904 }
905 self.aggregate_count += 1;
906 }
907
908 for po in purchase_orders {
910 if should_aggregate && vendors_needing_pools.contains(&po.vendor_id) {
911 continue; }
913
914 let doc_id = &po.header.document_id;
915 let node_id = format!("po_{}", doc_id);
916 if self.try_add_node(HypergraphNode {
917 id: node_id.clone(),
918 entity_type: "PurchaseOrder".to_string(),
919 entity_type_code: type_codes::PURCHASE_ORDER,
920 layer: HypergraphLayer::ProcessEvents,
921 external_id: doc_id.clone(),
922 label: format!("PO {}", doc_id),
923 properties: {
924 let mut p = HashMap::new();
925 p.insert("vendor_id".to_string(), Value::String(po.vendor_id.clone()));
926 p.insert(
927 "company_code".to_string(),
928 Value::String(po.header.company_code.clone()),
929 );
930 p
931 },
932 features: vec![po
933 .total_net_amount
934 .to_string()
935 .parse::<f64>()
936 .unwrap_or(0.0)
937 .ln_1p()],
938 is_anomaly: false,
939 anomaly_type: None,
940 is_aggregate: false,
941 aggregate_count: 0,
942 }) {
943 self.doc_counterparty_links.push((
944 node_id,
945 "vendor".to_string(),
946 po.vendor_id.clone(),
947 ));
948 }
949 }
950
951 for gr in goods_receipts {
953 let vendor_id = gr.vendor_id.as_deref().unwrap_or("UNKNOWN");
954 if should_aggregate && vendors_needing_pools.contains(&vendor_id.to_string()) {
955 continue;
956 }
957 let doc_id = &gr.header.document_id;
958 let node_id = format!("gr_{}", doc_id);
959 self.try_add_node(HypergraphNode {
960 id: node_id,
961 entity_type: "GoodsReceipt".to_string(),
962 entity_type_code: type_codes::GOODS_RECEIPT,
963 layer: HypergraphLayer::ProcessEvents,
964 external_id: doc_id.clone(),
965 label: format!("GR {}", doc_id),
966 properties: {
967 let mut p = HashMap::new();
968 p.insert(
969 "vendor_id".to_string(),
970 Value::String(vendor_id.to_string()),
971 );
972 p
973 },
974 features: vec![gr
975 .total_value
976 .to_string()
977 .parse::<f64>()
978 .unwrap_or(0.0)
979 .ln_1p()],
980 is_anomaly: false,
981 anomaly_type: None,
982 is_aggregate: false,
983 aggregate_count: 0,
984 });
985 }
986
987 for inv in vendor_invoices {
989 if should_aggregate && vendors_needing_pools.contains(&inv.vendor_id) {
990 continue;
991 }
992 let doc_id = &inv.header.document_id;
993 let node_id = format!("vinv_{}", doc_id);
994 self.try_add_node(HypergraphNode {
995 id: node_id,
996 entity_type: "VendorInvoice".to_string(),
997 entity_type_code: type_codes::VENDOR_INVOICE,
998 layer: HypergraphLayer::ProcessEvents,
999 external_id: doc_id.clone(),
1000 label: format!("VI {}", doc_id),
1001 properties: {
1002 let mut p = HashMap::new();
1003 p.insert(
1004 "vendor_id".to_string(),
1005 Value::String(inv.vendor_id.clone()),
1006 );
1007 p
1008 },
1009 features: vec![inv
1010 .payable_amount
1011 .to_string()
1012 .parse::<f64>()
1013 .unwrap_or(0.0)
1014 .ln_1p()],
1015 is_anomaly: false,
1016 anomaly_type: None,
1017 is_aggregate: false,
1018 aggregate_count: 0,
1019 });
1020 }
1021
1022 for pmt in payments {
1024 let doc_id = &pmt.header.document_id;
1025 let node_id = format!("pmt_{}", doc_id);
1026 self.try_add_node(HypergraphNode {
1027 id: node_id,
1028 entity_type: "Payment".to_string(),
1029 entity_type_code: type_codes::PAYMENT,
1030 layer: HypergraphLayer::ProcessEvents,
1031 external_id: doc_id.clone(),
1032 label: format!("PMT {}", doc_id),
1033 properties: HashMap::new(),
1034 features: vec![pmt.amount.to_string().parse::<f64>().unwrap_or(0.0).ln_1p()],
1035 is_anomaly: false,
1036 anomaly_type: None,
1037 is_aggregate: false,
1038 aggregate_count: 0,
1039 });
1040 }
1041 }
1042
1043 pub fn add_o2c_documents(
1045 &mut self,
1046 sales_orders: &[datasynth_core::models::documents::SalesOrder],
1047 deliveries: &[datasynth_core::models::documents::Delivery],
1048 customer_invoices: &[datasynth_core::models::documents::CustomerInvoice],
1049 ) {
1050 if !self.config.include_o2c {
1051 return;
1052 }
1053
1054 let mut customer_doc_counts: HashMap<String, usize> = HashMap::new();
1056 for so in sales_orders {
1057 *customer_doc_counts
1058 .entry(so.customer_id.clone())
1059 .or_insert(0) += 1;
1060 }
1061
1062 let threshold = self.config.docs_per_counterparty_threshold;
1063 let should_aggregate = matches!(
1064 self.config.aggregation_strategy,
1065 AggregationStrategy::PoolByCounterparty
1066 );
1067
1068 let customers_needing_pools: Vec<String> = if should_aggregate {
1069 customer_doc_counts
1070 .iter()
1071 .filter(|(_, count)| **count > threshold)
1072 .map(|(cid, _)| cid.clone())
1073 .collect()
1074 } else {
1075 Vec::new()
1076 };
1077
1078 for customer_id in &customers_needing_pools {
1080 let count = customer_doc_counts[customer_id];
1081 let pool_id = format!("pool_o2c_{}", customer_id);
1082 if self.try_add_node(HypergraphNode {
1083 id: pool_id.clone(),
1084 entity_type: "O2CPool".to_string(),
1085 entity_type_code: type_codes::POOL_NODE,
1086 layer: HypergraphLayer::ProcessEvents,
1087 external_id: format!("pool_o2c_{}", customer_id),
1088 label: format!("O2C Pool ({}): {} docs", customer_id, count),
1089 properties: {
1090 let mut p = HashMap::new();
1091 p.insert(
1092 "customer_id".to_string(),
1093 Value::String(customer_id.clone()),
1094 );
1095 p.insert("document_count".to_string(), Value::Number(count.into()));
1096 p
1097 },
1098 features: vec![count as f64],
1099 is_anomaly: false,
1100 anomaly_type: None,
1101 is_aggregate: true,
1102 aggregate_count: count,
1103 }) {
1104 self.doc_counterparty_links.push((
1105 pool_id,
1106 "customer".to_string(),
1107 customer_id.clone(),
1108 ));
1109 }
1110 self.aggregate_count += 1;
1111 }
1112
1113 for so in sales_orders {
1114 if should_aggregate && customers_needing_pools.contains(&so.customer_id) {
1115 continue;
1116 }
1117 let doc_id = &so.header.document_id;
1118 let node_id = format!("so_{}", doc_id);
1119 if self.try_add_node(HypergraphNode {
1120 id: node_id.clone(),
1121 entity_type: "SalesOrder".to_string(),
1122 entity_type_code: type_codes::SALES_ORDER,
1123 layer: HypergraphLayer::ProcessEvents,
1124 external_id: doc_id.clone(),
1125 label: format!("SO {}", doc_id),
1126 properties: {
1127 let mut p = HashMap::new();
1128 p.insert(
1129 "customer_id".to_string(),
1130 Value::String(so.customer_id.clone()),
1131 );
1132 p
1133 },
1134 features: vec![so
1135 .total_net_amount
1136 .to_string()
1137 .parse::<f64>()
1138 .unwrap_or(0.0)
1139 .ln_1p()],
1140 is_anomaly: false,
1141 anomaly_type: None,
1142 is_aggregate: false,
1143 aggregate_count: 0,
1144 }) {
1145 self.doc_counterparty_links.push((
1146 node_id,
1147 "customer".to_string(),
1148 so.customer_id.clone(),
1149 ));
1150 }
1151 }
1152
1153 for del in deliveries {
1154 if should_aggregate && customers_needing_pools.contains(&del.customer_id) {
1155 continue;
1156 }
1157 let doc_id = &del.header.document_id;
1158 let node_id = format!("del_{}", doc_id);
1159 self.try_add_node(HypergraphNode {
1160 id: node_id,
1161 entity_type: "Delivery".to_string(),
1162 entity_type_code: type_codes::DELIVERY,
1163 layer: HypergraphLayer::ProcessEvents,
1164 external_id: doc_id.clone(),
1165 label: format!("DEL {}", doc_id),
1166 properties: HashMap::new(),
1167 features: vec![],
1168 is_anomaly: false,
1169 anomaly_type: None,
1170 is_aggregate: false,
1171 aggregate_count: 0,
1172 });
1173 }
1174
1175 for inv in customer_invoices {
1176 if should_aggregate && customers_needing_pools.contains(&inv.customer_id) {
1177 continue;
1178 }
1179 let doc_id = &inv.header.document_id;
1180 let node_id = format!("cinv_{}", doc_id);
1181 self.try_add_node(HypergraphNode {
1182 id: node_id,
1183 entity_type: "CustomerInvoice".to_string(),
1184 entity_type_code: type_codes::CUSTOMER_INVOICE,
1185 layer: HypergraphLayer::ProcessEvents,
1186 external_id: doc_id.clone(),
1187 label: format!("CI {}", doc_id),
1188 properties: HashMap::new(),
1189 features: vec![inv
1190 .total_gross_amount
1191 .to_string()
1192 .parse::<f64>()
1193 .unwrap_or(0.0)
1194 .ln_1p()],
1195 is_anomaly: false,
1196 anomaly_type: None,
1197 is_aggregate: false,
1198 aggregate_count: 0,
1199 });
1200 }
1201 }
1202
1203 pub fn add_s2c_documents(
1205 &mut self,
1206 projects: &[SourcingProject],
1207 qualifications: &[SupplierQualification],
1208 rfx_events: &[RfxEvent],
1209 bids: &[SupplierBid],
1210 evaluations: &[BidEvaluation],
1211 contracts: &[ProcurementContract],
1212 ) {
1213 if !self.config.include_s2c {
1214 return;
1215 }
1216 for p in projects {
1217 let node_id = format!("s2c_proj_{}", p.project_id);
1218 self.try_add_node(HypergraphNode {
1219 id: node_id,
1220 entity_type: "SourcingProject".into(),
1221 entity_type_code: type_codes::SOURCING_PROJECT,
1222 layer: HypergraphLayer::ProcessEvents,
1223 external_id: p.project_id.clone(),
1224 label: format!("SPRJ {}", p.project_id),
1225 properties: HashMap::new(),
1226 features: vec![p
1227 .estimated_annual_spend
1228 .to_string()
1229 .parse::<f64>()
1230 .unwrap_or(0.0)
1231 .ln_1p()],
1232 is_anomaly: false,
1233 anomaly_type: None,
1234 is_aggregate: false,
1235 aggregate_count: 0,
1236 });
1237 }
1238 for q in qualifications {
1239 let node_id = format!("s2c_qual_{}", q.qualification_id);
1240 self.try_add_node(HypergraphNode {
1241 id: node_id,
1242 entity_type: "SupplierQualification".into(),
1243 entity_type_code: type_codes::SUPPLIER_QUALIFICATION,
1244 layer: HypergraphLayer::ProcessEvents,
1245 external_id: q.qualification_id.clone(),
1246 label: format!("SQUAL {}", q.qualification_id),
1247 properties: HashMap::new(),
1248 features: vec![],
1249 is_anomaly: false,
1250 anomaly_type: None,
1251 is_aggregate: false,
1252 aggregate_count: 0,
1253 });
1254 }
1255 for r in rfx_events {
1256 let node_id = format!("s2c_rfx_{}", r.rfx_id);
1257 self.try_add_node(HypergraphNode {
1258 id: node_id,
1259 entity_type: "RfxEvent".into(),
1260 entity_type_code: type_codes::RFX_EVENT,
1261 layer: HypergraphLayer::ProcessEvents,
1262 external_id: r.rfx_id.clone(),
1263 label: format!("RFX {}", r.rfx_id),
1264 properties: HashMap::new(),
1265 features: vec![],
1266 is_anomaly: false,
1267 anomaly_type: None,
1268 is_aggregate: false,
1269 aggregate_count: 0,
1270 });
1271 }
1272 for b in bids {
1273 let node_id = format!("s2c_bid_{}", b.bid_id);
1274 self.try_add_node(HypergraphNode {
1275 id: node_id,
1276 entity_type: "SupplierBid".into(),
1277 entity_type_code: type_codes::SUPPLIER_BID,
1278 layer: HypergraphLayer::ProcessEvents,
1279 external_id: b.bid_id.clone(),
1280 label: format!("BID {}", b.bid_id),
1281 properties: HashMap::new(),
1282 features: vec![b
1283 .total_amount
1284 .to_string()
1285 .parse::<f64>()
1286 .unwrap_or(0.0)
1287 .ln_1p()],
1288 is_anomaly: false,
1289 anomaly_type: None,
1290 is_aggregate: false,
1291 aggregate_count: 0,
1292 });
1293 }
1294 for e in evaluations {
1295 let node_id = format!("s2c_eval_{}", e.evaluation_id);
1296 self.try_add_node(HypergraphNode {
1297 id: node_id,
1298 entity_type: "BidEvaluation".into(),
1299 entity_type_code: type_codes::BID_EVALUATION,
1300 layer: HypergraphLayer::ProcessEvents,
1301 external_id: e.evaluation_id.clone(),
1302 label: format!("BEVAL {}", e.evaluation_id),
1303 properties: HashMap::new(),
1304 features: vec![],
1305 is_anomaly: false,
1306 anomaly_type: None,
1307 is_aggregate: false,
1308 aggregate_count: 0,
1309 });
1310 }
1311 for c in contracts {
1312 let node_id = format!("s2c_ctr_{}", c.contract_id);
1313 self.try_add_node(HypergraphNode {
1314 id: node_id,
1315 entity_type: "ProcurementContract".into(),
1316 entity_type_code: type_codes::PROCUREMENT_CONTRACT,
1317 layer: HypergraphLayer::ProcessEvents,
1318 external_id: c.contract_id.clone(),
1319 label: format!("CTR {}", c.contract_id),
1320 properties: HashMap::new(),
1321 features: vec![c
1322 .total_value
1323 .to_string()
1324 .parse::<f64>()
1325 .unwrap_or(0.0)
1326 .ln_1p()],
1327 is_anomaly: false,
1328 anomaly_type: None,
1329 is_aggregate: false,
1330 aggregate_count: 0,
1331 });
1332 self.doc_counterparty_links.push((
1334 format!("s2c_ctr_{}", c.contract_id),
1335 "vendor".into(),
1336 c.vendor_id.clone(),
1337 ));
1338 }
1339 }
1340
1341 pub fn add_h2r_documents(
1343 &mut self,
1344 payroll_runs: &[PayrollRun],
1345 time_entries: &[TimeEntry],
1346 expense_reports: &[ExpenseReport],
1347 ) {
1348 if !self.config.include_h2r {
1349 return;
1350 }
1351 for pr in payroll_runs {
1352 let node_id = format!("h2r_pay_{}", pr.payroll_id);
1353 self.try_add_node(HypergraphNode {
1354 id: node_id,
1355 entity_type: "PayrollRun".into(),
1356 entity_type_code: type_codes::PAYROLL_RUN,
1357 layer: HypergraphLayer::ProcessEvents,
1358 external_id: pr.payroll_id.clone(),
1359 label: format!("PAY {}", pr.payroll_id),
1360 properties: HashMap::new(),
1361 features: vec![pr
1362 .total_gross
1363 .to_string()
1364 .parse::<f64>()
1365 .unwrap_or(0.0)
1366 .ln_1p()],
1367 is_anomaly: false,
1368 anomaly_type: None,
1369 is_aggregate: false,
1370 aggregate_count: 0,
1371 });
1372 }
1373 for te in time_entries {
1374 let node_id = format!("h2r_time_{}", te.entry_id);
1375 self.try_add_node(HypergraphNode {
1376 id: node_id,
1377 entity_type: "TimeEntry".into(),
1378 entity_type_code: type_codes::TIME_ENTRY,
1379 layer: HypergraphLayer::ProcessEvents,
1380 external_id: te.entry_id.clone(),
1381 label: format!("TIME {}", te.entry_id),
1382 properties: HashMap::new(),
1383 features: vec![te.hours_regular + te.hours_overtime],
1384 is_anomaly: false,
1385 anomaly_type: None,
1386 is_aggregate: false,
1387 aggregate_count: 0,
1388 });
1389 }
1390 for er in expense_reports {
1391 let node_id = format!("h2r_exp_{}", er.report_id);
1392 self.try_add_node(HypergraphNode {
1393 id: node_id,
1394 entity_type: "ExpenseReport".into(),
1395 entity_type_code: type_codes::EXPENSE_REPORT,
1396 layer: HypergraphLayer::ProcessEvents,
1397 external_id: er.report_id.clone(),
1398 label: format!("EXP {}", er.report_id),
1399 properties: HashMap::new(),
1400 features: vec![er
1401 .total_amount
1402 .to_string()
1403 .parse::<f64>()
1404 .unwrap_or(0.0)
1405 .ln_1p()],
1406 is_anomaly: false,
1407 anomaly_type: None,
1408 is_aggregate: false,
1409 aggregate_count: 0,
1410 });
1411 }
1412 }
1413
1414 pub fn add_mfg_documents(
1416 &mut self,
1417 production_orders: &[ProductionOrder],
1418 quality_inspections: &[QualityInspection],
1419 cycle_counts: &[CycleCount],
1420 ) {
1421 if !self.config.include_mfg {
1422 return;
1423 }
1424 for po in production_orders {
1425 let node_id = format!("mfg_po_{}", po.order_id);
1426 self.try_add_node(HypergraphNode {
1427 id: node_id,
1428 entity_type: "ProductionOrder".into(),
1429 entity_type_code: type_codes::PRODUCTION_ORDER,
1430 layer: HypergraphLayer::ProcessEvents,
1431 external_id: po.order_id.clone(),
1432 label: format!("PROD {}", po.order_id),
1433 properties: HashMap::new(),
1434 features: vec![po
1435 .planned_quantity
1436 .to_string()
1437 .parse::<f64>()
1438 .unwrap_or(0.0)
1439 .ln_1p()],
1440 is_anomaly: false,
1441 anomaly_type: None,
1442 is_aggregate: false,
1443 aggregate_count: 0,
1444 });
1445 }
1446 for qi in quality_inspections {
1447 let node_id = format!("mfg_qi_{}", qi.inspection_id);
1448 self.try_add_node(HypergraphNode {
1449 id: node_id,
1450 entity_type: "QualityInspection".into(),
1451 entity_type_code: type_codes::QUALITY_INSPECTION,
1452 layer: HypergraphLayer::ProcessEvents,
1453 external_id: qi.inspection_id.clone(),
1454 label: format!("QI {}", qi.inspection_id),
1455 properties: HashMap::new(),
1456 features: vec![qi.defect_rate],
1457 is_anomaly: false,
1458 anomaly_type: None,
1459 is_aggregate: false,
1460 aggregate_count: 0,
1461 });
1462 }
1463 for cc in cycle_counts {
1464 let node_id = format!("mfg_cc_{}", cc.count_id);
1465 self.try_add_node(HypergraphNode {
1466 id: node_id,
1467 entity_type: "CycleCount".into(),
1468 entity_type_code: type_codes::CYCLE_COUNT,
1469 layer: HypergraphLayer::ProcessEvents,
1470 external_id: cc.count_id.clone(),
1471 label: format!("CC {}", cc.count_id),
1472 properties: HashMap::new(),
1473 features: vec![cc.variance_rate],
1474 is_anomaly: false,
1475 anomaly_type: None,
1476 is_aggregate: false,
1477 aggregate_count: 0,
1478 });
1479 }
1480 }
1481
1482 pub fn add_bank_documents(
1484 &mut self,
1485 customers: &[BankingCustomer],
1486 accounts: &[BankAccount],
1487 transactions: &[BankTransaction],
1488 ) {
1489 if !self.config.include_bank {
1490 return;
1491 }
1492 for cust in customers {
1493 let cid = cust.customer_id.to_string();
1494 let node_id = format!("bank_cust_{}", cid);
1495 self.try_add_node(HypergraphNode {
1496 id: node_id,
1497 entity_type: "BankingCustomer".into(),
1498 entity_type_code: type_codes::BANKING_CUSTOMER,
1499 layer: HypergraphLayer::ProcessEvents,
1500 external_id: cid,
1501 label: format!("BCUST {}", cust.customer_id),
1502 properties: HashMap::new(),
1503 features: vec![],
1504 is_anomaly: false,
1505 anomaly_type: None,
1506 is_aggregate: false,
1507 aggregate_count: 0,
1508 });
1509 }
1510 for acct in accounts {
1511 let aid = acct.account_id.to_string();
1512 let node_id = format!("bank_acct_{}", aid);
1513 self.try_add_node(HypergraphNode {
1514 id: node_id,
1515 entity_type: "BankAccount".into(),
1516 entity_type_code: type_codes::BANK_ACCOUNT,
1517 layer: HypergraphLayer::ProcessEvents,
1518 external_id: aid,
1519 label: format!("BACCT {}", acct.account_number),
1520 properties: HashMap::new(),
1521 features: vec![acct
1522 .current_balance
1523 .to_string()
1524 .parse::<f64>()
1525 .unwrap_or(0.0)
1526 .ln_1p()],
1527 is_anomaly: false,
1528 anomaly_type: None,
1529 is_aggregate: false,
1530 aggregate_count: 0,
1531 });
1532 }
1533 for txn in transactions {
1534 let tid = txn.transaction_id.to_string();
1535 let node_id = format!("bank_txn_{}", tid);
1536 self.try_add_node(HypergraphNode {
1537 id: node_id,
1538 entity_type: "BankTransaction".into(),
1539 entity_type_code: type_codes::BANK_TRANSACTION,
1540 layer: HypergraphLayer::ProcessEvents,
1541 external_id: tid,
1542 label: format!("BTXN {}", txn.reference),
1543 properties: HashMap::new(),
1544 features: vec![txn
1545 .amount
1546 .to_string()
1547 .parse::<f64>()
1548 .unwrap_or(0.0)
1549 .abs()
1550 .ln_1p()],
1551 is_anomaly: txn.is_suspicious,
1552 anomaly_type: None,
1553 is_aggregate: false,
1554 aggregate_count: 0,
1555 });
1556 }
1557 }
1558
1559 #[allow(clippy::too_many_arguments)]
1561 pub fn add_audit_documents(
1562 &mut self,
1563 engagements: &[AuditEngagement],
1564 workpapers: &[Workpaper],
1565 findings: &[AuditFinding],
1566 evidence: &[AuditEvidence],
1567 risks: &[RiskAssessment],
1568 judgments: &[ProfessionalJudgment],
1569 ) {
1570 if !self.config.include_audit {
1571 return;
1572 }
1573 for eng in engagements {
1574 let eid = eng.engagement_id.to_string();
1575 let node_id = format!("audit_eng_{}", eid);
1576 self.try_add_node(HypergraphNode {
1577 id: node_id,
1578 entity_type: "AuditEngagement".into(),
1579 entity_type_code: type_codes::AUDIT_ENGAGEMENT,
1580 layer: HypergraphLayer::ProcessEvents,
1581 external_id: eid,
1582 label: format!("AENG {}", eng.engagement_ref),
1583 properties: HashMap::new(),
1584 features: vec![eng
1585 .materiality
1586 .to_string()
1587 .parse::<f64>()
1588 .unwrap_or(0.0)
1589 .ln_1p()],
1590 is_anomaly: false,
1591 anomaly_type: None,
1592 is_aggregate: false,
1593 aggregate_count: 0,
1594 });
1595 }
1596 for wp in workpapers {
1597 let wid = wp.workpaper_id.to_string();
1598 let node_id = format!("audit_wp_{}", wid);
1599 self.try_add_node(HypergraphNode {
1600 id: node_id,
1601 entity_type: "Workpaper".into(),
1602 entity_type_code: type_codes::WORKPAPER,
1603 layer: HypergraphLayer::ProcessEvents,
1604 external_id: wid,
1605 label: format!("WP {}", wp.workpaper_ref),
1606 properties: HashMap::new(),
1607 features: vec![],
1608 is_anomaly: false,
1609 anomaly_type: None,
1610 is_aggregate: false,
1611 aggregate_count: 0,
1612 });
1613 }
1614 for f in findings {
1615 let fid = f.finding_id.to_string();
1616 let node_id = format!("audit_find_{}", fid);
1617 self.try_add_node(HypergraphNode {
1618 id: node_id,
1619 entity_type: "AuditFinding".into(),
1620 entity_type_code: type_codes::AUDIT_FINDING,
1621 layer: HypergraphLayer::ProcessEvents,
1622 external_id: fid,
1623 label: format!("AFIND {}", f.finding_ref),
1624 properties: HashMap::new(),
1625 features: vec![],
1626 is_anomaly: false,
1627 anomaly_type: None,
1628 is_aggregate: false,
1629 aggregate_count: 0,
1630 });
1631 }
1632 for ev in evidence {
1633 let evid = ev.evidence_id.to_string();
1634 let node_id = format!("audit_ev_{}", evid);
1635 self.try_add_node(HypergraphNode {
1636 id: node_id,
1637 entity_type: "AuditEvidence".into(),
1638 entity_type_code: type_codes::AUDIT_EVIDENCE,
1639 layer: HypergraphLayer::ProcessEvents,
1640 external_id: evid,
1641 label: format!("AEV {}", ev.evidence_id),
1642 properties: HashMap::new(),
1643 features: vec![],
1644 is_anomaly: false,
1645 anomaly_type: None,
1646 is_aggregate: false,
1647 aggregate_count: 0,
1648 });
1649 }
1650 for r in risks {
1651 let rid = r.risk_id.to_string();
1652 let node_id = format!("audit_risk_{}", rid);
1653 self.try_add_node(HypergraphNode {
1654 id: node_id,
1655 entity_type: "RiskAssessment".into(),
1656 entity_type_code: type_codes::RISK_ASSESSMENT,
1657 layer: HypergraphLayer::ProcessEvents,
1658 external_id: rid,
1659 label: format!("ARISK {}", r.risk_id),
1660 properties: HashMap::new(),
1661 features: vec![],
1662 is_anomaly: false,
1663 anomaly_type: None,
1664 is_aggregate: false,
1665 aggregate_count: 0,
1666 });
1667 }
1668 for j in judgments {
1669 let jid = j.judgment_id.to_string();
1670 let node_id = format!("audit_judg_{}", jid);
1671 self.try_add_node(HypergraphNode {
1672 id: node_id,
1673 entity_type: "ProfessionalJudgment".into(),
1674 entity_type_code: type_codes::PROFESSIONAL_JUDGMENT,
1675 layer: HypergraphLayer::ProcessEvents,
1676 external_id: jid,
1677 label: format!("AJUDG {}", j.judgment_id),
1678 properties: HashMap::new(),
1679 features: vec![],
1680 is_anomaly: false,
1681 anomaly_type: None,
1682 is_aggregate: false,
1683 aggregate_count: 0,
1684 });
1685 }
1686 }
1687
1688 pub fn add_bank_recon_documents(&mut self, reconciliations: &[BankReconciliation]) {
1690 if !self.config.include_r2r {
1691 return;
1692 }
1693 for recon in reconciliations {
1694 let node_id = format!("recon_{}", recon.reconciliation_id);
1695 self.try_add_node(HypergraphNode {
1696 id: node_id,
1697 entity_type: "BankReconciliation".into(),
1698 entity_type_code: type_codes::BANK_RECONCILIATION,
1699 layer: HypergraphLayer::ProcessEvents,
1700 external_id: recon.reconciliation_id.clone(),
1701 label: format!("RECON {}", recon.reconciliation_id),
1702 properties: HashMap::new(),
1703 features: vec![recon
1704 .bank_ending_balance
1705 .to_string()
1706 .parse::<f64>()
1707 .unwrap_or(0.0)
1708 .ln_1p()],
1709 is_anomaly: false,
1710 anomaly_type: None,
1711 is_aggregate: false,
1712 aggregate_count: 0,
1713 });
1714 for line in &recon.statement_lines {
1715 let node_id = format!("recon_line_{}", line.line_id);
1716 self.try_add_node(HypergraphNode {
1717 id: node_id,
1718 entity_type: "BankStatementLine".into(),
1719 entity_type_code: type_codes::BANK_STATEMENT_LINE,
1720 layer: HypergraphLayer::ProcessEvents,
1721 external_id: line.line_id.clone(),
1722 label: format!("BSL {}", line.line_id),
1723 properties: HashMap::new(),
1724 features: vec![line
1725 .amount
1726 .to_string()
1727 .parse::<f64>()
1728 .unwrap_or(0.0)
1729 .abs()
1730 .ln_1p()],
1731 is_anomaly: false,
1732 anomaly_type: None,
1733 is_aggregate: false,
1734 aggregate_count: 0,
1735 });
1736 }
1737 for item in &recon.reconciling_items {
1738 let node_id = format!("recon_item_{}", item.item_id);
1739 self.try_add_node(HypergraphNode {
1740 id: node_id,
1741 entity_type: "ReconcilingItem".into(),
1742 entity_type_code: type_codes::RECONCILING_ITEM,
1743 layer: HypergraphLayer::ProcessEvents,
1744 external_id: item.item_id.clone(),
1745 label: format!("RITEM {}", item.item_id),
1746 properties: HashMap::new(),
1747 features: vec![item
1748 .amount
1749 .to_string()
1750 .parse::<f64>()
1751 .unwrap_or(0.0)
1752 .abs()
1753 .ln_1p()],
1754 is_anomaly: false,
1755 anomaly_type: None,
1756 is_aggregate: false,
1757 aggregate_count: 0,
1758 });
1759 }
1760 }
1761 }
1762
1763 pub fn add_ocpm_events(&mut self, event_log: &datasynth_ocpm::OcpmEventLog) {
1765 if !self.config.events_as_hyperedges {
1766 return;
1767 }
1768 for event in &event_log.events {
1769 let participants: Vec<HyperedgeParticipant> = event
1770 .object_refs
1771 .iter()
1772 .map(|obj_ref| {
1773 let node_id = format!("ocpm_obj_{}", obj_ref.object_id);
1774 self.try_add_node(HypergraphNode {
1776 id: node_id.clone(),
1777 entity_type: "OcpmObject".into(),
1778 entity_type_code: type_codes::OCPM_EVENT,
1779 layer: HypergraphLayer::ProcessEvents,
1780 external_id: obj_ref.object_id.to_string(),
1781 label: format!("OBJ {}", obj_ref.object_type_id),
1782 properties: HashMap::new(),
1783 features: vec![],
1784 is_anomaly: false,
1785 anomaly_type: None,
1786 is_aggregate: false,
1787 aggregate_count: 0,
1788 });
1789 HyperedgeParticipant {
1790 node_id,
1791 role: format!("{:?}", obj_ref.qualifier),
1792 weight: None,
1793 }
1794 })
1795 .collect();
1796
1797 if !participants.is_empty() {
1798 let mut props = HashMap::new();
1799 props.insert(
1800 "activity_id".into(),
1801 Value::String(event.activity_id.clone()),
1802 );
1803 props.insert(
1804 "timestamp".into(),
1805 Value::String(event.timestamp.to_rfc3339()),
1806 );
1807 if !event.resource_id.is_empty() {
1808 props.insert("resource".into(), Value::String(event.resource_id.clone()));
1809 }
1810
1811 self.hyperedges.push(Hyperedge {
1812 id: format!("ocpm_evt_{}", event.event_id),
1813 hyperedge_type: "OcpmEvent".into(),
1814 subtype: event.activity_id.clone(),
1815 participants,
1816 layer: HypergraphLayer::ProcessEvents,
1817 properties: props,
1818 timestamp: Some(event.timestamp.date_naive()),
1819 is_anomaly: false,
1820 anomaly_type: None,
1821 features: vec![],
1822 });
1823 }
1824 }
1825 }
1826
1827 pub fn build_cross_layer_edges(&mut self) {
1829 if !self.config.include_cross_layer_edges {
1830 return;
1831 }
1832
1833 let links = std::mem::take(&mut self.doc_counterparty_links);
1835 for (doc_node_id, counterparty_type, counterparty_id) in &links {
1836 let source_node_id = match counterparty_type.as_str() {
1837 "vendor" => self.vendor_node_ids.get(counterparty_id),
1838 "customer" => self.customer_node_ids.get(counterparty_id),
1839 _ => None,
1840 };
1841 if let Some(source_id) = source_node_id {
1842 self.edges.push(CrossLayerEdge {
1843 source_id: source_id.clone(),
1844 source_layer: HypergraphLayer::GovernanceControls,
1845 target_id: doc_node_id.clone(),
1846 target_layer: HypergraphLayer::ProcessEvents,
1847 edge_type: "SuppliesTo".to_string(),
1848 edge_type_code: type_codes::SUPPLIES_TO,
1849 properties: HashMap::new(),
1850 });
1851 }
1852 }
1853 self.doc_counterparty_links = links;
1854 }
1855
1856 pub fn build(mut self) -> Hypergraph {
1858 self.build_cross_layer_edges();
1860
1861 let mut layer_node_counts: HashMap<String, usize> = HashMap::new();
1863 let mut node_type_counts: HashMap<String, usize> = HashMap::new();
1864 let mut anomalous_nodes = 0;
1865
1866 for node in &self.nodes {
1867 *layer_node_counts
1868 .entry(node.layer.name().to_string())
1869 .or_insert(0) += 1;
1870 *node_type_counts
1871 .entry(node.entity_type.clone())
1872 .or_insert(0) += 1;
1873 if node.is_anomaly {
1874 anomalous_nodes += 1;
1875 }
1876 }
1877
1878 let mut edge_type_counts: HashMap<String, usize> = HashMap::new();
1879 for edge in &self.edges {
1880 *edge_type_counts.entry(edge.edge_type.clone()).or_insert(0) += 1;
1881 }
1882
1883 let mut hyperedge_type_counts: HashMap<String, usize> = HashMap::new();
1884 let mut anomalous_hyperedges = 0;
1885 for he in &self.hyperedges {
1886 *hyperedge_type_counts
1887 .entry(he.hyperedge_type.clone())
1888 .or_insert(0) += 1;
1889 if he.is_anomaly {
1890 anomalous_hyperedges += 1;
1891 }
1892 }
1893
1894 let budget_report = NodeBudgetReport {
1895 total_budget: self.budget.total_max(),
1896 total_used: self.budget.total_count(),
1897 layer1_budget: self.budget.layer1_max,
1898 layer1_used: self.budget.layer1_count,
1899 layer2_budget: self.budget.layer2_max,
1900 layer2_used: self.budget.layer2_count,
1901 layer3_budget: self.budget.layer3_max,
1902 layer3_used: self.budget.layer3_count,
1903 aggregate_nodes_created: self.aggregate_count,
1904 aggregation_triggered: self.aggregate_count > 0,
1905 };
1906
1907 let metadata = HypergraphMetadata {
1908 name: "multi_layer_hypergraph".to_string(),
1909 num_nodes: self.nodes.len(),
1910 num_edges: self.edges.len(),
1911 num_hyperedges: self.hyperedges.len(),
1912 layer_node_counts,
1913 node_type_counts,
1914 edge_type_counts,
1915 hyperedge_type_counts,
1916 anomalous_nodes,
1917 anomalous_hyperedges,
1918 source: "datasynth".to_string(),
1919 generated_at: chrono::Utc::now().to_rfc3339(),
1920 budget_report: budget_report.clone(),
1921 files: vec![
1922 "nodes.jsonl".to_string(),
1923 "edges.jsonl".to_string(),
1924 "hyperedges.jsonl".to_string(),
1925 "metadata.json".to_string(),
1926 ],
1927 };
1928
1929 Hypergraph {
1930 nodes: self.nodes,
1931 edges: self.edges,
1932 hyperedges: self.hyperedges,
1933 metadata,
1934 budget_report,
1935 }
1936 }
1937
1938 fn try_add_node(&mut self, node: HypergraphNode) -> bool {
1940 if self.node_index.contains_key(&node.id) {
1941 return false; }
1943
1944 if !self.budget.can_add(node.layer) {
1945 return false; }
1947
1948 let id = node.id.clone();
1949 let layer = node.layer;
1950 self.nodes.push(node);
1951 let idx = self.nodes.len() - 1;
1952 self.node_index.insert(id, idx);
1953 self.budget.record_add(layer);
1954 true
1955 }
1956}
1957
1958fn component_to_feature(component: &CosoComponent) -> f64 {
1960 match component {
1961 CosoComponent::ControlEnvironment => 1.0,
1962 CosoComponent::RiskAssessment => 2.0,
1963 CosoComponent::ControlActivities => 3.0,
1964 CosoComponent::InformationCommunication => 4.0,
1965 CosoComponent::MonitoringActivities => 5.0,
1966 }
1967}
1968
1969fn account_type_feature(account_type: &datasynth_core::models::AccountType) -> f64 {
1971 use datasynth_core::models::AccountType;
1972 match account_type {
1973 AccountType::Asset => 1.0,
1974 AccountType::Liability => 2.0,
1975 AccountType::Equity => 3.0,
1976 AccountType::Revenue => 4.0,
1977 AccountType::Expense => 5.0,
1978 AccountType::Statistical => 6.0,
1979 }
1980}
1981
1982fn compute_je_features(entry: &JournalEntry) -> Vec<f64> {
1984 let total_debit: f64 = entry
1985 .lines
1986 .iter()
1987 .map(|l| l.debit_amount.to_string().parse::<f64>().unwrap_or(0.0))
1988 .sum();
1989
1990 let line_count = entry.lines.len() as f64;
1991 let posting_date = entry.header.posting_date;
1992 let weekday = posting_date.weekday().num_days_from_monday() as f64 / WEEKDAY_NORMALIZER;
1993 let day = posting_date.day() as f64 / DAY_OF_MONTH_NORMALIZER;
1994 let month = posting_date.month() as f64 / MONTH_NORMALIZER;
1995 let is_month_end = if posting_date.day() >= MONTH_END_DAY_THRESHOLD {
1996 1.0
1997 } else {
1998 0.0
1999 };
2000
2001 vec![
2002 (total_debit.abs() + 1.0).ln(), line_count, weekday, day, month, is_month_end, ]
2009}
2010
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use datasynth_core::models::{
        AccountSubType, AccountType, ChartOfAccounts, CoAComplexity, ControlFrequency, ControlType,
        CosoComponent, CosoMaturityLevel, GLAccount, InternalControl, RiskLevel, SoxAssertion,
        UserPersona,
    };

    /// Builds a small chart of accounts with one cash asset account and one
    /// accounts-payable liability account.
    fn make_test_coa() -> ChartOfAccounts {
        let mut chart = ChartOfAccounts::new(
            "TEST_COA".to_string(),
            "Test Chart".to_string(),
            "US".to_string(),
            datasynth_core::models::IndustrySector::Manufacturing,
            CoAComplexity::Small,
        );

        let accounts = [
            ("1000", "Cash", AccountType::Asset, AccountSubType::Cash),
            (
                "2000",
                "AP",
                AccountType::Liability,
                AccountSubType::AccountsPayable,
            ),
        ];
        for (number, name, account_type, sub_type) in accounts {
            chart.add_account(GLAccount::new(
                number.to_string(),
                name.to_string(),
                account_type,
                sub_type,
            ));
        }

        chart
    }

    /// Builds a single key, preventive, transaction-level control.
    fn make_test_control() -> InternalControl {
        InternalControl {
            // Identity and description.
            control_id: "C001".to_string(),
            control_name: "Three-Way Match".to_string(),
            description: "Test control".to_string(),
            objective: "Ensure proper matching".to_string(),
            // Classification.
            control_type: ControlType::Preventive,
            frequency: ControlFrequency::Transactional,
            owner_role: UserPersona::Controller,
            risk_level: RiskLevel::High,
            is_key_control: true,
            control_scope: datasynth_core::models::ControlScope::TransactionLevel,
            // Framework mappings.
            sox_assertion: SoxAssertion::Existence,
            coso_component: CosoComponent::ControlActivities,
            coso_principles: vec![CosoPrinciple::ControlActions],
            maturity_level: CosoMaturityLevel::Managed,
        }
    }

    #[test]
    fn test_builder_coso_framework() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 1000,
            ..Default::default()
        });
        builder.add_coso_framework();
        let hg = builder.build();

        // Presumably 5 COSO components + 17 principles; confirm against
        // `add_coso_framework`.
        assert_eq!(hg.nodes.len(), 22);

        let any_outside_governance = hg
            .nodes
            .iter()
            .any(|n| n.layer != HypergraphLayer::GovernanceControls);
        assert!(!any_outside_governance);

        let principle_edges = hg
            .edges
            .iter()
            .filter(|e| e.edge_type == "CoversCosoPrinciple")
            .count();
        assert_eq!(principle_edges, 17);
    }

    #[test]
    fn test_builder_controls() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 1000,
            ..Default::default()
        });
        builder.add_coso_framework();
        builder.add_controls(&[make_test_control()]);
        let hg = builder.build();

        // Two more nodes than the bare COSO framework; the assertions below
        // show a control node and a SOX-assertion node are among them.
        assert_eq!(hg.nodes.len(), 24);

        let has_entity = |wanted: &str| hg.nodes.iter().any(|n| n.entity_type == wanted);
        assert!(has_entity("InternalControl"));
        assert!(has_entity("SoxAssertion"));
    }

    #[test]
    fn test_builder_accounts() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 1000,
            ..Default::default()
        });
        builder.add_accounts(&make_test_coa());
        let hg = builder.build();

        assert_eq!(hg.nodes.len(), 2);

        let any_outside_accounting = hg
            .nodes
            .iter()
            .any(|n| n.layer != HypergraphLayer::AccountingNetwork);
        assert!(!any_outside_accounting);
    }

    #[test]
    fn test_budget_enforcement() {
        let tight_config = HypergraphConfig {
            max_nodes: 10,
            include_coso: false,
            include_controls: false,
            include_sox: false,
            include_vendors: false,
            include_customers: false,
            include_employees: false,
            include_p2p: false,
            include_o2c: false,
            ..Default::default()
        };
        let mut builder = HypergraphBuilder::new(tight_config);
        builder.add_accounts(&make_test_coa());
        let hg = builder.build();

        // NOTE(review): the bound of one node relies on how the builder
        // splits `max_nodes` across layers, which is defined elsewhere.
        assert!(hg.nodes.len() <= 1);
    }

    #[test]
    fn test_full_build() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 10000,
            ..Default::default()
        });
        builder.add_coso_framework();
        builder.add_controls(&[make_test_control()]);
        builder.add_accounts(&make_test_coa());
        let hg = builder.build();

        assert!(!hg.nodes.is_empty());
        assert!(!hg.edges.is_empty());

        // Metadata counters must agree with the actual collections.
        assert_eq!(hg.metadata.num_nodes, hg.nodes.len());
        assert_eq!(hg.metadata.num_edges, hg.edges.len());
    }
}