1use std::collections::HashMap;
12
13use chrono::Datelike;
14use serde_json::Value;
15
16use datasynth_banking::models::{BankAccount, BankTransaction, BankingCustomer};
17use datasynth_core::models::audit::{
18 AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
19};
20use datasynth_core::models::sourcing::{
21 BidEvaluation, ProcurementContract, RfxEvent, SourcingProject, SupplierBid,
22 SupplierQualification,
23};
24use datasynth_core::models::ExpenseReport;
25use datasynth_core::models::{
26 BankReconciliation, ChartOfAccounts, CosoComponent, CosoPrinciple, Customer, CycleCount,
27 Employee, InternalControl, JournalEntry, PayrollRun, ProductionOrder, QualityInspection,
28 TimeEntry, Vendor,
29};
30
31use crate::models::hypergraph::{
32 AggregationStrategy, CrossLayerEdge, Hyperedge, HyperedgeParticipant, Hypergraph,
33 HypergraphLayer, HypergraphMetadata, HypergraphNode, NodeBudget, NodeBudgetReport,
34};
35
/// Day-of-month value (28) named as the month-end threshold.
/// NOTE(review): the consumer is outside this view — presumably days at or after this value
/// are treated as month-end; confirm at the use site.
const MONTH_END_DAY_THRESHOLD: u32 = 28;
/// Divisor (6.0) named for weekday normalization.
/// NOTE(review): presumably scales a 0..=6 weekday index into `[0, 1]` — confirm at the use site.
const WEEKDAY_NORMALIZER: f64 = 6.0;
/// Divisor (31.0) named for day-of-month normalization.
/// NOTE(review): presumably scales a 1..=31 day into `(0, 1]` — confirm at the use site.
const DAY_OF_MONTH_NORMALIZER: f64 = 31.0;
/// Divisor (12.0) named for month normalization.
/// NOTE(review): presumably scales a 1..=12 month into `(0, 1]` — confirm at the use site.
const MONTH_NORMALIZER: f64 = 12.0;
44
// Numeric codes stamped onto nodes (`entity_type_code`) and edges (`edge_type_code`).
// NOTE(review): `dead_code` is allowed presumably because not every code is referenced by the
// builder yet — confirm before removing the attribute.
#[allow(dead_code)]
mod type_codes {
    // --- Accounting network ---
    pub const ACCOUNT: u32 = 100;

    // --- Master data / counterparties ---
    pub const VENDOR: u32 = 200;
    pub const CUSTOMER: u32 = 201;
    pub const EMPLOYEE: u32 = 202;

    // --- Governance and controls ---
    pub const COSO_COMPONENT: u32 = 500;
    pub const COSO_PRINCIPLE: u32 = 501;
    pub const SOX_ASSERTION: u32 = 502;
    pub const INTERNAL_CONTROL: u32 = 504;

    // --- Procure-to-pay documents ---
    pub const PURCHASE_ORDER: u32 = 300;
    pub const GOODS_RECEIPT: u32 = 301;
    pub const VENDOR_INVOICE: u32 = 302;
    pub const PAYMENT: u32 = 303;

    // --- Order-to-cash documents ---
    pub const SALES_ORDER: u32 = 310;
    pub const DELIVERY: u32 = 311;
    pub const CUSTOMER_INVOICE: u32 = 312;

    // --- Source-to-contract documents ---
    pub const SOURCING_PROJECT: u32 = 320;
    pub const RFX_EVENT: u32 = 321;
    pub const SUPPLIER_BID: u32 = 322;
    pub const BID_EVALUATION: u32 = 323;
    pub const PROCUREMENT_CONTRACT: u32 = 324;
    pub const SUPPLIER_QUALIFICATION: u32 = 325;

    // --- Payroll, time and expense documents ---
    pub const PAYROLL_RUN: u32 = 330;
    pub const TIME_ENTRY: u32 = 331;
    pub const EXPENSE_REPORT: u32 = 332;
    pub const PAYROLL_LINE_ITEM: u32 = 333;

    // --- Manufacturing documents ---
    pub const PRODUCTION_ORDER: u32 = 340;
    pub const ROUTING_OPERATION: u32 = 341;
    pub const QUALITY_INSPECTION: u32 = 342;
    pub const CYCLE_COUNT: u32 = 343;

    // --- Banking entities ---
    pub const BANKING_CUSTOMER: u32 = 350;
    pub const BANK_ACCOUNT: u32 = 351;
    pub const BANK_TRANSACTION: u32 = 352;

    // --- Audit entities ---
    pub const AUDIT_ENGAGEMENT: u32 = 360;
    pub const WORKPAPER: u32 = 361;
    pub const AUDIT_FINDING: u32 = 362;
    pub const AUDIT_EVIDENCE: u32 = 363;
    pub const RISK_ASSESSMENT: u32 = 364;
    pub const PROFESSIONAL_JUDGMENT: u32 = 365;

    // --- Bank reconciliation entities ---
    pub const BANK_RECONCILIATION: u32 = 370;
    pub const BANK_STATEMENT_LINE: u32 = 371;
    pub const RECONCILING_ITEM: u32 = 372;

    // --- Process-mining events ---
    pub const OCPM_EVENT: u32 = 400;

    // Code shared by every aggregate ("pool") node that stands in for many documents.
    pub const POOL_NODE: u32 = 399;

    // --- Edge type codes ---
    pub const IMPLEMENTS_CONTROL: u32 = 40;
    pub const GOVERNED_BY_STANDARD: u32 = 41;
    pub const OWNS_CONTROL: u32 = 42;
    pub const OVERSEE_PROCESS: u32 = 43;
    pub const ENFORCES_ASSERTION: u32 = 44;
    pub const SUPPLIES_TO: u32 = 48;
    pub const COVERS_COSO_PRINCIPLE: u32 = 54;
    pub const CONTAINS_ACCOUNT: u32 = 55;
}
116
/// Configuration switches and limits for [`HypergraphBuilder`].
#[derive(Debug, Clone)]
pub struct HypergraphConfig {
    /// Node budget handed to `NodeBudget::new` when the builder is created.
    pub max_nodes: usize,
    /// Aggregation strategy; the P2P and O2C loaders pool documents only when this is
    /// `AggregationStrategy::PoolByCounterparty`.
    pub aggregation_strategy: AggregationStrategy,
    /// When `false`, `add_coso_framework` is a no-op.
    pub include_coso: bool,
    /// When `false`, `add_controls` is a no-op.
    pub include_controls: bool,
    /// When `true`, `add_controls` also adds SOX assertion nodes and edges to them.
    pub include_sox: bool,
    /// When `false`, `add_vendors` is a no-op.
    pub include_vendors: bool,
    /// When `false`, `add_customers` is a no-op.
    pub include_customers: bool,
    /// When `false`, `add_employees` is a no-op.
    pub include_employees: bool,
    /// When `false`, `add_p2p_documents` is a no-op.
    pub include_p2p: bool,
    /// When `false`, `add_o2c_documents` is a no-op.
    pub include_o2c: bool,
    /// When `false`, `add_s2c_documents` is a no-op.
    pub include_s2c: bool,
    /// NOTE(review): presumably gates hire-to-retire documents — consumer not visible here.
    pub include_h2r: bool,
    /// NOTE(review): presumably gates manufacturing documents — consumer not visible here.
    pub include_mfg: bool,
    /// NOTE(review): presumably gates banking entities — consumer not visible here.
    pub include_bank: bool,
    /// NOTE(review): presumably gates audit entities — consumer not visible here.
    pub include_audit: bool,
    /// NOTE(review): presumably gates record-to-report entities — consumer not visible here.
    pub include_r2r: bool,
    /// NOTE(review): presumably emits process events as hyperedges — consumer not visible here.
    pub events_as_hyperedges: bool,
    /// A counterparty with strictly more purchase orders (P2P) or sales orders (O2C) than this
    /// value gets a single pool node instead of individual document nodes.
    pub docs_per_counterparty_threshold: usize,
    /// When `false`, `add_accounts` is a no-op.
    pub include_accounts: bool,
    /// When `false`, `add_journal_entries_as_hyperedges` is a no-op.
    pub je_as_hyperedges: bool,
    /// NOTE(review): presumably gates cross-layer edge generation — consumer not visible here.
    pub include_cross_layer_edges: bool,
}
149
150impl Default for HypergraphConfig {
151 fn default() -> Self {
152 Self {
153 max_nodes: 50_000,
154 aggregation_strategy: AggregationStrategy::PoolByCounterparty,
155 include_coso: true,
156 include_controls: true,
157 include_sox: true,
158 include_vendors: true,
159 include_customers: true,
160 include_employees: true,
161 include_p2p: true,
162 include_o2c: true,
163 include_s2c: true,
164 include_h2r: true,
165 include_mfg: true,
166 include_bank: true,
167 include_audit: true,
168 include_r2r: true,
169 events_as_hyperedges: true,
170 docs_per_counterparty_threshold: 20,
171 include_accounts: true,
172 je_as_hyperedges: true,
173 include_cross_layer_edges: true,
174 }
175 }
176}
177
/// Incrementally assembles a hypergraph (nodes, pairwise edges and hyperedges) from
/// governance, master-data, accounting and process-document inputs.
pub struct HypergraphBuilder {
    /// Switches and limits supplied at construction.
    config: HypergraphConfig,
    /// Node budget created from `config.max_nodes`.
    budget: NodeBudget,
    /// Nodes collected so far.
    nodes: Vec<HypergraphNode>,
    /// Pairwise edges collected so far.
    edges: Vec<CrossLayerEdge>,
    /// Hyperedges collected so far (for example, one per journal entry).
    hyperedges: Vec<Hyperedge>,
    /// Lookup keyed by node id; consulted to avoid adding the same node twice.
    /// NOTE(review): the value is presumably the node's position in `nodes` — confirm.
    node_index: HashMap<String, usize>,
    /// Incremented once per counterparty pool processed by the P2P/O2C loaders.
    aggregate_count: usize,
    /// Control id -> node id of the corresponding control node.
    control_node_ids: HashMap<String, String>,
    /// `Debug` name of a COSO component -> node id of that component.
    coso_component_ids: HashMap<String, String>,
    /// GL account number -> node id of the account node.
    account_node_ids: HashMap<String, String>,
    /// Vendor id -> node id of the vendor node.
    vendor_node_ids: HashMap<String, String>,
    /// Customer id -> node id of the customer node.
    customer_node_ids: HashMap<String, String>,
    /// Employee id -> node id of the employee node.
    employee_node_ids: HashMap<String, String>,
    /// `(document or pool node id, counterparty kind, counterparty id)` triples, where the
    /// kind is `"vendor"` or `"customer"` in the loaders visible here.
    doc_counterparty_links: Vec<(String, String, String)>,
}
205
206impl HypergraphBuilder {
207 pub fn new(config: HypergraphConfig) -> Self {
209 let budget = NodeBudget::new(config.max_nodes);
210 Self {
211 config,
212 budget,
213 nodes: Vec::new(),
214 edges: Vec::new(),
215 hyperedges: Vec::new(),
216 node_index: HashMap::new(),
217 aggregate_count: 0,
218 control_node_ids: HashMap::new(),
219 coso_component_ids: HashMap::new(),
220 account_node_ids: HashMap::new(),
221 vendor_node_ids: HashMap::new(),
222 customer_node_ids: HashMap::new(),
223 employee_node_ids: HashMap::new(),
224 doc_counterparty_links: Vec::new(),
225 }
226 }
227
228 pub fn add_coso_framework(&mut self) {
230 if !self.config.include_coso {
231 return;
232 }
233
234 let components = [
235 (CosoComponent::ControlEnvironment, "Control Environment"),
236 (CosoComponent::RiskAssessment, "Risk Assessment"),
237 (CosoComponent::ControlActivities, "Control Activities"),
238 (
239 CosoComponent::InformationCommunication,
240 "Information & Communication",
241 ),
242 (CosoComponent::MonitoringActivities, "Monitoring Activities"),
243 ];
244
245 for (component, name) in &components {
246 let id = format!("coso_comp_{}", name.replace(' ', "_").replace('&', "and"));
247 if self.try_add_node(HypergraphNode {
248 id: id.clone(),
249 entity_type: "CosoComponent".to_string(),
250 entity_type_code: type_codes::COSO_COMPONENT,
251 layer: HypergraphLayer::GovernanceControls,
252 external_id: format!("{:?}", component),
253 label: name.to_string(),
254 properties: HashMap::new(),
255 features: vec![component_to_feature(component)],
256 is_anomaly: false,
257 anomaly_type: None,
258 is_aggregate: false,
259 aggregate_count: 0,
260 }) {
261 self.coso_component_ids
262 .insert(format!("{:?}", component), id);
263 }
264 }
265
266 let principles = [
267 (
268 CosoPrinciple::IntegrityAndEthics,
269 "Integrity and Ethics",
270 CosoComponent::ControlEnvironment,
271 ),
272 (
273 CosoPrinciple::BoardOversight,
274 "Board Oversight",
275 CosoComponent::ControlEnvironment,
276 ),
277 (
278 CosoPrinciple::OrganizationalStructure,
279 "Organizational Structure",
280 CosoComponent::ControlEnvironment,
281 ),
282 (
283 CosoPrinciple::CommitmentToCompetence,
284 "Commitment to Competence",
285 CosoComponent::ControlEnvironment,
286 ),
287 (
288 CosoPrinciple::Accountability,
289 "Accountability",
290 CosoComponent::ControlEnvironment,
291 ),
292 (
293 CosoPrinciple::ClearObjectives,
294 "Clear Objectives",
295 CosoComponent::RiskAssessment,
296 ),
297 (
298 CosoPrinciple::IdentifyRisks,
299 "Identify Risks",
300 CosoComponent::RiskAssessment,
301 ),
302 (
303 CosoPrinciple::FraudRisk,
304 "Fraud Risk",
305 CosoComponent::RiskAssessment,
306 ),
307 (
308 CosoPrinciple::ChangeIdentification,
309 "Change Identification",
310 CosoComponent::RiskAssessment,
311 ),
312 (
313 CosoPrinciple::ControlActions,
314 "Control Actions",
315 CosoComponent::ControlActivities,
316 ),
317 (
318 CosoPrinciple::TechnologyControls,
319 "Technology Controls",
320 CosoComponent::ControlActivities,
321 ),
322 (
323 CosoPrinciple::PoliciesAndProcedures,
324 "Policies and Procedures",
325 CosoComponent::ControlActivities,
326 ),
327 (
328 CosoPrinciple::QualityInformation,
329 "Quality Information",
330 CosoComponent::InformationCommunication,
331 ),
332 (
333 CosoPrinciple::InternalCommunication,
334 "Internal Communication",
335 CosoComponent::InformationCommunication,
336 ),
337 (
338 CosoPrinciple::ExternalCommunication,
339 "External Communication",
340 CosoComponent::InformationCommunication,
341 ),
342 (
343 CosoPrinciple::OngoingMonitoring,
344 "Ongoing Monitoring",
345 CosoComponent::MonitoringActivities,
346 ),
347 (
348 CosoPrinciple::DeficiencyEvaluation,
349 "Deficiency Evaluation",
350 CosoComponent::MonitoringActivities,
351 ),
352 ];
353
354 for (principle, name, parent_component) in &principles {
355 let principle_id = format!("coso_prin_{}", name.replace(' ', "_").replace('&', "and"));
356 if self.try_add_node(HypergraphNode {
357 id: principle_id.clone(),
358 entity_type: "CosoPrinciple".to_string(),
359 entity_type_code: type_codes::COSO_PRINCIPLE,
360 layer: HypergraphLayer::GovernanceControls,
361 external_id: format!("{:?}", principle),
362 label: name.to_string(),
363 properties: {
364 let mut p = HashMap::new();
365 p.insert(
366 "principle_number".to_string(),
367 Value::Number(principle.principle_number().into()),
368 );
369 p
370 },
371 features: vec![principle.principle_number() as f64],
372 is_anomaly: false,
373 anomaly_type: None,
374 is_aggregate: false,
375 aggregate_count: 0,
376 }) {
377 let comp_key = format!("{:?}", parent_component);
379 if let Some(comp_id) = self.coso_component_ids.get(&comp_key) {
380 self.edges.push(CrossLayerEdge {
381 source_id: principle_id,
382 source_layer: HypergraphLayer::GovernanceControls,
383 target_id: comp_id.clone(),
384 target_layer: HypergraphLayer::GovernanceControls,
385 edge_type: "CoversCosoPrinciple".to_string(),
386 edge_type_code: type_codes::COVERS_COSO_PRINCIPLE,
387 properties: HashMap::new(),
388 });
389 }
390 }
391 }
392 }
393
394 pub fn add_controls(&mut self, controls: &[InternalControl]) {
396 if !self.config.include_controls {
397 return;
398 }
399
400 for control in controls {
401 let node_id = format!("ctrl_{}", control.control_id);
402 if self.try_add_node(HypergraphNode {
403 id: node_id.clone(),
404 entity_type: "InternalControl".to_string(),
405 entity_type_code: type_codes::INTERNAL_CONTROL,
406 layer: HypergraphLayer::GovernanceControls,
407 external_id: control.control_id.clone(),
408 label: control.control_name.clone(),
409 properties: {
410 let mut p = HashMap::new();
411 p.insert(
412 "control_type".to_string(),
413 Value::String(format!("{:?}", control.control_type)),
414 );
415 p.insert(
416 "risk_level".to_string(),
417 Value::String(format!("{:?}", control.risk_level)),
418 );
419 p.insert(
420 "is_key_control".to_string(),
421 Value::Bool(control.is_key_control),
422 );
423 p.insert(
424 "maturity_level".to_string(),
425 Value::String(format!("{:?}", control.maturity_level)),
426 );
427 p
428 },
429 features: vec![
430 if control.is_key_control { 1.0 } else { 0.0 },
431 control.maturity_level.level() as f64 / 5.0,
432 ],
433 is_anomaly: false,
434 anomaly_type: None,
435 is_aggregate: false,
436 aggregate_count: 0,
437 }) {
438 self.control_node_ids
439 .insert(control.control_id.clone(), node_id.clone());
440
441 let comp_key = format!("{:?}", control.coso_component);
443 if let Some(comp_id) = self.coso_component_ids.get(&comp_key) {
444 self.edges.push(CrossLayerEdge {
445 source_id: node_id.clone(),
446 source_layer: HypergraphLayer::GovernanceControls,
447 target_id: comp_id.clone(),
448 target_layer: HypergraphLayer::GovernanceControls,
449 edge_type: "ImplementsControl".to_string(),
450 edge_type_code: type_codes::IMPLEMENTS_CONTROL,
451 properties: HashMap::new(),
452 });
453 }
454
455 if self.config.include_sox {
457 let assertion_id = format!("sox_{:?}", control.sox_assertion).to_lowercase();
458 if !self.node_index.contains_key(&assertion_id) {
460 self.try_add_node(HypergraphNode {
461 id: assertion_id.clone(),
462 entity_type: "SoxAssertion".to_string(),
463 entity_type_code: type_codes::SOX_ASSERTION,
464 layer: HypergraphLayer::GovernanceControls,
465 external_id: format!("{:?}", control.sox_assertion),
466 label: format!("{:?}", control.sox_assertion),
467 properties: HashMap::new(),
468 features: vec![],
469 is_anomaly: false,
470 anomaly_type: None,
471 is_aggregate: false,
472 aggregate_count: 0,
473 });
474 }
475 self.edges.push(CrossLayerEdge {
476 source_id: node_id,
477 source_layer: HypergraphLayer::GovernanceControls,
478 target_id: assertion_id,
479 target_layer: HypergraphLayer::GovernanceControls,
480 edge_type: "EnforcesAssertion".to_string(),
481 edge_type_code: type_codes::ENFORCES_ASSERTION,
482 properties: HashMap::new(),
483 });
484 }
485 }
486 }
487 }
488
489 pub fn add_vendors(&mut self, vendors: &[Vendor]) {
491 if !self.config.include_vendors {
492 return;
493 }
494
495 for vendor in vendors {
496 let node_id = format!("vnd_{}", vendor.vendor_id);
497 if self.try_add_node(HypergraphNode {
498 id: node_id.clone(),
499 entity_type: "Vendor".to_string(),
500 entity_type_code: type_codes::VENDOR,
501 layer: HypergraphLayer::GovernanceControls,
502 external_id: vendor.vendor_id.clone(),
503 label: vendor.name.clone(),
504 properties: {
505 let mut p = HashMap::new();
506 p.insert(
507 "vendor_type".to_string(),
508 Value::String(format!("{:?}", vendor.vendor_type)),
509 );
510 p.insert("country".to_string(), Value::String(vendor.country.clone()));
511 p.insert("is_active".to_string(), Value::Bool(vendor.is_active));
512 p
513 },
514 features: vec![if vendor.is_active { 1.0 } else { 0.0 }],
515 is_anomaly: false,
516 anomaly_type: None,
517 is_aggregate: false,
518 aggregate_count: 0,
519 }) {
520 self.vendor_node_ids
521 .insert(vendor.vendor_id.clone(), node_id);
522 }
523 }
524 }
525
526 pub fn add_customers(&mut self, customers: &[Customer]) {
528 if !self.config.include_customers {
529 return;
530 }
531
532 for customer in customers {
533 let node_id = format!("cust_{}", customer.customer_id);
534 if self.try_add_node(HypergraphNode {
535 id: node_id.clone(),
536 entity_type: "Customer".to_string(),
537 entity_type_code: type_codes::CUSTOMER,
538 layer: HypergraphLayer::GovernanceControls,
539 external_id: customer.customer_id.clone(),
540 label: customer.name.clone(),
541 properties: {
542 let mut p = HashMap::new();
543 p.insert(
544 "customer_type".to_string(),
545 Value::String(format!("{:?}", customer.customer_type)),
546 );
547 p.insert(
548 "country".to_string(),
549 Value::String(customer.country.clone()),
550 );
551 p.insert(
552 "credit_rating".to_string(),
553 Value::String(format!("{:?}", customer.credit_rating)),
554 );
555 p
556 },
557 features: vec![if customer.is_active { 1.0 } else { 0.0 }],
558 is_anomaly: false,
559 anomaly_type: None,
560 is_aggregate: false,
561 aggregate_count: 0,
562 }) {
563 self.customer_node_ids
564 .insert(customer.customer_id.clone(), node_id);
565 }
566 }
567 }
568
569 pub fn add_employees(&mut self, employees: &[Employee]) {
571 if !self.config.include_employees {
572 return;
573 }
574
575 for employee in employees {
576 let node_id = format!("emp_{}", employee.employee_id);
577 if self.try_add_node(HypergraphNode {
578 id: node_id.clone(),
579 entity_type: "Employee".to_string(),
580 entity_type_code: type_codes::EMPLOYEE,
581 layer: HypergraphLayer::GovernanceControls,
582 external_id: employee.employee_id.clone(),
583 label: employee.display_name.clone(),
584 properties: {
585 let mut p = HashMap::new();
586 p.insert(
587 "persona".to_string(),
588 Value::String(format!("{:?}", employee.persona)),
589 );
590 p.insert(
591 "job_level".to_string(),
592 Value::String(format!("{:?}", employee.job_level)),
593 );
594 p.insert(
595 "company_code".to_string(),
596 Value::String(employee.company_code.clone()),
597 );
598 p
599 },
600 features: vec![employee
601 .approval_limit
602 .to_string()
603 .parse::<f64>()
604 .unwrap_or(0.0)
605 .ln_1p()],
606 is_anomaly: false,
607 anomaly_type: None,
608 is_aggregate: false,
609 aggregate_count: 0,
610 }) {
611 self.employee_node_ids
612 .insert(employee.employee_id.clone(), node_id);
613 }
614 }
615 }
616
617 pub fn add_accounts(&mut self, coa: &ChartOfAccounts) {
619 if !self.config.include_accounts {
620 return;
621 }
622
623 for account in &coa.accounts {
624 let node_id = format!("acct_{}", account.account_number);
625 if self.try_add_node(HypergraphNode {
626 id: node_id.clone(),
627 entity_type: "Account".to_string(),
628 entity_type_code: type_codes::ACCOUNT,
629 layer: HypergraphLayer::AccountingNetwork,
630 external_id: account.account_number.clone(),
631 label: account.short_description.clone(),
632 properties: {
633 let mut p = HashMap::new();
634 p.insert(
635 "account_type".to_string(),
636 Value::String(format!("{:?}", account.account_type)),
637 );
638 p.insert(
639 "is_control_account".to_string(),
640 Value::Bool(account.is_control_account),
641 );
642 p.insert("is_postable".to_string(), Value::Bool(account.is_postable));
643 p
644 },
645 features: vec![
646 account_type_feature(&account.account_type),
647 if account.is_control_account { 1.0 } else { 0.0 },
648 if account.normal_debit_balance {
649 1.0
650 } else {
651 0.0
652 },
653 ],
654 is_anomaly: false,
655 anomaly_type: None,
656 is_aggregate: false,
657 aggregate_count: 0,
658 }) {
659 self.account_node_ids
660 .insert(account.account_number.clone(), node_id);
661 }
662 }
663 }
664
665 pub fn add_journal_entries_as_hyperedges(&mut self, entries: &[JournalEntry]) {
669 if !self.config.je_as_hyperedges {
670 return;
671 }
672
673 for entry in entries {
674 let mut participants = Vec::new();
675
676 for line in &entry.lines {
677 let account_id = format!("acct_{}", line.gl_account);
678
679 if !self.node_index.contains_key(&account_id) {
681 self.try_add_node(HypergraphNode {
682 id: account_id.clone(),
683 entity_type: "Account".to_string(),
684 entity_type_code: type_codes::ACCOUNT,
685 layer: HypergraphLayer::AccountingNetwork,
686 external_id: line.gl_account.clone(),
687 label: line
688 .account_description
689 .clone()
690 .unwrap_or_else(|| line.gl_account.clone()),
691 properties: HashMap::new(),
692 features: vec![],
693 is_anomaly: false,
694 anomaly_type: None,
695 is_aggregate: false,
696 aggregate_count: 0,
697 });
698 self.account_node_ids
699 .insert(line.gl_account.clone(), account_id.clone());
700 }
701
702 let amount: f64 = if !line.debit_amount.is_zero() {
703 line.debit_amount.to_string().parse().unwrap_or(0.0)
704 } else {
705 line.credit_amount.to_string().parse().unwrap_or(0.0)
706 };
707
708 let role = if !line.debit_amount.is_zero() {
709 "debit"
710 } else {
711 "credit"
712 };
713
714 participants.push(HyperedgeParticipant {
715 node_id: account_id,
716 role: role.to_string(),
717 weight: Some(amount),
718 });
719 }
720
721 if participants.is_empty() {
722 continue;
723 }
724
725 let doc_id = entry.header.document_id.to_string();
726 let subtype = entry
727 .header
728 .business_process
729 .as_ref()
730 .map(|bp| format!("{:?}", bp))
731 .unwrap_or_else(|| "General".to_string());
732
733 self.hyperedges.push(Hyperedge {
734 id: format!("je_{}", doc_id),
735 hyperedge_type: "JournalEntry".to_string(),
736 subtype,
737 participants,
738 layer: HypergraphLayer::AccountingNetwork,
739 properties: {
740 let mut p = HashMap::new();
741 p.insert("document_id".to_string(), Value::String(doc_id));
742 p.insert(
743 "company_code".to_string(),
744 Value::String(entry.header.company_code.clone()),
745 );
746 p.insert(
747 "document_type".to_string(),
748 Value::String(entry.header.document_type.clone()),
749 );
750 p.insert(
751 "created_by".to_string(),
752 Value::String(entry.header.created_by.clone()),
753 );
754 p
755 },
756 timestamp: Some(entry.header.posting_date),
757 is_anomaly: entry.header.is_anomaly || entry.header.is_fraud,
758 anomaly_type: entry.header.anomaly_type.clone().or_else(|| {
759 entry
760 .header
761 .fraud_type
762 .as_ref()
763 .map(|ft| format!("{:?}", ft))
764 }),
765 features: compute_je_features(entry),
766 });
767 }
768 }
769
770 pub fn add_p2p_documents(
774 &mut self,
775 purchase_orders: &[datasynth_core::models::documents::PurchaseOrder],
776 goods_receipts: &[datasynth_core::models::documents::GoodsReceipt],
777 vendor_invoices: &[datasynth_core::models::documents::VendorInvoice],
778 payments: &[datasynth_core::models::documents::Payment],
779 ) {
780 if !self.config.include_p2p {
781 return;
782 }
783
784 let mut vendor_doc_counts: HashMap<String, usize> = HashMap::new();
786 for po in purchase_orders {
787 *vendor_doc_counts.entry(po.vendor_id.clone()).or_insert(0) += 1;
788 }
789
790 let threshold = self.config.docs_per_counterparty_threshold;
791 let should_aggregate = matches!(
792 self.config.aggregation_strategy,
793 AggregationStrategy::PoolByCounterparty
794 );
795
796 let vendors_needing_pools: Vec<String> = if should_aggregate {
798 vendor_doc_counts
799 .iter()
800 .filter(|(_, count)| **count > threshold)
801 .map(|(vid, _)| vid.clone())
802 .collect()
803 } else {
804 Vec::new()
805 };
806
807 for vendor_id in &vendors_needing_pools {
809 let count = vendor_doc_counts[vendor_id];
810 let pool_id = format!("pool_p2p_{}", vendor_id);
811 if self.try_add_node(HypergraphNode {
812 id: pool_id.clone(),
813 entity_type: "P2PPool".to_string(),
814 entity_type_code: type_codes::POOL_NODE,
815 layer: HypergraphLayer::ProcessEvents,
816 external_id: format!("pool_p2p_{}", vendor_id),
817 label: format!("P2P Pool ({}): {} docs", vendor_id, count),
818 properties: {
819 let mut p = HashMap::new();
820 p.insert("vendor_id".to_string(), Value::String(vendor_id.clone()));
821 p.insert("document_count".to_string(), Value::Number(count.into()));
822 p
823 },
824 features: vec![count as f64],
825 is_anomaly: false,
826 anomaly_type: None,
827 is_aggregate: true,
828 aggregate_count: count,
829 }) {
830 self.doc_counterparty_links.push((
831 pool_id,
832 "vendor".to_string(),
833 vendor_id.clone(),
834 ));
835 }
836 self.aggregate_count += 1;
837 }
838
839 for po in purchase_orders {
841 if should_aggregate && vendors_needing_pools.contains(&po.vendor_id) {
842 continue; }
844
845 let doc_id = &po.header.document_id;
846 let node_id = format!("po_{}", doc_id);
847 if self.try_add_node(HypergraphNode {
848 id: node_id.clone(),
849 entity_type: "PurchaseOrder".to_string(),
850 entity_type_code: type_codes::PURCHASE_ORDER,
851 layer: HypergraphLayer::ProcessEvents,
852 external_id: doc_id.clone(),
853 label: format!("PO {}", doc_id),
854 properties: {
855 let mut p = HashMap::new();
856 p.insert("vendor_id".to_string(), Value::String(po.vendor_id.clone()));
857 p.insert(
858 "company_code".to_string(),
859 Value::String(po.header.company_code.clone()),
860 );
861 p
862 },
863 features: vec![po
864 .total_net_amount
865 .to_string()
866 .parse::<f64>()
867 .unwrap_or(0.0)
868 .ln_1p()],
869 is_anomaly: false,
870 anomaly_type: None,
871 is_aggregate: false,
872 aggregate_count: 0,
873 }) {
874 self.doc_counterparty_links.push((
875 node_id,
876 "vendor".to_string(),
877 po.vendor_id.clone(),
878 ));
879 }
880 }
881
882 for gr in goods_receipts {
884 let vendor_id = gr.vendor_id.as_deref().unwrap_or("UNKNOWN");
885 if should_aggregate && vendors_needing_pools.contains(&vendor_id.to_string()) {
886 continue;
887 }
888 let doc_id = &gr.header.document_id;
889 let node_id = format!("gr_{}", doc_id);
890 self.try_add_node(HypergraphNode {
891 id: node_id,
892 entity_type: "GoodsReceipt".to_string(),
893 entity_type_code: type_codes::GOODS_RECEIPT,
894 layer: HypergraphLayer::ProcessEvents,
895 external_id: doc_id.clone(),
896 label: format!("GR {}", doc_id),
897 properties: {
898 let mut p = HashMap::new();
899 p.insert(
900 "vendor_id".to_string(),
901 Value::String(vendor_id.to_string()),
902 );
903 p
904 },
905 features: vec![gr
906 .total_value
907 .to_string()
908 .parse::<f64>()
909 .unwrap_or(0.0)
910 .ln_1p()],
911 is_anomaly: false,
912 anomaly_type: None,
913 is_aggregate: false,
914 aggregate_count: 0,
915 });
916 }
917
918 for inv in vendor_invoices {
920 if should_aggregate && vendors_needing_pools.contains(&inv.vendor_id) {
921 continue;
922 }
923 let doc_id = &inv.header.document_id;
924 let node_id = format!("vinv_{}", doc_id);
925 self.try_add_node(HypergraphNode {
926 id: node_id,
927 entity_type: "VendorInvoice".to_string(),
928 entity_type_code: type_codes::VENDOR_INVOICE,
929 layer: HypergraphLayer::ProcessEvents,
930 external_id: doc_id.clone(),
931 label: format!("VI {}", doc_id),
932 properties: {
933 let mut p = HashMap::new();
934 p.insert(
935 "vendor_id".to_string(),
936 Value::String(inv.vendor_id.clone()),
937 );
938 p
939 },
940 features: vec![inv
941 .payable_amount
942 .to_string()
943 .parse::<f64>()
944 .unwrap_or(0.0)
945 .ln_1p()],
946 is_anomaly: false,
947 anomaly_type: None,
948 is_aggregate: false,
949 aggregate_count: 0,
950 });
951 }
952
953 for pmt in payments {
955 let doc_id = &pmt.header.document_id;
956 let node_id = format!("pmt_{}", doc_id);
957 self.try_add_node(HypergraphNode {
958 id: node_id,
959 entity_type: "Payment".to_string(),
960 entity_type_code: type_codes::PAYMENT,
961 layer: HypergraphLayer::ProcessEvents,
962 external_id: doc_id.clone(),
963 label: format!("PMT {}", doc_id),
964 properties: HashMap::new(),
965 features: vec![pmt.amount.to_string().parse::<f64>().unwrap_or(0.0).ln_1p()],
966 is_anomaly: false,
967 anomaly_type: None,
968 is_aggregate: false,
969 aggregate_count: 0,
970 });
971 }
972 }
973
974 pub fn add_o2c_documents(
976 &mut self,
977 sales_orders: &[datasynth_core::models::documents::SalesOrder],
978 deliveries: &[datasynth_core::models::documents::Delivery],
979 customer_invoices: &[datasynth_core::models::documents::CustomerInvoice],
980 ) {
981 if !self.config.include_o2c {
982 return;
983 }
984
985 let mut customer_doc_counts: HashMap<String, usize> = HashMap::new();
987 for so in sales_orders {
988 *customer_doc_counts
989 .entry(so.customer_id.clone())
990 .or_insert(0) += 1;
991 }
992
993 let threshold = self.config.docs_per_counterparty_threshold;
994 let should_aggregate = matches!(
995 self.config.aggregation_strategy,
996 AggregationStrategy::PoolByCounterparty
997 );
998
999 let customers_needing_pools: Vec<String> = if should_aggregate {
1000 customer_doc_counts
1001 .iter()
1002 .filter(|(_, count)| **count > threshold)
1003 .map(|(cid, _)| cid.clone())
1004 .collect()
1005 } else {
1006 Vec::new()
1007 };
1008
1009 for customer_id in &customers_needing_pools {
1011 let count = customer_doc_counts[customer_id];
1012 let pool_id = format!("pool_o2c_{}", customer_id);
1013 if self.try_add_node(HypergraphNode {
1014 id: pool_id.clone(),
1015 entity_type: "O2CPool".to_string(),
1016 entity_type_code: type_codes::POOL_NODE,
1017 layer: HypergraphLayer::ProcessEvents,
1018 external_id: format!("pool_o2c_{}", customer_id),
1019 label: format!("O2C Pool ({}): {} docs", customer_id, count),
1020 properties: {
1021 let mut p = HashMap::new();
1022 p.insert(
1023 "customer_id".to_string(),
1024 Value::String(customer_id.clone()),
1025 );
1026 p.insert("document_count".to_string(), Value::Number(count.into()));
1027 p
1028 },
1029 features: vec![count as f64],
1030 is_anomaly: false,
1031 anomaly_type: None,
1032 is_aggregate: true,
1033 aggregate_count: count,
1034 }) {
1035 self.doc_counterparty_links.push((
1036 pool_id,
1037 "customer".to_string(),
1038 customer_id.clone(),
1039 ));
1040 }
1041 self.aggregate_count += 1;
1042 }
1043
1044 for so in sales_orders {
1045 if should_aggregate && customers_needing_pools.contains(&so.customer_id) {
1046 continue;
1047 }
1048 let doc_id = &so.header.document_id;
1049 let node_id = format!("so_{}", doc_id);
1050 if self.try_add_node(HypergraphNode {
1051 id: node_id.clone(),
1052 entity_type: "SalesOrder".to_string(),
1053 entity_type_code: type_codes::SALES_ORDER,
1054 layer: HypergraphLayer::ProcessEvents,
1055 external_id: doc_id.clone(),
1056 label: format!("SO {}", doc_id),
1057 properties: {
1058 let mut p = HashMap::new();
1059 p.insert(
1060 "customer_id".to_string(),
1061 Value::String(so.customer_id.clone()),
1062 );
1063 p
1064 },
1065 features: vec![so
1066 .total_net_amount
1067 .to_string()
1068 .parse::<f64>()
1069 .unwrap_or(0.0)
1070 .ln_1p()],
1071 is_anomaly: false,
1072 anomaly_type: None,
1073 is_aggregate: false,
1074 aggregate_count: 0,
1075 }) {
1076 self.doc_counterparty_links.push((
1077 node_id,
1078 "customer".to_string(),
1079 so.customer_id.clone(),
1080 ));
1081 }
1082 }
1083
1084 for del in deliveries {
1085 if should_aggregate && customers_needing_pools.contains(&del.customer_id) {
1086 continue;
1087 }
1088 let doc_id = &del.header.document_id;
1089 let node_id = format!("del_{}", doc_id);
1090 self.try_add_node(HypergraphNode {
1091 id: node_id,
1092 entity_type: "Delivery".to_string(),
1093 entity_type_code: type_codes::DELIVERY,
1094 layer: HypergraphLayer::ProcessEvents,
1095 external_id: doc_id.clone(),
1096 label: format!("DEL {}", doc_id),
1097 properties: HashMap::new(),
1098 features: vec![],
1099 is_anomaly: false,
1100 anomaly_type: None,
1101 is_aggregate: false,
1102 aggregate_count: 0,
1103 });
1104 }
1105
1106 for inv in customer_invoices {
1107 if should_aggregate && customers_needing_pools.contains(&inv.customer_id) {
1108 continue;
1109 }
1110 let doc_id = &inv.header.document_id;
1111 let node_id = format!("cinv_{}", doc_id);
1112 self.try_add_node(HypergraphNode {
1113 id: node_id,
1114 entity_type: "CustomerInvoice".to_string(),
1115 entity_type_code: type_codes::CUSTOMER_INVOICE,
1116 layer: HypergraphLayer::ProcessEvents,
1117 external_id: doc_id.clone(),
1118 label: format!("CI {}", doc_id),
1119 properties: HashMap::new(),
1120 features: vec![inv
1121 .total_gross_amount
1122 .to_string()
1123 .parse::<f64>()
1124 .unwrap_or(0.0)
1125 .ln_1p()],
1126 is_anomaly: false,
1127 anomaly_type: None,
1128 is_aggregate: false,
1129 aggregate_count: 0,
1130 });
1131 }
1132 }
1133
1134 pub fn add_s2c_documents(
1136 &mut self,
1137 projects: &[SourcingProject],
1138 qualifications: &[SupplierQualification],
1139 rfx_events: &[RfxEvent],
1140 bids: &[SupplierBid],
1141 evaluations: &[BidEvaluation],
1142 contracts: &[ProcurementContract],
1143 ) {
1144 if !self.config.include_s2c {
1145 return;
1146 }
1147 for p in projects {
1148 let node_id = format!("s2c_proj_{}", p.project_id);
1149 self.try_add_node(HypergraphNode {
1150 id: node_id,
1151 entity_type: "SourcingProject".into(),
1152 entity_type_code: type_codes::SOURCING_PROJECT,
1153 layer: HypergraphLayer::ProcessEvents,
1154 external_id: p.project_id.clone(),
1155 label: format!("SPRJ {}", p.project_id),
1156 properties: HashMap::new(),
1157 features: vec![p
1158 .estimated_annual_spend
1159 .to_string()
1160 .parse::<f64>()
1161 .unwrap_or(0.0)
1162 .ln_1p()],
1163 is_anomaly: false,
1164 anomaly_type: None,
1165 is_aggregate: false,
1166 aggregate_count: 0,
1167 });
1168 }
1169 for q in qualifications {
1170 let node_id = format!("s2c_qual_{}", q.qualification_id);
1171 self.try_add_node(HypergraphNode {
1172 id: node_id,
1173 entity_type: "SupplierQualification".into(),
1174 entity_type_code: type_codes::SUPPLIER_QUALIFICATION,
1175 layer: HypergraphLayer::ProcessEvents,
1176 external_id: q.qualification_id.clone(),
1177 label: format!("SQUAL {}", q.qualification_id),
1178 properties: HashMap::new(),
1179 features: vec![],
1180 is_anomaly: false,
1181 anomaly_type: None,
1182 is_aggregate: false,
1183 aggregate_count: 0,
1184 });
1185 }
1186 for r in rfx_events {
1187 let node_id = format!("s2c_rfx_{}", r.rfx_id);
1188 self.try_add_node(HypergraphNode {
1189 id: node_id,
1190 entity_type: "RfxEvent".into(),
1191 entity_type_code: type_codes::RFX_EVENT,
1192 layer: HypergraphLayer::ProcessEvents,
1193 external_id: r.rfx_id.clone(),
1194 label: format!("RFX {}", r.rfx_id),
1195 properties: HashMap::new(),
1196 features: vec![],
1197 is_anomaly: false,
1198 anomaly_type: None,
1199 is_aggregate: false,
1200 aggregate_count: 0,
1201 });
1202 }
1203 for b in bids {
1204 let node_id = format!("s2c_bid_{}", b.bid_id);
1205 self.try_add_node(HypergraphNode {
1206 id: node_id,
1207 entity_type: "SupplierBid".into(),
1208 entity_type_code: type_codes::SUPPLIER_BID,
1209 layer: HypergraphLayer::ProcessEvents,
1210 external_id: b.bid_id.clone(),
1211 label: format!("BID {}", b.bid_id),
1212 properties: HashMap::new(),
1213 features: vec![b
1214 .total_amount
1215 .to_string()
1216 .parse::<f64>()
1217 .unwrap_or(0.0)
1218 .ln_1p()],
1219 is_anomaly: false,
1220 anomaly_type: None,
1221 is_aggregate: false,
1222 aggregate_count: 0,
1223 });
1224 }
1225 for e in evaluations {
1226 let node_id = format!("s2c_eval_{}", e.evaluation_id);
1227 self.try_add_node(HypergraphNode {
1228 id: node_id,
1229 entity_type: "BidEvaluation".into(),
1230 entity_type_code: type_codes::BID_EVALUATION,
1231 layer: HypergraphLayer::ProcessEvents,
1232 external_id: e.evaluation_id.clone(),
1233 label: format!("BEVAL {}", e.evaluation_id),
1234 properties: HashMap::new(),
1235 features: vec![],
1236 is_anomaly: false,
1237 anomaly_type: None,
1238 is_aggregate: false,
1239 aggregate_count: 0,
1240 });
1241 }
1242 for c in contracts {
1243 let node_id = format!("s2c_ctr_{}", c.contract_id);
1244 self.try_add_node(HypergraphNode {
1245 id: node_id,
1246 entity_type: "ProcurementContract".into(),
1247 entity_type_code: type_codes::PROCUREMENT_CONTRACT,
1248 layer: HypergraphLayer::ProcessEvents,
1249 external_id: c.contract_id.clone(),
1250 label: format!("CTR {}", c.contract_id),
1251 properties: HashMap::new(),
1252 features: vec![c
1253 .total_value
1254 .to_string()
1255 .parse::<f64>()
1256 .unwrap_or(0.0)
1257 .ln_1p()],
1258 is_anomaly: false,
1259 anomaly_type: None,
1260 is_aggregate: false,
1261 aggregate_count: 0,
1262 });
1263 self.doc_counterparty_links.push((
1265 format!("s2c_ctr_{}", c.contract_id),
1266 "vendor".into(),
1267 c.vendor_id.clone(),
1268 ));
1269 }
1270 }
1271
1272 pub fn add_h2r_documents(
1274 &mut self,
1275 payroll_runs: &[PayrollRun],
1276 time_entries: &[TimeEntry],
1277 expense_reports: &[ExpenseReport],
1278 ) {
1279 if !self.config.include_h2r {
1280 return;
1281 }
1282 for pr in payroll_runs {
1283 let node_id = format!("h2r_pay_{}", pr.payroll_id);
1284 self.try_add_node(HypergraphNode {
1285 id: node_id,
1286 entity_type: "PayrollRun".into(),
1287 entity_type_code: type_codes::PAYROLL_RUN,
1288 layer: HypergraphLayer::ProcessEvents,
1289 external_id: pr.payroll_id.clone(),
1290 label: format!("PAY {}", pr.payroll_id),
1291 properties: HashMap::new(),
1292 features: vec![pr
1293 .total_gross
1294 .to_string()
1295 .parse::<f64>()
1296 .unwrap_or(0.0)
1297 .ln_1p()],
1298 is_anomaly: false,
1299 anomaly_type: None,
1300 is_aggregate: false,
1301 aggregate_count: 0,
1302 });
1303 }
1304 for te in time_entries {
1305 let node_id = format!("h2r_time_{}", te.entry_id);
1306 self.try_add_node(HypergraphNode {
1307 id: node_id,
1308 entity_type: "TimeEntry".into(),
1309 entity_type_code: type_codes::TIME_ENTRY,
1310 layer: HypergraphLayer::ProcessEvents,
1311 external_id: te.entry_id.clone(),
1312 label: format!("TIME {}", te.entry_id),
1313 properties: HashMap::new(),
1314 features: vec![te.hours_regular + te.hours_overtime],
1315 is_anomaly: false,
1316 anomaly_type: None,
1317 is_aggregate: false,
1318 aggregate_count: 0,
1319 });
1320 }
1321 for er in expense_reports {
1322 let node_id = format!("h2r_exp_{}", er.report_id);
1323 self.try_add_node(HypergraphNode {
1324 id: node_id,
1325 entity_type: "ExpenseReport".into(),
1326 entity_type_code: type_codes::EXPENSE_REPORT,
1327 layer: HypergraphLayer::ProcessEvents,
1328 external_id: er.report_id.clone(),
1329 label: format!("EXP {}", er.report_id),
1330 properties: HashMap::new(),
1331 features: vec![er
1332 .total_amount
1333 .to_string()
1334 .parse::<f64>()
1335 .unwrap_or(0.0)
1336 .ln_1p()],
1337 is_anomaly: false,
1338 anomaly_type: None,
1339 is_aggregate: false,
1340 aggregate_count: 0,
1341 });
1342 }
1343 }
1344
1345 pub fn add_mfg_documents(
1347 &mut self,
1348 production_orders: &[ProductionOrder],
1349 quality_inspections: &[QualityInspection],
1350 cycle_counts: &[CycleCount],
1351 ) {
1352 if !self.config.include_mfg {
1353 return;
1354 }
1355 for po in production_orders {
1356 let node_id = format!("mfg_po_{}", po.order_id);
1357 self.try_add_node(HypergraphNode {
1358 id: node_id,
1359 entity_type: "ProductionOrder".into(),
1360 entity_type_code: type_codes::PRODUCTION_ORDER,
1361 layer: HypergraphLayer::ProcessEvents,
1362 external_id: po.order_id.clone(),
1363 label: format!("PROD {}", po.order_id),
1364 properties: HashMap::new(),
1365 features: vec![po
1366 .planned_quantity
1367 .to_string()
1368 .parse::<f64>()
1369 .unwrap_or(0.0)
1370 .ln_1p()],
1371 is_anomaly: false,
1372 anomaly_type: None,
1373 is_aggregate: false,
1374 aggregate_count: 0,
1375 });
1376 }
1377 for qi in quality_inspections {
1378 let node_id = format!("mfg_qi_{}", qi.inspection_id);
1379 self.try_add_node(HypergraphNode {
1380 id: node_id,
1381 entity_type: "QualityInspection".into(),
1382 entity_type_code: type_codes::QUALITY_INSPECTION,
1383 layer: HypergraphLayer::ProcessEvents,
1384 external_id: qi.inspection_id.clone(),
1385 label: format!("QI {}", qi.inspection_id),
1386 properties: HashMap::new(),
1387 features: vec![qi.defect_rate],
1388 is_anomaly: false,
1389 anomaly_type: None,
1390 is_aggregate: false,
1391 aggregate_count: 0,
1392 });
1393 }
1394 for cc in cycle_counts {
1395 let node_id = format!("mfg_cc_{}", cc.count_id);
1396 self.try_add_node(HypergraphNode {
1397 id: node_id,
1398 entity_type: "CycleCount".into(),
1399 entity_type_code: type_codes::CYCLE_COUNT,
1400 layer: HypergraphLayer::ProcessEvents,
1401 external_id: cc.count_id.clone(),
1402 label: format!("CC {}", cc.count_id),
1403 properties: HashMap::new(),
1404 features: vec![cc.variance_rate],
1405 is_anomaly: false,
1406 anomaly_type: None,
1407 is_aggregate: false,
1408 aggregate_count: 0,
1409 });
1410 }
1411 }
1412
1413 pub fn add_bank_documents(
1415 &mut self,
1416 customers: &[BankingCustomer],
1417 accounts: &[BankAccount],
1418 transactions: &[BankTransaction],
1419 ) {
1420 if !self.config.include_bank {
1421 return;
1422 }
1423 for cust in customers {
1424 let cid = cust.customer_id.to_string();
1425 let node_id = format!("bank_cust_{}", cid);
1426 self.try_add_node(HypergraphNode {
1427 id: node_id,
1428 entity_type: "BankingCustomer".into(),
1429 entity_type_code: type_codes::BANKING_CUSTOMER,
1430 layer: HypergraphLayer::ProcessEvents,
1431 external_id: cid,
1432 label: format!("BCUST {}", cust.customer_id),
1433 properties: HashMap::new(),
1434 features: vec![],
1435 is_anomaly: false,
1436 anomaly_type: None,
1437 is_aggregate: false,
1438 aggregate_count: 0,
1439 });
1440 }
1441 for acct in accounts {
1442 let aid = acct.account_id.to_string();
1443 let node_id = format!("bank_acct_{}", aid);
1444 self.try_add_node(HypergraphNode {
1445 id: node_id,
1446 entity_type: "BankAccount".into(),
1447 entity_type_code: type_codes::BANK_ACCOUNT,
1448 layer: HypergraphLayer::ProcessEvents,
1449 external_id: aid,
1450 label: format!("BACCT {}", acct.account_number),
1451 properties: HashMap::new(),
1452 features: vec![acct
1453 .current_balance
1454 .to_string()
1455 .parse::<f64>()
1456 .unwrap_or(0.0)
1457 .ln_1p()],
1458 is_anomaly: false,
1459 anomaly_type: None,
1460 is_aggregate: false,
1461 aggregate_count: 0,
1462 });
1463 }
1464 for txn in transactions {
1465 let tid = txn.transaction_id.to_string();
1466 let node_id = format!("bank_txn_{}", tid);
1467 self.try_add_node(HypergraphNode {
1468 id: node_id,
1469 entity_type: "BankTransaction".into(),
1470 entity_type_code: type_codes::BANK_TRANSACTION,
1471 layer: HypergraphLayer::ProcessEvents,
1472 external_id: tid,
1473 label: format!("BTXN {}", txn.reference),
1474 properties: HashMap::new(),
1475 features: vec![txn
1476 .amount
1477 .to_string()
1478 .parse::<f64>()
1479 .unwrap_or(0.0)
1480 .abs()
1481 .ln_1p()],
1482 is_anomaly: txn.is_suspicious,
1483 anomaly_type: None,
1484 is_aggregate: false,
1485 aggregate_count: 0,
1486 });
1487 }
1488 }
1489
1490 #[allow(clippy::too_many_arguments)]
1492 pub fn add_audit_documents(
1493 &mut self,
1494 engagements: &[AuditEngagement],
1495 workpapers: &[Workpaper],
1496 findings: &[AuditFinding],
1497 evidence: &[AuditEvidence],
1498 risks: &[RiskAssessment],
1499 judgments: &[ProfessionalJudgment],
1500 ) {
1501 if !self.config.include_audit {
1502 return;
1503 }
1504 for eng in engagements {
1505 let eid = eng.engagement_id.to_string();
1506 let node_id = format!("audit_eng_{}", eid);
1507 self.try_add_node(HypergraphNode {
1508 id: node_id,
1509 entity_type: "AuditEngagement".into(),
1510 entity_type_code: type_codes::AUDIT_ENGAGEMENT,
1511 layer: HypergraphLayer::ProcessEvents,
1512 external_id: eid,
1513 label: format!("AENG {}", eng.engagement_ref),
1514 properties: HashMap::new(),
1515 features: vec![eng
1516 .materiality
1517 .to_string()
1518 .parse::<f64>()
1519 .unwrap_or(0.0)
1520 .ln_1p()],
1521 is_anomaly: false,
1522 anomaly_type: None,
1523 is_aggregate: false,
1524 aggregate_count: 0,
1525 });
1526 }
1527 for wp in workpapers {
1528 let wid = wp.workpaper_id.to_string();
1529 let node_id = format!("audit_wp_{}", wid);
1530 self.try_add_node(HypergraphNode {
1531 id: node_id,
1532 entity_type: "Workpaper".into(),
1533 entity_type_code: type_codes::WORKPAPER,
1534 layer: HypergraphLayer::ProcessEvents,
1535 external_id: wid,
1536 label: format!("WP {}", wp.workpaper_ref),
1537 properties: HashMap::new(),
1538 features: vec![],
1539 is_anomaly: false,
1540 anomaly_type: None,
1541 is_aggregate: false,
1542 aggregate_count: 0,
1543 });
1544 }
1545 for f in findings {
1546 let fid = f.finding_id.to_string();
1547 let node_id = format!("audit_find_{}", fid);
1548 self.try_add_node(HypergraphNode {
1549 id: node_id,
1550 entity_type: "AuditFinding".into(),
1551 entity_type_code: type_codes::AUDIT_FINDING,
1552 layer: HypergraphLayer::ProcessEvents,
1553 external_id: fid,
1554 label: format!("AFIND {}", f.finding_ref),
1555 properties: HashMap::new(),
1556 features: vec![],
1557 is_anomaly: false,
1558 anomaly_type: None,
1559 is_aggregate: false,
1560 aggregate_count: 0,
1561 });
1562 }
1563 for ev in evidence {
1564 let evid = ev.evidence_id.to_string();
1565 let node_id = format!("audit_ev_{}", evid);
1566 self.try_add_node(HypergraphNode {
1567 id: node_id,
1568 entity_type: "AuditEvidence".into(),
1569 entity_type_code: type_codes::AUDIT_EVIDENCE,
1570 layer: HypergraphLayer::ProcessEvents,
1571 external_id: evid,
1572 label: format!("AEV {}", ev.evidence_id),
1573 properties: HashMap::new(),
1574 features: vec![],
1575 is_anomaly: false,
1576 anomaly_type: None,
1577 is_aggregate: false,
1578 aggregate_count: 0,
1579 });
1580 }
1581 for r in risks {
1582 let rid = r.risk_id.to_string();
1583 let node_id = format!("audit_risk_{}", rid);
1584 self.try_add_node(HypergraphNode {
1585 id: node_id,
1586 entity_type: "RiskAssessment".into(),
1587 entity_type_code: type_codes::RISK_ASSESSMENT,
1588 layer: HypergraphLayer::ProcessEvents,
1589 external_id: rid,
1590 label: format!("ARISK {}", r.risk_id),
1591 properties: HashMap::new(),
1592 features: vec![],
1593 is_anomaly: false,
1594 anomaly_type: None,
1595 is_aggregate: false,
1596 aggregate_count: 0,
1597 });
1598 }
1599 for j in judgments {
1600 let jid = j.judgment_id.to_string();
1601 let node_id = format!("audit_judg_{}", jid);
1602 self.try_add_node(HypergraphNode {
1603 id: node_id,
1604 entity_type: "ProfessionalJudgment".into(),
1605 entity_type_code: type_codes::PROFESSIONAL_JUDGMENT,
1606 layer: HypergraphLayer::ProcessEvents,
1607 external_id: jid,
1608 label: format!("AJUDG {}", j.judgment_id),
1609 properties: HashMap::new(),
1610 features: vec![],
1611 is_anomaly: false,
1612 anomaly_type: None,
1613 is_aggregate: false,
1614 aggregate_count: 0,
1615 });
1616 }
1617 }
1618
1619 pub fn add_bank_recon_documents(&mut self, reconciliations: &[BankReconciliation]) {
1621 if !self.config.include_r2r {
1622 return;
1623 }
1624 for recon in reconciliations {
1625 let node_id = format!("recon_{}", recon.reconciliation_id);
1626 self.try_add_node(HypergraphNode {
1627 id: node_id,
1628 entity_type: "BankReconciliation".into(),
1629 entity_type_code: type_codes::BANK_RECONCILIATION,
1630 layer: HypergraphLayer::ProcessEvents,
1631 external_id: recon.reconciliation_id.clone(),
1632 label: format!("RECON {}", recon.reconciliation_id),
1633 properties: HashMap::new(),
1634 features: vec![recon
1635 .bank_ending_balance
1636 .to_string()
1637 .parse::<f64>()
1638 .unwrap_or(0.0)
1639 .ln_1p()],
1640 is_anomaly: false,
1641 anomaly_type: None,
1642 is_aggregate: false,
1643 aggregate_count: 0,
1644 });
1645 for line in &recon.statement_lines {
1646 let node_id = format!("recon_line_{}", line.line_id);
1647 self.try_add_node(HypergraphNode {
1648 id: node_id,
1649 entity_type: "BankStatementLine".into(),
1650 entity_type_code: type_codes::BANK_STATEMENT_LINE,
1651 layer: HypergraphLayer::ProcessEvents,
1652 external_id: line.line_id.clone(),
1653 label: format!("BSL {}", line.line_id),
1654 properties: HashMap::new(),
1655 features: vec![line
1656 .amount
1657 .to_string()
1658 .parse::<f64>()
1659 .unwrap_or(0.0)
1660 .abs()
1661 .ln_1p()],
1662 is_anomaly: false,
1663 anomaly_type: None,
1664 is_aggregate: false,
1665 aggregate_count: 0,
1666 });
1667 }
1668 for item in &recon.reconciling_items {
1669 let node_id = format!("recon_item_{}", item.item_id);
1670 self.try_add_node(HypergraphNode {
1671 id: node_id,
1672 entity_type: "ReconcilingItem".into(),
1673 entity_type_code: type_codes::RECONCILING_ITEM,
1674 layer: HypergraphLayer::ProcessEvents,
1675 external_id: item.item_id.clone(),
1676 label: format!("RITEM {}", item.item_id),
1677 properties: HashMap::new(),
1678 features: vec![item
1679 .amount
1680 .to_string()
1681 .parse::<f64>()
1682 .unwrap_or(0.0)
1683 .abs()
1684 .ln_1p()],
1685 is_anomaly: false,
1686 anomaly_type: None,
1687 is_aggregate: false,
1688 aggregate_count: 0,
1689 });
1690 }
1691 }
1692 }
1693
1694 pub fn add_ocpm_events(&mut self, event_log: &datasynth_ocpm::OcpmEventLog) {
1696 if !self.config.events_as_hyperedges {
1697 return;
1698 }
1699 for event in &event_log.events {
1700 let participants: Vec<HyperedgeParticipant> = event
1701 .object_refs
1702 .iter()
1703 .map(|obj_ref| {
1704 let node_id = format!("ocpm_obj_{}", obj_ref.object_id);
1705 self.try_add_node(HypergraphNode {
1707 id: node_id.clone(),
1708 entity_type: "OcpmObject".into(),
1709 entity_type_code: type_codes::OCPM_EVENT,
1710 layer: HypergraphLayer::ProcessEvents,
1711 external_id: obj_ref.object_id.to_string(),
1712 label: format!("OBJ {}", obj_ref.object_type_id),
1713 properties: HashMap::new(),
1714 features: vec![],
1715 is_anomaly: false,
1716 anomaly_type: None,
1717 is_aggregate: false,
1718 aggregate_count: 0,
1719 });
1720 HyperedgeParticipant {
1721 node_id,
1722 role: format!("{:?}", obj_ref.qualifier),
1723 weight: None,
1724 }
1725 })
1726 .collect();
1727
1728 if !participants.is_empty() {
1729 let mut props = HashMap::new();
1730 props.insert(
1731 "activity_id".into(),
1732 Value::String(event.activity_id.clone()),
1733 );
1734 props.insert(
1735 "timestamp".into(),
1736 Value::String(event.timestamp.to_rfc3339()),
1737 );
1738 if !event.resource_id.is_empty() {
1739 props.insert("resource".into(), Value::String(event.resource_id.clone()));
1740 }
1741
1742 self.hyperedges.push(Hyperedge {
1743 id: format!("ocpm_evt_{}", event.event_id),
1744 hyperedge_type: "OcpmEvent".into(),
1745 subtype: event.activity_id.clone(),
1746 participants,
1747 layer: HypergraphLayer::ProcessEvents,
1748 properties: props,
1749 timestamp: Some(event.timestamp.date_naive()),
1750 is_anomaly: false,
1751 anomaly_type: None,
1752 features: vec![],
1753 });
1754 }
1755 }
1756 }
1757
1758 pub fn build_cross_layer_edges(&mut self) {
1760 if !self.config.include_cross_layer_edges {
1761 return;
1762 }
1763
1764 let links = std::mem::take(&mut self.doc_counterparty_links);
1766 for (doc_node_id, counterparty_type, counterparty_id) in &links {
1767 let source_node_id = match counterparty_type.as_str() {
1768 "vendor" => self.vendor_node_ids.get(counterparty_id),
1769 "customer" => self.customer_node_ids.get(counterparty_id),
1770 _ => None,
1771 };
1772 if let Some(source_id) = source_node_id {
1773 self.edges.push(CrossLayerEdge {
1774 source_id: source_id.clone(),
1775 source_layer: HypergraphLayer::GovernanceControls,
1776 target_id: doc_node_id.clone(),
1777 target_layer: HypergraphLayer::ProcessEvents,
1778 edge_type: "SuppliesTo".to_string(),
1779 edge_type_code: type_codes::SUPPLIES_TO,
1780 properties: HashMap::new(),
1781 });
1782 }
1783 }
1784 self.doc_counterparty_links = links;
1785 }
1786
1787 pub fn build(mut self) -> Hypergraph {
1789 self.build_cross_layer_edges();
1791
1792 let mut layer_node_counts: HashMap<String, usize> = HashMap::new();
1794 let mut node_type_counts: HashMap<String, usize> = HashMap::new();
1795 let mut anomalous_nodes = 0;
1796
1797 for node in &self.nodes {
1798 *layer_node_counts
1799 .entry(node.layer.name().to_string())
1800 .or_insert(0) += 1;
1801 *node_type_counts
1802 .entry(node.entity_type.clone())
1803 .or_insert(0) += 1;
1804 if node.is_anomaly {
1805 anomalous_nodes += 1;
1806 }
1807 }
1808
1809 let mut edge_type_counts: HashMap<String, usize> = HashMap::new();
1810 for edge in &self.edges {
1811 *edge_type_counts.entry(edge.edge_type.clone()).or_insert(0) += 1;
1812 }
1813
1814 let mut hyperedge_type_counts: HashMap<String, usize> = HashMap::new();
1815 let mut anomalous_hyperedges = 0;
1816 for he in &self.hyperedges {
1817 *hyperedge_type_counts
1818 .entry(he.hyperedge_type.clone())
1819 .or_insert(0) += 1;
1820 if he.is_anomaly {
1821 anomalous_hyperedges += 1;
1822 }
1823 }
1824
1825 let budget_report = NodeBudgetReport {
1826 total_budget: self.budget.total_max(),
1827 total_used: self.budget.total_count(),
1828 layer1_budget: self.budget.layer1_max,
1829 layer1_used: self.budget.layer1_count,
1830 layer2_budget: self.budget.layer2_max,
1831 layer2_used: self.budget.layer2_count,
1832 layer3_budget: self.budget.layer3_max,
1833 layer3_used: self.budget.layer3_count,
1834 aggregate_nodes_created: self.aggregate_count,
1835 aggregation_triggered: self.aggregate_count > 0,
1836 };
1837
1838 let metadata = HypergraphMetadata {
1839 name: "multi_layer_hypergraph".to_string(),
1840 num_nodes: self.nodes.len(),
1841 num_edges: self.edges.len(),
1842 num_hyperedges: self.hyperedges.len(),
1843 layer_node_counts,
1844 node_type_counts,
1845 edge_type_counts,
1846 hyperedge_type_counts,
1847 anomalous_nodes,
1848 anomalous_hyperedges,
1849 source: "datasynth".to_string(),
1850 generated_at: chrono::Utc::now().to_rfc3339(),
1851 budget_report: budget_report.clone(),
1852 files: vec![
1853 "nodes.jsonl".to_string(),
1854 "edges.jsonl".to_string(),
1855 "hyperedges.jsonl".to_string(),
1856 "metadata.json".to_string(),
1857 ],
1858 };
1859
1860 Hypergraph {
1861 nodes: self.nodes,
1862 edges: self.edges,
1863 hyperedges: self.hyperedges,
1864 metadata,
1865 budget_report,
1866 }
1867 }
1868
1869 fn try_add_node(&mut self, node: HypergraphNode) -> bool {
1871 if self.node_index.contains_key(&node.id) {
1872 return false; }
1874
1875 if !self.budget.can_add(node.layer) {
1876 return false; }
1878
1879 let id = node.id.clone();
1880 let layer = node.layer;
1881 self.nodes.push(node);
1882 let idx = self.nodes.len() - 1;
1883 self.node_index.insert(id, idx);
1884 self.budget.record_add(layer);
1885 true
1886 }
1887}
1888
1889fn component_to_feature(component: &CosoComponent) -> f64 {
1891 match component {
1892 CosoComponent::ControlEnvironment => 1.0,
1893 CosoComponent::RiskAssessment => 2.0,
1894 CosoComponent::ControlActivities => 3.0,
1895 CosoComponent::InformationCommunication => 4.0,
1896 CosoComponent::MonitoringActivities => 5.0,
1897 }
1898}
1899
1900fn account_type_feature(account_type: &datasynth_core::models::AccountType) -> f64 {
1902 use datasynth_core::models::AccountType;
1903 match account_type {
1904 AccountType::Asset => 1.0,
1905 AccountType::Liability => 2.0,
1906 AccountType::Equity => 3.0,
1907 AccountType::Revenue => 4.0,
1908 AccountType::Expense => 5.0,
1909 AccountType::Statistical => 6.0,
1910 }
1911}
1912
1913fn compute_je_features(entry: &JournalEntry) -> Vec<f64> {
1915 let total_debit: f64 = entry
1916 .lines
1917 .iter()
1918 .map(|l| l.debit_amount.to_string().parse::<f64>().unwrap_or(0.0))
1919 .sum();
1920
1921 let line_count = entry.lines.len() as f64;
1922 let posting_date = entry.header.posting_date;
1923 let weekday = posting_date.weekday().num_days_from_monday() as f64 / WEEKDAY_NORMALIZER;
1924 let day = posting_date.day() as f64 / DAY_OF_MONTH_NORMALIZER;
1925 let month = posting_date.month() as f64 / MONTH_NORMALIZER;
1926 let is_month_end = if posting_date.day() >= MONTH_END_DAY_THRESHOLD {
1927 1.0
1928 } else {
1929 0.0
1930 };
1931
1932 vec![
1933 (total_debit.abs() + 1.0).ln(), line_count, weekday, day, month, is_month_end, ]
1940}
1941
#[cfg(test)]
// Test helpers may unwrap freely; a panic is the desired failure mode there.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use datasynth_core::models::{
        AccountSubType, AccountType, ChartOfAccounts, CoAComplexity, ControlFrequency, ControlType,
        CosoComponent, CosoMaturityLevel, GLAccount, InternalControl, RiskLevel, SoxAssertion,
        UserPersona,
    };

    /// Small chart of accounts with exactly two accounts: cash and AP.
    fn make_test_coa() -> ChartOfAccounts {
        let mut chart = ChartOfAccounts::new(
            "TEST_COA".to_string(),
            "Test Chart".to_string(),
            "US".to_string(),
            datasynth_core::models::IndustrySector::Manufacturing,
            CoAComplexity::Small,
        );

        let cash = GLAccount::new(
            "1000".to_string(),
            "Cash".to_string(),
            AccountType::Asset,
            AccountSubType::Cash,
        );
        chart.add_account(cash);

        let payables = GLAccount::new(
            "2000".to_string(),
            "AP".to_string(),
            AccountType::Liability,
            AccountSubType::AccountsPayable,
        );
        chart.add_account(payables);

        chart
    }

    /// A single key control mapped to one COSO principle and one SOX assertion.
    fn make_test_control() -> InternalControl {
        let principles = vec![CosoPrinciple::ControlActions];
        InternalControl {
            control_id: "C001".to_string(),
            control_name: "Three-Way Match".to_string(),
            control_type: ControlType::Preventive,
            objective: "Ensure proper matching".to_string(),
            frequency: ControlFrequency::Transactional,
            owner_role: UserPersona::Controller,
            risk_level: RiskLevel::High,
            description: "Test control".to_string(),
            is_key_control: true,
            sox_assertion: SoxAssertion::Existence,
            coso_component: CosoComponent::ControlActivities,
            coso_principles: principles,
            control_scope: datasynth_core::models::ControlScope::TransactionLevel,
            maturity_level: CosoMaturityLevel::Managed,
        }
    }

    /// The COSO framework alone yields 22 governance nodes and 17
    /// component-to-principle edges.
    #[test]
    fn test_builder_coso_framework() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 1000,
            ..Default::default()
        });
        builder.add_coso_framework();
        let graph = builder.build();

        assert_eq!(graph.nodes.len(), 22);

        let outside_governance = graph
            .nodes
            .iter()
            .any(|node| node.layer != HypergraphLayer::GovernanceControls);
        assert!(!outside_governance);

        let principle_edges = graph
            .edges
            .iter()
            .filter(|edge| edge.edge_type == "CoversCosoPrinciple")
            .count();
        assert_eq!(principle_edges, 17);
    }

    /// One control on top of the COSO framework brings the total to 24 nodes,
    /// including a control node and a SOX assertion node.
    #[test]
    fn test_builder_controls() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 1000,
            ..Default::default()
        });
        builder.add_coso_framework();
        let controls = [make_test_control()];
        builder.add_controls(&controls);
        let graph = builder.build();

        assert_eq!(graph.nodes.len(), 24);

        let has_type = |wanted: &str| graph.nodes.iter().any(|node| node.entity_type == wanted);
        assert!(has_type("InternalControl"));
        assert!(has_type("SoxAssertion"));
    }

    /// Both accounts of the test chart land in the accounting-network layer.
    #[test]
    fn test_builder_accounts() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 1000,
            ..Default::default()
        });
        let chart = make_test_coa();
        builder.add_accounts(&chart);
        let graph = builder.build();

        assert_eq!(graph.nodes.len(), 2);

        let outside_accounting = graph
            .nodes
            .iter()
            .any(|node| node.layer != HypergraphLayer::AccountingNetwork);
        assert!(!outside_accounting);
    }

    /// With a total budget of 10 nodes, at most one account node is admitted.
    #[test]
    fn test_budget_enforcement() {
        let tiny_budget = HypergraphConfig {
            max_nodes: 10,
            include_coso: false,
            include_controls: false,
            include_sox: false,
            include_vendors: false,
            include_customers: false,
            include_employees: false,
            include_p2p: false,
            include_o2c: false,
            ..Default::default()
        };
        let mut builder = HypergraphBuilder::new(tiny_budget);
        let chart = make_test_coa();
        builder.add_accounts(&chart);
        let graph = builder.build();

        assert!(graph.nodes.len() <= 1);
    }

    /// A combined build is non-empty and its metadata counts match the data.
    #[test]
    fn test_full_build() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 10000,
            ..Default::default()
        });
        builder.add_coso_framework();
        let controls = [make_test_control()];
        builder.add_controls(&controls);
        let chart = make_test_coa();
        builder.add_accounts(&chart);
        let graph = builder.build();

        assert!(!graph.nodes.is_empty());
        assert!(!graph.edges.is_empty());
        assert_eq!(graph.metadata.num_nodes, graph.nodes.len());
        assert_eq!(graph.metadata.num_edges, graph.edges.len());
    }
}