1use std::collections::HashMap;
12
13use chrono::Datelike;
14use serde_json::Value;
15
16use datasynth_core::models::{
17 ChartOfAccounts, CosoComponent, CosoPrinciple, Customer, Employee, InternalControl,
18 JournalEntry, Vendor,
19};
20
21use crate::models::hypergraph::{
22 AggregationStrategy, CrossLayerEdge, Hyperedge, HyperedgeParticipant, Hypergraph,
23 HypergraphLayer, HypergraphMetadata, HypergraphNode, NodeBudget, NodeBudgetReport,
24};
25
/// Day-of-month cutoff used by `compute_je_features`: a posting day at or
/// above this value sets the month-end feature to 1.0.
const MONTH_END_DAY_THRESHOLD: u32 = 28;
/// Divisor applied to `weekday().num_days_from_monday()` in `compute_je_features`.
const WEEKDAY_NORMALIZER: f64 = 6.0;
/// Divisor applied to the posting day-of-month in `compute_je_features`.
const DAY_OF_MONTH_NORMALIZER: f64 = 31.0;
/// Divisor applied to the posting month number in `compute_je_features`.
const MONTH_NORMALIZER: f64 = 12.0;
34
// Numeric codes stored in `entity_type_code` (nodes) and `edge_type_code`
// (edges). Several of them are not referenced by the builder code in this
// part of the file, hence the blanket `dead_code` allowance.
#[allow(dead_code)]
mod type_codes {
    // Node codes: chart-of-accounts entries and master data
    // (vendors, customers, employees).
    pub const ACCOUNT: u32 = 100;
    pub const VENDOR: u32 = 200;
    pub const CUSTOMER: u32 = 201;
    pub const EMPLOYEE: u32 = 202;

    // Node codes: COSO components/principles, SOX assertions and internal
    // controls.
    pub const COSO_COMPONENT: u32 = 500;
    pub const COSO_PRINCIPLE: u32 = 501;
    pub const SOX_ASSERTION: u32 = 502;
    pub const INTERNAL_CONTROL: u32 = 504;

    // Node codes: P2P documents (300-303), O2C documents (310-312) and the
    // aggregate pool node used when a counterparty's documents are pooled.
    pub const PURCHASE_ORDER: u32 = 300;
    pub const GOODS_RECEIPT: u32 = 301;
    pub const VENDOR_INVOICE: u32 = 302;
    pub const PAYMENT: u32 = 303;
    pub const SALES_ORDER: u32 = 310;
    pub const DELIVERY: u32 = 311;
    pub const CUSTOMER_INVOICE: u32 = 312;
    pub const POOL_NODE: u32 = 399;

    // Edge codes. Only IMPLEMENTS_CONTROL, ENFORCES_ASSERTION, SUPPLIES_TO and
    // COVERS_COSO_PRINCIPLE are emitted by the builder methods visible here.
    pub const IMPLEMENTS_CONTROL: u32 = 40;
    pub const GOVERNED_BY_STANDARD: u32 = 41;
    pub const OWNS_CONTROL: u32 = 42;
    pub const OVERSEE_PROCESS: u32 = 43;
    pub const ENFORCES_ASSERTION: u32 = 44;
    pub const SUPPLIES_TO: u32 = 48;
    pub const COVERS_COSO_PRINCIPLE: u32 = 54;
    pub const CONTAINS_ACCOUNT: u32 = 55;
}
70
/// Switches and limits that control what `HypergraphBuilder` emits.
#[derive(Debug, Clone)]
pub struct HypergraphConfig {
    /// Total node limit; passed to `NodeBudget::new` when the builder is created.
    pub max_nodes: usize,
    /// Document aggregation mode. `PoolByCounterparty` enables pool nodes in
    /// `add_p2p_documents` / `add_o2c_documents`.
    pub aggregation_strategy: AggregationStrategy,
    /// When `false`, `add_coso_framework` is a no-op.
    pub include_coso: bool,
    /// When `false`, `add_controls` is a no-op.
    pub include_controls: bool,
    /// When `true`, `add_controls` also emits SOX assertion nodes and
    /// `EnforcesAssertion` edges.
    pub include_sox: bool,
    /// When `false`, `add_vendors` is a no-op.
    pub include_vendors: bool,
    /// When `false`, `add_customers` is a no-op.
    pub include_customers: bool,
    /// When `false`, `add_employees` is a no-op.
    pub include_employees: bool,
    /// When `false`, `add_p2p_documents` is a no-op.
    pub include_p2p: bool,
    /// When `false`, `add_o2c_documents` is a no-op.
    pub include_o2c: bool,
    /// NOTE(review): not read by any builder method visible in this part of
    /// the file; presumably consumed elsewhere — confirm.
    pub events_as_hyperedges: bool,
    /// A counterparty is pooled when its purchase-order (P2P) or sales-order
    /// (O2C) count is strictly greater than this value.
    pub docs_per_counterparty_threshold: usize,
    /// When `false`, `add_accounts` is a no-op.
    pub include_accounts: bool,
    /// When `false`, `add_journal_entries_as_hyperedges` is a no-op.
    pub je_as_hyperedges: bool,
    /// When `false`, `build_cross_layer_edges` is a no-op.
    pub include_cross_layer_edges: bool,
}
97
98impl Default for HypergraphConfig {
99 fn default() -> Self {
100 Self {
101 max_nodes: 50_000,
102 aggregation_strategy: AggregationStrategy::PoolByCounterparty,
103 include_coso: true,
104 include_controls: true,
105 include_sox: true,
106 include_vendors: true,
107 include_customers: true,
108 include_employees: true,
109 include_p2p: true,
110 include_o2c: true,
111 events_as_hyperedges: true,
112 docs_per_counterparty_threshold: 20,
113 include_accounts: true,
114 je_as_hyperedges: true,
115 include_cross_layer_edges: true,
116 }
117 }
118}
119
/// Incrementally assembles a `Hypergraph` from master data, controls,
/// documents and journal entries; finished by `build`.
pub struct HypergraphBuilder {
    /// Options that gate each `add_*` method.
    config: HypergraphConfig,
    /// Node budget consulted and updated by `try_add_node`.
    budget: NodeBudget,
    /// Nodes accepted so far, in insertion order.
    nodes: Vec<HypergraphNode>,
    /// Pairwise edges emitted so far.
    edges: Vec<CrossLayerEdge>,
    /// Hyperedges emitted so far (journal entries).
    hyperedges: Vec<Hyperedge>,
    /// Node id -> position in `nodes`; used for duplicate detection.
    node_index: HashMap<String, usize>,
    /// Counter bumped by the P2P/O2C pooling code; reported by `build` as
    /// `aggregate_nodes_created`.
    aggregate_count: usize,
    /// Control id -> node id for controls that were added.
    control_node_ids: HashMap<String, String>,
    /// `Debug` name of a `CosoComponent` -> node id.
    coso_component_ids: HashMap<String, String>,
    /// GL account number -> node id.
    account_node_ids: HashMap<String, String>,
    /// Vendor id -> node id.
    vendor_node_ids: HashMap<String, String>,
    /// Customer id -> node id.
    customer_node_ids: HashMap<String, String>,
    /// Employee id -> node id.
    employee_node_ids: HashMap<String, String>,
    /// Pending links as `(document or pool node id, "vendor" | "customer",
    /// counterparty id)`; resolved by `build_cross_layer_edges`.
    doc_counterparty_links: Vec<(String, String, String)>,
}
147
148impl HypergraphBuilder {
149 pub fn new(config: HypergraphConfig) -> Self {
151 let budget = NodeBudget::new(config.max_nodes);
152 Self {
153 config,
154 budget,
155 nodes: Vec::new(),
156 edges: Vec::new(),
157 hyperedges: Vec::new(),
158 node_index: HashMap::new(),
159 aggregate_count: 0,
160 control_node_ids: HashMap::new(),
161 coso_component_ids: HashMap::new(),
162 account_node_ids: HashMap::new(),
163 vendor_node_ids: HashMap::new(),
164 customer_node_ids: HashMap::new(),
165 employee_node_ids: HashMap::new(),
166 doc_counterparty_links: Vec::new(),
167 }
168 }
169
170 pub fn add_coso_framework(&mut self) {
172 if !self.config.include_coso {
173 return;
174 }
175
176 let components = [
177 (CosoComponent::ControlEnvironment, "Control Environment"),
178 (CosoComponent::RiskAssessment, "Risk Assessment"),
179 (CosoComponent::ControlActivities, "Control Activities"),
180 (
181 CosoComponent::InformationCommunication,
182 "Information & Communication",
183 ),
184 (CosoComponent::MonitoringActivities, "Monitoring Activities"),
185 ];
186
187 for (component, name) in &components {
188 let id = format!("coso_comp_{}", name.replace(' ', "_").replace('&', "and"));
189 if self.try_add_node(HypergraphNode {
190 id: id.clone(),
191 entity_type: "CosoComponent".to_string(),
192 entity_type_code: type_codes::COSO_COMPONENT,
193 layer: HypergraphLayer::GovernanceControls,
194 external_id: format!("{:?}", component),
195 label: name.to_string(),
196 properties: HashMap::new(),
197 features: vec![component_to_feature(component)],
198 is_anomaly: false,
199 anomaly_type: None,
200 is_aggregate: false,
201 aggregate_count: 0,
202 }) {
203 self.coso_component_ids
204 .insert(format!("{:?}", component), id);
205 }
206 }
207
208 let principles = [
209 (
210 CosoPrinciple::IntegrityAndEthics,
211 "Integrity and Ethics",
212 CosoComponent::ControlEnvironment,
213 ),
214 (
215 CosoPrinciple::BoardOversight,
216 "Board Oversight",
217 CosoComponent::ControlEnvironment,
218 ),
219 (
220 CosoPrinciple::OrganizationalStructure,
221 "Organizational Structure",
222 CosoComponent::ControlEnvironment,
223 ),
224 (
225 CosoPrinciple::CommitmentToCompetence,
226 "Commitment to Competence",
227 CosoComponent::ControlEnvironment,
228 ),
229 (
230 CosoPrinciple::Accountability,
231 "Accountability",
232 CosoComponent::ControlEnvironment,
233 ),
234 (
235 CosoPrinciple::ClearObjectives,
236 "Clear Objectives",
237 CosoComponent::RiskAssessment,
238 ),
239 (
240 CosoPrinciple::IdentifyRisks,
241 "Identify Risks",
242 CosoComponent::RiskAssessment,
243 ),
244 (
245 CosoPrinciple::FraudRisk,
246 "Fraud Risk",
247 CosoComponent::RiskAssessment,
248 ),
249 (
250 CosoPrinciple::ChangeIdentification,
251 "Change Identification",
252 CosoComponent::RiskAssessment,
253 ),
254 (
255 CosoPrinciple::ControlActions,
256 "Control Actions",
257 CosoComponent::ControlActivities,
258 ),
259 (
260 CosoPrinciple::TechnologyControls,
261 "Technology Controls",
262 CosoComponent::ControlActivities,
263 ),
264 (
265 CosoPrinciple::PoliciesAndProcedures,
266 "Policies and Procedures",
267 CosoComponent::ControlActivities,
268 ),
269 (
270 CosoPrinciple::QualityInformation,
271 "Quality Information",
272 CosoComponent::InformationCommunication,
273 ),
274 (
275 CosoPrinciple::InternalCommunication,
276 "Internal Communication",
277 CosoComponent::InformationCommunication,
278 ),
279 (
280 CosoPrinciple::ExternalCommunication,
281 "External Communication",
282 CosoComponent::InformationCommunication,
283 ),
284 (
285 CosoPrinciple::OngoingMonitoring,
286 "Ongoing Monitoring",
287 CosoComponent::MonitoringActivities,
288 ),
289 (
290 CosoPrinciple::DeficiencyEvaluation,
291 "Deficiency Evaluation",
292 CosoComponent::MonitoringActivities,
293 ),
294 ];
295
296 for (principle, name, parent_component) in &principles {
297 let principle_id = format!("coso_prin_{}", name.replace(' ', "_").replace('&', "and"));
298 if self.try_add_node(HypergraphNode {
299 id: principle_id.clone(),
300 entity_type: "CosoPrinciple".to_string(),
301 entity_type_code: type_codes::COSO_PRINCIPLE,
302 layer: HypergraphLayer::GovernanceControls,
303 external_id: format!("{:?}", principle),
304 label: name.to_string(),
305 properties: {
306 let mut p = HashMap::new();
307 p.insert(
308 "principle_number".to_string(),
309 Value::Number(principle.principle_number().into()),
310 );
311 p
312 },
313 features: vec![principle.principle_number() as f64],
314 is_anomaly: false,
315 anomaly_type: None,
316 is_aggregate: false,
317 aggregate_count: 0,
318 }) {
319 let comp_key = format!("{:?}", parent_component);
321 if let Some(comp_id) = self.coso_component_ids.get(&comp_key) {
322 self.edges.push(CrossLayerEdge {
323 source_id: principle_id,
324 source_layer: HypergraphLayer::GovernanceControls,
325 target_id: comp_id.clone(),
326 target_layer: HypergraphLayer::GovernanceControls,
327 edge_type: "CoversCosoPrinciple".to_string(),
328 edge_type_code: type_codes::COVERS_COSO_PRINCIPLE,
329 properties: HashMap::new(),
330 });
331 }
332 }
333 }
334 }
335
336 pub fn add_controls(&mut self, controls: &[InternalControl]) {
338 if !self.config.include_controls {
339 return;
340 }
341
342 for control in controls {
343 let node_id = format!("ctrl_{}", control.control_id);
344 if self.try_add_node(HypergraphNode {
345 id: node_id.clone(),
346 entity_type: "InternalControl".to_string(),
347 entity_type_code: type_codes::INTERNAL_CONTROL,
348 layer: HypergraphLayer::GovernanceControls,
349 external_id: control.control_id.clone(),
350 label: control.control_name.clone(),
351 properties: {
352 let mut p = HashMap::new();
353 p.insert(
354 "control_type".to_string(),
355 Value::String(format!("{:?}", control.control_type)),
356 );
357 p.insert(
358 "risk_level".to_string(),
359 Value::String(format!("{:?}", control.risk_level)),
360 );
361 p.insert(
362 "is_key_control".to_string(),
363 Value::Bool(control.is_key_control),
364 );
365 p.insert(
366 "maturity_level".to_string(),
367 Value::String(format!("{:?}", control.maturity_level)),
368 );
369 p
370 },
371 features: vec![
372 if control.is_key_control { 1.0 } else { 0.0 },
373 control.maturity_level.level() as f64 / 5.0,
374 ],
375 is_anomaly: false,
376 anomaly_type: None,
377 is_aggregate: false,
378 aggregate_count: 0,
379 }) {
380 self.control_node_ids
381 .insert(control.control_id.clone(), node_id.clone());
382
383 let comp_key = format!("{:?}", control.coso_component);
385 if let Some(comp_id) = self.coso_component_ids.get(&comp_key) {
386 self.edges.push(CrossLayerEdge {
387 source_id: node_id.clone(),
388 source_layer: HypergraphLayer::GovernanceControls,
389 target_id: comp_id.clone(),
390 target_layer: HypergraphLayer::GovernanceControls,
391 edge_type: "ImplementsControl".to_string(),
392 edge_type_code: type_codes::IMPLEMENTS_CONTROL,
393 properties: HashMap::new(),
394 });
395 }
396
397 if self.config.include_sox {
399 let assertion_id = format!("sox_{:?}", control.sox_assertion).to_lowercase();
400 if !self.node_index.contains_key(&assertion_id) {
402 self.try_add_node(HypergraphNode {
403 id: assertion_id.clone(),
404 entity_type: "SoxAssertion".to_string(),
405 entity_type_code: type_codes::SOX_ASSERTION,
406 layer: HypergraphLayer::GovernanceControls,
407 external_id: format!("{:?}", control.sox_assertion),
408 label: format!("{:?}", control.sox_assertion),
409 properties: HashMap::new(),
410 features: vec![],
411 is_anomaly: false,
412 anomaly_type: None,
413 is_aggregate: false,
414 aggregate_count: 0,
415 });
416 }
417 self.edges.push(CrossLayerEdge {
418 source_id: node_id,
419 source_layer: HypergraphLayer::GovernanceControls,
420 target_id: assertion_id,
421 target_layer: HypergraphLayer::GovernanceControls,
422 edge_type: "EnforcesAssertion".to_string(),
423 edge_type_code: type_codes::ENFORCES_ASSERTION,
424 properties: HashMap::new(),
425 });
426 }
427 }
428 }
429 }
430
431 pub fn add_vendors(&mut self, vendors: &[Vendor]) {
433 if !self.config.include_vendors {
434 return;
435 }
436
437 for vendor in vendors {
438 let node_id = format!("vnd_{}", vendor.vendor_id);
439 if self.try_add_node(HypergraphNode {
440 id: node_id.clone(),
441 entity_type: "Vendor".to_string(),
442 entity_type_code: type_codes::VENDOR,
443 layer: HypergraphLayer::GovernanceControls,
444 external_id: vendor.vendor_id.clone(),
445 label: vendor.name.clone(),
446 properties: {
447 let mut p = HashMap::new();
448 p.insert(
449 "vendor_type".to_string(),
450 Value::String(format!("{:?}", vendor.vendor_type)),
451 );
452 p.insert("country".to_string(), Value::String(vendor.country.clone()));
453 p.insert("is_active".to_string(), Value::Bool(vendor.is_active));
454 p
455 },
456 features: vec![if vendor.is_active { 1.0 } else { 0.0 }],
457 is_anomaly: false,
458 anomaly_type: None,
459 is_aggregate: false,
460 aggregate_count: 0,
461 }) {
462 self.vendor_node_ids
463 .insert(vendor.vendor_id.clone(), node_id);
464 }
465 }
466 }
467
468 pub fn add_customers(&mut self, customers: &[Customer]) {
470 if !self.config.include_customers {
471 return;
472 }
473
474 for customer in customers {
475 let node_id = format!("cust_{}", customer.customer_id);
476 if self.try_add_node(HypergraphNode {
477 id: node_id.clone(),
478 entity_type: "Customer".to_string(),
479 entity_type_code: type_codes::CUSTOMER,
480 layer: HypergraphLayer::GovernanceControls,
481 external_id: customer.customer_id.clone(),
482 label: customer.name.clone(),
483 properties: {
484 let mut p = HashMap::new();
485 p.insert(
486 "customer_type".to_string(),
487 Value::String(format!("{:?}", customer.customer_type)),
488 );
489 p.insert(
490 "country".to_string(),
491 Value::String(customer.country.clone()),
492 );
493 p.insert(
494 "credit_rating".to_string(),
495 Value::String(format!("{:?}", customer.credit_rating)),
496 );
497 p
498 },
499 features: vec![if customer.is_active { 1.0 } else { 0.0 }],
500 is_anomaly: false,
501 anomaly_type: None,
502 is_aggregate: false,
503 aggregate_count: 0,
504 }) {
505 self.customer_node_ids
506 .insert(customer.customer_id.clone(), node_id);
507 }
508 }
509 }
510
511 pub fn add_employees(&mut self, employees: &[Employee]) {
513 if !self.config.include_employees {
514 return;
515 }
516
517 for employee in employees {
518 let node_id = format!("emp_{}", employee.employee_id);
519 if self.try_add_node(HypergraphNode {
520 id: node_id.clone(),
521 entity_type: "Employee".to_string(),
522 entity_type_code: type_codes::EMPLOYEE,
523 layer: HypergraphLayer::GovernanceControls,
524 external_id: employee.employee_id.clone(),
525 label: employee.display_name.clone(),
526 properties: {
527 let mut p = HashMap::new();
528 p.insert(
529 "persona".to_string(),
530 Value::String(format!("{:?}", employee.persona)),
531 );
532 p.insert(
533 "job_level".to_string(),
534 Value::String(format!("{:?}", employee.job_level)),
535 );
536 p.insert(
537 "company_code".to_string(),
538 Value::String(employee.company_code.clone()),
539 );
540 p
541 },
542 features: vec![employee
543 .approval_limit
544 .to_string()
545 .parse::<f64>()
546 .unwrap_or(0.0)
547 .ln_1p()],
548 is_anomaly: false,
549 anomaly_type: None,
550 is_aggregate: false,
551 aggregate_count: 0,
552 }) {
553 self.employee_node_ids
554 .insert(employee.employee_id.clone(), node_id);
555 }
556 }
557 }
558
559 pub fn add_accounts(&mut self, coa: &ChartOfAccounts) {
561 if !self.config.include_accounts {
562 return;
563 }
564
565 for account in &coa.accounts {
566 let node_id = format!("acct_{}", account.account_number);
567 if self.try_add_node(HypergraphNode {
568 id: node_id.clone(),
569 entity_type: "Account".to_string(),
570 entity_type_code: type_codes::ACCOUNT,
571 layer: HypergraphLayer::AccountingNetwork,
572 external_id: account.account_number.clone(),
573 label: account.short_description.clone(),
574 properties: {
575 let mut p = HashMap::new();
576 p.insert(
577 "account_type".to_string(),
578 Value::String(format!("{:?}", account.account_type)),
579 );
580 p.insert(
581 "is_control_account".to_string(),
582 Value::Bool(account.is_control_account),
583 );
584 p.insert("is_postable".to_string(), Value::Bool(account.is_postable));
585 p
586 },
587 features: vec![
588 account_type_feature(&account.account_type),
589 if account.is_control_account { 1.0 } else { 0.0 },
590 if account.normal_debit_balance {
591 1.0
592 } else {
593 0.0
594 },
595 ],
596 is_anomaly: false,
597 anomaly_type: None,
598 is_aggregate: false,
599 aggregate_count: 0,
600 }) {
601 self.account_node_ids
602 .insert(account.account_number.clone(), node_id);
603 }
604 }
605 }
606
607 pub fn add_journal_entries_as_hyperedges(&mut self, entries: &[JournalEntry]) {
611 if !self.config.je_as_hyperedges {
612 return;
613 }
614
615 for entry in entries {
616 let mut participants = Vec::new();
617
618 for line in &entry.lines {
619 let account_id = format!("acct_{}", line.gl_account);
620
621 if !self.node_index.contains_key(&account_id) {
623 self.try_add_node(HypergraphNode {
624 id: account_id.clone(),
625 entity_type: "Account".to_string(),
626 entity_type_code: type_codes::ACCOUNT,
627 layer: HypergraphLayer::AccountingNetwork,
628 external_id: line.gl_account.clone(),
629 label: line
630 .account_description
631 .clone()
632 .unwrap_or_else(|| line.gl_account.clone()),
633 properties: HashMap::new(),
634 features: vec![],
635 is_anomaly: false,
636 anomaly_type: None,
637 is_aggregate: false,
638 aggregate_count: 0,
639 });
640 self.account_node_ids
641 .insert(line.gl_account.clone(), account_id.clone());
642 }
643
644 let amount: f64 = if !line.debit_amount.is_zero() {
645 line.debit_amount.to_string().parse().unwrap_or(0.0)
646 } else {
647 line.credit_amount.to_string().parse().unwrap_or(0.0)
648 };
649
650 let role = if !line.debit_amount.is_zero() {
651 "debit"
652 } else {
653 "credit"
654 };
655
656 participants.push(HyperedgeParticipant {
657 node_id: account_id,
658 role: role.to_string(),
659 weight: Some(amount),
660 });
661 }
662
663 if participants.is_empty() {
664 continue;
665 }
666
667 let doc_id = entry.header.document_id.to_string();
668 let subtype = entry
669 .header
670 .business_process
671 .as_ref()
672 .map(|bp| format!("{:?}", bp))
673 .unwrap_or_else(|| "General".to_string());
674
675 self.hyperedges.push(Hyperedge {
676 id: format!("je_{}", doc_id),
677 hyperedge_type: "JournalEntry".to_string(),
678 subtype,
679 participants,
680 layer: HypergraphLayer::AccountingNetwork,
681 properties: {
682 let mut p = HashMap::new();
683 p.insert("document_id".to_string(), Value::String(doc_id));
684 p.insert(
685 "company_code".to_string(),
686 Value::String(entry.header.company_code.clone()),
687 );
688 p.insert(
689 "document_type".to_string(),
690 Value::String(entry.header.document_type.clone()),
691 );
692 p.insert(
693 "created_by".to_string(),
694 Value::String(entry.header.created_by.clone()),
695 );
696 p
697 },
698 timestamp: Some(entry.header.posting_date),
699 is_anomaly: entry.header.is_anomaly || entry.header.is_fraud,
700 anomaly_type: entry.header.anomaly_type.clone().or_else(|| {
701 entry
702 .header
703 .fraud_type
704 .as_ref()
705 .map(|ft| format!("{:?}", ft))
706 }),
707 features: compute_je_features(entry),
708 });
709 }
710 }
711
712 pub fn add_p2p_documents(
716 &mut self,
717 purchase_orders: &[datasynth_core::models::documents::PurchaseOrder],
718 goods_receipts: &[datasynth_core::models::documents::GoodsReceipt],
719 vendor_invoices: &[datasynth_core::models::documents::VendorInvoice],
720 payments: &[datasynth_core::models::documents::Payment],
721 ) {
722 if !self.config.include_p2p {
723 return;
724 }
725
726 let mut vendor_doc_counts: HashMap<String, usize> = HashMap::new();
728 for po in purchase_orders {
729 *vendor_doc_counts.entry(po.vendor_id.clone()).or_insert(0) += 1;
730 }
731
732 let threshold = self.config.docs_per_counterparty_threshold;
733 let should_aggregate = matches!(
734 self.config.aggregation_strategy,
735 AggregationStrategy::PoolByCounterparty
736 );
737
738 let vendors_needing_pools: Vec<String> = if should_aggregate {
740 vendor_doc_counts
741 .iter()
742 .filter(|(_, count)| **count > threshold)
743 .map(|(vid, _)| vid.clone())
744 .collect()
745 } else {
746 Vec::new()
747 };
748
749 for vendor_id in &vendors_needing_pools {
751 let count = vendor_doc_counts[vendor_id];
752 let pool_id = format!("pool_p2p_{}", vendor_id);
753 if self.try_add_node(HypergraphNode {
754 id: pool_id.clone(),
755 entity_type: "P2PPool".to_string(),
756 entity_type_code: type_codes::POOL_NODE,
757 layer: HypergraphLayer::ProcessEvents,
758 external_id: format!("pool_p2p_{}", vendor_id),
759 label: format!("P2P Pool ({}): {} docs", vendor_id, count),
760 properties: {
761 let mut p = HashMap::new();
762 p.insert("vendor_id".to_string(), Value::String(vendor_id.clone()));
763 p.insert("document_count".to_string(), Value::Number(count.into()));
764 p
765 },
766 features: vec![count as f64],
767 is_anomaly: false,
768 anomaly_type: None,
769 is_aggregate: true,
770 aggregate_count: count,
771 }) {
772 self.doc_counterparty_links.push((
773 pool_id,
774 "vendor".to_string(),
775 vendor_id.clone(),
776 ));
777 }
778 self.aggregate_count += 1;
779 }
780
781 for po in purchase_orders {
783 if should_aggregate && vendors_needing_pools.contains(&po.vendor_id) {
784 continue; }
786
787 let doc_id = &po.header.document_id;
788 let node_id = format!("po_{}", doc_id);
789 if self.try_add_node(HypergraphNode {
790 id: node_id.clone(),
791 entity_type: "PurchaseOrder".to_string(),
792 entity_type_code: type_codes::PURCHASE_ORDER,
793 layer: HypergraphLayer::ProcessEvents,
794 external_id: doc_id.clone(),
795 label: format!("PO {}", doc_id),
796 properties: {
797 let mut p = HashMap::new();
798 p.insert("vendor_id".to_string(), Value::String(po.vendor_id.clone()));
799 p.insert(
800 "company_code".to_string(),
801 Value::String(po.header.company_code.clone()),
802 );
803 p
804 },
805 features: vec![po
806 .total_net_amount
807 .to_string()
808 .parse::<f64>()
809 .unwrap_or(0.0)
810 .ln_1p()],
811 is_anomaly: false,
812 anomaly_type: None,
813 is_aggregate: false,
814 aggregate_count: 0,
815 }) {
816 self.doc_counterparty_links.push((
817 node_id,
818 "vendor".to_string(),
819 po.vendor_id.clone(),
820 ));
821 }
822 }
823
824 for gr in goods_receipts {
826 let vendor_id = gr.vendor_id.as_deref().unwrap_or("UNKNOWN");
827 if should_aggregate && vendors_needing_pools.contains(&vendor_id.to_string()) {
828 continue;
829 }
830 let doc_id = &gr.header.document_id;
831 let node_id = format!("gr_{}", doc_id);
832 self.try_add_node(HypergraphNode {
833 id: node_id,
834 entity_type: "GoodsReceipt".to_string(),
835 entity_type_code: type_codes::GOODS_RECEIPT,
836 layer: HypergraphLayer::ProcessEvents,
837 external_id: doc_id.clone(),
838 label: format!("GR {}", doc_id),
839 properties: {
840 let mut p = HashMap::new();
841 p.insert(
842 "vendor_id".to_string(),
843 Value::String(vendor_id.to_string()),
844 );
845 p
846 },
847 features: vec![gr
848 .total_value
849 .to_string()
850 .parse::<f64>()
851 .unwrap_or(0.0)
852 .ln_1p()],
853 is_anomaly: false,
854 anomaly_type: None,
855 is_aggregate: false,
856 aggregate_count: 0,
857 });
858 }
859
860 for inv in vendor_invoices {
862 if should_aggregate && vendors_needing_pools.contains(&inv.vendor_id) {
863 continue;
864 }
865 let doc_id = &inv.header.document_id;
866 let node_id = format!("vinv_{}", doc_id);
867 self.try_add_node(HypergraphNode {
868 id: node_id,
869 entity_type: "VendorInvoice".to_string(),
870 entity_type_code: type_codes::VENDOR_INVOICE,
871 layer: HypergraphLayer::ProcessEvents,
872 external_id: doc_id.clone(),
873 label: format!("VI {}", doc_id),
874 properties: {
875 let mut p = HashMap::new();
876 p.insert(
877 "vendor_id".to_string(),
878 Value::String(inv.vendor_id.clone()),
879 );
880 p
881 },
882 features: vec![inv
883 .payable_amount
884 .to_string()
885 .parse::<f64>()
886 .unwrap_or(0.0)
887 .ln_1p()],
888 is_anomaly: false,
889 anomaly_type: None,
890 is_aggregate: false,
891 aggregate_count: 0,
892 });
893 }
894
895 for pmt in payments {
897 let doc_id = &pmt.header.document_id;
898 let node_id = format!("pmt_{}", doc_id);
899 self.try_add_node(HypergraphNode {
900 id: node_id,
901 entity_type: "Payment".to_string(),
902 entity_type_code: type_codes::PAYMENT,
903 layer: HypergraphLayer::ProcessEvents,
904 external_id: doc_id.clone(),
905 label: format!("PMT {}", doc_id),
906 properties: HashMap::new(),
907 features: vec![pmt.amount.to_string().parse::<f64>().unwrap_or(0.0).ln_1p()],
908 is_anomaly: false,
909 anomaly_type: None,
910 is_aggregate: false,
911 aggregate_count: 0,
912 });
913 }
914 }
915
916 pub fn add_o2c_documents(
918 &mut self,
919 sales_orders: &[datasynth_core::models::documents::SalesOrder],
920 deliveries: &[datasynth_core::models::documents::Delivery],
921 customer_invoices: &[datasynth_core::models::documents::CustomerInvoice],
922 ) {
923 if !self.config.include_o2c {
924 return;
925 }
926
927 let mut customer_doc_counts: HashMap<String, usize> = HashMap::new();
929 for so in sales_orders {
930 *customer_doc_counts
931 .entry(so.customer_id.clone())
932 .or_insert(0) += 1;
933 }
934
935 let threshold = self.config.docs_per_counterparty_threshold;
936 let should_aggregate = matches!(
937 self.config.aggregation_strategy,
938 AggregationStrategy::PoolByCounterparty
939 );
940
941 let customers_needing_pools: Vec<String> = if should_aggregate {
942 customer_doc_counts
943 .iter()
944 .filter(|(_, count)| **count > threshold)
945 .map(|(cid, _)| cid.clone())
946 .collect()
947 } else {
948 Vec::new()
949 };
950
951 for customer_id in &customers_needing_pools {
953 let count = customer_doc_counts[customer_id];
954 let pool_id = format!("pool_o2c_{}", customer_id);
955 if self.try_add_node(HypergraphNode {
956 id: pool_id.clone(),
957 entity_type: "O2CPool".to_string(),
958 entity_type_code: type_codes::POOL_NODE,
959 layer: HypergraphLayer::ProcessEvents,
960 external_id: format!("pool_o2c_{}", customer_id),
961 label: format!("O2C Pool ({}): {} docs", customer_id, count),
962 properties: {
963 let mut p = HashMap::new();
964 p.insert(
965 "customer_id".to_string(),
966 Value::String(customer_id.clone()),
967 );
968 p.insert("document_count".to_string(), Value::Number(count.into()));
969 p
970 },
971 features: vec![count as f64],
972 is_anomaly: false,
973 anomaly_type: None,
974 is_aggregate: true,
975 aggregate_count: count,
976 }) {
977 self.doc_counterparty_links.push((
978 pool_id,
979 "customer".to_string(),
980 customer_id.clone(),
981 ));
982 }
983 self.aggregate_count += 1;
984 }
985
986 for so in sales_orders {
987 if should_aggregate && customers_needing_pools.contains(&so.customer_id) {
988 continue;
989 }
990 let doc_id = &so.header.document_id;
991 let node_id = format!("so_{}", doc_id);
992 if self.try_add_node(HypergraphNode {
993 id: node_id.clone(),
994 entity_type: "SalesOrder".to_string(),
995 entity_type_code: type_codes::SALES_ORDER,
996 layer: HypergraphLayer::ProcessEvents,
997 external_id: doc_id.clone(),
998 label: format!("SO {}", doc_id),
999 properties: {
1000 let mut p = HashMap::new();
1001 p.insert(
1002 "customer_id".to_string(),
1003 Value::String(so.customer_id.clone()),
1004 );
1005 p
1006 },
1007 features: vec![so
1008 .total_net_amount
1009 .to_string()
1010 .parse::<f64>()
1011 .unwrap_or(0.0)
1012 .ln_1p()],
1013 is_anomaly: false,
1014 anomaly_type: None,
1015 is_aggregate: false,
1016 aggregate_count: 0,
1017 }) {
1018 self.doc_counterparty_links.push((
1019 node_id,
1020 "customer".to_string(),
1021 so.customer_id.clone(),
1022 ));
1023 }
1024 }
1025
1026 for del in deliveries {
1027 if should_aggregate && customers_needing_pools.contains(&del.customer_id) {
1028 continue;
1029 }
1030 let doc_id = &del.header.document_id;
1031 let node_id = format!("del_{}", doc_id);
1032 self.try_add_node(HypergraphNode {
1033 id: node_id,
1034 entity_type: "Delivery".to_string(),
1035 entity_type_code: type_codes::DELIVERY,
1036 layer: HypergraphLayer::ProcessEvents,
1037 external_id: doc_id.clone(),
1038 label: format!("DEL {}", doc_id),
1039 properties: HashMap::new(),
1040 features: vec![],
1041 is_anomaly: false,
1042 anomaly_type: None,
1043 is_aggregate: false,
1044 aggregate_count: 0,
1045 });
1046 }
1047
1048 for inv in customer_invoices {
1049 if should_aggregate && customers_needing_pools.contains(&inv.customer_id) {
1050 continue;
1051 }
1052 let doc_id = &inv.header.document_id;
1053 let node_id = format!("cinv_{}", doc_id);
1054 self.try_add_node(HypergraphNode {
1055 id: node_id,
1056 entity_type: "CustomerInvoice".to_string(),
1057 entity_type_code: type_codes::CUSTOMER_INVOICE,
1058 layer: HypergraphLayer::ProcessEvents,
1059 external_id: doc_id.clone(),
1060 label: format!("CI {}", doc_id),
1061 properties: HashMap::new(),
1062 features: vec![inv
1063 .total_gross_amount
1064 .to_string()
1065 .parse::<f64>()
1066 .unwrap_or(0.0)
1067 .ln_1p()],
1068 is_anomaly: false,
1069 anomaly_type: None,
1070 is_aggregate: false,
1071 aggregate_count: 0,
1072 });
1073 }
1074 }
1075
1076 pub fn build_cross_layer_edges(&mut self) {
1078 if !self.config.include_cross_layer_edges {
1079 return;
1080 }
1081
1082 let links = std::mem::take(&mut self.doc_counterparty_links);
1084 for (doc_node_id, counterparty_type, counterparty_id) in &links {
1085 let source_node_id = match counterparty_type.as_str() {
1086 "vendor" => self.vendor_node_ids.get(counterparty_id),
1087 "customer" => self.customer_node_ids.get(counterparty_id),
1088 _ => None,
1089 };
1090 if let Some(source_id) = source_node_id {
1091 self.edges.push(CrossLayerEdge {
1092 source_id: source_id.clone(),
1093 source_layer: HypergraphLayer::GovernanceControls,
1094 target_id: doc_node_id.clone(),
1095 target_layer: HypergraphLayer::ProcessEvents,
1096 edge_type: "SuppliesTo".to_string(),
1097 edge_type_code: type_codes::SUPPLIES_TO,
1098 properties: HashMap::new(),
1099 });
1100 }
1101 }
1102 self.doc_counterparty_links = links;
1103 }
1104
1105 pub fn build(mut self) -> Hypergraph {
1107 self.build_cross_layer_edges();
1109
1110 let mut layer_node_counts: HashMap<String, usize> = HashMap::new();
1112 let mut node_type_counts: HashMap<String, usize> = HashMap::new();
1113 let mut anomalous_nodes = 0;
1114
1115 for node in &self.nodes {
1116 *layer_node_counts
1117 .entry(node.layer.name().to_string())
1118 .or_insert(0) += 1;
1119 *node_type_counts
1120 .entry(node.entity_type.clone())
1121 .or_insert(0) += 1;
1122 if node.is_anomaly {
1123 anomalous_nodes += 1;
1124 }
1125 }
1126
1127 let mut edge_type_counts: HashMap<String, usize> = HashMap::new();
1128 for edge in &self.edges {
1129 *edge_type_counts.entry(edge.edge_type.clone()).or_insert(0) += 1;
1130 }
1131
1132 let mut hyperedge_type_counts: HashMap<String, usize> = HashMap::new();
1133 let mut anomalous_hyperedges = 0;
1134 for he in &self.hyperedges {
1135 *hyperedge_type_counts
1136 .entry(he.hyperedge_type.clone())
1137 .or_insert(0) += 1;
1138 if he.is_anomaly {
1139 anomalous_hyperedges += 1;
1140 }
1141 }
1142
1143 let budget_report = NodeBudgetReport {
1144 total_budget: self.budget.total_max(),
1145 total_used: self.budget.total_count(),
1146 layer1_budget: self.budget.layer1_max,
1147 layer1_used: self.budget.layer1_count,
1148 layer2_budget: self.budget.layer2_max,
1149 layer2_used: self.budget.layer2_count,
1150 layer3_budget: self.budget.layer3_max,
1151 layer3_used: self.budget.layer3_count,
1152 aggregate_nodes_created: self.aggregate_count,
1153 aggregation_triggered: self.aggregate_count > 0,
1154 };
1155
1156 let metadata = HypergraphMetadata {
1157 name: "multi_layer_hypergraph".to_string(),
1158 num_nodes: self.nodes.len(),
1159 num_edges: self.edges.len(),
1160 num_hyperedges: self.hyperedges.len(),
1161 layer_node_counts,
1162 node_type_counts,
1163 edge_type_counts,
1164 hyperedge_type_counts,
1165 anomalous_nodes,
1166 anomalous_hyperedges,
1167 source: "datasynth".to_string(),
1168 generated_at: chrono::Utc::now().to_rfc3339(),
1169 budget_report: budget_report.clone(),
1170 files: vec![
1171 "nodes.jsonl".to_string(),
1172 "edges.jsonl".to_string(),
1173 "hyperedges.jsonl".to_string(),
1174 "metadata.json".to_string(),
1175 ],
1176 };
1177
1178 Hypergraph {
1179 nodes: self.nodes,
1180 edges: self.edges,
1181 hyperedges: self.hyperedges,
1182 metadata,
1183 budget_report,
1184 }
1185 }
1186
1187 fn try_add_node(&mut self, node: HypergraphNode) -> bool {
1189 if self.node_index.contains_key(&node.id) {
1190 return false; }
1192
1193 if !self.budget.can_add(node.layer) {
1194 return false; }
1196
1197 let id = node.id.clone();
1198 let layer = node.layer;
1199 self.nodes.push(node);
1200 let idx = self.nodes.len() - 1;
1201 self.node_index.insert(id, idx);
1202 self.budget.record_add(layer);
1203 true
1204 }
1205}
1206
1207fn component_to_feature(component: &CosoComponent) -> f64 {
1209 match component {
1210 CosoComponent::ControlEnvironment => 1.0,
1211 CosoComponent::RiskAssessment => 2.0,
1212 CosoComponent::ControlActivities => 3.0,
1213 CosoComponent::InformationCommunication => 4.0,
1214 CosoComponent::MonitoringActivities => 5.0,
1215 }
1216}
1217
1218fn account_type_feature(account_type: &datasynth_core::models::AccountType) -> f64 {
1220 use datasynth_core::models::AccountType;
1221 match account_type {
1222 AccountType::Asset => 1.0,
1223 AccountType::Liability => 2.0,
1224 AccountType::Equity => 3.0,
1225 AccountType::Revenue => 4.0,
1226 AccountType::Expense => 5.0,
1227 AccountType::Statistical => 6.0,
1228 }
1229}
1230
1231fn compute_je_features(entry: &JournalEntry) -> Vec<f64> {
1233 let total_debit: f64 = entry
1234 .lines
1235 .iter()
1236 .map(|l| l.debit_amount.to_string().parse::<f64>().unwrap_or(0.0))
1237 .sum();
1238
1239 let line_count = entry.lines.len() as f64;
1240 let posting_date = entry.header.posting_date;
1241 let weekday = posting_date.weekday().num_days_from_monday() as f64 / WEEKDAY_NORMALIZER;
1242 let day = posting_date.day() as f64 / DAY_OF_MONTH_NORMALIZER;
1243 let month = posting_date.month() as f64 / MONTH_NORMALIZER;
1244 let is_month_end = if posting_date.day() >= MONTH_END_DAY_THRESHOLD {
1245 1.0
1246 } else {
1247 0.0
1248 };
1249
1250 vec![
1251 (total_debit.abs() + 1.0).ln(), line_count, weekday, day, month, is_month_end, ]
1258}
1259
#[cfg(test)]
mod tests {
    use super::*;
    use datasynth_core::models::{
        AccountSubType, AccountType, ChartOfAccounts, CoAComplexity, ControlFrequency, ControlType,
        CosoComponent, CosoMaturityLevel, GLAccount, InternalControl, RiskLevel, SoxAssertion,
        UserPersona,
    };

    /// Returns a builder whose configuration is the default apart from the
    /// overall node cap.
    fn builder_with_max_nodes(max_nodes: usize) -> HypergraphBuilder {
        HypergraphBuilder::new(HypergraphConfig {
            max_nodes,
            ..Default::default()
        })
    }

    /// Builds a small chart of accounts holding two GL accounts:
    /// "1000" (Cash, an asset) and "2000" (AP, a liability).
    fn make_test_coa() -> ChartOfAccounts {
        let mut chart = ChartOfAccounts::new(
            String::from("TEST_COA"),
            String::from("Test Chart"),
            String::from("US"),
            datasynth_core::models::IndustrySector::Manufacturing,
            CoAComplexity::Small,
        );

        let cash = GLAccount::new(
            String::from("1000"),
            String::from("Cash"),
            AccountType::Asset,
            AccountSubType::Cash,
        );
        chart.add_account(cash);

        let payables = GLAccount::new(
            String::from("2000"),
            String::from("AP"),
            AccountType::Liability,
            AccountSubType::AccountsPayable,
        );
        chart.add_account(payables);

        chart
    }

    /// Builds a single key, preventive, transaction-level control ("C001")
    /// tied to the `Existence` assertion and the `ControlActivities` component.
    fn make_test_control() -> InternalControl {
        InternalControl {
            control_id: String::from("C001"),
            control_name: String::from("Three-Way Match"),
            control_type: ControlType::Preventive,
            objective: String::from("Ensure proper matching"),
            frequency: ControlFrequency::Transactional,
            owner_role: UserPersona::Controller,
            risk_level: RiskLevel::High,
            description: String::from("Test control"),
            is_key_control: true,
            sox_assertion: SoxAssertion::Existence,
            coso_component: CosoComponent::ControlActivities,
            coso_principles: vec![CosoPrinciple::ControlActions],
            control_scope: datasynth_core::models::ControlScope::TransactionLevel,
            maturity_level: CosoMaturityLevel::Managed,
        }
    }

    /// Adding only the COSO framework must yield 22 governance-layer nodes
    /// and 17 `CoversCosoPrinciple` edges.
    // NOTE(review): 22 is presumably 5 components + 17 principles;
    // `add_coso_framework` is defined outside this view, so confirm there.
    #[test]
    fn test_builder_coso_framework() {
        let mut builder = builder_with_max_nodes(1000);
        builder.add_coso_framework();
        let hg = builder.build();

        assert_eq!(hg.nodes.len(), 22);
        for node in &hg.nodes {
            assert!(node.layer == HypergraphLayer::GovernanceControls);
        }

        let covers_edges = hg
            .edges
            .iter()
            .filter(|edge| edge.edge_type == "CoversCosoPrinciple")
            .count();
        assert_eq!(covers_edges, 17);
    }

    /// Adding one control on top of the framework must bring the node count
    /// to 24, with an `InternalControl` and a `SoxAssertion` node present.
    #[test]
    fn test_builder_controls() {
        let mut builder = builder_with_max_nodes(1000);
        builder.add_coso_framework();
        builder.add_controls(&[make_test_control()]);
        let hg = builder.build();

        assert_eq!(hg.nodes.len(), 24);

        let has_entity = |wanted: &str| hg.nodes.iter().any(|node| node.entity_type == wanted);
        assert!(has_entity("InternalControl"));
        assert!(has_entity("SoxAssertion"));
    }

    /// Adding the two-account chart must yield exactly two nodes, both in the
    /// accounting-network layer.
    #[test]
    fn test_builder_accounts() {
        let mut builder = builder_with_max_nodes(1000);
        builder.add_accounts(&make_test_coa());
        let hg = builder.build();

        assert_eq!(hg.nodes.len(), 2);
        for node in &hg.nodes {
            assert!(node.layer == HypergraphLayer::AccountingNetwork);
        }
    }

    /// With a node cap of 10 and every optional section switched off, at most
    /// one of the two accounts may end up in the graph.
    // NOTE(review): the limit of one presumably comes from how the budget
    // splits `max_nodes` across layers; `NodeBudget` is not visible here.
    #[test]
    fn test_budget_enforcement() {
        let tight_config = HypergraphConfig {
            max_nodes: 10,
            include_coso: false,
            include_controls: false,
            include_sox: false,
            include_vendors: false,
            include_customers: false,
            include_employees: false,
            include_p2p: false,
            include_o2c: false,
            ..Default::default()
        };
        let mut builder = HypergraphBuilder::new(tight_config);
        builder.add_accounts(&make_test_coa());
        let hg = builder.build();

        let stored = hg.nodes.len();
        assert!(stored <= 1);
    }

    /// A build with framework, one control and the test chart must produce
    /// nodes and edges, and the metadata counts must match the collections.
    #[test]
    fn test_full_build() {
        let mut builder = builder_with_max_nodes(10000);
        builder.add_coso_framework();
        builder.add_controls(&[make_test_control()]);
        builder.add_accounts(&make_test_coa());
        let hg = builder.build();

        assert!(!hg.nodes.is_empty());
        assert!(!hg.edges.is_empty());

        let meta = &hg.metadata;
        assert_eq!(meta.num_nodes, hg.nodes.len());
        assert_eq!(meta.num_edges, hg.edges.len());
    }
}