Skip to main content

datasynth_graph/models/
hypergraph.rs

1//! Multi-layer hypergraph model types for RustGraph integration.
2//!
3//! Defines a 3-layer hypergraph structure:
4//! - Layer 1: Governance & Controls (COSO, SOX, internal controls, organizational)
5//! - Layer 2: Process Events (P2P/O2C document flows, OCPM events)
6//! - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
7
8use std::collections::HashMap;
9
10use chrono::NaiveDate;
11use serde::{Deserialize, Serialize};
12use serde_json::Value;
13
/// Which layer of the hypergraph a node or hyperedge belongs to.
///
/// The serde representation uses `snake_case` variant names, i.e.
/// `"governance_controls"`, `"process_events"` and `"accounting_network"`.
/// The type is `Copy` and `Hash`, so it can be passed by value and used as a
/// map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum HypergraphLayer {
    /// Layer 1: Governance & Controls (COSO components, internal controls, SOX, organizational).
    GovernanceControls,
    /// Layer 2: Process Events (P2P/O2C document flows, OCPM process events).
    ProcessEvents,
    /// Layer 3: Accounting Network (GL accounts, journal entries as hyperedges).
    AccountingNetwork,
}
25
26impl HypergraphLayer {
27    /// Returns the numeric layer index (1-3).
28    pub fn index(&self) -> u8 {
29        match self {
30            HypergraphLayer::GovernanceControls => 1,
31            HypergraphLayer::ProcessEvents => 2,
32            HypergraphLayer::AccountingNetwork => 3,
33        }
34    }
35
36    /// Returns the display name for the layer.
37    pub fn name(&self) -> &'static str {
38        match self {
39            HypergraphLayer::GovernanceControls => "Governance & Controls",
40            HypergraphLayer::ProcessEvents => "Process Events",
41            HypergraphLayer::AccountingNetwork => "Accounting Network",
42        }
43    }
44}
45
/// Strategy for aggregating nodes when budget is exceeded.
///
/// `Default::default()` yields [`AggregationStrategy::PoolByCounterparty`].
/// The serde representation uses `snake_case` variant names, i.e.
/// `"truncate"`, `"pool_by_counterparty"`, `"pool_by_time_period"` and
/// `"importance_sample"`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AggregationStrategy {
    /// Truncate: simply stop adding nodes after budget is reached.
    Truncate,
    /// Pool documents by their counterparty (vendor/customer).
    ///
    /// This is the default strategy.
    #[default]
    PoolByCounterparty,
    /// Pool documents by time period (month).
    PoolByTimePeriod,
    /// Keep most important nodes based on transaction volume.
    ImportanceSample,
}
60
/// A participant in a hyperedge (node reference with role and optional weight).
///
/// A hyperedge holds a list of these; each entry names one node and says in
/// which role it takes part.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HyperedgeParticipant {
    /// ID of the participating node.
    pub node_id: String,
    /// Role of this participant (e.g., "debit", "credit", "approver", "vendor").
    pub role: String,
    /// Optional weight (e.g., line amount for journal entry lines).
    ///
    /// Left out of the serialized output entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub weight: Option<f64>,
}
72
/// A hyperedge connecting multiple nodes simultaneously.
///
/// Unlike a pairwise edge, a hyperedge references any number of nodes through
/// `participants`, each with its own role. Empty or absent optional data
/// (`properties`, `timestamp`, `anomaly_type`, `features`) is skipped when
/// serializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Hyperedge {
    /// Unique hyperedge identifier.
    pub id: String,
    /// High-level type, e.g. "ProcessFamily", "MultiRelation", "JournalEntry".
    pub hyperedge_type: String,
    /// Subtype with more detail, e.g. "P2P", "O2C", "JournalEntry".
    pub subtype: String,
    /// Nodes participating in this hyperedge with their roles.
    pub participants: Vec<HyperedgeParticipant>,
    /// Which layer this hyperedge belongs to.
    pub layer: HypergraphLayer,
    /// Additional properties as key-value pairs.
    ///
    /// Omitted from the output when empty; defaults to an empty map when the
    /// field is missing on input.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub properties: HashMap<String, Value>,
    /// Optional timestamp for temporal hyperedges.
    ///
    /// A calendar date without time of day; omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub timestamp: Option<NaiveDate>,
    /// Whether this hyperedge represents an anomaly.
    ///
    /// Defaults to `false` when the field is missing on input.
    #[serde(default)]
    pub is_anomaly: bool,
    /// Anomaly type if anomalous.
    ///
    /// Omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub anomaly_type: Option<String>,
    /// Numeric feature vector for ML.
    ///
    /// Omitted from the output when empty; defaults to an empty vector when
    /// the field is missing on input.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub features: Vec<f64>,
}
102
/// A node in the hypergraph with layer assignment and RustGraph type codes.
///
/// A node is either a single source entity or, when `is_aggregate` is set, a
/// pool node standing in for `aggregate_count` original entities.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HypergraphNode {
    /// Unique node identifier.
    pub id: String,
    /// Entity type name (e.g., "Account", "Vendor", "CosoComponent").
    pub entity_type: String,
    /// RustGraph entity type code for import.
    pub entity_type_code: u32,
    /// Which layer this node belongs to.
    pub layer: HypergraphLayer,
    /// External identifier from the source system.
    pub external_id: String,
    /// Human-readable label.
    pub label: String,
    /// Additional properties as key-value pairs.
    ///
    /// Omitted from the output when empty; defaults to an empty map when the
    /// field is missing on input.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub properties: HashMap<String, Value>,
    /// Numeric feature vector for ML.
    ///
    /// Omitted from the output when empty; defaults to an empty vector when
    /// the field is missing on input.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub features: Vec<f64>,
    /// Whether this node represents an anomaly.
    ///
    /// Defaults to `false` when the field is missing on input.
    #[serde(default)]
    pub is_anomaly: bool,
    /// Anomaly type if anomalous.
    ///
    /// Omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub anomaly_type: Option<String>,
    /// Whether this is an aggregate (pool) node from budget compression.
    ///
    /// Defaults to `false` when the field is missing on input.
    #[serde(default)]
    pub is_aggregate: bool,
    /// Number of original entities this aggregate node represents.
    ///
    /// Defaults to `0` when the field is missing on input.
    #[serde(default)]
    pub aggregate_count: usize,
}
137
/// A pairwise edge connecting nodes across or within layers.
///
/// Both endpoints carry their own layer, so an edge may stay inside one layer
/// (`source_layer == target_layer`) or bridge two of them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossLayerEdge {
    /// Source node ID.
    pub source_id: String,
    /// Source node's layer.
    pub source_layer: HypergraphLayer,
    /// Target node ID.
    pub target_id: String,
    /// Target node's layer.
    pub target_layer: HypergraphLayer,
    /// Edge type name (e.g., "ImplementsControl", "GovernedByStandard").
    pub edge_type: String,
    /// RustGraph edge type code for import.
    pub edge_type_code: u32,
    /// Additional properties as key-value pairs.
    ///
    /// Omitted from the output when empty; defaults to an empty map when the
    /// field is missing on input.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub properties: HashMap<String, Value>,
}
157
/// Divisor that yields the Layer 1 (Governance) share of the total budget:
/// `total / 5`, i.e. 20%, rounded down by integer division.
const DEFAULT_L1_BUDGET_DIVISOR: usize = 5;
/// Divisor that yields the Layer 3 (Accounting) share of the total budget:
/// `total / 10`, i.e. 10%, rounded down by integer division.
const DEFAULT_L3_BUDGET_DIVISOR: usize = 10;
162
/// Per-layer node budget allocation and tracking.
///
/// Each layer has a maximum (`layerN_max`) and a running count
/// (`layerN_count`). The derived `Default` sets every field to zero, which is
/// a budget that admits no nodes at all.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct NodeBudget {
    /// Maximum nodes allowed for Layer 1 (Governance).
    pub layer1_max: usize,
    /// Maximum nodes allowed for Layer 2 (Process).
    pub layer2_max: usize,
    /// Maximum nodes allowed for Layer 3 (Accounting).
    pub layer3_max: usize,
    /// Current count for Layer 1.
    pub layer1_count: usize,
    /// Current count for Layer 2.
    pub layer2_count: usize,
    /// Current count for Layer 3.
    pub layer3_count: usize,
}
179
180impl NodeBudget {
181    /// Create a budget with the given total max nodes.
182    /// Default allocation: L1 gets 20%, L3 gets 10%, L2 gets remainder (70%).
183    pub fn new(max_nodes: usize) -> Self {
184        let l1 = max_nodes / DEFAULT_L1_BUDGET_DIVISOR;
185        let l3 = max_nodes / DEFAULT_L3_BUDGET_DIVISOR;
186        let l2 = max_nodes - l1 - l3; // 70%
187        Self {
188            layer1_max: l1,
189            layer2_max: l2,
190            layer3_max: l3,
191            layer1_count: 0,
192            layer2_count: 0,
193            layer3_count: 0,
194        }
195    }
196
197    /// Check if a layer can accept more nodes.
198    pub fn can_add(&self, layer: HypergraphLayer) -> bool {
199        match layer {
200            HypergraphLayer::GovernanceControls => self.layer1_count < self.layer1_max,
201            HypergraphLayer::ProcessEvents => self.layer2_count < self.layer2_max,
202            HypergraphLayer::AccountingNetwork => self.layer3_count < self.layer3_max,
203        }
204    }
205
206    /// Record a node addition.
207    pub fn record_add(&mut self, layer: HypergraphLayer) {
208        match layer {
209            HypergraphLayer::GovernanceControls => self.layer1_count += 1,
210            HypergraphLayer::ProcessEvents => self.layer2_count += 1,
211            HypergraphLayer::AccountingNetwork => self.layer3_count += 1,
212        }
213    }
214
215    /// Total nodes across all layers.
216    pub fn total_count(&self) -> usize {
217        self.layer1_count + self.layer2_count + self.layer3_count
218    }
219
220    /// Total budget across all layers.
221    pub fn total_max(&self) -> usize {
222        self.layer1_max + self.layer2_max + self.layer3_max
223    }
224
225    /// Rebalance the budget based on actual demand per layer.
226    /// Unused budget from layers with fewer entities than max is redistributed.
227    pub fn rebalance(&mut self, l1_demand: usize, l2_demand: usize, l3_demand: usize) {
228        let total = self.total_max();
229
230        // Clamp each layer to its demand
231        let l1_actual = l1_demand.min(self.layer1_max);
232        let l3_actual = l3_demand.min(self.layer3_max);
233
234        // Give surplus to L2
235        let surplus = (self.layer1_max - l1_actual) + (self.layer3_max - l3_actual);
236        let l2_actual = (self.layer2_max + surplus)
237            .min(l2_demand)
238            .min(total - l1_actual - l3_actual.min(total.saturating_sub(l1_actual)));
239
240        self.layer1_max = l1_actual;
241        self.layer3_max = total.saturating_sub(l1_actual).saturating_sub(l2_actual);
242        self.layer2_max = l2_actual;
243    }
244}
245
/// Report on node budget utilization after building.
///
/// Plain data: for every layer it pairs the configured budget with the number
/// of nodes used. The derived `Default` is an all-zero report with
/// `aggregation_triggered == false`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct NodeBudgetReport {
    /// Total budget configured.
    pub total_budget: usize,
    /// Total nodes actually created.
    pub total_used: usize,
    /// Layer 1 budget.
    pub layer1_budget: usize,
    /// Layer 1 usage.
    pub layer1_used: usize,
    /// Layer 2 budget.
    pub layer2_budget: usize,
    /// Layer 2 usage.
    pub layer2_used: usize,
    /// Layer 3 budget.
    pub layer3_budget: usize,
    /// Layer 3 usage.
    pub layer3_used: usize,
    /// Number of aggregate (pool) nodes created.
    pub aggregate_nodes_created: usize,
    /// Whether aggregation was triggered.
    pub aggregation_triggered: bool,
}
267
/// Metadata about the exported hypergraph.
///
/// Summary counters and provenance for one export. All `*_counts` maps are
/// keyed by a name string and hold the number of items with that name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HypergraphMetadata {
    /// Name of this hypergraph export.
    pub name: String,
    /// Total number of nodes.
    pub num_nodes: usize,
    /// Total number of pairwise edges.
    pub num_edges: usize,
    /// Total number of hyperedges.
    pub num_hyperedges: usize,
    /// Node counts per layer.
    // NOTE(review): the key format (display name vs. serde name of the layer)
    // is not visible in this file; confirm against the producer.
    pub layer_node_counts: HashMap<String, usize>,
    /// Node counts per entity type.
    pub node_type_counts: HashMap<String, usize>,
    /// Edge counts per edge type.
    pub edge_type_counts: HashMap<String, usize>,
    /// Hyperedge counts per type.
    pub hyperedge_type_counts: HashMap<String, usize>,
    /// Number of anomalous nodes.
    pub anomalous_nodes: usize,
    /// Number of anomalous hyperedges.
    pub anomalous_hyperedges: usize,
    /// Source system identifier.
    pub source: String,
    /// Generation timestamp (ISO 8601).
    ///
    /// Stored as a plain string; the format is not enforced by this type.
    pub generated_at: String,
    /// Budget utilization report.
    // NOTE(review): `Hypergraph` carries a `budget_report` field of its own;
    // whether the two copies are kept in sync is not visible here.
    pub budget_report: NodeBudgetReport,
    /// Files included in export.
    pub files: Vec<String>,
}
300
/// The complete built hypergraph with all components.
///
/// Owns every node, pairwise edge and hyperedge of the graph together with the
/// export metadata and the budget report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Hypergraph {
    /// All nodes across all layers.
    pub nodes: Vec<HypergraphNode>,
    /// All pairwise edges (cross-layer and intra-layer).
    pub edges: Vec<CrossLayerEdge>,
    /// All hyperedges (journal entries, OCPM events).
    pub hyperedges: Vec<Hyperedge>,
    /// Export metadata.
    pub metadata: HypergraphMetadata,
    /// Budget utilization report.
    // NOTE(review): `metadata.budget_report` holds a report of the same type;
    // whether the two are expected to be equal is not visible here.
    pub budget_report: NodeBudgetReport,
}
315
#[cfg(test)]
mod tests {
    use super::*;

    /// Every layer reports its 1-based position.
    #[test]
    fn test_layer_index() {
        let expectations = [
            (HypergraphLayer::GovernanceControls, 1),
            (HypergraphLayer::ProcessEvents, 2),
            (HypergraphLayer::AccountingNetwork, 3),
        ];
        for (layer, position) in expectations {
            assert_eq!(layer.index(), position);
        }
    }

    /// A fresh budget is split 20% / 70% / 10% and sums to the requested total.
    #[test]
    fn test_node_budget_new() {
        let budget = NodeBudget::new(50_000);
        assert_eq!(budget.layer1_max, 10_000); // 20%
        assert_eq!(budget.layer2_max, 35_000); // 70%
        assert_eq!(budget.layer3_max, 5_000); // 10%
        assert_eq!(budget.total_max(), 50_000);
    }

    /// A layer stops accepting nodes once its count reaches its maximum,
    /// without affecting the other layers.
    #[test]
    fn test_node_budget_can_add() {
        let mut budget = NodeBudget::new(100);
        assert!(budget.can_add(HypergraphLayer::GovernanceControls));

        // Layer 1 owns 100 / 5 = 20 slots; use them all.
        (0..20).for_each(|_| budget.record_add(HypergraphLayer::GovernanceControls));

        assert!(!budget.can_add(HypergraphLayer::GovernanceControls));
        assert!(budget.can_add(HypergraphLayer::ProcessEvents));
    }

    /// `total_count` adds up the counts of all layers.
    #[test]
    fn test_node_budget_total() {
        let mut budget = NodeBudget::new(1000);
        for layer in [
            HypergraphLayer::GovernanceControls,
            HypergraphLayer::ProcessEvents,
            HypergraphLayer::AccountingNetwork,
        ] {
            budget.record_add(layer);
        }
        assert_eq!(budget.total_count(), 3);
    }

    /// A node survives a JSON round trip.
    #[test]
    fn test_hypergraph_node_serialization() {
        let original = HypergraphNode {
            id: String::from("node_1"),
            entity_type: String::from("Account"),
            entity_type_code: 100,
            layer: HypergraphLayer::AccountingNetwork,
            external_id: String::from("1000"),
            label: String::from("Cash"),
            properties: HashMap::new(),
            features: vec![1.0, 2.0],
            is_anomaly: false,
            anomaly_type: None,
            is_aggregate: false,
            aggregate_count: 0,
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded = serde_json::from_str::<HypergraphNode>(&encoded).unwrap();

        assert_eq!(decoded.id, "node_1");
        assert_eq!(decoded.entity_type_code, 100);
        assert_eq!(decoded.layer, HypergraphLayer::AccountingNetwork);
    }

    /// A hyperedge with two weighted participants survives a JSON round trip.
    #[test]
    fn test_hyperedge_serialization() {
        let debit_side = HyperedgeParticipant {
            node_id: String::from("acct_1000"),
            role: String::from("debit"),
            weight: Some(500.0),
        };
        let credit_side = HyperedgeParticipant {
            node_id: String::from("acct_2000"),
            role: String::from("credit"),
            weight: Some(500.0),
        };
        let original = Hyperedge {
            id: String::from("he_1"),
            hyperedge_type: String::from("JournalEntry"),
            subtype: String::from("R2R"),
            participants: vec![debit_side, credit_side],
            layer: HypergraphLayer::AccountingNetwork,
            properties: HashMap::new(),
            timestamp: Some(NaiveDate::from_ymd_opt(2024, 6, 15).unwrap()),
            is_anomaly: true,
            anomaly_type: Some(String::from("split_transaction")),
            features: vec![6.2, 1.0],
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded = serde_json::from_str::<Hyperedge>(&encoded).unwrap();

        assert_eq!(decoded.participants.len(), 2);
        assert!(decoded.is_anomaly);
    }

    /// A cross-layer edge keeps its type and both layers through a JSON round trip.
    #[test]
    fn test_cross_layer_edge_serialization() {
        let original = CrossLayerEdge {
            source_id: String::from("ctrl_C001"),
            source_layer: HypergraphLayer::GovernanceControls,
            target_id: String::from("acct_1000"),
            target_layer: HypergraphLayer::AccountingNetwork,
            edge_type: String::from("ImplementsControl"),
            edge_type_code: 40,
            properties: HashMap::new(),
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded = serde_json::from_str::<CrossLayerEdge>(&encoded).unwrap();

        assert_eq!(decoded.edge_type, "ImplementsControl");
        assert_eq!(decoded.source_layer, HypergraphLayer::GovernanceControls);
        assert_eq!(decoded.target_layer, HypergraphLayer::AccountingNetwork);
    }

    /// Pooling by counterparty is the default aggregation strategy.
    #[test]
    fn test_aggregation_strategy_default() {
        let strategy = AggregationStrategy::default();
        assert_eq!(strategy, AggregationStrategy::PoolByCounterparty);
    }
}