Skip to main content

datasynth_graph/models/
hypergraph.rs

1//! Multi-layer hypergraph model types for RustGraph integration.
2//!
3//! Defines a 3-layer hypergraph structure:
4//! - Layer 1: Governance & Controls (COSO, SOX, internal controls, organizational)
5//! - Layer 2: Process Events (P2P/O2C document flows, OCPM events)
6//! - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
7
8use std::collections::HashMap;
9
10use chrono::NaiveDate;
11use serde::{Deserialize, Serialize};
12use serde_json::Value;
13
14/// Which layer of the hypergraph a node or hyperedge belongs to.
15#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
16#[serde(rename_all = "snake_case")]
17pub enum HypergraphLayer {
18    /// Layer 1: Governance & Controls (COSO components, internal controls, SOX, organizational).
19    GovernanceControls,
20    /// Layer 2: Process Events (P2P/O2C document flows, OCPM process events).
21    ProcessEvents,
22    /// Layer 3: Accounting Network (GL accounts, journal entries as hyperedges).
23    AccountingNetwork,
24}
25
26impl HypergraphLayer {
27    /// Returns the numeric layer index (1-3).
28    pub fn index(&self) -> u8 {
29        match self {
30            HypergraphLayer::GovernanceControls => 1,
31            HypergraphLayer::ProcessEvents => 2,
32            HypergraphLayer::AccountingNetwork => 3,
33        }
34    }
35
36    /// Returns the display name for the layer.
37    pub fn name(&self) -> &'static str {
38        match self {
39            HypergraphLayer::GovernanceControls => "Governance & Controls",
40            HypergraphLayer::ProcessEvents => "Process Events",
41            HypergraphLayer::AccountingNetwork => "Accounting Network",
42        }
43    }
44}
45
46/// Strategy for aggregating nodes when budget is exceeded.
47#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
48#[serde(rename_all = "snake_case")]
49pub enum AggregationStrategy {
50    /// Truncate: simply stop adding nodes after budget is reached.
51    Truncate,
52    /// Pool documents by their counterparty (vendor/customer).
53    #[default]
54    PoolByCounterparty,
55    /// Pool documents by time period (month).
56    PoolByTimePeriod,
57    /// Keep most important nodes based on transaction volume.
58    ImportanceSample,
59}
60
61/// A participant in a hyperedge (node reference with role and optional weight).
62#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct HyperedgeParticipant {
64    /// ID of the participating node.
65    pub node_id: String,
66    /// Role of this participant (e.g., "debit", "credit", "approver", "vendor").
67    pub role: String,
68    /// Optional weight (e.g., line amount for journal entry lines).
69    #[serde(skip_serializing_if = "Option::is_none")]
70    pub weight: Option<f64>,
71}
72
73/// A hyperedge connecting multiple nodes simultaneously.
74#[derive(Debug, Clone, Serialize, Deserialize)]
75pub struct Hyperedge {
76    /// Unique hyperedge identifier.
77    pub id: String,
78    /// High-level type: "ProcessFamily", "MultiRelation", "JournalEntry".
79    pub hyperedge_type: String,
80    /// Subtype with more detail: "P2P", "O2C", "JournalEntry".
81    pub subtype: String,
82    /// Nodes participating in this hyperedge with their roles.
83    pub participants: Vec<HyperedgeParticipant>,
84    /// Which layer this hyperedge belongs to.
85    pub layer: HypergraphLayer,
86    /// Additional properties as key-value pairs.
87    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
88    pub properties: HashMap<String, Value>,
89    /// Optional timestamp for temporal hyperedges.
90    #[serde(skip_serializing_if = "Option::is_none")]
91    pub timestamp: Option<NaiveDate>,
92    /// Whether this hyperedge represents an anomaly.
93    #[serde(default)]
94    pub is_anomaly: bool,
95    /// Anomaly type if anomalous.
96    #[serde(skip_serializing_if = "Option::is_none")]
97    pub anomaly_type: Option<String>,
98    /// Numeric feature vector for ML.
99    #[serde(default, skip_serializing_if = "Vec::is_empty")]
100    pub features: Vec<f64>,
101}
102
103/// A node in the hypergraph with layer assignment and RustGraph type codes.
104#[derive(Debug, Clone, Serialize, Deserialize)]
105pub struct HypergraphNode {
106    /// Unique node identifier.
107    pub id: String,
108    /// Entity type name in snake_case (e.g., "account", "vendor", "coso_component").
109    pub entity_type: String,
110    /// RustGraph entity type code for import.
111    pub entity_type_code: u32,
112    /// Which layer this node belongs to.
113    pub layer: HypergraphLayer,
114    /// External identifier from the source system.
115    pub external_id: String,
116    /// Human-readable label.
117    pub label: String,
118    /// Additional properties as key-value pairs.
119    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
120    pub properties: HashMap<String, Value>,
121    /// Numeric feature vector for ML.
122    #[serde(default, skip_serializing_if = "Vec::is_empty")]
123    pub features: Vec<f64>,
124    /// Whether this node represents an anomaly.
125    #[serde(default)]
126    pub is_anomaly: bool,
127    /// Anomaly type if anomalous.
128    #[serde(skip_serializing_if = "Option::is_none")]
129    pub anomaly_type: Option<String>,
130    /// Whether this is an aggregate (pool) node from budget compression.
131    #[serde(default)]
132    pub is_aggregate: bool,
133    /// Number of original entities this aggregate node represents.
134    #[serde(default)]
135    pub aggregate_count: usize,
136}
137
138/// A pairwise edge connecting nodes across or within layers.
139#[derive(Debug, Clone, Serialize, Deserialize)]
140pub struct CrossLayerEdge {
141    /// Source node ID.
142    pub source_id: String,
143    /// Source node's layer.
144    pub source_layer: HypergraphLayer,
145    /// Target node ID.
146    pub target_id: String,
147    /// Target node's layer.
148    pub target_layer: HypergraphLayer,
149    /// Edge type name (e.g., "ImplementsControl", "GovernedByStandard").
150    pub edge_type: String,
151    /// RustGraph edge type code for import.
152    pub edge_type_code: u32,
153    /// Additional properties as key-value pairs.
154    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
155    pub properties: HashMap<String, Value>,
156}
157
/// Divisor applied to the total node budget to obtain the default Layer 1
/// (Governance) share: total / 5, i.e. 20%.
const DEFAULT_L1_BUDGET_DIVISOR: usize = 5;
/// Divisor applied to the total node budget to obtain the default Layer 3
/// (Accounting) share: total / 10, i.e. 10%.
const DEFAULT_L3_BUDGET_DIVISOR: usize = 10;
162
163/// Suggested per-layer budget allocation based on actual demand analysis.
164///
165/// Returned by [`NodeBudget::suggest`] after analyzing entity counts.
166/// The suggestion redistributes unused capacity from low-demand layers
167/// to high-demand layers while respecting the total budget.
168#[derive(Debug, Clone, Default, Serialize, Deserialize)]
169pub struct NodeBudgetSuggestion {
170    /// Suggested L1 (Governance) budget.
171    pub l1_suggested: usize,
172    /// Suggested L2 (Process) budget.
173    pub l2_suggested: usize,
174    /// Suggested L3 (Accounting) budget.
175    pub l3_suggested: usize,
176    /// Total budget (unchanged).
177    pub total: usize,
178    /// Surplus redistributed from low-demand layers.
179    pub surplus_redistributed: usize,
180}
181
182/// Per-layer node budget allocation and tracking.
183#[derive(Debug, Clone, Default, Serialize, Deserialize)]
184pub struct NodeBudget {
185    /// Maximum nodes allowed for Layer 1 (Governance).
186    pub layer1_max: usize,
187    /// Maximum nodes allowed for Layer 2 (Process).
188    pub layer2_max: usize,
189    /// Maximum nodes allowed for Layer 3 (Accounting).
190    pub layer3_max: usize,
191    /// Current count for Layer 1.
192    pub layer1_count: usize,
193    /// Current count for Layer 2.
194    pub layer2_count: usize,
195    /// Current count for Layer 3.
196    pub layer3_count: usize,
197}
198
199impl NodeBudget {
200    /// Create a budget with the given total max nodes.
201    /// Default allocation: L1 gets 20%, L3 gets 10%, L2 gets remainder (70%).
202    pub fn new(max_nodes: usize) -> Self {
203        let l1 = max_nodes / DEFAULT_L1_BUDGET_DIVISOR;
204        let l3 = max_nodes / DEFAULT_L3_BUDGET_DIVISOR;
205        let l2 = max_nodes - l1 - l3; // 70%
206        Self {
207            layer1_max: l1,
208            layer2_max: l2,
209            layer3_max: l3,
210            layer1_count: 0,
211            layer2_count: 0,
212            layer3_count: 0,
213        }
214    }
215
216    /// Check if a layer can accept more nodes.
217    pub fn can_add(&self, layer: HypergraphLayer) -> bool {
218        match layer {
219            HypergraphLayer::GovernanceControls => self.layer1_count < self.layer1_max,
220            HypergraphLayer::ProcessEvents => self.layer2_count < self.layer2_max,
221            HypergraphLayer::AccountingNetwork => self.layer3_count < self.layer3_max,
222        }
223    }
224
225    /// Record a node addition.
226    pub fn record_add(&mut self, layer: HypergraphLayer) {
227        match layer {
228            HypergraphLayer::GovernanceControls => self.layer1_count += 1,
229            HypergraphLayer::ProcessEvents => self.layer2_count += 1,
230            HypergraphLayer::AccountingNetwork => self.layer3_count += 1,
231        }
232    }
233
234    /// Total nodes across all layers.
235    pub fn total_count(&self) -> usize {
236        self.layer1_count + self.layer2_count + self.layer3_count
237    }
238
239    /// Total budget across all layers.
240    pub fn total_max(&self) -> usize {
241        self.layer1_max + self.layer2_max + self.layer3_max
242    }
243
244    /// Compute a suggested budget allocation based on actual demand per layer.
245    ///
246    /// Each layer gets at least its demand (up to its current max). Surplus from
247    /// layers that need less than their max is redistributed proportionally to
248    /// layers that need more than their max.
249    pub fn suggest(
250        &self,
251        l1_demand: usize,
252        l2_demand: usize,
253        l3_demand: usize,
254    ) -> NodeBudgetSuggestion {
255        let total = self.total_max();
256
257        // Phase 1: clamp each layer to min(demand, current_max).
258        let l1_clamped = l1_demand.min(self.layer1_max);
259        let l2_clamped = l2_demand.min(self.layer2_max);
260        let l3_clamped = l3_demand.min(self.layer3_max);
261
262        // Phase 2: compute surplus from layers that need less than max.
263        let surplus = (self.layer1_max - l1_clamped)
264            + (self.layer2_max - l2_clamped)
265            + (self.layer3_max - l3_clamped);
266
267        // Phase 3: compute unsatisfied demand per layer.
268        let l1_unsat = l1_demand.saturating_sub(self.layer1_max);
269        let l2_unsat = l2_demand.saturating_sub(self.layer2_max);
270        let l3_unsat = l3_demand.saturating_sub(self.layer3_max);
271        let total_unsat = l1_unsat + l2_unsat + l3_unsat;
272
273        // Phase 4: distribute surplus proportionally to unsatisfied demand.
274        let (l1_bonus, l2_bonus, l3_bonus) = if total_unsat > 0 && surplus > 0 {
275            let l1_b = (surplus as f64 * l1_unsat as f64 / total_unsat as f64).floor() as usize;
276            let l2_b = (surplus as f64 * l2_unsat as f64 / total_unsat as f64).floor() as usize;
277            // Give remainder to L3 (or whichever has unsat) to avoid rounding loss
278            let l3_b = surplus.saturating_sub(l1_b).saturating_sub(l2_b);
279            (l1_b, l2_b, l3_b)
280        } else if surplus > 0 {
281            // No unsatisfied demand — give surplus to L2 (largest consumer by convention)
282            (0, surplus, 0)
283        } else {
284            (0, 0, 0)
285        };
286
287        let l1_suggested = l1_clamped + l1_bonus;
288        let l2_suggested = l2_clamped + l2_bonus;
289        let l3_suggested = l3_clamped + l3_bonus;
290        let redistributed = l1_bonus + l2_bonus + l3_bonus;
291
292        NodeBudgetSuggestion {
293            l1_suggested,
294            l2_suggested,
295            l3_suggested,
296            total,
297            surplus_redistributed: redistributed,
298        }
299    }
300
301    /// Rebalance the budget based on actual demand per layer.
302    ///
303    /// Each layer gets at least its demand (capped at the total budget).
304    /// Surplus from low-demand layers is redistributed proportionally to
305    /// layers with unsatisfied demand. If no layer has unsatisfied demand,
306    /// surplus goes to L2 (the largest consumer by convention).
307    pub fn rebalance(&mut self, l1_demand: usize, l2_demand: usize, l3_demand: usize) {
308        let suggestion = self.suggest(l1_demand, l2_demand, l3_demand);
309        self.layer1_max = suggestion.l1_suggested;
310        self.layer2_max = suggestion.l2_suggested;
311        self.layer3_max = suggestion.l3_suggested;
312    }
313}
314
315/// Report on node budget utilization after building.
316#[derive(Debug, Clone, Default, Serialize, Deserialize)]
317pub struct NodeBudgetReport {
318    /// Total budget configured.
319    pub total_budget: usize,
320    /// Total nodes actually created.
321    pub total_used: usize,
322    /// Layer 1 budget and usage.
323    pub layer1_budget: usize,
324    pub layer1_used: usize,
325    /// Layer 2 budget and usage.
326    pub layer2_budget: usize,
327    pub layer2_used: usize,
328    /// Layer 3 budget and usage.
329    pub layer3_budget: usize,
330    pub layer3_used: usize,
331    /// Number of aggregate (pool) nodes created.
332    pub aggregate_nodes_created: usize,
333    /// Whether aggregation was triggered.
334    pub aggregation_triggered: bool,
335}
336
337/// Metadata about the exported hypergraph.
338#[derive(Debug, Clone, Serialize, Deserialize)]
339pub struct HypergraphMetadata {
340    /// Name of this hypergraph export.
341    pub name: String,
342    /// Total number of nodes.
343    pub num_nodes: usize,
344    /// Total number of pairwise edges.
345    pub num_edges: usize,
346    /// Total number of hyperedges.
347    pub num_hyperedges: usize,
348    /// Node counts per layer.
349    pub layer_node_counts: HashMap<String, usize>,
350    /// Node counts per entity type.
351    pub node_type_counts: HashMap<String, usize>,
352    /// Edge counts per edge type.
353    pub edge_type_counts: HashMap<String, usize>,
354    /// Hyperedge counts per type.
355    pub hyperedge_type_counts: HashMap<String, usize>,
356    /// Number of anomalous nodes.
357    pub anomalous_nodes: usize,
358    /// Number of anomalous hyperedges.
359    pub anomalous_hyperedges: usize,
360    /// Source system identifier.
361    pub source: String,
362    /// Generation timestamp (ISO 8601).
363    pub generated_at: String,
364    /// Budget utilization report.
365    pub budget_report: NodeBudgetReport,
366    /// Files included in export.
367    pub files: Vec<String>,
368}
369
370/// The complete built hypergraph with all components.
371#[derive(Debug, Clone, Serialize, Deserialize)]
372pub struct Hypergraph {
373    /// All nodes across all layers.
374    pub nodes: Vec<HypergraphNode>,
375    /// All pairwise edges (cross-layer and intra-layer).
376    pub edges: Vec<CrossLayerEdge>,
377    /// All hyperedges (journal entries, OCPM events).
378    pub hyperedges: Vec<Hyperedge>,
379    /// Export metadata.
380    pub metadata: HypergraphMetadata,
381    /// Budget utilization report.
382    pub budget_report: NodeBudgetReport,
383}
384
#[cfg(test)]
// Tests may unwrap freely: a failed unwrap is simply a failed test.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    #[test]
    fn test_layer_index() {
        assert_eq!(HypergraphLayer::GovernanceControls.index(), 1);
        assert_eq!(HypergraphLayer::ProcessEvents.index(), 2);
        assert_eq!(HypergraphLayer::AccountingNetwork.index(), 3);
    }

    #[test]
    fn test_layer_name() {
        assert_eq!(HypergraphLayer::GovernanceControls.name(), "Governance & Controls");
        assert_eq!(HypergraphLayer::ProcessEvents.name(), "Process Events");
        assert_eq!(HypergraphLayer::AccountingNetwork.name(), "Accounting Network");
    }

    #[test]
    fn test_layer_serializes_as_snake_case() {
        let json = serde_json::to_string(&HypergraphLayer::GovernanceControls).unwrap();
        assert_eq!(json, "\"governance_controls\"");
        let layer: HypergraphLayer = serde_json::from_str("\"accounting_network\"").unwrap();
        assert_eq!(layer, HypergraphLayer::AccountingNetwork);
    }

    #[test]
    fn test_node_budget_new() {
        let budget = NodeBudget::new(50_000);
        assert_eq!(budget.layer1_max, 10_000); // 20%
        assert_eq!(budget.layer2_max, 35_000); // 70%
        assert_eq!(budget.layer3_max, 5_000); // 10%
        assert_eq!(budget.total_max(), 50_000);
    }

    #[test]
    fn test_node_budget_new_remainder_goes_to_l2() {
        // 7 / 5 = 1 and 7 / 10 = 0, so L2 keeps the remaining 6.
        let budget = NodeBudget::new(7);
        assert_eq!(budget.layer1_max, 1);
        assert_eq!(budget.layer2_max, 6);
        assert_eq!(budget.layer3_max, 0);
        assert_eq!(budget.total_max(), 7);
    }

    #[test]
    fn test_node_budget_can_add() {
        let mut budget = NodeBudget::new(100);
        assert!(budget.can_add(HypergraphLayer::GovernanceControls));

        // Fill L1 to max (20)
        for _ in 0..20 {
            budget.record_add(HypergraphLayer::GovernanceControls);
        }
        assert!(!budget.can_add(HypergraphLayer::GovernanceControls));
        assert!(budget.can_add(HypergraphLayer::ProcessEvents));
    }

    #[test]
    fn test_node_budget_total() {
        let mut budget = NodeBudget::new(1000);
        budget.record_add(HypergraphLayer::GovernanceControls);
        budget.record_add(HypergraphLayer::ProcessEvents);
        budget.record_add(HypergraphLayer::AccountingNetwork);
        assert_eq!(budget.total_count(), 3);
    }

    #[test]
    fn test_node_budget_suggest_moves_surplus_to_unsatisfied_layer() {
        // L1 needs 5 of its 20, L2 wants 200 against a max of 70.
        let budget = NodeBudget::new(100);
        let suggestion = budget.suggest(5, 200, 10);
        assert_eq!(suggestion.l1_suggested, 5);
        assert_eq!(suggestion.l2_suggested, 85);
        assert_eq!(suggestion.l3_suggested, 10);
        assert_eq!(suggestion.total, 100);
        assert_eq!(suggestion.surplus_redistributed, 15);
    }

    #[test]
    fn test_node_budget_suggest_without_unsatisfied_demand() {
        // Nobody is short, so the whole surplus lands on L2.
        let budget = NodeBudget::new(100);
        let suggestion = budget.suggest(5, 10, 5);
        assert_eq!(suggestion.l1_suggested, 5);
        assert_eq!(suggestion.l2_suggested, 90);
        assert_eq!(suggestion.l3_suggested, 5);
        assert_eq!(suggestion.surplus_redistributed, 80);
    }

    #[test]
    fn test_node_budget_suggest_without_surplus() {
        let budget = NodeBudget::new(100);
        let suggestion = budget.suggest(100, 100, 100);
        assert_eq!(suggestion.l1_suggested, 20);
        assert_eq!(suggestion.l2_suggested, 70);
        assert_eq!(suggestion.l3_suggested, 10);
        assert_eq!(suggestion.surplus_redistributed, 0);
    }

    #[test]
    fn test_node_budget_suggest_preserves_total_when_rounding() {
        // Surplus 10 split 1:2 rounds down to 3 and 6; the leftover 1 stays with L3.
        let budget = NodeBudget::new(100);
        let suggestion = budget.suggest(21, 72, 0);
        assert_eq!(suggestion.l1_suggested, 23);
        assert_eq!(suggestion.l2_suggested, 76);
        assert_eq!(suggestion.l3_suggested, 1);
        assert_eq!(
            suggestion.l1_suggested + suggestion.l2_suggested + suggestion.l3_suggested,
            suggestion.total
        );
    }

    #[test]
    fn test_node_budget_rebalance() {
        let mut budget = NodeBudget::new(100);
        budget.rebalance(5, 200, 10);
        assert_eq!(budget.layer1_max, 5);
        assert_eq!(budget.layer2_max, 85);
        assert_eq!(budget.layer3_max, 10);
        assert_eq!(budget.total_max(), 100);
    }

    #[test]
    fn test_hypergraph_node_serialization() {
        let node = HypergraphNode {
            id: "node_1".to_string(),
            entity_type: "account".to_string(),
            entity_type_code: 100,
            layer: HypergraphLayer::AccountingNetwork,
            external_id: "1000".to_string(),
            label: "Cash".to_string(),
            properties: HashMap::new(),
            features: vec![1.0, 2.0],
            is_anomaly: false,
            anomaly_type: None,
            is_aggregate: false,
            aggregate_count: 0,
        };

        let json = serde_json::to_string(&node).unwrap();
        let deserialized: HypergraphNode = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.id, "node_1");
        assert_eq!(deserialized.entity_type_code, 100);
        assert_eq!(deserialized.layer, HypergraphLayer::AccountingNetwork);
    }

    #[test]
    fn test_hyperedge_serialization() {
        let he = Hyperedge {
            id: "he_1".to_string(),
            hyperedge_type: "JournalEntry".to_string(),
            subtype: "R2R".to_string(),
            participants: vec![
                HyperedgeParticipant {
                    node_id: "acct_1000".to_string(),
                    role: "debit".to_string(),
                    weight: Some(500.0),
                },
                HyperedgeParticipant {
                    node_id: "acct_2000".to_string(),
                    role: "credit".to_string(),
                    weight: Some(500.0),
                },
            ],
            layer: HypergraphLayer::AccountingNetwork,
            properties: HashMap::new(),
            timestamp: Some(NaiveDate::from_ymd_opt(2024, 6, 15).unwrap()),
            is_anomaly: true,
            anomaly_type: Some("split_transaction".to_string()),
            features: vec![6.2, 1.0],
        };

        let json = serde_json::to_string(&he).unwrap();
        let deserialized: Hyperedge = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.participants.len(), 2);
        assert!(deserialized.is_anomaly);
    }

    #[test]
    fn test_cross_layer_edge_serialization() {
        let edge = CrossLayerEdge {
            source_id: "ctrl_C001".to_string(),
            source_layer: HypergraphLayer::GovernanceControls,
            target_id: "acct_1000".to_string(),
            target_layer: HypergraphLayer::AccountingNetwork,
            edge_type: "ImplementsControl".to_string(),
            edge_type_code: 40,
            properties: HashMap::new(),
        };

        let json = serde_json::to_string(&edge).unwrap();
        let deserialized: CrossLayerEdge = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.edge_type, "ImplementsControl");
        assert_eq!(
            deserialized.source_layer,
            HypergraphLayer::GovernanceControls
        );
        assert_eq!(
            deserialized.target_layer,
            HypergraphLayer::AccountingNetwork
        );
    }

    #[test]
    fn test_aggregation_strategy_default() {
        assert_eq!(
            AggregationStrategy::default(),
            AggregationStrategy::PoolByCounterparty
        );
    }
}