nodedb_graph/csr/index/interning.rs
1// SPDX-License-Identifier: Apache-2.0
2
3//! Node / label string↔id interning and the node-label bitset.
4
5use std::collections::hash_map::Entry;
6
7use super::types::CsrIndex;
8
9impl CsrIndex {
10 /// Get or create a dense ID for a node.
11 ///
12 /// Returns `Err(GraphError::NodeOverflow)` when the partition already holds
13 /// `MAX_NODES_PER_CSR` nodes and a new name is introduced. The check uses a
14 /// typed `Result` rather than `debug_assert!` so the failure mode is loud
15 /// and deterministic — a silent `u32` wrap would reproduce the same class
16 /// of bug as the label-overflow issue this crate fixed previously.
17 pub(crate) fn ensure_node(&mut self, node: &str) -> Result<u32, crate::GraphError> {
18 match self.node_to_id.entry(node.to_string()) {
19 Entry::Occupied(e) => Ok(*e.get()),
20 Entry::Vacant(e) => {
21 let len = self.id_to_node.len();
22 if len >= crate::MAX_NODES_PER_CSR {
23 return Err(crate::GraphError::NodeOverflow { used: len });
24 }
25 let id = len as u32;
26 e.insert(id);
27 self.id_to_node.push(node.to_string());
28 // Extend dense offsets (new node has 0 edges in dense part).
29 self.out_offsets
30 .push(*self.out_offsets.last().unwrap_or(&0));
31 self.in_offsets.push(*self.in_offsets.last().unwrap_or(&0));
32 // Extend buffer and access tracking.
33 self.buffer_out.push(Vec::new());
34 self.buffer_in.push(Vec::new());
35 self.buffer_out_weights.push(Vec::new());
36 self.buffer_in_weights.push(Vec::new());
37 self.node_label_bits.push(0);
38 // Surrogate is populated later by the EdgePut handler; start
39 // with the ZERO sentinel so unset nodes are never in a bitmap.
40 self.node_surrogates.push(0);
41 self.access_counts.push(std::cell::Cell::new(0));
42 Ok(id)
43 }
44 }
45 }
46
47 /// Get or create a dense ID for a label.
48 ///
49 /// Returns `Err(GraphError::LabelOverflow)` if the id space is
50 /// exhausted (`MAX_EDGE_LABELS` distinct labels already interned).
51 /// The DSL ingress layer enforces a far tighter user-facing cap,
52 /// so this branch is effectively unreachable in practice — but the
53 /// Result is propagated so the ceiling failure mode is typed and
54 /// loud rather than a silent cast.
55 pub(crate) fn ensure_label(&mut self, label: &str) -> Result<u32, crate::GraphError> {
56 match self.label_to_id.entry(label.to_string()) {
57 Entry::Occupied(e) => Ok(*e.get()),
58 Entry::Vacant(e) => {
59 let len = self.id_to_label.len();
60 if len >= crate::MAX_EDGE_LABELS {
61 return Err(crate::GraphError::LabelOverflow { used: len });
62 }
63 let id = len as u32;
64 e.insert(id);
65 self.id_to_label.push(label.to_string());
66 Ok(id)
67 }
68 }
69 }
70
71 // ── Surrogate methods ──
72
73 /// Set the surrogate for a node, identified by its user-visible name.
74 ///
75 /// Call once per node after `add_edge` / `add_edge_weighted` has allocated
76 /// a CSR-local id for it. If the node is not yet interned this is a no-op
77 /// (the edge insertion that follows will intern it and a subsequent call
78 /// with the correct surrogate will set it). Calling with `Surrogate::ZERO`
79 /// is a no-op — the zero sentinel is the initial state and has no meaning.
80 pub fn set_node_surrogate(&mut self, node: &str, surrogate: nodedb_types::Surrogate) {
81 let raw = surrogate.as_u32();
82 if raw == 0 {
83 return;
84 }
85 if let Some(&id) = self.node_to_id.get(node)
86 && let Some(slot) = self.node_surrogates.get_mut(id as usize)
87 {
88 *slot = raw;
89 self.surrogate_to_local.insert(raw, id);
90 }
91 }
92
93 /// Return the raw surrogate `u32` for a CSR-local node id.
94 ///
95 /// Returns `0` (the ZERO sentinel) when the node has no surrogate set.
96 pub fn node_surrogate_raw(&self, local_id: u32) -> u32 {
97 self.node_surrogates
98 .get(local_id as usize)
99 .copied()
100 .unwrap_or(0)
101 }
102
103 /// Look up the user-visible node name bound to a `Surrogate`.
104 ///
105 /// Returns `None` for `Surrogate::ZERO` and for surrogates that have
106 /// never been bound to a node in this partition. Used by cross-engine
107 /// fusion (graph RAG) to translate a vector-side surrogate into a
108 /// graph BFS seed name.
109 pub fn node_id_for_surrogate(&self, surrogate: nodedb_types::Surrogate) -> Option<&str> {
110 let raw = surrogate.as_u32();
111 if raw == 0 {
112 return None;
113 }
114 let local_id = *self.surrogate_to_local.get(&raw)?;
115 self.id_to_node.get(local_id as usize).map(String::as_str)
116 }
117
118 // ── Node label methods ──
119
120 /// Get or create a node label ID (max 64 labels).
121 fn ensure_node_label(&mut self, label: &str) -> Option<u8> {
122 if let Some(&id) = self.node_label_to_id.get(label) {
123 return Some(id);
124 }
125 let id = self.node_label_names.len();
126 if id >= 64 {
127 return None; // bitset limit
128 }
129 let id = id as u8;
130 self.node_label_to_id.insert(label.to_string(), id);
131 self.node_label_names.push(label.to_string());
132 Some(id)
133 }
134
135 /// Add a label to a node.
136 ///
137 /// Returns `Ok(false)` if the 64 distinct node-label limit is hit (the
138 /// label is silently ignored). Returns `Err(GraphError::NodeOverflow)` if
139 /// the node is new and the partition's node-id space is exhausted.
140 pub fn add_node_label(&mut self, node: &str, label: &str) -> Result<bool, crate::GraphError> {
141 let node_id = self.ensure_node(node)?;
142 let Some(label_id) = self.ensure_node_label(label) else {
143 return Ok(false);
144 };
145 self.node_label_bits[node_id as usize] |= 1u64 << label_id;
146 Ok(true)
147 }
148
149 /// Remove a label from a node.
150 pub fn remove_node_label(&mut self, node: &str, label: &str) {
151 let Some(&node_id) = self.node_to_id.get(node) else {
152 return;
153 };
154 let Some(&label_id) = self.node_label_to_id.get(label) else {
155 return;
156 };
157 self.node_label_bits[node_id as usize] &= !(1u64 << label_id);
158 }
159
160 /// Check if a node has a specific label.
161 pub fn node_has_label(&self, node_id: u32, label: &str) -> bool {
162 let Some(&label_id) = self.node_label_to_id.get(label) else {
163 return false;
164 };
165 let bits = self
166 .node_label_bits
167 .get(node_id as usize)
168 .copied()
169 .unwrap_or(0);
170 bits & (1u64 << label_id) != 0
171 }
172
173 /// Get all label names for a node.
174 pub fn node_labels(&self, node_id: u32) -> Vec<&str> {
175 let bits = self
176 .node_label_bits
177 .get(node_id as usize)
178 .copied()
179 .unwrap_or(0);
180 if bits == 0 {
181 return Vec::new();
182 }
183 let mut labels = Vec::new();
184 for (i, name) in self.node_label_names.iter().enumerate() {
185 if bits & (1u64 << i) != 0 {
186 labels.push(name.as_str());
187 }
188 }
189 labels
190 }
191}