1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
// SPDX-License-Identifier: Apache-2.0
//! Node / label string↔id interning and the node-label bitset.
use std::collections::hash_map::Entry;
use super::types::CsrIndex;
impl CsrIndex {
/// Get or create a dense ID for a node.
///
/// Returns `Err(GraphError::NodeOverflow)` when the partition already holds
/// `MAX_NODES_PER_CSR` nodes and a new name is introduced. The check uses a
/// typed `Result` rather than `debug_assert!` so the failure mode is loud
/// and deterministic — a silent `u32` wrap would reproduce the same class
/// of bug as the label-overflow issue this crate fixed previously.
pub(crate) fn ensure_node(&mut self, node: &str) -> Result<u32, crate::GraphError> {
match self.node_to_id.entry(node.to_string()) {
Entry::Occupied(e) => Ok(*e.get()),
Entry::Vacant(e) => {
let len = self.id_to_node.len();
if len >= crate::MAX_NODES_PER_CSR {
return Err(crate::GraphError::NodeOverflow { used: len });
}
let id = len as u32;
e.insert(id);
self.id_to_node.push(node.to_string());
// Extend dense offsets (new node has 0 edges in dense part).
self.out_offsets
.push(*self.out_offsets.last().unwrap_or(&0));
self.in_offsets.push(*self.in_offsets.last().unwrap_or(&0));
// Extend buffer and access tracking.
self.buffer_out.push(Vec::new());
self.buffer_in.push(Vec::new());
self.buffer_out_weights.push(Vec::new());
self.buffer_in_weights.push(Vec::new());
self.node_label_bits.push(0);
// Surrogate is populated later by the EdgePut handler; start
// with the ZERO sentinel so unset nodes are never in a bitmap.
self.node_surrogates.push(0);
self.access_counts.push(std::cell::Cell::new(0));
Ok(id)
}
}
}
/// Get or create a dense ID for a label.
///
/// Returns `Err(GraphError::LabelOverflow)` if the id space is
/// exhausted (`MAX_EDGE_LABELS` distinct labels already interned).
/// The DSL ingress layer enforces a far tighter user-facing cap,
/// so this branch is effectively unreachable in practice — but the
/// Result is propagated so the ceiling failure mode is typed and
/// loud rather than a silent cast.
pub(crate) fn ensure_label(&mut self, label: &str) -> Result<u32, crate::GraphError> {
match self.label_to_id.entry(label.to_string()) {
Entry::Occupied(e) => Ok(*e.get()),
Entry::Vacant(e) => {
let len = self.id_to_label.len();
if len >= crate::MAX_EDGE_LABELS {
return Err(crate::GraphError::LabelOverflow { used: len });
}
let id = len as u32;
e.insert(id);
self.id_to_label.push(label.to_string());
Ok(id)
}
}
}
// ── Surrogate methods ──
/// Set the surrogate for a node, identified by its user-visible name.
///
/// Call once per node after `add_edge` / `add_edge_weighted` has allocated
/// a CSR-local id for it. If the node is not yet interned this is a no-op
/// (the edge insertion that follows will intern it and a subsequent call
/// with the correct surrogate will set it). Calling with `Surrogate::ZERO`
/// is a no-op — the zero sentinel is the initial state and has no meaning.
pub fn set_node_surrogate(&mut self, node: &str, surrogate: nodedb_types::Surrogate) {
let raw = surrogate.as_u32();
if raw == 0 {
return;
}
if let Some(&id) = self.node_to_id.get(node)
&& let Some(slot) = self.node_surrogates.get_mut(id as usize)
{
*slot = raw;
self.surrogate_to_local.insert(raw, id);
}
}
/// Return the raw surrogate `u32` for a CSR-local node id.
///
/// Returns `0` (the ZERO sentinel) when the node has no surrogate set.
pub fn node_surrogate_raw(&self, local_id: u32) -> u32 {
self.node_surrogates
.get(local_id as usize)
.copied()
.unwrap_or(0)
}
/// Look up the user-visible node name bound to a `Surrogate`.
///
/// Returns `None` for `Surrogate::ZERO` and for surrogates that have
/// never been bound to a node in this partition. Used by cross-engine
/// fusion (graph RAG) to translate a vector-side surrogate into a
/// graph BFS seed name.
pub fn node_id_for_surrogate(&self, surrogate: nodedb_types::Surrogate) -> Option<&str> {
let raw = surrogate.as_u32();
if raw == 0 {
return None;
}
let local_id = *self.surrogate_to_local.get(&raw)?;
self.id_to_node.get(local_id as usize).map(String::as_str)
}
// ── Node label methods ──
/// Get or create a node label ID (max 64 labels).
fn ensure_node_label(&mut self, label: &str) -> Option<u8> {
if let Some(&id) = self.node_label_to_id.get(label) {
return Some(id);
}
let id = self.node_label_names.len();
if id >= 64 {
return None; // bitset limit
}
let id = id as u8;
self.node_label_to_id.insert(label.to_string(), id);
self.node_label_names.push(label.to_string());
Some(id)
}
/// Add a label to a node.
///
/// Returns `Ok(false)` if the 64 distinct node-label limit is hit (the
/// label is silently ignored). Returns `Err(GraphError::NodeOverflow)` if
/// the node is new and the partition's node-id space is exhausted.
pub fn add_node_label(&mut self, node: &str, label: &str) -> Result<bool, crate::GraphError> {
let node_id = self.ensure_node(node)?;
let Some(label_id) = self.ensure_node_label(label) else {
return Ok(false);
};
self.node_label_bits[node_id as usize] |= 1u64 << label_id;
Ok(true)
}
/// Remove a label from a node.
pub fn remove_node_label(&mut self, node: &str, label: &str) {
let Some(&node_id) = self.node_to_id.get(node) else {
return;
};
let Some(&label_id) = self.node_label_to_id.get(label) else {
return;
};
self.node_label_bits[node_id as usize] &= !(1u64 << label_id);
}
/// Check if a node has a specific label.
pub fn node_has_label(&self, node_id: u32, label: &str) -> bool {
let Some(&label_id) = self.node_label_to_id.get(label) else {
return false;
};
let bits = self
.node_label_bits
.get(node_id as usize)
.copied()
.unwrap_or(0);
bits & (1u64 << label_id) != 0
}
/// Get all label names for a node.
pub fn node_labels(&self, node_id: u32) -> Vec<&str> {
let bits = self
.node_label_bits
.get(node_id as usize)
.copied()
.unwrap_or(0);
if bits == 0 {
return Vec::new();
}
let mut labels = Vec::new();
for (i, name) in self.node_label_names.iter().enumerate() {
if bits & (1u64 << i) != 0 {
labels.push(name.as_str());
}
}
labels
}
}