Skip to main content

nodedb_graph/csr/index/
interning.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Node / label string↔id interning and the node-label bitset.
4
5use std::collections::hash_map::Entry;
6
7use super::types::CsrIndex;
8
9impl CsrIndex {
10    /// Get or create a dense ID for a node.
11    ///
12    /// Returns `Err(GraphError::NodeOverflow)` when the partition already holds
13    /// `MAX_NODES_PER_CSR` nodes and a new name is introduced. The check uses a
14    /// typed `Result` rather than `debug_assert!` so the failure mode is loud
15    /// and deterministic — a silent `u32` wrap would reproduce the same class
16    /// of bug as the label-overflow issue this crate fixed previously.
17    pub(crate) fn ensure_node(&mut self, node: &str) -> Result<u32, crate::GraphError> {
18        match self.node_to_id.entry(node.to_string()) {
19            Entry::Occupied(e) => Ok(*e.get()),
20            Entry::Vacant(e) => {
21                let len = self.id_to_node.len();
22                if len >= crate::MAX_NODES_PER_CSR {
23                    return Err(crate::GraphError::NodeOverflow { used: len });
24                }
25                let id = len as u32;
26                e.insert(id);
27                self.id_to_node.push(node.to_string());
28                // Extend dense offsets (new node has 0 edges in dense part).
29                self.out_offsets
30                    .push(*self.out_offsets.last().unwrap_or(&0));
31                self.in_offsets.push(*self.in_offsets.last().unwrap_or(&0));
32                // Extend buffer and access tracking.
33                self.buffer_out.push(Vec::new());
34                self.buffer_in.push(Vec::new());
35                self.buffer_out_weights.push(Vec::new());
36                self.buffer_in_weights.push(Vec::new());
37                self.node_label_bits.push(0);
38                // Surrogate is populated later by the EdgePut handler; start
39                // with the ZERO sentinel so unset nodes are never in a bitmap.
40                self.node_surrogates.push(0);
41                self.access_counts.push(std::cell::Cell::new(0));
42                Ok(id)
43            }
44        }
45    }
46
47    /// Get or create a dense ID for a label.
48    ///
49    /// Returns `Err(GraphError::LabelOverflow)` if the id space is
50    /// exhausted (`MAX_EDGE_LABELS` distinct labels already interned).
51    /// The DSL ingress layer enforces a far tighter user-facing cap,
52    /// so this branch is effectively unreachable in practice — but the
53    /// Result is propagated so the ceiling failure mode is typed and
54    /// loud rather than a silent cast.
55    pub(crate) fn ensure_label(&mut self, label: &str) -> Result<u32, crate::GraphError> {
56        match self.label_to_id.entry(label.to_string()) {
57            Entry::Occupied(e) => Ok(*e.get()),
58            Entry::Vacant(e) => {
59                let len = self.id_to_label.len();
60                if len >= crate::MAX_EDGE_LABELS {
61                    return Err(crate::GraphError::LabelOverflow { used: len });
62                }
63                let id = len as u32;
64                e.insert(id);
65                self.id_to_label.push(label.to_string());
66                Ok(id)
67            }
68        }
69    }
70
71    // ── Surrogate methods ──
72
73    /// Set the surrogate for a node, identified by its user-visible name.
74    ///
75    /// Call once per node after `add_edge` / `add_edge_weighted` has allocated
76    /// a CSR-local id for it. If the node is not yet interned this is a no-op
77    /// (the edge insertion that follows will intern it and a subsequent call
78    /// with the correct surrogate will set it). Calling with `Surrogate::ZERO`
79    /// is a no-op — the zero sentinel is the initial state and has no meaning.
80    pub fn set_node_surrogate(&mut self, node: &str, surrogate: nodedb_types::Surrogate) {
81        let raw = surrogate.as_u32();
82        if raw == 0 {
83            return;
84        }
85        if let Some(&id) = self.node_to_id.get(node)
86            && let Some(slot) = self.node_surrogates.get_mut(id as usize)
87        {
88            *slot = raw;
89            self.surrogate_to_local.insert(raw, id);
90        }
91    }
92
93    /// Return the raw surrogate `u32` for a CSR-local node id.
94    ///
95    /// Returns `0` (the ZERO sentinel) when the node has no surrogate set.
96    pub fn node_surrogate_raw(&self, local_id: u32) -> u32 {
97        self.node_surrogates
98            .get(local_id as usize)
99            .copied()
100            .unwrap_or(0)
101    }
102
103    /// Look up the user-visible node name bound to a `Surrogate`.
104    ///
105    /// Returns `None` for `Surrogate::ZERO` and for surrogates that have
106    /// never been bound to a node in this partition. Used by cross-engine
107    /// fusion (graph RAG) to translate a vector-side surrogate into a
108    /// graph BFS seed name.
109    pub fn node_id_for_surrogate(&self, surrogate: nodedb_types::Surrogate) -> Option<&str> {
110        let raw = surrogate.as_u32();
111        if raw == 0 {
112            return None;
113        }
114        let local_id = *self.surrogate_to_local.get(&raw)?;
115        self.id_to_node.get(local_id as usize).map(String::as_str)
116    }
117
118    // ── Node label methods ──
119
120    /// Get or create a node label ID (max 64 labels).
121    fn ensure_node_label(&mut self, label: &str) -> Option<u8> {
122        if let Some(&id) = self.node_label_to_id.get(label) {
123            return Some(id);
124        }
125        let id = self.node_label_names.len();
126        if id >= 64 {
127            return None; // bitset limit
128        }
129        let id = id as u8;
130        self.node_label_to_id.insert(label.to_string(), id);
131        self.node_label_names.push(label.to_string());
132        Some(id)
133    }
134
135    /// Add a label to a node.
136    ///
137    /// Returns `Ok(false)` if the 64 distinct node-label limit is hit (the
138    /// label is silently ignored). Returns `Err(GraphError::NodeOverflow)` if
139    /// the node is new and the partition's node-id space is exhausted.
140    pub fn add_node_label(&mut self, node: &str, label: &str) -> Result<bool, crate::GraphError> {
141        let node_id = self.ensure_node(node)?;
142        let Some(label_id) = self.ensure_node_label(label) else {
143            return Ok(false);
144        };
145        self.node_label_bits[node_id as usize] |= 1u64 << label_id;
146        Ok(true)
147    }
148
149    /// Remove a label from a node.
150    pub fn remove_node_label(&mut self, node: &str, label: &str) {
151        let Some(&node_id) = self.node_to_id.get(node) else {
152            return;
153        };
154        let Some(&label_id) = self.node_label_to_id.get(label) else {
155            return;
156        };
157        self.node_label_bits[node_id as usize] &= !(1u64 << label_id);
158    }
159
160    /// Check if a node has a specific label.
161    pub fn node_has_label(&self, node_id: u32, label: &str) -> bool {
162        let Some(&label_id) = self.node_label_to_id.get(label) else {
163            return false;
164        };
165        let bits = self
166            .node_label_bits
167            .get(node_id as usize)
168            .copied()
169            .unwrap_or(0);
170        bits & (1u64 << label_id) != 0
171    }
172
173    /// Get all label names for a node.
174    pub fn node_labels(&self, node_id: u32) -> Vec<&str> {
175        let bits = self
176            .node_label_bits
177            .get(node_id as usize)
178            .copied()
179            .unwrap_or(0);
180        if bits == 0 {
181            return Vec::new();
182        }
183        let mut labels = Vec::new();
184        for (i, name) in self.node_label_names.iter().enumerate() {
185            if bits & (1u64 << i) != 0 {
186                labels.push(name.as_str());
187            }
188        }
189        labels
190    }
191}