Skip to main content

nodedb_graph/csr/index/
lookup.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Read-side queries: neighbor lookup, counters, degree, iterators,
4//! dense-array helpers, and the `add_node` / `build_dense` utilities.
5//!
6//! Public entry points take [`LocalNodeId`] so that cross-partition id
7//! use panics at the boundary. Crate-internal iteration over dense
8//! ranges uses raw `u32` via `dense_out_edges` / `dense_in_edges`.
9
10use std::mem::size_of;
11use std::sync::Arc;
12
13use nodedb_mem::{EngineId, MemoryGovernor};
14
15use super::types::{CsrIndex, Direction};
16use crate::GraphError;
17use crate::csr::LocalNodeId;
18
19/// Contiguous CSR adjacency arrays produced by [`CsrIndex::build_dense`].
20pub(crate) struct DenseAdjacency {
21    pub(crate) offsets: Vec<u32>,
22    pub(crate) targets: Vec<u32>,
23    pub(crate) labels: Vec<u32>,
24}
25
26impl CsrIndex {
27    /// Partition tag assigned at construction. Embedded in every
28    /// `LocalNodeId` this index produces.
29    #[inline]
30    pub fn partition_tag(&self) -> u32 {
31        self.partition_tag
32    }
33
34    /// Mint a `LocalNodeId` for this partition from a raw dense index.
35    /// Used by algorithm code that iterates `0..node_count` and needs
36    /// to call `LocalNodeId`-taking APIs.
37    #[inline]
38    pub fn local(&self, id: u32) -> LocalNodeId {
39        LocalNodeId::new(id, self.partition_tag)
40    }
41
42    /// Get immediate neighbors by string name.
43    pub fn neighbors(
44        &self,
45        node: &str,
46        label_filter: Option<&str>,
47        direction: Direction,
48    ) -> Vec<(String, String)> {
49        let Some(&node_id) = self.node_to_id.get(node) else {
50            return Vec::new();
51        };
52        self.record_access(node_id);
53        let label_id = label_filter.and_then(|l| self.label_to_id.get(l).copied());
54
55        let mut result = Vec::new();
56
57        if matches!(direction, Direction::Out | Direction::Both) {
58            for (lid, dst) in self.dense_iter_out(node_id) {
59                if label_id.is_none_or(|f| f == lid) {
60                    result.push((
61                        self.id_to_label[lid as usize].clone(),
62                        self.id_to_node[dst as usize].clone(),
63                    ));
64                }
65            }
66        }
67        if matches!(direction, Direction::In | Direction::Both) {
68            for (lid, src) in self.dense_iter_in(node_id) {
69                if label_id.is_none_or(|f| f == lid) {
70                    result.push((
71                        self.id_to_label[lid as usize].clone(),
72                        self.id_to_node[src as usize].clone(),
73                    ));
74                }
75            }
76        }
77
78        result
79    }
80
81    /// Get neighbors with multi-label filter. Empty labels = all edges.
82    pub fn neighbors_multi(
83        &self,
84        node: &str,
85        label_filters: &[&str],
86        direction: Direction,
87    ) -> Vec<(String, String)> {
88        let Some(&node_id) = self.node_to_id.get(node) else {
89            return Vec::new();
90        };
91        self.record_access(node_id);
92        let label_ids: Vec<u32> = label_filters
93            .iter()
94            .filter_map(|l| self.label_to_id.get(*l).copied())
95            .collect();
96        let match_label = |lid: u32| label_ids.is_empty() || label_ids.contains(&lid);
97
98        let mut result = Vec::new();
99
100        if matches!(direction, Direction::Out | Direction::Both) {
101            for (lid, dst) in self.dense_iter_out(node_id) {
102                if match_label(lid) {
103                    result.push((
104                        self.id_to_label[lid as usize].clone(),
105                        self.id_to_node[dst as usize].clone(),
106                    ));
107                }
108            }
109        }
110        if matches!(direction, Direction::In | Direction::Both) {
111            for (lid, src) in self.dense_iter_in(node_id) {
112                if match_label(lid) {
113                    result.push((
114                        self.id_to_label[lid as usize].clone(),
115                        self.id_to_node[src as usize].clone(),
116                    ));
117                }
118            }
119        }
120
121        result
122    }
123
124    /// Add a node without any edges. Idempotent — returns the existing
125    /// tagged id if the name is already present.
126    ///
127    /// Returns `Err(GraphError::NodeOverflow)` when the partition's node-id
128    /// space is exhausted (more than `MAX_NODES_PER_CSR` distinct nodes).
129    pub fn add_node(&mut self, name: &str) -> Result<LocalNodeId, crate::GraphError> {
130        let raw = self.ensure_node(name)?;
131        Ok(LocalNodeId::new(raw, self.partition_tag))
132    }
133
134    pub fn node_count(&self) -> usize {
135        self.id_to_node.len()
136    }
137
138    pub fn contains_node(&self, node: &str) -> bool {
139        self.node_to_id.contains_key(node)
140    }
141
142    /// Get the string name for a tagged node id.
143    pub fn node_name(&self, id: LocalNodeId) -> &str {
144        &self.id_to_node[id.raw(self.partition_tag) as usize]
145    }
146
147    /// Look up the tagged node id for a string name.
148    pub fn node_id(&self, name: &str) -> Option<LocalNodeId> {
149        self.node_to_id
150            .get(name)
151            .copied()
152            .map(|raw| LocalNodeId::new(raw, self.partition_tag))
153    }
154
155    /// Get the string label for a dense label index.
156    pub fn label_name(&self, label_id: u32) -> &str {
157        &self.id_to_label[label_id as usize]
158    }
159
160    /// Look up the dense label id for a string label.
161    pub fn label_id(&self, name: &str) -> Option<u32> {
162        self.label_to_id.get(name).copied()
163    }
164
165    /// Out-degree of a node (including buffer, excluding deleted).
166    pub fn out_degree(&self, id: LocalNodeId) -> usize {
167        self.dense_iter_out(id.raw(self.partition_tag)).count()
168    }
169
170    /// In-degree of a node.
171    pub fn in_degree(&self, id: LocalNodeId) -> usize {
172        self.dense_iter_in(id.raw(self.partition_tag)).count()
173    }
174
175    /// Total edge count (dense + buffer - deleted). O(V).
176    pub fn edge_count(&self) -> usize {
177        let n = self.id_to_node.len();
178        (0..n as u32).map(|i| self.out_degree(self.local(i))).sum()
179    }
180
181    // ── Internal helpers ──
182
183    /// Build contiguous offset/target/label arrays from per-node edge lists.
184    ///
185    /// # Errors
186    ///
187    /// Returns [`GraphError::MemoryBudget`] if `governor` is `Some` and the
188    /// reservation for the three output arrays exceeds the `Graph` engine budget.
189    pub(crate) fn build_dense(
190        edges: &[Vec<(u32, u32)>],
191        governor: Option<&Arc<MemoryGovernor>>,
192    ) -> Result<DenseAdjacency, GraphError> {
193        let n = edges.len();
194        let total: usize = edges.iter().map(|e| e.len()).sum();
195        // Reserve budget for offsets (n+1 u32s), targets (total u32s), labels (total u32s).
196        let reserve_bytes = (n + 1 + 2 * total) * size_of::<u32>();
197        let _budget_guard = governor
198            .map(|g| g.reserve(EngineId::Graph, reserve_bytes))
199            .transpose()?;
200        let mut offsets = Vec::with_capacity(n + 1);
201        let mut targets = Vec::with_capacity(total);
202        let mut labels = Vec::with_capacity(total);
203
204        let mut offset = 0u32;
205        for node_edges in edges {
206            offsets.push(offset);
207            for &(lid, target) in node_edges {
208                targets.push(target);
209                labels.push(lid);
210            }
211            offset += node_edges.len() as u32;
212        }
213        offsets.push(offset);
214
215        Ok(DenseAdjacency {
216            offsets,
217            targets,
218            labels,
219        })
220    }
221
222    /// Check if a specific edge exists in the dense CSR.
223    pub(crate) fn dense_has_edge(&self, src: u32, label: u32, dst: u32) -> bool {
224        for (lid, target) in self.dense_out_edges(src) {
225            if lid == label && target == dst {
226                return true;
227            }
228        }
229        false
230    }
231
232    /// Iterate dense outbound edges for a node (raw u32, no tag check).
233    pub(crate) fn dense_out_edges(&self, node: u32) -> impl Iterator<Item = (u32, u32)> + '_ {
234        let idx = node as usize;
235        if idx + 1 >= self.out_offsets.len() {
236            return Vec::new().into_iter();
237        }
238        let start = self.out_offsets[idx] as usize;
239        let end = self.out_offsets[idx + 1] as usize;
240        (start..end)
241            .map(move |i| (self.out_labels[i], self.out_targets[i]))
242            .collect::<Vec<_>>()
243            .into_iter()
244    }
245
246    /// Iterate dense inbound edges for a node (raw u32, no tag check).
247    pub(crate) fn dense_in_edges(&self, node: u32) -> impl Iterator<Item = (u32, u32)> + '_ {
248        let idx = node as usize;
249        if idx + 1 >= self.in_offsets.len() {
250            return Vec::new().into_iter();
251        }
252        let start = self.in_offsets[idx] as usize;
253        let end = self.in_offsets[idx + 1] as usize;
254        (start..end)
255            .map(move |i| (self.in_labels[i], self.in_targets[i]))
256            .collect::<Vec<_>>()
257            .into_iter()
258    }
259
260    /// Raw u32 iteration over outbound edges (dense + buffer - deleted).
261    /// Crate-internal: used by label-dispatching helpers and algorithms
262    /// that already hold a validated partition borrow.
263    pub(crate) fn dense_iter_out(&self, node: u32) -> impl Iterator<Item = (u32, u32)> + '_ {
264        let dense = self
265            .dense_out_edges(node)
266            .filter(move |&(lid, dst)| !self.deleted_edges.contains(&(node, lid, dst)));
267        dense.chain(self.buffer_out_iter(node))
268    }
269
270    /// Raw u32 iteration over inbound edges (dense + buffer - deleted).
271    pub(crate) fn dense_iter_in(&self, node: u32) -> impl Iterator<Item = (u32, u32)> + '_ {
272        let dense = self
273            .dense_in_edges(node)
274            .filter(move |&(lid, src)| !self.deleted_edges.contains(&(src, lid, node)));
275        dense.chain(self.buffer_in_iter(node))
276    }
277
278    /// Buffer-only iteration over outbound edges for a node.
279    pub(crate) fn buffer_out_iter(&self, node: u32) -> std::vec::IntoIter<(u32, u32)> {
280        let idx = node as usize;
281        if idx < self.buffer_out.len() {
282            self.buffer_out[idx].clone().into_iter()
283        } else {
284            Vec::new().into_iter()
285        }
286    }
287
288    /// Buffer-only iteration over inbound edges for a node.
289    pub(crate) fn buffer_in_iter(&self, node: u32) -> std::vec::IntoIter<(u32, u32)> {
290        let idx = node as usize;
291        if idx < self.buffer_in.len() {
292            self.buffer_in[idx].clone().into_iter()
293        } else {
294            Vec::new().into_iter()
295        }
296    }
297
298    /// Iterate all outbound edges for a tagged node. Yields
299    /// `(label_id, dst)` with `dst` tagged to this partition.
300    pub fn iter_out_edges(
301        &self,
302        node: LocalNodeId,
303    ) -> impl Iterator<Item = (u32, LocalNodeId)> + '_ {
304        let raw = node.raw(self.partition_tag);
305        let tag = self.partition_tag;
306        self.dense_iter_out(raw)
307            .map(move |(lid, dst)| (lid, LocalNodeId::new(dst, tag)))
308    }
309
310    /// Iterate all inbound edges for a tagged node.
311    pub fn iter_in_edges(
312        &self,
313        node: LocalNodeId,
314    ) -> impl Iterator<Item = (u32, LocalNodeId)> + '_ {
315        let raw = node.raw(self.partition_tag);
316        let tag = self.partition_tag;
317        self.dense_iter_in(raw)
318            .map(move |(lid, src)| (lid, LocalNodeId::new(src, tag)))
319    }
320
321    // ── Raw u32 helpers for in-partition algorithm use ──
322    //
323    // The tagged `LocalNodeId` API catches cross-partition id leakage
324    // at runtime. In-partition algorithms that iterate dense ranges
325    // within a single `&CsrIndex` borrow cannot produce a cross-
326    // partition id by construction — no other partition is reachable
327    // from the borrow. These helpers expose the underlying raw u32
328    // iteration at zero cost for that case.
329
330    /// Raw dense out-edges iteration. In-partition algorithm use only.
331    pub fn iter_out_edges_raw(&self, node: u32) -> impl Iterator<Item = (u32, u32)> + '_ {
332        self.dense_iter_out(node)
333    }
334
335    /// Raw dense in-edges iteration. In-partition algorithm use only.
336    pub fn iter_in_edges_raw(&self, node: u32) -> impl Iterator<Item = (u32, u32)> + '_ {
337        self.dense_iter_in(node)
338    }
339
340    /// Raw out-degree by dense index.
341    pub fn out_degree_raw(&self, node: u32) -> usize {
342        self.dense_iter_out(node).count()
343    }
344
345    /// Raw in-degree by dense index.
346    pub fn in_degree_raw(&self, node: u32) -> usize {
347        self.dense_iter_in(node).count()
348    }
349
350    /// String name for a raw dense index. In-partition algorithm use only.
351    pub fn node_name_raw(&self, id: u32) -> &str {
352        &self.id_to_node[id as usize]
353    }
354
355    /// Raw dense index lookup by name. In-partition algorithm use only.
356    pub fn node_id_raw(&self, name: &str) -> Option<u32> {
357        self.node_to_id.get(name).copied()
358    }
359}