Skip to main content

nodedb_graph/csr/index/
types.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! `CsrIndex` struct definition and constructor.
4//!
5//! Memory layout at scale (1B edges):
6//! - Old: `Vec<Vec<(String, u32)>>` ≈ 60 GB (heap String per edge)
7//! - New: contiguous `Vec<u32>` offsets + targets + `Vec<u32>` labels ≈ 12 GB
8//!
9//! Writes accumulate in a mutable buffer (`buffer_out`/`buffer_in`).
10//! Reads check both the dense CSR arrays and the mutable buffer.
11//! `compact()` merges the buffer into the dense arrays (double-buffered swap).
12//!
13//! ## Edge Weights
14//!
15//! Optional `f64` weight per edge stored in parallel arrays. `None` when the
16//! graph is entirely unweighted (zero memory overhead). Populated from the
17//! `"weight"` edge property at insertion time. Unweighted edges default to 1.0.
18
19use std::collections::{HashMap, HashSet};
20use std::sync::Arc;
21
22use nodedb_mem::MemoryGovernor;
23
24use crate::csr::dense_array::DenseArray;
25
26// Re-export shared Direction from nodedb-types.
27pub use nodedb_types::graph::Direction;
28
29/// Dense integer CSR adjacency index with interned node IDs and labels.
30pub struct CsrIndex {
31    // ── Node interning ──
32    pub(crate) node_to_id: HashMap<String, u32>,
33    pub(crate) id_to_node: Vec<String>,
34
35    // ── Label interning ──
36    pub(crate) label_to_id: HashMap<String, u32>,
37    pub(crate) id_to_label: Vec<String>,
38
39    // ── Dense CSR (read-only between compactions) ──
40    //
41    // Offsets are `Vec<u32>` (mutable — extended on node creation).
42    // Targets/labels/weights use `DenseArray<T>` for zero-copy mmap support:
43    // after cold start from rkyv checkpoint, these point directly into the
44    // archived buffer with no deserialization. Compaction replaces them with
45    // owned Vecs.
46    /// `out_offsets[i]..out_offsets[i+1]` = range in `out_targets`/`out_labels`.
47    /// Length: `num_nodes + 1`.
48    pub(crate) out_offsets: Vec<u32>,
49    pub(crate) out_targets: DenseArray<u32>,
50    pub(crate) out_labels: DenseArray<u32>,
51    /// Parallel edge weight array. `None` if graph has no weighted edges.
52    pub(crate) out_weights: Option<DenseArray<f64>>,
53
54    pub(crate) in_offsets: Vec<u32>,
55    pub(crate) in_targets: DenseArray<u32>,
56    pub(crate) in_labels: DenseArray<u32>,
57    /// Parallel inbound edge weight array. `None` if graph has no weighted edges.
58    pub(crate) in_weights: Option<DenseArray<f64>>,
59
60    // ── Mutable write buffer ──
61    /// Per-node outbound buffer: `buffer_out[node_id]` = `[(label_id, dst_id)]`.
62    pub(crate) buffer_out: Vec<Vec<(u32, u32)>>,
63    pub(crate) buffer_in: Vec<Vec<(u32, u32)>>,
64    /// Per-node outbound weight buffer (parallel to `buffer_out`).
65    /// Only populated when `has_weights` is true.
66    pub(crate) buffer_out_weights: Vec<Vec<f64>>,
67    /// Per-node inbound weight buffer (parallel to `buffer_in`).
68    pub(crate) buffer_in_weights: Vec<Vec<f64>>,
69
70    /// Edges deleted since last compaction: `(src, label, dst)`.
71    pub(crate) deleted_edges: HashSet<(u32, u32, u32)>,
72
73    /// Whether any edge has a non-default weight. When false, weight arrays
74    /// are `None` and weight buffers are empty — zero overhead for unweighted graphs.
75    pub(crate) has_weights: bool,
76
77    // ── Node labels (bitset) ──
78    //
79    // Each node has a `u64` bitset where bit `i` corresponds to label ID `i`.
80    // Supports up to 64 distinct node labels. Labels are interned in
81    // `node_label_to_id` / `node_label_names` (separate from edge labels).
82    // Used by MATCH pattern `(a:Person)` — filters nodes by label membership.
83    pub(crate) node_label_bits: Vec<u64>,
84    pub(crate) node_label_to_id: HashMap<String, u8>,
85    pub(crate) node_label_names: Vec<String>,
86
87    // ── Surrogate storage ──
88    /// Per-node surrogate: `node_surrogates[local_id]` = global `Surrogate.as_u32()`.
89    ///
90    /// `0` (Surrogate::ZERO) is the unset sentinel — populated at `EdgePut` time
91    /// from the surrogates resolved by the Control Plane. A node whose surrogate
92    /// is zero was inserted without surrogate plumbing (e.g. legacy paths or tests)
93    /// and is treated as "not in any prefilter bitmap" when a bitmap is active.
94    pub(crate) node_surrogates: Vec<u32>,
95    /// Reverse map: `Surrogate.as_u32()` → CSR-local node id. Maintained
96    /// in step with `node_surrogates` by `set_node_surrogate`. Excludes the
97    /// zero sentinel. Used by cross-engine fusion (graph RAG) to resolve a
98    /// vector-side surrogate to the corresponding graph node name.
99    pub(crate) surrogate_to_local: HashMap<u32, u32>,
100
101    // ── Hot/cold access tracking ──
102    /// Per-node access counter: incremented on each neighbor/BFS/path query.
103    /// Uses `Cell<u32>` so access can be tracked through `&self` references
104    /// (traversal methods are `&self` for shared read access).
105    pub(crate) access_counts: Vec<std::cell::Cell<u32>>,
106    /// Total queries served since last access counter reset.
107    pub(crate) query_epoch: u64,
108
109    /// Unique partition tag assigned at construction. Embedded into
110    /// every `LocalNodeId` this index produces; cross-partition use is
111    /// caught by comparing tags at API boundaries.
112    pub(crate) partition_tag: u32,
113
114    /// Optional memory governor for budget tracking.
115    ///
116    /// When `None`, all memory operations proceed without budget enforcement
117    /// (the behavior for NodeDB-Lite / WASM deployments that have no governor).
118    /// When `Some`, `compact()`, `checkpoint_to_bytes()`, and `compute_statistics()`
119    /// reserve bytes against `EngineId::Graph` before allocating and release them
120    /// on drop via `BudgetGuard`.
121    pub(crate) governor: Option<Arc<MemoryGovernor>>,
122}
123
124impl Default for CsrIndex {
125    fn default() -> Self {
126        Self::new()
127    }
128}
129
130impl CsrIndex {
131    pub fn new() -> Self {
132        Self {
133            node_to_id: HashMap::new(),
134            id_to_node: Vec::new(),
135            label_to_id: HashMap::new(),
136            id_to_label: Vec::new(),
137            out_offsets: vec![0],
138            out_targets: DenseArray::default(),
139            out_labels: DenseArray::default(),
140            out_weights: None,
141            in_offsets: vec![0],
142            in_targets: DenseArray::default(),
143            in_labels: DenseArray::default(),
144            in_weights: None,
145            buffer_out: Vec::new(),
146            buffer_in: Vec::new(),
147            buffer_out_weights: Vec::new(),
148            buffer_in_weights: Vec::new(),
149            deleted_edges: HashSet::new(),
150            has_weights: false,
151            node_label_bits: Vec::new(),
152            node_label_to_id: HashMap::new(),
153            node_label_names: Vec::new(),
154            node_surrogates: Vec::new(),
155            surrogate_to_local: HashMap::new(),
156            access_counts: Vec::new(),
157            query_epoch: 0,
158            partition_tag: crate::csr::local_node_id::next_partition_tag(),
159            governor: None,
160        }
161    }
162
163    /// Create a new `CsrIndex` wired to a memory governor.
164    ///
165    /// Subsequent calls to `compact()`, `checkpoint_to_bytes()`, and
166    /// `compute_statistics()` will reserve bytes against `EngineId::Graph`
167    /// before allocating and return `Err(GraphError::MemoryBudget(_))` if
168    /// the budget is exhausted.
169    ///
170    /// Use `CsrIndex::new()` when deploying without a governor (NodeDB-Lite,
171    /// WASM, or tests that do not need budget enforcement).
172    pub fn with_governor(governor: Arc<MemoryGovernor>) -> Self {
173        Self {
174            governor: Some(governor),
175            ..Self::new()
176        }
177    }
178
179    /// Attach a memory governor to an existing `CsrIndex`.
180    ///
181    /// Used by the REINDEX path: a partition is rebuilt on a background thread
182    /// (without a governor since `MemoryGovernor` is `Arc<...>` but the thread
183    /// is independent), and on cutover the Data Plane installs the governor.
184    pub fn with_governor_attached(mut self, governor: Arc<MemoryGovernor>) -> Self {
185        self.governor = Some(governor);
186        self
187    }
188
189    /// Whether any edge in this index has a non-default (1.0) weight.
190    pub fn has_weighted_edges(&self) -> bool {
191        self.has_weights
192    }
193}