selene_graph/store.rs
1//! Structure-of-arrays node and edge stores per spec 03 section 3.1.
2
3use std::sync::Arc;
4
5use roaring::RoaringBitmap;
6
7use selene_core::{DbString, EdgeId, LabelSet, NodeId, PropertyMap};
8
9use crate::chunked_vec::ChunkedVec;
10
11/// Internal storage row index — the position of a node or edge in its store's
12/// structure-of-arrays columns.
13///
14/// Distinct from the external [`NodeId`]/[`EdgeId`]: a `RowIndex` is dense,
15/// remappable by compaction (D22 / BRIEF-Item-4b/4c), and **never persisted** —
16/// only external ids reach the WAL, snapshot, or `Change` stream. There is **no**
17/// fixed arithmetic relationship between a row and its id: post-4c new rows are
18/// appended at the dense end (the current row count) while the monotonic id
19/// counter advances independently, and a compaction pass renumbers rows under
20/// stable ids. The mapping is resolved *only* through the
21/// [`SeleneGraph`](crate::SeleneGraph) `node_id_to_row`/`edge_id_to_row` maps and
22/// the per-store `row_to_id` reverse columns — never by index arithmetic.
23/// Keeping it a newtype lets the compiler flag any site that still conflates a
24/// row with an external id.
25#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
26pub struct RowIndex(u32);
27
28impl RowIndex {
29 /// Sentinel for "no row" (`u32::MAX`, never a valid dense row position).
30 pub const TOMBSTONE: RowIndex = RowIndex(u32::MAX);
31
32 /// Construct a `RowIndex` from a raw `u32` row position.
33 #[must_use]
34 pub const fn new(raw: u32) -> Self {
35 Self(raw)
36 }
37
38 /// Return the raw `u32` row position.
39 #[must_use]
40 pub const fn get(self) -> u32 {
41 self.0
42 }
43}
44
45/// Node columns plus liveness bitmap.
46#[derive(Clone, Debug)]
47pub struct NodeStore {
48 /// Per-row node label sets.
49 pub labels: ChunkedVec<LabelSet>,
50 /// Per-row node property maps.
51 pub properties: ChunkedVec<PropertyMap>,
52 /// Per-row external node id (`RowIndex -> NodeId`). Dead / hole rows hold
53 /// [`NodeId::TOMBSTONE`]. Parallel with [`labels`](Self::labels): the stable
54 /// id is read here, never synthesized as `row + 1`, so compaction can
55 /// renumber rows under stable ids.
56 pub row_to_id: ChunkedVec<NodeId>,
57 /// Alive row indexes, shared copy-on-write across snapshots (B1): cloning
58 /// the store bumps a refcount; mutate only through
59 /// [`alive_mut`](Self::alive_mut) so a pre-clone snapshot never observes
60 /// the change.
61 pub alive: Arc<RoaringBitmap>,
62}
63
64impl NodeStore {
65 /// Construct an empty node store.
66 #[must_use]
67 pub fn new() -> Self {
68 Self {
69 labels: ChunkedVec::new(),
70 properties: ChunkedVec::new(),
71 row_to_id: ChunkedVec::new(),
72 alive: Arc::new(RoaringBitmap::new()),
73 }
74 }
75
76 /// Mutable access to the alive bitmap via `Arc::make_mut` (B1 COW): the
77 /// first mutation after a snapshot clone pays one bitmap clone; while the
78 /// Arc is unique this is a plain `&mut` borrow.
79 pub fn alive_mut(&mut self) -> &mut RoaringBitmap {
80 Arc::make_mut(&mut self.alive)
81 }
82
83 /// Number of allocated node rows, including dead holes.
84 #[must_use]
85 pub fn len(&self) -> usize {
86 self.labels.len()
87 }
88
89 /// Return true when no node rows exist.
90 #[must_use]
91 pub fn is_empty(&self) -> bool {
92 self.len() == 0
93 }
94
95 /// Return true when `index` is alive.
96 #[must_use]
97 pub fn is_alive(&self, index: u32) -> bool {
98 self.alive.contains(index)
99 }
100}
101
102impl Default for NodeStore {
103 fn default() -> Self {
104 Self::new()
105 }
106}
107
108/// Edge columns plus liveness bitmap.
109///
110/// Stored edges are directed by construction: every live row has exactly one
111/// source node and one target node. Undirected query patterns are a matching
112/// convenience and do not add an undirected storage bit.
113#[derive(Clone, Debug)]
114pub struct EdgeStore {
115 /// Per-row edge label.
116 pub label: ChunkedVec<DbString>,
117 /// Per-row edge source node.
118 pub source: ChunkedVec<NodeId>,
119 /// Per-row edge target node.
120 pub target: ChunkedVec<NodeId>,
121 /// Per-row edge property maps.
122 pub properties: ChunkedVec<PropertyMap>,
123 /// Per-row external edge id (`RowIndex -> EdgeId`). Dead / hole rows hold
124 /// [`EdgeId::TOMBSTONE`]. Parallel with [`label`](Self::label): the stable id
125 /// is read here, never synthesized as `row + 1`.
126 pub row_to_id: ChunkedVec<EdgeId>,
127 /// Alive row indexes, shared copy-on-write across snapshots (B1): cloning
128 /// the store bumps a refcount; mutate only through
129 /// [`alive_mut`](Self::alive_mut) so a pre-clone snapshot never observes
130 /// the change.
131 pub alive: Arc<RoaringBitmap>,
132}
133
134impl EdgeStore {
135 /// Construct an empty edge store.
136 #[must_use]
137 pub fn new() -> Self {
138 Self {
139 label: ChunkedVec::new(),
140 source: ChunkedVec::new(),
141 target: ChunkedVec::new(),
142 properties: ChunkedVec::new(),
143 row_to_id: ChunkedVec::new(),
144 alive: Arc::new(RoaringBitmap::new()),
145 }
146 }
147
148 /// Mutable access to the alive bitmap via `Arc::make_mut` (B1 COW): the
149 /// first mutation after a snapshot clone pays one bitmap clone; while the
150 /// Arc is unique this is a plain `&mut` borrow.
151 pub fn alive_mut(&mut self) -> &mut RoaringBitmap {
152 Arc::make_mut(&mut self.alive)
153 }
154
155 /// Number of allocated edge rows, including dead holes.
156 #[must_use]
157 pub fn len(&self) -> usize {
158 self.label.len()
159 }
160
161 /// Return true when no edge rows exist.
162 #[must_use]
163 pub fn is_empty(&self) -> bool {
164 self.len() == 0
165 }
166
167 /// Return true when `index` is alive.
168 #[must_use]
169 pub fn is_alive(&self, index: u32) -> bool {
170 self.alive.contains(index)
171 }
172}
173
174impl Default for EdgeStore {
175 fn default() -> Self {
176 Self::new()
177 }
178}
179
180// BRIEF-Item-4c retired `node_row_index_arith` / `edge_row_index_arith`: both the
181// live create path (mutator) and the recovery WAL-replay path now allocate rows
182// by APPEND (`row = store.len()`) rather than `id - 1`. After 4b compaction the
183// monotonic high-water id far exceeds the dense row count, so id-arith would
184// re-pad the reclaimed holes; append keeps stores dense. No production read path
185// derives a row from an id anymore — the grep-gate (`check-no-rowid-arith.sh`)
186// needs no binding-authority allowlist.
187
188#[cfg(test)]
189mod tests {
190 use proptest::prelude::*;
191
192 use super::*;
193 use selene_core::db_string;
194
195 #[test]
196 fn node_store_new_is_empty() {
197 let store = NodeStore::new();
198 assert_eq!(store.len(), 0);
199 assert!(store.alive.is_empty());
200 }
201
202 #[test]
203 fn edge_store_new_is_empty() {
204 let store = EdgeStore::new();
205 assert_eq!(store.len(), 0);
206 assert!(store.alive.is_empty());
207 }
208
209 #[test]
210 fn is_alive_after_simulated_insert() {
211 let mut store = NodeStore::new();
212 store
213 .labels
214 .push(LabelSet::single(db_string("store.node").unwrap()));
215 store.properties.push(PropertyMap::new());
216 store.alive_mut().insert(0);
217 assert!(store.is_alive(0));
218 assert!(!store.is_alive(1));
219 }
220
221 #[test]
222 fn clone_shares_alive_bitmap_until_mutation() {
223 // B1: a read-only store clone shares the alive bitmap by refcount.
224 let mut store = NodeStore::new();
225 store.alive_mut().insert(0);
226 store.alive_mut().insert(7);
227 let cloned = store.clone();
228 assert!(Arc::ptr_eq(&store.alive, &cloned.alive));
229 assert_eq!(Arc::strong_count(&store.alive), 2);
230 }
231
232 #[test]
233 fn alive_mutation_on_clone_isolates_snapshot() {
234 // B1: delete (remove) and create (insert) on a clone must COW the
235 // bitmap; the original snapshot keeps its pre-mutation liveness.
236 let mut store = EdgeStore::new();
237 store.alive_mut().insert(0);
238 store.alive_mut().insert(1);
239 let mut cloned = store.clone();
240 cloned.alive_mut().remove(1);
241 cloned.alive_mut().insert(9);
242 assert!(store.is_alive(0));
243 assert!(store.is_alive(1));
244 assert!(!store.is_alive(9));
245 assert!(cloned.is_alive(0));
246 assert!(!cloned.is_alive(1));
247 assert!(cloned.is_alive(9));
248 assert!(!Arc::ptr_eq(&store.alive, &cloned.alive));
249 }
250
251 proptest! {
252 #[test]
253 fn alive_clone_isolation_across_create_delete_compact(
254 seed_rows in proptest::collection::btree_set(0_u32..4096, 0..256),
255 post_ops in proptest::collection::vec((any::<bool>(), 0_u32..4096), 1..256),
256 compact in any::<bool>(),
257 ) {
258 // B1 (c): a snapshot clone of the alive bitmap is never disturbed
259 // by later creates (insert), deletes (remove), or a
260 // compaction-style dense rebuild on the live store.
261 let mut live = NodeStore::new();
262 for row in &seed_rows {
263 live.alive_mut().insert(*row);
264 }
265 let snapshot = live.clone();
266 let mut model: std::collections::BTreeSet<u32> = seed_rows.clone();
267 for (insert, row) in post_ops {
268 if insert {
269 live.alive_mut().insert(row);
270 model.insert(row);
271 } else {
272 live.alive_mut().remove(row);
273 model.remove(&row);
274 }
275 }
276 if compact {
277 // compact_core-shaped rebuild: renumber survivors dense from a
278 // fresh store, then overwrite the live store wholesale.
279 let mut dense = NodeStore::new();
280 for new_row in 0..model.len() as u32 {
281 dense.alive_mut().insert(new_row);
282 }
283 live = dense;
284 model = (0..model.len() as u32).collect();
285 }
286 prop_assert_eq!(
287 snapshot.alive.iter().collect::<Vec<_>>(),
288 seed_rows.iter().copied().collect::<Vec<_>>()
289 );
290 prop_assert_eq!(
291 live.alive.iter().collect::<Vec<_>>(),
292 model.iter().copied().collect::<Vec<_>>()
293 );
294 }
295 }
296}