Skip to main content

selene_graph/
consistency.rs

1//! Debug-only structural consistency net for built-in graph indexes.
2//!
3//! Every selene-graph derived index — the label / edge-label bitmaps, the
4//! per-`(label, property)` typed indexes, the composite typed indexes, and the
5//! in/out adjacency maps — is maintained incrementally on the commit path
6//! (`crate::mutator`) and rebuilt wholesale on the snapshot-load /
7//! recovery path (`crate::shared::rebuild_derived_state` +
8//! `crate::property_index::rebuild_property_indexes` +
9//! `crate::property_index::rebuild_edge_property_indexes` +
10//! `crate::composite_property_index::rebuild_composite_property_indexes` +
11//! `crate::vector_index::rebuild_vector_indexes` +
12//! `crate::text_index::rebuild_text_indexes`). A
13//! bug in either path corrupts query results silently — the engine keeps
14//! answering, just with wrong rows.
15//!
16//! [`SeleneGraph::assert_indexes_consistent`] re-derives every index from the
17//! authoritative columns (`node_store`, `edge_store`) and compares it against
18//! the maintained state, returning a descriptive [`Err`] on the first
19//! mismatch. It is wired behind `#[cfg(debug_assertions)]` at the two graph
20//! publication seams (the write-transaction commit and the snapshot-load
21//! constructor) so every debug/test build self-checks each published snapshot
22//! at zero release cost.
23
24use roaring::RoaringBitmap;
25
26use selene_core::{DbString, EdgeId, NodeId};
27
28use crate::adjacency::AdjacencyEdge;
29use crate::graph::SeleneGraph;
30use crate::id_map::{EngineIdMap, engine_id_map};
31use crate::vector_index::VectorIndexConfig;
32
33impl SeleneGraph {
34    /// Re-derive every built-in index from the authoritative node/edge columns
35    /// and assert it matches the maintained index state.
36    ///
37    /// Returns `Ok(())` when the snapshot is internally consistent, or
38    /// `Err(message)` describing the first detected drift. Intended for
39    /// debug assertions and tests — it allocates fresh reference indexes and
40    /// is `O(nodes * indexes + edges)` per call.
41    ///
42    /// # Checked invariant families
43    ///
44    /// 1. **Label / edge-label bitmaps** match a re-derivation from alive node
45    ///    label sets and alive edge labels; no bucket is present-but-empty.
46    /// 2. **Typed property indexes** match a fresh lenient re-build
47    ///    (`build_property_index_lenient`). The lenient policy is reused so
48    ///    open-graph kind drift and NaN — which the commit path legitimately
49    ///    skips — do not false-positive.
50    /// 3. **Composite typed indexes** match a fresh lenient re-build, same
51    ///    skip-aware policy.
52    /// 4. **Vector row-set indexes** match a fresh lenient re-build, same
53    ///    skip-aware policy.
54    /// 5. **Text BM25 indexes** match a fresh re-build from string properties.
55    /// 6. **Store integrity / alive-set parity**: per-store columns share one
56    ///    length and every alive row index is in range. Dead rows are
57    ///    permitted holes (D11) and are only asserted absent from derived
58    ///    state, never from the columns. The snapshot's `meta.next_*_id`
59    ///    allocator high-water marks are intentionally **not** checked here:
60    ///    the published snapshot's `meta` is allowed to carry stale allocator
61    ///    fields after a `from_graph` / recovery load (the real allocator
62    ///    floor is enforced separately by `IdAllocator::from_meta_with_floors`),
63    ///    so they are not a derived-index invariant.
64    /// 7. **Adjacency** matches a re-derivation from alive edges in both
65    ///    directions, with no present-but-empty entry.
66    ///
67    /// # Errors
68    ///
69    /// Returns the first mismatch as a human-readable `String`.
70    pub fn assert_indexes_consistent(&self) -> Result<(), String> {
71        self.check_store_integrity()?;
72        self.check_label_index()?;
73        self.check_edge_label_index()?;
74        self.check_property_indexes()?;
75        self.check_edge_property_indexes()?;
76        self.check_composite_property_indexes()?;
77        self.check_vector_indexes()?;
78        self.check_text_indexes()?;
79        self.check_adjacency()?;
80        Ok(())
81    }
82
83    /// Family (4): column lengths agree, alive rows are in range, allocator
84    /// floors clear the highest alive row.
85    fn check_store_integrity(&self) -> Result<(), String> {
86        let node_len = self.node_store.labels.len();
87        if self.node_store.properties.len() != node_len {
88            return Err(format!(
89                "node store column length mismatch: labels={node_len} properties={}",
90                self.node_store.properties.len()
91            ));
92        }
93        let edge_len = self.edge_store.label.len();
94        if self.edge_store.source.len() != edge_len
95            || self.edge_store.target.len() != edge_len
96            || self.edge_store.properties.len() != edge_len
97        {
98            return Err(format!(
99                "edge store column length mismatch: label={edge_len} source={} target={} \
100                 properties={}",
101                self.edge_store.source.len(),
102                self.edge_store.target.len(),
103                self.edge_store.properties.len()
104            ));
105        }
106
107        for row in self.node_store.alive.iter() {
108            if (row as usize) >= node_len {
109                return Err(format!(
110                    "node alive bitmap references out-of-range row {row} (node store len {node_len})"
111                ));
112            }
113        }
114        for row in self.edge_store.alive.iter() {
115            if (row as usize) >= edge_len {
116                return Err(format!(
117                    "edge alive bitmap references out-of-range row {row} (edge store len {edge_len})"
118                ));
119            }
120        }
121        Ok(())
122    }
123
124    /// Family (1a): node label bitmaps.
125    fn check_label_index(&self) -> Result<(), String> {
126        let mut reference: imbl::HashMap<DbString, RoaringBitmap> = imbl::HashMap::new();
127        for row in self.node_store.alive.iter() {
128            let Some(labels) = self.node_store.labels.get(row as usize) else {
129                return Err(format!("alive node row {row} has no label column entry"));
130            };
131            for label in labels.iter() {
132                reference.entry(label.clone()).or_default().insert(row);
133            }
134        }
135        compare_bitmap_index("node label index", &self.idx_label, &reference)
136    }
137
138    /// Family (1b): edge label bitmaps.
139    fn check_edge_label_index(&self) -> Result<(), String> {
140        let mut reference: imbl::HashMap<DbString, RoaringBitmap> = imbl::HashMap::new();
141        for row in self.edge_store.alive.iter() {
142            let Some(label) = self.edge_store.label.get(row as usize) else {
143                return Err(format!("alive edge row {row} has no label column entry"));
144            };
145            reference.entry(label.clone()).or_default().insert(row);
146        }
147        compare_bitmap_index("edge label index", &self.idx_edge_label, &reference)
148    }
149
150    /// Family (2): per-`(label, property)` typed indexes.
151    fn check_property_indexes(&self) -> Result<(), String> {
152        for ((label, property), entry) in &self.property_index {
153            if entry.index.has_empty_bucket() {
154                return Err(format!(
155                    "property index ({label}, {property}) holds a present-but-empty bucket"
156                ));
157            }
158            let reference = crate::property_index::build_property_index_lenient(
159                self,
160                label.clone(),
161                property.clone(),
162                entry.kind(),
163            )
164            .map_err(|err| {
165                format!("failed to re-derive property index ({label}, {property}): {err}")
166            })?;
167            if !entry.index.buckets_eq(&reference) {
168                return Err(format!(
169                    "property index ({label}, {property}) drifted from a fresh re-derivation \
170                     (maintained cardinality {}, reference cardinality {})",
171                    entry.index.cardinality(),
172                    reference.cardinality(),
173                ));
174            }
175        }
176        Ok(())
177    }
178
179    /// Family (2b): edge typed property indexes.
180    fn check_edge_property_indexes(&self) -> Result<(), String> {
181        for ((label, property), entry) in &self.edge_property_index {
182            if entry.index.has_empty_bucket() {
183                return Err(format!(
184                    "edge property index ({label}, {property}) holds a present-but-empty bucket"
185                ));
186            }
187            let reference = crate::property_index::build_edge_property_index_lenient(
188                self,
189                label.clone(),
190                property.clone(),
191                entry.kind(),
192            )
193            .map_err(|err| {
194                format!("failed to re-derive edge property index ({label}, {property}): {err}")
195            })?;
196            if !entry.index.buckets_eq(&reference) {
197                return Err(format!(
198                    "edge property index ({label}, {property}) drifted from a fresh \
199                     re-derivation (maintained cardinality {}, reference cardinality {})",
200                    entry.index.cardinality(),
201                    reference.cardinality(),
202                ));
203            }
204        }
205        Ok(())
206    }
207
208    /// Family (3): composite typed indexes.
209    fn check_composite_property_indexes(&self) -> Result<(), String> {
210        for ((label, _key), entry) in &self.composite_property_index {
211            if entry.index.has_empty_bucket() {
212                return Err(format!(
213                    "composite index ({label}, {:?}) holds a present-but-empty bucket",
214                    entry.declared_properties
215                ));
216            }
217            let reference =
218                crate::composite_property_index::build_composite_property_index_lenient(
219                    self,
220                    label.clone(),
221                    entry.declared_properties.clone(),
222                    entry.kinds(),
223                )
224                .map_err(|err| {
225                    format!(
226                        "failed to re-derive composite index ({label}, {:?}): {err}",
227                        entry.declared_properties
228                    )
229                })?;
230            if !entry.index.buckets_eq(&reference) {
231                return Err(format!(
232                    "composite index ({label}, {:?}) drifted from a fresh re-derivation \
233                     (maintained cardinality {}, reference cardinality {})",
234                    entry.declared_properties,
235                    entry.index.cardinality(),
236                    reference.cardinality(),
237                ));
238            }
239        }
240        Ok(())
241    }
242
243    /// Family (4): vector row-set indexes.
244    fn check_vector_indexes(&self) -> Result<(), String> {
245        for ((label, property), entry) in &self.vector_index {
246            let reference = crate::vector_index::build_vector_index_lenient_with_configs(
247                self,
248                label.clone(),
249                property.clone(),
250                entry.kind(),
251                entry.dimension(),
252                VectorIndexConfig::new(entry.hnsw_config(), entry.ivf_config()),
253            )
254            .map_err(|err| {
255                format!("failed to re-derive vector index ({label}, {property}): {err}")
256            })?;
257            if !entry.index.rows_eq(&reference) {
258                return Err(format!(
259                    "vector index ({label}, {property}) drifted from a fresh re-derivation \
260                     (maintained cardinality {}, reference cardinality {})",
261                    entry.index.cardinality(),
262                    reference.cardinality(),
263                ));
264            }
265        }
266        Ok(())
267    }
268
269    /// Family (5): BM25 text indexes.
270    fn check_text_indexes(&self) -> Result<(), String> {
271        for ((label, property), entry) in &self.text_index {
272            let reference = crate::TextIndex::build(self, label.clone(), property.clone())
273                .map_err(|err| {
274                    format!("failed to re-derive text index ({label}, {property}): {err}")
275                })?;
276            if !entry.index.rows_eq(&reference) {
277                return Err(format!(
278                    "text index ({label}, {property}) drifted from a fresh re-derivation \
279                     (maintained documents {}, reference documents {})",
280                    entry.index.document_count(),
281                    reference.document_count(),
282                ));
283            }
284        }
285        Ok(())
286    }
287
288    /// Family (7): in/out adjacency.
289    fn check_adjacency(&self) -> Result<(), String> {
290        let mut out_reference: EngineIdMap<NodeId, Vec<AdjacencyEdge>> = engine_id_map();
291        let mut in_reference: EngineIdMap<NodeId, Vec<AdjacencyEdge>> = engine_id_map();
292        for row in self.edge_store.alive.iter() {
293            let Some(edge_id) = self.edge_id_for_row(crate::store::RowIndex::new(row)) else {
294                return Err(format!("alive edge row {row} has no mapped external id"));
295            };
296            let Some(label) = self.edge_store.label.get(row as usize).cloned() else {
297                return Err(format!("alive edge row {row} has no label column entry"));
298            };
299            let Some(source) = self.edge_store.source.get(row as usize).copied() else {
300                return Err(format!("alive edge row {row} has no source column entry"));
301            };
302            let Some(target) = self.edge_store.target.get(row as usize).copied() else {
303                return Err(format!("alive edge row {row} has no target column entry"));
304            };
305            out_reference
306                .entry(source)
307                .or_default()
308                .push(AdjacencyEdge {
309                    label: label.clone(),
310                    neighbor: target,
311                    edge_id,
312                });
313            in_reference.entry(target).or_default().push(AdjacencyEdge {
314                label,
315                neighbor: source,
316                edge_id,
317            });
318        }
319        compare_adjacency("outgoing", &self.adjacency_out, &out_reference)?;
320        compare_adjacency("incoming", &self.adjacency_in, &in_reference)?;
321        Ok(())
322    }
323}
324
325/// Compare a maintained `DbString`-keyed bitmap index against a re-derivation,
326/// failing on any key difference, bitmap difference, or empty maintained
327/// bucket.
328fn compare_bitmap_index(
329    name: &str,
330    maintained: &imbl::HashMap<DbString, RoaringBitmap>,
331    reference: &imbl::HashMap<DbString, RoaringBitmap>,
332) -> Result<(), String> {
333    for (label, bitmap) in maintained {
334        if bitmap.is_empty() {
335            return Err(format!(
336                "{name}: key {label} holds a present-but-empty bitmap"
337            ));
338        }
339        match reference.get(label) {
340            None => {
341                return Err(format!(
342                    "{name}: key {label} is maintained but not re-derived (stale rows)"
343                ));
344            }
345            Some(expected) if expected != bitmap => {
346                return Err(format!(
347                    "{name}: key {label} bitmap drifted (maintained {:?}, reference {:?})",
348                    bitmap.iter().collect::<Vec<_>>(),
349                    expected.iter().collect::<Vec<_>>()
350                ));
351            }
352            Some(_) => {}
353        }
354    }
355    for label in reference.keys() {
356        if !maintained.contains_key(label) {
357            return Err(format!(
358                "{name}: key {label} is re-derived but missing from the maintained index"
359            ));
360        }
361    }
362    Ok(())
363}
364
365/// Compare a maintained adjacency map against a re-derivation. The maintained
366/// entry is sorted by `(label, neighbor, edge_id)`; the reference is sorted to
367/// match before comparison so parallel edges and ordering are both checked.
368fn compare_adjacency(
369    direction: &str,
370    maintained: &EngineIdMap<NodeId, crate::adjacency::AdjacencyEntry>,
371    reference: &EngineIdMap<NodeId, Vec<AdjacencyEdge>>,
372) -> Result<(), String> {
373    for (node, entry) in maintained {
374        if entry.is_empty() {
375            return Err(format!(
376                "{direction} adjacency: node {node} holds a present-but-empty entry"
377            ));
378        }
379        let maintained_edges: Vec<AdjacencyEdge> = entry.iter().cloned().collect();
380        match reference.get(node) {
381            None => {
382                return Err(format!(
383                    "{direction} adjacency: node {node} is maintained but has no alive edges \
384                     (dangling adjacency)"
385                ));
386            }
387            Some(expected) => {
388                let mut expected_sorted = expected.clone();
389                expected_sorted.sort_by_key(adjacency_sort_key);
390                if maintained_edges != expected_sorted {
391                    return Err(format!(
392                        "{direction} adjacency: node {node} drifted (maintained {maintained_edges:?}, \
393                         reference {expected_sorted:?})"
394                    ));
395                }
396            }
397        }
398    }
399    for node in reference.keys() {
400        if !maintained.contains_key(node) {
401            return Err(format!(
402                "{direction} adjacency: node {node} has alive edges but is missing from the \
403                 maintained adjacency map"
404            ));
405        }
406    }
407    Ok(())
408}
409
410fn adjacency_sort_key(edge: &AdjacencyEdge) -> (DbString, NodeId, EdgeId) {
411    (edge.label.clone(), edge.neighbor, edge.edge_id)
412}
413
414#[cfg(test)]
415#[path = "consistency_tests.rs"]
416mod tests;