Skip to main content

selene_graph/
consistency.rs

1//! Debug-only structural consistency net for built-in graph indexes.
2//!
3//! Every selene-graph derived index — the label / edge-label bitmaps, the
4//! per-`(label, property)` typed indexes, the composite typed indexes, and the
5//! in/out adjacency maps — is maintained incrementally on the commit path
6//! (`crate::mutator`) and rebuilt wholesale on the snapshot-load /
7//! recovery path (`crate::shared::rebuild_derived_state` +
8//! `crate::property_index::rebuild_property_indexes` +
9//! `crate::composite_property_index::rebuild_composite_property_indexes` +
10//! `crate::vector_index::rebuild_vector_indexes` +
11//! `crate::text_index::rebuild_text_indexes`). A
12//! bug in either path corrupts query results silently — the engine keeps
13//! answering, just with wrong rows.
14//!
15//! [`SeleneGraph::assert_indexes_consistent`] re-derives every index from the
16//! authoritative columns (`node_store`, `edge_store`) and compares it against
17//! the maintained state, returning a descriptive [`Err`] on the first
18//! mismatch. It is wired behind `#[cfg(debug_assertions)]` at the two graph
19//! publication seams (the write-transaction commit and the snapshot-load
20//! constructor) so every debug/test build self-checks each published snapshot
21//! at zero release cost.
22
23use roaring::RoaringBitmap;
24
25use selene_core::{DbString, EdgeId, NodeId};
26
27use crate::adjacency::AdjacencyEdge;
28use crate::graph::SeleneGraph;
29use crate::id_map::{EngineIdMap, engine_id_map};
30use crate::vector_index::VectorIndexConfig;
31
32impl SeleneGraph {
33    /// Re-derive every built-in index from the authoritative node/edge columns
34    /// and assert it matches the maintained index state.
35    ///
36    /// Returns `Ok(())` when the snapshot is internally consistent, or
37    /// `Err(message)` describing the first detected drift. Intended for
38    /// debug assertions and tests — it allocates fresh reference indexes and
39    /// is `O(nodes * indexes + edges)` per call.
40    ///
41    /// # Checked invariant families
42    ///
43    /// 1. **Label / edge-label bitmaps** match a re-derivation from alive node
44    ///    label sets and alive edge labels; no bucket is present-but-empty.
45    /// 2. **Typed property indexes** match a fresh lenient re-build
46    ///    (`build_property_index_lenient`). The lenient policy is reused so
47    ///    open-graph kind drift and NaN — which the commit path legitimately
48    ///    skips — do not false-positive.
49    /// 3. **Composite typed indexes** match a fresh lenient re-build, same
50    ///    skip-aware policy.
51    /// 4. **Vector row-set indexes** match a fresh lenient re-build, same
52    ///    skip-aware policy.
53    /// 5. **Text BM25 indexes** match a fresh re-build from string properties.
54    /// 6. **Store integrity / alive-set parity**: per-store columns share one
55    ///    length and every alive row index is in range. Dead rows are
56    ///    permitted holes (D11) and are only asserted absent from derived
57    ///    state, never from the columns. The snapshot's `meta.next_*_id`
58    ///    allocator high-water marks are intentionally **not** checked here:
59    ///    the published snapshot's `meta` is allowed to carry stale allocator
60    ///    fields after a `from_graph` / recovery load (the real allocator
61    ///    floor is enforced separately by `IdAllocator::from_meta_with_floors`),
62    ///    so they are not a derived-index invariant.
63    /// 7. **Adjacency** matches a re-derivation from alive edges in both
64    ///    directions, with no present-but-empty entry.
65    ///
66    /// # Errors
67    ///
68    /// Returns the first mismatch as a human-readable `String`.
69    pub fn assert_indexes_consistent(&self) -> Result<(), String> {
70        self.check_store_integrity()?;
71        self.check_label_index()?;
72        self.check_edge_label_index()?;
73        self.check_property_indexes()?;
74        self.check_composite_property_indexes()?;
75        self.check_vector_indexes()?;
76        self.check_text_indexes()?;
77        self.check_adjacency()?;
78        Ok(())
79    }
80
81    /// Family (4): column lengths agree, alive rows are in range, allocator
82    /// floors clear the highest alive row.
83    fn check_store_integrity(&self) -> Result<(), String> {
84        let node_len = self.node_store.labels.len();
85        if self.node_store.properties.len() != node_len {
86            return Err(format!(
87                "node store column length mismatch: labels={node_len} properties={}",
88                self.node_store.properties.len()
89            ));
90        }
91        let edge_len = self.edge_store.label.len();
92        if self.edge_store.source.len() != edge_len
93            || self.edge_store.target.len() != edge_len
94            || self.edge_store.properties.len() != edge_len
95        {
96            return Err(format!(
97                "edge store column length mismatch: label={edge_len} source={} target={} \
98                 properties={}",
99                self.edge_store.source.len(),
100                self.edge_store.target.len(),
101                self.edge_store.properties.len()
102            ));
103        }
104
105        for row in self.node_store.alive.iter() {
106            if (row as usize) >= node_len {
107                return Err(format!(
108                    "node alive bitmap references out-of-range row {row} (node store len {node_len})"
109                ));
110            }
111        }
112        for row in self.edge_store.alive.iter() {
113            if (row as usize) >= edge_len {
114                return Err(format!(
115                    "edge alive bitmap references out-of-range row {row} (edge store len {edge_len})"
116                ));
117            }
118        }
119        Ok(())
120    }
121
122    /// Family (1a): node label bitmaps.
123    fn check_label_index(&self) -> Result<(), String> {
124        let mut reference: imbl::HashMap<DbString, RoaringBitmap> = imbl::HashMap::new();
125        for row in self.node_store.alive.iter() {
126            let Some(labels) = self.node_store.labels.get(row as usize) else {
127                return Err(format!("alive node row {row} has no label column entry"));
128            };
129            for label in labels.iter() {
130                reference.entry(label.clone()).or_default().insert(row);
131            }
132        }
133        compare_bitmap_index("node label index", &self.idx_label, &reference)
134    }
135
136    /// Family (1b): edge label bitmaps.
137    fn check_edge_label_index(&self) -> Result<(), String> {
138        let mut reference: imbl::HashMap<DbString, RoaringBitmap> = imbl::HashMap::new();
139        for row in self.edge_store.alive.iter() {
140            let Some(label) = self.edge_store.label.get(row as usize) else {
141                return Err(format!("alive edge row {row} has no label column entry"));
142            };
143            reference.entry(label.clone()).or_default().insert(row);
144        }
145        compare_bitmap_index("edge label index", &self.idx_edge_label, &reference)
146    }
147
148    /// Family (2): per-`(label, property)` typed indexes.
149    fn check_property_indexes(&self) -> Result<(), String> {
150        for ((label, property), entry) in &self.property_index {
151            if entry.index.has_empty_bucket() {
152                return Err(format!(
153                    "property index ({label}, {property}) holds a present-but-empty bucket"
154                ));
155            }
156            let reference = crate::property_index::build_property_index_lenient(
157                self,
158                label.clone(),
159                property.clone(),
160                entry.kind(),
161            )
162            .map_err(|err| {
163                format!("failed to re-derive property index ({label}, {property}): {err}")
164            })?;
165            if !entry.index.buckets_eq(&reference) {
166                return Err(format!(
167                    "property index ({label}, {property}) drifted from a fresh re-derivation \
168                     (maintained cardinality {}, reference cardinality {})",
169                    entry.index.cardinality(),
170                    reference.cardinality(),
171                ));
172            }
173        }
174        Ok(())
175    }
176
177    /// Family (3): composite typed indexes.
178    fn check_composite_property_indexes(&self) -> Result<(), String> {
179        for ((label, _key), entry) in &self.composite_property_index {
180            if entry.index.has_empty_bucket() {
181                return Err(format!(
182                    "composite index ({label}, {:?}) holds a present-but-empty bucket",
183                    entry.declared_properties
184                ));
185            }
186            let reference =
187                crate::composite_property_index::build_composite_property_index_lenient(
188                    self,
189                    label.clone(),
190                    entry.declared_properties.clone(),
191                    entry.kinds(),
192                )
193                .map_err(|err| {
194                    format!(
195                        "failed to re-derive composite index ({label}, {:?}): {err}",
196                        entry.declared_properties
197                    )
198                })?;
199            if !entry.index.buckets_eq(&reference) {
200                return Err(format!(
201                    "composite index ({label}, {:?}) drifted from a fresh re-derivation \
202                     (maintained cardinality {}, reference cardinality {})",
203                    entry.declared_properties,
204                    entry.index.cardinality(),
205                    reference.cardinality(),
206                ));
207            }
208        }
209        Ok(())
210    }
211
212    /// Family (4): vector row-set indexes.
213    fn check_vector_indexes(&self) -> Result<(), String> {
214        for ((label, property), entry) in &self.vector_index {
215            let reference = crate::vector_index::build_vector_index_lenient_with_configs(
216                self,
217                label.clone(),
218                property.clone(),
219                entry.kind(),
220                entry.dimension(),
221                VectorIndexConfig::new(entry.hnsw_config(), entry.ivf_config()),
222            )
223            .map_err(|err| {
224                format!("failed to re-derive vector index ({label}, {property}): {err}")
225            })?;
226            if !entry.index.rows_eq(&reference) {
227                return Err(format!(
228                    "vector index ({label}, {property}) drifted from a fresh re-derivation \
229                     (maintained cardinality {}, reference cardinality {})",
230                    entry.index.cardinality(),
231                    reference.cardinality(),
232                ));
233            }
234        }
235        Ok(())
236    }
237
238    /// Family (5): BM25 text indexes.
239    fn check_text_indexes(&self) -> Result<(), String> {
240        for ((label, property), entry) in &self.text_index {
241            let reference = crate::TextIndex::build(self, label.clone(), property.clone())
242                .map_err(|err| {
243                    format!("failed to re-derive text index ({label}, {property}): {err}")
244                })?;
245            if !entry.index.rows_eq(&reference) {
246                return Err(format!(
247                    "text index ({label}, {property}) drifted from a fresh re-derivation \
248                     (maintained documents {}, reference documents {})",
249                    entry.index.document_count(),
250                    reference.document_count(),
251                ));
252            }
253        }
254        Ok(())
255    }
256
257    /// Family (7): in/out adjacency.
258    fn check_adjacency(&self) -> Result<(), String> {
259        let mut out_reference: EngineIdMap<NodeId, Vec<AdjacencyEdge>> = engine_id_map();
260        let mut in_reference: EngineIdMap<NodeId, Vec<AdjacencyEdge>> = engine_id_map();
261        for row in self.edge_store.alive.iter() {
262            let Some(edge_id) = self.edge_id_for_row(crate::store::RowIndex::new(row)) else {
263                return Err(format!("alive edge row {row} has no mapped external id"));
264            };
265            let Some(label) = self.edge_store.label.get(row as usize).cloned() else {
266                return Err(format!("alive edge row {row} has no label column entry"));
267            };
268            let Some(source) = self.edge_store.source.get(row as usize).copied() else {
269                return Err(format!("alive edge row {row} has no source column entry"));
270            };
271            let Some(target) = self.edge_store.target.get(row as usize).copied() else {
272                return Err(format!("alive edge row {row} has no target column entry"));
273            };
274            out_reference
275                .entry(source)
276                .or_default()
277                .push(AdjacencyEdge {
278                    label: label.clone(),
279                    neighbor: target,
280                    edge_id,
281                });
282            in_reference.entry(target).or_default().push(AdjacencyEdge {
283                label,
284                neighbor: source,
285                edge_id,
286            });
287        }
288        compare_adjacency("outgoing", &self.adjacency_out, &out_reference)?;
289        compare_adjacency("incoming", &self.adjacency_in, &in_reference)?;
290        Ok(())
291    }
292}
293
294/// Compare a maintained `DbString`-keyed bitmap index against a re-derivation,
295/// failing on any key difference, bitmap difference, or empty maintained
296/// bucket.
297fn compare_bitmap_index(
298    name: &str,
299    maintained: &imbl::HashMap<DbString, RoaringBitmap>,
300    reference: &imbl::HashMap<DbString, RoaringBitmap>,
301) -> Result<(), String> {
302    for (label, bitmap) in maintained {
303        if bitmap.is_empty() {
304            return Err(format!(
305                "{name}: key {label} holds a present-but-empty bitmap"
306            ));
307        }
308        match reference.get(label) {
309            None => {
310                return Err(format!(
311                    "{name}: key {label} is maintained but not re-derived (stale rows)"
312                ));
313            }
314            Some(expected) if expected != bitmap => {
315                return Err(format!(
316                    "{name}: key {label} bitmap drifted (maintained {:?}, reference {:?})",
317                    bitmap.iter().collect::<Vec<_>>(),
318                    expected.iter().collect::<Vec<_>>()
319                ));
320            }
321            Some(_) => {}
322        }
323    }
324    for label in reference.keys() {
325        if !maintained.contains_key(label) {
326            return Err(format!(
327                "{name}: key {label} is re-derived but missing from the maintained index"
328            ));
329        }
330    }
331    Ok(())
332}
333
334/// Compare a maintained adjacency map against a re-derivation. The maintained
335/// entry is sorted by `(label, neighbor, edge_id)`; the reference is sorted to
336/// match before comparison so parallel edges and ordering are both checked.
337fn compare_adjacency(
338    direction: &str,
339    maintained: &EngineIdMap<NodeId, crate::adjacency::AdjacencyEntry>,
340    reference: &EngineIdMap<NodeId, Vec<AdjacencyEdge>>,
341) -> Result<(), String> {
342    for (node, entry) in maintained {
343        if entry.is_empty() {
344            return Err(format!(
345                "{direction} adjacency: node {node} holds a present-but-empty entry"
346            ));
347        }
348        let maintained_edges: Vec<AdjacencyEdge> = entry.iter().cloned().collect();
349        match reference.get(node) {
350            None => {
351                return Err(format!(
352                    "{direction} adjacency: node {node} is maintained but has no alive edges \
353                     (dangling adjacency)"
354                ));
355            }
356            Some(expected) => {
357                let mut expected_sorted = expected.clone();
358                expected_sorted.sort_by_key(adjacency_sort_key);
359                if maintained_edges != expected_sorted {
360                    return Err(format!(
361                        "{direction} adjacency: node {node} drifted (maintained {maintained_edges:?}, \
362                         reference {expected_sorted:?})"
363                    ));
364                }
365            }
366        }
367    }
368    for node in reference.keys() {
369        if !maintained.contains_key(node) {
370            return Err(format!(
371                "{direction} adjacency: node {node} has alive edges but is missing from the \
372                 maintained adjacency map"
373            ));
374        }
375    }
376    Ok(())
377}
378
379fn adjacency_sort_key(edge: &AdjacencyEdge) -> (DbString, NodeId, EdgeId) {
380    (edge.label.clone(), edge.neighbor, edge.edge_id)
381}
382
383#[cfg(test)]
384#[path = "consistency_tests.rs"]
385mod tests;