Skip to main content

selene_graph/
graph.rs

1//! Immutable graph snapshot and read accessors.
2
3use std::borrow::Cow;
4use std::ops::RangeBounds;
5use std::sync::Arc;
6
7use imbl::HashMap;
8use roaring::RoaringBitmap;
9use rustc_hash::FxHashMap;
10use serde::{Deserialize, Serialize};
11use smallvec::SmallVec;
12
13use selene_core::{DbString, EdgeId, GraphId, LabelSet, NodeId, PropertyMap, Value};
14
15use crate::adjacency::AdjacencyEntry;
16use crate::composite_typed_index::CompositeTypedIndex;
17use crate::graph_types::GraphTypeDef;
18use crate::id_map::{EngineIdMap, engine_id_map};
19use crate::store::{EdgeStore, NodeStore, RowIndex};
20use crate::text_index::TextIndex;
21use crate::typed_index::{TypedIndex, TypedIndexKind};
22use crate::vector_index::VectorIndex;
23
24mod index_entries;
25
26pub use index_entries::{
27    CompositePropertyIndexEntry, CompositePropertyIndexEntryRow, PropertyIndexEntry,
28    TextIndexEntry, TextIndexEntryRow, VectorIndexEntry, VectorIndexEntryRow,
29};
30
31/// Snapshot metadata.
32#[derive(
33    Clone,
34    Debug,
35    Deserialize,
36    PartialEq,
37    rkyv::Archive,
38    rkyv::Deserialize,
39    rkyv::Serialize,
40    Serialize,
41)]
42pub struct GraphMeta {
43    /// Graph identifier.
44    pub graph_id: GraphId,
45    /// Published generation counter.
46    pub generation: u64,
47    /// Next node ID to allocate.
48    pub next_node_id: u64,
49    /// Next edge ID to allocate.
50    pub next_edge_id: u64,
51    /// Bound closed graph type. `None` means GG01/open graph.
52    pub bound_type: Option<Arc<GraphTypeDef>>,
53}
54
55/// Immutable graph snapshot.
56#[derive(Clone, Debug)]
57pub struct SeleneGraph {
58    /// Snapshot metadata.
59    pub meta: GraphMeta,
60    /// Node storage.
61    pub node_store: NodeStore,
62    /// Edge storage.
63    pub edge_store: EdgeStore,
64    /// Outgoing adjacency keyed by source node.
65    pub adjacency_out: EngineIdMap<NodeId, AdjacencyEntry>,
66    /// Incoming adjacency keyed by target node.
67    pub adjacency_in: EngineIdMap<NodeId, AdjacencyEntry>,
68    /// Bitmap of node rows carrying each label.
69    pub idx_label: HashMap<DbString, RoaringBitmap>,
70    /// Bitmap of edge rows carrying each edge label.
71    pub idx_edge_label: HashMap<DbString, RoaringBitmap>,
72    /// Per-`(label, property)` node value indexes. See spec 03 section 5.2.
73    pub property_index: FxHashMap<(DbString, DbString), PropertyIndexEntry>,
74    /// Per-`(edge label, property)` edge value indexes.
75    pub edge_property_index: FxHashMap<(DbString, DbString), PropertyIndexEntry>,
76    /// Per-`(label, properties...)` node composite value indexes.
77    pub composite_property_index:
78        FxHashMap<(DbString, SmallVec<[DbString; 4]>), CompositePropertyIndexEntry>,
79    /// Per-`(label, property)` node vector indexes.
80    pub vector_index: FxHashMap<(DbString, DbString), VectorIndexEntry>,
81    /// Per-`(label, property)` node BM25 text indexes.
82    pub text_index: FxHashMap<(DbString, DbString), TextIndexEntry>,
83    /// External `NodeId -> RowIndex` lookup (the inverse of
84    /// [`NodeStore::row_to_id`]). Replaces the `id.get() - 1` arithmetic so the
85    /// external id can stay stable while the row is remapped by compaction
86    /// (D22 / BRIEF-Item-4a). `imbl` for cheap copy-on-write snapshot clones.
87    pub node_id_to_row: EngineIdMap<NodeId, RowIndex>,
88    /// External `EdgeId -> RowIndex` lookup (inverse of [`EdgeStore::row_to_id`]).
89    pub edge_id_to_row: EngineIdMap<EdgeId, RowIndex>,
90}
91
92impl SeleneGraph {
93    /// Construct an empty graph snapshot.
94    #[must_use]
95    pub fn new(graph_id: GraphId) -> Self {
96        Self {
97            meta: GraphMeta {
98                graph_id,
99                generation: 0,
100                next_node_id: 1,
101                next_edge_id: 1,
102                bound_type: None,
103            },
104            node_store: NodeStore::new(),
105            edge_store: EdgeStore::new(),
106            adjacency_out: engine_id_map(),
107            adjacency_in: engine_id_map(),
108            idx_label: HashMap::new(),
109            idx_edge_label: HashMap::new(),
110            property_index: FxHashMap::default(),
111            edge_property_index: FxHashMap::default(),
112            composite_property_index: FxHashMap::default(),
113            vector_index: FxHashMap::default(),
114            text_index: FxHashMap::default(),
115            node_id_to_row: engine_id_map(),
116            edge_id_to_row: engine_id_map(),
117        }
118    }
119
120    /// Return this graph snapshot's stable graph ID.
121    #[must_use]
122    pub const fn graph_id(&self) -> GraphId {
123        self.meta.graph_id
124    }
125
126    /// Number of alive nodes.
127    #[must_use]
128    pub fn node_count(&self) -> usize {
129        self.node_store.alive.len() as usize
130    }
131
132    /// Bitmap of alive node *row indices*.
133    ///
134    /// Returned bitmap is row-indexed (matching `nodes_with_label`), not
135    /// `NodeId`-indexed; consumers convert a row to its external `NodeId` via
136    /// [`Self::node_id_for_row`] (never by `row + 1` arithmetic — the external id
137    /// is stable while compaction renumbers the row). Used by `selene-algorithms`
138    /// to seed the "all alive nodes" baseline of a `GraphProjection`.
139    #[must_use]
140    pub fn live_nodes(&self) -> &RoaringBitmap {
141        // B1: alive is Arc-shared COW state; expose the bitmap, not the Arc,
142        // so the crate boundary (selene-algorithms) is unchanged.
143        &self.node_store.alive
144    }
145
146    /// Number of alive edges.
147    #[must_use]
148    pub fn edge_count(&self) -> usize {
149        self.edge_store.alive.len() as usize
150    }
151
152    /// Return current row-space pressure for compaction planning.
153    ///
154    /// This is a cheap read over store lengths and liveness bitmaps; it does not
155    /// rebuild indexes or allocate a dense graph.
156    #[must_use]
157    pub fn compaction_stats(&self) -> crate::compaction::CompactionStats {
158        crate::compaction::CompactionStats::from_graph(self)
159    }
160
161    /// Bitmap of alive edge *row indices*.
162    ///
163    /// The edge-side sibling of [`Self::live_nodes`]. The returned bitmap is
164    /// row-indexed (matching `edges_with_label`), not `EdgeId`-indexed; consumers
165    /// convert a row to its external `EdgeId` via [`Self::edge_id_for_row`] (never
166    /// by `row + 1` arithmetic). Covers every alive edge regardless of label —
167    /// used by the `DROP GRAPH` factory-reset (BRIEF-152) to enumerate every live
168    /// edge, including untyped/arbitrary-label ones that a per-type truncate would
169    /// miss.
170    #[must_use]
171    pub fn live_edges(&self) -> &RoaringBitmap {
172        // B1: see `live_nodes` — deref the COW Arc at the boundary.
173        &self.edge_store.alive
174    }
175
176    /// Map an external [`NodeId`] to its internal [`RowIndex`].
177    ///
178    /// Returns `None` for a never-committed (aborted-tx hole) id. A deleted id
179    /// still resolves — to its now-dead row — so liveness, not existence,
180    /// distinguishes it (the row's `alive` bit is clear). This is the map-backed
181    /// replacement for the old `id - 1` arithmetic; the external id stays stable
182    /// while BRIEF-Item-4b compaction renumbers the row.
183    #[must_use]
184    pub fn row_for_node_id(&self, id: NodeId) -> Option<RowIndex> {
185        self.node_id_to_row.get(&id).copied()
186    }
187
188    /// Map an external [`EdgeId`] to its internal [`RowIndex`]; see
189    /// [`Self::row_for_node_id`].
190    #[must_use]
191    pub fn row_for_edge_id(&self, id: EdgeId) -> Option<RowIndex> {
192        self.edge_id_to_row.get(&id).copied()
193    }
194
195    /// Recover the external [`NodeId`] bound to a materialized [`RowIndex`].
196    ///
197    /// Reads the `row_to_id` column (the persistence-stable per-row id), never
198    /// synthesizing `row + 1`. Returns `None` past the column end or for a
199    /// never-committed hole row (which holds [`NodeId::TOMBSTONE`]).
200    #[must_use]
201    pub fn node_id_for_row(&self, row: RowIndex) -> Option<NodeId> {
202        self.node_store
203            .row_to_id
204            .get(row.get() as usize)
205            .copied()
206            .filter(|id| *id != NodeId::TOMBSTONE)
207    }
208
209    /// Recover the external [`EdgeId`] bound to a materialized [`RowIndex`]; see
210    /// [`Self::node_id_for_row`].
211    #[must_use]
212    pub fn edge_id_for_row(&self, row: RowIndex) -> Option<EdgeId> {
213        self.edge_store
214            .row_to_id
215            .get(row.get() as usize)
216            .copied()
217            .filter(|id| *id != EdgeId::TOMBSTONE)
218    }
219
220    /// Return true when `id` names an alive node.
221    #[must_use]
222    pub fn is_node_alive(&self, id: NodeId) -> bool {
223        self.live_node_row(id).is_some()
224    }
225
226    /// Return true when `id` names an alive edge.
227    #[must_use]
228    pub fn is_edge_alive(&self, id: EdgeId) -> bool {
229        self.live_edge_row(id).is_some()
230    }
231
232    /// Return node labels for an alive node.
233    #[must_use]
234    pub fn node_labels(&self, id: NodeId) -> Option<&LabelSet> {
235        self.live_node_row(id)
236            .and_then(|row| self.node_store.labels.get(row))
237    }
238
239    /// Return node properties for an alive node.
240    #[must_use]
241    pub fn node_properties(&self, id: NodeId) -> Option<&PropertyMap> {
242        self.live_node_row(id)
243            .and_then(|row| self.node_store.properties.get(row))
244    }
245
246    /// Return edge label for an alive edge.
247    #[must_use]
248    pub fn edge_label(&self, id: EdgeId) -> Option<&DbString> {
249        self.live_edge_row(id)
250            .and_then(|row| self.edge_store.label.get(row))
251    }
252
253    /// Return edge endpoints for an alive edge.
254    #[must_use]
255    pub fn edge_endpoints(&self, id: EdgeId) -> Option<(NodeId, NodeId)> {
256        self.live_edge_row(id).and_then(|row| {
257            Some((
258                *self.edge_store.source.get(row)?,
259                *self.edge_store.target.get(row)?,
260            ))
261        })
262    }
263
264    /// Return edge properties for an alive edge.
265    #[must_use]
266    pub fn edge_properties(&self, id: EdgeId) -> Option<&PropertyMap> {
267        self.live_edge_row(id)
268            .and_then(|row| self.edge_store.properties.get(row))
269    }
270
271    /// Return outgoing adjacency for `source`.
272    #[must_use]
273    pub fn outgoing_edges(&self, source: NodeId) -> Option<&AdjacencyEntry> {
274        self.adjacency_out.get(&source)
275    }
276
277    /// Return incoming adjacency for `target`.
278    #[must_use]
279    pub fn incoming_edges(&self, target: NodeId) -> Option<&AdjacencyEntry> {
280        self.adjacency_in.get(&target)
281    }
282
283    /// Return true when an alive node has at least one incident edge.
284    #[must_use]
285    pub fn node_has_incident_edges(&self, id: NodeId) -> bool {
286        self.outgoing_edges(id)
287            .is_some_and(|entry| !entry.is_empty())
288            || self
289                .incoming_edges(id)
290                .is_some_and(|entry| !entry.is_empty())
291    }
292
293    /// Return the bitmap of node rows carrying `label`.
294    #[must_use]
295    pub fn nodes_with_label(&self, label: &DbString) -> Option<&RoaringBitmap> {
296        self.idx_label.get(label)
297    }
298
299    /// Return the bitmap of edge rows carrying `label`.
300    #[must_use]
301    pub fn edges_with_label(&self, label: &DbString) -> Option<&RoaringBitmap> {
302        self.idx_edge_label.get(label)
303    }
304
305    /// Number of distinct node labels currently indexed.
306    #[must_use]
307    pub fn label_count(&self) -> usize {
308        self.idx_label.len()
309    }
310
311    /// Number of distinct edge labels currently indexed.
312    #[must_use]
313    pub fn edge_label_count(&self) -> usize {
314        self.idx_edge_label.len()
315    }
316
317    /// Return a clone of the registered `(label, property)` index.
318    #[must_use]
319    pub fn property_index_for(
320        &self,
321        label: &DbString,
322        property: &DbString,
323    ) -> Option<Arc<TypedIndex>> {
324        self.property_index
325            .get(&(label.clone(), property.clone()))
326            .map(|entry| Arc::clone(&entry.index))
327    }
328
329    /// Return a clone of the registered edge `(label, property)` index.
330    #[must_use]
331    pub fn edge_property_index_for(
332        &self,
333        label: &DbString,
334        property: &DbString,
335    ) -> Option<Arc<TypedIndex>> {
336        self.edge_property_index
337            .get(&(label.clone(), property.clone()))
338            .map(|entry| Arc::clone(&entry.index))
339    }
340
341    /// Return a clone of the registered composite index.
342    #[must_use]
343    pub fn composite_property_index_for(
344        &self,
345        label: &DbString,
346        properties: &[DbString],
347    ) -> Option<Arc<CompositeTypedIndex>> {
348        self.composite_property_index_entry_for(label, properties)
349            .map(|entry| Arc::clone(&entry.index))
350    }
351
352    /// Return composite index metadata for a property set.
353    #[must_use]
354    pub fn composite_property_index_entry_for(
355        &self,
356        label: &DbString,
357        properties: &[DbString],
358    ) -> Option<&CompositePropertyIndexEntry> {
359        let key = composite_property_key(properties);
360        self.composite_property_index.get(&(label.clone(), key))
361    }
362
363    /// Return a clone of the registered vector index.
364    #[must_use]
365    pub fn vector_index_for(
366        &self,
367        label: &DbString,
368        property: &DbString,
369    ) -> Option<Arc<VectorIndex>> {
370        self.vector_index
371            .get(&(label.clone(), property.clone()))
372            .map(|entry| Arc::clone(&entry.index))
373    }
374
375    /// Return a clone of the registered text index.
376    #[must_use]
377    pub fn text_index_for(&self, label: &DbString, property: &DbString) -> Option<Arc<TextIndex>> {
378        self.text_index
379            .get(&(label.clone(), property.clone()))
380            .map(|entry| Arc::clone(&entry.index))
381    }
382
383    /// Number of distinct `(label, property)` indexes currently registered.
384    #[must_use]
385    pub fn property_index_count(&self) -> usize {
386        self.property_index.len()
387    }
388
389    /// Number of registered edge property indexes.
390    #[must_use]
391    pub fn edge_property_index_count(&self) -> usize {
392        self.edge_property_index.len()
393    }
394
395    /// Number of distinct `(label, properties...)` indexes currently registered.
396    #[must_use]
397    pub fn composite_property_index_count(&self) -> usize {
398        self.composite_property_index.len()
399    }
400
401    /// Number of distinct `(label, property)` vector indexes currently registered.
402    #[must_use]
403    pub fn vector_index_count(&self) -> usize {
404        self.vector_index.len()
405    }
406
407    /// Number of distinct `(label, property)` text indexes currently registered.
408    #[must_use]
409    pub fn text_index_count(&self) -> usize {
410        self.text_index.len()
411    }
412
413    /// Iterate built-in property indexes as owned `(label, property, kind)` tuples.
414    ///
415    /// This covers only SeleneGraph's built-in property indexes.
416    /// Extension-provider index state is surfaced through that provider's own
417    /// procedures.
418    pub fn iter_property_indexes(
419        &self,
420    ) -> impl Iterator<Item = (DbString, DbString, TypedIndexKind)> + '_ {
421        self.property_index
422            .iter()
423            .map(|((label, property), entry)| (label.clone(), property.clone(), entry.kind()))
424    }
425
426    /// Iterate built-in property indexes with optional explicit catalog names.
427    pub fn iter_property_index_entries(
428        &self,
429    ) -> impl Iterator<Item = (DbString, DbString, TypedIndexKind, Option<DbString>)> + '_ {
430        self.property_index
431            .iter()
432            .map(|((label, property), entry)| {
433                (
434                    label.clone(),
435                    property.clone(),
436                    entry.kind(),
437                    entry.name.clone(),
438                )
439            })
440    }
441
442    /// Iterate built-in composite property indexes with optional explicit catalog names.
443    pub fn iter_composite_property_index_entries(
444        &self,
445    ) -> impl Iterator<Item = CompositePropertyIndexEntryRow> + '_ {
446        self.composite_property_index
447            .iter()
448            .map(|((label, _), entry)| {
449                (
450                    label.clone(),
451                    entry.declared_properties.clone(),
452                    entry.kinds(),
453                    entry.name.clone(),
454                )
455            })
456    }
457
458    /// Iterate built-in vector indexes with optional explicit catalog names.
459    pub fn iter_vector_index_entries(&self) -> impl Iterator<Item = VectorIndexEntryRow> + '_ {
460        self.vector_index.iter().map(|((label, property), entry)| {
461            (
462                label.clone(),
463                property.clone(),
464                entry.kind(),
465                entry.dimension(),
466                entry.hnsw_config(),
467                entry.ivf_config(),
468                entry.name.clone(),
469            )
470        })
471    }
472
473    /// Iterate built-in text indexes with optional explicit catalog names.
474    pub fn iter_text_index_entries(&self) -> impl Iterator<Item = TextIndexEntryRow> + '_ {
475        self.text_index.iter().map(|((label, property), entry)| {
476            (
477                label.clone(),
478                property.clone(),
479                entry.stats(),
480                entry.memory_usage(),
481                entry.name.clone(),
482            )
483        })
484    }
485
486    /// Return rows matching `value` under a registered property index.
487    ///
488    /// `None` means no index is registered for `(label, property)` or the
489    /// supplied value cannot be used with that index kind. `Some(empty)` means
490    /// the index exists but no row matches. A kind-mismatched probe returns
491    /// `None` so the caller drops to a linear scan; open-graph kind drift
492    /// remains discoverable via cross-variant `value_compare`.
493    #[must_use]
494    pub fn nodes_with_property_eq(
495        &self,
496        label: &DbString,
497        property: &DbString,
498        value: &Value,
499    ) -> Option<Cow<'_, RoaringBitmap>> {
500        self.property_index
501            .get(&(label.clone(), property.clone()))
502            .and_then(|entry| entry.index.lookup_eq(value))
503    }
504
505    /// Return the union of node rows matching any indexed scalar value.
506    ///
507    /// `None` means no node property index is registered for `(label, property)`
508    /// or at least one supplied value cannot be used with that index kind.
509    /// `Some(empty)` means the index exists but no row matches the value set.
510    #[must_use]
511    pub fn nodes_with_property_any(
512        &self,
513        label: &DbString,
514        property: &DbString,
515        values: &[Value],
516    ) -> Option<RoaringBitmap> {
517        let entry = self
518            .property_index
519            .get(&(label.clone(), property.clone()))?;
520        let mut rows = RoaringBitmap::new();
521        for value in values {
522            rows |= entry.index.lookup_eq(value)?.as_ref();
523        }
524        Some(rows)
525    }
526
527    /// Return edge rows matching `value` under a registered edge property index.
528    ///
529    /// `None` means no edge index is registered for `(label, property)` or the
530    /// supplied value cannot be used with that index kind. `Some(empty)` means
531    /// the index exists but no edge row matches.
532    #[must_use]
533    pub fn edges_with_property_eq(
534        &self,
535        label: &DbString,
536        property: &DbString,
537        value: &Value,
538    ) -> Option<Cow<'_, RoaringBitmap>> {
539        self.edge_property_index
540            .get(&(label.clone(), property.clone()))
541            .and_then(|entry| entry.index.lookup_eq(value))
542    }
543
544    /// Return the union of edge rows matching any indexed scalar value.
545    ///
546    /// `None` means no edge property index is registered for `(label,
547    /// property)` or at least one supplied value cannot be used with that index
548    /// kind. `Some(empty)` means the index exists but no row matches the value
549    /// set.
550    #[must_use]
551    pub fn edges_with_property_any(
552        &self,
553        label: &DbString,
554        property: &DbString,
555        values: &[Value],
556    ) -> Option<RoaringBitmap> {
557        let entry = self
558            .edge_property_index
559            .get(&(label.clone(), property.clone()))?;
560        let mut rows = RoaringBitmap::new();
561        for value in values {
562            rows |= entry.index.lookup_eq(value)?.as_ref();
563        }
564        Some(rows)
565    }
566
567    /// Return rows matching `range` under a registered property index.
568    ///
569    /// `None` means no index is registered or the supplied bounds do not match
570    /// the index kind. `Some(empty)` means the index exists but the range
571    /// matches no rows.
572    #[must_use]
573    pub fn nodes_with_property_range<R>(
574        &self,
575        label: &DbString,
576        property: &DbString,
577        range: R,
578    ) -> Option<RoaringBitmap>
579    where
580        R: RangeBounds<Value>,
581    {
582        self.property_index
583            .get(&(label.clone(), property.clone()))
584            .and_then(|entry| entry.index.lookup_range(range))
585    }
586
587    /// Return edge rows matching `range` under a registered edge property index.
588    ///
589    /// `None` means no edge index is registered or the supplied bounds do not
590    /// match the index kind. `Some(empty)` means the index exists but no edge
591    /// row matches.
592    #[must_use]
593    pub fn edges_with_property_range<R>(
594        &self,
595        label: &DbString,
596        property: &DbString,
597        range: R,
598    ) -> Option<RoaringBitmap>
599    where
600        R: RangeBounds<Value>,
601    {
602        self.edge_property_index
603            .get(&(label.clone(), property.clone()))
604            .and_then(|entry| entry.index.lookup_range(range))
605    }
606
607    /// Return rows whose string property key starts with `prefix`.
608    ///
609    /// `None` means no index is registered or the registered index is not a
610    /// string index.
611    #[must_use]
612    pub fn nodes_with_property_prefix(
613        &self,
614        label: &DbString,
615        property: &DbString,
616        prefix: &str,
617    ) -> Option<RoaringBitmap> {
618        self.property_index
619            .get(&(label.clone(), property.clone()))
620            .and_then(|entry| entry.index.lookup_prefix(prefix))
621    }
622
623    fn live_node_row(&self, id: NodeId) -> Option<usize> {
624        let row = self.row_for_node_id(id)?.get();
625        ((row as usize) < self.node_store.len() && self.node_store.is_alive(row))
626            .then_some(row as usize)
627    }
628
629    fn live_edge_row(&self, id: EdgeId) -> Option<usize> {
630        let row = self.row_for_edge_id(id)?.get();
631        ((row as usize) < self.edge_store.len() && self.edge_store.is_alive(row))
632            .then_some(row as usize)
633    }
634}
635
636pub(crate) fn composite_property_key(properties: &[DbString]) -> SmallVec<[DbString; 4]> {
637    let mut key: SmallVec<[DbString; 4]> = properties.iter().cloned().collect();
638    key.sort();
639    key
640}
641
642#[cfg(test)]
643mod tests;