Skip to main content

selene_graph/
typed_index.rs

1//! Built-in per-`(label, property)` value index. See spec 03 section 5.2.
2//!
3//! # Value coercion
4//!
5//! `typed_key` is the single `Value`→`TypedKey` coercion shared by write, read,
6//! and diff callers. Kind-mismatched reads return `None` so callers can scan.
7//!
8//! The same collapse is mirrored in [`crate::composite_typed_index`] for
9//! composite indexes.
10
11use std::collections::BTreeMap;
12
13use roaring::RoaringBitmap;
14use selene_core::{DbString, DurationOrderKey, Value};
15use serde::{Deserialize, Serialize};
16
17mod keying;
18mod lookup;
19
20pub(crate) use keying::{TypedIndexValueError, observed_value_kind};
21use keying::{TypedKey, typed_key};
22
23pub use crate::typed_float_key::{NotNanError, NotNanF32, NotNanF64};
24
25/// Indexable value kind for built-in node property indexes.
26#[derive(
27    Clone,
28    Copy,
29    Debug,
30    Deserialize,
31    Eq,
32    Hash,
33    PartialEq,
34    rkyv::Archive,
35    rkyv::Deserialize,
36    rkyv::Serialize,
37    Serialize,
38)]
39pub enum TypedIndexKind {
40    /// Boolean value. Backs [`Value::Bool`].
41    Bool,
42    /// Signed 64-bit integer. Backs [`Value::Int`].
43    I64,
44    /// Unsigned 64-bit integer. Backs [`Value::Uint`].
45    U64,
46    /// Signed 128-bit integer. Backs [`Value::Int128`].
47    I128,
48    /// Unsigned 128-bit integer. Backs [`Value::Uint128`].
49    U128,
50    /// Fixed-precision decimal. Backs [`Value::Decimal`].
51    Decimal,
52    /// Finite `f32`. Backs [`Value::Float32`]; NaN is rejected.
53    F32,
54    /// Finite `f64`. Backs [`Value::Float`]; NaN is rejected.
55    F64,
56    /// Database string. Backs [`Value::String`].
57    String,
58    /// Civil date. Backs [`Value::Date`].
59    Date,
60    /// Civil local date-time. Backs [`Value::LocalDateTime`].
61    LocalDateTime,
62    /// Zoned date-time. Backs [`Value::ZonedDateTime`].
63    ZonedDateTime,
64    /// Civil local time. Backs [`Value::LocalTime`].
65    LocalTime,
66    /// Zoned time. Backs [`Value::ZonedTime`].
67    ZonedTime,
68    /// Duration. Backs [`Value::Duration`].
69    Duration,
70    /// UUID. Backs [`Value::Uuid`].
71    Uuid,
72}
73
74/// Built-in per-`(label, property)` node value index.
75#[derive(Clone, Debug)]
76pub enum TypedIndex {
77    /// Boolean index.
78    Bool(BTreeMap<bool, RoaringBitmap>),
79    /// Signed integer index.
80    I64(BTreeMap<i64, RoaringBitmap>),
81    /// Unsigned integer index.
82    U64(BTreeMap<u64, RoaringBitmap>),
83    /// Signed 128-bit integer index.
84    I128(BTreeMap<i128, RoaringBitmap>),
85    /// Unsigned 128-bit integer index.
86    U128(BTreeMap<u128, RoaringBitmap>),
87    /// Fixed-precision decimal index.
88    Decimal(BTreeMap<rust_decimal::Decimal, RoaringBitmap>),
89    /// 32-bit floating-point index with NaN excluded.
90    F32(BTreeMap<NotNanF32, RoaringBitmap>),
91    /// Floating-point index with NaN excluded.
92    F64(BTreeMap<NotNanF64, RoaringBitmap>),
93    /// Database-string index.
94    String(BTreeMap<DbString, RoaringBitmap>),
95    /// Civil date index.
96    Date(BTreeMap<jiff::civil::Date, RoaringBitmap>),
97    /// Civil local date-time index.
98    LocalDateTime(BTreeMap<jiff::civil::DateTime, RoaringBitmap>),
99    /// Zoned date-time index.
100    ZonedDateTime(BTreeMap<jiff::Zoned, RoaringBitmap>),
101    /// Civil local time index.
102    LocalTime(BTreeMap<jiff::civil::Time, RoaringBitmap>),
103    /// Zoned time index.
104    ZonedTime(BTreeMap<jiff::Zoned, RoaringBitmap>),
105    /// Duration index.
106    Duration(BTreeMap<DurationOrderKey, RoaringBitmap>),
107    /// UUID index.
108    Uuid(BTreeMap<uuid::Uuid, RoaringBitmap>),
109}
110
111impl TypedIndex {
112    /// Construct an empty index of `kind`.
113    #[must_use]
114    pub fn new(kind: TypedIndexKind) -> Self {
115        match kind {
116            TypedIndexKind::Bool => Self::Bool(BTreeMap::new()),
117            TypedIndexKind::I64 => Self::I64(BTreeMap::new()),
118            TypedIndexKind::U64 => Self::U64(BTreeMap::new()),
119            TypedIndexKind::I128 => Self::I128(BTreeMap::new()),
120            TypedIndexKind::U128 => Self::U128(BTreeMap::new()),
121            TypedIndexKind::Decimal => Self::Decimal(BTreeMap::new()),
122            TypedIndexKind::F32 => Self::F32(BTreeMap::new()),
123            TypedIndexKind::F64 => Self::F64(BTreeMap::new()),
124            TypedIndexKind::String => Self::String(BTreeMap::new()),
125            TypedIndexKind::Date => Self::Date(BTreeMap::new()),
126            TypedIndexKind::LocalDateTime => Self::LocalDateTime(BTreeMap::new()),
127            TypedIndexKind::ZonedDateTime => Self::ZonedDateTime(BTreeMap::new()),
128            TypedIndexKind::LocalTime => Self::LocalTime(BTreeMap::new()),
129            TypedIndexKind::ZonedTime => Self::ZonedTime(BTreeMap::new()),
130            TypedIndexKind::Duration => Self::Duration(BTreeMap::new()),
131            TypedIndexKind::Uuid => Self::Uuid(BTreeMap::new()),
132        }
133    }
134
135    /// Return the value kind indexed by this index.
136    #[must_use]
137    pub const fn kind(&self) -> TypedIndexKind {
138        match self {
139            Self::Bool(_) => TypedIndexKind::Bool,
140            Self::I64(_) => TypedIndexKind::I64,
141            Self::U64(_) => TypedIndexKind::U64,
142            Self::I128(_) => TypedIndexKind::I128,
143            Self::U128(_) => TypedIndexKind::U128,
144            Self::Decimal(_) => TypedIndexKind::Decimal,
145            Self::F32(_) => TypedIndexKind::F32,
146            Self::F64(_) => TypedIndexKind::F64,
147            Self::String(_) => TypedIndexKind::String,
148            Self::Date(_) => TypedIndexKind::Date,
149            Self::LocalDateTime(_) => TypedIndexKind::LocalDateTime,
150            Self::ZonedDateTime(_) => TypedIndexKind::ZonedDateTime,
151            Self::LocalTime(_) => TypedIndexKind::LocalTime,
152            Self::ZonedTime(_) => TypedIndexKind::ZonedTime,
153            Self::Duration(_) => TypedIndexKind::Duration,
154            Self::Uuid(_) => TypedIndexKind::Uuid,
155        }
156    }
157
158    /// Return total row cardinality across all indexed keys.
159    ///
160    /// This is the sum of every bucket's row count, NOT the number of distinct
161    /// keys. For the distinct-key count (e.g. to estimate an average bucket size
162    /// `cardinality / distinct_keys` for parameter-equality cost estimation) use
163    /// [`TypedIndex::distinct_keys`].
164    #[must_use]
165    pub fn cardinality(&self) -> u64 {
166        match self {
167            Self::Bool(index) => cardinality(index),
168            Self::I64(index) => cardinality(index),
169            Self::U64(index) => cardinality(index),
170            Self::I128(index) => cardinality(index),
171            Self::U128(index) => cardinality(index),
172            Self::Decimal(index) => cardinality(index),
173            Self::F32(index) => cardinality(index),
174            Self::F64(index) => cardinality(index),
175            Self::String(index) => cardinality(index),
176            Self::Date(index) => cardinality(index),
177            Self::LocalDateTime(index) => cardinality(index),
178            Self::ZonedDateTime(index) => cardinality(index),
179            Self::LocalTime(index) => cardinality(index),
180            Self::ZonedTime(index) => cardinality(index),
181            Self::Duration(index) => cardinality(index),
182            Self::Uuid(index) => cardinality(index),
183        }
184    }
185
186    /// Return the number of distinct indexed keys (BTreeMap entry count).
187    ///
188    /// Unlike [`TypedIndex::cardinality`] (total rows), this is the number of
189    /// distinct values present in the index. The optimizer cost model divides
190    /// `cardinality / distinct_keys` to estimate the expected rows returned by a
191    /// parameter-equality probe whose value is unknown at plan time. Returns `0`
192    /// for an empty index.
193    #[must_use]
194    pub fn distinct_keys(&self) -> u64 {
195        match self {
196            Self::Bool(index) => index.len() as u64,
197            Self::I64(index) => index.len() as u64,
198            Self::U64(index) => index.len() as u64,
199            Self::I128(index) => index.len() as u64,
200            Self::U128(index) => index.len() as u64,
201            Self::Decimal(index) => index.len() as u64,
202            Self::F32(index) => index.len() as u64,
203            Self::F64(index) => index.len() as u64,
204            Self::String(index) => index.len() as u64,
205            Self::Date(index) => index.len() as u64,
206            Self::LocalDateTime(index) => index.len() as u64,
207            Self::ZonedDateTime(index) => index.len() as u64,
208            Self::LocalTime(index) => index.len() as u64,
209            Self::ZonedTime(index) => index.len() as u64,
210            Self::Duration(index) => index.len() as u64,
211            Self::Uuid(index) => index.len() as u64,
212        }
213    }
214
215    /// Return true when this index holds exactly the same `(key -> rows)`
216    /// buckets as `reference`.
217    ///
218    /// Used by the debug-only structural consistency net
219    /// ([`crate::SeleneGraph::assert_indexes_consistent`]) to compare the
220    /// commit-path-maintained index against a freshly re-derived reference
221    /// built with the same lenient admission policy. Two indexes are equal
222    /// only when their kinds match and every bucket maps to an identical
223    /// row bitmap; a missing key, an extra key, or a differing bitmap all
224    /// fail the comparison.
225    #[must_use]
226    pub(crate) fn buckets_eq(&self, reference: &Self) -> bool {
227        match (self, reference) {
228            (Self::Bool(lhs), Self::Bool(rhs)) => lhs == rhs,
229            (Self::I64(lhs), Self::I64(rhs)) => lhs == rhs,
230            (Self::U64(lhs), Self::U64(rhs)) => lhs == rhs,
231            (Self::I128(lhs), Self::I128(rhs)) => lhs == rhs,
232            (Self::U128(lhs), Self::U128(rhs)) => lhs == rhs,
233            (Self::Decimal(lhs), Self::Decimal(rhs)) => lhs == rhs,
234            (Self::F32(lhs), Self::F32(rhs)) => lhs == rhs,
235            (Self::F64(lhs), Self::F64(rhs)) => lhs == rhs,
236            (Self::String(lhs), Self::String(rhs)) => lhs == rhs,
237            (Self::Date(lhs), Self::Date(rhs)) => lhs == rhs,
238            (Self::LocalDateTime(lhs), Self::LocalDateTime(rhs)) => lhs == rhs,
239            (Self::ZonedDateTime(lhs), Self::ZonedDateTime(rhs)) => lhs == rhs,
240            (Self::LocalTime(lhs), Self::LocalTime(rhs)) => lhs == rhs,
241            (Self::ZonedTime(lhs), Self::ZonedTime(rhs)) => lhs == rhs,
242            (Self::Duration(lhs), Self::Duration(rhs)) => lhs == rhs,
243            (Self::Uuid(lhs), Self::Uuid(rhs)) => lhs == rhs,
244            _ => false,
245        }
246    }
247
248    /// Return true when any indexed key maps to an empty row bitmap.
249    ///
250    /// Commit-path maintenance prunes a bucket when its bitmap empties
251    /// (see `remove_row`), so a present-but-empty bucket is a maintenance
252    /// leak the debug-only consistency net flags.
253    #[must_use]
254    pub(crate) fn has_empty_bucket(&self) -> bool {
255        match self {
256            Self::Bool(index) => index.values().any(RoaringBitmap::is_empty),
257            Self::I64(index) => index.values().any(RoaringBitmap::is_empty),
258            Self::U64(index) => index.values().any(RoaringBitmap::is_empty),
259            Self::I128(index) => index.values().any(RoaringBitmap::is_empty),
260            Self::U128(index) => index.values().any(RoaringBitmap::is_empty),
261            Self::Decimal(index) => index.values().any(RoaringBitmap::is_empty),
262            Self::F32(index) => index.values().any(RoaringBitmap::is_empty),
263            Self::F64(index) => index.values().any(RoaringBitmap::is_empty),
264            Self::String(index) => index.values().any(RoaringBitmap::is_empty),
265            Self::Date(index) => index.values().any(RoaringBitmap::is_empty),
266            Self::LocalDateTime(index) => index.values().any(RoaringBitmap::is_empty),
267            Self::ZonedDateTime(index) => index.values().any(RoaringBitmap::is_empty),
268            Self::LocalTime(index) => index.values().any(RoaringBitmap::is_empty),
269            Self::ZonedTime(index) => index.values().any(RoaringBitmap::is_empty),
270            Self::Duration(index) => index.values().any(RoaringBitmap::is_empty),
271            Self::Uuid(index) => index.values().any(RoaringBitmap::is_empty),
272        }
273    }
274
275    /// Insert `row` into the bitmap for `value`.
276    pub(crate) fn insert(&mut self, value: &Value, row: u32) -> Result<(), TypedIndexValueError> {
277        let expected_kind = self.kind();
278        match (self, typed_key(value, expected_kind)?) {
279            (Self::Bool(index), TypedKey::Bool(key)) => {
280                index.entry(key).or_default().insert(row);
281                Ok(())
282            }
283            (Self::I64(index), TypedKey::I64(key)) => {
284                index.entry(key).or_default().insert(row);
285                Ok(())
286            }
287            (Self::U64(index), TypedKey::U64(key)) => {
288                index.entry(key).or_default().insert(row);
289                Ok(())
290            }
291            (Self::I128(index), TypedKey::I128(key)) => {
292                index.entry(key).or_default().insert(row);
293                Ok(())
294            }
295            (Self::U128(index), TypedKey::U128(key)) => {
296                index.entry(key).or_default().insert(row);
297                Ok(())
298            }
299            (Self::Decimal(index), TypedKey::Decimal(key)) => {
300                index.entry(key).or_default().insert(row);
301                Ok(())
302            }
303            (Self::F32(index), TypedKey::F32(key)) => {
304                index.entry(key).or_default().insert(row);
305                Ok(())
306            }
307            (Self::F64(index), TypedKey::F64(key)) => {
308                index.entry(key).or_default().insert(row);
309                Ok(())
310            }
311            (Self::String(index), TypedKey::String(key)) => {
312                index.entry(key).or_default().insert(row);
313                Ok(())
314            }
315            (Self::Date(index), TypedKey::Date(key)) => {
316                index.entry(key).or_default().insert(row);
317                Ok(())
318            }
319            (Self::LocalDateTime(index), TypedKey::LocalDateTime(key)) => {
320                index.entry(key).or_default().insert(row);
321                Ok(())
322            }
323            (Self::ZonedDateTime(index), TypedKey::ZonedDateTime(key)) => {
324                index.entry(key).or_default().insert(row);
325                Ok(())
326            }
327            (Self::LocalTime(index), TypedKey::LocalTime(key)) => {
328                index.entry(key).or_default().insert(row);
329                Ok(())
330            }
331            (Self::ZonedTime(index), TypedKey::ZonedTime(key)) => {
332                index.entry(key).or_default().insert(row);
333                Ok(())
334            }
335            (Self::Duration(index), TypedKey::Duration(key)) => {
336                index.entry(key).or_default().insert(row);
337                Ok(())
338            }
339            (Self::Uuid(index), TypedKey::Uuid(key)) => {
340                index.entry(key).or_default().insert(row);
341                Ok(())
342            }
343            (index, key) => Err(TypedIndexValueError::KindMismatch {
344                expected_kind: index.kind(),
345                observed: key.observed(),
346            }),
347        }
348    }
349
350    /// Remove `row` from the bitmap for `value`.
351    ///
352    /// Missing rows are ignored. If the bitmap for a key becomes empty, the key
353    /// is pruned from the inner map.
354    pub(crate) fn remove(&mut self, value: &Value, row: u32) -> Result<(), TypedIndexValueError> {
355        let expected_kind = self.kind();
356        match (self, typed_key(value, expected_kind)?) {
357            (Self::Bool(index), TypedKey::Bool(key)) => {
358                remove_row(index, &key, row);
359                Ok(())
360            }
361            (Self::I64(index), TypedKey::I64(key)) => {
362                remove_row(index, &key, row);
363                Ok(())
364            }
365            (Self::U64(index), TypedKey::U64(key)) => {
366                remove_row(index, &key, row);
367                Ok(())
368            }
369            (Self::I128(index), TypedKey::I128(key)) => {
370                remove_row(index, &key, row);
371                Ok(())
372            }
373            (Self::U128(index), TypedKey::U128(key)) => {
374                remove_row(index, &key, row);
375                Ok(())
376            }
377            (Self::Decimal(index), TypedKey::Decimal(key)) => {
378                remove_row(index, &key, row);
379                Ok(())
380            }
381            (Self::F32(index), TypedKey::F32(key)) => {
382                remove_row(index, &key, row);
383                Ok(())
384            }
385            (Self::F64(index), TypedKey::F64(key)) => {
386                remove_row(index, &key, row);
387                Ok(())
388            }
389            (Self::String(index), TypedKey::String(key)) => {
390                remove_row(index, &key, row);
391                Ok(())
392            }
393            (Self::Date(index), TypedKey::Date(key)) => {
394                remove_row(index, &key, row);
395                Ok(())
396            }
397            (Self::LocalDateTime(index), TypedKey::LocalDateTime(key)) => {
398                remove_row(index, &key, row);
399                Ok(())
400            }
401            (Self::ZonedDateTime(index), TypedKey::ZonedDateTime(key)) => {
402                remove_row(index, &key, row);
403                Ok(())
404            }
405            (Self::LocalTime(index), TypedKey::LocalTime(key)) => {
406                remove_row(index, &key, row);
407                Ok(())
408            }
409            (Self::ZonedTime(index), TypedKey::ZonedTime(key)) => {
410                remove_row(index, &key, row);
411                Ok(())
412            }
413            (Self::Duration(index), TypedKey::Duration(key)) => {
414                remove_row(index, &key, row);
415                Ok(())
416            }
417            (Self::Uuid(index), TypedKey::Uuid(key)) => {
418                remove_row(index, &key, row);
419                Ok(())
420            }
421            (index, key) => Err(TypedIndexValueError::KindMismatch {
422                expected_kind: index.kind(),
423                observed: key.observed(),
424            }),
425        }
426    }
427}
428
429fn cardinality<K>(index: &BTreeMap<K, RoaringBitmap>) -> u64 {
430    index.values().map(RoaringBitmap::len).sum()
431}
432
433fn remove_row<K: Ord>(index: &mut BTreeMap<K, RoaringBitmap>, key: &K, row: u32) {
434    if let Some(bitmap) = index.get_mut(key) {
435        bitmap.remove(row);
436        if bitmap.is_empty() {
437            index.remove(key);
438        }
439    }
440}
441
442#[cfg(test)]
443#[path = "typed_index_tests.rs"]
444mod tests;