Skip to main content

selene_graph/
composite_typed_index.rs

1//! Built-in composite-property value index.
2
3use std::collections::BTreeMap;
4use std::hash::{Hash, Hasher};
5
6use roaring::RoaringBitmap;
7use selene_core::{DbString, DurationOrderKey, Value, duration_order_key};
8use smallvec::SmallVec;
9
10use crate::typed_index::{NotNanError, NotNanF32, NotNanF64, TypedIndexKind, TypedIndexValueError};
11
12/// Composite key used by a composite-property index.
13pub type CompositeKey = SmallVec<[CompositeKeyComponent; 4]>;
14
15/// One ordered component in a [`CompositeKey`].
16#[derive(Clone, Debug, Eq, PartialEq)]
17pub enum CompositeKeyComponent {
18    /// Boolean component.
19    Bool(bool),
20    /// Signed integer component.
21    I64(i64),
22    /// Unsigned integer component.
23    U64(u64),
24    /// Signed 128-bit integer component.
25    I128(i128),
26    /// Unsigned 128-bit integer component.
27    U128(u128),
28    /// Fixed-precision decimal component.
29    Decimal(rust_decimal::Decimal),
30    /// 32-bit floating-point component with NaN excluded.
31    F32(NotNanF32),
32    /// Floating-point component with NaN excluded.
33    F64(NotNanF64),
34    /// Database-string component.
35    String(DbString),
36    /// Civil date component.
37    Date(jiff::civil::Date),
38    /// Civil local date-time component.
39    LocalDateTime(jiff::civil::DateTime),
40    /// Zoned date-time component.
41    ZonedDateTime(jiff::Zoned),
42    /// Civil local time component.
43    LocalTime(jiff::civil::Time),
44    /// Zoned time component.
45    ZonedTime(jiff::Zoned),
46    /// Duration component.
47    Duration(DurationOrderKey),
48    /// UUID component.
49    Uuid(uuid::Uuid),
50}
51
52impl Ord for CompositeKeyComponent {
53    fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
54        use CompositeKeyComponent as K;
55        match (self, rhs) {
56            (K::Bool(lhs), K::Bool(rhs)) => lhs.cmp(rhs),
57            (K::I64(lhs), K::I64(rhs)) => lhs.cmp(rhs),
58            (K::U64(lhs), K::U64(rhs)) => lhs.cmp(rhs),
59            (K::I128(lhs), K::I128(rhs)) => lhs.cmp(rhs),
60            (K::U128(lhs), K::U128(rhs)) => lhs.cmp(rhs),
61            (K::Decimal(lhs), K::Decimal(rhs)) => lhs.cmp(rhs),
62            (K::F32(lhs), K::F32(rhs)) => lhs.cmp(rhs),
63            (K::F64(lhs), K::F64(rhs)) => lhs.cmp(rhs),
64            (K::String(lhs), K::String(rhs)) => lhs.cmp(rhs),
65            (K::Date(lhs), K::Date(rhs)) => lhs.cmp(rhs),
66            (K::LocalDateTime(lhs), K::LocalDateTime(rhs)) => lhs.cmp(rhs),
67            (K::ZonedDateTime(lhs), K::ZonedDateTime(rhs)) => lhs.cmp(rhs),
68            (K::LocalTime(lhs), K::LocalTime(rhs)) => lhs.cmp(rhs),
69            (K::ZonedTime(lhs), K::ZonedTime(rhs)) => lhs.cmp(rhs),
70            (K::Duration(lhs), K::Duration(rhs)) => lhs.cmp(rhs),
71            (K::Uuid(lhs), K::Uuid(rhs)) => lhs.cmp(rhs),
72            _ => self.discriminant().cmp(&rhs.discriminant()),
73        }
74    }
75}
76
77impl PartialOrd for CompositeKeyComponent {
78    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
79        Some(self.cmp(rhs))
80    }
81}
82
83impl Hash for CompositeKeyComponent {
84    fn hash<H: Hasher>(&self, state: &mut H) {
85        self.discriminant().hash(state);
86        match self {
87            Self::Bool(value) => value.hash(state),
88            Self::I64(value) => value.hash(state),
89            Self::U64(value) => value.hash(state),
90            Self::I128(value) => value.hash(state),
91            Self::U128(value) => value.hash(state),
92            Self::Decimal(value) => value.hash(state),
93            Self::F32(value) => value.hash(state),
94            Self::F64(value) => value.hash(state),
95            Self::String(value) => value.hash(state),
96            Self::Date(value) => value.hash(state),
97            Self::LocalDateTime(value) => value.hash(state),
98            Self::ZonedDateTime(value) => value.hash(state),
99            Self::LocalTime(value) => value.hash(state),
100            Self::ZonedTime(value) => value.hash(state),
101            Self::Duration(value) => value.hash(state),
102            Self::Uuid(value) => value.hash(state),
103        }
104    }
105}
106
107impl CompositeKeyComponent {
108    const fn discriminant(&self) -> u8 {
109        match self {
110            Self::Bool(_) => 0,
111            Self::I64(_) => 1,
112            Self::U64(_) => 2,
113            Self::I128(_) => 3,
114            Self::U128(_) => 4,
115            Self::Decimal(_) => 5,
116            Self::F32(_) => 6,
117            Self::F64(_) => 7,
118            Self::String(_) => 8,
119            Self::Date(_) => 9,
120            Self::LocalDateTime(_) => 10,
121            Self::ZonedDateTime(_) => 11,
122            Self::LocalTime(_) => 12,
123            Self::ZonedTime(_) => 13,
124            Self::Duration(_) => 14,
125            Self::Uuid(_) => 15,
126        }
127    }
128}
129
130/// Built-in node index for an ordered tuple of property values.
131#[derive(Clone, Debug)]
132pub struct CompositeTypedIndex {
133    kinds: SmallVec<[TypedIndexKind; 4]>,
134    entries: BTreeMap<CompositeKey, RoaringBitmap>,
135}
136
137impl CompositeTypedIndex {
138    /// Construct an empty composite index for the supplied ordered kinds.
139    #[must_use]
140    pub fn new(kinds: SmallVec<[TypedIndexKind; 4]>) -> Self {
141        Self {
142            kinds,
143            entries: BTreeMap::new(),
144        }
145    }
146
147    /// Return the ordered component kinds.
148    #[must_use]
149    pub fn kinds(&self) -> &[TypedIndexKind] {
150        &self.kinds
151    }
152
153    /// Return total row cardinality across all composite keys.
154    ///
155    /// This is the sum of every bucket's row count, NOT the number of distinct
156    /// composite keys. For the distinct-key count use
157    /// [`CompositeTypedIndex::distinct_keys`].
158    #[must_use]
159    pub fn cardinality(&self) -> u64 {
160        self.entries.values().map(RoaringBitmap::len).sum()
161    }
162
163    /// Return the number of distinct composite keys (BTreeMap entry count).
164    ///
165    /// Unlike [`CompositeTypedIndex::cardinality`] (total rows), this counts the
166    /// distinct composite-key buckets. The optimizer cost model divides
167    /// `cardinality / distinct_keys` to estimate the expected rows returned by a
168    /// parameter-keyed composite probe whose values are unknown at plan time.
169    /// Returns `0` for an empty index.
170    #[must_use]
171    pub fn distinct_keys(&self) -> u64 {
172        self.entries.len() as u64
173    }
174
175    /// Iterate composite-key buckets and their matching row bitmaps.
176    pub fn entries(&self) -> impl Iterator<Item = (&CompositeKey, &RoaringBitmap)> {
177        self.entries.iter()
178    }
179
180    /// Return true when this index holds exactly the same `(key -> rows)`
181    /// buckets as `reference`.
182    ///
183    /// Used by the debug-only structural consistency net
184    /// ([`crate::SeleneGraph::assert_indexes_consistent`]). Component kinds
185    /// and every composite-key bucket's row bitmap must match.
186    #[must_use]
187    pub(crate) fn buckets_eq(&self, reference: &Self) -> bool {
188        self.kinds == reference.kinds && self.entries == reference.entries
189    }
190
191    /// Return true when any composite key maps to an empty row bitmap.
192    ///
193    /// Maintenance prunes a bucket when its bitmap empties (see
194    /// [`Self::remove`]); a present-but-empty bucket is a leak the
195    /// debug-only consistency net flags.
196    #[must_use]
197    pub(crate) fn has_empty_bucket(&self) -> bool {
198        self.entries.values().any(RoaringBitmap::is_empty)
199    }
200
201    /// Insert `row` under the composite key formed from `values`.
202    pub fn insert(&mut self, values: &[&Value], row: u32) -> Result<(), CompositeIndexValueError> {
203        let key = self.key_from_values(values)?;
204        self.entries.entry(key).or_default().insert(row);
205        Ok(())
206    }
207
208    /// Remove `row` from the composite key formed from `values`.
209    pub fn remove(&mut self, values: &[&Value], row: u32) -> Result<(), CompositeIndexValueError> {
210        let key = self.key_from_values(values)?;
211        if let Some(bitmap) = self.entries.get_mut(&key) {
212            bitmap.remove(row);
213            if bitmap.is_empty() {
214                self.entries.remove(&key);
215            }
216        }
217        Ok(())
218    }
219
220    /// Return rows matching `key`.
221    #[must_use]
222    pub fn lookup_key(&self, key: &CompositeKey) -> Option<&RoaringBitmap> {
223        self.entries.get(key)
224    }
225
226    /// Build a composite key from the index's ordered component kinds.
227    ///
228    /// This is the single coercion shared by write/maintenance and read paths.
229    /// Every coercible `STRING` component resolves directly to a
230    /// database-string key; an arity or per-component kind mismatch raises
231    /// [`CompositeIndexValueError`].
232    pub fn key_from_values(
233        &self,
234        values: &[&Value],
235    ) -> Result<CompositeKey, CompositeIndexValueError> {
236        composite_key_from_values(&self.kinds, values)
237    }
238
239    /// Return true when two value tuples address the same key.
240    ///
241    /// Uses [`Self::key_from_values`]; when either side cannot be coerced to a
242    /// key it falls through to a pairwise content compare on the raw values.
243    pub fn values_share_key(&self, lhs: &[&Value], rhs: &[&Value]) -> bool {
244        match (self.key_from_values(lhs), self.key_from_values(rhs)) {
245            (Ok(lhs_key), Ok(rhs_key)) => lhs_key == rhs_key,
246            (Err(_), Err(_)) => true,
247            _ => false,
248        }
249    }
250}
251
252/// Value-admission error for composite index mutation.
253#[derive(Debug)]
254#[non_exhaustive]
255pub enum CompositeIndexValueError {
256    /// The tuple length did not match the registered component count.
257    ArityMismatch {
258        /// Registered component count.
259        expected: usize,
260        /// Observed value count.
261        observed: usize,
262    },
263    /// A component value was not admissible for its registered kind.
264    Component {
265        /// Zero-based component index.
266        index: usize,
267        /// Registered component kind.
268        expected_kind: TypedIndexKind,
269        /// Observed value kind or `"NaN"`.
270        observed: &'static str,
271    },
272}
273
274/// Build a composite key from ordered component kinds and values.
275///
276/// This is the single coercion shared by write/maintenance and read paths. Every
277/// coercible `STRING` component resolves directly to a database-string key; an
278/// arity mismatch or a per-component kind/NaN mismatch raises
279/// [`CompositeIndexValueError`].
280pub(crate) fn composite_key_from_values(
281    kinds: &[TypedIndexKind],
282    values: &[&Value],
283) -> Result<CompositeKey, CompositeIndexValueError> {
284    if kinds.len() != values.len() {
285        return Err(CompositeIndexValueError::ArityMismatch {
286            expected: kinds.len(),
287            observed: values.len(),
288        });
289    }
290    kinds
291        .iter()
292        .zip(values)
293        .enumerate()
294        .map(|(index, (kind, value))| {
295            component_from_value(*kind, value).map_err(|source| {
296                CompositeIndexValueError::Component {
297                    index,
298                    expected_kind: source.expected_kind(),
299                    observed: source.observed(),
300                }
301            })
302        })
303        .collect()
304}
305
306fn component_from_value(
307    kind: TypedIndexKind,
308    value: &Value,
309) -> Result<CompositeKeyComponent, TypedIndexValueError> {
310    match (kind, value) {
311        (TypedIndexKind::Bool, Value::Bool(value)) => Ok(CompositeKeyComponent::Bool(*value)),
312        (TypedIndexKind::I64, Value::Int(value)) => Ok(CompositeKeyComponent::I64(*value)),
313        (TypedIndexKind::U64, Value::Uint(value)) => Ok(CompositeKeyComponent::U64(*value)),
314        (TypedIndexKind::I128, Value::Int128(value)) => Ok(CompositeKeyComponent::I128(*value)),
315        (TypedIndexKind::U128, Value::Uint128(value)) => Ok(CompositeKeyComponent::U128(*value)),
316        (TypedIndexKind::Decimal, Value::Decimal(value)) => {
317            Ok(CompositeKeyComponent::Decimal(*value))
318        }
319        (TypedIndexKind::F32, Value::Float32(value)) => NotNanF32::new(*value)
320            .map(CompositeKeyComponent::F32)
321            .map_err(|NotNanError| TypedIndexValueError::NaN {
322                expected_kind: TypedIndexKind::F32,
323            }),
324        (TypedIndexKind::F64, Value::Float(value)) => NotNanF64::new(*value)
325            .map(CompositeKeyComponent::F64)
326            .map_err(|NotNanError| TypedIndexValueError::NaN {
327                expected_kind: TypedIndexKind::F64,
328            }),
329        (TypedIndexKind::String, Value::String(value)) => {
330            Ok(CompositeKeyComponent::String(value.clone()))
331        }
332        (TypedIndexKind::Date, Value::Date(value)) => Ok(CompositeKeyComponent::Date(*value)),
333        (TypedIndexKind::LocalDateTime, Value::LocalDateTime(value)) => {
334            Ok(CompositeKeyComponent::LocalDateTime(*value))
335        }
336        (TypedIndexKind::ZonedDateTime, Value::ZonedDateTime(value)) => {
337            Ok(CompositeKeyComponent::ZonedDateTime((**value).clone()))
338        }
339        (TypedIndexKind::LocalTime, Value::LocalTime(value)) => {
340            Ok(CompositeKeyComponent::LocalTime(*value))
341        }
342        (TypedIndexKind::ZonedTime, Value::ZonedTime(value)) => {
343            Ok(CompositeKeyComponent::ZonedTime((**value).clone()))
344        }
345        (TypedIndexKind::Duration, Value::Duration(value)) => {
346            Ok(CompositeKeyComponent::Duration(duration_order_key(value)))
347        }
348        (TypedIndexKind::Uuid, Value::Uuid(value)) => Ok(CompositeKeyComponent::Uuid(*value)),
349        (expected_kind, value) => Err(TypedIndexValueError::KindMismatch {
350            expected_kind,
351            observed: crate::typed_index::observed_value_kind(value),
352        }),
353    }
354}
355
356#[cfg(test)]
357mod tests {
358    use selene_core::db_string;
359    use smallvec::smallvec;
360
361    use super::*;
362
363    fn decimal(value: &str) -> rust_decimal::Decimal {
364        value.parse().expect("test decimal parses")
365    }
366
367    #[test]
368    fn component_from_value_string_kind() {
369        let probe = "component_admit.string.unique-1";
370        let value = Value::String(db_string(probe).unwrap());
371
372        let component =
373            component_from_value(TypedIndexKind::String, &value).expect("string component coerces");
374
375        let CompositeKeyComponent::String(db_string) = component else {
376            panic!("expected String component, got {component:?}");
377        };
378        assert_eq!(db_string.as_str(), probe);
379    }
380
381    #[test]
382    fn component_from_value_bool_kind() {
383        let value = Value::Bool(true);
384
385        let component =
386            component_from_value(TypedIndexKind::Bool, &value).expect("bool component coerces");
387
388        assert_eq!(component, CompositeKeyComponent::Bool(true));
389    }
390
391    #[test]
392    fn component_from_value_u64_kind() {
393        let value = Value::Uint(42);
394
395        let component =
396            component_from_value(TypedIndexKind::U64, &value).expect("u64 component coerces");
397
398        assert_eq!(component, CompositeKeyComponent::U64(42));
399    }
400
401    #[test]
402    fn component_from_value_exact_numeric_kinds() {
403        let signed = Value::Int128(i128::MIN + 42);
404        let unsigned = Value::Uint128(u128::MAX - 42);
405        let amount = Value::Decimal(decimal("42.25"));
406
407        assert_eq!(
408            component_from_value(TypedIndexKind::I128, &signed).expect("i128 component coerces"),
409            CompositeKeyComponent::I128(i128::MIN + 42)
410        );
411        assert_eq!(
412            component_from_value(TypedIndexKind::U128, &unsigned).expect("u128 component coerces"),
413            CompositeKeyComponent::U128(u128::MAX - 42)
414        );
415        assert_eq!(
416            component_from_value(TypedIndexKind::Decimal, &amount)
417                .expect("decimal component coerces"),
418            CompositeKeyComponent::Decimal(decimal("42.25"))
419        );
420    }
421
422    #[test]
423    fn component_from_value_float32_kind() {
424        let value = Value::Float32(1.25_f32);
425
426        let component =
427            component_from_value(TypedIndexKind::F32, &value).expect("f32 component coerces");
428
429        assert_eq!(
430            component,
431            CompositeKeyComponent::F32(NotNanF32::new(1.25_f32).unwrap())
432        );
433    }
434
435    #[test]
436    fn component_from_value_duration_kind() {
437        let value = Value::Duration(Box::new("PT1H2S".parse().unwrap()));
438
439        let component = component_from_value(TypedIndexKind::Duration, &value)
440            .expect("duration component coerces");
441
442        assert_eq!(
443            component,
444            CompositeKeyComponent::Duration(selene_core::duration_order_key(match &value {
445                Value::Duration(value) => value,
446                _ => unreachable!("test value is duration"),
447            }))
448        );
449    }
450
451    #[test]
452    fn composite_key_rejects_when_later_component_kind_mismatches() {
453        let kinds: SmallVec<[TypedIndexKind; 4]> =
454            smallvec![TypedIndexKind::String, TypedIndexKind::I64];
455        let location = Value::String(db_string("composite_admit.left_to_right.loc").unwrap());
456        // Component 1 is kind-mismatched — a String value bound to an I64
457        // index component triggers `KindMismatch` on the second component.
458        let bad = Value::String(db_string("composite_admit.left_to_right.bad").unwrap());
459        let refs: Vec<&Value> = vec![&location, &bad];
460
461        let err = composite_key_from_values(&kinds, &refs)
462            .expect_err("tuple kind mismatch on later component rejects whole tuple");
463
464        assert!(matches!(
465            err,
466            CompositeIndexValueError::Component {
467                index: 1,
468                expected_kind: TypedIndexKind::I64,
469                observed: "String",
470            }
471        ));
472    }
473
474    #[test]
475    fn composite_key_from_values_admits_string_component() {
476        let kinds: SmallVec<[TypedIndexKind; 4]> =
477            smallvec![TypedIndexKind::I64, TypedIndexKind::String];
478        let ts = Value::Int(7);
479        let location = Value::String(db_string("composite_admit.string.unique-1").unwrap());
480        let refs: Vec<&Value> = vec![&ts, &location];
481
482        let key = composite_key_from_values(&kinds, &refs).expect("string component coerces");
483
484        assert_eq!(key.len(), 2);
485    }
486
487    #[test]
488    fn values_share_key_matches_equal_string_components() {
489        let index =
490            CompositeTypedIndex::new(smallvec![TypedIndexKind::I64, TypedIndexKind::String]);
491        let ts_lhs = Value::Int(1);
492        let ts_rhs = Value::Int(1);
493        let loc_lhs =
494            Value::String(db_string("values_share_key.composite.string.unique-1").unwrap());
495        let loc_rhs =
496            Value::String(db_string("values_share_key.composite.string.unique-1").unwrap());
497        let lhs: Vec<&Value> = vec![&ts_lhs, &loc_lhs];
498        let rhs: Vec<&Value> = vec![&ts_rhs, &loc_rhs];
499
500        assert!(index.values_share_key(&lhs, &rhs));
501    }
502
503    #[test]
504    fn distinct_keys_counts_composite_buckets_not_rows() {
505        let mut index =
506            CompositeTypedIndex::new(smallvec![TypedIndexKind::I64, TypedIndexKind::String]);
507        assert_eq!(index.distinct_keys(), 0, "empty index");
508
509        let k1 = db_string("k1").unwrap();
510        let v_k1 = Value::String(k1);
511        let one = Value::Int(1);
512        let two = Value::Int(2);
513
514        // (1, k1) on two rows, (2, k1) on one row: 3 rows, 2 distinct composite keys.
515        index.insert(&[&one, &v_k1], 0).unwrap();
516        index.insert(&[&one, &v_k1], 1).unwrap();
517        index.insert(&[&two, &v_k1], 2).unwrap();
518        assert_eq!(index.cardinality(), 3);
519        assert_eq!(index.distinct_keys(), 2);
520
521        // Remove one of the two rows on (1, k1): bucket stays alive.
522        index.remove(&[&one, &v_k1], 0).unwrap();
523        assert_eq!(index.cardinality(), 2);
524        assert_eq!(index.distinct_keys(), 2);
525
526        // Remove the last row on (1, k1): bucket pruned → distinct drops.
527        index.remove(&[&one, &v_k1], 1).unwrap();
528        assert_eq!(index.cardinality(), 1);
529        assert_eq!(index.distinct_keys(), 1);
530    }
531
532    #[test]
533    fn values_share_key_returns_false_for_distinct_strings() {
534        let index =
535            CompositeTypedIndex::new(smallvec![TypedIndexKind::I64, TypedIndexKind::String]);
536        let ts_lhs = Value::Int(1);
537        let ts_rhs = Value::Int(1);
538        let loc_lhs = Value::String(db_string("values_share_key.composite.lhs-unique").unwrap());
539        let loc_rhs = Value::String(db_string("values_share_key.composite.rhs-unique").unwrap());
540        let lhs: Vec<&Value> = vec![&ts_lhs, &loc_lhs];
541        let rhs: Vec<&Value> = vec![&ts_rhs, &loc_rhs];
542
543        assert!(!index.values_share_key(&lhs, &rhs));
544    }
545}