Skip to main content

reddb_server/storage/index/
mod.rs

1//! Unified index abstraction shared across RedDB data structures.
2//!
3//! Tables, graphs, vectors, timeseries, documents and queues each maintain
4//! their own access structures today (btree, hash, bitmap, HNSW, inverted
5//! lists, adjacency maps...). This module does not replace those concrete
6//! implementations — it defines the cross-cutting traits and primitives so
7//! the query planner, segment layer and diagnostics can treat them uniformly.
8//!
9//! # Components
10//!
11//! - [`IndexBase`]   — metadata, stats, bloom, lifecycle
12//! - [`PointIndex`]  — key → values lookups
13//! - [`RangeIndex`]  — ordered iteration over key ranges
14//! - [`IndexStats`]  — cardinality/selectivity surfaced to the planner
15//! - [`IndexKind`]   — enum of supported index families
16//! - [`BloomSegment`] — reusable bloom header attachable to any segment
17//!
18//! Each concrete index (existing or future) can opt-in by implementing the
19//! traits that match its access patterns. Cross-structure features such as
20//! the hybrid executor, plan cache statistics and segment pruning consume the
21//! traits, not concrete types.
22
23pub mod bloom_segment;
24pub mod heavy_hitters;
25pub mod registry;
26pub mod stats;
27pub mod tid_bitmap;
28pub mod zone_map;
29pub mod zone_map_persist;
30
31pub use bloom_segment::{BloomSegment, BloomSegmentBuilder, HasBloom};
32pub use heavy_hitters::HeavyHitters;
33pub use registry::{IndexRegistry, IndexScope, SharedIndex};
34pub use stats::{IndexKind, IndexStats};
35pub use zone_map::{ZoneDecision, ZoneMap, ZonePredicate};
36
37use std::fmt;
38
39/// Error type emitted by index operations.
40#[derive(Debug, Clone)]
41pub enum IndexError {
42    /// Key was not valid for this index (e.g. wrong type).
43    InvalidKey(String),
44    /// Value was not valid for this index.
45    InvalidValue(String),
46    /// Index is read-only / sealed.
47    ReadOnly,
48    /// Capacity exceeded.
49    Full,
50    /// Underlying storage error.
51    Storage(String),
52}
53
54impl fmt::Display for IndexError {
55    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
56        match self {
57            IndexError::InvalidKey(m) => write!(f, "invalid key: {m}"),
58            IndexError::InvalidValue(m) => write!(f, "invalid value: {m}"),
59            IndexError::ReadOnly => write!(f, "index is read-only"),
60            IndexError::Full => write!(f, "index capacity exceeded"),
61            IndexError::Storage(m) => write!(f, "storage error: {m}"),
62        }
63    }
64}
65
66impl std::error::Error for IndexError {}
67
68/// Cross-cutting metadata every index exposes. Used by the planner for
69/// cost estimation, by the segment layer for bloom-based pruning and by
70/// diagnostics tooling.
71pub trait IndexBase: Send + Sync {
72    /// Human-readable name (e.g. "users.email", "graph.city_by_node").
73    fn name(&self) -> &str;
74
75    /// Index family (btree, hash, bitmap, hnsw, ...).
76    fn kind(&self) -> IndexKind;
77
78    /// Current statistics (cardinality, estimated selectivity, memory).
79    fn stats(&self) -> IndexStats;
80
81    /// Optional bloom filter for fast negative lookups. Cross-structure
82    /// pruning relies on this.
83    fn bloom(&self) -> Option<&crate::storage::primitives::BloomFilter> {
84        None
85    }
86
87    /// Returns `true` iff the key is *guaranteed* to be absent from this
88    /// index. Default implementation consults [`IndexBase::bloom`] and falls
89    /// back to `false` when no bloom is available (meaning "don't know —
90    /// caller must probe").
91    ///
92    /// Concrete indexes may override with tighter signals (e.g. zone map
93    /// min/max for range indexes).
94    fn definitely_absent(&self, key_bytes: &[u8]) -> bool {
95        self.bloom()
96            .map(|b| !b.contains(key_bytes))
97            .unwrap_or(false)
98    }
99}
100
101/// Point lookup access pattern. Implemented by hash, btree (as exact match),
102/// bitmap, cuckoo, etc.
103pub trait PointIndex<K: ?Sized, V>: IndexBase {
104    /// Insert `value` under `key`. Multi-value semantics are index-specific.
105    fn insert(&mut self, key: &K, value: V) -> Result<(), IndexError>;
106
107    /// Remove all values under `key`. Returns number removed.
108    fn remove(&mut self, key: &K) -> Result<usize, IndexError>;
109
110    /// Look up every value associated with `key`.
111    fn lookup(&self, key: &K) -> Vec<V>;
112
113    /// Convenience: does the key exist at all?
114    fn contains(&self, key: &K) -> bool {
115        !self.lookup(key).is_empty()
116    }
117}
118
119/// Range / ordered access pattern. Implemented by btree, skiplist, zone map.
120pub trait RangeIndex<K: ?Sized, V>: PointIndex<K, V> {
121    /// Iterate values whose key falls in `[start, end)`.
122    /// `None` bounds are open (min/max).
123    fn range(&self, start: Option<&K>, end: Option<&K>) -> Vec<(Vec<u8>, V)>;
124
125    /// Total number of distinct keys.
126    fn distinct_keys(&self) -> usize;
127}
128
129#[cfg(test)]
130mod tests {
131    use super::*;
132    use crate::storage::primitives::BloomFilter;
133    use std::collections::BTreeMap;
134
135    /// Tiny in-memory btree to exercise the traits end-to-end.
136    struct TestBTree {
137        name: String,
138        map: BTreeMap<Vec<u8>, Vec<u64>>,
139        bloom: BloomFilter,
140    }
141
142    impl TestBTree {
143        fn new(name: &str) -> Self {
144            Self {
145                name: name.to_string(),
146                map: BTreeMap::new(),
147                bloom: BloomFilter::with_capacity(1024, 0.01),
148            }
149        }
150    }
151
152    impl IndexBase for TestBTree {
153        fn name(&self) -> &str {
154            &self.name
155        }
156        fn kind(&self) -> IndexKind {
157            IndexKind::BTree
158        }
159        fn stats(&self) -> IndexStats {
160            IndexStats {
161                entries: self.map.values().map(|v| v.len()).sum(),
162                distinct_keys: self.map.len(),
163                approx_bytes: 0,
164                kind: IndexKind::BTree,
165                has_bloom: true,
166                index_correlation: 0.0,
167            }
168        }
169        fn bloom(&self) -> Option<&BloomFilter> {
170            Some(&self.bloom)
171        }
172    }
173
174    impl PointIndex<[u8], u64> for TestBTree {
175        fn insert(&mut self, key: &[u8], value: u64) -> Result<(), IndexError> {
176            self.bloom.insert(key);
177            self.map.entry(key.to_vec()).or_default().push(value);
178            Ok(())
179        }
180        fn remove(&mut self, key: &[u8]) -> Result<usize, IndexError> {
181            Ok(self.map.remove(key).map(|v| v.len()).unwrap_or(0))
182        }
183        fn lookup(&self, key: &[u8]) -> Vec<u64> {
184            self.map.get(key).cloned().unwrap_or_default()
185        }
186    }
187
188    impl RangeIndex<[u8], u64> for TestBTree {
189        fn range(&self, start: Option<&[u8]>, end: Option<&[u8]>) -> Vec<(Vec<u8>, u64)> {
190            use std::ops::Bound;
191            let lo = start.map(Bound::Included).unwrap_or(Bound::Unbounded);
192            let hi = end.map(Bound::Excluded).unwrap_or(Bound::Unbounded);
193            self.map
194                .range::<[u8], _>((lo, hi))
195                .flat_map(|(k, vs)| vs.iter().map(move |v| (k.clone(), *v)))
196                .collect()
197        }
198        fn distinct_keys(&self) -> usize {
199            self.map.len()
200        }
201    }
202
203    #[test]
204    fn point_index_roundtrip() {
205        let mut idx = TestBTree::new("test");
206        idx.insert(b"alpha", 1).unwrap();
207        idx.insert(b"alpha", 2).unwrap();
208        idx.insert(b"beta", 3).unwrap();
209
210        assert_eq!(idx.lookup(b"alpha"), vec![1, 2]);
211        assert!(idx.contains(b"beta"));
212        assert!(!idx.contains(b"gamma"));
213    }
214
215    #[test]
216    fn bloom_prunes_absent_keys() {
217        let mut idx = TestBTree::new("test");
218        idx.insert(b"alpha", 1).unwrap();
219        // Bloom must never produce false negatives.
220        assert!(!idx.definitely_absent(b"alpha"));
221        // Random unknown key: bloom may say "absent" or "unknown".
222        // Either way, probing must still return empty.
223        assert!(idx.lookup(b"not-there").is_empty());
224    }
225
226    #[test]
227    fn range_iteration() {
228        let mut idx = TestBTree::new("test");
229        for (i, k) in [b"a", b"b", b"c", b"d"].iter().enumerate() {
230            idx.insert(*k, i as u64).unwrap();
231        }
232        let out = idx.range(Some(b"b"), Some(b"d"));
233        let keys: Vec<&[u8]> = out.iter().map(|(k, _)| k.as_slice()).collect();
234        assert_eq!(keys, vec![b"b".as_slice(), b"c".as_slice()]);
235    }
236
237    #[test]
238    fn stats_surface_cardinality() {
239        let mut idx = TestBTree::new("test");
240        idx.insert(b"a", 1).unwrap();
241        idx.insert(b"a", 2).unwrap();
242        idx.insert(b"b", 3).unwrap();
243        let s = idx.stats();
244        assert_eq!(s.entries, 3);
245        assert_eq!(s.distinct_keys, 2);
246        assert!(s.has_bloom);
247    }
248}