pub mod bloom_segment;
pub mod heavy_hitters;
pub mod registry;
pub mod stats;
pub mod tid_bitmap;
pub mod zone_map;
pub mod zone_map_persist;
pub use bloom_segment::{BloomSegment, BloomSegmentBuilder, HasBloom};
pub use heavy_hitters::HeavyHitters;
pub use registry::{IndexRegistry, IndexScope, SharedIndex};
pub use stats::{IndexKind, IndexStats};
pub use zone_map::{ZoneDecision, ZoneMap, ZonePredicate};
use std::fmt;
#[derive(Debug, Clone)]
pub enum IndexError {
InvalidKey(String),
InvalidValue(String),
ReadOnly,
Full,
Storage(String),
}
impl fmt::Display for IndexError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
IndexError::InvalidKey(m) => write!(f, "invalid key: {m}"),
IndexError::InvalidValue(m) => write!(f, "invalid value: {m}"),
IndexError::ReadOnly => write!(f, "index is read-only"),
IndexError::Full => write!(f, "index capacity exceeded"),
IndexError::Storage(m) => write!(f, "storage error: {m}"),
}
}
}
impl std::error::Error for IndexError {}
pub trait IndexBase: Send + Sync {
fn name(&self) -> &str;
fn kind(&self) -> IndexKind;
fn stats(&self) -> IndexStats;
fn bloom(&self) -> Option<&crate::storage::primitives::BloomFilter> {
None
}
fn definitely_absent(&self, key_bytes: &[u8]) -> bool {
self.bloom()
.map(|b| !b.contains(key_bytes))
.unwrap_or(false)
}
}
pub trait PointIndex<K: ?Sized, V>: IndexBase {
fn insert(&mut self, key: &K, value: V) -> Result<(), IndexError>;
fn remove(&mut self, key: &K) -> Result<usize, IndexError>;
fn lookup(&self, key: &K) -> Vec<V>;
fn contains(&self, key: &K) -> bool {
!self.lookup(key).is_empty()
}
}
pub trait RangeIndex<K: ?Sized, V>: PointIndex<K, V> {
fn range(&self, start: Option<&K>, end: Option<&K>) -> Vec<(Vec<u8>, V)>;
fn distinct_keys(&self) -> usize;
}
#[cfg(test)]
mod tests {
use super::*;
use crate::storage::primitives::BloomFilter;
use std::collections::BTreeMap;
struct TestBTree {
name: String,
map: BTreeMap<Vec<u8>, Vec<u64>>,
bloom: BloomFilter,
}
impl TestBTree {
fn new(name: &str) -> Self {
Self {
name: name.to_string(),
map: BTreeMap::new(),
bloom: BloomFilter::with_capacity(1024, 0.01),
}
}
}
impl IndexBase for TestBTree {
fn name(&self) -> &str {
&self.name
}
fn kind(&self) -> IndexKind {
IndexKind::BTree
}
fn stats(&self) -> IndexStats {
IndexStats {
entries: self.map.values().map(|v| v.len()).sum(),
distinct_keys: self.map.len(),
approx_bytes: 0,
kind: IndexKind::BTree,
has_bloom: true,
index_correlation: 0.0,
}
}
fn bloom(&self) -> Option<&BloomFilter> {
Some(&self.bloom)
}
}
impl PointIndex<[u8], u64> for TestBTree {
fn insert(&mut self, key: &[u8], value: u64) -> Result<(), IndexError> {
self.bloom.insert(key);
self.map.entry(key.to_vec()).or_default().push(value);
Ok(())
}
fn remove(&mut self, key: &[u8]) -> Result<usize, IndexError> {
Ok(self.map.remove(key).map(|v| v.len()).unwrap_or(0))
}
fn lookup(&self, key: &[u8]) -> Vec<u64> {
self.map.get(key).cloned().unwrap_or_default()
}
}
impl RangeIndex<[u8], u64> for TestBTree {
fn range(&self, start: Option<&[u8]>, end: Option<&[u8]>) -> Vec<(Vec<u8>, u64)> {
use std::ops::Bound;
let lo = start.map(Bound::Included).unwrap_or(Bound::Unbounded);
let hi = end.map(Bound::Excluded).unwrap_or(Bound::Unbounded);
self.map
.range::<[u8], _>((lo, hi))
.flat_map(|(k, vs)| vs.iter().map(move |v| (k.clone(), *v)))
.collect()
}
fn distinct_keys(&self) -> usize {
self.map.len()
}
}
#[test]
fn point_index_roundtrip() {
let mut idx = TestBTree::new("test");
idx.insert(b"alpha", 1).unwrap();
idx.insert(b"alpha", 2).unwrap();
idx.insert(b"beta", 3).unwrap();
assert_eq!(idx.lookup(b"alpha"), vec![1, 2]);
assert!(idx.contains(b"beta"));
assert!(!idx.contains(b"gamma"));
}
#[test]
fn bloom_prunes_absent_keys() {
let mut idx = TestBTree::new("test");
idx.insert(b"alpha", 1).unwrap();
assert!(!idx.definitely_absent(b"alpha"));
assert!(idx.lookup(b"not-there").is_empty());
}
#[test]
fn range_iteration() {
let mut idx = TestBTree::new("test");
for (i, k) in [b"a", b"b", b"c", b"d"].iter().enumerate() {
idx.insert(*k, i as u64).unwrap();
}
let out = idx.range(Some(b"b"), Some(b"d"));
let keys: Vec<&[u8]> = out.iter().map(|(k, _)| k.as_slice()).collect();
assert_eq!(keys, vec![b"b".as_slice(), b"c".as_slice()]);
}
#[test]
fn stats_surface_cardinality() {
let mut idx = TestBTree::new("test");
idx.insert(b"a", 1).unwrap();
idx.insert(b"a", 2).unwrap();
idx.insert(b"b", 3).unwrap();
let s = idx.stats();
assert_eq!(s.entries, 3);
assert_eq!(s.distinct_keys, 2);
assert!(s.has_bloom);
}
}