#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum IndexKind {
#[default]
Unknown,
BTree,
Hash,
Bitmap,
RoaringBitmap,
Inverted,
Hnsw,
IvfFlat,
ProductQuantization,
Spatial,
GraphAdjacency,
Temporal,
Cuckoo,
ZoneMap,
UnifiedRef,
HeavyHitters,
}
impl IndexKind {
pub fn supports_range(self) -> bool {
matches!(
self,
IndexKind::BTree | IndexKind::Spatial | IndexKind::Temporal | IndexKind::ZoneMap
)
}
pub fn supports_ann(self) -> bool {
matches!(
self,
IndexKind::Hnsw | IndexKind::IvfFlat | IndexKind::ProductQuantization
)
}
}
#[derive(Debug, Clone, Default)]
pub struct IndexStats {
pub entries: usize,
pub distinct_keys: usize,
pub approx_bytes: usize,
pub kind: IndexKind,
pub has_bloom: bool,
pub index_correlation: f64,
}
impl IndexStats {
pub fn point_selectivity(&self) -> f64 {
if self.distinct_keys == 0 {
return 1.0;
}
(1.0 / self.distinct_keys as f64).clamp(0.0, 1.0)
}
pub fn avg_values_per_key(&self) -> f64 {
if self.distinct_keys == 0 {
return 0.0;
}
self.entries as f64 / self.distinct_keys as f64
}
pub fn correlated_io_cost(&self, result_rows: f64, heap_pages: f64) -> f64 {
const SEQ_PAGE_COST: f64 = 1.0;
const RANDOM_PAGE_COST: f64 = 4.0;
if heap_pages <= 0.0 || result_rows <= 0.0 {
return 0.0;
}
let frac = result_rows / heap_pages;
let pages_fetched = heap_pages * (1.0 - (-frac).exp());
let pages_fetched = pages_fetched.min(heap_pages);
let random_cost = RANDOM_PAGE_COST * pages_fetched;
let seq_cost = SEQ_PAGE_COST * pages_fetched;
let corr2 = self.index_correlation.powi(2).clamp(0.0, 1.0);
seq_cost * corr2 + random_cost * (1.0 - corr2)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn selectivity_scales_with_cardinality() {
let s = IndexStats {
entries: 100,
distinct_keys: 100,
approx_bytes: 0,
kind: IndexKind::Hash,
has_bloom: false,
index_correlation: 0.0,
};
assert_eq!(s.point_selectivity(), 0.01);
assert_eq!(s.avg_values_per_key(), 1.0);
}
#[test]
fn range_capability_flag() {
assert!(IndexKind::BTree.supports_range());
assert!(!IndexKind::Hash.supports_range());
assert!(IndexKind::Hnsw.supports_ann());
}
#[test]
fn empty_stats_do_not_divide_by_zero() {
let s = IndexStats::default();
assert_eq!(s.point_selectivity(), 1.0);
assert_eq!(s.avg_values_per_key(), 0.0);
}
#[test]
fn correlated_io_cheaper_than_random() {
let base = IndexStats {
entries: 10_000,
distinct_keys: 10_000,
approx_bytes: 0,
kind: IndexKind::BTree,
has_bloom: false,
index_correlation: 0.0,
};
let correlated = IndexStats {
index_correlation: 1.0,
..base.clone()
};
let heap_pages = 1000.0;
let result_rows = 100.0;
let random_cost = base.correlated_io_cost(result_rows, heap_pages);
let seq_cost = correlated.correlated_io_cost(result_rows, heap_pages);
assert!(
seq_cost < random_cost,
"correlated (seq) should be cheaper than uncorrelated (random): {seq_cost} vs {random_cost}"
);
}
#[test]
fn timeseries_gets_full_correlation() {
let s = IndexStats {
index_correlation: 1.0,
kind: IndexKind::Temporal,
..IndexStats::default()
};
assert_eq!(s.correlated_io_cost(0.0, 1000.0), 0.0);
}
}