use iqdb_types::Filter;
use crate::FilterEvaluator;
pub(crate) const EQ_SELECTIVITY: f64 = 0.1;
pub(crate) const RANGE_SELECTIVITY: f64 = 1.0 / 3.0;
#[must_use]
pub fn estimate_selectivity(evaluator: &FilterEvaluator) -> f64 {
estimate(evaluator.filter()).clamp(0.0, 1.0)
}
pub(crate) fn structural(filter: &Filter) -> f64 {
estimate(filter)
}
fn estimate(filter: &Filter) -> f64 {
match filter {
Filter::Eq { .. } => EQ_SELECTIVITY,
Filter::Neq { .. } => 1.0 - EQ_SELECTIVITY,
Filter::Lt { .. } | Filter::Lte { .. } | Filter::Gt { .. } | Filter::Gte { .. } => {
RANGE_SELECTIVITY
}
Filter::In { values, .. } => (EQ_SELECTIVITY * values.len() as f64).min(1.0),
Filter::And(children) => children.iter().map(estimate).product(),
Filter::Or(children) => 1.0 - children.iter().map(|c| 1.0 - estimate(c)).product::<f64>(),
Filter::Not(inner) => 1.0 - estimate(inner),
}
}
#[cfg(test)]
mod tests {
#![allow(clippy::unwrap_used)]
use super::*;
use iqdb_types::Value;
fn sel(filter: Filter) -> f64 {
estimate_selectivity(&FilterEvaluator::new(filter).unwrap())
}
#[test]
fn eq_is_narrow_neq_is_broad() {
assert!(sel(Filter::eq("k", Value::Int(1))) < 0.5);
assert!(sel(Filter::neq("k", Value::Int(1))) > 0.5);
}
#[test]
fn range_sits_between() {
let eq = sel(Filter::eq("k", Value::Int(1)));
let range = sel(Filter::gt("k", Value::Int(1)));
let neq = sel(Filter::neq("k", Value::Int(1)));
assert!(eq < range && range < neq);
}
#[test]
fn and_narrows_or_widens() {
let eq = sel(Filter::eq("a", Value::Int(1)));
let and = sel(Filter::and(vec![
Filter::eq("a", Value::Int(1)),
Filter::eq("b", Value::Int(2)),
]));
let or = sel(Filter::or(vec![
Filter::eq("a", Value::Int(1)),
Filter::eq("b", Value::Int(2)),
]));
assert!(and < eq);
assert!(or > eq);
}
#[test]
fn not_complements() {
let eq = sel(Filter::eq("k", Value::Int(1)));
let not = sel(Filter::not(Filter::eq("k", Value::Int(1))));
assert!((eq + not - 1.0).abs() < 1e-9);
}
#[test]
fn in_scales_with_cardinality_and_saturates() {
let one = sel(Filter::is_in("k", vec![Value::Int(1)]));
let three = sel(Filter::is_in(
"k",
vec![Value::Int(1), Value::Int(2), Value::Int(3)],
));
assert!(three > one);
let wide = sel(Filter::is_in("k", vec![Value::Int(0); 100]));
assert!((wide - 1.0).abs() < 1e-9);
}
#[test]
fn empty_and_matches_all_empty_or_matches_none() {
assert!((sel(Filter::and(vec![])) - 1.0).abs() < 1e-9);
assert!(sel(Filter::or(vec![])).abs() < 1e-9);
}
#[test]
fn always_a_probability() {
let deeply_nested = Filter::or(vec![
Filter::not(Filter::and(vec![
Filter::eq("a", Value::Int(1)),
Filter::neq("b", Value::Int(2)),
])),
Filter::is_in("c", vec![Value::Int(0); 50]),
]);
let s = sel(deeply_nested);
assert!((0.0..=1.0).contains(&s));
}
}