use selene_core::{DbString, Value};
use crate::{
BinaryOp, LabelExpr, Literal, ValueExpr,
plan::{
BindingDef, FilterPredicate, FilterPredicateKind, IndexTarget, optimize::IndexCatalog,
optimize::OptimizeContext,
},
};
use super::binding_refs::{self, match_property_predicate};
const HEURISTIC_EQUALS: f64 = 0.05;
const HEURISTIC_NEQ: f64 = 0.95;
const HEURISTIC_RANGE: f64 = 0.33;
const HEURISTIC_OR: f64 = 0.66;
const HEURISTIC_EXISTS: f64 = 0.5;
const HEURISTIC_DEFAULT: f64 = 0.5;
pub(crate) struct ScanContext<'a> {
pub bindings: &'a [BindingDef],
pub catalog: Option<&'a dyn IndexCatalog>,
}
pub(crate) fn estimate(
pred: &FilterPredicate,
_ctx: &OptimizeContext<'_>,
scan_ctx: &ScanContext<'_>,
) -> f64 {
if let Some(selectivity) = stats_selectivity(pred, scan_ctx) {
return selectivity;
}
match pred.kind {
FilterPredicateKind::PropertyEquals { .. } => HEURISTIC_EQUALS,
FilterPredicateKind::Expression => estimate_expr(&pred.expr),
}
}
fn stats_selectivity(pred: &FilterPredicate, scan_ctx: &ScanContext<'_>) -> Option<f64> {
let catalog = scan_ctx.catalog?;
let matched = match_property_predicate(pred, scan_ctx.bindings)?;
let label = label_for_binding(scan_ctx.bindings, matched.binding)?;
let binding_refs::PropertyPredicateShape::Equality(value_expr) = matched.shape else {
return None;
};
let value = equality_literal_value(value_expr)?;
let matches =
catalog.equality_cardinality(IndexTarget::Node, label.clone(), matched.key, &value)?;
let population = catalog.label_cardinality(IndexTarget::Node, label)?;
if population == 0 {
return None;
}
Some((matches as f64 / population as f64).clamp(0.0, 1.0))
}
fn equality_literal_value(expr: &ValueExpr) -> Option<Value> {
let literal = binding_refs::literal(expr)?;
match literal {
Literal::Bool(value, _) => Some(Value::Bool(*value)),
Literal::Integer(value, _) | Literal::RadixInteger(value, _, _) => Some(Value::Int(*value)),
Literal::Decimal(value, _, _) => Some(Value::Decimal(*value)),
Literal::Float(value, _, _) => Some(Value::Float(*value)),
Literal::String(value, _, _) => Some(Value::String(value.clone())),
Literal::Bytes(value, _) => Some(Value::Bytes(value.clone())),
Literal::Uuid(value, _, _) => Some(Value::Uuid(*value)),
Literal::ZonedDateTime(value, _, _) => Some(Value::ZonedDateTime(value.clone())),
Literal::LocalDateTime(value, _, _) => Some(Value::LocalDateTime(*value)),
Literal::Date(value, _, _) => Some(Value::Date(*value)),
Literal::ZonedTime(value, _, _) => Some(Value::ZonedTime(value.clone())),
Literal::LocalTime(value, _, _) => Some(Value::LocalTime(*value)),
Literal::Duration(value, _, _) => Some(Value::Duration(value.clone())),
Literal::Null(_) => None,
}
}
fn estimate_expr(expr: &ValueExpr) -> f64 {
match expr {
ValueExpr::BinaryOp {
op: BinaryOp::Eq, ..
} => HEURISTIC_EQUALS,
ValueExpr::BinaryOp {
op: BinaryOp::Ne, ..
} => HEURISTIC_NEQ,
ValueExpr::BinaryOp {
op: BinaryOp::Lt | BinaryOp::Le | BinaryOp::Gt | BinaryOp::Ge,
..
} => HEURISTIC_RANGE,
ValueExpr::BinaryOp {
op: BinaryOp::Or, ..
} => HEURISTIC_OR,
ValueExpr::BinaryOp {
op: BinaryOp::And,
lhs,
rhs,
..
} => estimate_expr(lhs) * estimate_expr(rhs),
ValueExpr::InList { list, .. } => {
let k = list.len().max(1) as i32;
1.0 - (1.0 - HEURISTIC_EQUALS).powi(k)
}
ValueExpr::Exists { .. } | ValueExpr::PropertyExists { .. } => HEURISTIC_EXISTS,
_ => HEURISTIC_DEFAULT,
}
}
fn label_for_binding(bindings: &[BindingDef], binding_id: crate::BindingId) -> Option<DbString> {
bindings
.iter()
.find(|binding| binding.binding == binding_id)
.and_then(|binding| match &binding.label_predicate {
Some(LabelExpr::Single(label)) => Some(label.clone()),
_ => None,
})
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{
ExprId, FilterPredicateKind, Literal, SourceSpan, analyze::AnalyzedType,
plan::FilterPredicate,
};
fn in_list_predicate(len: usize) -> FilterPredicate {
let span = SourceSpan::new(0, 1);
FilterPredicate {
expr: ValueExpr::InList {
operand: Box::new(ValueExpr::Literal(Literal::Integer(1, span))),
list: (0..len)
.map(|index| ValueExpr::Literal(Literal::Integer(index as i64, span)))
.collect(),
negated: false,
span,
},
expr_id: ExprId::new(0),
ty: AnalyzedType::DYNAMIC,
binding_refs: Vec::new(),
kind: FilterPredicateKind::Expression,
index_consumed: false,
span,
}
}
#[test]
fn selectivity_in_list_caps_at_one() {
let ctx = OptimizeContext::default();
let scan_ctx = ScanContext {
bindings: &[],
catalog: None,
};
let estimate = estimate(&in_list_predicate(100), &ctx, &scan_ctx);
assert!(estimate <= 1.0);
}
#[test]
fn selectivity_in_list_ranks_between_eq_and_neq_for_small_k() {
let ctx = OptimizeContext::default();
let scan_ctx = ScanContext {
bindings: &[],
catalog: None,
};
let estimate = estimate(&in_list_predicate(10), &ctx, &scan_ctx);
assert!((estimate - 0.401_263_060_761_621).abs() < 1e-12);
assert!(estimate > HEURISTIC_EQUALS);
assert!(estimate < HEURISTIC_NEQ);
}
}