use crate::filter::ast::FilterExpr;
use crate::filter::error::FilterError;
use crate::filter::evaluator::evaluate;
use crate::metadata::MetadataValue;
use rand::seq::SliceRandom;
use rand::SeedableRng;
use rand_chacha::ChaCha8Rng;
use std::collections::HashMap;
/// Largest oversampling factor any strategy may carry. Enforced by
/// `FilterStrategy::validate` and used as the cap in `calculate_oversample`.
pub const MAX_OVERSAMPLE: f32 = 10.0;
/// Oversampling factor used by `FilterStrategy::POST_FILTER_DEFAULT`.
pub const DEFAULT_OVERSAMPLE: f32 = 3.0;
/// NOTE(review): not referenced anywhere in this file; the name suggests an
/// upper bound on an HNSW-style `ef` search parameter — confirm at the use site.
pub const EF_CAP: usize = 1000;
/// Maximum number of metadata entries `estimate_selectivity` evaluates.
pub const SELECTIVITY_SAMPLE_SIZE: usize = 100;
/// `select_strategy` returns `PreFilter` when selectivity is strictly above this value.
pub const PREFILTER_THRESHOLD: f32 = 0.8;
/// `select_strategy` returns `PostFilter` when selectivity is strictly below this value.
pub const POSTFILTER_THRESHOLD: f32 = 0.05;
/// How a metadata filter is combined with a vector search.
///
/// The numeric fields are oversampling factors; `FilterStrategy::validate`
/// checks that they lie between 1.0 and `MAX_OVERSAMPLE`.
/// NOTE(review): the exact search-time behaviour of each variant lives outside
/// this file — the per-variant descriptions below are based on the names only.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub enum FilterStrategy {
/// Presumably: search first, then filter the results, fetching
/// `oversample` times as many candidates as requested — confirm.
PostFilter {
/// Oversampling factor; valid range is 1.0..=`MAX_OVERSAMPLE`.
oversample: f32,
},
/// Presumably: evaluate the filter before searching — confirm.
PreFilter,
/// Strategy parameterised by an oversampling range.
Hybrid {
/// Lower end of the oversampling range; must be at least 1.0.
oversample_min: f32,
/// Upper end of the oversampling range; must be at least
/// `oversample_min` and at most `MAX_OVERSAMPLE`.
oversample_max: f32,
},
/// Default variant. Presumably resolved to a concrete strategy at query
/// time (e.g. via `select_strategy`) — confirm at the call site.
#[default]
Auto,
}
impl FilterStrategy {
    /// Post-filter strategy using `DEFAULT_OVERSAMPLE`.
    pub const POST_FILTER_DEFAULT: Self = FilterStrategy::PostFilter {
        oversample: DEFAULT_OVERSAMPLE,
    };

    /// Hybrid strategy whose oversampling range runs from 1.5 to `MAX_OVERSAMPLE`.
    pub const HYBRID_DEFAULT: Self = FilterStrategy::Hybrid {
        oversample_min: 1.5,
        oversample_max: MAX_OVERSAMPLE,
    };

    /// Checks that the oversampling factors carried by this strategy are usable.
    ///
    /// `PreFilter` and `Auto` carry no parameters and are always valid.
    ///
    /// # Errors
    ///
    /// Returns `FilterError::InvalidStrategy` when
    /// * a `PostFilter` factor is NaN, below 1.0 or above `MAX_OVERSAMPLE`;
    /// * a `Hybrid` minimum is NaN or below 1.0;
    /// * a `Hybrid` maximum is NaN, below the minimum or above `MAX_OVERSAMPLE`.
    pub fn validate(&self) -> Result<(), FilterError> {
        match self {
            FilterStrategy::PostFilter { oversample } => {
                // Every ordered comparison with NaN is false, so NaN would slip
                // through the range checks unless it is rejected explicitly.
                if oversample.is_nan() || *oversample < 1.0 {
                    return Err(FilterError::InvalidStrategy(
                        "oversample must be >= 1.0".into(),
                    ));
                }
                if *oversample > MAX_OVERSAMPLE {
                    return Err(FilterError::InvalidStrategy(format!(
                        "oversample must be <= {MAX_OVERSAMPLE}"
                    )));
                }
                Ok(())
            }
            FilterStrategy::Hybrid {
                oversample_min,
                oversample_max,
            } => {
                if oversample_min.is_nan() || *oversample_min < 1.0 {
                    return Err(FilterError::InvalidStrategy(
                        "oversample_min must be >= 1.0".into(),
                    ));
                }
                if oversample_max.is_nan() || *oversample_max < *oversample_min {
                    return Err(FilterError::InvalidStrategy(
                        "oversample_max must be >= oversample_min".into(),
                    ));
                }
                if *oversample_max > MAX_OVERSAMPLE {
                    return Err(FilterError::InvalidStrategy(format!(
                        "oversample_max must be <= {MAX_OVERSAMPLE}"
                    )));
                }
                Ok(())
            }
            FilterStrategy::PreFilter | FilterStrategy::Auto => Ok(()),
        }
    }
}
/// Result of estimating what fraction of stored items pass a filter.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct SelectivityEstimate {
/// Estimated fraction of items that pass the filter.
/// `SelectivityEstimate::new` clamps it to 0.0..=1.0; `estimate_selectivity`
/// clamps to 0.01..=1.0 for non-empty stores and reports 0.0 for empty ones.
pub selectivity: f32,
/// Number of items that were examined to produce the estimate.
pub sample_size: usize,
/// Number of examined items that passed the filter.
pub passed: usize,
}
/// Converts a selectivity into an oversampling factor of `1 / selectivity`,
/// kept within `1.0..=MAX_OVERSAMPLE`.
///
/// * Non-positive or NaN selectivity yields `MAX_OVERSAMPLE`.
/// * Selectivity above 1.0 yields 1.0, so the result never drops below the
///   minimum factor that `FilterStrategy::validate` accepts.
#[must_use]
pub fn calculate_oversample(selectivity: f32) -> f32 {
    // NaN is handled here because `f32::clamp` would pass it straight through.
    if selectivity.is_nan() || selectivity <= 0.0 {
        return MAX_OVERSAMPLE;
    }
    (1.0 / selectivity).clamp(1.0, MAX_OVERSAMPLE)
}
/// Picks a concrete strategy for the given selectivity.
///
/// * above `PREFILTER_THRESHOLD`: `PreFilter`;
/// * below `POSTFILTER_THRESHOLD`: `PostFilter` with `calculate_oversample(selectivity)`;
/// * otherwise: `Hybrid` with a minimum factor of 1.5 and a maximum derived
///   from `calculate_oversample`, never lower than the minimum.
///
/// Every returned strategy satisfies `FilterStrategy::validate`.
#[must_use]
pub fn select_strategy(selectivity: f32) -> FilterStrategy {
    // Same lower bound as `FilterStrategy::HYBRID_DEFAULT`.
    const HYBRID_OVERSAMPLE_MIN: f32 = 1.5;

    if selectivity > PREFILTER_THRESHOLD {
        FilterStrategy::PreFilter
    } else if selectivity < POSTFILTER_THRESHOLD {
        FilterStrategy::PostFilter {
            oversample: calculate_oversample(selectivity),
        }
    } else {
        FilterStrategy::Hybrid {
            oversample_min: HYBRID_OVERSAMPLE_MIN,
            // For selectivity above 1/1.5 the raw factor is smaller than the
            // minimum; flooring it keeps the range well-formed (min <= max).
            oversample_max: calculate_oversample(selectivity).max(HYBRID_OVERSAMPLE_MIN),
        }
    }
}
/// Fixed selectivity guesses (fraction of items expected to match) used by
/// `estimate_filter_selectivity` when no data is sampled.
mod selectivity_heuristics {
/// Guess for `Eq`.
pub const EQUALITY: f64 = 0.10;
/// Guess for `Ne`.
pub const NOT_EQUALS: f64 = 0.90;
/// Guess for the strict comparisons `Lt` and `Gt`.
pub const RANGE_STRICT: f64 = 0.30;
/// Guess for the inclusive comparisons `Le` and `Ge`.
pub const RANGE_INCLUSIVE: f64 = 0.35;
/// Guess for `Contains` and `Like`.
pub const CONTAINS: f64 = 0.20;
/// Guess for `StartsWith` and `EndsWith`.
pub const PREFIX_SUFFIX: f64 = 0.15;
/// Guess for `In`, `Any`, `All` and `None`; `NotIn` uses its complement.
pub const IN_ARRAY: f64 = 0.25;
/// Guess for `Between`.
pub const BETWEEN: f64 = 0.20;
/// Guess for `IsNull`.
pub const IS_NULL: f64 = 0.05;
/// Guess for `IsNotNull`.
pub const IS_NOT_NULL: f64 = 0.95;
/// Neutral guess used for literal and bare-field nodes.
pub const DEFAULT: f64 = 0.50;
}
/// Estimates, without touching any data, the fraction of items a filter is
/// expected to match.
///
/// Leaf predicates map to the fixed guesses in `selectivity_heuristics`.
/// Composite nodes combine their operands as if they were independent:
/// `And` multiplies, `Or` uses `p + q - p * q` (capped at 1.0) and `Not`
/// takes the complement. Literal and bare-field nodes receive the neutral
/// `DEFAULT` value.
#[must_use]
pub fn estimate_filter_selectivity(filter: &FilterExpr) -> f64 {
    use selectivity_heuristics as guess;

    match filter {
        // Composite nodes: recurse into the operands.
        FilterExpr::Not(operand) => 1.0 - estimate_filter_selectivity(operand),
        FilterExpr::And(lhs, rhs) => {
            let p = estimate_filter_selectivity(lhs);
            let q = estimate_filter_selectivity(rhs);
            p * q
        }
        FilterExpr::Or(lhs, rhs) => {
            let p = estimate_filter_selectivity(lhs);
            let q = estimate_filter_selectivity(rhs);
            (p + q - p * q).min(1.0)
        }

        // Comparison predicates.
        FilterExpr::Eq(..) => guess::EQUALITY,
        FilterExpr::Ne(..) => guess::NOT_EQUALS,
        FilterExpr::Lt(..) | FilterExpr::Gt(..) => guess::RANGE_STRICT,
        FilterExpr::Le(..) | FilterExpr::Ge(..) => guess::RANGE_INCLUSIVE,
        FilterExpr::Between(..) => guess::BETWEEN,

        // String predicates.
        FilterExpr::Contains(..) | FilterExpr::Like(..) => guess::CONTAINS,
        FilterExpr::StartsWith(..) | FilterExpr::EndsWith(..) => guess::PREFIX_SUFFIX,

        // Set / array predicates.
        FilterExpr::In(..) | FilterExpr::Any(..) | FilterExpr::All(..) | FilterExpr::None(..) => {
            guess::IN_ARRAY
        }
        FilterExpr::NotIn(..) => 1.0 - guess::IN_ARRAY,

        // Null checks.
        FilterExpr::IsNull(..) => guess::IS_NULL,
        FilterExpr::IsNotNull(..) => guess::IS_NOT_NULL,

        // Values rather than predicates: no information, use the neutral guess.
        FilterExpr::LiteralString(..)
        | FilterExpr::LiteralInt(..)
        | FilterExpr::LiteralFloat(..)
        | FilterExpr::LiteralBool(..)
        | FilterExpr::LiteralArray(..)
        | FilterExpr::Field(..) => guess::DEFAULT,
    }
}
/// Converts a selectivity into a whole-number overfetch multiplier in `2..=10`.
///
/// The multiplier is `ceil(1 / selectivity)` clamped to that range.
/// Non-positive or NaN selectivity yields the maximum of 10.
#[must_use]
// The value is clamped to 2.0..=10.0 before the cast, so the cast can neither
// truncate meaningfully nor see a negative number.
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
pub fn overfetch_from_selectivity(selectivity: f64) -> usize {
    // NaN must be caught here: it would survive `ceil`/`clamp` and cast to 0.
    if selectivity.is_nan() || selectivity <= 0.0 {
        return 10;
    }
    let factor = (1.0 / selectivity).ceil().clamp(2.0, 10.0);
    factor as usize
}
/// Read-only view of per-item metadata, as needed by `estimate_selectivity`.
pub trait MetadataStore {
/// Returns the metadata map for `id`, or `None` when there is none.
/// `estimate_selectivity` calls this with ids drawn from `0..self.len()`.
fn get_metadata(&self, id: usize) -> Option<&HashMap<String, MetadataValue>>;
/// Number of items in the store.
fn len(&self) -> usize;
/// Returns `true` when `len()` is zero.
fn is_empty(&self) -> bool {
self.len() == 0
}
}
/// Estimates filter selectivity by evaluating the filter on a random sample
/// of at most `SELECTIVITY_SAMPLE_SIZE` items.
///
/// * `seed`: `Some(s)` makes the sample reproducible; `None` seeds the
///   generator from entropy.
/// * An empty store yields selectivity 0.0 with zero counts.
/// * For non-empty stores the reported selectivity is `passed / sample_size`
///   clamped to `0.01..=1.0`, so it is never exactly zero.
///
/// Items without metadata, and items for which evaluation returns an error,
/// are counted as not passing.
pub fn estimate_selectivity<M: MetadataStore>(
    filter: &FilterExpr,
    metadata_store: &M,
    seed: Option<u64>,
) -> SelectivityEstimate {
    let total_vectors = metadata_store.len();
    if total_vectors == 0 {
        return SelectivityEstimate {
            selectivity: 0.0,
            sample_size: 0,
            passed: 0,
        };
    }

    let sample_size = SELECTIVITY_SAMPLE_SIZE.min(total_vectors);
    let mut rng = match seed {
        Some(s) => ChaCha8Rng::seed_from_u64(s),
        None => ChaCha8Rng::from_entropy(),
    };

    // Only `sample_size` positions need to be randomised; shuffling the whole
    // index list would cost one RNG draw per stored item.
    // NOTE: the index list itself is still O(total_vectors) memory.
    let mut indices: Vec<usize> = (0..total_vectors).collect();
    let (sample_indices, _unshuffled) = indices.partial_shuffle(&mut rng, sample_size);

    let passed = sample_indices
        .iter()
        .filter_map(|&idx| metadata_store.get_metadata(idx))
        // Best effort: an evaluation error is treated as "does not match".
        .filter(|&metadata| evaluate(filter, metadata).unwrap_or(false))
        .count();

    #[allow(clippy::cast_precision_loss)]
    let raw_selectivity = (passed as f32) / (sample_size as f32);
    let selectivity = raw_selectivity.clamp(0.01, 1.0);

    SelectivityEstimate {
        selectivity,
        sample_size,
        passed,
    }
}
impl SelectivityEstimate {
    /// Builds an estimate from raw parts, clamping `selectivity` to `0.0..=1.0`.
    /// The counts are stored as given.
    #[must_use]
    pub fn new(selectivity: f32, sample_size: usize, passed: usize) -> Self {
        let selectivity = selectivity.clamp(0.0, 1.0);
        Self {
            selectivity,
            sample_size,
            passed,
        }
    }

    /// Estimate for a filter expected to match nothing.
    ///
    /// The selectivity is 0.01 rather than 0.0, the same floor that
    /// `estimate_selectivity` applies; both counts are zero.
    #[must_use]
    pub fn zero() -> Self {
        Self {
            passed: 0,
            sample_size: 0,
            selectivity: 0.01,
        }
    }

    /// Estimate for a filter expected to match everything (selectivity 1.0,
    /// zero counts).
    #[must_use]
    pub fn full() -> Self {
        Self {
            passed: 0,
            sample_size: 0,
            selectivity: 1.0,
        }
    }

    /// Confidence in the estimate, from 0.0 to 1.0.
    ///
    /// Grows linearly with the sample size and saturates at 100 samples
    /// (the current value of `SELECTIVITY_SAMPLE_SIZE`); an estimate with no
    /// samples has confidence 0.0.
    #[must_use]
    #[allow(clippy::cast_precision_loss)]
    pub fn confidence(&self) -> f32 {
        match self.sample_size {
            0 => 0.0,
            samples => (samples as f32 / 100.0).min(1.0),
        }
    }
}
/// Conservatively detects filters that are true for every input.
///
/// Recognised shapes: the literal `true`; an `Or` whose operands are
/// complementary or where either operand is a tautology; an `And` of two
/// tautologies; and the negation of a contradiction. Anything else is
/// reported as `false`, even if it happens to be always true.
#[must_use]
pub fn is_tautology(filter: &FilterExpr) -> bool {
    match filter {
        FilterExpr::LiteralBool(value) => *value,
        FilterExpr::Not(operand) => is_contradiction(operand),
        FilterExpr::And(lhs, rhs) => is_tautology(lhs) && is_tautology(rhs),
        FilterExpr::Or(lhs, rhs) => {
            are_complementary(lhs, rhs) || is_tautology(lhs) || is_tautology(rhs)
        }
        _ => false,
    }
}
/// Conservatively detects filters that are false for every input.
///
/// Recognised shapes: the literal `false`; an `And` whose operands are
/// complementary, form an impossible numeric range, or where either operand
/// is a contradiction; an `Or` of two contradictions; and the negation of a
/// tautology. Anything else is reported as `false`.
#[must_use]
pub fn is_contradiction(filter: &FilterExpr) -> bool {
    match filter {
        FilterExpr::LiteralBool(value) => !*value,
        FilterExpr::Not(operand) => is_tautology(operand),
        FilterExpr::Or(lhs, rhs) => is_contradiction(lhs) && is_contradiction(rhs),
        FilterExpr::And(lhs, rhs) => {
            are_complementary(lhs, rhs)
                || is_impossible_range(lhs, rhs)
                || is_impossible_range(rhs, lhs)
                || is_contradiction(lhs)
                || is_contradiction(rhs)
        }
        _ => false,
    }
}
/// Returns `true` when one expression is syntactically the negation of the
/// other, i.e. `a == Not(b)` or `b == Not(a)`.
///
/// Both orientations are tested independently. A single or-pattern would
/// commit to the first alternative whenever `a` is a `Not`, and so miss pairs
/// such as `(Not(x), Not(Not(x)))`.
fn are_complementary(a: &FilterExpr, b: &FilterExpr) -> bool {
    /// `true` when `candidate` is exactly `Not(target)`.
    fn negates(candidate: &FilterExpr, target: &FilterExpr) -> bool {
        match candidate {
            FilterExpr::Not(inner) => inner.as_ref() == target,
            _ => false,
        }
    }

    negates(a, b) || negates(b, a)
}
/// Returns `true` when `a` and `b` are a lower bound (`>` / `>=`) and an
/// upper bound (`<` / `<=`) on the same field that no value can satisfy
/// together. The operands may be given in either order.
///
/// With at least one strict bound the pair is impossible when
/// `lower >= upper` (e.g. `x > 5 AND x <= 5`). With two inclusive bounds the
/// values may meet (`x >= 5 AND x <= 5` is satisfied by 5), so the pair is
/// impossible only when `lower > upper`.
///
/// Bounds that `compare_values_gte` cannot compare are never reported as
/// impossible.
fn is_impossible_range(a: &FilterExpr, b: &FilterExpr) -> bool {
    // Normalise the operand order: `lower` is the `>`/`>=` side.
    let (lower, upper) = match (a, b) {
        (
            FilterExpr::Gt(..) | FilterExpr::Ge(..),
            FilterExpr::Lt(..) | FilterExpr::Le(..),
        ) => (a, b),
        (
            FilterExpr::Lt(..) | FilterExpr::Le(..),
            FilterExpr::Gt(..) | FilterExpr::Ge(..),
        ) => (b, a),
        _ => return false,
    };

    let (lower_field, lower_value, lower_inclusive) = match lower {
        FilterExpr::Gt(field, value) => (field, value, false),
        FilterExpr::Ge(field, value) => (field, value, true),
        _ => return false,
    };
    let (upper_field, upper_value, upper_inclusive) = match upper {
        FilterExpr::Lt(field, value) => (field, value, false),
        FilterExpr::Le(field, value) => (field, value, true),
        _ => return false,
    };

    if lower_field != upper_field {
        return false;
    }

    let lower_ge_upper = compare_values_gte(lower_value, upper_value);
    if lower_inclusive && upper_inclusive {
        // Strict `lower > upper`, expressed with the only comparison available.
        lower_ge_upper && !compare_values_gte(upper_value, lower_value)
    } else {
        lower_ge_upper
    }
}
/// Returns `true` when both expressions are numeric literals and
/// `left >= right`.
///
/// Two integers are compared as integers; any comparison involving a float
/// is done in `f64`. Every other combination (non-numeric operands) yields
/// `false`.
// Integer-to-f64 conversion may lose precision for very large magnitudes.
#[allow(clippy::cast_precision_loss)]
fn compare_values_gte(left: &FilterExpr, right: &FilterExpr) -> bool {
    match left {
        FilterExpr::LiteralInt(lhs) => match right {
            FilterExpr::LiteralInt(rhs) => lhs >= rhs,
            FilterExpr::LiteralFloat(rhs) => (*lhs as f64) >= *rhs,
            _ => false,
        },
        FilterExpr::LiteralFloat(lhs) => match right {
            FilterExpr::LiteralFloat(rhs) => lhs >= rhs,
            FilterExpr::LiteralInt(rhs) => *lhs >= (*rhs as f64),
            _ => false,
        },
        _ => false,
    }
}
// Unit and property tests for strategy validation, oversample calculation,
// strategy selection, tautology/contradiction detection and sampled
// selectivity estimation.
#[cfg(test)]
// Exact float comparisons are intentional: the expected values are literals
// or constants that the code under test returns unchanged.
#[allow(clippy::float_cmp)] mod tests {
use super::*;
use crate::filter::parse;
// ---- FilterStrategy: default variant and validate() ----
#[test]
fn test_default_is_auto() {
assert_eq!(FilterStrategy::default(), FilterStrategy::Auto);
}
#[test]
fn test_validate_post_filter_valid() {
assert!(FilterStrategy::PostFilter { oversample: 1.0 }
.validate()
.is_ok());
assert!(FilterStrategy::PostFilter { oversample: 5.0 }
.validate()
.is_ok());
assert!(FilterStrategy::PostFilter { oversample: 10.0 }
.validate()
.is_ok());
}
#[test]
fn test_validate_post_filter_invalid() {
assert!(FilterStrategy::PostFilter { oversample: 0.5 }
.validate()
.is_err());
assert!(FilterStrategy::PostFilter { oversample: 15.0 }
.validate()
.is_err());
assert!(FilterStrategy::PostFilter { oversample: 0.0 }
.validate()
.is_err());
assert!(FilterStrategy::PostFilter { oversample: -1.0 }
.validate()
.is_err());
}
#[test]
fn test_validate_hybrid_valid() {
assert!(FilterStrategy::Hybrid {
oversample_min: 1.0,
oversample_max: 10.0
}
.validate()
.is_ok());
assert!(FilterStrategy::Hybrid {
oversample_min: 1.5,
oversample_max: 5.0
}
.validate()
.is_ok());
assert!(FilterStrategy::HYBRID_DEFAULT.validate().is_ok());
}
#[test]
fn test_validate_hybrid_invalid() {
// Minimum below 1.0.
assert!(FilterStrategy::Hybrid {
oversample_min: 0.5,
oversample_max: 10.0
}
.validate()
.is_err());
// Maximum below minimum.
assert!(FilterStrategy::Hybrid {
oversample_min: 5.0,
oversample_max: 3.0
}
.validate()
.is_err());
// Maximum above MAX_OVERSAMPLE.
assert!(FilterStrategy::Hybrid {
oversample_min: 1.0,
oversample_max: 15.0
}
.validate()
.is_err());
}
#[test]
fn test_validate_prefilter_auto_always_valid() {
assert!(FilterStrategy::PreFilter.validate().is_ok());
assert!(FilterStrategy::Auto.validate().is_ok());
}
// ---- FilterStrategy: associated default constants ----
#[test]
fn test_post_filter_default() {
assert_eq!(
FilterStrategy::POST_FILTER_DEFAULT,
FilterStrategy::PostFilter { oversample: 3.0 }
);
}
#[test]
fn test_hybrid_default() {
assert_eq!(
FilterStrategy::HYBRID_DEFAULT,
FilterStrategy::Hybrid {
oversample_min: 1.5,
oversample_max: 10.0
}
);
}
// ---- calculate_oversample ----
#[test]
fn test_calculate_oversample_normal() {
assert!((calculate_oversample(1.0) - 1.0).abs() < 0.001);
assert!((calculate_oversample(0.5) - 2.0).abs() < 0.001);
assert!((calculate_oversample(0.25) - 4.0).abs() < 0.001);
assert!((calculate_oversample(0.1) - 10.0).abs() < 0.001);
}
#[test]
fn test_calculate_oversample_capped() {
assert_eq!(calculate_oversample(0.05), MAX_OVERSAMPLE);
assert_eq!(calculate_oversample(0.01), MAX_OVERSAMPLE);
assert_eq!(calculate_oversample(0.001), MAX_OVERSAMPLE);
}
#[test]
fn test_calculate_oversample_edge_cases() {
assert_eq!(calculate_oversample(0.0), MAX_OVERSAMPLE);
assert_eq!(calculate_oversample(-0.1), MAX_OVERSAMPLE);
}
// ---- select_strategy ----
#[test]
fn test_select_strategy_high_selectivity() {
assert_eq!(select_strategy(0.9), FilterStrategy::PreFilter);
assert_eq!(select_strategy(0.85), FilterStrategy::PreFilter);
assert_eq!(select_strategy(0.81), FilterStrategy::PreFilter);
}
#[test]
fn test_select_strategy_at_threshold() {
// Both thresholds are exclusive, so the exact threshold values fall
// into the Hybrid band.
assert!(matches!(
select_strategy(0.8),
FilterStrategy::Hybrid { .. }
));
assert!(matches!(
select_strategy(0.05),
FilterStrategy::Hybrid { .. }
));
}
#[test]
fn test_select_strategy_low_selectivity() {
match select_strategy(0.03) {
FilterStrategy::PostFilter { oversample } => {
assert!(oversample > 5.0);
assert!(oversample <= MAX_OVERSAMPLE);
}
_ => panic!("Expected PostFilter"),
}
match select_strategy(0.01) {
FilterStrategy::PostFilter { oversample } => {
assert_eq!(oversample, MAX_OVERSAMPLE);
}
_ => panic!("Expected PostFilter"),
}
}
#[test]
fn test_select_strategy_medium_selectivity() {
match select_strategy(0.3) {
FilterStrategy::Hybrid {
oversample_min,
oversample_max,
} => {
assert!((oversample_min - 1.5).abs() < 0.001);
assert!(oversample_max > 3.0);
}
_ => panic!("Expected Hybrid"),
}
match select_strategy(0.5) {
FilterStrategy::Hybrid { .. } => {}
_ => panic!("Expected Hybrid"),
}
}
// ---- is_tautology ----
#[test]
fn test_is_tautology_true_literal() {
assert!(is_tautology(&FilterExpr::LiteralBool(true)));
}
#[test]
fn test_is_tautology_false_literal() {
assert!(!is_tautology(&FilterExpr::LiteralBool(false)));
}
#[test]
fn test_is_tautology_a_or_not_a() {
let a = FilterExpr::Field("x".to_string());
let not_a = FilterExpr::Not(Box::new(a.clone()));
let or = FilterExpr::Or(Box::new(a), Box::new(not_a));
assert!(is_tautology(&or));
}
#[test]
fn test_is_tautology_not_a_or_a() {
let a = FilterExpr::Field("x".to_string());
let not_a = FilterExpr::Not(Box::new(a.clone()));
let or = FilterExpr::Or(Box::new(not_a), Box::new(a));
assert!(is_tautology(&or));
}
#[test]
fn test_is_tautology_nested_or() {
let a = FilterExpr::Field("x".to_string());
let b = FilterExpr::Field("y".to_string());
let and = FilterExpr::And(Box::new(a), Box::new(b));
let or = FilterExpr::Or(Box::new(and), Box::new(FilterExpr::LiteralBool(true)));
assert!(is_tautology(&or));
}
#[test]
fn test_is_tautology_simple_expression() {
let filter = parse("x = 5").unwrap();
assert!(!is_tautology(&filter));
}
#[test]
fn test_is_tautology_not_contradiction() {
let filter = FilterExpr::Not(Box::new(FilterExpr::LiteralBool(false)));
assert!(is_tautology(&filter));
}
// ---- is_contradiction ----
#[test]
fn test_is_contradiction_false_literal() {
assert!(is_contradiction(&FilterExpr::LiteralBool(false)));
}
#[test]
fn test_is_contradiction_true_literal() {
assert!(!is_contradiction(&FilterExpr::LiteralBool(true)));
}
#[test]
fn test_is_contradiction_a_and_not_a() {
let a = FilterExpr::Field("x".to_string());
let not_a = FilterExpr::Not(Box::new(a.clone()));
let and = FilterExpr::And(Box::new(a), Box::new(not_a));
assert!(is_contradiction(&and));
}
#[test]
fn test_is_contradiction_not_a_and_a() {
let a = FilterExpr::Field("x".to_string());
let not_a = FilterExpr::Not(Box::new(a.clone()));
let and = FilterExpr::And(Box::new(not_a), Box::new(a));
assert!(is_contradiction(&and));
}
#[test]
fn test_is_contradiction_impossible_range_gt_lt() {
let gt10 = parse("x > 10").unwrap();
let lt5 = parse("x < 5").unwrap();
let and = FilterExpr::And(Box::new(gt10), Box::new(lt5));
assert!(is_contradiction(&and));
}
#[test]
fn test_is_contradiction_impossible_range_ge_le() {
let ge10 = parse("x >= 10").unwrap();
let le5 = parse("x <= 5").unwrap();
let and = FilterExpr::And(Box::new(ge10), Box::new(le5));
assert!(is_contradiction(&and));
}
#[test]
fn test_is_contradiction_impossible_range_reverse() {
let lt5 = parse("x < 5").unwrap();
let gt10 = parse("x > 10").unwrap();
let and = FilterExpr::And(Box::new(lt5), Box::new(gt10));
assert!(is_contradiction(&and));
}
#[test]
fn test_is_contradiction_possible_range() {
let gt5 = parse("x > 5").unwrap();
let lt10 = parse("x < 10").unwrap();
let and = FilterExpr::And(Box::new(gt5), Box::new(lt10));
assert!(!is_contradiction(&and));
}
#[test]
fn test_is_contradiction_float_range() {
let gt = parse("x > 10.5").unwrap();
let lt = parse("x < 5.0").unwrap();
let and = FilterExpr::And(Box::new(gt), Box::new(lt));
assert!(is_contradiction(&and));
}
#[test]
fn test_is_contradiction_nested_or() {
let or = FilterExpr::Or(
Box::new(FilterExpr::LiteralBool(false)),
Box::new(FilterExpr::LiteralBool(false)),
);
assert!(is_contradiction(&or));
}
#[test]
fn test_is_contradiction_simple_expression() {
let filter = parse("x = 5").unwrap();
assert!(!is_contradiction(&filter));
}
#[test]
fn test_is_contradiction_not_tautology() {
let filter = FilterExpr::Not(Box::new(FilterExpr::LiteralBool(true)));
assert!(is_contradiction(&filter));
}
#[test]
fn test_is_contradiction_different_fields() {
let gt = parse("x > 10").unwrap();
let lt = parse("y < 5").unwrap();
let and = FilterExpr::And(Box::new(gt), Box::new(lt));
assert!(!is_contradiction(&and));
}
// ---- module constants ----
// Pins the tuning constants so that changing one is a deliberate act.
#[test]
fn test_constants_have_expected_values() {
assert_eq!(MAX_OVERSAMPLE, 10.0);
assert_eq!(DEFAULT_OVERSAMPLE, 3.0);
assert_eq!(EF_CAP, 1000);
assert_eq!(SELECTIVITY_SAMPLE_SIZE, 100);
assert_eq!(PREFILTER_THRESHOLD, 0.8);
assert_eq!(POSTFILTER_THRESHOLD, 0.05);
}
// ---- estimate_selectivity (sampled) ----
// In-memory MetadataStore backed by a Vec; the item id is the Vec index.
struct MockMetadataStore {
metadata: Vec<HashMap<String, MetadataValue>>,
}
impl MockMetadataStore {
#[allow(dead_code)]
fn new(metadata: Vec<HashMap<String, MetadataValue>>) -> Self {
Self { metadata }
}
// Store with no items.
fn empty() -> Self {
Self { metadata: vec![] }
}
// `count` items, each with `active = true`.
fn all_active(count: usize) -> Self {
let metadata = (0..count)
.map(|_| {
let mut m = HashMap::new();
m.insert("active".to_string(), MetadataValue::Boolean(true));
m
})
.collect();
Self { metadata }
}
// `count` items; those at even indices have `active = true`, the rest `false`.
fn half_active(count: usize) -> Self {
let metadata = (0..count)
.map(|i| {
let mut m = HashMap::new();
m.insert("active".to_string(), MetadataValue::Boolean(i % 2 == 0));
m
})
.collect();
Self { metadata }
}
// `count` items, each with `active = false`.
fn none_active(count: usize) -> Self {
let metadata = (0..count)
.map(|_| {
let mut m = HashMap::new();
m.insert("active".to_string(), MetadataValue::Boolean(false));
m
})
.collect();
Self { metadata }
}
}
impl MetadataStore for MockMetadataStore {
fn get_metadata(&self, id: usize) -> Option<&HashMap<String, MetadataValue>> {
self.metadata.get(id)
}
fn len(&self) -> usize {
self.metadata.len()
}
}
#[test]
fn test_estimate_selectivity_empty_store() {
let filter = parse("active = true").unwrap();
let store = MockMetadataStore::empty();
let estimate = estimate_selectivity(&filter, &store, Some(42));
assert_eq!(estimate.selectivity, 0.0);
assert_eq!(estimate.sample_size, 0);
assert_eq!(estimate.passed, 0);
}
#[test]
fn test_estimate_selectivity_all_pass() {
let filter = parse("active = true").unwrap();
let store = MockMetadataStore::all_active(200);
let estimate = estimate_selectivity(&filter, &store, Some(42));
assert_eq!(estimate.selectivity, 1.0);
assert_eq!(estimate.sample_size, SELECTIVITY_SAMPLE_SIZE);
assert_eq!(estimate.passed, SELECTIVITY_SAMPLE_SIZE);
}
#[test]
fn test_estimate_selectivity_none_pass() {
let filter = parse("active = true").unwrap();
let store = MockMetadataStore::none_active(200);
let estimate = estimate_selectivity(&filter, &store, Some(42));
// 0.01 is the floor estimate_selectivity applies for non-empty stores.
assert_eq!(estimate.selectivity, 0.01);
assert_eq!(estimate.sample_size, SELECTIVITY_SAMPLE_SIZE);
assert_eq!(estimate.passed, 0);
}
#[test]
fn test_estimate_selectivity_half_pass() {
let filter = parse("active = true").unwrap();
let store = MockMetadataStore::half_active(200);
let estimate = estimate_selectivity(&filter, &store, Some(42));
// Sampling is random, so only a tolerance band around 0.5 is asserted.
assert!(estimate.selectivity > 0.3 && estimate.selectivity < 0.7);
assert_eq!(estimate.sample_size, SELECTIVITY_SAMPLE_SIZE);
}
#[test]
fn test_estimate_selectivity_small_store() {
let filter = parse("active = true").unwrap();
// Fewer items than SELECTIVITY_SAMPLE_SIZE: every item is sampled.
let store = MockMetadataStore::all_active(10); let estimate = estimate_selectivity(&filter, &store, Some(42));
assert_eq!(estimate.selectivity, 1.0);
assert_eq!(estimate.sample_size, 10); assert_eq!(estimate.passed, 10);
}
#[test]
fn test_estimate_selectivity_deterministic() {
let filter = parse("active = true").unwrap();
let store = MockMetadataStore::half_active(200);
let estimate1 = estimate_selectivity(&filter, &store, Some(42));
let estimate2 = estimate_selectivity(&filter, &store, Some(42));
assert_eq!(estimate1.selectivity, estimate2.selectivity);
assert_eq!(estimate1.passed, estimate2.passed);
}
#[test]
fn test_estimate_selectivity_different_seeds() {
let filter = parse("active = true").unwrap();
let store = MockMetadataStore::half_active(1000);
let estimate1 = estimate_selectivity(&filter, &store, Some(1));
let estimate2 = estimate_selectivity(&filter, &store, Some(999));
assert!(estimate1.selectivity > 0.3 && estimate1.selectivity < 0.7);
assert!(estimate2.selectivity > 0.3 && estimate2.selectivity < 0.7);
}
// ---- SelectivityEstimate constructors and confidence() ----
#[test]
fn test_selectivity_estimate_new() {
let estimate = SelectivityEstimate::new(0.5, 100, 50);
assert_eq!(estimate.selectivity, 0.5);
assert_eq!(estimate.sample_size, 100);
assert_eq!(estimate.passed, 50);
}
#[test]
fn test_selectivity_estimate_clamping() {
let estimate = SelectivityEstimate::new(1.5, 100, 150);
assert_eq!(estimate.selectivity, 1.0);
let estimate = SelectivityEstimate::new(-0.5, 100, 0);
assert_eq!(estimate.selectivity, 0.0);
}
#[test]
fn test_selectivity_estimate_zero() {
let estimate = SelectivityEstimate::zero();
// zero() reports the 0.01 floor rather than a literal 0.0.
assert_eq!(estimate.selectivity, 0.01); assert_eq!(estimate.sample_size, 0);
assert_eq!(estimate.passed, 0);
}
#[test]
fn test_selectivity_estimate_full() {
let estimate = SelectivityEstimate::full();
assert_eq!(estimate.selectivity, 1.0);
}
#[test]
fn test_selectivity_estimate_confidence() {
let estimate = SelectivityEstimate::new(0.5, 100, 50);
assert_eq!(estimate.confidence(), 1.0);
let estimate = SelectivityEstimate::new(0.5, 50, 25);
assert_eq!(estimate.confidence(), 0.5);
let estimate = SelectivityEstimate::new(0.5, 0, 0);
assert_eq!(estimate.confidence(), 0.0);
}
// ---- property-based tests ----
use proptest::prelude::*;
proptest! {
// SelectivityEstimate::new always clamps into 0.0..=1.0.
#[test]
fn prop_selectivity_always_clamped(raw_selectivity in -100.0f32..100.0f32) {
let estimate = SelectivityEstimate::new(raw_selectivity, 100, 50);
prop_assert!(estimate.selectivity >= 0.0, "Selectivity should be >= 0.0");
prop_assert!(estimate.selectivity <= 1.0, "Selectivity should be <= 1.0");
}
// For selectivity in 0.0..=1.0 the oversample factor stays in 1.0..=MAX_OVERSAMPLE.
#[test]
fn prop_oversample_always_bounded(selectivity in 0.0f32..=1.0f32) {
let oversample = calculate_oversample(selectivity);
prop_assert!(oversample >= 1.0, "Oversample should be >= 1.0, got {} for selectivity {}", oversample, selectivity);
prop_assert!(oversample <= MAX_OVERSAMPLE, "Oversample should be <= MAX_OVERSAMPLE, got {} for selectivity {}", oversample, selectivity);
}
// Tautology and contradiction are mutually exclusive (checked on boolean literals only).
#[test]
fn prop_tautology_implies_not_contradiction(b in proptest::bool::ANY) {
let expr = FilterExpr::LiteralBool(b);
if is_tautology(&expr) {
prop_assert!(!is_contradiction(&expr), "Tautology cannot be contradiction");
}
}
#[test]
fn prop_contradiction_implies_not_tautology(b in proptest::bool::ANY) {
let expr = FilterExpr::LiteralBool(b);
if is_contradiction(&expr) {
prop_assert!(!is_tautology(&expr), "Contradiction cannot be tautology");
}
}
// select_strategy is a pure function of its input.
#[test]
fn prop_strategy_selection_deterministic(selectivity in 0.0f32..1.0f32) {
let strategy1 = select_strategy(selectivity);
let strategy2 = select_strategy(selectivity);
prop_assert_eq!(strategy1, strategy2, "Strategy selection should be deterministic");
}
}
}