use fsqlite_types::value::SqliteValue;
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::collections::HashMap;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct HistogramBucket {
pub lower: SqliteValue,
pub upper: SqliteValue,
pub count: u64,
pub ndv: u64,
}
impl HistogramBucket {
pub fn contains(&self, value: &SqliteValue) -> bool {
value >= &self.lower && value <= &self.upper
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct Histogram {
pub buckets: Vec<HistogramBucket>,
}
impl Histogram {
pub fn estimate_equality_rows(&self, value: &SqliteValue) -> f64 {
for bucket in &self.buckets {
if bucket.contains(value) {
let ndv = bucket.ndv.max(1) as f64;
return bucket.count as f64 / ndv;
}
}
1.0
}
pub fn estimate_less_than_rows(&self, value: &SqliteValue) -> f64 {
let mut count = 0.0;
for bucket in &self.buckets {
if value > &bucket.upper {
count += bucket.count as f64;
} else if value <= &bucket.lower {
break;
} else {
let fraction = interpolate_position(&bucket.lower, &bucket.upper, value);
count = (bucket.count as f64).mul_add(fraction, count);
break;
}
}
count
}
pub fn estimate_greater_than_rows(&self, value: &SqliteValue) -> f64 {
let mut count = 0.0;
for bucket in self.buckets.iter().rev() {
if value < &bucket.lower {
count += bucket.count as f64;
} else if value >= &bucket.upper {
break;
} else {
let fraction = 1.0 - interpolate_position(&bucket.lower, &bucket.upper, value);
count = (bucket.count as f64).mul_add(fraction, count);
break;
}
}
count
}
}
fn bytes_to_fraction(bytes: &[u8]) -> f64 {
let mut fraction = 0.0;
let mut weight = 1.0 / 256.0;
for &b in bytes.iter().take(8) {
fraction = f64::from(b).mul_add(weight, fraction);
weight /= 256.0;
}
fraction
}
fn interpolate_position(min: &SqliteValue, max: &SqliteValue, val: &SqliteValue) -> f64 {
match (min, max, val) {
(SqliteValue::Integer(min_i), SqliteValue::Integer(max_i), SqliteValue::Integer(val_i)) => {
if max_i <= min_i {
return 0.5;
}
let range = *max_i as f64 - *min_i as f64;
let offset = *val_i as f64 - *min_i as f64;
let fraction = offset / range;
if fraction.is_nan() {
0.5
} else {
fraction.clamp(0.0, 1.0)
}
}
(SqliteValue::Float(min_f), SqliteValue::Float(max_f), SqliteValue::Float(val_f)) => {
if max_f <= min_f || min_f.is_nan() || max_f.is_nan() || val_f.is_nan() {
return 0.5;
}
let range = max_f - min_f;
let offset = val_f - min_f;
let fraction = offset / range;
if fraction.is_nan() {
0.5
} else {
fraction.clamp(0.0, 1.0)
}
}
(SqliteValue::Text(min_s), SqliteValue::Text(max_s), SqliteValue::Text(val_s)) => {
if max_s <= min_s {
return 0.5;
}
let min_frac = bytes_to_fraction(min_s.as_bytes());
let max_frac = bytes_to_fraction(max_s.as_bytes());
let val_frac = bytes_to_fraction(val_s.as_bytes());
let range = max_frac - min_frac;
if range <= 0.0 {
return 0.5;
}
let offset = val_frac - min_frac;
(offset / range).clamp(0.0, 1.0)
}
(SqliteValue::Blob(min_b), SqliteValue::Blob(max_b), SqliteValue::Blob(val_b)) => {
if max_b <= min_b {
return 0.5;
}
let min_frac = bytes_to_fraction(min_b);
let max_frac = bytes_to_fraction(max_b);
let val_frac = bytes_to_fraction(val_b);
let range = max_frac - min_frac;
if range <= 0.0 {
return 0.5;
}
let offset = val_frac - min_frac;
(offset / range).clamp(0.0, 1.0)
}
_ => 0.5,
}
}
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
pub struct ColumnStats {
pub table_row_count: u64,
pub null_count: u64,
pub ndv: u64,
pub min_value: Option<SqliteValue>,
pub max_value: Option<SqliteValue>,
pub avg_width: f64,
pub histogram: Option<Histogram>,
}
impl ColumnStats {
pub fn estimate_selectivity(&self, op: &Operator, value: &SqliteValue) -> f64 {
if self.table_row_count == 0 {
return 0.0;
}
let non_null_count = self.table_row_count.saturating_sub(self.null_count) as f64;
if non_null_count <= 0.0 {
return 0.0;
}
let estimated_matches = match op {
Operator::Eq => {
if let Some(hist) = &self.histogram {
hist.estimate_equality_rows(value)
} else {
let ndv = self.ndv.max(1) as f64;
non_null_count / ndv
}
}
Operator::Lt => {
if let Some(hist) = &self.histogram {
hist.estimate_less_than_rows(value)
} else {
non_null_count / 3.0
}
}
Operator::Gt => {
if let Some(hist) = &self.histogram {
hist.estimate_greater_than_rows(value)
} else {
non_null_count / 3.0
}
}
Operator::Le => {
let lt = if let Some(hist) = &self.histogram {
hist.estimate_less_than_rows(value)
} else {
non_null_count / 3.0
};
let eq = if let Some(hist) = &self.histogram {
hist.estimate_equality_rows(value)
} else {
let ndv = self.ndv.max(1) as f64;
non_null_count / ndv
};
lt + eq
}
Operator::Ge => {
let gt = if let Some(hist) = &self.histogram {
hist.estimate_greater_than_rows(value)
} else {
non_null_count / 3.0
};
let eq = if let Some(hist) = &self.histogram {
hist.estimate_equality_rows(value)
} else {
let ndv = self.ndv.max(1) as f64;
non_null_count / ndv
};
gt + eq
}
Operator::Ne => {
let eq_matches = if let Some(hist) = &self.histogram {
hist.estimate_equality_rows(value)
} else {
let ndv = self.ndv.max(1) as f64;
non_null_count / ndv
};
(non_null_count - eq_matches).max(0.0)
}
_ => non_null_count * 0.1, };
if self.table_row_count == 0 {
return 0.0;
}
let fraction = estimated_matches / self.table_row_count as f64;
if fraction.is_nan() {
0.0
} else {
fraction.clamp(0.0, 1.0)
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Operator {
Eq,
Ne,
Lt,
Le,
Gt,
Ge,
Like,
Glob,
Is,
IsNot,
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TableStatistics {
pub row_count: u64,
pub columns: HashMap<String, ColumnStats>,
}
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Stat1Summary {
pub n_rows: u64,
pub per_column_distinct: Vec<u64>,
}
#[must_use]
pub fn parse_stat1(stat: &str) -> Option<Stat1Summary> {
let mut parts = stat.split_ascii_whitespace();
let first = parts.next()?;
let n_rows: u64 = first.parse().ok()?;
let per_column_distinct: Vec<u64> = parts.filter_map(|t| t.parse::<u64>().ok()).collect();
Some(Stat1Summary {
n_rows,
per_column_distinct,
})
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EstimationMethod {
Histogram,
Ndv,
Sampling,
Heuristic,
}
#[derive(Debug, Clone)]
pub struct CardinalityEstimate {
pub estimated_rows: f64,
pub selectivity: f64,
pub method: EstimationMethod,
}
impl ColumnStats {
pub fn estimate_cardinality(
&self,
op: &Operator,
value: &SqliteValue,
sample: Option<&[SqliteValue]>,
) -> CardinalityEstimate {
let rows = self.table_row_count as f64;
if rows <= 0.0 {
return CardinalityEstimate {
estimated_rows: 0.0,
selectivity: 0.0,
method: EstimationMethod::Heuristic,
};
}
if self.histogram.is_some() {
let sel = self.estimate_selectivity(op, value);
let span = tracing::debug_span!(
target: "fsqlite.planner",
"cost_estimate",
table = tracing::field::Empty,
estimated_rows = (sel * rows),
actual_method = "histogram",
);
let _g = span.enter();
return CardinalityEstimate {
estimated_rows: sel * rows,
selectivity: sel,
method: EstimationMethod::Histogram,
};
}
if let Some(sample) = sample {
if !sample.is_empty() {
let matching = sample
.iter()
.filter(|sv| cmp_matches(sv, *op, value))
.count();
let sel = (matching as f64 / sample.len() as f64).clamp(0.0, 1.0);
let span = tracing::debug_span!(
target: "fsqlite.planner",
"cost_estimate",
table = tracing::field::Empty,
estimated_rows = (sel * rows),
actual_method = "sampling",
);
let _g = span.enter();
return CardinalityEstimate {
estimated_rows: sel * rows,
selectivity: sel,
method: EstimationMethod::Sampling,
};
}
}
if matches!(op, Operator::Eq | Operator::Is) && self.ndv > 0 {
let sel = 1.0 / self.ndv as f64;
let span = tracing::debug_span!(
target: "fsqlite.planner",
"cost_estimate",
table = tracing::field::Empty,
estimated_rows = (sel * rows),
actual_method = "ndv",
);
let _g = span.enter();
return CardinalityEstimate {
estimated_rows: sel * rows,
selectivity: sel,
method: EstimationMethod::Ndv,
};
}
let sel = default_selectivity(*op);
let span = tracing::debug_span!(
target: "fsqlite.planner",
"cost_estimate",
table = tracing::field::Empty,
estimated_rows = (sel * rows),
actual_method = "heuristic",
);
let _g = span.enter();
CardinalityEstimate {
estimated_rows: sel * rows,
selectivity: sel,
method: EstimationMethod::Heuristic,
}
}
}
fn default_selectivity(op: Operator) -> f64 {
match op {
Operator::Eq | Operator::Is => 0.01, Operator::Ne | Operator::IsNot => 0.99,
Operator::Lt | Operator::Le | Operator::Gt | Operator::Ge => 1.0 / 3.0,
Operator::Like | Operator::Glob => 0.1,
}
}
fn cmp_matches(sample_val: &SqliteValue, op: Operator, probe: &SqliteValue) -> bool {
let ord = sample_val.partial_cmp(probe);
match op {
Operator::Eq | Operator::Is => ord == Some(Ordering::Equal),
Operator::Ne | Operator::IsNot => ord != Some(Ordering::Equal),
Operator::Lt => ord == Some(Ordering::Less),
Operator::Le => matches!(ord, Some(Ordering::Less | Ordering::Equal)),
Operator::Gt => ord == Some(Ordering::Greater),
Operator::Ge => matches!(ord, Some(Ordering::Greater | Ordering::Equal)),
Operator::Like | Operator::Glob => false, }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_histogram_interpolation_integer() {
let bucket = HistogramBucket {
lower: SqliteValue::Integer(0),
upper: SqliteValue::Integer(100),
count: 100,
ndv: 100,
};
let hist = Histogram {
buckets: vec![bucket],
};
let est = hist.estimate_less_than_rows(&SqliteValue::Integer(50));
assert!((est - 50.0).abs() < 1.0);
}
#[test]
fn test_histogram_bucket_contains_is_inclusive_on_both_ends() {
let int_bucket = HistogramBucket {
lower: SqliteValue::Integer(10),
upper: SqliteValue::Integer(20),
count: 5,
ndv: 3,
};
assert!(
int_bucket.contains(&SqliteValue::Integer(10)),
"lower bound is inclusive"
);
assert!(
int_bucket.contains(&SqliteValue::Integer(20)),
"upper bound is inclusive"
);
assert!(int_bucket.contains(&SqliteValue::Integer(15)));
assert!(!int_bucket.contains(&SqliteValue::Integer(9)));
assert!(!int_bucket.contains(&SqliteValue::Integer(21)));
let point = HistogramBucket {
lower: SqliteValue::Integer(5),
upper: SqliteValue::Integer(5),
count: 1,
ndv: 1,
};
assert!(point.contains(&SqliteValue::Integer(5)));
assert!(!point.contains(&SqliteValue::Integer(4)));
assert!(!point.contains(&SqliteValue::Integer(6)));
let float_bucket = HistogramBucket {
lower: SqliteValue::Float(1.0),
upper: SqliteValue::Float(2.0),
count: 4,
ndv: 4,
};
assert!(float_bucket.contains(&SqliteValue::Float(1.0)));
assert!(float_bucket.contains(&SqliteValue::Float(2.0)));
assert!(float_bucket.contains(&SqliteValue::Float(1.5)));
assert!(!float_bucket.contains(&SqliteValue::Float(0.99)));
assert!(!float_bucket.contains(&SqliteValue::Float(2.01)));
}
#[test]
fn test_bytes_to_fraction_base256_encoding() {
let bf = bytes_to_fraction;
let approx = |a: f64, b: f64| (a - b).abs() < 1e-12;
assert!(approx(bf(b""), 0.0));
assert!(approx(bf(&[0x80]), 0.5));
assert!(approx(bf(&[0x40]), 0.25));
assert!(approx(bf(&[0xFF]), 255.0 / 256.0));
assert!(approx(bf(&[0x00, 0x80]), 128.0 / 65536.0)); assert!(approx(bf(&[0x80, 0x80]), 0.5 + 128.0 / 65536.0));
assert!(approx(bf(&[0xFF; 9]), bf(&[0xFF; 8])));
assert!(bf(&[0x02]) > bf(&[0x01]));
assert!(bf(&[0x01]) > bf(&[0x00, 0xFF, 0xFF]));
let max8 = bf(&[0xFF; 8]);
assert!(
(0.0..1.0).contains(&max8),
"eight 0xFF bytes must stay below 1.0, got {max8}"
);
}
#[test]
fn test_interpolate_position_clamps_handles_degenerate_and_mixed_types() {
use SqliteValue::{Float, Integer};
let approx = |a: f64, b: f64| (a - b).abs() < 1e-9;
assert!(approx(
interpolate_position(&Integer(0), &Integer(100), &Integer(50)),
0.5
));
assert!(approx(
interpolate_position(&Integer(0), &Integer(100), &Integer(0)),
0.0
));
assert!(approx(
interpolate_position(&Integer(0), &Integer(100), &Integer(100)),
1.0
));
assert!(
approx(
interpolate_position(&Integer(0), &Integer(100), &Integer(-50)),
0.0
),
"below min clamps to 0"
);
assert!(
approx(
interpolate_position(&Integer(0), &Integer(100), &Integer(200)),
1.0
),
"above max clamps to 1"
);
assert!(approx(
interpolate_position(&Integer(50), &Integer(50), &Integer(50)),
0.5
));
assert!(approx(
interpolate_position(&Integer(100), &Integer(0), &Integer(50)),
0.5
));
assert!(approx(
interpolate_position(&Float(0.0), &Float(10.0), &Float(2.5)),
0.25
));
assert!(
approx(
interpolate_position(&Float(0.0), &Float(10.0), &Float(f64::NAN)),
0.5
),
"NaN value -> 0.5"
);
assert!(
approx(
interpolate_position(&Float(f64::NAN), &Float(10.0), &Float(5.0)),
0.5
),
"NaN min -> 0.5"
);
assert!(approx(
interpolate_position(&Integer(0), &Float(10.0), &Integer(5)),
0.5
));
}
#[test]
fn test_interpolate_position_text_and_blob_branches() {
let approx = |a: f64, b: f64| (a - b).abs() < 1e-9;
let t = |s: &str| SqliteValue::from(s);
assert!(approx(interpolate_position(&t("A"), &t("C"), &t("B")), 0.5));
assert!(approx(interpolate_position(&t("A"), &t("C"), &t("A")), 0.0));
assert!(approx(interpolate_position(&t("A"), &t("C"), &t("C")), 1.0));
assert!(approx(interpolate_position(&t("A"), &t("C"), &t("@")), 0.0)); assert!(approx(interpolate_position(&t("A"), &t("C"), &t("D")), 1.0)); assert!(approx(interpolate_position(&t("C"), &t("A"), &t("B")), 0.5));
assert!(approx(interpolate_position(&t("B"), &t("B"), &t("B")), 0.5));
let b = |bytes: &[u8]| SqliteValue::from(bytes);
assert!(approx(
interpolate_position(&b(&[0x00]), &b(&[0x80]), &b(&[0x40])),
0.5
));
assert!(approx(
interpolate_position(&b(&[0x80]), &b(&[0x00]), &b(&[0x40])),
0.5
));
assert!(approx(
interpolate_position(&b(&[0x01]), &b(&[0x01, 0x00]), &b(&[0x01])),
0.5
));
}
#[test]
fn test_histogram_equality_and_range_estimates_multi_bucket() {
let hist = Histogram {
buckets: vec![
HistogramBucket {
lower: SqliteValue::Integer(0),
upper: SqliteValue::Integer(99),
count: 100,
ndv: 10,
},
HistogramBucket {
lower: SqliteValue::Integer(100),
upper: SqliteValue::Integer(199),
count: 200,
ndv: 50,
},
],
};
let total = 300.0;
let iv = SqliteValue::Integer;
assert!((hist.estimate_equality_rows(&iv(50)) - 10.0).abs() < f64::EPSILON); assert!((hist.estimate_equality_rows(&iv(0)) - 10.0).abs() < f64::EPSILON); assert!((hist.estimate_equality_rows(&iv(99)) - 10.0).abs() < f64::EPSILON); assert!((hist.estimate_equality_rows(&iv(100)) - 4.0).abs() < f64::EPSILON); assert!((hist.estimate_equality_rows(&iv(150)) - 4.0).abs() < f64::EPSILON);
assert!((hist.estimate_equality_rows(&iv(10_000)) - 1.0).abs() < f64::EPSILON);
assert!((hist.estimate_less_than_rows(&iv(-100)) - 0.0).abs() < f64::EPSILON);
assert!((hist.estimate_less_than_rows(&iv(10_000)) - total).abs() < f64::EPSILON);
assert!((hist.estimate_greater_than_rows(&iv(10_000)) - 0.0).abs() < f64::EPSILON);
assert!((hist.estimate_greater_than_rows(&iv(-100)) - total).abs() < f64::EPSILON);
let lt_lo = hist.estimate_less_than_rows(&iv(-100));
let lt_a = hist.estimate_less_than_rows(&iv(50));
let lt_b = hist.estimate_less_than_rows(&iv(150));
let lt_hi = hist.estimate_less_than_rows(&iv(10_000));
assert!(
lt_lo <= lt_a && lt_a <= lt_b && lt_b <= lt_hi,
"less_than must be monotonic: {lt_lo} <= {lt_a} <= {lt_b} <= {lt_hi}"
);
for n in [-100_i64, 0, 50, 99, 100, 150, 199, 10_000] {
let lt = hist.estimate_less_than_rows(&iv(n));
let gt = hist.estimate_greater_than_rows(&iv(n));
assert!(
(0.0..=total).contains(<),
"less_than({n})={lt} out of [0,{total}]"
);
assert!(
(0.0..=total).contains(>),
"greater_than({n})={gt} out of [0,{total}]"
);
}
}
#[test]
fn test_selectivity_defaults() {
let stats = ColumnStats {
table_row_count: 1000,
null_count: 0,
ndv: 100,
min_value: Some(SqliteValue::Integer(0)),
max_value: Some(SqliteValue::Integer(1000)),
avg_width: 8.0,
histogram: None,
};
let sel = stats.estimate_selectivity(&Operator::Eq, &SqliteValue::Integer(50));
assert!((sel - 0.01).abs() < 0.001);
let sel = stats.estimate_selectivity(&Operator::Gt, &SqliteValue::Integer(50));
assert!((sel - 0.333).abs() < 0.001);
}
#[test]
fn test_estimate_selectivity_operator_dispatch_and_null_handling() {
let base = ColumnStats {
table_row_count: 1000,
null_count: 0,
ndv: 100,
min_value: Some(SqliteValue::Integer(0)),
max_value: Some(SqliteValue::Integer(1000)),
avg_width: 8.0,
histogram: None,
};
let v = SqliteValue::Integer(50);
let sel = |stats: &ColumnStats, op: Operator| stats.estimate_selectivity(&op, &v);
let eq = sel(&base, Operator::Eq);
let ne = sel(&base, Operator::Ne);
let lt = sel(&base, Operator::Lt);
let gt = sel(&base, Operator::Gt);
let le = sel(&base, Operator::Le);
let ge = sel(&base, Operator::Ge);
assert!((eq - 0.01).abs() < 1e-9, "Eq = 1/ndv = 0.01, got {eq}");
assert!((lt - gt).abs() < 1e-12, "Lt and Gt share the 1/3 default");
assert!((lt - 1.0 / 3.0).abs() < 1e-9, "Lt = 1/3 default, got {lt}");
assert!(
(eq + ne - 1.0).abs() < 1e-9,
"Eq + Ne should be 1.0 with no NULLs, got {}",
eq + ne
);
assert!(((le - lt) - eq).abs() < 1e-9, "Le - Lt should equal Eq");
assert!(((ge - gt) - eq).abs() < 1e-9, "Ge - Gt should equal Eq");
let with_nulls = ColumnStats {
null_count: 200,
..base.clone()
};
let eq_n = sel(&with_nulls, Operator::Eq);
let ne_n = sel(&with_nulls, Operator::Ne);
assert!(
(eq_n - 0.008).abs() < 1e-9,
"Eq with 200/1000 NULLs = (800/100)/1000 = 0.008, got {eq_n}"
);
assert!(
(eq_n + ne_n - 0.8).abs() < 1e-9,
"Eq+Ne should equal the non-NULL fraction 0.8, got {}",
eq_n + ne_n
);
let empty = ColumnStats {
table_row_count: 0,
..base.clone()
};
let all_null = ColumnStats {
table_row_count: 500,
null_count: 500,
..base.clone()
};
for op in [
Operator::Eq,
Operator::Ne,
Operator::Lt,
Operator::Le,
Operator::Gt,
Operator::Ge,
] {
assert!(
sel(&empty, op).abs() < f64::EPSILON,
"empty table -> 0 selectivity"
);
assert!(
sel(&all_null, op).abs() < f64::EPSILON,
"all-NULL -> 0 selectivity"
);
for stats in [&base, &with_nulls] {
let s = sel(stats, op);
assert!((0.0..=1.0).contains(&s), "selectivity {s} out of [0,1]");
}
}
}
#[test]
fn test_cardinality_estimate_histogram_preferred() {
let hist = Histogram {
buckets: vec![HistogramBucket {
lower: SqliteValue::Integer(0),
upper: SqliteValue::Integer(100),
count: 1000,
ndv: 100,
}],
};
let stats = ColumnStats {
table_row_count: 1000,
null_count: 0,
ndv: 100,
min_value: Some(SqliteValue::Integer(0)),
max_value: Some(SqliteValue::Integer(100)),
avg_width: 8.0,
histogram: Some(hist),
};
let est = stats.estimate_cardinality(
&Operator::Eq,
&SqliteValue::Integer(50),
Some(&[SqliteValue::Integer(50)]), );
assert_eq!(est.method, EstimationMethod::Histogram);
assert!(est.estimated_rows > 0.0);
}
#[test]
fn test_cardinality_estimate_sampling_fallback() {
let stats = ColumnStats {
table_row_count: 1000,
null_count: 0,
ndv: 0,
min_value: None,
max_value: None,
avg_width: 0.0,
histogram: None,
};
let sample: Vec<SqliteValue> = (0..10)
.map(|i| SqliteValue::Integer(if i < 3 { 42 } else { i + 100 }))
.collect();
let est =
stats.estimate_cardinality(&Operator::Eq, &SqliteValue::Integer(42), Some(&sample));
assert_eq!(est.method, EstimationMethod::Sampling);
assert!((est.selectivity - 0.3).abs() < 0.01);
assert!((est.estimated_rows - 300.0).abs() < 1.0);
}
#[test]
fn test_cardinality_estimate_ndv_fallback() {
let stats = ColumnStats {
table_row_count: 1000,
null_count: 0,
ndv: 50,
min_value: None,
max_value: None,
avg_width: 0.0,
histogram: None,
};
let est = stats.estimate_cardinality(&Operator::Eq, &SqliteValue::Integer(42), None);
assert_eq!(est.method, EstimationMethod::Ndv);
assert!((est.selectivity - 0.02).abs() < 0.001);
assert!((est.estimated_rows - 20.0).abs() < 0.1);
}
#[test]
fn test_estimate_cardinality_empty_table_and_ndv_only_for_equality() {
let empty = ColumnStats {
table_row_count: 0,
ndv: 100,
..ColumnStats::default()
};
let est = empty.estimate_cardinality(&Operator::Eq, &SqliteValue::Integer(1), None);
assert!(est.estimated_rows.abs() < f64::EPSILON);
assert!(est.selectivity.abs() < f64::EPSILON);
assert_eq!(est.method, EstimationMethod::Heuristic);
let stats = ColumnStats {
table_row_count: 1000,
ndv: 50,
..ColumnStats::default()
};
let eq = stats.estimate_cardinality(&Operator::Eq, &SqliteValue::Integer(1), None);
assert_eq!(eq.method, EstimationMethod::Ndv, "equality uses NDV");
let lt = stats.estimate_cardinality(&Operator::Lt, &SqliteValue::Integer(1), None);
assert_eq!(
lt.method,
EstimationMethod::Heuristic,
"a range op falls to the heuristic, not NDV"
);
for est in [&eq, <] {
assert!(
est.selectivity.mul_add(-1000.0, est.estimated_rows).abs() < 1e-6,
"estimated_rows must equal selectivity * row_count"
);
}
}
#[test]
fn test_cardinality_estimate_heuristic_fallback() {
let stats = ColumnStats {
table_row_count: 1000,
null_count: 0,
ndv: 0,
min_value: None,
max_value: None,
avg_width: 0.0,
histogram: None,
};
let est = stats.estimate_cardinality(&Operator::Gt, &SqliteValue::Integer(42), None);
assert_eq!(est.method, EstimationMethod::Heuristic);
assert!((est.selectivity - 1.0 / 3.0).abs() < 0.01);
}
#[test]
fn test_default_selectivity_values() {
assert!((default_selectivity(Operator::Eq) - 0.01).abs() < 0.001);
assert!((default_selectivity(Operator::Ne) - 0.99).abs() < 0.001);
assert!((default_selectivity(Operator::Lt) - 0.333).abs() < 0.001);
assert!((default_selectivity(Operator::Like) - 0.1).abs() < 0.001);
let eps = 1e-9;
assert!(
(default_selectivity(Operator::Is) - default_selectivity(Operator::Eq)).abs() < eps
);
assert!(
(default_selectivity(Operator::IsNot) - default_selectivity(Operator::Ne)).abs() < eps
);
assert!(
(default_selectivity(Operator::Glob) - default_selectivity(Operator::Like)).abs() < eps
);
for op in [Operator::Le, Operator::Gt, Operator::Ge] {
assert!((default_selectivity(op) - default_selectivity(Operator::Lt)).abs() < eps);
}
}
#[test]
fn test_cmp_matches() {
let v50 = SqliteValue::Integer(50);
let v100 = SqliteValue::Integer(100);
assert!(cmp_matches(&v50, Operator::Eq, &v50));
assert!(!cmp_matches(&v50, Operator::Eq, &v100));
assert!(cmp_matches(&v50, Operator::Lt, &v100));
assert!(!cmp_matches(&v100, Operator::Lt, &v50));
assert!(cmp_matches(&v50, Operator::Le, &v50));
assert!(cmp_matches(&v100, Operator::Gt, &v50));
assert!(cmp_matches(&v100, Operator::Ge, &v100));
assert!(cmp_matches(&v50, Operator::Ne, &v100));
assert!(cmp_matches(&v50, Operator::Is, &v50));
assert!(!cmp_matches(&v50, Operator::Is, &v100));
assert!(cmp_matches(&v50, Operator::IsNot, &v100));
assert!(!cmp_matches(&v50, Operator::IsNot, &v50));
assert!(cmp_matches(&v50, Operator::Le, &v100));
assert!(cmp_matches(&v100, Operator::Ge, &v50));
assert!(!cmp_matches(&v50, Operator::Like, &v50));
assert!(!cmp_matches(&v50, Operator::Glob, &v50));
}
#[test]
fn test_estimation_method_hierarchy() {
let stats = ColumnStats {
table_row_count: 1000,
null_count: 0,
ndv: 50,
min_value: None,
max_value: None,
avg_width: 0.0,
histogram: None,
};
let sample = vec![SqliteValue::Integer(42); 10];
let est =
stats.estimate_cardinality(&Operator::Eq, &SqliteValue::Integer(42), Some(&sample));
assert_eq!(est.method, EstimationMethod::Sampling);
assert!((est.selectivity - 1.0).abs() < 0.01);
}
#[test]
fn parse_stat1_table_only_row() {
let parsed = parse_stat1("12345").unwrap();
assert_eq!(parsed.n_rows, 12345);
assert!(parsed.per_column_distinct.is_empty());
}
#[test]
fn parse_stat1_index_row_with_distinct_counts() {
let parsed = parse_stat1("1000 10 1").unwrap();
assert_eq!(parsed.n_rows, 1000);
assert_eq!(parsed.per_column_distinct, vec![10, 1]);
}
#[test]
fn parse_stat1_tolerates_extra_whitespace() {
let parsed = parse_stat1(" 500\t 20 5 ").unwrap();
assert_eq!(parsed.n_rows, 500);
assert_eq!(parsed.per_column_distinct, vec![20, 5]);
}
#[test]
fn parse_stat1_rejects_empty_and_non_numeric() {
assert!(parse_stat1("").is_none());
assert!(parse_stat1(" ").is_none());
assert!(parse_stat1("not-a-number 1 2").is_none());
}
#[test]
fn parse_stat1_drops_trailing_garbage() {
let parsed = parse_stat1("42 7 foo 3").unwrap();
assert_eq!(parsed.n_rows, 42);
assert_eq!(parsed.per_column_distinct, vec![7, 3]);
}
}