pub mod codegen;
pub mod decision_contract;
pub mod differential;
pub mod stats;
use decision_contract::access_path_kind_label;
use fsqlite_ast::{
BinaryOp as AstBinaryOp, ColumnRef, CompoundOp, Expr, FromClause, InSet, IndexHint,
JoinConstraint, JoinKind, LikeOp, Literal, NullsOrder, OrderingTerm, ResultColumn, SelectBody,
SelectCore, SortDirection, Span, TableOrSubquery,
};
use lru::LruCache;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt;
use std::num::NonZeroUsize;
use std::rc::Rc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{LazyLock, Mutex};
use xxhash_rust::xxh3::xxh3_64_with_seed;
/// One ORDER BY term of a compound SELECT after it has been resolved to an
/// output column position.
///
/// Built by `resolve_compound_order_by`, which copies `direction` and `nulls`
/// unchanged from the source `OrderingTerm`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolvedCompoundOrderBy {
/// Zero-based position of the output column the term sorts by.
pub column_idx: usize,
/// Sort direction copied from the ordering term, if one was given.
pub direction: Option<SortDirection>,
/// Collation name taken from a wrapping `COLLATE` expression, if any.
pub collation: Option<String>,
/// NULLS ordering copied from the ordering term, if one was given.
pub nulls: Option<NullsOrder>,
}
/// Reasons an ORDER BY term of a compound SELECT could not be resolved.
///
/// Every variant carries the `Span` of the offending expression; the spans are
/// not included in the `Display` output.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CompoundOrderByError {
/// A column name matched no output alias in any arm of the compound.
// NOTE(review): the Display text always says "1st ORDER BY term" regardless
// of which term failed — confirm whether the ordinal should be tracked.
ColumnNotFound { name: String, span: Span },
/// A positive column number exceeded the number of output columns.
IndexOutOfRange {
/// The one-based column number as written in the query.
index: usize,
/// Output column count of the first SELECT in the compound.
num_columns: usize,
span: Span,
},
/// A column number that was zero or negative.
IndexZeroOrNegative { value: i64, span: Span },
/// The term was neither an integer literal, a column reference, nor a
/// `COLLATE` wrapper around one of those.
ExpressionNotAllowed { span: Span },
}
/// Human-readable messages for compound ORDER BY resolution failures.
///
/// The source spans stored in the variants are intentionally not rendered.
impl std::fmt::Display for CompoundOrderByError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Constant message: no interpolation needed.
            Self::ExpressionNotAllowed { .. } => f.write_str(
                "ORDER BY expression not allowed in compound SELECT - use column name or number",
            ),
            Self::IndexZeroOrNegative { value, .. } => write!(
                f,
                "ORDER BY column index {value} out of range - must be positive"
            ),
            Self::IndexOutOfRange {
                index, num_columns, ..
            } => write!(
                f,
                "ORDER BY column index {index} out of range (result has {num_columns} columns)"
            ),
            Self::ColumnNotFound { name, .. } => write!(
                f,
                "1st ORDER BY term does not match any column in the result set: {name}"
            ),
        }
    }
}
// Empty impl: relies entirely on the default methods of `std::error::Error`.
impl std::error::Error for CompoundOrderByError {}
#[must_use]
pub fn extract_output_aliases(core: &SelectCore) -> Vec<Option<String>> {
match core {
SelectCore::Select { columns, .. } => columns
.iter()
.map(|rc| match rc {
ResultColumn::Expr { alias: Some(a), .. } => Some(a.clone()),
ResultColumn::Expr {
expr: Expr::Column(col_ref, _),
alias: None,
..
} => Some(col_ref.column.to_string()),
_ => None,
})
.collect(),
SelectCore::Values(rows) => {
let width = rows.first().map_or(0, Vec::len);
vec![None; width]
}
}
}
/// Number of output columns produced by `core`.
///
/// A `SELECT` reports the length of its result-column list as written (stars
/// are not expanded here); `VALUES` reports the width of its first row, or zero
/// when it has no rows.
#[must_use]
pub fn count_output_columns(core: &SelectCore) -> usize {
    match core {
        SelectCore::Values(rows) => match rows.first() {
            Some(first_row) => first_row.len(),
            None => 0,
        },
        SelectCore::Select { columns, .. } => columns.len(),
    }
}
/// Failures reported while resolving the result columns of a single-table
/// SELECT.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SingleTableProjectionError {
    /// The core was not a `SELECT` (e.g. it was `VALUES`).
    NotSelectCore,
    /// The `SELECT` had no `FROM` clause.
    MissingFromClause,
    /// The `FROM` clause had joins or a source other than a plain table.
    UnsupportedFromSource,
    /// A `qualifier.*` or `qualifier.column` named neither the table nor its alias.
    UnknownTableQualifier { qualifier: String },
    /// A referenced column is not among the table's columns.
    ColumnNotFound { column: String },
}

impl fmt::Display for SingleTableProjectionError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Constant messages are resolved first; only the two payload-carrying
        // variants need formatting.
        let fixed_message = match self {
            Self::UnknownTableQualifier { qualifier } => {
                return write!(f, "unknown table qualifier: {qualifier}");
            }
            Self::ColumnNotFound { column } => {
                return write!(f, "column not found: {column}");
            }
            Self::NotSelectCore => "projection resolution requires SELECT core",
            Self::MissingFromClause => "projection resolution requires FROM clause",
            Self::UnsupportedFromSource => "only single-table FROM without JOIN is supported",
        };
        f.write_str(fixed_message)
    }
}

// Empty impl: relies on the default methods of `std::error::Error`.
impl std::error::Error for SingleTableProjectionError {}
/// Resolves the result columns of a single-table SELECT against
/// `table_columns`, with hidden rowid aliases accepted.
///
/// Convenience wrapper around
/// `resolve_single_table_result_columns_with_options` that passes
/// `supports_hidden_rowid = true`.
///
/// # Errors
/// Propagates every `SingleTableProjectionError` of the underlying function.
pub fn resolve_single_table_result_columns(
core: &SelectCore,
table_columns: &[String],
) -> Result<Vec<ResultColumn>, SingleTableProjectionError> {
resolve_single_table_result_columns_with_options(core, table_columns, true)
}
/// Expands and validates the result columns of a single-table SELECT.
///
/// * `*` and `qualifier.*` expand to one bare column reference per entry of
///   `table_columns`, in order.
/// * Column references are validated (qualifier and column name, both
///   case-insensitively) and passed through unchanged.
/// * Every other expression is passed through unchanged without inspection.
///
/// When `supports_hidden_rowid` is `true`, `rowid`, `_rowid_` and `oid` are
/// accepted even if they are not listed in `table_columns`.
///
/// # Errors
/// * `NotSelectCore` — `core` is not a `SELECT`.
/// * `MissingFromClause` — the `SELECT` has no `FROM`.
/// * `UnsupportedFromSource` — the `FROM` has joins or a non-table source.
/// * `UnknownTableQualifier` — a qualifier matches neither table name nor alias.
/// * `ColumnNotFound` — a referenced column does not exist.
pub fn resolve_single_table_result_columns_with_options(
    core: &SelectCore,
    table_columns: &[String],
    supports_hidden_rowid: bool,
) -> Result<Vec<ResultColumn>, SingleTableProjectionError> {
    let SelectCore::Select { columns, from, .. } = core else {
        return Err(SingleTableProjectionError::NotSelectCore);
    };
    let Some(from_clause) = from.as_ref() else {
        return Err(SingleTableProjectionError::MissingFromClause);
    };
    let (table_name, table_alias) = single_table_source_name_and_alias(from_clause)?;

    // Shared by `*` and `qualifier.*`: one bare column reference per table column.
    let expand_star = |out: &mut Vec<ResultColumn>| {
        out.extend(table_columns.iter().map(|column_name| ResultColumn::Expr {
            expr: Expr::Column(ColumnRef::bare(column_name.clone()), Span::ZERO),
            alias: None,
        }));
    };

    let mut resolved = Vec::new();
    for result_col in columns {
        match result_col {
            ResultColumn::Star => expand_star(&mut resolved),
            ResultColumn::TableStar(qualifier) => {
                if !qualifier_matches_table(&qualifier.name, table_name, table_alias) {
                    return Err(SingleTableProjectionError::UnknownTableQualifier {
                        qualifier: qualifier.to_string(),
                    });
                }
                expand_star(&mut resolved);
            }
            ResultColumn::Expr { expr, .. } => {
                // Only plain column references are validated; other
                // expressions are forwarded as-is.
                if let Expr::Column(col_ref, _) = expr {
                    if let Some(qualifier) = &col_ref.table {
                        if !qualifier_matches_table(qualifier, table_name, table_alias) {
                            return Err(SingleTableProjectionError::UnknownTableQualifier {
                                qualifier: qualifier.to_string(),
                            });
                        }
                    }
                    let resolvable = column_exists_ignore_case(table_columns, &col_ref.column)
                        || (supports_hidden_rowid && is_rowid_alias_name(&col_ref.column));
                    if !resolvable {
                        return Err(SingleTableProjectionError::ColumnNotFound {
                            column: col_ref.column.to_string(),
                        });
                    }
                }
                resolved.push(result_col.clone());
            }
        }
    }
    Ok(resolved)
}
/// Extracts `(table name, optional alias)` from a `FROM` clause that consists
/// of exactly one plain table.
///
/// # Errors
/// Returns `UnsupportedFromSource` when the clause has any join, or when its
/// source is anything other than a plain table.
fn single_table_source_name_and_alias(
    from_clause: &FromClause,
) -> Result<(&str, Option<&str>), SingleTableProjectionError> {
    if from_clause.joins.is_empty() {
        if let TableOrSubquery::Table { name, alias, .. } = &from_clause.source {
            return Ok((&name.name, alias.as_deref()));
        }
    }
    Err(SingleTableProjectionError::UnsupportedFromSource)
}
/// Returns `true` when `name` equals one of `columns`, ignoring ASCII case.
fn column_exists_ignore_case(columns: &[String], name: &str) -> bool {
    for candidate in columns {
        if candidate.eq_ignore_ascii_case(name) {
            return true;
        }
    }
    false
}
/// Returns `true` when `qualifier` names the table, either by its real name or
/// by its alias (if it has one). Comparison ignores ASCII case.
fn qualifier_matches_table(qualifier: &str, table_name: &str, table_alias: Option<&str>) -> bool {
    if qualifier.eq_ignore_ascii_case(table_name) {
        return true;
    }
    match table_alias {
        Some(alias) => qualifier.eq_ignore_ascii_case(alias),
        None => false,
    }
}
/// Returns `true` when `name` is one of the implicit rowid spellings
/// (`rowid`, `_rowid_`, `oid`), ignoring ASCII case.
///
/// Compares in place instead of building a lowercase copy, so no allocation
/// happens per call.
fn is_rowid_alias_name(name: &str) -> bool {
    const ROWID_ALIASES: [&str; 3] = ["rowid", "_rowid_", "oid"];
    ROWID_ALIASES
        .iter()
        .any(|alias| name.eq_ignore_ascii_case(alias))
}
/// Resolves every ORDER BY term of a compound SELECT to an output column.
///
/// Output aliases are collected from the first SELECT and then from each
/// compound arm, in order. The output column count is taken from the first
/// SELECT only. Terms are resolved left to right.
///
/// # Errors
/// Returns the first `CompoundOrderByError` encountered.
pub fn resolve_compound_order_by(
    body: &SelectBody,
    order_by: &[OrderingTerm],
) -> Result<Vec<ResolvedCompoundOrderBy>, CompoundOrderByError> {
    let mut all_aliases: Vec<Vec<Option<String>>> = Vec::with_capacity(1 + body.compounds.len());
    all_aliases.push(extract_output_aliases(&body.select));
    all_aliases.extend(
        body.compounds
            .iter()
            .map(|(_, core)| extract_output_aliases(core)),
    );
    let num_columns = count_output_columns(&body.select);

    // Collecting into `Result` stops at the first failing term.
    order_by
        .iter()
        .map(|term| {
            resolve_single_term(&term.expr, &all_aliases, num_columns).map(
                |(column_idx, collation)| ResolvedCompoundOrderBy {
                    column_idx,
                    direction: term.direction,
                    collation,
                    nulls: term.nulls,
                },
            )
        })
        .collect()
}
/// Resolves one ORDER BY expression of a compound SELECT.
///
/// Returns the zero-based output column index together with the collation
/// requested by an enclosing `COLLATE`, if any.
///
/// * Integer literal — a one-based column number; must be in `1..=num_columns`.
/// * Column reference — matched by column name only (ASCII case-insensitive)
///   against the aliases of each SELECT arm in order; the first hit wins.
/// * `COLLATE` — resolves the inner expression and attaches this collation
///   (an inner collation, if nested, is discarded).
///
/// # Errors
/// * `IndexZeroOrNegative` / `IndexOutOfRange` for bad column numbers.
/// * `ColumnNotFound` when no alias matches.
/// * `ExpressionNotAllowed` for any other expression kind.
fn resolve_single_term(
    expr: &Expr,
    all_aliases: &[Vec<Option<String>>],
    num_columns: usize,
) -> Result<(usize, Option<String>), CompoundOrderByError> {
    match expr {
        Expr::Literal(Literal::Integer(n), span) => {
            if *n <= 0 {
                return Err(CompoundOrderByError::IndexZeroOrNegative {
                    value: *n,
                    span: *span,
                });
            }
            // Checked conversion: on targets where `usize` is narrower than
            // `i64`, an `as` cast would wrap a huge ordinal into a small,
            // seemingly valid one. Saturating keeps it out of range.
            let position = usize::try_from(*n).unwrap_or(usize::MAX);
            if position > num_columns {
                return Err(CompoundOrderByError::IndexOutOfRange {
                    index: position,
                    num_columns,
                    span: *span,
                });
            }
            // `position >= 1` here, so the subtraction cannot underflow.
            Ok((position - 1, None))
        }
        Expr::Column(col_ref, span) => {
            let name = &col_ref.column;
            // Search arm by arm, left to right within each arm.
            let hit = all_aliases.iter().find_map(|aliases| {
                aliases.iter().position(|alias| {
                    alias
                        .as_deref()
                        .is_some_and(|candidate| candidate.eq_ignore_ascii_case(name))
                })
            });
            match hit {
                Some(pos) => Ok((pos, None)),
                None => Err(CompoundOrderByError::ColumnNotFound {
                    name: name.to_string(),
                    span: *span,
                }),
            }
        }
        Expr::Collate {
            expr: inner,
            collation,
            ..
        } => {
            let (idx, _) = resolve_single_term(inner, all_aliases, num_columns)?;
            Ok((idx, Some(collation.clone())))
        }
        other => Err(CompoundOrderByError::ExpressionNotAllowed { span: other.span() }),
    }
}
/// Returns `true` when `body` has at least one compound arm attached to its
/// first SELECT.
#[must_use]
pub fn is_compound(body: &SelectBody) -> bool {
    let lacks_compound_arms = body.compounds.is_empty();
    !lacks_compound_arms
}
/// SQL keyword spelling of a compound operator.
#[must_use]
pub fn compound_op_name(op: CompoundOp) -> &'static str {
    match op {
        CompoundOp::UnionAll => "UNION ALL",
        CompoundOp::Union => "UNION",
        CompoundOp::Except => "EXCEPT",
        CompoundOp::Intersect => "INTERSECT",
    }
}
/// Origin of a statistics value.
///
/// `TableRefWithStats::from_table_stats` treats only `Analyze` as "has stats".
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StatsSource {
/// Presumably gathered by an ANALYZE run — TODO confirm against the stats module.
Analyze,
/// Presumably a built-in guess used when no gathered statistics exist — TODO confirm.
Heuristic,
}
/// Size statistics for one table, as consumed by the cost model.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TableStats {
/// Table name; compared ASCII case-insensitively against index table names.
pub name: String,
/// Page count passed to the cost functions as `table_pages`.
pub n_pages: u64,
/// Row count passed to the cost functions as `n_rows`.
pub n_rows: u64,
/// Where these numbers came from.
pub source: StatsSource,
}
/// Description of one index available to the planner.
#[derive(Debug, Clone, PartialEq)]
pub struct IndexInfo {
/// Index name; matched ASCII case-insensitively against `INDEXED BY` hints.
pub name: String,
/// Name of the table the index belongs to.
pub table: String,
/// Indexed column names; used for the covering-index check.
pub columns: Vec<String>,
/// Whether the index is unique (an equality probe is then estimated at one row).
pub unique: bool,
/// Page count of the index — presumably fed to the cost model as `index_pages`; TODO confirm.
pub n_pages: u64,
/// Where `n_pages` came from.
pub source: StatsSource,
/// Predicate of a partial index; the index is skipped unless the WHERE terms imply it.
pub partial_where: Option<Expr>,
/// Expression columns of the index — usage is not visible in this part of the file.
pub expression_columns: Vec<Expr>,
}
/// Names a column that should be treated as an alias for the table's rowid.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RowidAliasHint {
/// Optional table qualifier a qualified column reference must carry to match;
/// when `None`, qualified references are compared against the table name instead.
pub qualifier: Option<String>,
/// Column name, compared ASCII case-insensitively.
pub column: String,
}
impl RowidAliasHint {
    /// Creates an unqualified hint for `column`.
    #[must_use]
    pub fn new(column: impl Into<String>) -> Self {
        let column = column.into();
        Self {
            qualifier: None,
            column,
        }
    }

    /// Creates a hint for `column` that qualified references must address
    /// through `qualifier`.
    #[must_use]
    pub fn qualified(qualifier: impl Into<String>, column: impl Into<String>) -> Self {
        let qualifier = Some(qualifier.into());
        let column = column.into();
        Self { qualifier, column }
    }

    /// Returns `true` when `column` refers to this hinted rowid alias.
    ///
    /// The column name must match (ignoring ASCII case). An unqualified
    /// reference then always matches; a qualified one must carry the hint's
    /// qualifier, or `table_name` when the hint has no qualifier.
    fn matches_column(&self, table_name: &str, column: &WhereColumn) -> bool {
        if !column.column.eq_ignore_ascii_case(&self.column) {
            return false;
        }
        let Some(column_qualifier) = column.table.as_deref() else {
            return true;
        };
        let expected_qualifier = self.qualifier.as_deref().unwrap_or(table_name);
        column_qualifier.eq_ignore_ascii_case(expected_qualifier)
    }
}
/// Strategy used to read rows from a table; each variant has its own cost
/// formula in `estimate_cost_ext`.
// `Eq` cannot be derived because two variants carry an `f64`.
#[derive(Debug, Clone, PartialEq)]
#[allow(clippy::derive_partial_eq_without_eq)]
pub enum AccessPathKind {
/// Read the whole table.
FullTableScan,
/// Index range probe; `selectivity` scales both page and row costs.
IndexScanRange { selectivity: f64 },
/// Index equality probe followed by a table lookup.
IndexScanEquality,
/// Index scan that never touches the table pages in the cost model.
CoveringIndexScan { selectivity: f64 },
/// Direct lookup by rowid.
RowidLookup,
}
/// Concrete probe attached to an access path.
// NOTE(review): construction and consumption of probes are outside the visible
// part of the file; field meanings below are inferred from names — confirm.
#[derive(Debug, Clone, PartialEq)]
#[allow(clippy::derive_partial_eq_without_eq)]
pub enum AccessPathProbe {
/// Equality on the rowid against `target`.
RowidEquality { target: Box<Expr> },
/// Equality on `column` against `target`.
Equality { column: String, target: Box<Expr> },
/// Range on `column`; each bound pairs an expression with a flag
/// (presumably "inclusive" — TODO confirm).
Range {
column: String,
lower: Option<(Box<Expr>, bool)>,
upper: Option<(Box<Expr>, bool)>,
},
/// Membership of `column` in a list of `values`.
InList {
column: String,
values: Vec<Box<Expr>>,
},
}
/// The chosen way of reading one table, with its estimates.
#[derive(Debug, Clone, PartialEq)]
#[allow(clippy::derive_partial_eq_without_eq)]
pub struct AccessPath {
/// Name of the table being read.
pub table: String,
/// Access strategy.
pub kind: AccessPathKind,
/// Name of the index used, if any; `None` for table scans and rowid paths.
pub index: Option<String>,
/// Estimated cost in the units produced by `estimate_cost_ext`.
pub estimated_cost: f64,
/// Estimated number of rows produced.
pub estimated_rows: f64,
/// Time-travel clause — always `None` where paths are built in the visible
/// code; presumably filled in by a later stage (TODO confirm).
pub time_travel: Option<fsqlite_ast::TimeTravelClause>,
/// Concrete probe, when one has been derived.
pub probe: Option<AccessPathProbe>,
}
/// Outcome of `MorselEligibility::evaluate`: whether a plan can be split into
/// morsels and, if so, how.
#[derive(Debug, Clone, PartialEq)]
pub struct MorselEligibility {
/// `true` when the plan passed every eligibility check.
pub eligible: bool,
/// Table of the single full-table-scan path; `None` when ineligible.
pub driving_table: Option<String>,
/// Row estimate of the driving path; `0.0` when ineligible.
pub estimated_rows: f64,
/// Number of morsels; `1` when ineligible.
pub morsel_count: u16,
/// Rows assigned to each morsel; `0` when ineligible.
pub rows_per_morsel: u64,
/// Why the plan is ineligible, or `MorselIneligibleReason::None` when eligible.
pub reason: MorselIneligibleReason,
}
/// Reason a plan was rejected by `MorselEligibility::evaluate`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MorselIneligibleReason {
/// Not rejected: the plan is eligible.
None,
/// The row estimate is below the minimum row threshold.
TooFewRows,
/// The plan has no access path, or its first path is not a full table scan.
NoFullTableScan,
/// The join order does not contain exactly one table.
MultiTableJoin,
/// The query has a LIMIT.
HasLimit,
/// The query is a compound SELECT.
CompoundQuery,
}
impl MorselEligibility {
    /// Row estimate below which a scan is not worth splitting.
    const MIN_ROWS_FOR_MORSEL: f64 = 4096.0;
    /// Lower bound on the number of rows assigned to one morsel.
    const DEFAULT_MORSEL_TARGET_ROWS: u64 = 1024;
    /// Upper bound on the worker count considered (and fallback morsel count).
    const MAX_MORSELS: u16 = 64;

    /// Builds the "not eligible" result for `reason`.
    fn ineligible(reason: MorselIneligibleReason) -> Self {
        Self {
            reason,
            eligible: false,
            driving_table: None,
            estimated_rows: 0.0,
            morsel_count: 1,
            rows_per_morsel: 0,
        }
    }

    /// Decides whether `plan` can be executed as parallel morsels.
    ///
    /// Checks are applied in this order, and the first failing one determines
    /// the reported reason: compound query, LIMIT, table count other than one,
    /// missing or non-full-scan first access path, too few estimated rows.
    ///
    /// For an eligible plan the estimated rows are divided across
    /// `available_workers` (clamped to `1..=MAX_MORSELS`), with at least
    /// `DEFAULT_MORSEL_TARGET_ROWS` rows per morsel.
    #[must_use]
    pub fn evaluate(
        plan: &QueryPlan,
        has_limit: bool,
        is_compound: bool,
        available_workers: u16,
    ) -> Self {
        if is_compound {
            return Self::ineligible(MorselIneligibleReason::CompoundQuery);
        }
        if has_limit {
            return Self::ineligible(MorselIneligibleReason::HasLimit);
        }
        if plan.join_order.len() != 1 {
            return Self::ineligible(MorselIneligibleReason::MultiTableJoin);
        }
        let Some(path) = plan.access_paths.first() else {
            return Self::ineligible(MorselIneligibleReason::NoFullTableScan);
        };
        if path.kind != AccessPathKind::FullTableScan {
            return Self::ineligible(MorselIneligibleReason::NoFullTableScan);
        }
        if path.estimated_rows < Self::MIN_ROWS_FOR_MORSEL {
            return Self::ineligible(MorselIneligibleReason::TooFewRows);
        }

        let total_rows = path.estimated_rows as u64;
        let worker_count = u64::from(available_workers.clamp(1, Self::MAX_MORSELS));
        let rows_per_morsel = (total_rows / worker_count).max(Self::DEFAULT_MORSEL_TARGET_ROWS);
        let raw_morsel_count = (total_rows / rows_per_morsel).max(1);
        let morsel_count = u16::try_from(raw_morsel_count).unwrap_or(Self::MAX_MORSELS);

        Self {
            eligible: true,
            driving_table: Some(path.table.clone()),
            estimated_rows: path.estimated_rows,
            morsel_count,
            rows_per_morsel,
            reason: MorselIneligibleReason::None,
        }
    }
}
/// A complete plan for one query.
#[derive(Debug, Clone, PartialEq)]
pub struct QueryPlan {
/// Table names in the order they are joined.
pub join_order: Vec<String>,
/// One access path per table; printed in order by the `Display` impl.
pub access_paths: Vec<AccessPath>,
/// Join operator choices; printed under "JOIN OPERATORS:" when non-empty.
pub join_segments: Vec<JoinPlanSegment>,
/// Total estimated cost shown in the plan header.
pub total_cost: f64,
/// Morsel-parallelism verdict, when one has been computed.
pub morsel_eligibility: Option<MorselEligibility>,
}
/// Number of plans retained by a `QueryPlanner` created with `QueryPlanner::new`.
pub const DEFAULT_PLAN_CACHE_CAPACITY: usize = 128;
/// Planner front end that memoises plans per SQL template.
///
/// Plans live in an LRU cache keyed by a 64-bit hash. In front of it sits a
/// single-entry "hot" slot holding the most recently returned plan, so that
/// repeated lookups of the same key skip the LRU entirely.
#[derive(Debug)]
pub struct QueryPlanner {
/// LRU cache from plan key to shared plan.
plan_cache: LruCache<u64, Rc<QueryPlan>>,
/// Schema cookie seen by the last cached lookup; a change clears the cache.
cached_schema_cookie: Option<u32>,
/// Key of the plan currently held in the hot slot.
hot_plan_cache_key: Option<u64>,
/// Plan currently held in the hot slot.
hot_plan_cache_plan: Option<Rc<QueryPlan>>,
/// Set when the hot slot served a hit that the LRU has not yet been told about.
hot_plan_cache_needs_lru_touch: bool,
}
/// `Default` is equivalent to `QueryPlanner::new()`.
impl Default for QueryPlanner {
fn default() -> Self {
Self::new()
}
}
impl QueryPlanner {
/// Creates a planner whose plan cache holds `DEFAULT_PLAN_CACHE_CAPACITY` entries.
#[must_use]
pub fn new() -> Self {
Self::with_plan_cache_capacity(DEFAULT_PLAN_CACHE_CAPACITY)
}
/// Creates a planner with the given plan cache capacity.
///
/// A capacity of zero is raised to one by `normalize_plan_cache_capacity`.
#[must_use]
pub fn with_plan_cache_capacity(capacity: usize) -> Self {
Self {
plan_cache: LruCache::new(normalize_plan_cache_capacity(capacity)),
cached_schema_cookie: None,
hot_plan_cache_key: None,
hot_plan_cache_plan: None,
hot_plan_cache_needs_lru_touch: false,
}
}
/// Number of plans currently stored in the LRU cache.
#[must_use]
pub fn plan_cache_len(&self) -> usize {
self.plan_cache.len()
}
/// Returns `true` when the LRU cache holds no plans.
#[must_use]
pub fn is_plan_cache_empty(&self) -> bool {
self.plan_cache.is_empty()
}
/// Drops every cached plan, forgets the remembered schema cookie and
/// empties the hot slot.
pub fn clear_plan_cache(&mut self) {
self.plan_cache.clear();
self.cached_schema_cookie = None;
self.clear_hot_plan_cache();
}
/// Returns the plan cached for `sql_template` under `schema_cookie`,
/// calling `build` only when no cached plan exists.
///
/// A `schema_cookie` different from the previous call's clears the whole
/// cache first. The cache key is a 64-bit hash of the template text seeded
/// with the cookie, so the template string itself is never stored.
#[must_use]
pub fn cached_plan<F>(
&mut self,
sql_template: &str,
schema_cookie: u32,
build: F,
) -> Rc<QueryPlan>
where
F: FnOnce() -> QueryPlan,
{
self.invalidate_plan_cache_if_schema_cookie_changed(schema_cookie);
let key = plan_cache_key(sql_template, schema_cookie);
// Must run before the hot-slot check: it retires a hot entry for a
// different key (flushing its pending LRU touch first).
self.prepare_plan_cache_lookup(key);
if let Some(plan) = self.lookup_hot_plan_cache(key) {
return plan;
}
if let Some(plan) = self.plan_cache.get(&key).map(Rc::clone) {
return self.record_plan_cache_hit(key, plan);
}
let plan = Rc::new(build());
self.plan_cache.put(key, Rc::clone(&plan));
self.record_plan_cache_hit(key, plan)
}
/// Plans the join order for a query, reusing a cached plan when possible.
///
/// When `cracking_hints` is supplied the cache is bypassed entirely: the
/// plan is computed fresh and neither looked up nor stored. Otherwise the
/// lookup mirrors `cached_plan`, with the planner feature flags folded
/// into the cache key.
#[allow(clippy::too_many_arguments)]
#[must_use]
pub fn order_joins_with_cache(
&mut self,
sql_template: &str,
schema_cookie: u32,
tables: &[TableStats],
indexes: &[IndexInfo],
where_terms: &[WhereTerm<'_>],
needed_columns: Option<&[String]>,
cross_join_pairs: &[(String, String)],
table_index_hints: Option<&BTreeMap<String, IndexHint>>,
cracking_hints: Option<&mut CrackingHintStore>,
feature_flags: PlannerFeatureFlags,
) -> Rc<QueryPlan> {
// A hint store is passed mutably to the planner, so the result is
// never served from or written to the cache.
if cracking_hints.is_some() {
return Rc::new(order_joins_with_hints_and_features(
tables,
indexes,
where_terms,
needed_columns,
cross_join_pairs,
table_index_hints,
cracking_hints,
feature_flags,
));
}
self.invalidate_plan_cache_if_schema_cookie_changed(schema_cookie);
let key = plan_cache_key_with_feature_flags(sql_template, schema_cookie, feature_flags);
self.prepare_plan_cache_lookup(key);
if let Some(plan) = self.lookup_hot_plan_cache(key) {
return plan;
}
if let Some(plan) = self.plan_cache.get(&key).map(Rc::clone) {
return self.record_plan_cache_hit(key, plan);
}
// `cracking_hints` is known to be `None` on this path.
let plan = Rc::new(order_joins_with_hints_and_features(
tables,
indexes,
where_terms,
needed_columns,
cross_join_pairs,
table_index_hints,
cracking_hints,
feature_flags,
));
self.plan_cache.put(key, Rc::clone(&plan));
self.record_plan_cache_hit(key, plan)
}
/// Clears the LRU cache and hot slot when `schema_cookie` differs from the
/// remembered one, then remembers `schema_cookie`.
///
/// The first call (no remembered cookie) clears nothing.
fn invalidate_plan_cache_if_schema_cookie_changed(&mut self, schema_cookie: u32) {
if self
.cached_schema_cookie
.is_some_and(|cached| cached != schema_cookie)
{
self.plan_cache.clear();
self.clear_hot_plan_cache();
}
self.cached_schema_cookie = Some(schema_cookie);
}
/// Retires the hot slot when it holds a key other than `key`.
///
/// Any pending LRU touch is flushed first so the outgoing hot entry is
/// promoted in the LRU before a new entry may be inserted.
fn prepare_plan_cache_lookup(&mut self, key: u64) {
if self
.hot_plan_cache_key
.is_some_and(|hot_key| hot_key != key)
{
self.flush_hot_plan_cache_lru_touch();
self.clear_hot_plan_cache();
}
}
/// Serves `key` from the hot slot if it is the hot key.
///
/// A hit does not touch the LRU; it only marks that a touch is owed.
fn lookup_hot_plan_cache(&mut self, key: u64) -> Option<Rc<QueryPlan>> {
if self.hot_plan_cache_key == Some(key) {
self.hot_plan_cache_needs_lru_touch = true;
return self.hot_plan_cache_plan.as_ref().map(Rc::clone);
}
None
}
/// Installs `plan` as the hot entry for `key` and returns it.
///
/// Called right after an LRU `get` or `put`, so no LRU touch is pending.
fn record_plan_cache_hit(&mut self, key: u64, plan: Rc<QueryPlan>) -> Rc<QueryPlan> {
self.hot_plan_cache_key = Some(key);
self.hot_plan_cache_plan = Some(Rc::clone(&plan));
self.hot_plan_cache_needs_lru_touch = false;
plan
}
/// Performs the deferred LRU touch for the hot key, if one is owed.
fn flush_hot_plan_cache_lru_touch(&mut self) {
if !self.hot_plan_cache_needs_lru_touch {
return;
}
if let Some(key) = self.hot_plan_cache_key {
// The lookup result is irrelevant; `get` is called for its
// recency-updating side effect.
let _ = self.plan_cache.get(&key);
}
self.hot_plan_cache_needs_lru_touch = false;
}
/// Empties the hot slot and drops any pending LRU touch.
fn clear_hot_plan_cache(&mut self) {
self.hot_plan_cache_key = None;
self.hot_plan_cache_plan = None;
self.hot_plan_cache_needs_lru_touch = false;
}
}
/// Converts a requested cache capacity into a non-zero capacity.
///
/// Zero is raised to one; every other value is kept as is. Expressed as a
/// single fallible conversion with a fallback, so there is no panic branch.
fn normalize_plan_cache_capacity(capacity: usize) -> NonZeroUsize {
    NonZeroUsize::new(capacity).unwrap_or(NonZeroUsize::MIN)
}
// Hash-seed components for plan cache keys. The 32-bit schema cookie occupies
// the low 32 bits of the seed, the feature bits sit at bits 32 and 33, and the
// tags occupy the top byte, so the components never overlap.
// Seed tag for keys produced by `plan_cache_key`.
const PLAN_CACHE_DIRECT_SEED_TAG: u64 = 0x5A00_0000_0000_0000;
// Seed tag for keys produced by `plan_cache_key_with_feature_flags`.
const PLAN_CACHE_JOIN_SEED_TAG: u64 = 0xA500_0000_0000_0000;
// Seed bit set when `PlannerFeatureFlags::leapfrog_join` is enabled.
const PLAN_CACHE_FEATURE_LEAPFROG: u64 = 1_u64 << 32;
// Seed bit set when `PlannerFeatureFlags::dpccp_join` is enabled.
const PLAN_CACHE_FEATURE_DPCCP: u64 = 1_u64 << 33;
/// Cache key for `QueryPlanner::cached_plan`: the xxh3 hash of the template
/// bytes, seeded with the direct-plan tag combined with the schema cookie.
fn plan_cache_key(sql_template: &str, schema_cookie: u32) -> u64 {
    let seed = PLAN_CACHE_DIRECT_SEED_TAG | u64::from(schema_cookie);
    xxh3_64_with_seed(sql_template.as_bytes(), seed)
}
/// Cache key for `QueryPlanner::order_joins_with_cache`.
///
/// Same scheme as `plan_cache_key`, but seeded with the join-plan tag and
/// with one extra seed bit per enabled planner feature, so plans built under
/// different feature sets never share a key.
fn plan_cache_key_with_feature_flags(
    sql_template: &str,
    schema_cookie: u32,
    feature_flags: PlannerFeatureFlags,
) -> u64 {
    let mut seed = PLAN_CACHE_JOIN_SEED_TAG | u64::from(schema_cookie);
    if feature_flags.leapfrog_join {
        seed |= PLAN_CACHE_FEATURE_LEAPFROG;
    }
    if feature_flags.dpccp_join {
        seed |= PLAN_CACHE_FEATURE_DPCCP;
    }
    xxh3_64_with_seed(sql_template.as_bytes(), seed)
}
/// Optional planner features; both default to off.
///
/// Each flag contributes its own bit to the plan cache key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct PlannerFeatureFlags {
/// Enables the leapfrog join feature (see `JoinOperator::LeapfrogTriejoin`).
pub leapfrog_join: bool,
/// Enables the DPccp join feature — presumably DPccp join enumeration; TODO confirm.
pub dpccp_join: bool,
}
// Presumably the largest relation count handled by the DPccp enumerator;
// not referenced in the visible code, hence the `dead_code` allowance — TODO confirm.
#[allow(dead_code)]
const DPCCP_MAX_TABLES: usize = 8;
// Process-wide counter read by `plans_enumerated_total` and zeroed by
// `reset_plans_enumerated`; its increment sites are outside the visible code.
static FSQLITE_PLANNER_PLANS_ENUMERATED: AtomicU64 = AtomicU64::new(0);
/// Current value of the process-wide "plans enumerated" counter
/// (relaxed atomic load).
#[must_use]
pub fn plans_enumerated_total() -> u64 {
FSQLITE_PLANNER_PLANS_ENUMERATED.load(Ordering::Relaxed)
}
/// Resets the process-wide "plans enumerated" counter to zero
/// (relaxed atomic store).
pub fn reset_plans_enumerated() {
FSQLITE_PLANNER_PLANS_ENUMERATED.store(0, Ordering::Relaxed);
}
/// Physical join operator chosen for a join segment.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JoinOperator {
/// Rendered as "HASH JOIN".
HashJoin,
/// Rendered as "LEAPFROG TRIEJOIN".
LeapfrogTriejoin,
}
impl JoinOperator {
/// Upper-case operator name used when printing a plan.
#[must_use]
pub const fn label(self) -> &'static str {
match self {
Self::HashJoin => "HASH JOIN",
Self::LeapfrogTriejoin => "LEAPFROG TRIEJOIN",
}
}
}
/// One join step of a plan: which relations are joined and with which operator.
#[derive(Debug, Clone, PartialEq)]
#[allow(clippy::derive_partial_eq_without_eq)]
pub struct JoinPlanSegment {
/// Relation names; printed joined by " JOIN ".
pub relations: Vec<String>,
/// Operator chosen for this segment.
pub operator: JoinOperator,
/// Estimated cost of the segment.
pub estimated_cost: f64,
/// Free-text explanation printed in square brackets after the segment.
pub reason: String,
}
/// Multi-line, human-readable rendering of a plan.
///
/// Emits a header with the total cost, one `SCAN` line per access path
/// (numbered from zero) and, when join segments exist, a "JOIN OPERATORS:"
/// section with one line per segment.
impl fmt::Display for QueryPlan {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(f, "QUERY PLAN (est. cost {:.1}):", self.total_cost)?;

        for (position, path) in self.access_paths.iter().enumerate() {
            // Empty when the path does not use an index.
            let index_suffix = match path.index.as_deref() {
                Some(index_name) => format!(" USING INDEX {index_name}"),
                None => String::new(),
            };
            writeln!(
                f,
                " {position}: SCAN {}{index_suffix} (~{:.0} rows, cost {:.1})",
                path.table, path.estimated_rows, path.estimated_cost
            )?;
        }

        if self.join_segments.is_empty() {
            return Ok(());
        }

        writeln!(f, "JOIN OPERATORS:")?;
        for segment in &self.join_segments {
            let operator_label = segment.operator.label();
            let joined_relations = segment.relations.join(" JOIN ");
            writeln!(
                f,
                " {} {} (est. {:.1}) [{}]",
                operator_label, joined_relations, segment.estimated_cost, segment.reason
            )?;
        }
        Ok(())
    }
}
/// Page-only cost estimate for an access path.
///
/// Shorthand for `estimate_cost_ext` with a row count of zero, which removes
/// every per-row term from the result.
#[must_use]
pub fn estimate_cost(kind: &AccessPathKind, table_pages: u64, index_pages: u64) -> f64 {
estimate_cost_ext(kind, table_pages, index_pages, 0)
}
// Per-row cost applied to full table scans and covering index scans.
const ROW_DECODE_COST: f64 = 0.01;
// Per-row cost applied to index range scans, index equality probes and rowid lookups.
const ROW_ACCESS_COST: f64 = 0.02;
/// Estimates the cost of reading a table through `kind`.
///
/// The result is a page cost plus a per-row cost. `table_pages` and
/// `index_pages` are each raised to at least one before use, so the logarithms
/// below are never taken of zero.
///
/// Side effects: increments the process-wide cost-estimate counter and emits a
/// `tracing` debug event on every call.
#[must_use]
pub fn estimate_cost_ext(
kind: &AccessPathKind,
table_pages: u64,
index_pages: u64,
n_rows: u64,
) -> f64 {
let tp = table_pages.max(1) as f64;
let ip = index_pages.max(1) as f64;
let nr = n_rows as f64;
let cost = match kind {
// Every table page plus decoding every row.
AccessPathKind::FullTableScan => nr.mul_add(ROW_DECODE_COST, tp),
AccessPathKind::IndexScanRange { selectivity } => {
// Index descent, then the selected fraction of index and table pages.
let page_cost = ip.log2() + selectivity * ip + selectivity * tp;
(selectivity * nr).mul_add(ROW_ACCESS_COST, page_cost)
}
AccessPathKind::IndexScanEquality => {
// One descent of the index and one of the table.
let page_cost = ip.log2() + tp.log2();
// At most one row is charged, and none for an empty table.
let matched_rows: f64 = if nr > 0.0 { 1.0 } else { 0.0 };
matched_rows.mul_add(ROW_ACCESS_COST, page_cost)
}
AccessPathKind::CoveringIndexScan { selectivity } => {
// Index pages only: the table is never visited.
let page_cost = ip.log2() + selectivity * ip;
(selectivity * nr).mul_add(ROW_DECODE_COST, page_cost)
}
AccessPathKind::RowidLookup => {
// A single descent of the table.
let page_cost = tp.log2();
let matched_rows: f64 = if nr > 0.0 { 1.0 } else { 0.0 };
matched_rows.mul_add(ROW_ACCESS_COST, page_cost)
}
};
FSQLITE_PLANNER_COST_ESTIMATES_TOTAL.fetch_add(1, Ordering::Relaxed);
tracing::debug!(
target: "fsqlite.planner",
table_pages,
index_pages,
n_rows,
estimated_cost = cost,
actual_method = %access_path_metric_label(kind),
"cost_estimate"
);
cost
}
/// Join input described by its size, as consumed by
/// `order_join_inputs_with_hints`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TableRefWithStats {
/// Table name.
pub name: String,
/// Page count used for the scan-cost estimate.
pub n_pages: u64,
/// Row count used for the scan-cost estimate.
pub n_rows: u64,
/// `true` when the numbers come from `StatsSource::Analyze`.
pub has_stats: bool,
}
impl TableRefWithStats {
#[must_use]
pub fn from_table_stats(stats: &TableStats) -> Self {
Self {
name: stats.name.clone(),
n_pages: stats.n_pages,
n_rows: stats.n_rows,
has_stats: matches!(stats.source, StatsSource::Analyze),
}
}
}
// Largest number of join inputs for which every permutation is scored;
// above this, inputs are simply sorted by scan cost.
const JOIN_ORDER_EXHAUSTIVE_LIMIT: usize = 4;
/// Chooses an order for join inputs and returns it as indices into `tables`.
///
/// * Zero or one input, or no input with statistics: the original order.
/// * Up to `JOIN_ORDER_EXHAUSTIVE_LIMIT` inputs: every permutation is scored
///   and the cheapest is returned.
/// * More inputs: a stable sort by ascending full-scan cost.
#[must_use]
pub fn order_join_inputs_with_hints(tables: &[TableRefWithStats]) -> Vec<usize> {
    let table_count = tables.len();
    let identity_order = || (0..table_count).collect::<Vec<usize>>();

    if table_count <= 1 || tables.iter().all(|table| !table.has_stats) {
        return identity_order();
    }

    let scan_cost = |idx: usize| -> f64 {
        let table = &tables[idx];
        estimate_cost_ext(
            &AccessPathKind::FullTableScan,
            table.n_pages,
            0,
            table.n_rows,
        )
    };

    if table_count > JOIN_ORDER_EXHAUSTIVE_LIMIT {
        let mut by_cost: Vec<(usize, f64)> = (0..table_count)
            .map(|idx| (idx, scan_cost(idx)))
            .collect();
        // Incomparable costs are treated as equal, keeping their input order.
        by_cost.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
        return by_cost.into_iter().map(|(idx, _)| idx).collect();
    }

    let mut scratch = identity_order();
    let mut best_perm = scratch.clone();
    let mut best_score = f64::INFINITY;
    permute_scoring(
        &mut scratch,
        0,
        table_count,
        &scan_cost,
        &mut best_score,
        &mut best_perm,
    );
    best_perm
}
/// Recursively enumerates the permutations of `slice[k..n]` and keeps the
/// lowest-scoring complete permutation.
///
/// A permutation's score is the sum of `scan_cost(table) * weight`, where the
/// weight is `n` for the first position and decreases by one per position.
/// `best_perm` and `best_score` are overwritten only by a strictly lower
/// score. `slice` is restored to its original order before returning.
fn permute_scoring<F>(
    slice: &mut [usize],
    k: usize,
    n: usize,
    scan_cost: &F,
    best_score: &mut f64,
    best_perm: &mut Vec<usize>,
) where
    F: Fn(usize) -> f64,
{
    if k != n {
        // Try each remaining element at position `k`, undoing the swap afterwards.
        for pivot in k..n {
            slice.swap(k, pivot);
            permute_scoring(slice, k + 1, n, scan_cost, best_score, best_perm);
            slice.swap(k, pivot);
        }
        return;
    }

    let score = slice
        .iter()
        .enumerate()
        .fold(0.0_f64, |running, (position, &table_idx)| {
            let weight = (n - position) as f64;
            scan_cost(table_idx).mul_add(weight, running)
        });
    if score < *best_score {
        *best_score = score;
        best_perm.clear();
        best_perm.extend_from_slice(slice);
    }
}
// Cost factor associated with adaptive index hints; its use is outside the
// visible part of the file. Presumably multiplies the cost of the adaptively
// preferred index (a 10% discount) — TODO confirm.
const ADAPTIVE_HINT_COST_BIAS: f64 = 0.90;
// One atomic counter per `AccessPathKind` variant, recording how often each
// kind was selected.
struct AccessPathSelectionCounters {
full_table_scan: AtomicU64,
index_scan_range: AtomicU64,
index_scan_equality: AtomicU64,
covering_index_scan: AtomicU64,
rowid_lookup: AtomicU64,
}
impl AccessPathSelectionCounters {
    /// Creates a counter set with every counter at zero; `const` so it can
    /// initialise a `static`.
    const fn new() -> Self {
        Self {
            full_table_scan: AtomicU64::new(0),
            index_scan_range: AtomicU64::new(0),
            index_scan_equality: AtomicU64::new(0),
            covering_index_scan: AtomicU64::new(0),
            rowid_lookup: AtomicU64::new(0),
        }
    }

    /// Returns the counter that tracks `kind`.
    fn counter_for(&self, kind: &AccessPathKind) -> &AtomicU64 {
        match kind {
            AccessPathKind::RowidLookup => &self.rowid_lookup,
            AccessPathKind::FullTableScan => &self.full_table_scan,
            AccessPathKind::IndexScanEquality => &self.index_scan_equality,
            AccessPathKind::IndexScanRange { .. } => &self.index_scan_range,
            AccessPathKind::CoveringIndexScan { .. } => &self.covering_index_scan,
        }
    }

    /// Reads every counter (relaxed) into a map keyed by metric label.
    ///
    /// The counters are read one after another, so the map is not an atomic
    /// snapshot across kinds.
    fn snapshot(&self) -> BTreeMap<String, u64> {
        let mut totals = BTreeMap::new();
        let mut record = |label: &str, counter: &AtomicU64| {
            totals.insert(label.to_owned(), counter.load(Ordering::Relaxed));
        };
        record("covering_index_scan", &self.covering_index_scan);
        record("full_table_scan", &self.full_table_scan);
        record("index_scan_equality", &self.index_scan_equality);
        record("index_scan_range", &self.index_scan_range);
        record("rowid_lookup", &self.rowid_lookup);
        totals
    }
}
// Process-wide tally of selected access path kinds.
static INDEX_SELECTION_TOTAL: AccessPathSelectionCounters = AccessPathSelectionCounters::new();
// Process-wide count of `estimate_cost_ext` calls.
static FSQLITE_PLANNER_COST_ESTIMATES_TOTAL: AtomicU64 = AtomicU64::new(0);
// Every actual/estimated ratio recorded by `record_estimation_error`, in
// arrival order; lazily initialised and guarded by a mutex.
static ESTIMATION_ERROR_OBSERVATIONS: LazyLock<Mutex<Vec<f64>>> =
LazyLock::new(|| Mutex::new(Vec::new()));
/// Point-in-time view of the planner's cost-model metrics, produced by
/// `cost_metrics_snapshot`.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct CostMetricsSnapshot {
/// Number of cost estimates computed so far.
pub fsqlite_planner_cost_estimates_total: u64,
/// Histogram of actual/estimated ratios: the first four buckets hold ratios
/// below 0.5, 1.0, 2.0 and 5.0 respectively; the last holds everything else.
pub error_ratio_buckets: [u64; 5],
/// Mean of all recorded ratios; `NaN` when nothing has been recorded
/// (note that `Default` yields `0.0` instead).
pub error_ratio_mean: f64,
}
// Exclusive upper bounds of the first four error-ratio histogram buckets;
// ratios not below any bound fall into a fifth, overflow bucket.
const ERROR_RATIO_BOUNDARIES: [f64; 4] = [0.5, 1.0, 2.0, 5.0];
/// Captures the current cost-model metrics.
///
/// Reads the estimate counter, then locks the recorded error ratios and folds
/// them into a five-bucket histogram and a mean. A poisoned lock is recovered
/// rather than propagated. With no recorded ratios the mean is `NaN`.
#[must_use]
pub fn cost_metrics_snapshot() -> CostMetricsSnapshot {
    let fsqlite_planner_cost_estimates_total =
        FSQLITE_PLANNER_COST_ESTIMATES_TOTAL.load(Ordering::Relaxed);
    let observations = ESTIMATION_ERROR_OBSERVATIONS
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);

    let mut error_ratio_buckets = [0u64; 5];
    let mut running_sum = 0.0;
    for &ratio in observations.iter() {
        running_sum += ratio;
        // First boundary the ratio is below; otherwise the overflow bucket.
        let bucket = match ERROR_RATIO_BOUNDARIES
            .iter()
            .position(|&upper| ratio < upper)
        {
            Some(found) => found,
            None => 4,
        };
        error_ratio_buckets[bucket] += 1;
    }

    let error_ratio_mean = match observations.len() {
        0 => f64::NAN,
        count => running_sum / count as f64,
    };

    CostMetricsSnapshot {
        fsqlite_planner_cost_estimates_total,
        error_ratio_buckets,
        error_ratio_mean,
    }
}
/// Zeroes the cost-estimate counter and discards every recorded error ratio.
///
/// A poisoned lock is recovered rather than propagated.
pub fn reset_cost_metrics() {
    FSQLITE_PLANNER_COST_ESTIMATES_TOTAL.store(0, Ordering::Relaxed);
    ESTIMATION_ERROR_OBSERVATIONS
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .clear();
}
/// Records how far an estimate was from reality, as the ratio
/// `actual / estimated`.
///
/// The observation is ignored when `estimated` is not positive, when `actual`
/// is negative, or when the ratio is `NaN` (a `NaN` input, or infinity divided
/// by infinity). Without that last check a single `NaN` would be stored and
/// turn the reported mean into `NaN` until the metrics are reset.
///
/// Accepted ratios are appended to the shared observation list (a poisoned
/// lock is recovered) and reported through a `tracing` debug event, flagged as
/// miscalibrated when outside `0.2..=5.0`.
pub fn record_estimation_error(actual: f64, estimated: f64) {
    if estimated <= 0.0 || actual < 0.0 {
        return;
    }
    let ratio = actual / estimated;
    // Comparisons with NaN are false, so NaN inputs slip past the guard above.
    if ratio.is_nan() {
        return;
    }
    {
        // Scoped so the lock is released before the tracing call.
        let mut obs = ESTIMATION_ERROR_OBSERVATIONS
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        obs.push(ratio);
    }
    tracing::debug!(
        actual,
        estimated,
        ratio,
        miscalibrated = !(0.2..=5.0).contains(&ratio),
        "planner.estimation_error"
    );
}
// Multiplier applied to the squared relative error when the estimate was too low.
const UNDERESTIMATE_PENALTY: f64 = 3.0;

/// Loss for an estimate that penalises underestimation more than
/// overestimation.
///
/// With `ratio = actual / estimated`:
/// * `ratio > 1` (underestimate): `UNDERESTIMATE_PENALTY * (ratio - 1)^2`.
/// * otherwise (exact or overestimate): `1 - ratio`.
///
/// A non-positive `estimated` returns `actual` unchanged.
#[must_use]
pub fn asymmetric_estimation_loss(estimated: f64, actual: f64) -> f64 {
    if estimated <= 0.0 {
        return actual;
    }
    let ratio = actual / estimated;
    if ratio > 1.0 {
        let shortfall = ratio - 1.0;
        return UNDERESTIMATE_PENALTY * shortfall.powi(2);
    }
    1.0 - ratio
}
// Snake-case metric label for an access path kind. The labels match the keys
// produced by `AccessPathSelectionCounters::snapshot`.
fn access_path_metric_label(kind: &AccessPathKind) -> &'static str {
match kind {
AccessPathKind::FullTableScan => "full_table_scan",
AccessPathKind::IndexScanRange { .. } => "index_scan_range",
AccessPathKind::IndexScanEquality => "index_scan_equality",
AccessPathKind::CoveringIndexScan { .. } => "covering_index_scan",
AccessPathKind::RowidLookup => "rowid_lookup",
}
}
/// Bumps the selection counter for `kind` and returns its new value.
fn increment_index_selection_total(kind: &AccessPathKind) -> u64 {
    let counter = INDEX_SELECTION_TOTAL.counter_for(kind);
    // `fetch_add` yields the value before the increment.
    let previous = counter.fetch_add(1, Ordering::Relaxed);
    previous + 1
}
/// Current per-kind access path selection counts, keyed by metric label
/// (`"full_table_scan"`, `"index_scan_range"`, `"index_scan_equality"`,
/// `"covering_index_scan"`, `"rowid_lookup"`).
#[must_use]
pub fn snapshot_index_selection_totals() -> BTreeMap<String, u64> {
INDEX_SELECTION_TOTAL.snapshot()
}
/// Map key for a table name: the name with ASCII letters lowercased
/// (non-ASCII characters are left untouched).
fn canonical_table_key(table_name: &str) -> String {
    let mut key = String::from(table_name);
    key.make_ascii_lowercase();
    key
}
/// Looks up the index hint recorded for `table_name`, if hints were supplied
/// and one exists under the table's canonical (ASCII-lowercased) key.
fn lookup_table_index_hint<'a>(
    table_name: &str,
    table_index_hints: Option<&'a BTreeMap<String, IndexHint>>,
) -> Option<&'a IndexHint> {
    let hints = table_index_hints?;
    hints.get(&canonical_table_key(table_name))
}
/// Remembers, per table, the index used by the most recently recorded access
/// path, so later planning can prefer it.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CrackingHintStore {
// Canonical (ASCII-lowercased) table name -> index name.
preferred_index_by_table: HashMap<String, String>,
}
impl CrackingHintStore {
    /// Index remembered for `table_name` (matched ignoring ASCII case), if any.
    #[must_use]
    pub fn preferred_index(&self, table_name: &str) -> Option<&str> {
        let key = canonical_table_key(table_name);
        let index_name = self.preferred_index_by_table.get(&key)?;
        Some(index_name.as_str())
    }

    /// Remembers the index used by `access_path` for its table, replacing any
    /// earlier entry. Paths that use no index leave the store unchanged.
    pub fn record_access_path(&mut self, access_path: &AccessPath) {
        let Some(index_name) = access_path.index.as_ref() else {
            return;
        };
        let key = canonical_table_key(&access_path.table);
        self.preferred_index_by_table.insert(key, index_name.clone());
    }
}
/// Adds every index hint found in `from_clause` to `output`.
///
/// The source and then each joined table are visited in order; parenthesised
/// joins are descended into. A hinted table is recorded under its canonical
/// name and, when it has one, under its canonical alias as well. Subqueries
/// and table functions contribute nothing. Later entries overwrite earlier
/// ones with the same key.
fn collect_table_index_hints_inner(
    from_clause: &FromClause,
    output: &mut BTreeMap<String, IndexHint>,
) {
    /// Records the hints contributed by a single FROM source.
    fn visit(source: &TableOrSubquery, output: &mut BTreeMap<String, IndexHint>) {
        match source {
            TableOrSubquery::Subquery { .. } | TableOrSubquery::TableFunction { .. } => {}
            TableOrSubquery::ParenJoin(nested) => {
                collect_table_index_hints_inner(nested, output);
            }
            TableOrSubquery::Table {
                name,
                alias,
                index_hint,
                ..
            } => {
                let Some(hint) = index_hint else {
                    return;
                };
                output.insert(canonical_table_key(&name.name), hint.clone());
                if let Some(alias_name) = alias {
                    output.insert(canonical_table_key(alias_name), hint.clone());
                }
            }
        }
    }

    visit(&from_clause.source, output);
    from_clause
        .joins
        .iter()
        .for_each(|join| visit(&join.table, output));
}
/// Collects the index hints of every table in `from_clause`.
///
/// Keys are ASCII-lowercased table names and aliases; see
/// `collect_table_index_hints_inner` for the traversal.
#[must_use]
pub fn collect_table_index_hints(from_clause: &FromClause) -> BTreeMap<String, IndexHint> {
let mut hints = BTreeMap::new();
collect_table_index_hints_inner(from_clause, &mut hints);
hints
}
/// Picks the cheapest access path for `table` with no index hint and no
/// adaptive hint store.
///
/// Shorthand for `best_access_path_with_hints` with both hint arguments `None`.
#[must_use]
pub fn best_access_path(
table: &TableStats,
indexes: &[IndexInfo],
where_terms: &[WhereTerm<'_>],
needed_columns: Option<&[String]>,
) -> AccessPath {
best_access_path_with_hints(table, indexes, where_terms, needed_columns, None, None)
}
/// Picks the cheapest access path for `table`, treating the columns named in
/// `rowid_alias_hints` as aliases of the rowid.
///
/// No explicit index hint and no adaptive preferred index are applied.
#[must_use]
pub fn best_access_path_with_rowid_alias_hints(
table: &TableStats,
indexes: &[IndexInfo],
where_terms: &[WhereTerm<'_>],
needed_columns: Option<&[String]>,
rowid_alias_hints: &[RowidAliasHint],
) -> AccessPath {
best_access_path_internal(
table,
indexes,
where_terms,
needed_columns,
// No explicit index hint.
None,
// No adaptive preferred index.
None,
rowid_alias_hints,
)
}
/// Picks the cheapest access path for `table`, honouring an explicit
/// `index_hint` and, when a hint store is supplied, its remembered preferred
/// index.
///
/// After planning, the chosen path is recorded back into the store, so the
/// store is both read and updated by this call. No rowid alias hints are
/// applied.
#[must_use]
pub fn best_access_path_with_hints(
table: &TableStats,
indexes: &[IndexInfo],
where_terms: &[WhereTerm<'_>],
needed_columns: Option<&[String]>,
index_hint: Option<&IndexHint>,
cracking_hints: Option<&mut CrackingHintStore>,
) -> AccessPath {
// Copy the preferred index name out so the shared borrow of the store ends
// before it is mutated below.
let adaptive_preferred_index = cracking_hints
.as_deref()
.and_then(|store| store.preferred_index(&table.name))
.map(ToOwned::to_owned);
let best = best_access_path_internal(
table,
indexes,
where_terms,
needed_columns,
index_hint,
adaptive_preferred_index.as_deref(),
&[],
);
if let Some(store) = cracking_hints {
store.record_access_path(&best);
}
best
}
#[must_use]
#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
/// Chooses the cheapest access path for `table` among a rowid lookup, a rowid
/// range scan, a full table scan and every usable index in `indexes`.
///
/// * `where_terms` — classified WHERE conjuncts used to decide index usability.
/// * `needed_columns` — when `Some`, an index containing every listed column
///   (rowid aliases are always considered available) is treated as covering.
/// * `index_hint` — `INDEXED BY` restricts the candidates to the named index;
///   `NOT INDEXED` skips every index and the rowid shortcuts.
/// * `adaptive_preferred_index` — index whose cost is multiplied by
///   `ADAPTIVE_HINT_COST_BIAS`; ignored while an `INDEXED BY` hint is present.
/// * `rowid_alias_hints` — extra column names that should be treated as the rowid.
///
/// The returned path always has a finite cost: if an `INDEXED BY` hint cannot be
/// honoured, a full table scan is returned instead. As side effects the function
/// bumps the index-selection metric and, when INFO tracing is enabled, emits a
/// `planner.index_select.choice` event inside an `index_select` span.
fn best_access_path_internal(
    table: &TableStats,
    indexes: &[IndexInfo],
    where_terms: &[WhereTerm<'_>],
    needed_columns: Option<&[String]>,
    index_hint: Option<&IndexHint>,
    adaptive_preferred_index: Option<&str>,
    rowid_alias_hints: &[RowidAliasHint],
) -> AccessPath {
    // Only pay for the clock read when the timing will actually be logged.
    let started = tracing::enabled!(tracing::Level::INFO).then(std::time::Instant::now);
    let explicit_indexed_by = match index_hint {
        Some(IndexHint::IndexedBy(index_name)) => Some(index_name.as_str()),
        _ => None,
    };
    let not_indexed = matches!(index_hint, Some(IndexHint::NotIndexed));
    let rowid_equality_candidate =
        find_rowid_equality_term(&table.name, where_terms, rowid_alias_hints).is_some();
    let rowid_range_candidate = !rowid_equality_candidate
        && where_terms
            .iter()
            .any(|term| where_term_matches_rowid_range(&table.name, term, rowid_alias_hints));

    // Baseline path that every index candidate has to beat.
    let mut best = if explicit_indexed_by.is_some() {
        // Infinite cost guarantees the hinted index wins whenever it is usable.
        AccessPath {
            table: table.name.clone(),
            kind: AccessPathKind::FullTableScan,
            index: None,
            estimated_cost: f64::INFINITY,
            estimated_rows: table.n_rows as f64,
            time_travel: None,
            probe: None,
        }
    } else if !not_indexed && rowid_equality_candidate {
        let kind = AccessPathKind::RowidLookup;
        AccessPath {
            table: table.name.clone(),
            estimated_cost: estimate_cost_ext(&kind, table.n_pages, 0, table.n_rows),
            kind,
            index: None,
            estimated_rows: 1.0,
            time_travel: None,
            probe: None,
        }
    } else if !not_indexed && rowid_range_candidate {
        let selectivity = DEFAULT_RANGE_SELECTIVITY;
        let kind = AccessPathKind::IndexScanRange { selectivity };
        AccessPath {
            table: table.name.clone(),
            estimated_cost: estimate_cost_ext(&kind, table.n_pages, 0, table.n_rows),
            kind,
            index: None,
            estimated_rows: (table.n_rows as f64 * selectivity).max(1.0),
            time_travel: None,
            probe: None,
        }
    } else {
        AccessPath {
            table: table.name.clone(),
            kind: AccessPathKind::FullTableScan,
            index: None,
            estimated_cost: estimate_cost_ext(
                &AccessPathKind::FullTableScan,
                table.n_pages,
                0,
                table.n_rows,
            ),
            estimated_rows: table.n_rows as f64,
            time_travel: None,
            probe: None,
        }
    };

    // Counters that only feed the trace event below.
    let mut candidates_considered: usize = 0;
    let mut partial_indexes_pruned: usize = 0;
    let mut hint_filtered_indexes: usize = 0;
    let mut skip_scan_candidates: usize = 0;
    let mut adaptive_hint_applied = false;
    let mut explicit_hint_applied = false;
    let mut explicit_hint_missing = explicit_indexed_by.is_some();

    for idx in indexes {
        if !idx.table.eq_ignore_ascii_case(&table.name) {
            continue;
        }
        if not_indexed {
            hint_filtered_indexes += 1;
            continue;
        }
        if let Some(hinted_name) = explicit_indexed_by {
            if !idx.name.eq_ignore_ascii_case(hinted_name) {
                hint_filtered_indexes += 1;
                continue;
            }
            explicit_hint_missing = false;
        }
        // A partial index is only usable when the query implies its predicate.
        if let Some(ref partial_pred) = idx.partial_where {
            if !where_terms_imply_predicate(where_terms, partial_pred) {
                partial_indexes_pruned += 1;
                continue;
            }
        }
        // An index that is not directly usable may still qualify for a skip-scan,
        // which is costed as a range scan repeated once per probe.
        let mut skip_scan_candidate = None;
        let usability = match analyze_index_usability(idx, where_terms) {
            IndexUsability::NotUsable => {
                if let Some(candidate) = analyze_skip_scan_candidate(table, idx, where_terms) {
                    skip_scan_candidates += 1;
                    skip_scan_candidate = Some(candidate);
                    IndexUsability::Range {
                        selectivity: candidate.per_probe_selectivity,
                    }
                } else {
                    IndexUsability::NotUsable
                }
            }
            usable => usable,
        };
        if matches!(usability, IndexUsability::NotUsable) {
            continue;
        }
        candidates_considered += 1;
        let is_covering = needed_columns.is_some_and(|needed| {
            needed.iter().all(|column| {
                idx.columns
                    .iter()
                    .any(|index_column| index_column.eq_ignore_ascii_case(column))
                    || is_rowid_alias_name(column)
            })
        });
        // Number of separate index probes the chosen strategy needs.
        let mut cost_multiplier: f64 = 1.0;
        let (kind, mut est_rows) = match usability {
            IndexUsability::Equality => {
                let rows = if idx.unique {
                    1.0
                } else {
                    (table.n_rows as f64 / 10.0).max(1.0)
                };
                if is_covering {
                    (
                        AccessPathKind::CoveringIndexScan {
                            selectivity: rows / table.n_rows.max(1) as f64,
                        },
                        rows,
                    )
                } else {
                    (AccessPathKind::IndexScanEquality, rows)
                }
            }
            IndexUsability::MultiColumnEquality {
                eq_columns,
                trailing_constraint,
            } => {
                // A trailing IN list pins one more column per probe.
                let equality_width = eq_columns
                    + usize::from(matches!(
                        trailing_constraint,
                        MultiColumnTrailingConstraint::InExpansion { .. }
                    ));
                #[allow(clippy::cast_precision_loss)]
                let per_probe_rows = if idx.unique
                    && equality_width == idx.columns.len()
                    && !matches!(
                        trailing_constraint,
                        MultiColumnTrailingConstraint::Range
                            | MultiColumnTrailingConstraint::LikePrefix
                    ) {
                    1.0
                } else {
                    // Each pinned column is assumed to cut the row count tenfold.
                    let divisor = 10.0_f64.powi(i32::try_from(equality_width).unwrap_or(i32::MAX));
                    (table.n_rows as f64 / divisor).max(1.0)
                };
                let (rows, sel) = match trailing_constraint {
                    MultiColumnTrailingConstraint::Range => {
                        let range_factor = DEFAULT_RANGE_SELECTIVITY;
                        let r = (per_probe_rows * range_factor).max(1.0);
                        (
                            r,
                            range_factor * per_probe_rows / table.n_rows.max(1) as f64,
                        )
                    }
                    MultiColumnTrailingConstraint::LikePrefix => {
                        let range_factor = LIKE_PREFIX_SELECTIVITY;
                        let r = (per_probe_rows * range_factor).max(1.0);
                        (
                            r,
                            range_factor * per_probe_rows / table.n_rows.max(1) as f64,
                        )
                    }
                    MultiColumnTrailingConstraint::InExpansion { probe_count } => {
                        cost_multiplier = probe_count as f64;
                        let r =
                            (per_probe_rows * probe_count as f64).min(table.n_rows.max(1) as f64);
                        (r, r / table.n_rows.max(1) as f64)
                    }
                    MultiColumnTrailingConstraint::None => {
                        (per_probe_rows, per_probe_rows / table.n_rows.max(1) as f64)
                    }
                };
                if is_covering {
                    (AccessPathKind::CoveringIndexScan { selectivity: sel }, rows)
                } else if matches!(
                    trailing_constraint,
                    MultiColumnTrailingConstraint::Range
                        | MultiColumnTrailingConstraint::LikePrefix
                ) {
                    (AccessPathKind::IndexScanRange { selectivity: sel }, rows)
                } else {
                    (AccessPathKind::IndexScanEquality, rows)
                }
            }
            IndexUsability::Range { selectivity } => {
                let rows = (selectivity * table.n_rows as f64).max(1.0);
                if is_covering {
                    (AccessPathKind::CoveringIndexScan { selectivity }, rows)
                } else {
                    (AccessPathKind::IndexScanRange { selectivity }, rows)
                }
            }
            IndexUsability::InExpansion { probe_count } => {
                let per_probe_rows: f64 = if idx.unique {
                    1.0
                } else {
                    (table.n_rows as f64 / 10.0).max(1.0)
                };
                // Clamp to the table size, as the multi-column IN case does: a long
                // IN list must not yield an estimate larger than the table.
                let rows = (per_probe_rows * probe_count as f64).min(table.n_rows.max(1) as f64);
                cost_multiplier = probe_count as f64;
                (AccessPathKind::IndexScanEquality, rows)
            }
            IndexUsability::LikePrefix { .. } => {
                let selectivity = LIKE_PREFIX_SELECTIVITY;
                let rows = (selectivity * table.n_rows as f64).max(1.0);
                if is_covering {
                    (AccessPathKind::CoveringIndexScan { selectivity }, rows)
                } else {
                    (AccessPathKind::IndexScanRange { selectivity }, rows)
                }
            }
            // Filtered out by the `continue` above.
            IndexUsability::NotUsable => unreachable!(),
        };
        if let Some(candidate) = skip_scan_candidate {
            // One probe per (leading value, trailing probe) combination.
            let probe_multiplier =
                (candidate.leading_probes * candidate.trailing_probe_count) as f64;
            cost_multiplier *= probe_multiplier;
            est_rows = (est_rows * probe_multiplier).min(table.n_rows.max(1) as f64);
        }
        let mut cost =
            estimate_cost_ext(&kind, table.n_pages, idx.n_pages, table.n_rows) * cost_multiplier;
        if let Some(hinted_name) = explicit_indexed_by {
            if idx.name.eq_ignore_ascii_case(hinted_name) {
                cost *= 0.01;
                explicit_hint_applied = true;
            }
        } else if let Some(adaptive_hint) = adaptive_preferred_index {
            if idx.name.eq_ignore_ascii_case(adaptive_hint) {
                cost *= ADAPTIVE_HINT_COST_BIAS;
                adaptive_hint_applied = true;
            }
        }
        if cost < best.estimated_cost {
            best = AccessPath {
                table: table.name.clone(),
                kind,
                index: Some(idx.name.clone()),
                estimated_cost: cost,
                estimated_rows: est_rows,
                time_travel: None,
                probe: None,
            };
        }
    }

    // The INDEXED BY placeholder survived: the hinted index was missing or unusable.
    if !best.estimated_cost.is_finite() {
        best = AccessPath {
            table: table.name.clone(),
            kind: AccessPathKind::FullTableScan,
            index: None,
            estimated_cost: estimate_cost_ext(
                &AccessPathKind::FullTableScan,
                table.n_pages,
                0,
                table.n_rows,
            ),
            estimated_rows: table.n_rows as f64,
            time_travel: None,
            probe: None,
        };
    }
    best.probe = extract_access_path_probe_with_rowid_aliases(
        &best,
        indexes,
        where_terms,
        rowid_alias_hints,
    );
    let metric_total = increment_index_selection_total(&best.kind);
    if tracing::enabled!(tracing::Level::INFO) {
        let chosen_index = best.index.as_deref().unwrap_or("(none)");
        let selectivity = match &best.kind {
            AccessPathKind::IndexScanRange { selectivity }
            | AccessPathKind::CoveringIndexScan { selectivity } => *selectivity,
            AccessPathKind::IndexScanEquality | AccessPathKind::RowidLookup => {
                best.estimated_rows / table.n_rows.max(1) as f64
            }
            AccessPathKind::FullTableScan => 1.0,
        };
        let metric_index_type = access_path_metric_label(&best.kind);
        let explicit_hint = match index_hint {
            Some(IndexHint::IndexedBy(index_name)) => format!("indexed_by:{index_name}"),
            Some(IndexHint::NotIndexed) => "not_indexed".to_owned(),
            None => "(none)".to_owned(),
        };
        // Correlation identifiers are read from the process environment.
        let run_id = std::env::var("RUN_ID").unwrap_or_else(|_| "(none)".to_owned());
        let trace_id = std::env::var("TRACE_ID")
            .ok()
            .and_then(|value| value.parse::<u64>().ok())
            .unwrap_or(0);
        let scenario_id = std::env::var("SCENARIO_ID").unwrap_or_else(|_| "(none)".to_owned());
        let selection_elapsed_us = started.map_or(1, |start| start.elapsed().as_micros().max(1));
        let adaptive_hint = adaptive_preferred_index.unwrap_or("(none)");
        let hint_applied = explicit_hint_applied || adaptive_hint_applied;
        let span = tracing::info_span!(
            "index_select",
            run_id = %run_id,
            trace_id,
            scenario_id = %scenario_id,
            table = %table.name,
            explicit_hint = %explicit_hint,
            adaptive_hint = %adaptive_hint,
            candidates = candidates_considered,
            partial_pruned = partial_indexes_pruned,
            hint_filtered = hint_filtered_indexes,
            skip_scan_candidates
        );
        let _span_guard = span.enter();
        tracing::info!(
            table = %table.name,
            candidates = candidates_considered,
            chosen_index = %chosen_index,
            estimated_selectivity = selectivity,
            access_path = %access_path_kind_label(&best.kind),
            estimated_cost = best.estimated_cost,
            estimated_rows = best.estimated_rows,
            selection_elapsed_us,
            run_id = %run_id,
            trace_id,
            scenario_id = %scenario_id,
            index_type = metric_index_type,
            fsqlite_index_selection_total = metric_total,
            hint_applied,
            explicit_hint_missing,
            "planner.index_select.choice"
        );
    }
    best
}
/// Returns `true` when every AND-conjunct of a partial index `predicate` is
/// implied by at least one of the query's WHERE `terms`.
fn where_terms_imply_predicate(terms: &[WhereTerm<'_>], predicate: &Expr) -> bool {
    for required in decompose_where(predicate) {
        let satisfied = terms
            .iter()
            .any(|term| expr_implies_partial_predicate(term.expr, required));
        if !satisfied {
            return false;
        }
    }
    true
}
/// Decides whether a single query expression implies one conjunct of a partial
/// index predicate.
///
/// Three rules are tried in order: structural equality of the two expressions,
/// `col IS NOT NULL` predicates (implied by any expression that forces the column
/// to be non-null), and `column <op> literal` comparisons on both sides.
fn expr_implies_partial_predicate(query_expr: &Expr, predicate: &Expr) -> bool {
    if query_expr == predicate {
        return true;
    }
    match normalize_is_not_null_predicate(predicate) {
        Some(not_null_column) => expr_guarantees_non_null(query_expr, &not_null_column),
        None => normalize_column_literal_comparison(query_expr)
            .zip(normalize_column_literal_comparison(predicate))
            .is_some_and(|(query_cmp, predicate_cmp)| query_cmp.implies(&predicate_cmp)),
    }
}
/// A comparison rewritten into the canonical form `column <op> literal`.
///
/// Built by `normalize_column_literal_comparison`, which flips the operator when
/// the literal appears on the left-hand side of the original expression.
#[derive(Debug, Clone, PartialEq)]
struct NormalizedColumnComparison {
/// Column being compared.
column: WhereColumn,
/// Comparison operator, oriented so that the column is the left operand.
op: AstBinaryOp,
/// Literal the column is compared against.
literal: Literal,
}
impl NormalizedColumnComparison {
    /// Returns `true` when every value satisfying `self` also satisfies `predicate`.
    ///
    /// Both comparisons must be on compatible columns and their literals must be
    /// comparable; otherwise the answer is conservatively `false`.
    fn implies(&self, predicate: &Self) -> bool {
        if !where_columns_compatible(&self.column, &predicate.column) {
            return false;
        }
        // Position of our literal relative to the predicate's literal.
        let Some(ordering) = compare_partial_index_literals(&self.literal, &predicate.literal)
        else {
            return false;
        };
        match (self.op, predicate.op) {
            // A point value implies the predicate iff the point itself satisfies it.
            (AstBinaryOp::Eq, predicate_op) => {
                literal_satisfies_predicate_literal(ordering, predicate_op)
            }
            // Lower bounds: ours must sit at or above the predicate's bound.
            (AstBinaryOp::Gt, AstBinaryOp::Gt | AstBinaryOp::Ge)
            | (AstBinaryOp::Ge, AstBinaryOp::Ge) => ordering.is_ge(),
            // An inclusive bound only implies an exclusive one when strictly tighter.
            (AstBinaryOp::Ge, AstBinaryOp::Gt) => ordering.is_gt(),
            // Upper bounds mirror the lower-bound rules.
            (AstBinaryOp::Lt, AstBinaryOp::Lt | AstBinaryOp::Le)
            | (AstBinaryOp::Le, AstBinaryOp::Le) => ordering.is_le(),
            (AstBinaryOp::Le, AstBinaryOp::Lt) => ordering.is_lt(),
            _ => false,
        }
    }
}
/// Given how a concrete value compares to a predicate's literal (`ordering`),
/// reports whether `value <predicate_op> literal` holds.
///
/// Operators other than `=`, `>`, `>=`, `<` and `<=` always yield `false`.
fn literal_satisfies_predicate_literal(
    ordering: std::cmp::Ordering,
    predicate_op: AstBinaryOp,
) -> bool {
    match predicate_op {
        AstBinaryOp::Eq => ordering.is_eq(),
        AstBinaryOp::Gt => ordering.is_gt(),
        AstBinaryOp::Ge => ordering.is_ge(),
        AstBinaryOp::Lt => ordering.is_lt(),
        AstBinaryOp::Le => ordering.is_le(),
        _ => false,
    }
}
fn expr_guarantees_non_null(expr: &Expr, predicate_column: &WhereColumn) -> bool {
if let Some(query_cmp) = normalize_column_literal_comparison(expr) {
return where_columns_compatible(&query_cmp.column, predicate_column)
&& !matches!(query_cmp.literal, Literal::Null);
}
if let Some((column, _)) = classify_or_disjunction_as_in_list(expr) {
return where_columns_compatible(&column, predicate_column);
}
match expr {
Expr::Between { expr: inner, .. }
| Expr::In { expr: inner, .. }
| Expr::Like { expr: inner, .. } => extract_where_column(inner)
.is_some_and(|column| where_columns_compatible(&column, predicate_column)),
Expr::IsNull {
expr: inner,
not: true,
..
} => extract_where_column(inner)
.is_some_and(|column| where_columns_compatible(&column, predicate_column)),
_ => false,
}
}
/// Returns the column of a `column IS NOT NULL` expression, or `None` for any
/// other expression shape (including `IS NOT NULL` on a non-column operand).
fn normalize_is_not_null_predicate(expr: &Expr) -> Option<WhereColumn> {
    match expr {
        Expr::IsNull {
            expr: inner,
            not: true,
            ..
        } => extract_where_column(inner),
        _ => None,
    }
}
/// Normalises `column <op> literal` or `literal <op> column` (with `<op>` one of
/// `=`, `<`, `<=`, `>`, `>=`) into a [`NormalizedColumnComparison`].
///
/// When the literal is on the left the operator is mirrored so the result always
/// reads as `column <op> literal`. Any other expression shape yields `None`.
fn normalize_column_literal_comparison(expr: &Expr) -> Option<NormalizedColumnComparison> {
    let Expr::BinaryOp {
        left,
        op:
            op @ (AstBinaryOp::Eq
            | AstBinaryOp::Lt
            | AstBinaryOp::Le
            | AstBinaryOp::Gt
            | AstBinaryOp::Ge),
        right,
        ..
    } = expr
    else {
        return None;
    };
    // `column <op> literal`: already in canonical orientation.
    if let Expr::Literal(literal, _) = right.as_ref() {
        if let Some(column) = extract_where_column(left) {
            return Some(NormalizedColumnComparison {
                column,
                op: *op,
                literal: literal.clone(),
            });
        }
    }
    // `literal <op> column`: swap the operands and mirror the operator.
    if let Expr::Literal(literal, _) = left.as_ref() {
        if let Some(column) = extract_where_column(right) {
            return Some(NormalizedColumnComparison {
                column,
                op: reverse_comparison_op(*op)?,
                literal: literal.clone(),
            });
        }
    }
    None
}
/// Mirrors a comparison operator so that `a <op> b` becomes `b <mirrored> a`.
///
/// Returns `None` for operators that are not `=`, `<`, `<=`, `>` or `>=`.
fn reverse_comparison_op(op: AstBinaryOp) -> Option<AstBinaryOp> {
    let mirrored = match op {
        AstBinaryOp::Eq => AstBinaryOp::Eq,
        AstBinaryOp::Lt => AstBinaryOp::Gt,
        AstBinaryOp::Le => AstBinaryOp::Ge,
        AstBinaryOp::Gt => AstBinaryOp::Lt,
        AstBinaryOp::Ge => AstBinaryOp::Le,
        _ => return None,
    };
    Some(mirrored)
}
/// Orders two literals for partial-index implication checks.
///
/// Integers and floats are compared numerically, including across the two
/// types; strings are compared by Rust's `Ord` for `String`. Any other
/// combination (including NULL and a NaN float) is incomparable and yields `None`.
///
/// Mixed integer/float comparisons are exact. Casting the integer to `f64` alone
/// would round large magnitudes and could report `Equal` for values that differ,
/// which would make an implication check unsound.
fn compare_partial_index_literals(left: &Literal, right: &Literal) -> Option<std::cmp::Ordering> {
    /// Exact comparison of an integer with a float; `None` when the float is NaN.
    // The casts are intentional: the rounded comparison is only trusted when it is
    // not `Equal`, and the float-to-integer cast only runs on an integral value.
    #[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
    fn compare_integer_to_float(int: i128, float: f64) -> Option<std::cmp::Ordering> {
        // Integer-to-float rounding is monotonic, so a strict result is reliable.
        let approximate = (int as f64).partial_cmp(&float)?;
        if approximate.is_ne() {
            return Some(approximate);
        }
        // Equal after rounding: `float` is integral here, so the tie can be
        // broken in integer space without loss.
        Some(int.cmp(&(float as i128)))
    }

    match (left, right) {
        (Literal::Integer(lhs), Literal::Integer(rhs)) => Some(lhs.cmp(rhs)),
        (Literal::Float(lhs), Literal::Float(rhs)) => lhs.partial_cmp(rhs),
        (Literal::Integer(lhs), Literal::Float(rhs)) => {
            compare_integer_to_float(i128::from(*lhs), *rhs)
        }
        (Literal::Float(lhs), Literal::Integer(rhs)) => {
            compare_integer_to_float(i128::from(*rhs), *lhs).map(std::cmp::Ordering::reverse)
        }
        (Literal::String(lhs), Literal::String(rhs)) => Some(lhs.cmp(rhs)),
        _ => None,
    }
}
/// Lenient column match: names must be equal ignoring ASCII case, and table
/// qualifiers are only compared when both sides carry one.
fn where_columns_compatible(left: &WhereColumn, right: &WhereColumn) -> bool {
    if !left.column.eq_ignore_ascii_case(&right.column) {
        return false;
    }
    if let (Some(lhs), Some(rhs)) = (&left.table, &right.table) {
        lhs.eq_ignore_ascii_case(rhs)
    } else {
        // A missing qualifier on either side is treated as compatible.
        true
    }
}
/// How (and whether) a set of WHERE terms can drive a particular index, as
/// reported by `analyze_index_usability`.
#[derive(Debug, Clone, PartialEq)]
#[allow(clippy::derive_partial_eq_without_eq)]
pub enum IndexUsability {
/// The leading index column is constrained by an equality.
Equality,
/// One or more leading columns are pinned by equality, optionally followed by
/// a further constraint on the next index column.
MultiColumnEquality {
/// Number of leading index columns constrained by equality.
eq_columns: usize,
/// Constraint on the first column after the equality prefix.
trailing_constraint: MultiColumnTrailingConstraint,
},
/// The leading column is constrained by a range with the given estimated selectivity.
Range { selectivity: f64 },
/// The leading column is constrained by an IN list expanded into `probe_count` probes.
InExpansion { probe_count: usize },
/// The leading column is constrained by a LIKE/GLOB prefix; `low` is the
/// prefix itself and `high` the upper bound recorded for it, if any.
LikePrefix { low: String, high: Option<String> },
/// No WHERE term can make use of the index.
NotUsable,
}
/// Constraint found on the index column that immediately follows the equality
/// prefix of an [`IndexUsability::MultiColumnEquality`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MultiColumnTrailingConstraint {
/// No usable constraint on the next column (or no next column exists).
None,
/// The next column has a range or BETWEEN constraint.
Range,
/// The next column has an IN list expanded into `probe_count` probes.
InExpansion { probe_count: usize },
/// The next column has a LIKE/GLOB prefix constraint.
LikePrefix,
}
/// Parameters of a skip-scan over an index whose leading column is unconstrained,
/// produced by `analyze_skip_scan_candidate`.
#[derive(Debug, Clone, Copy, PartialEq)]
struct SkipScanCandidate {
/// Estimated number of distinct leading-column values to iterate over.
leading_probes: usize,
/// Probes needed on the second column per leading value (1, or the IN-list size).
trailing_probe_count: usize,
/// Estimated fraction of the table matched by a single probe.
per_probe_selectivity: f64,
}
/// Per-index-column digest of the kinds of WHERE terms that reference it.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct IndexColumnTermSummary {
/// At least one equality term references the column.
has_equality: bool,
/// Probe count of an IN-list term on the column; `analyze_index_usability`
/// keeps the smallest count it encounters.
first_in_probe_count: Option<usize>,
/// At least one range-like term references the column.
has_range: bool,
/// `(prefix, upper_bound)` of the first LIKE/GLOB prefix term on the column.
first_like_prefix: Option<(String, Option<String>)>,
}
/// A single WHERE conjunct together with its planner classification, as built
/// by `classify_where_term`.
#[derive(Debug, Clone)]
pub struct WhereTerm<'a> {
/// The original expression this term was classified from.
pub expr: &'a Expr,
/// The column the term constrains, when one could be identified.
pub column: Option<WhereColumn>,
/// The kind of constraint the term places on `column`.
pub kind: WhereTermKind,
}
/// A column reference extracted from a WHERE expression.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WhereColumn {
/// Table qualifier as written in the query, if any.
pub table: Option<String>,
/// Column name as written in the query.
pub column: String,
}
/// Classification of a WHERE conjunct assigned by `classify_where_term`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WhereTermKind {
/// `column = expr` on a non-rowid column.
Equality,
/// `<`, `<=`, `>` or `>=` comparison.
Range,
/// Non-negated `BETWEEN`.
Between,
/// Non-negated `IN`, or an OR-chain of equalities on one column; `count` is
/// the number of probes (a fixed guess of 10 for subquery/table sets).
InList { count: usize },
/// Non-negated LIKE/GLOB whose pattern starts with a literal prefix.
LikePrefix {
/// The literal prefix every match must start with.
prefix: String,
/// Exclusive upper bound for the prefix range, when one exists.
upper_bound: Option<String>,
},
/// `=` comparison on a column whose name is a rowid alias.
RowidEquality,
/// Anything the planner cannot use to drive an index.
Other,
}
/// Splits `expr` into its top-level `AND` conjuncts, in left-to-right order.
///
/// An expression that is not an `AND` yields a single-element vector.
#[must_use]
pub fn decompose_where(expr: &Expr) -> Vec<&Expr> {
    let mut conjuncts = Vec::new();
    collect_conjuncts(expr, &mut conjuncts);
    conjuncts
}
/// Appends the leaves of a (possibly nested) `AND` tree to `out`, left to right.
fn collect_conjuncts<'a>(expr: &'a Expr, out: &mut Vec<&'a Expr>) {
    match expr {
        Expr::BinaryOp {
            left,
            op: AstBinaryOp::And,
            right,
            ..
        } => {
            collect_conjuncts(left, out);
            collect_conjuncts(right, out);
        }
        leaf => out.push(leaf),
    }
}
/// Appends the leaves of a (possibly nested) `OR` tree to `out`, left to right.
fn collect_disjuncts<'a>(expr: &'a Expr, out: &mut Vec<&'a Expr>) {
    match expr {
        Expr::BinaryOp {
            left,
            op: AstBinaryOp::Or,
            right,
            ..
        } => {
            collect_disjuncts(left, out);
            collect_disjuncts(right, out);
        }
        leaf => out.push(leaf),
    }
}
/// Strict column match: names must be equal ignoring ASCII case, and the table
/// qualifiers must either both be absent or both be present and equal.
fn where_columns_equivalent(left: &WhereColumn, right: &WhereColumn) -> bool {
    if !left.column.eq_ignore_ascii_case(&right.column) {
        return false;
    }
    match (left.table.as_ref(), right.table.as_ref()) {
        (None, None) => true,
        (Some(l), Some(r)) => l.eq_ignore_ascii_case(r),
        // Qualified on one side only.
        (Some(_), None) | (None, Some(_)) => false,
    }
}
/// Recognises `col = a OR col = b OR ...` as an IN-list on `col`.
///
/// Returns the shared column and the number of disjuncts when every disjunct is
/// an equality with a column on exactly one side, the column is not a rowid
/// alias, and all disjuncts name an equivalent column. Otherwise returns `None`.
fn classify_or_disjunction_as_in_list(expr: &Expr) -> Option<(WhereColumn, usize)> {
    let mut disjuncts = Vec::new();
    collect_disjuncts(expr, &mut disjuncts);
    let probe_count = disjuncts.len();
    if probe_count < 2 {
        return None;
    }
    let mut shared_column: Option<WhereColumn> = None;
    for disjunct in disjuncts {
        let Expr::BinaryOp {
            left,
            op: AstBinaryOp::Eq,
            right,
            ..
        } = disjunct
        else {
            return None;
        };
        // Exactly one operand may be a bare column.
        let column = extract_where_column(left).xor(extract_where_column(right))?;
        if is_rowid_column(&column) {
            return None;
        }
        match shared_column.as_ref() {
            Some(existing) => {
                if !where_columns_equivalent(existing, &column) {
                    return None;
                }
            }
            None => shared_column = Some(column),
        }
    }
    shared_column.map(|column| (column, probe_count))
}
#[must_use]
#[allow(clippy::too_many_lines)]
pub fn classify_where_term(expr: &Expr) -> WhereTerm<'_> {
match expr {
Expr::BinaryOp {
op: AstBinaryOp::Or,
..
} => {
if let Some((column, probe_count)) = classify_or_disjunction_as_in_list(expr) {
tracing::debug!(
target: "fsqlite.planner",
rewrite = "or_disjunction_to_in_list",
column = ?column,
probe_count,
"planner.where_term.rewrite"
);
return WhereTerm {
expr,
column: Some(column),
kind: WhereTermKind::InList { count: probe_count },
};
}
WhereTerm {
expr,
column: None,
kind: WhereTermKind::Other,
}
}
Expr::BinaryOp {
left,
op: AstBinaryOp::Eq,
right,
..
} => {
if matches!(left.as_ref(), Expr::Literal(Literal::Null, _))
|| matches!(right.as_ref(), Expr::Literal(Literal::Null, _))
{
return WhereTerm {
expr,
column: None,
kind: WhereTermKind::Other,
};
}
if let Some(wc) = extract_where_column(left) {
if is_rowid_column(&wc) {
return WhereTerm {
expr,
column: Some(wc),
kind: WhereTermKind::RowidEquality,
};
}
return WhereTerm {
expr,
column: Some(wc),
kind: WhereTermKind::Equality,
};
}
if let Some(wc) = extract_where_column(right) {
if is_rowid_column(&wc) {
return WhereTerm {
expr,
column: Some(wc),
kind: WhereTermKind::RowidEquality,
};
}
return WhereTerm {
expr,
column: Some(wc),
kind: WhereTermKind::Equality,
};
}
WhereTerm {
expr,
column: None,
kind: WhereTermKind::Other,
}
}
Expr::BinaryOp {
left,
op: AstBinaryOp::Lt | AstBinaryOp::Le | AstBinaryOp::Gt | AstBinaryOp::Ge,
right,
..
} => {
let column = extract_where_column(left).or_else(|| extract_where_column(right));
WhereTerm {
expr,
column,
kind: WhereTermKind::Range,
}
}
Expr::Between {
expr: inner, not, ..
} if !not => {
let column = extract_where_column(inner);
WhereTerm {
expr,
column,
kind: WhereTermKind::Between,
}
}
Expr::In {
expr: inner,
set,
not,
..
} if !not => {
let column = extract_where_column(inner);
let count = match set {
InSet::List(items) => items.len(),
InSet::Subquery(_) | InSet::Table(_) => 10, };
WhereTerm {
expr,
column,
kind: WhereTermKind::InList { count },
}
}
Expr::Like {
expr: inner,
pattern,
op,
not,
escape,
..
} if !not => {
let column = extract_where_column(inner);
let (prefix, operator) = match op {
LikeOp::Glob => (extract_glob_prefix(pattern), "GLOB"),
LikeOp::Like => {
let prefix = extract_like_prefix(pattern, escape.as_deref())
.filter(|prefix| is_like_prefix_safe_for_column(column.as_ref(), prefix));
(prefix, "LIKE")
}
LikeOp::Match | LikeOp::Regexp => (None, "MATCH/REGEXP"),
};
if let Some(pfx) = prefix {
let upper_bound = like_prefix_upper_bound(&pfx);
tracing::debug!(
target: "fsqlite.planner",
rewrite = "pattern_prefix_to_range",
operator,
column = ?column,
prefix = %pfx,
upper_bound = ?upper_bound,
"planner.where_term.rewrite"
);
WhereTerm {
expr,
column,
kind: WhereTermKind::LikePrefix {
upper_bound,
prefix: pfx,
},
}
} else {
WhereTerm {
expr,
column,
kind: WhereTermKind::Other,
}
}
}
_ => WhereTerm {
expr,
column: None,
kind: WhereTermKind::Other,
},
}
}
/// Converts a bare column reference expression into a [`WhereColumn`];
/// returns `None` for every other expression kind.
fn extract_where_column(expr: &Expr) -> Option<WhereColumn> {
    let Expr::Column(col_ref, _) = expr else {
        return None;
    };
    let table = col_ref.table.as_ref().map(ToString::to_string);
    let column = col_ref.column.to_string();
    Some(WhereColumn { table, column })
}
/// Reports whether the column's name is one of the rowid aliases; the table
/// qualifier is not consulted.
fn is_rowid_column(wc: &WhereColumn) -> bool {
    let name = &wc.column;
    is_rowid_alias_name(name)
}
/// Returns `true` when `term` is an equality on the rowid of `table_name`:
/// either it was classified as `RowidEquality`, or it is a plain equality on a
/// column that one of `rowid_alias_hints` maps to the rowid.
fn where_term_matches_rowid_equality(
    table_name: &str,
    term: &WhereTerm<'_>,
    rowid_alias_hints: &[RowidAliasHint],
) -> bool {
    match &term.kind {
        WhereTermKind::RowidEquality => true,
        WhereTermKind::Equality => term.column.as_ref().is_some_and(|column| {
            rowid_alias_hints
                .iter()
                .any(|hint| hint.matches_column(table_name, column))
        }),
        _ => false,
    }
}
/// Returns `true` when `term` is a range or BETWEEN constraint on a rowid alias
/// name or on a column that `rowid_alias_hints` maps to the rowid of `table_name`.
fn where_term_matches_rowid_range(
    table_name: &str,
    term: &WhereTerm<'_>,
    rowid_alias_hints: &[RowidAliasHint],
) -> bool {
    if !matches!(term.kind, WhereTermKind::Range | WhereTermKind::Between) {
        return false;
    }
    let Some(column) = term.column.as_ref() else {
        return false;
    };
    is_rowid_column(column)
        || rowid_alias_hints
            .iter()
            .any(|hint| hint.matches_column(table_name, column))
}
/// Returns the first term in `terms` that is an equality on the rowid of
/// `table_name`, if any.
fn find_rowid_equality_term<'terms, 'expr>(
    table_name: &str,
    terms: &'terms [WhereTerm<'expr>],
    rowid_alias_hints: &[RowidAliasHint],
) -> Option<&'terms WhereTerm<'expr>> {
    for term in terms {
        if where_term_matches_rowid_equality(table_name, term, rowid_alias_hints) {
            return Some(term);
        }
    }
    None
}
/// Returns the column name used by the first rowid range/BETWEEN term in
/// `terms`, if any.
fn find_rowid_range_column(
    table_name: &str,
    terms: &[WhereTerm<'_>],
    rowid_alias_hints: &[RowidAliasHint],
) -> Option<String> {
    terms
        .iter()
        .filter(|term| where_term_matches_rowid_range(table_name, term, rowid_alias_hints))
        .find_map(|term| term.column.as_ref().map(|column| column.column.clone()))
}
/// For a binary expression with a bare column on one side, returns a clone of
/// the operand on the other side.
///
/// The left side is checked first, so `a = b` yields `b`. Returns `None` when
/// `expr` is not a binary operation or neither operand is a bare column.
fn extract_comparison_operand(expr: &Expr) -> Option<Expr> {
    let Expr::BinaryOp { left, right, .. } = expr else {
        return None;
    };
    let operand = if extract_where_column(left).is_some() {
        right
    } else if extract_where_column(right).is_some() {
        left
    } else {
        return None;
    };
    Some(operand.as_ref().clone())
}
/// Derives the concrete probe (lookup key or range bounds) for an already
/// chosen access path.
///
/// * full table scans have no probe;
/// * rowid lookups use the operand of the rowid equality term;
/// * equality index scans use an equality, or failing that an IN list, on the
///   index's leading column;
/// * range and covering scans use the bounds found for the index's leading
///   column, or for the rowid column when the path names no index.
///
/// Returns `None` whenever the required term, index or operand cannot be found.
fn extract_access_path_probe_with_rowid_aliases(
    best: &AccessPath,
    indexes: &[IndexInfo],
    where_terms: &[WhereTerm<'_>],
    rowid_alias_hints: &[RowidAliasHint],
) -> Option<AccessPathProbe> {
    /// True when `term` names `column` (ASCII case-insensitive, qualifier ignored).
    fn constrains(term: &WhereTerm<'_>, column: &str) -> bool {
        term.column
            .as_ref()
            .is_some_and(|c| c.column.eq_ignore_ascii_case(column))
    }

    match &best.kind {
        AccessPathKind::FullTableScan => None,
        AccessPathKind::RowidLookup => {
            let term = find_rowid_equality_term(&best.table, where_terms, rowid_alias_hints)?;
            extract_comparison_operand(term.expr).map(|target| AccessPathProbe::RowidEquality {
                target: Box::new(target),
            })
        }
        AccessPathKind::IndexScanEquality => {
            let index_name = best.index.as_deref()?;
            let idx = indexes
                .iter()
                .find(|i| i.name.eq_ignore_ascii_case(index_name))?;
            let leading_col = idx.columns.first()?;

            // A plain equality on the leading column wins over an IN list.
            let equality_term = where_terms.iter().find(|t| {
                matches!(t.kind, WhereTermKind::Equality) && constrains(t, leading_col)
            });
            if let Some(term) = equality_term {
                let target = extract_comparison_operand(term.expr)?;
                return Some(AccessPathProbe::Equality {
                    column: leading_col.clone(),
                    target: Box::new(target),
                });
            }

            let in_list_term = where_terms.iter().find(|t| {
                matches!(t.kind, WhereTermKind::InList { .. }) && constrains(t, leading_col)
            });
            match in_list_term {
                Some(term) => extract_in_list_probe(term.expr, leading_col),
                None => None,
            }
        }
        AccessPathKind::IndexScanRange { .. } | AccessPathKind::CoveringIndexScan { .. } => {
            let Some(index_name) = best.index.as_deref() else {
                // No index on the path: this is a range scan over the rowid.
                let rowid_col =
                    find_rowid_range_column(&best.table, where_terms, rowid_alias_hints)?;
                return extract_range_probe_for_column(where_terms, &rowid_col);
            };
            let idx = indexes
                .iter()
                .find(|i| i.name.eq_ignore_ascii_case(index_name))?;
            let leading_col = idx.columns.first()?;
            extract_range_probe_for_column(where_terms, leading_col)
        }
    }
}
/// Builds a probe for `leading_col` from the WHERE terms that reference it.
///
/// Terms are visited in order. The first equality, LIKE-prefix or non-negated
/// BETWEEN term on the column returns immediately with the corresponding probe.
/// Plain range comparisons accumulate into lower/upper bounds (a later term
/// overwrites an earlier bound of the same side) and are returned at the end.
/// Returns `None` when no bound was found.
fn extract_range_probe_for_column(
    where_terms: &[WhereTerm<'_>],
    leading_col: &str,
) -> Option<AccessPathProbe> {
    // Each bound is `(value, inclusive)`.
    let mut lower: Option<(Box<Expr>, bool)> = None;
    let mut upper: Option<(Box<Expr>, bool)> = None;

    for term in where_terms {
        let Some(col) = term
            .column
            .as_ref()
            .filter(|c| c.column.eq_ignore_ascii_case(leading_col))
        else {
            continue;
        };
        match &term.kind {
            WhereTermKind::Equality => {
                let target = extract_comparison_operand(term.expr)?;
                return Some(AccessPathProbe::Equality {
                    column: col.column.clone(),
                    target: Box::new(target),
                });
            }
            WhereTermKind::LikePrefix {
                prefix,
                upper_bound,
            } => {
                // The prefix itself is an inclusive lower bound; the computed
                // upper bound, when present, is exclusive.
                let low = Expr::Literal(Literal::String(prefix.clone()), Span::ZERO);
                let high = upper_bound.as_ref().map(|ub| {
                    (
                        Box::new(Expr::Literal(Literal::String(ub.clone()), Span::ZERO)),
                        false,
                    )
                });
                return Some(AccessPathProbe::Range {
                    column: col.column.clone(),
                    lower: Some((Box::new(low), true)),
                    upper: high,
                });
            }
            WhereTermKind::Between => {
                if let Expr::Between {
                    low,
                    high,
                    not: false,
                    ..
                } = term.expr
                {
                    return Some(AccessPathProbe::Range {
                        column: col.column.clone(),
                        lower: Some((Box::new(low.as_ref().clone()), true)),
                        upper: Some((Box::new(high.as_ref().clone()), true)),
                    });
                }
            }
            WhereTermKind::Range => {
                let Expr::BinaryOp {
                    left, op, right, ..
                } = term.expr
                else {
                    continue;
                };
                // Which side the operator bounds when the column is the left
                // operand, and whether the bound is inclusive.
                let (lower_when_col_left, inclusive) = match op {
                    AstBinaryOp::Gt => (true, false),
                    AstBinaryOp::Ge => (true, true),
                    AstBinaryOp::Lt => (false, false),
                    AstBinaryOp::Le => (false, true),
                    _ => continue,
                };
                let col_on_left = extract_where_column(left).is_some();
                let value = if col_on_left { right } else { left };
                let bound = Some((Box::new(value.as_ref().clone()), inclusive));
                // With the column on the right the comparison reads mirrored,
                // so the bound applies to the opposite side.
                if lower_when_col_left == col_on_left {
                    lower = bound;
                } else {
                    upper = bound;
                }
            }
            _ => {}
        }
    }

    if lower.is_none() && upper.is_none() {
        return None;
    }
    Some(AccessPathProbe::Range {
        column: leading_col.to_owned(),
        lower,
        upper,
    })
}
/// Builds an IN-list probe on `column` from a non-negated `IN (v1, v2, ...)`
/// expression. Returns `None` for any other expression or for an empty list.
fn extract_in_list_probe(expr: &Expr, column: &str) -> Option<AccessPathProbe> {
    let Expr::In {
        set: InSet::List(items),
        not: false,
        ..
    } = expr
    else {
        return None;
    };
    if items.is_empty() {
        return None;
    }
    Some(AccessPathProbe::InList {
        column: column.to_owned(),
        values: items.iter().cloned().map(Box::new).collect(),
    })
}
/// Extracts the literal prefix of a GLOB pattern of the form `prefix*`.
///
/// The pattern must be a string literal made of a non-empty run of ordinary
/// characters followed only by one or more `*`. Any `?` or `[` in the pattern,
/// or any character after the first `*` other than `*`, disqualifies it.
fn extract_glob_prefix(pattern: &Expr) -> Option<String> {
    let Expr::Literal(Literal::String(text), _) = pattern else {
        return None;
    };
    // Split at the first `*`; a pattern with no `*` has no usable prefix.
    let (prefix, tail) = text.split_once('*')?;
    let prefix_is_literal = !prefix.is_empty() && !prefix.contains(['?', '[']);
    let tail_is_only_stars = tail.chars().all(|ch| ch == '*');
    if prefix_is_literal && tail_is_only_stars {
        Some(prefix.to_owned())
    } else {
        None
    }
}
/// Extracts the literal prefix of a LIKE pattern of the form `prefix%`.
///
/// `escape`, when given, must be a string literal of exactly one character; the
/// character following an escape is taken literally. The pattern must be a
/// string literal consisting of a non-empty literal run followed only by `%`.
/// A `_` wildcard, any character (or escape) after the first `%`, or a dangling
/// escape at the end of the pattern disqualifies it.
fn extract_like_prefix(pattern: &Expr, escape: Option<&Expr>) -> Option<String> {
    let escape_char = if let Some(escape_expr) = escape {
        let Expr::Literal(Literal::String(escape_text), _) = escape_expr else {
            return None;
        };
        let mut escape_chars = escape_text.chars();
        match (escape_chars.next(), escape_chars.next()) {
            (Some(only), None) => Some(only),
            _ => return None,
        }
    } else {
        None
    };

    let Expr::Literal(Literal::String(text), _) = pattern else {
        return None;
    };
    let mut prefix = String::new();
    let mut wildcard_seen = false;
    let mut remaining = text.chars();
    while let Some(ch) = remaining.next() {
        if Some(ch) == escape_char {
            if wildcard_seen {
                return None;
            }
            // The escaped character is literal; a trailing escape is invalid.
            prefix.push(remaining.next()?);
        } else if ch == '%' {
            wildcard_seen = true;
        } else if ch == '_' || wildcard_seen {
            return None;
        } else {
            prefix.push(ch);
        }
    }
    (wildcard_seen && !prefix.is_empty()).then_some(prefix)
}
/// Returns `true` when a LIKE prefix contains no ASCII letters.
///
/// The column argument is currently unused.
fn is_like_prefix_safe_for_column(_column: Option<&WhereColumn>, prefix: &str) -> bool {
    !prefix.chars().any(|ch| ch.is_ascii_alphabetic())
}
/// Computes the smallest string that sorts after every string starting with
/// `prefix` in code-point order, for use as an exclusive upper bound.
///
/// The last character that has a successor is replaced by that successor and
/// everything after it is dropped. Trailing `char::MAX` characters have no
/// successor and are skipped. Returns `None` when no character can be
/// incremented (an empty prefix, or one made only of `char::MAX`).
///
/// U+D7FF is followed by the surrogate range, which is not valid as a `char`;
/// its successor is therefore U+E000 rather than falling back to an earlier
/// character, which would produce a needlessly wide range.
fn like_prefix_upper_bound(prefix: &str) -> Option<String> {
    let mut chars: Vec<char> = prefix.chars().collect();
    while let Some(last) = chars.pop() {
        let successor = match last {
            char::MAX => None,
            '\u{D7FF}' => Some('\u{E000}'),
            other => char::from_u32(u32::from(other) + 1),
        };
        if let Some(next) = successor {
            chars.push(next);
            return Some(chars.into_iter().collect());
        }
    }
    None
}
/// Determines how the classified WHERE `terms` can use `index`.
///
/// Expression indexes are delegated to `analyze_expression_index_usability`.
/// For ordinary indexes each term is matched against the index columns (by
/// name, and by table when the term is qualified) and the result is chosen in
/// this order:
///
/// 1. an equality prefix of two or more columns, or of one column followed by
///    an IN / LIKE-prefix / range constraint on the next column, gives
///    [`IndexUsability::MultiColumnEquality`];
/// 2. otherwise the strongest constraint on the leading column: equality, then
///    IN list (smallest probe count) or LIKE prefix, whichever was seen first;
/// 3. otherwise a range on the leading column;
/// 4. otherwise [`IndexUsability::NotUsable`].
#[must_use]
#[allow(clippy::too_many_lines)]
pub fn analyze_index_usability(index: &IndexInfo, terms: &[WhereTerm<'_>]) -> IndexUsability {
    if !index.expression_columns.is_empty() {
        return analyze_expression_index_usability(index, terms);
    }
    if index.columns.is_empty() {
        return IndexUsability::NotUsable;
    }
    let refers_to = |wc: &WhereColumn, idx_col: &str| -> bool {
        wc.column.eq_ignore_ascii_case(idx_col)
            && wc
                .table
                .as_ref()
                .is_none_or(|t| t.eq_ignore_ascii_case(&index.table))
    };

    let mut per_column = vec![IndexColumnTermSummary::default(); index.columns.len()];
    // Best non-range constraint seen so far on the leading index column.
    let mut leading_constraint: Option<IndexUsability> = None;

    for term in terms {
        let Some(wc) = term.column.as_ref() else {
            continue;
        };
        let matching_slots = index
            .columns
            .iter()
            .enumerate()
            .filter(|&(_, index_column)| refers_to(wc, index_column));
        for (slot, _) in matching_slots {
            let is_leading = slot == 0;
            let summary = &mut per_column[slot];
            match &term.kind {
                WhereTermKind::Equality => {
                    summary.has_equality = true;
                    if is_leading {
                        // Equality always supersedes any earlier choice.
                        leading_constraint = Some(IndexUsability::Equality);
                    }
                }
                WhereTermKind::InList { count } => {
                    let count = *count;
                    if summary
                        .first_in_probe_count
                        .is_none_or(|existing| count < existing)
                    {
                        summary.first_in_probe_count = Some(count);
                    }
                    if is_leading {
                        // Only fill an empty slot or replace a larger IN list.
                        let adopt = match &leading_constraint {
                            None => true,
                            Some(IndexUsability::InExpansion { probe_count }) => {
                                count < *probe_count
                            }
                            Some(_) => false,
                        };
                        if adopt {
                            leading_constraint =
                                Some(IndexUsability::InExpansion { probe_count: count });
                        }
                    }
                }
                WhereTermKind::LikePrefix {
                    prefix,
                    upper_bound,
                } => {
                    if summary.first_like_prefix.is_none() {
                        summary.first_like_prefix = Some((prefix.clone(), upper_bound.clone()));
                    }
                    if is_leading && leading_constraint.is_none() {
                        leading_constraint = Some(IndexUsability::LikePrefix {
                            low: prefix.clone(),
                            high: upper_bound.clone(),
                        });
                    }
                }
                WhereTermKind::Range | WhereTermKind::Between => summary.has_range = true,
                WhereTermKind::RowidEquality | WhereTermKind::Other => {}
            }
        }
    }

    // Length of the unbroken run of equality-constrained leading columns.
    let eq_columns = per_column
        .iter()
        .take_while(|summary| summary.has_equality)
        .count();
    if eq_columns >= 1 {
        let trailing_constraint = match per_column.get(eq_columns) {
            Some(next) => {
                if let Some(probe_count) = next.first_in_probe_count {
                    MultiColumnTrailingConstraint::InExpansion { probe_count }
                } else if next.first_like_prefix.is_some() {
                    MultiColumnTrailingConstraint::LikePrefix
                } else if next.has_range {
                    MultiColumnTrailingConstraint::Range
                } else {
                    MultiColumnTrailingConstraint::None
                }
            }
            // Every index column is already pinned by equality.
            None => MultiColumnTrailingConstraint::None,
        };
        if eq_columns >= 2 || trailing_constraint != MultiColumnTrailingConstraint::None {
            return IndexUsability::MultiColumnEquality {
                eq_columns,
                trailing_constraint,
            };
        }
    }

    leading_constraint.unwrap_or_else(|| {
        if per_column[0].has_range {
            IndexUsability::Range {
                selectivity: DEFAULT_RANGE_SELECTIVITY,
            }
        } else {
            IndexUsability::NotUsable
        }
    })
}
fn analyze_expression_index_usability(
index: &IndexInfo,
terms: &[WhereTerm<'_>],
) -> IndexUsability {
let Some(first_expr) = index.expression_columns.first() else {
return IndexUsability::NotUsable;
};
for term in terms {
if let Expr::BinaryOp {
left,
op: AstBinaryOp::Eq,
right,
..
} = term.expr
{
let left_is_null = matches!(left.as_ref(), Expr::Literal(Literal::Null, _));
let right_is_null = matches!(right.as_ref(), Expr::Literal(Literal::Null, _));
if left_is_null || right_is_null {
continue;
}
if **left == *first_expr || **right == *first_expr {
return IndexUsability::Equality;
}
}
}
for term in terms {
if let Expr::BinaryOp {
left,
op: AstBinaryOp::Lt | AstBinaryOp::Le | AstBinaryOp::Gt | AstBinaryOp::Ge,
right,
..
} = term.expr
{
if **left == *first_expr || **right == *first_expr {
return IndexUsability::Range {
selectivity: DEFAULT_RANGE_SELECTIVITY,
};
}
}
if let Expr::Between {
expr: inner, not, ..
} = term.expr
{
if !*not && **inner == *first_expr {
return IndexUsability::Range {
selectivity: DEFAULT_RANGE_SELECTIVITY,
};
}
}
}
IndexUsability::NotUsable
}
// Assumed fraction of rows matched by a range constraint.
const DEFAULT_RANGE_SELECTIVITY: f64 = 0.33;
// Assumed fraction of rows matched by a LIKE/GLOB prefix constraint.
const LIKE_PREFIX_SELECTIVITY: f64 = 0.10;
// Per-probe selectivity of a skip-scan whose second column has an equality or IN constraint.
const SKIP_SCAN_EQ_SELECTIVITY: f64 = 0.01;
// Per-probe selectivity of a skip-scan whose second column has a range-like constraint.
const SKIP_SCAN_RANGE_SELECTIVITY: f64 = 0.20;
// Skip-scan is rejected when the leading column is estimated to have more distinct values than this.
const SKIP_SCAN_MAX_LEADING_DISTINCT: u64 = 16;
// Index pages assumed per distinct leading value when estimating the distinct count.
const SKIP_SCAN_PAGES_PER_LEADING_DISTINCT: u64 = 8;
/// Estimates the number of distinct leading-column values of `index` as one per
/// `SKIP_SCAN_PAGES_PER_LEADING_DISTINCT` index pages, never less than one.
fn estimate_skip_scan_leading_distinct(index: &IndexInfo) -> u64 {
    let by_pages = index.n_pages / SKIP_SCAN_PAGES_PER_LEADING_DISTINCT;
    by_pages.max(1)
}
/// Checks whether `index` can be used for a skip-scan: iterating over the
/// distinct values of an unconstrained leading column and probing the second
/// column for each.
///
/// A candidate is returned only when all of the following hold:
/// * the index has at least two columns;
/// * the table or the index statistics come from `ANALYZE`;
/// * no equality, range, BETWEEN, IN or LIKE-prefix term constrains the leading column;
/// * the estimated number of distinct leading values does not exceed
///   `SKIP_SCAN_MAX_LEADING_DISTINCT`;
/// * the second column has an equality, a non-empty IN list, or a range-like constraint.
fn analyze_skip_scan_candidate(
    table: &TableStats,
    index: &IndexInfo,
    terms: &[WhereTerm<'_>],
) -> Option<SkipScanCandidate> {
    let [leading_col, second_col, ..] = index.columns.as_slice() else {
        return None;
    };
    let has_analyze_stats = matches!(table.source, StatsSource::Analyze)
        || matches!(index.source, StatsSource::Analyze);
    if !has_analyze_stats {
        return None;
    }
    let col_matches = |wc: &WhereColumn, idx_col: &str| -> bool {
        wc.column.eq_ignore_ascii_case(idx_col)
            && wc
                .table
                .as_ref()
                .is_none_or(|t| t.eq_ignore_ascii_case(&index.table))
    };

    // A constrained leading column means an ordinary scan applies instead.
    let leading_constrained = terms.iter().any(|term| {
        let drives_index = matches!(
            term.kind,
            WhereTermKind::Equality
                | WhereTermKind::Range
                | WhereTermKind::Between
                | WhereTermKind::InList { .. }
                | WhereTermKind::LikePrefix { .. }
        );
        drives_index
            && term
                .column
                .as_ref()
                .is_some_and(|wc| col_matches(wc, leading_col))
    });
    if leading_constrained {
        return None;
    }

    let leading_distinct = estimate_skip_scan_leading_distinct(index);
    if leading_distinct > SKIP_SCAN_MAX_LEADING_DISTINCT {
        return None;
    }

    // Summarise the constraints on the second index column.
    let mut has_equality = false;
    let mut smallest_in_list: Option<usize> = None;
    let mut has_range = false;
    for term in terms {
        let on_second_col = term
            .column
            .as_ref()
            .is_some_and(|wc| col_matches(wc, second_col));
        if !on_second_col {
            continue;
        }
        match &term.kind {
            WhereTermKind::Equality => has_equality = true,
            WhereTermKind::InList { count } if *count > 0 => {
                smallest_in_list =
                    Some(smallest_in_list.map_or(*count, |existing| existing.min(*count)));
            }
            WhereTermKind::Range | WhereTermKind::Between | WhereTermKind::LikePrefix { .. } => {
                has_range = true;
            }
            _ => {}
        }
    }

    let (trailing_probe_count, per_probe_selectivity) = if has_equality {
        (1, SKIP_SCAN_EQ_SELECTIVITY)
    } else if let Some(probe_count) = smallest_in_list {
        (probe_count, SKIP_SCAN_EQ_SELECTIVITY)
    } else if has_range {
        (1, SKIP_SCAN_RANGE_SELECTIVITY)
    } else {
        return None;
    };
    Some(SkipScanCandidate {
        leading_probes: leading_distinct as usize,
        trailing_probe_count,
        per_probe_selectivity,
    })
}
/// Returns the number of partial join paths to keep at each search level.
///
/// * zero or one table: 1
/// * two tables: 5
/// * three or more tables: 18 for star-shaped queries, 12 otherwise
#[must_use]
pub fn compute_mx_choice(n_tables: usize, is_star: bool) -> usize {
    match (n_tables, is_star) {
        (0 | 1, _) => 1,
        (2, _) => 5,
        (_, true) => 18,
        (_, false) => 12,
    }
}
/// Reports whether the query has a star shape.
///
/// A query is a star when it involves at least three tables and some table
/// (the hub) has an equality join predicate with every other table. Tables
/// whose name is identical to the hub's name are not counted as partners.
#[must_use]
pub fn detect_star_query(tables: &[TableStats], where_terms: &[WhereTerm<'_>]) -> bool {
    if tables.len() < 3 {
        return false;
    }
    let names: Vec<&str> = tables.iter().map(|t| t.name.as_str()).collect();
    let partners_needed = names.len() - 1;
    names.iter().any(|hub| {
        let partners = names
            .iter()
            .filter(|spoke| **spoke != *hub && has_join_predicate(*hub, **spoke, where_terms))
            .count();
        partners == partners_needed
    })
}
/// Reports whether some WHERE term is an equality between a column of
/// `table_a` and a column of `table_b` (in either order).
///
/// Table qualifiers are compared ASCII case-insensitively; an unqualified
/// column is treated as having the empty table name.
fn has_join_predicate(table_a: &str, table_b: &str, terms: &[WhereTerm<'_>]) -> bool {
    terms.iter().any(|term| {
        let Expr::BinaryOp {
            left,
            op: AstBinaryOp::Eq,
            right,
            ..
        } = term.expr
        else {
            return false;
        };
        let (Some(lhs), Some(rhs)) = (extract_where_column(left), extract_where_column(right))
        else {
            return false;
        };
        let lhs_table = lhs.table.as_deref().unwrap_or("");
        let rhs_table = rhs.table.as_deref().unwrap_or("");
        let forward =
            lhs_table.eq_ignore_ascii_case(table_a) && rhs_table.eq_ignore_ascii_case(table_b);
        let reverse =
            lhs_table.eq_ignore_ascii_case(table_b) && rhs_table.eq_ignore_ascii_case(table_a);
        forward || reverse
    })
}
// Fraction of the cross product assumed to survive each pairwise hash join
// when estimating intermediate row counts.
const HASH_JOIN_SELECTIVITY_HEURISTIC: f64 = 0.25;
// Per-relation overhead applied to the AGM bound when costing a Leapfrog
// Triejoin: cost = bound * (1 + factor * relation_count).
const LEAPFROG_SEEK_OVERHEAD_FACTOR: f64 = 0.20;
/// Identifies a table-qualified column in a join predicate.
///
/// As built by `extract_qualified_column`, `table` is the canonical table key
/// and `column` is the ASCII-lowercased column name.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct ColumnKey {
table: String,
column: String,
}
/// An equality join predicate `left = right` between columns of two
/// different tables.
#[derive(Debug, Clone, PartialEq, Eq)]
struct EquiJoinPredicate {
left: ColumnKey,
right: ColumnKey,
}
/// Join hypergraph built by `build_trie_hypergraph` for a join component.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TrieHypergraph {
// For each relation (in component order), the sorted ids of the join
// variables it participates in.
relation_variables: Vec<Vec<usize>>,
// Total number of distinct join variables.
variable_count: usize,
// Number of join variables per relation (identical for every relation).
arity: usize,
}
/// Disjoint-set forest over `0..size` with path compression and union by rank.
#[derive(Debug, Clone, PartialEq, Eq)]
struct UnionFind {
    parent: Vec<usize>,
    rank: Vec<usize>,
}

impl UnionFind {
    /// Creates `size` singleton sets, each element being its own root.
    fn new(size: usize) -> Self {
        let parent = (0..size).collect::<Vec<_>>();
        let rank = vec![0; size];
        Self { parent, rank }
    }

    /// Returns the root of the set containing `idx`, re-pointing every node
    /// on the walked path directly at that root.
    ///
    /// Panics if `idx` is out of bounds.
    fn find(&mut self, idx: usize) -> usize {
        // First pass: locate the root.
        let mut root = idx;
        while self.parent[root] != root {
            root = self.parent[root];
        }
        // Second pass: compress the path.
        let mut cursor = idx;
        while cursor != root {
            let next = self.parent[cursor];
            self.parent[cursor] = root;
            cursor = next;
        }
        root
    }

    /// Merges the sets containing `left` and `right`.
    ///
    /// The root with the lower rank is attached under the other; on a tie the
    /// right root goes under the left root, whose rank then grows by one.
    fn union(&mut self, left: usize, right: usize) {
        let left_root = self.find(left);
        let right_root = self.find(right);
        if left_root == right_root {
            return;
        }
        let left_rank = self.rank[left_root];
        let right_rank = self.rank[right_root];
        if left_rank < right_rank {
            self.parent[left_root] = right_root;
        } else {
            if left_rank == right_rank {
                self.rank[left_root] = left_rank + 1;
            }
            self.parent[right_root] = left_root;
        }
    }
}
/// Splits a join order into segments and picks a join operator for each.
///
/// Returns an empty vector when `join_order` has fewer than two tables.
/// Otherwise, when the `leapfrog_join` feature flag is set and the FROM clause
/// shape is compatible, every connected component of three or more tables
/// (connected through equality predicates, excluding tables touched by
/// non-equality join predicates) is costed both as a Leapfrog Triejoin and as
/// a chain of pairwise hash joins; the component becomes a
/// `LeapfrogTriejoin` segment when the leapfrog cost is strictly lower.
///
/// If no component qualifies, a single `HashJoin` segment covering the whole
/// join order is returned, with `reason` explaining why leapfrog was not used.
/// Otherwise tables not covered by a leapfrog segment are grouped into one
/// trailing `HashJoin` segment (only when at least two remain), and segments
/// are sorted by the join-order position of their first relation.
#[must_use]
#[allow(clippy::too_many_lines)]
pub fn choose_join_segments(
join_order: &[String],
tables: &[TableStats],
where_terms: &[WhereTerm<'_>],
from_clause: Option<&FromClause>,
feature_flags: PlannerFeatureFlags,
) -> Vec<JoinPlanSegment> {
if join_order.len() < 2 {
return vec![];
}
// All internal bookkeeping uses canonical table keys; the original spelling
// is restored through `canonical_to_original` when segments are emitted.
let join_order_canonical = join_order
.iter()
.map(|table| canonical_table_key(table))
.collect::<Vec<_>>();
let canonical_to_original = join_order
.iter()
.map(|table| (canonical_table_key(table), table.clone()))
.collect::<HashMap<_, _>>();
let join_table_set = join_order_canonical.iter().cloned().collect::<HashSet<_>>();
let rows_by_table = build_table_row_map(tables, &join_order_canonical);
let (equi_predicates, theta_join_tables) =
collect_join_predicates(where_terms, &join_table_set);
let leapfrog_shape_supported = from_clause_supports_leapfrog(from_clause);
// Each entry: (relations in join order, leapfrog cost, hash cost, trie arity).
let mut selected_components: Vec<(Vec<String>, f64, f64, usize)> = vec![];
let mut selected_tables = HashSet::<String>::new();
if feature_flags.leapfrog_join && leapfrog_shape_supported {
// Tables that take part in a non-equality join predicate are never
// leapfrog candidates.
let leapfrog_candidates = join_order_canonical
.iter()
.filter(|table| !theta_join_tables.contains(*table))
.cloned()
.collect::<Vec<_>>();
for component in connected_components(&leapfrog_candidates, &equi_predicates) {
if component.len() < 3 {
continue;
}
let component_set = component.iter().cloned().collect::<HashSet<_>>();
// Re-establish join-order sequence inside the component.
let ordered_component = ordered_subset(&join_order_canonical, &component_set);
let Some(hypergraph) = build_trie_hypergraph(&ordered_component, &equi_predicates)
else {
continue;
};
let hash_cost = estimate_pairwise_hash_join_cost(&ordered_component, &rows_by_table);
let Some(agm_bound) =
estimate_agm_upper_bound(&ordered_component, &rows_by_table, &hypergraph)
else {
continue;
};
// leapfrog_cost = agm_bound * (1 + LEAPFROG_SEEK_OVERHEAD_FACTOR * relation_count)
let leapfrog_cost = agm_bound
* LEAPFROG_SEEK_OVERHEAD_FACTOR.mul_add(ordered_component.len() as f64, 1.0);
if leapfrog_cost < hash_cost {
for table in &ordered_component {
selected_tables.insert(table.clone());
}
selected_components.push((
ordered_component,
leapfrog_cost,
hash_cost,
hypergraph.arity,
));
}
}
}
let mut segments = selected_components
.into_iter()
.map(
|(relations, leapfrog_cost, hash_cost, arity)| JoinPlanSegment {
relations: relations
.into_iter()
.filter_map(|table| canonical_to_original.get(&table).cloned())
.collect(),
operator: JoinOperator::LeapfrogTriejoin,
estimated_cost: leapfrog_cost,
// NOTE(review): the value labelled "AGM estimate" is the leapfrog
// cost (AGM bound scaled by the seek overhead), not the raw bound.
reason: format!(
"AGM estimate {:.1} beats hash cost {:.1}; trie arity {}",
leapfrog_cost, hash_cost, arity
),
},
)
.collect::<Vec<_>>();
if segments.is_empty() {
// No leapfrog segment was chosen: fall back to one hash-join segment
// over the whole join order and report the first applicable reason.
let hash_cost = estimate_pairwise_hash_join_cost(&join_order_canonical, &rows_by_table);
let reason = if !feature_flags.leapfrog_join {
"leapfrog_join feature flag disabled".to_owned()
} else if !leapfrog_shape_supported {
"outer/natural/theta join shape is not Leapfrog-compatible".to_owned()
} else if join_order.len() < 3 {
"2-way joins stay on pairwise hash join".to_owned()
} else if !theta_join_tables.is_empty() {
"theta/non-equi join predicates require hash fallback".to_owned()
} else {
"no compatible 3+ equi-join component with lower AGM estimate".to_owned()
};
return vec![JoinPlanSegment {
relations: join_order.to_vec(),
operator: JoinOperator::HashJoin,
estimated_cost: hash_cost,
reason,
}];
}
let remaining_tables = join_order_canonical
.iter()
.filter(|table| !selected_tables.contains(*table))
.cloned()
.collect::<Vec<_>>();
// A single leftover table does not get a segment of its own.
if remaining_tables.len() >= 2 {
let hash_cost = estimate_pairwise_hash_join_cost(&remaining_tables, &rows_by_table);
segments.push(JoinPlanSegment {
relations: remaining_tables
.iter()
.filter_map(|table| canonical_to_original.get(table).cloned())
.collect(),
operator: JoinOperator::HashJoin,
estimated_cost: hash_cost,
reason: "remaining joins use pairwise hash join".to_owned(),
});
}
let join_order_position = join_order_canonical
.iter()
.enumerate()
.map(|(idx, table)| (table.clone(), idx))
.collect::<HashMap<_, _>>();
// Order segments by where their first relation appears in the join order;
// segments with no resolvable first relation sort last.
segments.sort_by_key(|segment| {
segment
.relations
.first()
.and_then(|table| {
join_order_position
.get(&canonical_table_key(table))
.copied()
})
.unwrap_or(usize::MAX)
});
segments
}
/// Builds a map from canonical table key to estimated row count.
///
/// Row counts from `tables` are clamped to at least one. Any table in
/// `join_order_canonical` that has no statistics is given a count of `1.0`.
fn build_table_row_map(
    tables: &[TableStats],
    join_order_canonical: &[String],
) -> HashMap<String, f64> {
    let mut rows_by_table = HashMap::new();
    for table in tables {
        let key = canonical_table_key(&table.name);
        let rows = table.n_rows.max(1) as f64;
        // A later entry with the same key replaces an earlier one.
        rows_by_table.insert(key, rows);
    }
    for key in join_order_canonical {
        rows_by_table.entry(key.clone()).or_insert(1.0);
    }
    rows_by_table
}
/// Scans WHERE terms for binary predicates between qualified columns of two
/// different tables that both belong to `join_table_set`.
///
/// Returns the equality predicates found, plus the set of tables that appear
/// in any such predicate whose operator is not `=`.
fn collect_join_predicates(
    where_terms: &[WhereTerm<'_>],
    join_table_set: &HashSet<String>,
) -> (Vec<EquiJoinPredicate>, HashSet<String>) {
    let mut equi_predicates = Vec::new();
    let mut theta_join_tables = HashSet::new();
    for term in where_terms {
        let Expr::BinaryOp {
            left, op, right, ..
        } = term.expr
        else {
            continue;
        };
        // Both sides must be table-qualified column references.
        let endpoints = extract_qualified_column(left)
            .and_then(|lhs| extract_qualified_column(right).map(|rhs| (lhs, rhs)));
        let Some((lhs, rhs)) = endpoints else {
            continue;
        };
        let same_table = lhs.table == rhs.table;
        if same_table {
            continue;
        }
        let both_joined =
            join_table_set.contains(&lhs.table) && join_table_set.contains(&rhs.table);
        if !both_joined {
            continue;
        }
        if matches!(op, AstBinaryOp::Eq) {
            equi_predicates.push(EquiJoinPredicate {
                left: lhs,
                right: rhs,
            });
        } else {
            theta_join_tables.extend([lhs.table, rhs.table]);
        }
    }
    (equi_predicates, theta_join_tables)
}
/// Converts a table-qualified column expression into a [`ColumnKey`].
///
/// Returns `None` for any other expression and for unqualified columns. The
/// table is canonicalised and the column name is ASCII-lowercased.
fn extract_qualified_column(expr: &Expr) -> Option<ColumnKey> {
    match expr {
        Expr::Column(column_ref, _) => column_ref.table.as_ref().map(|table| ColumnKey {
            table: canonical_table_key(table),
            column: column_ref.column.to_ascii_lowercase(),
        }),
        _ => None,
    }
}
fn connected_components(tables: &[String], predicates: &[EquiJoinPredicate]) -> Vec<Vec<String>> {
if tables.is_empty() {
return vec![];
}
let table_set = tables.iter().cloned().collect::<HashSet<_>>();
let mut adjacency = tables
.iter()
.map(|table| (table.clone(), HashSet::<String>::new()))
.collect::<HashMap<_, _>>();
for predicate in predicates {
if table_set.contains(&predicate.left.table) && table_set.contains(&predicate.right.table) {
adjacency
.entry(predicate.left.table.clone())
.or_default()
.insert(predicate.right.table.clone());
adjacency
.entry(predicate.right.table.clone())
.or_default()
.insert(predicate.left.table.clone());
}
}
let mut visited = HashSet::<String>::new();
let mut components = Vec::new();
for table in tables {
if visited.contains(table) {
continue;
}
let mut stack = vec![table.clone()];
let mut component = Vec::new();
while let Some(current) = stack.pop() {
if !visited.insert(current.clone()) {
continue;
}
component.push(current.clone());
if let Some(neighbors) = adjacency.get(¤t) {
for neighbor in neighbors {
if !visited.contains(neighbor) {
stack.push(neighbor.clone());
}
}
}
}
components.push(component);
}
components
}
/// Returns the tables of `join_order` that are members of `selected_tables`,
/// preserving their join-order sequence.
fn ordered_subset(join_order: &[String], selected_tables: &HashSet<String>) -> Vec<String> {
    let mut subset = Vec::new();
    for table in join_order {
        if selected_tables.contains(table) {
            subset.push(table.clone());
        }
    }
    subset
}
/// Estimates the cost of joining `component` left-to-right with pairwise
/// hash joins.
///
/// Each join costs the sum of the running intermediate row count and the next
/// relation's row count. The intermediate result is then assumed to be
/// `HASH_JOIN_SELECTIVITY_HEURISTIC` of the cross product, never below one
/// row. Tables missing from `rows_by_table` count as one row. Components with
/// fewer than two tables cost `0.0`.
fn estimate_pairwise_hash_join_cost(
    component: &[String],
    rows_by_table: &HashMap<String, f64>,
) -> f64 {
    let rows_of = |table: &String| -> f64 {
        rows_by_table.get(table).copied().unwrap_or(1.0).max(1.0)
    };
    let Some((first, rest)) = component.split_first() else {
        return 0.0;
    };
    if rest.is_empty() {
        return 0.0;
    }
    let mut running_rows = rows_of(first);
    let mut total_cost = 0.0;
    for table in rest {
        let relation_rows = rows_of(table);
        let smaller_side = running_rows.min(relation_rows);
        let larger_side = running_rows.max(relation_rows);
        total_cost += smaller_side + larger_side;
        running_rows = (running_rows * relation_rows * HASH_JOIN_SELECTIVITY_HEURISTIC).max(1.0);
    }
    total_cost
}
/// Builds the join hypergraph for `component` from its equality predicates.
///
/// Every column endpoint of a predicate inside the component gets an id;
/// endpoints that are equated (directly or transitively) are merged with a
/// union-find, and each resulting class becomes one join variable.
///
/// Returns `None` when:
/// * the component has fewer than two tables;
/// * no predicate lies entirely inside the component;
/// * some relation takes part in no join variable;
/// * relations do not all have the same number of join variables;
/// * some join variable appears in fewer than two relations.
#[allow(clippy::too_many_lines)]
fn build_trie_hypergraph(
component: &[String],
predicates: &[EquiJoinPredicate],
) -> Option<TrieHypergraph> {
if component.len() < 2 {
return None;
}
let component_set = component.iter().cloned().collect::<HashSet<_>>();
let table_to_index = component
.iter()
.enumerate()
.map(|(idx, table)| (table.clone(), idx))
.collect::<HashMap<_, _>>();
// Dense ids for column endpoints, assigned in first-seen order.
let mut endpoint_ids = HashMap::<ColumnKey, usize>::new();
// (left endpoint id, right endpoint id, left table, right table) per predicate.
let mut edge_endpoint_pairs = Vec::<(usize, usize, String, String)>::new();
for predicate in predicates {
if !component_set.contains(&predicate.left.table)
|| !component_set.contains(&predicate.right.table)
{
continue;
}
let left_entry = if let Some(existing) = endpoint_ids.get(&predicate.left).copied() {
existing
} else {
let next = endpoint_ids.len();
endpoint_ids.insert(predicate.left.clone(), next);
next
};
let right_entry = if let Some(existing) = endpoint_ids.get(&predicate.right).copied() {
existing
} else {
let next = endpoint_ids.len();
endpoint_ids.insert(predicate.right.clone(), next);
next
};
edge_endpoint_pairs.push((
left_entry,
right_entry,
predicate.left.table.clone(),
predicate.right.table.clone(),
));
}
if edge_endpoint_pairs.is_empty() {
return None;
}
// Merge equated endpoints so each equivalence class is one join variable.
let mut union_find = UnionFind::new(endpoint_ids.len());
for (left_id, right_id, _, _) in &edge_endpoint_pairs {
union_find.union(*left_id, *right_id);
}
// Dense variable ids per union-find root, assigned in first-seen order.
let mut root_to_variable = HashMap::<usize, usize>::new();
let mut relation_variable_sets = vec![HashSet::<usize>::new(); component.len()];
for (left_id, right_id, left_table, right_table) in edge_endpoint_pairs {
let left_root = union_find.find(left_id);
let right_root = union_find.find(right_id);
let left_variable = if let Some(existing) = root_to_variable.get(&left_root).copied() {
existing
} else {
let next = root_to_variable.len();
root_to_variable.insert(left_root, next);
next
};
let right_variable = if let Some(existing) = root_to_variable.get(&right_root).copied() {
existing
} else {
let next = root_to_variable.len();
root_to_variable.insert(right_root, next);
next
};
let left_index = *table_to_index.get(&left_table)?;
let right_index = *table_to_index.get(&right_table)?;
relation_variable_sets[left_index].insert(left_variable);
relation_variable_sets[right_index].insert(right_variable);
}
if relation_variable_sets.iter().any(HashSet::is_empty) {
return None;
}
// All relations must expose the same number of join variables.
let expected_arity = relation_variable_sets.first()?.len();
if expected_arity == 0
|| relation_variable_sets
.iter()
.any(|variables| variables.len() != expected_arity)
{
return None;
}
let variable_count = root_to_variable.len();
// Degree of a variable = number of relations it appears in.
let mut variable_degree = vec![0usize; variable_count];
for variables in &relation_variable_sets {
for variable in variables {
variable_degree[*variable] += 1;
}
}
if variable_degree.iter().any(|degree| *degree < 2) {
return None;
}
// Sort each relation's variables so the output is deterministic.
let relation_variables = relation_variable_sets
.into_iter()
.map(|variables| {
let mut ordered = variables.into_iter().collect::<Vec<_>>();
ordered.sort_unstable();
ordered
})
.collect::<Vec<_>>();
Some(TrieHypergraph {
relation_variables,
variable_count,
arity: expected_arity,
})
}
/// Estimates an output-size bound for joining `component` over `hypergraph`.
///
/// Each relation contributes `rows ^ w`, where `w` is the largest value of
/// `1 / degree(v)` over the relation's join variables `v` (degree = number of
/// relations containing `v`). The product is clamped to at least one.
///
/// Returns `None` when the hypergraph has no variables or its relation count
/// differs from `component.len()`. Tables missing from `rows_by_table` count
/// as one row.
fn estimate_agm_upper_bound(
    component: &[String],
    rows_by_table: &HashMap<String, f64>,
    hypergraph: &TrieHypergraph,
) -> Option<f64> {
    let relation_count = hypergraph.relation_variables.len();
    if hypergraph.variable_count == 0 || component.len() != relation_count {
        return None;
    }
    let mut degree = vec![0usize; hypergraph.variable_count];
    for &variable in hypergraph.relation_variables.iter().flatten() {
        degree[variable] += 1;
    }
    let bound = component
        .iter()
        .zip(&hypergraph.relation_variables)
        .fold(1.0_f64, |acc, (table, variables)| {
            let rows = rows_by_table.get(table).copied().unwrap_or(1.0).max(1.0);
            let weight = variables
                .iter()
                .map(|&variable| 1.0 / degree[variable] as f64)
                .fold(0.0, f64::max);
            acc * rows.powf(weight)
        });
    Some(bound.max(1.0))
}
/// Reports whether the FROM clause shape allows a Leapfrog Triejoin.
///
/// A missing FROM clause is accepted. Otherwise every join must be a
/// non-NATURAL inner or cross join, and its constraint (if any) must be either
/// a non-empty `USING` list or an `ON` expression that decomposes into one or
/// more conjuncts, each an equality between two columns.
fn from_clause_supports_leapfrog(from_clause: Option<&FromClause>) -> bool {
    let Some(from_clause) = from_clause else {
        return true;
    };
    from_clause.joins.iter().all(|join| {
        let kind_ok = matches!(join.join_type.kind, JoinKind::Inner | JoinKind::Cross);
        if join.join_type.natural || !kind_ok {
            return false;
        }
        match &join.constraint {
            None => true,
            Some(JoinConstraint::Using(columns)) => !columns.is_empty(),
            Some(JoinConstraint::On(expr)) => {
                let conjuncts = decompose_where(expr);
                !conjuncts.is_empty()
                    && conjuncts
                        .iter()
                        .all(|conjunct| expression_is_equi_column_predicate(conjunct))
            }
        }
    })
}
/// Reports whether `expr` is `column = column`, i.e. an equality whose two
/// operands are both recognised by `extract_where_column`.
fn expression_is_equi_column_predicate(expr: &Expr) -> bool {
    let Expr::BinaryOp {
        left,
        op: AstBinaryOp::Eq,
        right,
        ..
    } = expr
    else {
        return false;
    };
    extract_where_column(left).is_some() && extract_where_column(right).is_some()
}
/// A partial join order explored by the beam search in
/// `order_joins_with_hints_and_features`.
#[derive(Debug, Clone)]
struct PartialPath {
// Table names in the order they are joined.
tables: Vec<String>,
// Access path chosen for each entry of `tables`, in the same order.
access_paths: Vec<AccessPath>,
// Accumulated cost of the path so far.
cost: f64,
// Product of the estimated row counts of all tables joined so far.
cumulative_rows: f64,
}
/// Chooses a join order and access paths for `tables`.
///
/// Convenience wrapper around [`order_joins_with_hints`] that supplies
/// neither per-table index hints nor a cracking hint store.
#[must_use]
pub fn order_joins(
    tables: &[TableStats],
    indexes: &[IndexInfo],
    where_terms: &[WhereTerm<'_>],
    needed_columns: Option<&[String]>,
    cross_join_pairs: &[(String, String)],
) -> QueryPlan {
    let table_index_hints = None;
    let cracking_hints = None;
    order_joins_with_hints(
        tables,
        indexes,
        where_terms,
        needed_columns,
        cross_join_pairs,
        table_index_hints,
        cracking_hints,
    )
}
/// Picks the access path for one table during join ordering.
///
/// Looks up the explicit index hint for the table (if any) and the index
/// preferred by the cracking hint store (if a store is supplied), then
/// delegates to `best_access_path_internal` with an empty trailing slice.
fn join_access_path(
    table: &TableStats,
    indexes: &[IndexInfo],
    where_terms: &[WhereTerm<'_>],
    needed_columns: Option<&[String]>,
    table_index_hints: Option<&BTreeMap<String, IndexHint>>,
    cracking_hints: Option<&CrackingHintStore>,
) -> AccessPath {
    let table_name = &table.name;
    let explicit_hint = lookup_table_index_hint(table_name, table_index_hints);
    let adaptive_hint = cracking_hints.and_then(|store| store.preferred_index(table_name));
    best_access_path_internal(
        table,
        indexes,
        where_terms,
        needed_columns,
        explicit_hint,
        adaptive_hint,
        &[],
    )
}
/// Chooses a join order and access paths, honouring optional index hints and
/// an optional cracking hint store.
///
/// Delegates to [`order_joins_with_hints_and_features`] using the default
/// planner feature flags.
#[must_use]
#[allow(clippy::too_many_lines)]
pub fn order_joins_with_hints(
    tables: &[TableStats],
    indexes: &[IndexInfo],
    where_terms: &[WhereTerm<'_>],
    needed_columns: Option<&[String]>,
    cross_join_pairs: &[(String, String)],
    table_index_hints: Option<&BTreeMap<String, IndexHint>>,
    cracking_hints: Option<&mut CrackingHintStore>,
) -> QueryPlan {
    let feature_flags = PlannerFeatureFlags::default();
    order_joins_with_hints_and_features(
        tables,
        indexes,
        where_terms,
        needed_columns,
        cross_join_pairs,
        table_index_hints,
        cracking_hints,
        feature_flags,
    )
}
/// Chooses a join order, per-table access paths, and join segments.
///
/// Strategy, in order:
/// 1. No tables: an empty plan with zero cost.
/// 2. One table: that table's best access path.
/// 3. If `feature_flags.dpccp_join` is set and the table count is at most
///    `DPCCP_MAX_TABLES`: an exhaustive search via `dpccp_order_joins`; its
///    result is used when it finds a complete order.
/// 4. Otherwise (or when step 3 finds nothing): a beam search that keeps the
///    `mx_choice` cheapest partial paths at each level.
///
/// `cross_join_pairs` holds `(left, right)` pairs; `right` may only be placed
/// once `left` is already in the path. When a cracking hint store is supplied,
/// the access paths of the chosen plan are recorded into it. The global
/// `FSQLITE_PLANNER_PLANS_ENUMERATED` counter is bumped by the number of
/// candidate plans considered.
#[must_use]
#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
pub fn order_joins_with_hints_and_features(
tables: &[TableStats],
indexes: &[IndexInfo],
where_terms: &[WhereTerm<'_>],
needed_columns: Option<&[String]>,
cross_join_pairs: &[(String, String)],
table_index_hints: Option<&BTreeMap<String, IndexHint>>,
cracking_hints: Option<&mut CrackingHintStore>,
feature_flags: PlannerFeatureFlags,
) -> QueryPlan {
let n = tables.len();
if n == 0 {
return QueryPlan {
join_order: vec![],
access_paths: vec![],
join_segments: vec![],
total_cost: 0.0,
morsel_eligibility: None,
};
}
if n == 1 {
// Single table: no ordering decision, just pick its access path.
let ap = join_access_path(
&tables[0],
indexes,
where_terms,
needed_columns,
table_index_hints,
cracking_hints.as_deref(),
);
let total_cost = ap.estimated_cost;
let plan = QueryPlan {
join_order: vec![tables[0].name.clone()],
access_paths: vec![ap],
join_segments: vec![],
total_cost,
morsel_eligibility: None,
};
if let Some(store) = cracking_hints {
for access_path in &plan.access_paths {
store.record_access_path(access_path);
}
}
FSQLITE_PLANNER_PLANS_ENUMERATED.fetch_add(1, Ordering::Relaxed);
return plan;
}
if feature_flags.dpccp_join && n <= DPCCP_MAX_TABLES {
if let Some((order_indices, total_cost, plans_counted, branches_pruned)) = dpccp_order_joins(
tables,
indexes,
where_terms,
needed_columns,
table_index_hints,
cross_join_pairs,
cracking_hints.as_deref(),
) {
let join_order = order_indices
.iter()
.map(|idx| tables[*idx].name.clone())
.collect::<Vec<_>>();
// Access paths are recomputed per table, in the chosen order.
let access_paths = order_indices
.iter()
.map(|idx| {
join_access_path(
&tables[*idx],
indexes,
where_terms,
needed_columns,
table_index_hints,
cracking_hints.as_deref(),
)
})
.collect::<Vec<_>>();
// No FROM clause is available here, so segment selection runs
// without join-shape information.
let join_segments =
choose_join_segments(&join_order, tables, where_terms, None, feature_flags);
let plan = QueryPlan {
join_order,
access_paths,
join_segments,
total_cost,
morsel_eligibility: None,
};
if let Some(store) = cracking_hints {
for access_path in &plan.access_paths {
store.record_access_path(access_path);
}
}
FSQLITE_PLANNER_PLANS_ENUMERATED.fetch_add(plans_counted, Ordering::Relaxed);
tracing::debug!(
join_order = ?plan.join_order,
total_cost = plan.total_cost,
table_count = n,
plans_enumerated = plans_counted,
branches_pruned,
threshold = DPCCP_MAX_TABLES,
algorithm = "dpccp_exhaustive",
"planner.order_joins.complete"
);
tracing::info!(
join_order = ?plan.join_order,
total_cost = plan.total_cost,
table_count = n,
plans_enumerated = plans_counted,
branches_pruned,
algorithm = "dpccp_exhaustive",
"planner.plan_selected"
);
return plan;
}
// The exhaustive search found no complete order (e.g. the cross-join
// constraints could not be satisfied); fall through to the beam search.
tracing::debug!(
table_count = n,
threshold = DPCCP_MAX_TABLES,
"planner.dpccp.no_plan_fallback_greedy"
);
}
let mut plans_enumerated: u64 = 0;
let is_star = detect_star_query(tables, where_terms);
// Above the exhaustive-search threshold the beam degenerates to width 1.
let mx_choice = if n > DPCCP_MAX_TABLES {
1
} else {
compute_mx_choice(n, is_star)
};
// Level 0: every table that the cross-join constraints allow to come first.
let mut paths: Vec<PartialPath> = Vec::with_capacity(n);
for t in tables {
if !cross_join_allowed(&[], &t.name, cross_join_pairs) {
continue;
}
let ap = join_access_path(
t,
indexes,
where_terms,
needed_columns,
table_index_hints,
cracking_hints.as_deref(),
);
let cumulative_rows = ap.estimated_rows;
let cost = ap.estimated_cost;
paths.push(PartialPath {
tables: vec![t.name.clone()],
access_paths: vec![ap],
cost,
cumulative_rows,
});
}
// Incomparable costs (NaN) are treated as equal.
paths.sort_by(|a, b| {
a.cost
.partial_cmp(&b.cost)
.unwrap_or(std::cmp::Ordering::Equal)
});
paths.truncate(mx_choice);
// Each level extends every surviving path by one not-yet-joined table.
for level in 1..n {
let mut next_paths: Vec<PartialPath> = Vec::with_capacity(paths.len() * (n - level));
for path in &paths {
for t in tables {
if path
.tables
.iter()
.any(|existing| existing.eq_ignore_ascii_case(&t.name))
{
continue;
}
if !cross_join_allowed(&path.tables, &t.name, cross_join_pairs) {
continue;
}
let ap = join_access_path(
t,
indexes,
where_terms,
needed_columns,
table_index_hints,
cracking_hints.as_deref(),
);
// The inner table is accessed once per row produced so far.
let outer_rows = path.cumulative_rows;
let inner_cost = ap.estimated_cost * outer_rows;
let mut new_tables = path.tables.clone();
new_tables.push(t.name.clone());
let mut new_aps = path.access_paths.clone();
new_aps.push(ap.clone());
let new_cost = path.cost + inner_cost;
let new_cumulative_rows = path.cumulative_rows * ap.estimated_rows;
plans_enumerated += 1;
tracing::debug!(
target: "fsqlite.planner",
tables = ?new_tables,
cost = new_cost,
"planner.candidate_plan"
);
next_paths.push(PartialPath {
tables: new_tables,
access_paths: new_aps,
cost: new_cost,
cumulative_rows: new_cumulative_rows,
});
}
}
next_paths.sort_by(|a, b| {
a.cost
.partial_cmp(&b.cost)
.unwrap_or(std::cmp::Ordering::Equal)
});
next_paths.truncate(mx_choice);
paths = next_paths;
}
// NOTE(review): if the constraints left no path, this fallback seeds
// single-table paths, so the resulting plan's join order contains only one
// table — confirm callers expect that.
if paths.is_empty() {
for t in tables {
let ap = join_access_path(
t,
indexes,
where_terms,
needed_columns,
table_index_hints,
cracking_hints.as_deref(),
);
let cost = ap.estimated_cost;
let cumulative_rows = ap.estimated_rows;
paths.push(PartialPath {
tables: vec![t.name.clone()],
access_paths: vec![ap],
cost,
cumulative_rows,
});
}
}
let best = paths
.into_iter()
.min_by(|a, b| {
a.cost
.partial_cmp(&b.cost)
.unwrap_or(std::cmp::Ordering::Equal)
})
.expect("tables must be non-empty (checked n == 0 above)");
let join_segments =
choose_join_segments(&best.tables, tables, where_terms, None, feature_flags);
let plan = QueryPlan {
join_order: best.tables,
access_paths: best.access_paths,
join_segments,
total_cost: best.cost,
morsel_eligibility: None,
};
if let Some(store) = cracking_hints {
for access_path in &plan.access_paths {
store.record_access_path(access_path);
}
}
FSQLITE_PLANNER_PLANS_ENUMERATED.fetch_add(plans_enumerated, Ordering::Relaxed);
// The span is entered only now, so it scopes the two summary events below
// rather than the search itself.
let span = tracing::info_span!(
target: "fsqlite.planner",
"join_ordering",
tables_count = n,
plans_enumerated,
selected_cost = plan.total_cost,
);
let _g = span.enter();
tracing::debug!(
join_order = ?plan.join_order,
total_cost = plan.total_cost,
beam_width = mx_choice,
star_query = is_star,
table_count = n,
index_hint_entries = table_index_hints.map_or(0, BTreeMap::len),
algorithm = "greedy_width",
threshold = DPCCP_MAX_TABLES,
"planner.order_joins.complete"
);
tracing::info!(
join_order = ?plan.join_order,
total_cost = plan.total_cost,
table_count = n,
plans_enumerated,
algorithm = "greedy_width",
"planner.plan_selected"
);
plan
}
/// Reports whether `candidate` may be appended to `current_path`.
///
/// Each `(left, right)` pair in `cross_join_pairs` requires `left` to already
/// be in the path before `right` can be placed. Names are compared ASCII
/// case-insensitively.
fn cross_join_allowed(
    current_path: &[String],
    candidate: &str,
    cross_join_pairs: &[(String, String)],
) -> bool {
    cross_join_pairs.iter().all(|(required_first, constrained)| {
        let applies = constrained.eq_ignore_ascii_case(candidate);
        !applies
            || current_path
                .iter()
                .any(|placed| placed.eq_ignore_ascii_case(required_first))
    })
}
/// Index-based variant of `cross_join_allowed`.
///
/// `current_path` holds indices into `tables`. `candidate` may be placed
/// only if, for every `(left, right)` pair whose `right` names the candidate,
/// some table already on the path is named `left`. Names are compared ASCII
/// case-insensitively.
fn cross_join_allowed_indices(
    current_path: &[usize],
    candidate: &str,
    tables: &[TableStats],
    cross_join_pairs: &[(String, String)],
) -> bool {
    cross_join_pairs.iter().all(|(required_first, constrained)| {
        let applies = constrained.eq_ignore_ascii_case(candidate);
        !applies
            || current_path
                .iter()
                .any(|&placed| tables[placed].name.eq_ignore_ascii_case(required_first))
    })
}
/// Searches all join orders exhaustively (with cost-based pruning).
///
/// Access paths are computed once per table. Candidates are then visited in
/// ascending order of estimated rows, then estimated cost, then table index.
///
/// Returns `(order as table indices, total cost, plans enumerated, branches
/// pruned)`, or `None` when no complete order satisfies the cross-join
/// constraints.
///
/// # Panics
///
/// Panics if `tables.len()` exceeds `DPCCP_MAX_TABLES`.
#[allow(dead_code, clippy::cast_possible_truncation)]
fn dpccp_order_joins(
    tables: &[TableStats],
    indexes: &[IndexInfo],
    where_terms: &[WhereTerm<'_>],
    needed_columns: Option<&[String]>,
    table_index_hints: Option<&BTreeMap<String, IndexHint>>,
    cross_join_pairs: &[(String, String)],
    cracking_hints: Option<&CrackingHintStore>,
) -> Option<(Vec<usize>, f64, u64, u64)> {
    let n = tables.len();
    assert!(n <= DPCCP_MAX_TABLES);

    let mut access_paths = Vec::with_capacity(n);
    for table in tables {
        access_paths.push(join_access_path(
            table,
            indexes,
            where_terms,
            needed_columns,
            table_index_hints,
            cracking_hints,
        ));
    }

    // Incomparable floats (NaN) are treated as equal.
    let cmp_f64 =
        |a: f64, b: f64| a.partial_cmp(&b).unwrap_or(std::cmp::Ordering::Equal);
    let mut visit_order: Vec<usize> = (0..n).collect();
    visit_order.sort_by(|&lhs, &rhs| {
        let (left, right) = (&access_paths[lhs], &access_paths[rhs]);
        cmp_f64(left.estimated_rows, right.estimated_rows)
            .then_with(|| cmp_f64(left.estimated_cost, right.estimated_cost))
            .then_with(|| lhs.cmp(&rhs))
    });

    let mut state =
        ExhaustiveJoinSearchState::new(tables, &access_paths, &visit_order, cross_join_pairs);
    state.search();
    let best_order = state.best_order.take()?;
    Some((
        best_order,
        state.best_cost,
        state.plans_enumerated,
        state.branches_pruned,
    ))
}
/// Working state of the exhaustive depth-first join-order search.
struct ExhaustiveJoinSearchState<'a> {
// Tables being ordered; join orders are expressed as indices into this slice.
tables: &'a [TableStats],
// Precomputed access path for each table, indexed like `tables`.
access_paths: &'a [AccessPath],
// Order in which candidate tables are tried at every depth.
visit_order: &'a [usize],
// `(left, right)` pairs: `right` may only be placed after `left`.
cross_join_pairs: &'a [(String, String)],
// Cheapest complete order found so far, if any.
best_order: Option<Vec<usize>>,
// Cost of `best_order`; infinite until a complete order is found.
best_cost: f64,
// Number of candidate extensions examined (including pruned ones).
plans_enumerated: u64,
// Number of candidate extensions discarded by the cost bound.
branches_pruned: u64,
}
impl<'a> ExhaustiveJoinSearchState<'a> {
/// Creates a search state with no best plan and zeroed counters.
fn new(
tables: &'a [TableStats],
access_paths: &'a [AccessPath],
visit_order: &'a [usize],
cross_join_pairs: &'a [(String, String)],
) -> Self {
Self {
tables,
access_paths,
visit_order,
cross_join_pairs,
best_order: None,
best_cost: f64::INFINITY,
plans_enumerated: 0,
branches_pruned: 0,
}
}
/// Runs the search from the empty order; results are left in
/// `best_order`, `best_cost`, `plans_enumerated`, and `branches_pruned`.
fn search(&mut self) {
let mut current_order = Vec::with_capacity(self.tables.len());
self.search_dfs(&mut current_order, 0, 0.0, 1.0);
}
/// Extends `current_order` by every admissible table and recurses.
///
/// `used_mask` has bit `i` set when table `i` is already in the order.
/// `current_cost` and `current_rows` are the accumulated cost and row
/// estimate of `current_order`.
fn search_dfs(
&mut self,
current_order: &mut Vec<usize>,
used_mask: u64,
current_cost: f64,
current_rows: f64,
) {
// A complete order: keep it only if strictly cheaper than the best so far.
if current_order.len() == self.tables.len() {
if current_cost < self.best_cost {
self.best_cost = current_cost;
self.best_order = Some(current_order.clone());
tracing::debug!(
target: "fsqlite.planner",
algorithm = "dpccp_exhaustive",
join_order = ?order_indices_to_names(current_order, self.tables),
total_cost = current_cost,
"planner.best_plan_updated"
);
}
return;
}
for &candidate_idx in self.visit_order {
if used_mask & (1u64 << candidate_idx) != 0 {
continue;
}
let candidate = &self.tables[candidate_idx];
if !cross_join_allowed_indices(
current_order,
&candidate.name,
self.tables,
self.cross_join_pairs,
) {
continue;
}
let ap = &self.access_paths[candidate_idx];
// The first table costs its own access path; each later table is
// accessed once per row produced so far.
let (new_cost, new_rows) = if current_order.is_empty() {
(ap.estimated_cost, ap.estimated_rows)
} else {
let inner_cost = ap.estimated_cost * current_rows;
(current_cost + inner_cost, current_rows * ap.estimated_rows)
};
self.plans_enumerated += 1;
// Prune once a complete plan exists and this prefix is already no cheaper.
let should_prune = self.best_cost.is_finite() && new_cost >= self.best_cost;
// Table names of the candidate order, built only for the debug event below.
let mut candidate_order = current_order
.iter()
.map(|idx| self.tables[*idx].name.as_str())
.collect::<Vec<_>>();
candidate_order.push(candidate.name.as_str());
tracing::debug!(
target: "fsqlite.planner",
algorithm = "dpccp_exhaustive",
depth = candidate_order.len(),
candidate_order = ?candidate_order,
cost = new_cost,
best_complete_cost = if self.best_cost.is_finite() {
Some(self.best_cost)
} else {
None::<f64>
},
pruned = should_prune,
"planner.candidate_plan"
);
if should_prune {
self.branches_pruned += 1;
continue;
}
// Recurse with the candidate appended, then backtrack.
current_order.push(candidate_idx);
self.search_dfs(
current_order,
used_mask | (1u64 << candidate_idx),
new_cost,
new_rows,
);
current_order.pop();
}
}
}
/// Maps a join order given as indices into `tables` to the table names.
///
/// Panics if an index is out of bounds.
fn order_indices_to_names(order: &[usize], tables: &[TableStats]) -> Vec<String> {
    let mut names = Vec::with_capacity(order.len());
    for &idx in order {
        names.push(tables[idx].name.clone());
    }
    names
}
/// Collects the ASCII-lowercased table qualifiers of every qualified column
/// reference reachable from `expr` into `out`.
///
/// Unqualified columns contribute nothing. The walk descends into the
/// expression kinds matched below; any other expression kind is ignored.
fn collect_table_refs(expr: &Expr, out: &mut HashSet<String>) {
match expr {
Expr::Column(col_ref, _) => {
if let Some(ref tq) = col_ref.table {
out.insert(tq.to_ascii_lowercase());
}
}
Expr::BinaryOp { left, right, .. } => {
collect_table_refs(left, out);
collect_table_refs(right, out);
}
Expr::UnaryOp { expr: inner, .. }
| Expr::Collate { expr: inner, .. }
| Expr::IsNull { expr: inner, .. } => {
collect_table_refs(inner, out);
}
Expr::Between {
expr: e, low, high, ..
} => {
collect_table_refs(e, out);
collect_table_refs(low, out);
collect_table_refs(high, out);
}
Expr::In { expr: e, set, .. } => {
collect_table_refs(e, out);
// Only explicit value lists are walked; other IN sets are skipped.
if let InSet::List(items) = set {
for item in items {
collect_table_refs(item, out);
}
}
}
Expr::Like {
expr: e,
pattern,
escape,
..
} => {
collect_table_refs(e, out);
collect_table_refs(pattern, out);
if let Some(esc) = escape {
collect_table_refs(esc, out);
}
}
Expr::FunctionCall { args, filter, .. } => {
// Only list-style arguments are walked.
if let fsqlite_ast::FunctionArgs::List(exprs) = args {
for arg in exprs {
collect_table_refs(arg, out);
}
}
if let Some(f) = filter {
collect_table_refs(f, out);
}
}
Expr::Case {
operand,
whens,
else_expr,
..
} => {
if let Some(op) = operand {
collect_table_refs(op, out);
}
for (when_e, then_e) in whens {
collect_table_refs(when_e, out);
collect_table_refs(then_e, out);
}
if let Some(el) = else_expr {
collect_table_refs(el, out);
}
}
Expr::Cast { expr: e, .. } => collect_table_refs(e, out),
Expr::JsonAccess { expr: e, path, .. } => {
collect_table_refs(e, out);
collect_table_refs(path, out);
}
Expr::RowValue(exprs, _) => {
for e in exprs {
collect_table_refs(e, out);
}
}
Expr::Exists { subquery, .. } | Expr::Subquery(subquery, _) => {
// For subqueries, only the WHERE clause and result expressions of
// `body.select` are inspected, and only when it is a plain SELECT core.
if let SelectCore::Select {
where_clause,
columns,
..
} = &subquery.body.select
{
if let Some(wc) = where_clause {
collect_table_refs(wc, out);
}
for col in columns {
if let ResultColumn::Expr { expr, .. } = col {
collect_table_refs(expr, out);
}
}
}
}
_ => {}
}
}
/// A WHERE term that `pushdown_predicates` assigned to a single table.
#[derive(Debug, Clone)]
pub struct PushedPredicate<'a> {
/// Name of the table the term was pushed to, spelled as in the caller's
/// table list.
pub table: String,
/// The WHERE term that references only that table.
pub term: &'a WhereTerm<'a>,
}
/// Splits WHERE terms into those that can be pushed down to a single table
/// and those that must stay at the join level.
///
/// A term is pushed when:
/// * its expression references exactly one table qualifier and exactly one
///   entry of `table_names` matches it (ASCII case-insensitively); or
/// * its expression references no qualifier, and the term's own column either
///   carries a qualifier matching some entry of `table_names`, or is
///   unqualified while `table_names` has exactly one entry.
///
/// Returns `(pushed, remaining)`, each in input order.
pub fn pushdown_predicates<'a>(
    where_terms: &'a [WhereTerm<'a>],
    table_names: &[String],
) -> (Vec<PushedPredicate<'a>>, Vec<&'a WhereTerm<'a>>) {
    let span = tracing::debug_span!(
        target: "fsqlite.planner",
        "predicate_pushdown",
        total_terms = where_terms.len(),
        pushed = tracing::field::Empty,
        remaining = tracing::field::Empty,
    );
    let _g = span.enter();

    let mut pushed = Vec::new();
    let mut remaining = Vec::new();
    for term in where_terms {
        let mut qualifiers = HashSet::new();
        collect_table_refs(term.expr, &mut qualifiers);

        let target: Option<&String> = match qualifiers.len() {
            1 => {
                // The collected qualifier is already lowercased, so a
                // case-insensitive comparison matches the same names.
                let qualifier = qualifiers.into_iter().next().unwrap();
                let candidates: Vec<&String> = table_names
                    .iter()
                    .filter(|name| name.eq_ignore_ascii_case(&qualifier))
                    .collect();
                // An ambiguous match (several tables) is not pushed.
                if candidates.len() == 1 {
                    Some(candidates[0])
                } else {
                    None
                }
            }
            0 => match term.column.as_ref() {
                Some(col) => match col.table.as_ref() {
                    Some(tname) => table_names
                        .iter()
                        .find(|name| name.eq_ignore_ascii_case(tname)),
                    None if table_names.len() == 1 => table_names.first(),
                    None => None,
                },
                None => None,
            },
            _ => None,
        };

        match target {
            Some(table) => pushed.push(PushedPredicate {
                table: table.clone(),
                term,
            }),
            None => remaining.push(term),
        }
    }

    span.record("pushed", pushed.len() as u64);
    span.record("remaining", remaining.len() as u64);
    tracing::debug!(
        pushed_count = pushed.len(),
        remaining_count = remaining.len(),
        "planner.predicate_pushdown.complete"
    );
    (pushed, remaining)
}
/// Outcome of `try_constant_fold`.
#[derive(Debug, Clone, PartialEq)]
pub enum FoldResult {
/// The expression was folded to this literal value.
Literal(Literal),
/// The expression was not folded.
NotConstant,
}
/// Attempts to evaluate `expr` to a literal without executing the query.
///
/// Handled shapes:
/// * literals fold to themselves;
/// * unary `-`, `+`, `~`, `NOT` over an integer, unary `-`/`+` over a float,
///   and any unary operator over `NULL` (which stays `NULL`);
/// * binary `+ - * / %` and `= != < <= > >=` over two integers;
/// * those same binary operators with a `NULL` operand fold to `NULL`.
///
/// Integer division or modulo by zero folds to `NULL`. Integer results that
/// would overflow `i64` are returned as [`FoldResult::NotConstant`] instead of
/// being wrapped, so the runtime evaluator decides how to represent them.
/// Binary operators outside the list above are never folded with a `NULL`
/// operand, because not every operator propagates `NULL` (for example `OR`
/// with a true operand).
pub fn try_constant_fold(expr: &Expr) -> FoldResult {
    /// Wraps a checked integer result; `None` (overflow) is left unfolded.
    fn checked_int(value: Option<i64>) -> FoldResult {
        value.map_or(FoldResult::NotConstant, |v| {
            FoldResult::Literal(Literal::Integer(v))
        })
    }

    /// Converts a Rust boolean into the matching SQL boolean literal.
    fn boolean(value: bool) -> FoldResult {
        FoldResult::Literal(if value {
            Literal::True
        } else {
            Literal::False
        })
    }

    /// Operators for which a `NULL` operand always yields `NULL`.
    fn null_propagates(op: &fsqlite_ast::BinaryOp) -> bool {
        matches!(
            op,
            fsqlite_ast::BinaryOp::Add
                | fsqlite_ast::BinaryOp::Subtract
                | fsqlite_ast::BinaryOp::Multiply
                | fsqlite_ast::BinaryOp::Divide
                | fsqlite_ast::BinaryOp::Modulo
                | fsqlite_ast::BinaryOp::Eq
                | fsqlite_ast::BinaryOp::Ne
                | fsqlite_ast::BinaryOp::Lt
                | fsqlite_ast::BinaryOp::Le
                | fsqlite_ast::BinaryOp::Gt
                | fsqlite_ast::BinaryOp::Ge
        )
    }

    match expr {
        Expr::Literal(lit, _) => FoldResult::Literal(lit.clone()),
        Expr::UnaryOp {
            op, expr: inner, ..
        } => match try_constant_fold(inner) {
            FoldResult::Literal(Literal::Integer(i)) => match op {
                // `-i64::MIN` does not fit in i64; leave it to the runtime.
                fsqlite_ast::UnaryOp::Negate => checked_int(i.checked_neg()),
                fsqlite_ast::UnaryOp::Plus => FoldResult::Literal(Literal::Integer(i)),
                fsqlite_ast::UnaryOp::BitNot => FoldResult::Literal(Literal::Integer(!i)),
                fsqlite_ast::UnaryOp::Not => boolean(i == 0),
            },
            FoldResult::Literal(Literal::Float(f)) => match op {
                fsqlite_ast::UnaryOp::Negate => FoldResult::Literal(Literal::Float(-f)),
                fsqlite_ast::UnaryOp::Plus => FoldResult::Literal(Literal::Float(f)),
                _ => FoldResult::NotConstant,
            },
            FoldResult::Literal(Literal::Null) => FoldResult::Literal(Literal::Null),
            _ => FoldResult::NotConstant,
        },
        Expr::BinaryOp {
            left, op, right, ..
        } => match (try_constant_fold(left), try_constant_fold(right)) {
            (
                FoldResult::Literal(Literal::Integer(a)),
                FoldResult::Literal(Literal::Integer(b)),
            ) => match op {
                fsqlite_ast::BinaryOp::Add => checked_int(a.checked_add(b)),
                fsqlite_ast::BinaryOp::Subtract => checked_int(a.checked_sub(b)),
                fsqlite_ast::BinaryOp::Multiply => checked_int(a.checked_mul(b)),
                fsqlite_ast::BinaryOp::Divide => {
                    if b == 0 {
                        FoldResult::Literal(Literal::Null)
                    } else {
                        // Only `i64::MIN / -1` can overflow here.
                        checked_int(a.checked_div(b))
                    }
                }
                fsqlite_ast::BinaryOp::Modulo => {
                    if b == 0 {
                        FoldResult::Literal(Literal::Null)
                    } else {
                        // `i64::MIN % -1` is mathematically 0; wrapping_rem
                        // returns exactly that without panicking.
                        FoldResult::Literal(Literal::Integer(a.wrapping_rem(b)))
                    }
                }
                fsqlite_ast::BinaryOp::Eq => boolean(a == b),
                fsqlite_ast::BinaryOp::Ne => boolean(a != b),
                fsqlite_ast::BinaryOp::Lt => boolean(a < b),
                fsqlite_ast::BinaryOp::Le => boolean(a <= b),
                fsqlite_ast::BinaryOp::Gt => boolean(a > b),
                fsqlite_ast::BinaryOp::Ge => boolean(a >= b),
                _ => FoldResult::NotConstant,
            },
            (FoldResult::Literal(Literal::Null), FoldResult::Literal(_))
            | (FoldResult::Literal(_), FoldResult::Literal(Literal::Null))
                if null_propagates(op) =>
            {
                FoldResult::Literal(Literal::Null)
            }
            _ => FoldResult::NotConstant,
        },
        _ => FoldResult::NotConstant,
    }
}
#[cfg(test)]
mod tests {
use super::*;
use fsqlite_ast::{
ColumnRef, CompoundOp, Distinctness, Expr, FromClause, InSet, IndexHint, Literal,
OrderingTerm, QualifiedName, ResultColumn, SelectBody, SelectCore, SortDirection, Span,
TableOrSubquery,
};
use std::{cell::Cell, path::PathBuf, time::Instant};
/// Builds a FROM-less `SELECT` core whose result columns are the integer
/// literal `0`, one per entry of `aliases`, each carrying that alias.
fn select_core_with_aliases(aliases: &[&str]) -> SelectCore {
    let mut columns = Vec::with_capacity(aliases.len());
    for alias in aliases {
        columns.push(ResultColumn::Expr {
            expr: Expr::Literal(Literal::Integer(0), Span::ZERO),
            alias: Some((*alias).to_owned()),
        });
    }
    SelectCore::Select {
        distinct: Distinctness::All,
        columns,
        from: None,
        where_clause: None,
        group_by: vec![],
        having: None,
        windows: vec![],
    }
}
/// Builds a compound `SelectBody`: `first` gives the aliases of the leading
/// SELECT, and each `rest` entry adds one more arm with its own aliases and
/// the operator joining it to the previous arm.
fn compound_body(first: &[&str], rest: &[(&[&str], CompoundOp)]) -> SelectBody {
    let mut compounds = Vec::with_capacity(rest.len());
    for &(arm_aliases, operator) in rest {
        compounds.push((operator, select_core_with_aliases(arm_aliases)));
    }
    SelectBody {
        select: select_core_with_aliases(first),
        compounds,
    }
}
/// ORDER BY term that names an unqualified column, with no direction or
/// NULLS ordering.
fn order_by_name(name: &str) -> OrderingTerm {
    let column = ColumnRef::bare(name);
    OrderingTerm {
        expr: Expr::Column(column, Span::ZERO),
        direction: None,
        nulls: None,
    }
}
/// ORDER BY term consisting of the integer literal `n`, with no direction or
/// NULLS ordering.
fn order_by_num(n: i64) -> OrderingTerm {
    let position = Literal::Integer(n);
    OrderingTerm {
        expr: Expr::Literal(position, Span::ZERO),
        direction: None,
        nulls: None,
    }
}
/// ORDER BY term that names an unqualified column and carries an explicit
/// sort direction.
fn order_by_name_dir(name: &str, dir: SortDirection) -> OrderingTerm {
    let column = ColumnRef::bare(name);
    OrderingTerm {
        expr: Expr::Column(column, Span::ZERO),
        direction: Some(dir),
        nulls: None,
    }
}
/// Builds a `SELECT` core over one table (optionally aliased) with the given
/// result columns, no joins, and no WHERE/GROUP BY/HAVING/WINDOW clauses.
fn select_core_single_table(
    columns: Vec<ResultColumn>,
    table_name: &str,
    alias: Option<&str>,
) -> SelectCore {
    let source = TableOrSubquery::Table {
        name: QualifiedName::bare(table_name),
        alias: alias.map(str::to_owned),
        index_hint: None,
        time_travel: None,
    };
    let from = FromClause {
        source,
        joins: vec![],
    };
    SelectCore::Select {
        distinct: Distinctness::All,
        columns,
        from: Some(from),
        where_clause: None,
        group_by: vec![],
        having: None,
        windows: vec![],
    }
}
/// Minimal `QueryPlan` fixture: `label` is the only join-order entry and its
/// byte length is used as the total cost, so different labels can be told apart.
fn sample_cached_query_plan(label: &str) -> QueryPlan {
    let total_cost = label.len() as f64;
    let join_order = vec![label.to_owned()];
    QueryPlan {
        join_order,
        access_paths: vec![],
        join_segments: vec![],
        total_cost,
        morsel_eligibility: None,
    }
}
/// A bare `*` expands to one unaliased, unqualified column reference per
/// table column, in table order.
#[test]
fn test_single_table_projection_expands_star() {
    let bare_column = |name: &str| ResultColumn::Expr {
        expr: Expr::Column(ColumnRef::bare(name), Span::ZERO),
        alias: None,
    };
    let table_columns = vec![String::from("a"), String::from("b")];
    let core = select_core_single_table(vec![ResultColumn::Star], "t", None);

    let expanded =
        resolve_single_table_result_columns(&core, &table_columns).expect("star should expand");

    assert_eq!(expanded, vec![bare_column("a"), bare_column("b")]);
}
/// `tt.*` addresses the table through its alias and expands to both columns.
#[test]
fn test_single_table_projection_expands_table_star_with_alias() {
    let projection = vec![ResultColumn::TableStar(QualifiedName::bare("tt"))];
    let core = select_core_single_table(projection, "t", Some("tt"));
    let table_columns = vec![String::from("a"), String::from("b")];

    let expanded = resolve_single_table_result_columns(&core, &table_columns)
        .expect("table.* should expand");

    assert_eq!(expanded.len(), 2);
}
/// Projecting a column the table does not have reports `ColumnNotFound` with
/// that column's name.
#[test]
fn test_single_table_projection_rejects_unknown_column() {
    let unknown = ResultColumn::Expr {
        expr: Expr::Column(ColumnRef::bare("z"), Span::ZERO),
        alias: None,
    };
    let core = select_core_single_table(vec![unknown], "t", None);
    let table_columns = vec![String::from("a"), String::from("b")];

    let failure = resolve_single_table_result_columns(&core, &table_columns)
        .expect_err("unknown column should fail");

    let expected = SingleTableProjectionError::ColumnNotFound {
        column: String::from("z"),
    };
    assert_eq!(failure, expected);
}
/// `rowid`, `_rowid_` and `oid` resolve even though they are not declared
/// columns, whether bare, alias-qualified, or table-name-qualified.
#[test]
fn test_single_table_projection_accepts_rowid_aliases_with_qualifiers() {
    let column = |reference: ColumnRef| ResultColumn::Expr {
        expr: Expr::Column(reference, Span::ZERO),
        alias: None,
    };
    let projection = vec![
        column(ColumnRef::bare("rowid")),
        column(ColumnRef::qualified("tt", "_rowid_")),
        column(ColumnRef::qualified("t", "oid")),
    ];
    let core = select_core_single_table(projection, "t", Some("tt"));
    let table_columns = vec![String::from("a"), String::from("b")];

    let accepted = resolve_single_table_result_columns(&core, &table_columns)
        .expect("rowid aliases should be accepted in projection");

    assert_eq!(accepted.len(), 3);
}
/// With the options flag set to `false`, the hidden rowid aliases are not
/// resolved and the first one encountered is reported as missing.
#[test]
fn test_single_table_projection_rejects_hidden_rowid_aliases_when_disabled() {
    let column = |reference: ColumnRef| ResultColumn::Expr {
        expr: Expr::Column(reference, Span::ZERO),
        alias: None,
    };
    let projection = vec![
        column(ColumnRef::bare("rowid")),
        column(ColumnRef::qualified("tt", "_rowid_")),
    ];
    let core = select_core_single_table(projection, "t", Some("tt"));
    let table_columns = vec![String::from("a"), String::from("b")];

    let failure = resolve_single_table_result_columns_with_options(&core, &table_columns, false)
        .expect_err("WITHOUT ROWID tables should reject hidden rowid aliases");

    let expected = SingleTableProjectionError::ColumnNotFound {
        column: String::from("rowid"),
    };
    assert_eq!(failure, expected);
}
/// A declared column that happens to be called `rowid` still resolves when
/// the options flag is `false`.
#[test]
fn test_single_table_projection_still_accepts_visible_rowid_column_when_disabled() {
    let rowid_column = ResultColumn::Expr {
        expr: Expr::Column(ColumnRef::bare("rowid"), Span::ZERO),
        alias: None,
    };
    let core = select_core_single_table(vec![rowid_column], "t", None);
    let table_columns = vec![String::from("rowid"), String::from("payload")];

    let accepted = resolve_single_table_result_columns_with_options(&core, &table_columns, false)
        .expect("visible rowid-named columns should still resolve");

    assert_eq!(accepted.len(), 1);
}
/// An ORDER BY name matching the first SELECT's alias resolves to column 0.
#[test]
fn test_compound_order_by_uses_first_alias() {
    let terms = [order_by_name("a")];
    let body = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);

    let resolved = resolve_compound_order_by(&body, &terms).expect("should resolve");

    assert_eq!(resolved.len(), 1);
    assert_eq!(resolved[0].column_idx, 0);
}
/// Exercises `count_output_columns` and `extract_output_aliases` on a SELECT
/// core (explicit alias, bare column, unaliased literal), on a two-row VALUES
/// clause, and on an empty VALUES clause.
#[test]
fn test_extract_output_aliases_and_count_output_columns() {
    let int_lit = |value: i64| Expr::Literal(Literal::Integer(value), Span::ZERO);

    let select = SelectCore::Select {
        distinct: Distinctness::All,
        columns: vec![
            ResultColumn::Expr {
                expr: int_lit(1),
                alias: Some("renamed".to_owned()),
            },
            ResultColumn::Expr {
                expr: Expr::Column(ColumnRef::bare("bare_col"), Span::ZERO),
                alias: None,
            },
            ResultColumn::Expr {
                expr: int_lit(2),
                alias: None,
            },
        ],
        from: None,
        where_clause: None,
        group_by: vec![],
        having: None,
        windows: vec![],
    };
    assert_eq!(count_output_columns(&select), 3);
    let expected_aliases = vec![
        Some("renamed".to_owned()),
        Some("bare_col".to_owned()),
        None,
    ];
    assert_eq!(extract_output_aliases(&select), expected_aliases);

    let two_rows = SelectCore::Values(vec![
        vec![int_lit(1), int_lit(2)],
        vec![int_lit(3), int_lit(4)],
    ]);
    assert_eq!(count_output_columns(&two_rows), 2);
    assert_eq!(extract_output_aliases(&two_rows), vec![None, None]);

    let no_rows = SelectCore::Values(vec![]);
    assert_eq!(count_output_columns(&no_rows), 0);
    assert!(extract_output_aliases(&no_rows).is_empty());
}
/// A name that only the second SELECT uses as an alias still resolves, to the
/// position that alias occupies there.
#[test]
fn test_compound_order_by_second_select_alias() {
    let terms = [order_by_name("b")];
    let body = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);

    let resolved = resolve_compound_order_by(&body, &terms).expect("should resolve");

    assert_eq!(resolved.len(), 1);
    assert_eq!(resolved[0].column_idx, 0);
}
/// When both arms use the alias at different positions, the first SELECT's
/// position is the one chosen.
#[test]
fn test_compound_order_by_first_select_wins_conflict() {
    let terms = [order_by_name("b")];
    let body = compound_body(&["a", "b"], &[(&["b", "a"], CompoundOp::UnionAll)]);

    let resolved = resolve_compound_order_by(&body, &terms).expect("should resolve");

    assert_eq!(resolved[0].column_idx, 1);
}
/// Numeric ORDER BY terms are 1-based and map to 0-based column indices.
#[test]
fn test_compound_order_by_numeric_column() {
    let terms = [order_by_num(1), order_by_num(2)];
    let body = compound_body(&["a", "b"], &[(&["c", "d"], CompoundOp::Union)]);

    let resolved = resolve_compound_order_by(&body, &terms).expect("should resolve");

    assert_eq!(resolved[0].column_idx, 0);
    assert_eq!(resolved[1].column_idx, 1);
}
/// A name that matches no alias in any arm yields `ColumnNotFound` carrying
/// that name.
#[test]
fn test_compound_order_by_unknown_name_error() {
    let terms = [order_by_name("z")];
    let body = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);

    let failure = resolve_compound_order_by(&body, &terms).expect_err("should error");

    assert!(matches!(
        failure,
        CompoundOrderByError::ColumnNotFound { ref name, .. } if name == "z"
    ));
}
/// A positive index beyond the column count yields `IndexOutOfRange` with the
/// offending index and the actual column count.
#[test]
fn test_compound_order_by_numeric_out_of_range() {
    let terms = [order_by_num(5)];
    let body = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);

    let failure = resolve_compound_order_by(&body, &terms).expect_err("should error");

    assert!(matches!(
        failure,
        CompoundOrderByError::IndexOutOfRange {
            index: 5,
            num_columns: 1,
            ..
        }
    ));
}
/// Index `0` is rejected with `IndexZeroOrNegative`.
#[test]
fn test_compound_order_by_numeric_zero() {
    let terms = [order_by_num(0)];
    let body = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);

    let failure = resolve_compound_order_by(&body, &terms).expect_err("should error");

    assert!(matches!(
        failure,
        CompoundOrderByError::IndexZeroOrNegative { value: 0, .. }
    ));
}
/// An arbitrary expression (`a + 0`) is not a valid compound ORDER BY term.
#[test]
fn test_compound_order_by_expression_rejected() {
    let sum = Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
        op: fsqlite_ast::BinaryOp::Add,
        right: Box::new(Expr::Literal(Literal::Integer(0), Span::ZERO)),
        span: Span::ZERO,
    };
    let terms = [OrderingTerm {
        expr: sum,
        direction: None,
        nulls: None,
    }];
    let body = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);

    let failure = resolve_compound_order_by(&body, &terms).expect_err("should error");

    assert!(matches!(
        failure,
        CompoundOrderByError::ExpressionNotAllowed { .. }
    ));
}
/// The explicit sort direction of a term is carried into the resolved entry.
#[test]
fn test_compound_order_by_with_direction() {
    let terms = [order_by_name_dir("a", SortDirection::Desc)];
    let body = compound_body(&["a", "b"], &[(&["c", "d"], CompoundOp::Union)]);

    let resolved = resolve_compound_order_by(&body, &terms).expect("should resolve");

    let first = &resolved[0];
    assert_eq!(first.column_idx, 0);
    assert_eq!(first.direction, Some(SortDirection::Desc));
}
/// A `COLLATE` wrapper around a column name resolves to that column and keeps
/// the collation name.
#[test]
fn test_compound_order_by_collate() {
    let collated = Expr::Collate {
        expr: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
        collation: "NOCASE".to_owned(),
        span: Span::ZERO,
    };
    let terms = [OrderingTerm {
        expr: collated,
        direction: None,
        nulls: None,
    }];
    let body = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);

    let resolved = resolve_compound_order_by(&body, &terms).expect("should resolve");

    let first = &resolved[0];
    assert_eq!(first.column_idx, 0);
    assert_eq!(first.collation.as_deref(), Some("NOCASE"));
}
/// An alias that only the third arm defines is still found.
#[test]
fn test_compound_order_by_three_selects() {
    let later_arms: [(&[&str], CompoundOp); 2] =
        [(&["b"], CompoundOp::Union), (&["c"], CompoundOp::Union)];
    let body = compound_body(&["a"], &later_arms);
    let terms = [order_by_name("c")];

    let resolved = resolve_compound_order_by(&body, &terms).expect("should resolve");

    assert_eq!(resolved[0].column_idx, 0);
}
/// When two later arms place the alias at different positions, the earlier
/// of those arms decides the index.
#[test]
fn test_compound_order_by_earlier_select_wins() {
    let later_arms: [(&[&str], CompoundOp); 2] = [
        (&["b", "c"], CompoundOp::UnionAll),
        (&["c", "b"], CompoundOp::UnionAll),
    ];
    let body = compound_body(&["a", "x"], &later_arms);
    let terms = [order_by_name("c")];

    let resolved = resolve_compound_order_by(&body, &terms).expect("should resolve");

    assert_eq!(resolved[0].column_idx, 1);
}
/// Alias lookup ignores ASCII case: `mycol` finds `MyCol`.
#[test]
fn test_compound_order_by_case_insensitive() {
    let terms = [order_by_name("mycol")];
    let body = compound_body(&["MyCol"], &[(&["other"], CompoundOp::Union)]);

    let resolved = resolve_compound_order_by(&body, &terms).expect("should resolve");

    assert_eq!(resolved[0].column_idx, 0);
}
/// Resolution through a later arm works the same for INTERSECT and EXCEPT.
#[test]
fn test_compound_order_by_intersect_except() {
    for operator in [CompoundOp::Intersect, CompoundOp::Except] {
        let body = compound_body(&["a"], &[(&["b"], operator)]);
        let resolved =
            resolve_compound_order_by(&body, &[order_by_name("b")]).expect("should resolve");
        assert_eq!(resolved[0].column_idx, 0);
    }
}
/// Explicit aliases are returned verbatim, in column order.
#[test]
fn test_extract_output_aliases_select() {
    let core = select_core_with_aliases(&["x", "y", "z"]);
    let expected = vec![
        Some("x".to_owned()),
        Some("y".to_owned()),
        Some("z".to_owned()),
    ];
    assert_eq!(extract_output_aliases(&core), expected);
}
/// An unaliased bare column reference reports the column name as its alias.
#[test]
fn test_extract_output_aliases_bare_column() {
    let bare = ResultColumn::Expr {
        expr: Expr::Column(ColumnRef::bare("my_col"), Span::ZERO),
        alias: None,
    };
    let core = SelectCore::Select {
        distinct: Distinctness::All,
        columns: vec![bare],
        from: None,
        where_clause: None,
        group_by: vec![],
        having: None,
        windows: vec![],
    };
    assert_eq!(
        extract_output_aliases(&core),
        vec![Some("my_col".to_owned())]
    );
}
/// A VALUES row yields one `None` alias per value.
#[test]
fn test_extract_output_aliases_values() {
    let row = vec![
        Expr::Literal(Literal::Integer(1), Span::ZERO),
        Expr::Literal(Literal::Integer(2), Span::ZERO),
    ];
    let core = SelectCore::Values(vec![row]);
    assert_eq!(extract_output_aliases(&core), vec![None, None]);
}
/// `is_compound` is false without compound arms and true with one.
#[test]
fn test_is_compound() {
    let single = SelectBody {
        select: select_core_with_aliases(&["a"]),
        compounds: vec![],
    };
    let with_union = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);

    assert!(!is_compound(&single));
    assert!(is_compound(&with_union));
}
/// Every compound operator maps to its SQL keyword spelling.
#[test]
fn test_compound_op_name_all_variants() {
    let cases = [
        (CompoundOp::Union, "UNION"),
        (CompoundOp::UnionAll, "UNION ALL"),
        (CompoundOp::Intersect, "INTERSECT"),
        (CompoundOp::Except, "EXCEPT"),
    ];
    for (operator, keyword) in cases {
        assert_eq!(compound_op_name(operator), keyword);
    }
}
/// Checks a distinguishing fragment of the `Display` text for every
/// `CompoundOrderByError` variant, including `IndexZeroOrNegative`.
#[test]
fn test_compound_order_by_error_display() {
    let err = CompoundOrderByError::ColumnNotFound {
        name: "z".to_owned(),
        span: Span::ZERO,
    };
    assert!(err.to_string().contains("does not match"));

    let err = CompoundOrderByError::IndexOutOfRange {
        index: 5,
        num_columns: 2,
        span: Span::ZERO,
    };
    assert!(err.to_string().contains("out of range"));

    // Both index errors mention "out of range"; the zero/negative message
    // is told apart by its "must be positive" suffix.
    let err = CompoundOrderByError::IndexZeroOrNegative {
        value: 0,
        span: Span::ZERO,
    };
    assert!(err.to_string().contains("must be positive"));

    let err = CompoundOrderByError::ExpressionNotAllowed { span: Span::ZERO };
    assert!(err.to_string().contains("not allowed"));
}
/// A negative index is rejected with `IndexZeroOrNegative` carrying the value.
#[test]
fn test_compound_order_by_negative_index() {
    let terms = [order_by_num(-1)];
    let body = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);

    let failure = resolve_compound_order_by(&body, &terms).expect_err("should error");

    assert!(matches!(
        failure,
        CompoundOrderByError::IndexZeroOrNegative { value: -1, .. }
    ));
}
/// Mixed terms (name with direction, 1-based number, name from the second
/// arm) each resolve independently and keep their order.
#[test]
fn test_compound_order_by_multiple_terms() {
    let body = compound_body(
        &["a", "b", "c"],
        &[(&["x", "y", "z"], CompoundOp::UnionAll)],
    );
    let terms = [
        order_by_name_dir("c", SortDirection::Desc),
        order_by_num(1),
        order_by_name("y"),
    ];

    let resolved = resolve_compound_order_by(&body, &terms).expect("should resolve");

    assert_eq!(resolved.len(), 3);
    // "c" is the third alias of the first arm.
    assert_eq!(resolved[0].column_idx, 2);
    assert_eq!(resolved[0].direction, Some(SortDirection::Desc));
    // Numeric term 1 is the first column.
    assert_eq!(resolved[1].column_idx, 0);
    // "y" is the second alias of the second arm.
    assert_eq!(resolved[2].column_idx, 1);
}
/// `TableStats` fixture with the given page and row counts, tagged as
/// heuristic statistics.
fn table_stats(name: &str, n_pages: u64, n_rows: u64) -> TableStats {
    let name = String::from(name);
    TableStats {
        name,
        n_pages,
        n_rows,
        source: StatsSource::Heuristic,
    }
}
/// `IndexInfo` fixture over plain columns: heuristic statistics, no partial
/// WHERE clause, no expression columns.
fn index_info(
    name: &str,
    table: &str,
    columns: &[&str],
    unique: bool,
    n_pages: u64,
) -> IndexInfo {
    IndexInfo {
        name: String::from(name),
        table: String::from(table),
        columns: columns.iter().copied().map(str::to_owned).collect(),
        unique,
        n_pages,
        source: StatsSource::Heuristic,
        partial_where: None,
        expression_columns: vec![],
    }
}
/// Classified WHERE term for `col = value` on an unqualified column.
///
/// The expression is intentionally leaked so the returned term can borrow it
/// for `'static`.
fn eq_term_value(col: &str, value: i64) -> WhereTerm<'static> {
    let column = Expr::Column(ColumnRef::bare(col), Span::ZERO);
    let literal = Expr::Literal(Literal::Integer(value), Span::ZERO);
    let comparison = Expr::BinaryOp {
        left: Box::new(column),
        op: AstBinaryOp::Eq,
        right: Box::new(literal),
        span: Span::ZERO,
    };
    let leaked: &'static Expr = Box::leak(Box::new(comparison));
    classify_where_term(leaked)
}
/// Classified WHERE term for `col = 1`.
fn eq_term(col: &str) -> WhereTerm<'static> {
    let compared_value = 1;
    eq_term_value(col, compared_value)
}
/// Classified WHERE term for `col > 5` on an unqualified column.
///
/// The expression is intentionally leaked so the returned term can borrow it
/// for `'static`.
fn range_term(col: &str) -> WhereTerm<'static> {
    let column = Expr::Column(ColumnRef::bare(col), Span::ZERO);
    let bound = Expr::Literal(Literal::Integer(5), Span::ZERO);
    let comparison = Expr::BinaryOp {
        left: Box::new(column),
        op: AstBinaryOp::Gt,
        right: Box::new(bound),
        span: Span::ZERO,
    };
    let leaked: &'static Expr = Box::leak(Box::new(comparison));
    classify_where_term(leaked)
}
/// Classified WHERE term for `col IN (0, 1, ..., count - 1)`.
///
/// The expression is intentionally leaked so the returned term can borrow it
/// for `'static`.
fn in_term(col: &str, count: usize) -> WhereTerm<'static> {
    let mut items: Vec<Expr> = Vec::with_capacity(count);
    for i in 0..count {
        // Test fixtures only use small counts, so the cast cannot wrap.
        #[allow(clippy::cast_possible_wrap)]
        let value = i as i64;
        items.push(Expr::Literal(Literal::Integer(value), Span::ZERO));
    }
    let membership = Expr::In {
        expr: Box::new(Expr::Column(ColumnRef::bare(col), Span::ZERO)),
        set: InSet::List(items),
        not: false,
        span: Span::ZERO,
    };
    let leaked: &'static Expr = Box::leak(Box::new(membership));
    classify_where_term(leaked)
}
/// Classified WHERE term for `col LIKE 'pattern'` without an ESCAPE clause.
///
/// The expression is intentionally leaked so the returned term can borrow it
/// for `'static`.
fn like_term(col: &str, pattern: &str) -> WhereTerm<'static> {
    let subject = Expr::Column(ColumnRef::bare(col), Span::ZERO);
    let pattern_literal = Expr::Literal(Literal::String(pattern.to_owned()), Span::ZERO);
    let matcher = Expr::Like {
        expr: Box::new(subject),
        pattern: Box::new(pattern_literal),
        escape: None,
        op: LikeOp::Like,
        not: false,
        span: Span::ZERO,
    };
    let leaked: &'static Expr = Box::leak(Box::new(matcher));
    classify_where_term(leaked)
}
/// Classified WHERE term for `col LIKE 'pattern' ESCAPE 'escape'`.
///
/// The expression is intentionally leaked so the returned term can borrow it
/// for `'static`.
fn like_term_with_escape(col: &str, pattern: &str, escape: &str) -> WhereTerm<'static> {
    let string_literal = |text: &str| Expr::Literal(Literal::String(text.to_owned()), Span::ZERO);
    let matcher = Expr::Like {
        expr: Box::new(Expr::Column(ColumnRef::bare(col), Span::ZERO)),
        pattern: Box::new(string_literal(pattern)),
        escape: Some(Box::new(string_literal(escape))),
        op: LikeOp::Like,
        not: false,
        span: Span::ZERO,
    };
    let leaked: &'static Expr = Box::leak(Box::new(matcher));
    classify_where_term(leaked)
}
/// Classified WHERE term for `col GLOB 'pattern'`.
///
/// The expression is intentionally leaked so the returned term can borrow it
/// for `'static`.
fn glob_term(col: &str, pattern: &str) -> WhereTerm<'static> {
    let subject = Expr::Column(ColumnRef::bare(col), Span::ZERO);
    let pattern_literal = Expr::Literal(Literal::String(pattern.to_owned()), Span::ZERO);
    let matcher = Expr::Like {
        expr: Box::new(subject),
        pattern: Box::new(pattern_literal),
        escape: None,
        op: LikeOp::Glob,
        not: false,
        span: Span::ZERO,
    };
    let leaked: &'static Expr = Box::leak(Box::new(matcher));
    classify_where_term(leaked)
}
/// Classified WHERE term for `col = v0 OR (col = v1 OR (... OR col = vN))`.
///
/// The OR chain is right-nested: the last value is the innermost disjunct and
/// each earlier value is attached on the left. Panics if fewer than two
/// values are supplied. The expression is intentionally leaked so the
/// returned term can borrow it for `'static`.
fn or_eq_term(col: &str, values: &[i64]) -> WhereTerm<'static> {
    assert!(
        values.len() >= 2,
        "or_eq_term requires at least two disjunct values"
    );
    let equals = |value: i64| Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare(col), Span::ZERO)),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO)),
        span: Span::ZERO,
    };
    let (innermost, earlier) = values.split_last().expect("values is non-empty");
    // Walk the earlier values back-to-front, wrapping the chain on the right.
    let combined = earlier
        .iter()
        .rev()
        .fold(equals(*innermost), |chain, value| Expr::BinaryOp {
            left: Box::new(equals(*value)),
            op: AstBinaryOp::Or,
            right: Box::new(chain),
            span: Span::ZERO,
        });
    let leaked: &'static Expr = Box::leak(Box::new(combined));
    classify_where_term(leaked)
}
/// Classified WHERE term for the equi-join predicate `t1.c1 = t2.c2`.
///
/// The expression is intentionally leaked so the returned term can borrow it
/// for `'static`.
fn join_term(t1: &str, c1: &str, t2: &str, c2: &str) -> WhereTerm<'static> {
    let left_column = Expr::Column(ColumnRef::qualified(t1, c1), Span::ZERO);
    let right_column = Expr::Column(ColumnRef::qualified(t2, c2), Span::ZERO);
    let predicate = Expr::BinaryOp {
        left: Box::new(left_column),
        op: AstBinaryOp::Eq,
        right: Box::new(right_column),
        span: Span::ZERO,
    };
    let leaked: &'static Expr = Box::leak(Box::new(predicate));
    classify_where_term(leaked)
}
/// A full table scan costs exactly the table's page count.
#[test]
fn test_cost_full_table_scan() {
    let cases = [(100, 100.0), (1, 1.0), (10000, 10000.0)];
    for (table_pages, expected) in cases {
        let cost = estimate_cost(&AccessPathKind::FullTableScan, table_pages, 0);
        assert!((cost - expected).abs() < f64::EPSILON);
    }
}
/// A rowid lookup over 1024 pages costs 10 (= log2(1024)).
#[test]
fn test_cost_rowid_lookup() {
    let cost = estimate_cost(&AccessPathKind::RowidLookup, 1024, 0);
    let deviation = (cost - 10.0).abs();
    assert!(deviation < f64::EPSILON);
}
/// An equality index scan costs log2(index pages) + log2(table pages).
#[test]
fn test_cost_index_scan_equality() {
    let expected = 50_f64.log2() + 200_f64.log2();
    let cost = estimate_cost(&AccessPathKind::IndexScanEquality, 200, 50);
    let deviation = (cost - expected).abs();
    assert!(deviation < 1e-10);
}
/// A range index scan costs the index descent plus the selected fraction of
/// both the index pages and the table pages.
#[test]
fn test_cost_index_scan_range() {
    let sel = 0.1;
    let kind = AccessPathKind::IndexScanRange { selectivity: sel };
    let expected = 50_f64.log2() + sel * 50.0 + sel * 200.0;

    let cost = estimate_cost(&kind, 200, 50);

    assert!((cost - expected).abs() < 1e-10);
}
/// A covering index scan costs the index descent plus the selected fraction
/// of the index pages only; no table pages are charged.
#[test]
fn test_cost_covering_index_scan() {
    let sel = 0.1;
    let kind = AccessPathKind::CoveringIndexScan { selectivity: sel };
    let expected = 50_f64.log2() + sel * 50.0;

    let cost = estimate_cost(&kind, 200, 50);

    assert!((cost - expected).abs() < 1e-10);
}
/// At equal selectivity a covering scan must be cheaper than a range scan:
/// by exactly `sel * table_pages` in the page-only model, and by a wider
/// margin once the row count is included.
#[test]
fn test_cost_ranks_covering_index_below_non_covering_range_scan() {
    let sel = 0.1;
    let range_kind = || AccessPathKind::IndexScanRange { selectivity: sel };
    let covering_kind = || AccessPathKind::CoveringIndexScan { selectivity: sel };

    // Page-only model.
    let range = estimate_cost(&range_kind(), 200, 50);
    let covering = estimate_cost(&covering_kind(), 200, 50);
    assert!(
        covering < range,
        "covering index must rank below a range scan: {covering} vs {range}"
    );
    assert!(
        ((range - covering) - sel * 200.0).abs() < 1e-9,
        "covering/range gap should equal sel*table_pages (= {}), got {}",
        sel * 200.0,
        range - covering
    );

    // Row-aware model with 1000 rows.
    let range_r = estimate_cost_ext(&range_kind(), 200, 50, 1_000);
    let covering_r = estimate_cost_ext(&covering_kind(), 200, 50, 1_000);
    assert!(
        covering_r < range_r,
        "covering must stay cheaper once rows are counted: {covering_r} vs {range_r}"
    );
    assert!(
        (range_r - covering_r) > (range - covering),
        "per-row terms must widen the covering advantage"
    );
}
/// Each access-path kind maps to its snake_case metric label.
#[test]
fn access_path_metric_label_maps_every_kind() {
    let cases = [
        (AccessPathKind::FullTableScan, "full_table_scan"),
        (
            AccessPathKind::IndexScanRange { selectivity: 0.1 },
            "index_scan_range",
        ),
        (AccessPathKind::IndexScanEquality, "index_scan_equality"),
        (
            AccessPathKind::CoveringIndexScan { selectivity: 0.1 },
            "covering_index_scan",
        ),
        (AccessPathKind::RowidLookup, "rowid_lookup"),
    ];
    for (kind, label) in cases {
        assert_eq!(access_path_metric_label(&kind), label);
    }
}
/// The totals snapshot contains exactly the five access-path labels.
#[test]
fn test_snapshot_index_selection_totals_has_five_access_path_labels() {
    let expected_labels = [
        "covering_index_scan",
        "full_table_scan",
        "index_scan_equality",
        "index_scan_range",
        "rowid_lookup",
    ];
    let snap = snapshot_index_selection_totals();
    for label in expected_labels {
        assert!(snap.contains_key(label), "missing label: {label}");
    }
    assert_eq!(snap.len(), 5, "no extra labels");
}
/// With zero rows `estimate_cost_ext` reduces to pure page costs; pins the
/// exact value for every access-path kind at 16 index pages / 64 table pages.
#[test]
fn test_estimate_cost_ext_exact_page_costs_at_zero_rows() {
    let approx = |a: f64, b: f64| (a - b).abs() < 1e-9;
    let (ip, tp) = (16u64, 64u64);

    let fixed_kinds = [
        (AccessPathKind::FullTableScan, 64.0),
        (AccessPathKind::RowidLookup, 6.0),
        (AccessPathKind::IndexScanEquality, 10.0),
    ];
    for (kind, expected) in fixed_kinds {
        assert!(approx(estimate_cost_ext(&kind, tp, ip, 0), expected));
    }

    let range_kind = AccessPathKind::IndexScanRange { selectivity: 0.5 };
    let range = estimate_cost_ext(&range_kind, tp, ip, 0);
    assert!(approx(range, 44.0), "range page cost, got {range}");

    let covering_kind = AccessPathKind::CoveringIndexScan { selectivity: 0.5 };
    let covering = estimate_cost_ext(&covering_kind, tp, ip, 0);
    assert!(approx(covering, 12.0), "covering page cost, got {covering}");

    // The two differ only by the selected fraction of the table pages.
    assert!(approx(range - covering, 0.5 * 64.0));
}
/// Only `column = column` counts as an equi-column predicate; a literal on
/// either side, a non-equality operator, or a non-binary expression does not.
#[test]
fn test_expression_is_equi_column_predicate() {
    let col = |name: &str| Box::new(Expr::Column(ColumnRef::bare(name), Span::ZERO));
    let lit = |value: i64| Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO));
    let bin = |left: Box<Expr>, op: AstBinaryOp, right: Box<Expr>| Expr::BinaryOp {
        left,
        op,
        right,
        span: Span::ZERO,
    };

    let accepted = bin(col("a"), AstBinaryOp::Eq, col("b"));
    assert!(expression_is_equi_column_predicate(&accepted));

    let rejected = [
        bin(col("a"), AstBinaryOp::Eq, lit(5)),
        bin(lit(5), AstBinaryOp::Eq, col("b")),
        bin(lit(5), AstBinaryOp::Eq, lit(6)),
        bin(col("a"), AstBinaryOp::Lt, col("b")),
        Expr::Literal(Literal::Integer(1), Span::ZERO),
    ];
    for expr in &rejected {
        assert!(!expression_is_equi_column_predicate(expr));
    }
}
/// An `a.x = b.y` term is reported as an equi-predicate only when both of
/// its tables are in the set; otherwise, and for empty input, nothing is
/// returned.
#[test]
fn test_collect_join_predicates() {
    let both_tables: HashSet<String> = ["a", "b"].iter().map(|s| (*s).to_owned()).collect();
    let only_a: HashSet<String> = ["a"].iter().map(|s| (*s).to_owned()).collect();
    let terms = [join_term("a", "x", "b", "y")];

    let (equi, theta) = collect_join_predicates(&terms, &both_tables);
    assert_eq!(equi.len(), 1);
    assert!(theta.is_empty());

    let (equi, theta) = collect_join_predicates(&terms, &only_a);
    assert!(equi.is_empty());
    assert!(theta.is_empty());

    let (equi, theta) = collect_join_predicates(&[], &both_tables);
    assert!(equi.is_empty() && theta.is_empty());
}
/// `has_join_predicate` is symmetric in its table arguments, ignores case,
/// and is false for unrelated tables or when no terms are given.
#[test]
fn test_has_join_predicate_detects_equi_join_either_orientation() {
    let terms = [join_term("a", "x", "b", "y")];
    let linked = |left: &str, right: &str| has_join_predicate(left, right, &terms);

    assert!(linked("a", "b"));
    assert!(linked("b", "a"), "either argument order");
    assert!(linked("A", "B"), "case-insensitive");
    assert!(!linked("a", "c"), "no predicate to c");
    assert!(!linked("c", "d"));
    assert!(
        !has_join_predicate("a", "b", &[]),
        "no terms -> no predicate"
    );
}
/// Index lists are translated to table names position by position.
#[test]
fn test_order_indices_to_names() {
    let owned = |names: &[&str]| -> Vec<String> {
        names.iter().map(|name| (*name).to_owned()).collect()
    };
    let tables = vec![
        table_stats("a", 1, 1),
        table_stats("b", 1, 1),
        table_stats("c", 1, 1),
    ];

    assert!(order_indices_to_names(&[], &tables).is_empty());
    assert_eq!(
        order_indices_to_names(&[0, 1, 2], &tables),
        owned(&["a", "b", "c"])
    );
    assert_eq!(
        order_indices_to_names(&[2, 0, 1], &tables),
        owned(&["c", "a", "b"])
    );
    assert_eq!(order_indices_to_names(&[1], &tables), owned(&["b"]));
}
/// The plan-cache key is deterministic and changes with any of its inputs:
/// the feature flags, the schema cookie, or the SQL text.
#[test]
fn test_plan_cache_key_with_feature_flags() {
    let sql = "SELECT * FROM t";
    let baseline = || plan_cache_key_with_feature_flags(sql, 1, PlannerFeatureFlags::default());

    // Same inputs, same key.
    assert_eq!(baseline(), baseline());

    // Each of the four flag combinations hashes differently.
    let kd = baseline();
    let leapfrog_only = PlannerFeatureFlags {
        leapfrog_join: true,
        ..PlannerFeatureFlags::default()
    };
    let kl = plan_cache_key_with_feature_flags(sql, 1, leapfrog_only);
    let dpccp_only = PlannerFeatureFlags {
        dpccp_join: true,
        ..PlannerFeatureFlags::default()
    };
    let kp = plan_cache_key_with_feature_flags(sql, 1, dpccp_only);
    let both_flags = PlannerFeatureFlags {
        leapfrog_join: true,
        dpccp_join: true,
    };
    let kb = plan_cache_key_with_feature_flags(sql, 1, both_flags);
    let set: std::collections::HashSet<u64> = [kd, kl, kp, kb].into_iter().collect();
    assert_eq!(
        set.len(),
        4,
        "all four feature-flag combinations must produce distinct keys"
    );

    // A different schema cookie changes the key.
    assert_ne!(
        baseline(),
        plan_cache_key_with_feature_flags(sql, 2, PlannerFeatureFlags::default())
    );
    // Different SQL text changes the key.
    assert_ne!(
        baseline(),
        plan_cache_key_with_feature_flags("SELECT 1", 1, PlannerFeatureFlags::default())
    );
}
// Checks how `prepare_plan_cache_lookup` treats the hot-entry fields for a
// fresh planner, a matching key, and a different key. The steps are
// order-dependent: each one mutates the planner the next step inspects.
#[test]
fn test_prepare_plan_cache_lookup_evicts_stale_hot_entry() {
let mut p = QueryPlanner::new();
// Fresh planner: preparing a lookup leaves the hot slot empty.
p.prepare_plan_cache_lookup(42);
assert!(p.hot_plan_cache_key.is_none());
assert!(p.hot_plan_cache_plan.is_none());
// Hot key equals the looked-up key: key and pending-touch flag are kept.
p.hot_plan_cache_key = Some(42);
p.hot_plan_cache_needs_lru_touch = true;
p.prepare_plan_cache_lookup(42);
assert_eq!(p.hot_plan_cache_key, Some(42));
assert!(p.hot_plan_cache_needs_lru_touch);
// Hot key differs from the looked-up key: key, plan, and flag are cleared.
p.hot_plan_cache_key = Some(42);
p.hot_plan_cache_needs_lru_touch = true;
p.prepare_plan_cache_lookup(99);
assert!(p.hot_plan_cache_key.is_none());
assert!(p.hot_plan_cache_plan.is_none());
assert!(!p.hot_plan_cache_needs_lru_touch);
}
// Checks that `flush_hot_plan_cache_lru_touch` always leaves the
// pending-touch flag cleared, whether or not a hot key is set.
#[test]
fn test_flush_hot_plan_cache_lru_touch_clears_flag() {
let mut p = QueryPlanner::new();
// Flag starts cleared and stays cleared after a flush.
assert!(!p.hot_plan_cache_needs_lru_touch);
p.flush_hot_plan_cache_lru_touch();
assert!(!p.hot_plan_cache_needs_lru_touch);
// Flag set without a hot key: the flush clears it.
p.hot_plan_cache_needs_lru_touch = true;
p.flush_hot_plan_cache_lru_touch();
assert!(!p.hot_plan_cache_needs_lru_touch);
// Flag set with a hot key: the flush clears it and the plan cache is
// still reported empty afterwards.
p.hot_plan_cache_key = Some(42);
p.hot_plan_cache_needs_lru_touch = true;
p.flush_hot_plan_cache_lru_touch();
assert!(!p.hot_plan_cache_needs_lru_touch);
assert!(p.is_plan_cache_empty());
}
// Checks `lookup_hot_plan_cache` on a planner with no hot plan stored, and
// that `clear_hot_plan_cache` resets all three hot-entry fields.
#[test]
fn test_lookup_hot_plan_cache_and_clear() {
let mut p = QueryPlanner::new();
// No hot key: lookup returns nothing and does not request an LRU touch.
assert!(p.lookup_hot_plan_cache(42).is_none());
assert!(!p.hot_plan_cache_needs_lru_touch);
// Hot key set but no plan stored: lookup of that key returns nothing,
// yet the pending-touch flag is set afterwards.
p.hot_plan_cache_key = Some(42);
assert!(p.lookup_hot_plan_cache(42).is_none()); assert!(p.hot_plan_cache_needs_lru_touch);
// Lookup of a different key returns nothing and leaves the flag set.
assert!(p.lookup_hot_plan_cache(99).is_none());
assert!(p.hot_plan_cache_needs_lru_touch);
// Clearing resets key, plan, and flag.
p.clear_hot_plan_cache();
assert!(p.hot_plan_cache_key.is_none());
assert!(p.hot_plan_cache_plan.is_none());
assert!(!p.hot_plan_cache_needs_lru_touch);
}
// Checks that `invalidate_plan_cache_if_schema_cookie_changed` records the
// most recent schema cookie in `cached_schema_cookie`.
#[test]
fn test_invalidate_plan_cache_if_schema_cookie_changed_tracks_cookie() {
let mut p = QueryPlanner::new();
// Fresh planner: no cookie recorded, cache empty.
assert_eq!(p.cached_schema_cookie, None);
assert!(p.is_plan_cache_empty());
// First cookie is recorded.
p.invalidate_plan_cache_if_schema_cookie_changed(5);
assert_eq!(p.cached_schema_cookie, Some(5));
assert!(p.is_plan_cache_empty());
// Repeating the same cookie keeps it.
p.invalidate_plan_cache_if_schema_cookie_changed(5);
assert_eq!(p.cached_schema_cookie, Some(5));
// A different cookie replaces the recorded one; the cache is empty.
p.invalidate_plan_cache_if_schema_cookie_changed(7);
assert_eq!(p.cached_schema_cookie, Some(7));
assert!(p.is_plan_cache_empty());
}
/// A newly constructed planner reports an empty plan cache for any requested
/// capacity (including 0), and clearing an empty cache keeps it empty.
#[test]
fn test_is_plan_cache_empty_and_clear_on_fresh_planner() {
    let default_planner = QueryPlanner::new();
    assert!(default_planner.is_plan_cache_empty());

    let small_cache = QueryPlanner::with_plan_cache_capacity(8);
    assert!(small_cache.is_plan_cache_empty());

    let zero_capacity = QueryPlanner::with_plan_cache_capacity(0);
    assert!(zero_capacity.is_plan_cache_empty());

    let mut cleared = QueryPlanner::new();
    cleared.clear_plan_cache();
    assert!(cleared.is_plan_cache_empty());
}
/// A requested capacity of 0 is raised to 1; other values pass through.
#[test]
fn test_normalize_plan_cache_capacity_floors_at_one() {
    for (requested, normalized) in [(0, 1), (1, 1), (10, 10)] {
        assert_eq!(normalize_plan_cache_capacity(requested).get(), normalized);
    }
}
/// `ordered_subset` keeps the selected names in join order and ignores
/// selected names that are not in the join order.
#[test]
fn test_ordered_subset_preserves_join_order() {
    let name_set = |names: &[&str]| -> HashSet<String> {
        names.iter().map(|name| (*name).to_owned()).collect()
    };
    let order: Vec<String> = ["c", "a", "b", "d"]
        .iter()
        .map(|name| (*name).to_owned())
        .collect();

    // A partial selection comes back in join order.
    let partial = name_set(&["a", "d"]);
    assert_eq!(
        ordered_subset(&order, &partial),
        vec!["a".to_owned(), "d".to_owned()]
    );

    // Selecting everything reproduces the join order.
    let everything = name_set(&["a", "b", "c", "d"]);
    assert_eq!(ordered_subset(&order, &everything), order);

    // Selecting nothing gives nothing.
    assert!(ordered_subset(&order, &HashSet::new()).is_empty());

    // Unknown names in the selection are dropped.
    let with_unknown = name_set(&["a", "x"]);
    assert_eq!(ordered_subset(&order, &with_unknown), vec!["a".to_owned()]);
}
/// Index-based CROSS JOIN ordering check: the right side ("B") may only be placed once the
/// left side ("A", index 0) is already placed; names are matched case-insensitively.
#[test]
fn test_cross_join_allowed_indices_via_tables() {
    let cross_pairs = vec![("A".to_owned(), "B".to_owned())];
    let stats = vec![
        table_stats("a", 1, 1),
        table_stats("b", 1, 1),
        table_stats("c", 1, 1),
    ];

    // "B" is blocked until table index 0 ("a") has been placed.
    assert!(!cross_join_allowed_indices(&[], "B", &stats, &cross_pairs));
    assert!(cross_join_allowed_indices(&[0], "B", &stats, &cross_pairs));

    // The left side and unrelated tables are never blocked.
    assert!(cross_join_allowed_indices(&[], "A", &stats, &cross_pairs));
    assert!(cross_join_allowed_indices(&[], "C", &stats, &cross_pairs));

    // Lower-case candidate still matches the upper-case pair entry.
    assert!(cross_join_allowed_indices(&[0], "b", &stats, &cross_pairs));
}
/// Name-based CROSS JOIN ordering check: the right table of a pair is only allowed after its
/// left table, regardless of letter case.
#[test]
fn test_cross_join_allowed_enforces_right_after_left_ordering() {
    let cross_pairs = vec![("A".to_owned(), "B".to_owned())];
    let upper_a_placed = ["A".to_owned()];
    let lower_a_placed = ["a".to_owned()];

    // Exact-case behaviour.
    assert!(!cross_join_allowed(&[], "B", &cross_pairs));
    assert!(cross_join_allowed(&upper_a_placed, "B", &cross_pairs));
    assert!(cross_join_allowed(&[], "A", &cross_pairs));
    assert!(cross_join_allowed(&[], "C", &cross_pairs));

    // Same rules with lower-case names.
    assert!(!cross_join_allowed(&[], "b", &cross_pairs));
    assert!(cross_join_allowed(&lower_a_placed, "b", &cross_pairs));
}
/// `collect_disjuncts` yields one entry per OR leaf whether the tree leans left, right, or
/// is balanced.
#[test]
fn test_collect_disjuncts_flattens_or_tree_regardless_of_nesting() {
    fn int(value: i64) -> Expr {
        Expr::Literal(Literal::Integer(value), Span::ZERO)
    }
    fn either(lhs: Expr, rhs: Expr) -> Expr {
        Expr::BinaryOp {
            left: Box::new(lhs),
            op: AstBinaryOp::Or,
            right: Box::new(rhs),
            span: Span::ZERO,
        }
    }
    fn disjunct_count(expr: &Expr) -> usize {
        let mut found = Vec::new();
        collect_disjuncts(expr, &mut found);
        found.len()
    }

    // A non-OR expression is its own single disjunct.
    assert_eq!(disjunct_count(&int(1)), 1);
    assert_eq!(disjunct_count(&either(int(1), int(2))), 2);
    // Right-leaning, left-leaning, and balanced shapes.
    assert_eq!(disjunct_count(&either(int(1), either(int(2), int(3)))), 3);
    assert_eq!(disjunct_count(&either(either(int(1), int(2)), int(3))), 3);
    assert_eq!(
        disjunct_count(&either(either(int(1), int(2)), either(int(3), int(4)))),
        4
    );
}
/// `collect_conjuncts` yields one entry per AND leaf whether the tree leans left, right, or
/// is balanced.
#[test]
fn test_collect_conjuncts_flattens_and_tree_regardless_of_nesting() {
    fn int(value: i64) -> Expr {
        Expr::Literal(Literal::Integer(value), Span::ZERO)
    }
    fn both(lhs: Expr, rhs: Expr) -> Expr {
        Expr::BinaryOp {
            left: Box::new(lhs),
            op: AstBinaryOp::And,
            right: Box::new(rhs),
            span: Span::ZERO,
        }
    }
    fn conjunct_count(expr: &Expr) -> usize {
        let mut found = Vec::new();
        collect_conjuncts(expr, &mut found);
        found.len()
    }

    // A non-AND expression is its own single conjunct.
    assert_eq!(conjunct_count(&int(1)), 1);
    assert_eq!(conjunct_count(&both(int(1), int(2))), 2);
    // Right-leaning, left-leaning, and balanced shapes.
    assert_eq!(conjunct_count(&both(int(1), both(int(2), int(3)))), 3);
    assert_eq!(conjunct_count(&both(both(int(1), int(2)), int(3))), 3);
    assert_eq!(
        conjunct_count(&both(both(int(1), int(2)), both(int(3), int(4)))),
        4
    );
}
/// An OR chain of equalities on one column classifies as an IN-list on that column; mixed
/// columns, a lone equality, or a non-equality arm do not.
#[test]
fn test_classify_or_disjunction_as_in_list() {
    fn column(name: &str) -> Box<Expr> {
        Box::new(Expr::Column(ColumnRef::bare(name), Span::ZERO))
    }
    fn int(value: i64) -> Box<Expr> {
        Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO))
    }
    fn binary(left: Box<Expr>, op: AstBinaryOp, right: Box<Expr>) -> Expr {
        Expr::BinaryOp {
            left,
            op,
            right,
            span: Span::ZERO,
        }
    }
    fn col_eq(name: &str, value: i64) -> Expr {
        binary(column(name), AstBinaryOp::Eq, int(value))
    }
    fn either(lhs: Expr, rhs: Expr) -> Expr {
        binary(Box::new(lhs), AstBinaryOp::Or, Box::new(rhs))
    }

    // a = 1 OR a = 2 OR a = 3  ->  IN-list of three values on "a".
    let same_column = either(col_eq("a", 1), either(col_eq("a", 2), col_eq("a", 3)));
    let expected_column = WhereColumn {
        table: None,
        column: "a".to_owned(),
    };
    assert_eq!(
        classify_or_disjunction_as_in_list(&same_column),
        Some((expected_column, 3))
    );

    // Arms on different columns.
    let mixed_columns = either(col_eq("a", 1), col_eq("b", 2));
    assert!(classify_or_disjunction_as_in_list(&mixed_columns).is_none());

    // Not a disjunction at all.
    assert!(classify_or_disjunction_as_in_list(&col_eq("a", 1)).is_none());

    // One arm is a range comparison rather than an equality.
    let greater_than = binary(column("a"), AstBinaryOp::Gt, int(2));
    let with_range_arm = either(col_eq("a", 1), greater_than);
    assert!(classify_or_disjunction_as_in_list(&with_range_arm).is_none());
}
/// For `column = literal` in either orientation the literal side is returned; expressions
/// without a column operand, or that are not binary at all, yield `None`.
#[test]
fn test_extract_comparison_operand_returns_other_side_of_column_comparison() {
    fn column(name: &str) -> Box<Expr> {
        Box::new(Expr::Column(ColumnRef::bare(name), Span::ZERO))
    }
    fn int(value: i64) -> Box<Expr> {
        Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO))
    }
    fn equals(left: Box<Expr>, right: Box<Expr>) -> Expr {
        Expr::BinaryOp {
            left,
            op: AstBinaryOp::Eq,
            right,
            span: Span::ZERO,
        }
    }

    let column_on_left = equals(column("x"), int(5));
    assert!(matches!(
        extract_comparison_operand(&column_on_left),
        Some(Expr::Literal(Literal::Integer(5), _))
    ));

    let column_on_right = equals(int(5), column("x"));
    assert!(matches!(
        extract_comparison_operand(&column_on_right),
        Some(Expr::Literal(Literal::Integer(5), _))
    ));

    let no_column = equals(int(5), int(6));
    assert!(extract_comparison_operand(&no_column).is_none());

    let bare_literal = Expr::Literal(Literal::Integer(1), Span::ZERO);
    assert!(extract_comparison_operand(&bare_literal).is_none());
}
/// The upper bound increments the last character that can be incremented; an empty prefix
/// or one made only of `char::MAX` has no bound.
#[test]
fn test_like_prefix_upper_bound() {
    for (prefix, bound) in [("abc", "abd"), ("a", "b")] {
        assert_eq!(like_prefix_upper_bound(prefix).as_deref(), Some(bound));
    }
    assert_eq!(like_prefix_upper_bound(""), None);

    // A trailing `char::MAX` is dropped and the preceding character is bumped instead.
    let trailing_max = format!("a{}", char::MAX);
    assert_eq!(
        like_prefix_upper_bound(&trailing_max).as_deref(),
        Some("b")
    );

    let only_max = char::MAX.to_string();
    assert_eq!(like_prefix_upper_bound(&only_max), None);
}
/// With no collation supplied, a LIKE prefix is accepted only when it contains no ASCII
/// letters; digits, punctuation, the empty string, and non-ASCII letters pass.
#[test]
fn test_is_like_prefix_safe_for_column_rejects_ascii_alphabetic_prefixes() {
    for accepted in ["123", "", "_5%", "é"] {
        assert!(is_like_prefix_safe_for_column(None, accepted));
    }
    for rejected in ["abc", "1a", "Z"] {
        assert!(!is_like_prefix_safe_for_column(None, rejected));
    }
}
/// Basic union-find behaviour: singletons at start, unions merge representatives, and
/// redundant unions are harmless.
#[test]
fn test_union_find() {
    let mut sets = UnionFind::new(5);

    // Every element starts as its own representative.
    for element in 0..5 {
        assert_eq!(sets.find(element), element);
    }

    // Build two disjoint pairs: {0, 1} and {2, 3}.
    sets.union(0, 1);
    let first_root = sets.find(0);
    assert_eq!(sets.find(1), first_root);

    sets.union(2, 3);
    let second_root = sets.find(2);
    assert_eq!(sets.find(3), second_root);
    assert_ne!(first_root, second_root);
    assert_eq!(sets.find(4), 4);

    // Merge the pairs; element 4 stays isolated.
    sets.union(0, 2);
    let merged_root = sets.find(0);
    for member in 1..=3 {
        assert_eq!(sets.find(member), merged_root);
    }
    assert_eq!(sets.find(4), 4);

    // Self-union and repeated union leave the structure unchanged.
    sets.union(0, 0);
    sets.union(0, 2);
    assert_eq!(sets.find(2), merged_root);
}
/// Tables linked by equi-join predicates land in the same component; unlinked tables are
/// singletons; an empty table list yields no components.
#[test]
fn test_connected_components_groups_join_connected_tables() {
    fn join_on(left_table: &str, right_table: &str) -> EquiJoinPredicate {
        EquiJoinPredicate {
            left: ColumnKey {
                table: left_table.to_owned(),
                column: "x".to_owned(),
            },
            right: ColumnKey {
                table: right_table.to_owned(),
                column: "y".to_owned(),
            },
        }
    }

    let tables = vec!["a".to_owned(), "b".to_owned(), "c".to_owned()];

    // One edge a-b: a pair plus a singleton.
    let one_edge = connected_components(&tables, &[join_on("a", "b")]);
    let mut component_sizes: Vec<usize> = one_edge.iter().map(|group| group.len()).collect();
    component_sizes.sort_unstable();
    assert_eq!(component_sizes, vec![1, 2]);

    // Chain a-b-c: everything in one component.
    let chain = connected_components(&tables, &[join_on("a", "b"), join_on("b", "c")]);
    assert_eq!(chain.len(), 1);
    assert_eq!(chain[0].len(), 3);

    // No edges: three singletons.
    let isolated = connected_components(&tables, &[]);
    assert_eq!(isolated.len(), 3);
    assert!(isolated.iter().all(|group| group.len() == 1));

    // No tables: predicates alone create no components.
    assert!(connected_components(&[], &[join_on("a", "b")]).is_empty());
}
/// Column lookup ignores ASCII case and reports absence for unknown names or empty lists.
#[test]
fn test_column_exists_ignore_case() {
    let columns = vec!["Name".to_owned(), "Age".to_owned()];

    for present in ["Name", "name", "AGE"] {
        assert!(column_exists_ignore_case(&columns, present));
    }

    assert!(!column_exists_ignore_case(&columns, "id"));
    assert!(!column_exists_ignore_case(&[], "name"));
}
/// An equality term produces an `Equality` probe, a range term a `Range` probe, and terms
/// on other columns (or no terms) produce nothing.
#[test]
fn test_extract_range_probe_for_column() {
    let equality_terms = [eq_term_value("x", 5)];
    let Some(AccessPathProbe::Equality { column, .. }) =
        extract_range_probe_for_column(&equality_terms, "x")
    else {
        panic!("expected an Equality probe");
    };
    assert_eq!(column, "x");

    let range_terms = [range_term("x")];
    assert!(matches!(
        extract_range_probe_for_column(&range_terms, "x"),
        Some(AccessPathProbe::Range { .. })
    ));

    let other_column_terms = [eq_term_value("y", 5)];
    assert!(extract_range_probe_for_column(&other_column_terms, "x").is_none());
    assert!(extract_range_probe_for_column(&[], "x").is_none());
}
/// `x IN (1, 2, 3)` becomes an `InList` probe with three values; empty lists, `NOT IN`, and
/// non-IN expressions yield no probe.
#[test]
fn test_extract_in_list_probe() {
    fn int(value: i64) -> Expr {
        Expr::Literal(Literal::Integer(value), Span::ZERO)
    }
    fn x_in(items: Vec<Expr>, negated: bool) -> Expr {
        Expr::In {
            expr: Box::new(Expr::Column(ColumnRef::bare("x"), Span::ZERO)),
            set: InSet::List(items),
            not: negated,
            span: Span::ZERO,
        }
    }

    let three_values = x_in(vec![int(1), int(2), int(3)], false);
    let Some(AccessPathProbe::InList { column, values }) =
        extract_in_list_probe(&three_values, "x")
    else {
        panic!("expected an InList probe");
    };
    assert_eq!(column, "x");
    assert_eq!(values.len(), 3);

    let empty_list = x_in(vec![], false);
    assert!(extract_in_list_probe(&empty_list, "x").is_none());

    let negated = x_in(vec![int(1), int(2)], true);
    assert!(extract_in_list_probe(&negated, "x").is_none());

    let not_an_in = Expr::Literal(Literal::Integer(1), Span::ZERO);
    assert!(extract_in_list_probe(&not_an_in, "x").is_none());
}
/// Swapping operands mirrors `<`/`>` and `<=`/`>=`, keeps `=`, and is undefined for `!=`
/// and arithmetic operators.
#[test]
fn test_reverse_comparison_op() {
    // Equality is symmetric.
    assert!(matches!(
        reverse_comparison_op(AstBinaryOp::Eq),
        Some(AstBinaryOp::Eq)
    ));

    // Strict inequalities mirror each other.
    assert!(matches!(
        reverse_comparison_op(AstBinaryOp::Lt),
        Some(AstBinaryOp::Gt)
    ));
    assert!(matches!(
        reverse_comparison_op(AstBinaryOp::Gt),
        Some(AstBinaryOp::Lt)
    ));

    // Non-strict inequalities mirror each other.
    assert!(matches!(
        reverse_comparison_op(AstBinaryOp::Le),
        Some(AstBinaryOp::Ge)
    ));
    assert!(matches!(
        reverse_comparison_op(AstBinaryOp::Ge),
        Some(AstBinaryOp::Le)
    ));

    // Operators with no reversed form.
    assert!(reverse_comparison_op(AstBinaryOp::Ne).is_none());
    assert!(reverse_comparison_op(AstBinaryOp::Add).is_none());
}
/// Normalization always reports the comparison as `column <op> literal`, flipping the
/// operator when the literal was written first; non-comparisons and column-vs-column
/// comparisons are rejected.
#[test]
fn test_normalize_column_literal_comparison_orients_column_left() {
    fn column(name: &str) -> Box<Expr> {
        Box::new(Expr::Column(ColumnRef::bare(name), Span::ZERO))
    }
    fn int(value: i64) -> Box<Expr> {
        Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO))
    }
    fn binary(left: Box<Expr>, op: AstBinaryOp, right: Box<Expr>) -> Expr {
        Expr::BinaryOp {
            left,
            op,
            right,
            span: Span::ZERO,
        }
    }

    // x > 5 is already in canonical orientation.
    let canonical = binary(column("x"), AstBinaryOp::Gt, int(5));
    let normalized = normalize_column_literal_comparison(&canonical).unwrap();
    assert_eq!(normalized.column.column, "x");
    assert!(matches!(normalized.op, AstBinaryOp::Gt));
    assert!(matches!(normalized.literal, Literal::Integer(5)));

    // 5 < x is rewritten to x > 5.
    let literal_first = binary(int(5), AstBinaryOp::Lt, column("x"));
    let normalized = normalize_column_literal_comparison(&literal_first).unwrap();
    assert_eq!(normalized.column.column, "x");
    assert!(matches!(normalized.op, AstBinaryOp::Gt));
    assert!(matches!(normalized.literal, Literal::Integer(5)));

    // Arithmetic is not a comparison.
    let arithmetic = binary(column("x"), AstBinaryOp::Add, int(5));
    assert!(normalize_column_literal_comparison(&arithmetic).is_none());

    // Column-to-column has no literal side.
    let column_pair = binary(column("x"), AstBinaryOp::Eq, column("y"));
    assert!(normalize_column_literal_comparison(&column_pair).is_none());
}
/// Equality terms imply `IS NOT NULL` on the same column; a conjunctive predicate needs
/// every conjunct implied; no terms imply nothing.
#[test]
fn test_where_terms_imply_predicate() {
    fn not_null(name: &str) -> Expr {
        Expr::IsNull {
            expr: Box::new(Expr::Column(ColumnRef::bare(name), Span::ZERO)),
            not: true,
            span: Span::ZERO,
        }
    }
    fn both(lhs: Expr, rhs: Expr) -> Expr {
        Expr::BinaryOp {
            left: Box::new(lhs),
            op: AstBinaryOp::And,
            right: Box::new(rhs),
            span: Span::ZERO,
        }
    }

    // WHERE x = 5
    let x_only = [eq_term_value("x", 5)];
    assert!(where_terms_imply_predicate(&x_only, &not_null("x")));
    assert!(!where_terms_imply_predicate(&x_only, &not_null("y")));

    // WHERE x = 5 AND y = 7 covers both conjuncts of the predicate.
    let x_and_y = [eq_term_value("x", 5), eq_term_value("y", 7)];
    let both_not_null = both(not_null("x"), not_null("y"));
    assert!(where_terms_imply_predicate(&x_and_y, &both_not_null));

    // WHERE x = 5 alone leaves the `y IS NOT NULL` conjunct unproven.
    let both_not_null = both(not_null("x"), not_null("y"));
    assert!(!where_terms_imply_predicate(&x_only, &both_not_null));

    assert!(!where_terms_imply_predicate(&[], &not_null("x")));
}
/// Implication between a WHERE expression and a partial-index predicate: identical
/// comparisons, tighter ranges, and equality-implies-not-null hold; looser ranges and
/// unrelated columns do not.
#[test]
fn test_expr_implies_partial_predicate() {
    fn compare(name: &str, op: AstBinaryOp, value: i64) -> Expr {
        Expr::BinaryOp {
            left: Box::new(Expr::Column(ColumnRef::bare(name), Span::ZERO)),
            op,
            right: Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO)),
            span: Span::ZERO,
        }
    }

    // x = 5  =>  x = 5
    let x_eq_5 = compare("x", AstBinaryOp::Eq, 5);
    assert!(expr_implies_partial_predicate(
        &x_eq_5,
        &compare("x", AstBinaryOp::Eq, 5)
    ));

    // x > 10  =>  x > 5, but not the other way round.
    let x_gt_10 = compare("x", AstBinaryOp::Gt, 10);
    let x_gt_5 = compare("x", AstBinaryOp::Gt, 5);
    assert!(expr_implies_partial_predicate(&x_gt_10, &x_gt_5));
    assert!(!expr_implies_partial_predicate(&x_gt_5, &x_gt_10));

    // x = 5  =>  x IS NOT NULL
    let x_not_null = Expr::IsNull {
        expr: Box::new(Expr::Column(ColumnRef::bare("x"), Span::ZERO)),
        not: true,
        span: Span::ZERO,
    };
    assert!(expr_implies_partial_predicate(&x_eq_5, &x_not_null));

    // x = 5 says nothing about y.
    let y_eq_3 = compare("y", AstBinaryOp::Eq, 3);
    assert!(!expr_implies_partial_predicate(&x_eq_5, &y_eq_3));
}
/// Truth table for "an ordering between two literals satisfies a comparison operator".
#[test]
fn test_literal_satisfies_predicate_literal() {
    use std::cmp::Ordering as Cmp;

    // (ordering, operator) pairs that must be accepted.
    let satisfied = [
        (Cmp::Equal, AstBinaryOp::Eq),
        (Cmp::Greater, AstBinaryOp::Gt),
        (Cmp::Greater, AstBinaryOp::Ge),
        (Cmp::Equal, AstBinaryOp::Ge),
        (Cmp::Less, AstBinaryOp::Lt),
        (Cmp::Less, AstBinaryOp::Le),
        (Cmp::Equal, AstBinaryOp::Le),
    ];
    for (ordering, op) in satisfied {
        assert!(literal_satisfies_predicate_literal(ordering, op));
    }

    // Pairs that must be rejected; `Ne` is rejected even for equal literals.
    let unsatisfied = [
        (Cmp::Less, AstBinaryOp::Eq),
        (Cmp::Greater, AstBinaryOp::Eq),
        (Cmp::Equal, AstBinaryOp::Gt),
        (Cmp::Less, AstBinaryOp::Ge),
        (Cmp::Equal, AstBinaryOp::Lt),
        (Cmp::Greater, AstBinaryOp::Le),
        (Cmp::Equal, AstBinaryOp::Ne),
    ];
    for (ordering, op) in unsatisfied {
        assert!(!literal_satisfies_predicate_literal(ordering, op));
    }
}
/// Literal comparison orders same-type values, compares integers with floats numerically,
/// and returns `None` for incomparable pairs (number vs. string, NULL, NaN).
#[test]
fn test_compare_partial_index_literals_handles_cross_type_numerics() {
    use std::cmp::Ordering;

    let text = |value: &str| Literal::String(value.to_owned());

    let cases = [
        // Same-type comparisons.
        (Literal::Integer(3), Literal::Integer(5), Some(Ordering::Less)),
        (Literal::Float(2.0), Literal::Float(2.0), Some(Ordering::Equal)),
        (text("a"), text("b"), Some(Ordering::Less)),
        // Integer/float pairs compare by numeric value.
        (Literal::Integer(5), Literal::Float(5.0), Some(Ordering::Equal)),
        (Literal::Integer(3), Literal::Float(5.0), Some(Ordering::Less)),
        (Literal::Float(7.0), Literal::Integer(2), Some(Ordering::Greater)),
        // Incomparable pairs.
        (Literal::Integer(1), text("x"), None),
        (Literal::Null, Literal::Integer(1), None),
        (Literal::Float(f64::NAN), Literal::Float(1.0), None),
    ];

    for (lhs, rhs, expected) in cases {
        assert_eq!(compare_partial_index_literals(&lhs, &rhs), expected);
    }
}
/// Hint lookup is case-insensitive on the table name and returns `None` for unknown
/// tables or when no hint map is supplied.
#[test]
fn test_lookup_table_index_hint() {
    let hint_map: std::collections::BTreeMap<String, IndexHint> =
        std::collections::BTreeMap::from([("users".to_owned(), IndexHint::NotIndexed)]);

    for table_name in ["users", "USERS"] {
        assert!(matches!(
            lookup_table_index_hint(table_name, Some(&hint_map)),
            Some(IndexHint::NotIndexed)
        ));
    }

    assert!(lookup_table_index_hint("other", Some(&hint_map)).is_none());
    assert!(lookup_table_index_hint("users", None).is_none());
}
/// The rowid aliases (`rowid`, `_rowid_`, `oid`, any case) are recognized with or without
/// a table qualifier; look-alike names are not.
#[test]
fn test_is_rowid_column_ignores_table_qualifier() {
    fn where_column(table: Option<&str>, column: &str) -> WhereColumn {
        WhereColumn {
            table: table.map(str::to_owned),
            column: column.to_owned(),
        }
    }

    // Unqualified names.
    for alias in ["rowid", "ROWID", "_rowid_", "oid"] {
        assert!(is_rowid_column(&where_column(None, alias)));
    }
    for ordinary in ["id", "row_id"] {
        assert!(!is_rowid_column(&where_column(None, ordinary)));
    }

    // A qualifier changes nothing.
    assert!(is_rowid_column(&where_column(Some("t"), "rowid")));
    assert!(!is_rowid_column(&where_column(Some("t"), "id")));
}
/// "Compatible" tolerates one side lacking a qualifier, whereas "equivalent" does not;
/// both compare names case-insensitively and reject differing tables or columns.
#[test]
fn test_where_columns_compatible_vs_equivalent() {
    fn unqualified(column: &str) -> WhereColumn {
        WhereColumn {
            table: None,
            column: column.to_owned(),
        }
    }
    fn qualified(table: &str, column: &str) -> WhereColumn {
        WhereColumn {
            table: Some(table.to_owned()),
            column: column.to_owned(),
        }
    }

    // (left, right, expected compatible, expected equivalent)
    let cases = [
        (unqualified("x"), unqualified("X"), true, true),
        (qualified("t", "x"), qualified("T", "X"), true, true),
        (qualified("t", "x"), qualified("u", "x"), false, false),
        // Qualified vs. bare: the only case where the two relations disagree.
        (qualified("t", "x"), unqualified("x"), true, false),
        (unqualified("x"), unqualified("y"), false, false),
    ];

    for (left, right, compatible, equivalent) in cases {
        assert_eq!(where_columns_compatible(&left, &right), compatible);
        assert_eq!(where_columns_equivalent(&left, &right), equivalent);
    }
}
/// A qualifier matches either the table name or its alias, ignoring case.
#[test]
fn test_qualifier_matches_table() {
    // (qualifier, table name, alias, expected)
    let cases = [
        ("t", "t", None, true),
        ("T", "t", None, true),
        ("u", "t", None, false),
        ("users", "users", Some("u"), true),
        ("U", "users", Some("u"), true),
        ("x", "users", Some("u"), false),
    ];

    for (qualifier, table_name, alias, expected) in cases {
        assert_eq!(
            qualifier_matches_table(qualifier, table_name, alias),
            expected
        );
    }
}
/// Only table-qualified column references are extracted, and both parts come back
/// lower-cased.
#[test]
fn test_extract_qualified_column_requires_qualifier_and_canonicalizes() {
    let mixed_case = Expr::Column(ColumnRef::qualified("T", "Col"), Span::ZERO);
    let canonical_key = ColumnKey {
        table: "t".to_owned(),
        column: "col".to_owned(),
    };
    assert_eq!(extract_qualified_column(&mixed_case), Some(canonical_key));

    // No qualifier, no key.
    let unqualified = Expr::Column(ColumnRef::bare("x"), Span::ZERO);
    assert_eq!(extract_qualified_column(&unqualified), None);

    // Not a column at all.
    let literal = Expr::Literal(Literal::Integer(1), Span::ZERO);
    assert_eq!(extract_qualified_column(&literal), None);
}
/// Column references convert to `WhereColumn` with their qualifier (if any) intact; any
/// other expression kind yields `None`.
#[test]
fn test_extract_where_column_preserves_qualifier_and_rejects_non_columns() {
    let unqualified = Expr::Column(ColumnRef::bare("x"), Span::ZERO);
    let expected_unqualified = WhereColumn {
        table: None,
        column: "x".to_owned(),
    };
    assert_eq!(
        extract_where_column(&unqualified),
        Some(expected_unqualified)
    );

    let with_table = Expr::Column(ColumnRef::qualified("t", "x"), Span::ZERO);
    let expected_with_table = WhereColumn {
        table: Some("t".to_owned()),
        column: "x".to_owned(),
    };
    assert_eq!(extract_where_column(&with_table), Some(expected_with_table));

    let literal = Expr::Literal(Literal::Integer(1), Span::ZERO);
    assert_eq!(extract_where_column(&literal), None);

    // A comparison containing a column is still not itself a column.
    let comparison = Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("x"), Span::ZERO)),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
        span: Span::ZERO,
    };
    assert_eq!(extract_where_column(&comparison), None);
}
/// Only `<column> IS NOT NULL` normalizes to a `WhereColumn`; `IS NULL`, a non-column
/// operand, or a non-IsNull expression all yield `None`.
#[test]
fn test_normalize_is_not_null_predicate() {
    fn null_check(operand: Expr, negated: bool) -> Expr {
        Expr::IsNull {
            expr: Box::new(operand),
            not: negated,
            span: Span::ZERO,
        }
    }
    fn column(name: &str) -> Expr {
        Expr::Column(ColumnRef::bare(name), Span::ZERO)
    }
    fn int(value: i64) -> Expr {
        Expr::Literal(Literal::Integer(value), Span::ZERO)
    }

    // x IS NOT NULL
    let column_not_null = null_check(column("x"), true);
    assert_eq!(
        normalize_is_not_null_predicate(&column_not_null),
        Some(WhereColumn {
            table: None,
            column: "x".to_owned()
        })
    );

    // x IS NULL
    let column_is_null = null_check(column("x"), false);
    assert_eq!(normalize_is_not_null_predicate(&column_is_null), None);

    // 5 IS NOT NULL
    let literal_not_null = null_check(int(5), true);
    assert_eq!(normalize_is_not_null_predicate(&literal_not_null), None);

    // A bare literal.
    assert_eq!(normalize_is_not_null_predicate(&int(1)), None);
}
/// An expression guarantees non-NULL for column `x` when it is `x IS NOT NULL` or an
/// equality with a non-NULL literal; `IS NULL`, `= NULL`, and other columns do not count.
#[test]
fn test_expr_guarantees_non_null_for_matching_column() {
    fn column(name: &str) -> Box<Expr> {
        Box::new(Expr::Column(ColumnRef::bare(name), Span::ZERO))
    }
    fn null_check(name: &str, negated: bool) -> Expr {
        Expr::IsNull {
            expr: column(name),
            not: negated,
            span: Span::ZERO,
        }
    }
    fn x_equals(literal: Literal) -> Expr {
        Expr::BinaryOp {
            left: column("x"),
            op: AstBinaryOp::Eq,
            right: Box::new(Expr::Literal(literal, Span::ZERO)),
            span: Span::ZERO,
        }
    }

    let target = WhereColumn {
        table: None,
        column: "x".to_owned(),
    };

    // Null checks on the target column.
    assert!(expr_guarantees_non_null(&null_check("x", true), &target));
    assert!(!expr_guarantees_non_null(&null_check("x", false), &target));

    // Equality with a literal only helps when the literal is not NULL.
    assert!(expr_guarantees_non_null(
        &x_equals(Literal::Integer(5)),
        &target
    ));
    assert!(!expr_guarantees_non_null(&x_equals(Literal::Null), &target));

    // A guarantee about another column is irrelevant.
    assert!(!expr_guarantees_non_null(&null_check("y", true), &target));
}
/// With `n_rows == 0` the extended cost model must agree with the legacy one.
#[test]
fn test_estimate_cost_ext_zero_rows_matches_legacy() {
    // (access path, table pages, index pages)
    let cases = [
        (AccessPathKind::FullTableScan, 1000, 0),
        (AccessPathKind::IndexScanEquality, 1000, 100),
    ];

    for (kind, table_pages, index_pages) in cases {
        let legacy = estimate_cost(&kind, table_pages, index_pages);
        let ext = estimate_cost_ext(&kind, table_pages, index_pages, 0);
        assert!((ext - legacy).abs() < f64::EPSILON);
    }
}
/// Full-scan cost must strictly increase as the row count grows.
#[test]
fn test_estimate_cost_ext_full_scan_monotonic_in_n_rows() {
    let full_scan_cost =
        |n_rows: u64| estimate_cost_ext(&AccessPathKind::FullTableScan, 100, 0, n_rows);

    let c_small = full_scan_cost(1_000);
    let c_mid = full_scan_cost(100_000);
    let c_big = full_scan_cost(10_000_000);

    let strictly_increasing = c_small < c_mid && c_mid < c_big;
    assert!(
        strictly_increasing,
        "expected monotonic growth with n_rows, got {c_small} < {c_mid} < {c_big}"
    );
}
/// Range-scan cost grows when the row count grows.
#[test]
fn test_estimate_cost_ext_range_scan_monotonic_in_n_rows() {
    let range_scan = AccessPathKind::IndexScanRange { selectivity: 0.1 };

    let few_rows_cost = estimate_cost_ext(&range_scan, 1000, 100, 1_000);
    let many_rows_cost = estimate_cost_ext(&range_scan, 1000, 100, 1_000_000);

    assert!(many_rows_cost > few_rows_cost);
}
/// On a large table point lookups must rank below a full scan, point-lookup cost must be
/// (nearly) independent of the row count, and full-scan cost must grow strongly with it.
#[test]
fn test_estimate_cost_ext_ranks_point_access_below_full_scan_for_large_tables() {
    let tp = 100u64;
    let ip = 50u64;
    let big = 1_000_000u64;
    let cost_at = |kind: &AccessPathKind, n_rows: u64| estimate_cost_ext(kind, tp, ip, n_rows);

    // Ranking at one million rows.
    let full = cost_at(&AccessPathKind::FullTableScan, big);
    let eq = cost_at(&AccessPathKind::IndexScanEquality, big);
    let rowid = cost_at(&AccessPathKind::RowidLookup, big);
    assert!(
        rowid <= eq,
        "rowid lookup should not cost more than index equality: {rowid} vs {eq}"
    );
    assert!(
        eq < full,
        "index equality must rank below a full scan on a large table: {eq} vs {full}"
    );

    // Point lookups: the cost difference between 0 and 1M rows stays under one unit.
    let eq_without_rows = cost_at(&AccessPathKind::IndexScanEquality, 0);
    let rowid_without_rows = cost_at(&AccessPathKind::RowidLookup, 0);
    assert!(
        eq - eq_without_rows < 1.0,
        "equality cost must not scale with n_rows: delta {}",
        eq - eq_without_rows
    );
    assert!(
        rowid - rowid_without_rows < 1.0,
        "rowid cost must not scale with n_rows: delta {}",
        rowid - rowid_without_rows
    );

    // Full scan: equals the page count at zero rows and is >10x larger at 1M rows.
    let full_zero = cost_at(&AccessPathKind::FullTableScan, 0);
    assert!(
        (full_zero - 100.0).abs() < f64::EPSILON,
        "n_rows=0 full scan == table pages"
    );
    assert!(
        full > full_zero * 10.0,
        "full scan must grow strongly with n_rows: {full} vs {full_zero}"
    );
}
/// The cost advantage of a selective index range scan over a full scan must widen as the
/// table's row count grows.
#[test]
fn test_estimate_cost_ext_scales_full_vs_index_preference() {
    let range_scan = AccessPathKind::IndexScanRange { selectivity: 0.01 };

    // Full-scan cost minus index-scan cost at a given row count.
    let index_advantage = |n_rows: u64| {
        let full_cost = estimate_cost_ext(&AccessPathKind::FullTableScan, 10, 0, n_rows);
        let index_cost = estimate_cost_ext(&range_scan, 10, 5, n_rows);
        full_cost - index_cost
    };

    let gap_small = index_advantage(100_u64);
    let gap_big = index_advantage(10_000_000_u64);

    assert!(
        gap_big > gap_small,
        "expected bigger index advantage at high n_rows: small_gap={gap_small}, big_gap={gap_big}"
    );
}
/// Test helper: builds a `TableRefWithStats` from its four fields, taking the name as a
/// borrowed string for call-site brevity.
fn stats_ref(name: &str, n_pages: u64, n_rows: u64, has_stats: bool) -> TableRefWithStats {
    let name = String::from(name);
    TableRefWithStats {
        name,
        n_pages,
        n_rows,
        has_stats,
    }
}
/// With statistics available, the smaller of two relations is ordered first.
#[test]
fn test_order_joins_puts_small_relation_first() {
    let relations = vec![
        stats_ref("t_big", 200, 10_000, true),
        stats_ref("t_small", 1, 10, true),
    ];

    let perm = order_join_inputs_with_hints(&relations);
    assert_eq!(perm.len(), 2);

    let first = &relations[perm[0]];
    assert_eq!(
        first.name, "t_small",
        "small table should sort to build-side first, got perm={perm:?}",
    );
    let second = &relations[perm[1]];
    assert_eq!(second.name, "t_big");
}
/// When no input has statistics, the ordering is the identity permutation even though the
/// raw sizes differ.
#[test]
fn test_order_joins_no_stats_preserves_source_order() {
    let unanalyzed = vec![
        stats_ref("t_first", 200, 10_000, false),
        stats_ref("t_second", 1, 10, false),
        stats_ref("t_third", 5, 50, false),
    ];

    assert_eq!(
        order_join_inputs_with_hints(&unanalyzed),
        vec![0, 1, 2],
        "source order must be preserved when no stats are available",
    );
}
/// With statistics on only one input, ordering still happens: the zero-sized unknown table
/// is placed ahead of the large analyzed one.
#[test]
fn test_order_joins_partial_stats_still_orders() {
    let relations = vec![
        stats_ref("t_big_analyzed", 500, 100_000, true),
        stats_ref("t_unknown", 0, 0, false),
    ];

    let order = order_join_inputs_with_hints(&relations);

    let first = &relations[order[0]];
    let second = &relations[order[1]];
    assert_eq!(first.name, "t_unknown");
    assert_eq!(second.name, "t_big_analyzed");
}
/// Degenerate inputs: no tables gives an empty permutation, one table gives `[0]`.
#[test]
fn test_order_joins_trivial_sizes() {
    let nothing_to_order = order_join_inputs_with_hints(&[]);
    assert_eq!(nothing_to_order, Vec::<usize>::new());

    let lone_table = vec![stats_ref("only", 10, 100, true)];
    assert_eq!(order_join_inputs_with_hints(&lone_table), vec![0]);
}
/// Five analyzed tables supplied largest-first must come back smallest-first.
#[test]
fn test_order_joins_greedy_above_limit() {
    // a_5 (500 pages / 50k rows) down to a_1 (100 pages / 10k rows).
    let reversed: Vec<TableRefWithStats> = (1..=5_u64)
        .rev()
        .map(|rank| stats_ref(&format!("a_{rank}"), rank * 100, rank * 10_000, true))
        .collect();

    let perm = order_join_inputs_with_hints(&reversed);

    let mut ordered_names: Vec<&str> = Vec::with_capacity(perm.len());
    for &position in &perm {
        ordered_names.push(reversed[position].name.as_str());
    }
    assert_eq!(
        ordered_names,
        vec!["a_1", "a_2", "a_3", "a_4", "a_5"],
        "greedy path should sort ascending by scan cost",
    );
}
/// With four analyzed tables the smallest must lead and the largest must come last.
#[test]
fn test_order_joins_exhaustive_minimizes_weighted_cost() {
    let relations = vec![
        stats_ref("t_a", 100, 5_000, true),
        stats_ref("t_b", 50, 2_000, true),
        stats_ref("t_tiny", 1, 10, true),
        stats_ref("t_huge", 1_000, 1_000_000, true),
    ];

    let perm = order_join_inputs_with_hints(&relations);

    let leading = &relations[perm[0]];
    assert_eq!(
        leading.name, "t_tiny",
        "exhaustive search should pick the smallest relation first; perm={perm:?}",
    );
    let trailing = &relations[perm[3]];
    assert_eq!(
        trailing.name, "t_huge",
        "largest relation should sink to the last probe slot; perm={perm:?}",
    );
}
/// Equal-cost inputs keep their source order for both a three-table and a five-table
/// input, and repeated calls return the same permutation.
#[test]
fn test_order_joins_preserves_source_order_on_equal_cost_ties() {
    // Builds `count` identical-cost tables named `<prefix>0`, `<prefix>1`, ...
    let identical_tables = |prefix: &str, count: usize| -> Vec<TableRefWithStats> {
        (0..count)
            .map(|i| stats_ref(&format!("{prefix}{i}"), 100, 5_000, true))
            .collect()
    };

    let exhaustive = identical_tables("e", 3);
    assert_eq!(
        order_join_inputs_with_hints(&exhaustive),
        vec![0, 1, 2],
        "equal-cost tables keep source order (exhaustive branch)"
    );

    let greedy = identical_tables("g", 5);
    assert_eq!(
        order_join_inputs_with_hints(&greedy),
        vec![0, 1, 2, 3, 4],
        "equal-cost tables keep source order (greedy branch)"
    );

    // Determinism: two runs over the same input agree.
    for inputs in [&exhaustive, &greedy] {
        let first_run = order_join_inputs_with_hints(inputs);
        let second_run = order_join_inputs_with_hints(inputs);
        assert_eq!(first_run, second_run);
    }
}
/// `from_table_stats` sets `has_stats` for `Analyze`-sourced stats but not for
/// `Heuristic` ones, and copies the page and row counts.
#[test]
fn test_order_joins_from_table_stats_derives_has_stats() {
    let stats_with_source = |name: &str, source: StatsSource| TableStats {
        name: name.to_owned(),
        n_pages: 10,
        n_rows: 1000,
        source,
    };

    let analyzed = stats_with_source("t_analyzed", StatsSource::Analyze);
    let heuristic = stats_with_source("t_heur", StatsSource::Heuristic);

    let from_analyzed = TableRefWithStats::from_table_stats(&analyzed);
    let from_heuristic = TableRefWithStats::from_table_stats(&heuristic);

    assert!(from_analyzed.has_stats);
    assert!(!from_heuristic.has_stats);
    assert_eq!(from_analyzed.n_rows, 1000);
    assert_eq!(from_heuristic.n_pages, 10);
}
/// A 1%-selective index range scan must beat a full scan, while a 95%-selective one must
/// lose to it.
#[test]
fn test_cost_comparison_table_scan_vs_index() {
    let selective_range = AccessPathKind::IndexScanRange { selectivity: 0.01 };
    let unselective_range = AccessPathKind::IndexScanRange { selectivity: 0.95 };

    let full = estimate_cost(&AccessPathKind::FullTableScan, 1000, 0);

    let idx = estimate_cost(&selective_range, 1000, 100);
    assert!(
        idx < full,
        "index scan ({idx:.1}) should be cheaper than full scan ({full:.1}) at 1% selectivity"
    );

    let idx_high = estimate_cost(&unselective_range, 1000, 100);
    assert!(
        idx_high > full,
        "index scan ({idx_high:.1}) should be pricier than full scan ({full:.1}) at 95% selectivity"
    );
}
/// An equality on the leftmost index column is usable; an equality on a later column
/// alone is not.
#[test]
fn test_index_usability_equality_leftmost() {
    let composite = index_info("idx_abc", "t1", &["a", "b", "c"], false, 50);

    let on_leading_column = [eq_term("a")];
    assert!(matches!(
        analyze_index_usability(&composite, &on_leading_column),
        IndexUsability::Equality
    ));

    let on_second_column = [eq_term("b")];
    assert!(matches!(
        analyze_index_usability(&composite, &on_second_column),
        IndexUsability::NotUsable
    ));
}
/// A term qualified with another table must not match the index; a term qualified with
/// the index's own table, or not qualified at all, must.
#[test]
fn test_index_usability_qualified_column_rejects_wrong_table() {
    // Builds `<table>.a = 1` and leaks it to obtain the `'static` borrow the terms hold.
    fn leaked_equality_on(table: &str) -> &'static Expr {
        Box::leak(Box::new(Expr::BinaryOp {
            left: Box::new(Expr::Column(ColumnRef::qualified(table, "a"), Span::ZERO)),
            op: AstBinaryOp::Eq,
            right: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
            span: Span::ZERO,
        }))
    }

    let idx = index_info("idx_a", "t1", &["a"], false, 50);

    // t2.a = 1 refers to a different table than the index's "t1".
    let foreign_terms = [classify_where_term(leaked_equality_on("t2"))];
    assert!(matches!(
        analyze_index_usability(&idx, &foreign_terms),
        IndexUsability::NotUsable
    ));

    // t1.a = 1 matches the index's table.
    let matching_terms = [classify_where_term(leaked_equality_on("t1"))];
    assert!(matches!(
        analyze_index_usability(&idx, &matching_terms),
        IndexUsability::Equality
    ));

    // Unqualified a = <value> is accepted as well.
    let unqualified_terms = [eq_term("a")];
    assert!(matches!(
        analyze_index_usability(&idx, &unqualified_terms),
        IndexUsability::Equality
    ));
}
/// A range constraint is usable on the leading index column but not on the second column
/// when the leading column is unconstrained.
#[test]
fn test_index_usability_range_rightmost() {
    let two_column = index_info("idx_ab", "t1", &["a", "b"], false, 50);

    let range_on_leading = [range_term("a")];
    assert!(matches!(
        analyze_index_usability(&two_column, &range_on_leading),
        IndexUsability::Range { .. }
    ));

    let range_on_second = [range_term("b")];
    assert!(matches!(
        analyze_index_usability(&two_column, &range_on_second),
        IndexUsability::NotUsable
    ));
}
/// A three-element IN list on the indexed column is reported as an IN expansion with
/// three probes.
#[test]
fn test_index_usability_in_expansion() {
    let single_column = index_info("idx_col", "t1", &["col"], false, 50);
    let in_list_terms = [in_term("col", 3)];

    assert!(matches!(
        analyze_index_usability(&single_column, &in_list_terms),
        IndexUsability::InExpansion { probe_count: 3 }
    ));
}
/// Equality on the first column plus an IN list on the second is a one-column equality
/// prefix with a trailing three-probe IN expansion.
#[test]
fn test_index_usability_multicolumn_trailing_in_expansion() {
    let two_column = index_info("idx_ab", "t1", &["a", "b"], false, 50);
    let eq_then_in = [eq_term("a"), in_term("b", 3)];

    let usability = analyze_index_usability(&two_column, &eq_then_in);

    assert!(matches!(
        usability,
        IndexUsability::MultiColumnEquality {
            eq_columns: 1,
            trailing_constraint: MultiColumnTrailingConstraint::InExpansion { probe_count: 3 }
        }
    ));
}
/// Equality on the first column plus a LIKE prefix on the second is a one-column equality
/// prefix with a trailing LIKE-prefix constraint.
#[test]
fn test_index_usability_multicolumn_trailing_like_prefix() {
    let two_column = index_info("idx_ab", "t1", &["a", "b"], false, 50);
    let eq_then_like = [eq_term("a"), like_term("b", "123%")];

    let usability = analyze_index_usability(&two_column, &eq_then_like);

    assert!(matches!(
        usability,
        IndexUsability::MultiColumnEquality {
            eq_columns: 1,
            trailing_constraint: MultiColumnTrailingConstraint::LikePrefix
        }
    ));
}
/// An IN list with three values must cost three times a single equality probe.
#[test]
fn test_in_expansion_cost_scales_by_probe_count() {
    let table = table_stats("t1", 100, 1000);
    let idx = index_info("idx_col", "t1", &["col"], false, 50);
    let indexes = std::slice::from_ref(&idx);

    let equality_terms = [eq_term("col")];
    let in_list_terms = [in_term("col", 3)];

    let ap_eq = best_access_path(&table, indexes, &equality_terms, None);
    let ap_in = best_access_path(&table, indexes, &in_list_terms, None);

    let ratio = ap_in.estimated_cost / ap_eq.estimated_cost;
    assert!(
        (ratio - 3.0).abs() < 0.01,
        "IN(3) cost should be 3x equality cost: eq={} in3={} ratio={}",
        ap_eq.estimated_cost,
        ap_in.estimated_cost,
        ratio,
    );
}
/// An OR of four equalities on one column is classified as a four-value IN list and is
/// served by an equality scan on the matching index.
#[test]
fn test_best_access_path_or_disjunction_uses_in_expansion_index_probe() {
    let or_term = or_eq_term("a", &[1, 2, 3, 4]);
    assert!(matches!(or_term.kind, WhereTermKind::InList { count: 4 }));

    let table = table_stats("t1", 1_000, 100_000);
    let idx = index_info("idx_a", "t1", &["a"], false, 80);
    let chosen = best_access_path(&table, &[idx], &[or_term], None);

    assert_eq!(chosen.index.as_deref(), Some("idx_a"));
    assert!(matches!(chosen.kind, AccessPathKind::IndexScanEquality));
}
/// Adding a trailing IN list on the second index column must lower the row estimate
/// relative to the equality-only plan, to exactly 30 000 rows here.
#[test]
fn test_best_access_path_multicolumn_trailing_in_refines_row_estimate() {
    let table = table_stats("t1", 1_000, 1_000_000);
    let idx = index_info("idx_ab", "t1", &["a", "b"], false, 80);
    let indexes = std::slice::from_ref(&idx);

    let eq_only_terms = [eq_term("a")];
    let eq_then_in_terms = [eq_term("a"), in_term("b", 3)];

    let ap_eq = best_access_path(&table, indexes, &eq_only_terms, None);
    let ap_in = best_access_path(&table, indexes, &eq_then_in_terms, None);

    assert_eq!(ap_in.index.as_deref(), Some("idx_ab"));
    assert!(matches!(ap_in.kind, AccessPathKind::IndexScanEquality));
    assert!(
        ap_in.estimated_rows < ap_eq.estimated_rows,
        "composite IN should narrow row estimates: eq_only={} trailing_in={}",
        ap_eq.estimated_rows,
        ap_in.estimated_rows
    );
    assert!(
        (ap_in.estimated_rows - 30_000.0).abs() < f64::EPSILON,
        "expected 1e6 / 10^2 * 3 = 30000 rows, got {}",
        ap_in.estimated_rows
    );
}
/// When two IN lists constrain the same trailing column, the row estimate must reflect
/// the shorter list (2 probes -> 20 000 rows), not the longer one.
#[test]
fn test_best_access_path_multicolumn_trailing_in_prefers_tighter_probe_count()
-> Result<(), String> {
    let table = table_stats("t1", 1_000, 1_000_000);
    let idx = index_info("idx_ab", "t1", &["a", "b"], false, 80);
    let terms = [eq_term("a"), in_term("b", 5), in_term("b", 2)];

    let chosen = best_access_path(&table, &[idx], &terms, None);

    // Guard clauses, checked in the same order as the nested conditions they replace.
    if chosen.index.as_deref() != Some("idx_ab") {
        return Err("expected idx_ab access path".to_owned());
    }
    if chosen.kind != AccessPathKind::IndexScanEquality {
        return Err("expected equality access path".to_owned());
    }
    // Keep the `<` form so a NaN estimate still counts as a failure.
    if (chosen.estimated_rows - 20_000.0).abs() < f64::EPSILON {
        Ok(())
    } else {
        Err("expected tighter IN-list row estimate".to_owned())
    }
}
/// An OR-of-equalities on the trailing index column behaves like a four-value IN list
/// behind an equality prefix: equality scan on the composite index, 40 000 rows.
#[test]
fn test_best_access_path_multicolumn_or_disjunction_reuses_composite_in_expansion() {
    let or_on_b = or_eq_term("b", &[1, 2, 3, 4]);
    assert!(matches!(or_on_b.kind, WhereTermKind::InList { count: 4 }));

    let table = table_stats("t1", 1_000, 1_000_000);
    let idx = index_info("idx_ab", "t1", &["a", "b"], false, 80);
    let terms = [eq_term("a"), or_on_b];
    let chosen = best_access_path(&table, &[idx], &terms, None);

    assert_eq!(chosen.index.as_deref(), Some("idx_ab"));
    assert!(matches!(chosen.kind, AccessPathKind::IndexScanEquality));
    assert!(
        (chosen.estimated_rows - 40_000.0).abs() < f64::EPSILON,
        "expected 1e6 / 10^2 * 4 = 40000 rows, got {}",
        chosen.estimated_rows
    );
}
/// A trailing `LIKE '123%'` on the second column of `idx_ab` must produce a
/// range scan whose estimate is below the equality-only estimate and equal
/// to 10 000 rows.
#[test]
fn test_best_access_path_multicolumn_trailing_like_prefix_refines_row_estimate() {
    let stats = table_stats("t1", 1_000, 1_000_000);
    let composite = index_info("idx_ab", "t1", &["a", "b"], false, 80);

    // Baseline: leading-column equality only.
    let baseline = best_access_path(
        &stats,
        std::slice::from_ref(&composite),
        &[eq_term("a")],
        None,
    );
    // Refined: leading-column equality plus a LIKE prefix on the next column.
    let refined = best_access_path(
        &stats,
        std::slice::from_ref(&composite),
        &[eq_term("a"), like_term("b", "123%")],
        None,
    );

    assert_eq!(refined.index.as_deref(), Some("idx_ab"));
    assert!(matches!(
        refined.kind,
        AccessPathKind::IndexScanRange { .. }
    ));

    let (eq_rows, like_rows) = (baseline.estimated_rows, refined.estimated_rows);
    assert!(
        like_rows < eq_rows,
        "composite LIKE prefix should narrow row estimates: eq_only={} trailing_like={}",
        eq_rows,
        like_rows
    );
    assert!(
        (like_rows - 10_000.0).abs() < f64::EPSILON,
        "expected 1e6 / 10 * 0.1 = 10000 rows, got {}",
        like_rows
    );
}
/// A trailing `GLOB 'abc*'` on the second column of `idx_ab` must produce a
/// range scan on that index estimated at 10 000 rows.
#[test]
fn test_best_access_path_multicolumn_trailing_glob_prefix_refines_row_estimate() {
    let stats = table_stats("t1", 1_000, 1_000_000);
    let composite = index_info("idx_ab", "t1", &["a", "b"], false, 80);
    let where_terms = [eq_term("a"), glob_term("b", "abc*")];

    let chosen = best_access_path(&stats, &[composite], &where_terms, None);

    assert_eq!(chosen.index.as_deref(), Some("idx_ab"));
    assert!(matches!(chosen.kind, AccessPathKind::IndexScanRange { .. }));
    let rows = chosen.estimated_rows;
    assert!(
        (rows - 10_000.0).abs() < f64::EPSILON,
        "expected 1e6 / 10 * 0.1 = 10000 rows, got {}",
        rows
    );
}
/// Neither `LIKE 'Jo%'` nor `LIKE '%Jo%'` may make `idx_name` usable.
#[test]
fn test_index_usability_like_not_usable() {
    let name_index = index_info("idx_name", "t1", &["name"], false, 50);

    // Pattern with a leading literal prefix.
    let prefix_terms = [like_term("name", "Jo%")];
    let prefix_usability = analyze_index_usability(&name_index, &prefix_terms);
    assert!(matches!(prefix_usability, IndexUsability::NotUsable));

    // Pattern that starts with a wildcard.
    let infix_terms = [like_term("name", "%Jo%")];
    let infix_usability = analyze_index_usability(&name_index, &infix_terms);
    assert!(matches!(infix_usability, IndexUsability::NotUsable));
}
/// `LIKE '123%'` must be reported as a `LikePrefix` probe with bounds
/// `"123"` (low) and `"124"` (high).
#[test]
fn test_index_usability_like_case_stable_prefix() {
    let name_index = index_info("idx_name", "t1", &["name"], false, 50);
    let where_terms = [like_term("name", "123%")];

    let usability = analyze_index_usability(&name_index, &where_terms);

    assert!(matches!(
        &usability,
        IndexUsability::LikePrefix {
            low,
            high: Some(high)
        } if low == "123" && high == "124"
    ));
}
/// `GLOB 'Jo*'` must become a `LikePrefix` probe bounded by `"Jo"` and
/// `"Jp"`, while `GLOB '*Jo*'` must leave the index unusable.
#[test]
fn test_index_usability_glob_prefix() {
    let name_index = index_info("idx_name", "t1", &["name"], false, 50);

    let prefix_terms = [glob_term("name", "Jo*")];
    let prefix_usability = analyze_index_usability(&name_index, &prefix_terms);
    assert!(matches!(
        &prefix_usability,
        IndexUsability::LikePrefix {
            low,
            high: Some(high)
        } if low == "Jo" && high == "Jp"
    ));

    // A leading wildcard leaves no literal prefix to probe with.
    let wildcard_terms = [glob_term("name", "*Jo*")];
    let wildcard_usability = analyze_index_usability(&name_index, &wildcard_terms);
    assert!(matches!(wildcard_usability, IndexUsability::NotUsable));
}
/// With a GLOB prefix term listed before an IN term on the same column, the
/// reported usability must be the GLOB-derived `LikePrefix` (`"Jo"`..`"Jp"`).
#[test]
fn test_index_usability_leftmost_preserves_first_non_range_probe_order() {
    let name_index = index_info("idx_name", "t1", &["name"], false, 50);
    let where_terms = [glob_term("name", "Jo*"), in_term("name", 3)];

    let usability = analyze_index_usability(&name_index, &where_terms);

    assert!(matches!(
        &usability,
        IndexUsability::LikePrefix {
            low,
            high: Some(high)
        } if low == "Jo" && high == "Jp"
    ));
}
/// An equality term on the leading column wins over a range term on the same
/// column, even when the range term is listed first.
#[test]
fn test_index_usability_equality_beats_range_on_same_leftmost_column() {
    let single_column_index = index_info("idx_a", "t1", &["a"], false, 50);
    let where_terms = [range_term("a"), eq_term("a")];

    let usability = analyze_index_usability(&single_column_index, &where_terms);

    assert!(matches!(usability, IndexUsability::Equality));
}
/// An equality term on the leading column wins over a LIKE-prefix term on the
/// same column, even when the LIKE term is listed first.
#[test]
fn test_index_usability_equality_beats_like_prefix_on_same_leftmost_column() {
    let name_index = index_info("idx_name", "t1", &["name"], false, 50);
    let where_terms = [like_term("name", "123%"), eq_term("name")];

    let usability = analyze_index_usability(&name_index, &where_terms);

    assert!(matches!(usability, IndexUsability::Equality));
}
/// An index keyed solely on the expression `lower(name)` (no plain columns)
/// must be reported as `Equality` for the predicate `lower(name) = 'alice'`.
#[test]
fn test_index_usability_expression_index_equality() {
    // Produces a fresh `lower(name)` call node; used both as the index key and
    // as the left-hand side of the WHERE comparison.
    let lower_of_name = || Expr::FunctionCall {
        name: "lower".to_owned(),
        args: fsqlite_ast::FunctionArgs::List(vec![Expr::Column(
            ColumnRef::bare("name"),
            Span::ZERO,
        )]),
        distinct: false,
        order_by: vec![],
        filter: None,
        over: None,
        span: Span::ZERO,
    };

    // Leaked so the predicate can be borrowed as `&'static Expr`.
    let predicate: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(lower_of_name()),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(
            Literal::String("alice".to_owned()),
            Span::ZERO,
        )),
        span: Span::ZERO,
    }));

    let expression_index = IndexInfo {
        name: "idx_lower_name".to_owned(),
        table: "users".to_owned(),
        columns: vec![],
        unique: false,
        n_pages: 50,
        source: StatsSource::Heuristic,
        partial_where: None,
        expression_columns: vec![lower_of_name()],
    };

    let where_terms = [classify_where_term(predicate)];
    let usability = analyze_index_usability(&expression_index, &where_terms);
    assert!(
        matches!(usability, IndexUsability::Equality),
        "expression index must reach analyze_expression_index_usability \
         even though `columns` is empty (issue #63)"
    );
}
/// The index key is spelled `lower(name)` while the predicate uses
/// `LOWER(name) = 'alice'`; the difference in function-name case must not
/// prevent an `Equality` result.
#[test]
fn test_index_usability_expression_index_case_insensitive_function_name() {
    // Builds `<function>(name)` so the two spellings differ only in the name.
    let call_on_name = |function: &str| Expr::FunctionCall {
        name: function.to_owned(),
        args: fsqlite_ast::FunctionArgs::List(vec![Expr::Column(
            ColumnRef::bare("name"),
            Span::ZERO,
        )]),
        distinct: false,
        order_by: vec![],
        filter: None,
        over: None,
        span: Span::ZERO,
    };

    let index_key = call_on_name("lower");
    // Leaked so the predicate can be borrowed as `&'static Expr`.
    let predicate: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(call_on_name("LOWER")),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(
            Literal::String("alice".to_owned()),
            Span::ZERO,
        )),
        span: Span::ZERO,
    }));

    let expression_index = IndexInfo {
        name: "idx_lower_name".to_owned(),
        table: "users".to_owned(),
        columns: vec![],
        unique: false,
        n_pages: 50,
        source: StatsSource::Heuristic,
        partial_where: None,
        expression_columns: vec![index_key],
    };

    let where_terms = [classify_where_term(predicate)];
    let usability = analyze_index_usability(&expression_index, &where_terms);
    assert!(
        matches!(usability, IndexUsability::Equality),
        "case-insensitive function name match must reach Equality \
         (lower vs LOWER)"
    );
}
/// An index keyed on `lower(name)` must be `NotUsable` for the predicate
/// `upper(name) = 'ALICE'`, which calls a different function.
#[test]
fn test_index_usability_expression_index_non_matching() {
    // Builds `<function>(name)`; the predicate and the key use different names.
    let call_on_name = |function: &str| Expr::FunctionCall {
        name: function.to_owned(),
        args: fsqlite_ast::FunctionArgs::List(vec![Expr::Column(
            ColumnRef::bare("name"),
            Span::ZERO,
        )]),
        distinct: false,
        order_by: vec![],
        filter: None,
        over: None,
        span: Span::ZERO,
    };

    // Leaked so the predicate can be borrowed as `&'static Expr`.
    let predicate: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(call_on_name("upper")),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(
            Literal::String("ALICE".to_owned()),
            Span::ZERO,
        )),
        span: Span::ZERO,
    }));

    let expression_index = IndexInfo {
        name: "idx_lower_name".to_owned(),
        table: "users".to_owned(),
        columns: vec![],
        unique: false,
        n_pages: 50,
        source: StatsSource::Heuristic,
        partial_where: None,
        expression_columns: vec![call_on_name("lower")],
    };

    let where_terms = [classify_where_term(predicate)];
    let usability = analyze_index_usability(&expression_index, &where_terms);
    assert!(
        matches!(usability, IndexUsability::NotUsable),
        "expression index must reject structurally-unrelated WHERE terms"
    );
}
/// End-to-end variant of the expression-index test that uses the real parser.
///
/// The key `lower(name)` is parsed on its own and the predicate is taken from
/// a full SELECT, so the two `lower(name)` nodes carry different spans. The
/// test asserts that the spans differ, that the nodes still compare equal,
/// and that the index is reported as `Equality`.
#[test]
fn test_index_usability_expression_index_real_parser_spans_differ() {
    use fsqlite_ast::{SelectCore, Statement};

    let index_key =
        fsqlite_parser::expr::parse_expr("lower(name)").expect("key expression should parse");

    let sql = "SELECT id FROM users WHERE lower(name) = 'alice'";
    let mut parse_scratch = fsqlite_parser::StatementParseScratch::default();
    let parsed = fsqlite_parser::parse_single_statement_with_scratch(sql, &mut parse_scratch)
        .expect("select should parse");
    let Statement::Select(select_stmt) = parsed else {
        panic!("expected SELECT statement");
    };
    let SelectCore::Select { where_clause, .. } = select_stmt.body.select else {
        panic!("expected SELECT core");
    };
    let predicate = *where_clause.expect("WHERE clause must be present");

    // Pull out the `lower(name)` operand on the left of the `=`.
    let Expr::BinaryOp { left, .. } = &predicate else {
        panic!("expected BinaryOp for `lower(name) = 'alice'`");
    };
    let predicate_lhs = left.as_ref().clone();

    assert_ne!(
        predicate_lhs.span(),
        index_key.span(),
        "real parser should assign different spans across parse \
         contexts: stand-alone `lower(name)` starts at 0 but the \
         WHERE-side one starts after `SELECT id FROM users WHERE `"
    );
    assert_eq!(
        predicate_lhs, index_key,
        "Expr::PartialEq is manually span-insensitive in fsqlite-ast; \
         if that invariant breaks, the expression-index planner stops \
         matching across parse contexts (issue #63)"
    );

    let expression_index = IndexInfo {
        name: "idx_lower_name".to_owned(),
        table: "users".to_owned(),
        columns: vec![],
        unique: false,
        n_pages: 50,
        source: StatsSource::Heuristic,
        partial_where: None,
        expression_columns: vec![index_key],
    };

    // Leaked so the predicate can be borrowed as `&'static Expr`.
    let leaked_predicate: &'static Expr = Box::leak(Box::new(predicate));
    let where_terms = [classify_where_term(leaked_predicate)];
    let usability = analyze_index_usability(&expression_index, &where_terms);
    assert!(
        matches!(usability, IndexUsability::Equality),
        "real-parser expression index lookup must reach Equality"
    );
}
/// An index with neither plain columns nor expression columns must be
/// `NotUsable`, even for a simple equality term.
#[test]
fn test_index_usability_empty_index_still_not_usable() {
    let keyless_index = IndexInfo {
        name: "idx_empty".to_owned(),
        table: "t1".to_owned(),
        columns: vec![],
        expression_columns: vec![],
        partial_where: None,
        unique: false,
        n_pages: 50,
        source: StatsSource::Heuristic,
    };
    let where_terms = [eq_term("a")];

    let usability = analyze_index_usability(&keyless_index, &where_terms);

    assert!(matches!(usability, IndexUsability::NotUsable));
}
/// `eq_term("x")` must be classified as `Equality` on column `x`.
#[test]
fn test_classify_where_term_equality() {
    let classified = eq_term("x");
    assert!(matches!(classified.kind, WhereTermKind::Equality));

    let referenced = classified.column.as_ref().unwrap();
    assert_eq!(referenced.column, "x");
}
/// `range_term("y")` must be classified as `Range` on column `y`.
#[test]
fn test_classify_where_term_range() {
    let classified = range_term("y");
    assert!(matches!(classified.kind, WhereTermKind::Range));

    let referenced = classified.column.as_ref().unwrap();
    assert_eq!(referenced.column, "y");
}
/// The predicate `rowid = 42` must be classified as `RowidEquality`.
#[test]
fn test_classify_where_term_rowid() {
    let rowid_column = Expr::Column(ColumnRef::bare("rowid"), Span::ZERO);
    let forty_two = Expr::Literal(Literal::Integer(42), Span::ZERO);
    // Leaked so the predicate can be borrowed as `&'static Expr`.
    let predicate: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(rowid_column),
        op: AstBinaryOp::Eq,
        right: Box::new(forty_two),
        span: Span::ZERO,
    }));

    let classified = classify_where_term(predicate);

    assert!(matches!(classified.kind, WhereTermKind::RowidEquality));
}
/// `a = 1 AND b > 5` must decompose into exactly two WHERE terms.
#[test]
fn test_decompose_where_and() {
    let a_equals_one = Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
        span: Span::ZERO,
    };
    let b_above_five = Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("b"), Span::ZERO)),
        op: AstBinaryOp::Gt,
        right: Box::new(Expr::Literal(Literal::Integer(5), Span::ZERO)),
        span: Span::ZERO,
    };
    let conjunction = Expr::BinaryOp {
        left: Box::new(a_equals_one),
        op: AstBinaryOp::And,
        right: Box::new(b_above_five),
        span: Span::ZERO,
    };

    let decomposed = decompose_where(&conjunction);

    assert_eq!(decomposed.len(), 2);
}
/// A single-table plan must list only that table and cost the same as a full
/// table scan computed by `estimate_cost_ext` for 100 pages / 1000 rows.
#[test]
fn test_join_ordering_single_table() {
    let full_scan_cost = estimate_cost_ext(&AccessPathKind::FullTableScan, 100, 0, 1000);
    let only_table = [table_stats("t1", 100, 1000)];

    let plan = order_joins(&only_table, &[], &[], None, &[]);

    assert_eq!(plan.join_order, vec!["t1"]);
    let cost_delta = (plan.total_cost - full_scan_cost).abs();
    assert!(cost_delta < 1e-9);
}
/// With no indexes or terms, the two-table plan must start with `t1`
/// (10 pages / 100 rows) rather than `t2` (1000 pages / 50000 rows).
#[test]
fn test_join_ordering_two_tables() {
    let small = table_stats("t1", 10, 100);
    let large = table_stats("t2", 1000, 50000);

    let plan = order_joins(&[small, large], &[], &[], None, &[]);

    assert_eq!(plan.join_order.len(), 2);
    assert_eq!(plan.join_order[0], "t1");
}
/// A three-table plan must include every input table exactly once in its
/// join order and report a strictly positive total cost.
#[test]
fn test_join_ordering_three_tables() {
    let inputs = [
        table_stats("t1", 10, 100),
        table_stats("t2", 100, 1000),
        table_stats("t3", 1000, 10000),
    ];

    let plan = order_joins(&inputs, &[], &[], None, &[]);

    assert_eq!(plan.join_order.len(), 3);
    assert!(inputs
        .iter()
        .all(|input| plan.join_order.contains(&input.name)));
    assert!(plan.total_cost > 0.0);
}
/// With an index on `t2.fk` and an equality term on `fk`, the plan must start
/// with `t1` and use an index for the second access path.
#[test]
fn test_join_ordering_prefers_indexed() {
    let inputs = [table_stats("t1", 10, 100), table_stats("t2", 1000, 50000)];
    let fk_index = [index_info("idx_t2_fk", "t2", &["fk"], false, 50)];
    let where_terms = [eq_term("fk")];

    let plan = order_joins(&inputs, &fk_index, &where_terms, None, &[]);

    assert_eq!(plan.join_order[0], "t1");
    let second_path = &plan.access_paths[1];
    assert!(second_path.index.is_some());
}
/// Six tables (`t1`..`t6`) must all appear in the resulting join order.
#[test]
fn test_join_ordering_beam_search_bounded() {
    let mut inputs: Vec<TableStats> = Vec::with_capacity(6);
    for ordinal in 1..=6_u64 {
        // Pages and rows grow linearly with the table's ordinal.
        inputs.push(table_stats(
            &format!("t{ordinal}"),
            ordinal * 10,
            ordinal * 100,
        ));
    }

    let plan = order_joins(&inputs, &[], &[], None, &[]);

    assert_eq!(plan.join_order.len(), 6);
    assert!(inputs
        .iter()
        .all(|input| plan.join_order.contains(&input.name)));
}
/// The total cost of a three-way join must exceed the figure obtained when
/// each table's cost is scaled only by its immediate predecessor.
#[test]
fn test_three_way_join_cost_scales_by_cumulative_rows() {
    let inputs = [
        table_stats("small", 1, 10),
        table_stats("medium", 10, 100),
        table_stats("large", 100, 1000),
    ];
    let plan_sml = order_joins(&inputs, &[], &[], None, &[]);

    // Kept as a plain sum of products so each per-table contribution stays
    // readable; the lint would otherwise suggest `mul_add`.
    #[allow(clippy::suboptimal_flops)]
    let cost_if_only_last = 1.0_f64 + 10.0 * 10.0 + 100.0 * 100.0;

    let planned_cost = plan_sml.total_cost;
    assert!(
        planned_cost > cost_if_only_last,
        "3-way join cost should scale by cumulative rows, not just last table: plan_cost={} bug_cost={}",
        planned_cost,
        cost_if_only_last,
    );
}
/// One table, non-star query: `compute_mx_choice` must return 1.
#[test]
fn test_mx_choice_single_table() {
    let mx_choice = compute_mx_choice(1, false);
    assert_eq!(mx_choice, 1);
}
/// Two tables, non-star query: `compute_mx_choice` must return 5.
#[test]
fn test_mx_choice_two_tables() {
    let mx_choice = compute_mx_choice(2, false);
    assert_eq!(mx_choice, 5);
}
/// Three tables, non-star query: `compute_mx_choice` must return 12.
#[test]
fn test_mx_choice_three_tables() {
    let mx_choice = compute_mx_choice(3, false);
    assert_eq!(mx_choice, 12);
}
/// Four tables with the star-query flag set: `compute_mx_choice` must
/// return 18.
#[test]
fn test_mx_choice_star_query() {
    let mx_choice = compute_mx_choice(4, true);
    assert_eq!(mx_choice, 18);
}
/// One `fact` table joined to three `dim*` tables must be detected as a star
/// query.
#[test]
fn test_detect_star_query_true() {
    let inputs = [
        table_stats("fact", 1000, 100_000),
        table_stats("dim1", 10, 100),
        table_stats("dim2", 10, 100),
        table_stats("dim3", 10, 100),
    ];
    // Every join term links a `fact` foreign key to a dimension's `id`.
    let fact_to_dimension = [("d1_id", "dim1"), ("d2_id", "dim2"), ("d3_id", "dim3")]
        .map(|(fact_column, dimension)| join_term("fact", fact_column, dimension, "id"));

    assert!(detect_star_query(&inputs, &fact_to_dimension));
}
/// A chain `t1 - t2 - t3 - t4` of equally sized tables must not be detected
/// as a star query.
#[test]
fn test_detect_star_query_false() {
    let chain_tables = [
        table_stats("t1", 100, 1000),
        table_stats("t2", 100, 1000),
        table_stats("t3", 100, 1000),
        table_stats("t4", 100, 1000),
    ];
    // Each table joins only to its successor.
    let chain_links = [
        join_term("t1", "id", "t2", "fk1"),
        join_term("t2", "id", "t3", "fk2"),
        join_term("t3", "id", "t4", "fk3"),
    ];

    let is_star = detect_star_query(&chain_tables, &chain_links);

    assert!(!is_star);
}
/// A `(t1, t2)` cross-join pair must keep `t1` before `t2`, even though `t1`
/// has far more pages and rows than `t2`.
#[test]
fn test_cross_join_no_reorder() {
    let big_first = table_stats("t1", 1000, 50000);
    let small_second = table_stats("t2", 10, 100);
    let cross_pairs = [("t1".to_owned(), "t2".to_owned())];

    let plan = order_joins(&[big_first, small_second], &[], &[], None, &cross_pairs);

    assert_eq!(plan.join_order[0], "t1");
    assert_eq!(plan.join_order[1], "t2");
}
/// `single_table_source_name_and_alias` must return the table name (and the
/// alias, when present) for a join-free FROM clause and an error once the
/// FROM clause contains a join.
#[test]
fn test_single_table_source_name_and_alias() {
    use fsqlite_ast::{JoinClause, JoinKind, JoinType};

    // `users`, optionally aliased.
    let users_table = |alias: Option<&str>| TableOrSubquery::Table {
        name: QualifiedName::bare("users"),
        alias: alias.map(str::to_owned),
        index_hint: None,
        time_travel: None,
    };
    let from_clause =
        |source: TableOrSubquery, joins: Vec<JoinClause>| FromClause { source, joins };

    // No alias.
    let unaliased = from_clause(users_table(None), vec![]);
    let (name, alias) = single_table_source_name_and_alias(&unaliased).unwrap();
    assert_eq!(name, "users");
    assert_eq!(alias, None);

    // With alias `u`.
    let aliased = from_clause(users_table(Some("u")), vec![]);
    let (name, alias) = single_table_source_name_and_alias(&aliased).unwrap();
    assert_eq!(name, "users");
    assert_eq!(alias, Some("u"));

    // A self-join makes the source no longer a single table.
    let inner_join = JoinClause {
        join_type: JoinType {
            natural: false,
            kind: JoinKind::Inner,
        },
        table: users_table(None),
        constraint: None,
    };
    let joined = from_clause(users_table(None), vec![inner_join]);
    assert!(single_table_source_name_and_alias(&joined).is_err());
}
/// Walks the accept/reject branches of `from_clause_supports_leapfrog`.
///
/// Accepted: no FROM clause, an inner join with a column-to-column `ON`, and
/// an inner join with a non-empty `USING`. Rejected: an `ON` comparing a
/// column with a literal, an empty `USING`, a natural join, and a left join.
#[test]
fn test_from_clause_supports_leapfrog_branches() {
    use fsqlite_ast::{JoinClause, JoinConstraint, JoinKind, JoinType};

    let table_named = |name: &str| TableOrSubquery::Table {
        name: QualifiedName::bare(name),
        alias: None,
        index_hint: None,
        time_travel: None,
    };
    let column = |name: &str| Expr::Column(ColumnRef::bare(name), Span::ZERO);
    // `a <join> b` with the given join type and constraint.
    let a_join_b = |join_type: JoinType, constraint: Option<JoinConstraint>| FromClause {
        source: table_named("a"),
        joins: vec![JoinClause {
            join_type,
            table: table_named("b"),
            constraint,
        }],
    };
    let plain_inner = || JoinType {
        natural: false,
        kind: JoinKind::Inner,
    };

    // --- accepted shapes ---
    assert!(from_clause_supports_leapfrog(None));

    let column_eq_column = Expr::BinaryOp {
        left: Box::new(column("x")),
        op: AstBinaryOp::Eq,
        right: Box::new(column("y")),
        span: Span::ZERO,
    };
    let on_columns = a_join_b(plain_inner(), Some(JoinConstraint::On(column_eq_column)));
    assert!(from_clause_supports_leapfrog(Some(&on_columns)));

    let using_x = a_join_b(
        plain_inner(),
        Some(JoinConstraint::Using(vec!["x".to_owned()])),
    );
    assert!(from_clause_supports_leapfrog(Some(&using_x)));

    // --- rejected shapes ---
    let column_eq_literal = Expr::BinaryOp {
        left: Box::new(column("x")),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::Integer(5), Span::ZERO)),
        span: Span::ZERO,
    };
    let on_literal = a_join_b(plain_inner(), Some(JoinConstraint::On(column_eq_literal)));
    assert!(!from_clause_supports_leapfrog(Some(&on_literal)));

    let using_nothing = a_join_b(plain_inner(), Some(JoinConstraint::Using(vec![])));
    assert!(!from_clause_supports_leapfrog(Some(&using_nothing)));

    let natural_inner = a_join_b(
        JoinType {
            natural: true,
            kind: JoinKind::Inner,
        },
        None,
    );
    assert!(!from_clause_supports_leapfrog(Some(&natural_inner)));

    let left_outer = a_join_b(
        JoinType {
            natural: false,
            kind: JoinKind::Left,
        },
        None,
    );
    assert!(!from_clause_supports_leapfrog(Some(&left_outer)));
}
/// With only two tables, enabling `leapfrog_join` must still yield a single
/// `HashJoin` segment.
#[test]
fn test_two_way_join_stays_hash_even_with_leapfrog_enabled() {
    let inputs = [table_stats("t1", 10, 100), table_stats("t2", 12, 120)];
    let join_terms = [join_term("t1", "k", "t2", "k")];
    let leapfrog_on = PlannerFeatureFlags {
        leapfrog_join: true,
        ..PlannerFeatureFlags::default()
    };

    let plan = order_joins_with_hints_and_features(
        &inputs,
        &[],
        &join_terms,
        None,
        &[],
        None,
        None,
        leapfrog_on,
    );

    assert_eq!(plan.join_segments.len(), 1);
    let only_segment = &plan.join_segments[0];
    assert_eq!(only_segment.operator, JoinOperator::HashJoin);
}
/// Three equally sized tables joined on a shared key `k` must produce a
/// three-relation `LeapfrogTriejoin` segment when `leapfrog_join` is enabled.
#[test]
fn test_three_way_equi_join_uses_leapfrog_when_feature_enabled() {
    let inputs = [
        table_stats("a", 1024, 1_000_000),
        table_stats("b", 1024, 1_000_000),
        table_stats("c", 1024, 1_000_000),
    ];
    let join_terms = [join_term("a", "k", "b", "k"), join_term("b", "k", "c", "k")];
    let leapfrog_on = PlannerFeatureFlags {
        leapfrog_join: true,
        ..PlannerFeatureFlags::default()
    };

    let plan = order_joins_with_hints_and_features(
        &inputs,
        &[],
        &join_terms,
        None,
        &[],
        None,
        None,
        leapfrog_on,
    );

    let has_leapfrog_triple = plan.join_segments.iter().any(|candidate| {
        candidate.relations.len() == 3 && candidate.operator == JoinOperator::LeapfrogTriejoin
    });
    assert!(
        has_leapfrog_triple,
        "expected Leapfrog segment, got {:?}",
        plan.join_segments
    );
}
/// With `leapfrog_join` disabled, the same three-table shared-key join must
/// collapse to a single `HashJoin` segment.
#[test]
fn test_leapfrog_feature_flag_gates_routing() {
    let inputs = [
        table_stats("a", 1024, 1_000_000),
        table_stats("b", 1024, 1_000_000),
        table_stats("c", 1024, 1_000_000),
    ];
    let join_terms = [join_term("a", "k", "b", "k"), join_term("b", "k", "c", "k")];
    let leapfrog_off = PlannerFeatureFlags {
        leapfrog_join: false,
        ..PlannerFeatureFlags::default()
    };

    let plan = order_joins_with_hints_and_features(
        &inputs,
        &[],
        &join_terms,
        None,
        &[],
        None,
        None,
        leapfrog_off,
    );

    assert_eq!(plan.join_segments.len(), 1);
    let only_segment = &plan.join_segments[0];
    assert_eq!(only_segment.operator, JoinOperator::HashJoin);
}
/// Five tables forming two independent join groups (`a-b-c` and `d-e`) must
/// yield both a three-relation Leapfrog segment and a two-relation hash
/// segment.
#[test]
fn test_mixed_join_segments_support_leapfrog_and_hash() {
    let inputs = [
        table_stats("a", 512, 900_000),
        table_stats("b", 512, 900_000),
        table_stats("c", 512, 900_000),
        table_stats("d", 64, 10_000),
        table_stats("e", 64, 10_000),
    ];
    let join_terms = [
        join_term("a", "k", "b", "k"),
        join_term("b", "k", "c", "k"),
        join_term("d", "k", "e", "k"),
    ];
    let leapfrog_on = PlannerFeatureFlags {
        leapfrog_join: true,
        ..PlannerFeatureFlags::default()
    };

    let plan = order_joins_with_hints_and_features(
        &inputs,
        &[],
        &join_terms,
        None,
        &[],
        None,
        None,
        leapfrog_on,
    );

    let has_leapfrog_triple = plan.join_segments.iter().any(|candidate| {
        candidate.relations.len() == 3 && candidate.operator == JoinOperator::LeapfrogTriejoin
    });
    assert!(
        has_leapfrog_triple,
        "expected 3-way Leapfrog segment, got {:?}",
        plan.join_segments
    );

    let has_hash_pair = plan.join_segments.iter().any(|candidate| {
        candidate.relations.len() == 2 && candidate.operator == JoinOperator::HashJoin
    });
    assert!(
        has_hash_pair,
        "expected 2-way hash segment, got {:?}",
        plan.join_segments
    );
}
/// When the join terms use different columns (`a.x = b.x`, `b.y = c.y`),
/// every resulting segment must be a `HashJoin` even with `leapfrog_join`
/// enabled.
#[test]
fn test_incompatible_trie_ordering_falls_back_to_hash_join() {
    let inputs = [
        table_stats("a", 256, 100_000),
        table_stats("b", 256, 100_000),
        table_stats("c", 256, 100_000),
    ];
    let join_terms = [join_term("a", "x", "b", "x"), join_term("b", "y", "c", "y")];
    let leapfrog_on = PlannerFeatureFlags {
        leapfrog_join: true,
        ..PlannerFeatureFlags::default()
    };

    let plan = order_joins_with_hints_and_features(
        &inputs,
        &[],
        &join_terms,
        None,
        &[],
        None,
        None,
        leapfrog_on,
    );

    let hash_only = plan
        .join_segments
        .iter()
        .all(|candidate| candidate.operator == JoinOperator::HashJoin);
    assert!(
        hash_only,
        "incompatible trie ordering should stay hash-only: {:?}",
        plan.join_segments
    );
}
/// A FROM clause containing `a LEFT JOIN b ON a.k = b.k` must make
/// `choose_join_segments` return a single `HashJoin` segment, even with
/// `leapfrog_join` enabled and three tables sharing key `k`.
#[test]
fn test_outer_join_shape_forces_hash_fallback() {
    use fsqlite_ast::{JoinClause, JoinConstraint, JoinKind, JoinType};

    let plain_table = |name: &str| TableOrSubquery::Table {
        name: QualifiedName::bare(name),
        alias: None,
        index_hint: None,
        time_travel: None,
    };
    let qualified_key =
        |table: &str| Expr::Column(ColumnRef::qualified(table, "k"), Span::ZERO);

    let on_shared_key = Expr::BinaryOp {
        left: Box::new(qualified_key("a")),
        op: AstBinaryOp::Eq,
        right: Box::new(qualified_key("b")),
        span: Span::ZERO,
    };
    let left_join_from = FromClause {
        source: plain_table("a"),
        joins: vec![JoinClause {
            join_type: JoinType {
                natural: false,
                kind: JoinKind::Left,
            },
            table: plain_table("b"),
            constraint: Some(JoinConstraint::On(on_shared_key)),
        }],
    };

    let inputs = [
        table_stats("a", 128, 100_000),
        table_stats("b", 128, 100_000),
        table_stats("c", 128, 100_000),
    ];
    let fixed_order = vec!["a".to_owned(), "b".to_owned(), "c".to_owned()];
    let join_terms = [join_term("a", "k", "b", "k"), join_term("b", "k", "c", "k")];
    let leapfrog_on = PlannerFeatureFlags {
        leapfrog_join: true,
        ..PlannerFeatureFlags::default()
    };

    let segments = choose_join_segments(
        &fixed_order,
        &inputs,
        &join_terms,
        Some(&left_join_from),
        leapfrog_on,
    );

    assert_eq!(segments.len(), 1);
    assert_eq!(segments[0].operator, JoinOperator::HashJoin);
}
/// `collect_table_index_hints` must expose each table's hint under both the
/// table name and its alias: `INDEXED BY idx_users_email` for `users`/`u`
/// and `NOT INDEXED` for `events`/`e`.
#[test]
fn test_collect_table_index_hints_from_clause_includes_aliases() {
    use fsqlite_ast::{JoinClause, JoinKind, JoinType};

    let users_source = TableOrSubquery::Table {
        name: QualifiedName::bare("users"),
        alias: Some("u".to_owned()),
        index_hint: Some(IndexHint::IndexedBy("idx_users_email".to_owned())),
        time_travel: None,
    };
    let events_join = JoinClause {
        join_type: JoinType {
            kind: JoinKind::Inner,
            natural: false,
        },
        table: TableOrSubquery::Table {
            name: QualifiedName::bare("events"),
            alias: Some("e".to_owned()),
            index_hint: Some(IndexHint::NotIndexed),
            time_travel: None,
        },
        constraint: None,
    };
    let from_clause = FromClause {
        source: users_source,
        joins: vec![events_join],
    };

    let collected = collect_table_index_hints(&from_clause);

    // The first table is reachable by name and by alias.
    assert!(matches!(
        collected.get("users"),
        Some(IndexHint::IndexedBy(name)) if name == "idx_users_email"
    ));
    assert!(matches!(
        collected.get("u"),
        Some(IndexHint::IndexedBy(name)) if name == "idx_users_email"
    ));
    // So is the joined table.
    assert!(matches!(
        collected.get("events"),
        Some(IndexHint::NotIndexed)
    ));
    assert!(matches!(collected.get("e"), Some(IndexHint::NotIndexed)));
}
/// A `NOT INDEXED` hint on `t1` must force a full table scan even though a
/// matching index on `a` is available.
#[test]
fn test_order_joins_with_hints_respects_not_indexed() {
    let inputs = [table_stats("t1", 1000, 50000)];
    let candidate_index = index_info("idx_t1_a", "t1", &["a"], false, 100);
    let where_terms = [eq_term("a")];

    let mut table_hints = BTreeMap::new();
    table_hints.insert(canonical_table_key("t1"), IndexHint::NotIndexed);

    let plan = order_joins_with_hints(
        &inputs,
        &[candidate_index],
        &where_terms,
        None,
        &[],
        Some(&table_hints),
        None,
    );

    assert_eq!(plan.join_order, vec!["t1".to_owned()]);
    assert_eq!(plan.access_paths.len(), 1);
    let only_path = &plan.access_paths[0];
    assert!(matches!(only_path.kind, AccessPathKind::FullTableScan));
}
/// An `INDEXED BY idx_slow` hint must select `idx_slow` (600 pages) even
/// though `idx_fast` (10 pages) covers the same column.
#[test]
fn test_order_joins_with_hints_respects_indexed_by() {
    let inputs = [table_stats("t1", 2000, 100_000)];
    let candidates = [
        index_info("idx_fast", "t1", &["a"], false, 10),
        index_info("idx_slow", "t1", &["a"], false, 600),
    ];
    let where_terms = [eq_term("a")];

    let mut table_hints = BTreeMap::new();
    table_hints.insert(
        canonical_table_key("t1"),
        IndexHint::IndexedBy("idx_slow".to_owned()),
    );

    let plan = order_joins_with_hints(
        &inputs,
        &candidates,
        &where_terms,
        None,
        &[],
        Some(&table_hints),
        None,
    );

    assert_eq!(plan.access_paths.len(), 1);
    let only_path = &plan.access_paths[0];
    assert_eq!(only_path.index.as_deref(), Some("idx_slow"));
}
/// Two equivalent indexes are offered twice, in opposite orders, with the
/// same `CrackingHintStore`. The first run picks `idx_a` and records it in
/// the store; the second run must pick `idx_a` again although `idx_b` is
/// listed first.
#[test]
fn test_order_joins_with_hints_reuses_cracking_store() {
    let inputs = [table_stats("t1", 1000, 50000)];
    let idx_a = index_info("idx_a", "t1", &["a"], false, 40);
    let idx_b = index_info("idx_b", "t1", &["a"], false, 40);
    let where_terms = [eq_term("a")];
    let mut cracking_store = CrackingHintStore::default();

    // First run: `idx_a` is listed first.
    let a_then_b = [idx_a.clone(), idx_b.clone()];
    let first = order_joins_with_hints(
        &inputs,
        &a_then_b,
        &where_terms,
        None,
        &[],
        None,
        Some(&mut cracking_store),
    );
    assert_eq!(first.access_paths[0].index.as_deref(), Some("idx_a"));
    assert_eq!(cracking_store.preferred_index("t1"), Some("idx_a"));

    // Second run: order flipped, same store.
    let b_then_a = [idx_b, idx_a];
    let second = order_joins_with_hints(
        &inputs,
        &b_then_a,
        &where_terms,
        None,
        &[],
        None,
        Some(&mut cracking_store),
    );
    assert_eq!(second.access_paths[0].index.as_deref(), Some("idx_a"));
}
/// When the needed columns (`a`, `b`) are all part of `idx_t1_ab`, the chosen
/// access path must be a `CoveringIndexScan`.
#[test]
fn test_planner_selects_covering_index() {
    let stats = table_stats("t1", 1000, 50000);
    let two_column_index = index_info("idx_t1_ab", "t1", &["a", "b"], false, 100);
    let where_terms = [eq_term("a")];
    let projected = ["a".to_owned(), "b".to_owned()];

    let chosen = best_access_path(&stats, &[two_column_index], &where_terms, Some(&projected));

    assert!(matches!(
        chosen.kind,
        AccessPathKind::CoveringIndexScan { .. }
    ));
}
/// Projecting only `rowid` must still allow a `CoveringIndexScan` on an index
/// over column `a`.
#[test]
fn test_planner_treats_rowid_projection_as_covering_index_payload() {
    let stats = table_stats("t1", 1000, 50000);
    let single_column_index = index_info("idx_t1_a", "t1", &["a"], false, 100);
    let where_terms = [eq_term("a")];
    let projected = ["rowid".to_owned()];

    let chosen = best_access_path(
        &stats,
        &[single_column_index],
        &where_terms,
        Some(&projected),
    );

    assert!(matches!(
        chosen.kind,
        AccessPathKind::CoveringIndexScan { .. }
    ));
}
/// With no indexes and no terms, the planner must choose a full table scan
/// costed the same as `estimate_cost_ext` for 100 pages / 1000 rows.
#[test]
fn test_planner_heuristic_fallback() {
    let full_scan_cost = estimate_cost_ext(&AccessPathKind::FullTableScan, 100, 0, 1000);
    let stats = table_stats("t1", 100, 1000);

    let chosen = best_access_path(&stats, &[], &[], None);

    assert!(matches!(chosen.kind, AccessPathKind::FullTableScan));
    let cost_delta = (chosen.estimated_cost - full_scan_cost).abs();
    assert!(cost_delta < 1e-9);
}
/// The `Display` output of a two-table hash-join plan must mention the plan
/// header, the scan of `t1`, the join-operator section, the hash join, and
/// the index used for `t2`.
#[test]
fn test_query_plan_display() {
    let scan_t1 = AccessPath {
        table: "t1".to_owned(),
        kind: AccessPathKind::FullTableScan,
        index: None,
        estimated_cost: 100.0,
        estimated_rows: 1000.0,
        time_travel: None,
        probe: None,
    };
    let probe_t2 = AccessPath {
        table: "t2".to_owned(),
        kind: AccessPathKind::IndexScanEquality,
        index: Some("idx_t2".to_owned()),
        estimated_cost: 15.0,
        estimated_rows: 10.0,
        time_travel: None,
        probe: None,
    };
    let hash_segment = JoinPlanSegment {
        relations: vec!["t1".to_owned(), "t2".to_owned()],
        operator: JoinOperator::HashJoin,
        estimated_cost: 115.0,
        reason: "2-way joins stay on pairwise hash join".to_owned(),
    };
    let plan = QueryPlan {
        join_order: vec!["t1".to_owned(), "t2".to_owned()],
        access_paths: vec![scan_t1, probe_t2],
        join_segments: vec![hash_segment],
        total_cost: 115.0,
        morsel_eligibility: None,
    };

    let rendered = plan.to_string();

    assert!(rendered.contains("QUERY PLAN"));
    assert!(rendered.contains("SCAN t1"));
    assert!(rendered.contains("JOIN OPERATORS"));
    assert!(rendered.contains("HASH JOIN"));
    assert!(rendered.contains("USING INDEX idx_t2"));
}
/// The `Display` output of a plan with a `LeapfrogTriejoin` segment must name
/// that operator and include the join-operator section.
#[test]
fn test_query_plan_display_mentions_leapfrog_operator() {
    let relations = vec!["a".to_owned(), "b".to_owned(), "c".to_owned()];
    let leapfrog_segment = JoinPlanSegment {
        relations: relations.clone(),
        operator: JoinOperator::LeapfrogTriejoin,
        estimated_cost: 42.0,
        reason: "AGM estimate 42.0 beats hash cost 100.0; trie arity 1".to_owned(),
    };
    let plan = QueryPlan {
        join_order: relations,
        access_paths: vec![],
        join_segments: vec![leapfrog_segment],
        total_cost: 42.0,
        morsel_eligibility: None,
    };

    let rendered = plan.to_string();

    assert!(rendered.contains("LEAPFROG TRIEJOIN"));
    assert!(rendered.contains("JOIN OPERATORS"));
}
/// A full scan estimated at 100 000 rows must be morsel-eligible, driven by
/// `big_table`, and split into more than 1 and at most 64 morsels.
#[test]
fn test_morsel_eligibility_full_scan_large_table() {
    let big_scan = AccessPath {
        table: "big_table".to_owned(),
        kind: AccessPathKind::FullTableScan,
        index: None,
        estimated_cost: 10000.0,
        estimated_rows: 100_000.0,
        time_travel: None,
        probe: None,
    };
    let plan = QueryPlan {
        join_order: vec!["big_table".to_owned()],
        access_paths: vec![big_scan],
        join_segments: vec![],
        total_cost: 10000.0,
        morsel_eligibility: None,
    };

    let verdict = MorselEligibility::evaluate(&plan, false, false, 8);

    assert!(
        verdict.eligible,
        "bead_id=bd-b434d case=morsel_eligible_full_scan"
    );
    assert_eq!(verdict.driving_table.as_deref(), Some("big_table"));
    assert!(verdict.morsel_count > 1);
    assert!(verdict.morsel_count <= 64);
    // Diagnostic line emitted on stderr for the test log.
    eprintln!(
        "INFO bead_id=bd-b434d case=morsel_eligible morsels={} rows_per={}",
        verdict.morsel_count, verdict.rows_per_morsel
    );
}
/// A full scan estimated at only 500 rows must be rejected with
/// `TooFewRows`.
#[test]
fn test_morsel_eligibility_small_table_ineligible() {
    let small_scan = AccessPath {
        table: "small".to_owned(),
        kind: AccessPathKind::FullTableScan,
        index: None,
        estimated_cost: 10.0,
        estimated_rows: 500.0,
        time_travel: None,
        probe: None,
    };
    let plan = QueryPlan {
        join_order: vec!["small".to_owned()],
        access_paths: vec![small_scan],
        join_segments: vec![],
        total_cost: 10.0,
        morsel_eligibility: None,
    };

    let verdict = MorselEligibility::evaluate(&plan, false, false, 8);

    assert!(!verdict.eligible);
    assert_eq!(verdict.reason, MorselIneligibleReason::TooFewRows);
}
/// A plan whose only access path is an index equality scan must be rejected
/// with `NoFullTableScan`.
#[test]
fn test_morsel_eligibility_index_scan_ineligible() {
    let index_probe = AccessPath {
        table: "t1".to_owned(),
        kind: AccessPathKind::IndexScanEquality,
        index: Some("idx".to_owned()),
        estimated_cost: 5.0,
        estimated_rows: 10000.0,
        time_travel: None,
        probe: None,
    };
    let plan = QueryPlan {
        join_order: vec!["t1".to_owned()],
        access_paths: vec![index_probe],
        join_segments: vec![],
        total_cost: 5.0,
        morsel_eligibility: None,
    };

    let verdict = MorselEligibility::evaluate(&plan, false, false, 8);

    assert!(!verdict.eligible);
    assert_eq!(verdict.reason, MorselIneligibleReason::NoFullTableScan);
}
/// Passing `true` as the second argument of `MorselEligibility::evaluate`
/// must reject an otherwise large full scan with `HasLimit`.
#[test]
fn test_morsel_eligibility_limit_ineligible() {
    let large_scan = AccessPath {
        table: "t1".to_owned(),
        kind: AccessPathKind::FullTableScan,
        index: None,
        estimated_cost: 1000.0,
        estimated_rows: 50000.0,
        time_travel: None,
        probe: None,
    };
    let plan = QueryPlan {
        join_order: vec!["t1".to_owned()],
        access_paths: vec![large_scan],
        join_segments: vec![],
        total_cost: 1000.0,
        morsel_eligibility: None,
    };

    let verdict = MorselEligibility::evaluate(&plan, true, false, 8);

    assert!(!verdict.eligible);
    assert_eq!(verdict.reason, MorselIneligibleReason::HasLimit);
}
/// The predicate `rowid = 42` must yield a `RowidLookup` path costed the same
/// as `estimate_cost_ext` for 1024 pages / 50000 rows.
#[test]
fn test_best_access_path_rowid_lookup() {
    let stats = table_stats("t1", 1024, 50000);
    let lookup_cost = estimate_cost_ext(&AccessPathKind::RowidLookup, 1024, 0, 50000);

    // Leaked so the predicate can be borrowed as `&'static Expr`.
    let predicate: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("rowid"), Span::ZERO)),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::Integer(42), Span::ZERO)),
        span: Span::ZERO,
    }));
    let where_terms = [classify_where_term(predicate)];

    let chosen = best_access_path(&stats, &[], &where_terms, None);

    assert!(matches!(chosen.kind, AccessPathKind::RowidLookup));
    let cost_delta = (chosen.estimated_cost - lookup_cost).abs();
    assert!(cost_delta < 1e-9);
}
/// Without rowid-alias hints and without indexes, a point predicate on `id`,
/// a two-sided range on `id`, and a term-free query must all fall back to a
/// full table scan.
#[test]
fn test_best_access_path_ipk_oltp_shapes_without_schema_context() {
    let stats = table_stats("bench", 128, 5000);

    // Point lookup shape: `id = ?`.
    let point_terms = [eq_term("id")];
    let point = best_access_path(&stats, &[], &point_terms, None);
    assert!(matches!(point.kind, AccessPathKind::FullTableScan));

    // Range shape: `id >= 100 AND id < 150`; both bounds are leaked so they
    // can be borrowed as `&'static Expr`.
    let at_least_100: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("id"), Span::ZERO)),
        op: AstBinaryOp::Ge,
        right: Box::new(Expr::Literal(Literal::Integer(100), Span::ZERO)),
        span: Span::ZERO,
    }));
    let below_150: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("id"), Span::ZERO)),
        op: AstBinaryOp::Lt,
        right: Box::new(Expr::Literal(Literal::Integer(150), Span::ZERO)),
        span: Span::ZERO,
    }));
    let range_terms = [
        classify_where_term(at_least_100),
        classify_where_term(below_150),
    ];
    let range = best_access_path(&stats, &[], &range_terms, None);
    assert!(matches!(range.kind, AccessPathKind::FullTableScan));

    // Aggregate shape: no WHERE terms at all.
    let aggregate = best_access_path(&stats, &[], &[], None);
    assert!(matches!(aggregate.kind, AccessPathKind::FullTableScan));
}
/// With a rowid-alias hint for `id`, an equality on `id` must become a
/// `RowidLookup` estimated at one row with a `RowidEquality` probe, and a
/// range on `id` must become an index-less `IndexScanRange` whose probe
/// names column `id` and carries a lower bound.
#[test]
fn test_best_access_path_ipk_alias_hint_uses_rowid_lookup() {
    let table = table_stats("bench", 128, 5000);
    let hints = [RowidAliasHint::new("id")];

    let point =
        best_access_path_with_rowid_alias_hints(&table, &[], &[eq_term("id")], None, &hints);
    assert!(matches!(point.kind, AccessPathKind::RowidLookup));
    // Compared with a tolerance, like the other float assertions in this
    // module, rather than with exact `assert_eq!` on an `f64`.
    assert!(
        (point.estimated_rows - 1.0).abs() < f64::EPSILON,
        "rowid lookup should estimate exactly one row, got {}",
        point.estimated_rows
    );
    assert!(matches!(
        &point.probe,
        Some(AccessPathProbe::RowidEquality { target })
            if **target == Expr::Literal(Literal::Integer(1), Span::ZERO)
    ));

    let range =
        best_access_path_with_rowid_alias_hints(&table, &[], &[range_term("id")], None, &hints);
    assert!(matches!(range.kind, AccessPathKind::IndexScanRange { .. }));
    // The range is served without a named secondary index.
    assert!(range.index.is_none());
    assert!(matches!(
        &range.probe,
        Some(AccessPathProbe::Range {
            column,
            lower: Some(_),
            ..
        }) if column == "id"
    ));
}
/// For the predicate `b.id = 7`, asserts that an unqualified `id` hint yields a
/// full table scan while a hint qualified with `b` yields a `RowidLookup`.
#[test]
fn test_best_access_path_ipk_alias_hint_respects_qualifier() {
    let bench = table_stats("bench", 128, 5000);

    let qualified_id = Expr::Column(ColumnRef::qualified("b", "id"), Span::ZERO);
    let seven = Expr::Literal(Literal::Integer(7), Span::ZERO);
    let predicate: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(qualified_id),
        op: AstBinaryOp::Eq,
        right: Box::new(seven),
        span: Span::ZERO,
    }));
    let where_terms = [classify_where_term(predicate)];

    // Hint without a qualifier: expected to miss.
    let bare_hint = [RowidAliasHint::new("id")];
    let missed =
        best_access_path_with_rowid_alias_hints(&bench, &[], &where_terms, None, &bare_hint);
    assert!(matches!(missed.kind, AccessPathKind::FullTableScan));

    // Hint qualified with `b`: expected to hit.
    let b_hint = [RowidAliasHint::qualified("b", "id")];
    let matched =
        best_access_path_with_rowid_alias_hints(&bench, &[], &where_terms, None, &b_hint);
    assert!(matches!(matched.kind, AccessPathKind::RowidLookup));
}
/// A table whose stats are tagged `StatsSource::Analyze` is asserted to plan as
/// a full table scan costed by `estimate_cost_ext` with 500 pages / 10000 rows.
#[test]
fn test_analyze_stats_override() {
    let analyzed = TableStats {
        source: StatsSource::Analyze,
        name: String::from("t1"),
        n_rows: 10000,
        n_pages: 500,
    };
    assert_eq!(analyzed.source, StatsSource::Analyze);

    let path = best_access_path(&analyzed, &[], &[], None);
    assert!(matches!(path.kind, AccessPathKind::FullTableScan));

    let expected_cost = estimate_cost_ext(&AccessPathKind::FullTableScan, 500, 0, 10000);
    let delta = (path.estimated_cost - expected_cost).abs();
    assert!(delta < 1e-9);
}
/// Ordering joins over empty inputs is asserted to give an empty join order
/// and a total cost of zero (within `f64::EPSILON`).
#[test]
fn test_order_joins_empty() {
    let empty_plan = order_joins(&[], &[], &[], None, &[]);
    let cost_error = (empty_plan.total_cost - 0.0).abs();
    assert!(empty_plan.join_order.is_empty());
    assert!(cost_error < f64::EPSILON);
}
/// The `Display` text of `IndexZeroOrNegative` must contain both the offending
/// value and the phrase "must be positive".
#[test]
fn test_compound_order_by_error_display_zero_or_negative() {
    let msg = CompoundOrderByError::IndexZeroOrNegative {
        span: Span::ZERO,
        value: -3,
    }
    .to_string();

    let mentions_value = msg.contains("-3");
    assert!(mentions_value, "should contain the value: {msg}");

    let mentions_positive = msg.contains("must be positive");
    assert!(mentions_positive, "should say must be positive: {msg}");
}
/// `CompoundOrderByError` is usable through `std::error::Error` and reports no
/// underlying source.
#[test]
fn test_compound_order_by_error_is_error() {
    let not_found = CompoundOrderByError::ColumnNotFound {
        span: Span::ZERO,
        name: String::from("x"),
    };
    let underlying = std::error::Error::source(&not_found);
    assert!(underlying.is_none());
}
/// Checks, variant by variant, that the `Display` text of
/// `SingleTableProjectionError` contains a characteristic fragment.
#[test]
fn test_single_table_projection_error_display_all_variants() {
    let unknown_qualifier = SingleTableProjectionError::UnknownTableQualifier {
        qualifier: String::from("bad"),
    };
    let column_not_found = SingleTableProjectionError::ColumnNotFound {
        column: String::from("missing_col"),
    };

    // (error, fragment its rendered message must contain)
    let cases: Vec<(SingleTableProjectionError, &str)> = vec![
        (SingleTableProjectionError::NotSelectCore, "SELECT core"),
        (SingleTableProjectionError::MissingFromClause, "FROM clause"),
        (
            SingleTableProjectionError::UnsupportedFromSource,
            "single-table",
        ),
        (unknown_qualifier, "bad"),
        (column_not_found, "missing_col"),
    ];

    cases.into_iter().for_each(|(err, expected_fragment)| {
        let msg = err.to_string();
        assert!(
            msg.contains(expected_fragment),
            "{err:?} display should contain '{expected_fragment}': got '{msg}'"
        );
    });
}
/// `SingleTableProjectionError` is usable through `std::error::Error` and
/// reports no underlying source.
#[test]
fn test_single_table_projection_error_is_error() {
    let not_select = SingleTableProjectionError::NotSelectCore;
    let underlying = std::error::Error::source(&not_select);
    assert!(underlying.is_none());
}
/// A SELECT core built from three aliases is asserted to report three output columns.
#[test]
fn test_count_output_columns_select() {
    let three_aliases = select_core_with_aliases(&["a", "b", "c"]);
    let column_count = count_output_columns(&three_aliases);
    assert_eq!(column_count, 3);
}
/// A VALUES core with a single two-expression row is asserted to report two
/// output columns.
#[test]
fn test_count_output_columns_values() {
    let first = Expr::Literal(Literal::Integer(1), Span::ZERO);
    let second = Expr::Literal(Literal::Integer(2), Span::ZERO);
    let single_row = SelectCore::Values(vec![vec![first, second]]);
    let column_count = count_output_columns(&single_row);
    assert_eq!(column_count, 2);
}
/// A VALUES core with no rows is asserted to report zero output columns.
#[test]
fn test_count_output_columns_empty_values() {
    let no_rows = SelectCore::Values(Vec::new());
    let column_count = count_output_columns(&no_rows);
    assert_eq!(column_count, 0);
}
/// A bare `*` result column is asserted to produce a single `None` alias.
#[test]
fn test_extract_output_aliases_star_is_none() {
    let star_only = vec![ResultColumn::Star];
    let core = SelectCore::Select {
        columns: star_only,
        distinct: Distinctness::All,
        from: None,
        where_clause: None,
        having: None,
        group_by: vec![],
        windows: vec![],
    };
    assert_eq!(extract_output_aliases(&core), vec![None]);
}
/// An un-aliased expression column (`1 + 2`) is asserted to produce a single
/// `None` alias.
#[test]
fn test_extract_output_aliases_expression_no_alias() {
    let one_plus_two = Expr::BinaryOp {
        left: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
        op: fsqlite_ast::BinaryOp::Add,
        right: Box::new(Expr::Literal(Literal::Integer(2), Span::ZERO)),
        span: Span::ZERO,
    };
    let unaliased = ResultColumn::Expr {
        expr: one_plus_two,
        alias: None,
    };
    let core = SelectCore::Select {
        columns: vec![unaliased],
        distinct: Distinctness::All,
        from: None,
        where_clause: None,
        having: None,
        group_by: vec![],
        windows: vec![],
    };
    assert_eq!(extract_output_aliases(&core), vec![None]);
}
/// Resolving result columns against a VALUES core is asserted to fail with
/// `NotSelectCore`.
#[test]
fn test_resolve_projection_values_core_error() {
    let literal_one = Expr::Literal(Literal::Integer(1), Span::ZERO);
    let values_core = SelectCore::Values(vec![vec![literal_one]]);
    let outcome = resolve_single_table_result_columns(&values_core, &["a".to_owned()]);
    let err = outcome.expect_err("VALUES should fail");
    assert_eq!(err, SingleTableProjectionError::NotSelectCore);
}
/// A SELECT core without a FROM clause is asserted to fail with
/// `MissingFromClause`.
#[test]
fn test_resolve_projection_missing_from_error() {
    let fromless = SelectCore::Select {
        columns: vec![ResultColumn::Star],
        distinct: Distinctness::All,
        from: None,
        where_clause: None,
        having: None,
        group_by: vec![],
        windows: vec![],
    };
    let outcome = resolve_single_table_result_columns(&fromless, &["a".to_owned()]);
    let err = outcome.expect_err("missing FROM should fail");
    assert_eq!(err, SingleTableProjectionError::MissingFromClause);
}
/// A FROM clause containing a join (`t` inner-joined with `u`) is asserted to
/// fail with `UnsupportedFromSource`.
#[test]
fn test_resolve_projection_with_joins_error() {
    use fsqlite_ast::{JoinClause, JoinKind, JoinType};

    let base_table = TableOrSubquery::Table {
        name: QualifiedName::bare("t"),
        alias: None,
        index_hint: None,
        time_travel: None,
    };
    let joined_table = TableOrSubquery::Table {
        name: QualifiedName::bare("u"),
        alias: None,
        index_hint: None,
        time_travel: None,
    };
    let inner_join = JoinClause {
        join_type: JoinType {
            kind: JoinKind::Inner,
            natural: false,
        },
        table: joined_table,
        constraint: None,
    };
    let core = SelectCore::Select {
        distinct: Distinctness::All,
        columns: vec![ResultColumn::Star],
        from: Some(FromClause {
            source: base_table,
            joins: vec![inner_join],
        }),
        where_clause: None,
        group_by: vec![],
        having: None,
        windows: vec![],
    };

    let outcome = resolve_single_table_result_columns(&core, &["a".to_owned()]);
    let err = outcome.expect_err("JOIN should fail");
    assert_eq!(err, SingleTableProjectionError::UnsupportedFromSource);
}
/// `wrong_table.*` against table `t` is asserted to fail with
/// `UnknownTableQualifier` naming `wrong_table`.
#[test]
fn test_resolve_projection_unknown_table_qualifier() {
    let wrong_star = ResultColumn::TableStar(QualifiedName::bare("wrong_table"));
    let core = select_core_single_table(vec![wrong_star], "t", None);

    let outcome = resolve_single_table_result_columns(&core, &["a".to_owned()]);
    let err = outcome.expect_err("wrong qualifier should fail");

    let expected = SingleTableProjectionError::UnknownTableQualifier {
        qualifier: "wrong_table".to_owned(),
    };
    assert_eq!(err, expected);
}
/// Selecting `other.a` from table `t` is asserted to fail with some
/// `UnknownTableQualifier` error.
#[test]
fn test_resolve_projection_qualified_column_wrong_table() {
    let foreign_column = ResultColumn::Expr {
        expr: Expr::Column(ColumnRef::qualified("other", "a"), Span::ZERO),
        alias: None,
    };
    let core = select_core_single_table(vec![foreign_column], "t", None);

    let outcome = resolve_single_table_result_columns(&core, &["a".to_owned()]);
    let err = outcome.expect_err("wrong table qualifier should fail");
    assert!(matches!(
        err,
        SingleTableProjectionError::UnknownTableQualifier { .. }
    ));
}
/// A literal expression column aliased `answer` is asserted to resolve to
/// exactly one `ResultColumn::Expr` that still carries that alias.
#[test]
fn test_resolve_projection_preserves_expression() {
    let aliased_literal = ResultColumn::Expr {
        expr: Expr::Literal(Literal::Integer(42), Span::ZERO),
        alias: Some("answer".to_owned()),
    };
    let core = select_core_single_table(vec![aliased_literal], "t", None);

    let resolved = resolve_single_table_result_columns(&core, &["a".to_owned()])
        .expect("expression should be preserved");

    assert_eq!(resolved.len(), 1);
    let only_column = &resolved[0];
    assert!(matches!(
        only_column,
        ResultColumn::Expr {
            alias: Some(a), ..
        } if a == "answer"
    ));
}
/// `x BETWEEN 1 AND 10` is asserted to classify as `Between` on column `x`.
#[test]
fn test_classify_where_term_between() {
    let subject = Expr::Column(ColumnRef::bare("x"), Span::ZERO);
    let lower = Expr::Literal(Literal::Integer(1), Span::ZERO);
    let upper = Expr::Literal(Literal::Integer(10), Span::ZERO);
    let between: &'static Expr = Box::leak(Box::new(Expr::Between {
        expr: Box::new(subject),
        low: Box::new(lower),
        high: Box::new(upper),
        not: false,
        span: Span::ZERO,
    }));

    let classified = classify_where_term(between);
    assert!(matches!(classified.kind, WhereTermKind::Between));
    let target = classified.column.as_ref().unwrap();
    assert_eq!(target.column, "x");
}
/// `x NOT BETWEEN 1 AND 10` is asserted to classify as `Other`.
#[test]
fn test_classify_where_term_not_between_is_other() {
    let subject = Expr::Column(ColumnRef::bare("x"), Span::ZERO);
    let lower = Expr::Literal(Literal::Integer(1), Span::ZERO);
    let upper = Expr::Literal(Literal::Integer(10), Span::ZERO);
    let not_between: &'static Expr = Box::leak(Box::new(Expr::Between {
        expr: Box::new(subject),
        low: Box::new(lower),
        high: Box::new(upper),
        not: true,
        span: Span::ZERO,
    }));

    let classified = classify_where_term(not_between);
    assert!(matches!(classified.kind, WhereTermKind::Other));
}
/// The term built by `in_term("col", 5)` is asserted to classify as
/// `InList { count: 5 }` on column `col`.
#[test]
fn test_classify_where_term_in_list() {
    let five_way_in = in_term("col", 5);
    assert!(matches!(
        five_way_in.kind,
        WhereTermKind::InList { count: 5 }
    ));
    let target = five_way_in.column.as_ref().unwrap();
    assert_eq!(target.column, "col");
}
/// `x NOT IN (1)` is asserted to classify as `Other`.
#[test]
fn test_classify_where_term_not_in_is_other() {
    let subject = Expr::Column(ColumnRef::bare("x"), Span::ZERO);
    let members = vec![Expr::Literal(Literal::Integer(1), Span::ZERO)];
    let not_in: &'static Expr = Box::leak(Box::new(Expr::In {
        expr: Box::new(subject),
        set: InSet::List(members),
        not: true,
        span: Span::ZERO,
    }));

    let classified = classify_where_term(not_in);
    assert!(matches!(classified.kind, WhereTermKind::Other));
}
/// LIKE patterns `abc%` (alphabetic prefix) and `%wildcard` (leading wildcard)
/// are both asserted to classify as `Other`.
#[test]
fn test_classify_where_term_like_is_other() {
    for pattern in ["abc%", "%wildcard"] {
        let term = like_term("name", pattern);
        assert!(matches!(term.kind, WhereTermKind::Other));
    }
}
/// `name LIKE '123%'` is asserted to classify as `LikePrefix` with prefix `123`
/// and upper bound `124`, on column `name`.
#[test]
fn test_classify_where_term_like_case_stable_prefix() {
    let digits_like = like_term("name", "123%");
    assert!(matches!(
        &digits_like.kind,
        WhereTermKind::LikePrefix {
            prefix,
            upper_bound: Some(upper_bound),
        } if prefix == "123" && upper_bound == "124"
    ));
    let target = digits_like.column.as_ref().unwrap();
    assert_eq!(target.column, "name");
}
/// With escape character `\`, the pattern `123\%%` is asserted to classify as
/// `LikePrefix` with the literal prefix `123%` and upper bound `123&`.
#[test]
fn test_classify_where_term_like_escape_case_stable_prefix() {
    let escaped_like = like_term_with_escape("name", "123\\%%", "\\");
    assert!(matches!(
        &escaped_like.kind,
        WhereTermKind::LikePrefix {
            prefix,
            upper_bound: Some(upper_bound),
        } if prefix == "123%" && upper_bound == "123&"
    ));
    let target = escaped_like.column.as_ref().unwrap();
    assert_eq!(target.column, "name");
}
/// With escape character `\`, the alphabetic-prefixed pattern `abc\%%` is
/// asserted to classify as `Other`.
#[test]
fn test_classify_where_term_like_escape_ascii_prefix_is_other() {
    let alphabetic_like = like_term_with_escape("name", "abc\\%%", "\\");
    assert!(matches!(alphabetic_like.kind, WhereTermKind::Other));
}
/// `name GLOB 'abc*'` is asserted to classify as `LikePrefix` with prefix `abc`
/// and upper bound `abd`, on column `name`.
#[test]
fn test_classify_where_term_glob_prefix() {
    let prefix_glob = glob_term("name", "abc*");
    assert!(matches!(
        &prefix_glob.kind,
        WhereTermKind::LikePrefix {
            prefix,
            upper_bound: Some(upper_bound),
        } if prefix == "abc" && upper_bound == "abd"
    ));
    let target = prefix_glob.column.as_ref().unwrap();
    assert_eq!(target.column, "name");
}
/// A GLOB pattern that starts with a wildcard (`*wildcard`) is asserted to
/// classify as `Other`.
#[test]
fn test_classify_where_term_glob_no_prefix_is_other() {
    let leading_wildcard = glob_term("name", "*wildcard");
    assert!(matches!(leading_wildcard.kind, WhereTermKind::Other));
}
/// Equality against a NULL literal is asserted to classify as `Other` in both
/// operand orders (`x = NULL` and `NULL = x`).
#[test]
fn test_classify_where_term_eq_null_is_other() {
    // Column on the left, NULL on the right.
    let column_first: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("x"), Span::ZERO)),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::Null, Span::ZERO)),
        span: Span::ZERO,
    }));
    let term = classify_where_term(column_first);
    let is_other = matches!(term.kind, WhereTermKind::Other);
    assert!(is_other, "col = NULL should be Other, got {:?}", term.kind);

    // NULL on the left, column on the right.
    let null_first: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(Expr::Literal(Literal::Null, Span::ZERO)),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Column(ColumnRef::bare("x"), Span::ZERO)),
        span: Span::ZERO,
    }));
    let term2 = classify_where_term(null_first);
    let is_other2 = matches!(term2.kind, WhereTermKind::Other);
    assert!(
        is_other2,
        "NULL = col should be Other, got {:?}",
        term2.kind
    );
}
/// Equality on each of `_rowid_`, `oid`, `ROWID` and `OID` is asserted to
/// classify as `RowidEquality`.
#[test]
fn test_classify_where_term_rowid_aliases() {
    for alias in ["_rowid_", "oid", "ROWID", "OID"] {
        let alias_column = Expr::Column(ColumnRef::bare(alias), Span::ZERO);
        let one = Expr::Literal(Literal::Integer(1), Span::ZERO);
        let comparison: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
            left: Box::new(alias_column),
            op: AstBinaryOp::Eq,
            right: Box::new(one),
            span: Span::ZERO,
        }));

        let classified = classify_where_term(comparison);
        assert!(
            matches!(classified.kind, WhereTermKind::RowidEquality),
            "'{alias}' should be classified as RowidEquality"
        );
    }
}
/// `42 = x` (literal on the left) is asserted to classify as `Equality` on
/// column `x`.
#[test]
fn test_classify_where_term_reversed_equality() {
    let forty_two = Expr::Literal(Literal::Integer(42), Span::ZERO);
    let x_column = Expr::Column(ColumnRef::bare("x"), Span::ZERO);
    let reversed: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(forty_two),
        op: AstBinaryOp::Eq,
        right: Box::new(x_column),
        span: Span::ZERO,
    }));

    let classified = classify_where_term(reversed);
    assert!(matches!(classified.kind, WhereTermKind::Equality));
    let target = classified.column.as_ref().unwrap();
    assert_eq!(target.column, "x");
}
/// `42 = rowid` (literal on the left) is asserted to classify as
/// `RowidEquality`.
#[test]
fn test_classify_where_term_reversed_rowid_equality() {
    let forty_two = Expr::Literal(Literal::Integer(42), Span::ZERO);
    let rowid_column = Expr::Column(ColumnRef::bare("rowid"), Span::ZERO);
    let reversed: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(forty_two),
        op: AstBinaryOp::Eq,
        right: Box::new(rowid_column),
        span: Span::ZERO,
    }));

    let classified = classify_where_term(reversed);
    assert!(matches!(classified.kind, WhereTermKind::RowidEquality));
}
/// `1 = 2` (no column on either side) is asserted to classify as `Other` with
/// no associated column.
#[test]
fn test_classify_where_term_eq_no_columns_is_other() {
    let one = Expr::Literal(Literal::Integer(1), Span::ZERO);
    let two = Expr::Literal(Literal::Integer(2), Span::ZERO);
    let constant_compare: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(one),
        op: AstBinaryOp::Eq,
        right: Box::new(two),
        span: Span::ZERO,
    }));

    let classified = classify_where_term(constant_compare);
    assert!(matches!(classified.kind, WhereTermKind::Other));
    assert!(classified.column.is_none());
}
/// `1 OR 0` (an OR of two literals) is asserted to classify as `Other`.
#[test]
fn test_classify_where_term_generic_fallback() {
    let truthy = Expr::Literal(Literal::Integer(1), Span::ZERO);
    let falsy = Expr::Literal(Literal::Integer(0), Span::ZERO);
    let literal_or: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(truthy),
        op: AstBinaryOp::Or,
        right: Box::new(falsy),
        span: Span::ZERO,
    }));

    let classified = classify_where_term(literal_or);
    assert!(matches!(classified.kind, WhereTermKind::Other));
}
/// The term built by `or_eq_term("a", &[1, 2, 3])` is asserted to classify as
/// `InList { count: 3 }` on column `a`.
#[test]
fn test_classify_where_term_or_same_column_becomes_in_list() {
    let or_chain = or_eq_term("a", &[1, 2, 3]);
    assert!(matches!(or_chain.kind, WhereTermKind::InList { count: 3 }));
    let column_name = or_chain.column.as_ref().map(|c| c.column.as_str());
    assert_eq!(column_name, Some("a"));
}
/// `1 = a OR 2 = a` (literals on the left) is asserted to classify as
/// `InList { count: 2 }` on column `a`.
#[test]
fn test_classify_where_term_or_reversed_equalities_becomes_in_list() {
    let one_eq_a = Expr::BinaryOp {
        left: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
        span: Span::ZERO,
    };
    let two_eq_a = Expr::BinaryOp {
        left: Box::new(Expr::Literal(Literal::Integer(2), Span::ZERO)),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
        span: Span::ZERO,
    };
    let disjunction: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(one_eq_a),
        op: AstBinaryOp::Or,
        right: Box::new(two_eq_a),
        span: Span::ZERO,
    }));

    let classified = classify_where_term(disjunction);
    assert!(matches!(
        classified.kind,
        WhereTermKind::InList { count: 2 }
    ));
    let column_name = classified.column.as_ref().map(|c| c.column.as_str());
    assert_eq!(column_name, Some("a"));
}
/// `a = 1 OR b = 2` (different columns on each side) is asserted to classify
/// as `Other`.
#[test]
fn test_classify_where_term_or_mixed_columns_is_other() {
    let a_eq_1 = Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
        span: Span::ZERO,
    };
    let b_eq_2 = Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("b"), Span::ZERO)),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::Integer(2), Span::ZERO)),
        span: Span::ZERO,
    };
    let disjunction: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(a_eq_1),
        op: AstBinaryOp::Or,
        right: Box::new(b_eq_2),
        span: Span::ZERO,
    }));

    let classified = classify_where_term(disjunction);
    assert!(matches!(classified.kind, WhereTermKind::Other));
}
/// `(a = 1 AND b = 2) AND c = 3` is asserted to decompose into three terms.
#[test]
fn test_decompose_where_nested_and() {
    let a_eq_1 = Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
        span: Span::ZERO,
    };
    let b_eq_2 = Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("b"), Span::ZERO)),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::Integer(2), Span::ZERO)),
        span: Span::ZERO,
    };
    let c_eq_3 = Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("c"), Span::ZERO)),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::Integer(3), Span::ZERO)),
        span: Span::ZERO,
    };

    // The first two comparisons form the left-nested AND.
    let a_and_b = Expr::BinaryOp {
        left: Box::new(a_eq_1),
        op: AstBinaryOp::And,
        right: Box::new(b_eq_2),
        span: Span::ZERO,
    };
    let conjunction = Expr::BinaryOp {
        left: Box::new(a_and_b),
        op: AstBinaryOp::And,
        right: Box::new(c_eq_3),
        span: Span::ZERO,
    };

    let decomposed = decompose_where(&conjunction);
    assert_eq!(decomposed.len(), 3);
}
/// A lone comparison (`a = 1`) is asserted to decompose into exactly one term.
#[test]
fn test_decompose_where_single_term() {
    let a_column = Expr::Column(ColumnRef::bare("a"), Span::ZERO);
    let one = Expr::Literal(Literal::Integer(1), Span::ZERO);
    let comparison = Expr::BinaryOp {
        left: Box::new(a_column),
        op: AstBinaryOp::Eq,
        right: Box::new(one),
        span: Span::ZERO,
    };

    let decomposed = decompose_where(&comparison);
    assert_eq!(decomposed.len(), 1);
}
/// The GLOB pattern `abc*` is asserted to yield the prefix `abc`.
#[test]
fn test_extract_glob_prefix_star_wildcard() {
    let trailing_star = Expr::Literal(Literal::String("abc*".to_owned()), Span::ZERO);
    let prefix = extract_glob_prefix(&trailing_star);
    assert_eq!(prefix, Some("abc".to_owned()));
}
/// GLOB patterns with an embedded `*` (`abc*def`) or a character class
/// (`abc[0-9]`) are asserted to yield no prefix.
#[test]
fn test_extract_glob_prefix_rejects_non_terminal_wildcards() {
    let star_in_middle = Expr::Literal(Literal::String("abc*def".to_owned()), Span::ZERO);
    let with_char_class = Expr::Literal(Literal::String("abc[0-9]".to_owned()), Span::ZERO);

    assert_eq!(extract_glob_prefix(&star_in_middle), None);
    assert_eq!(extract_glob_prefix(&with_char_class), None);
}
/// A non-string pattern expression (integer literal) is asserted to yield no
/// GLOB prefix.
#[test]
fn test_extract_glob_prefix_non_string_expr() {
    let integer_pattern = Expr::Literal(Literal::Integer(42), Span::ZERO);
    let prefix = extract_glob_prefix(&integer_pattern);
    assert_eq!(prefix, None);
}
/// The LIKE pattern `abc%` with no escape is asserted to yield the prefix `abc`.
#[test]
fn test_extract_like_prefix_percent_wildcard() {
    let trailing_percent = Expr::Literal(Literal::String("abc%".to_owned()), Span::ZERO);
    let prefix = extract_like_prefix(&trailing_percent, None);
    assert_eq!(prefix, Some("abc".to_owned()));
}
/// LIKE patterns with an embedded `%` (`abc%def`) or an `_` wildcard
/// (`abc_def`) are asserted to yield no prefix.
#[test]
fn test_extract_like_prefix_rejects_non_terminal_or_single_char_wildcards() {
    let percent_in_middle = Expr::Literal(Literal::String("abc%def".to_owned()), Span::ZERO);
    let with_underscore = Expr::Literal(Literal::String("abc_def".to_owned()), Span::ZERO);

    assert_eq!(extract_like_prefix(&percent_in_middle, None), None);
    assert_eq!(extract_like_prefix(&with_underscore, None), None);
}
/// LIKE patterns that begin with a wildcard (`%abc`, `_abc`) are asserted to
/// yield no prefix.
#[test]
fn test_extract_like_prefix_starts_with_wildcard() {
    let leading_percent = Expr::Literal(Literal::String("%abc".to_owned()), Span::ZERO);
    let leading_underscore = Expr::Literal(Literal::String("_abc".to_owned()), Span::ZERO);

    assert_eq!(extract_like_prefix(&leading_percent, None), None);
    assert_eq!(extract_like_prefix(&leading_underscore, None), None);
}
/// With escape `\`, the pattern `123\%%` is asserted to yield the prefix
/// `123%` (the escaped percent is kept as a literal character).
#[test]
fn test_extract_like_prefix_with_escape_percent_in_prefix() {
    let pattern = Expr::Literal(Literal::String("123\\%%".to_owned()), Span::ZERO);
    let backslash = Expr::Literal(Literal::String("\\".to_owned()), Span::ZERO);

    let prefix = extract_like_prefix(&pattern, Some(&backslash));
    assert_eq!(prefix, Some("123%".to_owned()));
}
/// With escape `!`, the pattern `123!_%` is asserted to yield the prefix
/// `123_` (the escaped underscore is kept as a literal character).
#[test]
fn test_extract_like_prefix_with_escape_underscore_in_prefix() {
    let pattern = Expr::Literal(Literal::String("123!_%".to_owned()), Span::ZERO);
    let bang = Expr::Literal(Literal::String("!".to_owned()), Span::ZERO);

    let prefix = extract_like_prefix(&pattern, Some(&bang));
    assert_eq!(prefix, Some("123_".to_owned()));
}
/// A two-character escape literal (`xx`) is asserted to make prefix extraction
/// return `None`.
#[test]
fn test_extract_like_prefix_with_invalid_escape_literal() {
    let pattern = Expr::Literal(Literal::String("123\\%%".to_owned()), Span::ZERO);
    let two_char_escape = Expr::Literal(Literal::String("xx".to_owned()), Span::ZERO);

    let prefix = extract_like_prefix(&pattern, Some(&two_char_escape));
    assert_eq!(prefix, None);
}
/// A non-string pattern expression (integer literal) is asserted to yield no
/// LIKE prefix.
#[test]
fn test_extract_like_prefix_non_string_expr() {
    let integer_pattern = Expr::Literal(Literal::Integer(42), Span::ZERO);
    let prefix = extract_like_prefix(&integer_pattern, None);
    assert_eq!(prefix, None);
}
/// A LIKE pattern with no wildcard at all (`abc`) is asserted to yield no prefix.
#[test]
fn test_extract_like_prefix_exact_match() {
    let no_wildcard = Expr::Literal(Literal::String("abc".to_owned()), Span::ZERO);
    let prefix = extract_like_prefix(&no_wildcard, None);
    assert_eq!(prefix, None);
}
/// Two tables joined by a single term are asserted not to be detected as a
/// star query.
#[test]
fn test_detect_star_query_too_few_tables() {
    let two_tables = [table_stats("t1", 100, 1000), table_stats("t2", 100, 1000)];
    let single_join = [join_term("t1", "id", "t2", "fk")];
    let is_star = detect_star_query(&two_tables, &single_join);
    assert!(!is_star);
}
/// `compute_mx_choice(0, false)` is asserted to return 1.
#[test]
fn test_mx_choice_zero_tables() {
    let choice = compute_mx_choice(0, false);
    assert_eq!(choice, 1);
}
/// An equality on the column of a unique index is asserted to estimate exactly
/// one row (within `f64::EPSILON`).
#[test]
fn test_best_access_path_unique_index_equality() {
    let t1 = table_stats("t1", 1000, 50000);
    let unique_idx = index_info("idx_pk", "t1", &["id"], true, 100);
    let where_terms = [eq_term("id")];

    let ap = best_access_path(&t1, &[unique_idx], &where_terms, None);
    let row_error = (ap.estimated_rows - 1.0).abs();
    assert!(
        row_error < f64::EPSILON,
        "unique index equality should return 1 row, got {}",
        ap.estimated_rows
    );
}
/// A three-element IN-list on an indexed column is asserted to plan as
/// `IndexScanEquality` with an index selected.
#[test]
fn test_best_access_path_in_expansion() {
    let t1 = table_stats("t1", 100, 1000);
    let col_idx = index_info("idx_col", "t1", &["col"], false, 20);
    let where_terms = [in_term("col", 3)];

    let chosen = best_access_path(&t1, &[col_idx], &where_terms, None);
    assert!(matches!(chosen.kind, AccessPathKind::IndexScanEquality));
    assert!(chosen.index.is_some());
}
/// `name LIKE 'Jo%'` is asserted to plan as a full table scan even though an
/// index on `name` is available.
#[test]
fn test_best_access_path_like_no_index() {
    let t1 = table_stats("t1", 100, 1000);
    let name_idx = index_info("idx_name", "t1", &["name"], false, 20);
    let where_terms = [like_term("name", "Jo%")];

    let ap = best_access_path(&t1, &[name_idx], &where_terms, None);
    let is_full_scan = matches!(ap.kind, AccessPathKind::FullTableScan);
    assert!(
        is_full_scan,
        "LIKE should fall back to full scan, got {:?}",
        ap.kind
    );
}
/// `name LIKE '123%'` with an index on `name` is asserted to plan as
/// `IndexScanRange`.
#[test]
fn test_best_access_path_like_case_stable_prefix_uses_index_scan() {
    let t1 = table_stats("t1", 100, 1000);
    let name_idx = index_info("idx_name", "t1", &["name"], false, 20);
    let where_terms = [like_term("name", "123%")];

    let ap = best_access_path(&t1, &[name_idx], &where_terms, None);
    let is_range_scan = matches!(ap.kind, AccessPathKind::IndexScanRange { .. });
    assert!(
        is_range_scan,
        "case-stable LIKE prefix should use index scan, got {:?}",
        ap.kind
    );
}
/// `name LIKE '123\%%' ESCAPE '\'` with an index on `name` is asserted to plan
/// as `IndexScanRange`.
#[test]
fn test_best_access_path_like_escape_case_stable_prefix_uses_index_scan() {
    let t1 = table_stats("t1", 100, 1000);
    let name_idx = index_info("idx_name", "t1", &["name"], false, 20);
    let where_terms = [like_term_with_escape("name", "123\\%%", "\\")];

    let ap = best_access_path(&t1, &[name_idx], &where_terms, None);
    let is_range_scan = matches!(ap.kind, AccessPathKind::IndexScanRange { .. });
    assert!(
        is_range_scan,
        "escaped case-stable LIKE prefix should use index scan, got {:?}",
        ap.kind
    );
}
/// `name GLOB 'Jo*'` with an index on `name` is asserted to plan as either
/// `IndexScanRange` or `CoveringIndexScan`.
#[test]
fn test_best_access_path_glob_prefix() {
    let t1 = table_stats("t1", 100, 1000);
    let name_idx = index_info("idx_name", "t1", &["name"], false, 20);
    let where_terms = [glob_term("name", "Jo*")];

    let ap = best_access_path(&t1, &[name_idx], &where_terms, None);
    let uses_index = matches!(
        ap.kind,
        AccessPathKind::IndexScanRange { .. } | AccessPathKind::CoveringIndexScan { .. }
    );
    assert!(
        uses_index,
        "GLOB prefix should use index scan, got {:?}",
        ap.kind
    );
}
/// `a BETWEEN 1 AND 100` with an index on `a` is asserted to plan as
/// `IndexScanRange`.
#[test]
fn test_best_access_path_between_range() {
    let t1 = table_stats("t1", 100, 1000);
    let a_idx = index_info("idx_a", "t1", &["a"], false, 20);

    let subject = Expr::Column(ColumnRef::bare("a"), Span::ZERO);
    let lower = Expr::Literal(Literal::Integer(1), Span::ZERO);
    let upper = Expr::Literal(Literal::Integer(100), Span::ZERO);
    let between: &'static Expr = Box::leak(Box::new(Expr::Between {
        expr: Box::new(subject),
        low: Box::new(lower),
        high: Box::new(upper),
        not: false,
        span: Span::ZERO,
    }));
    let between_term = classify_where_term(between);

    let chosen = best_access_path(&t1, &[a_idx], &[between_term], None);
    assert!(matches!(
        chosen.kind,
        AccessPathKind::IndexScanRange { .. }
    ));
}
/// An index declared on table `t2` is asserted not to be used when planning
/// for `t1`: the result is a full table scan.
#[test]
fn test_best_access_path_ignores_wrong_table_index() {
    let t1 = table_stats("t1", 100, 1000);
    let foreign_idx = index_info("idx_other", "t2", &["a"], false, 20);
    let where_terms = [eq_term("a")];

    let chosen = best_access_path(&t1, &[foreign_idx], &where_terms, None);
    assert!(matches!(chosen.kind, AccessPathKind::FullTableScan));
}
/// An index with no columns is asserted never to be chosen: planning an
/// equality on `a` yields a full table scan.
#[test]
fn test_best_access_path_empty_index_columns() {
    let t1 = table_stats("t1", 100, 1000);
    let columnless_idx = IndexInfo {
        name: String::from("idx_empty"),
        table: String::from("t1"),
        columns: Vec::new(),
        expression_columns: Vec::new(),
        partial_where: None,
        unique: false,
        n_pages: 10,
        source: StatsSource::Heuristic,
    };
    let where_terms = [eq_term("a")];

    let chosen = best_access_path(&t1, &[columnless_idx], &where_terms, None);
    assert!(matches!(chosen.kind, AccessPathKind::FullTableScan));
}
/// Pins the leading-distinct estimate for a two-column index at several index
/// page counts.
#[test]
fn test_estimate_skip_scan_leading_distinct() {
    let idx = |n_pages: u64| index_info("idx", "t", &["a", "b"], false, n_pages);

    // (index pages, expected leading-distinct estimate).
    // NOTE(review): the expectations look like `n_pages / 8` floored at 1 —
    // presumably tied to SKIP_SCAN_PAGES_PER_LEADING_DISTINCT; confirm.
    for (n_pages, expected) in [(0, 1), (7, 1), (8, 1), (24, 3), (80, 10)] {
        assert_eq!(estimate_skip_scan_leading_distinct(&idx(n_pages)), expected);
    }
}
/// Pins `estimate_pairwise_hash_join_cost` for zero/one table (cost 0), two
/// tables (100 + 250), three tables (6640), and tables with no row estimate
/// (each treated as 1 row).
#[test]
fn test_estimate_pairwise_hash_join_cost_left_deep_accumulation() {
    // Builds a row-estimate map from `(table, rows)` pairs.
    let rows = |pairs: &[(&str, f64)]| -> HashMap<String, f64> {
        pairs
            .iter()
            .map(|&(table, count)| (table.to_owned(), count))
            .collect()
    };

    // Fewer than two tables: nothing to join.
    let single_cost = estimate_pairwise_hash_join_cost(&["A".to_owned()], &HashMap::new());
    assert!(single_cost.abs() < 1e-9);
    let empty: Vec<String> = Vec::new();
    let empty_cost = estimate_pairwise_hash_join_cost(&empty, &HashMap::new());
    assert!(empty_cost.abs() < 1e-9);

    // Two tables.
    let ab_rows = rows(&[("A", 100.0), ("B", 250.0)]);
    let ab = estimate_pairwise_hash_join_cost(&["A".to_owned(), "B".to_owned()], &ab_rows);
    assert!(
        (ab - 350.0).abs() < 1e-9,
        "two-table cost should be 100+250, got {ab}"
    );

    // Three tables.
    let abc_rows = rows(&[("A", 100.0), ("B", 250.0), ("C", 40.0)]);
    let abc = estimate_pairwise_hash_join_cost(
        &["A".to_owned(), "B".to_owned(), "C".to_owned()],
        &abc_rows,
    );
    assert!(
        (abc - 6640.0).abs() < 1e-9,
        "three-table cost should be 6640, got {abc}"
    );

    // Tables missing from the row map.
    let defaulted =
        estimate_pairwise_hash_join_cost(&["X".to_owned(), "Y".to_owned()], &HashMap::new());
    assert!(
        (defaulted - 2.0).abs() < 1e-9,
        "missing rows default to 1 -> 2, got {defaulted}"
    );
}
/// Pins `estimate_agm_upper_bound` on a triangle hypergraph (three 100-row
/// relations -> 1000), checks it returns `None` when the component does not
/// match the hypergraph or is empty, and that missing row counts give 1.0.
#[test]
fn test_estimate_agm_upper_bound_triangle_and_guards() {
    let triangle = TrieHypergraph {
        relation_variables: vec![vec![0, 1], vec![1, 2], vec![0, 2]],
        variable_count: 3,
        arity: 2,
    };
    let component = vec!["R".to_owned(), "S".to_owned(), "T".to_owned()];
    let rows: HashMap<String, f64> = ["R", "S", "T"]
        .iter()
        .map(|relation| ((*relation).to_owned(), 100.0))
        .collect();

    // Triangle with 100 rows per relation.
    let bound = estimate_agm_upper_bound(&component, &rows, &triangle).unwrap();
    assert!(
        (bound - 1000.0).abs() < 1e-6,
        "triangle bound should be 100^1.5 = 1000, got {bound}"
    );

    // Two relations against the three-relation hypergraph.
    let two = vec!["R".to_owned(), "S".to_owned()];
    assert!(estimate_agm_upper_bound(&two, &rows, &triangle).is_none());

    // Empty component against an empty hypergraph.
    let empty_hg = TrieHypergraph {
        relation_variables: Vec::new(),
        variable_count: 0,
        arity: 0,
    };
    let empty_component: Vec<String> = Vec::new();
    assert!(estimate_agm_upper_bound(&empty_component, &rows, &empty_hg).is_none());

    // No row counts supplied at all.
    let no_rows: HashMap<String, f64> = HashMap::new();
    let floored = estimate_agm_upper_bound(&component, &no_rows, &triangle).unwrap();
    assert!(
        (floored - 1.0).abs() < 1e-9,
        "missing row counts default to 1 -> bound 1.0, got {floored}"
    );
}
/// An equality on the second column (`email`) of a 64-page
/// `(tenant_id, email)` index is asserted to select that index as an
/// `IndexScanRange` or `CoveringIndexScan`.
#[test]
fn test_best_access_path_skip_scan_on_low_cardinality_leading_column() {
    let users = TableStats {
        name: String::from("users"),
        n_rows: 2_000_000,
        n_pages: 4_096,
        source: StatsSource::Analyze,
    };
    let tenant_email_idx = IndexInfo {
        name: String::from("idx_tenant_email"),
        table: String::from("users"),
        columns: vec![String::from("tenant_id"), String::from("email")],
        expression_columns: Vec::new(),
        partial_where: None,
        unique: false,
        n_pages: 64,
        source: StatsSource::Analyze,
    };

    let chosen = best_access_path(&users, &[tenant_email_idx], &[eq_term("email")], None);
    assert_eq!(chosen.index.as_deref(), Some("idx_tenant_email"));
    assert!(matches!(
        chosen.kind,
        AccessPathKind::IndexScanRange { .. } | AccessPathKind::CoveringIndexScan { .. }
    ));
}
/// An equality on the second column (`region_code`) of a three-column index is
/// asserted to select that index as an `IndexScanRange` or
/// `CoveringIndexScan`.
#[test]
fn test_best_access_path_skip_scan_allows_immediate_second_column_on_three_column_index() {
    let users = TableStats {
        name: String::from("users"),
        n_rows: 2_000_000,
        n_pages: 4_096,
        source: StatsSource::Analyze,
    };
    let three_column_idx = IndexInfo {
        name: String::from("idx_tenant_region_email"),
        table: String::from("users"),
        columns: vec![
            String::from("tenant_id"),
            String::from("region_code"),
            String::from("email"),
        ],
        expression_columns: Vec::new(),
        partial_where: None,
        unique: false,
        n_pages: 64,
        source: StatsSource::Analyze,
    };

    let chosen = best_access_path(
        &users,
        &[three_column_idx],
        &[eq_term("region_code")],
        None,
    );
    assert_eq!(chosen.index.as_deref(), Some("idx_tenant_region_email"));
    assert!(matches!(
        chosen.kind,
        AccessPathKind::IndexScanRange { .. } | AccessPathKind::CoveringIndexScan { .. }
    ));
}
/// An equality on the third column (`email`) of a three-column index — leaving
/// a gap over `region_code` — is asserted to fall back to a full table scan.
#[test]
fn test_best_access_path_skip_scan_rejects_gapped_trailing_column() {
    let users = TableStats {
        name: String::from("users"),
        n_rows: 2_000_000,
        n_pages: 4_096,
        source: StatsSource::Analyze,
    };
    let three_column_idx = IndexInfo {
        name: String::from("idx_tenant_region_email"),
        table: String::from("users"),
        columns: vec![
            String::from("tenant_id"),
            String::from("region_code"),
            String::from("email"),
        ],
        expression_columns: Vec::new(),
        partial_where: None,
        unique: false,
        n_pages: 64,
        source: StatsSource::Analyze,
    };

    let ap = best_access_path(&users, &[three_column_idx], &[eq_term("email")], None);
    let is_full_scan = matches!(ap.kind, AccessPathKind::FullTableScan);
    assert!(
        is_full_scan,
        "gapped skip-scan should fall back to full scan until multi-prefix cardinality is modeled, got {:?}",
        ap.kind
    );
}
/// When both a range term and an equality term target the second index column,
/// the skip-scan candidate is asserted to use the equality: 8 leading probes,
/// 1 trailing probe, and `SKIP_SCAN_EQ_SELECTIVITY` per probe.
#[test]
fn test_skip_scan_candidate_second_column_equality_beats_range_ordering() {
    let users = TableStats {
        name: String::from("users"),
        n_rows: 2_000_000,
        n_pages: 4_096,
        source: StatsSource::Analyze,
    };
    let tenant_email_idx = IndexInfo {
        name: String::from("idx_tenant_email"),
        table: String::from("users"),
        columns: vec![String::from("tenant_id"), String::from("email")],
        expression_columns: Vec::new(),
        partial_where: None,
        unique: false,
        n_pages: 64,
        source: StatsSource::Analyze,
    };

    // The range term is listed first on purpose; the equality must still win.
    let email_terms = [range_term("email"), eq_term("email")];
    let picked = analyze_skip_scan_candidate(&users, &tenant_email_idx, &email_terms)
        .expect("second-column equality should remain a skip-scan candidate");

    assert_eq!(picked.leading_probes, 8);
    assert_eq!(picked.trailing_probe_count, 1);
    assert_eq!(picked.per_probe_selectivity, SKIP_SCAN_EQ_SELECTIVITY);
}
/// When both a range term and a three-element IN-list target the second index
/// column, the skip-scan candidate is asserted to use the IN-list: 8 leading
/// probes, 3 trailing probes, and `SKIP_SCAN_EQ_SELECTIVITY` per probe.
#[test]
fn test_skip_scan_candidate_second_column_in_beats_range_ordering() {
    let users = TableStats {
        name: String::from("users"),
        n_rows: 2_000_000,
        n_pages: 4_096,
        source: StatsSource::Analyze,
    };
    let tenant_email_idx = IndexInfo {
        name: String::from("idx_tenant_email"),
        table: String::from("users"),
        columns: vec![String::from("tenant_id"), String::from("email")],
        expression_columns: Vec::new(),
        partial_where: None,
        unique: false,
        n_pages: 64,
        source: StatsSource::Analyze,
    };

    // The range term is listed first on purpose; the IN-list must still win.
    let email_terms = [range_term("email"), in_term("email", 3)];
    let picked = analyze_skip_scan_candidate(&users, &tenant_email_idx, &email_terms)
        .expect("second-column IN-list should remain a skip-scan candidate");

    assert_eq!(picked.leading_probes, 8);
    assert_eq!(picked.trailing_probe_count, 3);
    assert_eq!(picked.per_probe_selectivity, SKIP_SCAN_EQ_SELECTIVITY);
}
/// When two IN-lists (5 and 2 elements) target the second index column, the
/// skip-scan candidate is expected to use the smaller one: 8 leading probes,
/// 2 trailing probes, and `SKIP_SCAN_EQ_SELECTIVITY` per probe.
///
/// Returns `Err` with the observed candidate values when the expectation is
/// not met, so a failing run shows what was actually chosen.
#[test]
fn test_skip_scan_candidate_second_column_prefers_tighter_in_probe_count() -> Result<(), String>
{
    let table = TableStats {
        name: "users".to_owned(),
        n_pages: 4_096,
        n_rows: 2_000_000,
        source: StatsSource::Analyze,
    };
    let idx = IndexInfo {
        name: "idx_tenant_email".to_owned(),
        table: "users".to_owned(),
        columns: vec!["tenant_id".to_owned(), "email".to_owned()],
        unique: false,
        n_pages: 64,
        source: StatsSource::Analyze,
        partial_where: None,
        expression_columns: vec![],
    };
    let candidate =
        analyze_skip_scan_candidate(&table, &idx, &[in_term("email", 5), in_term("email", 2)])
            .ok_or_else(|| "expected skip-scan candidate".to_owned())?;

    let picked_tighter_in = candidate.leading_probes == 8
        && candidate.trailing_probe_count == 2
        && candidate.per_probe_selectivity == SKIP_SCAN_EQ_SELECTIVITY;
    if picked_tighter_in {
        Ok(())
    } else {
        // Report the observed values; the bare message alone gave no hint of
        // which field diverged.
        Err(format!(
            "expected tighter second-column IN probe count: got leading_probes={:?}, trailing_probe_count={:?}, per_probe_selectivity={:?}",
            candidate.leading_probes,
            candidate.trailing_probe_count,
            candidate.per_probe_selectivity
        ))
    }
}
/// With an index page count of
/// `SKIP_SCAN_PAGES_PER_LEADING_DISTINCT * (SKIP_SCAN_MAX_LEADING_DISTINCT + 2)`,
/// an equality on the second index column is asserted to fall back to a full
/// table scan.
#[test]
fn test_best_access_path_skip_scan_rejects_high_cardinality_leading_column() {
    let users = TableStats {
        name: String::from("users"),
        n_rows: 1_000_000,
        n_pages: 2_000,
        source: StatsSource::Analyze,
    };
    let oversized_idx = IndexInfo {
        name: String::from("idx_region_email"),
        table: String::from("users"),
        columns: vec![String::from("region_code"), String::from("email")],
        expression_columns: Vec::new(),
        partial_where: None,
        unique: false,
        n_pages: SKIP_SCAN_PAGES_PER_LEADING_DISTINCT * (SKIP_SCAN_MAX_LEADING_DISTINCT + 2),
        source: StatsSource::Analyze,
    };

    let chosen = best_access_path(&users, &[oversized_idx], &[eq_term("email")], None);
    assert!(matches!(chosen.kind, AccessPathKind::FullTableScan));
}
/// A partial index declared `WHERE a = 1` is asserted to be skipped for
/// `a = 2` (full table scan) and used for `a = 1` (`IndexScanEquality` or
/// `CoveringIndexScan`).
#[test]
fn test_best_access_path_partial_index_requires_implied_predicate() {
    let t1 = table_stats("t1", 100, 1000);

    let a_eq_1 = Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
        span: Span::ZERO,
    };
    let mut partial_idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
    partial_idx.partial_where = Some(a_eq_1);

    // Query predicate does not imply the index predicate.
    let mismatched_terms = [eq_term_value("a", 2)];
    let not_implied = best_access_path(&t1, &[partial_idx.clone()], &mismatched_terms, None);
    assert!(matches!(not_implied.kind, AccessPathKind::FullTableScan));

    // Query predicate matches the index predicate.
    let matching_terms = [eq_term_value("a", 1)];
    let implied = best_access_path(&t1, &[partial_idx], &matching_terms, None);
    assert!(matches!(
        implied.kind,
        AccessPathKind::IndexScanEquality | AccessPathKind::CoveringIndexScan { .. }
    ));
}
/// A partial index declared `WHERE a = 1` is asserted to be usable when the
/// query writes the same equality with operands swapped (`1 = a`).
#[test]
fn test_best_access_path_partial_index_accepts_commuted_equality() {
    let t1 = table_stats("t1", 100, 1000);

    let index_predicate = Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
        span: Span::ZERO,
    };
    let mut partial_idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
    partial_idx.partial_where = Some(index_predicate);

    // Query predicate with the literal on the left.
    let commuted: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
        span: Span::ZERO,
    }));
    let query_terms = [classify_where_term(commuted)];

    let chosen = best_access_path(&t1, &[partial_idx], &query_terms, None);
    assert!(matches!(
        chosen.kind,
        AccessPathKind::IndexScanEquality | AccessPathKind::CoveringIndexScan { .. }
    ));
}
/// A partial index filtered by `a > 0` must be selected for the tighter query
/// bound `a > 10`; the expected result is a range or covering index scan.
#[test]
fn test_best_access_path_partial_index_accepts_stronger_lower_bound() {
    let table = table_stats("t1", 100, 1000);
    let mut partial_idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
    partial_idx.partial_where = Some(Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
        op: AstBinaryOp::Gt,
        right: Box::new(Expr::Literal(Literal::Integer(0), Span::ZERO)),
        span: Span::ZERO,
    });
    // Borrow a local expression instead of leaking a heap allocation; the
    // classified term does not outlive this function.
    let query_bound = Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
        op: AstBinaryOp::Gt,
        right: Box::new(Expr::Literal(Literal::Integer(10), Span::ZERO)),
        span: Span::ZERO,
    };
    let ap = best_access_path(
        &table,
        &[partial_idx],
        &[classify_where_term(&query_bound)],
        None,
    );
    assert!(matches!(
        ap.kind,
        AccessPathKind::IndexScanRange { .. } | AccessPathKind::CoveringIndexScan { .. }
    ));
}
/// A partial index filtered by `a > 10` must not be used for the looser query
/// bound `a > 0`; the planner is expected to fall back to a full table scan.
#[test]
fn test_best_access_path_partial_index_rejects_weaker_lower_bound() {
    let table = table_stats("t1", 100, 1000);
    let mut partial_idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
    partial_idx.partial_where = Some(Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
        op: AstBinaryOp::Gt,
        right: Box::new(Expr::Literal(Literal::Integer(10), Span::ZERO)),
        span: Span::ZERO,
    });
    // Borrow a local expression instead of leaking a heap allocation; the
    // classified term does not outlive this function.
    let query_bound = Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
        op: AstBinaryOp::Gt,
        right: Box::new(Expr::Literal(Literal::Integer(0), Span::ZERO)),
        span: Span::ZERO,
    };
    let ap = best_access_path(
        &table,
        &[partial_idx],
        &[classify_where_term(&query_bound)],
        None,
    );
    assert!(matches!(ap.kind, AccessPathKind::FullTableScan));
}
/// A partial index filtered by `a IS NOT NULL` must be selected for the
/// equality term `a = 7`.
#[test]
fn test_best_access_path_partial_index_accepts_is_not_null_from_equality() {
    let table = table_stats("t1", 100, 1000);
    let partial_idx = {
        let mut idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
        let column_a = Expr::Column(ColumnRef::bare("a"), Span::ZERO);
        idx.partial_where = Some(Expr::IsNull {
            expr: Box::new(column_a),
            not: true,
            span: Span::ZERO,
        });
        idx
    };
    let equality_terms = [eq_term_value("a", 7)];
    let ap = best_access_path(&table, &[partial_idx], &equality_terms, None);
    assert!(matches!(
        ap.kind,
        AccessPathKind::IndexScanEquality | AccessPathKind::CoveringIndexScan { .. }
    ));
}
/// A partial index filtered by `a IS NOT NULL` must be selected for a
/// three-element `IN` term on `a`; equality, range and covering scans are all
/// accepted outcomes.
#[test]
fn test_best_access_path_partial_index_accepts_is_not_null_from_in_list() {
    let table = table_stats("t1", 100, 1000);
    let partial_idx = {
        let mut idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
        let column_a = Expr::Column(ColumnRef::bare("a"), Span::ZERO);
        idx.partial_where = Some(Expr::IsNull {
            expr: Box::new(column_a),
            not: true,
            span: Span::ZERO,
        });
        idx
    };
    let in_list_terms = [in_term("a", 3)];
    let ap = best_access_path(&table, &[partial_idx], &in_list_terms, None);
    assert!(matches!(
        ap.kind,
        AccessPathKind::IndexScanEquality
            | AccessPathKind::IndexScanRange { .. }
            | AccessPathKind::CoveringIndexScan { .. }
    ));
}
/// A partial index filtered by `a IS NOT NULL` must be selected for the
/// prefix pattern `a LIKE '123%'`, giving a range or covering index scan.
#[test]
fn test_best_access_path_partial_index_accepts_is_not_null_from_like_prefix() {
    let table = table_stats("t1", 100, 1000);
    let partial_idx = {
        let mut idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
        let column_a = Expr::Column(ColumnRef::bare("a"), Span::ZERO);
        idx.partial_where = Some(Expr::IsNull {
            expr: Box::new(column_a),
            not: true,
            span: Span::ZERO,
        });
        idx
    };
    let like_terms = [like_term("a", "123%")];
    let ap = best_access_path(&table, &[partial_idx], &like_terms, None);
    assert!(matches!(
        ap.kind,
        AccessPathKind::IndexScanRange { .. } | AccessPathKind::CoveringIndexScan { .. }
    ));
}
/// A partial index filtered by `a IS NOT NULL` must be selected for the
/// prefix pattern `a GLOB 'abc*'`, giving a range or covering index scan.
#[test]
fn test_best_access_path_partial_index_accepts_is_not_null_from_glob_prefix() {
    let table = table_stats("t1", 100, 1000);
    let partial_idx = {
        let mut idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
        let column_a = Expr::Column(ColumnRef::bare("a"), Span::ZERO);
        idx.partial_where = Some(Expr::IsNull {
            expr: Box::new(column_a),
            not: true,
            span: Span::ZERO,
        });
        idx
    };
    let glob_terms = [glob_term("a", "abc*")];
    let ap = best_access_path(&table, &[partial_idx], &glob_terms, None);
    assert!(matches!(
        ap.kind,
        AccessPathKind::IndexScanRange { .. } | AccessPathKind::CoveringIndexScan { .. }
    ));
}
/// A partial index filtered by `a IS NOT NULL` must be selected for an OR of
/// equalities on `a` over the values 1, 2 and 3; the chosen index name is
/// checked as well as the access-path kind.
#[test]
fn test_best_access_path_partial_index_accepts_is_not_null_from_or_disjunction() {
    let table = table_stats("t1", 100, 1000);
    let partial_idx = {
        let mut idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
        let column_a = Expr::Column(ColumnRef::bare("a"), Span::ZERO);
        idx.partial_where = Some(Expr::IsNull {
            expr: Box::new(column_a),
            not: true,
            span: Span::ZERO,
        });
        idx
    };
    let disjunction_terms = [or_eq_term("a", &[1, 2, 3])];
    let ap = best_access_path(&table, &[partial_idx], &disjunction_terms, None);
    assert_eq!(ap.index.as_deref(), Some("idx_partial_a"));
    assert!(matches!(
        ap.kind,
        AccessPathKind::IndexScanEquality | AccessPathKind::CoveringIndexScan { .. }
    ));
}
/// With two candidate indexes on the same column (10 pages vs. 600 pages),
/// an `INDEXED BY idx_slow` hint must make the planner pick `idx_slow`.
#[test]
fn test_best_access_path_respects_indexed_by_hint() {
    let table = table_stats("t1", 2000, 100_000);
    let candidates = [
        index_info("idx_fast", "t1", &["a"], false, 10),
        index_info("idx_slow", "t1", &["a"], false, 600),
    ];
    let terms = [eq_term("a")];
    let forced = IndexHint::IndexedBy("idx_slow".to_owned());
    let ap = best_access_path_with_hints(&table, &candidates, &terms, None, Some(&forced), None);
    assert_eq!(ap.index.as_deref(), Some("idx_slow"));
    assert!(matches!(
        ap.kind,
        AccessPathKind::IndexScanEquality
            | AccessPathKind::IndexScanRange { .. }
            | AccessPathKind::CoveringIndexScan { .. }
    ));
}
/// A `NOT INDEXED` hint must force a full table scan with no index chosen,
/// even though the only WHERE term is `rowid = 42`.
#[test]
fn test_best_access_path_respects_not_indexed_hint() {
    let table = table_stats("t1", 1024, 50000);
    let idx = index_info("idx_a", "t1", &["a"], false, 20);
    // Borrow a local expression instead of leaking a heap allocation; the
    // classified term does not outlive this function.
    let rowid_expr = Expr::BinaryOp {
        left: Box::new(Expr::Column(ColumnRef::bare("rowid"), Span::ZERO)),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::Integer(42), Span::ZERO)),
        span: Span::ZERO,
    };
    let rowid_term = classify_where_term(&rowid_expr);
    let hint = IndexHint::NotIndexed;
    let ap =
        best_access_path_with_hints(&table, &[idx], &[rowid_term], None, Some(&hint), None);
    assert!(matches!(ap.kind, AccessPathKind::FullTableScan));
    assert!(ap.index.is_none());
}
/// Two identically sized indexes compete for the same equality term. The
/// first selection must pick `idx_a` and record it in the hint store; a second
/// selection with the candidates in reverse order must still return `idx_a`.
#[test]
fn test_cracking_hint_store_reuses_prior_index_choice() {
    let table = table_stats("t1", 1000, 50000);
    let idx_a = index_info("idx_a", "t1", &["a"], false, 40);
    let idx_b = index_info("idx_b", "t1", &["a"], false, 40);
    let terms = [eq_term("a")];
    let mut hint_store = CrackingHintStore::default();

    let forward = [idx_a.clone(), idx_b.clone()];
    let first =
        best_access_path_with_hints(&table, &forward, &terms, None, None, Some(&mut hint_store));
    assert_eq!(first.index.as_deref(), Some("idx_a"));
    assert_eq!(hint_store.preferred_index("t1"), Some("idx_a"));

    // Same candidates, opposite order.
    let reversed = [idx_b, idx_a];
    let second =
        best_access_path_with_hints(&table, &reversed, &terms, None, None, Some(&mut hint_store));
    assert_eq!(second.index.as_deref(), Some("idx_a"));
}
/// Selecting an access path for an equality term must increase the
/// `index_scan_equality` entry of the index-selection totals.
#[test]
fn test_index_selection_metric_counter_advances() {
    // Reads the current total, treating a missing label as zero.
    let equality_total = || {
        snapshot_index_selection_totals()
            .get("index_scan_equality")
            .copied()
            .unwrap_or(0)
    };
    let table = table_stats("t1", 500, 10000);
    let idx = index_info("idx_a", "t1", &["a"], false, 20);
    let terms = [eq_term("a")];

    let before = equality_total();
    let _ = best_access_path(&table, &[idx], &terms, None);
    let after = equality_total();
    assert!(after > before);
}
/// End-to-end replay of planner index selection that writes a JSON artifact
/// describing the run.
///
/// The scenario checks three behaviors:
/// 1. `INDEXED BY idx_users_email` on `users` is honored by join ordering.
/// 2. `NOT INDEXED` on `events` forces a full table scan.
/// 3. With a shared `CrackingHintStore`, two selections for `sessions` with the
///    candidate indexes in opposite order choose the same index.
///
/// Run metadata comes from the `RUN_ID`, `TRACE_ID`, `SCENARIO_ID` and `SEED`
/// environment variables, and the output location from
/// `FSQLITE_PLANNER_INDEX_E2E_ARTIFACT`; each falls back to a built-in default.
///
/// # Panics
///
/// Panics if any check fails or if the artifact directory or file cannot be
/// written. Panic messages are built with `panic!` so that `{BEAD_ID}` is
/// interpolated; `expect` takes a plain string and would print the
/// placeholder literally.
#[test]
#[allow(clippy::too_many_lines)]
fn planner_index_selection_e2e_replay_emits_artifact() {
    use fsqlite_ast::{JoinClause, JoinKind, JoinType};
    const BEAD_ID: &str = "bd-1as.4";
    const DEFAULT_SCENARIO_ID: &str = "PLANNER-INDEX-1";
    const DEFAULT_SEED: u64 = 20_260_219;

    // Run metadata, overridable through the environment for replays.
    let run_id =
        std::env::var("RUN_ID").unwrap_or_else(|_| format!("{BEAD_ID}-seed-{DEFAULT_SEED}"));
    let trace_id = std::env::var("TRACE_ID")
        .ok()
        .and_then(|value| value.parse::<u64>().ok())
        .unwrap_or(DEFAULT_SEED);
    let scenario_id =
        std::env::var("SCENARIO_ID").unwrap_or_else(|_| DEFAULT_SCENARIO_ID.to_owned());
    let seed = std::env::var("SEED")
        .ok()
        .and_then(|value| value.parse::<u64>().ok())
        .unwrap_or(DEFAULT_SEED);
    let artifact_path = std::env::var("FSQLITE_PLANNER_INDEX_E2E_ARTIFACT").map_or_else(
        |_| {
            PathBuf::from("artifacts")
                .join(BEAD_ID)
                .join("planner_index_selection_e2e_artifact.json")
        },
        PathBuf::from,
    );
    if let Some(parent) = artifact_path.parent() {
        std::fs::create_dir_all(parent).unwrap_or_else(|err| {
            panic!("bead_id={BEAD_ID} artifact directory should be writable: {err:?}")
        });
    }

    let started = Instant::now();
    let mut cracking_hints = CrackingHintStore::default();
    let before_metrics = snapshot_index_selection_totals();

    // FROM users AS u INDEXED BY idx_users_email JOIN events AS e NOT INDEXED
    let from = FromClause {
        source: TableOrSubquery::Table {
            name: QualifiedName::bare("users"),
            alias: Some("u".to_owned()),
            index_hint: Some(IndexHint::IndexedBy("idx_users_email".to_owned())),
            time_travel: None,
        },
        joins: vec![JoinClause {
            join_type: JoinType {
                kind: JoinKind::Inner,
                natural: false,
            },
            table: TableOrSubquery::Table {
                name: QualifiedName::bare("events"),
                alias: Some("e".to_owned()),
                index_hint: Some(IndexHint::NotIndexed),
                time_travel: None,
            },
            constraint: None,
        }],
    };
    let table_hints = collect_table_index_hints(&from);
    let tables = [
        table_stats("users", 2_048, 120_000),
        table_stats("events", 8_192, 1_200_000),
        table_stats("sessions", 4_096, 900_000),
    ];
    let indexes = [
        index_info("idx_users_email", "users", &["email"], true, 120),
        index_info("idx_users_id", "users", &["id"], true, 240),
        index_info("idx_events_user_id", "events", &["user_id"], false, 110),
        index_info(
            "idx_sessions_user_id_a",
            "sessions",
            &["user_id"],
            false,
            90,
        ),
        index_info(
            "idx_sessions_user_id_b",
            "sessions",
            &["user_id"],
            false,
            90,
        ),
    ];
    let where_terms = [
        eq_term("email"),
        eq_term("user_id"),
        join_term("events", "user_id", "users", "id"),
    ];

    // Checks 1 and 2: explicit index hints on the users/events join.
    let first_plan = order_joins_with_hints(
        &tables[..2],
        &indexes,
        &where_terms,
        Some(&["email".to_owned(), "user_id".to_owned()]),
        &[],
        Some(&table_hints),
        Some(&mut cracking_hints),
    );
    let users_path = first_plan
        .access_paths
        .iter()
        .find(|path| path.table.eq_ignore_ascii_case("users"))
        .unwrap_or_else(|| panic!("bead_id={BEAD_ID} users path should exist"));
    assert_eq!(users_path.index.as_deref(), Some("idx_users_email"));
    let events_path = first_plan
        .access_paths
        .iter()
        .find(|path| path.table.eq_ignore_ascii_case("events"))
        .unwrap_or_else(|| panic!("bead_id={BEAD_ID} events path should exist"));
    assert!(
        matches!(events_path.kind, AccessPathKind::FullTableScan),
        "bead_id={BEAD_ID} NOT INDEXED must force full scan for events",
    );

    // Check 3: the two sessions indexes are offered in both orders against
    // the same hint store.
    let first_session_path = best_access_path_with_hints(
        &tables[2],
        &indexes[3..5],
        &where_terms,
        None,
        None,
        Some(&mut cracking_hints),
    );
    let second_session_path = best_access_path_with_hints(
        &tables[2],
        &[indexes[4].clone(), indexes[3].clone()],
        &where_terms,
        None,
        None,
        Some(&mut cracking_hints),
    );
    assert_eq!(
        first_session_path.index.as_deref(),
        second_session_path.index.as_deref(),
        "bead_id={BEAD_ID} adaptive cracking hint should keep stable index preference",
    );

    // Per-label counter growth over the course of this test.
    let after_metrics = snapshot_index_selection_totals();
    let metric_delta = after_metrics
        .iter()
        .map(|(label, after)| {
            let before = before_metrics.get(label).copied().unwrap_or(0);
            (label.clone(), after.saturating_sub(before))
        })
        .collect::<BTreeMap<_, _>>();
    let elapsed_us = started.elapsed().as_micros().max(1);
    let replay_command = format!(
        "RUN_ID='{}' TRACE_ID={} SCENARIO_ID='{}' SEED={} FSQLITE_PLANNER_INDEX_E2E_ARTIFACT='{}' cargo test -p fsqlite-planner planner_index_selection_e2e_replay_emits_artifact -- --exact --nocapture",
        run_id,
        trace_id,
        scenario_id,
        seed,
        artifact_path.display(),
    );
    let plan_fingerprint = blake3::hash(
        format!(
            "{}|{}|{}|{}|{:?}|{:?}",
            first_plan.join_order.join(","),
            users_path.index.clone().unwrap_or_default(),
            access_path_metric_label(&events_path.kind),
            second_session_path.index.clone().unwrap_or_default(),
            first_session_path.kind,
            second_session_path.kind,
        )
        .as_bytes(),
    )
    .to_hex()
    .to_string();
    let artifact = serde_json::json!({
        "bead_id": BEAD_ID,
        "run_id": run_id,
        "trace_id": trace_id,
        "scenario_id": scenario_id,
        "seed": seed,
        "overall_status": "pass",
        "timing": {
            "selection_elapsed_us": elapsed_us,
        },
        "checks": [
            {
                "id": "indexed_by_respected",
                "status": "pass",
                "detail": "users path honors INDEXED BY idx_users_email"
            },
            {
                "id": "not_indexed_respected",
                "status": "pass",
                "detail": "events path honors NOT INDEXED by forcing full scan"
            },
            {
                "id": "adaptive_hint_reuse",
                "status": "pass",
                "detail": "sessions path reuses prior cracking hint under candidate reordering"
            }
        ],
        "metric_delta": metric_delta,
        "plan_fingerprint_blake3": plan_fingerprint,
        "observability": {
            "required_fields": [
                "run_id",
                "trace_id",
                "scenario_id",
                "selection_elapsed_us",
                "table",
                "chosen_index",
                "index_type",
                "candidates"
            ],
            "event_name": "planner.index_select.choice"
        },
        "replay_command": replay_command,
    });
    let artifact_bytes = serde_json::to_vec_pretty(&artifact).unwrap_or_else(|err| {
        panic!("bead_id={BEAD_ID} artifact serialization should succeed: {err:?}")
    });
    std::fs::write(&artifact_path, artifact_bytes)
        .unwrap_or_else(|err| panic!("bead_id={BEAD_ID} artifact write should succeed: {err:?}"));
    assert!(
        artifact_path.exists(),
        "bead_id={BEAD_ID} e2e artifact path should exist"
    );
}
/// `a BETWEEN 1 AND 10` on the leftmost (and only) column of the index must be
/// reported as a range-usable constraint.
#[test]
fn test_index_usability_between_on_leftmost() {
    let idx = index_info("idx_a", "t1", &["a"], false, 50);
    // Borrow a local expression instead of leaking a heap allocation; the
    // classified term does not outlive this function.
    let between = Expr::Between {
        expr: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
        low: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
        high: Box::new(Expr::Literal(Literal::Integer(10), Span::ZERO)),
        not: false,
        span: Span::ZERO,
    };
    let term = classify_where_term(&between);
    assert!(matches!(
        analyze_index_usability(&idx, &[term]),
        IndexUsability::Range { .. }
    ));
}
/// Exercises `PartialEq` on `WhereTermKind`: identical variants and payloads
/// compare equal, while differing payloads or variants compare unequal.
#[test]
fn test_where_term_kind_equality() {
    let in_list = |count| WhereTermKind::InList { count };
    let abc_prefix = || WhereTermKind::LikePrefix {
        prefix: "abc".to_owned(),
        upper_bound: Some("abd".to_owned()),
    };

    // Payload-free variants.
    assert_eq!(WhereTermKind::Equality, WhereTermKind::Equality);
    assert_eq!(WhereTermKind::Range, WhereTermKind::Range);
    assert_eq!(WhereTermKind::Between, WhereTermKind::Between);

    // Variants carrying data.
    assert_eq!(in_list(3), in_list(3));
    assert_ne!(in_list(3), in_list(5));
    assert_eq!(abc_prefix(), abc_prefix());

    // Different variants.
    assert_ne!(WhereTermKind::Equality, WhereTermKind::Range);
}
/// Exercises `PartialEq` on `WhereColumn`: the same table and column compare
/// equal, while a missing table qualifier makes the values differ.
#[test]
fn test_where_column_equality() {
    let column_a = |table: Option<&str>| WhereColumn {
        table: table.map(str::to_owned),
        column: "a".to_owned(),
    };
    let wc1 = column_a(Some("t"));
    let wc2 = column_a(Some("t"));
    let wc3 = column_a(None);
    assert_eq!(wc1, wc2);
    assert_ne!(wc1, wc3);
}
/// Exercises `PartialEq` on `StatsSource`: each variant equals itself and the
/// two variants differ from each other.
#[test]
fn test_stats_source_equality() {
    let (analyzed, guessed) = (StatsSource::Analyze, StatsSource::Heuristic);
    assert_eq!(analyzed, StatsSource::Analyze);
    assert_eq!(guessed, StatsSource::Heuristic);
    assert_ne!(analyzed, guessed);
}
/// With zero table and index pages, a full table scan must cost 1.0 and a
/// rowid lookup must cost 0.0.
#[test]
fn test_cost_minimum_page_clamp() {
    let full_scan_cost = estimate_cost(&AccessPathKind::FullTableScan, 0, 0);
    assert!(
        (full_scan_cost - 1.0).abs() < f64::EPSILON,
        "0 pages should clamp to 1"
    );

    let rowid_cost = estimate_cost(&AccessPathKind::RowidLookup, 0, 0);
    assert!(
        (rowid_cost - 0.0).abs() < f64::EPSILON,
        "log2(1) = 0.0 for clamped 0 pages"
    );
}
/// Property-based tests covering cost estimation, join ordering and compound
/// ORDER BY resolution.
mod proptest_planner {
    use super::*;
    use fsqlite_ast::{
        ColumnRef, Distinctness, Expr, Literal, OrderingTerm, ResultColumn, SelectBody,
        SelectCore, Span,
    };
    use proptest::prelude::*;

    /// Strategy for heuristic `TableStats`: a name matching
    /// `[a-z][a-z0-9]{0,5}`, 1..10_000 pages and 1..1_000_000 rows.
    fn arb_table_stats() -> BoxedStrategy<TableStats> {
        let names = prop::string::string_regex("[a-z][a-z0-9]{0,5}").expect("valid regex");
        let page_counts = 1u64..10_000;
        let row_counts = 1u64..1_000_000;
        (names, page_counts, row_counts)
            .prop_map(|(name, n_pages, n_rows)| TableStats {
                name,
                n_pages,
                n_rows,
                source: StatsSource::Heuristic,
            })
            .boxed()
    }

    /// Strategy for heuristic, non-partial `IndexInfo` values on `table_name`
    /// with one to three generated column names and 1..5_000 pages.
    #[allow(dead_code)]
    fn arb_index_info(table_name: String) -> BoxedStrategy<IndexInfo> {
        let index_names = prop::string::string_regex("idx_[a-z]{1,4}").expect("valid regex");
        let column_lists = proptest::collection::vec(
            prop::string::string_regex("[a-z]{1,4}").expect("valid regex"),
            1..4,
        );
        (index_names, column_lists, any::<bool>(), 1u64..5_000)
            .prop_map(move |(name, columns, unique, n_pages)| IndexInfo {
                name,
                table: table_name.clone(),
                columns,
                unique,
                n_pages,
                source: StatsSource::Heuristic,
                partial_where: None,
                expression_columns: vec![],
            })
            .boxed()
    }

    /// Strategy for selectivities in thousandths, from 0.001 up to 0.999.
    fn arb_selectivity() -> BoxedStrategy<f64> {
        (1u32..1000)
            .prop_map(|permille| f64::from(permille) / 1000.0)
            .boxed()
    }

    proptest::proptest! {
        #![proptest_config(proptest::prelude::ProptestConfig::with_cases(1000))]
        // Every access-path kind must produce a finite, non-negative cost.
        #[test]
        fn test_cost_non_negative(
            table_pages in 0u64..100_000,
            index_pages in 0u64..100_000,
            selectivity in arb_selectivity(),
        ) {
            let kinds = [
                AccessPathKind::FullTableScan,
                AccessPathKind::IndexScanEquality,
                AccessPathKind::RowidLookup,
                AccessPathKind::IndexScanRange { selectivity },
                AccessPathKind::CoveringIndexScan { selectivity },
            ];
            for kind in kinds.iter() {
                let cost = estimate_cost(kind, table_pages, index_pages);
                prop_assert!(
                    cost >= 0.0,
                    "cost must be non-negative, got {cost} for {kind:?} \
                     (table_pages={table_pages}, index_pages={index_pages})"
                );
                prop_assert!(
                    cost.is_finite(),
                    "cost must be finite, got {cost} for {kind:?}"
                );
            }
        }
    }

    proptest::proptest! {
        #![proptest_config(proptest::prelude::ProptestConfig::with_cases(500))]
        // Rowid lookup <= index equality, and index equality <= full scan
        // whenever the index is no larger than the table.
        #[test]
        fn test_cost_hierarchy(
            table_pages in 10u64..100_000,
            index_pages in 2u64..10_000,
        ) {
            let cost_of = |kind: AccessPathKind| estimate_cost(&kind, table_pages, index_pages);
            let rowid_cost = cost_of(AccessPathKind::RowidLookup);
            let eq_cost = cost_of(AccessPathKind::IndexScanEquality);
            let full_cost = cost_of(AccessPathKind::FullTableScan);
            prop_assert!(
                rowid_cost <= eq_cost + f64::EPSILON,
                "rowid lookup ({rowid_cost}) should be ≤ index equality ({eq_cost}) \
                 for table_pages={table_pages}, index_pages={index_pages}"
            );
            if index_pages <= table_pages {
                prop_assert!(
                    eq_cost <= full_cost + f64::EPSILON,
                    "index equality ({eq_cost}) should be ≤ full scan ({full_cost}) \
                     for table_pages={table_pages}, index_pages={index_pages}"
                );
            }
        }
    }

    proptest::proptest! {
        #![proptest_config(proptest::prelude::ProptestConfig::with_cases(500))]
        // A range scan with lower selectivity must not cost more than one
        // with higher selectivity on the same table and index sizes.
        #[test]
        fn test_cost_selectivity_monotonic(
            table_pages in 10u64..100_000,
            index_pages in 2u64..10_000,
            s1 in 1u32..500,
            s2 in 500u32..1000,
        ) {
            let range_cost = |selectivity: f64| {
                estimate_cost(
                    &AccessPathKind::IndexScanRange { selectivity },
                    table_pages,
                    index_pages,
                )
            };
            let sel_low = f64::from(s1) / 1000.0;
            let sel_high = f64::from(s2) / 1000.0;
            let cost_low = range_cost(sel_low);
            let cost_high = range_cost(sel_high);
            prop_assert!(
                cost_low <= cost_high + f64::EPSILON,
                "lower selectivity ({sel_low}) should have lower cost ({cost_low}) \
                 than higher selectivity ({sel_high}) cost ({cost_high})"
            );
        }
    }

    proptest::proptest! {
        #![proptest_config(proptest::prelude::ProptestConfig::with_cases(200))]
        // Ordering the same two tables twice must give the same join order
        // and the same total cost.
        #[test]
        fn test_join_order_determinism(
            stats1 in arb_table_stats(),
            stats2 in arb_table_stats(),
        ) {
            let first = stats1;
            let mut second = stats2;
            // Table names must be distinct.
            if first.name == second.name {
                second.name = format!("{}_b", second.name);
            }
            let tables = [first, second];
            let no_indexes: Vec<IndexInfo> = Vec::new();
            let no_terms: Vec<WhereTerm<'_>> = Vec::new();
            let no_cross_joins: Vec<(String, String)> = Vec::new();
            let plan_a = order_joins(&tables, &no_indexes, &no_terms, None, &no_cross_joins);
            let plan_b = order_joins(&tables, &no_indexes, &no_terms, None, &no_cross_joins);
            prop_assert_eq!(
                plan_a.join_order,
                plan_b.join_order,
                "join order should be deterministic"
            );
            prop_assert!(
                (plan_a.total_cost - plan_b.total_cost).abs() < f64::EPSILON,
                "total cost should be deterministic: {:.6} vs {:.6}",
                plan_a.total_cost,
                plan_b.total_cost,
            );
        }
    }

    proptest::proptest! {
        #![proptest_config(proptest::prelude::ProptestConfig::with_cases(300))]
        // Offering an extra index with no WHERE terms must not raise the
        // estimated cost of the best access path.
        #[test]
        fn test_index_never_increases_cost(
            stats in arb_table_stats(),
        ) {
            let table = stats;
            let no_terms: Vec<WhereTerm<'_>> = Vec::new();
            let no_index_path = best_access_path(&table, &[], &no_terms, None);

            let extra_index = IndexInfo {
                name: "idx_test".to_string(),
                table: table.name.clone(),
                columns: vec!["col_a".to_string()],
                unique: false,
                n_pages: table.n_pages / 5 + 1,
                source: StatsSource::Heuristic,
                partial_where: None,
                expression_columns: vec![],
            };
            let with_index_path = best_access_path(&table, &[extra_index], &no_terms, None);
            prop_assert!(
                with_index_path.estimated_cost <= no_index_path.estimated_cost + f64::EPSILON,
                "adding an index should not increase cost: \
                 without={:.2}, with={:.2}",
                no_index_path.estimated_cost,
                with_index_path.estimated_cost,
            );
        }
    }

    proptest::proptest! {
        #![proptest_config(proptest::prelude::ProptestConfig::with_cases(200))]
        // Resolving the same numeric ORDER BY term twice against the same
        // select body must give equal results.
        #[test]
        fn test_order_by_resolution_deterministic(
            ncols in 1usize..5,
            order_idx in 1usize..5,
        ) {
            let mut cols: Vec<ResultColumn> = Vec::with_capacity(ncols);
            for i in 0..ncols {
                cols.push(ResultColumn::Expr {
                    expr: Expr::Column(ColumnRef::bare(format!("c{i}")), Span::ZERO),
                    alias: Some(format!("a{i}")),
                });
            }
            let body = SelectBody {
                select: SelectCore::Select {
                    distinct: Distinctness::All,
                    columns: cols,
                    from: None,
                    where_clause: None,
                    group_by: vec![],
                    having: None,
                    windows: vec![],
                },
                compounds: vec![],
            };
            // One-based column position within 1..=ncols.
            let valid_idx = (order_idx % ncols) + 1;
            let position = Literal::Integer(i64::try_from(valid_idx).unwrap_or(1));
            let order_term = OrderingTerm {
                expr: Expr::Literal(position, Span::ZERO),
                direction: None,
                nulls: None,
            };
            let terms = std::slice::from_ref(&order_term);
            let result1 = resolve_compound_order_by(&body, terms);
            let result2 = resolve_compound_order_by(&body, terms);
            prop_assert_eq!(
                result1, result2,
                "ORDER BY resolution should be deterministic"
            );
        }
    }

    proptest::proptest! {
        #![proptest_config(proptest::prelude::ProptestConfig::with_cases(500))]
        // Multiplying the page count by k must multiply the full-scan cost
        // by k, within 0.01.
        #[test]
        fn test_full_scan_linear_scaling(
            pages in 1u64..100_000,
            multiplier in 2u64..10,
        ) {
            let full_scan = |table_pages: u64| {
                estimate_cost(&AccessPathKind::FullTableScan, table_pages, 0)
            };
            let cost_base = full_scan(pages);
            let cost_scaled = full_scan(pages * multiplier);
            let expected_ratio = multiplier as f64;
            let actual_ratio = cost_scaled / cost_base;
            prop_assert!(
                (actual_ratio - expected_ratio).abs() < 0.01,
                "full scan cost should scale linearly: \
                 expected ratio {expected_ratio}, got {actual_ratio}"
            );
        }
    }
}
/// After a metrics reset, two `estimate_cost` calls must raise the
/// `fsqlite_planner_cost_estimates_total` counter by at least two.
#[test]
fn test_cost_estimates_metric_increments() {
    reset_cost_metrics();
    let before = cost_metrics_snapshot();
    for kind in [AccessPathKind::FullTableScan, AccessPathKind::RowidLookup] {
        let _ = estimate_cost(&kind, 100, 0);
    }
    let after = cost_metrics_snapshot();
    assert!(
        after.fsqlite_planner_cost_estimates_total
            >= before.fsqlite_planner_cost_estimates_total + 2
    );
}
/// Records three estimation errors after a reset and checks that buckets 0, 2
/// and 3 of the error-ratio histogram each hold one sample and that the mean
/// ratio is finite.
#[test]
fn test_estimation_error_recording() {
    reset_cost_metrics();
    let samples = [(100.0, 50.0), (10.0, 100.0), (50.0, 50.0)];
    for (first_arg, second_arg) in samples {
        record_estimation_error(first_arg, second_arg);
    }
    let snap = cost_metrics_snapshot();
    for bucket in [0, 2, 3] {
        assert_eq!(snap.error_ratio_buckets[bucket], 1);
    }
    assert!(snap.error_ratio_mean.is_finite());
}
/// The loss for `(100.0, 200.0)` must exceed the loss for `(100.0, 50.0)`.
#[test]
fn test_asymmetric_loss_underestimate_penalized_more() {
    let (loss_under, loss_over) = (
        asymmetric_estimation_loss(100.0, 200.0),
        asymmetric_estimation_loss(100.0, 50.0),
    );
    assert!(
        loss_under > loss_over,
        "underestimate loss ({loss_under}) should exceed overestimate loss ({loss_over})"
    );
}
/// Equal arguments must produce a loss of zero (within 1e-10).
#[test]
fn test_asymmetric_loss_perfect_estimate() {
    let exact_match_loss = asymmetric_estimation_loss(100.0, 100.0);
    let distance_from_zero = (exact_match_loss - 0.0).abs();
    assert!(distance_from_zero < 1e-10);
}
/// With a first argument of zero and a second of 50.0, the loss must be 50.0
/// (within 1e-10).
#[test]
fn test_asymmetric_loss_degenerate() {
    let degenerate_loss = asymmetric_estimation_loss(0.0, 50.0);
    let distance_from_expected = (degenerate_loss - 50.0).abs();
    assert!(distance_from_expected < 1e-10);
}
/// Pins the shape of `asymmetric_estimation_loss` with a first argument of
/// 100.0: linear as the second argument falls below it (0.25 per 25 units) and
/// quadratic as it rises above it (x4 at 300, x16 at 500, relative to 200).
#[test]
fn test_asymmetric_loss_quadratic_under_linear_over() {
    let loss_at = |second: f64| asymmetric_estimation_loss(100.0, second);
    let close = |lhs: f64, rhs: f64| (lhs - rhs).abs() < 1e-9;

    // Second argument below the first: fixed values with equal steps.
    assert!(close(loss_at(75.0), 0.25));
    assert!(close(loss_at(50.0), 0.5));
    assert!(close(loss_at(25.0), 0.75));
    assert!(close(loss_at(0.0), 1.0));
    let step_75_to_50 = loss_at(50.0) - loss_at(75.0);
    let step_50_to_25 = loss_at(25.0) - loss_at(50.0);
    assert!(close(step_75_to_50, step_50_to_25));

    // Second argument above the first: growth relative to the value at 200.
    let base = loss_at(200.0);
    assert!(base > 0.0);
    assert!(close(loss_at(300.0), 4.0 * base));
    assert!(close(loss_at(500.0), 16.0 * base));

    assert!(
        loss_at(250.0) > loss_at(200.0),
        "underestimate loss grows with ratio"
    );
    assert!(
        loss_at(25.0) > loss_at(50.0),
        "overestimate loss grows as estimate worsens"
    );
}
/// `dpccp_order_joins` on two tables with no indexes or predicates must
/// return a two-entry order, a positive cost and at least two counted plans.
#[test]
fn test_dpccp_two_tables() {
    let heuristic = |name: &str, n_pages: u64, n_rows: u64| TableStats {
        name: name.to_owned(),
        n_pages,
        n_rows,
        source: StatsSource::Heuristic,
    };
    let tables = vec![heuristic("a", 10, 100), heuristic("b", 20, 200)];
    let (order, cost, plans, _pruned) =
        dpccp_order_joins(&tables, &[], &[], None, None, &[], None)
            .expect("2-table exhaustive plan should exist");
    assert_eq!(order.len(), 2);
    assert!(cost > 0.0);
    assert!(plans >= 2);
}
/// `dpccp_order_joins` on three tables must return a three-entry order, a
/// positive cost and more than three counted plans, and must place table
/// index 0 (`x`, the one with the fewest pages and rows) first.
#[test]
fn test_dpccp_three_tables() {
    let heuristic = |name: &str, n_pages: u64, n_rows: u64| TableStats {
        name: name.to_owned(),
        n_pages,
        n_rows,
        source: StatsSource::Heuristic,
    };
    let tables = vec![
        heuristic("x", 5, 50),
        heuristic("y", 100, 1000),
        heuristic("z", 10, 100),
    ];
    let (order, cost, plans, _pruned) =
        dpccp_order_joins(&tables, &[], &[], None, None, &[], None)
            .expect("3-table exhaustive plan should exist");
    assert_eq!(order.len(), 3);
    assert!(cost > 0.0);
    assert!(plans > 3);
    assert_eq!(order[0], 0);
}
/// A `("t1", "t2")` cross-join pair must keep `t1` ahead of `t2` in the
/// returned order even though `t2` is far smaller.
#[test]
fn test_dpccp_respects_cross_join_constraint() {
    let heuristic = |name: &str, n_pages: u64, n_rows: u64| TableStats {
        name: name.to_owned(),
        n_pages,
        n_rows,
        source: StatsSource::Heuristic,
    };
    let tables = vec![heuristic("t1", 100, 10_000), heuristic("t2", 1, 10)];
    let cross_join_pairs = [("t1".to_owned(), "t2".to_owned())];
    let (order, _cost, _plans, _pruned) =
        dpccp_order_joins(&tables, &[], &[], None, None, &cross_join_pairs, None)
            .expect("cross-join constrained exhaustive plan should exist");
    assert_eq!(order, vec![0, 1], "CROSS JOIN should force t1 before t2");
}
/// Ordering five equally sized tables must return all five in the join order
/// and push the enumerated-plans counter above 10 after a reset.
#[test]
fn test_order_joins_five_tables_uses_exhaustive_search() {
    reset_plans_enumerated();
    let mut tables = Vec::with_capacity(5);
    for i in 0..5 {
        tables.push(TableStats {
            name: format!("t{i}"),
            n_pages: 10,
            n_rows: 100,
            source: StatsSource::Heuristic,
        });
    }
    let plan = order_joins(&tables, &[], &[], None, &[]);
    assert_eq!(plan.join_order.len(), 5);
    let enumerated = plans_enumerated_total();
    assert!(
        enumerated > 10,
        "5-table beam search should enumerate well beyond greedy-width-1 bounds, got {enumerated}"
    );
}
/// With two tiny tables and three very large ones, `dpccp_order_joins` must
/// report a non-zero pruned count.
#[test]
fn test_dpccp_branch_and_bound_prunes_high_cost_branches() {
    // (name, size): each table gets `size` pages and `size` rows.
    let sized_tables: [(&str, u64); 5] = [
        ("tiny", 1),
        ("small", 2),
        ("huge_a", 10_000),
        ("huge_b", 20_000),
        ("huge_c", 30_000),
    ];
    let tables: Vec<TableStats> = sized_tables
        .iter()
        .map(|&(name, size)| TableStats {
            name: name.to_owned(),
            n_pages: size,
            n_rows: size,
            source: StatsSource::Heuristic,
        })
        .collect();
    let (_order, _cost, _plans, pruned) =
        dpccp_order_joins(&tables, &[], &[], None, None, &[], None)
            .expect("5-table exhaustive plan should exist");
    assert!(pruned > 0, "expected branch-and-bound pruning to occur");
}
/// Ordering ten tables of increasing size must return all ten in the join
/// order while keeping the enumerated-plans counter at or below 800.
#[test]
fn test_order_joins_large_join_uses_greedy_width() {
    reset_plans_enumerated();
    // Table `t{i}` has (i + 1) * 10 pages and (i + 1) * 100 rows.
    let tables: Vec<TableStats> = (0u64..10)
        .map(|i| {
            let scale = i + 1;
            TableStats {
                name: format!("t{i}"),
                n_pages: scale * 10,
                n_rows: scale * 100,
                source: StatsSource::Heuristic,
            }
        })
        .collect();
    let plan = order_joins(&tables, &[], &[], None, &[]);
    assert_eq!(plan.join_order.len(), 10);
    let enumerated = plans_enumerated_total();
    assert!(
        enumerated <= 800,
        "greedy-width search should keep enumeration bounded for 10-table joins, got {enumerated}"
    );
}
/// Running `order_joins` on two tables must increase the enumerated-plans
/// counter.
#[test]
fn test_plans_enumerated_metric() {
    reset_plans_enumerated();
    let before = plans_enumerated_total();
    let heuristic = |name: &str, n_pages: u64, n_rows: u64| TableStats {
        name: name.to_owned(),
        n_pages,
        n_rows,
        source: StatsSource::Heuristic,
    };
    let tables = vec![heuristic("t1", 10, 100), heuristic("t2", 20, 200)];
    let _ = order_joins(&tables, &[], &[], None, &[]);
    let after = plans_enumerated_total();
    assert!(after > before);
}
/// A predicate on the qualified column `users.id` must be pushed down to the
/// `users` table, leaving nothing in the remaining set.
#[test]
fn test_pushdown_qualified_predicate() {
    let users_id = Expr::Column(ColumnRef::qualified("users", "id"), Span::ZERO);
    let one = Expr::Literal(Literal::Integer(1), Span::ZERO);
    let users_id_eq_one = Expr::BinaryOp {
        left: Box::new(users_id),
        op: AstBinaryOp::Eq,
        right: Box::new(one),
        span: Span::ZERO,
    };
    let terms = [classify_where_term(&users_id_eq_one)];
    let table_names = vec!["users".to_owned(), "orders".to_owned()];
    let (pushed, remaining) = pushdown_predicates(&terms, &table_names);
    assert_eq!(pushed.len(), 1);
    assert_eq!(pushed[0].table, "users");
    assert!(remaining.is_empty());
}
/// With a single table in scope, an unqualified predicate (`id > 10`) must be
/// pushed down, leaving nothing in the remaining set.
#[test]
fn test_pushdown_single_table_unqualified() {
    let bare_id = Expr::Column(ColumnRef::bare("id"), Span::ZERO);
    let ten = Expr::Literal(Literal::Integer(10), Span::ZERO);
    let id_gt_ten = Expr::BinaryOp {
        left: Box::new(bare_id),
        op: AstBinaryOp::Gt,
        right: Box::new(ten),
        span: Span::ZERO,
    };
    let terms = [classify_where_term(&id_gt_ten)];
    let table_names = vec!["users".to_owned()];
    let (pushed, remaining) = pushdown_predicates(&terms, &table_names);
    assert_eq!(pushed.len(), 1);
    assert!(remaining.is_empty());
}
/// With two tables in scope, an unqualified predicate (`id = 1`) must not be
/// pushed down and must stay in the remaining set.
#[test]
fn test_pushdown_unqualified_multi_table_stays() {
    let bare_id = Expr::Column(ColumnRef::bare("id"), Span::ZERO);
    let one = Expr::Literal(Literal::Integer(1), Span::ZERO);
    let id_eq_one = Expr::BinaryOp {
        left: Box::new(bare_id),
        op: AstBinaryOp::Eq,
        right: Box::new(one),
        span: Span::ZERO,
    };
    let terms = [classify_where_term(&id_eq_one)];
    let table_names = vec!["users".to_owned(), "orders".to_owned()];
    let (pushed, remaining) = pushdown_predicates(&terms, &table_names);
    assert!(pushed.is_empty());
    assert_eq!(remaining.len(), 1);
}
/// Folding an integer literal must return that literal unchanged.
#[test]
fn test_fold_literal() {
    let forty_two = Expr::Literal(Literal::Integer(42), Span::ZERO);
    let folded = try_constant_fold(&forty_two);
    assert_eq!(folded, FoldResult::Literal(Literal::Integer(42)));
}
/// Folding `10 + 32` must produce the integer literal 42.
#[test]
fn test_fold_addition() {
    let int = |value: i64| Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO));
    let sum = Expr::BinaryOp {
        left: int(10),
        op: fsqlite_ast::BinaryOp::Add,
        right: int(32),
        span: Span::ZERO,
    };
    let folded = try_constant_fold(&sum);
    assert_eq!(folded, FoldResult::Literal(Literal::Integer(42)));
}
/// Folding `10 / 0` must produce the NULL literal.
#[test]
fn test_fold_division_by_zero() {
    let int = |value: i64| Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO));
    let quotient = Expr::BinaryOp {
        left: int(10),
        op: fsqlite_ast::BinaryOp::Divide,
        right: int(0),
        span: Span::ZERO,
    };
    let folded = try_constant_fold(&quotient);
    assert_eq!(folded, FoldResult::Literal(Literal::Null));
}
/// Folding unary minus applied to `5` must produce the integer literal -5.
#[test]
fn test_fold_negation() {
    let five = Expr::Literal(Literal::Integer(5), Span::ZERO);
    let negated = Expr::UnaryOp {
        op: fsqlite_ast::UnaryOp::Negate,
        expr: Box::new(five),
        span: Span::ZERO,
    };
    let folded = try_constant_fold(&negated);
    assert_eq!(folded, FoldResult::Literal(Literal::Integer(-5)));
}
/// A bare column reference must be reported as `FoldResult::NotConstant`.
#[test]
fn test_fold_column_ref_not_constant() {
    let id_column = Expr::Column(ColumnRef::bare("id"), Span::ZERO);
    let folded = try_constant_fold(&id_column);
    assert_eq!(folded, FoldResult::NotConstant);
}
/// Folding `10 < 20` must produce the TRUE literal.
#[test]
fn test_fold_comparison() {
    let int = |value: i64| Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO));
    let less_than = Expr::BinaryOp {
        left: int(10),
        op: fsqlite_ast::BinaryOp::Lt,
        right: int(20),
        span: Span::ZERO,
    };
    let folded = try_constant_fold(&less_than);
    assert_eq!(folded, FoldResult::Literal(Literal::True));
}
/// Folding the nested expression `(3 + 4) * 6` must produce the integer
/// literal 42.
#[test]
fn test_fold_nested_expression() {
    let int = |value: i64| Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO));
    let three_plus_four = Expr::BinaryOp {
        left: int(3),
        op: fsqlite_ast::BinaryOp::Add,
        right: int(4),
        span: Span::ZERO,
    };
    let product = Expr::BinaryOp {
        left: Box::new(three_plus_four),
        op: fsqlite_ast::BinaryOp::Multiply,
        right: int(6),
        span: Span::ZERO,
    };
    let folded = try_constant_fold(&product);
    assert_eq!(folded, FoldResult::Literal(Literal::Integer(42)));
}
/// Two cached join-ordering calls with the same SQL template and schema
/// cookie must both equal the uncached plan, share one `Rc` allocation, and
/// leave exactly one entry in the plan cache.
#[test]
fn test_query_planner_cache_hit_matches_uncached_join_plan() {
    let heuristic = |name: &str, n_pages: u64, n_rows: u64| TableStats {
        name: name.to_owned(),
        n_pages,
        n_rows,
        source: StatsSource::Heuristic,
    };
    let tables = vec![heuristic("small", 4, 40), heuristic("large", 40, 4_000)];
    let uncached = order_joins(&tables, &[], &[], None, &[]);

    let mut planner = QueryPlanner::default();
    let sql_template = "SELECT * FROM small JOIN large ON small.id = large.small_id";
    let schema_cookie = 7;
    let first = planner.order_joins_with_cache(
        sql_template,
        schema_cookie,
        &tables,
        &[],
        &[],
        None,
        &[],
        None,
        None,
        PlannerFeatureFlags::default(),
    );
    // Identical arguments: the second call repeats the first.
    let second = planner.order_joins_with_cache(
        sql_template,
        schema_cookie,
        &tables,
        &[],
        &[],
        None,
        &[],
        None,
        None,
        PlannerFeatureFlags::default(),
    );

    assert_eq!(*first, uncached);
    assert_eq!(*second, uncached);
    assert!(Rc::ptr_eq(&first, &second));
    assert_eq!(planner.plan_cache_len(), 1);
}
/// Plans stored through `cached_plan` and through `order_joins_with_cache` for
/// the same SQL text and schema cookie must live in two distinct cache entries.
#[test]
fn test_query_planner_cache_separates_generic_and_join_entries() {
    let sql_template = "SELECT * FROM users WHERE id = ?1";
    let schema_cookie = 31;
    let users = TableStats {
        name: "users".to_owned(),
        n_pages: 16,
        n_rows: 1_000,
        source: StatsSource::Heuristic,
    };
    let tables = vec![users];
    let mut planner = QueryPlanner::default();

    // First populate the generic entry with a recognisable sentinel plan...
    let generic = planner.cached_plan(sql_template, schema_cookie, || {
        sample_cached_query_plan("generic-sentinel")
    });
    // ...then request a join-order plan under the same template and cookie.
    let join_plan = planner.order_joins_with_cache(
        sql_template,
        schema_cookie,
        &tables,
        &[],
        &[],
        None,
        &[],
        None,
        None,
        PlannerFeatureFlags::default(),
    );

    let expected_generic_order = vec!["generic-sentinel".to_owned()];
    let expected_join_order = vec!["users".to_owned()];
    assert_eq!(generic.join_order, expected_generic_order);
    assert_eq!(join_plan.join_order, expected_join_order);
    assert!(
        !Rc::ptr_eq(&generic, &join_plan),
        "generic cached_plan entries and join-order cache entries must not alias"
    );
    assert_eq!(planner.plan_cache_len(), 2);
}
/// After two plans are cached under schema cookie 11, a lookup under cookie 12
/// must rebuild the plan and leave a single entry in the cache.
#[test]
fn test_query_planner_cache_invalidates_all_entries_on_schema_cookie_change() {
    let mut planner = QueryPlanner::default();
    let build_count = Cell::new(0);
    // Builds a sample plan and records that a build actually happened.
    let counted_plan = |label: &str| {
        build_count.set(build_count.get() + 1);
        sample_cached_query_plan(label)
    };

    let plan_a = planner.cached_plan("SELECT * FROM t1", 11, || counted_plan("t1-v11"));
    let _plan_b = planner.cached_plan("SELECT * FROM t2", 11, || counted_plan("t2-v11"));
    assert_eq!(planner.plan_cache_len(), 2);

    // Same SQL as `plan_a`, but under a newer schema cookie.
    let rebuilt_plan_a = planner.cached_plan("SELECT * FROM t1", 12, || counted_plan("t1-v12"));

    assert_eq!(build_count.get(), 3);
    assert_eq!(planner.plan_cache_len(), 1);
    assert_eq!(rebuilt_plan_a.join_order, vec!["t1-v12".to_owned()]);
    assert!(
        !Rc::ptr_eq(&plan_a, &rebuilt_plan_a),
        "schema cookie change must discard prior Rc<QueryPlan> entries"
    );
}
/// Fills the plan cache to `DEFAULT_PLAN_CACHE_CAPACITY`, re-touches the `?0`
/// entry, then inserts one more plan: the re-touched entry must stay resident
/// while the `?1` entry is asserted to be gone.
#[test]
fn test_query_planner_cache_lru_eviction_at_capacity() {
    let schema_cookie = 21;
    let mut planner = QueryPlanner::default();

    // Populate every slot with a distinct statement.
    (0..DEFAULT_PLAN_CACHE_CAPACITY).for_each(|idx| {
        let sql = format!("SELECT * FROM cached_table WHERE id = ?{idx}");
        drop(planner.cached_plan(&sql, schema_cookie, || sample_cached_query_plan(&sql)));
    });
    assert_eq!(planner.plan_cache_len(), DEFAULT_PLAN_CACHE_CAPACITY);

    // Touch the oldest entry; the builder must never run because it is cached.
    let hottest_sql = "SELECT * FROM cached_table WHERE id = ?0";
    let hottest_plan = planner.cached_plan(hottest_sql, schema_cookie, || {
        panic!("expected hottest cache entry to already exist")
    });
    let mut remaining_hits = 4;
    while remaining_hits > 0 {
        let hottest_plan_again = planner.cached_plan(hottest_sql, schema_cookie, || {
            panic!("expected hottest entry to stay hot across repeated direct hits")
        });
        assert!(Rc::ptr_eq(&hottest_plan, &hottest_plan_again));
        remaining_hits -= 1;
    }

    let cold_key = plan_cache_key("SELECT * FROM cached_table WHERE id = ?1", schema_cookie);
    let hot_key = plan_cache_key(hottest_sql, schema_cookie);

    // One insertion past capacity forces an eviction.
    drop(planner.cached_plan(
        "SELECT * FROM cached_table WHERE id = ?overflow",
        schema_cookie,
        || sample_cached_query_plan("overflow"),
    ));
    assert_eq!(planner.plan_cache_len(), DEFAULT_PLAN_CACHE_CAPACITY);

    let hot_resident = planner.plan_cache.iter().any(|(key, _)| *key == hot_key);
    assert!(
        hot_resident,
        "re-accessed entry should remain resident after LRU eviction"
    );
    let cold_evicted = planner.plan_cache.iter().all(|(key, _)| *key != cold_key);
    assert!(
        cold_evicted,
        "least-recently-used entry should be evicted at capacity"
    );

    let hottest_plan_again = planner.cached_plan(hottest_sql, schema_cookie, || {
        panic!("expected hottest entry to survive eviction")
    });
    assert!(Rc::ptr_eq(&hottest_plan, &hottest_plan_again));
}
/// The same query planned with and without `leapfrog_join` must produce two
/// separate cache entries: a hash-join-only plan for the default flags and a
/// plan containing a leapfrog segment when the flag is enabled.
#[test]
fn test_query_planner_cache_separates_feature_flag_variants() {
    let tables = [
        table_stats("a", 1024, 1_000_000),
        table_stats("b", 1024, 1_000_000),
        table_stats("c", 1024, 1_000_000),
    ];
    let terms = [join_term("a", "k", "b", "k"), join_term("b", "k", "c", "k")];
    let sql_template = "SELECT * FROM a JOIN b ON a.k = b.k JOIN c ON b.k = c.k";
    let mut planner = QueryPlanner::default();

    // Only the feature flags differ between the two lookups.
    let mut plan_with = |flags: PlannerFeatureFlags| {
        planner.order_joins_with_cache(
            sql_template,
            7,
            &tables,
            &[],
            &terms,
            None,
            &[],
            None,
            None,
            flags,
        )
    };
    let hash_only = plan_with(PlannerFeatureFlags::default());
    let leapfrog = plan_with(PlannerFeatureFlags {
        leapfrog_join: true,
        ..PlannerFeatureFlags::default()
    });

    let every_segment_hashes = hash_only
        .join_segments
        .iter()
        .all(|segment| segment.operator == JoinOperator::HashJoin);
    assert!(
        every_segment_hashes,
        "disabled feature flag should keep hash-only plan: {:?}",
        hash_only.join_segments
    );

    let some_segment_leapfrogs = leapfrog
        .join_segments
        .iter()
        .any(|segment| segment.operator == JoinOperator::LeapfrogTriejoin);
    assert!(
        some_segment_leapfrogs,
        "enabled feature flag should allow leapfrog routing: {:?}",
        leapfrog.join_segments
    );

    assert!(
        !Rc::ptr_eq(&hash_only, &leapfrog),
        "feature-flag variants must not alias the same cached Rc<QueryPlan>"
    );
    assert_eq!(planner.plan_cache_len(), 2);
}
/// When a cracking-hint store is supplied, each call must follow the index
/// recorded in its own store, nothing may be placed in the plan cache, and the
/// two returned plans must be distinct `Rc`s.
#[test]
fn test_query_planner_cache_bypasses_adaptive_cracking_hints() {
    // Two interchangeable single-column indexes over `t1.a`.
    let index_on_a = |name: &str, n_pages| IndexInfo {
        name: name.to_owned(),
        table: "t1".to_owned(),
        columns: vec!["a".to_owned()],
        unique: false,
        n_pages,
        source: StatsSource::Heuristic,
        partial_where: None,
        expression_columns: vec![],
    };
    // A fresh hint store that has recorded one equality scan over `index_name`.
    let hints_recording = |index_name: &str| {
        let mut store = CrackingHintStore::default();
        store.record_access_path(&AccessPath {
            table: "t1".to_owned(),
            kind: AccessPathKind::IndexScanEquality,
            index: Some(index_name.to_owned()),
            estimated_cost: 1.0,
            estimated_rows: 1.0,
            time_travel: None,
            probe: None,
        });
        store
    };

    let tables = [table_stats("t1", 256, 20_000)];
    let indexes = [index_on_a("idx_a", 16), index_on_a("idx_b", 12)];
    let terms = [eq_term("a")];
    let sql_template = "SELECT * FROM t1 WHERE a = ?1";
    let mut planner = QueryPlanner::default();

    let mut first_hints = hints_recording("idx_a");
    let first = planner.order_joins_with_cache(
        sql_template,
        5,
        &tables,
        &indexes,
        &terms,
        None,
        &[],
        None,
        Some(&mut first_hints),
        PlannerFeatureFlags::default(),
    );

    let mut second_hints = hints_recording("idx_b");
    let second = planner.order_joins_with_cache(
        sql_template,
        5,
        &tables,
        &indexes,
        &terms,
        None,
        &[],
        None,
        Some(&mut second_hints),
        PlannerFeatureFlags::default(),
    );

    let first_index = first.access_paths[0].index.as_deref();
    let second_index = second.access_paths[0].index.as_deref();
    assert_eq!(first_index, Some("idx_a"));
    assert_eq!(second_index, Some("idx_b"));
    assert_eq!(planner.plan_cache_len(), 0);
    assert!(!Rc::ptr_eq(&first, &second));
}
}
/// For six tables with no indexes or predicates, `order_joins` must emit a
/// join order that names every input table exactly once.
#[test]
fn test_join_order_returns_each_table_once() {
    // (name, n_pages, n_rows) for each table, all with `Analyze` statistics.
    let tables = Vec::from(
        [
            ("nation", 1, 25),
            ("region", 1, 5),
            ("supplier", 100, 10_000),
            ("customer", 500, 150_000),
            ("orders", 2000, 1_500_000),
            ("lineitem", 8000, 6_000_000),
        ]
        .map(|(name, n_pages, n_rows)| TableStats {
            name: name.to_owned(),
            n_pages,
            n_rows,
            source: StatsSource::Analyze,
        }),
    );

    let plan = order_joins(&tables, &[], &[], None, &[]);

    // Same length and no duplicates...
    assert_eq!(plan.join_order.len(), tables.len());
    let distinct_names: HashSet<_> = plan.join_order.iter().collect();
    assert_eq!(distinct_names.len(), tables.len());
    // ...and every input table is present.
    tables.iter().for_each(|table| {
        assert!(plan.join_order.iter().any(|name| name == &table.name));
    });
}
#[cfg(test)]
mod probe_tests {
use super::*;
use fsqlite_ast::{BinaryOp as AstBinaryOp, ColumnRef, Expr, Literal, Span};
/// Boxes an unqualified column reference named `name` with a zero span.
fn col(name: &str) -> Box<Expr> {
    let column = ColumnRef::bare(name);
    Box::new(Expr::Column(column, Span::ZERO))
}
/// Boxes an integer literal expression holding `v` with a zero span.
fn lit_int(v: i64) -> Box<Expr> {
    let literal = Literal::Integer(v);
    Box::new(Expr::Literal(literal, Span::ZERO))
}
/// Builds the expression `<col_name> = <val>` from a bare column and an
/// integer literal.
fn eq_expr(col_name: &str, val: i64) -> Expr {
    let left = col(col_name);
    let right = lit_int(val);
    Expr::BinaryOp {
        left,
        op: AstBinaryOp::Eq,
        right,
        span: Span::ZERO,
    }
}
/// A `RowidLookup` access path paired with a `rowid = 42` term must produce a
/// `RowidEquality` probe whose target is the literal `42`.
#[test]
fn extract_probe_rowid_equality() {
    let predicate = eq_expr("rowid", 42);
    let rowid_column = WhereColumn {
        table: None,
        column: "rowid".to_owned(),
    };
    let terms = [WhereTerm {
        expr: &predicate,
        column: Some(rowid_column),
        kind: WhereTermKind::RowidEquality,
    }];
    let lookup = AccessPath {
        table: "t".to_owned(),
        kind: AccessPathKind::RowidLookup,
        index: None,
        estimated_cost: 1.0,
        estimated_rows: 1.0,
        time_travel: None,
        probe: None,
    };

    let probe = extract_access_path_probe_with_rowid_aliases(&lookup, &[], &terms, &[]);

    assert!(
        matches!(&probe, Some(AccessPathProbe::RowidEquality { target }) if **target == Expr::Literal(Literal::Integer(42), Span::ZERO))
    );
}
/// An equality index scan over `idx_name` with a `name = 7` term must yield an
/// `Equality` probe on column `name` targeting the literal `7`.
#[test]
fn extract_probe_index_equality() {
    let predicate = eq_expr("name", 7);
    let name_column = WhereColumn {
        table: None,
        column: "name".to_owned(),
    };
    let terms = [WhereTerm {
        expr: &predicate,
        column: Some(name_column),
        kind: WhereTermKind::Equality,
    }];
    let name_index = IndexInfo {
        name: "idx_name".to_owned(),
        table: "t".to_owned(),
        columns: vec!["name".to_owned()],
        unique: false,
        n_pages: 1,
        source: StatsSource::Heuristic,
        partial_where: None,
        expression_columns: vec![],
    };
    let indexes = [name_index];
    let scan = AccessPath {
        table: "t".to_owned(),
        kind: AccessPathKind::IndexScanEquality,
        index: Some("idx_name".to_owned()),
        estimated_cost: 5.0,
        estimated_rows: 1.0,
        time_travel: None,
        probe: None,
    };

    let probe = extract_access_path_probe_with_rowid_aliases(&scan, &indexes, &terms, &[]);

    // Anything other than an equality probe is a failure.
    let (column, target) = match &probe {
        Some(AccessPathProbe::Equality { column, target }) => (column, target),
        other => panic!("expected Equality probe, got {other:?}"),
    };
    assert_eq!(column, "name");
    assert_eq!(**target, Expr::Literal(Literal::Integer(7), Span::ZERO));
}
/// Two range terms on the same indexed column (`age > 18` and `age <= 65`)
/// must be combined into a single `Range` probe with an exclusive lower bound
/// of `18` and an inclusive upper bound of `65`.
#[test]
fn extract_probe_index_range() {
    let gt_expr = Expr::BinaryOp {
        left: col("age"),
        op: AstBinaryOp::Gt,
        right: lit_int(18),
        span: Span::ZERO,
    };
    // Upper bound uses `<=`, hence the `le_` prefix.
    let le_expr = Expr::BinaryOp {
        left: col("age"),
        op: AstBinaryOp::Le,
        right: lit_int(65),
        span: Span::ZERO,
    };
    let terms = [
        WhereTerm {
            // Borrow of the lower-bound expression built above.
            expr: &gt_expr,
            column: Some(WhereColumn {
                table: None,
                column: "age".to_owned(),
            }),
            kind: WhereTermKind::Range,
        },
        WhereTerm {
            // Borrow of the upper-bound expression built above.
            expr: &le_expr,
            column: Some(WhereColumn {
                table: None,
                column: "age".to_owned(),
            }),
            kind: WhereTermKind::Range,
        },
    ];
    let indexes = [IndexInfo {
        name: "idx_age".to_owned(),
        table: "t".to_owned(),
        columns: vec!["age".to_owned()],
        unique: false,
        n_pages: 1,
        source: StatsSource::Heuristic,
        partial_where: None,
        expression_columns: vec![],
    }];
    let ap = AccessPath {
        table: "t".to_owned(),
        kind: AccessPathKind::IndexScanRange { selectivity: 0.5 },
        index: Some("idx_age".to_owned()),
        estimated_cost: 50.0,
        estimated_rows: 100.0,
        time_travel: None,
        probe: None,
    };

    let probe = extract_access_path_probe_with_rowid_aliases(&ap, &indexes, &terms, &[]);

    match &probe {
        Some(AccessPathProbe::Range {
            column,
            lower,
            upper,
        }) => {
            assert_eq!(column, "age");
            let (lo_expr, lo_inc) = lower.as_ref().expect("expected lower bound");
            assert_eq!(**lo_expr, Expr::Literal(Literal::Integer(18), Span::ZERO));
            assert!(!lo_inc, "GT should be exclusive");
            let (hi_expr, hi_inc) = upper.as_ref().expect("expected upper bound");
            assert_eq!(**hi_expr, Expr::Literal(Literal::Integer(65), Span::ZERO));
            assert!(hi_inc, "LE should be inclusive");
        }
        other => panic!("expected Range probe, got {other:?}"),
    }
}
/// A three-element `status IN (1, 2, 3)` term over an indexed column must be
/// turned into an `InList` probe carrying all three values in order.
#[test]
fn extract_probe_in_list() {
    let candidates = vec![
        Expr::Literal(Literal::Integer(1), Span::ZERO),
        Expr::Literal(Literal::Integer(2), Span::ZERO),
        Expr::Literal(Literal::Integer(3), Span::ZERO),
    ];
    let membership = Expr::In {
        expr: col("status"),
        set: InSet::List(candidates),
        not: false,
        span: Span::ZERO,
    };
    let status_column = WhereColumn {
        table: None,
        column: "status".to_owned(),
    };
    let terms = [WhereTerm {
        expr: &membership,
        column: Some(status_column),
        kind: WhereTermKind::InList { count: 3 },
    }];
    let status_index = IndexInfo {
        name: "idx_status".to_owned(),
        table: "t".to_owned(),
        columns: vec!["status".to_owned()],
        unique: false,
        n_pages: 1,
        source: StatsSource::Heuristic,
        partial_where: None,
        expression_columns: vec![],
    };
    let indexes = [status_index];
    let scan = AccessPath {
        table: "t".to_owned(),
        kind: AccessPathKind::IndexScanEquality,
        index: Some("idx_status".to_owned()),
        estimated_cost: 15.0,
        estimated_rows: 30.0,
        time_travel: None,
        probe: None,
    };

    let probe = extract_access_path_probe_with_rowid_aliases(&scan, &indexes, &terms, &[]);

    let (column, values) = match &probe {
        Some(AccessPathProbe::InList { column, values }) => (column, values),
        other => panic!("expected InList probe, got {other:?}"),
    };
    assert_eq!(column, "status");
    assert_eq!(values.len(), 3);
    // Spot-check the first and last list members.
    assert_eq!(*values[0], Expr::Literal(Literal::Integer(1), Span::ZERO));
    assert_eq!(*values[2], Expr::Literal(Literal::Integer(3), Span::ZERO));
}
/// When the same column carries both `status = 5` and `status IN (1, 5)`, the
/// extracted probe must be the `Equality` variant.
#[test]
fn extract_probe_in_list_prefers_equality_over_in() {
    let equality = eq_expr("status", 5);
    let membership = Expr::In {
        expr: col("status"),
        set: InSet::List(vec![
            Expr::Literal(Literal::Integer(1), Span::ZERO),
            Expr::Literal(Literal::Integer(5), Span::ZERO),
        ]),
        not: false,
        span: Span::ZERO,
    };

    // The equality term is listed first, followed by the IN term.
    let equality_term = WhereTerm {
        expr: &equality,
        column: Some(WhereColumn {
            table: None,
            column: "status".to_owned(),
        }),
        kind: WhereTermKind::Equality,
    };
    let membership_term = WhereTerm {
        expr: &membership,
        column: Some(WhereColumn {
            table: None,
            column: "status".to_owned(),
        }),
        kind: WhereTermKind::InList { count: 2 },
    };
    let terms = [equality_term, membership_term];

    let status_index = IndexInfo {
        name: "idx_status".to_owned(),
        table: "t".to_owned(),
        columns: vec!["status".to_owned()],
        unique: false,
        n_pages: 1,
        source: StatsSource::Heuristic,
        partial_where: None,
        expression_columns: vec![],
    };
    let indexes = [status_index];
    let scan = AccessPath {
        table: "t".to_owned(),
        kind: AccessPathKind::IndexScanEquality,
        index: Some("idx_status".to_owned()),
        estimated_cost: 5.0,
        estimated_rows: 1.0,
        time_travel: None,
        probe: None,
    };

    let probe = extract_access_path_probe_with_rowid_aliases(&scan, &indexes, &terms, &[]);

    let is_equality_probe = matches!(&probe, Some(AccessPathProbe::Equality { .. }));
    assert!(
        is_equality_probe,
        "equality should be preferred when both equality and IN terms exist"
    );
}
/// A `LikePrefix` term with prefix `abc` and upper bound `abd` must become a
/// `Range` probe: inclusive lower bound `"abc"`, exclusive upper bound `"abd"`.
#[test]
fn extract_probe_like_prefix_as_range() {
    let pattern = Expr::Literal(Literal::String("abc%".to_owned()), Span::ZERO);
    let like_predicate = Expr::Like {
        expr: col("name"),
        pattern: Box::new(pattern),
        escape: None,
        not: false,
        op: fsqlite_ast::LikeOp::Like,
        span: Span::ZERO,
    };
    let name_column = WhereColumn {
        table: None,
        column: "name".to_owned(),
    };
    let terms = [WhereTerm {
        expr: &like_predicate,
        column: Some(name_column),
        kind: WhereTermKind::LikePrefix {
            prefix: "abc".to_owned(),
            upper_bound: Some("abd".to_owned()),
        },
    }];
    let name_index = IndexInfo {
        name: "idx_name".to_owned(),
        table: "t".to_owned(),
        columns: vec!["name".to_owned()],
        unique: false,
        n_pages: 1,
        source: StatsSource::Heuristic,
        partial_where: None,
        expression_columns: vec![],
    };
    let indexes = [name_index];
    let scan = AccessPath {
        table: "t".to_owned(),
        kind: AccessPathKind::IndexScanRange { selectivity: 0.1 },
        index: Some("idx_name".to_owned()),
        estimated_cost: 10.0,
        estimated_rows: 100.0,
        time_travel: None,
        probe: None,
    };

    let probe = extract_access_path_probe_with_rowid_aliases(&scan, &indexes, &terms, &[]);

    let (column, lower, upper) = match &probe {
        Some(AccessPathProbe::Range {
            column,
            lower,
            upper,
        }) => (column, lower, upper),
        other => panic!("expected Range probe from LikePrefix, got {other:?}"),
    };
    assert_eq!(column, "name");

    let (lo_expr, lo_inc) = lower.as_ref().expect("expected lower bound");
    let expected_low = Expr::Literal(Literal::String("abc".to_owned()), Span::ZERO);
    assert_eq!(**lo_expr, expected_low);
    assert!(lo_inc, "LIKE prefix lower bound should be inclusive");

    let (hi_expr, hi_inc) = upper.as_ref().expect("expected upper bound");
    let expected_high = Expr::Literal(Literal::String("abd".to_owned()), Span::ZERO);
    assert_eq!(**hi_expr, expected_high);
    assert!(!hi_inc, "LIKE prefix upper bound should be exclusive");
}
/// A full table scan with no indexes and no WHERE terms has nothing to probe,
/// so extraction must return `None`.
#[test]
fn extract_probe_full_scan_returns_none() {
    let table = String::from("t");
    let ap = AccessPath {
        table,
        kind: AccessPathKind::FullTableScan,
        index: None,
        estimated_cost: 1000.0,
        estimated_rows: 1000.0,
        time_travel: None,
        probe: None,
    };

    assert!(extract_access_path_probe_with_rowid_aliases(&ap, &[], &[], &[]).is_none());
}
/// An `age BETWEEN 18 AND 65` term over an indexed column must become a
/// `Range` probe whose lower (`18`) and upper (`65`) bounds are both inclusive.
#[test]
fn extract_probe_between_as_inclusive_range() {
    // A stack local is enough here: the term only needs to borrow the
    // expression for the duration of the test, so nothing has to be leaked to
    // obtain a `'static` reference.
    let between_expr = Expr::Between {
        expr: Box::new(Expr::Column(ColumnRef::bare("age"), Span::ZERO)),
        low: Box::new(Expr::Literal(Literal::Integer(18), Span::ZERO)),
        high: Box::new(Expr::Literal(Literal::Integer(65), Span::ZERO)),
        not: false,
        span: Span::ZERO,
    };
    let terms = [WhereTerm {
        expr: &between_expr,
        column: Some(WhereColumn {
            table: None,
            column: "age".to_owned(),
        }),
        kind: WhereTermKind::Between,
    }];
    let indexes = [IndexInfo {
        name: "idx_age".to_owned(),
        table: "t".to_owned(),
        columns: vec!["age".to_owned()],
        unique: false,
        n_pages: 1,
        source: StatsSource::Heuristic,
        partial_where: None,
        expression_columns: vec![],
    }];
    let ap = AccessPath {
        table: "t".to_owned(),
        kind: AccessPathKind::IndexScanRange { selectivity: 0.1 },
        index: Some("idx_age".to_owned()),
        estimated_cost: 10.0,
        estimated_rows: 100.0,
        time_travel: None,
        probe: None,
    };

    let probe = extract_access_path_probe_with_rowid_aliases(&ap, &indexes, &terms, &[]);

    match &probe {
        Some(AccessPathProbe::Range {
            column,
            lower,
            upper,
        }) => {
            assert_eq!(column, "age");
            let (lo_expr, lo_inc) = lower.as_ref().expect("expected lower bound");
            assert_eq!(**lo_expr, Expr::Literal(Literal::Integer(18), Span::ZERO));
            assert!(lo_inc, "BETWEEN lower bound must be inclusive");
            let (hi_expr, hi_inc) = upper.as_ref().expect("expected upper bound");
            assert_eq!(**hi_expr, Expr::Literal(Literal::Integer(65), Span::ZERO));
            assert!(hi_inc, "BETWEEN upper bound must be inclusive");
        }
        other => panic!("expected Range probe from Between, got {other:?}"),
    }
}
}