Skip to main content

fsqlite_planner/
lib.rs

1//! Query planner: name resolution, WHERE analysis, cost model, join ordering.
2//!
3//! Implements:
4//! - Compound SELECT ORDER BY resolution (§19 quirk: first SELECT wins)
5//! - Cost model for access paths in page reads (§10.5)
6//! - Index usability analysis for WHERE terms (§10.5)
7//! - Bounded beam search join ordering — NGQP-style (§10.5)
8//!
9//! Note: AST-to-VDBE compilation is an integration concern and lives above the
10//! planner layer per the workspace layering rules (bd-1wwc).
11
12pub mod codegen;
13pub mod decision_contract;
14pub mod differential;
15pub mod stats;
16
17use decision_contract::access_path_kind_label;
18use fsqlite_ast::{
19    BinaryOp as AstBinaryOp, ColumnRef, CompoundOp, Expr, FromClause, InSet, IndexHint,
20    JoinConstraint, JoinKind, LikeOp, Literal, NullsOrder, OrderingTerm, ResultColumn, SelectBody,
21    SelectCore, SortDirection, Span, TableOrSubquery,
22};
23use lru::LruCache;
24use std::collections::{BTreeMap, HashMap, HashSet};
25use std::fmt;
26use std::num::NonZeroUsize;
27use std::rc::Rc;
28use std::sync::atomic::{AtomicU64, Ordering};
29use std::sync::{LazyLock, Mutex};
30use xxhash_rust::xxh3::xxh3_64_with_seed;
31
32// ---------------------------------------------------------------------------
33// Compound ORDER BY resolution (§19 quirk: first SELECT wins)
34// ---------------------------------------------------------------------------
35
36/// A resolved ORDER BY term for a compound SELECT.
37///
38/// After resolution, each term is bound to a 0-based column index in the
39/// compound result set, with optional direction, collation, and nulls ordering.
40#[derive(Debug, Clone, PartialEq, Eq)]
41pub struct ResolvedCompoundOrderBy {
42    /// 0-based index into the compound result columns.
43    pub column_idx: usize,
44    /// ASC or DESC.
45    pub direction: Option<SortDirection>,
46    /// COLLATE override (e.g. `ORDER BY a COLLATE NOCASE`).
47    pub collation: Option<String>,
48    /// NULLS FIRST or NULLS LAST.
49    pub nulls: Option<NullsOrder>,
50}
51
52/// Errors during compound ORDER BY resolution.
53#[derive(Debug, Clone, PartialEq, Eq)]
54pub enum CompoundOrderByError {
55    /// The referenced column name was not found in any SELECT's output aliases.
56    ColumnNotFound { name: String, span: Span },
57    /// A numeric column index is out of range (1-based in SQL, but converted).
58    IndexOutOfRange {
59        index: usize,
60        num_columns: usize,
61        span: Span,
62    },
63    /// A zero or negative numeric column index.
64    IndexZeroOrNegative { value: i64, span: Span },
65    /// An expression (e.g. `a+1`) is not allowed in compound ORDER BY.
66    ExpressionNotAllowed { span: Span },
67}
68
69impl std::fmt::Display for CompoundOrderByError {
70    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71        match self {
72            Self::ColumnNotFound { name, .. } => {
73                write!(
74                    f,
75                    "1st ORDER BY term does not match any column in the result set: {name}"
76                )
77            }
78            Self::IndexOutOfRange {
79                index, num_columns, ..
80            } => {
81                write!(
82                    f,
83                    "ORDER BY column index {index} out of range (result has {num_columns} columns)"
84                )
85            }
86            Self::IndexZeroOrNegative { value, .. } => {
87                write!(
88                    f,
89                    "ORDER BY column index {value} out of range - must be positive"
90                )
91            }
92            Self::ExpressionNotAllowed { .. } => {
93                write!(
94                    f,
95                    "ORDER BY expression not allowed in compound SELECT - use column name or number"
96                )
97            }
98        }
99    }
100}
101
102impl std::error::Error for CompoundOrderByError {}
103
104/// Extract output column alias names from a single `SelectCore`.
105///
106/// For `SELECT expr AS alias, ...` → `[Some("alias"), ...]`.
107/// For unaliased `SELECT col` → uses the column name from a bare column ref.
108/// For `*`, `table.*`, expressions without aliases → `None`.
109/// For `VALUES (...)` → all `None`.
110#[must_use]
111pub fn extract_output_aliases(core: &SelectCore) -> Vec<Option<String>> {
112    match core {
113        SelectCore::Select { columns, .. } => columns
114            .iter()
115            .map(|rc| match rc {
116                ResultColumn::Expr { alias: Some(a), .. } => Some(a.clone()),
117                ResultColumn::Expr {
118                    expr: Expr::Column(col_ref, _),
119                    alias: None,
120                    ..
121                } => Some(col_ref.column.to_string()),
122                _ => None,
123            })
124            .collect(),
125        SelectCore::Values(rows) => {
126            let width = rows.first().map_or(0, Vec::len);
127            vec![None; width]
128        }
129    }
130}
131
132/// Count the number of output columns in a `SelectCore`.
133#[must_use]
134pub fn count_output_columns(core: &SelectCore) -> usize {
135    match core {
136        SelectCore::Select { columns, .. } => columns.len(),
137        SelectCore::Values(rows) => rows.first().map_or(0, Vec::len),
138    }
139}
140
141// ---------------------------------------------------------------------------
142// Single-table projection resolution (`*` / `table.*` expansion)
143// ---------------------------------------------------------------------------
144
/// Reasons single-table result-column resolution can fail.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum SingleTableProjectionError {
    /// The core was `VALUES` rather than `SELECT`.
    NotSelectCore,
    /// The SELECT has no `FROM` clause to resolve columns against.
    MissingFromClause,
    /// The `FROM` source is not a plain table, or joins are present.
    UnsupportedFromSource,
    /// A table qualifier matched neither the table name nor its alias.
    UnknownTableQualifier { qualifier: String },
    /// A referenced column is not present on the table.
    ColumnNotFound { column: String },
}

impl fmt::Display for SingleTableProjectionError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Fixed messages need no formatting machinery.
            Self::NotSelectCore => f.write_str("projection resolution requires SELECT core"),
            Self::MissingFromClause => f.write_str("projection resolution requires FROM clause"),
            Self::UnsupportedFromSource => {
                f.write_str("only single-table FROM without JOIN is supported")
            }
            Self::UnknownTableQualifier { qualifier } => {
                write!(f, "unknown table qualifier: {qualifier}")
            }
            Self::ColumnNotFound { column } => write!(f, "column not found: {column}"),
        }
    }
}

impl std::error::Error for SingleTableProjectionError {}
177
178/// Resolve result columns for a single-table SELECT by:
179/// - expanding `*` and `table.*` into explicit column refs
180/// - validating table qualifiers and unqualified column refs
181///
182/// Non-column expressions are preserved as-is; codegen decides if they are
183/// supported for table-backed execution.
184pub fn resolve_single_table_result_columns(
185    core: &SelectCore,
186    table_columns: &[String],
187) -> Result<Vec<ResultColumn>, SingleTableProjectionError> {
188    resolve_single_table_result_columns_with_options(core, table_columns, true)
189}
190
191/// Resolve result columns for a single-table SELECT with explicit control over
192/// whether hidden rowid aliases (`rowid`, `_rowid_`, `oid`) are available.
193///
194/// `WITHOUT ROWID` tables should pass `supports_hidden_rowid = false` so hidden
195/// aliases are rejected unless a visible column of the same name exists.
196pub fn resolve_single_table_result_columns_with_options(
197    core: &SelectCore,
198    table_columns: &[String],
199    supports_hidden_rowid: bool,
200) -> Result<Vec<ResultColumn>, SingleTableProjectionError> {
201    let SelectCore::Select { columns, from, .. } = core else {
202        return Err(SingleTableProjectionError::NotSelectCore);
203    };
204    let from_clause = from
205        .as_ref()
206        .ok_or(SingleTableProjectionError::MissingFromClause)?;
207    let (table_name, table_alias) = single_table_source_name_and_alias(from_clause)?;
208
209    let mut resolved = Vec::new();
210    for result_col in columns {
211        match result_col {
212            ResultColumn::Star => {
213                for column_name in table_columns {
214                    resolved.push(ResultColumn::Expr {
215                        expr: Expr::Column(ColumnRef::bare(column_name.clone()), Span::ZERO),
216                        alias: None,
217                    });
218                }
219            }
220            ResultColumn::TableStar(qualifier) => {
221                if !qualifier_matches_table(&qualifier.name, table_name, table_alias) {
222                    return Err(SingleTableProjectionError::UnknownTableQualifier {
223                        qualifier: qualifier.to_string(),
224                    });
225                }
226                for column_name in table_columns {
227                    resolved.push(ResultColumn::Expr {
228                        expr: Expr::Column(ColumnRef::bare(column_name.clone()), Span::ZERO),
229                        alias: None,
230                    });
231                }
232            }
233            ResultColumn::Expr {
234                expr: Expr::Column(col_ref, _),
235                ..
236            } => {
237                if let Some(qualifier) = &col_ref.table {
238                    if !qualifier_matches_table(qualifier, table_name, table_alias) {
239                        return Err(SingleTableProjectionError::UnknownTableQualifier {
240                            qualifier: qualifier.to_string(),
241                        });
242                    }
243                }
244                if !(column_exists_ignore_case(table_columns, &col_ref.column)
245                    || supports_hidden_rowid && is_rowid_alias_name(&col_ref.column))
246                {
247                    return Err(SingleTableProjectionError::ColumnNotFound {
248                        column: col_ref.column.to_string(),
249                    });
250                }
251                resolved.push(result_col.clone());
252            }
253            ResultColumn::Expr { .. } => resolved.push(result_col.clone()),
254        }
255    }
256
257    Ok(resolved)
258}
259
260fn single_table_source_name_and_alias(
261    from_clause: &FromClause,
262) -> Result<(&str, Option<&str>), SingleTableProjectionError> {
263    if !from_clause.joins.is_empty() {
264        return Err(SingleTableProjectionError::UnsupportedFromSource);
265    }
266    match &from_clause.source {
267        TableOrSubquery::Table { name, alias, .. } => Ok((&name.name, alias.as_deref())),
268        _ => Err(SingleTableProjectionError::UnsupportedFromSource),
269    }
270}
271
/// Whether `columns` contains `name`, comparing ASCII case-insensitively.
fn column_exists_ignore_case(columns: &[String], name: &str) -> bool {
    for candidate in columns {
        if candidate.eq_ignore_ascii_case(name) {
            return true;
        }
    }
    false
}
275
/// Whether `qualifier` names the table, either by its real name or by its
/// alias (when one is given). Comparison is ASCII case-insensitive.
fn qualifier_matches_table(qualifier: &str, table_name: &str, table_alias: Option<&str>) -> bool {
    if qualifier.eq_ignore_ascii_case(table_name) {
        return true;
    }
    match table_alias {
        Some(alias) => qualifier.eq_ignore_ascii_case(alias),
        None => false,
    }
}
280
/// Whether `name` is one of SQLite's hidden rowid aliases: `rowid`,
/// `_rowid_` or `oid`, compared ASCII case-insensitively.
///
/// Compares in place rather than building a lowercase copy of `name`, so
/// the check does not allocate.
fn is_rowid_alias_name(name: &str) -> bool {
    const ROWID_ALIASES: [&str; 3] = ["rowid", "_rowid_", "oid"];
    ROWID_ALIASES
        .iter()
        .any(|alias| name.eq_ignore_ascii_case(alias))
}
285
286/// Resolve all ORDER BY terms for a compound SELECT statement.
287///
288/// # SQLite compound ORDER BY resolution rules
289///
290/// 1. **Integer literal** `ORDER BY N`: 1-based column index into the result.
291/// 2. **Bare column reference** `ORDER BY name`: search output aliases of all
292///    SELECTs in declaration order (first SELECT, then second, etc.). The first
293///    SELECT that contains a matching alias wins, and the column resolves to the
294///    *position* of that alias in that SELECT.
295/// 3. **COLLATE wrapper** `ORDER BY name COLLATE X`: resolve the inner
296///    expression as above, attach the collation override.
297/// 4. **Any other expression**: rejected (expressions like `a+1` are not
298///    allowed in compound SELECT ORDER BY).
299///
300/// # Errors
301///
302/// Returns [`CompoundOrderByError`] if a term cannot be resolved.
303pub fn resolve_compound_order_by(
304    body: &SelectBody,
305    order_by: &[OrderingTerm],
306) -> Result<Vec<ResolvedCompoundOrderBy>, CompoundOrderByError> {
307    // Gather aliases from all SELECT cores in order.
308    let mut all_aliases: Vec<Vec<Option<String>>> = Vec::with_capacity(1 + body.compounds.len());
309    all_aliases.push(extract_output_aliases(&body.select));
310    for (_, core) in &body.compounds {
311        all_aliases.push(extract_output_aliases(core));
312    }
313
314    let num_columns = count_output_columns(&body.select);
315
316    let mut resolved = Vec::with_capacity(order_by.len());
317    for term in order_by {
318        let (col_idx, collation) = resolve_single_term(&term.expr, &all_aliases, num_columns)?;
319        resolved.push(ResolvedCompoundOrderBy {
320            column_idx: col_idx,
321            direction: term.direction,
322            collation,
323            nulls: term.nulls,
324        });
325    }
326
327    Ok(resolved)
328}
329
330/// Resolve a single ORDER BY expression to a 0-based column index and optional
331/// collation override.
332fn resolve_single_term(
333    expr: &Expr,
334    all_aliases: &[Vec<Option<String>>],
335    num_columns: usize,
336) -> Result<(usize, Option<String>), CompoundOrderByError> {
337    match expr {
338        // Integer literal: 1-based column index.
339        Expr::Literal(Literal::Integer(n), span) => {
340            if *n <= 0 {
341                return Err(CompoundOrderByError::IndexZeroOrNegative {
342                    value: *n,
343                    span: *span,
344                });
345            }
346            #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
347            let idx = (*n as usize) - 1;
348            if idx >= num_columns {
349                return Err(CompoundOrderByError::IndexOutOfRange {
350                    index: idx + 1,
351                    num_columns,
352                    span: *span,
353                });
354            }
355            Ok((idx, None))
356        }
357
358        // Bare column reference: search all SELECTs in order.
359        Expr::Column(col_ref, span) => {
360            let name = &col_ref.column;
361            for aliases in all_aliases {
362                for (pos, alias_opt) in aliases.iter().enumerate() {
363                    if let Some(alias) = alias_opt {
364                        if alias.eq_ignore_ascii_case(name) {
365                            return Ok((pos, None));
366                        }
367                    }
368                }
369            }
370            Err(CompoundOrderByError::ColumnNotFound {
371                name: name.to_string(),
372                span: *span,
373            })
374        }
375
376        // COLLATE wrapper: resolve inner expr, attach collation.
377        Expr::Collate {
378            expr: inner,
379            collation,
380            ..
381        } => {
382            let (idx, _) = resolve_single_term(inner, all_aliases, num_columns)?;
383            Ok((idx, Some(collation.clone())))
384        }
385
386        // Any other expression is not allowed in compound ORDER BY.
387        other => Err(CompoundOrderByError::ExpressionNotAllowed { span: other.span() }),
388    }
389}
390
391/// Check whether a `SelectBody` is a compound query (has UNION/INTERSECT/EXCEPT).
392#[must_use]
393pub fn is_compound(body: &SelectBody) -> bool {
394    !body.compounds.is_empty()
395}
396
397/// Get the compound operator type names for a compound SELECT (for logging).
398#[must_use]
399pub fn compound_op_name(op: CompoundOp) -> &'static str {
400    match op {
401        CompoundOp::Union => "UNION",
402        CompoundOp::UnionAll => "UNION ALL",
403        CompoundOp::Intersect => "INTERSECT",
404        CompoundOp::Except => "EXCEPT",
405    }
406}
407
408// ===========================================================================
409// §10.5 Query Planning: Cost Model, Index Selection, Join Ordering
410// ===========================================================================
411
412// ---------------------------------------------------------------------------
413// Statistics and metadata types
414// ---------------------------------------------------------------------------
415
/// Provenance of a table or index statistic.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StatsSource {
    /// Gathered by `ANALYZE` (`sqlite_stat1` / `sqlite_stat4`).
    Analyze,
    /// Heuristic fallback used when no `ANALYZE` data is available.
    Heuristic,
}
424
425/// Statistics about a table, used for cost estimation.
426#[derive(Debug, Clone, PartialEq, Eq)]
427pub struct TableStats {
428    /// Table name.
429    pub name: String,
430    /// Number of B-tree pages occupied by the table.
431    pub n_pages: u64,
432    /// Estimated number of rows (from ANALYZE or heuristic).
433    pub n_rows: u64,
434    /// Source of these statistics.
435    pub source: StatsSource,
436}
437
438/// Metadata about an index, used for cost estimation and usability checks.
439#[derive(Debug, Clone, PartialEq)]
440pub struct IndexInfo {
441    /// Index name.
442    pub name: String,
443    /// Table this index belongs to.
444    pub table: String,
445    /// Ordered list of indexed column names (leftmost first).
446    pub columns: Vec<String>,
447    /// Whether this is a UNIQUE index.
448    pub unique: bool,
449    /// Number of B-tree pages occupied by the index.
450    pub n_pages: u64,
451    /// Source of the page count.
452    pub source: StatsSource,
453    /// For partial indexes: the WHERE clause that restricts which rows appear.
454    /// The planner can only use this index if the query's WHERE implies this predicate.
455    pub partial_where: Option<Expr>,
456    /// For expression indexes: the expressions indexed (parallel to `columns`).
457    /// When present, the planner matches query expressions structurally against these.
458    /// `columns` should contain synthetic names; the real matching uses these exprs.
459    pub expression_columns: Vec<Expr>,
460}
461
/// Schema hint marking a visible table column as an alias for SQLite's
/// hidden rowid (the `INTEGER PRIMARY KEY` case).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RowidAliasHint {
    /// Table name or query alias under which the column may be qualified;
    /// `None` means the hint is not tied to a particular qualifier.
    pub qualifier: Option<String>,
    /// Name of the visible column that aliases the rowid.
    pub column: String,
}
471
472impl RowidAliasHint {
473    /// Build an unqualified rowid-alias hint for a table-local column.
474    #[must_use]
475    pub fn new(column: impl Into<String>) -> Self {
476        Self {
477            qualifier: None,
478            column: column.into(),
479        }
480    }
481
482    /// Build a rowid-alias hint for a specific table name or query alias.
483    #[must_use]
484    pub fn qualified(qualifier: impl Into<String>, column: impl Into<String>) -> Self {
485        Self {
486            qualifier: Some(qualifier.into()),
487            column: column.into(),
488        }
489    }
490
491    fn matches_column(&self, table_name: &str, column: &WhereColumn) -> bool {
492        if !column.column.eq_ignore_ascii_case(&self.column) {
493            return false;
494        }
495
496        match (column.table.as_deref(), self.qualifier.as_deref()) {
497            (None, _) => true,
498            (Some(column_qualifier), Some(hint_qualifier)) => {
499                column_qualifier.eq_ignore_ascii_case(hint_qualifier)
500            }
501            (Some(column_qualifier), None) => column_qualifier.eq_ignore_ascii_case(table_name),
502        }
503    }
504}
505
506// ---------------------------------------------------------------------------
507// Access path types
508// ---------------------------------------------------------------------------
509
/// Strategies the planner can pick for reading a table.
// `Eq` cannot be derived because of the `f64` selectivity payloads.
#[derive(Clone, Debug, PartialEq)]
#[allow(clippy::derive_partial_eq_without_eq)]
pub enum AccessPathKind {
    /// Read every page of the table sequentially.
    FullTableScan,
    /// Range scan over an index (e.g. `col > expr`, `col BETWEEN`).
    IndexScanRange { selectivity: f64 },
    /// Equality probe into an index (e.g. `col = expr`).
    IndexScanEquality,
    /// Index scan where the index alone supplies every needed column.
    CoveringIndexScan { selectivity: f64 },
    /// Direct lookup by rowid (e.g. `WHERE rowid = ?`).
    RowidLookup,
}
525
526/// Probe expressions extracted from the WHERE clause during access-path
527/// selection.  Carried forward so downstream consumers (connection seam, VDBE
528/// codegen) do not re-extract from the AST.
529#[derive(Debug, Clone, PartialEq)]
530#[allow(clippy::derive_partial_eq_without_eq)]
531pub enum AccessPathProbe {
532    /// `WHERE rowid = <target>`
533    RowidEquality { target: Box<Expr> },
534    /// `WHERE <column> = <target>` backed by an index.
535    Equality { column: String, target: Box<Expr> },
536    /// `WHERE <column> {>|>=} <lo> AND <column> {<|<=} <hi>` backed by an index.
537    Range {
538        column: String,
539        lower: Option<(Box<Expr>, bool)>,
540        upper: Option<(Box<Expr>, bool)>,
541    },
542    /// `WHERE <column> IN (<v1>, <v2>, ...)` backed by an index — one seek per
543    /// value.
544    InList {
545        column: String,
546        values: Vec<Box<Expr>>,
547    },
548}
549
550/// A concrete access path chosen by the planner.
551#[derive(Debug, Clone, PartialEq)]
552#[allow(clippy::derive_partial_eq_without_eq)]
553pub struct AccessPath {
554    /// Table being accessed.
555    pub table: String,
556    /// Kind of scan.
557    pub kind: AccessPathKind,
558    /// Index used (None for full table scan / rowid lookup / rowid range).
559    pub index: Option<String>,
560    /// Estimated cost in page reads.
561    pub estimated_cost: f64,
562    /// Estimated rows returned.
563    pub estimated_rows: f64,
564    /// Time-travel clause (SQL:2011 temporal query) — `FOR SYSTEM_TIME AS OF ...`.
565    pub time_travel: Option<fsqlite_ast::TimeTravelClause>,
566    /// Probe expressions extracted during path selection — avoids downstream
567    /// re-extraction from the WHERE clause.
568    pub probe: Option<AccessPathProbe>,
569}
570
571/// Morsel-parallel SELECT eligibility decision produced by the planner.
572///
573/// When `eligible` is true the executor may split the driving table scan
574/// into `morsel_count` page-range morsels and process them in parallel
575/// under separate snapshot-consistent cursors, merging results afterward.
576#[derive(Debug, Clone, PartialEq)]
577pub struct MorselEligibility {
578    pub eligible: bool,
579    pub driving_table: Option<String>,
580    pub estimated_rows: f64,
581    pub morsel_count: u16,
582    pub rows_per_morsel: u64,
583    pub reason: MorselIneligibleReason,
584}
585
/// Reason a query was ruled out for morsel-parallel execution.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MorselIneligibleReason {
    /// Not ineligible: used when the query is in fact eligible.
    None,
    /// The driving scan's row estimate is below the morsel threshold.
    TooFewRows,
    /// The (single) access path is missing or is not a full table scan.
    NoFullTableScan,
    /// The plan's join order does not contain exactly one table.
    MultiTableJoin,
    /// The query has a LIMIT.
    HasLimit,
    /// The query is a compound SELECT.
    CompoundQuery,
}
596
597impl MorselEligibility {
598    const MIN_ROWS_FOR_MORSEL: f64 = 4096.0;
599    const DEFAULT_MORSEL_TARGET_ROWS: u64 = 1024;
600    const MAX_MORSELS: u16 = 64;
601
602    fn ineligible(reason: MorselIneligibleReason) -> Self {
603        Self {
604            eligible: false,
605            driving_table: None,
606            estimated_rows: 0.0,
607            morsel_count: 1,
608            rows_per_morsel: 0,
609            reason,
610        }
611    }
612
613    /// Evaluate morsel eligibility for a single-table full-scan query.
614    #[must_use]
615    pub fn evaluate(
616        plan: &QueryPlan,
617        has_limit: bool,
618        is_compound: bool,
619        available_workers: u16,
620    ) -> Self {
621        if is_compound {
622            return Self::ineligible(MorselIneligibleReason::CompoundQuery);
623        }
624        if has_limit {
625            return Self::ineligible(MorselIneligibleReason::HasLimit);
626        }
627        if plan.join_order.len() != 1 {
628            return Self::ineligible(MorselIneligibleReason::MultiTableJoin);
629        }
630        let path = match plan.access_paths.first() {
631            Some(p) => p,
632            None => return Self::ineligible(MorselIneligibleReason::NoFullTableScan),
633        };
634        if !matches!(path.kind, AccessPathKind::FullTableScan) {
635            return Self::ineligible(MorselIneligibleReason::NoFullTableScan);
636        }
637        if path.estimated_rows < Self::MIN_ROWS_FOR_MORSEL {
638            return Self::ineligible(MorselIneligibleReason::TooFewRows);
639        }
640
641        let est_rows = path.estimated_rows as u64;
642        let workers = u64::from(available_workers.clamp(1, Self::MAX_MORSELS));
643        let rows_per_morsel = (est_rows / workers).max(Self::DEFAULT_MORSEL_TARGET_ROWS);
644        let morsel_count =
645            u16::try_from((est_rows / rows_per_morsel).max(1)).unwrap_or(Self::MAX_MORSELS);
646
647        Self {
648            eligible: true,
649            driving_table: Some(path.table.clone()),
650            estimated_rows: path.estimated_rows,
651            morsel_count,
652            rows_per_morsel,
653            reason: MorselIneligibleReason::None,
654        }
655    }
656}
657
658/// The final output of the query planner: an ordered access plan.
659#[derive(Debug, Clone, PartialEq)]
660pub struct QueryPlan {
661    /// Tables in the chosen join order.
662    pub join_order: Vec<String>,
663    /// Access path for each table (parallel to `join_order`).
664    pub access_paths: Vec<AccessPath>,
665    /// Join operator segments selected for execution/explain.
666    pub join_segments: Vec<JoinPlanSegment>,
667    /// Total estimated cost in page reads.
668    pub total_cost: f64,
669    /// Morsel-parallel SELECT eligibility (populated after planning).
670    pub morsel_eligibility: Option<MorselEligibility>,
671}
672
/// Number of query plans a [`QueryPlanner`] keeps cached unless the caller
/// chooses a different capacity.
pub const DEFAULT_PLAN_CACHE_CAPACITY: usize = 128;
675
676/// Stateful planner wrapper that memoizes query plans by SQL template and schema cookie.
677///
678/// The caller is responsible for supplying a stable SQL template string for the
679/// query shape being planned. Literal normalization, placeholder canonicalization,
680/// and any higher-level SQL parsing remain above this crate's current scope.
681#[derive(Debug)]
682pub struct QueryPlanner {
683    plan_cache: LruCache<u64, Rc<QueryPlan>>,
684    cached_schema_cookie: Option<u32>,
685    hot_plan_cache_key: Option<u64>,
686    hot_plan_cache_plan: Option<Rc<QueryPlan>>,
687    hot_plan_cache_needs_lru_touch: bool,
688}
689
690impl Default for QueryPlanner {
691    fn default() -> Self {
692        Self::new()
693    }
694}
695
696impl QueryPlanner {
697    /// Construct a planner with the default 128-entry LRU plan cache.
698    #[must_use]
699    pub fn new() -> Self {
700        Self::with_plan_cache_capacity(DEFAULT_PLAN_CACHE_CAPACITY)
701    }
702
703    /// Construct a planner with a caller-provided cache capacity.
704    ///
705    /// A zero capacity is clamped to 1 so callers can tune the cache without
706    /// dealing with `NonZeroUsize`.
707    #[must_use]
708    pub fn with_plan_cache_capacity(capacity: usize) -> Self {
709        Self {
710            plan_cache: LruCache::new(normalize_plan_cache_capacity(capacity)),
711            cached_schema_cookie: None,
712            hot_plan_cache_key: None,
713            hot_plan_cache_plan: None,
714            hot_plan_cache_needs_lru_touch: false,
715        }
716    }
717
718    /// Return the number of cached plans currently retained.
719    #[must_use]
720    pub fn plan_cache_len(&self) -> usize {
721        self.plan_cache.len()
722    }
723
724    /// Return `true` when no cached plans are currently retained.
725    #[must_use]
726    pub fn is_plan_cache_empty(&self) -> bool {
727        self.plan_cache.is_empty()
728    }
729
730    /// Clear all cached plans and forget the schema cookie they were built under.
731    pub fn clear_plan_cache(&mut self) {
732        self.plan_cache.clear();
733        self.cached_schema_cookie = None;
734        self.clear_hot_plan_cache();
735    }
736
737    /// Return a cached plan for the given SQL template and schema cookie, or compute one.
738    ///
739    /// When the schema cookie changes, the entire cache is flushed because any
740    /// DDL may invalidate earlier planning decisions.
741    #[must_use]
742    pub fn cached_plan<F>(
743        &mut self,
744        sql_template: &str,
745        schema_cookie: u32,
746        build: F,
747    ) -> Rc<QueryPlan>
748    where
749        F: FnOnce() -> QueryPlan,
750    {
751        self.invalidate_plan_cache_if_schema_cookie_changed(schema_cookie);
752        let key = plan_cache_key(sql_template, schema_cookie);
753        self.prepare_plan_cache_lookup(key);
754
755        if let Some(plan) = self.lookup_hot_plan_cache(key) {
756            return plan;
757        }
758
759        if let Some(plan) = self.plan_cache.get(&key).map(Rc::clone) {
760            return self.record_plan_cache_hit(key, plan);
761        }
762
763        let plan = Rc::new(build());
764        self.plan_cache.put(key, Rc::clone(&plan));
765        self.record_plan_cache_hit(key, plan)
766    }
767
768    /// Cached wrapper around [`order_joins_with_hints_and_features`].
769    ///
770    /// This preserves the current stateless free-function API while exposing a
771    /// planner-local cache for repeated SELECT templates.
772    #[allow(clippy::too_many_arguments)]
773    #[must_use]
774    pub fn order_joins_with_cache(
775        &mut self,
776        sql_template: &str,
777        schema_cookie: u32,
778        tables: &[TableStats],
779        indexes: &[IndexInfo],
780        where_terms: &[WhereTerm<'_>],
781        needed_columns: Option<&[String]>,
782        cross_join_pairs: &[(String, String)],
783        table_index_hints: Option<&BTreeMap<String, IndexHint>>,
784        cracking_hints: Option<&mut CrackingHintStore>,
785        feature_flags: PlannerFeatureFlags,
786    ) -> Rc<QueryPlan> {
787        // Adaptive cracking hints are mutable runtime state, not schema state.
788        // They can legitimately change the preferred index for the same SQL
789        // template, so they must not be served from the stable plan cache.
790        if cracking_hints.is_some() {
791            return Rc::new(order_joins_with_hints_and_features(
792                tables,
793                indexes,
794                where_terms,
795                needed_columns,
796                cross_join_pairs,
797                table_index_hints,
798                cracking_hints,
799                feature_flags,
800            ));
801        }
802
803        self.invalidate_plan_cache_if_schema_cookie_changed(schema_cookie);
804        let key = plan_cache_key_with_feature_flags(sql_template, schema_cookie, feature_flags);
805        self.prepare_plan_cache_lookup(key);
806
807        if let Some(plan) = self.lookup_hot_plan_cache(key) {
808            return plan;
809        }
810
811        if let Some(plan) = self.plan_cache.get(&key).map(Rc::clone) {
812            return self.record_plan_cache_hit(key, plan);
813        }
814
815        let plan = Rc::new(order_joins_with_hints_and_features(
816            tables,
817            indexes,
818            where_terms,
819            needed_columns,
820            cross_join_pairs,
821            table_index_hints,
822            cracking_hints,
823            feature_flags,
824        ));
825        self.plan_cache.put(key, Rc::clone(&plan));
826        self.record_plan_cache_hit(key, plan)
827    }
828
829    fn invalidate_plan_cache_if_schema_cookie_changed(&mut self, schema_cookie: u32) {
830        if self
831            .cached_schema_cookie
832            .is_some_and(|cached| cached != schema_cookie)
833        {
834            self.plan_cache.clear();
835            self.clear_hot_plan_cache();
836        }
837        self.cached_schema_cookie = Some(schema_cookie);
838    }
839
840    fn prepare_plan_cache_lookup(&mut self, key: u64) {
841        if self
842            .hot_plan_cache_key
843            .is_some_and(|hot_key| hot_key != key)
844        {
845            self.flush_hot_plan_cache_lru_touch();
846            self.clear_hot_plan_cache();
847        }
848    }
849
850    fn lookup_hot_plan_cache(&mut self, key: u64) -> Option<Rc<QueryPlan>> {
851        if self.hot_plan_cache_key == Some(key) {
852            self.hot_plan_cache_needs_lru_touch = true;
853            return self.hot_plan_cache_plan.as_ref().map(Rc::clone);
854        }
855        None
856    }
857
858    fn record_plan_cache_hit(&mut self, key: u64, plan: Rc<QueryPlan>) -> Rc<QueryPlan> {
859        self.hot_plan_cache_key = Some(key);
860        self.hot_plan_cache_plan = Some(Rc::clone(&plan));
861        self.hot_plan_cache_needs_lru_touch = false;
862        plan
863    }
864
865    fn flush_hot_plan_cache_lru_touch(&mut self) {
866        if !self.hot_plan_cache_needs_lru_touch {
867            return;
868        }
869        if let Some(key) = self.hot_plan_cache_key {
870            let _ = self.plan_cache.get(&key);
871        }
872        self.hot_plan_cache_needs_lru_touch = false;
873    }
874
875    fn clear_hot_plan_cache(&mut self) {
876        self.hot_plan_cache_key = None;
877        self.hot_plan_cache_plan = None;
878        self.hot_plan_cache_needs_lru_touch = false;
879    }
880}
881
/// Converts a requested cache capacity into a non-zero size.
///
/// A request of `0` is raised to `1`; every other value passes through
/// unchanged.
fn normalize_plan_cache_capacity(capacity: usize) -> NonZeroUsize {
    match NonZeroUsize::new(capacity.max(1)) {
        Some(clamped) => clamped,
        None => unreachable!("cache capacity is clamped to a non-zero value"),
    }
}
890
/// Top-byte seed tag (`0x5A`) for keys produced by `plan_cache_key`.
const PLAN_CACHE_DIRECT_SEED_TAG: u64 = 0x5A_u64 << 56;
/// Top-byte seed tag (`0xA5`) for keys produced by
/// `plan_cache_key_with_feature_flags`.
const PLAN_CACHE_JOIN_SEED_TAG: u64 = 0xA5_u64 << 56;
/// Seed bit 32: set when the Leapfrog join feature flag is enabled.
const PLAN_CACHE_FEATURE_LEAPFROG: u64 = 1_u64 << 32;
/// Seed bit 33: set when the DPccp join feature flag is enabled.
const PLAN_CACHE_FEATURE_DPCCP: u64 = 1_u64 << 33;
895
896fn plan_cache_key(sql_template: &str, schema_cookie: u32) -> u64 {
897    xxh3_64_with_seed(
898        sql_template.as_bytes(),
899        PLAN_CACHE_DIRECT_SEED_TAG | u64::from(schema_cookie),
900    )
901}
902
903fn plan_cache_key_with_feature_flags(
904    sql_template: &str,
905    schema_cookie: u32,
906    feature_flags: PlannerFeatureFlags,
907) -> u64 {
908    // Keep the schema cookie in the low 32 bits and pack feature toggles above
909    // it so each plan-cache variant gets a distinct seed without heap work.
910    // The high tag separates this join-order cache from the generic
911    // `cached_plan()` API; both APIs share `QueryPlanner::plan_cache`.
912    let feature_mask = if feature_flags.leapfrog_join {
913        PLAN_CACHE_FEATURE_LEAPFROG
914    } else {
915        0
916    } | if feature_flags.dpccp_join {
917        PLAN_CACHE_FEATURE_DPCCP
918    } else {
919        0
920    };
921    xxh3_64_with_seed(
922        sql_template.as_bytes(),
923        PLAN_CACHE_JOIN_SEED_TAG | u64::from(schema_cookie) | feature_mask,
924    )
925}
926
/// Opt-in switches for optional planner strategies. Both default to `false`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct PlannerFeatureFlags {
    /// Route compatible equi-joins over 3+ relations to Leapfrog Triejoin.
    pub leapfrog_join: bool,
    /// Use DPccp exhaustive search for small joins (<= `DPCCP_MAX_TABLES`),
    /// falling back to beam search above that threshold.
    pub dpccp_join: bool,
}

/// Largest table count handled by DPccp exhaustive search; joins above this
/// threshold use bounded beam search.
#[allow(dead_code)]
const DPCCP_MAX_TABLES: usize = 8;
941
/// Running count of join plans enumerated; only `reset_plans_enumerated`
/// sets it back to zero.
static FSQLITE_PLANNER_PLANS_ENUMERATED: AtomicU64 = AtomicU64::new(0);

/// Returns the current value of the plans-enumerated counter.
#[must_use]
pub fn plans_enumerated_total() -> u64 {
    let counter = &FSQLITE_PLANNER_PLANS_ENUMERATED;
    counter.load(Ordering::Relaxed)
}

/// Sets the plans-enumerated counter back to zero.
pub fn reset_plans_enumerated() {
    let counter = &FSQLITE_PLANNER_PLANS_ENUMERATED;
    counter.store(0, Ordering::Relaxed);
}
955
/// Physical join operator selected for one segment of a join plan.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum JoinOperator {
    /// Pairwise hash join execution.
    HashJoin,
    /// Multi-way Leapfrog Triejoin execution.
    LeapfrogTriejoin,
}

impl JoinOperator {
    /// Upper-case operator name as printed in query-plan output.
    #[must_use]
    pub const fn label(self) -> &'static str {
        match self {
            Self::LeapfrogTriejoin => "LEAPFROG TRIEJOIN",
            Self::HashJoin => "HASH JOIN",
        }
    }
}
974
975/// One join-operator decision segment.
976#[derive(Debug, Clone, PartialEq)]
977#[allow(clippy::derive_partial_eq_without_eq)]
978pub struct JoinPlanSegment {
979    /// Relations covered by this segment in execution order.
980    pub relations: Vec<String>,
981    /// Operator chosen for this segment.
982    pub operator: JoinOperator,
983    /// Estimated operator cost.
984    pub estimated_cost: f64,
985    /// Human-readable decision reason.
986    pub reason: String,
987}
988
989impl fmt::Display for QueryPlan {
990    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
991        writeln!(f, "QUERY PLAN (est. cost {:.1}):", self.total_cost)?;
992        for (i, ap) in self.access_paths.iter().enumerate() {
993            let idx_str = ap
994                .index
995                .as_deref()
996                .map_or(String::new(), |n| format!(" USING INDEX {n}"));
997            writeln!(
998                f,
999                "  {i}: SCAN {}{idx_str} (~{:.0} rows, cost {:.1})",
1000                ap.table, ap.estimated_rows, ap.estimated_cost
1001            )?;
1002        }
1003        if !self.join_segments.is_empty() {
1004            writeln!(f, "JOIN OPERATORS:")?;
1005            for segment in &self.join_segments {
1006                writeln!(
1007                    f,
1008                    "  {} {} (est. {:.1}) [{}]",
1009                    segment.operator.label(),
1010                    segment.relations.join(" JOIN "),
1011                    segment.estimated_cost,
1012                    segment.reason
1013                )?;
1014            }
1015        }
1016        Ok(())
1017    }
1018}
1019
1020// ---------------------------------------------------------------------------
1021// Cost model (§10.5)
1022// ---------------------------------------------------------------------------
1023
1024/// Estimate the cost (in page reads) for a given access path.
1025///
1026/// Formulas from §10.5:
1027/// - Full table scan: `N_pages(table)`
1028/// - Index scan (range): `log2(idx_pages) + selectivity * idx_pages + selectivity * tbl_pages`
1029/// - Index scan (equality): `log2(idx_pages) + log2(tbl_pages)`
1030/// - Covering index scan: `log2(idx_pages) + selectivity * idx_pages`
1031/// - Rowid lookup: `log2(tbl_pages)`
1032///
1033/// This is the legacy entry point that ignores row-count statistics; it is a
1034/// thin wrapper around [`estimate_cost_ext`] with `n_rows = 0` (i.e. no row
1035/// statistics available). When `sqlite_stat1` data has been loaded, prefer
1036/// [`estimate_cost_ext`] so per-row decode/access costs participate in the
1037/// score.
1038#[must_use]
1039pub fn estimate_cost(kind: &AccessPathKind, table_pages: u64, index_pages: u64) -> f64 {
1040    estimate_cost_ext(kind, table_pages, index_pages, 0)
1041}
1042
1043/// Per-row cost added on top of page-level cost for a full table scan.
1044///
1045/// Reflects the VDBE/record-decode overhead per emitted row, tuned to keep
1046/// the scan cost of a tiny page-count table proportional to its row count
1047/// so that ANALYZE-populated stats change the plan meaningfully.
1048const ROW_DECODE_COST: f64 = 0.01;
1049
1050/// Per-row cost added to each table visit from an indexed access path
1051/// (one rowid dereference + row decode).
1052const ROW_ACCESS_COST: f64 = 0.02;
1053
1054/// Estimate the cost (in page reads) for a given access path, optionally
1055/// incorporating the table row count (PLANNER-2).
1056///
1057/// When `n_rows == 0`, this is equivalent to the legacy [`estimate_cost`]
1058/// formulas and the cost is computed purely from page counts. When `n_rows`
1059/// is available (e.g. from `sqlite_stat1` after `ANALYZE`), per-row terms are
1060/// added so that two tables with the same page count but wildly different row
1061/// counts are ranked differently:
1062///
1063/// - Full table scan: `tbl_pages + n_rows * ROW_DECODE_COST`
1064/// - Index equality / range / covering / rowid: the legacy page-level cost
1065///   plus `selectivity * n_rows * ROW_ACCESS_COST` (for equality we use
1066///   `1 / max(1, n_rows)` as the selectivity floor; rowid lookups yield
1067///   exactly one row).
1068#[must_use]
1069pub fn estimate_cost_ext(
1070    kind: &AccessPathKind,
1071    table_pages: u64,
1072    index_pages: u64,
1073    n_rows: u64,
1074) -> f64 {
1075    let tp = table_pages.max(1) as f64;
1076    let ip = index_pages.max(1) as f64;
1077    let nr = n_rows as f64;
1078
1079    let cost = match kind {
1080        AccessPathKind::FullTableScan => nr.mul_add(ROW_DECODE_COST, tp),
1081        AccessPathKind::IndexScanRange { selectivity } => {
1082            let page_cost = ip.log2() + selectivity * ip + selectivity * tp;
1083            (selectivity * nr).mul_add(ROW_ACCESS_COST, page_cost)
1084        }
1085        AccessPathKind::IndexScanEquality => {
1086            // Equality: selectivity ≈ 1 / n_rows (unique) or floor at 1 row.
1087            let page_cost = ip.log2() + tp.log2();
1088            let matched_rows: f64 = if nr > 0.0 { 1.0 } else { 0.0 };
1089            matched_rows.mul_add(ROW_ACCESS_COST, page_cost)
1090        }
1091        AccessPathKind::CoveringIndexScan { selectivity } => {
1092            let page_cost = ip.log2() + selectivity * ip;
1093            // Covering scan still pays per-row decode but avoids the table
1094            // dereference, so use ROW_DECODE_COST (cheaper than ROW_ACCESS).
1095            (selectivity * nr).mul_add(ROW_DECODE_COST, page_cost)
1096        }
1097        AccessPathKind::RowidLookup => {
1098            let page_cost = tp.log2();
1099            let matched_rows: f64 = if nr > 0.0 { 1.0 } else { 0.0 };
1100            matched_rows.mul_add(ROW_ACCESS_COST, page_cost)
1101        }
1102    };
1103
1104    FSQLITE_PLANNER_COST_ESTIMATES_TOTAL.fetch_add(1, Ordering::Relaxed);
1105
1106    tracing::debug!(
1107        target: "fsqlite.planner",
1108        table_pages,
1109        index_pages,
1110        n_rows,
1111        estimated_cost = cost,
1112        actual_method = %access_path_metric_label(kind),
1113        "cost_estimate"
1114    );
1115
1116    cost
1117}
1118
1119// ---------------------------------------------------------------------------
1120// PLANNER-3: join ordering with sqlite_stat1 row-count hints
1121// ---------------------------------------------------------------------------
1122
/// One table of a multi-table FROM clause together with the cost-model inputs
/// that [`order_join_inputs_with_hints`] uses to choose an evaluation order.
///
/// `has_stats` separates ANALYZE-populated inputs from pure heuristic
/// fallbacks: when every reference has `has_stats == false` there is nothing
/// to optimize on and callers should keep the source order.
///
/// The struct is deliberately minimal so that
/// `crates/fsqlite-core/src/connection.rs` can build it directly without
/// pulling in the full bound-statement type surface. That wire-up is staged
/// separately (see the PLANNER-3 follow-up); for now the type lives in the
/// planner and is exercised via unit tests.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct TableRefWithStats {
    /// Table name; used only for diagnostics and test assertions.
    pub name: String,
    /// Estimated B-tree page count, forwarded to [`estimate_cost_ext`].
    pub n_pages: u64,
    /// Estimated row count, forwarded to [`estimate_cost_ext`]. `0` means no
    /// row-count hint is available; see [`Self::has_stats`].
    pub n_rows: u64,
    /// `true` when the numbers came from `sqlite_stat1` / ANALYZE. When
    /// `false`, ordering callers should fall back to source order.
    pub has_stats: bool,
}
1149
1150impl TableRefWithStats {
1151    /// Construct a `TableRefWithStats` from a [`TableStats`] snapshot.
1152    ///
1153    /// `has_stats` is derived from the stats source: only
1154    /// [`StatsSource::Analyze`] (populated from `sqlite_stat1`) counts as
1155    /// authoritative for join ordering.
1156    #[must_use]
1157    pub fn from_table_stats(stats: &TableStats) -> Self {
1158        Self {
1159            name: stats.name.clone(),
1160            n_pages: stats.n_pages,
1161            n_rows: stats.n_rows,
1162            has_stats: matches!(stats.source, StatsSource::Analyze),
1163        }
1164    }
1165}
1166
1167/// Threshold above which [`order_join_inputs_with_hints`] falls back to a greedy
1168/// smallest-first heuristic instead of exhaustive permutation search.
1169///
1170/// For N ≤ 4 the permutation count is at most 24, which is trivially cheap.
1171/// Beyond that, `N!` grows quickly enough that greedy ordering (sorting by
1172/// full-scan cost) is a better trade for planning latency.
1173const JOIN_ORDER_EXHAUSTIVE_LIMIT: usize = 4;
1174
1175/// Decide a join evaluation order for `tables` using per-table cost hints.
1176///
1177/// Returns a permutation `perm` such that `tables[perm[i]]` is the `i`-th
1178/// table to evaluate. The first entry is typically the smallest relation so
1179/// it can act as the hash-join build side while larger relations probe it.
1180///
1181/// Strategy:
1182/// - If **no** table has `has_stats == true`, the function returns the
1183///   identity permutation (source order). This preserves pre-PLANNER-3
1184///   behavior when ANALYZE has not been run.
1185/// - For `N <= JOIN_ORDER_EXHAUSTIVE_LIMIT` (4), try every permutation and
1186///   pick the one whose summed full-scan cost is minimal. For inner-equi
1187///   hash joins, minimizing the cost of probing smaller-first is a sound
1188///   approximation: the build side pays `n_pages + n_rows * DECODE` once
1189///   and the probe side scans the remaining relations, so sorting by
1190///   ascending cost is equivalent to picking the smallest build side.
1191/// - For `N > JOIN_ORDER_EXHAUSTIVE_LIMIT`, use greedy smallest-first
1192///   ordering (stable sort by per-table full-scan cost).
1193///
1194/// The permutation is stable: ties break on source order, so deterministic
1195/// replays stay reproducible.
1196///
1197/// # Safety / semantics
1198///
1199/// This function is *purely advisory*. It does **not** inspect join
1200/// predicates or join kinds. Callers that reorder LEFT/RIGHT/FULL OUTER
1201/// joins must verify the outer-preservation semantics are still correct
1202/// (typically: only reorder INNER joins). Wire-up in connection.rs will
1203/// gate the reorder behind an inner-join-only check.
1204///
1205/// # Wire-up plan for `connection.rs` (punted — PLANNER-3 follow-up)
1206///
1207/// `try_prepare_simple_join_rows` in `crates/fsqlite-core/src/connection.rs`
1208/// builds `table_sources`, `table_rows`, `join_plans`, `col_map`,
1209/// `projection_indices`, and `col_collations` in **source order**. A safe
1210/// wire would:
1211///
1212/// 1. Early-bail if any `join.join_type.kind != JoinKind::Inner`
1213///    (LEFT/RIGHT/FULL are non-commutative).
1214/// 2. After `table_sources` is populated, call
1215///    `order_join_inputs_with_hints` with a `TableRefWithStats` built from
1216///    `self.sqlite_stat1_row_counts()` (already exposed) plus table-page
1217///    estimates.
1218/// 3. If the returned permutation is non-identity, apply it to:
1219///    - `table_sources` (and the parallel `all_sources`)
1220///    - `col_map` (rebuild — it's derived from `table_sources`)
1221///    - `col_collations` (rebuild — ditto)
1222///    - `projection_indices` (remap via an old→new index table)
1223///    - The join-plan build loop (which currently consumes
1224///      `from.joins[i]` position-by-position): the equi-pair extraction
1225///      looks up columns via `col_map`, so once `col_map` is rebuilt the
1226///      same WHERE-style ON predicates still resolve, but `left_width`
1227///      must accumulate from the permuted `table_sources[..=i]`.
1228///
1229/// That last bullet — rebuilding the join-plan loop against a permuted
1230/// order while still consuming the AST's `from.joins` in source order — is
1231/// why this wire was punted from the initial PLANNER-3 commit. Landing it
1232/// requires either (a) refactoring the planner to carry an explicit join
1233/// tree instead of a flat source list, or (b) a careful single-site
1234/// rewrite with broad test coverage. Neither fit in the PLANNER-3 scope.
1235#[must_use]
1236pub fn order_join_inputs_with_hints(tables: &[TableRefWithStats]) -> Vec<usize> {
1237    let n = tables.len();
1238    if n <= 1 {
1239        return (0..n).collect();
1240    }
1241
1242    // Fallback: no stats anywhere → preserve source order.
1243    if !tables.iter().any(|t| t.has_stats) {
1244        return (0..n).collect();
1245    }
1246
1247    // Per-table full-scan cost: build-side picking minimizes this.
1248    let scan_cost = |idx: usize| -> f64 {
1249        let t = &tables[idx];
1250        estimate_cost_ext(&AccessPathKind::FullTableScan, t.n_pages, 0, t.n_rows)
1251    };
1252
1253    if n <= JOIN_ORDER_EXHAUSTIVE_LIMIT {
1254        // Exhaustive: try every permutation, score by the sum of scan costs
1255        // weighted so that the first (build-side) table dominates. We sum
1256        // `cost[i] * (n - i)` — equivalent to "smaller cost first" but with
1257        // an explicit weighting that mirrors the left-deep probe chain.
1258        let indices: Vec<usize> = (0..n).collect();
1259        let mut best_perm = indices.clone();
1260        let mut best_score = f64::INFINITY;
1261
1262        // Heap's-algorithm-style permutation over a small scratch buffer.
1263        // We don't need a crate — N is at most 4 here, so a recursive helper
1264        // is fine and still O(N!).
1265        let mut scratch = indices.clone();
1266        permute_scoring(
1267            &mut scratch,
1268            0,
1269            n,
1270            &scan_cost,
1271            &mut best_score,
1272            &mut best_perm,
1273        );
1274        best_perm
1275    } else {
1276        // Greedy: stable sort by ascending scan cost. Stable keeps ties in
1277        // source order, which matches the "no stats" fallback for the
1278        // equal-cost region.
1279        let mut indexed: Vec<(usize, f64)> = (0..n).map(|i| (i, scan_cost(i))).collect();
1280        // `sort_by` is stable in std.
1281        indexed.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
1282        indexed.into_iter().map(|(i, _)| i).collect()
1283    }
1284}
1285
/// Helper: enumerate every permutation of `slice[k..n]` by in-place swap
/// backtracking, scoring each complete permutation and tracking the cheapest.
///
/// - `slice`: scratch buffer of table indices; restored to its entry order
///   before returning.
/// - `k`: position being filled at this recursion depth (callers pass `0`).
/// - `n`: number of positions, expected to equal `slice.len()`.
/// - `scan_cost`: per-table cost lookup.
/// - `best_score` / `best_perm`: running minimum and the permutation that
///   achieved it; callers seed them with `f64::INFINITY` and source order.
///
/// A permutation's score is `sum(cost[slice[i]] * (n - i))`; lower is better.
/// When two permutations score exactly the same, the lexicographically
/// smaller one wins, which keeps equal-cost tables in source order rather
/// than in whatever order the enumeration happens to visit them.
fn permute_scoring<F>(
    slice: &mut [usize],
    k: usize,
    n: usize,
    scan_cost: &F,
    best_score: &mut f64,
    best_perm: &mut Vec<usize>,
) where
    F: Fn(usize) -> f64,
{
    if k == n {
        let candidate: &[usize] = slice;
        let mut score = 0.0_f64;
        for (i, &tbl_idx) in candidate.iter().enumerate() {
            let weight = (n - i) as f64;
            score = scan_cost(tbl_idx).mul_add(weight, score);
        }
        let improves = match score.partial_cmp(&*best_score) {
            Some(std::cmp::Ordering::Less) => true,
            // Tie: prefer source order over enumeration order.
            Some(std::cmp::Ordering::Equal) => candidate < best_perm.as_slice(),
            // Greater, or incomparable (NaN): keep the current best.
            _ => false,
        };
        if improves {
            *best_score = score;
            best_perm.clear();
            best_perm.extend_from_slice(candidate);
        }
        return;
    }
    for i in k..n {
        slice.swap(k, i);
        permute_scoring(slice, k + 1, n, scan_cost, best_score, best_perm);
        // Undo the swap so the caller's view of `slice` is unchanged.
        slice.swap(k, i);
    }
}
1319
// NOTE(review): the use site is not visible in this part of the file. From the
// name this is presumably a multiplier applied to the cost of an adaptively
// hinted index so it is slightly preferred — confirm where it is used.
const ADAPTIVE_HINT_COST_BIAS: f64 = 0.90;
1321
1322struct AccessPathSelectionCounters {
1323    full_table_scan: AtomicU64,
1324    index_scan_range: AtomicU64,
1325    index_scan_equality: AtomicU64,
1326    covering_index_scan: AtomicU64,
1327    rowid_lookup: AtomicU64,
1328}
1329
1330impl AccessPathSelectionCounters {
1331    const fn new() -> Self {
1332        Self {
1333            full_table_scan: AtomicU64::new(0),
1334            index_scan_range: AtomicU64::new(0),
1335            index_scan_equality: AtomicU64::new(0),
1336            covering_index_scan: AtomicU64::new(0),
1337            rowid_lookup: AtomicU64::new(0),
1338        }
1339    }
1340
1341    fn counter_for(&self, kind: &AccessPathKind) -> &AtomicU64 {
1342        match kind {
1343            AccessPathKind::FullTableScan => &self.full_table_scan,
1344            AccessPathKind::IndexScanRange { .. } => &self.index_scan_range,
1345            AccessPathKind::IndexScanEquality => &self.index_scan_equality,
1346            AccessPathKind::CoveringIndexScan { .. } => &self.covering_index_scan,
1347            AccessPathKind::RowidLookup => &self.rowid_lookup,
1348        }
1349    }
1350
1351    fn snapshot(&self) -> BTreeMap<String, u64> {
1352        [
1353            (
1354                "covering_index_scan",
1355                self.covering_index_scan.load(Ordering::Relaxed),
1356            ),
1357            (
1358                "full_table_scan",
1359                self.full_table_scan.load(Ordering::Relaxed),
1360            ),
1361            (
1362                "index_scan_equality",
1363                self.index_scan_equality.load(Ordering::Relaxed),
1364            ),
1365            (
1366                "index_scan_range",
1367                self.index_scan_range.load(Ordering::Relaxed),
1368            ),
1369            ("rowid_lookup", self.rowid_lookup.load(Ordering::Relaxed)),
1370        ]
1371        .into_iter()
1372        .map(|(label, count)| (label.to_owned(), count))
1373        .collect()
1374    }
1375}
1376
1377static INDEX_SELECTION_TOTAL: AccessPathSelectionCounters = AccessPathSelectionCounters::new();
1378
1379// ---------------------------------------------------------------------------
1380// Cost estimation metrics (bd-1as.1)
1381// ---------------------------------------------------------------------------
1382
/// Monotonic counter: total cost estimates computed.
static FSQLITE_PLANNER_COST_ESTIMATES_TOTAL: AtomicU64 = AtomicU64::new(0);

/// Estimation error ratio observations (`actual / estimated`), stored as raw
/// `f64` values in insertion order behind a mutex. `cost_metrics_snapshot`
/// folds them into histogram buckets and a mean; the vector only shrinks when
/// `reset_cost_metrics` clears it.
static ESTIMATION_ERROR_OBSERVATIONS: LazyLock<Mutex<Vec<f64>>> =
    LazyLock::new(|| Mutex::new(Vec::new()));
1390
/// Planner cost metrics as captured by `cost_metrics_snapshot`.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct CostMetricsSnapshot {
    /// How many cost estimates had been computed when the snapshot was taken.
    pub fsqlite_planner_cost_estimates_total: u64,
    /// Histogram of estimation error ratios (actual/estimated).
    /// Buckets: [0, 0.5), [0.5, 1.0), [1.0, 2.0), [2.0, 5.0), [5.0, +inf).
    pub error_ratio_buckets: [u64; 5],
    /// Mean error ratio; NaN when the snapshot saw no observations.
    pub error_ratio_mean: f64,
}
1402
1403/// Bucket boundaries for the error ratio histogram.
1404const ERROR_RATIO_BOUNDARIES: [f64; 4] = [0.5, 1.0, 2.0, 5.0];
1405
1406/// Take a point-in-time snapshot of cost estimation metrics.
1407#[must_use]
1408pub fn cost_metrics_snapshot() -> CostMetricsSnapshot {
1409    let total = FSQLITE_PLANNER_COST_ESTIMATES_TOTAL.load(Ordering::Relaxed);
1410    let observations = ESTIMATION_ERROR_OBSERVATIONS
1411        .lock()
1412        .unwrap_or_else(std::sync::PoisonError::into_inner);
1413
1414    let mut buckets = [0u64; 5];
1415    let mut sum = 0.0;
1416    for &ratio in observations.iter() {
1417        sum += ratio;
1418        let idx = ERROR_RATIO_BOUNDARIES
1419            .iter()
1420            .position(|&b| ratio < b)
1421            .unwrap_or(4);
1422        buckets[idx] += 1;
1423    }
1424    let mean = if observations.is_empty() {
1425        f64::NAN
1426    } else {
1427        sum / observations.len() as f64
1428    };
1429
1430    CostMetricsSnapshot {
1431        fsqlite_planner_cost_estimates_total: total,
1432        error_ratio_buckets: buckets,
1433        error_ratio_mean: mean,
1434    }
1435}
1436
1437/// Reset cost estimation metrics.
1438pub fn reset_cost_metrics() {
1439    FSQLITE_PLANNER_COST_ESTIMATES_TOTAL.store(0, Ordering::Relaxed);
1440    let mut obs = ESTIMATION_ERROR_OBSERVATIONS
1441        .lock()
1442        .unwrap_or_else(std::sync::PoisonError::into_inner);
1443    obs.clear();
1444}
1445
1446/// Record an estimation error observation (actual_cost / estimated_cost).
1447pub fn record_estimation_error(actual: f64, estimated: f64) {
1448    if estimated <= 0.0 || actual < 0.0 {
1449        return;
1450    }
1451    let ratio = actual / estimated;
1452    {
1453        let mut obs = ESTIMATION_ERROR_OBSERVATIONS
1454            .lock()
1455            .unwrap_or_else(std::sync::PoisonError::into_inner);
1456        obs.push(ratio);
1457    }
1458
1459    tracing::debug!(
1460        actual,
1461        estimated,
1462        ratio,
1463        miscalibrated = !(0.2..=5.0).contains(&ratio),
1464        "planner.estimation_error"
1465    );
1466}
1467
/// Weight of the quadratic penalty in the decision-theoretic asymmetric loss
/// used for cost estimation.
///
/// Underestimation (actual > estimated) is penalized more heavily than
/// overestimation because underestimation leads to slow queries that miss
/// deadlines, while overestimation merely causes slightly suboptimal plans.
///
/// Loss = if actual > estimated:
///     UNDERESTIMATE_PENALTY × (actual/estimated - 1)²  (quadratic)
///   else:
///     (1 - actual/estimated)                            (linear)
const UNDERESTIMATE_PENALTY: f64 = 3.0;

/// Compute the asymmetric loss between an estimated and an actual cost.
///
/// Underestimates (surprise slowness) cost more than overestimates. When
/// `estimated <= 0.0` no ratio can be formed and `actual` is returned as is.
#[must_use]
pub fn asymmetric_estimation_loss(estimated: f64, actual: f64) -> f64 {
    // Degenerate case: no meaningful ratio exists.
    if estimated <= 0.0 {
        return actual;
    }

    let ratio = actual / estimated;
    let underestimated = ratio > 1.0;
    if underestimated {
        // Quadratic penalty on the relative overshoot.
        let overshoot = ratio - 1.0;
        UNDERESTIMATE_PENALTY * overshoot.powi(2)
    } else {
        // Linear penalty on the relative shortfall.
        1.0 - ratio
    }
}
1497
1498fn access_path_metric_label(kind: &AccessPathKind) -> &'static str {
1499    match kind {
1500        AccessPathKind::FullTableScan => "full_table_scan",
1501        AccessPathKind::IndexScanRange { .. } => "index_scan_range",
1502        AccessPathKind::IndexScanEquality => "index_scan_equality",
1503        AccessPathKind::CoveringIndexScan { .. } => "covering_index_scan",
1504        AccessPathKind::RowidLookup => "rowid_lookup",
1505    }
1506}
1507
1508fn increment_index_selection_total(kind: &AccessPathKind) -> u64 {
1509    INDEX_SELECTION_TOTAL
1510        .counter_for(kind)
1511        .fetch_add(1, Ordering::Relaxed)
1512        + 1
1513}
1514
1515#[must_use]
1516pub fn snapshot_index_selection_totals() -> BTreeMap<String, u64> {
1517    INDEX_SELECTION_TOTAL.snapshot()
1518}
1519
/// Returns `table_name` with ASCII letters lower-cased; all other characters
/// are left as they are.
fn canonical_table_key(table_name: &str) -> String {
    let mut key = table_name.to_owned();
    key.make_ascii_lowercase();
    key
}
1523
1524fn lookup_table_index_hint<'a>(
1525    table_name: &str,
1526    table_index_hints: Option<&'a BTreeMap<String, IndexHint>>,
1527) -> Option<&'a IndexHint> {
1528    table_index_hints.and_then(|hints| hints.get(&canonical_table_key(table_name)))
1529}
1530
1531/// Minimal adaptive hint cache keyed by table name.
1532///
1533/// The planner records the last chosen index for each table and can reuse it as
1534/// a soft preference on subsequent planning passes.
1535#[derive(Debug, Clone, Default, PartialEq, Eq)]
1536pub struct CrackingHintStore {
1537    preferred_index_by_table: HashMap<String, String>,
1538}
1539
1540impl CrackingHintStore {
1541    #[must_use]
1542    pub fn preferred_index(&self, table_name: &str) -> Option<&str> {
1543        self.preferred_index_by_table
1544            .get(&canonical_table_key(table_name))
1545            .map(String::as_str)
1546    }
1547
1548    pub fn record_access_path(&mut self, access_path: &AccessPath) {
1549        if let Some(index_name) = &access_path.index {
1550            self.preferred_index_by_table
1551                .insert(canonical_table_key(&access_path.table), index_name.clone());
1552        }
1553    }
1554}
1555
1556fn collect_table_index_hints_inner(
1557    from_clause: &FromClause,
1558    output: &mut BTreeMap<String, IndexHint>,
1559) {
1560    fn collect_source(source: &TableOrSubquery, output: &mut BTreeMap<String, IndexHint>) {
1561        match source {
1562            TableOrSubquery::Table {
1563                name,
1564                alias,
1565                index_hint,
1566                ..
1567            } => {
1568                if let Some(hint) = index_hint {
1569                    output.insert(canonical_table_key(&name.name), hint.clone());
1570                    if let Some(alias_name) = alias {
1571                        output.insert(canonical_table_key(alias_name), hint.clone());
1572                    }
1573                }
1574            }
1575            TableOrSubquery::ParenJoin(inner) => {
1576                collect_table_index_hints_inner(inner, output);
1577            }
1578            TableOrSubquery::Subquery { .. } | TableOrSubquery::TableFunction { .. } => {}
1579        }
1580    }
1581
1582    collect_source(&from_clause.source, output);
1583    for join in &from_clause.joins {
1584        collect_source(&join.table, output);
1585    }
1586}
1587
1588/// Extract per-table index hints from a FROM clause.
1589///
1590/// Keys are normalized to ASCII-lowercase table names and aliases.
1591#[must_use]
1592pub fn collect_table_index_hints(from_clause: &FromClause) -> BTreeMap<String, IndexHint> {
1593    let mut hints = BTreeMap::new();
1594    collect_table_index_hints_inner(from_clause, &mut hints);
1595    hints
1596}
1597
1598/// Build the cheapest [`AccessPath`] for a table given available indexes and
1599/// WHERE terms. Returns the lowest-cost option.
1600#[must_use]
1601pub fn best_access_path(
1602    table: &TableStats,
1603    indexes: &[IndexInfo],
1604    where_terms: &[WhereTerm<'_>],
1605    needed_columns: Option<&[String]>,
1606) -> AccessPath {
1607    best_access_path_with_hints(table, indexes, where_terms, needed_columns, None, None)
1608}
1609
1610/// Build the cheapest [`AccessPath`] while recognizing schema-provided rowid
1611/// alias columns such as `id INTEGER PRIMARY KEY`.
1612///
1613/// Existing callers that do not have schema metadata should use
1614/// [`best_access_path`]. Schema-aware callers can pass table-local
1615/// [`RowidAliasHint`] values so predicates like `id = ?1` are costed as
1616/// [`AccessPathKind::RowidLookup`] without mutating the classified WHERE terms.
1617#[must_use]
1618pub fn best_access_path_with_rowid_alias_hints(
1619    table: &TableStats,
1620    indexes: &[IndexInfo],
1621    where_terms: &[WhereTerm<'_>],
1622    needed_columns: Option<&[String]>,
1623    rowid_alias_hints: &[RowidAliasHint],
1624) -> AccessPath {
1625    best_access_path_internal(
1626        table,
1627        indexes,
1628        where_terms,
1629        needed_columns,
1630        None,
1631        None,
1632        rowid_alias_hints,
1633    )
1634}
1635
1636/// Build the cheapest [`AccessPath`] while applying explicit index hints and
1637/// optional adaptive cracking hint reuse.
1638#[must_use]
1639pub fn best_access_path_with_hints(
1640    table: &TableStats,
1641    indexes: &[IndexInfo],
1642    where_terms: &[WhereTerm<'_>],
1643    needed_columns: Option<&[String]>,
1644    index_hint: Option<&IndexHint>,
1645    cracking_hints: Option<&mut CrackingHintStore>,
1646) -> AccessPath {
1647    let adaptive_preferred_index = cracking_hints
1648        .as_deref()
1649        .and_then(|store| store.preferred_index(&table.name))
1650        .map(ToOwned::to_owned);
1651
1652    let best = best_access_path_internal(
1653        table,
1654        indexes,
1655        where_terms,
1656        needed_columns,
1657        index_hint,
1658        adaptive_preferred_index.as_deref(),
1659        &[],
1660    );
1661
1662    if let Some(store) = cracking_hints {
1663        store.record_access_path(&best);
1664    }
1665
1666    best
1667}
1668
1669/// Build the cheapest [`AccessPath`] with optional explicit and adaptive hints.
1670#[must_use]
1671#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
1672fn best_access_path_internal(
1673    table: &TableStats,
1674    indexes: &[IndexInfo],
1675    where_terms: &[WhereTerm<'_>],
1676    needed_columns: Option<&[String]>,
1677    index_hint: Option<&IndexHint>,
1678    adaptive_preferred_index: Option<&str>,
1679    rowid_alias_hints: &[RowidAliasHint],
1680) -> AccessPath {
1681    // Only pay the clock read when an INFO subscriber will consume the
1682    // `selection_elapsed_us` diagnostic below. The cost-estimation path is
1683    // otherwise allocation- and syscall-free on the per-compile hot loop.
1684    let started = tracing::enabled!(tracing::Level::INFO).then(std::time::Instant::now);
1685    let explicit_indexed_by = match index_hint {
1686        Some(IndexHint::IndexedBy(index_name)) => Some(index_name.as_str()),
1687        _ => None,
1688    };
1689    let not_indexed = matches!(index_hint, Some(IndexHint::NotIndexed));
1690    let rowid_equality_candidate =
1691        find_rowid_equality_term(&table.name, where_terms, rowid_alias_hints).is_some();
1692    // The range branch below is only reached when the equality branch did not
1693    // match, so the range candidate is dead work in the common point-lookup
1694    // case — short-circuit it. When it is needed, probe with the
1695    // allocation-free matcher instead of `find_rowid_range_column`, which
1696    // clones the matched column name only to discard it for this boolean.
1697    // `where_term_matches_rowid_range` already requires a present column, so
1698    // `.any(..)` is equivalent to the previous `.is_some()`.
1699    let rowid_range_candidate = !rowid_equality_candidate
1700        && where_terms
1701            .iter()
1702            .any(|term| where_term_matches_rowid_range(&table.name, term, rowid_alias_hints));
1703
1704    let mut best = if explicit_indexed_by.is_some() {
1705        AccessPath {
1706            table: table.name.clone(),
1707            kind: AccessPathKind::FullTableScan,
1708            index: None,
1709            estimated_cost: f64::INFINITY,
1710            estimated_rows: table.n_rows as f64,
1711            time_travel: None,
1712            probe: None,
1713        }
1714    } else if !not_indexed && rowid_equality_candidate {
1715        let kind = AccessPathKind::RowidLookup;
1716        AccessPath {
1717            table: table.name.clone(),
1718            estimated_cost: estimate_cost_ext(&kind, table.n_pages, 0, table.n_rows),
1719            kind,
1720            index: None,
1721            estimated_rows: 1.0,
1722            time_travel: None,
1723            probe: None,
1724        }
1725    } else if !not_indexed && rowid_range_candidate {
1726        let selectivity = DEFAULT_RANGE_SELECTIVITY;
1727        let kind = AccessPathKind::IndexScanRange { selectivity };
1728        AccessPath {
1729            table: table.name.clone(),
1730            estimated_cost: estimate_cost_ext(&kind, table.n_pages, 0, table.n_rows),
1731            kind,
1732            index: None,
1733            estimated_rows: (table.n_rows as f64 * selectivity).max(1.0),
1734            time_travel: None,
1735            probe: None,
1736        }
1737    } else {
1738        AccessPath {
1739            table: table.name.clone(),
1740            kind: AccessPathKind::FullTableScan,
1741            index: None,
1742            estimated_cost: estimate_cost_ext(
1743                &AccessPathKind::FullTableScan,
1744                table.n_pages,
1745                0,
1746                table.n_rows,
1747            ),
1748            estimated_rows: table.n_rows as f64,
1749            time_travel: None,
1750            probe: None,
1751        }
1752    };
1753
1754    let mut candidates_considered: usize = 0;
1755    let mut partial_indexes_pruned: usize = 0;
1756    let mut hint_filtered_indexes: usize = 0;
1757    let mut skip_scan_candidates: usize = 0;
1758    let mut adaptive_hint_applied = false;
1759    let mut explicit_hint_applied = false;
1760    let mut explicit_hint_missing = explicit_indexed_by.is_some();
1761
1762    // Check each index for usability.
1763    for idx in indexes {
1764        if !idx.table.eq_ignore_ascii_case(&table.name) {
1765            continue;
1766        }
1767        if not_indexed {
1768            hint_filtered_indexes += 1;
1769            continue;
1770        }
1771        if let Some(hinted_name) = explicit_indexed_by {
1772            if !idx.name.eq_ignore_ascii_case(hinted_name) {
1773                hint_filtered_indexes += 1;
1774                continue;
1775            }
1776            explicit_hint_missing = false;
1777        }
1778
1779        // Partial index gate: skip unless the query's WHERE implies the
1780        // index's WHERE predicate. We use a conservative structural check:
1781        // the index predicate must appear as a conjunct in the query WHERE.
1782        if let Some(ref partial_pred) = idx.partial_where {
1783            if !where_terms_imply_predicate(where_terms, partial_pred) {
1784                partial_indexes_pruned += 1;
1785                continue;
1786            }
1787        }
1788
1789        let mut skip_scan_candidate = None;
1790        let usability = match analyze_index_usability(idx, where_terms) {
1791            IndexUsability::NotUsable => {
1792                if let Some(candidate) = analyze_skip_scan_candidate(table, idx, where_terms) {
1793                    skip_scan_candidates += 1;
1794                    skip_scan_candidate = Some(candidate);
1795                    IndexUsability::Range {
1796                        selectivity: candidate.per_probe_selectivity,
1797                    }
1798                } else {
1799                    IndexUsability::NotUsable
1800                }
1801            }
1802            usable => usable,
1803        };
1804
1805        if matches!(usability, IndexUsability::NotUsable) {
1806            continue;
1807        }
1808
1809        candidates_considered += 1;
1810
1811        let is_covering = needed_columns.is_some_and(|needed| {
1812            needed.iter().all(|column| {
1813                idx.columns
1814                    .iter()
1815                    .any(|index_column| index_column.eq_ignore_ascii_case(column))
1816                    // Ordinary SQLite indexes carry the rowid payload, so
1817                    // rowid projections remain index-only even if the rowid
1818                    // alias is not listed in idx.columns.
1819                    || is_rowid_alias_name(column)
1820            })
1821        });
1822
1823        let mut cost_multiplier: f64 = 1.0;
1824        let (kind, mut est_rows) = match usability {
1825            IndexUsability::Equality => {
1826                let rows = if idx.unique {
1827                    1.0
1828                } else {
1829                    (table.n_rows as f64 / 10.0).max(1.0)
1830                };
1831                if is_covering {
1832                    (
1833                        AccessPathKind::CoveringIndexScan {
1834                            selectivity: rows / table.n_rows.max(1) as f64,
1835                        },
1836                        rows,
1837                    )
1838                } else {
1839                    (AccessPathKind::IndexScanEquality, rows)
1840                }
1841            }
1842            IndexUsability::MultiColumnEquality {
1843                eq_columns,
1844                trailing_constraint,
1845            } => {
1846                // Multi-column equality narrows selectivity geometrically.
1847                // Each additional constrained column reduces rows by ~1/10.
1848                let equality_width = eq_columns
1849                    + usize::from(matches!(
1850                        trailing_constraint,
1851                        MultiColumnTrailingConstraint::InExpansion { .. }
1852                    ));
1853                #[allow(clippy::cast_precision_loss)]
1854                let per_probe_rows = if idx.unique
1855                    && equality_width == idx.columns.len()
1856                    && !matches!(
1857                        trailing_constraint,
1858                        MultiColumnTrailingConstraint::Range
1859                            | MultiColumnTrailingConstraint::LikePrefix
1860                    ) {
1861                    1.0
1862                } else {
1863                    let divisor = 10.0_f64.powi(i32::try_from(equality_width).unwrap_or(i32::MAX));
1864                    (table.n_rows as f64 / divisor).max(1.0)
1865                };
1866                let (rows, sel) = match trailing_constraint {
1867                    MultiColumnTrailingConstraint::Range => {
1868                        let range_factor = DEFAULT_RANGE_SELECTIVITY;
1869                        let r = (per_probe_rows * range_factor).max(1.0);
1870                        (
1871                            r,
1872                            range_factor * per_probe_rows / table.n_rows.max(1) as f64,
1873                        )
1874                    }
1875                    MultiColumnTrailingConstraint::LikePrefix => {
1876                        let range_factor = LIKE_PREFIX_SELECTIVITY;
1877                        let r = (per_probe_rows * range_factor).max(1.0);
1878                        (
1879                            r,
1880                            range_factor * per_probe_rows / table.n_rows.max(1) as f64,
1881                        )
1882                    }
1883                    MultiColumnTrailingConstraint::InExpansion { probe_count } => {
1884                        cost_multiplier = probe_count as f64;
1885                        let r =
1886                            (per_probe_rows * probe_count as f64).min(table.n_rows.max(1) as f64);
1887                        (r, r / table.n_rows.max(1) as f64)
1888                    }
1889                    MultiColumnTrailingConstraint::None => {
1890                        (per_probe_rows, per_probe_rows / table.n_rows.max(1) as f64)
1891                    }
1892                };
1893                if is_covering {
1894                    (AccessPathKind::CoveringIndexScan { selectivity: sel }, rows)
1895                } else if matches!(
1896                    trailing_constraint,
1897                    MultiColumnTrailingConstraint::Range
1898                        | MultiColumnTrailingConstraint::LikePrefix
1899                ) {
1900                    (AccessPathKind::IndexScanRange { selectivity: sel }, rows)
1901                } else {
1902                    (AccessPathKind::IndexScanEquality, rows)
1903                }
1904            }
1905            IndexUsability::Range { selectivity } => {
1906                let rows = (selectivity * table.n_rows as f64).max(1.0);
1907                if is_covering {
1908                    (AccessPathKind::CoveringIndexScan { selectivity }, rows)
1909                } else {
1910                    (AccessPathKind::IndexScanRange { selectivity }, rows)
1911                }
1912            }
1913            IndexUsability::InExpansion { probe_count } => {
1914                // Each probe is like an equality lookup; total cost
1915                // and rows are scaled by the number of probes.
1916                let per_probe_rows: f64 = if idx.unique {
1917                    1.0
1918                } else {
1919                    (table.n_rows as f64 / 10.0).max(1.0)
1920                };
1921                let rows = per_probe_rows * probe_count as f64;
1922                cost_multiplier = probe_count as f64;
1923                (AccessPathKind::IndexScanEquality, rows)
1924            }
1925            IndexUsability::LikePrefix { .. } => {
1926                let selectivity = LIKE_PREFIX_SELECTIVITY;
1927                let rows = (selectivity * table.n_rows as f64).max(1.0);
1928                if is_covering {
1929                    (AccessPathKind::CoveringIndexScan { selectivity }, rows)
1930                } else {
1931                    (AccessPathKind::IndexScanRange { selectivity }, rows)
1932                }
1933            }
1934            IndexUsability::NotUsable => unreachable!(),
1935        };
1936
1937        if let Some(candidate) = skip_scan_candidate {
1938            let probe_multiplier =
1939                (candidate.leading_probes * candidate.trailing_probe_count) as f64;
1940            cost_multiplier *= probe_multiplier;
1941            est_rows = (est_rows * probe_multiplier).min(table.n_rows.max(1) as f64);
1942        }
1943
1944        let mut cost =
1945            estimate_cost_ext(&kind, table.n_pages, idx.n_pages, table.n_rows) * cost_multiplier;
1946
1947        if let Some(hinted_name) = explicit_indexed_by {
1948            if idx.name.eq_ignore_ascii_case(hinted_name) {
1949                // Respect explicit INDEXED BY by strongly preferring that index.
1950                cost *= 0.01;
1951                explicit_hint_applied = true;
1952            }
1953        } else if let Some(adaptive_hint) = adaptive_preferred_index {
1954            if idx.name.eq_ignore_ascii_case(adaptive_hint) {
1955                cost *= ADAPTIVE_HINT_COST_BIAS;
1956                adaptive_hint_applied = true;
1957            }
1958        }
1959
1960        if cost < best.estimated_cost {
1961            best = AccessPath {
1962                table: table.name.clone(),
1963                kind,
1964                index: Some(idx.name.clone()),
1965                estimated_cost: cost,
1966                estimated_rows: est_rows,
1967                time_travel: None,
1968                probe: None,
1969            };
1970        }
1971    }
1972
1973    if !best.estimated_cost.is_finite() {
1974        best = AccessPath {
1975            table: table.name.clone(),
1976            kind: AccessPathKind::FullTableScan,
1977            index: None,
1978            estimated_cost: estimate_cost_ext(
1979                &AccessPathKind::FullTableScan,
1980                table.n_pages,
1981                0,
1982                table.n_rows,
1983            ),
1984            estimated_rows: table.n_rows as f64,
1985            time_travel: None,
1986            probe: None,
1987        };
1988    }
1989
1990    best.probe = extract_access_path_probe_with_rowid_aliases(
1991        &best,
1992        indexes,
1993        where_terms,
1994        rowid_alias_hints,
1995    );
1996
1997    // The index-selection metric counter is a real always-on metric: it must
1998    // increment for every planning decision regardless of tracing config.
1999    let metric_total = increment_index_selection_total(&best.kind);
2000
2001    // The structured `index_select` span/event below is the only consumer of
2002    // three `std::env::var` lookups (each a global env lock + heap String), a
2003    // `format!`/`to_owned` hint label, and the `Instant` clock read above.
2004    // None of that work is observable unless an INFO subscriber is listening,
2005    // so gate it behind a cheap level check. When INFO is enabled the emitted
2006    // diagnostics are identical to before.
2007    if tracing::enabled!(tracing::Level::INFO) {
2008        let chosen_index = best.index.as_deref().unwrap_or("(none)");
2009        let selectivity = match &best.kind {
2010            AccessPathKind::IndexScanRange { selectivity }
2011            | AccessPathKind::CoveringIndexScan { selectivity } => *selectivity,
2012            AccessPathKind::IndexScanEquality | AccessPathKind::RowidLookup => {
2013                best.estimated_rows / table.n_rows.max(1) as f64
2014            }
2015            AccessPathKind::FullTableScan => 1.0,
2016        };
2017        let metric_index_type = access_path_metric_label(&best.kind);
2018        let explicit_hint = match index_hint {
2019            Some(IndexHint::IndexedBy(index_name)) => format!("indexed_by:{index_name}"),
2020            Some(IndexHint::NotIndexed) => "not_indexed".to_owned(),
2021            None => "(none)".to_owned(),
2022        };
2023        let run_id = std::env::var("RUN_ID").unwrap_or_else(|_| "(none)".to_owned());
2024        let trace_id = std::env::var("TRACE_ID")
2025            .ok()
2026            .and_then(|value| value.parse::<u64>().ok())
2027            .unwrap_or(0);
2028        let scenario_id = std::env::var("SCENARIO_ID").unwrap_or_else(|_| "(none)".to_owned());
2029        let selection_elapsed_us = started.map_or(1, |start| start.elapsed().as_micros().max(1));
2030        let adaptive_hint = adaptive_preferred_index.unwrap_or("(none)");
2031        let hint_applied = explicit_hint_applied || adaptive_hint_applied;
2032        let span = tracing::info_span!(
2033            "index_select",
2034            run_id = %run_id,
2035            trace_id,
2036            scenario_id = %scenario_id,
2037            table = %table.name,
2038            explicit_hint = %explicit_hint,
2039            adaptive_hint = %adaptive_hint,
2040            candidates = candidates_considered,
2041            partial_pruned = partial_indexes_pruned,
2042            hint_filtered = hint_filtered_indexes,
2043            skip_scan_candidates
2044        );
2045        let _span_guard = span.enter();
2046
2047        tracing::info!(
2048            table = %table.name,
2049            candidates = candidates_considered,
2050            chosen_index = %chosen_index,
2051            estimated_selectivity = selectivity,
2052            access_path = %access_path_kind_label(&best.kind),
2053            estimated_cost = best.estimated_cost,
2054            estimated_rows = best.estimated_rows,
2055            selection_elapsed_us,
2056            run_id = %run_id,
2057            trace_id,
2058            scenario_id = %scenario_id,
2059            index_type = metric_index_type,
2060            fsqlite_index_selection_total = metric_total,
2061            hint_applied,
2062            explicit_hint_missing,
2063            "planner.index_select.choice"
2064        );
2065    }
2066
2067    best
2068}
2069
2070/// Check if the WHERE terms collectively imply a partial index predicate.
2071///
2072/// This is intentionally stronger than plain structural equality for common
2073/// partial-index predicates. It accepts exact conjunct matches, commuted
2074/// equality/range comparisons, stronger range bounds on the same column, and
2075/// non-NULL comparisons implying `IS NOT NULL`.
2076fn where_terms_imply_predicate(terms: &[WhereTerm<'_>], predicate: &Expr) -> bool {
2077    let pred_conjuncts = decompose_where(predicate);
2078    pred_conjuncts.iter().all(|predicate_conjunct| {
2079        terms
2080            .iter()
2081            .any(|term| expr_implies_partial_predicate(term.expr, predicate_conjunct))
2082    })
2083}
2084
2085fn expr_implies_partial_predicate(query_expr: &Expr, predicate: &Expr) -> bool {
2086    if query_expr == predicate {
2087        return true;
2088    }
2089
2090    if let Some(predicate_column) = normalize_is_not_null_predicate(predicate) {
2091        return expr_guarantees_non_null(query_expr, &predicate_column);
2092    }
2093
2094    match (
2095        normalize_column_literal_comparison(query_expr),
2096        normalize_column_literal_comparison(predicate),
2097    ) {
2098        (Some(query_cmp), Some(predicate_cmp)) => query_cmp.implies(&predicate_cmp),
2099        _ => false,
2100    }
2101}
2102
/// A comparison normalized to the shape `column <op> literal`.
///
/// Built by `normalize_column_literal_comparison`, which flips the operator
/// when the literal appears on the left-hand side so that the column is
/// always the left operand of `op`.
#[derive(Debug, Clone, PartialEq)]
struct NormalizedColumnComparison {
    /// Column being compared.
    column: WhereColumn,
    /// Comparison operator, oriented as `column <op> literal`.
    op: AstBinaryOp,
    /// Literal the column is compared against.
    literal: Literal,
}
2109
2110impl NormalizedColumnComparison {
2111    fn implies(&self, predicate: &Self) -> bool {
2112        if !where_columns_compatible(&self.column, &predicate.column) {
2113            return false;
2114        }
2115
2116        let Some(ordering) = compare_partial_index_literals(&self.literal, &predicate.literal)
2117        else {
2118            return false;
2119        };
2120
2121        match self.op {
2122            AstBinaryOp::Eq => literal_satisfies_predicate_literal(ordering, predicate.op),
2123            AstBinaryOp::Gt => {
2124                matches!(predicate.op, AstBinaryOp::Gt | AstBinaryOp::Ge)
2125                    && matches!(
2126                        ordering,
2127                        std::cmp::Ordering::Greater | std::cmp::Ordering::Equal
2128                    )
2129            }
2130            AstBinaryOp::Ge => match predicate.op {
2131                AstBinaryOp::Gt => matches!(ordering, std::cmp::Ordering::Greater),
2132                AstBinaryOp::Ge => matches!(
2133                    ordering,
2134                    std::cmp::Ordering::Greater | std::cmp::Ordering::Equal
2135                ),
2136                _ => false,
2137            },
2138            AstBinaryOp::Lt => {
2139                matches!(predicate.op, AstBinaryOp::Lt | AstBinaryOp::Le)
2140                    && matches!(
2141                        ordering,
2142                        std::cmp::Ordering::Less | std::cmp::Ordering::Equal
2143                    )
2144            }
2145            AstBinaryOp::Le => match predicate.op {
2146                AstBinaryOp::Lt => matches!(ordering, std::cmp::Ordering::Less),
2147                AstBinaryOp::Le => matches!(
2148                    ordering,
2149                    std::cmp::Ordering::Less | std::cmp::Ordering::Equal
2150                ),
2151                _ => false,
2152            },
2153            _ => false,
2154        }
2155    }
2156}
2157
2158fn literal_satisfies_predicate_literal(
2159    ordering: std::cmp::Ordering,
2160    predicate_op: AstBinaryOp,
2161) -> bool {
2162    match predicate_op {
2163        AstBinaryOp::Eq => matches!(ordering, std::cmp::Ordering::Equal),
2164        AstBinaryOp::Gt => matches!(ordering, std::cmp::Ordering::Greater),
2165        AstBinaryOp::Ge => matches!(
2166            ordering,
2167            std::cmp::Ordering::Greater | std::cmp::Ordering::Equal
2168        ),
2169        AstBinaryOp::Lt => matches!(ordering, std::cmp::Ordering::Less),
2170        AstBinaryOp::Le => matches!(
2171            ordering,
2172            std::cmp::Ordering::Less | std::cmp::Ordering::Equal
2173        ),
2174        _ => false,
2175    }
2176}
2177
2178fn expr_guarantees_non_null(expr: &Expr, predicate_column: &WhereColumn) -> bool {
2179    if let Some(query_cmp) = normalize_column_literal_comparison(expr) {
2180        return where_columns_compatible(&query_cmp.column, predicate_column)
2181            && !matches!(query_cmp.literal, Literal::Null);
2182    }
2183
2184    if let Some((column, _)) = classify_or_disjunction_as_in_list(expr) {
2185        return where_columns_compatible(&column, predicate_column);
2186    }
2187
2188    match expr {
2189        Expr::Between { expr: inner, .. }
2190        | Expr::In { expr: inner, .. }
2191        | Expr::Like { expr: inner, .. } => extract_where_column(inner)
2192            .is_some_and(|column| where_columns_compatible(&column, predicate_column)),
2193        Expr::IsNull {
2194            expr: inner,
2195            not: true,
2196            ..
2197        } => extract_where_column(inner)
2198            .is_some_and(|column| where_columns_compatible(&column, predicate_column)),
2199        _ => false,
2200    }
2201}
2202
2203fn normalize_is_not_null_predicate(expr: &Expr) -> Option<WhereColumn> {
2204    let Expr::IsNull {
2205        expr: inner,
2206        not: true,
2207        ..
2208    } = expr
2209    else {
2210        return None;
2211    };
2212    extract_where_column(inner)
2213}
2214
2215fn normalize_column_literal_comparison(expr: &Expr) -> Option<NormalizedColumnComparison> {
2216    let Expr::BinaryOp {
2217        left,
2218        op: AstBinaryOp::Eq | AstBinaryOp::Lt | AstBinaryOp::Le | AstBinaryOp::Gt | AstBinaryOp::Ge,
2219        right,
2220        ..
2221    } = expr
2222    else {
2223        return None;
2224    };
2225
2226    if let (Some(column), Expr::Literal(literal, _)) = (extract_where_column(left), right.as_ref())
2227    {
2228        return Some(NormalizedColumnComparison {
2229            column,
2230            op: match expr {
2231                Expr::BinaryOp { op, .. } => *op,
2232                _ => unreachable!(),
2233            },
2234            literal: literal.clone(),
2235        });
2236    }
2237
2238    if let (Expr::Literal(literal, _), Some(column)) = (left.as_ref(), extract_where_column(right))
2239    {
2240        return Some(NormalizedColumnComparison {
2241            column,
2242            op: reverse_comparison_op(match expr {
2243                Expr::BinaryOp { op, .. } => *op,
2244                _ => unreachable!(),
2245            })?,
2246            literal: literal.clone(),
2247        });
2248    }
2249
2250    None
2251}
2252
2253fn reverse_comparison_op(op: AstBinaryOp) -> Option<AstBinaryOp> {
2254    match op {
2255        AstBinaryOp::Eq => Some(AstBinaryOp::Eq),
2256        AstBinaryOp::Lt => Some(AstBinaryOp::Gt),
2257        AstBinaryOp::Le => Some(AstBinaryOp::Ge),
2258        AstBinaryOp::Gt => Some(AstBinaryOp::Lt),
2259        AstBinaryOp::Ge => Some(AstBinaryOp::Le),
2260        _ => None,
2261    }
2262}
2263
2264fn compare_partial_index_literals(left: &Literal, right: &Literal) -> Option<std::cmp::Ordering> {
2265    match (left, right) {
2266        (Literal::Integer(lhs), Literal::Integer(rhs)) => Some(lhs.cmp(rhs)),
2267        (Literal::Float(lhs), Literal::Float(rhs)) => lhs.partial_cmp(rhs),
2268        (Literal::Integer(lhs), Literal::Float(rhs)) => (*lhs as f64).partial_cmp(rhs),
2269        (Literal::Float(lhs), Literal::Integer(rhs)) => lhs.partial_cmp(&(*rhs as f64)),
2270        (Literal::String(lhs), Literal::String(rhs)) => Some(lhs.cmp(rhs)),
2271        _ => None,
2272    }
2273}
2274
2275fn where_columns_compatible(left: &WhereColumn, right: &WhereColumn) -> bool {
2276    left.column.eq_ignore_ascii_case(&right.column)
2277        && match (&left.table, &right.table) {
2278            (Some(lhs), Some(rhs)) => lhs.eq_ignore_ascii_case(rhs),
2279            _ => true,
2280        }
2281}
2282
2283// ---------------------------------------------------------------------------
2284// Index usability analysis (§10.5)
2285// ---------------------------------------------------------------------------
2286
/// Result of analyzing a WHERE term against an index.
///
/// The access-path planner maps each usable variant to an access path kind
/// and a row estimate; `NotUsable` indexes are skipped unless they qualify
/// for a skip scan.
#[derive(Debug, Clone, PartialEq)]
// `Eq` cannot be derived because `Range` carries an `f64`.
#[allow(clippy::derive_partial_eq_without_eq)]
pub enum IndexUsability {
    /// Index can satisfy an equality constraint on its leftmost column.
    Equality,
    /// Multi-column equality prefix: equality on the first `eq_columns` index
    /// columns, optionally followed by an additional constraint on the next
    /// column.
    MultiColumnEquality {
        /// Number of leading columns with equality constraints.
        eq_columns: usize,
        /// Constraint on the column immediately after the equality prefix.
        trailing_constraint: MultiColumnTrailingConstraint,
    },
    /// Index can satisfy a range constraint (rightmost usable position).
    /// `selectivity` is the estimated fraction of table rows in the range;
    /// the planner multiplies it by the table row count.
    Range { selectivity: f64 },
    /// `IN (...)` expanded to multiple equality probes.
    /// `probe_count` is the number of probes; it scales both the cost and the
    /// row estimate.
    InExpansion { probe_count: usize },
    /// `LIKE`/`GLOB` with a constant prefix and derived upper bound.
    /// Represents the range: `column >= low` and optionally `column < high`.
    LikePrefix { low: String, high: Option<String> },
    /// The term cannot use this index.
    NotUsable,
}
2312
/// Constraint on the index column that immediately follows a multi-column
/// equality prefix (see [`IndexUsability::MultiColumnEquality`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MultiColumnTrailingConstraint {
    /// No further constraint after the equality prefix.
    None,
    /// A range constraint; costed with the default range selectivity.
    Range,
    /// An `IN (...)` list on the next column; `probe_count` multiplies the
    /// per-probe cost and row estimate, and the column counts toward the
    /// equality width.
    InExpansion { probe_count: usize },
    /// A `LIKE`/`GLOB` prefix constraint; costed with the LIKE-prefix
    /// selectivity.
    LikePrefix,
}
2320
/// Cost inputs for a skip scan over an index that is not directly usable.
///
/// The planner multiplies cost and estimated rows by
/// `leading_probes * trailing_probe_count` and treats each probe as a range
/// scan with `per_probe_selectivity`.
#[derive(Debug, Clone, Copy, PartialEq)]
struct SkipScanCandidate {
    /// Number of probes over the skipped leading column(s) — presumably one
    /// per distinct leading value; confirm against `analyze_skip_scan_candidate`.
    leading_probes: usize,
    /// Number of probes needed on the constrained trailing column.
    trailing_probe_count: usize,
    /// Estimated fraction of table rows matched by a single probe.
    per_probe_selectivity: f64,
}
2327
/// Summary of the kinds of constraint found for one index column.
///
/// NOTE(review): the code that builds and consumes this summary is outside
/// this section; field descriptions below are inferred from names and types
/// and should be confirmed there.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct IndexColumnTermSummary {
    /// Presumably set when an equality term constrains the column.
    has_equality: bool,
    /// Presumably the probe count of the first `IN` term seen, if any.
    first_in_probe_count: Option<usize>,
    /// Presumably set when a range term constrains the column.
    has_range: bool,
    /// Presumably the `(low, high)` bounds of the first LIKE-prefix term seen.
    first_like_prefix: Option<(String, Option<String>)>,
}
2335
/// A decomposed WHERE term with the column it references (if any).
///
/// The term borrows its expression from the AST for the lifetime `'a`; it
/// does not own or copy the expression.
#[derive(Debug, Clone)]
pub struct WhereTerm<'a> {
    /// The original expression.
    pub expr: &'a Expr,
    /// The column referenced on the left side (if this is a simple comparison).
    pub column: Option<WhereColumn>,
    /// The kind of constraint.
    pub kind: WhereTermKind,
}
2346
/// The column side of a WHERE comparison.
///
/// The derived `PartialEq` is case-sensitive; the planner's own matching
/// helpers (`where_columns_compatible`, `where_columns_equivalent`) compare
/// both fields ignoring ASCII case instead.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WhereColumn {
    /// Optional table qualifier.
    pub table: Option<String>,
    /// Column name.
    pub column: String,
}
2355
/// Classification of a WHERE term for index usability.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WhereTermKind {
    /// `col = expr`
    Equality,
    /// `col > expr`, `col >= expr`, `col < expr`, `col <= expr`
    Range,
    /// `col BETWEEN low AND high`
    Between,
    /// `col IN (...)`; `count` is presumably the number of list entries
    /// (confirm against the classifier).
    InList { count: usize },
    /// `col LIKE 'prefix%'` or `col GLOB 'prefix*'`, rewritten as
    /// `col >= prefix AND col < upper_bound`.
    LikePrefix {
        /// Constant prefix; the inclusive lower bound of the rewritten range.
        prefix: String,
        /// Exclusive upper bound of the rewritten range, when one is available.
        upper_bound: Option<String>,
    },
    /// Rowid equality: `rowid = expr` or `_rowid_ = expr` or `oid = expr`
    RowidEquality,
    /// Any other expression (not directly usable for index lookup).
    Other,
}
2378
2379/// Decompose a WHERE clause into individual conjuncts (AND-separated terms).
2380#[must_use]
2381pub fn decompose_where(expr: &Expr) -> Vec<&Expr> {
2382    let mut terms = Vec::new();
2383    collect_conjuncts(expr, &mut terms);
2384    terms
2385}
2386
2387fn collect_conjuncts<'a>(expr: &'a Expr, out: &mut Vec<&'a Expr>) {
2388    if let Expr::BinaryOp {
2389        left,
2390        op: AstBinaryOp::And,
2391        right,
2392        ..
2393    } = expr
2394    {
2395        collect_conjuncts(left, out);
2396        collect_conjuncts(right, out);
2397    } else {
2398        out.push(expr);
2399    }
2400}
2401
2402fn collect_disjuncts<'a>(expr: &'a Expr, out: &mut Vec<&'a Expr>) {
2403    if let Expr::BinaryOp {
2404        left,
2405        op: AstBinaryOp::Or,
2406        right,
2407        ..
2408    } = expr
2409    {
2410        collect_disjuncts(left, out);
2411        collect_disjuncts(right, out);
2412    } else {
2413        out.push(expr);
2414    }
2415}
2416
2417fn where_columns_equivalent(left: &WhereColumn, right: &WhereColumn) -> bool {
2418    left.column.eq_ignore_ascii_case(&right.column)
2419        && match (&left.table, &right.table) {
2420            (Some(l), Some(r)) => l.eq_ignore_ascii_case(r),
2421            (None, None) => true,
2422            _ => false,
2423        }
2424}
2425
2426fn classify_or_disjunction_as_in_list(expr: &Expr) -> Option<(WhereColumn, usize)> {
2427    let mut disjuncts = Vec::new();
2428    collect_disjuncts(expr, &mut disjuncts);
2429    if disjuncts.len() < 2 {
2430        return None;
2431    }
2432
2433    let mut shared_column: Option<WhereColumn> = None;
2434
2435    for disjunct in disjuncts.iter().copied() {
2436        let Expr::BinaryOp {
2437            left,
2438            op: AstBinaryOp::Eq,
2439            right,
2440            ..
2441        } = disjunct
2442        else {
2443            return None;
2444        };
2445
2446        let column = match (extract_where_column(left), extract_where_column(right)) {
2447            (Some(column), None) | (None, Some(column)) => column,
2448            _ => return None,
2449        };
2450
2451        if is_rowid_column(&column) {
2452            return None;
2453        }
2454
2455        if let Some(ref existing) = shared_column {
2456            if !where_columns_equivalent(existing, &column) {
2457                return None;
2458            }
2459        } else {
2460            shared_column = Some(column);
2461        }
2462    }
2463
2464    shared_column.map(|column| (column, disjuncts.len()))
2465}
2466
2467/// Classify a single WHERE expression into a [`WhereTerm`].
2468#[must_use]
2469#[allow(clippy::too_many_lines)]
2470pub fn classify_where_term(expr: &Expr) -> WhereTerm<'_> {
2471    match expr {
2472        // (col = v1) OR (col = v2) OR ... => treat as IN-list probe expansion.
2473        Expr::BinaryOp {
2474            op: AstBinaryOp::Or,
2475            ..
2476        } => {
2477            if let Some((column, probe_count)) = classify_or_disjunction_as_in_list(expr) {
2478                tracing::debug!(
2479                    target: "fsqlite.planner",
2480                    rewrite = "or_disjunction_to_in_list",
2481                    column = ?column,
2482                    probe_count,
2483                    "planner.where_term.rewrite"
2484                );
2485                return WhereTerm {
2486                    expr,
2487                    column: Some(column),
2488                    kind: WhereTermKind::InList { count: probe_count },
2489                };
2490            }
2491
2492            WhereTerm {
2493                expr,
2494                column: None,
2495                kind: WhereTermKind::Other,
2496            }
2497        }
2498
2499        // col = expr or expr = col
2500        //
2501        // `col = NULL` is special-cased: in SQL, `x = NULL` evaluates to NULL
2502        // (unknown), never TRUE, so it cannot drive an index seek or equality
2503        // constraint.  Classify it as Other instead of Equality.
2504        Expr::BinaryOp {
2505            left,
2506            op: AstBinaryOp::Eq,
2507            right,
2508            ..
2509        } => {
2510            if matches!(left.as_ref(), Expr::Literal(Literal::Null, _))
2511                || matches!(right.as_ref(), Expr::Literal(Literal::Null, _))
2512            {
2513                return WhereTerm {
2514                    expr,
2515                    column: None,
2516                    kind: WhereTermKind::Other,
2517                };
2518            }
2519            if let Some(wc) = extract_where_column(left) {
2520                if is_rowid_column(&wc) {
2521                    return WhereTerm {
2522                        expr,
2523                        column: Some(wc),
2524                        kind: WhereTermKind::RowidEquality,
2525                    };
2526                }
2527                return WhereTerm {
2528                    expr,
2529                    column: Some(wc),
2530                    kind: WhereTermKind::Equality,
2531                };
2532            }
2533            if let Some(wc) = extract_where_column(right) {
2534                if is_rowid_column(&wc) {
2535                    return WhereTerm {
2536                        expr,
2537                        column: Some(wc),
2538                        kind: WhereTermKind::RowidEquality,
2539                    };
2540                }
2541                return WhereTerm {
2542                    expr,
2543                    column: Some(wc),
2544                    kind: WhereTermKind::Equality,
2545                };
2546            }
2547            WhereTerm {
2548                expr,
2549                column: None,
2550                kind: WhereTermKind::Other,
2551            }
2552        }
2553
2554        // col < expr, col <= expr, col > expr, col >= expr
2555        // Also handles reversed forms like `5 < col` by checking both sides.
2556        Expr::BinaryOp {
2557            left,
2558            op: AstBinaryOp::Lt | AstBinaryOp::Le | AstBinaryOp::Gt | AstBinaryOp::Ge,
2559            right,
2560            ..
2561        } => {
2562            let column = extract_where_column(left).or_else(|| extract_where_column(right));
2563            WhereTerm {
2564                expr,
2565                column,
2566                kind: WhereTermKind::Range,
2567            }
2568        }
2569
2570        // col BETWEEN low AND high
2571        Expr::Between {
2572            expr: inner, not, ..
2573        } if !not => {
2574            let column = extract_where_column(inner);
2575            WhereTerm {
2576                expr,
2577                column,
2578                kind: WhereTermKind::Between,
2579            }
2580        }
2581
2582        // col IN (...)
2583        Expr::In {
2584            expr: inner,
2585            set,
2586            not,
2587            ..
2588        } if !not => {
2589            let column = extract_where_column(inner);
2590            let count = match set {
2591                InSet::List(items) => items.len(),
2592                InSet::Subquery(_) | InSet::Table(_) => 10, // Heuristic
2593            };
2594            WhereTerm {
2595                expr,
2596                column,
2597                kind: WhereTermKind::InList { count },
2598            }
2599        }
2600
2601        // col GLOB 'prefix*' or col LIKE 'prefix%' — prefix-to-range optimisation.
2602        //
2603        // GLOB is always case-sensitive, so prefix extraction is always safe.
2604        //
2605        // LIKE is case-INSENSITIVE by default (for ASCII), so converting
2606        // `col LIKE 'abc%'` into the range `col >= 'abc' AND col < 'abd'`
2607        // would miss rows like 'ABC…'. The optimisation is only safe when:
2608        //   (a) PRAGMA case_sensitive_like = ON, OR
2609        //   (b) The column has BINARY collation
2610        //
2611        // Until collation/pragma state is wired through the planner, we still
2612        // have one sound subset we can lower today: prefixes with no ASCII
2613        // letters. SQLite's default LIKE only case-folds ASCII, so those
2614        // prefixes are already case-stable.
2615        Expr::Like {
2616            expr: inner,
2617            pattern,
2618            op,
2619            not,
2620            escape,
2621            ..
2622        } if !not => {
2623            let column = extract_where_column(inner);
2624            let (prefix, operator) = match op {
2625                LikeOp::Glob => (extract_glob_prefix(pattern), "GLOB"),
2626                LikeOp::Like => {
2627                    let prefix = extract_like_prefix(pattern, escape.as_deref())
2628                        .filter(|prefix| is_like_prefix_safe_for_column(column.as_ref(), prefix));
2629                    (prefix, "LIKE")
2630                }
2631                // Match and Regexp are not optimizable via prefix-to-range.
2632                LikeOp::Match | LikeOp::Regexp => (None, "MATCH/REGEXP"),
2633            };
2634            if let Some(pfx) = prefix {
2635                let upper_bound = like_prefix_upper_bound(&pfx);
2636                tracing::debug!(
2637                    target: "fsqlite.planner",
2638                    rewrite = "pattern_prefix_to_range",
2639                    operator,
2640                    column = ?column,
2641                    prefix = %pfx,
2642                    upper_bound = ?upper_bound,
2643                    "planner.where_term.rewrite"
2644                );
2645                WhereTerm {
2646                    expr,
2647                    column,
2648                    kind: WhereTermKind::LikePrefix {
2649                        upper_bound,
2650                        prefix: pfx,
2651                    },
2652                }
2653            } else {
2654                WhereTerm {
2655                    expr,
2656                    column,
2657                    kind: WhereTermKind::Other,
2658                }
2659            }
2660        }
2661
2662        _ => WhereTerm {
2663            expr,
2664            column: None,
2665            kind: WhereTermKind::Other,
2666        },
2667    }
2668}
2669
2670/// Extract a `WhereColumn` from an expression if it's a simple column reference.
2671fn extract_where_column(expr: &Expr) -> Option<WhereColumn> {
2672    if let Expr::Column(col_ref, _) = expr {
2673        Some(WhereColumn {
2674            table: col_ref.table.as_ref().map(ToString::to_string),
2675            column: col_ref.column.to_string(),
2676        })
2677    } else {
2678        None
2679    }
2680}
2681
/// Check if a `WhereColumn` is a rowid alias.
///
/// Only the column name is consulted (via `is_rowid_alias_name`); the table
/// qualifier is not examined.
fn is_rowid_column(wc: &WhereColumn) -> bool {
    is_rowid_alias_name(&wc.column)
}
2686
2687fn where_term_matches_rowid_equality(
2688    table_name: &str,
2689    term: &WhereTerm<'_>,
2690    rowid_alias_hints: &[RowidAliasHint],
2691) -> bool {
2692    if matches!(term.kind, WhereTermKind::RowidEquality) {
2693        return true;
2694    }
2695
2696    matches!(term.kind, WhereTermKind::Equality)
2697        && term.column.as_ref().is_some_and(|column| {
2698            rowid_alias_hints
2699                .iter()
2700                .any(|hint| hint.matches_column(table_name, column))
2701        })
2702}
2703
2704fn where_term_matches_rowid_range(
2705    table_name: &str,
2706    term: &WhereTerm<'_>,
2707    rowid_alias_hints: &[RowidAliasHint],
2708) -> bool {
2709    matches!(term.kind, WhereTermKind::Range | WhereTermKind::Between)
2710        && term.column.as_ref().is_some_and(|column| {
2711            is_rowid_column(column)
2712                || rowid_alias_hints
2713                    .iter()
2714                    .any(|hint| hint.matches_column(table_name, column))
2715        })
2716}
2717
2718fn find_rowid_equality_term<'terms, 'expr>(
2719    table_name: &str,
2720    terms: &'terms [WhereTerm<'expr>],
2721    rowid_alias_hints: &[RowidAliasHint],
2722) -> Option<&'terms WhereTerm<'expr>> {
2723    terms
2724        .iter()
2725        .find(|term| where_term_matches_rowid_equality(table_name, term, rowid_alias_hints))
2726}
2727
2728fn find_rowid_range_column(
2729    table_name: &str,
2730    terms: &[WhereTerm<'_>],
2731    rowid_alias_hints: &[RowidAliasHint],
2732) -> Option<String> {
2733    terms.iter().find_map(|term| {
2734        where_term_matches_rowid_range(table_name, term, rowid_alias_hints)
2735            .then(|| term.column.as_ref().map(|column| column.column.clone()))
2736            .flatten()
2737    })
2738}
2739
2740/// Extract the non-column side of a binary comparison expression.
2741fn extract_comparison_operand(expr: &Expr) -> Option<Expr> {
2742    let Expr::BinaryOp { left, right, .. } = expr else {
2743        return None;
2744    };
2745    if extract_where_column(left).is_some() {
2746        Some(right.as_ref().clone())
2747    } else if extract_where_column(right).is_some() {
2748        Some(left.as_ref().clone())
2749    } else {
2750        None
2751    }
2752}
2753
2754/// Given a finalized [`AccessPath`] and the WHERE terms that produced it,
2755/// extract probe expressions so downstream consumers do not re-parse the
2756/// WHERE clause.
2757fn extract_access_path_probe_with_rowid_aliases(
2758    best: &AccessPath,
2759    indexes: &[IndexInfo],
2760    where_terms: &[WhereTerm<'_>],
2761    rowid_alias_hints: &[RowidAliasHint],
2762) -> Option<AccessPathProbe> {
2763    match &best.kind {
2764        AccessPathKind::FullTableScan => None,
2765        AccessPathKind::RowidLookup => {
2766            let term = find_rowid_equality_term(&best.table, where_terms, rowid_alias_hints)?;
2767            let target = extract_comparison_operand(term.expr)?;
2768            Some(AccessPathProbe::RowidEquality {
2769                target: Box::new(target),
2770            })
2771        }
2772        AccessPathKind::IndexScanEquality => {
2773            let index_name = best.index.as_deref()?;
2774            let idx = indexes
2775                .iter()
2776                .find(|i| i.name.eq_ignore_ascii_case(index_name))?;
2777            let leading_col = idx.columns.first()?;
2778            if let Some(term) = where_terms.iter().find(|t| {
2779                matches!(t.kind, WhereTermKind::Equality)
2780                    && t.column
2781                        .as_ref()
2782                        .is_some_and(|c| c.column.eq_ignore_ascii_case(leading_col))
2783            }) {
2784                let target = extract_comparison_operand(term.expr)?;
2785                return Some(AccessPathProbe::Equality {
2786                    column: leading_col.clone(),
2787                    target: Box::new(target),
2788                });
2789            }
2790            if let Some(term) = where_terms.iter().find(|t| {
2791                matches!(t.kind, WhereTermKind::InList { .. })
2792                    && t.column
2793                        .as_ref()
2794                        .is_some_and(|c| c.column.eq_ignore_ascii_case(leading_col))
2795            }) {
2796                return extract_in_list_probe(term.expr, leading_col);
2797            }
2798            None
2799        }
2800        AccessPathKind::IndexScanRange { .. } | AccessPathKind::CoveringIndexScan { .. } => {
2801            if best.index.is_none() {
2802                let leading_col =
2803                    find_rowid_range_column(&best.table, where_terms, rowid_alias_hints)?;
2804                return extract_range_probe_for_column(where_terms, &leading_col);
2805            }
2806            let index_name = best.index.as_deref()?;
2807            let idx = indexes
2808                .iter()
2809                .find(|i| i.name.eq_ignore_ascii_case(index_name))?;
2810            let leading_col = idx.columns.first()?;
2811            extract_range_probe_for_column(where_terms, leading_col)
2812        }
2813    }
2814}
2815
2816fn extract_range_probe_for_column(
2817    where_terms: &[WhereTerm<'_>],
2818    leading_col: &str,
2819) -> Option<AccessPathProbe> {
2820    let mut lower: Option<(Box<Expr>, bool)> = None;
2821    let mut upper: Option<(Box<Expr>, bool)> = None;
2822    for term in where_terms {
2823        let col = match &term.column {
2824            Some(c) if c.column.eq_ignore_ascii_case(leading_col) => c,
2825            _ => continue,
2826        };
2827        if matches!(term.kind, WhereTermKind::Equality) {
2828            let target = extract_comparison_operand(term.expr)?;
2829            return Some(AccessPathProbe::Equality {
2830                column: col.column.clone(),
2831                target: Box::new(target),
2832            });
2833        }
2834        if let WhereTermKind::LikePrefix {
2835            prefix,
2836            upper_bound,
2837        } = &term.kind
2838        {
2839            let lo = Expr::Literal(Literal::String(prefix.clone()), Span::ZERO);
2840            let lo_bound = Some((Box::new(lo), true));
2841            let hi_bound = upper_bound.as_ref().map(|ub| {
2842                (
2843                    Box::new(Expr::Literal(Literal::String(ub.clone()), Span::ZERO)),
2844                    false,
2845                )
2846            });
2847            return Some(AccessPathProbe::Range {
2848                column: col.column.clone(),
2849                lower: lo_bound,
2850                upper: hi_bound,
2851            });
2852        }
2853        if matches!(term.kind, WhereTermKind::Between) {
2854            if let Expr::Between { low, high, not, .. } = term.expr {
2855                if !not {
2856                    return Some(AccessPathProbe::Range {
2857                        column: col.column.clone(),
2858                        lower: Some((Box::new(low.as_ref().clone()), true)),
2859                        upper: Some((Box::new(high.as_ref().clone()), true)),
2860                    });
2861                }
2862            }
2863        }
2864        if !matches!(term.kind, WhereTermKind::Range) {
2865            continue;
2866        }
2867        if let Expr::BinaryOp {
2868            left, op, right, ..
2869        } = term.expr
2870        {
2871            let col_on_left = extract_where_column(left).is_some();
2872            match op {
2873                AstBinaryOp::Gt => {
2874                    let val = if col_on_left { right } else { left };
2875                    if col_on_left {
2876                        lower = Some((Box::new(val.as_ref().clone()), false));
2877                    } else {
2878                        upper = Some((Box::new(val.as_ref().clone()), false));
2879                    }
2880                }
2881                AstBinaryOp::Ge => {
2882                    let val = if col_on_left { right } else { left };
2883                    if col_on_left {
2884                        lower = Some((Box::new(val.as_ref().clone()), true));
2885                    } else {
2886                        upper = Some((Box::new(val.as_ref().clone()), true));
2887                    }
2888                }
2889                AstBinaryOp::Lt => {
2890                    let val = if col_on_left { right } else { left };
2891                    if col_on_left {
2892                        upper = Some((Box::new(val.as_ref().clone()), false));
2893                    } else {
2894                        lower = Some((Box::new(val.as_ref().clone()), false));
2895                    }
2896                }
2897                AstBinaryOp::Le => {
2898                    let val = if col_on_left { right } else { left };
2899                    if col_on_left {
2900                        upper = Some((Box::new(val.as_ref().clone()), true));
2901                    } else {
2902                        lower = Some((Box::new(val.as_ref().clone()), true));
2903                    }
2904                }
2905                _ => {}
2906            }
2907        }
2908    }
2909    if lower.is_some() || upper.is_some() {
2910        Some(AccessPathProbe::Range {
2911            column: leading_col.to_owned(),
2912            lower,
2913            upper,
2914        })
2915    } else {
2916        None
2917    }
2918}
2919
2920fn extract_in_list_probe(expr: &Expr, column: &str) -> Option<AccessPathProbe> {
2921    if let Expr::In {
2922        set: InSet::List(items),
2923        not: false,
2924        ..
2925    } = expr
2926    {
2927        let values: Vec<Box<Expr>> = items.iter().map(|item| Box::new(item.clone())).collect();
2928        if values.is_empty() {
2929            return None;
2930        }
2931        return Some(AccessPathProbe::InList {
2932            column: column.to_owned(),
2933            values,
2934        });
2935    }
2936    None
2937}
2938
2939/// Extract a pure trailing-wildcard prefix from a GLOB pattern (e.g.
2940/// `'abc*'` → `"abc"`).
2941///
2942/// Returns `None` unless the pattern is a string literal whose only wildcard
2943/// region is one or more trailing `*` characters. Shapes such as `abc*def`,
2944/// `abc[0-9]`, or exact-match `abc` require either residual filtering or
2945/// equality handling, so the current prefix-range lowering refuses them.
2946fn extract_glob_prefix(pattern: &Expr) -> Option<String> {
2947    if let Expr::Literal(Literal::String(s), _) = pattern {
2948        let mut prefix = String::new();
2949        let mut saw_trailing_star = false;
2950        for ch in s.chars() {
2951            match ch {
2952                '*' => saw_trailing_star = true,
2953                '?' | '[' => return None,
2954                _ if saw_trailing_star => return None,
2955                _ => prefix.push(ch),
2956            }
2957        }
2958        if prefix.is_empty() || !saw_trailing_star {
2959            None
2960        } else {
2961            Some(prefix)
2962        }
2963    } else {
2964        None
2965    }
2966}
2967
2968/// Extract a pure trailing-wildcard prefix from a LIKE pattern (e.g.
2969/// `'abc%'` → `"abc"`).
2970///
2971/// Returns `None` if:
2972/// - The pattern has no trailing `%` wildcard
2973/// - The pattern is not a string literal
2974/// - The `ESCAPE` expression is not a literal single character
2975/// - The pattern contains an unescaped `_` or any non-trailing wildcard/text
2976///   after the first unescaped `%`
2977///
2978/// bd-wwqen.6: This enables the LIKE prefix-to-range optimization when
2979/// collation makes it safe (BINARY collation or case_sensitive_like = ON).
2980fn extract_like_prefix(pattern: &Expr, escape: Option<&Expr>) -> Option<String> {
2981    let escape_char = match escape {
2982        None => None,
2983        Some(Expr::Literal(Literal::String(s), _)) => {
2984            let mut chars = s.chars();
2985            let ch = chars.next()?;
2986            if chars.next().is_some() {
2987                return None;
2988            }
2989            Some(ch)
2990        }
2991        Some(_) => return None,
2992    };
2993
2994    if let Expr::Literal(Literal::String(s), _) = pattern {
2995        let mut prefix = String::new();
2996        let mut saw_trailing_percent = false;
2997        let mut chars = s.chars();
2998        while let Some(ch) = chars.next() {
2999            if escape_char.is_some_and(|esc| esc == ch) {
3000                if saw_trailing_percent {
3001                    return None;
3002                }
3003                prefix.push(chars.next()?);
3004                continue;
3005            }
3006            match ch {
3007                '%' => saw_trailing_percent = true,
3008                '_' => return None,
3009                _ if saw_trailing_percent => return None,
3010                _ => prefix.push(ch),
3011            }
3012        }
3013        if prefix.is_empty() || !saw_trailing_percent {
3014            None
3015        } else {
3016            Some(prefix)
3017        }
3018    } else {
3019        None
3020    }
3021}
3022
3023/// Check if a LIKE prefix is guaranteed to be case-stable under SQLite's
3024/// default ASCII-only case folding.
3025///
3026/// The conservative fallback we can enable today, even without collation or
3027/// pragma plumbing, is: if the extracted prefix contains no ASCII letters, the
3028/// default LIKE case folding cannot expand the match set beyond the byte range
3029/// defined by `prefix .. upper_bound(prefix)`.
3030///
3031/// Examples that are safe under default SQLite semantics:
3032/// - `"2024-%"` (digits and punctuation only)
3033/// - `"é%"` (non-ASCII characters are not case-folded by built-in LIKE)
3034///
3035/// Future planner context can widen this by checking:
3036/// - `PRAGMA case_sensitive_like`
3037/// - BINARY/case-sensitive column or index collations
3038fn is_like_prefix_safe_for_column(_column: Option<&WhereColumn>, prefix: &str) -> bool {
3039    prefix.chars().all(|ch| !ch.is_ascii_alphabetic())
3040}
3041
/// Compute the exclusive upper bound for a LIKE prefix range.
///
/// Example: `"abc"` becomes `"abd"` so the planner can model:
/// `column >= "abc"` and `column < "abd"`.
///
/// The bound is built by replacing the last character that has a successor
/// with that successor and dropping everything after it. Trailing
/// `char::MAX` characters have no successor and are dropped, so
/// `"a\u{10FFFF}"` becomes `"b"`.
///
/// Returns `None` when no valid successor exists, i.e. for an empty prefix
/// or one made only of `char::MAX`.
fn like_prefix_upper_bound(prefix: &str) -> Option<String> {
    let mut chars: Vec<char> = prefix.chars().collect();
    while let Some(last) = chars.pop() {
        let successor = match last {
            char::MAX => None,
            // U+D800..=U+DFFF are surrogates, not Unicode scalar values, so
            // the character after U+D7FF is U+E000. Without this step the
            // increment fails and the bound falls back to an earlier
            // character (or to no bound at all), widening the range.
            '\u{D7FF}' => Some('\u{E000}'),
            other => char::from_u32(u32::from(other) + 1),
        };
        if let Some(next) = successor {
            chars.push(next);
            return Some(chars.into_iter().collect());
        }
    }
    None
}
3062
3063/// Determine the usability of an index for a set of WHERE terms.
3064///
3065/// Rules from §10.5, extended for multi-column indexes:
3066/// - Walk the index columns left-to-right; for each column, check if the WHERE
3067///   has an equality constraint. The equality prefix can be extended as long as
3068///   consecutive leading columns have equality terms.
3069/// - After the equality prefix, check for a range/BETWEEN, `IN (...)`, or
3070///   `LIKE`/`GLOB` prefix probe on the next column.
3071/// - For single-column leftmost matches, also check IN and LIKE prefix.
3072/// - For expression indexes, match query expressions structurally against the
3073///   index's expression columns.
3074#[must_use]
3075#[allow(clippy::too_many_lines)]
3076pub fn analyze_index_usability(index: &IndexInfo, terms: &[WhereTerm<'_>]) -> IndexUsability {
3077    // --- Expression index matching ---
3078    // Expression indexes store their real key terms in `expression_columns`
3079    // and leave `columns` empty by convention (see the schema loader at
3080    // fsqlite-core/src/connection.rs), so the expression-index branch MUST
3081    // run BEFORE the `columns.is_empty()` guard below — otherwise it would
3082    // be unreachable and every expression index would appear planner-dead
3083    // (issue #63).  We still fall through to `NotUsable` if neither
3084    // `columns` nor `expression_columns` carries a matchable term.
3085    if !index.expression_columns.is_empty() {
3086        return analyze_expression_index_usability(index, terms);
3087    }
3088
3089    if index.columns.is_empty() {
3090        return IndexUsability::NotUsable;
3091    }
3092
3093    // Helper: check if a WHERE column matches an index column, respecting
3094    // the table qualifier when present.  Unqualified columns (table = None)
3095    // are conservatively considered matching.
3096    let col_matches = |wc: &WhereColumn, idx_col: &str| -> bool {
3097        wc.column.eq_ignore_ascii_case(idx_col)
3098            && wc
3099                .table
3100                .as_ref()
3101                .is_none_or(|t| t.eq_ignore_ascii_case(&index.table))
3102    };
3103
3104    let mut column_summaries = vec![IndexColumnTermSummary::default(); index.columns.len()];
3105    let mut leftmost_first_constraint = None;
3106
3107    for term in terms {
3108        let Some(wc) = term.column.as_ref() else {
3109            continue;
3110        };
3111        for (column_index, index_column) in index.columns.iter().enumerate() {
3112            if !col_matches(wc, index_column) {
3113                continue;
3114            }
3115            let summary = &mut column_summaries[column_index];
3116            match &term.kind {
3117                WhereTermKind::Equality => {
3118                    summary.has_equality = true;
3119                    if column_index == 0 {
3120                        // Equality must dominate weaker leftmost probes on the
3121                        // same column, regardless of term visitation order.
3122                        leftmost_first_constraint = Some(IndexUsability::Equality);
3123                    }
3124                }
3125                WhereTermKind::InList { count } => {
3126                    if summary
3127                        .first_in_probe_count
3128                        .is_none_or(|existing| *count < existing)
3129                    {
3130                        summary.first_in_probe_count = Some(*count);
3131                    }
3132                    if column_index == 0 {
3133                        match leftmost_first_constraint {
3134                            Some(IndexUsability::InExpansion { probe_count })
3135                                if *count < probe_count =>
3136                            {
3137                                leftmost_first_constraint = Some(IndexUsability::InExpansion {
3138                                    probe_count: *count,
3139                                });
3140                            }
3141                            None => {
3142                                leftmost_first_constraint = Some(IndexUsability::InExpansion {
3143                                    probe_count: *count,
3144                                });
3145                            }
3146                            _ => {}
3147                        }
3148                    }
3149                }
3150                WhereTermKind::LikePrefix {
3151                    prefix,
3152                    upper_bound,
3153                } => {
3154                    summary
3155                        .first_like_prefix
3156                        .get_or_insert_with(|| (prefix.clone(), upper_bound.clone()));
3157                    if column_index == 0 && leftmost_first_constraint.is_none() {
3158                        leftmost_first_constraint = Some(IndexUsability::LikePrefix {
3159                            low: prefix.clone(),
3160                            high: upper_bound.clone(),
3161                        });
3162                    }
3163                }
3164                WhereTermKind::Range | WhereTermKind::Between => {
3165                    summary.has_range = true;
3166                }
3167                WhereTermKind::RowidEquality | WhereTermKind::Other => {}
3168            }
3169        }
3170    }
3171
3172    // --- Multi-column equality prefix ---
3173    // Walk index columns left-to-right, counting how many have equality terms.
3174    let eq_columns = column_summaries
3175        .iter()
3176        .take_while(|summary| summary.has_equality)
3177        .count();
3178
3179    // Preserve the composite shape when we have either:
3180    // - equality on 2+ consecutive index columns, or
3181    // - equality on a leading prefix plus an IN/range constraint on the next
3182    //   column.
3183    if eq_columns >= 1 {
3184        let trailing_constraint = if eq_columns < index.columns.len() {
3185            let summary = &column_summaries[eq_columns];
3186            if let Some(probe_count) = summary.first_in_probe_count {
3187                MultiColumnTrailingConstraint::InExpansion { probe_count }
3188            } else if summary.first_like_prefix.is_some() {
3189                MultiColumnTrailingConstraint::LikePrefix
3190            } else if summary.has_range {
3191                MultiColumnTrailingConstraint::Range
3192            } else {
3193                MultiColumnTrailingConstraint::None
3194            }
3195        } else {
3196            MultiColumnTrailingConstraint::None
3197        };
3198
3199        if eq_columns >= 2 || !matches!(trailing_constraint, MultiColumnTrailingConstraint::None) {
3200            return IndexUsability::MultiColumnEquality {
3201                eq_columns,
3202                trailing_constraint,
3203            };
3204        }
3205    }
3206
3207    // --- Single leftmost column checks (original logic) ---
3208    if let Some(usability) = leftmost_first_constraint {
3209        return usability;
3210    }
3211
3212    if column_summaries[0].has_range {
3213        return IndexUsability::Range {
3214            selectivity: DEFAULT_RANGE_SELECTIVITY,
3215        };
3216    }
3217
3218    IndexUsability::NotUsable
3219}
3220
3221/// Analyze usability for an expression index by matching WHERE term expressions
3222/// against the index's expression columns using structural equality
3223/// (`Expr::PartialEq`, which is manually implemented in fsqlite-ast to ignore
3224/// every node's `Span` field — see the doc comment on `impl PartialEq for
3225/// Expr`).  That span-insensitivity is what makes cross-parse-context
3226/// matching work: the index key is parsed from its stand-alone SQL text at
3227/// schema-load time while the WHERE clause is parsed as part of the
3228/// enclosing SELECT, so the two ASTs carry different byte offsets.
3229///
3230/// Note on classification interplay (issue #63):
3231/// `classify_where_term` only assigns `WhereTermKind::Equality` when the left-
3232/// hand side of an `=` BinaryOp is a bare column (via `extract_where_column`).
3233/// For predicates like `lower(name) = 'alice'` the left side is a function
3234/// call, so the term is classified as `WhereTermKind::Other` even though it
3235/// is structurally `<expr> = <literal>`.  We therefore match against the raw
3236/// `term.expr` AST here — inspecting the BinaryOp / Between directly —
3237/// instead of filtering by `term.kind`.
3238fn analyze_expression_index_usability(
3239    index: &IndexInfo,
3240    terms: &[WhereTerm<'_>],
3241) -> IndexUsability {
3242    let Some(first_expr) = index.expression_columns.first() else {
3243        return IndexUsability::NotUsable;
3244    };
3245
3246    // Pass 1: prefer Equality matches (Equality beats Range on the same key).
3247    for term in terms {
3248        if let Expr::BinaryOp {
3249            left,
3250            op: AstBinaryOp::Eq,
3251            right,
3252            ..
3253        } = term.expr
3254        {
3255            // Match <expr> = <value> or <value> = <expr>.  NULL equality
3256            // cannot drive an index seek (SQL semantics), so skip the
3257            // `x = NULL` / `NULL = x` degenerate forms exactly like
3258            // classify_where_term does for plain columns.
3259            let left_is_null = matches!(left.as_ref(), Expr::Literal(Literal::Null, _));
3260            let right_is_null = matches!(right.as_ref(), Expr::Literal(Literal::Null, _));
3261            if left_is_null || right_is_null {
3262                continue;
3263            }
3264            if **left == *first_expr || **right == *first_expr {
3265                return IndexUsability::Equality;
3266            }
3267        }
3268    }
3269
3270    // Pass 2: fall back to Range/Between matches.
3271    for term in terms {
3272        if let Expr::BinaryOp {
3273            left,
3274            op: AstBinaryOp::Lt | AstBinaryOp::Le | AstBinaryOp::Gt | AstBinaryOp::Ge,
3275            right,
3276            ..
3277        } = term.expr
3278        {
3279            if **left == *first_expr || **right == *first_expr {
3280                return IndexUsability::Range {
3281                    selectivity: DEFAULT_RANGE_SELECTIVITY,
3282                };
3283            }
3284        }
3285        if let Expr::Between {
3286            expr: inner, not, ..
3287        } = term.expr
3288        {
3289            if !*not && **inner == *first_expr {
3290                return IndexUsability::Range {
3291                    selectivity: DEFAULT_RANGE_SELECTIVITY,
3292                };
3293            }
3294        }
3295    }
3296
3297    IndexUsability::NotUsable
3298}
3299
/// Default selectivity for range constraints when no ANALYZE data is available.
/// 0.33 means "a range predicate eliminates ~67% of rows." This is a
/// conservative estimate matching C SQLite's heuristic for tables without
/// `sqlite_stat1` data. When ANALYZE has been run, the planner uses the
/// actual statistics from sqlite_stat1 instead.
const DEFAULT_RANGE_SELECTIVITY: f64 = 0.33;
/// Selectivity heuristic for a constant LIKE/GLOB prefix range.
const LIKE_PREFIX_SELECTIVITY: f64 = 0.10;
/// Per-probe selectivity that `analyze_skip_scan_candidate` reports when the
/// column after the skipped leading column carries an equality or IN-list
/// constraint (1% = 100 distinct values).
const SKIP_SCAN_EQ_SELECTIVITY: f64 = 0.01;
/// Per-probe selectivity that `analyze_skip_scan_candidate` reports when the
/// column after the skipped leading column carries only a range, BETWEEN or
/// LIKE-prefix constraint.
const SKIP_SCAN_RANGE_SELECTIVITY: f64 = 0.20;
/// Maximum estimated distinct values for a skip-scan leading column; indexes
/// whose estimate exceeds this are rejected as skip-scan candidates.
const SKIP_SCAN_MAX_LEADING_DISTINCT: u64 = 16;
/// Pages per distinct value for skip-scan cost estimation: the index page
/// count is divided by this to estimate the leading column's distinct values.
const SKIP_SCAN_PAGES_PER_LEADING_DISTINCT: u64 = 8;
3316
3317fn estimate_skip_scan_leading_distinct(index: &IndexInfo) -> u64 {
3318    (index.n_pages / SKIP_SCAN_PAGES_PER_LEADING_DISTINCT).max(1)
3319}
3320
3321fn analyze_skip_scan_candidate(
3322    table: &TableStats,
3323    index: &IndexInfo,
3324    terms: &[WhereTerm<'_>],
3325) -> Option<SkipScanCandidate> {
3326    if index.columns.len() < 2
3327        || (!matches!(table.source, StatsSource::Analyze)
3328            && !matches!(index.source, StatsSource::Analyze))
3329    {
3330        return None;
3331    }
3332
3333    let col_matches = |wc: &WhereColumn, idx_col: &str| -> bool {
3334        wc.column.eq_ignore_ascii_case(idx_col)
3335            && wc
3336                .table
3337                .as_ref()
3338                .is_none_or(|t| t.eq_ignore_ascii_case(&index.table))
3339    };
3340
3341    let leading_col = &index.columns[0];
3342    let second_col = &index.columns[1];
3343    let leading_constrained = terms.iter().any(|term| {
3344        term.column.as_ref().is_some_and(|wc| {
3345            col_matches(wc, leading_col)
3346                && matches!(
3347                    term.kind,
3348                    WhereTermKind::Equality
3349                        | WhereTermKind::Range
3350                        | WhereTermKind::Between
3351                        | WhereTermKind::InList { .. }
3352                        | WhereTermKind::LikePrefix { .. }
3353                )
3354        })
3355    });
3356    if leading_constrained {
3357        return None;
3358    }
3359
3360    let leading_distinct = estimate_skip_scan_leading_distinct(index);
3361    if leading_distinct > SKIP_SCAN_MAX_LEADING_DISTINCT {
3362        return None;
3363    }
3364
3365    let mut second_column_summary = IndexColumnTermSummary::default();
3366
3367    // The current heuristic only prices skip-scan over one skipped leading
3368    // column. If the first usable constraint is deeper in the key, the planner
3369    // would also need the distinct cardinality of every skipped prefix, not
3370    // just the leftmost column, to avoid underestimating cost.
3371    for term in terms {
3372        let Some(wc) = term.column.as_ref() else {
3373            continue;
3374        };
3375        if !col_matches(wc, second_col) {
3376            continue;
3377        }
3378
3379        match &term.kind {
3380            WhereTermKind::Equality => second_column_summary.has_equality = true,
3381            WhereTermKind::InList { count }
3382                if *count > 0
3383                    && second_column_summary
3384                        .first_in_probe_count
3385                        .is_none_or(|existing| *count < existing) =>
3386            {
3387                second_column_summary.first_in_probe_count = Some(*count);
3388            }
3389            WhereTermKind::Range | WhereTermKind::Between | WhereTermKind::LikePrefix { .. } => {
3390                second_column_summary.has_range = true;
3391            }
3392            _ => {}
3393        }
3394    }
3395
3396    let (trailing_probe_count, per_probe_selectivity) = if second_column_summary.has_equality {
3397        (1, SKIP_SCAN_EQ_SELECTIVITY)
3398    } else if let Some(probe_count) = second_column_summary.first_in_probe_count {
3399        (probe_count, SKIP_SCAN_EQ_SELECTIVITY)
3400    } else if second_column_summary.has_range {
3401        (1, SKIP_SCAN_RANGE_SELECTIVITY)
3402    } else {
3403        return None;
3404    };
3405
3406    Some(SkipScanCandidate {
3407        leading_probes: leading_distinct as usize,
3408        trailing_probe_count,
3409        per_probe_selectivity,
3410    })
3411}
3412
3413// ---------------------------------------------------------------------------
3414// Join ordering: bounded beam search (§10.5)
3415// ---------------------------------------------------------------------------
3416
/// Return the beam width (`mxChoice`) to use for a join of `n_tables` tables.
///
/// Mirrors §10.5 / C SQLite's `computeMxChoice`:
/// - zero or one table: 1
/// - two tables: 5
/// - three or more tables: 18 when `is_star` is set, otherwise 12
///
/// `is_star` has no effect for joins of fewer than three tables.
#[must_use]
pub fn compute_mx_choice(n_tables: usize, is_star: bool) -> usize {
    match (n_tables, is_star) {
        (0 | 1, _) => 1,
        (2, _) => 5,
        (_, true) => 18,
        (_, false) => 12,
    }
}
3437
3438/// Detect a star-query pattern: one table joins to all other tables.
3439///
3440/// A star query has a central "fact" table that every dimension table
3441/// has a direct join predicate with.
3442#[must_use]
3443pub fn detect_star_query(tables: &[TableStats], where_terms: &[WhereTerm<'_>]) -> bool {
3444    if tables.len() < 3 {
3445        return false;
3446    }
3447
3448    // For each table, count how many OTHER tables it shares a join predicate with.
3449    let table_names: Vec<&str> = tables.iter().map(|t| t.name.as_str()).collect();
3450
3451    for candidate in &table_names {
3452        let mut join_partners = 0usize;
3453        for other in &table_names {
3454            if *other == *candidate {
3455                continue;
3456            }
3457            if has_join_predicate(candidate, other, where_terms) {
3458                join_partners += 1;
3459            }
3460        }
3461        if join_partners == table_names.len() - 1 {
3462            return true;
3463        }
3464    }
3465    false
3466}
3467
3468/// Check if two tables share a join predicate in the WHERE terms.
3469fn has_join_predicate(table_a: &str, table_b: &str, terms: &[WhereTerm<'_>]) -> bool {
3470    for term in terms {
3471        if let Expr::BinaryOp {
3472            left,
3473            op: AstBinaryOp::Eq,
3474            right,
3475            ..
3476        } = term.expr
3477        {
3478            let left_col = extract_where_column(left);
3479            let right_col = extract_where_column(right);
3480            if let (Some(lc), Some(rc)) = (left_col, right_col) {
3481                let lt = lc.table.as_deref().unwrap_or("");
3482                let rt = rc.table.as_deref().unwrap_or("");
3483                if (lt.eq_ignore_ascii_case(table_a) && rt.eq_ignore_ascii_case(table_b))
3484                    || (lt.eq_ignore_ascii_case(table_b) && rt.eq_ignore_ascii_case(table_a))
3485                {
3486                    return true;
3487                }
3488            }
3489        }
3490    }
3491    false
3492}
3493
/// Fraction of the row-count product assumed to survive each pairwise hash
/// join; `estimate_pairwise_hash_join_cost` multiplies the running
/// intermediate row count and the next relation's row count by this factor.
const HASH_JOIN_SELECTIVITY_HEURISTIC: f64 = 0.25;
/// Per-relation overhead applied to the AGM bound when pricing a Leapfrog
/// join: `choose_join_segments` scales the bound by `1 + factor * relations`.
const LEAPFROG_SEEK_OVERHEAD_FACTOR: f64 = 0.20;
3496
/// A table-qualified column used as an endpoint of a join predicate.
///
/// As built by `extract_qualified_column`, the table name has been passed
/// through `canonical_table_key` and the column name is ASCII-lowercased, so
/// keys can be compared and hashed directly.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct ColumnKey {
    /// Table name (canonicalized when produced by `extract_qualified_column`).
    table: String,
    /// Column name (ASCII-lowercased when produced by `extract_qualified_column`).
    column: String,
}
3502
/// An equality predicate between two table-qualified columns.
///
/// `collect_join_predicates` emits one of these for every `=` WHERE term whose
/// two operands are qualified columns of different tables in the join set.
#[derive(Debug, Clone, PartialEq, Eq)]
struct EquiJoinPredicate {
    /// Column appearing on the left-hand side of the `=`.
    left: ColumnKey,
    /// Column appearing on the right-hand side of the `=`.
    right: ColumnKey,
}
3508
/// Join-variable structure of an equi-join component, as produced by
/// `build_trie_hypergraph`.
///
/// Columns linked (directly or transitively) by equality predicates are
/// collapsed into a single numbered join variable.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TrieHypergraph {
    /// For each relation of the component (same order as the component slice),
    /// the sorted ids of the join variables it participates in.
    relation_variables: Vec<Vec<usize>>,
    /// Total number of distinct join variables.
    variable_count: usize,
    /// Number of join variables per relation (identical for every relation).
    arity: usize,
}
3515
/// Disjoint-set forest over the indices `0..size`.
///
/// `parent[i]` is the parent of element `i` (roots are their own parent) and
/// `rank[i]` is the rank used to decide which root absorbs the other in
/// `union`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct UnionFind {
    parent: Vec<usize>,
    rank: Vec<usize>,
}

impl UnionFind {
    /// Create `size` singleton sets, each with rank 0.
    fn new(size: usize) -> Self {
        let parent = (0..size).collect();
        let rank = vec![0; size];
        Self { parent, rank }
    }

    /// Return the representative of the set containing `idx`.
    ///
    /// Every element visited on the way to the root is re-pointed directly at
    /// the root. Panics if `idx` is out of range.
    fn find(&mut self, idx: usize) -> usize {
        // First walk: locate the root.
        let mut root = idx;
        while self.parent[root] != root {
            root = self.parent[root];
        }

        // Second walk: point every node on the path straight at the root.
        let mut cursor = idx;
        while cursor != root {
            let next = self.parent[cursor];
            self.parent[cursor] = root;
            cursor = next;
        }
        root
    }

    /// Merge the sets containing `left` and `right`.
    ///
    /// The root with the lower rank is attached beneath the other. On a tie
    /// the right root is attached beneath the left root, whose rank then grows
    /// by one. Does nothing when both are already in the same set.
    fn union(&mut self, left: usize, right: usize) {
        let left_root = self.find(left);
        let right_root = self.find(right);
        if left_root == right_root {
            return;
        }

        let left_rank = self.rank[left_root];
        let right_rank = self.rank[right_root];
        if left_rank < right_rank {
            self.parent[left_root] = right_root;
            return;
        }

        self.parent[right_root] = left_root;
        if left_rank == right_rank {
            self.rank[left_root] = left_rank + 1;
        }
    }
}
3560
/// Select join operator segments for a query plan.
///
/// This function is additive to `order_joins`: it annotates a chosen join order
/// with hash vs Leapfrog routing decisions and can be called directly by higher
/// layers that have `FROM`-clause shape information.
///
/// Behaviour, in order:
/// - Fewer than two tables in `join_order` yields an empty result.
/// - Leapfrog Triejoin is only considered when `feature_flags.leapfrog_join`
///   is set and `from_clause_supports_leapfrog` accepts the `FROM` shape.
///   Tables touched by a non-equality cross-table predicate are excluded; the
///   remaining tables are grouped into connected components of the equi-join
///   graph, and a component of three or more tables becomes a Leapfrog segment
///   when its estimated Leapfrog cost is strictly below its pairwise hash cost.
/// - If no Leapfrog segment was chosen, a single `HashJoin` segment covering
///   the whole `join_order` is returned, with a `reason` explaining why.
/// - Otherwise, tables not covered by a Leapfrog segment get one `HashJoin`
///   segment if at least two of them remain (a single leftover table gets no
///   segment), and all segments are sorted by the position of their first
///   relation in `join_order`.
///
/// Segment `relations` use the table names as spelled in `join_order`;
/// internally tables are matched through `canonical_table_key`.
#[must_use]
#[allow(clippy::too_many_lines)]
pub fn choose_join_segments(
    join_order: &[String],
    tables: &[TableStats],
    where_terms: &[WhereTerm<'_>],
    from_clause: Option<&FromClause>,
    feature_flags: PlannerFeatureFlags,
) -> Vec<JoinPlanSegment> {
    // A join needs at least two relations.
    if join_order.len() < 2 {
        return vec![];
    }

    // Canonical keys, in join order; all internal lookups use these.
    let join_order_canonical = join_order
        .iter()
        .map(|table| canonical_table_key(table))
        .collect::<Vec<_>>();

    // Maps a canonical key back to the caller's spelling for the output.
    let canonical_to_original = join_order
        .iter()
        .map(|table| (canonical_table_key(table), table.clone()))
        .collect::<HashMap<_, _>>();

    let join_table_set = join_order_canonical.iter().cloned().collect::<HashSet<_>>();
    let rows_by_table = build_table_row_map(tables, &join_order_canonical);
    let (equi_predicates, theta_join_tables) =
        collect_join_predicates(where_terms, &join_table_set);
    let leapfrog_shape_supported = from_clause_supports_leapfrog(from_clause);

    // Each entry: (relations in join order, leapfrog cost, hash cost, trie arity).
    let mut selected_components: Vec<(Vec<String>, f64, f64, usize)> = vec![];
    let mut selected_tables = HashSet::<String>::new();

    if feature_flags.leapfrog_join && leapfrog_shape_supported {
        // Tables involved in a non-equality cross-table predicate are not
        // eligible for Leapfrog.
        let leapfrog_candidates = join_order_canonical
            .iter()
            .filter(|table| !theta_join_tables.contains(*table))
            .cloned()
            .collect::<Vec<_>>();

        for component in connected_components(&leapfrog_candidates, &equi_predicates) {
            // Only components of three or more relations are considered.
            if component.len() < 3 {
                continue;
            }
            // Re-establish join order inside the component.
            let component_set = component.iter().cloned().collect::<HashSet<_>>();
            let ordered_component = ordered_subset(&join_order_canonical, &component_set);
            let Some(hypergraph) = build_trie_hypergraph(&ordered_component, &equi_predicates)
            else {
                continue;
            };
            let hash_cost = estimate_pairwise_hash_join_cost(&ordered_component, &rows_by_table);
            let Some(agm_bound) =
                estimate_agm_upper_bound(&ordered_component, &rows_by_table, &hypergraph)
            else {
                continue;
            };
            // cost = agm_bound * (1 + LEAPFROG_SEEK_OVERHEAD_FACTOR * relation count)
            let leapfrog_cost = agm_bound
                * LEAPFROG_SEEK_OVERHEAD_FACTOR.mul_add(ordered_component.len() as f64, 1.0);
            // Leapfrog must be strictly cheaper than the pairwise hash plan.
            if leapfrog_cost < hash_cost {
                for table in &ordered_component {
                    selected_tables.insert(table.clone());
                }
                selected_components.push((
                    ordered_component,
                    leapfrog_cost,
                    hash_cost,
                    hypergraph.arity,
                ));
            }
        }
    }

    // Turn the chosen components into Leapfrog segments. The value labelled
    // "AGM estimate" in the reason text is the overhead-scaled leapfrog cost.
    let mut segments = selected_components
        .into_iter()
        .map(
            |(relations, leapfrog_cost, hash_cost, arity)| JoinPlanSegment {
                relations: relations
                    .into_iter()
                    .filter_map(|table| canonical_to_original.get(&table).cloned())
                    .collect(),
                operator: JoinOperator::LeapfrogTriejoin,
                estimated_cost: leapfrog_cost,
                reason: format!(
                    "AGM estimate {:.1} beats hash cost {:.1}; trie arity {}",
                    leapfrog_cost, hash_cost, arity
                ),
            },
        )
        .collect::<Vec<_>>();

    // No Leapfrog segment: fall back to one hash-join segment over the whole
    // join order. The first applicable reason in the chain below is reported.
    if segments.is_empty() {
        let hash_cost = estimate_pairwise_hash_join_cost(&join_order_canonical, &rows_by_table);
        let reason = if !feature_flags.leapfrog_join {
            "leapfrog_join feature flag disabled".to_owned()
        } else if !leapfrog_shape_supported {
            "outer/natural/theta join shape is not Leapfrog-compatible".to_owned()
        } else if join_order.len() < 3 {
            "2-way joins stay on pairwise hash join".to_owned()
        } else if !theta_join_tables.is_empty() {
            "theta/non-equi join predicates require hash fallback".to_owned()
        } else {
            "no compatible 3+ equi-join component with lower AGM estimate".to_owned()
        };
        return vec![JoinPlanSegment {
            relations: join_order.to_vec(),
            operator: JoinOperator::HashJoin,
            estimated_cost: hash_cost,
            reason,
        }];
    }

    // Tables not claimed by any Leapfrog segment, still in join order.
    let remaining_tables = join_order_canonical
        .iter()
        .filter(|table| !selected_tables.contains(*table))
        .cloned()
        .collect::<Vec<_>>();
    // A hash-join segment is only emitted for two or more leftover tables.
    if remaining_tables.len() >= 2 {
        let hash_cost = estimate_pairwise_hash_join_cost(&remaining_tables, &rows_by_table);
        segments.push(JoinPlanSegment {
            relations: remaining_tables
                .iter()
                .filter_map(|table| canonical_to_original.get(table).cloned())
                .collect(),
            operator: JoinOperator::HashJoin,
            estimated_cost: hash_cost,
            reason: "remaining joins use pairwise hash join".to_owned(),
        });
    }

    // Order segments by where their first relation appears in the join order;
    // segments whose first relation cannot be located sort last.
    let join_order_position = join_order_canonical
        .iter()
        .enumerate()
        .map(|(idx, table)| (table.clone(), idx))
        .collect::<HashMap<_, _>>();
    segments.sort_by_key(|segment| {
        segment
            .relations
            .first()
            .and_then(|table| {
                join_order_position
                    .get(&canonical_table_key(table))
                    .copied()
            })
            .unwrap_or(usize::MAX)
    });
    segments
}
3712
3713fn build_table_row_map(
3714    tables: &[TableStats],
3715    join_order_canonical: &[String],
3716) -> HashMap<String, f64> {
3717    let mut rows_by_table = tables
3718        .iter()
3719        .map(|table| (canonical_table_key(&table.name), table.n_rows.max(1) as f64))
3720        .collect::<HashMap<_, _>>();
3721    for table in join_order_canonical {
3722        rows_by_table.entry(table.clone()).or_insert(1.0);
3723    }
3724    rows_by_table
3725}
3726
3727fn collect_join_predicates(
3728    where_terms: &[WhereTerm<'_>],
3729    join_table_set: &HashSet<String>,
3730) -> (Vec<EquiJoinPredicate>, HashSet<String>) {
3731    let mut equi_predicates = Vec::new();
3732    let mut theta_join_tables = HashSet::new();
3733
3734    for term in where_terms {
3735        let Expr::BinaryOp {
3736            left, op, right, ..
3737        } = term.expr
3738        else {
3739            continue;
3740        };
3741        let Some(left_col) = extract_qualified_column(left) else {
3742            continue;
3743        };
3744        let Some(right_col) = extract_qualified_column(right) else {
3745            continue;
3746        };
3747        if left_col.table == right_col.table {
3748            continue;
3749        }
3750        if !join_table_set.contains(&left_col.table) || !join_table_set.contains(&right_col.table) {
3751            continue;
3752        }
3753
3754        if *op == AstBinaryOp::Eq {
3755            equi_predicates.push(EquiJoinPredicate {
3756                left: left_col,
3757                right: right_col,
3758            });
3759        } else {
3760            theta_join_tables.insert(left_col.table);
3761            theta_join_tables.insert(right_col.table);
3762        }
3763    }
3764
3765    (equi_predicates, theta_join_tables)
3766}
3767
3768fn extract_qualified_column(expr: &Expr) -> Option<ColumnKey> {
3769    let Expr::Column(column_ref, _) = expr else {
3770        return None;
3771    };
3772    let table = column_ref.table.as_ref()?;
3773    Some(ColumnKey {
3774        table: canonical_table_key(table),
3775        column: column_ref.column.to_ascii_lowercase(),
3776    })
3777}
3778
3779fn connected_components(tables: &[String], predicates: &[EquiJoinPredicate]) -> Vec<Vec<String>> {
3780    if tables.is_empty() {
3781        return vec![];
3782    }
3783
3784    let table_set = tables.iter().cloned().collect::<HashSet<_>>();
3785    let mut adjacency = tables
3786        .iter()
3787        .map(|table| (table.clone(), HashSet::<String>::new()))
3788        .collect::<HashMap<_, _>>();
3789
3790    for predicate in predicates {
3791        if table_set.contains(&predicate.left.table) && table_set.contains(&predicate.right.table) {
3792            adjacency
3793                .entry(predicate.left.table.clone())
3794                .or_default()
3795                .insert(predicate.right.table.clone());
3796            adjacency
3797                .entry(predicate.right.table.clone())
3798                .or_default()
3799                .insert(predicate.left.table.clone());
3800        }
3801    }
3802
3803    let mut visited = HashSet::<String>::new();
3804    let mut components = Vec::new();
3805    for table in tables {
3806        if visited.contains(table) {
3807            continue;
3808        }
3809        let mut stack = vec![table.clone()];
3810        let mut component = Vec::new();
3811        while let Some(current) = stack.pop() {
3812            if !visited.insert(current.clone()) {
3813                continue;
3814            }
3815            component.push(current.clone());
3816            if let Some(neighbors) = adjacency.get(&current) {
3817                for neighbor in neighbors {
3818                    if !visited.contains(neighbor) {
3819                        stack.push(neighbor.clone());
3820                    }
3821                }
3822            }
3823        }
3824        components.push(component);
3825    }
3826
3827    components
3828}
3829
/// Return the entries of `join_order` that are members of `selected_tables`,
/// preserving their order (and any repetitions) from `join_order`.
fn ordered_subset(join_order: &[String], selected_tables: &HashSet<String>) -> Vec<String> {
    let mut subset = Vec::new();
    for table in join_order {
        if selected_tables.contains(table) {
            subset.push(table.clone());
        }
    }
    subset
}
3837
3838fn estimate_pairwise_hash_join_cost(
3839    component: &[String],
3840    rows_by_table: &HashMap<String, f64>,
3841) -> f64 {
3842    if component.len() < 2 {
3843        return 0.0;
3844    }
3845
3846    let mut iter = component.iter();
3847    let first_rows = iter
3848        .next()
3849        .and_then(|table| rows_by_table.get(table))
3850        .copied()
3851        .unwrap_or(1.0)
3852        .max(1.0);
3853    let mut intermediate_rows = first_rows;
3854    let mut total_cost = 0.0;
3855
3856    for table in iter {
3857        let relation_rows = rows_by_table.get(table).copied().unwrap_or(1.0).max(1.0);
3858        total_cost += intermediate_rows.min(relation_rows) + intermediate_rows.max(relation_rows);
3859        intermediate_rows =
3860            (intermediate_rows * relation_rows * HASH_JOIN_SELECTIVITY_HEURISTIC).max(1.0);
3861    }
3862
3863    total_cost
3864}
3865
3866#[allow(clippy::too_many_lines)]
3867fn build_trie_hypergraph(
3868    component: &[String],
3869    predicates: &[EquiJoinPredicate],
3870) -> Option<TrieHypergraph> {
3871    if component.len() < 2 {
3872        return None;
3873    }
3874
3875    let component_set = component.iter().cloned().collect::<HashSet<_>>();
3876    let table_to_index = component
3877        .iter()
3878        .enumerate()
3879        .map(|(idx, table)| (table.clone(), idx))
3880        .collect::<HashMap<_, _>>();
3881
3882    let mut endpoint_ids = HashMap::<ColumnKey, usize>::new();
3883    let mut edge_endpoint_pairs = Vec::<(usize, usize, String, String)>::new();
3884    for predicate in predicates {
3885        if !component_set.contains(&predicate.left.table)
3886            || !component_set.contains(&predicate.right.table)
3887        {
3888            continue;
3889        }
3890        let left_entry = if let Some(existing) = endpoint_ids.get(&predicate.left).copied() {
3891            existing
3892        } else {
3893            let next = endpoint_ids.len();
3894            endpoint_ids.insert(predicate.left.clone(), next);
3895            next
3896        };
3897        let right_entry = if let Some(existing) = endpoint_ids.get(&predicate.right).copied() {
3898            existing
3899        } else {
3900            let next = endpoint_ids.len();
3901            endpoint_ids.insert(predicate.right.clone(), next);
3902            next
3903        };
3904        edge_endpoint_pairs.push((
3905            left_entry,
3906            right_entry,
3907            predicate.left.table.clone(),
3908            predicate.right.table.clone(),
3909        ));
3910    }
3911
3912    if edge_endpoint_pairs.is_empty() {
3913        return None;
3914    }
3915
3916    let mut union_find = UnionFind::new(endpoint_ids.len());
3917    for (left_id, right_id, _, _) in &edge_endpoint_pairs {
3918        union_find.union(*left_id, *right_id);
3919    }
3920
3921    let mut root_to_variable = HashMap::<usize, usize>::new();
3922    let mut relation_variable_sets = vec![HashSet::<usize>::new(); component.len()];
3923    for (left_id, right_id, left_table, right_table) in edge_endpoint_pairs {
3924        let left_root = union_find.find(left_id);
3925        let right_root = union_find.find(right_id);
3926        let left_variable = if let Some(existing) = root_to_variable.get(&left_root).copied() {
3927            existing
3928        } else {
3929            let next = root_to_variable.len();
3930            root_to_variable.insert(left_root, next);
3931            next
3932        };
3933        let right_variable = if let Some(existing) = root_to_variable.get(&right_root).copied() {
3934            existing
3935        } else {
3936            let next = root_to_variable.len();
3937            root_to_variable.insert(right_root, next);
3938            next
3939        };
3940        let left_index = *table_to_index.get(&left_table)?;
3941        let right_index = *table_to_index.get(&right_table)?;
3942        relation_variable_sets[left_index].insert(left_variable);
3943        relation_variable_sets[right_index].insert(right_variable);
3944    }
3945
3946    if relation_variable_sets.iter().any(HashSet::is_empty) {
3947        return None;
3948    }
3949    let expected_arity = relation_variable_sets.first()?.len();
3950    if expected_arity == 0
3951        || relation_variable_sets
3952            .iter()
3953            .any(|variables| variables.len() != expected_arity)
3954    {
3955        return None;
3956    }
3957
3958    let variable_count = root_to_variable.len();
3959    let mut variable_degree = vec![0usize; variable_count];
3960    for variables in &relation_variable_sets {
3961        for variable in variables {
3962            variable_degree[*variable] += 1;
3963        }
3964    }
3965    if variable_degree.iter().any(|degree| *degree < 2) {
3966        return None;
3967    }
3968
3969    let relation_variables = relation_variable_sets
3970        .into_iter()
3971        .map(|variables| {
3972            let mut ordered = variables.into_iter().collect::<Vec<_>>();
3973            ordered.sort_unstable();
3974            ordered
3975        })
3976        .collect::<Vec<_>>();
3977
3978    Some(TrieHypergraph {
3979        relation_variables,
3980        variable_count,
3981        arity: expected_arity,
3982    })
3983}
3984
3985fn estimate_agm_upper_bound(
3986    component: &[String],
3987    rows_by_table: &HashMap<String, f64>,
3988    hypergraph: &TrieHypergraph,
3989) -> Option<f64> {
3990    if component.len() != hypergraph.relation_variables.len() || hypergraph.variable_count == 0 {
3991        return None;
3992    }
3993
3994    let mut variable_degree = vec![0usize; hypergraph.variable_count];
3995    for variables in &hypergraph.relation_variables {
3996        for variable in variables {
3997            variable_degree[*variable] += 1;
3998        }
3999    }
4000
4001    let mut bound = 1.0;
4002    for (relation_idx, table) in component.iter().enumerate() {
4003        let row_count = rows_by_table.get(table).copied().unwrap_or(1.0).max(1.0);
4004        let exponent = hypergraph.relation_variables[relation_idx]
4005            .iter()
4006            .map(|variable| 1.0 / variable_degree[*variable] as f64)
4007            .fold(0.0, f64::max);
4008        bound *= row_count.powf(exponent);
4009    }
4010    Some(bound.max(1.0))
4011}
4012
4013fn from_clause_supports_leapfrog(from_clause: Option<&FromClause>) -> bool {
4014    let Some(from_clause) = from_clause else {
4015        return true;
4016    };
4017
4018    for join in &from_clause.joins {
4019        if join.join_type.natural {
4020            return false;
4021        }
4022        if !matches!(join.join_type.kind, JoinKind::Inner | JoinKind::Cross) {
4023            return false;
4024        }
4025        if let Some(constraint) = &join.constraint {
4026            match constraint {
4027                JoinConstraint::Using(columns) => {
4028                    if columns.is_empty() {
4029                        return false;
4030                    }
4031                }
4032                JoinConstraint::On(expr) => {
4033                    let conjuncts = decompose_where(expr);
4034                    if conjuncts.is_empty() {
4035                        return false;
4036                    }
4037                    if conjuncts
4038                        .iter()
4039                        .any(|conjunct| !expression_is_equi_column_predicate(conjunct))
4040                    {
4041                        return false;
4042                    }
4043                }
4044            }
4045        }
4046    }
4047
4048    true
4049}
4050
4051fn expression_is_equi_column_predicate(expr: &Expr) -> bool {
4052    matches!(
4053        expr,
4054        Expr::BinaryOp {
4055            left,
4056            op: AstBinaryOp::Eq,
4057            right,
4058            ..
4059        } if extract_where_column(left).is_some() && extract_where_column(right).is_some()
4060    )
4061}
4062
4063/// A partial join path during beam search.
4064#[derive(Debug, Clone)]
4065struct PartialPath {
4066    /// Tables joined so far, in order.
4067    tables: Vec<String>,
4068    /// Access paths for each table.
4069    access_paths: Vec<AccessPath>,
4070    /// Cumulative cost.
4071    cost: f64,
4072    /// Product of estimated rows across all tables joined so far.
4073    cumulative_rows: f64,
4074}
4075
4076/// Order tables using bounded beam search (NGQP-style, §10.5).
4077///
4078/// Maintains up to `mxChoice` best partial paths at each level, pruning
4079/// suboptimal paths early. Complexity: `O(mxChoice * N^2)`, not `N!`.
4080///
4081/// # Arguments
4082///
4083/// - `tables`: Statistics for each table in the FROM clause.
4084/// - `indexes`: All available indexes.
4085/// - `where_terms`: Classified WHERE terms.
4086/// - `needed_columns`: Columns needed in the result (for covering index detection).
4087/// - `cross_join_pairs`: Pairs of tables that are `CROSS JOIN`ed (prevents reordering).
4088#[must_use]
4089pub fn order_joins(
4090    tables: &[TableStats],
4091    indexes: &[IndexInfo],
4092    where_terms: &[WhereTerm<'_>],
4093    needed_columns: Option<&[String]>,
4094    cross_join_pairs: &[(String, String)],
4095) -> QueryPlan {
4096    order_joins_with_hints(
4097        tables,
4098        indexes,
4099        where_terms,
4100        needed_columns,
4101        cross_join_pairs,
4102        None,
4103        None,
4104    )
4105}
4106
4107fn join_access_path(
4108    table: &TableStats,
4109    indexes: &[IndexInfo],
4110    where_terms: &[WhereTerm<'_>],
4111    needed_columns: Option<&[String]>,
4112    table_index_hints: Option<&BTreeMap<String, IndexHint>>,
4113    cracking_hints: Option<&CrackingHintStore>,
4114) -> AccessPath {
4115    let explicit_hint = lookup_table_index_hint(&table.name, table_index_hints);
4116    let adaptive_hint = cracking_hints.and_then(|store| store.preferred_index(&table.name));
4117    best_access_path_internal(
4118        table,
4119        indexes,
4120        where_terms,
4121        needed_columns,
4122        explicit_hint,
4123        adaptive_hint,
4124        &[],
4125    )
4126}
4127
4128/// Order tables using bounded beam search while honoring table-level
4129/// `INDEXED BY`/`NOT INDEXED` hints and optional adaptive cracking hints.
4130#[must_use]
4131#[allow(clippy::too_many_lines)]
4132pub fn order_joins_with_hints(
4133    tables: &[TableStats],
4134    indexes: &[IndexInfo],
4135    where_terms: &[WhereTerm<'_>],
4136    needed_columns: Option<&[String]>,
4137    cross_join_pairs: &[(String, String)],
4138    table_index_hints: Option<&BTreeMap<String, IndexHint>>,
4139    cracking_hints: Option<&mut CrackingHintStore>,
4140) -> QueryPlan {
4141    order_joins_with_hints_and_features(
4142        tables,
4143        indexes,
4144        where_terms,
4145        needed_columns,
4146        cross_join_pairs,
4147        table_index_hints,
4148        cracking_hints,
4149        PlannerFeatureFlags::default(),
4150    )
4151}
4152
4153/// Order tables using bounded beam search and select join operators (hash vs
4154/// Leapfrog Triejoin) based on feature flags and cost model.
4155#[must_use]
4156#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
4157pub fn order_joins_with_hints_and_features(
4158    tables: &[TableStats],
4159    indexes: &[IndexInfo],
4160    where_terms: &[WhereTerm<'_>],
4161    needed_columns: Option<&[String]>,
4162    cross_join_pairs: &[(String, String)],
4163    table_index_hints: Option<&BTreeMap<String, IndexHint>>,
4164    cracking_hints: Option<&mut CrackingHintStore>,
4165    feature_flags: PlannerFeatureFlags,
4166) -> QueryPlan {
4167    let n = tables.len();
4168
4169    if n == 0 {
4170        return QueryPlan {
4171            join_order: vec![],
4172            access_paths: vec![],
4173            join_segments: vec![],
4174            total_cost: 0.0,
4175            morsel_eligibility: None,
4176        };
4177    }
4178
4179    if n == 1 {
4180        let ap = join_access_path(
4181            &tables[0],
4182            indexes,
4183            where_terms,
4184            needed_columns,
4185            table_index_hints,
4186            cracking_hints.as_deref(),
4187        );
4188        // Move the access path into the plan rather than cloning it (its cost
4189        // is a Copy f64, captured first), so the single owned AccessPath — which
4190        // carries two heap Strings — is not duplicated on this dominant
4191        // single-table planning path.
4192        let total_cost = ap.estimated_cost;
4193        let plan = QueryPlan {
4194            join_order: vec![tables[0].name.clone()],
4195            access_paths: vec![ap],
4196            join_segments: vec![],
4197            total_cost,
4198            morsel_eligibility: None,
4199        };
4200        if let Some(store) = cracking_hints {
4201            for access_path in &plan.access_paths {
4202                store.record_access_path(access_path);
4203            }
4204        }
4205        FSQLITE_PLANNER_PLANS_ENUMERATED.fetch_add(1, Ordering::Relaxed);
4206        return plan;
4207    }
4208
4209    if feature_flags.dpccp_join && n <= DPCCP_MAX_TABLES {
4210        if let Some((order_indices, total_cost, plans_counted, branches_pruned)) = dpccp_order_joins(
4211            tables,
4212            indexes,
4213            where_terms,
4214            needed_columns,
4215            table_index_hints,
4216            cross_join_pairs,
4217            cracking_hints.as_deref(),
4218        ) {
4219            let join_order = order_indices
4220                .iter()
4221                .map(|idx| tables[*idx].name.clone())
4222                .collect::<Vec<_>>();
4223            let access_paths = order_indices
4224                .iter()
4225                .map(|idx| {
4226                    join_access_path(
4227                        &tables[*idx],
4228                        indexes,
4229                        where_terms,
4230                        needed_columns,
4231                        table_index_hints,
4232                        cracking_hints.as_deref(),
4233                    )
4234                })
4235                .collect::<Vec<_>>();
4236            let join_segments =
4237                choose_join_segments(&join_order, tables, where_terms, None, feature_flags);
4238            let plan = QueryPlan {
4239                join_order,
4240                access_paths,
4241                join_segments,
4242                total_cost,
4243                morsel_eligibility: None,
4244            };
4245
4246            if let Some(store) = cracking_hints {
4247                for access_path in &plan.access_paths {
4248                    store.record_access_path(access_path);
4249                }
4250            }
4251
4252            FSQLITE_PLANNER_PLANS_ENUMERATED.fetch_add(plans_counted, Ordering::Relaxed);
4253
4254            tracing::debug!(
4255                join_order = ?plan.join_order,
4256                total_cost = plan.total_cost,
4257                table_count = n,
4258                plans_enumerated = plans_counted,
4259                branches_pruned,
4260                threshold = DPCCP_MAX_TABLES,
4261                algorithm = "dpccp_exhaustive",
4262                "planner.order_joins.complete"
4263            );
4264
4265            tracing::info!(
4266                join_order = ?plan.join_order,
4267                total_cost = plan.total_cost,
4268                table_count = n,
4269                plans_enumerated = plans_counted,
4270                branches_pruned,
4271                algorithm = "dpccp_exhaustive",
4272                "planner.plan_selected"
4273            );
4274
4275            return plan;
4276        }
4277
4278        tracing::debug!(
4279            table_count = n,
4280            threshold = DPCCP_MAX_TABLES,
4281            "planner.dpccp.no_plan_fallback_greedy"
4282        );
4283    }
4284
4285    let mut plans_enumerated: u64 = 0;
4286
4287    let is_star = detect_star_query(tables, where_terms);
4288    let mx_choice = if n > DPCCP_MAX_TABLES {
4289        // For large joins, use a greedy-width search (single best partial path).
4290        1
4291    } else {
4292        compute_mx_choice(n, is_star)
4293    };
4294
4295    // Seed: start with each table as a single-element path.
4296    // Skip tables that are blocked by CROSS JOIN constraints (right side of a
4297    // cross-join pair cannot appear unless the left side is already visited).
4298    let mut paths: Vec<PartialPath> = Vec::with_capacity(n);
4299    for t in tables {
4300        if !cross_join_allowed(&[], &t.name, cross_join_pairs) {
4301            continue;
4302        }
4303        let ap = join_access_path(
4304            t,
4305            indexes,
4306            where_terms,
4307            needed_columns,
4308            table_index_hints,
4309            cracking_hints.as_deref(),
4310        );
4311        let cumulative_rows = ap.estimated_rows;
4312        let cost = ap.estimated_cost;
4313        paths.push(PartialPath {
4314            tables: vec![t.name.clone()],
4315            access_paths: vec![ap],
4316            cost,
4317            cumulative_rows,
4318        });
4319    }
4320    paths.sort_by(|a, b| {
4321        a.cost
4322            .partial_cmp(&b.cost)
4323            .unwrap_or(std::cmp::Ordering::Equal)
4324    });
4325    paths.truncate(mx_choice);
4326
4327    // Extend paths one table at a time.
4328    for level in 1..n {
4329        let mut next_paths: Vec<PartialPath> = Vec::with_capacity(paths.len() * (n - level));
4330
4331        for path in &paths {
4332            for t in tables {
4333                // Skip if already in this path.
4334                if path
4335                    .tables
4336                    .iter()
4337                    .any(|existing| existing.eq_ignore_ascii_case(&t.name))
4338                {
4339                    continue;
4340                }
4341
4342                // Check CROSS JOIN constraint: if (last_in_path, t) is a cross-join
4343                // pair, only allow adding t if it's the next in the original order.
4344                if !cross_join_allowed(&path.tables, &t.name, cross_join_pairs) {
4345                    continue;
4346                }
4347
4348                let ap = join_access_path(
4349                    t,
4350                    indexes,
4351                    where_terms,
4352                    needed_columns,
4353                    table_index_hints,
4354                    cracking_hints.as_deref(),
4355                );
4356                // Scale inner table cost by the cumulative cardinality of
4357                // all outer tables (nested loop model).  For a 3-table join
4358                // T1⋈T2⋈T3, T3 executes once per (T1, T2) pair.
4359                let outer_rows = path.cumulative_rows;
4360                let inner_cost = ap.estimated_cost * outer_rows;
4361
4362                let mut new_tables = path.tables.clone();
4363                new_tables.push(t.name.clone());
4364                let mut new_aps = path.access_paths.clone();
4365                new_aps.push(ap.clone());
4366                let new_cost = path.cost + inner_cost;
4367                let new_cumulative_rows = path.cumulative_rows * ap.estimated_rows;
4368
4369                plans_enumerated += 1;
4370                tracing::debug!(
4371                    target: "fsqlite.planner",
4372                    tables = ?new_tables,
4373                    cost = new_cost,
4374                    "planner.candidate_plan"
4375                );
4376
4377                next_paths.push(PartialPath {
4378                    tables: new_tables,
4379                    access_paths: new_aps,
4380                    cost: new_cost,
4381                    cumulative_rows: new_cumulative_rows,
4382                });
4383            }
4384        }
4385
4386        next_paths.sort_by(|a, b| {
4387            a.cost
4388                .partial_cmp(&b.cost)
4389                .unwrap_or(std::cmp::Ordering::Equal)
4390        });
4391        next_paths.truncate(mx_choice);
4392        paths = next_paths;
4393    }
4394
4395    // Pick the lowest-cost complete path.  If CROSS JOIN constraints
4396    // eliminated all seed paths (shouldn't happen with valid SQL but
4397    // guard defensively), fall back to seeding every table.
4398    if paths.is_empty() {
4399        for t in tables {
4400            let ap = join_access_path(
4401                t,
4402                indexes,
4403                where_terms,
4404                needed_columns,
4405                table_index_hints,
4406                cracking_hints.as_deref(),
4407            );
4408            let cost = ap.estimated_cost;
4409            let cumulative_rows = ap.estimated_rows;
4410            paths.push(PartialPath {
4411                tables: vec![t.name.clone()],
4412                access_paths: vec![ap],
4413                cost,
4414                cumulative_rows,
4415            });
4416        }
4417    }
4418
4419    let best = paths
4420        .into_iter()
4421        .min_by(|a, b| {
4422            a.cost
4423                .partial_cmp(&b.cost)
4424                .unwrap_or(std::cmp::Ordering::Equal)
4425        })
4426        .expect("tables must be non-empty (checked n == 0 above)");
4427
4428    let join_segments =
4429        choose_join_segments(&best.tables, tables, where_terms, None, feature_flags);
4430
4431    let plan = QueryPlan {
4432        join_order: best.tables,
4433        access_paths: best.access_paths,
4434        join_segments,
4435        total_cost: best.cost,
4436        morsel_eligibility: None,
4437    };
4438
4439    if let Some(store) = cracking_hints {
4440        for access_path in &plan.access_paths {
4441            store.record_access_path(access_path);
4442        }
4443    }
4444
4445    FSQLITE_PLANNER_PLANS_ENUMERATED.fetch_add(plans_enumerated, Ordering::Relaxed);
4446
4447    let span = tracing::info_span!(
4448        target: "fsqlite.planner",
4449        "join_ordering",
4450        tables_count = n,
4451        plans_enumerated,
4452        selected_cost = plan.total_cost,
4453    );
4454    let _g = span.enter();
4455
4456    tracing::debug!(
4457        join_order = ?plan.join_order,
4458        total_cost = plan.total_cost,
4459        beam_width = mx_choice,
4460        star_query = is_star,
4461        table_count = n,
4462        index_hint_entries = table_index_hints.map_or(0, BTreeMap::len),
4463        algorithm = "greedy_width",
4464        threshold = DPCCP_MAX_TABLES,
4465        "planner.order_joins.complete"
4466    );
4467
4468    tracing::info!(
4469        join_order = ?plan.join_order,
4470        total_cost = plan.total_cost,
4471        table_count = n,
4472        plans_enumerated,
4473        algorithm = "greedy_width",
4474        "planner.plan_selected"
4475    );
4476
4477    plan
4478}
4479
/// Report whether `candidate` may be appended to `current_path` without
/// breaking a CROSS JOIN ordering constraint.
///
/// For every `(left, right)` pair whose `right` names the candidate
/// (ASCII case-insensitively), `left` must already be present in the path.
fn cross_join_allowed(
    current_path: &[String],
    candidate: &str,
    cross_join_pairs: &[(String, String)],
) -> bool {
    let already_joined = |name: &str| {
        current_path
            .iter()
            .any(|joined| joined.eq_ignore_ascii_case(name))
    };

    cross_join_pairs.iter().all(|(left, right)| {
        // Pairs that do not constrain the candidate are trivially satisfied.
        !right.eq_ignore_ascii_case(candidate) || already_joined(left.as_str())
    })
}
4497
4498fn cross_join_allowed_indices(
4499    current_path: &[usize],
4500    candidate: &str,
4501    tables: &[TableStats],
4502    cross_join_pairs: &[(String, String)],
4503) -> bool {
4504    for (left, right) in cross_join_pairs {
4505        if right.eq_ignore_ascii_case(candidate)
4506            && !current_path
4507                .iter()
4508                .any(|idx| tables[*idx].name.eq_ignore_ascii_case(left))
4509        {
4510            return false;
4511        }
4512    }
4513    true
4514}
4515
4516// ---------------------------------------------------------------------------
4517// DPccp: exhaustive join ordering for small join counts (bd-1as.3)
4518// ---------------------------------------------------------------------------
4519
4520/// Exhaustive join-order search for small joins (`n <= DPCCP_MAX_TABLES`).
4521///
4522/// Enumerates permutations with branch-and-bound pruning:
4523/// - explores candidate next tables in deterministic cost order
4524/// - prunes any partial branch whose cost already exceeds best complete plan
4525/// - returns the best order, total cost, enumerated candidates, pruned branches
4526#[allow(dead_code, clippy::cast_possible_truncation)]
4527fn dpccp_order_joins(
4528    tables: &[TableStats],
4529    indexes: &[IndexInfo],
4530    where_terms: &[WhereTerm<'_>],
4531    needed_columns: Option<&[String]>,
4532    table_index_hints: Option<&BTreeMap<String, IndexHint>>,
4533    cross_join_pairs: &[(String, String)],
4534    cracking_hints: Option<&CrackingHintStore>,
4535) -> Option<(Vec<usize>, f64, u64, u64)> {
4536    let n = tables.len();
4537    assert!(n <= DPCCP_MAX_TABLES);
4538
4539    let access_paths = tables
4540        .iter()
4541        .map(|table| {
4542            join_access_path(
4543                table,
4544                indexes,
4545                where_terms,
4546                needed_columns,
4547                table_index_hints,
4548                cracking_hints,
4549            )
4550        })
4551        .collect::<Vec<_>>();
4552
4553    let mut visit_order = (0..n).collect::<Vec<_>>();
4554    visit_order.sort_by(|&lhs, &rhs| {
4555        access_paths[lhs]
4556            .estimated_rows
4557            .partial_cmp(&access_paths[rhs].estimated_rows)
4558            .unwrap_or(std::cmp::Ordering::Equal)
4559            .then_with(|| {
4560                access_paths[lhs]
4561                    .estimated_cost
4562                    .partial_cmp(&access_paths[rhs].estimated_cost)
4563                    .unwrap_or(std::cmp::Ordering::Equal)
4564            })
4565            .then_with(|| lhs.cmp(&rhs))
4566    });
4567
4568    let mut state =
4569        ExhaustiveJoinSearchState::new(tables, &access_paths, &visit_order, cross_join_pairs);
4570    state.search();
4571
4572    let order = state.best_order?;
4573
4574    Some((
4575        order,
4576        state.best_cost,
4577        state.plans_enumerated,
4578        state.branches_pruned,
4579    ))
4580}
4581
4582struct ExhaustiveJoinSearchState<'a> {
4583    tables: &'a [TableStats],
4584    access_paths: &'a [AccessPath],
4585    visit_order: &'a [usize],
4586    cross_join_pairs: &'a [(String, String)],
4587    best_order: Option<Vec<usize>>,
4588    best_cost: f64,
4589    plans_enumerated: u64,
4590    branches_pruned: u64,
4591}
4592
4593impl<'a> ExhaustiveJoinSearchState<'a> {
4594    fn new(
4595        tables: &'a [TableStats],
4596        access_paths: &'a [AccessPath],
4597        visit_order: &'a [usize],
4598        cross_join_pairs: &'a [(String, String)],
4599    ) -> Self {
4600        Self {
4601            tables,
4602            access_paths,
4603            visit_order,
4604            cross_join_pairs,
4605            best_order: None,
4606            best_cost: f64::INFINITY,
4607            plans_enumerated: 0,
4608            branches_pruned: 0,
4609        }
4610    }
4611
4612    fn search(&mut self) {
4613        let mut current_order = Vec::with_capacity(self.tables.len());
4614        self.search_dfs(&mut current_order, 0, 0.0, 1.0);
4615    }
4616
4617    fn search_dfs(
4618        &mut self,
4619        current_order: &mut Vec<usize>,
4620        used_mask: u64,
4621        current_cost: f64,
4622        current_rows: f64,
4623    ) {
4624        if current_order.len() == self.tables.len() {
4625            if current_cost < self.best_cost {
4626                self.best_cost = current_cost;
4627                self.best_order = Some(current_order.clone());
4628                tracing::debug!(
4629                    target: "fsqlite.planner",
4630                    algorithm = "dpccp_exhaustive",
4631                    join_order = ?order_indices_to_names(current_order, self.tables),
4632                    total_cost = current_cost,
4633                    "planner.best_plan_updated"
4634                );
4635            }
4636            return;
4637        }
4638
4639        for &candidate_idx in self.visit_order {
4640            if used_mask & (1u64 << candidate_idx) != 0 {
4641                continue;
4642            }
4643
4644            let candidate = &self.tables[candidate_idx];
4645            if !cross_join_allowed_indices(
4646                current_order,
4647                &candidate.name,
4648                self.tables,
4649                self.cross_join_pairs,
4650            ) {
4651                continue;
4652            }
4653
4654            let ap = &self.access_paths[candidate_idx];
4655            let (new_cost, new_rows) = if current_order.is_empty() {
4656                (ap.estimated_cost, ap.estimated_rows)
4657            } else {
4658                let inner_cost = ap.estimated_cost * current_rows;
4659                (current_cost + inner_cost, current_rows * ap.estimated_rows)
4660            };
4661
4662            self.plans_enumerated += 1;
4663            let should_prune = self.best_cost.is_finite() && new_cost >= self.best_cost;
4664
4665            let mut candidate_order = current_order
4666                .iter()
4667                .map(|idx| self.tables[*idx].name.as_str())
4668                .collect::<Vec<_>>();
4669            candidate_order.push(candidate.name.as_str());
4670
4671            tracing::debug!(
4672                target: "fsqlite.planner",
4673                algorithm = "dpccp_exhaustive",
4674                depth = candidate_order.len(),
4675                candidate_order = ?candidate_order,
4676                cost = new_cost,
4677                best_complete_cost = if self.best_cost.is_finite() {
4678                    Some(self.best_cost)
4679                } else {
4680                    None::<f64>
4681                },
4682                pruned = should_prune,
4683                "planner.candidate_plan"
4684            );
4685
4686            if should_prune {
4687                self.branches_pruned += 1;
4688                continue;
4689            }
4690
4691            current_order.push(candidate_idx);
4692            self.search_dfs(
4693                current_order,
4694                used_mask | (1u64 << candidate_idx),
4695                new_cost,
4696                new_rows,
4697            );
4698            current_order.pop();
4699        }
4700    }
4701}
4702
4703fn order_indices_to_names(order: &[usize], tables: &[TableStats]) -> Vec<String> {
4704    order.iter().map(|idx| tables[*idx].name.clone()).collect()
4705}
4706
4707// ---------------------------------------------------------------------------
4708// Predicate pushdown (bd-1as.3)
4709// ---------------------------------------------------------------------------
4710
4711/// Collect all distinct table qualifiers referenced by column expressions
4712/// within an AST node.  Used to determine whether a predicate is a
4713/// single-table filter (pushable) or a cross-table join condition (not
4714/// pushable).
4715fn collect_table_refs(expr: &Expr, out: &mut HashSet<String>) {
4716    match expr {
4717        Expr::Column(col_ref, _) => {
4718            if let Some(ref tq) = col_ref.table {
4719                out.insert(tq.to_ascii_lowercase());
4720            }
4721        }
4722        Expr::BinaryOp { left, right, .. } => {
4723            collect_table_refs(left, out);
4724            collect_table_refs(right, out);
4725        }
4726        Expr::UnaryOp { expr: inner, .. }
4727        | Expr::Collate { expr: inner, .. }
4728        | Expr::IsNull { expr: inner, .. } => {
4729            collect_table_refs(inner, out);
4730        }
4731        Expr::Between {
4732            expr: e, low, high, ..
4733        } => {
4734            collect_table_refs(e, out);
4735            collect_table_refs(low, out);
4736            collect_table_refs(high, out);
4737        }
4738        Expr::In { expr: e, set, .. } => {
4739            collect_table_refs(e, out);
4740            if let InSet::List(items) = set {
4741                for item in items {
4742                    collect_table_refs(item, out);
4743                }
4744            }
4745        }
4746        Expr::Like {
4747            expr: e,
4748            pattern,
4749            escape,
4750            ..
4751        } => {
4752            collect_table_refs(e, out);
4753            collect_table_refs(pattern, out);
4754            if let Some(esc) = escape {
4755                collect_table_refs(esc, out);
4756            }
4757        }
4758        Expr::FunctionCall { args, filter, .. } => {
4759            if let fsqlite_ast::FunctionArgs::List(exprs) = args {
4760                for arg in exprs {
4761                    collect_table_refs(arg, out);
4762                }
4763            }
4764            if let Some(f) = filter {
4765                collect_table_refs(f, out);
4766            }
4767        }
4768        Expr::Case {
4769            operand,
4770            whens,
4771            else_expr,
4772            ..
4773        } => {
4774            if let Some(op) = operand {
4775                collect_table_refs(op, out);
4776            }
4777            for (when_e, then_e) in whens {
4778                collect_table_refs(when_e, out);
4779                collect_table_refs(then_e, out);
4780            }
4781            if let Some(el) = else_expr {
4782                collect_table_refs(el, out);
4783            }
4784        }
4785        Expr::Cast { expr: e, .. } => collect_table_refs(e, out),
4786        Expr::JsonAccess { expr: e, path, .. } => {
4787            collect_table_refs(e, out);
4788            collect_table_refs(path, out);
4789        }
4790        Expr::RowValue(exprs, _) => {
4791            for e in exprs {
4792                collect_table_refs(e, out);
4793            }
4794        }
4795        Expr::Exists { subquery, .. } | Expr::Subquery(subquery, _) => {
4796            // Recurse into the subquery's WHERE clause and select list to
4797            // find outer table references (correlated subquery columns).
4798            if let SelectCore::Select {
4799                where_clause,
4800                columns,
4801                ..
4802            } = &subquery.body.select
4803            {
4804                if let Some(wc) = where_clause {
4805                    collect_table_refs(wc, out);
4806                }
4807                for col in columns {
4808                    if let ResultColumn::Expr { expr, .. } = col {
4809                        collect_table_refs(expr, out);
4810                    }
4811                }
4812            }
4813        }
4814        // Literals, placeholders — no column refs to collect.
4815        _ => {}
4816    }
4817}
4818
4819/// A pushed-down predicate: WHERE term assigned to a specific table.
4820#[derive(Debug, Clone)]
4821pub struct PushedPredicate<'a> {
4822    /// Table name this predicate applies to.
4823    pub table: String,
4824    /// The original WHERE term.
4825    pub term: &'a WhereTerm<'a>,
4826}
4827
4828/// Push WHERE predicates down to the lowest possible table in the join tree.
4829///
4830/// A predicate can be pushed down if it references columns from only one table.
4831/// Predicates referencing multiple tables remain as join conditions.
4832///
4833/// Returns (single_table_predicates, join_predicates).
4834pub fn pushdown_predicates<'a>(
4835    where_terms: &'a [WhereTerm<'a>],
4836    table_names: &[String],
4837) -> (Vec<PushedPredicate<'a>>, Vec<&'a WhereTerm<'a>>) {
4838    let span = tracing::debug_span!(
4839        target: "fsqlite.planner",
4840        "predicate_pushdown",
4841        total_terms = where_terms.len(),
4842        pushed = tracing::field::Empty,
4843        remaining = tracing::field::Empty,
4844    );
4845    let _g = span.enter();
4846
4847    let mut pushed = Vec::new();
4848    let mut remaining = Vec::new();
4849
4850    for term in where_terms {
4851        // Collect all table qualifiers referenced anywhere in the expression.
4852        // A predicate is only pushable if it references at most one table;
4853        // cross-table predicates (join conditions) must remain as join filters.
4854        let mut refs = HashSet::new();
4855        collect_table_refs(term.expr, &mut refs);
4856
4857        if refs.len() == 1 {
4858            // Single qualified table — push to that table.
4859            let tq = refs.into_iter().next().unwrap();
4860            let matching: Vec<_> = table_names
4861                .iter()
4862                .filter(|t| t.to_ascii_lowercase() == tq)
4863                .collect();
4864            if matching.len() == 1 {
4865                pushed.push(PushedPredicate {
4866                    table: matching[0].clone(),
4867                    term,
4868                });
4869                continue;
4870            }
4871        } else if refs.is_empty() {
4872            // No table qualifiers (unqualified columns or pure literals in the RHS).
4873            if let Some(ref col) = term.column {
4874                if let Some(ref tname) = col.table {
4875                    if let Some(matched) =
4876                        table_names.iter().find(|t| t.eq_ignore_ascii_case(tname))
4877                    {
4878                        pushed.push(PushedPredicate {
4879                            table: matched.clone(),
4880                            term,
4881                        });
4882                        continue;
4883                    }
4884                } else if table_names.len() == 1 {
4885                    pushed.push(PushedPredicate {
4886                        table: table_names[0].clone(),
4887                        term,
4888                    });
4889                    continue;
4890                }
4891            }
4892        }
4893        // Multi-table references or ambiguous — keep as join condition.
4894        remaining.push(term);
4895    }
4896
4897    span.record("pushed", pushed.len() as u64);
4898    span.record("remaining", remaining.len() as u64);
4899
4900    tracing::debug!(
4901        pushed_count = pushed.len(),
4902        remaining_count = remaining.len(),
4903        "planner.predicate_pushdown.complete"
4904    );
4905
4906    (pushed, remaining)
4907}
4908
4909// ---------------------------------------------------------------------------
4910// Constant folding (bd-1as.3)
4911// ---------------------------------------------------------------------------
4912
/// Result of attempting to fold a constant expression.
///
/// Produced by [`try_constant_fold`]; callers should treat
/// [`FoldResult::NotConstant`] as "evaluate this expression at run time".
#[derive(Debug, Clone, PartialEq)]
pub enum FoldResult {
    /// Expression was folded to a literal value.
    Literal(Literal),
    /// Expression could not be folded at plan time — for example because it
    /// contains column references, or because it uses an operator/operand
    /// combination the folder does not handle.
    NotConstant,
}
4921
4922/// Attempt to constant-fold an expression.
4923///
4924/// Evaluates expressions that contain only literals and deterministic operators
4925/// at plan time, avoiding repeated evaluation during execution.
4926pub fn try_constant_fold(expr: &Expr) -> FoldResult {
4927    match expr {
4928        Expr::Literal(lit, _) => FoldResult::Literal(lit.clone()),
4929
4930        Expr::UnaryOp {
4931            op, expr: inner, ..
4932        } => {
4933            let inner_val = try_constant_fold(inner);
4934            match inner_val {
4935                FoldResult::Literal(Literal::Integer(i)) => match op {
4936                    fsqlite_ast::UnaryOp::Negate => {
4937                        FoldResult::Literal(Literal::Integer(i.wrapping_neg()))
4938                    }
4939                    fsqlite_ast::UnaryOp::Plus => FoldResult::Literal(Literal::Integer(i)),
4940                    fsqlite_ast::UnaryOp::BitNot => FoldResult::Literal(Literal::Integer(!i)),
4941                    fsqlite_ast::UnaryOp::Not => FoldResult::Literal(if i == 0 {
4942                        Literal::True
4943                    } else {
4944                        Literal::False
4945                    }),
4946                },
4947                FoldResult::Literal(Literal::Float(f)) => match op {
4948                    fsqlite_ast::UnaryOp::Negate => FoldResult::Literal(Literal::Float(-f)),
4949                    fsqlite_ast::UnaryOp::Plus => FoldResult::Literal(Literal::Float(f)),
4950                    _ => FoldResult::NotConstant,
4951                },
4952                // NULL propagation: any unary op on NULL yields NULL.
4953                FoldResult::Literal(Literal::Null) => FoldResult::Literal(Literal::Null),
4954                _ => FoldResult::NotConstant,
4955            }
4956        }
4957
4958        Expr::BinaryOp {
4959            left, op, right, ..
4960        } => {
4961            let l = try_constant_fold(left);
4962            let r = try_constant_fold(right);
4963            match (l, r) {
4964                (
4965                    FoldResult::Literal(Literal::Integer(a)),
4966                    FoldResult::Literal(Literal::Integer(b)),
4967                ) => match op {
4968                    fsqlite_ast::BinaryOp::Add => {
4969                        FoldResult::Literal(Literal::Integer(a.wrapping_add(b)))
4970                    }
4971                    fsqlite_ast::BinaryOp::Subtract => {
4972                        FoldResult::Literal(Literal::Integer(a.wrapping_sub(b)))
4973                    }
4974                    fsqlite_ast::BinaryOp::Multiply => {
4975                        FoldResult::Literal(Literal::Integer(a.wrapping_mul(b)))
4976                    }
4977                    fsqlite_ast::BinaryOp::Divide => {
4978                        if b == 0 {
4979                            FoldResult::Literal(Literal::Null)
4980                        } else {
4981                            FoldResult::Literal(Literal::Integer(a.wrapping_div(b)))
4982                        }
4983                    }
4984                    fsqlite_ast::BinaryOp::Modulo => {
4985                        if b == 0 {
4986                            FoldResult::Literal(Literal::Null)
4987                        } else {
4988                            FoldResult::Literal(Literal::Integer(a.wrapping_rem(b)))
4989                        }
4990                    }
4991                    fsqlite_ast::BinaryOp::Eq => FoldResult::Literal(if a == b {
4992                        Literal::True
4993                    } else {
4994                        Literal::False
4995                    }),
4996                    fsqlite_ast::BinaryOp::Ne => FoldResult::Literal(if a == b {
4997                        Literal::False
4998                    } else {
4999                        Literal::True
5000                    }),
5001                    fsqlite_ast::BinaryOp::Lt => {
5002                        FoldResult::Literal(if a < b { Literal::True } else { Literal::False })
5003                    }
5004                    fsqlite_ast::BinaryOp::Le => FoldResult::Literal(if a <= b {
5005                        Literal::True
5006                    } else {
5007                        Literal::False
5008                    }),
5009                    fsqlite_ast::BinaryOp::Gt => {
5010                        FoldResult::Literal(if a > b { Literal::True } else { Literal::False })
5011                    }
5012                    fsqlite_ast::BinaryOp::Ge => FoldResult::Literal(if a >= b {
5013                        Literal::True
5014                    } else {
5015                        Literal::False
5016                    }),
5017                    _ => FoldResult::NotConstant,
5018                },
5019                // NULL propagation: any arithmetic or comparison with NULL
5020                // yields NULL in SQL.
5021                (FoldResult::Literal(Literal::Null), FoldResult::Literal(_))
5022                | (FoldResult::Literal(_), FoldResult::Literal(Literal::Null)) => {
5023                    FoldResult::Literal(Literal::Null)
5024                }
5025                _ => FoldResult::NotConstant,
5026            }
5027        }
5028
5029        // Any expression containing column references is not constant.
5030        _ => FoldResult::NotConstant,
5031    }
5032}
5033
5034// ---------------------------------------------------------------------------
5035// Tests
5036// ---------------------------------------------------------------------------
5037
5038#[cfg(test)]
5039mod tests {
5040    use super::*;
5041    use fsqlite_ast::{
5042        ColumnRef, CompoundOp, Distinctness, Expr, FromClause, InSet, IndexHint, Literal,
5043        OrderingTerm, QualifiedName, ResultColumn, SelectBody, SelectCore, SortDirection, Span,
5044        TableOrSubquery,
5045    };
5046    use std::{cell::Cell, path::PathBuf, time::Instant};
5047
5048    /// Helper: build a SELECT core with named result columns.
5049    fn select_core_with_aliases(aliases: &[&str]) -> SelectCore {
5050        SelectCore::Select {
5051            distinct: Distinctness::All,
5052            columns: aliases
5053                .iter()
5054                .map(|a| ResultColumn::Expr {
5055                    expr: Expr::Literal(Literal::Integer(0), Span::ZERO),
5056                    alias: Some((*a).to_owned()),
5057                })
5058                .collect(),
5059            from: None,
5060            where_clause: None,
5061            group_by: vec![],
5062            having: None,
5063            windows: vec![],
5064        }
5065    }
5066
5067    /// Helper: build a compound body from multiple sets of aliases.
5068    fn compound_body(first: &[&str], rest: &[(&[&str], CompoundOp)]) -> SelectBody {
5069        SelectBody {
5070            select: select_core_with_aliases(first),
5071            compounds: rest
5072                .iter()
5073                .map(|(aliases, op)| (*op, select_core_with_aliases(aliases)))
5074                .collect(),
5075        }
5076    }
5077
5078    /// Helper: ORDER BY a bare column name.
5079    fn order_by_name(name: &str) -> OrderingTerm {
5080        OrderingTerm {
5081            expr: Expr::Column(ColumnRef::bare(name), Span::ZERO),
5082            direction: None,
5083            nulls: None,
5084        }
5085    }
5086
5087    /// Helper: ORDER BY a numeric index.
5088    fn order_by_num(n: i64) -> OrderingTerm {
5089        OrderingTerm {
5090            expr: Expr::Literal(Literal::Integer(n), Span::ZERO),
5091            direction: None,
5092            nulls: None,
5093        }
5094    }
5095
5096    /// Helper: ORDER BY a name with direction.
5097    fn order_by_name_dir(name: &str, dir: SortDirection) -> OrderingTerm {
5098        OrderingTerm {
5099            expr: Expr::Column(ColumnRef::bare(name), Span::ZERO),
5100            direction: Some(dir),
5101            nulls: None,
5102        }
5103    }
5104
5105    fn select_core_single_table(
5106        columns: Vec<ResultColumn>,
5107        table_name: &str,
5108        alias: Option<&str>,
5109    ) -> SelectCore {
5110        SelectCore::Select {
5111            distinct: Distinctness::All,
5112            columns,
5113            from: Some(FromClause {
5114                source: TableOrSubquery::Table {
5115                    name: QualifiedName::bare(table_name),
5116                    alias: alias.map(str::to_owned),
5117                    index_hint: None,
5118                    time_travel: None,
5119                },
5120                joins: vec![],
5121            }),
5122            where_clause: None,
5123            group_by: vec![],
5124            having: None,
5125            windows: vec![],
5126        }
5127    }
5128
5129    fn sample_cached_query_plan(label: &str) -> QueryPlan {
5130        QueryPlan {
5131            join_order: vec![label.to_owned()],
5132            access_paths: vec![],
5133            join_segments: vec![],
5134            total_cost: label.len() as f64,
5135            morsel_eligibility: None,
5136        }
5137    }
5138
5139    // --- Core resolution tests ---
5140
5141    #[test]
5142    fn test_single_table_projection_expands_star() {
5143        let core = select_core_single_table(vec![ResultColumn::Star], "t", None);
5144        let table_columns = vec!["a".to_owned(), "b".to_owned()];
5145        let resolved =
5146            resolve_single_table_result_columns(&core, &table_columns).expect("star should expand");
5147        assert_eq!(
5148            resolved,
5149            vec![
5150                ResultColumn::Expr {
5151                    expr: Expr::Column(ColumnRef::bare("a"), Span::ZERO),
5152                    alias: None
5153                },
5154                ResultColumn::Expr {
5155                    expr: Expr::Column(ColumnRef::bare("b"), Span::ZERO),
5156                    alias: None
5157                },
5158            ]
5159        );
5160    }
5161
5162    #[test]
5163    fn test_single_table_projection_expands_table_star_with_alias() {
5164        let core = select_core_single_table(
5165            vec![ResultColumn::TableStar(QualifiedName::bare("tt"))],
5166            "t",
5167            Some("tt"),
5168        );
5169        let table_columns = vec!["a".to_owned(), "b".to_owned()];
5170        let resolved = resolve_single_table_result_columns(&core, &table_columns)
5171            .expect("table.* should expand");
5172        assert_eq!(resolved.len(), 2);
5173    }
5174
5175    #[test]
5176    fn test_single_table_projection_rejects_unknown_column() {
5177        let core = select_core_single_table(
5178            vec![ResultColumn::Expr {
5179                expr: Expr::Column(ColumnRef::bare("z"), Span::ZERO),
5180                alias: None,
5181            }],
5182            "t",
5183            None,
5184        );
5185        let table_columns = vec!["a".to_owned(), "b".to_owned()];
5186        let err = resolve_single_table_result_columns(&core, &table_columns)
5187            .expect_err("unknown column should fail");
5188        assert_eq!(
5189            err,
5190            SingleTableProjectionError::ColumnNotFound {
5191                column: "z".to_owned()
5192            }
5193        );
5194    }
5195
5196    #[test]
5197    fn test_single_table_projection_accepts_rowid_aliases_with_qualifiers() {
5198        let core = select_core_single_table(
5199            vec![
5200                ResultColumn::Expr {
5201                    expr: Expr::Column(ColumnRef::bare("rowid"), Span::ZERO),
5202                    alias: None,
5203                },
5204                ResultColumn::Expr {
5205                    expr: Expr::Column(ColumnRef::qualified("tt", "_rowid_"), Span::ZERO),
5206                    alias: None,
5207                },
5208                ResultColumn::Expr {
5209                    expr: Expr::Column(ColumnRef::qualified("t", "oid"), Span::ZERO),
5210                    alias: None,
5211                },
5212            ],
5213            "t",
5214            Some("tt"),
5215        );
5216        let table_columns = vec!["a".to_owned(), "b".to_owned()];
5217        let resolved = resolve_single_table_result_columns(&core, &table_columns)
5218            .expect("rowid aliases should be accepted in projection");
5219        assert_eq!(resolved.len(), 3);
5220    }
5221
5222    #[test]
5223    fn test_single_table_projection_rejects_hidden_rowid_aliases_when_disabled() {
5224        let core = select_core_single_table(
5225            vec![
5226                ResultColumn::Expr {
5227                    expr: Expr::Column(ColumnRef::bare("rowid"), Span::ZERO),
5228                    alias: None,
5229                },
5230                ResultColumn::Expr {
5231                    expr: Expr::Column(ColumnRef::qualified("tt", "_rowid_"), Span::ZERO),
5232                    alias: None,
5233                },
5234            ],
5235            "t",
5236            Some("tt"),
5237        );
5238        let table_columns = vec!["a".to_owned(), "b".to_owned()];
5239        let err = resolve_single_table_result_columns_with_options(&core, &table_columns, false)
5240            .expect_err("WITHOUT ROWID tables should reject hidden rowid aliases");
5241        assert_eq!(
5242            err,
5243            SingleTableProjectionError::ColumnNotFound {
5244                column: "rowid".to_owned()
5245            }
5246        );
5247    }
5248
5249    #[test]
5250    fn test_single_table_projection_still_accepts_visible_rowid_column_when_disabled() {
5251        let core = select_core_single_table(
5252            vec![ResultColumn::Expr {
5253                expr: Expr::Column(ColumnRef::bare("rowid"), Span::ZERO),
5254                alias: None,
5255            }],
5256            "t",
5257            None,
5258        );
5259        let table_columns = vec!["rowid".to_owned(), "payload".to_owned()];
5260        let resolved =
5261            resolve_single_table_result_columns_with_options(&core, &table_columns, false)
5262                .expect("visible rowid-named columns should still resolve");
5263        assert_eq!(resolved.len(), 1);
5264    }
5265
5266    #[test]
5267    fn test_compound_order_by_uses_first_alias() {
5268        // SELECT 1 AS a UNION SELECT 2 AS b ORDER BY a
5269        // → a is in the first SELECT at col 0
5270        let body = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);
5271        let result =
5272            resolve_compound_order_by(&body, &[order_by_name("a")]).expect("should resolve");
5273        assert_eq!(result.len(), 1);
5274        assert_eq!(result[0].column_idx, 0);
5275    }
5276
5277    #[test]
5278    fn test_extract_output_aliases_and_count_output_columns() {
5279        // SELECT 1 AS renamed, bare_col, 2 -> aliased / bare-column-name / unaliased-expr.
5280        let core = SelectCore::Select {
5281            distinct: Distinctness::All,
5282            columns: vec![
5283                ResultColumn::Expr {
5284                    expr: Expr::Literal(Literal::Integer(1), Span::ZERO),
5285                    alias: Some("renamed".to_owned()),
5286                },
5287                ResultColumn::Expr {
5288                    expr: Expr::Column(ColumnRef::bare("bare_col"), Span::ZERO),
5289                    alias: None,
5290                },
5291                ResultColumn::Expr {
5292                    expr: Expr::Literal(Literal::Integer(2), Span::ZERO),
5293                    alias: None,
5294                },
5295            ],
5296            from: None,
5297            where_clause: None,
5298            group_by: vec![],
5299            having: None,
5300            windows: vec![],
5301        };
5302        assert_eq!(count_output_columns(&core), 3);
5303        assert_eq!(
5304            extract_output_aliases(&core),
5305            vec![
5306                Some("renamed".to_owned()),
5307                Some("bare_col".to_owned()),
5308                None
5309            ]
5310        );
5311
5312        // VALUES: width comes from the first row; every column is unnamed.
5313        let values = SelectCore::Values(vec![
5314            vec![
5315                Expr::Literal(Literal::Integer(1), Span::ZERO),
5316                Expr::Literal(Literal::Integer(2), Span::ZERO),
5317            ],
5318            vec![
5319                Expr::Literal(Literal::Integer(3), Span::ZERO),
5320                Expr::Literal(Literal::Integer(4), Span::ZERO),
5321            ],
5322        ]);
5323        assert_eq!(count_output_columns(&values), 2);
5324        assert_eq!(extract_output_aliases(&values), vec![None, None]);
5325
5326        // Empty VALUES -> zero columns.
5327        let empty = SelectCore::Values(vec![]);
5328        assert_eq!(count_output_columns(&empty), 0);
5329        assert!(extract_output_aliases(&empty).is_empty());
5330    }
5331
5332    #[test]
5333    fn test_compound_order_by_second_select_alias() {
5334        // SELECT 1 AS a UNION SELECT 2 AS b ORDER BY b
5335        // → b is in the second SELECT at col 0
5336        let body = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);
5337        let result =
5338            resolve_compound_order_by(&body, &[order_by_name("b")]).expect("should resolve");
5339        assert_eq!(result.len(), 1);
5340        assert_eq!(result[0].column_idx, 0);
5341    }
5342
5343    #[test]
5344    fn test_compound_order_by_first_select_wins_conflict() {
5345        // SELECT 10 AS a, 1 AS b UNION ALL SELECT 2 AS b, 20 AS a ORDER BY b
5346        // → b is in first SELECT at col 1 AND second SELECT at col 0
5347        // → first SELECT wins → col 1
5348        let body = compound_body(&["a", "b"], &[(&["b", "a"], CompoundOp::UnionAll)]);
5349        let result =
5350            resolve_compound_order_by(&body, &[order_by_name("b")]).expect("should resolve");
5351        assert_eq!(result[0].column_idx, 1);
5352    }
5353
5354    #[test]
5355    fn test_compound_order_by_numeric_column() {
5356        // ORDER BY 1 → col 0, ORDER BY 2 → col 1
5357        let body = compound_body(&["a", "b"], &[(&["c", "d"], CompoundOp::Union)]);
5358        let result = resolve_compound_order_by(&body, &[order_by_num(1), order_by_num(2)])
5359            .expect("should resolve");
5360        assert_eq!(result[0].column_idx, 0);
5361        assert_eq!(result[1].column_idx, 1);
5362    }
5363
5364    #[test]
5365    fn test_compound_order_by_unknown_name_error() {
5366        let body = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);
5367        let err =
5368            resolve_compound_order_by(&body, &[order_by_name("z")]).expect_err("should error");
5369        assert!(matches!(
5370            err,
5371            CompoundOrderByError::ColumnNotFound { ref name, .. } if name == "z"
5372        ));
5373    }
5374
5375    #[test]
5376    fn test_compound_order_by_numeric_out_of_range() {
5377        let body = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);
5378        let err = resolve_compound_order_by(&body, &[order_by_num(5)]).expect_err("should error");
5379        assert!(matches!(
5380            err,
5381            CompoundOrderByError::IndexOutOfRange {
5382                index: 5,
5383                num_columns: 1,
5384                ..
5385            }
5386        ));
5387    }
5388
5389    #[test]
5390    fn test_compound_order_by_numeric_zero() {
5391        let body = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);
5392        let err = resolve_compound_order_by(&body, &[order_by_num(0)]).expect_err("should error");
5393        assert!(matches!(
5394            err,
5395            CompoundOrderByError::IndexZeroOrNegative { value: 0, .. }
5396        ));
5397    }
5398
5399    #[test]
5400    fn test_compound_order_by_expression_rejected() {
5401        let body = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);
5402        let term = OrderingTerm {
5403            expr: Expr::BinaryOp {
5404                left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
5405                op: fsqlite_ast::BinaryOp::Add,
5406                right: Box::new(Expr::Literal(Literal::Integer(0), Span::ZERO)),
5407                span: Span::ZERO,
5408            },
5409            direction: None,
5410            nulls: None,
5411        };
5412        let err = resolve_compound_order_by(&body, &[term]).expect_err("should error");
5413        assert!(matches!(
5414            err,
5415            CompoundOrderByError::ExpressionNotAllowed { .. }
5416        ));
5417    }
5418
5419    #[test]
5420    fn test_compound_order_by_with_direction() {
5421        let body = compound_body(&["a", "b"], &[(&["c", "d"], CompoundOp::Union)]);
5422        let result =
5423            resolve_compound_order_by(&body, &[order_by_name_dir("a", SortDirection::Desc)])
5424                .expect("should resolve");
5425        assert_eq!(result[0].column_idx, 0);
5426        assert_eq!(result[0].direction, Some(SortDirection::Desc));
5427    }
5428
5429    #[test]
5430    fn test_compound_order_by_collate() {
5431        let body = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);
5432        let term = OrderingTerm {
5433            expr: Expr::Collate {
5434                expr: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
5435                collation: "NOCASE".to_owned(),
5436                span: Span::ZERO,
5437            },
5438            direction: None,
5439            nulls: None,
5440        };
5441        let result = resolve_compound_order_by(&body, &[term]).expect("should resolve");
5442        assert_eq!(result[0].column_idx, 0);
5443        assert_eq!(result[0].collation.as_deref(), Some("NOCASE"));
5444    }
5445
5446    #[test]
5447    fn test_compound_order_by_three_selects() {
5448        // Alias c only in 3rd SELECT at col 0
5449        let body = compound_body(
5450            &["a"],
5451            &[(&["b"], CompoundOp::Union), (&["c"], CompoundOp::Union)],
5452        );
5453        let result =
5454            resolve_compound_order_by(&body, &[order_by_name("c")]).expect("should resolve");
5455        assert_eq!(result[0].column_idx, 0);
5456    }
5457
5458    #[test]
5459    fn test_compound_order_by_earlier_select_wins() {
5460        // 2nd SELECT has 'c' at col 1, 3rd SELECT has 'c' at col 0
5461        // → 2nd SELECT wins → col 1
5462        let body = compound_body(
5463            &["a", "x"],
5464            &[
5465                (&["b", "c"], CompoundOp::UnionAll),
5466                (&["c", "b"], CompoundOp::UnionAll),
5467            ],
5468        );
5469        let result =
5470            resolve_compound_order_by(&body, &[order_by_name("c")]).expect("should resolve");
5471        assert_eq!(result[0].column_idx, 1);
5472    }
5473
5474    #[test]
5475    fn test_compound_order_by_case_insensitive() {
5476        let body = compound_body(&["MyCol"], &[(&["other"], CompoundOp::Union)]);
5477        let result =
5478            resolve_compound_order_by(&body, &[order_by_name("mycol")]).expect("should resolve");
5479        assert_eq!(result[0].column_idx, 0);
5480    }
5481
5482    #[test]
5483    fn test_compound_order_by_intersect_except() {
5484        // Same resolution rules for all compound operators
5485        let body = compound_body(&["a"], &[(&["b"], CompoundOp::Intersect)]);
5486        let result =
5487            resolve_compound_order_by(&body, &[order_by_name("b")]).expect("should resolve");
5488        assert_eq!(result[0].column_idx, 0);
5489
5490        let body = compound_body(&["a"], &[(&["b"], CompoundOp::Except)]);
5491        let result =
5492            resolve_compound_order_by(&body, &[order_by_name("b")]).expect("should resolve");
5493        assert_eq!(result[0].column_idx, 0);
5494    }
5495
5496    #[test]
5497    fn test_extract_output_aliases_select() {
5498        let core = select_core_with_aliases(&["x", "y", "z"]);
5499        let aliases = extract_output_aliases(&core);
5500        assert_eq!(
5501            aliases,
5502            vec![
5503                Some("x".to_owned()),
5504                Some("y".to_owned()),
5505                Some("z".to_owned())
5506            ]
5507        );
5508    }
5509
5510    #[test]
5511    fn test_extract_output_aliases_bare_column() {
5512        // SELECT col_name (no alias) → uses column name
5513        let core = SelectCore::Select {
5514            distinct: Distinctness::All,
5515            columns: vec![ResultColumn::Expr {
5516                expr: Expr::Column(ColumnRef::bare("my_col"), Span::ZERO),
5517                alias: None,
5518            }],
5519            from: None,
5520            where_clause: None,
5521            group_by: vec![],
5522            having: None,
5523            windows: vec![],
5524        };
5525        let aliases = extract_output_aliases(&core);
5526        assert_eq!(aliases, vec![Some("my_col".to_owned())]);
5527    }
5528
5529    #[test]
5530    fn test_extract_output_aliases_values() {
5531        let core = SelectCore::Values(vec![vec![
5532            Expr::Literal(Literal::Integer(1), Span::ZERO),
5533            Expr::Literal(Literal::Integer(2), Span::ZERO),
5534        ]]);
5535        let aliases = extract_output_aliases(&core);
5536        assert_eq!(aliases, vec![None, None]);
5537    }
5538
5539    #[test]
5540    fn test_is_compound() {
5541        let simple = SelectBody {
5542            select: select_core_with_aliases(&["a"]),
5543            compounds: vec![],
5544        };
5545        assert!(!is_compound(&simple));
5546
5547        let compound = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);
5548        assert!(is_compound(&compound));
5549    }
5550
5551    #[test]
5552    fn test_compound_op_name_all_variants() {
5553        assert_eq!(compound_op_name(CompoundOp::Union), "UNION");
5554        assert_eq!(compound_op_name(CompoundOp::UnionAll), "UNION ALL");
5555        assert_eq!(compound_op_name(CompoundOp::Intersect), "INTERSECT");
5556        assert_eq!(compound_op_name(CompoundOp::Except), "EXCEPT");
5557    }
5558
5559    #[test]
5560    fn test_compound_order_by_error_display() {
5561        let err = CompoundOrderByError::ColumnNotFound {
5562            name: "z".to_owned(),
5563            span: Span::ZERO,
5564        };
5565        assert!(err.to_string().contains("does not match"));
5566
5567        let err = CompoundOrderByError::IndexOutOfRange {
5568            index: 5,
5569            num_columns: 2,
5570            span: Span::ZERO,
5571        };
5572        assert!(err.to_string().contains("out of range"));
5573
5574        let err = CompoundOrderByError::ExpressionNotAllowed { span: Span::ZERO };
5575        assert!(err.to_string().contains("not allowed"));
5576    }
5577
5578    #[test]
5579    fn test_compound_order_by_negative_index() {
5580        let body = compound_body(&["a"], &[(&["b"], CompoundOp::Union)]);
5581        let err = resolve_compound_order_by(&body, &[order_by_num(-1)]).expect_err("should error");
5582        assert!(matches!(
5583            err,
5584            CompoundOrderByError::IndexZeroOrNegative { value: -1, .. }
5585        ));
5586    }
5587
5588    #[test]
5589    fn test_compound_order_by_multiple_terms() {
5590        let body = compound_body(
5591            &["a", "b", "c"],
5592            &[(&["x", "y", "z"], CompoundOp::UnionAll)],
5593        );
5594        let result = resolve_compound_order_by(
5595            &body,
5596            &[
5597                order_by_name_dir("c", SortDirection::Desc),
5598                order_by_num(1),
5599                order_by_name("y"),
5600            ],
5601        )
5602        .expect("should resolve");
5603        assert_eq!(result.len(), 3);
5604        assert_eq!(result[0].column_idx, 2); // c → first SELECT col 2
5605        assert_eq!(result[0].direction, Some(SortDirection::Desc));
5606        assert_eq!(result[1].column_idx, 0); // 1 → col 0
5607        assert_eq!(result[2].column_idx, 1); // y → second SELECT col 1
5608    }
5609
5610    // ===================================================================
5611    // §10.5 Cost Model tests
5612    // ===================================================================
5613
5614    fn table_stats(name: &str, n_pages: u64, n_rows: u64) -> TableStats {
5615        TableStats {
5616            name: name.to_owned(),
5617            n_pages,
5618            n_rows,
5619            source: StatsSource::Heuristic,
5620        }
5621    }
5622
5623    fn index_info(
5624        name: &str,
5625        table: &str,
5626        columns: &[&str],
5627        unique: bool,
5628        n_pages: u64,
5629    ) -> IndexInfo {
5630        IndexInfo {
5631            name: name.to_owned(),
5632            table: table.to_owned(),
5633            columns: columns.iter().map(|c| (*c).to_owned()).collect(),
5634            unique,
5635            n_pages,
5636            source: StatsSource::Heuristic,
5637            partial_where: None,
5638            expression_columns: vec![],
5639        }
5640    }
5641
5642    fn eq_term_value(col: &str, value: i64) -> WhereTerm<'static> {
5643        // Leaked for convenience in tests — we just need the lifetime.
5644        let expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
5645            left: Box::new(Expr::Column(ColumnRef::bare(col), Span::ZERO)),
5646            op: AstBinaryOp::Eq,
5647            right: Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO)),
5648            span: Span::ZERO,
5649        }));
5650        classify_where_term(expr)
5651    }
5652
5653    fn eq_term(col: &str) -> WhereTerm<'static> {
5654        eq_term_value(col, 1)
5655    }
5656
5657    fn range_term(col: &str) -> WhereTerm<'static> {
5658        let expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
5659            left: Box::new(Expr::Column(ColumnRef::bare(col), Span::ZERO)),
5660            op: AstBinaryOp::Gt,
5661            right: Box::new(Expr::Literal(Literal::Integer(5), Span::ZERO)),
5662            span: Span::ZERO,
5663        }));
5664        classify_where_term(expr)
5665    }
5666
5667    fn in_term(col: &str, count: usize) -> WhereTerm<'static> {
5668        let items: Vec<Expr> = (0..count)
5669            .map(|i| {
5670                #[allow(clippy::cast_possible_wrap)]
5671                Expr::Literal(Literal::Integer(i as i64), Span::ZERO)
5672            })
5673            .collect();
5674        let expr: &'static Expr = Box::leak(Box::new(Expr::In {
5675            expr: Box::new(Expr::Column(ColumnRef::bare(col), Span::ZERO)),
5676            set: InSet::List(items),
5677            not: false,
5678            span: Span::ZERO,
5679        }));
5680        classify_where_term(expr)
5681    }
5682
5683    fn like_term(col: &str, pattern: &str) -> WhereTerm<'static> {
5684        let expr: &'static Expr = Box::leak(Box::new(Expr::Like {
5685            expr: Box::new(Expr::Column(ColumnRef::bare(col), Span::ZERO)),
5686            pattern: Box::new(Expr::Literal(
5687                Literal::String(pattern.to_owned()),
5688                Span::ZERO,
5689            )),
5690            escape: None,
5691            op: LikeOp::Like,
5692            not: false,
5693            span: Span::ZERO,
5694        }));
5695        classify_where_term(expr)
5696    }
5697
5698    fn like_term_with_escape(col: &str, pattern: &str, escape: &str) -> WhereTerm<'static> {
5699        let expr: &'static Expr = Box::leak(Box::new(Expr::Like {
5700            expr: Box::new(Expr::Column(ColumnRef::bare(col), Span::ZERO)),
5701            pattern: Box::new(Expr::Literal(
5702                Literal::String(pattern.to_owned()),
5703                Span::ZERO,
5704            )),
5705            escape: Some(Box::new(Expr::Literal(
5706                Literal::String(escape.to_owned()),
5707                Span::ZERO,
5708            ))),
5709            op: LikeOp::Like,
5710            not: false,
5711            span: Span::ZERO,
5712        }));
5713        classify_where_term(expr)
5714    }
5715
5716    fn glob_term(col: &str, pattern: &str) -> WhereTerm<'static> {
5717        let expr: &'static Expr = Box::leak(Box::new(Expr::Like {
5718            expr: Box::new(Expr::Column(ColumnRef::bare(col), Span::ZERO)),
5719            pattern: Box::new(Expr::Literal(
5720                Literal::String(pattern.to_owned()),
5721                Span::ZERO,
5722            )),
5723            escape: None,
5724            op: LikeOp::Glob,
5725            not: false,
5726            span: Span::ZERO,
5727        }));
5728        classify_where_term(expr)
5729    }
5730
5731    fn or_eq_term(col: &str, values: &[i64]) -> WhereTerm<'static> {
5732        assert!(
5733            values.len() >= 2,
5734            "or_eq_term requires at least two disjunct values"
5735        );
5736
5737        let mut disjuncts = values
5738            .iter()
5739            .map(|value| Expr::BinaryOp {
5740                left: Box::new(Expr::Column(ColumnRef::bare(col), Span::ZERO)),
5741                op: AstBinaryOp::Eq,
5742                right: Box::new(Expr::Literal(Literal::Integer(*value), Span::ZERO)),
5743                span: Span::ZERO,
5744            })
5745            .collect::<Vec<_>>();
5746
5747        let mut combined = disjuncts.pop().expect("values is non-empty");
5748        while let Some(left_disjunct) = disjuncts.pop() {
5749            combined = Expr::BinaryOp {
5750                left: Box::new(left_disjunct),
5751                op: AstBinaryOp::Or,
5752                right: Box::new(combined),
5753                span: Span::ZERO,
5754            };
5755        }
5756
5757        let expr: &'static Expr = Box::leak(Box::new(combined));
5758        classify_where_term(expr)
5759    }
5760
5761    fn join_term(t1: &str, c1: &str, t2: &str, c2: &str) -> WhereTerm<'static> {
5762        let expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
5763            left: Box::new(Expr::Column(ColumnRef::qualified(t1, c1), Span::ZERO)),
5764            op: AstBinaryOp::Eq,
5765            right: Box::new(Expr::Column(ColumnRef::qualified(t2, c2), Span::ZERO)),
5766            span: Span::ZERO,
5767        }));
5768        classify_where_term(expr)
5769    }
5770
5771    #[test]
5772    fn test_cost_full_table_scan() {
5773        // Full table scan cost = N_pages(table)
5774        assert!(
5775            (estimate_cost(&AccessPathKind::FullTableScan, 100, 0) - 100.0).abs() < f64::EPSILON
5776        );
5777        assert!((estimate_cost(&AccessPathKind::FullTableScan, 1, 0) - 1.0).abs() < f64::EPSILON);
5778        assert!(
5779            (estimate_cost(&AccessPathKind::FullTableScan, 10000, 0) - 10000.0).abs()
5780                < f64::EPSILON
5781        );
5782    }
5783
5784    #[test]
5785    fn test_cost_rowid_lookup() {
5786        // Rowid lookup cost = log2(N_pages(table))
5787        let cost = estimate_cost(&AccessPathKind::RowidLookup, 1024, 0);
5788        assert!((cost - 10.0).abs() < f64::EPSILON); // log2(1024) = 10
5789    }
5790
5791    #[test]
5792    fn test_cost_index_scan_equality() {
5793        // Equality scan cost = log2(idx_pages) + log2(tbl_pages)
5794        let cost = estimate_cost(&AccessPathKind::IndexScanEquality, 200, 50);
5795        let expected = 50_f64.log2() + 200_f64.log2();
5796        assert!((cost - expected).abs() < 1e-10);
5797    }
5798
5799    #[test]
5800    fn test_cost_index_scan_range() {
5801        // Range scan cost = log2(idx_pages) + sel * idx_pages + sel * tbl_pages
5802        let sel = 0.1;
5803        let cost = estimate_cost(
5804            &AccessPathKind::IndexScanRange { selectivity: sel },
5805            200,
5806            50,
5807        );
5808        let expected = 50_f64.log2() + sel * 50.0 + sel * 200.0;
5809        assert!((cost - expected).abs() < 1e-10);
5810    }
5811
5812    #[test]
5813    fn test_cost_covering_index_scan() {
5814        // Covering index cost = log2(idx_pages) + sel * idx_pages (no table lookup)
5815        let sel = 0.1;
5816        let cost = estimate_cost(
5817            &AccessPathKind::CoveringIndexScan { selectivity: sel },
5818            200,
5819            50,
5820        );
5821        let expected = 50_f64.log2() + sel * 50.0;
5822        assert!((cost - expected).abs() < 1e-10);
5823    }
5824
5825    #[test]
5826    fn test_cost_ranks_covering_index_below_non_covering_range_scan() {
5827        // A covering index avoids the per-match table dereference, so for the
5828        // same selectivity/pages it must cost strictly less than a non-covering
5829        // range scan — by exactly the table-access term (sel * table_pages) it
5830        // skips. The existing tests check each formula in isolation; this pins
5831        // the cross-kind ordering that makes the planner prefer covering indexes.
5832        let sel = 0.1;
5833        let range = estimate_cost(
5834            &AccessPathKind::IndexScanRange { selectivity: sel },
5835            200,
5836            50,
5837        );
5838        let covering = estimate_cost(
5839            &AccessPathKind::CoveringIndexScan { selectivity: sel },
5840            200,
5841            50,
5842        );
5843        assert!(
5844            covering < range,
5845            "covering index must rank below a range scan: {covering} vs {range}"
5846        );
5847        // The gap is exactly the avoided table-access term: sel * table_pages.
5848        assert!(
5849            ((range - covering) - sel * 200.0).abs() < 1e-9,
5850            "covering/range gap should equal sel*table_pages (= {}), got {}",
5851            sel * 200.0,
5852            range - covering
5853        );
5854
5855        // With a row count, the covering scan also pays the cheaper per-row term
5856        // (decode only, not decode + dereference), so its advantage widens.
5857        let range_r = estimate_cost_ext(
5858            &AccessPathKind::IndexScanRange { selectivity: sel },
5859            200,
5860            50,
5861            1_000,
5862        );
5863        let covering_r = estimate_cost_ext(
5864            &AccessPathKind::CoveringIndexScan { selectivity: sel },
5865            200,
5866            50,
5867            1_000,
5868        );
5869        assert!(
5870            covering_r < range_r,
5871            "covering must stay cheaper once rows are counted: {covering_r} vs {range_r}"
5872        );
5873        assert!(
5874            (range_r - covering_r) > (range - covering),
5875            "per-row terms must widen the covering advantage"
5876        );
5877    }
5878
5879    // ===================================================================
5880    // PLANNER-2: estimate_cost_ext should react monotonically to n_rows
5881    // ===================================================================
5882
5883    #[test]
5884    fn access_path_metric_label_maps_every_kind() {
5885        // The bare metric/tracing label for each access path (no selectivity),
5886        // used in cost-estimate tracing and differential-plan fingerprints. It
5887        // is only ever used as a value, never directly asserted per variant, so
5888        // a wrong label would silently break observability.
5889        assert_eq!(
5890            access_path_metric_label(&AccessPathKind::FullTableScan),
5891            "full_table_scan"
5892        );
5893        assert_eq!(
5894            access_path_metric_label(&AccessPathKind::IndexScanRange { selectivity: 0.1 }),
5895            "index_scan_range"
5896        );
5897        assert_eq!(
5898            access_path_metric_label(&AccessPathKind::IndexScanEquality),
5899            "index_scan_equality"
5900        );
5901        assert_eq!(
5902            access_path_metric_label(&AccessPathKind::CoveringIndexScan { selectivity: 0.1 }),
5903            "covering_index_scan"
5904        );
5905        assert_eq!(
5906            access_path_metric_label(&AccessPathKind::RowidLookup),
5907            "rowid_lookup"
5908        );
5909    }
5910
5911    #[test]
5912    fn test_snapshot_index_selection_totals_has_five_access_path_labels() {
5913        // The snapshot builds its map from a fixed 5-label array; the KEY set
5914        // is a structural contract regardless of the live counter values, so
5915        // this assertion is race-safe under parallel tests (values are not
5916        // checked).
5917        let snap = snapshot_index_selection_totals();
5918        for label in [
5919            "covering_index_scan",
5920            "full_table_scan",
5921            "index_scan_equality",
5922            "index_scan_range",
5923            "rowid_lookup",
5924        ] {
5925            assert!(snap.contains_key(label), "missing label: {label}");
5926        }
5927        assert_eq!(snap.len(), 5, "no extra labels");
5928    }
5929
5930    #[test]
5931    fn test_estimate_cost_ext_exact_page_costs_at_zero_rows() {
5932        // At n_rows == 0 every per-row term vanishes, leaving the closed-form
5933        // page-level cost for each access path. test_estimate_cost_ext_zero_rows_
5934        // matches_legacy only checks FullTableScan and IndexScanEquality; pin the
5935        // exact log2-based formulas for the remaining variants too. Power-of-two
5936        // page counts keep the logs exact: ip=16 -> log2=4, tp=64 -> log2=6.
5937        let approx = |a: f64, b: f64| (a - b).abs() < 1e-9;
5938        let (ip, tp) = (16u64, 64u64);
5939
5940        // Full scan == table page count.
5941        assert!(approx(
5942            estimate_cost_ext(&AccessPathKind::FullTableScan, tp, ip, 0),
5943            64.0
5944        ));
5945        // Rowid lookup == log2(table pages); no index term.
5946        assert!(approx(
5947            estimate_cost_ext(&AccessPathKind::RowidLookup, tp, ip, 0),
5948            6.0
5949        ));
5950        // Index equality == log2(index pages) + log2(table pages).
5951        assert!(approx(
5952            estimate_cost_ext(&AccessPathKind::IndexScanEquality, tp, ip, 0),
5953            10.0
5954        ));
5955
5956        // Range scan == log2(ip) + sel*ip + sel*tp = 4 + 8 + 32.
5957        let range = estimate_cost_ext(
5958            &AccessPathKind::IndexScanRange { selectivity: 0.5 },
5959            tp,
5960            ip,
5961            0,
5962        );
5963        assert!(approx(range, 44.0), "range page cost, got {range}");
5964
5965        // Covering scan omits the table-page (row dereference) term:
5966        // log2(ip) + sel*ip = 4 + 8, with no sel*tp.
5967        let covering = estimate_cost_ext(
5968            &AccessPathKind::CoveringIndexScan { selectivity: 0.5 },
5969            tp,
5970            ip,
5971            0,
5972        );
5973        assert!(approx(covering, 12.0), "covering page cost, got {covering}");
5974
5975        // The structural difference is exactly the avoided table dereference,
5976        // sel*tp = 0.5*64 = 32 -- the reason a covering scan ranks below a range
5977        // scan over the same index.
5978        assert!(approx(range - covering, 0.5 * 64.0));
5979    }
5980
5981    #[test]
5982    fn test_expression_is_equi_column_predicate() {
5983        // True only for a column = column equality; column=literal, literal=
5984        // column, literal=literal, a non-Eq op, and a non-BinaryOp all fail.
5985        let col = |n: &str| Box::new(Expr::Column(ColumnRef::bare(n), Span::ZERO));
5986        let lit = |n: i64| Box::new(Expr::Literal(Literal::Integer(n), Span::ZERO));
5987        let bin = |l: Box<Expr>, op: AstBinaryOp, r: Box<Expr>| Expr::BinaryOp {
5988            left: l,
5989            op,
5990            right: r,
5991            span: Span::ZERO,
5992        };
5993
5994        assert!(expression_is_equi_column_predicate(&bin(
5995            col("a"),
5996            AstBinaryOp::Eq,
5997            col("b")
5998        )));
5999        assert!(!expression_is_equi_column_predicate(&bin(
6000            col("a"),
6001            AstBinaryOp::Eq,
6002            lit(5)
6003        )));
6004        assert!(!expression_is_equi_column_predicate(&bin(
6005            lit(5),
6006            AstBinaryOp::Eq,
6007            col("b")
6008        )));
6009        assert!(!expression_is_equi_column_predicate(&bin(
6010            lit(5),
6011            AstBinaryOp::Eq,
6012            lit(6)
6013        )));
6014        assert!(!expression_is_equi_column_predicate(&bin(
6015            col("a"),
6016            AstBinaryOp::Lt,
6017            col("b")
6018        )));
6019        assert!(!expression_is_equi_column_predicate(&Expr::Literal(
6020            Literal::Integer(1),
6021            Span::ZERO
6022        )));
6023    }
6024
6025    #[test]
6026    fn test_collect_join_predicates() {
6027        // Equi-join terms between two tables in the set become EquiJoinPredicate
6028        // entries; a term involving a table outside the set is skipped; an
6029        // empty term list yields nothing.
6030        let mut set: HashSet<String> = HashSet::new();
6031        set.insert("a".to_owned());
6032        set.insert("b".to_owned());
6033        let terms = [join_term("a", "x", "b", "y")]; // a.x = b.y
6034
6035        let (equi, theta) = collect_join_predicates(&terms, &set);
6036        assert_eq!(equi.len(), 1);
6037        assert!(theta.is_empty());
6038
6039        // With only one table in the set, the predicate is skipped.
6040        let mut just_a: HashSet<String> = HashSet::new();
6041        just_a.insert("a".to_owned());
6042        let (equi, theta) = collect_join_predicates(&terms, &just_a);
6043        assert!(equi.is_empty());
6044        assert!(theta.is_empty());
6045
6046        // An empty term list yields nothing.
6047        let (equi, theta) = collect_join_predicates(&[], &set);
6048        assert!(equi.is_empty() && theta.is_empty());
6049    }
6050
6051    #[test]
6052    fn test_has_join_predicate_detects_equi_join_either_orientation() {
6053        // has_join_predicate finds an equi-join column predicate between two
6054        // tables in either argument order, case-insensitively; absent or
6055        // unrelated tables yield false.
6056        let terms = [join_term("a", "x", "b", "y")]; // a.x = b.y
6057
6058        assert!(has_join_predicate("a", "b", &terms));
6059        assert!(
6060            has_join_predicate("b", "a", &terms),
6061            "either argument order"
6062        );
6063        assert!(has_join_predicate("A", "B", &terms), "case-insensitive");
6064        assert!(!has_join_predicate("a", "c", &terms), "no predicate to c");
6065        assert!(!has_join_predicate("c", "d", &terms));
6066        assert!(
6067            !has_join_predicate("a", "b", &[]),
6068            "no terms -> no predicate"
6069        );
6070    }
6071
6072    #[test]
6073    fn test_order_indices_to_names() {
6074        // order_indices_to_names applies a permutation of indices to a tables
6075        // slice, producing the named permutation.
6076        let tables = vec![
6077            table_stats("a", 1, 1),
6078            table_stats("b", 1, 1),
6079            table_stats("c", 1, 1),
6080        ];
6081        // Empty order -> empty vec.
6082        assert!(order_indices_to_names(&[], &tables).is_empty());
6083        // Identity preserves the table order.
6084        assert_eq!(
6085            order_indices_to_names(&[0, 1, 2], &tables),
6086            vec!["a".to_owned(), "b".to_owned(), "c".to_owned()]
6087        );
6088        // A non-trivial permutation reorders the names accordingly.
6089        assert_eq!(
6090            order_indices_to_names(&[2, 0, 1], &tables),
6091            vec!["c".to_owned(), "a".to_owned(), "b".to_owned()]
6092        );
6093        // A single-element permutation yields just that table's name.
6094        assert_eq!(order_indices_to_names(&[1], &tables), vec!["b".to_owned()]);
6095    }
6096
6097    #[test]
6098    fn test_plan_cache_key_with_feature_flags() {
6099        // plan_cache_key_with_feature_flags packs the schema cookie and feature
6100        // toggles into the xxh3 seed, so distinct (sql, cookie, flags) tuples
6101        // get distinct keys. The function is deterministic for fixed inputs.
6102        let sql = "SELECT * FROM t";
6103
6104        // Determinism: same inputs -> same key.
6105        assert_eq!(
6106            plan_cache_key_with_feature_flags(sql, 1, PlannerFeatureFlags::default()),
6107            plan_cache_key_with_feature_flags(sql, 1, PlannerFeatureFlags::default())
6108        );
6109
6110        // The four feature-flag combinations produce four distinct keys.
6111        let kd = plan_cache_key_with_feature_flags(sql, 1, PlannerFeatureFlags::default());
6112        let kl = plan_cache_key_with_feature_flags(
6113            sql,
6114            1,
6115            PlannerFeatureFlags {
6116                leapfrog_join: true,
6117                ..PlannerFeatureFlags::default()
6118            },
6119        );
6120        let kp = plan_cache_key_with_feature_flags(
6121            sql,
6122            1,
6123            PlannerFeatureFlags {
6124                dpccp_join: true,
6125                ..PlannerFeatureFlags::default()
6126            },
6127        );
6128        let kb = plan_cache_key_with_feature_flags(
6129            sql,
6130            1,
6131            PlannerFeatureFlags {
6132                leapfrog_join: true,
6133                dpccp_join: true,
6134            },
6135        );
6136        let set: std::collections::HashSet<u64> = [kd, kl, kp, kb].into_iter().collect();
6137        assert_eq!(
6138            set.len(),
6139            4,
6140            "all four feature-flag combinations must produce distinct keys"
6141        );
6142
6143        // Different schema cookies -> different keys.
6144        assert_ne!(
6145            plan_cache_key_with_feature_flags(sql, 1, PlannerFeatureFlags::default()),
6146            plan_cache_key_with_feature_flags(sql, 2, PlannerFeatureFlags::default())
6147        );
6148
6149        // Different SQL -> different keys.
6150        assert_ne!(
6151            plan_cache_key_with_feature_flags(sql, 1, PlannerFeatureFlags::default()),
6152            plan_cache_key_with_feature_flags("SELECT 1", 1, PlannerFeatureFlags::default())
6153        );
6154    }
6155
6156    #[test]
6157    fn test_prepare_plan_cache_lookup_evicts_stale_hot_entry() {
6158        // No-op when the hot key is None or matches the lookup key; on a
6159        // mismatch it flushes the LRU touch and clears the hot cache so the
6160        // upcoming lookup starts clean.
6161        let mut p = QueryPlanner::new();
6162
6163        // Fresh: hot key None -> is_some_and false -> no-op.
6164        p.prepare_plan_cache_lookup(42);
6165        assert!(p.hot_plan_cache_key.is_none());
6166        assert!(p.hot_plan_cache_plan.is_none());
6167
6168        // Same hot key as the lookup -> no-op (state preserved).
6169        p.hot_plan_cache_key = Some(42);
6170        p.hot_plan_cache_needs_lru_touch = true;
6171        p.prepare_plan_cache_lookup(42);
6172        assert_eq!(p.hot_plan_cache_key, Some(42));
6173        assert!(p.hot_plan_cache_needs_lru_touch);
6174
6175        // Different key -> flushes (clears the touch flag) and clears the hot
6176        // cache so the upcoming lookup starts clean.
6177        p.hot_plan_cache_key = Some(42);
6178        p.hot_plan_cache_needs_lru_touch = true;
6179        p.prepare_plan_cache_lookup(99);
6180        assert!(p.hot_plan_cache_key.is_none());
6181        assert!(p.hot_plan_cache_plan.is_none());
6182        assert!(!p.hot_plan_cache_needs_lru_touch);
6183    }
6184
6185    #[test]
6186    fn test_flush_hot_plan_cache_lru_touch_clears_flag() {
6187        // flush is a no-op when needs_lru_touch is false; otherwise it touches
6188        // the LRU for the cached key (discarding the result) and clears the
6189        // flag. The flag-transition is directly observable.
6190        let mut p = QueryPlanner::new();
6191        // Fresh: flag is false, flush is a no-op.
6192        assert!(!p.hot_plan_cache_needs_lru_touch);
6193        p.flush_hot_plan_cache_lru_touch();
6194        assert!(!p.hot_plan_cache_needs_lru_touch);
6195
6196        // Flag true, no key -> still clears the flag (no plan_cache.get).
6197        p.hot_plan_cache_needs_lru_touch = true;
6198        p.flush_hot_plan_cache_lru_touch();
6199        assert!(!p.hot_plan_cache_needs_lru_touch);
6200
6201        // Flag true with a key -> plan_cache.get is called (returns None on
6202        // an empty cache, ignored), and the flag is cleared. The cache stays
6203        // empty because get does not insert.
6204        p.hot_plan_cache_key = Some(42);
6205        p.hot_plan_cache_needs_lru_touch = true;
6206        p.flush_hot_plan_cache_lru_touch();
6207        assert!(!p.hot_plan_cache_needs_lru_touch);
6208        assert!(p.is_plan_cache_empty());
6209    }
6210
6211    #[test]
6212    fn test_lookup_hot_plan_cache_and_clear() {
6213        // On a fresh planner the hot cache is empty; any lookup misses without
6214        // setting needs_lru_touch. Seeding just the key (no plan) makes a
6215        // matching lookup still return None but trigger the needs_lru_touch
6216        // side effect because the key matched. A non-matching lookup leaves
6217        // needs_lru_touch alone. clear_hot_plan_cache zeroes all three fields.
6218        let mut p = QueryPlanner::new();
6219        assert!(p.lookup_hot_plan_cache(42).is_none());
6220        assert!(!p.hot_plan_cache_needs_lru_touch);
6221
6222        // Seed the key only; plan stays None.
6223        p.hot_plan_cache_key = Some(42);
6224        assert!(p.lookup_hot_plan_cache(42).is_none()); // plan is None
6225        assert!(p.hot_plan_cache_needs_lru_touch); // side effect: key matched
6226
6227        // A non-matching lookup does not touch the side-effect flag.
6228        assert!(p.lookup_hot_plan_cache(99).is_none());
6229        assert!(p.hot_plan_cache_needs_lru_touch); // unchanged
6230
6231        // clear_hot_plan_cache zeroes all three hot-cache fields.
6232        p.clear_hot_plan_cache();
6233        assert!(p.hot_plan_cache_key.is_none());
6234        assert!(p.hot_plan_cache_plan.is_none());
6235        assert!(!p.hot_plan_cache_needs_lru_touch);
6236    }
6237
6238    #[test]
6239    fn test_invalidate_plan_cache_if_schema_cookie_changed_tracks_cookie() {
6240        // Tracks the latest schema cookie on every call and clears the cache
6241        // when the new cookie differs from the cached one. With an empty cache
6242        // we directly observe the cookie tracking; the clear-on-change effect
6243        // on an already-empty cache is a no-op but the cookie transitions are.
6244        let mut p = QueryPlanner::new();
6245        assert_eq!(p.cached_schema_cookie, None);
6246        assert!(p.is_plan_cache_empty());
6247
6248        // First call seeds the cookie without clearing (no prior cookie).
6249        p.invalidate_plan_cache_if_schema_cookie_changed(5);
6250        assert_eq!(p.cached_schema_cookie, Some(5));
6251        assert!(p.is_plan_cache_empty());
6252
6253        // Same cookie -> no change (the is_some_and predicate is false).
6254        p.invalidate_plan_cache_if_schema_cookie_changed(5);
6255        assert_eq!(p.cached_schema_cookie, Some(5));
6256
6257        // Different cookie -> cache cleared (still empty here) and cookie
6258        // updated to the new value.
6259        p.invalidate_plan_cache_if_schema_cookie_changed(7);
6260        assert_eq!(p.cached_schema_cookie, Some(7));
6261        assert!(p.is_plan_cache_empty());
6262    }
6263
6264    #[test]
6265    fn test_is_plan_cache_empty_and_clear_on_fresh_planner() {
6266        // A freshly-constructed QueryPlanner has an empty plan cache (owned
6267        // state, no globals), custom capacities are empty initially, capacity 0
6268        // is clamped to 1 but still empty, and clear_plan_cache is idempotent
6269        // on an already-empty cache.
6270        let p = QueryPlanner::new();
6271        assert!(p.is_plan_cache_empty());
6272
6273        let p2 = QueryPlanner::with_plan_cache_capacity(8);
6274        assert!(p2.is_plan_cache_empty());
6275
6276        let p3 = QueryPlanner::with_plan_cache_capacity(0);
6277        assert!(p3.is_plan_cache_empty());
6278
6279        let mut p4 = QueryPlanner::new();
6280        p4.clear_plan_cache();
6281        assert!(p4.is_plan_cache_empty());
6282    }
6283
6284    #[test]
6285    fn test_normalize_plan_cache_capacity_floors_at_one() {
6286        // A requested plan-cache capacity is clamped to a non-zero value: 0
6287        // becomes 1 (no zero-capacity cache), positive values pass through.
6288        assert_eq!(normalize_plan_cache_capacity(0).get(), 1);
6289        assert_eq!(normalize_plan_cache_capacity(1).get(), 1);
6290        assert_eq!(normalize_plan_cache_capacity(10).get(), 10);
6291    }
6292
6293    #[test]
6294    fn test_ordered_subset_preserves_join_order() {
6295        // ordered_subset keeps only the selected tables but in join_order's
6296        // order (not the set's), and ignores selected tables absent from the
6297        // join order.
6298        let order: Vec<String> = ["c", "a", "b", "d"]
6299            .iter()
6300            .map(|s| (*s).to_owned())
6301            .collect();
6302
6303        let sel: HashSet<String> = ["a", "d"].iter().map(|s| (*s).to_owned()).collect();
6304        assert_eq!(
6305            ordered_subset(&order, &sel),
6306            vec!["a".to_owned(), "d".to_owned()] // join-order order, not set order
6307        );
6308
6309        // Selecting everything returns the join order unchanged.
6310        let all: HashSet<String> = ["a", "b", "c", "d"]
6311            .iter()
6312            .map(|s| (*s).to_owned())
6313            .collect();
6314        assert_eq!(ordered_subset(&order, &all), order);
6315
6316        // An empty selection yields nothing.
6317        assert!(ordered_subset(&order, &HashSet::new()).is_empty());
6318
6319        // A selected table absent from the join order is ignored.
6320        let extra: HashSet<String> = ["a", "x"].iter().map(|s| (*s).to_owned()).collect();
6321        assert_eq!(ordered_subset(&order, &extra), vec!["a".to_owned()]);
6322    }
6323
6324    #[test]
6325    fn test_cross_join_allowed_indices_via_tables() {
6326        // Index-based variant of cross_join_allowed: current_path holds table
6327        // indices, resolved against tables[*idx].name for the ordering check.
6328        let tables = vec![
6329            table_stats("a", 1, 1),
6330            table_stats("b", 1, 1),
6331            table_stats("c", 1, 1),
6332        ];
6333        let pairs = vec![("A".to_owned(), "B".to_owned())];
6334
6335        // B before A (empty path) -> false.
6336        assert!(!cross_join_allowed_indices(&[], "B", &tables, &pairs));
6337        // B after A: path [0] resolves to "a", matching "A" case-insensitively.
6338        assert!(cross_join_allowed_indices(&[0], "B", &tables, &pairs));
6339        // A (the left side) is unconstrained.
6340        assert!(cross_join_allowed_indices(&[], "A", &tables, &pairs));
6341        // A table not in any pair is allowed.
6342        assert!(cross_join_allowed_indices(&[], "C", &tables, &pairs));
6343        // Case-insensitive on both the candidate and the resolved table name.
6344        assert!(cross_join_allowed_indices(&[0], "b", &tables, &pairs));
6345    }
6346
6347    #[test]
6348    fn test_cross_join_allowed_enforces_right_after_left_ordering() {
6349        // For a cross-join pair (A, B), B may only be placed after A in the join
6350        // order. cross_join_allowed enforces this case-insensitively; candidates
6351        // not on the right of any pair are always allowed.
6352        let pairs = vec![("A".to_owned(), "B".to_owned())];
6353
6354        // B before A is not allowed (A is not yet in the path).
6355        assert!(!cross_join_allowed(&[], "B", &pairs));
6356        // B after A is allowed.
6357        assert!(cross_join_allowed(&["A".to_owned()], "B", &pairs));
6358        // A (the left side) is unconstrained -- allowed anywhere.
6359        assert!(cross_join_allowed(&[], "A", &pairs));
6360        // A table not in any pair is allowed.
6361        assert!(cross_join_allowed(&[], "C", &pairs));
6362        // The check is case-insensitive on both the candidate and the path.
6363        assert!(!cross_join_allowed(&[], "b", &pairs));
6364        assert!(cross_join_allowed(&["a".to_owned()], "b", &pairs));
6365    }
6366
6367    #[test]
6368    fn test_collect_disjuncts_flattens_or_tree_regardless_of_nesting() {
6369        // Symmetric pair to test_collect_conjuncts: collect_disjuncts recurses
6370        // on OR (both sides), so any OR tree flattens to its leaves regardless
6371        // of nesting; a non-OR expression yields a single disjunct.
6372        let leaf = |n: i64| Expr::Literal(Literal::Integer(n), Span::ZERO);
6373        let or = |l: Expr, r: Expr| Expr::BinaryOp {
6374            left: Box::new(l),
6375            op: AstBinaryOp::Or,
6376            right: Box::new(r),
6377            span: Span::ZERO,
6378        };
6379        let count = |e: &Expr| {
6380            let mut v = Vec::new();
6381            collect_disjuncts(e, &mut v);
6382            v.len()
6383        };
6384
6385        // A non-OR expression is a single disjunct.
6386        assert_eq!(count(&leaf(1)), 1);
6387        // a OR b -> 2.
6388        assert_eq!(count(&or(leaf(1), leaf(2))), 2);
6389        // Right-nested a OR (b OR c) -> 3.
6390        assert_eq!(count(&or(leaf(1), or(leaf(2), leaf(3)))), 3);
6391        // Left-nested (a OR b) OR c -> 3.
6392        assert_eq!(count(&or(or(leaf(1), leaf(2)), leaf(3))), 3);
6393        // Balanced (a OR b) OR (c OR d) -> 4.
6394        assert_eq!(count(&or(or(leaf(1), leaf(2)), or(leaf(3), leaf(4)))), 4);
6395    }
6396
6397    #[test]
6398    fn test_collect_conjuncts_flattens_and_tree_regardless_of_nesting() {
6399        // collect_conjuncts recursively splits on AND (both sides), so any AND
6400        // tree flattens to its leaves no matter how it is nested; a non-AND
6401        // expression yields a single conjunct.
6402        let leaf = |n: i64| Expr::Literal(Literal::Integer(n), Span::ZERO);
6403        let and = |l: Expr, r: Expr| Expr::BinaryOp {
6404            left: Box::new(l),
6405            op: AstBinaryOp::And,
6406            right: Box::new(r),
6407            span: Span::ZERO,
6408        };
6409        let count = |e: &Expr| {
6410            let mut v = Vec::new();
6411            collect_conjuncts(e, &mut v);
6412            v.len()
6413        };
6414
6415        // A non-AND expression is a single conjunct.
6416        assert_eq!(count(&leaf(1)), 1);
6417        // a AND b -> 2.
6418        assert_eq!(count(&and(leaf(1), leaf(2))), 2);
6419        // Right-nested a AND (b AND c) -> 3.
6420        assert_eq!(count(&and(leaf(1), and(leaf(2), leaf(3)))), 3);
6421        // Left-nested (a AND b) AND c -> 3.
6422        assert_eq!(count(&and(and(leaf(1), leaf(2)), leaf(3))), 3);
6423        // Balanced (a AND b) AND (c AND d) -> 4.
6424        assert_eq!(count(&and(and(leaf(1), leaf(2)), and(leaf(3), leaf(4)))), 4);
6425    }
6426
6427    #[test]
6428    fn test_classify_or_disjunction_as_in_list() {
6429        // a = 1 OR a = 2 OR a = 3 classifies as an IN-list on column a with 3
6430        // disjuncts. Mixed columns, a single (non-OR) equality, and a non-
6431        // equality disjunct all decline.
6432        let col = |n: &str| Box::new(Expr::Column(ColumnRef::bare(n), Span::ZERO));
6433        let lit = |n: i64| Box::new(Expr::Literal(Literal::Integer(n), Span::ZERO));
6434        let eqc = |c: &str, n: i64| Expr::BinaryOp {
6435            left: col(c),
6436            op: AstBinaryOp::Eq,
6437            right: lit(n),
6438            span: Span::ZERO,
6439        };
6440        let or = |l: Expr, r: Expr| Expr::BinaryOp {
6441            left: Box::new(l),
6442            op: AstBinaryOp::Or,
6443            right: Box::new(r),
6444            span: Span::ZERO,
6445        };
6446
6447        // a = 1 OR a = 2 OR a = 3 -> IN-list on a, 3 disjuncts.
6448        let three = or(eqc("a", 1), or(eqc("a", 2), eqc("a", 3)));
6449        assert_eq!(
6450            classify_or_disjunction_as_in_list(&three),
6451            Some((
6452                WhereColumn {
6453                    table: None,
6454                    column: "a".to_owned()
6455                },
6456                3
6457            ))
6458        );
6459
6460        // Mixed columns decline.
6461        assert!(classify_or_disjunction_as_in_list(&or(eqc("a", 1), eqc("b", 2))).is_none());
6462
6463        // A single equality (no OR) has too few disjuncts.
6464        assert!(classify_or_disjunction_as_in_list(&eqc("a", 1)).is_none());
6465
6466        // A non-equality disjunct declines.
6467        let gt = Expr::BinaryOp {
6468            left: col("a"),
6469            op: AstBinaryOp::Gt,
6470            right: lit(2),
6471            span: Span::ZERO,
6472        };
6473        assert!(classify_or_disjunction_as_in_list(&or(eqc("a", 1), gt)).is_none());
6474    }
6475
6476    #[test]
6477    fn test_extract_comparison_operand_returns_other_side_of_column_comparison() {
6478        // extract_comparison_operand returns the non-column side of a binary
6479        // comparison: a column on the left yields the right operand and vice
6480        // versa; with no column operand (or a non-BinaryOp) it yields None.
6481        let col = |n: &str| Box::new(Expr::Column(ColumnRef::bare(n), Span::ZERO));
6482        let lit = |n: i64| Box::new(Expr::Literal(Literal::Integer(n), Span::ZERO));
6483        let binop = |l: Box<Expr>, r: Box<Expr>| Expr::BinaryOp {
6484            left: l,
6485            op: AstBinaryOp::Eq,
6486            right: r,
6487            span: Span::ZERO,
6488        };
6489
6490        // x = 5 -> the literal 5 (column on the left).
6491        assert!(matches!(
6492            extract_comparison_operand(&binop(col("x"), lit(5))),
6493            Some(Expr::Literal(Literal::Integer(5), _))
6494        ));
6495        // 5 = x -> the literal 5 (column on the right).
6496        assert!(matches!(
6497            extract_comparison_operand(&binop(lit(5), col("x"))),
6498            Some(Expr::Literal(Literal::Integer(5), _))
6499        ));
6500        // No column operand -> None.
6501        assert!(extract_comparison_operand(&binop(lit(5), lit(6))).is_none());
6502        // Not a binary op -> None.
6503        assert!(
6504            extract_comparison_operand(&Expr::Literal(Literal::Integer(1), Span::ZERO)).is_none()
6505        );
6506    }
6507
6508    #[test]
6509    fn test_like_prefix_upper_bound() {
6510        // The exclusive upper bound for a LIKE-prefix range scan increments the
6511        // last incrementable character and truncates after it; a trailing
6512        // char::MAX rolls over to the previous character.
6513        assert_eq!(like_prefix_upper_bound("abc").as_deref(), Some("abd"));
6514        assert_eq!(like_prefix_upper_bound("a").as_deref(), Some("b"));
6515        // Empty prefix has no upper bound.
6516        assert_eq!(like_prefix_upper_bound(""), None);
6517        // A trailing char::MAX rolls over: it is skipped and the previous
6518        // character is incremented (truncating the max away).
6519        let with_max = format!("a{}", char::MAX);
6520        assert_eq!(like_prefix_upper_bound(&with_max).as_deref(), Some("b"));
6521        // A lone char::MAX cannot be incremented -> None.
6522        assert_eq!(like_prefix_upper_bound(&char::MAX.to_string()), None);
6523    }
6524
6525    #[test]
6526    fn test_is_like_prefix_safe_for_column_rejects_ascii_alphabetic_prefixes() {
6527        // A LIKE prefix is safe for a prefix-range index scan only when it
6528        // contains no ASCII alphabetic characters: default LIKE folds ASCII
6529        // letter case, so an alphabetic prefix could miss the opposite-case rows
6530        // a plain range scan would skip. Digits, punctuation, an empty prefix,
6531        // and non-ASCII letters are safe.
6532        assert!(is_like_prefix_safe_for_column(None, "123"));
6533        assert!(is_like_prefix_safe_for_column(None, ""));
6534        assert!(is_like_prefix_safe_for_column(None, "_5%"));
6535        // Any ASCII letter makes the prefix unsafe.
6536        assert!(!is_like_prefix_safe_for_column(None, "abc"));
6537        assert!(!is_like_prefix_safe_for_column(None, "1a"));
6538        assert!(!is_like_prefix_safe_for_column(None, "Z"));
6539        // Non-ASCII letters are not ASCII-alphabetic, so they stay safe.
6540        assert!(is_like_prefix_safe_for_column(None, "é"));
6541    }
6542
6543    #[test]
6544    fn test_union_find() {
6545        // UnionFind starts with each index as its own root; union merges sets so
6546        // find on either side returns the same root; merging already-merged sets
6547        // and self-union are no-ops. Standard union-by-rank with path
6548        // compression in find.
6549        let mut uf = UnionFind::new(5);
6550        for i in 0..5 {
6551            assert_eq!(uf.find(i), i);
6552        }
6553
6554        // union(0, 1): they share a root.
6555        uf.union(0, 1);
6556        let r0 = uf.find(0);
6557        assert_eq!(uf.find(1), r0);
6558
6559        // union(2, 3): a second group with a different root.
6560        uf.union(2, 3);
6561        let r2 = uf.find(2);
6562        assert_eq!(uf.find(3), r2);
6563        assert_ne!(r0, r2);
6564        // Index 4 is still alone.
6565        assert_eq!(uf.find(4), 4);
6566
6567        // Merge the two groups: {0,1,2,3} now share a single root.
6568        uf.union(0, 2);
6569        let r = uf.find(0);
6570        for i in [1, 2, 3] {
6571            assert_eq!(uf.find(i), r);
6572        }
6573        assert_eq!(uf.find(4), 4); // 4 still separate
6574
6575        // Self-union and re-union of already-merged are no-ops.
6576        uf.union(0, 0);
6577        uf.union(0, 2);
6578        assert_eq!(uf.find(2), r);
6579    }
6580
6581    #[test]
6582    fn test_connected_components_groups_join_connected_tables() {
6583        // connected_components builds a join graph from equi-join predicates and
6584        // returns the sets of tables reachable from one another.
6585        let pred = |lt: &str, rt: &str| EquiJoinPredicate {
6586            left: ColumnKey {
6587                table: lt.to_owned(),
6588                column: "x".to_owned(),
6589            },
6590            right: ColumnKey {
6591                table: rt.to_owned(),
6592                column: "y".to_owned(),
6593            },
6594        };
6595        let tables = vec!["a".to_owned(), "b".to_owned(), "c".to_owned()];
6596
6597        // a joined to b; c isolated -> components of size 2 ({a,b}) and 1 ({c}).
6598        let comps = connected_components(&tables, &[pred("a", "b")]);
6599        let mut sizes: Vec<usize> = comps.iter().map(Vec::len).collect();
6600        sizes.sort_unstable();
6601        assert_eq!(sizes, vec![1, 2]);
6602
6603        // a-b-c chain -> one component covering all three.
6604        let comps = connected_components(&tables, &[pred("a", "b"), pred("b", "c")]);
6605        assert_eq!(comps.len(), 1);
6606        assert_eq!(comps[0].len(), 3);
6607
6608        // No predicates -> each table is its own component.
6609        let comps = connected_components(&tables, &[]);
6610        assert_eq!(comps.len(), 3);
6611        assert!(comps.iter().all(|c| c.len() == 1));
6612
6613        // No tables -> no components.
6614        assert!(connected_components(&[], &[pred("a", "b")]).is_empty());
6615    }
6616
6617    #[test]
6618    fn test_column_exists_ignore_case() {
6619        // column_exists_ignore_case is a case-insensitive membership check over
6620        // a column-name list.
6621        let cols = vec!["Name".to_owned(), "Age".to_owned()];
6622        assert!(column_exists_ignore_case(&cols, "Name")); // exact
6623        assert!(column_exists_ignore_case(&cols, "name")); // case-insensitive
6624        assert!(column_exists_ignore_case(&cols, "AGE"));
6625        assert!(!column_exists_ignore_case(&cols, "id")); // absent
6626        assert!(!column_exists_ignore_case(&[], "name")); // empty list
6627    }
6628
6629    #[test]
6630    fn test_extract_range_probe_for_column() {
6631        // For the leading column, an equality term yields an Equality probe and
6632        // a range term (x > 5) yields a Range probe; terms on other columns (or
6633        // no terms) yield no probe.
6634        match extract_range_probe_for_column(&[eq_term_value("x", 5)], "x") {
6635            Some(AccessPathProbe::Equality { column, .. }) => assert_eq!(column, "x"),
6636            _ => panic!("expected an Equality probe"),
6637        }
6638        assert!(matches!(
6639            extract_range_probe_for_column(&[range_term("x")], "x"),
6640            Some(AccessPathProbe::Range { .. })
6641        ));
6642        // A term on a different column yields nothing for the leading column.
6643        assert!(extract_range_probe_for_column(&[eq_term_value("y", 5)], "x").is_none());
6644        // No terms -> no probe.
6645        assert!(extract_range_probe_for_column(&[], "x").is_none());
6646    }
6647
6648    #[test]
6649    fn test_extract_in_list_probe() {
6650        // x IN (1, 2, 3) yields an InList probe carrying the column and its
6651        // values; an empty list, NOT IN, and a non-IN expression all yield None.
6652        let col = || Box::new(Expr::Column(ColumnRef::bare("x"), Span::ZERO));
6653        let lit = |n: i64| Expr::Literal(Literal::Integer(n), Span::ZERO);
6654        let in_expr = |items: Vec<Expr>, not: bool| Expr::In {
6655            expr: col(),
6656            set: InSet::List(items),
6657            not,
6658            span: Span::ZERO,
6659        };
6660
6661        match extract_in_list_probe(&in_expr(vec![lit(1), lit(2), lit(3)], false), "x") {
6662            Some(AccessPathProbe::InList { column, values }) => {
6663                assert_eq!(column, "x");
6664                assert_eq!(values.len(), 3);
6665            }
6666            _ => panic!("expected an InList probe"),
6667        }
6668
6669        // Empty list -> None.
6670        assert!(extract_in_list_probe(&in_expr(vec![], false), "x").is_none());
6671        // x NOT IN (1, 2) -> None.
6672        assert!(extract_in_list_probe(&in_expr(vec![lit(1), lit(2)], true), "x").is_none());
6673        // A non-IN expression -> None.
6674        assert!(
6675            extract_in_list_probe(&Expr::Literal(Literal::Integer(1), Span::ZERO), "x").is_none()
6676        );
6677    }
6678
6679    #[test]
6680    fn test_reverse_comparison_op() {
6681        use AstBinaryOp::{Add, Eq, Ge, Gt, Le, Lt, Ne};
6682        // Reversing a comparison swaps operand order: Eq is symmetric, and
6683        // Lt<->Gt, Le<->Ge swap. Ne and non-comparison ops return None.
6684        assert!(matches!(reverse_comparison_op(Eq), Some(Eq)));
6685        assert!(matches!(reverse_comparison_op(Lt), Some(Gt)));
6686        assert!(matches!(reverse_comparison_op(Gt), Some(Lt)));
6687        assert!(matches!(reverse_comparison_op(Le), Some(Ge)));
6688        assert!(matches!(reverse_comparison_op(Ge), Some(Le)));
6689        assert!(reverse_comparison_op(Ne).is_none());
6690        assert!(reverse_comparison_op(Add).is_none());
6691    }
6692
6693    #[test]
6694    fn test_normalize_column_literal_comparison_orients_column_left() {
6695        // normalize_column_literal_comparison puts the column on the left: a
6696        // column-OP-literal comparison keeps its op, while literal-OP-column
6697        // reverses the op (5 < x becomes x > 5). A non-comparison op and a
6698        // column-column comparison normalize to None.
6699        let col = |n: &str| Box::new(Expr::Column(ColumnRef::bare(n), Span::ZERO));
6700        let lit = |n: i64| Box::new(Expr::Literal(Literal::Integer(n), Span::ZERO));
6701        let bin = |l: Box<Expr>, op: AstBinaryOp, r: Box<Expr>| Expr::BinaryOp {
6702            left: l,
6703            op,
6704            right: r,
6705            span: Span::ZERO,
6706        };
6707
6708        // x > 5 -> column x, op Gt, literal 5.
6709        let n =
6710            normalize_column_literal_comparison(&bin(col("x"), AstBinaryOp::Gt, lit(5))).unwrap();
6711        assert_eq!(n.column.column, "x");
6712        assert!(matches!(n.op, AstBinaryOp::Gt));
6713        assert!(matches!(n.literal, Literal::Integer(5)));
6714
6715        // 5 < x -> column x, op reversed to Gt, literal 5.
6716        let n =
6717            normalize_column_literal_comparison(&bin(lit(5), AstBinaryOp::Lt, col("x"))).unwrap();
6718        assert_eq!(n.column.column, "x");
6719        assert!(matches!(n.op, AstBinaryOp::Gt));
6720        assert!(matches!(n.literal, Literal::Integer(5)));
6721
6722        // A non-comparison op (Add) normalizes to None.
6723        assert!(
6724            normalize_column_literal_comparison(&bin(col("x"), AstBinaryOp::Add, lit(5))).is_none()
6725        );
6726        // A column-column comparison (no literal) normalizes to None.
6727        assert!(
6728            normalize_column_literal_comparison(&bin(col("x"), AstBinaryOp::Eq, col("y")))
6729                .is_none()
6730        );
6731    }
6732
6733    #[test]
6734    fn test_where_terms_imply_predicate() {
6735        // For every AND-conjunct of the predicate, some term must imply it (via
6736        // expr_implies_partial_predicate). Pinning the multi-conjunct and
6737        // empty-terms behaviors.
6738        let col = |n: &str| Box::new(Expr::Column(ColumnRef::bare(n), Span::ZERO));
6739        let is_not_null = |n: &str| Expr::IsNull {
6740            expr: col(n),
6741            not: true,
6742            span: Span::ZERO,
6743        };
6744        let and = |l: Expr, r: Expr| Expr::BinaryOp {
6745            left: Box::new(l),
6746            op: AstBinaryOp::And,
6747            right: Box::new(r),
6748            span: Span::ZERO,
6749        };
6750
6751        // x = 5 implies x IS NOT NULL.
6752        let terms = [eq_term_value("x", 5)];
6753        assert!(where_terms_imply_predicate(&terms, &is_not_null("x")));
6754
6755        // x = 5 does not imply y IS NOT NULL (different column).
6756        assert!(!where_terms_imply_predicate(&terms, &is_not_null("y")));
6757
6758        // Both terms together imply (x IS NOT NULL AND y IS NOT NULL).
6759        let both = [eq_term_value("x", 5), eq_term_value("y", 7)];
6760        assert!(where_terms_imply_predicate(
6761            &both,
6762            &and(is_not_null("x"), is_not_null("y"))
6763        ));
6764
6765        // Only the x term -> the y conjunct is unimplied -> overall false.
6766        assert!(!where_terms_imply_predicate(
6767            &terms,
6768            &and(is_not_null("x"), is_not_null("y"))
6769        ));
6770
6771        // No terms -> any() over empty is false -> no implication possible.
6772        assert!(!where_terms_imply_predicate(&[], &is_not_null("x")));
6773    }
6774
6775    #[test]
6776    fn test_expr_implies_partial_predicate() {
6777        // A query predicate implies a partial-index predicate when it is
6778        // structurally identical, when it guarantees the column non-null for an
6779        // IS NOT NULL index predicate, or when its column-literal comparison
6780        // logically implies the predicate's (x > 10 implies x > 5, not vice versa).
6781        let col = |n: &str| Box::new(Expr::Column(ColumnRef::bare(n), Span::ZERO));
6782        let lit = |n: i64| Box::new(Expr::Literal(Literal::Integer(n), Span::ZERO));
6783        let cmp = |c: &str, op: AstBinaryOp, n: i64| Expr::BinaryOp {
6784            left: col(c),
6785            op,
6786            right: lit(n),
6787            span: Span::ZERO,
6788        };
6789
6790        // Structural identity implies trivially.
6791        assert!(expr_implies_partial_predicate(
6792            &cmp("x", AstBinaryOp::Eq, 5),
6793            &cmp("x", AstBinaryOp::Eq, 5)
6794        ));
6795
6796        // x > 10 implies x > 5; the reverse does not.
6797        assert!(expr_implies_partial_predicate(
6798            &cmp("x", AstBinaryOp::Gt, 10),
6799            &cmp("x", AstBinaryOp::Gt, 5)
6800        ));
6801        assert!(!expr_implies_partial_predicate(
6802            &cmp("x", AstBinaryOp::Gt, 5),
6803            &cmp("x", AstBinaryOp::Gt, 10)
6804        ));
6805
6806        // x = 5 implies the partial-index predicate x IS NOT NULL.
6807        let is_not_null = Expr::IsNull {
6808            expr: col("x"),
6809            not: true,
6810            span: Span::ZERO,
6811        };
6812        assert!(expr_implies_partial_predicate(
6813            &cmp("x", AstBinaryOp::Eq, 5),
6814            &is_not_null
6815        ));
6816
6817        // Different columns do not imply.
6818        assert!(!expr_implies_partial_predicate(
6819            &cmp("x", AstBinaryOp::Eq, 5),
6820            &cmp("y", AstBinaryOp::Eq, 3)
6821        ));
6822    }
6823
6824    #[test]
6825    fn test_literal_satisfies_predicate_literal() {
6826        use AstBinaryOp::{Eq, Ge, Gt, Le, Lt, Ne};
6827        use std::cmp::Ordering::{Equal, Greater, Less};
6828
6829        // Eq is satisfied only by Equal.
6830        assert!(literal_satisfies_predicate_literal(Equal, Eq));
6831        assert!(!literal_satisfies_predicate_literal(Less, Eq));
6832        assert!(!literal_satisfies_predicate_literal(Greater, Eq));
6833        // Gt only by Greater; Ge by Greater or Equal.
6834        assert!(literal_satisfies_predicate_literal(Greater, Gt));
6835        assert!(!literal_satisfies_predicate_literal(Equal, Gt));
6836        assert!(literal_satisfies_predicate_literal(Greater, Ge));
6837        assert!(literal_satisfies_predicate_literal(Equal, Ge));
6838        assert!(!literal_satisfies_predicate_literal(Less, Ge));
6839        // Lt only by Less; Le by Less or Equal.
6840        assert!(literal_satisfies_predicate_literal(Less, Lt));
6841        assert!(!literal_satisfies_predicate_literal(Equal, Lt));
6842        assert!(literal_satisfies_predicate_literal(Less, Le));
6843        assert!(literal_satisfies_predicate_literal(Equal, Le));
6844        assert!(!literal_satisfies_predicate_literal(Greater, Le));
6845        // Unsupported ops (Ne) -> false.
6846        assert!(!literal_satisfies_predicate_literal(Equal, Ne));
6847    }
6848
6849    #[test]
6850    fn test_compare_partial_index_literals_handles_cross_type_numerics() {
6851        use std::cmp::Ordering;
6852        let int = Literal::Integer;
6853        let flt = Literal::Float;
6854
6855        // Same-type comparisons.
6856        assert_eq!(
6857            compare_partial_index_literals(&int(3), &int(5)),
6858            Some(Ordering::Less)
6859        );
6860        assert_eq!(
6861            compare_partial_index_literals(&flt(2.0), &flt(2.0)),
6862            Some(Ordering::Equal)
6863        );
6864        assert_eq!(
6865            compare_partial_index_literals(
6866                &Literal::String("a".to_owned()),
6867                &Literal::String("b".to_owned())
6868            ),
6869            Some(Ordering::Less)
6870        );
6871
6872        // Integer and Float compare numerically across types.
6873        assert_eq!(
6874            compare_partial_index_literals(&int(5), &flt(5.0)),
6875            Some(Ordering::Equal)
6876        );
6877        assert_eq!(
6878            compare_partial_index_literals(&int(3), &flt(5.0)),
6879            Some(Ordering::Less)
6880        );
6881        assert_eq!(
6882            compare_partial_index_literals(&flt(7.0), &int(2)),
6883            Some(Ordering::Greater)
6884        );
6885
6886        // Incompatible types, NULL, and NaN are unordered -> None.
6887        assert_eq!(
6888            compare_partial_index_literals(&int(1), &Literal::String("x".to_owned())),
6889            None
6890        );
6891        assert_eq!(
6892            compare_partial_index_literals(&Literal::Null, &int(1)),
6893            None
6894        );
6895        assert_eq!(
6896            compare_partial_index_literals(&flt(f64::NAN), &flt(1.0)),
6897            None
6898        );
6899    }
6900
6901    #[test]
6902    fn test_lookup_table_index_hint() {
6903        // Lookup canonicalizes the requested table name to lowercase via
6904        // canonical_table_key; the map keys must already be canonical.
6905        let mut hints: std::collections::BTreeMap<String, IndexHint> =
6906            std::collections::BTreeMap::new();
6907        hints.insert("users".to_owned(), IndexHint::NotIndexed);
6908
6909        // Hit by exact canonical key.
6910        assert!(matches!(
6911            lookup_table_index_hint("users", Some(&hints)),
6912            Some(IndexHint::NotIndexed)
6913        ));
6914        // Hit by case-insensitive lookup (USERS canonicalizes to users).
6915        assert!(matches!(
6916            lookup_table_index_hint("USERS", Some(&hints)),
6917            Some(IndexHint::NotIndexed)
6918        ));
6919        // Miss: a table name not in the map.
6920        assert!(lookup_table_index_hint("other", Some(&hints)).is_none());
6921        // No hints map at all -> None.
6922        assert!(lookup_table_index_hint("users", None).is_none());
6923    }
6924
6925    #[test]
6926    fn test_is_rowid_column_ignores_table_qualifier() {
6927        // is_rowid_column delegates to is_rowid_alias_name on the column part:
6928        // rowid / _rowid_ / oid (case-insensitive) -> true; others -> false. The
6929        // table qualifier on the WhereColumn is ignored.
6930        let wc = |table: Option<&str>, column: &str| WhereColumn {
6931            table: table.map(str::to_owned),
6932            column: column.to_owned(),
6933        };
6934        assert!(is_rowid_column(&wc(None, "rowid")));
6935        assert!(is_rowid_column(&wc(None, "ROWID")));
6936        assert!(is_rowid_column(&wc(None, "_rowid_")));
6937        assert!(is_rowid_column(&wc(None, "oid")));
6938        assert!(!is_rowid_column(&wc(None, "id")));
6939        assert!(!is_rowid_column(&wc(None, "row_id")));
6940        // The table qualifier is ignored; only the column name decides.
6941        assert!(is_rowid_column(&wc(Some("t"), "rowid")));
6942        assert!(!is_rowid_column(&wc(Some("t"), "id")));
6943    }
6944
6945    #[test]
6946    fn test_where_columns_compatible_vs_equivalent() {
6947        // _compatible: column names match (case-insens) AND tables either both
6948        // match or at least one is None (treats None as a wildcard).
6949        // _equivalent: stricter -- both tables must match OR both be None;
6950        // mixed Some/None is NOT equivalent. The Some-vs-None case is the
6951        // distinguishing input.
6952        let bare = |c: &str| WhereColumn {
6953            table: None,
6954            column: c.to_owned(),
6955        };
6956        let qual = |t: &str, c: &str| WhereColumn {
6957            table: Some(t.to_owned()),
6958            column: c.to_owned(),
6959        };
6960
6961        // Same column, both None -> both true.
6962        assert!(where_columns_compatible(&bare("x"), &bare("X")));
6963        assert!(where_columns_equivalent(&bare("x"), &bare("X")));
6964        // Same column, both same table (case-insens) -> both true.
6965        assert!(where_columns_compatible(&qual("t", "x"), &qual("T", "X")));
6966        assert!(where_columns_equivalent(&qual("t", "x"), &qual("T", "X")));
6967        // Same column, different tables -> both false.
6968        assert!(!where_columns_compatible(&qual("t", "x"), &qual("u", "x")));
6969        assert!(!where_columns_equivalent(&qual("t", "x"), &qual("u", "x")));
6970        // Mixed Some/None: compatible treats None as wildcard, equivalent doesn't.
6971        assert!(where_columns_compatible(&qual("t", "x"), &bare("x")));
6972        assert!(!where_columns_equivalent(&qual("t", "x"), &bare("x")));
6973        // Different columns -> both false (regardless of table).
6974        assert!(!where_columns_compatible(&bare("x"), &bare("y")));
6975        assert!(!where_columns_equivalent(&bare("x"), &bare("y")));
6976    }
6977
6978    #[test]
6979    fn test_qualifier_matches_table() {
6980        // A qualifier matches by table name or by alias, case-insensitively.
6981        // Table name, no alias.
6982        assert!(qualifier_matches_table("t", "t", None));
6983        assert!(qualifier_matches_table("T", "t", None)); // case-insensitive
6984        assert!(!qualifier_matches_table("u", "t", None)); // no match, no alias
6985        // With an alias, the qualifier may match either the name or the alias.
6986        assert!(qualifier_matches_table("users", "users", Some("u")));
6987        assert!(qualifier_matches_table("U", "users", Some("u"))); // alias, case-insensitive
6988        assert!(!qualifier_matches_table("x", "users", Some("u"))); // matches neither
6989    }
6990
6991    #[test]
6992    fn test_extract_qualified_column_requires_qualifier_and_canonicalizes() {
6993        // extract_qualified_column requires a table-qualified column and lower-
6994        // cases both the table and the column; bare columns and non-columns
6995        // yield None. (Distinct from extract_where_column, which accepts bare
6996        // columns and preserves case.)
6997        let qualified = Expr::Column(ColumnRef::qualified("T", "Col"), Span::ZERO);
6998        assert_eq!(
6999            extract_qualified_column(&qualified),
7000            Some(ColumnKey {
7001                table: "t".to_owned(),
7002                column: "col".to_owned()
7003            })
7004        );
7005
7006        // A bare (unqualified) column has no table -> None.
7007        assert_eq!(
7008            extract_qualified_column(&Expr::Column(ColumnRef::bare("x"), Span::ZERO)),
7009            None
7010        );
7011
7012        // A non-column expression -> None.
7013        assert_eq!(
7014            extract_qualified_column(&Expr::Literal(Literal::Integer(1), Span::ZERO)),
7015            None
7016        );
7017    }
7018
7019    #[test]
7020    fn test_extract_where_column_preserves_qualifier_and_rejects_non_columns() {
7021        // extract_where_column lifts a column reference into a WhereColumn,
7022        // preserving the table qualifier, and returns None for anything that is
7023        // not a bare column expression.
7024        let bare = Expr::Column(ColumnRef::bare("x"), Span::ZERO);
7025        assert_eq!(
7026            extract_where_column(&bare),
7027            Some(WhereColumn {
7028                table: None,
7029                column: "x".to_owned()
7030            })
7031        );
7032
7033        let qualified = Expr::Column(ColumnRef::qualified("t", "x"), Span::ZERO);
7034        assert_eq!(
7035            extract_where_column(&qualified),
7036            Some(WhereColumn {
7037                table: Some("t".to_owned()),
7038                column: "x".to_owned()
7039            })
7040        );
7041
7042        // Non-column expressions yield None.
7043        assert_eq!(
7044            extract_where_column(&Expr::Literal(Literal::Integer(1), Span::ZERO)),
7045            None
7046        );
7047        let binop = Expr::BinaryOp {
7048            left: Box::new(Expr::Column(ColumnRef::bare("x"), Span::ZERO)),
7049            op: AstBinaryOp::Eq,
7050            right: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
7051            span: Span::ZERO,
7052        };
7053        assert_eq!(extract_where_column(&binop), None);
7054    }
7055
7056    #[test]
7057    fn test_normalize_is_not_null_predicate() {
7058        // Only `<column> IS NOT NULL` normalizes to its column; IS NULL, a
7059        // non-column operand, and non-IsNull expressions yield None.
7060        let isnull = |inner: Expr, not: bool| Expr::IsNull {
7061            expr: Box::new(inner),
7062            not,
7063            span: Span::ZERO,
7064        };
7065        let col = |n: &str| Expr::Column(ColumnRef::bare(n), Span::ZERO);
7066
7067        // x IS NOT NULL -> Some(column x).
7068        assert_eq!(
7069            normalize_is_not_null_predicate(&isnull(col("x"), true)),
7070            Some(WhereColumn {
7071                table: None,
7072                column: "x".to_owned()
7073            })
7074        );
7075        // x IS NULL (not: false) -> None.
7076        assert_eq!(
7077            normalize_is_not_null_predicate(&isnull(col("x"), false)),
7078            None
7079        );
7080        // (5) IS NOT NULL -> None: the operand is not a column.
7081        assert_eq!(
7082            normalize_is_not_null_predicate(&isnull(
7083                Expr::Literal(Literal::Integer(5), Span::ZERO),
7084                true
7085            )),
7086            None
7087        );
7088        // A non-IsNull expression -> None.
7089        assert_eq!(
7090            normalize_is_not_null_predicate(&Expr::Literal(Literal::Integer(1), Span::ZERO)),
7091            None
7092        );
7093    }
7094
7095    #[test]
7096    fn test_expr_guarantees_non_null_for_matching_column() {
7097        // expr_guarantees_non_null reports whether a WHERE expression proves the
7098        // given column is non-NULL: an explicit IS NOT NULL, or a comparison to a
7099        // non-NULL literal, on the SAME column qualifies; IS NULL, a NULL
7100        // literal, or a different column does not.
7101        let pcol = WhereColumn {
7102            table: None,
7103            column: "x".to_owned(),
7104        };
7105        let col = |n: &str| Box::new(Expr::Column(ColumnRef::bare(n), Span::ZERO));
7106
7107        // x IS NOT NULL guarantees x is non-null.
7108        let is_not_null = Expr::IsNull {
7109            expr: col("x"),
7110            not: true,
7111            span: Span::ZERO,
7112        };
7113        assert!(expr_guarantees_non_null(&is_not_null, &pcol));
7114
7115        // x IS NULL does not.
7116        let is_null = Expr::IsNull {
7117            expr: col("x"),
7118            not: false,
7119            span: Span::ZERO,
7120        };
7121        assert!(!expr_guarantees_non_null(&is_null, &pcol));
7122
7123        // x = 5 (non-null literal) guarantees non-null; x = NULL does not.
7124        let eq = |lit: Literal| Expr::BinaryOp {
7125            left: col("x"),
7126            op: AstBinaryOp::Eq,
7127            right: Box::new(Expr::Literal(lit, Span::ZERO)),
7128            span: Span::ZERO,
7129        };
7130        assert!(expr_guarantees_non_null(&eq(Literal::Integer(5)), &pcol));
7131        assert!(!expr_guarantees_non_null(&eq(Literal::Null), &pcol));
7132
7133        // An IS NOT NULL on a DIFFERENT column does not help.
7134        let other = Expr::IsNull {
7135            expr: col("y"),
7136            not: true,
7137            span: Span::ZERO,
7138        };
7139        assert!(!expr_guarantees_non_null(&other, &pcol));
7140    }
7141
#[test]
fn test_estimate_cost_ext_zero_rows_matches_legacy() {
    // When n_rows == 0 the extended estimator has to agree with the legacy
    // page-only formulas, so the difference between the two must vanish.
    let full_scan = AccessPathKind::FullTableScan;
    let full_scan_delta =
        estimate_cost_ext(&full_scan, 1000, 0, 0) - estimate_cost(&full_scan, 1000, 0);
    assert!(full_scan_delta.abs() < f64::EPSILON);

    let index_eq = AccessPathKind::IndexScanEquality;
    let index_eq_delta =
        estimate_cost_ext(&index_eq, 1000, 100, 0) - estimate_cost(&index_eq, 1000, 100);
    assert!(index_eq_delta.abs() < f64::EPSILON);
}
7153
#[test]
fn test_estimate_cost_ext_full_scan_monotonic_in_n_rows() {
    // Hold the page count fixed and grow only the row count: the full-scan
    // cost has to increase at every step.
    let cost_at = |n_rows: u64| estimate_cost_ext(&AccessPathKind::FullTableScan, 100, 0, n_rows);

    let c_small = cost_at(1_000);
    let c_mid = cost_at(100_000);
    let c_big = cost_at(10_000_000);

    assert!(
        c_small < c_mid && c_mid < c_big,
        "expected monotonic growth with n_rows, got {c_small} < {c_mid} < {c_big}"
    );
}
7165
#[test]
fn test_estimate_cost_ext_range_scan_monotonic_in_n_rows() {
    // Same selectivity and page counts, more rows: an index range scan must
    // get more expensive.
    let range_scan = AccessPathKind::IndexScanRange { selectivity: 0.1 };
    let cost_at = |n_rows: u64| estimate_cost_ext(&range_scan, 1000, 100, n_rows);

    let few_rows_cost = cost_at(1_000);
    let many_rows_cost = cost_at(1_000_000);
    assert!(many_rows_cost > few_rows_cost);
}
7174
#[test]
fn test_estimate_cost_ext_ranks_point_access_below_full_scan_for_large_tables() {
    // The PLANNER-2 per-row terms are there so that, on a table with many
    // rows, a point access is ranked *below* a full scan. This checks the
    // cross-path ordering (what the planner actually consumes) rather than
    // per-path monotonicity: rowid <= index-equality << full scan.
    let (tp, ip, big) = (100u64, 50u64, 1_000_000u64);
    let cost_at = |kind: AccessPathKind, n_rows: u64| estimate_cost_ext(&kind, tp, ip, n_rows);

    let full = cost_at(AccessPathKind::FullTableScan, big);
    let eq = cost_at(AccessPathKind::IndexScanEquality, big);
    let rowid = cost_at(AccessPathKind::RowidLookup, big);

    assert!(
        rowid <= eq,
        "rowid lookup should not cost more than index equality: {rowid} vs {eq}"
    );
    assert!(
        eq < full,
        "index equality must rank below a full scan on a large table: {eq} vs {full}"
    );

    // Point accesses pay for a single matched row, so going from zero rows to
    // a million must move their cost by less than one unit.
    let eq_growth = eq - cost_at(AccessPathKind::IndexScanEquality, 0);
    let rowid_growth = rowid - cost_at(AccessPathKind::RowidLookup, 0);
    assert!(
        eq_growth < 1.0,
        "equality cost must not scale with n_rows: delta {}",
        eq_growth
    );
    assert!(
        rowid_growth < 1.0,
        "rowid cost must not scale with n_rows: delta {}",
        rowid_growth
    );

    // Sanity: with no rows the full scan costs exactly the table-page count,
    // and the large row count inflates it by more than an order of magnitude.
    let full_zero = cost_at(AccessPathKind::FullTableScan, 0);
    assert!(
        (full_zero - 100.0).abs() < f64::EPSILON,
        "n_rows=0 full scan == table pages"
    );
    assert!(
        full > full_zero * 10.0,
        "full scan must grow strongly with n_rows: {full} vs {full_zero}"
    );
}
7222
#[test]
fn test_estimate_cost_ext_scales_full_vs_index_preference() {
    // Two tables share the same small page count but differ hugely in row
    // count. With a moderately selective range scan, the index's advantage
    // over a full scan should be larger on the table with more rows.
    let range_scan = AccessPathKind::IndexScanRange { selectivity: 0.01 };

    // Full-scan cost minus index-scan cost at a given row count.
    let index_advantage = |n_rows: u64| {
        let full = estimate_cost_ext(&AccessPathKind::FullTableScan, 10, 0, n_rows);
        let indexed = estimate_cost_ext(&range_scan, 10, 5, n_rows);
        full - indexed
    };

    // Full-scan cost grows linearly in rows while the index cost grows as
    // selectivity * rows, so the gap should widen as n_rows blows up.
    let gap_small = index_advantage(100);
    let gap_big = index_advantage(10_000_000);
    assert!(
        gap_big > gap_small,
        "expected bigger index advantage at high n_rows: small_gap={gap_small}, big_gap={gap_big}"
    );
}
7245
7246    // ===================================================================
7247    // PLANNER-3: order_join_inputs_with_hints tests
7248    // ===================================================================
7249
7250    fn stats_ref(name: &str, n_pages: u64, n_rows: u64, has_stats: bool) -> TableRefWithStats {
7251        TableRefWithStats {
7252            name: name.to_owned(),
7253            n_pages,
7254            n_rows,
7255            has_stats,
7256        }
7257    }
7258
#[test]
fn test_order_joins_puts_small_relation_first() {
    // The textbook case of a 10-row table joined to a 10k-row table: the
    // small relation belongs on the build side, i.e. at position 0.
    let inputs = [
        stats_ref("t_big", 200, 10_000, true),
        stats_ref("t_small", 1, 10, true),
    ];
    let perm = order_join_inputs_with_hints(&inputs);
    assert_eq!(perm.len(), 2);

    // Name of the table the permutation places at a given slot.
    let name_at = |slot: usize| inputs[perm[slot]].name.as_str();
    assert_eq!(
        name_at(0),
        "t_small",
        "small table should sort to build-side first, got perm={perm:?}",
    );
    assert_eq!(name_at(1), "t_big");
}
7275
#[test]
fn test_order_joins_no_stats_preserves_source_order() {
    // Without ANALYZE data (has_stats = false on every entry) the planner
    // must return the identity permutation, even though the n_rows values
    // differ wildly, so callers get back the order they supplied.
    let inputs = [
        stats_ref("t_first", 200, 10_000, false),
        stats_ref("t_second", 1, 10, false),
        stats_ref("t_third", 5, 50, false),
    ];
    let identity: Vec<usize> = (0..inputs.len()).collect();

    assert_eq!(
        order_join_inputs_with_hints(&inputs),
        identity,
        "source order must be preserved when no stats are available",
    );
}
7293
#[test]
fn test_order_joins_partial_stats_still_orders() {
    // One analyzed table is enough to trigger reordering. A table without
    // stats carries n_rows == 0, which gives it the smallest scan cost and
    // therefore the front slot ("assume small until proven otherwise").
    let inputs = [
        stats_ref("t_big_analyzed", 500, 100_000, true),
        stats_ref("t_unknown", 0, 0, false),
    ];
    let perm = order_join_inputs_with_hints(&inputs);

    let (front, back) = (perm[0], perm[1]);
    assert_eq!(inputs[front].name, "t_unknown");
    assert_eq!(inputs[back].name, "t_big_analyzed");
}
7308
#[test]
fn test_order_joins_trivial_sizes() {
    // Zero inputs and one input both have only the identity ordering.
    let empty_perm = order_join_inputs_with_hints(&[]);
    assert_eq!(empty_perm, Vec::<usize>::new());

    let lone_input = [stats_ref("only", 10, 100, true)];
    let lone_perm = order_join_inputs_with_hints(&lone_input);
    assert_eq!(lone_perm, vec![0]);
}
7316
#[test]
fn test_order_joins_greedy_above_limit() {
    // N > 4 uses greedy smallest-first. Two properties are checked:
    //   1. five tables already in strictly increasing cost order come back
    //      as the identity permutation, and
    //   2. the same tables in reversed order come back fully sorted.
    // (Previously only the reversed case was asserted even though the
    // comment promised both.)
    let ascending = vec![
        stats_ref("a_1", 100, 10_000, true),
        stats_ref("a_2", 200, 20_000, true),
        stats_ref("a_3", 300, 30_000, true),
        stats_ref("a_4", 400, 40_000, true),
        stats_ref("a_5", 500, 50_000, true),
    ];
    assert_eq!(
        order_join_inputs_with_hints(&ascending),
        vec![0, 1, 2, 3, 4],
        "greedy path should keep an already-ascending input in place",
    );

    let reversed = vec![
        stats_ref("a_5", 500, 50_000, true),
        stats_ref("a_4", 400, 40_000, true),
        stats_ref("a_3", 300, 30_000, true),
        stats_ref("a_2", 200, 20_000, true),
        stats_ref("a_1", 100, 10_000, true),
    ];
    let perm = order_join_inputs_with_hints(&reversed);
    // Translate the permutation back into table names for a readable diff.
    let ordered_names: Vec<&str> = perm.iter().map(|&i| reversed[i].name.as_str()).collect();
    assert_eq!(
        ordered_names,
        vec!["a_1", "a_2", "a_3", "a_4", "a_5"],
        "greedy path should sort ascending by scan cost",
    );
}
7337
#[test]
fn test_order_joins_exhaustive_minimizes_weighted_cost() {
    // Four inputs take the exhaustive permutation search. The tiny relation
    // sits in the middle of the input on purpose: it should still win the
    // build-side slot, and the huge one should end up last.
    let inputs = [
        stats_ref("t_a", 100, 5_000, true),
        stats_ref("t_b", 50, 2_000, true),
        stats_ref("t_tiny", 1, 10, true),
        stats_ref("t_huge", 1_000, 1_000_000, true),
    ];
    let perm = order_join_inputs_with_hints(&inputs);

    // Name of the table the permutation places at a given slot.
    let name_at = |slot: usize| inputs[perm[slot]].name.as_str();
    assert_eq!(
        name_at(0),
        "t_tiny",
        "exhaustive search should pick the smallest relation first; perm={perm:?}",
    );
    assert_eq!(
        name_at(3),
        "t_huge",
        "largest relation should sink to the last probe slot; perm={perm:?}",
    );
}
7359
#[test]
fn test_order_joins_preserves_source_order_on_equal_cost_ties() {
    // Tables of equal cost must stay in source order on BOTH code paths: the
    // greedy path relies on a stable sort, and the exhaustive path scores the
    // identity permutation first and only replaces it on a strictly lower
    // cost. This pins the documented "stable keeps ties in source order"
    // contract, which the distinct-cost tests do not exercise.
    let equal_cost_inputs = |names: &[&str]| -> Vec<TableRefWithStats> {
        names
            .iter()
            .map(|name| stats_ref(name, 100, 5_000, true))
            .collect()
    };
    let identity = |len: usize| -> Vec<usize> { (0..len).collect() };

    // Exhaustive branch (N <= JOIN_ORDER_EXHAUSTIVE_LIMIT = 4).
    let exhaustive = equal_cost_inputs(&["e0", "e1", "e2"]);
    assert_eq!(
        order_join_inputs_with_hints(&exhaustive),
        identity(exhaustive.len()),
        "equal-cost tables keep source order (exhaustive branch)"
    );

    // Greedy branch (N > 4, stable sort).
    let greedy = equal_cost_inputs(&["g0", "g1", "g2", "g3", "g4"]);
    assert_eq!(
        order_join_inputs_with_hints(&greedy),
        identity(greedy.len()),
        "equal-cost tables keep source order (greedy branch)"
    );

    // Determinism: two calls on the same input agree with each other.
    let exhaustive_first = order_join_inputs_with_hints(&exhaustive);
    let exhaustive_second = order_join_inputs_with_hints(&exhaustive);
    assert_eq!(exhaustive_first, exhaustive_second);

    let greedy_first = order_join_inputs_with_hints(&greedy);
    let greedy_second = order_join_inputs_with_hints(&greedy);
    assert_eq!(greedy_first, greedy_second);
}
7404
#[test]
fn test_order_joins_from_table_stats_derives_has_stats() {
    // from_table_stats must set has_stats = true for Analyze-sourced stats
    // and false for Heuristic ones, while carrying the counts across.
    let table_with_source = |name: &str, source: StatsSource| TableStats {
        name: name.to_owned(),
        n_pages: 10,
        n_rows: 1000,
        source,
    };
    let analyzed = table_with_source("t_analyzed", StatsSource::Analyze);
    let heuristic = table_with_source("t_heur", StatsSource::Heuristic);

    let from_analyzed = TableRefWithStats::from_table_stats(&analyzed);
    let from_heuristic = TableRefWithStats::from_table_stats(&heuristic);

    assert!(from_analyzed.has_stats);
    assert!(!from_heuristic.has_stats);
    assert_eq!(from_analyzed.n_rows, 1000);
    assert_eq!(from_heuristic.n_pages, 10);
}
7428
#[test]
fn test_cost_comparison_table_scan_vs_index() {
    // Cost of a range scan over a 100-page index on a 1000-page table, as a
    // function of the fraction of rows selected.
    let range_scan_cost =
        |selectivity| estimate_cost(&AccessPathKind::IndexScanRange { selectivity }, 1000, 100);

    let full = estimate_cost(&AccessPathKind::FullTableScan, 1000, 0);

    // Selecting 1% of the rows: the index has to win.
    let idx = range_scan_cost(0.01);
    assert!(
        idx < full,
        "index scan ({idx:.1}) should be cheaper than full scan ({full:.1}) at 1% selectivity"
    );

    // Selecting 95% of the rows: the index should lose.
    // idx_high = log2(100) + 0.95*100 + 0.95*1000 = ~6.6 + 95 + 950 = ~1051,
    // which exceeds the 1000-page full scan.
    let idx_high = range_scan_cost(0.95);
    assert!(
        idx_high > full,
        "index scan ({idx_high:.1}) should be pricier than full scan ({full:.1}) at 95% selectivity"
    );
}
7456
7457    // ===================================================================
7458    // §10.5 Index usability tests
7459    // ===================================================================
7460
#[test]
fn test_index_usability_equality_leftmost() {
    let idx = index_info("idx_abc", "t1", &["a", "b", "c"], false, 50);

    // `a = 1` constrains the leftmost index column, so the index is usable.
    let leftmost_terms = [eq_term("a")];
    let leftmost = analyze_index_usability(&idx, &leftmost_terms);
    assert!(matches!(leftmost, IndexUsability::Equality));

    // `b = 1` on its own skips the leftmost column, so it is not usable.
    let non_leftmost_terms = [eq_term("b")];
    let non_leftmost = analyze_index_usability(&idx, &non_leftmost_terms);
    assert!(matches!(non_leftmost, IndexUsability::NotUsable));
}
7477
#[test]
fn test_index_usability_qualified_column_rejects_wrong_table() {
    // The index covers t1.a; table qualifiers on the WHERE column decide
    // whether a term may use it.
    let idx = index_info("idx_a", "t1", &["a"], false, 50);

    // Builds a leaked `<table>.a = 1` expression so the classified term can
    // hold a 'static reference to it.
    let qualified_a_eq_one = |table: &'static str| -> &'static Expr {
        Box::leak(Box::new(Expr::BinaryOp {
            left: Box::new(Expr::Column(ColumnRef::qualified(table, "a"), Span::ZERO)),
            op: AstBinaryOp::Eq,
            right: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
            span: Span::ZERO,
        }))
    };

    // t2.a names a different table: must NOT match the t1 index.
    let wrong_table_terms = [classify_where_term(qualified_a_eq_one("t2"))];
    let wrong_table = analyze_index_usability(&idx, &wrong_table_terms);
    assert!(matches!(wrong_table, IndexUsability::NotUsable));

    // t1.a names the indexed table: usable.
    let right_table_terms = [classify_where_term(qualified_a_eq_one("t1"))];
    let right_table = analyze_index_usability(&idx, &right_table_terms);
    assert!(matches!(right_table, IndexUsability::Equality));

    // A bare `a` carries no qualifier and is conservatively treated as usable.
    let unqualified_terms = [eq_term("a")];
    let unqualified = analyze_index_usability(&idx, &unqualified_terms);
    assert!(matches!(unqualified, IndexUsability::Equality));
}
7514
#[test]
fn test_index_usability_range_rightmost() {
    let idx = index_info("idx_ab", "t1", &["a", "b"], false, 50);

    // `a > 5` is a range on the leftmost column: usable as a range scan.
    let leftmost_terms = [range_term("a")];
    let leftmost = analyze_index_usability(&idx, &leftmost_terms);
    assert!(matches!(leftmost, IndexUsability::Range { .. }));

    // `b > 5` alone does not touch the leftmost column: not usable.
    let non_leftmost_terms = [range_term("b")];
    let non_leftmost = analyze_index_usability(&idx, &non_leftmost_terms);
    assert!(matches!(non_leftmost, IndexUsability::NotUsable));
}
7531
#[test]
fn test_index_usability_in_expansion() {
    // A three-value IN list on the only index column should be reported as
    // an IN expansion with three probes.
    let single_col_idx = index_info("idx_col", "t1", &["col"], false, 50);
    let in_terms = [in_term("col", 3)];

    assert!(matches!(
        analyze_index_usability(&single_col_idx, &in_terms),
        IndexUsability::InExpansion { probe_count: 3 }
    ));
}
7542
#[test]
fn test_index_usability_multicolumn_trailing_in_expansion() {
    // Equality on `a` followed by a three-value IN on `b` should use one
    // equality column plus a trailing IN expansion of three probes.
    let composite_idx = index_info("idx_ab", "t1", &["a", "b"], false, 50);
    let eq_then_in = [eq_term("a"), in_term("b", 3)];

    assert!(matches!(
        analyze_index_usability(&composite_idx, &eq_then_in),
        IndexUsability::MultiColumnEquality {
            eq_columns: 1,
            trailing_constraint: MultiColumnTrailingConstraint::InExpansion { probe_count: 3 }
        }
    ));
}
7556
#[test]
fn test_index_usability_multicolumn_trailing_like_prefix() {
    // Equality on `a` followed by `b LIKE '123%'` should use one equality
    // column plus a trailing LIKE-prefix constraint.
    let composite_idx = index_info("idx_ab", "t1", &["a", "b"], false, 50);
    let eq_then_like = [eq_term("a"), like_term("b", "123%")];

    assert!(matches!(
        analyze_index_usability(&composite_idx, &eq_then_like),
        IndexUsability::MultiColumnEquality {
            eq_columns: 1,
            trailing_constraint: MultiColumnTrailingConstraint::LikePrefix
        }
    ));
}
7570
#[test]
fn test_in_expansion_cost_scales_by_probe_count() {
    // Regression: `IN (v1, v2, v3)` must be costed at roughly three equality
    // probes rather than at the price of a single probe.
    let table = table_stats("t1", 100, 1000);
    let idx = index_info("idx_col", "t1", &["col"], false, 50);
    let indexes = std::slice::from_ref(&idx);

    let eq_terms = [eq_term("col")];
    let in_terms = [in_term("col", 3)];
    let eq_cost = best_access_path(&table, indexes, &eq_terms, None).estimated_cost;
    let in_cost = best_access_path(&table, indexes, &in_terms, None).estimated_cost;

    // Three probes should cost approximately 3x one equality lookup.
    let ratio = in_cost / eq_cost;
    assert!(
        (ratio - 3.0).abs() < 0.01,
        "IN(3) cost should be 3x equality cost: eq={} in3={} ratio={}",
        eq_cost,
        in_cost,
        ratio,
    );
}
7593
#[test]
fn test_best_access_path_or_disjunction_uses_in_expansion_index_probe() {
    // An OR of four equalities on `a` must first classify as a four-entry IN
    // list, and the planner must then probe idx_a with it.
    let or_term = or_eq_term("a", &[1, 2, 3, 4]);
    assert!(matches!(or_term.kind, WhereTermKind::InList { count: 4 }));

    let table = table_stats("t1", 1_000, 100_000);
    let indexes = [index_info("idx_a", "t1", &["a"], false, 80)];
    let terms = [or_term];
    let chosen = best_access_path(&table, &indexes, &terms, None);

    assert_eq!(chosen.index.as_deref(), Some("idx_a"));
    assert!(matches!(chosen.kind, AccessPathKind::IndexScanEquality));
}
7605
#[test]
fn test_best_access_path_multicolumn_trailing_in_refines_row_estimate() {
    // Adding a three-value IN on the second index column must shrink the row
    // estimate compared with equality on the first column alone.
    let table = table_stats("t1", 1_000, 1_000_000);
    let idx = index_info("idx_ab", "t1", &["a", "b"], false, 80);
    let indexes = std::slice::from_ref(&idx);

    let ap_eq = best_access_path(&table, indexes, &[eq_term("a")], None);
    let ap_in = best_access_path(&table, indexes, &[eq_term("a"), in_term("b", 3)], None);

    assert_eq!(ap_in.index.as_deref(), Some("idx_ab"));
    assert!(matches!(ap_in.kind, AccessPathKind::IndexScanEquality));

    let narrowed = ap_in.estimated_rows < ap_eq.estimated_rows;
    assert!(
        narrowed,
        "composite IN should narrow row estimates: eq_only={} trailing_in={}",
        ap_eq.estimated_rows,
        ap_in.estimated_rows
    );

    let distance_from_expected = (ap_in.estimated_rows - 30_000.0).abs();
    assert!(
        distance_from_expected < f64::EPSILON,
        "expected 1e6 / 10^2 * 3 = 30000 rows, got {}",
        ap_in.estimated_rows
    );
}
7630
#[test]
fn test_best_access_path_multicolumn_trailing_in_prefers_tighter_probe_count()
-> Result<(), String> {
    // Two IN lists constrain the same trailing column `b` (5 values and 2
    // values). The planner should cost the composite index using the tighter
    // one, which this test pins as a 20000-row estimate.
    let table = table_stats("t1", 1_000, 1_000_000);
    let idx = index_info("idx_ab", "t1", &["a", "b"], false, 80);
    let ap = best_access_path(
        &table,
        &[idx],
        &[eq_term("a"), in_term("b", 5), in_term("b", 2)],
        None,
    );

    // Guard clauses, checked in the same order as before; each failure now
    // reports what was actually observed where that can be printed.
    if ap.index.as_deref() != Some("idx_ab") {
        return Err(format!(
            "expected idx_ab access path, got {:?}",
            ap.index.as_deref()
        ));
    }
    if ap.kind != AccessPathKind::IndexScanEquality {
        return Err("expected equality access path".to_owned());
    }
    // Kept as a positive `<` comparison so a NaN estimate still fails.
    let rows_match = (ap.estimated_rows - 20_000.0).abs() < f64::EPSILON;
    if !rows_match {
        return Err(format!(
            "expected tighter IN-list row estimate of 20000 rows, got {}",
            ap.estimated_rows
        ));
    }
    Ok(())
}
#[test]
fn test_best_access_path_multicolumn_or_disjunction_reuses_composite_in_expansion() {
    // An OR of four equalities on `b` classifies as a four-entry IN list;
    // combined with `a = ?` it should drive the composite index the same way
    // an explicit trailing IN would.
    let or_on_b = or_eq_term("b", &[1, 2, 3, 4]);
    assert!(matches!(or_on_b.kind, WhereTermKind::InList { count: 4 }));

    let table = table_stats("t1", 1_000, 1_000_000);
    let indexes = [index_info("idx_ab", "t1", &["a", "b"], false, 80)];
    let terms = [eq_term("a"), or_on_b];
    let ap = best_access_path(&table, &indexes, &terms, None);

    assert_eq!(ap.index.as_deref(), Some("idx_ab"));
    assert!(matches!(ap.kind, AccessPathKind::IndexScanEquality));

    let distance_from_expected = (ap.estimated_rows - 40_000.0).abs();
    assert!(
        distance_from_expected < f64::EPSILON,
        "expected 1e6 / 10^2 * 4 = 40000 rows, got {}",
        ap.estimated_rows
    );
}
7671
#[test]
fn test_best_access_path_multicolumn_trailing_like_prefix_refines_row_estimate() {
    // Adding `b LIKE '123%'` behind `a = ?` must turn the access into a range
    // scan on the composite index and shrink the row estimate.
    let table = table_stats("t1", 1_000, 1_000_000);
    let idx = index_info("idx_ab", "t1", &["a", "b"], false, 80);
    let indexes = std::slice::from_ref(&idx);

    let ap_eq = best_access_path(&table, indexes, &[eq_term("a")], None);
    let ap_like = best_access_path(
        &table,
        indexes,
        &[eq_term("a"), like_term("b", "123%")],
        None,
    );

    assert_eq!(ap_like.index.as_deref(), Some("idx_ab"));
    assert!(matches!(
        ap_like.kind,
        AccessPathKind::IndexScanRange { .. }
    ));

    let narrowed = ap_like.estimated_rows < ap_eq.estimated_rows;
    assert!(
        narrowed,
        "composite LIKE prefix should narrow row estimates: eq_only={} trailing_like={}",
        ap_eq.estimated_rows,
        ap_like.estimated_rows
    );

    let distance_from_expected = (ap_like.estimated_rows - 10_000.0).abs();
    assert!(
        distance_from_expected < f64::EPSILON,
        "expected 1e6 / 10 * 0.1 = 10000 rows, got {}",
        ap_like.estimated_rows
    );
}
7699
#[test]
fn test_best_access_path_multicolumn_trailing_glob_prefix_refines_row_estimate() {
    // `a = ?` plus `b GLOB 'abc*'` should produce a range scan on the
    // composite index with the prefix-refined row estimate.
    let table = table_stats("t1", 1_000, 1_000_000);
    let indexes = [index_info("idx_ab", "t1", &["a", "b"], false, 80)];
    let eq_then_glob = [eq_term("a"), glob_term("b", "abc*")];

    let ap = best_access_path(&table, &indexes, &eq_then_glob, None);

    assert_eq!(ap.index.as_deref(), Some("idx_ab"));
    assert!(matches!(ap.kind, AccessPathKind::IndexScanRange { .. }));

    let distance_from_expected = (ap.estimated_rows - 10_000.0).abs();
    assert!(
        distance_from_expected < f64::EPSILON,
        "expected 1e6 / 10 * 0.1 = 10000 rows, got {}",
        ap.estimated_rows
    );
}
7716
#[test]
fn test_index_usability_like_not_usable() {
    let idx = index_info("idx_name", "t1", &["name"], false, 50);

    // An alphabetic prefix is not safe to turn into an index range under
    // default SQLite semantics, because LIKE folds ASCII case.
    let alpha_prefix_terms = [like_term("name", "Jo%")];
    let alpha_prefix = analyze_index_usability(&idx, &alpha_prefix_terms);
    assert!(matches!(alpha_prefix, IndexUsability::NotUsable));

    // A pattern with a leading wildcard is not usable either.
    let leading_wildcard_terms = [like_term("name", "%Jo%")];
    let leading_wildcard = analyze_index_usability(&idx, &leading_wildcard_terms);
    assert!(matches!(leading_wildcard, IndexUsability::NotUsable));
}
7734
#[test]
fn test_index_usability_like_case_stable_prefix() {
    // A digits-only LIKE prefix should become the range ["123", "124").
    let name_idx = index_info("idx_name", "t1", &["name"], false, 50);
    let digit_prefix_terms = [like_term("name", "123%")];

    assert!(matches!(
        analyze_index_usability(&name_idx, &digit_prefix_terms),
        IndexUsability::LikePrefix {
            ref low,
            high: Some(ref high)
        } if low == "123" && high == "124"
    ));
}
7748
#[test]
fn test_index_usability_glob_prefix() {
    let idx = index_info("idx_name", "t1", &["name"], false, 50);

    // GLOB 'Jo*' has a constant prefix, so it maps to the range ["Jo", "Jp").
    let prefix_terms = [glob_term("name", "Jo*")];
    assert!(matches!(
        analyze_index_usability(&idx, &prefix_terms),
        IndexUsability::LikePrefix {
            ref low,
            high: Some(ref high)
        } if low == "Jo" && high == "Jp"
    ));

    // GLOB '*Jo*' starts with a wildcard: no constant prefix, not usable.
    let leading_wildcard_terms = [glob_term("name", "*Jo*")];
    let leading_wildcard = analyze_index_usability(&idx, &leading_wildcard_terms);
    assert!(matches!(leading_wildcard, IndexUsability::NotUsable));
}
7770
#[test]
fn test_index_usability_leftmost_preserves_first_non_range_probe_order() {
    // A GLOB prefix and an IN list both constrain the leftmost column; the
    // GLOB term is listed first and its prefix range is what must be reported.
    let name_idx = index_info("idx_name", "t1", &["name"], false, 50);
    let glob_then_in = [glob_term("name", "Jo*"), in_term("name", 3)];

    assert!(matches!(
        analyze_index_usability(&name_idx, &glob_then_in),
        IndexUsability::LikePrefix {
            ref low,
            high: Some(ref high)
        } if low == "Jo" && high == "Jp"
    ));
}
7785
#[test]
fn test_index_usability_equality_beats_range_on_same_leftmost_column() {
    // With a range and an equality on the same leftmost column, equality
    // must be reported even though the range term is listed first.
    let idx = index_info("idx_a", "t1", &["a"], false, 50);
    let range_then_eq = [range_term("a"), eq_term("a")];

    let usability = analyze_index_usability(&idx, &range_then_eq);
    assert!(matches!(usability, IndexUsability::Equality));
}
7796
#[test]
fn test_index_usability_equality_beats_like_prefix_on_same_leftmost_column() {
    // With a LIKE prefix and an equality on the same leftmost column,
    // equality must be reported even though the LIKE term is listed first.
    let idx = index_info("idx_name", "t1", &["name"], false, 50);
    let like_then_eq = [like_term("name", "123%"), eq_term("name")];

    let usability = analyze_index_usability(&idx, &like_then_eq);
    assert!(matches!(usability, IndexUsability::Equality));
}
7807
/// Regression test for issue #63.
///
/// By convention (see the schema loader in fsqlite-core/src/connection.rs)
/// an expression index keeps its key terms in `expression_columns` and
/// leaves `columns` empty.  Before the fix, analyze_index_usability returned
/// at its `columns.is_empty()` guard without ever consulting
/// `expression_columns`, so every expression index looked planner-dead and a
/// query such as `WHERE lower(name) = 'alice'` fell back to a full table
/// scan even though a matching expression index existed.
#[test]
fn test_index_usability_expression_index_equality() {
    // One builder for the `lower(name)` call, used for both the WHERE clause
    // and the index key so the two ASTs are structurally identical and
    // `PartialEq` on them succeeds.
    let lower_of_name = || Expr::FunctionCall {
        name: "lower".to_owned(),
        args: fsqlite_ast::FunctionArgs::List(vec![Expr::Column(
            ColumnRef::bare("name"),
            Span::ZERO,
        )]),
        distinct: false,
        order_by: vec![],
        filter: None,
        over: None,
        span: Span::ZERO,
    };

    // WHERE lower(name) = 'alice', leaked to obtain a 'static reference.
    let where_expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(lower_of_name()),
        op: AstBinaryOp::Eq,
        right: Box::new(Expr::Literal(
            Literal::String("alice".to_owned()),
            Span::ZERO,
        )),
        span: Span::ZERO,
    }));

    // The index records the bare `lower(name)` key (no equality wrapper),
    // matching how connection.rs parses the DDL expression string.
    let idx = IndexInfo {
        name: "idx_lower_name".to_owned(),
        table: "users".to_owned(),
        // Expression indexes leave `columns` empty by convention.
        columns: vec![],
        expression_columns: vec![lower_of_name()],
        partial_where: None,
        unique: false,
        n_pages: 50,
        source: StatsSource::Heuristic,
    };

    let terms = [classify_where_term(where_expr)];
    let usability = analyze_index_usability(&idx, &terms);
    assert!(
        matches!(usability, IndexUsability::Equality),
        "expression index must reach analyze_expression_index_usability \
         even though `columns` is empty (issue #63)"
    );
}
7881
7882    /// SQL function names are case-insensitive, so `lower(name)` in the
7883    /// index key must match `LOWER(name)` in the WHERE clause.  Before the
7884    /// `eq_ignore_ascii_case` fix in `impl PartialEq for Expr`, this would
7885    /// silently fall back to a full scan.
7886    #[test]
7887    fn test_index_usability_expression_index_case_insensitive_function_name() {
7888        // Index key uses lowercase function name.
7889        let key_expr = Expr::FunctionCall {
7890            name: "lower".to_owned(),
7891            args: fsqlite_ast::FunctionArgs::List(vec![Expr::Column(
7892                ColumnRef::bare("name"),
7893                Span::ZERO,
7894            )]),
7895            distinct: false,
7896            order_by: vec![],
7897            filter: None,
7898            over: None,
7899            span: Span::ZERO,
7900        };
7901
7902        // WHERE clause uses UPPERCASE function name.
7903        let where_expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
7904            left: Box::new(Expr::FunctionCall {
7905                name: "LOWER".to_owned(),
7906                args: fsqlite_ast::FunctionArgs::List(vec![Expr::Column(
7907                    ColumnRef::bare("name"),
7908                    Span::ZERO,
7909                )]),
7910                distinct: false,
7911                order_by: vec![],
7912                filter: None,
7913                over: None,
7914                span: Span::ZERO,
7915            }),
7916            op: AstBinaryOp::Eq,
7917            right: Box::new(Expr::Literal(
7918                Literal::String("alice".to_owned()),
7919                Span::ZERO,
7920            )),
7921            span: Span::ZERO,
7922        }));
7923
7924        let idx = IndexInfo {
7925            name: "idx_lower_name".to_owned(),
7926            table: "users".to_owned(),
7927            columns: vec![],
7928            unique: false,
7929            n_pages: 50,
7930            source: StatsSource::Heuristic,
7931            partial_where: None,
7932            expression_columns: vec![key_expr],
7933        };
7934
7935        let terms = [classify_where_term(where_expr)];
7936        assert!(
7937            matches!(
7938                analyze_index_usability(&idx, &terms),
7939                IndexUsability::Equality
7940            ),
7941            "case-insensitive function name match must reach Equality \
7942             (lower vs LOWER)"
7943        );
7944    }
7945
7946    /// Expression-index regression companion: a non-matching WHERE term must
7947    /// still return NotUsable (i.e. the reordered guard does not accidentally
7948    /// widen acceptance).
7949    #[test]
7950    fn test_index_usability_expression_index_non_matching() {
7951        // Index is on `lower(name)`, but the WHERE clause uses `upper(name)`.
7952        let upper_name_eq: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
7953            left: Box::new(Expr::FunctionCall {
7954                name: "upper".to_owned(),
7955                args: fsqlite_ast::FunctionArgs::List(vec![Expr::Column(
7956                    ColumnRef::bare("name"),
7957                    Span::ZERO,
7958                )]),
7959                distinct: false,
7960                order_by: vec![],
7961                filter: None,
7962                over: None,
7963                span: Span::ZERO,
7964            }),
7965            op: AstBinaryOp::Eq,
7966            right: Box::new(Expr::Literal(
7967                Literal::String("ALICE".to_owned()),
7968                Span::ZERO,
7969            )),
7970            span: Span::ZERO,
7971        }));
7972
7973        let key_expr = Expr::FunctionCall {
7974            name: "lower".to_owned(),
7975            args: fsqlite_ast::FunctionArgs::List(vec![Expr::Column(
7976                ColumnRef::bare("name"),
7977                Span::ZERO,
7978            )]),
7979            distinct: false,
7980            order_by: vec![],
7981            filter: None,
7982            over: None,
7983            span: Span::ZERO,
7984        };
7985
7986        let idx = IndexInfo {
7987            name: "idx_lower_name".to_owned(),
7988            table: "users".to_owned(),
7989            columns: vec![],
7990            unique: false,
7991            n_pages: 50,
7992            source: StatsSource::Heuristic,
7993            partial_where: None,
7994            expression_columns: vec![key_expr],
7995        };
7996
7997        let terms = [classify_where_term(upper_name_eq)];
7998        assert!(
7999            matches!(
8000                analyze_index_usability(&idx, &terms),
8001                IndexUsability::NotUsable
8002            ),
8003            "expression index must reject structurally-unrelated WHERE terms"
8004        );
8005    }
8006
8007    /// Real-parser regression test for issue #63.
8008    ///
8009    /// The index key is parsed from its stand-alone SQL text (the way the
8010    /// schema loader in `fsqlite-core/src/connection.rs` builds
8011    /// `expression_columns`); the WHERE clause is extracted from a full
8012    /// SELECT parse where the `lower(name)` sub-expression lands at a
8013    /// non-zero byte offset inside the outer query.  This test asserts:
8014    ///
8015    /// 1. The two parse contexts really do produce different `Span`
8016    ///    byte offsets for the logically identical expression (sanity
8017    ///    check — if this ever stops being true the test loses its
8018    ///    teeth but the bug it guards against may come back).
8019    /// 2. `Expr::PartialEq` — manually implemented in fsqlite-ast to
8020    ///    skip the span field on every variant — still reports the
8021    ///    two expressions as equal despite the span mismatch.  A
8022    ///    future refactor that accidentally auto-derived `PartialEq`
8023    ///    would silently break the expression-index planner, so this
8024    ///    assertion catches that.
8025    /// 3. The full `analyze_index_usability` path reaches
8026    ///    `IndexUsability::Equality` for a real-parser round trip —
8027    ///    the end-to-end guarantee that the bounded repro from the
8028    ///    issue ships as an index lookup plan.
8029    #[test]
8030    fn test_index_usability_expression_index_real_parser_spans_differ() {
8031        use fsqlite_ast::{SelectCore, Statement};
8032
8033        // Parse the index key the way the schema loader does: from its
8034        // stand-alone text, with spans starting at 0.
8035        let key_expr =
8036            fsqlite_parser::expr::parse_expr("lower(name)").expect("key expression should parse");
8037
8038        // Parse a full SELECT so the WHERE clause's `lower(name)` lands
8039        // at a non-zero byte offset inside the outer query, exactly
8040        // matching how the planner sees it at runtime.
8041        let select_sql = "SELECT id FROM users WHERE lower(name) = 'alice'";
8042        let mut scratch = fsqlite_parser::StatementParseScratch::default();
8043        let statement =
8044            fsqlite_parser::parse_single_statement_with_scratch(select_sql, &mut scratch)
8045                .expect("select should parse");
8046        let Statement::Select(select) = statement else {
8047            panic!("expected SELECT statement");
8048        };
8049        let SelectCore::Select { where_clause, .. } = select.body.select else {
8050            panic!("expected SELECT core");
8051        };
8052        let where_expr = *where_clause.expect("WHERE clause must be present");
8053        let left_of_where = match &where_expr {
8054            Expr::BinaryOp { left, .. } => left.as_ref().clone(),
8055            _ => panic!("expected BinaryOp for `lower(name) = 'alice'`"),
8056        };
8057
8058        // Sanity: the two spans really are different — if this ever stops
8059        // being true, the test's premise is wrong.
8060        assert_ne!(
8061            left_of_where.span(),
8062            key_expr.span(),
8063            "real parser should assign different spans across parse \
8064             contexts: stand-alone `lower(name)` starts at 0 but the \
8065             WHERE-side one starts after `SELECT id FROM users WHERE `"
8066        );
8067
8068        // Span-insensitive structural equality must accept — this is
8069        // the property the planner relies on for expression-index
8070        // matching and it is provided by the manual `impl PartialEq
8071        // for Expr` in fsqlite-ast.  If someone ever changes that
8072        // impl to a derive, this assertion will fail loudly.
8073        assert_eq!(
8074            left_of_where, key_expr,
8075            "Expr::PartialEq is manually span-insensitive in fsqlite-ast; \
8076             if that invariant breaks, the expression-index planner stops \
8077             matching across parse contexts (issue #63)"
8078        );
8079
8080        // And the full planner path should accept it end-to-end.
8081        let idx = IndexInfo {
8082            name: "idx_lower_name".to_owned(),
8083            table: "users".to_owned(),
8084            columns: vec![],
8085            unique: false,
8086            n_pages: 50,
8087            source: StatsSource::Heuristic,
8088            partial_where: None,
8089            expression_columns: vec![key_expr],
8090        };
8091        // Leak the parsed WHERE expression so the WhereTerm can hold a
8092        // reference with `'static` lifetime, matching the other tests.
8093        let leaked: &'static Expr = Box::leak(Box::new(where_expr));
8094        let terms = [classify_where_term(leaked)];
8095        assert!(
8096            matches!(
8097                analyze_index_usability(&idx, &terms),
8098                IndexUsability::Equality
8099            ),
8100            "real-parser expression index lookup must reach Equality"
8101        );
8102    }
8103
8104    /// An index with neither `columns` nor `expression_columns` is degenerate
8105    /// and must still fall through to NotUsable.  Guards against the reorder
8106    /// accidentally exposing a new reachable code path.
8107    #[test]
8108    fn test_index_usability_empty_index_still_not_usable() {
8109        let idx = IndexInfo {
8110            name: "idx_empty".to_owned(),
8111            table: "t1".to_owned(),
8112            columns: vec![],
8113            unique: false,
8114            n_pages: 50,
8115            source: StatsSource::Heuristic,
8116            partial_where: None,
8117            expression_columns: vec![],
8118        };
8119        let terms = [eq_term("a")];
8120        assert!(matches!(
8121            analyze_index_usability(&idx, &terms),
8122            IndexUsability::NotUsable
8123        ));
8124    }
8125
8126    #[test]
8127    fn test_classify_where_term_equality() {
8128        let term = eq_term("x");
8129        assert!(matches!(term.kind, WhereTermKind::Equality));
8130        assert_eq!(term.column.as_ref().unwrap().column, "x");
8131    }
8132
8133    #[test]
8134    fn test_classify_where_term_range() {
8135        let term = range_term("y");
8136        assert!(matches!(term.kind, WhereTermKind::Range));
8137        assert_eq!(term.column.as_ref().unwrap().column, "y");
8138    }
8139
8140    #[test]
8141    fn test_classify_where_term_rowid() {
8142        let expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
8143            left: Box::new(Expr::Column(ColumnRef::bare("rowid"), Span::ZERO)),
8144            op: AstBinaryOp::Eq,
8145            right: Box::new(Expr::Literal(Literal::Integer(42), Span::ZERO)),
8146            span: Span::ZERO,
8147        }));
8148        let term = classify_where_term(expr);
8149        assert!(matches!(term.kind, WhereTermKind::RowidEquality));
8150    }
8151
8152    #[test]
8153    fn test_decompose_where_and() {
8154        let inner = Expr::BinaryOp {
8155            left: Box::new(Expr::BinaryOp {
8156                left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
8157                op: AstBinaryOp::Eq,
8158                right: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
8159                span: Span::ZERO,
8160            }),
8161            op: AstBinaryOp::And,
8162            right: Box::new(Expr::BinaryOp {
8163                left: Box::new(Expr::Column(ColumnRef::bare("b"), Span::ZERO)),
8164                op: AstBinaryOp::Gt,
8165                right: Box::new(Expr::Literal(Literal::Integer(5), Span::ZERO)),
8166                span: Span::ZERO,
8167            }),
8168            span: Span::ZERO,
8169        };
8170        let terms = decompose_where(&inner);
8171        assert_eq!(terms.len(), 2);
8172    }
8173
8174    // ===================================================================
8175    // §10.5 Join ordering tests
8176    // ===================================================================
8177
8178    #[test]
8179    fn test_join_ordering_single_table() {
8180        let tables = [table_stats("t1", 100, 1000)];
8181        let plan = order_joins(&tables, &[], &[], None, &[]);
8182        assert_eq!(plan.join_order, vec!["t1"]);
8183        // PLANNER-2: full scan cost = n_pages + n_rows * ROW_DECODE_COST.
8184        let expected = estimate_cost_ext(&AccessPathKind::FullTableScan, 100, 0, 1000);
8185        assert!((plan.total_cost - expected).abs() < 1e-9);
8186    }
8187
8188    #[test]
8189    fn test_join_ordering_two_tables() {
8190        let tables = [table_stats("t1", 10, 100), table_stats("t2", 1000, 50000)];
8191        let plan = order_joins(&tables, &[], &[], None, &[]);
8192        assert_eq!(plan.join_order.len(), 2);
8193        // Smaller table should be scanned first (lower startup cost).
8194        assert_eq!(plan.join_order[0], "t1");
8195    }
8196
8197    #[test]
8198    fn test_join_ordering_three_tables() {
8199        let tables = [
8200            table_stats("t1", 10, 100),
8201            table_stats("t2", 100, 1000),
8202            table_stats("t3", 1000, 10000),
8203        ];
8204        let plan = order_joins(&tables, &[], &[], None, &[]);
8205        assert_eq!(plan.join_order.len(), 3);
8206        // All tables present; beam search picks cost-optimal order
8207        // (nested loop model considers outer-row scaling, so smallest
8208        // last-stage rows wins — the exact order depends on the cost model).
8209        for t in &tables {
8210            assert!(plan.join_order.contains(&t.name));
8211        }
8212        assert!(plan.total_cost > 0.0);
8213    }
8214
8215    #[test]
8216    fn test_join_ordering_prefers_indexed() {
8217        let tables = [table_stats("t1", 10, 100), table_stats("t2", 1000, 50000)];
8218        let indexes = [index_info("idx_t2_fk", "t2", &["fk"], false, 50)];
8219        let terms = [eq_term("fk")];
8220        let plan = order_joins(&tables, &indexes, &terms, None, &[]);
8221        // t1 should still come first (small outer), t2 uses index.
8222        assert_eq!(plan.join_order[0], "t1");
8223        assert!(plan.access_paths[1].index.is_some());
8224    }
8225
8226    #[test]
8227    fn test_join_ordering_beam_search_bounded() {
8228        // 6 tables — should NOT explore all 720 orderings.
8229        let tables: Vec<TableStats> = (1..=6_u64)
8230            .map(|i| table_stats(&format!("t{i}"), i * 10, i * 100))
8231            .collect();
8232        let plan = order_joins(&tables, &[], &[], None, &[]);
8233        assert_eq!(plan.join_order.len(), 6);
8234        // Verify it produced a valid plan (all tables present).
8235        for t in &tables {
8236            assert!(plan.join_order.contains(&t.name));
8237        }
8238    }
8239
8240    #[test]
8241    fn test_three_way_join_cost_scales_by_cumulative_rows() {
8242        // Regression: the cost of the 3rd table in a nested loop join must
8243        // be scaled by T1.rows * T2.rows, not just T2.rows.
8244        let small = table_stats("small", 1, 10);
8245        let medium = table_stats("medium", 10, 100);
8246        let large = table_stats("large", 100, 1000);
8247        let plan_sml = order_joins(&[small, medium, large], &[], &[], None, &[]);
8248
8249        // With correct cumulative scaling, putting the largest table last
8250        // is expensive because it scans once per (small * medium) row.
8251        // The planner should NOT produce the same cost as it would if
8252        // outer_rows were only the second table's rows.
8253        #[allow(clippy::suboptimal_flops)]
8254        let cost_if_only_last = 1.0_f64 // small full scan cost
8255            + 10.0 * 10.0 // medium scanned 10 times
8256            + 100.0 * 100.0; // BUG cost: large scanned only 100 times (medium.rows)
8257        // The plan's total cost should be larger than this naive estimate
8258        // because large is actually scanned 10*100=1000 times.
8259        assert!(
8260            plan_sml.total_cost > cost_if_only_last,
8261            "3-way join cost should scale by cumulative rows, not just last table: plan_cost={} bug_cost={}",
8262            plan_sml.total_cost,
8263            cost_if_only_last,
8264        );
8265    }
8266
8267    #[test]
8268    fn test_mx_choice_single_table() {
8269        assert_eq!(compute_mx_choice(1, false), 1);
8270    }
8271
8272    #[test]
8273    fn test_mx_choice_two_tables() {
8274        assert_eq!(compute_mx_choice(2, false), 5);
8275    }
8276
8277    #[test]
8278    fn test_mx_choice_three_tables() {
8279        assert_eq!(compute_mx_choice(3, false), 12);
8280    }
8281
8282    #[test]
8283    fn test_mx_choice_star_query() {
8284        assert_eq!(compute_mx_choice(4, true), 18);
8285    }
8286
8287    #[test]
8288    fn test_detect_star_query_true() {
8289        // Central table "fact" joins to dim1, dim2, dim3.
8290        let tables = [
8291            table_stats("fact", 1000, 100_000),
8292            table_stats("dim1", 10, 100),
8293            table_stats("dim2", 10, 100),
8294            table_stats("dim3", 10, 100),
8295        ];
8296        let terms = [
8297            join_term("fact", "d1_id", "dim1", "id"),
8298            join_term("fact", "d2_id", "dim2", "id"),
8299            join_term("fact", "d3_id", "dim3", "id"),
8300        ];
8301        assert!(detect_star_query(&tables, &terms));
8302    }
8303
8304    #[test]
8305    fn test_detect_star_query_false() {
8306        // 4-node chain: t1-t2-t3-t4. No single table joins ALL others.
8307        // t2 joins t1,t3 (2/3); t3 joins t2,t4 (2/3). Neither reaches 3/3.
8308        let tables = [
8309            table_stats("t1", 100, 1000),
8310            table_stats("t2", 100, 1000),
8311            table_stats("t3", 100, 1000),
8312            table_stats("t4", 100, 1000),
8313        ];
8314        let terms = [
8315            join_term("t1", "id", "t2", "fk1"),
8316            join_term("t2", "id", "t3", "fk2"),
8317            join_term("t3", "id", "t4", "fk3"),
8318        ];
8319        assert!(!detect_star_query(&tables, &terms));
8320    }
8321
8322    #[test]
8323    fn test_cross_join_no_reorder() {
8324        // CROSS JOIN between t1 and t2: t2 cannot appear before t1.
8325        let tables = [
8326            table_stats("t1", 1000, 50000), // Big table first
8327            table_stats("t2", 10, 100),     // Small table second
8328        ];
8329        let cross = [("t1".to_owned(), "t2".to_owned())];
8330        let plan = order_joins(&tables, &[], &[], None, &cross);
8331        // Despite t2 being smaller, CROSS JOIN forces t1 first.
8332        assert_eq!(plan.join_order[0], "t1");
8333        assert_eq!(plan.join_order[1], "t2");
8334    }
8335
8336    #[test]
8337    fn test_single_table_source_name_and_alias() {
8338        use fsqlite_ast::{JoinClause, JoinKind, JoinType};
8339
8340        // A single-table FROM with no joins yields Ok((name, alias)); any joins
8341        // or a non-Table source yield Err(UnsupportedFromSource).
8342        let tbl = |alias: Option<&str>| TableOrSubquery::Table {
8343            name: QualifiedName::bare("users"),
8344            alias: alias.map(str::to_owned),
8345            index_hint: None,
8346            time_travel: None,
8347        };
8348        let fc = |source: TableOrSubquery, joins: Vec<JoinClause>| FromClause { source, joins };
8349
8350        // Bare table, no alias.
8351        let bare_fc = fc(tbl(None), vec![]);
8352        let (name, alias) = single_table_source_name_and_alias(&bare_fc).unwrap();
8353        assert_eq!(name, "users");
8354        assert_eq!(alias, None);
8355
8356        // Bare table with an alias.
8357        let aliased_fc = fc(tbl(Some("u")), vec![]);
8358        let (name, alias) = single_table_source_name_and_alias(&aliased_fc).unwrap();
8359        assert_eq!(name, "users");
8360        assert_eq!(alias, Some("u"));
8361
8362        // A join present -> Err.
8363        let with_join = fc(
8364            tbl(None),
8365            vec![JoinClause {
8366                join_type: JoinType {
8367                    natural: false,
8368                    kind: JoinKind::Inner,
8369                },
8370                table: tbl(None),
8371                constraint: None,
8372            }],
8373        );
8374        assert!(single_table_source_name_and_alias(&with_join).is_err());
8375    }
8376
8377    #[test]
8378    fn test_from_clause_supports_leapfrog_branches() {
8379        use fsqlite_ast::{JoinClause, JoinConstraint, JoinKind, JoinType};
8380
8381        // from_clause_supports_leapfrog gates leapfrog routing on join shape.
8382        // The routing tests only ever pass None (-> supported); the rejection
8383        // branches were never exercised directly.
8384        let tbl = |name: &str| TableOrSubquery::Table {
8385            name: QualifiedName::bare(name),
8386            alias: None,
8387            index_hint: None,
8388            time_travel: None,
8389        };
8390        let col = |name: &str| Expr::Column(ColumnRef::bare(name), Span::ZERO);
8391        let from = |jt: JoinType, constraint: Option<JoinConstraint>| FromClause {
8392            source: tbl("a"),
8393            joins: vec![JoinClause {
8394                join_type: jt,
8395                table: tbl("b"),
8396                constraint,
8397            }],
8398        };
8399        let inner = || JoinType {
8400            natural: false,
8401            kind: JoinKind::Inner,
8402        };
8403
8404        // No FROM clause at all -> trivially supported.
8405        assert!(from_clause_supports_leapfrog(None));
8406
8407        // Inner join with an equi-column ON predicate (x = y) is supported.
8408        let equi_on = Expr::BinaryOp {
8409            left: Box::new(col("x")),
8410            op: AstBinaryOp::Eq,
8411            right: Box::new(col("y")),
8412            span: Span::ZERO,
8413        };
8414        assert!(from_clause_supports_leapfrog(Some(&from(
8415            inner(),
8416            Some(JoinConstraint::On(equi_on))
8417        ))));
8418
8419        // A non-empty USING constraint is supported.
8420        assert!(from_clause_supports_leapfrog(Some(&from(
8421            inner(),
8422            Some(JoinConstraint::Using(vec!["x".to_owned()]))
8423        ))));
8424
8425        // Rejection: a non-equi ON (column = literal) is not equi-column.
8426        let nonequi_on = Expr::BinaryOp {
8427            left: Box::new(col("x")),
8428            op: AstBinaryOp::Eq,
8429            right: Box::new(Expr::Literal(Literal::Integer(5), Span::ZERO)),
8430            span: Span::ZERO,
8431        };
8432        assert!(!from_clause_supports_leapfrog(Some(&from(
8433            inner(),
8434            Some(JoinConstraint::On(nonequi_on))
8435        ))));
8436
8437        // Rejection: an empty USING list.
8438        assert!(!from_clause_supports_leapfrog(Some(&from(
8439            inner(),
8440            Some(JoinConstraint::Using(vec![]))
8441        ))));
8442
8443        // Rejection: a NATURAL join.
8444        assert!(!from_clause_supports_leapfrog(Some(&from(
8445            JoinType {
8446                natural: true,
8447                kind: JoinKind::Inner,
8448            },
8449            None
8450        ))));
8451
8452        // Rejection: an outer (LEFT) join.
8453        assert!(!from_clause_supports_leapfrog(Some(&from(
8454            JoinType {
8455                natural: false,
8456                kind: JoinKind::Left,
8457            },
8458            None
8459        ))));
8460    }
8461
8462    #[test]
8463    fn test_two_way_join_stays_hash_even_with_leapfrog_enabled() {
8464        let tables = [table_stats("t1", 10, 100), table_stats("t2", 12, 120)];
8465        let terms = [join_term("t1", "k", "t2", "k")];
8466        let plan = order_joins_with_hints_and_features(
8467            &tables,
8468            &[],
8469            &terms,
8470            None,
8471            &[],
8472            None,
8473            None,
8474            PlannerFeatureFlags {
8475                leapfrog_join: true,
8476                ..PlannerFeatureFlags::default()
8477            },
8478        );
8479
8480        assert_eq!(plan.join_segments.len(), 1);
8481        assert_eq!(plan.join_segments[0].operator, JoinOperator::HashJoin);
8482    }
8483
8484    #[test]
8485    fn test_three_way_equi_join_uses_leapfrog_when_feature_enabled() {
8486        let tables = [
8487            table_stats("a", 1024, 1_000_000),
8488            table_stats("b", 1024, 1_000_000),
8489            table_stats("c", 1024, 1_000_000),
8490        ];
8491        let terms = [join_term("a", "k", "b", "k"), join_term("b", "k", "c", "k")];
8492        let plan = order_joins_with_hints_and_features(
8493            &tables,
8494            &[],
8495            &terms,
8496            None,
8497            &[],
8498            None,
8499            None,
8500            PlannerFeatureFlags {
8501                leapfrog_join: true,
8502                ..PlannerFeatureFlags::default()
8503            },
8504        );
8505
8506        assert!(
8507            plan.join_segments
8508                .iter()
8509                .any(|segment| segment.operator == JoinOperator::LeapfrogTriejoin
8510                    && segment.relations.len() == 3),
8511            "expected Leapfrog segment, got {:?}",
8512            plan.join_segments
8513        );
8514    }
8515
8516    #[test]
8517    fn test_leapfrog_feature_flag_gates_routing() {
8518        let tables = [
8519            table_stats("a", 1024, 1_000_000),
8520            table_stats("b", 1024, 1_000_000),
8521            table_stats("c", 1024, 1_000_000),
8522        ];
8523        let terms = [join_term("a", "k", "b", "k"), join_term("b", "k", "c", "k")];
8524        let plan = order_joins_with_hints_and_features(
8525            &tables,
8526            &[],
8527            &terms,
8528            None,
8529            &[],
8530            None,
8531            None,
8532            PlannerFeatureFlags {
8533                leapfrog_join: false,
8534                ..PlannerFeatureFlags::default()
8535            },
8536        );
8537
8538        assert_eq!(plan.join_segments.len(), 1);
8539        assert_eq!(plan.join_segments[0].operator, JoinOperator::HashJoin);
8540    }
8541
8542    #[test]
8543    fn test_mixed_join_segments_support_leapfrog_and_hash() {
8544        let tables = [
8545            table_stats("a", 512, 900_000),
8546            table_stats("b", 512, 900_000),
8547            table_stats("c", 512, 900_000),
8548            table_stats("d", 64, 10_000),
8549            table_stats("e", 64, 10_000),
8550        ];
8551        let terms = [
8552            join_term("a", "k", "b", "k"),
8553            join_term("b", "k", "c", "k"),
8554            join_term("d", "k", "e", "k"),
8555        ];
8556        let plan = order_joins_with_hints_and_features(
8557            &tables,
8558            &[],
8559            &terms,
8560            None,
8561            &[],
8562            None,
8563            None,
8564            PlannerFeatureFlags {
8565                leapfrog_join: true,
8566                ..PlannerFeatureFlags::default()
8567            },
8568        );
8569
8570        assert!(
8571            plan.join_segments
8572                .iter()
8573                .any(|segment| segment.operator == JoinOperator::LeapfrogTriejoin
8574                    && segment.relations.len() == 3),
8575            "expected 3-way Leapfrog segment, got {:?}",
8576            plan.join_segments
8577        );
8578        assert!(
8579            plan.join_segments
8580                .iter()
8581                .any(|segment| segment.operator == JoinOperator::HashJoin
8582                    && segment.relations.len() == 2),
8583            "expected 2-way hash segment, got {:?}",
8584            plan.join_segments
8585        );
8586    }
8587
8588    #[test]
8589    fn test_incompatible_trie_ordering_falls_back_to_hash_join() {
8590        let tables = [
8591            table_stats("a", 256, 100_000),
8592            table_stats("b", 256, 100_000),
8593            table_stats("c", 256, 100_000),
8594        ];
8595        let terms = [join_term("a", "x", "b", "x"), join_term("b", "y", "c", "y")];
8596        let plan = order_joins_with_hints_and_features(
8597            &tables,
8598            &[],
8599            &terms,
8600            None,
8601            &[],
8602            None,
8603            None,
8604            PlannerFeatureFlags {
8605                leapfrog_join: true,
8606                ..PlannerFeatureFlags::default()
8607            },
8608        );
8609
8610        assert!(
8611            plan.join_segments
8612                .iter()
8613                .all(|segment| segment.operator == JoinOperator::HashJoin),
8614            "incompatible trie ordering should stay hash-only: {:?}",
8615            plan.join_segments
8616        );
8617    }
8618
/// With the leapfrog flag enabled, a FROM clause containing a LEFT JOIN must
/// still produce a single segment planned as a pairwise hash join.
#[test]
fn test_outer_join_shape_forces_hash_fallback() {
    use fsqlite_ast::{JoinClause, JoinConstraint, JoinKind, JoinType};

    // Unaliased, unhinted base-table reference.
    let plain_table = |table: &'static str| TableOrSubquery::Table {
        name: QualifiedName::bare(table),
        alias: None,
        index_hint: None,
        time_travel: None,
    };
    // Boxed reference to `<table>.k`.
    let key_of =
        |table: &'static str| Box::new(Expr::Column(ColumnRef::qualified(table, "k"), Span::ZERO));

    // a LEFT JOIN b ON a.k = b.k
    let on_equal_keys = Expr::BinaryOp {
        left: key_of("a"),
        op: AstBinaryOp::Eq,
        right: key_of("b"),
        span: Span::ZERO,
    };
    let from = FromClause {
        source: plain_table("a"),
        joins: vec![JoinClause {
            join_type: JoinType {
                natural: false,
                kind: JoinKind::Left,
            },
            table: plain_table("b"),
            constraint: Some(JoinConstraint::On(on_equal_keys)),
        }],
    };

    let relation_stats = ["a", "b", "c"].map(|name| table_stats(name, 128, 100_000));
    let join_order: Vec<String> = ["a", "b", "c"]
        .iter()
        .map(|name| (*name).to_owned())
        .collect();
    let join_terms = [join_term("a", "k", "b", "k"), join_term("b", "k", "c", "k")];
    let flags = PlannerFeatureFlags {
        leapfrog_join: true,
        ..PlannerFeatureFlags::default()
    };

    let segments = choose_join_segments(
        &join_order,
        &relation_stats,
        &join_terms,
        Some(&from),
        flags,
    );

    assert_eq!(segments.len(), 1);
    assert_eq!(segments[0].operator, JoinOperator::HashJoin);
}
8670
/// Index hints collected from a FROM clause can be looked up under both the
/// table name and its alias, for the source table and for a joined table.
#[test]
fn test_collect_table_index_hints_from_clause_includes_aliases() {
    use fsqlite_ast::{JoinClause, JoinKind, JoinType};

    // users AS u INDEXED BY idx_users_email
    let users = TableOrSubquery::Table {
        name: QualifiedName::bare("users"),
        alias: Some("u".to_owned()),
        index_hint: Some(IndexHint::IndexedBy("idx_users_email".to_owned())),
        time_travel: None,
    };
    // events AS e NOT INDEXED, inner-joined without a constraint
    let events_join = JoinClause {
        join_type: JoinType {
            kind: JoinKind::Inner,
            natural: false,
        },
        table: TableOrSubquery::Table {
            name: QualifiedName::bare("events"),
            alias: Some("e".to_owned()),
            index_hint: Some(IndexHint::NotIndexed),
            time_travel: None,
        },
        constraint: None,
    };
    let from = FromClause {
        source: users,
        joins: vec![events_join],
    };

    let hints = collect_table_index_hints(&from);

    let users_by_name = hints.get("users");
    assert!(matches!(
        users_by_name,
        Some(IndexHint::IndexedBy(name)) if name == "idx_users_email"
    ));
    let users_by_alias = hints.get("u");
    assert!(matches!(
        users_by_alias,
        Some(IndexHint::IndexedBy(name)) if name == "idx_users_email"
    ));

    let events_by_name = hints.get("events");
    assert!(matches!(events_by_name, Some(IndexHint::NotIndexed)));
    let events_by_alias = hints.get("e");
    assert!(matches!(events_by_alias, Some(IndexHint::NotIndexed)));
}
8709
/// A `NOT INDEXED` hint keeps the planner on a full table scan even though an
/// index on the equality column is supplied.
#[test]
fn test_order_joins_with_hints_respects_not_indexed() {
    let stats = [table_stats("t1", 1000, 50000)];
    let indexes = [index_info("idx_t1_a", "t1", &["a"], false, 100)];
    let where_terms = [eq_term("a")];
    let mut hint_map = BTreeMap::new();
    hint_map.insert(canonical_table_key("t1"), IndexHint::NotIndexed);

    let plan = order_joins_with_hints(
        &stats,
        &indexes,
        &where_terms,
        None,
        &[],
        Some(&hint_map),
        None,
    );

    assert_eq!(plan.join_order, ["t1"]);
    assert_eq!(plan.access_paths.len(), 1);
    let only_path = &plan.access_paths[0];
    assert!(matches!(only_path.kind, AccessPathKind::FullTableScan));
}
8725
/// An `INDEXED BY` hint selects the named index even when a second index on
/// the same column is offered alongside it.
#[test]
fn test_order_joins_with_hints_respects_indexed_by() {
    let stats = [table_stats("t1", 2000, 100_000)];
    // Both indexes cover column `a`; only the last argument differs.
    let candidates = [
        index_info("idx_fast", "t1", &["a"], false, 10),
        index_info("idx_slow", "t1", &["a"], false, 600),
    ];
    let where_terms = [eq_term("a")];
    let mut hint_map = BTreeMap::new();
    hint_map.insert(
        canonical_table_key("t1"),
        IndexHint::IndexedBy("idx_slow".to_owned()),
    );

    let plan = order_joins_with_hints(
        &stats,
        &candidates,
        &where_terms,
        None,
        &[],
        Some(&hint_map),
        None,
    );

    assert_eq!(plan.access_paths.len(), 1);
    let chosen_index = plan.access_paths[0].index.as_deref();
    assert_eq!(chosen_index, Some("idx_slow"));
}
8749
/// When two equivalent indexes are available, the index recorded in the
/// cracking hint store on the first plan is chosen again on the second plan,
/// even though the candidates are then supplied in the opposite order.
#[test]
fn test_order_joins_with_hints_reuses_cracking_store() {
    let stats = [table_stats("t1", 1000, 50000)];
    let where_terms = [eq_term("a")];
    let idx_a = index_info("idx_a", "t1", &["a"], false, 40);
    let idx_b = index_info("idx_b", "t1", &["a"], false, 40);
    let a_then_b = [idx_a.clone(), idx_b.clone()];
    let b_then_a = [idx_b, idx_a];
    let mut store = CrackingHintStore::default();

    let first = order_joins_with_hints(
        &stats,
        &a_then_b,
        &where_terms,
        None,
        &[],
        None,
        Some(&mut store),
    );
    assert_eq!(first.access_paths[0].index.as_deref(), Some("idx_a"));
    assert_eq!(store.preferred_index("t1"), Some("idx_a"));

    // Same store, candidates flipped.
    let second = order_joins_with_hints(
        &stats,
        &b_then_a,
        &where_terms,
        None,
        &[],
        None,
        Some(&mut store),
    );
    assert_eq!(second.access_paths[0].index.as_deref(), Some("idx_a"));
}
8781
/// An index on `(a, b)` is reported as a covering index scan when the query
/// needs only columns `a` and `b`.
#[test]
fn test_planner_selects_covering_index() {
    let stats = table_stats("t1", 1000, 50000);
    let indexes = [index_info("idx_t1_ab", "t1", &["a", "b"], false, 100)];
    let where_terms = [eq_term("a")];
    let needed = ["a", "b"].map(str::to_owned);

    let chosen = best_access_path(&stats, &indexes, &where_terms, Some(&needed));

    assert!(matches!(
        chosen.kind,
        AccessPathKind::CoveringIndexScan { .. }
    ));
}
8791
/// Needing only `rowid` still yields a covering index scan over a
/// single-column index on `a`.
#[test]
fn test_planner_treats_rowid_projection_as_covering_index_payload() {
    let stats = table_stats("t1", 1000, 50000);
    let indexes = [index_info("idx_t1_a", "t1", &["a"], false, 100)];
    let where_terms = [eq_term("a")];
    let needed = ["rowid"].map(str::to_owned);

    let chosen = best_access_path(&stats, &indexes, &where_terms, Some(&needed));

    assert!(matches!(
        chosen.kind,
        AccessPathKind::CoveringIndexScan { .. }
    ));
}
8801
/// With no indexes and no WHERE terms the planner falls back to a full table
/// scan, priced the same as `estimate_cost_ext` prices that scan.
#[test]
fn test_planner_heuristic_fallback() {
    let stats = table_stats("t1", 100, 1000);

    let chosen = best_access_path(&stats, &[], &[], None);
    assert!(matches!(chosen.kind, AccessPathKind::FullTableScan));

    let scan_cost = estimate_cost_ext(&AccessPathKind::FullTableScan, 100, 0, 1000);
    let drift = (chosen.estimated_cost - scan_cost).abs();
    assert!(drift < 1e-9);
}
8811
/// The rendered text of a two-table plan contains the plan header, the scan
/// of `t1`, the index used for `t2`, and the join-operator section.
#[test]
fn test_query_plan_display() {
    let scan_t1 = AccessPath {
        table: "t1".to_owned(),
        kind: AccessPathKind::FullTableScan,
        index: None,
        estimated_cost: 100.0,
        estimated_rows: 1000.0,
        time_travel: None,
        probe: None,
    };
    let lookup_t2 = AccessPath {
        table: "t2".to_owned(),
        kind: AccessPathKind::IndexScanEquality,
        index: Some("idx_t2".to_owned()),
        estimated_cost: 15.0,
        estimated_rows: 10.0,
        time_travel: None,
        probe: None,
    };
    let hash_segment = JoinPlanSegment {
        relations: vec!["t1".to_owned(), "t2".to_owned()],
        operator: JoinOperator::HashJoin,
        estimated_cost: 115.0,
        reason: "2-way joins stay on pairwise hash join".to_owned(),
    };
    let plan = QueryPlan {
        join_order: vec!["t1".to_owned(), "t2".to_owned()],
        access_paths: vec![scan_t1, lookup_t2],
        join_segments: vec![hash_segment],
        total_cost: 115.0,
        morsel_eligibility: None,
    };

    let rendered = plan.to_string();
    assert!(rendered.contains("QUERY PLAN"));
    assert!(rendered.contains("SCAN t1"));
    assert!(rendered.contains("JOIN OPERATORS"));
    assert!(rendered.contains("HASH JOIN"));
    assert!(rendered.contains("USING INDEX idx_t2"));
}
8852
/// A plan whose only join segment uses the leapfrog operator names that
/// operator in its rendered text, under the join-operator section.
#[test]
fn test_query_plan_display_mentions_leapfrog_operator() {
    let relations = || vec!["a".to_owned(), "b".to_owned(), "c".to_owned()];
    let leapfrog_segment = JoinPlanSegment {
        relations: relations(),
        operator: JoinOperator::LeapfrogTriejoin,
        estimated_cost: 42.0,
        reason: "AGM estimate 42.0 beats hash cost 100.0; trie arity 1".to_owned(),
    };
    let plan = QueryPlan {
        join_order: relations(),
        access_paths: vec![],
        join_segments: vec![leapfrog_segment],
        total_cost: 42.0,
        morsel_eligibility: None,
    };

    let rendered = plan.to_string();
    assert!(rendered.contains("LEAPFROG TRIEJOIN"));
    assert!(rendered.contains("JOIN OPERATORS"));
}
8872
/// A single full table scan estimated at 100 000 rows is morsel-eligible:
/// the scanned table is the driving table and the morsel count lies in
/// `2..=64`.
#[test]
fn test_morsel_eligibility_full_scan_large_table() {
    let big_scan = AccessPath {
        table: "big_table".to_owned(),
        kind: AccessPathKind::FullTableScan,
        index: None,
        estimated_cost: 10000.0,
        estimated_rows: 100_000.0,
        time_travel: None,
        probe: None,
    };
    let plan = QueryPlan {
        join_order: vec!["big_table".to_owned()],
        access_paths: vec![big_scan],
        join_segments: vec![],
        total_cost: 10000.0,
        morsel_eligibility: None,
    };

    let verdict = MorselEligibility::evaluate(&plan, false, false, 8);

    assert!(
        verdict.eligible,
        "bead_id=bd-b434d case=morsel_eligible_full_scan"
    );
    assert_eq!(verdict.driving_table.as_deref(), Some("big_table"));
    assert!(verdict.morsel_count > 1);
    assert!(verdict.morsel_count <= 64);
    // Diagnostic line for the test log.
    eprintln!(
        "INFO bead_id=bd-b434d case=morsel_eligible morsels={} rows_per={}",
        verdict.morsel_count, verdict.rows_per_morsel
    );
}
8903
/// A full table scan estimated at only 500 rows is rejected with the
/// `TooFewRows` reason.
#[test]
fn test_morsel_eligibility_small_table_ineligible() {
    let small_scan = AccessPath {
        table: "small".to_owned(),
        kind: AccessPathKind::FullTableScan,
        index: None,
        estimated_cost: 10.0,
        estimated_rows: 500.0,
        time_travel: None,
        probe: None,
    };
    let plan = QueryPlan {
        join_order: vec!["small".to_owned()],
        access_paths: vec![small_scan],
        join_segments: vec![],
        total_cost: 10.0,
        morsel_eligibility: None,
    };

    let verdict = MorselEligibility::evaluate(&plan, false, false, 8);

    assert!(!verdict.eligible);
    assert_eq!(verdict.reason, MorselIneligibleReason::TooFewRows);
}
8925
/// A plan whose only access path is an index equality scan is rejected with
/// the `NoFullTableScan` reason.
#[test]
fn test_morsel_eligibility_index_scan_ineligible() {
    let index_probe = AccessPath {
        table: "t1".to_owned(),
        kind: AccessPathKind::IndexScanEquality,
        index: Some("idx".to_owned()),
        estimated_cost: 5.0,
        estimated_rows: 10000.0,
        time_travel: None,
        probe: None,
    };
    let plan = QueryPlan {
        join_order: vec!["t1".to_owned()],
        access_paths: vec![index_probe],
        join_segments: vec![],
        total_cost: 5.0,
        morsel_eligibility: None,
    };

    let verdict = MorselEligibility::evaluate(&plan, false, false, 8);

    assert!(!verdict.eligible);
    assert_eq!(verdict.reason, MorselIneligibleReason::NoFullTableScan);
}
8947
/// Passing `true` as the second argument of `MorselEligibility::evaluate`
/// rejects a large full table scan with the `HasLimit` reason.
#[test]
fn test_morsel_eligibility_limit_ineligible() {
    let full_scan = AccessPath {
        table: "t1".to_owned(),
        kind: AccessPathKind::FullTableScan,
        index: None,
        estimated_cost: 1000.0,
        estimated_rows: 50000.0,
        time_travel: None,
        probe: None,
    };
    let plan = QueryPlan {
        join_order: vec!["t1".to_owned()],
        access_paths: vec![full_scan],
        join_segments: vec![],
        total_cost: 1000.0,
        morsel_eligibility: None,
    };

    let verdict = MorselEligibility::evaluate(&plan, true, false, 8);

    assert!(!verdict.eligible);
    assert_eq!(verdict.reason, MorselIneligibleReason::HasLimit);
}
8969
/// `rowid = 42` with no indexes available is planned as a rowid lookup whose
/// cost equals what `estimate_cost_ext` reports for that path kind.
#[test]
fn test_best_access_path_rowid_lookup() {
    let stats = table_stats("t1", 1024, 50000);

    // rowid = 42
    let rowid_column = Expr::Column(ColumnRef::bare("rowid"), Span::ZERO);
    let forty_two = Expr::Literal(Literal::Integer(42), Span::ZERO);
    let rowid_eq: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(rowid_column),
        op: AstBinaryOp::Eq,
        right: Box::new(forty_two),
        span: Span::ZERO,
    }));
    let where_terms = [classify_where_term(rowid_eq)];

    let chosen = best_access_path(&stats, &[], &where_terms, None);
    assert!(matches!(chosen.kind, AccessPathKind::RowidLookup));

    // PLANNER-2: rowid lookup cost = log2(n_pages) + 1 * ROW_ACCESS_COST.
    let lookup_cost = estimate_cost_ext(&AccessPathKind::RowidLookup, 1024, 0, 50000);
    let drift = (chosen.estimated_cost - lookup_cost).abs();
    assert!(drift < 1e-9);
}
8986
/// Without rowid-alias hints, a point predicate on `id`, a range on `id`, and
/// a predicate-free query are all planned as full table scans.
#[test]
fn test_best_access_path_ipk_oltp_shapes_without_schema_context() {
    let stats = table_stats("bench", 128, 5000);

    // Builds the leaked expression `id <op> <bound>`.
    let id_cmp = |op: AstBinaryOp, bound| -> &'static Expr {
        Box::leak(Box::new(Expr::BinaryOp {
            left: Box::new(Expr::Column(ColumnRef::bare("id"), Span::ZERO)),
            op,
            right: Box::new(Expr::Literal(Literal::Integer(bound), Span::ZERO)),
            span: Span::ZERO,
        }))
    };

    // The planner crate itself has no schema-aware INTEGER PRIMARY KEY
    // alias detection, so the mixed-OLTP benchmark's `id = ?1` shape is
    // still priced as a full scan until fsqlite-core upgrades it to a
    // rowid fast path after planning.
    let point_terms = [eq_term("id")];
    let point = best_access_path(&stats, &[], &point_terms, None);
    assert!(matches!(point.kind, AccessPathKind::FullTableScan));

    // id >= 100 AND id < 150
    let range_terms = [
        classify_where_term(id_cmp(AstBinaryOp::Ge, 100)),
        classify_where_term(id_cmp(AstBinaryOp::Lt, 150)),
    ];
    let range = best_access_path(&stats, &[], &range_terms, None);
    assert!(matches!(range.kind, AccessPathKind::FullTableScan));

    // No WHERE terms at all.
    let aggregate = best_access_path(&stats, &[], &[], None);
    assert!(matches!(aggregate.kind, AccessPathKind::FullTableScan));
}
9024
/// With a rowid-alias hint for `id`, an equality on `id` is planned as a
/// rowid lookup estimated at one row with a `RowidEquality` probe, and a
/// range on `id` is planned as an index-less range scan with a `Range` probe
/// on `id` that has a lower bound.
#[test]
fn test_best_access_path_ipk_alias_hint_uses_rowid_lookup() {
    let table = table_stats("bench", 128, 5000);
    let hints = [RowidAliasHint::new("id")];

    let point =
        best_access_path_with_rowid_alias_hints(&table, &[], &[eq_term("id")], None, &hints);

    assert!(matches!(point.kind, AccessPathKind::RowidLookup));
    // Compare the float estimate through an absolute difference, as the other
    // cost/row assertions in this module do, rather than with `assert_eq!`.
    assert!(
        (point.estimated_rows - 1.0).abs() < f64::EPSILON,
        "rowid lookup should estimate one row, got {}",
        point.estimated_rows
    );
    assert!(matches!(
        &point.probe,
        Some(AccessPathProbe::RowidEquality { target })
            if **target == Expr::Literal(Literal::Integer(1), Span::ZERO)
    ));

    let range =
        best_access_path_with_rowid_alias_hints(&table, &[], &[range_term("id")], None, &hints);
    assert!(matches!(range.kind, AccessPathKind::IndexScanRange { .. }));
    // The range runs over the rowid itself, so no index is named.
    assert!(range.index.is_none());
    assert!(matches!(
        &range.probe,
        Some(AccessPathProbe::Range {
            column,
            lower: Some(_),
            ..
        }) if column == "id"
    ));
}
9054
/// For the predicate `b.id = 7`, an unqualified rowid-alias hint for `id`
/// leaves the plan on a full table scan, while a hint qualified with `b`
/// turns it into a rowid lookup.
#[test]
fn test_best_access_path_ipk_alias_hint_respects_qualifier() {
    let stats = table_stats("bench", 128, 5000);

    // b.id = 7
    let qualified_id = Expr::Column(ColumnRef::qualified("b", "id"), Span::ZERO);
    let seven = Expr::Literal(Literal::Integer(7), Span::ZERO);
    let predicate: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
        left: Box::new(qualified_id),
        op: AstBinaryOp::Eq,
        right: Box::new(seven),
        span: Span::ZERO,
    }));
    let where_terms = [classify_where_term(predicate)];

    let unqualified_hint = [RowidAliasHint::new("id")];
    let miss = best_access_path_with_rowid_alias_hints(
        &stats,
        &[],
        &where_terms,
        None,
        &unqualified_hint,
    );
    assert!(matches!(miss.kind, AccessPathKind::FullTableScan));

    let qualified_hint = [RowidAliasHint::qualified("b", "id")];
    let hit = best_access_path_with_rowid_alias_hints(
        &stats,
        &[],
        &where_terms,
        None,
        &qualified_hint,
    );
    assert!(matches!(hit.kind, AccessPathKind::RowidLookup));
}
9074
/// Table statistics tagged `StatsSource::Analyze` keep that tag, and planning
/// without indexes or terms prices a full table scan from the given page and
/// row counts.
#[test]
fn test_analyze_stats_override() {
    let analyzed = TableStats {
        name: "t1".to_owned(),
        n_pages: 500,
        n_rows: 10000,
        source: StatsSource::Analyze,
    };
    // With ANALYZE stats, the source is recorded.
    assert_eq!(analyzed.source, StatsSource::Analyze);

    let chosen = best_access_path(&analyzed, &[], &[], None);
    assert!(matches!(chosen.kind, AccessPathKind::FullTableScan));

    let scan_cost = estimate_cost_ext(&AccessPathKind::FullTableScan, 500, 0, 10000);
    let drift = (chosen.estimated_cost - scan_cost).abs();
    assert!(drift < 1e-9);
}
9090
/// Ordering joins over no tables gives an empty join order at zero cost.
#[test]
fn test_order_joins_empty() {
    let empty_plan = order_joins(&[], &[], &[], None, &[]);

    assert!(empty_plan.join_order.is_empty());
    let cost_magnitude = empty_plan.total_cost.abs();
    assert!(cost_magnitude < f64::EPSILON);
}
9097
9098    // ===================================================================
9099    // Error Display / Error trait tests
9100    // ===================================================================
9101
/// The message for `IndexZeroOrNegative` includes the offending value and the
/// phrase "must be positive".
#[test]
fn test_compound_order_by_error_display_zero_or_negative() {
    let msg = CompoundOrderByError::IndexZeroOrNegative {
        value: -3,
        span: Span::ZERO,
    }
    .to_string();

    let mentions_value = msg.contains("-3");
    assert!(mentions_value, "should contain the value: {msg}");
    let mentions_rule = msg.contains("must be positive");
    assert!(mentions_rule, "should say must be positive: {msg}");
}
9115
/// `CompoundOrderByError` implements `std::error::Error` and reports no
/// underlying source.
#[test]
fn test_compound_order_by_error_is_error() {
    let not_found = CompoundOrderByError::ColumnNotFound {
        name: "x".to_owned(),
        span: Span::ZERO,
    };

    // Going through the trait method proves the impl exists; a leaf error has
    // no source.
    let underlying = std::error::Error::source(&not_found);
    assert!(underlying.is_none());
}
9125
/// Each `SingleTableProjectionError` variant listed here renders a message
/// containing its paired fragment.
#[test]
fn test_single_table_projection_error_display_all_variants() {
    let unknown_qualifier = SingleTableProjectionError::UnknownTableQualifier {
        qualifier: "bad".to_owned(),
    };
    let column_not_found = SingleTableProjectionError::ColumnNotFound {
        column: "missing_col".to_owned(),
    };
    // (error, fragment its message must contain)
    let cases: [(SingleTableProjectionError, &str); 5] = [
        (SingleTableProjectionError::NotSelectCore, "SELECT core"),
        (SingleTableProjectionError::MissingFromClause, "FROM clause"),
        (
            SingleTableProjectionError::UnsupportedFromSource,
            "single-table",
        ),
        (unknown_qualifier, "bad"),
        (column_not_found, "missing_col"),
    ];

    for (err, expected_fragment) in cases {
        let msg = err.to_string();
        let found = msg.contains(expected_fragment);
        assert!(
            found,
            "{err:?} display should contain '{expected_fragment}': got '{msg}'"
        );
    }
}
9156
/// `SingleTableProjectionError` implements `std::error::Error` and reports no
/// underlying source.
#[test]
fn test_single_table_projection_error_is_error() {
    let leaf = SingleTableProjectionError::NotSelectCore;
    let underlying = std::error::Error::source(&leaf);
    assert!(underlying.is_none());
}
9162
9163    // ===================================================================
9164    // count_output_columns tests
9165    // ===================================================================
9166
/// A SELECT core built from three aliases reports three output columns.
#[test]
fn test_count_output_columns_select() {
    let three_columns = select_core_with_aliases(&["a", "b", "c"]);
    let width = count_output_columns(&three_columns);
    assert_eq!(width, 3);
}
9172
/// A VALUES core with a single two-expression row reports two output columns.
#[test]
fn test_count_output_columns_values() {
    let int_literal = |value| Expr::Literal(Literal::Integer(value), Span::ZERO);
    let only_row = vec![int_literal(1), int_literal(2)];
    let values_core = SelectCore::Values(vec![only_row]);

    let width = count_output_columns(&values_core);
    assert_eq!(width, 2);
}
9181
/// A VALUES core with no rows reports zero output columns.
#[test]
fn test_count_output_columns_empty_values() {
    let no_rows = SelectCore::Values(Vec::new());
    let width = count_output_columns(&no_rows);
    assert_eq!(width, 0);
}
9187
9188    // ===================================================================
9189    // extract_output_aliases edge cases
9190    // ===================================================================
9191
/// A lone `*` result column yields a single `None` alias entry.
#[test]
fn test_extract_output_aliases_star_is_none() {
    // SELECT * with every optional clause absent.
    let star_only = SelectCore::Select {
        distinct: Distinctness::All,
        columns: vec![ResultColumn::Star],
        from: None,
        where_clause: None,
        group_by: Vec::new(),
        having: None,
        windows: Vec::new(),
    };

    assert_eq!(extract_output_aliases(&star_only), vec![None]);
}
9206
/// An unaliased expression column (`SELECT 1+2`) yields a single `None` alias
/// entry.
#[test]
fn test_extract_output_aliases_expression_no_alias() {
    let int_literal = |value| Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO));
    // 1 + 2, with no alias attached.
    let sum_column = ResultColumn::Expr {
        expr: Expr::BinaryOp {
            left: int_literal(1),
            op: AstBinaryOp::Add,
            right: int_literal(2),
            span: Span::ZERO,
        },
        alias: None,
    };
    let core = SelectCore::Select {
        distinct: Distinctness::All,
        columns: vec![sum_column],
        from: None,
        where_clause: None,
        group_by: Vec::new(),
        having: None,
        windows: Vec::new(),
    };

    assert_eq!(extract_output_aliases(&core), vec![None]);
}
9230
9231    // ===================================================================
9232    // resolve_single_table_result_columns edge cases
9233    // ===================================================================
9234
/// Resolving result columns against a VALUES core fails with `NotSelectCore`.
#[test]
fn test_resolve_projection_values_core_error() {
    let one = Expr::Literal(Literal::Integer(1), Span::ZERO);
    let values_core = SelectCore::Values(vec![vec![one]]);
    let table_columns = ["a".to_owned()];

    let err = resolve_single_table_result_columns(&values_core, &table_columns)
        .expect_err("VALUES should fail");

    assert_eq!(err, SingleTableProjectionError::NotSelectCore);
}
9242
/// Resolving `SELECT *` with no FROM clause fails with `MissingFromClause`.
#[test]
fn test_resolve_projection_missing_from_error() {
    let star_without_from = SelectCore::Select {
        distinct: Distinctness::All,
        columns: vec![ResultColumn::Star],
        from: None,
        where_clause: None,
        group_by: Vec::new(),
        having: None,
        windows: Vec::new(),
    };
    let table_columns = ["a".to_owned()];

    let err = resolve_single_table_result_columns(&star_without_from, &table_columns)
        .expect_err("missing FROM should fail");

    assert_eq!(err, SingleTableProjectionError::MissingFromClause);
}
9258
/// Resolving `SELECT *` over a FROM clause that contains a join fails with
/// `UnsupportedFromSource`.
#[test]
fn test_resolve_projection_with_joins_error() {
    use fsqlite_ast::{JoinClause, JoinKind, JoinType};

    // Unaliased, unhinted base-table reference.
    let plain_table = |table: &'static str| TableOrSubquery::Table {
        name: QualifiedName::bare(table),
        alias: None,
        index_hint: None,
        time_travel: None,
    };

    // FROM t INNER JOIN u, with no join constraint.
    let joined_from = FromClause {
        source: plain_table("t"),
        joins: vec![JoinClause {
            join_type: JoinType {
                kind: JoinKind::Inner,
                natural: false,
            },
            table: plain_table("u"),
            constraint: None,
        }],
    };
    let core = SelectCore::Select {
        distinct: Distinctness::All,
        columns: vec![ResultColumn::Star],
        from: Some(joined_from),
        where_clause: None,
        group_by: Vec::new(),
        having: None,
        windows: Vec::new(),
    };
    let table_columns = ["a".to_owned()];

    let err = resolve_single_table_result_columns(&core, &table_columns)
        .expect_err("JOIN should fail");

    assert_eq!(err, SingleTableProjectionError::UnsupportedFromSource);
}
9295
/// `wrong_table.*` against a FROM clause over table `t` fails with
/// `UnknownTableQualifier`, carrying the qualifier that was used.
#[test]
fn test_resolve_projection_unknown_table_qualifier() {
    let wrong_star = ResultColumn::TableStar(QualifiedName::bare("wrong_table"));
    let core = select_core_single_table(vec![wrong_star], "t", None);
    let table_columns = ["a".to_owned()];

    let err = resolve_single_table_result_columns(&core, &table_columns)
        .expect_err("wrong qualifier should fail");

    let expected = SingleTableProjectionError::UnknownTableQualifier {
        qualifier: "wrong_table".to_owned(),
    };
    assert_eq!(err, expected);
}
9312
/// A column qualified with `other` against a FROM clause over table `t` fails
/// with `UnknownTableQualifier`.
#[test]
fn test_resolve_projection_qualified_column_wrong_table() {
    // other.a, with no alias.
    let foreign_column = ResultColumn::Expr {
        expr: Expr::Column(ColumnRef::qualified("other", "a"), Span::ZERO),
        alias: None,
    };
    let core = select_core_single_table(vec![foreign_column], "t", None);
    let table_columns = ["a".to_owned()];

    let err = resolve_single_table_result_columns(&core, &table_columns)
        .expect_err("wrong table qualifier should fail");

    let is_unknown_qualifier = matches!(
        err,
        SingleTableProjectionError::UnknownTableQualifier { .. }
    );
    assert!(is_unknown_qualifier);
}
9330
/// A literal result column with an alias resolves to exactly one expression
/// column that still carries the alias.
#[test]
fn test_resolve_projection_preserves_expression() {
    // Non-column expressions should be preserved as-is.
    let answer_column = ResultColumn::Expr {
        expr: Expr::Literal(Literal::Integer(42), Span::ZERO),
        alias: Some("answer".to_owned()),
    };
    let core = select_core_single_table(vec![answer_column], "t", None);
    let table_columns = ["a".to_owned()];

    let resolved = resolve_single_table_result_columns(&core, &table_columns)
        .expect("expression should be preserved");

    assert_eq!(resolved.len(), 1);
    let only_column = &resolved[0];
    assert!(matches!(
        only_column,
        ResultColumn::Expr {
            alias: Some(a), ..
        } if a == "answer"
    ));
}
9352
9353    // ===================================================================
9354    // classify_where_term edge cases
9355    // ===================================================================
9356
/// `x BETWEEN 1 AND 10` is classified as a `Between` term on column `x`.
#[test]
fn test_classify_where_term_between() {
    let int_literal = |value| Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO));
    let between: &'static Expr = Box::leak(Box::new(Expr::Between {
        expr: Box::new(Expr::Column(ColumnRef::bare("x"), Span::ZERO)),
        low: int_literal(1),
        high: int_literal(10),
        not: false,
        span: Span::ZERO,
    }));

    let term = classify_where_term(between);

    assert!(matches!(term.kind, WhereTermKind::Between));
    let constrained = term.column.as_ref().unwrap();
    assert_eq!(constrained.column, "x");
}
9370
/// `x NOT BETWEEN 1 AND 10` is classified as `Other`.
#[test]
fn test_classify_where_term_not_between_is_other() {
    let int_literal = |value| Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO));
    let not_between: &'static Expr = Box::leak(Box::new(Expr::Between {
        expr: Box::new(Expr::Column(ColumnRef::bare("x"), Span::ZERO)),
        low: int_literal(1),
        high: int_literal(10),
        not: true,
        span: Span::ZERO,
    }));

    let classified = classify_where_term(not_between);

    assert!(matches!(classified.kind, WhereTermKind::Other));
}
9383
/// A term built by `in_term("col", 5)` is classified as `InList` with a count
/// of five on column `col`.
#[test]
fn test_classify_where_term_in_list() {
    let five_way_in = in_term("col", 5);

    assert!(matches!(five_way_in.kind, WhereTermKind::InList { count: 5 }));
    let constrained = five_way_in.column.as_ref().unwrap();
    assert_eq!(constrained.column, "col");
}
9390
/// `x NOT IN (1)` is classified as `Other`.
#[test]
fn test_classify_where_term_not_in_is_other() {
    let members = vec![Expr::Literal(Literal::Integer(1), Span::ZERO)];
    let not_in: &'static Expr = Box::leak(Box::new(Expr::In {
        expr: Box::new(Expr::Column(ColumnRef::bare("x"), Span::ZERO)),
        set: InSet::List(members),
        not: true,
        span: Span::ZERO,
    }));

    let classified = classify_where_term(not_in);

    assert!(matches!(classified.kind, WhereTermKind::Other));
}
9402
/// LIKE with an ASCII-letter prefix, and LIKE with a leading wildcard, are
/// both classified as `Other`.
#[test]
fn test_classify_where_term_like_is_other() {
    // ASCII LIKE prefixes remain unsafe because default SQLite LIKE folds
    // ASCII case, so range lowering would miss rows like 'ABC...'.
    let ascii_prefix = like_term("name", "abc%");
    assert!(matches!(ascii_prefix.kind, WhereTermKind::Other));

    let leading_wildcard = like_term("name", "%wildcard");
    assert!(matches!(leading_wildcard.kind, WhereTermKind::Other));
}
9413
/// `name LIKE '123%'` is classified as a `LikePrefix` term with prefix `123`
/// and upper bound `124` on column `name`.
#[test]
fn test_classify_where_term_like_case_stable_prefix() {
    let digits_prefix = like_term("name", "123%");

    let is_expected_prefix = matches!(
        &digits_prefix.kind,
        WhereTermKind::LikePrefix {
            prefix,
            upper_bound: Some(upper_bound),
        } if prefix == "123" && upper_bound == "124"
    );
    assert!(is_expected_prefix);

    let constrained = digits_prefix.column.as_ref().unwrap();
    assert_eq!(constrained.column, "name");
}
9426
/// With `\` as the escape character, the pattern `123\%%` is classified as a
/// `LikePrefix` term with prefix `123%` and upper bound `123&` on column
/// `name`.
#[test]
fn test_classify_where_term_like_escape_case_stable_prefix() {
    let escaped_prefix = like_term_with_escape("name", "123\\%%", "\\");

    let is_expected_prefix = matches!(
        &escaped_prefix.kind,
        WhereTermKind::LikePrefix {
            prefix,
            upper_bound: Some(upper_bound),
        } if prefix == "123%" && upper_bound == "123&"
    );
    assert!(is_expected_prefix);

    let constrained = escaped_prefix.column.as_ref().unwrap();
    assert_eq!(constrained.column, "name");
}
9439
/// With `\` as the escape character, the ASCII-letter pattern `abc\%%` is
/// classified as `Other`.
#[test]
fn test_classify_where_term_like_escape_ascii_prefix_is_other() {
    let escaped_ascii = like_term_with_escape("name", "abc\\%%", "\\");
    let is_other = matches!(escaped_ascii.kind, WhereTermKind::Other);
    assert!(is_other);
}
9445
9446    #[test]
9447    fn test_classify_where_term_glob_prefix() {
9448        let term = glob_term("name", "abc*");
9449        assert!(matches!(
9450            term.kind,
9451            WhereTermKind::LikePrefix {
9452                ref prefix,
9453                upper_bound: Some(ref upper_bound),
9454            } if prefix == "abc" && upper_bound == "abd"
9455        ));
9456        assert_eq!(term.column.as_ref().unwrap().column, "name");
9457    }
9458
9459    #[test]
9460    fn test_classify_where_term_glob_no_prefix_is_other() {
9461        let term = glob_term("name", "*wildcard");
9462        assert!(matches!(term.kind, WhereTermKind::Other));
9463    }
9464
9465    #[test]
9466    fn test_classify_where_term_eq_null_is_other() {
9467        // `col = NULL` is always NULL (unknown) in SQL — not a usable equality.
9468        let expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
9469            left: Box::new(Expr::Column(ColumnRef::bare("x"), Span::ZERO)),
9470            op: AstBinaryOp::Eq,
9471            right: Box::new(Expr::Literal(Literal::Null, Span::ZERO)),
9472            span: Span::ZERO,
9473        }));
9474        let term = classify_where_term(expr);
9475        assert!(
9476            matches!(term.kind, WhereTermKind::Other),
9477            "col = NULL should be Other, got {:?}",
9478            term.kind
9479        );
9480
9481        // Also check NULL = col (reversed)
9482        let expr2: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
9483            left: Box::new(Expr::Literal(Literal::Null, Span::ZERO)),
9484            op: AstBinaryOp::Eq,
9485            right: Box::new(Expr::Column(ColumnRef::bare("x"), Span::ZERO)),
9486            span: Span::ZERO,
9487        }));
9488        let term2 = classify_where_term(expr2);
9489        assert!(
9490            matches!(term2.kind, WhereTermKind::Other),
9491            "NULL = col should be Other, got {:?}",
9492            term2.kind
9493        );
9494    }
9495
9496    #[test]
9497    fn test_classify_where_term_rowid_aliases() {
9498        // _rowid_ and oid are also rowid aliases
9499        for alias in &["_rowid_", "oid", "ROWID", "OID"] {
9500            let expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
9501                left: Box::new(Expr::Column(ColumnRef::bare(*alias), Span::ZERO)),
9502                op: AstBinaryOp::Eq,
9503                right: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
9504                span: Span::ZERO,
9505            }));
9506            let term = classify_where_term(expr);
9507            assert!(
9508                matches!(term.kind, WhereTermKind::RowidEquality),
9509                "'{alias}' should be classified as RowidEquality"
9510            );
9511        }
9512    }
9513
9514    #[test]
9515    fn test_classify_where_term_reversed_equality() {
9516        // expr = col (column on the right side)
9517        let expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
9518            left: Box::new(Expr::Literal(Literal::Integer(42), Span::ZERO)),
9519            op: AstBinaryOp::Eq,
9520            right: Box::new(Expr::Column(ColumnRef::bare("x"), Span::ZERO)),
9521            span: Span::ZERO,
9522        }));
9523        let term = classify_where_term(expr);
9524        assert!(matches!(term.kind, WhereTermKind::Equality));
9525        assert_eq!(term.column.as_ref().unwrap().column, "x");
9526    }
9527
9528    #[test]
9529    fn test_classify_where_term_reversed_rowid_equality() {
9530        // 42 = rowid (column on the right side)
9531        let expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
9532            left: Box::new(Expr::Literal(Literal::Integer(42), Span::ZERO)),
9533            op: AstBinaryOp::Eq,
9534            right: Box::new(Expr::Column(ColumnRef::bare("rowid"), Span::ZERO)),
9535            span: Span::ZERO,
9536        }));
9537        let term = classify_where_term(expr);
9538        assert!(matches!(term.kind, WhereTermKind::RowidEquality));
9539    }
9540
9541    #[test]
9542    fn test_classify_where_term_eq_no_columns_is_other() {
9543        // 1 = 2 (no columns on either side)
9544        let expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
9545            left: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
9546            op: AstBinaryOp::Eq,
9547            right: Box::new(Expr::Literal(Literal::Integer(2), Span::ZERO)),
9548            span: Span::ZERO,
9549        }));
9550        let term = classify_where_term(expr);
9551        assert!(matches!(term.kind, WhereTermKind::Other));
9552        assert!(term.column.is_none());
9553    }
9554
9555    #[test]
9556    fn test_classify_where_term_generic_fallback() {
9557        // OR expression → Other
9558        let expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
9559            left: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
9560            op: AstBinaryOp::Or,
9561            right: Box::new(Expr::Literal(Literal::Integer(0), Span::ZERO)),
9562            span: Span::ZERO,
9563        }));
9564        let term = classify_where_term(expr);
9565        assert!(matches!(term.kind, WhereTermKind::Other));
9566    }
9567
9568    #[test]
9569    fn test_classify_where_term_or_same_column_becomes_in_list() {
9570        let term = or_eq_term("a", &[1, 2, 3]);
9571        assert!(matches!(term.kind, WhereTermKind::InList { count: 3 }));
9572        assert_eq!(term.column.as_ref().map(|c| c.column.as_str()), Some("a"));
9573    }
9574
9575    #[test]
9576    fn test_classify_where_term_or_reversed_equalities_becomes_in_list() {
9577        let expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
9578            left: Box::new(Expr::BinaryOp {
9579                left: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
9580                op: AstBinaryOp::Eq,
9581                right: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
9582                span: Span::ZERO,
9583            }),
9584            op: AstBinaryOp::Or,
9585            right: Box::new(Expr::BinaryOp {
9586                left: Box::new(Expr::Literal(Literal::Integer(2), Span::ZERO)),
9587                op: AstBinaryOp::Eq,
9588                right: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
9589                span: Span::ZERO,
9590            }),
9591            span: Span::ZERO,
9592        }));
9593
9594        let term = classify_where_term(expr);
9595        assert!(matches!(term.kind, WhereTermKind::InList { count: 2 }));
9596        assert_eq!(term.column.as_ref().map(|c| c.column.as_str()), Some("a"));
9597    }
9598
9599    #[test]
9600    fn test_classify_where_term_or_mixed_columns_is_other() {
9601        let expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
9602            left: Box::new(Expr::BinaryOp {
9603                left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
9604                op: AstBinaryOp::Eq,
9605                right: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
9606                span: Span::ZERO,
9607            }),
9608            op: AstBinaryOp::Or,
9609            right: Box::new(Expr::BinaryOp {
9610                left: Box::new(Expr::Column(ColumnRef::bare("b"), Span::ZERO)),
9611                op: AstBinaryOp::Eq,
9612                right: Box::new(Expr::Literal(Literal::Integer(2), Span::ZERO)),
9613                span: Span::ZERO,
9614            }),
9615            span: Span::ZERO,
9616        }));
9617
9618        let term = classify_where_term(expr);
9619        assert!(matches!(term.kind, WhereTermKind::Other));
9620    }
9621
9622    // ===================================================================
9623    // decompose_where edge cases
9624    // ===================================================================
9625
9626    #[test]
9627    fn test_decompose_where_nested_and() {
9628        // (a = 1 AND b = 2) AND c = 3 → 3 terms
9629        let inner = Expr::BinaryOp {
9630            left: Box::new(Expr::BinaryOp {
9631                left: Box::new(Expr::BinaryOp {
9632                    left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
9633                    op: AstBinaryOp::Eq,
9634                    right: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
9635                    span: Span::ZERO,
9636                }),
9637                op: AstBinaryOp::And,
9638                right: Box::new(Expr::BinaryOp {
9639                    left: Box::new(Expr::Column(ColumnRef::bare("b"), Span::ZERO)),
9640                    op: AstBinaryOp::Eq,
9641                    right: Box::new(Expr::Literal(Literal::Integer(2), Span::ZERO)),
9642                    span: Span::ZERO,
9643                }),
9644                span: Span::ZERO,
9645            }),
9646            op: AstBinaryOp::And,
9647            right: Box::new(Expr::BinaryOp {
9648                left: Box::new(Expr::Column(ColumnRef::bare("c"), Span::ZERO)),
9649                op: AstBinaryOp::Eq,
9650                right: Box::new(Expr::Literal(Literal::Integer(3), Span::ZERO)),
9651                span: Span::ZERO,
9652            }),
9653            span: Span::ZERO,
9654        };
9655        let terms = decompose_where(&inner);
9656        assert_eq!(terms.len(), 3);
9657    }
9658
9659    #[test]
9660    fn test_decompose_where_single_term() {
9661        let expr = Expr::BinaryOp {
9662            left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
9663            op: AstBinaryOp::Eq,
9664            right: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
9665            span: Span::ZERO,
9666        };
9667        let terms = decompose_where(&expr);
9668        assert_eq!(terms.len(), 1);
9669    }
9670
9671    #[test]
9672    fn test_extract_glob_prefix_star_wildcard() {
9673        // "abc*" → prefix = "abc" (pure trailing-star prefix)
9674        let pat = Expr::Literal(Literal::String("abc*".to_owned()), Span::ZERO);
9675        assert_eq!(extract_glob_prefix(&pat), Some("abc".to_owned()));
9676    }
9677
9678    #[test]
9679    fn test_extract_glob_prefix_rejects_non_terminal_wildcards() {
9680        let embedded_star = Expr::Literal(Literal::String("abc*def".to_owned()), Span::ZERO);
9681        assert_eq!(extract_glob_prefix(&embedded_star), None);
9682
9683        let char_class = Expr::Literal(Literal::String("abc[0-9]".to_owned()), Span::ZERO);
9684        assert_eq!(extract_glob_prefix(&char_class), None);
9685    }
9686
9687    #[test]
9688    fn test_extract_glob_prefix_non_string_expr() {
9689        // Non-string expression → None
9690        let pat = Expr::Literal(Literal::Integer(42), Span::ZERO);
9691        assert_eq!(extract_glob_prefix(&pat), None);
9692    }
9693
9694    // ===================================================================
9695    // LIKE prefix extraction (bd-wwqen.6)
9696    // ===================================================================
9697
9698    #[test]
9699    fn test_extract_like_prefix_percent_wildcard() {
9700        // "abc%" → prefix = "abc" (pure trailing-percent prefix)
9701        let pat = Expr::Literal(Literal::String("abc%".to_owned()), Span::ZERO);
9702        assert_eq!(extract_like_prefix(&pat, None), Some("abc".to_owned()));
9703    }
9704
9705    #[test]
9706    fn test_extract_like_prefix_rejects_non_terminal_or_single_char_wildcards() {
9707        let embedded_percent = Expr::Literal(Literal::String("abc%def".to_owned()), Span::ZERO);
9708        assert_eq!(extract_like_prefix(&embedded_percent, None), None);
9709
9710        let underscore = Expr::Literal(Literal::String("abc_def".to_owned()), Span::ZERO);
9711        assert_eq!(extract_like_prefix(&underscore, None), None);
9712    }
9713
9714    #[test]
9715    fn test_extract_like_prefix_starts_with_wildcard() {
9716        // "%abc" → None (no constant prefix)
9717        let pat = Expr::Literal(Literal::String("%abc".to_owned()), Span::ZERO);
9718        assert_eq!(extract_like_prefix(&pat, None), None);
9719
9720        // "_abc" → None (no constant prefix)
9721        let pat2 = Expr::Literal(Literal::String("_abc".to_owned()), Span::ZERO);
9722        assert_eq!(extract_like_prefix(&pat2, None), None);
9723    }
9724
9725    #[test]
9726    fn test_extract_like_prefix_with_escape_percent_in_prefix() {
9727        let pat = Expr::Literal(Literal::String("123\\%%".to_owned()), Span::ZERO);
9728        let esc = Expr::Literal(Literal::String("\\".to_owned()), Span::ZERO);
9729        assert_eq!(
9730            extract_like_prefix(&pat, Some(&esc)),
9731            Some("123%".to_owned())
9732        );
9733    }
9734
9735    #[test]
9736    fn test_extract_like_prefix_with_escape_underscore_in_prefix() {
9737        let pat = Expr::Literal(Literal::String("123!_%".to_owned()), Span::ZERO);
9738        let esc = Expr::Literal(Literal::String("!".to_owned()), Span::ZERO);
9739        assert_eq!(
9740            extract_like_prefix(&pat, Some(&esc)),
9741            Some("123_".to_owned())
9742        );
9743    }
9744
9745    #[test]
9746    fn test_extract_like_prefix_with_invalid_escape_literal() {
9747        let pat = Expr::Literal(Literal::String("123\\%%".to_owned()), Span::ZERO);
9748        let esc = Expr::Literal(Literal::String("xx".to_owned()), Span::ZERO);
9749        assert_eq!(extract_like_prefix(&pat, Some(&esc)), None);
9750    }
9751
9752    #[test]
9753    fn test_extract_like_prefix_non_string_expr() {
9754        let pat = Expr::Literal(Literal::Integer(42), Span::ZERO);
9755        assert_eq!(extract_like_prefix(&pat, None), None);
9756    }
9757
9758    #[test]
9759    fn test_extract_like_prefix_exact_match() {
9760        // "abc" (no wildcards) is not a prefix-range probe.
9761        let pat = Expr::Literal(Literal::String("abc".to_owned()), Span::ZERO);
9762        assert_eq!(extract_like_prefix(&pat, None), None);
9763    }
9764
9765    // ===================================================================
9766    // Join ordering / star query edge cases
9767    // ===================================================================
9768
9769    #[test]
9770    fn test_detect_star_query_too_few_tables() {
9771        let tables = [table_stats("t1", 100, 1000), table_stats("t2", 100, 1000)];
9772        let terms = [join_term("t1", "id", "t2", "fk")];
9773        assert!(!detect_star_query(&tables, &terms));
9774    }
9775
9776    #[test]
9777    fn test_mx_choice_zero_tables() {
9778        assert_eq!(compute_mx_choice(0, false), 1);
9779    }
9780
9781    // ===================================================================
9782    // best_access_path edge cases
9783    // ===================================================================
9784
9785    #[test]
9786    fn test_best_access_path_unique_index_equality() {
9787        let table = table_stats("t1", 1000, 50000);
9788        let idx = index_info("idx_pk", "t1", &["id"], true, 100);
9789        let terms = [eq_term("id")];
9790        let ap = best_access_path(&table, &[idx], &terms, None);
9791        // Unique index equality → estimated_rows = 1.0
9792        assert!(
9793            (ap.estimated_rows - 1.0).abs() < f64::EPSILON,
9794            "unique index equality should return 1 row, got {}",
9795            ap.estimated_rows
9796        );
9797    }
9798
9799    #[test]
9800    fn test_best_access_path_in_expansion() {
9801        let table = table_stats("t1", 100, 1000);
9802        let idx = index_info("idx_col", "t1", &["col"], false, 20);
9803        let terms = [in_term("col", 3)];
9804        let ap = best_access_path(&table, &[idx], &terms, None);
9805        assert!(matches!(ap.kind, AccessPathKind::IndexScanEquality));
9806        assert!(ap.index.is_some());
9807    }
9808
9809    #[test]
9810    fn test_best_access_path_like_no_index() {
9811        let table = table_stats("t1", 100, 1000);
9812        let idx = index_info("idx_name", "t1", &["name"], false, 20);
9813        let terms = [like_term("name", "Jo%")];
9814        let ap = best_access_path(&table, &[idx], &terms, None);
9815        // ASCII LIKE prefixes remain unsafe under default SQLite semantics, so
9816        // a full table scan is expected.
9817        assert!(
9818            matches!(ap.kind, AccessPathKind::FullTableScan),
9819            "LIKE should fall back to full scan, got {:?}",
9820            ap.kind
9821        );
9822    }
9823
9824    #[test]
9825    fn test_best_access_path_like_case_stable_prefix_uses_index_scan() {
9826        let table = table_stats("t1", 100, 1000);
9827        let idx = index_info("idx_name", "t1", &["name"], false, 20);
9828        let terms = [like_term("name", "123%")];
9829        let ap = best_access_path(&table, &[idx], &terms, None);
9830        assert!(
9831            matches!(ap.kind, AccessPathKind::IndexScanRange { .. }),
9832            "case-stable LIKE prefix should use index scan, got {:?}",
9833            ap.kind
9834        );
9835    }
9836
9837    #[test]
9838    fn test_best_access_path_like_escape_case_stable_prefix_uses_index_scan() {
9839        let table = table_stats("t1", 100, 1000);
9840        let idx = index_info("idx_name", "t1", &["name"], false, 20);
9841        let terms = [like_term_with_escape("name", "123\\%%", "\\")];
9842        let ap = best_access_path(&table, &[idx], &terms, None);
9843        assert!(
9844            matches!(ap.kind, AccessPathKind::IndexScanRange { .. }),
9845            "escaped case-stable LIKE prefix should use index scan, got {:?}",
9846            ap.kind
9847        );
9848    }
9849
9850    #[test]
9851    fn test_best_access_path_glob_prefix() {
9852        let table = table_stats("t1", 100, 1000);
9853        let idx = index_info("idx_name", "t1", &["name"], false, 20);
9854        let terms = [glob_term("name", "Jo*")];
9855        let ap = best_access_path(&table, &[idx], &terms, None);
9856        // GLOB prefix should use index range scan
9857        assert!(
9858            matches!(
9859                ap.kind,
9860                AccessPathKind::IndexScanRange { .. } | AccessPathKind::CoveringIndexScan { .. }
9861            ),
9862            "GLOB prefix should use index scan, got {:?}",
9863            ap.kind
9864        );
9865    }
9866
9867    #[test]
9868    fn test_best_access_path_between_range() {
9869        let table = table_stats("t1", 100, 1000);
9870        let idx = index_info("idx_a", "t1", &["a"], false, 20);
9871        let expr: &'static Expr = Box::leak(Box::new(Expr::Between {
9872            expr: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
9873            low: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
9874            high: Box::new(Expr::Literal(Literal::Integer(100), Span::ZERO)),
9875            not: false,
9876            span: Span::ZERO,
9877        }));
9878        let term = classify_where_term(expr);
9879        let ap = best_access_path(&table, &[idx], &[term], None);
9880        assert!(matches!(ap.kind, AccessPathKind::IndexScanRange { .. }));
9881    }
9882
9883    #[test]
9884    fn test_best_access_path_ignores_wrong_table_index() {
9885        // Index belongs to different table — should not be used.
9886        let table = table_stats("t1", 100, 1000);
9887        let idx = index_info("idx_other", "t2", &["a"], false, 20);
9888        let terms = [eq_term("a")];
9889        let ap = best_access_path(&table, &[idx], &terms, None);
9890        assert!(matches!(ap.kind, AccessPathKind::FullTableScan));
9891    }
9892
9893    #[test]
9894    fn test_best_access_path_empty_index_columns() {
9895        // Index with no columns → not usable.
9896        let table = table_stats("t1", 100, 1000);
9897        let idx = IndexInfo {
9898            name: "idx_empty".to_owned(),
9899            table: "t1".to_owned(),
9900            columns: vec![],
9901            unique: false,
9902            n_pages: 10,
9903            source: StatsSource::Heuristic,
9904            partial_where: None,
9905            expression_columns: vec![],
9906        };
9907        let terms = [eq_term("a")];
9908        let ap = best_access_path(&table, &[idx], &terms, None);
9909        assert!(matches!(ap.kind, AccessPathKind::FullTableScan));
9910    }
9911
9912    #[test]
9913    fn test_estimate_skip_scan_leading_distinct() {
9914        // (n_pages / SKIP_SCAN_PAGES_PER_LEADING_DISTINCT=8).max(1): proportional
9915        // to the page count, floored at 1.
9916        let idx = |n_pages: u64| index_info("idx", "t", &["a", "b"], false, n_pages);
9917        assert_eq!(estimate_skip_scan_leading_distinct(&idx(0)), 1); // floor
9918        assert_eq!(estimate_skip_scan_leading_distinct(&idx(7)), 1); // 7/8 = 0 -> 1
9919        assert_eq!(estimate_skip_scan_leading_distinct(&idx(8)), 1); // 8/8 = 1
9920        assert_eq!(estimate_skip_scan_leading_distinct(&idx(24)), 3); // 24/8 = 3
9921        assert_eq!(estimate_skip_scan_leading_distinct(&idx(80)), 10); // 80/8 = 10
9922    }
9923
9924    #[test]
9925    fn test_estimate_pairwise_hash_join_cost_left_deep_accumulation() {
9926        // Left-deep hash-join cost model: each join step charges build+probe
9927        // (scanning both inputs, written as min+max which equals their sum) and
9928        // grows the running intermediate cardinality by a factor of the join
9929        // selectivity heuristic (0.25). A single relation costs nothing.
9930        // estimate_pairwise_hash_join_cost has no direct unit test, only
9931        // indirect coverage inside best_access_path.
9932
9933        // Fewer than two relations: nothing to join, zero cost.
9934        assert!(estimate_pairwise_hash_join_cost(&["A".to_owned()], &HashMap::new()).abs() < 1e-9);
9935        let empty: Vec<String> = vec![];
9936        assert!(estimate_pairwise_hash_join_cost(&empty, &HashMap::new()).abs() < 1e-9);
9937
9938        let rows = |pairs: &[(&str, f64)]| -> HashMap<String, f64> {
9939            pairs.iter().map(|&(t, n)| (t.to_owned(), n)).collect()
9940        };
9941
9942        // Two relations A(100) |><| B(250): cost is just the two scans, 100+250,
9943        // independent of selectivity (the intermediate is never reused).
9944        let ab = estimate_pairwise_hash_join_cost(
9945            &["A".to_owned(), "B".to_owned()],
9946            &rows(&[("A", 100.0), ("B", 250.0)]),
9947        );
9948        assert!(
9949            (ab - 350.0).abs() < 1e-9,
9950            "two-table cost should be 100+250, got {ab}"
9951        );
9952
9953        // Three relations A(100), B(250), C(40): after A|><|B the intermediate is
9954        // 100*250*0.25 = 6250, so the third step charges 6250+40. Total =
9955        // (100+250) + (6250+40) = 6640.
9956        let abc = estimate_pairwise_hash_join_cost(
9957            &["A".to_owned(), "B".to_owned(), "C".to_owned()],
9958            &rows(&[("A", 100.0), ("B", 250.0), ("C", 40.0)]),
9959        );
9960        assert!(
9961            (abc - 6640.0).abs() < 1e-9,
9962            "three-table cost should be 6640, got {abc}"
9963        );
9964
9965        // Unknown tables default to 1 row (floored at 1.0): cost 1 + 1 = 2.
9966        let defaulted =
9967            estimate_pairwise_hash_join_cost(&["X".to_owned(), "Y".to_owned()], &HashMap::new());
9968        assert!(
9969            (defaulted - 2.0).abs() < 1e-9,
9970            "missing rows default to 1 -> 2, got {defaulted}"
9971        );
9972    }
9973
9974    #[test]
9975    fn test_estimate_agm_upper_bound_triangle_and_guards() {
9976        // The AGM (Atserias-Grohe-Marx) fractional-cover bound on worst-case join
9977        // output. The textbook case is the triangle query R(A,B) |><| S(B,C) |><|
9978        // T(A,C): every variable has degree 2, so each relation's exponent is
9979        // max(1/2, 1/2) = 1/2 and the bound is (N_R * N_S * N_T)^(1/2). With all
9980        // three relations at N=100 rows this is 100^(3/2) = 1000 -- the classic
9981        // sub-N^3 bound. estimate_agm_upper_bound has no direct unit test (only
9982        // indirect coverage through best_access_path).
9983        let triangle = TrieHypergraph {
9984            relation_variables: vec![vec![0, 1], vec![1, 2], vec![0, 2]],
9985            variable_count: 3,
9986            arity: 2,
9987        };
9988        let component = vec!["R".to_owned(), "S".to_owned(), "T".to_owned()];
9989        let mut rows: HashMap<String, f64> = HashMap::new();
9990        rows.insert("R".to_owned(), 100.0);
9991        rows.insert("S".to_owned(), 100.0);
9992        rows.insert("T".to_owned(), 100.0);
9993
9994        let bound = estimate_agm_upper_bound(&component, &rows, &triangle).unwrap();
9995        assert!(
9996            (bound - 1000.0).abs() < 1e-6,
9997            "triangle bound should be 100^1.5 = 1000, got {bound}"
9998        );
9999
10000        // A component whose length does not match the relation count is rejected.
10001        let two = vec!["R".to_owned(), "S".to_owned()];
10002        assert!(estimate_agm_upper_bound(&two, &rows, &triangle).is_none());
10003
10004        // An empty hypergraph (variable_count == 0) is rejected.
10005        let empty_hg = TrieHypergraph {
10006            relation_variables: vec![],
10007            variable_count: 0,
10008            arity: 0,
10009        };
10010        let empty_component: Vec<String> = vec![];
10011        assert!(estimate_agm_upper_bound(&empty_component, &rows, &empty_hg).is_none());
10012
10013        // Missing row counts default to 1 and the bound is floored at 1.0.
10014        let no_rows: HashMap<String, f64> = HashMap::new();
10015        let floored = estimate_agm_upper_bound(&component, &no_rows, &triangle).unwrap();
10016        assert!(
10017            (floored - 1.0).abs() < 1e-9,
10018            "missing row counts default to 1 -> bound 1.0, got {floored}"
10019        );
10020    }
10021
10022    #[test]
10023    fn test_best_access_path_skip_scan_on_low_cardinality_leading_column() {
10024        let table = TableStats {
10025            name: "users".to_owned(),
10026            n_pages: 4_096,
10027            n_rows: 2_000_000,
10028            source: StatsSource::Analyze,
10029        };
10030        let idx = IndexInfo {
10031            name: "idx_tenant_email".to_owned(),
10032            table: "users".to_owned(),
10033            columns: vec!["tenant_id".to_owned(), "email".to_owned()],
10034            unique: false,
10035            n_pages: 64,
10036            source: StatsSource::Analyze,
10037            partial_where: None,
10038            expression_columns: vec![],
10039        };
10040
10041        let ap = best_access_path(&table, &[idx], &[eq_term("email")], None);
10042        assert_eq!(ap.index.as_deref(), Some("idx_tenant_email"));
10043        assert!(matches!(
10044            ap.kind,
10045            AccessPathKind::IndexScanRange { .. } | AccessPathKind::CoveringIndexScan { .. }
10046        ));
10047    }
10048
10049    #[test]
10050    fn test_best_access_path_skip_scan_allows_immediate_second_column_on_three_column_index() {
10051        let table = TableStats {
10052            name: "users".to_owned(),
10053            n_pages: 4_096,
10054            n_rows: 2_000_000,
10055            source: StatsSource::Analyze,
10056        };
10057        let idx = IndexInfo {
10058            name: "idx_tenant_region_email".to_owned(),
10059            table: "users".to_owned(),
10060            columns: vec![
10061                "tenant_id".to_owned(),
10062                "region_code".to_owned(),
10063                "email".to_owned(),
10064            ],
10065            unique: false,
10066            n_pages: 64,
10067            source: StatsSource::Analyze,
10068            partial_where: None,
10069            expression_columns: vec![],
10070        };
10071
10072        let ap = best_access_path(&table, &[idx], &[eq_term("region_code")], None);
10073        assert_eq!(ap.index.as_deref(), Some("idx_tenant_region_email"));
10074        assert!(matches!(
10075            ap.kind,
10076            AccessPathKind::IndexScanRange { .. } | AccessPathKind::CoveringIndexScan { .. }
10077        ));
10078    }
10079
10080    #[test]
10081    fn test_best_access_path_skip_scan_rejects_gapped_trailing_column() {
10082        let table = TableStats {
10083            name: "users".to_owned(),
10084            n_pages: 4_096,
10085            n_rows: 2_000_000,
10086            source: StatsSource::Analyze,
10087        };
10088        let idx = IndexInfo {
10089            name: "idx_tenant_region_email".to_owned(),
10090            table: "users".to_owned(),
10091            columns: vec![
10092                "tenant_id".to_owned(),
10093                "region_code".to_owned(),
10094                "email".to_owned(),
10095            ],
10096            unique: false,
10097            n_pages: 64,
10098            source: StatsSource::Analyze,
10099            partial_where: None,
10100            expression_columns: vec![],
10101        };
10102
10103        let ap = best_access_path(&table, &[idx], &[eq_term("email")], None);
10104        assert!(
10105            matches!(ap.kind, AccessPathKind::FullTableScan),
10106            "gapped skip-scan should fall back to full scan until multi-prefix cardinality is modeled, got {:?}",
10107            ap.kind
10108        );
10109    }
10110
10111    #[test]
10112    fn test_skip_scan_candidate_second_column_equality_beats_range_ordering() {
10113        let table = TableStats {
10114            name: "users".to_owned(),
10115            n_pages: 4_096,
10116            n_rows: 2_000_000,
10117            source: StatsSource::Analyze,
10118        };
10119        let idx = IndexInfo {
10120            name: "idx_tenant_email".to_owned(),
10121            table: "users".to_owned(),
10122            columns: vec!["tenant_id".to_owned(), "email".to_owned()],
10123            unique: false,
10124            n_pages: 64,
10125            source: StatsSource::Analyze,
10126            partial_where: None,
10127            expression_columns: vec![],
10128        };
10129
10130        let candidate =
10131            analyze_skip_scan_candidate(&table, &idx, &[range_term("email"), eq_term("email")])
10132                .expect("second-column equality should remain a skip-scan candidate");
10133
10134        assert_eq!(candidate.leading_probes, 8);
10135        assert_eq!(candidate.trailing_probe_count, 1);
10136        assert_eq!(candidate.per_probe_selectivity, SKIP_SCAN_EQ_SELECTIVITY);
10137    }
10138
10139    #[test]
10140    fn test_skip_scan_candidate_second_column_in_beats_range_ordering() {
10141        let table = TableStats {
10142            name: "users".to_owned(),
10143            n_pages: 4_096,
10144            n_rows: 2_000_000,
10145            source: StatsSource::Analyze,
10146        };
10147        let idx = IndexInfo {
10148            name: "idx_tenant_email".to_owned(),
10149            table: "users".to_owned(),
10150            columns: vec!["tenant_id".to_owned(), "email".to_owned()],
10151            unique: false,
10152            n_pages: 64,
10153            source: StatsSource::Analyze,
10154            partial_where: None,
10155            expression_columns: vec![],
10156        };
10157
10158        let candidate =
10159            analyze_skip_scan_candidate(&table, &idx, &[range_term("email"), in_term("email", 3)])
10160                .expect("second-column IN-list should remain a skip-scan candidate");
10161
10162        assert_eq!(candidate.leading_probes, 8);
10163        assert_eq!(candidate.trailing_probe_count, 3);
10164        assert_eq!(candidate.per_probe_selectivity, SKIP_SCAN_EQ_SELECTIVITY);
10165    }
10166
10167    #[test]
10168    fn test_skip_scan_candidate_second_column_prefers_tighter_in_probe_count() -> Result<(), String>
10169    {
10170        let table = TableStats {
10171            name: "users".to_owned(),
10172            n_pages: 4_096,
10173            n_rows: 2_000_000,
10174            source: StatsSource::Analyze,
10175        };
10176        let idx = IndexInfo {
10177            name: "idx_tenant_email".to_owned(),
10178            table: "users".to_owned(),
10179            columns: ["tenant_id".to_owned(), "email".to_owned()]
10180                .into_iter()
10181                .collect(),
10182            unique: false,
10183            n_pages: 64,
10184            source: StatsSource::Analyze,
10185            partial_where: None,
10186            expression_columns: Vec::new(),
10187        };
10188
10189        let candidate =
10190            analyze_skip_scan_candidate(&table, &idx, &[in_term("email", 5), in_term("email", 2)])
10191                .ok_or_else(|| "expected skip-scan candidate".to_owned())?;
10192
10193        if candidate.leading_probes == 8
10194            && candidate.trailing_probe_count == 2
10195            && candidate.per_probe_selectivity == SKIP_SCAN_EQ_SELECTIVITY
10196        {
10197            return Ok(());
10198        }
10199
10200        Err("expected tighter second-column IN probe count".to_owned())
10201    }
10202    #[test]
10203    fn test_best_access_path_skip_scan_rejects_high_cardinality_leading_column() {
10204        let table = TableStats {
10205            name: "users".to_owned(),
10206            n_pages: 2_000,
10207            n_rows: 1_000_000,
10208            source: StatsSource::Analyze,
10209        };
10210        let idx = IndexInfo {
10211            name: "idx_region_email".to_owned(),
10212            table: "users".to_owned(),
10213            columns: vec!["region_code".to_owned(), "email".to_owned()],
10214            unique: false,
10215            n_pages: SKIP_SCAN_PAGES_PER_LEADING_DISTINCT * (SKIP_SCAN_MAX_LEADING_DISTINCT + 2),
10216            source: StatsSource::Analyze,
10217            partial_where: None,
10218            expression_columns: vec![],
10219        };
10220
10221        let ap = best_access_path(&table, &[idx], &[eq_term("email")], None);
10222        assert!(matches!(ap.kind, AccessPathKind::FullTableScan));
10223    }
10224
10225    #[test]
10226    fn test_best_access_path_partial_index_requires_implied_predicate() {
10227        let table = table_stats("t1", 100, 1000);
10228        let mut partial_idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
10229        partial_idx.partial_where = Some(Expr::BinaryOp {
10230            left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
10231            op: AstBinaryOp::Eq,
10232            right: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
10233            span: Span::ZERO,
10234        });
10235
10236        let ap_not_implied = best_access_path(
10237            &table,
10238            &[partial_idx.clone()],
10239            &[eq_term_value("a", 2)],
10240            None,
10241        );
10242        assert!(matches!(ap_not_implied.kind, AccessPathKind::FullTableScan));
10243
10244        let ap_implied = best_access_path(&table, &[partial_idx], &[eq_term_value("a", 1)], None);
10245        assert!(matches!(
10246            ap_implied.kind,
10247            AccessPathKind::IndexScanEquality | AccessPathKind::CoveringIndexScan { .. }
10248        ));
10249    }
10250
10251    #[test]
10252    fn test_best_access_path_partial_index_accepts_commuted_equality() {
10253        let table = table_stats("t1", 100, 1000);
10254        let mut partial_idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
10255        partial_idx.partial_where = Some(Expr::BinaryOp {
10256            left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
10257            op: AstBinaryOp::Eq,
10258            right: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
10259            span: Span::ZERO,
10260        });
10261
10262        let expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
10263            left: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
10264            op: AstBinaryOp::Eq,
10265            right: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
10266            span: Span::ZERO,
10267        }));
10268        let ap = best_access_path(&table, &[partial_idx], &[classify_where_term(expr)], None);
10269        assert!(matches!(
10270            ap.kind,
10271            AccessPathKind::IndexScanEquality | AccessPathKind::CoveringIndexScan { .. }
10272        ));
10273    }
10274
10275    #[test]
10276    fn test_best_access_path_partial_index_accepts_stronger_lower_bound() {
10277        let table = table_stats("t1", 100, 1000);
10278        let mut partial_idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
10279        partial_idx.partial_where = Some(Expr::BinaryOp {
10280            left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
10281            op: AstBinaryOp::Gt,
10282            right: Box::new(Expr::Literal(Literal::Integer(0), Span::ZERO)),
10283            span: Span::ZERO,
10284        });
10285
10286        let expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
10287            left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
10288            op: AstBinaryOp::Gt,
10289            right: Box::new(Expr::Literal(Literal::Integer(10), Span::ZERO)),
10290            span: Span::ZERO,
10291        }));
10292        let ap = best_access_path(&table, &[partial_idx], &[classify_where_term(expr)], None);
10293        assert!(matches!(
10294            ap.kind,
10295            AccessPathKind::IndexScanRange { .. } | AccessPathKind::CoveringIndexScan { .. }
10296        ));
10297    }
10298
10299    #[test]
10300    fn test_best_access_path_partial_index_rejects_weaker_lower_bound() {
10301        let table = table_stats("t1", 100, 1000);
10302        let mut partial_idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
10303        partial_idx.partial_where = Some(Expr::BinaryOp {
10304            left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
10305            op: AstBinaryOp::Gt,
10306            right: Box::new(Expr::Literal(Literal::Integer(10), Span::ZERO)),
10307            span: Span::ZERO,
10308        });
10309
10310        let expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
10311            left: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
10312            op: AstBinaryOp::Gt,
10313            right: Box::new(Expr::Literal(Literal::Integer(0), Span::ZERO)),
10314            span: Span::ZERO,
10315        }));
10316        let ap = best_access_path(&table, &[partial_idx], &[classify_where_term(expr)], None);
10317        assert!(matches!(ap.kind, AccessPathKind::FullTableScan));
10318    }
10319
10320    #[test]
10321    fn test_best_access_path_partial_index_accepts_is_not_null_from_equality() {
10322        let table = table_stats("t1", 100, 1000);
10323        let mut partial_idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
10324        partial_idx.partial_where = Some(Expr::IsNull {
10325            expr: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
10326            not: true,
10327            span: Span::ZERO,
10328        });
10329
10330        let ap = best_access_path(&table, &[partial_idx], &[eq_term_value("a", 7)], None);
10331        assert!(matches!(
10332            ap.kind,
10333            AccessPathKind::IndexScanEquality | AccessPathKind::CoveringIndexScan { .. }
10334        ));
10335    }
10336
10337    #[test]
10338    fn test_best_access_path_partial_index_accepts_is_not_null_from_in_list() {
10339        let table = table_stats("t1", 100, 1000);
10340        let mut partial_idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
10341        partial_idx.partial_where = Some(Expr::IsNull {
10342            expr: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
10343            not: true,
10344            span: Span::ZERO,
10345        });
10346
10347        let ap = best_access_path(&table, &[partial_idx], &[in_term("a", 3)], None);
10348        assert!(matches!(
10349            ap.kind,
10350            AccessPathKind::IndexScanEquality
10351                | AccessPathKind::IndexScanRange { .. }
10352                | AccessPathKind::CoveringIndexScan { .. }
10353        ));
10354    }
10355
10356    #[test]
10357    fn test_best_access_path_partial_index_accepts_is_not_null_from_like_prefix() {
10358        let table = table_stats("t1", 100, 1000);
10359        let mut partial_idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
10360        partial_idx.partial_where = Some(Expr::IsNull {
10361            expr: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
10362            not: true,
10363            span: Span::ZERO,
10364        });
10365
10366        let ap = best_access_path(&table, &[partial_idx], &[like_term("a", "123%")], None);
10367        assert!(matches!(
10368            ap.kind,
10369            AccessPathKind::IndexScanRange { .. } | AccessPathKind::CoveringIndexScan { .. }
10370        ));
10371    }
10372
10373    #[test]
10374    fn test_best_access_path_partial_index_accepts_is_not_null_from_glob_prefix() {
10375        let table = table_stats("t1", 100, 1000);
10376        let mut partial_idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
10377        partial_idx.partial_where = Some(Expr::IsNull {
10378            expr: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
10379            not: true,
10380            span: Span::ZERO,
10381        });
10382
10383        let ap = best_access_path(&table, &[partial_idx], &[glob_term("a", "abc*")], None);
10384        assert!(matches!(
10385            ap.kind,
10386            AccessPathKind::IndexScanRange { .. } | AccessPathKind::CoveringIndexScan { .. }
10387        ));
10388    }
10389
10390    #[test]
10391    fn test_best_access_path_partial_index_accepts_is_not_null_from_or_disjunction() {
10392        let table = table_stats("t1", 100, 1000);
10393        let mut partial_idx = index_info("idx_partial_a", "t1", &["a"], false, 20);
10394        partial_idx.partial_where = Some(Expr::IsNull {
10395            expr: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
10396            not: true,
10397            span: Span::ZERO,
10398        });
10399
10400        let ap = best_access_path(&table, &[partial_idx], &[or_eq_term("a", &[1, 2, 3])], None);
10401        assert_eq!(ap.index.as_deref(), Some("idx_partial_a"));
10402        assert!(matches!(
10403            ap.kind,
10404            AccessPathKind::IndexScanEquality | AccessPathKind::CoveringIndexScan { .. }
10405        ));
10406    }
10407
10408    #[test]
10409    fn test_best_access_path_respects_indexed_by_hint() {
10410        let table = table_stats("t1", 2000, 100_000);
10411        let fast = index_info("idx_fast", "t1", &["a"], false, 10);
10412        let slow = index_info("idx_slow", "t1", &["a"], false, 600);
10413        let terms = [eq_term("a")];
10414        let hint = IndexHint::IndexedBy("idx_slow".to_owned());
10415
10416        let ap =
10417            best_access_path_with_hints(&table, &[fast, slow], &terms, None, Some(&hint), None);
10418        assert_eq!(ap.index.as_deref(), Some("idx_slow"));
10419        assert!(matches!(
10420            ap.kind,
10421            AccessPathKind::IndexScanEquality
10422                | AccessPathKind::IndexScanRange { .. }
10423                | AccessPathKind::CoveringIndexScan { .. }
10424        ));
10425    }
10426
10427    #[test]
10428    fn test_best_access_path_respects_not_indexed_hint() {
10429        let table = table_stats("t1", 1024, 50000);
10430        let idx = index_info("idx_a", "t1", &["a"], false, 20);
10431        let rowid_expr: &'static Expr = Box::leak(Box::new(Expr::BinaryOp {
10432            left: Box::new(Expr::Column(ColumnRef::bare("rowid"), Span::ZERO)),
10433            op: AstBinaryOp::Eq,
10434            right: Box::new(Expr::Literal(Literal::Integer(42), Span::ZERO)),
10435            span: Span::ZERO,
10436        }));
10437        let rowid_term = classify_where_term(rowid_expr);
10438        let hint = IndexHint::NotIndexed;
10439
10440        let ap =
10441            best_access_path_with_hints(&table, &[idx], &[rowid_term], None, Some(&hint), None);
10442        assert!(matches!(ap.kind, AccessPathKind::FullTableScan));
10443        assert!(ap.index.is_none());
10444    }
10445
10446    #[test]
10447    fn test_cracking_hint_store_reuses_prior_index_choice() {
10448        let table = table_stats("t1", 1000, 50000);
10449        let idx_a = index_info("idx_a", "t1", &["a"], false, 40);
10450        let idx_b = index_info("idx_b", "t1", &["a"], false, 40);
10451        let terms = [eq_term("a")];
10452        let mut hint_store = CrackingHintStore::default();
10453
10454        let first = best_access_path_with_hints(
10455            &table,
10456            &[idx_a.clone(), idx_b.clone()],
10457            &terms,
10458            None,
10459            None,
10460            Some(&mut hint_store),
10461        );
10462        assert_eq!(first.index.as_deref(), Some("idx_a"));
10463        assert_eq!(hint_store.preferred_index("t1"), Some("idx_a"));
10464
10465        // Reverse candidate order; adaptive hint should bias back to idx_a.
10466        let second = best_access_path_with_hints(
10467            &table,
10468            &[idx_b, idx_a],
10469            &terms,
10470            None,
10471            None,
10472            Some(&mut hint_store),
10473        );
10474        assert_eq!(second.index.as_deref(), Some("idx_a"));
10475    }
10476
10477    #[test]
10478    fn test_index_selection_metric_counter_advances() {
10479        let table = table_stats("t1", 500, 10000);
10480        let idx = index_info("idx_a", "t1", &["a"], false, 20);
10481        let terms = [eq_term("a")];
10482        let before = snapshot_index_selection_totals()
10483            .get("index_scan_equality")
10484            .copied()
10485            .unwrap_or(0);
10486
10487        let _ = best_access_path(&table, &[idx], &terms, None);
10488
10489        let after = snapshot_index_selection_totals()
10490            .get("index_scan_equality")
10491            .copied()
10492            .unwrap_or(0);
10493        assert!(after > before);
10494    }
10495
10496    #[test]
10497    #[allow(clippy::too_many_lines)]
10498    fn planner_index_selection_e2e_replay_emits_artifact() {
10499        use fsqlite_ast::{JoinClause, JoinKind, JoinType};
10500
10501        const BEAD_ID: &str = "bd-1as.4";
10502        const DEFAULT_SCENARIO_ID: &str = "PLANNER-INDEX-1";
10503        const DEFAULT_SEED: u64 = 20_260_219;
10504
10505        let run_id =
10506            std::env::var("RUN_ID").unwrap_or_else(|_| format!("{BEAD_ID}-seed-{DEFAULT_SEED}"));
10507        let trace_id = std::env::var("TRACE_ID")
10508            .ok()
10509            .and_then(|value| value.parse::<u64>().ok())
10510            .unwrap_or(DEFAULT_SEED);
10511        let scenario_id =
10512            std::env::var("SCENARIO_ID").unwrap_or_else(|_| DEFAULT_SCENARIO_ID.to_owned());
10513        let seed = std::env::var("SEED")
10514            .ok()
10515            .and_then(|value| value.parse::<u64>().ok())
10516            .unwrap_or(DEFAULT_SEED);
10517
10518        let artifact_path = std::env::var("FSQLITE_PLANNER_INDEX_E2E_ARTIFACT").map_or_else(
10519            |_| {
10520                PathBuf::from("artifacts")
10521                    .join(BEAD_ID)
10522                    .join("planner_index_selection_e2e_artifact.json")
10523            },
10524            PathBuf::from,
10525        );
10526        if let Some(parent) = artifact_path.parent() {
10527            std::fs::create_dir_all(parent)
10528                .expect("bead_id={BEAD_ID} artifact directory should be writable");
10529        }
10530
10531        let started = Instant::now();
10532        let mut cracking_hints = CrackingHintStore::default();
10533        let before_metrics = snapshot_index_selection_totals();
10534
10535        let from = FromClause {
10536            source: TableOrSubquery::Table {
10537                name: QualifiedName::bare("users"),
10538                alias: Some("u".to_owned()),
10539                index_hint: Some(IndexHint::IndexedBy("idx_users_email".to_owned())),
10540                time_travel: None,
10541            },
10542            joins: vec![JoinClause {
10543                join_type: JoinType {
10544                    kind: JoinKind::Inner,
10545                    natural: false,
10546                },
10547                table: TableOrSubquery::Table {
10548                    name: QualifiedName::bare("events"),
10549                    alias: Some("e".to_owned()),
10550                    index_hint: Some(IndexHint::NotIndexed),
10551                    time_travel: None,
10552                },
10553                constraint: None,
10554            }],
10555        };
10556        let table_hints = collect_table_index_hints(&from);
10557
10558        let tables = [
10559            table_stats("users", 2_048, 120_000),
10560            table_stats("events", 8_192, 1_200_000),
10561            table_stats("sessions", 4_096, 900_000),
10562        ];
10563        let indexes = [
10564            index_info("idx_users_email", "users", &["email"], true, 120),
10565            index_info("idx_users_id", "users", &["id"], true, 240),
10566            index_info("idx_events_user_id", "events", &["user_id"], false, 110),
10567            index_info(
10568                "idx_sessions_user_id_a",
10569                "sessions",
10570                &["user_id"],
10571                false,
10572                90,
10573            ),
10574            index_info(
10575                "idx_sessions_user_id_b",
10576                "sessions",
10577                &["user_id"],
10578                false,
10579                90,
10580            ),
10581        ];
10582        let where_terms = [
10583            eq_term("email"),
10584            eq_term("user_id"),
10585            join_term("events", "user_id", "users", "id"),
10586        ];
10587
10588        let first_plan = order_joins_with_hints(
10589            &tables[..2],
10590            &indexes,
10591            &where_terms,
10592            Some(&["email".to_owned(), "user_id".to_owned()]),
10593            &[],
10594            Some(&table_hints),
10595            Some(&mut cracking_hints),
10596        );
10597        let users_path = first_plan
10598            .access_paths
10599            .iter()
10600            .find(|path| path.table.eq_ignore_ascii_case("users"))
10601            .expect("bead_id={BEAD_ID} users path should exist");
10602        assert_eq!(users_path.index.as_deref(), Some("idx_users_email"));
10603        let events_path = first_plan
10604            .access_paths
10605            .iter()
10606            .find(|path| path.table.eq_ignore_ascii_case("events"))
10607            .expect("bead_id={BEAD_ID} events path should exist");
10608        assert!(
10609            matches!(events_path.kind, AccessPathKind::FullTableScan),
10610            "bead_id={BEAD_ID} NOT INDEXED must force full scan for events",
10611        );
10612
10613        let first_session_path = best_access_path_with_hints(
10614            &tables[2],
10615            &indexes[3..5],
10616            &where_terms,
10617            None,
10618            None,
10619            Some(&mut cracking_hints),
10620        );
10621        let second_session_path = best_access_path_with_hints(
10622            &tables[2],
10623            &[indexes[4].clone(), indexes[3].clone()],
10624            &where_terms,
10625            None,
10626            None,
10627            Some(&mut cracking_hints),
10628        );
10629        assert_eq!(
10630            first_session_path.index.as_deref(),
10631            second_session_path.index.as_deref(),
10632            "bead_id={BEAD_ID} adaptive cracking hint should keep stable index preference",
10633        );
10634
10635        let after_metrics = snapshot_index_selection_totals();
10636        let metric_delta = after_metrics
10637            .iter()
10638            .map(|(label, after)| {
10639                let before = before_metrics.get(label).copied().unwrap_or(0);
10640                (label.clone(), after.saturating_sub(before))
10641            })
10642            .collect::<BTreeMap<_, _>>();
10643        let elapsed_us = started.elapsed().as_micros().max(1);
10644        let replay_command = format!(
10645            "RUN_ID='{}' TRACE_ID={} SCENARIO_ID='{}' SEED={} FSQLITE_PLANNER_INDEX_E2E_ARTIFACT='{}' cargo test -p fsqlite-planner planner_index_selection_e2e_replay_emits_artifact -- --exact --nocapture",
10646            run_id,
10647            trace_id,
10648            scenario_id,
10649            seed,
10650            artifact_path.display(),
10651        );
10652
10653        let plan_fingerprint = blake3::hash(
10654            format!(
10655                "{}|{}|{}|{}|{:?}|{:?}",
10656                first_plan.join_order.join(","),
10657                users_path.index.clone().unwrap_or_default(),
10658                access_path_metric_label(&events_path.kind),
10659                second_session_path.index.clone().unwrap_or_default(),
10660                first_session_path.kind,
10661                second_session_path.kind,
10662            )
10663            .as_bytes(),
10664        )
10665        .to_hex()
10666        .to_string();
10667        let artifact = serde_json::json!({
10668            "bead_id": BEAD_ID,
10669            "run_id": run_id,
10670            "trace_id": trace_id,
10671            "scenario_id": scenario_id,
10672            "seed": seed,
10673            "overall_status": "pass",
10674            "timing": {
10675                "selection_elapsed_us": elapsed_us,
10676            },
10677            "checks": [
10678                {
10679                    "id": "indexed_by_respected",
10680                    "status": "pass",
10681                    "detail": "users path honors INDEXED BY idx_users_email"
10682                },
10683                {
10684                    "id": "not_indexed_respected",
10685                    "status": "pass",
10686                    "detail": "events path honors NOT INDEXED by forcing full scan"
10687                },
10688                {
10689                    "id": "adaptive_hint_reuse",
10690                    "status": "pass",
10691                    "detail": "sessions path reuses prior cracking hint under candidate reordering"
10692                }
10693            ],
10694            "metric_delta": metric_delta,
10695            "plan_fingerprint_blake3": plan_fingerprint,
10696            "observability": {
10697                "required_fields": [
10698                    "run_id",
10699                    "trace_id",
10700                    "scenario_id",
10701                    "selection_elapsed_us",
10702                    "table",
10703                    "chosen_index",
10704                    "index_type",
10705                    "candidates"
10706                ],
10707                "event_name": "planner.index_select.choice"
10708            },
10709            "replay_command": replay_command,
10710        });
10711        let artifact_bytes = serde_json::to_vec_pretty(&artifact)
10712            .expect("bead_id={BEAD_ID} artifact serialization should succeed");
10713        std::fs::write(&artifact_path, artifact_bytes)
10714            .expect("bead_id={BEAD_ID} artifact write should succeed");
10715        assert!(
10716            artifact_path.exists(),
10717            "bead_id={BEAD_ID} e2e artifact path should exist"
10718        );
10719    }
10720
10721    #[test]
10722    fn test_index_usability_between_on_leftmost() {
10723        let idx = index_info("idx_a", "t1", &["a"], false, 50);
10724        let expr: &'static Expr = Box::leak(Box::new(Expr::Between {
10725            expr: Box::new(Expr::Column(ColumnRef::bare("a"), Span::ZERO)),
10726            low: Box::new(Expr::Literal(Literal::Integer(1), Span::ZERO)),
10727            high: Box::new(Expr::Literal(Literal::Integer(10), Span::ZERO)),
10728            not: false,
10729            span: Span::ZERO,
10730        }));
10731        let term = classify_where_term(expr);
10732        assert!(matches!(
10733            analyze_index_usability(&idx, &[term]),
10734            IndexUsability::Range { .. }
10735        ));
10736    }
10737
10738    // ===================================================================
10739    // WhereTermKind / WhereColumn equality tests
10740    // ===================================================================
10741
10742    #[test]
10743    fn test_where_term_kind_equality() {
10744        assert_eq!(WhereTermKind::Equality, WhereTermKind::Equality);
10745        assert_eq!(WhereTermKind::Range, WhereTermKind::Range);
10746        assert_eq!(WhereTermKind::Between, WhereTermKind::Between);
10747        assert_eq!(
10748            WhereTermKind::InList { count: 3 },
10749            WhereTermKind::InList { count: 3 }
10750        );
10751        assert_ne!(
10752            WhereTermKind::InList { count: 3 },
10753            WhereTermKind::InList { count: 5 }
10754        );
10755        assert_eq!(
10756            WhereTermKind::LikePrefix {
10757                prefix: "abc".to_owned(),
10758                upper_bound: Some("abd".to_owned()),
10759            },
10760            WhereTermKind::LikePrefix {
10761                prefix: "abc".to_owned(),
10762                upper_bound: Some("abd".to_owned()),
10763            }
10764        );
10765        assert_ne!(WhereTermKind::Equality, WhereTermKind::Range);
10766    }
10767
10768    #[test]
10769    fn test_where_column_equality() {
10770        let wc1 = WhereColumn {
10771            table: Some("t".to_owned()),
10772            column: "a".to_owned(),
10773        };
10774        let wc2 = WhereColumn {
10775            table: Some("t".to_owned()),
10776            column: "a".to_owned(),
10777        };
10778        let wc3 = WhereColumn {
10779            table: None,
10780            column: "a".to_owned(),
10781        };
10782        assert_eq!(wc1, wc2);
10783        assert_ne!(wc1, wc3);
10784    }
10785
10786    // ===================================================================
10787    // StatsSource tests
10788    // ===================================================================
10789
10790    #[test]
10791    fn test_stats_source_equality() {
10792        assert_eq!(StatsSource::Analyze, StatsSource::Analyze);
10793        assert_eq!(StatsSource::Heuristic, StatsSource::Heuristic);
10794        assert_ne!(StatsSource::Analyze, StatsSource::Heuristic);
10795    }
10796
10797    // ===================================================================
10798    // cost model minimum page clamp
10799    // ===================================================================
10800
10801    #[test]
10802    fn test_cost_minimum_page_clamp() {
10803        // With 0 pages, cost should use max(1) = 1.
10804        let cost = estimate_cost(&AccessPathKind::FullTableScan, 0, 0);
10805        assert!(
10806            (cost - 1.0).abs() < f64::EPSILON,
10807            "0 pages should clamp to 1"
10808        );
10809
10810        let cost = estimate_cost(&AccessPathKind::RowidLookup, 0, 0);
10811        assert!(
10812            (cost - 0.0).abs() < f64::EPSILON,
10813            "log2(1) = 0.0 for clamped 0 pages"
10814        );
10815    }
10816
10817    // -----------------------------------------------------------------------
10818    // Proptest: property-based tests for query planner (bd-1lsfu.4)
10819    // -----------------------------------------------------------------------
10820
10821    mod proptest_planner {
10822        use super::*;
10823        use fsqlite_ast::{
10824            ColumnRef, Distinctness, Expr, Literal, OrderingTerm, ResultColumn, SelectBody,
10825            SelectCore, Span,
10826        };
10827        use proptest::prelude::*;
10828
10829        /// Generate random table stats with realistic ranges.
10830        fn arb_table_stats() -> BoxedStrategy<TableStats> {
10831            (
10832                prop::string::string_regex("[a-z][a-z0-9]{0,5}").expect("valid regex"),
10833                1u64..10_000,
10834                1u64..1_000_000,
10835            )
10836                .prop_map(|(name, n_pages, n_rows)| TableStats {
10837                    name,
10838                    n_pages,
10839                    n_rows,
10840                    source: StatsSource::Heuristic,
10841                })
10842                .boxed()
10843        }
10844
10845        /// Generate random index info for a given table.
10846        #[allow(dead_code)]
10847        fn arb_index_info(table_name: String) -> BoxedStrategy<IndexInfo> {
10848            (
10849                prop::string::string_regex("idx_[a-z]{1,4}").expect("valid regex"),
10850                proptest::collection::vec(
10851                    prop::string::string_regex("[a-z]{1,4}").expect("valid regex"),
10852                    1..4,
10853                ),
10854                any::<bool>(),
10855                1u64..5_000,
10856            )
10857                .prop_map(move |(name, columns, unique, n_pages)| IndexInfo {
10858                    name,
10859                    table: table_name.clone(),
10860                    columns,
10861                    unique,
10862                    n_pages,
10863                    source: StatsSource::Heuristic,
10864                    partial_where: None,
10865                    expression_columns: vec![],
10866                })
10867                .boxed()
10868        }
10869
10870        /// Generate a selectivity in (0, 1].
10871        fn arb_selectivity() -> BoxedStrategy<f64> {
10872            (1u32..1000).prop_map(|n| f64::from(n) / 1000.0).boxed()
10873        }
10874
10875        // Property 1: Cost model non-negativity — all costs >= 0.
10876        proptest::proptest! {
10877            #![proptest_config(proptest::prelude::ProptestConfig::with_cases(1000))]
10878
10879            #[test]
10880            fn test_cost_non_negative(
10881                table_pages in 0u64..100_000,
10882                index_pages in 0u64..100_000,
10883                selectivity in arb_selectivity(),
10884            ) {
10885                let kinds = [
10886                    AccessPathKind::FullTableScan,
10887                    AccessPathKind::IndexScanEquality,
10888                    AccessPathKind::RowidLookup,
10889                    AccessPathKind::IndexScanRange { selectivity },
10890                    AccessPathKind::CoveringIndexScan { selectivity },
10891                ];
10892                for kind in &kinds {
10893                    let cost = estimate_cost(kind, table_pages, index_pages);
10894                    prop_assert!(
10895                        cost >= 0.0,
10896                        "cost must be non-negative, got {cost} for {kind:?} \
10897                         (table_pages={table_pages}, index_pages={index_pages})"
10898                    );
10899                    prop_assert!(
10900                        cost.is_finite(),
10901                        "cost must be finite, got {cost} for {kind:?}"
10902                    );
10903                }
10904            }
10905        }
10906
10907        // Property 2: Cost hierarchy — RowidLookup ≤ IndexScanEquality ≤ FullTableScan
10908        // for tables with at least a few pages.
10909        proptest::proptest! {
10910            #![proptest_config(proptest::prelude::ProptestConfig::with_cases(500))]
10911
10912            #[test]
10913            fn test_cost_hierarchy(
10914                table_pages in 10u64..100_000,
10915                // Constrain index_pages ≤ table_pages (realistic: indices are
10916                // typically smaller than the table they index).
10917                index_pages in 2u64..10_000,
10918            ) {
10919                let rowid_cost = estimate_cost(
10920                    &AccessPathKind::RowidLookup,
10921                    table_pages,
10922                    index_pages,
10923                );
10924                let eq_cost = estimate_cost(
10925                    &AccessPathKind::IndexScanEquality,
10926                    table_pages,
10927                    index_pages,
10928                );
10929                let full_cost = estimate_cost(
10930                    &AccessPathKind::FullTableScan,
10931                    table_pages,
10932                    index_pages,
10933                );
10934
10935                // Rowid lookup (log2(tp)) is always ≤ index equality
10936                // (log2(ip) + log2(tp)) since log2(ip) ≥ 0.
10937                prop_assert!(
10938                    rowid_cost <= eq_cost + f64::EPSILON,
10939                    "rowid lookup ({rowid_cost}) should be ≤ index equality ({eq_cost}) \
10940                     for table_pages={table_pages}, index_pages={index_pages}"
10941                );
10942
10943                // Index equality ≤ full scan only when index is not
10944                // disproportionately large: log2(ip) + log2(tp) ≤ tp.
10945                // For huge indices on tiny tables, full scan can be cheaper.
10946                if index_pages <= table_pages {
10947                    prop_assert!(
10948                        eq_cost <= full_cost + f64::EPSILON,
10949                        "index equality ({eq_cost}) should be ≤ full scan ({full_cost}) \
10950                         for table_pages={table_pages}, index_pages={index_pages}"
10951                    );
10952                }
10953            }
10954        }
10955
10956        // Property 3: Cost monotonicity in selectivity — lower selectivity means
10957        // lower cost for range scans.
10958        proptest::proptest! {
10959            #![proptest_config(proptest::prelude::ProptestConfig::with_cases(500))]
10960
10961            #[test]
10962            fn test_cost_selectivity_monotonic(
10963                table_pages in 10u64..100_000,
10964                index_pages in 2u64..10_000,
10965                s1 in 1u32..500,
10966                s2 in 500u32..1000,
10967            ) {
10968                let sel_low = f64::from(s1) / 1000.0;
10969                let sel_high = f64::from(s2) / 1000.0;
10970
10971                let cost_low = estimate_cost(
10972                    &AccessPathKind::IndexScanRange { selectivity: sel_low },
10973                    table_pages,
10974                    index_pages,
10975                );
10976                let cost_high = estimate_cost(
10977                    &AccessPathKind::IndexScanRange { selectivity: sel_high },
10978                    table_pages,
10979                    index_pages,
10980                );
10981
10982                prop_assert!(
10983                    cost_low <= cost_high + f64::EPSILON,
10984                    "lower selectivity ({sel_low}) should have lower cost ({cost_low}) \
10985                     than higher selectivity ({sel_high}) cost ({cost_high})"
10986                );
10987            }
10988        }
10989
10990        // Property 4: Join ordering determinism — same inputs always produce
10991        // the same plan.
10992        proptest::proptest! {
10993            #![proptest_config(proptest::prelude::ProptestConfig::with_cases(200))]
10994
10995            #[test]
10996            fn test_join_order_determinism(
10997                stats1 in arb_table_stats(),
10998                stats2 in arb_table_stats(),
10999            ) {
11000                // Ensure distinct table names.
11001                let s1 = stats1;
11002                let mut s2 = stats2;
11003                if s1.name == s2.name {
11004                    s2.name = format!("{}_b", s2.name);
11005                }
11006
11007                let tables = [s1, s2];
11008                let empty_indexes: Vec<IndexInfo> = vec![];
11009                let empty_terms: Vec<WhereTerm<'_>> = vec![];
11010                let empty_cross: Vec<(String, String)> = vec![];
11011
11012                let plan_a = order_joins(
11013                    &tables,
11014                    &empty_indexes,
11015                    &empty_terms,
11016                    None,
11017                    &empty_cross,
11018                );
11019                let plan_b = order_joins(
11020                    &tables,
11021                    &empty_indexes,
11022                    &empty_terms,
11023                    None,
11024                    &empty_cross,
11025                );
11026
11027                prop_assert_eq!(
11028                    plan_a.join_order,
11029                    plan_b.join_order,
11030                    "join order should be deterministic"
11031                );
11032                prop_assert!(
11033                    (plan_a.total_cost - plan_b.total_cost).abs() < f64::EPSILON,
11034                    "total cost should be deterministic: {:.6} vs {:.6}",
11035                    plan_a.total_cost,
11036                    plan_b.total_cost,
11037                );
11038            }
11039        }
11040
11041        // Property 5: Adding an index never increases the best access path cost.
11042        proptest::proptest! {
11043            #![proptest_config(proptest::prelude::ProptestConfig::with_cases(300))]
11044
11045            #[test]
11046            fn test_index_never_increases_cost(
11047                stats in arb_table_stats(),
11048            ) {
11049                let table = stats;
11050                let empty_terms: Vec<WhereTerm<'_>> = vec![];
11051
11052                // Cost without any index.
11053                let no_index_path = best_access_path(
11054                    &table,
11055                    &[],
11056                    &empty_terms,
11057                    None,
11058                );
11059
11060                // Create an index on this table.
11061                let idx = IndexInfo {
11062                    name: "idx_test".to_string(),
11063                    table: table.name.clone(),
11064                    columns: vec!["col_a".to_string()],
11065                    unique: false,
11066                    n_pages: table.n_pages / 5 + 1,
11067                    source: StatsSource::Heuristic,
11068                    partial_where: None,
11069                    expression_columns: vec![],
11070                };
11071
11072                let with_index_path = best_access_path(
11073                    &table,
11074                    &[idx],
11075                    &empty_terms,
11076                    None,
11077                );
11078
11079                prop_assert!(
11080                    with_index_path.estimated_cost <= no_index_path.estimated_cost + f64::EPSILON,
11081                    "adding an index should not increase cost: \
11082                     without={:.2}, with={:.2}",
11083                    no_index_path.estimated_cost,
11084                    with_index_path.estimated_cost,
11085                );
11086            }
11087        }
11088
11089        // Property 6: Compound ORDER BY resolution is deterministic.
11090        proptest::proptest! {
11091            #![proptest_config(proptest::prelude::ProptestConfig::with_cases(200))]
11092
11093            #[test]
11094            fn test_order_by_resolution_deterministic(
11095                ncols in 1usize..5,
11096                order_idx in 1usize..5,
11097            ) {
11098                // Build a synthetic compound SELECT with aliases.
11099                let cols: Vec<ResultColumn> = (0..ncols)
11100                    .map(|i| ResultColumn::Expr {
11101                        expr: Expr::Column(
11102                            ColumnRef::bare(format!("c{i}")),
11103                            Span::ZERO,
11104                        ),
11105                        alias: Some(format!("a{i}")),
11106                    })
11107                    .collect();
11108                let core = SelectCore::Select {
11109                    distinct: Distinctness::All,
11110                    columns: cols,
11111                    from: None,
11112                    where_clause: None,
11113                    group_by: vec![],
11114                    having: None,
11115                    windows: vec![],
11116                };
11117
11118                // ORDER BY a numeric index (clamped to valid range).
11119                let valid_idx = (order_idx % ncols) + 1;
11120                let order_term = OrderingTerm {
11121                    expr: Expr::Literal(
11122                        Literal::Integer(i64::try_from(valid_idx).unwrap_or(1)),
11123                        Span::ZERO,
11124                    ),
11125                    direction: None,
11126                    nulls: None,
11127                };
11128
11129                let body = SelectBody {
11130                    select: core,
11131                    compounds: vec![],
11132                };
11133
11134                let result1 = resolve_compound_order_by(
11135                    &body,
11136                    std::slice::from_ref(&order_term),
11137                );
11138                let result2 = resolve_compound_order_by(
11139                    &body,
11140                    std::slice::from_ref(&order_term),
11141                );
11142
11143                prop_assert_eq!(
11144                    result1, result2,
11145                    "ORDER BY resolution should be deterministic"
11146                );
11147            }
11148        }
11149
11150        // Property 7: Full table scan cost scales linearly with page count.
11151        proptest::proptest! {
11152            #![proptest_config(proptest::prelude::ProptestConfig::with_cases(500))]
11153
11154            #[test]
11155            fn test_full_scan_linear_scaling(
11156                pages in 1u64..100_000,
11157                multiplier in 2u64..10,
11158            ) {
11159                let cost_base = estimate_cost(
11160                    &AccessPathKind::FullTableScan,
11161                    pages,
11162                    0,
11163                );
11164                let cost_scaled = estimate_cost(
11165                    &AccessPathKind::FullTableScan,
11166                    pages * multiplier,
11167                    0,
11168                );
11169
11170                // For full scan, cost = table_pages, so scaling should be exact.
11171                let expected_ratio = multiplier as f64;
11172                let actual_ratio = cost_scaled / cost_base;
11173                prop_assert!(
11174                    (actual_ratio - expected_ratio).abs() < 0.01,
11175                    "full scan cost should scale linearly: \
11176                     expected ratio {expected_ratio}, got {actual_ratio}"
11177                );
11178            }
11179        }
11180    }
11181
11182    // ── Cost metrics and asymmetric loss tests (bd-1as.1) ──
11183
#[test]
fn test_cost_estimates_metric_increments() {
    reset_cost_metrics();
    let before = cost_metrics_snapshot();

    // Two estimates, so the counter must advance by at least two.
    for kind in [AccessPathKind::FullTableScan, AccessPathKind::RowidLookup] {
        let _ = estimate_cost(&kind, 100, 0);
    }

    let after = cost_metrics_snapshot();
    // `>=` rather than `==`: the assertion tolerates additional increments
    // recorded between the two snapshots.
    assert!(
        after.fsqlite_planner_cost_estimates_total
            >= before.fsqlite_planner_cost_estimates_total + 2
    );
}
11199
#[test]
fn test_estimation_error_recording() {
    reset_cost_metrics();

    // Argument pairs whose ratios (2.0, 0.1 and 1.0 respectively) are
    // expected to land in three different histogram buckets.
    let samples = [(100.0, 50.0), (10.0, 100.0), (50.0, 50.0)];
    for (first, second) in samples {
        record_estimation_error(first, second);
    }

    let snap = cost_metrics_snapshot();
    // Buckets [0, 0.5), [1.0, 2.0) and [2.0, 5.0) each hold one sample.
    for bucket in [0, 2, 3] {
        assert_eq!(snap.error_ratio_buckets[bucket], 1);
    }
    assert!(snap.error_ratio_mean.is_finite());
}
11214
#[test]
fn test_asymmetric_loss_underestimate_penalized_more() {
    let estimated = 100.0;

    // Actual cost twice the estimate (ratio 2.0): an underestimate.
    let loss_under = asymmetric_estimation_loss(estimated, 200.0);
    // Actual cost half the estimate (ratio 0.5): an overestimate.
    let loss_over = asymmetric_estimation_loss(estimated, 50.0);

    // Being too optimistic must cost more than being too pessimistic.
    assert!(
        loss_under > loss_over,
        "underestimate loss ({loss_under}) should exceed overestimate loss ({loss_over})"
    );
}
11228
#[test]
fn test_asymmetric_loss_perfect_estimate() {
    // Estimate equals actual, so the loss must be (numerically) zero.
    let perfect = asymmetric_estimation_loss(100.0, 100.0);
    assert!(perfect.abs() < 1e-10);
}
11234
#[test]
fn test_asymmetric_loss_degenerate() {
    // With a zero estimate the loss is expected to equal the actual cost.
    let actual = 50.0;
    let residual = asymmetric_estimation_loss(0.0, actual) - actual;
    assert!(residual.abs() < 1e-10);
}
11241
#[test]
fn test_asymmetric_loss_quadratic_under_linear_over() {
    // Pins the shape of the loss curve rather than a single comparison:
    // linear `1 - ratio` on the overestimate side, quadratic in
    // `(ratio - 1)` on the underestimate side.
    fn close(a: f64, b: f64) -> bool {
        (a - b).abs() < 1e-9
    }
    // Loss for a fixed estimate of 100 and a varying actual cost.
    let loss_at = |actual: f64| asymmetric_estimation_loss(100.0, actual);

    // Overestimate (ratio < 1): loss is exactly 1 - ratio.
    for (actual, expected) in [(75.0, 0.25), (50.0, 0.5), (25.0, 0.75), (0.0, 1.0)] {
        assert!(close(loss_at(actual), expected));
    }
    // Linearity: equal steps in the actual cost give equal steps in loss.
    assert!(close(
        loss_at(50.0) - loss_at(75.0),
        loss_at(25.0) - loss_at(50.0)
    ));

    // Underestimate (ratio > 1): the loss scales with the square of the
    // excess, so comparing against the ratio-2 loss cancels the penalty
    // constant.
    let base = loss_at(200.0); // excess 1
    assert!(base > 0.0);
    assert!(close(loss_at(300.0), 4.0 * base)); // excess 2 -> x4
    assert!(close(loss_at(500.0), 16.0 * base)); // excess 4 -> x16

    // Monotonic on both sides of a perfect estimate.
    assert!(
        loss_at(250.0) > loss_at(200.0),
        "underestimate loss grows with ratio"
    );
    assert!(
        loss_at(25.0) > loss_at(50.0),
        "overestimate loss grows as estimate worsens"
    );
}
11278
11279    // ── DPccp tests (bd-1as.3) ──
11280
#[test]
fn test_dpccp_two_tables() {
    // (name, pages, rows) for the two joined tables.
    let tables: Vec<TableStats> = [("a", 10, 100), ("b", 20, 200)]
        .into_iter()
        .map(|(name, n_pages, n_rows)| TableStats {
            name: name.to_owned(),
            n_pages,
            n_rows,
            source: StatsSource::Heuristic,
        })
        .collect();

    // No indexes, no WHERE terms, no cross-join constraints.
    let (order, cost, plans, _pruned) =
        dpccp_order_joins(&tables, &[], &[], None, None, &[], None)
            .expect("2-table exhaustive plan should exist");

    assert_eq!(order.len(), 2);
    assert!(cost > 0.0);
    // At least the two single-table seeds must have been considered.
    assert!(plans >= 2);
}
11307
#[test]
fn test_dpccp_three_tables() {
    // (name, pages, rows); "x" is the smallest table and sits at index 0.
    let tables: Vec<TableStats> = [("x", 5, 50), ("y", 100, 1000), ("z", 10, 100)]
        .into_iter()
        .map(|(name, n_pages, n_rows)| TableStats {
            name: name.to_owned(),
            n_pages,
            n_rows,
            source: StatsSource::Heuristic,
        })
        .collect();

    // No indexes, no WHERE terms, no cross-join constraints.
    let (order, cost, plans, _pruned) =
        dpccp_order_joins(&tables, &[], &[], None, None, &[], None)
            .expect("3-table exhaustive plan should exist");

    assert_eq!(order.len(), 3);
    assert!(cost > 0.0);
    // More plans than the three single-table seeds.
    assert!(plans > 3);
    // The table with the fewest pages ("x") should lead the join order.
    assert_eq!(order[0], 0);
}
11342
#[test]
fn test_dpccp_respects_cross_join_constraint() {
    // t1 is far larger than t2, yet the CROSS JOIN constraint below
    // requires t1 to come first.
    let tables: Vec<TableStats> = [("t1", 100, 10_000), ("t2", 1, 10)]
        .into_iter()
        .map(|(name, n_pages, n_rows)| TableStats {
            name: name.to_owned(),
            n_pages,
            n_rows,
            source: StatsSource::Heuristic,
        })
        .collect();
    let forced_pairs = [("t1".to_owned(), "t2".to_owned())];

    let (order, _cost, _plans, _pruned) =
        dpccp_order_joins(&tables, &[], &[], None, None, &forced_pairs, None)
            .expect("cross-join constrained exhaustive plan should exist");

    assert_eq!(order, vec![0, 1], "CROSS JOIN should force t1 before t2");
}
11373
#[test]
fn test_order_joins_five_tables_uses_exhaustive_search() {
    reset_plans_enumerated();

    // Five identically sized tables t0..t4.
    let mut tables = Vec::with_capacity(5);
    for i in 0..5 {
        tables.push(TableStats {
            name: format!("t{i}"),
            n_pages: 10,
            n_rows: 100,
            source: StatsSource::Heuristic,
        });
    }

    let plan = order_joins(&tables, &[], &[], None, &[]);
    assert_eq!(plan.join_order.len(), 5);

    // Only a lower bound is pinned: the search must look at more plans than
    // a width-1 greedy pass (about 10 for five tables). The original note
    // puts beam search with mx_choice=12 at roughly 92 plans, below the
    // 5! = 120 of a full enumeration.
    let enumerated = plans_enumerated_total();
    assert!(
        enumerated > 10,
        "5-table beam search should enumerate well beyond greedy-width-1 bounds, got {enumerated}"
    );
}
11398
#[test]
fn test_dpccp_branch_and_bound_prunes_high_cost_branches() {
    // Two tiny tables next to three huge ones; each table has as many rows
    // as pages. The size gap is meant to make some partial plans clearly
    // worse than the best plan found so far.
    let tables: Vec<TableStats> = [
        ("tiny", 1),
        ("small", 2),
        ("huge_a", 10_000),
        ("huge_b", 20_000),
        ("huge_c", 30_000),
    ]
    .into_iter()
    .map(|(name, size)| TableStats {
        name: name.to_owned(),
        n_pages: size,
        n_rows: size,
        source: StatsSource::Heuristic,
    })
    .collect();

    let (_order, _cost, _plans, pruned) =
        dpccp_order_joins(&tables, &[], &[], None, None, &[], None)
            .expect("5-table exhaustive plan should exist");

    assert!(pruned > 0, "expected branch-and-bound pruning to occur");
}
11440
#[test]
fn test_order_joins_large_join_uses_greedy_width() {
    reset_plans_enumerated();

    // Ten tables t0..t9 of strictly increasing size: the table of rank r
    // (1-based) has r * 10 pages and r * 100 rows.
    let tables: Vec<TableStats> = (1u64..=10)
        .map(|rank| TableStats {
            name: format!("t{}", rank - 1),
            n_pages: rank * 10,
            n_rows: rank * 100,
            source: StatsSource::Heuristic,
        })
        .collect();

    let plan = order_joins(&tables, &[], &[], None, &[]);
    assert_eq!(plan.join_order.len(), 10);

    // An upper bound this time: the search must stay far from exhaustive.
    let enumerated = plans_enumerated_total();
    assert!(
        enumerated <= 800,
        "greedy-width search should keep enumeration bounded for 10-table joins, got {enumerated}"
    );
}
11462
#[test]
fn test_plans_enumerated_metric() {
    reset_plans_enumerated();
    let before = plans_enumerated_total();

    let tables: Vec<TableStats> = [("t1", 10, 100), ("t2", 20, 200)]
        .into_iter()
        .map(|(name, n_pages, n_rows)| TableStats {
            name: name.to_owned(),
            n_pages,
            n_rows,
            source: StatsSource::Heuristic,
        })
        .collect();

    // Planning any join must bump the enumeration counter.
    let _ = order_joins(&tables, &[], &[], None, &[]);

    let after = plans_enumerated_total();
    assert!(after > before);
}
11487
11488    // ── Predicate pushdown tests (bd-1as.3) ──
11489
#[test]
fn test_pushdown_qualified_predicate() {
    // users.id = 1
    let column = Expr::Column(ColumnRef::qualified("users", "id"), Span::ZERO);
    let one = Expr::Literal(Literal::Integer(1), Span::ZERO);
    let expr = Expr::BinaryOp {
        left: Box::new(column),
        op: AstBinaryOp::Eq,
        right: Box::new(one),
        span: Span::ZERO,
    };
    let terms = [classify_where_term(&expr)];
    let table_names = vec!["users".to_owned(), "orders".to_owned()];

    // The qualifier names the target table, so the term is pushed to
    // `users` even though two tables are in scope.
    let (pushed, remaining) = pushdown_predicates(&terms, &table_names);
    assert_eq!(pushed.len(), 1);
    assert_eq!(pushed[0].table, "users");
    assert!(remaining.is_empty());
}
11510
#[test]
fn test_pushdown_single_table_unqualified() {
    // id > 10
    let column = Expr::Column(ColumnRef::bare("id"), Span::ZERO);
    let ten = Expr::Literal(Literal::Integer(10), Span::ZERO);
    let expr = Expr::BinaryOp {
        left: Box::new(column),
        op: AstBinaryOp::Gt,
        right: Box::new(ten),
        span: Span::ZERO,
    };
    let terms = [classify_where_term(&expr)];
    let table_names = vec!["users".to_owned()];

    // With a single table in scope an unqualified column is unambiguous,
    // so the term is pushed down.
    let (pushed, remaining) = pushdown_predicates(&terms, &table_names);
    assert_eq!(pushed.len(), 1);
    assert!(remaining.is_empty());
}
11527
#[test]
fn test_pushdown_unqualified_multi_table_stays() {
    // id = 1
    let column = Expr::Column(ColumnRef::bare("id"), Span::ZERO);
    let one = Expr::Literal(Literal::Integer(1), Span::ZERO);
    let expr = Expr::BinaryOp {
        left: Box::new(column),
        op: AstBinaryOp::Eq,
        right: Box::new(one),
        span: Span::ZERO,
    };
    let terms = [classify_where_term(&expr)];
    let table_names = vec!["users".to_owned(), "orders".to_owned()];

    // An unqualified column with two tables in scope cannot be attributed
    // to either, so the term stays behind as a join-level predicate.
    let (pushed, remaining) = pushdown_predicates(&terms, &table_names);
    assert!(pushed.is_empty());
    assert_eq!(remaining.len(), 1);
}
11545
11546    // ── Constant folding tests (bd-1as.3) ──
11547
#[test]
fn test_fold_literal() {
    // A bare literal folds to itself.
    let forty_two = Expr::Literal(Literal::Integer(42), Span::ZERO);
    let folded = try_constant_fold(&forty_two);
    assert_eq!(folded, FoldResult::Literal(Literal::Integer(42)));
}
11556
#[test]
fn test_fold_addition() {
    // 10 + 32 = 42
    let int = |value| Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO));
    let sum = Expr::BinaryOp {
        left: int(10),
        op: fsqlite_ast::BinaryOp::Add,
        right: int(32),
        span: Span::ZERO,
    };
    let folded = try_constant_fold(&sum);
    assert_eq!(folded, FoldResult::Literal(Literal::Integer(42)));
}
11570
#[test]
fn test_fold_division_by_zero() {
    // 10 / 0 folds to NULL instead of failing.
    let int = |value| Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO));
    let quotient = Expr::BinaryOp {
        left: int(10),
        op: fsqlite_ast::BinaryOp::Divide,
        right: int(0),
        span: Span::ZERO,
    };
    let folded = try_constant_fold(&quotient);
    assert_eq!(folded, FoldResult::Literal(Literal::Null));
}
11581
#[test]
fn test_fold_negation() {
    // -(5) = -5
    let five = Expr::Literal(Literal::Integer(5), Span::ZERO);
    let negated = Expr::UnaryOp {
        op: fsqlite_ast::UnaryOp::Negate,
        expr: Box::new(five),
        span: Span::ZERO,
    };
    let folded = try_constant_fold(&negated);
    assert_eq!(folded, FoldResult::Literal(Literal::Integer(-5)));
}
11594
#[test]
fn test_fold_column_ref_not_constant() {
    // A column reference depends on row data and must not fold.
    let column = Expr::Column(ColumnRef::bare("id"), Span::ZERO);
    let folded = try_constant_fold(&column);
    assert_eq!(folded, FoldResult::NotConstant);
}
11600
#[test]
fn test_fold_comparison() {
    // 10 < 20 folds to the TRUE literal.
    let int = |value| Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO));
    let comparison = Expr::BinaryOp {
        left: int(10),
        op: fsqlite_ast::BinaryOp::Lt,
        right: int(20),
        span: Span::ZERO,
    };
    let folded = try_constant_fold(&comparison);
    assert_eq!(folded, FoldResult::Literal(Literal::True));
}
11611
#[test]
fn test_fold_nested_expression() {
    let int = |value| Box::new(Expr::Literal(Literal::Integer(value), Span::ZERO));

    // Inner operand: 3 + 4
    let sum = Expr::BinaryOp {
        left: int(3),
        op: fsqlite_ast::BinaryOp::Add,
        right: int(4),
        span: Span::ZERO,
    };
    // Outer expression: (3 + 4) * 6 = 42
    let product = Expr::BinaryOp {
        left: Box::new(sum),
        op: fsqlite_ast::BinaryOp::Multiply,
        right: int(6),
        span: Span::ZERO,
    };

    let folded = try_constant_fold(&product);
    assert_eq!(folded, FoldResult::Literal(Literal::Integer(42)));
}
11631
#[test]
fn test_query_planner_cache_hit_matches_uncached_join_plan() {
    let tables: Vec<TableStats> = [("small", 4, 40), ("large", 40, 4_000)]
        .into_iter()
        .map(|(name, n_pages, n_rows)| TableStats {
            name: name.to_owned(),
            n_pages,
            n_rows,
            source: StatsSource::Heuristic,
        })
        .collect();

    // Reference plan computed without any cache involved.
    let uncached = order_joins(&tables, &[], &[], None, &[]);

    let mut planner = QueryPlanner::default();
    let sql_template = "SELECT * FROM small JOIN large ON small.id = large.small_id";

    // Identical lookup issued twice: same template, same schema cookie (7).
    let mut plan_via_cache = || {
        planner.order_joins_with_cache(
            sql_template,
            7,
            &tables,
            &[],
            &[],
            None,
            &[],
            None,
            None,
            PlannerFeatureFlags::default(),
        )
    };
    let first = plan_via_cache();
    let second = plan_via_cache();

    // Both lookups agree with the uncached plan...
    assert_eq!(*first, uncached);
    assert_eq!(*second, uncached);
    // ...and the second one hands back the very same allocation, which
    // shows it was served from the cache.
    assert!(Rc::ptr_eq(&first, &second));
    assert_eq!(planner.plan_cache_len(), 1);
}
11683
#[test]
fn test_query_planner_cache_separates_generic_and_join_entries() {
    let users = TableStats {
        name: "users".to_owned(),
        n_pages: 16,
        n_rows: 1_000,
        source: StatsSource::Heuristic,
    };
    let tables = vec![users];
    let sql_template = "SELECT * FROM users WHERE id = ?1";
    let schema_cookie = 31;
    let mut planner = QueryPlanner::default();

    // Same SQL text and schema cookie through both cache entry points.
    let generic = planner.cached_plan(sql_template, schema_cookie, || {
        sample_cached_query_plan("generic-sentinel")
    });
    let join_plan = planner.order_joins_with_cache(
        sql_template,
        schema_cookie,
        &tables,
        &[],
        &[],
        None,
        &[],
        None,
        None,
        PlannerFeatureFlags::default(),
    );

    // Each entry point must keep its own plan rather than reuse the other's.
    assert_eq!(generic.join_order, vec!["generic-sentinel".to_owned()]);
    assert_eq!(join_plan.join_order, vec!["users".to_owned()]);
    assert!(
        !Rc::ptr_eq(&generic, &join_plan),
        "generic cached_plan entries and join-order cache entries must not alias"
    );
    assert_eq!(planner.plan_cache_len(), 2);
}
11720
11721    #[test]
11722    fn test_query_planner_cache_invalidates_all_entries_on_schema_cookie_change() {
11723        let mut planner = QueryPlanner::default();
11724        let build_count = Cell::new(0);
11725
11726        let plan_a = planner.cached_plan("SELECT * FROM t1", 11, || {
11727            build_count.set(build_count.get() + 1);
11728            sample_cached_query_plan("t1-v11")
11729        });
11730        let _plan_b = planner.cached_plan("SELECT * FROM t2", 11, || {
11731            build_count.set(build_count.get() + 1);
11732            sample_cached_query_plan("t2-v11")
11733        });
11734
11735        assert_eq!(planner.plan_cache_len(), 2);
11736
11737        let rebuilt_plan_a = planner.cached_plan("SELECT * FROM t1", 12, || {
11738            build_count.set(build_count.get() + 1);
11739            sample_cached_query_plan("t1-v12")
11740        });
11741
11742        assert_eq!(build_count.get(), 3);
11743        assert_eq!(planner.plan_cache_len(), 1);
11744        assert_eq!(rebuilt_plan_a.join_order, vec!["t1-v12".to_owned()]);
11745        assert!(
11746            !Rc::ptr_eq(&plan_a, &rebuilt_plan_a),
11747            "schema cookie change must discard prior Rc<QueryPlan> entries"
11748        );
11749    }
11750
11751    #[test]
11752    fn test_query_planner_cache_lru_eviction_at_capacity() {
11753        let mut planner = QueryPlanner::default();
11754        let schema_cookie = 21;
11755
11756        for idx in 0..DEFAULT_PLAN_CACHE_CAPACITY {
11757            let sql = format!("SELECT * FROM cached_table WHERE id = ?{idx}");
11758            let _ = planner.cached_plan(&sql, schema_cookie, || sample_cached_query_plan(&sql));
11759        }
11760
11761        assert_eq!(planner.plan_cache_len(), DEFAULT_PLAN_CACHE_CAPACITY);
11762
11763        let hottest_sql = "SELECT * FROM cached_table WHERE id = ?0";
11764        let hottest_plan = planner.cached_plan(hottest_sql, schema_cookie, || {
11765            panic!("expected hottest cache entry to already exist")
11766        });
11767        for _ in 0..4 {
11768            let hottest_plan_again = planner.cached_plan(hottest_sql, schema_cookie, || {
11769                panic!("expected hottest entry to stay hot across repeated direct hits")
11770            });
11771            assert!(Rc::ptr_eq(&hottest_plan, &hottest_plan_again));
11772        }
11773
11774        let cold_key = plan_cache_key("SELECT * FROM cached_table WHERE id = ?1", schema_cookie);
11775        let hot_key = plan_cache_key(hottest_sql, schema_cookie);
11776
11777        let _ = planner.cached_plan(
11778            "SELECT * FROM cached_table WHERE id = ?overflow",
11779            schema_cookie,
11780            || sample_cached_query_plan("overflow"),
11781        );
11782
11783        assert_eq!(planner.plan_cache_len(), DEFAULT_PLAN_CACHE_CAPACITY);
11784        assert!(
11785            planner.plan_cache.iter().any(|(key, _)| *key == hot_key),
11786            "re-accessed entry should remain resident after LRU eviction"
11787        );
11788        assert!(
11789            !planner.plan_cache.iter().any(|(key, _)| *key == cold_key),
11790            "least-recently-used entry should be evicted at capacity"
11791        );
11792
11793        let hottest_plan_again = planner.cached_plan(hottest_sql, schema_cookie, || {
11794            panic!("expected hottest entry to survive eviction")
11795        });
11796        assert!(Rc::ptr_eq(&hottest_plan, &hottest_plan_again));
11797    }
11798
11799    #[test]
11800    fn test_query_planner_cache_separates_feature_flag_variants() {
11801        let tables = [
11802            table_stats("a", 1024, 1_000_000),
11803            table_stats("b", 1024, 1_000_000),
11804            table_stats("c", 1024, 1_000_000),
11805        ];
11806        let terms = [join_term("a", "k", "b", "k"), join_term("b", "k", "c", "k")];
11807        let sql_template = "SELECT * FROM a JOIN b ON a.k = b.k JOIN c ON b.k = c.k";
11808        let mut planner = QueryPlanner::default();
11809
11810        let hash_only = planner.order_joins_with_cache(
11811            sql_template,
11812            7,
11813            &tables,
11814            &[],
11815            &terms,
11816            None,
11817            &[],
11818            None,
11819            None,
11820            PlannerFeatureFlags::default(),
11821        );
11822        let leapfrog = planner.order_joins_with_cache(
11823            sql_template,
11824            7,
11825            &tables,
11826            &[],
11827            &terms,
11828            None,
11829            &[],
11830            None,
11831            None,
11832            PlannerFeatureFlags {
11833                leapfrog_join: true,
11834                ..PlannerFeatureFlags::default()
11835            },
11836        );
11837
11838        assert!(
11839            hash_only
11840                .join_segments
11841                .iter()
11842                .all(|segment| segment.operator == JoinOperator::HashJoin),
11843            "disabled feature flag should keep hash-only plan: {:?}",
11844            hash_only.join_segments
11845        );
11846        assert!(
11847            leapfrog
11848                .join_segments
11849                .iter()
11850                .any(|segment| segment.operator == JoinOperator::LeapfrogTriejoin),
11851            "enabled feature flag should allow leapfrog routing: {:?}",
11852            leapfrog.join_segments
11853        );
11854        assert!(
11855            !Rc::ptr_eq(&hash_only, &leapfrog),
11856            "feature-flag variants must not alias the same cached Rc<QueryPlan>"
11857        );
11858        assert_eq!(planner.plan_cache_len(), 2);
11859    }
11860
11861    #[test]
11862    fn test_query_planner_cache_bypasses_adaptive_cracking_hints() {
11863        let tables = [table_stats("t1", 256, 20_000)];
11864        let indexes = [
11865            IndexInfo {
11866                name: "idx_a".to_owned(),
11867                table: "t1".to_owned(),
11868                columns: vec!["a".to_owned()],
11869                unique: false,
11870                n_pages: 16,
11871                source: StatsSource::Heuristic,
11872                partial_where: None,
11873                expression_columns: vec![],
11874            },
11875            IndexInfo {
11876                name: "idx_b".to_owned(),
11877                table: "t1".to_owned(),
11878                columns: vec!["a".to_owned()],
11879                unique: false,
11880                n_pages: 12,
11881                source: StatsSource::Heuristic,
11882                partial_where: None,
11883                expression_columns: vec![],
11884            },
11885        ];
11886        let terms = [eq_term("a")];
11887        let sql_template = "SELECT * FROM t1 WHERE a = ?1";
11888        let mut planner = QueryPlanner::default();
11889
11890        let mut first_hints = CrackingHintStore::default();
11891        first_hints.record_access_path(&AccessPath {
11892            table: "t1".to_owned(),
11893            kind: AccessPathKind::IndexScanEquality,
11894            index: Some("idx_a".to_owned()),
11895            estimated_cost: 1.0,
11896            estimated_rows: 1.0,
11897            time_travel: None,
11898            probe: None,
11899        });
11900        let first = planner.order_joins_with_cache(
11901            sql_template,
11902            5,
11903            &tables,
11904            &indexes,
11905            &terms,
11906            None,
11907            &[],
11908            None,
11909            Some(&mut first_hints),
11910            PlannerFeatureFlags::default(),
11911        );
11912
11913        let mut second_hints = CrackingHintStore::default();
11914        second_hints.record_access_path(&AccessPath {
11915            table: "t1".to_owned(),
11916            kind: AccessPathKind::IndexScanEquality,
11917            index: Some("idx_b".to_owned()),
11918            estimated_cost: 1.0,
11919            estimated_rows: 1.0,
11920            time_travel: None,
11921            probe: None,
11922        });
11923        let second = planner.order_joins_with_cache(
11924            sql_template,
11925            5,
11926            &tables,
11927            &indexes,
11928            &terms,
11929            None,
11930            &[],
11931            None,
11932            Some(&mut second_hints),
11933            PlannerFeatureFlags::default(),
11934        );
11935
11936        assert_eq!(first.access_paths[0].index.as_deref(), Some("idx_a"));
11937        assert_eq!(second.access_paths[0].index.as_deref(), Some("idx_b"));
11938        assert_eq!(planner.plan_cache_len(), 0);
11939        assert!(!Rc::ptr_eq(&first, &second));
11940    }
11941}
/// With no indexes and no WHERE terms, `order_joins` must still return a join
/// order that names every input table exactly once.
#[test]
fn test_join_order_returns_each_table_once() {
    // (name, pages, rows) for six tables of widely varying size.
    let fixtures = [
        ("nation", 1, 25),
        ("region", 1, 5),
        ("supplier", 100, 10_000),
        ("customer", 500, 150_000),
        ("orders", 2000, 1_500_000),
        ("lineitem", 8000, 6_000_000),
    ];
    let tables: Vec<TableStats> = fixtures
        .iter()
        .map(|&(name, n_pages, n_rows)| TableStats {
            name: name.to_owned(),
            n_pages,
            n_rows,
            source: StatsSource::Analyze,
        })
        .collect();

    let plan = order_joins(&tables, &[], &[], None, &[]);

    // Same length, no duplicates, and every input table present.
    assert_eq!(plan.join_order.len(), tables.len());
    let distinct_names: HashSet<_> = plan.join_order.iter().collect();
    assert_eq!(distinct_names.len(), tables.len());
    for table in &tables {
        assert!(plan.join_order.contains(&table.name));
    }
}
11990
#[cfg(test)]
mod probe_tests {
    //! Unit tests for `extract_access_path_probe_with_rowid_aliases`.
    //!
    //! Each test hand-builds the WHERE terms, the index metadata (when needed)
    //! and an `AccessPath`, then checks which `AccessPathProbe`, if any, is
    //! extracted for that combination.

    use super::*;
    use fsqlite_ast::{BinaryOp as AstBinaryOp, ColumnRef, Expr, InSet, LikeOp, Literal, Span};

    /// Boxed reference to an unqualified column called `name`.
    fn col(name: &str) -> Box<Expr> {
        Box::new(Expr::Column(ColumnRef::bare(name), Span::ZERO))
    }

    /// Integer literal expression carrying a zero span.
    fn int_literal(v: i64) -> Expr {
        Expr::Literal(Literal::Integer(v), Span::ZERO)
    }

    /// String literal expression carrying a zero span.
    fn str_literal(s: &str) -> Expr {
        Expr::Literal(Literal::String(s.to_owned()), Span::ZERO)
    }

    /// Boxed integer literal expression.
    fn lit_int(v: i64) -> Box<Expr> {
        Box::new(int_literal(v))
    }

    /// Builds `<col_name> <op> <val>` as a binary expression.
    fn cmp_expr(col_name: &str, op: AstBinaryOp, val: i64) -> Expr {
        Expr::BinaryOp {
            left: col(col_name),
            op,
            right: lit_int(val),
            span: Span::ZERO,
        }
    }

    /// Builds `<col_name> = <val>`.
    fn eq_expr(col_name: &str, val: i64) -> Expr {
        cmp_expr(col_name, AstBinaryOp::Eq, val)
    }

    /// WHERE term over `expr` whose column is unqualified (no table name).
    fn bare_term<'e>(expr: &'e Expr, column: &str, kind: WhereTermKind) -> WhereTerm<'e> {
        WhereTerm {
            expr,
            column: Some(WhereColumn {
                table: None,
                column: column.to_owned(),
            }),
            kind,
        }
    }

    /// Non-unique, single-column, one-page heuristic index on table `t`.
    fn index_on_t(name: &str, column: &str) -> IndexInfo {
        IndexInfo {
            name: name.to_owned(),
            table: "t".to_owned(),
            columns: vec![column.to_owned()],
            unique: false,
            n_pages: 1,
            source: StatsSource::Heuristic,
            partial_where: None,
            expression_columns: vec![],
        }
    }

    /// A rowid lookup with a `rowid = 42` term yields a `RowidEquality` probe
    /// targeting the literal.
    #[test]
    fn extract_probe_rowid_equality() {
        let expr = eq_expr("rowid", 42);
        let terms = [bare_term(&expr, "rowid", WhereTermKind::RowidEquality)];
        let ap = AccessPath {
            table: "t".to_owned(),
            kind: AccessPathKind::RowidLookup,
            index: None,
            estimated_cost: 1.0,
            estimated_rows: 1.0,
            time_travel: None,
            probe: None,
        };
        let probe = extract_access_path_probe_with_rowid_aliases(&ap, &[], &terms, &[]);
        match &probe {
            Some(AccessPathProbe::RowidEquality { target }) => {
                assert_eq!(**target, int_literal(42));
            }
            other => panic!("expected RowidEquality probe, got {other:?}"),
        }
    }

    /// An equality index scan with a matching equality term yields an
    /// `Equality` probe on the indexed column.
    #[test]
    fn extract_probe_index_equality() {
        let expr = eq_expr("name", 7);
        let terms = [bare_term(&expr, "name", WhereTermKind::Equality)];
        let indexes = [index_on_t("idx_name", "name")];
        let ap = AccessPath {
            table: "t".to_owned(),
            kind: AccessPathKind::IndexScanEquality,
            index: Some("idx_name".to_owned()),
            estimated_cost: 5.0,
            estimated_rows: 1.0,
            time_travel: None,
            probe: None,
        };
        let probe = extract_access_path_probe_with_rowid_aliases(&ap, &indexes, &terms, &[]);
        match &probe {
            Some(AccessPathProbe::Equality { column, target }) => {
                assert_eq!(column, "name");
                assert_eq!(**target, int_literal(7));
            }
            other => panic!("expected Equality probe, got {other:?}"),
        }
    }

    /// `age > 18` and `age <= 65` combine into one `Range` probe with an
    /// exclusive lower bound and an inclusive upper bound.
    #[test]
    fn extract_probe_index_range() {
        let gt_expr = cmp_expr("age", AstBinaryOp::Gt, 18);
        let le_expr = cmp_expr("age", AstBinaryOp::Le, 65);
        let terms = [
            bare_term(&gt_expr, "age", WhereTermKind::Range),
            bare_term(&le_expr, "age", WhereTermKind::Range),
        ];
        let indexes = [index_on_t("idx_age", "age")];
        let ap = AccessPath {
            table: "t".to_owned(),
            kind: AccessPathKind::IndexScanRange { selectivity: 0.5 },
            index: Some("idx_age".to_owned()),
            estimated_cost: 50.0,
            estimated_rows: 100.0,
            time_travel: None,
            probe: None,
        };
        let probe = extract_access_path_probe_with_rowid_aliases(&ap, &indexes, &terms, &[]);
        match &probe {
            Some(AccessPathProbe::Range {
                column,
                lower,
                upper,
            }) => {
                assert_eq!(column, "age");
                let (lo_expr, lo_inc) = lower.as_ref().expect("expected lower bound");
                assert_eq!(**lo_expr, int_literal(18));
                assert!(!lo_inc, "GT should be exclusive");
                let (hi_expr, hi_inc) = upper.as_ref().expect("expected upper bound");
                assert_eq!(**hi_expr, int_literal(65));
                assert!(hi_inc, "LE should be inclusive");
            }
            other => panic!("expected Range probe, got {other:?}"),
        }
    }

    /// An `IN (1, 2, 3)` term on an indexed column yields an `InList` probe
    /// carrying the list values in order.
    #[test]
    fn extract_probe_in_list() {
        let in_expr = Expr::In {
            expr: col("status"),
            set: InSet::List(vec![int_literal(1), int_literal(2), int_literal(3)]),
            not: false,
            span: Span::ZERO,
        };
        let terms = [bare_term(
            &in_expr,
            "status",
            WhereTermKind::InList { count: 3 },
        )];
        let indexes = [index_on_t("idx_status", "status")];
        let ap = AccessPath {
            table: "t".to_owned(),
            kind: AccessPathKind::IndexScanEquality,
            index: Some("idx_status".to_owned()),
            estimated_cost: 15.0,
            estimated_rows: 30.0,
            time_travel: None,
            probe: None,
        };
        let probe = extract_access_path_probe_with_rowid_aliases(&ap, &indexes, &terms, &[]);
        match &probe {
            Some(AccessPathProbe::InList { column, values }) => {
                assert_eq!(column, "status");
                assert_eq!(values.len(), 3);
                assert_eq!(*values[0], int_literal(1));
                assert_eq!(*values[1], int_literal(2));
                assert_eq!(*values[2], int_literal(3));
            }
            other => panic!("expected InList probe, got {other:?}"),
        }
    }

    /// When both an equality term and an IN term constrain the same column,
    /// the extracted probe must be the equality one.
    #[test]
    fn extract_probe_in_list_prefers_equality_over_in() {
        let eq_expression = eq_expr("status", 5);
        let in_expr = Expr::In {
            expr: col("status"),
            set: InSet::List(vec![int_literal(1), int_literal(5)]),
            not: false,
            span: Span::ZERO,
        };
        let terms = [
            bare_term(&eq_expression, "status", WhereTermKind::Equality),
            bare_term(&in_expr, "status", WhereTermKind::InList { count: 2 }),
        ];
        let indexes = [index_on_t("idx_status", "status")];
        let ap = AccessPath {
            table: "t".to_owned(),
            kind: AccessPathKind::IndexScanEquality,
            index: Some("idx_status".to_owned()),
            estimated_cost: 5.0,
            estimated_rows: 1.0,
            time_travel: None,
            probe: None,
        };
        let probe = extract_access_path_probe_with_rowid_aliases(&ap, &indexes, &terms, &[]);
        assert!(
            matches!(&probe, Some(AccessPathProbe::Equality { .. })),
            "equality should be preferred when both equality and IN terms exist"
        );
    }

    /// A `LIKE 'abc%'` prefix term becomes the half-open range
    /// `['abc', 'abd')` on the indexed column.
    #[test]
    fn extract_probe_like_prefix_as_range() {
        let like_expr = Expr::Like {
            expr: col("name"),
            pattern: Box::new(str_literal("abc%")),
            escape: None,
            not: false,
            op: LikeOp::Like,
            span: Span::ZERO,
        };
        let terms = [bare_term(
            &like_expr,
            "name",
            WhereTermKind::LikePrefix {
                prefix: "abc".to_owned(),
                upper_bound: Some("abd".to_owned()),
            },
        )];
        let indexes = [index_on_t("idx_name", "name")];
        let ap = AccessPath {
            table: "t".to_owned(),
            kind: AccessPathKind::IndexScanRange { selectivity: 0.1 },
            index: Some("idx_name".to_owned()),
            estimated_cost: 10.0,
            estimated_rows: 100.0,
            time_travel: None,
            probe: None,
        };
        let probe = extract_access_path_probe_with_rowid_aliases(&ap, &indexes, &terms, &[]);
        match &probe {
            Some(AccessPathProbe::Range {
                column,
                lower,
                upper,
            }) => {
                assert_eq!(column, "name");
                let (lo_expr, lo_inc) = lower.as_ref().expect("expected lower bound");
                assert_eq!(**lo_expr, str_literal("abc"));
                assert!(lo_inc, "LIKE prefix lower bound should be inclusive");
                let (hi_expr, hi_inc) = upper.as_ref().expect("expected upper bound");
                assert_eq!(**hi_expr, str_literal("abd"));
                assert!(!hi_inc, "LIKE prefix upper bound should be exclusive");
            }
            other => panic!("expected Range probe from LikePrefix, got {other:?}"),
        }
    }

    /// A full table scan has nothing to probe with, so extraction returns `None`.
    #[test]
    fn extract_probe_full_scan_returns_none() {
        let ap = AccessPath {
            table: "t".to_owned(),
            kind: AccessPathKind::FullTableScan,
            index: None,
            estimated_cost: 1000.0,
            estimated_rows: 1000.0,
            time_travel: None,
            probe: None,
        };
        assert!(extract_access_path_probe_with_rowid_aliases(&ap, &[], &[], &[]).is_none());
    }

    /// `age BETWEEN 18 AND 65` becomes a `Range` probe that is inclusive on
    /// both ends.
    #[test]
    fn extract_probe_between_as_inclusive_range() {
        // A stack-local expression outlives `terms`, so there is no need to
        // leak a heap allocation just to obtain a `'static` reference.
        let between_expr = Expr::Between {
            expr: col("age"),
            low: lit_int(18),
            high: lit_int(65),
            not: false,
            span: Span::ZERO,
        };
        let terms = [bare_term(&between_expr, "age", WhereTermKind::Between)];
        let indexes = [index_on_t("idx_age", "age")];
        let ap = AccessPath {
            table: "t".to_owned(),
            kind: AccessPathKind::IndexScanRange { selectivity: 0.1 },
            index: Some("idx_age".to_owned()),
            estimated_cost: 10.0,
            estimated_rows: 100.0,
            time_travel: None,
            probe: None,
        };
        let probe = extract_access_path_probe_with_rowid_aliases(&ap, &indexes, &terms, &[]);
        match &probe {
            Some(AccessPathProbe::Range {
                column,
                lower,
                upper,
            }) => {
                assert_eq!(column, "age");
                let (lo_expr, lo_inc) = lower.as_ref().expect("expected lower bound");
                assert_eq!(**lo_expr, int_literal(18));
                assert!(lo_inc, "BETWEEN lower bound must be inclusive");
                let (hi_expr, hi_inc) = upper.as_ref().expect("expected upper bound");
                assert_eq!(**hi_expr, int_literal(65));
                assert!(hi_inc, "BETWEEN upper bound must be inclusive");
            }
            other => panic!("expected Range probe from Between, got {other:?}"),
        }
    }
}