Skip to main content

citadel_sql/
planner.rs

1//! Query planner: chooses between seq scan, PK lookup, or index scan.
2
3use crate::encoding::encode_composite_key;
4use crate::parser::{BinOp, Expr};
5use crate::types::{IndexDef, IndexKind, InvertedKind, TableSchema, Value};
6
7#[derive(Debug, Clone)]
8pub enum ScanPlan {
9    SeqScan,
10    PkLookup {
11        pk_values: Vec<Value>,
12    },
13    PkRangeScan {
14        start_key: Vec<u8>,
15        range_conds: Vec<(BinOp, Value)>,
16        num_pk_cols: usize,
17    },
18    IndexScan {
19        index_name: String,
20        idx_table: Vec<u8>,
21        prefix: Vec<u8>,
22        num_prefix_cols: usize,
23        range_conds: Vec<(BinOp, Value)>,
24        is_unique: bool,
25        index_columns: Vec<u16>,
26    },
27    InvertedScan {
28        kind: InvertedKind,
29        idx_table: Vec<u8>,
30        column_idx: u16,
31        probe_entries: Vec<Vec<u8>>,
32        recheck_expr: Expr,
33        recheck_needed: bool,
34    },
35}
36
37struct SimplePredicate {
38    col_idx: usize,
39    op: BinOp,
40    value: Value,
41}
42
43fn flatten_and(expr: &Expr) -> Vec<&Expr> {
44    match expr {
45        Expr::BinaryOp {
46            left,
47            op: BinOp::And,
48            right,
49        } => {
50            let mut v = flatten_and(left);
51            v.extend(flatten_and(right));
52            v
53        }
54        _ => vec![expr],
55    }
56}
57
58fn is_comparison(op: BinOp) -> bool {
59    matches!(
60        op,
61        BinOp::Eq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq
62    )
63}
64
65fn is_range_op(op: BinOp) -> bool {
66    matches!(op, BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq)
67}
68
69fn flip_op(op: BinOp) -> BinOp {
70    match op {
71        BinOp::Lt => BinOp::Gt,
72        BinOp::LtEq => BinOp::GtEq,
73        BinOp::Gt => BinOp::Lt,
74        BinOp::GtEq => BinOp::LtEq,
75        other => other,
76    }
77}
78
79fn resolve_column_name(expr: &Expr) -> Option<&str> {
80    match expr {
81        Expr::Column(name) => Some(name.as_str()),
82        Expr::QualifiedColumn { column, .. } => Some(column.as_str()),
83        _ => None,
84    }
85}
86
87fn resolve_literal(expr: &Expr) -> Option<Value> {
88    match expr {
89        Expr::Literal(v) => Some(v.clone()),
90        Expr::Parameter(n) => crate::eval::resolve_scoped_param(*n).ok(),
91        Expr::Function { .. } | Expr::Cast { .. } => {
92            let col_map = crate::eval::ColumnMap::new(&[]);
93            let ctx = crate::eval::EvalCtx::new(&col_map, &[]);
94            crate::eval::eval_expr(expr, &ctx).ok()
95        }
96        _ => None,
97    }
98}
99
100fn extract_simple_predicate(expr: &Expr, schema: &TableSchema) -> Option<SimplePredicate> {
101    match expr {
102        Expr::BinaryOp { left, op, right } if is_comparison(*op) => {
103            if let (Some(name), Some(val)) = (resolve_column_name(left), resolve_literal(right)) {
104                let col_idx = schema.column_index(name)?;
105                return Some(SimplePredicate {
106                    col_idx,
107                    op: *op,
108                    value: val,
109                });
110            }
111            if let (Some(val), Some(name)) = (resolve_literal(left), resolve_column_name(right)) {
112                let col_idx = schema.column_index(name)?;
113                return Some(SimplePredicate {
114                    col_idx,
115                    op: flip_op(*op),
116                    value: val,
117                });
118            }
119            None
120        }
121        _ => None,
122    }
123}
124
125/// Decompose BETWEEN into two range predicates for planner use.
126fn flatten_between(expr: &Expr, schema: &TableSchema, out: &mut Vec<SimplePredicate>) {
127    match expr {
128        Expr::Between {
129            expr: col_expr,
130            low,
131            high,
132            negated: false,
133        } => {
134            if let (Some(name), Some(lo), Some(hi)) = (
135                resolve_column_name(col_expr),
136                resolve_literal(low),
137                resolve_literal(high),
138            ) {
139                if let Some(col_idx) = schema.column_index(name) {
140                    out.push(SimplePredicate {
141                        col_idx,
142                        op: BinOp::GtEq,
143                        value: lo,
144                    });
145                    out.push(SimplePredicate {
146                        col_idx,
147                        op: BinOp::LtEq,
148                        value: hi,
149                    });
150                }
151            }
152        }
153        Expr::BinaryOp {
154            left,
155            op: BinOp::And,
156            right,
157        } => {
158            flatten_between(left, schema, out);
159            flatten_between(right, schema, out);
160        }
161        _ => {}
162    }
163}
164
165pub fn plan_select(schema: &TableSchema, where_clause: &Option<Expr>) -> ScanPlan {
166    plan_select_inner(schema, where_clause, false)
167}
168
169pub fn plan_select_inverted(schema: &TableSchema, where_clause: &Option<Expr>) -> ScanPlan {
170    plan_select_inner(schema, where_clause, true)
171}
172
173fn plan_select_inner(
174    schema: &TableSchema,
175    where_clause: &Option<Expr>,
176    allow_inverted: bool,
177) -> ScanPlan {
178    let where_expr = match where_clause {
179        Some(e) => e,
180        None => return ScanPlan::SeqScan,
181    };
182
183    let predicates = flatten_and(where_expr);
184    let simple: Vec<Option<SimplePredicate>> = predicates
185        .iter()
186        .map(|p| extract_simple_predicate(p, schema))
187        .collect();
188
189    if let Some(plan) = try_pk_lookup(schema, &simple) {
190        return plan;
191    }
192
193    let mut range_preds: Vec<SimplePredicate> = simple
194        .iter()
195        .filter_map(|p| {
196            let p = p.as_ref()?;
197            if is_range_op(p.op) {
198                Some(SimplePredicate {
199                    col_idx: p.col_idx,
200                    op: p.op,
201                    value: p.value.clone(),
202                })
203            } else {
204                None
205            }
206        })
207        .collect();
208    flatten_between(where_expr, schema, &mut range_preds);
209
210    if let Some(plan) = try_pk_range_scan(schema, &range_preds) {
211        return plan;
212    }
213
214    if allow_inverted {
215        if let Some(plan) = try_inverted_scan(schema, where_expr) {
216            return plan;
217        }
218    }
219
220    if let Some(plan) = try_best_index(schema, where_expr, &simple) {
221        return plan;
222    }
223
224    ScanPlan::SeqScan
225}
226
227fn try_inverted_scan(schema: &TableSchema, where_expr: &Expr) -> Option<ScanPlan> {
228    use crate::parser::BinOp as B;
229    let (col_idx, rhs_val, op) = match where_expr {
230        Expr::BinaryOp {
231            left,
232            op: B::JsonContains,
233            right,
234        } => {
235            let name = resolve_column_name(left)?;
236            let col_idx = schema.column_index(name)? as u16;
237            let rhs = resolve_literal(right)?;
238            (col_idx, rhs, B::JsonContains)
239        }
240        Expr::BinaryOp {
241            left,
242            op: B::JsonPathMatch,
243            right,
244        } => {
245            let name = resolve_column_name(left)?;
246            let col_idx = schema.column_index(name)? as u16;
247            let rhs = resolve_literal(right)?;
248            (col_idx, rhs, B::JsonPathMatch)
249        }
250        _ => return None,
251    };
252    let idx = schema.indices.iter().find(|i| {
253        matches!(i.kind, IndexKind::Inverted(_))
254            && i.columns.first().is_some_and(|&c| c == col_idx)
255            && i.predicate_expr.is_none()
256    })?;
257    let kind = match idx.kind {
258        IndexKind::Inverted(k) => k,
259        _ => return None,
260    };
261    match (kind, op) {
262        (InvertedKind::Gin(_), B::JsonContains) => {}
263        (InvertedKind::Fts { .. }, B::JsonPathMatch) => {}
264        _ => return None,
265    }
266    let probe_entries = extract_inverted_probe(&rhs_val, kind)?;
267    if probe_entries.is_empty() {
268        return None;
269    }
270    let recheck_needed = inverted_recheck_needed(kind, &rhs_val);
271    let idx_table = TableSchema::index_table_name(&schema.name, &idx.name);
272    Some(ScanPlan::InvertedScan {
273        kind,
274        idx_table,
275        column_idx: col_idx,
276        probe_entries,
277        recheck_expr: where_expr.clone(),
278        recheck_needed,
279    })
280}
281
282fn inverted_recheck_needed(kind: InvertedKind, rhs: &Value) -> bool {
283    match kind {
284        InvertedKind::Gin(_) => true,
285        InvertedKind::Fts { .. } => match rhs {
286            Value::TsQuery(bytes) => match crate::fts::TsQueryAst::decode(bytes) {
287                Ok(ast) => !fts_ast_exact_for_index(&ast),
288                Err(_) => true,
289            },
290            _ => true,
291        },
292    }
293}
294
295fn fts_ast_exact_for_index(ast: &crate::fts::TsQueryAst) -> bool {
296    use crate::fts::TsQueryAst;
297    match ast {
298        TsQueryAst::Lexeme {
299            prefix: false,
300            weight_mask: 0,
301            ..
302        } => true,
303        TsQueryAst::Lexeme { .. } => false,
304        TsQueryAst::And(l, r) => fts_ast_exact_for_index(l) && fts_ast_exact_for_index(r),
305        _ => false,
306    }
307}
308
309pub(crate) fn fts_ast_is_pure_phrase(ast: &crate::fts::TsQueryAst) -> bool {
310    use crate::fts::TsQueryAst;
311    match ast {
312        TsQueryAst::Lexeme {
313            prefix: false,
314            weight_mask: 0,
315            ..
316        } => true,
317        TsQueryAst::Phrase { left, right, .. } => {
318            fts_ast_is_pure_phrase(left) && fts_ast_is_pure_phrase(right)
319        }
320        _ => false,
321    }
322}
323
324fn extract_inverted_probe(rhs: &Value, kind: InvertedKind) -> Option<Vec<Vec<u8>>> {
325    use crate::types::GinOpsClass;
326    match kind {
327        InvertedKind::Gin(ops) => {
328            let entries = crate::json::extract_gin_entries(rhs, ops).ok()?;
329            let filtered: Vec<Vec<u8>> = match ops {
330                GinOpsClass::JsonbOps => entries
331                    .into_iter()
332                    .filter(|e| !matches!(e.first(), Some(&0x01)))
333                    .collect(),
334                GinOpsClass::JsonbPathOps => entries,
335            };
336            Some(filtered)
337        }
338        InvertedKind::Fts { .. } => match rhs {
339            Value::TsQuery(bytes) => {
340                let ast = crate::fts::TsQueryAst::decode(bytes).ok()?;
341                let required = fts_required_lexemes(&ast)?;
342                if required.is_empty() {
343                    None
344                } else {
345                    Some(required)
346                }
347            }
348            _ => None,
349        },
350    }
351}
352
353fn fts_required_lexemes(ast: &crate::fts::TsQueryAst) -> Option<Vec<Vec<u8>>> {
354    let mut out: std::collections::BTreeSet<Vec<u8>> = std::collections::BTreeSet::new();
355    let ok = collect_required(ast, &mut out);
356    if !ok || out.is_empty() {
357        None
358    } else {
359        Some(out.into_iter().collect())
360    }
361}
362
363fn collect_required(
364    ast: &crate::fts::TsQueryAst,
365    out: &mut std::collections::BTreeSet<Vec<u8>>,
366) -> bool {
367    use crate::fts::TsQueryAst;
368    match ast {
369        TsQueryAst::Lexeme { prefix, .. } if *prefix => false,
370        TsQueryAst::Lexeme { lexeme, .. } => {
371            out.insert(lexeme.clone());
372            true
373        }
374        TsQueryAst::And(l, r) => {
375            let lo = collect_required(l, out);
376            let ro = collect_required(r, out);
377            lo || ro
378        }
379        TsQueryAst::Or(..) => false,
380        TsQueryAst::Not(_) => false,
381        TsQueryAst::Phrase { left, right, .. } => {
382            let lo = collect_required(left, out);
383            let ro = collect_required(right, out);
384            lo && ro
385        }
386    }
387}
388
389fn try_pk_range_scan(schema: &TableSchema, range_preds: &[SimplePredicate]) -> Option<ScanPlan> {
390    if schema.primary_key_columns.len() != 1 {
391        return None; // Only single-column PK for now
392    }
393    let pk_col = schema.primary_key_columns[0] as usize;
394    let conds: Vec<(BinOp, Value)> = range_preds
395        .iter()
396        .filter(|p| p.col_idx == pk_col)
397        .map(|p| (p.op, p.value.clone()))
398        .collect();
399    if conds.is_empty() {
400        return None;
401    }
402    let start_key = conds
403        .iter()
404        .filter(|(op, _)| matches!(op, BinOp::GtEq | BinOp::Gt))
405        .map(|(_, v)| encode_composite_key(std::slice::from_ref(v)))
406        .min_by(|a, b| a.cmp(b))
407        .unwrap_or_default();
408    Some(ScanPlan::PkRangeScan {
409        start_key,
410        range_conds: conds,
411        num_pk_cols: 1,
412    })
413}
414
415fn try_pk_lookup(schema: &TableSchema, predicates: &[Option<SimplePredicate>]) -> Option<ScanPlan> {
416    let pk_cols = &schema.primary_key_columns;
417    let mut pk_values: Vec<Option<Value>> = vec![None; pk_cols.len()];
418
419    for pred in predicates.iter().flatten() {
420        if pred.op == BinOp::Eq {
421            if let Some(pk_pos) = pk_cols.iter().position(|&c| c == pred.col_idx as u16) {
422                pk_values[pk_pos] = Some(pred.value.clone());
423            }
424        }
425    }
426
427    if pk_values.iter().all(|v| v.is_some()) {
428        let values: Vec<Value> = pk_values.into_iter().map(|v| v.unwrap()).collect();
429        Some(ScanPlan::PkLookup { pk_values: values })
430    } else {
431        None
432    }
433}
434
435#[derive(PartialEq, Eq, PartialOrd, Ord)]
436struct IndexScore {
437    num_equality: usize,
438    has_range: bool,
439    is_unique: bool,
440}
441
442fn try_best_index(
443    schema: &TableSchema,
444    where_expr: &Expr,
445    predicates: &[Option<SimplePredicate>],
446) -> Option<ScanPlan> {
447    let mut best_score: Option<IndexScore> = None;
448    let mut best_plan: Option<ScanPlan> = None;
449
450    let conjuncts = flatten_and(where_expr);
451    for idx in &schema.indices {
452        if !partial_predicate_implied(idx, where_expr, &conjuncts) {
453            continue;
454        }
455        if let Some((score, plan)) = try_index_scan(schema, idx, predicates) {
456            if best_score.is_none() || score > *best_score.as_ref().unwrap() {
457                best_score = Some(score);
458                best_plan = Some(plan);
459            }
460        }
461    }
462
463    best_plan
464}
465
466fn partial_predicate_implied(idx: &IndexDef, where_expr: &Expr, conjuncts: &[&Expr]) -> bool {
467    let Some(pred) = idx.predicate_expr.as_ref() else {
468        return true;
469    };
470    if expr_structurally_eq(pred, where_expr) {
471        return true;
472    }
473    if conjuncts.iter().any(|c| expr_structurally_eq(pred, c)) {
474        return true;
475    }
476    if let Expr::IsNotNull(target) = pred {
477        if let Expr::Column(col) = target.as_ref() {
478            return conjuncts.iter().any(|c| conjunct_proves_not_null(c, col));
479        }
480    }
481    false
482}
483
484fn expr_structurally_eq(a: &Expr, b: &Expr) -> bool {
485    format!("{a:?}") == format!("{b:?}")
486}
487
488fn conjunct_proves_not_null(expr: &Expr, col: &str) -> bool {
489    let mentions = |e: &Expr| matches!(e, Expr::Column(n) if n.eq_ignore_ascii_case(col));
490    match expr {
491        Expr::BinaryOp {
492            left,
493            op: BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq,
494            right,
495        } => mentions(left) || mentions(right),
496        Expr::IsNotNull(inner) => mentions(inner),
497        _ => false,
498    }
499}
500
501fn try_index_scan(
502    schema: &TableSchema,
503    idx: &IndexDef,
504    predicates: &[Option<SimplePredicate>],
505) -> Option<(IndexScore, ScanPlan)> {
506    let mut used = Vec::new();
507    let mut equality_values: Vec<Value> = Vec::new();
508    let mut range_conds: Vec<(BinOp, Value)> = Vec::new();
509
510    for &col_idx in &idx.columns {
511        let mut found_eq = false;
512        for (i, pred) in predicates.iter().enumerate() {
513            if used.contains(&i) {
514                continue;
515            }
516            if let Some(sp) = pred {
517                if sp.col_idx == col_idx as usize && sp.op == BinOp::Eq {
518                    equality_values.push(sp.value.clone());
519                    used.push(i);
520                    found_eq = true;
521                    break;
522                }
523            }
524        }
525        if !found_eq {
526            for (i, pred) in predicates.iter().enumerate() {
527                if used.contains(&i) {
528                    continue;
529                }
530                if let Some(sp) = pred {
531                    if sp.col_idx == col_idx as usize && is_range_op(sp.op) {
532                        range_conds.push((sp.op, sp.value.clone()));
533                        used.push(i);
534                    }
535                }
536            }
537            break;
538        }
539    }
540
541    if equality_values.is_empty() && range_conds.is_empty() {
542        return None;
543    }
544
545    let score = IndexScore {
546        num_equality: equality_values.len(),
547        has_range: !range_conds.is_empty(),
548        is_unique: idx.unique,
549    };
550
551    let prefix = encode_composite_key(&equality_values);
552    let idx_table = TableSchema::index_table_name(&schema.name, &idx.name);
553
554    Some((
555        score,
556        ScanPlan::IndexScan {
557            index_name: idx.name.clone(),
558            idx_table,
559            prefix,
560            num_prefix_cols: equality_values.len(),
561            range_conds,
562            is_unique: idx.unique,
563            index_columns: idx.columns.clone(),
564        },
565    ))
566}
567
568pub fn describe_plan(plan: &ScanPlan, table_schema: &TableSchema) -> String {
569    match plan {
570        ScanPlan::SeqScan => String::new(),
571
572        ScanPlan::PkLookup { pk_values } => {
573            let pk_cols: Vec<&str> = table_schema
574                .primary_key_columns
575                .iter()
576                .map(|&idx| table_schema.columns[idx as usize].name.as_str())
577                .collect();
578            let conditions: Vec<String> = pk_cols
579                .iter()
580                .zip(pk_values.iter())
581                .map(|(col, val)| format!("{col} = {}", format_value(val)))
582                .collect();
583            format!("USING PRIMARY KEY ({})", conditions.join(", "))
584        }
585
586        ScanPlan::PkRangeScan { range_conds, .. } => {
587            let pk_col = &table_schema.columns[table_schema.primary_key_columns[0] as usize].name;
588            let conditions: Vec<String> = range_conds
589                .iter()
590                .map(|(op, val)| format!("{pk_col} {} {}", op_symbol(*op), format_value(val)))
591                .collect();
592            format!("USING PRIMARY KEY RANGE ({})", conditions.join(", "))
593        }
594
595        ScanPlan::IndexScan {
596            index_name,
597            num_prefix_cols,
598            range_conds,
599            index_columns,
600            ..
601        } => {
602            let mut conditions = Vec::new();
603            for &col in index_columns.iter().take(*num_prefix_cols) {
604                let col_idx = col as usize;
605                let col_name = &table_schema.columns[col_idx].name;
606                conditions.push(format!("{col_name} = ?"));
607            }
608            if !range_conds.is_empty() && *num_prefix_cols < index_columns.len() {
609                let col_idx = index_columns[*num_prefix_cols] as usize;
610                let col_name = &table_schema.columns[col_idx].name;
611                for (op, _) in range_conds {
612                    conditions.push(format!("{col_name} {} ?", op_symbol(*op)));
613                }
614            }
615            if conditions.is_empty() {
616                format!("USING INDEX {index_name}")
617            } else {
618                format!("USING INDEX {index_name} ({})", conditions.join(", "))
619            }
620        }
621
622        ScanPlan::InvertedScan { .. } => "USING INVERTED INDEX".to_string(),
623    }
624}
625
626fn format_value(val: &Value) -> String {
627    match val {
628        Value::Null => "NULL".into(),
629        Value::Integer(i) => i.to_string(),
630        Value::Real(f) => format!("{f}"),
631        Value::Text(s) => format!("'{s}'"),
632        Value::Blob(_) => "BLOB".into(),
633        Value::Boolean(b) => b.to_string(),
634        Value::Date(d) => format!("DATE '{}'", crate::datetime::format_date(*d)),
635        Value::Time(t) => format!("TIME '{}'", crate::datetime::format_time(*t)),
636        Value::Timestamp(t) => format!("TIMESTAMP '{}'", crate::datetime::format_timestamp(*t)),
637        Value::Interval {
638            months,
639            days,
640            micros,
641        } => format!(
642            "INTERVAL '{}'",
643            crate::datetime::format_interval(*months, *days, *micros)
644        ),
645        Value::Json(s) => format!("JSON '{s}'"),
646        Value::Jsonb(_) => "JSONB '<binary>'".into(),
647        Value::TsVector(_) => "TSVECTOR '<binary>'".into(),
648        Value::TsQuery(_) => "TSQUERY '<binary>'".into(),
649    }
650}
651
652fn op_symbol(op: BinOp) -> &'static str {
653    match op {
654        BinOp::Lt => "<",
655        BinOp::LtEq => "<=",
656        BinOp::Gt => ">",
657        BinOp::GtEq => ">=",
658        BinOp::Eq => "=",
659        BinOp::NotEq => "!=",
660        _ => "?",
661    }
662}
663
664#[cfg(test)]
665#[path = "planner_tests.rs"]
666mod tests;