Skip to main content

citadel_sql/
planner.rs

1//! Query planner: chooses between seq scan, PK lookup, or index scan.
2
3use crate::encoding::encode_composite_key;
4use crate::parser::{BinOp, Expr};
5use crate::types::{IndexDef, IndexKind, TableSchema, Value};
6
7#[derive(Debug, Clone)]
8pub enum ScanPlan {
9    SeqScan,
10    PkLookup {
11        pk_values: Vec<Value>,
12    },
13    PkRangeScan {
14        start_key: Vec<u8>,
15        range_conds: Vec<(BinOp, Value)>,
16        num_pk_cols: usize,
17    },
18    IndexScan {
19        index_name: String,
20        idx_table: Vec<u8>,
21        prefix: Vec<u8>,
22        num_prefix_cols: usize,
23        range_conds: Vec<(BinOp, Value)>,
24        is_unique: bool,
25        index_columns: Vec<u16>,
26    },
27    /// GIN inverted-index scan for `column @> predicate`. Returns candidate
28    /// row PKs; caller must recheck the full predicate on the heap row.
29    GinScan {
30        idx_table: Vec<u8>,
31        column_idx: u16,
32        probe_entries: Vec<Vec<u8>>,
33        recheck_expr: Expr,
34    },
35}
36
37struct SimplePredicate {
38    col_idx: usize,
39    op: BinOp,
40    value: Value,
41}
42
43fn flatten_and(expr: &Expr) -> Vec<&Expr> {
44    match expr {
45        Expr::BinaryOp {
46            left,
47            op: BinOp::And,
48            right,
49        } => {
50            let mut v = flatten_and(left);
51            v.extend(flatten_and(right));
52            v
53        }
54        _ => vec![expr],
55    }
56}
57
58fn is_comparison(op: BinOp) -> bool {
59    matches!(
60        op,
61        BinOp::Eq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq
62    )
63}
64
65fn is_range_op(op: BinOp) -> bool {
66    matches!(op, BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq)
67}
68
69fn flip_op(op: BinOp) -> BinOp {
70    match op {
71        BinOp::Lt => BinOp::Gt,
72        BinOp::LtEq => BinOp::GtEq,
73        BinOp::Gt => BinOp::Lt,
74        BinOp::GtEq => BinOp::LtEq,
75        other => other,
76    }
77}
78
79fn resolve_column_name(expr: &Expr) -> Option<&str> {
80    match expr {
81        Expr::Column(name) => Some(name.as_str()),
82        Expr::QualifiedColumn { column, .. } => Some(column.as_str()),
83        _ => None,
84    }
85}
86
87fn resolve_literal(expr: &Expr) -> Option<Value> {
88    match expr {
89        Expr::Literal(v) => Some(v.clone()),
90        Expr::Parameter(n) => crate::eval::resolve_scoped_param(*n).ok(),
91        _ => None,
92    }
93}
94
95fn extract_simple_predicate(expr: &Expr, schema: &TableSchema) -> Option<SimplePredicate> {
96    match expr {
97        Expr::BinaryOp { left, op, right } if is_comparison(*op) => {
98            if let (Some(name), Some(val)) = (resolve_column_name(left), resolve_literal(right)) {
99                let col_idx = schema.column_index(name)?;
100                return Some(SimplePredicate {
101                    col_idx,
102                    op: *op,
103                    value: val,
104                });
105            }
106            if let (Some(val), Some(name)) = (resolve_literal(left), resolve_column_name(right)) {
107                let col_idx = schema.column_index(name)?;
108                return Some(SimplePredicate {
109                    col_idx,
110                    op: flip_op(*op),
111                    value: val,
112                });
113            }
114            None
115        }
116        _ => None,
117    }
118}
119
120/// Decompose BETWEEN into two range predicates for planner use.
121fn flatten_between(expr: &Expr, schema: &TableSchema, out: &mut Vec<SimplePredicate>) {
122    match expr {
123        Expr::Between {
124            expr: col_expr,
125            low,
126            high,
127            negated: false,
128        } => {
129            if let (Some(name), Some(lo), Some(hi)) = (
130                resolve_column_name(col_expr),
131                resolve_literal(low),
132                resolve_literal(high),
133            ) {
134                if let Some(col_idx) = schema.column_index(name) {
135                    out.push(SimplePredicate {
136                        col_idx,
137                        op: BinOp::GtEq,
138                        value: lo,
139                    });
140                    out.push(SimplePredicate {
141                        col_idx,
142                        op: BinOp::LtEq,
143                        value: hi,
144                    });
145                }
146            }
147        }
148        Expr::BinaryOp {
149            left,
150            op: BinOp::And,
151            right,
152        } => {
153            flatten_between(left, schema, out);
154            flatten_between(right, schema, out);
155        }
156        _ => {}
157    }
158}
159
160pub fn plan_select(schema: &TableSchema, where_clause: &Option<Expr>) -> ScanPlan {
161    plan_select_inner(schema, where_clause, false)
162}
163
164/// Same as `plan_select` but allows returning `ScanPlan::GinScan` when an
165/// inverted index covers the predicate. The caller MUST handle the GinScan
166/// variant; non-supporting paths should use `plan_select` instead.
167pub fn plan_select_with_gin(schema: &TableSchema, where_clause: &Option<Expr>) -> ScanPlan {
168    plan_select_inner(schema, where_clause, true)
169}
170
171fn plan_select_inner(
172    schema: &TableSchema,
173    where_clause: &Option<Expr>,
174    allow_gin: bool,
175) -> ScanPlan {
176    let where_expr = match where_clause {
177        Some(e) => e,
178        None => return ScanPlan::SeqScan,
179    };
180
181    let predicates = flatten_and(where_expr);
182    let simple: Vec<Option<SimplePredicate>> = predicates
183        .iter()
184        .map(|p| extract_simple_predicate(p, schema))
185        .collect();
186
187    if let Some(plan) = try_pk_lookup(schema, &simple) {
188        return plan;
189    }
190
191    let mut range_preds: Vec<SimplePredicate> = simple
192        .iter()
193        .filter_map(|p| {
194            let p = p.as_ref()?;
195            if is_range_op(p.op) {
196                Some(SimplePredicate {
197                    col_idx: p.col_idx,
198                    op: p.op,
199                    value: p.value.clone(),
200                })
201            } else {
202                None
203            }
204        })
205        .collect();
206    flatten_between(where_expr, schema, &mut range_preds);
207
208    if let Some(plan) = try_pk_range_scan(schema, &range_preds) {
209        return plan;
210    }
211
212    if allow_gin {
213        if let Some(plan) = try_gin_scan(schema, where_expr) {
214            return plan;
215        }
216    }
217
218    if let Some(plan) = try_best_index(schema, where_expr, &simple) {
219        return plan;
220    }
221
222    ScanPlan::SeqScan
223}
224
225fn try_gin_scan(schema: &TableSchema, where_expr: &Expr) -> Option<ScanPlan> {
226    use crate::parser::BinOp as B;
227    let (col_idx, rhs_val) = match where_expr {
228        Expr::BinaryOp {
229            left,
230            op: B::JsonContains,
231            right,
232        } => {
233            let name = resolve_column_name(left)?;
234            let col_idx = schema.column_index(name)? as u16;
235            let rhs = resolve_literal(right)?;
236            (col_idx, rhs)
237        }
238        _ => return None,
239    };
240    let idx = schema.indices.iter().find(|i| {
241        matches!(i.kind, IndexKind::Gin(_))
242            && i.columns.first().is_some_and(|&c| c == col_idx)
243            && i.predicate_expr.is_none()
244    })?;
245    let ops = match idx.kind {
246        IndexKind::Gin(o) => o,
247        _ => return None,
248    };
249    // Skip 0x01 key-exists entries — they match every row with the key (no selectivity).
250    let probe_entries: Vec<Vec<u8>> = crate::json::extract_gin_entries(&rhs_val, ops)
251        .ok()?
252        .into_iter()
253        .filter(|e| !matches!(e.first(), Some(&0x01)))
254        .collect();
255    if probe_entries.is_empty() {
256        return None;
257    }
258    let idx_table = TableSchema::index_table_name(&schema.name, &idx.name);
259    Some(ScanPlan::GinScan {
260        idx_table,
261        column_idx: col_idx,
262        probe_entries,
263        recheck_expr: where_expr.clone(),
264    })
265}
266
267fn try_pk_range_scan(schema: &TableSchema, range_preds: &[SimplePredicate]) -> Option<ScanPlan> {
268    if schema.primary_key_columns.len() != 1 {
269        return None; // Only single-column PK for now
270    }
271    let pk_col = schema.primary_key_columns[0] as usize;
272    let conds: Vec<(BinOp, Value)> = range_preds
273        .iter()
274        .filter(|p| p.col_idx == pk_col)
275        .map(|p| (p.op, p.value.clone()))
276        .collect();
277    if conds.is_empty() {
278        return None;
279    }
280    let start_key = conds
281        .iter()
282        .filter(|(op, _)| matches!(op, BinOp::GtEq | BinOp::Gt))
283        .map(|(_, v)| encode_composite_key(std::slice::from_ref(v)))
284        .min_by(|a, b| a.cmp(b))
285        .unwrap_or_default();
286    Some(ScanPlan::PkRangeScan {
287        start_key,
288        range_conds: conds,
289        num_pk_cols: 1,
290    })
291}
292
293fn try_pk_lookup(schema: &TableSchema, predicates: &[Option<SimplePredicate>]) -> Option<ScanPlan> {
294    let pk_cols = &schema.primary_key_columns;
295    let mut pk_values: Vec<Option<Value>> = vec![None; pk_cols.len()];
296
297    for pred in predicates.iter().flatten() {
298        if pred.op == BinOp::Eq {
299            if let Some(pk_pos) = pk_cols.iter().position(|&c| c == pred.col_idx as u16) {
300                pk_values[pk_pos] = Some(pred.value.clone());
301            }
302        }
303    }
304
305    if pk_values.iter().all(|v| v.is_some()) {
306        let values: Vec<Value> = pk_values.into_iter().map(|v| v.unwrap()).collect();
307        Some(ScanPlan::PkLookup { pk_values: values })
308    } else {
309        None
310    }
311}
312
313#[derive(PartialEq, Eq, PartialOrd, Ord)]
314struct IndexScore {
315    num_equality: usize,
316    has_range: bool,
317    is_unique: bool,
318}
319
320fn try_best_index(
321    schema: &TableSchema,
322    where_expr: &Expr,
323    predicates: &[Option<SimplePredicate>],
324) -> Option<ScanPlan> {
325    let mut best_score: Option<IndexScore> = None;
326    let mut best_plan: Option<ScanPlan> = None;
327
328    let conjuncts = flatten_and(where_expr);
329    for idx in &schema.indices {
330        if !partial_predicate_implied(idx, where_expr, &conjuncts) {
331            continue;
332        }
333        if let Some((score, plan)) = try_index_scan(schema, idx, predicates) {
334            if best_score.is_none() || score > *best_score.as_ref().unwrap() {
335                best_score = Some(score);
336                best_plan = Some(plan);
337            }
338        }
339    }
340
341    best_plan
342}
343
344fn partial_predicate_implied(idx: &IndexDef, where_expr: &Expr, conjuncts: &[&Expr]) -> bool {
345    let Some(pred) = idx.predicate_expr.as_ref() else {
346        return true;
347    };
348    if expr_structurally_eq(pred, where_expr) {
349        return true;
350    }
351    if conjuncts.iter().any(|c| expr_structurally_eq(pred, c)) {
352        return true;
353    }
354    if let Expr::IsNotNull(target) = pred {
355        if let Expr::Column(col) = target.as_ref() {
356            return conjuncts.iter().any(|c| conjunct_proves_not_null(c, col));
357        }
358    }
359    false
360}
361
362fn expr_structurally_eq(a: &Expr, b: &Expr) -> bool {
363    format!("{a:?}") == format!("{b:?}")
364}
365
366fn conjunct_proves_not_null(expr: &Expr, col: &str) -> bool {
367    let mentions = |e: &Expr| matches!(e, Expr::Column(n) if n.eq_ignore_ascii_case(col));
368    match expr {
369        Expr::BinaryOp {
370            left,
371            op: BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq,
372            right,
373        } => mentions(left) || mentions(right),
374        Expr::IsNotNull(inner) => mentions(inner),
375        _ => false,
376    }
377}
378
379fn try_index_scan(
380    schema: &TableSchema,
381    idx: &IndexDef,
382    predicates: &[Option<SimplePredicate>],
383) -> Option<(IndexScore, ScanPlan)> {
384    let mut used = Vec::new();
385    let mut equality_values: Vec<Value> = Vec::new();
386    let mut range_conds: Vec<(BinOp, Value)> = Vec::new();
387
388    for &col_idx in &idx.columns {
389        let mut found_eq = false;
390        for (i, pred) in predicates.iter().enumerate() {
391            if used.contains(&i) {
392                continue;
393            }
394            if let Some(sp) = pred {
395                if sp.col_idx == col_idx as usize && sp.op == BinOp::Eq {
396                    equality_values.push(sp.value.clone());
397                    used.push(i);
398                    found_eq = true;
399                    break;
400                }
401            }
402        }
403        if !found_eq {
404            for (i, pred) in predicates.iter().enumerate() {
405                if used.contains(&i) {
406                    continue;
407                }
408                if let Some(sp) = pred {
409                    if sp.col_idx == col_idx as usize && is_range_op(sp.op) {
410                        range_conds.push((sp.op, sp.value.clone()));
411                        used.push(i);
412                    }
413                }
414            }
415            break;
416        }
417    }
418
419    if equality_values.is_empty() && range_conds.is_empty() {
420        return None;
421    }
422
423    let score = IndexScore {
424        num_equality: equality_values.len(),
425        has_range: !range_conds.is_empty(),
426        is_unique: idx.unique,
427    };
428
429    let prefix = encode_composite_key(&equality_values);
430    let idx_table = TableSchema::index_table_name(&schema.name, &idx.name);
431
432    Some((
433        score,
434        ScanPlan::IndexScan {
435            index_name: idx.name.clone(),
436            idx_table,
437            prefix,
438            num_prefix_cols: equality_values.len(),
439            range_conds,
440            is_unique: idx.unique,
441            index_columns: idx.columns.clone(),
442        },
443    ))
444}
445
446pub fn describe_plan(plan: &ScanPlan, table_schema: &TableSchema) -> String {
447    match plan {
448        ScanPlan::SeqScan => String::new(),
449
450        ScanPlan::PkLookup { pk_values } => {
451            let pk_cols: Vec<&str> = table_schema
452                .primary_key_columns
453                .iter()
454                .map(|&idx| table_schema.columns[idx as usize].name.as_str())
455                .collect();
456            let conditions: Vec<String> = pk_cols
457                .iter()
458                .zip(pk_values.iter())
459                .map(|(col, val)| format!("{col} = {}", format_value(val)))
460                .collect();
461            format!("USING PRIMARY KEY ({})", conditions.join(", "))
462        }
463
464        ScanPlan::PkRangeScan { range_conds, .. } => {
465            let pk_col = &table_schema.columns[table_schema.primary_key_columns[0] as usize].name;
466            let conditions: Vec<String> = range_conds
467                .iter()
468                .map(|(op, val)| format!("{pk_col} {} {}", op_symbol(*op), format_value(val)))
469                .collect();
470            format!("USING PRIMARY KEY RANGE ({})", conditions.join(", "))
471        }
472
473        ScanPlan::IndexScan {
474            index_name,
475            num_prefix_cols,
476            range_conds,
477            index_columns,
478            ..
479        } => {
480            let mut conditions = Vec::new();
481            for &col in index_columns.iter().take(*num_prefix_cols) {
482                let col_idx = col as usize;
483                let col_name = &table_schema.columns[col_idx].name;
484                conditions.push(format!("{col_name} = ?"));
485            }
486            if !range_conds.is_empty() && *num_prefix_cols < index_columns.len() {
487                let col_idx = index_columns[*num_prefix_cols] as usize;
488                let col_name = &table_schema.columns[col_idx].name;
489                for (op, _) in range_conds {
490                    conditions.push(format!("{col_name} {} ?", op_symbol(*op)));
491                }
492            }
493            if conditions.is_empty() {
494                format!("USING INDEX {index_name}")
495            } else {
496                format!("USING INDEX {index_name} ({})", conditions.join(", "))
497            }
498        }
499
500        ScanPlan::GinScan { .. } => "USING GIN INDEX".to_string(),
501    }
502}
503
504fn format_value(val: &Value) -> String {
505    match val {
506        Value::Null => "NULL".into(),
507        Value::Integer(i) => i.to_string(),
508        Value::Real(f) => format!("{f}"),
509        Value::Text(s) => format!("'{s}'"),
510        Value::Blob(_) => "BLOB".into(),
511        Value::Boolean(b) => b.to_string(),
512        Value::Date(d) => format!("DATE '{}'", crate::datetime::format_date(*d)),
513        Value::Time(t) => format!("TIME '{}'", crate::datetime::format_time(*t)),
514        Value::Timestamp(t) => format!("TIMESTAMP '{}'", crate::datetime::format_timestamp(*t)),
515        Value::Interval {
516            months,
517            days,
518            micros,
519        } => format!(
520            "INTERVAL '{}'",
521            crate::datetime::format_interval(*months, *days, *micros)
522        ),
523        Value::Json(s) => format!("JSON '{s}'"),
524        Value::Jsonb(_) => "JSONB '<binary>'".into(),
525    }
526}
527
528fn op_symbol(op: BinOp) -> &'static str {
529    match op {
530        BinOp::Lt => "<",
531        BinOp::LtEq => "<=",
532        BinOp::Gt => ">",
533        BinOp::GtEq => ">=",
534        BinOp::Eq => "=",
535        BinOp::NotEq => "!=",
536        _ => "?",
537    }
538}
539
540#[cfg(test)]
541#[path = "planner_tests.rs"]
542mod tests;