cynos_query/optimizer/
index_selection.rs

1//! Index selection optimization pass.
2
3use crate::ast::{BinaryOp, Expr};
4use crate::context::{ExecutionContext, IndexInfo};
5use crate::optimizer::OptimizerPass;
6use crate::planner::LogicalPlan;
7use alloc::boxed::Box;
8use alloc::format;
9use alloc::string::String;
10use alloc::vec::Vec;
11use cynos_core::Value;
12
13/// Index selection optimization.
14///
15/// Analyzes predicates and selects appropriate indexes for scans.
16/// This pass identifies Filter(Scan) patterns where the filter predicate
17/// can be satisfied using an index, converting them to IndexScan operations.
18///
19/// Supports:
20/// - Point lookups: `col = value` → IndexGet
21/// - Range scans: `col > value`, `col < value`, etc. → IndexScan
22/// - IN queries: `col IN (v1, v2, v3)` → IndexInGet
23/// - JSONB queries with GIN indexes → GinIndexScan
24pub struct IndexSelection {
25    /// Execution context with table statistics and index information.
26    context: Option<ExecutionContext>,
27}
28
29impl Default for IndexSelection {
30    fn default() -> Self {
31        Self::new()
32    }
33}
34
35impl IndexSelection {
36    /// Creates a new index selection pass without context.
37    pub fn new() -> Self {
38        Self { context: None }
39    }
40
41    /// Creates a new index selection pass with execution context.
42    pub fn with_context(context: ExecutionContext) -> Self {
43        Self {
44            context: Some(context),
45        }
46    }
47}
48
49impl OptimizerPass for IndexSelection {
50    fn optimize(&self, plan: LogicalPlan) -> LogicalPlan {
51        self.select_indexes(plan)
52    }
53
54    fn name(&self) -> &'static str {
55        "index_selection"
56    }
57}
58
59/// Information extracted from a predicate for index selection.
60#[derive(Debug, Clone)]
61pub struct PredicateInfo {
62    /// Table name referenced by the predicate.
63    pub table: String,
64    /// Column name referenced by the predicate.
65    pub column: String,
66    /// The comparison operator.
67    pub op: BinaryOp,
68    /// The literal value being compared (if any).
69    pub value: Option<Value>,
70    /// Whether this is a range predicate (can use index range scan).
71    pub is_range: bool,
72    /// Whether this is a point lookup (can use index get).
73    pub is_point_lookup: bool,
74}
75
76/// Information extracted from an IN predicate for index selection.
77#[derive(Debug, Clone)]
78#[allow(dead_code)]
79pub struct InPredicateInfo {
80    /// Table name referenced by the predicate.
81    pub table: String,
82    /// Column name referenced by the predicate.
83    pub column: String,
84    /// The literal values in the IN list.
85    pub values: Vec<Value>,
86}
87
88/// Information about a GIN-indexable predicate.
89#[derive(Debug, Clone)]
90struct GinPredicateInfo {
91    index: String,
92    column: String,
93    column_index: usize,
94    path: String,
95    value: Option<Value>,
96    query_type: String,
97}
98
99impl IndexSelection {
100    fn select_indexes(&self, plan: LogicalPlan) -> LogicalPlan {
101        match plan {
102            // Look for Filter on Scan patterns that could use an index
103            LogicalPlan::Filter { input, predicate } => {
104                let optimized_input = self.select_indexes(*input);
105
106                // Try to convert Filter(Scan) to IndexScan
107                if let LogicalPlan::Scan { ref table } = optimized_input {
108                    if let Some(index_plan) =
109                        self.try_use_index(table, &predicate, optimized_input.clone())
110                    {
111                        return index_plan;
112                    }
113                }
114
115                LogicalPlan::Filter {
116                    input: Box::new(optimized_input),
117                    predicate,
118                }
119            }
120
121            LogicalPlan::Project { input, columns } => LogicalPlan::Project {
122                input: Box::new(self.select_indexes(*input)),
123                columns,
124            },
125
126            LogicalPlan::Join {
127                left,
128                right,
129                condition,
130                join_type,
131            } => LogicalPlan::Join {
132                left: Box::new(self.select_indexes(*left)),
133                right: Box::new(self.select_indexes(*right)),
134                condition,
135                join_type,
136            },
137
138            LogicalPlan::Aggregate {
139                input,
140                group_by,
141                aggregates,
142            } => LogicalPlan::Aggregate {
143                input: Box::new(self.select_indexes(*input)),
144                group_by,
145                aggregates,
146            },
147
148            LogicalPlan::Sort { input, order_by } => LogicalPlan::Sort {
149                input: Box::new(self.select_indexes(*input)),
150                order_by,
151            },
152
153            LogicalPlan::Limit {
154                input,
155                limit,
156                offset,
157            } => LogicalPlan::Limit {
158                input: Box::new(self.select_indexes(*input)),
159                limit,
160                offset,
161            },
162
163            LogicalPlan::CrossProduct { left, right } => LogicalPlan::CrossProduct {
164                left: Box::new(self.select_indexes(*left)),
165                right: Box::new(self.select_indexes(*right)),
166            },
167
168            LogicalPlan::Union { left, right, all } => LogicalPlan::Union {
169                left: Box::new(self.select_indexes(*left)),
170                right: Box::new(self.select_indexes(*right)),
171                all,
172            },
173
174            // Leaf nodes
175            LogicalPlan::Scan { .. }
176            | LogicalPlan::IndexScan { .. }
177            | LogicalPlan::IndexGet { .. }
178            | LogicalPlan::IndexInGet { .. }
179            | LogicalPlan::GinIndexScan { .. }
180            | LogicalPlan::GinIndexScanMulti { .. }
181            | LogicalPlan::Empty => plan,
182        }
183    }
184
185    /// Attempts to use an index for the given predicate.
186    fn try_use_index(
187        &self,
188        table: &str,
189        predicate: &Expr,
190        _original: LogicalPlan,
191    ) -> Option<LogicalPlan> {
192        // Check if we have context with index information
193        let ctx = self.context.as_ref()?;
194
195        // First, try to handle IN predicates
196        if let Some(in_info) = self.analyze_in_predicate(predicate) {
197            // Find an index that covers the IN column
198            if let Some(index) = ctx.find_index(table, &[in_info.column.as_str()]) {
199                // Use IndexInGet for IN queries with indexed columns
200                return Some(LogicalPlan::IndexInGet {
201                    table: table.into(),
202                    index: index.name.clone(),
203                    keys: in_info.values,
204                });
205            }
206        }
207
208        // Try to handle BETWEEN predicates
209        if let Some(between_plan) = self.try_use_between_index(table, predicate, ctx) {
210            return Some(between_plan);
211        }
212
213        // Then, try to use GIN index for JSONB function queries
214        if let Some(gin_plan) = self.try_use_gin_index(table, predicate, ctx) {
215            return Some(gin_plan);
216        }
217
218        // Try to handle AND compound predicates for B-Tree indexes
219        if let Some(btree_plan) = self.try_use_btree_with_and(table, predicate, ctx) {
220            return Some(btree_plan);
221        }
222
223        // Extract predicate information for B-Tree index (simple predicate)
224        let pred_info = self.analyze_predicate(predicate)?;
225
226        // Find an index that covers the predicate column
227        let index = ctx.find_index(table, &[pred_info.column.as_str()])?;
228
229        // Skip GIN indexes for regular predicates
230        if index.is_gin() {
231            return None;
232        }
233
234        // Decide whether to use IndexScan or IndexGet based on predicate type
235        if pred_info.is_point_lookup {
236            // Use IndexGet for equality lookups
237            if let Some(value) = pred_info.value {
238                return Some(LogicalPlan::IndexGet {
239                    table: table.into(),
240                    index: index.name.clone(),
241                    key: value,
242                });
243            }
244        } else if pred_info.is_range {
245            // Use IndexScan for range predicates
246            let (range_start, range_end, include_start, include_end) =
247                self.compute_range(&pred_info);
248            return Some(LogicalPlan::IndexScan {
249                table: table.into(),
250                index: index.name.clone(),
251                range_start,
252                range_end,
253                include_start,
254                include_end,
255            });
256        }
257
258        None
259    }
260
261    /// Attempts to use an index for BETWEEN predicates.
262    fn try_use_between_index(
263        &self,
264        table: &str,
265        predicate: &Expr,
266        ctx: &ExecutionContext,
267    ) -> Option<LogicalPlan> {
268        if let Expr::Between { expr, low, high } = predicate {
269            // Check if expr is a column reference
270            if let Expr::Column(col) = expr.as_ref() {
271                // Check if low and high are literals
272                if let (Expr::Literal(low_val), Expr::Literal(high_val)) = (low.as_ref(), high.as_ref()) {
273                    // Find an index for this column
274                    if let Some(index) = ctx.find_index(table, &[col.column.as_str()]) {
275                        // Skip GIN indexes
276                        if index.is_gin() {
277                            return None;
278                        }
279                        // Use IndexScan for BETWEEN
280                        return Some(LogicalPlan::IndexScan {
281                            table: table.into(),
282                            index: index.name.clone(),
283                            range_start: Some(low_val.clone()),
284                            range_end: Some(high_val.clone()),
285                            include_start: true,
286                            include_end: true,
287                        });
288                    }
289                }
290            }
291        }
292        None
293    }
294
295    /// Attempts to use a B-Tree index for AND compound predicates.
296    /// Extracts sub-predicates from AND, finds the best indexable predicate,
297    /// converts to IndexScan/IndexGet, and keeps remaining predicates as Filter.
298    fn try_use_btree_with_and(
299        &self,
300        table: &str,
301        predicate: &Expr,
302        ctx: &ExecutionContext,
303    ) -> Option<LogicalPlan> {
304        // Only handle AND compound predicates
305        if !matches!(predicate, Expr::BinaryOp { op: BinaryOp::And, .. }) {
306            return None;
307        }
308
309        // Extract all sub-predicates from AND
310        let (indexable, remaining) = self.extract_btree_and_remaining_predicates(predicate, table, ctx);
311
312        if indexable.is_empty() {
313            return None;
314        }
315
316        // Find the best indexable predicate (prioritize point lookups over range scans)
317        let (best_pred, best_info, best_index) = self.select_best_btree_predicate(&indexable)?;
318
319        // Build the index plan
320        let index_plan = if best_info.is_point_lookup {
321            LogicalPlan::IndexGet {
322                table: table.into(),
323                index: best_index.name.clone(),
324                key: best_info.value.clone()?,
325            }
326        } else {
327            let (range_start, range_end, include_start, include_end) =
328                self.compute_range(&best_info);
329            LogicalPlan::IndexScan {
330                table: table.into(),
331                index: best_index.name.clone(),
332                range_start,
333                range_end,
334                include_start,
335                include_end,
336            }
337        };
338
339        // Collect remaining predicates (non-indexable + indexable but not selected)
340        let mut all_remaining: Vec<Expr> = remaining;
341        for (pred, _, _) in &indexable {
342            if !Self::expr_eq(pred, &best_pred) {
343                all_remaining.push(pred.clone());
344            }
345        }
346
347        // If there are remaining predicates, wrap with Filter
348        if all_remaining.is_empty() {
349            Some(index_plan)
350        } else {
351            let combined_predicate = all_remaining
352                .into_iter()
353                .reduce(|acc, pred| Expr::and(acc, pred))
354                .unwrap();
355
356            Some(LogicalPlan::Filter {
357                input: Box::new(index_plan),
358                predicate: combined_predicate,
359            })
360        }
361    }
362
363    /// Extracts B-Tree indexable predicates and remaining predicates from an AND expression.
364    fn extract_btree_and_remaining_predicates(
365        &self,
366        predicate: &Expr,
367        table: &str,
368        ctx: &ExecutionContext,
369    ) -> (Vec<(Expr, PredicateInfo, IndexInfo)>, Vec<Expr>) {
370        let mut indexable = Vec::new();
371        let mut remaining = Vec::new();
372        self.extract_btree_and_remaining_recursive(predicate, table, ctx, &mut indexable, &mut remaining);
373        (indexable, remaining)
374    }
375
376    fn extract_btree_and_remaining_recursive(
377        &self,
378        predicate: &Expr,
379        table: &str,
380        ctx: &ExecutionContext,
381        indexable: &mut Vec<(Expr, PredicateInfo, IndexInfo)>,
382        remaining: &mut Vec<Expr>,
383    ) {
384        match predicate {
385            Expr::BinaryOp {
386                left,
387                op: BinaryOp::And,
388                right,
389            } => {
390                self.extract_btree_and_remaining_recursive(left, table, ctx, indexable, remaining);
391                self.extract_btree_and_remaining_recursive(right, table, ctx, indexable, remaining);
392            }
393            _ => {
394                // Try to analyze as a simple predicate
395                if let Some(pred_info) = self.analyze_predicate(predicate) {
396                    // Check if there's a B-Tree index for this column
397                    if let Some(index) = ctx.find_index(table, &[pred_info.column.as_str()]) {
398                        if !index.is_gin() && (pred_info.is_point_lookup || pred_info.is_range) {
399                            indexable.push((predicate.clone(), pred_info, index.clone()));
400                            return;
401                        }
402                    }
403                }
404                // Not indexable - add to remaining
405                remaining.push(predicate.clone());
406            }
407        }
408    }
409
410    /// Selects the best B-Tree predicate for index usage.
411    /// Priority: point lookup (IndexGet) > range scan (IndexScan)
412    fn select_best_btree_predicate(
413        &self,
414        indexable: &[(Expr, PredicateInfo, IndexInfo)],
415    ) -> Option<(Expr, PredicateInfo, IndexInfo)> {
416        // First, try to find a point lookup (equality)
417        for (pred, info, index) in indexable {
418            if info.is_point_lookup && info.value.is_some() {
419                return Some((pred.clone(), info.clone(), index.clone()));
420            }
421        }
422
423        // Fall back to range scan
424        for (pred, info, index) in indexable {
425            if info.is_range && info.value.is_some() {
426                return Some((pred.clone(), info.clone(), index.clone()));
427            }
428        }
429
430        None
431    }
432
433    /// Simple expression equality check for filtering out the selected predicate.
434    fn expr_eq(a: &Expr, b: &Expr) -> bool {
435        // Use debug representation for simple equality check
436        format!("{:?}", a) == format!("{:?}", b)
437    }
438
439    /// Analyzes an IN predicate to extract index-relevant information.
440    fn analyze_in_predicate(&self, predicate: &Expr) -> Option<InPredicateInfo> {
441        match predicate {
442            Expr::In { expr, list } => {
443                // Check if expr is a column reference
444                if let Expr::Column(col) = expr.as_ref() {
445                    // Extract all literal values from the list
446                    let values: Vec<Value> = list
447                        .iter()
448                        .filter_map(|item| {
449                            if let Expr::Literal(val) = item {
450                                Some(val.clone())
451                            } else {
452                                None
453                            }
454                        })
455                        .collect();
456
457                    // Only use index if all values are literals
458                    if values.len() == list.len() && !values.is_empty() {
459                        return Some(InPredicateInfo {
460                            table: col.table.clone(),
461                            column: col.column.clone(),
462                            values,
463                        });
464                    }
465                }
466                None
467            }
468            _ => None,
469        }
470    }
471
472
473    /// Attempts to use a GIN index for JSONB function queries.
474    /// Supports both single predicates and AND combinations of multiple predicates.
475    ///
476    /// Returns a tuple of (GIN plan, remaining predicates that couldn't be handled by GIN).
477    /// The remaining predicates should be wrapped as a Filter around the GIN plan.
478    fn try_use_gin_index(
479        &self,
480        table: &str,
481        predicate: &Expr,
482        ctx: &ExecutionContext,
483    ) -> Option<LogicalPlan> {
484        // Extract GIN predicates and collect remaining non-GIN predicates
485        let (gin_predicates, remaining_predicates) = self.extract_gin_and_remaining_predicates(predicate, table, ctx);
486
487        if gin_predicates.is_empty() {
488            return None;
489        }
490
491        // Build the GIN plan
492        let gin_plan = if gin_predicates.len() > 1 {
493            // Multiple GIN predicates - try to use GinIndexScanMulti for better performance
494            let first_index = gin_predicates[0].index.clone();
495            let first_column = gin_predicates[0].column.clone();
496            let all_same_index = gin_predicates.iter().all(|p| p.index == first_index && p.column == first_column);
497            let all_have_values = gin_predicates.iter().all(|p| p.value.is_some());
498
499            if all_same_index && all_have_values {
500                let pairs: Vec<(String, Value)> = gin_predicates
501                    .into_iter()
502                    .filter_map(|p| p.value.map(|v| (p.path, v)))
503                    .collect();
504
505                LogicalPlan::GinIndexScanMulti {
506                    table: table.into(),
507                    index: first_index,
508                    column: first_column,
509                    pairs,
510                }
511            } else {
512                // Fall back to single predicate if multi-predicate optimization fails
513                let info = gin_predicates.into_iter().next()?;
514                LogicalPlan::GinIndexScan {
515                    table: table.into(),
516                    index: info.index,
517                    column: info.column,
518                    column_index: info.column_index,
519                    path: info.path,
520                    value: info.value,
521                    query_type: info.query_type,
522                }
523            }
524        } else {
525            // Single GIN predicate
526            let info = gin_predicates.into_iter().next()?;
527            LogicalPlan::GinIndexScan {
528                table: table.into(),
529                index: info.index,
530                column: info.column,
531                column_index: info.column_index,
532                path: info.path,
533                value: info.value,
534                query_type: info.query_type,
535            }
536        };
537
538        // If there are remaining predicates, wrap the GIN plan with a Filter
539        if remaining_predicates.is_empty() {
540            Some(gin_plan)
541        } else {
542            // Combine remaining predicates with AND
543            let combined_predicate = remaining_predicates
544                .into_iter()
545                .reduce(|acc, pred| Expr::and(acc, pred))
546                .unwrap();
547
548            Some(LogicalPlan::Filter {
549                input: Box::new(gin_plan),
550                predicate: combined_predicate,
551            })
552        }
553    }
554
555    /// Extracts GIN-indexable predicates and remaining non-GIN predicates from an expression.
556    /// Returns (gin_predicates, remaining_predicates).
557    fn extract_gin_and_remaining_predicates(
558        &self,
559        predicate: &Expr,
560        table: &str,
561        ctx: &ExecutionContext,
562    ) -> (Vec<GinPredicateInfo>, Vec<Expr>) {
563        let mut gin_predicates = Vec::new();
564        let mut remaining_predicates = Vec::new();
565        self.extract_gin_and_remaining_recursive(predicate, table, ctx, &mut gin_predicates, &mut remaining_predicates);
566        (gin_predicates, remaining_predicates)
567    }
568
569    fn extract_gin_and_remaining_recursive(
570        &self,
571        predicate: &Expr,
572        table: &str,
573        ctx: &ExecutionContext,
574        gin_result: &mut Vec<GinPredicateInfo>,
575        remaining_result: &mut Vec<Expr>,
576    ) {
577        match predicate {
578            // Handle AND combinations - recursively process both sides
579            Expr::BinaryOp {
580                left,
581                op: BinaryOp::And,
582                right,
583            } => {
584                self.extract_gin_and_remaining_recursive(left, table, ctx, gin_result, remaining_result);
585                self.extract_gin_and_remaining_recursive(right, table, ctx, gin_result, remaining_result);
586            }
587            // Handle JSONB function calls - these can potentially use GIN index
588            Expr::Function { name, args } => {
589                if let Some(info) = self.analyze_gin_function(name, args, table, ctx) {
590                    gin_result.push(info);
591                } else {
592                    // Function that doesn't match GIN pattern - keep as remaining
593                    remaining_result.push(predicate.clone());
594                }
595            }
596            // All other predicates (BinaryOp with Eq/Lt/Gt, etc.) are non-GIN
597            _ => {
598                remaining_result.push(predicate.clone());
599            }
600        }
601    }
602
603    /// Analyzes a JSONB function call to extract GIN predicate information.
604    fn analyze_gin_function(
605        &self,
606        name: &str,
607        args: &[Expr],
608        table: &str,
609        ctx: &ExecutionContext,
610    ) -> Option<GinPredicateInfo> {
611        let func_name = name.to_uppercase();
612        match func_name.as_str() {
613            "JSONB_PATH_EQ" if args.len() >= 3 => {
614                if let Expr::Column(col) = &args[0] {
615                    let column_name = &col.column;
616                    let column_index = col.index;
617                    if let Some(index) = ctx.find_gin_index(table, column_name) {
618                        let path = self.extract_string_literal(&args[1])?;
619                        let value = self.extract_literal(&args[2]);
620                        return Some(GinPredicateInfo {
621                            index: index.name.clone(),
622                            column: column_name.clone(),
623                            column_index,
624                            path,
625                            value,
626                            query_type: "eq".into(),
627                        });
628                    }
629                }
630            }
631            "JSONB_CONTAINS" if args.len() >= 2 => {
632                if let Expr::Column(col) = &args[0] {
633                    let column_name = &col.column;
634                    let column_index = col.index;
635                    if let Some(index) = ctx.find_gin_index(table, column_name) {
636                        let path = self.extract_string_literal(&args[1])?;
637                        return Some(GinPredicateInfo {
638                            index: index.name.clone(),
639                            column: column_name.clone(),
640                            column_index,
641                            path,
642                            value: None,
643                            query_type: "contains".into(),
644                        });
645                    }
646                }
647            }
648            "JSONB_EXISTS" if args.len() >= 2 => {
649                if let Expr::Column(col) = &args[0] {
650                    let column_name = &col.column;
651                    let column_index = col.index;
652                    if let Some(index) = ctx.find_gin_index(table, column_name) {
653                        let path = self.extract_string_literal(&args[1])?;
654                        return Some(GinPredicateInfo {
655                            index: index.name.clone(),
656                            column: column_name.clone(),
657                            column_index,
658                            path,
659                            value: None,
660                            query_type: "exists".into(),
661                        });
662                    }
663                }
664            }
665            _ => {}
666        }
667        None
668    }
669
670    /// Extracts a string literal from an expression.
671    fn extract_string_literal(&self, expr: &Expr) -> Option<String> {
672        if let Expr::Literal(Value::String(s)) = expr {
673            Some(s.clone())
674        } else {
675            None
676        }
677    }
678
679    /// Extracts a literal value from an expression.
680    fn extract_literal(&self, expr: &Expr) -> Option<Value> {
681        if let Expr::Literal(v) = expr {
682            Some(v.clone())
683        } else {
684            None
685        }
686    }
687
688    /// Analyzes a predicate to extract index-relevant information.
689    fn analyze_predicate(&self, predicate: &Expr) -> Option<PredicateInfo> {
690        match predicate {
691            Expr::BinaryOp { left, op, right } => {
692                // Check for column = literal pattern
693                if let (Expr::Column(col), Expr::Literal(val)) = (left.as_ref(), right.as_ref()) {
694                    let is_point_lookup = *op == BinaryOp::Eq;
695                    let is_range = matches!(
696                        op,
697                        BinaryOp::Lt | BinaryOp::Le | BinaryOp::Gt | BinaryOp::Ge
698                    );
699
700                    return Some(PredicateInfo {
701                        table: col.table.clone(),
702                        column: col.column.clone(),
703                        op: *op,
704                        value: Some(val.clone()),
705                        is_range,
706                        is_point_lookup,
707                    });
708                }
709
710                // Check for literal = column pattern (reversed)
711                if let (Expr::Literal(val), Expr::Column(col)) = (left.as_ref(), right.as_ref()) {
712                    let reversed_op = match op {
713                        BinaryOp::Lt => BinaryOp::Gt,
714                        BinaryOp::Le => BinaryOp::Ge,
715                        BinaryOp::Gt => BinaryOp::Lt,
716                        BinaryOp::Ge => BinaryOp::Le,
717                        other => *other,
718                    };
719                    let is_point_lookup = *op == BinaryOp::Eq;
720                    let is_range = matches!(
721                        op,
722                        BinaryOp::Lt | BinaryOp::Le | BinaryOp::Gt | BinaryOp::Ge
723                    );
724
725                    return Some(PredicateInfo {
726                        table: col.table.clone(),
727                        column: col.column.clone(),
728                        op: reversed_op,
729                        value: Some(val.clone()),
730                        is_range,
731                        is_point_lookup,
732                    });
733                }
734
735                None
736            }
737            _ => None,
738        }
739    }
740
741    /// Computes the range bounds for an index scan.
742    fn compute_range(
743        &self,
744        pred_info: &PredicateInfo,
745    ) -> (Option<Value>, Option<Value>, bool, bool) {
746        let value = pred_info.value.clone();
747
748        match pred_info.op {
749            BinaryOp::Eq => (value.clone(), value, true, true),
750            BinaryOp::Lt => (None, value, true, false),
751            BinaryOp::Le => (None, value, true, true),
752            BinaryOp::Gt => (value, None, false, true),
753            BinaryOp::Ge => (value, None, true, true),
754            _ => (None, None, true, true),
755        }
756    }
757
758    /// Extracts all simple predicates from a compound predicate.
759    pub fn extract_predicates(&self, predicate: &Expr) -> Vec<PredicateInfo> {
760        let mut result = Vec::new();
761        self.extract_predicates_recursive(predicate, &mut result);
762        result
763    }
764
765    fn extract_predicates_recursive(&self, predicate: &Expr, result: &mut Vec<PredicateInfo>) {
766        match predicate {
767            Expr::BinaryOp {
768                left,
769                op: BinaryOp::And,
770                right,
771            } => {
772                self.extract_predicates_recursive(left, result);
773                self.extract_predicates_recursive(right, result);
774            }
775            _ => {
776                if let Some(info) = self.analyze_predicate(predicate) {
777                    result.push(info);
778                }
779            }
780        }
781    }
782}
783
784#[cfg(test)]
785mod tests {
786    use super::*;
787    use crate::context::{IndexInfo, TableStats};
788
789    #[test]
790    fn test_index_selection_basic() {
791        let pass = IndexSelection::new();
792
793        let plan = LogicalPlan::filter(
794            LogicalPlan::scan("users"),
795            Expr::eq(Expr::column("users", "id", 0), Expr::literal(1i64)),
796        );
797
798        let optimized = pass.optimize(plan);
799        // Without context, should remain unchanged
800        assert!(matches!(optimized, LogicalPlan::Filter { .. }));
801    }
802
803    #[test]
804    fn test_index_selection_with_context() {
805        let mut ctx = ExecutionContext::new();
806        ctx.register_table(
807            "users",
808            TableStats {
809                row_count: 1000,
810                is_sorted: false,
811                indexes: alloc::vec![IndexInfo::new(
812                    "idx_id",
813                    alloc::vec!["id".into()],
814                    true
815                )],
816            },
817        );
818
819        let pass = IndexSelection::with_context(ctx);
820
821        let plan = LogicalPlan::filter(
822            LogicalPlan::scan("users"),
823            Expr::eq(Expr::column("users", "id", 0), Expr::literal(1i64)),
824        );
825
826        let optimized = pass.optimize(plan);
827        // With context and matching index, should convert to IndexGet
828        assert!(matches!(optimized, LogicalPlan::IndexGet { .. }));
829    }
830
831    #[test]
832    fn test_index_selection_range_scan() {
833        let mut ctx = ExecutionContext::new();
834        ctx.register_table(
835            "orders",
836            TableStats {
837                row_count: 10000,
838                is_sorted: false,
839                indexes: alloc::vec![IndexInfo::new(
840                    "idx_amount",
841                    alloc::vec!["amount".into()],
842                    false
843                )],
844            },
845        );
846
847        let pass = IndexSelection::with_context(ctx);
848
849        let plan = LogicalPlan::filter(
850            LogicalPlan::scan("orders"),
851            Expr::gt(Expr::column("orders", "amount", 0), Expr::literal(100i64)),
852        );
853
854        let optimized = pass.optimize(plan);
855        // Should convert to IndexScan for range predicate
856        assert!(matches!(optimized, LogicalPlan::IndexScan { .. }));
857    }
858
859    #[test]
860    fn test_analyze_predicate() {
861        let pass = IndexSelection::new();
862
863        let pred = Expr::eq(Expr::column("users", "id", 0), Expr::literal(42i64));
864        let info = pass.analyze_predicate(&pred).unwrap();
865
866        assert_eq!(info.table, "users");
867        assert_eq!(info.column, "id");
868        assert!(info.is_point_lookup);
869        assert!(!info.is_range);
870    }
871
872    #[test]
873    fn test_analyze_range_predicate() {
874        let pass = IndexSelection::new();
875
876        let pred = Expr::gt(Expr::column("orders", "amount", 0), Expr::literal(100i64));
877        let info = pass.analyze_predicate(&pred).unwrap();
878
879        assert_eq!(info.table, "orders");
880        assert_eq!(info.column, "amount");
881        assert!(!info.is_point_lookup);
882        assert!(info.is_range);
883        assert_eq!(info.op, BinaryOp::Gt);
884    }
885
886    #[test]
887    fn test_extract_compound_predicates() {
888        let pass = IndexSelection::new();
889
890        let pred = Expr::and(
891            Expr::eq(Expr::column("users", "id", 0), Expr::literal(1i64)),
892            Expr::gt(Expr::column("users", "age", 1), Expr::literal(18i64)),
893        );
894
895        let predicates = pass.extract_predicates(&pred);
896        assert_eq!(predicates.len(), 2);
897        assert_eq!(predicates[0].column, "id");
898        assert_eq!(predicates[1].column, "age");
899    }
900
901    #[test]
902    fn test_no_index_available() {
903        let mut ctx = ExecutionContext::new();
904        ctx.register_table(
905            "users",
906            TableStats {
907                row_count: 1000,
908                is_sorted: false,
909                indexes: alloc::vec![], // No indexes
910            },
911        );
912
913        let pass = IndexSelection::with_context(ctx);
914
915        let plan = LogicalPlan::filter(
916            LogicalPlan::scan("users"),
917            Expr::eq(Expr::column("users", "id", 0), Expr::literal(1i64)),
918        );
919
920        let optimized = pass.optimize(plan);
921        // Without matching index, should remain as Filter
922        assert!(matches!(optimized, LogicalPlan::Filter { .. }));
923    }
924
925    #[test]
926    fn test_in_query_index_selection() {
927        let mut ctx = ExecutionContext::new();
928        ctx.register_table(
929            "users",
930            TableStats {
931                row_count: 1000,
932                is_sorted: false,
933                indexes: alloc::vec![IndexInfo::new(
934                    "idx_id",
935                    alloc::vec!["id".into()],
936                    true
937                )],
938            },
939        );
940
941        let pass = IndexSelection::with_context(ctx);
942
943        // Filter: id IN (1, 2, 3)
944        let plan = LogicalPlan::filter(
945            LogicalPlan::scan("users"),
946            Expr::In {
947                expr: Box::new(Expr::column("users", "id", 0)),
948                list: alloc::vec![
949                    Expr::literal(Value::Int64(1)),
950                    Expr::literal(Value::Int64(2)),
951                    Expr::literal(Value::Int64(3)),
952                ],
953            },
954        );
955
956        let optimized = pass.optimize(plan);
957        // Should convert to IndexInGet for IN query with indexed column
958        assert!(matches!(optimized, LogicalPlan::IndexInGet { .. }));
959
960        if let LogicalPlan::IndexInGet { table, index, keys } = optimized {
961            assert_eq!(table, "users");
962            assert_eq!(index, "idx_id");
963            assert_eq!(keys.len(), 3);
964        }
965    }
966
967    #[test]
968    fn test_in_query_no_index() {
969        let mut ctx = ExecutionContext::new();
970        ctx.register_table(
971            "users",
972            TableStats {
973                row_count: 1000,
974                is_sorted: false,
975                indexes: alloc::vec![], // No indexes
976            },
977        );
978
979        let pass = IndexSelection::with_context(ctx);
980
981        // Filter: id IN (1, 2, 3) but no index available
982        let plan = LogicalPlan::filter(
983            LogicalPlan::scan("users"),
984            Expr::In {
985                expr: Box::new(Expr::column("users", "id", 0)),
986                list: alloc::vec![
987                    Expr::literal(Value::Int64(1)),
988                    Expr::literal(Value::Int64(2)),
989                ],
990            },
991        );
992
993        let optimized = pass.optimize(plan);
994        // Without index, should remain as Filter
995        assert!(matches!(optimized, LogicalPlan::Filter { .. }));
996    }
997
998    #[test]
999    fn test_analyze_in_predicate() {
1000        let pass = IndexSelection::new();
1001
1002        let pred = Expr::In {
1003            expr: Box::new(Expr::column("users", "id", 0)),
1004            list: alloc::vec![
1005                Expr::literal(Value::Int64(1)),
1006                Expr::literal(Value::Int64(2)),
1007                Expr::literal(Value::Int64(3)),
1008            ],
1009        };
1010
1011        let info = pass.analyze_in_predicate(&pred).unwrap();
1012        assert_eq!(info.table, "users");
1013        assert_eq!(info.column, "id");
1014        assert_eq!(info.values.len(), 3);
1015        assert_eq!(info.values[0], Value::Int64(1));
1016        assert_eq!(info.values[1], Value::Int64(2));
1017        assert_eq!(info.values[2], Value::Int64(3));
1018    }
1019
1020    #[test]
1021    fn test_analyze_in_predicate_with_non_literals() {
1022        let pass = IndexSelection::new();
1023
1024        // IN list with non-literal expression should not be optimized
1025        let pred = Expr::In {
1026            expr: Box::new(Expr::column("users", "id", 0)),
1027            list: alloc::vec![
1028                Expr::literal(Value::Int64(1)),
1029                Expr::column("other", "val", 0), // Non-literal
1030            ],
1031        };
1032
1033        let info = pass.analyze_in_predicate(&pred);
1034        assert!(info.is_none());
1035    }
1036
1037    /// Test case for bug: mixed GIN + non-GIN predicates should preserve non-GIN predicates
1038    ///
1039    /// Query: col('status').eq('published') AND col('tags').get('$.primary').eq('tech')
1040    ///
1041    /// Expected behcynos:
1042    /// - The GIN predicate (tags.primary = 'tech') should use GinIndexScan
1043    /// - The non-GIN predicate (status = 'published') should be preserved as a Filter
1044    ///
1045    /// Bug: Currently the non-GIN predicate is completely dropped!
1046    #[test]
1047    fn test_mixed_gin_and_non_gin_predicates_bug() {
1048        let mut ctx = ExecutionContext::new();
1049        ctx.register_table(
1050            "documents",
1051            TableStats {
1052                row_count: 1000,
1053                is_sorted: false,
1054                indexes: alloc::vec![
1055                    // GIN index on 'tags' column (JSONB)
1056                    IndexInfo::new_gin("idx_tags", alloc::vec!["tags".into()]),
1057                    // B-Tree index on 'status' column
1058                    IndexInfo::new("idx_status", alloc::vec!["status".into()], false),
1059                ],
1060            },
1061        );
1062
1063        let pass = IndexSelection::with_context(ctx);
1064
1065        // Build predicate: status = 'published' AND JSONB_PATH_EQ(tags, '$.primary', 'tech')
1066        // This simulates: col('status').eq('published').and(col('tags').get('$.primary').eq('tech'))
1067        let predicate = Expr::and(
1068            // Non-GIN predicate: status = 'published'
1069            Expr::eq(
1070                Expr::column("documents", "status", 1),
1071                Expr::literal(Value::String("published".into())),
1072            ),
1073            // GIN predicate: JSONB_PATH_EQ(tags, '$.primary', 'tech')
1074            Expr::Function {
1075                name: "JSONB_PATH_EQ".into(),
1076                args: alloc::vec![
1077                    Expr::column("documents", "tags", 2),
1078                    Expr::literal(Value::String("$.primary".into())),
1079                    Expr::literal(Value::String("tech".into())),
1080                ],
1081            },
1082        );
1083
1084        let plan = LogicalPlan::filter(LogicalPlan::scan("documents"), predicate);
1085
1086        let optimized = pass.optimize(plan);
1087
1088        // BUG: Currently this returns just GinIndexScan, dropping the status = 'published' predicate!
1089        // The correct behcynos should be:
1090        // Filter(status = 'published', GinIndexScan(tags, '$.primary', 'tech'))
1091
1092        // This assertion currently FAILS - demonstrating the bug
1093        // The optimized plan should be a Filter wrapping a GinIndexScan
1094        match &optimized {
1095            LogicalPlan::Filter { input, predicate } => {
1096                // Good: we have a Filter
1097                // Check that the input is a GinIndexScan
1098                assert!(
1099                    matches!(input.as_ref(), LogicalPlan::GinIndexScan { .. }),
1100                    "Expected GinIndexScan as input to Filter, got: {:?}",
1101                    input
1102                );
1103                // Check that the predicate is the non-GIN predicate (status = 'published')
1104                if let Expr::BinaryOp { left, op, right } = predicate {
1105                    assert_eq!(*op, BinaryOp::Eq);
1106                    if let Expr::Column(col) = left.as_ref() {
1107                        assert_eq!(col.column, "status");
1108                    } else {
1109                        panic!("Expected column reference in predicate left side");
1110                    }
1111                } else {
1112                    panic!("Expected BinaryOp predicate");
1113                }
1114            }
1115            LogicalPlan::GinIndexScan { .. } => {
1116                // BUG: This is what currently happens - the status predicate is dropped!
1117                panic!(
1118                    "BUG CONFIRMED: Non-GIN predicate (status = 'published') was dropped! \
1119                     The query will return incorrect results - all rows matching \
1120                     tags.primary = 'tech' regardless of status."
1121                );
1122            }
1123            other => {
1124                panic!("Unexpected plan type: {:?}", other);
1125            }
1126        }
1127    }
1128
1129    /// Test case for bug: B-Tree index should work with AND predicates
1130    ///
1131    /// Query: price > 100 AND category = 'Electronics'
1132    ///
1133    /// Expected behcynos:
1134    /// - The indexed predicate (price > 100) should use IndexScan
1135    /// - The non-indexed predicate (category = 'Electronics') should be preserved as a Filter
1136    ///
1137    /// Bug: Currently the entire AND predicate fails to use any index because
1138    /// analyze_predicate only handles simple predicates, not AND combinations.
1139    #[test]
1140    fn test_btree_index_with_and_predicates_bug() {
1141        let mut ctx = ExecutionContext::new();
1142        ctx.register_table(
1143            "products",
1144            TableStats {
1145                row_count: 10000,
1146                is_sorted: false,
1147                indexes: alloc::vec![
1148                    // B-Tree index on 'price' column
1149                    IndexInfo::new("idx_price", alloc::vec!["price".into()], false),
1150                ],
1151            },
1152        );
1153
1154        let pass = IndexSelection::with_context(ctx);
1155
1156        // Build predicate: price > 100 AND category = 'Electronics'
1157        let predicate = Expr::and(
1158            // Indexed predicate: price > 100
1159            Expr::gt(
1160                Expr::column("products", "price", 1),
1161                Expr::literal(Value::Int64(100)),
1162            ),
1163            // Non-indexed predicate: category = 'Electronics'
1164            Expr::eq(
1165                Expr::column("products", "category", 2),
1166                Expr::literal(Value::String("Electronics".into())),
1167            ),
1168        );
1169
1170        let plan = LogicalPlan::filter(LogicalPlan::scan("products"), predicate);
1171
1172        let optimized = pass.optimize(plan);
1173
1174        // The correct behcynos should be:
1175        // Filter(category = 'Electronics', IndexScan(price > 100))
1176        //
1177        // But currently it returns:
1178        // Filter(price > 100 AND category = 'Electronics', Scan(products))
1179        // because analyze_predicate returns None for AND expressions
1180
1181        match &optimized {
1182            LogicalPlan::Filter { input, predicate: _ } => {
1183                match input.as_ref() {
1184                    LogicalPlan::IndexScan { index, .. } => {
1185                        // Good: we're using IndexScan
1186                        assert_eq!(index, "idx_price");
1187                    }
1188                    LogicalPlan::Scan { .. } => {
1189                        // BUG: Index is not being used!
1190                        panic!(
1191                            "BUG CONFIRMED: B-Tree index (idx_price) is not used for AND predicates! \
1192                             The query falls back to full table scan even though price > 100 \
1193                             could use the index."
1194                        );
1195                    }
1196                    other => {
1197                        panic!("Unexpected input plan type: {:?}", other);
1198                    }
1199                }
1200            }
1201            LogicalPlan::IndexScan { .. } => {
1202                // This would be acceptable too (if the other predicate is somehow handled)
1203            }
1204            other => {
1205                panic!("Unexpected plan type: {:?}", other);
1206            }
1207        }
1208    }
1209
1210    /// Test that point lookups (IndexGet) are prioritized over range scans (IndexScan)
1211    /// when both are available in an AND predicate.
1212    #[test]
1213    fn test_btree_and_prioritizes_point_lookup() {
1214        let mut ctx = ExecutionContext::new();
1215        ctx.register_table(
1216            "products",
1217            TableStats {
1218                row_count: 10000,
1219                is_sorted: false,
1220                indexes: alloc::vec![
1221                    // B-Tree index on 'price' column (for range scan)
1222                    IndexInfo::new("idx_price", alloc::vec!["price".into()], false),
1223                    // B-Tree index on 'id' column (for point lookup)
1224                    IndexInfo::new("idx_id", alloc::vec!["id".into()], true),
1225                ],
1226            },
1227        );
1228
1229        let pass = IndexSelection::with_context(ctx);
1230
1231        // Build predicate: price > 100 AND id = 42
1232        // Both predicates are indexable, but id = 42 should be preferred (point lookup)
1233        let predicate = Expr::and(
1234            // Range predicate: price > 100
1235            Expr::gt(
1236                Expr::column("products", "price", 1),
1237                Expr::literal(Value::Int64(100)),
1238            ),
1239            // Point lookup predicate: id = 42
1240            Expr::eq(
1241                Expr::column("products", "id", 0),
1242                Expr::literal(Value::Int64(42)),
1243            ),
1244        );
1245
1246        let plan = LogicalPlan::filter(LogicalPlan::scan("products"), predicate);
1247
1248        let optimized = pass.optimize(plan);
1249
1250        // Should use IndexGet for id = 42 (point lookup) and Filter for price > 100
1251        match &optimized {
1252            LogicalPlan::Filter { input, predicate } => {
1253                // Check that we're using IndexGet (point lookup)
1254                match input.as_ref() {
1255                    LogicalPlan::IndexGet { index, key, .. } => {
1256                        assert_eq!(index, "idx_id");
1257                        assert_eq!(*key, Value::Int64(42));
1258                    }
1259                    other => {
1260                        panic!("Expected IndexGet, got: {:?}", other);
1261                    }
1262                }
1263                // Check that the remaining predicate is price > 100
1264                if let Expr::BinaryOp { left, op, right } = predicate {
1265                    assert_eq!(*op, BinaryOp::Gt);
1266                    if let Expr::Column(col) = left.as_ref() {
1267                        assert_eq!(col.column, "price");
1268                    }
1269                    if let Expr::Literal(Value::Int64(v)) = right.as_ref() {
1270                        assert_eq!(*v, 100);
1271                    }
1272                }
1273            }
1274            LogicalPlan::IndexGet { .. } => {
1275                // Also acceptable if there's no remaining predicate
1276            }
1277            other => {
1278                panic!("Unexpected plan type: {:?}", other);
1279            }
1280        }
1281    }
1282}
cynos_query/optimizer/index_selection.rs

cynos_query/optimizer/
index_selection.rs