vibesql_executor/select/executor/fast_path/
analysis.rs

1//! Query analysis functions for fast path detection
2//!
3//! This module provides functions to analyze queries and determine if they qualify
4//! for various fast execution paths. These functions are pure analysis with no
5//! execution side effects.
6
7use vibesql_ast::{Expression, OrderByItem, OrderDirection, SelectItem, SelectStmt};
8
9/// Check if a query is a simple point-lookup that can use the fast path
10///
11/// Returns true for queries that:
12/// 1. Query a single table (no joins, no subqueries in FROM)
13/// 2. Have no WITH clause (CTEs)
14/// 3. Have no aggregates or window functions
15/// 4. Have no GROUP BY, HAVING, DISTINCT, or set operations
16/// 5. Have no ORDER BY with complex expressions
17/// 6. Have a simple WHERE clause (only AND-connected equality predicates)
18pub fn is_simple_point_query(stmt: &SelectStmt) -> bool {
19    // No CTEs
20    if stmt.with_clause.is_some() {
21        return false;
22    }
23
24    // No set operations (UNION, INTERSECT, EXCEPT)
25    if stmt.set_operation.is_some() {
26        return false;
27    }
28
29    // No SELECT INTO (DDL or procedural variable assignment)
30    // These require special handling that the fast path doesn't support
31    if stmt.into_table.is_some() || stmt.into_variables.is_some() {
32        return false;
33    }
34
35    // No GROUP BY, HAVING, or DISTINCT
36    if stmt.group_by.is_some() || stmt.having.is_some() || stmt.distinct {
37        return false;
38    }
39
40    // Must have a FROM clause
41    let Some(from) = &stmt.from else {
42        return false;
43    };
44
45    // FROM must be a simple table (no joins, no subqueries)
46    if !matches!(from, vibesql_ast::FromClause::Table { .. }) {
47        return false;
48    }
49
50    // SELECT list must be simple columns or * (no aggregates, no subqueries)
51    if !has_simple_select_list(&stmt.select_list) {
52        return false;
53    }
54
55    // WHERE clause must be simple equality predicates (if present)
56    if let Some(where_clause) = &stmt.where_clause {
57        if !is_simple_where_clause(where_clause) {
58            return false;
59        }
60    }
61
62    // ORDER BY is allowed if it's simple (column references only, no complex expressions)
63    // and doesn't use SELECT list aliases (which require post-projection sorting)
64    // The index scan logic will automatically use index ordering when possible
65    if let Some(order_by) = &stmt.order_by {
66        if !is_simple_order_by(order_by) {
67            return false;
68        }
69        // Check that ORDER BY doesn't use SELECT list aliases
70        // Fast path sorts before projection, so aliases can't be resolved
71        if uses_select_alias(order_by, &stmt.select_list) {
72            return false;
73        }
74    }
75
76    true
77}
78
79/// Check if a query is a streaming aggregate that can use the ultra-fast path
80///
81/// Returns true for queries that:
82/// 1. Query a single table (no joins)
83/// 2. Have no CTEs, set operations, GROUP BY, HAVING, DISTINCT, ORDER BY
84/// 3. Have a simple WHERE clause with PK range predicate (BETWEEN)
85/// 4. SELECT list contains only simple aggregates (SUM, COUNT, AVG, MIN, MAX) on single column
86///    references (not DISTINCT aggregates)
87///
88/// # Example queries that use this path:
89/// ```sql
90/// SELECT SUM(k) FROM sbtest1 WHERE id BETWEEN 1 AND 100
91/// SELECT COUNT(c), SUM(k) FROM sbtest1 WHERE id BETWEEN 50 AND 150
92/// ```
93pub fn is_streaming_aggregate_query(stmt: &SelectStmt) -> bool {
94    // No CTEs
95    if stmt.with_clause.is_some() {
96        return false;
97    }
98
99    // No set operations (UNION, INTERSECT, EXCEPT)
100    if stmt.set_operation.is_some() {
101        return false;
102    }
103
104    // No GROUP BY, HAVING, DISTINCT, ORDER BY
105    if stmt.group_by.is_some() || stmt.having.is_some() || stmt.distinct {
106        return false;
107    }
108
109    if stmt.order_by.is_some() {
110        return false;
111    }
112
113    // Must have a FROM clause (simple table)
114    let Some(from) = &stmt.from else {
115        return false;
116    };
117
118    // FROM must be a simple table (no joins)
119    if !matches!(from, vibesql_ast::FromClause::Table { .. }) {
120        return false;
121    }
122
123    // Must have a WHERE clause
124    if stmt.where_clause.is_none() {
125        return false;
126    }
127
128    // SELECT list must contain only simple aggregates
129    if !has_simple_aggregate_select_list(&stmt.select_list) {
130        return false;
131    }
132
133    true
134}
135
136/// Check if a SELECT list contains only simple aggregates (SUM, COUNT, AVG, MIN, MAX)
137/// on single column references without DISTINCT.
138pub(crate) fn has_simple_aggregate_select_list(select_list: &[SelectItem]) -> bool {
139    if select_list.is_empty() {
140        return false;
141    }
142
143    for item in select_list {
144        match item {
145            SelectItem::Expression { expr, .. } => {
146                if !is_simple_aggregate_expression(expr) {
147                    return false;
148                }
149            }
150            // Wildcards are not aggregates
151            _ => return false,
152        }
153    }
154    true
155}
156
157/// Check if an expression is a simple aggregate function call
158pub(crate) fn is_simple_aggregate_expression(expr: &Expression) -> bool {
159    match expr {
160        Expression::AggregateFunction { name, args, distinct, .. } => {
161            // Only support non-DISTINCT aggregates for streaming
162            if *distinct {
163                return false;
164            }
165
166            // Only support standard aggregate functions
167            let upper_name = name.to_uppercase();
168            if !matches!(upper_name.as_str(), "SUM" | "COUNT" | "AVG" | "MIN" | "MAX") {
169                return false;
170            }
171
172            // Must have exactly one argument (column reference)
173            // COUNT(*) is excluded until properly supported in extract_simple_aggregate
174            if args.len() != 1 {
175                return false;
176            }
177
178            // Argument must be a simple column reference
179            matches!(&args[0], Expression::ColumnRef(_))
180        }
181        _ => false,
182    }
183}
184
185/// Extract the aggregate function name and column index from a SELECT item
186pub(crate) fn extract_simple_aggregate(
187    expr: &Expression,
188    table_schema: &vibesql_catalog::TableSchema,
189) -> Option<(String, usize)> {
190    match expr {
191        Expression::AggregateFunction { name, args, distinct, .. } => {
192            if *distinct {
193                return None;
194            }
195
196            let upper_name = name.to_uppercase();
197            if !matches!(upper_name.as_str(), "SUM" | "COUNT" | "AVG" | "MIN" | "MAX") {
198                return None;
199            }
200
201            // Get the column reference
202            if args.len() != 1 {
203                return None;
204            }
205
206            if let Expression::ColumnRef(col_id) = &args[0] {
207                let column = col_id.column_canonical();
208                // Find column index
209                let col_idx = table_schema
210                    .columns
211                    .iter()
212                    .position(|c| c.name.eq_ignore_ascii_case(column))?;
213                return Some((upper_name, col_idx));
214            }
215
216            None
217        }
218        _ => None,
219    }
220}
221
222/// Check if an ORDER BY clause is simple enough for the fast path
223///
224/// Returns true if all ORDER BY items are simple column references.
225/// Complex expressions (functions, arithmetic, subqueries) are not supported.
226///
227/// Examples:
228/// - `ORDER BY col ASC` -> true
229/// - `ORDER BY col1, col2 DESC` -> true
230/// - `ORDER BY col LIMIT 1` -> true (LIMIT doesn't affect ORDER BY simplicity)
231/// - `ORDER BY UPPER(col)` -> false (function call)
232/// - `ORDER BY col + 1` -> false (arithmetic expression)
233pub(crate) fn is_simple_order_by(order_by: &[OrderByItem]) -> bool {
234    for item in order_by {
235        // ORDER BY expression must be a simple column reference
236        if !matches!(item.expr, Expression::ColumnRef(_)) {
237            return false;
238        }
239    }
240    true
241}
242
243/// Check if ORDER BY uses any SELECT list aliases
244///
245/// Returns true if any ORDER BY column matches a SELECT list alias.
246/// This is used to exclude such queries from the fast path, since
247/// the fast path sorts before projection and can't resolve aliases.
248pub(crate) fn uses_select_alias(order_by: &[OrderByItem], select_list: &[SelectItem]) -> bool {
249    // Collect all aliases from the SELECT list
250    let aliases: Vec<&str> = select_list
251        .iter()
252        .filter_map(|item| {
253            if let SelectItem::Expression { alias: Some(alias), .. } = item {
254                Some(alias.as_str())
255            } else {
256                None
257            }
258        })
259        .collect();
260
261    // If no aliases, no conflict possible
262    if aliases.is_empty() {
263        return false;
264    }
265
266    // Check if any ORDER BY column matches an alias
267    for item in order_by {
268        if let Expression::ColumnRef(col_id) = &item.expr {
269            if col_id.schema_canonical().is_none() && col_id.table_canonical().is_none() {
270                let column = col_id.column_canonical();
271                // Case-insensitive comparison for SQL identifiers
272                if aliases.iter().any(|alias| alias.eq_ignore_ascii_case(column)) {
273                    return true;
274                }
275            }
276        }
277    }
278
279    false
280}
281
282/// Check if we need to apply explicit sorting (index didn't provide the order)
283///
284/// Returns true if ORDER BY columns don't match the sorted_by metadata from index scan.
285pub fn needs_sorting(
286    order_by: &[OrderByItem],
287    sorted_by: &Option<Vec<(String, OrderDirection)>>,
288) -> bool {
289    let Some(sorted_cols) = sorted_by else {
290        return true; // No sorting metadata, need to sort
291    };
292
293    // Check if ORDER BY is a prefix of sorted_by with matching directions
294    if order_by.len() > sorted_cols.len() {
295        return true; // ORDER BY has more columns than sorted
296    }
297
298    for (order_item, (col_name, col_dir)) in order_by.iter().zip(sorted_cols.iter()) {
299        // Extract column name from ORDER BY expression
300        let order_col = match &order_item.expr {
301            Expression::ColumnRef(col_id) => col_id.column_canonical(),
302            _ => return true, // Non-column expression, need to sort
303        };
304
305        // Check column name matches (case-insensitive)
306        if !order_col.eq_ignore_ascii_case(col_name) {
307            return true;
308        }
309
310        // Check direction matches
311        if &order_item.direction != col_dir {
312            return true;
313        }
314    }
315
316    false // Sorting is already satisfied
317}
318
319/// Check if a SELECT list contains only simple columns or *
320pub(crate) fn has_simple_select_list(select_list: &[SelectItem]) -> bool {
321    for item in select_list {
322        match item {
323            SelectItem::Wildcard { .. } | SelectItem::QualifiedWildcard { .. } => continue,
324            SelectItem::Expression { expr, .. } => {
325                if !is_simple_expression(expr) {
326                    return false;
327                }
328            }
329        }
330    }
331    true
332}
333
334/// Check if an expression is simple (column ref, literal, or basic arithmetic)
335pub(crate) fn is_simple_expression(expr: &Expression) -> bool {
336    match expr {
337        Expression::ColumnRef(_) | Expression::Literal(_) => true,
338        Expression::BinaryOp { left, right, op } => {
339            // Allow simple arithmetic on columns/literals
340            matches!(
341                op,
342                vibesql_ast::BinaryOperator::Plus
343                    | vibesql_ast::BinaryOperator::Minus
344                    | vibesql_ast::BinaryOperator::Multiply
345                    | vibesql_ast::BinaryOperator::Divide
346                    | vibesql_ast::BinaryOperator::Concat
347            ) && is_simple_expression(left)
348                && is_simple_expression(right)
349        }
350        Expression::UnaryOp { expr, .. } => is_simple_expression(expr),
351        Expression::Cast { expr, .. } => is_simple_expression(expr),
352        // Functions are not simple (could be aggregates or expensive)
353        _ => false,
354    }
355}
356
357/// Check if a WHERE clause is simple (only AND-connected equality/comparison predicates)
358pub(crate) fn is_simple_where_clause(expr: &Expression) -> bool {
359    match expr {
360        // Simple comparison: col = val, col > val, etc.
361        Expression::BinaryOp { left, op, right } => {
362            match op {
363                vibesql_ast::BinaryOperator::Equal
364                | vibesql_ast::BinaryOperator::NotEqual
365                | vibesql_ast::BinaryOperator::GreaterThan
366                | vibesql_ast::BinaryOperator::GreaterThanOrEqual
367                | vibesql_ast::BinaryOperator::LessThan
368                | vibesql_ast::BinaryOperator::LessThanOrEqual => {
369                    // Must be column vs literal (not column vs column for join conditions)
370                    is_column_or_literal(left) && is_column_or_literal(right)
371                }
372                vibesql_ast::BinaryOperator::And => {
373                    // AND is fine - recurse
374                    is_simple_where_clause(left) && is_simple_where_clause(right)
375                }
376                // OR could be optimized but is more complex
377                vibesql_ast::BinaryOperator::Or => false,
378                _ => false,
379            }
380        }
381        // BETWEEN is simple
382        Expression::Between { expr, low, high, .. } => {
383            is_column_or_literal(expr) && is_column_or_literal(low) && is_column_or_literal(high)
384        }
385        // IN list is simple (not IN subquery)
386        Expression::InList { expr, values, .. } => {
387            is_column_or_literal(expr) && values.iter().all(is_column_or_literal)
388        }
389        // IS NULL is simple
390        Expression::IsNull { expr, .. } => is_column_or_literal(expr),
391        // LIKE is simple
392        Expression::Like { expr, pattern, .. } => {
393            is_column_or_literal(expr) && is_column_or_literal(pattern)
394        }
395        _ => false,
396    }
397}
398
399/// Check if an expression is a column reference or literal
400pub(crate) fn is_column_or_literal(expr: &Expression) -> bool {
401    matches!(expr, Expression::ColumnRef(_) | Expression::Literal(_))
402}
403
404#[cfg(test)]
405mod tests {
406    use vibesql_ast::Statement;
407    use vibesql_parser::Parser;
408
409    use super::*;
410
411    fn parse_select(sql: &str) -> SelectStmt {
412        match Parser::parse_sql(sql).unwrap() {
413            Statement::Select(stmt) => *stmt,
414            _ => panic!("Expected SELECT statement"),
415        }
416    }
417
418    #[test]
419    fn test_simple_point_query_detection() {
420        // Simple point queries should be detected
421        assert!(is_simple_point_query(&parse_select("SELECT w_tax FROM warehouse WHERE w_id = 1")));
422        assert!(is_simple_point_query(&parse_select("SELECT * FROM users WHERE id = 123")));
423        assert!(is_simple_point_query(&parse_select("SELECT a, b FROM t WHERE x = 1 AND y = 2")));
424        assert!(is_simple_point_query(&parse_select("SELECT a FROM t WHERE x > 10")));
425        assert!(is_simple_point_query(&parse_select("SELECT a FROM t WHERE x BETWEEN 1 AND 10")));
426        assert!(is_simple_point_query(&parse_select("SELECT a FROM t WHERE x IN (1, 2, 3)")));
427        assert!(is_simple_point_query(&parse_select("SELECT a FROM t WHERE x IS NULL")));
428    }
429
430    #[test]
431    fn test_non_simple_query_detection() {
432        // Complex queries should not be detected as simple
433        assert!(!is_simple_point_query(&parse_select("SELECT COUNT(*) FROM t WHERE id = 1")));
434        assert!(!is_simple_point_query(&parse_select("SELECT a FROM t1, t2 WHERE t1.id = t2.id")));
435        assert!(!is_simple_point_query(&parse_select(
436            "SELECT a FROM t WHERE id IN (SELECT id FROM t2)"
437        )));
438        assert!(!is_simple_point_query(&parse_select("SELECT DISTINCT a FROM t")));
439        assert!(!is_simple_point_query(&parse_select("SELECT a FROM t GROUP BY a")));
440        assert!(!is_simple_point_query(&parse_select("WITH cte AS (SELECT 1) SELECT * FROM cte")));
441        assert!(!is_simple_point_query(&parse_select("SELECT a FROM t UNION SELECT b FROM t2")));
442    }
443
444    #[test]
445    fn test_or_not_simple() {
446        // OR predicates are not simple (could be optimized later)
447        assert!(!is_simple_point_query(&parse_select("SELECT a FROM t WHERE x = 1 OR y = 2")));
448    }
449
450    #[test]
451    fn test_order_by_simple_queries() {
452        // Simple ORDER BY with column references should be detected as simple
453        assert!(is_simple_point_query(&parse_select(
454            "SELECT no_o_id FROM new_order WHERE no_w_id = 1 ORDER BY no_o_id"
455        )));
456        assert!(is_simple_point_query(&parse_select(
457            "SELECT * FROM t WHERE id = 1 ORDER BY col ASC"
458        )));
459        assert!(is_simple_point_query(&parse_select(
460            "SELECT a, b FROM t WHERE x = 1 ORDER BY a DESC"
461        )));
462        assert!(is_simple_point_query(&parse_select("SELECT a FROM t WHERE x = 1 ORDER BY a, b")));
463        assert!(is_simple_point_query(&parse_select(
464            "SELECT a FROM t WHERE x = 1 ORDER BY a DESC, b ASC"
465        )));
466        // ORDER BY with LIMIT
467        assert!(is_simple_point_query(&parse_select(
468            "SELECT a FROM t WHERE x = 1 ORDER BY a LIMIT 1"
469        )));
470    }
471
472    #[test]
473    fn test_order_by_complex_not_simple() {
474        // Complex ORDER BY expressions should not be detected as simple
475        assert!(!is_simple_point_query(&parse_select(
476            "SELECT a FROM t WHERE x = 1 ORDER BY UPPER(a)"
477        )));
478        assert!(!is_simple_point_query(&parse_select(
479            "SELECT a FROM t WHERE x = 1 ORDER BY a + 1"
480        )));
481        assert!(!is_simple_point_query(&parse_select(
482            "SELECT a FROM t WHERE x = 1 ORDER BY COALESCE(a, b)"
483        )));
484    }
485
486    #[test]
487    fn test_needs_sorting() {
488        // No sorted_by means we need to sort
489        assert!(needs_sorting(
490            &[vibesql_ast::OrderByItem {
491                expr: vibesql_ast::Expression::ColumnRef(vibesql_ast::ColumnIdentifier::simple(
492                    "a", false
493                )),
494                direction: vibesql_ast::OrderDirection::Asc,
495                nulls_order: None,
496            }],
497            &None
498        ));
499
500        // Matching sorted_by means no sorting needed
501        assert!(!needs_sorting(
502            &[vibesql_ast::OrderByItem {
503                expr: vibesql_ast::Expression::ColumnRef(vibesql_ast::ColumnIdentifier::simple(
504                    "a", false
505                )),
506                direction: vibesql_ast::OrderDirection::Asc,
507                nulls_order: None,
508            }],
509            &Some(vec![("a".to_string(), vibesql_ast::OrderDirection::Asc)])
510        ));
511
512        // Different column means sorting needed
513        assert!(needs_sorting(
514            &[vibesql_ast::OrderByItem {
515                expr: vibesql_ast::Expression::ColumnRef(vibesql_ast::ColumnIdentifier::simple(
516                    "b", false
517                )),
518                direction: vibesql_ast::OrderDirection::Asc,
519                nulls_order: None,
520            }],
521            &Some(vec![("a".to_string(), vibesql_ast::OrderDirection::Asc)])
522        ));
523
524        // Different direction means sorting needed
525        assert!(needs_sorting(
526            &[vibesql_ast::OrderByItem {
527                expr: vibesql_ast::Expression::ColumnRef(vibesql_ast::ColumnIdentifier::simple(
528                    "a", false
529                )),
530                direction: vibesql_ast::OrderDirection::Desc,
531                nulls_order: None,
532            }],
533            &Some(vec![("a".to_string(), vibesql_ast::OrderDirection::Asc)])
534        ));
535
536        // ORDER BY prefix of sorted_by is OK
537        assert!(!needs_sorting(
538            &[vibesql_ast::OrderByItem {
539                expr: vibesql_ast::Expression::ColumnRef(vibesql_ast::ColumnIdentifier::simple(
540                    "a", false
541                )),
542                direction: vibesql_ast::OrderDirection::Asc,
543                nulls_order: None,
544            }],
545            &Some(vec![
546                ("a".to_string(), vibesql_ast::OrderDirection::Asc),
547                ("b".to_string(), vibesql_ast::OrderDirection::Asc),
548            ])
549        ));
550
551        // ORDER BY with more columns than sorted_by needs sorting
552        assert!(needs_sorting(
553            &[
554                vibesql_ast::OrderByItem {
555                    expr: vibesql_ast::Expression::ColumnRef(
556                        vibesql_ast::ColumnIdentifier::simple("a", false)
557                    ),
558                    direction: vibesql_ast::OrderDirection::Asc,
559                    nulls_order: None,
560                },
561                vibesql_ast::OrderByItem {
562                    expr: vibesql_ast::Expression::ColumnRef(
563                        vibesql_ast::ColumnIdentifier::simple("b", false)
564                    ),
565                    direction: vibesql_ast::OrderDirection::Asc,
566                    nulls_order: None,
567                },
568            ],
569            &Some(vec![("a".to_string(), vibesql_ast::OrderDirection::Asc)])
570        ));
571    }
572
573    #[test]
574    fn test_streaming_aggregate_detection() {
575        // Simple streaming aggregate queries should be detected
576        assert!(is_streaming_aggregate_query(&parse_select(
577            "SELECT SUM(k) FROM sbtest1 WHERE id BETWEEN 1 AND 100"
578        )));
579        assert!(is_streaming_aggregate_query(&parse_select(
580            "SELECT COUNT(c) FROM sbtest1 WHERE id BETWEEN 50 AND 150"
581        )));
582        assert!(is_streaming_aggregate_query(&parse_select(
583            "SELECT AVG(k), SUM(k), COUNT(k) FROM sbtest1 WHERE id BETWEEN 1 AND 10"
584        )));
585        assert!(is_streaming_aggregate_query(&parse_select(
586            "SELECT MIN(k), MAX(k) FROM sbtest1 WHERE id BETWEEN 1 AND 100"
587        )));
588    }
589
590    #[test]
591    fn test_streaming_aggregate_negative_cases() {
592        // Non-streaming aggregate queries should not be detected
593
594        // Regular column selection (not aggregate)
595        assert!(!is_streaming_aggregate_query(&parse_select(
596            "SELECT c FROM sbtest1 WHERE id BETWEEN 1 AND 100"
597        )));
598
599        // WITH clause not supported
600        assert!(!is_streaming_aggregate_query(&parse_select(
601            "WITH cte AS (SELECT 1) SELECT SUM(k) FROM sbtest1 WHERE id BETWEEN 1 AND 100"
602        )));
603
604        // GROUP BY not supported
605        assert!(!is_streaming_aggregate_query(&parse_select(
606            "SELECT SUM(k) FROM sbtest1 WHERE id BETWEEN 1 AND 100 GROUP BY c"
607        )));
608
609        // DISTINCT not supported
610        assert!(!is_streaming_aggregate_query(&parse_select(
611            "SELECT DISTINCT SUM(k) FROM sbtest1 WHERE id BETWEEN 1 AND 100"
612        )));
613
614        // ORDER BY not supported
615        assert!(!is_streaming_aggregate_query(&parse_select(
616            "SELECT SUM(k) FROM sbtest1 WHERE id BETWEEN 1 AND 100 ORDER BY 1"
617        )));
618
619        // HAVING not supported
620        assert!(!is_streaming_aggregate_query(&parse_select(
621            "SELECT SUM(k) FROM sbtest1 WHERE id BETWEEN 1 AND 100 HAVING SUM(k) > 10"
622        )));
623
624        // No WHERE clause
625        assert!(!is_streaming_aggregate_query(&parse_select("SELECT SUM(k) FROM sbtest1")));
626
627        // DISTINCT aggregate not supported
628        assert!(!is_streaming_aggregate_query(&parse_select(
629            "SELECT COUNT(DISTINCT k) FROM sbtest1 WHERE id BETWEEN 1 AND 100"
630        )));
631
632        // Wildcard not supported
633        assert!(!is_streaming_aggregate_query(&parse_select(
634            "SELECT * FROM sbtest1 WHERE id BETWEEN 1 AND 100"
635        )));
636
637        // Join not supported
638        assert!(!is_streaming_aggregate_query(&parse_select(
639            "SELECT SUM(a.k) FROM sbtest1 a, sbtest1 b WHERE a.id BETWEEN 1 AND 100"
640        )));
641    }
642}