vibesql/parser/
query.rs

1//! Query (SELECT) statement parsing.
2
3use super::Parser;
4use crate::ast::*;
5use crate::error::{Error, Result, Span};
6use crate::lexer::{Keyword, TokenKind};
7
8impl<'a> Parser<'a> {
9    /// Parse a complete query (WITH clause, SELECT, set operations, ORDER BY, LIMIT).
10    pub fn parse_query(&mut self) -> Result<Query> {
11        let start = self.current_position();
12
13        // Parse optional WITH clause
14        let with = if self.check_keyword(Keyword::With)? {
15            Some(self.parse_with_clause()?)
16        } else {
17            None
18        };
19
20        // Parse the query body
21        let body = self.parse_query_body()?;
22
23        // Parse ORDER BY
24        let order_by = if self.consume_keyword(Keyword::Order)?.is_some() {
25            self.expect_keyword(Keyword::By)?;
26            self.parse_comma_separated(|p| p.parse_order_by_expr())?
27        } else {
28            Vec::new()
29        };
30
31        // Parse LIMIT/OFFSET
32        let limit = self.parse_limit_clause()?;
33
34        let end = self.current_position();
35        Ok(Query {
36            with,
37            body,
38            order_by,
39            limit,
40            span: Span::new(start, end),
41        })
42    }
43
44    /// Parse WITH clause (Common Table Expressions).
45    fn parse_with_clause(&mut self) -> Result<WithClause> {
46        let start = self.current_position();
47        self.expect_keyword(Keyword::With)?;
48
49        let recursive = self.consume_keyword(Keyword::Recursive)?.is_some();
50        let ctes = self.parse_comma_separated(|p| p.parse_cte())?;
51
52        let end = self.current_position();
53        Ok(WithClause {
54            recursive,
55            ctes,
56            span: Span::new(start, end),
57        })
58    }
59
60    /// Parse a single CTE.
61    fn parse_cte(&mut self) -> Result<Cte> {
62        let start = self.current_position();
63        let name = self.parse_identifier()?;
64
65        // Optional column list
66        let columns = if self.consume(&TokenKind::LeftParen)?.is_some() {
67            let cols = self.parse_comma_separated(|p| p.parse_identifier())?;
68            self.expect(&TokenKind::RightParen)?;
69            cols
70        } else {
71            Vec::new()
72        };
73
74        self.expect_keyword(Keyword::As)?;
75        self.expect(&TokenKind::LeftParen)?;
76        let query = Box::new(self.parse_query()?);
77        self.expect(&TokenKind::RightParen)?;
78
79        let end = self.current_position();
80        Ok(Cte {
81            name,
82            columns,
83            query,
84            span: Span::new(start, end),
85        })
86    }
87
88    /// Parse query body (SELECT, set operations, or parenthesized query).
89    fn parse_query_body(&mut self) -> Result<QueryBody> {
90        let mut left = self.parse_query_primary()?;
91
92        // Check for set operations
93        loop {
94            let op = if self.consume_keyword(Keyword::Union)?.is_some() {
95                Some(SetOperator::Union)
96            } else if self.consume_keyword(Keyword::Intersect)?.is_some() {
97                Some(SetOperator::Intersect)
98            } else if self.consume_keyword(Keyword::Except)?.is_some() {
99                Some(SetOperator::Except)
100            } else {
101                None
102            };
103
104            if let Some(op) = op {
105                let all = self.consume_keyword(Keyword::All)?.is_some();
106                if !all {
107                    self.consume_keyword(Keyword::Distinct)?;
108                }
109                let right = self.parse_query_primary()?;
110                left = QueryBody::SetOperation {
111                    op,
112                    all,
113                    left: Box::new(left),
114                    right: Box::new(right),
115                };
116            } else {
117                break;
118            }
119        }
120
121        Ok(left)
122    }
123
124    /// Parse a primary query (SELECT or parenthesized query).
125    fn parse_query_primary(&mut self) -> Result<QueryBody> {
126        if self.consume(&TokenKind::LeftParen)?.is_some() {
127            let query = self.parse_query()?;
128            self.expect(&TokenKind::RightParen)?;
129            Ok(QueryBody::Parenthesized(Box::new(query)))
130        } else {
131            let select = self.parse_select()?;
132            Ok(QueryBody::Select(Box::new(select)))
133        }
134    }
135
136    /// Parse a SELECT statement.
137    fn parse_select(&mut self) -> Result<Select> {
138        let start = self.current_position();
139        self.expect_keyword(Keyword::Select)?;
140
141        // Parse SELECT AS STRUCT/VALUE (value table syntax)
142        let select_as = self.parse_select_as()?;
143
144        // Parse DISTINCT/ALL
145        let distinct = if self.consume_keyword(Keyword::Distinct)?.is_some() {
146            Some(Distinct::Distinct)
147        } else if self.consume_keyword(Keyword::All)?.is_some() {
148            Some(Distinct::All)
149        } else {
150            None
151        };
152
153        // Parse projection list
154        let projection = self.parse_comma_separated(|p| p.parse_select_item())?;
155
156        // Parse FROM clause
157        let from = if self.consume_keyword(Keyword::From)?.is_some() {
158            Some(self.parse_from_clause()?)
159        } else {
160            None
161        };
162
163        // Parse WHERE clause
164        let where_clause = if self.consume_keyword(Keyword::Where)?.is_some() {
165            Some(self.parse_expression()?)
166        } else {
167            None
168        };
169
170        // Parse GROUP BY clause
171        let group_by = if self.consume_keyword(Keyword::Group)?.is_some() {
172            self.expect_keyword(Keyword::By)?;
173            Some(self.parse_group_by_clause()?)
174        } else {
175            None
176        };
177
178        // Parse HAVING clause
179        let having = if self.consume_keyword(Keyword::Having)?.is_some() {
180            Some(self.parse_expression()?)
181        } else {
182            None
183        };
184
185        // Parse QUALIFY clause (window function filter)
186        let qualify = if self.consume_keyword(Keyword::Qualify)?.is_some() {
187            Some(self.parse_expression()?)
188        } else {
189            None
190        };
191
192        // Parse WINDOW clause
193        let window = if self.consume_keyword(Keyword::Window)?.is_some() {
194            self.parse_comma_separated(|p| p.parse_named_window_def())?
195        } else {
196            Vec::new()
197        };
198
199        let end = self.current_position();
200        Ok(Select {
201            distinct,
202            select_as,
203            projection,
204            from,
205            where_clause,
206            group_by,
207            having,
208            qualify,
209            window,
210            span: Span::new(start, end),
211        })
212    }
213
214    /// Parse SELECT AS STRUCT/VALUE modifier.
215    fn parse_select_as(&mut self) -> Result<Option<SelectAs>> {
216        // Check if we have 'AS' next (but not as part of an alias - lookahead for STRUCT/VALUE)
217        if self.check_keyword(Keyword::As)? {
218            // Peek at the token after AS
219            let next_token = self.peek_nth(1)?;
220            match &next_token.kind {
221                TokenKind::Keyword(Keyword::Struct) => {
222                    self.consume_keyword(Keyword::As)?;
223                    self.consume_keyword(Keyword::Struct)?;
224                    return Ok(Some(SelectAs::Struct));
225                }
226                TokenKind::Keyword(Keyword::Value) => {
227                    self.consume_keyword(Keyword::As)?;
228                    self.consume_keyword(Keyword::Value)?;
229                    return Ok(Some(SelectAs::Value));
230                }
231                // Could be a type name like `SELECT AS myproto.Message`
232                TokenKind::Identifier(_) | TokenKind::QuotedIdentifier(_) => {
233                    self.consume_keyword(Keyword::As)?;
234                    let type_name = self.parse_object_name()?;
235                    return Ok(Some(SelectAs::TypeName(type_name)));
236                }
237                _ => {}
238            }
239        }
240        Ok(None)
241    }
242
243    /// Parse a SELECT item.
244    pub(super) fn parse_select_item(&mut self) -> Result<SelectItem> {
245        // Check for wildcard
246        if self.consume(&TokenKind::Star)?.is_some() {
247            // Check for EXCEPT or REPLACE
248            if self.consume_keyword(Keyword::Except)?.is_some() {
249                self.expect(&TokenKind::LeftParen)?;
250                let except = self.parse_comma_separated(|p| p.parse_identifier())?;
251                self.expect(&TokenKind::RightParen)?;
252                return Ok(SelectItem::WildcardExcept {
253                    qualifier: None,
254                    except,
255                });
256            } else if self.consume_keyword(Keyword::Replace)?.is_some() {
257                self.expect(&TokenKind::LeftParen)?;
258                let replace = self.parse_comma_separated(|p| {
259                    let expr = p.parse_expression()?;
260                    p.expect_keyword(Keyword::As)?;
261                    let alias = p.parse_identifier()?;
262                    Ok((expr, alias))
263                })?;
264                self.expect(&TokenKind::RightParen)?;
265                return Ok(SelectItem::WildcardReplace {
266                    qualifier: None,
267                    replace,
268                });
269            }
270            return Ok(SelectItem::Wildcard);
271        }
272
273        // Parse expression
274        let expr = self.parse_expression()?;
275
276        // Check for qualified wildcard: expr.*
277        if let ExprKind::Identifier(ref ident) = expr.kind {
278            if self.consume(&TokenKind::Dot)?.is_some() && self.consume(&TokenKind::Star)?.is_some()
279            {
280                let qualifier = ObjectName::simple(ident.clone());
281                // Check for EXCEPT or REPLACE
282                if self.consume_keyword(Keyword::Except)?.is_some() {
283                    self.expect(&TokenKind::LeftParen)?;
284                    let except = self.parse_comma_separated(|p| p.parse_identifier())?;
285                    self.expect(&TokenKind::RightParen)?;
286                    return Ok(SelectItem::WildcardExcept {
287                        qualifier: Some(qualifier),
288                        except,
289                    });
290                } else if self.consume_keyword(Keyword::Replace)?.is_some() {
291                    self.expect(&TokenKind::LeftParen)?;
292                    let replace = self.parse_comma_separated(|p| {
293                        let expr = p.parse_expression()?;
294                        p.expect_keyword(Keyword::As)?;
295                        let alias = p.parse_identifier()?;
296                        Ok((expr, alias))
297                    })?;
298                    self.expect(&TokenKind::RightParen)?;
299                    return Ok(SelectItem::WildcardReplace {
300                        qualifier: Some(qualifier),
301                        replace,
302                    });
303                }
304                return Ok(SelectItem::QualifiedWildcard { qualifier });
305            }
306        }
307
308        // Parse optional alias
309        let alias = self.parse_optional_alias()?;
310
311        Ok(SelectItem::Expr { expr, alias })
312    }
313
314    /// Parse FROM clause.
315    fn parse_from_clause(&mut self) -> Result<FromClause> {
316        let tables = self.parse_comma_separated(|p| p.parse_table_ref())?;
317        Ok(FromClause { tables })
318    }
319
320    /// Parse a table reference.
321    pub(super) fn parse_table_ref(&mut self) -> Result<TableRef> {
322        let mut left = self.parse_table_primary()?;
323
324        // Parse joins
325        loop {
326            let join_type = self.parse_join_type()?;
327            if let Some(jt) = join_type {
328                let right = self.parse_table_primary()?;
329                let condition = self.parse_join_condition(jt)?;
330                left = TableRef::Join {
331                    left: Box::new(left),
332                    right: Box::new(right),
333                    join_type: jt,
334                    condition,
335                };
336            } else {
337                break;
338            }
339        }
340
341        Ok(left)
342    }
343
344    /// Parse a primary table reference.
345    fn parse_table_primary(&mut self) -> Result<TableRef> {
346        // Check for parenthesized table ref or subquery
347        if self.consume(&TokenKind::LeftParen)?.is_some() {
348            // Could be subquery or parenthesized table ref
349            if self.check_keyword(Keyword::Select)? || self.check_keyword(Keyword::With)? {
350                let query = self.parse_query()?;
351                self.expect(&TokenKind::RightParen)?;
352                let alias = self.parse_optional_table_alias()?;
353                return Ok(TableRef::Subquery {
354                    query: Box::new(query),
355                    alias,
356                });
357            } else {
358                let inner = self.parse_table_ref()?;
359                self.expect(&TokenKind::RightParen)?;
360                return Ok(TableRef::Parenthesized(Box::new(inner)));
361            }
362        }
363
364        // Check for UNNEST
365        if self.consume_keyword(Keyword::Unnest)?.is_some() {
366            self.expect(&TokenKind::LeftParen)?;
367            let expr = self.parse_expression()?;
368            self.expect(&TokenKind::RightParen)?;
369            let alias = self.parse_optional_table_alias()?;
370            let (with_offset, offset_alias) = if self.consume_keyword(Keyword::With)?.is_some() {
371                self.expect_keyword(Keyword::Offset)?;
372                let offset_alias = if self.consume_keyword(Keyword::As)?.is_some() {
373                    Some(self.parse_identifier()?)
374                } else {
375                    None
376                };
377                (true, offset_alias)
378            } else {
379                (false, None)
380            };
381            return Ok(TableRef::Unnest {
382                expr,
383                alias,
384                with_offset,
385                offset_alias,
386            });
387        }
388
389        // Simple table reference
390        let name = self.parse_object_name()?;
391
392        // Check if this is a table function call
393        if self.consume(&TokenKind::LeftParen)?.is_some() {
394            let args = if self.check(&TokenKind::RightParen)? {
395                Vec::new()
396            } else {
397                self.parse_comma_separated(|p| p.parse_function_arg())?
398            };
399            self.expect(&TokenKind::RightParen)?;
400            let alias = self.parse_optional_table_alias()?;
401            return Ok(TableRef::TableFunction { name, args, alias });
402        }
403
404        // Parse optional hints
405        let hints = if self.consume(&TokenKind::At)?.is_some() {
406            self.expect(&TokenKind::LeftBrace)?;
407            let opts = self.parse_comma_separated(|p| p.parse_sql_option())?;
408            self.expect(&TokenKind::RightBrace)?;
409            opts
410        } else {
411            Vec::new()
412        };
413
414        let alias = self.parse_optional_table_alias()?;
415        Ok(TableRef::Table { name, alias, hints })
416    }
417
418    /// Parse SQL option (key = value).
419    fn parse_sql_option(&mut self) -> Result<SqlOption> {
420        let name = self.parse_identifier()?;
421        self.expect(&TokenKind::Eq)?;
422        let value = self.parse_expression()?;
423        Ok(SqlOption { name, value })
424    }
425
426    /// Parse a function argument.
427    pub(super) fn parse_function_arg(&mut self) -> Result<FunctionArg> {
428        // Check for named argument
429        let token = self.peek()?;
430        if let TokenKind::Identifier(_) = &token.kind {
431            let next = self.peek_nth(1)?;
432            if matches!(next.kind, TokenKind::FatArrow) {
433                let name = self.parse_identifier()?;
434                self.advance()?; // consume =>
435                let expr = self.parse_expression()?;
436                return Ok(FunctionArg::Named { name, value: expr });
437            }
438        }
439
440        let expr = self.parse_expression()?;
441        Ok(FunctionArg::Unnamed(expr))
442    }
443
444    /// Parse JOIN type.
445    fn parse_join_type(&mut self) -> Result<Option<JoinType>> {
446        if self.consume_keyword(Keyword::Cross)?.is_some() {
447            self.expect_keyword(Keyword::Join)?;
448            return Ok(Some(JoinType::Cross));
449        }
450
451        if self.consume_keyword(Keyword::Natural)?.is_some() {
452            self.consume_keyword(Keyword::Inner)?;
453            self.expect_keyword(Keyword::Join)?;
454            return Ok(Some(JoinType::Natural));
455        }
456
457        let join_type = if self.consume_keyword(Keyword::Inner)?.is_some() {
458            Some(JoinType::Inner)
459        } else if self.consume_keyword(Keyword::Left)?.is_some() {
460            if self.consume_keyword(Keyword::Semi)?.is_some() {
461                Some(JoinType::LeftSemi)
462            } else if self.consume_keyword(Keyword::Anti)?.is_some() {
463                Some(JoinType::LeftAnti)
464            } else {
465                self.consume_keyword(Keyword::Outer)?;
466                Some(JoinType::Left)
467            }
468        } else if self.consume_keyword(Keyword::Right)?.is_some() {
469            if self.consume_keyword(Keyword::Semi)?.is_some() {
470                Some(JoinType::RightSemi)
471            } else if self.consume_keyword(Keyword::Anti)?.is_some() {
472                Some(JoinType::RightAnti)
473            } else {
474                self.consume_keyword(Keyword::Outer)?;
475                Some(JoinType::Right)
476            }
477        } else if self.consume_keyword(Keyword::Full)?.is_some() {
478            self.consume_keyword(Keyword::Outer)?;
479            Some(JoinType::Full)
480        } else {
481            None
482        };
483
484        if let Some(jt) = join_type {
485            self.expect_keyword(Keyword::Join)?;
486            return Ok(Some(jt));
487        }
488
489        // Plain JOIN is INNER JOIN
490        if self.consume_keyword(Keyword::Join)?.is_some() {
491            return Ok(Some(JoinType::Inner));
492        }
493
494        Ok(None)
495    }
496
497    /// Parse JOIN condition.
498    fn parse_join_condition(&mut self, join_type: JoinType) -> Result<Option<JoinCondition>> {
499        match join_type {
500            JoinType::Cross | JoinType::Natural => Ok(None),
501            _ => {
502                if self.consume_keyword(Keyword::On)?.is_some() {
503                    let expr = self.parse_expression()?;
504                    Ok(Some(JoinCondition::On(expr)))
505                } else if self.consume_keyword(Keyword::Using)?.is_some() {
506                    self.expect(&TokenKind::LeftParen)?;
507                    let columns = self.parse_comma_separated(|p| p.parse_identifier())?;
508                    self.expect(&TokenKind::RightParen)?;
509                    Ok(Some(JoinCondition::Using(columns)))
510                } else {
511                    Ok(None)
512                }
513            }
514        }
515    }
516
517    /// Parse GROUP BY clause.
518    fn parse_group_by_clause(&mut self) -> Result<GroupByClause> {
519        let items = self.parse_comma_separated(|p| p.parse_group_by_item())?;
520        Ok(GroupByClause { items })
521    }
522
523    /// Parse a GROUP BY item.
524    fn parse_group_by_item(&mut self) -> Result<GroupByItem> {
525        if self.consume_keyword(Keyword::Rollup)?.is_some() {
526            self.expect(&TokenKind::LeftParen)?;
527            let exprs = self.parse_comma_separated(|p| p.parse_expression())?;
528            self.expect(&TokenKind::RightParen)?;
529            return Ok(GroupByItem::Rollup(exprs));
530        }
531
532        if self.consume_keyword(Keyword::Cube)?.is_some() {
533            self.expect(&TokenKind::LeftParen)?;
534            let exprs = self.parse_comma_separated(|p| p.parse_expression())?;
535            self.expect(&TokenKind::RightParen)?;
536            return Ok(GroupByItem::Cube(exprs));
537        }
538
539        if self.consume_keyword(Keyword::Grouping)?.is_some() {
540            self.expect_keyword(Keyword::Sets)?;
541            self.expect(&TokenKind::LeftParen)?;
542            let sets = self.parse_comma_separated(|p| {
543                if p.consume(&TokenKind::LeftParen)?.is_some() {
544                    let exprs = p.parse_comma_separated(|p2| p2.parse_expression())?;
545                    p.expect(&TokenKind::RightParen)?;
546                    Ok(exprs)
547                } else {
548                    Ok(vec![p.parse_expression()?])
549                }
550            })?;
551            self.expect(&TokenKind::RightParen)?;
552            return Ok(GroupByItem::GroupingSets(sets));
553        }
554
555        let expr = self.parse_expression()?;
556        Ok(GroupByItem::Expr(expr))
557    }
558
559    /// Parse ORDER BY expression.
560    pub(super) fn parse_order_by_expr(&mut self) -> Result<OrderByExpr> {
561        let expr = self.parse_expression()?;
562
563        let order = if self.consume_keyword(Keyword::Asc)?.is_some() {
564            Some(SortOrder::Asc)
565        } else if self.consume_keyword(Keyword::Desc)?.is_some() {
566            Some(SortOrder::Desc)
567        } else {
568            None
569        };
570
571        let nulls = if self.consume_keyword(Keyword::Nulls)?.is_some() {
572            if self.consume_keyword(Keyword::First)?.is_some() {
573                Some(NullsOrder::First)
574            } else if self.consume_keyword(Keyword::Last)?.is_some() {
575                Some(NullsOrder::Last)
576            } else {
577                let token = self.peek()?;
578                return Err(Error::unexpected_token(
579                    "FIRST or LAST",
580                    format!("{}", token.kind),
581                    token.span,
582                ));
583            }
584        } else {
585            None
586        };
587
588        Ok(OrderByExpr { expr, order, nulls })
589    }
590
591    /// Parse LIMIT/OFFSET clause.
592    fn parse_limit_clause(&mut self) -> Result<Option<LimitClause>> {
593        if self.consume_keyword(Keyword::Limit)?.is_some() {
594            let count = if self.consume_keyword(Keyword::All)?.is_some() {
595                None
596            } else {
597                Some(self.parse_expression()?)
598            };
599
600            let offset = if self.consume_keyword(Keyword::Offset)?.is_some() {
601                Some(self.parse_expression()?)
602            } else {
603                None
604            };
605
606            Ok(Some(LimitClause { count, offset }))
607        } else if self.consume_keyword(Keyword::Offset)?.is_some() {
608            let offset = Some(self.parse_expression()?);
609
610            let count = if self.consume_keyword(Keyword::Limit)?.is_some() {
611                if self.consume_keyword(Keyword::All)?.is_some() {
612                    None
613                } else {
614                    Some(self.parse_expression()?)
615                }
616            } else {
617                None
618            };
619
620            Ok(Some(LimitClause { count, offset }))
621        } else {
622            Ok(None)
623        }
624    }
625
626    /// Parse a named window definition.
627    fn parse_named_window_def(&mut self) -> Result<WindowDef> {
628        let name = self.parse_identifier()?;
629        self.expect_keyword(Keyword::As)?;
630        let spec = self.parse_window_spec()?;
631        Ok(WindowDef { name, spec })
632    }
633}
634
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::Parser;

    /// Parses `sql` as a complete query, panicking if parsing fails.
    fn parse_query(sql: &str) -> Query {
        Parser::new(sql)
            .parse_query()
            .expect("Failed to parse query")
    }

    /// Parses `sql` and returns its body as a `Select`, panicking if the body is any
    /// other kind of query.
    fn parse_select_body(sql: &str) -> Select {
        match parse_query(sql).body {
            QueryBody::Select(select) => *select,
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_simple_select() {
        let body = parse_query("SELECT 1").body;
        assert!(matches!(body, QueryBody::Select(_)));
    }

    #[test]
    fn test_select_from() {
        let select = parse_select_body("SELECT * FROM users");
        assert!(select.from.is_some());
        assert_eq!(select.projection.len(), 1);
    }

    #[test]
    fn test_select_where() {
        let select = parse_select_body("SELECT id FROM users WHERE active = true");
        assert!(select.where_clause.is_some());
    }

    #[test]
    fn test_select_join() {
        let select = parse_select_body("SELECT * FROM users u JOIN orders o ON u.id = o.user_id");
        assert!(select.from.is_some());
    }

    #[test]
    fn test_select_group_by() {
        let select =
            parse_select_body("SELECT department, COUNT(*) FROM employees GROUP BY department");
        assert!(select.group_by.is_some());
    }

    #[test]
    fn test_union() {
        let body = parse_query("SELECT 1 UNION ALL SELECT 2").body;
        assert!(matches!(body, QueryBody::SetOperation { .. }));
    }

    #[test]
    fn test_with_clause() {
        let query = parse_query("WITH cte AS (SELECT 1 AS x) SELECT * FROM cte");
        assert!(query.with.is_some());
    }

    #[test]
    fn test_order_by_limit() {
        let query = parse_query("SELECT * FROM t ORDER BY id DESC LIMIT 10 OFFSET 5");
        assert_eq!(query.order_by.len(), 1);
        assert!(query.limit.is_some());
    }
}
710}