vibesql/parser/
mod.rs

//! SQL Parser.
//!
//! This module provides a recursive descent parser that converts a stream of tokens
//! into an Abstract Syntax Tree (AST).
5
6mod expr;
7mod query;
8mod stmt;
9
10use crate::ast::*;
11use crate::error::{Error, Result, Span};
12use crate::lexer::{Keyword, Lexer, Token, TokenKind};
13
14/// SQL Parser.
15///
16/// The parser converts SQL text into an Abstract Syntax Tree (AST).
17pub struct Parser<'a> {
18    lexer: Lexer<'a>,
19    #[allow(dead_code)]
20    input: &'a str,
21}
22
23impl<'a> Parser<'a> {
24    /// Create a new parser for the given input.
25    pub fn new(input: &'a str) -> Self {
26        Self {
27            lexer: Lexer::new(input),
28            input,
29        }
30    }
31
32    /// Parse all statements from the input.
33    pub fn parse(&mut self) -> Result<Vec<Statement>> {
34        let mut statements = Vec::new();
35
36        loop {
37            // Skip empty statements (standalone semicolons)
38            while self.consume(&TokenKind::Semicolon)?.is_some() {}
39
40            if self.check_eof()? {
41                break;
42            }
43
44            let stmt = self.parse_statement()?;
45            statements.push(stmt);
46
47            // Optional semicolon between statements
48            self.consume(&TokenKind::Semicolon)?;
49        }
50
51        Ok(statements)
52    }
53
54    /// Parse a single statement.
55    pub fn parse_statement(&mut self) -> Result<Statement> {
56        let token = self.peek()?;
57        let start = token.span.start;
58
59        let kind = match &token.kind {
60            TokenKind::Keyword(kw) => match kw {
61                Keyword::Select | Keyword::With => {
62                    let query = self.parse_query()?;
63                    StatementKind::Query(Box::new(query))
64                }
65                Keyword::Insert => self.parse_insert()?,
66                Keyword::Update => self.parse_update()?,
67                Keyword::Delete => self.parse_delete()?,
68                Keyword::Merge => self.parse_merge()?,
69                Keyword::Create => self.parse_create()?,
70                Keyword::Alter => self.parse_alter()?,
71                Keyword::Drop => self.parse_drop()?,
72                Keyword::Truncate => self.parse_truncate()?,
73                Keyword::Begin => self.parse_begin()?,
74                Keyword::Commit => {
75                    self.advance()?;
76                    StatementKind::Commit
77                }
78                Keyword::Rollback => self.parse_rollback()?,
79                Keyword::Explain => self.parse_explain()?,
80                Keyword::Describe => self.parse_describe()?,
81                Keyword::Show => self.parse_show()?,
82                Keyword::Set => self.parse_set()?,
83                _ => {
84                    return Err(Error::unexpected_token(
85                        "statement",
86                        format!("{}", token.kind),
87                        token.span,
88                    ));
89                }
90            },
91            TokenKind::LeftParen => {
92                // Parenthesized query
93                let query = self.parse_query()?;
94                StatementKind::Query(Box::new(query))
95            }
96            _ => {
97                return Err(Error::unexpected_token(
98                    "statement",
99                    format!("{}", token.kind),
100                    token.span,
101                ));
102            }
103        };
104
105        let end = self.current_position();
106        Ok(Statement::new(kind, Span::new(start, end)))
107    }
108
    // ========================================================================
    // Parser utilities
    // ========================================================================
112
113    /// Get the current position in the input.
114    fn current_position(&self) -> usize {
115        self.lexer.position()
116    }
117
118    /// Peek at the next token.
119    fn peek(&mut self) -> Result<&Token> {
120        self.lexer.peek()
121    }
122
123    /// Peek at the nth token ahead.
124    fn peek_nth(&mut self, n: usize) -> Result<&Token> {
125        self.lexer.peek_nth(n)
126    }
127
128    /// Advance to the next token and return it.
129    fn advance(&mut self) -> Result<Token> {
130        self.lexer.next_token_result()
131    }
132
133    /// Check if the next token is EOF.
134    fn check_eof(&mut self) -> Result<bool> {
135        Ok(self.peek()?.is_eof())
136    }
137
138    /// Check if the next token matches the expected kind.
139    fn check(&mut self, expected: &TokenKind) -> Result<bool> {
140        Ok(&self.peek()?.kind == expected)
141    }
142
143    /// Check if the next token is a specific keyword.
144    fn check_keyword(&mut self, keyword: Keyword) -> Result<bool> {
145        Ok(self.peek()?.is_keyword(keyword))
146    }
147
148    /// Consume the next token if it matches.
149    fn consume(&mut self, expected: &TokenKind) -> Result<Option<Token>> {
150        self.lexer.consume(expected)
151    }
152
153    /// Consume a keyword if it matches.
154    fn consume_keyword(&mut self, keyword: Keyword) -> Result<Option<Token>> {
155        self.lexer.consume_keyword(keyword)
156    }
157
158    /// Expect and consume a specific token.
159    fn expect(&mut self, expected: &TokenKind) -> Result<Token> {
160        self.lexer.expect(expected)
161    }
162
163    /// Expect and consume a specific keyword.
164    fn expect_keyword(&mut self, keyword: Keyword) -> Result<Token> {
165        self.lexer.expect_keyword(keyword)
166    }
167
168    /// Parse an identifier.
169    fn parse_identifier(&mut self) -> Result<Ident> {
170        let token = self.advance()?;
171        match token.kind {
172            TokenKind::Identifier(name) => Ok(Ident::new(name, token.span)),
173            TokenKind::QuotedIdentifier(name) => Ok(Ident::quoted(name, token.span)),
174            TokenKind::Keyword(kw) if !kw.is_reserved() => Ok(Ident::new(token.text, token.span)),
175            _ => Err(Error::expected_identifier(token.span)),
176        }
177    }
178
179    /// Parse an identifier, allowing reserved keywords (for aliases).
180    fn parse_identifier_allow_reserved(&mut self) -> Result<Ident> {
181        let token = self.advance()?;
182        match token.kind {
183            TokenKind::Identifier(name) => Ok(Ident::new(name, token.span)),
184            TokenKind::QuotedIdentifier(name) => Ok(Ident::quoted(name, token.span)),
185            TokenKind::Keyword(_) => Ok(Ident::new(token.text, token.span)),
186            _ => Err(Error::expected_identifier(token.span)),
187        }
188    }
189
190    /// Parse an object name (possibly qualified: schema.table).
191    fn parse_object_name(&mut self) -> Result<ObjectName> {
192        let mut parts = vec![self.parse_identifier()?];
193        let start = parts[0].span.start;
194
195        while self.consume(&TokenKind::Dot)?.is_some() {
196            parts.push(self.parse_identifier()?);
197        }
198
199        let end = parts.last().map(|p| p.span.end).unwrap_or(start);
200        Ok(ObjectName::new(parts, Span::new(start, end)))
201    }
202
203    /// Parse a comma-separated list.
204    fn parse_comma_separated<T, F>(&mut self, mut parse_fn: F) -> Result<Vec<T>>
205    where
206        F: FnMut(&mut Self) -> Result<T>,
207    {
208        let mut items = vec![parse_fn(self)?];
209
210        while self.consume(&TokenKind::Comma)?.is_some() {
211            items.push(parse_fn(self)?);
212        }
213
214        Ok(items)
215    }
216
217    /// Parse an optional alias (AS name or just name).
218    fn parse_optional_alias(&mut self) -> Result<Option<Ident>> {
219        if self.consume_keyword(Keyword::As)?.is_some() {
220            Ok(Some(self.parse_identifier_allow_reserved()?))
221        } else if self.peek()?.is_identifier() || !self.peek()?.is_any_keyword() {
222            // Check if next token could be an alias
223            let token = self.peek()?;
224            if matches!(
225                &token.kind,
226                TokenKind::Identifier(_) | TokenKind::QuotedIdentifier(_)
227            ) || matches!(&token.kind, TokenKind::Keyword(kw) if !kw.is_reserved())
228            {
229                Ok(Some(self.parse_identifier_allow_reserved()?))
230            } else {
231                Ok(None)
232            }
233        } else {
234            Ok(None)
235        }
236    }
237
238    /// Parse an optional table alias with column aliases.
239    fn parse_optional_table_alias(&mut self) -> Result<Option<Alias>> {
240        if let Some(name) = self.parse_optional_alias()? {
241            let columns = if self.consume(&TokenKind::LeftParen)?.is_some() {
242                let cols = self.parse_comma_separated(|p| p.parse_identifier())?;
243                self.expect(&TokenKind::RightParen)?;
244                cols
245            } else {
246                Vec::new()
247            };
248            Ok(Some(Alias::with_columns(name, columns)))
249        } else {
250            Ok(None)
251        }
252    }
253}
254
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `sql` as a single statement, panicking on any parse error.
    fn parse_stmt(sql: &str) -> Statement {
        let mut parser = Parser::new(sql);
        parser.parse_statement().expect("Failed to parse")
    }

    /// Parse `sql` as a sequence of statements, panicking on any parse error.
    fn parse_all(sql: &str) -> Vec<Statement> {
        let mut parser = Parser::new(sql);
        parser.parse().expect("Failed to parse")
    }

    #[test]
    fn test_empty_input() {
        let stmts = parse_all("");
        assert!(stmts.is_empty());
    }

    #[test]
    fn test_semicolons() {
        let stmts = parse_all(";;;");
        assert!(stmts.is_empty());
    }

    #[test]
    fn test_single_statement() {
        // `parse_stmt` panics on failure, so reaching the end is the assertion.
        let _stmt = parse_stmt("SELECT 1");
    }

    #[test]
    fn test_multiple_statements() {
        let stmts = parse_all("SELECT 1; SELECT 2; SELECT 3");
        assert_eq!(stmts.len(), 3);
    }

    #[test]
    fn test_repeated_and_trailing_semicolons() {
        // Runs of separators and trailing separators must not yield extra
        // statements or errors.
        let stmts = parse_all(";SELECT 1;;; SELECT 2;;");
        assert_eq!(stmts.len(), 2);
    }

    #[test]
    fn test_invalid_statement_start() {
        // A closing parenthesis cannot begin a statement.
        assert!(Parser::new(")").parse_statement().is_err());
        assert!(Parser::new(")").parse().is_err());
    }
}