alopex_sql/parser/mod.rs

pub mod ddl;
pub mod dml;
pub mod expr;
pub mod precedence;
pub mod recursion;

use crate::Span;
use crate::ast::span::Spanned;
use crate::ast::{Expr, Statement, StatementKind};
use crate::dialect::Dialect;
use crate::error::{ParserError, Result};
use crate::tokenizer::keyword::Keyword;
use crate::tokenizer::token::{Token, TokenWithSpan, Word};
use precedence::Precedence;
use recursion::{DEFAULT_RECURSION_LIMIT, RecursionCounter};

/// Parser that converts a token stream into a SQL AST.
#[derive(Debug, Clone)]
pub struct Parser<'a> {
    tokens: Vec<TokenWithSpan>,
    pos: usize,
    pub(crate) recursion: RecursionCounter,
    dialect: &'a dyn Dialect,
}

impl<'a> Parser<'a> {
    pub fn new(dialect: &'a dyn Dialect, tokens: Vec<TokenWithSpan>) -> Self {
        Self {
            tokens,
            pos: 0,
            recursion: RecursionCounter::new(DEFAULT_RECURSION_LIMIT),
            dialect,
        }
    }

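    /// Like [`Parser::new`], but with an explicit recursion limit. Nested
    /// expressions are parsed recursively, so callers handling untrusted
    /// input can lower the limit to fail fast instead of risking a stack
    /// overflow.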
    pub fn with_recursion_limit(
        dialect: &'a dyn Dialect,
        tokens: Vec<TokenWithSpan>,
        limit: usize,
    ) -> Self {
        Self {
            tokens,
            pos: 0,
            recursion: RecursionCounter::new(limit),
            dialect,
        }
    }

    /// Convenience entrypoint: parse a single expression from SQL input.
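    ///
    /// Errors unless the expression consumes the whole input.
    ///
    /// ```ignore
    /// // Minimal sketch; `GenericDialect` is a placeholder for whatever
    /// // `Dialect` implementation the crate actually provides.
    /// let dialect = GenericDialect;
    /// let expr = Parser::parse_expression_sql(&dialect, "a + b * 2")?;
    /// ```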
    pub fn parse_expression_sql(dialect: &'a dyn Dialect, sql: &str) -> Result<Expr> {
        let tokens = crate::tokenizer::Tokenizer::new(dialect, sql).tokenize()?;
        let mut parser = Parser::new(dialect, tokens);
        let expr = parser.parse_expr()?;
        if !matches!(parser.peek().token, Token::EOF) {
            let tok = parser.peek().clone();
            return Err(ParserError::UnexpectedToken {
                line: tok.span.start.line,
                column: tok.span.start.column,
                expected: "end of input".into(),
                found: format!("{:?}", tok.token),
            });
        }
        Ok(expr)
    }

    /// Parses an entire SQL string and returns the list of statements.
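    ///
    /// ```ignore
    /// // Minimal sketch; `GenericDialect` stands in for a concrete `Dialect`.
    /// let dialect = GenericDialect;
    /// let stmts = Parser::parse_sql(&dialect, "SELECT 1; DELETE FROM t;")?;
    /// assert_eq!(stmts.len(), 2);
    /// ```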
    pub fn parse_sql(dialect: &'a dyn Dialect, sql: &str) -> Result<Vec<Statement>> {
        let tokens = crate::tokenizer::Tokenizer::new(dialect, sql).tokenize()?;
        let mut parser = Parser::new(dialect, tokens);
        parser.parse_statements()
    }

    fn parse_statements(&mut self) -> Result<Vec<Statement>> {
        let mut statements = Vec::new();
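        // Stray semicolons (leading, trailing, or repeated) are consumed
        // without producing a statement.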
        loop {
            match &self.peek().token {
                Token::EOF => break,
                Token::SemiColon => {
                    self.advance();
                    continue;
                }
                _ => {
                    let stmt = self.parse_statement()?;
                    statements.push(stmt);
                    if matches!(self.peek().token, Token::SemiColon) {
                        self.advance();
                    }
                }
            }
        }
        Ok(statements)
    }

    fn parse_statement(&mut self) -> Result<Statement> {
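        // Give the active dialect a chance to handle dialect-specific
        // statements before falling back to the generic grammar.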
        if let Some(result) = self.dialect.parse_statement(self) {
            return result;
        }

        let tok = self.peek().clone();
        match &tok.token {
            Token::Word(Word { keyword, .. }) => match keyword {
                Keyword::SELECT => {
                    let select = self.parse_select()?;
                    Ok(Statement {
                        span: select.span(),
                        kind: StatementKind::Select(select),
                    })
                }
                Keyword::INSERT => {
                    let insert = self.parse_insert()?;
                    Ok(Statement {
                        span: insert.span(),
                        kind: StatementKind::Insert(insert),
                    })
                }
                Keyword::UPDATE => {
                    let update = self.parse_update()?;
                    Ok(Statement {
                        span: update.span(),
                        kind: StatementKind::Update(update),
                    })
                }
                Keyword::DELETE => {
                    let delete = self.parse_delete()?;
                    Ok(Statement {
                        span: delete.span(),
                        kind: StatementKind::Delete(delete),
                    })
                }
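                // CREATE and DROP need one token of lookahead to choose the
                // TABLE or INDEX form before committing to a sub-parser.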
                Keyword::CREATE => match self.peek_keyword_ahead(1) {
                    Some(Keyword::TABLE) => {
                        let create_table = self.parse_create_table()?;
                        Ok(Statement {
                            span: create_table.span(),
                            kind: StatementKind::CreateTable(create_table),
                        })
                    }
                    Some(Keyword::INDEX) => {
                        let create_index = self.parse_create_index()?;
                        Ok(Statement {
                            span: create_index.span(),
                            kind: StatementKind::CreateIndex(create_index),
                        })
                    }
                    _ => Err(ParserError::UnexpectedToken {
                        line: tok.span.start.line,
                        column: tok.span.start.column,
                        expected: "CREATE TABLE or CREATE INDEX".into(),
                        found: format!("{:?}", tok.token),
                    }),
                },
                Keyword::DROP => match self.peek_keyword_ahead(1) {
                    Some(Keyword::INDEX) => {
                        let drop_index = self.parse_drop_index()?;
                        Ok(Statement {
                            span: drop_index.span(),
                            kind: StatementKind::DropIndex(drop_index),
                        })
                    }
                    Some(Keyword::TABLE) => {
                        let drop_table = self.parse_drop_table()?;
                        Ok(Statement {
                            span: drop_table.span(),
                            kind: StatementKind::DropTable(drop_table),
                        })
                    }
                    _ => Err(ParserError::UnexpectedToken {
                        line: tok.span.start.line,
                        column: tok.span.start.column,
                        expected: "DROP TABLE or DROP INDEX".into(),
                        found: format!("{:?}", tok.token),
                    }),
                },
                _ => Err(ParserError::UnexpectedToken {
                    line: tok.span.start.line,
                    column: tok.span.start.column,
                    expected: "statement".into(),
                    found: format!("{:?}", tok.token),
                }),
            },
            _ => Err(ParserError::UnexpectedToken {
                line: tok.span.start.line,
                column: tok.span.start.column,
                expected: "statement".into(),
                found: format!("{:?}", tok.token),
            }),
        }
    }

    /// Parse a single expression from the current token stream.
    pub fn parse_expr(&mut self) -> Result<Expr> {
        self.parse_subexpr(precedence::PREC_UNKNOWN)
    }

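    /// Returns the current token without consuming it, falling back to the
    /// last token (EOF) if the position ever points past the end.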
    pub(crate) fn peek(&self) -> &TokenWithSpan {
        self.tokens
            .get(self.pos)
            .unwrap_or_else(|| self.tokens.last().expect("token stream not empty"))
    }

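    /// Consumes the current token and returns it. The position is clamped at
    /// the final token, so calling this repeatedly at end of input keeps
    /// yielding EOF instead of running past the buffer.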
    pub(crate) fn advance(&mut self) -> TokenWithSpan {
        let tok = self.peek().clone();
        if self.pos + 1 < self.tokens.len() {
            self.pos += 1;
        }
        tok
    }

    pub(crate) fn prev(&self) -> Option<&TokenWithSpan> {
        if self.pos == 0 {
            None
        } else {
            self.tokens.get(self.pos - 1)
        }
    }

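    /// Consumes and returns the current token if `predicate` accepts it;
    /// otherwise returns an `ExpectedToken` error describing `expected`.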
    pub(crate) fn expect_token<F>(
        &mut self,
        expected: &str,
        mut predicate: F,
    ) -> Result<TokenWithSpan>
    where
        F: FnMut(&Token) -> bool,
    {
        let tok = self.peek().clone();
        if predicate(&tok.token) {
            self.advance();
            Ok(tok)
        } else {
            Err(ParserError::ExpectedToken {
                line: tok.span.start.line,
                column: tok.span.start.column,
                expected: expected.to_string(),
                found: format!("{:?}", tok.token),
            })
        }
    }

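    /// Consumes the next token and returns `true` if it is the given keyword;
    /// otherwise leaves the position unchanged and returns `false`.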
    pub(crate) fn consume_keyword(&mut self, keyword: Keyword) -> bool {
        if let Token::Word(Word { keyword: kw, .. }) = &self.peek().token
            && *kw == keyword
        {
            self.advance();
            return true;
        }
        false
    }

    pub(crate) fn parse_identifier(&mut self) -> Result<(String, Span)> {
        let tok = self.expect_token("identifier", |t| {
            matches!(
                t,
                Token::Word(Word {
                    keyword: Keyword::NoKeyword,
                    ..
                })
            )
        })?;
        if let Token::Word(Word { value, .. }) = tok.token {
            Ok((value, tok.span))
        } else {
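            // The predicate above only accepts `Token::Word`, so this branch
            // is impossible.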
            unreachable!()
        }
    }

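    /// Like [`Parser::consume_keyword`], but returns the keyword's span on
    /// success and an `ExpectedToken` error otherwise.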
    pub(crate) fn expect_keyword(&mut self, expected: &str, kw: Keyword) -> Result<Span> {
        let tok = self.peek().clone();
        if let Token::Word(Word { keyword, .. }) = tok.token
            && keyword == kw
        {
            self.advance();
            return Ok(tok.span);
        }
        Err(ParserError::ExpectedToken {
            line: tok.span.start.line,
            column: tok.span.start.column,
            expected: expected.to_string(),
            found: format!("{:?}", tok.token),
        })
    }

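    /// Binding power of the upcoming infix token or keyword, consulted by the
    /// precedence-climbing expression loop. `PREC_UNKNOWN` means the token
    /// cannot continue an expression.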
    pub(crate) fn next_precedence(&self) -> u8 {
        match &self.peek().token {
            Token::Plus | Token::Minus => self.dialect.prec_value(Precedence::PlusMinus),
            Token::Mul | Token::Div | Token::Mod => self.dialect.prec_value(Precedence::MulDivMod),
            Token::StringConcat => self.dialect.prec_value(Precedence::StringConcat),
            Token::Eq | Token::Neq | Token::Lt | Token::Gt | Token::LtEq | Token::GtEq => {
                self.dialect.prec_value(Precedence::Comparison)
            }
            Token::Word(Word { keyword, .. }) => match keyword {
                Keyword::AND => self.dialect.prec_value(Precedence::And),
                Keyword::OR => self.dialect.prec_value(Precedence::Or),
                Keyword::BETWEEN => self.dialect.prec_value(Precedence::Between),
                Keyword::LIKE => self.dialect.prec_value(Precedence::Like),
                Keyword::IN => self.dialect.prec_value(Precedence::Comparison),
                Keyword::IS => self.dialect.prec_value(Precedence::Is),
                Keyword::NOT => {
                    // NOT can introduce NOT BETWEEN / NOT LIKE / NOT IN; use
                    // the precedence of the operator that follows.
                    match self.peek_keyword_ahead(1) {
                        Some(Keyword::BETWEEN) => self.dialect.prec_value(Precedence::Between),
                        Some(Keyword::LIKE) => self.dialect.prec_value(Precedence::Like),
                        Some(Keyword::IN) => self.dialect.prec_value(Precedence::Comparison),
                        _ => precedence::PREC_UNKNOWN,
                    }
                }
                _ => precedence::PREC_UNKNOWN,
            },
            _ => precedence::PREC_UNKNOWN,
        }
    }

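    /// Returns the keyword of the token `offset` positions ahead of the
    /// cursor, or `None` if that token is not a word.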
    fn peek_keyword_ahead(&self, offset: usize) -> Option<Keyword> {
        self.tokens.get(self.pos + offset).and_then(|tw| {
            if let Token::Word(Word { keyword, .. }) = &tw.token {
                Some(*keyword)
            } else {
                None
            }
        })
    }
}