pub mod ddl;
pub mod dml;
pub mod expr;
pub mod precedence;
pub mod recursion;

use crate::Span;
use crate::ast::span::Spanned;
use crate::ast::{Expr, Statement, StatementKind};
use crate::dialect::Dialect;
use crate::error::{ParserError, Result};
use crate::tokenizer::Tokenizer;
use crate::tokenizer::keyword::Keyword;
use crate::tokenizer::token::{Token, TokenWithSpan, Word};
use precedence::Precedence;
use recursion::{DEFAULT_RECURSION_LIMIT, RecursionCounter};

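/// A recursive-descent SQL parser over a pre-tokenized stream.
///
/// The parser tracks its position in `tokens` via `pos`, bounds nesting
/// depth with a [`RecursionCounter`], and defers dialect-specific decisions
/// (custom statement parsing, operator precedence values) to the supplied
/// [`Dialect`].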
#[derive(Debug, Clone)]
pub struct Parser<'a> {
    tokens: Vec<TokenWithSpan>,
    pos: usize,
    pub(crate) recursion: RecursionCounter,
    dialect: &'a dyn Dialect,
}

impl<'a> Parser<'a> {
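    /// Creates a parser over `tokens` with the default recursion limit.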
    pub fn new(dialect: &'a dyn Dialect, tokens: Vec<TokenWithSpan>) -> Self {
        Self {
            tokens,
            pos: 0,
            recursion: RecursionCounter::new(DEFAULT_RECURSION_LIMIT),
            dialect,
        }
    }

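    /// Creates a parser over `tokens` with a custom recursion `limit`, for
    /// callers that need to accept deeper (or enforce shallower) nesting
    /// than [`DEFAULT_RECURSION_LIMIT`] allows.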
    pub fn with_recursion_limit(
        dialect: &'a dyn Dialect,
        tokens: Vec<TokenWithSpan>,
        limit: usize,
    ) -> Self {
        Self {
            tokens,
            pos: 0,
            recursion: RecursionCounter::new(limit),
            dialect,
        }
    }

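    /// Tokenizes `sql` and parses it as a single expression, returning an
    /// error if any tokens remain before EOF.
    ///
    /// Sketch of intended usage; `GenericDialect` here stands in for
    /// whatever [`Dialect`] implementation the caller has available:
    ///
    /// ```ignore
    /// let dialect = GenericDialect {};
    /// let expr = Parser::parse_expression_sql(&dialect, "price * 1.2")?;
    /// ```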
    pub fn parse_expression_sql(dialect: &'a dyn Dialect, sql: &str) -> Result<Expr> {
        let tokens = Tokenizer::new(dialect, sql).tokenize()?;
        let mut parser = Parser::new(dialect, tokens);
        let expr = parser.parse_expr()?;
        if !matches!(parser.peek().token, Token::EOF) {
            let tok = parser.peek().clone();
            return Err(ParserError::UnexpectedToken {
                line: tok.span.start.line,
                column: tok.span.start.column,
                expected: "end of input".into(),
                found: format!("{:?}", tok.token),
            });
        }
        Ok(expr)
    }

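    /// Tokenizes `sql` and parses it into a list of semicolon-separated
    /// statements. Empty statements (stray semicolons) are skipped.
    ///
    /// Sketch of intended usage, again with a stand-in dialect:
    ///
    /// ```ignore
    /// let dialect = GenericDialect {};
    /// let stmts = Parser::parse_sql(&dialect, "SELECT 1; DELETE FROM t;")?;
    /// assert_eq!(stmts.len(), 2);
    /// ```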
    pub fn parse_sql(dialect: &'a dyn Dialect, sql: &str) -> Result<Vec<Statement>> {
        let tokens = Tokenizer::new(dialect, sql).tokenize()?;
        let mut parser = Parser::new(dialect, tokens);
        parser.parse_statements()
    }

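    /// Parses statements until EOF, skipping stray semicolons and consuming
    /// the optional trailing semicolon after each statement.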
    fn parse_statements(&mut self) -> Result<Vec<Statement>> {
        let mut statements = Vec::new();
        loop {
            match &self.peek().token {
                Token::EOF => break,
                Token::SemiColon => {
                    self.advance();
                    continue;
                }
                _ => {
                    let stmt = self.parse_statement()?;
                    statements.push(stmt);
                    if matches!(self.peek().token, Token::SemiColon) {
                        self.advance();
                    }
                }
            }
        }
        Ok(statements)
    }

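    /// Parses a single statement, giving the dialect's statement hook first
    /// refusal before dispatching on the leading keyword. Two-word prefixes
    /// (`CREATE TABLE`, `DROP INDEX`, ...) are disambiguated with one token
    /// of lookahead.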
    fn parse_statement(&mut self) -> Result<Statement> {
        if let Some(result) = self.dialect.parse_statement(self) {
            return result;
        }

        let tok = self.peek().clone();
        match &tok.token {
            Token::Word(Word { keyword, .. }) => match keyword {
                Keyword::SELECT => {
                    let select = self.parse_select()?;
                    Ok(Statement {
                        span: select.span(),
                        kind: StatementKind::Select(select),
                    })
                }
                Keyword::INSERT => {
                    let insert = self.parse_insert()?;
                    Ok(Statement {
                        span: insert.span(),
                        kind: StatementKind::Insert(insert),
                    })
                }
                Keyword::UPDATE => {
                    let update = self.parse_update()?;
                    Ok(Statement {
                        span: update.span(),
                        kind: StatementKind::Update(update),
                    })
                }
                Keyword::DELETE => {
                    let delete = self.parse_delete()?;
                    Ok(Statement {
                        span: delete.span(),
                        kind: StatementKind::Delete(delete),
                    })
                }
                Keyword::CREATE => match self.peek_keyword_ahead(1) {
                    Some(Keyword::TABLE) => {
                        let create_table = self.parse_create_table()?;
                        Ok(Statement {
                            span: create_table.span(),
                            kind: StatementKind::CreateTable(create_table),
                        })
                    }
                    Some(Keyword::INDEX) => {
                        let create_index = self.parse_create_index()?;
                        Ok(Statement {
                            span: create_index.span(),
                            kind: StatementKind::CreateIndex(create_index),
                        })
                    }
                    _ => Err(ParserError::UnexpectedToken {
                        line: tok.span.start.line,
                        column: tok.span.start.column,
                        expected: "CREATE TABLE or CREATE INDEX".into(),
                        found: format!("{:?}", tok.token),
                    }),
                },
                Keyword::DROP => match self.peek_keyword_ahead(1) {
                    Some(Keyword::INDEX) => {
                        let drop_index = self.parse_drop_index()?;
                        Ok(Statement {
                            span: drop_index.span(),
                            kind: StatementKind::DropIndex(drop_index),
                        })
                    }
                    Some(Keyword::TABLE) => {
                        let drop_table = self.parse_drop_table()?;
                        Ok(Statement {
                            span: drop_table.span(),
                            kind: StatementKind::DropTable(drop_table),
                        })
                    }
                    _ => Err(ParserError::UnexpectedToken {
                        line: tok.span.start.line,
                        column: tok.span.start.column,
                        expected: "DROP TABLE or DROP INDEX".into(),
                        found: format!("{:?}", tok.token),
                    }),
                },
                _ => Err(ParserError::UnexpectedToken {
                    line: tok.span.start.line,
                    column: tok.span.start.column,
                    expected: "statement".into(),
                    found: format!("{:?}", tok.token),
                }),
            },
            _ => Err(ParserError::UnexpectedToken {
                line: tok.span.start.line,
                column: tok.span.start.column,
                expected: "statement".into(),
                found: format!("{:?}", tok.token),
            }),
        }
    }

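    /// Parses a full expression by precedence climbing, starting from the
    /// lowest binding power.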
    pub fn parse_expr(&mut self) -> Result<Expr> {
        self.parse_subexpr(precedence::PREC_UNKNOWN)
    }

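    /// Returns the current token without consuming it. If `pos` ever runs
    /// past the end, the final token is returned instead; this relies on the
    /// invariant that the token stream is never empty.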
    pub(crate) fn peek(&self) -> &TokenWithSpan {
        self.tokens
            .get(self.pos)
            .unwrap_or_else(|| self.tokens.last().expect("token stream not empty"))
    }

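    /// Returns the current token and moves past it. The position saturates
    /// at the last token, so repeated calls at EOF keep returning EOF.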
    pub(crate) fn advance(&mut self) -> TokenWithSpan {
        let tok = self.peek().clone();
        if self.pos + 1 < self.tokens.len() {
            self.pos += 1;
        }
        tok
    }

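    /// Returns the token immediately before the current position, or `None`
    /// at the start of the stream.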
    pub(crate) fn prev(&self) -> Option<&TokenWithSpan> {
        if self.pos == 0 {
            None
        } else {
            self.tokens.get(self.pos - 1)
        }
    }

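    /// Consumes and returns the current token if `predicate` accepts it;
    /// otherwise leaves the position unchanged and reports the `expected`
    /// description against what was actually found.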
    pub(crate) fn expect_token<F>(
        &mut self,
        expected: &str,
        mut predicate: F,
    ) -> Result<TokenWithSpan>
    where
        F: FnMut(&Token) -> bool,
    {
        let tok = self.peek().clone();
        if predicate(&tok.token) {
            self.advance();
            Ok(tok)
        } else {
            Err(ParserError::ExpectedToken {
                line: tok.span.start.line,
                column: tok.span.start.column,
                expected: expected.to_string(),
                found: format!("{:?}", tok.token),
            })
        }
    }

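    /// Consumes the current token if it is the given keyword, returning
    /// whether it matched.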
    pub(crate) fn consume_keyword(&mut self, keyword: Keyword) -> bool {
        if let Token::Word(Word { keyword: kw, .. }) = &self.peek().token
            && *kw == keyword
        {
            self.advance();
            return true;
        }
        false
    }

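    /// Parses a plain (non-keyword) identifier, returning its text and span.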
    pub(crate) fn parse_identifier(&mut self) -> Result<(String, Span)> {
        let tok = self.expect_token("identifier", |t| {
            matches!(
                t,
                Token::Word(Word {
                    keyword: Keyword::NoKeyword,
                    ..
                })
            )
        })?;
        if let Token::Word(Word { value, .. }) = tok.token {
            Ok((value, tok.span))
        } else {
            unreachable!()
        }
    }

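    /// Consumes the current token if it is the keyword `kw`, returning its
    /// span; errors without advancing otherwise.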
    pub(crate) fn expect_keyword(&mut self, expected: &str, kw: Keyword) -> Result<Span> {
        let tok = self.peek().clone();
        if let Token::Word(Word { keyword, .. }) = tok.token
            && keyword == kw
        {
            self.advance();
            return Ok(tok.span);
        }
        Err(ParserError::ExpectedToken {
            line: tok.span.start.line,
            column: tok.span.start.column,
            expected: expected.to_string(),
            found: format!("{:?}", tok.token),
        })
    }

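    /// Returns the binding power of the token at the current position when
    /// treated as an infix operator, or `precedence::PREC_UNKNOWN` when the
    /// token cannot continue an expression. `NOT` needs one token of
    /// lookahead because its precedence depends on whether it begins
    /// `NOT BETWEEN`, `NOT LIKE`, or `NOT IN`.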
    pub(crate) fn next_precedence(&self) -> u8 {
        match &self.peek().token {
            Token::Plus | Token::Minus => self.dialect.prec_value(Precedence::PlusMinus),
            Token::Mul | Token::Div | Token::Mod => self.dialect.prec_value(Precedence::MulDivMod),
            Token::StringConcat => self.dialect.prec_value(Precedence::StringConcat),
            Token::Eq | Token::Neq | Token::Lt | Token::Gt | Token::LtEq | Token::GtEq => {
                self.dialect.prec_value(Precedence::Comparison)
            }
            Token::Word(Word { keyword, .. }) => match keyword {
                Keyword::AND => self.dialect.prec_value(Precedence::And),
                Keyword::OR => self.dialect.prec_value(Precedence::Or),
                Keyword::BETWEEN => self.dialect.prec_value(Precedence::Between),
                Keyword::LIKE => self.dialect.prec_value(Precedence::Like),
                Keyword::IN => self.dialect.prec_value(Precedence::Comparison),
                Keyword::IS => self.dialect.prec_value(Precedence::Is),
                Keyword::NOT => match self.peek_keyword_ahead(1) {
                    Some(Keyword::BETWEEN) => self.dialect.prec_value(Precedence::Between),
                    Some(Keyword::LIKE) => self.dialect.prec_value(Precedence::Like),
                    Some(Keyword::IN) => self.dialect.prec_value(Precedence::Comparison),
                    _ => precedence::PREC_UNKNOWN,
                },
                _ => precedence::PREC_UNKNOWN,
            },
            _ => precedence::PREC_UNKNOWN,
        }
    }

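    /// Returns the keyword of the token `offset` positions ahead of the
    /// current one, or `None` if that token is not a word.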
    fn peek_keyword_ahead(&self, offset: usize) -> Option<Keyword> {
        self.tokens.get(self.pos + offset).and_then(|tw| {
            if let Token::Word(Word { keyword, .. }) = &tw.token {
                Some(*keyword)
            } else {
                None
            }
        })
    }
}