mod ddl;
mod delete;
mod expression;
mod insert;
mod select;
mod update;

use bumpalo::Bump;
use vibesql_ast::arena::{
    AlterTableStmt, ArenaInterner, Converter, DeleteStmt, Expression, InsertStmt, SelectStmt,
    Statement, Symbol, UpdateStmt,
};

use crate::keywords::Keyword;
use crate::{Lexer, ParseError, Token};

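/// Arena-backed SQL parser.
///
/// Holds the token stream and a cursor into it, and allocates AST nodes and
/// interned identifier strings in the caller-supplied `Bump` arena through an
/// `ArenaInterner`.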
pub struct ArenaParser<'arena> {
    tokens: Vec<Token>,
    position: usize,
    placeholder_count: usize,
    arena: &'arena Bump,
    interner: ArenaInterner<'arena>,
}

impl<'arena> ArenaParser<'arena> {
    pub fn new(tokens: Vec<Token>, arena: &'arena Bump) -> Self {
        ArenaParser {
            tokens,
            position: 0,
            placeholder_count: 0,
            arena,
            interner: ArenaInterner::new(arena),
        }
    }

    pub fn interner(&self) -> &ArenaInterner<'arena> {
        &self.interner
    }

    pub fn into_interner(self) -> ArenaInterner<'arena> {
        self.interner
    }

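    /// Lexes `input` and parses a single SQL statement, allocating the AST in `arena`.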
    pub fn parse_sql(input: &str, arena: &'arena Bump) -> Result<Statement<'arena>, ParseError> {
        let mut lexer = Lexer::new(input);
        let tokens =
            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

        let mut parser = ArenaParser::new(tokens, arena);
        parser.parse_statement()
    }

    pub fn parse_select(
        input: &str,
        arena: &'arena Bump,
    ) -> Result<&'arena SelectStmt<'arena>, ParseError> {
        let mut lexer = Lexer::new(input);
        let tokens =
            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

        let mut parser = ArenaParser::new(tokens, arena);
        parser.parse_select_statement()
    }

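    /// Like `parse_select`, but also returns the interner so the caller can
    /// resolve interned `Symbol`s after the parser is dropped.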
    pub fn parse_select_with_interner(
        input: &str,
        arena: &'arena Bump,
    ) -> Result<(&'arena SelectStmt<'arena>, ArenaInterner<'arena>), ParseError> {
        let mut lexer = Lexer::new(input);
        let tokens =
            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

        let mut parser = ArenaParser::new(tokens, arena);
        let stmt = parser.parse_select_statement()?;
        Ok((stmt, parser.into_interner()))
    }

    fn parse_statement(&mut self) -> Result<Statement<'arena>, ParseError> {
        // Skip leading semicolons (empty statements).
        while self.try_consume(&Token::Semicolon) {}

        match self.peek() {
            Token::Keyword(Keyword::Select) | Token::Keyword(Keyword::With) => {
                let stmt = self.parse_select_statement()?;
                Ok(Statement::Select(stmt))
            }
            Token::Keyword(Keyword::Insert) => {
                let stmt = self.parse_insert_statement()?;
                Ok(Statement::Insert(stmt.clone()))
            }
            Token::Keyword(Keyword::Replace) => {
                let stmt = self.parse_replace_statement()?;
                Ok(Statement::Insert(stmt.clone()))
            }
            Token::Keyword(Keyword::Update) => {
                let stmt = self.parse_update_statement()?;
                Ok(Statement::Update(stmt.clone()))
            }
            Token::Keyword(Keyword::Delete) => {
                let stmt = self.parse_delete_statement()?;
                Ok(Statement::Delete(stmt.clone()))
            }

            // DDL statements.
            Token::Keyword(Keyword::Create) => self.parse_create_statement(),
            Token::Keyword(Keyword::Drop) => self.parse_drop_statement(),
            Token::Keyword(Keyword::Alter) => {
                let stmt = self.parse_alter_table_statement()?;
                Ok(Statement::AlterTable(stmt.clone()))
            }
            Token::Keyword(Keyword::Truncate) => {
                let stmt = self.parse_truncate_table_statement()?;
                Ok(Statement::TruncateTable(stmt))
            }
            Token::Keyword(Keyword::Analyze) => {
                let stmt = self.parse_analyze_statement()?;
                Ok(Statement::Analyze(stmt))
            }

            // Transaction control.
            Token::Keyword(Keyword::Begin) | Token::Keyword(Keyword::Start) => {
                let stmt = self.parse_begin_statement()?;
                Ok(Statement::BeginTransaction(stmt))
            }
            Token::Keyword(Keyword::Commit) => {
                let stmt = self.parse_commit_statement()?;
                Ok(Statement::Commit(stmt))
            }
            Token::Keyword(Keyword::Rollback) => {
                if self.peek_next_keyword(Keyword::To) {
                    let stmt = self.parse_rollback_to_savepoint_statement()?;
                    Ok(Statement::RollbackToSavepoint(stmt))
                } else {
                    let stmt = self.parse_rollback_statement()?;
                    Ok(Statement::Rollback(stmt))
                }
            }
            Token::Keyword(Keyword::Savepoint) => {
                let stmt = self.parse_savepoint_statement()?;
                Ok(Statement::Savepoint(stmt))
            }
            Token::Keyword(Keyword::Release) => {
                let stmt = self.parse_release_savepoint_statement()?;
                Ok(Statement::ReleaseSavepoint(stmt))
            }

            _ => Err(ParseError { message: format!("Unexpected token: {:?}", self.peek()) }),
        }
    }

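    /// Dispatches a `CREATE ...` statement by looking ahead past the optional
    /// `OR REPLACE`, `UNIQUE`/`FULLTEXT`/`SPATIAL`, and `TEMP`/`TEMPORARY`
    /// modifiers to find the object kind (`INDEX` or `VIEW`).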
    fn parse_create_statement(&mut self) -> Result<Statement<'arena>, ParseError> {
        // Start looking at the token right after CREATE.
        let mut offset = 1;

        // Skip an optional OR REPLACE.
        if matches!(self.peek_at_offset(offset), Token::Keyword(Keyword::Or)) {
            offset += 2;
        }

        // Skip an optional index modifier.
        if matches!(
            self.peek_at_offset(offset),
            Token::Keyword(Keyword::Unique)
                | Token::Keyword(Keyword::Fulltext)
                | Token::Keyword(Keyword::Spatial)
        ) {
            offset += 1;
        }

        // Skip an optional TEMP / TEMPORARY.
        if matches!(
            self.peek_at_offset(offset),
            Token::Keyword(Keyword::Temp) | Token::Keyword(Keyword::Temporary)
        ) {
            offset += 1;
        }

        match self.peek_at_offset(offset) {
            Token::Keyword(Keyword::Index) => {
                let stmt = self.parse_create_index_statement()?;
                Ok(Statement::CreateIndex(stmt))
            }
            Token::Keyword(Keyword::View) => {
                let stmt = self.parse_create_view_statement()?;
                Ok(Statement::CreateView(stmt))
            }
            _ => Err(ParseError {
                message: format!(
                    "Unsupported CREATE statement type: {:?}",
                    self.peek_at_offset(offset)
                ),
            }),
        }
    }

    fn parse_drop_statement(&mut self) -> Result<Statement<'arena>, ParseError> {
        match self.peek_at_offset(1) {
            Token::Keyword(Keyword::Table) => {
                let stmt = self.parse_drop_table_statement()?;
                Ok(Statement::DropTable(stmt))
            }
            Token::Keyword(Keyword::Index) => {
                let stmt = self.parse_drop_index_statement()?;
                Ok(Statement::DropIndex(stmt))
            }
            Token::Keyword(Keyword::View) => {
                let stmt = self.parse_drop_view_statement()?;
                Ok(Statement::DropView(stmt))
            }
            _ => Err(ParseError {
                message: format!("Unsupported DROP statement type: {:?}", self.peek_at_offset(1)),
            }),
        }
    }

    pub fn parse_expression_sql(
        input: &str,
        arena: &'arena Bump,
    ) -> Result<&'arena Expression<'arena>, ParseError> {
        let mut lexer = Lexer::new(input);
        let tokens =
            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

        let mut parser = ArenaParser::new(tokens, arena);
        let expr = parser.parse_expression()?;
        Ok(arena.alloc(expr))
    }

    pub fn parse_alter_table_sql(
        input: &str,
        arena: &'arena Bump,
    ) -> Result<&'arena AlterTableStmt<'arena>, ParseError> {
        let mut lexer = Lexer::new(input);
        let tokens =
            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

        let mut parser = ArenaParser::new(tokens, arena);
        parser.parse_alter_table_statement()
    }

    pub fn parse_insert(
        input: &str,
        arena: &'arena Bump,
    ) -> Result<&'arena InsertStmt<'arena>, ParseError> {
        let mut lexer = Lexer::new(input);
        let tokens =
            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

        let mut parser = ArenaParser::new(tokens, arena);
        parser.parse_insert_statement()
    }

    pub fn parse_update(
        input: &str,
        arena: &'arena Bump,
    ) -> Result<&'arena UpdateStmt<'arena>, ParseError> {
        let mut lexer = Lexer::new(input);
        let tokens =
            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

        let mut parser = ArenaParser::new(tokens, arena);
        parser.parse_update_statement()
    }

    pub fn parse_delete(
        input: &str,
        arena: &'arena Bump,
    ) -> Result<&'arena DeleteStmt<'arena>, ParseError> {
        let mut lexer = Lexer::new(input);
        let tokens =
            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

        let mut parser = ArenaParser::new(tokens, arena);
        parser.parse_delete_statement()
    }

    pub fn parse_replace(
        input: &str,
        arena: &'arena Bump,
    ) -> Result<&'arena InsertStmt<'arena>, ParseError> {
        let mut lexer = Lexer::new(input);
        let tokens =
            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

        let mut parser = ArenaParser::new(tokens, arena);
        parser.parse_replace_statement()
    }

    pub fn parse_alter_table_sql_with_interner(
        input: &str,
        arena: &'arena Bump,
    ) -> Result<(&'arena AlterTableStmt<'arena>, ArenaInterner<'arena>), ParseError> {
        let mut lexer = Lexer::new(input);
        let tokens =
            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

        let mut parser = ArenaParser::new(tokens, arena);
        let stmt = parser.parse_alter_table_statement()?;
        Ok((stmt, parser.into_interner()))
    }

    pub fn parse_delete_with_interner(
        input: &str,
        arena: &'arena Bump,
    ) -> Result<(&'arena DeleteStmt<'arena>, ArenaInterner<'arena>), ParseError> {
        let mut lexer = Lexer::new(input);
        let tokens =
            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

        let mut parser = ArenaParser::new(tokens, arena);
        let stmt = parser.parse_delete_statement()?;
        Ok((stmt, parser.into_interner()))
    }

    pub fn parse_update_with_interner(
        input: &str,
        arena: &'arena Bump,
    ) -> Result<(&'arena UpdateStmt<'arena>, ArenaInterner<'arena>), ParseError> {
        let mut lexer = Lexer::new(input);
        let tokens =
            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

        let mut parser = ArenaParser::new(tokens, arena);
        let stmt = parser.parse_update_statement()?;
        Ok((stmt, parser.into_interner()))
    }

    pub fn parse_insert_with_interner(
        input: &str,
        arena: &'arena Bump,
    ) -> Result<(&'arena InsertStmt<'arena>, ArenaInterner<'arena>), ParseError> {
        let mut lexer = Lexer::new(input);
        let tokens =
            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

        let mut parser = ArenaParser::new(tokens, arena);
        let stmt = parser.parse_insert_statement()?;
        Ok((stmt, parser.into_interner()))
    }

    pub fn parse_replace_with_interner(
        input: &str,
        arena: &'arena Bump,
    ) -> Result<(&'arena InsertStmt<'arena>, ArenaInterner<'arena>), ParseError> {
        let mut lexer = Lexer::new(input);
        let tokens =
            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

        let mut parser = ArenaParser::new(tokens, arena);
        let stmt = parser.parse_replace_statement()?;
        Ok((stmt, parser.into_interner()))
    }

    #[inline]
    pub(crate) fn intern(&mut self, s: &str) -> Symbol {
        self.interner.intern(s)
    }

    #[inline]
    #[allow(dead_code)]
    pub(crate) fn alloc_str(&self, s: &str) -> &'arena str {
        self.arena.alloc_str(s)
    }

    #[inline]
    #[allow(dead_code)]
    pub(crate) fn arena(&self) -> &'arena Bump {
        self.arena
    }

    pub(crate) fn peek(&self) -> &Token {
        self.tokens.get(self.position).unwrap_or(&Token::Eof)
    }

    #[allow(dead_code)]
    pub(crate) fn peek_next(&self) -> &Token {
        self.tokens.get(self.position + 1).unwrap_or(&Token::Eof)
    }

    #[allow(dead_code)]
    pub(crate) fn peek_at_offset(&self, offset: usize) -> &Token {
        self.tokens.get(self.position + offset).unwrap_or(&Token::Eof)
    }

    pub(crate) fn advance(&mut self) {
        if self.position < self.tokens.len() {
            self.position += 1;
        }
    }

    pub(crate) fn peek_keyword(&self, keyword: Keyword) -> bool {
        matches!(self.peek(), Token::Keyword(kw) if *kw == keyword)
    }

    #[allow(dead_code)]
    pub(crate) fn peek_next_keyword(&self, keyword: Keyword) -> bool {
        matches!(self.peek_next(), Token::Keyword(kw) if *kw == keyword)
    }

    pub(crate) fn consume_keyword(&mut self, keyword: Keyword) -> Result<(), ParseError> {
        if self.peek_keyword(keyword) {
            self.advance();
            Ok(())
        } else {
            Err(ParseError {
                message: format!("Expected keyword {:?}, found {:?}", keyword, self.peek()),
            })
        }
    }

    pub(crate) fn try_consume_keyword(&mut self, keyword: Keyword) -> bool {
        if self.peek_keyword(keyword) {
            self.advance();
            true
        } else {
            false
        }
    }

    pub(crate) fn expect_keyword(&mut self, keyword: Keyword) -> Result<(), ParseError> {
        self.consume_keyword(keyword)
    }

    pub(crate) fn expect_token(&mut self, expected: Token) -> Result<(), ParseError> {
        if self.peek() == &expected {
            self.advance();
            Ok(())
        } else {
            Err(ParseError { message: format!("Expected {:?}, found {:?}", expected, self.peek()) })
        }
    }

    pub(crate) fn try_consume(&mut self, token: &Token) -> bool {
        if self.peek() == token {
            self.advance();
            true
        } else {
            false
        }
    }

    pub(crate) fn next_placeholder(&mut self) -> usize {
        let index = self.placeholder_count;
        self.placeholder_count += 1;
        index
    }

    pub(crate) fn parse_arena_identifier(&mut self) -> Result<Symbol, ParseError> {
        match self.peek() {
            Token::Identifier(name) => {
                let name = name.clone();
                self.advance();
                Ok(self.intern(&name))
            }
            _ => {
                Err(ParseError { message: format!("Expected identifier, found {:?}", self.peek()) })
            }
        }
    }

    pub(crate) fn parse_identifier_list(
        &mut self,
    ) -> Result<bumpalo::collections::Vec<'arena, Symbol>, ParseError> {
        let mut list = bumpalo::collections::Vec::new_in(self.arena);
        loop {
            list.push(self.parse_arena_identifier()?);
            if !self.try_consume(&Token::Comma) {
                break;
            }
        }
        Ok(list)
    }

    pub(crate) fn parse_column_alias_list(
        &mut self,
    ) -> Result<Option<bumpalo::collections::Vec<'arena, Symbol>>, ParseError> {
        if !self.try_consume(&Token::LParen) {
            return Ok(None);
        }

        let mut aliases = bumpalo::collections::Vec::new_in(self.arena);

        if self.try_consume(&Token::RParen) {
            return Ok(Some(aliases));
        }

        aliases.push(self.parse_alias_name_symbol()?);

        while self.try_consume(&Token::Comma) {
            aliases.push(self.parse_alias_name_symbol()?);
        }

        self.expect_token(Token::RParen)?;

        Ok(Some(aliases))
    }

    fn parse_alias_name_symbol(&mut self) -> Result<Symbol, ParseError> {
        match self.peek() {
            Token::Identifier(name) => {
                let name = name.clone();
                self.advance();
                Ok(self.intern(&name))
            }
            Token::Keyword(kw) => {
                let name = kw.to_string();
                self.advance();
                Ok(self.intern(&name))
            }
            _ => {
                Err(ParseError { message: format!("Expected alias name, found {:?}", self.peek()) })
            }
        }
    }
}

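/// Parses a `SELECT` statement into the owned (non-arena) AST by parsing into a
/// temporary arena and converting the result with `Converter`. The same pattern
/// is used by the other `*_to_owned` helpers below.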
pub fn parse_select_to_owned(input: &str) -> Result<vibesql_ast::SelectStmt, ParseError> {
    let arena = Bump::new();
    let mut lexer = Lexer::new(input);
    let tokens =
        lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

    let mut parser = ArenaParser::new(tokens, &arena);
    let arena_stmt = parser.parse_select_statement()?;
    let converter = Converter::new(parser.interner());
    Ok(converter.convert_select(arena_stmt))
}

pub fn parse_expression_to_owned(input: &str) -> Result<vibesql_ast::Expression, ParseError> {
    let arena = Bump::new();
    let mut lexer = Lexer::new(input);
    let tokens =
        lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

    let mut parser = ArenaParser::new(tokens, &arena);
    let arena_expr = parser.parse_expression()?;
    let converter = Converter::new(parser.interner());
    Ok(converter.convert_expression(&arena_expr))
}

pub fn parse_insert_to_owned(input: &str) -> Result<vibesql_ast::InsertStmt, ParseError> {
    let arena = Bump::new();
    let mut lexer = Lexer::new(input);
    let tokens =
        lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

    let mut parser = ArenaParser::new(tokens, &arena);
    let arena_stmt = parser.parse_insert_statement()?;
    let converter = Converter::new(parser.interner());
    Ok(converter.convert_insert(arena_stmt))
}

pub fn parse_update_to_owned(input: &str) -> Result<vibesql_ast::UpdateStmt, ParseError> {
    let arena = Bump::new();
    let mut lexer = Lexer::new(input);
    let tokens =
        lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

    let mut parser = ArenaParser::new(tokens, &arena);
    let arena_stmt = parser.parse_update_statement()?;
    let converter = Converter::new(parser.interner());
    Ok(converter.convert_update(arena_stmt))
}

pub fn parse_delete_to_owned(input: &str) -> Result<vibesql_ast::DeleteStmt, ParseError> {
    let arena = Bump::new();
    let mut lexer = Lexer::new(input);
    let tokens =
        lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;

    let mut parser = ArenaParser::new(tokens, &arena);
    let arena_stmt = parser.parse_delete_statement()?;
    let converter = Converter::new(parser.interner());
    Ok(converter.convert_delete(arena_stmt))
}

#[cfg(test)]
mod tests {
    use super::*;
    use vibesql_ast::arena::Expression;
    use vibesql_types::SqlValue;

    #[test]
    fn test_date_literal() {
        let arena = Bump::new();
        let expr = ArenaParser::parse_expression_sql("DATE '1998-12-01'", &arena).unwrap();
        match expr {
            Expression::Literal(SqlValue::Date(d)) => {
                assert_eq!(d.year, 1998);
                assert_eq!(d.month, 12);
                assert_eq!(d.day, 1);
            }
            _ => panic!("Expected Date literal, got {:?}", expr),
        }
    }

    #[test]
    fn test_time_literal() {
        let arena = Bump::new();
        let expr = ArenaParser::parse_expression_sql("TIME '12:30:45'", &arena).unwrap();
        match expr {
            Expression::Literal(SqlValue::Time(t)) => {
                assert_eq!(t.hour, 12);
                assert_eq!(t.minute, 30);
                assert_eq!(t.second, 45);
            }
            _ => panic!("Expected Time literal, got {:?}", expr),
        }
    }

    #[test]
    fn test_timestamp_literal() {
        let arena = Bump::new();
        let expr =
            ArenaParser::parse_expression_sql("TIMESTAMP '2024-01-15 10:30:00'", &arena).unwrap();
        match expr {
            Expression::Literal(SqlValue::Timestamp(ts)) => {
                assert_eq!(ts.date.year, 2024);
                assert_eq!(ts.date.month, 1);
                assert_eq!(ts.date.day, 15);
            }
            _ => panic!("Expected Timestamp literal, got {:?}", expr),
        }
    }

    #[test]
    fn test_interval_literal() {
        let arena = Bump::new();
        let expr = ArenaParser::parse_expression_sql("INTERVAL '90' DAY", &arena).unwrap();
        assert!(matches!(expr, Expression::Literal(SqlValue::Interval(_))));
    }

    #[test]
    fn test_date_minus_interval_expression() {
        let arena = Bump::new();
        let expr =
            ArenaParser::parse_expression_sql("DATE '1998-12-01' - INTERVAL '90' DAY", &arena)
                .unwrap();
        match expr {
            Expression::BinaryOp { op, left, right } => {
                assert_eq!(*op, vibesql_ast::BinaryOperator::Minus);
                assert!(matches!(left, Expression::Literal(SqlValue::Date(_))));
                assert!(matches!(right, Expression::Literal(SqlValue::Interval(_))));
            }
            _ => panic!("Expected BinaryOp, got {:?}", expr),
        }
    }

    #[test]
    fn test_tpch_q1_parses() {
        let arena = Bump::new();
        let sql = r#"SELECT
    l_returnflag,
    l_linestatus,
    SUM(l_quantity) AS sum_qty,
    SUM(l_extendedprice) AS sum_base_price,
    SUM(l_extendedprice * (1 - l_discount)) AS sum_disc_price,
    SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
    AVG(l_quantity) AS avg_qty,
    AVG(l_extendedprice) AS avg_price,
    AVG(l_discount) AS avg_disc,
    COUNT(*) AS count_order
FROM
    lineitem
WHERE
    l_shipdate <= DATE '1998-12-01' - INTERVAL '90' DAY
GROUP BY
    l_returnflag,
    l_linestatus
ORDER BY
    l_returnflag,
    l_linestatus"#;

        let result = ArenaParser::parse_sql(sql, &arena);
        assert!(result.is_ok(), "TPC-H Q1 should parse successfully: {:?}", result.err());
    }
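
    // A minimal smoke-test sketch for the owned-AST wrapper `parse_select_to_owned`:
    // it only checks that a simple single-table projection parses, assuming the
    // grammar accepts it (as the TPC-H Q1 test above suggests).
    #[test]
    fn test_parse_select_to_owned_smoke() {
        let result = parse_select_to_owned("SELECT l_returnflag FROM lineitem");
        assert!(result.is_ok(), "simple SELECT should parse: {:?}", result.err());
    }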
}