vibesql_parser/arena_parser/
mod.rs

1//! Arena-allocated SQL parser.
2//!
3//! This module provides a parser that allocates AST nodes from a bump arena
4//! for improved performance. All allocations are contiguous in memory and
5//! freed in a single operation when the arena is dropped.
6//!
7//! # Usage
8//!
9//! For arena-allocated AST (fastest, but requires arena lifetime management):
10//! ```text
11//! use bumpalo::Bump;
12//! use vibesql_parser::arena_parser::ArenaParser;
13//!
14//! let arena = Bump::new();
15//! let result = ArenaParser::parse_sql("SELECT * FROM users", &arena);
16//! ```
17//!
18//! For standard heap-allocated AST (convenient, with arena parsing benefits):
19//! ```
20//! use vibesql_parser::arena_parser::parse_select_to_owned;
21//!
22//! // Parse with arena internally, convert to owned SelectStmt
23//! let stmt = parse_select_to_owned("SELECT * FROM users").unwrap();
24//! ```
25
26mod ddl;
27mod delete;
28mod expression;
29mod insert;
30mod select;
31mod update;
32
33use bumpalo::Bump;
34use vibesql_ast::arena::{
35    AlterTableStmt, ArenaInterner, Converter, DeleteStmt, Expression, InsertStmt, SelectStmt,
36    Statement, Symbol, UpdateStmt,
37};
38
39use crate::{keywords::Keyword, lexer::Span, Lexer, ParseError, Token};
40
/// Arena-based SQL parser.
///
/// Unlike the standard [`Parser`](crate::Parser), this parser allocates all
/// AST nodes from a bump arena, resulting in:
/// - O(1) allocation time (vs heap allocation overhead)
/// - Better cache locality (contiguous memory)
/// - Single deallocation when arena is dropped
pub struct ArenaParser<'arena> {
    /// Token stream produced by the lexer, consumed left to right via `position`.
    tokens: Vec<Token>,
    /// Byte spans for each token, used to extract original source text
    spans: Vec<Span>,
    /// Original SQL input string, used with spans to extract source text
    input: &'arena str,
    /// Index of the current token in `tokens`; never exceeds `tokens.len()`.
    position: usize,
    /// Count of placeholders handed out so far (see `next_placeholder`).
    placeholder_count: usize,
    /// Arena from which all AST nodes and copied strings are allocated.
    arena: &'arena Bump,
    /// Interner backing `Symbol` values for identifiers and aliases.
    interner: ArenaInterner<'arena>,
}
59
60impl<'arena> ArenaParser<'arena> {
61    /// Create a new arena parser from tokens with spans and original input.
62    pub fn new_with_spans(
63        tokens: Vec<Token>,
64        spans: Vec<Span>,
65        input: &'arena str,
66        arena: &'arena Bump,
67    ) -> Self {
68        ArenaParser {
69            tokens,
70            spans,
71            input,
72            position: 0,
73            placeholder_count: 0,
74            arena,
75            interner: ArenaInterner::new(arena),
76        }
77    }
78
79    /// Create a new arena parser from tokens (legacy constructor without spans).
80    /// Source text reconstruction will fall back to token-based reconstruction.
81    pub fn new(tokens: Vec<Token>, arena: &'arena Bump) -> Self {
82        ArenaParser {
83            tokens,
84            spans: Vec::new(), // No spans available
85            input: "",         // No original input
86            position: 0,
87            placeholder_count: 0,
88            arena,
89            interner: ArenaInterner::new(arena),
90        }
91    }
92
    /// Returns a reference to the interner for symbol resolution during conversion.
    /// Use this when the parser must remain usable afterwards; see
    /// `into_interner` for the consuming variant.
    pub fn interner(&self) -> &ArenaInterner<'arena> {
        &self.interner
    }
97
    /// Consumes the parser and returns the interner.
    /// Used by the `*_with_interner` entry points so callers can resolve
    /// `Symbol` values after parsing completes.
    pub fn into_interner(self) -> ArenaInterner<'arena> {
        self.interner
    }
102
103    /// Parse SQL input string into an arena-allocated Statement.
104    ///
105    /// Supports the full range of SQL statements including DML (SELECT, INSERT,
106    /// UPDATE, DELETE), DDL (CREATE, DROP, ALTER), and transaction statements.
107    pub fn parse_sql(input: &str, arena: &'arena Bump) -> Result<Statement<'arena>, ParseError> {
108        let mut lexer = Lexer::new(input);
109        let tokens_with_spans = lexer
110            .tokenize_with_spans()
111            .map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
112
113        let (tokens, spans): (Vec<_>, Vec<_>) = tokens_with_spans.into_iter().unzip();
114        let input_in_arena = arena.alloc_str(input);
115
116        let mut parser = ArenaParser::new_with_spans(tokens, spans, input_in_arena, arena);
117        parser.parse_statement()
118    }
119
120    /// Parse SQL input string into an arena-allocated SelectStmt.
121    ///
122    /// Convenience method for when you know you're parsing a SELECT.
123    pub fn parse_select(
124        input: &str,
125        arena: &'arena Bump,
126    ) -> Result<&'arena SelectStmt<'arena>, ParseError> {
127        let mut lexer = Lexer::new(input);
128        let tokens_with_spans = lexer
129            .tokenize_with_spans()
130            .map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
131
132        let (tokens, spans): (Vec<_>, Vec<_>) = tokens_with_spans.into_iter().unzip();
133        let input_in_arena = arena.alloc_str(input);
134
135        let mut parser = ArenaParser::new_with_spans(tokens, spans, input_in_arena, arena);
136        parser.parse_select_statement()
137    }
138
139    /// Parse SQL input string into an arena-allocated SelectStmt, returning the interner too.
140    ///
141    /// Use this method when you need to resolve Symbol values to strings.
142    pub fn parse_select_with_interner(
143        input: &str,
144        arena: &'arena Bump,
145    ) -> Result<(&'arena SelectStmt<'arena>, ArenaInterner<'arena>), ParseError> {
146        let mut lexer = Lexer::new(input);
147        let tokens_with_spans = lexer
148            .tokenize_with_spans()
149            .map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
150
151        let (tokens, spans): (Vec<_>, Vec<_>) = tokens_with_spans.into_iter().unzip();
152        let input_in_arena = arena.alloc_str(input);
153
154        let mut parser = ArenaParser::new_with_spans(tokens, spans, input_in_arena, arena);
155        let stmt = parser.parse_select_statement()?;
156        Ok((stmt, parser.into_interner()))
157    }
158
    /// Parse a single statement.
    ///
    /// Skips any leading semicolons, then dispatches on the first meaningful
    /// keyword to the DML, DDL, or transaction sub-parser. Any other leading
    /// token is reported as a syntax error.
    fn parse_statement(&mut self) -> Result<Statement<'arena>, ParseError> {
        // Skip leading semicolons
        while self.try_consume(&Token::Semicolon) {}

        match self.peek() {
            // DML statements
            Token::Keyword { keyword: Keyword::Select, .. }
            | Token::Keyword { keyword: Keyword::With, .. } => {
                let stmt = self.parse_select_statement()?;
                Ok(Statement::Select(stmt))
            }
            Token::Keyword { keyword: Keyword::Insert, .. } => {
                // NOTE(review): sub-parser returns an arena reference; the clone
                // suggests this Statement variant stores the payload by value —
                // confirm against the Statement definition.
                let stmt = self.parse_insert_statement()?;
                Ok(Statement::Insert(stmt.clone()))
            }
            Token::Keyword { keyword: Keyword::Replace, .. } => {
                // REPLACE is parsed as an INSERT variant.
                let stmt = self.parse_replace_statement()?;
                Ok(Statement::Insert(stmt.clone()))
            }
            Token::Keyword { keyword: Keyword::Update, .. } => {
                let stmt = self.parse_update_statement()?;
                Ok(Statement::Update(stmt.clone()))
            }
            Token::Keyword { keyword: Keyword::Delete, .. } => {
                let stmt = self.parse_delete_statement()?;
                Ok(Statement::Delete(stmt.clone()))
            }

            // DDL statements
            Token::Keyword { keyword: Keyword::Create, .. } => self.parse_create_statement(),
            Token::Keyword { keyword: Keyword::Drop, .. } => self.parse_drop_statement(),
            Token::Keyword { keyword: Keyword::Alter, .. } => {
                let stmt = self.parse_alter_table_statement()?;
                Ok(Statement::AlterTable(stmt.clone()))
            }
            Token::Keyword { keyword: Keyword::Truncate, .. } => {
                let stmt = self.parse_truncate_table_statement()?;
                Ok(Statement::TruncateTable(stmt))
            }
            Token::Keyword { keyword: Keyword::Analyze, .. } => {
                let stmt = self.parse_analyze_statement()?;
                Ok(Statement::Analyze(stmt))
            }
            Token::Keyword { keyword: Keyword::Pragma, .. } => {
                let stmt = self.parse_pragma_statement()?;
                Ok(Statement::Pragma(stmt))
            }

            // Transaction statements
            Token::Keyword { keyword: Keyword::Begin, .. }
            | Token::Keyword { keyword: Keyword::Start, .. } => {
                let stmt = self.parse_begin_statement()?;
                Ok(Statement::BeginTransaction(stmt))
            }
            Token::Keyword { keyword: Keyword::Commit, .. }
            | Token::Keyword { keyword: Keyword::End, .. } => {
                // END is a SQLite alias for COMMIT in transaction context
                let stmt = self.parse_commit_statement()?;
                Ok(Statement::Commit(stmt))
            }
            Token::Keyword { keyword: Keyword::Rollback, .. } => {
                // Check for ROLLBACK TO SAVEPOINT
                if self.peek_next_keyword(Keyword::To) {
                    let stmt = self.parse_rollback_to_savepoint_statement()?;
                    Ok(Statement::RollbackToSavepoint(stmt))
                } else {
                    let stmt = self.parse_rollback_statement()?;
                    Ok(Statement::Rollback(stmt))
                }
            }
            Token::Keyword { keyword: Keyword::Savepoint, .. } => {
                let stmt = self.parse_savepoint_statement()?;
                Ok(Statement::Savepoint(stmt))
            }
            Token::Keyword { keyword: Keyword::Release, .. } => {
                let stmt = self.parse_release_savepoint_statement()?;
                Ok(Statement::ReleaseSavepoint(stmt))
            }

            _ => Err(ParseError { message: self.peek().syntax_error() }),
        }
    }
242
    /// Parse CREATE statement and dispatch to appropriate sub-parser.
    ///
    /// Looks ahead past optional modifiers (OR REPLACE, UNIQUE/FULLTEXT/SPATIAL,
    /// TEMP/TEMPORARY) to find the object kind, then delegates. Only CREATE
    /// INDEX and CREATE VIEW are handled here; anything else is an error.
    fn parse_create_statement(&mut self) -> Result<Statement<'arena>, ParseError> {
        // Peek ahead to determine what we're creating
        let mut offset = 1; // Skip CREATE

        // Skip optional OR REPLACE
        // NOTE(review): this skips two tokens after seeing OR without checking
        // that the second token is actually REPLACE; a stray OR would desync
        // the lookahead — TODO confirm this is acceptable for malformed input.
        if matches!(self.peek_at_offset(offset), Token::Keyword { keyword: Keyword::Or, .. }) {
            offset += 2; // Skip OR REPLACE
        }

        // Skip optional UNIQUE, FULLTEXT, SPATIAL
        if matches!(
            self.peek_at_offset(offset),
            Token::Keyword { keyword: Keyword::Unique, .. }
                | Token::Keyword { keyword: Keyword::Fulltext, .. }
                | Token::Keyword { keyword: Keyword::Spatial, .. }
        ) {
            offset += 1;
        }

        // Skip optional TEMP/TEMPORARY
        if matches!(
            self.peek_at_offset(offset),
            Token::Keyword { keyword: Keyword::Temp, .. }
                | Token::Keyword { keyword: Keyword::Temporary, .. }
        ) {
            offset += 1;
        }

        match self.peek_at_offset(offset) {
            Token::Keyword { keyword: Keyword::Index, .. } => {
                let stmt = self.parse_create_index_statement()?;
                Ok(Statement::CreateIndex(stmt))
            }
            Token::Keyword { keyword: Keyword::View, .. } => {
                let stmt = self.parse_create_view_statement()?;
                Ok(Statement::CreateView(stmt))
            }
            _ => Err(ParseError {
                message: format!(
                    "Unsupported CREATE statement type: {:?}",
                    self.peek_at_offset(offset)
                ),
            }),
        }
    }
289
    /// Parse DROP statement and dispatch to appropriate sub-parser.
    ///
    /// Dispatches on the token following DROP (TABLE, INDEX, or VIEW);
    /// any other object kind yields an error.
    fn parse_drop_statement(&mut self) -> Result<Statement<'arena>, ParseError> {
        // Peek ahead to determine what we're dropping
        match self.peek_at_offset(1) {
            Token::Keyword { keyword: Keyword::Table, .. } => {
                let stmt = self.parse_drop_table_statement()?;
                Ok(Statement::DropTable(stmt))
            }
            Token::Keyword { keyword: Keyword::Index, .. } => {
                let stmt = self.parse_drop_index_statement()?;
                Ok(Statement::DropIndex(stmt))
            }
            Token::Keyword { keyword: Keyword::View, .. } => {
                let stmt = self.parse_drop_view_statement()?;
                Ok(Statement::DropView(stmt))
            }
            _ => Err(ParseError {
                message: format!("Unsupported DROP statement type: {:?}", self.peek_at_offset(1)),
            }),
        }
    }
311
312    /// Parse an expression and return an arena-allocated reference.
313    pub fn parse_expression_sql(
314        input: &str,
315        arena: &'arena Bump,
316    ) -> Result<&'arena Expression<'arena>, ParseError> {
317        let mut lexer = Lexer::new(input);
318        let tokens =
319            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
320
321        let mut parser = ArenaParser::new(tokens, arena);
322        let expr = parser.parse_expression()?;
323        Ok(arena.alloc(expr))
324    }
325
326    /// Parse SQL input string into an arena-allocated AlterTableStmt.
327    pub fn parse_alter_table_sql(
328        input: &str,
329        arena: &'arena Bump,
330    ) -> Result<&'arena AlterTableStmt<'arena>, ParseError> {
331        let mut lexer = Lexer::new(input);
332        let tokens =
333            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
334
335        let mut parser = ArenaParser::new(tokens, arena);
336        parser.parse_alter_table_statement()
337    }
338
339    /// Parse an INSERT statement into an arena-allocated InsertStmt.
340    pub fn parse_insert(
341        input: &str,
342        arena: &'arena Bump,
343    ) -> Result<&'arena InsertStmt<'arena>, ParseError> {
344        let mut lexer = Lexer::new(input);
345        let tokens =
346            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
347
348        let mut parser = ArenaParser::new(tokens, arena);
349        parser.parse_insert_statement()
350    }
351
352    /// Parse an UPDATE statement into an arena-allocated UpdateStmt.
353    pub fn parse_update(
354        input: &str,
355        arena: &'arena Bump,
356    ) -> Result<&'arena UpdateStmt<'arena>, ParseError> {
357        let mut lexer = Lexer::new(input);
358        let tokens =
359            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
360
361        let mut parser = ArenaParser::new(tokens, arena);
362        parser.parse_update_statement()
363    }
364
365    /// Parse a DELETE statement into an arena-allocated DeleteStmt.
366    pub fn parse_delete(
367        input: &str,
368        arena: &'arena Bump,
369    ) -> Result<&'arena DeleteStmt<'arena>, ParseError> {
370        let mut lexer = Lexer::new(input);
371        let tokens =
372            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
373
374        let mut parser = ArenaParser::new(tokens, arena);
375        parser.parse_delete_statement()
376    }
377
378    /// Parse a REPLACE statement (alias for INSERT OR REPLACE) into an arena-allocated InsertStmt.
379    pub fn parse_replace(
380        input: &str,
381        arena: &'arena Bump,
382    ) -> Result<&'arena InsertStmt<'arena>, ParseError> {
383        let mut lexer = Lexer::new(input);
384        let tokens =
385            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
386
387        let mut parser = ArenaParser::new(tokens, arena);
388        parser.parse_replace_statement()
389    }
390
391    /// Parse an ALTER TABLE statement, returning the interner for symbol resolution.
392    pub fn parse_alter_table_sql_with_interner(
393        input: &str,
394        arena: &'arena Bump,
395    ) -> Result<(&'arena AlterTableStmt<'arena>, ArenaInterner<'arena>), ParseError> {
396        let mut lexer = Lexer::new(input);
397        let tokens =
398            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
399
400        let mut parser = ArenaParser::new(tokens, arena);
401        let stmt = parser.parse_alter_table_statement()?;
402        Ok((stmt, parser.into_interner()))
403    }
404
405    /// Parse a DELETE statement, returning the interner for symbol resolution.
406    pub fn parse_delete_with_interner(
407        input: &str,
408        arena: &'arena Bump,
409    ) -> Result<(&'arena DeleteStmt<'arena>, ArenaInterner<'arena>), ParseError> {
410        let mut lexer = Lexer::new(input);
411        let tokens =
412            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
413
414        let mut parser = ArenaParser::new(tokens, arena);
415        let stmt = parser.parse_delete_statement()?;
416        Ok((stmt, parser.into_interner()))
417    }
418
419    /// Parse an UPDATE statement, returning the interner for symbol resolution.
420    pub fn parse_update_with_interner(
421        input: &str,
422        arena: &'arena Bump,
423    ) -> Result<(&'arena UpdateStmt<'arena>, ArenaInterner<'arena>), ParseError> {
424        let mut lexer = Lexer::new(input);
425        let tokens =
426            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
427
428        let mut parser = ArenaParser::new(tokens, arena);
429        let stmt = parser.parse_update_statement()?;
430        Ok((stmt, parser.into_interner()))
431    }
432
433    /// Parse an INSERT statement, returning the interner for symbol resolution.
434    pub fn parse_insert_with_interner(
435        input: &str,
436        arena: &'arena Bump,
437    ) -> Result<(&'arena InsertStmt<'arena>, ArenaInterner<'arena>), ParseError> {
438        let mut lexer = Lexer::new(input);
439        let tokens =
440            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
441
442        let mut parser = ArenaParser::new(tokens, arena);
443        let stmt = parser.parse_insert_statement()?;
444        Ok((stmt, parser.into_interner()))
445    }
446
447    /// Parse a REPLACE statement, returning the interner for symbol resolution.
448    pub fn parse_replace_with_interner(
449        input: &str,
450        arena: &'arena Bump,
451    ) -> Result<(&'arena InsertStmt<'arena>, ArenaInterner<'arena>), ParseError> {
452        let mut lexer = Lexer::new(input);
453        let tokens =
454            lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
455
456        let mut parser = ArenaParser::new(tokens, arena);
457        let stmt = parser.parse_replace_statement()?;
458        Ok((stmt, parser.into_interner()))
459    }
460
    /// Intern a string and return a Symbol.
    /// Identical strings map to the same Symbol via the parser's interner.
    #[inline]
    pub(crate) fn intern(&mut self, s: &str) -> Symbol {
        self.interner.intern(s)
    }
466
    /// Allocate a string in the arena (for non-identifier strings).
    /// Unlike `intern`, this never deduplicates — each call copies `s`.
    #[inline]
    #[allow(dead_code)]
    pub(crate) fn alloc_str(&self, s: &str) -> &'arena str {
        self.arena.alloc_str(s)
    }
473
    /// Get a reference to the arena backing this parser's allocations.
    #[inline]
    #[allow(dead_code)]
    pub(crate) fn arena(&self) -> &'arena Bump {
        self.arena
    }
480
481    // ========================================================================
482    // Token manipulation helpers (same as standard parser)
483    // ========================================================================
484
485    /// Peek at the current token without consuming it.
486    pub(crate) fn peek(&self) -> &Token {
487        self.tokens.get(self.position).unwrap_or(&Token::Eof)
488    }
489
490    /// Peek at the next token (position + 1) without consuming.
491    #[allow(dead_code)]
492    pub(crate) fn peek_next(&self) -> &Token {
493        self.tokens.get(self.position + 1).unwrap_or(&Token::Eof)
494    }
495
496    /// Peek at token at specific offset from current position.
497    #[allow(dead_code)]
498    pub(crate) fn peek_at_offset(&self, offset: usize) -> &Token {
499        self.tokens.get(self.position + offset).unwrap_or(&Token::Eof)
500    }
501
502    /// Advance to the next token.
503    pub(crate) fn advance(&mut self) {
504        if self.position < self.tokens.len() {
505            self.position += 1;
506        }
507    }
508
509    /// Check if current token is a specific keyword.
510    pub(crate) fn peek_keyword(&self, keyword: Keyword) -> bool {
511        matches!(self.peek(), Token::Keyword { keyword: kw, .. } if *kw == keyword)
512    }
513
514    /// Check if next token (position + 1) is a specific keyword.
515    #[allow(dead_code)]
516    pub(crate) fn peek_next_keyword(&self, keyword: Keyword) -> bool {
517        matches!(self.peek_next(), Token::Keyword { keyword: kw, .. } if *kw == keyword)
518    }
519
520    /// Check if there's a SELECT keyword after any number of opening parentheses.
521    /// This is used to detect subqueries in contexts like `IN ((SELECT ...))` where
522    /// extra parentheses around the SELECT should still be treated as a subquery.
523    ///
524    /// Returns (true, depth) if SELECT is found, where depth is the number of parens traversed.
525    /// Returns (false, 0) otherwise.
526    pub(crate) fn peek_select_through_parens(&self) -> (bool, usize) {
527        let mut offset = 0;
528        let mut paren_depth = 0;
529
530        loop {
531            let token = self.peek_at_offset(offset);
532            match token {
533                Token::LParen => {
534                    paren_depth += 1;
535                    offset += 1;
536                }
537                Token::Keyword { keyword: Keyword::Select, .. }
538                | Token::Keyword { keyword: Keyword::Values, .. } => {
539                    // Found SELECT or VALUES after parentheses - this is a subquery
540                    return (true, paren_depth);
541                }
542                _ => {
543                    // Found something else - not a subquery through parens
544                    return (false, 0);
545                }
546            }
547        }
548    }
549
550    /// Consume a keyword, returning an error if it's not the expected keyword.
551    pub(crate) fn consume_keyword(&mut self, keyword: Keyword) -> Result<(), ParseError> {
552        if self.peek_keyword(keyword) {
553            self.advance();
554            Ok(())
555        } else {
556            Err(ParseError { message: self.peek().syntax_error() })
557        }
558    }
559
560    /// Try to consume a keyword, returning true if successful.
561    pub(crate) fn try_consume_keyword(&mut self, keyword: Keyword) -> bool {
562        if self.peek_keyword(keyword) {
563            self.advance();
564            true
565        } else {
566            false
567        }
568    }
569
    /// Expect a specific keyword.
    /// Alias for `consume_keyword`, kept for readability at call sites.
    pub(crate) fn expect_keyword(&mut self, keyword: Keyword) -> Result<(), ParseError> {
        self.consume_keyword(keyword)
    }
574
575    /// Expect a specific token.
576    pub(crate) fn expect_token(&mut self, expected: Token) -> Result<(), ParseError> {
577        if self.peek() == &expected {
578            self.advance();
579            Ok(())
580        } else {
581            Err(ParseError { message: self.peek().syntax_error() })
582        }
583    }
584
585    /// Try to consume a specific token, returning true if successful.
586    pub(crate) fn try_consume(&mut self, token: &Token) -> bool {
587        if self.peek() == token {
588            self.advance();
589            true
590        } else {
591            false
592        }
593    }
594
595    /// Get the next placeholder index.
596    pub(crate) fn next_placeholder(&mut self) -> usize {
597        let index = self.placeholder_count;
598        self.placeholder_count += 1;
599        index
600    }
601
602    // ========================================================================
603    // Common parsing helpers
604    // ========================================================================
605
606    /// Parse an identifier and intern it, returning a Symbol.
607    pub(crate) fn parse_arena_identifier(&mut self) -> Result<Symbol, ParseError> {
608        match self.peek() {
609            Token::Identifier(name) => {
610                let name = name.clone();
611                self.advance();
612                Ok(self.intern(&name))
613            }
614            _ => Err(ParseError { message: self.peek().syntax_error() }),
615        }
616    }
617
618    /// Parse a comma-separated list of identifiers.
619    pub(crate) fn parse_identifier_list(
620        &mut self,
621    ) -> Result<bumpalo::collections::Vec<'arena, Symbol>, ParseError> {
622        let mut list = bumpalo::collections::Vec::new_in(self.arena);
623        loop {
624            list.push(self.parse_arena_identifier()?);
625            if !self.try_consume(&Token::Comma) {
626                break;
627            }
628        }
629        Ok(list)
630    }
631
632    /// Parse an optional column alias list: (col1, col2, ...)
633    ///
634    /// SQL:1999 Feature E051-09: Derived column lists in table aliases
635    /// Example: FROM t AS myalias (x, y) or FROM (SELECT a, b) AS mytemp (x, y)
636    ///
637    /// Returns None if no opening parenthesis is found, otherwise parses
638    /// and returns the list of column aliases as Symbols.
639    pub(crate) fn parse_column_alias_list(
640        &mut self,
641    ) -> Result<Option<bumpalo::collections::Vec<'arena, Symbol>>, ParseError> {
642        // Check for opening parenthesis
643        if !self.try_consume(&Token::LParen) {
644            return Ok(None);
645        }
646
647        let mut aliases = bumpalo::collections::Vec::new_in(self.arena);
648
649        // Handle empty list case: ()
650        if self.try_consume(&Token::RParen) {
651            return Ok(Some(aliases));
652        }
653
654        // Parse first alias (identifiers or keywords allowed)
655        aliases.push(self.parse_alias_name_symbol()?);
656
657        // Parse remaining aliases
658        while self.try_consume(&Token::Comma) {
659            aliases.push(self.parse_alias_name_symbol()?);
660        }
661
662        // Expect closing parenthesis
663        self.expect_token(Token::RParen)?;
664
665        Ok(Some(aliases))
666    }
667
668    /// Parse an identifier or keyword as an alias name, returning a Symbol.
669    ///
670    /// SQLite also allows single-quoted strings as aliases (e.g., `SELECT 1 AS 'a'`).
671    /// In this context, the string literal is treated as an identifier name.
672    fn parse_alias_name_symbol(&mut self) -> Result<Symbol, ParseError> {
673        match self.peek() {
674            Token::Identifier(name) | Token::DelimitedIdentifier(name) => {
675                let name = name.clone();
676                self.advance();
677                Ok(self.intern(&name))
678            }
679            Token::Keyword { keyword: kw, .. } => {
680                // Allow keywords as alias names
681                let name = kw.to_string();
682                self.advance();
683                Ok(self.intern(&name))
684            }
685            Token::String(s) => {
686                // SQLite compatibility: single-quoted strings can be used as aliases
687                let alias = s.clone();
688                self.advance();
689                Ok(self.intern(&alias))
690            }
691            _ => Err(ParseError { message: self.peek().syntax_error() }),
692        }
693    }
694
695    /// Reconstruct source text from tokens in a range.
696    ///
697    /// This reconstructs the original SQL text from the tokens consumed during
698    /// expression parsing. Used for preserving original expression text as column
699    /// names when no alias is provided (SQLite compatibility).
700    ///
701    /// When spans are available, extracts directly from the original input to
702    /// preserve exact case and formatting. Falls back to token-based reconstruction
703    /// (which uppercases identifiers) when spans are not available.
704    pub(crate) fn reconstruct_source_text(
705        &self,
706        start_pos: usize,
707        end_pos: usize,
708    ) -> Option<&'arena str> {
709        if start_pos >= end_pos || start_pos >= self.tokens.len() {
710            return None;
711        }
712
713        // If we have spans, extract the original source text directly
714        if !self.spans.is_empty() && start_pos < self.spans.len() && end_pos <= self.spans.len() {
715            let start_byte = self.spans[start_pos].start;
716            // Use end_pos - 1 because end_pos is exclusive (points past the last token)
717            let end_byte = if end_pos > 0 && end_pos <= self.spans.len() {
718                self.spans[end_pos - 1].end
719            } else {
720                self.spans[self.spans.len() - 1].end
721            };
722
723            if start_byte < end_byte && end_byte <= self.input.len() {
724                let source_text = &self.input[start_byte..end_byte];
725                return Some(self.arena.alloc_str(source_text));
726            }
727        }
728
729        // Fall back to token-based reconstruction (won't preserve case)
730        let mut result = String::new();
731        let end = end_pos.min(self.tokens.len());
732
733        for i in start_pos..end {
734            let token = &self.tokens[i];
735            if matches!(token, Token::Eof) {
736                break;
737            }
738            result.push_str(&token.to_sql());
739        }
740
741        if result.is_empty() {
742            None
743        } else {
744            // Allocate the string in the arena and return a reference
745            Some(self.arena.alloc_str(&result))
746        }
747    }
748}
749
750// ============================================================================
751// Standalone parse-to-owned functions
752// ============================================================================
753
754/// Parse SQL and return a heap-allocated (owned) SelectStmt.
755///
756/// This function provides the performance benefits of arena parsing while
757/// returning a standard `SelectStmt` that can be stored and used without
758/// lifetime constraints.
759///
760/// # Performance
761///
762/// This is faster than the standard parser because:
763/// - Arena parsing is 30-40% faster (fewer allocations during parse)
764/// - Conversion allocates fewer, larger chunks (better cache locality)
765/// - Many strings benefit from SSO (Small String Optimization)
766///
767/// # Example
768///
769/// ```
770/// use vibesql_parser::arena_parser::parse_select_to_owned;
771///
772/// let stmt = parse_select_to_owned("SELECT * FROM users").unwrap();
773/// // stmt is a standard SelectStmt, no lifetime constraints
774/// ```
775pub fn parse_select_to_owned(input: &str) -> Result<vibesql_ast::SelectStmt, ParseError> {
776    let arena = Bump::new();
777    let mut lexer = Lexer::new(input);
778    let tokens_with_spans = lexer
779        .tokenize_with_spans()
780        .map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
781
782    let (tokens, spans): (Vec<_>, Vec<_>) = tokens_with_spans.into_iter().unzip();
783    let input_in_arena = arena.alloc_str(input);
784
785    let mut parser = ArenaParser::new_with_spans(tokens, spans, input_in_arena, &arena);
786    let arena_stmt = parser.parse_select_statement()?;
787    let converter = Converter::new(parser.interner());
788    Ok(converter.convert_select(arena_stmt))
789}
790
791/// Parse an expression and return a heap-allocated (owned) Expression.
792///
793/// Like [`parse_select_to_owned`], this provides arena parsing
794/// benefits while returning an owned expression.
795///
796/// # Example
797///
798/// ```
799/// use vibesql_parser::arena_parser::parse_expression_to_owned;
800///
801/// let expr = parse_expression_to_owned("a + b * 2").unwrap();
802/// ```
803pub fn parse_expression_to_owned(input: &str) -> Result<vibesql_ast::Expression, ParseError> {
804    let arena = Bump::new();
805    let mut lexer = Lexer::new(input);
806    let tokens =
807        lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
808
809    let mut parser = ArenaParser::new(tokens, &arena);
810    let arena_expr = parser.parse_expression()?;
811    let converter = Converter::new(parser.interner());
812    Ok(converter.convert_expression(&arena_expr))
813}
814
815/// Parse INSERT SQL and return a heap-allocated (owned) InsertStmt.
816///
817/// Like [`parse_select_to_owned`], this provides arena parsing
818/// benefits while returning a standard `InsertStmt`.
819///
820/// # Example
821///
822/// ```
823/// use vibesql_parser::arena_parser::parse_insert_to_owned;
824///
825/// let stmt = parse_insert_to_owned("INSERT INTO users (name) VALUES ('Alice')").unwrap();
826/// ```
827pub fn parse_insert_to_owned(input: &str) -> Result<vibesql_ast::InsertStmt, ParseError> {
828    let arena = Bump::new();
829    let mut lexer = Lexer::new(input);
830    let tokens =
831        lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
832
833    let mut parser = ArenaParser::new(tokens, &arena);
834    let arena_stmt = parser.parse_insert_statement()?;
835    let converter = Converter::new(parser.interner());
836    Ok(converter.convert_insert(arena_stmt))
837}
838
839/// Parse UPDATE SQL and return a heap-allocated (owned) UpdateStmt.
840///
841/// Like [`parse_select_to_owned`], this provides arena parsing
842/// benefits while returning a standard `UpdateStmt`.
843///
844/// # Example
845///
846/// ```
847/// use vibesql_parser::arena_parser::parse_update_to_owned;
848///
849/// let stmt = parse_update_to_owned("UPDATE users SET name = 'Bob' WHERE id = 1").unwrap();
850/// ```
851pub fn parse_update_to_owned(input: &str) -> Result<vibesql_ast::UpdateStmt, ParseError> {
852    let arena = Bump::new();
853    let mut lexer = Lexer::new(input);
854    let tokens =
855        lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
856
857    let mut parser = ArenaParser::new(tokens, &arena);
858    let arena_stmt = parser.parse_update_statement()?;
859    let converter = Converter::new(parser.interner());
860    Ok(converter.convert_update(arena_stmt))
861}
862
863/// Parse DELETE SQL and return a heap-allocated (owned) DeleteStmt.
864///
865/// Like [`parse_select_to_owned`], this provides arena parsing
866/// benefits while returning a standard `DeleteStmt`.
867///
868/// # Example
869///
870/// ```
871/// use vibesql_parser::arena_parser::parse_delete_to_owned;
872///
873/// let stmt = parse_delete_to_owned("DELETE FROM users WHERE id = 1").unwrap();
874/// ```
875pub fn parse_delete_to_owned(input: &str) -> Result<vibesql_ast::DeleteStmt, ParseError> {
876    let arena = Bump::new();
877    let mut lexer = Lexer::new(input);
878    let tokens =
879        lexer.tokenize().map_err(|e| ParseError { message: format!("Lexer error: {}", e) })?;
880
881    let mut parser = ArenaParser::new(tokens, &arena);
882    let arena_stmt = parser.parse_delete_statement()?;
883    let converter = Converter::new(parser.interner());
884    Ok(converter.convert_delete(arena_stmt))
885}
886
#[cfg(test)]
mod tests {
    use vibesql_ast::arena::Expression;
    use vibesql_types::SqlValue;

    use super::*;

    // DATE 'YYYY-MM-DD' typed literal should produce a SqlValue::Date with
    // the component fields populated.
    #[test]
    fn test_date_literal() {
        let arena = Bump::new();
        let expr = ArenaParser::parse_expression_sql("DATE '1998-12-01'", &arena).unwrap();
        match expr {
            Expression::Literal(SqlValue::Date(d)) => {
                assert_eq!(d.year, 1998);
                assert_eq!(d.month, 12);
                assert_eq!(d.day, 1);
            }
            _ => panic!("Expected Date literal, got {:?}", expr),
        }
    }

    // TIME 'HH:MM:SS' typed literal should produce a SqlValue::Time.
    #[test]
    fn test_time_literal() {
        let arena = Bump::new();
        let expr = ArenaParser::parse_expression_sql("TIME '12:30:45'", &arena).unwrap();
        match expr {
            Expression::Literal(SqlValue::Time(t)) => {
                assert_eq!(t.hour, 12);
                assert_eq!(t.minute, 30);
                assert_eq!(t.second, 45);
            }
            _ => panic!("Expected Time literal, got {:?}", expr),
        }
    }

    // TIMESTAMP literal should produce a SqlValue::Timestamp; only the date
    // portion is asserted here.
    #[test]
    fn test_timestamp_literal() {
        let arena = Bump::new();
        let expr =
            ArenaParser::parse_expression_sql("TIMESTAMP '2024-01-15 10:30:00'", &arena).unwrap();
        match expr {
            Expression::Literal(SqlValue::Timestamp(ts)) => {
                assert_eq!(ts.date.year, 2024);
                assert_eq!(ts.date.month, 1);
                assert_eq!(ts.date.day, 15);
            }
            _ => panic!("Expected Timestamp literal, got {:?}", expr),
        }
    }

    // INTERVAL literal with a unit keyword (DAY) should parse; the interval's
    // contents are not inspected, only its variant.
    #[test]
    fn test_interval_literal() {
        let arena = Bump::new();
        let expr = ArenaParser::parse_expression_sql("INTERVAL '90' DAY", &arena).unwrap();
        // Just verify it parses to an Interval type
        assert!(matches!(expr, Expression::Literal(SqlValue::Interval(_))));
    }

    // Date arithmetic: DATE - INTERVAL should parse as a binary Minus with
    // typed literals on both sides (the pattern used by TPC-H Q1's WHERE).
    #[test]
    fn test_date_minus_interval_expression() {
        let arena = Bump::new();
        let expr =
            ArenaParser::parse_expression_sql("DATE '1998-12-01' - INTERVAL '90' DAY", &arena)
                .unwrap();
        match expr {
            Expression::BinaryOp { op, left, right } => {
                assert_eq!(*op, vibesql_ast::BinaryOperator::Minus);
                assert!(matches!(left, Expression::Literal(SqlValue::Date(_))));
                assert!(matches!(right, Expression::Literal(SqlValue::Interval(_))));
            }
            _ => panic!("Expected BinaryOp, got {:?}", expr),
        }
    }

    // End-to-end smoke test: the full TPC-H Q1 query (aggregates, arithmetic
    // in projections, typed date/interval literals, GROUP BY, ORDER BY)
    // should parse without error. Only success is asserted, not the AST shape.
    #[test]
    fn test_tpch_q1_parses() {
        let arena = Bump::new();
        let sql = r#"SELECT
            l_returnflag,
            l_linestatus,
            SUM(l_quantity) AS sum_qty,
            SUM(l_extendedprice) AS sum_base_price,
            SUM(l_extendedprice * (1 - l_discount)) AS sum_disc_price,
            SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
            AVG(l_quantity) AS avg_qty,
            AVG(l_extendedprice) AS avg_price,
            AVG(l_discount) AS avg_disc,
            COUNT(*) AS count_order
        FROM
            lineitem
        WHERE
            l_shipdate <= DATE '1998-12-01' - INTERVAL '90' DAY
        GROUP BY
            l_returnflag,
            l_linestatus
        ORDER BY
            l_returnflag,
            l_linestatus"#;

        // This should parse successfully now with typed literal support
        let result = ArenaParser::parse_sql(sql, &arena);
        assert!(result.is_ok(), "TPC-H Q1 should parse successfully: {:?}", result.err());
    }

    // ORDER BY ... NULLS FIRST / NULLS LAST should round-trip through the
    // owned-AST path (parse_select_to_owned), including the default (None)
    // when no NULLS clause is given and per-column ordering for multiple keys.
    #[test]
    fn test_order_by_nulls_first_last() {
        use vibesql_ast::NullsOrder;

        // Test NULLS FIRST
        let stmt = parse_select_to_owned("SELECT x FROM t ORDER BY x ASC NULLS FIRST").unwrap();
        let order_by = stmt.order_by.unwrap();
        assert_eq!(order_by.len(), 1);
        assert_eq!(order_by[0].nulls_order, Some(NullsOrder::First));

        // Test NULLS LAST
        let stmt = parse_select_to_owned("SELECT x FROM t ORDER BY x DESC NULLS LAST").unwrap();
        let order_by = stmt.order_by.unwrap();
        assert_eq!(order_by.len(), 1);
        assert_eq!(order_by[0].nulls_order, Some(NullsOrder::Last));

        // Test default (None)
        let stmt = parse_select_to_owned("SELECT x FROM t ORDER BY x").unwrap();
        let order_by = stmt.order_by.unwrap();
        assert_eq!(order_by.len(), 1);
        assert_eq!(order_by[0].nulls_order, None);

        // Test multiple columns
        let stmt =
            parse_select_to_owned("SELECT x, y FROM t ORDER BY x NULLS FIRST, y DESC NULLS LAST")
                .unwrap();
        let order_by = stmt.order_by.unwrap();
        assert_eq!(order_by.len(), 2);
        assert_eq!(order_by[0].nulls_order, Some(NullsOrder::First));
        assert_eq!(order_by[1].nulls_order, Some(NullsOrder::Last));
    }
}