Skip to main content

sochdb_query/sql/
parser.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// SochDB - LLM-Optimized Embedded Database
3// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU Affero General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU Affero General Public License for more details.
14//
15// You should have received a copy of the GNU Affero General Public License
16// along with this program. If not, see <https://www.gnu.org/licenses/>.
17
18//! SQL Parser
19//!
20//! Recursive descent parser for SQL grammar.
21//! Produces AST from token stream.
22
23use super::ast::*;
24use super::lexer::Lexer;
25use super::token::{Span, Token, TokenKind};
26
/// Parser errors
///
/// Carries a human-readable message, the source span the error is reported
/// at, and an optional list of descriptions of what would have been accepted.
#[derive(Debug, Clone)]
pub struct ParseError {
    /// Human-readable description of what went wrong.
    pub message: String,
    /// Source location the error is reported at.
    pub span: Span,
    /// Descriptions of what the parser expected instead; empty when no
    /// expectation was recorded.
    pub expected: Vec<String>,
}
34
35impl ParseError {
36    pub fn new(message: impl Into<String>, span: Span) -> Self {
37        Self {
38            message: message.into(),
39            span,
40            expected: Vec::new(),
41        }
42    }
43
44    pub fn expected(mut self, expected: impl Into<String>) -> Self {
45        self.expected.push(expected.into());
46        self
47    }
48}
49
50impl std::fmt::Display for ParseError {
51    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
52        write!(
53            f,
54            "Parse error at line {}, column {}: {}",
55            self.span.line, self.span.column, self.message
56        )?;
57        if !self.expected.is_empty() {
58            write!(f, " (expected: {})", self.expected.join(", "))?;
59        }
60        Ok(())
61    }
62}
63
// Marker impl with no overridden methods: lets `ParseError` be used wherever
// a `std::error::Error` is required.
impl std::error::Error for ParseError {}
65
/// SQL Parser
///
/// Holds a token stream together with a cursor into it. The end of input is
/// detected by looking for a `TokenKind::Eof` token.
pub struct Parser {
    /// Tokens being parsed.
    tokens: Vec<Token>,
    /// Index into `tokens` of the token that will be examined next.
    pos: usize,
}
71
72impl Parser {
73    /// Create a new parser from tokens
74    pub fn new(tokens: Vec<Token>) -> Self {
75        Self { tokens, pos: 0 }
76    }
77
78    /// Parse a SQL string into a statement
79    pub fn parse(sql: &str) -> Result<Statement, Vec<ParseError>> {
80        let tokens = Lexer::new(sql).tokenize().map_err(|lex_errors| {
81            lex_errors
82                .into_iter()
83                .map(|e| ParseError::new(e.message, e.span))
84                .collect::<Vec<_>>()
85        })?;
86
87        let mut parser = Parser::new(tokens);
88        parser.parse_statement()
89    }
90
91    /// Parse multiple statements (semicolon-separated)
92    pub fn parse_statements(sql: &str) -> Result<Vec<Statement>, Vec<ParseError>> {
93        let tokens = Lexer::new(sql).tokenize().map_err(|lex_errors| {
94            lex_errors
95                .into_iter()
96                .map(|e| ParseError::new(e.message, e.span))
97                .collect::<Vec<_>>()
98        })?;
99
100        let mut parser = Parser::new(tokens);
101        let mut statements = Vec::new();
102
103        while !parser.is_at_end() {
104            match parser.parse_statement() {
105                Ok(stmt) => {
106                    statements.push(stmt);
107                    // Consume optional semicolon
108                    parser.match_token(&TokenKind::Semicolon);
109                }
110                Err(errors) => return Err(errors),
111            }
112        }
113
114        Ok(statements)
115    }
116
117    // ========== Helper Methods ==========
118
119    fn is_at_end(&self) -> bool {
120        matches!(self.peek().kind, TokenKind::Eof)
121    }
122
123    fn peek(&self) -> &Token {
124        self.tokens
125            .get(self.pos)
126            .unwrap_or(&self.tokens[self.tokens.len() - 1])
127    }
128
129    fn peek_nth(&self, n: usize) -> &Token {
130        self.tokens
131            .get(self.pos + n)
132            .unwrap_or(&self.tokens[self.tokens.len() - 1])
133    }
134
135    fn advance(&mut self) -> Token {
136        if !self.is_at_end() {
137            self.pos += 1;
138        }
139        self.tokens.get(self.pos - 1).cloned().unwrap()
140    }
141
142    fn check(&self, kind: &TokenKind) -> bool {
143        std::mem::discriminant(&self.peek().kind) == std::mem::discriminant(kind)
144    }
145
146    fn check_keyword(&self, kw: &TokenKind) -> bool {
147        self.peek().kind == *kw
148    }
149
150    fn match_token(&mut self, kind: &TokenKind) -> bool {
151        if self.check(kind) {
152            self.advance();
153            true
154        } else {
155            false
156        }
157    }
158
159    fn expect(&mut self, kind: &TokenKind, message: &str) -> Result<Token, ParseError> {
160        if self.check(kind) {
161            Ok(self.advance())
162        } else {
163            Err(ParseError::new(message, self.peek().span).expected(format!("{:?}", kind)))
164        }
165    }
166
167    fn expect_identifier(&mut self, message: &str) -> Result<String, ParseError> {
168        match &self.peek().kind {
169            TokenKind::Identifier(name) => {
170                let name = name.clone();
171                self.advance();
172                Ok(name)
173            }
174            TokenKind::QuotedIdentifier(name) => {
175                let name = name.clone();
176                self.advance();
177                Ok(name)
178            }
179            _ => Err(ParseError::new(message, self.peek().span).expected("identifier")),
180        }
181    }
182
183    fn current_span(&self) -> Span {
184        self.peek().span
185    }
186
187    // ========== Statement Parsing ==========
188
189    fn parse_statement(&mut self) -> Result<Statement, Vec<ParseError>> {
190        let result = match &self.peek().kind {
191            TokenKind::Select => self.parse_select().map(Statement::Select),
192            TokenKind::Insert => self.parse_insert().map(Statement::Insert),
193            TokenKind::Update => self.parse_update().map(Statement::Update),
194            TokenKind::Delete => self.parse_delete().map(Statement::Delete),
195            TokenKind::Create => self.parse_create(),
196            TokenKind::Drop => self.parse_drop(),
197            TokenKind::Alter => self.parse_alter(),
198            TokenKind::Begin => self.parse_begin().map(Statement::Begin),
199            TokenKind::Commit => {
200                self.advance();
201                Ok(Statement::Commit)
202            }
203            TokenKind::Rollback => self.parse_rollback(),
204            TokenKind::Savepoint => self.parse_savepoint(),
205            TokenKind::Release => self.parse_release(),
206            _ => Err(ParseError::new(
207                format!("Unexpected token: {:?}", self.peek().kind),
208                self.peek().span,
209            )),
210        };
211
212        result.map_err(|e| vec![e])
213    }
214
215    // ========== SELECT Parsing ==========
216
217    fn parse_select(&mut self) -> Result<SelectStmt, ParseError> {
218        let start_span = self.current_span();
219        self.expect(&TokenKind::Select, "Expected SELECT")?;
220
221        // DISTINCT?
222        let distinct = self.match_token(&TokenKind::Distinct);
223        if !distinct {
224            self.match_token(&TokenKind::All);
225        }
226
227        // Select list
228        let columns = self.parse_select_list()?;
229
230        // FROM clause (optional for SELECT 1+1 style queries)
231        let from = if self.match_token(&TokenKind::From) {
232            Some(self.parse_from_clause()?)
233        } else {
234            None
235        };
236
237        // WHERE clause
238        let where_clause = if self.match_token(&TokenKind::Where) {
239            Some(self.parse_expr()?)
240        } else {
241            None
242        };
243
244        // GROUP BY clause
245        let group_by = if self.check_keyword(&TokenKind::Group) {
246            self.advance();
247            self.expect(&TokenKind::By, "Expected BY after GROUP")?;
248            self.parse_expr_list()?
249        } else {
250            Vec::new()
251        };
252
253        // HAVING clause
254        let having = if self.match_token(&TokenKind::Having) {
255            Some(self.parse_expr()?)
256        } else {
257            None
258        };
259
260        // ORDER BY clause
261        let order_by = if self.check_keyword(&TokenKind::Order) {
262            self.advance();
263            self.expect(&TokenKind::By, "Expected BY after ORDER")?;
264            self.parse_order_by_list()?
265        } else {
266            Vec::new()
267        };
268
269        // LIMIT clause
270        let limit = if self.match_token(&TokenKind::Limit) {
271            Some(self.parse_expr()?)
272        } else {
273            None
274        };
275
276        // OFFSET clause
277        let offset = if self.match_token(&TokenKind::Offset) {
278            Some(self.parse_expr()?)
279        } else {
280            None
281        };
282
283        // Set operations (UNION, INTERSECT, EXCEPT)
284        let mut unions = Vec::new();
285        loop {
286            let set_op = if self.match_token(&TokenKind::Union) {
287                if self.match_token(&TokenKind::All) {
288                    SetOp::UnionAll
289                } else {
290                    SetOp::Union
291                }
292            } else if self.match_token(&TokenKind::Intersect) {
293                if self.match_token(&TokenKind::All) {
294                    SetOp::IntersectAll
295                } else {
296                    SetOp::Intersect
297                }
298            } else if self.match_token(&TokenKind::Except) {
299                if self.match_token(&TokenKind::All) {
300                    SetOp::ExceptAll
301                } else {
302                    SetOp::Except
303                }
304            } else {
305                break;
306            };
307
308            let right = self.parse_select()?;
309            unions.push((set_op, Box::new(right)));
310        }
311
312        Ok(SelectStmt {
313            span: start_span.merge(self.current_span()),
314            distinct,
315            columns,
316            from,
317            where_clause,
318            group_by,
319            having,
320            order_by,
321            limit,
322            offset,
323            unions,
324        })
325    }
326
327    fn parse_select_list(&mut self) -> Result<Vec<SelectItem>, ParseError> {
328        let mut items = Vec::new();
329
330        loop {
331            items.push(self.parse_select_item()?);
332
333            if !self.match_token(&TokenKind::Comma) {
334                break;
335            }
336        }
337
338        Ok(items)
339    }
340
341    fn parse_select_item(&mut self) -> Result<SelectItem, ParseError> {
342        // Check for *
343        if self.match_token(&TokenKind::Star) {
344            return Ok(SelectItem::Wildcard);
345        }
346
347        // Check for table.*
348        if let TokenKind::Identifier(name) = &self.peek().kind
349            && self.peek_nth(1).kind == TokenKind::Dot
350            && self.peek_nth(2).kind == TokenKind::Star
351        {
352            let table = name.clone();
353            self.advance(); // identifier
354            self.advance(); // .
355            self.advance(); // *
356            return Ok(SelectItem::QualifiedWildcard(table));
357        }
358
359        // Expression with optional alias
360        let expr = self.parse_expr()?;
361
362        let alias = if self.match_token(&TokenKind::As) {
363            Some(self.expect_identifier("Expected alias after AS")?)
364        } else if let TokenKind::Identifier(name) = &self.peek().kind {
365            // Implicit alias (without AS)
366            if !self.check_keyword(&TokenKind::From)
367                && !self.check_keyword(&TokenKind::Where)
368                && !self.check(&TokenKind::Comma)
369                && !self.check_keyword(&TokenKind::Order)
370                && !self.check_keyword(&TokenKind::Group)
371                && !self.check_keyword(&TokenKind::Limit)
372                && !self.is_at_end()
373            {
374                let name = name.clone();
375                self.advance();
376                Some(name)
377            } else {
378                None
379            }
380        } else {
381            None
382        };
383
384        Ok(SelectItem::Expr { expr, alias })
385    }
386
387    fn parse_from_clause(&mut self) -> Result<FromClause, ParseError> {
388        let mut tables = vec![self.parse_table_ref()?];
389
390        while self.match_token(&TokenKind::Comma) {
391            tables.push(self.parse_table_ref()?);
392        }
393
394        Ok(FromClause { tables })
395    }
396
397    fn parse_table_ref(&mut self) -> Result<TableRef, ParseError> {
398        let mut table = self.parse_table_primary()?;
399
400        // Parse joins
401        loop {
402            let join_type = if self.match_token(&TokenKind::Cross) {
403                self.expect(&TokenKind::Join, "Expected JOIN after CROSS")?;
404                JoinType::Cross
405            } else if self.match_token(&TokenKind::Inner) {
406                self.expect(&TokenKind::Join, "Expected JOIN after INNER")?;
407                JoinType::Inner
408            } else if self.match_token(&TokenKind::Left) {
409                self.match_token(&TokenKind::Outer);
410                self.expect(&TokenKind::Join, "Expected JOIN after LEFT")?;
411                JoinType::Left
412            } else if self.match_token(&TokenKind::Right) {
413                self.match_token(&TokenKind::Outer);
414                self.expect(&TokenKind::Join, "Expected JOIN after RIGHT")?;
415                JoinType::Right
416            } else if self.match_token(&TokenKind::Join) {
417                JoinType::Inner // Default join is INNER
418            } else {
419                break;
420            };
421
422            let right = self.parse_table_primary()?;
423
424            let condition = if join_type == JoinType::Cross {
425                None
426            } else if self.match_token(&TokenKind::On) {
427                Some(JoinCondition::On(self.parse_expr()?))
428            } else if self.match_token(&TokenKind::Using) {
429                self.expect(&TokenKind::LParen, "Expected '(' after USING")?;
430                let columns = self.parse_identifier_list()?;
431                self.expect(&TokenKind::RParen, "Expected ')' after USING columns")?;
432                Some(JoinCondition::Using(columns))
433            } else {
434                return Err(ParseError::new(
435                    "Expected ON or USING clause for JOIN",
436                    self.current_span(),
437                ));
438            };
439
440            table = TableRef::Join {
441                left: Box::new(table),
442                join_type,
443                right: Box::new(right),
444                condition,
445            };
446        }
447
448        Ok(table)
449    }
450
451    fn parse_table_primary(&mut self) -> Result<TableRef, ParseError> {
452        // Subquery: (SELECT ...)
453        if self.match_token(&TokenKind::LParen) {
454            let query = self.parse_select()?;
455            self.expect(&TokenKind::RParen, "Expected ')' after subquery")?;
456
457            self.match_token(&TokenKind::As);
458            let alias = self.expect_identifier("Subquery requires an alias")?;
459
460            return Ok(TableRef::Subquery {
461                query: Box::new(query),
462                alias,
463            });
464        }
465
466        // Table name
467        let name = self.parse_object_name()?;
468
469        // Optional alias
470        let alias = if self.match_token(&TokenKind::As) {
471            Some(self.expect_identifier("Expected alias after AS")?)
472        } else if let TokenKind::Identifier(id) = &self.peek().kind {
473            // Check it's not a keyword
474            if !self.peek().kind.is_keyword() {
475                let alias = id.clone();
476                self.advance();
477                Some(alias)
478            } else {
479                None
480            }
481        } else {
482            None
483        };
484
485        Ok(TableRef::Table { name, alias })
486    }
487
488    // ========== INSERT Parsing ==========
489
490    fn parse_insert(&mut self) -> Result<InsertStmt, ParseError> {
491        let start_span = self.current_span();
492        self.expect(&TokenKind::Insert, "Expected INSERT")?;
493
494        // Check for MySQL-style INSERT IGNORE
495        let mysql_ignore = self.match_token(&TokenKind::Ignore);
496
497        // Check for SQLite-style INSERT OR {IGNORE|REPLACE|ABORT|FAIL}
498        let sqlite_conflict_action = if self.match_token(&TokenKind::Or) {
499            if self.match_token(&TokenKind::Ignore) {
500                Some(ConflictAction::DoNothing)
501            } else if self.match_token(&TokenKind::Replace) {
502                Some(ConflictAction::DoReplace)
503            } else if self.match_token(&TokenKind::Abort) {
504                Some(ConflictAction::DoAbort)
505            } else if self.match_token(&TokenKind::Fail) {
506                Some(ConflictAction::DoFail)
507            } else {
508                return Err(ParseError::new(
509                    "Expected IGNORE, REPLACE, ABORT, or FAIL after OR",
510                    self.current_span(),
511                ));
512            }
513        } else {
514            None
515        };
516
517        self.expect(&TokenKind::Into, "Expected INTO")?;
518
519        let table = self.parse_object_name()?;
520
521        // Optional column list
522        let columns = if self.match_token(&TokenKind::LParen) {
523            let cols = self.parse_identifier_list()?;
524            self.expect(&TokenKind::RParen, "Expected ')' after column list")?;
525            Some(cols)
526        } else {
527            None
528        };
529
530        // VALUES or SELECT
531        let source = if self.match_token(&TokenKind::Values) {
532            InsertSource::Values(self.parse_values_list()?)
533        } else if self.check_keyword(&TokenKind::Select) {
534            InsertSource::Query(Box::new(self.parse_select()?))
535        } else if self.match_token(&TokenKind::Default) {
536            self.expect(&TokenKind::Values, "Expected VALUES after DEFAULT")?;
537            InsertSource::Default
538        } else {
539            return Err(ParseError::new(
540                "Expected VALUES or SELECT",
541                self.current_span(),
542            ));
543        };
544
545        // Parse ON CONFLICT (PostgreSQL) or ON DUPLICATE KEY UPDATE (MySQL)
546        let on_conflict = if self.match_token(&TokenKind::On) {
547            if self.match_token(&TokenKind::Conflict) {
548                // PostgreSQL: ON CONFLICT [target] DO {NOTHING | UPDATE SET ...}
549                Some(self.parse_on_conflict()?)
550            } else if self.match_token(&TokenKind::Duplicate) {
551                // MySQL: ON DUPLICATE KEY UPDATE ...
552                self.expect(&TokenKind::Key, "Expected KEY after DUPLICATE")?;
553                self.expect(&TokenKind::Update, "Expected UPDATE after KEY")?;
554                let assignments = self.parse_assignments()?;
555                Some(OnConflict {
556                    target: None,
557                    action: ConflictAction::DoUpdate(assignments),
558                })
559            } else {
560                return Err(ParseError::new(
561                    "Expected CONFLICT or DUPLICATE after ON",
562                    self.current_span(),
563                ));
564            }
565        } else if mysql_ignore {
566            // MySQL INSERT IGNORE normalizes to DoNothing
567            Some(OnConflict {
568                target: None,
569                action: ConflictAction::DoNothing,
570            })
571        } else {
572            // SQLite conflict action from OR clause
573            sqlite_conflict_action.map(|action| OnConflict {
574                target: None,
575                action,
576            })
577        };
578
579        // RETURNING clause (PostgreSQL/SQLite)
580        let returning = if self.match_token(&TokenKind::Returning) {
581            Some(self.parse_select_list()?)
582        } else {
583            None
584        };
585
586        Ok(InsertStmt {
587            span: start_span.merge(self.current_span()),
588            table,
589            columns,
590            source,
591            on_conflict,
592            returning,
593        })
594    }
595
596    /// Parse ON CONFLICT clause (PostgreSQL style)
597    fn parse_on_conflict(&mut self) -> Result<OnConflict, ParseError> {
598        // Optional conflict target: (columns) or ON CONSTRAINT name
599        let target = if self.match_token(&TokenKind::LParen) {
600            let cols = self.parse_identifier_list()?;
601            self.expect(&TokenKind::RParen, "Expected ')' after conflict columns")?;
602            Some(ConflictTarget::Columns(cols))
603        } else if self.match_token(&TokenKind::On) {
604            // ON CONSTRAINT name (though this is a bit unusual syntax)
605            // Actually PostgreSQL uses just ON CONFLICT ON CONSTRAINT name
606            // Let's handle the standard case
607            None
608        } else {
609            None
610        };
611
612        // DO {NOTHING | UPDATE SET ...}
613        self.expect(&TokenKind::Do, "Expected DO after ON CONFLICT")?;
614
615        let action = if self.match_token(&TokenKind::Nothing) {
616            ConflictAction::DoNothing
617        } else if self.match_token(&TokenKind::Update) {
618            self.expect(&TokenKind::Set, "Expected SET after UPDATE")?;
619            let assignments = self.parse_assignments()?;
620            ConflictAction::DoUpdate(assignments)
621        } else {
622            return Err(ParseError::new(
623                "Expected NOTHING or UPDATE after DO",
624                self.current_span(),
625            ));
626        };
627
628        Ok(OnConflict { target, action })
629    }
630
631    fn parse_values_list(&mut self) -> Result<Vec<Vec<Expr>>, ParseError> {
632        let mut rows = Vec::new();
633
634        loop {
635            self.expect(&TokenKind::LParen, "Expected '(' for VALUES row")?;
636            let row = self.parse_expr_list()?;
637            self.expect(&TokenKind::RParen, "Expected ')' after VALUES row")?;
638            rows.push(row);
639
640            if !self.match_token(&TokenKind::Comma) {
641                break;
642            }
643        }
644
645        Ok(rows)
646    }
647
648    // ========== UPDATE Parsing ==========
649
650    fn parse_update(&mut self) -> Result<UpdateStmt, ParseError> {
651        let start_span = self.current_span();
652        self.expect(&TokenKind::Update, "Expected UPDATE")?;
653
654        let table = self.parse_object_name()?;
655
656        let alias = if self.match_token(&TokenKind::As) {
657            Some(self.expect_identifier("Expected alias after AS")?)
658        } else {
659            None
660        };
661
662        self.expect(&TokenKind::Set, "Expected SET")?;
663
664        let assignments = self.parse_assignments()?;
665
666        let from = if self.match_token(&TokenKind::From) {
667            Some(self.parse_from_clause()?)
668        } else {
669            None
670        };
671
672        let where_clause = if self.match_token(&TokenKind::Where) {
673            Some(self.parse_expr()?)
674        } else {
675            None
676        };
677
678        let returning = None; // TODO
679
680        Ok(UpdateStmt {
681            span: start_span.merge(self.current_span()),
682            table,
683            alias,
684            assignments,
685            from,
686            where_clause,
687            returning,
688        })
689    }
690
691    fn parse_assignments(&mut self) -> Result<Vec<Assignment>, ParseError> {
692        let mut assignments = Vec::new();
693
694        loop {
695            let column = self.expect_identifier("Expected column name")?;
696            self.expect(&TokenKind::Eq, "Expected '=' after column name")?;
697            let value = self.parse_expr()?;
698
699            assignments.push(Assignment { column, value });
700
701            if !self.match_token(&TokenKind::Comma) {
702                break;
703            }
704        }
705
706        Ok(assignments)
707    }
708
709    // ========== DELETE Parsing ==========
710
711    fn parse_delete(&mut self) -> Result<DeleteStmt, ParseError> {
712        let start_span = self.current_span();
713        self.expect(&TokenKind::Delete, "Expected DELETE")?;
714        self.expect(&TokenKind::From, "Expected FROM")?;
715
716        let table = self.parse_object_name()?;
717
718        let alias = if self.match_token(&TokenKind::As) {
719            Some(self.expect_identifier("Expected alias after AS")?)
720        } else {
721            None
722        };
723
724        let using = None; // TODO: Parse USING clause
725
726        let where_clause = if self.match_token(&TokenKind::Where) {
727            Some(self.parse_expr()?)
728        } else {
729            None
730        };
731
732        Ok(DeleteStmt {
733            span: start_span.merge(self.current_span()),
734            table,
735            alias,
736            using,
737            where_clause,
738            returning: None,
739        })
740    }
741
742    // ========== DDL Parsing ==========
743
744    fn parse_create(&mut self) -> Result<Statement, ParseError> {
745        self.expect(&TokenKind::Create, "Expected CREATE")?;
746
747        // Check for CREATE UNIQUE INDEX
748        let unique = self.match_token(&TokenKind::Unique);
749
750        if self.match_token(&TokenKind::Table) {
751            self.parse_create_table().map(Statement::CreateTable)
752        } else if self.match_token(&TokenKind::Index) {
753            self.parse_create_index(unique).map(Statement::CreateIndex)
754        } else if unique {
755            // After UNIQUE, must be INDEX
756            Err(ParseError::new(
757                "Expected INDEX after UNIQUE",
758                self.current_span(),
759            ))
760        } else {
761            Err(ParseError::new(
762                "Expected TABLE or INDEX after CREATE",
763                self.current_span(),
764            ))
765        }
766    }
767
768    fn parse_create_index(&mut self, unique: bool) -> Result<CreateIndexStmt, ParseError> {
769        let start_span = self.current_span();
770
771        // IF NOT EXISTS
772        let if_not_exists = if self.match_token(&TokenKind::If) {
773            self.expect(&TokenKind::Not, "Expected NOT after IF")?;
774            self.expect(&TokenKind::Exists, "Expected EXISTS after IF NOT")?;
775            true
776        } else {
777            false
778        };
779
780        // Index name
781        let name = self.expect_identifier("Expected index name")?;
782
783        self.expect(&TokenKind::On, "Expected ON after index name")?;
784
785        // Table name
786        let table = self.parse_object_name()?;
787
788        // Column list
789        self.expect(&TokenKind::LParen, "Expected '(' after table name")?;
790        let mut columns = Vec::new();
791        loop {
792            let col_name = self.expect_identifier("Expected column name")?;
793            
794            // Optional ASC/DESC
795            let asc = if self.match_token(&TokenKind::Desc) {
796                false
797            } else {
798                self.match_token(&TokenKind::Asc);
799                true
800            };
801
802            columns.push(IndexColumn {
803                name: col_name,
804                asc,
805                nulls_first: None,
806            });
807
808            if !self.match_token(&TokenKind::Comma) {
809                break;
810            }
811        }
812        self.expect(&TokenKind::RParen, "Expected ')' after column list")?;
813
814        // Optional WHERE clause for partial indexes
815        let where_clause = if self.match_token(&TokenKind::Where) {
816            Some(self.parse_expr()?)
817        } else {
818            None
819        };
820
821        Ok(CreateIndexStmt {
822            span: start_span.merge(self.current_span()),
823            unique,
824            if_not_exists,
825            name,
826            table,
827            columns,
828            where_clause,
829            index_type: None,
830        })
831    }
832
833    fn parse_create_table(&mut self) -> Result<CreateTableStmt, ParseError> {
834        let start_span = self.current_span();
835
836        let if_not_exists = if self.match_token(&TokenKind::If) {
837            self.expect(&TokenKind::Not, "Expected NOT after IF")?;
838            self.expect(&TokenKind::Exists, "Expected EXISTS after IF NOT")?;
839            true
840        } else {
841            false
842        };
843
844        let name = self.parse_object_name()?;
845
846        self.expect(&TokenKind::LParen, "Expected '(' after table name")?;
847
848        let mut columns = Vec::new();
849        let constraints = Vec::new();
850
851        loop {
852            // Check for table constraint keywords
853            if self.check_keyword(&TokenKind::Primary)
854                || self.check_keyword(&TokenKind::Foreign)
855                || self.check_keyword(&TokenKind::Unique)
856            {
857                // TODO: Parse table constraints
858                break;
859            }
860
861            // Check for end of column list
862            if self.check(&TokenKind::RParen) {
863                break;
864            }
865
866            // Parse column definition
867            columns.push(self.parse_column_def()?);
868
869            if !self.match_token(&TokenKind::Comma) {
870                break;
871            }
872        }
873
874        self.expect(&TokenKind::RParen, "Expected ')' after column definitions")?;
875
876        Ok(CreateTableStmt {
877            span: start_span.merge(self.current_span()),
878            if_not_exists,
879            name,
880            columns,
881            constraints,
882            options: Vec::new(),
883        })
884    }
885
886    fn parse_column_def(&mut self) -> Result<ColumnDef, ParseError> {
887        let name = self.expect_identifier("Expected column name")?;
888        let data_type = self.parse_data_type()?;
889
890        let mut constraints = Vec::new();
891
892        // Parse column constraints
893        loop {
894            if self.match_token(&TokenKind::Primary) {
895                self.expect(&TokenKind::Key, "Expected KEY after PRIMARY")?;
896                constraints.push(ColumnConstraint::PrimaryKey);
897            } else if self.match_token(&TokenKind::Not) {
898                self.expect(&TokenKind::Null, "Expected NULL after NOT")?;
899                constraints.push(ColumnConstraint::NotNull);
900            } else if self.match_token(&TokenKind::Null) {
901                constraints.push(ColumnConstraint::Null);
902            } else if self.match_token(&TokenKind::Unique) {
903                constraints.push(ColumnConstraint::Unique);
904            } else if self.match_token(&TokenKind::Default) {
905                constraints.push(ColumnConstraint::Default(self.parse_expr()?));
906            } else if self.match_token(&TokenKind::AutoIncrement) {
907                constraints.push(ColumnConstraint::AutoIncrement);
908            } else {
909                break;
910            }
911        }
912
913        Ok(ColumnDef {
914            name,
915            data_type,
916            constraints,
917        })
918    }
919
920    fn parse_data_type(&mut self) -> Result<DataType, ParseError> {
921        let type_name = match &self.peek().kind {
922            TokenKind::Int | TokenKind::IntegerKw => {
923                self.advance();
924                DataType::Int
925            }
926            TokenKind::Bigint => {
927                self.advance();
928                DataType::BigInt
929            }
930            TokenKind::Smallint => {
931                self.advance();
932                DataType::SmallInt
933            }
934            TokenKind::Tinyint => {
935                self.advance();
936                DataType::TinyInt
937            }
938            TokenKind::FloatKw | TokenKind::Real => {
939                self.advance();
940                DataType::Float
941            }
942            TokenKind::Double => {
943                self.advance();
944                DataType::Double
945            }
946            TokenKind::Varchar => {
947                self.advance();
948                let len = self.parse_type_length()?;
949                DataType::Varchar(len)
950            }
951            TokenKind::Char => {
952                self.advance();
953                let len = self.parse_type_length()?;
954                DataType::Char(len)
955            }
956            TokenKind::Text => {
957                self.advance();
958                DataType::Text
959            }
960            TokenKind::BlobKw => {
961                self.advance();
962                DataType::Blob
963            }
964            TokenKind::Boolean | TokenKind::Bool => {
965                self.advance();
966                DataType::Boolean
967            }
968            TokenKind::Date => {
969                self.advance();
970                DataType::Date
971            }
972            TokenKind::Time => {
973                self.advance();
974                DataType::Time
975            }
976            TokenKind::Timestamp | TokenKind::Datetime => {
977                self.advance();
978                DataType::Timestamp
979            }
980            TokenKind::Vector => {
981                self.advance();
982                let dims = self.parse_type_length()?.unwrap_or(128);
983                DataType::Vector(dims)
984            }
985            TokenKind::Embedding => {
986                self.advance();
987                let dims = self.parse_type_length()?.unwrap_or(1536);
988                DataType::Embedding(dims)
989            }
990            TokenKind::Identifier(name) => {
991                let name = name.clone();
992                self.advance();
993                DataType::Custom(name)
994            }
995            _ => {
996                return Err(ParseError::new(
997                    format!("Expected data type, got {:?}", self.peek().kind),
998                    self.current_span(),
999                ));
1000            }
1001        };
1002
1003        Ok(type_name)
1004    }
1005
1006    fn parse_type_length(&mut self) -> Result<Option<u32>, ParseError> {
1007        if self.match_token(&TokenKind::LParen) {
1008            let len = match &self.peek().kind {
1009                TokenKind::Integer(n) => {
1010                    let n = *n as u32;
1011                    self.advance();
1012                    n
1013                }
1014                _ => return Err(ParseError::new("Expected integer", self.current_span())),
1015            };
1016            self.expect(&TokenKind::RParen, "Expected ')'")?;
1017            Ok(Some(len))
1018        } else {
1019            Ok(None)
1020        }
1021    }
1022
1023    fn parse_drop(&mut self) -> Result<Statement, ParseError> {
1024        let start_span = self.current_span();
1025        self.expect(&TokenKind::Drop, "Expected DROP")?;
1026
1027        if self.match_token(&TokenKind::Table) {
1028            let if_exists = if self.match_token(&TokenKind::If) {
1029                self.expect(&TokenKind::Exists, "Expected EXISTS after IF")?;
1030                true
1031            } else {
1032                false
1033            };
1034
1035            let name = self.parse_object_name()?;
1036            let cascade = false; // TODO: Parse CASCADE
1037
1038            Ok(Statement::DropTable(DropTableStmt {
1039                span: start_span.merge(self.current_span()),
1040                if_exists,
1041                names: vec![name],
1042                cascade,
1043            }))
1044        } else if self.match_token(&TokenKind::Index) {
1045            let if_exists = if self.match_token(&TokenKind::If) {
1046                self.expect(&TokenKind::Exists, "Expected EXISTS after IF")?;
1047                true
1048            } else {
1049                false
1050            };
1051
1052            let name = self.expect_identifier("Expected index name")?;
1053
1054            // Optional ON table_name (PostgreSQL style)
1055            let table = if self.match_token(&TokenKind::On) {
1056                Some(self.parse_object_name()?)
1057            } else {
1058                None
1059            };
1060
1061            Ok(Statement::DropIndex(DropIndexStmt {
1062                span: start_span.merge(self.current_span()),
1063                if_exists,
1064                name,
1065                table,
1066                cascade: false,
1067            }))
1068        } else {
1069            Err(ParseError::new(
1070                "Expected TABLE or INDEX after DROP",
1071                self.current_span(),
1072            ))
1073        }
1074    }
1075
1076    fn parse_alter(&mut self) -> Result<Statement, ParseError> {
1077        // TODO: Implement ALTER TABLE
1078        Err(ParseError::new(
1079            "ALTER not yet implemented",
1080            self.current_span(),
1081        ))
1082    }
1083
1084    // ========== Transaction Parsing ==========
1085
1086    fn parse_begin(&mut self) -> Result<BeginStmt, ParseError> {
1087        self.expect(&TokenKind::Begin, "Expected BEGIN")?;
1088        self.match_token(&TokenKind::Transaction);
1089
1090        // TODO: Parse isolation level
1091        Ok(BeginStmt {
1092            read_only: false,
1093            isolation_level: None,
1094        })
1095    }
1096
1097    fn parse_rollback(&mut self) -> Result<Statement, ParseError> {
1098        self.expect(&TokenKind::Rollback, "Expected ROLLBACK")?;
1099        self.match_token(&TokenKind::Transaction);
1100
1101        // Check for ROLLBACK TO SAVEPOINT
1102        // TODO
1103
1104        Ok(Statement::Rollback(None))
1105    }
1106
1107    fn parse_savepoint(&mut self) -> Result<Statement, ParseError> {
1108        self.expect(&TokenKind::Savepoint, "Expected SAVEPOINT")?;
1109        let name = self.expect_identifier("Expected savepoint name")?;
1110        Ok(Statement::Savepoint(name))
1111    }
1112
1113    fn parse_release(&mut self) -> Result<Statement, ParseError> {
1114        self.expect(&TokenKind::Release, "Expected RELEASE")?;
1115        self.match_token(&TokenKind::Savepoint);
1116        let name = self.expect_identifier("Expected savepoint name")?;
1117        Ok(Statement::Release(name))
1118    }
1119
1120    // ========== Expression Parsing ==========
1121
1122    fn parse_expr(&mut self) -> Result<Expr, ParseError> {
1123        self.parse_or_expr()
1124    }
1125
1126    fn parse_or_expr(&mut self) -> Result<Expr, ParseError> {
1127        let mut left = self.parse_and_expr()?;
1128
1129        while self.match_token(&TokenKind::Or) {
1130            let right = self.parse_and_expr()?;
1131            left = Expr::BinaryOp {
1132                left: Box::new(left),
1133                op: BinaryOperator::Or,
1134                right: Box::new(right),
1135            };
1136        }
1137
1138        Ok(left)
1139    }
1140
1141    fn parse_and_expr(&mut self) -> Result<Expr, ParseError> {
1142        let mut left = self.parse_not_expr()?;
1143
1144        while self.match_token(&TokenKind::And) {
1145            let right = self.parse_not_expr()?;
1146            left = Expr::BinaryOp {
1147                left: Box::new(left),
1148                op: BinaryOperator::And,
1149                right: Box::new(right),
1150            };
1151        }
1152
1153        Ok(left)
1154    }
1155
1156    fn parse_not_expr(&mut self) -> Result<Expr, ParseError> {
1157        if self.match_token(&TokenKind::Not) {
1158            let expr = self.parse_not_expr()?;
1159            Ok(Expr::UnaryOp {
1160                op: UnaryOperator::Not,
1161                expr: Box::new(expr),
1162            })
1163        } else {
1164            self.parse_comparison_expr()
1165        }
1166    }
1167
1168    fn parse_comparison_expr(&mut self) -> Result<Expr, ParseError> {
1169        let mut left = self.parse_additive_expr()?;
1170
1171        // IS NULL / IS NOT NULL
1172        if self.match_token(&TokenKind::Is) {
1173            let negated = self.match_token(&TokenKind::Not);
1174            self.expect(&TokenKind::Null, "Expected NULL after IS")?;
1175            return Ok(Expr::IsNull {
1176                expr: Box::new(left),
1177                negated,
1178            });
1179        }
1180
1181        // IN / NOT IN
1182        let negated = self.match_token(&TokenKind::Not);
1183        if self.match_token(&TokenKind::In) {
1184            self.expect(&TokenKind::LParen, "Expected '(' after IN")?;
1185
1186            if self.check_keyword(&TokenKind::Select) {
1187                let subquery = self.parse_select()?;
1188                self.expect(&TokenKind::RParen, "Expected ')'")?;
1189                return Ok(Expr::InSubquery {
1190                    expr: Box::new(left),
1191                    subquery: Box::new(subquery),
1192                    negated,
1193                });
1194            } else {
1195                let list = self.parse_expr_list()?;
1196                self.expect(&TokenKind::RParen, "Expected ')'")?;
1197                return Ok(Expr::InList {
1198                    expr: Box::new(left),
1199                    list,
1200                    negated,
1201                });
1202            }
1203        }
1204
1205        // BETWEEN
1206        if self.match_token(&TokenKind::Between) {
1207            let low = self.parse_additive_expr()?;
1208            self.expect(&TokenKind::And, "Expected AND in BETWEEN")?;
1209            let high = self.parse_additive_expr()?;
1210            return Ok(Expr::Between {
1211                expr: Box::new(left),
1212                low: Box::new(low),
1213                high: Box::new(high),
1214                negated,
1215            });
1216        }
1217
1218        // LIKE
1219        if self.match_token(&TokenKind::Like) {
1220            let pattern = self.parse_additive_expr()?;
1221            let escape = if self.match_token(&TokenKind::Escape) {
1222                Some(Box::new(self.parse_additive_expr()?))
1223            } else {
1224                None
1225            };
1226            return Ok(Expr::Like {
1227                expr: Box::new(left),
1228                pattern: Box::new(pattern),
1229                escape,
1230                negated,
1231            });
1232        }
1233
1234        // If we consumed NOT but didn't find IN/BETWEEN/LIKE, error
1235        if negated {
1236            return Err(ParseError::new(
1237                "Expected IN, BETWEEN, or LIKE after NOT",
1238                self.current_span(),
1239            ));
1240        }
1241
1242        // Comparison operators
1243        let op = match &self.peek().kind {
1244            TokenKind::Eq => Some(BinaryOperator::Eq),
1245            TokenKind::Ne => Some(BinaryOperator::Ne),
1246            TokenKind::Lt => Some(BinaryOperator::Lt),
1247            TokenKind::Le => Some(BinaryOperator::Le),
1248            TokenKind::Gt => Some(BinaryOperator::Gt),
1249            TokenKind::Ge => Some(BinaryOperator::Ge),
1250            _ => None,
1251        };
1252
1253        if let Some(op) = op {
1254            self.advance();
1255            let right = self.parse_additive_expr()?;
1256            left = Expr::BinaryOp {
1257                left: Box::new(left),
1258                op,
1259                right: Box::new(right),
1260            };
1261        }
1262
1263        Ok(left)
1264    }
1265
1266    fn parse_additive_expr(&mut self) -> Result<Expr, ParseError> {
1267        let mut left = self.parse_multiplicative_expr()?;
1268
1269        loop {
1270            let op = match &self.peek().kind {
1271                TokenKind::Plus => BinaryOperator::Plus,
1272                TokenKind::Minus => BinaryOperator::Minus,
1273                TokenKind::Concat => BinaryOperator::Concat,
1274                _ => break,
1275            };
1276            self.advance();
1277
1278            let right = self.parse_multiplicative_expr()?;
1279            left = Expr::BinaryOp {
1280                left: Box::new(left),
1281                op,
1282                right: Box::new(right),
1283            };
1284        }
1285
1286        Ok(left)
1287    }
1288
1289    fn parse_multiplicative_expr(&mut self) -> Result<Expr, ParseError> {
1290        let mut left = self.parse_unary_expr()?;
1291
1292        loop {
1293            let op = match &self.peek().kind {
1294                TokenKind::Star => BinaryOperator::Multiply,
1295                TokenKind::Slash => BinaryOperator::Divide,
1296                TokenKind::Percent => BinaryOperator::Modulo,
1297                _ => break,
1298            };
1299            self.advance();
1300
1301            let right = self.parse_unary_expr()?;
1302            left = Expr::BinaryOp {
1303                left: Box::new(left),
1304                op,
1305                right: Box::new(right),
1306            };
1307        }
1308
1309        Ok(left)
1310    }
1311
1312    fn parse_unary_expr(&mut self) -> Result<Expr, ParseError> {
1313        match &self.peek().kind {
1314            TokenKind::Minus => {
1315                self.advance();
1316                let expr = self.parse_unary_expr()?;
1317                Ok(Expr::UnaryOp {
1318                    op: UnaryOperator::Minus,
1319                    expr: Box::new(expr),
1320                })
1321            }
1322            TokenKind::Plus => {
1323                self.advance();
1324                let expr = self.parse_unary_expr()?;
1325                Ok(Expr::UnaryOp {
1326                    op: UnaryOperator::Plus,
1327                    expr: Box::new(expr),
1328                })
1329            }
1330            TokenKind::BitNot => {
1331                self.advance();
1332                let expr = self.parse_unary_expr()?;
1333                Ok(Expr::UnaryOp {
1334                    op: UnaryOperator::BitNot,
1335                    expr: Box::new(expr),
1336                })
1337            }
1338            _ => self.parse_primary_expr(),
1339        }
1340    }
1341
1342    fn parse_primary_expr(&mut self) -> Result<Expr, ParseError> {
1343        let expr = match self.peek().kind.clone() {
1344            // Literals
1345            TokenKind::Integer(n) => {
1346                self.advance();
1347                Expr::Literal(Literal::Integer(n))
1348            }
1349            TokenKind::Float(n) => {
1350                self.advance();
1351                Expr::Literal(Literal::Float(n))
1352            }
1353            TokenKind::String(s) => {
1354                self.advance();
1355                Expr::Literal(Literal::String(s))
1356            }
1357            TokenKind::Blob(b) => {
1358                self.advance();
1359                Expr::Literal(Literal::Blob(b))
1360            }
1361            TokenKind::True => {
1362                self.advance();
1363                Expr::Literal(Literal::Boolean(true))
1364            }
1365            TokenKind::False => {
1366                self.advance();
1367                Expr::Literal(Literal::Boolean(false))
1368            }
1369            TokenKind::Null => {
1370                self.advance();
1371                Expr::Literal(Literal::Null)
1372            }
1373
1374            // Placeholder
1375            TokenKind::Placeholder(n) => {
1376                self.advance();
1377                Expr::Placeholder(n)
1378            }
1379
1380            // Parenthesized expression or subquery
1381            TokenKind::LParen => {
1382                self.advance();
1383                if self.check_keyword(&TokenKind::Select) {
1384                    let query = self.parse_select()?;
1385                    self.expect(&TokenKind::RParen, "Expected ')'")?;
1386                    Expr::Subquery(Box::new(query))
1387                } else {
1388                    let expr = self.parse_expr()?;
1389
1390                    // Check for tuple
1391                    if self.match_token(&TokenKind::Comma) {
1392                        let mut exprs = vec![expr];
1393                        exprs.push(self.parse_expr()?);
1394                        while self.match_token(&TokenKind::Comma) {
1395                            exprs.push(self.parse_expr()?);
1396                        }
1397                        self.expect(&TokenKind::RParen, "Expected ')'")?;
1398                        Expr::Tuple(exprs)
1399                    } else {
1400                        self.expect(&TokenKind::RParen, "Expected ')'")?;
1401                        expr
1402                    }
1403                }
1404            }
1405
1406            // CASE expression
1407            TokenKind::Case => {
1408                self.advance();
1409                self.parse_case_expr()?
1410            }
1411
1412            // EXISTS
1413            TokenKind::Exists => {
1414                self.advance();
1415                self.expect(&TokenKind::LParen, "Expected '(' after EXISTS")?;
1416                let query = self.parse_select()?;
1417                self.expect(&TokenKind::RParen, "Expected ')'")?;
1418                Expr::Exists(Box::new(query))
1419            }
1420
1421            // CAST
1422            TokenKind::Cast => {
1423                self.advance();
1424                self.expect(&TokenKind::LParen, "Expected '(' after CAST")?;
1425                let expr = self.parse_expr()?;
1426                self.expect(&TokenKind::As, "Expected AS in CAST")?;
1427                let data_type = self.parse_data_type()?;
1428                self.expect(&TokenKind::RParen, "Expected ')'")?;
1429                Expr::Cast {
1430                    expr: Box::new(expr),
1431                    data_type,
1432                }
1433            }
1434
1435            // SochDB Extensions
1436            TokenKind::VectorSearch => {
1437                self.advance();
1438                self.parse_vector_search()?
1439            }
1440            TokenKind::ContextWindow => {
1441                self.advance();
1442                self.parse_context_window()?
1443            }
1444
1445            // Aggregate functions
1446            TokenKind::Count
1447            | TokenKind::Sum
1448            | TokenKind::Avg
1449            | TokenKind::Min
1450            | TokenKind::Max => self.parse_aggregate_function()?,
1451
1452            // Function call or column reference
1453            TokenKind::Identifier(_) | TokenKind::QuotedIdentifier(_) => {
1454                self.parse_identifier_or_function()?
1455            }
1456
1457            // Type keywords used as column names
1458            TokenKind::Vector | TokenKind::Embedding | TokenKind::Text | TokenKind::BlobKw => {
1459                // Convert keyword to identifier
1460                let name = match &self.peek().kind {
1461                    TokenKind::Vector => "vector".to_string(),
1462                    TokenKind::Embedding => "embedding".to_string(),
1463                    TokenKind::Text => "text".to_string(),
1464                    TokenKind::BlobKw => "blob".to_string(),
1465                    _ => unreachable!(),
1466                };
1467                self.advance();
1468                Expr::Column(ColumnRef::new(name))
1469            }
1470
1471            _ => {
1472                return Err(ParseError::new(
1473                    format!("Unexpected token in expression: {:?}", self.peek().kind),
1474                    self.current_span(),
1475                ));
1476            }
1477        };
1478
1479        // Handle postfix operators
1480        self.parse_postfix_expr(expr)
1481    }
1482
1483    fn parse_postfix_expr(&mut self, mut expr: Expr) -> Result<Expr, ParseError> {
1484        loop {
1485            if self.match_token(&TokenKind::LBracket) {
1486                // Array subscript
1487                let index = self.parse_expr()?;
1488                self.expect(&TokenKind::RBracket, "Expected ']'")?;
1489                expr = Expr::Subscript {
1490                    expr: Box::new(expr),
1491                    index: Box::new(index),
1492                };
1493            } else if self.match_token(&TokenKind::Arrow) {
1494                // JSON access: ->
1495                let path = self.parse_primary_expr()?;
1496                expr = Expr::JsonAccess {
1497                    expr: Box::new(expr),
1498                    path: Box::new(path),
1499                    return_text: false,
1500                };
1501            } else if self.match_token(&TokenKind::DoubleArrow) {
1502                // JSON access returning text: ->>
1503                let path = self.parse_primary_expr()?;
1504                expr = Expr::JsonAccess {
1505                    expr: Box::new(expr),
1506                    path: Box::new(path),
1507                    return_text: true,
1508                };
1509            } else if self.match_token(&TokenKind::DoubleColon) {
1510                // Type cast: ::type
1511                let data_type = self.parse_data_type()?;
1512                expr = Expr::Cast {
1513                    expr: Box::new(expr),
1514                    data_type,
1515                };
1516            } else {
1517                break;
1518            }
1519        }
1520
1521        Ok(expr)
1522    }
1523
1524    fn parse_case_expr(&mut self) -> Result<Expr, ParseError> {
1525        // Simple CASE: CASE expr WHEN val THEN result ...
1526        // Searched CASE: CASE WHEN cond THEN result ...
1527
1528        let operand = if !self.check_keyword(&TokenKind::When) {
1529            Some(Box::new(self.parse_expr()?))
1530        } else {
1531            None
1532        };
1533
1534        let mut conditions = Vec::new();
1535
1536        while self.match_token(&TokenKind::When) {
1537            let when_expr = self.parse_expr()?;
1538            self.expect(&TokenKind::Then, "Expected THEN")?;
1539            let then_expr = self.parse_expr()?;
1540            conditions.push((when_expr, then_expr));
1541        }
1542
1543        let else_result = if self.match_token(&TokenKind::Else) {
1544            Some(Box::new(self.parse_expr()?))
1545        } else {
1546            None
1547        };
1548
1549        self.expect(&TokenKind::End, "Expected END")?;
1550
1551        Ok(Expr::Case {
1552            operand,
1553            conditions,
1554            else_result,
1555        })
1556    }
1557
1558    fn parse_identifier_or_function(&mut self) -> Result<Expr, ParseError> {
1559        let name = self.parse_object_name()?;
1560
1561        // Check for function call
1562        if self.match_token(&TokenKind::LParen) {
1563            let args = if self.check(&TokenKind::RParen) {
1564                Vec::new()
1565            } else {
1566                self.parse_expr_list()?
1567            };
1568            self.expect(&TokenKind::RParen, "Expected ')'")?;
1569
1570            Ok(Expr::Function(FunctionCall {
1571                name,
1572                args,
1573                distinct: false,
1574                filter: None,
1575                over: None,
1576            }))
1577        } else {
1578            // Column reference
1579            let parts = name.parts;
1580            if parts.len() == 1 {
1581                Ok(Expr::Column(ColumnRef::new(
1582                    parts.into_iter().next().unwrap(),
1583                )))
1584            } else if parts.len() == 2 {
1585                let mut iter = parts.into_iter();
1586                let table = iter.next().unwrap();
1587                let column = iter.next().unwrap();
1588                Ok(Expr::Column(ColumnRef::qualified(table, column)))
1589            } else {
1590                Err(ParseError::new(
1591                    "Invalid column reference",
1592                    self.current_span(),
1593                ))
1594            }
1595        }
1596    }
1597
1598    fn parse_aggregate_function(&mut self) -> Result<Expr, ParseError> {
1599        let name = match &self.peek().kind {
1600            TokenKind::Count => "COUNT",
1601            TokenKind::Sum => "SUM",
1602            TokenKind::Avg => "AVG",
1603            TokenKind::Min => "MIN",
1604            TokenKind::Max => "MAX",
1605            _ => {
1606                return Err(ParseError::new(
1607                    "Expected aggregate function",
1608                    self.current_span(),
1609                ));
1610            }
1611        };
1612        self.advance();
1613
1614        self.expect(&TokenKind::LParen, "Expected '(' after aggregate function")?;
1615
1616        let distinct = self.match_token(&TokenKind::Distinct);
1617
1618        let args = if self.match_token(&TokenKind::Star) {
1619            vec![Expr::Column(ColumnRef::new("*"))]
1620        } else {
1621            self.parse_expr_list()?
1622        };
1623
1624        self.expect(&TokenKind::RParen, "Expected ')'")?;
1625
1626        Ok(Expr::Function(FunctionCall {
1627            name: ObjectName::new(name),
1628            args,
1629            distinct,
1630            filter: None,
1631            over: None,
1632        }))
1633    }
1634
1635    fn parse_vector_search(&mut self) -> Result<Expr, ParseError> {
1636        self.expect(&TokenKind::LParen, "Expected '(' after VECTOR_SEARCH")?;
1637
1638        let column = self.parse_expr()?;
1639        self.expect(&TokenKind::Comma, "Expected ','")?;
1640
1641        let query = self.parse_expr()?;
1642        self.expect(&TokenKind::Comma, "Expected ','")?;
1643
1644        let k = match &self.peek().kind {
1645            TokenKind::Integer(n) => *n as u32,
1646            _ => return Err(ParseError::new("Expected integer k", self.current_span())),
1647        };
1648        self.advance();
1649
1650        let metric = if self.match_token(&TokenKind::Comma) {
1651            match &self.peek().kind {
1652                TokenKind::Cosine => {
1653                    self.advance();
1654                    VectorMetric::Cosine
1655                }
1656                TokenKind::Euclidean => {
1657                    self.advance();
1658                    VectorMetric::Euclidean
1659                }
1660                TokenKind::DotProduct => {
1661                    self.advance();
1662                    VectorMetric::DotProduct
1663                }
1664                _ => VectorMetric::Cosine,
1665            }
1666        } else {
1667            VectorMetric::Cosine
1668        };
1669
1670        self.expect(&TokenKind::RParen, "Expected ')'")?;
1671
1672        Ok(Expr::VectorSearch {
1673            column: Box::new(column),
1674            query: Box::new(query),
1675            k,
1676            metric,
1677        })
1678    }
1679
1680    fn parse_context_window(&mut self) -> Result<Expr, ParseError> {
1681        self.expect(&TokenKind::LParen, "Expected '(' after CONTEXT_WINDOW")?;
1682
1683        let source = self.parse_expr()?;
1684        self.expect(&TokenKind::Comma, "Expected ','")?;
1685
1686        let max_tokens = match &self.peek().kind {
1687            TokenKind::Integer(n) => *n as u32,
1688            _ => {
1689                return Err(ParseError::new(
1690                    "Expected integer max_tokens",
1691                    self.current_span(),
1692                ));
1693            }
1694        };
1695        self.advance();
1696
1697        let priority = if self.match_token(&TokenKind::Comma) {
1698            Some(Box::new(self.parse_expr()?))
1699        } else {
1700            None
1701        };
1702
1703        self.expect(&TokenKind::RParen, "Expected ')'")?;
1704
1705        Ok(Expr::ContextWindow {
1706            source: Box::new(source),
1707            max_tokens,
1708            priority,
1709        })
1710    }
1711
1712    // ========== Helper Parsers ==========
1713
1714    fn parse_object_name(&mut self) -> Result<ObjectName, ParseError> {
1715        let mut parts = Vec::new();
1716        parts.push(self.expect_identifier("Expected identifier")?);
1717
1718        while self.match_token(&TokenKind::Dot) {
1719            // Check for wildcard after dot (table.*)
1720            if self.check(&TokenKind::Star) {
1721                // Don't consume star, let caller handle it
1722                break;
1723            }
1724            parts.push(self.expect_identifier("Expected identifier after '.'")?);
1725        }
1726
1727        Ok(ObjectName { parts })
1728    }
1729
1730    fn parse_identifier_list(&mut self) -> Result<Vec<String>, ParseError> {
1731        let mut list = vec![self.expect_identifier("Expected identifier")?];
1732
1733        while self.match_token(&TokenKind::Comma) {
1734            list.push(self.expect_identifier("Expected identifier")?);
1735        }
1736
1737        Ok(list)
1738    }
1739
1740    fn parse_expr_list(&mut self) -> Result<Vec<Expr>, ParseError> {
1741        let mut list = vec![self.parse_expr()?];
1742
1743        while self.match_token(&TokenKind::Comma) {
1744            list.push(self.parse_expr()?);
1745        }
1746
1747        Ok(list)
1748    }
1749
1750    fn parse_order_by_list(&mut self) -> Result<Vec<OrderByItem>, ParseError> {
1751        let mut list = Vec::new();
1752
1753        loop {
1754            let expr = self.parse_expr()?;
1755
1756            let asc = if self.match_token(&TokenKind::Desc) {
1757                false
1758            } else {
1759                self.match_token(&TokenKind::Asc);
1760                true
1761            };
1762
1763            let nulls_first = if self.match_token(&TokenKind::Nulls) {
1764                if self.match_token(&TokenKind::First) {
1765                    Some(true)
1766                } else if self.match_token(&TokenKind::Last) {
1767                    Some(false)
1768                } else {
1769                    return Err(ParseError::new(
1770                        "Expected FIRST or LAST after NULLS",
1771                        self.current_span(),
1772                    ));
1773                }
1774            } else {
1775                None
1776            };
1777
1778            list.push(OrderByItem {
1779                expr,
1780                asc,
1781                nulls_first,
1782            });
1783
1784            if !self.match_token(&TokenKind::Comma) {
1785                break;
1786            }
1787        }
1788
1789        Ok(list)
1790    }
1791}
1792
/// Unit tests for the SQL parser: each test feeds SQL text through
/// `Parser::parse` / `Parser::parse_statements` and inspects the resulting AST.
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `sql` as a single statement.
    ///
    /// Panics with the offending SQL text and the parser error when parsing
    /// fails, so a failing test shows which input was rejected. The panic is
    /// attributed to the calling test via `#[track_caller]`.
    #[track_caller]
    fn parse_ok(sql: &str) -> Statement {
        match Parser::parse(sql) {
            Ok(stmt) => stmt,
            Err(err) => panic!("failed to parse {:?}: {:?}", sql, err),
        }
    }

    // ===== Basic Statement Tests =====

    #[test]
    fn test_simple_select() {
        let stmt = parse_ok("SELECT * FROM users");
        assert!(matches!(stmt, Statement::Select(_)));
    }

    #[test]
    fn test_select_with_where() {
        let stmt = parse_ok("SELECT id, name FROM users WHERE id = 1");
        if let Statement::Select(select) = stmt {
            assert_eq!(select.columns.len(), 2);
            assert!(select.where_clause.is_some());
        } else {
            panic!("Expected SELECT statement");
        }
    }

    #[test]
    fn test_insert() {
        let stmt = parse_ok("INSERT INTO users (id, name) VALUES (1, 'Alice')");
        assert!(matches!(stmt, Statement::Insert(_)));
    }

    #[test]
    fn test_create_table() {
        let stmt =
            parse_ok("CREATE TABLE users (id INTEGER PRIMARY KEY, name VARCHAR(100) NOT NULL)");
        if let Statement::CreateTable(create) = stmt {
            assert_eq!(create.columns.len(), 2);
        } else {
            panic!("Expected CREATE TABLE statement");
        }
    }

    #[test]
    fn test_vector_search() {
        let stmt =
            parse_ok("SELECT * FROM docs WHERE VECTOR_SEARCH(embedding, $1, 10, COSINE) > 0.8");
        assert!(matches!(stmt, Statement::Select(_)));
    }

    #[test]
    fn test_join() {
        let stmt = parse_ok(
            "SELECT u.name, o.total FROM users u INNER JOIN orders o ON u.id = o.user_id",
        );
        assert!(matches!(stmt, Statement::Select(_)));
    }

    #[test]
    fn test_subquery() {
        let stmt = parse_ok("SELECT * FROM users WHERE id IN (SELECT user_id FROM orders)");
        assert!(matches!(stmt, Statement::Select(_)));
    }

    #[test]
    fn test_update() {
        let stmt = parse_ok("UPDATE users SET name = 'Bob', age = 30 WHERE id = 1");
        assert!(matches!(stmt, Statement::Update(_)));
    }

    #[test]
    fn test_delete() {
        let stmt = parse_ok("DELETE FROM users WHERE id = 1");
        assert!(matches!(stmt, Statement::Delete(_)));
    }

    #[test]
    fn test_group_by() {
        let stmt = parse_ok(
            "SELECT category, COUNT(*) FROM products GROUP BY category HAVING COUNT(*) > 5",
        );
        if let Statement::Select(select) = stmt {
            assert!(!select.group_by.is_empty());
            assert!(select.having.is_some());
        } else {
            panic!("Expected SELECT statement");
        }
    }

    #[test]
    fn test_order_by() {
        let stmt = parse_ok("SELECT * FROM users ORDER BY name ASC, age DESC NULLS LAST");
        if let Statement::Select(select) = stmt {
            assert_eq!(select.order_by.len(), 2);

            // Verify the direction and NULLS placement of every item, not
            // just the item count: (asc, nulls_first) per ORDER BY entry.
            let modifiers: Vec<(bool, Option<bool>)> = select
                .order_by
                .iter()
                .map(|item| (item.asc, item.nulls_first))
                .collect();
            assert_eq!(modifiers, vec![(true, None), (false, Some(false))]);
        } else {
            panic!("Expected SELECT statement");
        }
    }

    #[test]
    fn test_order_by_nulls_requires_first_or_last() {
        // A dangling NULLS keyword must be rejected rather than ignored.
        assert!(Parser::parse("SELECT * FROM users ORDER BY name NULLS").is_err());
    }

    #[test]
    fn test_between() {
        let stmt = parse_ok("SELECT * FROM products WHERE price BETWEEN 10 AND 100");
        assert!(matches!(stmt, Statement::Select(_)));
    }

    #[test]
    fn test_like() {
        let stmt = parse_ok("SELECT * FROM users WHERE name LIKE '%Alice%'");
        assert!(matches!(stmt, Statement::Select(_)));
    }

    #[test]
    fn test_case() {
        let stmt =
            parse_ok("SELECT CASE WHEN x > 0 THEN 'positive' ELSE 'non-positive' END FROM t");
        assert!(matches!(stmt, Statement::Select(_)));
    }

    #[test]
    fn test_transactions() {
        let stmts = Parser::parse_statements("BEGIN; COMMIT; ROLLBACK").unwrap();
        assert_eq!(stmts.len(), 3);
        assert!(matches!(stmts[0], Statement::Begin(_)));
        assert!(matches!(stmts[1], Statement::Commit));
        assert!(matches!(stmts[2], Statement::Rollback(_)));
    }

    // ===== Dialect-Specific Insert Tests =====

    #[test]
    fn test_insert_on_conflict_do_nothing() {
        let stmt =
            parse_ok("INSERT INTO users (id, name) VALUES (1, 'Alice') ON CONFLICT DO NOTHING");
        if let Statement::Insert(insert) = stmt {
            let on_conflict = insert
                .on_conflict
                .expect("ON CONFLICT DO NOTHING should produce a conflict clause");
            assert!(matches!(on_conflict.action, ConflictAction::DoNothing));
        } else {
            panic!("Expected INSERT statement");
        }
    }

    #[test]
    fn test_insert_on_conflict_do_update() {
        let stmt = parse_ok(
            "INSERT INTO users (id, name) VALUES (1, 'Alice') ON CONFLICT (id) DO UPDATE SET name = 'Bob'",
        );
        if let Statement::Insert(insert) = stmt {
            let on_conflict = insert
                .on_conflict
                .expect("ON CONFLICT ... DO UPDATE should produce a conflict clause");
            assert!(matches!(on_conflict.target, Some(ConflictTarget::Columns(_))));
            assert!(matches!(on_conflict.action, ConflictAction::DoUpdate(_)));
        } else {
            panic!("Expected INSERT statement");
        }
    }

    #[test]
    fn test_insert_ignore_mysql() {
        let stmt = parse_ok("INSERT IGNORE INTO users (id, name) VALUES (1, 'Alice')");
        if let Statement::Insert(insert) = stmt {
            let on_conflict = insert
                .on_conflict
                .expect("INSERT IGNORE should produce a conflict clause");
            assert!(matches!(on_conflict.action, ConflictAction::DoNothing));
        } else {
            panic!("Expected INSERT statement");
        }
    }

    #[test]
    fn test_insert_or_ignore_sqlite() {
        let stmt = parse_ok("INSERT OR IGNORE INTO users (id, name) VALUES (1, 'Alice')");
        if let Statement::Insert(insert) = stmt {
            let on_conflict = insert
                .on_conflict
                .expect("INSERT OR IGNORE should produce a conflict clause");
            assert!(matches!(on_conflict.action, ConflictAction::DoNothing));
        } else {
            panic!("Expected INSERT statement");
        }
    }

    #[test]
    fn test_insert_or_replace_sqlite() {
        let stmt = parse_ok("INSERT OR REPLACE INTO users (id, name) VALUES (1, 'Alice')");
        if let Statement::Insert(insert) = stmt {
            let on_conflict = insert
                .on_conflict
                .expect("INSERT OR REPLACE should produce a conflict clause");
            assert!(matches!(on_conflict.action, ConflictAction::DoReplace));
        } else {
            panic!("Expected INSERT statement");
        }
    }

    #[test]
    fn test_on_duplicate_key_update_mysql() {
        let stmt = parse_ok(
            "INSERT INTO users (id, name) VALUES (1, 'Alice') ON DUPLICATE KEY UPDATE name = 'Bob'",
        );
        if let Statement::Insert(insert) = stmt {
            let on_conflict = insert
                .on_conflict
                .expect("ON DUPLICATE KEY UPDATE should produce a conflict clause");
            assert!(matches!(on_conflict.action, ConflictAction::DoUpdate(_)));
        } else {
            panic!("Expected INSERT statement");
        }
    }

    // ===== Idempotent DDL Tests =====

    #[test]
    fn test_create_table_if_not_exists() {
        let stmt = parse_ok("CREATE TABLE IF NOT EXISTS users (id INT PRIMARY KEY)");
        if let Statement::CreateTable(create) = stmt {
            assert!(create.if_not_exists);
        } else {
            panic!("Expected CREATE TABLE statement");
        }
    }

    #[test]
    fn test_drop_table_if_exists() {
        let stmt = parse_ok("DROP TABLE IF EXISTS users");
        if let Statement::DropTable(drop) = stmt {
            assert!(drop.if_exists);
        } else {
            panic!("Expected DROP TABLE statement");
        }
    }

    #[test]
    fn test_create_index() {
        let stmt = parse_ok("CREATE INDEX idx_users_name ON users (name)");
        if let Statement::CreateIndex(create) = stmt {
            assert_eq!(create.name, "idx_users_name");
            assert_eq!(create.table.name(), "users");
            assert!(!create.unique);
            assert!(!create.if_not_exists);
        } else {
            panic!("Expected CREATE INDEX statement");
        }
    }

    #[test]
    fn test_create_unique_index() {
        let stmt = parse_ok("CREATE UNIQUE INDEX idx_users_email ON users (email)");
        if let Statement::CreateIndex(create) = stmt {
            assert!(create.unique);
        } else {
            panic!("Expected CREATE INDEX statement");
        }
    }

    #[test]
    fn test_create_index_if_not_exists() {
        let stmt = parse_ok("CREATE INDEX IF NOT EXISTS idx_users_name ON users (name)");
        if let Statement::CreateIndex(create) = stmt {
            assert!(create.if_not_exists);
        } else {
            panic!("Expected CREATE INDEX statement");
        }
    }

    #[test]
    fn test_drop_index() {
        let stmt = parse_ok("DROP INDEX idx_users_name");
        if let Statement::DropIndex(drop) = stmt {
            assert_eq!(drop.name, "idx_users_name");
            assert!(!drop.if_exists);
        } else {
            panic!("Expected DROP INDEX statement");
        }
    }

    #[test]
    fn test_drop_index_if_exists() {
        let stmt = parse_ok("DROP INDEX IF EXISTS idx_users_name");
        if let Statement::DropIndex(drop) = stmt {
            assert!(drop.if_exists);
        } else {
            panic!("Expected DROP INDEX statement");
        }
    }

    // ===== RETURNING clause tests =====

    #[test]
    fn test_insert_returning() {
        let stmt = parse_ok("INSERT INTO users (id, name) VALUES (1, 'Alice') RETURNING id, name");
        if let Statement::Insert(insert) = stmt {
            let returning = insert
                .returning
                .expect("RETURNING clause should be captured on the INSERT");
            assert_eq!(returning.len(), 2);
        } else {
            panic!("Expected INSERT statement");
        }
    }
}