Skip to main content

parser/
parser.rs

1//! Parser implementation. Generated by CongoCC Parser Generator. Do not edit.
2
3use crate::error::{ParseError, ParseResult};
4use crate::tokens::{Token, TokenType};
5use crate::lexer::Lexer;
6use crate::arena::{Arena, AstNode, NodeId, TokenId};
7use crate::arena::AddOp;
8use crate::arena::MultExprOp;
9use crate::arena::{EqualityOp, ComparisonOp, UnaryOp};
10use crate::arena::{
11    JmsSelectorNode,
12    OrExpressionNode,
13    AndExpressionNode,
14    EqualityExpressionNode,
15    ComparisonExpressionNode,
16    AddExpressionNode,
17    MultExprNode,
18    UnaryExprNode,
19    PrimaryExprNode,
20    LiteralNode,
21    StringLiteralNode,
22    VariableNode
23};
24
25/// The parser for SqlExprParser
26pub struct Parser {
27    /// The lexer providing tokens
28    lexer: Lexer,
29    /// Current token being examined
30    current_token: Token,
31    /// Lookahead tokens
32    lookahead: Vec<Token>,
33    /// Arena owning all AST nodes and tokens
34    arena: Arena,
35    /// ID of the most recently allocated token
36    current_token_id: Option<TokenId>,
37    /// Original input string
38    input: String,
39}
40
/// Element type classification for IN lists.
///
/// The enum carries no data, so it is `Copy` and `Eq` in addition to
/// `Clone` and `PartialEq`; values can be passed by value or by reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum InElementType {
    /// An integer element.
    Integer,
    /// A floating-point element.
    Float,
    /// A string-literal element.
    StringLit,
}
48
49impl Parser {
50    /// Create a new parser for the given input
51    pub fn new(input: String) -> ParseResult<Self> {
52        let mut lexer = Lexer::new(input.clone());
53        let current_token = lexer.next_token()?;
54
55        Ok(Parser {
56            lexer,
57            current_token,
58            lookahead: Vec::new(),
59            arena: Arena::new(),
60            current_token_id: None,
61            input,
62        })
63    }
64
65    /// Get a reference to the arena containing all AST nodes
66    pub fn arena(&self) -> &Arena {
67        &self.arena
68    }
69
70    /// Get a mutable reference to the arena
71    pub fn arena_mut(&mut self) -> &mut Arena {
72        &mut self.arena
73    }
74
75    /// Get the original input string
76    pub fn input(&self) -> &str {
77        &self.input
78    }
79
80    /// Parse the input and return the root node ID
81    pub fn parse(&mut self) -> ParseResult<NodeId> {
82        self.parse_jms_selector()
83    }
84
85    /// Parse: JmsSelector -> orExpression <EOF>
86    fn parse_jms_selector(&mut self) -> ParseResult<NodeId> {
87        let begin_token = self.alloc_current_token();
88
89        let child = self.parse_or_expression()?;
90
91        self.validate_boolean_root(child)?;
92
93        self.expect_token(TokenType::EOF)?;
94        let end_token = self.current_token_id.unwrap_or(begin_token);
95
96        let mut node = JmsSelectorNode::new(begin_token, end_token);
97        node.children.push(child);
98
99        let node_id = self.arena.alloc_node(AstNode::JmsSelector(node));
100        self.set_parent(child, node_id);
101        Ok(node_id)
102    }
103
104    /// Parse: orExpression -> andExpression (OR andExpression)*
105    fn parse_or_expression(&mut self) -> ParseResult<NodeId> {
106        let begin_token = self.alloc_current_token();
107        let mut children = Vec::new();
108
109        let first = self.parse_and_expression()?;
110        children.push(first);
111
112        while self.current_token.token_type == TokenType::OR {
113            self.consume_token()?;
114            let child = self.parse_and_expression()?;
115            children.push(child);
116        }
117
118        let end_token = self.current_token_id.unwrap_or(begin_token);
119        let mut node = OrExpressionNode::new(begin_token, end_token);
120        node.children = children.clone();
121
122        let node_id = self.arena.alloc_node(AstNode::OrExpression(node));
123        for child in children {
124            self.set_parent(child, node_id);
125        }
126        Ok(node_id)
127    }
128
129    /// Parse: andExpression -> equalityExpression (AND equalityExpression)*
130    fn parse_and_expression(&mut self) -> ParseResult<NodeId> {
131        let begin_token = self.alloc_current_token();
132        let mut children = Vec::new();
133
134        let first = self.parse_equality_expression()?;
135        children.push(first);
136
137        while self.current_token.token_type == TokenType::AND {
138            self.consume_token()?;
139            let child = self.parse_equality_expression()?;
140            children.push(child);
141        }
142
143        let end_token = self.current_token_id.unwrap_or(begin_token);
144        let mut node = AndExpressionNode::new(begin_token, end_token);
145        node.children = children.clone();
146
147        let node_id = self.arena.alloc_node(AstNode::AndExpression(node));
148        for child in children {
149            self.set_parent(child, node_id);
150        }
151        Ok(node_id)
152    }
153
154    /// Parse: equalityExpression (generic)
155    fn parse_equality_expression(&mut self) -> ParseResult<NodeId> {
156        let begin_token = self.alloc_current_token();
157        let mut children: Vec<NodeId> = Vec::new();
158        let mut operators: Vec<EqualityOp> = Vec::new();
159
160        {
161            let child = self.parse_comparison_expression()?;
162            children.push(child);
163        }
164        loop {
165        if self.current_token.token_type == TokenType::EQ
166        {
167        operators.push(EqualityOp::Equal);
168        self.expect_token(TokenType::EQ)?;
169        {
170            let child = self.parse_comparison_expression()?;
171            children.push(child);
172        }
173        }
174        else if self.current_token.token_type == TokenType::NE
175        {
176        operators.push(EqualityOp::NotEqual);
177        self.expect_token(TokenType::NE)?;
178        {
179            let child = self.parse_comparison_expression()?;
180            children.push(child);
181        }
182        }
183        else if
184            self.current_token.token_type == TokenType::IS
185            && self.lookahead_type(1) == Some(TokenType::NULL)
186        {
187        operators.push(EqualityOp::IsNull);
188        self.expect_token(TokenType::IS)?;
189        self.expect_token(TokenType::NULL)?;
190        }
191        else if self.current_token.token_type == TokenType::IS
192        {
193        operators.push(EqualityOp::IsNotNull);
194        self.expect_token(TokenType::IS)?;
195        self.expect_token(TokenType::NOT)?;
196        self.expect_token(TokenType::NULL)?;
197        }
198        else {
199            break;
200        }
201        }
202
203        let end_token = self.current_token_id.unwrap_or(begin_token);
204        let mut node = EqualityExpressionNode::new(begin_token, end_token);
205        node.children = children.clone();
206        node.operators = operators;
207        let node_id = self.arena.alloc_node(AstNode::EqualityExpression(node));
208        for child_id in children {
209            self.set_parent(child_id, node_id);
210        }
211        Ok(node_id)
212    }
213
214    /// Parse: comparisonExpression (generic)
215    fn parse_comparison_expression(&mut self) -> ParseResult<NodeId> {
216        let begin_token = self.alloc_current_token();
217        let mut children: Vec<NodeId> = Vec::new();
218        let mut operators: Vec<ComparisonOp> = Vec::new();
219
220        {
221            let child = self.parse_add_expression()?;
222            children.push(child);
223        }
224        loop {
225        if self.current_token.token_type == TokenType::GT
226        {
227        operators.push(ComparisonOp::GreaterThan);
228        self.expect_token(TokenType::GT)?;
229        {
230            let child = self.parse_add_expression()?;
231            children.push(child);
232        }
233        }
234        else if self.current_token.token_type == TokenType::GE
235        {
236        operators.push(ComparisonOp::GreaterThanEqual);
237        self.expect_token(TokenType::GE)?;
238        {
239            let child = self.parse_add_expression()?;
240            children.push(child);
241        }
242        }
243        else if self.current_token.token_type == TokenType::LT
244        {
245        operators.push(ComparisonOp::LessThan);
246        self.expect_token(TokenType::LT)?;
247        {
248            let child = self.parse_add_expression()?;
249            children.push(child);
250        }
251        }
252        else if self.current_token.token_type == TokenType::LE
253        {
254        operators.push(ComparisonOp::LessThanEqual);
255        self.expect_token(TokenType::LE)?;
256        {
257            let child = self.parse_add_expression()?;
258            children.push(child);
259        }
260        }
261        else if self.current_token.token_type == TokenType::LIKE
262        {
263        self.expect_token(TokenType::LIKE)?;
264        {
265            let child = self.parse_string_literal()?;
266            children.push(child);
267        }
268        if self.current_token.token_type == TokenType::ESCAPE
269        {
270        operators.push(ComparisonOp::LikeEscape);
271        self.expect_token(TokenType::ESCAPE)?;
272        {
273            let child = self.parse_string_literal()?;
274            children.push(child);
275        }
276        } else {
277        operators.push(ComparisonOp::Like);
278        }
279        }
280        else if
281            self.current_token.token_type == TokenType::NOT
282            && self.lookahead_type(1) == Some(TokenType::LIKE)
283        {
284        self.expect_token(TokenType::NOT)?;
285        self.expect_token(TokenType::LIKE)?;
286        {
287            let child = self.parse_string_literal()?;
288            children.push(child);
289        }
290        if self.current_token.token_type == TokenType::ESCAPE
291        {
292        operators.push(ComparisonOp::NotLikeEscape);
293        self.expect_token(TokenType::ESCAPE)?;
294        {
295            let child = self.parse_string_literal()?;
296            children.push(child);
297        }
298        } else {
299        operators.push(ComparisonOp::NotLike);
300        }
301        }
302        else if self.current_token.token_type == TokenType::BETWEEN
303        {
304        operators.push(ComparisonOp::Between);
305        self.expect_token(TokenType::BETWEEN)?;
306        {
307            let child = self.parse_between_bound()?;
308            children.push(child);
309        }
310        self.expect_token(TokenType::AND)?;
311        let low_id = *children.last().unwrap();
312        {
313            let child = self.parse_between_bound()?;
314            children.push(child);
315        }
316        let high_id = *children.last().unwrap();
317        self.validate_between_bounds(low_id, high_id)?;
318        }
319        else if
320            self.current_token.token_type == TokenType::NOT
321            && self.lookahead_type(1) == Some(TokenType::BETWEEN)
322        {
323        operators.push(ComparisonOp::NotBetween);
324        self.expect_token(TokenType::NOT)?;
325        self.expect_token(TokenType::BETWEEN)?;
326        {
327            let child = self.parse_between_bound()?;
328            children.push(child);
329        }
330        self.expect_token(TokenType::AND)?;
331        let low_id = *children.last().unwrap();
332        {
333            let child = self.parse_between_bound()?;
334            children.push(child);
335        }
336        let high_id = *children.last().unwrap();
337        self.validate_between_bounds(low_id, high_id)?;
338        }
339        else if self.current_token.token_type == TokenType::IN
340        {
341        operators.push(ComparisonOp::In);
342        self.expect_token(TokenType::IN)?;
343        self.expect_token(TokenType::LPAREN)?;
344        let first_type = self.classify_current_token_for_in()?;
345        {
346            let child = self.parse_in_element()?;
347            children.push(child);
348        }
349        while self.current_token.token_type == TokenType::COMMA
350        {
351        self.expect_token(TokenType::COMMA)?;
352        let elem_type = self.classify_current_token_for_in()?;
353        self.check_in_type_consistency(&first_type, &elem_type)?;
354        {
355            let child = self.parse_in_element()?;
356            children.push(child);
357        }
358        }
359        self.expect_token(TokenType::RPAREN)?;
360        }
361        else if
362            self.current_token.token_type == TokenType::NOT
363            && self.lookahead_type(1) == Some(TokenType::IN)
364            && self.lookahead_type(2) == Some(TokenType::LPAREN)
365        {
366        operators.push(ComparisonOp::NotIn);
367        self.expect_token(TokenType::NOT)?;
368        self.expect_token(TokenType::IN)?;
369        self.expect_token(TokenType::LPAREN)?;
370        let first_type = self.classify_current_token_for_in()?;
371        {
372            let child = self.parse_in_element()?;
373            children.push(child);
374        }
375        while self.current_token.token_type == TokenType::COMMA
376        {
377        self.expect_token(TokenType::COMMA)?;
378        let elem_type = self.classify_current_token_for_in()?;
379        self.check_in_type_consistency(&first_type, &elem_type)?;
380        {
381            let child = self.parse_in_element()?;
382            children.push(child);
383        }
384        }
385        self.expect_token(TokenType::RPAREN)?;
386        }
387        else {
388            break;
389        }
390        }
391
392        let end_token = self.current_token_id.unwrap_or(begin_token);
393        let mut node = ComparisonExpressionNode::new(begin_token, end_token);
394        node.children = children.clone();
395        node.operators = operators;
396        let node_id = self.arena.alloc_node(AstNode::ComparisonExpression(node));
397        for child_id in children {
398            self.set_parent(child_id, node_id);
399        }
400        Ok(node_id)
401    }
402
403    /// Parse: addExpression -> multExpr ((ops) multExpr)*
404    fn parse_add_expression(&mut self) -> ParseResult<NodeId> {
405        let begin_token = self.alloc_current_token();
406        let mut children = Vec::new();
407        let mut operators = Vec::new();
408
409        let first = self.parse_mult_expr()?;
410        children.push(first);
411
412        while self.current_token.token_type == TokenType::PLUS
413            || self.current_token.token_type == TokenType::MINUS
414        {
415            let op = match self.current_token.token_type {
416                TokenType::PLUS => AddOp::Plus,
417                TokenType::MINUS => AddOp::Minus,
418                _ => return Err(ParseError::at_position(
419                    format!("Expected '+' or '-', found {:?} '{}'",
420                        self.current_token.token_type, self.current_token.image),
421                    self.current_token.begin_offset,
422                )),
423            };
424            operators.push(op);
425
426            self.consume_token()?;
427            let child = self.parse_mult_expr()?;
428            children.push(child);
429        }
430
431        let end_token = self.current_token_id.unwrap_or(begin_token);
432        let mut node = AddExpressionNode::new(begin_token, end_token);
433        node.children = children.clone();
434        node.operators = operators;
435
436        let node_id = self.arena.alloc_node(AstNode::AddExpression(node));
437        for child in children {
438            self.set_parent(child, node_id);
439        }
440        Ok(node_id)
441    }
442
443    /// Parse: multExpr -> unaryExpr ((ops) unaryExpr)*
444    fn parse_mult_expr(&mut self) -> ParseResult<NodeId> {
445        let begin_token = self.alloc_current_token();
446        let mut children = Vec::new();
447        let mut operators = Vec::new();
448
449        let first = self.parse_unary_expr()?;
450        children.push(first);
451
452        while self.current_token.token_type == TokenType::STAR
453            || self.current_token.token_type == TokenType::SLASH
454            || self.current_token.token_type == TokenType::PERCENT
455        {
456            let op = match self.current_token.token_type {
457                TokenType::STAR => MultExprOp::Star,
458                TokenType::SLASH => MultExprOp::Slash,
459                TokenType::PERCENT => MultExprOp::Percent,
460                _ => return Err(ParseError::at_position(
461                    format!("Expected '*', '/' or '%', found {:?} '{}'",
462                        self.current_token.token_type, self.current_token.image),
463                    self.current_token.begin_offset,
464                )),
465            };
466            operators.push(op);
467
468            self.consume_token()?;
469            let child = self.parse_unary_expr()?;
470            children.push(child);
471        }
472
473        let end_token = self.current_token_id.unwrap_or(begin_token);
474        let mut node = MultExprNode::new(begin_token, end_token);
475        node.children = children.clone();
476        node.operators = operators;
477
478        let node_id = self.arena.alloc_node(AstNode::MultExpr(node));
479        for child in children {
480            self.set_parent(child, node_id);
481        }
482        Ok(node_id)
483    }
484
485    /// Parse: unaryExpr (generic)
486    fn parse_unary_expr(&mut self) -> ParseResult<NodeId> {
487        let begin_token = self.alloc_current_token();
488        let mut children: Vec<NodeId> = Vec::new();
489        let mut operator: Option<UnaryOp> = None;
490
491        if
492            self.current_token.token_type == TokenType::PLUS
493        {
494        operator = Some(UnaryOp::Plus);
495        self.expect_token(TokenType::PLUS)?;
496        {
497            let child = self.parse_unary_expr()?;
498            children.push(child);
499        }
500        }
501        else if self.current_token.token_type == TokenType::MINUS
502        {
503        operator = Some(UnaryOp::Negate);
504        self.expect_token(TokenType::MINUS)?;
505        {
506            let child = self.parse_unary_expr()?;
507            children.push(child);
508        }
509        }
510        else if self.current_token.token_type == TokenType::NOT
511        {
512        operator = Some(UnaryOp::Not);
513        self.expect_token(TokenType::NOT)?;
514        {
515            let child = self.parse_unary_expr()?;
516            children.push(child);
517        }
518        }
519        else if self.current_token.token_type == TokenType::TRUE
520            || self.current_token.token_type == TokenType::FALSE
521            || self.current_token.token_type == TokenType::NULL
522            || self.current_token.token_type == TokenType::LPAREN
523            || self.current_token.token_type == TokenType::DECIMAL_LITERAL
524            || self.current_token.token_type == TokenType::HEX_LITERAL
525            || self.current_token.token_type == TokenType::OCTAL_LITERAL
526            || self.current_token.token_type == TokenType::FLOATING_POINT_LITERAL
527            || self.current_token.token_type == TokenType::STRING_LITERAL
528            || self.current_token.token_type == TokenType::ID
529        {
530        {
531            let child = self.parse_primary_expr()?;
532            children.push(child);
533        }
534        }
535        else {
536            return Err(ParseError::at_position(
537                format!(
538                    "Expected expression, found {:?} '{}'",
539                    self.current_token.token_type, self.current_token.image
540                ),
541                self.current_token.begin_offset,
542            ));
543        }
544
545        let end_token = self.current_token_id.unwrap_or(begin_token);
546        let mut node = UnaryExprNode::new(begin_token, end_token);
547        node.children = children.clone();
548        node.operator = operator;
549        let node_id = self.arena.alloc_node(AstNode::UnaryExpr(node));
550        for child_id in children {
551            self.set_parent(child_id, node_id);
552        }
553        Ok(node_id)
554    }
555
556    /// Parse: primaryExpr
557    fn parse_primary_expr(&mut self) -> ParseResult<NodeId> {
558        let begin_token = self.alloc_current_token();
559        let mut children = Vec::new();
560
561        if self.current_token.token_type == TokenType::TRUE
562            || self.current_token.token_type == TokenType::FALSE
563            || self.current_token.token_type == TokenType::NULL
564            || self.current_token.token_type == TokenType::DECIMAL_LITERAL
565            || self.current_token.token_type == TokenType::HEX_LITERAL
566            || self.current_token.token_type == TokenType::OCTAL_LITERAL
567            || self.current_token.token_type == TokenType::FLOATING_POINT_LITERAL
568            || self.current_token.token_type == TokenType::STRING_LITERAL
569        {
570            let inner = self.parse_literal()?;
571            children.push(inner);
572        }
573        else if self.current_token.token_type == TokenType::ID
574        {
575            let inner = self.parse_variable()?;
576            children.push(inner);
577        }
578        else if self.current_token.token_type == TokenType::LPAREN {
579            self.consume_token()?;
580            let inner = self.parse_or_expression()?;
581            children.push(inner);
582            self.expect_token(TokenType::RPAREN)?;
583        }
584        else {
585            return Err(ParseError::at_position(
586                format!(
587                    "Expected expression, found {:?} '{}'",
588                    self.current_token.token_type, self.current_token.image
589                ),
590                self.current_token.begin_offset,
591            ));
592        }
593
594        let end_token = self.current_token_id.unwrap_or(begin_token);
595        let mut node = PrimaryExprNode::new(begin_token, end_token);
596        node.children = children.clone();
597
598        let node_id = self.arena.alloc_node(AstNode::PrimaryExpr(node));
599        for child in children {
600            self.set_parent(child, node_id);
601        }
602        Ok(node_id)
603    }
604
605    /// Parse: literal
606    fn parse_literal(&mut self) -> ParseResult<NodeId> {
607        let begin_token = self.alloc_current_token();
608        let mut children = Vec::new();
609
610        if self.current_token.token_type == TokenType::STRING_LITERAL
611        {
612            let inner = self.parse_string_literal()?;
613            children.push(inner);
614        }
615        else if self.current_token.token_type == TokenType::DECIMAL_LITERAL
616            || self.current_token.token_type == TokenType::HEX_LITERAL
617            || self.current_token.token_type == TokenType::OCTAL_LITERAL
618            || self.current_token.token_type == TokenType::FLOATING_POINT_LITERAL
619            || self.current_token.token_type == TokenType::TRUE
620            || self.current_token.token_type == TokenType::FALSE
621            || self.current_token.token_type == TokenType::NULL
622        {
623            self.consume_token()?;
624        }
625        else {
626            return Err(ParseError::at_position(
627                format!(
628                    "Expected expression, found {:?} '{}'",
629                    self.current_token.token_type, self.current_token.image
630                ),
631                self.current_token.begin_offset,
632            ));
633        }
634
635        let end_token = self.current_token_id.unwrap_or(begin_token);
636        let mut node = LiteralNode::new(begin_token, end_token);
637        node.children = children.clone();
638
639        let node_id = self.arena.alloc_node(AstNode::Literal(node));
640        for child in children {
641            self.set_parent(child, node_id);
642        }
643        Ok(node_id)
644    }
645
646    /// Parse: stringLiteral (generic)
647    fn parse_string_literal(&mut self) -> ParseResult<NodeId> {
648        let begin_token = self.alloc_current_token();
649        let children: Vec<NodeId> = Vec::new();
650
651        self.expect_token(TokenType::STRING_LITERAL)?;
652
653        let end_token = self.current_token_id.unwrap_or(begin_token);
654        let mut node = StringLiteralNode::new(begin_token, end_token);
655        node.children = children.clone();
656        let node_id = self.arena.alloc_node(AstNode::StringLiteral(node));
657        for child_id in children {
658            self.set_parent(child_id, node_id);
659        }
660        Ok(node_id)
661    }
662
663    /// Parse: variable (generic)
664    fn parse_variable(&mut self) -> ParseResult<NodeId> {
665        let begin_token = self.alloc_current_token();
666        let children: Vec<NodeId> = Vec::new();
667
668        self.expect_token(TokenType::ID)?;
669
670        let end_token = self.current_token_id.unwrap_or(begin_token);
671        let mut node = VariableNode::new(begin_token, end_token);
672        node.children = children.clone();
673        let node_id = self.arena.alloc_node(AstNode::Variable(node));
674        for child_id in children {
675            self.set_parent(child_id, node_id);
676        }
677        Ok(node_id)
678    }
679
680    // ========== BETWEEN / IN Helper Methods ==========
681
682    /// Parse a BETWEEN bound: only accepts numeric literals (optionally signed) or STRING_LITERAL.
683    fn parse_between_bound(&mut self) -> ParseResult<NodeId> {
684        // Handle sign prefix for negative/positive numbers in BETWEEN bounds
685        if matches!(self.current_token.token_type, TokenType::MINUS | TokenType::PLUS) {
686            let begin_token = self.alloc_current_token();
687            let operator = if self.current_token.token_type == TokenType::MINUS {
688                UnaryOp::Negate
689            } else {
690                UnaryOp::Plus
691            };
692            self.consume_token()?;
693            // After sign, must be a numeric literal
694            match self.current_token.token_type {
695                TokenType::DECIMAL_LITERAL | TokenType::HEX_LITERAL
696                | TokenType::OCTAL_LITERAL | TokenType::FLOATING_POINT_LITERAL => {}
697                _ => {
698                    return Err(ParseError::at_position(
699                        format!(
700                            "Expected numeric literal after '{}' in BETWEEN bound, found {:?} '{}'",
701                            if operator == UnaryOp::Negate { "-" } else { "+" },
702                            self.current_token.token_type, self.current_token.image
703                        ),
704                        self.current_token.begin_offset,
705                    ));
706                }
707            }
708            let child = self.parse_primary_expr()?;
709            let end_token = self.current_token_id.unwrap_or(begin_token);
710            let mut node = UnaryExprNode::new(begin_token, end_token);
711            node.children.push(child);
712            node.operator = Some(operator);
713            let node_id = self.arena.alloc_node(AstNode::UnaryExpr(node));
714            self.set_parent(child, node_id);
715            return Ok(node_id);
716        }
717        match self.current_token.token_type {
718            TokenType::DECIMAL_LITERAL | TokenType::HEX_LITERAL
719            | TokenType::OCTAL_LITERAL | TokenType::FLOATING_POINT_LITERAL
720            | TokenType::STRING_LITERAL => {
721                self.parse_primary_expr()
722            }
723            TokenType::TRUE | TokenType::FALSE => {
724                Err(ParseError::at_position(
725                    "BETWEEN bounds cannot be boolean values".to_string(),
726                    self.current_token.begin_offset,
727                ))
728            }
729            TokenType::NULL => {
730                Err(ParseError::at_position(
731                    "NULL is not allowed in BETWEEN bounds".to_string(),
732                    self.current_token.begin_offset,
733                ))
734            }
735            TokenType::ID => {
736                Err(ParseError::at_position(
737                    "BETWEEN bounds must be literal values, not variables".to_string(),
738                    self.current_token.begin_offset,
739                ))
740            }
741            _ => {
742                Err(ParseError::at_position(
743                    format!(
744                        "BETWEEN bounds must be literal values (numeric or string), found {:?} '{}'",
745                        self.current_token.token_type, self.current_token.image
746                    ),
747                    self.current_token.begin_offset,
748                ))
749            }
750        }
751    }
752
753    /// Get the token image for a literal node, navigating through UnaryExpr → PrimaryExpr → Literal → StringLiteral.
754    /// For signed literals (UnaryExpr with Negate), prepends "-" to the inner image.
755    fn get_literal_image(&self, node_id: NodeId) -> String {
756        match self.arena.get_node(node_id) {
757            AstNode::UnaryExpr(n) => {
758                if !n.children.is_empty() {
759                    let inner = self.get_literal_image(n.children[0]);
760                    match n.operator {
761                        Some(UnaryOp::Negate) => format!("-{}", inner),
762                        Some(UnaryOp::Plus) => inner,
763                        _ => inner,
764                    }
765                } else {
766                    String::new()
767                }
768            }
769            AstNode::PrimaryExpr(n) => {
770                if n.children.is_empty() {
771                    self.arena.get_token(n.begin_token).image.clone()
772                } else {
773                    self.get_literal_image(n.children[0])
774                }
775            }
776            AstNode::Literal(n) => {
777                if n.children.is_empty() {
778                    self.arena.get_token(n.begin_token).image.clone()
779                } else {
780                    self.get_literal_image(n.children[0])
781                }
782            }
783            AstNode::StringLiteral(n) => {
784                self.arena.get_token(n.begin_token).image.clone()
785            }
786            _ => String::new(),
787        }
788    }
789
790    /// Get the token type for a literal node, walking through UnaryExpr wrappers.
791    fn get_literal_token_type(&self, node_id: NodeId) -> TokenType {
792        match self.arena.get_node(node_id) {
793            AstNode::UnaryExpr(n) => {
794                if !n.children.is_empty() {
795                    self.get_literal_token_type(n.children[0])
796                } else {
797                    TokenType::INVALID
798                }
799            }
800            AstNode::PrimaryExpr(n) => {
801                if n.children.is_empty() {
802                    self.arena.get_token(n.begin_token).token_type
803                } else {
804                    self.get_literal_token_type(n.children[0])
805                }
806            }
807            AstNode::Literal(n) => {
808                if n.children.is_empty() {
809                    self.arena.get_token(n.begin_token).token_type
810                } else {
811                    self.get_literal_token_type(n.children[0])
812                }
813            }
814            AstNode::StringLiteral(_) => TokenType::STRING_LITERAL,
815            _ => TokenType::INVALID,
816        }
817    }
818
819    /// Validate BETWEEN bounds: same type category and lower <= upper.
820    fn validate_between_bounds(&self, low_id: NodeId, high_id: NodeId) -> ParseResult<()> {
821        let low_image = self.get_literal_image(low_id);
822        let high_image = self.get_literal_image(high_id);
823        let low_type = self.get_literal_token_type(low_id);
824        let high_type = self.get_literal_token_type(high_id);
825
826        let low_is_numeric = matches!(low_type, TokenType::DECIMAL_LITERAL | TokenType::HEX_LITERAL | TokenType::OCTAL_LITERAL | TokenType::FLOATING_POINT_LITERAL);
827        let high_is_numeric = matches!(high_type, TokenType::DECIMAL_LITERAL | TokenType::HEX_LITERAL | TokenType::OCTAL_LITERAL | TokenType::FLOATING_POINT_LITERAL);
828        let low_is_string = low_type == TokenType::STRING_LITERAL;
829        let high_is_string = high_type == TokenType::STRING_LITERAL;
830
831        if low_is_numeric && high_is_string || low_is_string && high_is_numeric {
832            let low_kind = if low_is_string { "string" } else { "integer" };
833            let high_kind = if high_is_string { "string" } else { "integer" };
834            return Err(ParseError::new(format!(
835                "BETWEEN bounds must be the same type (both numeric or both string): found {} ('{}') and {} ('{}')",
836                low_kind, low_image, high_kind, high_image
837            )));
838        }
839
840        if low_is_numeric && high_is_numeric {
841            let low_val = Self::parse_numeric_literal(&low_image).map_err(|_| {
842                ParseError::new(format!("Invalid numeric literal in BETWEEN: '{}'", low_image))
843            })?;
844            let high_val = Self::parse_numeric_literal(&high_image).map_err(|_| {
845                ParseError::new(format!("Invalid numeric literal in BETWEEN: '{}'", high_image))
846            })?;
847            if low_val > high_val {
848                return Err(ParseError::new(format!(
849                    "BETWEEN lower bound ({}) must not exceed upper bound ({})",
850                    low_image, high_image
851                )));
852            }
853        } else if low_is_string && high_is_string {
854            // Strip quotes for comparison
855            let low_inner = &low_image[1..low_image.len() - 1];
856            let high_inner = &high_image[1..high_image.len() - 1];
857            if low_inner > high_inner {
858                return Err(ParseError::new(format!(
859                    "BETWEEN lower bound ({}) must not exceed upper bound ({})",
860                    low_image, high_image
861                )));
862            }
863        }
864
865        Ok(())
866    }
867
868    /// Parse a numeric literal image (decimal, hex, or octal) to f64.
869    fn parse_numeric_literal(image: &str) -> Result<f64, String> {
870        let image = image.strip_suffix('L').or_else(|| image.strip_suffix('l')).unwrap_or(image);
871        if let Some(hex) = image.strip_prefix("0x").or_else(|| image.strip_prefix("0X")) {
872            i64::from_str_radix(hex, 16)
873                .map(|i| i as f64)
874                .map_err(|e| e.to_string())
875        } else if image.starts_with('0') && image.len() > 1
876            && image[1..].chars().all(|c| ('0'..='7').contains(&c))
877        {
878            let oct = &image[1..];
879            i64::from_str_radix(oct, 8)
880                .map(|i| i as f64)
881                .map_err(|e| e.to_string())
882        } else {
883            image.parse::<f64>().map_err(|e| e.to_string())
884        }
885    }
886
887    /// Classify current token for IN list element type checking.
888    fn classify_current_token_for_in(&mut self) -> ParseResult<InElementType> {
889        // Handle sign prefix: peek past +/- to classify the numeric literal
890        if matches!(self.current_token.token_type, TokenType::MINUS | TokenType::PLUS) {
891            let next_type = self.lookahead_type(1);
892            return match next_type {
893                Some(TokenType::FLOATING_POINT_LITERAL) => Ok(InElementType::Float),
894                Some(TokenType::DECIMAL_LITERAL) => {
895                    let next_image = self.lookahead(1).map(|t| t.image.clone()).unwrap_or_default();
896                    if next_image.contains('.') {
897                        Ok(InElementType::Float)
898                    } else {
899                        Ok(InElementType::Integer)
900                    }
901                }
902                Some(TokenType::HEX_LITERAL) | Some(TokenType::OCTAL_LITERAL) => Ok(InElementType::Integer),
903                _ => Err(ParseError::at_position(
904                    format!(
905                        "Expected numeric literal after '{}', found {:?}",
906                        self.current_token.image,
907                        next_type
908                    ),
909                    self.current_token.begin_offset,
910                )),
911            };
912        }
913        match self.current_token.token_type {
914            TokenType::STRING_LITERAL => Ok(InElementType::StringLit),
915            TokenType::FLOATING_POINT_LITERAL => Ok(InElementType::Float),
916            TokenType::HEX_LITERAL | TokenType::OCTAL_LITERAL => Ok(InElementType::Integer),
917            TokenType::DECIMAL_LITERAL => {
918                if self.current_token.image.contains('.') {
919                    Ok(InElementType::Float)
920                } else {
921                    Ok(InElementType::Integer)
922                }
923            }
924            TokenType::TRUE | TokenType::FALSE => {
925                Err(ParseError::at_position(
926                    "Boolean is not allowed in IN list elements".to_string(),
927                    self.current_token.begin_offset,
928                ))
929            }
930            TokenType::NULL => {
931                Err(ParseError::at_position(
932                    "NULL is not allowed in IN list elements".to_string(),
933                    self.current_token.begin_offset,
934                ))
935            }
936            _ => {
937                Err(ParseError::at_position(
938                    format!(
939                        "IN list elements must be literal values (string, integer, or float), found {:?} '{}'",
940                        self.current_token.token_type, self.current_token.image
941                    ),
942                    self.current_token.begin_offset,
943                ))
944            }
945        }
946    }
947
948    /// Check that an IN element type is consistent with the first element's type.
949    fn check_in_type_consistency(&self, first: &InElementType, current: &InElementType) -> ParseResult<()> {
950        let compatible = match (first, current) {
951            (InElementType::StringLit, InElementType::StringLit) => true,
952            (InElementType::Integer, InElementType::Integer) => true,
953            (InElementType::Float, InElementType::Float) => true,
954            (InElementType::Integer, InElementType::Float)
955            | (InElementType::Float, InElementType::Integer) => false,
956            _ => false,
957        };
958        if !compatible {
959            let type_name = |t: &InElementType| match t {
960                InElementType::Integer => "integer",
961                InElementType::Float => "float",
962                InElementType::StringLit => "string",
963            };
964            Err(ParseError::at_position(
965                format!(
966                    "IN list elements must all be the same type: first element is {}, but found {} '{}'",
967                    type_name(first), type_name(current), self.current_token.image
968                ),
969                self.current_token.begin_offset,
970            ))
971        } else {
972            Ok(())
973        }
974    }
975
976    /// Parse an IN list element: STRING_LITERAL or numeric literal (optionally signed).
977    fn parse_in_element(&mut self) -> ParseResult<NodeId> {
978        // Handle sign prefix for negative/positive numbers in IN lists
979        if matches!(self.current_token.token_type, TokenType::MINUS | TokenType::PLUS) {
980            let begin_token = self.alloc_current_token();
981            let operator = if self.current_token.token_type == TokenType::MINUS {
982                UnaryOp::Negate
983            } else {
984                UnaryOp::Plus
985            };
986            self.consume_token()?;
987            let child = self.parse_primary_expr()?;
988            let end_token = self.current_token_id.unwrap_or(begin_token);
989            let mut node = UnaryExprNode::new(begin_token, end_token);
990            node.children.push(child);
991            node.operator = Some(operator);
992            let node_id = self.arena.alloc_node(AstNode::UnaryExpr(node));
993            self.set_parent(child, node_id);
994            return Ok(node_id);
995        }
996        match self.current_token.token_type {
997            TokenType::STRING_LITERAL => self.parse_string_literal(),
998            TokenType::DECIMAL_LITERAL | TokenType::HEX_LITERAL
999            | TokenType::OCTAL_LITERAL | TokenType::FLOATING_POINT_LITERAL => {
1000                self.parse_primary_expr()
1001            }
1002            _ => {
1003                Err(ParseError::at_position(
1004                    format!(
1005                        "Expected literal value in IN list, found {:?} '{}'",
1006                        self.current_token.token_type, self.current_token.image
1007                    ),
1008                    self.current_token.begin_offset,
1009                ))
1010            }
1011        }
1012    }
1013
1014    // ========== Boolean Root Validation ==========
1015
1016    /// Validate that the root expression is boolean-typed (not a standalone literal or arithmetic).
1017    fn validate_boolean_root(&self, node_id: NodeId) -> ParseResult<()> {
1018        if self.is_boolean_expression(node_id) {
1019            Ok(())
1020        } else {
1021            Err(ParseError::new(
1022                "Expression must be boolean (comparison, logical, or boolean literal)".to_string(),
1023            ))
1024        }
1025    }
1026
1027    /// Check whether a node represents a boolean expression.
1028    /// Walks through single-child pass-through nodes to find the "effective" expression.
1029    fn is_boolean_expression(&self, node_id: NodeId) -> bool {
1030        match self.arena.get_node(node_id) {
1031            AstNode::OrExpression(n) => {
1032                if n.children.len() > 1 {
1033                    return true; // OR with multiple children is boolean
1034                }
1035                if n.children.len() == 1 {
1036                    return self.is_boolean_expression(n.children[0]);
1037                }
1038                false
1039            }
1040            AstNode::AndExpression(n) => {
1041                if n.children.len() > 1 {
1042                    return true; // AND with multiple children is boolean
1043                }
1044                if n.children.len() == 1 {
1045                    return self.is_boolean_expression(n.children[0]);
1046                }
1047                false
1048            }
1049            AstNode::EqualityExpression(n) => {
1050                if !n.operators.is_empty() {
1051                    return true; // Has =, <>, IS NULL, IS NOT NULL
1052                }
1053                if n.children.len() == 1 {
1054                    return self.is_boolean_expression(n.children[0]);
1055                }
1056                false
1057            }
1058            AstNode::ComparisonExpression(n) => {
1059                if !n.operators.is_empty() {
1060                    return true; // Has >, >=, <, <=, LIKE, BETWEEN, IN, etc.
1061                }
1062                if n.children.len() == 1 {
1063                    return self.is_boolean_expression(n.children[0]);
1064                }
1065                false
1066            }
1067            AstNode::AddExpression(n) => {
1068                // Arithmetic — not boolean unless single-child pass-through
1069                if n.children.len() == 1 && n.operators.is_empty() {
1070                    return self.is_boolean_expression(n.children[0]);
1071                }
1072                false
1073            }
1074            AstNode::MultExpr(n) => {
1075                if n.children.len() == 1 && n.operators.is_empty() {
1076                    return self.is_boolean_expression(n.children[0]);
1077                }
1078                false
1079            }
1080            AstNode::UnaryExpr(n) => {
1081                if n.operator == Some(UnaryOp::Not) {
1082                    return true; // NOT is boolean
1083                }
1084                if n.children.len() == 1 && n.operator.is_none() {
1085                    return self.is_boolean_expression(n.children[0]);
1086                }
1087                false // Unary +/- is arithmetic
1088            }
1089            AstNode::PrimaryExpr(n) => {
1090                if n.children.len() == 1 {
1091                    return self.is_boolean_expression(n.children[0]);
1092                }
1093                false
1094            }
1095            AstNode::Variable(_) => true, // Variables are assumed boolean when standalone
1096            AstNode::Literal(n) => {
1097                if n.children.is_empty() {
1098                    let token = self.arena.get_token(n.begin_token);
1099                    matches!(token.token_type, TokenType::TRUE | TokenType::FALSE)
1100                } else {
1101                    false // StringLiteral child — not boolean
1102                }
1103            }
1104            _ => false,
1105        }
1106    }
1107
1108    // ========== Helper Methods ==========
1109
1110    /// Set the parent of a node
1111    fn set_parent(&mut self, child_id: NodeId, parent_id: NodeId) {
1112        match self.arena.get_node_mut(child_id) {
1113            AstNode::JmsSelector(node) => node.parent = Some(parent_id),
1114            AstNode::OrExpression(node) => node.parent = Some(parent_id),
1115            AstNode::AndExpression(node) => node.parent = Some(parent_id),
1116            AstNode::EqualityExpression(node) => node.parent = Some(parent_id),
1117            AstNode::ComparisonExpression(node) => node.parent = Some(parent_id),
1118            AstNode::AddExpression(node) => node.parent = Some(parent_id),
1119            AstNode::MultExpr(node) => node.parent = Some(parent_id),
1120            AstNode::UnaryExpr(node) => node.parent = Some(parent_id),
1121            AstNode::PrimaryExpr(node) => node.parent = Some(parent_id),
1122            AstNode::Literal(node) => node.parent = Some(parent_id),
1123            AstNode::StringLiteral(node) => node.parent = Some(parent_id),
1124            AstNode::Variable(node) => node.parent = Some(parent_id),
1125        }
1126    }
1127
1128    /// Check if the current token matches any of the given types
1129    #[allow(dead_code)]
1130    fn current_token_matches(&self, types: &[TokenType]) -> bool {
1131        types.contains(&self.current_token.token_type)
1132    }
1133
1134    /// Consume the current token and advance to the next one
1135    fn consume_token(&mut self) -> ParseResult<Token> {
1136        let old_token = self.current_token.clone();
1137        self.current_token = if !self.lookahead.is_empty() {
1138            self.lookahead.remove(0)
1139        } else {
1140            self.lexer.next_token()?
1141        };
1142        self.current_token_id = Some(self.arena.alloc_token(self.current_token.clone()));
1143        Ok(old_token)
1144    }
1145
1146    /// Expect a specific token type and consume it
1147    fn expect_token(&mut self, expected: TokenType) -> ParseResult<Token> {
1148        if self.current_token.token_type == expected {
1149            self.consume_token()
1150        } else {
1151            Err(ParseError::at_position(
1152                format!(
1153                    "Expected {:?}, found {:?} '{}'",
1154                    expected, self.current_token.token_type, self.current_token.image
1155                ),
1156                self.current_token.begin_offset
1157            ))
1158        }
1159    }
1160
1161    /// Allocate the current token to the arena and track its ID
1162    fn alloc_current_token(&mut self) -> TokenId {
1163        let token_id = self.arena.alloc_token(self.current_token.clone());
1164        self.current_token_id = Some(token_id);
1165        token_id
1166    }
1167
1168    /// Get lookahead token at position n (0 = current token)
1169    #[allow(dead_code)]
1170    fn lookahead(&mut self, n: usize) -> ParseResult<&Token> {
1171        if n == 0 {
1172            return Ok(&self.current_token);
1173        }
1174
1175        // Ensure we have enough lookahead tokens
1176        while self.lookahead.len() < n {
1177            let token = self.lexer.next_token()?;
1178            self.lookahead.push(token);
1179        }
1180
1181        Ok(&self.lookahead[n - 1])
1182    }
1183
1184    /// Peek at the type of the token at lookahead position n (0 = current) without error.
1185    /// Returns None if we can't read that far ahead.
1186    #[allow(dead_code)]
1187    fn lookahead_type(&mut self, n: usize) -> Option<TokenType> {
1188        if n == 0 {
1189            return Some(self.current_token.token_type);
1190        }
1191        self.lookahead(n).ok().map(|t| t.token_type)
1192    }
1193}