sqlexpr_rust/
parser.rs

1// Recursive Descent Parser for SQL Expression Grammar
2//
3// This module implements a parser that follows the EBNF grammar specification.
4// It uses recursive descent parsing with proper operator precedence.
5
6use crate::ast::*;
7use crate::lexer::{Lexer, Token};
8
/// Recursive descent parser state over a pre-tokenized SQL expression.
pub struct Parser {
    /// Token stream produced by the lexer in `Parser::new`.
    tokens: Vec<Token>,
    /// Index of the current token within `tokens`.
    position: usize,
    /// When true, `parse` prints the resulting AST to stdout.
    pretty_print: bool,
    /// Copy of the original input text, echoed in error messages and in the AST dump.
    input: String,
}
15
/// Error produced when tokenizing or parsing an expression fails.
///
/// `Clone`, `PartialEq` and `Eq` are derived in addition to `Debug` so that
/// callers can store, duplicate and compare errors (for example with
/// `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Human-readable description of the failure.
    pub message: String,
}

impl std::fmt::Display for ParseError {
    /// Renders the error as `Parse error: <message>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Parse error: {}", self.message)
    }
}

impl std::error::Error for ParseError {}
28
29type ParseResult<T> = Result<T, ParseError>;
30
31impl Parser {
32    pub fn new(input: &str) -> Result<Self, ParseError> {
33        let mut lexer = Lexer::new(input);
34        let tokens = lexer.tokenize()
35            .map_err(|e| ParseError { message: e })?;
36
37        // Check SQLEXPR_PRETTY environment variable
38        let pretty_print = std::env::var("SQLEXPR_PRETTY")
39            .map(|v| v.to_lowercase() == "true")
40            .unwrap_or(false);
41
42        Ok(Parser {
43            tokens,
44            position: 0,
45            pretty_print,
46            input: input.to_string(),
47        })
48    }
49
50    /// Get current token
51    fn current_token(&self) -> &Token {
52        self.tokens.get(self.position).unwrap_or(&Token::Eof)
53    }
54
55    /// Peek at next token
56    fn peek_token(&self) -> &Token {
57        self.tokens.get(self.position + 1).unwrap_or(&Token::Eof)
58    }
59
60    /// Advance to next token
61    fn advance(&mut self) {
62        if self.position < self.tokens.len() {
63            self.position += 1;
64        }
65    }
66
67    /// Expect a specific token and advance
68    fn expect(&mut self, expected: Token) -> ParseResult<()> {
69        if self.current_token() == &expected {
70            self.advance();
71            Ok(())
72        } else {
73            Err(ParseError {
74                message: format!("Expected {}, got {} near position {} in:\n  {}", expected, self.current_token(), self.position, self.input),
75            })
76        }
77    }
78
79    /// Parse the entry point: BooleanExpression
80    pub fn parse(&mut self) -> ParseResult<BooleanExpr> {
81        let expr = self.parse_boolean_expression()?;
82        if self.current_token() != &Token::Eof {
83            return Err(ParseError {
84                message: format!("Unexpected token '{}' near position {} in:\n  {}", self.current_token(), self.position, self.input),
85            });
86        }
87
88        // Pretty print if enabled
89        if self.pretty_print {
90            self.print_ast(&expr);
91        }
92
93        Ok(expr)
94    }
95
    /// Pretty print the AST with indentation.
    ///
    /// Writes the original input, an `AST:` header, the tree rooted at `expr`
    /// (starting at indentation 0) and a trailing blank line to stdout.
    fn print_ast(&self, expr: &BooleanExpr) {
        println!("Input: {}", self.input);
        println!("AST:");
        // The root node is printed flush left; children indent themselves.
        self.print_boolean_expr(expr, 0);
        println!();
    }
103
104    fn print_boolean_expr(&self, expr: &BooleanExpr, indent: usize) {
105        let prefix = " ".repeat(indent);
106        match expr {
107            BooleanExpr::Or(left, right) => {
108                println!("{}Or", prefix);
109                self.print_boolean_expr(left, indent + 3);
110                self.print_boolean_expr(right, indent + 3);
111            }
112            BooleanExpr::And(left, right) => {
113                println!("{}And", prefix);
114                self.print_boolean_expr(left, indent + 3);
115                self.print_boolean_expr(right, indent + 3);
116            }
117            BooleanExpr::Not(inner) => {
118                println!("{}Not", prefix);
119                self.print_boolean_expr(inner, indent + 3);
120            }
121            BooleanExpr::Literal(b) => {
122                println!("{}BooleanLiteral: {}", prefix, b);
123            }
124            BooleanExpr::Variable(name) => {
125                println!("{}Variable: {}", prefix, name);
126            }
127            BooleanExpr::Relational(rel) => {
128                println!("{}Relational", prefix);
129                self.print_relational_expr(rel, indent + 3);
130            }
131        }
132    }
133
134    fn print_relational_expr(&self, expr: &RelationalExpr, indent: usize) {
135        let prefix = " ".repeat(indent);
136        match expr {
137            RelationalExpr::Equality { left, op, right } => {
138                println!("{}Equality: {:?}", prefix, op);
139                self.print_value_expr(left, indent + 3);
140                self.print_value_expr(right, indent + 3);
141            }
142            RelationalExpr::Comparison { left, op, right } => {
143                println!("{}Comparison: {:?}", prefix, op);
144                self.print_value_expr(left, indent + 3);
145                self.print_value_expr(right, indent + 3);
146            }
147            RelationalExpr::Like { expr, pattern, escape, negated } => {
148                println!("{}Like: negated={}, pattern='{}', escape={:?}",
149                    prefix, negated, pattern, escape);
150                self.print_value_expr(expr, indent + 3);
151            }
152            RelationalExpr::Between { expr, lower, upper, negated } => {
153                println!("{}Between: negated={}", prefix, negated);
154                self.print_value_expr(expr, indent + 3);
155                self.print_value_expr(lower, indent + 3);
156                self.print_value_expr(upper, indent + 3);
157            }
158            RelationalExpr::In { expr, values, negated } => {
159                println!("{}In: negated={}, values={:?}", prefix, negated, values);
160                self.print_value_expr(expr, indent + 3);
161            }
162            RelationalExpr::IsNull { expr, negated } => {
163                println!("{}IsNull: negated={}", prefix, negated);
164                self.print_value_expr(expr, indent + 3);
165            }
166        }
167    }
168
169    #[allow(clippy::only_used_in_recursion)]
170    fn print_value_expr(&self, expr: &ValueExpr, indent: usize) {
171        let prefix = " ".repeat(indent);
172        match expr {
173            ValueExpr::Add(left, right) => {
174                println!("{}Add", prefix);
175                self.print_value_expr(left, indent + 3);
176                self.print_value_expr(right, indent + 3);
177            }
178            ValueExpr::Subtract(left, right) => {
179                println!("{}Subtract", prefix);
180                self.print_value_expr(left, indent + 3);
181                self.print_value_expr(right, indent + 3);
182            }
183            ValueExpr::Multiply(left, right) => {
184                println!("{}Multiply", prefix);
185                self.print_value_expr(left, indent + 3);
186                self.print_value_expr(right, indent + 3);
187            }
188            ValueExpr::Divide(left, right) => {
189                println!("{}Divide", prefix);
190                self.print_value_expr(left, indent + 3);
191                self.print_value_expr(right, indent + 3);
192            }
193            ValueExpr::Modulo(left, right) => {
194                println!("{}Modulo", prefix);
195                self.print_value_expr(left, indent + 3);
196                self.print_value_expr(right, indent + 3);
197            }
198            ValueExpr::UnaryPlus(inner) => {
199                println!("{}UnaryPlus", prefix);
200                self.print_value_expr(inner, indent + 3);
201            }
202            ValueExpr::UnaryMinus(inner) => {
203                println!("{}UnaryMinus", prefix);
204                self.print_value_expr(inner, indent + 3);
205            }
206            ValueExpr::Literal(lit) => {
207                println!("{}Literal: {:?}", prefix, lit);
208            }
209            ValueExpr::Variable(name) => {
210                println!("{}Variable: {}", prefix, name);
211            }
212        }
213    }
214
215    // ========================================================================
216    // TYPE CHECKING HELPER FUNCTIONS
217    // ========================================================================
218
219    /// Extract literal from ValueExpr, returning error if not a literal
220    /// Special case: UnaryMinus/UnaryPlus of a literal is allowed (for negative/positive numbers)
221    fn extract_literal(expr: &ValueExpr) -> ParseResult<ValueLiteral> {
222        match expr {
223            ValueExpr::Literal(lit) => Ok(lit.clone()),
224            // Handle unary minus for negative numbers
225            ValueExpr::UnaryMinus(inner) => {
226                if let ValueExpr::Literal(lit) = inner.as_ref() {
227                    match lit {
228                        ValueLiteral::Integer(n) => Ok(ValueLiteral::Integer(-n)),
229                        ValueLiteral::Float(f) => Ok(ValueLiteral::Float(-f)),
230                        _ => Err(ParseError {
231                            message: "Unary minus can only be applied to numeric literals in BETWEEN bounds".to_string(),
232                        }),
233                    }
234                } else {
235                    Err(ParseError {
236                        message: "Complex expressions are not allowed here, only literal values".to_string(),
237                    })
238                }
239            }
240            // Handle unary plus (just unwrap it)
241            ValueExpr::UnaryPlus(inner) => {
242                if let ValueExpr::Literal(lit) = inner.as_ref() {
243                    Ok(lit.clone())
244                } else {
245                    Err(ParseError {
246                        message: "Complex expressions are not allowed here, only literal values".to_string(),
247                    })
248                }
249            }
250            ValueExpr::Variable(_) => Err(ParseError {
251                message: "Variables are not allowed here, only literal values".to_string(),
252            }),
253            _ => Err(ParseError {
254                message: "Complex expressions are not allowed here, only literal values".to_string(),
255            }),
256        }
257    }
258
259    /// Get literal type name for error messages
260    fn literal_type_name(lit: &ValueLiteral) -> &'static str {
261        match lit {
262            ValueLiteral::Integer(_) => "integer",
263            ValueLiteral::Float(_) => "float",
264            ValueLiteral::String(_) => "string",
265            ValueLiteral::Null => "NULL",
266            ValueLiteral::Boolean(_) => "boolean",
267        }
268    }
269
270    /// Check if two literals are type-compatible for BETWEEN
271    /// Both must be numeric (Integer or Float) OR both must be String
272    fn are_between_compatible(lower: &ValueLiteral, upper: &ValueLiteral) -> bool {
273        match (lower, upper) {
274            // Both numeric
275            (ValueLiteral::Integer(_), ValueLiteral::Integer(_)) => true,
276            (ValueLiteral::Integer(_), ValueLiteral::Float(_)) => true,
277            (ValueLiteral::Float(_), ValueLiteral::Integer(_)) => true,
278            (ValueLiteral::Float(_), ValueLiteral::Float(_)) => true,
279            // Both string
280            (ValueLiteral::String(_), ValueLiteral::String(_)) => true,
281            // Everything else incompatible
282            _ => false,
283        }
284    }
285
286    /// Validate literal for IN list (reject Null and Boolean)
287    fn validate_in_literal(&self, lit: &ValueLiteral) -> ParseResult<()> {
288        match lit {
289            ValueLiteral::Null => Err(ParseError {
290                message: format!(
291                    "NULL is not allowed in IN list near position {} in:\n  {}",
292                    self.position, self.input
293                ),
294            }),
295            ValueLiteral::Boolean(_) => Err(ParseError {
296                message: format!(
297                    "Boolean literals are not allowed in IN list near position {} in:\n  {}",
298                    self.position, self.input
299                ),
300            }),
301            ValueLiteral::Integer(_) | ValueLiteral::Float(_) | ValueLiteral::String(_) => Ok(()),
302        }
303    }
304
305    /// Check if two literals are exactly the same type (for IN list)
306    /// No mixing of Integer and Float allowed
307    fn are_exact_same_type(a: &ValueLiteral, b: &ValueLiteral) -> bool {
308        matches!((a, b), 
309            (ValueLiteral::Integer(_), ValueLiteral::Integer(_)) |
310            (ValueLiteral::Float(_), ValueLiteral::Float(_))     |
311            (ValueLiteral::String(_), ValueLiteral::String(_)))
312    }
313
314    /// Validate BETWEEN bounds: lower must be <= upper
315    fn validate_between_bounds(lower: &ValueLiteral, upper: &ValueLiteral, input: &str, position: usize) -> ParseResult<()> {
316        match (lower, upper) {
317            // Integer comparison
318            (ValueLiteral::Integer(l), ValueLiteral::Integer(u)) => {
319                if l > u {
320                    return Err(ParseError {
321                        message: format!(
322                            "BETWEEN lower bound ({}) must be less than or equal to upper bound ({}) near position {} in:\n  {}",
323                            l, u, position, input
324                        ),
325                    });
326                }
327            }
328            // Float comparison
329            (ValueLiteral::Float(l), ValueLiteral::Float(u)) => {
330                if l > u {
331                    return Err(ParseError {
332                        message: format!(
333                            "BETWEEN lower bound ({}) must be less than or equal to upper bound ({}) near position {} in:\n  {}",
334                            l, u, position, input
335                        ),
336                    });
337                }
338            }
339            // Mixed numeric: Integer and Float
340            (ValueLiteral::Integer(l), ValueLiteral::Float(u)) => {
341                if (*l as f64) > *u {
342                    return Err(ParseError {
343                        message: format!(
344                            "BETWEEN lower bound ({}) must be less than or equal to upper bound ({}) near position {} in:\n  {}",
345                            l, u, position, input
346                        ),
347                    });
348                }
349            }
350            (ValueLiteral::Float(l), ValueLiteral::Integer(u)) => {
351                if *l > (*u as f64) {
352                    return Err(ParseError {
353                        message: format!(
354                            "BETWEEN lower bound ({}) must be less than or equal to upper bound ({}) near position {} in:\n  {}",
355                            l, u, position, input
356                        ),
357                    });
358                }
359            }
360            // String comparison
361            (ValueLiteral::String(l), ValueLiteral::String(u)) => {
362                if l > u {
363                    return Err(ParseError {
364                        message: format!(
365                            "BETWEEN lower bound ('{}') must be less than or equal to upper bound ('{}') near position {} in:\n  {}",
366                            l, u, position, input
367                        ),
368                    });
369                }
370            }
371            // Other combinations should have been caught by type compatibility check
372            _ => {}
373        }
374        Ok(())
375    }
376
377    // ========================================================================
378    // BOOLEAN EXPRESSION PARSING
379    // ========================================================================
380
    /// BooleanExpression = BooleanOrExpression
    ///
    /// Entry rule of the boolean grammar. It delegates directly to the OR
    /// level, which in turn delegates to AND and then to boolean terms, so OR
    /// binds loosest.
    fn parse_boolean_expression(&mut self) -> ParseResult<BooleanExpr> {
        self.parse_boolean_or_expression()
    }
385
386    /// BooleanOrExpression = BooleanAndExpression { "OR" BooleanAndExpression }
387    fn parse_boolean_or_expression(&mut self) -> ParseResult<BooleanExpr> {
388        let mut left = self.parse_boolean_and_expression()?;
389
390        while self.current_token() == &Token::Or {
391            self.advance();
392            let right = self.parse_boolean_and_expression()?;
393            left = BooleanExpr::Or(Box::new(left), Box::new(right));
394        }
395
396        Ok(left)
397    }
398
399    /// BooleanAndExpression = BooleanTerm { "AND" BooleanTerm }
400    fn parse_boolean_and_expression(&mut self) -> ParseResult<BooleanExpr> {
401        let mut left = self.parse_boolean_term()?;
402
403        while self.current_token() == &Token::And {
404            self.advance();
405            let right = self.parse_boolean_term()?;
406            left = BooleanExpr::And(Box::new(left), Box::new(right));
407        }
408
409        Ok(left)
410    }
411
412    /// BooleanTerm = "NOT" BooleanTerm
413    ///             | "(" BooleanExpression ")"
414    ///             | BooleanLiteral
415    ///             | Variable
416    ///             | RelationalExpression
417    fn parse_boolean_term(&mut self) -> ParseResult<BooleanExpr> {
418        match self.current_token() {
419            Token::Not => {
420                self.advance();
421                let expr = self.parse_boolean_term()?;
422                Ok(BooleanExpr::Not(Box::new(expr)))
423            }
424            Token::LeftParen => {
425                // Need to distinguish between:
426                // 1. (boolean_expr) like (x > 5) or (x > 5 AND y < 10)
427                // 2. (value_expr) OP value like (x + y) > 10
428                //
429                // Strategy: Look ahead past the '(' to see what's inside
430                // If we see patterns like "x >" or "NOT" or "TRUE/FALSE" followed by operators,
431                // it's likely a boolean expression
432                self.advance(); // consume '('
433
434                // Special case: check if this is a parenthesized boolean expression
435                // by looking for boolean operators or seeing if it's a complete relational expr
436                let saved_pos = self.position;
437
438                // Try parsing as a boolean expression first
439                match self.parse_boolean_expression() {
440                    Ok(expr) => {
441                        if self.current_token() == &Token::RightParen {
442                            self.advance(); // consume ')'
443                            Ok(expr)
444                        } else {
445                            // Failed to find closing paren, might be (value_expr) OP ...
446                            // Backtrack and try as relational
447                            self.position = saved_pos - 1; // go back before '('
448                            let rel = self.parse_relational_expression()?;
449                            Ok(BooleanExpr::Relational(rel))
450                        }
451                    }
452                    Err(_) => {
453                        // Failed to parse as boolean, try as relational
454                        self.position = saved_pos - 1; // go back before '('
455                        let rel = self.parse_relational_expression()?;
456                        Ok(BooleanExpr::Relational(rel))
457                    }
458                }
459            }
460            Token::True => {
461                self.advance();
462                Ok(BooleanExpr::Literal(true))
463            }
464            Token::False => {
465                self.advance();
466                Ok(BooleanExpr::Literal(false))
467            }
468            Token::Identifier(_) => {
469                // Could be a variable or start of relational expression
470                // We need to look ahead to determine which
471                if self.is_relational_operator_ahead() || self.is_arithmetic_operator_ahead() {
472                    let rel = self.parse_relational_expression()?;
473                    Ok(BooleanExpr::Relational(rel))
474                } else {
475                    // It's a variable (boolean at runtime)
476                    if let Token::Identifier(name) = self.current_token() {
477                        let name = name.clone();
478                        self.advance();
479                        Ok(BooleanExpr::Variable(name))
480                    } else {
481                        unreachable!()
482                    }
483                }
484            }
485            _ => {
486                // Default case: try to parse as relational expression
487                // This includes literals, etc.
488                let rel = self.parse_relational_expression()?;
489                Ok(BooleanExpr::Relational(rel))
490            }
491        }
492    }
493
494    /// Check if a relational operator follows
495    fn is_relational_operator_ahead(&self) -> bool {
496        // Look ahead to see if there's a relational operator
497        let next = self.peek_token();
498        matches!(next,
499            Token::Equal | Token::NotEqual |
500            Token::GreaterThan | Token::GreaterOrEqual |
501            Token::LessThan | Token::LessOrEqual |
502            Token::Like | Token::Between | Token::In | Token::Is |
503            Token::Not  // For NOT LIKE, NOT BETWEEN, NOT IN
504        )
505    }
506
507    /// Check if an arithmetic operator follows
508    fn is_arithmetic_operator_ahead(&self) -> bool {
509        // Look ahead to see if there's an arithmetic operator
510        let next = self.peek_token();
511        matches!(next,
512            Token::Plus | Token::Minus | Token::Star | Token::Slash | Token::Percent
513        )
514    }
515
516    // ========================================================================
517    // RELATIONAL EXPRESSION PARSING
518    // ========================================================================
519
520    /// RelationalExpression = EqualityExpression
521    ///                      | ComparisonExpression
522    ///                      | IsNullExpression
523    fn parse_relational_expression(&mut self) -> ParseResult<RelationalExpr> {
524        let left = self.parse_value_expression()?;
525
526        match self.current_token() {
527            Token::Equal => {
528                self.advance();
529                let right = self.parse_value_expression()?;
530                Ok(RelationalExpr::Equality {
531                    left,
532                    op: EqualityOp::Equal,
533                    right,
534                })
535            }
536            Token::NotEqual => {
537                self.advance();
538                let right = self.parse_value_expression()?;
539                Ok(RelationalExpr::Equality {
540                    left,
541                    op: EqualityOp::NotEqual,
542                    right,
543                })
544            }
545            Token::GreaterThan => {
546                self.advance();
547                let right = self.parse_value_expression()?;
548                Ok(RelationalExpr::Comparison {
549                    left,
550                    op: ComparisonOp::GreaterThan,
551                    right,
552                })
553            }
554            Token::GreaterOrEqual => {
555                self.advance();
556                let right = self.parse_value_expression()?;
557                Ok(RelationalExpr::Comparison {
558                    left,
559                    op: ComparisonOp::GreaterOrEqual,
560                    right,
561                })
562            }
563            Token::LessThan => {
564                self.advance();
565                let right = self.parse_value_expression()?;
566                Ok(RelationalExpr::Comparison {
567                    left,
568                    op: ComparisonOp::LessThan,
569                    right,
570                })
571            }
572            Token::LessOrEqual => {
573                self.advance();
574                let right = self.parse_value_expression()?;
575                Ok(RelationalExpr::Comparison {
576                    left,
577                    op: ComparisonOp::LessOrEqual,
578                    right,
579                })
580            }
581            Token::Like => {
582                self.advance();
583                let pattern = self.expect_string_literal()?;
584                let escape = if self.current_token() == &Token::Escape {
585                    self.advance();
586                    Some(self.expect_string_literal()?)
587                } else {
588                    None
589                };
590                Ok(RelationalExpr::Like {
591                    expr: left,
592                    pattern,
593                    escape,
594                    negated: false,
595                })
596            }
597            Token::Not => {
598                self.advance();
599                match self.current_token() {
600                    Token::Like => {
601                        self.advance();
602                        let pattern = self.expect_string_literal()?;
603                        let escape = if self.current_token() == &Token::Escape {
604                            self.advance();
605                            Some(self.expect_string_literal()?)
606                        } else {
607                            None
608                        };
609                        Ok(RelationalExpr::Like {
610                            expr: left,
611                            pattern,
612                            escape,
613                            negated: true,
614                        })
615                    }
616                    Token::Between => {
617                        self.advance();
618                        let lower_expr = self.parse_value_expression()?;
619                        self.expect(Token::And)?;
620                        let upper_expr = self.parse_value_expression()?;
621
622                        // Extract literals from expressions
623                        let lower_lit = Self::extract_literal(&lower_expr)?;
624                        let upper_lit = Self::extract_literal(&upper_expr)?;
625
626                        // Reject NULL
627                        if matches!(lower_lit, ValueLiteral::Null) {
628                            return Err(ParseError {
629                                message: format!(
630                                    "NULL is not allowed as lower bound in NOT BETWEEN near position {} in:\n  {}",
631                                    self.position, self.input
632                                ),
633                            });
634                        }
635                        if matches!(upper_lit, ValueLiteral::Null) {
636                            return Err(ParseError {
637                                message: format!(
638                                    "NULL is not allowed as upper bound in NOT BETWEEN near position {} in:\n  {}",
639                                    self.position, self.input
640                                ),
641                            });
642                        }
643
644                        // Reject Boolean
645                        if matches!(lower_lit, ValueLiteral::Boolean(_)) {
646                            return Err(ParseError {
647                                message: format!(
648                                    "Boolean literals are not allowed as lower bound in NOT BETWEEN near position {} in:\n  {}",
649                                    self.position, self.input
650                                ),
651                            });
652                        }
653                        if matches!(upper_lit, ValueLiteral::Boolean(_)) {
654                            return Err(ParseError {
655                                message: format!(
656                                    "Boolean literals are not allowed as upper bound in NOT BETWEEN near position {} in:\n  {}",
657                                    self.position, self.input
658                                ),
659                            });
660                        }
661
662                        // Check type compatibility
663                        if !Self::are_between_compatible(&lower_lit, &upper_lit) {
664                            return Err(ParseError {
665                                message: format!(
666                                    "NOT BETWEEN bounds must be both numeric or both string, found {} and {} near position {} in:\n  {}",
667                                    Self::literal_type_name(&lower_lit),
668                                    Self::literal_type_name(&upper_lit),
669                                    self.position,
670                                    self.input
671                                ),
672                            });
673                        }
674
675                        // Validate bounds order: lower <= upper
676                        Self::validate_between_bounds(&lower_lit, &upper_lit, &self.input, self.position)?;
677
678                        Ok(RelationalExpr::Between {
679                            expr: left,
680                            lower: lower_expr,
681                            upper: upper_expr,
682                            negated: true,
683                        })
684                    }
685                    Token::In => {
686                        self.advance();
687                        let values = self.parse_string_list()?;
688                        Ok(RelationalExpr::In {
689                            expr: left,
690                            values,
691                            negated: true,
692                        })
693                    }
694                    _ => Err(ParseError {
695                        message: format!("Expected LIKE, BETWEEN, or IN after NOT, got {} near position {} in:\n  {}", self.current_token(), self.position, self.input),
696                    }),
697                }
698            }
699            Token::Between => {
700                self.advance();
701                let lower_expr = self.parse_value_expression()?;
702                self.expect(Token::And)?;
703                let upper_expr = self.parse_value_expression()?;
704
705                // Extract literals from expressions
706                let lower_lit = Self::extract_literal(&lower_expr)?;
707                let upper_lit = Self::extract_literal(&upper_expr)?;
708
709                // Reject NULL
710                if matches!(lower_lit, ValueLiteral::Null) {
711                    return Err(ParseError {
712                        message: format!(
713                            "NULL is not allowed as lower bound in BETWEEN near position {} in:\n  {}",
714                            self.position, self.input
715                        ),
716                    });
717                }
718                if matches!(upper_lit, ValueLiteral::Null) {
719                    return Err(ParseError {
720                        message: format!(
721                            "NULL is not allowed as upper bound in BETWEEN near position {} in:\n  {}",
722                            self.position, self.input
723                        ),
724                    });
725                }
726
727                // Reject Boolean
728                if matches!(lower_lit, ValueLiteral::Boolean(_)) {
729                    return Err(ParseError {
730                        message: format!(
731                            "Boolean literals are not allowed as lower bound in BETWEEN near position {} in:\n  {}",
732                            self.position, self.input
733                        ),
734                    });
735                }
736                if matches!(upper_lit, ValueLiteral::Boolean(_)) {
737                    return Err(ParseError {
738                        message: format!(
739                            "Boolean literals are not allowed as upper bound in BETWEEN near position {} in:\n  {}",
740                            self.position, self.input
741                        ),
742                    });
743                }
744
745                // Check type compatibility
746                if !Self::are_between_compatible(&lower_lit, &upper_lit) {
747                    return Err(ParseError {
748                        message: format!(
749                            "BETWEEN bounds must be both numeric or both string, found {} and {} near position {} in:\n  {}",
750                            Self::literal_type_name(&lower_lit),
751                            Self::literal_type_name(&upper_lit),
752                            self.position,
753                            self.input
754                        ),
755                    });
756                }
757
758                // Validate bounds order: lower <= upper
759                Self::validate_between_bounds(&lower_lit, &upper_lit, &self.input, self.position)?;
760
761                Ok(RelationalExpr::Between {
762                    expr: left,
763                    lower: lower_expr,
764                    upper: upper_expr,
765                    negated: false,
766                })
767            }
768            Token::In => {
769                self.advance();
770                let values = self.parse_string_list()?;
771                Ok(RelationalExpr::In {
772                    expr: left,
773                    values,
774                    negated: false,
775                })
776            }
777            Token::Is => {
778                self.advance();
779                let negated = if self.current_token() == &Token::Not {
780                    self.advance();
781                    true
782                } else {
783                    false
784                };
785                self.expect(Token::Null)?;
786                Ok(RelationalExpr::IsNull {
787                    expr: left,
788                    negated,
789                })
790            }
791            _ => Err(ParseError {
792                message: format!("Expected relational operator, got {} near position {} in:\n  {}", self.current_token(), self.position, self.input),
793            }),
794        }
795    }
796
797    /// Expect a string literal token
798    fn expect_string_literal(&mut self) -> ParseResult<String> {
799        match self.current_token() {
800            Token::StringLiteral(s) => {
801                let s = s.clone();
802                self.advance();
803                Ok(s)
804            }
805            _ => Err(ParseError {
806                message: format!("Expected string literal, got {} near position {} in:\n  {}", self.current_token(), self.position, self.input),
807            }),
808        }
809    }
810
811    /// Parse value literal list for IN operator with strict type checking
812    /// All values must be the same exact type (Integer, Float, or String)
813    /// NULL and Boolean are rejected
814    fn parse_string_list(&mut self) -> ParseResult<Vec<ValueLiteral>> {
815        self.expect(Token::LeftParen)?;
816
817        let first = self.expect_value_literal()?;
818
819        // Validate first literal (reject NULL and Boolean)
820        self.validate_in_literal(&first)?;
821
822        let mut values = vec![first.clone()];
823
824        while self.current_token() == &Token::Comma {
825            self.advance();
826            let next = self.expect_value_literal()?;
827
828            // Validate this literal (reject NULL and Boolean)
829            self.validate_in_literal(&next)?;
830
831            // Check type consistency with first value
832            if !Self::are_exact_same_type(&first, &next) {
833                return Err(ParseError {
834                    message: format!(
835                        "IN list values must all be the same type, found {} and {} near position {} in:\n  {}",
836                        Self::literal_type_name(&first),
837                        Self::literal_type_name(&next),
838                        self.position,
839                        self.input
840                    ),
841                });
842            }
843
844            values.push(next);
845        }
846
847        self.expect(Token::RightParen)?;
848        Ok(values)
849    }
850
851    /// Expect a value literal token (string, integer, float, etc.)
852    /// Also handles unary minus for negative numbers
853    fn expect_value_literal(&mut self) -> ParseResult<ValueLiteral> {
854        // Handle unary minus for negative numbers
855        let is_negative = if self.current_token() == &Token::Minus {
856            self.advance();
857            true
858        } else {
859            false
860        };
861
862        match self.current_token().clone() {
863            Token::StringLiteral(s) => {
864                if is_negative {
865                    return Err(ParseError {
866                        message: format!("Cannot apply unary minus to string literal near position {} in:\n  {}", self.position, self.input),
867                    });
868                }
869                self.advance();
870                Ok(ValueLiteral::String(s))
871            }
872            Token::IntegerLiteral(n) => {
873                self.advance();
874                Ok(ValueLiteral::Integer(if is_negative { -n } else { n }))
875            }
876            Token::FloatLiteral(f) => {
877                self.advance();
878                Ok(ValueLiteral::Float(if is_negative { -f } else { f }))
879            }
880            Token::Null => {
881                if is_negative {
882                    return Err(ParseError {
883                        message: format!("Cannot apply unary minus to NULL near position {} in:\n  {}", self.position, self.input),
884                    });
885                }
886                self.advance();
887                Ok(ValueLiteral::Null)
888            }
889            Token::True => {
890                if is_negative {
891                    return Err(ParseError {
892                        message: format!("Cannot apply unary minus to boolean near position {} in:\n  {}", self.position, self.input),
893                    });
894                }
895                self.advance();
896                Ok(ValueLiteral::Boolean(true))
897            }
898            Token::False => {
899                if is_negative {
900                    return Err(ParseError {
901                        message: format!("Cannot apply unary minus to boolean near position {} in:\n  {}", self.position, self.input),
902                    });
903                }
904                self.advance();
905                Ok(ValueLiteral::Boolean(false))
906            }
907            _ => Err(ParseError {
908                message: format!("Expected literal value, got {} near position {} in:\n  {}", self.current_token(), self.position, self.input),
909            }),
910        }
911    }
912
913    // ========================================================================
914    // VALUE EXPRESSION PARSING
915    // ========================================================================
916
    /// ValueExpression = AddExpression
    ///
    /// Entry point of the arithmetic (value) grammar. It adds no syntax of its
    /// own and simply delegates to `parse_add_expression`, the lowest-precedence
    /// arithmetic level, returning that result (or its error) unchanged.
    fn parse_value_expression(&mut self) -> ParseResult<ValueExpr> {
        self.parse_add_expression()
    }
921
922    /// AddExpression = MultExpression { ( "+" | "-" ) MultExpression }
923    fn parse_add_expression(&mut self) -> ParseResult<ValueExpr> {
924        let mut left = self.parse_mult_expression()?;
925
926        loop {
927            match self.current_token() {
928                Token::Plus => {
929                    self.advance();
930                    let right = self.parse_mult_expression()?;
931                    left = ValueExpr::Add(Box::new(left), Box::new(right));
932                }
933                Token::Minus => {
934                    self.advance();
935                    let right = self.parse_mult_expression()?;
936                    left = ValueExpr::Subtract(Box::new(left), Box::new(right));
937                }
938                _ => break,
939            }
940        }
941
942        Ok(left)
943    }
944
945    /// MultExpression = UnaryValueExpression { ( "*" | "/" | "%" ) UnaryValueExpression }
946    fn parse_mult_expression(&mut self) -> ParseResult<ValueExpr> {
947        let mut left = self.parse_unary_value_expression()?;
948
949        loop {
950            match self.current_token() {
951                Token::Star => {
952                    self.advance();
953                    let right = self.parse_unary_value_expression()?;
954                    left = ValueExpr::Multiply(Box::new(left), Box::new(right));
955                }
956                Token::Slash => {
957                    self.advance();
958                    let right = self.parse_unary_value_expression()?;
959                    left = ValueExpr::Divide(Box::new(left), Box::new(right));
960                }
961                Token::Percent => {
962                    self.advance();
963                    let right = self.parse_unary_value_expression()?;
964                    left = ValueExpr::Modulo(Box::new(left), Box::new(right));
965                }
966                _ => break,
967            }
968        }
969
970        Ok(left)
971    }
972
973    /// UnaryValueExpression = "+" UnaryValueExpression
974    ///                      | "-" UnaryValueExpression
975    ///                      | ValuePrimary
976    fn parse_unary_value_expression(&mut self) -> ParseResult<ValueExpr> {
977        match self.current_token() {
978            Token::Plus => {
979                self.advance();
980                let expr = self.parse_unary_value_expression()?;
981                Ok(ValueExpr::UnaryPlus(Box::new(expr)))
982            }
983            Token::Minus => {
984                self.advance();
985                let expr = self.parse_unary_value_expression()?;
986                Ok(ValueExpr::UnaryMinus(Box::new(expr)))
987            }
988            _ => self.parse_value_primary(),
989        }
990    }
991
992    /// ValuePrimary = ValueLiteral
993    ///              | Variable
994    ///              | "(" ValueExpression ")"
995    fn parse_value_primary(&mut self) -> ParseResult<ValueExpr> {
996        match self.current_token().clone() {
997            Token::IntegerLiteral(n) => {
998                self.advance();
999                Ok(ValueExpr::Literal(ValueLiteral::Integer(n)))
1000            }
1001            Token::FloatLiteral(n) => {
1002                self.advance();
1003                Ok(ValueExpr::Literal(ValueLiteral::Float(n)))
1004            }
1005            Token::StringLiteral(s) => {
1006                self.advance();
1007                Ok(ValueExpr::Literal(ValueLiteral::String(s)))
1008            }
1009            Token::Null => {
1010                self.advance();
1011                Ok(ValueExpr::Literal(ValueLiteral::Null))
1012            }
1013            Token::True => {
1014                self.advance();
1015                Ok(ValueExpr::Literal(ValueLiteral::Boolean(true)))
1016            }
1017            Token::False => {
1018                self.advance();
1019                Ok(ValueExpr::Literal(ValueLiteral::Boolean(false)))
1020            }
1021            Token::Identifier(name) => {
1022                self.advance();
1023                Ok(ValueExpr::Variable(name))
1024            }
1025            Token::LeftParen => {
1026                self.advance();
1027                let expr = self.parse_value_expression()?;
1028                self.expect(Token::RightParen)?;
1029                Ok(expr)
1030            }
1031            _ => Err(ParseError {
1032                message: format!("Expected value expression, got {}", self.current_token()),
1033            }),
1034        }
1035    }
1036}
1037
1038/// Public API function to parse a SQL expression string
1039pub fn parse(input: &str) -> Result<BooleanExpr, ParseError> {
1040    let mut parser = Parser::new(input)?;
1041    parser.parse()
1042}
1043
#[cfg(test)]
mod tests {
    use super::*;

    /// A single comparison between a variable and an integer is accepted.
    #[test]
    fn test_simple_comparison() {
        assert!(parse("x > 5").is_ok());
    }

    /// Two comparisons joined with AND are accepted.
    #[test]
    fn test_boolean_and() {
        assert!(parse("x > 5 AND y < 10").is_ok());
    }

    /// LIKE with a string pattern is accepted.
    #[test]
    fn test_like_operator() {
        assert!(parse("name LIKE '%test%'").is_ok());
    }

    /// BETWEEN with ordered integer bounds is accepted.
    #[test]
    fn test_between() {
        assert!(parse("age BETWEEN 18 AND 65").is_ok());
    }

    /// IN with a list of string literals is accepted.
    #[test]
    fn test_in_operator() {
        assert!(parse("status IN ('active', 'pending')").is_ok());
    }

    /// IS NULL is accepted.
    #[test]
    fn test_is_null() {
        assert!(parse("value IS NULL").is_ok());
    }

    /// Parenthesised arithmetic is accepted on both sides of a comparison.
    #[test]
    fn test_arithmetic_in_comparison() {
        assert!(parse("(a + b) > (c - d)").is_ok());
    }
}