Skip to main content

sqlexpr_rust/
parser.rs

1//! Recursive Descent Parser for SQL Expression Grammar
2//!
3//! This module implements a parser that follows the EBNF grammar specification.
4//! It uses recursive descent parsing with proper operator precedence to build
5//! an Abstract Syntax Tree (AST) from the input SQL expression string.  The
6//! input string is first tokenized by the lexer module and then parsed into
7//! the AST defined in the ast module.  The parser ensures that all top-level
8//! expressions evaluate to boolean values, while arithmetic and value expressions
9//! can only appear as operands to relational operators. 
10//! 
11//! The parser also performs as much type checking at parse time as possible. Additional 
12//! runtime type checking is necessary during evaluation after variable values are known.  
13//! 
14//! The parser also supports pretty-printing of the AST when the
15//! SQLEXPR_PRETTY environment variable is set to "true".
16
17use crate::ast::*;
18use crate::lexer::{Lexer, Token};
19
20/// Parser struct used to track parsing state and options.
21pub struct Parser {
22    tokens: Vec<Token>,
23    position: usize,
24    pretty_print: bool,
25    input: String,
26}
27
/// Error returned when an expression cannot be tokenized or parsed.
///
/// The error carries a single, fully formatted, human-readable message.
/// Its `Display` output is that message prefixed with `Parse error: `.
///
/// `Clone`, `PartialEq` and `Eq` are derived so callers (and tests) can copy
/// and compare errors directly instead of inspecting `message` by hand.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Human-readable description of what went wrong.
    pub message: String,
}

impl std::fmt::Display for ParseError {
    /// Writes `Parse error: <message>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Parse error: {}", self.message)
    }
}

/// Allows `ParseError` to be used as a `Box<dyn std::error::Error>` and with `?`.
impl std::error::Error for ParseError {}
41
42type ParseResult<T> = Result<T, ParseError>;
43
44impl Parser {
45    pub fn new(input: &str) -> Result<Self, ParseError> {
46        let mut lexer = Lexer::new(input);
47        let tokens = lexer.tokenize()
48            .map_err(|e| ParseError { message: e })?;
49
50        // Check SQLEXPR_PRETTY environment variable
51        let pretty_print = std::env::var("SQLEXPR_PRETTY")
52            .map(|v| v.to_lowercase() == "true")
53            .unwrap_or(false);
54
55        Ok(Parser {
56            tokens,
57            position: 0,
58            pretty_print,
59            input: input.to_string(),
60        })
61    }
62
63    /// Get current token
64    fn current_token(&self) -> &Token {
65        self.tokens.get(self.position).unwrap_or(&Token::Eof)
66    }
67
68    /// Peek at next token
69    fn peek_token(&self) -> &Token {
70        self.tokens.get(self.position + 1).unwrap_or(&Token::Eof)
71    }
72
73    /// Advance to next token
74    fn advance(&mut self) {
75        if self.position < self.tokens.len() {
76            self.position += 1;
77        }
78    }
79
80    /// Expect a specific token and advance
81    fn expect(&mut self, expected: Token) -> ParseResult<()> {
82        if self.current_token() == &expected {
83            self.advance();
84            Ok(())
85        } else {
86            Err(ParseError {
87                message: format!("Expected {}, got {} near position {} in:\n  {}", expected, self.current_token(), self.position, self.input),
88            })
89        }
90    }
91
92    /// Parse the entry point: BooleanExpression
93    pub fn parse(&mut self) -> ParseResult<BooleanExpr> {
94        let expr = self.parse_boolean_expression()?;
95        if self.current_token() != &Token::Eof {
96            return Err(ParseError {
97                message: format!("Unexpected token '{}' near position {} in:\n  {}", self.current_token(), self.position, self.input),
98            });
99        }
100
101        // Pretty print if enabled
102        if self.pretty_print {
103            self.print_ast(&expr);
104        }
105
106        Ok(expr)
107    }
108
109    /// Pretty print the AST with indentation
110    fn print_ast(&self, expr: &BooleanExpr) {
111        println!("Input: {}", self.input);
112        println!("AST:");
113        self.print_boolean_expr(expr, 0);
114        println!();
115    }
116
117    fn print_boolean_expr(&self, expr: &BooleanExpr, indent: usize) {
118        let prefix = " ".repeat(indent);
119        match expr {
120            BooleanExpr::Or(left, right) => {
121                println!("{}Or", prefix);
122                self.print_boolean_expr(left, indent + 3);
123                self.print_boolean_expr(right, indent + 3);
124            }
125            BooleanExpr::And(left, right) => {
126                println!("{}And", prefix);
127                self.print_boolean_expr(left, indent + 3);
128                self.print_boolean_expr(right, indent + 3);
129            }
130            BooleanExpr::Not(inner) => {
131                println!("{}Not", prefix);
132                self.print_boolean_expr(inner, indent + 3);
133            }
134            BooleanExpr::Literal(b) => {
135                println!("{}BooleanLiteral: {}", prefix, b);
136            }
137            BooleanExpr::Variable(name) => {
138                println!("{}Variable: {}", prefix, name);
139            }
140            BooleanExpr::Relational(rel) => {
141                println!("{}Relational", prefix);
142                self.print_relational_expr(rel, indent + 3);
143            }
144        }
145    }
146
147    fn print_relational_expr(&self, expr: &RelationalExpr, indent: usize) {
148        let prefix = " ".repeat(indent);
149        match expr {
150            RelationalExpr::Equality { left, op, right } => {
151                println!("{}Equality: {:?}", prefix, op);
152                self.print_value_expr(left, indent + 3);
153                self.print_value_expr(right, indent + 3);
154            }
155            RelationalExpr::Comparison { left, op, right } => {
156                println!("{}Comparison: {:?}", prefix, op);
157                self.print_value_expr(left, indent + 3);
158                self.print_value_expr(right, indent + 3);
159            }
160            RelationalExpr::Like { expr, pattern, escape, negated } => {
161                println!("{}Like: negated={}, pattern='{}', escape={:?}",
162                    prefix, negated, pattern, escape);
163                self.print_value_expr(expr, indent + 3);
164            }
165            RelationalExpr::Between { expr, lower, upper, negated } => {
166                println!("{}Between: negated={}", prefix, negated);
167                self.print_value_expr(expr, indent + 3);
168                self.print_value_expr(lower, indent + 3);
169                self.print_value_expr(upper, indent + 3);
170            }
171            RelationalExpr::In { expr, values, negated } => {
172                println!("{}In: negated={}, values={:?}", prefix, negated, values);
173                self.print_value_expr(expr, indent + 3);
174            }
175            RelationalExpr::IsNull { expr, negated } => {
176                println!("{}IsNull: negated={}", prefix, negated);
177                self.print_value_expr(expr, indent + 3);
178            }
179        }
180    }
181
182    #[allow(clippy::only_used_in_recursion)]
183    fn print_value_expr(&self, expr: &ValueExpr, indent: usize) {
184        let prefix = " ".repeat(indent);
185        match expr {
186            ValueExpr::Add(left, right) => {
187                println!("{}Add", prefix);
188                self.print_value_expr(left, indent + 3);
189                self.print_value_expr(right, indent + 3);
190            }
191            ValueExpr::Subtract(left, right) => {
192                println!("{}Subtract", prefix);
193                self.print_value_expr(left, indent + 3);
194                self.print_value_expr(right, indent + 3);
195            }
196            ValueExpr::Multiply(left, right) => {
197                println!("{}Multiply", prefix);
198                self.print_value_expr(left, indent + 3);
199                self.print_value_expr(right, indent + 3);
200            }
201            ValueExpr::Divide(left, right) => {
202                println!("{}Divide", prefix);
203                self.print_value_expr(left, indent + 3);
204                self.print_value_expr(right, indent + 3);
205            }
206            ValueExpr::Modulo(left, right) => {
207                println!("{}Modulo", prefix);
208                self.print_value_expr(left, indent + 3);
209                self.print_value_expr(right, indent + 3);
210            }
211            ValueExpr::UnaryPlus(inner) => {
212                println!("{}UnaryPlus", prefix);
213                self.print_value_expr(inner, indent + 3);
214            }
215            ValueExpr::UnaryMinus(inner) => {
216                println!("{}UnaryMinus", prefix);
217                self.print_value_expr(inner, indent + 3);
218            }
219            ValueExpr::Literal(lit) => {
220                println!("{}Literal: {:?}", prefix, lit);
221            }
222            ValueExpr::Variable(name) => {
223                println!("{}Variable: {}", prefix, name);
224            }
225        }
226    }
227
228    // ========================================================================
229    // TYPE CHECKING HELPER FUNCTIONS
230    // ========================================================================
231
232    /// Extract literal from ValueExpr, returning error if not a literal
233    /// Special case: UnaryMinus/UnaryPlus of a literal is allowed (for negative/positive numbers)
234    fn extract_literal(expr: &ValueExpr) -> ParseResult<ValueLiteral> {
235        match expr {
236            ValueExpr::Literal(lit) => Ok(lit.clone()),
237            // Handle unary minus for negative numbers
238            ValueExpr::UnaryMinus(inner) => {
239                if let ValueExpr::Literal(lit) = inner.as_ref() {
240                    match lit {
241                        ValueLiteral::Integer(n) => Ok(ValueLiteral::Integer(-n)),
242                        ValueLiteral::Float(f) => Ok(ValueLiteral::Float(-f)),
243                        _ => Err(ParseError {
244                            message: "Unary minus can only be applied to numeric literals in BETWEEN bounds".to_string(),
245                        }),
246                    }
247                } else {
248                    Err(ParseError {
249                        message: "Complex expressions are not allowed here, only literal values".to_string(),
250                    })
251                }
252            }
253            // Handle unary plus (just unwrap it)
254            ValueExpr::UnaryPlus(inner) => {
255                if let ValueExpr::Literal(lit) = inner.as_ref() {
256                    Ok(lit.clone())
257                } else {
258                    Err(ParseError {
259                        message: "Complex expressions are not allowed here, only literal values".to_string(),
260                    })
261                }
262            }
263            ValueExpr::Variable(_) => Err(ParseError {
264                message: "Variables are not allowed here, only literal values".to_string(),
265            }),
266            _ => Err(ParseError {
267                message: "Complex expressions are not allowed here, only literal values".to_string(),
268            }),
269        }
270    }
271
272    /// Get literal type name for error messages
273    fn literal_type_name(lit: &ValueLiteral) -> &'static str {
274        match lit {
275            ValueLiteral::Integer(_) => "integer",
276            ValueLiteral::Float(_) => "float",
277            ValueLiteral::String(_) => "string",
278            ValueLiteral::Null => "NULL",
279            ValueLiteral::Boolean(_) => "boolean",
280        }
281    }
282
283    /// Check if two literals are type-compatible for BETWEEN
284    /// Both must be numeric (Integer or Float) OR both must be String
285    fn are_between_compatible(lower: &ValueLiteral, upper: &ValueLiteral) -> bool {
286        match (lower, upper) {
287            // Both numeric
288            (ValueLiteral::Integer(_), ValueLiteral::Integer(_)) => true,
289            (ValueLiteral::Integer(_), ValueLiteral::Float(_)) => true,
290            (ValueLiteral::Float(_), ValueLiteral::Integer(_)) => true,
291            (ValueLiteral::Float(_), ValueLiteral::Float(_)) => true,
292            // Both string
293            (ValueLiteral::String(_), ValueLiteral::String(_)) => true,
294            // Everything else incompatible
295            _ => false,
296        }
297    }
298
299    /// Validate literal for IN list (reject Null and Boolean)
300    fn validate_in_literal(&self, lit: &ValueLiteral) -> ParseResult<()> {
301        match lit {
302            ValueLiteral::Null => Err(ParseError {
303                message: format!(
304                    "NULL is not allowed in IN list near position {} in:\n  {}",
305                    self.position, self.input
306                ),
307            }),
308            ValueLiteral::Boolean(_) => Err(ParseError {
309                message: format!(
310                    "Boolean literals are not allowed in IN list near position {} in:\n  {}",
311                    self.position, self.input
312                ),
313            }),
314            ValueLiteral::Integer(_) | ValueLiteral::Float(_) | ValueLiteral::String(_) => Ok(()),
315        }
316    }
317
318    /// Check if two literals are exactly the same type (for IN list)
319    /// No mixing of Integer and Float allowed
320    fn are_exact_same_type(a: &ValueLiteral, b: &ValueLiteral) -> bool {
321        matches!((a, b), 
322            (ValueLiteral::Integer(_), ValueLiteral::Integer(_)) |
323            (ValueLiteral::Float(_), ValueLiteral::Float(_))     |
324            (ValueLiteral::String(_), ValueLiteral::String(_)))
325    }
326
327    /// Validate BETWEEN bounds: lower must be <= upper
328    fn validate_between_bounds(lower: &ValueLiteral, upper: &ValueLiteral, input: &str, position: usize) -> ParseResult<()> {
329        match (lower, upper) {
330            // Integer comparison
331            (ValueLiteral::Integer(l), ValueLiteral::Integer(u)) => {
332                if l > u {
333                    return Err(ParseError {
334                        message: format!(
335                            "BETWEEN lower bound ({}) must be less than or equal to upper bound ({}) near position {} in:\n  {}",
336                            l, u, position, input
337                        ),
338                    });
339                }
340            }
341            // Float comparison
342            (ValueLiteral::Float(l), ValueLiteral::Float(u)) => {
343                if l > u {
344                    return Err(ParseError {
345                        message: format!(
346                            "BETWEEN lower bound ({}) must be less than or equal to upper bound ({}) near position {} in:\n  {}",
347                            l, u, position, input
348                        ),
349                    });
350                }
351            }
352            // Mixed numeric: Integer and Float
353            (ValueLiteral::Integer(l), ValueLiteral::Float(u)) => {
354                if (*l as f64) > *u {
355                    return Err(ParseError {
356                        message: format!(
357                            "BETWEEN lower bound ({}) must be less than or equal to upper bound ({}) near position {} in:\n  {}",
358                            l, u, position, input
359                        ),
360                    });
361                }
362            }
363            (ValueLiteral::Float(l), ValueLiteral::Integer(u)) => {
364                if *l > (*u as f64) {
365                    return Err(ParseError {
366                        message: format!(
367                            "BETWEEN lower bound ({}) must be less than or equal to upper bound ({}) near position {} in:\n  {}",
368                            l, u, position, input
369                        ),
370                    });
371                }
372            }
373            // String comparison
374            (ValueLiteral::String(l), ValueLiteral::String(u)) => {
375                if l > u {
376                    return Err(ParseError {
377                        message: format!(
378                            "BETWEEN lower bound ('{}') must be less than or equal to upper bound ('{}') near position {} in:\n  {}",
379                            l, u, position, input
380                        ),
381                    });
382                }
383            }
384            // Other combinations should have been caught by type compatibility check
385            _ => {}
386        }
387        Ok(())
388    }
389
390    // ========================================================================
391    // BOOLEAN EXPRESSION PARSING
392    // ========================================================================
393
394    /// BooleanExpression = BooleanOrExpression
395    fn parse_boolean_expression(&mut self) -> ParseResult<BooleanExpr> {
396        self.parse_boolean_or_expression()
397    }
398
399    /// BooleanOrExpression = BooleanAndExpression { "OR" BooleanAndExpression }
400    fn parse_boolean_or_expression(&mut self) -> ParseResult<BooleanExpr> {
401        let mut left = self.parse_boolean_and_expression()?;
402
403        while self.current_token() == &Token::Or {
404            self.advance();
405            let right = self.parse_boolean_and_expression()?;
406            left = BooleanExpr::Or(Box::new(left), Box::new(right));
407        }
408
409        Ok(left)
410    }
411
412    /// BooleanAndExpression = BooleanTerm { "AND" BooleanTerm }
413    fn parse_boolean_and_expression(&mut self) -> ParseResult<BooleanExpr> {
414        let mut left = self.parse_boolean_term()?;
415
416        while self.current_token() == &Token::And {
417            self.advance();
418            let right = self.parse_boolean_term()?;
419            left = BooleanExpr::And(Box::new(left), Box::new(right));
420        }
421
422        Ok(left)
423    }
424
425    /// BooleanTerm = "NOT" BooleanTerm
426    ///             | "(" BooleanExpression ")"
427    ///             | BooleanLiteral
428    ///             | Variable
429    ///             | RelationalExpression
430    fn parse_boolean_term(&mut self) -> ParseResult<BooleanExpr> {
431        match self.current_token() {
432            Token::Not => {
433                self.advance();
434                let expr = self.parse_boolean_term()?;
435                Ok(BooleanExpr::Not(Box::new(expr)))
436            }
437            Token::LeftParen => {
438                // Need to distinguish between:
439                // 1. (boolean_expr) like (x > 5) or (x > 5 AND y < 10)
440                // 2. (value_expr) OP value like (x + y) > 10
441                //
442                // Strategy: Look ahead past the '(' to see what's inside
443                // If we see patterns like "x >" or "NOT" or "TRUE/FALSE" followed by operators,
444                // it's likely a boolean expression
445                self.advance(); // consume '('
446
447                // Special case: check if this is a parenthesized boolean expression
448                // by looking for boolean operators or seeing if it's a complete relational expr
449                let saved_pos = self.position;
450
451                // Try parsing as a boolean expression first
452                match self.parse_boolean_expression() {
453                    Ok(expr) => {
454                        if self.current_token() == &Token::RightParen {
455                            self.advance(); // consume ')'
456                            Ok(expr)
457                        } else {
458                            // Failed to find closing paren, might be (value_expr) OP ...
459                            // Backtrack and try as relational
460                            self.position = saved_pos - 1; // go back before '('
461                            let rel = self.parse_relational_expression()?;
462                            Ok(BooleanExpr::Relational(rel))
463                        }
464                    }
465                    Err(_) => {
466                        // Failed to parse as boolean, try as relational
467                        self.position = saved_pos - 1; // go back before '('
468                        let rel = self.parse_relational_expression()?;
469                        Ok(BooleanExpr::Relational(rel))
470                    }
471                }
472            }
473            Token::True => {
474                self.advance();
475                Ok(BooleanExpr::Literal(true))
476            }
477            Token::False => {
478                self.advance();
479                Ok(BooleanExpr::Literal(false))
480            }
481            Token::Identifier(_) => {
482                // Could be a variable or start of relational expression
483                // We need to look ahead to determine which
484                if self.is_relational_operator_ahead() || self.is_arithmetic_operator_ahead() {
485                    let rel = self.parse_relational_expression()?;
486                    Ok(BooleanExpr::Relational(rel))
487                } else {
488                    // It's a variable (boolean at runtime)
489                    if let Token::Identifier(name) = self.current_token() {
490                        let name = name.clone();
491                        self.advance();
492                        Ok(BooleanExpr::Variable(name))
493                    } else {
494                        unreachable!()
495                    }
496                }
497            }
498            _ => {
499                // Default case: try to parse as relational expression
500                // This includes literals, etc.
501                let rel = self.parse_relational_expression()?;
502                Ok(BooleanExpr::Relational(rel))
503            }
504        }
505    }
506
507    /// Check if a relational operator follows
508    fn is_relational_operator_ahead(&self) -> bool {
509        // Look ahead to see if there's a relational operator
510        let next = self.peek_token();
511        matches!(next,
512            Token::Equal | Token::NotEqual |
513            Token::GreaterThan | Token::GreaterOrEqual |
514            Token::LessThan | Token::LessOrEqual |
515            Token::Like | Token::Between | Token::In | Token::Is |
516            Token::Not  // For NOT LIKE, NOT BETWEEN, NOT IN
517        )
518    }
519
520    /// Check if an arithmetic operator follows
521    fn is_arithmetic_operator_ahead(&self) -> bool {
522        // Look ahead to see if there's an arithmetic operator
523        let next = self.peek_token();
524        matches!(next,
525            Token::Plus | Token::Minus | Token::Star | Token::Slash | Token::Percent
526        )
527    }
528
529    // ========================================================================
530    // RELATIONAL EXPRESSION PARSING
531    // ========================================================================
532
533    /// RelationalExpression = EqualityExpression
534    ///                      | ComparisonExpression
535    ///                      | IsNullExpression
536    fn parse_relational_expression(&mut self) -> ParseResult<RelationalExpr> {
537        let left = self.parse_value_expression()?;
538
539        match self.current_token() {
540            Token::Equal => {
541                self.advance();
542                let right = self.parse_value_expression()?;
543                Ok(RelationalExpr::Equality {
544                    left,
545                    op: EqualityOp::Equal,
546                    right,
547                })
548            }
549            Token::NotEqual => {
550                self.advance();
551                let right = self.parse_value_expression()?;
552                Ok(RelationalExpr::Equality {
553                    left,
554                    op: EqualityOp::NotEqual,
555                    right,
556                })
557            }
558            Token::GreaterThan => {
559                self.advance();
560                let right = self.parse_value_expression()?;
561                Ok(RelationalExpr::Comparison {
562                    left,
563                    op: ComparisonOp::GreaterThan,
564                    right,
565                })
566            }
567            Token::GreaterOrEqual => {
568                self.advance();
569                let right = self.parse_value_expression()?;
570                Ok(RelationalExpr::Comparison {
571                    left,
572                    op: ComparisonOp::GreaterOrEqual,
573                    right,
574                })
575            }
576            Token::LessThan => {
577                self.advance();
578                let right = self.parse_value_expression()?;
579                Ok(RelationalExpr::Comparison {
580                    left,
581                    op: ComparisonOp::LessThan,
582                    right,
583                })
584            }
585            Token::LessOrEqual => {
586                self.advance();
587                let right = self.parse_value_expression()?;
588                Ok(RelationalExpr::Comparison {
589                    left,
590                    op: ComparisonOp::LessOrEqual,
591                    right,
592                })
593            }
594            Token::Like => {
595                self.advance();
596                let pattern = self.expect_string_literal()?;
597                let escape = if self.current_token() == &Token::Escape {
598                    self.advance();
599                    Some(self.expect_string_literal()?)
600                } else {
601                    None
602                };
603                Ok(RelationalExpr::Like {
604                    expr: left,
605                    pattern,
606                    escape,
607                    negated: false,
608                })
609            }
610            Token::Not => {
611                self.advance();
612                match self.current_token() {
613                    Token::Like => {
614                        self.advance();
615                        let pattern = self.expect_string_literal()?;
616                        let escape = if self.current_token() == &Token::Escape {
617                            self.advance();
618                            Some(self.expect_string_literal()?)
619                        } else {
620                            None
621                        };
622                        Ok(RelationalExpr::Like {
623                            expr: left,
624                            pattern,
625                            escape,
626                            negated: true,
627                        })
628                    }
629                    Token::Between => {
630                        self.advance();
631                        let lower_expr = self.parse_value_expression()?;
632                        self.expect(Token::And)?;
633                        let upper_expr = self.parse_value_expression()?;
634
635                        // Extract literals from expressions
636                        let lower_lit = Self::extract_literal(&lower_expr)?;
637                        let upper_lit = Self::extract_literal(&upper_expr)?;
638
639                        // Reject NULL
640                        if matches!(lower_lit, ValueLiteral::Null) {
641                            return Err(ParseError {
642                                message: format!(
643                                    "NULL is not allowed as lower bound in NOT BETWEEN near position {} in:\n  {}",
644                                    self.position, self.input
645                                ),
646                            });
647                        }
648                        if matches!(upper_lit, ValueLiteral::Null) {
649                            return Err(ParseError {
650                                message: format!(
651                                    "NULL is not allowed as upper bound in NOT BETWEEN near position {} in:\n  {}",
652                                    self.position, self.input
653                                ),
654                            });
655                        }
656
657                        // Reject Boolean
658                        if matches!(lower_lit, ValueLiteral::Boolean(_)) {
659                            return Err(ParseError {
660                                message: format!(
661                                    "Boolean literals are not allowed as lower bound in NOT BETWEEN near position {} in:\n  {}",
662                                    self.position, self.input
663                                ),
664                            });
665                        }
666                        if matches!(upper_lit, ValueLiteral::Boolean(_)) {
667                            return Err(ParseError {
668                                message: format!(
669                                    "Boolean literals are not allowed as upper bound in NOT BETWEEN near position {} in:\n  {}",
670                                    self.position, self.input
671                                ),
672                            });
673                        }
674
675                        // Check type compatibility
676                        if !Self::are_between_compatible(&lower_lit, &upper_lit) {
677                            return Err(ParseError {
678                                message: format!(
679                                    "NOT BETWEEN bounds must be both numeric or both string, found {} and {} near position {} in:\n  {}",
680                                    Self::literal_type_name(&lower_lit),
681                                    Self::literal_type_name(&upper_lit),
682                                    self.position,
683                                    self.input
684                                ),
685                            });
686                        }
687
688                        // Validate bounds order: lower <= upper
689                        Self::validate_between_bounds(&lower_lit, &upper_lit, &self.input, self.position)?;
690
691                        Ok(RelationalExpr::Between {
692                            expr: left,
693                            lower: lower_expr,
694                            upper: upper_expr,
695                            negated: true,
696                        })
697                    }
698                    Token::In => {
699                        self.advance();
700                        let values = self.parse_string_list()?;
701                        Ok(RelationalExpr::In {
702                            expr: left,
703                            values,
704                            negated: true,
705                        })
706                    }
707                    _ => Err(ParseError {
708                        message: format!("Expected LIKE, BETWEEN, or IN after NOT, got {} near position {} in:\n  {}", self.current_token(), self.position, self.input),
709                    }),
710                }
711            }
712            Token::Between => {
713                self.advance();
714                let lower_expr = self.parse_value_expression()?;
715                self.expect(Token::And)?;
716                let upper_expr = self.parse_value_expression()?;
717
718                // Extract literals from expressions
719                let lower_lit = Self::extract_literal(&lower_expr)?;
720                let upper_lit = Self::extract_literal(&upper_expr)?;
721
722                // Reject NULL
723                if matches!(lower_lit, ValueLiteral::Null) {
724                    return Err(ParseError {
725                        message: format!(
726                            "NULL is not allowed as lower bound in BETWEEN near position {} in:\n  {}",
727                            self.position, self.input
728                        ),
729                    });
730                }
731                if matches!(upper_lit, ValueLiteral::Null) {
732                    return Err(ParseError {
733                        message: format!(
734                            "NULL is not allowed as upper bound in BETWEEN near position {} in:\n  {}",
735                            self.position, self.input
736                        ),
737                    });
738                }
739
740                // Reject Boolean
741                if matches!(lower_lit, ValueLiteral::Boolean(_)) {
742                    return Err(ParseError {
743                        message: format!(
744                            "Boolean literals are not allowed as lower bound in BETWEEN near position {} in:\n  {}",
745                            self.position, self.input
746                        ),
747                    });
748                }
749                if matches!(upper_lit, ValueLiteral::Boolean(_)) {
750                    return Err(ParseError {
751                        message: format!(
752                            "Boolean literals are not allowed as upper bound in BETWEEN near position {} in:\n  {}",
753                            self.position, self.input
754                        ),
755                    });
756                }
757
758                // Check type compatibility
759                if !Self::are_between_compatible(&lower_lit, &upper_lit) {
760                    return Err(ParseError {
761                        message: format!(
762                            "BETWEEN bounds must be both numeric or both string, found {} and {} near position {} in:\n  {}",
763                            Self::literal_type_name(&lower_lit),
764                            Self::literal_type_name(&upper_lit),
765                            self.position,
766                            self.input
767                        ),
768                    });
769                }
770
771                // Validate bounds order: lower <= upper
772                Self::validate_between_bounds(&lower_lit, &upper_lit, &self.input, self.position)?;
773
774                Ok(RelationalExpr::Between {
775                    expr: left,
776                    lower: lower_expr,
777                    upper: upper_expr,
778                    negated: false,
779                })
780            }
781            Token::In => {
782                self.advance();
783                let values = self.parse_string_list()?;
784                Ok(RelationalExpr::In {
785                    expr: left,
786                    values,
787                    negated: false,
788                })
789            }
790            Token::Is => {
791                self.advance();
792                let negated = if self.current_token() == &Token::Not {
793                    self.advance();
794                    true
795                } else {
796                    false
797                };
798                self.expect(Token::Null)?;
799                Ok(RelationalExpr::IsNull {
800                    expr: left,
801                    negated,
802                })
803            }
804            _ => Err(ParseError {
805                message: format!("Expected relational operator, got {} near position {} in:\n  {}", self.current_token(), self.position, self.input),
806            }),
807        }
808    }
809
810    /// Expect a string literal token
811    fn expect_string_literal(&mut self) -> ParseResult<String> {
812        match self.current_token() {
813            Token::StringLiteral(s) => {
814                let s = s.clone();
815                self.advance();
816                Ok(s)
817            }
818            _ => Err(ParseError {
819                message: format!("Expected string literal, got {} near position {} in:\n  {}", self.current_token(), self.position, self.input),
820            }),
821        }
822    }
823
824    /// Parse value literal list for IN operator with strict type checking
825    /// All values must be the same exact type (Integer, Float, or String)
826    /// NULL and Boolean are rejected
827    fn parse_string_list(&mut self) -> ParseResult<Vec<ValueLiteral>> {
828        self.expect(Token::LeftParen)?;
829
830        let first = self.expect_value_literal()?;
831
832        // Validate first literal (reject NULL and Boolean)
833        self.validate_in_literal(&first)?;
834
835        let mut values = vec![first.clone()];
836
837        while self.current_token() == &Token::Comma {
838            self.advance();
839            let next = self.expect_value_literal()?;
840
841            // Validate this literal (reject NULL and Boolean)
842            self.validate_in_literal(&next)?;
843
844            // Check type consistency with first value
845            if !Self::are_exact_same_type(&first, &next) {
846                return Err(ParseError {
847                    message: format!(
848                        "IN list values must all be the same type, found {} and {} near position {} in:\n  {}",
849                        Self::literal_type_name(&first),
850                        Self::literal_type_name(&next),
851                        self.position,
852                        self.input
853                    ),
854                });
855            }
856
857            values.push(next);
858        }
859
860        self.expect(Token::RightParen)?;
861        Ok(values)
862    }
863
864    /// Expect a value literal token (string, integer, float, etc.)
865    /// Also handles unary minus for negative numbers
866    fn expect_value_literal(&mut self) -> ParseResult<ValueLiteral> {
867        // Handle unary minus for negative numbers
868        let is_negative = if self.current_token() == &Token::Minus {
869            self.advance();
870            true
871        } else {
872            false
873        };
874
875        match self.current_token().clone() {
876            Token::StringLiteral(s) => {
877                if is_negative {
878                    return Err(ParseError {
879                        message: format!("Cannot apply unary minus to string literal near position {} in:\n  {}", self.position, self.input),
880                    });
881                }
882                self.advance();
883                Ok(ValueLiteral::String(s))
884            }
885            Token::IntegerLiteral(n) => {
886                self.advance();
887                Ok(ValueLiteral::Integer(if is_negative { -n } else { n }))
888            }
889            Token::FloatLiteral(f) => {
890                self.advance();
891                Ok(ValueLiteral::Float(if is_negative { -f } else { f }))
892            }
893            Token::Null => {
894                if is_negative {
895                    return Err(ParseError {
896                        message: format!("Cannot apply unary minus to NULL near position {} in:\n  {}", self.position, self.input),
897                    });
898                }
899                self.advance();
900                Ok(ValueLiteral::Null)
901            }
902            Token::True => {
903                if is_negative {
904                    return Err(ParseError {
905                        message: format!("Cannot apply unary minus to boolean near position {} in:\n  {}", self.position, self.input),
906                    });
907                }
908                self.advance();
909                Ok(ValueLiteral::Boolean(true))
910            }
911            Token::False => {
912                if is_negative {
913                    return Err(ParseError {
914                        message: format!("Cannot apply unary minus to boolean near position {} in:\n  {}", self.position, self.input),
915                    });
916                }
917                self.advance();
918                Ok(ValueLiteral::Boolean(false))
919            }
920            _ => Err(ParseError {
921                message: format!("Expected literal value, got {} near position {} in:\n  {}", self.current_token(), self.position, self.input),
922            }),
923        }
924    }
925
926    // ========================================================================
927    // VALUE EXPRESSION PARSING
928    // ========================================================================
929
930    /// ValueExpression = AddExpression
931    fn parse_value_expression(&mut self) -> ParseResult<ValueExpr> {
932        self.parse_add_expression()
933    }
934
935    /// AddExpression = MultExpression { ( "+" | "-" ) MultExpression }
936    fn parse_add_expression(&mut self) -> ParseResult<ValueExpr> {
937        let mut left = self.parse_mult_expression()?;
938
939        loop {
940            match self.current_token() {
941                Token::Plus => {
942                    self.advance();
943                    let right = self.parse_mult_expression()?;
944                    left = ValueExpr::Add(Box::new(left), Box::new(right));
945                }
946                Token::Minus => {
947                    self.advance();
948                    let right = self.parse_mult_expression()?;
949                    left = ValueExpr::Subtract(Box::new(left), Box::new(right));
950                }
951                _ => break,
952            }
953        }
954
955        Ok(left)
956    }
957
958    /// MultExpression = UnaryValueExpression { ( "*" | "/" | "%" ) UnaryValueExpression }
959    fn parse_mult_expression(&mut self) -> ParseResult<ValueExpr> {
960        let mut left = self.parse_unary_value_expression()?;
961
962        loop {
963            match self.current_token() {
964                Token::Star => {
965                    self.advance();
966                    let right = self.parse_unary_value_expression()?;
967                    left = ValueExpr::Multiply(Box::new(left), Box::new(right));
968                }
969                Token::Slash => {
970                    self.advance();
971                    let right = self.parse_unary_value_expression()?;
972                    left = ValueExpr::Divide(Box::new(left), Box::new(right));
973                }
974                Token::Percent => {
975                    self.advance();
976                    let right = self.parse_unary_value_expression()?;
977                    left = ValueExpr::Modulo(Box::new(left), Box::new(right));
978                }
979                _ => break,
980            }
981        }
982
983        Ok(left)
984    }
985
986    /// UnaryValueExpression = "+" UnaryValueExpression
987    ///                      | "-" UnaryValueExpression
988    ///                      | ValuePrimary
989    fn parse_unary_value_expression(&mut self) -> ParseResult<ValueExpr> {
990        match self.current_token() {
991            Token::Plus => {
992                self.advance();
993                let expr = self.parse_unary_value_expression()?;
994                Ok(ValueExpr::UnaryPlus(Box::new(expr)))
995            }
996            Token::Minus => {
997                self.advance();
998                let expr = self.parse_unary_value_expression()?;
999                Ok(ValueExpr::UnaryMinus(Box::new(expr)))
1000            }
1001            _ => self.parse_value_primary(),
1002        }
1003    }
1004
1005    /// ValuePrimary = ValueLiteral
1006    ///              | Variable
1007    ///              | "(" ValueExpression ")"
1008    fn parse_value_primary(&mut self) -> ParseResult<ValueExpr> {
1009        match self.current_token().clone() {
1010            Token::IntegerLiteral(n) => {
1011                self.advance();
1012                Ok(ValueExpr::Literal(ValueLiteral::Integer(n)))
1013            }
1014            Token::FloatLiteral(n) => {
1015                self.advance();
1016                Ok(ValueExpr::Literal(ValueLiteral::Float(n)))
1017            }
1018            Token::StringLiteral(s) => {
1019                self.advance();
1020                Ok(ValueExpr::Literal(ValueLiteral::String(s)))
1021            }
1022            Token::Null => {
1023                self.advance();
1024                Ok(ValueExpr::Literal(ValueLiteral::Null))
1025            }
1026            Token::True => {
1027                self.advance();
1028                Ok(ValueExpr::Literal(ValueLiteral::Boolean(true)))
1029            }
1030            Token::False => {
1031                self.advance();
1032                Ok(ValueExpr::Literal(ValueLiteral::Boolean(false)))
1033            }
1034            Token::Identifier(name) => {
1035                self.advance();
1036                Ok(ValueExpr::Variable(name))
1037            }
1038            Token::LeftParen => {
1039                self.advance();
1040                let expr = self.parse_value_expression()?;
1041                self.expect(Token::RightParen)?;
1042                Ok(expr)
1043            }
1044            _ => Err(ParseError {
1045                message: format!("Expected value expression, got {}", self.current_token()),
1046            }),
1047        }
1048    }
1049}
1050
1051/// Public API function to parse a SQL boolean expression string.
1052///
1053/// # Examples
1054/// ```
1055/// use sqlexpr_rust::{parse, BooleanExpr, ParseError};
1056///
1057/// let result: Result<BooleanExpr, ParseError> = parse("x > 5 OR y < 10");
1058/// assert!(result.is_ok());
1059///
1060/// let result: Result<BooleanExpr, ParseError> = parse("name LIKE '%test%'");
1061/// assert!(result.is_ok());
1062/// ```
1063
1064pub fn parse(input: &str) -> Result<BooleanExpr, ParseError> {
1065    let mut parser = Parser::new(input)?;
1066    parser.parse()
1067}
1068
#[cfg(test)]
mod tests {
    //! Smoke tests: each one checks that a representative expression is
    //! accepted by the public `parse` entry point.

    use super::*;

    /// A single relational comparison.
    #[test]
    fn test_simple_comparison() {
        assert!(parse("x > 5").is_ok());
    }

    /// Two comparisons joined by AND.
    #[test]
    fn test_boolean_and() {
        assert!(parse("x > 5 AND y < 10").is_ok());
    }

    /// LIKE with a string pattern.
    #[test]
    fn test_like_operator() {
        assert!(parse("name LIKE '%test%'").is_ok());
    }

    /// BETWEEN with ordered integer bounds.
    #[test]
    fn test_between() {
        assert!(parse("age BETWEEN 18 AND 65").is_ok());
    }

    /// IN with a list of string literals.
    #[test]
    fn test_in_operator() {
        assert!(parse("status IN ('active', 'pending')").is_ok());
    }

    /// IS NULL test on a variable.
    #[test]
    fn test_is_null() {
        assert!(parse("value IS NULL").is_ok());
    }

    /// Parenthesized arithmetic on both sides of a comparison.
    #[test]
    fn test_arithmetic_in_comparison() {
        assert!(parse("(a + b) > (c - d)").is_ok());
    }
}