Skip to main content

jsonata_core/
parser.rs

1// JSONata expression parser
2// Mirrors parser.js from the reference implementation
3
4#![allow(clippy::approx_constant)]
5
6use crate::ast::{AstNode, BinaryOp, PathStep, Stage, UnaryOp};
7use thiserror::Error;
8
9/// Parser errors
10#[derive(Error, Debug)]
11pub enum ParserError {
12    #[error("Unexpected token: {0}")]
13    UnexpectedToken(String),
14
15    #[error("Unexpected end of expression")]
16    UnexpectedEnd,
17
18    #[error("Invalid syntax: {0}")]
19    InvalidSyntax(String),
20
21    #[error("Invalid number: {0}")]
22    InvalidNumber(String),
23
24    #[error("Unclosed string literal")]
25    UnclosedString,
26
27    #[error("Invalid escape sequence: {0}")]
28    InvalidEscape(String),
29
30    #[error("Unclosed comment")]
31    UnclosedComment,
32
33    #[error("Unclosed backtick name")]
34    UnclosedBacktick,
35
36    #[error("Expected {expected}, found {found}")]
37    Expected { expected: String, found: String },
38}
39
40/// Token types for the lexer
41#[derive(Debug, Clone, PartialEq)]
42pub enum Token {
43    // Literals
44    String(String),
45    Number(f64),
46    True,
47    False,
48    Null,
49    Undefined,                                // The `undefined` keyword
50    Regex { pattern: String, flags: String }, // /pattern/flags
51
52    // Identifiers and operators
53    Identifier(String),
54    Variable(String),
55    ParentVariable(String), // $$ variables
56    Function,               // function keyword
57
58    // Operators
59    Plus,
60    Minus,
61    Star,
62    StarStar, // **
63    Slash,
64    Percent,
65    Equal,
66    NotEqual,
67    LessThan,
68    LessThanOrEqual,
69    GreaterThan,
70    GreaterThanOrEqual,
71    And,
72    Or,
73    In,
74    Ampersand,
75    Dot,
76    DotDot,
77    Question,
78    QuestionQuestion, // ??
79    QuestionColon,    // ?:
80    Colon,
81    ColonEqual, // :=
82    TildeArrow, // ~>
83
84    // Delimiters
85    LeftParen,
86    RightParen,
87    LeftBracket,
88    RightBracket,
89    LeftBrace,
90    RightBrace,
91    Comma,
92    Semicolon,
93    Caret, // ^ sort operator
94    Pipe,  // | transform operator
95
96    // Special
97    Hash, // # index binding operator
98    Eof,
99}
100
101/// Lexer for tokenizing JSONata expressions
102pub struct Lexer {
103    input: Vec<char>,
104    position: usize,
105    last_token: Option<Token>,
106}
107
108impl Lexer {
109    pub fn new(input: String) -> Self {
110        Lexer {
111            input: input.chars().collect(),
112            position: 0,
113            last_token: None,
114        }
115    }
116
117    fn current(&self) -> Option<char> {
118        self.input.get(self.position).copied()
119    }
120
121    fn peek(&self, offset: usize) -> Option<char> {
122        self.input.get(self.position + offset).copied()
123    }
124
125    fn advance(&mut self) {
126        if self.position < self.input.len() {
127            self.position += 1;
128        }
129    }
130
131    fn skip_whitespace(&mut self) {
132        while self.current().is_some_and(|ch| ch.is_whitespace()) {
133            self.advance();
134        }
135    }
136
137    fn skip_comment(&mut self) -> Result<(), ParserError> {
138        // We're at '/', check if next is '*'
139        if self.peek(1) == Some('*') {
140            self.advance(); // skip '/'
141            self.advance(); // skip '*'
142
143            // Find closing */
144            loop {
145                match self.current() {
146                    None => return Err(ParserError::UnclosedComment),
147                    Some('*') if self.peek(1) == Some('/') => {
148                        self.advance(); // skip '*'
149                        self.advance(); // skip '/'
150                        break;
151                    }
152                    Some(_) => self.advance(),
153                }
154            }
155        }
156        Ok(())
157    }
158
159    fn read_string(&mut self, quote_char: char) -> Result<String, ParserError> {
160        let mut result = String::new();
161        self.advance(); // skip opening quote
162
163        loop {
164            match self.current() {
165                None => return Err(ParserError::UnclosedString),
166                Some(ch) if ch == quote_char => {
167                    self.advance(); // skip closing quote
168                    return Ok(result);
169                }
170                Some('\\') => {
171                    self.advance();
172                    match self.current() {
173                        None => return Err(ParserError::UnclosedString),
174                        Some('"') => result.push('"'),
175                        Some('\\') => result.push('\\'),
176                        Some('/') => result.push('/'),
177                        Some('b') => result.push('\u{0008}'),
178                        Some('f') => result.push('\u{000C}'),
179                        Some('n') => result.push('\n'),
180                        Some('r') => result.push('\r'),
181                        Some('t') => result.push('\t'),
182                        Some('u') => {
183                            // Unicode escape sequence \uXXXX
184                            self.advance();
185                            let mut hex = String::new();
186                            for _ in 0..4 {
187                                match self.current() {
188                                    Some(h) if h.is_ascii_hexdigit() => {
189                                        hex.push(h);
190                                        self.advance();
191                                    }
192                                    _ => {
193                                        return Err(ParserError::InvalidEscape(format!(
194                                            "\\u{}",
195                                            hex
196                                        )))
197                                    }
198                                }
199                            }
200                            let code = u32::from_str_radix(&hex, 16).unwrap();
201                            if (0xD800..=0xDBFF).contains(&code) {
202                                // High surrogate - expect \uXXXX low surrogate to follow
203                                if self.current() == Some('\\') {
204                                    self.advance();
205                                    if self.current() == Some('u') {
206                                        self.advance();
207                                        let mut low_hex = String::new();
208                                        for _ in 0..4 {
209                                            match self.current() {
210                                                Some(h) if h.is_ascii_hexdigit() => {
211                                                    low_hex.push(h);
212                                                    self.advance();
213                                                }
214                                                _ => {
215                                                    return Err(ParserError::InvalidEscape(
216                                                        format!("\\u{}", low_hex),
217                                                    ))
218                                                }
219                                            }
220                                        }
221                                        let low = u32::from_str_radix(&low_hex, 16).unwrap();
222                                        if (0xDC00..=0xDFFF).contains(&low) {
223                                            let cp =
224                                                0x10000 + (code - 0xD800) * 0x400 + (low - 0xDC00);
225                                            if let Some(ch) = char::from_u32(cp) {
226                                                result.push(ch);
227                                            } else {
228                                                return Err(ParserError::InvalidEscape(format!(
229                                                    "\\u{}\\u{}",
230                                                    hex, low_hex
231                                                )));
232                                            }
233                                        } else {
234                                            return Err(ParserError::InvalidEscape(format!(
235                                                "\\u{}\\u{}",
236                                                hex, low_hex
237                                            )));
238                                        }
239                                    } else {
240                                        return Err(ParserError::InvalidEscape(format!(
241                                            "\\u{}",
242                                            hex
243                                        )));
244                                    }
245                                } else {
246                                    return Err(ParserError::InvalidEscape(format!("\\u{}", hex)));
247                                }
248                            } else if let Some(ch) = char::from_u32(code) {
249                                result.push(ch);
250                            } else {
251                                return Err(ParserError::InvalidEscape(format!("\\u{}", hex)));
252                            }
253                            continue; // Don't advance again
254                        }
255                        Some(ch) => return Err(ParserError::InvalidEscape(format!("\\{}", ch))),
256                    }
257                    self.advance();
258                }
259                Some(ch) => {
260                    result.push(ch);
261                    self.advance();
262                }
263            }
264        }
265    }
266
267    fn read_number(&mut self) -> Result<f64, ParserError> {
268        let start = self.position;
269
270        // Integer part (no minus sign - negation is handled as unary operator)
271        if self.current() == Some('0') {
272            self.advance();
273        } else if self.current().is_some_and(|c| c.is_ascii_digit()) {
274            while self.current().is_some_and(|c| c.is_ascii_digit()) {
275                self.advance();
276            }
277        } else {
278            return Err(ParserError::InvalidNumber("Expected digit".to_string()));
279        }
280
281        // Fractional part
282        if self.current() == Some('.') && self.peek(1) != Some('.') {
283            // Only consume '.' if next char is not '.', to avoid consuming '..' range operator
284            self.advance();
285            if !self.current().is_some_and(|c| c.is_ascii_digit()) {
286                return Err(ParserError::InvalidNumber(
287                    "Expected digit after decimal point".to_string(),
288                ));
289            }
290            while self.current().is_some_and(|c| c.is_ascii_digit()) {
291                self.advance();
292            }
293        }
294
295        // Exponent part
296        if matches!(self.current(), Some('e') | Some('E')) {
297            self.advance();
298            if matches!(self.current(), Some('+') | Some('-')) {
299                self.advance();
300            }
301            if !self.current().is_some_and(|c| c.is_ascii_digit()) {
302                return Err(ParserError::InvalidNumber(
303                    "Expected digit in exponent".to_string(),
304                ));
305            }
306            while self.current().is_some_and(|c| c.is_ascii_digit()) {
307                self.advance();
308            }
309        }
310
311        let num_str: String = self.input[start..self.position].iter().collect();
312        let num: f64 = num_str
313            .parse()
314            .map_err(|_| ParserError::InvalidNumber(num_str.clone()))?;
315
316        // Check for overflow to infinity
317        if num.is_infinite() {
318            return Err(ParserError::InvalidNumber(format!(
319                "S0102: Number out of range: {}",
320                num_str
321            )));
322        }
323
324        Ok(num)
325    }
326
327    fn read_identifier(&mut self) -> String {
328        let start = self.position;
329
330        while let Some(ch) = self.current() {
331            // Continue if alphanumeric or underscore
332            if ch.is_alphanumeric() || ch == '_' {
333                self.advance();
334            } else {
335                break;
336            }
337        }
338
339        self.input[start..self.position].iter().collect()
340    }
341
342    fn read_backtick_name(&mut self) -> Result<String, ParserError> {
343        self.advance(); // skip opening backtick
344        let start = self.position;
345
346        while let Some(ch) = self.current() {
347            if ch == '`' {
348                let name: String = self.input[start..self.position].iter().collect();
349                self.advance(); // skip closing backtick
350                return Ok(name);
351            }
352            self.advance();
353        }
354
355        Err(ParserError::UnclosedBacktick)
356    }
357
358    fn read_regex(&mut self) -> Result<Token, ParserError> {
359        self.advance(); // skip opening /
360
361        // Read pattern
362        let mut pattern = String::new();
363        let mut escaped = false;
364
365        loop {
366            match self.current() {
367                None => return Err(ParserError::UnclosedString),
368                Some('/') if !escaped => {
369                    self.advance(); // skip closing /
370                    break;
371                }
372                Some('\\') if !escaped => {
373                    escaped = true;
374                    pattern.push('\\');
375                    self.advance();
376                }
377                Some(ch) => {
378                    escaped = false;
379                    pattern.push(ch);
380                    self.advance();
381                }
382            }
383        }
384
385        // Read flags (optional)
386        let mut flags = String::new();
387        while let Some(ch) = self.current() {
388            if ch.is_alphabetic() {
389                flags.push(ch);
390                self.advance();
391            } else {
392                break;
393            }
394        }
395
396        Ok(Token::Regex { pattern, flags })
397    }
398
399    fn emit_token(&mut self, token: Token) -> Result<Token, ParserError> {
400        self.last_token = Some(token.clone());
401        Ok(token)
402    }
403
404    pub fn next_token(&mut self) -> Result<Token, ParserError> {
405        loop {
406            self.skip_whitespace();
407
408            match self.current() {
409                None => return Ok(Token::Eof),
410
411                // Comments
412                Some('/') if self.peek(1) == Some('*') => {
413                    self.skip_comment()?;
414                    continue; // Skip whitespace again after comment
415                }
416
417                // String literals
418                Some('"') => {
419                    let s = self.read_string('"')?;
420                    return self.emit_token(Token::String(s));
421                }
422                Some('\'') => {
423                    let s = self.read_string('\'')?;
424                    return self.emit_token(Token::String(s));
425                }
426
427                // Backtick names
428                Some('`') => {
429                    let name = self.read_backtick_name()?;
430                    return self.emit_token(Token::Identifier(name));
431                }
432
433                // Numbers (positive only - negation handled as unary operator)
434                Some(ch) if ch.is_ascii_digit() => {
435                    let num = self.read_number()?;
436                    return self.emit_token(Token::Number(num));
437                }
438
439                // Variables (start with $)
440                Some('$') if self.peek(1) == Some('$') => {
441                    // $$ - parent variable
442                    self.advance(); // skip first $
443                    self.advance(); // skip second $
444                    let name = self.read_identifier();
445                    return self.emit_token(Token::ParentVariable(name));
446                }
447                Some('$') => {
448                    self.advance();
449                    let name = self.read_identifier();
450                    return self.emit_token(Token::Variable(name));
451                }
452
453                // Two-character operators
454                Some('.') if self.peek(1) == Some('.') => {
455                    self.advance();
456                    self.advance();
457                    return Ok(Token::DotDot);
458                }
459                Some(':') if self.peek(1) == Some('=') => {
460                    self.advance();
461                    self.advance();
462                    return Ok(Token::ColonEqual);
463                }
464                Some('!') if self.peek(1) == Some('=') => {
465                    self.advance();
466                    self.advance();
467                    return Ok(Token::NotEqual);
468                }
469                Some('>') if self.peek(1) == Some('=') => {
470                    self.advance();
471                    self.advance();
472                    return Ok(Token::GreaterThanOrEqual);
473                }
474                Some('<') if self.peek(1) == Some('=') => {
475                    self.advance();
476                    self.advance();
477                    return Ok(Token::LessThanOrEqual);
478                }
479                Some('~') if self.peek(1) == Some('>') => {
480                    self.advance();
481                    self.advance();
482                    return self.emit_token(Token::TildeArrow);
483                }
484
485                // Single-character operators and delimiters
486                Some('(') => {
487                    self.advance();
488                    return Ok(Token::LeftParen);
489                }
490                Some(')') => {
491                    self.advance();
492                    return self.emit_token(Token::RightParen);
493                }
494                Some('[') => {
495                    self.advance();
496                    return Ok(Token::LeftBracket);
497                }
498                Some(']') => {
499                    self.advance();
500                    return self.emit_token(Token::RightBracket);
501                }
502                Some('{') => {
503                    self.advance();
504                    return Ok(Token::LeftBrace);
505                }
506                Some('}') => {
507                    self.advance();
508                    return self.emit_token(Token::RightBrace);
509                }
510                Some(',') => {
511                    self.advance();
512                    return self.emit_token(Token::Comma);
513                }
514                Some(';') => {
515                    self.advance();
516                    return self.emit_token(Token::Semicolon);
517                }
518                Some(':') => {
519                    self.advance();
520                    return Ok(Token::Colon);
521                }
522                Some('?') if self.peek(1) == Some('?') => {
523                    self.advance();
524                    self.advance();
525                    return Ok(Token::QuestionQuestion);
526                }
527                Some('?') if self.peek(1) == Some(':') => {
528                    self.advance();
529                    self.advance();
530                    return Ok(Token::QuestionColon);
531                }
532                Some('?') => {
533                    self.advance();
534                    return Ok(Token::Question);
535                }
536                Some('λ') => {
537                    // Lambda symbol (alternative to "function" keyword)
538                    self.advance();
539                    return Ok(Token::Function);
540                }
541                Some('.') => {
542                    self.advance();
543                    return Ok(Token::Dot);
544                }
545                Some('+') => {
546                    self.advance();
547                    return Ok(Token::Plus);
548                }
549                Some('-') => {
550                    self.advance();
551                    return Ok(Token::Minus);
552                }
553                Some('*') if self.peek(1) == Some('*') => {
554                    self.advance();
555                    self.advance();
556                    return Ok(Token::StarStar);
557                }
558                Some('*') => {
559                    self.advance();
560                    return Ok(Token::Star);
561                }
562                Some('/') => {
563                    // Determine if this is a regex literal or division operator
564                    // Regex literals can appear after:
565                    // - Start of expression (last_token is None)
566                    // - Operators: (, [, {, ,, ;, :, =, !=, <, >, <=, >=, +, -, *, %, &, |, ~, !, ?
567                    // - Keywords: and, or, in, function
568                    // Division operator appears after:
569                    // - Values: ), ], }, identifiers, variables, numbers, strings, etc.
570
571                    let is_regex = match &self.last_token {
572                        None => true, // Start of expression
573                        Some(Token::LeftParen)
574                        | Some(Token::LeftBracket)
575                        | Some(Token::LeftBrace) => true,
576                        Some(Token::Comma) | Some(Token::Semicolon) | Some(Token::Colon) => true,
577                        Some(Token::Equal) | Some(Token::NotEqual) => true,
578                        Some(Token::LessThan) | Some(Token::LessThanOrEqual) => true,
579                        Some(Token::GreaterThan) | Some(Token::GreaterThanOrEqual) => true,
580                        Some(Token::Plus) | Some(Token::Minus) | Some(Token::Star)
581                        | Some(Token::Percent) => true,
582                        Some(Token::Ampersand)
583                        | Some(Token::Question)
584                        | Some(Token::TildeArrow) => true,
585                        Some(Token::ColonEqual)
586                        | Some(Token::QuestionQuestion)
587                        | Some(Token::QuestionColon) => true,
588                        Some(Token::And) | Some(Token::Or) | Some(Token::In) => true,
589                        Some(Token::Function) => true,
590                        Some(Token::Identifier(s)) if s == "and" || s == "or" || s == "in" => true,
591                        _ => false, // After values, treat as division
592                    };
593
594                    if is_regex {
595                        let tok = self.read_regex()?;
596                        return self.emit_token(tok);
597                    } else {
598                        self.advance();
599                        return self.emit_token(Token::Slash);
600                    }
601                }
602                Some('%') => {
603                    self.advance();
604                    return Ok(Token::Percent);
605                }
606                Some('^') => {
607                    self.advance();
608                    return Ok(Token::Caret);
609                }
610                Some('#') => {
611                    self.advance();
612                    return Ok(Token::Hash);
613                }
614                Some('=') => {
615                    self.advance();
616                    return Ok(Token::Equal);
617                }
618                Some('<') => {
619                    self.advance();
620                    return Ok(Token::LessThan);
621                }
622                Some('>') => {
623                    self.advance();
624                    return Ok(Token::GreaterThan);
625                }
626                Some('&') => {
627                    self.advance();
628                    return Ok(Token::Ampersand);
629                }
630                Some('|') => {
631                    self.advance();
632                    return Ok(Token::Pipe);
633                }
634
635                // Identifiers and keywords
636                Some(ch) if ch.is_alphabetic() || ch == '_' => {
637                    let ident = self.read_identifier();
638                    let tok = match ident.as_str() {
639                        "true" => Token::True,
640                        "false" => Token::False,
641                        "null" => Token::Null,
642                        "undefined" => Token::Undefined,
643                        "function" => Token::Function,
644                        // "and", "or", "in" are now contextual keywords (handled in parser)
645                        _ => Token::Identifier(ident),
646                    };
647                    return self.emit_token(tok);
648                }
649
650                Some(ch) => {
651                    return Err(ParserError::UnexpectedToken(ch.to_string()));
652                }
653            }
654        }
655    }
656}
657
658/// Parser for JSONata expressions using Pratt parsing
659pub struct Parser {
660    lexer: Lexer,
661    current_token: Token,
662}
663
664impl Parser {
665    pub fn new(input: String) -> Result<Self, ParserError> {
666        let mut lexer = Lexer::new(input);
667        let current_token = lexer.next_token()?;
668        Ok(Parser {
669            lexer,
670            current_token,
671        })
672    }
673
674    fn advance(&mut self) -> Result<(), ParserError> {
675        self.current_token = self.lexer.next_token()?;
676        Ok(())
677    }
678
679    fn expect(&mut self, expected: Token) -> Result<(), ParserError> {
680        if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
681            self.advance()?;
682            Ok(())
683        } else {
684            Err(ParserError::Expected {
685                expected: format!("{:?}", expected),
686                found: format!("{:?}", self.current_token),
687            })
688        }
689    }
690
691    /// Get the binding power (precedence) for the current token
692    fn binding_power(&self, token: &Token) -> Option<(u8, u8)> {
693        // Returns (left_bp, right_bp) for left and right binding power
694        // Higher numbers = higher precedence
695        match token {
696            // Contextual keywords (treated as operators when in infix position)
697            Token::Identifier(name) if name == "or" => Some((25, 26)),
698            Token::Identifier(name) if name == "and" => Some((30, 31)),
699            Token::Identifier(name) if name == "in" => Some((40, 41)),
700            // Regular operators
701            Token::Or => Some((25, 26)),
702            Token::And => Some((30, 31)),
703            Token::Equal
704            | Token::NotEqual
705            | Token::LessThan
706            | Token::LessThanOrEqual
707            | Token::GreaterThan
708            | Token::GreaterThanOrEqual
709            | Token::In => Some((40, 41)),
710            Token::Ampersand => Some((50, 51)),
711            Token::Plus | Token::Minus => Some((50, 51)),
712            Token::Star | Token::Slash | Token::Percent => Some((60, 61)),
713            Token::Dot => Some((75, 85)), // Right bp is higher to prevent consuming postfix operators
714            Token::LeftBracket => Some((80, 81)),
715            Token::LeftParen => Some((80, 81)),
716            Token::LeftBrace => Some((80, 81)), // Object constructor as postfix
717            Token::Caret => Some((80, 81)),     // Sort operator as postfix
718            Token::Hash => Some((80, 81)),      // Index binding operator as postfix
719            Token::Question => Some((20, 21)),
720            Token::QuestionQuestion => Some((15, 16)), // Coalescing operator
721            Token::QuestionColon => Some((15, 16)),    // Default operator
722            Token::DotDot => Some((20, 21)),
723            Token::ColonEqual => Some((10, 9)), // Right associative
724            Token::TildeArrow => Some((70, 71)), // Chain/pipe operator
725            _ => None,
726        }
727    }
728
729    /// Parse a function signature: <param-types:return-type>
730    fn parse_signature(&mut self) -> Result<String, ParserError> {
731        // Expect <
732        if self.current_token != Token::LessThan {
733            return Err(ParserError::Expected {
734                expected: "<".to_string(),
735                found: format!("{:?}", self.current_token),
736            });
737        }
738
739        // Build signature string by collecting characters until we find >
740        let mut signature = String::from("<");
741        self.advance()?; // skip <
742
743        // Collect all characters until we find the closing >
744        // This is a bit tricky because we need to handle nested <> for array types like a<s>
745        let mut depth = 1;
746
747        while depth > 0 && self.current_token != Token::Eof {
748            match &self.current_token {
749                Token::LessThan => {
750                    signature.push('<');
751                    depth += 1;
752                    self.advance()?;
753                }
754                Token::GreaterThan => {
755                    depth -= 1;
756                    if depth > 0 {
757                        signature.push('>');
758                    }
759                    self.advance()?;
760                }
761                Token::Minus => {
762                    signature.push('-');
763                    self.advance()?;
764                }
765                Token::Colon => {
766                    signature.push(':');
767                    self.advance()?;
768                }
769                Token::Question => {
770                    signature.push('?');
771                    self.advance()?;
772                }
773                Token::QuestionColon => {
774                    // ?:  gets tokenized as QuestionColon in signatures like s?:s
775                    signature.push('?');
776                    signature.push(':');
777                    self.advance()?;
778                }
779                Token::Identifier(s) => {
780                    signature.push_str(s);
781                    self.advance()?;
782                }
783                Token::LeftParen | Token::RightParen => {
784                    // Handle parentheses for union types like (ns)
785                    let c = if self.current_token == Token::LeftParen {
786                        '('
787                    } else {
788                        ')'
789                    };
790                    signature.push(c);
791                    self.advance()?;
792                }
793                _ => {
794                    return Err(ParserError::UnexpectedToken(format!(
795                        "Unexpected token in signature: {:?}",
796                        self.current_token
797                    )));
798                }
799            }
800        }
801
802        signature.push('>');
803        Ok(signature)
804    }
805
806    /// Parse a primary expression (literals, identifiers, variables, grouping)
807    fn parse_primary(&mut self) -> Result<AstNode, ParserError> {
808        match &self.current_token {
809            Token::String(s) => {
810                let value = s.clone();
811                self.advance()?;
812                Ok(AstNode::String(value))
813            }
814            Token::Number(n) => {
815                let value = *n;
816                self.advance()?;
817                Ok(AstNode::Number(value))
818            }
819            Token::True => {
820                self.advance()?;
821                Ok(AstNode::Boolean(true))
822            }
823            Token::False => {
824                self.advance()?;
825                Ok(AstNode::Boolean(false))
826            }
827            Token::Null => {
828                self.advance()?;
829                Ok(AstNode::Null)
830            }
831            Token::Undefined => {
832                self.advance()?;
833                Ok(AstNode::Undefined)
834            }
835            Token::Regex { pattern, flags } => {
836                let pat = pattern.clone();
837                let flg = flags.clone();
838                self.advance()?;
839                Ok(AstNode::Regex {
840                    pattern: pat,
841                    flags: flg,
842                })
843            }
844            Token::Identifier(name) => {
845                let name = name.clone();
846                self.advance()?;
847                Ok(AstNode::Path {
848                    steps: vec![PathStep::new(AstNode::Name(name))],
849                })
850            }
851            Token::Variable(name) => {
852                let name = name.clone();
853                self.advance()?;
854                Ok(AstNode::Variable(name))
855            }
856            Token::ParentVariable(name) => {
857                let name = name.clone();
858                self.advance()?;
859                Ok(AstNode::ParentVariable(name))
860            }
861            Token::LeftParen => {
862                self.advance()?; // skip '('
863
864                // Check for empty parentheses () which means undefined
865                if self.current_token == Token::RightParen {
866                    self.advance()?;
867                    return Ok(AstNode::Undefined);
868                }
869
870                // Parse block expressions (separated by semicolons)
871                // NOTE: Parentheses ALWAYS create a block in JSONata, even with a single expression.
872                // This is important for variable scoping - ( $x := value ) creates a new scope.
873                let mut expressions = vec![self.parse_expression(0)?];
874
875                while self.current_token == Token::Semicolon {
876                    self.advance()?;
877                    if self.current_token == Token::RightParen {
878                        break;
879                    }
880                    expressions.push(self.parse_expression(0)?);
881                }
882
883                self.expect(Token::RightParen)?;
884
885                // Always create a block, matching JavaScript implementation
886                Ok(AstNode::Block(expressions))
887            }
888            Token::LeftBracket => {
889                self.advance()?; // skip '['
890
891                let mut elements = Vec::new();
892
893                if self.current_token != Token::RightBracket {
894                    loop {
895                        let element = self.parse_expression(0)?;
896                        elements.push(element);
897
898                        if self.current_token != Token::Comma {
899                            break;
900                        }
901                        self.advance()?;
902                    }
903                }
904
905                self.expect(Token::RightBracket)?;
906                Ok(AstNode::Array(elements))
907            }
908            Token::LeftBrace => {
909                self.advance()?; // skip '{'
910
911                let mut pairs = Vec::new();
912
913                if self.current_token != Token::RightBrace {
914                    loop {
915                        let key = self.parse_expression(0)?;
916                        self.expect(Token::Colon)?;
917                        let value = self.parse_expression(0)?;
918                        pairs.push((key, value));
919
920                        if self.current_token != Token::Comma {
921                            break;
922                        }
923                        self.advance()?;
924                    }
925                }
926
927                self.expect(Token::RightBrace)?;
928                Ok(AstNode::Object(pairs))
929            }
930            Token::Pipe => {
931                // Transform operator: |location|update[,delete]|
932                self.advance()?; // skip first '|'
933
934                // Parse location expression
935                let location = self.parse_expression(0)?;
936
937                // Expect second '|'
938                self.expect(Token::Pipe)?;
939
940                // Parse update expression (object constructor)
941                let update = self.parse_expression(0)?;
942
943                // Check for optional delete part
944                let delete = if self.current_token == Token::Comma {
945                    self.advance()?; // skip comma
946                    Some(Box::new(self.parse_expression(0)?))
947                } else {
948                    None
949                };
950
951                // Expect final '|'
952                self.expect(Token::Pipe)?;
953
954                Ok(AstNode::Transform {
955                    location: Box::new(location),
956                    update: Box::new(update),
957                    delete,
958                })
959            }
960            Token::Minus => {
961                self.advance()?;
962                let operand = self.parse_expression(70)?; // High precedence for unary
963                Ok(AstNode::Unary {
964                    op: UnaryOp::Negate,
965                    operand: Box::new(operand),
966                })
967            }
968            Token::Star => {
969                // Wildcard operator in primary position
970                self.advance()?;
971                Ok(AstNode::Wildcard)
972            }
973            Token::StarStar => {
974                // Descendant operator in primary position
975                self.advance()?;
976                Ok(AstNode::Descendant)
977            }
978            Token::Function => {
979                // Parse lambda: function($param1, $param2, ...) { body }
980                self.advance()?; // skip 'function'
981                self.expect(Token::LeftParen)?;
982
983                // Parse parameters
984                let mut params = Vec::new();
985                if self.current_token != Token::RightParen {
986                    loop {
987                        match &self.current_token {
988                            Token::Variable(name) => {
989                                params.push(name.clone());
990                                self.advance()?;
991                            }
992                            _ => {
993                                return Err(ParserError::Expected {
994                                    expected: "parameter name".to_string(),
995                                    found: format!("{:?}", self.current_token),
996                                })
997                            }
998                        }
999
1000                        if self.current_token != Token::Comma {
1001                            break;
1002                        }
1003                        self.advance()?; // skip comma
1004                    }
1005                }
1006
1007                self.expect(Token::RightParen)?;
1008
1009                // Check for optional signature: <type-type:returntype>
1010                let signature = if self.current_token == Token::LessThan {
1011                    Some(self.parse_signature()?)
1012                } else {
1013                    None
1014                };
1015
1016                self.expect(Token::LeftBrace)?;
1017
1018                // Parse body
1019                let body = self.parse_expression(0)?;
1020
1021                self.expect(Token::RightBrace)?;
1022
1023                // Apply tail call optimization to the body
1024                let (optimized_body, is_thunk) = Self::tail_call_optimize(body);
1025
1026                Ok(AstNode::Lambda {
1027                    params,
1028                    body: Box::new(optimized_body),
1029                    signature,
1030                    thunk: is_thunk,
1031                })
1032            }
1033            _ => Err(ParserError::UnexpectedToken(format!(
1034                "{:?}",
1035                self.current_token
1036            ))),
1037        }
1038    }
1039
1040    /// Parse an expression with Pratt parsing
1041    fn parse_expression(&mut self, min_bp: u8) -> Result<AstNode, ParserError> {
1042        let mut lhs = self.parse_primary()?;
1043
1044        loop {
1045            // Check for end of expression
1046            if matches!(
1047                self.current_token,
1048                Token::Eof
1049                    | Token::RightParen
1050                    | Token::RightBracket
1051                    | Token::RightBrace
1052                    | Token::Comma
1053                    | Token::Semicolon
1054                    | Token::Colon
1055            ) {
1056                break;
1057            }
1058
1059            // Get binding power for current operator
1060            let (left_bp, right_bp) = match self.binding_power(&self.current_token) {
1061                Some(bp) => bp,
1062                None => break,
1063            };
1064
1065            if left_bp < min_bp {
1066                break;
1067            }
1068
1069            // Handle infix operators
1070            match &self.current_token {
1071                Token::Dot => {
1072                    self.advance()?;
1073
1074                    // Check for .[expr] syntax (array grouping)
1075                    if self.current_token == Token::LeftBracket {
1076                        self.advance()?;
1077
1078                        // Parse the array elements
1079                        let mut elements = Vec::new();
1080                        if self.current_token != Token::RightBracket {
1081                            loop {
1082                                elements.push(self.parse_expression(0)?);
1083                                if self.current_token != Token::Comma {
1084                                    break;
1085                                }
1086                                self.advance()?;
1087                            }
1088                        }
1089
1090                        self.expect(Token::RightBracket)?;
1091
1092                        // Create ArrayGroup node as a path step
1093                        let mut steps = match lhs {
1094                            AstNode::Path { steps } => steps,
1095                            _ => vec![PathStep::new(lhs)],
1096                        };
1097
1098                        steps.push(PathStep::new(AstNode::ArrayGroup(elements)));
1099                        lhs = AstNode::Path { steps };
1100                    } else if self.current_token == Token::LeftParen {
1101                        // Check for .(expr) syntax (function application)
1102                        self.advance()?;
1103
1104                        // Parse the expression(s) to apply - may be block with semicolons
1105                        let mut expressions = vec![self.parse_expression(0)?];
1106
1107                        while self.current_token == Token::Semicolon {
1108                            self.advance()?;
1109                            if self.current_token == Token::RightParen {
1110                                break;
1111                            }
1112                            expressions.push(self.parse_expression(0)?);
1113                        }
1114
1115                        self.expect(Token::RightParen)?;
1116
1117                        // Wrap in Block if multiple expressions, otherwise use single expression
1118                        let expr = if expressions.len() == 1 {
1119                            expressions.into_iter().next().unwrap()
1120                        } else {
1121                            AstNode::Block(expressions)
1122                        };
1123
1124                        // Create FunctionApplication node as a path step
1125                        let mut steps = match lhs {
1126                            AstNode::Path { steps } => steps,
1127                            _ => vec![PathStep::new(lhs)],
1128                        };
1129
1130                        steps.push(PathStep::new(AstNode::FunctionApplication(Box::new(expr))));
1131                        lhs = AstNode::Path { steps };
1132                    } else {
1133                        // Normal dot path
1134                        let rhs = self.parse_expression(right_bp)?;
1135
1136                        // Flatten path expressions
1137                        let mut steps = match lhs {
1138                            AstNode::Path { steps } => steps,
1139                            // Convert string literals to field names when used as first step in path
1140                            // e.g., "foo".bar should behave like foo.bar
1141                            AstNode::String(field_name) => {
1142                                vec![PathStep::new(AstNode::Name(field_name))]
1143                            }
1144                            _ => vec![PathStep::new(lhs)],
1145                        };
1146
1147                        // S0213: The literal value cannot be used as a step within a path expression
1148                        // Numbers, booleans (true/false), and null cannot be path steps
1149                        match &rhs {
1150                            AstNode::Number(n) => {
1151                                return Err(ParserError::InvalidSyntax(
1152                                    format!("S0213: The literal value {} cannot be used as a step within a path expression", n),
1153                                ));
1154                            }
1155                            AstNode::Boolean(b) => {
1156                                return Err(ParserError::InvalidSyntax(
1157                                    format!("S0213: The literal value {} cannot be used as a step within a path expression", b),
1158                                ));
1159                            }
1160                            AstNode::Null => {
1161                                return Err(ParserError::InvalidSyntax(
1162                                    "S0213: The literal value null cannot be used as a step within a path expression".to_string(),
1163                                ));
1164                            }
1165                            _ => {}
1166                        }
1167
1168                        match rhs {
1169                            AstNode::Path {
1170                                steps: mut rhs_steps,
1171                            } => {
1172                                steps.append(&mut rhs_steps);
1173                            }
1174                            // Convert string literals to field names when they appear after a dot
1175                            // e.g., $."Field.Name" should access a property named "Field.Name"
1176                            AstNode::String(field_name) => {
1177                                steps.push(PathStep::new(AstNode::Name(field_name)));
1178                            }
1179                            _ => steps.push(PathStep::new(rhs)),
1180                        }
1181
1182                        // Check for following predicates and attach as stages to the last step
1183                        // This implements JSONata semantics where foo.bar[0] has [0] apply during extraction
1184                        while self.current_token == Token::LeftBracket {
1185                            self.advance()?;
1186
1187                            let predicate_expr = if self.current_token == Token::RightBracket {
1188                                // Empty brackets []
1189                                Box::new(AstNode::Boolean(true))
1190                            } else {
1191                                // Normal predicate expression
1192                                let pred = self.parse_expression(0)?;
1193                                Box::new(pred)
1194                            };
1195
1196                            self.expect(Token::RightBracket)?;
1197
1198                            // Attach predicate as stage to the last step
1199                            if let Some(last_step) = steps.last_mut() {
1200                                last_step.stages.push(Stage::Filter(predicate_expr));
1201                            }
1202                        }
1203
1204                        lhs = AstNode::Path { steps };
1205                    }
1206                }
1207                Token::LeftBracket => {
1208                    // S0209: A predicate cannot follow a grouping expression in a step
1209                    // Check if lhs is an ObjectTransform (grouping expression)
1210                    if matches!(lhs, AstNode::ObjectTransform { .. }) {
1211                        return Err(ParserError::InvalidSyntax(
1212                            "S0209: A predicate cannot follow a grouping expression in a step"
1213                                .to_string(),
1214                        ));
1215                    }
1216
1217                    self.advance()?;
1218
1219                    // Predicates in postfix position are always separate steps
1220                    // Predicates as stages are only attached during DOT operator parsing
1221                    if self.current_token == Token::RightBracket {
1222                        // Empty brackets []
1223                        self.advance()?;
1224
1225                        let mut steps = match lhs {
1226                            AstNode::Path { steps } => steps,
1227                            _ => vec![PathStep::new(lhs)],
1228                        };
1229
1230                        steps.push(PathStep::new(AstNode::Predicate(Box::new(
1231                            AstNode::Boolean(true),
1232                        ))));
1233                        lhs = AstNode::Path { steps };
1234                    } else {
1235                        // Normal predicate
1236                        let predicate = self.parse_expression(0)?;
1237                        self.expect(Token::RightBracket)?;
1238
1239                        let mut steps = match lhs {
1240                            AstNode::Path { steps } => steps,
1241                            _ => vec![PathStep::new(lhs)],
1242                        };
1243
1244                        steps.push(PathStep::new(AstNode::Predicate(Box::new(predicate))));
1245                        lhs = AstNode::Path { steps };
1246                    }
1247                }
1248                Token::LeftParen => {
1249                    self.advance()?;
1250
1251                    let mut args = Vec::new();
1252
1253                    if self.current_token != Token::RightParen {
1254                        loop {
1255                            // Check for ? placeholder (partial application)
1256                            if self.current_token == Token::Question {
1257                                args.push(AstNode::Placeholder);
1258                                self.advance()?;
1259                            } else {
1260                                args.push(self.parse_expression(0)?);
1261                            }
1262
1263                            if self.current_token != Token::Comma {
1264                                break;
1265                            }
1266                            self.advance()?;
1267                        }
1268                    }
1269
1270                    self.expect(Token::RightParen)?;
1271
1272                    // Check if lhs is a lambda or callable expression
1273                    match lhs {
1274                        // Direct invocations: lambda(args), block(args), chained calls, function result calls
1275                        AstNode::Lambda { .. }
1276                        | AstNode::Block(_)
1277                        | AstNode::Call { .. }
1278                        | AstNode::Function { .. } => {
1279                            lhs = AstNode::Call {
1280                                procedure: Box::new(lhs),
1281                                args,
1282                            };
1283                        }
1284                        ref other_lhs => {
1285                            // Extract function name from lhs
1286                            match other_lhs {
1287                                // Handle bare function names: uppercase()
1288                                AstNode::Path { steps } if steps.len() == 1 => {
1289                                    let name = match &steps[0].node {
1290                                        AstNode::Name(s) => s.clone(),
1291                                        _ => {
1292                                            return Err(ParserError::InvalidSyntax(
1293                                                "Invalid function name".to_string(),
1294                                            ))
1295                                        }
1296                                    };
1297                                    lhs = AstNode::Function {
1298                                        name,
1299                                        args,
1300                                        is_builtin: false,
1301                                    };
1302                                }
1303                                // Handle path ending with $function: foo.bar.$lowercase(args)
1304                                AstNode::Path { steps } if steps.len() > 1 => {
1305                                    let last_step = &steps[steps.len() - 1].node;
1306
1307                                    // Check if last step is a Variable (function reference)
1308                                    if let AstNode::Variable(func_name) = last_step {
1309                                        // Extract all but the last step as the path context
1310                                        let mut context_steps = steps.clone();
1311                                        context_steps.pop();
1312
1313                                        // Create function call
1314                                        let func_call = AstNode::Function {
1315                                            name: func_name.clone(),
1316                                            args: args.clone(),
1317                                            is_builtin: true, // Variable means $ prefix
1318                                        };
1319
1320                                        // Append function application to the path
1321                                        context_steps.push(PathStep::new(
1322                                            AstNode::FunctionApplication(Box::new(func_call)),
1323                                        ));
1324
1325                                        lhs = AstNode::Path {
1326                                            steps: context_steps,
1327                                        };
1328                                    }
1329                                    // Check if last step is a Lambda (inline function in path)
1330                                    else if let AstNode::Lambda {
1331                                        params,
1332                                        body,
1333                                        signature,
1334                                        thunk,
1335                                    } = last_step
1336                                    {
1337                                        // Extract all but the last step as the path context
1338                                        let mut context_steps = steps.clone();
1339                                        context_steps.pop();
1340
1341                                        // In path context, determine if we need to prepend $
1342                                        // - If fewer args than params, prepend $ (context value) as first arg
1343                                        // - If args == params, use args as-is
1344                                        let full_args = if args.len() < params.len() {
1345                                            let mut new_args =
1346                                                vec![AstNode::Variable("$".to_string())];
1347                                            new_args.extend(args.clone());
1348                                            new_args
1349                                        } else {
1350                                            args.clone()
1351                                        };
1352
1353                                        // Create a lambda invocation block
1354                                        // ($__path_lambda := lambda; $__path_lambda(args...))
1355                                        let lambda_invocation = AstNode::Block(vec![
1356                                            AstNode::Binary {
1357                                                op: crate::ast::BinaryOp::ColonEqual,
1358                                                lhs: Box::new(AstNode::Variable(
1359                                                    "__path_lambda__".to_string(),
1360                                                )),
1361                                                rhs: Box::new(AstNode::Lambda {
1362                                                    params: params.clone(),
1363                                                    body: body.clone(),
1364                                                    signature: signature.clone(),
1365                                                    thunk: *thunk,
1366                                                }),
1367                                            },
1368                                            AstNode::Function {
1369                                                name: "__path_lambda__".to_string(),
1370                                                args: full_args,
1371                                                is_builtin: true,
1372                                            },
1373                                        ]);
1374
1375                                        // Append as function application to the path
1376                                        context_steps.push(PathStep::new(
1377                                            AstNode::FunctionApplication(Box::new(
1378                                                lambda_invocation,
1379                                            )),
1380                                        ));
1381
1382                                        lhs = AstNode::Path {
1383                                            steps: context_steps,
1384                                        };
1385                                    } else {
1386                                        return Err(ParserError::InvalidSyntax(
1387                                            "Invalid function call".to_string(),
1388                                        ));
1389                                    }
1390                                }
1391                                // Handle $-prefixed function names: $uppercase()
1392                                AstNode::Variable(name) => {
1393                                    lhs = AstNode::Function {
1394                                        name: name.clone(),
1395                                        args,
1396                                        is_builtin: true,
1397                                    };
1398                                }
1399                                _ => {
1400                                    return Err(ParserError::InvalidSyntax(
1401                                        "Invalid function call".to_string(),
1402                                    ))
1403                                }
1404                            };
1405                        }
1406                    }
1407                }
1408                Token::Question => {
1409                    self.advance()?;
1410                    let then_branch = self.parse_expression(0)?;
1411
1412                    let else_branch = if self.current_token == Token::Colon {
1413                        self.advance()?;
1414                        // Use 0 for right-associativity: a ? b : c ? d : e parses as a ? b : (c ? d : e)
1415                        Some(Box::new(self.parse_expression(0)?))
1416                    } else {
1417                        None
1418                    };
1419
1420                    lhs = AstNode::Conditional {
1421                        condition: Box::new(lhs),
1422                        then_branch: Box::new(then_branch),
1423                        else_branch,
1424                    };
1425                }
1426                Token::LeftBrace => {
1427                    // S0210: Each step can only have one grouping expression
1428                    // Check if lhs is already an ObjectTransform
1429                    if matches!(lhs, AstNode::ObjectTransform { .. }) {
1430                        return Err(ParserError::InvalidSyntax(
1431                            "S0210: Each step can only have one grouping expression".to_string(),
1432                        ));
1433                    }
1434
1435                    // Object constructor as postfix: expr{key: value}
1436                    self.advance()?; // skip '{'
1437
1438                    let mut pairs = Vec::new();
1439
1440                    if self.current_token != Token::RightBrace {
1441                        loop {
1442                            // Parse key expression - parse_expression handles identifiers correctly
1443                            let key = self.parse_expression(0)?;
1444
1445                            self.expect(Token::Colon)?;
1446
1447                            // Parse value expression - can be any expression including paths
1448                            let value = self.parse_expression(0)?;
1449
1450                            pairs.push((key, value));
1451
1452                            if self.current_token != Token::Comma {
1453                                break;
1454                            }
1455                            self.advance()?; // skip comma
1456                        }
1457                    }
1458
1459                    self.expect(Token::RightBrace)?;
1460
1461                    // Object constructor with input: lhs{k:v} means transform lhs using the object pattern
1462                    lhs = AstNode::ObjectTransform {
1463                        input: Box::new(lhs),
1464                        pattern: pairs,
1465                    };
1466                }
1467                Token::Hash => {
1468                    // Index binding operator: #$var
1469                    // Binds the current array index to the specified variable
1470                    self.advance()?; // skip '#'
1471
1472                    // Expect a variable name
1473                    let var_name = match &self.current_token {
1474                        Token::Variable(name) => name.clone(),
1475                        _ => {
1476                            return Err(ParserError::InvalidSyntax(
1477                                "Expected variable name after #".to_string(),
1478                            ));
1479                        }
1480                    };
1481                    self.advance()?; // skip variable
1482
1483                    lhs = AstNode::IndexBind {
1484                        input: Box::new(lhs),
1485                        variable: var_name,
1486                    };
1487                }
1488                Token::Caret => {
1489                    // Sort operator: ^(expr) or ^(<expr) or ^(>expr)
1490                    self.advance()?; // skip '^'
1491                    self.expect(Token::LeftParen)?;
1492
1493                    let mut terms = Vec::new();
1494
1495                    loop {
1496                        // Check for optional sort direction prefix
1497                        let ascending = match &self.current_token {
1498                            Token::LessThan => {
1499                                self.advance()?;
1500                                true
1501                            }
1502                            Token::GreaterThan => {
1503                                self.advance()?;
1504                                false
1505                            }
1506                            _ => true, // Default to ascending
1507                        };
1508
1509                        // Parse the sort expression
1510                        let expr = self.parse_expression(0)?;
1511                        terms.push((expr, ascending));
1512
1513                        // Check for more sort terms
1514                        if self.current_token != Token::Comma {
1515                            break;
1516                        }
1517                        self.advance()?; // skip comma
1518                    }
1519
1520                    self.expect(Token::RightParen)?;
1521
1522                    lhs = AstNode::Sort {
1523                        input: Box::new(lhs),
1524                        terms,
1525                    };
1526                }
1527                _ => {
1528                    // Binary operators
1529                    let op = match &self.current_token {
1530                        // Contextual keyword operators
1531                        Token::Identifier(name) if name == "and" => BinaryOp::And,
1532                        Token::Identifier(name) if name == "or" => BinaryOp::Or,
1533                        Token::Identifier(name) if name == "in" => BinaryOp::In,
1534                        // Regular operators
1535                        Token::Plus => BinaryOp::Add,
1536                        Token::Minus => BinaryOp::Subtract,
1537                        Token::Star => BinaryOp::Multiply,
1538                        Token::Slash => BinaryOp::Divide,
1539                        Token::Percent => BinaryOp::Modulo,
1540                        Token::Equal => BinaryOp::Equal,
1541                        Token::NotEqual => BinaryOp::NotEqual,
1542                        Token::LessThan => BinaryOp::LessThan,
1543                        Token::LessThanOrEqual => BinaryOp::LessThanOrEqual,
1544                        Token::GreaterThan => BinaryOp::GreaterThan,
1545                        Token::GreaterThanOrEqual => BinaryOp::GreaterThanOrEqual,
1546                        Token::And => BinaryOp::And,
1547                        Token::Or => BinaryOp::Or,
1548                        Token::In => BinaryOp::In,
1549                        Token::Ampersand => BinaryOp::Concatenate,
1550                        Token::DotDot => BinaryOp::Range,
1551                        Token::ColonEqual => BinaryOp::ColonEqual,
1552                        Token::QuestionQuestion => BinaryOp::Coalesce,
1553                        Token::QuestionColon => BinaryOp::Default,
1554                        Token::TildeArrow => BinaryOp::ChainPipe,
1555                        _ => {
1556                            return Err(ParserError::UnexpectedToken(format!(
1557                                "{:?}",
1558                                self.current_token
1559                            )))
1560                        }
1561                    };
1562
1563                    self.advance()?;
1564                    let rhs = self.parse_expression(right_bp)?;
1565
1566                    lhs = AstNode::Binary {
1567                        op,
1568                        lhs: Box::new(lhs),
1569                        rhs: Box::new(rhs),
1570                    };
1571                }
1572            }
1573        }
1574
1575        Ok(lhs)
1576    }
1577
1578    pub fn parse(&mut self) -> Result<AstNode, ParserError> {
1579        let ast = self.parse_expression(0)?;
1580
1581        if self.current_token != Token::Eof {
1582            return Err(ParserError::Expected {
1583                expected: "end of expression".to_string(),
1584                found: format!("{:?}", self.current_token),
1585            });
1586        }
1587
1588        Ok(ast)
1589    }
1590
1591    /// Analyze an expression for tail call optimization
1592    /// Returns (optimized_expr, is_thunk) where:
1593    /// - optimized_expr is the expression (unchanged)
1594    /// - is_thunk is true if the expression's tail position is a function call
1595    ///
1596    /// A tail position is where a function call's result is directly returned:
1597    /// - The body itself if it's a function call
1598    /// - Both branches of a conditional at tail position
1599    /// - The last expression of a block at tail position
1600    fn tail_call_optimize(expr: AstNode) -> (AstNode, bool) {
1601        let is_thunk = Self::is_tail_call(&expr);
1602        (expr, is_thunk)
1603    }
1604
1605    /// Check if an expression is in tail call position
1606    /// Returns true if the expression is a function call (or contains function calls in all tail positions)
1607    fn is_tail_call(expr: &AstNode) -> bool {
1608        match expr {
1609            // Direct function calls are tail calls
1610            AstNode::Function { .. } => true,
1611            AstNode::Call { .. } => true,
1612
1613            // Conditional: both branches must be tail calls (or at least one if only one branch)
1614            AstNode::Conditional {
1615                then_branch,
1616                else_branch,
1617                ..
1618            } => {
1619                let then_is_tail = Self::is_tail_call(then_branch);
1620                let else_is_tail = else_branch.as_ref().is_some_and(|e| Self::is_tail_call(e));
1621                // At least one branch should be a tail call for TCO to be useful
1622                then_is_tail || else_is_tail
1623            }
1624
1625            // Block: last expression is tail position
1626            AstNode::Block(exprs) => exprs.last().is_some_and(Self::is_tail_call),
1627
1628            // Variable binding with result: the result expression is tail position
1629            AstNode::Binary {
1630                op: BinaryOp::ColonEqual,
1631                rhs,
1632                ..
1633            } => {
1634                // The rhs (or next expression) could be tail position
1635                // But typically := is used for assignment within blocks
1636                // Check if rhs is a block (common pattern)
1637                Self::is_tail_call(rhs)
1638            }
1639
1640            // Anything else is not a tail call
1641            _ => false,
1642        }
1643    }
1644}
1645
1646/// Parse a JSONata expression string into an AST
1647///
1648/// This is the main entry point for parsing.
1649pub fn parse(expression: &str) -> Result<AstNode, ParserError> {
1650    let mut parser = Parser::new(expression.to_string())?;
1651    parser.parse()
1652}
1653
1654#[cfg(test)]
1655mod tests {
1656    use super::*;
1657
1658    // Lexer tests
1659    #[test]
1660    fn test_lexer_numbers() {
1661        let mut lexer = Lexer::new("42 3.14 -10 2.5e10 1E-5".to_string());
1662
1663        assert_eq!(lexer.next_token().unwrap(), Token::Number(42.0));
1664        assert_eq!(lexer.next_token().unwrap(), Token::Number(3.14));
1665        // Negation is handled as a unary operator, not part of the number literal
1666        assert_eq!(lexer.next_token().unwrap(), Token::Minus);
1667        assert_eq!(lexer.next_token().unwrap(), Token::Number(10.0));
1668        assert_eq!(lexer.next_token().unwrap(), Token::Number(2.5e10));
1669        assert_eq!(lexer.next_token().unwrap(), Token::Number(1e-5));
1670        assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1671    }
1672
1673    #[test]
1674    fn test_lexer_strings() {
1675        let mut lexer = Lexer::new(r#""hello" 'world' "with\nnewline""#.to_string());
1676
1677        assert_eq!(
1678            lexer.next_token().unwrap(),
1679            Token::String("hello".to_string())
1680        );
1681        assert_eq!(
1682            lexer.next_token().unwrap(),
1683            Token::String("world".to_string())
1684        );
1685        assert_eq!(
1686            lexer.next_token().unwrap(),
1687            Token::String("with\nnewline".to_string())
1688        );
1689        assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1690    }
1691
1692    #[test]
1693    fn test_lexer_string_escapes() {
1694        let mut lexer = Lexer::new(r#""a\"b\\c\/d""#.to_string());
1695        assert_eq!(
1696            lexer.next_token().unwrap(),
1697            Token::String("a\"b\\c/d".to_string())
1698        );
1699    }
1700
1701    #[test]
1702    fn test_lexer_keywords() {
1703        let mut lexer = Lexer::new("true false null and or in".to_string());
1704
1705        assert_eq!(lexer.next_token().unwrap(), Token::True);
1706        assert_eq!(lexer.next_token().unwrap(), Token::False);
1707        assert_eq!(lexer.next_token().unwrap(), Token::Null);
1708        // "and", "or", "in" are now contextual keywords - lexer emits them as identifiers
1709        assert_eq!(
1710            lexer.next_token().unwrap(),
1711            Token::Identifier("and".to_string())
1712        );
1713        assert_eq!(
1714            lexer.next_token().unwrap(),
1715            Token::Identifier("or".to_string())
1716        );
1717        assert_eq!(
1718            lexer.next_token().unwrap(),
1719            Token::Identifier("in".to_string())
1720        );
1721        assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1722    }
1723
1724    #[test]
1725    fn test_lexer_identifiers() {
1726        let mut lexer = Lexer::new("foo bar_baz test123".to_string());
1727
1728        assert_eq!(
1729            lexer.next_token().unwrap(),
1730            Token::Identifier("foo".to_string())
1731        );
1732        assert_eq!(
1733            lexer.next_token().unwrap(),
1734            Token::Identifier("bar_baz".to_string())
1735        );
1736        assert_eq!(
1737            lexer.next_token().unwrap(),
1738            Token::Identifier("test123".to_string())
1739        );
1740        assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1741    }
1742
1743    #[test]
1744    fn test_lexer_variables() {
1745        let mut lexer = Lexer::new("$var $foo_bar".to_string());
1746
1747        assert_eq!(
1748            lexer.next_token().unwrap(),
1749            Token::Variable("var".to_string())
1750        );
1751        assert_eq!(
1752            lexer.next_token().unwrap(),
1753            Token::Variable("foo_bar".to_string())
1754        );
1755        assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1756    }
1757
1758    #[test]
1759    fn test_lexer_operators() {
1760        // Test non-slash operators first
1761        let mut lexer = Lexer::new("+ - * % = != < <= > >= & . .. := ".to_string());
1762
1763        assert_eq!(lexer.next_token().unwrap(), Token::Plus);
1764        assert_eq!(lexer.next_token().unwrap(), Token::Minus);
1765        assert_eq!(lexer.next_token().unwrap(), Token::Star);
1766        assert_eq!(lexer.next_token().unwrap(), Token::Percent);
1767        assert_eq!(lexer.next_token().unwrap(), Token::Equal);
1768        assert_eq!(lexer.next_token().unwrap(), Token::NotEqual);
1769        assert_eq!(lexer.next_token().unwrap(), Token::LessThan);
1770        assert_eq!(lexer.next_token().unwrap(), Token::LessThanOrEqual);
1771        assert_eq!(lexer.next_token().unwrap(), Token::GreaterThan);
1772        assert_eq!(lexer.next_token().unwrap(), Token::GreaterThanOrEqual);
1773        assert_eq!(lexer.next_token().unwrap(), Token::Ampersand);
1774        assert_eq!(lexer.next_token().unwrap(), Token::Dot);
1775        assert_eq!(lexer.next_token().unwrap(), Token::DotDot);
1776        assert_eq!(lexer.next_token().unwrap(), Token::ColonEqual);
1777        assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1778
1779        // Slash is context-dependent: after a value token it's division, otherwise regex.
1780        // Test slash after a number (value context) to get Token::Slash.
1781        let mut lexer2 = Lexer::new("42 / 2".to_string());
1782        assert_eq!(lexer2.next_token().unwrap(), Token::Number(42.0));
1783        assert_eq!(lexer2.next_token().unwrap(), Token::Slash);
1784        assert_eq!(lexer2.next_token().unwrap(), Token::Number(2.0));
1785        assert_eq!(lexer2.next_token().unwrap(), Token::Eof);
1786    }
1787
1788    #[test]
1789    fn test_lexer_delimiters() {
1790        let mut lexer = Lexer::new("()[]{},:;?".to_string());
1791
1792        assert_eq!(lexer.next_token().unwrap(), Token::LeftParen);
1793        assert_eq!(lexer.next_token().unwrap(), Token::RightParen);
1794        assert_eq!(lexer.next_token().unwrap(), Token::LeftBracket);
1795        assert_eq!(lexer.next_token().unwrap(), Token::RightBracket);
1796        assert_eq!(lexer.next_token().unwrap(), Token::LeftBrace);
1797        assert_eq!(lexer.next_token().unwrap(), Token::RightBrace);
1798        assert_eq!(lexer.next_token().unwrap(), Token::Comma);
1799        assert_eq!(lexer.next_token().unwrap(), Token::Colon);
1800        assert_eq!(lexer.next_token().unwrap(), Token::Semicolon);
1801        assert_eq!(lexer.next_token().unwrap(), Token::Question);
1802        assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1803    }
1804
1805    #[test]
1806    fn test_empty_brackets() {
1807        let mut parser = Parser::new("foo[]".to_string()).unwrap();
1808        let ast = parser.parse().unwrap();
1809
1810        // Should create a Path with two steps: Name("foo") and Predicate(Boolean(true))
1811        if let AstNode::Path { steps } = ast {
1812            assert_eq!(steps.len(), 2);
1813            assert!(matches!(steps[0].node, AstNode::Name(ref s) if s == "foo"));
1814            if let AstNode::Predicate(pred) = &steps[1].node {
1815                assert!(matches!(**pred, AstNode::Boolean(true)));
1816            } else {
1817                panic!("Expected Predicate as second step, got {:?}", steps[1].node);
1818            }
1819        } else {
1820            panic!("Expected Path, got {:?}", ast);
1821        }
1822    }
1823
1824    #[test]
1825    fn test_lexer_comments() {
1826        let mut lexer = Lexer::new("foo /* comment */ bar".to_string());
1827
1828        assert_eq!(
1829            lexer.next_token().unwrap(),
1830            Token::Identifier("foo".to_string())
1831        );
1832        assert_eq!(
1833            lexer.next_token().unwrap(),
1834            Token::Identifier("bar".to_string())
1835        );
1836        assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1837    }
1838
1839    #[test]
1840    fn test_lexer_backtick_names() {
1841        let mut lexer = Lexer::new("`field name` `with-dash`".to_string());
1842
1843        assert_eq!(
1844            lexer.next_token().unwrap(),
1845            Token::Identifier("field name".to_string())
1846        );
1847        assert_eq!(
1848            lexer.next_token().unwrap(),
1849            Token::Identifier("with-dash".to_string())
1850        );
1851        assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1852    }
1853
1854    // Parser tests
1855    #[test]
1856    fn test_parse_number() {
1857        let ast = parse("42").unwrap();
1858        assert_eq!(ast, AstNode::Number(42.0));
1859    }
1860
1861    #[test]
1862    fn test_parse_string() {
1863        let ast = parse(r#""hello""#).unwrap();
1864        assert_eq!(ast, AstNode::String("hello".to_string()));
1865    }
1866
1867    #[test]
1868    fn test_parse_boolean() {
1869        let ast = parse("true").unwrap();
1870        assert_eq!(ast, AstNode::Boolean(true));
1871
1872        let ast = parse("false").unwrap();
1873        assert_eq!(ast, AstNode::Boolean(false));
1874    }
1875
1876    #[test]
1877    fn test_parse_null() {
1878        let ast = parse("null").unwrap();
1879        assert_eq!(ast, AstNode::Null);
1880    }
1881
1882    #[test]
1883    fn test_parse_variable() {
1884        let ast = parse("$var").unwrap();
1885        assert_eq!(ast, AstNode::Variable("var".to_string()));
1886    }
1887
1888    #[test]
1889    fn test_parse_identifier() {
1890        let ast = parse("foo").unwrap();
1891        assert_eq!(
1892            ast,
1893            AstNode::Path {
1894                steps: vec![PathStep::new(AstNode::Name("foo".to_string()))]
1895            }
1896        );
1897    }
1898
1899    #[test]
1900    fn test_parse_addition() {
1901        let ast = parse("1 + 2").unwrap();
1902        match ast {
1903            AstNode::Binary { op, lhs, rhs } => {
1904                assert_eq!(op, BinaryOp::Add);
1905                assert_eq!(*lhs, AstNode::Number(1.0));
1906                assert_eq!(*rhs, AstNode::Number(2.0));
1907            }
1908            _ => panic!("Expected Binary node"),
1909        }
1910    }
1911
1912    #[test]
1913    fn test_parse_precedence() {
1914        // 1 + 2 * 3 should parse as 1 + (2 * 3)
1915        let ast = parse("1 + 2 * 3").unwrap();
1916        match ast {
1917            AstNode::Binary {
1918                op: BinaryOp::Add,
1919                lhs,
1920                rhs,
1921            } => {
1922                assert_eq!(*lhs, AstNode::Number(1.0));
1923                match *rhs {
1924                    AstNode::Binary {
1925                        op: BinaryOp::Multiply,
1926                        lhs,
1927                        rhs,
1928                    } => {
1929                        assert_eq!(*lhs, AstNode::Number(2.0));
1930                        assert_eq!(*rhs, AstNode::Number(3.0));
1931                    }
1932                    _ => panic!("Expected Binary node for multiplication"),
1933                }
1934            }
1935            _ => panic!("Expected Binary node for addition"),
1936        }
1937    }
1938
1939    #[test]
1940    fn test_parse_parentheses() {
1941        // (1 + 2) * 3 should parse as Block([1 + 2]) * 3
1942        // Parenthesized expressions always create a Block in JSONata
1943        let ast = parse("(1 + 2) * 3").unwrap();
1944        match ast {
1945            AstNode::Binary {
1946                op: BinaryOp::Multiply,
1947                lhs,
1948                rhs,
1949            } => {
1950                match *lhs {
1951                    AstNode::Block(ref exprs) => {
1952                        assert_eq!(exprs.len(), 1);
1953                        match &exprs[0] {
1954                            AstNode::Binary {
1955                                op: BinaryOp::Add,
1956                                lhs,
1957                                rhs,
1958                            } => {
1959                                assert_eq!(**lhs, AstNode::Number(1.0));
1960                                assert_eq!(**rhs, AstNode::Number(2.0));
1961                            }
1962                            _ => panic!("Expected Binary node for addition inside block"),
1963                        }
1964                    }
1965                    _ => panic!(
1966                        "Expected Block node for parenthesized expression, got {:?}",
1967                        lhs
1968                    ),
1969                }
1970                assert_eq!(*rhs, AstNode::Number(3.0));
1971            }
1972            _ => panic!("Expected Binary node for multiplication"),
1973        }
1974    }
1975
1976    #[test]
1977    fn test_parse_array() {
1978        let ast = parse("[1, 2, 3]").unwrap();
1979        match ast {
1980            AstNode::Array(elements) => {
1981                assert_eq!(elements.len(), 3);
1982                assert_eq!(elements[0], AstNode::Number(1.0));
1983                assert_eq!(elements[1], AstNode::Number(2.0));
1984                assert_eq!(elements[2], AstNode::Number(3.0));
1985            }
1986            _ => panic!("Expected Array node"),
1987        }
1988    }
1989
1990    #[test]
1991    fn test_parse_object() {
1992        let ast = parse(r#"{"a": 1, "b": 2}"#).unwrap();
1993        match ast {
1994            AstNode::Object(pairs) => {
1995                assert_eq!(pairs.len(), 2);
1996                assert_eq!(pairs[0].0, AstNode::String("a".to_string()));
1997                assert_eq!(pairs[0].1, AstNode::Number(1.0));
1998                assert_eq!(pairs[1].0, AstNode::String("b".to_string()));
1999                assert_eq!(pairs[1].1, AstNode::Number(2.0));
2000            }
2001            _ => panic!("Expected Object node"),
2002        }
2003    }
2004
2005    #[test]
2006    fn test_parse_path() {
2007        let ast = parse("foo.bar").unwrap();
2008        match ast {
2009            AstNode::Path { steps } => {
2010                assert_eq!(steps.len(), 2);
2011                assert_eq!(steps[0].node, AstNode::Name("foo".to_string()));
2012                assert_eq!(steps[1].node, AstNode::Name("bar".to_string()));
2013            }
2014            _ => panic!("Expected Path node"),
2015        }
2016    }
2017
2018    #[test]
2019    fn test_parse_function_call() {
2020        let ast = parse("sum(1, 2, 3)").unwrap();
2021        match ast {
2022            AstNode::Function {
2023                name,
2024                args,
2025                is_builtin,
2026            } => {
2027                assert_eq!(name, "sum");
2028                assert_eq!(args.len(), 3);
2029                assert_eq!(args[0], AstNode::Number(1.0));
2030                assert_eq!(args[1], AstNode::Number(2.0));
2031                assert_eq!(args[2], AstNode::Number(3.0));
2032                assert!(!is_builtin); // Bare function call (no $ prefix)
2033            }
2034            _ => panic!("Expected Function node"),
2035        }
2036    }
2037
2038    #[test]
2039    fn test_parse_conditional() {
2040        let ast = parse("x > 0 ? 1 : -1").unwrap();
2041        match ast {
2042            AstNode::Conditional {
2043                condition,
2044                then_branch,
2045                else_branch,
2046            } => {
2047                assert!(matches!(*condition, AstNode::Binary { .. }));
2048                assert_eq!(*then_branch, AstNode::Number(1.0));
2049                // Negative numbers are parsed as Unary { Negate, Number(1.0) }
2050                assert_eq!(
2051                    else_branch,
2052                    Some(Box::new(AstNode::Unary {
2053                        op: UnaryOp::Negate,
2054                        operand: Box::new(AstNode::Number(1.0)),
2055                    }))
2056                );
2057            }
2058            _ => panic!("Expected Conditional node"),
2059        }
2060    }
2061
2062    #[test]
2063    fn test_parse_comparison() {
2064        let ast = parse("x < 10").unwrap();
2065        match ast {
2066            AstNode::Binary { op, .. } => {
2067                assert_eq!(op, BinaryOp::LessThan);
2068            }
2069            _ => panic!("Expected Binary node"),
2070        }
2071    }
2072
2073    #[test]
2074    fn test_parse_logical_and() {
2075        let ast = parse("true and false").unwrap();
2076        match ast {
2077            AstNode::Binary { op, lhs, rhs } => {
2078                assert_eq!(op, BinaryOp::And);
2079                assert_eq!(*lhs, AstNode::Boolean(true));
2080                assert_eq!(*rhs, AstNode::Boolean(false));
2081            }
2082            _ => panic!("Expected Binary node"),
2083        }
2084    }
2085
2086    #[test]
2087    fn test_parse_string_concatenation() {
2088        let ast = parse(r#""hello" & " " & "world""#).unwrap();
2089        match ast {
2090            AstNode::Binary { op, .. } => {
2091                assert_eq!(op, BinaryOp::Concatenate);
2092            }
2093            _ => panic!("Expected Binary node"),
2094        }
2095    }
2096
2097    #[test]
2098    fn test_parse_unary_minus() {
2099        // Negative numbers are parsed as Unary { Negate, Number(5.0) }
2100        let ast = parse("-5").unwrap();
2101        assert_eq!(
2102            ast,
2103            AstNode::Unary {
2104                op: UnaryOp::Negate,
2105                operand: Box::new(AstNode::Number(5.0)),
2106            }
2107        );
2108    }
2109
2110    #[test]
2111    fn test_parse_block() {
2112        let ast = parse("(1; 2; 3)").unwrap();
2113        match ast {
2114            AstNode::Block(expressions) => {
2115                assert_eq!(expressions.len(), 3);
2116                assert_eq!(expressions[0], AstNode::Number(1.0));
2117                assert_eq!(expressions[1], AstNode::Number(2.0));
2118                assert_eq!(expressions[2], AstNode::Number(3.0));
2119            }
2120            _ => panic!("Expected Block node"),
2121        }
2122    }
2123
2124    #[test]
2125    fn test_parse_complex_expression() {
2126        // Test a more complex expression
2127        let ast = parse("(a + b) * c.d").unwrap();
2128        assert!(matches!(ast, AstNode::Binary { .. }));
2129    }
2130
2131    #[test]
2132    fn test_parse_dollar_function_call() {
2133        // Test $uppercase function
2134        let ast = parse(r#"$uppercase("hello")"#).unwrap();
2135        match ast {
2136            AstNode::Function {
2137                name,
2138                args,
2139                is_builtin,
2140            } => {
2141                assert_eq!(name, "uppercase");
2142                assert_eq!(args.len(), 1);
2143                assert_eq!(args[0], AstNode::String("hello".to_string()));
2144                assert!(is_builtin); // $ prefix means builtin
2145            }
2146            _ => panic!("Expected Function node"),
2147        }
2148
2149        // Test $sum function
2150        let ast = parse("$sum([1, 2, 3])").unwrap();
2151        match ast {
2152            AstNode::Function {
2153                name,
2154                args,
2155                is_builtin,
2156            } => {
2157                assert_eq!(name, "sum");
2158                assert_eq!(args.len(), 1);
2159                assert!(is_builtin); // $ prefix means builtin
2160            }
2161            _ => panic!("Expected Function node"),
2162        }
2163    }
2164
2165    #[test]
2166    fn test_parse_nested_dollar_functions() {
2167        // Test nested $function calls
2168        let ast = parse(r#"$length($lowercase("HELLO"))"#).unwrap();
2169        match ast {
2170            AstNode::Function {
2171                name,
2172                args,
2173                is_builtin,
2174            } => {
2175                assert_eq!(name, "length");
2176                assert_eq!(args.len(), 1);
2177                assert!(is_builtin);
2178                // Check nested function
2179                match &args[0] {
2180                    AstNode::Function {
2181                        name: inner_name,
2182                        is_builtin: inner_builtin,
2183                        ..
2184                    } => {
2185                        assert_eq!(inner_name, "lowercase");
2186                        assert!(inner_builtin);
2187                    }
2188                    _ => panic!("Expected nested Function node"),
2189                }
2190            }
2191            _ => panic!("Expected Function node"),
2192        }
2193    }
2194}