1#![allow(clippy::approx_constant)]
5
6use crate::ast::{AstNode, BinaryOp, PathStep, Stage, UnaryOp};
7use thiserror::Error;
8
9#[derive(Error, Debug)]
11pub enum ParserError {
12 #[error("Unexpected token: {0}")]
13 UnexpectedToken(String),
14
15 #[error("Unexpected end of expression")]
16 UnexpectedEnd,
17
18 #[error("Invalid syntax: {0}")]
19 InvalidSyntax(String),
20
21 #[error("Invalid number: {0}")]
22 InvalidNumber(String),
23
24 #[error("Unclosed string literal")]
25 UnclosedString,
26
27 #[error("Invalid escape sequence: {0}")]
28 InvalidEscape(String),
29
30 #[error("Unclosed comment")]
31 UnclosedComment,
32
33 #[error("Unclosed backtick name")]
34 UnclosedBacktick,
35
36 #[error("Expected {expected}, found {found}")]
37 Expected { expected: String, found: String },
38}
39
40#[derive(Debug, Clone, PartialEq)]
42pub enum Token {
43 String(String),
45 Number(f64),
46 True,
47 False,
48 Null,
49 Undefined, Regex { pattern: String, flags: String }, Identifier(String),
54 Variable(String),
55 ParentVariable(String), Function, Plus,
60 Minus,
61 Star,
62 StarStar, Slash,
64 Percent,
65 Equal,
66 NotEqual,
67 LessThan,
68 LessThanOrEqual,
69 GreaterThan,
70 GreaterThanOrEqual,
71 And,
72 Or,
73 In,
74 Ampersand,
75 Dot,
76 DotDot,
77 Question,
78 QuestionQuestion, QuestionColon, Colon,
81 ColonEqual, TildeArrow, LeftParen,
86 RightParen,
87 LeftBracket,
88 RightBracket,
89 LeftBrace,
90 RightBrace,
91 Comma,
92 Semicolon,
93 Caret, Pipe, Hash, Eof,
99}
100
101pub struct Lexer {
103 input: Vec<char>,
104 position: usize,
105 last_token: Option<Token>,
106}
107
108impl Lexer {
109 pub fn new(input: String) -> Self {
110 Lexer {
111 input: input.chars().collect(),
112 position: 0,
113 last_token: None,
114 }
115 }
116
117 fn current(&self) -> Option<char> {
118 self.input.get(self.position).copied()
119 }
120
121 fn peek(&self, offset: usize) -> Option<char> {
122 self.input.get(self.position + offset).copied()
123 }
124
125 fn advance(&mut self) {
126 if self.position < self.input.len() {
127 self.position += 1;
128 }
129 }
130
131 fn skip_whitespace(&mut self) {
132 while self.current().is_some_and(|ch| ch.is_whitespace()) {
133 self.advance();
134 }
135 }
136
137 fn skip_comment(&mut self) -> Result<(), ParserError> {
138 if self.peek(1) == Some('*') {
140 self.advance(); self.advance(); loop {
145 match self.current() {
146 None => return Err(ParserError::UnclosedComment),
147 Some('*') if self.peek(1) == Some('/') => {
148 self.advance(); self.advance(); break;
151 }
152 Some(_) => self.advance(),
153 }
154 }
155 }
156 Ok(())
157 }
158
159 fn read_string(&mut self, quote_char: char) -> Result<String, ParserError> {
160 let mut result = String::new();
161 self.advance(); loop {
164 match self.current() {
165 None => return Err(ParserError::UnclosedString),
166 Some(ch) if ch == quote_char => {
167 self.advance(); return Ok(result);
169 }
170 Some('\\') => {
171 self.advance();
172 match self.current() {
173 None => return Err(ParserError::UnclosedString),
174 Some('"') => result.push('"'),
175 Some('\\') => result.push('\\'),
176 Some('/') => result.push('/'),
177 Some('b') => result.push('\u{0008}'),
178 Some('f') => result.push('\u{000C}'),
179 Some('n') => result.push('\n'),
180 Some('r') => result.push('\r'),
181 Some('t') => result.push('\t'),
182 Some('u') => {
183 self.advance();
185 let mut hex = String::new();
186 for _ in 0..4 {
187 match self.current() {
188 Some(h) if h.is_ascii_hexdigit() => {
189 hex.push(h);
190 self.advance();
191 }
192 _ => {
193 return Err(ParserError::InvalidEscape(format!(
194 "\\u{}",
195 hex
196 )))
197 }
198 }
199 }
200 let code = u32::from_str_radix(&hex, 16).unwrap();
201 if (0xD800..=0xDBFF).contains(&code) {
202 if self.current() == Some('\\') {
204 self.advance();
205 if self.current() == Some('u') {
206 self.advance();
207 let mut low_hex = String::new();
208 for _ in 0..4 {
209 match self.current() {
210 Some(h) if h.is_ascii_hexdigit() => {
211 low_hex.push(h);
212 self.advance();
213 }
214 _ => {
215 return Err(ParserError::InvalidEscape(
216 format!("\\u{}", low_hex),
217 ))
218 }
219 }
220 }
221 let low = u32::from_str_radix(&low_hex, 16).unwrap();
222 if (0xDC00..=0xDFFF).contains(&low) {
223 let cp =
224 0x10000 + (code - 0xD800) * 0x400 + (low - 0xDC00);
225 if let Some(ch) = char::from_u32(cp) {
226 result.push(ch);
227 } else {
228 return Err(ParserError::InvalidEscape(format!(
229 "\\u{}\\u{}",
230 hex, low_hex
231 )));
232 }
233 } else {
234 return Err(ParserError::InvalidEscape(format!(
235 "\\u{}\\u{}",
236 hex, low_hex
237 )));
238 }
239 } else {
240 return Err(ParserError::InvalidEscape(format!(
241 "\\u{}",
242 hex
243 )));
244 }
245 } else {
246 return Err(ParserError::InvalidEscape(format!("\\u{}", hex)));
247 }
248 } else if let Some(ch) = char::from_u32(code) {
249 result.push(ch);
250 } else {
251 return Err(ParserError::InvalidEscape(format!("\\u{}", hex)));
252 }
253 continue; }
255 Some(ch) => return Err(ParserError::InvalidEscape(format!("\\{}", ch))),
256 }
257 self.advance();
258 }
259 Some(ch) => {
260 result.push(ch);
261 self.advance();
262 }
263 }
264 }
265 }
266
267 fn read_number(&mut self) -> Result<f64, ParserError> {
268 let start = self.position;
269
270 if self.current() == Some('0') {
272 self.advance();
273 } else if self.current().is_some_and(|c| c.is_ascii_digit()) {
274 while self.current().is_some_and(|c| c.is_ascii_digit()) {
275 self.advance();
276 }
277 } else {
278 return Err(ParserError::InvalidNumber("Expected digit".to_string()));
279 }
280
281 if self.current() == Some('.') && self.peek(1) != Some('.') {
283 self.advance();
285 if !self.current().is_some_and(|c| c.is_ascii_digit()) {
286 return Err(ParserError::InvalidNumber(
287 "Expected digit after decimal point".to_string(),
288 ));
289 }
290 while self.current().is_some_and(|c| c.is_ascii_digit()) {
291 self.advance();
292 }
293 }
294
295 if matches!(self.current(), Some('e') | Some('E')) {
297 self.advance();
298 if matches!(self.current(), Some('+') | Some('-')) {
299 self.advance();
300 }
301 if !self.current().is_some_and(|c| c.is_ascii_digit()) {
302 return Err(ParserError::InvalidNumber(
303 "Expected digit in exponent".to_string(),
304 ));
305 }
306 while self.current().is_some_and(|c| c.is_ascii_digit()) {
307 self.advance();
308 }
309 }
310
311 let num_str: String = self.input[start..self.position].iter().collect();
312 let num: f64 = num_str
313 .parse()
314 .map_err(|_| ParserError::InvalidNumber(num_str.clone()))?;
315
316 if num.is_infinite() {
318 return Err(ParserError::InvalidNumber(format!(
319 "S0102: Number out of range: {}",
320 num_str
321 )));
322 }
323
324 Ok(num)
325 }
326
327 fn read_identifier(&mut self) -> String {
328 let start = self.position;
329
330 while let Some(ch) = self.current() {
331 if ch.is_alphanumeric() || ch == '_' {
333 self.advance();
334 } else {
335 break;
336 }
337 }
338
339 self.input[start..self.position].iter().collect()
340 }
341
342 fn read_backtick_name(&mut self) -> Result<String, ParserError> {
343 self.advance(); let start = self.position;
345
346 while let Some(ch) = self.current() {
347 if ch == '`' {
348 let name: String = self.input[start..self.position].iter().collect();
349 self.advance(); return Ok(name);
351 }
352 self.advance();
353 }
354
355 Err(ParserError::UnclosedBacktick)
356 }
357
358 fn read_regex(&mut self) -> Result<Token, ParserError> {
359 self.advance(); let mut pattern = String::new();
363 let mut escaped = false;
364
365 loop {
366 match self.current() {
367 None => return Err(ParserError::UnclosedString),
368 Some('/') if !escaped => {
369 self.advance(); break;
371 }
372 Some('\\') if !escaped => {
373 escaped = true;
374 pattern.push('\\');
375 self.advance();
376 }
377 Some(ch) => {
378 escaped = false;
379 pattern.push(ch);
380 self.advance();
381 }
382 }
383 }
384
385 let mut flags = String::new();
387 while let Some(ch) = self.current() {
388 if ch.is_alphabetic() {
389 flags.push(ch);
390 self.advance();
391 } else {
392 break;
393 }
394 }
395
396 Ok(Token::Regex { pattern, flags })
397 }
398
399 fn emit_token(&mut self, token: Token) -> Result<Token, ParserError> {
400 self.last_token = Some(token.clone());
401 Ok(token)
402 }
403
404 pub fn next_token(&mut self) -> Result<Token, ParserError> {
405 loop {
406 self.skip_whitespace();
407
408 match self.current() {
409 None => return Ok(Token::Eof),
410
411 Some('/') if self.peek(1) == Some('*') => {
413 self.skip_comment()?;
414 continue; }
416
417 Some('"') => {
419 let s = self.read_string('"')?;
420 return self.emit_token(Token::String(s));
421 }
422 Some('\'') => {
423 let s = self.read_string('\'')?;
424 return self.emit_token(Token::String(s));
425 }
426
427 Some('`') => {
429 let name = self.read_backtick_name()?;
430 return self.emit_token(Token::Identifier(name));
431 }
432
433 Some(ch) if ch.is_ascii_digit() => {
435 let num = self.read_number()?;
436 return self.emit_token(Token::Number(num));
437 }
438
439 Some('$') if self.peek(1) == Some('$') => {
441 self.advance(); self.advance(); let name = self.read_identifier();
445 return self.emit_token(Token::ParentVariable(name));
446 }
447 Some('$') => {
448 self.advance();
449 let name = self.read_identifier();
450 return self.emit_token(Token::Variable(name));
451 }
452
453 Some('.') if self.peek(1) == Some('.') => {
455 self.advance();
456 self.advance();
457 return Ok(Token::DotDot);
458 }
459 Some(':') if self.peek(1) == Some('=') => {
460 self.advance();
461 self.advance();
462 return Ok(Token::ColonEqual);
463 }
464 Some('!') if self.peek(1) == Some('=') => {
465 self.advance();
466 self.advance();
467 return Ok(Token::NotEqual);
468 }
469 Some('>') if self.peek(1) == Some('=') => {
470 self.advance();
471 self.advance();
472 return Ok(Token::GreaterThanOrEqual);
473 }
474 Some('<') if self.peek(1) == Some('=') => {
475 self.advance();
476 self.advance();
477 return Ok(Token::LessThanOrEqual);
478 }
479 Some('~') if self.peek(1) == Some('>') => {
480 self.advance();
481 self.advance();
482 return self.emit_token(Token::TildeArrow);
483 }
484
485 Some('(') => {
487 self.advance();
488 return Ok(Token::LeftParen);
489 }
490 Some(')') => {
491 self.advance();
492 return self.emit_token(Token::RightParen);
493 }
494 Some('[') => {
495 self.advance();
496 return Ok(Token::LeftBracket);
497 }
498 Some(']') => {
499 self.advance();
500 return self.emit_token(Token::RightBracket);
501 }
502 Some('{') => {
503 self.advance();
504 return Ok(Token::LeftBrace);
505 }
506 Some('}') => {
507 self.advance();
508 return self.emit_token(Token::RightBrace);
509 }
510 Some(',') => {
511 self.advance();
512 return self.emit_token(Token::Comma);
513 }
514 Some(';') => {
515 self.advance();
516 return self.emit_token(Token::Semicolon);
517 }
518 Some(':') => {
519 self.advance();
520 return Ok(Token::Colon);
521 }
522 Some('?') if self.peek(1) == Some('?') => {
523 self.advance();
524 self.advance();
525 return Ok(Token::QuestionQuestion);
526 }
527 Some('?') if self.peek(1) == Some(':') => {
528 self.advance();
529 self.advance();
530 return Ok(Token::QuestionColon);
531 }
532 Some('?') => {
533 self.advance();
534 return Ok(Token::Question);
535 }
536 Some('λ') => {
537 self.advance();
539 return Ok(Token::Function);
540 }
541 Some('.') => {
542 self.advance();
543 return Ok(Token::Dot);
544 }
545 Some('+') => {
546 self.advance();
547 return Ok(Token::Plus);
548 }
549 Some('-') => {
550 self.advance();
551 return Ok(Token::Minus);
552 }
553 Some('*') if self.peek(1) == Some('*') => {
554 self.advance();
555 self.advance();
556 return Ok(Token::StarStar);
557 }
558 Some('*') => {
559 self.advance();
560 return Ok(Token::Star);
561 }
562 Some('/') => {
563 let is_regex = match &self.last_token {
572 None => true, Some(Token::LeftParen)
574 | Some(Token::LeftBracket)
575 | Some(Token::LeftBrace) => true,
576 Some(Token::Comma) | Some(Token::Semicolon) | Some(Token::Colon) => true,
577 Some(Token::Equal) | Some(Token::NotEqual) => true,
578 Some(Token::LessThan) | Some(Token::LessThanOrEqual) => true,
579 Some(Token::GreaterThan) | Some(Token::GreaterThanOrEqual) => true,
580 Some(Token::Plus) | Some(Token::Minus) | Some(Token::Star)
581 | Some(Token::Percent) => true,
582 Some(Token::Ampersand)
583 | Some(Token::Question)
584 | Some(Token::TildeArrow) => true,
585 Some(Token::ColonEqual)
586 | Some(Token::QuestionQuestion)
587 | Some(Token::QuestionColon) => true,
588 Some(Token::And) | Some(Token::Or) | Some(Token::In) => true,
589 Some(Token::Function) => true,
590 Some(Token::Identifier(s)) if s == "and" || s == "or" || s == "in" => true,
591 _ => false, };
593
594 if is_regex {
595 let tok = self.read_regex()?;
596 return self.emit_token(tok);
597 } else {
598 self.advance();
599 return self.emit_token(Token::Slash);
600 }
601 }
602 Some('%') => {
603 self.advance();
604 return Ok(Token::Percent);
605 }
606 Some('^') => {
607 self.advance();
608 return Ok(Token::Caret);
609 }
610 Some('#') => {
611 self.advance();
612 return Ok(Token::Hash);
613 }
614 Some('=') => {
615 self.advance();
616 return Ok(Token::Equal);
617 }
618 Some('<') => {
619 self.advance();
620 return Ok(Token::LessThan);
621 }
622 Some('>') => {
623 self.advance();
624 return Ok(Token::GreaterThan);
625 }
626 Some('&') => {
627 self.advance();
628 return Ok(Token::Ampersand);
629 }
630 Some('|') => {
631 self.advance();
632 return Ok(Token::Pipe);
633 }
634
635 Some(ch) if ch.is_alphabetic() || ch == '_' => {
637 let ident = self.read_identifier();
638 let tok = match ident.as_str() {
639 "true" => Token::True,
640 "false" => Token::False,
641 "null" => Token::Null,
642 "undefined" => Token::Undefined,
643 "function" => Token::Function,
644 _ => Token::Identifier(ident),
646 };
647 return self.emit_token(tok);
648 }
649
650 Some(ch) => {
651 return Err(ParserError::UnexpectedToken(ch.to_string()));
652 }
653 }
654 }
655 }
656}
657
658pub struct Parser {
660 lexer: Lexer,
661 current_token: Token,
662}
663
664impl Parser {
665 pub fn new(input: String) -> Result<Self, ParserError> {
666 let mut lexer = Lexer::new(input);
667 let current_token = lexer.next_token()?;
668 Ok(Parser {
669 lexer,
670 current_token,
671 })
672 }
673
674 fn advance(&mut self) -> Result<(), ParserError> {
675 self.current_token = self.lexer.next_token()?;
676 Ok(())
677 }
678
679 fn expect(&mut self, expected: Token) -> Result<(), ParserError> {
680 if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
681 self.advance()?;
682 Ok(())
683 } else {
684 Err(ParserError::Expected {
685 expected: format!("{:?}", expected),
686 found: format!("{:?}", self.current_token),
687 })
688 }
689 }
690
691 fn binding_power(&self, token: &Token) -> Option<(u8, u8)> {
693 match token {
696 Token::Identifier(name) if name == "or" => Some((25, 26)),
698 Token::Identifier(name) if name == "and" => Some((30, 31)),
699 Token::Identifier(name) if name == "in" => Some((40, 41)),
700 Token::Or => Some((25, 26)),
702 Token::And => Some((30, 31)),
703 Token::Equal
704 | Token::NotEqual
705 | Token::LessThan
706 | Token::LessThanOrEqual
707 | Token::GreaterThan
708 | Token::GreaterThanOrEqual
709 | Token::In => Some((40, 41)),
710 Token::Ampersand => Some((50, 51)),
711 Token::Plus | Token::Minus => Some((50, 51)),
712 Token::Star | Token::Slash | Token::Percent => Some((60, 61)),
713 Token::Dot => Some((75, 85)), Token::LeftBracket => Some((80, 81)),
715 Token::LeftParen => Some((80, 81)),
716 Token::LeftBrace => Some((80, 81)), Token::Caret => Some((80, 81)), Token::Hash => Some((80, 81)), Token::Question => Some((20, 21)),
720 Token::QuestionQuestion => Some((15, 16)), Token::QuestionColon => Some((15, 16)), Token::DotDot => Some((20, 21)),
723 Token::ColonEqual => Some((10, 9)), Token::TildeArrow => Some((70, 71)), _ => None,
726 }
727 }
728
729 fn parse_signature(&mut self) -> Result<String, ParserError> {
731 if self.current_token != Token::LessThan {
733 return Err(ParserError::Expected {
734 expected: "<".to_string(),
735 found: format!("{:?}", self.current_token),
736 });
737 }
738
739 let mut signature = String::from("<");
741 self.advance()?; let mut depth = 1;
746
747 while depth > 0 && self.current_token != Token::Eof {
748 match &self.current_token {
749 Token::LessThan => {
750 signature.push('<');
751 depth += 1;
752 self.advance()?;
753 }
754 Token::GreaterThan => {
755 depth -= 1;
756 if depth > 0 {
757 signature.push('>');
758 }
759 self.advance()?;
760 }
761 Token::Minus => {
762 signature.push('-');
763 self.advance()?;
764 }
765 Token::Colon => {
766 signature.push(':');
767 self.advance()?;
768 }
769 Token::Question => {
770 signature.push('?');
771 self.advance()?;
772 }
773 Token::QuestionColon => {
774 signature.push('?');
776 signature.push(':');
777 self.advance()?;
778 }
779 Token::Identifier(s) => {
780 signature.push_str(s);
781 self.advance()?;
782 }
783 Token::LeftParen | Token::RightParen => {
784 let c = if self.current_token == Token::LeftParen {
786 '('
787 } else {
788 ')'
789 };
790 signature.push(c);
791 self.advance()?;
792 }
793 _ => {
794 return Err(ParserError::UnexpectedToken(format!(
795 "Unexpected token in signature: {:?}",
796 self.current_token
797 )));
798 }
799 }
800 }
801
802 signature.push('>');
803 Ok(signature)
804 }
805
806 fn parse_primary(&mut self) -> Result<AstNode, ParserError> {
808 match &self.current_token {
809 Token::String(s) => {
810 let value = s.clone();
811 self.advance()?;
812 Ok(AstNode::String(value))
813 }
814 Token::Number(n) => {
815 let value = *n;
816 self.advance()?;
817 Ok(AstNode::Number(value))
818 }
819 Token::True => {
820 self.advance()?;
821 Ok(AstNode::Boolean(true))
822 }
823 Token::False => {
824 self.advance()?;
825 Ok(AstNode::Boolean(false))
826 }
827 Token::Null => {
828 self.advance()?;
829 Ok(AstNode::Null)
830 }
831 Token::Undefined => {
832 self.advance()?;
833 Ok(AstNode::Undefined)
834 }
835 Token::Regex { pattern, flags } => {
836 let pat = pattern.clone();
837 let flg = flags.clone();
838 self.advance()?;
839 Ok(AstNode::Regex {
840 pattern: pat,
841 flags: flg,
842 })
843 }
844 Token::Identifier(name) => {
845 let name = name.clone();
846 self.advance()?;
847 Ok(AstNode::Path {
848 steps: vec![PathStep::new(AstNode::Name(name))],
849 })
850 }
851 Token::Variable(name) => {
852 let name = name.clone();
853 self.advance()?;
854 Ok(AstNode::Variable(name))
855 }
856 Token::ParentVariable(name) => {
857 let name = name.clone();
858 self.advance()?;
859 Ok(AstNode::ParentVariable(name))
860 }
861 Token::LeftParen => {
862 self.advance()?; if self.current_token == Token::RightParen {
866 self.advance()?;
867 return Ok(AstNode::Undefined);
868 }
869
870 let mut expressions = vec![self.parse_expression(0)?];
874
875 while self.current_token == Token::Semicolon {
876 self.advance()?;
877 if self.current_token == Token::RightParen {
878 break;
879 }
880 expressions.push(self.parse_expression(0)?);
881 }
882
883 self.expect(Token::RightParen)?;
884
885 Ok(AstNode::Block(expressions))
887 }
888 Token::LeftBracket => {
889 self.advance()?; let mut elements = Vec::new();
892
893 if self.current_token != Token::RightBracket {
894 loop {
895 let element = self.parse_expression(0)?;
896 elements.push(element);
897
898 if self.current_token != Token::Comma {
899 break;
900 }
901 self.advance()?;
902 }
903 }
904
905 self.expect(Token::RightBracket)?;
906 Ok(AstNode::Array(elements))
907 }
908 Token::LeftBrace => {
909 self.advance()?; let mut pairs = Vec::new();
912
913 if self.current_token != Token::RightBrace {
914 loop {
915 let key = self.parse_expression(0)?;
916 self.expect(Token::Colon)?;
917 let value = self.parse_expression(0)?;
918 pairs.push((key, value));
919
920 if self.current_token != Token::Comma {
921 break;
922 }
923 self.advance()?;
924 }
925 }
926
927 self.expect(Token::RightBrace)?;
928 Ok(AstNode::Object(pairs))
929 }
930 Token::Pipe => {
931 self.advance()?; let location = self.parse_expression(0)?;
936
937 self.expect(Token::Pipe)?;
939
940 let update = self.parse_expression(0)?;
942
943 let delete = if self.current_token == Token::Comma {
945 self.advance()?; Some(Box::new(self.parse_expression(0)?))
947 } else {
948 None
949 };
950
951 self.expect(Token::Pipe)?;
953
954 Ok(AstNode::Transform {
955 location: Box::new(location),
956 update: Box::new(update),
957 delete,
958 })
959 }
960 Token::Minus => {
961 self.advance()?;
962 let operand = self.parse_expression(70)?; Ok(AstNode::Unary {
964 op: UnaryOp::Negate,
965 operand: Box::new(operand),
966 })
967 }
968 Token::Star => {
969 self.advance()?;
971 Ok(AstNode::Wildcard)
972 }
973 Token::StarStar => {
974 self.advance()?;
976 Ok(AstNode::Descendant)
977 }
978 Token::Function => {
979 self.advance()?; self.expect(Token::LeftParen)?;
982
983 let mut params = Vec::new();
985 if self.current_token != Token::RightParen {
986 loop {
987 match &self.current_token {
988 Token::Variable(name) => {
989 params.push(name.clone());
990 self.advance()?;
991 }
992 _ => {
993 return Err(ParserError::Expected {
994 expected: "parameter name".to_string(),
995 found: format!("{:?}", self.current_token),
996 })
997 }
998 }
999
1000 if self.current_token != Token::Comma {
1001 break;
1002 }
1003 self.advance()?; }
1005 }
1006
1007 self.expect(Token::RightParen)?;
1008
1009 let signature = if self.current_token == Token::LessThan {
1011 Some(self.parse_signature()?)
1012 } else {
1013 None
1014 };
1015
1016 self.expect(Token::LeftBrace)?;
1017
1018 let body = self.parse_expression(0)?;
1020
1021 self.expect(Token::RightBrace)?;
1022
1023 let (optimized_body, is_thunk) = Self::tail_call_optimize(body);
1025
1026 Ok(AstNode::Lambda {
1027 params,
1028 body: Box::new(optimized_body),
1029 signature,
1030 thunk: is_thunk,
1031 })
1032 }
1033 _ => Err(ParserError::UnexpectedToken(format!(
1034 "{:?}",
1035 self.current_token
1036 ))),
1037 }
1038 }
1039
1040 fn parse_expression(&mut self, min_bp: u8) -> Result<AstNode, ParserError> {
1042 let mut lhs = self.parse_primary()?;
1043
1044 loop {
1045 if matches!(
1047 self.current_token,
1048 Token::Eof
1049 | Token::RightParen
1050 | Token::RightBracket
1051 | Token::RightBrace
1052 | Token::Comma
1053 | Token::Semicolon
1054 | Token::Colon
1055 ) {
1056 break;
1057 }
1058
1059 let (left_bp, right_bp) = match self.binding_power(&self.current_token) {
1061 Some(bp) => bp,
1062 None => break,
1063 };
1064
1065 if left_bp < min_bp {
1066 break;
1067 }
1068
1069 match &self.current_token {
1071 Token::Dot => {
1072 self.advance()?;
1073
1074 if self.current_token == Token::LeftBracket {
1076 self.advance()?;
1077
1078 let mut elements = Vec::new();
1080 if self.current_token != Token::RightBracket {
1081 loop {
1082 elements.push(self.parse_expression(0)?);
1083 if self.current_token != Token::Comma {
1084 break;
1085 }
1086 self.advance()?;
1087 }
1088 }
1089
1090 self.expect(Token::RightBracket)?;
1091
1092 let mut steps = match lhs {
1094 AstNode::Path { steps } => steps,
1095 _ => vec![PathStep::new(lhs)],
1096 };
1097
1098 steps.push(PathStep::new(AstNode::ArrayGroup(elements)));
1099 lhs = AstNode::Path { steps };
1100 } else if self.current_token == Token::LeftParen {
1101 self.advance()?;
1103
1104 let mut expressions = vec![self.parse_expression(0)?];
1106
1107 while self.current_token == Token::Semicolon {
1108 self.advance()?;
1109 if self.current_token == Token::RightParen {
1110 break;
1111 }
1112 expressions.push(self.parse_expression(0)?);
1113 }
1114
1115 self.expect(Token::RightParen)?;
1116
1117 let expr = if expressions.len() == 1 {
1119 expressions.into_iter().next().unwrap()
1120 } else {
1121 AstNode::Block(expressions)
1122 };
1123
1124 let mut steps = match lhs {
1126 AstNode::Path { steps } => steps,
1127 _ => vec![PathStep::new(lhs)],
1128 };
1129
1130 steps.push(PathStep::new(AstNode::FunctionApplication(Box::new(expr))));
1131 lhs = AstNode::Path { steps };
1132 } else {
1133 let rhs = self.parse_expression(right_bp)?;
1135
1136 let mut steps = match lhs {
1138 AstNode::Path { steps } => steps,
1139 AstNode::String(field_name) => {
1142 vec![PathStep::new(AstNode::Name(field_name))]
1143 }
1144 _ => vec![PathStep::new(lhs)],
1145 };
1146
1147 match &rhs {
1150 AstNode::Number(n) => {
1151 return Err(ParserError::InvalidSyntax(
1152 format!("S0213: The literal value {} cannot be used as a step within a path expression", n),
1153 ));
1154 }
1155 AstNode::Boolean(b) => {
1156 return Err(ParserError::InvalidSyntax(
1157 format!("S0213: The literal value {} cannot be used as a step within a path expression", b),
1158 ));
1159 }
1160 AstNode::Null => {
1161 return Err(ParserError::InvalidSyntax(
1162 "S0213: The literal value null cannot be used as a step within a path expression".to_string(),
1163 ));
1164 }
1165 _ => {}
1166 }
1167
1168 match rhs {
1169 AstNode::Path {
1170 steps: mut rhs_steps,
1171 } => {
1172 steps.append(&mut rhs_steps);
1173 }
1174 AstNode::String(field_name) => {
1177 steps.push(PathStep::new(AstNode::Name(field_name)));
1178 }
1179 _ => steps.push(PathStep::new(rhs)),
1180 }
1181
1182 while self.current_token == Token::LeftBracket {
1185 self.advance()?;
1186
1187 let predicate_expr = if self.current_token == Token::RightBracket {
1188 Box::new(AstNode::Boolean(true))
1190 } else {
1191 let pred = self.parse_expression(0)?;
1193 Box::new(pred)
1194 };
1195
1196 self.expect(Token::RightBracket)?;
1197
1198 if let Some(last_step) = steps.last_mut() {
1200 last_step.stages.push(Stage::Filter(predicate_expr));
1201 }
1202 }
1203
1204 lhs = AstNode::Path { steps };
1205 }
1206 }
1207 Token::LeftBracket => {
1208 if matches!(lhs, AstNode::ObjectTransform { .. }) {
1211 return Err(ParserError::InvalidSyntax(
1212 "S0209: A predicate cannot follow a grouping expression in a step"
1213 .to_string(),
1214 ));
1215 }
1216
1217 self.advance()?;
1218
1219 if self.current_token == Token::RightBracket {
1222 self.advance()?;
1224
1225 let mut steps = match lhs {
1226 AstNode::Path { steps } => steps,
1227 _ => vec![PathStep::new(lhs)],
1228 };
1229
1230 steps.push(PathStep::new(AstNode::Predicate(Box::new(
1231 AstNode::Boolean(true),
1232 ))));
1233 lhs = AstNode::Path { steps };
1234 } else {
1235 let predicate = self.parse_expression(0)?;
1237 self.expect(Token::RightBracket)?;
1238
1239 let mut steps = match lhs {
1240 AstNode::Path { steps } => steps,
1241 _ => vec![PathStep::new(lhs)],
1242 };
1243
1244 steps.push(PathStep::new(AstNode::Predicate(Box::new(predicate))));
1245 lhs = AstNode::Path { steps };
1246 }
1247 }
1248 Token::LeftParen => {
1249 self.advance()?;
1250
1251 let mut args = Vec::new();
1252
1253 if self.current_token != Token::RightParen {
1254 loop {
1255 if self.current_token == Token::Question {
1257 args.push(AstNode::Placeholder);
1258 self.advance()?;
1259 } else {
1260 args.push(self.parse_expression(0)?);
1261 }
1262
1263 if self.current_token != Token::Comma {
1264 break;
1265 }
1266 self.advance()?;
1267 }
1268 }
1269
1270 self.expect(Token::RightParen)?;
1271
1272 match lhs {
1274 AstNode::Lambda { .. }
1276 | AstNode::Block(_)
1277 | AstNode::Call { .. }
1278 | AstNode::Function { .. } => {
1279 lhs = AstNode::Call {
1280 procedure: Box::new(lhs),
1281 args,
1282 };
1283 }
1284 ref other_lhs => {
1285 match other_lhs {
1287 AstNode::Path { steps } if steps.len() == 1 => {
1289 let name = match &steps[0].node {
1290 AstNode::Name(s) => s.clone(),
1291 _ => {
1292 return Err(ParserError::InvalidSyntax(
1293 "Invalid function name".to_string(),
1294 ))
1295 }
1296 };
1297 lhs = AstNode::Function {
1298 name,
1299 args,
1300 is_builtin: false,
1301 };
1302 }
1303 AstNode::Path { steps } if steps.len() > 1 => {
1305 let last_step = &steps[steps.len() - 1].node;
1306
1307 if let AstNode::Variable(func_name) = last_step {
1309 let mut context_steps = steps.clone();
1311 context_steps.pop();
1312
1313 let func_call = AstNode::Function {
1315 name: func_name.clone(),
1316 args: args.clone(),
1317 is_builtin: true, };
1319
1320 context_steps.push(PathStep::new(
1322 AstNode::FunctionApplication(Box::new(func_call)),
1323 ));
1324
1325 lhs = AstNode::Path {
1326 steps: context_steps,
1327 };
1328 }
1329 else if let AstNode::Lambda {
1331 params,
1332 body,
1333 signature,
1334 thunk,
1335 } = last_step
1336 {
1337 let mut context_steps = steps.clone();
1339 context_steps.pop();
1340
1341 let full_args = if args.len() < params.len() {
1345 let mut new_args =
1346 vec![AstNode::Variable("$".to_string())];
1347 new_args.extend(args.clone());
1348 new_args
1349 } else {
1350 args.clone()
1351 };
1352
1353 let lambda_invocation = AstNode::Block(vec![
1356 AstNode::Binary {
1357 op: crate::ast::BinaryOp::ColonEqual,
1358 lhs: Box::new(AstNode::Variable(
1359 "__path_lambda__".to_string(),
1360 )),
1361 rhs: Box::new(AstNode::Lambda {
1362 params: params.clone(),
1363 body: body.clone(),
1364 signature: signature.clone(),
1365 thunk: *thunk,
1366 }),
1367 },
1368 AstNode::Function {
1369 name: "__path_lambda__".to_string(),
1370 args: full_args,
1371 is_builtin: true,
1372 },
1373 ]);
1374
1375 context_steps.push(PathStep::new(
1377 AstNode::FunctionApplication(Box::new(
1378 lambda_invocation,
1379 )),
1380 ));
1381
1382 lhs = AstNode::Path {
1383 steps: context_steps,
1384 };
1385 } else {
1386 return Err(ParserError::InvalidSyntax(
1387 "Invalid function call".to_string(),
1388 ));
1389 }
1390 }
1391 AstNode::Variable(name) => {
1393 lhs = AstNode::Function {
1394 name: name.clone(),
1395 args,
1396 is_builtin: true,
1397 };
1398 }
1399 _ => {
1400 return Err(ParserError::InvalidSyntax(
1401 "Invalid function call".to_string(),
1402 ))
1403 }
1404 };
1405 }
1406 }
1407 }
1408 Token::Question => {
1409 self.advance()?;
1410 let then_branch = self.parse_expression(0)?;
1411
1412 let else_branch = if self.current_token == Token::Colon {
1413 self.advance()?;
1414 Some(Box::new(self.parse_expression(0)?))
1416 } else {
1417 None
1418 };
1419
1420 lhs = AstNode::Conditional {
1421 condition: Box::new(lhs),
1422 then_branch: Box::new(then_branch),
1423 else_branch,
1424 };
1425 }
1426 Token::LeftBrace => {
1427 if matches!(lhs, AstNode::ObjectTransform { .. }) {
1430 return Err(ParserError::InvalidSyntax(
1431 "S0210: Each step can only have one grouping expression".to_string(),
1432 ));
1433 }
1434
1435 self.advance()?; let mut pairs = Vec::new();
1439
1440 if self.current_token != Token::RightBrace {
1441 loop {
1442 let key = self.parse_expression(0)?;
1444
1445 self.expect(Token::Colon)?;
1446
1447 let value = self.parse_expression(0)?;
1449
1450 pairs.push((key, value));
1451
1452 if self.current_token != Token::Comma {
1453 break;
1454 }
1455 self.advance()?; }
1457 }
1458
1459 self.expect(Token::RightBrace)?;
1460
1461 lhs = AstNode::ObjectTransform {
1463 input: Box::new(lhs),
1464 pattern: pairs,
1465 };
1466 }
1467 Token::Hash => {
1468 self.advance()?; let var_name = match &self.current_token {
1474 Token::Variable(name) => name.clone(),
1475 _ => {
1476 return Err(ParserError::InvalidSyntax(
1477 "Expected variable name after #".to_string(),
1478 ));
1479 }
1480 };
1481 self.advance()?; lhs = AstNode::IndexBind {
1484 input: Box::new(lhs),
1485 variable: var_name,
1486 };
1487 }
1488 Token::Caret => {
1489 self.advance()?; self.expect(Token::LeftParen)?;
1492
1493 let mut terms = Vec::new();
1494
1495 loop {
1496 let ascending = match &self.current_token {
1498 Token::LessThan => {
1499 self.advance()?;
1500 true
1501 }
1502 Token::GreaterThan => {
1503 self.advance()?;
1504 false
1505 }
1506 _ => true, };
1508
1509 let expr = self.parse_expression(0)?;
1511 terms.push((expr, ascending));
1512
1513 if self.current_token != Token::Comma {
1515 break;
1516 }
1517 self.advance()?; }
1519
1520 self.expect(Token::RightParen)?;
1521
1522 lhs = AstNode::Sort {
1523 input: Box::new(lhs),
1524 terms,
1525 };
1526 }
1527 _ => {
1528 let op = match &self.current_token {
1530 Token::Identifier(name) if name == "and" => BinaryOp::And,
1532 Token::Identifier(name) if name == "or" => BinaryOp::Or,
1533 Token::Identifier(name) if name == "in" => BinaryOp::In,
1534 Token::Plus => BinaryOp::Add,
1536 Token::Minus => BinaryOp::Subtract,
1537 Token::Star => BinaryOp::Multiply,
1538 Token::Slash => BinaryOp::Divide,
1539 Token::Percent => BinaryOp::Modulo,
1540 Token::Equal => BinaryOp::Equal,
1541 Token::NotEqual => BinaryOp::NotEqual,
1542 Token::LessThan => BinaryOp::LessThan,
1543 Token::LessThanOrEqual => BinaryOp::LessThanOrEqual,
1544 Token::GreaterThan => BinaryOp::GreaterThan,
1545 Token::GreaterThanOrEqual => BinaryOp::GreaterThanOrEqual,
1546 Token::And => BinaryOp::And,
1547 Token::Or => BinaryOp::Or,
1548 Token::In => BinaryOp::In,
1549 Token::Ampersand => BinaryOp::Concatenate,
1550 Token::DotDot => BinaryOp::Range,
1551 Token::ColonEqual => BinaryOp::ColonEqual,
1552 Token::QuestionQuestion => BinaryOp::Coalesce,
1553 Token::QuestionColon => BinaryOp::Default,
1554 Token::TildeArrow => BinaryOp::ChainPipe,
1555 _ => {
1556 return Err(ParserError::UnexpectedToken(format!(
1557 "{:?}",
1558 self.current_token
1559 )))
1560 }
1561 };
1562
1563 self.advance()?;
1564 let rhs = self.parse_expression(right_bp)?;
1565
1566 lhs = AstNode::Binary {
1567 op,
1568 lhs: Box::new(lhs),
1569 rhs: Box::new(rhs),
1570 };
1571 }
1572 }
1573 }
1574
1575 Ok(lhs)
1576 }
1577
1578 pub fn parse(&mut self) -> Result<AstNode, ParserError> {
1579 let ast = self.parse_expression(0)?;
1580
1581 if self.current_token != Token::Eof {
1582 return Err(ParserError::Expected {
1583 expected: "end of expression".to_string(),
1584 found: format!("{:?}", self.current_token),
1585 });
1586 }
1587
1588 Ok(ast)
1589 }
1590
1591 fn tail_call_optimize(expr: AstNode) -> (AstNode, bool) {
1601 let is_thunk = Self::is_tail_call(&expr);
1602 (expr, is_thunk)
1603 }
1604
1605 fn is_tail_call(expr: &AstNode) -> bool {
1608 match expr {
1609 AstNode::Function { .. } => true,
1611 AstNode::Call { .. } => true,
1612
1613 AstNode::Conditional {
1615 then_branch,
1616 else_branch,
1617 ..
1618 } => {
1619 let then_is_tail = Self::is_tail_call(then_branch);
1620 let else_is_tail = else_branch.as_ref().is_some_and(|e| Self::is_tail_call(e));
1621 then_is_tail || else_is_tail
1623 }
1624
1625 AstNode::Block(exprs) => exprs.last().is_some_and(Self::is_tail_call),
1627
1628 AstNode::Binary {
1630 op: BinaryOp::ColonEqual,
1631 rhs,
1632 ..
1633 } => {
1634 Self::is_tail_call(rhs)
1638 }
1639
1640 _ => false,
1642 }
1643 }
1644}
1645
1646pub fn parse(expression: &str) -> Result<AstNode, ParserError> {
1650 let mut parser = Parser::new(expression.to_string())?;
1651 parser.parse()
1652}
1653
1654#[cfg(test)]
1655mod tests {
1656 use super::*;
1657
1658 #[test]
1660 fn test_lexer_numbers() {
1661 let mut lexer = Lexer::new("42 3.14 -10 2.5e10 1E-5".to_string());
1662
1663 assert_eq!(lexer.next_token().unwrap(), Token::Number(42.0));
1664 assert_eq!(lexer.next_token().unwrap(), Token::Number(3.14));
1665 assert_eq!(lexer.next_token().unwrap(), Token::Minus);
1667 assert_eq!(lexer.next_token().unwrap(), Token::Number(10.0));
1668 assert_eq!(lexer.next_token().unwrap(), Token::Number(2.5e10));
1669 assert_eq!(lexer.next_token().unwrap(), Token::Number(1e-5));
1670 assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1671 }
1672
1673 #[test]
1674 fn test_lexer_strings() {
1675 let mut lexer = Lexer::new(r#""hello" 'world' "with\nnewline""#.to_string());
1676
1677 assert_eq!(
1678 lexer.next_token().unwrap(),
1679 Token::String("hello".to_string())
1680 );
1681 assert_eq!(
1682 lexer.next_token().unwrap(),
1683 Token::String("world".to_string())
1684 );
1685 assert_eq!(
1686 lexer.next_token().unwrap(),
1687 Token::String("with\nnewline".to_string())
1688 );
1689 assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1690 }
1691
1692 #[test]
1693 fn test_lexer_string_escapes() {
1694 let mut lexer = Lexer::new(r#""a\"b\\c\/d""#.to_string());
1695 assert_eq!(
1696 lexer.next_token().unwrap(),
1697 Token::String("a\"b\\c/d".to_string())
1698 );
1699 }
1700
1701 #[test]
1702 fn test_lexer_keywords() {
1703 let mut lexer = Lexer::new("true false null and or in".to_string());
1704
1705 assert_eq!(lexer.next_token().unwrap(), Token::True);
1706 assert_eq!(lexer.next_token().unwrap(), Token::False);
1707 assert_eq!(lexer.next_token().unwrap(), Token::Null);
1708 assert_eq!(
1710 lexer.next_token().unwrap(),
1711 Token::Identifier("and".to_string())
1712 );
1713 assert_eq!(
1714 lexer.next_token().unwrap(),
1715 Token::Identifier("or".to_string())
1716 );
1717 assert_eq!(
1718 lexer.next_token().unwrap(),
1719 Token::Identifier("in".to_string())
1720 );
1721 assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1722 }
1723
1724 #[test]
1725 fn test_lexer_identifiers() {
1726 let mut lexer = Lexer::new("foo bar_baz test123".to_string());
1727
1728 assert_eq!(
1729 lexer.next_token().unwrap(),
1730 Token::Identifier("foo".to_string())
1731 );
1732 assert_eq!(
1733 lexer.next_token().unwrap(),
1734 Token::Identifier("bar_baz".to_string())
1735 );
1736 assert_eq!(
1737 lexer.next_token().unwrap(),
1738 Token::Identifier("test123".to_string())
1739 );
1740 assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1741 }
1742
1743 #[test]
1744 fn test_lexer_variables() {
1745 let mut lexer = Lexer::new("$var $foo_bar".to_string());
1746
1747 assert_eq!(
1748 lexer.next_token().unwrap(),
1749 Token::Variable("var".to_string())
1750 );
1751 assert_eq!(
1752 lexer.next_token().unwrap(),
1753 Token::Variable("foo_bar".to_string())
1754 );
1755 assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1756 }
1757
1758 #[test]
1759 fn test_lexer_operators() {
1760 let mut lexer = Lexer::new("+ - * % = != < <= > >= & . .. := ".to_string());
1762
1763 assert_eq!(lexer.next_token().unwrap(), Token::Plus);
1764 assert_eq!(lexer.next_token().unwrap(), Token::Minus);
1765 assert_eq!(lexer.next_token().unwrap(), Token::Star);
1766 assert_eq!(lexer.next_token().unwrap(), Token::Percent);
1767 assert_eq!(lexer.next_token().unwrap(), Token::Equal);
1768 assert_eq!(lexer.next_token().unwrap(), Token::NotEqual);
1769 assert_eq!(lexer.next_token().unwrap(), Token::LessThan);
1770 assert_eq!(lexer.next_token().unwrap(), Token::LessThanOrEqual);
1771 assert_eq!(lexer.next_token().unwrap(), Token::GreaterThan);
1772 assert_eq!(lexer.next_token().unwrap(), Token::GreaterThanOrEqual);
1773 assert_eq!(lexer.next_token().unwrap(), Token::Ampersand);
1774 assert_eq!(lexer.next_token().unwrap(), Token::Dot);
1775 assert_eq!(lexer.next_token().unwrap(), Token::DotDot);
1776 assert_eq!(lexer.next_token().unwrap(), Token::ColonEqual);
1777 assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1778
1779 let mut lexer2 = Lexer::new("42 / 2".to_string());
1782 assert_eq!(lexer2.next_token().unwrap(), Token::Number(42.0));
1783 assert_eq!(lexer2.next_token().unwrap(), Token::Slash);
1784 assert_eq!(lexer2.next_token().unwrap(), Token::Number(2.0));
1785 assert_eq!(lexer2.next_token().unwrap(), Token::Eof);
1786 }
1787
1788 #[test]
1789 fn test_lexer_delimiters() {
1790 let mut lexer = Lexer::new("()[]{},:;?".to_string());
1791
1792 assert_eq!(lexer.next_token().unwrap(), Token::LeftParen);
1793 assert_eq!(lexer.next_token().unwrap(), Token::RightParen);
1794 assert_eq!(lexer.next_token().unwrap(), Token::LeftBracket);
1795 assert_eq!(lexer.next_token().unwrap(), Token::RightBracket);
1796 assert_eq!(lexer.next_token().unwrap(), Token::LeftBrace);
1797 assert_eq!(lexer.next_token().unwrap(), Token::RightBrace);
1798 assert_eq!(lexer.next_token().unwrap(), Token::Comma);
1799 assert_eq!(lexer.next_token().unwrap(), Token::Colon);
1800 assert_eq!(lexer.next_token().unwrap(), Token::Semicolon);
1801 assert_eq!(lexer.next_token().unwrap(), Token::Question);
1802 assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1803 }
1804
1805 #[test]
1806 fn test_empty_brackets() {
1807 let mut parser = Parser::new("foo[]".to_string()).unwrap();
1808 let ast = parser.parse().unwrap();
1809
1810 if let AstNode::Path { steps } = ast {
1812 assert_eq!(steps.len(), 2);
1813 assert!(matches!(steps[0].node, AstNode::Name(ref s) if s == "foo"));
1814 if let AstNode::Predicate(pred) = &steps[1].node {
1815 assert!(matches!(**pred, AstNode::Boolean(true)));
1816 } else {
1817 panic!("Expected Predicate as second step, got {:?}", steps[1].node);
1818 }
1819 } else {
1820 panic!("Expected Path, got {:?}", ast);
1821 }
1822 }
1823
1824 #[test]
1825 fn test_lexer_comments() {
1826 let mut lexer = Lexer::new("foo /* comment */ bar".to_string());
1827
1828 assert_eq!(
1829 lexer.next_token().unwrap(),
1830 Token::Identifier("foo".to_string())
1831 );
1832 assert_eq!(
1833 lexer.next_token().unwrap(),
1834 Token::Identifier("bar".to_string())
1835 );
1836 assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1837 }
1838
1839 #[test]
1840 fn test_lexer_backtick_names() {
1841 let mut lexer = Lexer::new("`field name` `with-dash`".to_string());
1842
1843 assert_eq!(
1844 lexer.next_token().unwrap(),
1845 Token::Identifier("field name".to_string())
1846 );
1847 assert_eq!(
1848 lexer.next_token().unwrap(),
1849 Token::Identifier("with-dash".to_string())
1850 );
1851 assert_eq!(lexer.next_token().unwrap(), Token::Eof);
1852 }
1853
1854 #[test]
1856 fn test_parse_number() {
1857 let ast = parse("42").unwrap();
1858 assert_eq!(ast, AstNode::Number(42.0));
1859 }
1860
1861 #[test]
1862 fn test_parse_string() {
1863 let ast = parse(r#""hello""#).unwrap();
1864 assert_eq!(ast, AstNode::String("hello".to_string()));
1865 }
1866
1867 #[test]
1868 fn test_parse_boolean() {
1869 let ast = parse("true").unwrap();
1870 assert_eq!(ast, AstNode::Boolean(true));
1871
1872 let ast = parse("false").unwrap();
1873 assert_eq!(ast, AstNode::Boolean(false));
1874 }
1875
1876 #[test]
1877 fn test_parse_null() {
1878 let ast = parse("null").unwrap();
1879 assert_eq!(ast, AstNode::Null);
1880 }
1881
1882 #[test]
1883 fn test_parse_variable() {
1884 let ast = parse("$var").unwrap();
1885 assert_eq!(ast, AstNode::Variable("var".to_string()));
1886 }
1887
1888 #[test]
1889 fn test_parse_identifier() {
1890 let ast = parse("foo").unwrap();
1891 assert_eq!(
1892 ast,
1893 AstNode::Path {
1894 steps: vec![PathStep::new(AstNode::Name("foo".to_string()))]
1895 }
1896 );
1897 }
1898
1899 #[test]
1900 fn test_parse_addition() {
1901 let ast = parse("1 + 2").unwrap();
1902 match ast {
1903 AstNode::Binary { op, lhs, rhs } => {
1904 assert_eq!(op, BinaryOp::Add);
1905 assert_eq!(*lhs, AstNode::Number(1.0));
1906 assert_eq!(*rhs, AstNode::Number(2.0));
1907 }
1908 _ => panic!("Expected Binary node"),
1909 }
1910 }
1911
1912 #[test]
1913 fn test_parse_precedence() {
1914 let ast = parse("1 + 2 * 3").unwrap();
1916 match ast {
1917 AstNode::Binary {
1918 op: BinaryOp::Add,
1919 lhs,
1920 rhs,
1921 } => {
1922 assert_eq!(*lhs, AstNode::Number(1.0));
1923 match *rhs {
1924 AstNode::Binary {
1925 op: BinaryOp::Multiply,
1926 lhs,
1927 rhs,
1928 } => {
1929 assert_eq!(*lhs, AstNode::Number(2.0));
1930 assert_eq!(*rhs, AstNode::Number(3.0));
1931 }
1932 _ => panic!("Expected Binary node for multiplication"),
1933 }
1934 }
1935 _ => panic!("Expected Binary node for addition"),
1936 }
1937 }
1938
1939 #[test]
1940 fn test_parse_parentheses() {
1941 let ast = parse("(1 + 2) * 3").unwrap();
1944 match ast {
1945 AstNode::Binary {
1946 op: BinaryOp::Multiply,
1947 lhs,
1948 rhs,
1949 } => {
1950 match *lhs {
1951 AstNode::Block(ref exprs) => {
1952 assert_eq!(exprs.len(), 1);
1953 match &exprs[0] {
1954 AstNode::Binary {
1955 op: BinaryOp::Add,
1956 lhs,
1957 rhs,
1958 } => {
1959 assert_eq!(**lhs, AstNode::Number(1.0));
1960 assert_eq!(**rhs, AstNode::Number(2.0));
1961 }
1962 _ => panic!("Expected Binary node for addition inside block"),
1963 }
1964 }
1965 _ => panic!(
1966 "Expected Block node for parenthesized expression, got {:?}",
1967 lhs
1968 ),
1969 }
1970 assert_eq!(*rhs, AstNode::Number(3.0));
1971 }
1972 _ => panic!("Expected Binary node for multiplication"),
1973 }
1974 }
1975
1976 #[test]
1977 fn test_parse_array() {
1978 let ast = parse("[1, 2, 3]").unwrap();
1979 match ast {
1980 AstNode::Array(elements) => {
1981 assert_eq!(elements.len(), 3);
1982 assert_eq!(elements[0], AstNode::Number(1.0));
1983 assert_eq!(elements[1], AstNode::Number(2.0));
1984 assert_eq!(elements[2], AstNode::Number(3.0));
1985 }
1986 _ => panic!("Expected Array node"),
1987 }
1988 }
1989
1990 #[test]
1991 fn test_parse_object() {
1992 let ast = parse(r#"{"a": 1, "b": 2}"#).unwrap();
1993 match ast {
1994 AstNode::Object(pairs) => {
1995 assert_eq!(pairs.len(), 2);
1996 assert_eq!(pairs[0].0, AstNode::String("a".to_string()));
1997 assert_eq!(pairs[0].1, AstNode::Number(1.0));
1998 assert_eq!(pairs[1].0, AstNode::String("b".to_string()));
1999 assert_eq!(pairs[1].1, AstNode::Number(2.0));
2000 }
2001 _ => panic!("Expected Object node"),
2002 }
2003 }
2004
2005 #[test]
2006 fn test_parse_path() {
2007 let ast = parse("foo.bar").unwrap();
2008 match ast {
2009 AstNode::Path { steps } => {
2010 assert_eq!(steps.len(), 2);
2011 assert_eq!(steps[0].node, AstNode::Name("foo".to_string()));
2012 assert_eq!(steps[1].node, AstNode::Name("bar".to_string()));
2013 }
2014 _ => panic!("Expected Path node"),
2015 }
2016 }
2017
2018 #[test]
2019 fn test_parse_function_call() {
2020 let ast = parse("sum(1, 2, 3)").unwrap();
2021 match ast {
2022 AstNode::Function {
2023 name,
2024 args,
2025 is_builtin,
2026 } => {
2027 assert_eq!(name, "sum");
2028 assert_eq!(args.len(), 3);
2029 assert_eq!(args[0], AstNode::Number(1.0));
2030 assert_eq!(args[1], AstNode::Number(2.0));
2031 assert_eq!(args[2], AstNode::Number(3.0));
2032 assert!(!is_builtin); }
2034 _ => panic!("Expected Function node"),
2035 }
2036 }
2037
2038 #[test]
2039 fn test_parse_conditional() {
2040 let ast = parse("x > 0 ? 1 : -1").unwrap();
2041 match ast {
2042 AstNode::Conditional {
2043 condition,
2044 then_branch,
2045 else_branch,
2046 } => {
2047 assert!(matches!(*condition, AstNode::Binary { .. }));
2048 assert_eq!(*then_branch, AstNode::Number(1.0));
2049 assert_eq!(
2051 else_branch,
2052 Some(Box::new(AstNode::Unary {
2053 op: UnaryOp::Negate,
2054 operand: Box::new(AstNode::Number(1.0)),
2055 }))
2056 );
2057 }
2058 _ => panic!("Expected Conditional node"),
2059 }
2060 }
2061
2062 #[test]
2063 fn test_parse_comparison() {
2064 let ast = parse("x < 10").unwrap();
2065 match ast {
2066 AstNode::Binary { op, .. } => {
2067 assert_eq!(op, BinaryOp::LessThan);
2068 }
2069 _ => panic!("Expected Binary node"),
2070 }
2071 }
2072
2073 #[test]
2074 fn test_parse_logical_and() {
2075 let ast = parse("true and false").unwrap();
2076 match ast {
2077 AstNode::Binary { op, lhs, rhs } => {
2078 assert_eq!(op, BinaryOp::And);
2079 assert_eq!(*lhs, AstNode::Boolean(true));
2080 assert_eq!(*rhs, AstNode::Boolean(false));
2081 }
2082 _ => panic!("Expected Binary node"),
2083 }
2084 }
2085
2086 #[test]
2087 fn test_parse_string_concatenation() {
2088 let ast = parse(r#""hello" & " " & "world""#).unwrap();
2089 match ast {
2090 AstNode::Binary { op, .. } => {
2091 assert_eq!(op, BinaryOp::Concatenate);
2092 }
2093 _ => panic!("Expected Binary node"),
2094 }
2095 }
2096
2097 #[test]
2098 fn test_parse_unary_minus() {
2099 let ast = parse("-5").unwrap();
2101 assert_eq!(
2102 ast,
2103 AstNode::Unary {
2104 op: UnaryOp::Negate,
2105 operand: Box::new(AstNode::Number(5.0)),
2106 }
2107 );
2108 }
2109
2110 #[test]
2111 fn test_parse_block() {
2112 let ast = parse("(1; 2; 3)").unwrap();
2113 match ast {
2114 AstNode::Block(expressions) => {
2115 assert_eq!(expressions.len(), 3);
2116 assert_eq!(expressions[0], AstNode::Number(1.0));
2117 assert_eq!(expressions[1], AstNode::Number(2.0));
2118 assert_eq!(expressions[2], AstNode::Number(3.0));
2119 }
2120 _ => panic!("Expected Block node"),
2121 }
2122 }
2123
2124 #[test]
2125 fn test_parse_complex_expression() {
2126 let ast = parse("(a + b) * c.d").unwrap();
2128 assert!(matches!(ast, AstNode::Binary { .. }));
2129 }
2130
2131 #[test]
2132 fn test_parse_dollar_function_call() {
2133 let ast = parse(r#"$uppercase("hello")"#).unwrap();
2135 match ast {
2136 AstNode::Function {
2137 name,
2138 args,
2139 is_builtin,
2140 } => {
2141 assert_eq!(name, "uppercase");
2142 assert_eq!(args.len(), 1);
2143 assert_eq!(args[0], AstNode::String("hello".to_string()));
2144 assert!(is_builtin); }
2146 _ => panic!("Expected Function node"),
2147 }
2148
2149 let ast = parse("$sum([1, 2, 3])").unwrap();
2151 match ast {
2152 AstNode::Function {
2153 name,
2154 args,
2155 is_builtin,
2156 } => {
2157 assert_eq!(name, "sum");
2158 assert_eq!(args.len(), 1);
2159 assert!(is_builtin); }
2161 _ => panic!("Expected Function node"),
2162 }
2163 }
2164
2165 #[test]
2166 fn test_parse_nested_dollar_functions() {
2167 let ast = parse(r#"$length($lowercase("HELLO"))"#).unwrap();
2169 match ast {
2170 AstNode::Function {
2171 name,
2172 args,
2173 is_builtin,
2174 } => {
2175 assert_eq!(name, "length");
2176 assert_eq!(args.len(), 1);
2177 assert!(is_builtin);
2178 match &args[0] {
2180 AstNode::Function {
2181 name: inner_name,
2182 is_builtin: inner_builtin,
2183 ..
2184 } => {
2185 assert_eq!(inner_name, "lowercase");
2186 assert!(inner_builtin);
2187 }
2188 _ => panic!("Expected nested Function node"),
2189 }
2190 }
2191 _ => panic!("Expected Function node"),
2192 }
2193 }
2194}