1use crate::error::{
23 argument_error, parse_error, recursion_error, scope_error, NightjarLanguageError, Span,
24};
25use crate::language::grammar::{
26 BoolExpr, FuncOp, Keyword, Literal, Predicate, Program, QuantifierOp, Spanned, SpannedBoolExpr,
27 SpannedValueExpr, SymbolRoot, Token, UnaryCheckOp, ValueExpr, VerifierOp,
28};
29
/// Tunable limits applied while parsing a program.
///
/// `PartialEq`/`Eq` are derived so configurations can be compared directly
/// (for example in assertions or change detection).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParserConfig {
    /// Maximum number of simultaneously open parenthesised forms; the parser
    /// reports a recursion error once nesting exceeds this value.
    pub max_depth: usize,
}
41
42impl Default for ParserConfig {
43 fn default() -> Self {
44 Self { max_depth: 256 }
45 }
46}
47
/// Character-level scanner that turns source text into spanned tokens.
pub struct Tokenizer<'a> {
    /// The complete source text; literal and identifier text is sliced from it.
    input: &'a str,
    /// Index into `chars` of the next character to examine.
    cursor: usize,
    /// Every character of `input` paired with its byte offset.
    chars: Vec<(usize, char)>,
    /// Byte length of `input`, reported as the position once `cursor` runs off the end.
    eof: usize,
}
63
64impl<'a> Tokenizer<'a> {
65 pub fn new(input: &'a str) -> Self {
70 let chars: Vec<(usize, char)> = input.char_indices().collect();
71 Self {
72 input,
73 cursor: 0,
74 chars,
75 eof: input.len(),
76 }
77 }
78
79 pub fn tokenize(&mut self) -> Result<Vec<Spanned<Token>>, NightjarLanguageError> {
84 let mut tokens = Vec::new();
85 loop {
86 self.skip_whitespace(); let Some(c) = self.peek_char() else {
89 break; };
91 let start = self.byte_pos(); let token = match c {
93 '(' => {
94 self.advance();
95 Token::LParen
96 }
97 ')' => {
98 self.advance();
99 Token::RParen
100 }
101 '"' => self.read_string(start)?,
102 '.' => self.read_symbol(start, SymbolRoot::Root)?,
103 '@' => self.read_symbol(start, SymbolRoot::Element)?,
104 '-' if self.is_negative_literal() => self.read_number(start)?,
105 c if c.is_ascii_digit() => self.read_number(start)?,
106 c if c.is_alphabetic() || c == '_' => self.read_ident(start)?,
107 other => {
108 return Err(parse_error(
109 Span::new(start, start + other.len_utf8()),
110 format!("unexpected character `{}`", other),
111 ));
112 }
113 };
114 let end = self.byte_pos();
115 tokens.push(Spanned::new(token, Span::new(start, end)));
116 }
117 Ok(tokens)
118 }
119
120 fn byte_pos(&self) -> usize {
127 if self.cursor < self.chars.len() {
128 self.chars[self.cursor].0
129 } else {
130 self.eof
131 }
132 }
133
134 fn _is_eof(&self) -> bool {
136 self.cursor >= self.chars.len()
137 }
138
139 fn peek_char(&self) -> Option<char> {
143 self.chars.get(self.cursor).map(|(_, c)| *c)
144 }
145
146 fn peek_char_at(&self, offset: usize) -> Option<char> {
151 self.chars.get(self.cursor + offset).map(|(_, c)| *c)
152 }
153
154 fn advance(&mut self) {
156 self.cursor += 1;
157 }
158
159 fn skip_whitespace(&mut self) {
162 while let Some(c) = self.peek_char() {
163 if c.is_whitespace() {
164 self.advance();
165 } else {
166 break;
167 }
168 }
169 }
170
171 fn is_negative_literal(&self) -> bool {
178 self.peek_char() == Some('-') && self.peek_char_at(1).is_some_and(|c| c.is_ascii_digit())
179 }
181
182 fn read_string(&mut self, start: usize) -> Result<Token, NightjarLanguageError> {
192 self.advance();
194 let mut buf = String::new();
195 loop {
196 match self.peek_char() {
197 Some('"') => {
198 self.advance();
199 return Ok(Token::StringLiteral(buf));
200 }
201 Some(c) => {
202 buf.push(c); self.advance();
204 }
205 None => {
206 return Err(parse_error(
207 Span::new(start, self.byte_pos()),
208 "unterminated string literal",
209 ));
210 }
211 }
212 }
213 }
214
215 fn read_number(&mut self, start: usize) -> Result<Token, NightjarLanguageError> {
221 if self.peek_char() == Some('-') {
223 self.advance();
224 }
225 while let Some(c) = self.peek_char() {
227 if c.is_ascii_digit() {
228 self.advance();
229 } else {
230 break;
231 }
232 }
233
234 let mut is_float = false; if self.peek_char() == Some('.') && self.peek_char_at(1) .is_some_and(|c| c.is_ascii_digit())
239 {
240 is_float = true;
242 self.advance(); while let Some(c) = self.peek_char() {
245 if c.is_ascii_digit() {
246 self.advance();
247 } else {
248 break;
249 }
250 }
251 }
252
253 let end = self.byte_pos(); let text = &self.input[start..end]; if is_float {
257 text.parse::<f64>().map(Token::FloatLiteral).map_err(|_| {
259 parse_error(
261 Span::new(start, end),
263 format!("invalid float literal `{}`", text),
264 )
265 }) } else {
267 text.parse::<i64>().map(Token::IntLiteral).map_err(|_| {
269 parse_error(
271 Span::new(start, end),
273 format!("invalid integer literal `{}`", text),
274 )
275 }) }
277 }
278
279 fn read_symbol(
298 &mut self,
299 _start: usize,
300 root: SymbolRoot, ) -> Result<Token, NightjarLanguageError> {
302 self.advance(); let sigil: char = match root {
304 SymbolRoot::Root => '.', SymbolRoot::Element => '@', };
307 let mut path = String::new(); match root {
313 SymbolRoot::Root => {
315 match self.try_read_segment() {
316 Some(seg) => Self::push_segment(&mut path, seg), None => return self.complete_bare_sigil(root, path, sigil),
319 }
320 }
321 SymbolRoot::Element if self.peek_char() != Some('.') => {
323 return self.complete_bare_sigil(root, path, sigil); }
325 _ => {}
326 }
327
328 while self.peek_char() == Some('.') {
330 let dot_pos = self.byte_pos();
331 self.advance(); match self.try_read_segment() {
333 Some(seg) => Self::push_segment(&mut path, seg),
334 None => {
335 return Err(parse_error(
336 Span::new(dot_pos, dot_pos + 1),
337 "expected symbol segment after `.`",
338 ));
339 }
340 }
341 }
342 Ok(Token::Symbol { root, path })
343 }
344
345 fn try_read_segment(&mut self) -> Option<&str> {
363 let seg_start = self.byte_pos(); while let Some(c) = self.peek_char() {
365 if c.is_alphanumeric() || c == '_' {
367 self.advance();
368 } else {
369 break;
370 }
371 }
372 let seg_end = self.byte_pos(); if seg_start == seg_end {
374 None
376 } else {
377 Some(&self.input[seg_start..seg_end]) }
379 }
380
/// Appends `seg` to `path`, inserting a `.` separator unless `path` is empty.
fn push_segment(path: &mut String, seg: &str) {
    let separator = if path.is_empty() { "" } else { "." };
    path.push_str(separator);
    path.push_str(seg);
}
391
392 fn complete_bare_sigil(
411 &self,
412 root: SymbolRoot, path: String, sigil: char, ) -> Result<Token, NightjarLanguageError> {
416 match self.peek_char() {
417 None => Ok(Token::Symbol { root, path }), Some(c) if c.is_whitespace() || c == ')' =>
419 {
421 Ok(Token::Symbol { root, path })
422 }
423 Some(c) => {
424 let pos = self.byte_pos();
425 Err(parse_error(
426 Span::new(pos, pos + c.len_utf8()),
427 format!("unexpected character `{}` after `{}`", c, sigil),
428 ))
429 }
430 }
431 }
432
433 fn read_ident(&mut self, start: usize) -> Result<Token, NightjarLanguageError> {
441 while let Some(c) = self.peek_char() {
442 if c.is_alphanumeric() || c == '_' {
443 self.advance();
444 } else {
445 break;
446 }
447 }
448 let end = self.byte_pos();
449 let text = &self.input[start..end];
450 match text {
451 "True" => Ok(Token::BoolLiteral(true)),
452 "False" => Ok(Token::BoolLiteral(false)),
453 "Null" => Ok(Token::NullLiteral),
454 _ => match Keyword::from_ident(text) {
455 Some(kw) => Ok(Token::Keyword(kw)),
456 None => Err(parse_error(
457 Span::new(start, end),
458 format!("unknown identifier `{}`", text),
459 )),
460 },
461 }
462 }
463
464 }
466
467pub struct Parser {
471 tokens: Vec<Spanned<Token>>,
472 pos: usize,
473 depth: usize,
474 max_depth: usize,
475 input_len: usize,
476}
477
478impl Parser {
479 pub fn parse(
483 tokens: Vec<Spanned<Token>>,
484 config: &ParserConfig,
485 ) -> Result<Program, NightjarLanguageError> {
486 let input_len = tokens.last().map(|t| t.span.end).unwrap_or(0);
487 let mut p = Self {
488 tokens,
489 pos: 0,
490 depth: 0,
491 max_depth: config.max_depth,
492 input_len,
493 };
494 let expr = p.parse_bool_expr()?;
495 p.expect_eof()?;
496 Ok(Program { expr })
497 }
498
499 fn peek(&self) -> Option<&Spanned<Token>> {
503 self.tokens.get(self.pos)
504 }
505
506 fn peek_token(&self) -> Option<&Token> {
510 self.peek().map(|t| &t.node)
511 }
512
513 fn bump(&mut self) -> Spanned<Token> {
518 let t = self.tokens[self.pos].clone();
519 self.pos += 1;
520 t
521 }
522
523 fn current_span(&self) -> Span {
528 self.peek()
529 .map(|t| t.span)
530 .unwrap_or(Span::point(self.input_len)) }
532
533 fn expect_rparen(&mut self) -> Result<Span, NightjarLanguageError> {
535 match self.peek_token() {
536 Some(Token::RParen) => Ok(self.bump().span),
537 _ => Err(parse_error(self.current_span(), "expected `)`")),
538 }
539 }
540
541 fn expect_eof(&mut self) -> Result<(), NightjarLanguageError> {
546 match self.peek() {
547 None => Ok(()),
548 Some(t) => Err(parse_error(
549 t.span,
550 "unexpected token after complete expression",
551 )),
552 }
553 }
554
555 fn enter_depth(&mut self, span: Span) -> Result<(), NightjarLanguageError> {
562 self.depth += 1;
563 if self.depth > self.max_depth {
564 return Err(recursion_error(span, self.max_depth));
565 }
566 Ok(())
567 }
568
569 fn exit_depth(&mut self) {
572 self.depth = self.depth.saturating_sub(1); }
574
575 fn parse_bool_expr(&mut self) -> Result<SpannedBoolExpr, NightjarLanguageError> {
585 let start_span = self.current_span();
586 match self.peek_token() {
587 Some(Token::BoolLiteral(b)) => {
588 let b = *b;
590 let span = self.bump().span;
591 Ok(Spanned::new(BoolExpr::Literal(b), span))
592 }
593 Some(Token::LParen) => {
594 let lparen_span = self.bump().span;
596 self.enter_depth(lparen_span)?; let result = self.parse_bool_body(lparen_span.start);
598 self.exit_depth(); result
600 }
601 Some(_) => Err(parse_error(start_span, "expected boolean expression")),
602 None => Err(parse_error(
603 start_span,
604 "expected boolean expression, got end of input",
605 )),
606 }
607 }
608
609 fn parse_bool_body(&mut self, start: usize) -> Result<SpannedBoolExpr, NightjarLanguageError> {
616 let kw = self.expect_keyword_token()?;
618 match kw.node { Keyword::EQ | Keyword::NE | Keyword::LT | Keyword::LE | Keyword::GT | Keyword::GE => {
620 let op = VerifierOp::from_keyword(kw.node).ok_or_else(|| {
622 parse_error(kw.span, "internal: expected verifier keyword")
623 })?;
624 let left = self.parse_value_expr()?; let right = self.parse_value_expr()?; let close = self.expect_rparen_for_verifier(kw.span)?;
629 Ok(Spanned::new(
630 BoolExpr::Verifier {
631 op,
632 left: Box::new(left),
633 right: Box::new(right),
634 },
635 Span::new(start, close.end),
636 ))
637 }
638
639 Keyword::AND => {
640 let l = self.parse_bool_expr()?;
641 let r = self.parse_bool_expr()?;
642 let close = self.expect_rparen()?;
643 Ok(Spanned::new(
644 BoolExpr::And(Box::new(l), Box::new(r)),
645 Span::new(start, close.end),
646 ))
647 }
648
649 Keyword::OR => {
650 let l = self.parse_bool_expr()?;
651 let r = self.parse_bool_expr()?;
652 let close = self.expect_rparen()?;
653 Ok(Spanned::new(
654 BoolExpr::Or(Box::new(l), Box::new(r)),
655 Span::new(start, close.end),
656 ))
657 }
658
659 Keyword::NOT => {
660 let inner = self.parse_bool_expr()?;
661 let close = self.expect_rparen()?;
662 Ok(Spanned::new(
663 BoolExpr::Not(Box::new(inner)),
664 Span::new(start, close.end),
665 ))
666 }
667
668 Keyword::NonEmpty => {
669 let operand = self.parse_value_expr()?;
670 let close = self.expect_rparen()?;
671 Ok(Spanned::new(
672 BoolExpr::UnaryCheck {
673 op: UnaryCheckOp::NonEmpty,
674 operand: Box::new(operand),
675 },
676 Span::new(start, close.end),
677 ))
678 }
679
680 Keyword::ForAll | Keyword::Exists => {
681 let op = QuantifierOp::from_keyword(kw.node).ok_or_else(|| {
682 parse_error(kw.span, "internal: expected quantifier keyword")
683 })?;
684 let predicate = self.parse_predicate()?; let operand = self.parse_value_expr()?;
686 let close = self.expect_rparen()?;
687 Ok(Spanned::new(
688 BoolExpr::Quantifier {
689 op,
690 predicate,
691 operand: Box::new(operand),
692 },
693 Span::new(start, close.end),
694 ))
695 }
696 other => Err(parse_error(
697 kw.span,
698 format!(
699 "expected boolean operator (verifier / connective / quantifier / NonEmpty), found `{:?}`",
700 other
701 ),
702 )),
703 }
704 }
705
706 fn expect_rparen_for_verifier(
711 &mut self,
712 _kw_span: Span,
713 ) -> Result<Span, NightjarLanguageError> {
714 match self.peek_token() {
715 Some(Token::RParen) => Ok(self.bump().span),
716 Some(_) => {
717 let sp = self.current_span();
718 Err(argument_error(sp, "verifier takes exactly 2 operands"))
719 }
720 None => Err(parse_error(
721 self.current_span(),
722 "expected `)` to close verifier",
723 )),
724 }
725 }
726
727 fn parse_predicate(&mut self) -> Result<Spanned<Predicate>, NightjarLanguageError> {
746 if matches!(self.peek_token(), Some(Token::Keyword(Keyword::NonEmpty))) {
748 let span = self.bump().span;
749 return Ok(Spanned::new(
750 Predicate::UnaryCheck(UnaryCheckOp::NonEmpty),
751 span,
752 ));
753 }
754 if matches!(self.peek_token(), Some(Token::LParen))
757 && matches!(
758 self.tokens.get(self.pos + 1).map(|t| &t.node),
759 Some(Token::Keyword(
760 Keyword::EQ
761 | Keyword::NE
762 | Keyword::LT
763 | Keyword::LE
764 | Keyword::GT
765 | Keyword::GE
766 )),
767 )
768 {
769 let lparen_span = self.bump().span;
770 self.enter_depth(lparen_span)?;
771 let result = self.parse_verifier_predicate(lparen_span.start);
772 self.exit_depth();
773 return result;
774 }
775 let body = self.parse_bool_expr()?;
777 let span = body.span;
778 Ok(Spanned::new(Predicate::Full(Box::new(body)), span))
779 }
780
781 fn parse_verifier_predicate(
790 &mut self,
791 start: usize,
792 ) -> Result<Spanned<Predicate>, NightjarLanguageError> {
793 let kw = self.expect_keyword_token()?;
794 let op = VerifierOp::from_keyword(kw.node).ok_or_else(|| {
795 parse_error(
796 kw.span,
797 "verifier predicate must use a verifier operator (EQ/NE/LT/LE/GT/GE)",
798 )
799 })?;
800 let first = self.parse_value_expr()?; match self.peek_token() {
802 Some(Token::RParen) => {
803 let close = self.bump().span;
805 Ok(Spanned::new(
806 Predicate::PartialVerifier {
807 op,
808 bound: Box::new(first),
809 },
810 Span::new(start, close.end),
811 ))
812 }
813 Some(_) => {
814 let second = self.parse_value_expr()?;
816 let close = self.expect_rparen_for_verifier(kw.span)?; let body_span = Span::new(start, close.end);
818 let body = Spanned::new(
819 BoolExpr::Verifier {
820 op,
821 left: Box::new(first),
822 right: Box::new(second),
823 },
824 body_span,
825 );
826 Ok(Spanned::new(Predicate::Full(Box::new(body)), body_span))
827 }
828 None => Err(parse_error(
829 self.current_span(),
830 "expected `)` or value expression in verifier predicate",
831 )),
832 }
833 }
834
835 fn parse_value_expr(&mut self) -> Result<SpannedValueExpr, NightjarLanguageError> {
842 let start_span = self.current_span();
843 match self.peek_token() {
844 Some(Token::IntLiteral(_))
846 | Some(Token::FloatLiteral(_))
847 | Some(Token::StringLiteral(_))
848 | Some(Token::BoolLiteral(_))
849 | Some(Token::NullLiteral) => {
850 let tok = self.bump();
851 let lit = match tok.node {
852 Token::IntLiteral(i) => Literal::Int(i),
854 Token::FloatLiteral(f) => Literal::Float(f),
855 Token::StringLiteral(s) => Literal::String(s),
856 Token::BoolLiteral(b) => Literal::Bool(b),
857 Token::NullLiteral => Literal::Null,
858 _ => unreachable!(),
859 };
860 Ok(Spanned::new(ValueExpr::Literal(lit), tok.span))
861 }
862
863 Some(Token::Symbol { .. }) => {
865 let tok = self.bump();
866 let (root, path) = match tok.node {
867 Token::Symbol { root, path } => (root, path),
868 _ => unreachable!(),
869 };
870 Ok(Spanned::new(ValueExpr::Symbol { root, path }, tok.span))
871 }
872
873 Some(Token::LParen) => {
875 let lparen_span = self.bump().span;
876 self.enter_depth(lparen_span)?;
877 let result = self.parse_func_call(lparen_span.start);
878 self.exit_depth();
879 result
880 }
881
882 Some(_) => Err(parse_error(start_span, "expected value expression")),
884 None => Err(parse_error(
885 start_span,
886 "expected value expression, got end of input",
887 )),
888 }
889 }
890
891 fn parse_func_call(&mut self, start: usize) -> Result<SpannedValueExpr, NightjarLanguageError> {
899 let kw = self.expect_keyword_token()?;
900 let op = FuncOp::from_keyword(kw.node).ok_or_else(|| {
901 parse_error(
902 kw.span,
903 format!(
904 "`{:?}` is not a value-producing function in this position",
905 kw.node
906 ),
907 )
908 })?;
909 let expected_operand_count = op.expected_arity();
910 let mut args = Vec::with_capacity(expected_operand_count);
911
912 for _ in 0..expected_operand_count {
914 args.push(self.parse_value_expr()?);
915 }
916
917 let close = match self.peek_token() {
920 Some(Token::RParen) => self.bump().span, Some(_) => {
922 return Err(argument_error(
923 self.current_span(),
924 format!(
925 "`{}` takes exactly {} argument(s)",
926 op.name(),
927 expected_operand_count
928 ),
929 ));
930 }
931 None => {
932 return Err(parse_error(
933 self.current_span(),
934 format!("expected `)` to close `{}` call", op.name()),
935 ));
936 }
937 };
938 Ok(Spanned::new(
939 ValueExpr::FuncCall { op, args },
940 Span::new(start, close.end),
941 ))
942 }
943
944 fn expect_keyword_token(&mut self) -> Result<Spanned<Keyword>, NightjarLanguageError> {
949 match self.peek_token() {
950 Some(Token::Keyword(_)) => {
951 let tok = self.bump();
952 if let Token::Keyword(kw) = tok.node {
953 Ok(Spanned::new(kw, tok.span))
954 } else {
955 unreachable!()
956 }
957 }
958 _ => Err(parse_error(
959 self.current_span(),
960 "expected operator keyword",
961 )),
962 }
963 }
964}
965pub fn parse(input: &str) -> Result<Program, NightjarLanguageError> {
971 parse_with_config(input, &ParserConfig::default())
972}
973
974pub fn parse_with_config(
978 input: &str,
979 config: &ParserConfig,
980) -> Result<Program, NightjarLanguageError> {
981 let tokens = Tokenizer::new(input).tokenize()?;
982 let program = Parser::parse(tokens, config)?;
983 validate_scope(&program)?;
984 Ok(program)
985}
986fn validate_scope(program: &Program) -> Result<(), NightjarLanguageError> {
1010 walk_bool(&program.expr, 0)
1011}
1012
1013fn walk_bool(expr: &SpannedBoolExpr, predicate_depth: u32) -> Result<(), NightjarLanguageError> {
1038 match &expr.node {
1039 BoolExpr::Literal(_) => Ok(()),
1040 BoolExpr::Verifier { left, right, .. } => {
1041 walk_value(left, predicate_depth)?;
1042 walk_value(right, predicate_depth)
1043 }
1044 BoolExpr::And(l, r) | BoolExpr::Or(l, r) => {
1045 walk_bool(l, predicate_depth)?;
1046 walk_bool(r, predicate_depth)
1047 }
1048 BoolExpr::Not(inner) => walk_bool(inner, predicate_depth),
1049 BoolExpr::UnaryCheck { operand, .. } => walk_value(operand, predicate_depth),
1050 BoolExpr::Quantifier {
1051 predicate, operand, ..
1052 } => {
1053 walk_predicate(predicate, predicate_depth + 1)?;
1054 walk_value(operand, predicate_depth)
1061 }
1062 }
1063}
1064
1065fn walk_value(expr: &SpannedValueExpr, predicate_depth: u32) -> Result<(), NightjarLanguageError> {
1080 match &expr.node {
1081 ValueExpr::Literal(_) => Ok(()),
1082 ValueExpr::Symbol { root, .. } => {
1083 if matches!(root, SymbolRoot::Element) && predicate_depth == 0 {
1084 Err(scope_error(
1085 expr.span,
1086 "`@` element-relative symbols may only appear inside a ForAll/Exists predicate",
1087 ))
1088 } else {
1089 Ok(())
1090 }
1091 }
1092 ValueExpr::FuncCall { args, .. } => {
1093 for a in args {
1094 walk_value(a, predicate_depth)?;
1095 }
1096 Ok(())
1097 }
1098 }
1099}
1100
1101fn walk_predicate(
1115 pred: &Spanned<Predicate>,
1116 predicate_depth: u32,
1117) -> Result<(), NightjarLanguageError> {
1118 match &pred.node {
1119 Predicate::PartialVerifier { bound, .. } => walk_value(bound, predicate_depth),
1120 Predicate::UnaryCheck(_) => Ok(()),
1121 Predicate::Full(body) => walk_bool(body, predicate_depth),
1122 }
1123}
1124
#[cfg(test)]
mod tests {
    use super::*;

    // ---- helpers -------------------------------------------------------

    /// Lexes `input` and drops the spans, panicking when lexing fails.
    fn tokenize(input: &str) -> Vec<Token> {
        let spanned = Tokenizer::new(input)
            .tokenize()
            .expect("tokenization should succeed");
        spanned.into_iter().map(|item| item.node).collect()
    }

    /// Builds the symbol token expected for `root` and `path`.
    fn symbol_token(root: SymbolRoot, path: &str) -> Token {
        Token::Symbol {
            root,
            path: path.into(),
        }
    }

    /// Builds the symbol value expression expected for `root` and `path`.
    fn symbol_value(root: SymbolRoot, path: &str) -> ValueExpr {
        ValueExpr::Symbol {
            root,
            path: path.into(),
        }
    }

    /// Parses `input`, panicking with the error when parsing fails.
    fn must_parse(input: &str) -> Program {
        parse(input)
            .unwrap_or_else(|e| panic!("expected parse success for `{}`, got {:?}", input, e))
    }

    /// Parses `input`, panicking when parsing unexpectedly succeeds.
    fn must_fail(input: &str) -> NightjarLanguageError {
        parse(input).expect_err(&format!("expected parse failure for `{}`", input))
    }

    // ---- tokenizer -----------------------------------------------------

    #[test]
    fn tokenizes_parentheses_and_keywords() {
        let expected = vec![
            Token::LParen,
            Token::Keyword(Keyword::EQ),
            Token::IntLiteral(1),
            Token::IntLiteral(1),
            Token::RParen,
        ];
        assert_eq!(tokenize("(EQ 1 1)"), expected);
    }

    #[test]
    fn tokenizes_negative_integer_literal() {
        assert_eq!(tokenize("-5"), vec![Token::IntLiteral(-5)]);
    }

    #[test]
    fn tokenizes_negative_float_literal() {
        assert_eq!(tokenize("-1.618"), vec![Token::FloatLiteral(-1.618)]);
    }

    #[test]
    fn space_between_minus_and_digit_is_error() {
        let err = Tokenizer::new("- 5").tokenize().unwrap_err();
        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
    }

    #[test]
    fn tokenizes_string_literal_with_unicode() {
        assert_eq!(
            tokenize("\"營收\""),
            vec![Token::StringLiteral("營收".into())]
        );
    }

    #[test]
    fn tokenizes_unterminated_string_errors() {
        let err = Tokenizer::new("\"abc").tokenize().unwrap_err();
        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
    }

    #[test]
    fn tokenizes_root_symbol_bare_dot() {
        assert_eq!(tokenize("."), vec![symbol_token(SymbolRoot::Root, "")]);
        assert_eq!(
            tokenize("(NonEmpty .)"),
            vec![
                Token::LParen,
                Token::Keyword(Keyword::NonEmpty),
                symbol_token(SymbolRoot::Root, ""),
                Token::RParen,
            ]
        );
    }

    #[test]
    fn tokenizes_nested_symbol_path() {
        assert_eq!(
            tokenize(".data.department_1.revenue"),
            vec![symbol_token(SymbolRoot::Root, "data.department_1.revenue")]
        );
    }

    #[test]
    fn tokenizes_unicode_symbol() {
        assert_eq!(
            tokenize(".營收"),
            vec![symbol_token(SymbolRoot::Root, "營收")]
        );
        assert_eq!(
            tokenize(".données.résultat"),
            vec![symbol_token(SymbolRoot::Root, "données.résultat")]
        );
    }

    #[test]
    fn tokenizes_element_symbol_with_at_sigil() {
        assert_eq!(tokenize("@"), vec![symbol_token(SymbolRoot::Element, "")]);
        assert_eq!(
            tokenize("@.a"),
            vec![symbol_token(SymbolRoot::Element, "a")]
        );
        assert_eq!(
            tokenize("@._0.name"),
            vec![symbol_token(SymbolRoot::Element, "_0.name")]
        );
    }

    #[test]
    fn tokenizes_bool_and_null_literals() {
        assert_eq!(tokenize("True"), vec![Token::BoolLiteral(true)]);
        assert_eq!(tokenize("False"), vec![Token::BoolLiteral(false)]);
        assert_eq!(tokenize("Null"), vec![Token::NullLiteral]);
    }

    #[test]
    fn unknown_identifier_errors() {
        let err = Tokenizer::new("FooBar").tokenize().unwrap_err();
        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
    }

    #[test]
    fn token_spans_are_byte_offsets() {
        let tokens = Tokenizer::new("(EQ 1 2)").tokenize().unwrap();
        // `(`, `EQ`, `1`, `2`, `)` in source order.
        let expected = [(0, 1), (1, 3), (4, 5), (6, 7), (7, 8)];
        for (index, (start, end)) in expected.into_iter().enumerate() {
            assert_eq!(tokens[index].span, Span::new(start, end));
        }
    }

    // ---- parser --------------------------------------------------------

    #[test]
    fn parses_simple_verifier() {
        let program = must_parse("(GT 1 2)");
        match program.expr.node {
            BoolExpr::Verifier { op, .. } => assert_eq!(op, VerifierOp::GT),
            other => panic!("expected Verifier, got {:?}", other),
        }
    }

    #[test]
    fn verifier_arity_mismatch_produces_arity_error() {
        let err = must_fail("(GT 1 2 3)");
        assert!(
            matches!(err, NightjarLanguageError::ArgumentError { .. }),
            "got {:?}",
            err
        );
    }

    #[test]
    fn bare_gt_without_parens_fails() {
        let err = must_fail("GT 1 2");
        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
    }

    #[test]
    fn parses_nested_connective_and_verifier() {
        let program = must_parse("(AND (GT 1 0) (LT 1 10))");
        match program.expr.node {
            BoolExpr::And(_, _) => {}
            other => panic!("expected And, got {:?}", other),
        }
    }

    #[test]
    fn parses_forall_with_partial_verifier() {
        let program = must_parse("(ForAll (GT 0) .ids)");
        match program.expr.node {
            BoolExpr::Quantifier {
                op,
                predicate,
                operand,
            } => {
                assert_eq!(op, QuantifierOp::ForAll);
                match predicate.node {
                    Predicate::PartialVerifier { op, .. } => assert_eq!(op, VerifierOp::GT),
                    other => panic!("expected PartialVerifier, got {:?}", other),
                }
                match operand.node {
                    ValueExpr::Symbol { root, path } => {
                        assert_eq!(root, SymbolRoot::Root);
                        assert_eq!(path, "ids");
                    }
                    other => panic!("expected Symbol, got {:?}", other),
                }
            }
            other => panic!("expected Quantifier, got {:?}", other),
        }
    }

    #[test]
    fn parses_exists_with_nonempty_predicate() {
        let program = must_parse("(Exists NonEmpty .names)");
        match program.expr.node {
            BoolExpr::Quantifier { op, predicate, .. } => {
                assert_eq!(op, QuantifierOp::Exists);
                assert_eq!(
                    predicate.node,
                    Predicate::UnaryCheck(UnaryCheckOp::NonEmpty)
                );
            }
            other => panic!("expected Quantifier, got {:?}", other),
        }
    }

    #[test]
    fn parses_not_of_verifier() {
        let program = must_parse("(NOT (EQ .status \"inactive\"))");
        match program.expr.node {
            BoolExpr::Not(inner) => match inner.node {
                BoolExpr::Verifier { op, .. } => assert_eq!(op, VerifierOp::EQ),
                other => panic!("expected Verifier, got {:?}", other),
            },
            other => panic!("expected Not, got {:?}", other),
        }
    }

    #[test]
    fn parses_top_level_bool_literal() {
        let program = must_parse("True");
        assert_eq!(program.expr.node, BoolExpr::Literal(true));
    }

    #[test]
    fn parses_negative_literals_in_verifier() {
        let program = must_parse("(GT -5 -10)");
        match program.expr.node {
            BoolExpr::Verifier { left, right, .. } => {
                assert_eq!(left.node, ValueExpr::Literal(Literal::Int(-5)));
                assert_eq!(right.node, ValueExpr::Literal(Literal::Int(-10)));
            }
            other => panic!("expected Verifier, got {:?}", other),
        }
    }

    #[test]
    fn parses_root_symbol_as_operand() {
        let program = must_parse("(NonEmpty .)");
        match program.expr.node {
            BoolExpr::UnaryCheck { op, operand } => {
                assert_eq!(op, UnaryCheckOp::NonEmpty);
                assert_eq!(operand.node, symbol_value(SymbolRoot::Root, ""));
            }
            other => panic!("expected UnaryCheck, got {:?}", other),
        }
    }

    #[test]
    fn func_call_arity_too_many_is_arity_error() {
        let err = must_fail("(EQ (Add 1 2 3) 6)");
        assert!(
            matches!(err, NightjarLanguageError::ArgumentError { .. }),
            "got {:?}",
            err
        );
    }

    #[test]
    fn func_call_arity_too_few_is_parse_error() {
        let err = must_fail("(EQ (Add 1) 1)");
        assert!(
            matches!(err, NightjarLanguageError::ParseError { .. }),
            "got {:?}",
            err
        );
    }

    #[test]
    fn missing_rparen_is_parse_error() {
        let err = must_fail("(GT 1 2");
        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
    }

    #[test]
    fn trailing_tokens_is_parse_error() {
        let err = must_fail("(GT 1 2) extra");
        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
    }

    #[test]
    fn empty_input_fails() {
        let err = must_fail("");
        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
    }

    #[test]
    fn partial_verifier_outside_quantifier_is_rejected() {
        let err = must_fail("(GT 2)");
        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
    }

    #[test]
    fn depth_limit_is_enforced() {
        // Ten nested NOT forms around a literal.
        let n = 10;
        let s = format!("{}True{}", "(NOT ".repeat(n), ")".repeat(n));

        let cfg = ParserConfig { max_depth: 5 };
        let err = parse_with_config(&s, &cfg).unwrap_err();
        assert!(
            matches!(err, NightjarLanguageError::RecursionError { .. }),
            "got {:?}",
            err
        );
        parse(&s).expect("default depth should parse this");
    }

    #[test]
    fn parses_nested_arithmetic_inside_verifier() {
        let program = must_parse("(EQ (Add (Mul 2 3) (Sub 10 4)) 12)");
        match program.expr.node {
            BoolExpr::Verifier { left, .. } => match left.node {
                ValueExpr::FuncCall { op, args } => {
                    assert_eq!(op, FuncOp::Add);
                    assert_eq!(args.len(), 2);
                }
                other => panic!("expected FuncCall, got {:?}", other),
            },
            other => panic!("expected Verifier, got {:?}", other),
        }
    }

    #[test]
    fn parses_bool_literal_as_operand_to_eq() {
        let program = must_parse("(EQ True False)");
        match program.expr.node {
            BoolExpr::Verifier { left, right, .. } => {
                assert_eq!(left.node, ValueExpr::Literal(Literal::Bool(true)));
                assert_eq!(right.node, ValueExpr::Literal(Literal::Bool(false)));
            }
            other => panic!("expected Verifier, got {:?}", other),
        }
    }

    #[test]
    fn rejects_func_op_as_top_level_bool_expr() {
        let err = must_fail("(Add 1 2)");
        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
    }

    #[test]
    fn parses_unicode_symbol_in_verifier() {
        let program = must_parse("(EQ .數量 100)");
        match program.expr.node {
            BoolExpr::Verifier { left, .. } => {
                assert_eq!(left.node, symbol_value(SymbolRoot::Root, "數量"));
            }
            other => panic!("expected Verifier, got {:?}", other),
        }
    }

    #[test]
    fn top_level_span_covers_whole_expression() {
        let program = must_parse("(EQ 1 1)");
        assert_eq!(program.expr.span, Span::new(0, 8));
    }

    // ---- element (`@`) symbols and predicates --------------------------

    #[test]
    fn rejects_at_followed_by_bare_identifier() {
        let err = Tokenizer::new("@a").tokenize().unwrap_err();
        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
    }

    #[test]
    fn rejects_at_dot_with_no_segment() {
        let err = Tokenizer::new("@.").tokenize().unwrap_err();
        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
    }

    #[test]
    fn parses_forall_with_full_verifier_predicate() {
        let program = must_parse("(ForAll (EQ @.a @.b) .items)");
        match program.expr.node {
            BoolExpr::Quantifier { predicate, .. } => match predicate.node {
                Predicate::Full(body) => match body.node {
                    BoolExpr::Verifier { op, left, right } => {
                        assert_eq!(op, VerifierOp::EQ);
                        assert_eq!(left.node, symbol_value(SymbolRoot::Element, "a"));
                        assert_eq!(right.node, symbol_value(SymbolRoot::Element, "b"));
                    }
                    other => panic!("expected Verifier inside Full, got {:?}", other),
                },
                other => panic!("expected Predicate::Full, got {:?}", other),
            },
            other => panic!("expected Quantifier, got {:?}", other),
        }
    }

    #[test]
    fn partial_verifier_still_parses_as_partial() {
        let program = must_parse("(ForAll (GT 0) .items)");
        match program.expr.node {
            BoolExpr::Quantifier { predicate, .. } => {
                assert!(matches!(predicate.node, Predicate::PartialVerifier { .. }));
            }
            other => panic!("expected Quantifier, got {:?}", other),
        }
    }

    #[test]
    fn parses_forall_with_nested_arithmetic_on_element() {
        let program = must_parse("(ForAll (EQ (Add @.a @.b) @.c) .items)");
        match program.expr.node {
            BoolExpr::Quantifier { predicate, .. } => {
                assert!(matches!(predicate.node, Predicate::Full(_)));
            }
            other => panic!("expected Quantifier, got {:?}", other),
        }
    }

    #[test]
    fn rejects_at_outside_quantifier_with_scope_error() {
        let err = parse("(EQ @.a 1)").expect_err("`@` outside quantifier should fail");
        assert!(
            matches!(err, NightjarLanguageError::ScopeError { .. }),
            "got {:?}",
            err
        );
    }

    #[test]
    fn rejects_at_in_quantifier_operand_with_scope_error() {
        let err = parse("(ForAll (GT 0) @.items)")
            .expect_err("`@` in quantifier operand at top level should fail");
        assert!(
            matches!(err, NightjarLanguageError::ScopeError { .. }),
            "got {:?}",
            err
        );
    }

    #[test]
    fn bare_at_symbol_parses_in_predicate() {
        let program = must_parse("(ForAll (GT @ 0) .scores)");
        match program.expr.node {
            BoolExpr::Quantifier { predicate, .. } => match predicate.node {
                Predicate::Full(body) => match body.node {
                    BoolExpr::Verifier { left, .. } => {
                        assert_eq!(left.node, symbol_value(SymbolRoot::Element, ""))
                    }
                    other => panic!("expected Verifier, got {:?}", other),
                },
                other => panic!("expected Full, got {:?}", other),
            },
            other => panic!("expected Quantifier, got {:?}", other),
        }
    }

    #[test]
    fn nonempty_with_operand_in_predicate_parses_as_full() {
        let program = must_parse("(ForAll (NonEmpty .x) .items)");
        match program.expr.node {
            BoolExpr::Quantifier { predicate, .. } => match predicate.node {
                Predicate::Full(body) => assert!(matches!(
                    body.node,
                    BoolExpr::UnaryCheck {
                        op: UnaryCheckOp::NonEmpty,
                        ..
                    }
                )),
                other => panic!("expected Full, got {:?}", other),
            },
            other => panic!("expected Quantifier, got {:?}", other),
        }
    }
}