Skip to main content

nightjar_lang/language/
parser.rs

1// Copyright 2026 Wayne Hong (h-alice) <contact@halice.art>
2// Nightjar Language Project
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8//     http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16//! # Parser module
17//!
//! This module contains the tokenizer and the recursive-descent parser that
//! turn a Nightjar source string into a spanned AST, with Unicode-safe byte
//! offsets and a configurable nesting-depth limit.
21
22use crate::error::{
23    argument_error, parse_error, recursion_error, scope_error, NightjarLanguageError, Span,
24};
25use crate::language::grammar::{
26    BoolExpr, FuncOp, Keyword, Literal, Predicate, Program, QuantifierOp, Spanned, SpannedBoolExpr,
27    SpannedValueExpr, SymbolRoot, Token, UnaryCheckOp, ValueExpr, VerifierOp,
28};
29
/// Tunable limits for the parser.
#[derive(Debug, Clone)]
pub struct ParserConfig {
    /// Maximum nesting depth the parser accepts. The default is 256.
    ///
    /// When the nesting depth of the input exceeds this value, parsing
    /// stops and a `RecursionError` is returned.
    pub max_depth: usize,
}

impl Default for ParserConfig {
    /// Build the default configuration (`max_depth` = 256).
    fn default() -> Self {
        // Far deeper than any realistic verification expression needs.
        const DEFAULT_MAX_DEPTH: usize = 256;
        ParserConfig {
            max_depth: DEFAULT_MAX_DEPTH,
        }
    }
}
47
/// Tokenizer
///
/// Turns a Nightjar language source string into a stream of tokens.
///
/// The type derives `Debug` and `Clone` so that it can be inspected in
/// diagnostics and snapshotted; all of its fields support both.
#[derive(Debug, Clone)]
pub struct Tokenizer<'a> {
    /// The program text being tokenized.
    input: &'a str,
    /// Index into `chars` of the next un-consumed character.
    cursor: usize,
    /// Every character of `input` paired with its byte offset. Byte offsets
    /// are needed for `Span`s, while character-level access keeps Unicode
    /// content inside strings/symbols from being split mid-codepoint.
    chars: Vec<(usize, char)>,
    /// End-of-input byte offset (i.e. `input.len()`).
    eof: usize,
}
63
64impl<'a> Tokenizer<'a> {
65    /// Initialize a tokenizer for `input`.
66    ///
67    /// Build a tokenizer for `input`, precomputing `(byte_offset, char)`
68    /// pairs so peeking and slicing stay cheap and Unicode-safe.
69    pub fn new(input: &'a str) -> Self {
70        let chars: Vec<(usize, char)> = input.char_indices().collect();
71        Self {
72            input,
73            cursor: 0,
74            chars,
75            eof: input.len(),
76        }
77    }
78
79    /// Main tokenizing procedure.
80    ///
81    /// Consume the whole input and return the full token stream with spans,
82    /// or a `ParseError` on the first unrecoverable lexical issue.
83    pub fn tokenize(&mut self) -> Result<Vec<Spanned<Token>>, NightjarLanguageError> {
84        let mut tokens = Vec::new();
85        loop {
86            // Loop until EOF.
87            self.skip_whitespace(); // Skip encountered whitespace.
88            let Some(c) = self.peek_char() else {
89                break; // EOF reached, exit the loop.
90            };
91            let start = self.byte_pos(); // Record the next-unconsumed byte offset.
92            let token = match c {
93                '(' => {
94                    self.advance();
95                    Token::LParen
96                }
97                ')' => {
98                    self.advance();
99                    Token::RParen
100                }
101                '"' => self.read_string(start)?,
102                '.' => self.read_symbol(start, SymbolRoot::Root)?,
103                '@' => self.read_symbol(start, SymbolRoot::Element)?,
104                '-' if self.is_negative_literal() => self.read_number(start)?,
105                c if c.is_ascii_digit() => self.read_number(start)?,
106                c if c.is_alphabetic() || c == '_' => self.read_ident(start)?,
107                other => {
108                    return Err(parse_error(
109                        Span::new(start, start + other.len_utf8()),
110                        format!("unexpected character `{}`", other),
111                    ));
112                }
113            };
114            let end = self.byte_pos();
115            tokens.push(Spanned::new(token, Span::new(start, end)));
116        }
117        Ok(tokens)
118    }
119
120    // ────────────────────── position helpers ──────────────────────
121
122    /// Get byte-level offset
123    ///
124    /// Byte offset of the next un-consumed character, or EOF offset if the
125    /// cursor has run off the end. Used for building `Span`s.
126    fn byte_pos(&self) -> usize {
127        if self.cursor < self.chars.len() {
128            self.chars[self.cursor].0
129        } else {
130            self.eof
131        }
132    }
133
134    /// `true` when no more characters remain to tokenize.
135    fn _is_eof(&self) -> bool {
136        self.cursor >= self.chars.len()
137    }
138
139    /// Get next character without consuming it.
140    ///
141    /// Return the next character without consuming it, or `None` at EOF.
142    fn peek_char(&self) -> Option<char> {
143        self.chars.get(self.cursor).map(|(_, c)| *c)
144    }
145
146    /// Get character with given offset
147    ///
148    /// Return the character at `cursor + offset` without consuming it.
149    /// Useful for two-char lookahead, e.g. distinguishing `-5` from `- `.
150    fn peek_char_at(&self, offset: usize) -> Option<char> {
151        self.chars.get(self.cursor + offset).map(|(_, c)| *c)
152    }
153
    /// Consume one character by moving the cursor forward.
    ///
    /// The cursor is incremented unconditionally; no bounds check happens
    /// here. `peek_char` and `byte_pos` both treat an out-of-range cursor as
    /// end of input.
    fn advance(&mut self) {
        self.cursor += 1;
    }
158
159    /// Consume any run of whitespace characters so the next `peek_char`
160    /// returns the start of the next token (or EOF).
161    fn skip_whitespace(&mut self) {
162        while let Some(c) = self.peek_char() {
163            if c.is_whitespace() {
164                self.advance();
165            } else {
166                break;
167            }
168        }
169    }
170
171    /// Negative literal checker
172    ///
173    /// If met a `-` character, this function will check if the next character
174    /// is a digit.
175    ///
176    /// If so, it will return `true`, otherwise `false`.
177    fn is_negative_literal(&self) -> bool {
178        self.peek_char() == Some('-') && self.peek_char_at(1).is_some_and(|c| c.is_ascii_digit())
179        // If the character is not alphabetic, it can't be a digit.
180    }
181
182    // ────────────────────────── readers ──────────────────────────
183
184    /// String reader
185    ///
186    /// Consume a `"..."` string literal. The opening `"` has not yet been
187    /// consumed. Errors if EOF is reached before a closing `"`.
188    ///
189    /// While using `read_string`, the position MUST point at the start of the string literal
190    /// opening, that is, the first `"`.
191    fn read_string(&mut self, start: usize) -> Result<Token, NightjarLanguageError> {
192        // consume opening "
193        self.advance();
194        let mut buf = String::new();
195        loop {
196            match self.peek_char() {
197                Some('"') => {
198                    self.advance();
199                    return Ok(Token::StringLiteral(buf));
200                }
201                Some(c) => {
202                    buf.push(c); // Push read content into buffer.
203                    self.advance();
204                }
205                None => {
206                    return Err(parse_error(
207                        Span::new(start, self.byte_pos()),
208                        "unterminated string literal",
209                    ));
210                }
211            }
212        }
213    }
214
215    /// Number reader
216    ///
217    /// Consume an integer or floating-point literal starting at `start`.
218    /// Produces `Token::IntLiteral` by default, and `Token::FloatLiteral`
219    /// when a `.digit` fractional part is present.
220    fn read_number(&mut self, start: usize) -> Result<Token, NightjarLanguageError> {
221        // Handle optional leading '-'
222        if self.peek_char() == Some('-') {
223            self.advance();
224        }
225        // Consume all digits until meet non-digit character.
226        while let Some(c) = self.peek_char() {
227            if c.is_ascii_digit() {
228                self.advance();
229            } else {
230                break;
231            }
232        }
233
234        // Optional fractional part, only if '.' followed by at least one digit.
235        let mut is_float = false; // A flag indicating whether the number is a float.
236        if self.peek_char() == Some('.') && // Current char is '.'
237            self.peek_char_at(1)            // Next char is a digit
238                .is_some_and(|c| c.is_ascii_digit())
239        {
240            // If next char is a digit, then it is a float.
241            is_float = true;
242            self.advance(); // '.'
243                            // Consume all subsequent digits as the fractional part.
244            while let Some(c) = self.peek_char() {
245                if c.is_ascii_digit() {
246                    self.advance();
247                } else {
248                    break;
249                }
250            }
251        }
252
253        // If everything works fine, now we've consumed all the digits and the decimal point.
254        let end = self.byte_pos(); // Get the end position of the number.
255        let text = &self.input[start..end]; // Get the number as a string from input.
256        if is_float {
257            // Check the flag if it is a float.
258            text.parse::<f64>().map(Token::FloatLiteral).map_err(|_| {
259                // Try to parse the number as a float.
260                parse_error(
261                    // If not, return an error.
262                    Span::new(start, end),
263                    format!("invalid float literal `{}`", text),
264                )
265            }) // Return successfully parsed float or error.
266        } else {
267            // If not a float, try to parse it as an integer.
268            text.parse::<i64>().map(Token::IntLiteral).map_err(|_| {
269                // Try to parse the number as an integer.
270                parse_error(
271                    // If not, return an error.
272                    Span::new(start, end),
273                    format!("invalid integer literal `{}`", text),
274                )
275            }) // Return successfully parsed integer or error.
276        }
277    }
278
279    /// Symbol reader
280    ///
281    /// The leading sigil (`.` for root, `@` for the current iteration element)
282    /// has already been peeked but not consumed.
283    ///
284    /// Accepts:
285    /// - the bare sigil alone (`.` or `@`) when followed by whitespace, `)`,
286    ///   or EOF — returns an empty `path`,
287    /// - dotted paths such as `.data.department_1.revenue` or `@._0.a`.
288    ///
289    /// Explanation of the difference between `.` and `@`:
290    /// - `.` doubles as both the root marker **and** the separator before its
291    ///   first segment (so `.a` is read as sigil + `a`).
292    /// - `@` is a standalone marker that requires an explicit `.` separator
293    ///   before any segments (so `@.a` is sigil + `.` + `a`; `@a` is rejected).
294    ///
295    /// The returned `Token::Symbol` stores `path` *without* any leading
296    /// sigil or separator; `root` records which sigil was used.
297    fn read_symbol(
298        &mut self,
299        _start: usize,
300        root: SymbolRoot, // Leading sigil ('@' or '.')
301    ) -> Result<Token, NightjarLanguageError> {
302        self.advance(); // consume the leading sigil ('.' or '@')
303        let sigil: char = match root {
304            SymbolRoot::Root => '.',    // Root sigil is '.'
305            SymbolRoot::Element => '@', // Element sigil is '@'
306        };
307        let mut path = String::new(); // Initialize path of the symbol
308
309        // Root's '.' sigil also serves as the separator for the first
310        // segment, so we attempt to read a segment immediately. For Element,
311        // the first segment (if any) must be introduced by an explicit '.'.
312        match root {
313            // The root is `.`, try to read the first segment
314            SymbolRoot::Root => {
315                match self.try_read_segment() {
316                    // Try to read the first segment
317                    Some(seg) => Self::push_segment(&mut path, seg), // There's some segment after `.`
318                    None => return self.complete_bare_sigil(root, path, sigil),
319                }
320            }
321            // If the root is `@` and the next character is not `.`, then it is a bare sigil.
322            SymbolRoot::Element if self.peek_char() != Some('.') => {
323                return self.complete_bare_sigil(root, path, sigil); // Complete the token, it is a `@`
324            }
325            _ => {}
326        }
327
328        // Consume any number of `.segment` continuations.
329        while self.peek_char() == Some('.') {
330            let dot_pos = self.byte_pos();
331            self.advance(); // consume '.'
332            match self.try_read_segment() {
333                Some(seg) => Self::push_segment(&mut path, seg),
334                None => {
335                    return Err(parse_error(
336                        Span::new(dot_pos, dot_pos + 1),
337                        "expected symbol segment after `.`",
338                    ));
339                }
340            }
341        }
342        Ok(Token::Symbol { root, path })
343    }
344
345    /// Try to read a symbol segment
346    ///
347    /// Try to consume one identifier segment (Unicode alphanumeric or `_`).
348    /// Returns the segment as a `&str` slice, or `None` if no segment
349    /// characters were present at the current position.
350    ///
351    /// The caller is responsible for assembling segments into a dot-joined
352    /// path (see [`push_segment`]).
353    ///
354    /// Example (internal; tokenizer-private):
355    ///
356    /// ```ignore
357    /// let mut tz = Tokenizer::new("foo.bar");
358    /// assert_eq!(tz.try_read_segment(), Some("foo"));
359    /// // Caller is responsible for consuming the `.` separator itself
360    /// // before the next call.
361    /// ```
362    fn try_read_segment(&mut self) -> Option<&str> {
363        let seg_start = self.byte_pos(); // Current position of the parser
364        while let Some(c) = self.peek_char() {
365            // Advance the parser until a non-alphanumeric character is encountered
366            if c.is_alphanumeric() || c == '_' {
367                self.advance();
368            } else {
369                break;
370            }
371        }
372        let seg_end = self.byte_pos(); // Current position of the parser
373        if seg_start == seg_end {
374            // Empty segment
375            None
376        } else {
377            Some(&self.input[seg_start..seg_end]) // Return the segment as a &str slice
378        }
379    }
380
381    /// Push symbol segment
382    ///
383    /// Append a segment to a dot-separated path string. Inserts a `.`
384    /// separator when `path` is non-empty.
385    fn push_segment(path: &mut String, seg: &str) {
386        if !path.is_empty() {
387            path.push('.');
388        }
389        path.push_str(seg);
390    }
391
392    /// Complete sigil reading and construct the tokenized symbol.
393    ///
394    /// The function has the following purposes:
395    /// - Complete tokenization of a bare sigil (`.` or `@` with no path
396    ///   segments).
397    /// - Validates that the next character is a legal terminator
398    ///   (whitespace, `)`, or EOF).
399    /// - Produces the final `Token::Symbol` instance.
400    /// - Returns a parse error if an unexpected character follows.
401    ///
402    /// Example (internal):
403    ///
404    /// ```ignore
405    /// // `.` followed by EOF → bare root symbol.
406    /// let tz = Tokenizer::new(".");
407    /// let tok = tz.complete_bare_sigil(SymbolRoot::Root, String::new(), '.').unwrap();
408    /// assert!(matches!(tok, Token::Symbol { path, .. } if path.is_empty()));
409    /// ```
410    fn complete_bare_sigil(
411        &self,
412        root: SymbolRoot, // Root sigil ('@' or '.')
413        path: String,     // Path of the symbol
414        sigil: char,      // Sigil
415    ) -> Result<Token, NightjarLanguageError> {
416        match self.peek_char() {
417            None => Ok(Token::Symbol { root, path }), // Next char is EOF, complete the token
418            Some(c) if c.is_whitespace() || c == ')' =>
419            // Next char is vaild terminator, complete the token
420            {
421                Ok(Token::Symbol { root, path })
422            }
423            Some(c) => {
424                let pos = self.byte_pos();
425                Err(parse_error(
426                    Span::new(pos, pos + c.len_utf8()),
427                    format!("unexpected character `{}` after `{}`", c, sigil),
428                ))
429            }
430        }
431    }
432
433    /// Identifier reader
434    ///
435    /// Consume an alphanumeric identifier and classify it as `True`,
436    /// `False`, `Null`, or a known operator keyword.
437    ///
438    /// Unknown identifiers become `ParseError`, we do not support user
439    /// defined variable names (for now).
440    fn read_ident(&mut self, start: usize) -> Result<Token, NightjarLanguageError> {
441        while let Some(c) = self.peek_char() {
442            if c.is_alphanumeric() || c == '_' {
443                self.advance();
444            } else {
445                break;
446            }
447        }
448        let end = self.byte_pos();
449        let text = &self.input[start..end];
450        match text {
451            "True" => Ok(Token::BoolLiteral(true)),
452            "False" => Ok(Token::BoolLiteral(false)),
453            "Null" => Ok(Token::NullLiteral),
454            _ => match Keyword::from_ident(text) {
455                Some(kw) => Ok(Token::Keyword(kw)),
456                None => Err(parse_error(
457                    Span::new(start, end),
458                    format!("unknown identifier `{}`", text),
459                )),
460            },
461        }
462    }
463
464    // ────────────────────────────────────────────────────────────
465}
466
/// Parser
///
/// Parser turns a token stream into a spanned AST.
pub struct Parser {
    /// Token stream being parsed.
    tokens: Vec<Spanned<Token>>,
    /// Index into `tokens` of the next un-consumed token.
    pos: usize,
    /// Current nesting depth, maintained by `enter_depth` / `exit_depth`.
    depth: usize,
    /// Nesting limit, copied from `ParserConfig::max_depth`.
    max_depth: usize,
    /// End byte offset of the last token (0 for an empty stream). Used as
    /// the position of the zero-width span reported at end of input.
    input_len: usize,
}
477
478impl Parser {
479    /// Run the parser over a full token stream and return a `Program`
480    /// whose top-level expression is guaranteed (by grammar) to reduce
481    /// to a boolean. Fails on leftover tokens after the expression.
482    pub fn parse(
483        tokens: Vec<Spanned<Token>>,
484        config: &ParserConfig,
485    ) -> Result<Program, NightjarLanguageError> {
486        let input_len = tokens.last().map(|t| t.span.end).unwrap_or(0);
487        let mut p = Self {
488            tokens,
489            pos: 0,
490            depth: 0,
491            max_depth: config.max_depth,
492            input_len,
493        };
494        let expr = p.parse_bool_expr()?;
495        p.expect_eof()?;
496        Ok(Program { expr })
497    }
498
499    // ───────────────────── helpers for parsing ──────────────────────
500
    /// Look at the current token (with its span) without consuming it.
    ///
    /// Returns `None` once `pos` has moved past the last token.
    fn peek(&self) -> Option<&Spanned<Token>> {
        self.tokens.get(self.pos)
    }
505
506    /// Look at the current token kind without consuming it
507    ///
508    /// Convenience wrapper for when the span is not needed.
509    fn peek_token(&self) -> Option<&Token> {
510        self.peek().map(|t| &t.node)
511    }
512
513    /// Advance the parser by one token and return a clone of the token
514    /// we just consumed (including its span).
515    ///
516    /// Callers use this to pull a token they have already validated via `peek`.
517    fn bump(&mut self) -> Spanned<Token> {
518        let t = self.tokens[self.pos].clone();
519        self.pos += 1;
520        t
521    }
522
523    /// Span of the current token, or a zero-width point-span at EOF.
524    ///
525    /// Preferred over `peek().span` for producing error spans because
526    /// it gracefully handles the end-of-input case.
527    fn current_span(&self) -> Span {
528        self.peek()
529            .map(|t| t.span)
530            .unwrap_or(Span::point(self.input_len)) // Points to the end of the input
531    }
532
533    /// Consume a `)` or issue parse error.
534    fn expect_rparen(&mut self) -> Result<Span, NightjarLanguageError> {
535        match self.peek_token() {
536            Some(Token::RParen) => Ok(self.bump().span),
537            _ => Err(parse_error(self.current_span(), "expected `)`")),
538        }
539    }
540
541    /// Ensure the token stream is fully consumed.
542    ///
543    /// Any leftover token is reported as a parse error.
544    /// We designed Nightjar programs to be single expressions.
545    fn expect_eof(&mut self) -> Result<(), NightjarLanguageError> {
546        match self.peek() {
547            None => Ok(()),
548            Some(t) => Err(parse_error(
549                t.span,
550                "unexpected token after complete expression",
551            )),
552        }
553    }
554
555    /// Called when entering a parenthesized sub-expression.
556    ///
557    /// Increments the nesting counter and returns `DepthLimitExceeded` if the
558    /// configured `max_depth` is reached.
559    ///
560    /// This prevents stack overflow from pathological inputs like deeply nested `NOT`s.
561    fn enter_depth(&mut self, span: Span) -> Result<(), NightjarLanguageError> {
562        self.depth += 1;
563        if self.depth > self.max_depth {
564            return Err(recursion_error(span, self.max_depth));
565        }
566        Ok(())
567    }
568
    /// Counterpart to `enter_depth`; call on exit from a parenthesized
    /// sub-expression.
    ///
    /// Decrements the nesting counter, stopping at zero.
    fn exit_depth(&mut self) {
        self.depth = self.depth.saturating_sub(1); // Saturating so an unmatched call cannot underflow.
    }
574
575    // ─────────────────────────────────────────────────────────
576
577    // ───────────────────── AST producers ──────────────────────
578
579    /// Parse a `bool_expr`
580    ///
581    /// A `bool_expr` is either a bare `True`/`False` literal or a
582    /// parenthesized boolean operator form (verifier, connective,
583    /// quantifier, `NOT`, or `NonEmpty`).
584    fn parse_bool_expr(&mut self) -> Result<SpannedBoolExpr, NightjarLanguageError> {
585        let start_span = self.current_span();
586        match self.peek_token() {
587            Some(Token::BoolLiteral(b)) => {
588                // Boolean literal (true/false)
589                let b = *b;
590                let span = self.bump().span;
591                Ok(Spanned::new(BoolExpr::Literal(b), span))
592            }
593            Some(Token::LParen) => {
594                // Parenthesized boolean expression
595                let lparen_span = self.bump().span;
596                self.enter_depth(lparen_span)?; // Increment depth
597                let result = self.parse_bool_body(lparen_span.start);
598                self.exit_depth(); // Decrement depth
599                result
600            }
601            Some(_) => Err(parse_error(start_span, "expected boolean expression")),
602            None => Err(parse_error(
603                start_span,
604                "expected boolean expression, got end of input",
605            )),
606        }
607    }
608
609    /// Parse the body of a parenthesized boolean expression.
610    ///
611    /// Dispatching on the leading keyword to the appropriate sub-parser.
612    ///
613    /// Expects the opening `(` has already been consumed. This function is responsible for
614    /// matching the closing `)` and producing a span from `start` to `)`.
615    fn parse_bool_body(&mut self, start: usize) -> Result<SpannedBoolExpr, NightjarLanguageError> {
616        // The '(' has already been consumed.
617        let kw = self.expect_keyword_token()?;
618        match kw.node { // Match boolean expressions over keywords
619            Keyword::EQ | Keyword::NE | Keyword::LT | Keyword::LE | Keyword::GT | Keyword::GE => {
620                // All verifiers take exactly two arguments.
621                let op = VerifierOp::from_keyword(kw.node).ok_or_else(|| {
622                    parse_error(kw.span, "internal: expected verifier keyword")
623                })?;
624                let left = self.parse_value_expr()?;  // Left expression
625                let right = self.parse_value_expr()?; // Right expression
626
627                // Use a different rparen parser for verifiers, which produces a better error message
628                let close = self.expect_rparen_for_verifier(kw.span)?;
629                Ok(Spanned::new(
630                    BoolExpr::Verifier {
631                        op,
632                        left: Box::new(left),
633                        right: Box::new(right),
634                    },
635                    Span::new(start, close.end),
636                ))
637            }
638
639            Keyword::AND => {
640                let l = self.parse_bool_expr()?;
641                let r = self.parse_bool_expr()?;
642                let close = self.expect_rparen()?;
643                Ok(Spanned::new(
644                    BoolExpr::And(Box::new(l), Box::new(r)),
645                    Span::new(start, close.end),
646                ))
647            }
648
649            Keyword::OR => {
650                let l = self.parse_bool_expr()?;
651                let r = self.parse_bool_expr()?;
652                let close = self.expect_rparen()?;
653                Ok(Spanned::new(
654                    BoolExpr::Or(Box::new(l), Box::new(r)),
655                    Span::new(start, close.end),
656                ))
657            }
658
659            Keyword::NOT => {
660                let inner = self.parse_bool_expr()?;
661                let close = self.expect_rparen()?;
662                Ok(Spanned::new(
663                    BoolExpr::Not(Box::new(inner)),
664                    Span::new(start, close.end),
665                ))
666            }
667
668            Keyword::NonEmpty => {
669                let operand = self.parse_value_expr()?;
670                let close = self.expect_rparen()?;
671                Ok(Spanned::new(
672                    BoolExpr::UnaryCheck {
673                        op: UnaryCheckOp::NonEmpty,
674                        operand: Box::new(operand),
675                    },
676                    Span::new(start, close.end),
677                ))
678            }
679
680            Keyword::ForAll | Keyword::Exists => {
681                let op = QuantifierOp::from_keyword(kw.node).ok_or_else(|| {
682                    parse_error(kw.span, "internal: expected quantifier keyword")
683                })?;
684                let predicate = self.parse_predicate()?; // Predicates are partial-fulfiled verifiers
685                let operand = self.parse_value_expr()?;
686                let close = self.expect_rparen()?;
687                Ok(Spanned::new(
688                    BoolExpr::Quantifier {
689                        op,
690                        predicate,
691                        operand: Box::new(operand),
692                    },
693                    Span::new(start, close.end),
694                ))
695            }
696            other => Err(parse_error(
697                kw.span,
698                format!(
699                    "expected boolean operator (verifier / connective / quantifier / NonEmpty), found `{:?}`",
700                    other
701                ),
702            )),
703        }
704    }
705
706    /// Specialized `)` matcher for verifier forms for more verbose error messages.
707    ///
708    /// If a value-startable token appears where `)` is expected, surface an `ArgumentError`
709    /// instead of a generic parse error.
710    fn expect_rparen_for_verifier(
711        &mut self,
712        _kw_span: Span,
713    ) -> Result<Span, NightjarLanguageError> {
714        match self.peek_token() {
715            Some(Token::RParen) => Ok(self.bump().span),
716            Some(_) => {
717                let sp = self.current_span();
718                Err(argument_error(sp, "verifier takes exactly 2 operands"))
719            }
720            None => Err(parse_error(
721                self.current_span(),
722                "expected `)` to close verifier",
723            )),
724        }
725    }
726
727    /// Parse a `predicate`.
728    ///
729    /// Predicates are the first operand of a quantifier.
730    ///
731    /// Accepts:
732    /// 1. the bare unary check `NonEmpty` → `Predicate::UnaryCheck`,
733    /// 2. `(VerifierOp x)` (1 operand) → `Predicate::PartialVerifier`,
734    /// 3. `(VerifierOp x y)` (2 operands) → `Predicate::Full(Verifier)`,
735    /// 4. any other `bool_expr` (connectives, `NOT`, `(NonEmpty …)`, nested
736    ///    quantifiers, bool literals) → `Predicate::Full`.
737    ///
738    /// The partial vs. full split for verifier-headed forms is decided by
739    /// operand count at parse time.
740    ///
741    /// Little note on grammar: A predicate is only the part of a clause
742    /// containing the verb and its modifiers, telling what the subject does or
743    /// is (e.g., "larger than x"). A clause is a structure, while a predicate is
744    /// a functional component.
745    fn parse_predicate(&mut self) -> Result<Spanned<Predicate>, NightjarLanguageError> {
746        // Case 1: bare `NonEmpty` keyword (no parentheses).
747        if matches!(self.peek_token(), Some(Token::Keyword(Keyword::NonEmpty))) {
748            let span = self.bump().span;
749            return Ok(Spanned::new(
750                Predicate::UnaryCheck(UnaryCheckOp::NonEmpty),
751                span,
752            ));
753        }
754        // Case 2/3: `(VerifierOp ...)`: partial or full verifier depending
755        // on operand count. We peek 2 tokens ahead to detect this shape.
756        if matches!(self.peek_token(), Some(Token::LParen))
757            && matches!(
758                self.tokens.get(self.pos + 1).map(|t| &t.node),
759                Some(Token::Keyword(
760                    Keyword::EQ
761                        | Keyword::NE
762                        | Keyword::LT
763                        | Keyword::LE
764                        | Keyword::GT
765                        | Keyword::GE
766                )),
767            )
768        {
769            let lparen_span = self.bump().span;
770            self.enter_depth(lparen_span)?;
771            let result = self.parse_verifier_predicate(lparen_span.start);
772            self.exit_depth();
773            return result;
774        }
775        // Case 4 — any other bool_expr: delegate and wrap as Full.
776        let body = self.parse_bool_expr()?;
777        let span = body.span;
778        Ok(Spanned::new(Predicate::Full(Box::new(body)), span))
779    }
780
781    /// Parse a verifier-headed predicate.
782    ///
783    /// Expects the opening `(` has already been consumed. Decides
784    /// `PartialVerifier` vs `Full(Verifier)` by how many value operands appear
785    /// before the closing `)`.
786    ///
787    /// Note that the no-argument verifier `(NonEmpty)` is handled in
788    /// `parse_predicate`.
789    fn parse_verifier_predicate(
790        &mut self,
791        start: usize,
792    ) -> Result<Spanned<Predicate>, NightjarLanguageError> {
793        let kw = self.expect_keyword_token()?;
794        let op = VerifierOp::from_keyword(kw.node).ok_or_else(|| {
795            parse_error(
796                kw.span,
797                "verifier predicate must use a verifier operator (EQ/NE/LT/LE/GT/GE)",
798            )
799        })?;
800        let first = self.parse_value_expr()?; // We have at least one operand
801        match self.peek_token() {
802            Some(Token::RParen) => {
803                // Complete with partial verifier
804                let close = self.bump().span;
805                Ok(Spanned::new(
806                    Predicate::PartialVerifier {
807                        op,
808                        bound: Box::new(first),
809                    },
810                    Span::new(start, close.end),
811                ))
812            }
813            Some(_) => {
814                // Still have one more operand, complete with full verifier
815                let second = self.parse_value_expr()?;
816                let close = self.expect_rparen_for_verifier(kw.span)?; // At most 2 operands
817                let body_span = Span::new(start, close.end);
818                let body = Spanned::new(
819                    BoolExpr::Verifier {
820                        op,
821                        left: Box::new(first),
822                        right: Box::new(second),
823                    },
824                    body_span,
825                );
826                Ok(Spanned::new(Predicate::Full(Box::new(body)), body_span))
827            }
828            None => Err(parse_error(
829                self.current_span(),
830                "expected `)` or value expression in verifier predicate",
831            )),
832        }
833    }
834
835    /// Parse a value expression.
836    ///
837    /// Accepts:
838    /// - a literal
839    /// - a symbol path
840    /// - a nested `(func_op ...)` call that produces an Entity.
841    fn parse_value_expr(&mut self) -> Result<SpannedValueExpr, NightjarLanguageError> {
842        let start_span = self.current_span();
843        match self.peek_token() {
844            // Case 1: literal
845            Some(Token::IntLiteral(_))
846            | Some(Token::FloatLiteral(_))
847            | Some(Token::StringLiteral(_))
848            | Some(Token::BoolLiteral(_))
849            | Some(Token::NullLiteral) => {
850                let tok = self.bump();
851                let lit = match tok.node {
852                    // Convert literal token to Literal enum, it's lit.
853                    Token::IntLiteral(i) => Literal::Int(i),
854                    Token::FloatLiteral(f) => Literal::Float(f),
855                    Token::StringLiteral(s) => Literal::String(s),
856                    Token::BoolLiteral(b) => Literal::Bool(b),
857                    Token::NullLiteral => Literal::Null,
858                    _ => unreachable!(),
859                };
860                Ok(Spanned::new(ValueExpr::Literal(lit), tok.span))
861            }
862
863            // Case 2: symbol path
864            Some(Token::Symbol { .. }) => {
865                let tok = self.bump();
866                let (root, path) = match tok.node {
867                    Token::Symbol { root, path } => (root, path),
868                    _ => unreachable!(),
869                };
870                Ok(Spanned::new(ValueExpr::Symbol { root, path }, tok.span))
871            }
872
873            // Case 3: function call, starts with `(`
874            Some(Token::LParen) => {
875                let lparen_span = self.bump().span;
876                self.enter_depth(lparen_span)?;
877                let result = self.parse_func_call(lparen_span.start);
878                self.exit_depth();
879                result
880            }
881
882            // Error cases
883            Some(_) => Err(parse_error(start_span, "expected value expression")),
884            None => Err(parse_error(
885                start_span,
886                "expected value expression, got end of input",
887            )),
888        }
889    }
890
891    /// Parse a parenthesized function call `(func_op arg ...)`.
892    ///
893    /// Expects the opening `(` has already been consumed.
894    ///
895    /// Argument count is validated against the operator's fixed arity, too few
896    /// is a generic parse error (missing operand), too many is an explicit
897    /// `ArgumentError`.
898    fn parse_func_call(&mut self, start: usize) -> Result<SpannedValueExpr, NightjarLanguageError> {
899        let kw = self.expect_keyword_token()?;
900        let op = FuncOp::from_keyword(kw.node).ok_or_else(|| {
901            parse_error(
902                kw.span,
903                format!(
904                    "`{:?}` is not a value-producing function in this position",
905                    kw.node
906                ),
907            )
908        })?;
909        let expected_operand_count = op.expected_arity();
910        let mut args = Vec::with_capacity(expected_operand_count);
911
912        // Attempt to recursively parse an operand
913        for _ in 0..expected_operand_count {
914            args.push(self.parse_value_expr()?);
915        }
916
917        // After consuming exactly `expected` args, require `)`. Any extra token
918        // before `)` means the caller supplied too many arguments.
919        let close = match self.peek_token() {
920            Some(Token::RParen) => self.bump().span, // consume the closing `)`
921            Some(_) => {
922                return Err(argument_error(
923                    self.current_span(),
924                    format!(
925                        "`{}` takes exactly {} argument(s)",
926                        op.name(),
927                        expected_operand_count
928                    ),
929                ));
930            }
931            None => {
932                return Err(parse_error(
933                    self.current_span(),
934                    format!("expected `)` to close `{}` call", op.name()),
935                ));
936            }
937        };
938        Ok(Spanned::new(
939            ValueExpr::FuncCall { op, args },
940            Span::new(start, close.end),
941        ))
942    }
943
944    /// Consume the next token only if it is a `Keyword`.
945    ///
946    /// Returns the keyword and its span, or a parse error if the token is anything
947    /// else (or we are at EOF).
948    fn expect_keyword_token(&mut self) -> Result<Spanned<Keyword>, NightjarLanguageError> {
949        match self.peek_token() {
950            Some(Token::Keyword(_)) => {
951                let tok = self.bump();
952                if let Token::Keyword(kw) = tok.node {
953                    Ok(Spanned::new(kw, tok.span))
954                } else {
955                    unreachable!()
956                }
957            }
958            _ => Err(parse_error(
959                self.current_span(),
960                "expected operator keyword",
961            )),
962        }
963    }
964}
965// ─────────────────────────────────────────────────────────
966
967// ──────────────── Convenience entry points ─────────────────
968
969/// Tokenize and parse `input` with default parser configuration.
970pub fn parse(input: &str) -> Result<Program, NightjarLanguageError> {
971    parse_with_config(input, &ParserConfig::default())
972}
973
974/// Tokenize and parse `input` with the supplied `ParserConfig`.
975///
976/// Primarily useful for tuning `max_depth` in tests or embedded use.
977pub fn parse_with_config(
978    input: &str,
979    config: &ParserConfig,
980) -> Result<Program, NightjarLanguageError> {
981    let tokens = Tokenizer::new(input).tokenize()?;
982    let program = Parser::parse(tokens, config)?;
983    validate_scope(&program)?;
984    Ok(program)
985}
986// ─────────────────────────────────────────────────────────
987
988// ───────────────── Syntax scope validator ──────────────────
989
// Static check that every `@` element-rooted symbol sits inside the
// `predicate` sub-tree of some enclosing `ForAll`/`Exists`.
//
// The operand position of a quantifier is NOT counted as a predicate context:
// the list being quantified is itself resolved against the root, not the element.
995
996/// Entry point for the post-parse scope check.
997///
998/// Walks the whole `Program` with an initial predicate-depth of `0`, so any
999/// `@` at the outermost level fails immediately.
1000///
1001/// Example (internal):
1002///
1003/// ```ignore
1004/// // `(EQ @.a 1)` at top level: no enclosing quantifier → ScopeError.
1005/// use crate::language::parser::validate_scope;
1006/// // ... parse the program here, then:
1007/// // assert!(matches!(validate_scope(&program), Err(NightjarError::ScopeError{..})));
1008/// ```
1009fn validate_scope(program: &Program) -> Result<(), NightjarLanguageError> {
1010    walk_bool(&program.expr, 0)
1011}
1012
1013/// Walk a `BoolExpr`, forwarding the current `predicate_depth` into
1014/// every sub-expression.
1015///
1016/// Entering a quantifier's predicate increments the counter; the quantifier's
1017/// operand keeps the same depth because the list being iterated is still
1018/// resolved against the outer scope.
1019///
1020/// ## Note
1021///
1022/// `predicate_depth` tracks how many quantifier-predicate scopes enclose
1023/// the current position:
1024///
1025/// - `0`: outside all predicates; `@` symbols are illegal here.
1026/// - `1`: inside one `ForAll`/`Exists` predicate; `@` resolves to its
1027///   iteration element.
1028/// - `> 1`: nested quantifiers; `@` resolves to the innermost element.
1029///
1030/// Example (internal):
1031///
1032/// ```ignore
1033/// // Inside this call, any `ValueExpr::Symbol { root: Element, .. }`
1034/// // with `predicate_depth == 0` will produce a ScopeError.
1035/// walk_bool(&program.expr, 0)?;
1036/// ```
1037fn walk_bool(expr: &SpannedBoolExpr, predicate_depth: u32) -> Result<(), NightjarLanguageError> {
1038    match &expr.node {
1039        BoolExpr::Literal(_) => Ok(()),
1040        BoolExpr::Verifier { left, right, .. } => {
1041            walk_value(left, predicate_depth)?;
1042            walk_value(right, predicate_depth)
1043        }
1044        BoolExpr::And(l, r) | BoolExpr::Or(l, r) => {
1045            walk_bool(l, predicate_depth)?;
1046            walk_bool(r, predicate_depth)
1047        }
1048        BoolExpr::Not(inner) => walk_bool(inner, predicate_depth),
1049        BoolExpr::UnaryCheck { operand, .. } => walk_value(operand, predicate_depth),
1050        BoolExpr::Quantifier {
1051            predicate, operand, ..
1052        } => {
1053            walk_predicate(predicate, predicate_depth + 1)?;
1054            // The quantifier's operand (the list being iterated) is resolved
1055            // against the current scope, not the predicate's inner scope
1056            // so keep the same depth here.
1057            //
1058            // (ForAll (EQ @.a @.b) .items)
1059            //  ^ same depth        ^ same depth
1060            walk_value(operand, predicate_depth)
1061        }
1062    }
1063}
1064
1065/// Walk a `ValueExpr`.
1066///
1067/// This is the only place that actually reports a
1068/// `ScopeError`: when a `Symbol` with `root == Element` is seen at
1069/// `predicate_depth == 0`, we know it sits outside every enclosing
1070/// quantifier predicate and therefore has no element to resolve against.
1071///
1072/// Example (internal):
1073///
1074/// ```ignore
1075/// // Element symbols are only legal when predicate_depth > 0.
1076/// walk_value(&operand, 0)?;                           // root `.`: always ok
1077/// walk_value(&inner_at_symbol, /* inside pred */ 1)?; // `@.a`: ok
1078/// ```
1079fn walk_value(expr: &SpannedValueExpr, predicate_depth: u32) -> Result<(), NightjarLanguageError> {
1080    match &expr.node {
1081        ValueExpr::Literal(_) => Ok(()),
1082        ValueExpr::Symbol { root, .. } => {
1083            if matches!(root, SymbolRoot::Element) && predicate_depth == 0 {
1084                Err(scope_error(
1085                    expr.span,
1086                    "`@` element-relative symbols may only appear inside a ForAll/Exists predicate",
1087                ))
1088            } else {
1089                Ok(())
1090            }
1091        }
1092        ValueExpr::FuncCall { args, .. } => {
1093            for a in args {
1094                walk_value(a, predicate_depth)?;
1095            }
1096            Ok(())
1097        }
1098    }
1099}
1100
1101/// Walk the bound operand of a partial verifier, or the body of a
1102/// `Full` predicate.
1103///
1104/// With `predicate_depth` already incremented by the caller, `@` is legal
1105/// anywhere inside either.
1106///
1107/// Example (internal):
1108///
1109/// ```ignore
1110/// // `(ForAll (EQ @.a @.b) .items)`: walk_predicate is called with
1111/// // predicate_depth = 1, which permits `@.a` / `@.b` under Predicate::Full.
1112/// walk_predicate(&predicate, 1)?;
1113/// ```
1114fn walk_predicate(
1115    pred: &Spanned<Predicate>,
1116    predicate_depth: u32,
1117) -> Result<(), NightjarLanguageError> {
1118    match &pred.node {
1119        Predicate::PartialVerifier { bound, .. } => walk_value(bound, predicate_depth),
1120        Predicate::UnaryCheck(_) => Ok(()),
1121        Predicate::Full(body) => walk_bool(body, predicate_depth),
1122    }
1123}
1124
1125#[cfg(test)]
1126mod tests {
1127    use super::*;
1128
1129    // ── Tokenizer tests ──────────────────────────────────────
1130
1131    fn tokenize(input: &str) -> Vec<Token> {
1132        Tokenizer::new(input)
1133            .tokenize()
1134            .expect("tokenization should succeed")
1135            .into_iter()
1136            .map(|s| s.node)
1137            .collect()
1138    }
1139
1140    #[test]
1141    fn tokenizes_parentheses_and_keywords() {
1142        let toks = tokenize("(EQ 1 1)");
1143        assert_eq!(
1144            toks,
1145            vec![
1146                Token::LParen,
1147                Token::Keyword(Keyword::EQ),
1148                Token::IntLiteral(1),
1149                Token::IntLiteral(1),
1150                Token::RParen,
1151            ]
1152        );
1153    }
1154
1155    #[test]
1156    fn tokenizes_negative_integer_literal() {
1157        let toks = tokenize("-5");
1158        assert_eq!(toks, vec![Token::IntLiteral(-5)]);
1159    }
1160
1161    #[test]
1162    fn tokenizes_negative_float_literal() {
1163        let toks = tokenize("-1.618");
1164        assert_eq!(toks, vec![Token::FloatLiteral(-1.618)]);
1165    }
1166
1167    #[test]
1168    fn space_between_minus_and_digit_is_error() {
1169        let err = Tokenizer::new("- 5").tokenize().unwrap_err();
1170        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
1171    }
1172
1173    #[test]
1174    fn tokenizes_string_literal_with_unicode() {
1175        let toks = tokenize("\"營收\"");
1176        assert_eq!(toks, vec![Token::StringLiteral("營收".into())]);
1177    }
1178
1179    #[test]
1180    fn tokenizes_unterminated_string_errors() {
1181        let err = Tokenizer::new("\"abc").tokenize().unwrap_err();
1182        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
1183    }
1184
1185    #[test]
1186    fn tokenizes_root_symbol_bare_dot() {
1187        assert_eq!(
1188            tokenize("."),
1189            vec![Token::Symbol {
1190                root: SymbolRoot::Root,
1191                path: "".into()
1192            }]
1193        );
1194        assert_eq!(
1195            tokenize("(NonEmpty .)"),
1196            vec![
1197                Token::LParen,
1198                Token::Keyword(Keyword::NonEmpty),
1199                Token::Symbol {
1200                    root: SymbolRoot::Root,
1201                    path: "".into()
1202                },
1203                Token::RParen,
1204            ]
1205        );
1206    }
1207
1208    #[test]
1209    fn tokenizes_nested_symbol_path() {
1210        assert_eq!(
1211            tokenize(".data.department_1.revenue"),
1212            vec![Token::Symbol {
1213                root: SymbolRoot::Root,
1214                path: "data.department_1.revenue".into()
1215            }]
1216        );
1217    }
1218
1219    #[test]
1220    fn tokenizes_unicode_symbol() {
1221        assert_eq!(
1222            tokenize(".營收"),
1223            vec![Token::Symbol {
1224                root: SymbolRoot::Root,
1225                path: "營收".into()
1226            }]
1227        );
1228        assert_eq!(
1229            tokenize(".données.résultat"),
1230            vec![Token::Symbol {
1231                root: SymbolRoot::Root,
1232                path: "données.résultat".into()
1233            }]
1234        );
1235    }
1236
1237    #[test]
1238    fn tokenizes_element_symbol_with_at_sigil() {
1239        assert_eq!(
1240            tokenize("@"),
1241            vec![Token::Symbol {
1242                root: SymbolRoot::Element,
1243                path: "".into()
1244            }]
1245        );
1246        assert_eq!(
1247            tokenize("@.a"),
1248            vec![Token::Symbol {
1249                root: SymbolRoot::Element,
1250                path: "a".into()
1251            }]
1252        );
1253        assert_eq!(
1254            tokenize("@._0.name"),
1255            vec![Token::Symbol {
1256                root: SymbolRoot::Element,
1257                path: "_0.name".into()
1258            }]
1259        );
1260    }
1261
1262    #[test]
1263    fn tokenizes_bool_and_null_literals() {
1264        assert_eq!(tokenize("True"), vec![Token::BoolLiteral(true)]);
1265        assert_eq!(tokenize("False"), vec![Token::BoolLiteral(false)]);
1266        assert_eq!(tokenize("Null"), vec![Token::NullLiteral]);
1267    }
1268
1269    #[test]
1270    fn unknown_identifier_errors() {
1271        let err = Tokenizer::new("FooBar").tokenize().unwrap_err();
1272        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
1273    }
1274
1275    #[test]
1276    fn token_spans_are_byte_offsets() {
1277        let tokens = Tokenizer::new("(EQ 1 2)").tokenize().unwrap();
1278        // '(' at 0
1279        assert_eq!(tokens[0].span, Span::new(0, 1));
1280        // 'EQ' at 1..3
1281        assert_eq!(tokens[1].span, Span::new(1, 3));
1282        // '1' at 4..5
1283        assert_eq!(tokens[2].span, Span::new(4, 5));
1284        // '2' at 6..7
1285        assert_eq!(tokens[3].span, Span::new(6, 7));
1286        // ')' at 7..8
1287        assert_eq!(tokens[4].span, Span::new(7, 8));
1288    }
1289
1290    // ── Parser tests ─────────────────────────────────────────
1291
1292    fn must_parse(input: &str) -> Program {
1293        parse(input)
1294            .unwrap_or_else(|e| panic!("expected parse success for `{}`, got {:?}", input, e))
1295    }
1296
1297    fn must_fail(input: &str) -> NightjarLanguageError {
1298        parse(input).expect_err(&format!("expected parse failure for `{}`", input))
1299    }
1300
1301    #[test]
1302    fn parses_simple_verifier() {
1303        let p = must_parse("(GT 1 2)");
1304        match p.expr.node {
1305            BoolExpr::Verifier { op, .. } => assert_eq!(op, VerifierOp::GT),
1306            other => panic!("expected Verifier, got {:?}", other),
1307        }
1308    }
1309
1310    #[test]
1311    fn verifier_arity_mismatch_produces_arity_error() {
1312        let err = must_fail("(GT 1 2 3)");
1313        assert!(
1314            matches!(err, NightjarLanguageError::ArgumentError { .. }),
1315            "got {:?}",
1316            err
1317        );
1318    }
1319
1320    #[test]
1321    fn bare_gt_without_parens_fails() {
1322        let err = must_fail("GT 1 2");
1323        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
1324    }
1325
1326    #[test]
1327    fn parses_nested_connective_and_verifier() {
1328        let p = must_parse("(AND (GT 1 0) (LT 1 10))");
1329        match p.expr.node {
1330            BoolExpr::And(_, _) => {}
1331            other => panic!("expected And, got {:?}", other),
1332        }
1333    }
1334
1335    #[test]
1336    fn parses_forall_with_partial_verifier() {
1337        let p = must_parse("(ForAll (GT 0) .ids)");
1338        match p.expr.node {
1339            BoolExpr::Quantifier {
1340                op,
1341                predicate,
1342                operand,
1343            } => {
1344                assert_eq!(op, QuantifierOp::ForAll);
1345                match predicate.node {
1346                    Predicate::PartialVerifier { op, .. } => assert_eq!(op, VerifierOp::GT),
1347                    other => panic!("expected PartialVerifier, got {:?}", other),
1348                }
1349                match operand.node {
1350                    ValueExpr::Symbol { root, path } => {
1351                        assert_eq!(root, SymbolRoot::Root);
1352                        assert_eq!(path, "ids");
1353                    }
1354                    other => panic!("expected Symbol, got {:?}", other),
1355                }
1356            }
1357            other => panic!("expected Quantifier, got {:?}", other),
1358        }
1359    }
1360
1361    #[test]
1362    fn parses_exists_with_nonempty_predicate() {
1363        let p = must_parse("(Exists NonEmpty .names)");
1364        match p.expr.node {
1365            BoolExpr::Quantifier { op, predicate, .. } => {
1366                assert_eq!(op, QuantifierOp::Exists);
1367                assert_eq!(
1368                    predicate.node,
1369                    Predicate::UnaryCheck(UnaryCheckOp::NonEmpty)
1370                );
1371            }
1372            other => panic!("expected Quantifier, got {:?}", other),
1373        }
1374    }
1375
1376    #[test]
1377    fn parses_not_of_verifier() {
1378        let p = must_parse("(NOT (EQ .status \"inactive\"))");
1379        match p.expr.node {
1380            BoolExpr::Not(inner) => match inner.node {
1381                BoolExpr::Verifier { op, .. } => assert_eq!(op, VerifierOp::EQ),
1382                other => panic!("expected Verifier, got {:?}", other),
1383            },
1384            other => panic!("expected Not, got {:?}", other),
1385        }
1386    }
1387
1388    #[test]
1389    fn parses_top_level_bool_literal() {
1390        let p = must_parse("True");
1391        assert_eq!(p.expr.node, BoolExpr::Literal(true));
1392    }
1393
1394    #[test]
1395    fn parses_negative_literals_in_verifier() {
1396        let p = must_parse("(GT -5 -10)");
1397        match p.expr.node {
1398            BoolExpr::Verifier { left, right, .. } => {
1399                assert_eq!(left.node, ValueExpr::Literal(Literal::Int(-5)));
1400                assert_eq!(right.node, ValueExpr::Literal(Literal::Int(-10)));
1401            }
1402            other => panic!("expected Verifier, got {:?}", other),
1403        }
1404    }
1405
1406    #[test]
1407    fn parses_root_symbol_as_operand() {
1408        let p = must_parse("(NonEmpty .)");
1409        match p.expr.node {
1410            BoolExpr::UnaryCheck { op, operand } => {
1411                assert_eq!(op, UnaryCheckOp::NonEmpty);
1412                assert_eq!(
1413                    operand.node,
1414                    ValueExpr::Symbol {
1415                        root: SymbolRoot::Root,
1416                        path: "".into()
1417                    }
1418                );
1419            }
1420            other => panic!("expected UnaryCheck, got {:?}", other),
1421        }
1422    }
1423
1424    #[test]
1425    fn func_call_arity_too_many_is_arity_error() {
1426        // Add has arity 2; the 3rd argument should be flagged as arity error.
1427        let err = must_fail("(EQ (Add 1 2 3) 6)");
1428        assert!(
1429            matches!(err, NightjarLanguageError::ArgumentError { .. }),
1430            "got {:?}",
1431            err
1432        );
1433    }
1434
1435    #[test]
1436    fn func_call_arity_too_few_is_parse_error() {
1437        // Add has arity 2; only one operand given before `)` → parsing the 2nd
1438        // operand sees `)` which isn't a valid value expression.
1439        let err = must_fail("(EQ (Add 1) 1)");
1440        assert!(
1441            matches!(err, NightjarLanguageError::ParseError { .. }),
1442            "got {:?}",
1443            err
1444        );
1445    }
1446
1447    #[test]
1448    fn missing_rparen_is_parse_error() {
1449        let err = must_fail("(GT 1 2");
1450        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
1451    }
1452
1453    #[test]
1454    fn trailing_tokens_is_parse_error() {
1455        let err = must_fail("(GT 1 2) extra");
1456        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
1457    }
1458
1459    #[test]
1460    fn empty_input_fails() {
1461        let err = must_fail("");
1462        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
1463    }
1464
1465    #[test]
1466    fn partial_verifier_outside_quantifier_is_rejected() {
1467        // (GT 2) in a full-verifier position is one operand short: the parser
1468        // sees `(GT <literal> )` with a second operand missing.
1469        let err = must_fail("(GT 2)");
1470        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
1471    }
1472
1473    #[test]
1474    fn depth_limit_is_enforced() {
1475        // Build ((((...True...))))-style expression wrapped in NOTs
1476        let mut s = String::new();
1477        let n = 10;
1478        for _ in 0..n {
1479            s.push_str("(NOT ");
1480        }
1481        s.push_str("True");
1482        for _ in 0..n {
1483            s.push(')');
1484        }
1485        // With very small max_depth it must fail.
1486        let cfg = ParserConfig { max_depth: 5 };
1487        let err = parse_with_config(&s, &cfg).unwrap_err();
1488        assert!(
1489            matches!(err, NightjarLanguageError::RecursionError { .. }),
1490            "got {:?}",
1491            err
1492        );
1493        // Default depth should comfortably parse it.
1494        parse(&s).expect("default depth should parse this");
1495    }
1496
1497    #[test]
1498    fn parses_nested_arithmetic_inside_verifier() {
1499        let p = must_parse("(EQ (Add (Mul 2 3) (Sub 10 4)) 12)");
1500        match p.expr.node {
1501            BoolExpr::Verifier { left, .. } => match left.node {
1502                ValueExpr::FuncCall { op, args } => {
1503                    assert_eq!(op, FuncOp::Add);
1504                    assert_eq!(args.len(), 2);
1505                }
1506                other => panic!("expected FuncCall, got {:?}", other),
1507            },
1508            other => panic!("expected Verifier, got {:?}", other),
1509        }
1510    }
1511
1512    #[test]
1513    fn parses_bool_literal_as_operand_to_eq() {
1514        let p = must_parse("(EQ True False)");
1515        match p.expr.node {
1516            BoolExpr::Verifier { left, right, .. } => {
1517                assert_eq!(left.node, ValueExpr::Literal(Literal::Bool(true)));
1518                assert_eq!(right.node, ValueExpr::Literal(Literal::Bool(false)));
1519            }
1520            other => panic!("expected Verifier, got {:?}", other),
1521        }
1522    }
1523
1524    #[test]
1525    fn rejects_func_op_as_top_level_bool_expr() {
1526        // `(Add 1 2)` does not reduce to a boolean at top level.
1527        let err = must_fail("(Add 1 2)");
1528        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
1529    }
1530
1531    #[test]
1532    fn parses_unicode_symbol_in_verifier() {
1533        let p = must_parse("(EQ .數量 100)");
1534        match p.expr.node {
1535            BoolExpr::Verifier { left, .. } => {
1536                assert_eq!(
1537                    left.node,
1538                    ValueExpr::Symbol {
1539                        root: SymbolRoot::Root,
1540                        path: "數量".into()
1541                    }
1542                );
1543            }
1544            other => panic!("expected Verifier, got {:?}", other),
1545        }
1546    }
1547
1548    #[test]
1549    fn top_level_span_covers_whole_expression() {
1550        let p = must_parse("(EQ 1 1)");
1551        assert_eq!(p.expr.span, Span::new(0, 8));
1552    }
1553
1554    // ── Element-relative symbols (`@`) ───────────────────────
1555
1556    #[test]
1557    fn rejects_at_followed_by_bare_identifier() {
1558        // `@a` has no `.` separator and is not legal.
1559        let err = Tokenizer::new("@a").tokenize().unwrap_err();
1560        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
1561    }
1562
1563    #[test]
1564    fn rejects_at_dot_with_no_segment() {
1565        let err = Tokenizer::new("@.").tokenize().unwrap_err();
1566        assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
1567    }
1568
1569    #[test]
1570    fn parses_forall_with_full_verifier_predicate() {
1571        let p = must_parse("(ForAll (EQ @.a @.b) .items)");
1572        match p.expr.node {
1573            BoolExpr::Quantifier { predicate, .. } => match predicate.node {
1574                Predicate::Full(body) => match body.node {
1575                    BoolExpr::Verifier { op, left, right } => {
1576                        assert_eq!(op, VerifierOp::EQ);
1577                        assert_eq!(
1578                            left.node,
1579                            ValueExpr::Symbol {
1580                                root: SymbolRoot::Element,
1581                                path: "a".into()
1582                            }
1583                        );
1584                        assert_eq!(
1585                            right.node,
1586                            ValueExpr::Symbol {
1587                                root: SymbolRoot::Element,
1588                                path: "b".into()
1589                            }
1590                        );
1591                    }
1592                    other => panic!("expected Verifier inside Full, got {:?}", other),
1593                },
1594                other => panic!("expected Predicate::Full, got {:?}", other),
1595            },
1596            other => panic!("expected Quantifier, got {:?}", other),
1597        }
1598    }
1599
1600    #[test]
1601    fn partial_verifier_still_parses_as_partial() {
1602        // Regression guard: `(GT 0)` in predicate position must still produce
1603        // Predicate::PartialVerifier, not Predicate::Full.
1604        let p = must_parse("(ForAll (GT 0) .items)");
1605        match p.expr.node {
1606            BoolExpr::Quantifier { predicate, .. } => {
1607                assert!(matches!(predicate.node, Predicate::PartialVerifier { .. }));
1608            }
1609            other => panic!("expected Quantifier, got {:?}", other),
1610        }
1611    }
1612
1613    #[test]
1614    fn parses_forall_with_nested_arithmetic_on_element() {
1615        let p = must_parse("(ForAll (EQ (Add @.a @.b) @.c) .items)");
1616        match p.expr.node {
1617            BoolExpr::Quantifier { predicate, .. } => {
1618                assert!(matches!(predicate.node, Predicate::Full(_)));
1619            }
1620            other => panic!("expected Quantifier, got {:?}", other),
1621        }
1622    }
1623
#[test]
fn rejects_at_outside_quantifier_with_scope_error() {
    // An `@` symbol that is not enclosed by any quantifier must be rejected,
    // and the rejection must be reported as a ScopeError.
    let outcome = parse("(EQ @.a 1)");
    let err = outcome.expect_err("`@` outside quantifier should fail");

    let is_scope_error = matches!(err, NightjarLanguageError::ScopeError { .. });
    assert!(is_scope_error, "got {:?}", err);
}
1633
#[test]
fn rejects_at_in_quantifier_operand_with_scope_error() {
    // Only the predicate of a quantifier may use `@`. The operand is still
    // resolved against the outer scope, so at top level `@.items` in operand
    // position has to fail with a ScopeError.
    let outcome = parse("(ForAll (GT 0) @.items)");
    let err = outcome.expect_err("`@` in quantifier operand at top level should fail");

    let is_scope_error = matches!(err, NightjarLanguageError::ScopeError { .. });
    assert!(is_scope_error, "got {:?}", err);
}
1646
#[test]
fn bare_at_symbol_parses_in_predicate() {
    // A bare `@` (no trailing path) inside a predicate should become an
    // element-rooted symbol whose path is empty.
    let program = must_parse("(ForAll (GT @ 0) .scores)");

    // Peel the AST one layer at a time: quantifier -> full predicate ->
    // verifier -> left operand.
    let predicate = match program.expr.node {
        BoolExpr::Quantifier { predicate, .. } => predicate,
        unexpected => panic!("expected Quantifier, got {:?}", unexpected),
    };
    let body = match predicate.node {
        Predicate::Full(body) => body,
        unexpected => panic!("expected Full, got {:?}", unexpected),
    };
    let left = match body.node {
        BoolExpr::Verifier { left, .. } => left,
        unexpected => panic!("expected Verifier, got {:?}", unexpected),
    };

    assert_eq!(
        left.node,
        ValueExpr::Symbol {
            root: SymbolRoot::Element,
            path: "".into()
        }
    );
}
1667
#[test]
fn nonempty_with_operand_in_predicate_parses_as_full() {
    // With an explicit operand, `(NonEmpty .x)` in predicate position is a
    // complete BoolExpr wrapped in Predicate::Full. The UnaryCheck predicate
    // form is reserved for the bare `NonEmpty` keyword.
    let program = must_parse("(ForAll (NonEmpty .x) .items)");

    let predicate = match program.expr.node {
        BoolExpr::Quantifier { predicate, .. } => predicate,
        unexpected => panic!("expected Quantifier, got {:?}", unexpected),
    };
    let body = match predicate.node {
        Predicate::Full(body) => body,
        unexpected => panic!("expected Full, got {:?}", unexpected),
    };

    assert!(matches!(
        body.node,
        BoolExpr::UnaryCheck {
            op: UnaryCheckOp::NonEmpty,
            ..
        }
    ));
}
1687}