use crate::error::{
argument_error, parse_error, recursion_error, scope_error, NightjarLanguageError, Span,
};
use crate::language::grammar::{
BoolExpr, FuncOp, Keyword, Literal, Predicate, Program, QuantifierOp, Spanned, SpannedBoolExpr,
SpannedValueExpr, SymbolRoot, Token, UnaryCheckOp, ValueExpr, VerifierOp,
};
/// Tunable limits applied while parsing a token stream.
///
/// Derives `PartialEq`/`Eq` in addition to `Debug`/`Clone` so configurations
/// can be compared directly (for example in assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParserConfig {
    /// Maximum number of parenthesised forms that may be open at once; the
    /// parser reports a recursion error when nesting goes deeper than this.
    pub max_depth: usize,
}

impl Default for ParserConfig {
    /// Returns a configuration that allows 256 levels of nesting.
    fn default() -> Self {
        Self { max_depth: 256 }
    }
}
/// Character-level scanner that turns source text into spanned tokens.
///
/// Derives `Debug` and `Clone` so the public type can be inspected in
/// diagnostics and snapshotted by callers.
#[derive(Debug, Clone)]
pub struct Tokenizer<'a> {
    /// The complete source text being scanned.
    input: &'a str,
    /// Index into `chars` of the next character to read.
    cursor: usize,
    /// Every character of `input` paired with its byte offset.
    chars: Vec<(usize, char)>,
    /// Byte length of `input`; reported as the position once `chars` is exhausted.
    eof: usize,
}
impl<'a> Tokenizer<'a> {
/// Builds a tokenizer positioned at the first character of `input`.
///
/// The characters and their byte offsets are collected up front so later
/// lookahead is a plain index into a vector.
pub fn new(input: &'a str) -> Self {
    Self {
        input,
        cursor: 0,
        chars: input.char_indices().collect(),
        eof: input.len(),
    }
}
/// Scans the remaining input and returns every token with its byte span.
///
/// Whitespace between tokens is discarded.
///
/// # Errors
///
/// Returns a parse error for a character that cannot start a token, and
/// propagates any error raised by the literal, symbol and identifier readers.
pub fn tokenize(&mut self) -> Result<Vec<Spanned<Token>>, NightjarLanguageError> {
    let mut out = Vec::new();
    self.skip_whitespace();
    while let Some(ch) = self.peek_char() {
        let begin = self.byte_pos();
        let node = match ch {
            '(' | ')' => {
                self.advance();
                if ch == '(' {
                    Token::LParen
                } else {
                    Token::RParen
                }
            }
            '"' => self.read_string(begin)?,
            '.' | '@' => {
                let root = if ch == '.' {
                    SymbolRoot::Root
                } else {
                    SymbolRoot::Element
                };
                self.read_symbol(begin, root)?
            }
            // A number starts with a digit, or with `-` immediately followed by one.
            _ if ch.is_ascii_digit() || self.is_negative_literal() => self.read_number(begin)?,
            _ if ch.is_alphabetic() || ch == '_' => self.read_ident(begin)?,
            _ => {
                return Err(parse_error(
                    Span::new(begin, begin + ch.len_utf8()),
                    format!("unexpected character `{}`", ch),
                ));
            }
        };
        out.push(Spanned::new(node, Span::new(begin, self.byte_pos())));
        self.skip_whitespace();
    }
    Ok(out)
}
/// Byte offset of the character under the cursor, or the input length once
/// the cursor has run past the last character.
fn byte_pos(&self) -> usize {
    self.chars
        .get(self.cursor)
        .map_or(self.eof, |&(offset, _)| offset)
}
/// Reports whether the cursor has moved past the final character.
fn _is_eof(&self) -> bool {
    self.chars.get(self.cursor).is_none()
}
/// Returns the character under the cursor without consuming it.
fn peek_char(&self) -> Option<char> {
    let &(_, ch) = self.chars.get(self.cursor)?;
    Some(ch)
}
/// Returns the character `offset` positions ahead of the cursor, if any.
fn peek_char_at(&self, offset: usize) -> Option<char> {
    let index = self.cursor + offset;
    self.chars.get(index).map(|&(_, ch)| ch)
}
/// Moves the cursor forward by one character.
fn advance(&mut self) {
    self.cursor = self.cursor + 1;
}
/// Consumes characters until the cursor rests on a non-whitespace character
/// or the input is exhausted.
fn skip_whitespace(&mut self) {
    while self.peek_char().is_some_and(char::is_whitespace) {
        self.advance();
    }
}
/// True when the cursor is on `-` and the very next character is an ASCII digit.
fn is_negative_literal(&self) -> bool {
    matches!(
        (self.peek_char(), self.peek_char_at(1)),
        (Some('-'), Some(next)) if next.is_ascii_digit()
    )
}
/// Reads a double-quoted string literal whose opening quote is under the cursor.
///
/// Every character up to the next `"` is copied verbatim; there is no escape
/// processing.
///
/// # Errors
///
/// Returns a parse error spanning from `start` to the end of input when no
/// closing quote is found.
fn read_string(&mut self, start: usize) -> Result<Token, NightjarLanguageError> {
    // Step over the opening quote.
    self.advance();
    let mut contents = String::new();
    while let Some(ch) = self.peek_char() {
        self.advance();
        if ch == '"' {
            return Ok(Token::StringLiteral(contents));
        }
        contents.push(ch);
    }
    Err(parse_error(
        Span::new(start, self.byte_pos()),
        "unterminated string literal",
    ))
}
/// Reads an integer or float literal that begins at byte offset `start`.
///
/// Accepts an optional leading `-`, a run of ASCII digits, and optionally a
/// `.` followed by at least one more digit (which makes the literal a float).
/// A `.` that is not followed by a digit is left for the next token.
///
/// # Errors
///
/// Returns a parse error when the text does not fit in an `i64`, or when a
/// float literal does not parse to a finite `f64`. The finiteness check
/// matters because `str::parse::<f64>` reports overflow as `Ok(inf)` rather
/// than an error, which would otherwise let an oversized literal through
/// silently while the equivalent integer literal is rejected.
fn read_number(&mut self, start: usize) -> Result<Token, NightjarLanguageError> {
    if self.peek_char() == Some('-') {
        self.advance();
    }
    while self.peek_char().is_some_and(|c| c.is_ascii_digit()) {
        self.advance();
    }

    // Only treat `.` as a decimal point when a digit follows it.
    let is_float =
        self.peek_char() == Some('.') && self.peek_char_at(1).is_some_and(|c| c.is_ascii_digit());
    if is_float {
        self.advance();
        while self.peek_char().is_some_and(|c| c.is_ascii_digit()) {
            self.advance();
        }
    }

    let end = self.byte_pos();
    let text = &self.input[start..end];
    if is_float {
        match text.parse::<f64>() {
            Ok(value) if value.is_finite() => Ok(Token::FloatLiteral(value)),
            _ => Err(parse_error(
                Span::new(start, end),
                format!("invalid float literal `{}`", text),
            )),
        }
    } else {
        text.parse::<i64>().map(Token::IntLiteral).map_err(|_| {
            parse_error(
                Span::new(start, end),
                format!("invalid integer literal `{}`", text),
            )
        })
    }
}
/// Reads a symbol whose sigil (`.` for root, `@` for element) is under the cursor.
///
/// A root symbol is the sigil followed directly by a segment; an element
/// symbol is the sigil followed by `.segment`. Further `.segment` parts are
/// appended to the dotted path. A sigil with nothing attached yields a symbol
/// with an empty path, subject to the check in `complete_bare_sigil`.
///
/// # Errors
///
/// Returns a parse error when a `.` is not followed by a segment, or when a
/// bare sigil is followed by a character that `complete_bare_sigil` rejects.
fn read_symbol(
    &mut self,
    _start: usize,
    root: SymbolRoot,
) -> Result<Token, NightjarLanguageError> {
    // Consume the sigil itself.
    self.advance();
    let mut path = String::new();

    let is_bare = match root {
        SymbolRoot::Root => match self.try_read_segment() {
            Some(first) => {
                Self::push_segment(&mut path, first);
                false
            }
            None => true,
        },
        SymbolRoot::Element => self.peek_char() != Some('.'),
    };
    if is_bare {
        let sigil = match root {
            SymbolRoot::Root => '.',
            SymbolRoot::Element => '@',
        };
        return self.complete_bare_sigil(root, path, sigil);
    }

    while self.peek_char() == Some('.') {
        let dot_at = self.byte_pos();
        self.advance();
        let Some(segment) = self.try_read_segment() else {
            return Err(parse_error(
                Span::new(dot_at, dot_at + 1),
                "expected symbol segment after `.`",
            ));
        };
        Self::push_segment(&mut path, segment);
    }
    Ok(Token::Symbol { root, path })
}
/// Consumes a run of alphanumeric or `_` characters and returns it as a
/// slice of the input, or `None` when no such character is under the cursor.
fn try_read_segment(&mut self) -> Option<&str> {
    let from = self.byte_pos();
    while self
        .peek_char()
        .is_some_and(|ch| ch == '_' || ch.is_alphanumeric())
    {
        self.advance();
    }
    let to = self.byte_pos();
    (from != to).then_some(&self.input[from..to])
}
/// Appends `seg` to the dotted `path`, inserting a `.` separator unless the
/// path is still empty.
fn push_segment(path: &mut String, seg: &str) {
    let separator = if path.is_empty() { "" } else { "." };
    path.push_str(separator);
    path.push_str(seg);
}
/// Finishes a symbol that consists of a sigil with no path attached.
///
/// The symbol is accepted when it is followed by end of input, whitespace,
/// or `)`.
///
/// # Errors
///
/// Returns a parse error pointing at any other following character.
fn complete_bare_sigil(
    &self,
    root: SymbolRoot,
    path: String,
    sigil: char,
) -> Result<Token, NightjarLanguageError> {
    let offending = self
        .peek_char()
        .filter(|ch| !(ch.is_whitespace() || *ch == ')'));
    let Some(next) = offending else {
        return Ok(Token::Symbol { root, path });
    };
    let at = self.byte_pos();
    Err(parse_error(
        Span::new(at, at + next.len_utf8()),
        format!("unexpected character `{}` after `{}`", next, sigil),
    ))
}
/// Reads a word made of alphanumeric or `_` characters starting at `start`
/// and classifies it as `True`, `False`, `Null`, or an operator keyword.
///
/// # Errors
///
/// Returns a parse error when the word is none of the literals above and
/// `Keyword::from_ident` does not recognise it.
fn read_ident(&mut self, start: usize) -> Result<Token, NightjarLanguageError> {
    while self
        .peek_char()
        .is_some_and(|ch| ch == '_' || ch.is_alphanumeric())
    {
        self.advance();
    }
    let end = self.byte_pos();
    let word = &self.input[start..end];
    let token = match word {
        "True" => Token::BoolLiteral(true),
        "False" => Token::BoolLiteral(false),
        "Null" => Token::NullLiteral,
        _ => {
            let keyword = Keyword::from_ident(word).ok_or_else(|| {
                parse_error(
                    Span::new(start, end),
                    format!("unknown identifier `{}`", word),
                )
            })?;
            Token::Keyword(keyword)
        }
    };
    Ok(token)
}
}
/// Recursive-descent parser that consumes a pre-tokenized input.
pub struct Parser {
// Tokens to parse, in the order the tokenizer produced them.
tokens: Vec<Spanned<Token>>,
// Index into `tokens` of the next token to be consumed.
pos: usize,
// Number of parenthesised forms currently being parsed.
depth: usize,
// Upper bound on `depth`; going past it produces a recursion error.
max_depth: usize,
// End offset of the last token (0 when there are no tokens); used as the
// position of errors reported at end of input.
input_len: usize,
}
impl Parser {
/// Parses `tokens` into a [`Program`] using the limits in `config`.
///
/// # Errors
///
/// Returns an error when the tokens do not form exactly one boolean
/// expression, or when nesting exceeds `config.max_depth`.
pub fn parse(
    tokens: Vec<Spanned<Token>>,
    config: &ParserConfig,
) -> Result<Program, NightjarLanguageError> {
    // End-of-input errors are reported just past the final token.
    let input_len = tokens.last().map_or(0, |last| last.span.end);
    let mut parser = Self {
        tokens,
        pos: 0,
        depth: 0,
        max_depth: config.max_depth,
        input_len,
    };
    let expr = parser.parse_bool_expr()?;
    parser.expect_eof()?;
    Ok(Program { expr })
}
/// Returns the next unconsumed token together with its span, if any.
fn peek(&self) -> Option<&Spanned<Token>> {
    self.tokens.as_slice().get(self.pos)
}
/// Returns the next unconsumed token without its span, if any.
fn peek_token(&self) -> Option<&Token> {
    let spanned = self.peek()?;
    Some(&spanned.node)
}
/// Consumes and returns a copy of the next token.
///
/// # Panics
///
/// Panics when no token is left; callers peek before bumping.
fn bump(&mut self) -> Spanned<Token> {
    let index = self.pos;
    let taken = self.tokens[index].clone();
    self.pos = index + 1;
    taken
}
/// Span of the next token, or a point span at `input_len` when the tokens
/// are exhausted.
fn current_span(&self) -> Span {
    match self.peek() {
        Some(tok) => tok.span,
        None => Span::point(self.input_len),
    }
}
/// Consumes a `)` and returns its span.
///
/// # Errors
///
/// Returns a parse error at the current position when the next token is not `)`.
fn expect_rparen(&mut self) -> Result<Span, NightjarLanguageError> {
    if matches!(self.peek_token(), Some(Token::RParen)) {
        return Ok(self.bump().span);
    }
    Err(parse_error(self.current_span(), "expected `)`"))
}
/// Checks that every token has been consumed.
///
/// # Errors
///
/// Returns a parse error located at the first leftover token.
fn expect_eof(&mut self) -> Result<(), NightjarLanguageError> {
    if let Some(extra) = self.peek() {
        return Err(parse_error(
            extra.span,
            "unexpected token after complete expression",
        ));
    }
    Ok(())
}
/// Records entry into one more nested form.
///
/// # Errors
///
/// Returns a recursion error at `span` when the new depth exceeds `max_depth`.
fn enter_depth(&mut self, span: Span) -> Result<(), NightjarLanguageError> {
    self.depth += 1;
    if self.depth <= self.max_depth {
        Ok(())
    } else {
        Err(recursion_error(span, self.max_depth))
    }
}
/// Records leaving a nested form; the depth never drops below zero.
fn exit_depth(&mut self) {
    if self.depth > 0 {
        self.depth -= 1;
    }
}
/// Parses one boolean expression: either a bare boolean literal or a
/// parenthesised form handled by `parse_bool_body`.
///
/// # Errors
///
/// Returns a parse error when the next token cannot start a boolean
/// expression or the input has ended, and propagates errors from the body.
fn parse_bool_expr(&mut self) -> Result<SpannedBoolExpr, NightjarLanguageError> {
    let here = self.current_span();
    // `Some(value)` for a literal, `None` for an opening parenthesis.
    let literal = match self.peek_token() {
        None => {
            return Err(parse_error(
                here,
                "expected boolean expression, got end of input",
            ));
        }
        Some(Token::BoolLiteral(value)) => Some(*value),
        Some(Token::LParen) => None,
        Some(_) => return Err(parse_error(here, "expected boolean expression")),
    };

    let first = self.bump().span;
    match literal {
        Some(value) => Ok(Spanned::new(BoolExpr::Literal(value), first)),
        None => {
            self.enter_depth(first)?;
            let parsed = self.parse_bool_body(first.start);
            self.exit_depth();
            parsed
        }
    }
}
fn parse_bool_body(&mut self, start: usize) -> Result<SpannedBoolExpr, NightjarLanguageError> {
let kw = self.expect_keyword_token()?;
match kw.node { Keyword::EQ | Keyword::NE | Keyword::LT | Keyword::LE | Keyword::GT | Keyword::GE => {
let op = VerifierOp::from_keyword(kw.node).ok_or_else(|| {
parse_error(kw.span, "internal: expected verifier keyword")
})?;
let left = self.parse_value_expr()?; let right = self.parse_value_expr()?;
let close = self.expect_rparen_for_verifier(kw.span)?;
Ok(Spanned::new(
BoolExpr::Verifier {
op,
left: Box::new(left),
right: Box::new(right),
},
Span::new(start, close.end),
))
}
Keyword::AND => {
let l = self.parse_bool_expr()?;
let r = self.parse_bool_expr()?;
let close = self.expect_rparen()?;
Ok(Spanned::new(
BoolExpr::And(Box::new(l), Box::new(r)),
Span::new(start, close.end),
))
}
Keyword::OR => {
let l = self.parse_bool_expr()?;
let r = self.parse_bool_expr()?;
let close = self.expect_rparen()?;
Ok(Spanned::new(
BoolExpr::Or(Box::new(l), Box::new(r)),
Span::new(start, close.end),
))
}
Keyword::NOT => {
let inner = self.parse_bool_expr()?;
let close = self.expect_rparen()?;
Ok(Spanned::new(
BoolExpr::Not(Box::new(inner)),
Span::new(start, close.end),
))
}
Keyword::NonEmpty => {
let operand = self.parse_value_expr()?;
let close = self.expect_rparen()?;
Ok(Spanned::new(
BoolExpr::UnaryCheck {
op: UnaryCheckOp::NonEmpty,
operand: Box::new(operand),
},
Span::new(start, close.end),
))
}
Keyword::ForAll | Keyword::Exists => {
let op = QuantifierOp::from_keyword(kw.node).ok_or_else(|| {
parse_error(kw.span, "internal: expected quantifier keyword")
})?;
let predicate = self.parse_predicate()?; let operand = self.parse_value_expr()?;
let close = self.expect_rparen()?;
Ok(Spanned::new(
BoolExpr::Quantifier {
op,
predicate,
operand: Box::new(operand),
},
Span::new(start, close.end),
))
}
other => Err(parse_error(
kw.span,
format!(
"expected boolean operator (verifier / connective / quantifier / NonEmpty), found `{:?}`",
other
),
)),
}
}
/// Consumes the `)` that closes a two-operand verifier and returns its span.
///
/// # Errors
///
/// Returns an argument error when another token follows the second operand,
/// and a parse error when the input ends before the `)`.
fn expect_rparen_for_verifier(
    &mut self,
    _kw_span: Span,
) -> Result<Span, NightjarLanguageError> {
    let here = self.current_span();
    match self.peek_token() {
        Some(Token::RParen) => Ok(self.bump().span),
        Some(_) => Err(argument_error(here, "verifier takes exactly 2 operands")),
        None => Err(parse_error(here, "expected `)` to close verifier")),
    }
}
/// Parses the predicate operand of a quantifier.
///
/// Three shapes are accepted: a bare `NonEmpty` keyword, a parenthesised
/// verifier form (delegated to `parse_verifier_predicate`), or any other
/// boolean expression, which is wrapped as a full predicate.
///
/// # Errors
///
/// Propagates depth-limit errors and any error from the delegated parsers.
fn parse_predicate(&mut self) -> Result<Spanned<Predicate>, NightjarLanguageError> {
    let bare_nonempty = matches!(
        self.peek_token(),
        Some(Token::Keyword(Keyword::NonEmpty))
    );
    if bare_nonempty {
        let span = self.bump().span;
        return Ok(Spanned::new(
            Predicate::UnaryCheck(UnaryCheckOp::NonEmpty),
            span,
        ));
    }

    // Look one token past `(` to see whether a verifier keyword follows.
    let opens_verifier = matches!(self.peek_token(), Some(Token::LParen))
        && self.tokens.get(self.pos + 1).is_some_and(|next| {
            matches!(
                next.node,
                Token::Keyword(
                    Keyword::EQ
                        | Keyword::NE
                        | Keyword::LT
                        | Keyword::LE
                        | Keyword::GT
                        | Keyword::GE
                )
            )
        });

    if opens_verifier {
        let open = self.bump().span;
        self.enter_depth(open)?;
        let parsed = self.parse_verifier_predicate(open.start);
        self.exit_depth();
        parsed
    } else {
        let body = self.parse_bool_expr()?;
        let span = body.span;
        Ok(Spanned::new(Predicate::Full(Box::new(body)), span))
    }
}
/// Parses a verifier form used as a predicate, after its `(` has been consumed.
///
/// With one operand the result is a partial verifier; with two it is a full
/// predicate wrapping an ordinary verifier expression. `start` is the byte
/// offset of the opening parenthesis.
///
/// # Errors
///
/// Returns a parse error when the keyword is not a verifier operator or the
/// input ends after the first operand, and propagates operand and
/// closing-paren errors.
fn parse_verifier_predicate(
    &mut self,
    start: usize,
) -> Result<Spanned<Predicate>, NightjarLanguageError> {
    let kw = self.expect_keyword_token()?;
    let Some(op) = VerifierOp::from_keyword(kw.node) else {
        return Err(parse_error(
            kw.span,
            "verifier predicate must use a verifier operator (EQ/NE/LT/LE/GT/GE)",
        ));
    };
    let first = self.parse_value_expr()?;

    if self.peek_token().is_none() {
        return Err(parse_error(
            self.current_span(),
            "expected `)` or value expression in verifier predicate",
        ));
    }

    if matches!(self.peek_token(), Some(Token::RParen)) {
        let close = self.bump().span;
        let partial = Predicate::PartialVerifier {
            op,
            bound: Box::new(first),
        };
        return Ok(Spanned::new(partial, Span::new(start, close.end)));
    }

    let second = self.parse_value_expr()?;
    let close = self.expect_rparen_for_verifier(kw.span)?;
    let whole = Span::new(start, close.end);
    let body = Spanned::new(
        BoolExpr::Verifier {
            op,
            left: Box::new(first),
            right: Box::new(second),
        },
        whole,
    );
    Ok(Spanned::new(Predicate::Full(Box::new(body)), whole))
}
/// Parses one value expression: a literal, a symbol, or a parenthesised
/// function call.
///
/// # Errors
///
/// Returns a parse error when the next token cannot start a value expression
/// or the input has ended, and propagates errors from nested calls.
fn parse_value_expr(&mut self) -> Result<SpannedValueExpr, NightjarLanguageError> {
    let here = self.current_span();
    match self.peek_token() {
        None => {
            return Err(parse_error(
                here,
                "expected value expression, got end of input",
            ));
        }
        Some(Token::LParen) => {
            let open = self.bump().span;
            self.enter_depth(open)?;
            let call = self.parse_func_call(open.start);
            self.exit_depth();
            return call;
        }
        // Single-token values are converted below.
        Some(
            Token::IntLiteral(_)
            | Token::FloatLiteral(_)
            | Token::StringLiteral(_)
            | Token::BoolLiteral(_)
            | Token::NullLiteral
            | Token::Symbol { .. },
        ) => {}
        Some(_) => return Err(parse_error(here, "expected value expression")),
    }

    let tok = self.bump();
    let node = match tok.node {
        Token::IntLiteral(i) => ValueExpr::Literal(Literal::Int(i)),
        Token::FloatLiteral(f) => ValueExpr::Literal(Literal::Float(f)),
        Token::StringLiteral(s) => ValueExpr::Literal(Literal::String(s)),
        Token::BoolLiteral(b) => ValueExpr::Literal(Literal::Bool(b)),
        Token::NullLiteral => ValueExpr::Literal(Literal::Null),
        Token::Symbol { root, path } => ValueExpr::Symbol { root, path },
        _ => unreachable!(),
    };
    Ok(Spanned::new(node, tok.span))
}
/// Parses a function call after its `(` has been consumed: the function
/// keyword, exactly as many operands as the function's arity, and the `)`.
///
/// `start` is the byte offset of the opening parenthesis.
///
/// # Errors
///
/// Returns a parse error when the keyword is not a function or the input
/// ends before `)`, an argument error when extra operands follow, and
/// propagates operand errors.
fn parse_func_call(&mut self, start: usize) -> Result<SpannedValueExpr, NightjarLanguageError> {
    let kw = self.expect_keyword_token()?;
    let Some(op) = FuncOp::from_keyword(kw.node) else {
        return Err(parse_error(
            kw.span,
            format!(
                "`{:?}` is not a value-producing function in this position",
                kw.node
            ),
        ));
    };

    let arity = op.expected_arity();
    let mut args = Vec::with_capacity(arity);
    while args.len() < arity {
        args.push(self.parse_value_expr()?);
    }

    if !matches!(self.peek_token(), Some(Token::RParen)) {
        let here = self.current_span();
        let error = if self.peek_token().is_some() {
            argument_error(
                here,
                format!("`{}` takes exactly {} argument(s)", op.name(), arity),
            )
        } else {
            parse_error(
                here,
                format!("expected `)` to close `{}` call", op.name()),
            )
        };
        return Err(error);
    }

    let close = self.bump().span;
    Ok(Spanned::new(
        ValueExpr::FuncCall { op, args },
        Span::new(start, close.end),
    ))
}
/// Consumes the next token, which must be a keyword, and returns the keyword
/// with its span.
///
/// # Errors
///
/// Returns a parse error at the current position when the next token is not
/// a keyword or the input has ended.
fn expect_keyword_token(&mut self) -> Result<Spanned<Keyword>, NightjarLanguageError> {
    if !matches!(self.peek_token(), Some(Token::Keyword(_))) {
        return Err(parse_error(
            self.current_span(),
            "expected operator keyword",
        ));
    }
    let tok = self.bump();
    match tok.node {
        Token::Keyword(kw) => Ok(Spanned::new(kw, tok.span)),
        _ => unreachable!(),
    }
}
}
/// Tokenizes, parses and scope-checks `input` with the default parser configuration.
///
/// # Errors
///
/// Returns whatever error `parse_with_config` reports.
pub fn parse(input: &str) -> Result<Program, NightjarLanguageError> {
    let config = ParserConfig::default();
    parse_with_config(input, &config)
}
/// Tokenizes, parses and scope-checks `input` using the limits in `config`.
///
/// # Errors
///
/// Returns the first error raised by tokenization, parsing, or scope
/// validation, in that order.
pub fn parse_with_config(
    input: &str,
    config: &ParserConfig,
) -> Result<Program, NightjarLanguageError> {
    let mut tokenizer = Tokenizer::new(input);
    let program = Parser::parse(tokenizer.tokenize()?, config)?;
    validate_scope(&program).map(|()| program)
}
/// Checks symbol scoping over the whole program, starting outside any
/// quantifier predicate.
fn validate_scope(program: &Program) -> Result<(), NightjarLanguageError> {
    let root = &program.expr;
    walk_bool(root, 0)
}
/// Scope-checks a boolean expression and everything beneath it.
///
/// `predicate_depth` counts the quantifier predicates enclosing `expr`. A
/// quantifier's predicate is checked one level deeper, while its operand
/// stays at the current level.
fn walk_bool(expr: &SpannedBoolExpr, predicate_depth: u32) -> Result<(), NightjarLanguageError> {
    match &expr.node {
        BoolExpr::Literal(_) => Ok(()),
        BoolExpr::Not(inner) => walk_bool(inner, predicate_depth),
        BoolExpr::UnaryCheck { operand, .. } => walk_value(operand, predicate_depth),
        BoolExpr::And(lhs, rhs) | BoolExpr::Or(lhs, rhs) => {
            walk_bool(lhs, predicate_depth).and_then(|()| walk_bool(rhs, predicate_depth))
        }
        BoolExpr::Verifier { left, right, .. } => {
            walk_value(left, predicate_depth).and_then(|()| walk_value(right, predicate_depth))
        }
        BoolExpr::Quantifier {
            predicate, operand, ..
        } => walk_predicate(predicate, predicate_depth + 1)
            .and_then(|()| walk_value(operand, predicate_depth)),
    }
}
/// Scope-checks a value expression and its function-call arguments.
///
/// # Errors
///
/// Returns a scope error for an `@` element symbol found while
/// `predicate_depth` is zero, i.e. outside every quantifier predicate.
fn walk_value(expr: &SpannedValueExpr, predicate_depth: u32) -> Result<(), NightjarLanguageError> {
    match &expr.node {
        ValueExpr::Symbol {
            root: SymbolRoot::Element,
            ..
        } if predicate_depth == 0 => Err(scope_error(
            expr.span,
            "`@` element-relative symbols may only appear inside a ForAll/Exists predicate",
        )),
        ValueExpr::Literal(_) | ValueExpr::Symbol { .. } => Ok(()),
        ValueExpr::FuncCall { args, .. } => args
            .iter()
            .try_for_each(|arg| walk_value(arg, predicate_depth)),
    }
}
/// Scope-checks a quantifier predicate at the given `predicate_depth`.
///
/// A bare unary check has no operands and is always accepted.
fn walk_predicate(
    pred: &Spanned<Predicate>,
    predicate_depth: u32,
) -> Result<(), NightjarLanguageError> {
    let depth = predicate_depth;
    match &pred.node {
        Predicate::UnaryCheck(_) => Ok(()),
        Predicate::Full(body) => walk_bool(body, depth),
        Predicate::PartialVerifier { bound, .. } => walk_value(bound, depth),
    }
}
// Unit tests for the tokenizer, the parser, and `@` scope validation.
#[cfg(test)]
mod tests {
use super::*;
// Tokenizes `input`, panicking on failure, and drops the spans.
fn tokenize(input: &str) -> Vec<Token> {
Tokenizer::new(input)
.tokenize()
.expect("tokenization should succeed")
.into_iter()
.map(|s| s.node)
.collect()
}
// Parentheses, a keyword and integer literals come out in source order.
#[test]
fn tokenizes_parentheses_and_keywords() {
let toks = tokenize("(EQ 1 1)");
assert_eq!(
toks,
vec![
Token::LParen,
Token::Keyword(Keyword::EQ),
Token::IntLiteral(1),
Token::IntLiteral(1),
Token::RParen,
]
);
}
// `-` directly followed by digits is a single negative integer token.
#[test]
fn tokenizes_negative_integer_literal() {
let toks = tokenize("-5");
assert_eq!(toks, vec![Token::IntLiteral(-5)]);
}
// `-` directly followed by digits and a fraction is a negative float token.
#[test]
fn tokenizes_negative_float_literal() {
let toks = tokenize("-1.618");
assert_eq!(toks, vec![Token::FloatLiteral(-1.618)]);
}
// A `-` separated from the digits by a space is not a number.
#[test]
fn space_between_minus_and_digit_is_error() {
let err = Tokenizer::new("- 5").tokenize().unwrap_err();
assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
}
// String contents may contain non-ASCII characters.
#[test]
fn tokenizes_string_literal_with_unicode() {
let toks = tokenize("\"營收\"");
assert_eq!(toks, vec![Token::StringLiteral("營收".into())]);
}
// A string with no closing quote is a parse error.
#[test]
fn tokenizes_unterminated_string_errors() {
let err = Tokenizer::new("\"abc").tokenize().unwrap_err();
assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
}
// A lone `.` is a root symbol with an empty path, also when followed by `)`.
#[test]
fn tokenizes_root_symbol_bare_dot() {
assert_eq!(
tokenize("."),
vec![Token::Symbol {
root: SymbolRoot::Root,
path: "".into()
}]
);
assert_eq!(
tokenize("(NonEmpty .)"),
vec![
Token::LParen,
Token::Keyword(Keyword::NonEmpty),
Token::Symbol {
root: SymbolRoot::Root,
path: "".into()
},
Token::RParen,
]
);
}
// Dotted segments are joined into one path without the leading sigil.
#[test]
fn tokenizes_nested_symbol_path() {
assert_eq!(
tokenize(".data.department_1.revenue"),
vec![Token::Symbol {
root: SymbolRoot::Root,
path: "data.department_1.revenue".into()
}]
);
}
// Symbol segments may contain non-ASCII alphanumeric characters.
#[test]
fn tokenizes_unicode_symbol() {
assert_eq!(
tokenize(".營收"),
vec![Token::Symbol {
root: SymbolRoot::Root,
path: "營收".into()
}]
);
assert_eq!(
tokenize(".données.résultat"),
vec![Token::Symbol {
root: SymbolRoot::Root,
path: "données.résultat".into()
}]
);
}
// `@` alone and `@.path` forms produce element-rooted symbols.
#[test]
fn tokenizes_element_symbol_with_at_sigil() {
assert_eq!(
tokenize("@"),
vec![Token::Symbol {
root: SymbolRoot::Element,
path: "".into()
}]
);
assert_eq!(
tokenize("@.a"),
vec![Token::Symbol {
root: SymbolRoot::Element,
path: "a".into()
}]
);
assert_eq!(
tokenize("@._0.name"),
vec![Token::Symbol {
root: SymbolRoot::Element,
path: "_0.name".into()
}]
);
}
// `True`, `False` and `Null` are literal tokens rather than keywords.
#[test]
fn tokenizes_bool_and_null_literals() {
assert_eq!(tokenize("True"), vec![Token::BoolLiteral(true)]);
assert_eq!(tokenize("False"), vec![Token::BoolLiteral(false)]);
assert_eq!(tokenize("Null"), vec![Token::NullLiteral]);
}
// A word that is neither a literal nor a keyword is rejected.
#[test]
fn unknown_identifier_errors() {
let err = Tokenizer::new("FooBar").tokenize().unwrap_err();
assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
}
// Spans are half-open byte ranges into the input.
#[test]
fn token_spans_are_byte_offsets() {
let tokens = Tokenizer::new("(EQ 1 2)").tokenize().unwrap();
assert_eq!(tokens[0].span, Span::new(0, 1));
assert_eq!(tokens[1].span, Span::new(1, 3));
assert_eq!(tokens[2].span, Span::new(4, 5));
assert_eq!(tokens[3].span, Span::new(6, 7));
assert_eq!(tokens[4].span, Span::new(7, 8));
}
// Runs the full pipeline and panics with the error if it fails.
fn must_parse(input: &str) -> Program {
parse(input)
.unwrap_or_else(|e| panic!("expected parse success for `{}`, got {:?}", input, e))
}
// Runs the full pipeline and panics if it unexpectedly succeeds.
fn must_fail(input: &str) -> NightjarLanguageError {
parse(input).expect_err(&format!("expected parse failure for `{}`", input))
}
// A two-operand verifier parses to `BoolExpr::Verifier` with the right operator.
#[test]
fn parses_simple_verifier() {
let p = must_parse("(GT 1 2)");
match p.expr.node {
BoolExpr::Verifier { op, .. } => assert_eq!(op, VerifierOp::GT),
other => panic!("expected Verifier, got {:?}", other),
}
}
// A third operand on a verifier is reported as an argument error.
#[test]
fn verifier_arity_mismatch_produces_arity_error() {
let err = must_fail("(GT 1 2 3)");
assert!(
matches!(err, NightjarLanguageError::ArgumentError { .. }),
"got {:?}",
err
);
}
// An operator keyword outside parentheses is not a boolean expression.
#[test]
fn bare_gt_without_parens_fails() {
let err = must_fail("GT 1 2");
assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
}
// Connectives accept nested verifier forms as operands.
#[test]
fn parses_nested_connective_and_verifier() {
let p = must_parse("(AND (GT 1 0) (LT 1 10))");
match p.expr.node {
BoolExpr::And(_, _) => {}
other => panic!("expected And, got {:?}", other),
}
}
// A one-operand verifier inside ForAll becomes a partial-verifier predicate.
#[test]
fn parses_forall_with_partial_verifier() {
let p = must_parse("(ForAll (GT 0) .ids)");
match p.expr.node {
BoolExpr::Quantifier {
op,
predicate,
operand,
} => {
assert_eq!(op, QuantifierOp::ForAll);
match predicate.node {
Predicate::PartialVerifier { op, .. } => assert_eq!(op, VerifierOp::GT),
other => panic!("expected PartialVerifier, got {:?}", other),
}
match operand.node {
ValueExpr::Symbol { root, path } => {
assert_eq!(root, SymbolRoot::Root);
assert_eq!(path, "ids");
}
other => panic!("expected Symbol, got {:?}", other),
}
}
other => panic!("expected Quantifier, got {:?}", other),
}
}
// A bare `NonEmpty` keyword is accepted as a quantifier predicate.
#[test]
fn parses_exists_with_nonempty_predicate() {
let p = must_parse("(Exists NonEmpty .names)");
match p.expr.node {
BoolExpr::Quantifier { op, predicate, .. } => {
assert_eq!(op, QuantifierOp::Exists);
assert_eq!(
predicate.node,
Predicate::UnaryCheck(UnaryCheckOp::NonEmpty)
);
}
other => panic!("expected Quantifier, got {:?}", other),
}
}
// NOT wraps a nested verifier.
#[test]
fn parses_not_of_verifier() {
let p = must_parse("(NOT (EQ .status \"inactive\"))");
match p.expr.node {
BoolExpr::Not(inner) => match inner.node {
BoolExpr::Verifier { op, .. } => assert_eq!(op, VerifierOp::EQ),
other => panic!("expected Verifier, got {:?}", other),
},
other => panic!("expected Not, got {:?}", other),
}
}
// A lone boolean literal is a complete program.
#[test]
fn parses_top_level_bool_literal() {
let p = must_parse("True");
assert_eq!(p.expr.node, BoolExpr::Literal(true));
}
// Negative integer literals survive as verifier operands.
#[test]
fn parses_negative_literals_in_verifier() {
let p = must_parse("(GT -5 -10)");
match p.expr.node {
BoolExpr::Verifier { left, right, .. } => {
assert_eq!(left.node, ValueExpr::Literal(Literal::Int(-5)));
assert_eq!(right.node, ValueExpr::Literal(Literal::Int(-10)));
}
other => panic!("expected Verifier, got {:?}", other),
}
}
// The bare root symbol `.` can be used as an operand.
#[test]
fn parses_root_symbol_as_operand() {
let p = must_parse("(NonEmpty .)");
match p.expr.node {
BoolExpr::UnaryCheck { op, operand } => {
assert_eq!(op, UnaryCheckOp::NonEmpty);
assert_eq!(
operand.node,
ValueExpr::Symbol {
root: SymbolRoot::Root,
path: "".into()
}
);
}
other => panic!("expected UnaryCheck, got {:?}", other),
}
}
// Too many function arguments is an argument error.
#[test]
fn func_call_arity_too_many_is_arity_error() {
let err = must_fail("(EQ (Add 1 2 3) 6)");
assert!(
matches!(err, NightjarLanguageError::ArgumentError { .. }),
"got {:?}",
err
);
}
// Too few function arguments surfaces as a parse error.
#[test]
fn func_call_arity_too_few_is_parse_error() {
let err = must_fail("(EQ (Add 1) 1)");
assert!(
matches!(err, NightjarLanguageError::ParseError { .. }),
"got {:?}",
err
);
}
// Input that ends before the closing `)` is a parse error.
#[test]
fn missing_rparen_is_parse_error() {
let err = must_fail("(GT 1 2");
assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
}
// Anything after a complete expression is rejected.
#[test]
fn trailing_tokens_is_parse_error() {
let err = must_fail("(GT 1 2) extra");
assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
}
// Empty input is not a valid program.
#[test]
fn empty_input_fails() {
let err = must_fail("");
assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
}
// A one-operand verifier is only valid as a quantifier predicate.
#[test]
fn partial_verifier_outside_quantifier_is_rejected() {
let err = must_fail("(GT 2)");
assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
}
// Ten nested NOTs exceed a limit of 5 but parse under the default limit.
#[test]
fn depth_limit_is_enforced() {
let mut s = String::new();
let n = 10;
for _ in 0..n {
s.push_str("(NOT ");
}
s.push_str("True");
for _ in 0..n {
s.push(')');
}
let cfg = ParserConfig { max_depth: 5 };
let err = parse_with_config(&s, &cfg).unwrap_err();
assert!(
matches!(err, NightjarLanguageError::RecursionError { .. }),
"got {:?}",
err
);
parse(&s).expect("default depth should parse this");
}
// Function calls nest inside a verifier operand.
#[test]
fn parses_nested_arithmetic_inside_verifier() {
let p = must_parse("(EQ (Add (Mul 2 3) (Sub 10 4)) 12)");
match p.expr.node {
BoolExpr::Verifier { left, .. } => match left.node {
ValueExpr::FuncCall { op, args } => {
assert_eq!(op, FuncOp::Add);
assert_eq!(args.len(), 2);
}
other => panic!("expected FuncCall, got {:?}", other),
},
other => panic!("expected Verifier, got {:?}", other),
}
}
// In operand position a boolean literal is a value literal.
#[test]
fn parses_bool_literal_as_operand_to_eq() {
let p = must_parse("(EQ True False)");
match p.expr.node {
BoolExpr::Verifier { left, right, .. } => {
assert_eq!(left.node, ValueExpr::Literal(Literal::Bool(true)));
assert_eq!(right.node, ValueExpr::Literal(Literal::Bool(false)));
}
other => panic!("expected Verifier, got {:?}", other),
}
}
// A value-producing function is not a boolean expression.
#[test]
fn rejects_func_op_as_top_level_bool_expr() {
let err = must_fail("(Add 1 2)");
assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
}
// Non-ASCII symbol paths pass through the parser unchanged.
#[test]
fn parses_unicode_symbol_in_verifier() {
let p = must_parse("(EQ .數量 100)");
match p.expr.node {
BoolExpr::Verifier { left, .. } => {
assert_eq!(
left.node,
ValueExpr::Symbol {
root: SymbolRoot::Root,
path: "數量".into()
}
);
}
other => panic!("expected Verifier, got {:?}", other),
}
}
// The root expression's span runs from `(` through the matching `)`.
#[test]
fn top_level_span_covers_whole_expression() {
let p = must_parse("(EQ 1 1)");
assert_eq!(p.expr.span, Span::new(0, 8));
}
// `@` must be followed by `.` before a segment.
#[test]
fn rejects_at_followed_by_bare_identifier() {
let err = Tokenizer::new("@a").tokenize().unwrap_err();
assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
}
// `@.` with no segment after the dot is rejected.
#[test]
fn rejects_at_dot_with_no_segment() {
let err = Tokenizer::new("@.").tokenize().unwrap_err();
assert!(matches!(err, NightjarLanguageError::ParseError { .. }));
}
// A two-operand verifier predicate is wrapped as `Predicate::Full`.
#[test]
fn parses_forall_with_full_verifier_predicate() {
let p = must_parse("(ForAll (EQ @.a @.b) .items)");
match p.expr.node {
BoolExpr::Quantifier { predicate, .. } => match predicate.node {
Predicate::Full(body) => match body.node {
BoolExpr::Verifier { op, left, right } => {
assert_eq!(op, VerifierOp::EQ);
assert_eq!(
left.node,
ValueExpr::Symbol {
root: SymbolRoot::Element,
path: "a".into()
}
);
assert_eq!(
right.node,
ValueExpr::Symbol {
root: SymbolRoot::Element,
path: "b".into()
}
);
}
other => panic!("expected Verifier inside Full, got {:?}", other),
},
other => panic!("expected Predicate::Full, got {:?}", other),
},
other => panic!("expected Quantifier, got {:?}", other),
}
}
// The one-operand form is still classified as a partial verifier.
#[test]
fn partial_verifier_still_parses_as_partial() {
let p = must_parse("(ForAll (GT 0) .items)");
match p.expr.node {
BoolExpr::Quantifier { predicate, .. } => {
assert!(matches!(predicate.node, Predicate::PartialVerifier { .. }));
}
other => panic!("expected Quantifier, got {:?}", other),
}
}
// Function calls over `@` symbols are allowed inside a predicate.
#[test]
fn parses_forall_with_nested_arithmetic_on_element() {
let p = must_parse("(ForAll (EQ (Add @.a @.b) @.c) .items)");
match p.expr.node {
BoolExpr::Quantifier { predicate, .. } => {
assert!(matches!(predicate.node, Predicate::Full(_)));
}
other => panic!("expected Quantifier, got {:?}", other),
}
}
// `@` used outside any quantifier predicate is a scope error.
#[test]
fn rejects_at_outside_quantifier_with_scope_error() {
let err = parse("(EQ @.a 1)").expect_err("`@` outside quantifier should fail");
assert!(
matches!(err, NightjarLanguageError::ScopeError { .. }),
"got {:?}",
err
);
}
// The quantifier's operand is outside the predicate, so `@` there is a scope error.
#[test]
fn rejects_at_in_quantifier_operand_with_scope_error() {
let err = parse("(ForAll (GT 0) @.items)")
.expect_err("`@` in quantifier operand at top level should fail");
assert!(
matches!(err, NightjarLanguageError::ScopeError { .. }),
"got {:?}",
err
);
}
// A bare `@` is a valid operand inside a predicate.
#[test]
fn bare_at_symbol_parses_in_predicate() {
let p = must_parse("(ForAll (GT @ 0) .scores)");
match p.expr.node {
BoolExpr::Quantifier { predicate, .. } => match predicate.node {
Predicate::Full(body) => match body.node {
BoolExpr::Verifier { left, .. } => assert_eq!(
left.node,
ValueExpr::Symbol {
root: SymbolRoot::Element,
path: "".into()
}
),
other => panic!("expected Verifier, got {:?}", other),
},
other => panic!("expected Full, got {:?}", other),
},
other => panic!("expected Quantifier, got {:?}", other),
}
}
// A parenthesised `NonEmpty` with an operand is a full predicate, not a bare unary check.
#[test]
fn nonempty_with_operand_in_predicate_parses_as_full() {
let p = must_parse("(ForAll (NonEmpty .x) .items)");
match p.expr.node {
BoolExpr::Quantifier { predicate, .. } => match predicate.node {
Predicate::Full(body) => assert!(matches!(
body.node,
BoolExpr::UnaryCheck {
op: UnaryCheckOp::NonEmpty,
..
}
)),
other => panic!("expected Full, got {:?}", other),
},
other => panic!("expected Quantifier, got {:?}", other),
}
}
}