use super::condition::{ConditionExpr, Quantifier, QuantifierTarget, Section};
use super::model::{KeywordPattern, LlmPattern, NovaRule, SemanticPattern};
use std::collections::BTreeMap;
use thiserror::Error;
/// Errors reported while parsing rule source text.
///
/// The user-facing message of each variant is generated from its `#[error]`
/// attribute.
#[derive(Debug, Error)]
pub enum ParseError {
/// The text in front of a rule's opening `{` is not `rule` followed by a name.
/// Carries the trimmed header text that was found instead.
#[error("rule must declare `rule <Name> {{`, got `{0}`")]
MissingRuleHeader(String),
/// The input ended in the middle of the named construct.
#[error("unexpected end of input while parsing {0}")]
UnexpectedEof(&'static str),
/// A token or character that is not valid in the given parsing context.
#[error("unexpected token `{got}` while parsing {context}")]
UnexpectedToken { context: &'static str, got: String },
/// The same `$variable` is assigned twice inside one pattern section.
#[error("duplicate variable `{var}` in section `{section}`")]
DuplicateVariable { section: &'static str, var: String },
/// A trailing `(…)` threshold is not a float, or lies outside `[0.0, 1.0]`.
#[error("invalid threshold `{0}` (must be a float in [0.0, 1.0])")]
InvalidThreshold(String),
/// A `name:` section header inside a rule body is not one of the known sections.
#[error("unknown section `{0}` (expected meta / keywords / semantics / llm / condition)")]
UnknownSection(String),
/// A line inside a section does not have the shape that section requires.
#[error("malformed line in section `{section}`: `{line}` ({reason})")]
MalformedLine {
section: &'static str,
line: String,
reason: &'static str,
},
/// A `/…/` keyword pattern was rejected by the `regex` crate.
#[error("invalid regex `{pattern}`: {source}")]
InvalidRegex {
pattern: String,
#[source]
source: regex::Error,
},
/// The condition text is empty once comments and whitespace are removed.
#[error("condition section is empty")]
EmptyCondition,
/// The condition names a section that `Section::from_str` does not recognise.
#[error("condition references unknown section `{0}`")]
UnknownConditionSection(String),
/// The condition refers to a variable (or variable prefix) that no pattern
/// section of the rule defines.
#[error("condition reference `{section}.${var}` does not match any pattern in the rule")]
DanglingReference { section: Section, var: String },
}
/// Parses every `rule … { … }` block found in `input`, in source order.
///
/// Whitespace and `//` line comments between rules are skipped.
///
/// # Errors
/// The first rule that fails to parse aborts the call with its error.
pub fn parse_rules(input: &str) -> Result<Vec<NovaRule>, ParseError> {
    let total = input.len();
    let mut parsed = Vec::new();
    let mut pos = 0usize;
    loop {
        skip_ws_and_comments(input, &mut pos);
        if pos >= total {
            return Ok(parsed);
        }
        parsed.push(parse_one_rule(input, &mut pos)?);
    }
}
/// Advances `*cursor` past ASCII whitespace and `//` line comments.
///
/// On return the cursor sits on the first byte that is neither, or at the end
/// of `input`. A comment runs up to (not including) the next `\n`; that
/// newline is then consumed as ordinary whitespace.
fn skip_ws_and_comments(input: &str, cursor: &mut usize) {
    let bytes = input.as_bytes();
    while let Some(&byte) = bytes.get(*cursor) {
        if byte.is_ascii_whitespace() {
            *cursor += 1;
        } else if byte == b'/' && bytes.get(*cursor + 1) == Some(&b'/') {
            let rest = &bytes[*cursor..];
            let comment_len = rest
                .iter()
                .position(|&b| b == b'\n')
                .unwrap_or(rest.len());
            *cursor += comment_len;
        } else {
            break;
        }
    }
}
/// Parses one `rule <Name> { … }` block starting at `*cursor`.
///
/// On success the cursor is left just past the rule's closing `}`. Sections
/// are applied in the order they appear; a section that is never declared
/// keeps its initial value (empty map, or `Literal(false)` for the condition).
///
/// # Errors
/// Propagates header, brace-matching, section and reference-validation
/// errors, and returns `UnknownSection` for an unrecognised section name.
fn parse_one_rule(input: &str, cursor: &mut usize) -> Result<NovaRule, ParseError> {
    let header_text = read_until_brace(input, cursor)?;
    let name = parse_rule_header(&header_text)?;

    let close_idx = find_matching_brace(input, *cursor)?;
    let body_text = &input[*cursor..close_idx];
    *cursor = close_idx + 1;

    let mut meta = BTreeMap::new();
    let mut keywords = BTreeMap::new();
    let mut semantics = BTreeMap::new();
    let mut llm = BTreeMap::new();
    let mut condition = ConditionExpr::Literal(false);

    for (section_name, section_body) in split_into_sections(body_text)? {
        match section_name.as_str() {
            "meta" => meta = parse_meta(section_body)?,
            "keywords" => keywords = parse_keywords(section_body)?,
            "semantics" => semantics = parse_semantics(section_body)?,
            "llm" => llm = parse_llm(section_body)?,
            "condition" => condition = parse_condition(section_body)?,
            unknown => return Err(ParseError::UnknownSection(unknown.to_string())),
        }
    }

    let mut rule = NovaRule {
        name,
        meta,
        keywords,
        semantics,
        llm,
        condition,
    };
    validate_references(&mut rule)?;
    Ok(rule)
}
/// Extracts the rule name from the header text that precedes the opening `{`.
///
/// The header must be the keyword `rule`, at least one whitespace character,
/// and then a name made of alphanumerics and `_`. The name stops at the first
/// character outside that set; anything after it is ignored.
///
/// # Errors
/// Returns `ParseError::MissingRuleHeader` (carrying the trimmed header) when
/// the keyword is missing, is not followed by whitespace, or no name follows.
fn parse_rule_header(line: &str) -> Result<String, ParseError> {
    let trimmed = line.trim();
    let missing = || ParseError::MissingRuleHeader(trimmed.to_string());

    let after_keyword = trimmed.strip_prefix("rule").ok_or_else(|| missing())?;
    // Enforce a word boundary: without it `rules X` would parse as a rule
    // named `s`, and `ruleFoo` as a rule named `Foo`.
    if !after_keyword.starts_with(char::is_whitespace) {
        return Err(missing());
    }

    let name: String = after_keyword
        .trim_start()
        .chars()
        .take_while(|c| c.is_alphanumeric() || *c == '_')
        .collect();
    if name.is_empty() {
        return Err(missing());
    }
    Ok(name)
}
/// Returns the text between `*cursor` and the next `{`, consuming the brace.
///
/// On success the cursor is left on the byte after the `{`.
///
/// # Errors
/// Returns `ParseError::UnexpectedEof("rule header")` when no `{` follows; the
/// cursor is then left at the end of the input.
fn read_until_brace(input: &str, cursor: &mut usize) -> Result<String, ParseError> {
    let begin = *cursor;
    let brace_at = input
        .as_bytes()
        .iter()
        .enumerate()
        .skip(begin)
        .find_map(|(idx, &b)| if b == b'{' { Some(idx) } else { None });

    match brace_at {
        Some(idx) => {
            *cursor = idx + 1;
            Ok(input[begin..idx].to_string())
        }
        None => {
            *cursor = begin.max(input.len());
            Err(ParseError::UnexpectedEof("rule header"))
        }
    }
}
/// Finds the `}` that closes a rule body.
///
/// Scanning begins at byte offset `start` with a nesting depth of 1 (the
/// opening `{` is expected to have been consumed already) and returns the
/// byte index of the `}` that brings the depth back to 0. Braces inside `//`
/// line comments, double-quoted strings and `/…/` regex literals are ignored.
///
/// # Errors
/// Returns `ParseError::UnexpectedEof("rule body")` when the input ends before
/// the matching brace is found.
///
/// NOTE(review): single-quoted strings are not tracked here, so a brace inside
/// `'…'` is counted like any other brace — confirm whether that is intended.
fn find_matching_brace(input: &str, start: usize) -> Result<usize, ParseError> {
let bytes = input.as_bytes();
let mut depth: i32 = 1;
let mut i = start;
let mut in_dq_string = false;
let mut in_regex = false;
let mut in_line_comment = false;
while i < bytes.len() {
let c = bytes[i];
// Inside a `//` comment: everything up to the newline is ignored.
if in_line_comment {
if c == b'\n' {
in_line_comment = false;
}
i += 1;
continue;
}
// Inside a double-quoted string: a backslash skips the byte after it, so an
// escaped quote does not end the string.
if in_dq_string {
if c == b'\\' && i + 1 < bytes.len() {
i += 2;
continue;
}
if c == b'"' {
in_dq_string = false;
}
i += 1;
continue;
}
// Inside a regex literal: same escape rule, terminated by an unescaped `/`.
if in_regex {
if c == b'\\' && i + 1 < bytes.len() {
i += 2;
continue;
}
if c == b'/' {
in_regex = false;
}
i += 1;
continue;
}
// `//` opens a line comment.
if c == b'/' && i + 1 < bytes.len() && bytes[i + 1] == b'/' {
in_line_comment = true;
i += 2;
continue;
}
// A single `/` opens a regex literal only when the previous non-whitespace
// character is `=`, `(` or `,` (or there is none); otherwise it is treated
// as an ordinary byte.
if c == b'/' && i + 1 < bytes.len() && bytes[i + 1] != b'/' {
let prev_meaningful = input[..i].chars().rev().find(|c| !c.is_whitespace());
if matches!(prev_meaningful, Some('=') | Some('(') | Some(',') | None) {
in_regex = true;
i += 1;
continue;
}
}
if c == b'"' {
in_dq_string = true;
i += 1;
continue;
}
// Only braces seen outside comments, strings and regexes change the depth.
if c == b'{' {
depth += 1;
} else if c == b'}' {
depth -= 1;
if depth == 0 {
return Ok(i);
}
}
i += 1;
}
Err(ParseError::UnexpectedEof("rule body"))
}
/// Splits a rule body into `(section name, section text)` pairs in source order.
///
/// A line whose comment-stripped, trimmed text ends in `:` starts a new
/// section; its name is the text before the colon, trimmed and lowercased.
/// Every other line is appended (right-trimmed, newline-terminated) to the
/// section currently open. Blank lines inside a section are kept as empty
/// lines; blank lines before the first header are dropped.
///
/// # Errors
/// Returns `ParseError::MalformedLine` when a non-blank line appears before
/// any section header.
fn split_into_sections(body: &str) -> Result<Vec<(String, String)>, ParseError> {
    let mut finished: Vec<(String, String)> = Vec::new();
    // The section being accumulated, if a header has been seen.
    let mut open: Option<(String, String)> = None;

    for raw in body.lines() {
        let content = strip_line_comment(raw).trim_end();
        let core = content.trim_start();

        if core.is_empty() {
            if let Some((_, text)) = open.as_mut() {
                text.push('\n');
            }
            continue;
        }

        if let Some(header) = core.strip_suffix(':') {
            finished.extend(open.take());
            open = Some((header.trim().to_lowercase(), String::new()));
            continue;
        }

        match open.as_mut() {
            Some((_, text)) => {
                text.push_str(content);
                text.push('\n');
            }
            None => {
                return Err(ParseError::MalformedLine {
                    section: "rule body",
                    line: core.to_string(),
                    reason: "expected a section header (`meta:`, `keywords:`, …) first",
                });
            }
        }
    }

    finished.extend(open);
    Ok(finished)
}
/// Returns `line` with any trailing `//` comment removed.
///
/// The `//` is located with `find_unquoted`, so occurrences that function
/// treats as quoted do not start a comment. Whitespace in front of the
/// comment is left in place.
fn strip_line_comment(line: &str) -> &str {
    match find_unquoted(line, "//") {
        Some(cut) => &line[..cut],
        None => line,
    }
}
/// Returns the byte index of the first `needle` in `haystack` that lies outside
/// string and regex literals, or `None` when there is none.
///
/// Recognised literals:
/// * `"…"` double-quoted strings;
/// * `'…'` single-quoted strings, opened only when the previous
///   non-whitespace character is `=` (so an apostrophe in free text such as
///   `O'Brien` does not start a string);
/// * `/…/` regex literals, opened only by a `/` that is not followed by
///   another `/` and whose previous non-whitespace character is `=`, `(` or `,`.
///
/// Inside any literal a backslash skips the byte that follows it, so escaped
/// delimiters do not end the literal.
fn find_unquoted(haystack: &str, needle: &str) -> Option<usize> {
    let bytes = haystack.as_bytes();
    let nb = needle.as_bytes();
    // Only ever called with `idx` on an ASCII byte, which is a char boundary.
    let prev_meaningful =
        |idx: usize| haystack[..idx].chars().rev().find(|c| !c.is_whitespace());

    // Quote byte of the string literal currently open, if any.
    let mut open_quote: Option<u8> = None;
    let mut in_regex = false;
    let mut i = 0;
    while i + nb.len() <= bytes.len() {
        let c = bytes[i];

        if let Some(quote) = open_quote {
            if c == b'\\' && i + 1 < bytes.len() {
                i += 2;
                continue;
            }
            if c == quote {
                open_quote = None;
            }
            i += 1;
            continue;
        }

        if in_regex {
            if c == b'\\' && i + 1 < bytes.len() {
                i += 2;
                continue;
            }
            if c == b'/' {
                in_regex = false;
            }
            i += 1;
            continue;
        }

        if c == b'"' {
            open_quote = Some(b'"');
            i += 1;
            continue;
        }

        // Single-quoted values are accepted by the keyword and meta parsers,
        // so a `//` inside one (e.g. a URL) must not be taken for a comment.
        if c == b'\'' && prev_meaningful(i) == Some('=') {
            open_quote = Some(b'\'');
            i += 1;
            continue;
        }

        if c == b'/'
            && i + 1 < bytes.len()
            && bytes[i + 1] != b'/'
            && matches!(prev_meaningful(i), Some('=') | Some('(') | Some(','))
        {
            in_regex = true;
            i += 1;
            continue;
        }

        if &bytes[i..i + nb.len()] == nb {
            return Some(i);
        }
        i += 1;
    }
    None
}
/// Parses the `meta:` section into a key → value map.
///
/// Each non-blank line (after comment stripping) must be `key = value`. The
/// line is split at the first `=`; both sides are trimmed and one pair of
/// matching surrounding quotes is removed from the value. A repeated key
/// overwrites the earlier entry.
///
/// # Errors
/// Returns `ParseError::MalformedLine` for a line without `=`.
fn parse_meta(body: String) -> Result<BTreeMap<String, String>, ParseError> {
    let mut entries = BTreeMap::new();
    let statements = body
        .lines()
        .map(|raw| strip_line_comment(raw).trim())
        .filter(|stmt| !stmt.is_empty());

    for stmt in statements {
        match stmt.split_once('=') {
            Some((lhs, rhs)) => {
                entries.insert(lhs.trim().to_string(), strip_string_quotes(rhs.trim()));
            }
            None => {
                return Err(ParseError::MalformedLine {
                    section: "meta",
                    line: stmt.to_string(),
                    reason: "missing `=` between key and value",
                });
            }
        }
    }
    Ok(entries)
}
/// Removes one pair of matching surrounding quotes (`"…"` or `'…'`) from
/// `value`.
///
/// Input that is not wrapped in a matching pair — including a lone quote
/// character — is returned unchanged. No escape processing is performed.
fn strip_string_quotes(value: &str) -> String {
    let peel = |quote: char| {
        value
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote))
    };
    peel('"')
        .or_else(|| peel('\''))
        .unwrap_or(value)
        .to_string()
}
fn parse_keywords(body: String) -> Result<BTreeMap<String, KeywordPattern>, ParseError> {
let mut out = BTreeMap::new();
for raw_line in body.lines() {
let line = strip_line_comment(raw_line).trim();
if line.is_empty() {
continue;
}
let (var, value) = split_var_assignment(line, "keywords")?;
if out.contains_key(&var) {
return Err(ParseError::DuplicateVariable {
section: "keywords",
var,
});
}
let pattern = parse_keyword_value(value)?;
out.insert(var, pattern);
}
Ok(out)
}
/// Parses the right-hand side of a `keywords:` assignment.
///
/// Accepted forms:
/// * `/…/`  — regex, case-sensitive;
/// * `/…/i` — regex, case-insensitive;
/// * `"…"` or `'…'` — literal text, always case-insensitive.
///
/// The regex body is compiled once to validate it; only its source text is
/// stored in the returned pattern.
///
/// # Errors
/// * `ParseError::MalformedLine` when a regex is not closed with `/` or `/i`
///   (this includes the degenerate inputs `/` and `/i`), or when the value is
///   neither quoted nor a regex.
/// * `ParseError::InvalidRegex` when the regex body does not compile.
fn parse_keyword_value(raw: &str) -> Result<KeywordPattern, ParseError> {
    let value = raw.trim();

    if let Some(after_open) = value.strip_prefix('/') {
        // Work on the text after the opening slash so that slash can never be
        // mistaken for the closing delimiter. Slicing by computed indices
        // panicked on the inputs `/` and `/i`.
        let (body, case_sensitive) = if let Some(body) = after_open.strip_suffix('/') {
            (body, true)
        } else if let Some(body) = after_open.strip_suffix("/i") {
            (body, false)
        } else {
            return Err(ParseError::MalformedLine {
                section: "keywords",
                line: raw.to_string(),
                reason: "regex pattern is not closed with `/` or `/i`",
            });
        };

        // Validation only; the compiled regex is discarded.
        regex::Regex::new(body).map_err(|source| ParseError::InvalidRegex {
            pattern: body.to_string(),
            source,
        })?;

        return Ok(KeywordPattern {
            pattern: body.to_string(),
            is_regex: true,
            case_sensitive,
        });
    }

    let peel = |quote: char| {
        value
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote))
    };
    if let Some(inner) = peel('"').or_else(|| peel('\'')) {
        return Ok(KeywordPattern {
            pattern: inner.to_string(),
            is_regex: false,
            case_sensitive: false,
        });
    }

    Err(ParseError::MalformedLine {
        section: "keywords",
        line: raw.to_string(),
        reason: "value must be quoted (\"…\" / '…') or a regex (/…/ or /…/i)",
    })
}
fn parse_semantics(body: String) -> Result<BTreeMap<String, SemanticPattern>, ParseError> {
let mut out = BTreeMap::new();
for raw_line in body.lines() {
let line = strip_line_comment(raw_line).trim();
if line.is_empty() {
continue;
}
let (var, value) = split_var_assignment(line, "semantics")?;
if out.contains_key(&var) {
return Err(ParseError::DuplicateVariable {
section: "semantics",
var,
});
}
let (pattern, threshold) = parse_pattern_with_threshold(value, "semantics", 0.1)?;
out.insert(var, SemanticPattern { pattern, threshold });
}
Ok(out)
}
fn parse_llm(body: String) -> Result<BTreeMap<String, LlmPattern>, ParseError> {
let mut out = BTreeMap::new();
for raw_line in body.lines() {
let line = strip_line_comment(raw_line).trim();
if line.is_empty() {
continue;
}
let (var, value) = split_var_assignment(line, "llm")?;
if out.contains_key(&var) {
return Err(ParseError::DuplicateVariable {
section: "llm",
var,
});
}
let (pattern, threshold) = parse_pattern_with_threshold(value, "llm", 0.1)?;
out.insert(var, LlmPattern { pattern, threshold });
}
Ok(out)
}
/// Parses `"pattern"` optionally followed by `(threshold)`.
///
/// Returns the text between the quotes and the threshold, which is
/// `default_threshold` when no `(…)` suffix is present.
///
/// The closing quote is the first `"` that is not preceded by a backslash
/// escape, matching how the comment stripper and brace matcher treat `\"`.
/// Escape sequences are not decoded: the returned pattern keeps its
/// backslashes exactly as written.
///
/// # Errors
/// * `ParseError::MalformedLine` when the value does not start with `"`, the
///   string is never closed, or trailing text is not wrapped in parentheses.
/// * `ParseError::InvalidThreshold` when the threshold is not a float or lies
///   outside `[0.0, 1.0]` (NaN is rejected as well).
fn parse_pattern_with_threshold(
    raw: &str,
    section: &'static str,
    default_threshold: f32,
) -> Result<(String, f32), ParseError> {
    let value = raw.trim();
    let malformed = |reason: &'static str| ParseError::MalformedLine {
        section,
        line: value.to_string(),
        reason,
    };

    if !value.starts_with('"') {
        return Err(malformed("pattern must start with a double-quoted string"));
    }

    // Locate the closing quote, stepping over backslash-escaped bytes.
    let bytes = value.as_bytes();
    let mut close = None;
    let mut i = 1;
    while i < bytes.len() {
        match bytes[i] {
            b'\\' => i += 2,
            b'"' => {
                close = Some(i);
                break;
            }
            _ => i += 1,
        }
    }
    let close = close.ok_or_else(|| malformed("pattern string is not closed"))?;

    // Both slice bounds sit on ASCII quote bytes, so they are char boundaries.
    let pattern = value[1..close].to_string();
    let rest = value[close + 1..].trim();

    let threshold = if rest.is_empty() {
        default_threshold
    } else {
        let inner = rest
            .strip_prefix('(')
            .and_then(|s| s.strip_suffix(')'))
            .ok_or_else(|| malformed("trailing threshold must be enclosed in parentheses"))?
            .trim();
        let parsed: f32 = inner
            .parse()
            .map_err(|_| ParseError::InvalidThreshold(inner.to_string()))?;
        if !(0.0..=1.0).contains(&parsed) {
            return Err(ParseError::InvalidThreshold(inner.to_string()));
        }
        parsed
    };

    Ok((pattern, threshold))
}
/// Splits a `$name = value` line into the variable name (without the `$`) and
/// the trimmed value text.
///
/// The line is split at the first `=`, so `=` characters inside the value are
/// preserved.
///
/// # Errors
/// Returns `ParseError::MalformedLine` (tagged with `section`) when the line
/// has no `=`, the left-hand side does not start with `$`, or nothing follows
/// the `$`.
fn split_var_assignment<'a>(
    line: &'a str,
    section: &'static str,
) -> Result<(String, &'a str), ParseError> {
    // Built lazily: the error owns a copy of the line, which should only be
    // allocated on the failure path.
    let malformed = |reason: &'static str| ParseError::MalformedLine {
        section,
        line: line.to_string(),
        reason,
    };

    let (key, value) = line
        .split_once('=')
        .ok_or_else(|| malformed("missing `=` between variable and pattern"))?;
    let name = key
        .trim()
        .strip_prefix('$')
        .ok_or_else(|| malformed("variable name must start with `$`"))?;
    // A bare `$` would register a variable with an empty name, which no
    // condition can reference by name.
    if name.is_empty() {
        return Err(malformed("variable name after `$` must not be empty"));
    }
    Ok((name.to_string(), value.trim()))
}
fn parse_condition(body: String) -> Result<ConditionExpr, ParseError> {
let cleaned: String = body
.lines()
.map(strip_line_comment)
.collect::<Vec<_>>()
.join(" ");
let normalized = normalize_whitespace(&cleaned);
if normalized.trim().is_empty() {
return Err(ParseError::EmptyCondition);
}
let tokens = tokenize_condition(&normalized)?;
let mut iter = TokenIter::new(tokens);
let expr = parse_or(&mut iter)?;
if let Some(extra) = iter.peek() {
return Err(ParseError::UnexpectedToken {
context: "condition (trailing input)",
got: format!("{extra:?}"),
});
}
Ok(expr)
}
/// Collapses every run of whitespace in `input` into a single space and
/// removes leading and trailing whitespace.
fn normalize_whitespace(input: &str) -> String {
    input.split_whitespace().collect::<Vec<_>>().join(" ")
}
/// A lexical token of the condition expression language.
#[derive(Debug, Clone, PartialEq)]
enum CondToken {
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `.`
    Dot,
    /// `*`
    Star,
    /// `,`
    Comma,
    /// A bare word that is not a keyword, spelled as written.
    Ident(String),
    /// A `$name` variable reference; holds the name without the `$`.
    Var(String),
    /// An unsigned decimal integer.
    Number(u32),
    /// `and` (keywords are matched case-insensitively)
    KwAnd,
    /// `or`
    KwOr,
    /// `not`
    KwNot,
    /// `of`
    KwOf,
    /// `any`
    KwAny,
    /// `all`
    KwAll,
    /// `true`
    KwTrue,
    /// `false`
    KwFalse,
}
/// Splits condition text into `CondToken`s.
///
/// Recognises the punctuation `( ) . * ,`, `$name` variables, unsigned
/// decimal integers, and words. Words are matched case-insensitively against
/// the keywords `and`, `or`, `not`, `of`, `any`, `all`, `true`, `false`;
/// any other word becomes an `Ident` with its original spelling. ASCII
/// whitespace separates tokens and is otherwise ignored.
///
/// # Errors
/// Returns `ParseError::UnexpectedToken` for a `$` with no name after it, an
/// integer that does not fit in `u32`, or any other character (reported in
/// full, including non-ASCII characters).
fn tokenize_condition(input: &str) -> Result<Vec<CondToken>, ParseError> {
    let bytes = input.as_bytes();
    let is_word_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'_';

    let mut out = Vec::new();
    let mut i = 0;
    while i < bytes.len() {
        let c = bytes[i];
        if c.is_ascii_whitespace() {
            i += 1;
            continue;
        }

        // Single-byte punctuation.
        let punctuation = match c {
            b'(' => Some(CondToken::LParen),
            b')' => Some(CondToken::RParen),
            b'.' => Some(CondToken::Dot),
            b'*' => Some(CondToken::Star),
            b',' => Some(CondToken::Comma),
            _ => None,
        };
        if let Some(token) = punctuation {
            out.push(token);
            i += 1;
            continue;
        }

        match c {
            b'$' => {
                let start = i + 1;
                let mut end = start;
                while end < bytes.len() && is_word_byte(bytes[end]) {
                    end += 1;
                }
                if start == end {
                    return Err(ParseError::UnexpectedToken {
                        context: "condition (variable name)",
                        got: "$".into(),
                    });
                }
                out.push(CondToken::Var(input[start..end].to_string()));
                i = end;
            }
            d if d.is_ascii_digit() => {
                let start = i;
                while i < bytes.len() && bytes[i].is_ascii_digit() {
                    i += 1;
                }
                let digits = &input[start..i];
                let n: u32 = digits.parse().map_err(|_| ParseError::UnexpectedToken {
                    context: "condition (integer)",
                    got: digits.to_string(),
                })?;
                out.push(CondToken::Number(n));
            }
            a if a.is_ascii_alphabetic() || a == b'_' => {
                let start = i;
                while i < bytes.len() && is_word_byte(bytes[i]) {
                    i += 1;
                }
                let word = &input[start..i];
                let token = match word.to_ascii_lowercase().as_str() {
                    "and" => CondToken::KwAnd,
                    "or" => CondToken::KwOr,
                    "not" => CondToken::KwNot,
                    "of" => CondToken::KwOf,
                    "any" => CondToken::KwAny,
                    "all" => CondToken::KwAll,
                    "true" => CondToken::KwTrue,
                    "false" => CondToken::KwFalse,
                    _ => CondToken::Ident(word.to_string()),
                };
                out.push(token);
            }
            other => {
                // Report the whole character, not just its first byte: casting
                // a UTF-8 lead byte to `char` yields an unrelated Latin-1
                // character. Every token above consumes ASCII bytes only, so
                // `i` is on a char boundary; the byte cast is kept purely as a
                // defensive fallback.
                let offending = input
                    .get(i..)
                    .and_then(|rest| rest.chars().next())
                    .unwrap_or(other as char);
                return Err(ParseError::UnexpectedToken {
                    context: "condition (lexer)",
                    got: offending.to_string(),
                });
            }
        }
    }
    Ok(out)
}
/// A cursor over a token list with one-token lookahead.
struct TokenIter {
    /// All tokens of the condition, in source order.
    tokens: Vec<CondToken>,
    /// Index of the next token to be returned.
    pos: usize,
}

impl TokenIter {
    /// Creates a cursor positioned on the first token.
    fn new(tokens: Vec<CondToken>) -> Self {
        TokenIter { tokens, pos: 0 }
    }

    /// Returns the next token without consuming it.
    fn peek(&self) -> Option<&CondToken> {
        self.tokens.get(self.pos)
    }

    /// Consumes and returns the next token, or `None` at the end of input.
    fn bump(&mut self) -> Option<CondToken> {
        let current = self.peek().cloned()?;
        self.pos += 1;
        Some(current)
    }

    /// Consumes the next token only if it equals `want`; reports whether it did.
    fn eat(&mut self, want: &CondToken) -> bool {
        let matched = self.peek() == Some(want);
        if matched {
            self.pos += 1;
        }
        matched
    }
}
/// Parses `and_expr ("or" and_expr)*`, the lowest-precedence level.
///
/// A lone operand is returned as is; two or more are wrapped in
/// `ConditionExpr::Or`.
fn parse_or(iter: &mut TokenIter) -> Result<ConditionExpr, ParseError> {
    let first = parse_and(iter)?;
    if iter.peek() != Some(&CondToken::KwOr) {
        return Ok(first);
    }

    let mut branches = vec![first];
    while iter.eat(&CondToken::KwOr) {
        branches.push(parse_and(iter)?);
    }
    Ok(ConditionExpr::Or(branches))
}
/// Parses `not_expr ("and" not_expr)*`, which binds tighter than `or`.
///
/// A lone operand is returned as is; two or more are wrapped in
/// `ConditionExpr::And`.
fn parse_and(iter: &mut TokenIter) -> Result<ConditionExpr, ParseError> {
    let first = parse_not(iter)?;
    if iter.peek() != Some(&CondToken::KwAnd) {
        return Ok(first);
    }

    let mut operands = vec![first];
    while iter.eat(&CondToken::KwAnd) {
        operands.push(parse_not(iter)?);
    }
    Ok(ConditionExpr::And(operands))
}
/// Parses zero or more `not` prefixes followed by an atom.
///
/// Each `not` adds one `ConditionExpr::Not` layer around the atom.
fn parse_not(iter: &mut TokenIter) -> Result<ConditionExpr, ParseError> {
    let mut negations = 0usize;
    while iter.eat(&CondToken::KwNot) {
        negations += 1;
    }

    let mut expr = parse_atom(iter)?;
    for _ in 0..negations {
        expr = ConditionExpr::Not(Box::new(expr));
    }
    Ok(expr)
}
/// Parses one atom of a condition expression.
///
/// Accepted atoms:
/// * `( expr )`;
/// * `true` / `false`;
/// * a quantifier — `any`, `all` or an integer `N` — followed by `of <target>`;
/// * `section.*`, `section.$var` or `section.$prefix*`;
/// * a bare `$var`. Its section is not known yet, so it is emitted as a
///   `Keywords` reference whose name carries the `__bare__:` marker;
///   `rewrite_bare_refs` later resolves it to the section that defines it.
///
/// # Errors
/// Returns `ParseError::UnexpectedEof` when no token is left, and
/// `ParseError::UnexpectedToken` for a missing `)` or any token that cannot
/// start an atom.
fn parse_atom(iter: &mut TokenIter) -> Result<ConditionExpr, ParseError> {
    let next = iter
        .bump()
        .ok_or(ParseError::UnexpectedEof("condition atom"))?;
    match next {
        CondToken::LParen => {
            let inner = parse_or(iter)?;
            if !iter.eat(&CondToken::RParen) {
                return Err(ParseError::UnexpectedToken {
                    context: "condition (expected `)`)",
                    got: format!("{:?}", iter.peek()),
                });
            }
            Ok(inner)
        }
        CondToken::KwTrue => Ok(ConditionExpr::Literal(true)),
        CondToken::KwFalse => Ok(ConditionExpr::Literal(false)),
        CondToken::KwAny => parse_quantifier_tail(iter, Quantifier::Any),
        CondToken::KwAll => parse_quantifier_tail(iter, Quantifier::All),
        CondToken::Number(n) => parse_quantifier_tail(iter, Quantifier::AtLeast(n)),
        CondToken::Ident(section_name) => parse_section_atom(iter, &section_name),
        CondToken::Var(name) => Ok(ConditionExpr::Reference {
            section: Section::Keywords,
            var: format!("__bare__:{name}"),
        }),
        other => Err(ParseError::UnexpectedToken {
            context: "condition atom",
            got: format!("{other:?}"),
        }),
    }
}
/// Parses the `of <target>` part that must follow a quantifier.
///
/// `q` is the quantifier that was already consumed.
///
/// # Errors
/// Returns `ParseError::UnexpectedToken` when the next token is not `of`, and
/// propagates errors from parsing the target.
fn parse_quantifier_tail(iter: &mut TokenIter, q: Quantifier) -> Result<ConditionExpr, ParseError> {
    if iter.eat(&CondToken::KwOf) {
        let target = parse_quantifier_target(iter)?;
        Ok(ConditionExpr::Quantified {
            quantifier: q,
            target: Box::new(target),
        })
    } else {
        Err(ParseError::UnexpectedToken {
            context: "condition (expected `of` after quantifier)",
            got: format!("{:?}", iter.peek()),
        })
    }
}
/// Parses what a quantifier ranges over.
///
/// Three forms are tried in order:
/// 1. `section.*` → `QuantifierTarget::SectionWildcard`;
/// 2. `( expr, expr, … )` → the expressions are combined with `Or` (a single
///    expression is used as is) and wrapped in `QuantifierTarget::Inner`;
/// 3. any single atom → `QuantifierTarget::Inner`.
///
/// # Errors
/// Returns `ParseError::UnknownConditionSection` for `name.*` with an
/// unrecognised section name, `ParseError::UnexpectedToken` for a missing `)`
/// after a parenthesised list, and any error from the nested expressions.
fn parse_quantifier_target(iter: &mut TokenIter) -> Result<QuantifierTarget, ParseError> {
    if let Some(CondToken::Ident(section_name)) = iter.peek().cloned() {
        // Speculatively consume `ident . *`; rewind if the shape does not match
        // so the identifier can be re-read as an ordinary atom below.
        let saved_pos = iter.pos;
        iter.bump();
        if iter.eat(&CondToken::Dot) && iter.eat(&CondToken::Star) {
            let section = Section::from_str(&section_name)
                .ok_or(ParseError::UnknownConditionSection(section_name))?;
            return Ok(QuantifierTarget::SectionWildcard(section));
        }
        iter.pos = saved_pos;
    }

    if iter.eat(&CondToken::LParen) {
        let mut items = vec![parse_or(iter)?];
        while iter.eat(&CondToken::Comma) {
            items.push(parse_or(iter)?);
        }
        if !iter.eat(&CondToken::RParen) {
            return Err(ParseError::UnexpectedToken {
                context: "condition (expected `)` after quantifier target)",
                got: format!("{:?}", iter.peek()),
            });
        }
        let inner = if items.len() == 1 {
            items.remove(0)
        } else {
            ConditionExpr::Or(items)
        };
        return Ok(QuantifierTarget::Inner(Box::new(inner)));
    }

    let expr = parse_atom(iter)?;
    Ok(QuantifierTarget::Inner(Box::new(expr)))
}
/// Parses what follows a section name in a condition: `.*`, `.$var` or
/// `.$prefix*`.
///
/// `section_name` is the identifier that was already consumed.
///
/// # Errors
/// * `ParseError::UnexpectedToken` when the `.` is missing or the token after
///   it is neither `*` nor a variable.
/// * `ParseError::UnknownConditionSection` when the name is not a known section.
/// * `ParseError::UnexpectedEof` when the input ends after the `.`.
fn parse_section_atom(
    iter: &mut TokenIter,
    section_name: &str,
) -> Result<ConditionExpr, ParseError> {
    if !iter.eat(&CondToken::Dot) {
        return Err(ParseError::UnexpectedToken {
            context: "condition (expected `.` after section name)",
            got: format!("{:?}", iter.peek()),
        });
    }

    let section = match Section::from_str(section_name) {
        Some(known) => known,
        None => {
            return Err(ParseError::UnknownConditionSection(
                section_name.to_string(),
            ));
        }
    };

    if iter.eat(&CondToken::Star) {
        return Ok(ConditionExpr::Wildcard { section });
    }

    match iter.bump() {
        None => Err(ParseError::UnexpectedEof("condition (variable name)")),
        Some(CondToken::Var(name)) => {
            // A `*` directly after the variable turns it into a prefix match.
            if iter.eat(&CondToken::Star) {
                Ok(ConditionExpr::PrefixWildcard {
                    section,
                    prefix: name,
                })
            } else {
                Ok(ConditionExpr::Reference { section, var: name })
            }
        }
        Some(other) => Err(ParseError::UnexpectedToken {
            context: "condition (expected `*` or `$var` after section name)",
            got: format!("{other:?}"),
        }),
    }
}
/// Resolves bare `$var` references in the rule's condition and then verifies
/// that every reference names a pattern the rule defines.
///
/// # Errors
/// Propagates the `ParseError::DanglingReference` raised by either step.
fn validate_references(rule: &mut NovaRule) -> Result<(), ParseError> {
    // Copy the variable names first so the condition can be mutated while the
    // pattern maps are consulted.
    let keyword_names: Vec<String> = rule.keywords.keys().cloned().collect();
    let semantic_names: Vec<String> = rule.semantics.keys().cloned().collect();
    let llm_names: Vec<String> = rule.llm.keys().cloned().collect();
    let names = (keyword_names, semantic_names, llm_names);

    rewrite_bare_refs(&mut rule.condition, &names)?;
    check_refs(&rule.condition, rule)
}
fn rewrite_bare_refs(
expr: &mut ConditionExpr,
keys: &(Vec<String>, Vec<String>, Vec<String>),
) -> Result<(), ParseError> {
const BARE_PREFIX: &str = "__bare__:";
match expr {
ConditionExpr::Reference { section, var } => {
if let Some(name) = var.strip_prefix(BARE_PREFIX) {
let in_kw = keys.0.iter().any(|k| k == name);
let in_sem = keys.1.iter().any(|k| k == name);
let in_llm = keys.2.iter().any(|k| k == name);
let count = u8::from(in_kw) + u8::from(in_sem) + u8::from(in_llm);
let resolved_section = if in_kw {
Section::Keywords
} else if in_sem {
Section::Semantics
} else if in_llm {
Section::Llm
} else {
return Err(ParseError::DanglingReference {
section: Section::Keywords,
var: name.to_string(),
});
};
if count > 1 {
return Err(ParseError::DanglingReference {
section: resolved_section,
var: format!("{name} (ambiguous: defined in multiple sections)"),
});
}
*section = resolved_section;
*var = name.to_string();
}
Ok(())
}
ConditionExpr::PrefixWildcard { .. }
| ConditionExpr::Wildcard { .. }
| ConditionExpr::Literal(_) => Ok(()),
ConditionExpr::Not(inner) => rewrite_bare_refs(inner, keys),
ConditionExpr::And(items) | ConditionExpr::Or(items) => {
for item in items {
rewrite_bare_refs(item, keys)?;
}
Ok(())
}
ConditionExpr::Quantified { target, .. } => match target.as_mut() {
QuantifierTarget::SectionWildcard(_) => Ok(()),
QuantifierTarget::Inner(inner) => rewrite_bare_refs(inner, keys),
},
}
}
/// Verifies that every variable reference in `expr` names a pattern defined
/// by `rule`.
///
/// Exact references must match a variable of their section; prefix wildcards
/// must match at least one variable name of their section. Section wildcards
/// and literals are always accepted. The tree is walked recursively.
///
/// # Errors
/// Returns `ParseError::DanglingReference` for the first reference that does
/// not resolve.
fn check_refs(expr: &ConditionExpr, rule: &NovaRule) -> Result<(), ParseError> {
    match expr {
        ConditionExpr::Literal(_) | ConditionExpr::Wildcard { .. } => Ok(()),
        ConditionExpr::Not(inner) => check_refs(inner, rule),
        ConditionExpr::And(items) | ConditionExpr::Or(items) => {
            items.iter().try_for_each(|item| check_refs(item, rule))
        }
        ConditionExpr::Quantified { target, .. } => match target.as_ref() {
            QuantifierTarget::Inner(inner) => check_refs(inner, rule),
            QuantifierTarget::SectionWildcard(_) => Ok(()),
        },
        ConditionExpr::Reference { section, var } => {
            let defined = match section {
                Section::Keywords => rule.keywords.contains_key(var),
                Section::Semantics => rule.semantics.contains_key(var),
                Section::Llm => rule.llm.contains_key(var),
            };
            if defined {
                Ok(())
            } else {
                Err(ParseError::DanglingReference {
                    section: *section,
                    var: var.clone(),
                })
            }
        }
        ConditionExpr::PrefixWildcard { section, prefix } => {
            let matches_prefix = |name: &String| name.starts_with(prefix.as_str());
            let any_match = match section {
                Section::Keywords => rule.keywords.keys().any(matches_prefix),
                Section::Semantics => rule.semantics.keys().any(matches_prefix),
                Section::Llm => rule.llm.keys().any(matches_prefix),
            };
            if any_match {
                Ok(())
            } else {
                Err(ParseError::DanglingReference {
                    section: *section,
                    var: format!("{prefix}* (no patterns match this prefix)"),
                })
            }
        }
    }
}
// Unit tests for the rule parser. Inline fixtures are raw strings and are
// passed to the parser exactly as written.
#[cfg(test)]
mod tests {
use super::*;
use crate::nova::condition::{ConditionExpr, Section};
// A rule with `meta`, one quoted keyword and one regex keyword parses, and
// the keyword kinds and stored pattern text come out as expected.
#[test]
fn parses_minimal_keywords_only_rule() {
let body = r#"
rule MinimalKW {
meta:
description = "Tiny"
severity = "low"
keywords:
$a = "foo"
$b = /bar\d+/
condition:
keywords.$a or keywords.$b
}
"#;
let rules = parse_rules(body).unwrap();
assert_eq!(rules.len(), 1);
let r = &rules[0];
assert_eq!(r.name, "MinimalKW");
assert_eq!(r.meta.get("severity").map(String::as_str), Some("low"));
assert_eq!(r.keywords.len(), 2);
let a = &r.keywords["a"];
assert_eq!(a.pattern, "foo");
assert!(!a.is_regex);
let b = &r.keywords["b"];
assert!(b.is_regex);
assert_eq!(b.pattern, "bar\\d+");
}
// The opening brace may sit on the line after the rule name, and a regex
// containing backticks and escapes is accepted.
#[test]
fn parses_real_inject_dynamic_context_rule() {
let body = r#"
rule InjectDynamicContext
{
meta:
description = "Detects dynamic context injection inside agent skills."
author = "Marco Pedrinazzi (@pedrinazziM)"
version = "1.0.0"
category = "abusing_functions/agentic_misuse"
severity = "high"
date = "2026-03-18"
keywords:
$command_placeholder = /!\`.+?\`/
condition:
keywords.$command_placeholder
}"#;
let rules = parse_rules(body).unwrap();
assert_eq!(rules.len(), 1);
let r = &rules[0];
assert_eq!(r.name, "InjectDynamicContext");
assert!(r.keywords["command_placeholder"].is_regex);
assert_eq!(r.meta["severity"], "high");
assert!(matches!(
r.condition,
ConditionExpr::Reference {
section: Section::Keywords,
..
}
));
}
// A semantics pattern without `(…)` gets the default threshold 0.1; an
// explicit `(0.42)` is used as given.
#[test]
fn parses_semantics_threshold_default_and_custom() {
let body = r#"
rule SemanticsTest {
semantics:
$no_threshold = "phrase A"
$custom = "phrase B" (0.42)
condition:
semantics.$no_threshold or semantics.$custom
}
"#;
let rules = parse_rules(body).unwrap();
let r = &rules[0];
assert!((r.semantics["no_threshold"].threshold - 0.1).abs() < 1e-6);
assert!((r.semantics["custom"].threshold - 0.42).abs() < 1e-6);
}
// A condition that names an undefined variable is rejected with
// `DanglingReference`.
#[test]
fn rejects_dangling_condition_reference() {
let body = r#"
rule BadRef {
keywords:
$real = "x"
condition:
keywords.$missing
}
"#;
let err = parse_rules(body).expect_err("dangling ref must error");
assert!(matches!(
err,
ParseError::DanglingReference {
section: Section::Keywords,
..
}
));
}
// `section.*` alone parses as a wildcard; `any of section.*` parses as a
// quantified expression.
#[test]
fn parses_both_bare_wildcard_and_any_of_wildcard() {
let bare = parse_condition("keywords.*".to_string()).unwrap();
assert!(matches!(
bare,
ConditionExpr::Wildcard {
section: Section::Keywords
}
));
let any_of = parse_condition("any of semantics.*".to_string()).unwrap();
assert!(matches!(
any_of,
ConditionExpr::Quantified {
quantifier: super::super::condition::Quantifier::Any,
..
}
));
}
// `not` binds tighter than `and`: `not a and b` is `And[Not(a), b]`.
#[test]
fn boolean_precedence_matches_nova() {
let cond = parse_condition("not keywords.$a and keywords.$b".to_string()).unwrap();
match cond {
ConditionExpr::And(items) => {
assert_eq!(items.len(), 2);
assert!(matches!(items[0], ConditionExpr::Not(_)));
assert!(matches!(items[1], ConditionExpr::Reference { .. }));
}
other => panic!("expected And at top, got {other:?}"),
}
}
// Slashes inside a regex literal are not mistaken for a comment, while real
// `//` comments before, after and inside sections are stripped.
#[test]
fn line_comments_do_not_eat_regex_literals() {
let body = r#"
rule CommentRegex {
// a comment before keywords
keywords:
$x = /\/foo\// // trailing comment
condition:
keywords.$x // condition comment
}
"#;
let rules = parse_rules(body).unwrap();
assert!(rules[0].keywords["x"].is_regex);
assert_eq!(rules[0].keywords["x"].pattern, "\\/foo\\/");
}
// Several rules in one input are returned in the order they appear.
#[test]
fn parses_multiple_rules_in_source_order() {
let body = r#"
rule First {
keywords:
$a = "x"
condition:
keywords.$a
}
rule Second {
keywords:
$b = "y"
condition:
keywords.$b
}
"#;
let rules = parse_rules(body).unwrap();
assert_eq!(rules.len(), 2);
assert_eq!(rules[0].name, "First");
assert_eq!(rules[1].name, "Second");
}
// Every fixture file under `test_fixtures/` must parse without error.
#[test]
fn parser_accepts_real_nova_rule_pack_subset() {
let bodies = [
include_str!("test_fixtures/jailbreak_subset.nov"),
include_str!("test_fixtures/keywords_only.nov"),
include_str!("test_fixtures/semantics_and_llm.nov"),
include_str!("test_fixtures/vendor_host.nov"),
];
for body in bodies {
parse_rules(body).unwrap_or_else(|e| {
panic!("real-world fixture failed to parse: {e}\n--- body ---\n{body}")
});
}
}
}