use oxibonsai_runtime::grammar::{parse_gbnf, GbnfParseError, GrammarConstraint};
/// Parses `src` with `parse_gbnf` and returns the resulting grammar.
///
/// # Panics
///
/// Panics with the offending source text and the parser error if parsing fails.
fn parse_ok(src: &str) -> oxibonsai_runtime::grammar::Grammar {
    match parse_gbnf(src) {
        Ok(grammar) => grammar,
        Err(e) => panic!("parse_gbnf failed on:\n{src}\nError: {e}"),
    }
}
/// Both the empty string and whitespace-only input must yield `EmptyGrammar`.
#[test]
fn test_empty_grammar_error() {
    for input in ["", " \n\n \t\n"] {
        let outcome = parse_gbnf(input);
        assert!(matches!(outcome, Err(GbnfParseError::EmptyGrammar)));
    }
}
/// A grammar whose only rule is `word` (no `root`) must yield `MissingRootRule`.
#[test]
fn test_missing_root_rule_error() {
    let outcome = parse_gbnf(r#"word ::= "hello""#);
    let is_missing_root = matches!(outcome, Err(GbnfParseError::MissingRootRule));
    assert!(is_missing_root);
}
/// A single string literal parses; the start symbol is in range and has rules.
#[test]
fn test_simple_literal() {
    let grammar = parse_ok(r#"root ::= "hello""#);
    let start = grammar.start();
    assert!(
        start < grammar.nt_count,
        "start symbol must be a valid NT id"
    );
    // Only the number of root rules matters here, so count instead of collecting.
    let root_rule_count = grammar.rules_for(start).count();
    assert!(root_rule_count > 0, "root must have at least one rule");
}
/// `"cat" | "dog"` must produce exactly two rules for the start symbol.
#[test]
fn test_alternation() {
    let grammar = parse_ok(r#"root ::= "cat" | "dog""#);
    let alternatives = grammar.rules_for(grammar.start()).count();
    assert_eq!(alternatives, 2, "expected 2 root rules for alternation");
}
/// `"a"*` must expand to at least two rules, one of which is an ε rule.
#[test]
fn test_kleene_star() {
    let grammar = parse_ok(r#"root ::= "a"*"#);
    let rule_count = grammar.rules.len();
    assert!(
        rule_count >= 2,
        "star should create synthetic NT with at least 2 rules, got {}",
        rule_count
    );
    assert!(
        grammar.rules.iter().any(|rule| rule.is_epsilon()),
        "star must introduce an ε rule"
    );
}
/// `"a"+` must expand to at least three rules, with a single non-ε root rule.
#[test]
fn test_plus() {
    let grammar = parse_ok(r#"root ::= "a"+"#);
    let total = grammar.rules.len();
    assert!(total >= 3, "plus should create 3+ rules, got {}", total);

    let root_rules: Vec<_> = grammar.rules_for(grammar.start()).collect();
    assert_eq!(root_rules.len(), 1);
    let (_, only_rule) = &root_rules[0];
    assert!(!only_rule.is_epsilon());
}
/// The `?` quantifier must introduce at least one ε rule into the grammar.
#[test]
fn test_optional() {
    let grammar = parse_ok(r#"root ::= "colo" "u"? "r""#);
    assert!(
        grammar.rules.iter().any(|rule| rule.is_epsilon()),
        "optional must introduce an ε rule"
    );
}
/// `[abc]+` must expand to at least six rules.
#[test]
fn test_char_class_simple() {
    let grammar = parse_ok(r#"root ::= [abc]+"#);
    let rule_count = grammar.rules.len();
    assert!(
        rule_count >= 6,
        "char class plus should produce 6+ rules, got {}",
        rule_count
    );
}
/// `[a-z]+` must expand to at least 29 rules.
#[test]
fn test_char_class_range() {
    let grammar = parse_ok(r#"root ::= [a-z]+"#);
    let rule_count = grammar.rules.len();
    assert!(
        rule_count >= 29,
        "a-z class + plus expected >=29 rules, got {}",
        rule_count
    );
}
/// The negated class `[^abc]+` must expand to at least 256 rules.
#[test]
fn test_char_class_negated() {
    let grammar = parse_ok(r#"root ::= [^abc]+"#);
    let rule_count = grammar.rules.len();
    assert!(
        rule_count >= 256,
        "negated class [^abc] + plus expected >=256 rules, got {}",
        rule_count
    );
}
/// `[a-zA-Z0-9_]+` must yield exactly 63 single-byte terminal rules outside the
/// start symbol.
#[test]
fn test_char_class_mixed() {
    let grammar = parse_ok(r#"root ::= [a-zA-Z0-9_]+"#);
    let start = grammar.start();
    // A rule qualifies when its RHS is one symbol carrying exactly one terminal
    // byte and its LHS is not the start symbol.
    let single_byte_rules = grammar
        .rules
        .iter()
        .filter(|rule| {
            rule.rhs.len() == 1
                && rule.rhs[0]
                    .terminal_bytes()
                    .map_or(false, |bytes| bytes.len() == 1)
                && rule.lhs != start
        })
        .count();
    assert_eq!(
        single_byte_rules, 63,
        "mixed class should have 63 terminal rules, got {}",
        single_byte_rules
    );
}
/// A reference to another rule must show up as a non-terminal in root's RHS.
#[test]
fn test_rule_reference() {
    let grammar = parse_ok("root ::= word\nword ::= [a-z]+\n");
    let root_rules: Vec<_> = grammar.rules_for(grammar.start()).collect();
    assert_eq!(root_rules.len(), 1);

    let (_, root_rule) = &root_rules[0];
    let references_nt = root_rule.rhs.iter().any(|sym| sym.is_non_terminal());
    assert!(references_nt, "root should reference word via NonTerminal");
}
/// Referencing the undefined rule `ghost` must yield `UnknownRule("ghost")`.
#[test]
fn test_unknown_rule_reference_error() {
    let outcome = parse_gbnf("root ::= ghost");
    let names_ghost = match &outcome {
        Err(GbnfParseError::UnknownRule(name)) => name == "ghost",
        _ => false,
    };
    assert!(
        names_ghost,
        "expected UnknownRule(\"ghost\"), got {outcome:?}"
    );
}
/// A leading `#` comment line must not affect parsing: root still has one rule.
#[test]
fn test_comment_ignored() {
    let grammar = parse_ok("# this is a comment\nroot ::= \"x\"");
    let root_rule_count = grammar.rules_for(grammar.start()).count();
    assert_eq!(root_rule_count, 1);
}
/// The literal `"\n\t\r\\"` must contribute four terminal symbols to root.
#[test]
fn test_string_escape_sequences() {
    let grammar = parse_ok(r#"root ::= "\n\t\r\\""#);
    let root_rules: Vec<_> = grammar.rules_for(grammar.start()).collect();
    assert!(!root_rules.is_empty());

    // Count terminal symbols across every root rule's right-hand side.
    let total_terminals: usize = root_rules
        .iter()
        .flat_map(|(_, rule)| rule.rhs.iter())
        .filter(|sym| sym.is_terminal())
        .count();
    assert_eq!(
        total_terminals, 4,
        "expected 4 terminal bytes (\\n, \\t, \\r, \\\\)"
    );
}
/// The hex-escaped range `[\x41-\x5A]+` must yield 26 single-byte terminal rules.
#[test]
fn test_hex_escape_in_class() {
    let grammar = parse_ok(r#"root ::= [\x41-\x5A]+"#);
    // A rule qualifies when its RHS is one symbol carrying exactly one terminal byte.
    let single_byte_rules = grammar
        .rules
        .iter()
        .filter(|rule| {
            rule.rhs.len() == 1
                && rule.rhs[0]
                    .terminal_bytes()
                    .map_or(false, |bytes| bytes.len() == 1)
        })
        .count();
    assert_eq!(
        single_byte_rules, 26,
        "\\x41-\\x5A should produce 26 terminal rules, got {}",
        single_byte_rules
    );
}
/// `("a" "b")+` must expand to at least four rules, including an ε rule.
#[test]
fn test_group_with_quantifier() {
    let grammar = parse_ok(r#"root ::= ("a" "b")+"#);
    let rule_count = grammar.rules.len();
    assert!(
        rule_count >= 4,
        "group+ should produce 4+ rules, got {}",
        rule_count
    );
    let has_epsilon = grammar.rules.iter().any(|rule| rule.is_epsilon());
    assert!(has_epsilon);
}
/// `"a" | ("b" | "c")` must give root two alternatives; the second must be a
/// single non-terminal whose own rule set has two alternatives.
#[test]
fn test_nested_alternation() {
    let grammar = parse_ok(r#"root ::= "a" | ("b" | "c")"#);
    let root_rules: Vec<_> = grammar.rules_for(grammar.start()).collect();
    assert_eq!(
        root_rules.len(),
        2,
        "expected 2 root rules for nested alternation"
    );

    // The second alternative's RHS must be exactly one NonTerminal symbol.
    let second_rhs = &root_rules[1].1.rhs[..];
    let group_nt_id =
        if let [oxibonsai_runtime::grammar::Symbol::NonTerminal(id)] = second_rhs {
            *id
        } else {
            panic!("second alternative should be a NonTerminal group reference")
        };

    let group_alternatives = grammar.rules_for(group_nt_id).count();
    assert_eq!(
        group_alternatives, 2,
        "group should have 2 alternatives (b | c)"
    );
}
/// A self-referential rule parses into one root rule with a two-symbol RHS.
#[test]
fn test_recursive_rule() {
    let grammar = parse_ok(r#"root ::= "a" root?"#);
    let root_rules: Vec<_> = grammar.rules_for(grammar.start()).collect();
    assert_eq!(root_rules.len(), 1);

    let (_, root_rule) = &root_rules[0];
    let rhs_len = root_rule.rhs.len();
    assert_eq!(rhs_len, 2);
}
/// Three adjacent literals must form one root rule with 11 terminal symbols.
#[test]
fn test_multiword_sequence() {
    let grammar = parse_ok(r#"root ::= "hello" " " "world""#);
    let root_rules: Vec<_> = grammar.rules_for(grammar.start()).collect();
    assert_eq!(root_rules.len(), 1);

    let (_, sequence) = &root_rules[0];
    let terminal_count = sequence.rhs.iter().filter(|sym| sym.is_terminal()).count();
    assert_eq!(
        terminal_count, 11,
        "expected 11 terminal bytes for \"hello\" \" \" \"world\""
    );
}
/// A multi-rule JSON-like grammar (alternation across lines, groups, negated
/// classes, escaped quotes) must parse, produce rules, and start at `root`.
#[test]
fn test_complex_json_like() {
    let src = r#"
root ::= "{" ws "}"
| "{" ws members ws "}"
members ::= pair ("," ws pair)*
pair ::= string ws ":" ws value
value ::= string | number
string ::= "\"" [^"]* "\""
number ::= [0-9]+
ws ::= [ \t\n]*
"#;
    // Use the shared helper, like the other tests in this file, so a failure
    // reports the grammar source alongside the parser error.
    let g = parse_ok(src);
    assert!(!g.rules.is_empty());
    let root_name = g.nt_name(g.start()).to_string();
    assert_eq!(root_name, "root");
}
/// With several rules defined, the start symbol's name must still be `root`.
#[test]
fn test_grammar_has_correct_start_symbol() {
    let grammar = parse_ok("root ::= \"x\"\nother ::= \"y\"");
    let start = grammar.start();
    let start_name = grammar.nt_name(start).to_string();
    assert_eq!(start_name, "root", "start symbol must be `root`");
}
/// Smoke test: a parsed grammar can be handed to `GrammarConstraint::new`.
///
/// The callback returns the id as a single byte for ids below 128 and an
/// empty vector otherwise. Nothing is asserted beyond successful construction.
#[test]
fn test_constraint_integration() {
    let grammar = parse_ok(r#"root ::= [0-9]+"#);
    // The closure stays inline so its parameter type is inferred from `new`.
    let _constraint = GrammarConstraint::new(
        grammar,
        |id| {
            if id >= 128 {
                return vec![];
            }
            vec![id as u8]
        },
        128,
    );
}
/// Three named rules must parse into at least three non-terminals, with one
/// root rule and two alternatives each for `noun` and `verb`.
#[test]
fn test_multiple_rules() {
    let src =
        "root ::= noun verb noun\nnoun ::= \"cat\" | \"dog\"\nverb ::= \"chases\" | \"sees\"\n";
    let g = parse_ok(src);
    assert!(
        g.nt_count >= 3,
        "expected at least 3 NTs, got {}",
        g.nt_count
    );
    let root_rules: Vec<_> = g.rules_for(g.start()).collect();
    assert_eq!(root_rules.len(), 1);

    // Look up a non-terminal id by name. A missing name panics with that name
    // instead of an anonymous `unwrap` failure.
    let nt_id = |name: &str| {
        *g.nt_names
            .iter()
            .find(|(_, n)| *n == name)
            .unwrap_or_else(|| panic!("non-terminal `{name}` not found in nt_names"))
            .0
    };
    let noun_id = nt_id("noun");
    let verb_id = nt_id("verb");
    assert_eq!(g.rules_for(noun_id).count(), 2);
    assert_eq!(g.rules_for(verb_id).count(), 2);
}
/// `[0-9]+ "." [0-9]+` must parse with `root` as the start symbol and contain
/// at least one ε rule.
#[test]
fn test_digit_word_pattern() {
    let grammar = parse_ok(r#"root ::= [0-9]+ "." [0-9]+"#);
    assert!(!grammar.rules.is_empty());

    let start_name = grammar.nt_name(grammar.start()).to_string();
    assert_eq!(start_name, "root");

    let has_epsilon = grammar.rules.iter().any(|rule| rule.is_epsilon());
    assert!(has_epsilon, "plus quantifier must introduce ε rule");
}
/// Each listed `GbnfParseError` variant must render a non-empty `Display`
/// string, and `EmptyGrammar` must report no underlying `source()`.
#[test]
fn test_error_display_coverage() {
    let samples = [
        GbnfParseError::EmptyGrammar,
        GbnfParseError::MissingRootRule,
        GbnfParseError::UnknownRule("foo".to_string()),
        GbnfParseError::UnexpectedChar {
            line: 1,
            col: 2,
            ch: '!',
        },
        GbnfParseError::UnterminatedString,
        GbnfParseError::UnterminatedCharClass,
        GbnfParseError::InvalidEscape('z'),
        GbnfParseError::UnsupportedFeature("test".to_string()),
        GbnfParseError::RecursionLimit,
    ];
    for e in &samples {
        let rendered = e.to_string();
        assert!(
            !rendered.is_empty(),
            "Display for {e:?} must not be empty"
        );
    }

    // Go through the trait object so the `std::error::Error` impl itself is exercised.
    let empty = GbnfParseError::EmptyGrammar;
    let as_std_error: &dyn std::error::Error = &empty;
    assert!(as_std_error.source().is_none());
}