#[cfg(feature = "ABNF")]
use crate::ABNF;
use crate::error::Error;
use crate::generation::{GenerationStrategy, RandomWalk};
use crate::parsers::{self, BNF, Format};
use crate::production::Production;
use crate::term::Term;
use rand::{Rng, SeedableRng, rng, rngs::StdRng};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::fmt::{self, Write};
use std::rc::Rc;
use std::str;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseTreeNode<'gram> {
Terminal(&'gram str),
Nonterminal(ParseTree<'gram>),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseTree<'gram> {
pub lhs: &'gram Term,
rhs: Vec<ParseTreeNode<'gram>>,
}
impl<'gram> ParseTree<'gram> {
pub(crate) const fn new(lhs: &'gram Term, rhs: Vec<ParseTreeNode<'gram>>) -> Self {
Self { lhs, rhs }
}
}
type ParseTreeFormatSet = crate::HashSet<usize>;
impl<'gram> ParseTree<'gram> {
fn fmt(
&self,
f: &mut fmt::Formatter<'_>,
depth_format_set: &mut ParseTreeFormatSet,
depth: usize,
is_last_child: bool,
) -> fmt::Result {
depth_format_set.insert(depth);
Self::fmt_node_prefix(f, depth_format_set, depth, is_last_child)?;
write!(f, "{} ::=", self.lhs)?;
for matched in &self.rhs {
match matched {
ParseTreeNode::Terminal(terminal) => write!(f, " \"{terminal}\"")?,
ParseTreeNode::Nonterminal(parse_tree) => write!(f, " {}", parse_tree.lhs)?,
}
}
writeln!(f)?;
let child_depth = depth + 1;
if let Some(last_child_idx) = self.rhs.len().checked_sub(1) {
for (idx, child) in self.rhs.iter().enumerate() {
let is_last_child = idx == last_child_idx;
if is_last_child {
depth_format_set.remove(&depth);
}
match child {
ParseTreeNode::Terminal(terminal) => {
Self::fmt_node_prefix(f, depth_format_set, child_depth, is_last_child)?;
writeln!(f, "\"{terminal}\"")?;
}
ParseTreeNode::Nonterminal(nonterminal) => {
nonterminal.fmt(f, depth_format_set, child_depth, is_last_child)?;
}
}
}
}
Ok(())
}
fn fmt_node_prefix(
f: &mut fmt::Formatter,
depth_format_set: &mut ParseTreeFormatSet,
depth: usize,
is_last_child: bool,
) -> fmt::Result {
const CHILD_PREFIX: &str = "├── ";
const GRANDCHILD_PREFIX: &str = "│ ";
const LAST_CHILD_PREFIX: &str = "└── ";
const LAST_GRANDCHILD_PREFIX: &str = " ";
for idx in 0..depth {
let prefix = if (idx + 1) == depth {
if is_last_child {
LAST_CHILD_PREFIX
} else {
CHILD_PREFIX
}
} else if depth_format_set.contains(&idx) {
GRANDCHILD_PREFIX
} else {
LAST_GRANDCHILD_PREFIX
};
write!(f, "{prefix}")?;
}
Ok(())
}
#[must_use]
pub const fn mermaid(&self) -> MermaidParseTree<'_> {
MermaidParseTree { parse_tree: self }
}
pub fn rhs_iter<'a>(&'a self) -> impl Iterator<Item = &'a ParseTreeNode<'gram>> {
self.rhs.iter()
}
pub fn rhs_iter_mut<'a>(&'a mut self) -> impl Iterator<Item = &'a mut ParseTreeNode<'gram>> {
self.rhs.iter_mut()
}
}
impl fmt::Display for ParseTree<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut depth_format_set = ParseTreeFormatSet::new();
self.fmt(f, &mut depth_format_set, 0, true)
}
}
const MERMAID_ESCAPE_CHARS: &[char] = &['"', '&', '#', '<', '>', '[', '\\', ']', ';', '\n', '\r'];
#[must_use]
pub fn escape_mermaid_label(s: &str) -> String {
let mut out = String::with_capacity(s.len().saturating_mul(2));
for c in s.chars() {
if MERMAID_ESCAPE_CHARS.contains(&c) {
write!(out, "#{};", c as u32).expect("write to String never fails");
} else {
out.push(c);
}
}
out
}
pub struct MermaidParseTree<'a> {
parse_tree: &'a ParseTree<'a>,
}
impl MermaidParseTree<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>, count: &mut usize) -> fmt::Result {
if *count == 0 {
writeln!(f, "flowchart TD")?;
}
let lhs = match self.parse_tree.lhs {
Term::Nonterminal(str) => str,
Term::Terminal(_) => unreachable!(),
};
let lhs_escaped = escape_mermaid_label(lhs);
let lhs_count = *count;
for rhs in &self.parse_tree.rhs {
*count += 1;
match rhs {
ParseTreeNode::Terminal(rhs) => {
let rhs_escaped = escape_mermaid_label(rhs);
writeln!(
f,
"{}[\"{}\"] --> {}[\"{}\"]",
lhs_count, lhs_escaped, *count, rhs_escaped
)?;
}
ParseTreeNode::Nonterminal(parse_tree) => {
let rhs = match parse_tree.lhs {
Term::Nonterminal(str) => str,
Term::Terminal(_) => unreachable!(),
};
let rhs_escaped = escape_mermaid_label(rhs);
writeln!(
f,
"{}[\"{}\"] --> {}[\"{}\"]",
lhs_count, lhs_escaped, *count, rhs_escaped
)?;
let mermaid = MermaidParseTree { parse_tree };
mermaid.fmt(f, count)?;
}
};
}
Ok(())
}
}
impl fmt::Display for MermaidParseTree<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut count = 0usize;
self.fmt(f, &mut count)
}
}
#[derive(Clone, Default, Debug, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
pub struct Grammar {
productions: Vec<Production>,
}
impl Grammar {
#[must_use]
pub const fn new() -> Grammar {
Grammar {
productions: vec![],
}
}
#[must_use]
pub const fn from_parts(v: Vec<Production>) -> Grammar {
Grammar { productions: v }
}
pub fn parse_from<F: Format>(input: &str) -> Result<Self, self::Error> {
match parsers::grammar_complete::<F>(input) {
Ok((_, g)) => Ok(g),
Err(e) => Err(Error::from(e)),
}
}
pub fn add_production(&mut self, prod: Production) {
self.productions.push(prod);
}
pub fn remove_production(&mut self, prod: &Production) -> Option<Production> {
self.productions
.iter()
.position(|x| *x == *prod)
.map(|pos| self.productions.swap_remove(pos))
}
pub fn productions_iter(&self) -> impl Iterator<Item = &Production> {
self.productions.iter()
}
pub(crate) const fn production_count(&self) -> usize {
self.productions.len()
}
pub fn productions_iter_mut(&mut self) -> impl Iterator<Item = &mut Production> {
self.productions.iter_mut()
}
pub fn validate(&self) -> Result<(), Error> {
if self.productions.is_empty() {
return Err(Error::ValidationError(
"Grammar must have at least one production".to_string(),
));
}
let mut sets = crate::validation::NonterminalSets::new();
for production in self.productions_iter() {
if let Term::Nonterminal(nt) = &production.lhs {
sets.record_lhs(nt.as_str());
}
for expression in production.rhs_iter() {
for term in expression.terms_iter() {
if let Term::Nonterminal(nt) = term {
sets.record_rhs(nt.as_str());
}
}
}
}
if let Some(undefined) = sets.undefined().next() {
return Err(Error::ValidationError(format!(
"Undefined nonterminals: <{undefined}>"
)));
}
Ok(())
}
pub fn build_parser(&self) -> Result<crate::GrammarParser<'_>, Error> {
crate::GrammarParser::new(self)
}
#[deprecated(
since = "0.6.0",
note = "Use Grammar::build_parser() and GrammarParser::parse_input() instead for validation and reusability"
)]
pub fn parse_input<'gram>(
&'gram self,
input: &'gram str,
) -> impl Iterator<Item = ParseTree<'gram>> {
let parser = Rc::new(crate::GrammarParser::new_unchecked(self));
crate::earley::parse_with_parser_rc(parser, input, None)
}
#[deprecated(
since = "0.6.0",
note = "Use Grammar::build_parser() and GrammarParser::parse_input_starting_with() instead for validation and reusability"
)]
pub fn parse_input_starting_with<'gram>(
&'gram self,
input: &'gram str,
starting_term: &'gram Term,
) -> impl Iterator<Item = ParseTree<'gram>> {
let parser = Rc::new(crate::GrammarParser::new_unchecked(self));
crate::earley::parse_with_parser_rc(parser, input, Some(starting_term))
}
pub(crate) fn starting_term(&self) -> Option<&Term> {
self.productions_iter().next().map(|prod| &prod.lhs)
}
fn eval_terminal_with_strategy<S: GenerationStrategy>(
&self,
term: &Term,
f: &impl Fn(&str, &str) -> bool,
strategy: &mut S,
depth: usize,
) -> Result<String, Error> {
match term {
Term::Nonterminal(nt) => self.traverse_with_strategy(nt, f, strategy, depth + 1),
Term::Terminal(t) => Ok(t.clone()),
}
}
fn traverse_with_strategy<S: GenerationStrategy>(
&self,
ident: &str,
f: &impl Fn(&str, &str) -> bool,
strategy: &mut S,
depth: usize,
) -> Result<String, Error> {
loop {
let production = match self
.productions_iter()
.find(|p| matches!(&p.lhs, Term::Nonterminal(s) if s.as_str() == ident))
{
Some(p) => p,
None => return Ok(ident.to_string()),
};
fn production_has_no_choice(production: &Production) -> bool {
if production.is_empty() {
return true;
}
if production.len() == 1 {
let only = production.rhs_iter().next().expect("len is 1");
return only.terms_iter().next().is_none();
}
false
}
if production_has_no_choice(production) {
return Err(Error::GenerateError(String::from(
"Production has no choosable alternative!",
)));
}
let Some(expression) = strategy.choose(production, depth) else {
return Err(Error::GenerateError(String::from(
"Couldn't select random Expression!",
)));
};
debug_assert!(
production.rhs_iter().any(|e| std::ptr::eq(e, expression)),
"strategy returned an expression not from the provided production"
);
let mut result = String::new();
for term in expression.terms_iter() {
match self.eval_terminal_with_strategy(term, f, strategy, depth) {
Ok(s) => result.push_str(&s),
Err(e) => return Err(e),
}
}
if f(ident, &result) {
return Ok(result);
}
}
}
pub fn generate_seeded(&self, rng: &mut StdRng) -> Result<String, Error> {
let mut seed = [0u8; 32];
rng.fill(&mut seed);
let mut strategy = RandomWalk::from_seed(seed);
self.generate_seeded_with_strategy(&mut strategy)
}
pub fn generate_seeded_callback(
&self,
rng: &mut StdRng,
f: impl Fn(&str, &str) -> bool,
) -> Result<String, Error> {
let mut seed = [0u8; 32];
rng.fill(&mut seed);
let mut strategy = RandomWalk::from_seed(seed);
self.generate_seeded_callback_with_strategy(f, &mut strategy)
}
pub fn generate_seeded_with_strategy<S: GenerationStrategy>(
&self,
strategy: &mut S,
) -> Result<String, Error> {
self.generate_seeded_callback_with_strategy(|_, _| true, strategy)
}
pub fn generate_seeded_callback_with_strategy<S: GenerationStrategy>(
&self,
f: impl Fn(&str, &str) -> bool,
strategy: &mut S,
) -> Result<String, Error> {
if !self.terminates() {
return Err(Error::GenerateError(
"Can't generate, first rule in grammar doesn't lead to a terminal state"
.to_string(),
));
}
let start_rule = match self.productions_iter().next() {
Some(term) => match &term.lhs {
Term::Nonterminal(nt) => nt.clone(),
Term::Terminal(_) => {
return Err(Error::GenerateError(format!(
"Terminal type cannot define a production in '{term}'!"
)));
}
},
None => {
return Err(Error::GenerateError(String::from(
"Failed to get first production!",
)));
}
};
self.traverse_with_strategy(&start_rule, &f, strategy, 0)
}
pub fn generate(&self) -> Result<String, Error> {
self.generate_callback(|_, _| true)
}
pub fn generate_callback(&self, f: impl Fn(&str, &str) -> bool) -> Result<String, Error> {
let mut seed: [u8; 32] = [0; 32];
rng().fill(&mut seed);
let mut rng: StdRng = SeedableRng::from_seed(seed);
self.generate_seeded_callback(&mut rng, f)
}
pub(crate) fn terminates(&self) -> bool {
let Some(starting_term) = self.starting_term() else {
return true;
};
let mut terminating_rules: Vec<&Term> = vec![];
for p in self.productions.iter() {
if p.has_terminating_expression(None) {
terminating_rules.push(&p.lhs);
}
}
if terminating_rules.is_empty() {
return false;
}
let mut is_progress = true;
while is_progress {
is_progress = false;
for prod in self.productions_iter() {
let marked = terminating_rules.contains(&&prod.lhs);
if marked {
continue;
}
let terminates = prod.has_terminating_expression(Some(&terminating_rules));
if terminates {
if &prod.lhs == starting_term {
return true;
}
terminating_rules.push(&prod.lhs);
is_progress = true;
}
}
}
terminating_rules.contains(&starting_term)
}
}
impl fmt::Display for Grammar {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
writeln!(
f,
"{}",
self.productions
.iter()
.map(std::string::ToString::to_string)
.collect::<Vec<_>>()
.join("\n")
)
}
}
impl str::FromStr for Grammar {
type Err = Error;
#[cfg(feature = "ABNF")]
fn from_str(s: &str) -> Result<Self, Self::Err> {
match parsers::is_format_standard_bnf(s) {
true => match parsers::grammar_complete::<BNF>(s) {
Ok((_, g)) => Ok(g),
Err(e) => Err(Error::from(e)),
},
false => match parsers::grammar_complete::<ABNF>(s) {
Ok((_, g)) => Ok(g),
Err(e) => Err(Error::from(e)),
},
}
}
#[cfg(not(feature = "ABNF"))]
#[cfg_attr(test, mutants::skip)]
fn from_str(s: &str) -> Result<Self, Self::Err> {
match parsers::grammar_complete::<BNF>(s) {
Ok((_, g)) => Ok(g),
Err(e) => Err(Error::from(e)),
}
}
}
#[macro_export]
macro_rules! grammar {
() => {
$crate::Grammar::new()
};
(<$lhs:ident> ::= $($rest:tt)*) => {
{
let productions = $crate::grammar!(@collect_prods [[] [] [$lhs]] $($rest)*);
$crate::Grammar::from_parts(productions)
}
};
(@collect_prods [[$($curr_terms:expr),*] [$($curr_exprs:tt)*] [$curr_lhs:ident] $($prev_prods:tt)*] ; <$next_lhs:ident> ::= $($rest:tt)*) => {
{
$crate::grammar!(@collect_prods [[] [] [$next_lhs] [[$($curr_terms),*] $($curr_exprs)*] [$curr_lhs] $($prev_prods)*] $($rest)*)
}
};
(@collect_prods [[$($curr_terms:expr),*] [$($curr_exprs:tt)*] [$curr_lhs:ident] $($prev_prods:tt)*] ;) => {
$crate::grammar!(@collect_prods [[$($curr_terms),*] [$($curr_exprs)*] [$curr_lhs] $($prev_prods)*])
};
(@collect_prods [[$($curr_terms:expr),*] [$($curr_exprs:tt)*] [$lhs:ident] $($prev_prods:tt)*] | $($rest:tt)*) => {
$crate::grammar!(@collect_prods [[] [[$($curr_terms),*] $($curr_exprs)*] [$lhs] $($prev_prods)*] $($rest)*)
};
(@collect_prods [[$($curr_terms:expr),*] $($state:tt)*] $t:literal $($rest:tt)*) => {
$crate::grammar!(@collect_prods [[$($curr_terms,)* $crate::term!($t)] $($state)*] $($rest)*)
};
(@collect_prods [[$($curr_terms:expr),*] $($state:tt)*] <$nt:ident> $($rest:tt)*) => {
$crate::grammar!(@collect_prods [[$($curr_terms,)* $crate::term!(<$nt>)] $($state)*] $($rest)*)
};
(@collect_prods [[$($last_terms:expr),*] [$($last_exprs:tt)*] [$last_lhs:ident] $([$($prod_exprs:tt)*] [$prod_lhs:ident])*]) => {
{
#[allow(clippy::vec_init_then_push)]
{
let mut prods = vec![];
$(
let mut exprs = vec![];
$crate::grammar!(@build_exprs exprs $($prod_exprs)*);
prods.push($crate::Production::from_parts(
$crate::term!(<$prod_lhs>),
exprs,
));
)*
let mut last_exprs = vec![];
$crate::grammar!(@build_exprs last_exprs $($last_exprs)* [$($last_terms),*]);
prods.push($crate::Production::from_parts(
$crate::term!(<$last_lhs>),
last_exprs,
));
prods
}
}
};
(@build_exprs $exprs:ident $([$($terms:expr),*])*) => {
#[allow(clippy::vec_init_then_push)]
{
$(
$exprs.push($crate::Expression::from_parts(vec![$($terms),*]));
)*
}
};
}
#[cfg(test)]
#[allow(deprecated)]
mod tests {
use super::*;
use crate::expression::Expression;
use crate::generation::{
CoverageGuided, DepthBounded, GenerationStrategy, RandomWalk, Weighted,
};
use crate::production::Production;
use crate::term::Term;
use quickcheck::{Arbitrary, Gen, QuickCheck, TestResult};
impl Arbitrary for Grammar {
fn arbitrary(g: &mut Gen) -> Self {
let mut productions = Vec::<Production>::arbitrary(g);
if productions.is_empty() {
productions.push(Production::arbitrary(g));
}
Grammar { productions }
}
}
fn prop_to_string_and_back(gram: Grammar) -> TestResult {
let to_string = gram.to_string();
let from_str: Result<Grammar, _> = to_string.parse();
match from_str {
Ok(from_prod) => TestResult::from_bool(from_prod == gram),
_ => TestResult::error(format!("{gram} to string and back should be safe")),
}
}
#[test]
fn to_string_and_back() {
QuickCheck::new()
.tests(1000)
.r#gen(Gen::new(12usize))
.quickcheck(prop_to_string_and_back as fn(Grammar) -> TestResult);
}
#[test]
fn new_grammars() {
let lhs1 = Term::Nonterminal(String::from("STRING A"));
let rhs1 = Expression::from_parts(vec![
Term::Terminal(String::from("STRING B")),
Term::Nonterminal(String::from("STRING C")),
]);
let p1 = Production::from_parts(lhs1, vec![rhs1]);
let lhs2 = Term::Nonterminal(String::from("STRING A"));
let rhs2 = Expression::from_parts(vec![
Term::Terminal(String::from("STRING B")),
Term::Nonterminal(String::from("STRING C")),
]);
let p2 = Production::from_parts(lhs2, vec![rhs2]);
let mut g1 = Grammar::new();
g1.add_production(p1.clone());
g1.add_production(p2.clone());
let g2 = Grammar::from_parts(vec![p1, p2]);
assert_eq!(g1, g2);
}
#[test]
fn add_production() {
let production = crate::production!(<dna> ::= "base" | "base" <dna>);
let productions = vec![production.clone()];
let mut grammar = Grammar::new();
assert_eq!(grammar.productions_iter().count(), 0);
grammar.add_production(production);
assert_eq!(grammar.productions_iter().count(), 1);
let filled_grammar = Grammar::from_parts(productions);
assert_eq!(grammar, filled_grammar);
}
#[test]
fn remove_production() {
let production = crate::production!(<dna> ::= "base" | "base" <dna>);
let productions = vec![production.clone()];
let mut grammar = Grammar::from_parts(productions.clone());
assert_eq!(
Some(&production),
grammar.productions_iter().find(|&prod| *prod == production)
);
assert_eq!(grammar.productions_iter().count(), productions.len());
let removed = grammar.remove_production(&production);
assert_eq!(removed, Some(production.clone()));
assert_eq!(grammar.productions_iter().count(), productions.len() - 1);
assert_eq!(
None,
grammar.productions_iter().find(|&prod| *prod == production)
);
}
#[test]
fn remove_nonexistent_production() {
let production = crate::production!(<dna> ::= "base" | "base" <dna>);
let productions = vec![production.clone()];
let mut grammar = Grammar::from_parts(productions.clone());
let unused = crate::production!(<nonexistent> ::=);
assert_eq!(
Some(&production),
grammar.productions_iter().find(|&prod| *prod == production)
);
assert_eq!(grammar.productions_iter().count(), productions.len());
let removed = grammar.remove_production(&unused);
assert_eq!(removed, None);
assert_eq!(grammar.productions_iter().count(), productions.len());
assert_eq!(
None,
grammar.productions_iter().find(|&prod| *prod == unused)
);
}
#[test]
fn validate_fails_for_empty_grammar() {
let grammar = Grammar::from_parts(vec![]);
let result = grammar.validate();
assert!(result.is_err(), "validate should fail for empty grammar");
assert!(matches!(result.unwrap_err(), Error::ValidationError(_)));
}
#[test]
fn validate_succeeds_for_valid_grammar() {
let grammar: Grammar = "<start> ::= <a> | <b>
<a> ::= 'a'
<b> ::= 'b'"
.parse()
.unwrap();
assert!(grammar.validate().is_ok());
}
#[test]
fn validate_fails_for_undefined_nonterminal() {
let grammar: Grammar = "<start> ::= <a> | <b>
<a> ::= 'a'"
.parse()
.unwrap();
let result = grammar.validate();
assert!(result.is_err());
assert!(matches!(result.unwrap_err(), Error::ValidationError(_)));
}
#[test]
fn validate_error_message_contains_undefined_nonterminal() {
let grammar: Grammar = "<start> ::= <undefined_nt>".parse().unwrap();
let err = grammar.validate().unwrap_err();
let Error::ValidationError(msg) = err else {
panic!("expected ValidationError");
};
assert!(
msg.contains("<undefined_nt>"),
"message should mention undefined nonterminal: {msg}"
);
}
#[test]
fn parse_error() {
let grammar: Result<Grammar, _> = "<almost_grammar> ::= <test".parse();
assert!(grammar.is_err(), "{grammar:?} should be error");
}
#[test]
fn parse_error_on_incomplete() {
let result: Result<Grammar, _> = "".parse();
assert!(
matches!(result, Err(Error::ParseError(_))),
"{result:?} should be ParseError"
);
}
#[test]
fn parse_from_bnf_format() {
use crate::parsers::BNF;
let input = "<dna> ::= <base> | <base> <dna>\n<base> ::= 'A' | 'C' | 'G' | 'T'";
let result = Grammar::parse_from::<BNF>(input);
assert!(result.is_ok(), "Should parse BNF format");
let grammar = result.unwrap();
assert!(grammar.productions_iter().count() >= 2);
}
#[cfg(feature = "ABNF")]
#[test]
fn parse_from_abnf_format() {
use crate::parsers::ABNF;
let input = "dna = base / base dna\nbase = \"A\" / \"C\" / \"G\" / \"T\"";
let result = Grammar::parse_from::<ABNF>(input);
assert!(result.is_ok(), "Should parse ABNF format");
let grammar = result.unwrap();
assert!(grammar.productions_iter().count() >= 2);
}
#[test]
fn parse_from_error() {
use crate::parsers::BNF;
let input = "<incomplete";
let result = Grammar::parse_from::<BNF>(input);
assert!(result.is_err(), "Should fail on incomplete input");
assert!(matches!(result.unwrap_err(), Error::ParseError(_)));
}
#[test]
fn lhs_is_terminal_parse() {
let grammar: Result<Grammar, _> = "\"wrong place\" ::= <not-used>".parse();
assert!(grammar.is_err(), "{grammar:?} should be error");
}
#[test]
fn lhs_is_terminal_generate() {
let lhs = Term::Terminal(String::from("\"bad LHS\""));
let terminal = Term::Terminal(String::from("\"good RHS\""));
let expression = Expression::from_parts(vec![terminal]);
let production = Production::from_parts(lhs, vec![expression]);
let grammar = Grammar::from_parts(vec![production]);
let sentence = grammar.generate();
assert!(sentence.is_err(), "{sentence:?} should be error");
}
#[test]
fn no_productions() {
let grammar = Grammar::from_parts(vec![]);
let sentence = grammar.generate();
assert!(sentence.is_err(), "{sentence:?} should be error");
}
#[test]
fn no_expressions() {
let lhs = Term::Terminal(String::from("<good-lhs>"));
let production = Production::from_parts(lhs, vec![]);
let grammar = Grammar::from_parts(vec![production]);
let sentence = grammar.generate();
assert!(sentence.is_err(), "{sentence:?} should be error");
}
#[test]
fn generate_with_groups_and_optionals() {
let grammar: Grammar = "
<s> ::= (<a> | <b>) [<c>]
<a> ::= 'a'
<b> ::= 'b'
<c> ::= 'c'"
.parse()
.unwrap();
let valid: crate::HashSet<String> = ["a", "b", "ac", "bc"]
.into_iter()
.map(String::from)
.collect();
for _ in 0..50 {
let s = grammar.generate().expect("generate should succeed");
assert!(
valid.contains(&s),
"generated '{s}' should be one of a, b, ac, bc"
);
}
}
struct BuggyStrategy<'a> {
other_production: &'a Production,
}
impl GenerationStrategy for BuggyStrategy<'_> {
fn choose<'p>(
&mut self,
_production: &'p Production,
_depth: usize,
) -> Option<&'p Expression> {
let wrong = self.other_production.rhs_iter().next();
unsafe { std::mem::transmute(wrong) }
}
}
#[test]
#[cfg(debug_assertions)]
#[should_panic(expected = "strategy returned an expression not from the provided production")]
fn strategy_must_return_expression_from_provided_production() {
let grammar: Grammar = "<s> ::= 'a'
<x> ::= 'b'"
.parse()
.unwrap();
let other_production = grammar
.productions_iter()
.nth(1)
.expect("grammar has two productions");
let mut strategy = BuggyStrategy { other_production };
grammar
.generate_seeded_with_strategy(&mut strategy)
.expect("test should panic in debug before returning");
}
#[test]
fn generate_seeded_with_strategy_random_walk() {
let grammar: Grammar = "<dna> ::= <base> | <base> <dna>
<base> ::= 'A' | 'C' | 'G' | 'T'"
.parse()
.unwrap();
let mut strategy = RandomWalk::default();
let s = grammar
.generate_seeded_with_strategy(&mut strategy)
.expect("generate should succeed");
assert!(
s.chars().all(|c| matches!(c, 'A' | 'C' | 'G' | 'T')),
"RandomWalk should produce valid DNA: {s}"
);
}
#[test]
fn generate_seeded_with_strategy_depth_bounded() {
let grammar: Grammar = "<s> ::= 'a' | '(' <s> ')'
"
.parse()
.unwrap();
let mut strategy = DepthBounded::new(1);
for _ in 0..20 {
let s = grammar
.generate_seeded_with_strategy(&mut strategy)
.expect("generate should succeed");
assert!(
s == "a" || s == "(a)",
"DepthBounded(1) should produce only 'a' or '(a)', got {s:?}"
);
}
}
#[test]
fn generate_seeded_with_strategy_coverage_guided() {
let grammar: Grammar = "<s> ::= 'a' | 'b' | 'c'
"
.parse()
.unwrap();
let mut strategy = CoverageGuided::new();
let mut seen = crate::HashSet::new();
for _ in 0..30 {
let s = grammar
.generate_seeded_with_strategy(&mut strategy)
.expect("generate should succeed");
seen.insert(s);
}
assert!(
seen.len() >= 2,
"CoverageGuided should produce multiple distinct outputs, got {seen:?}"
);
}
#[test]
fn generate_seeded_with_strategy_weighted() {
let grammar: Grammar = "<s> ::= 'x' | <s> 'x'
"
.parse()
.unwrap();
let mut strategy_heavy = Weighted::new(100, 1);
let lengths_heavy: Vec<usize> = (0..20)
.map(|_| {
grammar
.generate_seeded_with_strategy(&mut strategy_heavy)
.expect("generate should succeed")
.len()
})
.collect();
let mut strategy_light = Weighted::new(1, 100);
let lengths_light: Vec<usize> = (0..20)
.map(|_| {
grammar
.generate_seeded_with_strategy(&mut strategy_light)
.expect("generate should succeed")
.len()
})
.collect();
let avg_heavy: usize = lengths_heavy.iter().sum::<usize>() / lengths_heavy.len();
let avg_light: usize = lengths_light.iter().sum::<usize>() / lengths_light.len();
assert!(
avg_heavy >= avg_light,
"Weighted(100,1) should tend to longer strings than Weighted(1,100); heavy avg {avg_heavy}, light avg {avg_light}"
);
}
#[test]
fn generate_seeded_callback_with_strategy_non_terminating_grammar() {
let grammar: Grammar = "<nonterm> ::= <nonterm>".parse().unwrap();
let mut strategy = RandomWalk::default();
let result = grammar.generate_seeded_callback_with_strategy(|_, _| true, &mut strategy);
assert!(result.is_err());
let err = result.unwrap_err();
assert!(
err.to_string()
.contains("first rule in grammar doesn't lead to a terminal state"),
"expected non-terminating error, got: {err}"
);
}
#[test]
fn generate_seeded_callback_with_strategy_no_productions() {
let grammar = Grammar::from_parts(vec![]);
let mut strategy = RandomWalk::default();
let result = grammar.generate_seeded_callback_with_strategy(|_, _| true, &mut strategy);
assert!(result.is_err());
let err = result.unwrap_err();
assert!(
err.to_string().contains("Failed to get first production"),
"expected no productions error, got: {err}"
);
}
#[test]
fn generate_seeded_callback_with_strategy_terminal_lhs() {
let lhs = Term::Terminal(String::from("\"bad LHS\""));
let terminal = Term::Terminal(String::from("\"good RHS\""));
let expression = Expression::from_parts(vec![terminal]);
let production = Production::from_parts(lhs, vec![expression]);
let grammar = Grammar::from_parts(vec![production]);
let mut strategy = RandomWalk::default();
let result = grammar.generate_seeded_callback_with_strategy(|_, _| true, &mut strategy);
assert!(result.is_err());
let err = result.unwrap_err();
assert!(
err.to_string()
.contains("Terminal type cannot define a production"),
"expected terminal LHS error, got: {err}"
);
}
#[test]
fn generate_seeded_callback_with_strategy_empty_rhs() {
let production = Production::from_parts(Term::Nonterminal("s".to_string()), vec![]);
let grammar = Grammar::from_parts(vec![production]);
let mut strategy = RandomWalk::default();
let result = grammar.generate_seeded_callback_with_strategy(|_, _| true, &mut strategy);
assert!(result.is_err());
let err = result.unwrap_err();
assert!(
err.to_string()
.contains("first rule in grammar doesn't lead to a terminal state")
|| err
.to_string()
.contains("Couldn't select random Expression"),
"expected termination or no-choice error, got: {err}"
);
}
#[test]
fn format_grammar() {
let grammar: Grammar = "<dna> ::= <base> | <base> <dna>
<base> ::= 'A' | 'C' | 'G' | 'T'"
.parse()
.unwrap();
let format = format!("{grammar}");
assert_eq!(
format,
"<dna> ::= <base> | <base> <dna>\n<base> ::= 'A' | 'C' | 'G' | 'T'\n"
);
}
#[test]
fn parse_tree_fmt_empty_rhs() {
let lhs = Term::Nonterminal("root".to_string());
let tree = ParseTree::new(&lhs, vec![]);
let s = tree.to_string();
assert!(s.contains("root"));
assert!(s.contains("::="));
}
#[test]
fn parse_tree_fmt_single_child() {
let lhs = Term::Nonterminal("root".to_string());
let child = ParseTreeNode::Terminal("x");
let tree = ParseTree::new(&lhs, vec![child]);
let s = tree.to_string();
assert!(s.contains("root"));
assert!(s.contains("::="));
assert!(s.contains("\"x\""));
}
#[test]
fn iterate_parse_tree() {
let grammar: Grammar = "<dna> ::= <base> | <base> <dna>
<base> ::= 'A' | 'C' | 'G' | 'T'"
.parse()
.unwrap();
let input = "GATTACA";
let parse_tree = grammar.parse_input(input).next().unwrap();
let rhs_iterated = parse_tree.rhs_iter().next().unwrap();
assert_eq!(parse_tree.rhs.first().unwrap(), rhs_iterated);
}
#[test]
fn iterate_mut_parse_tree() {
let grammar: Grammar = "<dna> ::= <base> | <base> <dna>
<base> ::= 'A' | 'C' | 'G' | 'T'"
.parse()
.unwrap();
let input = "GATTACA";
let mut parse_tree = grammar.parse_input(input).next().unwrap();
let rhs_iterated = parse_tree.rhs_iter_mut().next().unwrap();
*rhs_iterated = ParseTreeNode::Terminal("Z");
assert_eq!(
parse_tree.rhs.first().unwrap(),
&ParseTreeNode::Terminal("Z")
);
}
#[test]
fn does_not_terminate() {
let mut grammar: Grammar = "<nonterm> ::= <nonterm>".parse().unwrap();
assert!(!grammar.terminates());
grammar = "
<A> ::= <X> | <A> <X>
<X> ::= <Y> | <X> <Y>
<Y> ::= <Y> <Z>
<Z> ::= 'terminating state!'"
.parse()
.unwrap();
assert!(!grammar.terminates());
grammar = "
<not-a-good-first-state-lhs> ::= <not-a-good-first-state-rhs>
<A> ::= <X> | <A> <X>
<X> ::= <Y> | <X> <Y>
<Y> ::= <Z> | <Y> <Z>
<Z> ::= 'terminating state!'"
.parse()
.unwrap();
assert!(!grammar.terminates());
}
#[test]
fn does_terminate() {
let mut grammar: Grammar;
grammar = "<nonterm> ::= 'term'".parse().unwrap();
assert!(grammar.terminates());
grammar = "
<A> ::= <B> | <A> <B>
<B> ::= <C> | <B> <C>
<C> ::= <D> | <C> <D>
<D> ::= <E> | <D> <E>
<E> ::= <F> | <E> <F>
<F> ::= <G> | <F> <G>
<G> ::= <H> | <G> <H>
<H> ::= <I> | <H> <I>
<I> ::= <J> | <I> <J>
<J> ::= <K> | <J> <K>
<K> ::= <L> | <K> <L>
<L> ::= <M> | <L> <M>
<M> ::= <N> | <M> <N>
<N> ::= <O> | <N> <O>
<O> ::= <P> | <O> <P>
<P> ::= <Q> | <P> <Q>
<Q> ::= <R> | <Q> <R>
<R> ::= <S> | <R> <S>
<S> ::= <T> | <S> <T>
<T> ::= <U> | <T> <U>
<U> ::= <V> | <U> <V>
<V> ::= <W> | <V> <W>
<W> ::= <X> | <W> <X>
<X> ::= <Y> | <X> <Y>
<Y> ::= <Z> | <Y> <Z>
<Z> ::= 'terminating state!'"
.parse()
.unwrap();
assert!(grammar.terminates());
}
#[test]
fn macro_construct() {
let grammar = crate::grammar! {
<dna> ::= <base> | <base> <dna> ;
<base> ::= 'A' | 'C' | 'G' | 'T' ;
};
let expected = crate::grammar! {
<dna> ::= <base> | <base> <dna> ;
<base> ::= 'A' | 'C' | 'G' | 'T' ;
};
assert_eq!(grammar, expected);
}
#[test]
fn unicode_grammar_parsing() {
let grammar: Result<Grammar, _> = "<emoji> ::= '😀' | '😍' | '🎉' | '🚀'
<symbol> ::= 'α' | 'β' | 'γ' | 'δ'
<text> ::= <emoji> | <symbol> | <emoji> <text>"
.parse();
assert!(grammar.is_ok(), "Should parse Unicode grammar: {grammar:?}");
let grammar = grammar.unwrap();
assert_eq!(grammar.productions_iter().count(), 3);
}
#[test]
fn unicode_grammar_generation() {
let grammar: Grammar = "<emoji> ::= '😀' | '😍' | '🎉' | '🚀'
<symbol> ::= 'α' | 'β' | 'γ' | 'δ'
<text> ::= <emoji> | <symbol> | <emoji> <text>"
.parse()
.unwrap();
let sentence = grammar.generate();
assert!(
sentence.is_ok(),
"Should generate Unicode sentence: {sentence:?}"
);
let sentence = sentence.unwrap();
assert!(
sentence.contains('😀')
|| sentence.contains('😍')
|| sentence.contains('🎉')
|| sentence.contains('🚀')
|| sentence.contains('α')
|| sentence.contains('β')
|| sentence.contains('γ')
|| sentence.contains('δ')
);
}
#[test]
fn unicode_input_parsing() {
let grammar: Grammar = "<text> ::= <emoji> | <emoji> <text>
<emoji> ::= '😀' | '😍' | '🎉' | '🚀'"
.parse()
.unwrap();
let input = "😀🎉";
let mut parse_trees = grammar.parse_input(input);
assert!(
parse_trees.next().is_some(),
"Should parse Unicode input '{input}'"
);
}
#[test]
fn unicode_parse_tree_display() {
let grammar: Grammar = "<text> ::= <emoji> | <emoji> <text>
<emoji> ::= '😀' | '😍' | '🎉' | '🚀'"
.parse()
.unwrap();
let input = "😀😍";
let parse_tree = grammar.parse_input(input).next().unwrap();
let display = parse_tree.to_string();
assert!(display.contains('😀') || display.contains('😍'));
}
#[test]
fn unicode_parse_tree_iteration() {
let grammar: Grammar = "<text> ::= <emoji> | <emoji> <text>
<emoji> ::= '😀' | '😍' | '🎉' | '🚀'"
.parse()
.unwrap();
let input = "😀😍🎉";
let parse_tree = grammar.parse_input(input).next().unwrap();
let rhs_count = parse_tree.rhs_iter().count();
assert!(rhs_count > 0, "Should have RHS nodes in Unicode parse tree");
let mut parse_tree_clone = parse_tree.clone();
let mut rhs_iter = parse_tree_clone.rhs_iter_mut();
assert!(
rhs_iter.next().is_some(),
"Should be able to iterate mutably over Unicode parse tree"
);
}
#[test]
fn unicode_mermaid_format() {
let grammar: Grammar = "<text> ::= <emoji> | <emoji> <text>
<emoji> ::= '😀' | '😍' | '🎉' | '🚀'"
.parse()
.unwrap();
let input = "😀😍";
let parse_tree = grammar.parse_input(input).next().unwrap();
let mermaid = parse_tree.mermaid().to_string();
assert!(
mermaid.contains("flowchart TD"),
"Mermaid should contain flowchart declaration"
);
assert!(
mermaid.contains("-->"),
"Mermaid should contain connection arrows"
);
}
#[test]
fn unicode_complex_grammar() {
let grammar: Grammar = "<greeting> ::= 'Hello' | 'Hola' | 'Bonjour' | '😀'
<name> ::= 'Alice' | 'Bob' | 'Charlie' | 'αβγ'
<punctuation> ::= '.' | '!' | '?' | '🎉'
<sentence> ::= <greeting> <name> <punctuation> | <greeting> <sentence>"
.parse()
.unwrap();
let sentence = grammar.generate();
assert!(
sentence.is_ok(),
"Should generate from complex Unicode grammar: {sentence:?}"
);
let sentence = sentence.unwrap();
assert!(
sentence.contains("Hello")
|| sentence.contains("Hola")
|| sentence.contains("Bonjour")
|| sentence.contains('😀')
|| sentence.contains("Alice")
|| sentence.contains("Bob")
|| sentence.contains("Charlie")
|| sentence.contains("αβγ")
|| sentence.contains('.')
|| sentence.contains('!')
|| sentence.contains('?')
|| sentence.contains('🎉')
);
}
#[test]
fn unicode_nonterminal_names() {
let grammar: Grammar = "<文本> ::= <表情> | <符号> | <表情> <文本>
<表情> ::= '😀' | '😍'
<符号> ::= 'α' | 'β'"
.parse()
.unwrap();
let sentence = grammar.generate();
assert!(
sentence.is_ok(),
"Should generate from grammar with Unicode nonterminals: {sentence:?}"
);
let sentence = sentence.unwrap();
assert!(
sentence.contains('😀')
|| sentence.contains('😍')
|| sentence.contains('α')
|| sentence.contains('β')
);
let input = "😀α";
let mut parse_trees = grammar.parse_input(input);
assert!(
parse_trees.next().is_some(),
"Should parse Unicode input with Unicode nonterminals: '{input}'"
);
}
#[test]
fn parse_starting_with() {
let grammar: Grammar = "<base> ::= 'A' | 'C' | 'G' | 'T'
<dna> ::= <base> | <base> <dna>"
.parse()
.unwrap();
let input = "GATTACA";
assert!(
grammar
.parse_input_starting_with(input, &crate::term!(<dna>))
.next()
.is_some()
);
assert!(
grammar
.parse_input_starting_with(input, &crate::term!(<base>))
.next()
.is_none()
);
}
#[test]
fn parse_starting_with_not_found_production() {
let grammar: Grammar = "<base> ::= 'A' | 'C' | 'G' | 'T'
<dna> ::= <base> | <base> <dna>"
.parse()
.unwrap();
let input = "GATTACA";
assert!(
grammar
.parse_input_starting_with(input, &crate::term!(<notfound>))
.next()
.is_none()
)
}
#[test]
fn escape_mermaid_label_identity() {
assert_eq!(escape_mermaid_label("abc"), "abc");
assert_eq!(escape_mermaid_label("sum"), "sum");
assert_eq!(escape_mermaid_label("G"), "G");
}
#[test]
fn escape_mermaid_label_double_quote() {
assert_eq!(escape_mermaid_label("\""), "#34;");
assert_eq!(escape_mermaid_label("a\"b"), "a#34;b");
}
#[test]
fn escape_mermaid_label_brackets() {
assert_eq!(escape_mermaid_label("["), "#91;");
assert_eq!(escape_mermaid_label("]"), "#93;");
assert_eq!(escape_mermaid_label("[x]"), "#91;x#93;");
}
#[test]
fn escape_mermaid_label_backslash_and_hash_semicolon() {
assert_eq!(escape_mermaid_label("\\"), "#92;");
assert_eq!(escape_mermaid_label("#"), "#35;");
assert_eq!(escape_mermaid_label(";"), "#59;");
assert_eq!(escape_mermaid_label("#34;"), "#35;34#59;");
}
#[test]
fn escape_mermaid_label_angle_brackets_and_ampersand() {
assert_eq!(escape_mermaid_label("<"), "#60;");
assert_eq!(escape_mermaid_label(">"), "#62;");
assert_eq!(escape_mermaid_label("&"), "#38;");
assert_eq!(escape_mermaid_label("<foo>"), "#60;foo#62;");
assert_eq!(escape_mermaid_label("a&b"), "a#38;b");
}
#[test]
fn escape_mermaid_label_newlines() {
assert_eq!(escape_mermaid_label("\n"), "#10;");
assert_eq!(escape_mermaid_label("\r"), "#13;");
assert_eq!(escape_mermaid_label("a\nb"), "a#10;b");
}
#[test]
fn escape_mermaid_label_mixed() {
assert_eq!(
escape_mermaid_label(r#"["];\#""#),
"#91;#34;#93;#59;#92;#35;#34;"
);
}
#[test]
fn mermaid_entity_codes_double_quote_in_label() {
let grammar: Grammar = r#"<q> ::= '"'"#.parse().unwrap();
let input = "\"";
let parse_tree = grammar.parse_input(input).next().unwrap();
let mermaid = parse_tree.mermaid().to_string();
assert!(
mermaid.contains("#34;"),
"mermaid should escape quote as #34;"
);
for line in mermaid.lines() {
if line.contains("-->") {
let quote_count = line.matches('"').count();
assert_eq!(
quote_count, 4,
"each edge line must have 4 quotes (balanced labels); line: {line:?}"
);
}
}
}
#[test]
fn mermaid_entity_codes_brackets_in_label() {
let grammar: Grammar = "<b> ::= ']' | '['
"
.parse()
.unwrap();
for input in ["]", "["] {
let parse_tree = grammar.parse_input(input).next().unwrap();
let mermaid = parse_tree.mermaid().to_string();
assert!(
mermaid.contains("#91;") || mermaid.contains("#93;"),
"mermaid should escape brackets"
);
for line in mermaid.lines() {
if line.contains("-->") {
let quote_count = line.matches('"').count();
assert_eq!(quote_count, 4, "line: {line:?}");
}
}
}
}
}