pub(crate) mod grammar;
use crate::ParseTree;
use crate::error::Error;
use crate::grammar::Grammar;
use crate::term::Term;
use crate::tracing;
use grammar::ParseGrammar;
use std::rc::Rc;
#[derive(Debug)]
pub struct GrammarParser<'gram> {
pub(crate) starting_term: &'gram Term,
pub(crate) parse_grammar: Rc<ParseGrammar<'gram>>,
}
impl<'gram> GrammarParser<'gram> {
pub fn new(grammar: &'gram Grammar) -> Result<Self, Error> {
let _span = tracing::span!(DEBUG, "GrammarParser::new").entered();
let starting_term = grammar.starting_term().ok_or_else(|| {
Error::ValidationError("Grammar must have at least one production".to_string())
})?;
let parse_grammar = Rc::new(ParseGrammar::new(grammar)?);
Ok(Self {
starting_term,
parse_grammar,
})
}
pub(crate) fn new_unchecked(grammar: &'gram Grammar) -> Self {
let _span = tracing::span!(DEBUG, "GrammarParser::new_unchecked").entered();
let starting_term = grammar
.starting_term()
.expect("Grammar must have at least one production");
let parse_grammar = Rc::new(ParseGrammar::new_unchecked(grammar));
Self {
starting_term,
parse_grammar,
}
}
pub fn parse_input<'p: 'gram>(
&'p self,
input: &'gram str,
) -> impl Iterator<Item = ParseTree<'gram>> + use<'p, 'gram> {
let _span = tracing::span!(DEBUG, "GrammarParser::parse_input").entered();
self.parse_input_starting_with(input, self.starting_term)
}
pub fn parse_input_starting_with<'p: 'gram>(
&'p self,
input: &'gram str,
start: &'gram Term,
) -> impl Iterator<Item = ParseTree<'gram>> + use<'p, 'gram> {
let _span = tracing::span!(DEBUG, "GrammarParser::parse_input_starting_with").entered();
crate::earley::parse(self, input, Some(start))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::Grammar;
use crate::expression::Expression;
use crate::production::Production;
use quickcheck::{Arbitrary, Gen, QuickCheck, TestResult};
#[test]
fn parser_construction_with_valid_grammar() {
let grammar: Grammar = "<dna> ::= <base> | <base> <dna>
<base> ::= 'A' | 'C' | 'G' | 'T'"
.parse()
.unwrap();
let parser = grammar.build_parser();
assert!(
parser.is_ok(),
"Parser should be constructible from valid grammar"
);
}
#[test]
fn parser_construction_fails_with_empty_grammar() {
let grammar = Grammar::from_parts(vec![]);
let parser = grammar.build_parser();
assert!(
parser.is_err(),
"Parser construction should fail with empty grammar"
);
assert!(
matches!(parser.unwrap_err(), Error::ValidationError(_)),
"Error should be ValidationError about missing productions"
);
}
#[test]
fn parser_validation_with_group_containing_undefined() {
let grammar: Grammar = "<start> ::= (<undefined>)".parse().unwrap();
let parser = grammar.build_parser();
assert!(
parser.is_err(),
"Parser should fail when group contains undefined nonterminal"
);
assert!(matches!(parser.unwrap_err(), Error::ValidationError(_)));
}
#[test]
fn parser_validation_with_group_containing_defined() {
let grammar: Grammar = r#"<start> ::= (<base>)
<base> ::= 'A'"#
.parse()
.unwrap();
let parser = grammar.build_parser();
assert!(
parser.is_ok(),
"Parser should succeed when group contains defined nonterminal"
);
}
#[test]
fn normalization_groups_and_optionals_produce_named_nonterminals() {
let grammar: Grammar = r#"<s> ::= (<a> | <b>) [<c>]
<a> ::= 'a'
<b> ::= 'b'
<c> ::= 'c'"#
.parse()
.unwrap();
for prod in grammar.productions_iter() {
for expr in prod.rhs_iter() {
for term in expr.terms_iter() {
match term {
crate::Term::Terminal(_) | crate::Term::Nonterminal(_) => {}
}
}
}
}
let parser = grammar.build_parser().unwrap();
assert!(parser.parse_input("a").next().is_some());
assert!(parser.parse_input("ac").next().is_some());
assert!(parser.parse_input("").next().is_none());
}
#[test]
fn parse_empty_optional_bnf() {
let grammar: Grammar = r#"<s> ::= [<x>]
<x> ::= 'x'"#
.parse()
.unwrap();
let parser = grammar.build_parser().unwrap();
assert!(parser.parse_input("").next().is_some());
assert!(parser.parse_input("x").next().is_some());
}
#[test]
fn user_defined_anon_name_no_collision() {
let grammar: Grammar = r#"<__anon0> ::= 'a'
<s> ::= (<__anon0>)"#
.parse()
.unwrap();
let parser = grammar.build_parser().unwrap();
assert!(parser.parse_input("a").next().is_some());
let lhs_names: Vec<_> = grammar
.productions_iter()
.map(|p| match &p.lhs {
crate::Term::Nonterminal(n) => n.as_str(),
_ => "",
})
.collect();
assert!(lhs_names.contains(&"__anon0"));
assert!(lhs_names.iter().any(|n| n.starts_with("__anon")));
}
#[test]
fn parser_construction_fails_with_undefined_nonterminal() {
let grammar: Grammar = "<dna> ::= <base> | <base> <dna>
<base> ::= <undefined>"
.parse()
.unwrap();
let parser = grammar.build_parser();
assert!(
parser.is_err(),
"Parser construction should fail with undefined nonterminal"
);
assert!(
matches!(parser.unwrap_err(), Error::ValidationError(_)),
"Error should be ValidationError"
);
}
#[test]
fn parser_can_parse_multiple_inputs() {
let grammar: Grammar = "<dna> ::= <base> | <base> <dna>
<base> ::= 'A' | 'C' | 'G' | 'T'"
.parse()
.unwrap();
let parser = grammar.build_parser().unwrap();
let input1 = "GATTACA";
let input2 = "ATCG";
let parse_trees1: Vec<_> = parser.parse_input(input1).collect();
let parse_trees2: Vec<_> = parser.parse_input(input2).collect();
assert!(!parse_trees1.is_empty(), "Should parse first input");
assert!(!parse_trees2.is_empty(), "Should parse second input");
}
#[test]
fn parser_accepts_explicit_starting_nonterminal() {
let grammar: Grammar = "<base> ::= 'A' | 'C' | 'G' | 'T'
<dna> ::= <base> | <base> <dna>"
.parse()
.unwrap();
let parser = grammar.build_parser().unwrap();
let input = "GATTACA";
let start_term = crate::term!(<dna>);
let parse_trees: Vec<_> = parser
.parse_input_starting_with(input, &start_term)
.collect();
assert!(
!parse_trees.is_empty(),
"Should parse with explicit starting nonterminal"
);
}
#[test]
fn parser_accepts_explicit_starting_terminal() {
let grammar: Grammar = "<base> ::= 'A' | 'C' | 'G' | 'T'
<dna> ::= <base> | <base> <dna>"
.parse()
.unwrap();
let parser = grammar.build_parser().unwrap();
let input = "G";
let start_term = crate::term!("G");
let parse_trees: Vec<_> = parser
.parse_input_starting_with(input, &start_term)
.collect();
assert_eq!(
parse_trees.len(),
0,
"Terminal starting term produces no parse trees"
);
}
#[test]
fn parser_is_order_independent() {
let grammar1: Grammar = "<dna> ::= <base> | <base> <dna>
<base> ::= 'A' | 'C' | 'G' | 'T'"
.parse()
.unwrap();
let grammar2: Grammar = "<base> ::= 'A' | 'C' | 'G' | 'T'
<dna> ::= <base> | <base> <dna>"
.parse()
.unwrap();
let parser1 = grammar1.build_parser().unwrap();
let parser2 = grammar2.build_parser().unwrap();
let input = "GATTACA";
let start_term = crate::term!(<dna>);
let parse_trees1: Vec<_> = parser1
.parse_input_starting_with(input, &start_term)
.collect();
let parse_trees2: Vec<_> = parser2
.parse_input_starting_with(input, &start_term)
.collect();
assert_eq!(
parse_trees1.len(),
parse_trees2.len(),
"Should produce same number of parse trees regardless of order"
);
}
#[derive(Debug, Clone)]
struct SimpleValidGrammar(Grammar);
impl Arbitrary for SimpleValidGrammar {
fn arbitrary(g: &mut Gen) -> Self {
let num_nonterms = usize::arbitrary(g) % 5 + 1;
let nonterms: Vec<String> = (0..num_nonterms).map(|i| format!("nt{}", i)).collect();
let mut productions = Vec::new();
for (idx, nt) in nonterms.iter().enumerate() {
let mut expressions = Vec::new();
let num_alternatives = usize::arbitrary(g) % 3 + 1;
for _ in 0..num_alternatives {
let mut terms = Vec::new();
let num_terms = usize::arbitrary(g) % 3 + 1;
for _ in 0..num_terms {
if bool::arbitrary(g) && idx > 0 {
let ref_idx = usize::arbitrary(g) % idx;
if let Some(nt) = nonterms.get(ref_idx) {
terms.push(Term::Nonterminal(nt.clone()));
} else {
let term_str = String::arbitrary(g);
terms.push(Term::Terminal(term_str));
}
} else {
let term_str = String::arbitrary(g);
terms.push(Term::Terminal(term_str));
}
}
expressions.push(Expression::from_parts(terms));
}
productions.push(Production::from_parts(
Term::Nonterminal(nt.clone()),
expressions,
));
}
Self(Grammar::from_parts(productions))
}
}
#[derive(Debug, Clone)]
struct GrammarWithUndefined(Grammar);
impl Arbitrary for GrammarWithUndefined {
fn arbitrary(g: &mut Gen) -> Self {
let defined_count = usize::arbitrary(g) % 4 + 1;
let num_undefined = usize::arbitrary(g) % 3 + 1;
let defined_nonterms: Vec<String> =
(0..defined_count).map(|i| format!("nt{}", i)).collect();
let undefined_nonterms: Vec<String> = (0..num_undefined)
.map(|i| format!("undefined{}", i))
.collect();
let all_nonterms: Vec<String> = defined_nonterms
.iter()
.chain(undefined_nonterms.iter())
.cloned()
.collect();
let mut productions = Vec::new();
let mut has_undefined_reference = false;
for (idx, nt) in defined_nonterms.iter().enumerate() {
let mut expressions = Vec::new();
let num_alternatives = usize::arbitrary(g) % 2 + 1;
for alt_idx in 0..num_alternatives {
let mut terms = Vec::new();
let num_terms = usize::arbitrary(g) % 2 + 1;
let is_first_alt_of_first_prod = idx == 0 && alt_idx == 0;
let must_insert_undefined =
is_first_alt_of_first_prod && !has_undefined_reference;
for term_idx in 0..num_terms {
let use_nonterminal = must_insert_undefined && term_idx == 0
|| (bool::arbitrary(g) && !all_nonterms.is_empty());
if use_nonterminal {
let ref_idx = if must_insert_undefined && term_idx == 0 {
has_undefined_reference = true;
defined_count + usize::arbitrary(g) % num_undefined
} else {
usize::arbitrary(g) % all_nonterms.len()
};
if let Some(ref_nt) = all_nonterms.get(ref_idx) {
terms.push(Term::Nonterminal(ref_nt.clone()));
} else {
terms.push(Term::Terminal(String::arbitrary(g)));
}
} else {
terms.push(Term::Terminal(String::arbitrary(g)));
}
}
expressions.push(Expression::from_parts(terms));
}
productions.push(Production::from_parts(
Term::Nonterminal(nt.clone()),
expressions,
));
}
Self(Grammar::from_parts(productions))
}
}
fn prop_parser_fails_with_undefined_nonterminal(grammar: GrammarWithUndefined) -> TestResult {
let grammar = grammar.0;
let parser = grammar.build_parser();
let is_validation_error = matches!(parser, Err(Error::ValidationError(_)));
TestResult::from_bool(is_validation_error)
}
#[test]
fn parser_fails_with_undefined_nonterminal() {
QuickCheck::new().tests(100).quickcheck(
prop_parser_fails_with_undefined_nonterminal as fn(GrammarWithUndefined) -> TestResult,
);
}
#[derive(Debug, Clone)]
struct ValidGrammarWithMultipleProductions(Grammar);
impl Arbitrary for ValidGrammarWithMultipleProductions {
fn arbitrary(g: &mut Gen) -> Self {
let num_nonterms = usize::arbitrary(g) % 4 + 2;
let nonterms: Vec<String> = (0..num_nonterms).map(|i| format!("nt{}", i)).collect();
let mut productions = Vec::new();
for (idx, nt) in nonterms.iter().enumerate() {
let mut expressions = Vec::new();
let num_alternatives = usize::arbitrary(g) % 2 + 1;
for _ in 0..num_alternatives {
let mut terms = Vec::new();
let num_terms = usize::arbitrary(g) % 2 + 1;
for _ in 0..num_terms {
if bool::arbitrary(g) && idx > 0 {
let ref_idx = usize::arbitrary(g) % idx;
if let Some(nt) = nonterms.get(ref_idx) {
terms.push(Term::Nonterminal(nt.clone()));
} else {
terms.push(Term::Terminal(String::arbitrary(g)));
}
} else {
terms.push(Term::Terminal(String::arbitrary(g)));
}
}
expressions.push(Expression::from_parts(terms));
}
productions.push(Production::from_parts(
Term::Nonterminal(nt.clone()),
expressions,
));
}
Self(Grammar::from_parts(productions))
}
}
fn prop_parser_order_independent(grammar: ValidGrammarWithMultipleProductions) -> TestResult {
let grammar = grammar.0;
let mut productions: Vec<_> = grammar.productions_iter().cloned().collect();
let mut rng = rand::rng();
rand::seq::SliceRandom::shuffle(productions.as_mut_slice(), &mut rng);
let grammar1 = grammar;
let grammar2 = Grammar::from_parts(productions);
let parser1 = match grammar1.build_parser() {
Ok(p) => p,
Err(_) => return TestResult::discard(),
};
let parser2 = match grammar2.build_parser() {
Ok(p) => p,
Err(_) => return TestResult::discard(),
};
let starting_term = match grammar1.starting_term() {
Some(t) => t,
None => return TestResult::discard(),
};
let sentence = match grammar1.generate() {
Ok(s) => s,
Err(_) => return TestResult::discard(),
};
let parse_trees1: Vec<_> = parser1
.parse_input_starting_with(&sentence, starting_term)
.collect();
let parse_trees2: Vec<_> = parser2
.parse_input_starting_with(&sentence, starting_term)
.collect();
TestResult::from_bool(parse_trees1.len() == parse_trees2.len())
}
#[test]
fn parser_order_independent() {
QuickCheck::new().tests(50).quickcheck(
prop_parser_order_independent as fn(ValidGrammarWithMultipleProductions) -> TestResult,
);
}
fn prop_parser_reusable(grammar: SimpleValidGrammar) -> TestResult {
let grammar = grammar.0;
if !grammar.terminates() {
return TestResult::discard();
}
let parser = match grammar.build_parser() {
Ok(p) => p,
Err(_) => return TestResult::discard(),
};
let sentence = match grammar.generate() {
Ok(s) => s,
Err(_) => return TestResult::discard(),
};
let parse_trees1: Vec<_> = parser.parse_input(&sentence).collect();
let parse_trees2: Vec<_> = parser.parse_input(&sentence).collect();
let parse_trees3: Vec<_> = parser.parse_input(&sentence).collect();
TestResult::from_bool(
parse_trees1.len() == parse_trees2.len() && parse_trees2.len() == parse_trees3.len(),
)
}
#[test]
fn parser_reusable() {
QuickCheck::new()
.tests(100)
.quickcheck(prop_parser_reusable as fn(SimpleValidGrammar) -> TestResult);
}
fn prop_validation_catches_all_undefined(grammar: GrammarWithUndefined) -> TestResult {
let grammar = grammar.0;
let mut defined = crate::HashSet::new();
for production in grammar.productions_iter() {
if let Term::Nonterminal(nt) = &production.lhs {
defined.insert(nt.clone());
}
}
let mut referenced = crate::HashSet::new();
for production in grammar.productions_iter() {
for expression in production.rhs_iter() {
for term in expression.terms_iter() {
if let Term::Nonterminal(nt) = term {
referenced.insert(nt.clone());
}
}
}
}
let undefined: Vec<_> = referenced.difference(&defined).cloned().collect();
let parser_result = grammar.build_parser();
match parser_result {
Ok(_) => {
TestResult::from_bool(undefined.is_empty())
}
Err(Error::ValidationError(msg)) => {
let any_mentioned = undefined
.iter()
.any(|nt| msg.contains(&format!("<{nt}>")) || msg.contains(nt));
TestResult::from_bool(!undefined.is_empty() && any_mentioned)
}
Err(_) => TestResult::error("Expected ValidationError"),
}
}
#[test]
fn validation_catches_all_undefined() {
QuickCheck::new().tests(100).quickcheck(
prop_validation_catches_all_undefined as fn(GrammarWithUndefined) -> TestResult,
);
}
}