#![deny(warnings)]
use std::collections::HashMap;
use std::rc::Rc;
use std::{fmt, hash};
pub enum Symbol {
NonTerm(String),
Term(String, Box<dyn Fn(&str) -> bool>),
}
impl Symbol {
pub fn name(&self) -> &str {
match self {
Symbol::NonTerm(name) => name,
Symbol::Term(name, _) => name,
}
}
pub fn matches(&self, input: &str) -> bool {
match self {
Symbol::Term(_, matcher) => matcher(input),
_ => false,
}
}
pub fn is_terminal(&self) -> bool {
matches!(self, Symbol::Term(_, _))
}
}
impl hash::Hash for Symbol {
fn hash<H: hash::Hasher>(&self, state: &mut H) {
match self {
Symbol::Term(name, matcher) => {
name.hash(state);
(matcher as *const dyn Fn(&str) -> bool).hash(state);
}
Symbol::NonTerm(name) => name.hash(state),
}
}
}
impl PartialEq for Symbol {
fn eq(&self, other: &Symbol) -> bool {
match (self, other) {
(Symbol::Term(s, m1), Symbol::Term(o, m2)) => {
s == o
&& std::ptr::addr_eq(&m1 as *const dyn Fn(&str) -> bool, &m2 as *const dyn Fn(&str) -> bool)
}
(Symbol::NonTerm(s), Symbol::NonTerm(o)) => s == o,
_ => false,
}
}
}
impl Eq for Symbol {}
impl fmt::Debug for Symbol {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Symbol::Term(name, _) => write!(f, "Term({})", name),
Symbol::NonTerm(name) => write!(f, "NonTerm({})", name),
}
}
}
#[derive(PartialEq, Hash)]
pub struct Rule {
pub head: String,
pub spec: Vec<Rc<Symbol>>,
}
impl Rule {
#[cfg(test)]
pub fn new(head: &str, spec: &[Rc<Symbol>]) -> Self {
Rule {
head: head.to_string(),
spec: spec.iter().cloned().collect(),
}
}
}
impl fmt::Display for Rule {
fn fmt(&self, f: &mut fmt::Formatter) -> std::fmt::Result {
write!(f,
"{} -> {}",
self.head,
self.spec
.iter()
.map(|s| s.name())
.collect::<Vec<_>>()
.join(" ")
)
}
}
impl fmt::Debug for Rule {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self)
}
}
#[derive(Clone)]
pub struct Grammar {
pub start: String,
pub rules: Vec<Rc<Rule>>,
}
impl fmt::Debug for Grammar {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use std::collections::hash_map::Entry;
let mut group_order = Vec::new();
let mut rule_groups = HashMap::new();
for r in &self.rules {
match rule_groups.entry(&r.head) {
Entry::Vacant(e) => {
group_order.push(&r.head);
e.insert(Vec::new()).push(r);
}
Entry::Occupied(mut e) => e.get_mut().push(r),
}
}
writeln!(f, "Start: {}", self.start)?;
for head in group_order {
writeln!(f)?;
for rule in rule_groups.get(head).unwrap() {
writeln!(f, "{}", rule)?;
}
}
Ok(())
}
}
#[derive(Default)]
pub struct GrammarBuilder {
symbols: HashMap<String, Rc<Symbol>>,
rules: Vec<Rc<Rule>>,
error: Option<String>,
}
impl GrammarBuilder {
fn add_symbol(&mut self, symbol: Symbol, ignore_dups: bool) {
if !self.symbols.contains_key(symbol.name()) {
self.symbols
.insert(symbol.name().to_string(), Rc::new(symbol));
} else if !ignore_dups {
self.error = Some(format!("Duplicate Symbol: {}", symbol.name()));
}
}
pub fn nonterm(mut self, name: &str) -> Self {
self.add_symbol(Symbol::NonTerm(name.into()), false);
self
}
pub fn terminal(mut self, name: &str, pred: impl Fn(&str) -> bool + 'static) -> Self {
self.add_symbol(Symbol::Term(name.into(), Box::new(pred)), false);
self
}
pub fn nonterm_try(&mut self, name: &str) {
self.add_symbol(Symbol::NonTerm(name.into()), true);
}
pub fn terminal_try(&mut self, name: &str, pred: impl Fn(&str) -> bool + 'static) {
self.add_symbol(Symbol::Term(name.into(), Box::new(pred)), true);
}
fn add_rule(&mut self, head: &str, spec: &[&str], ignore_dups: bool) {
if let Some(s) = spec.iter().find(|&n| !self.symbols.contains_key(*n)) {
self.error = Some(format!("Missing Symbol: {}", s));
return;
}
if let Some(s) = self.symbols.get(head) {
if s.is_terminal() {
self.error = Some(format!("Rule head must be Term: {}", head));
return;
}
} else {
self.error = Some(format!("Missing Symbol: {}", head));
return;
}
let rule = Rc::new(Rule {
head: head.to_string(),
spec: spec.iter().map(|&s| self.symbols[s].clone()).collect(),
});
if !self.rules.contains(&rule) {
self.rules.push(rule);
} else if !ignore_dups {
self.error = Some(format!("Duplicate Rule: {}", rule));
}
}
pub fn rule(mut self, head: &str, spec: &[&str]) -> Self {
self.add_rule(head, spec, false);
self
}
pub fn rule_try(&mut self, head: &str, spec: &[&str]) {
self.add_rule(head, spec, true)
}
pub fn into_grammar(mut self, start: &str) -> Result<Grammar, String> {
let start = start.into();
if let Some(s) = self.symbols.get(&start) {
if s.is_terminal() {
self.error = Some(format!("Grammar start must be NonTerm: {}", start));
}
} else {
self.error = Some(format!("Missing start Symbol: {}", start));
}
self.error.map_or(
Ok(Grammar {
start,
rules: self.rules,
}),
Err,
)
}
pub fn unique_symbol_name(&self) -> String {
format!("<Uniq-{}>", self.symbols.len())
}
}
#[cfg(test)]
mod tests {
use super::{GrammarBuilder, Symbol};
use std::collections::HashSet;
#[test]
fn symbol_check_eq_hash() {
assert_ne!(
Symbol::NonTerm("X".to_string()),
Symbol::Term("X".to_string(), Box::new(|_| true))
);
let mut m = HashSet::new();
m.insert(Symbol::NonTerm("X".to_string()));
m.insert(Symbol::Term("X".to_string(), Box::new(|_| true)));
assert_eq!(m.len(), 2);
}
#[test]
fn symbol_terminal_matches() {
let term = Symbol::Term(
"uint".to_string(),
Box::new(|n| n.chars().all(|c| "1234567890".contains(c))),
);
assert_eq!(term.name(), "uint");
assert!(term.matches("123"));
}
#[test]
fn build_grammar() {
let g = GrammarBuilder::default()
.nonterm("Sum")
.terminal("Num", |n| n.chars().all(|c| "123".contains(c)))
.terminal("+", |n| n == "+")
.rule("Sum", &["Sum", "+", "Num"])
.rule("Sum", &["Num"])
.into_grammar("Sum");
assert!(g.is_ok());
}
#[test]
fn grammar_has_dup_symbol() {
let g = GrammarBuilder::default()
.nonterm("Sum")
.nonterm("Sum")
.into_grammar("Sum");
assert_eq!(g.unwrap_err(), "Duplicate Symbol: Sum");
}
#[test]
fn grammar_has_dup_rule() {
let g = GrammarBuilder::default()
.nonterm("Sum")
.terminal("Num", |n| n.chars().all(|c| "123".contains(c)))
.terminal("+", |n| n == "+")
.rule("Sum", &["Sum", "+", "Num"])
.rule("Sum", &["Sum", "+", "Num"])
.rule("Sum", &["Num"])
.into_grammar("Sum");
assert_eq!(g.unwrap_err(), "Duplicate Rule: Sum -> Sum + Num");
}
#[test]
fn grammar_rule_head_nonterm() {
let g = GrammarBuilder::default()
.nonterm("Sum")
.terminal("Num", |n| n.chars().all(|c| "123".contains(c)))
.terminal("+", |n| n == "+")
.rule("Sum", &["Sum", "+", "Num"])
.rule("Sum", &["Num"])
.into_grammar("Num");
assert_eq!(g.unwrap_err(), "Grammar start must be NonTerm: Num");
}
#[test]
fn grammar_missing_symbol() {
let g = GrammarBuilder::default()
.nonterm("Sum")
.terminal("Num", |n| n.chars().all(|c| "123".contains(c)))
.rule("Sum", &["Num"])
.into_grammar("Xum");
assert_eq!(g.unwrap_err(), "Missing start Symbol: Xum");
let g = GrammarBuilder::default()
.nonterm("Sum")
.terminal("Num", |n| n.chars().all(|c| "123".contains(c)))
.rule("Sum", &["Num", "+", "Num"])
.into_grammar("Sum");
assert_eq!(g.unwrap_err(), "Missing Symbol: +");
let g = GrammarBuilder::default()
.nonterm("Sum")
.terminal("Num", |n| n.chars().all(|c| "123".contains(c)))
.rule("Rum", &["Num"])
.into_grammar("Sum");
assert_eq!(g.unwrap_err(), "Missing Symbol: Rum");
}
}