pub mod grammar_def;
pub mod mapper;
use std::collections::HashMap;
use std::hash::Hash;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Symbol {
Terminal(usize),
NonTerminal(usize),
}
#[derive(Debug, Clone)]
pub struct Rule {
pub(crate) productions: Vec<Vec<Symbol>>,
}
#[derive(Debug, Clone)]
pub struct Grammar<T> {
rules: Vec<Rule>,
terminals: Vec<T>,
start: usize,
}
impl<T: Eq + Hash + Clone> Grammar<T> {
pub fn builder() -> GrammarBuilder<T> {
GrammarBuilder::new()
}
}
impl<T> Grammar<T> {
pub fn terminal_value(&self, idx: usize) -> &T {
&self.terminals[idx]
}
pub fn rules(&self) -> &[Rule] {
&self.rules
}
pub fn terminals(&self) -> &[T] {
&self.terminals
}
pub fn start(&self) -> usize {
self.start
}
}
#[derive(Debug)]
pub struct GrammarBuilder<T> {
rules: Vec<(T, Vec<Vec<T>>)>,
start: Option<T>,
}
impl<T: Eq + Hash + Clone> GrammarBuilder<T> {
pub fn new() -> Self {
Self {
rules: Vec::new(),
start: None,
}
}
pub fn rule(mut self, name: T, productions: &[&[T]]) -> Self {
let prods = productions.iter().map(|p| p.to_vec()).collect();
self.rules.push((name, prods));
self
}
pub fn start(mut self, name: T) -> Self {
self.start = Some(name);
self
}
pub fn build(self) -> Grammar<T> {
let start_name = self.start.expect("start symbol must be set");
let mut rule_index: HashMap<&T, usize> = HashMap::new();
for (i, (name, _)) in self.rules.iter().enumerate() {
rule_index.insert(name, i);
}
assert!(
rule_index.contains_key(&start_name),
"start symbol must match a rule name"
);
let mut terminal_index: HashMap<&T, usize> = HashMap::new();
let mut terminals: Vec<T> = Vec::new();
for (_, prods) in &self.rules {
for prod in prods {
for sym in prod {
if !rule_index.contains_key(sym) && !terminal_index.contains_key(sym) {
terminal_index.insert(sym, terminals.len());
terminals.push(sym.clone());
}
}
}
}
let rules: Vec<Rule> = self
.rules
.iter()
.map(|(_name, prods)| {
let productions = prods
.iter()
.map(|p| {
p.iter()
.map(|s| {
if let Some(&idx) = rule_index.get(s) {
Symbol::NonTerminal(idx)
} else {
Symbol::Terminal(terminal_index[s])
}
})
.collect()
})
.collect();
Rule { productions }
})
.collect();
let start = rule_index[&start_name];
Grammar {
rules,
terminals,
start,
}
}
}
impl<T: Eq + Hash + Clone> Default for GrammarBuilder<T> {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn builder_constructs_grammar() {
let grammar = Grammar::builder()
.rule("expr", &[&["expr", "op", "expr"], &["x"]])
.rule("op", &[&["+"], &["-"]])
.start("expr")
.build();
assert_eq!(grammar.start, 0);
assert_eq!(grammar.rules[grammar.start].productions.len(), 2);
}
#[test]
fn symbol_classification() {
let grammar = Grammar::builder()
.rule("expr", &[&["expr", "+", "x"]])
.start("expr")
.build();
assert_eq!(grammar.rules.len(), 1);
assert_eq!(grammar.terminals.len(), 2);
assert!(grammar.terminals.contains(&"+"));
assert!(grammar.terminals.contains(&"x"));
}
#[test]
fn terminal_value_lookup() {
let grammar = Grammar::builder()
.rule("s", &[&["hello"]])
.start("s")
.build();
assert_eq!(grammar.terminal_value(0), &"hello");
}
#[test]
#[should_panic(expected = "start symbol must be set")]
fn missing_start_panics() {
Grammar::<&str>::builder().rule("s", &[&["x"]]).build();
}
#[test]
#[should_panic(expected = "start symbol must match a rule name")]
fn invalid_start_panics() {
Grammar::builder()
.rule("s", &[&["x"]])
.start("missing")
.build();
}
}