use crate::grammar::Rule;
use crate::lexer::{LexerConf, Symbol, Tokenizer};
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
/// Grammar-side configuration: the rule table, the ignored terminals, and a
/// flat list of every rule.
#[derive(Debug)]
pub struct ParserConf {
/// Rules grouped under a symbol key; `add_rules` files each rule under its
/// `origin` symbol.
rules: HashMap<Arc<Symbol>, Vec<Arc<Rule>>>,
/// Terminal symbols built from the `ignores` names passed to `new`, shared
/// behind an `Arc` so callers can hold the set without copying it.
ignore_symbols: Arc<HashSet<Arc<Symbol>>>,
/// Every rule stored in `rules`, flattened into a single list.
all_expansions: Vec<Arc<Rule>>,
}
impl ParserConf {
    /// Builds a configuration from a rule table and the names of ignored terminals.
    ///
    /// Each name in `ignores` is wrapped in `Symbol::Terminal` and collected into a
    /// shared set. Every rule in `rules` is also recorded in a flat list; that list
    /// follows `HashMap` iteration order, which is unspecified.
    pub fn new(rules: HashMap<Arc<Symbol>, Vec<Arc<Rule>>>, ignores: Vec<String>) -> Self {
        // `ignores` is owned, so the names are moved into the symbols instead of cloned.
        let ignore_symbols: HashSet<_> = ignores
            .into_iter()
            .map(|name| Arc::new(Symbol::Terminal(name)))
            .collect();
        let all_expansions = rules.values().flatten().cloned().collect();
        Self {
            rules,
            ignore_symbols: Arc::new(ignore_symbols),
            all_expansions,
        }
    }

    /// Returns the shared set of ignored terminal symbols.
    pub(crate) fn get_ignore_symbols(&self) -> &Arc<HashSet<Arc<Symbol>>> {
        &self.ignore_symbols
    }

    /// Reports whether at least one entry is registered under `name`.
    pub fn contains_rule(&self, name: &Arc<Symbol>) -> bool {
        self.rules.contains_key(name)
    }

    /// Registers `rule` under its `origin` symbol and appends it to the flat rule list.
    pub fn add_rules(&mut self, rule: Arc<Rule>) {
        self.rules
            .entry(rule.origin.clone())
            .or_default()
            .push(Arc::clone(&rule));
        self.all_expansions.push(rule);
    }

    /// Iterates over the rules registered under `name`.
    ///
    /// Yields nothing when `name` has no entry, rather than panicking; use
    /// `contains_rule` or `get_expansion` to distinguish "unknown symbol" from
    /// "known symbol with no rules".
    pub fn next_expansion(&self, name: &Arc<Symbol>) -> impl Iterator<Item = &Arc<Rule>> + '_ {
        // `Option<&Vec<_>>` iterates zero or one times; flattening walks the vector
        // when it is present and produces an empty iterator otherwise.
        self.rules.get(name).into_iter().flatten()
    }

    /// Returns a copy of the flat rule list (the `Arc` handles are cloned, not the rules).
    pub fn get_all_expansion(&self) -> Vec<Arc<Rule>> {
        self.all_expansions.clone()
    }

    /// Looks up the rules registered under `key`, or `None` when there is no entry.
    pub fn get_expansion(&self, key: &Arc<Symbol>) -> Option<&Vec<Arc<Rule>>> {
        self.rules.get(key)
    }
}
/// Pairs a lexer configuration with a parser configuration so that text can be
/// tokenized using the parser's ignore set.
#[derive(Debug)]
pub struct ParserFrontend {
/// Shared lexer configuration used to tokenize input text.
lexer: Arc<LexerConf>,
/// Shared parser configuration; supplies the ignored symbols for tokenizing.
parser: Arc<ParserConf>,
}
impl ParserFrontend {
pub(crate) fn new(lexer: Arc<LexerConf>, parser: Arc<ParserConf>) -> Self {
Self { lexer, parser }
}
pub(crate) fn tokenizer(&self, text: &str) -> Tokenizer {
self.lexer
.tokenize(text, self.parser.get_ignore_symbols().clone())
}
#[allow(dead_code)]
pub(crate) fn get_lexer(&self) -> Arc<LexerConf> {
self.lexer.clone()
}
pub fn get_parser(&self) -> &Arc<ParserConf> {
&self.parser
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::grammar::RuleOption;
    use crate::lexer::{RegexFlag, TerminalDef};

    /// Builds a rule `origin -> expansion...` with default options and order 0.
    fn sample_rule(origin: &str, expansion: &[&str]) -> Arc<Rule> {
        let head = Arc::new(Symbol::NonTerminal(String::from(origin)));
        let options = Arc::new(RuleOption::default());
        let rule = Rule::new(
            head,
            expansion
                .iter()
                .map(|name| crate::lexer::get_symbol(name))
                .collect(),
            options,
            0,
        );
        Arc::new(rule)
    }

    /// Covers construction, insertion, lookup and iteration on `ParserConf`.
    #[test]
    fn parser_conf_crud_and_iteration_work() {
        let nonterminal = |name: &str| Arc::new(Symbol::NonTerminal(name.to_owned()));

        // Start with a single `start` rule, then add an `expr` rule afterwards.
        let mut initial_rules = HashMap::new();
        initial_rules.insert(
            nonterminal("start"),
            vec![sample_rule("start", &["expr"])],
        );
        let mut conf = ParserConf::new(initial_rules, vec![String::from("WS")]);
        conf.add_rules(sample_rule("expr", &["INT"]));

        let start_key = nonterminal("start");
        let expr_key = nonterminal("expr");
        let whitespace = Arc::new(Symbol::Terminal(String::from("WS")));

        assert!(conf.contains_rule(&start_key));
        assert!(conf.contains_rule(&expr_key));
        assert!(conf.get_ignore_symbols().contains(&whitespace));
        assert_eq!(conf.next_expansion(&expr_key).count(), 1);
        assert_eq!(conf.get_all_expansion().len(), 2);
        assert_eq!(conf.get_expansion(&expr_key).map(Vec::len), Some(1));
    }

    /// Checks that the frontend tokenizes text and hands back the same shared
    /// lexer and parser it was built from.
    #[test]
    fn parser_frontend_exposes_lexer_and_parser() {
        let parser_conf = Arc::new(ParserConf::new(HashMap::new(), Vec::new()));
        let int_terminal = Arc::new(TerminalDef::with_regex(
            "INT",
            r"\d+",
            RegexFlag::default(),
            0,
        ));
        let lexer_conf = Arc::new(LexerConf::new(vec![int_terminal]));
        let frontend = ParserFrontend::new(Arc::clone(&lexer_conf), Arc::clone(&parser_conf));

        let mut tokens = frontend.tokenizer("42");
        let first = tokens.next().unwrap();
        assert_eq!(first.word(), "42");

        assert!(Arc::ptr_eq(frontend.get_parser(), &parser_conf));
        assert!(Arc::ptr_eq(&frontend.get_lexer(), &lexer_conf));
    }
}