use crate::analysis::lookahead_dfa::ProductionIndex;
use crate::parser::parol_grammar::LookaheadExpression;
use crate::{Pos, Pr, Symbol, Terminal, TerminalKind};
use parol_runtime::once_cell::sync::Lazy;
use parol_runtime::{NonTerminalIndex, TerminalIndex};
use regex::Regex;
use std::collections::{BTreeMap, BTreeSet};
use std::collections::{HashMap, HashSet};
use std::ops::Index;
pub(crate) static RX_NUM_SUFFIX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"[0-9]+$").expect("error parsing regex"));
pub trait TerminalIndexFn {
fn terminal_index(
&self,
t: &str,
k: TerminalKind,
l: &Option<LookaheadExpression>,
) -> TerminalIndex;
}
impl<F> TerminalIndexFn for F
where
F: Fn(&str, TerminalKind, &Option<LookaheadExpression>) -> TerminalIndex,
{
fn terminal_index(
&self,
t: &str,
k: TerminalKind,
l: &Option<LookaheadExpression>,
) -> TerminalIndex {
self(t, k, l)
}
}
pub trait NonTerminalIndexFn {
fn non_terminal_index(&self, t: &str) -> NonTerminalIndex;
}
impl<F> NonTerminalIndexFn for F
where
F: Fn(&str) -> NonTerminalIndex,
{
fn non_terminal_index(&self, t: &str) -> NonTerminalIndex {
self(t)
}
}
pub(crate) type FnPrimaryNonTerminalFinder = Box<dyn Fn(TerminalIndex) -> Option<String>>;
#[derive(Debug, Default, Clone)]
pub struct Cfg {
pub st: String,
pub pr: Vec<Pr>,
}
impl Cfg {
pub fn get_start_symbol(&self) -> &str {
&self.st
}
pub fn with_start_symbol(n: &str) -> Self {
Self {
st: n.to_owned(),
..Default::default()
}
}
pub fn add_pr(mut self, p: Pr) -> Self {
self.pr.push(p);
self
}
pub fn get_start_symbol_position(&self) -> Option<Pos> {
self.pr
.iter()
.position(|p| p.get_n_str() == self.st)
.map(|i| (i, 0).into())
}
pub fn get_non_terminal_set(&self) -> BTreeSet<String> {
let mut set = BTreeSet::new();
set.insert(self.st.clone());
self.pr.iter().fold(set, |mut acc, p| {
acc.insert(p.get_n());
acc = p.get_r().iter().fold(acc, |mut acc, s| {
if let Symbol::N(n, ..) = s {
acc.insert(n.clone());
}
acc
});
acc
})
}
pub fn get_non_terminal_ordering(&self) -> Vec<(String, Pos)> {
let vec = vec![(
self.st.clone(),
self.get_start_symbol_position()
.expect("Start symbol not found in any production"),
)];
self.pr.iter().enumerate().fold(vec, |mut acc, (pi, p)| {
let pos = (pi, 0).into();
if !acc.contains(&(p.get_n(), pos)) {
acc.push((p.get_n(), pos));
}
acc = p.get_r().iter().enumerate().fold(acc, |mut acc, (si, s)| {
let pos = (pi, si + 1).into();
if let Symbol::N(n, ..) = s {
let entry = (n.clone(), pos);
if !acc.contains(&entry) {
acc.push(entry);
}
}
acc
});
acc
})
}
pub fn get_non_terminal_index_function(&self) -> impl NonTerminalIndexFn {
let vec = self.get_non_terminal_set();
move |nt_name: &str| vec.iter().position(|nt| nt == nt_name).unwrap()
}
pub fn get_terminal_index_function(&self) -> impl TerminalIndexFn + use<> {
let vec = self
.get_ordered_terminals_owned()
.into_iter()
.map(|(s, k, l, _)| (s, k, l))
.collect::<Vec<(String, TerminalKind, Option<LookaheadExpression>)>>();
move |t: &str, k: TerminalKind, l: &Option<LookaheadExpression>| {
(vec.iter()
.position(|(t0, k0, l0)| t == t0 && k.behaves_like(*k0) && l0 == l)
.unwrap()) as TerminalIndex
+ parol_runtime::lexer::FIRST_USER_TOKEN
}
}
pub fn get_primary_non_terminal_finder(&self) -> FnPrimaryNonTerminalFinder {
let terminal_index_finder = self.get_terminal_index_function();
let primary_non_terminals =
self.pr
.iter()
.fold(HashMap::<TerminalIndex, String>::new(), |mut acc, p| {
if p.1.len() == 1
&& let crate::Symbol::T(Terminal::Trm(s, k, _, _, _, _, l)) = &p.1[0]
{
let t = terminal_index_finder.terminal_index(s, *k, l);
acc.insert(t, p.0.get_n().unwrap());
}
acc
});
Box::new(move |t: TerminalIndex| primary_non_terminals.get(&t).cloned())
}
pub fn get_ordered_terminals(
&self,
) -> Vec<(&str, TerminalKind, Option<LookaheadExpression>, Vec<usize>)> {
self.pr.iter().fold(Vec::new(), |mut acc, p| {
acc = p.get_r().iter().fold(acc, |mut acc, s| {
if let Symbol::T(Terminal::Trm(t, k, s, _, _, _, l)) = s {
if let Some(pos) = acc
.iter_mut()
.position(|(trm, knd, la, _)| trm == t && knd.behaves_like(*k) && la == l)
{
for st in s {
if !acc[pos].3.contains(st) {
acc[pos].3.push(*st);
}
}
} else {
acc.push((t, *k, l.clone(), s.to_vec()));
}
}
acc
});
acc
})
}
pub fn get_ordered_terminals_owned(
&self,
) -> Vec<(
String,
TerminalKind,
Option<LookaheadExpression>,
Vec<usize>,
)> {
self.get_ordered_terminals()
.into_iter()
.map(|(s, k, l, v)| (s.to_owned(), k, l, v))
.collect()
}
pub fn get_terminal_positions(&self) -> BTreeMap<Pos, &Symbol> {
self.pr
.iter()
.enumerate()
.fold(BTreeMap::new(), |mut acc, (pi, p)| {
acc = p.get_r().iter().enumerate().fold(acc, |mut acc, (si, s)| {
if matches!(s, Symbol::T(Terminal::Trm(..)))
|| matches!(s, Symbol::T(Terminal::End))
{
acc.insert(Pos::new(pi, si + 1), s);
}
acc
});
acc
})
}
pub fn get_non_terminal_positions(&self) -> BTreeMap<Pos, String> {
self.pr
.iter()
.enumerate()
.fold(BTreeMap::new(), |mut acc, (pi, p)| {
acc.insert(Pos::new(pi, 0), p.get_n());
acc = p.get_r().iter().enumerate().fold(acc, |mut acc, (si, s)| {
if let Symbol::N(n, ..) = s {
acc.insert(Pos::new(pi, si + 1), n.clone());
}
acc
});
acc
})
}
pub fn matching_productions(&self, n: &str) -> Vec<(usize, &Pr)> {
self.pr
.iter()
.enumerate()
.fold(Vec::new(), |mut acc, (i, p)| {
if p.get_n() == n {
acc.push((i, p));
}
acc
})
}
pub fn get_alternations_count(&self, prod_num: ProductionIndex) -> Result<usize, &'static str> {
if prod_num >= self.pr.len() {
Err("Invalid production number!")
} else {
Ok(self
.matching_productions(self.pr[prod_num].get_n_str())
.len())
}
}
pub fn get_alternation_index_of_production(
&self,
prod_num: ProductionIndex,
) -> Result<usize, &'static str> {
if prod_num >= self.pr.len() {
Err("Invalid production number!")
} else {
self.matching_productions(self.pr[prod_num].get_n_str())
.iter()
.position(|(i, _)| *i == prod_num)
.ok_or("Invalid production number!")
}
}
pub fn calculate_nullable_non_terminals(&self) -> BTreeSet<String> {
fn initial_nullables(cfg: &Cfg, vars: &[String]) -> HashSet<String> {
vars.iter()
.filter(|v| {
cfg.matching_productions(v)
.iter()
.any(|(_, p)| p.is_empty())
})
.map(<String as Clone>::clone)
.collect()
}
fn collect_nullables(cfg: &Cfg, nullables: &mut HashSet<String>, vars: &[String]) -> bool {
fn has_nullable_alt(prods: Vec<&Pr>, nullables: &HashSet<String>) -> bool {
fn is_already_nullable(s: &Symbol, nullables: &HashSet<String>) -> bool {
match s {
Symbol::N(n, ..) => nullables.contains(n),
_ => false,
}
}
prods
.iter()
.any(|p| p.get_r().iter().all(|s| is_already_nullable(s, nullables)))
}
let start_len = nullables.len();
for v in vars {
let v_prods = cfg
.matching_productions(v)
.into_iter()
.map(|(_, p)| p)
.collect();
if has_nullable_alt(v_prods, nullables) {
nullables.insert(v.clone());
}
}
start_len < nullables.len()
}
let vars = self
.get_non_terminal_ordering()
.into_iter()
.map(|(n, _)| n)
.collect::<Vec<String>>();
let mut nullables = initial_nullables(self, &vars);
while collect_nullables(self, &mut nullables, &vars) {}
let mut nullables_vec = BTreeSet::new();
for n in nullables.drain() {
nullables_vec.insert(n);
}
nullables_vec
}
}
impl Index<usize> for Cfg {
type Output = Pr;
fn index(&self, idx: usize) -> &Self::Output {
&self.pr[idx]
}
}
#[cfg(test)]
mod test {}