use std::rc::Rc;
use crate::analysis::FirstCache;
use crate::grammar::cfg::{NonTerminalIndexFn, TerminalIndexFn};
use crate::grammar::symbol_string::SymbolString;
use crate::{CompiledTerminal, GrammarConfig, KTuples, Pr, Symbol};
use parol_runtime::TerminalIndex;
use parol_runtime::lexer::FIRST_USER_TOKEN;
use parol_runtime::log::trace;
use super::k_tuples::KTuplesBuilder;
#[derive(Debug, Clone, Default)]
pub struct FirstSet {
pub productions: Vec<KTuples>,
pub non_terminals: Vec<KTuples>,
}
impl FirstSet {
pub fn is_empty(&self) -> bool {
self.productions.is_empty() && self.non_terminals.is_empty()
}
}
type DomainType = KTuples;
type DomainTypeBuilder<'a> = KTuplesBuilder<'a>;
type ResultVector = Vec<DomainType>;
type TransferFunction<'a> = Box<dyn Fn(Rc<ResultVector>) -> DomainType + 'a>;
type EquationSystem<'a> = Vec<TransferFunction<'a>>;
type StepFunction = Box<dyn Fn(Rc<ResultVector>) -> ResultVector>;
pub fn first_k(grammar_config: &GrammarConfig, k: usize, first_cache: &FirstCache) -> FirstSet {
let cfg = &grammar_config.cfg;
let pr_count = cfg.pr.len();
let nt_count = cfg.get_non_terminal_set().len();
let nti = Rc::new(grammar_config.cfg.get_non_terminal_index_function());
let ti = Rc::new(grammar_config.cfg.get_terminal_index_function());
let max_terminal_index = cfg.get_ordered_terminals().len() + FIRST_USER_TOKEN as usize;
let nt_for_production: Vec<usize> =
cfg.get_non_terminal_set()
.iter()
.fold(vec![0; pr_count], |mut acc, nt| {
let non_terminal_index = nti.non_terminal_index(nt);
for (pi, _) in cfg.matching_productions(nt) {
acc[pi] = non_terminal_index;
}
acc
});
let equation_system: EquationSystem =
cfg.pr
.iter()
.fold(Vec::with_capacity(pr_count), |mut es, pr| {
es.push(combine_production_equation(
pr,
pr_count,
Rc::clone(&ti),
Rc::clone(&nti),
k,
max_terminal_index,
));
es
});
trace!(
"Number of equations in equation system for FIRST(k) is {}",
equation_system.len()
);
let step_function: StepFunction = {
Box::new(move |result_vector: Rc<ResultVector>| {
let mut new_result_vector: ResultVector = vec![
DomainTypeBuilder::new()
.k(k)
.max_terminal_index(max_terminal_index)
.build()
.unwrap();
result_vector.len()
];
for pr_i in 0..pr_count {
let r = equation_system[pr_i](result_vector.clone());
new_result_vector[pr_count + nt_for_production[pr_i]].append(r.clone());
new_result_vector[pr_i] = r;
}
new_result_vector
})
};
let mut result_vector = Rc::new(if k == 0 {
(0..pr_count + nt_count).fold(Vec::with_capacity(pr_count + nt_count), |mut acc, i| {
if i < pr_count {
acc.push(
DomainTypeBuilder::new()
.k(k)
.max_terminal_index(max_terminal_index)
.build()
.unwrap(),
);
} else {
acc.push(
DomainTypeBuilder::new()
.k(k)
.max_terminal_index(max_terminal_index)
.eps()
.unwrap(),
);
}
acc
})
} else {
let last_first_set = first_cache.get(k - 1, grammar_config).borrow().clone();
let mut result_vector = Vec::with_capacity(pr_count + nt_count);
for t in last_first_set.productions.iter() {
result_vector.push(t.clone().set_k(k));
}
debug_assert_eq!(last_first_set.non_terminals.len(), nt_count);
for t in last_first_set.non_terminals.iter() {
result_vector.push(t.clone().set_k(k));
}
result_vector
});
let mut iterations = 0usize;
loop {
let new_result_vector = Rc::new(step_function(result_vector.clone()));
if new_result_vector == result_vector {
break;
}
result_vector = new_result_vector;
iterations += 1;
trace!("Iteration number {iterations} completed");
}
let (r, k_tuples_of_nt) = result_vector.split_at(pr_count);
FirstSet {
productions: r.to_vec(),
non_terminals: k_tuples_of_nt.to_vec(),
}
}
fn combine_production_equation<'a, N, T>(
pr: &Pr,
pr_count: usize,
ti_fn: Rc<T>,
nti_fn: Rc<N>,
k: usize,
max_terminal_index: usize,
) -> TransferFunction<'a>
where
T: TerminalIndexFn + 'a,
N: NonTerminalIndexFn,
{
let parts = pr
.get_r()
.iter()
.fold(Vec::<SymbolString>::new(), |mut acc, s| {
match s {
Symbol::N(..) => acc.push(SymbolString(vec![s.clone()])),
Symbol::T(_) => {
if acc.is_empty() {
acc.push(SymbolString(vec![s.clone()]));
} else {
let last = acc.len() - 1;
let last_len = acc[last].0.len();
let last_terminal = &acc[last].0[last_len - 1];
if matches!(last_terminal, Symbol::T(_)) {
acc[last].0.push(s.clone());
} else {
acc.push(SymbolString(vec![s.clone()]));
}
}
}
_ => {
unreachable!(
"Scanner switching directives have been removed from the grammar syntax."
);
}
}
acc
});
let mut result_function: TransferFunction = Box::new(move |_| {
DomainTypeBuilder::new()
.k(k)
.max_terminal_index(max_terminal_index)
.eps()
.unwrap()
});
for symbol_string in parts {
match &symbol_string.0[0] {
Symbol::T(_) => {
let ti_fn = Rc::clone(&ti_fn);
result_function = Box::new(move |result_vector: Rc<ResultVector>| {
let mapper = |s| CompiledTerminal::create(s, Rc::clone(&ti_fn));
let terminal_indices: Vec<TerminalIndex> =
symbol_string.0.iter().map(|s| mapper(s).0).collect();
result_function(result_vector).k_concat(
&DomainTypeBuilder::new()
.k(k)
.max_terminal_index(max_terminal_index)
.terminal_indices(&[&terminal_indices])
.build()
.unwrap(),
k,
)
});
}
Symbol::N(nt, _, _, _) => {
let f = create_union_access_function(nt, pr_count, Rc::clone(&nti_fn));
result_function = Box::new(move |result_vector: Rc<ResultVector>| {
result_function(result_vector.clone()).k_concat(&f(result_vector), k)
});
}
_ => {
unreachable!(
"Scanner switching directives have been removed from the grammar syntax."
);
}
}
}
result_function
}
fn create_union_access_function<'a, N>(
nt: &str,
pr_count: usize,
nti: Rc<N>,
) -> TransferFunction<'a>
where
N: NonTerminalIndexFn,
{
let index = nti.non_terminal_index(nt);
Box::new(move |result_vector: Rc<ResultVector>| result_vector[pr_count + index].clone())
}