use super::k_tuples::KTuplesBuilder;
use super::{FirstSet, FollowCache};
use crate::analysis::FirstCache;
use crate::analysis::compiled_terminal::CompiledTerminal;
use crate::grammar::cfg::{NonTerminalIndexFn, TerminalIndexFn};
use crate::grammar::symbol_string::SymbolString;
use crate::{GrammarConfig, KTuples, Pos, Pr, Symbol};
use parol_runtime::TerminalIndex;
use parol_runtime::lexer::FIRST_USER_TOKEN;
use parol_runtime::log::trace;
use rustc_hash::FxHashMap;
use std::cell::RefCell;
use std::rc::Rc;
#[cfg(feature = "profiling")]
macro_rules! profile_scope {
($name:expr) => {
#[cfg(feature = "profiling")]
let _profile = profiling::ProfileScope::new($name);
};
}
type DomainType = KTuples;
type DomainTypeBuilder<'a> = KTuplesBuilder<'a>;
#[derive(Debug, Clone, Default)]
pub struct FollowSet {
pub non_terminals: Vec<DomainType>,
}
impl FollowSet {
pub fn new(non_terminals: Vec<DomainType>) -> Self {
FollowSet { non_terminals }
}
pub fn is_empty(&self) -> bool {
self.non_terminals.is_empty()
}
}
pub(crate) type ResultMap = FxHashMap<Pos, DomainType>;
type TransferFunction = Box<dyn Fn(Rc<ResultMap>, Rc<RefCell<FollowSet>>) -> DomainType>;
type EquationSystem = FxHashMap<Pos, TransferFunction>;
type StepFunction = Box<dyn Fn(Rc<ResultMap>, Rc<RefCell<FollowSet>>) -> ResultMap>;
#[inline(always)]
pub fn follow_k(
grammar_config: &GrammarConfig,
k: usize,
first_cache: &FirstCache,
follow_cache: &FollowCache,
) -> (ResultMap, FollowSet) {
#[cfg(feature = "profiling")]
profile_scope!("follow_k_total");
let cfg = &grammar_config.cfg;
let terminals = grammar_config.cfg.get_ordered_terminals_owned();
let max_terminal_index = terminals.len() + FIRST_USER_TOKEN as usize;
let ti = Rc::new(grammar_config.cfg.get_terminal_index_function());
let first_k_of_nt = first_cache.get(k, grammar_config);
let start_symbol = cfg.get_start_symbol();
let nti = Rc::new(cfg.get_non_terminal_index_function());
let non_terminal_positions = Rc::new(
cfg.get_non_terminal_positions()
.iter()
.filter(|(p, _)| p.sy_index() > 0)
.fold(FxHashMap::<Pos, usize>::default(), |mut acc, (p, s)| {
acc.insert(*p, nti.non_terminal_index(s));
acc
}),
);
let equation_system: EquationSystem = {
#[cfg(feature = "profiling")]
profile_scope!("equation_system_build");
cfg.pr
.iter()
.enumerate()
.fold(EquationSystem::default(), |es, (i, pr)| {
let args = UpdateProductionEquationsArgs {
prod_num: i,
pr,
first_k_of_nt: Rc::clone(&first_k_of_nt),
ti: Rc::clone(&ti),
nti: Rc::clone(&nti),
k,
max_terminal_index,
};
update_production_equations(es, args)
})
};
trace!(
"FOLLOW({}): {} equations in equation system",
k,
equation_system.len()
);
let step_function: StepFunction = {
let non_terminal_positions = Rc::clone(&non_terminal_positions);
Box::new(
move |result_map: Rc<ResultMap>, non_terminal_results: Rc<RefCell<FollowSet>>| {
let mut new_result_vector = ResultMap::with_capacity_and_hasher(
result_map.len(),
rustc_hash::FxBuildHasher,
);
for (pos, _) in result_map.iter() {
let pos_result = equation_system[pos](
Rc::clone(&result_map),
Rc::clone(&non_terminal_results),
);
if let Some(&sym) = non_terminal_positions.get(pos) {
let mut borrowed = non_terminal_results.borrow_mut();
if let Some(set) = borrowed.non_terminals.get_mut(sym) {
let (new_set, _changed) = set.union(&pos_result);
*set = new_set;
}
}
new_result_vector.insert(*pos, pos_result);
}
new_result_vector
},
)
};
let non_terminal_results = Rc::new(RefCell::new(FollowSet::new(
cfg.get_non_terminal_set()
.iter()
.fold(Vec::new(), |mut acc, nt| {
if nt == start_symbol {
acc.push(
DomainTypeBuilder::new()
.k(k)
.max_terminal_index(max_terminal_index)
.end()
.unwrap(),
);
} else {
acc.push(
DomainTypeBuilder::new()
.k(k)
.max_terminal_index(max_terminal_index)
.build()
.unwrap(),
);
}
acc
}),
)));
let mut result_map = if k == 0 {
let mut initial_map = ResultMap::with_capacity_and_hasher(
non_terminal_positions.len(),
rustc_hash::FxBuildHasher,
);
for (p, _) in non_terminal_positions.iter() {
initial_map.insert(
*p,
DomainTypeBuilder::new()
.k(k)
.max_terminal_index(max_terminal_index)
.build()
.unwrap(),
);
}
Rc::new(initial_map)
} else {
let cache_ref = follow_cache.get(k - 1, grammar_config, first_cache);
let borrowed_cache = cache_ref.borrow();
let mut cached = ResultMap::with_capacity_and_hasher(
borrowed_cache.last_result.len(),
rustc_hash::FxBuildHasher,
);
for (p, t) in borrowed_cache.last_result.iter() {
cached.insert(*p, t.clone().set_k(k));
}
drop(borrowed_cache); Rc::new(cached)
};
let mut iterations = 0usize;
let mut new_result_vector;
loop {
#[cfg(feature = "profiling")]
profile_scope!("iteration_step");
new_result_vector = step_function(Rc::clone(&result_map), Rc::clone(&non_terminal_results));
if new_result_vector == *result_map {
break;
}
result_map = Rc::new(new_result_vector);
iterations += 1;
trace!("Iteration number {iterations} completed");
}
#[cfg(feature = "profiling")]
profiling::output_profiling_data();
(
new_result_vector,
Rc::try_unwrap(non_terminal_results).unwrap().into_inner(),
)
}
struct UpdateProductionEquationsArgs<'a, T, N> {
prod_num: usize,
pr: &'a Pr,
first_k_of_nt: Rc<RefCell<FirstSet>>,
ti: Rc<T>,
nti: Rc<N>,
k: usize,
max_terminal_index: usize,
}
fn update_production_equations<T, N>(
mut es: EquationSystem,
args: UpdateProductionEquationsArgs<T, N>,
) -> EquationSystem
where
T: TerminalIndexFn + 'static,
N: NonTerminalIndexFn,
{
let pr_symbols = args.pr.get_r();
let mut parts = Vec::<(usize, SymbolString)>::with_capacity(pr_symbols.len());
for (i, s) in pr_symbols.iter().enumerate() {
match s {
Symbol::N(..) => parts.push((i + 1, SymbolString(vec![s.clone()]))),
Symbol::T(_) => {
if parts.is_empty() {
parts.push((i + 1, SymbolString(vec![s.clone()])));
} else {
let last_idx = parts.len() - 1;
let last_symbols = &parts[last_idx].1.0;
if !last_symbols.is_empty() && matches!(last_symbols.last(), Some(Symbol::T(_)))
{
parts[last_idx].1.0.push(s.clone());
} else {
parts.push((i + 1, SymbolString(vec![s.clone()])));
}
}
}
_ => {
unreachable!(
"Scanner switching directives have been removed from the grammar syntax."
);
}
}
}
for (part_index, (symbol_index, symbol_string)) in parts.iter().enumerate() {
if let Symbol::N(..) = &symbol_string.0[0] {
let mut result_function: TransferFunction = Box::new(move |_, _| {
DomainTypeBuilder::new()
.k(args.k)
.max_terminal_index(args.max_terminal_index)
.eps()
.unwrap()
});
for (_, symbol_string) in parts.iter().skip(part_index + 1) {
let symbol = &symbol_string.0[0]; match symbol {
Symbol::T(_) => {
let terminal_indices: Vec<TerminalIndex> = symbol_string
.0
.iter()
.map(|s| CompiledTerminal::create(s, Rc::clone(&args.ti)).0)
.collect();
let domain_type = DomainTypeBuilder::new()
.k(args.k)
.max_terminal_index(args.max_terminal_index)
.terminal_indices(&[&terminal_indices])
.build()
.unwrap();
result_function =
Box::new(move |result_map: Rc<ResultMap>, non_terminal_results| {
result_function(result_map, non_terminal_results)
.k_concat(&domain_type, args.k)
});
}
Symbol::N(nt, _, _, _) => {
let first_k_of_nt = Rc::clone(&args.first_k_of_nt);
let nt_i = args.nti.non_terminal_index(nt);
result_function =
Box::new(move |result_map: Rc<ResultMap>, non_terminal_results| {
let borrowed_first_of_nt = first_k_of_nt.borrow();
let first_of_nt = borrowed_first_of_nt
.non_terminals
.get(nt_i)
.expect("Non-terminal index should be valid");
result_function(result_map, non_terminal_results)
.k_concat(first_of_nt, args.k)
});
}
_ => {
unreachable!(
"Scanner switching directives have been removed from the grammar syntax."
);
}
}
}
let nt = args.nti.non_terminal_index(args.pr.get_n_str());
es.insert(
(args.prod_num, *symbol_index).into(),
Box::new(
move |result_map, non_terminal_results: Rc<RefCell<FollowSet>>| {
let intermediate_result =
result_function(result_map, Rc::clone(&non_terminal_results));
let borrowed_nt_results = non_terminal_results.borrow();
let nt_follow_set = borrowed_nt_results
.non_terminals
.get(nt)
.expect("Non-terminal index should be valid");
intermediate_result.k_concat(nt_follow_set, args.k)
},
),
);
}
}
es
}
#[cfg(feature = "profiling")]
mod profiling {
use rustc_hash::FxHashMap;
use std::cell::RefCell;
use std::time::{Duration, Instant};
thread_local! {
static PROFILE_DATA: RefCell<FxHashMap<&'static str, (u64, Duration)>> =
RefCell::new(FxHashMap::default());
}
pub struct ProfileScope {
name: &'static str,
start: Instant,
}
impl ProfileScope {
pub fn new(name: &'static str) -> Self {
Self {
name,
start: Instant::now(),
}
}
}
impl Drop for ProfileScope {
fn drop(&mut self) {
let duration = self.start.elapsed();
PROFILE_DATA.with(|data| {
let mut map = data.borrow_mut();
let entry = map.entry(self.name).or_insert((0, Duration::ZERO));
entry.0 += 1;
entry.1 += duration;
});
}
}
pub fn output_profiling_data() {
use std::env;
use std::fs::File;
use std::io::{BufWriter, Write};
let file_path = match env::current_dir() {
Ok(mut path) => {
path.push("profiling_data.txt");
path
}
Err(_) => std::path::PathBuf::from("profiling_data.txt"),
};
let file = match File::create(&file_path) {
Ok(f) => f,
Err(e) => {
eprintln!("Failed to create profiling data file: {}", e);
return;
}
};
let mut writer = BufWriter::new(file);
PROFILE_DATA.with(|data| {
let map = data.borrow();
for (name, (count, duration)) in map.iter() {
let _ = writeln!(writer, "{}: {} calls, {:?} total", name, count, duration);
}
});
}
}