use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use crate::grammar::{AstNodeType, Grammar, GrammarItem, GrammarRuleId, HasTokenType, TokenType};
use crate::lr::{
InternalToken, LalrItem, LalrKernel, Lr0Closure, Lr0Item, Lr0Kernel, Lr1Closure, Lr1Item,
Lr1Kernel,
};
use crate::parser::{Action, Parser};
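/// A conflict discovered while filling the action table: in LALR state
/// `state`, on lookahead `token`, more than one action was possible.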
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Ambiguity {
pub state: usize,
pub token: InternalToken,
pub kind: AmbiguityKind,
}
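/// The kind of conflict. A reduce/reduce conflict is resolved in favor of
/// `chosen` (the first conflicting rule); a shift/reduce conflict is resolved
/// in favor of the reduction, whose entry overwrites the shift in the action
/// table.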
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AmbiguityKind {
ReduceReduce {
rules: Vec<GrammarRuleId>,
chosen: GrammarRuleId,
},
ShiftReduce {
shift_items: Vec<Lr0Item>,
reduce_rules: Vec<GrammarRuleId>,
},
}
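/// Builds an LALR(1) parser for `grammar`, returning the parser together with
/// any shift/reduce or reduce/reduce ambiguities found along the way. Rule 0
/// is assumed to be the augmented goal rule.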
#[must_use]
pub fn generate_parser<T: HasTokenType, A>(
grammar: Grammar<T, A>,
) -> (Parser<T, A>, Vec<Ambiguity>) {
let generators = build_generators(&grammar);
let first_sets = build_first_sets(&grammar, &generators);
let lr0_item_closures = build_lr0_item_closures(&grammar, &generators);
let lr0_states = build_lr0_states(&grammar, &lr0_item_closures);
let lalr_states = build_lalr_states(
&lr0_states,
&grammar,
&generators,
&first_sets,
&lr0_item_closures,
);
let (goto_table, action_table, ambiguities) = build_parse_tables(
&lalr_states,
&grammar,
&generators,
&first_sets,
&lr0_states,
);
(
Parser {
grammar,
goto_table,
action_table,
},
ambiguities,
)
}
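/// Fills the goto and action tables, one row per LALR state, collecting any
/// conflicts encountered into the returned `Vec<Ambiguity>`.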
fn build_parse_tables<T: HasTokenType, A>(
lalr_states: &[LalrKernel],
grammar: &Grammar<T, A>,
generators: &HashMap<AstNodeType, Vec<GrammarRuleId>>,
first_sets: &HashMap<AstNodeType, HashSet<TokenType>>,
lr0_states: &BTreeMap<Lr0Kernel, usize>,
) -> ParseTables {
let mut goto_table: Vec<HashMap<AstNodeType, usize>> =
vec![HashMap::default(); lalr_states.len()];
let mut action_table: Vec<HashMap<InternalToken, Action>> =
vec![HashMap::default(); lalr_states.len()];
let mut ambiguities = Vec::new();
for (state_num, lalr_kernel) in lalr_states.iter().enumerate() {
let (shifts, reductions, goto) =
compute_state_actions(lalr_kernel, grammar, generators, first_sets);
populate_goto_table(&goto, &mut goto_table, state_num, lr0_states);
populate_action_table(
&shifts,
&reductions,
&mut action_table,
&mut ambiguities,
state_num,
lr0_states,
);
}
(goto_table, action_table, ambiguities)
}
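/// Computes the raw actions of one state from its LR(1) closure: successor
/// kernels for shifts (dot before a token) and gotos (dot before an AST
/// node), and the rules to reduce by, per lookahead, when the dot is at the
/// end of a rule.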
fn compute_state_actions<T: HasTokenType, A>(
lalr_kernel: &LalrKernel,
grammar: &Grammar<T, A>,
generators: &HashMap<AstNodeType, Vec<GrammarRuleId>>,
first_sets: &HashMap<AstNodeType, HashSet<TokenType>>,
) -> StateActions {
let mut shifts: HashMap<InternalToken, Lr1Kernel> = HashMap::new();
let mut reductions: HashMap<InternalToken, Vec<GrammarRuleId>> = HashMap::new();
let mut goto: HashMap<AstNodeType, Lr1Kernel> = HashMap::new();
let lr1_closure = compute_lalr_closure(lalr_kernel, grammar, generators, first_sets);
for Lr1Item {
rule,
index,
lookahead,
} in lr1_closure
{
let components = &grammar.get_rule(rule).components;
match components.get(index) {
Some(GrammarItem::Token(token_type)) => {
shifts
.entry(InternalToken::User(*token_type))
.or_default()
.insert(Lr1Item::new(rule, index + 1, lookahead));
}
Some(GrammarItem::AstNode(node_type)) => {
goto.entry(*node_type).or_default().insert(Lr1Item::new(
rule,
index + 1,
lookahead,
));
}
None => {
reductions.entry(lookahead).or_default().push(rule);
}
}
}
(shifts, reductions, goto)
}
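/// Expands an LALR kernel into a full LR(1) closure by taking the LR(1)
/// closure of every (item, lookahead) pair in the kernel.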
fn compute_lalr_closure<T: HasTokenType, A>(
kernel: &LalrKernel,
grammar: &Grammar<T, A>,
generators: &HashMap<AstNodeType, Vec<GrammarRuleId>>,
first_sets: &HashMap<AstNodeType, HashSet<TokenType>>,
) -> Lr1Closure {
let mut closure = Lr1Closure::default();
for LalrItem {
rule,
index,
lookaheads,
} in kernel
{
for lookahead in lookaheads {
closure.extend(
Lr1Item::new(*rule, *index, *lookahead).closure(grammar, generators, first_sets),
);
}
}
closure
}
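/// Maps an LR(1) kernel back to its state number by dropping the lookaheads
/// and looking the resulting LR(0) kernel up in `lr0_states`.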
fn lr1_to_state_num(kernel: &Lr1Kernel, lr0_states: &BTreeMap<Lr0Kernel, usize>) -> usize {
let Some(state_num) = lr0_states
.get(&Lr0Kernel(
kernel
.iter()
.map(
|&Lr1Item {
rule,
index,
lookahead: _,
}| Lr0Item::new(rule, index),
)
.collect(),
))
.copied()
else {
unreachable!("ERROR: Can't find state number for nonexistent state: {kernel:?}");
};
state_num
}
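/// Records, for each AST node type, which state to go to after a reduction.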
fn populate_goto_table(
goto: &HashMap<AstNodeType, Lr1Kernel>,
goto_table: &mut [HashMap<AstNodeType, usize>],
state_num: usize,
lr0_states: &BTreeMap<Lr0Kernel, usize>,
) {
for (node_type, lr1_items) in goto {
goto_table[state_num].insert(*node_type, lr1_to_state_num(lr1_items, lr0_states));
}
}
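/// Records the shift, reduce, and accept actions for one state, reporting any
/// shift/reduce or reduce/reduce conflicts via `ambiguities`.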
fn populate_action_table(
shifts: &HashMap<InternalToken, Lr1Kernel>,
reductions: &HashMap<InternalToken, Vec<GrammarRuleId>>,
action_table: &mut [HashMap<InternalToken, Action>],
ambiguities: &mut Vec<Ambiguity>,
state_num: usize,
lr0_states: &BTreeMap<Lr0Kernel, usize>,
) {
for (&internal_token, lr1_items) in shifts {
if matches!(internal_token, InternalToken::User(_)) {
action_table[state_num].insert(
internal_token,
Action::Shift(lr1_to_state_num(lr1_items, lr0_states)),
);
}
}
for (internal_token, rules) in reductions {
if *internal_token == InternalToken::Special {
continue;
}
if rules.len() > 1 {
ambiguities.push(Ambiguity {
state: state_num,
token: *internal_token,
kind: AmbiguityKind::ReduceReduce {
rules: rules.clone(),
chosen: rules[0],
},
});
}
if let Some(lr1_items) = shifts.get(internal_token) {
let shift_items = lr1_items
.iter()
.map(
|&Lr1Item {
rule,
index,
lookahead: _,
}| Lr0Item::new(rule, index),
)
.collect::<Vec<_>>();
ambiguities.push(Ambiguity {
state: state_num,
token: *internal_token,
kind: AmbiguityKind::ShiftReduce {
shift_items,
reduce_rules: rules.clone(),
},
});
}
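// Rule 0 is the augmented goal rule, so reducing by it is an accept.
// Note: this insert overwrites any shift recorded above for the same
// token, so shift/reduce conflicts resolve in favor of the reduction.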
if rules[0] == GrammarRuleId(0) {
action_table[state_num].insert(*internal_token, Action::Accept);
} else {
action_table[state_num].insert(*internal_token, Action::Reduce(rules[0]));
}
}
}
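/// Maps each AST node type to the rules that produce it. Rule 0 (the goal
/// rule) is deliberately skipped.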
fn build_generators<T: HasTokenType, A>(
grammar: &Grammar<T, A>,
) -> HashMap<AstNodeType, Vec<GrammarRuleId>> {
let mut generators: HashMap<_, Vec<_>> = HashMap::new();
for (id, rule) in grammar.rules.iter().enumerate().skip(1) {
generators
.entry(rule.result)
.or_default()
.push(GrammarRuleId(id));
}
generators
}
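/// Computes FIRST sets: for each AST node type, the set of tokens that can
/// begin it. Assumes every production is non-empty, since only the first
/// component of each rule is inspected.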
fn build_first_sets<T: HasTokenType, A>(
grammar: &Grammar<T, A>,
generators: &HashMap<AstNodeType, Vec<GrammarRuleId>>,
) -> HashMap<AstNodeType, HashSet<TokenType>> {
fn get_first<T: HasTokenType, A>(
node_type: AstNodeType,
generators: &HashMap<AstNodeType, Vec<GrammarRuleId>>,
grammar: &Grammar<T, A>,
) -> HashSet<TokenType> {
let mut visited = HashSet::new();
visited.insert(node_type);
let mut first_set = HashSet::new();
let mut queue = vec![node_type];
while let Some(node_type) = queue.pop() {
if let Some(rules) = generators.get(&node_type) {
for &rule_id in rules {
match grammar.get_rule(rule_id).components[0] {
GrammarItem::Token(token_type) => {
first_set.insert(token_type);
}
GrammarItem::AstNode(node_type) => {
if visited.insert(node_type) {
queue.push(node_type);
}
}
}
}
}
}
first_set
}
let mut first_sets: HashMap<_, HashSet<_>> = HashMap::new();
for result in grammar.rules.iter().map(|r| r.result) {
first_sets
.entry(result)
.or_default()
.extend(get_first(result, generators, grammar));
}
first_sets
}
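/// Precomputes the LR(0) closure of every item in the grammar, memoizing
/// closures as they are computed so later items can reuse them.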
fn build_lr0_item_closures<T: HasTokenType, A>(
grammar: &Grammar<T, A>,
generators: &HashMap<AstNodeType, Vec<GrammarRuleId>>,
) -> BTreeMap<Lr0Item, Lr0Closure> {
fn calc_closure<T: HasTokenType, A>(
item: Lr0Item,
lr0_item_closures: &BTreeMap<Lr0Item, Lr0Closure>,
generators: &HashMap<AstNodeType, Vec<GrammarRuleId>>,
grammar: &Grammar<T, A>,
) -> Lr0Closure {
let mut item_closure = BTreeSet::new();
item_closure.insert(item.clone());
let mut item_queue = vec![item];
while let Some(item) = item_queue.pop() {
if let Some(c) = lr0_item_closures.get(&item) {
item_closure.extend(c.iter().cloned());
continue;
}
if let Some(GrammarItem::AstNode(node_type)) =
grammar.get_rule(item.rule).components.get(item.index)
{
for &rule in generators.get(node_type).into_iter().flatten() {
let new_item = Lr0Item { rule, index: 0 };
if item_closure.insert(new_item.clone()) {
item_queue.push(new_item);
}
}
}
}
Lr0Closure(item_closure)
}
let mut lr0_item_closures: BTreeMap<_, Lr0Closure> = BTreeMap::new();
for (rule_ind, rule) in grammar.rules.iter().enumerate() {
for index in 0..=rule.components.len() {
let item = Lr0Item {
rule: GrammarRuleId(rule_ind),
index,
};
lr0_item_closures.insert(
item.clone(),
calc_closure(item, &lr0_item_closures, generators, grammar),
);
}
}
lr0_item_closures
}
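/// Closure of a whole kernel: the union of the precomputed closures of its
/// items.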
fn lr0_set_closure(
kernel: &Lr0Kernel,
lr0_item_closures: &BTreeMap<Lr0Item, Lr0Closure>,
) -> Lr0Closure {
let mut closure = Lr0Closure(kernel.0.clone());
for lr0_item in kernel {
closure.extend(
lr0_item_closures
.get(lr0_item)
.cloned()
.into_iter()
.flatten(),
);
}
closure
}
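/// Builds the canonical LR(0) state machine, numbering each distinct kernel
/// in order of discovery starting from the goal item.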
fn build_lr0_states<T: HasTokenType, A>(
grammar: &Grammar<T, A>,
lr0_item_closures: &BTreeMap<Lr0Item, Lr0Closure>,
) -> BTreeMap<Lr0Kernel, usize> {
let initial_state = Lr0Kernel(BTreeSet::from([Lr0Item::new(GrammarRuleId(0), 0)]));
let mut lr0_states = BTreeMap::from([(initial_state.clone(), 0)]);
let mut queue = vec![initial_state];
while let Some(kernel) = queue.pop() {
let mut next_kernels: BTreeMap<GrammarItem, Lr0Kernel> = BTreeMap::new();
for Lr0Item { rule, index } in lr0_set_closure(&kernel, lr0_item_closures) {
let components = &grammar.get_rule(rule).components;
if index < components.len() {
next_kernels
.entry(components[index])
.or_default()
.insert(Lr0Item::new(rule, index + 1));
}
}
for kernel in next_kernels.into_values() {
if !lr0_states.contains_key(&kernel) {
lr0_states.insert(kernel.clone(), lr0_states.len());
queue.push(kernel);
}
}
}
lr0_states
}
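/// A memoizing cache of LR(1) item closures.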
struct Lr1ItemClosures<'a, T: HasTokenType, A> {
map: BTreeMap<Lr1Item, Lr1Closure>,
grammar: &'a Grammar<T, A>,
generators: &'a HashMap<AstNodeType, Vec<GrammarRuleId>>,
first_sets: &'a HashMap<AstNodeType, HashSet<TokenType>>,
}
impl<T: HasTokenType, A> Lr1ItemClosures<'_, T, A> {
fn get(&mut self, lr1_item: Lr1Item) -> &Lr1Closure {
self.map
.entry(lr1_item)
.or_insert_with_key(|item| item.closure(self.grammar, self.generators, self.first_sets))
}
}
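/// An edge along which lookaheads flow: every lookahead that `from_item` in
/// `from_state` acquires must also be given to `to_item` in `to_state`.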
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct Propagation {
from_state: Lr0Kernel,
from_item: Lr0Item,
to_state: Lr0Kernel,
to_item: Lr0Item,
}
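/// Lookaheads discovered so far, per state and per kernel item.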
type LookaheadMap = BTreeMap<Lr0Kernel, BTreeMap<Lr0Item, BTreeSet<InternalToken>>>;
type GotoTable = Vec<HashMap<AstNodeType, usize>>;
type ActionTable = Vec<HashMap<InternalToken, Action>>;
type ParseTables = (GotoTable, ActionTable, Vec<Ambiguity>);
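/// The raw actions of one state: shift successors per token, reductions per
/// lookahead, and goto successors per AST node type.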
type StateActions = (
HashMap<InternalToken, Lr1Kernel>,
HashMap<InternalToken, Vec<GrammarRuleId>>,
HashMap<AstNodeType, Lr1Kernel>,
);
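/// Attaches lookaheads to the LR(0) kernels using the classic two-phase
/// scheme: first collect spontaneously generated lookaheads and propagation
/// edges, then propagate lookaheads along those edges to a fixpoint.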
fn build_lalr_states<T: HasTokenType, A>(
lr0_states: &BTreeMap<Lr0Kernel, usize>,
grammar: &Grammar<T, A>,
generators: &HashMap<AstNodeType, Vec<GrammarRuleId>>,
first_sets: &HashMap<AstNodeType, HashSet<TokenType>>,
lr0_item_closures: &BTreeMap<Lr0Item, Lr0Closure>,
) -> Vec<LalrKernel> {
let mut lr1_item_closures = Lr1ItemClosures {
map: BTreeMap::new(),
grammar,
generators,
first_sets,
};
let mut lookaheads = initialize_goal_lookaheads(grammar);
let propagations = build_propagations(
lr0_states,
grammar,
&mut lr1_item_closures,
lr0_item_closures,
&mut lookaheads,
);
propagate_lookaheads(&propagations, &mut lookaheads);
convert_to_lalr_kernels(lr0_states, &lookaheads)
}
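/// Seeds the items of the goal rule with the end-of-file lookahead.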
fn initialize_goal_lookaheads<T: HasTokenType, A>(grammar: &Grammar<T, A>) -> LookaheadMap {
let mut lookaheads = BTreeMap::new();
for index in 0..=grammar.rules[0].components.len() {
let goal_item = Lr0Item::new(GrammarRuleId(0), index);
lookaheads.insert(
Lr0Kernel(BTreeSet::from([goal_item.clone()])),
BTreeMap::from([(goal_item, BTreeSet::from([InternalToken::Eof]))]),
);
}
lookaheads
}
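/// For every kernel item, takes its LR(1) closure under the placeholder
/// lookahead `InternalToken::Special` (the "#" of the standard propagation
/// algorithm). Closure items that keep the placeholder mark propagation
/// edges; items with a concrete lookahead record it as spontaneously
/// generated in `lookaheads`.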
fn build_propagations<T: HasTokenType, A>(
lr0_states: &BTreeMap<Lr0Kernel, usize>,
grammar: &Grammar<T, A>,
lr1_item_closures: &mut Lr1ItemClosures<'_, T, A>,
lr0_item_closures: &BTreeMap<Lr0Item, Lr0Closure>,
lookaheads: &mut LookaheadMap,
) -> BTreeSet<Propagation> {
let mut propagations = BTreeSet::new();
for kernel in lr0_states.keys() {
let lr0_state = lr0_set_closure(kernel, lr0_item_closures);
for item in kernel {
let components = &grammar.get_rule(item.rule).components;
if item.index == components.len() {
continue;
}
for &Lr1Item {
rule,
index,
lookahead,
} in
lr1_item_closures.get(Lr1Item::new(item.rule, item.index, InternalToken::Special))
{
let components = &grammar.get_rule(rule).components;
if index == components.len() {
continue;
}
let next_kernel = lr0_state.goto(grammar, &components[index]);
let next_item = Lr0Item::new(rule, index + 1);
if lookahead == InternalToken::Special {
propagations.insert(Propagation {
from_state: kernel.clone(),
from_item: item.clone(),
to_state: next_kernel,
to_item: next_item,
});
} else {
lookaheads
.entry(next_kernel)
.or_default()
.entry(next_item)
.or_default()
.insert(lookahead);
}
}
}
}
propagations
}
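/// Repeatedly pushes lookaheads along the propagation edges until a full pass
/// makes no change.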
fn propagate_lookaheads(propagations: &BTreeSet<Propagation>, lookaheads: &mut LookaheadMap) {
'outer: loop {
for Propagation {
from_state,
from_item,
to_state,
to_item,
} in propagations
{
let from_lookaheads = lookaheads
.get(from_state)
.and_then(|l| l.get(from_item))
.cloned();
let to_lookaheads = lookaheads
.entry(to_state.clone())
.or_default()
.entry(to_item.clone())
.or_default();
if let Some(from_set) = from_lookaheads
&& from_set.into_iter().fold(false, |changed, lookahead| {
to_lookaheads.insert(lookahead) || changed
})
{
continue 'outer;
}
}
break;
}
}
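/// Pairs each LR(0) kernel item with its final lookahead set, producing the
/// LALR kernels indexed by state number.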
fn convert_to_lalr_kernels(
lr0_states: &BTreeMap<Lr0Kernel, usize>,
lookaheads: &LookaheadMap,
) -> Vec<LalrKernel> {
let mut states = vec![LalrKernel::default(); lr0_states.len()];
for (kernel, &state) in lr0_states {
let kernel_lookaheads = lookaheads.get(kernel).cloned().unwrap_or_default();
states[state] = LalrKernel(
kernel
.into_iter()
.map(|item| {
LalrItem::new(
item.rule,
item.index,
kernel_lookaheads.get(item).cloned().unwrap_or_default(),
)
})
.collect(),
);
}
states
}