use std::{
collections::{BTreeMap, BTreeSet},
fmt::Display,
};
use crate::{
grammar::cfg::{NonTerminalIndexFn, TerminalIndexFn},
render_par_string, Cfg, GrammarAnalysisError, GrammarConfig, Pr, Terminal,
};
use anyhow::{anyhow, Result};
use lalry::{Config, LR1ResolvedConflict, LRConflictResolution};
use parol_runtime::{
lexer::{BLOCK_COMMENT, EOI, FIRST_USER_TOKEN, LINE_COMMENT, NEW_LINE, WHITESPACE},
log::trace,
NonTerminalIndex, ProductionIndex, TerminalIndex,
};
type LR1ParseTableLalr<'a> =
lalry::LR1ParseTable<'a, TerminalIndex, NonTerminalIndex, ProductionIndex>;
type LR1StateLalr<'a> = lalry::LR1State<'a, TerminalIndex, NonTerminalIndex, ProductionIndex>;
type LRActionLalr<'a> = lalry::LRAction<'a, TerminalIndex, NonTerminalIndex, ProductionIndex>;
type LR1ConflictLalr<'a> = lalry::LR1Conflict<'a, TerminalIndex, NonTerminalIndex, ProductionIndex>;
type ItemLalr<'a> = lalry::Item<'a, TerminalIndex, NonTerminalIndex, ProductionIndex>;
type ItemSetLalr<'a> = lalry::ItemSet<'a, TerminalIndex, NonTerminalIndex, ProductionIndex>;
type RhsLalr = lalry::Rhs<TerminalIndex, NonTerminalIndex, ProductionIndex>;
type GrammarLalr = lalry::Grammar<TerminalIndex, NonTerminalIndex, ProductionIndex>;
impl From<&Cfg> for GrammarLalr {
fn from(cfg: &Cfg) -> Self {
let ti = cfg.get_terminal_index_function();
let nti = cfg.get_non_terminal_index_function();
let mut grammar = GrammarLalr {
rules: BTreeMap::new(),
start: nti.non_terminal_index(&cfg.st),
};
for (i, Pr(s, rhs, _)) in cfg.pr.iter().enumerate() {
let lhs = nti.non_terminal_index(s.get_n_ref().unwrap());
let rhs = RhsLalr {
syms: rhs
.iter()
.map(|s| match s {
crate::Symbol::N(n, _, _) => {
lalry::Symbol::Nonterminal(nti.non_terminal_index(n))
}
crate::Symbol::T(Terminal::Trm(s, k, _, _, _)) => {
lalry::Symbol::Terminal(ti.terminal_index(s, *k))
}
_ => unreachable!(),
})
.collect(),
act: i,
};
trace!("LALR(1) rule: {} -> {:?}", lhs, rhs);
grammar.rules.entry(lhs).or_default().push(rhs);
}
grammar
}
}
#[derive(Debug, Ord, PartialOrd, Eq, PartialEq)]
pub struct Item {
pub prod: ProductionIndex,
pub pos: usize,
}
impl From<ItemLalr<'_>> for Item {
fn from(item: ItemLalr) -> Self {
Item {
prod: item.rhs.act,
pos: item.pos,
}
}
}
#[derive(Debug)]
pub struct ItemSet {
pub items: BTreeSet<Item>,
}
impl From<ItemSetLalr<'_>> for ItemSet {
fn from(item_set: ItemSetLalr) -> Self {
let items = item_set.items.into_iter().map(|item| item.into()).collect();
ItemSet { items }
}
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
pub enum LRAction {
Shift(usize),
Reduce(NonTerminalIndex, ProductionIndex),
Accept,
}
impl From<LRActionLalr<'_>> for LRAction {
fn from(action: LRActionLalr<'_>) -> Self {
match action {
lalry::LRAction::Shift(s) => LRAction::Shift(s),
lalry::LRAction::Reduce(p, r) => LRAction::Reduce(*p, r.act),
lalry::LRAction::Accept => LRAction::Accept,
}
}
}
#[derive(Debug)]
pub struct LR1State {
pub actions: BTreeMap<TerminalIndex, LRAction>,
pub gotos: BTreeMap<NonTerminalIndex, usize>,
}
impl From<LR1StateLalr<'_>> for LR1State {
fn from(state: LR1StateLalr) -> Self {
let mut actions = BTreeMap::new();
if let Some(action) = state.eof {
actions.insert(EOI, action.into());
};
for (terminal, action) in state.lookahead {
actions.insert(*terminal, action.into());
}
let gotos = state.goto.into_iter().map(|(n, s)| (*n, s)).collect();
LR1State { actions, gotos }
}
}
#[derive(Debug)]
pub enum LRConflict {
ReduceReduce {
state: ItemSet,
token: TerminalIndex,
r1: ProductionIndex,
r2: ProductionIndex,
},
ShiftReduce {
state: ItemSet,
token: TerminalIndex,
rule: ProductionIndex,
},
}
impl From<LR1ConflictLalr<'_>> for LRConflict {
fn from(conflict: LR1ConflictLalr) -> Self {
match conflict {
LR1ConflictLalr::ReduceReduce {
state,
token,
r1,
r2,
} => LRConflict::ReduceReduce {
state: state.into(),
token: *token.unwrap_or(&EOI),
r1: r1.1.act,
r2: r2.1.act,
},
LR1ConflictLalr::ShiftReduce { state, token, rule } => LRConflict::ShiftReduce {
state: state.into(),
token: *token.unwrap_or(&EOI),
rule: rule.1.act,
},
}
}
}
impl Display for LRConflict {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
LRConflict::ReduceReduce {
state,
token,
r1,
r2,
} => {
writeln!(
f,
"Reduce-reduce conflict in state {:?} on token {:?}",
state, token
)?;
write!(
f,
"Decission between reducing with production {} or {} on token {}",
r1, r2, token
)
}
LRConflict::ShiftReduce { state, token, rule } => {
writeln!(
f,
"Shift-reduce conflict in state {:?} on token {:?}",
state, token
)?;
write!(
f,
"Decission between shifting the token {} or reducing with production {}",
token, rule
)
}
}
}
}
#[derive(Debug)]
pub struct LRConflictError {
pub conflict: LRConflict,
cfg: Option<Cfg>,
}
impl LRConflictError {
pub fn new(conflict: LRConflict, cfg: Option<Cfg>) -> Self {
LRConflictError { conflict, cfg }
}
pub fn set_cfg(&mut self, cfg: Cfg) {
self.cfg = Some(cfg);
}
}
impl From<LRConflict> for LRConflictError {
fn from(conflict: LRConflict) -> Self {
LRConflictError::new(conflict, None)
}
}
impl Display for LRConflictError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let tr: Box<dyn Fn(TerminalIndex) -> String> = if let Some(cfg) = &self.cfg {
let terminals = cfg
.get_ordered_terminals()
.iter()
.map(|(t, _, _)| t.to_string())
.collect::<Vec<_>>();
Box::new(move |ti: TerminalIndex| {
if ti >= FIRST_USER_TOKEN {
terminals[(ti - FIRST_USER_TOKEN) as usize].clone()
} else {
match ti {
EOI => "<$>".to_owned(),
NEW_LINE => "<NL>".to_owned(),
WHITESPACE => "<WS>".to_owned(),
LINE_COMMENT => "<LC>".to_owned(),
BLOCK_COMMENT => "<BC>".to_owned(),
_ => unreachable!(),
}
}
}) as Box<dyn Fn(TerminalIndex) -> String>
} else {
Box::new(|i: TerminalIndex| i.to_string()) as Box<dyn Fn(TerminalIndex) -> String>
};
match &self.conflict {
LRConflict::ReduceReduce {
state,
token,
r1,
r2,
} => {
writeln!(
f,
"Reduce-reduce conflict in state {:?} on token {}",
state,
tr(*token)
)?;
if let Some(cfg) = &self.cfg {
writeln!(
f,
"Can't decide which of the following two productions to reduce with:",
)?;
writeln!(f, " Production {}: {}", r1, cfg.pr[*r1])?;
writeln!(f, " Production {}: {}", r2, cfg.pr[*r2])?;
}
Ok(())
}
LRConflict::ShiftReduce { state, token, rule } => {
if let Some(cfg) = &self.cfg {
writeln!(f, "Shift-reduce conflict in state")?;
state.items.iter().for_each(|item| {
let Pr(lhs, rhs, _) = &cfg.pr[item.prod];
let mut r = rhs
.iter()
.enumerate()
.map(|(i, s)| {
if i == item.pos {
format!("•{}", s)
} else {
format!("{}", s)
}
})
.collect::<Vec<_>>()
.join(" ");
if item.pos == rhs.len() {
r.push('•');
}
writeln!(f, " {},{}: {} -> {}", item.prod, item.pos, lhs, r).unwrap();
});
writeln!(
f,
"Can't decide between shifting the token or reducing with the production:",
)?;
writeln!(f, " Token {}: {}", token, tr(*token))?;
writeln!(f, " Production {}: {}", rule, cfg.pr[*rule])?;
} else {
writeln!(
f,
"Shift-reduce conflict in state {:?} on token {:?}",
state, token
)?;
}
Ok(())
}
}
}
}
#[derive(Debug)]
pub struct LRResolvedConflict {
pub conflict: LRConflict,
pub applied_resolution: LRConflictResolution,
}
impl From<LR1ResolvedConflict<'_, TerminalIndex, NonTerminalIndex, ProductionIndex>>
for LRResolvedConflict
{
fn from(
conflict: LR1ResolvedConflict<'_, TerminalIndex, NonTerminalIndex, ProductionIndex>,
) -> Self {
LRResolvedConflict {
conflict: conflict.conflict.into(),
applied_resolution: conflict.applied_resolution,
}
}
}
impl Display for LRResolvedConflict {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
writeln!(
f,
"{} resolved by {:?}",
self.conflict, self.applied_resolution
)
}
}
#[derive(Debug)]
pub struct LRParseTable {
pub states: Vec<LR1State>,
}
impl From<LR1ParseTableLalr<'_>> for LRParseTable {
fn from(parse_table: LR1ParseTableLalr) -> Self {
let mut states = Vec::new();
for state in parse_table.states.into_iter() {
let state = state.into();
states.push(state);
}
LRParseTable { states }
}
}
struct LALRConfig {
calls: std::cell::RefCell<Vec<LRResolvedConflict>>,
}
impl LALRConfig {
fn new() -> Self {
LALRConfig {
calls: std::cell::RefCell::new(vec![]),
}
}
}
impl<'a> Config<'a, TerminalIndex, NonTerminalIndex, ProductionIndex> for LALRConfig {
fn resolve_shift_reduce_conflict_in_favor_of_shift(&self) -> bool {
true
}
fn warn_on_resolved_conflicts(&self) -> bool {
true
}
fn on_resolved_conflict(
&self,
conflict: LR1ResolvedConflict<'a, TerminalIndex, NonTerminalIndex, ProductionIndex>,
) {
let conflict: LRResolvedConflict = conflict.into();
println!("{}", conflict);
self.calls.borrow_mut().push(conflict);
}
fn priority_of(
&self,
rhs: &lalry::Rhs<TerminalIndex, NonTerminalIndex, ProductionIndex>,
_lookahead: Option<&TerminalIndex>,
) -> i32 {
-(rhs.act as i32)
}
}
pub fn calculate_lalr1_parse_table(
grammar_config: &GrammarConfig,
) -> Result<(LRParseTable, Vec<LRResolvedConflict>)> {
trace!("CFG: \n{}", render_par_string(grammar_config, true)?);
let cfg = &grammar_config.cfg;
let grammar = GrammarLalr::from(cfg);
trace!("{:#?}", grammar);
let config = LALRConfig::new();
let parse_table = grammar.lalr1(&config).map_err(|e| {
let conflict: LRConflict = e.into();
let mut conflict: LRConflictError = conflict.into();
conflict.set_cfg(cfg.clone());
anyhow!(GrammarAnalysisError::LALR1ParseTableConstructionFailed { conflict })
})?;
trace!("LALR(1) parse table: {:#?}", parse_table);
let parse_table = LRParseTable::from(parse_table);
trace!("Converted LALR(1) parse table: {:#?}", parse_table);
Ok((parse_table, config.calls.into_inner()))
}