use std::fmt;
use std::ops::{Index, IndexMut};
use std::collections::{HashSet, VecDeque};
use indexmap::{IndexMap, IndexSet};
use Pretty;
use grammar::{Grammar, RuleId, Symbol, TerminalId};
use item_set::{Action, ItemSetId, ItemSets};
#[derive(Debug)]
pub struct GlrAnalysis {}
impl GlrAnalysis {
pub fn compute(grammar: &Grammar, item_sets: &ItemSets) -> GlrAnalysis {
let conflicts = find_conflicts(item_sets);
for conflict in conflicts {
let arc = find_conflict_arc(&conflict, grammar, item_sets);
trace!("{:#?}", arc);
let reconvs = find_reconvergences(&arc);
trace!("reconvergences {:#?}", reconvs);
}
GlrAnalysis {}
}
}
pub fn find_conflicts(item_sets: &ItemSets) -> Vec<Conflict> {
let mut conflicts = Vec::new();
for is in item_sets.all().iter() {
if is.items().len() < 2 {
continue;
}
let mut table: IndexMap<Symbol, IndexSet<Action>> = IndexMap::new();
for &(symbol, action) in is.actions() {
table
.entry(symbol)
.or_insert_with(|| IndexSet::new())
.insert(action);
}
conflicts.extend(table.into_iter().filter_map(|(symbol, actions)| {
if actions.len() > 1 {
Some(Conflict {
item_set: is.id(),
symbol: symbol,
actions: actions.into_iter().collect(),
})
} else {
None
}
}));
}
conflicts
}
#[derive(Debug)]
pub struct Conflict {
pub item_set: ItemSetId,
pub symbol: Symbol,
pub actions: Vec<Action>,
}
pub fn find_conflict_arc(
conflict: &Conflict,
grammar: &Grammar,
item_sets: &ItemSets,
) -> ConflictArc {
debug!("analyzing {:#?}", conflict);
let mut arc = ConflictArc {
origin: conflict.item_set,
nodes: vec![
ConflictNode {
rank: 0,
lanes: vec![
ConflictLane {
parents: vec![],
ancestors: vec![],
seq: vec![conflict.item_set],
states: vec![conflict.item_set],
},
],
shifts: vec![],
resolved: false,
reconverged: false,
},
],
ranks: vec![vec![ConflictNodeId(0)]],
};
advance_node_to_shift(&mut arc, ConflictNodeId(0), grammar, item_sets);
trace!("{:#?}", arc);
for i in 0.. {
if i > 1000 {
panic!("conflict arc analysis did not converge after 1000 iterations");
}
debug!("step {}", i);
let any_spawned = spawn_next_rank(&mut arc, grammar, item_sets);
if !any_spawned {
break;
}
for node_id in arc.ranks.last().unwrap().clone() {
advance_node_to_shift(&mut arc, node_id, grammar, item_sets);
}
}
arc
}
fn advance_node_to_shift(
arc: &mut ConflictArc,
node: ConflictNodeId,
grammar: &Grammar,
item_sets: &ItemSets,
) {
let mut todo: VecDeque<ConflictLane> = VecDeque::new();
todo.extend(arc[node].lanes.drain(..));
let mut new_lanes = Vec::new();
while let Some(current_lane) = todo.pop_front() {
let id = current_lane.last();
debug!("advancing lane {:?} ({:?})", current_lane, id);
let mut any_shifts = false;
let mut visited_reductions = HashSet::new();
for &(symbol, action) in item_sets[id].actions() {
match (symbol, action) {
(Symbol::Terminal(_), Action::Shift(_)) => any_shifts = true,
(Symbol::Terminal(_), Action::Reduce(rule_id)) => {
if !visited_reductions.insert(rule_id) {
continue;
}
let nt = grammar[rule_id].name();
let len = grammar[rule_id].symbols().len();
trace!(
" - reduction r{} covers {} symbols",
rule_id.as_usize(),
len
);
let mut lanes = Vec::new();
backtrack_lane(¤t_lane, len, rule_id, &arc, item_sets, &mut lanes);
for mut lane in lanes {
lane.ancestors = current_lane.ancestors.clone();
lane.states = current_lane.states.clone();
let id = lane.last();
trace!(" - lookup goto {} at {:?}", nt.pretty(grammar), id);
let mut visited_targets = HashSet::new();
for &(symbol, action) in item_sets[id].actions() {
if symbol == Symbol::Nonterminal(nt) {
match action {
Action::Shift(target) => {
if !visited_targets.insert(target) {
continue;
}
trace!(" - goto {:?}", target);
let mut new_lane = lane.clone();
new_lane.seq.push(target);
new_lane.states.push(target);
todo.push_back(new_lane);
}
_ => unreachable!(),
}
}
}
}
}
_ => (),
}
}
if any_shifts {
trace!(" - has shifts");
new_lanes.push(current_lane);
}
}
arc[node].lanes = new_lanes;
}
fn backtrack_lane(
lane: &ConflictLane,
len: usize,
rule_id: RuleId,
arc: &ConflictArc,
item_sets: &ItemSets,
into: &mut Vec<ConflictLane>,
) {
trace!(" - backtrack {} symbols in lane {:?}", len, lane);
if len < lane.seq.len() {
into.push(ConflictLane {
parents: lane.parents.clone(),
ancestors: vec![],
seq: lane.seq[0..lane.seq.len() - len].into(),
states: vec![],
});
} else {
if lane.parents.is_empty() {
let mut origins = IndexSet::new();
origins.insert(lane.last());
trace!(" - extrapolating from {:?}", origins);
for marker in (0..len).rev() {
origins = item_sets
.all()
.iter()
.filter_map(|is| {
if is.items()
.iter()
.filter(|item| item.rule() == rule_id && item.marker() == marker)
.any(|item| match item.action() {
Some((_, Action::Shift(target))) => origins.contains(&target),
_ => false,
}) {
Some(is.id)
} else {
None
}
})
.collect();
trace!(" - extrapolated to {:?} (marker = {})", origins, marker);
}
into.extend(origins.into_iter().map(|id| ConflictLane {
parents: vec![],
ancestors: vec![],
seq: vec![id],
states: vec![],
}));
} else {
for &(node_id, lane_id) in lane.parents.iter() {
backtrack_lane(
&arc[node_id][lane_id],
len - lane.seq.len(),
rule_id,
arc,
item_sets,
into,
);
}
}
}
}
fn spawn_next_rank(arc: &mut ConflictArc, grammar: &Grammar, item_sets: &ItemSets) -> bool {
let new_rank = arc.ranks.len();
let mut new_rank_nodes = vec![];
for node_id in arc.ranks.last().unwrap().clone() {
debug!("expanding {}", node_id);
let mut shifts: IndexMap<TerminalId, IndexSet<ItemSetId>> = IndexMap::new();
let mut parents: IndexMap<ItemSetId, IndexSet<ConflictLaneId>> = IndexMap::new();
for (lane_id, item_set) in arc[node_id]
.lanes
.iter()
.enumerate()
.map(|(i, l)| (ConflictLaneId(i), l.last()))
{
trace!(" - tracing {} from {:?}", node_id, item_set);
for &(symbol, action) in item_sets[item_set].actions() {
if let (Symbol::Terminal(t), Action::Shift(target)) = (symbol, action) {
trace!(" - {} -> i{}", t.pretty(grammar), target);
shifts.entry(t).or_insert_with(IndexSet::new).insert(target);
parents
.entry(target)
.or_insert_with(IndexSet::new)
.insert(lane_id);
}
}
}
if shifts.is_empty() {
panic!("arrived in deadlock with lanes {:#?}", arc[node_id].lanes);
}
trace!(" - shifts = {:?}", shifts);
trace!(" - parents = {:?}", parents);
let mut all_resolved = true;
for (terminal, mut item_sets) in shifts {
let reconverged = item_sets.iter().any(|is| parents[is].len() > 1);
if item_sets.len() > 1 || reconverged {
trace!(
" - shift {} has item sets {:?}",
terminal.pretty(grammar),
item_sets
);
let new_lanes = item_sets
.into_iter()
.map(|item_set| {
let new_parents: Vec<_> = parents[&item_set]
.iter()
.map(|&lane_id| (node_id, lane_id))
.collect();
ConflictLane {
parents: new_parents.clone(),
ancestors: new_parents,
seq: vec![item_set],
states: vec![item_set],
}
})
.collect();
let new_node_id = ConflictNodeId(arc.nodes.len());
let new_node = ConflictNode {
rank: new_rank,
lanes: new_lanes,
shifts: vec![],
resolved: false,
reconverged: reconverged,
};
arc.nodes.push(new_node);
new_rank_nodes.push(new_node_id);
arc[node_id]
.shifts
.push((terminal, ConflictEdge::Ambiguous(new_node_id)));
all_resolved = false;
} else {
trace!(" - shift {} resolves ambiguity", terminal.pretty(grammar));
arc[node_id]
.shifts
.push((terminal, ConflictEdge::Resolved(item_sets.pop().unwrap())));
}
}
arc[node_id].resolved = all_resolved;
}
if new_rank_nodes.is_empty() {
false
} else {
arc.ranks.push(new_rank_nodes);
true
}
}
pub fn find_reconvergences(arc: &ConflictArc) -> Vec<Reconv> {
let mut v = vec![];
for (node_id, node) in arc.nodes.iter().enumerate() {
let node_id = ConflictNodeId(node_id);
for (lane_id, lane) in node.lanes.iter().enumerate() {
let lane_id = ConflictLaneId(lane_id);
if lane.ancestors.len() > 1 {
v.push(Reconv {
at: (node_id, lane_id),
from: lane.ancestors.clone(),
});
}
}
}
v
}
pub fn find_local_ambiguity(reconv: &Reconv, arc: &ConflictArc) -> LocalAmbiguity {
debug!("find ambiguity at {:?}", reconv);
let traces: Vec<_> = reconv
.from
.iter()
.flat_map(|&(node_id, lane_id)| {
let states = arc[reconv.at.0][reconv.at.1].states.clone();
trace_states_backwards(node_id, lane_id, arc, states).into_iter()
})
.collect();
trace!(" - traced {:?}", traces);
let shortest = traces.iter().map(Vec::len).min().unwrap();
let mut prefix_len = 0;
for i in 0..shortest {
let state = traces[0][i];
if traces.iter().any(|trace| trace[i] != state) {
break;
}
prefix_len = i + 1;
}
assert!(prefix_len > 0, "prefix length should be at least one state, since the traces contain the initial common state");
let mut suffix_len = 0;
for i in 0..shortest {
let state = traces[0][traces[0].len() - i - 1];
if traces
.iter()
.any(|trace| trace[trace.len() - i - 1] != state)
{
break;
}
suffix_len = i + 1;
}
assert!(suffix_len > 0, "suffix length should be at least one state, since the traces contain the final common state");
trace!(" - common prefix = {}, suffix = {}", prefix_len, suffix_len);
LocalAmbiguity {
first: traces[0][prefix_len - 1],
last: traces[0][traces[0].len() - suffix_len],
seqs: traces
.into_iter()
.map(|trace| trace[prefix_len..trace.len() - suffix_len].into())
.collect(),
}
}
fn trace_states_backwards(
node: ConflictNodeId,
lane: ConflictLaneId,
arc: &ConflictArc,
tail: Vec<ItemSetId>,
) -> Vec<Vec<ItemSetId>> {
let lane = &arc[node][lane];
let mut states = lane.states.clone();
states.extend(tail.iter());
if lane.ancestors.is_empty() {
vec![states]
} else {
lane.ancestors
.iter()
.flat_map(|&(node_id, lane_id)| {
trace_states_backwards(node_id, lane_id, arc, states.clone()).into_iter()
})
.collect()
}
}
pub fn resolve_local_ambiguity(
ambig: &LocalAmbiguity,
grammar: &Grammar,
item_sets: &ItemSets,
) -> Vec<RuleSlice> {
debug!("resolving {:?}", ambig);
let nonterms = {
let mut sets = ambig.seqs.iter().map(|seq| {
item_sets[*seq.last().unwrap()]
.items()
.iter()
.map(|item| grammar[item.rule].name())
.collect::<HashSet<_>>()
});
let mut intersected = sets.next().unwrap();
for set in sets {
intersected.retain(|nt| set.contains(nt));
}
intersected
};
trace!(" - nonterminals {:?}", nonterms);
let unify_rules: IndexSet<_> = ambig
.seqs
.iter()
.flat_map(|seq| item_sets[*seq.last().unwrap()].items().iter())
.filter_map(|item| {
let id = item.rule();
if nonterms.contains(&grammar[id].name()) {
Some(id)
} else {
None
}
})
.collect();
for &id in &unify_rules {
trace!(" - unify {:?} {}", id, grammar[id].pretty(grammar));
}
let shortest = unify_rules
.iter()
.map(|&id| grammar[id].symbols().len())
.min()
.unwrap();
let prefix_len = (0..shortest)
.take_while(|&i| {
let mut iter = unify_rules.iter().cloned();
let symbol = grammar[iter.next().unwrap()].symbols()[i];
iter.all(|id| grammar[id].symbols()[i] == symbol)
})
.last()
.map(|i| i + 1)
.unwrap_or(0);
let suffix_len = (0..shortest)
.take_while(|&i| {
let mut iter = unify_rules.iter().cloned();
let symbols = grammar[iter.next().unwrap()].symbols();
let symbol = symbols[symbols.len() - i - 1];
iter.all(|id| grammar[id].symbols()[grammar[id].symbols().len() - i - 1] == symbol)
})
.last()
.map(|i| i + 1)
.unwrap_or(0);
trace!(" - common prefix = {}, suffix = {}", prefix_len, suffix_len);
unify_rules
.iter()
.map(|&id| RuleSlice {
rule: id,
from: prefix_len,
to: grammar[id].symbols().len() - suffix_len,
})
.collect()
}
#[derive(Debug)]
pub struct ConflictArc {
origin: ItemSetId,
nodes: Vec<ConflictNode>,
ranks: Vec<Vec<ConflictNodeId>>,
}
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ConflictNodeId(usize);
impl fmt::Display for ConflictNodeId {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "n{}", self.0)
}
}
impl fmt::Debug for ConflictNodeId {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self)
}
}
impl Index<ConflictNodeId> for ConflictArc {
type Output = ConflictNode;
fn index(&self, index: ConflictNodeId) -> &ConflictNode {
&self.nodes[index.0]
}
}
impl IndexMut<ConflictNodeId> for ConflictArc {
fn index_mut(&mut self, index: ConflictNodeId) -> &mut ConflictNode {
&mut self.nodes[index.0]
}
}
#[derive(Debug)]
pub struct ConflictNode {
rank: usize,
lanes: Vec<ConflictLane>,
shifts: Vec<(TerminalId, ConflictEdge)>,
resolved: bool,
reconverged: bool,
}
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ConflictLaneId(usize);
impl fmt::Display for ConflictLaneId {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "nl{}", self.0)
}
}
impl fmt::Debug for ConflictLaneId {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self)
}
}
impl Index<ConflictLaneId> for ConflictNode {
type Output = ConflictLane;
fn index(&self, index: ConflictLaneId) -> &ConflictLane {
&self.lanes[index.0]
}
}
impl IndexMut<ConflictLaneId> for ConflictNode {
fn index_mut(&mut self, index: ConflictLaneId) -> &mut ConflictLane {
&mut self.lanes[index.0]
}
}
#[allow(missing_docs)]
#[derive(Debug)]
pub enum ConflictEdge {
Resolved(ItemSetId),
Ambiguous(ConflictNodeId),
}
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ConflictLane {
parents: Vec<(ConflictNodeId, ConflictLaneId)>,
ancestors: Vec<(ConflictNodeId, ConflictLaneId)>,
seq: Vec<ItemSetId>,
states: Vec<ItemSetId>,
}
impl ConflictLane {
pub fn last(&self) -> ItemSetId {
self.seq
.last()
.expect("lane has no items, which is forbidden")
.clone()
}
}
impl fmt::Debug for ConflictLane {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"{:?}{:?}->{:?}{:?}",
self.ancestors, self.parents, self.states, self.seq
)
}
}
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Reconv {
at: (ConflictNodeId, ConflictLaneId),
from: Vec<(ConflictNodeId, ConflictLaneId)>,
}
impl fmt::Debug for Reconv {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:?}->{:?}", self.from, self.at)
}
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LocalAmbiguity {
pub first: ItemSetId,
pub seqs: Vec<Vec<ItemSetId>>,
pub last: ItemSetId,
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RuleSlice {
pub rule: RuleId,
pub from: usize,
pub to: usize,
}
impl RuleSlice {
pub fn pretty<'a>(&'a self, grammar: &'a Grammar) -> Pretty<&'a Grammar, &'a Self> {
Pretty::new(grammar, self)
}
}
impl<'a> fmt::Display for Pretty<&'a Grammar, &'a RuleSlice> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use std::iter::{once, repeat};
let symbols = &self.ctx[self.item.rule].symbols()[self.item.from..self.item.to];
for (sep, sym) in once("").chain(repeat(" ")).zip(symbols.iter()) {
write!(f, "{}{}", sep, sym.pretty(self.ctx))?;
}
Ok(())
}
}