treebender/
earley.rs

1use std::fmt;
2use std::rc::Rc;
3
4use crate::rules::{Grammar, Production, Rule};
5
6#[derive(Debug, Clone, PartialEq)]
7pub struct LR0 {
8  pub rule: Rc<Rule>,
9  pub pos: usize,
10}
11
12impl LR0 {
13  pub fn new(rule: &Rc<Rule>) -> Self {
14    Self {
15      rule: rule.clone(),
16      pos: 0,
17    }
18  }
19
20  pub fn is_active(&self) -> bool {
21    self.pos < self.rule.len()
22  }
23
24  pub fn advance(&self) -> Self {
25    assert!(self.is_active());
26    Self {
27      rule: self.rule.clone(),
28      pos: self.pos + 1,
29    }
30  }
31
32  pub fn next_production(&self) -> Option<&Production> {
33    self.rule.productions.get(self.pos)
34  }
35}
36
37impl fmt::Display for LR0 {
38  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
39    write!(f, "{} →", self.rule.symbol)?;
40    for idx in 0..self.rule.len() {
41      if idx == self.pos {
42        write!(f, " ・")?;
43      }
44      write!(f, " {}", self.rule.productions[idx])?;
45    }
46    if !self.is_active() {
47      write!(f, " ・")?;
48    }
49    Ok(())
50  }
51}
52
53#[derive(Debug, Clone, PartialEq)]
54pub struct State {
55  pub lr0: LR0,
56  pub origin: usize,
57}
58
59impl State {
60  pub fn new(lr0: LR0, origin: usize) -> Self {
61    Self { lr0, origin }
62  }
63
64  pub fn advance(&self) -> Self {
65    Self::new(self.lr0.advance(), self.origin)
66  }
67}
68
69#[derive(Debug)]
70pub struct Chart(Vec<Vec<State>>);
71
72impl Chart {
73  pub fn new(length: usize) -> Self {
74    Self(vec![Vec::new(); length])
75  }
76
77  pub fn len(&self) -> usize {
78    self.0.len()
79  }
80
81  pub fn is_empty(&self) -> bool {
82    self.len() == 0
83  }
84
85  pub fn len_at(&self, k: usize) -> usize {
86    self.0[k].len()
87  }
88
89  pub fn has(&self, k: usize, state: &State) -> bool {
90    self.0[k].contains(state)
91  }
92
93  pub fn add(&mut self, k: usize, state: State) {
94    if !self.has(k, &state) {
95      self.0[k].push(state);
96    }
97  }
98
99  /// Get an owned state so that passing around &mut chart is more ergonomic
100  /// The clone is fairly cheap, only an rc + 2 usize, State would be copy if not
101  /// for the Rc<Rule>
102  fn get_state(&self, k: usize, idx: usize) -> State {
103    self.0[k][idx].clone()
104  }
105}
106
107impl IntoIterator for Chart {
108  type Item = (usize, Vec<State>);
109  type IntoIter = std::iter::Enumerate<std::vec::IntoIter<Vec<State>>>;
110
111  fn into_iter(self) -> Self::IntoIter {
112    self.0.into_iter().enumerate()
113  }
114}
115
116impl fmt::Display for Chart {
117  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
118    for k in 0..self.len() {
119      writeln!(f, "State {}:", k)?;
120      for state in self.0[k].iter() {
121        writeln!(f, "  {}..{}: {}", state.origin, k, state.lr0)?;
122      }
123    }
124    Ok(())
125  }
126}
127
128pub fn parse_chart(g: &Grammar, input: &[&str]) -> Chart {
129  let mut chart = Chart::new(input.len() + 1);
130
131  for rule in g.rules.get(&g.start).expect("grammar missing start rules") {
132    chart.add(0, State::new(LR0::new(&rule), 0));
133  }
134
135  for k in 0..chart.len() {
136    // need to use while loop because the number of states at k can expand during the loop
137    let mut idx = 0;
138    while idx < chart.len_at(k) {
139      let state = chart.get_state(k, idx);
140      idx += 1;
141
142      if let Some(production) = state.lr0.next_production() {
143        if production.is_nonterminal() {
144          predictor(g, &mut chart, k, &state);
145        } else {
146          scanner(&mut chart, k, &state, input);
147        }
148      } else {
149        completer(&mut chart, k, &state);
150      }
151    }
152  }
153
154  chart
155}
156
157fn completer(chart: &mut Chart, k: usize, state: &State) {
158  assert!(!state.lr0.is_active(), "tried to complete active state");
159
160  // lr0 has been completed, now look for states in the chart that are waiting for its symbol
161  for idx in 0..chart.len_at(state.origin) {
162    let other = chart.get_state(state.origin, idx);
163
164    if let Some(np) = other.lr0.next_production() {
165      if np.symbol == state.lr0.rule.symbol {
166        // found one, advance its dot and add the new state to the chart *at k*,
167        // because it's now waiting on a token there
168        chart.add(k, other.advance())
169      }
170    }
171  }
172}
173
174fn predictor(g: &Grammar, chart: &mut Chart, k: usize, state: &State) {
175  assert!(state.lr0.is_active(), "tried to predict non-active state");
176  assert!(
177    state.lr0.next_production().unwrap().is_nonterminal(),
178    "tried to predict a terminal"
179  );
180
181  // this lr0 is waiting for the next production
182  // let's hypothesize that one of the rules that can build this production will
183  // succeed at its current position
184  let needed_symbol = &state.lr0.next_production().unwrap().symbol;
185  for wanted_rule in g
186    .rules
187    .get(needed_symbol)
188    .unwrap_or_else(|| panic!("missing rules for production {}", needed_symbol))
189  {
190    chart.add(k, State::new(LR0::new(wanted_rule), k));
191
192    if g.is_nullable(&needed_symbol) {
193      // automatically complete `state` early, because we know
194      // it will be completable anyways, because its next_production may be produced
195      // by empty input. If we don't do this, nullable rules won't be completed
196      // correctly, because complete() won't run after predict() without a new symbol.
197      chart.add(k, state.advance());
198    }
199  }
200}
201
202fn scanner(chart: &mut Chart, k: usize, state: &State, input: &[&str]) {
203  assert!(state.lr0.is_active(), "tried to scan non-active state");
204  assert!(
205    state.lr0.next_production().unwrap().is_terminal(),
206    "tried to scan a nonterminal"
207  );
208
209  let needed_symbol = &state.lr0.next_production().unwrap().symbol;
210  if k < input.len() && input[k] == needed_symbol {
211    // advance the state to consume this token, and add to state k + 1, where
212    // it will look for the next token
213    chart.add(k + 1, state.advance());
214  }
215}