perplex/
item_set.rs

1// Copyright (c) 2018 Fabian Schuiki
2
3//! Item sets derived from a grammar.
4
5use std;
6use std::fmt;
7use std::ops::{Index, IndexMut};
8use std::collections::{BTreeSet, HashSet};
9use std::iter::{once, repeat};
10use std::mem::replace;
11
12use bit_set::BitSet;
13
14use Pretty;
15use grammar::{self, Grammar, NonterminalId, RuleId, Symbol, TerminalId};
16use first::FirstSets;
17use honalee;
18
19/// All item sets of a grammar.
20#[derive(Debug, Clone, PartialEq, Eq, Hash)]
21pub struct ItemSets(Vec<ItemSet>);
22
23/// An item set.
24#[derive(Debug, Clone, PartialEq, Eq, Hash)]
25pub struct ItemSet {
26    /// The id of this item set.
27    pub(crate) id: ItemSetId,
28    /// The items in the set.
29    pub(crate) items: Vec<Item>,
30    /// The number of kernel items.
31    pub(crate) kernel: usize,
32}
33
34/// A single item.
35#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
36pub struct Item {
37    /// The rule of the item.
38    pub(crate) rule: RuleId,
39    /// The lookahead terminal.
40    pub(crate) lookahead: TerminalId,
41    /// The position of the marker within the rule.
42    pub(crate) marker: usize,
43    /// The action for this item.
44    pub(crate) action: Option<(Symbol, Action)>,
45}
46
47/// An action to be taken upon encountering a symbol.
48#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
49pub enum Action {
50    /// Shift the symbol and go to the given item set.
51    Shift(ItemSetId),
52    /// Reduce with the given rule.
53    Reduce(RuleId),
54}
55
56/// A unique item set identifier.
57#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
58pub struct ItemSetId(usize);
59
60impl ItemSets {
61    /// Create a new list of item sets.
62    pub fn new(sets: Vec<ItemSet>) -> ItemSets {
63        ItemSets(sets)
64    }
65
66    /// Compute the item sets for a grammar.
67    pub fn compute(grammar: &Grammar) -> ItemSets {
68        honalee::construct_item_sets(grammar)
69    }
70
71    /// Get the item sets in the grammar.
72    pub fn all(&self) -> &[ItemSet] {
73        &self.0
74    }
75
76    /// Get a pretty printer for this item set.
77    pub fn pretty<'a>(&'a self, grammar: &'a Grammar) -> Pretty<&'a Grammar, &'a Self> {
78        Pretty::new(grammar, self)
79    }
80
81    /// Compress all item sets.
82    pub fn compress(&mut self) {
83        for is in &mut self.0 {
84            is.compress();
85        }
86    }
87}
88
89impl fmt::Debug for ItemSetId {
90    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
91        write!(f, "i{}", self.0)
92    }
93}
94
95impl Index<ItemSetId> for ItemSets {
96    type Output = ItemSet;
97
98    fn index(&self, index: ItemSetId) -> &ItemSet {
99        &self.0[index.as_usize()]
100    }
101}
102
103impl IndexMut<ItemSetId> for ItemSets {
104    fn index_mut(&mut self, index: ItemSetId) -> &mut ItemSet {
105        &mut self.0[index.as_usize()]
106    }
107}
108
109impl<'a> fmt::Display for Pretty<&'a Grammar, &'a ItemSets> {
110    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
111        for (is, sep) in self.item.0.iter().zip(once("").chain(repeat("\n"))) {
112            write!(f, "{}{}", sep, is.pretty(self.ctx))?;
113        }
114        Ok(())
115    }
116}
117
118impl ItemSet {
119    /// Create a new item set.
120    pub fn new(id: ItemSetId) -> ItemSet {
121        ItemSet {
122            id: id,
123            items: Vec::new(),
124            kernel: 0,
125        }
126    }
127
128    /// Create a new item set with certain items.
129    pub fn with_items(id: ItemSetId, items: Vec<Item>) -> ItemSet {
130        let mut set = ItemSet::new(id);
131        set.kernel = items.len();
132        set.items = items;
133        set
134    }
135
136    /// Get the id of the item set.
137    pub fn id(&self) -> ItemSetId {
138        self.id
139    }
140
141    /// Get the items in the set.
142    pub fn items(&self) -> &[Item] {
143        &self.items
144    }
145
146    /// Get the kernel items in the set.
147    pub fn kernel_items(&self) -> &[Item] {
148        &self.items[0..self.kernel]
149    }
150
151    /// Get a pretty printer for this item set.
152    pub fn pretty<'a>(&'a self, grammar: &'a Grammar) -> Pretty<&'a Grammar, &'a Self> {
153        Pretty::new(grammar, self)
154    }
155
156    /// Compute the closure of the item set.
157    pub fn closure(&mut self, grammar: &Grammar, first_sets: &FirstSets) {
158        compute_closure(self, grammar, first_sets)
159    }
160
161    /// Compute the kernel item cores.
162    ///
163    /// The returned struct can be used to compare two item sets for equality in
164    /// their kernel item cores.
165    pub fn kernel_item_cores(&self) -> KernelCores {
166        let set: BTreeSet<_> = self.items[0..self.kernel]
167            .iter()
168            .map(|item| (item.rule, item.marker))
169            .collect();
170        KernelCores(set.into_iter().collect())
171    }
172
173    /// Replace all occurrences of one action with another.
174    pub fn replace_actions(&mut self, from: Action, to: Action) {
175        for &mut (_, ref mut action) in self.actions_mut() {
176            if *action == from {
177                *action = to;
178            }
179        }
180    }
181
182    /// Get an iterator over the actions in the set.
183    pub fn actions(&self) -> Actions {
184        Actions(self.items.iter())
185    }
186
187    /// Get a mutable iterator over the actions in the set.
188    pub fn actions_mut(&mut self) -> ActionsMut {
189        ActionsMut(self.items.iter_mut())
190    }
191
192    /// Merge another item set into this.
193    pub fn merge(&mut self, other: ItemSet) {
194        let mut present: HashSet<Item> = self.items
195            .iter()
196            .cloned()
197            .map(|mut item| {
198                item.action = None;
199                item
200            })
201            .collect();
202        for (index, item) in other.items.into_iter().enumerate() {
203            let mut item_na = item;
204            item_na.action = None;
205            if present.insert(item_na) {
206                if index < other.kernel {
207                    self.items.insert(self.kernel, item);
208                    self.kernel += 1;
209                } else {
210                    self.items.push(item);
211                }
212            }
213        }
214    }
215
216    /// Compress the item set.
217    ///
218    /// This will remove redundant items and replace their lookahead token with
219    /// a `#`.
220    pub fn compress(&mut self) {
221        let mut present = HashSet::<Item>::new();
222        let items = replace(&mut self.items, Vec::new());
223        for (index, mut item) in items.into_iter().enumerate() {
224            if index < self.kernel {
225                self.items.push(item);
226            } else {
227                if item.is_shift() {
228                    item.lookahead = grammar::NIL;
229                }
230                if present.insert(item) {
231                    self.items.push(item);
232                }
233            }
234        }
235    }
236}
237
238impl<'a> fmt::Display for Pretty<&'a Grammar, &'a ItemSet> {
239    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
240        writeln!(f, "i{}:", self.item.id)?;
241        for (index, item) in self.item.items.iter().enumerate() {
242            if index > 0 {
243                write!(f, "\n")?;
244            }
245            write!(f, "    {} {}", index, item.pretty(self.ctx))?;
246            if index < self.item.kernel {
247                write!(f, "*")?;
248            }
249            if let Some((symbol, action)) = item.action {
250                write!(f, " ({}, {})", symbol.pretty(self.ctx), action)?;
251            }
252        }
253        if self.item.items.is_empty() {
254            write!(f, "    <empty>")?;
255        }
256        Ok(())
257    }
258}
259
260impl Item {
261    /// Get the rule this item represents.
262    pub fn rule(&self) -> RuleId {
263        self.rule
264    }
265
266    /// Get the lookahead terminal of this item.
267    pub fn lookahead(&self) -> TerminalId {
268        self.lookahead
269    }
270
271    /// Get the position of the marker within the rule.
272    pub fn marker(&self) -> usize {
273        self.marker
274    }
275
276    /// Get the action associated with this item.
277    pub fn action(&self) -> Option<(Symbol, Action)> {
278        self.action
279    }
280
281    /// Change the action of this item.
282    pub fn set_action(&mut self, action: Option<(Symbol, Action)>) {
283        self.action = action
284    }
285
286    /// Get a pretty printer for this item.
287    pub fn pretty<'a>(&'a self, grammar: &'a Grammar) -> Pretty<&'a Grammar, &'a Self> {
288        Pretty::new(grammar, self)
289    }
290
291    /// Whether this item has a shift action.
292    pub fn is_shift(&self) -> bool {
293        self.action
294            .map(|(_, action)| action.is_shift())
295            .unwrap_or(false)
296    }
297
298    /// Whether this item has a reduce action.
299    pub fn is_reduce(&self) -> bool {
300        self.action
301            .map(|(_, action)| action.is_reduce())
302            .unwrap_or(false)
303    }
304
305    /// Whether this item has an action.
306    pub fn has_action(&self) -> bool {
307        self.action.is_some()
308    }
309}
310
311impl<'a> fmt::Display for Pretty<&'a Grammar, &'a Item> {
312    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
313        if self.item.rule == grammar::ACCEPT {
314            write!(f, "[$accept ->")?;
315            if self.item.marker == 0 {
316                write!(f, " .")?;
317            }
318            write!(f, " {}", NonterminalId::from_usize(0).pretty(self.ctx))?;
319            if self.item.marker == 1 {
320                write!(f, " .")?;
321            }
322        } else {
323            let rule = self.ctx.rule(self.item.rule);
324            write!(f, "[{} ->", rule.name().pretty(self.ctx))?;
325            let symbols = rule.symbols();
326            for symbol in &symbols[0..self.item.marker] {
327                write!(f, " {}", symbol.pretty(self.ctx))?;
328            }
329            write!(f, " .")?;
330            for symbol in &symbols[self.item.marker..] {
331                write!(f, " {}", symbol.pretty(self.ctx))?;
332            }
333        }
334        write!(f, ", {}]", self.item.lookahead.pretty(self.ctx))?;
335        Ok(())
336    }
337}
338
339impl Action {
340    /// Whether this is a shift action.
341    pub fn is_shift(&self) -> bool {
342        match *self {
343            Action::Shift(_) => true,
344            _ => false,
345        }
346    }
347
348    /// Whether this is a reduce action.
349    pub fn is_reduce(&self) -> bool {
350        match *self {
351            Action::Reduce(_) => true,
352            _ => false,
353        }
354    }
355}
356
357impl fmt::Display for Action {
358    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
359        match *self {
360            Action::Shift(id) => write!(f, "i{}", id),
361            Action::Reduce(grammar::ACCEPT) => write!(f, "$accept"),
362            Action::Reduce(id) => write!(f, "r{}", id.as_usize()),
363        }
364    }
365}
366
367impl ItemSetId {
368    /// Create an item set id from a usize.
369    pub fn from_usize(id: usize) -> ItemSetId {
370        ItemSetId(id)
371    }
372
373    /// Obtain the id as a usize.
374    pub fn as_usize(self) -> usize {
375        self.0
376    }
377}
378
379impl fmt::Display for ItemSetId {
380    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
381        write!(f, "{}", self.0)
382    }
383}
384
385/// Compute the closure of an item set.
386fn compute_closure(item_set: &mut ItemSet, grammar: &Grammar, first_sets: &FirstSets) {
387    let mut done: HashSet<Item> = item_set.items.iter().cloned().collect();
388    let mut tail = 0;
389    while item_set.items.len() > tail {
390        let item = item_set.items[tail];
391        tail += 1;
392        if item.rule == grammar::ACCEPT {
393            if item.marker == 0 {
394                for &rule_id in grammar.rules_for_nonterminal(NonterminalId::from_usize(0)) {
395                    let new_item = Item {
396                        rule: rule_id,
397                        lookahead: item.lookahead,
398                        marker: 0,
399                        action: None,
400                    };
401                    if !done.contains(&new_item) {
402                        done.insert(new_item);
403                        item_set.items.push(new_item);
404                    }
405                }
406            }
407        } else {
408            let symbols = grammar.rule(item.rule).symbols();
409            if item.marker == symbols.len() {
410                continue;
411            }
412            match symbols[item.marker] {
413                Symbol::Terminal(_) => (),
414                Symbol::Nonterminal(id) => {
415                    // Compute the follow set for the nonterminal.
416                    let (mut follow_set, epsilon) =
417                        compute_follow_set(&symbols[item.marker + 1..], grammar, first_sets);
418                    if epsilon {
419                        follow_set.insert(item.lookahead.as_usize());
420                    }
421
422                    // Generate the new items.
423                    for &rule_id in grammar.rules_for_nonterminal(id) {
424                        for fs in &follow_set {
425                            let new_item = Item {
426                                rule: rule_id,
427                                lookahead: TerminalId::from_usize(fs),
428                                marker: 0,
429                                action: None,
430                            };
431                            if !done.contains(&new_item) {
432                                done.insert(new_item);
433                                item_set.items.push(new_item);
434                            }
435                        }
436                    }
437                }
438            }
439        }
440    }
441}
442
443/// Compute the follow set of a sequence of symbols.
444fn compute_follow_set<'a, I>(
445    symbols: I,
446    grammar: &Grammar,
447    first_sets: &FirstSets,
448) -> (BitSet, bool)
449where
450    I: IntoIterator<Item = &'a Symbol>,
451{
452    let mut set = BitSet::with_capacity(grammar.terminal_id_bound());
453    for symbol in symbols.into_iter() {
454        match *symbol {
455            Symbol::Terminal(id) => {
456                set.insert(id.as_usize());
457                return (set, false);
458            }
459            Symbol::Nonterminal(id) => {
460                let fs = first_sets.get(id);
461                set.union_with(&fs.symbols);
462                if !fs.has_epsilon {
463                    return (set, false);
464                }
465            }
466        }
467    }
468    (set, true)
469}
470
471/// A list of kernel item cores.
472///
473/// The entries are sorted such that two item sets with the same kernel item
474/// cores but different order will produce the same KernelCores struct.
475#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
476pub struct KernelCores(Vec<(RuleId, usize)>);
477
478/// An iterator over the actions of an item set.
479pub struct Actions<'a>(std::slice::Iter<'a, Item>);
480
481/// A mutable iterator over the actions of an item set.
482pub struct ActionsMut<'a>(std::slice::IterMut<'a, Item>);
483
484impl<'a> Iterator for Actions<'a> {
485    type Item = &'a (Symbol, Action);
486
487    fn next(&mut self) -> Option<Self::Item> {
488        while let Some(item) = self.0.next() {
489            if let Some(ref action) = item.action {
490                return Some(action);
491            }
492        }
493        None
494    }
495}
496
497impl<'a> Iterator for ActionsMut<'a> {
498    type Item = &'a mut (Symbol, Action);
499
500    fn next(&mut self) -> Option<Self::Item> {
501        while let Some(item) = self.0.next() {
502            if let Some(ref mut action) = item.action {
503                return Some(action);
504            }
505        }
506        None
507    }
508}
509
510#[cfg(test)]
511mod tests {
512    use grammar::{Grammar, Rule, END};
513    use item_set::ItemSets;
514
515    /// Ensure that left recursive rules that have a nonterminal after the
516    /// recursion produce all the required lookaheads. This is a regression test
517    /// to catch the following:
518    ///
519    ///     A : A B | B ;
520    ///     B : c ;
521    ///
522    /// Would generated the following items in the first set:
523    ///
524    ///     [A -> . A B, $end]
525    ///     [A -> . B, $end]
526    ///     [B -> . c, $end]
527    ///
528    /// But would be missing the following:
529    ///
530    ///     [A -> . A B, c]
531    ///     [A -> . B, c]
532    ///     [B -> . c, c]
533    #[test]
534    fn left_recursion() {
535        let mut g = Grammar::new();
536        let a = g.add_nonterminal("A");
537        let b = g.add_nonterminal("B");
538        let c = g.add_terminal("c");
539        g.add_rule(Rule::new(a, vec![a.into(), b.into()]));
540        g.add_rule(Rule::new(a, vec![b.into()]));
541        g.add_rule(Rule::new(b, vec![c.into()]));
542        let is = ItemSets::compute(&g);
543        println!("{}", is.pretty(&g));
544        let la_exp = vec![END, c];
545        for is in is.all()[2..].iter() {
546            let la_act: Vec<_> = is.items().iter().map(|i| i.lookahead).collect();
547            assert_eq!(la_act, la_exp);
548        }
549    }
550}