cfg_classify/
lr.rs

1//! The Lr grammar class.
2
3#![allow(missing_docs)]
4
5use std::collections::{BTreeMap, VecDeque};
6use std::rc::Rc;
7
8use cfg_grammar::history::node::RootHistoryNode;
9use cfg_grammar::symbol::set::SymbolBitSet;
10use cfg_grammar::RuleContainer;
11use cfg_symbol::Symbol;
12
13type Dot = u32;
14type RuleId = u32;
15type SetId = u32;
16
17/// A container of LR(0) items.
18#[derive(Debug, Eq, PartialEq, Ord, PartialOrd)]
19pub struct Lr0Items {
20    pub map: BTreeMap<RuleId, Lr0Item>,
21}
22
23#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
24pub struct Lr0Item {
25    pub rhs: Vec<Symbol>,
26    pub dot: Dot,
27}
28
29/// A builder for an LR(0) item closure.
30pub struct Lr0ClosureBuilder<'a, G> {
31    grammar: &'a mut G,
32    queue: VecDeque<Lr0Item>,
33    terminal_set: SymbolBitSet,
34}
35
36/// Builder of LR(0) Finite State Machine.
37pub struct Lr0FsmBuilder<'a, G> {
38    closure: Lr0ClosureBuilder<'a, G>,
39    sets_queue: VecDeque<Rc<Lr0Items>>,
40    cached_sets: BTreeMap<Rc<Lr0Items>, u32>,
41}
42
43/// An LR(0) node.
44#[derive(Debug, Eq, PartialEq)]
45pub struct Lr0Node {
46    /// List of LR(0) items.
47    pub items: Rc<Lr0Items>,
48    /// List of transitions through terminals.
49    pub link: BTreeMap<Symbol, SetId>,
50}
51
52impl Lr0Items {
53    fn new() -> Self {
54        Lr0Items {
55            map: BTreeMap::new(),
56        }
57    }
58}
59
60impl<'a, G> Lr0ClosureBuilder<'a, G>
61where
62    G: RuleContainer,
63{
64    /// Creates a builder for an LR(0) item closure.
65    pub fn new(grammar: &'a mut G) -> Self {
66        Lr0ClosureBuilder {
67            queue: VecDeque::new(),
68            terminal_set: SymbolBitSet::terminal_set(&grammar),
69            grammar,
70        }
71    }
72
73    /// We compute the closure for LR(0) items.
74    pub fn closure(&mut self, items: &mut Lr0Items) {
75        self.queue.clear();
76        self.queue.extend(items.map.values().cloned());
77
78        while let Some(item) = self.queue.pop_front() {
79            if let Some(nonterminal_postdot) = self.nonterminal_postdot(&item) {
80                for (rule_idx, rule) in self.grammar.rules().enumerate() {
81                    if rule.lhs == nonterminal_postdot {
82                        let new_item = Lr0Item {
83                            rhs: rule.rhs.to_vec(),
84                            dot: 0,
85                        };
86                        if items
87                            .map
88                            .insert(rule_idx as RuleId, new_item.clone())
89                            .is_none()
90                        {
91                            self.queue.push_back(new_item);
92                        }
93                    }
94                }
95            }
96        }
97    }
98
99    /// Advances the items by a symbol.
100    pub fn advance(&mut self, items: &Lr0Items, sym: Symbol) -> Option<Lr0Items> {
101        let mut new_items = Lr0Items::new();
102        for (&rule_id, item) in items.map.iter() {
103            if let Some(postdot) = self.postdot(item) {
104                if postdot == sym {
105                    new_items.map.insert(
106                        rule_id,
107                        Lr0Item {
108                            rhs: item.rhs.clone(),
109                            dot: item.dot + 1,
110                        },
111                    );
112                }
113            }
114        }
115        if new_items.map.is_empty() {
116            None
117        } else {
118            self.closure(&mut new_items);
119            Some(new_items)
120        }
121    }
122
123    fn postdot(&self, item: &Lr0Item) -> Option<Symbol> {
124        item.rhs.get(item.dot as usize).cloned()
125    }
126
127    fn nonterminal_postdot(&self, item: &Lr0Item) -> Option<Symbol> {
128        match item.rhs.get(item.dot as usize) {
129            Some(&postdot) => {
130                if !self.terminal_set.has_sym(postdot) {
131                    Some(postdot)
132                } else {
133                    None
134                }
135            }
136            _ => None,
137        }
138    }
139}
140
141impl<'a, G> Lr0FsmBuilder<'a, G>
142where
143    G: RuleContainer,
144{
145    /// Creates a new LR(0) Finite State Machine builder.
146    pub fn new(grammar: &'a mut G) -> Self {
147        Lr0FsmBuilder {
148            closure: Lr0ClosureBuilder::new(grammar),
149            sets_queue: VecDeque::new(),
150            cached_sets: BTreeMap::new(),
151        }
152    }
153
154    /// Construct an LR(0) Finite State Machine.
155    pub fn make_lr0_fsm(&mut self, start_sym: Symbol) -> Vec<Lr0Node> {
156        self.cached_sets.clear();
157        self.sets_queue.clear();
158
159        let initial_item_set = self.initial_item_set(start_sym);
160        self.introduce_set(initial_item_set, 0);
161        let mut result = vec![];
162        while let Some(items) = self.sets_queue.pop_front() {
163            let mut link = BTreeMap::new();
164            let terminals: Vec<Symbol> = self.closure.terminal_set.iter().collect();
165            for terminal in terminals {
166                if let Some(advanced_set) = self.closure.advance(&items, terminal) {
167                    let id = self.id_of(Rc::new(advanced_set));
168                    link.insert(terminal, id);
169                }
170            }
171            result.push(Lr0Node { items, link })
172        }
173        result
174    }
175
176    fn initial_item_set(&mut self, start_sym: Symbol) -> Rc<Lr0Items> {
177        let (_new_start, new_start_rule_id) = self.augment_grammar(start_sym);
178        let initial_item = Lr0Item {
179            rhs: vec![start_sym],
180            dot: 0,
181        };
182        let mut initial_item_set = Lr0Items::new();
183        initial_item_set.map.insert(new_start_rule_id, initial_item);
184        self.closure.closure(&mut initial_item_set);
185        Rc::new(initial_item_set)
186    }
187
188    fn augment_grammar(&mut self, start_sym: Symbol) -> (Symbol, RuleId) {
189        let new_start = self.closure.grammar.next_sym();
190        let rule_id = self.closure.grammar.rules().count() as RuleId;
191        let history_id = self
192            .closure
193            .grammar
194            .add_history_node(RootHistoryNode::NoOp.into());
195        self.closure
196            .grammar
197            .rule(new_start)
198            .rhs_with_history(&[start_sym], history_id);
199        (new_start, rule_id)
200    }
201
202    fn id_of(&mut self, items: Rc<Lr0Items>) -> SetId {
203        match self.cached_sets.get(&items) {
204            None => {
205                let id = self.cached_sets.len() as SetId;
206                self.introduce_set(items, id);
207                id
208            }
209            Some(&id) => id,
210        }
211    }
212
213    fn introduce_set(&mut self, items: Rc<Lr0Items>, id: SetId) {
214        self.cached_sets.insert(items.clone(), id);
215        self.sets_queue.push_back(items);
216    }
217}