gll_pg_core/
state.rs

1//! GLL Grammar state and SPPF structure
2
3/// Re-exported `petgraph` structs to avoid requiring dependency of generated code on it.
4pub use petgraph::{dot::Dot, Directed, Graph};
5use petgraph::{
6    graph::{EdgeReference, NodeIndex},
7    visit::EdgeRef,
8};
9use std::collections::{BTreeMap, BTreeSet};
10use std::fmt::{Debug, Write};
11/// Re-exported `streaming_iterator` structs to avoid requiring dependency of generated code on it.
12pub use streaming_iterator::StreamingIterator;
13
14/// GSS Node Label Type
15pub type GSSNode<L> = (L, usize);
16
17/// SPPF Nodes are stored into a vec, and references are stored as integer
18pub type SPPFNodeIndex = usize;
19
20/// A common trait that all symbols should impl,
21/// the symbols and impl are generated.
22/// You don't need to impl it in your code.
23pub trait GrammarSymbol: Debug + PartialEq {
24    fn is_eps(&self) -> bool;
25}
26
27/// A common trait that all labels  should impl,
28/// the labels and impl are generated.
29/// You don't need to impl it in your code.
30pub trait GrammarLabel: Debug {
31    type Symbol: GrammarSymbol;
32    /// if self is of form `X ::= a . b`,
33    /// return true if a is a terminal or a non-nullable nonterminal and if b is not eps
34    fn first(&self) -> bool;
35
36    /// return Some(lhs) if it is the end of a grammar rule `lhs -> ...`, otherwise None
37    fn end(&self) -> Option<Self::Symbol>;
38}
39
40/// Binary SPPF Node structure.
41///
42/// Each node can be one of: Dummy, Symbol, Intermediate and Packed.
43///
44/// Symbol is a Terminal or a Nonterminal.
45///
46/// Intermediate is a grammar rule with position.
47///
48/// Packed means different derivations of the same grammar rule.
49///
50/// Each grammar rule possible position corresponds to a label.
51/// So store labels for Intermediate and Packed rules.
52#[derive(Clone, Eq, PartialEq, Ord, PartialOrd)]
53pub enum SPPFNode<L: GrammarLabel> {
54    /// $ node in original paper
55    Dummy,
56    /// (symbol, left, right, children)
57    Symbol(L::Symbol, usize, usize, Vec<SPPFNodeIndex>),
58    /// (label, left, right, children)
59    Intermediate(L, usize, usize, Vec<SPPFNodeIndex>),
60    /// (label, split, children)
61    Packed(L, usize, Vec<SPPFNodeIndex>),
62}
63
64/// All GSS Parser states.
65/// It is used by generated code, don't use it directly.
66pub struct GSSState<L: Ord + Clone + GrammarLabel> {
67    /// Direct GSS graph
68    pub graph: Graph<GSSNode<L>, SPPFNodeIndex, Directed>,
69    /// Mapping from node to its index
70    pub nodes: BTreeMap<GSSNode<L>, NodeIndex>,
71    /// All sppf nodes, and nodes reference each other by index
72    pub sppf_nodes: Vec<SPPFNode<L>>,
73    pub initial_node_index: NodeIndex,
74    /// U_j in original paper
75    pub visited: Vec<BTreeSet<(L, NodeIndex, SPPFNodeIndex)>>,
76    /// R in original paper
77    pub todo: Vec<(L, NodeIndex, usize, SPPFNodeIndex)>,
78    /// P in original paper
79    pub pop: BTreeSet<(NodeIndex, SPPFNodeIndex)>,
80    /// C_i in original paper
81    pub current_position: usize,
82    /// C_u in original paper
83    pub current_node_index: NodeIndex,
84    /// C_n in original paper
85    pub current_sppf_node: usize,
86}
87
88impl<L: GrammarLabel> SPPFNode<L> {
89    /// Get right extent of the node, panics if it doesn't have it.
90    pub fn right_extent(&self) -> usize {
91        use SPPFNode::*;
92        match self {
93            Symbol(_, _, r, _) => *r,
94            Intermediate(_, _, r, _) => *r,
95            _ => panic!("no right extent for packed and dummy"),
96        }
97    }
98
99    /// Get left extent of the node, panics if it doesn't have it.
100    pub fn left_extent(&self) -> usize {
101        use SPPFNode::*;
102        match self {
103            Symbol(_, l, _, _) => *l,
104            Intermediate(_, l, _, _) => *l,
105            _ => panic!("no left extent for packed and dummy"),
106        }
107    }
108
109    /// Get children node references.
110    pub fn children(&self) -> Option<&Vec<SPPFNodeIndex>> {
111        use SPPFNode::*;
112        match self {
113            Dummy => None,
114            Symbol(_, _, _, children) => Some(children),
115            Intermediate(_, _, _, children) => Some(children),
116            Packed(_, _, children) => Some(children),
117        }
118    }
119
120    fn children_mut(&mut self) -> Option<&mut Vec<SPPFNodeIndex>> {
121        use SPPFNode::*;
122        match self {
123            Symbol(_, _, _, children) => Some(children),
124            Intermediate(_, _, _, children) => Some(children),
125            _ => panic!("no children for packed and dummy"),
126        }
127    }
128}
129
130impl<L: Ord + Clone + GrammarLabel> GSSState<L> {
131    /// The `add` function in the paper
132    pub fn add(&mut self, l: L, u: NodeIndex, i: usize, w: SPPFNodeIndex) {
133        if !self.visited[i].contains(&(l.clone(), u, w)) {
134            self.visited[i].insert((l.clone(), u, w));
135            self.todo.push((l, u, i, w));
136        }
137    }
138
139    /// The `pop` function in the paper
140    pub fn pop(&mut self, u: NodeIndex, i: usize, z: SPPFNodeIndex) {
141        if u != self.initial_node_index {
142            let (l, _k) = self.graph[u].clone();
143            self.pop.insert((u, z));
144            let edges: Vec<EdgeReference<SPPFNodeIndex>> = self.graph.edges(u).collect();
145            let edge_data: Vec<(NodeIndex, SPPFNodeIndex)> = edges
146                .iter()
147                .map(|edge| (edge.target(), *edge.weight()))
148                .collect();
149            for (v, w) in edge_data {
150                let y = self.get_node_p(l.clone(), w, z);
151                self.add(l.clone(), v, i, y);
152            }
153        }
154    }
155
156    /// The `create` function in the paper
157    pub fn create(&mut self, l: L, u: NodeIndex, j: usize, w: SPPFNodeIndex) -> NodeIndex {
158        let node = (l.clone(), j);
159        let v = if let Some(index) = self.nodes.get(&node) {
160            *index
161        } else {
162            let index = self.graph.add_node(node.clone());
163            self.nodes.insert(node, index);
164            index
165        };
166        if self.graph.find_edge(v, u).is_none() {
167            self.graph.add_edge(v, u, w);
168            let pop = self.pop.clone();
169            for (index, z) in pop.into_iter() {
170                if index == v {
171                    let y = self.get_node_p(l.clone(), w, z);
172                    let h = self.sppf_nodes[z].right_extent();
173                    self.add(l.clone(), u, h, y);
174                }
175            }
176        }
177        v
178    }
179
180    /// The `get_node_t` function in the paper
181    pub fn get_node_t(&mut self, x: L::Symbol, i: usize) -> SPPFNodeIndex {
182        let h = if x.is_eps() { i } else { i + 1 };
183        self.find_or_create_sppf_symbol(x, i, h)
184    }
185
186    /// The `get_node_p` function in the paper
187    pub fn get_node_p(&mut self, l: L, w: SPPFNodeIndex, z: SPPFNodeIndex) -> SPPFNodeIndex {
188        if l.first() {
189            return z;
190        } else {
191            let node_z = &self.sppf_nodes[z];
192            let k = node_z.left_extent();
193            let i = node_z.right_extent();
194            let node_w = &self.sppf_nodes[w];
195            if SPPFNode::Dummy != *node_w {
196                // w != $
197                let j = node_w.left_extent();
198                assert_eq!(node_w.right_extent(), k);
199                if let Some(t) = l.end() {
200                    // t = X
201                    let y = self.find_or_create_sppf_symbol(t, j, i);
202                    if !self.has_packed_child(y, &l, k) {
203                        let len = self.sppf_nodes.len();
204                        self.sppf_nodes[y].children_mut().unwrap().push(len);
205                        self.sppf_nodes.push(SPPFNode::Packed(l, k, vec![w, z]));
206                    }
207                    y
208                } else {
209                    // t = l
210                    let y = self.find_or_create_sppf_intermediate(l.clone(), j, i);
211                    if !self.has_packed_child(y, &l, k) {
212                        let len = self.sppf_nodes.len();
213                        self.sppf_nodes[y].children_mut().unwrap().push(len);
214                        self.sppf_nodes.push(SPPFNode::Packed(l, k, vec![w, z]));
215                    }
216                    y
217                }
218            } else {
219                // w = $
220                if let Some(t) = l.end() {
221                    // t = X
222                    let y = self.find_or_create_sppf_symbol(t, k, i);
223                    if !self.has_packed_child(y, &l, k) {
224                        let len = self.sppf_nodes.len();
225                        self.sppf_nodes[y].children_mut().unwrap().push(len);
226                        self.sppf_nodes.push(SPPFNode::Packed(l, k, vec![z]));
227                    }
228                    y
229                } else {
230                    // t = l
231                    let y = self.find_or_create_sppf_intermediate(l.clone(), k, i);
232                    if !self.has_packed_child(y, &l, k) {
233                        let len = self.sppf_nodes.len();
234                        self.sppf_nodes[y].children_mut().unwrap().push(len);
235                        self.sppf_nodes.push(SPPFNode::Packed(l, k, vec![z]));
236                    }
237                    y
238                }
239            }
240        }
241    }
242
243    fn find_or_create_sppf_symbol(&mut self, s: L::Symbol, i: usize, j: usize) -> SPPFNodeIndex {
244        for (index, node) in self.sppf_nodes.iter().enumerate() {
245            if let SPPFNode::Symbol(node_s, node_i, node_j, _) = node {
246                if *node_s == s && *node_i == i && *node_j == j {
247                    return index;
248                }
249            }
250        }
251        self.sppf_nodes.push(SPPFNode::Symbol(s, i, j, vec![]));
252        self.sppf_nodes.len() - 1
253    }
254
255    fn find_or_create_sppf_intermediate(&mut self, l: L, i: usize, j: usize) -> SPPFNodeIndex {
256        // TODO: linear search is slow
257        for (index, node) in self.sppf_nodes.iter().enumerate() {
258            if let SPPFNode::Intermediate(node_l, node_i, node_j, _) = node {
259                if *node_l == l && *node_i == i && *node_j == j {
260                    return index;
261                }
262            }
263        }
264        self.sppf_nodes
265            .push(SPPFNode::Intermediate(l, i, j, vec![]));
266        self.sppf_nodes.len() - 1
267    }
268
269    /// Collect all symbol leaves of a packed node
270    pub fn collect_symbols(&self, node: SPPFNodeIndex) -> Vec<SPPFNodeIndex> {
271        use SPPFNode::*;
272        match &self.sppf_nodes[node] {
273            Dummy => vec![],
274            Symbol(_, _, _, _) => vec![node],
275            Intermediate(_, _, _, children) => children
276                .iter()
277                .map(|node| self.collect_symbols(*node))
278                .flatten()
279                .collect(),
280            Packed(_, _, children) => children
281                .iter()
282                .map(|node| self.collect_symbols(*node))
283                .flatten()
284                .collect(),
285        }
286    }
287
288    fn has_packed_child(&self, node: SPPFNodeIndex, l: &L, k: usize) -> bool {
289        // TODO: linear search is slow
290        if let Some(children) = self.sppf_nodes[node].children() {
291            return children.iter().any(|index| match &self.sppf_nodes[*index] {
292                SPPFNode::Packed(node_l, node_k, _) => node_l == l && *node_k == k,
293                _ => false,
294            });
295        } else {
296            unreachable!()
297        }
298    }
299
300    /// Print current SPPF graph in graphviz format
301    pub fn print_sppf_dot(&self) -> String {
302        let mut res = String::new();
303        write!(&mut res, "digraph {{\n").unwrap();
304        for (i, node) in self.sppf_nodes.iter().enumerate() {
305            let label = match node {
306                SPPFNode::Symbol(s, _, _, _) => format!("{:?}", s),
307                SPPFNode::Intermediate(_, _, _, _) => format!("I"),
308                SPPFNode::Packed(_, _, _) => format!("P"),
309                SPPFNode::Dummy => format!("D"),
310            };
311            write!(&mut res, "{} [label={:?}]\n", i, label).unwrap();
312            if let Some(children) = node.children() {
313                for child in children {
314                    write!(&mut res, "{} -> {}\n", i, child).unwrap();
315                }
316            }
317        }
318        write!(&mut res, "}}").unwrap();
319        res
320    }
321
322    /// Print current GSS graph in graphviz format
323    pub fn print_gss_dot(&self) -> String {
324        format!("{:?}", Dot::with_config(&self.graph, &[]))
325    }
326}