1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
/*
* Copyright 2022 Arnaud Golfouse
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
use super::precedence_annotations::PrecedenceAnnotations;
use crate::{
grammar_data::GrammarData,
indices::{ItemIdx, SymbolIdx},
input::{Grammar, Node, ProdIdx, Symbol},
output::Lookahead,
structures::{BitSet, Set},
};
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct LRItem {
/// The production of this item.
///
/// This has the form `lhs → rhs`, where `rhs` may be retrieved using
/// [`Grammar::get_rhs`](crate::input::Grammar::get_rhs).
pub(crate) prod_idx: ProdIdx,
/// At which point of [`production`](Self::prod_idx) we are.
pub(crate) index: SymbolIdx,
/// Which items in the same state are directly derived from this item.
pub(crate) directly_derives: BitSet<ItemIdx>,
/// Which items in the same state are _directly_ derived from this item.
pub(crate) derives: BitSet<ItemIdx>,
/// If the dot is _not_ on the rightmost position, this item will be involved in a
/// state transition.
///
/// Then, `next_item_local_index` contains the index of the shifted item (in the next state).
pub(crate) next_item_local_index: Option<ItemIdx>,
/// Annotations for precedence.
pub(crate) annotations: PrecedenceAnnotations,
/// All symbols in `rhs` _strictly_ before this index are nullable.
///
/// Will be compared against `Self::index`.
nullable_up_to: SymbolIdx,
/// All symbols in `rhs` after this index are nullable.
///
/// Will be compared against `Self::index`.
nullable_after: SymbolIdx,
pub(crate) lookaheads: Set<Lookahead>,
/// This contains any node `A`, such that this item forbid some `A →∙...`
/// derivation.
pub(crate) forbidden_nodes: Set<Node>,
}
impl LRItem {
pub(crate) fn current_symbol(&self, grammar: &Grammar) -> Option<Symbol> {
grammar
.get_rhs(self.prod_idx)
.unwrap_or_default()
.get(self.index as usize)
.copied()
}
/// Creates a new LR(0) item, with `index` 0 (aka the dot is on the left).
///
/// - The `PrecedenceAnnotations` will be those specified in the grammar.
/// - `derives` will be empty.
/// - `nullable_up_to` and `nullable_after` will be computed.
pub(super) fn new(prod_idx: ProdIdx, grammar_data: &GrammarData) -> Self {
let production = grammar_data.get_production(prod_idx).unwrap();
let mut nullable_up_to = 0;
let mut nullable_after = production.symbols.len() as SymbolIdx;
while let Some(symbol) = production.symbols.get(nullable_up_to as usize) {
if !grammar_data.nullables.is_nullable_symbol(*symbol) {
break;
}
nullable_up_to += 1;
}
while nullable_after > 0 {
let symbol = match production.symbols.get(nullable_after as usize - 1) {
Some(s) => s,
None => break,
};
if !grammar_data.nullables.is_nullable_symbol(*symbol) {
break;
}
nullable_after -= 1;
}
Self {
prod_idx,
index: 0,
annotations: PrecedenceAnnotations::default(),
directly_derives: BitSet::new(),
derives: BitSet::new(),
next_item_local_index: None,
nullable_up_to,
nullable_after,
lookaheads: Set::default(),
forbidden_nodes: Set::default(),
}
}
/// Get the (LR(0)) item obtained by shifting `self.index` to the right (`+1`).
///
/// Assumes `self.index < self.production.len()`.
///
/// This will:
/// - Generate the appropriate `PrecedenceAnnotations`.
/// - Clear `derives`.
pub(crate) fn successor(&self) -> Self {
let index = self.index + 1;
Self {
prod_idx: self.prod_idx,
index,
annotations: PrecedenceAnnotations {
forbidden_left: self.annotations.forbidden_left.clone(),
forbidden_right: self.annotations.forbidden_right.clone(),
},
directly_derives: BitSet::new(),
derives: BitSet::new(),
next_item_local_index: None,
nullable_up_to: self.nullable_up_to,
nullable_after: self.nullable_after,
lookaheads: Set::default(),
forbidden_nodes: Set::default(),
}
}
/// Get all the productions derived from this item, with the _new_ precedence.
///
/// After each iteration of the returned iterator, if a precedence annotation is
/// used to forbid some `A →∙...` derivation, then:
/// - `used_precedence` is set to `true`.
/// - `A` is inserted into `forbidden_nodes`.
pub(super) fn derived<'a>(
&'a self,
grammar: &'a Grammar,
used_precedence: &'a mut bool,
forbidden_nodes: &'a mut Set<Node>,
) -> impl Iterator<Item = (ProdIdx, PrecedenceAnnotations)> + 'a {
struct MaybeIter<I>(Option<I>);
impl<I: Iterator> Iterator for MaybeIter<I> {
type Item = I::Item;
fn next(&mut self) -> Option<Self::Item> {
self.0.as_mut()?.next()
}
}
let node = match self.current_symbol(grammar) {
Some(Symbol::Node(node)) => node,
_ => return MaybeIter(None),
};
let production = grammar.get_production(self.prod_idx).unwrap();
// For example, E →∙E + E
let is_on_left = self.index <= self.nullable_up_to;
// For example, E → E +∙E
let is_on_right = self.index + 1 >= self.nullable_after;
let inherited_left = &self.annotations.forbidden_left;
let production_left = &production.precedence.left;
let inherited_right = &self.annotations.forbidden_right;
let production_right = &production.precedence.right;
MaybeIter(Some(grammar.get_node_productions(node).filter_map(
move |(new_prod_idx, new_production)| {
if production
.forbidden_derivations
.contains(&(self.index, new_prod_idx.index))
{
*used_precedence = true;
forbidden_nodes.insert(new_prod_idx.lhs);
return None;
}
if is_on_left
&& (!PrecedenceAnnotations::compatible_with(
production_left,
&new_production.precedence.right,
) || !PrecedenceAnnotations::compatible_with(
inherited_left,
&new_production.precedence.left,
))
{
*used_precedence = true;
return None;
}
if is_on_right
&& (!PrecedenceAnnotations::compatible_with(
production_right,
&new_production.precedence.left,
) || !PrecedenceAnnotations::compatible_with(
inherited_right,
&new_production.precedence.right,
))
{
*used_precedence = true;
return None;
}
// This is useful to prune useless annotations, for example on E →∙INT or left annotations on E →∙- E
let can_derive_on_left =
matches!(new_production.symbols.first(), Some(Symbol::Node(_)));
let can_derive_on_right =
matches!(new_production.symbols.last(), Some(Symbol::Node(_)));
let mut new_annotations = PrecedenceAnnotations::new();
if is_on_left {
// For example:
// E →∙E + E
// E →∙- E ← new item: what was forbidden on the left is now forbidden on the right.
if can_derive_on_left {
PrecedenceAnnotations::merge_increasing(
&mut new_annotations.forbidden_left,
inherited_left,
);
}
if can_derive_on_right {
PrecedenceAnnotations::merge_increasing(
&mut new_annotations.forbidden_right,
production_left,
);
}
}
if is_on_right {
// For example:
// E → E +∙E
// E →∙E ? ← new item: what was forbidden on the right is now forbidden on the left.
if can_derive_on_right {
PrecedenceAnnotations::merge_increasing(
&mut new_annotations.forbidden_right,
inherited_right,
);
}
if can_derive_on_left {
PrecedenceAnnotations::merge_increasing(
&mut new_annotations.forbidden_left,
production_right,
);
}
}
Some((new_prod_idx, new_annotations))
},
)))
}
/// Returns an ordering on [`LRItem`], such that items are sorted by:
/// - [`Self::index`] in reverse.
/// - In case of equal `index`, sort by [`Self::prod_idx`].
pub(super) fn order(item1: &Self, item2: &Self) -> std::cmp::Ordering {
item1
.index
.cmp(&item2.index)
.reverse()
.then(item1.annotations.cmp(&item2.annotations))
.then(item1.prod_idx.cmp(&item2.prod_idx))
}
}