1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
/*
 * Copyright 2022 Arnaud Golfouse
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

use super::{ConflictSolution, Node, Symbol};
use crate::{
    indices::{ProdIdxRaw, SymbolIdx},
    structures::Map,
};

/// The type used for specifying precedence levels.
pub type PrecedenceLevel = u16;

#[derive(Clone, Debug, Default)]
pub(crate) struct PrecedenceLevels {
    /// Productions that cannot appear to the left of this production.
    pub(crate) left: Vec<Option<PrecedenceLevel>>,
    /// Productions that cannot appear to the right of this production.
    pub(crate) right: Vec<Option<PrecedenceLevel>>,
}

/// A production of the grammar.
///
/// This can be used to:
/// - Retreive the right-hand side of the associated production via [`Self::symbols`].
/// - Set precedence levels.
/// - Set extra forbidden derivations in the right-hand side.
#[derive(Clone, Debug)]
pub struct Production {
    /// The right-hand side of this production.
    pub symbols: Box<[Symbol]>,
    pub(crate) precedence: PrecedenceLevels,
    /// Contains a list of forbidden derivations.
    ///
    /// If this contains `(i, j)`, and `symbols[i]` is a [`Node`], then the `symbols[i]`
    /// cannot derive the `j`-th production associated with `symbols[i]` (aka
    /// `ProdIdx { lhs: symbols[i], index: j }`).
    pub(crate) forbidden_derivations: Vec<(SymbolIdx, ProdIdxRaw)>,
}

impl Production {
    /// Add a precedence for the current production.
    ///
    /// Any production (including this one) with
    /// - the same precedence family
    /// - strictly lower right precedence
    ///
    /// will not be allowed to appear to the left of this production.
    pub fn set_left_precedence(
        &mut self,
        precedence_family: PrecedenceFamilyToken,
        level: PrecedenceLevel,
    ) -> &mut Self {
        while self.precedence.left.len() <= precedence_family.0 {
            self.precedence.left.push(None);
        }
        self.precedence.left[precedence_family.0] = Some(level);
        self
    }

    /// Add a precedence for the current production.
    ///
    /// Any production (including this one) with
    /// - the same precedence family
    /// - strictly lower left precedence
    ///
    /// will not be allowed to appear to the right of this production.
    pub fn set_right_precedence(
        &mut self,
        precedence_family: PrecedenceFamilyToken,
        level: PrecedenceLevel,
    ) -> &mut Self {
        while self.precedence.right.len() <= precedence_family.0 {
            self.precedence.right.push(None);
        }
        self.precedence.right[precedence_family.0] = Some(level);
        self
    }

    /// The symbols at `symbol_index` of the production will not be able to derive
    /// `forbidden_production`.
    ///
    /// # Errors
    /// This will return an error if
    /// - `symbol_index` is out of bounds for [`Self::symbols`], or refers to a
    /// [`Token`](super::Token).
    /// - `symbol_index` refers to a different [`Node`] than `forbidden_production`.
    ///
    /// # Example
    /// In rust, it is forbidden to chain comparison operator. As such, one could write:
    /// ```
    /// # use ielr::input::{Node, Token, Symbol, Grammar};
    /// const E: Node = Node(0);
    /// const EQUAL: Token = match Token::new(1) {
    ///     Some(t) => t,
    ///     None => unreachable!()
    /// };
    /// let mut grammar = Grammar::new();
    /// let comparison_prod = grammar.add_production(E, vec![Symbol::Node(E), Symbol::Token(EQUAL), Symbol::Node(E)]).unwrap();
    /// let production = grammar.get_production_mut(comparison_prod).unwrap();
    /// production
    ///     .forbid_derivation(0, comparison_prod).unwrap()
    ///     .forbid_derivation(2, comparison_prod).unwrap();
    /// ```
    ///
    /// # Note
    /// This can be used to emulate operator precedence. However, it is recommended to
    /// use the [`Self::set_left_precedence`] and [`Self::set_right_precedence`] methods
    /// instead, as those are more versatile, 'safer' (they don't remove any valid
    /// parse), and lead to faster table generation.
    pub fn forbid_derivation(
        &mut self,
        symbol_index: usize,
        forbidden_production: ProdIdx,
    ) -> Result<&mut Self, ForbidDerivationError> {
        match self.symbols.get(symbol_index) {
            None | Some(Symbol::Token(_)) => Err(ForbidDerivationError),
            Some(Symbol::Node(n)) => {
                if *n == forbidden_production.lhs {
                    self.forbidden_derivations
                        .push((symbol_index as SymbolIdx, forbidden_production.index));
                    Ok(self)
                } else {
                    Err(ForbidDerivationError)
                }
            }
        }
    }
}

/// Token that indentifies a family of precedence annotations.
///
/// Used in [Production::set_left_precedence] and [Production::set_right_precedence].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PrecedenceFamilyToken(usize);

/// The grammar used to define the parser.
///
/// This contains **productions**: rules that describe how to create the various
/// syntactic objects.
///
/// A production has the form `node → symbols`, where `node` is a [`Node`] and
/// `symbols` is a list of [`Symbol`]s.
#[derive(Clone, Debug)]
pub struct Grammar {
    /// All the productions of the grammar.
    productions: Map<Node, Vec<Production>>,
    next_precedence_family: usize,
    /// Solutions for LR conflicts.
    pub(crate) conflict_solutions: Vec<ConflictSolution>,
}

/// Handle to a production in the [`Grammar`].
///
/// The left-hand side may be retrieved directly. However, to get the right-hand
/// side, one will need to call [`Grammar::get_rhs`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ProdIdx {
    /// Left-hand side of the production.
    pub lhs: Node,
    /// Index of the right-hand side of the production in
    /// [`grammar.productions[lhs]`](Grammar::productions).
    pub index: ProdIdxRaw,
}

/// Error while trying to add a new production to the grammar.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum AddProductionError {
    /// The right-hand side of the production exceeds [`u16::MAX`].
    RhsTooLong,
    /// There are already [`u16::MAX`] productions associated with the given left-hand
    /// side.
    TooManyProductions,
}

/// Error while trying to forbid a particular derivation, using
/// [`Production::forbid_derivation`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ForbidDerivationError;

impl Grammar {
    /// Creates a new grammar with no productions.
    pub fn new() -> Self {
        Self {
            productions: Map::default(),
            next_precedence_family: 0,
            conflict_solutions: Vec::new(),
        }
    }

    /// Add a production to the grammar, and returns a [`ProdIdx`] than can be used
    /// to retrieve it.
    ///
    /// Returns `Err(AddProductionError)` if `rhs.len() > u16::MAX`, or if there is
    /// already `u16::MAX` productions associated with `lhs`.
    pub fn add_production(
        &mut self,
        lhs: Node,
        rhs: Vec<Symbol>,
    ) -> Result<ProdIdx, AddProductionError> {
        let productions = self.productions.entry(lhs).or_default();
        if rhs.len() > SymbolIdx::MAX as usize {
            return Err(AddProductionError::RhsTooLong);
        }
        if productions.len() == ProdIdxRaw::MAX as usize {
            return Err(AddProductionError::TooManyProductions);
        }
        let index = productions.len() as ProdIdxRaw;
        productions.push(Production {
            symbols: rhs.into(),
            precedence: PrecedenceLevels::default(),
            forbidden_derivations: Vec::new(),
        });
        Ok(ProdIdx { lhs, index })
    }

    /// Add a solution to a LR conflict to the grammar.
    ///
    /// # Note
    /// When using conflict solutions, the [LALR](crate::Algorithm::Lalr) algorithm
    /// might remove seemingly valid parses.
    ///
    /// To avoid this, conflict resolution is only applied when using the
    /// [LR](crate::Algorithm::Lr) algorithm.
    pub fn add_conflict_solution(&mut self, conflict_solution: ConflictSolution) {
        self.conflict_solutions.push(conflict_solution);
    }

    /// Generate a new precedence family.
    ///
    /// Note that using multiple precedence families on the same production may not be
    /// a good idea.
    pub fn add_precedence_family(&mut self) -> PrecedenceFamilyToken {
        let family = PrecedenceFamilyToken(self.next_precedence_family);
        self.next_precedence_family += 1;
        family
    }

    /// Get all the nodes that appear as the right-hand side of a production in the
    /// grammar.
    pub fn get_all_nodes(&self) -> impl Iterator<Item = Node> + '_ {
        self.productions.keys().copied()
    }

    /// Get all conflict solutions added with [`Self::add_conflict_solution`].
    pub fn get_conflict_solutions(&self) -> &[ConflictSolution] {
        &self.conflict_solutions
    }

    /// Get all conflict solutions added with [`Self::add_conflict_solution`].
    pub fn get_conflict_solutions_mut(&mut self) -> &mut [ConflictSolution] {
        &mut self.conflict_solutions
    }

    /// Get the production associated with `prod_idx`.
    pub fn get_production(&self, prod_idx: ProdIdx) -> Option<&Production> {
        self.productions
            .get(&prod_idx.lhs)?
            .get(prod_idx.index as usize)
    }

    /// Get the production associated with `prod_idx`.
    pub fn get_production_mut(&mut self, prod_idx: ProdIdx) -> Option<&mut Production> {
        self.productions
            .get_mut(&prod_idx.lhs)?
            .get_mut(prod_idx.index as usize)
    }

    /// Get the right-hand side of the production associated with `prod_idx`.
    pub fn get_rhs(&self, prod_idx: ProdIdx) -> Option<&[Symbol]> {
        Some(
            &self
                .productions
                .get(&prod_idx.lhs)?
                .get(prod_idx.index as usize)?
                .symbols as &[_],
        )
    }

    /// Get all the productions in the grammar.
    pub fn get_all_productions(&self) -> impl Iterator<Item = (ProdIdx, &'_ [Symbol])> + '_ {
        self.productions.iter().flat_map(|(&n, productions)| {
            productions.iter().enumerate().map(move |(index, b)| {
                (
                    ProdIdx {
                        lhs: n,
                        index: index as ProdIdxRaw,
                    },
                    (&b.symbols as &[_]),
                )
            })
        })
    }

    /// Get all the productions for node `node`.
    pub fn get_node_productions(
        &self,
        node: Node,
    ) -> impl Iterator<Item = (ProdIdx, &'_ Production)> + '_ {
        self.productions
            .get(&node)
            .map(|productions| {
                productions
                    .iter()
                    .enumerate()
                    .map(move |(index, production)| {
                        let prod_idx = ProdIdx {
                            lhs: node,
                            index: index as ProdIdxRaw,
                        };
                        (prod_idx, production)
                    })
            })
            .into_iter()
            .flatten()
    }
}

impl Default for Grammar {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_common::{nodes::START, tokens::T1};

    #[test]
    fn grammar_errors() {
        let mut grammar = Grammar::default();

        assert_eq!(
            grammar
                .add_production(START, vec![Symbol::Token(T1); SymbolIdx::MAX as usize + 1])
                .unwrap_err(),
            AddProductionError::RhsTooLong
        );
        for _ in 0..ProdIdxRaw::MAX {
            assert!(grammar.add_production(START, vec![]).is_ok());
        }
        assert_eq!(
            grammar.add_production(START, vec![]).unwrap_err(),
            AddProductionError::TooManyProductions
        );
    }
}