1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352
/*
* Copyright 2022 Arnaud Golfouse
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
use super::{ConflictSolution, Node, Symbol};
use crate::{
indices::{ProdIdxRaw, SymbolIdx},
structures::Map,
};
/// The type used for specifying precedence levels.
pub type PrecedenceLevel = u16;
#[derive(Clone, Debug, Default)]
pub(crate) struct PrecedenceLevels {
/// Productions that cannot appear to the left of this production.
pub(crate) left: Vec<Option<PrecedenceLevel>>,
/// Productions that cannot appear to the right of this production.
pub(crate) right: Vec<Option<PrecedenceLevel>>,
}
/// A production of the grammar.
///
/// This can be used to:
/// - Retreive the right-hand side of the associated production via [`Self::symbols`].
/// - Set precedence levels.
/// - Set extra forbidden derivations in the right-hand side.
#[derive(Clone, Debug)]
pub struct Production {
/// The right-hand side of this production.
pub symbols: Box<[Symbol]>,
pub(crate) precedence: PrecedenceLevels,
/// Contains a list of forbidden derivations.
///
/// If this contains `(i, j)`, and `symbols[i]` is a [`Node`], then the `symbols[i]`
/// cannot derive the `j`-th production associated with `symbols[i]` (aka
/// `ProdIdx { lhs: symbols[i], index: j }`).
pub(crate) forbidden_derivations: Vec<(SymbolIdx, ProdIdxRaw)>,
}
impl Production {
/// Add a precedence for the current production.
///
/// Any production (including this one) with
/// - the same precedence family
/// - strictly lower right precedence
///
/// will not be allowed to appear to the left of this production.
pub fn set_left_precedence(
&mut self,
precedence_family: PrecedenceFamilyToken,
level: PrecedenceLevel,
) -> &mut Self {
while self.precedence.left.len() <= precedence_family.0 {
self.precedence.left.push(None);
}
self.precedence.left[precedence_family.0] = Some(level);
self
}
/// Add a precedence for the current production.
///
/// Any production (including this one) with
/// - the same precedence family
/// - strictly lower left precedence
///
/// will not be allowed to appear to the right of this production.
pub fn set_right_precedence(
&mut self,
precedence_family: PrecedenceFamilyToken,
level: PrecedenceLevel,
) -> &mut Self {
while self.precedence.right.len() <= precedence_family.0 {
self.precedence.right.push(None);
}
self.precedence.right[precedence_family.0] = Some(level);
self
}
/// The symbols at `symbol_index` of the production will not be able to derive
/// `forbidden_production`.
///
/// # Errors
/// This will return an error if
/// - `symbol_index` is out of bounds for [`Self::symbols`], or refers to a
/// [`Token`](super::Token).
/// - `symbol_index` refers to a different [`Node`] than `forbidden_production`.
///
/// # Example
/// In rust, it is forbidden to chain comparison operator. As such, one could write:
/// ```
/// # use ielr::input::{Node, Token, Symbol, Grammar};
/// const E: Node = Node(0);
/// const EQUAL: Token = match Token::new(1) {
/// Some(t) => t,
/// None => unreachable!()
/// };
/// let mut grammar = Grammar::new();
/// let comparison_prod = grammar.add_production(E, vec![Symbol::Node(E), Symbol::Token(EQUAL), Symbol::Node(E)]).unwrap();
/// let production = grammar.get_production_mut(comparison_prod).unwrap();
/// production
/// .forbid_derivation(0, comparison_prod).unwrap()
/// .forbid_derivation(2, comparison_prod).unwrap();
/// ```
///
/// # Note
/// This can be used to emulate operator precedence. However, it is recommended to
/// use the [`Self::set_left_precedence`] and [`Self::set_right_precedence`] methods
/// instead, as those are more versatile, 'safer' (they don't remove any valid
/// parse), and lead to faster table generation.
pub fn forbid_derivation(
&mut self,
symbol_index: usize,
forbidden_production: ProdIdx,
) -> Result<&mut Self, ForbidDerivationError> {
match self.symbols.get(symbol_index) {
None | Some(Symbol::Token(_)) => Err(ForbidDerivationError),
Some(Symbol::Node(n)) => {
if *n == forbidden_production.lhs {
self.forbidden_derivations
.push((symbol_index as SymbolIdx, forbidden_production.index));
Ok(self)
} else {
Err(ForbidDerivationError)
}
}
}
}
}
/// Token that indentifies a family of precedence annotations.
///
/// Used in [Production::set_left_precedence] and [Production::set_right_precedence].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PrecedenceFamilyToken(usize);
/// The grammar used to define the parser.
///
/// This contains **productions**: rules that describe how to create the various
/// syntactic objects.
///
/// A production has the form `node → symbols`, where `node` is a [`Node`] and
/// `symbols` is a list of [`Symbol`]s.
#[derive(Clone, Debug)]
pub struct Grammar {
/// All the productions of the grammar.
productions: Map<Node, Vec<Production>>,
next_precedence_family: usize,
/// Solutions for LR conflicts.
pub(crate) conflict_solutions: Vec<ConflictSolution>,
}
/// Handle to a production in the [`Grammar`].
///
/// The left-hand side may be retrieved directly. However, to get the right-hand
/// side, one will need to call [`Grammar::get_rhs`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ProdIdx {
/// Left-hand side of the production.
pub lhs: Node,
/// Index of the right-hand side of the production in
/// [`grammar.productions[lhs]`](Grammar::productions).
pub index: ProdIdxRaw,
}
/// Error while trying to add a new production to the grammar.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum AddProductionError {
/// The right-hand side of the production exceeds [`u16::MAX`].
RhsTooLong,
/// There are already [`u16::MAX`] productions associated with the given left-hand
/// side.
TooManyProductions,
}
/// Error while trying to forbid a particular derivation, using
/// [`Production::forbid_derivation`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ForbidDerivationError;
impl Grammar {
/// Creates a new grammar with no productions.
pub fn new() -> Self {
Self {
productions: Map::default(),
next_precedence_family: 0,
conflict_solutions: Vec::new(),
}
}
/// Add a production to the grammar, and returns a [`ProdIdx`] than can be used
/// to retrieve it.
///
/// Returns `Err(AddProductionError)` if `rhs.len() > u16::MAX`, or if there is
/// already `u16::MAX` productions associated with `lhs`.
pub fn add_production(
&mut self,
lhs: Node,
rhs: Vec<Symbol>,
) -> Result<ProdIdx, AddProductionError> {
let productions = self.productions.entry(lhs).or_default();
if rhs.len() > SymbolIdx::MAX as usize {
return Err(AddProductionError::RhsTooLong);
}
if productions.len() == ProdIdxRaw::MAX as usize {
return Err(AddProductionError::TooManyProductions);
}
let index = productions.len() as ProdIdxRaw;
productions.push(Production {
symbols: rhs.into(),
precedence: PrecedenceLevels::default(),
forbidden_derivations: Vec::new(),
});
Ok(ProdIdx { lhs, index })
}
/// Add a solution to a LR conflict to the grammar.
///
/// # Note
/// When using conflict solutions, the [LALR](crate::Algorithm::Lalr) algorithm
/// might remove seemingly valid parses.
///
/// To avoid this, conflict resolution is only applied when using the
/// [LR](crate::Algorithm::Lr) algorithm.
pub fn add_conflict_solution(&mut self, conflict_solution: ConflictSolution) {
self.conflict_solutions.push(conflict_solution);
}
/// Generate a new precedence family.
///
/// Note that using multiple precedence families on the same production may not be
/// a good idea.
pub fn add_precedence_family(&mut self) -> PrecedenceFamilyToken {
let family = PrecedenceFamilyToken(self.next_precedence_family);
self.next_precedence_family += 1;
family
}
/// Get all the nodes that appear as the right-hand side of a production in the
/// grammar.
pub fn get_all_nodes(&self) -> impl Iterator<Item = Node> + '_ {
self.productions.keys().copied()
}
/// Get all conflict solutions added with [`Self::add_conflict_solution`].
pub fn get_conflict_solutions(&self) -> &[ConflictSolution] {
&self.conflict_solutions
}
/// Get all conflict solutions added with [`Self::add_conflict_solution`].
pub fn get_conflict_solutions_mut(&mut self) -> &mut [ConflictSolution] {
&mut self.conflict_solutions
}
/// Get the production associated with `prod_idx`.
pub fn get_production(&self, prod_idx: ProdIdx) -> Option<&Production> {
self.productions
.get(&prod_idx.lhs)?
.get(prod_idx.index as usize)
}
/// Get the production associated with `prod_idx`.
pub fn get_production_mut(&mut self, prod_idx: ProdIdx) -> Option<&mut Production> {
self.productions
.get_mut(&prod_idx.lhs)?
.get_mut(prod_idx.index as usize)
}
/// Get the right-hand side of the production associated with `prod_idx`.
pub fn get_rhs(&self, prod_idx: ProdIdx) -> Option<&[Symbol]> {
Some(
&self
.productions
.get(&prod_idx.lhs)?
.get(prod_idx.index as usize)?
.symbols as &[_],
)
}
/// Get all the productions in the grammar.
pub fn get_all_productions(&self) -> impl Iterator<Item = (ProdIdx, &'_ [Symbol])> + '_ {
self.productions.iter().flat_map(|(&n, productions)| {
productions.iter().enumerate().map(move |(index, b)| {
(
ProdIdx {
lhs: n,
index: index as ProdIdxRaw,
},
(&b.symbols as &[_]),
)
})
})
}
/// Get all the productions for node `node`.
pub fn get_node_productions(
&self,
node: Node,
) -> impl Iterator<Item = (ProdIdx, &'_ Production)> + '_ {
self.productions
.get(&node)
.map(|productions| {
productions
.iter()
.enumerate()
.map(move |(index, production)| {
let prod_idx = ProdIdx {
lhs: node,
index: index as ProdIdxRaw,
};
(prod_idx, production)
})
})
.into_iter()
.flatten()
}
}
impl Default for Grammar {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::test_common::{nodes::START, tokens::T1};
#[test]
fn grammar_errors() {
let mut grammar = Grammar::default();
assert_eq!(
grammar
.add_production(START, vec![Symbol::Token(T1); SymbolIdx::MAX as usize + 1])
.unwrap_err(),
AddProductionError::RhsTooLong
);
for _ in 0..ProdIdxRaw::MAX {
assert!(grammar.add_production(START, vec![]).is_ok());
}
assert_eq!(
grammar.add_production(START, vec![]).unwrap_err(),
AddProductionError::TooManyProductions
);
}
}