parse_it/memo.rs
1//! Memoization and left recursion support.
2
3use std::{cell::RefCell, fmt::Debug, hash::Hash};
4
5use rustc_hash::FxHashMap;
6
7use crate::{lexer::Cursor, Error, LexIt, ParserState};
8
9/// Memorization for a parser.
10///
11/// It records the results of parsing a given position in the source code, including
12/// the parsed value and the position to which the parser was advanced.
13pub struct Memo<P: Clone + Eq + Hash, T: Clone> {
14 map: RefCell<FxHashMap<P, (T, P)>>,
15}
16
17impl<P: Clone + Eq + Hash, T: Clone> Default for Memo<P, T> {
18 fn default() -> Self {
19 Self {
20 map: RefCell::new(FxHashMap::default()),
21 }
22 }
23}
24
25impl<P: Clone + Eq + Hash + Debug, T: Clone + Debug> Debug for Memo<P, T> {
26 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27 self.map.borrow().fmt(f)
28 }
29}
30
31impl<P: Clone + Eq + Hash, T: Clone> Memo<P, T> {
32 /// Get a memoized value.
33 pub fn get(&self, pos: &P) -> Option<(T, P)> {
34 self.map.borrow().get(pos).cloned()
35 }
36
37 /// Insert a memoized value.
38 pub fn insert(&self, pos: P, value: (T, P)) {
39 self.map.borrow_mut().insert(pos, value);
40 }
41}
42
43/// The ["Packrat"] memoization for a parser.
44///
45/// It ensures that parsing the same position in the source code only occurs once,
46/// by recording the results of parsing. The memoization is distinguished by the
47/// position itself, so different parsing processes should have their own memos.
48///
49/// ["Packrat"]: https://en.wikipedia.org/wiki/Packrat_parser
50#[inline]
51pub fn memorize<L: LexIt + Clone, T: Clone>(
52 state: &mut ParserState<L>,
53 memo: &Memo<Cursor, T>,
54 parser: impl FnOnce(&mut ParserState<L>) -> Result<T, Error>,
55) -> Result<T, Error> {
56 let pos = state.cursor();
57 if let Some((value, end)) = memo.get(&pos) {
58 state.advance_to_cursor(end);
59 Ok(value.clone())
60 } else {
61 let value = parser(state)?;
62 let end = state.cursor();
63 memo.insert(pos, (value.clone(), end));
64 Ok(value)
65 }
66}
67
68/// Left recursion support.
69///
70/// Wrapping a parser in `left_rec` allows it to be left-recursive. This is
71/// crucial for parsing left-recursive grammars, as recursive descent
72/// parsers often fail to handle them.
73///
74/// The `left_rec` function solves this problem by employing memoization.
75/// The algorithm used is based on this [blog post].
76///
77/// ```
78/// # use parse_it::*;
79/// fn parse(
80/// state: &mut ParserState<CharLexer>,
81/// memo: &Memo<Cursor, Option<String>>,
82/// ) -> Result<String, Error> {
83/// left_rec(state, memo, |state| {
84/// let fork = &mut state.fork();
85/// if let Ok(mut s) = parse(fork, memo) {
86/// state.advance_to(fork);
87/// s.push(state.parse_char('b')?);
88/// Ok(s)
89/// } else {
90/// state.parse_char('a').map(|_| String::from("a"))
91/// }
92/// })
93/// }
94///
95/// let mut state = ParserState::new("abbbb");
96/// assert_eq!(parse(&mut state, &Memo::default()).unwrap(), "abbbb");
97/// ```
98///
99/// [blog post]:https://medium.com/@gvanrossum_83706/left-recursive-peg-grammars-65dab3c580e1
100#[inline]
101pub fn left_rec<L: LexIt + Clone, T: Clone>(
102 state: &mut ParserState<L>,
103 memo: &Memo<Cursor, Option<T>>,
104 mut parser: impl FnMut(&mut ParserState<L>) -> Result<T, Error>,
105) -> Result<T, Error> {
106 let pos = state.cursor();
107 if let Some((value, end)) = memo.get(&pos) {
108 state.advance_to_cursor(end);
109 if let Some(value) = value {
110 Ok(value.clone())
111 } else {
112 Err(state.error())
113 }
114 } else {
115 memo.insert(pos, (None, pos));
116 let mut last = (None, pos);
117 loop {
118 let mut fork = state.fork();
119 let Ok(value) = parser(&mut fork) else { break };
120 let end = fork.cursor();
121 if end <= last.1 {
122 break;
123 }
124 last = (Some(value), end);
125 memo.insert(pos, last.clone());
126 }
127 state.advance_to_cursor(last.1);
128 last.0.ok_or_else(|| state.error())
129 }
130}