parse_it/
memo.rs

1//! Memoization and left recursion support.
2
3use std::{cell::RefCell, fmt::Debug, hash::Hash};
4
5use rustc_hash::FxHashMap;
6
7use crate::{lexer::Cursor, Error, LexIt, ParserState};
8
9/// Memorization for a parser.
10///
11/// It records the results of parsing a given position in the source code, including
12/// the parsed value and the position to which the parser was advanced.
13pub struct Memo<P: Clone + Eq + Hash, T: Clone> {
14    map: RefCell<FxHashMap<P, (T, P)>>,
15}
16
17impl<P: Clone + Eq + Hash, T: Clone> Default for Memo<P, T> {
18    fn default() -> Self {
19        Self {
20            map: RefCell::new(FxHashMap::default()),
21        }
22    }
23}
24
25impl<P: Clone + Eq + Hash + Debug, T: Clone + Debug> Debug for Memo<P, T> {
26    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27        self.map.borrow().fmt(f)
28    }
29}
30
31impl<P: Clone + Eq + Hash, T: Clone> Memo<P, T> {
32    /// Get a memoized value.
33    pub fn get(&self, pos: &P) -> Option<(T, P)> {
34        self.map.borrow().get(pos).cloned()
35    }
36
37    /// Insert a memoized value.
38    pub fn insert(&self, pos: P, value: (T, P)) {
39        self.map.borrow_mut().insert(pos, value);
40    }
41}
42
43/// The ["Packrat"] memoization for a parser.
44///
45/// It ensures that parsing the same position in the source code only occurs once,
46/// by recording the results of parsing. The memoization is distinguished by the
47/// position itself, so different parsing processes should have their own memos.
48///
49/// ["Packrat"]: https://en.wikipedia.org/wiki/Packrat_parser
50#[inline]
51pub fn memorize<L: LexIt + Clone, T: Clone>(
52    state: &mut ParserState<L>,
53    memo: &Memo<Cursor, T>,
54    parser: impl FnOnce(&mut ParserState<L>) -> Result<T, Error>,
55) -> Result<T, Error> {
56    let pos = state.cursor();
57    if let Some((value, end)) = memo.get(&pos) {
58        state.advance_to_cursor(end);
59        Ok(value.clone())
60    } else {
61        let value = parser(state)?;
62        let end = state.cursor();
63        memo.insert(pos, (value.clone(), end));
64        Ok(value)
65    }
66}
67
68/// Left recursion support.
69///
70/// Wrapping a parser in `left_rec` allows it to be left-recursive. This is
71/// crucial for parsing left-recursive grammars, as recursive descent
72/// parsers often fail to handle them.
73///
74/// The `left_rec` function solves this problem by employing memoization.
75/// The algorithm used is based on this [blog post].
76///
77/// ```
78/// # use parse_it::*;
79/// fn parse(
80///     state: &mut ParserState<CharLexer>,
81///     memo: &Memo<Cursor, Option<String>>,
82/// ) -> Result<String, Error> {
83///     left_rec(state, memo, |state| {
84///         let fork = &mut state.fork();
85///         if let Ok(mut s) = parse(fork, memo) {
86///             state.advance_to(fork);
87///             s.push(state.parse_char('b')?);
88///             Ok(s)
89///         } else {
90///             state.parse_char('a').map(|_| String::from("a"))
91///         }
92///     })
93/// }
94///
95/// let mut state = ParserState::new("abbbb");
96/// assert_eq!(parse(&mut state, &Memo::default()).unwrap(), "abbbb");
97/// ```
98///
99/// [blog post]:https://medium.com/@gvanrossum_83706/left-recursive-peg-grammars-65dab3c580e1
100#[inline]
101pub fn left_rec<L: LexIt + Clone, T: Clone>(
102    state: &mut ParserState<L>,
103    memo: &Memo<Cursor, Option<T>>,
104    mut parser: impl FnMut(&mut ParserState<L>) -> Result<T, Error>,
105) -> Result<T, Error> {
106    let pos = state.cursor();
107    if let Some((value, end)) = memo.get(&pos) {
108        state.advance_to_cursor(end);
109        if let Some(value) = value {
110            Ok(value.clone())
111        } else {
112            Err(state.error())
113        }
114    } else {
115        memo.insert(pos, (None, pos));
116        let mut last = (None, pos);
117        loop {
118            let mut fork = state.fork();
119            let Ok(value) = parser(&mut fork) else { break };
120            let end = fork.cursor();
121            if end <= last.1 {
122                break;
123            }
124            last = (Some(value), end);
125            memo.insert(pos, last.clone());
126        }
127        state.advance_to_cursor(last.1);
128        last.0.ok_or_else(|| state.error())
129    }
130}