parse_it/
memo.rs

1//! Memoization and left recursion support.
2
3use std::cell::RefCell;
4use std::fmt::Debug;
5use std::hash::Hash;
6
7use rustc_hash::FxHashMap;
8
9use crate::lexer::Lexer;
10use crate::{Error, ParserState};
11
12/// Memorization for a parser.
13///
14/// It records the results of parsing a given position in the source code, including
15/// the parsed value and the position to which the parser was advanced.
16pub struct Memo<P: Clone + Eq + Hash, T: Clone> {
17    map: RefCell<FxHashMap<P, (T, P)>>,
18}
19
20impl<P: Clone + Eq + Hash, T: Clone> Default for Memo<P, T> {
21    fn default() -> Self {
22        Self {
23            map: RefCell::new(FxHashMap::default()),
24        }
25    }
26}
27
28impl<P: Clone + Eq + Hash + Debug, T: Clone + Debug> Debug for Memo<P, T> {
29    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30        self.map.borrow().fmt(f)
31    }
32}
33
34impl<P: Clone + Eq + Hash, T: Clone> Memo<P, T> {
35    /// Get a memoized value.
36    pub fn get(&self, pos: &P) -> Option<(T, P)> {
37        self.map.borrow().get(pos).cloned()
38    }
39
40    /// Insert a memoized value.
41    pub fn insert(&self, pos: P, value: (T, P)) {
42        self.map.borrow_mut().insert(pos, value);
43    }
44}
45
46/// The ["Packrat"] memoization for a parser.
47///
48/// It ensures that parsing the same position in the source code only occurs once,
49/// by recording the results of parsing. The memoization is distinguished by the
50/// position itself, so different parsing processes should have their own memos.
51/// 
52/// ["Packrat"]: https://en.wikipedia.org/wiki/Packrat_parser
53#[inline]
54pub fn memorize<'a, L: Lexer<'a>, T: Clone>(
55    state: &ParserState<L>,
56    memo: &Memo<L::Position, T>,
57    parser: impl FnOnce(&ParserState<L>) -> Result<T, Error>,
58) -> Result<T, Error> {
59    let pos = state.pos();
60    if let Some((value, end)) = memo.get(&pos) {
61        state.advance_to_pos(end);
62        Ok(value.clone())
63    } else {
64        let value = parser(state)?;
65        let end = state.pos();
66        memo.insert(pos, (value.clone(), end));
67        Ok(value)
68    }
69}
70
71/// Left recursion support.
72///
73/// Wrapping a parser in `left_rec` allows it to be left-recursive. This is
74/// crucial for parsing left-recursive grammars, as recursive descent
75/// parsers often fail to handle them.
76///
77/// The `left_rec` function solves this problem by employing memoization.
78/// The algorithm used is based on this [blog post].
79/// 
80/// ```
81/// # use parse_it::*;
82/// fn parse(
83///     state: &ParserState<CharLexer>, 
84///     memo: &Memo<usize, Option<String>>,
85/// ) -> Result<String, Error> {
86///     left_rec(state, memo, |state| {
87///         let fork = state.fork();
88///         if let Ok(mut s) = parse(&fork, memo) {
89///             state.advance_to(&fork);
90///             s.push(state.parse('b')?);
91///             Ok(s)
92///         } else {
93///             state.parse('a').map(|_| String::from("a"))
94///         }
95///     })
96/// }
97/// 
98/// let state = ParserState::new(CharLexer::new("abbbb"));
99/// assert_eq!(parse(&state, &Memo::default()).unwrap(), "abbbb");
100/// ```
101/// 
102/// [blog post]:https://medium.com/@gvanrossum_83706/left-recursive-peg-grammars-65dab3c580e1
103#[inline]
104pub fn left_rec<'a, L: Lexer<'a>, T: Clone>(
105    state: &ParserState<L>,
106    memo: &Memo<L::Position, Option<T>>,
107    mut parser: impl FnMut(&ParserState<L>) -> Result<T, Error>,
108) -> Result<T, Error> {
109    let pos = state.pos();
110    if let Some((value, end)) = memo.get(&pos) {
111        state.advance_to_pos(end);
112        if let Some(value) = value {
113            Ok(value.clone())
114        } else {
115            Err(state.error())
116        }
117    } else {
118        memo.insert(pos, (None, pos));
119        let mut last = (None, pos);
120        loop {
121            let fork = state.fork();
122            let Ok(value) = parser(&fork) else { break };
123            let end = fork.pos();
124            if end <= last.1 {
125                break;
126            }
127            last = (Some(value), end);
128            memo.insert(pos, last.clone());
129        }
130        state.advance_to_pos(last.1);
131        last.0.ok_or_else(|| state.error())
132    }
133}