parse_it/memo.rs
1//! Memoization and left recursion support.
2
3use std::cell::RefCell;
4use std::fmt::Debug;
5use std::hash::Hash;
6
7use rustc_hash::FxHashMap;
8
9use crate::lexer::Lexer;
10use crate::{Error, ParserState};
11
12/// Memorization for a parser.
13///
14/// It records the results of parsing a given position in the source code, including
15/// the parsed value and the position to which the parser was advanced.
16pub struct Memo<P: Clone + Eq + Hash, T: Clone> {
17 map: RefCell<FxHashMap<P, (T, P)>>,
18}
19
20impl<P: Clone + Eq + Hash, T: Clone> Default for Memo<P, T> {
21 fn default() -> Self {
22 Self {
23 map: RefCell::new(FxHashMap::default()),
24 }
25 }
26}
27
28impl<P: Clone + Eq + Hash + Debug, T: Clone + Debug> Debug for Memo<P, T> {
29 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30 self.map.borrow().fmt(f)
31 }
32}
33
34impl<P: Clone + Eq + Hash, T: Clone> Memo<P, T> {
35 /// Get a memoized value.
36 pub fn get(&self, pos: &P) -> Option<(T, P)> {
37 self.map.borrow().get(pos).cloned()
38 }
39
40 /// Insert a memoized value.
41 pub fn insert(&self, pos: P, value: (T, P)) {
42 self.map.borrow_mut().insert(pos, value);
43 }
44}
45
46/// The ["Packrat"] memoization for a parser.
47///
48/// It ensures that parsing the same position in the source code only occurs once,
49/// by recording the results of parsing. The memoization is distinguished by the
50/// position itself, so different parsing processes should have their own memos.
51///
52/// ["Packrat"]: https://en.wikipedia.org/wiki/Packrat_parser
53#[inline]
54pub fn memorize<'a, L: Lexer<'a>, T: Clone>(
55 state: &ParserState<L>,
56 memo: &Memo<L::Position, T>,
57 parser: impl FnOnce(&ParserState<L>) -> Result<T, Error>,
58) -> Result<T, Error> {
59 let pos = state.pos();
60 if let Some((value, end)) = memo.get(&pos) {
61 state.advance_to_pos(end);
62 Ok(value.clone())
63 } else {
64 let value = parser(state)?;
65 let end = state.pos();
66 memo.insert(pos, (value.clone(), end));
67 Ok(value)
68 }
69}
70
71/// Left recursion support.
72///
73/// Wrapping a parser in `left_rec` allows it to be left-recursive. This is
74/// crucial for parsing left-recursive grammars, as recursive descent
75/// parsers often fail to handle them.
76///
77/// The `left_rec` function solves this problem by employing memoization.
78/// The algorithm used is based on this [blog post].
79///
80/// ```
81/// # use parse_it::*;
82/// fn parse(
83/// state: &ParserState<CharLexer>,
84/// memo: &Memo<usize, Option<String>>,
85/// ) -> Result<String, Error> {
86/// left_rec(state, memo, |state| {
87/// let fork = state.fork();
88/// if let Ok(mut s) = parse(&fork, memo) {
89/// state.advance_to(&fork);
90/// s.push(state.parse('b')?);
91/// Ok(s)
92/// } else {
93/// state.parse('a').map(|_| String::from("a"))
94/// }
95/// })
96/// }
97///
98/// let state = ParserState::new(CharLexer::new("abbbb"));
99/// assert_eq!(parse(&state, &Memo::default()).unwrap(), "abbbb");
100/// ```
101///
102/// [blog post]:https://medium.com/@gvanrossum_83706/left-recursive-peg-grammars-65dab3c580e1
103#[inline]
104pub fn left_rec<'a, L: Lexer<'a>, T: Clone>(
105 state: &ParserState<L>,
106 memo: &Memo<L::Position, Option<T>>,
107 mut parser: impl FnMut(&ParserState<L>) -> Result<T, Error>,
108) -> Result<T, Error> {
109 let pos = state.pos();
110 if let Some((value, end)) = memo.get(&pos) {
111 state.advance_to_pos(end);
112 if let Some(value) = value {
113 Ok(value.clone())
114 } else {
115 Err(state.error())
116 }
117 } else {
118 memo.insert(pos, (None, pos));
119 let mut last = (None, pos);
120 loop {
121 let fork = state.fork();
122 let Ok(value) = parser(&fork) else { break };
123 let end = fork.pos();
124 if end <= last.1 {
125 break;
126 }
127 last = (Some(value), end);
128 memo.insert(pos, last.clone());
129 }
130 state.advance_to_pos(last.1);
131 last.0.ok_or_else(|| state.error())
132 }
133}