chumsky/
recursive.rs

1//! Recursive parsers (parser that include themselves within their patterns).
2//!
3//! *“It's unpleasantly like being drunk."
4//! "What's so unpleasant about being drunk?"
5//! "You ask a glass of water.”*
6//!
7//! The [`recursive()`] function covers most cases, but sometimes it's necessary to manually control the declaration and
8//! definition of parsers more carefully, particularly for mutually-recursive parsers. In such cases, the functions on
9//! [`Recursive`] allow for this.
10
11use super::*;
12
13struct OnceCell<T>(core::cell::Cell<Option<T>>);
14impl<T> OnceCell<T> {
15    pub fn new() -> Self {
16        Self(core::cell::Cell::new(None))
17    }
18    pub fn set(&self, x: T) -> Result<(), ()> {
19        // SAFETY: Function is not reentrant so we have exclusive access to the inner data
20        unsafe {
21            let vacant = (*self.0.as_ptr()).is_none();
22            if vacant {
23                self.0.as_ptr().write(Some(x));
24                Ok(())
25            } else {
26                Err(())
27            }
28        }
29    }
30    #[inline]
31    pub fn get(&self) -> Option<&T> {
32        // SAFETY: We ensure that we never insert twice (so the inner `T` always lives as long as us, if it exists) and
33        // neither function is possibly reentrant so there's no way we can invalidate mut xor shared aliasing
34        unsafe { (*self.0.as_ptr()).as_ref() }
35    }
36}
37
38// TODO: Ensure that this doesn't produce leaks
39enum RecursiveInner<T: ?Sized> {
40    Owned(Rc<T>),
41    Unowned(rc::Weak<T>),
42}
43
44/// Type for recursive parsers that are defined through a call to `recursive`, and as such
45/// need no internal indirection
46pub type Direct<'src, 'b, I, O, Extra> = DynParser<'src, 'b, I, O, Extra>;
47
48/// Type for recursive parsers that are defined through a call to [`Recursive::declare`], and as
49/// such require an additional layer of allocation.
50pub struct Indirect<'src, 'b, I: Input<'src>, O, Extra: ParserExtra<'src, I>> {
51    inner: OnceCell<Box<DynParser<'src, 'b, I, O, Extra>>>,
52}
53
54/// A parser that can be defined in terms of itself by separating its [declaration](Recursive::declare) from its
55/// [definition](Recursive::define).
56///
57/// Prefer to use [`recursive()`], which exists as a convenient wrapper around both operations, if possible.
58pub struct Recursive<P: ?Sized> {
59    inner: RecursiveInner<P>,
60}
61
62impl<'src, 'b, I: Input<'src>, O, E: ParserExtra<'src, I>> Recursive<Indirect<'src, 'b, I, O, E>> {
63    /// Declare the existence of a recursive parser, allowing it to be used to construct parser combinators before
64    /// being fulled defined.
65    ///
66    /// Declaring a parser before defining it is required for a parser to reference itself.
67    ///
68    /// This should be followed by **exactly one** call to the [`Recursive::define`] method prior to using the parser
69    /// for parsing (i.e: via the [`Parser::parse`] method or similar).
70    ///
71    /// Prefer to use [`recursive()`], which is a convenient wrapper around this method and [`Recursive::define`], if
72    /// possible.
73    ///
74    /// # Examples
75    ///
76    /// ```
77    /// # use chumsky::prelude::*;
78    /// #[derive(Debug, PartialEq)]
79    /// enum Chain {
80    ///     End,
81    ///     Link(char, Box<Chain>),
82    /// }
83    ///
84    /// // Declare the existence of the parser before defining it so that it can reference itself
85    /// let mut chain = Recursive::declare();
86    ///
87    /// // Define the parser in terms of itself.
88    /// // In this case, the parser parses a right-recursive list of '+' into a singly linked list
89    /// chain.define(just::<_, _, extra::Err<Simple<char>>>('+')
90    ///     .then(chain.clone())
91    ///     .map(|(c, chain)| Chain::Link(c, Box::new(chain)))
92    ///     .or_not()
93    ///     .map(|chain| chain.unwrap_or(Chain::End)));
94    ///
95    /// assert_eq!(chain.parse("").into_result(), Ok(Chain::End));
96    /// assert_eq!(
97    ///     chain.parse("++").into_result(),
98    ///     Ok(Chain::Link('+', Box::new(Chain::Link('+', Box::new(Chain::End))))),
99    /// );
100    /// ```
101    pub fn declare() -> Self {
102        Recursive {
103            inner: RecursiveInner::Owned(Rc::new(Indirect {
104                inner: OnceCell::new(),
105            })),
106        }
107    }
108
109    /// Defines the parser after declaring it, allowing it to be used for parsing.
110    // INFO: Clone bound not actually needed, but good to be safe for future compat
111    #[track_caller]
112    pub fn define<P: Parser<'src, I, O, E> + Clone + 'src + 'b>(&mut self, parser: P) {
113        let location = *Location::caller();
114        self.parser()
115            .inner
116            .set(Box::new(parser))
117            .unwrap_or_else(|_| {
118                panic!("recursive parsers can only be defined once, trying to redefine it at {location}")
119            });
120    }
121}
122
123impl<P: ?Sized> Recursive<P> {
124    #[inline]
125    fn parser(&self) -> Rc<P> {
126        match &self.inner {
127            RecursiveInner::Owned(x) => x.clone(),
128            RecursiveInner::Unowned(x) => x
129                .upgrade()
130                .expect("Recursive parser used before being defined"),
131        }
132    }
133}
134
135impl<P: ?Sized> Clone for Recursive<P> {
136    fn clone(&self) -> Self {
137        Self {
138            inner: match &self.inner {
139                RecursiveInner::Owned(x) => RecursiveInner::Owned(x.clone()),
140                RecursiveInner::Unowned(x) => RecursiveInner::Unowned(x.clone()),
141            },
142        }
143    }
144}
145
146#[cfg(feature = "stacker")]
147#[inline]
148pub(crate) fn recurse<R, F: FnOnce() -> R>(f: F) -> R {
149    stacker::maybe_grow(1024 * 64, 1024 * 1024, f)
150}
151#[cfg(not(feature = "stacker"))]
152#[inline]
153pub(crate) fn recurse<R, F: FnOnce() -> R>(f: F) -> R {
154    f()
155}
156
157impl<'src, I, O, E> Parser<'src, I, O, E> for Recursive<Indirect<'src, '_, I, O, E>>
158where
159    I: Input<'src>,
160    E: ParserExtra<'src, I>,
161{
162    #[inline]
163    fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O> {
164        recurse(move || {
165            M::invoke(
166                self.parser()
167                    .inner
168                    .get()
169                    .expect("Recursive parser used before being defined")
170                    .as_ref(),
171                inp,
172            )
173        })
174    }
175
176    go_extra!(O);
177}
178
179impl<'src, I, O, E> Parser<'src, I, O, E> for Recursive<Direct<'src, '_, I, O, E>>
180where
181    I: Input<'src>,
182    E: ParserExtra<'src, I>,
183{
184    #[inline]
185    fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O> {
186        recurse(move || M::invoke(&*self.parser(), inp))
187    }
188
189    go_extra!(O);
190}
191
192/// Construct a recursive parser (i.e: a parser that may contain itself as part of its pattern).
193///
194/// The given function must create the parser. The parser must not be used to parse input before this function returns.
195///
196/// This is a wrapper around [`Recursive::declare`] and [`Recursive::define`].
197///
198/// The output type of this parser is `O`, the same as the inner parser.
199///
200/// # Examples
201///
202/// ```
203/// # use chumsky::prelude::*;
204/// #[derive(Debug, PartialEq)]
205/// enum Tree<'src> {
206///     Leaf(&'src str),
207///     Branch(Vec<Tree<'src>>),
208/// }
209///
210/// // Parser that recursively parses nested lists
211/// let tree = recursive::<_, _, extra::Err<Simple<char>>, _, _>(|tree| tree
212///     .separated_by(just(','))
213///     .collect::<Vec<_>>()
214///     .delimited_by(just('['), just(']'))
215///     .map(Tree::Branch)
216///     .or(text::ascii::ident().map(Tree::Leaf))
217///     .padded());
218///
219/// assert_eq!(tree.parse("hello").into_result(), Ok(Tree::Leaf("hello")));
220/// assert_eq!(tree.parse("[a, b, c]").into_result(), Ok(Tree::Branch(vec![
221///     Tree::Leaf("a"),
222///     Tree::Leaf("b"),
223///     Tree::Leaf("c"),
224/// ])));
225/// // The parser can deal with arbitrarily complex nested lists
226/// assert_eq!(tree.parse("[[a, b], c, [d, [e, f]]]").into_result(), Ok(Tree::Branch(vec![
227///     Tree::Branch(vec![
228///         Tree::Leaf("a"),
229///         Tree::Leaf("b"),
230///     ]),
231///     Tree::Leaf("c"),
232///     Tree::Branch(vec![
233///         Tree::Leaf("d"),
234///         Tree::Branch(vec![
235///             Tree::Leaf("e"),
236///             Tree::Leaf("f"),
237///         ]),
238///     ]),
239/// ])));
240/// ```
241// INFO: Clone bound not actually needed, but good to be safe for future compat
242pub fn recursive<'src, 'b, I, O, E, A, F>(f: F) -> Recursive<Direct<'src, 'b, I, O, E>>
243where
244    I: Input<'src>,
245    E: ParserExtra<'src, I>,
246    A: Parser<'src, I, O, E> + Clone + 'b,
247    F: FnOnce(Recursive<Direct<'src, 'b, I, O, E>>) -> A,
248{
249    let rc = Rc::new_cyclic(|rc| {
250        let rc: rc::Weak<DynParser<'src, 'b, I, O, E>> = rc.clone() as _;
251        let parser = Recursive {
252            inner: RecursiveInner::Unowned(rc.clone()),
253        };
254
255        f(parser)
256    });
257
258    Recursive {
259        inner: RecursiveInner::Owned(rc),
260    }
261}