Skip to main content

chumsky/
recursive.rs

1//! Recursive parsers (parser that include themselves within their patterns).
2//!
3//! *“It's unpleasantly like being drunk."
4//! "What's so unpleasant about being drunk?"
5//! "You ask a glass of water.”*
6//!
7//! The [`recursive()`] function covers most cases, but sometimes it's necessary to manually control the declaration and
8//! definition of parsers more carefully, particularly for mutually-recursive parsers. In such cases, the functions on
9//! [`Recursive`] allow for this.
10
11use super::*;
12
13struct OnceCell<T>(core::cell::Cell<Option<T>>);
14impl<T> OnceCell<T> {
15    pub fn new() -> Self {
16        Self(core::cell::Cell::new(None))
17    }
18    pub fn set(&self, x: T) -> Result<(), ()> {
19        // SAFETY: Function is not reentrant so we have exclusive access to the inner data
20        unsafe {
21            let vacant = (*self.0.as_ptr()).is_none();
22            if vacant {
23                self.0.as_ptr().write(Some(x));
24                Ok(())
25            } else {
26                Err(())
27            }
28        }
29    }
30    #[inline]
31    pub fn get(&self) -> Option<&T> {
32        // SAFETY: We ensure that we never insert twice (so the inner `T` always lives as long as us, if it exists) and
33        // neither function is possibly reentrant so there's no way we can invalidate mut xor shared aliasing
34        unsafe { (*self.0.as_ptr()).as_ref() }
35    }
36}
37
38// TODO: Ensure that this doesn't produce leaks
39enum RecursiveInner<T: ?Sized> {
40    Owned(Rc<T>),
41    Unowned(rc::Weak<T>),
42}
43
44/// Type for recursive parsers that are defined through a call to `recursive`, and as such
45/// need no internal indirection
46pub type Direct<'src, 'b, I, O, Extra> = DynParser<'src, 'b, I, O, Extra>;
47
48/// Type for recursive parsers that are defined through a call to [`Recursive::declare`], and as
49/// such require an additional layer of allocation.
50pub struct Indirect<'src, 'b, I: Input<'src>, O, Extra: ParserExtra<'src, I>> {
51    inner: OnceCell<Box<DynParser<'src, 'b, I, O, Extra>>>,
52}
53
54/// A parser that can be defined in terms of itself by separating its [declaration](Recursive::declare) from its
55/// [definition](Recursive::define).
56///
57/// Prefer to use [`recursive()`], which exists as a convenient wrapper around both operations, if possible.
58pub struct Recursive<P: ?Sized> {
59    inner: RecursiveInner<P>,
60}
61
62impl<'src, 'b, I: Input<'src>, O, E: ParserExtra<'src, I>> Recursive<Indirect<'src, 'b, I, O, E>> {
63    /// Declare the existence of a recursive parser, allowing it to be used to construct parser combinators before
64    /// being fulled defined.
65    ///
66    /// Declaring a parser before defining it is required for a parser to reference itself.
67    ///
68    /// This should be followed by **exactly one** call to the [`Recursive::define`] method prior to using the parser
69    /// for parsing (i.e: via the [`Parser::parse`] method or similar).
70    ///
71    /// Prefer to use [`recursive()`], which is a convenient wrapper around this method and [`Recursive::define`], if
72    /// possible.
73    ///
74    /// # Examples
75    ///
76    /// ```
77    /// # use chumsky::prelude::*;
78    /// #[derive(Debug, PartialEq)]
79    /// enum Chain {
80    ///     End,
81    ///     Link(char, Box<Chain>),
82    /// }
83    ///
84    /// // Declare the existence of the parser before defining it so that it can reference itself
85    /// let mut chain = Recursive::declare();
86    ///
87    /// // Define the parser in terms of itself.
88    /// // In this case, the parser parses a right-recursive list of '+' into a singly linked list
89    /// chain.define(just::<_, _, extra::Err<Simple<char>>>('+')
90    ///     .then(chain.clone())
91    ///     .map(|(c, chain)| Chain::Link(c, Box::new(chain)))
92    ///     .or_not()
93    ///     .map(|chain| chain.unwrap_or(Chain::End)));
94    ///
95    /// assert_eq!(chain.parse("").into_result(), Ok(Chain::End));
96    /// assert_eq!(
97    ///     chain.parse("++").into_result(),
98    ///     Ok(Chain::Link('+', Box::new(Chain::Link('+', Box::new(Chain::End))))),
99    /// );
100    /// ```
101    pub fn declare() -> Self {
102        Recursive {
103            inner: RecursiveInner::Owned(Rc::new(Indirect {
104                inner: OnceCell::new(),
105            })),
106        }
107    }
108
109    /// Defines the parser after declaring it, allowing it to be used for parsing.
110    // INFO: Clone bound not actually needed, but good to be safe for future compat
111    #[track_caller]
112    pub fn define<P: Parser<'src, I, O, E> + Clone + 'b>(&mut self, parser: P) {
113        let location = *Location::caller();
114        self.parser()
115            .inner
116            .set(Box::new(parser))
117            .unwrap_or_else(|_| {
118                panic!("recursive parsers can only be defined once, trying to redefine it at {location}")
119            });
120    }
121}
122
123impl<P: ?Sized> Recursive<P> {
124    #[inline]
125    fn parser(&self) -> Rc<P> {
126        match &self.inner {
127            RecursiveInner::Owned(x) => x.clone(),
128            RecursiveInner::Unowned(x) => x
129                .upgrade()
130                .expect("Recursive parser used before being defined"),
131        }
132    }
133}
134
135impl<P: ?Sized> Clone for Recursive<P> {
136    fn clone(&self) -> Self {
137        Self {
138            inner: match &self.inner {
139                RecursiveInner::Owned(x) => RecursiveInner::Owned(x.clone()),
140                RecursiveInner::Unowned(x) => RecursiveInner::Unowned(x.clone()),
141            },
142        }
143    }
144}
145
146#[cfg(feature = "stacker")]
147#[inline]
148pub(crate) fn recurse<R, F: FnOnce() -> R>(f: F) -> R {
149    stacker::maybe_grow(1024 * 64, 1024 * 1024, f)
150}
151#[cfg(not(feature = "stacker"))]
152#[inline]
153pub(crate) fn recurse<R, F: FnOnce() -> R>(f: F) -> R {
154    f()
155}
156
157impl<'src, I, O, E> Parser<'src, I, O, E> for Recursive<Indirect<'src, '_, I, O, E>>
158where
159    I: Input<'src>,
160    E: ParserExtra<'src, I>,
161{
162    #[doc(hidden)]
163    #[cfg(feature = "debug")]
164    fn node_info(&self, scope: &mut debug::NodeScope) -> debug::NodeInfo {
165        // Debug features don't fall under MSRV
166        #[allow(clippy::incompatible_msrv)]
167        let ptr = match &self.inner {
168            RecursiveInner::Owned(x) => Rc::as_ptr(x).addr(),
169            RecursiveInner::Unowned(x) => rc::Weak::as_ptr(x).addr(),
170        };
171        scope.lookup_rec(ptr, |scope| {
172            self.parser()
173                .inner
174                .get()
175                .expect("Recursive parser used before being defined")
176                .as_ref()
177                .node_info(scope)
178        })
179    }
180
181    #[inline]
182    fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O> {
183        recurse(move || {
184            M::invoke(
185                self.parser()
186                    .inner
187                    .get()
188                    .expect("Recursive parser used before being defined")
189                    .as_ref(),
190                inp,
191            )
192        })
193    }
194
195    go_extra!(O);
196}
197
198impl<'src, I, O, E> Parser<'src, I, O, E> for Recursive<Direct<'src, '_, I, O, E>>
199where
200    I: Input<'src>,
201    E: ParserExtra<'src, I>,
202{
203    #[doc(hidden)]
204    #[cfg(feature = "debug")]
205    fn node_info(&self, scope: &mut debug::NodeScope) -> debug::NodeInfo {
206        // Debug features don't fall under MSRV
207        #[allow(clippy::incompatible_msrv)]
208        let ptr = match &self.inner {
209            RecursiveInner::Owned(x) => Rc::as_ptr(x).addr(),
210            RecursiveInner::Unowned(x) => rc::Weak::as_ptr(x).addr(),
211        };
212        scope.lookup_rec(ptr, |scope| self.parser().node_info(scope))
213    }
214
215    #[inline]
216    fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O> {
217        recurse(move || M::invoke(&*self.parser(), inp))
218    }
219
220    go_extra!(O);
221}
222
223/// Construct a recursive parser (i.e: a parser that may contain itself as part of its pattern).
224///
225/// The given function must create the parser. The parser must not be used to parse input before this function returns.
226///
227/// This is a wrapper around [`Recursive::declare`] and [`Recursive::define`].
228///
229/// The output type of this parser is `O`, the same as the inner parser.
230///
231/// # Examples
232///
233/// ```
234/// # use chumsky::prelude::*;
235/// #[derive(Debug, PartialEq)]
236/// enum Tree<'src> {
237///     Leaf(&'src str),
238///     Branch(Vec<Tree<'src>>),
239/// }
240///
241/// // Parser that recursively parses nested lists
242/// let tree = recursive::<_, _, extra::Err<Simple<char>>, _, _>(|tree| tree
243///     .separated_by(just(','))
244///     .collect::<Vec<_>>()
245///     .delimited_by(just('['), just(']'))
246///     .map(Tree::Branch)
247///     .or(text::ascii::ident().map(Tree::Leaf))
248///     .padded());
249///
250/// assert_eq!(tree.parse("hello").into_result(), Ok(Tree::Leaf("hello")));
251/// assert_eq!(tree.parse("[a, b, c]").into_result(), Ok(Tree::Branch(vec![
252///     Tree::Leaf("a"),
253///     Tree::Leaf("b"),
254///     Tree::Leaf("c"),
255/// ])));
256/// // The parser can deal with arbitrarily complex nested lists
257/// assert_eq!(tree.parse("[[a, b], c, [d, [e, f]]]").into_result(), Ok(Tree::Branch(vec![
258///     Tree::Branch(vec![
259///         Tree::Leaf("a"),
260///         Tree::Leaf("b"),
261///     ]),
262///     Tree::Leaf("c"),
263///     Tree::Branch(vec![
264///         Tree::Leaf("d"),
265///         Tree::Branch(vec![
266///             Tree::Leaf("e"),
267///             Tree::Leaf("f"),
268///         ]),
269///     ]),
270/// ])));
271/// ```
272// INFO: Clone bound not actually needed, but good to be safe for future compat
273pub fn recursive<'src, 'b, I, O, E, A, F>(f: F) -> Recursive<Direct<'src, 'b, I, O, E>>
274where
275    I: Input<'src>,
276    E: ParserExtra<'src, I>,
277    A: Parser<'src, I, O, E> + Clone + 'b,
278    F: FnOnce(Recursive<Direct<'src, 'b, I, O, E>>) -> A,
279{
280    let rc = Rc::new_cyclic(|rc| {
281        let rc: rc::Weak<DynParser<'src, 'b, I, O, E>> = rc.clone() as _;
282        let parser = Recursive {
283            inner: RecursiveInner::Unowned(rc.clone()),
284        };
285
286        f(parser)
287    });
288
289    Recursive {
290        inner: RecursiveInner::Owned(rc),
291    }
292}