chumsky/recursive.rs
1//! Recursive parsers (parser that include themselves within their patterns).
2//!
3//! *“It's unpleasantly like being drunk."
4//! "What's so unpleasant about being drunk?"
5//! "You ask a glass of water.”*
6//!
7//! The [`recursive()`] function covers most cases, but sometimes it's necessary to manually control the declaration and
8//! definition of parsers more carefully, particularly for mutually-recursive parsers. In such cases, the functions on
9//! [`Recursive`] allow for this.
10
11use super::*;
12
13struct OnceCell<T>(core::cell::Cell<Option<T>>);
14impl<T> OnceCell<T> {
15 pub fn new() -> Self {
16 Self(core::cell::Cell::new(None))
17 }
18 pub fn set(&self, x: T) -> Result<(), ()> {
19 // SAFETY: Function is not reentrant so we have exclusive access to the inner data
20 unsafe {
21 let vacant = (*self.0.as_ptr()).is_none();
22 if vacant {
23 self.0.as_ptr().write(Some(x));
24 Ok(())
25 } else {
26 Err(())
27 }
28 }
29 }
30 #[inline]
31 pub fn get(&self) -> Option<&T> {
32 // SAFETY: We ensure that we never insert twice (so the inner `T` always lives as long as us, if it exists) and
33 // neither function is possibly reentrant so there's no way we can invalidate mut xor shared aliasing
34 unsafe { (*self.0.as_ptr()).as_ref() }
35 }
36}
37
38// TODO: Ensure that this doesn't produce leaks
39enum RecursiveInner<T: ?Sized> {
40 Owned(Rc<T>),
41 Unowned(rc::Weak<T>),
42}
43
44/// Type for recursive parsers that are defined through a call to `recursive`, and as such
45/// need no internal indirection
46pub type Direct<'src, 'b, I, O, Extra> = DynParser<'src, 'b, I, O, Extra>;
47
48/// Type for recursive parsers that are defined through a call to [`Recursive::declare`], and as
49/// such require an additional layer of allocation.
50pub struct Indirect<'src, 'b, I: Input<'src>, O, Extra: ParserExtra<'src, I>> {
51 inner: OnceCell<Box<DynParser<'src, 'b, I, O, Extra>>>,
52}
53
54/// A parser that can be defined in terms of itself by separating its [declaration](Recursive::declare) from its
55/// [definition](Recursive::define).
56///
57/// Prefer to use [`recursive()`], which exists as a convenient wrapper around both operations, if possible.
58pub struct Recursive<P: ?Sized> {
59 inner: RecursiveInner<P>,
60}
61
62impl<'src, 'b, I: Input<'src>, O, E: ParserExtra<'src, I>> Recursive<Indirect<'src, 'b, I, O, E>> {
63 /// Declare the existence of a recursive parser, allowing it to be used to construct parser combinators before
64 /// being fulled defined.
65 ///
66 /// Declaring a parser before defining it is required for a parser to reference itself.
67 ///
68 /// This should be followed by **exactly one** call to the [`Recursive::define`] method prior to using the parser
69 /// for parsing (i.e: via the [`Parser::parse`] method or similar).
70 ///
71 /// Prefer to use [`recursive()`], which is a convenient wrapper around this method and [`Recursive::define`], if
72 /// possible.
73 ///
74 /// # Examples
75 ///
76 /// ```
77 /// # use chumsky::prelude::*;
78 /// #[derive(Debug, PartialEq)]
79 /// enum Chain {
80 /// End,
81 /// Link(char, Box<Chain>),
82 /// }
83 ///
84 /// // Declare the existence of the parser before defining it so that it can reference itself
85 /// let mut chain = Recursive::declare();
86 ///
87 /// // Define the parser in terms of itself.
88 /// // In this case, the parser parses a right-recursive list of '+' into a singly linked list
89 /// chain.define(just::<_, _, extra::Err<Simple<char>>>('+')
90 /// .then(chain.clone())
91 /// .map(|(c, chain)| Chain::Link(c, Box::new(chain)))
92 /// .or_not()
93 /// .map(|chain| chain.unwrap_or(Chain::End)));
94 ///
95 /// assert_eq!(chain.parse("").into_result(), Ok(Chain::End));
96 /// assert_eq!(
97 /// chain.parse("++").into_result(),
98 /// Ok(Chain::Link('+', Box::new(Chain::Link('+', Box::new(Chain::End))))),
99 /// );
100 /// ```
101 pub fn declare() -> Self {
102 Recursive {
103 inner: RecursiveInner::Owned(Rc::new(Indirect {
104 inner: OnceCell::new(),
105 })),
106 }
107 }
108
109 /// Defines the parser after declaring it, allowing it to be used for parsing.
110 // INFO: Clone bound not actually needed, but good to be safe for future compat
111 #[track_caller]
112 pub fn define<P: Parser<'src, I, O, E> + Clone + 'b>(&mut self, parser: P) {
113 let location = *Location::caller();
114 self.parser()
115 .inner
116 .set(Box::new(parser))
117 .unwrap_or_else(|_| {
118 panic!("recursive parsers can only be defined once, trying to redefine it at {location}")
119 });
120 }
121}
122
123impl<P: ?Sized> Recursive<P> {
124 #[inline]
125 fn parser(&self) -> Rc<P> {
126 match &self.inner {
127 RecursiveInner::Owned(x) => x.clone(),
128 RecursiveInner::Unowned(x) => x
129 .upgrade()
130 .expect("Recursive parser used before being defined"),
131 }
132 }
133}
134
135impl<P: ?Sized> Clone for Recursive<P> {
136 fn clone(&self) -> Self {
137 Self {
138 inner: match &self.inner {
139 RecursiveInner::Owned(x) => RecursiveInner::Owned(x.clone()),
140 RecursiveInner::Unowned(x) => RecursiveInner::Unowned(x.clone()),
141 },
142 }
143 }
144}
145
146#[cfg(feature = "stacker")]
147#[inline]
148pub(crate) fn recurse<R, F: FnOnce() -> R>(f: F) -> R {
149 stacker::maybe_grow(1024 * 64, 1024 * 1024, f)
150}
151#[cfg(not(feature = "stacker"))]
152#[inline]
153pub(crate) fn recurse<R, F: FnOnce() -> R>(f: F) -> R {
154 f()
155}
156
157impl<'src, I, O, E> Parser<'src, I, O, E> for Recursive<Indirect<'src, '_, I, O, E>>
158where
159 I: Input<'src>,
160 E: ParserExtra<'src, I>,
161{
162 #[doc(hidden)]
163 #[cfg(feature = "debug")]
164 fn node_info(&self, scope: &mut debug::NodeScope) -> debug::NodeInfo {
165 // Debug features don't fall under MSRV
166 #[allow(clippy::incompatible_msrv)]
167 let ptr = match &self.inner {
168 RecursiveInner::Owned(x) => Rc::as_ptr(x).addr(),
169 RecursiveInner::Unowned(x) => rc::Weak::as_ptr(x).addr(),
170 };
171 scope.lookup_rec(ptr, |scope| {
172 self.parser()
173 .inner
174 .get()
175 .expect("Recursive parser used before being defined")
176 .as_ref()
177 .node_info(scope)
178 })
179 }
180
181 #[inline]
182 fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O> {
183 recurse(move || {
184 M::invoke(
185 self.parser()
186 .inner
187 .get()
188 .expect("Recursive parser used before being defined")
189 .as_ref(),
190 inp,
191 )
192 })
193 }
194
195 go_extra!(O);
196}
197
198impl<'src, I, O, E> Parser<'src, I, O, E> for Recursive<Direct<'src, '_, I, O, E>>
199where
200 I: Input<'src>,
201 E: ParserExtra<'src, I>,
202{
203 #[doc(hidden)]
204 #[cfg(feature = "debug")]
205 fn node_info(&self, scope: &mut debug::NodeScope) -> debug::NodeInfo {
206 // Debug features don't fall under MSRV
207 #[allow(clippy::incompatible_msrv)]
208 let ptr = match &self.inner {
209 RecursiveInner::Owned(x) => Rc::as_ptr(x).addr(),
210 RecursiveInner::Unowned(x) => rc::Weak::as_ptr(x).addr(),
211 };
212 scope.lookup_rec(ptr, |scope| self.parser().node_info(scope))
213 }
214
215 #[inline]
216 fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O> {
217 recurse(move || M::invoke(&*self.parser(), inp))
218 }
219
220 go_extra!(O);
221}
222
223/// Construct a recursive parser (i.e: a parser that may contain itself as part of its pattern).
224///
225/// The given function must create the parser. The parser must not be used to parse input before this function returns.
226///
227/// This is a wrapper around [`Recursive::declare`] and [`Recursive::define`].
228///
229/// The output type of this parser is `O`, the same as the inner parser.
230///
231/// # Examples
232///
233/// ```
234/// # use chumsky::prelude::*;
235/// #[derive(Debug, PartialEq)]
236/// enum Tree<'src> {
237/// Leaf(&'src str),
238/// Branch(Vec<Tree<'src>>),
239/// }
240///
241/// // Parser that recursively parses nested lists
242/// let tree = recursive::<_, _, extra::Err<Simple<char>>, _, _>(|tree| tree
243/// .separated_by(just(','))
244/// .collect::<Vec<_>>()
245/// .delimited_by(just('['), just(']'))
246/// .map(Tree::Branch)
247/// .or(text::ascii::ident().map(Tree::Leaf))
248/// .padded());
249///
250/// assert_eq!(tree.parse("hello").into_result(), Ok(Tree::Leaf("hello")));
251/// assert_eq!(tree.parse("[a, b, c]").into_result(), Ok(Tree::Branch(vec![
252/// Tree::Leaf("a"),
253/// Tree::Leaf("b"),
254/// Tree::Leaf("c"),
255/// ])));
256/// // The parser can deal with arbitrarily complex nested lists
257/// assert_eq!(tree.parse("[[a, b], c, [d, [e, f]]]").into_result(), Ok(Tree::Branch(vec![
258/// Tree::Branch(vec![
259/// Tree::Leaf("a"),
260/// Tree::Leaf("b"),
261/// ]),
262/// Tree::Leaf("c"),
263/// Tree::Branch(vec![
264/// Tree::Leaf("d"),
265/// Tree::Branch(vec![
266/// Tree::Leaf("e"),
267/// Tree::Leaf("f"),
268/// ]),
269/// ]),
270/// ])));
271/// ```
272// INFO: Clone bound not actually needed, but good to be safe for future compat
273pub fn recursive<'src, 'b, I, O, E, A, F>(f: F) -> Recursive<Direct<'src, 'b, I, O, E>>
274where
275 I: Input<'src>,
276 E: ParserExtra<'src, I>,
277 A: Parser<'src, I, O, E> + Clone + 'b,
278 F: FnOnce(Recursive<Direct<'src, 'b, I, O, E>>) -> A,
279{
280 let rc = Rc::new_cyclic(|rc| {
281 let rc: rc::Weak<DynParser<'src, 'b, I, O, E>> = rc.clone() as _;
282 let parser = Recursive {
283 inner: RecursiveInner::Unowned(rc.clone()),
284 };
285
286 f(parser)
287 });
288
289 Recursive {
290 inner: RecursiveInner::Owned(rc),
291 }
292}