chumsky/recursive.rs
1//! Recursive parsers (parser that include themselves within their patterns).
2//!
3//! *“It's unpleasantly like being drunk."
4//! "What's so unpleasant about being drunk?"
5//! "You ask a glass of water.”*
6//!
7//! The [`recursive()`] function covers most cases, but sometimes it's necessary to manually control the declaration and
8//! definition of parsers more carefully, particularly for mutually-recursive parsers. In such cases, the functions on
9//! [`Recursive`] allow for this.
10
11use super::*;
12
13struct OnceCell<T>(core::cell::Cell<Option<T>>);
14impl<T> OnceCell<T> {
15 pub fn new() -> Self {
16 Self(core::cell::Cell::new(None))
17 }
18 pub fn set(&self, x: T) -> Result<(), ()> {
19 // SAFETY: Function is not reentrant so we have exclusive access to the inner data
20 unsafe {
21 let vacant = (*self.0.as_ptr()).is_none();
22 if vacant {
23 self.0.as_ptr().write(Some(x));
24 Ok(())
25 } else {
26 Err(())
27 }
28 }
29 }
30 #[inline]
31 pub fn get(&self) -> Option<&T> {
32 // SAFETY: We ensure that we never insert twice (so the inner `T` always lives as long as us, if it exists) and
33 // neither function is possibly reentrant so there's no way we can invalidate mut xor shared aliasing
34 unsafe { (*self.0.as_ptr()).as_ref() }
35 }
36}
37
38// TODO: Ensure that this doesn't produce leaks
39enum RecursiveInner<T: ?Sized> {
40 Owned(Rc<T>),
41 Unowned(rc::Weak<T>),
42}
43
44/// Type for recursive parsers that are defined through a call to `recursive`, and as such
45/// need no internal indirection
46pub type Direct<'src, 'b, I, O, Extra> = DynParser<'src, 'b, I, O, Extra>;
47
48/// Type for recursive parsers that are defined through a call to [`Recursive::declare`], and as
49/// such require an additional layer of allocation.
50pub struct Indirect<'src, 'b, I: Input<'src>, O, Extra: ParserExtra<'src, I>> {
51 inner: OnceCell<Box<DynParser<'src, 'b, I, O, Extra>>>,
52}
53
54/// A parser that can be defined in terms of itself by separating its [declaration](Recursive::declare) from its
55/// [definition](Recursive::define).
56///
57/// Prefer to use [`recursive()`], which exists as a convenient wrapper around both operations, if possible.
58pub struct Recursive<P: ?Sized> {
59 inner: RecursiveInner<P>,
60}
61
62impl<'src, 'b, I: Input<'src>, O, E: ParserExtra<'src, I>> Recursive<Indirect<'src, 'b, I, O, E>> {
63 /// Declare the existence of a recursive parser, allowing it to be used to construct parser combinators before
64 /// being fulled defined.
65 ///
66 /// Declaring a parser before defining it is required for a parser to reference itself.
67 ///
68 /// This should be followed by **exactly one** call to the [`Recursive::define`] method prior to using the parser
69 /// for parsing (i.e: via the [`Parser::parse`] method or similar).
70 ///
71 /// Prefer to use [`recursive()`], which is a convenient wrapper around this method and [`Recursive::define`], if
72 /// possible.
73 ///
74 /// # Examples
75 ///
76 /// ```
77 /// # use chumsky::prelude::*;
78 /// #[derive(Debug, PartialEq)]
79 /// enum Chain {
80 /// End,
81 /// Link(char, Box<Chain>),
82 /// }
83 ///
84 /// // Declare the existence of the parser before defining it so that it can reference itself
85 /// let mut chain = Recursive::declare();
86 ///
87 /// // Define the parser in terms of itself.
88 /// // In this case, the parser parses a right-recursive list of '+' into a singly linked list
89 /// chain.define(just::<_, _, extra::Err<Simple<char>>>('+')
90 /// .then(chain.clone())
91 /// .map(|(c, chain)| Chain::Link(c, Box::new(chain)))
92 /// .or_not()
93 /// .map(|chain| chain.unwrap_or(Chain::End)));
94 ///
95 /// assert_eq!(chain.parse("").into_result(), Ok(Chain::End));
96 /// assert_eq!(
97 /// chain.parse("++").into_result(),
98 /// Ok(Chain::Link('+', Box::new(Chain::Link('+', Box::new(Chain::End))))),
99 /// );
100 /// ```
101 pub fn declare() -> Self {
102 Recursive {
103 inner: RecursiveInner::Owned(Rc::new(Indirect {
104 inner: OnceCell::new(),
105 })),
106 }
107 }
108
109 /// Defines the parser after declaring it, allowing it to be used for parsing.
110 // INFO: Clone bound not actually needed, but good to be safe for future compat
111 #[track_caller]
112 pub fn define<P: Parser<'src, I, O, E> + Clone + 'src + 'b>(&mut self, parser: P) {
113 let location = *Location::caller();
114 self.parser()
115 .inner
116 .set(Box::new(parser))
117 .unwrap_or_else(|_| {
118 panic!("recursive parsers can only be defined once, trying to redefine it at {location}")
119 });
120 }
121}
122
123impl<P: ?Sized> Recursive<P> {
124 #[inline]
125 fn parser(&self) -> Rc<P> {
126 match &self.inner {
127 RecursiveInner::Owned(x) => x.clone(),
128 RecursiveInner::Unowned(x) => x
129 .upgrade()
130 .expect("Recursive parser used before being defined"),
131 }
132 }
133}
134
135impl<P: ?Sized> Clone for Recursive<P> {
136 fn clone(&self) -> Self {
137 Self {
138 inner: match &self.inner {
139 RecursiveInner::Owned(x) => RecursiveInner::Owned(x.clone()),
140 RecursiveInner::Unowned(x) => RecursiveInner::Unowned(x.clone()),
141 },
142 }
143 }
144}
145
146#[cfg(feature = "stacker")]
147#[inline]
148pub(crate) fn recurse<R, F: FnOnce() -> R>(f: F) -> R {
149 stacker::maybe_grow(1024 * 64, 1024 * 1024, f)
150}
151#[cfg(not(feature = "stacker"))]
152#[inline]
153pub(crate) fn recurse<R, F: FnOnce() -> R>(f: F) -> R {
154 f()
155}
156
157impl<'src, I, O, E> Parser<'src, I, O, E> for Recursive<Indirect<'src, '_, I, O, E>>
158where
159 I: Input<'src>,
160 E: ParserExtra<'src, I>,
161{
162 #[inline]
163 fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O> {
164 recurse(move || {
165 M::invoke(
166 self.parser()
167 .inner
168 .get()
169 .expect("Recursive parser used before being defined")
170 .as_ref(),
171 inp,
172 )
173 })
174 }
175
176 go_extra!(O);
177}
178
179impl<'src, I, O, E> Parser<'src, I, O, E> for Recursive<Direct<'src, '_, I, O, E>>
180where
181 I: Input<'src>,
182 E: ParserExtra<'src, I>,
183{
184 #[inline]
185 fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O> {
186 recurse(move || M::invoke(&*self.parser(), inp))
187 }
188
189 go_extra!(O);
190}
191
192/// Construct a recursive parser (i.e: a parser that may contain itself as part of its pattern).
193///
194/// The given function must create the parser. The parser must not be used to parse input before this function returns.
195///
196/// This is a wrapper around [`Recursive::declare`] and [`Recursive::define`].
197///
198/// The output type of this parser is `O`, the same as the inner parser.
199///
200/// # Examples
201///
202/// ```
203/// # use chumsky::prelude::*;
204/// #[derive(Debug, PartialEq)]
205/// enum Tree<'src> {
206/// Leaf(&'src str),
207/// Branch(Vec<Tree<'src>>),
208/// }
209///
210/// // Parser that recursively parses nested lists
211/// let tree = recursive::<_, _, extra::Err<Simple<char>>, _, _>(|tree| tree
212/// .separated_by(just(','))
213/// .collect::<Vec<_>>()
214/// .delimited_by(just('['), just(']'))
215/// .map(Tree::Branch)
216/// .or(text::ascii::ident().map(Tree::Leaf))
217/// .padded());
218///
219/// assert_eq!(tree.parse("hello").into_result(), Ok(Tree::Leaf("hello")));
220/// assert_eq!(tree.parse("[a, b, c]").into_result(), Ok(Tree::Branch(vec![
221/// Tree::Leaf("a"),
222/// Tree::Leaf("b"),
223/// Tree::Leaf("c"),
224/// ])));
225/// // The parser can deal with arbitrarily complex nested lists
226/// assert_eq!(tree.parse("[[a, b], c, [d, [e, f]]]").into_result(), Ok(Tree::Branch(vec![
227/// Tree::Branch(vec![
228/// Tree::Leaf("a"),
229/// Tree::Leaf("b"),
230/// ]),
231/// Tree::Leaf("c"),
232/// Tree::Branch(vec![
233/// Tree::Leaf("d"),
234/// Tree::Branch(vec![
235/// Tree::Leaf("e"),
236/// Tree::Leaf("f"),
237/// ]),
238/// ]),
239/// ])));
240/// ```
241// INFO: Clone bound not actually needed, but good to be safe for future compat
242pub fn recursive<'src, 'b, I, O, E, A, F>(f: F) -> Recursive<Direct<'src, 'b, I, O, E>>
243where
244 I: Input<'src>,
245 E: ParserExtra<'src, I>,
246 A: Parser<'src, I, O, E> + Clone + 'b,
247 F: FnOnce(Recursive<Direct<'src, 'b, I, O, E>>) -> A,
248{
249 let rc = Rc::new_cyclic(|rc| {
250 let rc: rc::Weak<DynParser<'src, 'b, I, O, E>> = rc.clone() as _;
251 let parser = Recursive {
252 inner: RecursiveInner::Unowned(rc.clone()),
253 };
254
255 f(parser)
256 });
257
258 Recursive {
259 inner: RecursiveInner::Owned(rc),
260 }
261}