Skip to main content

granit_parser/
parser_stack.rs

1use crate::{
2    input::{str::StrInput, BorrowedInput, BufferedInput},
3    parser::{Event, ParseResult, Parser, ParserTrait, SpannedEventReceiver},
4    scanner::{ScanError, Span},
5};
6use alloc::{boxed::Box, string::String, vec::Vec};
7
8/// A lightweight parser that replays a pre-collected event stream.
9pub struct ReplayParser<'input> {
10    events: Vec<(Event<'input>, Span)>,
11    index: usize,
12    current: Option<(Event<'input>, Span)>,
13    anchor_offset: usize,
14}
15
16impl<'input> ReplayParser<'input> {
17    /// Creates a new `ReplayParser`.
18    #[must_use]
19    pub fn new(events: Vec<(Event<'input>, Span)>, anchor_offset: usize) -> Self {
20        Self {
21            events,
22            index: 0,
23            current: None,
24            anchor_offset,
25        }
26    }
27
28    /// Get the current anchor offset count.
29    #[must_use]
30    pub fn get_anchor_offset(&self) -> usize {
31        self.anchor_offset
32    }
33
34    /// Set the current anchor offset count.
35    pub fn set_anchor_offset(&mut self, offset: usize) {
36        self.anchor_offset = offset;
37    }
38
39    fn advance_anchor_offset(&mut self, event: &Event<'input>) {
40        let anchor_id = match event {
41            Event::Scalar(_, _, anchor_id, _)
42            | Event::SequenceStart(anchor_id, _)
43            | Event::MappingStart(anchor_id, _) => *anchor_id,
44            _ => 0,
45        };
46
47        if anchor_id > 0 {
48            self.anchor_offset = self.anchor_offset.max(anchor_id.saturating_add(1));
49        }
50    }
51}
52
53impl<'input> ParserTrait<'input> for ReplayParser<'input> {
54    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
55        if self.current.is_none() {
56            self.current = self.events.get(self.index).cloned();
57        }
58        self.current.as_ref().map(Ok)
59    }
60
61    fn next_event(&mut self) -> Option<ParseResult<'input>> {
62        if let Some(current) = self.current.take() {
63            self.index += 1;
64            self.advance_anchor_offset(&current.0);
65            return Some(Ok(current));
66        }
67        let event = self.events.get(self.index).cloned()?;
68        self.index += 1;
69        self.advance_anchor_offset(&event.0);
70        Some(Ok(event))
71    }
72
73    fn load<R: SpannedEventReceiver<'input>>(
74        &mut self,
75        recv: &mut R,
76        multi: bool,
77    ) -> Result<(), ScanError> {
78        while let Some(res) = self.next_event() {
79            let (ev, span) = res?;
80            let is_doc_end = matches!(ev, Event::DocumentEnd);
81            let is_stream_end = matches!(ev, Event::StreamEnd);
82            recv.on_event(ev, span);
83            if is_stream_end {
84                break;
85            }
86            if !multi && is_doc_end {
87                break;
88            }
89        }
90        Ok(())
91    }
92}
93
94/// A wrapper for different types of parsers.
95pub enum AnyParser<'input, I, T>
96where
97    I: Iterator<Item = char>,
98    T: BorrowedInput<'input>,
99{
100    /// A parser over a string input.
101    String {
102        /// The parser itself.
103        parser: Parser<'input, StrInput<'input>>,
104        /// The name of the parser.
105        name: String,
106    },
107    /// A parser over an iterator input.
108    Iter {
109        /// The parser itself.
110        parser: Parser<'static, BufferedInput<I>>,
111        /// The name of the parser.
112        name: String,
113    },
114    /// A parser over a custom input.
115    Custom {
116        /// The parser itself.
117        parser: Parser<'input, T>,
118        /// The name of the parser.
119        name: String,
120    },
121    /// A parser over a replayed event stream.
122    Replay {
123        /// The replay parser itself.
124        parser: ReplayParser<'input>,
125        /// The name of the parser.
126        name: String,
127    },
128}
129
130impl<'input, I, T> AnyParser<'input, I, T>
131where
132    I: Iterator<Item = char>,
133    T: BorrowedInput<'input>,
134{
135    fn get_anchor_offset(&self) -> usize {
136        match self {
137            AnyParser::String { parser, .. } => parser.get_anchor_offset(),
138            AnyParser::Iter { parser, .. } => parser.get_anchor_offset(),
139            AnyParser::Custom { parser, .. } => parser.get_anchor_offset(),
140            AnyParser::Replay { parser, .. } => parser.get_anchor_offset(),
141        }
142    }
143
144    fn set_anchor_offset(&mut self, offset: usize) {
145        match self {
146            AnyParser::String { parser, .. } => parser.set_anchor_offset(offset),
147            AnyParser::Iter { parser, .. } => parser.set_anchor_offset(offset),
148            AnyParser::Custom { parser, .. } => parser.set_anchor_offset(offset),
149            AnyParser::Replay { parser, .. } => parser.set_anchor_offset(offset),
150        }
151    }
152}
153
154/// A parser implementation that utilizes a stack for parsing.
155///
156/// Note: `ParserStack` deliberately suppresses nested [`Event::StreamStart`] /
157/// [`Event::DocumentStart`] events when more than one parser is stacked, and the tests assert
158/// outputs where a nested parser starts directly with [`Event::MappingStart`] before the parent
159/// stream/document wrapper appears.
160///
161/// That is exactly what we want for `!include`-style subtree injection.
162pub struct ParserStack<'input, I = core::iter::Empty<char>, T = StrInput<'input>>
163where
164    I: Iterator<Item = char>,
165    T: BorrowedInput<'input>,
166{
167    parsers: Vec<AnyParser<'input, I, T>>,
168    current: Option<(Event<'input>, Span)>,
169    stream_end_emitted: bool,
170    #[allow(clippy::type_complexity)]
171    include_resolver: Option<Box<dyn FnMut(&str) -> Result<String, ScanError> + 'input>>,
172}
173
174impl<'input, I, T> ParserStack<'input, I, T>
175where
176    I: Iterator<Item = char>,
177    T: BorrowedInput<'input>,
178{
179    /// Creates a new, empty parser stack.
180    #[must_use]
181    pub fn new() -> Self {
182        Self {
183            parsers: Vec::new(),
184            current: None,
185            stream_end_emitted: false,
186            include_resolver: None,
187        }
188    }
189
190    /// Sets the include resolver for this stack.
191    pub fn set_resolver(
192        &mut self,
193        resolver: impl FnMut(&str) -> Result<String, ScanError> + 'input,
194    ) {
195        self.include_resolver = Some(Box::new(resolver));
196    }
197
198    /// Resolves an include string using the include resolver.
199    ///
200    /// # Errors
201    /// Returns `ScanError` if no resolver is configured, include resolution fails, or the
202    /// included content cannot be parsed.
203    pub fn resolve(&mut self, include_str: &str) -> Result<(), ScanError> {
204        if let Some(resolver) = &mut self.include_resolver {
205            let content = resolver(include_str)?;
206            let mut parser = Parser::new_from_iter(content.chars().collect::<Vec<_>>().into_iter());
207            if let Some(parent) = self.parsers.last() {
208                parser.set_anchor_offset(parent.get_anchor_offset());
209            }
210            let mut events = Vec::new();
211            while let Some(event) = parser.next_event() {
212                events.push(event?);
213            }
214
215            self.push_replay_parser(
216                ReplayParser::new(events, parser.get_anchor_offset()),
217                include_str.into(),
218            );
219            Ok(())
220        } else {
221            Err(ScanError::new(
222                crate::scanner::Marker::new(0, 1, 0),
223                String::from("No include resolver set for parser stack."),
224            ))
225        }
226    }
227
228    /// Pushes a string parser onto the stack.
229    pub fn push_str_parser(&mut self, mut parser: Parser<'input, StrInput<'input>>, name: String) {
230        if let Some(parent) = self.parsers.last() {
231            parser.set_anchor_offset(parent.get_anchor_offset());
232        }
233        self.parsers.push(AnyParser::String { parser, name });
234    }
235
236    /// Pushes an iterator parser onto the stack.
237    pub fn push_iter_parser(
238        &mut self,
239        mut parser: Parser<'static, BufferedInput<I>>,
240        name: String,
241    ) {
242        if let Some(parent) = self.parsers.last() {
243            parser.set_anchor_offset(parent.get_anchor_offset());
244        }
245        self.parsers.push(AnyParser::Iter { parser, name });
246    }
247
248    /// Pushes a custom parser onto the stack.
249    pub fn push_custom_parser(&mut self, mut parser: Parser<'input, T>, name: String) {
250        if let Some(parent) = self.parsers.last() {
251            parser.set_anchor_offset(parent.get_anchor_offset());
252        }
253        self.parsers.push(AnyParser::Custom { parser, name });
254    }
255
256    /// Pushes a replay parser onto the stack.
257    pub fn push_replay_parser(&mut self, mut parser: ReplayParser<'input>, name: String) {
258        if let Some(parent) = self.parsers.last() {
259            let inherited = parent.get_anchor_offset();
260            parser.set_anchor_offset(parser.get_anchor_offset().max(inherited));
261        }
262
263        self.parsers.push(AnyParser::Replay { parser, name });
264    }
265
266    /// Pushes a custom parser onto the stack and primes the next event to be returned from it.
267    pub fn push_custom_parser_with_current(
268        &mut self,
269        mut parser: Parser<'input, T>,
270        name: String,
271        current: (Event<'input>, Span),
272    ) {
273        if let Some(parent) = self.parsers.last() {
274            parser.set_anchor_offset(parent.get_anchor_offset());
275        }
276        self.parsers.push(AnyParser::Custom { parser, name });
277        self.current = Some(current);
278    }
279
280    /// Returns the anchor offset that a newly pushed parser should inherit.
281    #[must_use]
282    pub fn current_anchor_offset(&self) -> usize {
283        self.parsers.last().map_or(0, AnyParser::get_anchor_offset)
284    }
285
286    /// Returns the names of the parsers currently in the stack.
287    #[must_use]
288    pub fn stack(&self) -> Vec<String> {
289        self.parsers
290            .iter()
291            .map(|p| match p {
292                AnyParser::String { name, .. }
293                | AnyParser::Iter { name, .. }
294                | AnyParser::Custom { name, .. }
295                | AnyParser::Replay { name, .. } => name.clone(),
296            })
297            .collect()
298    }
299
300    fn propagate_anchor_offset_from_popped(&mut self, popped: &AnyParser<'input, I, T>) {
301        if let Some(parent) = self.parsers.last_mut() {
302            let next_offset = parent.get_anchor_offset().max(popped.get_anchor_offset());
303            parent.set_anchor_offset(next_offset);
304        }
305    }
306
307    fn next_event_impl(&mut self) -> Result<(Event<'input>, Span), ScanError> {
308        loop {
309            let Some(any_parser) = self.parsers.last_mut() else {
310                return Ok((
311                    Event::StreamEnd,
312                    Span::empty(crate::scanner::Marker::new(0, 1, 0)),
313                ));
314            };
315
316            let res = match any_parser {
317                AnyParser::String { parser, .. } => parser.next_event(),
318                AnyParser::Iter { parser, .. } => parser.next_event(),
319                AnyParser::Custom { parser, .. } => parser.next_event(),
320                AnyParser::Replay { parser, .. } => parser.next_event(),
321            };
322
323            match res {
324                Some(Ok((Event::StreamEnd, span))) => {
325                    if self.parsers.len() == 1 {
326                        self.parsers.pop();
327                        return Ok((Event::StreamEnd, span));
328                    }
329                    let popped = self.parsers.pop().unwrap();
330                    self.propagate_anchor_offset_from_popped(&popped);
331                }
332                None => {
333                    if self.parsers.len() == 1 {
334                        self.parsers.pop();
335                        return Ok((
336                            Event::StreamEnd,
337                            Span::empty(crate::scanner::Marker::new(0, 1, 0)),
338                        ));
339                    }
340                    let popped = self.parsers.pop().unwrap();
341                    self.propagate_anchor_offset_from_popped(&popped);
342                }
343                Some(Err(e)) => {
344                    let popped = self.parsers.pop().unwrap();
345                    self.propagate_anchor_offset_from_popped(&popped);
346                    return Err(e);
347                }
348                Some(Ok((Event::DocumentEnd, span))) => {
349                    if self.parsers.len() == 1 {
350                        return Ok((Event::DocumentEnd, span));
351                    }
352
353                    // Check if it has more documents
354                    let peek_res = match self.parsers.last_mut().unwrap() {
355                        AnyParser::String { parser, .. } => parser.peek(),
356                        AnyParser::Iter { parser, .. } => parser.peek(),
357                        AnyParser::Custom { parser, .. } => parser.peek(),
358                        AnyParser::Replay { parser, .. } => parser.peek(),
359                    };
360
361                    match peek_res {
362                        Some(Ok((Event::StreamEnd, _))) | None => {
363                            let popped = self.parsers.pop().unwrap();
364                            self.propagate_anchor_offset_from_popped(&popped);
365                        }
366                        _ => {
367                            return Err(ScanError::new_str(
368                                span.start,
369                                "multiple documents not supported here",
370                            ));
371                        }
372                    }
373                }
374                Some(Ok(event)) => {
375                    if self.parsers.len() > 1
376                        && matches!(event.0, Event::StreamStart | Event::DocumentStart(_))
377                    {
378                        continue;
379                    }
380                    return Ok(event);
381                }
382            }
383        }
384    }
385}
386
387impl<'input, I, T> Default for ParserStack<'input, I, T>
388where
389    I: Iterator<Item = char>,
390    T: BorrowedInput<'input>,
391{
392    fn default() -> Self {
393        Self::new()
394    }
395}
396
397impl<'input, I, T> ParserTrait<'input> for ParserStack<'input, I, T>
398where
399    I: Iterator<Item = char>,
400    T: BorrowedInput<'input>,
401{
402    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
403        if let Some(ref x) = self.current {
404            Some(Ok(x))
405        } else {
406            if self.stream_end_emitted {
407                return None;
408            }
409            match self.next_event_impl() {
410                Ok(token) => {
411                    self.current = Some(token);
412                    Some(Ok(self.current.as_ref().unwrap()))
413                }
414                Err(e) => Some(Err(e)),
415            }
416        }
417    }
418
419    fn next_event(&mut self) -> Option<ParseResult<'input>> {
420        if let Some(token) = self.current.take() {
421            if let Event::StreamEnd = token.0 {
422                self.stream_end_emitted = true;
423            }
424            return Some(Ok(token));
425        }
426        if self.stream_end_emitted {
427            return None;
428        }
429        match self.next_event_impl() {
430            Ok(token) => {
431                if let Event::StreamEnd = token.0 {
432                    self.stream_end_emitted = true;
433                }
434                Some(Ok(token))
435            }
436            Err(e) => Some(Err(e)),
437        }
438    }
439
440    fn load<R: SpannedEventReceiver<'input>>(
441        &mut self,
442        recv: &mut R,
443        multi: bool,
444    ) -> Result<(), ScanError> {
445        while let Some(res) = self.next_event() {
446            // Fetch the next event, which is properly synced across the stack
447            let (ev, span) = res?;
448
449            // Track if we need to stop based on `multi`
450            let is_doc_end = matches!(ev, Event::DocumentEnd);
451            let is_stream_end = matches!(ev, Event::StreamEnd);
452
453            recv.on_event(ev, span);
454
455            if is_stream_end {
456                break;
457            }
458
459            // If we only want a single document and we just reached the end of one, stop
460            if !multi && is_doc_end {
461                break;
462            }
463        }
464
465        Ok(())
466    }
467}
468
469impl<'input, I, T> Iterator for ParserStack<'input, I, T>
470where
471    I: Iterator<Item = char>,
472    T: BorrowedInput<'input>,
473{
474    type Item = Result<(Event<'input>, Span), ScanError>;
475
476    fn next(&mut self) -> Option<Self::Item> {
477        self.next_event()
478    }
479}