Skip to main content

granit_parser/
parser_stack.rs

1use crate::{
2    input::{str::StrInput, BorrowedInput, BufferedInput},
3    parser::{Event, ParseResult, Parser, ParserTrait, SpannedEventReceiver},
4    scanner::{ScanError, Span},
5};
6use alloc::{boxed::Box, string::String, vec::Vec};
7
8/// A lightweight parser that replays a pre-collected event stream.
9pub struct ReplayParser<'input> {
10    events: Vec<(Event<'input>, Span)>,
11    index: usize,
12    anchor_offset: usize,
13}
14
15impl<'input> ReplayParser<'input> {
16    /// Create a parser that replays `events` and starts anchor allocation at `anchor_offset`.
17    #[must_use]
18    pub fn new(events: Vec<(Event<'input>, Span)>, anchor_offset: usize) -> Self {
19        Self {
20            events,
21            index: 0,
22            anchor_offset,
23        }
24    }
25
26    /// Return the next anchor ID that should be assigned after replayed events.
27    #[must_use]
28    pub fn get_anchor_offset(&self) -> usize {
29        self.anchor_offset
30    }
31
32    /// Set the next anchor ID that should be assigned after replayed events.
33    pub fn set_anchor_offset(&mut self, offset: usize) {
34        self.anchor_offset = offset;
35    }
36
37    fn advance_anchor_offset(&mut self, event: &Event<'input>) {
38        let anchor_id = match event {
39            Event::Scalar(_, _, anchor_id, _)
40            | Event::SequenceStart(_, anchor_id, _)
41            | Event::MappingStart(_, anchor_id, _) => *anchor_id,
42            _ => 0,
43        };
44
45        if anchor_id > 0 {
46            self.anchor_offset = self.anchor_offset.max(anchor_id.saturating_add(1));
47        }
48    }
49}
50
51impl<'input> ParserTrait<'input> for ReplayParser<'input> {
52    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
53        self.events.get(self.index).map(Ok)
54    }
55
56    fn next_event(&mut self) -> Option<ParseResult<'input>> {
57        let event = self.events.get(self.index).cloned()?;
58        self.index += 1;
59        self.advance_anchor_offset(&event.0);
60        Some(Ok(event))
61    }
62
63    fn load<R: SpannedEventReceiver<'input>>(
64        &mut self,
65        recv: &mut R,
66        multi: bool,
67    ) -> Result<(), ScanError> {
68        while let Some(res) = self.next_event() {
69            let (ev, span) = res?;
70            let is_doc_end = matches!(ev, Event::DocumentEnd);
71            let is_stream_end = matches!(ev, Event::StreamEnd);
72            recv.on_event(ev, span);
73            if is_stream_end {
74                break;
75            }
76            if !multi && is_doc_end {
77                break;
78            }
79        }
80        Ok(())
81    }
82}
83
84/// A wrapper for different types of parsers.
85pub enum AnyParser<'input, I, T>
86where
87    I: Iterator<Item = char>,
88    T: BorrowedInput<'input>,
89{
90    /// A parser over borrowed string input.
91    String {
92        /// Parser currently producing events for this stack entry.
93        parser: Parser<'input, StrInput<'input>>,
94        /// Human-readable source name returned by [`ParserStack::stack`].
95        name: String,
96    },
97    /// A parser over an iterator of characters.
98    Iter {
99        /// Parser currently producing events for this stack entry.
100        parser: Parser<'static, BufferedInput<I>>,
101        /// Human-readable source name returned by [`ParserStack::stack`].
102        name: String,
103    },
104    /// A parser over a custom input.
105    Custom {
106        /// Parser currently producing events for this stack entry.
107        parser: Parser<'input, T>,
108        /// Human-readable source name returned by [`ParserStack::stack`].
109        name: String,
110    },
111    /// A parser over a replayed event stream.
112    Replay {
113        /// Replay parser currently producing pre-collected events for this stack entry.
114        parser: ReplayParser<'input>,
115        /// Human-readable source name returned by [`ParserStack::stack`].
116        name: String,
117    },
118}
119
120impl<'input, I, T> AnyParser<'input, I, T>
121where
122    I: Iterator<Item = char>,
123    T: BorrowedInput<'input>,
124{
125    fn get_anchor_offset(&self) -> usize {
126        match self {
127            AnyParser::String { parser, .. } => parser.get_anchor_offset(),
128            AnyParser::Iter { parser, .. } => parser.get_anchor_offset(),
129            AnyParser::Custom { parser, .. } => parser.get_anchor_offset(),
130            AnyParser::Replay { parser, .. } => parser.get_anchor_offset(),
131        }
132    }
133
134    fn set_anchor_offset(&mut self, offset: usize) {
135        match self {
136            AnyParser::String { parser, .. } => parser.set_anchor_offset(offset),
137            AnyParser::Iter { parser, .. } => parser.set_anchor_offset(offset),
138            AnyParser::Custom { parser, .. } => parser.set_anchor_offset(offset),
139            AnyParser::Replay { parser, .. } => parser.set_anchor_offset(offset),
140        }
141    }
142}
143
144/// A parser implementation that uses a stack for include-style parsing.
145///
146/// Note: `ParserStack` deliberately suppresses nested [`Event::StreamStart`] /
147/// [`Event::DocumentStart`] events when more than one parser is stacked, and the tests assert
148/// outputs where a nested parser starts directly with [`Event::MappingStart`] before the parent
149/// stream/document wrapper appears.
150///
151/// That is exactly what we want for `!include`-style subtree injection.
152///
153/// Included parser events, including [`Event::Comment`] events, are replayed through the same
154/// event stream as parent events. Their [`Span`] values remain local to the included source, just
155/// like every other event span from an included parser. `ParserStack` does not attach file names,
156/// source IDs, or other include provenance to events or spans.
157pub struct ParserStack<'input, I = core::iter::Empty<char>, T = StrInput<'input>>
158where
159    I: Iterator<Item = char>,
160    T: BorrowedInput<'input>,
161{
162    parsers: Vec<AnyParser<'input, I, T>>,
163    current: Option<(Event<'input>, Span)>,
164    current_error: Option<ScanError>,
165    stream_end_emitted: bool,
166    #[allow(clippy::type_complexity)]
167    include_resolver: Option<Box<dyn FnMut(&str) -> Result<String, ScanError> + 'input>>,
168}
169
170impl<'input, I, T> ParserStack<'input, I, T>
171where
172    I: Iterator<Item = char>,
173    T: BorrowedInput<'input>,
174{
175    /// Creates a new, empty parser stack.
176    #[must_use]
177    pub fn new() -> Self {
178        Self {
179            parsers: Vec::new(),
180            current: None,
181            current_error: None,
182            stream_end_emitted: false,
183            include_resolver: None,
184        }
185    }
186
187    /// Set the resolver used by [`Self::resolve`] and [`Self::push_include`].
188    ///
189    /// The resolver receives the include name and returns the included YAML source text.
190    pub fn set_resolver(
191        &mut self,
192        resolver: impl FnMut(&str) -> Result<String, ScanError> + 'input,
193    ) {
194        self.include_resolver = Some(Box::new(resolver));
195    }
196
197    /// Resolves an include string using the include resolver.
198    ///
199    /// Comment events from the included content are preserved. Their spans are local to the
200    /// included content returned by the resolver, matching the existing behavior for all included
201    /// document events.
202    ///
203    /// # Errors
204    /// Returns `ScanError` if no resolver is configured, include resolution fails, or the
205    /// included content cannot be parsed.
206    pub fn resolve(&mut self, include_str: &str) -> Result<(), ScanError> {
207        if let Some(resolver) = &mut self.include_resolver {
208            let content = resolver(include_str)?;
209            let mut parser = Parser::new_from_iter(content.chars().collect::<Vec<_>>().into_iter());
210            if let Some(parent) = self.parsers.last() {
211                parser.set_anchor_offset(parent.get_anchor_offset());
212            }
213            let mut events = Vec::new();
214            while let Some(event) = parser.next_event() {
215                events.push(event?);
216            }
217
218            self.push_replay_parser(
219                ReplayParser::new(events, parser.get_anchor_offset()),
220                include_str.into(),
221            );
222            Ok(())
223        } else {
224            Err(ScanError::new(
225                crate::scanner::Marker::new(0, 1, 0),
226                String::from("No include resolver set for parser stack."),
227            ))
228        }
229    }
230
231    /// Resolves an include by name and pushes the resulting parser onto the stack.
232    ///
233    /// This is an alias for [`Self::resolve`] with a name that reads naturally in
234    /// include-oriented consumers: `stack.push_include("config.yaml")?`.
235    /// Comment spans from the included content are local to that included source.
236    ///
237    /// # Errors
238    /// Returns `ScanError` if no resolver is configured, include resolution fails, or the
239    /// included content cannot be parsed.
240    pub fn push_include(&mut self, include_name: &str) -> Result<(), ScanError> {
241        self.resolve(include_name)
242    }
243
244    fn prepare_for_push(&mut self) {
245        if matches!(self.current.as_ref(), Some((Event::StreamEnd, _))) {
246            self.current = None;
247        }
248        self.stream_end_emitted = false;
249    }
250
251    /// Push a string parser onto the stack.
252    ///
253    /// The pushed parser inherits the current anchor offset so anchors remain unique across stacked
254    /// sources. `name` is returned by [`Self::stack`] for diagnostics.
255    pub fn push_str_parser(&mut self, mut parser: Parser<'input, StrInput<'input>>, name: String) {
256        self.prepare_for_push();
257        if let Some(parent) = self.parsers.last() {
258            parser.set_anchor_offset(parent.get_anchor_offset());
259        }
260        self.parsers.push(AnyParser::String { parser, name });
261    }
262
263    /// Push an iterator-backed parser onto the stack.
264    ///
265    /// The pushed parser inherits the current anchor offset so anchors remain unique across stacked
266    /// sources. `name` is returned by [`Self::stack`] for diagnostics.
267    pub fn push_iter_parser(
268        &mut self,
269        mut parser: Parser<'static, BufferedInput<I>>,
270        name: String,
271    ) {
272        self.prepare_for_push();
273        if let Some(parent) = self.parsers.last() {
274            parser.set_anchor_offset(parent.get_anchor_offset());
275        }
276        self.parsers.push(AnyParser::Iter { parser, name });
277    }
278
279    /// Push a custom-input parser onto the stack.
280    ///
281    /// The pushed parser inherits the current anchor offset so anchors remain unique across stacked
282    /// sources. `name` is returned by [`Self::stack`] for diagnostics.
283    pub fn push_custom_parser(&mut self, mut parser: Parser<'input, T>, name: String) {
284        self.prepare_for_push();
285        if let Some(parent) = self.parsers.last() {
286            parser.set_anchor_offset(parent.get_anchor_offset());
287        }
288        self.parsers.push(AnyParser::Custom { parser, name });
289    }
290
291    /// Push a replay parser onto the stack.
292    ///
293    /// Replay parsers are used for included content that has already been parsed into events.
294    /// `name` is returned by [`Self::stack`] for diagnostics.
295    pub fn push_replay_parser(&mut self, mut parser: ReplayParser<'input>, name: String) {
296        self.prepare_for_push();
297        if let Some(parent) = self.parsers.last() {
298            let inherited = parent.get_anchor_offset();
299            parser.set_anchor_offset(parser.get_anchor_offset().max(inherited));
300        }
301
302        self.parsers.push(AnyParser::Replay { parser, name });
303    }
304
305    /// Push a custom parser and set the first event that should be returned from it.
306    ///
307    /// This is used when the caller has already consumed the parser's first event before deciding
308    /// to place it on the stack.
309    pub fn push_custom_parser_with_current(
310        &mut self,
311        mut parser: Parser<'input, T>,
312        name: String,
313        current: (Event<'input>, Span),
314    ) {
315        self.prepare_for_push();
316        if let Some(parent) = self.parsers.last() {
317            parser.set_anchor_offset(parent.get_anchor_offset());
318        }
319        self.parsers.push(AnyParser::Custom { parser, name });
320        self.current = Some(current);
321    }
322
323    /// Return the anchor offset that a newly pushed parser should inherit.
324    #[must_use]
325    pub fn current_anchor_offset(&self) -> usize {
326        self.parsers.last().map_or(0, AnyParser::get_anchor_offset)
327    }
328
329    /// Return the names of the parsers currently in the stack, from bottom to top.
330    #[must_use]
331    pub fn stack(&self) -> Vec<String> {
332        self.parsers
333            .iter()
334            .map(|p| match p {
335                AnyParser::String { name, .. }
336                | AnyParser::Iter { name, .. }
337                | AnyParser::Custom { name, .. }
338                | AnyParser::Replay { name, .. } => name.clone(),
339            })
340            .collect()
341    }
342
343    fn propagate_anchor_offset_from_popped(&mut self, popped: &AnyParser<'input, I, T>) {
344        if let Some(parent) = self.parsers.last_mut() {
345            let next_offset = parent.get_anchor_offset().max(popped.get_anchor_offset());
346            parent.set_anchor_offset(next_offset);
347        }
348    }
349
350    fn pop_parser_and_propagate_anchor_offset(&mut self) {
351        let popped = self.parsers.pop().unwrap();
352        self.propagate_anchor_offset_from_popped(&popped);
353    }
354
355    fn next_event_impl(&mut self) -> Result<(Event<'input>, Span), ScanError> {
356        loop {
357            let Some(any_parser) = self.parsers.last_mut() else {
358                return Ok((
359                    Event::StreamEnd,
360                    Span::empty(crate::scanner::Marker::new(0, 1, 0)),
361                ));
362            };
363
364            let res = match any_parser {
365                AnyParser::String { parser, .. } => parser.next_event(),
366                AnyParser::Iter { parser, .. } => parser.next_event(),
367                AnyParser::Custom { parser, .. } => parser.next_event(),
368                AnyParser::Replay { parser, .. } => parser.next_event(),
369            };
370
371            match res {
372                Some(Ok((Event::StreamEnd, span))) => {
373                    if self.parsers.len() == 1 {
374                        self.parsers.pop();
375                        return Ok((Event::StreamEnd, span));
376                    }
377                    self.pop_parser_and_propagate_anchor_offset();
378                }
379                None => {
380                    if self.parsers.len() == 1 {
381                        self.parsers.pop();
382                        return Ok((
383                            Event::StreamEnd,
384                            Span::empty(crate::scanner::Marker::new(0, 1, 0)),
385                        ));
386                    }
387                    self.pop_parser_and_propagate_anchor_offset();
388                }
389                Some(Err(e)) => {
390                    self.pop_parser_and_propagate_anchor_offset();
391                    return e.into_result();
392                }
393                Some(Ok((Event::DocumentEnd, span))) => {
394                    if self.parsers.len() == 1 {
395                        return Ok((Event::DocumentEnd, span));
396                    }
397
398                    // Continue the parent parser if it has more documents.
399                    let peek_res = match self.parsers.last_mut().unwrap() {
400                        AnyParser::String { parser, .. } => parser.peek(),
401                        AnyParser::Iter { parser, .. } => parser.peek(),
402                        AnyParser::Custom { parser, .. } => parser.peek(),
403                        AnyParser::Replay { parser, .. } => parser.peek(),
404                    };
405
406                    match peek_res {
407                        Some(Ok((Event::StreamEnd, _))) | None => {
408                            self.pop_parser_and_propagate_anchor_offset();
409                        }
410                        Some(Ok(_)) => {
411                            self.pop_parser_and_propagate_anchor_offset();
412                            return Err(ScanError::new_str(
413                                span.start,
414                                "multiple documents not supported here",
415                            ));
416                        }
417                        Some(Err(e)) => {
418                            self.pop_parser_and_propagate_anchor_offset();
419                            return Err(e);
420                        }
421                    }
422                }
423                Some(Ok(event)) => {
424                    if self.parsers.len() > 1
425                        && matches!(event.0, Event::StreamStart | Event::DocumentStart(..))
426                    {
427                        continue;
428                    }
429                    return Ok(event);
430                }
431            }
432        }
433    }
434}
435
436impl<'input, I, T> Default for ParserStack<'input, I, T>
437where
438    I: Iterator<Item = char>,
439    T: BorrowedInput<'input>,
440{
441    fn default() -> Self {
442        Self::new()
443    }
444}
445
446impl<'input, I, T> ParserTrait<'input> for ParserStack<'input, I, T>
447where
448    I: Iterator<Item = char>,
449    T: BorrowedInput<'input>,
450{
451    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
452        if let Some(ref x) = self.current {
453            Some(Ok(x))
454        } else if let Some(error) = &self.current_error {
455            Some(Err(error.clone()))
456        } else {
457            if self.stream_end_emitted {
458                return None;
459            }
460            match self.next_event_impl() {
461                Ok(token) => {
462                    self.current = Some(token);
463                    Some(Ok(self.current.as_ref().unwrap()))
464                }
465                Err(e) => {
466                    self.current_error = Some(e.clone());
467                    Some(Err(e))
468                }
469            }
470        }
471    }
472
473    fn next_event(&mut self) -> Option<ParseResult<'input>> {
474        if let Some(error) = self.current_error.take() {
475            self.stream_end_emitted = true;
476            return Some(Err(error));
477        }
478
479        if let Some(token) = self.current.take() {
480            if let Event::StreamEnd = token.0 {
481                self.stream_end_emitted = true;
482            }
483            return Some(Ok(token));
484        }
485        if self.stream_end_emitted {
486            return None;
487        }
488        match self.next_event_impl() {
489            Ok(token) => {
490                if let Event::StreamEnd = token.0 {
491                    self.stream_end_emitted = true;
492                }
493                Some(Ok(token))
494            }
495            Err(e) => {
496                self.stream_end_emitted = true;
497                Some(Err(e))
498            }
499        }
500    }
501
502    fn load<R: SpannedEventReceiver<'input>>(
503        &mut self,
504        recv: &mut R,
505        multi: bool,
506    ) -> Result<(), ScanError> {
507        while let Some(res) = self.next_event() {
508            // Fetch the next event from the active stack entry.
509            let (ev, span) = res?;
510
511            // Track whether to stop based on `multi`.
512            let is_doc_end = matches!(ev, Event::DocumentEnd);
513            let is_stream_end = matches!(ev, Event::StreamEnd);
514
515            recv.on_event(ev, span);
516
517            if is_stream_end {
518                break;
519            }
520
521            // Stop after one document when multi-document parsing is disabled.
522            if !multi && is_doc_end {
523                break;
524            }
525        }
526
527        Ok(())
528    }
529}
530
531impl<'input, I, T> Iterator for ParserStack<'input, I, T>
532where
533    I: Iterator<Item = char>,
534    T: BorrowedInput<'input>,
535{
536    type Item = Result<(Event<'input>, Span), ScanError>;
537
538    fn next(&mut self) -> Option<Self::Item> {
539        self.next_event()
540    }
541}