Skip to main content

granit_parser/
parser_stack.rs

1use crate::{
2    input::{str::StrInput, BorrowedInput, BufferedInput},
3    parser::{Event, ParseResult, Parser, ParserTrait, SpannedEventReceiver},
4    scanner::{ScanError, Span},
5};
6use alloc::{boxed::Box, string::String, vec::Vec};
7
8/// A lightweight parser that replays a pre-collected event stream.
9pub struct ReplayParser<'input> {
10    events: Vec<(Event<'input>, Span)>,
11    index: usize,
12    anchor_offset: usize,
13}
14
15impl<'input> ReplayParser<'input> {
16    /// Create a parser that replays `events` and starts anchor allocation at `anchor_offset`.
17    #[must_use]
18    pub fn new(events: Vec<(Event<'input>, Span)>, anchor_offset: usize) -> Self {
19        Self {
20            events,
21            index: 0,
22            anchor_offset,
23        }
24    }
25
26    /// Return the next anchor ID that should be assigned after replayed events.
27    #[must_use]
28    pub fn get_anchor_offset(&self) -> usize {
29        self.anchor_offset
30    }
31
32    /// Set the next anchor ID that should be assigned after replayed events.
33    pub fn set_anchor_offset(&mut self, offset: usize) {
34        self.anchor_offset = offset;
35    }
36
37    fn advance_anchor_offset(&mut self, event: &Event<'input>) {
38        let anchor_id = match event {
39            Event::Scalar(_, _, anchor_id, _)
40            | Event::SequenceStart(_, anchor_id, _)
41            | Event::MappingStart(_, anchor_id, _) => *anchor_id,
42            _ => 0,
43        };
44
45        if anchor_id > 0 {
46            self.anchor_offset = self.anchor_offset.max(anchor_id.saturating_add(1));
47        }
48    }
49}
50
51impl<'input> ParserTrait<'input> for ReplayParser<'input> {
52    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
53        self.events.get(self.index).map(Ok)
54    }
55
56    fn next_event(&mut self) -> Option<ParseResult<'input>> {
57        let event = self.events.get(self.index).cloned()?;
58        self.index += 1;
59        self.advance_anchor_offset(&event.0);
60        Some(Ok(event))
61    }
62
63    fn load<R: SpannedEventReceiver<'input>>(
64        &mut self,
65        recv: &mut R,
66        multi: bool,
67    ) -> Result<(), ScanError> {
68        while let Some(res) = self.next_event() {
69            let (ev, span) = res?;
70            let is_doc_end = matches!(ev, Event::DocumentEnd);
71            let is_stream_end = matches!(ev, Event::StreamEnd);
72            recv.on_event(ev, span);
73            if is_stream_end {
74                break;
75            }
76            if !multi && is_doc_end {
77                break;
78            }
79        }
80        Ok(())
81    }
82}
83
84/// A wrapper for different types of parsers.
85pub enum AnyParser<'input, I, T>
86where
87    I: Iterator<Item = char>,
88    T: BorrowedInput<'input>,
89{
90    /// A parser over borrowed string input.
91    String {
92        /// Parser currently producing events for this stack entry.
93        parser: Parser<'input, StrInput<'input>>,
94        /// Human-readable source name returned by [`ParserStack::stack`].
95        name: String,
96    },
97    /// A parser over an iterator of characters.
98    Iter {
99        /// Parser currently producing events for this stack entry.
100        parser: Parser<'static, BufferedInput<I>>,
101        /// Human-readable source name returned by [`ParserStack::stack`].
102        name: String,
103    },
104    /// A parser over a custom input.
105    Custom {
106        /// Parser currently producing events for this stack entry.
107        parser: Parser<'input, T>,
108        /// Human-readable source name returned by [`ParserStack::stack`].
109        name: String,
110    },
111    /// A parser over a replayed event stream.
112    Replay {
113        /// Replay parser currently producing pre-collected events for this stack entry.
114        parser: ReplayParser<'input>,
115        /// Human-readable source name returned by [`ParserStack::stack`].
116        name: String,
117    },
118}
119
120impl<'input, I, T> AnyParser<'input, I, T>
121where
122    I: Iterator<Item = char>,
123    T: BorrowedInput<'input>,
124{
125    fn get_anchor_offset(&self) -> usize {
126        match self {
127            AnyParser::String { parser, .. } => parser.get_anchor_offset(),
128            AnyParser::Iter { parser, .. } => parser.get_anchor_offset(),
129            AnyParser::Custom { parser, .. } => parser.get_anchor_offset(),
130            AnyParser::Replay { parser, .. } => parser.get_anchor_offset(),
131        }
132    }
133
134    fn set_anchor_offset(&mut self, offset: usize) {
135        match self {
136            AnyParser::String { parser, .. } => parser.set_anchor_offset(offset),
137            AnyParser::Iter { parser, .. } => parser.set_anchor_offset(offset),
138            AnyParser::Custom { parser, .. } => parser.set_anchor_offset(offset),
139            AnyParser::Replay { parser, .. } => parser.set_anchor_offset(offset),
140        }
141    }
142}
143
144/// A parser implementation that uses a stack for include-style parsing.
145///
146/// Note: `ParserStack` deliberately suppresses nested [`Event::StreamStart`] /
147/// [`Event::DocumentStart`] events when more than one parser is stacked, and the tests assert
148/// outputs where a nested parser starts directly with [`Event::MappingStart`] before the parent
149/// stream/document wrapper appears.
150///
151/// That is exactly what we want for `!include`-style subtree injection.
152///
153/// Included parser events, including [`Event::Comment`] events, are replayed through the same
154/// event stream as parent events. Their [`Span`] values remain local to the included source, just
155/// like every other event span from an included parser. `ParserStack` does not attach file names,
156/// source IDs, or other include provenance to events or spans.
157pub struct ParserStack<'input, I = core::iter::Empty<char>, T = StrInput<'input>>
158where
159    I: Iterator<Item = char>,
160    T: BorrowedInput<'input>,
161{
162    parsers: Vec<AnyParser<'input, I, T>>,
163    current: Option<(Event<'input>, Span)>,
164    stream_end_emitted: bool,
165    #[allow(clippy::type_complexity)]
166    include_resolver: Option<Box<dyn FnMut(&str) -> Result<String, ScanError> + 'input>>,
167}
168
169impl<'input, I, T> ParserStack<'input, I, T>
170where
171    I: Iterator<Item = char>,
172    T: BorrowedInput<'input>,
173{
174    /// Creates a new, empty parser stack.
175    #[must_use]
176    pub fn new() -> Self {
177        Self {
178            parsers: Vec::new(),
179            current: None,
180            stream_end_emitted: false,
181            include_resolver: None,
182        }
183    }
184
185    /// Set the resolver used by [`Self::resolve`] and [`Self::push_include`].
186    ///
187    /// The resolver receives the include name and returns the included YAML source text.
188    pub fn set_resolver(
189        &mut self,
190        resolver: impl FnMut(&str) -> Result<String, ScanError> + 'input,
191    ) {
192        self.include_resolver = Some(Box::new(resolver));
193    }
194
195    /// Resolves an include string using the include resolver.
196    ///
197    /// Comment events from the included content are preserved. Their spans are local to the
198    /// included content returned by the resolver, matching the existing behavior for all included
199    /// document events.
200    ///
201    /// # Errors
202    /// Returns `ScanError` if no resolver is configured, include resolution fails, or the
203    /// included content cannot be parsed.
204    pub fn resolve(&mut self, include_str: &str) -> Result<(), ScanError> {
205        if let Some(resolver) = &mut self.include_resolver {
206            let content = resolver(include_str)?;
207            let mut parser = Parser::new_from_iter(content.chars().collect::<Vec<_>>().into_iter());
208            if let Some(parent) = self.parsers.last() {
209                parser.set_anchor_offset(parent.get_anchor_offset());
210            }
211            let mut events = Vec::new();
212            while let Some(event) = parser.next_event() {
213                events.push(event?);
214            }
215
216            self.push_replay_parser(
217                ReplayParser::new(events, parser.get_anchor_offset()),
218                include_str.into(),
219            );
220            Ok(())
221        } else {
222            Err(ScanError::new(
223                crate::scanner::Marker::new(0, 1, 0),
224                String::from("No include resolver set for parser stack."),
225            ))
226        }
227    }
228
229    /// Resolves an include by name and pushes the resulting parser onto the stack.
230    ///
231    /// This is an alias for [`Self::resolve`] with a name that reads naturally in
232    /// include-oriented consumers: `stack.push_include("config.yaml")?`.
233    /// Comment spans from the included content are local to that included source.
234    ///
235    /// # Errors
236    /// Returns `ScanError` if no resolver is configured, include resolution fails, or the
237    /// included content cannot be parsed.
238    pub fn push_include(&mut self, include_name: &str) -> Result<(), ScanError> {
239        self.resolve(include_name)
240    }
241
242    /// Push a string parser onto the stack.
243    ///
244    /// The pushed parser inherits the current anchor offset so anchors remain unique across stacked
245    /// sources. `name` is returned by [`Self::stack`] for diagnostics.
246    pub fn push_str_parser(&mut self, mut parser: Parser<'input, StrInput<'input>>, name: String) {
247        if let Some(parent) = self.parsers.last() {
248            parser.set_anchor_offset(parent.get_anchor_offset());
249        }
250        self.parsers.push(AnyParser::String { parser, name });
251    }
252
253    /// Push an iterator-backed parser onto the stack.
254    ///
255    /// The pushed parser inherits the current anchor offset so anchors remain unique across stacked
256    /// sources. `name` is returned by [`Self::stack`] for diagnostics.
257    pub fn push_iter_parser(
258        &mut self,
259        mut parser: Parser<'static, BufferedInput<I>>,
260        name: String,
261    ) {
262        if let Some(parent) = self.parsers.last() {
263            parser.set_anchor_offset(parent.get_anchor_offset());
264        }
265        self.parsers.push(AnyParser::Iter { parser, name });
266    }
267
268    /// Push a custom-input parser onto the stack.
269    ///
270    /// The pushed parser inherits the current anchor offset so anchors remain unique across stacked
271    /// sources. `name` is returned by [`Self::stack`] for diagnostics.
272    pub fn push_custom_parser(&mut self, mut parser: Parser<'input, T>, name: String) {
273        if let Some(parent) = self.parsers.last() {
274            parser.set_anchor_offset(parent.get_anchor_offset());
275        }
276        self.parsers.push(AnyParser::Custom { parser, name });
277    }
278
279    /// Push a replay parser onto the stack.
280    ///
281    /// Replay parsers are used for included content that has already been parsed into events.
282    /// `name` is returned by [`Self::stack`] for diagnostics.
283    pub fn push_replay_parser(&mut self, mut parser: ReplayParser<'input>, name: String) {
284        if let Some(parent) = self.parsers.last() {
285            let inherited = parent.get_anchor_offset();
286            parser.set_anchor_offset(parser.get_anchor_offset().max(inherited));
287        }
288
289        self.parsers.push(AnyParser::Replay { parser, name });
290    }
291
292    /// Push a custom parser and set the first event that should be returned from it.
293    ///
294    /// This is used when the caller has already consumed the parser's first event before deciding
295    /// to place it on the stack.
296    pub fn push_custom_parser_with_current(
297        &mut self,
298        mut parser: Parser<'input, T>,
299        name: String,
300        current: (Event<'input>, Span),
301    ) {
302        if let Some(parent) = self.parsers.last() {
303            parser.set_anchor_offset(parent.get_anchor_offset());
304        }
305        self.parsers.push(AnyParser::Custom { parser, name });
306        self.current = Some(current);
307    }
308
309    /// Return the anchor offset that a newly pushed parser should inherit.
310    #[must_use]
311    pub fn current_anchor_offset(&self) -> usize {
312        self.parsers.last().map_or(0, AnyParser::get_anchor_offset)
313    }
314
315    /// Return the names of the parsers currently in the stack, from bottom to top.
316    #[must_use]
317    pub fn stack(&self) -> Vec<String> {
318        self.parsers
319            .iter()
320            .map(|p| match p {
321                AnyParser::String { name, .. }
322                | AnyParser::Iter { name, .. }
323                | AnyParser::Custom { name, .. }
324                | AnyParser::Replay { name, .. } => name.clone(),
325            })
326            .collect()
327    }
328
329    fn propagate_anchor_offset_from_popped(&mut self, popped: &AnyParser<'input, I, T>) {
330        if let Some(parent) = self.parsers.last_mut() {
331            let next_offset = parent.get_anchor_offset().max(popped.get_anchor_offset());
332            parent.set_anchor_offset(next_offset);
333        }
334    }
335
336    fn next_event_impl(&mut self) -> Result<(Event<'input>, Span), ScanError> {
337        loop {
338            let Some(any_parser) = self.parsers.last_mut() else {
339                return Ok((
340                    Event::StreamEnd,
341                    Span::empty(crate::scanner::Marker::new(0, 1, 0)),
342                ));
343            };
344
345            let res = match any_parser {
346                AnyParser::String { parser, .. } => parser.next_event(),
347                AnyParser::Iter { parser, .. } => parser.next_event(),
348                AnyParser::Custom { parser, .. } => parser.next_event(),
349                AnyParser::Replay { parser, .. } => parser.next_event(),
350            };
351
352            match res {
353                Some(Ok((Event::StreamEnd, span))) => {
354                    if self.parsers.len() == 1 {
355                        self.parsers.pop();
356                        return Ok((Event::StreamEnd, span));
357                    }
358                    let popped = self.parsers.pop().unwrap();
359                    self.propagate_anchor_offset_from_popped(&popped);
360                }
361                None => {
362                    if self.parsers.len() == 1 {
363                        self.parsers.pop();
364                        return Ok((
365                            Event::StreamEnd,
366                            Span::empty(crate::scanner::Marker::new(0, 1, 0)),
367                        ));
368                    }
369                    let popped = self.parsers.pop().unwrap();
370                    self.propagate_anchor_offset_from_popped(&popped);
371                }
372                Some(Err(e)) => {
373                    let popped = self.parsers.pop().unwrap();
374                    self.propagate_anchor_offset_from_popped(&popped);
375                    return e.into_result();
376                }
377                Some(Ok((Event::DocumentEnd, span))) => {
378                    if self.parsers.len() == 1 {
379                        return Ok((Event::DocumentEnd, span));
380                    }
381
382                    // Continue the parent parser if it has more documents.
383                    let peek_res = match self.parsers.last_mut().unwrap() {
384                        AnyParser::String { parser, .. } => parser.peek(),
385                        AnyParser::Iter { parser, .. } => parser.peek(),
386                        AnyParser::Custom { parser, .. } => parser.peek(),
387                        AnyParser::Replay { parser, .. } => parser.peek(),
388                    };
389
390                    match peek_res {
391                        Some(Ok((Event::StreamEnd, _))) | None => {
392                            let popped = self.parsers.pop().unwrap();
393                            self.propagate_anchor_offset_from_popped(&popped);
394                        }
395                        _ => {
396                            return Err(ScanError::new_str(
397                                span.start,
398                                "multiple documents not supported here",
399                            ));
400                        }
401                    }
402                }
403                Some(Ok(event)) => {
404                    if self.parsers.len() > 1
405                        && matches!(event.0, Event::StreamStart | Event::DocumentStart(_))
406                    {
407                        continue;
408                    }
409                    return Ok(event);
410                }
411            }
412        }
413    }
414}
415
416impl<'input, I, T> Default for ParserStack<'input, I, T>
417where
418    I: Iterator<Item = char>,
419    T: BorrowedInput<'input>,
420{
421    fn default() -> Self {
422        Self::new()
423    }
424}
425
426impl<'input, I, T> ParserTrait<'input> for ParserStack<'input, I, T>
427where
428    I: Iterator<Item = char>,
429    T: BorrowedInput<'input>,
430{
431    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
432        if let Some(ref x) = self.current {
433            Some(Ok(x))
434        } else {
435            if self.stream_end_emitted {
436                return None;
437            }
438            match self.next_event_impl() {
439                Ok(token) => {
440                    self.current = Some(token);
441                    Some(Ok(self.current.as_ref().unwrap()))
442                }
443                Err(e) => Some(e.into_result()),
444            }
445        }
446    }
447
448    fn next_event(&mut self) -> Option<ParseResult<'input>> {
449        if let Some(token) = self.current.take() {
450            if let Event::StreamEnd = token.0 {
451                self.stream_end_emitted = true;
452            }
453            return Some(Ok(token));
454        }
455        if self.stream_end_emitted {
456            return None;
457        }
458        match self.next_event_impl() {
459            Ok(token) => {
460                if let Event::StreamEnd = token.0 {
461                    self.stream_end_emitted = true;
462                }
463                Some(Ok(token))
464            }
465            Err(e) => Some(e.into_result()),
466        }
467    }
468
469    fn load<R: SpannedEventReceiver<'input>>(
470        &mut self,
471        recv: &mut R,
472        multi: bool,
473    ) -> Result<(), ScanError> {
474        while let Some(res) = self.next_event() {
475            // Fetch the next event from the active stack entry.
476            let (ev, span) = res?;
477
478            // Track whether to stop based on `multi`.
479            let is_doc_end = matches!(ev, Event::DocumentEnd);
480            let is_stream_end = matches!(ev, Event::StreamEnd);
481
482            recv.on_event(ev, span);
483
484            if is_stream_end {
485                break;
486            }
487
488            // Stop after one document when multi-document parsing is disabled.
489            if !multi && is_doc_end {
490                break;
491            }
492        }
493
494        Ok(())
495    }
496}
497
498impl<'input, I, T> Iterator for ParserStack<'input, I, T>
499where
500    I: Iterator<Item = char>,
501    T: BorrowedInput<'input>,
502{
503    type Item = Result<(Event<'input>, Span), ScanError>;
504
505    fn next(&mut self) -> Option<Self::Item> {
506        self.next_event()
507    }
508}