Skip to main content

granit_parser/
parser_stack.rs

1use crate::{
2    input::{str::StrInput, BorrowedInput, BufferedInput},
3    parser::{Event, ParseResult, Parser, ParserTrait, SpannedEventReceiver},
4    scanner::{ScanError, Span},
5};
6use alloc::{boxed::Box, string::String, vec::Vec};
7
8/// A lightweight parser that replays a pre-collected event stream.
9pub struct ReplayParser<'input> {
10    events: Vec<(Event<'input>, Span)>,
11    index: usize,
12    current: Option<(Event<'input>, Span)>,
13    anchor_offset: usize,
14}
15
16impl<'input> ReplayParser<'input> {
17    /// Create a parser that replays `events` and starts anchor allocation at `anchor_offset`.
18    #[must_use]
19    pub fn new(events: Vec<(Event<'input>, Span)>, anchor_offset: usize) -> Self {
20        Self {
21            events,
22            index: 0,
23            current: None,
24            anchor_offset,
25        }
26    }
27
28    /// Return the next anchor ID that should be assigned after replayed events.
29    #[must_use]
30    pub fn get_anchor_offset(&self) -> usize {
31        self.anchor_offset
32    }
33
34    /// Set the next anchor ID that should be assigned after replayed events.
35    pub fn set_anchor_offset(&mut self, offset: usize) {
36        self.anchor_offset = offset;
37    }
38
39    fn advance_anchor_offset(&mut self, event: &Event<'input>) {
40        let anchor_id = match event {
41            Event::Scalar(_, _, anchor_id, _)
42            | Event::SequenceStart(_, anchor_id, _)
43            | Event::MappingStart(_, anchor_id, _) => *anchor_id,
44            _ => 0,
45        };
46
47        if anchor_id > 0 {
48            self.anchor_offset = self.anchor_offset.max(anchor_id.saturating_add(1));
49        }
50    }
51}
52
53impl<'input> ParserTrait<'input> for ReplayParser<'input> {
54    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
55        if self.current.is_none() {
56            self.current = self.events.get(self.index).cloned();
57        }
58        self.current.as_ref().map(Ok)
59    }
60
61    fn next_event(&mut self) -> Option<ParseResult<'input>> {
62        if let Some(current) = self.current.take() {
63            self.index += 1;
64            self.advance_anchor_offset(&current.0);
65            return Some(Ok(current));
66        }
67        let event = self.events.get(self.index).cloned()?;
68        self.index += 1;
69        self.advance_anchor_offset(&event.0);
70        Some(Ok(event))
71    }
72
73    fn load<R: SpannedEventReceiver<'input>>(
74        &mut self,
75        recv: &mut R,
76        multi: bool,
77    ) -> Result<(), ScanError> {
78        while let Some(res) = self.next_event() {
79            let (ev, span) = res?;
80            let is_doc_end = matches!(ev, Event::DocumentEnd);
81            let is_stream_end = matches!(ev, Event::StreamEnd);
82            recv.on_event(ev, span);
83            if is_stream_end {
84                break;
85            }
86            if !multi && is_doc_end {
87                break;
88            }
89        }
90        Ok(())
91    }
92}
93
94/// A wrapper for different types of parsers.
95pub enum AnyParser<'input, I, T>
96where
97    I: Iterator<Item = char>,
98    T: BorrowedInput<'input>,
99{
100    /// A parser over borrowed string input.
101    String {
102        /// Parser currently producing events for this stack entry.
103        parser: Parser<'input, StrInput<'input>>,
104        /// Human-readable source name returned by [`ParserStack::stack`].
105        name: String,
106    },
107    /// A parser over an iterator of characters.
108    Iter {
109        /// Parser currently producing events for this stack entry.
110        parser: Parser<'static, BufferedInput<I>>,
111        /// Human-readable source name returned by [`ParserStack::stack`].
112        name: String,
113    },
114    /// A parser over a custom input.
115    Custom {
116        /// Parser currently producing events for this stack entry.
117        parser: Parser<'input, T>,
118        /// Human-readable source name returned by [`ParserStack::stack`].
119        name: String,
120    },
121    /// A parser over a replayed event stream.
122    Replay {
123        /// Replay parser currently producing pre-collected events for this stack entry.
124        parser: ReplayParser<'input>,
125        /// Human-readable source name returned by [`ParserStack::stack`].
126        name: String,
127    },
128}
129
130impl<'input, I, T> AnyParser<'input, I, T>
131where
132    I: Iterator<Item = char>,
133    T: BorrowedInput<'input>,
134{
135    fn get_anchor_offset(&self) -> usize {
136        match self {
137            AnyParser::String { parser, .. } => parser.get_anchor_offset(),
138            AnyParser::Iter { parser, .. } => parser.get_anchor_offset(),
139            AnyParser::Custom { parser, .. } => parser.get_anchor_offset(),
140            AnyParser::Replay { parser, .. } => parser.get_anchor_offset(),
141        }
142    }
143
144    fn set_anchor_offset(&mut self, offset: usize) {
145        match self {
146            AnyParser::String { parser, .. } => parser.set_anchor_offset(offset),
147            AnyParser::Iter { parser, .. } => parser.set_anchor_offset(offset),
148            AnyParser::Custom { parser, .. } => parser.set_anchor_offset(offset),
149            AnyParser::Replay { parser, .. } => parser.set_anchor_offset(offset),
150        }
151    }
152}
153
154/// A parser implementation that uses a stack for include-style parsing.
155///
156/// Note: `ParserStack` deliberately suppresses nested [`Event::StreamStart`] /
157/// [`Event::DocumentStart`] events when more than one parser is stacked, and the tests assert
158/// outputs where a nested parser starts directly with [`Event::MappingStart`] before the parent
159/// stream/document wrapper appears.
160///
161/// That is exactly what we want for `!include`-style subtree injection.
162///
163/// Included parser events, including [`Event::Comment`] events, are replayed through the same
164/// event stream as parent events. Their [`Span`] values remain local to the included source, just
165/// like every other event span from an included parser. `ParserStack` does not attach file names,
166/// source IDs, or other include provenance to events or spans.
167pub struct ParserStack<'input, I = core::iter::Empty<char>, T = StrInput<'input>>
168where
169    I: Iterator<Item = char>,
170    T: BorrowedInput<'input>,
171{
172    parsers: Vec<AnyParser<'input, I, T>>,
173    current: Option<(Event<'input>, Span)>,
174    stream_end_emitted: bool,
175    #[allow(clippy::type_complexity)]
176    include_resolver: Option<Box<dyn FnMut(&str) -> Result<String, ScanError> + 'input>>,
177}
178
179impl<'input, I, T> ParserStack<'input, I, T>
180where
181    I: Iterator<Item = char>,
182    T: BorrowedInput<'input>,
183{
184    /// Creates a new, empty parser stack.
185    #[must_use]
186    pub fn new() -> Self {
187        Self {
188            parsers: Vec::new(),
189            current: None,
190            stream_end_emitted: false,
191            include_resolver: None,
192        }
193    }
194
195    /// Set the resolver used by [`Self::resolve`] and [`Self::push_include`].
196    ///
197    /// The resolver receives the include name and returns the included YAML source text.
198    pub fn set_resolver(
199        &mut self,
200        resolver: impl FnMut(&str) -> Result<String, ScanError> + 'input,
201    ) {
202        self.include_resolver = Some(Box::new(resolver));
203    }
204
205    /// Resolves an include string using the include resolver.
206    ///
207    /// Comment events from the included content are preserved. Their spans are local to the
208    /// included content returned by the resolver, matching the existing behavior for all included
209    /// document events.
210    ///
211    /// # Errors
212    /// Returns `ScanError` if no resolver is configured, include resolution fails, or the
213    /// included content cannot be parsed.
214    pub fn resolve(&mut self, include_str: &str) -> Result<(), ScanError> {
215        if let Some(resolver) = &mut self.include_resolver {
216            let content = resolver(include_str)?;
217            let mut parser = Parser::new_from_iter(content.chars().collect::<Vec<_>>().into_iter());
218            if let Some(parent) = self.parsers.last() {
219                parser.set_anchor_offset(parent.get_anchor_offset());
220            }
221            let mut events = Vec::new();
222            while let Some(event) = parser.next_event() {
223                events.push(event?);
224            }
225
226            self.push_replay_parser(
227                ReplayParser::new(events, parser.get_anchor_offset()),
228                include_str.into(),
229            );
230            Ok(())
231        } else {
232            Err(ScanError::new(
233                crate::scanner::Marker::new(0, 1, 0),
234                String::from("No include resolver set for parser stack."),
235            ))
236        }
237    }
238
239    /// Resolves an include by name and pushes the resulting parser onto the stack.
240    ///
241    /// This is an alias for [`Self::resolve`] with a name that reads naturally in
242    /// include-oriented consumers: `stack.push_include("config.yaml")?`.
243    /// Comment spans from the included content are local to that included source.
244    ///
245    /// # Errors
246    /// Returns `ScanError` if no resolver is configured, include resolution fails, or the
247    /// included content cannot be parsed.
248    pub fn push_include(&mut self, include_name: &str) -> Result<(), ScanError> {
249        self.resolve(include_name)
250    }
251
252    /// Push a string parser onto the stack.
253    ///
254    /// The pushed parser inherits the current anchor offset so anchors remain unique across stacked
255    /// sources. `name` is returned by [`Self::stack`] for diagnostics.
256    pub fn push_str_parser(&mut self, mut parser: Parser<'input, StrInput<'input>>, name: String) {
257        if let Some(parent) = self.parsers.last() {
258            parser.set_anchor_offset(parent.get_anchor_offset());
259        }
260        self.parsers.push(AnyParser::String { parser, name });
261    }
262
263    /// Push an iterator-backed parser onto the stack.
264    ///
265    /// The pushed parser inherits the current anchor offset so anchors remain unique across stacked
266    /// sources. `name` is returned by [`Self::stack`] for diagnostics.
267    pub fn push_iter_parser(
268        &mut self,
269        mut parser: Parser<'static, BufferedInput<I>>,
270        name: String,
271    ) {
272        if let Some(parent) = self.parsers.last() {
273            parser.set_anchor_offset(parent.get_anchor_offset());
274        }
275        self.parsers.push(AnyParser::Iter { parser, name });
276    }
277
278    /// Push a custom-input parser onto the stack.
279    ///
280    /// The pushed parser inherits the current anchor offset so anchors remain unique across stacked
281    /// sources. `name` is returned by [`Self::stack`] for diagnostics.
282    pub fn push_custom_parser(&mut self, mut parser: Parser<'input, T>, name: String) {
283        if let Some(parent) = self.parsers.last() {
284            parser.set_anchor_offset(parent.get_anchor_offset());
285        }
286        self.parsers.push(AnyParser::Custom { parser, name });
287    }
288
289    /// Push a replay parser onto the stack.
290    ///
291    /// Replay parsers are used for included content that has already been parsed into events.
292    /// `name` is returned by [`Self::stack`] for diagnostics.
293    pub fn push_replay_parser(&mut self, mut parser: ReplayParser<'input>, name: String) {
294        if let Some(parent) = self.parsers.last() {
295            let inherited = parent.get_anchor_offset();
296            parser.set_anchor_offset(parser.get_anchor_offset().max(inherited));
297        }
298
299        self.parsers.push(AnyParser::Replay { parser, name });
300    }
301
302    /// Push a custom parser and set the first event that should be returned from it.
303    ///
304    /// This is used when the caller has already consumed the parser's first event before deciding
305    /// to place it on the stack.
306    pub fn push_custom_parser_with_current(
307        &mut self,
308        mut parser: Parser<'input, T>,
309        name: String,
310        current: (Event<'input>, Span),
311    ) {
312        if let Some(parent) = self.parsers.last() {
313            parser.set_anchor_offset(parent.get_anchor_offset());
314        }
315        self.parsers.push(AnyParser::Custom { parser, name });
316        self.current = Some(current);
317    }
318
319    /// Return the anchor offset that a newly pushed parser should inherit.
320    #[must_use]
321    pub fn current_anchor_offset(&self) -> usize {
322        self.parsers.last().map_or(0, AnyParser::get_anchor_offset)
323    }
324
325    /// Return the names of the parsers currently in the stack, from bottom to top.
326    #[must_use]
327    pub fn stack(&self) -> Vec<String> {
328        self.parsers
329            .iter()
330            .map(|p| match p {
331                AnyParser::String { name, .. }
332                | AnyParser::Iter { name, .. }
333                | AnyParser::Custom { name, .. }
334                | AnyParser::Replay { name, .. } => name.clone(),
335            })
336            .collect()
337    }
338
339    fn propagate_anchor_offset_from_popped(&mut self, popped: &AnyParser<'input, I, T>) {
340        if let Some(parent) = self.parsers.last_mut() {
341            let next_offset = parent.get_anchor_offset().max(popped.get_anchor_offset());
342            parent.set_anchor_offset(next_offset);
343        }
344    }
345
346    fn next_event_impl(&mut self) -> Result<(Event<'input>, Span), ScanError> {
347        loop {
348            let Some(any_parser) = self.parsers.last_mut() else {
349                return Ok((
350                    Event::StreamEnd,
351                    Span::empty(crate::scanner::Marker::new(0, 1, 0)),
352                ));
353            };
354
355            let res = match any_parser {
356                AnyParser::String { parser, .. } => parser.next_event(),
357                AnyParser::Iter { parser, .. } => parser.next_event(),
358                AnyParser::Custom { parser, .. } => parser.next_event(),
359                AnyParser::Replay { parser, .. } => parser.next_event(),
360            };
361
362            match res {
363                Some(Ok((Event::StreamEnd, span))) => {
364                    if self.parsers.len() == 1 {
365                        self.parsers.pop();
366                        return Ok((Event::StreamEnd, span));
367                    }
368                    let popped = self.parsers.pop().unwrap();
369                    self.propagate_anchor_offset_from_popped(&popped);
370                }
371                None => {
372                    if self.parsers.len() == 1 {
373                        self.parsers.pop();
374                        return Ok((
375                            Event::StreamEnd,
376                            Span::empty(crate::scanner::Marker::new(0, 1, 0)),
377                        ));
378                    }
379                    let popped = self.parsers.pop().unwrap();
380                    self.propagate_anchor_offset_from_popped(&popped);
381                }
382                Some(Err(e)) => {
383                    let popped = self.parsers.pop().unwrap();
384                    self.propagate_anchor_offset_from_popped(&popped);
385                    return e.into_result();
386                }
387                Some(Ok((Event::DocumentEnd, span))) => {
388                    if self.parsers.len() == 1 {
389                        return Ok((Event::DocumentEnd, span));
390                    }
391
392                    // Continue the parent parser if it has more documents.
393                    let peek_res = match self.parsers.last_mut().unwrap() {
394                        AnyParser::String { parser, .. } => parser.peek(),
395                        AnyParser::Iter { parser, .. } => parser.peek(),
396                        AnyParser::Custom { parser, .. } => parser.peek(),
397                        AnyParser::Replay { parser, .. } => parser.peek(),
398                    };
399
400                    match peek_res {
401                        Some(Ok((Event::StreamEnd, _))) | None => {
402                            let popped = self.parsers.pop().unwrap();
403                            self.propagate_anchor_offset_from_popped(&popped);
404                        }
405                        _ => {
406                            return Err(ScanError::new_str(
407                                span.start,
408                                "multiple documents not supported here",
409                            ));
410                        }
411                    }
412                }
413                Some(Ok(event)) => {
414                    if self.parsers.len() > 1
415                        && matches!(event.0, Event::StreamStart | Event::DocumentStart(_))
416                    {
417                        continue;
418                    }
419                    return Ok(event);
420                }
421            }
422        }
423    }
424}
425
426impl<'input, I, T> Default for ParserStack<'input, I, T>
427where
428    I: Iterator<Item = char>,
429    T: BorrowedInput<'input>,
430{
431    fn default() -> Self {
432        Self::new()
433    }
434}
435
436impl<'input, I, T> ParserTrait<'input> for ParserStack<'input, I, T>
437where
438    I: Iterator<Item = char>,
439    T: BorrowedInput<'input>,
440{
441    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
442        if let Some(ref x) = self.current {
443            Some(Ok(x))
444        } else {
445            if self.stream_end_emitted {
446                return None;
447            }
448            match self.next_event_impl() {
449                Ok(token) => {
450                    self.current = Some(token);
451                    Some(Ok(self.current.as_ref().unwrap()))
452                }
453                Err(e) => Some(e.into_result()),
454            }
455        }
456    }
457
458    fn next_event(&mut self) -> Option<ParseResult<'input>> {
459        if let Some(token) = self.current.take() {
460            if let Event::StreamEnd = token.0 {
461                self.stream_end_emitted = true;
462            }
463            return Some(Ok(token));
464        }
465        if self.stream_end_emitted {
466            return None;
467        }
468        match self.next_event_impl() {
469            Ok(token) => {
470                if let Event::StreamEnd = token.0 {
471                    self.stream_end_emitted = true;
472                }
473                Some(Ok(token))
474            }
475            Err(e) => Some(e.into_result()),
476        }
477    }
478
479    fn load<R: SpannedEventReceiver<'input>>(
480        &mut self,
481        recv: &mut R,
482        multi: bool,
483    ) -> Result<(), ScanError> {
484        while let Some(res) = self.next_event() {
485            // Fetch the next event from the active stack entry.
486            let (ev, span) = res?;
487
488            // Track whether to stop based on `multi`.
489            let is_doc_end = matches!(ev, Event::DocumentEnd);
490            let is_stream_end = matches!(ev, Event::StreamEnd);
491
492            recv.on_event(ev, span);
493
494            if is_stream_end {
495                break;
496            }
497
498            // Stop after one document when multi-document parsing is disabled.
499            if !multi && is_doc_end {
500                break;
501            }
502        }
503
504        Ok(())
505    }
506}
507
508impl<'input, I, T> Iterator for ParserStack<'input, I, T>
509where
510    I: Iterator<Item = char>,
511    T: BorrowedInput<'input>,
512{
513    type Item = Result<(Event<'input>, Span), ScanError>;
514
515    fn next(&mut self) -> Option<Self::Item> {
516        self.next_event()
517    }
518}