// Source: saphyr_parser_bw/parser_stack.rs

1use crate::{
2    input::{str::StrInput, BorrowedInput, BufferedInput},
3    parser::{Event, ParseResult, Parser, ParserTrait, SpannedEventReceiver},
4    scanner::{ScanError, Span},
5};
6use alloc::{boxed::Box, string::String, vec::Vec};
7
8/// A lightweight parser that replays a pre-collected event stream.
9pub struct ReplayParser<'input> {
10    events: Vec<(Event<'input>, Span)>,
11    index: usize,
12    current: Option<(Event<'input>, Span)>,
13    anchor_offset: usize,
14}
15
16impl<'input> ReplayParser<'input> {
17    /// Creates a new `ReplayParser`.
18    #[must_use]
19    pub fn new(events: Vec<(Event<'input>, Span)>, anchor_offset: usize) -> Self {
20        Self {
21            events,
22            index: 0,
23            current: None,
24            anchor_offset,
25        }
26    }
27
28    /// Get the current anchor offset count.
29    #[must_use]
30    pub fn get_anchor_offset(&self) -> usize {
31        self.anchor_offset
32    }
33
34    /// Set the current anchor offset count.
35    pub fn set_anchor_offset(&mut self, offset: usize) {
36        self.anchor_offset = offset;
37    }
38
39    fn advance_anchor_offset(&mut self, event: &Event<'input>) {
40        let anchor_id = match event {
41            Event::Scalar(_, _, anchor_id, _)
42            | Event::SequenceStart(anchor_id, _)
43            | Event::MappingStart(anchor_id, _) => *anchor_id,
44            _ => 0,
45        };
46
47        if anchor_id > 0 {
48            self.anchor_offset = self.anchor_offset.max(anchor_id.saturating_add(1));
49        }
50    }
51}
52
53impl<'input> ParserTrait<'input> for ReplayParser<'input> {
54    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
55        if self.current.is_none() {
56            self.current = self.events.get(self.index).cloned();
57        }
58        self.current.as_ref().map(Ok)
59    }
60
61    fn next_event(&mut self) -> Option<ParseResult<'input>> {
62        if let Some(current) = self.current.take() {
63            self.index += 1;
64            self.advance_anchor_offset(&current.0);
65            return Some(Ok(current));
66        }
67        let event = self.events.get(self.index).cloned()?;
68        self.index += 1;
69        self.advance_anchor_offset(&event.0);
70        Some(Ok(event))
71    }
72
73    fn load<R: SpannedEventReceiver<'input>>(
74        &mut self,
75        recv: &mut R,
76        multi: bool,
77    ) -> Result<(), ScanError> {
78        loop {
79            let Some(res) = self.next_event() else {
80                break;
81            };
82            let (ev, span) = res?;
83            let is_doc_end = matches!(ev, Event::DocumentEnd);
84            let is_stream_end = matches!(ev, Event::StreamEnd);
85            recv.on_event(ev, span);
86            if is_stream_end {
87                break;
88            }
89            if !multi && is_doc_end {
90                break;
91            }
92        }
93        Ok(())
94    }
95}
96
97/// A wrapper for different types of parsers.
98pub enum AnyParser<'input, I, T>
99where
100    I: Iterator<Item = char>,
101    T: BorrowedInput<'input>,
102{
103    /// A parser over a string input.
104    String {
105        /// The parser itself.
106        parser: Parser<'input, StrInput<'input>>,
107        /// The name of the parser.
108        name: String,
109    },
110    /// A parser over an iterator input.
111    Iter {
112        /// The parser itself.
113        parser: Parser<'static, BufferedInput<I>>,
114        /// The name of the parser.
115        name: String,
116    },
117    /// A parser over a custom input.
118    Custom {
119        /// The parser itself.
120        parser: Parser<'input, T>,
121        /// The name of the parser.
122        name: String,
123    },
124    /// A parser over a replayed event stream.
125    Replay {
126        /// The replay parser itself.
127        parser: ReplayParser<'input>,
128        /// The name of the parser.
129        name: String,
130    },
131}
132
133impl<'input, I, T> AnyParser<'input, I, T>
134where
135    I: Iterator<Item = char>,
136    T: BorrowedInput<'input>,
137{
138    fn get_anchor_offset(&self) -> usize {
139        match self {
140            AnyParser::String { parser, .. } => parser.get_anchor_offset(),
141            AnyParser::Iter { parser, .. } => parser.get_anchor_offset(),
142            AnyParser::Custom { parser, .. } => parser.get_anchor_offset(),
143            AnyParser::Replay { parser, .. } => parser.get_anchor_offset(),
144        }
145    }
146
147    fn set_anchor_offset(&mut self, offset: usize) {
148        match self {
149            AnyParser::String { parser, .. } => parser.set_anchor_offset(offset),
150            AnyParser::Iter { parser, .. } => parser.set_anchor_offset(offset),
151            AnyParser::Custom { parser, .. } => parser.set_anchor_offset(offset),
152            AnyParser::Replay { parser, .. } => parser.set_anchor_offset(offset),
153        }
154    }
155}
156
157/// A parser implementation that utilizes a stack for parsing.
158pub struct ParserStack<'input, I = core::iter::Empty<char>, T = StrInput<'input>>
159where
160    I: Iterator<Item = char>,
161    T: BorrowedInput<'input>,
162{
163    parsers: Vec<AnyParser<'input, I, T>>,
164    current: Option<(Event<'input>, Span)>,
165    stream_end_emitted: bool,
166    #[allow(clippy::type_complexity)]
167    include_resolver: Option<Box<dyn FnMut(&str) -> Result<String, ScanError> + 'input>>,
168}
169
170impl<'input, I, T> ParserStack<'input, I, T>
171where
172    I: Iterator<Item = char>,
173    T: BorrowedInput<'input>,
174{
175    /// Creates a new, empty parser stack.
176    #[must_use]
177    pub fn new() -> Self {
178        Self {
179            parsers: Vec::new(),
180            current: None,
181            stream_end_emitted: false,
182            include_resolver: None,
183        }
184    }
185
186    /// Sets the include resolver for this stack.
187    pub fn set_resolver(
188        &mut self,
189        resolver: impl FnMut(&str) -> Result<String, ScanError> + 'input,
190    ) {
191        self.include_resolver = Some(Box::new(resolver));
192    }
193
194    /// Resolves an include string using the include resolver.
195    ///
196    /// # Errors
197    /// Returns `ScanError` if no resolver is configured, include resolution fails, or the
198    /// included content cannot be parsed.
199    pub fn resolve(&mut self, include_str: &str) -> Result<(), ScanError> {
200        if let Some(resolver) = &mut self.include_resolver {
201            let content = resolver(include_str)?;
202            let mut parser = Parser::new_from_iter(content.chars().collect::<Vec<_>>().into_iter());
203            if let Some(parent) = self.parsers.last() {
204                parser.set_anchor_offset(parent.get_anchor_offset());
205            }
206            let mut events = Vec::new();
207            while let Some(event) = parser.next_event() {
208                events.push(event?);
209            }
210
211            self.parsers.push(AnyParser::Replay {
212                parser: ReplayParser::new(events, parser.get_anchor_offset()),
213                name: include_str.into(),
214            });
215            Ok(())
216        } else {
217            Err(ScanError::new(
218                crate::scanner::Marker::new(0, 1, 0),
219                String::from("No include resolver set for parser stack."),
220            ))
221        }
222    }
223
224    /// Pushes a string parser onto the stack.
225    pub fn push_str_parser(&mut self, mut parser: Parser<'input, StrInput<'input>>, name: String) {
226        if let Some(parent) = self.parsers.last() {
227            parser.set_anchor_offset(parent.get_anchor_offset());
228        }
229        self.parsers.push(AnyParser::String { parser, name });
230    }
231
232    /// Pushes an iterator parser onto the stack.
233    pub fn push_iter_parser(
234        &mut self,
235        mut parser: Parser<'static, BufferedInput<I>>,
236        name: String,
237    ) {
238        if let Some(parent) = self.parsers.last() {
239            parser.set_anchor_offset(parent.get_anchor_offset());
240        }
241        self.parsers.push(AnyParser::Iter { parser, name });
242    }
243
244    /// Pushes a custom parser onto the stack.
245    pub fn push_custom_parser(&mut self, mut parser: Parser<'input, T>, name: String) {
246        if let Some(parent) = self.parsers.last() {
247            parser.set_anchor_offset(parent.get_anchor_offset());
248        }
249        self.parsers.push(AnyParser::Custom { parser, name });
250    }
251
252    /// Pushes a replay parser onto the stack.
253    pub fn push_replay_parser(&mut self, parser: ReplayParser<'input>, name: String) {
254        self.parsers.push(AnyParser::Replay { parser, name });
255    }
256
257    /// Pushes a custom parser onto the stack and primes the next event to be returned from it.
258    pub fn push_custom_parser_with_current(
259        &mut self,
260        mut parser: Parser<'input, T>,
261        name: String,
262        current: (Event<'input>, Span),
263    ) {
264        if let Some(parent) = self.parsers.last() {
265            parser.set_anchor_offset(parent.get_anchor_offset());
266        }
267        self.parsers.push(AnyParser::Custom { parser, name });
268        self.current = Some(current);
269    }
270
271    /// Returns the anchor offset that a newly pushed parser should inherit.
272    #[must_use]
273    pub fn current_anchor_offset(&self) -> usize {
274        self.parsers.last().map_or(0, AnyParser::get_anchor_offset)
275    }
276
277    /// Returns the names of the parsers currently in the stack.
278    #[must_use]
279    pub fn stack(&self) -> Vec<String> {
280        self.parsers
281            .iter()
282            .map(|p| match p {
283                AnyParser::String { name, .. }
284                | AnyParser::Iter { name, .. }
285                | AnyParser::Custom { name, .. }
286                | AnyParser::Replay { name, .. } => name.clone(),
287            })
288            .collect()
289    }
290
291    fn next_event_impl(&mut self) -> Result<(Event<'input>, Span), ScanError> {
292        loop {
293            let Some(any_parser) = self.parsers.last_mut() else {
294                return Ok((
295                    Event::StreamEnd,
296                    Span::empty(crate::scanner::Marker::new(0, 1, 0)),
297                ));
298            };
299
300            let res = match any_parser {
301                AnyParser::String { parser, .. } => parser.next_event(),
302                AnyParser::Iter { parser, .. } => parser.next_event(),
303                AnyParser::Custom { parser, .. } => parser.next_event(),
304                AnyParser::Replay { parser, .. } => parser.next_event(),
305            };
306
307            match res {
308                Some(Ok((Event::StreamEnd, span))) => {
309                    if self.parsers.len() == 1 {
310                        self.parsers.pop();
311                        return Ok((Event::StreamEnd, span));
312                    }
313                    let popped = self.parsers.pop().unwrap();
314                    if let Some(parent) = self.parsers.last_mut() {
315                        parent.set_anchor_offset(popped.get_anchor_offset());
316                    }
317                }
318                None => {
319                    if self.parsers.len() == 1 {
320                        self.parsers.pop();
321                        return Ok((
322                            Event::StreamEnd,
323                            Span::empty(crate::scanner::Marker::new(0, 1, 0)),
324                        ));
325                    }
326                    let popped = self.parsers.pop().unwrap();
327                    if let Some(parent) = self.parsers.last_mut() {
328                        parent.set_anchor_offset(popped.get_anchor_offset());
329                    }
330                }
331                Some(Err(e)) => {
332                    let popped = self.parsers.pop().unwrap();
333                    if let Some(parent) = self.parsers.last_mut() {
334                        parent.set_anchor_offset(popped.get_anchor_offset());
335                    }
336                    return Err(e);
337                }
338                Some(Ok((Event::DocumentEnd, span))) => {
339                    if self.parsers.len() == 1 {
340                        return Ok((Event::DocumentEnd, span));
341                    }
342
343                    // Check if it has more documents
344                    let peek_res = match self.parsers.last_mut().unwrap() {
345                        AnyParser::String { parser, .. } => parser.peek(),
346                        AnyParser::Iter { parser, .. } => parser.peek(),
347                        AnyParser::Custom { parser, .. } => parser.peek(),
348                        AnyParser::Replay { parser, .. } => parser.peek(),
349                    };
350
351                    match peek_res {
352                        Some(Ok((Event::StreamEnd, _))) | None => {
353                            let popped = self.parsers.pop().unwrap();
354                            if let Some(parent) = self.parsers.last_mut() {
355                                parent.set_anchor_offset(popped.get_anchor_offset());
356                            }
357                        }
358                        _ => {
359                            return Err(ScanError::new_str(
360                                span.start,
361                                "multiple documents not supported here",
362                            ));
363                        }
364                    }
365                }
366                Some(Ok(event)) => {
367                    if self.parsers.len() > 1
368                        && matches!(event.0, Event::StreamStart | Event::DocumentStart(_))
369                    {
370                        continue;
371                    }
372                    return Ok(event);
373                }
374            }
375        }
376    }
377}
378
379impl<'input, I, T> Default for ParserStack<'input, I, T>
380where
381    I: Iterator<Item = char>,
382    T: BorrowedInput<'input>,
383{
384    fn default() -> Self {
385        Self::new()
386    }
387}
388
389impl<'input, I, T> ParserTrait<'input> for ParserStack<'input, I, T>
390where
391    I: Iterator<Item = char>,
392    T: BorrowedInput<'input>,
393{
394    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
395        if let Some(ref x) = self.current {
396            Some(Ok(x))
397        } else {
398            if self.stream_end_emitted {
399                return None;
400            }
401            match self.next_event_impl() {
402                Ok(token) => {
403                    self.current = Some(token);
404                    Some(Ok(self.current.as_ref().unwrap()))
405                }
406                Err(e) => Some(Err(e)),
407            }
408        }
409    }
410
411    fn next_event(&mut self) -> Option<ParseResult<'input>> {
412        if self.current.is_some() {
413            return self.current.take().map(Ok);
414        }
415        if self.stream_end_emitted {
416            return None;
417        }
418        match self.next_event_impl() {
419            Ok(token) => {
420                if let Event::StreamEnd = token.0 {
421                    self.stream_end_emitted = true;
422                }
423                Some(Ok(token))
424            }
425            Err(e) => Some(Err(e)),
426        }
427    }
428
429    fn load<R: SpannedEventReceiver<'input>>(
430        &mut self,
431        recv: &mut R,
432        multi: bool,
433    ) -> Result<(), ScanError> {
434        loop {
435            // Fetch the next event, which is properly synced across the stack
436            let Some(res) = self.next_event() else {
437                break;
438            };
439
440            let (ev, span) = res?;
441
442            // Track if we need to stop based on `multi`
443            let is_doc_end = matches!(ev, Event::DocumentEnd);
444            let is_stream_end = matches!(ev, Event::StreamEnd);
445
446            recv.on_event(ev, span);
447
448            if is_stream_end {
449                break;
450            }
451
452            // If we only want a single document and we just reached the end of one, stop
453            if !multi && is_doc_end {
454                break;
455            }
456        }
457
458        Ok(())
459    }
460}
461
462impl<'input, I, T> Iterator for ParserStack<'input, I, T>
463where
464    I: Iterator<Item = char>,
465    T: BorrowedInput<'input>,
466{
467    type Item = Result<(Event<'input>, Span), ScanError>;
468
469    fn next(&mut self) -> Option<Self::Item> {
470        self.next_event()
471    }
472}