welly_parser/
parser.rs

1use super::{Tree, Location, Loc, Token, Stream};
2
3/// A high-level wrapper around an input [`Stream`].
4///
5/// It handles errors, and tracks the [`Location`]s of the input `Token`s
6/// that could form part of the next output `Token`. It also provides an
7/// `unread()` method to pretend that you didn't read a `Token`.
8pub struct Context<I: Stream> {
9    /// The input [`Stream`].
10    input: I,
11
12    /// Non-error [`Token`]s to be returned before reading from [`input`],
13    /// in reverse order.
14    ///
15    /// [`input`]: Self::input
16    stack: Vec<Loc<Box<dyn Tree>>>,
17
18    /// The [`Location`]s of [`Token`]s that have been read but not yet used to
19    /// form an output.
20    locs: Vec<Location>,
21}
22
23impl<I: Stream> Context<I> {
24    pub fn new(input: I) -> Self {
25        Self {input, stack: Vec::new(), locs: Vec::new()}
26    }
27
28    /// Returns the [`Location`] of the most recent [`Token`], and forgets it.
29    pub fn pop(&mut self) -> Location {
30        self.locs.pop().expect("No tokens have been read")
31    }
32
33    /// Returns the [`Location`] of the first [`Token`] returned by `read()`.
34    pub fn first(&self) -> Location {
35        *self.locs.first().expect("No tokens have been read")
36    }
37
38    /// Returns the [`Location`] of the last [`Token`] returned by `read()`.
39    pub fn last(&self) -> Location {
40        *self.locs.last().expect("No tokens have been read")
41    }
42
43    /// Annotate `t` with `last()`.
44    pub fn locate<T>(&self, value: T) -> Loc<T> { Loc(value, self.last()) }
45
46    /// Returns a [`Location`] containing all [`Token`]s `read()` so far, and
47    /// forgets them.
48    pub fn drain(&mut self) -> Location {
49        let ret = Location {start: self.first().start, end: self.last().end};
50        self.locs.clear();
51        ret
52    }
53
54    /// Returns `self.stack.pop()` if possible, otherwise `self.input.read()`.
55    fn read_inner(&mut self) -> Token {
56        if let Some(Loc(t, loc)) = self.stack.pop() {
57            Token::new(t, loc)
58        } else {
59            self.input.read()
60        }
61    }
62
63    /// Read the next [`Token`] and internally record its [`Location`].
64    ///
65    /// - Ok(tree) - The parse [`Tree`] of the next `Token`.
66    /// - Err(msg) - An error prevented parsing of the next `Token`.
67    pub fn read_any(&mut self) -> Result<Box<dyn Tree>, String> {
68        let token = self.read_inner();
69        self.locs.push(token.location());
70        token.result()
71    }
72
73    /// Read the next [`Token`] and internally record its [`Location`], but
74    /// only if its parse [`Tree`] is of type `T`.
75    ///
76    /// - Ok(Some(tree)) - The next `Token`'s parse tree is of type `T`.
77    /// - Ok(None) - The next `Token` is not a `T`. It has been `unread()`.
78    /// - Err(message) - An error prevented parsing of the next `Token`.
79    pub fn read<T: Tree>(&mut self) -> Result<Option<Box<T>>, String> {
80        Ok(match self.read_any()?.downcast::<T>() {
81            Ok(t) => Some(t),
82            Err(t) => { self.unread(t); None },
83        })
84    }
85
86    /// Read the next [`Token`] and internally record its [`Location`], but
87    /// only if it `is_wanted`.
88    /// - Ok(Some(tree)) - If `is_wanted(tree)`.
89    /// - Ok(None) - The next `Token`'s parse tree is not a `T` or is unwanted.
90    ///   It has been `unread()`.
91    /// - Err(message) - An error prevented parsing of the next `Token`.
92    pub fn read_if<T: Tree>(
93        &mut self,
94        is_wanted: impl FnOnce(&T) -> bool,
95    ) -> Result<Option<Box<T>>, String> {
96        Ok(self.read::<T>()?.and_then(
97            |t| if is_wanted(&*t) { Some(t) } else { self.unread(t); None }
98        ))
99    }
100
101    /// Pretend we haven't read the most recent [`Token`].
102    ///
103    /// `tree` must be the parse [`Tree`] of the most recent `Token`. It will
104    /// be returned by the next call to `read()`.
105    pub fn unread(&mut self, tree: Box<dyn Tree>) {
106        let loc = self.pop();
107        self.stack.push(Loc(tree, loc));
108    }
109}
110
// ----------------------------------------------------------------------------
112
113/// Parse a [`Stream`].
114pub trait Parse: Sized {
115    /// Read input [`Tree`]s from `input` and try to make a single output tree.
116    ///
117    /// Special cases:
118    /// - An unrecognised input tree should be passed on unchanged.
119    ///   - In particular, [`EndOfFile`] should be passed on unchanged. It must
120    ///     never be incorporated into a larger parse tree.
121    /// - If this parser finds a parse error, abandon the current parse tree
122    ///   and return `Err(message)`.
123    /// - If `input` reports a parse error, abandon the current parse tree and
124    ///   pass on the error unchanged.
125    ///   - In particular, if `input` reports an incomplete file, pass it on.
126    ///
127    /// [`EndOfFile`]: super::EndOfFile
128    fn parse(
129        &self,
130        input: &mut Context<impl Stream>,
131    ) -> Result<Box<dyn Tree>, String>;
132
133    /// Read [`Token`]s from `input` to make a [`Stream`] of output `Token`s.
134    ///
135    /// To make each output `Token`, the returned `Stream` calls
136    /// [`parse()`] to make a [`Tree`], and annotates it with a [`Location`].
137    ///
138    /// [`parse()`]: Self::parse()
139    fn parse_stream<I: Stream>(self, input: I) -> ParseStream<Self, I> {
140        ParseStream {parse: self, input: Context::new(input)}
141    }
142}
143
// ----------------------------------------------------------------------------
145
146/// The [`Stream`] returned by `Parse::parse_stream()`.
147// TODO: Make private, using a newer version of Rust that supports RPIT.
148pub struct ParseStream<P: Parse, I: Stream> {
149    /// The parsing function.
150    parse: P,
151
152    /// The input stream.
153    input: Context<I>,
154}
155
156impl<P: Parse, I: Stream> Stream for ParseStream<P, I> {
157    fn read(&mut self) -> Token {
158        let ret = self.parse.parse(&mut self.input);
159        let last = self.input.last();
160        let all = self.input.drain();
161        let loc = if ret.is_ok() { all } else { last };
162        Token(Loc(ret, loc))
163    }
164}