welly_parser/parser.rs
1use super::{Tree, Location, Loc, Token, Stream};
2
3/// A high-level wrapper around an input [`Stream`].
4///
5/// It handles errors, and tracks the [`Location`]s of the input `Token`s
6/// that could form part of the next output `Token`. It also provides an
7/// `unread()` method to pretend that you didn't read a `Token`.
8pub struct Context<I: Stream> {
9 /// The input [`Stream`].
10 input: I,
11
12 /// Non-error [`Token`]s to be returned before reading from [`input`],
13 /// in reverse order.
14 ///
15 /// [`input`]: Self::input
16 stack: Vec<Loc<Box<dyn Tree>>>,
17
18 /// The [`Location`]s of [`Token`]s that have been read but not yet used to
19 /// form an output.
20 locs: Vec<Location>,
21}
22
23impl<I: Stream> Context<I> {
24 pub fn new(input: I) -> Self {
25 Self {input, stack: Vec::new(), locs: Vec::new()}
26 }
27
28 /// Returns the [`Location`] of the most recent [`Token`], and forgets it.
29 pub fn pop(&mut self) -> Location {
30 self.locs.pop().expect("No tokens have been read")
31 }
32
33 /// Returns the [`Location`] of the first [`Token`] returned by `read()`.
34 pub fn first(&self) -> Location {
35 *self.locs.first().expect("No tokens have been read")
36 }
37
38 /// Returns the [`Location`] of the last [`Token`] returned by `read()`.
39 pub fn last(&self) -> Location {
40 *self.locs.last().expect("No tokens have been read")
41 }
42
43 /// Annotate `t` with `last()`.
44 pub fn locate<T>(&self, value: T) -> Loc<T> { Loc(value, self.last()) }
45
46 /// Returns a [`Location`] containing all [`Token`]s `read()` so far, and
47 /// forgets them.
48 pub fn drain(&mut self) -> Location {
49 let ret = Location {start: self.first().start, end: self.last().end};
50 self.locs.clear();
51 ret
52 }
53
54 /// Returns `self.stack.pop()` if possible, otherwise `self.input.read()`.
55 fn read_inner(&mut self) -> Token {
56 if let Some(Loc(t, loc)) = self.stack.pop() {
57 Token::new(t, loc)
58 } else {
59 self.input.read()
60 }
61 }
62
63 /// Read the next [`Token`] and internally record its [`Location`].
64 ///
65 /// - Ok(tree) - The parse [`Tree`] of the next `Token`.
66 /// - Err(msg) - An error prevented parsing of the next `Token`.
67 pub fn read_any(&mut self) -> Result<Box<dyn Tree>, String> {
68 let token = self.read_inner();
69 self.locs.push(token.location());
70 token.result()
71 }
72
73 /// Read the next [`Token`] and internally record its [`Location`], but
74 /// only if its parse [`Tree`] is of type `T`.
75 ///
76 /// - Ok(Some(tree)) - The next `Token`'s parse tree is of type `T`.
77 /// - Ok(None) - The next `Token` is not a `T`. It has been `unread()`.
78 /// - Err(message) - An error prevented parsing of the next `Token`.
79 pub fn read<T: Tree>(&mut self) -> Result<Option<Box<T>>, String> {
80 Ok(match self.read_any()?.downcast::<T>() {
81 Ok(t) => Some(t),
82 Err(t) => { self.unread(t); None },
83 })
84 }
85
86 /// Read the next [`Token`] and internally record its [`Location`], but
87 /// only if it `is_wanted`.
88 /// - Ok(Some(tree)) - If `is_wanted(tree)`.
89 /// - Ok(None) - The next `Token`'s parse tree is not a `T` or is unwanted.
90 /// It has been `unread()`.
91 /// - Err(message) - An error prevented parsing of the next `Token`.
92 pub fn read_if<T: Tree>(
93 &mut self,
94 is_wanted: impl FnOnce(&T) -> bool,
95 ) -> Result<Option<Box<T>>, String> {
96 Ok(self.read::<T>()?.and_then(
97 |t| if is_wanted(&*t) { Some(t) } else { self.unread(t); None }
98 ))
99 }
100
101 /// Pretend we haven't read the most recent [`Token`].
102 ///
103 /// `tree` must be the parse [`Tree`] of the most recent `Token`. It will
104 /// be returned by the next call to `read()`.
105 pub fn unread(&mut self, tree: Box<dyn Tree>) {
106 let loc = self.pop();
107 self.stack.push(Loc(tree, loc));
108 }
109}
110
111// ----------------------------------------------------------------------------
112
113/// Parse a [`Stream`].
114pub trait Parse: Sized {
115 /// Read input [`Tree`]s from `input` and try to make a single output tree.
116 ///
117 /// Special cases:
118 /// - An unrecognised input tree should be passed on unchanged.
119 /// - In particular, [`EndOfFile`] should be passed on unchanged. It must
120 /// never be incorporated into a larger parse tree.
121 /// - If this parser finds a parse error, abandon the current parse tree
122 /// and return `Err(message)`.
123 /// - If `input` reports a parse error, abandon the current parse tree and
124 /// pass on the error unchanged.
125 /// - In particular, if `input` reports an incomplete file, pass it on.
126 ///
127 /// [`EndOfFile`]: super::EndOfFile
128 fn parse(
129 &self,
130 input: &mut Context<impl Stream>,
131 ) -> Result<Box<dyn Tree>, String>;
132
133 /// Read [`Token`]s from `input` to make a [`Stream`] of output `Token`s.
134 ///
135 /// To make each output `Token`, the returned `Stream` calls
136 /// [`parse()`] to make a [`Tree`], and annotates it with a [`Location`].
137 ///
138 /// [`parse()`]: Self::parse()
139 fn parse_stream<I: Stream>(self, input: I) -> ParseStream<Self, I> {
140 ParseStream {parse: self, input: Context::new(input)}
141 }
142}
143
144// ----------------------------------------------------------------------------
145
146/// The [`Stream`] returned by `Parse::parse_stream()`.
147// TODO: Make private, using a newer version of Rust that supports RPIT.
148pub struct ParseStream<P: Parse, I: Stream> {
149 /// The parsing function.
150 parse: P,
151
152 /// The input stream.
153 input: Context<I>,
154}
155
156impl<P: Parse, I: Stream> Stream for ParseStream<P, I> {
157 fn read(&mut self) -> Token {
158 let ret = self.parse.parse(&mut self.input);
159 let last = self.input.last();
160 let all = self.input.drain();
161 let loc = if ret.is_ok() { all } else { last };
162 Token(Loc(ret, loc))
163 }
164}