1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
use crate::lex::{Lexer, Set, Token, TokenSrc};
use crate::syntax::{self, Error, ErrorKind, FirstSet, FollowSet, Metadata, Result as SynResult};
use crate::{GrammarError, Lexicon, Production, Span, ToSpan};
use super::{Info, Root};
pub struct Parser<'s, L: Lexicon> {
/// The core state of the parser
info: Info<'s, L>,
/// The lexer
lexer: L::Lexer<'s>,
/// The next token
peeked: Option<Token<L>>,
}
impl<'s, L: Lexicon> Parser<'s, L> {
/// Create a new Parser for the lexicon `L`
pub fn new(source: &'s str) -> Result<Self, GrammarError> {
Ok(Self {
info: Info::new(source),
lexer: L::lexer(source)?,
peeked: None,
})
}
#[inline]
pub fn info(&self) -> &Info<'s, L> {
&self.info
}
#[inline]
pub fn info_mut(&mut self) -> &mut Info<'s, L> {
&mut self.info
}
///////////////////////////////////////////////////////////
// High-level API
///////////////////////////////////////////////////////////
// /Attempt to parse the syntax tree root once.
// ///
// /Note that if you are parsing the same root multiple times,
// /it's more efficient to use [`Parser::iter`]
#[inline]
pub fn parse<T: Root>(&mut self) -> Result<Option<T>, GrammarError>
where
T::Prod: Production<L = L>,
{
let meta = match T::metadata() {
Ok(meta) => meta,
Err(err) => return Err(err.clone()),
};
Ok(self.parse_with_meta::<T>(meta))
}
// /// Create an iterator that can be used to parse all syntax tree roots in the source
// pub fn iter<'p, R: ParseRoot>(&'p mut self) -> Result<ParserIter<'s, 'p, L, R>, GrammarError>
// where R::AST : AbstractSyntaxRoot<L=L>
// {
// ParserIter::<'s, 'p, L, R>::new(self)
// }
//
// /// Parses all roots in the source until the end, returning the results in a Vec.
// /// This is equivalent to calling `Parser::iter` and collecting the results.
// #[inline]
// pub fn parse_all<R: ParseRoot>(&mut self) -> Result<Vec<R> , GrammarError>
// where R::AST : AbstractSyntaxRoot<L=L> {
// Ok(self.iter()?.collect())
// }
fn parse_with_meta<T: Root>(&mut self, meta: &Metadata<L>) -> Option<T>
where
T::Prod: Production<L = L>,
{
match T::produce(self, meta) {
SynResult::Success(tree) => Some(tree),
SynResult::Recovered(tree, errors) => {
self.info.errors.extend(errors);
Some(tree)
}
SynResult::Panic(errors) => {
self.info.errors.extend(errors);
None
}
}
}
///////////////////////////////////////////////////////////
// Stream API
///////////////////////////////////////////////////////////
/// Parse a token of a specific type
///
/// ## Consumption
/// The parser only consumes the next token if it matches the expected type
///
/// ## Recovery
/// None
///
/// ## Panic
/// If the token is not of the expected type, the parser panics, and will not consume the
/// token.
pub fn parse_token(&mut self, ty: L) -> SynResult<Token<L>, L> {
let token = match self.peek_token() {
Some(token) if token.ty == ty => token,
_ => {
let expected = FirstSet::one(ty, None);
let error = syntax::Error::new(self.current_span(), ErrorKind::Expecting(expected));
return SynResult::Panic(vec![error]);
}
};
self.consume_token();
SynResult::Success(token)
}
/// Parse a token of a specific type, matching a specific literal
///
/// ## Consumption
/// The parser only consumes the next token if it matches the expected type and literal.
///
/// ## Recovery
/// If the next token does not match, but is in the follow set passed in,
/// the parser will insert the expected token with a 0-length span and continue.
/// This also applies to the EOF token.
///
/// ## Panic
/// The parser panics if the next token does not match and is not in the follow set.
/// The next token is not consumed.
///
pub fn parse_token_lit(
&mut self,
ty: L,
match_lit: &'static str,
follow: &FollowSet<L>,
) -> SynResult<Token<L>, L> {
let token = match self.peek_token() {
Some(token) => token,
None => {
if follow.contains_eof() {
let expecting = FirstSet::one(ty, Some(match_lit));
let token = Token::new(self.current_span(), ty);
return SynResult::Recovered(token, vec![self.expecting(expecting)]);
}
return SynResult::Panic(vec![self.unexpected_eof()]);
}
};
let token_src = self.info.to_src(&token);
if token.ty == ty && token_src.src == match_lit {
self.consume_token();
return SynResult::Success(token);
}
let expecting = FirstSet::one(ty, Some(match_lit));
if follow.contains(Some(token_src)) {
// do not consume the next token
let token = Token::new(self.empty_span(), ty);
return SynResult::Recovered(token, vec![self.expecting(expecting)]);
}
SynResult::Panic(vec![self.expecting(expecting)])
}
pub fn peek_token_src(&mut self) -> Option<TokenSrc<'s, L>> {
self.peek_token().map(|t| self.info.to_src(&t))
}
/// Create a syntax error for an unexpected end of file
pub fn unexpected_eof(&mut self) -> Error<L> {
Error::new(self.current_span(), ErrorKind::UnexpectedEof)
}
pub fn expecting(&mut self, expected: FirstSet<L>) -> Error<L> {
Error::new(self.current_span(), ErrorKind::Expecting(expected))
}
/// Consume the next token
pub fn consume_token(&mut self) -> Option<Token<L>> {
if self.peeked.is_none() {
self.try_read_next_token();
}
match self.peeked.take() {
Some(token) => {
self.info.tokens.push_unchecked(token);
Some(token)
}
None => None,
}
}
/// Return the next token in the input but does not consume it
pub fn peek_token(&mut self) -> Option<Token<L>> {
if self.peeked.is_none() {
self.try_read_next_token();
}
self.peeked
}
/// If the next token is available, read it and store it in `peeked`
fn try_read_next_token(&mut self) {
debug_assert!(self.peeked.is_none());
// read until a token that is not extract
loop {
let (invalid, token) = self.lexer.next();
let info = &mut self.info;
if let Some(span) = invalid {
info.invalid_source.push(span);
info.errors
.push(Error::new(span, ErrorKind::UnexpectedCharacters));
}
if let Some(token) = token {
if !token.ty.should_extract() {
self.peeked = Some(token);
return;
}
info.extracted_tokens.push(token);
continue;
}
return;
}
}
pub fn remaining(&mut self) -> &'s str {
let lo = self.current_span().lo;
&self.info.source[lo..]
}
/// Get the span of the lookahead token, or the EOF span if there is no more tokens
pub fn current_span(&mut self) -> Span {
self.peek_token()
.map(|t| t.span)
.unwrap_or_else(|| self.info.eof())
}
/// Get an empty span representing the current position of the parser,
/// which is the lowest span that doesn't overlap with consumed tokens
pub fn empty_span(&mut self) -> Span {
let hi = self
.info
.tokens
.last()
.map(|x| x.span.hi)
.unwrap_or_default();
Span::new(hi, hi)
}
pub fn apply_semantic<S: ToSpan>(&mut self, span: &S, semantic: Set<L>) {
self.info
.tokens
.inside_mut(span.span())
.apply_semantic(semantic);
}
}