//! This parser module is responsible for turning the stream of [Token]s from the [Lexer] into a tree of [AST] nodes.
//!
//! [AST]: crate::ast
//! [Token]: crate::lexer::token::Token
use error::{ParserError, ParserErrorKind};
use super::lexer::Lexer;
use crate::{
lexer::token::{Token, TokenTy},
source_tracking::fragment::Fragment,
};
use std::collections::VecDeque;
mod decl;
pub mod error;
mod identifier;
mod literal;
mod path;
pub mod whitespace;
/// The [Parser] struct wraps a [Lexer] and adds lookahead and functions that are useful for parsing.
#[derive(Debug)]
pub struct Parser {
lexer: Lexer,
lookahead: VecDeque<Token>,
}
impl Parser {
/// Construct a new parser around a given [Lexer].
pub fn new(lexer: Lexer) -> Self {
Parser {
lexer,
lookahead: VecDeque::new(),
}
}
/// Get the next [Token] from this [Parser]. This may be a token that's already been peeked.
///
/// Skips any non-document comments encountered via the lexer implementation.
///
    /// Returns an error if a [Token] with [TokenTy::Unknown] is encountered.
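    ///
    /// A minimal consumption loop, sketched (how the wrapped [Lexer] is constructed is an
    /// assumption of the example, not something this module prescribes):
    ///
    /// ```ignore
    /// let mut parser = Parser::new(lexer);
    /// // Pull tokens until the lexer is exhausted, surfacing unknown tokens as errors.
    /// while let Some(token) = parser.next_token()? {
    ///     println!("{:?}", token.variant);
    /// }
    /// ```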
pub fn next_token(&mut self) -> Result<Option<Token>, ParserError> {
let token = self
.lookahead
.pop_front()
.or_else(|| self.lexer.next_token());
// Check for unknown tokens, which should always convert to an error.
match token {
Some(Token {
variant: TokenTy::Unknown,
fragment,
}) => Err(ParserErrorKind::EncounteredUnknownToken.at(fragment)),
known_token_or_none => Ok(known_token_or_none),
}
}
    /// Advance this [Parser] by `n` [Token]s.
    ///
    /// # Panics
    /// - If `n` is greater than the number of remaining [Token]s.
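    ///
    /// Intended call pattern, sketched (the [TokenTy] variant names here are illustrative
    /// assumptions, not guaranteed members of the token set):
    ///
    /// ```ignore
    /// // Only advance past tokens whose variants were already confirmed,
    /// // making the panic path unreachable.
    /// if parser.matches(&[TokenTy::Identifier, TokenTy::Colon]) {
    ///     parser.advance(2);
    /// }
    /// ```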
pub fn advance(&mut self, n: usize) {
// Add tokens to the lookahead buffer until we have enough to split off.
while self.lookahead.len() < n {
let token = self
.lexer
.next_token()
.expect("advance: `n` <= number of remaining tokens");
self.lookahead.push_back(token);
}
// Split them off.
self.lookahead = self.lookahead.split_off(n);
}
/// Peek at the next token from the [Lexer] (cached in the lookahead queue if peeked before).
pub fn peek(&mut self) -> Option<&Token> {
if self.lookahead.is_empty() {
self.lookahead.push_back(self.lexer.next_token()?);
}
self.lookahead.front()
}
/// Peek the [Fragment] of the next [Token].
pub fn peek_fragment(&mut self) -> Option<&Fragment> {
self.peek().map(|token| &token.fragment)
}
    /// Peek the [Fragment] of the next [Token] and clone it, or return a clone of the
    /// remaining [Fragment] of the internal [Lexer]
    /// (which will be empty, since there was no [Token] left to peek).
    ///
    /// This is likely only useful for error reporting -- a clone of a potentially empty
    /// fragment is rarely useful otherwise.
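    ///
    /// A sketch of the error-reporting use case (`ExpectedIdentifier` is a hypothetical
    /// [ParserErrorKind] variant, used purely for illustration):
    ///
    /// ```ignore
    /// // Attach whatever source remains (possibly an empty fragment) to the error.
    /// let fragment = parser.peek_fragment_or_rest_cloned();
    /// return Err(ParserErrorKind::ExpectedIdentifier.at(fragment));
    /// ```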
pub fn peek_fragment_or_rest_cloned(&mut self) -> Fragment {
match self.peek() {
Some(Token { fragment, .. }) => fragment.clone(),
None => {
let rest = self.lexer.remaining.clone();
                // Assert that we're making the right assumptions about the remaining fragment.
                // These are (unidiomatically) done using debug_assert -- perhaps that changes
                // eventually, but it should be fine for now, since a violation can only produce
                // logic bugs (never memory or concurrency bugs).
debug_assert!(rest.is_valid());
debug_assert!(rest.is_empty());
debug_assert!(rest.is_empty_at_end_of_source());
rest
}
}
}
    /// Get a reference to the wrapped [Lexer].
pub fn lexer(&self) -> &Lexer {
&self.lexer
}
/// Lookahead `k` [Token]s.
///
/// If `k == 0` then this is effectively peeking at the next [Token] from the wrapped [Lexer].
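    ///
    /// A non-consuming sketch:
    ///
    /// ```ignore
    /// // Inspect the token after the next one without consuming anything.
    /// if let Some(second) = parser.lookahead(1) {
    ///     println!("token after next: {:?}", second.variant);
    /// }
    /// ```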
pub fn lookahead(&mut self, k: usize) -> Option<&Token> {
while self.lookahead.len() <= k {
self.lookahead.push_back(self.lexer.next_token()?);
}
self.lookahead.get(k)
}
    /// Similar to [Parser::lookahead], but returns a slice of `n` [Token]s starting with the next [Token].
///
/// Returns [None] if `n` is greater than the number of remaining [Token]s for this [Parser].
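    ///
    /// Sketch (the returned window borrows the internal buffer, so inspect it before
    /// mutating the parser again):
    ///
    /// ```ignore
    /// if let Some([first, second]) = parser.lookahead_window(2) {
    ///     // Both tokens are visible here without either being consumed.
    ///     println!("{:?} then {:?}", first.variant, second.variant);
    /// }
    /// ```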
pub fn lookahead_window(&mut self, n: usize) -> Option<&[Token]> {
while self.lookahead.len() < n {
self.lookahead.push_back(self.lexer.next_token()?);
}
        // Use `make_contiguous` to get a single unified slice.
Some(&self.lookahead.make_contiguous()[..n])
}
/// Get the next [Token] from this parser if its [Token::variant] is the given `token_ty`.
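    ///
    /// For example, optionally consuming a trailing comma (a sketch -- `Comma` is an
    /// assumed [TokenTy] variant):
    ///
    /// ```ignore
    /// // Consume the comma if present; otherwise leave the token stream untouched.
    /// let trailing_comma = parser.next_if_is(TokenTy::Comma).is_some();
    /// ```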
pub fn next_if_is(&mut self, token_ty: TokenTy) -> Option<Token> {
        // Peeking successfully first means that the lookahead queue is never empty here.
(self.peek()?.variant == token_ty)
// SAFETY: We just peeked a token to check its variant so this unwrap is always ok.
.then(|| unsafe { self.lookahead.pop_front().unwrap_unchecked() })
}
/// Peek at the next [Token]s of this [Parser] and determine if the [Token::variant]s match this
/// sequence of [TokenTy]s.
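    ///
    /// Sketch of disambiguating on a two-token prefix (variant names are illustrative
    /// assumptions):
    ///
    /// ```ignore
    /// if parser.matches(&[TokenTy::Identifier, TokenTy::ColonColon]) {
    ///     // Looks like the start of a path -- hand off to the path parser.
    /// }
    /// ```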
pub fn matches(&mut self, seq: &[TokenTy]) -> bool {
        // Use let-else to ensure there are at least `seq.len()` tokens remaining.
let Some(lookahead_window) = self.lookahead_window(seq.len()) else {
return false;
};
// Use a zipped iterator to compare all the token variants.
lookahead_window
.iter()
.zip(seq)
.all(|(token, matches)| token.variant == *matches)
}
}