mago_syntax/parser/
stream.rs1use std::fmt::Debug;
2
3use bumpalo::Bump;
4use bumpalo::collections::CollectIn;
5use bumpalo::collections::Vec;
6
7use mago_database::file::FileId;
8use mago_database::file::HasFileId;
9use mago_span::Position;
10use mago_span::Span;
11use mago_syntax_core::parser::LookaheadBuf;
12
13use crate::ast::sequence::Sequence;
14use crate::ast::trivia::Trivia;
15use crate::ast::trivia::TriviaKind;
16use crate::error::ParseError;
17use crate::error::SyntaxError;
18use crate::lexer::Lexer;
19use crate::token::Token;
20use crate::token::TokenKind;
21
22#[derive(Debug)]
23pub struct TokenStream<'input, 'arena> {
24 arena: &'arena Bump,
25 lexer: Lexer<'input>,
26 buffer: LookaheadBuf<Token<'input>, 16>,
27 trivia: Vec<'arena, Token<'input>>,
28 position: Position,
29 file_id: FileId,
30}
31
32impl<'input, 'arena> TokenStream<'input, 'arena> {
33 pub fn new(arena: &'arena Bump, lexer: Lexer<'input>) -> TokenStream<'input, 'arena> {
34 let position = lexer.current_position();
35 let file_id_cached = lexer.file_id();
36
37 TokenStream {
38 arena,
39 lexer,
40 buffer: LookaheadBuf::new(),
41 trivia: Vec::new_in(arena),
42 position,
43 file_id: file_id_cached,
44 }
45 }
46
47 #[inline]
52 #[must_use]
53 pub const fn current_position(&self) -> Position {
54 self.position
55 }
56
57 #[inline]
63 pub fn has_reached_eof(&mut self) -> Result<bool, SyntaxError> {
64 Ok(self.fill_buffer(1)?.is_none())
65 }
66
67 #[inline]
73 pub fn consume(&mut self) -> Result<Token<'input>, ParseError> {
74 match self.advance() {
75 Some(Ok(token)) => Ok(token),
76 Some(Err(error)) => Err(error.into()),
77 None => Err(self.unexpected(None, &[])),
78 }
79 }
80
81 #[inline]
89 pub fn eat(&mut self, kind: TokenKind) -> Result<Token<'input>, ParseError> {
90 if let Some(token) = self.buffer.get(0) {
94 if token.kind == kind {
95 let _ = self.buffer.pop_front();
96
97 self.position = Position::new(token.start.offset + token.value.len() as u32);
98 return Ok(token);
99 }
100
101 return Err(self.unexpected(Some(token), &[kind]));
102 }
103
104 let current_kind = self.peek_kind(0)?;
106 match current_kind {
107 Some(k) if k == kind => self.consume(),
108 Some(_) => {
109 match self.lookahead(0)? {
113 Some(token) => Err(self.unexpected(Some(token), &[kind])),
114 None => Err(self.unexpected(None, &[kind])),
115 }
116 }
117 None => Err(self.unexpected(None, &[kind])),
118 }
119 }
120
121 #[inline]
129 pub fn consume_span(&mut self) -> Result<Span, ParseError> {
130 let file_id = self.file_id();
131 self.consume().map(|t| t.span_for(file_id))
132 }
133
134 #[inline]
142 pub fn eat_span(&mut self, kind: TokenKind) -> Result<Span, ParseError> {
143 let file_id = self.file_id();
144 self.eat(kind).map(|t| t.span_for(file_id))
145 }
146
147 #[inline]
155 pub fn advance(&mut self) -> Option<Result<Token<'input>, SyntaxError>> {
156 match self.fill_buffer(1) {
157 Ok(Some(_)) => {
158 if let Some(token) = self.buffer.pop_front() {
159 self.position = Position::new(token.start.offset + token.value.len() as u32);
161 Some(Ok(token))
162 } else {
163 None
164 }
165 }
166 Ok(None) => None,
167 Err(error) => Some(Err(error)),
168 }
169 }
170
171 #[inline]
179 pub fn is_at(&mut self, kind: TokenKind) -> Result<bool, ParseError> {
180 if let Some(token) = self.buffer.get(0) {
181 return Ok(token.kind == kind);
182 }
183
184 Ok(self.peek_kind(0)? == Some(kind))
185 }
186
187 #[inline]
195 pub fn lookahead(&mut self, n: usize) -> Result<Option<Token<'input>>, ParseError> {
196 if n < self.buffer.len() {
197 return Ok(self.buffer.get(n));
198 }
199
200 match self.fill_buffer(n + 1) {
201 Ok(Some(_)) => Ok(self.buffer.get(n)),
202 Ok(None) => Ok(None),
203 Err(error) => Err(error.into()),
204 }
205 }
206
207 #[inline]
216 pub fn peek_kind(&mut self, n: usize) -> Result<Option<TokenKind>, ParseError> {
217 if n < self.buffer.len() {
218 return Ok(self.buffer.get(n).map(|t| t.kind));
219 }
220
221 match self.fill_buffer(n + 1) {
222 Ok(Some(_)) => Ok(self.buffer.get(n).map(|t| t.kind)),
223 Ok(None) => Ok(None),
224 Err(error) => Err(error.into()),
225 }
226 }
227
228 #[inline]
230 #[must_use]
231 pub fn unexpected(&self, found: Option<Token<'_>>, expected: &[TokenKind]) -> ParseError {
232 let expected_kinds: Box<[TokenKind]> = expected.into();
233 if let Some(token) = found {
234 ParseError::UnexpectedToken(expected_kinds, token.kind, token.span_for(self.file_id()))
235 } else {
236 ParseError::UnexpectedEndOfFile(expected_kinds, self.file_id(), self.current_position())
237 }
238 }
239
240 #[inline]
242 pub fn get_trivia(&mut self) -> Sequence<'arena, Trivia<'arena>> {
243 let mut tokens = Vec::new_in(self.arena);
244 std::mem::swap(&mut self.trivia, &mut tokens);
245
246 let file_id = self.file_id();
247 Sequence::new(
248 tokens
249 .into_iter()
250 .filter_map(|token| {
251 let span = token.span_for(file_id);
252 let kind = match token.kind {
253 TokenKind::Whitespace => TriviaKind::WhiteSpace,
254 TokenKind::HashComment => TriviaKind::HashComment,
255 TokenKind::SingleLineComment => TriviaKind::SingleLineComment,
256 TokenKind::MultiLineComment => TriviaKind::MultiLineComment,
257 TokenKind::DocBlockComment => TriviaKind::DocBlockComment,
258 _ => return None,
262 };
263 Some(Trivia { kind, span, value: token.value })
264 })
265 .collect_in(self.arena),
266 )
267 }
268
269 #[inline]
273 fn fill_buffer(&mut self, n: usize) -> Result<Option<usize>, SyntaxError> {
274 if self.buffer.len() >= n {
275 return Ok(Some(n));
276 }
277
278 self.fill_buffer_slow(n)
279 }
280
281 #[inline(never)]
282 fn fill_buffer_slow(&mut self, n: usize) -> Result<Option<usize>, SyntaxError> {
283 while self.buffer.len() < n {
284 match self.lexer.advance() {
285 Some(result) => {
286 let token = result?;
287 if token.kind.is_trivia() {
288 self.trivia.push(token);
289 continue;
290 }
291 self.buffer.push_back(token);
292 }
293 None => return Ok(None),
294 }
295 }
296
297 Ok(Some(n))
298 }
299}
300
301impl HasFileId for TokenStream<'_, '_> {
302 #[inline]
303 fn file_id(&self) -> FileId {
304 self.file_id
305 }
306}