Skip to main content

mago_syntax/parser/
stream.rs

1use std::fmt::Debug;
2
3use bumpalo::Bump;
4use bumpalo::collections::CollectIn;
5use bumpalo::collections::Vec;
6
7use mago_database::file::FileId;
8use mago_database::file::HasFileId;
9use mago_span::Position;
10use mago_span::Span;
11use mago_syntax_core::parser::LookaheadBuf;
12
13use crate::ast::sequence::Sequence;
14use crate::ast::trivia::Trivia;
15use crate::ast::trivia::TriviaKind;
16use crate::error::Expected;
17use crate::error::ParseError;
18use crate::error::SyntaxError;
19use crate::lexer::Lexer;
20use crate::token::Token;
21use crate::token::TokenKind;
22
/// A buffered stream of significant (non-trivia) tokens pulled from a [`Lexer`].
///
/// Tokens whose kind reports `is_trivia()` are set aside while the lookahead
/// buffer is being filled and are handed out later by `get_trivia`.
#[derive(Debug)]
pub struct TokenStream<'input, 'arena> {
    /// Arena backing the trivia storage and the sequences built by `get_trivia`.
    arena: &'arena Bump,
    /// Source of raw tokens; polled on demand when the buffer runs short.
    lexer: Lexer<'input>,
    /// Lookahead buffer holding non-trivia tokens that have been lexed but not yet consumed.
    buffer: LookaheadBuf<Token<'input>, 16>,
    /// Trivia tokens collected since the last call to `get_trivia`.
    trivia: Vec<'arena, Token<'input>>,
    /// End position of the most recently consumed token (initially the lexer's position).
    position: Position,
    /// File identifier read from the lexer once, at construction time.
    file_id: FileId,
}
32
33impl<'input, 'arena> TokenStream<'input, 'arena> {
34    pub fn new(arena: &'arena Bump, lexer: Lexer<'input>) -> TokenStream<'input, 'arena> {
35        let position = lexer.current_position();
36        let file_id_cached = lexer.file_id();
37
38        TokenStream {
39            arena,
40            lexer,
41            buffer: LookaheadBuf::new(),
42            trivia: Vec::new_in(arena),
43            position,
44            file_id: file_id_cached,
45        }
46    }
47
    /// Returns the current position of the stream within the source file.
    ///
    /// Until a token has been consumed this is the position the lexer reported
    /// when the stream was created. Afterwards it is the end position (start
    /// offset plus value length) of the most recently consumed significant
    /// token, as updated by `advance()`, `consume()` or `eat()`.
    #[inline]
    #[must_use]
    pub const fn current_position(&self) -> Position {
        self.position
    }
57
58    /// Returns whether the stream has consumed all tokens up to EOF.
59    ///
60    /// # Errors
61    ///
62    /// Returns a [`SyntaxError`] if the lexer fails to produce the next token.
63    #[inline]
64    pub fn has_reached_eof(&mut self) -> Result<bool, SyntaxError> {
65        Ok(self.fill_buffer(1)?.is_none())
66    }
67
68    /// Consumes and returns the next significant token.
69    ///
70    /// # Errors
71    ///
72    /// Returns a [`ParseError`] if EOF is reached or a lexer error occurs.
73    #[inline]
74    pub fn consume(&mut self) -> Result<Token<'input>, ParseError> {
75        match self.advance() {
76            Some(Ok(token)) => Ok(token),
77            Some(Err(error)) => Err(error.into()),
78            None => Err(self.unexpected(None, &[])),
79        }
80    }
81
82    /// Consumes the next token only if it matches the expected kind.
83    ///
84    /// Returns the token if it matches, otherwise returns an error.
85    ///
86    /// # Errors
87    ///
88    /// Returns a [`ParseError`] if the next token's kind does not match `kind`, or if EOF is reached.
89    #[inline]
90    pub fn eat(&mut self, kind: TokenKind) -> Result<Token<'input>, ParseError> {
91        // Fast path: head already buffered. Avoids the Result<Option<...>>
92        // round trip from `peek_kind` plus a follow-up `lookahead` on the
93        // happy path.
94        if let Some(token) = self.buffer.get(0) {
95            if token.kind == kind {
96                let _ = self.buffer.pop_front();
97
98                self.position = Position::new(token.start.offset + token.value.len() as u32);
99                return Ok(token);
100            }
101
102            return Err(self.unexpected_kind(Some(token), kind));
103        }
104
105        // Slow path: buffer empty, fill it.
106        let current_kind = self.peek_kind(0)?;
107        match current_kind {
108            Some(k) if k == kind => self.consume(),
109            Some(_) => match self.lookahead(0)? {
110                Some(token) => Err(self.unexpected_kind(Some(token), kind)),
111                None => Err(self.unexpected_kind(None, kind)),
112            },
113            None => Err(self.unexpected_kind(None, kind)),
114        }
115    }
116
117    /// Consumes and returns the span of the next significant token.
118    ///
119    /// This is a convenience method equivalent to `consume()?.span_for(file_id())`.
120    ///
121    /// # Errors
122    ///
123    /// Returns a [`ParseError`] if EOF is reached or a lexer error occurs.
124    #[inline]
125    pub fn consume_span(&mut self) -> Result<Span, ParseError> {
126        let file_id = self.file_id();
127        self.consume().map(|t| t.span_for(file_id))
128    }
129
130    /// Consumes the next token only if it matches the expected kind, returning its span.
131    ///
132    /// This is a convenience method equivalent to `eat(kind)?.span_for(file_id())`.
133    ///
134    /// # Errors
135    ///
136    /// Returns a [`ParseError`] if the next token's kind does not match `kind`, or if EOF is reached.
137    #[inline]
138    pub fn eat_span(&mut self, kind: TokenKind) -> Result<Span, ParseError> {
139        let file_id = self.file_id();
140        self.eat(kind).map(|t| t.span_for(file_id))
141    }
142
143    /// Advances the stream to the next token in the input source code and returns it.
144    ///
145    /// If the stream has already read the entire input source code, this method will return `None`.
146    ///
147    /// # Returns
148    ///
149    /// The next token in the input source code, or `None` if the lexer has reached the end of the input.
150    #[inline]
151    pub fn advance(&mut self) -> Option<Result<Token<'input>, SyntaxError>> {
152        match self.fill_buffer(1) {
153            Ok(Some(_)) => {
154                if let Some(token) = self.buffer.pop_front() {
155                    // Compute end position from start + value length
156                    self.position = Position::new(token.start.offset + token.value.len() as u32);
157                    Some(Ok(token))
158                } else {
159                    None
160                }
161            }
162            Ok(None) => None,
163            Err(error) => Some(Err(error)),
164        }
165    }
166
167    /// Checks if the next token matches the given kind without consuming it.
168    ///
169    /// Returns `false` if at EOF.
170    ///
171    /// # Errors
172    ///
173    /// Returns a [`ParseError`] if the lexer fails to produce the next token.
174    #[inline]
175    pub fn is_at(&mut self, kind: TokenKind) -> Result<bool, ParseError> {
176        if let Some(token) = self.buffer.get(0) {
177            return Ok(token.kind == kind);
178        }
179
180        Ok(self.peek_kind(0)? == Some(kind))
181    }
182
183    /// Peeks at the nth (0-indexed) significant token ahead without consuming it.
184    ///
185    /// Returns `Ok(None)` if EOF is reached before the nth token.
186    ///
187    /// # Errors
188    ///
189    /// Returns a [`ParseError`] if the lexer fails to produce a token while filling the lookahead buffer.
190    #[inline]
191    pub fn lookahead(&mut self, n: usize) -> Result<Option<Token<'input>>, ParseError> {
192        if n < self.buffer.len() {
193            return Ok(self.buffer.get(n));
194        }
195
196        match self.fill_buffer(n + 1) {
197            Ok(Some(_)) => Ok(self.buffer.get(n)),
198            Ok(None) => Ok(None),
199            Err(error) => Err(error.into()),
200        }
201    }
202
203    /// Peeks at the kind of the nth (0-indexed) significant token ahead.
204    ///
205    /// More efficient than `lookahead(n)?.map(|t| t.kind)` as it avoids
206    /// copying the full token when only the kind is needed.
207    ///
208    /// # Errors
209    ///
210    /// Returns a [`ParseError`] if the lexer fails to produce a token while filling the lookahead buffer.
211    #[inline]
212    pub fn peek_kind(&mut self, n: usize) -> Result<Option<TokenKind>, ParseError> {
213        if n < self.buffer.len() {
214            return Ok(self.buffer.get(n).map(|t| t.kind));
215        }
216
217        match self.fill_buffer(n + 1) {
218            Ok(Some(_)) => Ok(self.buffer.get(n).map(|t| t.kind)),
219            Ok(None) => Ok(None),
220            Err(error) => Err(error.into()),
221        }
222    }
223
224    /// Creates a `ParseError` for an unexpected token or EOF, given one or more expected kinds.
225    #[inline]
226    #[must_use]
227    pub fn unexpected(&self, found: Option<Token<'_>>, expected: &'static [TokenKind]) -> ParseError {
228        self.unexpected_with(found, Expected::OneOf(expected))
229    }
230
231    /// Creates a `ParseError` for an unexpected token or EOF when a single, runtime-known kind was expected.
232    #[inline]
233    #[must_use]
234    pub fn unexpected_kind(&self, found: Option<Token<'_>>, expected: TokenKind) -> ParseError {
235        self.unexpected_with(found, Expected::Exactly(expected))
236    }
237
238    #[inline]
239    #[must_use]
240    fn unexpected_with(&self, found: Option<Token<'_>>, expected: Expected) -> ParseError {
241        if let Some(token) = found {
242            ParseError::UnexpectedToken(expected, token.kind, token.span_for(self.file_id()))
243        } else {
244            ParseError::UnexpectedEndOfFile(expected, self.file_id(), self.current_position())
245        }
246    }
247
248    /// Consumes the comments collected by the lexer and returns them.
249    #[inline]
250    pub fn get_trivia(&mut self) -> Sequence<'arena, Trivia<'arena>> {
251        let mut tokens = Vec::new_in(self.arena);
252        std::mem::swap(&mut self.trivia, &mut tokens);
253
254        let file_id = self.file_id();
255        Sequence::new(
256            tokens
257                .into_iter()
258                .filter_map(|token| {
259                    let span = token.span_for(file_id);
260                    let kind = match token.kind {
261                        TokenKind::Whitespace => TriviaKind::WhiteSpace,
262                        TokenKind::HashComment => TriviaKind::HashComment,
263                        TokenKind::SingleLineComment => TriviaKind::SingleLineComment,
264                        TokenKind::MultiLineComment => TriviaKind::MultiLineComment,
265                        TokenKind::DocBlockComment => TriviaKind::DocBlockComment,
266                        // Tokens collected into `self.trivia` are guaranteed by `fill_buffer_slow`
267                        // to satisfy `kind.is_trivia()`; any non-trivia kind here is a parser bug
268                        // and the safe response is to drop it rather than panic.
269                        _ => return None,
270                    };
271                    Some(Trivia { kind, span, value: token.value })
272                })
273                .collect_in(self.arena),
274        )
275    }
276
277    /// Fills the token buffer until at least `n` tokens are available, unless the lexer returns EOF.
278    ///
279    /// Trivia tokens are collected separately and are not stored in the main token buffer.
280    #[inline]
281    fn fill_buffer(&mut self, n: usize) -> Result<Option<usize>, SyntaxError> {
282        if self.buffer.len() >= n {
283            return Ok(Some(n));
284        }
285
286        self.fill_buffer_slow(n)
287    }
288
289    #[inline(never)]
290    fn fill_buffer_slow(&mut self, n: usize) -> Result<Option<usize>, SyntaxError> {
291        while self.buffer.len() < n {
292            match self.lexer.advance() {
293                Some(result) => {
294                    let token = result?;
295                    if token.kind.is_trivia() {
296                        self.trivia.push(token);
297                        continue;
298                    }
299                    self.buffer.push_back(token);
300                }
301                None => return Ok(None),
302            }
303        }
304
305        Ok(Some(n))
306    }
307}
308
// Exposes the file identifier that `TokenStream::new` read from the lexer.
impl HasFileId for TokenStream<'_, '_> {
    /// Returns the stored file identifier; no lexer access is involved.
    #[inline]
    fn file_id(&self) -> FileId {
        self.file_id
    }
}
314}