Skip to main content

mago_syntax/parser/
stream.rs

1use std::fmt::Debug;
2
3use bumpalo::Bump;
4use bumpalo::collections::CollectIn;
5use bumpalo::collections::Vec;
6
7use mago_database::file::FileId;
8use mago_database::file::HasFileId;
9use mago_span::Position;
10use mago_span::Span;
11use mago_syntax_core::parser::LookaheadBuf;
12
13use crate::ast::sequence::Sequence;
14use crate::ast::trivia::Trivia;
15use crate::ast::trivia::TriviaKind;
16use crate::error::ParseError;
17use crate::error::SyntaxError;
18use crate::lexer::Lexer;
19use crate::token::Token;
20use crate::token::TokenKind;
21
22#[derive(Debug)]
23pub struct TokenStream<'input, 'arena> {
24    arena: &'arena Bump,
25    lexer: Lexer<'input>,
26    buffer: LookaheadBuf<Token<'input>, 16>,
27    trivia: Vec<'arena, Token<'input>>,
28    position: Position,
29    file_id: FileId,
30}
31
32impl<'input, 'arena> TokenStream<'input, 'arena> {
33    pub fn new(arena: &'arena Bump, lexer: Lexer<'input>) -> TokenStream<'input, 'arena> {
34        let position = lexer.current_position();
35        let file_id_cached = lexer.file_id();
36
37        TokenStream {
38            arena,
39            lexer,
40            buffer: LookaheadBuf::new(),
41            trivia: Vec::new_in(arena),
42            position,
43            file_id: file_id_cached,
44        }
45    }
46
47    /// Returns the current position of the stream within the source file.
48    ///
49    /// This position represents the end location of the most recently
50    /// consumed significant token via `advance()` or `consume()`.
51    #[inline]
52    pub const fn current_position(&self) -> Position {
53        self.position
54    }
55
56    #[inline]
57    pub fn has_reached_eof(&mut self) -> Result<bool, SyntaxError> {
58        Ok(self.fill_buffer(1)?.is_none())
59    }
60
61    /// Consumes and returns the next significant token.
62    ///
63    /// Returns an error if EOF is reached or a lexer error occurs.
64    #[inline]
65    pub fn consume(&mut self) -> Result<Token<'input>, ParseError> {
66        match self.advance() {
67            Some(Ok(token)) => Ok(token),
68            Some(Err(error)) => Err(error.into()),
69            None => Err(self.unexpected(None, &[])),
70        }
71    }
72
73    /// Consumes the next token only if it matches the expected kind.
74    ///
75    /// Returns the token if it matches, otherwise returns an error.
76    #[inline]
77    pub fn eat(&mut self, kind: TokenKind) -> Result<Token<'input>, ParseError> {
78        // Fast path: head already buffered. Avoids the Result<Option<...>>
79        // round trip from `peek_kind` plus a follow-up `lookahead` on the
80        // happy path.
81        if let Some(token) = self.buffer.get(0) {
82            if token.kind == kind {
83                let _ = self.buffer.pop_front();
84
85                self.position = Position::new(token.start.offset + token.value.len() as u32);
86                return Ok(token);
87            }
88
89            return Err(self.unexpected(Some(token), &[kind]));
90        }
91
92        // Slow path: buffer empty, fill it.
93        let current_kind = self.peek_kind(0)?;
94        match current_kind {
95            Some(k) if k == kind => self.consume(),
96            Some(_) => {
97                let token = self.lookahead(0)?.unwrap();
98                Err(self.unexpected(Some(token), &[kind]))
99            }
100            None => Err(self.unexpected(None, &[kind])),
101        }
102    }
103
104    /// Consumes and returns the span of the next significant token.
105    ///
106    /// This is a convenience method equivalent to `consume()?.span_for(file_id())`.
107    #[inline]
108    pub fn consume_span(&mut self) -> Result<Span, ParseError> {
109        let file_id = self.file_id();
110        self.consume().map(|t| t.span_for(file_id))
111    }
112
113    /// Consumes the next token only if it matches the expected kind, returning its span.
114    ///
115    /// This is a convenience method equivalent to `eat(kind)?.span_for(file_id())`.
116    #[inline]
117    pub fn eat_span(&mut self, kind: TokenKind) -> Result<Span, ParseError> {
118        let file_id = self.file_id();
119        self.eat(kind).map(|t| t.span_for(file_id))
120    }
121
122    /// Advances the stream to the next token in the input source code and returns it.
123    ///
124    /// If the stream has already read the entire input source code, this method will return `None`.
125    ///
126    /// # Returns
127    ///
128    /// The next token in the input source code, or `None` if the lexer has reached the end of the input.
129    #[inline]
130    pub fn advance(&mut self) -> Option<Result<Token<'input>, SyntaxError>> {
131        match self.fill_buffer(1) {
132            Ok(Some(_)) => {
133                if let Some(token) = self.buffer.pop_front() {
134                    // Compute end position from start + value length
135                    self.position = Position::new(token.start.offset + token.value.len() as u32);
136                    Some(Ok(token))
137                } else {
138                    None
139                }
140            }
141            Ok(None) => None,
142            Err(error) => Some(Err(error)),
143        }
144    }
145
146    /// Checks if the next token matches the given kind without consuming it.
147    ///
148    /// Returns `false` if at EOF.
149    #[inline]
150    pub fn is_at(&mut self, kind: TokenKind) -> Result<bool, ParseError> {
151        if let Some(token) = self.buffer.get(0) {
152            return Ok(token.kind == kind);
153        }
154
155        Ok(self.peek_kind(0)? == Some(kind))
156    }
157
158    /// Peeks at the nth (0-indexed) significant token ahead without consuming it.
159    ///
160    /// Returns `Ok(None)` if EOF is reached before the nth token.
161    #[inline]
162    pub fn lookahead(&mut self, n: usize) -> Result<Option<Token<'input>>, ParseError> {
163        if n < self.buffer.len() {
164            return Ok(self.buffer.get(n));
165        }
166
167        match self.fill_buffer(n + 1) {
168            Ok(Some(_)) => Ok(self.buffer.get(n)),
169            Ok(None) => Ok(None),
170            Err(error) => Err(error.into()),
171        }
172    }
173
174    /// Peeks at the kind of the nth (0-indexed) significant token ahead.
175    ///
176    /// More efficient than `lookahead(n)?.map(|t| t.kind)` as it avoids
177    /// copying the full token when only the kind is needed.
178    #[inline]
179    pub fn peek_kind(&mut self, n: usize) -> Result<Option<TokenKind>, ParseError> {
180        if n < self.buffer.len() {
181            return Ok(self.buffer.get(n).map(|t| t.kind));
182        }
183
184        match self.fill_buffer(n + 1) {
185            Ok(Some(_)) => Ok(self.buffer.get(n).map(|t| t.kind)),
186            Ok(None) => Ok(None),
187            Err(error) => Err(error.into()),
188        }
189    }
190
191    /// Creates a `ParseError` for an unexpected token or EOF.
192    #[inline]
193    pub fn unexpected(&self, found: Option<Token<'_>>, expected: &[TokenKind]) -> ParseError {
194        let expected_kinds: Box<[TokenKind]> = expected.into();
195        if let Some(token) = found {
196            ParseError::UnexpectedToken(expected_kinds, token.kind, token.span_for(self.file_id()))
197        } else {
198            ParseError::UnexpectedEndOfFile(expected_kinds, self.file_id(), self.current_position())
199        }
200    }
201
202    /// Consumes the comments collected by the lexer and returns them.
203    #[inline]
204    pub fn get_trivia(&mut self) -> Sequence<'arena, Trivia<'arena>> {
205        let mut tokens = Vec::new_in(self.arena);
206        std::mem::swap(&mut self.trivia, &mut tokens);
207
208        let file_id = self.file_id();
209        Sequence::new(
210            tokens
211                .into_iter()
212                .map(|token| {
213                    let span = token.span_for(file_id);
214                    match token.kind {
215                        TokenKind::Whitespace => Trivia { kind: TriviaKind::WhiteSpace, span, value: token.value },
216                        TokenKind::HashComment => Trivia { kind: TriviaKind::HashComment, span, value: token.value },
217                        TokenKind::SingleLineComment => {
218                            Trivia { kind: TriviaKind::SingleLineComment, span, value: token.value }
219                        }
220                        TokenKind::MultiLineComment => {
221                            Trivia { kind: TriviaKind::MultiLineComment, span, value: token.value }
222                        }
223                        TokenKind::DocBlockComment => {
224                            Trivia { kind: TriviaKind::DocBlockComment, span, value: token.value }
225                        }
226                        _ => unreachable!(),
227                    }
228                })
229                .collect_in(self.arena),
230        )
231    }
232
233    /// Fills the token buffer until at least `n` tokens are available, unless the lexer returns EOF.
234    ///
235    /// Trivia tokens are collected separately and are not stored in the main token buffer.
236    #[inline]
237    fn fill_buffer(&mut self, n: usize) -> Result<Option<usize>, SyntaxError> {
238        if self.buffer.len() >= n {
239            return Ok(Some(n));
240        }
241
242        self.fill_buffer_slow(n)
243    }
244
245    #[inline(never)]
246    fn fill_buffer_slow(&mut self, n: usize) -> Result<Option<usize>, SyntaxError> {
247        while self.buffer.len() < n {
248            match self.lexer.advance() {
249                Some(result) => {
250                    let token = result?;
251                    if token.kind.is_trivia() {
252                        self.trivia.push(token);
253                        continue;
254                    }
255                    self.buffer.push_back(token);
256                }
257                None => return Ok(None),
258            }
259        }
260
261        Ok(Some(n))
262    }
263}
264
265impl HasFileId for TokenStream<'_, '_> {
266    #[inline]
267    fn file_id(&self) -> FileId {
268        self.file_id
269    }
270}