mago_syntax/parser/
stream.rs1use std::collections::VecDeque;
2use std::fmt::Debug;
3
4use bumpalo::Bump;
5use bumpalo::collections::CollectIn;
6use bumpalo::collections::Vec;
7
8use mago_database::file::HasFileId;
9use mago_span::Position;
10use mago_span::Span;
11
12use crate::ast::sequence::Sequence;
13use crate::ast::trivia::Trivia;
14use crate::ast::trivia::TriviaKind;
15use crate::error::ParseError;
16use crate::error::SyntaxError;
17use crate::lexer::Lexer;
18use crate::token::Token;
19use crate::token::TokenKind;
20
21#[derive(Debug)]
22pub struct TokenStream<'input, 'arena> {
23 arena: &'arena Bump,
24 lexer: Lexer<'input>,
25 buffer: VecDeque<Token<'input>>,
26 trivia: Vec<'arena, Token<'input>>,
27 position: Position,
28}
29
30impl<'input, 'arena> TokenStream<'input, 'arena> {
/// Number of token slots reserved up front for the lookahead buffer.
const BUFFER_INITIAL_CAPACITY: usize = 8;
33
34 pub fn new(arena: &'arena Bump, lexer: Lexer<'input>) -> TokenStream<'input, 'arena> {
35 let position = lexer.current_position();
36
37 TokenStream {
38 arena,
39 lexer,
40 buffer: VecDeque::with_capacity(Self::BUFFER_INITIAL_CAPACITY),
41 trivia: Vec::new_in(arena),
42 position,
43 }
44 }
45
46 #[inline]
51 pub const fn current_position(&self) -> Position {
52 self.position
53 }
54
55 #[inline]
56 pub fn has_reached_eof(&mut self) -> Result<bool, SyntaxError> {
57 Ok(self.fill_buffer(1)?.is_none())
58 }
59
60 #[inline]
64 pub fn consume(&mut self) -> Result<Token<'input>, ParseError> {
65 match self.advance() {
66 Some(Ok(token)) => Ok(token),
67 Some(Err(error)) => Err(error.into()),
68 None => Err(self.unexpected(None, &[])),
69 }
70 }
71
72 #[inline]
76 pub fn eat(&mut self, kind: TokenKind) -> Result<Token<'input>, ParseError> {
77 let current_kind = self.peek_kind(0)?;
79 match current_kind {
80 Some(k) if k == kind => self.consume(),
81 Some(_) => {
82 let token = self.lookahead(0)?.unwrap();
83
84 Err(self.unexpected(Some(token), &[kind]))
85 }
86 None => Err(self.unexpected(None, &[kind])),
87 }
88 }
89
90 #[inline]
94 pub fn consume_span(&mut self) -> Result<Span, ParseError> {
95 let file_id = self.file_id();
96 self.consume().map(|t| t.span_for(file_id))
97 }
98
99 #[inline]
103 pub fn eat_span(&mut self, kind: TokenKind) -> Result<Span, ParseError> {
104 let file_id = self.file_id();
105 self.eat(kind).map(|t| t.span_for(file_id))
106 }
107
108 #[inline]
116 pub fn advance(&mut self) -> Option<Result<Token<'input>, SyntaxError>> {
117 match self.fill_buffer(1) {
118 Ok(Some(_)) => {
119 if let Some(token) = self.buffer.pop_front() {
120 self.position = Position::new(token.start.offset + token.value.len() as u32);
122 Some(Ok(token))
123 } else {
124 None
125 }
126 }
127 Ok(None) => None,
128 Err(error) => Some(Err(error)),
129 }
130 }
131
132 #[inline]
136 pub fn is_at(&mut self, kind: TokenKind) -> Result<bool, ParseError> {
137 Ok(self.peek_kind(0)? == Some(kind))
138 }
139
140 #[inline]
144 pub fn lookahead(&mut self, n: usize) -> Result<Option<Token<'input>>, ParseError> {
145 match self.fill_buffer(n + 1) {
146 Ok(Some(_)) => Ok(self.buffer.get(n).copied()),
147 Ok(None) => Ok(None),
148 Err(error) => Err(error.into()),
149 }
150 }
151
152 #[inline]
157 pub fn peek_kind(&mut self, n: usize) -> Result<Option<TokenKind>, ParseError> {
158 match self.fill_buffer(n + 1) {
159 Ok(Some(_)) => Ok(self.buffer.get(n).map(|t| t.kind)),
160 Ok(None) => Ok(None),
161 Err(error) => Err(error.into()),
162 }
163 }
164
165 #[inline]
167 pub fn unexpected(&self, found: Option<Token<'_>>, expected: &[TokenKind]) -> ParseError {
168 let expected_kinds: Box<[TokenKind]> = expected.into();
169 if let Some(token) = found {
170 ParseError::UnexpectedToken(expected_kinds, token.kind, token.span_for(self.file_id()))
171 } else {
172 ParseError::UnexpectedEndOfFile(expected_kinds, self.file_id(), self.current_position())
173 }
174 }
175
176 #[inline]
178 pub fn get_trivia(&mut self) -> Sequence<'arena, Trivia<'arena>> {
179 let mut tokens = Vec::new_in(self.arena);
180 std::mem::swap(&mut self.trivia, &mut tokens);
181
182 let file_id = self.file_id();
183 Sequence::new(
184 tokens
185 .into_iter()
186 .map(|token| {
187 let span = token.span_for(file_id);
188 match token.kind {
189 TokenKind::Whitespace => Trivia { kind: TriviaKind::WhiteSpace, span, value: token.value },
190 TokenKind::HashComment => Trivia { kind: TriviaKind::HashComment, span, value: token.value },
191 TokenKind::SingleLineComment => {
192 Trivia { kind: TriviaKind::SingleLineComment, span, value: token.value }
193 }
194 TokenKind::MultiLineComment => {
195 Trivia { kind: TriviaKind::MultiLineComment, span, value: token.value }
196 }
197 TokenKind::DocBlockComment => {
198 Trivia { kind: TriviaKind::DocBlockComment, span, value: token.value }
199 }
200 _ => unreachable!(),
201 }
202 })
203 .collect_in(self.arena),
204 )
205 }
206
207 #[inline]
211 fn fill_buffer(&mut self, n: usize) -> Result<Option<usize>, SyntaxError> {
212 if self.buffer.len() >= n {
213 return Ok(Some(n));
214 }
215
216 self.fill_buffer_slow(n)
217 }
218
219 #[inline(never)]
220 fn fill_buffer_slow(&mut self, n: usize) -> Result<Option<usize>, SyntaxError> {
221 while self.buffer.len() < n {
222 match self.lexer.advance() {
223 Some(result) => match result {
224 Ok(token) => {
225 if token.kind.is_trivia() {
226 self.trivia.push(token);
227 continue;
228 }
229 self.buffer.push_back(token);
230 }
231 Err(error) => return Err(error),
232 },
233 None => return Ok(None),
234 }
235 }
236
237 Ok(Some(n))
238 }
239}
240
241impl HasFileId for TokenStream<'_, '_> {
242 fn file_id(&self) -> mago_database::file::FileId {
243 self.lexer.file_id()
244 }
245}