1mod keyword;
2
3use mago_database::file::FileId;
4use mago_database::file::HasFileId;
5use mago_span::Position;
6use mago_syntax_core::float_exponent;
7use mago_syntax_core::float_separator;
8use mago_syntax_core::input::Input;
9use mago_syntax_core::number_sign;
10use mago_syntax_core::part_of_identifier;
11use mago_syntax_core::start_of_binary_number;
12use mago_syntax_core::start_of_float_number;
13use mago_syntax_core::start_of_hexadecimal_number;
14use mago_syntax_core::start_of_identifier;
15use mago_syntax_core::start_of_number;
16use mago_syntax_core::start_of_octal_number;
17use mago_syntax_core::start_of_octal_or_float_number;
18use mago_syntax_core::utils::read_digits_of_base;
19
20use crate::error::SyntaxError;
21use crate::token::TypeToken;
22use crate::token::TypeTokenKind;
23
/// Lexer that turns an [`Input`] into a sequence of [`TypeToken`]s.
///
/// Tokens are produced one at a time by [`TypeLexer::advance`].
#[derive(Debug)]
pub struct TypeLexer<'input> {
    /// Cursor over the bytes being tokenized.
    input: Input<'input>,
}
28
29impl<'input> TypeLexer<'input> {
30 #[inline]
31 #[must_use]
32 pub fn new(input: Input<'input>) -> TypeLexer<'input> {
33 TypeLexer { input }
34 }
35
/// Returns `true` once the underlying input reports that it has reached its end.
#[inline]
#[must_use]
pub fn has_reached_eof(&self) -> bool {
    self.input.has_reached_eof()
}
41
/// Returns the current position of the underlying input cursor.
#[inline]
#[must_use]
pub fn current_position(&self) -> Position {
    self.input.current_position()
}
47
48 #[inline]
49 #[must_use]
50 pub fn slice_in_range(&self, from: u32, to: u32) -> &'input str {
51 let bytes_slice = self.input.slice_in_range(from, to);
52 bytes_slice.utf8_chunks().next().map_or("", |chunk| chunk.valid())
53 }
54
55 #[inline]
56 pub fn advance(&mut self) -> Option<Result<TypeToken<'input>, SyntaxError>> {
57 if self.input.has_reached_eof() {
58 return None;
59 }
60
61 let start = self.input.current_position();
62 let whitespaces = self.input.consume_whitespaces();
63 if !whitespaces.is_empty() {
64 let end = self.input.current_position();
65 return Some(Ok(self.token(TypeTokenKind::Whitespace, whitespaces, start, end)));
66 }
67
68 let (kind, length) = match self.input.read(3) {
69 [b'*', ..] => (TypeTokenKind::Asterisk, 1),
70 [b'.', b'.', b'.'] => (TypeTokenKind::Ellipsis, 3),
71 [b':', b':', ..] => (TypeTokenKind::ColonColon, 2),
72 [b'/', b'/', ..] => self.read_single_line_comment(),
73 [b'.', start_of_number!(), ..] => self.read_decimal(),
74 [start_of_number!(), ..] => self.read_number(),
75 [quote @ (b'\'' | b'"'), ..] => self.read_literal_string(*quote),
76 [b'\\', start_of_identifier!(), ..] => self.read_fully_qualified_identifier(),
77 [start_of_identifier!(), ..] => self.read_identifier_or_keyword(),
78 [b'$', start_of_identifier!(), ..] => self.read_variable(),
79 [b':', ..] => (TypeTokenKind::Colon, 1),
80 [b'=', ..] => (TypeTokenKind::Equals, 1),
81 [b'?', ..] => (TypeTokenKind::Question, 1),
82 [b'!', ..] => (TypeTokenKind::Exclamation, 1),
83 [b'&', ..] => (TypeTokenKind::Ampersand, 1),
84 [b'|', ..] => (TypeTokenKind::Pipe, 1),
85 [b'>', ..] => (TypeTokenKind::GreaterThan, 1),
86 [b'<', ..] => (TypeTokenKind::LessThan, 1),
87 [b'(', ..] => (TypeTokenKind::LeftParenthesis, 1),
88 [b')', ..] => (TypeTokenKind::RightParenthesis, 1),
89 [b'[', ..] => (TypeTokenKind::LeftBracket, 1),
90 [b']', ..] => (TypeTokenKind::RightBracket, 1),
91 [b'{', ..] => (TypeTokenKind::LeftBrace, 1),
92 [b'}', ..] => (TypeTokenKind::RightBrace, 1),
93 [b',', ..] => (TypeTokenKind::Comma, 1),
94 [b'+', ..] => (TypeTokenKind::Plus, 1),
95 [b'-', ..] => (TypeTokenKind::Minus, 1),
96 [unknown_byte, ..] => {
97 return Some(Err(SyntaxError::UnrecognizedToken(
98 self.file_id(),
99 *unknown_byte,
100 self.input.current_position(),
101 )));
102 }
103 [] => unreachable!(),
104 };
105
106 let buffer = self.input.consume(length);
107 let end = self.input.current_position();
108
109 Some(Ok(self.token(kind, buffer, start, end)))
110 }
111
112 #[inline]
113 fn read_variable(&self) -> (TypeTokenKind, usize) {
114 let mut length = 2;
115 while let [part_of_identifier!(), ..] = self.input.peek(length, 1) {
116 length += 1;
117 }
118 (TypeTokenKind::Variable, length)
119 }
120
121 #[inline]
122 fn read_single_line_comment(&self) -> (TypeTokenKind, usize) {
123 let mut length = 2;
124 loop {
125 match self.input.peek(length, 1) {
126 [b'\n', ..] | [] => break,
127 [_, ..] => length += 1,
128 }
129 }
130 (TypeTokenKind::SingleLineComment, length)
131 }
132
133 #[inline]
134 fn read_decimal(&self) -> (TypeTokenKind, usize) {
135 let mut length = read_digits_of_base(&self.input, 2, 10);
136 if let float_exponent!() = self.input.peek(length, 1) {
137 length += 1;
138 if let number_sign!() = self.input.peek(length, 1) {
139 length += 1;
140 }
141 length = read_digits_of_base(&self.input, length, 10);
142 }
143 (TypeTokenKind::LiteralFloat, length)
144 }
145
146 #[inline]
147 fn read_number(&self) -> (TypeTokenKind, usize) {
148 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
149 enum NumberKind {
150 Integer,
151 Float,
152 OctalOrFloat,
153 IntegerOrFloat,
154 }
155
156 let mut length = 1;
157 let (base, kind): (u8, NumberKind) = match self.input.read(3) {
158 start_of_binary_number!() => {
159 length += 1;
160 (2, NumberKind::Integer)
161 }
162 start_of_octal_number!() => {
163 length += 1;
164 (8, NumberKind::Integer)
165 }
166 start_of_hexadecimal_number!() => {
167 length += 1;
168 (16, NumberKind::Integer)
169 }
170 start_of_octal_or_float_number!() => (10, NumberKind::OctalOrFloat),
171 start_of_float_number!() => (10, NumberKind::Float),
172 _ => (10, NumberKind::IntegerOrFloat),
173 };
174
175 if kind != NumberKind::Float {
176 length = read_digits_of_base(&self.input, length, base);
177 if kind == NumberKind::Integer {
178 return (TypeTokenKind::LiteralInteger, length);
179 }
180 }
181
182 let is_float = matches!(self.input.peek(length, 3), float_separator!());
183 if !is_float {
184 return (TypeTokenKind::LiteralInteger, length);
185 }
186
187 if let [b'.'] = self.input.peek(length, 1) {
188 length += 1;
189 length = read_digits_of_base(&self.input, length, 10);
190 }
191
192 if let float_exponent!() = self.input.peek(length, 1) {
193 length += 1;
194 if let number_sign!() = self.input.peek(length, 1) {
195 length += 1;
196 }
197 length = read_digits_of_base(&self.input, length, 10);
198 }
199
200 (TypeTokenKind::LiteralFloat, length)
201 }
202
203 #[inline]
204 fn read_literal_string(&self, quote: u8) -> (TypeTokenKind, usize) {
205 let total = self.input.len();
206 let start = self.input.current_offset();
207 let mut length = 1;
208 let mut last_was_backslash = false;
209 let mut partial = false;
210
211 loop {
212 let pos = start + length;
213 if pos >= total {
214 partial = true;
215 break;
216 }
217
218 let byte = self.input.read_at(pos);
219 if *byte == b'\\' {
220 last_was_backslash = !last_was_backslash;
221 length += 1;
222 } else {
223 if byte == "e && !last_was_backslash {
224 length += 1;
225 break;
226 }
227 length += 1;
228 last_was_backslash = false;
229 }
230 }
231
232 if partial { (TypeTokenKind::PartialLiteralString, length) } else { (TypeTokenKind::LiteralString, length) }
233 }
234
235 #[inline]
236 fn read_fully_qualified_identifier(&self) -> (TypeTokenKind, usize) {
237 let mut length = 2;
238 let mut last_was_slash = false;
239 loop {
240 match self.input.peek(length, 1) {
241 [start_of_identifier!(), ..] if last_was_slash => {
242 length += 1;
243 last_was_slash = false;
244 }
245 [part_of_identifier!(), ..] if !last_was_slash => {
246 length += 1;
247 }
248 [b'\\', ..] => {
249 if last_was_slash {
250 length -= 1;
251 break;
252 }
253 length += 1;
254 last_was_slash = true;
255 }
256 _ => break,
257 }
258 }
259 (TypeTokenKind::FullyQualifiedIdentifier, length)
260 }
261
262 #[inline]
265 fn read_identifier_or_keyword(&self) -> (TypeTokenKind, usize) {
266 let mut length = 1;
267 let mut next_is_hyphen = false;
268 let mut next_is_backslash = false;
269
270 loop {
271 match self.input.peek(length, 2) {
272 [part_of_identifier!(), ..] => length += 1,
273 [b'-', start_of_identifier!() | part_of_identifier!(), ..] => {
274 next_is_hyphen = true;
275 break;
276 }
277 [b'\\', start_of_identifier!(), ..] => {
278 next_is_backslash = true;
279 break;
280 }
281 _ => break,
282 }
283 }
284
285 if next_is_backslash {
286 return self.finish_qualified_identifier(length);
287 }
288
289 if !next_is_hyphen {
290 let bytes = self.input.read(length);
291 if let Some(kind) = keyword::lookup_keyword(bytes) {
292 return (kind, length);
293 }
294 return (TypeTokenKind::Identifier, length);
295 }
296
297 let base_len = length;
298 loop {
299 match self.input.peek(length, 2) {
300 [part_of_identifier!(), ..] => length += 1,
301 [b'-', start_of_identifier!() | part_of_identifier!(), ..] => length += 1,
302 _ => break,
303 }
304 }
305
306 let bytes = self.input.read(length);
307 if let Some(kind) = keyword::lookup_keyword(bytes) {
308 return (kind, length);
309 }
310
311 let base_bytes = self.input.read(base_len);
312 if let Some(kind) = keyword::lookup_keyword(base_bytes) {
313 return (kind, base_len);
314 }
315
316 (TypeTokenKind::Identifier, base_len)
317 }
318
319 #[inline]
321 fn finish_qualified_identifier(&self, start_len: usize) -> (TypeTokenKind, usize) {
322 let mut length = start_len;
323 let mut slashes = 0;
324 let mut last_was_slash = false;
325
326 loop {
327 match self.input.peek(length, 1) {
328 [start_of_identifier!(), ..] if last_was_slash => {
329 length += 1;
330 last_was_slash = false;
331 }
332 [part_of_identifier!(), ..] if !last_was_slash => {
333 length += 1;
334 }
335 [b'\\', ..] => {
336 if last_was_slash {
337 length -= 1;
338 slashes -= 1;
339 break;
340 }
341 length += 1;
342 slashes += 1;
343 last_was_slash = true;
344 }
345 _ => break,
346 }
347 }
348
349 if last_was_slash {
350 length -= 1;
351 slashes -= 1;
352 }
353
354 if slashes > 0 { (TypeTokenKind::QualifiedIdentifier, length) } else { (TypeTokenKind::Identifier, length) }
355 }
356
357 #[inline]
358 fn token(&self, kind: TypeTokenKind, value: &'input [u8], start: Position, _end: Position) -> TypeToken<'input> {
359 let value_str = value.utf8_chunks().next().map_or("", |chunk| chunk.valid());
360 debug_assert_eq!(value_str.len(), value.len());
361 TypeToken { kind, start, value: value_str }
362 }
363}
364
/// Exposes the file id of the input being tokenized.
impl HasFileId for TypeLexer<'_> {
    /// Returns the file id reported by the underlying input.
    #[inline]
    fn file_id(&self) -> FileId {
        self.input.file_id()
    }
}