1use mago_span::Position;
2use mago_span::Span;
3use mago_syntax_core::float_exponent;
4use mago_syntax_core::float_separator;
5use mago_syntax_core::input::Input;
6use mago_syntax_core::number_sign;
7use mago_syntax_core::part_of_identifier;
8use mago_syntax_core::start_of_binary_number;
9use mago_syntax_core::start_of_float_number;
10use mago_syntax_core::start_of_hexadecimal_number;
11use mago_syntax_core::start_of_identifier;
12use mago_syntax_core::start_of_number;
13use mago_syntax_core::start_of_octal_number;
14use mago_syntax_core::start_of_octal_or_float_number;
15use mago_syntax_core::utils::read_digits_of_base;
16
17use crate::error::SyntaxError;
18use crate::token::TypeToken;
19use crate::token::TypeTokenKind;
20
21#[derive(Debug)]
22pub struct TypeLexer<'input> {
23 input: Input<'input>,
24}
25
26impl<'input> TypeLexer<'input> {
27 pub fn new(input: Input<'input>) -> TypeLexer<'input> {
28 TypeLexer { input }
29 }
30
31 pub fn has_reached_eof(&self) -> bool {
32 self.input.has_reached_eof()
33 }
34
35 pub fn current_position(&self) -> Position {
36 self.input.current_position()
37 }
38
39 #[inline]
40 pub fn advance(&mut self) -> Option<Result<TypeToken<'input>, SyntaxError>> {
41 if self.input.has_reached_eof() {
42 return None;
43 }
44
45 let start = self.input.current_position();
46 let whitespaces = self.input.consume_whitespaces();
47 if !whitespaces.is_empty() {
48 let end = self.input.current_position();
49
50 return self.token(TypeTokenKind::Whitespace, whitespaces, start, end);
51 }
52
53 let (kind, length) = match self.input.read(3) {
54 [b'n' | b'N', b'o' | b'O', b'n' | b'N'] => {
55 if self.input.is_at(b"non-empty-literal-string", true) {
56 (TypeTokenKind::NonEmptyUnspecifiedLiteralString, 26)
57 } else if self.input.is_at(b"non-empty-string", true) {
58 (TypeTokenKind::NonEmptyString, 16)
59 } else if self.input.is_at(b"non-empty-array", true) {
60 (TypeTokenKind::NonEmptyArray, 15)
61 } else if self.input.is_at(b"non-empty-list", true) {
62 (TypeTokenKind::NonEmptyList, 14)
63 } else {
64 self.read_identifier()
65 }
66 }
67 [b'p' | b'P', b'u' | b'U', b'r' | b'R'] => {
68 if self.input.is_at(b"pure-closure", true) {
69 (TypeTokenKind::PureClosure, 12)
70 } else if self.input.is_at(b"pure-callable", true) {
71 (TypeTokenKind::PureCallable, 13)
72 } else {
73 self.read_identifier()
74 }
75 }
76 [b'n' | b'N', b'e' | b'E', b'v' | b'V'] => {
77 if self.input.is_at(b"never-return", true) {
78 (TypeTokenKind::NeverReturn, 12)
79 } else if self.input.is_at(b"never-returns", true) {
80 (TypeTokenKind::NeverReturns, 13)
81 } else {
82 self.read_identifier()
83 }
84 }
85 [b't' | b'T', b'r' | b'R', b'u' | b'U'] => {
86 if self.input.is_at(b"truthy-string", true) {
87 (TypeTokenKind::TruthyString, 13)
88 } else {
89 self.read_identifier()
90 }
91 }
92 [b't' | b'T', b'r' | b'R', b'a' | b'A'] => {
93 if self.input.is_at(b"trait-string", true) {
94 (TypeTokenKind::TraitString, 12)
95 } else {
96 self.read_identifier()
97 }
98 }
99 [b'a' | b'A', b's' | b'S', b's' | b'S'] => {
100 if self.input.is_at(b"associative-array", true) {
101 (TypeTokenKind::AssociativeArray, 17)
102 } else {
103 self.read_identifier()
104 }
105 }
106 [b'c' | b'C', b'l' | b'L', b'a' | b'A'] => {
107 if self.input.is_at(b"class-string", true) {
108 (TypeTokenKind::ClassString, 12)
109 } else {
110 self.read_identifier()
111 }
112 }
113 [b'e' | b'E', b'n' | b'N', b'u' | b'U'] => {
114 if self.input.is_at(b"enum-string", true) {
115 (TypeTokenKind::EnumString, 11)
116 } else {
117 self.read_identifier()
118 }
119 }
120 [b'i' | b'I', b'n' | b'N', b't' | b'T'] => {
121 if self.input.is_at(b"interface-string", true) {
122 (TypeTokenKind::InterfaceString, 16)
123 } else {
124 self.read_identifier()
125 }
126 }
127 [b'c' | b'C', b'l' | b'L', b'o' | b'O'] => {
128 if self.input.is_at(b"closed-resource", true) {
129 (TypeTokenKind::ClosedResource, 15)
130 } else {
131 self.read_identifier()
132 }
133 }
134 [b's' | b'S', b't' | b'T', b'r' | b'R'] => {
135 if self.input.is_at(b"stringable-object", true) {
136 (TypeTokenKind::StringableObject, 17)
137 } else {
138 self.read_identifier()
139 }
140 }
141 [b'n' | b'N', b'u' | b'U', b'm' | b'M'] => {
142 if self.input.is_at(b"numeric-string", true) {
143 (TypeTokenKind::NumericString, 14)
144 } else {
145 self.read_identifier()
146 }
147 }
148 [b'l' | b'L', b'i' | b'I', b't' | b'T'] => {
149 if self.input.is_at(b"literal-string", true) {
150 (TypeTokenKind::UnspecifiedLiteralString, 14)
151 } else if self.input.is_at(b"literal-int", true) {
152 (TypeTokenKind::UnspecifiedLiteralInt, 11)
153 } else {
154 self.read_identifier()
155 }
156 }
157 [b'l' | b'L', b'o' | b'O', b'w' | b'W'] => {
158 if self.input.is_at(b"lowercase-string", true) {
159 (TypeTokenKind::LowercaseString, 15)
160 } else {
161 self.read_identifier()
162 }
163 }
164 [b'o' | b'O', b'p' | b'P', b'e' | b'E'] => {
165 if self.input.is_at(b"open-resource", true) {
166 (TypeTokenKind::OpenResource, 13)
167 } else {
168 self.read_identifier()
169 }
170 }
171 [b'a' | b'A', b'r' | b'R', b'r' | b'R'] => {
172 if self.input.is_at(b"array-key", true) {
173 (TypeTokenKind::ArrayKey, 9)
174 } else {
175 self.read_identifier()
176 }
177 }
178 [b'n' | b'N', b'o' | b'O', b'-'] => {
179 if self.input.is_at(b"no-return", true) {
180 (TypeTokenKind::NoReturn, 9)
181 } else {
182 self.read_identifier()
183 }
184 }
185 [b'v' | b'V', b'a' | b'A', b'l' | b'L'] => {
186 if self.input.is_at(b"value-of", true) {
187 (TypeTokenKind::ValueOf, 8)
188 } else {
189 self.read_identifier()
190 }
191 }
192 [b'k' | b'K', b'e' | b'E', b'y' | b'Y'] => {
193 if self.input.is_at(b"key-of", true) {
194 (TypeTokenKind::KeyOf, 6)
195 } else {
196 self.read_identifier()
197 }
198 }
199 [b'p' | b'P', b'r' | b'R', b'o' | b'O'] => {
200 if self.input.is_at(b"protected-properties-of", true) {
201 (TypeTokenKind::ProtectedPropertiesOf, 23)
202 } else if self.input.is_at(b"properties-of", true) {
203 (TypeTokenKind::PropertiesOf, 13)
204 } else {
205 self.read_identifier()
206 }
207 }
208 [b'p' | b'P', b'u' | b'U', b'b' | b'B'] => {
209 if self.input.is_at(b"public-properties-of", true) {
210 (TypeTokenKind::PublicPropertiesOf, 20)
211 } else {
212 self.read_identifier()
213 }
214 }
215 [b'p' | b'P', b'r' | b'R', b'i' | b'I'] => {
216 if self.input.is_at(b"private-properties-of", true) {
217 (TypeTokenKind::PrivatePropertiesOf, 21)
218 } else {
219 self.read_identifier()
220 }
221 }
222 [b'p' | b'P', b'o' | b'O', b's' | b'S'] => {
223 if self.input.is_at(b"positive-int", true) {
224 (TypeTokenKind::PositiveInt, 12)
225 } else {
226 self.read_identifier()
227 }
228 }
229 [b'n' | b'N', b'e' | b'E', b'g' | b'G'] => {
230 if self.input.is_at(b"negative-int", true) {
231 (TypeTokenKind::NegativeInt, 12)
232 } else {
233 self.read_identifier()
234 }
235 }
236 [b'.', b'.', b'.'] => (TypeTokenKind::Ellipsis, 3),
237 [b':', b':', ..] => (TypeTokenKind::ColonColon, 2),
238 [b'/', b'/', ..] => self.read_single_line_comment(),
239 [b'.', start_of_number!(), ..] => self.read_decimal(),
240 [start_of_number!(), ..] => self.read_number(),
241 [quote @ b'\'' | quote @ b'"', ..] => self.read_literal_string(quote),
242 [b'\\', start_of_identifier!(), ..] => self.read_fully_qualified_identifier(),
243 [start_of_identifier!(), ..] => self.read_identifier(),
244 [b'$', start_of_identifier!(), ..] => {
245 let mut length = 2;
246 while let [part_of_identifier!(), ..] = self.input.peek(length, 1) {
247 length += 1;
248 }
249
250 (TypeTokenKind::Variable, length)
251 }
252 [b':', ..] => (TypeTokenKind::Colon, 1),
253 [b'=', ..] => (TypeTokenKind::Equals, 1),
254 [b'?', ..] => (TypeTokenKind::Question, 1),
255 [b'&', ..] => (TypeTokenKind::Ampersand, 1),
256 [b'|', ..] => (TypeTokenKind::Pipe, 1),
257 [b'>', ..] => (TypeTokenKind::GreaterThan, 1),
258 [b'<', ..] => (TypeTokenKind::LessThan, 1),
259 [b'(', ..] => (TypeTokenKind::LeftParenthesis, 1),
260 [b')', ..] => (TypeTokenKind::RightParenthesis, 1),
261 [b'[', ..] => (TypeTokenKind::LeftBracket, 1),
262 [b']', ..] => (TypeTokenKind::RightBracket, 1),
263 [b'{', ..] => (TypeTokenKind::LeftBrace, 1),
264 [b'}', ..] => (TypeTokenKind::RightBrace, 1),
265 [b',', ..] => (TypeTokenKind::Comma, 1),
266 [b'+', ..] => (TypeTokenKind::Plus, 1),
267 [b'-', ..] => (TypeTokenKind::Minus, 1),
268 [unknown_byte, ..] => {
269 return Some(Err(SyntaxError::UnrecognizedToken(*unknown_byte, self.input.current_position())));
270 }
271 [] => {
272 unreachable!()
273 }
274 };
275
276 let buffer = self.input.consume(length);
277 let end = self.input.current_position();
278
279 self.token(kind, buffer, start, end)
280 }
281
282 fn read_single_line_comment(&self) -> (TypeTokenKind, usize) {
283 let mut length = 2;
284 loop {
285 match self.input.peek(length, 1) {
286 [b'\n', ..] | [] => {
287 break;
288 }
289 [_, ..] => {
290 length += 1;
291 }
292 }
293 }
294
295 (TypeTokenKind::SingleLineComment, length)
296 }
297
298 fn read_decimal(&self) -> (TypeTokenKind, usize) {
299 let mut length = read_digits_of_base(&self.input, 2, 10);
300 if let float_exponent!() = self.input.peek(length, 1) {
301 length += 1;
302 if let number_sign!() = self.input.peek(length, 1) {
303 length += 1;
304 }
305
306 length = read_digits_of_base(&self.input, length, 10);
307 }
308
309 (TypeTokenKind::LiteralFloat, length)
310 }
311
312 fn read_number(&self) -> (TypeTokenKind, usize) {
313 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
314 pub enum NumberKind {
315 Integer,
316 Float,
317 OctalOrFloat,
318 IntegerOrFloat,
319 }
320
321 let mut length = 1;
322
323 let (base, kind): (u8, NumberKind) = match self.input.read(3) {
324 start_of_binary_number!() => {
325 length += 1;
326
327 (2, NumberKind::Integer)
328 }
329 start_of_octal_number!() => {
330 length += 1;
331
332 (8, NumberKind::Integer)
333 }
334 start_of_hexadecimal_number!() => {
335 length += 1;
336
337 (16, NumberKind::Integer)
338 }
339 start_of_octal_or_float_number!() => (10, NumberKind::OctalOrFloat),
340 start_of_float_number!() => (10, NumberKind::Float),
341 _ => (10, NumberKind::IntegerOrFloat),
342 };
343
344 if kind != NumberKind::Float {
345 length = read_digits_of_base(&self.input, length, base);
346
347 if kind == NumberKind::Integer {
348 return (TypeTokenKind::LiteralInteger, length);
349 }
350 }
351
352 let is_float = matches!(self.input.peek(length, 3), float_separator!());
353
354 if !is_float {
355 return (TypeTokenKind::LiteralInteger, length);
356 }
357
358 if let [b'.'] = self.input.peek(length, 1) {
359 length += 1;
360 length = read_digits_of_base(&self.input, length, 10);
361 }
362
363 if let float_exponent!() = self.input.peek(length, 1) {
364 length += 1;
365 if let number_sign!() = self.input.peek(length, 1) {
366 length += 1;
367 }
368
369 length = read_digits_of_base(&self.input, length, 10);
370 }
371
372 (TypeTokenKind::LiteralFloat, length)
373 }
374
375 fn read_literal_string(&self, quote: &u8) -> (TypeTokenKind, usize) {
376 let total = self.input.len();
377 let start = self.input.current_offset();
378 let mut length = 1; let mut last_was_backslash = false;
380 let mut partial = false;
381
382 loop {
383 let pos = start + length;
384 if pos >= total {
385 partial = true;
387 break;
388 }
389
390 let byte = self.input.read_at(pos);
391 if matches!(byte, b'\\') {
392 last_was_backslash = !last_was_backslash;
394 length += 1;
395 } else {
396 if byte == quote && !last_was_backslash {
398 length += 1; break;
400 }
401
402 length += 1;
403 last_was_backslash = false;
404 }
405 }
406
407 if partial { (TypeTokenKind::PartialLiteralString, length) } else { (TypeTokenKind::LiteralString, length) }
408 }
409
410 fn read_fully_qualified_identifier(&self) -> (TypeTokenKind, usize) {
411 let mut length = 2;
412 let mut last_was_slash = false;
413 loop {
414 match self.input.peek(length, 1) {
415 [start_of_identifier!(), ..] if last_was_slash => {
416 length += 1;
417 last_was_slash = false;
418 }
419 [part_of_identifier!(), ..] if !last_was_slash => {
420 length += 1;
421 }
422 [b'\\', ..] => {
423 if last_was_slash {
424 length -= 1;
425
426 break;
427 }
428
429 length += 1;
430 last_was_slash = true;
431 }
432 _ => {
433 break;
434 }
435 }
436 }
437
438 (TypeTokenKind::FullyQualifiedIdentifier, length)
439 }
440
441 fn read_identifier(&self) -> (TypeTokenKind, usize) {
442 const KEYWORD_TYPES: [(&[u8], TypeTokenKind); 24] = [
443 (b"list", TypeTokenKind::List),
444 (b"int", TypeTokenKind::Int),
445 (b"string", TypeTokenKind::String),
446 (b"float", TypeTokenKind::Float),
447 (b"bool", TypeTokenKind::Bool),
448 (b"false", TypeTokenKind::False),
449 (b"true", TypeTokenKind::True),
450 (b"object", TypeTokenKind::Object),
451 (b"callable", TypeTokenKind::Callable),
452 (b"array", TypeTokenKind::Array),
453 (b"iterable", TypeTokenKind::Iterable),
454 (b"null", TypeTokenKind::Null),
455 (b"mixed", TypeTokenKind::Mixed),
456 (b"resource", TypeTokenKind::Resource),
457 (b"void", TypeTokenKind::Void),
458 (b"scalar", TypeTokenKind::Scalar),
459 (b"numeric", TypeTokenKind::Numeric),
460 (b"never", TypeTokenKind::Never),
461 (b"nothing", TypeTokenKind::Nothing),
462 (b"as", TypeTokenKind::As),
463 (b"is", TypeTokenKind::Is),
464 (b"not", TypeTokenKind::Not),
465 (b"min", TypeTokenKind::Min),
466 (b"max", TypeTokenKind::Max),
467 ];
468
469 let mut length = 1;
470 let mut ended_with_slash = false;
471 loop {
472 match self.input.peek(length, 2) {
473 [part_of_identifier!(), ..] => {
474 length += 1;
475 }
476 [b'\\', start_of_identifier!(), ..] => {
477 ended_with_slash = true;
478 break;
479 }
480 _ => {
481 break;
482 }
483 }
484 }
485
486 if !ended_with_slash {
487 for (value, kind) in KEYWORD_TYPES {
488 let keyword_length = value.len();
489 if keyword_length != length {
490 continue;
491 }
492
493 if self.input.is_at(value, true) {
494 return (kind, keyword_length);
495 }
496 }
497 }
498
499 let mut slashes = 0;
500 let mut last_was_slash = false;
501 loop {
502 match self.input.peek(length, 1) {
503 [start_of_identifier!(), ..] if last_was_slash => {
504 length += 1;
505 last_was_slash = false;
506 }
507 [part_of_identifier!(), ..] if !last_was_slash => {
508 length += 1;
509 }
510 [b'\\', ..] => {
511 if !last_was_slash {
512 length += 1;
513 slashes += 1;
514 last_was_slash = true;
515 } else {
516 length -= 1;
517 slashes -= 1;
518 last_was_slash = false;
519
520 break;
521 }
522 }
523 _ => {
524 break;
525 }
526 }
527 }
528
529 if last_was_slash {
530 length -= 1;
531 slashes -= 1;
532 }
533
534 if slashes > 0 { (TypeTokenKind::QualifiedIdentifier, length) } else { (TypeTokenKind::Identifier, length) }
535 }
536
537 #[inline]
538 fn token(
539 &self,
540 kind: TypeTokenKind,
541 value: &'input [u8],
542 from: Position,
543 to: Position,
544 ) -> Option<Result<TypeToken<'input>, SyntaxError>> {
545 let mut value_chunks = value.utf8_chunks();
546 let value_str = if let Some(chunk) = value_chunks.next() {
547 let valid = chunk.valid();
548
549 debug_assert_eq!(valid.len(), value.len());
550
551 valid
552 } else {
553 ""
554 };
555
556 Some(Ok(TypeToken { kind, value: value_str, span: Span::new(from, to) }))
557 }
558}