1use memchr::{memchr2, memmem};
2use php_ast::Span;
3
4use crate::token::{resolve_keyword, TokenKind};
5
/// Builds the 256-entry lookup table of PHP whitespace bytes:
/// space, tab, carriage return, line feed, and form feed (0x0C).
const fn make_whitespace_table() -> [bool; 256] {
    let mut table = [false; 256];
    let mut i = 0usize;
    while i < 256 {
        table[i] = matches!(i as u8, b' ' | b'\t' | b'\r' | b'\n' | 0x0C);
        i += 1;
    }
    table
}
22
/// Builds the table of bytes that may begin an identifier:
/// ASCII letters, `_`, and every byte >= 0x80 (covers multibyte UTF-8).
const fn make_ident_start_table() -> [bool; 256] {
    let mut table = [false; 256];
    let mut i = 0usize;
    while i < 256 {
        table[i] = matches!(i as u8, b'a'..=b'z' | b'A'..=b'Z' | b'_' | 0x80..=0xFF);
        i += 1;
    }
    table
}
33
/// Builds the table of bytes that may continue an identifier:
/// the ident-start set plus ASCII digits.
const fn make_ident_continue_table() -> [bool; 256] {
    let mut table = [false; 256];
    let mut i = 0usize;
    while i < 256 {
        table[i] = matches!(
            i as u8,
            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | 0x80..=0xFF
        );
        i += 1;
    }
    table
}
48
/// Byte-indexed table of PHP whitespace (space, tab, CR, LF, form feed).
static IS_PHP_WHITESPACE: [bool; 256] = make_whitespace_table();
/// Bytes that may begin an identifier: ASCII letters, `_`, bytes >= 0x80.
static IS_IDENT_START: [bool; 256] = make_ident_start_table();
/// Bytes that may continue an identifier: ident-start set plus ASCII digits.
static IS_IDENT_CONTINUE: [bool; 256] = make_ident_continue_table();
52
/// A recoverable lexing problem (e.g. a malformed numeric literal),
/// recorded with its span so scanning can continue past it.
#[derive(Debug, Clone, PartialEq)]
pub struct LexerError {
    // Human-readable description of the problem.
    pub message: String,
    // Byte range of the offending text in the source.
    pub span: Span,
}
58
/// A single lexed token: its kind plus the byte span it covers in the source.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Token {
    pub kind: TokenKind,
    pub span: Span,
}
64
65impl Token {
66 pub fn new(kind: TokenKind, span: Span) -> Self {
67 Self { kind, span }
68 }
69
70 pub fn eof(offset: u32) -> Self {
71 Self {
72 kind: TokenKind::Eof,
73 span: Span::new(offset, offset),
74 }
75 }
76}
77
/// Which sub-language the lexer is currently scanning.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LexerMode {
    // Outside PHP tags: text is emitted as raw inline HTML.
    InlineHtml,
    // Inside `<?php` / `<?=` ... `?>`: real PHP tokens.
    Php,
}
83
/// A hand-written, byte-oriented lexer over `source`.
pub struct Lexer<'src> {
    // Full source text; token spans index into it.
    source: &'src str,
    // Current scanning mode (inline HTML vs. PHP code).
    mode: LexerMode,
    // Byte offset of the next unread character.
    pos: usize,
    // One-token lookahead buffer, filled by `peek`.
    peeked: Option<Token>,
    // Second lookahead slot, filled by `peek2`.
    peeked2: Option<Token>,
    // Errors accumulated while scanning; lexing continues past them.
    pub errors: Vec<LexerError>,
}
92
93#[inline(always)]
94fn is_ident_start(b: u8) -> bool {
95 IS_IDENT_START[b as usize]
96}
97
98#[inline(always)]
99fn is_ident_continue(b: u8) -> bool {
100 IS_IDENT_CONTINUE[b as usize]
101}
102
103impl<'src> Lexer<'src> {
104 pub fn new(source: &'src str) -> Self {
105 let pos = if source.starts_with("#!") {
107 source.find('\n').map(|p| p + 1).unwrap_or(source.len())
108 } else {
109 0
110 };
111
112 let remaining = &source[pos..];
114 let rem_bytes = remaining.as_bytes();
115 let mode = if (rem_bytes.len() >= 5
116 && rem_bytes[0] == b'<'
117 && rem_bytes[1] == b'?'
118 && rem_bytes[2..5].eq_ignore_ascii_case(b"php"))
119 || remaining.starts_with("<?=")
120 {
121 LexerMode::Php
122 } else {
123 LexerMode::InlineHtml
124 };
125
126 Self {
127 source,
128 mode,
129 pos,
130 peeked: None,
131 peeked2: None,
132 errors: Vec::new(),
133 }
134 }
135
136 pub fn new_at(source: &'src str, offset: usize) -> Self {
141 Self {
142 source,
143 mode: LexerMode::Php,
144 pos: offset,
145 peeked: None,
146 peeked2: None,
147 errors: Vec::new(),
148 }
149 }
150
    /// Returns the full source text this lexer was created over.
    pub fn source(&self) -> &'src str {
        self.source
    }
154
155 pub fn peek(&mut self) -> &Token {
156 if self.peeked.is_none() {
157 self.peeked = Some(self.read_next_token());
158 }
159 self.peeked.as_ref().expect("peeked is Some: set above")
160 }
161
162 pub fn peek2(&mut self) -> &Token {
164 if self.peeked.is_none() {
166 self.peeked = Some(self.read_next_token());
167 }
168 if self.peeked2.is_none() {
169 self.peeked2 = Some(self.read_next_token());
170 }
171 self.peeked2.as_ref().expect("peeked2 is Some: set above")
172 }
173
174 pub fn next_token(&mut self) -> Token {
175 if let Some(token) = self.peeked.take() {
176 self.peeked = self.peeked2.take();
177 return token;
178 }
179 self.read_next_token()
180 }
181
182 pub fn token_text(&self, token: &Token) -> &'src str {
184 &self.source[token.span.start as usize..token.span.end as usize]
185 }
186
187 fn read_next_token(&mut self) -> Token {
188 if self.pos >= self.source.len() {
189 return Token::eof(self.source.len() as u32);
190 }
191
192 match self.mode {
193 LexerMode::InlineHtml => self.lex_inline_html(),
194 LexerMode::Php => self.lex_php(),
195 }
196 }
197
    /// Scans raw HTML up to the next PHP open tag (`<?php`, case-insensitive,
    /// or `<?=`) or end of input, producing one `InlineHtml` token and
    /// switching to PHP mode when a tag is found.
    fn lex_inline_html(&mut self) -> Token {
        let start = self.pos;
        let bytes = self.source.as_bytes();

        // Find the next `<` that actually begins an open tag; a bare `<`
        // (e.g. in "<html>") is part of the HTML text.
        let mut search = self.pos;
        let tag_pos = loop {
            match memchr::memchr(b'<', &bytes[search..]) {
                None => break None,
                Some(offset) => {
                    let p = search + offset;
                    let rest = &bytes[p..];
                    if (rest.len() >= 5
                        && rest[0] == b'<'
                        && rest[1] == b'?'
                        && rest[2..5].eq_ignore_ascii_case(b"php"))
                        || rest.starts_with(b"<?=")
                    {
                        // Offset of the tag relative to where the HTML began.
                        break Some(p - self.pos);
                    }
                    search = p + 1;
                }
            }
        };

        if let Some(tag_pos) = tag_pos {
            if tag_pos == 0 {
                // Tag starts immediately: nothing to emit as HTML, lex the
                // open tag itself in PHP mode instead.
                self.mode = LexerMode::Php;
                return self.lex_php();
            }
            let end = self.pos + tag_pos;
            self.pos = end;
            self.mode = LexerMode::Php;
            Token::new(TokenKind::InlineHtml, Span::new(start as u32, end as u32))
        } else {
            // No more PHP in the file: the remainder is one HTML token.
            let end = self.source.len();
            self.pos = end;
            Token::new(TokenKind::InlineHtml, Span::new(start as u32, end as u32))
        }
    }
243
    /// Scans one token in PHP mode: heredocs first (their opener may be
    /// preceded by whitespace that belongs to the match), then whitespace,
    /// comments, and finally ordinary tokens via `scan_token`.
    fn lex_php(&mut self) -> Token {
        let remaining = &self.source[self.pos..];

        // Heredoc/nowdoc openers are tried before generic scanning because
        // `<<<` would otherwise lex as shift/less-than operators.
        if let Some(token) = self.try_lex_heredoc(remaining) {
            return token;
        }

        self.skip_whitespace();

        if self.pos >= self.source.len() {
            return Token::eof(self.source.len() as u32);
        }

        let bytes = self.source.as_bytes();
        let start = self.pos;

        // `//` line comment.
        if bytes[self.pos] == b'/' && self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'/' {
            self.pos += 2;
            Self::skip_line_comment_body(bytes, &mut self.pos);
            return self.tok(TokenKind::LineComment, start);
        }

        // `/* ... */` block comment; `/** ... */` is a doc comment, but the
        // degenerate `/**/` counts as a plain block comment.
        if bytes[self.pos] == b'/' && self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'*' {
            self.pos += 2;
            let kind = if self.pos < bytes.len()
                && bytes[self.pos] == b'*'
                && !(self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'/')
            {
                TokenKind::DocComment
            } else {
                TokenKind::BlockComment
            };
            match memmem::find(&bytes[self.pos..], b"*/") {
                Some(end) => self.pos += end + 2,
                // Unterminated comment runs to end of input.
                None => self.pos = bytes.len(),
            }
            return self.tok(kind, start);
        }

        // `#` line comment — but `#[` begins an attribute, not a comment.
        if bytes[self.pos] == b'#' && !(self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'[') {
            self.pos += 1;
            Self::skip_line_comment_body(bytes, &mut self.pos);
            return self.tok(TokenKind::HashComment, start);
        }

        self.scan_token()
    }
300
301 fn skip_whitespace(&mut self) {
303 let bytes = self.source.as_bytes();
304 while self.pos < bytes.len() && IS_PHP_WHITESPACE[bytes[self.pos] as usize] {
305 self.pos += 1;
306 }
307 }
308
    /// Scans one ordinary PHP token (operator, punctuation, literal,
    /// variable, or identifier/keyword). Assumes whitespace and comments
    /// were already consumed by `lex_php` and that `pos` is in bounds.
    fn scan_token(&mut self) -> Token {
        let start = self.pos;
        let bytes = self.source.as_bytes();
        let b = bytes[start];

        match b {
            // --- Arithmetic operators and compound assignments ---
            b'+' => {
                if self.check_at(1, b'+') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PlusPlus, start)
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PlusEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Plus, start)
                }
            }
            b'-' => {
                if self.check_at(1, b'-') {
                    self.pos = start + 2;
                    self.tok(TokenKind::MinusMinus, start)
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::MinusEquals, start)
                } else if self.check_at(1, b'>') {
                    self.pos = start + 2;
                    self.tok(TokenKind::Arrow, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Minus, start)
                }
            }
            b'*' => {
                if self.check_at(1, b'*') {
                    if self.check_at(2, b'=') {
                        self.pos = start + 3;
                        self.tok(TokenKind::StarStarEquals, start)
                    } else {
                        self.pos = start + 2;
                        self.tok(TokenKind::StarStar, start)
                    }
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::StarEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Star, start)
                }
            }
            b'/' => {
                // Comments were handled in `lex_php`, so `/` here is division.
                if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::SlashEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Slash, start)
                }
            }
            b'%' => {
                if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PercentEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Percent, start)
                }
            }
            b'.' => {
                // `.5`-style float literal with a leading dot.
                if start + 1 < bytes.len() && bytes[start + 1].is_ascii_digit() {
                    self.pos = start + 1;
                    self.scan_digits(u8::is_ascii_digit);
                    if self.pos < bytes.len() && matches!(bytes[self.pos], b'e' | b'E') {
                        self.try_scan_exponent();
                    }
                    // A trailing underscore makes the literal malformed.
                    if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                        self.consume_invalid_numeric_rest();
                        return self.invalid_numeric(start);
                    }
                    return self.tok(TokenKind::FloatLiteralLeadingDot, start);
                }
                if self.check_at(1, b'.') && self.check_at(2, b'.') {
                    self.pos = start + 3;
                    self.tok(TokenKind::Ellipsis, start)
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::DotEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Dot, start)
                }
            }
            // --- Comparison / assignment ---
            b'=' => {
                if self.check_at(1, b'=') {
                    if self.check_at(2, b'=') {
                        self.pos = start + 3;
                        self.tok(TokenKind::EqualsEqualsEquals, start)
                    } else {
                        self.pos = start + 2;
                        self.tok(TokenKind::EqualsEquals, start)
                    }
                } else if self.check_at(1, b'>') {
                    self.pos = start + 2;
                    self.tok(TokenKind::FatArrow, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Equals, start)
                }
            }
            b'!' => {
                if self.check_at(1, b'=') {
                    if self.check_at(2, b'=') {
                        self.pos = start + 3;
                        self.tok(TokenKind::BangEqualsEquals, start)
                    } else {
                        self.pos = start + 2;
                        self.tok(TokenKind::BangEquals, start)
                    }
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Bang, start)
                }
            }
            // `<` has many forms (shift, spaceship, heredoc, open tag).
            b'<' => self.scan_less_than(start),
            b'>' => {
                if self.check_at(1, b'>') {
                    if self.check_at(2, b'=') {
                        self.pos = start + 3;
                        self.tok(TokenKind::ShiftRightEquals, start)
                    } else {
                        self.pos = start + 2;
                        self.tok(TokenKind::ShiftRight, start)
                    }
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::GreaterThanEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::GreaterThan, start)
                }
            }
            // --- Bitwise / logical ---
            b'&' => {
                if self.check_at(1, b'&') {
                    self.pos = start + 2;
                    self.tok(TokenKind::AmpersandAmpersand, start)
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::AmpersandEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Ampersand, start)
                }
            }
            b'|' => {
                if self.check_at(1, b'|') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PipePipe, start)
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PipeEquals, start)
                } else if self.check_at(1, b'>') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PipeArrow, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Pipe, start)
                }
            }
            b'^' => {
                if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::CaretEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Caret, start)
                }
            }
            b'~' => {
                self.pos = start + 1;
                self.tok(TokenKind::Tilde, start)
            }
            b'?' => {
                if self.check_at(1, b'>') {
                    // `?>` returns the lexer to inline-HTML mode.
                    self.pos = start + 2;
                    self.mode = LexerMode::InlineHtml;
                    self.tok(TokenKind::CloseTag, start)
                } else if self.check_at(1, b'?') {
                    if self.check_at(2, b'=') {
                        self.pos = start + 3;
                        self.tok(TokenKind::CoalesceEquals, start)
                    } else {
                        self.pos = start + 2;
                        self.tok(TokenKind::QuestionQuestion, start)
                    }
                } else if self.check_at(1, b'-') && self.check_at(2, b'>') {
                    self.pos = start + 3;
                    self.tok(TokenKind::NullsafeArrow, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Question, start)
                }
            }
            b':' => {
                if self.check_at(1, b':') {
                    self.pos = start + 2;
                    self.tok(TokenKind::DoubleColon, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Colon, start)
                }
            }
            b'@' => {
                self.pos = start + 1;
                self.tok(TokenKind::At, start)
            }
            b'\\' => {
                self.pos = start + 1;
                self.tok(TokenKind::Backslash, start)
            }
            b'#' => {
                if self.check_at(1, b'[') {
                    // Attribute opener `#[` (plain `#` comments are consumed
                    // earlier, in `lex_php`).
                    self.pos = start + 2;
                    self.tok(TokenKind::HashBracket, start)
                } else {
                    // Defensive: skip a stray `#` and keep lexing.
                    self.pos = start + 1;
                    self.read_next_token()
                }
            }

            // --- Punctuation ---
            b'(' => {
                self.pos = start + 1;
                self.tok(TokenKind::LeftParen, start)
            }
            b')' => {
                self.pos = start + 1;
                self.tok(TokenKind::RightParen, start)
            }
            b'[' => {
                self.pos = start + 1;
                self.tok(TokenKind::LeftBracket, start)
            }
            b']' => {
                self.pos = start + 1;
                self.tok(TokenKind::RightBracket, start)
            }
            b'{' => {
                self.pos = start + 1;
                self.tok(TokenKind::LeftBrace, start)
            }
            b'}' => {
                self.pos = start + 1;
                self.tok(TokenKind::RightBrace, start)
            }
            b';' => {
                self.pos = start + 1;
                self.tok(TokenKind::Semicolon, start)
            }
            b',' => {
                self.pos = start + 1;
                self.tok(TokenKind::Comma, start)
            }

            // --- String literals ---
            b'\'' => self.scan_single_quoted_string(),
            b'"' => self.scan_double_quoted_string(),
            b'`' => self.scan_backtick_string(),

            // --- `$variable` or bare `$` ---
            b'$' => {
                if start + 1 < bytes.len() && is_ident_start(bytes[start + 1]) {
                    self.pos = start + 2;
                    while self.pos < bytes.len() && is_ident_continue(bytes[self.pos]) {
                        self.pos += 1;
                    }
                    self.tok(TokenKind::Variable, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Dollar, start)
                }
            }

            b'0'..=b'9' => self.scan_number(),

            _ if is_ident_start(b) => {
                // `b` / `B` may prefix binary strings or heredocs: b'...',
                // b"...", b<<<LABEL.
                if b == b'b' || b == b'B' {
                    if self.check_at(1, b'\'') {
                        return self.scan_single_quoted_string();
                    }
                    if self.check_at(1, b'"') {
                        return self.scan_double_quoted_string();
                    }
                    if self.check_at(1, b'<') && self.check_at(2, b'<') && self.check_at(3, b'<') {
                        let remaining = &self.source[self.pos..];
                        if let Some(token) = self.try_lex_heredoc(remaining) {
                            return token;
                        }
                    }
                }
                self.scan_identifier()
            }

            _ => {
                // Unknown byte: skip it and keep lexing.
                self.pos = start + 1;
                self.read_next_token()
            }
        }
    }
630
    /// Disambiguates tokens starting with `<`: heredoc opener, shift
    /// operators, comparisons, spaceship, open tags, or plain less-than.
    fn scan_less_than(&mut self, start: usize) -> Token {
        if self.check_at(1, b'<') {
            if self.check_at(2, b'<') {
                // `<<<` may open a heredoc/nowdoc; if it doesn't parse as
                // one, fall through to the shift tokens below.
                let remaining = &self.source[self.pos..];
                if let Some(token) = self.try_lex_heredoc(remaining) {
                    return token;
                }
            }
            if self.check_at(2, b'=') {
                self.pos = start + 3;
                return self.tok(TokenKind::ShiftLeftEquals, start);
            }
            self.pos = start + 2;
            return self.tok(TokenKind::ShiftLeft, start);
        }
        if self.check_at(1, b'=') {
            if self.check_at(2, b'>') {
                self.pos = start + 3;
                return self.tok(TokenKind::Spaceship, start);
            }
            self.pos = start + 2;
            return self.tok(TokenKind::LessThanEquals, start);
        }
        if self.check_at(1, b'?') {
            let bytes = self.source.as_bytes();
            // `<?php` (case-insensitive, 5 bytes) or `<?=` (3 bytes).
            if bytes.len() >= self.pos + 5
                && bytes[self.pos + 2..self.pos + 5].eq_ignore_ascii_case(b"php")
            {
                self.pos = start + 5;
                return self.tok(TokenKind::OpenTag, start);
            }
            if self.source[self.pos..].starts_with("<?=") {
                self.pos = start + 3;
                return self.tok(TokenKind::OpenTag, start);
            }
        }
        self.pos = start + 1;
        self.tok(TokenKind::LessThan, start)
    }
673
674 fn scan_single_quoted_string(&mut self) -> Token {
677 let start = self.pos;
678 let bytes = self.source.as_bytes();
679 let mut p = self.pos;
680 if bytes[p] == b'b' || bytes[p] == b'B' {
682 p += 1;
683 }
684 p += 1; loop {
686 match memchr2(b'\\', b'\'', &bytes[p..]) {
687 None => {
688 self.pos = start + 1;
690 return self.read_next_token();
691 }
692 Some(offset) => {
693 p += offset;
694 match bytes[p] {
695 b'\\' => {
696 p += 1;
697 if p < bytes.len() {
698 p += 1;
699 }
700 }
701 _ => {
702 p += 1;
704 break;
705 }
706 }
707 }
708 }
709 }
710 self.pos = p;
711 self.tok(TokenKind::SingleQuotedString, start)
712 }
713
714 fn scan_double_quoted_string(&mut self) -> Token {
715 let start = self.pos;
716 let bytes = self.source.as_bytes();
717 let mut p = self.pos;
718 if bytes[p] == b'b' || bytes[p] == b'B' {
720 p += 1;
721 }
722 p += 1; loop {
724 match memchr2(b'\\', b'"', &bytes[p..]) {
725 None => {
726 self.pos = start + 1;
728 return self.read_next_token();
729 }
730 Some(offset) => {
731 p += offset;
732 match bytes[p] {
733 b'\\' => {
734 p += 1;
735 if p < bytes.len() {
736 p += 1;
737 }
738 }
739 _ => {
740 p += 1;
742 break;
743 }
744 }
745 }
746 }
747 }
748 self.pos = p;
749 self.tok(TokenKind::DoubleQuotedString, start)
750 }
751
752 fn scan_backtick_string(&mut self) -> Token {
753 let start = self.pos;
754 let bytes = self.source.as_bytes();
755 let mut p = self.pos;
756 p += 1; loop {
758 match memchr2(b'\\', b'`', &bytes[p..]) {
759 None => {
760 self.pos = start + 1;
762 return self.read_next_token();
763 }
764 Some(offset) => {
765 p += offset;
766 match bytes[p] {
767 b'\\' => {
768 p += 1;
769 if p < bytes.len() {
770 p += 1;
771 }
772 }
773 _ => {
774 p += 1;
776 break;
777 }
778 }
779 }
780 }
781 }
782 self.pos = p;
783 self.tok(TokenKind::BacktickString, start)
784 }
785
    /// Scans a numeric literal starting at a decimal digit: hex/binary/octal
    /// (`0x`, `0b`, `0o`) forms, legacy leading-zero octal, decimal ints,
    /// and floats with optional fraction and exponent. Misplaced digit
    /// separators (`_`) produce an `InvalidNumericLiteral` token plus error.
    fn scan_number(&mut self) -> Token {
        let start = self.pos;
        let bytes = self.source.as_bytes();

        // Radix-prefixed forms. If the prefix has no valid digits after it,
        // rewind and fall through to decimal scanning below.
        if bytes[start] == b'0' && start + 1 < bytes.len() {
            match bytes[start + 1] {
                b'x' | b'X' => {
                    self.pos = start + 2;
                    // `0x_...` — separator may not follow the prefix.
                    if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                        self.consume_invalid_numeric_rest();
                        return self.invalid_numeric(start);
                    }
                    if self.scan_digits(u8::is_ascii_hexdigit) {
                        // Trailing separator is also invalid.
                        if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                            self.consume_invalid_numeric_rest();
                            return self.invalid_numeric(start);
                        }
                        return self.tok(TokenKind::HexIntLiteral, start);
                    }
                    self.pos = start;
                }
                b'b' | b'B' => {
                    self.pos = start + 2;
                    if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                        self.consume_invalid_numeric_rest();
                        return self.invalid_numeric(start);
                    }
                    if self.scan_digits(|b| b == &b'0' || b == &b'1') {
                        if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                            self.consume_invalid_numeric_rest();
                            return self.invalid_numeric(start);
                        }
                        return self.tok(TokenKind::BinIntLiteral, start);
                    }
                    self.pos = start;
                }
                b'o' | b'O' => {
                    self.pos = start + 2;
                    if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                        self.consume_invalid_numeric_rest();
                        return self.invalid_numeric(start);
                    }
                    if self.scan_digits(|b| (b'0'..=b'7').contains(b)) {
                        if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                            self.consume_invalid_numeric_rest();
                            return self.invalid_numeric(start);
                        }
                        return self.tok(TokenKind::OctIntLiteralNew, start);
                    }
                    self.pos = start;
                }
                _ => {}
            }
        }

        // Decimal integer part.
        self.pos = start;
        self.scan_digits(u8::is_ascii_digit);
        let integer_end = self.pos;
        let mut kind = TokenKind::IntLiteral;

        // Legacy octal: leading zero followed by more digits (e.g. `0777`).
        if bytes[start] == b'0' && integer_end > start + 1 {
            kind = TokenKind::OctIntLiteral;
        }

        // Optional fraction. `1.5` and trailing-dot `1.` are floats; `1..`
        // leaves the dots alone (they lex separately); `1._` is invalid.
        if self.pos < bytes.len() && bytes[self.pos] == b'.' {
            if self.pos + 1 < bytes.len() && bytes[self.pos + 1].is_ascii_digit() {
                self.pos += 1;
                self.scan_digits(u8::is_ascii_digit);
                kind = TokenKind::FloatLiteralSimple;
            } else if self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'_' {
                self.consume_invalid_numeric_rest();
                return self.invalid_numeric(start);
            } else if self.pos + 1 >= bytes.len() || bytes[self.pos + 1] != b'.' {
                self.pos += 1;
                kind = TokenKind::FloatLiteralSimple;
            }
        }

        // Optional exponent; `1e_2` is invalid, a bare `1e` stays an int
        // (the `e` will lex as an identifier).
        if self.pos < bytes.len() && matches!(bytes[self.pos], b'e' | b'E') {
            if self.try_scan_exponent() {
                kind = TokenKind::FloatLiteral;
            } else if self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'_' {
                self.consume_invalid_numeric_rest();
                return self.invalid_numeric(start);
            }
        }

        // Trailing separator after the whole literal is invalid.
        if self.pos < bytes.len() && bytes[self.pos] == b'_' {
            self.consume_invalid_numeric_rest();
            return self.invalid_numeric(start);
        }

        self.tok(kind, start)
    }
896
897 fn scan_digits(&mut self, is_valid: fn(&u8) -> bool) -> bool {
900 let bytes = self.source.as_bytes();
901 if self.pos >= bytes.len() || !is_valid(&bytes[self.pos]) {
902 return false;
903 }
904 self.pos += 1;
905 loop {
906 if self.pos >= bytes.len() {
907 break;
908 }
909 if is_valid(&bytes[self.pos]) {
910 self.pos += 1;
911 } else if bytes[self.pos] == b'_'
912 && self.pos + 1 < bytes.len()
913 && is_valid(&bytes[self.pos + 1])
914 {
915 self.pos += 2;
916 } else {
917 break;
918 }
919 }
920 true
921 }
922
923 fn try_scan_exponent(&mut self) -> bool {
926 let bytes = self.source.as_bytes();
927 let saved = self.pos;
928 self.pos += 1; if self.pos < bytes.len() && matches!(bytes[self.pos], b'+' | b'-') {
932 self.pos += 1;
933 }
934
935 if self.scan_digits(u8::is_ascii_digit) {
937 true
938 } else {
939 self.pos = saved;
940 false
941 }
942 }
943
944 fn scan_identifier(&mut self) -> Token {
947 let start = self.pos;
948 let bytes = self.source.as_bytes();
949 self.pos += 1; while self.pos < bytes.len() && is_ident_continue(bytes[self.pos]) {
951 self.pos += 1;
952 }
953 let text = &self.source[start..self.pos];
954 let kind = resolve_keyword(text).unwrap_or(TokenKind::Identifier);
955 self.tok(kind, start)
956 }
957
    /// Advances `pos` to the end of a line-comment body: stops at the
    /// newline (left unconsumed) or just before a `?>` close tag, whichever
    /// comes first; otherwise runs to end of input.
    #[inline]
    fn skip_line_comment_body(bytes: &[u8], pos: &mut usize) {
        loop {
            match memchr2(b'\n', b'?', &bytes[*pos..]) {
                None => {
                    *pos = bytes.len();
                    return;
                }
                Some(offset) => {
                    let p = *pos + offset;
                    if bytes[p] == b'\n' {
                        // Leave the newline for the whitespace skipper.
                        *pos = p;
                        return;
                    }
                    if p + 1 < bytes.len() && bytes[p + 1] == b'>' {
                        // `?>` ends the comment; leave the tag to be lexed.
                        *pos = p;
                        return;
                    }
                    // Lone `?`: keep scanning past it.
                    *pos = p + 1;
                }
            }
        }
    }
988
989 #[inline]
990 fn check_at(&self, offset: usize, expected: u8) -> bool {
991 self.source.as_bytes().get(self.pos + offset) == Some(&expected)
992 }
993
994 #[inline]
995 fn tok(&self, kind: TokenKind, start: usize) -> Token {
996 Token::new(kind, Span::new(start as u32, self.pos as u32))
997 }
998
999 fn invalid_numeric(&mut self, start: usize) -> Token {
1000 let span = Span::new(start as u32, self.pos as u32);
1001 self.errors.push(LexerError {
1002 message: "Invalid numeric literal".to_string(),
1003 span,
1004 });
1005 Token::new(TokenKind::InvalidNumericLiteral, span)
1006 }
1007
    /// After a malformed numeric literal, consumes the rest of it so a
    /// single error token covers the whole thing: alphanumerics, `_`, `.`,
    /// plus a `+`/`-` only directly after an exponent marker (`e`/`E`).
    fn consume_invalid_numeric_rest(&mut self) {
        let bytes = self.source.as_bytes();
        while self.pos < bytes.len() {
            let b = bytes[self.pos];
            if b.is_ascii_alphanumeric() || b == b'_' || b == b'.' || b == b'+' || b == b'-' {
                if (b == b'+' || b == b'-') && self.pos > 0 {
                    // A sign belongs to the literal only in `e+`/`E-` form;
                    // otherwise it starts a new token.
                    let prev = bytes[self.pos - 1];
                    if prev != b'e' && prev != b'E' {
                        break;
                    }
                }
                self.pos += 1;
            } else {
                break;
            }
        }
    }
1027
    /// Attempts to lex a heredoc (`<<<LABEL` / `<<<"LABEL"`) or nowdoc
    /// (`<<<'LABEL'`) starting in `remaining` (the source from `pos` on).
    /// The produced token spans from the opener through the closing label
    /// (including its indentation's end). Returns `None` when `remaining`
    /// does not begin a well-formed heredoc, leaving `pos` untouched.
    fn try_lex_heredoc(&mut self, remaining: &str) -> Option<Token> {
        // Leading whitespace is tolerated but excluded from the token span.
        let trimmed = remaining.trim_start_matches(|c: char| {
            c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\x0C'
        });
        let ws_len = remaining.len() - trimmed.len();

        // Optional binary-string prefix: b<<<... / B<<<...
        let (after_prefix, prefix_len) = if (trimmed.starts_with("b<<<")
            || trimmed.starts_with("B<<<"))
            && !trimmed[1..].starts_with("<<<>")
        {
            (&trimmed[1..], 1)
        } else {
            (trimmed, 0)
        };

        if !after_prefix.starts_with("<<<") {
            return None;
        }

        let base_pos = self.pos;
        let start = base_pos + ws_len;
        let after_arrows = &after_prefix[3..];
        // Spaces/tabs are allowed between `<<<` and the label.
        let after_arrows_trimmed = after_arrows.trim_start_matches([' ', '\t']);
        // Offset (within `remaining`) of the label itself.
        let arrows_offset =
            ws_len + prefix_len + 3 + (after_arrows.len() - after_arrows_trimmed.len());

        // Parse the label, which may be quoted: '...' => nowdoc,
        // "..." or bare => heredoc. `label_line_end` is the offset of the
        // end of the opener line within `remaining`.
        let (label, is_nowdoc, label_line_end);
        if let Some(after_quote) = after_arrows_trimmed.strip_prefix('\'') {
            let closing = after_quote.find('\'')?;
            label = after_quote[..closing].to_string();
            is_nowdoc = true;
            let after_label = &after_arrows_trimmed[2 + closing..];
            let nl = after_label.find('\n').unwrap_or(after_label.len());
            label_line_end = arrows_offset + 2 + closing + nl;
            // NOTE(review): this branch is intentionally empty — it looks
            // like leftover scaffolding; the bounds check is redone below.
            if label_line_end < remaining.len() {
            }
        } else {
            let s = if let Some(after_dquote) = after_arrows_trimmed.strip_prefix('"') {
                let closing = after_dquote.find('"')?;
                label = after_dquote[..closing].to_string();
                &after_dquote[1 + closing..]
            } else {
                // Bare label: a run of alphanumerics/underscores.
                let end = after_arrows_trimmed
                    .find(|c: char| !c.is_ascii_alphanumeric() && c != '_')
                    .unwrap_or(after_arrows_trimmed.len());
                if end == 0 {
                    return None;
                }
                label = after_arrows_trimmed[..end].to_string();
                &after_arrows_trimmed[end..]
            };
            is_nowdoc = false;
            let nl = s.find('\n').unwrap_or(s.len());
            label_line_end = arrows_offset + (after_arrows_trimmed.len() - s.len()) + nl;
        };

        if label.is_empty() {
            return None;
        }

        // The body starts on the line after the opener; a heredoc whose
        // opener line is the last line of input is malformed.
        let body_start_in_remaining = if label_line_end < remaining.len() {
            label_line_end + 1
        } else {
            return None;
        };

        let body = &remaining[body_start_in_remaining..];

        // Scan line by line for the closing label: the label alone, or the
        // label followed only by `;`/`,`/`)` and trailing whitespace.
        let mut search_pos = 0;
        let end_marker_pos;
        loop {
            if search_pos >= body.len() {
                // No closing label found.
                return None;
            }
            let line_start = search_pos;
            let line_end = body[line_start..]
                .find('\n')
                .map(|p| line_start + p)
                .unwrap_or(body.len());
            let line = &body[line_start..line_end];
            let trimmed_line = line.trim_start_matches([' ', '\t']);

            if trimmed_line == label
                || trimmed_line.starts_with(&label)
                    && trimmed_line[label.len()..]
                        .trim_start_matches([';', ',', ')'])
                        .trim()
                        .is_empty()
            {
                end_marker_pos = line_start;
                break;
            }

            search_pos = if line_end < body.len() {
                line_end + 1
            } else {
                body.len()
            };
        }

        // The token ends right after the closing label (its indentation is
        // included in the span; trailing `;`/`,`/`)` are not).
        let end_marker_line = &body[end_marker_pos..];
        let trimmed = end_marker_line.trim_start_matches([' ', '\t']);
        let indent_len = end_marker_line.len() - trimmed.len();
        let token_end_in_remaining =
            body_start_in_remaining + end_marker_pos + indent_len + label.len();
        self.pos = base_pos + token_end_in_remaining;

        let span = Span::new(start as u32, self.pos as u32);

        if is_nowdoc {
            Some(Token::new(TokenKind::Nowdoc, span))
        } else {
            Some(Token::new(TokenKind::Heredoc, span))
        }
    }
1160}
1161
1162pub fn lex_all(source: &str) -> (Vec<Token>, Vec<LexerError>) {
1170 let mut lexer = Lexer::new(source);
1171 let mut tokens = Vec::new();
1172
1173 loop {
1174 let tok = lexer.next_token();
1175 let is_eof = tok.kind == TokenKind::Eof;
1176 tokens.push(tok);
1177 if is_eof {
1178 break;
1179 }
1180 }
1181
1182 let eof_span = tokens.last().unwrap().span;
1185 tokens.push(Token::new(TokenKind::Eof, eof_span));
1186
1187 let errors = lexer.errors;
1188 (tokens, errors)
1189}
1190
1191#[cfg(test)]
1192mod tests {
1193 use super::*;
1194
1195 fn collect_tokens(source: &str) -> Vec<Token> {
1196 let mut lexer = Lexer::new(source);
1197 let mut tokens = Vec::new();
1198 loop {
1199 let token = lexer.next_token();
1200 if token.kind == TokenKind::Eof {
1201 tokens.push(token);
1202 break;
1203 }
1204 tokens.push(token);
1205 }
1206 tokens
1207 }
1208
1209 fn collect_kinds(source: &str) -> Vec<TokenKind> {
1210 collect_tokens(source).into_iter().map(|t| t.kind).collect()
1211 }
1212
1213 fn php_kinds(code: &str) -> Vec<TokenKind> {
1215 let full = format!("<?php {}", code);
1216 collect_kinds(&full)
1217 .into_iter()
1218 .filter(|k| *k != TokenKind::OpenTag && *k != TokenKind::Eof)
1219 .collect()
1220 }
1221
1222 fn php_tokens(code: &str) -> Vec<(TokenKind, String)> {
1224 let full = format!("<?php {}", code);
1225 let mut lexer = Lexer::new(&full);
1226 let mut result = Vec::new();
1227 loop {
1228 let token = lexer.next_token();
1229 if token.kind == TokenKind::Eof {
1230 break;
1231 }
1232 if token.kind == TokenKind::OpenTag {
1233 continue;
1234 }
1235 let text = lexer.token_text(&token).to_string();
1236 result.push((token.kind, text));
1237 }
1238 result
1239 }
1240
1241 mod open_tag_and_html {
1242 use super::*;
1243
1244 #[test]
1245 fn test_php_only() {
1246 let tokens = collect_kinds("<?php $x = 42;");
1247 assert_eq!(
1248 tokens,
1249 vec![
1250 TokenKind::OpenTag,
1251 TokenKind::Variable,
1252 TokenKind::Equals,
1253 TokenKind::IntLiteral,
1254 TokenKind::Semicolon,
1255 TokenKind::Eof,
1256 ]
1257 );
1258 }
1259
1260 #[test]
1261 fn test_inline_html_before_php() {
1262 let tokens = collect_kinds("<html><?php echo 1;");
1263 assert_eq!(
1264 tokens,
1265 vec![
1266 TokenKind::InlineHtml,
1267 TokenKind::OpenTag,
1268 TokenKind::Echo,
1269 TokenKind::IntLiteral,
1270 TokenKind::Semicolon,
1271 TokenKind::Eof,
1272 ]
1273 );
1274 }
1275
1276 #[test]
1277 fn test_inline_html_after_close_tag() {
1278 let tokens = collect_kinds("<?php echo 1; ?><html>");
1279 assert_eq!(
1280 tokens,
1281 vec![
1282 TokenKind::OpenTag,
1283 TokenKind::Echo,
1284 TokenKind::IntLiteral,
1285 TokenKind::Semicolon,
1286 TokenKind::CloseTag,
1287 TokenKind::InlineHtml,
1288 TokenKind::Eof,
1289 ]
1290 );
1291 }
1292
1293 #[test]
1294 fn test_empty_source() {
1295 let tokens = collect_kinds("");
1296 assert_eq!(tokens, vec![TokenKind::Eof]);
1297 }
1298
1299 #[test]
1300 fn test_only_inline_html() {
1301 let tokens = collect_kinds("<html><body>Hello</body></html>");
1302 assert_eq!(tokens, vec![TokenKind::InlineHtml, TokenKind::Eof]);
1303 }
1304
1305 #[test]
1306 fn test_open_tag_uppercase() {
1307 for tag in &["<?PHP", "<?Php", "<?PhP", "<?pHP", "<?phP"] {
1309 let src = format!("{} $x = 1;", tag);
1310 let tokens = collect_kinds(&src);
1311 assert_eq!(
1312 tokens[0],
1313 TokenKind::OpenTag,
1314 "expected OpenTag for opening tag '{tag}'"
1315 );
1316 }
1317 }
1318
1319 #[test]
1320 fn test_open_tag_uppercase_mid_file() {
1321 let tokens = collect_kinds("<html><?PHP echo 1;");
1323 assert_eq!(
1324 tokens,
1325 vec![
1326 TokenKind::InlineHtml,
1327 TokenKind::OpenTag,
1328 TokenKind::Echo,
1329 TokenKind::IntLiteral,
1330 TokenKind::Semicolon,
1331 TokenKind::Eof,
1332 ]
1333 );
1334 }
1335 }
1336
1337 mod keywords {
1338 use super::*;
1339
1340 #[test]
1341 fn test_keyword_resolution() {
1342 let tokens = collect_kinds("<?php if else while for foreach function return");
1343 assert_eq!(
1344 tokens,
1345 vec![
1346 TokenKind::OpenTag,
1347 TokenKind::If,
1348 TokenKind::Else,
1349 TokenKind::While,
1350 TokenKind::For,
1351 TokenKind::Foreach,
1352 TokenKind::Function,
1353 TokenKind::Return,
1354 TokenKind::Eof,
1355 ]
1356 );
1357 }
1358
1359 #[test]
1360 fn test_keyword_case_insensitive() {
1361 let tokens = collect_kinds("<?php IF ELSE TRUE FALSE NULL");
1362 assert_eq!(
1363 tokens,
1364 vec![
1365 TokenKind::OpenTag,
1366 TokenKind::If,
1367 TokenKind::Else,
1368 TokenKind::True,
1369 TokenKind::False,
1370 TokenKind::Null,
1371 TokenKind::Eof,
1372 ]
1373 );
1374 }
1375
1376 #[test]
1377 fn test_logical_keywords() {
1378 let tokens = collect_kinds("<?php and or xor");
1379 assert_eq!(
1380 tokens,
1381 vec![
1382 TokenKind::OpenTag,
1383 TokenKind::And,
1384 TokenKind::Or,
1385 TokenKind::Xor,
1386 TokenKind::Eof,
1387 ]
1388 );
1389 }
1390 }
1391
1392 mod lexer_api {
1393 use super::*;
1394
1395 #[test]
1396 fn test_peek_doesnt_consume() {
1397 let mut lexer = Lexer::new("<?php 42");
1398 let peeked = *lexer.peek();
1399 assert_eq!(peeked.kind, TokenKind::OpenTag);
1400 let next = lexer.next_token();
1401 assert_eq!(next.kind, TokenKind::OpenTag);
1402 let next = lexer.next_token();
1403 assert_eq!(next.kind, TokenKind::IntLiteral);
1404 }
1405
1406 #[test]
1407 fn test_token_text() {
1408 let source = "<?php $myVar = 'hello';";
1409 let mut lexer = Lexer::new(source);
1410 lexer.next_token(); let var_tok = lexer.next_token();
1412 assert_eq!(lexer.token_text(&var_tok), "$myVar");
1413 lexer.next_token(); let str_tok = lexer.next_token();
1415 assert_eq!(lexer.token_text(&str_tok), "'hello'");
1416 }
1417
1418 #[test]
1419 fn test_spans_are_correct() {
1420 let source = "<?php $x";
1421 let tokens = collect_tokens(source);
1422 assert_eq!(tokens[0].span, Span::new(0, 5)); assert_eq!(tokens[1].span, Span::new(6, 8)); }
1425 }
1426
1427 mod operators {
1428 use super::*;
1429
1430 #[test]
1431 fn test_basic_operators() {
1432 assert_eq!(
1433 php_kinds("+ - * / % ** ."),
1434 vec![
1435 TokenKind::Plus,
1436 TokenKind::Minus,
1437 TokenKind::Star,
1438 TokenKind::Slash,
1439 TokenKind::Percent,
1440 TokenKind::StarStar,
1441 TokenKind::Dot,
1442 ]
1443 );
1444 }
1445
1446 #[test]
1447 fn test_operators() {
1448 let tokens = collect_kinds("<?php === !== <=> ?? ++ -- **");
1449 assert_eq!(
1450 tokens,
1451 vec![
1452 TokenKind::OpenTag,
1453 TokenKind::EqualsEqualsEquals,
1454 TokenKind::BangEqualsEquals,
1455 TokenKind::Spaceship,
1456 TokenKind::QuestionQuestion,
1457 TokenKind::PlusPlus,
1458 TokenKind::MinusMinus,
1459 TokenKind::StarStar,
1460 TokenKind::Eof,
1461 ]
1462 );
1463 }
1464
1465 #[test]
1466 fn test_assignment_operators() {
1467 let tokens = collect_kinds("<?php += -= *= /= %= **= .= ??=");
1468 assert_eq!(
1469 tokens,
1470 vec![
1471 TokenKind::OpenTag,
1472 TokenKind::PlusEquals,
1473 TokenKind::MinusEquals,
1474 TokenKind::StarEquals,
1475 TokenKind::SlashEquals,
1476 TokenKind::PercentEquals,
1477 TokenKind::StarStarEquals,
1478 TokenKind::DotEquals,
1479 TokenKind::CoalesceEquals,
1480 TokenKind::Eof,
1481 ]
1482 );
1483 }
1484
1485 #[test]
1486 fn test_hash_bracket_not_comment() {
1487 let kinds = php_kinds("#[Attribute]");
1488 assert_eq!(
1489 kinds,
1490 vec![
1491 TokenKind::HashBracket,
1492 TokenKind::Identifier,
1493 TokenKind::RightBracket,
1494 ]
1495 );
1496 }
1497
1498 #[test]
1499 fn test_nullsafe_arrow() {
1500 let kinds = php_kinds("$x?->y");
1501 assert_eq!(
1502 kinds,
1503 vec![
1504 TokenKind::Variable,
1505 TokenKind::NullsafeArrow,
1506 TokenKind::Identifier,
1507 ]
1508 );
1509 }
1510
1511 #[test]
1512 fn test_pipe_arrow() {
1513 let kinds = php_kinds("$x |> foo(...)");
1514 assert_eq!(
1515 kinds,
1516 vec![
1517 TokenKind::Variable,
1518 TokenKind::PipeArrow,
1519 TokenKind::Identifier,
1520 TokenKind::LeftParen,
1521 TokenKind::Ellipsis,
1522 TokenKind::RightParen,
1523 ]
1524 );
1525 }
1526 }
1527
1528 mod numeric_literals {
1529 use super::*;
1530
1531 #[test]
1532 fn test_integers() {
1533 let toks = php_tokens("42 0xFF 0b1010 077");
1534 assert_eq!(toks[0], (TokenKind::IntLiteral, "42".to_string()));
1535 assert_eq!(toks[1], (TokenKind::HexIntLiteral, "0xFF".to_string()));
1536 assert_eq!(toks[2], (TokenKind::BinIntLiteral, "0b1010".to_string()));
1537 assert_eq!(toks[3], (TokenKind::OctIntLiteral, "077".to_string()));
1538 }
1539
1540 #[test]
1541 fn test_floats() {
1542 let toks = php_tokens("3.14 1e10 2.5e-3");
1543 assert_eq!(toks[0], (TokenKind::FloatLiteralSimple, "3.14".to_string()));
1544 assert_eq!(toks[1], (TokenKind::FloatLiteral, "1e10".to_string()));
1545 assert_eq!(toks[2], (TokenKind::FloatLiteral, "2.5e-3".to_string()));
1546 }
1547
1548 #[test]
1549 fn test_float_leading_dot() {
1550 let toks = php_tokens(".5 .123e4");
1551 assert_eq!(
1552 toks[0],
1553 (TokenKind::FloatLiteralLeadingDot, ".5".to_string())
1554 );
1555 assert_eq!(
1556 toks[1],
1557 (TokenKind::FloatLiteralLeadingDot, ".123e4".to_string())
1558 );
1559 }
1560
1561 #[test]
1562 fn test_trailing_dot_float() {
1563 let toks = php_tokens("0. 1. 42.");
1565 assert_eq!(toks[0], (TokenKind::FloatLiteralSimple, "0.".to_string()));
1566 assert_eq!(toks[1], (TokenKind::FloatLiteralSimple, "1.".to_string()));
1567 assert_eq!(toks[2], (TokenKind::FloatLiteralSimple, "42.".to_string()));
1568 }
1569
1570 #[test]
1571 fn test_trailing_dot_not_confused_with_dotdot() {
1572 let toks = php_tokens("1..");
1575 assert_eq!(toks[0], (TokenKind::IntLiteral, "1".to_string()));
1576 assert_eq!(toks[1], (TokenKind::Dot, ".".to_string()));
1577 assert_eq!(toks[2], (TokenKind::Dot, ".".to_string()));
1578 }
1579
1580 #[test]
1581 fn test_new_octal_syntax() {
1582 let toks = php_tokens("0o77 0O755");
1583 assert_eq!(toks[0], (TokenKind::OctIntLiteralNew, "0o77".to_string()));
1584 assert_eq!(toks[1], (TokenKind::OctIntLiteralNew, "0O755".to_string()));
1585 }
1586
1587 #[test]
1588 fn test_legacy_octal_with_invalid_digits() {
1589 let toks = php_tokens("0778 019 09");
1592 assert_eq!(toks[0], (TokenKind::OctIntLiteral, "0778".to_string()));
1593 assert_eq!(toks[1], (TokenKind::OctIntLiteral, "019".to_string()));
1594 assert_eq!(toks[2], (TokenKind::OctIntLiteral, "09".to_string()));
1595 }
1596
1597 #[test]
1598 fn test_numeric_underscores() {
1599 let toks = php_tokens("1_000 0xFF_FF 0b1010_0101");
1600 assert_eq!(toks[0], (TokenKind::IntLiteral, "1_000".to_string()));
1601 assert_eq!(toks[1], (TokenKind::HexIntLiteral, "0xFF_FF".to_string()));
1602 assert_eq!(
1603 toks[2],
1604 (TokenKind::BinIntLiteral, "0b1010_0101".to_string())
1605 );
1606 }
1607 }
1608
1609 mod strings_and_variables {
1610 use super::*;
1611
1612 #[test]
1613 fn test_string_literals() {
1614 let tokens = collect_kinds(r#"<?php 'single' "double""#);
1615 assert_eq!(
1616 tokens,
1617 vec![
1618 TokenKind::OpenTag,
1619 TokenKind::SingleQuotedString,
1620 TokenKind::DoubleQuotedString,
1621 TokenKind::Eof,
1622 ]
1623 );
1624 }
1625
1626 #[test]
1627 fn test_strings() {
1628 let kinds = php_kinds(r#"'hello' "world" 'it\'s' "say \"hi\"""#);
1629 assert_eq!(
1630 kinds,
1631 vec![
1632 TokenKind::SingleQuotedString,
1633 TokenKind::DoubleQuotedString,
1634 TokenKind::SingleQuotedString,
1635 TokenKind::DoubleQuotedString,
1636 ]
1637 );
1638 }
1639
1640 #[test]
1641 fn test_binary_prefix_strings() {
1642 let kinds = php_kinds(r#"b'hello' B"world""#);
1643 assert_eq!(
1644 kinds,
1645 vec![TokenKind::SingleQuotedString, TokenKind::DoubleQuotedString,]
1646 );
1647 }
1648
1649 #[test]
1650 fn test_variables() {
1651 let toks = php_tokens("$x $myVar $_foo");
1652 assert_eq!(toks[0], (TokenKind::Variable, "$x".to_string()));
1653 assert_eq!(toks[1], (TokenKind::Variable, "$myVar".to_string()));
1654 assert_eq!(toks[2], (TokenKind::Variable, "$_foo".to_string()));
1655 }
1656
1657 #[test]
1658 fn test_comments_yielded() {
1659 let toks = php_tokens("42 // line comment\n43 /* block */ 44 # hash comment\n45");
1661 assert_eq!(toks[0], (TokenKind::IntLiteral, "42".to_string()));
1662 assert_eq!(
1663 toks[1],
1664 (TokenKind::LineComment, "// line comment".to_string())
1665 );
1666 assert_eq!(toks[2], (TokenKind::IntLiteral, "43".to_string()));
1667 assert_eq!(
1668 toks[3],
1669 (TokenKind::BlockComment, "/* block */".to_string())
1670 );
1671 assert_eq!(toks[4], (TokenKind::IntLiteral, "44".to_string()));
1672 assert_eq!(
1673 toks[5],
1674 (TokenKind::HashComment, "# hash comment".to_string())
1675 );
1676 assert_eq!(toks[6], (TokenKind::IntLiteral, "45".to_string()));
1677 }
1678 }
1679}