1use memchr::{memchr2, memmem};
2use php_ast::Span;
3
4use crate::token::{resolve_keyword, TokenKind};
5
/// Builds a 256-entry lookup table marking the bytes PHP treats as
/// whitespace: space, tab, CR, LF, and form feed (0x0C).
const fn make_whitespace_table() -> [bool; 256] {
    let mut table = [false; 256];
    let mut i = 0usize;
    while i < 256 {
        table[i] = matches!(i as u8, b' ' | b'\t' | b'\r' | b'\n' | 0x0C);
        i += 1;
    }
    table
}
22
/// Builds a 256-entry table of bytes that may begin a PHP identifier:
/// ASCII letters, `_`, and every byte >= 0x80 (so multibyte UTF-8
/// sequences are accepted as identifier characters).
const fn make_ident_start_table() -> [bool; 256] {
    let mut table = [false; 256];
    let mut i = 0usize;
    while i < 256 {
        let b = i as u8;
        table[i] = b.is_ascii_alphabetic() || b == b'_' || b >= 0x80;
        i += 1;
    }
    table
}
33
/// Builds a 256-entry table of bytes that may continue a PHP identifier:
/// ASCII letters and digits, `_`, and every byte >= 0x80.
const fn make_ident_continue_table() -> [bool; 256] {
    let mut table = [false; 256];
    let mut i = 0usize;
    while i < 256 {
        let b = i as u8;
        table[i] = b.is_ascii_alphanumeric() || b == b'_' || b >= 0x80;
        i += 1;
    }
    table
}
48
// Byte-classification lookup tables, built once at compile time and
// indexed directly by byte value on the hot lexing paths.
static IS_PHP_WHITESPACE: [bool; 256] = make_whitespace_table();
static IS_IDENT_START: [bool; 256] = make_ident_start_table();
static IS_IDENT_CONTINUE: [bool; 256] = make_ident_continue_table();
52
/// A diagnostic recorded during lexing (e.g. an unterminated block comment
/// or a malformed numeric literal). Lexing continues after recording one.
#[derive(Debug, Clone, PartialEq)]
pub struct LexerError {
    // Human-readable description of the problem.
    pub message: String,
    // Byte range in the source the error applies to.
    pub span: Span,
}
58
/// A single lexed token: its kind plus the byte span it covers in the source.
/// The token carries no text; use `Lexer::token_text` to recover the slice.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Token {
    pub kind: TokenKind,
    pub span: Span,
}
64
65impl Token {
66 pub fn new(kind: TokenKind, span: Span) -> Self {
67 Self { kind, span }
68 }
69
70 pub fn eof(offset: u32) -> Self {
71 Self {
72 kind: TokenKind::Eof,
73 span: Span::new(offset, offset),
74 }
75 }
76}
77
/// Which sublanguage the lexer is currently reading.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LexerMode {
    // Outside PHP tags: raw text is emitted as `InlineHtml` tokens.
    InlineHtml,
    // Inside `<?php`/`<?=` … `?>`: normal PHP tokenization.
    Php,
}
83
/// A streaming PHP lexer over `source` with two tokens of lookahead.
pub struct Lexer<'src> {
    source: &'src str,
    // Current sublanguage (inline HTML vs PHP).
    mode: LexerMode,
    // Byte offset of the cursor into `source`.
    pos: usize,
    // One- and two-token lookahead buffers for `peek`/`peek2`.
    peeked: Option<Token>,
    peeked2: Option<Token>,
    // Errors recovered from during lexing; lexing itself never aborts.
    pub errors: Vec<LexerError>,
}
92
93#[inline(always)]
94fn is_ident_start(b: u8) -> bool {
95 IS_IDENT_START[b as usize]
96}
97
98#[inline(always)]
99fn is_ident_continue(b: u8) -> bool {
100 IS_IDENT_CONTINUE[b as usize]
101}
102
103impl<'src> Lexer<'src> {
    /// Creates a lexer positioned at the start of `source`.
    ///
    /// Skips a leading `#!` shebang line if present, then picks the initial
    /// mode: PHP if the (post-shebang) source begins with `<?php`
    /// (case-insensitive) or `<?=`, otherwise inline HTML.
    pub fn new(source: &'src str) -> Self {
        // Skip the shebang line (e.g. `#!/usr/bin/env php`) entirely.
        let pos = if source.starts_with("#!") {
            source.find('\n').map(|p| p + 1).unwrap_or(source.len())
        } else {
            0
        };

        let remaining = &source[pos..];
        let rem_bytes = remaining.as_bytes();
        // `<?php` is matched case-insensitively; `<?=` must be exact.
        let mode = if (rem_bytes.len() >= 5
            && rem_bytes[0] == b'<'
            && rem_bytes[1] == b'?'
            && rem_bytes[2..5].eq_ignore_ascii_case(b"php"))
            || remaining.starts_with("<?=")
        {
            LexerMode::Php
        } else {
            LexerMode::InlineHtml
        };

        Self {
            source,
            mode,
            pos,
            peeked: None,
            peeked2: None,
            errors: Vec::new(),
        }
    }
135
    /// Creates a lexer that starts directly in PHP mode at byte `offset`.
    ///
    /// Intended for resuming lexing mid-source; performs no shebang or
    /// open-tag detection. `offset` must lie on a char boundary within
    /// `source`, or subsequent slicing will panic.
    pub fn new_at(source: &'src str, offset: usize) -> Self {
        Self {
            source,
            mode: LexerMode::Php,
            pos: offset,
            peeked: None,
            peeked2: None,
            errors: Vec::new(),
        }
    }
150
    /// Returns the full source string this lexer reads from.
    pub fn source(&self) -> &'src str {
        self.source
    }
154
155 pub fn peek(&mut self) -> &Token {
156 if self.peeked.is_none() {
157 self.peeked = Some(self.read_next_token());
158 }
159 self.peeked.as_ref().expect("peeked is Some: set above")
160 }
161
162 pub fn peek2(&mut self) -> &Token {
164 if self.peeked.is_none() {
166 self.peeked = Some(self.read_next_token());
167 }
168 if self.peeked2.is_none() {
169 self.peeked2 = Some(self.read_next_token());
170 }
171 self.peeked2.as_ref().expect("peeked2 is Some: set above")
172 }
173
174 pub fn next_token(&mut self) -> Token {
175 if let Some(token) = self.peeked.take() {
176 self.peeked = self.peeked2.take();
177 return token;
178 }
179 self.read_next_token()
180 }
181
182 pub fn token_text(&self, token: &Token) -> &'src str {
184 &self.source[token.span.start as usize..token.span.end as usize]
185 }
186
187 fn read_next_token(&mut self) -> Token {
188 if self.pos >= self.source.len() {
189 return Token::eof(self.source.len() as u32);
190 }
191
192 match self.mode {
193 LexerMode::InlineHtml => self.lex_inline_html(),
194 LexerMode::Php => self.lex_php(),
195 }
196 }
197
    /// Lexes a run of inline HTML up to the next `<?php`/`<?=` open tag
    /// (or end of input), switching to PHP mode when a tag is found.
    fn lex_inline_html(&mut self) -> Token {
        let start = self.pos;
        let bytes = self.source.as_bytes();

        // Find the next `<` that actually begins an open tag; a bare `<`
        // is just part of the HTML, so keep searching past it.
        let mut search = self.pos;
        let tag_pos = loop {
            match memchr::memchr(b'<', &bytes[search..]) {
                None => break None,
                Some(offset) => {
                    let p = search + offset;
                    let rest = &bytes[p..];
                    // Same tag test as Lexer::new: `<?php` (any case) or `<?=`.
                    if (rest.len() >= 5
                        && rest[0] == b'<'
                        && rest[1] == b'?'
                        && rest[2..5].eq_ignore_ascii_case(b"php"))
                        || rest.starts_with(b"<?=")
                    {
                        break Some(p - self.pos);
                    }
                    search = p + 1;
                }
            }
        };

        if let Some(tag_pos) = tag_pos {
            if tag_pos == 0 {
                // Tag begins immediately: no HTML token to emit; switch
                // into PHP mode and lex the tag itself.
                self.mode = LexerMode::Php;
                return self.lex_php();
            }
            let end = self.pos + tag_pos;
            self.pos = end;
            self.mode = LexerMode::Php;
            Token::new(TokenKind::InlineHtml, Span::new(start as u32, end as u32))
        } else {
            // No more open tags: the rest of the file is HTML.
            let end = self.source.len();
            self.pos = end;
            Token::new(TokenKind::InlineHtml, Span::new(start as u32, end as u32))
        }
    }
243
    /// Lexes one token in PHP mode: heredocs first (they handle their own
    /// leading whitespace), then whitespace, comments, and finally ordinary
    /// tokens via `scan_token`.
    fn lex_php(&mut self) -> Token {
        let remaining = &self.source[self.pos..];

        // Heredoc/nowdoc must be probed before whitespace is skipped, since
        // try_lex_heredoc computes its span from the raw remaining text.
        if let Some(token) = self.try_lex_heredoc(remaining) {
            return token;
        }

        self.skip_whitespace();

        if self.pos >= self.source.len() {
            return Token::eof(self.source.len() as u32);
        }

        let bytes = self.source.as_bytes();
        let start = self.pos;

        // `//` line comment.
        if bytes[self.pos] == b'/' && self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'/' {
            self.pos += 2;
            Self::skip_line_comment_body(bytes, &mut self.pos);
            return self.tok(TokenKind::LineComment, start);
        }

        // `/* ... */` block comment. `/**` starts a doc comment, except the
        // empty `/**/`, which stays an ordinary block comment.
        if bytes[self.pos] == b'/' && self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'*' {
            self.pos += 2;
            let kind = if self.pos < bytes.len()
                && bytes[self.pos] == b'*'
                && !(self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'/')
            {
                TokenKind::DocComment
            } else {
                TokenKind::BlockComment
            };
            match memmem::find(&bytes[self.pos..], b"*/") {
                Some(end) => self.pos += end + 2,
                None => {
                    // Unterminated: report an error and consume to EOF.
                    let span = Span::new(start as u32, self.source.len() as u32);
                    self.errors.push(LexerError {
                        message: "unterminated block comment".to_string(),
                        span,
                    });
                    self.pos = bytes.len();
                }
            }
            return self.tok(kind, start);
        }

        // `#` line comment — but `#[` introduces an attribute, not a comment.
        if bytes[self.pos] == b'#' && !(self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'[') {
            self.pos += 1;
            Self::skip_line_comment_body(bytes, &mut self.pos);
            return self.tok(TokenKind::HashComment, start);
        }

        self.scan_token()
    }
307
308 fn skip_whitespace(&mut self) {
310 let bytes = self.source.as_bytes();
311 while self.pos < bytes.len() && IS_PHP_WHITESPACE[bytes[self.pos] as usize] {
312 self.pos += 1;
313 }
314 }
315
    /// Scans one operator, punctuation, literal, variable, identifier, or
    /// keyword token starting at the current (non-whitespace) byte.
    /// Comments and heredocs at the top level were already handled by
    /// `lex_php`; this only re-checks `b`-prefixed strings and heredocs.
    fn scan_token(&mut self) -> Token {
        let start = self.pos;
        let bytes = self.source.as_bytes();
        let b = bytes[start];

        match b {
            // `++`, `+=`, `+` — longest match first.
            b'+' => {
                if self.check_at(1, b'+') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PlusPlus, start)
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PlusEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Plus, start)
                }
            }
            // `--`, `-=`, `->`, `-`
            b'-' => {
                if self.check_at(1, b'-') {
                    self.pos = start + 2;
                    self.tok(TokenKind::MinusMinus, start)
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::MinusEquals, start)
                } else if self.check_at(1, b'>') {
                    self.pos = start + 2;
                    self.tok(TokenKind::Arrow, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Minus, start)
                }
            }
            // `**=`, `**`, `*=`, `*`
            b'*' => {
                if self.check_at(1, b'*') {
                    if self.check_at(2, b'=') {
                        self.pos = start + 3;
                        self.tok(TokenKind::StarStarEquals, start)
                    } else {
                        self.pos = start + 2;
                        self.tok(TokenKind::StarStar, start)
                    }
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::StarEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Star, start)
                }
            }
            // `/=`, `/` — `//` and `/*` comments never reach here.
            b'/' => {
                if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::SlashEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Slash, start)
                }
            }
            // `%=`, `%`
            b'%' => {
                if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PercentEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Percent, start)
                }
            }
            // `.5`-style floats, `...`, `.=`, `.`
            b'.' => {
                if start + 1 < bytes.len() && bytes[start + 1].is_ascii_digit() {
                    // Leading-dot float, e.g. `.5` or `.5e3`.
                    self.pos = start + 1;
                    self.scan_digits(u8::is_ascii_digit);
                    if self.pos < bytes.len() && matches!(bytes[self.pos], b'e' | b'E') {
                        self.try_scan_exponent();
                    }
                    if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                        // Trailing underscore makes the literal malformed.
                        self.consume_invalid_numeric_rest();
                        return self.invalid_numeric(start);
                    }
                    return self.tok(TokenKind::FloatLiteralLeadingDot, start);
                }
                if self.check_at(1, b'.') && self.check_at(2, b'.') {
                    self.pos = start + 3;
                    self.tok(TokenKind::Ellipsis, start)
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::DotEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Dot, start)
                }
            }
            // `===`, `==`, `=>`, `=`
            b'=' => {
                if self.check_at(1, b'=') {
                    if self.check_at(2, b'=') {
                        self.pos = start + 3;
                        self.tok(TokenKind::EqualsEqualsEquals, start)
                    } else {
                        self.pos = start + 2;
                        self.tok(TokenKind::EqualsEquals, start)
                    }
                } else if self.check_at(1, b'>') {
                    self.pos = start + 2;
                    self.tok(TokenKind::FatArrow, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Equals, start)
                }
            }
            // `!==`, `!=`, `!`
            b'!' => {
                if self.check_at(1, b'=') {
                    if self.check_at(2, b'=') {
                        self.pos = start + 3;
                        self.tok(TokenKind::BangEqualsEquals, start)
                    } else {
                        self.pos = start + 2;
                        self.tok(TokenKind::BangEquals, start)
                    }
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Bang, start)
                }
            }
            // `<` has many continuations (shifts, heredoc, open tags).
            b'<' => self.scan_less_than(start),
            // `>>=`, `>>`, `>=`, `>`
            b'>' => {
                if self.check_at(1, b'>') {
                    if self.check_at(2, b'=') {
                        self.pos = start + 3;
                        self.tok(TokenKind::ShiftRightEquals, start)
                    } else {
                        self.pos = start + 2;
                        self.tok(TokenKind::ShiftRight, start)
                    }
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::GreaterThanEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::GreaterThan, start)
                }
            }
            // `&&`, `&=`, `&`
            b'&' => {
                if self.check_at(1, b'&') {
                    self.pos = start + 2;
                    self.tok(TokenKind::AmpersandAmpersand, start)
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::AmpersandEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Ampersand, start)
                }
            }
            // `||`, `|=`, `|>`, `|`
            b'|' => {
                if self.check_at(1, b'|') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PipePipe, start)
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PipeEquals, start)
                } else if self.check_at(1, b'>') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PipeArrow, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Pipe, start)
                }
            }
            // `^=`, `^`
            b'^' => {
                if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::CaretEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Caret, start)
                }
            }
            b'~' => {
                self.pos = start + 1;
                self.tok(TokenKind::Tilde, start)
            }
            // `?>` (close tag, switches mode), `??=`, `??`, `?->`, `?`
            b'?' => {
                if self.check_at(1, b'>') {
                    self.pos = start + 2;
                    self.mode = LexerMode::InlineHtml;
                    self.tok(TokenKind::CloseTag, start)
                } else if self.check_at(1, b'?') {
                    if self.check_at(2, b'=') {
                        self.pos = start + 3;
                        self.tok(TokenKind::CoalesceEquals, start)
                    } else {
                        self.pos = start + 2;
                        self.tok(TokenKind::QuestionQuestion, start)
                    }
                } else if self.check_at(1, b'-') && self.check_at(2, b'>') {
                    self.pos = start + 3;
                    self.tok(TokenKind::NullsafeArrow, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Question, start)
                }
            }
            // `::`, `:`
            b':' => {
                if self.check_at(1, b':') {
                    self.pos = start + 2;
                    self.tok(TokenKind::DoubleColon, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Colon, start)
                }
            }
            b'@' => {
                self.pos = start + 1;
                self.tok(TokenKind::At, start)
            }
            b'\\' => {
                self.pos = start + 1;
                self.tok(TokenKind::Backslash, start)
            }
            // `#[` attribute opener; `#` comments are consumed in lex_php,
            // so a stray `#` here is skipped and lexing continues.
            b'#' => {
                if self.check_at(1, b'[') {
                    self.pos = start + 2;
                    self.tok(TokenKind::HashBracket, start)
                } else {
                    self.pos = start + 1;
                    self.read_next_token()
                }
            }

            // Single-byte delimiters.
            b'(' => {
                self.pos = start + 1;
                self.tok(TokenKind::LeftParen, start)
            }
            b')' => {
                self.pos = start + 1;
                self.tok(TokenKind::RightParen, start)
            }
            b'[' => {
                self.pos = start + 1;
                self.tok(TokenKind::LeftBracket, start)
            }
            b']' => {
                self.pos = start + 1;
                self.tok(TokenKind::RightBracket, start)
            }
            b'{' => {
                self.pos = start + 1;
                self.tok(TokenKind::LeftBrace, start)
            }
            b'}' => {
                self.pos = start + 1;
                self.tok(TokenKind::RightBrace, start)
            }
            b';' => {
                self.pos = start + 1;
                self.tok(TokenKind::Semicolon, start)
            }
            b',' => {
                self.pos = start + 1;
                self.tok(TokenKind::Comma, start)
            }

            // String literals.
            b'\'' => self.scan_single_quoted_string(),
            b'"' => self.scan_double_quoted_string(),
            b'`' => self.scan_backtick_string(),

            // `$name` variable; a lone `$` (e.g. in `$$var`) is Dollar.
            b'$' => {
                if start + 1 < bytes.len() && is_ident_start(bytes[start + 1]) {
                    self.pos = start + 2;
                    while self.pos < bytes.len() && is_ident_continue(bytes[self.pos]) {
                        self.pos += 1;
                    }
                    self.tok(TokenKind::Variable, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Dollar, start)
                }
            }

            b'0'..=b'9' => self.scan_number(),

            // Identifier or keyword; `b`/`B` may prefix binary strings
            // and heredocs, so check those forms first.
            _ if is_ident_start(b) => {
                if b == b'b' || b == b'B' {
                    if self.check_at(1, b'\'') {
                        return self.scan_single_quoted_string();
                    }
                    if self.check_at(1, b'"') {
                        return self.scan_double_quoted_string();
                    }
                    if self.check_at(1, b'<') && self.check_at(2, b'<') && self.check_at(3, b'<') {
                        let remaining = &self.source[self.pos..];
                        if let Some(token) = self.try_lex_heredoc(remaining) {
                            return token;
                        }
                    }
                }
                self.scan_identifier()
            }

            // Unrecognized byte: skip it and lex the next token.
            _ => {
                self.pos = start + 1;
                self.read_next_token()
            }
        }
    }
637
    /// Disambiguates tokens beginning with `<`: heredoc openers, shift
    /// operators, comparisons, the spaceship operator, and mid-stream
    /// `<?php`/`<?=` open tags (which reappear after a `?>` close tag).
    fn scan_less_than(&mut self, start: usize) -> Token {
        if self.check_at(1, b'<') {
            if self.check_at(2, b'<') {
                // `<<<` may begin a heredoc/nowdoc; fall through to shift
                // operators when it does not parse as one.
                let remaining = &self.source[self.pos..];
                if let Some(token) = self.try_lex_heredoc(remaining) {
                    return token;
                }
            }
            if self.check_at(2, b'=') {
                self.pos = start + 3;
                return self.tok(TokenKind::ShiftLeftEquals, start);
            }
            self.pos = start + 2;
            return self.tok(TokenKind::ShiftLeft, start);
        }
        if self.check_at(1, b'=') {
            if self.check_at(2, b'>') {
                self.pos = start + 3;
                return self.tok(TokenKind::Spaceship, start);
            }
            self.pos = start + 2;
            return self.tok(TokenKind::LessThanEquals, start);
        }
        if self.check_at(1, b'?') {
            let bytes = self.source.as_bytes();
            // `<?php` (case-insensitive, 5 bytes) or `<?=` (3 bytes).
            // NOTE(review): no check that `<?php` is followed by whitespace;
            // confirm `<?phpX` should still lex as an open tag here.
            if bytes.len() >= self.pos + 5
                && bytes[self.pos + 2..self.pos + 5].eq_ignore_ascii_case(b"php")
            {
                self.pos = start + 5;
                return self.tok(TokenKind::OpenTag, start);
            }
            if self.source[self.pos..].starts_with("<?=") {
                self.pos = start + 3;
                return self.tok(TokenKind::OpenTag, start);
            }
        }
        self.pos = start + 1;
        self.tok(TokenKind::LessThan, start)
    }
680
    /// Scans a single-quoted string, including an optional `b`/`B` binary
    /// prefix. A backslash escapes the following byte, so `\'` does not
    /// terminate the string.
    ///
    /// NOTE(review): when no closing quote exists before EOF, the first
    /// byte is skipped and lexing restarts with no error reported —
    /// confirm this silent fallback is intended.
    fn scan_single_quoted_string(&mut self) -> Token {
        let start = self.pos;
        let bytes = self.source.as_bytes();
        let mut p = self.pos;
        // Optional binary-string prefix, e.g. `b'...'`.
        if bytes[p] == b'b' || bytes[p] == b'B' {
            p += 1;
        }
        // Skip the opening quote.
        p += 1;
        loop {
            match memchr2(b'\\', b'\'', &bytes[p..]) {
                None => {
                    // Unterminated: re-lex from just past the first byte.
                    self.pos = start + 1;
                    return self.read_next_token();
                }
                Some(offset) => {
                    p += offset;
                    match bytes[p] {
                        b'\\' => {
                            // Escape consumes the following byte too.
                            p += 1;
                            if p < bytes.len() {
                                p += 1;
                            }
                        }
                        _ => {
                            // Closing quote found; include it in the token.
                            p += 1;
                            break;
                        }
                    }
                }
            }
        }
        self.pos = p;
        self.tok(TokenKind::SingleQuotedString, start)
    }
720
    /// Scans a double-quoted string, including an optional `b`/`B` binary
    /// prefix. A backslash escapes the following byte. Interpolation is
    /// not analyzed here — the whole literal is one token.
    ///
    /// NOTE(review): unterminated strings skip one byte and re-lex with no
    /// error reported — confirm this silent fallback is intended.
    fn scan_double_quoted_string(&mut self) -> Token {
        let start = self.pos;
        let bytes = self.source.as_bytes();
        let mut p = self.pos;
        // Optional binary-string prefix, e.g. `b"..."`.
        if bytes[p] == b'b' || bytes[p] == b'B' {
            p += 1;
        }
        // Skip the opening quote.
        p += 1;
        loop {
            match memchr2(b'\\', b'"', &bytes[p..]) {
                None => {
                    // Unterminated: re-lex from just past the first byte.
                    self.pos = start + 1;
                    return self.read_next_token();
                }
                Some(offset) => {
                    p += offset;
                    match bytes[p] {
                        b'\\' => {
                            // Escape consumes the following byte too.
                            p += 1;
                            if p < bytes.len() {
                                p += 1;
                            }
                        }
                        _ => {
                            // Closing quote found; include it in the token.
                            p += 1;
                            break;
                        }
                    }
                }
            }
        }
        self.pos = p;
        self.tok(TokenKind::DoubleQuotedString, start)
    }
758
    /// Scans a backtick (shell-execution) string. A backslash escapes the
    /// following byte. No `b` prefix form exists for backtick strings.
    ///
    /// NOTE(review): unterminated strings skip one byte and re-lex with no
    /// error reported — confirm this silent fallback is intended.
    fn scan_backtick_string(&mut self) -> Token {
        let start = self.pos;
        let bytes = self.source.as_bytes();
        let mut p = self.pos;
        // Skip the opening backtick.
        p += 1;
        loop {
            match memchr2(b'\\', b'`', &bytes[p..]) {
                None => {
                    // Unterminated: re-lex from just past the backtick.
                    self.pos = start + 1;
                    return self.read_next_token();
                }
                Some(offset) => {
                    p += offset;
                    match bytes[p] {
                        b'\\' => {
                            // Escape consumes the following byte too.
                            p += 1;
                            if p < bytes.len() {
                                p += 1;
                            }
                        }
                        _ => {
                            // Closing backtick; include it in the token.
                            p += 1;
                            break;
                        }
                    }
                }
            }
        }
        self.pos = p;
        self.tok(TokenKind::BacktickString, start)
    }
792
    /// Scans a numeric literal starting at the current digit.
    ///
    /// Handles `0x`/`0b`/`0o` radix-prefixed integers, legacy leading-zero
    /// octals, decimal integers, and floats (fraction and/or exponent).
    /// Underscore separators are valid only between digits; a misplaced
    /// `_` turns the whole run into an `InvalidNumericLiteral` error token.
    fn scan_number(&mut self) -> Token {
        let start = self.pos;
        let bytes = self.source.as_bytes();

        // Radix-prefixed forms. If the prefix has no valid digits the
        // position is rewound and the `0` lexes as a plain decimal.
        if bytes[start] == b'0' && start + 1 < bytes.len() {
            match bytes[start + 1] {
                b'x' | b'X' => {
                    self.pos = start + 2;
                    // Underscore may not directly follow the prefix.
                    if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                        self.consume_invalid_numeric_rest();
                        return self.invalid_numeric(start);
                    }
                    if self.scan_digits(u8::is_ascii_hexdigit) {
                        // Nor may it trail the digits.
                        if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                            self.consume_invalid_numeric_rest();
                            return self.invalid_numeric(start);
                        }
                        return self.tok(TokenKind::HexIntLiteral, start);
                    }
                    self.pos = start;
                }
                b'b' | b'B' => {
                    self.pos = start + 2;
                    if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                        self.consume_invalid_numeric_rest();
                        return self.invalid_numeric(start);
                    }
                    if self.scan_digits(|b| b == &b'0' || b == &b'1') {
                        if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                            self.consume_invalid_numeric_rest();
                            return self.invalid_numeric(start);
                        }
                        return self.tok(TokenKind::BinIntLiteral, start);
                    }
                    self.pos = start;
                }
                b'o' | b'O' => {
                    self.pos = start + 2;
                    if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                        self.consume_invalid_numeric_rest();
                        return self.invalid_numeric(start);
                    }
                    if self.scan_digits(|b| (b'0'..=b'7').contains(b)) {
                        if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                            self.consume_invalid_numeric_rest();
                            return self.invalid_numeric(start);
                        }
                        return self.tok(TokenKind::OctIntLiteralNew, start);
                    }
                    self.pos = start;
                }
                _ => {}
            }
        }

        // Decimal integer part.
        self.pos = start;
        self.scan_digits(u8::is_ascii_digit);
        let integer_end = self.pos;
        let mut kind = TokenKind::IntLiteral;

        // Legacy octal: a `0` followed by more digits. (Digit validity is
        // not checked here — `08` still classifies as OctIntLiteral.)
        if bytes[start] == b'0' && integer_end > start + 1 {
            kind = TokenKind::OctIntLiteral;
        }

        // Optional fraction. A bare trailing `.` (not `..`, which would be
        // part of an ellipsis) still makes a float; `._` is malformed.
        if self.pos < bytes.len() && bytes[self.pos] == b'.' {
            if self.pos + 1 < bytes.len() && bytes[self.pos + 1].is_ascii_digit() {
                self.pos += 1;
                self.scan_digits(u8::is_ascii_digit);
                kind = TokenKind::FloatLiteralSimple;
            } else if self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'_' {
                self.consume_invalid_numeric_rest();
                return self.invalid_numeric(start);
            } else if self.pos + 1 >= bytes.len() || bytes[self.pos + 1] != b'.' {
                self.pos += 1;
                kind = TokenKind::FloatLiteralSimple;
            }
        }

        // Optional exponent; `e_` is malformed, a bare `e` is left for the
        // identifier scanner (try_scan_exponent rewinds on failure).
        if self.pos < bytes.len() && matches!(bytes[self.pos], b'e' | b'E') {
            if self.try_scan_exponent() {
                kind = TokenKind::FloatLiteral;
            } else if self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'_' {
                self.consume_invalid_numeric_rest();
                return self.invalid_numeric(start);
            }
        }

        // Trailing underscore after the literal body is malformed.
        if self.pos < bytes.len() && bytes[self.pos] == b'_' {
            self.consume_invalid_numeric_rest();
            return self.invalid_numeric(start);
        }

        self.tok(kind, start)
    }
903
904 fn scan_digits(&mut self, is_valid: fn(&u8) -> bool) -> bool {
907 let bytes = self.source.as_bytes();
908 if self.pos >= bytes.len() || !is_valid(&bytes[self.pos]) {
909 return false;
910 }
911 self.pos += 1;
912 loop {
913 if self.pos >= bytes.len() {
914 break;
915 }
916 if is_valid(&bytes[self.pos]) {
917 self.pos += 1;
918 } else if bytes[self.pos] == b'_'
919 && self.pos + 1 < bytes.len()
920 && is_valid(&bytes[self.pos + 1])
921 {
922 self.pos += 2;
923 } else {
924 break;
925 }
926 }
927 true
928 }
929
930 fn try_scan_exponent(&mut self) -> bool {
933 let bytes = self.source.as_bytes();
934 let saved = self.pos;
935 self.pos += 1; if self.pos < bytes.len() && matches!(bytes[self.pos], b'+' | b'-') {
939 self.pos += 1;
940 }
941
942 if self.scan_digits(u8::is_ascii_digit) {
944 true
945 } else {
946 self.pos = saved;
947 false
948 }
949 }
950
951 fn scan_identifier(&mut self) -> Token {
954 let start = self.pos;
955 let bytes = self.source.as_bytes();
956 self.pos += 1; while self.pos < bytes.len() && is_ident_continue(bytes[self.pos]) {
958 self.pos += 1;
959 }
960 let text = &self.source[start..self.pos];
961 let kind = resolve_keyword(text).unwrap_or(TokenKind::Identifier);
962 self.tok(kind, start)
963 }
964
    /// Advances `pos` to the end of a `//` or `#` line-comment body.
    ///
    /// Stops *at* (does not consume) the terminating newline, at a `?>`
    /// close tag (PHP line comments end before `?>`), or at end of input.
    #[inline]
    fn skip_line_comment_body(bytes: &[u8], pos: &mut usize) {
        loop {
            match memchr2(b'\n', b'?', &bytes[*pos..]) {
                None => {
                    *pos = bytes.len();
                    return;
                }
                Some(offset) => {
                    let p = *pos + offset;
                    if bytes[p] == b'\n' {
                        // Leave the newline for the whitespace skipper.
                        *pos = p;
                        return;
                    }
                    if p + 1 < bytes.len() && bytes[p + 1] == b'>' {
                        // `?>` ends the comment; leave it unconsumed so it
                        // lexes as a CloseTag.
                        *pos = p;
                        return;
                    }
                    // Lone `?`: part of the comment, keep scanning.
                    *pos = p + 1;
                }
            }
        }
    }
995
996 #[inline]
997 fn check_at(&self, offset: usize, expected: u8) -> bool {
998 self.source.as_bytes().get(self.pos + offset) == Some(&expected)
999 }
1000
    /// Builds a token of `kind` spanning from `start` to the current cursor.
    #[inline]
    fn tok(&self, kind: TokenKind, start: usize) -> Token {
        Token::new(kind, Span::new(start as u32, self.pos as u32))
    }
1005
1006 fn invalid_numeric(&mut self, start: usize) -> Token {
1007 let span = Span::new(start as u32, self.pos as u32);
1008 self.errors.push(LexerError {
1009 message: "Invalid numeric literal".to_string(),
1010 span,
1011 });
1012 Token::new(TokenKind::InvalidNumericLiteral, span)
1013 }
1014
    /// After detecting a malformed numeric literal, consumes the remainder
    /// of the run so the whole thing becomes a single error token.
    ///
    /// Eats alphanumerics, `_`, `.`, and a `+`/`-` sign — the sign only
    /// when it directly follows an `e`/`E` exponent marker.
    fn consume_invalid_numeric_rest(&mut self) {
        let bytes = self.source.as_bytes();
        while self.pos < bytes.len() {
            let b = bytes[self.pos];
            if b.is_ascii_alphanumeric() || b == b'_' || b == b'.' || b == b'+' || b == b'-' {
                if (b == b'+' || b == b'-') && self.pos > 0 {
                    // A sign belongs to the literal only as an exponent sign.
                    let prev = bytes[self.pos - 1];
                    if prev != b'e' && prev != b'E' {
                        break;
                    }
                }
                self.pos += 1;
            } else {
                break;
            }
        }
    }
1034
1035 fn try_lex_heredoc(&mut self, remaining: &str) -> Option<Token> {
1039 let trimmed = remaining.trim_start_matches(|c: char| {
1041 c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\x0C'
1042 });
1043 let ws_len = remaining.len() - trimmed.len();
1044
1045 let (after_prefix, prefix_len) = if (trimmed.starts_with("b<<<")
1047 || trimmed.starts_with("B<<<"))
1048 && !trimmed[1..].starts_with("<<<>")
1049 {
1050 (&trimmed[1..], 1)
1051 } else {
1052 (trimmed, 0)
1053 };
1054
1055 if !after_prefix.starts_with("<<<") {
1056 return None;
1057 }
1058
1059 let base_pos = self.pos; let start = base_pos + ws_len; let after_arrows = &after_prefix[3..];
1062 let after_arrows_trimmed = after_arrows.trim_start_matches([' ', '\t']);
1063 let arrows_offset =
1064 ws_len + prefix_len + 3 + (after_arrows.len() - after_arrows_trimmed.len());
1065
1066 let (label, is_nowdoc, label_line_end);
1068 if let Some(after_quote) = after_arrows_trimmed.strip_prefix('\'') {
1069 let closing = after_quote.find('\'')?;
1071 label = after_quote[..closing].to_string();
1072 is_nowdoc = true;
1073 let after_label = &after_arrows_trimmed[2 + closing..];
1074 let nl = after_label.find('\n').unwrap_or(after_label.len());
1076 label_line_end = arrows_offset + 2 + closing + nl;
1077 if label_line_end < remaining.len() {
1078 }
1080 } else {
1081 let s = if let Some(after_dquote) = after_arrows_trimmed.strip_prefix('"') {
1083 let closing = after_dquote.find('"')?;
1084 label = after_dquote[..closing].to_string();
1085 &after_dquote[1 + closing..]
1086 } else {
1087 let end = after_arrows_trimmed
1089 .find(|c: char| !c.is_ascii_alphanumeric() && c != '_')
1090 .unwrap_or(after_arrows_trimmed.len());
1091 if end == 0 {
1092 return None;
1093 }
1094 label = after_arrows_trimmed[..end].to_string();
1095 &after_arrows_trimmed[end..]
1096 };
1097 is_nowdoc = false;
1098 let nl = s.find('\n').unwrap_or(s.len());
1099 label_line_end = arrows_offset + (after_arrows_trimmed.len() - s.len()) + nl;
1100 };
1101
1102 if label.is_empty() {
1103 return None;
1104 }
1105
1106 let body_start_in_remaining = if label_line_end < remaining.len() {
1108 label_line_end + 1 } else {
1110 return None; };
1112
1113 let body = &remaining[body_start_in_remaining..];
1114
1115 let mut search_pos = 0;
1117 let end_marker_pos;
1118 loop {
1119 if search_pos >= body.len() {
1120 return None; }
1122 let line_start = search_pos;
1123 let line_end = body[line_start..]
1124 .find('\n')
1125 .map(|p| line_start + p)
1126 .unwrap_or(body.len());
1127 let line = &body[line_start..line_end];
1128 let trimmed_line = line.trim_start_matches([' ', '\t']);
1129
1130 if trimmed_line == label
1134 || trimmed_line.starts_with(&label)
1135 && trimmed_line[label.len()..]
1136 .trim_start_matches([';', ',', ')'])
1137 .trim()
1138 .is_empty()
1139 {
1140 end_marker_pos = line_start;
1141 break;
1142 }
1143
1144 search_pos = if line_end < body.len() {
1145 line_end + 1
1146 } else {
1147 body.len()
1148 };
1149 }
1150
1151 let end_marker_line = &body[end_marker_pos..];
1153 let trimmed = end_marker_line.trim_start_matches([' ', '\t']);
1154 let indent_len = end_marker_line.len() - trimmed.len();
1155 let token_end_in_remaining =
1156 body_start_in_remaining + end_marker_pos + indent_len + label.len();
1157 self.pos = base_pos + token_end_in_remaining;
1158
1159 let span = Span::new(start as u32, self.pos as u32);
1160
1161 if is_nowdoc {
1162 Some(Token::new(TokenKind::Nowdoc, span))
1163 } else {
1164 Some(Token::new(TokenKind::Heredoc, span))
1165 }
1166 }
1167}
1168
/// Lexes an entire source string, returning every token plus any errors.
///
/// The returned stream always ends with two `Eof` tokens sharing the same
/// span — presumably so consumers get one token of safe lookahead past the
/// end; TODO confirm against parser expectations.
pub fn lex_all(source: &str) -> (Vec<Token>, Vec<LexerError>) {
    let mut lexer = Lexer::new(source);
    let mut tokens = Vec::new();

    loop {
        let tok = lexer.next_token();
        let is_eof = tok.kind == TokenKind::Eof;
        tokens.push(tok);
        if is_eof {
            break;
        }
    }

    // Duplicate the final Eof with the same span.
    let eof_span = tokens.last().unwrap().span;
    tokens.push(Token::new(TokenKind::Eof, eof_span));

    let errors = lexer.errors;
    (tokens, errors)
}
1197
1198#[cfg(test)]
1199mod tests {
1200 use super::*;
1201
1202 fn collect_tokens(source: &str) -> Vec<Token> {
1203 let mut lexer = Lexer::new(source);
1204 let mut tokens = Vec::new();
1205 loop {
1206 let token = lexer.next_token();
1207 if token.kind == TokenKind::Eof {
1208 tokens.push(token);
1209 break;
1210 }
1211 tokens.push(token);
1212 }
1213 tokens
1214 }
1215
1216 fn collect_kinds(source: &str) -> Vec<TokenKind> {
1217 collect_tokens(source).into_iter().map(|t| t.kind).collect()
1218 }
1219
1220 fn php_kinds(code: &str) -> Vec<TokenKind> {
1222 let full = format!("<?php {}", code);
1223 collect_kinds(&full)
1224 .into_iter()
1225 .filter(|k| *k != TokenKind::OpenTag && *k != TokenKind::Eof)
1226 .collect()
1227 }
1228
1229 fn php_tokens(code: &str) -> Vec<(TokenKind, String)> {
1231 let full = format!("<?php {}", code);
1232 let mut lexer = Lexer::new(&full);
1233 let mut result = Vec::new();
1234 loop {
1235 let token = lexer.next_token();
1236 if token.kind == TokenKind::Eof {
1237 break;
1238 }
1239 if token.kind == TokenKind::OpenTag {
1240 continue;
1241 }
1242 let text = lexer.token_text(&token).to_string();
1243 result.push((token.kind, text));
1244 }
1245 result
1246 }
1247
1248 mod open_tag_and_html {
1249 use super::*;
1250
1251 #[test]
1252 fn test_php_only() {
1253 let tokens = collect_kinds("<?php $x = 42;");
1254 assert_eq!(
1255 tokens,
1256 vec![
1257 TokenKind::OpenTag,
1258 TokenKind::Variable,
1259 TokenKind::Equals,
1260 TokenKind::IntLiteral,
1261 TokenKind::Semicolon,
1262 TokenKind::Eof,
1263 ]
1264 );
1265 }
1266
1267 #[test]
1268 fn test_inline_html_before_php() {
1269 let tokens = collect_kinds("<html><?php echo 1;");
1270 assert_eq!(
1271 tokens,
1272 vec![
1273 TokenKind::InlineHtml,
1274 TokenKind::OpenTag,
1275 TokenKind::Echo,
1276 TokenKind::IntLiteral,
1277 TokenKind::Semicolon,
1278 TokenKind::Eof,
1279 ]
1280 );
1281 }
1282
1283 #[test]
1284 fn test_inline_html_after_close_tag() {
1285 let tokens = collect_kinds("<?php echo 1; ?><html>");
1286 assert_eq!(
1287 tokens,
1288 vec![
1289 TokenKind::OpenTag,
1290 TokenKind::Echo,
1291 TokenKind::IntLiteral,
1292 TokenKind::Semicolon,
1293 TokenKind::CloseTag,
1294 TokenKind::InlineHtml,
1295 TokenKind::Eof,
1296 ]
1297 );
1298 }
1299
1300 #[test]
1301 fn test_empty_source() {
1302 let tokens = collect_kinds("");
1303 assert_eq!(tokens, vec![TokenKind::Eof]);
1304 }
1305
1306 #[test]
1307 fn test_only_inline_html() {
1308 let tokens = collect_kinds("<html><body>Hello</body></html>");
1309 assert_eq!(tokens, vec![TokenKind::InlineHtml, TokenKind::Eof]);
1310 }
1311
1312 #[test]
1313 fn test_open_tag_uppercase() {
1314 for tag in &["<?PHP", "<?Php", "<?PhP", "<?pHP", "<?phP"] {
1316 let src = format!("{} $x = 1;", tag);
1317 let tokens = collect_kinds(&src);
1318 assert_eq!(
1319 tokens[0],
1320 TokenKind::OpenTag,
1321 "expected OpenTag for opening tag '{tag}'"
1322 );
1323 }
1324 }
1325
1326 #[test]
1327 fn test_open_tag_uppercase_mid_file() {
1328 let tokens = collect_kinds("<html><?PHP echo 1;");
1330 assert_eq!(
1331 tokens,
1332 vec![
1333 TokenKind::InlineHtml,
1334 TokenKind::OpenTag,
1335 TokenKind::Echo,
1336 TokenKind::IntLiteral,
1337 TokenKind::Semicolon,
1338 TokenKind::Eof,
1339 ]
1340 );
1341 }
1342 }
1343
1344 mod keywords {
1345 use super::*;
1346
1347 #[test]
1348 fn test_keyword_resolution() {
1349 let tokens = collect_kinds("<?php if else while for foreach function return");
1350 assert_eq!(
1351 tokens,
1352 vec![
1353 TokenKind::OpenTag,
1354 TokenKind::If,
1355 TokenKind::Else,
1356 TokenKind::While,
1357 TokenKind::For,
1358 TokenKind::Foreach,
1359 TokenKind::Function,
1360 TokenKind::Return,
1361 TokenKind::Eof,
1362 ]
1363 );
1364 }
1365
1366 #[test]
1367 fn test_keyword_case_insensitive() {
1368 let tokens = collect_kinds("<?php IF ELSE TRUE FALSE NULL");
1369 assert_eq!(
1370 tokens,
1371 vec![
1372 TokenKind::OpenTag,
1373 TokenKind::If,
1374 TokenKind::Else,
1375 TokenKind::True,
1376 TokenKind::False,
1377 TokenKind::Null,
1378 TokenKind::Eof,
1379 ]
1380 );
1381 }
1382
1383 #[test]
1384 fn test_logical_keywords() {
1385 let tokens = collect_kinds("<?php and or xor");
1386 assert_eq!(
1387 tokens,
1388 vec![
1389 TokenKind::OpenTag,
1390 TokenKind::And,
1391 TokenKind::Or,
1392 TokenKind::Xor,
1393 TokenKind::Eof,
1394 ]
1395 );
1396 }
1397 }
1398
1399 mod lexer_api {
1400 use super::*;
1401
1402 #[test]
1403 fn test_peek_doesnt_consume() {
1404 let mut lexer = Lexer::new("<?php 42");
1405 let peeked = *lexer.peek();
1406 assert_eq!(peeked.kind, TokenKind::OpenTag);
1407 let next = lexer.next_token();
1408 assert_eq!(next.kind, TokenKind::OpenTag);
1409 let next = lexer.next_token();
1410 assert_eq!(next.kind, TokenKind::IntLiteral);
1411 }
1412
1413 #[test]
1414 fn test_token_text() {
1415 let source = "<?php $myVar = 'hello';";
1416 let mut lexer = Lexer::new(source);
1417 lexer.next_token(); let var_tok = lexer.next_token();
1419 assert_eq!(lexer.token_text(&var_tok), "$myVar");
1420 lexer.next_token(); let str_tok = lexer.next_token();
1422 assert_eq!(lexer.token_text(&str_tok), "'hello'");
1423 }
1424
1425 #[test]
1426 fn test_spans_are_correct() {
1427 let source = "<?php $x";
1428 let tokens = collect_tokens(source);
1429 assert_eq!(tokens[0].span, Span::new(0, 5)); assert_eq!(tokens[1].span, Span::new(6, 8)); }
1432 }
1433
1434 mod operators {
1435 use super::*;
1436
1437 #[test]
1438 fn test_basic_operators() {
1439 assert_eq!(
1440 php_kinds("+ - * / % ** ."),
1441 vec![
1442 TokenKind::Plus,
1443 TokenKind::Minus,
1444 TokenKind::Star,
1445 TokenKind::Slash,
1446 TokenKind::Percent,
1447 TokenKind::StarStar,
1448 TokenKind::Dot,
1449 ]
1450 );
1451 }
1452
1453 #[test]
1454 fn test_operators() {
1455 let tokens = collect_kinds("<?php === !== <=> ?? ++ -- **");
1456 assert_eq!(
1457 tokens,
1458 vec![
1459 TokenKind::OpenTag,
1460 TokenKind::EqualsEqualsEquals,
1461 TokenKind::BangEqualsEquals,
1462 TokenKind::Spaceship,
1463 TokenKind::QuestionQuestion,
1464 TokenKind::PlusPlus,
1465 TokenKind::MinusMinus,
1466 TokenKind::StarStar,
1467 TokenKind::Eof,
1468 ]
1469 );
1470 }
1471
1472 #[test]
1473 fn test_assignment_operators() {
1474 let tokens = collect_kinds("<?php += -= *= /= %= **= .= ??=");
1475 assert_eq!(
1476 tokens,
1477 vec![
1478 TokenKind::OpenTag,
1479 TokenKind::PlusEquals,
1480 TokenKind::MinusEquals,
1481 TokenKind::StarEquals,
1482 TokenKind::SlashEquals,
1483 TokenKind::PercentEquals,
1484 TokenKind::StarStarEquals,
1485 TokenKind::DotEquals,
1486 TokenKind::CoalesceEquals,
1487 TokenKind::Eof,
1488 ]
1489 );
1490 }
1491
1492 #[test]
1493 fn test_hash_bracket_not_comment() {
1494 let kinds = php_kinds("#[Attribute]");
1495 assert_eq!(
1496 kinds,
1497 vec![
1498 TokenKind::HashBracket,
1499 TokenKind::Identifier,
1500 TokenKind::RightBracket,
1501 ]
1502 );
1503 }
1504
1505 #[test]
1506 fn test_nullsafe_arrow() {
1507 let kinds = php_kinds("$x?->y");
1508 assert_eq!(
1509 kinds,
1510 vec![
1511 TokenKind::Variable,
1512 TokenKind::NullsafeArrow,
1513 TokenKind::Identifier,
1514 ]
1515 );
1516 }
1517
1518 #[test]
1519 fn test_pipe_arrow() {
1520 let kinds = php_kinds("$x |> foo(...)");
1521 assert_eq!(
1522 kinds,
1523 vec![
1524 TokenKind::Variable,
1525 TokenKind::PipeArrow,
1526 TokenKind::Identifier,
1527 TokenKind::LeftParen,
1528 TokenKind::Ellipsis,
1529 TokenKind::RightParen,
1530 ]
1531 );
1532 }
1533 }
1534
1535 mod numeric_literals {
1536 use super::*;
1537
1538 #[test]
1539 fn test_integers() {
1540 let toks = php_tokens("42 0xFF 0b1010 077");
1541 assert_eq!(toks[0], (TokenKind::IntLiteral, "42".to_string()));
1542 assert_eq!(toks[1], (TokenKind::HexIntLiteral, "0xFF".to_string()));
1543 assert_eq!(toks[2], (TokenKind::BinIntLiteral, "0b1010".to_string()));
1544 assert_eq!(toks[3], (TokenKind::OctIntLiteral, "077".to_string()));
1545 }
1546
1547 #[test]
1548 fn test_floats() {
1549 let toks = php_tokens("3.14 1e10 2.5e-3");
1550 assert_eq!(toks[0], (TokenKind::FloatLiteralSimple, "3.14".to_string()));
1551 assert_eq!(toks[1], (TokenKind::FloatLiteral, "1e10".to_string()));
1552 assert_eq!(toks[2], (TokenKind::FloatLiteral, "2.5e-3".to_string()));
1553 }
1554
1555 #[test]
1556 fn test_float_leading_dot() {
1557 let toks = php_tokens(".5 .123e4");
1558 assert_eq!(
1559 toks[0],
1560 (TokenKind::FloatLiteralLeadingDot, ".5".to_string())
1561 );
1562 assert_eq!(
1563 toks[1],
1564 (TokenKind::FloatLiteralLeadingDot, ".123e4".to_string())
1565 );
1566 }
1567
1568 #[test]
1569 fn test_trailing_dot_float() {
1570 let toks = php_tokens("0. 1. 42.");
1572 assert_eq!(toks[0], (TokenKind::FloatLiteralSimple, "0.".to_string()));
1573 assert_eq!(toks[1], (TokenKind::FloatLiteralSimple, "1.".to_string()));
1574 assert_eq!(toks[2], (TokenKind::FloatLiteralSimple, "42.".to_string()));
1575 }
1576
1577 #[test]
1578 fn test_trailing_dot_not_confused_with_dotdot() {
1579 let toks = php_tokens("1..");
1582 assert_eq!(toks[0], (TokenKind::IntLiteral, "1".to_string()));
1583 assert_eq!(toks[1], (TokenKind::Dot, ".".to_string()));
1584 assert_eq!(toks[2], (TokenKind::Dot, ".".to_string()));
1585 }
1586
1587 #[test]
1588 fn test_new_octal_syntax() {
1589 let toks = php_tokens("0o77 0O755");
1590 assert_eq!(toks[0], (TokenKind::OctIntLiteralNew, "0o77".to_string()));
1591 assert_eq!(toks[1], (TokenKind::OctIntLiteralNew, "0O755".to_string()));
1592 }
1593
1594 #[test]
1595 fn test_legacy_octal_with_invalid_digits() {
1596 let toks = php_tokens("0778 019 09");
1599 assert_eq!(toks[0], (TokenKind::OctIntLiteral, "0778".to_string()));
1600 assert_eq!(toks[1], (TokenKind::OctIntLiteral, "019".to_string()));
1601 assert_eq!(toks[2], (TokenKind::OctIntLiteral, "09".to_string()));
1602 }
1603
1604 #[test]
1605 fn test_numeric_underscores() {
1606 let toks = php_tokens("1_000 0xFF_FF 0b1010_0101");
1607 assert_eq!(toks[0], (TokenKind::IntLiteral, "1_000".to_string()));
1608 assert_eq!(toks[1], (TokenKind::HexIntLiteral, "0xFF_FF".to_string()));
1609 assert_eq!(
1610 toks[2],
1611 (TokenKind::BinIntLiteral, "0b1010_0101".to_string())
1612 );
1613 }
1614 }
1615
1616 mod strings_and_variables {
1617 use super::*;
1618
1619 #[test]
1620 fn test_string_literals() {
1621 let tokens = collect_kinds(r#"<?php 'single' "double""#);
1622 assert_eq!(
1623 tokens,
1624 vec![
1625 TokenKind::OpenTag,
1626 TokenKind::SingleQuotedString,
1627 TokenKind::DoubleQuotedString,
1628 TokenKind::Eof,
1629 ]
1630 );
1631 }
1632
1633 #[test]
1634 fn test_strings() {
1635 let kinds = php_kinds(r#"'hello' "world" 'it\'s' "say \"hi\"""#);
1636 assert_eq!(
1637 kinds,
1638 vec![
1639 TokenKind::SingleQuotedString,
1640 TokenKind::DoubleQuotedString,
1641 TokenKind::SingleQuotedString,
1642 TokenKind::DoubleQuotedString,
1643 ]
1644 );
1645 }
1646
1647 #[test]
1648 fn test_binary_prefix_strings() {
1649 let kinds = php_kinds(r#"b'hello' B"world""#);
1650 assert_eq!(
1651 kinds,
1652 vec![TokenKind::SingleQuotedString, TokenKind::DoubleQuotedString,]
1653 );
1654 }
1655
1656 #[test]
1657 fn test_variables() {
1658 let toks = php_tokens("$x $myVar $_foo");
1659 assert_eq!(toks[0], (TokenKind::Variable, "$x".to_string()));
1660 assert_eq!(toks[1], (TokenKind::Variable, "$myVar".to_string()));
1661 assert_eq!(toks[2], (TokenKind::Variable, "$_foo".to_string()));
1662 }
1663
1664 #[test]
1665 fn test_comments_yielded() {
1666 let toks = php_tokens("42 // line comment\n43 /* block */ 44 # hash comment\n45");
1668 assert_eq!(toks[0], (TokenKind::IntLiteral, "42".to_string()));
1669 assert_eq!(
1670 toks[1],
1671 (TokenKind::LineComment, "// line comment".to_string())
1672 );
1673 assert_eq!(toks[2], (TokenKind::IntLiteral, "43".to_string()));
1674 assert_eq!(
1675 toks[3],
1676 (TokenKind::BlockComment, "/* block */".to_string())
1677 );
1678 assert_eq!(toks[4], (TokenKind::IntLiteral, "44".to_string()));
1679 assert_eq!(
1680 toks[5],
1681 (TokenKind::HashComment, "# hash comment".to_string())
1682 );
1683 assert_eq!(toks[6], (TokenKind::IntLiteral, "45".to_string()));
1684 }
1685 }
1686}