1use memchr::{memchr2, memmem};
2use php_ast::Span;
3
4use crate::token::{resolve_keyword, TokenKind};
5
/// Build a 256-entry lookup table flagging PHP whitespace bytes.
///
/// PHP treats space, tab, CR, LF, and form feed (0x0C) as whitespace.
const fn make_whitespace_table() -> [bool; 256] {
    const WS: [u8; 5] = [b' ', b'\t', b'\r', b'\n', 0x0C];
    let mut table = [false; 256];
    let mut i = 0;
    while i < WS.len() {
        table[WS[i] as usize] = true;
        i += 1;
    }
    table
}
22
/// Build a lookup table for bytes that can begin a PHP identifier:
/// ASCII letters, underscore, and any byte >= 0x80 (covers UTF-8
/// lead/continuation bytes, matching PHP's permissive label rule).
const fn make_ident_start_table() -> [bool; 256] {
    let mut table = [false; 256];
    let mut i = 0usize;
    while i < 256 {
        let b = i as u8;
        table[i] = b.is_ascii_alphabetic() || b == b'_' || b >= 0x80;
        i += 1;
    }
    table
}
33
/// Build a lookup table for bytes that can continue a PHP identifier:
/// like `make_ident_start_table` but also allowing ASCII digits.
const fn make_ident_continue_table() -> [bool; 256] {
    let mut table = [false; 256];
    let mut i = 0usize;
    while i < 256 {
        let b = i as u8;
        table[i] = b.is_ascii_alphanumeric() || b == b'_' || b >= 0x80;
        i += 1;
    }
    table
}
48
// Byte-class lookup tables, computed once at compile time and indexed by
// the raw source byte for branch-free classification in the hot loops.
static IS_PHP_WHITESPACE: [bool; 256] = make_whitespace_table();
static IS_IDENT_START: [bool; 256] = make_ident_start_table();
static IS_IDENT_CONTINUE: [bool; 256] = make_ident_continue_table();
52
/// Coarse classification of lexer diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexerErrorKind {
    /// A quoted string ran to end-of-input without its closing delimiter.
    UnterminatedString,
    /// Any other lexical problem (unterminated block comment, malformed
    /// numeric literal, ...).
    Other,
}

/// A diagnostic produced while lexing. Lexing never aborts; errors are
/// accumulated in `Lexer::errors` and recovery tokens are still emitted.
#[derive(Debug, Clone, PartialEq)]
pub struct LexerError {
    pub kind: LexerErrorKind,
    /// Human-readable description of the problem.
    pub message: String,
    /// Byte range of the offending text in the source.
    pub span: Span,
}

/// A single lexed token: its kind plus the byte range it covers.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Token {
    pub kind: TokenKind,
    pub span: Span,
}
71
72impl Token {
73 pub fn new(kind: TokenKind, span: Span) -> Self {
74 Self { kind, span }
75 }
76
77 pub fn eof(offset: u32) -> Self {
78 Self {
79 kind: TokenKind::Eof,
80 span: Span::new(offset, offset),
81 }
82 }
83}
84
/// Which sub-language the lexer is currently reading.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LexerMode {
    /// Outside PHP tags: everything up to the next open tag is raw HTML.
    InlineHtml,
    /// Inside `<?php` / `<?=`: normal PHP tokens until a `?>` close tag.
    Php,
}

/// Hand-written PHP lexer over a borrowed source string.
pub struct Lexer<'src> {
    source: &'src str,
    mode: LexerMode,
    /// Current byte offset into `source`.
    pos: usize,
    /// One- and two-token lookahead buffers for `peek` / `peek2`.
    peeked: Option<Token>,
    peeked2: Option<Token>,
    /// Diagnostics accumulated so far; lexing continues after errors.
    pub errors: Vec<LexerError>,
}
99
100#[inline(always)]
101fn is_ident_start(b: u8) -> bool {
102 IS_IDENT_START[b as usize]
103}
104
105#[inline(always)]
106fn is_ident_continue(b: u8) -> bool {
107 IS_IDENT_CONTINUE[b as usize]
108}
109
110impl<'src> Lexer<'src> {
111 pub fn new(source: &'src str) -> Self {
112 let pos = if source.starts_with("#!") {
114 source.find('\n').map(|p| p + 1).unwrap_or(source.len())
115 } else {
116 0
117 };
118
119 let remaining = &source[pos..];
121 let rem_bytes = remaining.as_bytes();
122 let mode = if (rem_bytes.len() >= 5
123 && rem_bytes[0] == b'<'
124 && rem_bytes[1] == b'?'
125 && rem_bytes[2..5].eq_ignore_ascii_case(b"php"))
126 || remaining.starts_with("<?=")
127 {
128 LexerMode::Php
129 } else {
130 LexerMode::InlineHtml
131 };
132
133 Self {
134 source,
135 mode,
136 pos,
137 peeked: None,
138 peeked2: None,
139 errors: Vec::new(),
140 }
141 }
142
    /// Create a lexer that starts directly in PHP mode at byte `offset`.
    ///
    /// No shebang or open-tag detection is performed; the caller asserts
    /// that `offset` already points inside PHP code.
    pub fn new_at(source: &'src str, offset: usize) -> Self {
        Self {
            source,
            mode: LexerMode::Php,
            pos: offset,
            peeked: None,
            peeked2: None,
            errors: Vec::new(),
        }
    }

    /// The full source string this lexer reads from.
    pub fn source(&self) -> &'src str {
        self.source
    }
161
162 pub fn peek(&mut self) -> &Token {
163 if self.peeked.is_none() {
164 self.peeked = Some(self.read_next_token());
165 }
166 self.peeked.as_ref().expect("peeked is Some: set above")
167 }
168
169 pub fn peek2(&mut self) -> &Token {
171 if self.peeked.is_none() {
173 self.peeked = Some(self.read_next_token());
174 }
175 if self.peeked2.is_none() {
176 self.peeked2 = Some(self.read_next_token());
177 }
178 self.peeked2.as_ref().expect("peeked2 is Some: set above")
179 }
180
181 pub fn next_token(&mut self) -> Token {
182 if let Some(token) = self.peeked.take() {
183 self.peeked = self.peeked2.take();
184 return token;
185 }
186 self.read_next_token()
187 }
188
189 pub fn token_text(&self, token: &Token) -> &'src str {
191 &self.source[token.span.start as usize..token.span.end as usize]
192 }
193
194 fn read_next_token(&mut self) -> Token {
195 if self.pos >= self.source.len() {
196 return Token::eof(self.source.len() as u32);
197 }
198
199 match self.mode {
200 LexerMode::InlineHtml => self.lex_inline_html(),
201 LexerMode::Php => self.lex_php(),
202 }
203 }
204
    /// Consume raw HTML up to (but not including) the next PHP open tag.
    ///
    /// Recognizes `<?php` (any case) and `<?=` as open tags. When an open
    /// tag sits exactly at the current position, no empty HTML token is
    /// emitted — the lexer switches to PHP mode and lexes the tag directly.
    fn lex_inline_html(&mut self) -> Token {
        let start = self.pos;
        let bytes = self.source.as_bytes();

        // Scan forward for a '<' that actually begins an open tag;
        // `tag_pos` is its offset relative to the token start.
        let mut search = self.pos;
        let tag_pos = loop {
            match memchr::memchr(b'<', &bytes[search..]) {
                None => break None,
                Some(offset) => {
                    let p = search + offset;
                    let rest = &bytes[p..];
                    if (rest.len() >= 5
                        && rest[0] == b'<'
                        && rest[1] == b'?'
                        && rest[2..5].eq_ignore_ascii_case(b"php"))
                        || rest.starts_with(b"<?=")
                    {
                        break Some(p - self.pos);
                    }
                    // Not an open tag — keep searching after this '<'.
                    search = p + 1;
                }
            }
        };

        if let Some(tag_pos) = tag_pos {
            if tag_pos == 0 {
                // Open tag with no HTML before it: lex the tag itself.
                self.mode = LexerMode::Php;
                return self.lex_php();
            }
            let end = self.pos + tag_pos;
            self.pos = end;
            self.mode = LexerMode::Php;
            Token::new(TokenKind::InlineHtml, Span::new(start as u32, end as u32))
        } else {
            // No more PHP: the rest of the file is one HTML token.
            let end = self.source.len();
            self.pos = end;
            Token::new(TokenKind::InlineHtml, Span::new(start as u32, end as u32))
        }
    }
250
    /// Scan the next token in PHP mode.
    ///
    /// Order matters here: heredoc detection runs *before* whitespace
    /// skipping (the heredoc scanner handles its own leading whitespace),
    /// then comments, then `scan_token` for everything else.
    fn lex_php(&mut self) -> Token {
        let remaining = &self.source[self.pos..];

        // `<<<LABEL` heredoc / nowdoc (possibly preceded by whitespace).
        if let Some(token) = self.try_lex_heredoc(remaining) {
            return token;
        }

        self.skip_whitespace();

        if self.pos >= self.source.len() {
            return Token::eof(self.source.len() as u32);
        }

        let bytes = self.source.as_bytes();
        let start = self.pos;

        // "//" line comment.
        if bytes[self.pos] == b'/' && self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'/' {
            self.pos += 2;
            Self::skip_line_comment_body(bytes, &mut self.pos);
            return self.tok(TokenKind::LineComment, start);
        }

        // "/* ... */" block comment; "/**" (but not the empty "/**/") is a
        // doc comment.
        if bytes[self.pos] == b'/' && self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'*' {
            self.pos += 2;
            let kind = if self.pos < bytes.len()
                && bytes[self.pos] == b'*'
                && !(self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'/')
            {
                TokenKind::DocComment
            } else {
                TokenKind::BlockComment
            };
            match memmem::find(&bytes[self.pos..], b"*/") {
                Some(end) => self.pos += end + 2,
                None => {
                    // Unterminated: report it and consume to EOF.
                    let span = Span::new(start as u32, self.source.len() as u32);
                    self.errors.push(LexerError {
                        kind: LexerErrorKind::Other,
                        message: "unterminated block comment".to_string(),
                        span,
                    });
                    self.pos = bytes.len();
                }
            }
            return self.tok(kind, start);
        }

        // "#" line comment — but "#[" starts an attribute and is left for
        // `scan_token`.
        if bytes[self.pos] == b'#' && !(self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'[') {
            self.pos += 1;
            Self::skip_line_comment_body(bytes, &mut self.pos);
            return self.tok(TokenKind::HashComment, start);
        }

        self.scan_token()
    }
315
316 fn skip_whitespace(&mut self) {
318 let bytes = self.source.as_bytes();
319 while self.pos < bytes.len() && IS_PHP_WHITESPACE[bytes[self.pos] as usize] {
320 self.pos += 1;
321 }
322 }
323
    /// Scan a single PHP token, dispatching on its first byte.
    ///
    /// Callers (`lex_php`) have already dealt with whitespace, comments and
    /// heredocs, so `bytes[self.pos]` is the first byte of a real token.
    /// Multi-byte operators are matched longest-first via `check_at`.
    fn scan_token(&mut self) -> Token {
        let start = self.pos;
        let bytes = self.source.as_bytes();
        let b = bytes[start];

        match b {
            // "++", "+=", "+"
            b'+' => {
                if self.check_at(1, b'+') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PlusPlus, start)
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PlusEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Plus, start)
                }
            }
            // "--", "-=", "->", "-"
            b'-' => {
                if self.check_at(1, b'-') {
                    self.pos = start + 2;
                    self.tok(TokenKind::MinusMinus, start)
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::MinusEquals, start)
                } else if self.check_at(1, b'>') {
                    self.pos = start + 2;
                    self.tok(TokenKind::Arrow, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Minus, start)
                }
            }
            // "**=", "**", "*=", "*"
            b'*' => {
                if self.check_at(1, b'*') {
                    if self.check_at(2, b'=') {
                        self.pos = start + 3;
                        self.tok(TokenKind::StarStarEquals, start)
                    } else {
                        self.pos = start + 2;
                        self.tok(TokenKind::StarStar, start)
                    }
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::StarEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Star, start)
                }
            }
            // "/=", "/" — comment forms were consumed by `lex_php`.
            b'/' => {
                if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::SlashEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Slash, start)
                }
            }
            // "%=", "%"
            b'%' => {
                if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PercentEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Percent, start)
                }
            }
            // ".5"-style float, "...", ".=", "."
            b'.' => {
                // Leading-dot float literal, e.g. ".5" or ".5e3".
                if start + 1 < bytes.len() && bytes[start + 1].is_ascii_digit() {
                    self.pos = start + 1;
                    self.scan_digits(u8::is_ascii_digit);
                    if self.pos < bytes.len() && matches!(bytes[self.pos], b'e' | b'E') {
                        self.try_scan_exponent();
                    }
                    // A trailing '_' separator makes the literal malformed.
                    if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                        self.consume_invalid_numeric_rest();
                        return self.invalid_numeric(start);
                    }
                    return self.tok(TokenKind::FloatLiteralLeadingDot, start);
                }
                if self.check_at(1, b'.') && self.check_at(2, b'.') {
                    self.pos = start + 3;
                    self.tok(TokenKind::Ellipsis, start)
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::DotEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Dot, start)
                }
            }
            // "===", "==", "=>", "="
            b'=' => {
                if self.check_at(1, b'=') {
                    if self.check_at(2, b'=') {
                        self.pos = start + 3;
                        self.tok(TokenKind::EqualsEqualsEquals, start)
                    } else {
                        self.pos = start + 2;
                        self.tok(TokenKind::EqualsEquals, start)
                    }
                } else if self.check_at(1, b'>') {
                    self.pos = start + 2;
                    self.tok(TokenKind::FatArrow, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Equals, start)
                }
            }
            // "!==", "!=", "!"
            b'!' => {
                if self.check_at(1, b'=') {
                    if self.check_at(2, b'=') {
                        self.pos = start + 3;
                        self.tok(TokenKind::BangEqualsEquals, start)
                    } else {
                        self.pos = start + 2;
                        self.tok(TokenKind::BangEquals, start)
                    }
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Bang, start)
                }
            }
            // '<' has many forms (shifts, comparisons, open tags, heredoc);
            // it gets its own method.
            b'<' => self.scan_less_than(start),
            // ">>=", ">>", ">=", ">"
            b'>' => {
                if self.check_at(1, b'>') {
                    if self.check_at(2, b'=') {
                        self.pos = start + 3;
                        self.tok(TokenKind::ShiftRightEquals, start)
                    } else {
                        self.pos = start + 2;
                        self.tok(TokenKind::ShiftRight, start)
                    }
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::GreaterThanEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::GreaterThan, start)
                }
            }
            // "&&", "&=", "&"
            b'&' => {
                if self.check_at(1, b'&') {
                    self.pos = start + 2;
                    self.tok(TokenKind::AmpersandAmpersand, start)
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::AmpersandEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Ampersand, start)
                }
            }
            // "||", "|=", "|>", "|"
            b'|' => {
                if self.check_at(1, b'|') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PipePipe, start)
                } else if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PipeEquals, start)
                } else if self.check_at(1, b'>') {
                    self.pos = start + 2;
                    self.tok(TokenKind::PipeArrow, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Pipe, start)
                }
            }
            // "^=", "^"
            b'^' => {
                if self.check_at(1, b'=') {
                    self.pos = start + 2;
                    self.tok(TokenKind::CaretEquals, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Caret, start)
                }
            }
            b'~' => {
                self.pos = start + 1;
                self.tok(TokenKind::Tilde, start)
            }
            // "?>", "??=", "??", "?->", "?"
            b'?' => {
                if self.check_at(1, b'>') {
                    // Close tag: switch back to inline-HTML mode.
                    self.pos = start + 2;
                    self.mode = LexerMode::InlineHtml;
                    self.tok(TokenKind::CloseTag, start)
                } else if self.check_at(1, b'?') {
                    if self.check_at(2, b'=') {
                        self.pos = start + 3;
                        self.tok(TokenKind::CoalesceEquals, start)
                    } else {
                        self.pos = start + 2;
                        self.tok(TokenKind::QuestionQuestion, start)
                    }
                } else if self.check_at(1, b'-') && self.check_at(2, b'>') {
                    self.pos = start + 3;
                    self.tok(TokenKind::NullsafeArrow, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Question, start)
                }
            }
            // "::", ":"
            b':' => {
                if self.check_at(1, b':') {
                    self.pos = start + 2;
                    self.tok(TokenKind::DoubleColon, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Colon, start)
                }
            }
            b'@' => {
                self.pos = start + 1;
                self.tok(TokenKind::At, start)
            }
            b'\\' => {
                self.pos = start + 1;
                self.tok(TokenKind::Backslash, start)
            }
            // "#[" attribute opener. A bare '#' here is defensive — comment
            // '#' is normally consumed by `lex_php` — so skip and continue.
            b'#' => {
                if self.check_at(1, b'[') {
                    self.pos = start + 2;
                    self.tok(TokenKind::HashBracket, start)
                } else {
                    self.pos = start + 1;
                    self.read_next_token()
                }
            }

            b'(' => {
                self.pos = start + 1;
                self.tok(TokenKind::LeftParen, start)
            }
            b')' => {
                self.pos = start + 1;
                self.tok(TokenKind::RightParen, start)
            }
            b'[' => {
                self.pos = start + 1;
                self.tok(TokenKind::LeftBracket, start)
            }
            b']' => {
                self.pos = start + 1;
                self.tok(TokenKind::RightBracket, start)
            }
            b'{' => {
                self.pos = start + 1;
                self.tok(TokenKind::LeftBrace, start)
            }
            b'}' => {
                self.pos = start + 1;
                self.tok(TokenKind::RightBrace, start)
            }
            b';' => {
                self.pos = start + 1;
                self.tok(TokenKind::Semicolon, start)
            }
            b',' => {
                self.pos = start + 1;
                self.tok(TokenKind::Comma, start)
            }

            b'\'' => self.scan_single_quoted_string(),
            b'"' => self.scan_double_quoted_string(),
            b'`' => self.scan_backtick_string(),

            // "$name" variable, or a bare "$" (e.g. "$$var" indirection).
            b'$' => {
                if start + 1 < bytes.len() && is_ident_start(bytes[start + 1]) {
                    self.pos = start + 2;
                    while self.pos < bytes.len() && is_ident_continue(bytes[self.pos]) {
                        self.pos += 1;
                    }
                    self.tok(TokenKind::Variable, start)
                } else {
                    self.pos = start + 1;
                    self.tok(TokenKind::Dollar, start)
                }
            }

            b'0'..=b'9' => self.scan_number(),

            _ if is_ident_start(b) => {
                // b'...', b"...", b<<< binary-string prefixes.
                if b == b'b' || b == b'B' {
                    if self.check_at(1, b'\'') {
                        return self.scan_single_quoted_string();
                    }
                    if self.check_at(1, b'"') {
                        return self.scan_double_quoted_string();
                    }
                    if self.check_at(1, b'<') && self.check_at(2, b'<') && self.check_at(3, b'<') {
                        let remaining = &self.source[self.pos..];
                        if let Some(token) = self.try_lex_heredoc(remaining) {
                            return token;
                        }
                    }
                }
                self.scan_identifier()
            }

            // Unrecognized byte: skip it and lex the next token.
            _ => {
                self.pos = start + 1;
                self.read_next_token()
            }
        }
    }
645
646 fn scan_less_than(&mut self, start: usize) -> Token {
648 if self.check_at(1, b'<') {
649 if self.check_at(2, b'<') {
650 let remaining = &self.source[self.pos..];
652 if let Some(token) = self.try_lex_heredoc(remaining) {
653 return token;
654 }
655 }
657 if self.check_at(2, b'=') {
658 self.pos = start + 3;
659 return self.tok(TokenKind::ShiftLeftEquals, start);
660 }
661 self.pos = start + 2;
662 return self.tok(TokenKind::ShiftLeft, start);
663 }
664 if self.check_at(1, b'=') {
665 if self.check_at(2, b'>') {
666 self.pos = start + 3;
667 return self.tok(TokenKind::Spaceship, start);
668 }
669 self.pos = start + 2;
670 return self.tok(TokenKind::LessThanEquals, start);
671 }
672 if self.check_at(1, b'?') {
673 let bytes = self.source.as_bytes();
674 if bytes.len() >= self.pos + 5
675 && bytes[self.pos + 2..self.pos + 5].eq_ignore_ascii_case(b"php")
676 {
677 self.pos = start + 5;
678 return self.tok(TokenKind::OpenTag, start);
679 }
680 if self.source[self.pos..].starts_with("<?=") {
681 self.pos = start + 3;
682 return self.tok(TokenKind::OpenTag, start);
683 }
684 }
685 self.pos = start + 1;
686 self.tok(TokenKind::LessThan, start)
687 }
688
689 fn scan_single_quoted_string(&mut self) -> Token {
692 let start = self.pos;
693 let bytes = self.source.as_bytes();
694 let mut p = self.pos;
695 if bytes[p] == b'b' || bytes[p] == b'B' {
697 p += 1;
698 }
699 p += 1; loop {
701 match memchr2(b'\\', b'\'', &bytes[p..]) {
702 None => {
703 self.errors.push(LexerError {
704 kind: LexerErrorKind::UnterminatedString,
705 message: "unterminated string literal".to_string(),
706 span: Span::new(start as u32, self.source.len() as u32),
707 });
708 self.pos = self.source.len();
709 return self.tok(TokenKind::SingleQuotedString, start);
710 }
711 Some(offset) => {
712 p += offset;
713 match bytes[p] {
714 b'\\' => {
715 p += 1;
716 if p < bytes.len() {
717 p += 1;
718 }
719 }
720 _ => {
721 p += 1;
723 break;
724 }
725 }
726 }
727 }
728 }
729 self.pos = p;
730 self.tok(TokenKind::SingleQuotedString, start)
731 }
732
733 fn scan_double_quoted_string(&mut self) -> Token {
734 let start = self.pos;
735 let bytes = self.source.as_bytes();
736 let mut p = self.pos;
737 if bytes[p] == b'b' || bytes[p] == b'B' {
739 p += 1;
740 }
741 p += 1; loop {
743 match memchr2(b'\\', b'"', &bytes[p..]) {
744 None => {
745 self.errors.push(LexerError {
746 kind: LexerErrorKind::UnterminatedString,
747 message: "unterminated string literal".to_string(),
748 span: Span::new(start as u32, self.source.len() as u32),
749 });
750 self.pos = self.source.len();
751 return self.tok(TokenKind::DoubleQuotedString, start);
752 }
753 Some(offset) => {
754 p += offset;
755 match bytes[p] {
756 b'\\' => {
757 p += 1;
758 if p < bytes.len() {
759 p += 1;
760 }
761 }
762 _ => {
763 p += 1;
765 break;
766 }
767 }
768 }
769 }
770 }
771 self.pos = p;
772 self.tok(TokenKind::DoubleQuotedString, start)
773 }
774
775 fn scan_backtick_string(&mut self) -> Token {
776 let start = self.pos;
777 let bytes = self.source.as_bytes();
778 let mut p = self.pos;
779 p += 1; loop {
781 match memchr2(b'\\', b'`', &bytes[p..]) {
782 None => {
783 self.errors.push(LexerError {
784 kind: LexerErrorKind::UnterminatedString,
785 message: "unterminated string literal".to_string(),
786 span: Span::new(start as u32, self.source.len() as u32),
787 });
788 self.pos = self.source.len();
789 return self.tok(TokenKind::BacktickString, start);
790 }
791 Some(offset) => {
792 p += offset;
793 match bytes[p] {
794 b'\\' => {
795 p += 1;
796 if p < bytes.len() {
797 p += 1;
798 }
799 }
800 _ => {
801 p += 1;
803 break;
804 }
805 }
806 }
807 }
808 }
809 self.pos = p;
810 self.tok(TokenKind::BacktickString, start)
811 }
812
    /// Scan an integer or float literal starting at an ASCII digit.
    ///
    /// Handles `0x`/`0b`/`0o` radix prefixes, legacy leading-zero octal,
    /// decimal floats, exponents, and `_` digit separators. A separator in
    /// an illegal position turns the whole literal into
    /// `InvalidNumericLiteral` (with an error recorded).
    fn scan_number(&mut self) -> Token {
        let start = self.pos;
        let bytes = self.source.as_bytes();

        // Prefixed radix literals: 0x / 0b / 0o, any letter case.
        if bytes[start] == b'0' && start + 1 < bytes.len() {
            match bytes[start + 1] {
                b'x' | b'X' => {
                    self.pos = start + 2;
                    // '_' may not directly follow the prefix.
                    if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                        self.consume_invalid_numeric_rest();
                        return self.invalid_numeric(start);
                    }
                    if self.scan_digits(u8::is_ascii_hexdigit) {
                        // ...nor trail the digits.
                        if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                            self.consume_invalid_numeric_rest();
                            return self.invalid_numeric(start);
                        }
                        return self.tok(TokenKind::HexIntLiteral, start);
                    }
                    // Bare "0x": rescan below as "0" + identifier.
                    self.pos = start;
                }
                b'b' | b'B' => {
                    self.pos = start + 2;
                    if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                        self.consume_invalid_numeric_rest();
                        return self.invalid_numeric(start);
                    }
                    if self.scan_digits(|b| b == &b'0' || b == &b'1') {
                        if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                            self.consume_invalid_numeric_rest();
                            return self.invalid_numeric(start);
                        }
                        return self.tok(TokenKind::BinIntLiteral, start);
                    }
                    // Bare "0b": rescan as decimal.
                    self.pos = start;
                }
                b'o' | b'O' => {
                    self.pos = start + 2;
                    if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                        self.consume_invalid_numeric_rest();
                        return self.invalid_numeric(start);
                    }
                    if self.scan_digits(|b| (b'0'..=b'7').contains(b)) {
                        if self.pos < bytes.len() && bytes[self.pos] == b'_' {
                            self.consume_invalid_numeric_rest();
                            return self.invalid_numeric(start);
                        }
                        return self.tok(TokenKind::OctIntLiteralNew, start);
                    }
                    // Bare "0o": rescan as decimal.
                    self.pos = start;
                }
                _ => {}
            }
        }

        // Decimal integer part.
        self.pos = start;
        self.scan_digits(u8::is_ascii_digit);
        let integer_end = self.pos;
        let mut kind = TokenKind::IntLiteral;

        // Legacy octal: leading zero followed by more digits (e.g. 0644).
        if bytes[start] == b'0' && integer_end > start + 1 {
            kind = TokenKind::OctIntLiteral;
        }

        // Fractional part.
        if self.pos < bytes.len() && bytes[self.pos] == b'.' {
            if self.pos + 1 < bytes.len() && bytes[self.pos + 1].is_ascii_digit() {
                self.pos += 1;
                self.scan_digits(u8::is_ascii_digit);
                kind = TokenKind::FloatLiteralSimple;
            } else if self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'_' {
                // "1._5" — a separator may not open the fraction.
                self.consume_invalid_numeric_rest();
                return self.invalid_numeric(start);
            } else if self.pos + 1 >= bytes.len() || bytes[self.pos + 1] != b'.' {
                // Trailing dot ("1.") is a valid float; "1.." leaves both
                // dots for the operator scanner.
                self.pos += 1;
                kind = TokenKind::FloatLiteralSimple;
            }
        }

        // Exponent part.
        if self.pos < bytes.len() && matches!(bytes[self.pos], b'e' | b'E') {
            if self.try_scan_exponent() {
                kind = TokenKind::FloatLiteral;
            } else if self.pos + 1 < bytes.len() && bytes[self.pos + 1] == b'_' {
                // "1e_5" — a separator may not open the exponent.
                self.consume_invalid_numeric_rest();
                return self.invalid_numeric(start);
            }
        }

        // Trailing separator ("1_") is invalid.
        if self.pos < bytes.len() && bytes[self.pos] == b'_' {
            self.consume_invalid_numeric_rest();
            return self.invalid_numeric(start);
        }

        self.tok(kind, start)
    }
923
924 fn scan_digits(&mut self, is_valid: fn(&u8) -> bool) -> bool {
927 let bytes = self.source.as_bytes();
928 if self.pos >= bytes.len() || !is_valid(&bytes[self.pos]) {
929 return false;
930 }
931 self.pos += 1;
932 loop {
933 if self.pos >= bytes.len() {
934 break;
935 }
936 if is_valid(&bytes[self.pos]) {
937 self.pos += 1;
938 } else if bytes[self.pos] == b'_'
939 && self.pos + 1 < bytes.len()
940 && is_valid(&bytes[self.pos + 1])
941 {
942 self.pos += 2;
943 } else {
944 break;
945 }
946 }
947 true
948 }
949
950 fn try_scan_exponent(&mut self) -> bool {
953 let bytes = self.source.as_bytes();
954 let saved = self.pos;
955 self.pos += 1; if self.pos < bytes.len() && matches!(bytes[self.pos], b'+' | b'-') {
959 self.pos += 1;
960 }
961
962 if self.scan_digits(u8::is_ascii_digit) {
964 true
965 } else {
966 self.pos = saved;
967 false
968 }
969 }
970
971 fn scan_identifier(&mut self) -> Token {
974 let start = self.pos;
975 let bytes = self.source.as_bytes();
976 self.pos += 1; while self.pos < bytes.len() && is_ident_continue(bytes[self.pos]) {
978 self.pos += 1;
979 }
980 let text = &self.source[start..self.pos];
981 let kind = resolve_keyword(text).unwrap_or(TokenKind::Identifier);
982 self.tok(kind, start)
983 }
984
    /// Advance `pos` to the end of a line-comment body.
    ///
    /// Stops *at* (without consuming) the terminating newline, or at the
    /// '?' of a "?>" sequence — in PHP a close tag ends a line comment just
    /// like a newline does. Consumes to EOF if neither appears.
    #[inline]
    fn skip_line_comment_body(bytes: &[u8], pos: &mut usize) {
        loop {
            match memchr2(b'\n', b'?', &bytes[*pos..]) {
                None => {
                    *pos = bytes.len();
                    return;
                }
                Some(offset) => {
                    let p = *pos + offset;
                    if bytes[p] == b'\n' {
                        *pos = p;
                        return;
                    }
                    // A '?' only terminates when it starts "?>".
                    if p + 1 < bytes.len() && bytes[p + 1] == b'>' {
                        *pos = p;
                        return;
                    }
                    *pos = p + 1;
                }
            }
        }
    }
1015
1016 #[inline]
1017 fn check_at(&self, offset: usize, expected: u8) -> bool {
1018 self.source.as_bytes().get(self.pos + offset) == Some(&expected)
1019 }
1020
1021 #[inline]
1022 fn tok(&self, kind: TokenKind, start: usize) -> Token {
1023 Token::new(kind, Span::new(start as u32, self.pos as u32))
1024 }
1025
1026 fn invalid_numeric(&mut self, start: usize) -> Token {
1027 let span = Span::new(start as u32, self.pos as u32);
1028 self.errors.push(LexerError {
1029 kind: LexerErrorKind::Other,
1030 message: "Invalid numeric literal".to_string(),
1031 span,
1032 });
1033 Token::new(TokenKind::InvalidNumericLiteral, span)
1034 }
1035
    /// After detecting a malformed numeric literal, swallow the rest of it.
    ///
    /// Consumes alphanumerics, '_' and '.', plus '+'/'-' only when directly
    /// preceded by an exponent marker ('e'/'E'), so e.g. "1_e+2" becomes a
    /// single invalid token rather than a confusing run of small tokens.
    fn consume_invalid_numeric_rest(&mut self) {
        let bytes = self.source.as_bytes();
        while self.pos < bytes.len() {
            let b = bytes[self.pos];
            if b.is_ascii_alphanumeric() || b == b'_' || b == b'.' || b == b'+' || b == b'-' {
                // A sign belongs to the literal only in "e+" / "E-" position.
                if (b == b'+' || b == b'-') && self.pos > 0 {
                    let prev = bytes[self.pos - 1];
                    if prev != b'e' && prev != b'E' {
                        break;
                    }
                }
                self.pos += 1;
            } else {
                break;
            }
        }
    }
1055
    /// Try to scan a heredoc (`<<<LABEL`) or nowdoc (`<<<'LABEL'`) literal.
    ///
    /// `remaining` must be `&self.source[self.pos..]`. Leading whitespace is
    /// scanned over here (the caller invokes this *before* skipping
    /// whitespace) but excluded from the returned span. Returns `None` when
    /// `remaining` does not begin a well-formed heredoc — `self.pos` is left
    /// untouched in that case — otherwise advances `self.pos` past the
    /// closing label and returns the entire literal as one token.
    fn try_lex_heredoc(&mut self, remaining: &str) -> Option<Token> {
        // Skip leading PHP whitespace (kept out of the token span).
        let trimmed = remaining.trim_start_matches(|c: char| {
            c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\x0C'
        });
        let ws_len = remaining.len() - trimmed.len();

        // Optional binary-string prefix: b<<< / B<<<.
        let (after_prefix, prefix_len) = if (trimmed.starts_with("b<<<")
            || trimmed.starts_with("B<<<"))
            && !trimmed[1..].starts_with("<<<>")
        {
            (&trimmed[1..], 1)
        } else {
            (trimmed, 0)
        };

        if !after_prefix.starts_with("<<<") {
            return None;
        }

        let base_pos = self.pos;
        let start = base_pos + ws_len;
        let after_arrows = &after_prefix[3..];
        // Horizontal whitespace is allowed between "<<<" and the label.
        let after_arrows_trimmed = after_arrows.trim_start_matches([' ', '\t']);
        let arrows_offset =
            ws_len + prefix_len + 3 + (after_arrows.len() - after_arrows_trimmed.len());

        // Parse the opener label. Single quotes => nowdoc; double quotes or
        // a bare identifier => heredoc. `label_line_end` is the offset
        // (within `remaining`) of the newline that ends the opener line.
        let (label, is_nowdoc, label_line_end);
        if let Some(after_quote) = after_arrows_trimmed.strip_prefix('\'') {
            let closing = after_quote.find('\'')?;
            label = after_quote[..closing].to_string();
            is_nowdoc = true;
            // 2 accounts for both quote characters around the label.
            let after_label = &after_arrows_trimmed[2 + closing..];
            let nl = after_label.find('\n').unwrap_or(after_label.len());
            label_line_end = arrows_offset + 2 + closing + nl;
            // NOTE(review): empty guard below is a no-op — looks like
            // leftover scaffolding; confirm before removing.
            if label_line_end < remaining.len() {
            }
        } else {
            let s = if let Some(after_dquote) = after_arrows_trimmed.strip_prefix('"') {
                let closing = after_dquote.find('"')?;
                label = after_dquote[..closing].to_string();
                &after_dquote[1 + closing..]
            } else {
                // Bare label: ASCII alphanumerics and underscores.
                let end = after_arrows_trimmed
                    .find(|c: char| !c.is_ascii_alphanumeric() && c != '_')
                    .unwrap_or(after_arrows_trimmed.len());
                if end == 0 {
                    return None;
                }
                label = after_arrows_trimmed[..end].to_string();
                &after_arrows_trimmed[end..]
            };
            is_nowdoc = false;
            let nl = s.find('\n').unwrap_or(s.len());
            label_line_end = arrows_offset + (after_arrows_trimmed.len() - s.len()) + nl;
        };

        if label.is_empty() {
            return None;
        }

        // The body starts on the line after the opener; an opener with no
        // following newline is malformed.
        let body_start_in_remaining = if label_line_end < remaining.len() {
            label_line_end + 1
        } else {
            return None;
        };

        let body = &remaining[body_start_in_remaining..];

        // Find the closing line: optional indentation, the label, then
        // nothing but ';', ',', ')' and trailing whitespace.
        let mut search_pos = 0;
        let end_marker_pos;
        loop {
            if search_pos >= body.len() {
                return None;
            }
            let line_start = search_pos;
            let line_end = body[line_start..]
                .find('\n')
                .map(|p| line_start + p)
                .unwrap_or(body.len());
            let line = &body[line_start..line_end];
            let trimmed_line = line.trim_start_matches([' ', '\t']);

            if trimmed_line == label
                || trimmed_line.starts_with(&label)
                    && trimmed_line[label.len()..]
                        .trim_start_matches([';', ',', ')'])
                        .trim()
                        .is_empty()
            {
                end_marker_pos = line_start;
                break;
            }

            search_pos = if line_end < body.len() {
                line_end + 1
            } else {
                body.len()
            };
        }

        // The token ends right after the closing label: its indentation is
        // included in the span, trailing punctuation is not.
        let end_marker_line = &body[end_marker_pos..];
        let trimmed = end_marker_line.trim_start_matches([' ', '\t']);
        let indent_len = end_marker_line.len() - trimmed.len();
        let token_end_in_remaining =
            body_start_in_remaining + end_marker_pos + indent_len + label.len();
        self.pos = base_pos + token_end_in_remaining;

        let span = Span::new(start as u32, self.pos as u32);

        if is_nowdoc {
            Some(Token::new(TokenKind::Nowdoc, span))
        } else {
            Some(Token::new(TokenKind::Heredoc, span))
        }
    }
1188}
1189
1190pub fn lex_all(source: &str) -> (Vec<Token>, Vec<LexerError>) {
1198 let mut lexer = Lexer::new(source);
1199 let mut tokens = Vec::new();
1200
1201 loop {
1202 let tok = lexer.next_token();
1203 let is_eof = tok.kind == TokenKind::Eof;
1204 tokens.push(tok);
1205 if is_eof {
1206 break;
1207 }
1208 }
1209
1210 let eof_span = tokens.last().unwrap().span;
1213 tokens.push(Token::new(TokenKind::Eof, eof_span));
1214
1215 let errors = lexer.errors;
1216 (tokens, errors)
1217}
1218
1219#[cfg(test)]
1220mod tests {
1221 use super::*;
1222
1223 fn collect_tokens(source: &str) -> Vec<Token> {
1224 let mut lexer = Lexer::new(source);
1225 let mut tokens = Vec::new();
1226 loop {
1227 let token = lexer.next_token();
1228 if token.kind == TokenKind::Eof {
1229 tokens.push(token);
1230 break;
1231 }
1232 tokens.push(token);
1233 }
1234 tokens
1235 }
1236
1237 fn collect_kinds(source: &str) -> Vec<TokenKind> {
1238 collect_tokens(source).into_iter().map(|t| t.kind).collect()
1239 }
1240
1241 fn php_kinds(code: &str) -> Vec<TokenKind> {
1243 let full = format!("<?php {}", code);
1244 collect_kinds(&full)
1245 .into_iter()
1246 .filter(|k| *k != TokenKind::OpenTag && *k != TokenKind::Eof)
1247 .collect()
1248 }
1249
1250 fn php_tokens(code: &str) -> Vec<(TokenKind, String)> {
1252 let full = format!("<?php {}", code);
1253 let mut lexer = Lexer::new(&full);
1254 let mut result = Vec::new();
1255 loop {
1256 let token = lexer.next_token();
1257 if token.kind == TokenKind::Eof {
1258 break;
1259 }
1260 if token.kind == TokenKind::OpenTag {
1261 continue;
1262 }
1263 let text = lexer.token_text(&token).to_string();
1264 result.push((token.kind, text));
1265 }
1266 result
1267 }
1268
    /// Open-tag detection, mode switching, and inline-HTML passthrough.
    mod open_tag_and_html {
        use super::*;

        #[test]
        fn test_php_only() {
            let tokens = collect_kinds("<?php $x = 42;");
            assert_eq!(
                tokens,
                vec![
                    TokenKind::OpenTag,
                    TokenKind::Variable,
                    TokenKind::Equals,
                    TokenKind::IntLiteral,
                    TokenKind::Semicolon,
                    TokenKind::Eof,
                ]
            );
        }

        #[test]
        fn test_inline_html_before_php() {
            let tokens = collect_kinds("<html><?php echo 1;");
            assert_eq!(
                tokens,
                vec![
                    TokenKind::InlineHtml,
                    TokenKind::OpenTag,
                    TokenKind::Echo,
                    TokenKind::IntLiteral,
                    TokenKind::Semicolon,
                    TokenKind::Eof,
                ]
            );
        }

        #[test]
        fn test_inline_html_after_close_tag() {
            let tokens = collect_kinds("<?php echo 1; ?><html>");
            assert_eq!(
                tokens,
                vec![
                    TokenKind::OpenTag,
                    TokenKind::Echo,
                    TokenKind::IntLiteral,
                    TokenKind::Semicolon,
                    TokenKind::CloseTag,
                    TokenKind::InlineHtml,
                    TokenKind::Eof,
                ]
            );
        }

        #[test]
        fn test_empty_source() {
            let tokens = collect_kinds("");
            assert_eq!(tokens, vec![TokenKind::Eof]);
        }

        #[test]
        fn test_only_inline_html() {
            let tokens = collect_kinds("<html><body>Hello</body></html>");
            assert_eq!(tokens, vec![TokenKind::InlineHtml, TokenKind::Eof]);
        }

        // The open tag is matched case-insensitively.
        #[test]
        fn test_open_tag_uppercase() {
            for tag in &["<?PHP", "<?Php", "<?PhP", "<?pHP", "<?phP"] {
                let src = format!("{} $x = 1;", tag);
                let tokens = collect_kinds(&src);
                assert_eq!(
                    tokens[0],
                    TokenKind::OpenTag,
                    "expected OpenTag for opening tag '{tag}'"
                );
            }
        }

        // ...including when the tag appears after leading HTML.
        #[test]
        fn test_open_tag_uppercase_mid_file() {
            let tokens = collect_kinds("<html><?PHP echo 1;");
            assert_eq!(
                tokens,
                vec![
                    TokenKind::InlineHtml,
                    TokenKind::OpenTag,
                    TokenKind::Echo,
                    TokenKind::IntLiteral,
                    TokenKind::Semicolon,
                    TokenKind::Eof,
                ]
            );
        }
    }
1364
    /// Keyword resolution via `resolve_keyword`.
    mod keywords {
        use super::*;

        #[test]
        fn test_keyword_resolution() {
            let tokens = collect_kinds("<?php if else while for foreach function return");
            assert_eq!(
                tokens,
                vec![
                    TokenKind::OpenTag,
                    TokenKind::If,
                    TokenKind::Else,
                    TokenKind::While,
                    TokenKind::For,
                    TokenKind::Foreach,
                    TokenKind::Function,
                    TokenKind::Return,
                    TokenKind::Eof,
                ]
            );
        }

        // PHP keywords are case-insensitive.
        #[test]
        fn test_keyword_case_insensitive() {
            let tokens = collect_kinds("<?php IF ELSE TRUE FALSE NULL");
            assert_eq!(
                tokens,
                vec![
                    TokenKind::OpenTag,
                    TokenKind::If,
                    TokenKind::Else,
                    TokenKind::True,
                    TokenKind::False,
                    TokenKind::Null,
                    TokenKind::Eof,
                ]
            );
        }

        #[test]
        fn test_logical_keywords() {
            let tokens = collect_kinds("<?php and or xor");
            assert_eq!(
                tokens,
                vec![
                    TokenKind::OpenTag,
                    TokenKind::And,
                    TokenKind::Or,
                    TokenKind::Xor,
                    TokenKind::Eof,
                ]
            );
        }
    }
1419
1420 mod lexer_api {
1421 use super::*;
1422
1423 #[test]
1424 fn test_peek_doesnt_consume() {
1425 let mut lexer = Lexer::new("<?php 42");
1426 let peeked = *lexer.peek();
1427 assert_eq!(peeked.kind, TokenKind::OpenTag);
1428 let next = lexer.next_token();
1429 assert_eq!(next.kind, TokenKind::OpenTag);
1430 let next = lexer.next_token();
1431 assert_eq!(next.kind, TokenKind::IntLiteral);
1432 }
1433
1434 #[test]
1435 fn test_token_text() {
1436 let source = "<?php $myVar = 'hello';";
1437 let mut lexer = Lexer::new(source);
1438 lexer.next_token(); let var_tok = lexer.next_token();
1440 assert_eq!(lexer.token_text(&var_tok), "$myVar");
1441 lexer.next_token(); let str_tok = lexer.next_token();
1443 assert_eq!(lexer.token_text(&str_tok), "'hello'");
1444 }
1445
1446 #[test]
1447 fn test_spans_are_correct() {
1448 let source = "<?php $x";
1449 let tokens = collect_tokens(source);
1450 assert_eq!(tokens[0].span, Span::new(0, 5)); assert_eq!(tokens[1].span, Span::new(6, 8)); }
1453 }
1454
1455 mod operators {
1456 use super::*;
1457
1458 #[test]
1459 fn test_basic_operators() {
1460 assert_eq!(
1461 php_kinds("+ - * / % ** ."),
1462 vec![
1463 TokenKind::Plus,
1464 TokenKind::Minus,
1465 TokenKind::Star,
1466 TokenKind::Slash,
1467 TokenKind::Percent,
1468 TokenKind::StarStar,
1469 TokenKind::Dot,
1470 ]
1471 );
1472 }
1473
1474 #[test]
1475 fn test_operators() {
1476 let tokens = collect_kinds("<?php === !== <=> ?? ++ -- **");
1477 assert_eq!(
1478 tokens,
1479 vec![
1480 TokenKind::OpenTag,
1481 TokenKind::EqualsEqualsEquals,
1482 TokenKind::BangEqualsEquals,
1483 TokenKind::Spaceship,
1484 TokenKind::QuestionQuestion,
1485 TokenKind::PlusPlus,
1486 TokenKind::MinusMinus,
1487 TokenKind::StarStar,
1488 TokenKind::Eof,
1489 ]
1490 );
1491 }
1492
1493 #[test]
1494 fn test_assignment_operators() {
1495 let tokens = collect_kinds("<?php += -= *= /= %= **= .= ??=");
1496 assert_eq!(
1497 tokens,
1498 vec![
1499 TokenKind::OpenTag,
1500 TokenKind::PlusEquals,
1501 TokenKind::MinusEquals,
1502 TokenKind::StarEquals,
1503 TokenKind::SlashEquals,
1504 TokenKind::PercentEquals,
1505 TokenKind::StarStarEquals,
1506 TokenKind::DotEquals,
1507 TokenKind::CoalesceEquals,
1508 TokenKind::Eof,
1509 ]
1510 );
1511 }
1512
1513 #[test]
1514 fn test_hash_bracket_not_comment() {
1515 let kinds = php_kinds("#[Attribute]");
1516 assert_eq!(
1517 kinds,
1518 vec![
1519 TokenKind::HashBracket,
1520 TokenKind::Identifier,
1521 TokenKind::RightBracket,
1522 ]
1523 );
1524 }
1525
1526 #[test]
1527 fn test_nullsafe_arrow() {
1528 let kinds = php_kinds("$x?->y");
1529 assert_eq!(
1530 kinds,
1531 vec![
1532 TokenKind::Variable,
1533 TokenKind::NullsafeArrow,
1534 TokenKind::Identifier,
1535 ]
1536 );
1537 }
1538
1539 #[test]
1540 fn test_pipe_arrow() {
1541 let kinds = php_kinds("$x |> foo(...)");
1542 assert_eq!(
1543 kinds,
1544 vec![
1545 TokenKind::Variable,
1546 TokenKind::PipeArrow,
1547 TokenKind::Identifier,
1548 TokenKind::LeftParen,
1549 TokenKind::Ellipsis,
1550 TokenKind::RightParen,
1551 ]
1552 );
1553 }
1554 }
1555
1556 mod numeric_literals {
1557 use super::*;
1558
1559 #[test]
1560 fn test_integers() {
1561 let toks = php_tokens("42 0xFF 0b1010 077");
1562 assert_eq!(toks[0], (TokenKind::IntLiteral, "42".to_string()));
1563 assert_eq!(toks[1], (TokenKind::HexIntLiteral, "0xFF".to_string()));
1564 assert_eq!(toks[2], (TokenKind::BinIntLiteral, "0b1010".to_string()));
1565 assert_eq!(toks[3], (TokenKind::OctIntLiteral, "077".to_string()));
1566 }
1567
1568 #[test]
1569 fn test_floats() {
1570 let toks = php_tokens("3.14 1e10 2.5e-3");
1571 assert_eq!(toks[0], (TokenKind::FloatLiteralSimple, "3.14".to_string()));
1572 assert_eq!(toks[1], (TokenKind::FloatLiteral, "1e10".to_string()));
1573 assert_eq!(toks[2], (TokenKind::FloatLiteral, "2.5e-3".to_string()));
1574 }
1575
1576 #[test]
1577 fn test_float_leading_dot() {
1578 let toks = php_tokens(".5 .123e4");
1579 assert_eq!(
1580 toks[0],
1581 (TokenKind::FloatLiteralLeadingDot, ".5".to_string())
1582 );
1583 assert_eq!(
1584 toks[1],
1585 (TokenKind::FloatLiteralLeadingDot, ".123e4".to_string())
1586 );
1587 }
1588
1589 #[test]
1590 fn test_trailing_dot_float() {
1591 let toks = php_tokens("0. 1. 42.");
1593 assert_eq!(toks[0], (TokenKind::FloatLiteralSimple, "0.".to_string()));
1594 assert_eq!(toks[1], (TokenKind::FloatLiteralSimple, "1.".to_string()));
1595 assert_eq!(toks[2], (TokenKind::FloatLiteralSimple, "42.".to_string()));
1596 }
1597
1598 #[test]
1599 fn test_trailing_dot_not_confused_with_dotdot() {
1600 let toks = php_tokens("1..");
1603 assert_eq!(toks[0], (TokenKind::IntLiteral, "1".to_string()));
1604 assert_eq!(toks[1], (TokenKind::Dot, ".".to_string()));
1605 assert_eq!(toks[2], (TokenKind::Dot, ".".to_string()));
1606 }
1607
1608 #[test]
1609 fn test_new_octal_syntax() {
1610 let toks = php_tokens("0o77 0O755");
1611 assert_eq!(toks[0], (TokenKind::OctIntLiteralNew, "0o77".to_string()));
1612 assert_eq!(toks[1], (TokenKind::OctIntLiteralNew, "0O755".to_string()));
1613 }
1614
1615 #[test]
1616 fn test_legacy_octal_with_invalid_digits() {
1617 let toks = php_tokens("0778 019 09");
1620 assert_eq!(toks[0], (TokenKind::OctIntLiteral, "0778".to_string()));
1621 assert_eq!(toks[1], (TokenKind::OctIntLiteral, "019".to_string()));
1622 assert_eq!(toks[2], (TokenKind::OctIntLiteral, "09".to_string()));
1623 }
1624
1625 #[test]
1626 fn test_numeric_underscores() {
1627 let toks = php_tokens("1_000 0xFF_FF 0b1010_0101");
1628 assert_eq!(toks[0], (TokenKind::IntLiteral, "1_000".to_string()));
1629 assert_eq!(toks[1], (TokenKind::HexIntLiteral, "0xFF_FF".to_string()));
1630 assert_eq!(
1631 toks[2],
1632 (TokenKind::BinIntLiteral, "0b1010_0101".to_string())
1633 );
1634 }
1635 }
1636
1637 mod strings_and_variables {
1638 use super::*;
1639
1640 #[test]
1641 fn test_string_literals() {
1642 let tokens = collect_kinds(r#"<?php 'single' "double""#);
1643 assert_eq!(
1644 tokens,
1645 vec![
1646 TokenKind::OpenTag,
1647 TokenKind::SingleQuotedString,
1648 TokenKind::DoubleQuotedString,
1649 TokenKind::Eof,
1650 ]
1651 );
1652 }
1653
1654 #[test]
1655 fn test_strings() {
1656 let kinds = php_kinds(r#"'hello' "world" 'it\'s' "say \"hi\"""#);
1657 assert_eq!(
1658 kinds,
1659 vec![
1660 TokenKind::SingleQuotedString,
1661 TokenKind::DoubleQuotedString,
1662 TokenKind::SingleQuotedString,
1663 TokenKind::DoubleQuotedString,
1664 ]
1665 );
1666 }
1667
1668 #[test]
1669 fn test_binary_prefix_strings() {
1670 let kinds = php_kinds(r#"b'hello' B"world""#);
1671 assert_eq!(
1672 kinds,
1673 vec![TokenKind::SingleQuotedString, TokenKind::DoubleQuotedString,]
1674 );
1675 }
1676
1677 #[test]
1678 fn test_variables() {
1679 let toks = php_tokens("$x $myVar $_foo");
1680 assert_eq!(toks[0], (TokenKind::Variable, "$x".to_string()));
1681 assert_eq!(toks[1], (TokenKind::Variable, "$myVar".to_string()));
1682 assert_eq!(toks[2], (TokenKind::Variable, "$_foo".to_string()));
1683 }
1684
1685 #[test]
1686 fn test_comments_yielded() {
1687 let toks = php_tokens("42 // line comment\n43 /* block */ 44 # hash comment\n45");
1689 assert_eq!(toks[0], (TokenKind::IntLiteral, "42".to_string()));
1690 assert_eq!(
1691 toks[1],
1692 (TokenKind::LineComment, "// line comment".to_string())
1693 );
1694 assert_eq!(toks[2], (TokenKind::IntLiteral, "43".to_string()));
1695 assert_eq!(
1696 toks[3],
1697 (TokenKind::BlockComment, "/* block */".to_string())
1698 );
1699 assert_eq!(toks[4], (TokenKind::IntLiteral, "44".to_string()));
1700 assert_eq!(
1701 toks[5],
1702 (TokenKind::HashComment, "# hash comment".to_string())
1703 );
1704 assert_eq!(toks[6], (TokenKind::IntLiteral, "45".to_string()));
1705 }
1706 }
1707}