php_parser/lexer/
mod.rs

1pub mod token;
2
3use crate::span::Span;
4use memchr::{memchr, memchr2, memchr3};
5use token::{Token, TokenKind};
6
/// Lexing mode stored on a [`Lexer`] and replaced through `Lexer::set_mode`.
///
/// The variant names mirror the property / variable-name entries of the
/// lexer's internal state stack. How each mode alters tokenization is decided
/// by code that reads the mode, not by this definition.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexerMode {
    /// Mode a freshly constructed lexer starts in.
    Standard,
    /// NOTE(review): presumably requests property-name lexing (after `->`) —
    /// confirm against the code that reads the mode.
    LookingForProperty,
    /// NOTE(review): presumably requests variable-name lexing (after `${`) —
    /// confirm against the code that reads the mode.
    LookingForVarName,
}
13
/// One entry of the lexer's state stack.
///
/// The top of the stack decides how the next bytes are tokenized; states are
/// pushed when a nested construct opens and popped when it closes.
#[derive(Debug, Clone, PartialEq, Eq)]
enum LexerState {
    /// Bottom-of-stack state a new lexer is created with.
    Initial,
    /// Pushed when `{$` is met inside an interpolated string.
    /// NOTE(review): presumably also the state for ordinary PHP code — confirm.
    Scripting,
    /// Inside a `"`-delimited string that contains interpolation.
    DoubleQuotes,
    /// Inside a backquoted string that contains interpolation.
    Backquote,
    /// Inside a heredoc body; carries the label bytes read at its start.
    Heredoc(Vec<u8>),
    /// Inside a nowdoc body (single-quoted label); carries the label bytes.
    Nowdoc(Vec<u8>),
    /// NOTE(review): presumably entered after `__halt_compiler` — confirm.
    HaltCompiler,
    /// NOTE(review): presumably the raw bytes following a halt — confirm.
    RawData,
    /// Inside the `[...]` offset that directly follows an interpolated variable.
    VarOffset,
    /// Inside a `[...]` offset that follows the name in a `${name[...]}` form.
    VarOffsetDollarCurly,
    /// An interpolated variable is followed by `->` and a property name start.
    LookingForProperty,
    /// Directly after `${` inside an interpolated string.
    LookingForVarName,
}
29
30fn keyword_lookup(text: &[u8]) -> TokenKind {
31    match text {
32        b"or" => TokenKind::LogicalOr,
33        b"and" => TokenKind::LogicalAnd,
34        b"xor" => TokenKind::LogicalXor,
35        b"bool" => TokenKind::TypeBool,
36        b"int" => TokenKind::TypeInt,
37        b"float" => TokenKind::TypeFloat,
38        b"string" => TokenKind::TypeString,
39        b"mixed" => TokenKind::TypeMixed,
40        b"never" => TokenKind::TypeNever,
41        b"null" => TokenKind::TypeNull,
42        b"false" => TokenKind::TypeFalse,
43        b"true" => TokenKind::TypeTrue,
44        b"exit" => TokenKind::Exit,
45        b"die" => TokenKind::Die,
46        b"function" => TokenKind::Function,
47        b"fn" => TokenKind::Fn,
48        b"const" => TokenKind::Const,
49        b"return" => TokenKind::Return,
50        b"yield" => TokenKind::Yield,
51        b"try" => TokenKind::Try,
52        b"catch" => TokenKind::Catch,
53        b"finally" => TokenKind::Finally,
54        b"throw" => TokenKind::Throw,
55        b"if" => TokenKind::If,
56        b"elseif" => TokenKind::ElseIf,
57        b"endif" => TokenKind::EndIf,
58        b"else" => TokenKind::Else,
59        b"insteadof" => TokenKind::Insteadof,
60        b"while" => TokenKind::While,
61        b"endwhile" => TokenKind::EndWhile,
62        b"do" => TokenKind::Do,
63        b"for" => TokenKind::For,
64        b"endfor" => TokenKind::EndFor,
65        b"foreach" => TokenKind::Foreach,
66        b"endforeach" => TokenKind::EndForeach,
67        b"declare" => TokenKind::Declare,
68        b"enddeclare" => TokenKind::EndDeclare,
69        b"instanceof" => TokenKind::InstanceOf,
70        b"as" => TokenKind::As,
71        b"switch" => TokenKind::Switch,
72        b"endswitch" => TokenKind::EndSwitch,
73        b"case" => TokenKind::Case,
74        b"default" => TokenKind::Default,
75        b"break" => TokenKind::Break,
76        b"continue" => TokenKind::Continue,
77        b"goto" => TokenKind::Goto,
78        b"echo" => TokenKind::Echo,
79        b"print" => TokenKind::Print,
80        b"enum" => TokenKind::Enum,
81        b"class" => TokenKind::Class,
82        b"interface" => TokenKind::Interface,
83        b"trait" => TokenKind::Trait,
84        b"extends" => TokenKind::Extends,
85        b"implements" => TokenKind::Implements,
86        b"new" => TokenKind::New,
87        b"clone" => TokenKind::Clone,
88        b"var" => TokenKind::Public,
89        b"public" => TokenKind::Public,
90        b"protected" => TokenKind::Protected,
91        b"private" => TokenKind::Private,
92        b"final" => TokenKind::Final,
93        b"abstract" => TokenKind::Abstract,
94        b"static" => TokenKind::Static,
95        b"readonly" => TokenKind::Readonly,
96        b"namespace" => TokenKind::Namespace,
97        b"use" => TokenKind::Use,
98        b"global" => TokenKind::Global,
99        b"isset" => TokenKind::Isset,
100        b"empty" => TokenKind::Empty,
101        b"__halt_compiler" => TokenKind::HaltCompiler,
102        b"__class__" => TokenKind::ClassC,
103        b"__trait__" => TokenKind::TraitC,
104        b"__function__" => TokenKind::FuncC,
105        b"__method__" => TokenKind::MethodC,
106        b"__line__" => TokenKind::Line,
107        b"__file__" => TokenKind::File,
108        b"__dir__" => TokenKind::Dir,
109        b"__namespace__" => TokenKind::NsC,
110        b"__property__" => TokenKind::PropertyC,
111        b"array" => TokenKind::Array,
112        b"callable" => TokenKind::TypeCallable,
113        b"iterable" => TokenKind::TypeIterable,
114        b"void" => TokenKind::TypeVoid,
115        b"object" => TokenKind::TypeObject,
116        b"match" => TokenKind::Match,
117        b"list" => TokenKind::List,
118        b"include" => TokenKind::Include,
119        b"include_once" => TokenKind::IncludeOnce,
120        b"require" => TokenKind::Require,
121        b"require_once" => TokenKind::RequireOnce,
122        b"eval" => TokenKind::Eval,
123        b"unset" => TokenKind::Unset,
124        _ => TokenKind::Identifier,
125    }
126}
127
/// Byte-oriented lexer over a borrowed source buffer.
///
/// Fields:
/// - `input`: the complete source bytes; spans index into this slice.
/// - `cursor`: byte offset of the next unread byte in `input`.
/// - `state_stack`: stack of lexer states; the top entry selects how the
///   following bytes are tokenized. It starts as `[LexerState::Initial]`.
/// - `mode`: mode selected through `set_mode`; starts as
///   `LexerMode::Standard`.
128#[derive(Debug, Clone)]
129pub struct Lexer<'src> {
130    input: &'src [u8],
131    cursor: usize,
132    state_stack: Vec<LexerState>,
133    mode: LexerMode,
134}
135
136impl<'src> Lexer<'src> {
137    pub fn new(input: &'src [u8]) -> Self {
138        let mut cursor = 0;
139        if input.starts_with(b"#!") {
140            if let Some(pos) = memchr(b'\n', input) {
141                cursor = pos + 1;
142            } else {
143                cursor = input.len();
144            }
145        }
146
147        Self {
148            input,
149            cursor,
150            state_stack: vec![LexerState::Initial],
151            mode: LexerMode::Standard,
152        }
153    }
154
155    pub fn set_mode(&mut self, mode: LexerMode) {
156        self.mode = mode;
157    }
158
159    pub fn slice(&self, span: Span) -> &'src [u8] {
160        &self.input[span.start..span.end]
161    }
162
163    fn peek(&self) -> Option<u8> {
164        if self.cursor < self.input.len() {
165            Some(self.input[self.cursor])
166        } else {
167            None
168        }
169    }
170
171    fn advance(&mut self) {
172        self.cursor += 1;
173    }
174
175    fn advance_n(&mut self, n: usize) {
176        self.cursor += n;
177    }
178
179    fn skip_whitespace(&mut self) {
180        while self.cursor < self.input.len() {
181            if self.input[self.cursor].is_ascii_whitespace() {
182                self.cursor += 1;
183            } else {
184                break;
185            }
186        }
187    }
188
189    fn read_identifier(&mut self) {
190        while self.cursor < self.input.len() {
191            let c = self.input[self.cursor];
192            if c.is_ascii_alphanumeric() || c == b'_' || c >= 0x80 {
193                self.cursor += 1;
194            } else {
195                break;
196            }
197        }
198    }
199
200    fn read_number(&mut self) -> TokenKind {
201        let mut is_float = false;
202
203        // Check for hex/binary/octal
204        if self.peek() == Some(b'0') {
205            self.advance();
206            if let Some(c) = self.peek() {
207                if c == b'x' || c == b'X' {
208                    self.advance();
209                    while let Some(c) = self.peek() {
210                        if c.is_ascii_hexdigit() || c == b'_' {
211                            self.advance();
212                        } else {
213                            break;
214                        }
215                    }
216                    return TokenKind::LNumber;
217                } else if c == b'b' || c == b'B' {
218                    self.advance();
219                    while let Some(c) = self.peek() {
220                        if c == b'0' || c == b'1' || c == b'_' {
221                            self.advance();
222                        } else {
223                            break;
224                        }
225                    }
226                    return TokenKind::LNumber;
227                } else if c == b'o' || c == b'O' {
228                    self.advance();
229                    while let Some(c) = self.peek() {
230                        if (b'0'..=b'7').contains(&c) || c == b'_' {
231                            self.advance();
232                        } else {
233                            break;
234                        }
235                    }
236                    return TokenKind::LNumber;
237                }
238            }
239        }
240
241        while let Some(c) = self.peek() {
242            if c.is_ascii_digit() || c == b'_' {
243                self.advance();
244            } else if c == b'.' {
245                if is_float {
246                    break; // Already found a dot
247                }
248                is_float = true;
249                self.advance();
250            } else if c == b'e' || c == b'E' {
251                is_float = true;
252                self.advance();
253                if let Some(next) = self.peek()
254                    && (next == b'+' || next == b'-')
255                {
256                    self.advance();
257                }
258            } else {
259                break;
260            }
261        }
262
263        if is_float {
264            TokenKind::DNumber
265        } else {
266            TokenKind::LNumber
267        }
268    }
269
270    fn consume_single_line_comment(&mut self) -> TokenKind {
271        while self.cursor < self.input.len() {
272            let remaining = &self.input[self.cursor..];
273            match memchr3(b'\n', b'\r', b'?', remaining) {
274                Some(pos) => {
275                    self.cursor += pos;
276                    let c = self.input[self.cursor];
277                    if c == b'?' {
278                        if self.input.get(self.cursor + 1) == Some(&b'>') {
279                            break;
280                        } else {
281                            self.cursor += 1;
282                        }
283                    } else {
284                        break;
285                    }
286                }
287                None => {
288                    self.cursor = self.input.len();
289                    break;
290                }
291            }
292        }
293        TokenKind::Comment
294    }
295
296    fn consume_multi_line_comment(&mut self) -> TokenKind {
297        let is_doc = if self.peek() == Some(b'*') && self.input.get(self.cursor + 1) != Some(&b'/')
298        {
299            self.advance();
300            true
301        } else {
302            false
303        };
304
305        while self.cursor < self.input.len() {
306            let remaining = &self.input[self.cursor..];
307            match memchr(b'*', remaining) {
308                Some(pos) => {
309                    self.cursor += pos;
310                    self.advance(); // Consume *
311                    if self.peek() == Some(b'/') {
312                        self.advance();
313                        return if is_doc {
314                            TokenKind::DocComment
315                        } else {
316                            TokenKind::Comment
317                        };
318                    }
319                }
320                None => {
321                    self.cursor = self.input.len();
322                    break;
323                }
324            }
325        }
326
327        TokenKind::Error // Unterminated comment
328    }
329
330    fn next_in_looking_for_property(&mut self) -> Option<Token> {
331        let start = self.cursor;
332        if self.cursor >= self.input.len() {
333            return Some(Token {
334                kind: TokenKind::Error,
335                span: Span::new(start, start),
336            });
337        }
338
339        let c = self.input[self.cursor];
340
341        if c == b'-' && self.input.get(self.cursor + 1) == Some(&b'>') {
342            self.advance_n(2);
343            return Some(Token {
344                kind: TokenKind::Arrow,
345                span: Span::new(start, self.cursor),
346            });
347        }
348
349        if c.is_ascii_alphabetic() || c == b'_' || c >= 0x80 {
350            self.read_identifier();
351            self.state_stack.pop(); // Done with property
352            return Some(Token {
353                kind: TokenKind::Identifier,
354                span: Span::new(start, self.cursor),
355            });
356        }
357
358        // Fallback: if we are here, it means we expected -> or identifier but got something else.
359        // This shouldn't happen if we only push state when we see ->.
360        // But if we just returned Arrow, next call expects Identifier.
361        // If we don't see identifier, we should probably pop state and let double quotes handle it?
362        // But double quotes expects string content.
363
364        self.state_stack.pop();
365        // Return empty token? No.
366        // Let's return Error for now if it's unexpected.
367        Some(Token {
368            kind: TokenKind::Error,
369            span: Span::new(start, self.cursor),
370        })
371    }
372
373    fn next_in_looking_for_var_name(&mut self) -> Option<Token> {
374        let start = self.cursor;
375        if self.cursor >= self.input.len() {
376            return Some(Token {
377                kind: TokenKind::Error,
378                span: Span::new(start, start),
379            });
380        }
381
382        let c = self.input[self.cursor];
383
384        if c.is_ascii_alphabetic() || c == b'_' || c >= 0x80 {
385            self.read_identifier();
386            return Some(Token {
387                kind: TokenKind::StringVarname,
388                span: Span::new(start, self.cursor),
389            });
390        }
391
392        if c == b'[' {
393            self.advance();
394            self.state_stack.push(LexerState::VarOffsetDollarCurly);
395            return Some(Token {
396                kind: TokenKind::OpenBracket,
397                span: Span::new(start, self.cursor),
398            });
399        }
400
401        if c == b'}' {
402            self.advance();
403            self.state_stack.pop();
404            return Some(Token {
405                kind: TokenKind::CloseBrace,
406                span: Span::new(start, self.cursor),
407            });
408        }
409
410        self.advance();
411        Some(Token {
412            kind: TokenKind::Error,
413            span: Span::new(start, self.cursor),
414        })
415    }
416
417    fn next_in_var_offset(&mut self, is_num_string: bool) -> Option<Token> {
418        let start = self.cursor;
419        if self.cursor >= self.input.len() {
420            return Some(Token {
421                kind: TokenKind::Error,
422                span: Span::new(start, start),
423            });
424        }
425
426        let c = self.input[self.cursor];
427
428        if c == b']' {
429            self.advance();
430            self.state_stack.pop();
431            return Some(Token {
432                kind: TokenKind::CloseBracket,
433                span: Span::new(start, self.cursor),
434            });
435        }
436
437        if c == b'$' {
438            self.advance();
439            if let Some(next) = self.peek()
440                && (next.is_ascii_alphabetic() || next == b'_')
441            {
442                let var_start = self.cursor - 1;
443                self.read_identifier();
444                return Some(Token {
445                    kind: TokenKind::Variable,
446                    span: Span::new(var_start, self.cursor),
447                });
448            }
449            // Fallback to identifier/etc if not variable?
450            // PHP scanner: if $foo[bar], bar is T_STRING. if $foo[$bar], $bar is T_VARIABLE.
451            // if $foo[1], 1 is T_NUM_STRING.
452        }
453
454        if c.is_ascii_digit() {
455            if is_num_string {
456                // Only consume digits
457                while let Some(c) = self.peek() {
458                    if c.is_ascii_digit() {
459                        self.advance();
460                    } else {
461                        break;
462                    }
463                }
464                return Some(Token {
465                    kind: TokenKind::NumString,
466                    span: Span::new(start, self.cursor),
467                });
468            } else {
469                let kind = self.read_number();
470                return Some(Token {
471                    kind,
472                    span: Span::new(start, self.cursor),
473                });
474            }
475        }
476
477        if c.is_ascii_alphabetic() || c == b'_' || c >= 0x80 {
478            self.read_identifier();
479            return Some(Token {
480                kind: TokenKind::Identifier,
481                span: Span::new(start, self.cursor),
482            });
483        }
484
485        if c == b'-' {
486            self.advance();
487            return Some(Token {
488                kind: TokenKind::Minus,
489                span: Span::new(start, self.cursor),
490            });
491        }
492
493        // Any other char is just returned as is (e.g. - . etc)
494        self.advance();
495
496        // Map specific chars to tokens if needed, or just return Error/Char?
497        // In this context, [ is not possible (nested?), ] is handled.
498        // - is possible.
499        // Let's return a generic token or map it.
500        // But wait, if I return Error, my test maps it to UNKNOWN.
501        // PHP returns CHAR for [ if it's not a variable offset start?
502        // But we are IN variable offset state.
503        // Wait, $foo[1]. [ is consumed before entering state?
504        // No, I pushed state when I saw [.
505        // But I did NOT consume [.
506        // Ah!
507
508        /*
509                        // Check for array offset [
510                        if self.peek() == Some(b'[') {
511                            self.state_stack.push(LexerState::VarOffset);
512                        }
513        */
514
515        // So the next char IS [.
516        // So I need to handle [ in next_in_var_offset.
517
518        if c == b'[' {
519            return Some(Token {
520                kind: TokenKind::OpenBracket,
521                span: Span::new(start, self.cursor),
522            });
523        }
524
525        Some(Token {
526            kind: TokenKind::Error,
527            span: Span::new(start, self.cursor),
528        })
529    }
530
531    fn next_in_double_quotes(&mut self) -> Option<Token> {
532        let start = self.cursor;
533        if self.cursor >= self.input.len() {
534            return Some(Token {
535                kind: TokenKind::Error,
536                span: Span::new(start, start),
537            });
538        }
539
540        let char = self.input[self.cursor];
541
542        match char {
543            b'"' => {
544                if let Some(LexerState::DoubleQuotes) = self.state_stack.last() {
545                    self.advance();
546                    self.state_stack.pop();
547                    return Some(Token {
548                        kind: TokenKind::DoubleQuote,
549                        span: Span::new(start, self.cursor),
550                    });
551                }
552            }
553            b'`' => {
554                if let Some(LexerState::Backquote) = self.state_stack.last() {
555                    self.advance();
556                    self.state_stack.pop();
557                    return Some(Token {
558                        kind: TokenKind::Backtick,
559                        span: Span::new(start, self.cursor),
560                    });
561                }
562            }
563            b'$' => {
564                self.advance();
565                if let Some(c) = self.peek() {
566                    if c.is_ascii_alphabetic() || c == b'_' {
567                        // Backtrack to $? No, we consumed it.
568                        // But read_identifier expects to read identifier chars.
569                        // It does not read $.
570                        // So we are at the start of identifier.
571                        let var_start = self.cursor - 1;
572                        self.read_identifier();
573
574                        // Check for array offset [
575                        if self.peek() == Some(b'[') {
576                            self.state_stack.push(LexerState::VarOffset);
577                        } else if self.peek() == Some(b'-')
578                            && self.input.get(self.cursor + 1) == Some(&b'>')
579                            && let Some(next_next) = self.input.get(self.cursor + 2)
580                            && (next_next.is_ascii_alphabetic() || *next_next == b'_')
581                        {
582                            self.state_stack.push(LexerState::LookingForProperty);
583                        }
584
585                        return Some(Token {
586                            kind: TokenKind::Variable,
587                            span: Span::new(var_start, self.cursor),
588                        });
589                    } else if c == b'{' {
590                        self.advance(); // Eat {
591                        self.state_stack.push(LexerState::LookingForVarName);
592                        return Some(Token {
593                            kind: TokenKind::DollarOpenCurlyBraces,
594                            span: Span::new(start, self.cursor),
595                        });
596                    }
597                }
598                // Just a $ literal, continue as Encapsed
599            }
600            b'{' => {
601                if self.input.get(self.cursor + 1) == Some(&b'$') {
602                    self.advance();
603                    // Do NOT consume $
604                    self.state_stack.push(LexerState::Scripting);
605                    return Some(Token {
606                        kind: TokenKind::CurlyOpen,
607                        span: Span::new(start, self.cursor),
608                    });
609                }
610            }
611            _ => {}
612        }
613
614        // EncapsedAndWhitespace
615        while let Some(c) = self.peek() {
616            if c == b'"' && matches!(self.state_stack.last(), Some(LexerState::DoubleQuotes)) {
617                break;
618            }
619            if c == b'`' && matches!(self.state_stack.last(), Some(LexerState::Backquote)) {
620                break;
621            }
622            if c == b'$'
623                && let Some(next) = self.input.get(self.cursor + 1)
624                && (next.is_ascii_alphabetic() || *next == b'_' || *next == b'{')
625            {
626                break;
627            }
628            if c == b'{' && self.input.get(self.cursor + 1) == Some(&b'$') {
629                break;
630            }
631
632            if c == b'\\' {
633                self.advance();
634                if self.peek().is_some() {
635                    self.advance();
636                }
637            } else {
638                self.advance();
639            }
640        }
641
642        if self.cursor > start {
643            Some(Token {
644                kind: TokenKind::EncapsedAndWhitespace,
645                span: Span::new(start, self.cursor),
646            })
647        } else {
648            // Should have matched something above or broke immediately
649            // If we broke immediately (e.g. at "), we should have handled it in match char
650            // But if we are at $ or { that is NOT a variable start, we should consume it.
651            // Wait, if we are at $ and it fell through match char, it means it's NOT a variable.
652            // So we should consume it.
653
654            // My loop logic:
655            // `while let Some(c) = self.peek()`
656            // If `c` is `$`, check if variable. If NOT variable, consume.
657            // But my loop breaks if `c == b'$'` and it IS a variable.
658            // If it is NOT a variable, it continues?
659
660            // Let's re-check loop:
661            /*
662            if c == b'$' {
663                if let Some(next) ... {
664                    if next.is_ascii... {
665                        break;
666                    }
667                }
668            }
669            */
670            // It doesn't advance if it's NOT a variable. It just falls through to `if c == b'\\' ... else self.advance()`.
671            // So it advances. Correct.
672
673            Some(Token {
674                kind: TokenKind::EncapsedAndWhitespace,
675                span: Span::new(start, self.cursor),
676            })
677        }
678    }
679
680    fn read_single_quoted(&mut self) -> TokenKind {
681        while self.cursor < self.input.len() {
682            let remaining = &self.input[self.cursor..];
683            match memchr2(b'\'', b'\\', remaining) {
684                Some(pos) => {
685                    self.cursor += pos;
686                    let c = self.input[self.cursor];
687                    self.advance(); // Consume ' or \
688                    if c == b'\'' {
689                        return TokenKind::StringLiteral;
690                    } else {
691                        // Backslash
692                        if self.cursor < self.input.len() {
693                            self.advance(); // Skip escaped char
694                        }
695                    }
696                }
697                None => {
698                    self.cursor = self.input.len();
699                    break;
700                }
701            }
702        }
703        TokenKind::Error
704    }
705
706    fn read_double_quoted(&mut self, quote: u8, start_pos: usize) -> TokenKind {
707        while let Some(c) = self.peek() {
708            if c == quote {
709                self.advance();
710                return TokenKind::StringLiteral;
711            } else if c == b'\\' {
712                self.advance();
713                if self.peek().is_some() {
714                    self.advance();
715                }
716            } else if c == b'$' {
717                if let Some(next) = self.input.get(self.cursor + 1)
718                    && (next.is_ascii_alphabetic() || *next == b'_' || *next == b'{')
719                {
720                    self.cursor = start_pos + 1;
721                    self.state_stack.push(if quote == b'"' {
722                        LexerState::DoubleQuotes
723                    } else {
724                        LexerState::Backquote
725                    });
726                    return if quote == b'"' {
727                        TokenKind::DoubleQuote
728                    } else {
729                        TokenKind::Backtick
730                    };
731                }
732                self.advance();
733            } else if c == b'{' {
734                if self.input.get(self.cursor + 1) == Some(&b'$') {
735                    self.cursor = start_pos + 1;
736                    self.state_stack.push(if quote == b'"' {
737                        LexerState::DoubleQuotes
738                    } else {
739                        LexerState::Backquote
740                    });
741                    return if quote == b'"' {
742                        TokenKind::DoubleQuote
743                    } else {
744                        TokenKind::Backtick
745                    };
746                }
747                self.advance();
748            } else {
749                self.advance();
750            }
751        }
752        TokenKind::Error
753    }
754
755    fn read_heredoc_start(&mut self, start: usize) -> Token {
756        while let Some(c) = self.peek() {
757            if c == b' ' || c == b'\t' {
758                self.advance();
759            } else {
760                break;
761            }
762        }
763
764        let quote = self.peek();
765        let is_quoted = quote == Some(b'\'') || quote == Some(b'"');
766        let is_nowdoc = quote == Some(b'\'');
767
768        if is_quoted {
769            self.advance();
770        }
771
772        let label_start = self.cursor;
773        self.read_identifier();
774        let label = self.input[label_start..self.cursor].to_vec();
775
776        if is_quoted && self.peek() == quote {
777            self.advance();
778        }
779
780        // Consume newline after label
781        if let Some(c) = self.peek() {
782            if c == b'\n' {
783                self.advance();
784            } else if c == b'\r' {
785                self.advance();
786                if self.peek() == Some(b'\n') {
787                    self.advance();
788                }
789            }
790        }
791
792        if is_nowdoc {
793            self.state_stack.push(LexerState::Nowdoc(label));
794        } else {
795            self.state_stack.push(LexerState::Heredoc(label));
796        }
797
798        Token {
799            kind: TokenKind::StartHeredoc,
800            span: Span::new(start, self.cursor),
801        }
802    }
803
804    fn check_heredoc_end(&self, label: &[u8]) -> Option<usize> {
805        let mut current = self.cursor;
806        while current < self.input.len() {
807            let c = self.input[current];
808            if c == b' ' || c == b'\t' {
809                current += 1;
810            } else {
811                break;
812            }
813        }
814
815        if current + label.len() > self.input.len() {
816            return None;
817        }
818
819        if &self.input[current..current + label.len()] == label {
820            // Check what follows. Must not be a label character.
821            let after = current + label.len();
822            if after >= self.input.len() {
823                return Some(after - self.cursor);
824            }
825            let c = self.input[after];
826            if !c.is_ascii_alphanumeric() && c != b'_' && c < 0x80 {
827                return Some(after - self.cursor);
828            }
829        }
830        None
831    }
832
833    fn is_followed_by_var_or_vararg(&self) -> bool {
834        let mut cursor = self.cursor;
835        while cursor < self.input.len() {
836            let c = self.input[cursor];
837            if c.is_ascii_whitespace() {
838                cursor += 1;
839                continue;
840            }
841
842            // Comments
843            if c == b'#' {
844                // Single line comment
845                while cursor < self.input.len() && self.input[cursor] != b'\n' {
846                    cursor += 1;
847                }
848                continue;
849            }
850            if c == b'/' && cursor + 1 < self.input.len() {
851                if self.input[cursor + 1] == b'/' {
852                    // Single line
853                    while cursor < self.input.len() && self.input[cursor] != b'\n' {
854                        cursor += 1;
855                    }
856                    continue;
857                } else if self.input[cursor + 1] == b'*' {
858                    // Multi line
859                    cursor += 2;
860                    while cursor < self.input.len() {
861                        if self.input[cursor] == b'*'
862                            && cursor + 1 < self.input.len()
863                            && self.input[cursor + 1] == b'/'
864                        {
865                            cursor += 2;
866                            break;
867                        }
868                        cursor += 1;
869                    }
870                    continue;
871                }
872            }
873
874            // Check for Variable ($...)
875            if c == b'$' && cursor + 1 < self.input.len() {
876                let next = self.input[cursor + 1];
877                if next.is_ascii_alphabetic() || next == b'_' || next >= 0x80 {
878                    return true;
879                }
880            }
881
882            // Check for Ellipsis (...)
883            if c == b'.'
884                && cursor + 2 < self.input.len()
885                && self.input[cursor + 1] == b'.'
886                && self.input[cursor + 2] == b'.'
887            {
888                return true;
889            }
890
891            return false;
892        }
893        false
894    }
895
896    fn check_set_visibility(&mut self, normal: TokenKind, set: TokenKind) -> TokenKind {
897        let mut look = self.cursor;
898
899        // Skip whitespace before (
900        while let Some(b) = self.input.get(look) {
901            if matches!(b, b' ' | b'\t' | b'\r' | b'\n' | b'\x0b' | b'\x0c') {
902                look += 1;
903            } else {
904                break;
905            }
906        }
907
908        if self.input.get(look) != Some(&b'(') {
909            return normal;
910        }
911        look += 1;
912
913        // Skip whitespace after (
914        while let Some(b) = self.input.get(look) {
915            if matches!(b, b' ' | b'\t' | b'\r' | b'\n' | b'\x0b' | b'\x0c') {
916                look += 1;
917            } else {
918                break;
919            }
920        }
921
922        let set_kw = b"set";
923        let is_set = self
924            .input
925            .get(look..look + set_kw.len())
926            .map(|s| s.eq_ignore_ascii_case(set_kw))
927            .unwrap_or(false);
928
929        if !is_set {
930            return normal;
931        }
932        look += set_kw.len();
933
934        // Skip whitespace after set
935        while let Some(b) = self.input.get(look) {
936            if matches!(b, b' ' | b'\t' | b'\r' | b'\n' | b'\x0b' | b'\x0c') {
937                look += 1;
938            } else {
939                break;
940            }
941        }
942
943        if self.input.get(look) != Some(&b')') {
944            return normal;
945        }
946        look += 1;
947
948        self.cursor = look;
949        set
950    }
951
952    fn next_in_nowdoc(&mut self) -> Option<Token> {
953        let label = if let Some(LexerState::Nowdoc(label)) = self.state_stack.last() {
954            label.clone()
955        } else {
956            return None;
957        };
958
959        if self.cursor >= self.input.len() {
960            return Some(Token {
961                kind: TokenKind::Error,
962                span: Span::new(self.cursor, self.cursor),
963            });
964        }
965
966        let start = self.cursor;
967
968        // Check if we are at the end label immediately
969        if let Some(len) = self.check_heredoc_end(&label) {
970            self.advance_n(len);
971            self.state_stack.pop();
972
973            return Some(Token {
974                kind: TokenKind::EndHeredoc,
975                span: Span::new(start, self.cursor),
976            });
977        }
978
979        // Consume content until newline (inclusive)
980        while let Some(c) = self.peek() {
981            self.advance();
982            if c == b'\n' {
983                // Check if next line is the label
984                if self.check_heredoc_end(&label).is_some() {
985                    break;
986                }
987            }
988        }
989
990        Some(Token {
991            kind: TokenKind::EncapsedAndWhitespace,
992            span: Span::new(start, self.cursor),
993        })
994    }
995
996    fn next_in_heredoc(&mut self) -> Option<Token> {
997        let label = if let Some(LexerState::Heredoc(label)) = self.state_stack.last() {
998            label.clone()
999        } else {
1000            return None;
1001        };
1002
1003        if self.cursor >= self.input.len() {
1004            return Some(Token {
1005                kind: TokenKind::Error,
1006                span: Span::new(self.cursor, self.cursor),
1007            });
1008        }
1009
1010        let start = self.cursor;
1011
1012        // Check end label
1013        if let Some(len) = self.check_heredoc_end(&label) {
1014            self.advance_n(len);
1015            self.state_stack.pop();
1016
1017            return Some(Token {
1018                kind: TokenKind::EndHeredoc,
1019                span: Span::new(start, self.cursor),
1020            });
1021        }
1022
1023        // Handle interpolation
1024        if let Some(c) = self.peek() {
1025            if c == b'$' {
1026                self.advance();
1027                if let Some(next) = self.peek() {
1028                    if next.is_ascii_alphabetic() || next == b'_' {
1029                        let var_start = self.cursor - 1;
1030                        self.read_identifier();
1031
1032                        // Check for array offset [
1033                        if self.peek() == Some(b'[') {
1034                            self.state_stack.push(LexerState::VarOffset);
1035                        } else if self.peek() == Some(b'-')
1036                            && self.input.get(self.cursor + 1) == Some(&b'>')
1037                            && let Some(next_next) = self.input.get(self.cursor + 2)
1038                            && (next_next.is_ascii_alphabetic() || *next_next == b'_')
1039                        {
1040                            self.state_stack.push(LexerState::LookingForProperty);
1041                        }
1042
1043                        return Some(Token {
1044                            kind: TokenKind::Variable,
1045                            span: Span::new(var_start, self.cursor),
1046                        });
1047                    } else if next == b'{' {
1048                        self.advance();
1049                        self.state_stack.push(LexerState::LookingForVarName);
1050                        return Some(Token {
1051                            kind: TokenKind::DollarOpenCurlyBraces,
1052                            span: Span::new(start, self.cursor),
1053                        });
1054                    }
1055                }
1056            } else if c == b'{' && self.input.get(self.cursor + 1) == Some(&b'$') {
1057                self.advance();
1058                self.state_stack.push(LexerState::Scripting);
1059                return Some(Token {
1060                    kind: TokenKind::CurlyOpen,
1061                    span: Span::new(start, self.cursor),
1062                });
1063            }
1064        }
1065
1066        // Consume content
1067        while let Some(c) = self.peek() {
1068            if c == b'$'
1069                && let Some(next) = self.input.get(self.cursor + 1)
1070                && (next.is_ascii_alphabetic() || *next == b'_' || *next == b'{')
1071            {
1072                break;
1073            }
1074            if c == b'{' && self.input.get(self.cursor + 1) == Some(&b'$') {
1075                break;
1076            }
1077
1078            self.advance();
1079            if c == b'\n' && self.check_heredoc_end(&label).is_some() {
1080                break;
1081            }
1082
1083            if c == b'\\' && self.peek().is_some() {
1084                self.advance();
1085            }
1086        }
1087
1088        if self.cursor > start {
1089            Some(Token {
1090                kind: TokenKind::EncapsedAndWhitespace,
1091                span: Span::new(start, self.cursor),
1092            })
1093        } else {
1094            // Should have matched something above
1095            Some(Token {
1096                kind: TokenKind::EncapsedAndWhitespace,
1097                span: Span::new(start, self.cursor),
1098            })
1099        }
1100    }
1101
1102    fn next_in_halt_compiler(&mut self) -> Option<Token> {
1103        self.skip_whitespace();
1104
1105        if self.cursor >= self.input.len() {
1106            return Some(Token {
1107                kind: TokenKind::Eof,
1108                span: Span::new(self.cursor, self.cursor),
1109            });
1110        }
1111
1112        let start = self.cursor;
1113        let c = self.input[self.cursor];
1114        self.advance();
1115
1116        let kind = match c {
1117            b'(' => TokenKind::OpenParen,
1118            b')' => TokenKind::CloseParen,
1119            b';' => {
1120                self.state_stack.pop();
1121                self.state_stack.push(LexerState::RawData);
1122                TokenKind::SemiColon
1123            }
1124            b'#' => self.consume_single_line_comment(),
1125            b'/' => {
1126                if self.peek() == Some(b'/') {
1127                    self.advance();
1128                    self.consume_single_line_comment()
1129                } else if self.peek() == Some(b'*') {
1130                    self.advance();
1131                    self.consume_multi_line_comment()
1132                } else {
1133                    TokenKind::Error
1134                }
1135            }
1136            _ => TokenKind::Error,
1137        };
1138
1139        Some(Token {
1140            kind,
1141            span: Span::new(start, self.cursor),
1142        })
1143    }
1144
1145    pub fn input_slice(&self, span: Span) -> &'src [u8] {
1146        &self.input[span.start..span.end]
1147    }
1148}
1149
1150impl<'src> Iterator for Lexer<'src> {
1151    type Item = Token;
1152
1153    fn next(&mut self) -> Option<Self::Item> {
1154        // Handle initial state (looking for <?php)
1155        if let Some(LexerState::Initial) = self.state_stack.last() {
1156            let start = self.cursor;
1157            while self.cursor < self.input.len() {
1158                if self.input[self.cursor] != b'<' {
1159                    let remaining = &self.input[self.cursor..];
1160                    match memchr(b'<', remaining) {
1161                        Some(pos) => self.cursor += pos,
1162                        None => {
1163                            self.cursor = self.input.len();
1164                            break;
1165                        }
1166                    }
1167                }
1168
1169                if self.input[self.cursor..].starts_with(b"<?php") {
1170                    if self.cursor > start {
1171                        return Some(Token {
1172                            kind: TokenKind::InlineHtml,
1173                            span: Span::new(start, self.cursor),
1174                        });
1175                    }
1176
1177                    let tag_start = self.cursor;
1178                    self.state_stack.pop();
1179                    self.state_stack.push(LexerState::Scripting);
1180                    self.advance_n(5);
1181
1182                    // Check for trailing newline/whitespace after <?php
1183                    if self.peek().is_some_and(|c| c.is_ascii_whitespace()) {
1184                        self.advance();
1185                    }
1186
1187                    return Some(Token {
1188                        kind: TokenKind::OpenTag,
1189                        span: Span::new(tag_start, self.cursor),
1190                    });
1191                } else if self.input[self.cursor..].starts_with(b"<?=") {
1192                    if self.cursor > start {
1193                        return Some(Token {
1194                            kind: TokenKind::InlineHtml,
1195                            span: Span::new(start, self.cursor),
1196                        });
1197                    }
1198                    let tag_start = self.cursor;
1199                    self.state_stack.pop();
1200                    self.state_stack.push(LexerState::Scripting);
1201                    self.advance_n(3);
1202                    return Some(Token {
1203                        kind: TokenKind::OpenTagEcho,
1204                        span: Span::new(tag_start, self.cursor),
1205                    });
1206                }
1207                self.advance();
1208            }
1209
1210            if self.cursor > start {
1211                return Some(Token {
1212                    kind: TokenKind::InlineHtml,
1213                    span: Span::new(start, self.cursor),
1214                });
1215            }
1216
1217            return Some(Token {
1218                kind: TokenKind::Eof,
1219                span: Span::new(self.cursor, self.cursor),
1220            });
1221        }
1222
1223        // Handle DoubleQuotes/Backquote state
1224        if let Some(LexerState::DoubleQuotes) | Some(LexerState::Backquote) =
1225            self.state_stack.last()
1226        {
1227            return self.next_in_double_quotes();
1228        }
1229
1230        if let Some(LexerState::Heredoc(_)) = self.state_stack.last() {
1231            return self.next_in_heredoc();
1232        }
1233
1234        if let Some(LexerState::Nowdoc(_)) = self.state_stack.last() {
1235            return self.next_in_nowdoc();
1236        }
1237
1238        if let Some(LexerState::HaltCompiler) = self.state_stack.last() {
1239            return self.next_in_halt_compiler();
1240        }
1241
1242        if let Some(LexerState::VarOffset) = self.state_stack.last() {
1243            return self.next_in_var_offset(true);
1244        }
1245
1246        if let Some(LexerState::VarOffsetDollarCurly) = self.state_stack.last() {
1247            return self.next_in_var_offset(false);
1248        }
1249
1250        if let Some(LexerState::LookingForProperty) = self.state_stack.last() {
1251            return self.next_in_looking_for_property();
1252        }
1253
1254        if let Some(LexerState::LookingForVarName) = self.state_stack.last() {
1255            return self.next_in_looking_for_var_name();
1256        }
1257
1258        if let Some(LexerState::RawData) = self.state_stack.last() {
1259            if self.cursor >= self.input.len() {
1260                return Some(Token {
1261                    kind: TokenKind::Eof,
1262                    span: Span::new(self.cursor, self.cursor),
1263                });
1264            }
1265            let start = self.cursor;
1266            self.cursor = self.input.len(); // Consume all
1267            return Some(Token {
1268                kind: TokenKind::InlineHtml,
1269                span: Span::new(start, self.cursor),
1270            });
1271        }
1272
1273        self.skip_whitespace();
1274
1275        if self.cursor >= self.input.len() {
1276            return Some(Token {
1277                kind: TokenKind::Eof,
1278                span: Span::new(self.cursor, self.cursor),
1279            });
1280        }
1281
1282        let start = self.cursor;
1283        let char = self.input[self.cursor];
1284        self.advance();
1285
1286        let kind = match char {
1287            b'$' => {
1288                if let Some(c) = self.peek() {
1289                    if c.is_ascii_alphabetic() || c == b'_' || c >= 0x80 {
1290                        self.read_identifier();
1291                        TokenKind::Variable
1292                    } else {
1293                        TokenKind::Dollar
1294                    }
1295                } else {
1296                    TokenKind::Dollar
1297                }
1298            }
1299            b'\\' => TokenKind::NsSeparator,
1300            b'\'' => self.read_single_quoted(),
1301            b'"' => self.read_double_quoted(b'"', start),
1302            b'`' => {
1303                self.state_stack.push(LexerState::Backquote);
1304                TokenKind::Backtick
1305            }
1306            b'#' => {
1307                if self.peek() == Some(b'[') {
1308                    self.advance();
1309                    TokenKind::Attribute
1310                } else {
1311                    self.consume_single_line_comment()
1312                }
1313            }
1314            b';' => TokenKind::SemiColon,
1315            b':' => {
1316                if self.peek() == Some(b':') {
1317                    self.advance();
1318                    TokenKind::DoubleColon
1319                } else {
1320                    TokenKind::Colon
1321                }
1322            }
1323            b',' => TokenKind::Comma,
1324            b'{' => {
1325                self.state_stack.push(LexerState::Scripting);
1326                TokenKind::OpenBrace
1327            }
1328            b'}' => {
1329                if self.state_stack.len() > 1 {
1330                    self.state_stack.pop();
1331                }
1332                TokenKind::CloseBrace
1333            }
1334            b'(' => {
1335                // Check for cast
1336                let saved_cursor = self.cursor;
1337                self.skip_whitespace();
1338
1339                let start_ident = self.cursor;
1340                self.read_identifier();
1341                let ident_len = self.cursor - start_ident;
1342
1343                if ident_len > 0 {
1344                    let ident = &self.input[start_ident..self.cursor];
1345                    self.skip_whitespace();
1346                    if self.peek() == Some(b')') {
1347                        let cast_kind = match ident.to_ascii_lowercase().as_slice() {
1348                            b"int" | b"integer" => Some(TokenKind::IntCast),
1349                            b"bool" | b"boolean" => Some(TokenKind::BoolCast),
1350                            b"float" | b"double" | b"real" => Some(TokenKind::FloatCast),
1351                            b"string" | b"binary" => Some(TokenKind::StringCast),
1352                            b"array" => Some(TokenKind::ArrayCast),
1353                            b"object" => Some(TokenKind::ObjectCast),
1354                            b"unset" => Some(TokenKind::UnsetCast),
1355                            b"void" => Some(TokenKind::VoidCast),
1356                            _ => None,
1357                        };
1358
1359                        if let Some(k) = cast_kind {
1360                            self.advance(); // Eat ')'
1361                            k
1362                        } else {
1363                            self.cursor = saved_cursor;
1364                            TokenKind::OpenParen
1365                        }
1366                    } else {
1367                        self.cursor = saved_cursor;
1368                        TokenKind::OpenParen
1369                    }
1370                } else {
1371                    self.cursor = saved_cursor;
1372                    TokenKind::OpenParen
1373                }
1374            }
1375            b')' => TokenKind::CloseParen,
1376            b'[' => TokenKind::OpenBracket,
1377            b']' => TokenKind::CloseBracket,
1378            b'+' => {
1379                if self.peek() == Some(b'+') {
1380                    self.advance();
1381                    TokenKind::Inc
1382                } else if self.peek() == Some(b'=') {
1383                    self.advance();
1384                    TokenKind::PlusEq
1385                } else {
1386                    TokenKind::Plus
1387                }
1388            }
1389            b'-' => {
1390                if self.peek() == Some(b'>') {
1391                    self.advance();
1392                    TokenKind::Arrow
1393                } else if self.peek() == Some(b'-') {
1394                    self.advance();
1395                    TokenKind::Dec
1396                } else if self.peek() == Some(b'=') {
1397                    self.advance();
1398                    TokenKind::MinusEq
1399                } else {
1400                    TokenKind::Minus
1401                }
1402            }
1403            b'*' => {
1404                if self.peek() == Some(b'*') {
1405                    self.advance();
1406                    if self.peek() == Some(b'=') {
1407                        self.advance();
1408                        TokenKind::PowEq
1409                    } else {
1410                        TokenKind::Pow
1411                    }
1412                } else if self.peek() == Some(b'=') {
1413                    self.advance();
1414                    TokenKind::MulEq
1415                } else {
1416                    TokenKind::Asterisk
1417                }
1418            }
1419            b'/' => {
1420                if self.peek() == Some(b'/') {
1421                    self.advance();
1422                    self.consume_single_line_comment()
1423                } else if self.peek() == Some(b'*') {
1424                    self.advance();
1425                    self.consume_multi_line_comment()
1426                } else if self.peek() == Some(b'=') {
1427                    self.advance();
1428                    TokenKind::DivEq
1429                } else {
1430                    TokenKind::Slash
1431                }
1432            }
1433            b'%' => {
1434                if self.peek() == Some(b'=') {
1435                    self.advance();
1436                    TokenKind::ModEq
1437                } else {
1438                    TokenKind::Percent
1439                }
1440            }
1441            b'.' => {
1442                if self.peek() == Some(b'=') {
1443                    self.advance();
1444                    TokenKind::ConcatEq
1445                } else if self.peek() == Some(b'.') {
1446                    self.advance();
1447                    if self.peek() == Some(b'.') {
1448                        self.advance();
1449                        TokenKind::Ellipsis
1450                    } else {
1451                        TokenKind::Dot
1452                    }
1453                } else if let Some(c) = self.peek()
1454                    && c.is_ascii_digit()
1455                {
1456                    self.cursor -= 1;
1457                    self.read_number()
1458                } else {
1459                    TokenKind::Dot
1460                }
1461            }
1462            b'=' => {
1463                if self.peek() == Some(b'=') {
1464                    self.advance();
1465                    if self.peek() == Some(b'=') {
1466                        self.advance();
1467                        TokenKind::EqEqEq
1468                    } else {
1469                        TokenKind::EqEq
1470                    }
1471                } else if self.peek() == Some(b'>') {
1472                    self.advance();
1473                    TokenKind::DoubleArrow
1474                } else {
1475                    TokenKind::Eq
1476                }
1477            }
1478            b'!' => {
1479                if self.peek() == Some(b'=') {
1480                    self.advance();
1481                    if self.peek() == Some(b'=') {
1482                        self.advance();
1483                        TokenKind::BangEqEq
1484                    } else {
1485                        TokenKind::BangEq
1486                    }
1487                } else {
1488                    TokenKind::Bang
1489                }
1490            }
1491            b'<' => {
1492                if self.peek() == Some(b'<') && self.input.get(self.cursor + 1) == Some(&b'<') {
1493                    self.advance(); // Eat second <
1494                    self.advance(); // Eat third <
1495                    return Some(self.read_heredoc_start(start));
1496                } else if self.peek() == Some(b'=') {
1497                    self.advance();
1498                    if self.peek() == Some(b'>') {
1499                        self.advance();
1500                        TokenKind::Spaceship
1501                    } else {
1502                        TokenKind::LtEq
1503                    }
1504                } else if self.peek() == Some(b'<') {
1505                    self.advance();
1506                    if self.peek() == Some(b'=') {
1507                        self.advance();
1508                        TokenKind::SlEq
1509                    } else {
1510                        TokenKind::Sl
1511                    }
1512                } else if self.peek() == Some(b'>') {
1513                    self.advance();
1514                    TokenKind::BangEq
1515                } else {
1516                    TokenKind::Lt
1517                }
1518            }
1519            b'>' => {
1520                if self.peek() == Some(b'=') {
1521                    self.advance();
1522                    TokenKind::GtEq
1523                } else if self.peek() == Some(b'>') {
1524                    self.advance();
1525                    if self.peek() == Some(b'=') {
1526                        self.advance();
1527                        TokenKind::SrEq
1528                    } else {
1529                        TokenKind::Sr
1530                    }
1531                } else {
1532                    TokenKind::Gt
1533                }
1534            }
1535            b'&' => {
1536                if self.peek() == Some(b'&') {
1537                    self.advance();
1538                    TokenKind::AmpersandAmpersand
1539                } else if self.peek() == Some(b'=') {
1540                    self.advance();
1541                    TokenKind::AndEq
1542                } else if self.is_followed_by_var_or_vararg() {
1543                    TokenKind::AmpersandFollowedByVarOrVararg
1544                } else {
1545                    TokenKind::AmpersandNotFollowedByVarOrVararg
1546                }
1547            }
1548            b'|' => {
1549                if self.peek() == Some(b'|') {
1550                    self.advance();
1551                    TokenKind::PipePipe
1552                } else if self.peek() == Some(b'=') {
1553                    self.advance();
1554                    TokenKind::OrEq
1555                } else {
1556                    TokenKind::Pipe
1557                }
1558            }
1559            b'^' => {
1560                if self.peek() == Some(b'=') {
1561                    self.advance();
1562                    TokenKind::XorEq
1563                } else {
1564                    TokenKind::Caret
1565                }
1566            }
1567            b'~' => TokenKind::BitNot,
1568            b'@' => TokenKind::At,
1569            b'?' => {
1570                if self.peek() == Some(b'>') {
1571                    self.advance();
1572                    self.state_stack.pop();
1573                    self.state_stack.push(LexerState::Initial);
1574                    TokenKind::CloseTag
1575                } else if self.peek() == Some(b'?') {
1576                    self.advance();
1577                    if self.peek() == Some(b'=') {
1578                        self.advance();
1579                        TokenKind::CoalesceEq
1580                    } else {
1581                        TokenKind::Coalesce
1582                    }
1583                } else if self.peek() == Some(b'-')
1584                    && self.input.get(self.cursor + 1) == Some(&b'>')
1585                {
1586                    self.advance();
1587                    self.advance();
1588                    TokenKind::NullSafeArrow
1589                } else {
1590                    TokenKind::Question
1591                }
1592            }
1593            c if c.is_ascii_digit() => {
1594                self.cursor -= 1;
1595                self.read_number()
1596            }
1597            c if c.is_ascii_alphabetic() || c == b'_' || c >= 0x80 => {
1598                // Check for binary string prefix
1599                if (c == b'b' || c == b'B')
1600                    && let Some(next) = self.peek()
1601                {
1602                    if next == b'\'' {
1603                        self.advance(); // Eat '
1604                        return Some(Token {
1605                            kind: self.read_single_quoted(),
1606                            span: Span::new(start, self.cursor),
1607                        });
1608                    } else if next == b'"' {
1609                        let quote_pos = self.cursor;
1610                        self.advance(); // Eat "
1611                        return Some(Token {
1612                            kind: self.read_double_quoted(b'"', quote_pos),
1613                            span: Span::new(start, self.cursor),
1614                        });
1615                    }
1616                }
1617
1618                self.read_identifier();
1619                let text = &self.input[start..self.cursor];
1620
1621                if self.mode == LexerMode::LookingForProperty {
1622                    self.mode = LexerMode::Standard;
1623                    TokenKind::Identifier
1624                } else {
1625                    let is_all_lowercase = text.iter().all(|c| !c.is_ascii_uppercase());
1626
1627                    let mut kind = if is_all_lowercase {
1628                        keyword_lookup(text)
1629                    } else {
1630                        keyword_lookup(&text.to_ascii_lowercase())
1631                    };
1632
1633                    match kind {
1634                        TokenKind::Yield => {
1635                            let mut look = self.cursor;
1636                            while let Some(b) = self.input.get(look) {
1637                                if matches!(b, b' ' | b'\t' | b'\r' | b'\n' | b'\x0b' | b'\x0c') {
1638                                    look += 1;
1639                                } else {
1640                                    break;
1641                                }
1642                            }
1643                            let from_kw = b"from";
1644                            let is_from = self
1645                                .input
1646                                .get(look..look + from_kw.len())
1647                                .map(|s| {
1648                                    s.iter()
1649                                        .zip(from_kw.iter())
1650                                        .all(|(c, k)| c.to_ascii_lowercase() == *k)
1651                                })
1652                                .unwrap_or(false)
1653                                && !self
1654                                    .input
1655                                    .get(look + from_kw.len())
1656                                    .map(|c| c.is_ascii_alphanumeric() || *c == b'_' || *c >= 0x80)
1657                                    .unwrap_or(false);
1658
1659                            if is_from {
1660                                self.cursor = look + from_kw.len();
1661                                kind = TokenKind::YieldFrom;
1662                            }
1663                        }
1664                        TokenKind::Public => {
1665                            if text[0].eq_ignore_ascii_case(&b'p') {
1666                                kind = self
1667                                    .check_set_visibility(TokenKind::Public, TokenKind::PublicSet);
1668                            }
1669                        }
1670                        TokenKind::Protected => {
1671                            kind = self.check_set_visibility(
1672                                TokenKind::Protected,
1673                                TokenKind::ProtectedSet,
1674                            );
1675                        }
1676                        TokenKind::Private => {
1677                            kind = self
1678                                .check_set_visibility(TokenKind::Private, TokenKind::PrivateSet);
1679                        }
1680                        TokenKind::HaltCompiler => {
1681                            self.state_stack.pop();
1682                            self.state_stack.push(LexerState::HaltCompiler);
1683                        }
1684                        _ => {}
1685                    }
1686                    kind
1687                }
1688            }
1689            _ => TokenKind::Error,
1690        };
1691
1692        Some(Token {
1693            kind,
1694            span: Span::new(start, self.cursor),
1695        })
1696    }
1697}