php_parser/lexer/
mod.rs

1pub mod token;
2
3use crate::span::Span;
4use memchr::{memchr, memchr2, memchr3};
5use token::{Token, TokenKind};
6
/// Externally selectable scanning mode, stored by `Lexer::set_mode`.
///
/// `Eq` is derived alongside `PartialEq` because the enum is fieldless and
/// its equality is total.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexerMode {
    /// Mode a freshly constructed lexer starts in.
    Standard,
    // NOTE(review): presumably requests property-name scanning — confirm
    // against the code that reads `Lexer::mode`.
    LookingForProperty,
    // NOTE(review): presumably requests variable-name scanning — confirm
    // against the code that reads `Lexer::mode`.
    LookingForVarName,
}
13
/// Internal scanner states kept on the lexer's state stack.
///
/// `Eq` is derived alongside `PartialEq`: the only payload is `Vec<u8>`,
/// whose equality is total.
#[derive(Debug, Clone, PartialEq, Eq)]
enum LexerState {
    /// State a freshly constructed lexer starts in.
    Initial,
    /// Pushed when `{$` is met inside an interpolated string.
    Scripting,
    /// Inside a `"`-delimited string that contains interpolation.
    DoubleQuotes,
    /// Inside a backtick-delimited string that contains interpolation.
    Backquote,
    /// Inside a heredoc body; carries the label bytes.
    Heredoc(Vec<u8>),
    /// Inside a nowdoc body; carries the label bytes.
    Nowdoc(Vec<u8>),
    // NOTE(review): not produced anywhere in the visible part of the file.
    HaltCompiler,
    // NOTE(review): not produced anywhere in the visible part of the file.
    RawData,
    /// Pushed after an interpolated `$name` that is directly followed by `[`.
    VarOffset,
    /// Pushed when `[` is met while scanning the name after `${`.
    VarOffsetDollarCurly,
    /// Pushed after an interpolated `$name` followed by `->` and an
    /// identifier start.
    LookingForProperty,
    /// Pushed after `${` inside an interpolated string.
    LookingForVarName,
}
29
30fn keyword_lookup(text: &[u8]) -> TokenKind {
31    match text {
32        b"or" => TokenKind::LogicalOr,
33        b"and" => TokenKind::LogicalAnd,
34        b"xor" => TokenKind::LogicalXor,
35        b"bool" => TokenKind::TypeBool,
36        b"int" => TokenKind::TypeInt,
37        b"float" => TokenKind::TypeFloat,
38        b"string" => TokenKind::TypeString,
39        b"mixed" => TokenKind::TypeMixed,
40        b"never" => TokenKind::TypeNever,
41        b"null" => TokenKind::TypeNull,
42        b"false" => TokenKind::TypeFalse,
43        b"true" => TokenKind::TypeTrue,
44        b"exit" => TokenKind::Exit,
45        b"die" => TokenKind::Die,
46        b"function" => TokenKind::Function,
47        b"fn" => TokenKind::Fn,
48        b"const" => TokenKind::Const,
49        b"return" => TokenKind::Return,
50        b"yield" => TokenKind::Yield,
51        b"try" => TokenKind::Try,
52        b"catch" => TokenKind::Catch,
53        b"finally" => TokenKind::Finally,
54        b"throw" => TokenKind::Throw,
55        b"if" => TokenKind::If,
56        b"elseif" => TokenKind::ElseIf,
57        b"endif" => TokenKind::EndIf,
58        b"else" => TokenKind::Else,
59        b"insteadof" => TokenKind::Insteadof,
60        b"while" => TokenKind::While,
61        b"endwhile" => TokenKind::EndWhile,
62        b"do" => TokenKind::Do,
63        b"for" => TokenKind::For,
64        b"endfor" => TokenKind::EndFor,
65        b"foreach" => TokenKind::Foreach,
66        b"endforeach" => TokenKind::EndForeach,
67        b"declare" => TokenKind::Declare,
68        b"enddeclare" => TokenKind::EndDeclare,
69        b"instanceof" => TokenKind::InstanceOf,
70        b"as" => TokenKind::As,
71        b"switch" => TokenKind::Switch,
72        b"endswitch" => TokenKind::EndSwitch,
73        b"case" => TokenKind::Case,
74        b"default" => TokenKind::Default,
75        b"break" => TokenKind::Break,
76        b"continue" => TokenKind::Continue,
77        b"goto" => TokenKind::Goto,
78        b"echo" => TokenKind::Echo,
79        b"print" => TokenKind::Print,
80        b"enum" => TokenKind::Enum,
81        b"class" => TokenKind::Class,
82        b"interface" => TokenKind::Interface,
83        b"trait" => TokenKind::Trait,
84        b"extends" => TokenKind::Extends,
85        b"implements" => TokenKind::Implements,
86        b"new" => TokenKind::New,
87        b"clone" => TokenKind::Clone,
88        b"var" => TokenKind::Public,
89        b"public" => TokenKind::Public,
90        b"protected" => TokenKind::Protected,
91        b"private" => TokenKind::Private,
92        b"final" => TokenKind::Final,
93        b"abstract" => TokenKind::Abstract,
94        b"static" => TokenKind::Static,
95        b"readonly" => TokenKind::Readonly,
96        b"namespace" => TokenKind::Namespace,
97        b"use" => TokenKind::Use,
98        b"global" => TokenKind::Global,
99        b"isset" => TokenKind::Isset,
100        b"empty" => TokenKind::Empty,
101        b"__halt_compiler" => TokenKind::HaltCompiler,
102        b"__class__" => TokenKind::ClassC,
103        b"__trait__" => TokenKind::TraitC,
104        b"__function__" => TokenKind::FuncC,
105        b"__method__" => TokenKind::MethodC,
106        b"__line__" => TokenKind::Line,
107        b"__file__" => TokenKind::File,
108        b"__dir__" => TokenKind::Dir,
109        b"__namespace__" => TokenKind::NsC,
110        b"__property__" => TokenKind::PropertyC,
111        b"array" => TokenKind::Array,
112        b"callable" => TokenKind::TypeCallable,
113        b"iterable" => TokenKind::TypeIterable,
114        b"void" => TokenKind::TypeVoid,
115        b"object" => TokenKind::TypeObject,
116        b"match" => TokenKind::Match,
117        b"list" => TokenKind::List,
118        b"include" => TokenKind::Include,
119        b"include_once" => TokenKind::IncludeOnce,
120        b"require" => TokenKind::Require,
121        b"require_once" => TokenKind::RequireOnce,
122        b"eval" => TokenKind::Eval,
123        b"unset" => TokenKind::Unset,
124        _ => TokenKind::Identifier,
125    }
126}
127
/// Byte-oriented lexer over a borrowed source buffer.
#[derive(Debug, Clone)]
pub struct Lexer<'src> {
    /// Source bytes being tokenized.
    input: &'src [u8],
    /// Index into `input` of the next unread byte.
    cursor: usize,
    /// Stack of scanner states; `new` seeds it with `LexerState::Initial`.
    state_stack: Vec<LexerState>,
    /// Mode last stored by `set_mode`; `new` sets it to `LexerMode::Standard`.
    mode: LexerMode,
}
135
136impl<'src> Lexer<'src> {
137    pub fn new(input: &'src [u8]) -> Self {
138        Self {
139            input,
140            cursor: 0,
141            state_stack: vec![LexerState::Initial],
142            mode: LexerMode::Standard,
143        }
144    }
145
    /// Stores `mode` as the lexer's current mode, replacing the previous one.
    pub fn set_mode(&mut self, mode: LexerMode) {
        self.mode = mode;
    }
149
150    pub fn slice(&self, span: Span) -> &'src [u8] {
151        &self.input[span.start..span.end]
152    }
153
154    fn peek(&self) -> Option<u8> {
155        if self.cursor < self.input.len() {
156            Some(self.input[self.cursor])
157        } else {
158            None
159        }
160    }
161
    /// Moves the cursor forward by one byte. No bounds check is performed.
    fn advance(&mut self) {
        self.cursor += 1;
    }
165
    /// Moves the cursor forward by `n` bytes. No bounds check is performed.
    fn advance_n(&mut self, n: usize) {
        self.cursor += n;
    }
169
170    fn skip_whitespace(&mut self) {
171        while self.cursor < self.input.len() {
172            if self.input[self.cursor].is_ascii_whitespace() {
173                self.cursor += 1;
174            } else {
175                break;
176            }
177        }
178    }
179
180    fn read_identifier(&mut self) {
181        while self.cursor < self.input.len() {
182            let c = self.input[self.cursor];
183            if c.is_ascii_alphanumeric() || c == b'_' || c >= 0x80 {
184                self.cursor += 1;
185            } else {
186                break;
187            }
188        }
189    }
190
191    fn read_number(&mut self) -> TokenKind {
192        let mut is_float = false;
193
194        // Check for hex/binary/octal
195        if self.peek() == Some(b'0') {
196            self.advance();
197            if let Some(c) = self.peek() {
198                if c == b'x' || c == b'X' {
199                    self.advance();
200                    while let Some(c) = self.peek() {
201                        if c.is_ascii_hexdigit() || c == b'_' {
202                            self.advance();
203                        } else {
204                            break;
205                        }
206                    }
207                    return TokenKind::LNumber;
208                } else if c == b'b' || c == b'B' {
209                    self.advance();
210                    while let Some(c) = self.peek() {
211                        if c == b'0' || c == b'1' || c == b'_' {
212                            self.advance();
213                        } else {
214                            break;
215                        }
216                    }
217                    return TokenKind::LNumber;
218                } else if c == b'o' || c == b'O' {
219                    self.advance();
220                    while let Some(c) = self.peek() {
221                        if (b'0'..=b'7').contains(&c) || c == b'_' {
222                            self.advance();
223                        } else {
224                            break;
225                        }
226                    }
227                    return TokenKind::LNumber;
228                }
229            }
230        }
231
232        while let Some(c) = self.peek() {
233            if c.is_ascii_digit() || c == b'_' {
234                self.advance();
235            } else if c == b'.' {
236                if is_float {
237                    break; // Already found a dot
238                }
239                is_float = true;
240                self.advance();
241            } else if c == b'e' || c == b'E' {
242                is_float = true;
243                self.advance();
244                if let Some(next) = self.peek()
245                    && (next == b'+' || next == b'-')
246                {
247                    self.advance();
248                }
249            } else {
250                break;
251            }
252        }
253
254        if is_float {
255            TokenKind::DNumber
256        } else {
257            TokenKind::LNumber
258        }
259    }
260
261    fn consume_single_line_comment(&mut self) -> TokenKind {
262        while self.cursor < self.input.len() {
263            let remaining = &self.input[self.cursor..];
264            match memchr3(b'\n', b'\r', b'?', remaining) {
265                Some(pos) => {
266                    self.cursor += pos;
267                    let c = self.input[self.cursor];
268                    if c == b'?' {
269                        if self.input.get(self.cursor + 1) == Some(&b'>') {
270                            break;
271                        } else {
272                            self.cursor += 1;
273                        }
274                    } else {
275                        break;
276                    }
277                }
278                None => {
279                    self.cursor = self.input.len();
280                    break;
281                }
282            }
283        }
284        TokenKind::Comment
285    }
286
287    fn consume_multi_line_comment(&mut self) -> TokenKind {
288        let is_doc = if self.peek() == Some(b'*') && self.input.get(self.cursor + 1) != Some(&b'/')
289        {
290            self.advance();
291            true
292        } else {
293            false
294        };
295
296        while self.cursor < self.input.len() {
297            let remaining = &self.input[self.cursor..];
298            match memchr(b'*', remaining) {
299                Some(pos) => {
300                    self.cursor += pos;
301                    self.advance(); // Consume *
302                    if self.peek() == Some(b'/') {
303                        self.advance();
304                        return if is_doc {
305                            TokenKind::DocComment
306                        } else {
307                            TokenKind::Comment
308                        };
309                    }
310                }
311                None => {
312                    self.cursor = self.input.len();
313                    break;
314                }
315            }
316        }
317
318        TokenKind::Error // Unterminated comment
319    }
320
321    fn next_in_looking_for_property(&mut self) -> Option<Token> {
322        let start = self.cursor;
323        if self.cursor >= self.input.len() {
324            return Some(Token {
325                kind: TokenKind::Error,
326                span: Span::new(start, start),
327            });
328        }
329
330        let c = self.input[self.cursor];
331
332        if c == b'-' && self.input.get(self.cursor + 1) == Some(&b'>') {
333            self.advance_n(2);
334            return Some(Token {
335                kind: TokenKind::Arrow,
336                span: Span::new(start, self.cursor),
337            });
338        }
339
340        if c.is_ascii_alphabetic() || c == b'_' || c >= 0x80 {
341            self.read_identifier();
342            self.state_stack.pop(); // Done with property
343            return Some(Token {
344                kind: TokenKind::Identifier,
345                span: Span::new(start, self.cursor),
346            });
347        }
348
349        // Fallback: if we are here, it means we expected -> or identifier but got something else.
350        // This shouldn't happen if we only push state when we see ->.
351        // But if we just returned Arrow, next call expects Identifier.
352        // If we don't see identifier, we should probably pop state and let double quotes handle it?
353        // But double quotes expects string content.
354
355        self.state_stack.pop();
356        // Return empty token? No.
357        // Let's return Error for now if it's unexpected.
358        Some(Token {
359            kind: TokenKind::Error,
360            span: Span::new(start, self.cursor),
361        })
362    }
363
364    fn next_in_looking_for_var_name(&mut self) -> Option<Token> {
365        let start = self.cursor;
366        if self.cursor >= self.input.len() {
367            return Some(Token {
368                kind: TokenKind::Error,
369                span: Span::new(start, start),
370            });
371        }
372
373        let c = self.input[self.cursor];
374
375        if c.is_ascii_alphabetic() || c == b'_' || c >= 0x80 {
376            self.read_identifier();
377            return Some(Token {
378                kind: TokenKind::StringVarname,
379                span: Span::new(start, self.cursor),
380            });
381        }
382
383        if c == b'[' {
384            self.advance();
385            self.state_stack.push(LexerState::VarOffsetDollarCurly);
386            return Some(Token {
387                kind: TokenKind::OpenBracket,
388                span: Span::new(start, self.cursor),
389            });
390        }
391
392        if c == b'}' {
393            self.advance();
394            self.state_stack.pop();
395            return Some(Token {
396                kind: TokenKind::CloseBrace,
397                span: Span::new(start, self.cursor),
398            });
399        }
400
401        self.advance();
402        Some(Token {
403            kind: TokenKind::Error,
404            span: Span::new(start, self.cursor),
405        })
406    }
407
408    fn next_in_var_offset(&mut self, is_num_string: bool) -> Option<Token> {
409        let start = self.cursor;
410        if self.cursor >= self.input.len() {
411            return Some(Token {
412                kind: TokenKind::Error,
413                span: Span::new(start, start),
414            });
415        }
416
417        let c = self.input[self.cursor];
418
419        if c == b']' {
420            self.advance();
421            self.state_stack.pop();
422            return Some(Token {
423                kind: TokenKind::CloseBracket,
424                span: Span::new(start, self.cursor),
425            });
426        }
427
428        if c == b'$' {
429            self.advance();
430            if let Some(next) = self.peek()
431                && (next.is_ascii_alphabetic() || next == b'_')
432            {
433                let var_start = self.cursor - 1;
434                self.read_identifier();
435                return Some(Token {
436                    kind: TokenKind::Variable,
437                    span: Span::new(var_start, self.cursor),
438                });
439            }
440            // Fallback to identifier/etc if not variable?
441            // PHP scanner: if $foo[bar], bar is T_STRING. if $foo[$bar], $bar is T_VARIABLE.
442            // if $foo[1], 1 is T_NUM_STRING.
443        }
444
445        if c.is_ascii_digit() {
446            if is_num_string {
447                // Only consume digits
448                while let Some(c) = self.peek() {
449                    if c.is_ascii_digit() {
450                        self.advance();
451                    } else {
452                        break;
453                    }
454                }
455                return Some(Token {
456                    kind: TokenKind::NumString,
457                    span: Span::new(start, self.cursor),
458                });
459            } else {
460                let kind = self.read_number();
461                return Some(Token {
462                    kind,
463                    span: Span::new(start, self.cursor),
464                });
465            }
466        }
467
468        if c.is_ascii_alphabetic() || c == b'_' || c >= 0x80 {
469            self.read_identifier();
470            return Some(Token {
471                kind: TokenKind::Identifier,
472                span: Span::new(start, self.cursor),
473            });
474        }
475
476        if c == b'-' {
477            self.advance();
478            return Some(Token {
479                kind: TokenKind::Minus,
480                span: Span::new(start, self.cursor),
481            });
482        }
483
484        // Any other char is just returned as is (e.g. - . etc)
485        self.advance();
486
487        // Map specific chars to tokens if needed, or just return Error/Char?
488        // In this context, [ is not possible (nested?), ] is handled.
489        // - is possible.
490        // Let's return a generic token or map it.
491        // But wait, if I return Error, my test maps it to UNKNOWN.
492        // PHP returns CHAR for [ if it's not a variable offset start?
493        // But we are IN variable offset state.
494        // Wait, $foo[1]. [ is consumed before entering state?
495        // No, I pushed state when I saw [.
496        // But I did NOT consume [.
497        // Ah!
498
499        /*
500                        // Check for array offset [
501                        if self.peek() == Some(b'[') {
502                            self.state_stack.push(LexerState::VarOffset);
503                        }
504        */
505
506        // So the next char IS [.
507        // So I need to handle [ in next_in_var_offset.
508
509        if c == b'[' {
510            return Some(Token {
511                kind: TokenKind::OpenBracket,
512                span: Span::new(start, self.cursor),
513            });
514        }
515
516        Some(Token {
517            kind: TokenKind::Error,
518            span: Span::new(start, self.cursor),
519        })
520    }
521
522    fn next_in_double_quotes(&mut self) -> Option<Token> {
523        let start = self.cursor;
524        if self.cursor >= self.input.len() {
525            return Some(Token {
526                kind: TokenKind::Error,
527                span: Span::new(start, start),
528            });
529        }
530
531        let char = self.input[self.cursor];
532
533        match char {
534            b'"' => {
535                if let Some(LexerState::DoubleQuotes) = self.state_stack.last() {
536                    self.advance();
537                    self.state_stack.pop();
538                    return Some(Token {
539                        kind: TokenKind::DoubleQuote,
540                        span: Span::new(start, self.cursor),
541                    });
542                }
543            }
544            b'`' => {
545                if let Some(LexerState::Backquote) = self.state_stack.last() {
546                    self.advance();
547                    self.state_stack.pop();
548                    return Some(Token {
549                        kind: TokenKind::Backtick,
550                        span: Span::new(start, self.cursor),
551                    });
552                }
553            }
554            b'$' => {
555                self.advance();
556                if let Some(c) = self.peek() {
557                    if c.is_ascii_alphabetic() || c == b'_' {
558                        // Backtrack to $? No, we consumed it.
559                        // But read_identifier expects to read identifier chars.
560                        // It does not read $.
561                        // So we are at the start of identifier.
562                        let var_start = self.cursor - 1;
563                        self.read_identifier();
564
565                        // Check for array offset [
566                        if self.peek() == Some(b'[') {
567                            self.state_stack.push(LexerState::VarOffset);
568                        } else if self.peek() == Some(b'-')
569                            && self.input.get(self.cursor + 1) == Some(&b'>')
570                            && let Some(next_next) = self.input.get(self.cursor + 2)
571                            && (next_next.is_ascii_alphabetic() || *next_next == b'_')
572                        {
573                            self.state_stack.push(LexerState::LookingForProperty);
574                        }
575
576                        return Some(Token {
577                            kind: TokenKind::Variable,
578                            span: Span::new(var_start, self.cursor),
579                        });
580                    } else if c == b'{' {
581                        self.advance(); // Eat {
582                        self.state_stack.push(LexerState::LookingForVarName);
583                        return Some(Token {
584                            kind: TokenKind::DollarOpenCurlyBraces,
585                            span: Span::new(start, self.cursor),
586                        });
587                    }
588                }
589                // Just a $ literal, continue as Encapsed
590            }
591            b'{' => {
592                if self.input.get(self.cursor + 1) == Some(&b'$') {
593                    self.advance();
594                    // Do NOT consume $
595                    self.state_stack.push(LexerState::Scripting);
596                    return Some(Token {
597                        kind: TokenKind::CurlyOpen,
598                        span: Span::new(start, self.cursor),
599                    });
600                }
601            }
602            _ => {}
603        }
604
605        // EncapsedAndWhitespace
606        while let Some(c) = self.peek() {
607            if c == b'"' && matches!(self.state_stack.last(), Some(LexerState::DoubleQuotes)) {
608                break;
609            }
610            if c == b'`' && matches!(self.state_stack.last(), Some(LexerState::Backquote)) {
611                break;
612            }
613            if c == b'$'
614                && let Some(next) = self.input.get(self.cursor + 1)
615                && (next.is_ascii_alphabetic() || *next == b'_' || *next == b'{')
616            {
617                break;
618            }
619            if c == b'{' && self.input.get(self.cursor + 1) == Some(&b'$') {
620                break;
621            }
622
623            if c == b'\\' {
624                self.advance();
625                if self.peek().is_some() {
626                    self.advance();
627                }
628            } else {
629                self.advance();
630            }
631        }
632
633        if self.cursor > start {
634            Some(Token {
635                kind: TokenKind::EncapsedAndWhitespace,
636                span: Span::new(start, self.cursor),
637            })
638        } else {
639            // Should have matched something above or broke immediately
640            // If we broke immediately (e.g. at "), we should have handled it in match char
641            // But if we are at $ or { that is NOT a variable start, we should consume it.
642            // Wait, if we are at $ and it fell through match char, it means it's NOT a variable.
643            // So we should consume it.
644
645            // My loop logic:
646            // `while let Some(c) = self.peek()`
647            // If `c` is `$`, check if variable. If NOT variable, consume.
648            // But my loop breaks if `c == b'$'` and it IS a variable.
649            // If it is NOT a variable, it continues?
650
651            // Let's re-check loop:
652            /*
653            if c == b'$' {
654                if let Some(next) ... {
655                    if next.is_ascii... {
656                        break;
657                    }
658                }
659            }
660            */
661            // It doesn't advance if it's NOT a variable. It just falls through to `if c == b'\\' ... else self.advance()`.
662            // So it advances. Correct.
663
664            Some(Token {
665                kind: TokenKind::EncapsedAndWhitespace,
666                span: Span::new(start, self.cursor),
667            })
668        }
669    }
670
671    fn read_single_quoted(&mut self) -> TokenKind {
672        while self.cursor < self.input.len() {
673            let remaining = &self.input[self.cursor..];
674            match memchr2(b'\'', b'\\', remaining) {
675                Some(pos) => {
676                    self.cursor += pos;
677                    let c = self.input[self.cursor];
678                    self.advance(); // Consume ' or \
679                    if c == b'\'' {
680                        return TokenKind::StringLiteral;
681                    } else {
682                        // Backslash
683                        if self.cursor < self.input.len() {
684                            self.advance(); // Skip escaped char
685                        }
686                    }
687                }
688                None => {
689                    self.cursor = self.input.len();
690                    break;
691                }
692            }
693        }
694        TokenKind::Error
695    }
696
697    fn read_double_quoted(&mut self, quote: u8, start_pos: usize) -> TokenKind {
698        while let Some(c) = self.peek() {
699            if c == quote {
700                self.advance();
701                return TokenKind::StringLiteral;
702            } else if c == b'\\' {
703                self.advance();
704                if self.peek().is_some() {
705                    self.advance();
706                }
707            } else if c == b'$' {
708                if let Some(next) = self.input.get(self.cursor + 1)
709                    && (next.is_ascii_alphabetic() || *next == b'_' || *next == b'{')
710                {
711                    self.cursor = start_pos + 1;
712                    self.state_stack.push(if quote == b'"' {
713                        LexerState::DoubleQuotes
714                    } else {
715                        LexerState::Backquote
716                    });
717                    return if quote == b'"' {
718                        TokenKind::DoubleQuote
719                    } else {
720                        TokenKind::Backtick
721                    };
722                }
723                self.advance();
724            } else if c == b'{' {
725                if self.input.get(self.cursor + 1) == Some(&b'$') {
726                    self.cursor = start_pos + 1;
727                    self.state_stack.push(if quote == b'"' {
728                        LexerState::DoubleQuotes
729                    } else {
730                        LexerState::Backquote
731                    });
732                    return if quote == b'"' {
733                        TokenKind::DoubleQuote
734                    } else {
735                        TokenKind::Backtick
736                    };
737                }
738                self.advance();
739            } else {
740                self.advance();
741            }
742        }
743        TokenKind::Error
744    }
745
746    fn read_heredoc_start(&mut self, start: usize) -> Token {
747        while let Some(c) = self.peek() {
748            if c == b' ' || c == b'\t' {
749                self.advance();
750            } else {
751                break;
752            }
753        }
754
755        let quote = self.peek();
756        let is_quoted = quote == Some(b'\'') || quote == Some(b'"');
757        let is_nowdoc = quote == Some(b'\'');
758
759        if is_quoted {
760            self.advance();
761        }
762
763        let label_start = self.cursor;
764        self.read_identifier();
765        let label = self.input[label_start..self.cursor].to_vec();
766
767        if is_quoted && self.peek() == quote {
768            self.advance();
769        }
770
771        // Consume newline after label
772        if let Some(c) = self.peek() {
773            if c == b'\n' {
774                self.advance();
775            } else if c == b'\r' {
776                self.advance();
777                if self.peek() == Some(b'\n') {
778                    self.advance();
779                }
780            }
781        }
782
783        if is_nowdoc {
784            self.state_stack.push(LexerState::Nowdoc(label));
785        } else {
786            self.state_stack.push(LexerState::Heredoc(label));
787        }
788
789        Token {
790            kind: TokenKind::StartHeredoc,
791            span: Span::new(start, self.cursor),
792        }
793    }
794
795    fn check_heredoc_end(&self, label: &[u8]) -> Option<usize> {
796        let mut current = self.cursor;
797        while current < self.input.len() {
798            let c = self.input[current];
799            if c == b' ' || c == b'\t' {
800                current += 1;
801            } else {
802                break;
803            }
804        }
805
806        if current + label.len() > self.input.len() {
807            return None;
808        }
809
810        if &self.input[current..current + label.len()] == label {
811            // Check what follows. Must not be a label character.
812            let after = current + label.len();
813            if after >= self.input.len() {
814                return Some(after - self.cursor);
815            }
816            let c = self.input[after];
817            if !c.is_ascii_alphanumeric() && c != b'_' && c < 0x80 {
818                return Some(after - self.cursor);
819            }
820        }
821        None
822    }
823
824    fn is_followed_by_var_or_vararg(&self) -> bool {
825        let mut cursor = self.cursor;
826        while cursor < self.input.len() {
827            let c = self.input[cursor];
828            if c.is_ascii_whitespace() {
829                cursor += 1;
830                continue;
831            }
832
833            // Comments
834            if c == b'#' {
835                // Single line comment
836                while cursor < self.input.len() && self.input[cursor] != b'\n' {
837                    cursor += 1;
838                }
839                continue;
840            }
841            if c == b'/' && cursor + 1 < self.input.len() {
842                if self.input[cursor + 1] == b'/' {
843                    // Single line
844                    while cursor < self.input.len() && self.input[cursor] != b'\n' {
845                        cursor += 1;
846                    }
847                    continue;
848                } else if self.input[cursor + 1] == b'*' {
849                    // Multi line
850                    cursor += 2;
851                    while cursor < self.input.len() {
852                        if self.input[cursor] == b'*'
853                            && cursor + 1 < self.input.len()
854                            && self.input[cursor + 1] == b'/'
855                        {
856                            cursor += 2;
857                            break;
858                        }
859                        cursor += 1;
860                    }
861                    continue;
862                }
863            }
864
865            // Check for Variable ($...)
866            if c == b'$' && cursor + 1 < self.input.len() {
867                let next = self.input[cursor + 1];
868                if next.is_ascii_alphabetic() || next == b'_' || next >= 0x80 {
869                    return true;
870                }
871            }
872
873            // Check for Ellipsis (...)
874            if c == b'.'
875                && cursor + 2 < self.input.len()
876                && self.input[cursor + 1] == b'.'
877                && self.input[cursor + 2] == b'.'
878            {
879                return true;
880            }
881
882            return false;
883        }
884        false
885    }
886
887    fn check_set_visibility(&mut self, normal: TokenKind, set: TokenKind) -> TokenKind {
888        let mut look = self.cursor;
889
890        // Skip whitespace before (
891        while let Some(b) = self.input.get(look) {
892            if matches!(b, b' ' | b'\t' | b'\r' | b'\n' | b'\x0b' | b'\x0c') {
893                look += 1;
894            } else {
895                break;
896            }
897        }
898
899        if self.input.get(look) != Some(&b'(') {
900            return normal;
901        }
902        look += 1;
903
904        // Skip whitespace after (
905        while let Some(b) = self.input.get(look) {
906            if matches!(b, b' ' | b'\t' | b'\r' | b'\n' | b'\x0b' | b'\x0c') {
907                look += 1;
908            } else {
909                break;
910            }
911        }
912
913        let set_kw = b"set";
914        let is_set = self
915            .input
916            .get(look..look + set_kw.len())
917            .map(|s| s.eq_ignore_ascii_case(set_kw))
918            .unwrap_or(false);
919
920        if !is_set {
921            return normal;
922        }
923        look += set_kw.len();
924
925        // Skip whitespace after set
926        while let Some(b) = self.input.get(look) {
927            if matches!(b, b' ' | b'\t' | b'\r' | b'\n' | b'\x0b' | b'\x0c') {
928                look += 1;
929            } else {
930                break;
931            }
932        }
933
934        if self.input.get(look) != Some(&b')') {
935            return normal;
936        }
937        look += 1;
938
939        self.cursor = look;
940        set
941    }
942
943    fn next_in_nowdoc(&mut self) -> Option<Token> {
944        let label = if let Some(LexerState::Nowdoc(label)) = self.state_stack.last() {
945            label.clone()
946        } else {
947            return None;
948        };
949
950        if self.cursor >= self.input.len() {
951            return Some(Token {
952                kind: TokenKind::Error,
953                span: Span::new(self.cursor, self.cursor),
954            });
955        }
956
957        let start = self.cursor;
958
959        // Check if we are at the end label immediately
960        if let Some(len) = self.check_heredoc_end(&label) {
961            self.advance_n(len);
962            self.state_stack.pop();
963
964            return Some(Token {
965                kind: TokenKind::EndHeredoc,
966                span: Span::new(start, self.cursor),
967            });
968        }
969
970        // Consume content until newline (inclusive)
971        while let Some(c) = self.peek() {
972            self.advance();
973            if c == b'\n' {
974                // Check if next line is the label
975                if self.check_heredoc_end(&label).is_some() {
976                    break;
977                }
978            }
979        }
980
981        Some(Token {
982            kind: TokenKind::EncapsedAndWhitespace,
983            span: Span::new(start, self.cursor),
984        })
985    }
986
987    fn next_in_heredoc(&mut self) -> Option<Token> {
988        let label = if let Some(LexerState::Heredoc(label)) = self.state_stack.last() {
989            label.clone()
990        } else {
991            return None;
992        };
993
994        if self.cursor >= self.input.len() {
995            return Some(Token {
996                kind: TokenKind::Error,
997                span: Span::new(self.cursor, self.cursor),
998            });
999        }
1000
1001        let start = self.cursor;
1002
1003        // Check end label
1004        if let Some(len) = self.check_heredoc_end(&label) {
1005            self.advance_n(len);
1006            self.state_stack.pop();
1007
1008            return Some(Token {
1009                kind: TokenKind::EndHeredoc,
1010                span: Span::new(start, self.cursor),
1011            });
1012        }
1013
1014        // Handle interpolation
1015        if let Some(c) = self.peek() {
1016            if c == b'$' {
1017                self.advance();
1018                if let Some(next) = self.peek() {
1019                    if next.is_ascii_alphabetic() || next == b'_' {
1020                        let var_start = self.cursor - 1;
1021                        self.read_identifier();
1022
1023                        // Check for array offset [
1024                        if self.peek() == Some(b'[') {
1025                            self.state_stack.push(LexerState::VarOffset);
1026                        } else if self.peek() == Some(b'-')
1027                            && self.input.get(self.cursor + 1) == Some(&b'>')
1028                            && let Some(next_next) = self.input.get(self.cursor + 2)
1029                            && (next_next.is_ascii_alphabetic() || *next_next == b'_')
1030                        {
1031                            self.state_stack.push(LexerState::LookingForProperty);
1032                        }
1033
1034                        return Some(Token {
1035                            kind: TokenKind::Variable,
1036                            span: Span::new(var_start, self.cursor),
1037                        });
1038                    } else if next == b'{' {
1039                        self.advance();
1040                        self.state_stack.push(LexerState::LookingForVarName);
1041                        return Some(Token {
1042                            kind: TokenKind::DollarOpenCurlyBraces,
1043                            span: Span::new(start, self.cursor),
1044                        });
1045                    }
1046                }
1047            } else if c == b'{' && self.input.get(self.cursor + 1) == Some(&b'$') {
1048                self.advance();
1049                self.state_stack.push(LexerState::Scripting);
1050                return Some(Token {
1051                    kind: TokenKind::CurlyOpen,
1052                    span: Span::new(start, self.cursor),
1053                });
1054            }
1055        }
1056
1057        // Consume content
1058        while let Some(c) = self.peek() {
1059            if c == b'$'
1060                && let Some(next) = self.input.get(self.cursor + 1)
1061                && (next.is_ascii_alphabetic() || *next == b'_' || *next == b'{')
1062            {
1063                break;
1064            }
1065            if c == b'{' && self.input.get(self.cursor + 1) == Some(&b'$') {
1066                break;
1067            }
1068
1069            self.advance();
1070            if c == b'\n' && self.check_heredoc_end(&label).is_some() {
1071                break;
1072            }
1073
1074            if c == b'\\' && self.peek().is_some() {
1075                self.advance();
1076            }
1077        }
1078
1079        if self.cursor > start {
1080            Some(Token {
1081                kind: TokenKind::EncapsedAndWhitespace,
1082                span: Span::new(start, self.cursor),
1083            })
1084        } else {
1085            // Should have matched something above
1086            Some(Token {
1087                kind: TokenKind::EncapsedAndWhitespace,
1088                span: Span::new(start, self.cursor),
1089            })
1090        }
1091    }
1092
1093    fn next_in_halt_compiler(&mut self) -> Option<Token> {
1094        self.skip_whitespace();
1095
1096        if self.cursor >= self.input.len() {
1097            return Some(Token {
1098                kind: TokenKind::Eof,
1099                span: Span::new(self.cursor, self.cursor),
1100            });
1101        }
1102
1103        let start = self.cursor;
1104        let c = self.input[self.cursor];
1105        self.advance();
1106
1107        let kind = match c {
1108            b'(' => TokenKind::OpenParen,
1109            b')' => TokenKind::CloseParen,
1110            b';' => {
1111                self.state_stack.pop();
1112                self.state_stack.push(LexerState::RawData);
1113                TokenKind::SemiColon
1114            }
1115            b'#' => self.consume_single_line_comment(),
1116            b'/' => {
1117                if self.peek() == Some(b'/') {
1118                    self.advance();
1119                    self.consume_single_line_comment()
1120                } else if self.peek() == Some(b'*') {
1121                    self.advance();
1122                    self.consume_multi_line_comment()
1123                } else {
1124                    TokenKind::Error
1125                }
1126            }
1127            _ => TokenKind::Error,
1128        };
1129
1130        Some(Token {
1131            kind,
1132            span: Span::new(start, self.cursor),
1133        })
1134    }
1135
1136    pub fn input_slice(&self, span: Span) -> &'src [u8] {
1137        &self.input[span.start..span.end]
1138    }
1139}
1140
1141impl<'src> Iterator for Lexer<'src> {
1142    type Item = Token;
1143
1144    fn next(&mut self) -> Option<Self::Item> {
1145        // Handle initial state (looking for <?php)
1146        if let Some(LexerState::Initial) = self.state_stack.last() {
1147            let start = self.cursor;
1148            while self.cursor < self.input.len() {
1149                if self.input[self.cursor] != b'<' {
1150                    let remaining = &self.input[self.cursor..];
1151                    match memchr(b'<', remaining) {
1152                        Some(pos) => self.cursor += pos,
1153                        None => {
1154                            self.cursor = self.input.len();
1155                            break;
1156                        }
1157                    }
1158                }
1159
1160                if self.input[self.cursor..].starts_with(b"<?php") {
1161                    if self.cursor > start {
1162                        return Some(Token {
1163                            kind: TokenKind::InlineHtml,
1164                            span: Span::new(start, self.cursor),
1165                        });
1166                    }
1167
1168                    let tag_start = self.cursor;
1169                    self.state_stack.pop();
1170                    self.state_stack.push(LexerState::Scripting);
1171                    self.advance_n(5);
1172
1173                    // Check for trailing newline/whitespace after <?php
1174                    if self.peek().is_some_and(|c| c.is_ascii_whitespace()) {
1175                        self.advance();
1176                    }
1177
1178                    return Some(Token {
1179                        kind: TokenKind::OpenTag,
1180                        span: Span::new(tag_start, self.cursor),
1181                    });
1182                } else if self.input[self.cursor..].starts_with(b"<?=") {
1183                    if self.cursor > start {
1184                        return Some(Token {
1185                            kind: TokenKind::InlineHtml,
1186                            span: Span::new(start, self.cursor),
1187                        });
1188                    }
1189                    let tag_start = self.cursor;
1190                    self.state_stack.pop();
1191                    self.state_stack.push(LexerState::Scripting);
1192                    self.advance_n(3);
1193                    return Some(Token {
1194                        kind: TokenKind::OpenTagEcho,
1195                        span: Span::new(tag_start, self.cursor),
1196                    });
1197                }
1198                self.advance();
1199            }
1200
1201            if self.cursor > start {
1202                return Some(Token {
1203                    kind: TokenKind::InlineHtml,
1204                    span: Span::new(start, self.cursor),
1205                });
1206            }
1207
1208            return Some(Token {
1209                kind: TokenKind::Eof,
1210                span: Span::new(self.cursor, self.cursor),
1211            });
1212        }
1213
1214        // Handle DoubleQuotes/Backquote state
1215        if let Some(LexerState::DoubleQuotes) | Some(LexerState::Backquote) =
1216            self.state_stack.last()
1217        {
1218            return self.next_in_double_quotes();
1219        }
1220
1221        if let Some(LexerState::Heredoc(_)) = self.state_stack.last() {
1222            return self.next_in_heredoc();
1223        }
1224
1225        if let Some(LexerState::Nowdoc(_)) = self.state_stack.last() {
1226            return self.next_in_nowdoc();
1227        }
1228
1229        if let Some(LexerState::HaltCompiler) = self.state_stack.last() {
1230            return self.next_in_halt_compiler();
1231        }
1232
1233        if let Some(LexerState::VarOffset) = self.state_stack.last() {
1234            return self.next_in_var_offset(true);
1235        }
1236
1237        if let Some(LexerState::VarOffsetDollarCurly) = self.state_stack.last() {
1238            return self.next_in_var_offset(false);
1239        }
1240
1241        if let Some(LexerState::LookingForProperty) = self.state_stack.last() {
1242            return self.next_in_looking_for_property();
1243        }
1244
1245        if let Some(LexerState::LookingForVarName) = self.state_stack.last() {
1246            return self.next_in_looking_for_var_name();
1247        }
1248
1249        if let Some(LexerState::RawData) = self.state_stack.last() {
1250            if self.cursor >= self.input.len() {
1251                return Some(Token {
1252                    kind: TokenKind::Eof,
1253                    span: Span::new(self.cursor, self.cursor),
1254                });
1255            }
1256            let start = self.cursor;
1257            self.cursor = self.input.len(); // Consume all
1258            return Some(Token {
1259                kind: TokenKind::InlineHtml,
1260                span: Span::new(start, self.cursor),
1261            });
1262        }
1263
1264        self.skip_whitespace();
1265
1266        if self.cursor >= self.input.len() {
1267            return Some(Token {
1268                kind: TokenKind::Eof,
1269                span: Span::new(self.cursor, self.cursor),
1270            });
1271        }
1272
1273        let start = self.cursor;
1274        let char = self.input[self.cursor];
1275        self.advance();
1276
1277        let kind = match char {
1278            b'$' => {
1279                if let Some(c) = self.peek() {
1280                    if c.is_ascii_alphabetic() || c == b'_' || c >= 0x80 {
1281                        self.read_identifier();
1282                        TokenKind::Variable
1283                    } else {
1284                        TokenKind::Dollar
1285                    }
1286                } else {
1287                    TokenKind::Dollar
1288                }
1289            }
1290            b'\\' => TokenKind::NsSeparator,
1291            b'\'' => self.read_single_quoted(),
1292            b'"' => self.read_double_quoted(b'"', start),
1293            b'`' => {
1294                self.state_stack.push(LexerState::Backquote);
1295                TokenKind::Backtick
1296            }
1297            b'#' => {
1298                if self.peek() == Some(b'[') {
1299                    self.advance();
1300                    TokenKind::Attribute
1301                } else {
1302                    self.consume_single_line_comment()
1303                }
1304            }
1305            b';' => TokenKind::SemiColon,
1306            b':' => {
1307                if self.peek() == Some(b':') {
1308                    self.advance();
1309                    TokenKind::DoubleColon
1310                } else {
1311                    TokenKind::Colon
1312                }
1313            }
1314            b',' => TokenKind::Comma,
1315            b'{' => {
1316                self.state_stack.push(LexerState::Scripting);
1317                TokenKind::OpenBrace
1318            }
1319            b'}' => {
1320                if self.state_stack.len() > 1 {
1321                    self.state_stack.pop();
1322                }
1323                TokenKind::CloseBrace
1324            }
1325            b'(' => {
1326                // Check for cast
1327                let saved_cursor = self.cursor;
1328                self.skip_whitespace();
1329
1330                let start_ident = self.cursor;
1331                self.read_identifier();
1332                let ident_len = self.cursor - start_ident;
1333
1334                if ident_len > 0 {
1335                    let ident = &self.input[start_ident..self.cursor];
1336                    self.skip_whitespace();
1337                    if self.peek() == Some(b')') {
1338                        let cast_kind = match ident.to_ascii_lowercase().as_slice() {
1339                            b"int" | b"integer" => Some(TokenKind::IntCast),
1340                            b"bool" | b"boolean" => Some(TokenKind::BoolCast),
1341                            b"float" | b"double" | b"real" => Some(TokenKind::FloatCast),
1342                            b"string" | b"binary" => Some(TokenKind::StringCast),
1343                            b"array" => Some(TokenKind::ArrayCast),
1344                            b"object" => Some(TokenKind::ObjectCast),
1345                            b"unset" => Some(TokenKind::UnsetCast),
1346                            b"void" => Some(TokenKind::VoidCast),
1347                            _ => None,
1348                        };
1349
1350                        if let Some(k) = cast_kind {
1351                            self.advance(); // Eat ')'
1352                            k
1353                        } else {
1354                            self.cursor = saved_cursor;
1355                            TokenKind::OpenParen
1356                        }
1357                    } else {
1358                        self.cursor = saved_cursor;
1359                        TokenKind::OpenParen
1360                    }
1361                } else {
1362                    self.cursor = saved_cursor;
1363                    TokenKind::OpenParen
1364                }
1365            }
1366            b')' => TokenKind::CloseParen,
1367            b'[' => TokenKind::OpenBracket,
1368            b']' => TokenKind::CloseBracket,
1369            b'+' => {
1370                if self.peek() == Some(b'+') {
1371                    self.advance();
1372                    TokenKind::Inc
1373                } else if self.peek() == Some(b'=') {
1374                    self.advance();
1375                    TokenKind::PlusEq
1376                } else {
1377                    TokenKind::Plus
1378                }
1379            }
1380            b'-' => {
1381                if self.peek() == Some(b'>') {
1382                    self.advance();
1383                    TokenKind::Arrow
1384                } else if self.peek() == Some(b'-') {
1385                    self.advance();
1386                    TokenKind::Dec
1387                } else if self.peek() == Some(b'=') {
1388                    self.advance();
1389                    TokenKind::MinusEq
1390                } else {
1391                    TokenKind::Minus
1392                }
1393            }
1394            b'*' => {
1395                if self.peek() == Some(b'*') {
1396                    self.advance();
1397                    if self.peek() == Some(b'=') {
1398                        self.advance();
1399                        TokenKind::PowEq
1400                    } else {
1401                        TokenKind::Pow
1402                    }
1403                } else if self.peek() == Some(b'=') {
1404                    self.advance();
1405                    TokenKind::MulEq
1406                } else {
1407                    TokenKind::Asterisk
1408                }
1409            }
1410            b'/' => {
1411                if self.peek() == Some(b'/') {
1412                    self.advance();
1413                    self.consume_single_line_comment()
1414                } else if self.peek() == Some(b'*') {
1415                    self.advance();
1416                    self.consume_multi_line_comment()
1417                } else if self.peek() == Some(b'=') {
1418                    self.advance();
1419                    TokenKind::DivEq
1420                } else {
1421                    TokenKind::Slash
1422                }
1423            }
1424            b'%' => {
1425                if self.peek() == Some(b'=') {
1426                    self.advance();
1427                    TokenKind::ModEq
1428                } else {
1429                    TokenKind::Percent
1430                }
1431            }
1432            b'.' => {
1433                if self.peek() == Some(b'=') {
1434                    self.advance();
1435                    TokenKind::ConcatEq
1436                } else if self.peek() == Some(b'.') {
1437                    self.advance();
1438                    if self.peek() == Some(b'.') {
1439                        self.advance();
1440                        TokenKind::Ellipsis
1441                    } else {
1442                        TokenKind::Dot
1443                    }
1444                } else if let Some(c) = self.peek()
1445                    && c.is_ascii_digit()
1446                {
1447                    self.cursor -= 1;
1448                    self.read_number()
1449                } else {
1450                    TokenKind::Dot
1451                }
1452            }
1453            b'=' => {
1454                if self.peek() == Some(b'=') {
1455                    self.advance();
1456                    if self.peek() == Some(b'=') {
1457                        self.advance();
1458                        TokenKind::EqEqEq
1459                    } else {
1460                        TokenKind::EqEq
1461                    }
1462                } else if self.peek() == Some(b'>') {
1463                    self.advance();
1464                    TokenKind::DoubleArrow
1465                } else {
1466                    TokenKind::Eq
1467                }
1468            }
1469            b'!' => {
1470                if self.peek() == Some(b'=') {
1471                    self.advance();
1472                    if self.peek() == Some(b'=') {
1473                        self.advance();
1474                        TokenKind::BangEqEq
1475                    } else {
1476                        TokenKind::BangEq
1477                    }
1478                } else {
1479                    TokenKind::Bang
1480                }
1481            }
1482            b'<' => {
1483                if self.peek() == Some(b'<') && self.input.get(self.cursor + 1) == Some(&b'<') {
1484                    self.advance(); // Eat second <
1485                    self.advance(); // Eat third <
1486                    return Some(self.read_heredoc_start(start));
1487                } else if self.peek() == Some(b'=') {
1488                    self.advance();
1489                    if self.peek() == Some(b'>') {
1490                        self.advance();
1491                        TokenKind::Spaceship
1492                    } else {
1493                        TokenKind::LtEq
1494                    }
1495                } else if self.peek() == Some(b'<') {
1496                    self.advance();
1497                    if self.peek() == Some(b'=') {
1498                        self.advance();
1499                        TokenKind::SlEq
1500                    } else {
1501                        TokenKind::Sl
1502                    }
1503                } else if self.peek() == Some(b'>') {
1504                    self.advance();
1505                    TokenKind::BangEq
1506                } else {
1507                    TokenKind::Lt
1508                }
1509            }
1510            b'>' => {
1511                if self.peek() == Some(b'=') {
1512                    self.advance();
1513                    TokenKind::GtEq
1514                } else if self.peek() == Some(b'>') {
1515                    self.advance();
1516                    if self.peek() == Some(b'=') {
1517                        self.advance();
1518                        TokenKind::SrEq
1519                    } else {
1520                        TokenKind::Sr
1521                    }
1522                } else {
1523                    TokenKind::Gt
1524                }
1525            }
1526            b'&' => {
1527                if self.peek() == Some(b'&') {
1528                    self.advance();
1529                    TokenKind::AmpersandAmpersand
1530                } else if self.peek() == Some(b'=') {
1531                    self.advance();
1532                    TokenKind::AndEq
1533                } else if self.is_followed_by_var_or_vararg() {
1534                    TokenKind::AmpersandFollowedByVarOrVararg
1535                } else {
1536                    TokenKind::AmpersandNotFollowedByVarOrVararg
1537                }
1538            }
1539            b'|' => {
1540                if self.peek() == Some(b'|') {
1541                    self.advance();
1542                    TokenKind::PipePipe
1543                } else if self.peek() == Some(b'=') {
1544                    self.advance();
1545                    TokenKind::OrEq
1546                } else {
1547                    TokenKind::Pipe
1548                }
1549            }
1550            b'^' => {
1551                if self.peek() == Some(b'=') {
1552                    self.advance();
1553                    TokenKind::XorEq
1554                } else {
1555                    TokenKind::Caret
1556                }
1557            }
1558            b'~' => TokenKind::BitNot,
1559            b'@' => TokenKind::At,
1560            b'?' => {
1561                if self.peek() == Some(b'>') {
1562                    self.advance();
1563                    self.state_stack.pop();
1564                    self.state_stack.push(LexerState::Initial);
1565                    TokenKind::CloseTag
1566                } else if self.peek() == Some(b'?') {
1567                    self.advance();
1568                    if self.peek() == Some(b'=') {
1569                        self.advance();
1570                        TokenKind::CoalesceEq
1571                    } else {
1572                        TokenKind::Coalesce
1573                    }
1574                } else if self.peek() == Some(b'-')
1575                    && self.input.get(self.cursor + 1) == Some(&b'>')
1576                {
1577                    self.advance();
1578                    self.advance();
1579                    TokenKind::NullSafeArrow
1580                } else {
1581                    TokenKind::Question
1582                }
1583            }
1584            c if c.is_ascii_digit() => {
1585                self.cursor -= 1;
1586                self.read_number()
1587            }
1588            c if c.is_ascii_alphabetic() || c == b'_' || c >= 0x80 => {
1589                // Check for binary string prefix
1590                if (c == b'b' || c == b'B')
1591                    && let Some(next) = self.peek()
1592                {
1593                    if next == b'\'' {
1594                        self.advance(); // Eat '
1595                        return Some(Token {
1596                            kind: self.read_single_quoted(),
1597                            span: Span::new(start, self.cursor),
1598                        });
1599                    } else if next == b'"' {
1600                        let quote_pos = self.cursor;
1601                        self.advance(); // Eat "
1602                        return Some(Token {
1603                            kind: self.read_double_quoted(b'"', quote_pos),
1604                            span: Span::new(start, self.cursor),
1605                        });
1606                    }
1607                }
1608
1609                self.read_identifier();
1610                let text = &self.input[start..self.cursor];
1611
1612                if self.mode == LexerMode::LookingForProperty {
1613                    self.mode = LexerMode::Standard;
1614                    TokenKind::Identifier
1615                } else {
1616                    let is_all_lowercase = text.iter().all(|c| !c.is_ascii_uppercase());
1617
1618                    let mut kind = if is_all_lowercase {
1619                        keyword_lookup(text)
1620                    } else {
1621                        keyword_lookup(&text.to_ascii_lowercase())
1622                    };
1623
1624                    match kind {
1625                        TokenKind::Yield => {
1626                            let mut look = self.cursor;
1627                            while let Some(b) = self.input.get(look) {
1628                                if matches!(b, b' ' | b'\t' | b'\r' | b'\n' | b'\x0b' | b'\x0c') {
1629                                    look += 1;
1630                                } else {
1631                                    break;
1632                                }
1633                            }
1634                            let from_kw = b"from";
1635                            let is_from = self
1636                                .input
1637                                .get(look..look + from_kw.len())
1638                                .map(|s| {
1639                                    s.iter()
1640                                        .zip(from_kw.iter())
1641                                        .all(|(c, k)| c.to_ascii_lowercase() == *k)
1642                                })
1643                                .unwrap_or(false)
1644                                && !self
1645                                    .input
1646                                    .get(look + from_kw.len())
1647                                    .map(|c| c.is_ascii_alphanumeric() || *c == b'_' || *c >= 0x80)
1648                                    .unwrap_or(false);
1649
1650                            if is_from {
1651                                self.cursor = look + from_kw.len();
1652                                kind = TokenKind::YieldFrom;
1653                            }
1654                        }
1655                        TokenKind::Public => {
1656                            if text[0].eq_ignore_ascii_case(&b'p') {
1657                                kind = self
1658                                    .check_set_visibility(TokenKind::Public, TokenKind::PublicSet);
1659                            }
1660                        }
1661                        TokenKind::Protected => {
1662                            kind = self.check_set_visibility(
1663                                TokenKind::Protected,
1664                                TokenKind::ProtectedSet,
1665                            );
1666                        }
1667                        TokenKind::Private => {
1668                            kind = self
1669                                .check_set_visibility(TokenKind::Private, TokenKind::PrivateSet);
1670                        }
1671                        TokenKind::HaltCompiler => {
1672                            self.state_stack.pop();
1673                            self.state_stack.push(LexerState::HaltCompiler);
1674                        }
1675                        _ => {}
1676                    }
1677                    kind
1678                }
1679            }
1680            _ => TokenKind::Error,
1681        };
1682
1683        Some(Token {
1684            kind,
1685            span: Span::new(start, self.cursor),
1686        })
1687    }
1688}