1#![allow(clippy::unreachable)]
2
3use core::hint::unreachable_unchecked;
4
5use std::collections::VecDeque;
6use std::fmt::Debug;
7
8use memchr::memchr2;
9use memchr::memmem;
10
11const SIMPLE_TOKEN_TABLE: [Option<TokenKind>; 256] = {
15 let mut table: [Option<TokenKind>; 256] = [None; 256];
16 table[b';' as usize] = Some(TokenKind::Semicolon);
17 table[b',' as usize] = Some(TokenKind::Comma);
18 table[b')' as usize] = Some(TokenKind::RightParenthesis);
19 table[b'[' as usize] = Some(TokenKind::LeftBracket);
20 table[b']' as usize] = Some(TokenKind::RightBracket);
21 table[b'{' as usize] = Some(TokenKind::LeftBrace);
22 table[b'}' as usize] = Some(TokenKind::RightBrace);
23 table[b'~' as usize] = Some(TokenKind::Tilde);
24 table[b'@' as usize] = Some(TokenKind::At);
25 table
26};
27
/// Byte-indexed lookup table answering "can this byte start an identifier?".
///
/// An entry is `true` for ASCII letters, the underscore, and every byte in
/// the range `0x80..=0xFF`; it is `false` for everything else.
const IDENT_START_TABLE: [bool; 256] = {
    let mut flags = [false; 256];
    let mut byte: u8 = 0;
    loop {
        flags[byte as usize] = byte.is_ascii_alphabetic() || byte == b'_' || byte >= 0x80;

        // Stop after the last byte value; incrementing past it would overflow.
        if byte == u8::MAX {
            break;
        }
        byte += 1;
    }

    flags
};
39
40use mago_database::file::FileId;
41use mago_database::file::HasFileId;
42use mago_span::Position;
43use mago_syntax_core::float_exponent;
44use mago_syntax_core::float_separator;
45use mago_syntax_core::input::Input;
46use mago_syntax_core::number_sign;
47use mago_syntax_core::start_of_binary_number;
48use mago_syntax_core::start_of_float_number;
49use mago_syntax_core::start_of_hexadecimal_number;
50use mago_syntax_core::start_of_identifier;
51use mago_syntax_core::start_of_number;
52use mago_syntax_core::start_of_octal_number;
53use mago_syntax_core::start_of_octal_or_float_number;
54use mago_syntax_core::utils::is_part_of_identifier;
55use mago_syntax_core::utils::is_start_of_identifier;
56use mago_syntax_core::utils::read_digits_of_base;
57
58use crate::error::SyntaxError;
59use crate::lexer::internal::mode::HaltStage;
60use crate::lexer::internal::mode::Interpolation;
61use crate::lexer::internal::mode::LexerMode;
62use crate::lexer::internal::utils::NumberKind;
63use crate::settings::LexerSettings;
64use crate::token::DocumentKind;
65use crate::token::Token;
66use crate::token::TokenKind;
67
68mod internal;
69
/// A lexer that turns the bytes of an [`Input`] into [`Token`]s, one call to
/// [`Lexer::advance`] at a time.
#[derive(Debug)]
pub struct Lexer<'input> {
    /// The bytes being tokenized, together with the current read position.
    input: Input<'input>,
    /// Lexer options; `enable_short_tags` is consulted while scanning inline
    /// text for an opening tag.
    settings: LexerSettings,
    /// The state the lexer is currently in (inline text, script, string
    /// bodies, heredoc/nowdoc bodies, or after `__halt_compiler`).
    mode: LexerMode<'input>,
    /// `true` while `interpolation` is lexing the expression embedded in a
    /// string; while set, `advance` does not drain `buffer`.
    interpolating: bool,
    /// The `brace` flag of the interpolation currently being lexed; it
    /// re-enables namespace separators inside identifiers while interpolating.
    brace_interpolating: bool,
    /// Tokens produced by `interpolation` that are handed out by subsequent
    /// `advance` calls.
    buffer: VecDeque<Token<'input>>,
}
91
92impl<'input> Lexer<'input> {
    /// Number of token slots pre-allocated for the interpolation token buffer
    /// when a lexer is constructed.
    const BUFFER_INITIAL_CAPACITY: usize = 8;
96
97 #[must_use]
108 pub fn new(input: Input<'input>, settings: LexerSettings) -> Lexer<'input> {
109 Lexer {
110 input,
111 settings,
112 mode: LexerMode::Inline,
113 interpolating: false,
114 brace_interpolating: false,
115 buffer: VecDeque::with_capacity(Self::BUFFER_INITIAL_CAPACITY),
116 }
117 }
118
119 #[must_use]
130 pub fn scripting(input: Input<'input>, settings: LexerSettings) -> Lexer<'input> {
131 Lexer {
132 input,
133 settings,
134 mode: LexerMode::Script,
135 interpolating: false,
136 brace_interpolating: false,
137 buffer: VecDeque::with_capacity(Self::BUFFER_INITIAL_CAPACITY),
138 }
139 }
140
    /// Returns `true` when the underlying input reports that it has reached
    /// its end.
    ///
    /// Only the input is consulted; the token buffer is not inspected.
    #[must_use]
    pub fn has_reached_eof(&self) -> bool {
        self.input.has_reached_eof()
    }
148
    /// Returns the current position of the underlying input.
    #[inline]
    #[must_use]
    pub const fn current_position(&self) -> Position {
        self.input.current_position()
    }
155
    /// Produces the next token from the input.
    ///
    /// The lexer is a state machine driven by `self.mode`; each call handles
    /// the current mode, consumes the bytes of one token and may switch mode
    /// for the next call.
    ///
    /// # Returns
    ///
    /// - `Some(Ok(token))` for the next token,
    /// - `Some(Err(error))` when a syntax error is found (the offending bytes
    ///   have already been consumed),
    /// - `None` once the input is exhausted and no buffered token remains.
    #[inline]
    pub fn advance(&mut self) -> Option<Result<Token<'input>, SyntaxError>> {
        // Tokens queued by `interpolation` are handed out first. While an
        // interpolation is being lexed this is skipped, because `interpolation`
        // re-enters `advance` to fill that very queue.
        if !self.interpolating
            && let Some(token) = self.buffer.pop_front()
        {
            return Some(Ok(token));
        }

        if self.input.has_reached_eof() {
            return None;
        }

        match self.mode {
            // Inline text: everything up to the next opening tag.
            LexerMode::Inline => {
                let start = self.input.current_position();
                let offset = self.input.current_offset();

                // A `#!` at the very beginning of the input is emitted as a shebang token.
                if offset == 0
                    && self.input.len() >= 2
                    // SAFETY: the input holds at least two bytes (checked just above).
                    && unsafe { *self.input.read_at_unchecked(0) } == b'#'
                    // SAFETY: same length check as above.
                    && unsafe { *self.input.read_at_unchecked(1) } == b'!'
                {
                    let buffer = self.input.consume_through(b'\n');
                    let end = self.input.current_position();

                    return Some(Ok(self.token(TokenKind::InlineShebang, buffer, start, end)));
                }

                let bytes = self.input.read_remaining();

                if self.settings.enable_short_tags {
                    // With short tags enabled, any `<?` opens a tag.
                    if let Some(pos) = memchr::memmem::find(bytes, b"<?") {
                        // Text before the tag is emitted first; the tag itself is
                        // handled by the next call (then `pos` is 0).
                        if pos > 0 {
                            let buffer = self.input.consume(pos);
                            let end = self.input.current_position();

                            return Some(Ok(self.token(TokenKind::InlineText, buffer, start, end)));
                        }

                        if self.input.is_at(b"<?php", true) {
                            let buffer = self.input.consume(5);
                            self.mode = LexerMode::Script;
                            return Some(Ok(self.token(
                                TokenKind::OpenTag,
                                buffer,
                                start,
                                self.input.current_position(),
                            )));
                        }

                        if self.input.is_at(b"<?=", false) {
                            let buffer = self.input.consume(3);
                            self.mode = LexerMode::Script;
                            return Some(Ok(self.token(
                                TokenKind::EchoTag,
                                buffer,
                                start,
                                self.input.current_position(),
                            )));
                        }

                        // Neither `<?php` nor `<?=`: a bare short open tag.
                        let buffer = self.input.consume(2);
                        self.mode = LexerMode::Script;
                        return Some(Ok(self.token(
                            TokenKind::ShortOpenTag,
                            buffer,
                            start,
                            self.input.current_position(),
                        )));
                    }
                } else {
                    // Without short tags only `<?php` (any letter case) and `<?=`
                    // open a tag; every other `<?` stays part of the inline text.
                    let iter = memchr::memmem::find_iter(bytes, b"<?");

                    for pos in iter {
                        // SAFETY: `pos` is a match offset reported for `bytes`, so
                        // `pos..` is a valid range of `bytes`.
                        let candidate = unsafe { bytes.get_unchecked(pos..) };

                        // `| 0x20` lower-cases ASCII letters for the comparison.
                        if candidate.len() >= 5
                            // SAFETY: indices 2..=4 are in bounds because `candidate.len() >= 5`.
                            && (unsafe { *candidate.get_unchecked(2) } | 0x20) == b'p'
                            && (unsafe { *candidate.get_unchecked(3) } | 0x20) == b'h'
                            && (unsafe { *candidate.get_unchecked(4) } | 0x20) == b'p'
                        {
                            if pos > 0 {
                                let buffer = self.input.consume(pos);
                                let end = self.input.current_position();
                                return Some(Ok(self.token(TokenKind::InlineText, buffer, start, end)));
                            }

                            let buffer = self.input.consume(5);
                            self.mode = LexerMode::Script;
                            return Some(Ok(self.token(
                                TokenKind::OpenTag,
                                buffer,
                                start,
                                self.input.current_position(),
                            )));
                        }

                        // SAFETY: index 2 is in bounds because `candidate.len() >= 3`
                        // is checked first.
                        if candidate.len() >= 3 && unsafe { *candidate.get_unchecked(2) } == b'=' {
                            if pos > 0 {
                                let buffer = self.input.consume(pos);
                                let end = self.input.current_position();
                                return Some(Ok(self.token(TokenKind::InlineText, buffer, start, end)));
                            }

                            let buffer = self.input.consume(3);
                            self.mode = LexerMode::Script;
                            return Some(Ok(self.token(
                                TokenKind::EchoTag,
                                buffer,
                                start,
                                self.input.current_position(),
                            )));
                        }
                    }
                }

                if self.input.has_reached_eof() {
                    return None;
                }

                // No opening tag was found: the rest of the input is inline text.
                let buffer = self.input.consume_remaining();
                let end = self.input.current_position();
                Some(Ok(self.token(TokenKind::InlineText, buffer, start, end)))
            }
            // Script mode: regular code tokens.
            LexerMode::Script => {
                let start = self.input.current_position();
                let whitespaces = self.input.consume_whitespaces();
                if !whitespaces.is_empty() {
                    return Some(Ok(self.token(
                        TokenKind::Whitespace,
                        whitespaces,
                        start,
                        self.input.current_position(),
                    )));
                }

                let Some(&first_byte) = self.input.read(1).first() else {
                    // SAFETY: the input was not at EOF when this call started and an
                    // empty whitespace run was returned just above, so at least one
                    // byte should remain.
                    // NOTE(review): relies on `consume_whitespaces` consuming nothing
                    // when it returns an empty slice — confirm.
                    unsafe { unreachable_unchecked() }
                };

                // Fast path: single-byte tokens that never start a longer token.
                if let Some(kind) = SIMPLE_TOKEN_TABLE[first_byte as usize] {
                    let buffer = self.input.consume(1);
                    let end = self.input.current_position();
                    return Some(Ok(self.token(kind, buffer, start, end)));
                }

                // Fast path: identifiers and keywords.
                if IDENT_START_TABLE[first_byte as usize] {
                    // A `b`/`B` directly followed by a quote or `<<<` is left to the
                    // string/heredoc arms of the big match below (not while interpolating).
                    let is_binary_string_prefix = !self.interpolating
                        && matches!(first_byte, b'b' | b'B')
                        && matches!(self.input.read(4), [_, b'\'' | b'"', ..] | [_, b'<', b'<', b'<']);

                    if !is_binary_string_prefix {
                        let (token_kind, len) = self.scan_identifier_or_keyword_info();

                        if token_kind == TokenKind::HaltCompiler {
                            self.mode = LexerMode::Halt(HaltStage::LookingForLeftParenthesis);
                        }

                        let buffer = self.input.consume(len);
                        let end = self.input.current_position();
                        return Some(Ok(self.token(token_kind, buffer, start, end)));
                    }

                }

                // Fast path: `$` followed by an identifier start is a variable.
                if first_byte == b'$'
                    && let Some(&next) = self.input.read(2).get(1)
                    && IDENT_START_TABLE[next as usize]
                {
                    let (ident_len, _) = self.input.scan_identifier(1);
                    let buffer = self.input.consume(1 + ident_len);
                    let end = self.input.current_position();
                    return Some(Ok(self.token(TokenKind::Variable, buffer, start, end)));
                }

                // Filled in by the heredoc/nowdoc arms; used for the mode switch below.
                let mut document_label: &[u8] = &[];

                // Arm order matters: three-byte operators come before two-byte
                // ones, which come before single-byte ones.
                let (token_kind, len) = match self.input.read(3) {
                    [b'!', b'=', b'='] => (TokenKind::BangEqualEqual, 3),
                    [b'?', b'?', b'='] => (TokenKind::QuestionQuestionEqual, 3),
                    [b'?', b'-', b'>'] => (TokenKind::QuestionMinusGreaterThan, 3),
                    [b'=', b'=', b'='] => (TokenKind::EqualEqualEqual, 3),
                    [b'.', b'.', b'.'] => (TokenKind::DotDotDot, 3),
                    [b'<', b'=', b'>'] => (TokenKind::LessThanEqualGreaterThan, 3),
                    [b'<', b'<', b'='] => (TokenKind::LeftShiftEqual, 3),
                    [b'>', b'>', b'='] => (TokenKind::RightShiftEqual, 3),
                    [b'*', b'*', b'='] => (TokenKind::AsteriskAsteriskEqual, 3),
                    // `<<<LABEL`: the label starts after `<<<` and any whitespace.
                    [b'<', b'<', b'<'] if matches_start_of_heredoc_document(&self.input, 0) => {
                        let (length, whitespaces, label_length) = read_start_of_heredoc_document(&self.input, false, 0);

                        document_label = self.input.peek(3 + whitespaces, label_length);

                        (TokenKind::DocumentStart(DocumentKind::Heredoc), length)
                    }
                    // `<<<"LABEL"`: one extra byte is skipped for the opening quote.
                    [b'<', b'<', b'<'] if matches_start_of_double_quote_heredoc_document(&self.input, 0) => {
                        let (length, whitespaces, label_length) = read_start_of_heredoc_document(&self.input, true, 0);

                        document_label = self.input.peek(4 + whitespaces, label_length);

                        (TokenKind::DocumentStart(DocumentKind::Heredoc), length)
                    }
                    // Nowdoc opener: the label is likewise read one byte further in.
                    [b'<', b'<', b'<'] if matches_start_of_nowdoc_document(&self.input, 0) => {
                        let (length, whitespaces, label_length) = read_start_of_nowdoc_document(&self.input, 0);

                        document_label = self.input.peek(4 + whitespaces, label_length);

                        (TokenKind::DocumentStart(DocumentKind::Nowdoc), length)
                    }
                    [b'!', b'=', ..] => (TokenKind::BangEqual, 2),
                    [b'&', b'&', ..] => (TokenKind::AmpersandAmpersand, 2),
                    [b'&', b'=', ..] => (TokenKind::AmpersandEqual, 2),
                    [b'.', b'=', ..] => (TokenKind::DotEqual, 2),
                    [b'?', b'?', ..] => (TokenKind::QuestionQuestion, 2),
                    [b'?', b'>', ..] => (TokenKind::CloseTag, 2),
                    [b'=', b'>', ..] => (TokenKind::EqualGreaterThan, 2),
                    [b'=', b'=', ..] => (TokenKind::EqualEqual, 2),
                    [b'+', b'+', ..] => (TokenKind::PlusPlus, 2),
                    [b'+', b'=', ..] => (TokenKind::PlusEqual, 2),
                    [b'%', b'=', ..] => (TokenKind::PercentEqual, 2),
                    [b'-', b'-', ..] => (TokenKind::MinusMinus, 2),
                    [b'-', b'>', ..] => (TokenKind::MinusGreaterThan, 2),
                    [b'-', b'=', ..] => (TokenKind::MinusEqual, 2),
                    [b'<', b'<', ..] => (TokenKind::LeftShift, 2),
                    [b'<', b'=', ..] => (TokenKind::LessThanEqual, 2),
                    [b'<', b'>', ..] => (TokenKind::LessThanGreaterThan, 2),
                    [b'>', b'>', ..] => (TokenKind::RightShift, 2),
                    [b'>', b'=', ..] => (TokenKind::GreaterThanEqual, 2),
                    [b':', b':', ..] => (TokenKind::ColonColon, 2),
                    [b'#', b'[', ..] => (TokenKind::HashLeftBracket, 2),
                    [b'|', b'=', ..] => (TokenKind::PipeEqual, 2),
                    [b'|', b'|', ..] => (TokenKind::PipePipe, 2),
                    [b'/', b'=', ..] => (TokenKind::SlashEqual, 2),
                    [b'^', b'=', ..] => (TokenKind::CaretEqual, 2),
                    [b'*', b'*', ..] => (TokenKind::AsteriskAsterisk, 2),
                    [b'*', b'=', ..] => (TokenKind::AsteriskEqual, 2),
                    [b'|', b'>', ..] => (TokenKind::PipeGreaterThan, 2),
                    // `//` comment: the scanned length excludes the two slashes.
                    [b'/', b'/', ..] => {
                        let remaining = self.input.peek(2, self.input.len() - self.input.current_offset());
                        let comment_len = scan_single_line_comment(remaining);
                        (TokenKind::SingleLineComment, 2 + comment_len)
                    }
                    // `/* ... */` comment; this arm needs a third byte after `/*`.
                    [b'/', b'*', asterisk] => {
                        let remaining = self.input.peek(2, self.input.len() - self.input.current_offset());
                        match scan_multi_line_comment(remaining) {
                            Some(len) => {
                                // `/**` is a docblock only when `len > 2`, which excludes `/**/`.
                                let is_docblock = asterisk == &b'*' && len > 2;
                                if is_docblock {
                                    (TokenKind::DocBlockComment, len + 2)
                                } else {
                                    (TokenKind::MultiLineComment, len + 2)
                                }
                            }
                            None => {
                                // Unterminated comment: consume the rest and report EOF.
                                self.input.consume(remaining.len() + 2);
                                return Some(Err(SyntaxError::UnexpectedEndOfFile(
                                    self.file_id(),
                                    self.input.current_position(),
                                )));
                            }
                        }
                    }
                    // `\Name\Space`: scan segment by segment while a separator follows.
                    [b'\\', start_of_identifier!(), ..] => {
                        let mut length = 1;
                        loop {
                            let (ident_len, ends_with_ns) = self.input.scan_identifier(length);
                            length += ident_len;
                            if ends_with_ns {
                                length += 1;
                            } else {
                                break;
                            }
                        }

                        (TokenKind::FullyQualifiedIdentifier, length)
                    }
                    [b'$', b'{', ..] => (TokenKind::DollarLeftBrace, 2),
                    [b'$', ..] => (TokenKind::Dollar, 1),
                    [b'!', ..] => (TokenKind::Bang, 1),
                    [b'&', ..] => (TokenKind::Ampersand, 1),
                    [b'?', ..] => (TokenKind::Question, 1),
                    [b'=', ..] => (TokenKind::Equal, 1),
                    [b'`', ..] => (TokenKind::Backtick, 1),
                    [b'+', ..] => (TokenKind::Plus, 1),
                    [b'%', ..] => (TokenKind::Percent, 1),
                    [b'-', ..] => (TokenKind::Minus, 1),
                    [b'<', ..] => (TokenKind::LessThan, 1),
                    [b'>', ..] => (TokenKind::GreaterThan, 1),
                    [b':', ..] => (TokenKind::Colon, 1),
                    [b'|', ..] => (TokenKind::Pipe, 1),
                    [b'^', ..] => (TokenKind::Caret, 1),
                    [b'*', ..] => (TokenKind::Asterisk, 1),
                    [b'/', ..] => (TokenKind::Slash, 1),
                    // `b`/`B`-prefixed strings and documents; the trailing `1`
                    // argument is the prefix length.
                    [b'b' | b'B', b'\'', ..] => read_literal_string(&self.input, b'\'', 1),
                    [b'b' | b'B', b'"', ..] if matches_literal_double_quote_string(&self.input, 1) => {
                        read_literal_string(&self.input, b'"', 1)
                    }
                    [b'b' | b'B', b'"', ..] => (TokenKind::DoubleQuote, 2),
                    // Only three bytes were read, so the third `<` is checked through `read(4)`.
                    [b'b' | b'B', b'<', b'<']
                        if self.input.read(4).len() == 4
                            && self.input.read(4)[3] == b'<'
                            && matches_start_of_heredoc_document(&self.input, 1) =>
                    {
                        let (length, whitespaces, label_length) = read_start_of_heredoc_document(&self.input, false, 1);

                        document_label = self.input.peek(4 + whitespaces, label_length);

                        (TokenKind::DocumentStart(DocumentKind::Heredoc), length)
                    }
                    [b'b' | b'B', b'<', b'<']
                        if self.input.read(4).len() == 4
                            && self.input.read(4)[3] == b'<'
                            && matches_start_of_double_quote_heredoc_document(&self.input, 1) =>
                    {
                        let (length, whitespaces, label_length) = read_start_of_heredoc_document(&self.input, true, 1);

                        document_label = self.input.peek(5 + whitespaces, label_length);

                        (TokenKind::DocumentStart(DocumentKind::Heredoc), length)
                    }
                    [b'b' | b'B', b'<', b'<']
                        if self.input.read(4).len() == 4
                            && self.input.read(4)[3] == b'<'
                            && matches_start_of_nowdoc_document(&self.input, 1) =>
                    {
                        let (length, whitespaces, label_length) = read_start_of_nowdoc_document(&self.input, 1);

                        document_label = self.input.peek(5 + whitespaces, label_length);

                        (TokenKind::DocumentStart(DocumentKind::Nowdoc), length)
                    }
                    [quote @ b'\'', ..] => read_literal_string(&self.input, *quote, 0),
                    [quote @ b'"', ..] if matches_literal_double_quote_string(&self.input, 0) => {
                        read_literal_string(&self.input, *quote, 0)
                    }
                    // A double-quoted string that did not match as a literal: emit
                    // only the quote; the body is lexed in `DoubleQuoteString` mode.
                    [b'"', ..] => (TokenKind::DoubleQuote, 1),
                    // `(`: either a plain parenthesis or the start of a cast.
                    [b'(', ..] => 'parenthesis: {
                        // Cheap pre-filter: if the first non-whitespace byte after `(`
                        // is not one of the letters below (case-insensitively), skip
                        // the cast table entirely.
                        let mut peek_offset = 1;
                        while let Some(&b) = self.input.read(peek_offset + 1).get(peek_offset) {
                            if b.is_ascii_whitespace() {
                                peek_offset += 1;
                            } else {
                                let lower = b | 0x20;
                                if !matches!(lower, b'i' | b'b' | b'f' | b'd' | b'r' | b's' | b'a' | b'o' | b'u' | b'v')
                                {
                                    break 'parenthesis (TokenKind::LeftParenthesis, 1);
                                }
                                break;
                            }
                        }

                        for (value, kind) in internal::consts::CAST_TYPES {
                            if let Some(length) = self.input.match_sequence_ignore_whitespace(value, true) {
                                break 'parenthesis (kind, length);
                            }
                        }

                        (TokenKind::LeftParenthesis, 1)
                    }
                    // `#` comment (`#[` was matched earlier as `HashLeftBracket`).
                    [b'#', ..] => {
                        let remaining = self.input.peek(1, self.input.len() - self.input.current_offset());
                        let comment_len = scan_single_line_comment(remaining);
                        (TokenKind::HashComment, 1 + comment_len)
                    }
                    [b'\\', ..] => (TokenKind::NamespaceSeparator, 1),
                    // Float with a leading dot, e.g. `.5`.
                    [b'.', start_of_number!(), ..] => {
                        let mut length = read_digits_of_base(&self.input, 2, 10);
                        if let float_exponent!() = self.input.peek(length, 1) {
                            let mut exp_length = length + 1;
                            if let number_sign!() = self.input.peek(exp_length, 1) {
                                exp_length += 1;
                            }

                            // The exponent is accepted only if digits follow it.
                            let after_exp = read_digits_of_base(&self.input, exp_length, 10);
                            if after_exp > exp_length {
                                length = after_exp;
                            }
                        }

                        (TokenKind::LiteralFloat, length)
                    }
                    // Integer or float literal.
                    [start_of_number!(), ..] => 'number: {
                        let mut length = 1;

                        // Pick the digit base and number kind from the first bytes;
                        // the prefixed integer forms skip one extra byte.
                        let (base, kind): (u8, NumberKind) = match self.input.read(3) {
                            start_of_binary_number!() => {
                                length += 1;

                                (2, NumberKind::Integer)
                            }
                            start_of_octal_number!() => {
                                length += 1;

                                (8, NumberKind::Integer)
                            }
                            start_of_hexadecimal_number!() => {
                                length += 1;

                                (16, NumberKind::Integer)
                            }
                            start_of_octal_or_float_number!() => (10, NumberKind::OctalOrFloat),
                            start_of_float_number!() => (10, NumberKind::Float),
                            _ => (10, NumberKind::IntegerOrFloat),
                        };

                        if kind != NumberKind::Float {
                            length = read_digits_of_base(&self.input, length, base);

                            if kind == NumberKind::Integer {
                                break 'number (TokenKind::LiteralInteger, length);
                            }
                        }

                        let is_float = matches!(self.input.peek(length, 3), float_separator!());

                        if !is_float {
                            // An `OctalOrFloat` literal that turned out to be an integer
                            // must not contain the digits 8 or 9.
                            if kind == NumberKind::OctalOrFloat
                                && let Some(invalid_idx) =
                                    (1..length).find(|&i| matches!(self.input.peek(i, 1), [b'8' | b'9']))
                            {
                                let invalid_byte = self.input.peek(invalid_idx, 1)[0];
                                let start = self.input.current_position();
                                let invalid_position = Position { offset: start.offset + invalid_idx as u32 };
                                self.input.consume(length);
                                return Some(Err(SyntaxError::UnexpectedToken(
                                    self.file_id(),
                                    invalid_byte,
                                    invalid_position,
                                )));
                            }
                            break 'number (TokenKind::LiteralInteger, length);
                        }

                        // Fractional part.
                        if let [b'.'] = self.input.peek(length, 1) {
                            length += 1;
                            length = read_digits_of_base(&self.input, length, 10);
                        }

                        // Exponent, accepted only if digits follow it.
                        if let float_exponent!() = self.input.peek(length, 1) {
                            let mut exp_length = length + 1;
                            if let number_sign!() = self.input.peek(exp_length, 1) {
                                exp_length += 1;
                            }
                            let after_exp = read_digits_of_base(&self.input, exp_length, 10);
                            if after_exp > exp_length {
                                length = after_exp;
                            }
                        }

                        (TokenKind::LiteralFloat, length)
                    }
                    [b'.', ..] => (TokenKind::Dot, 1),
                    // Anything else is not a token: consume the byte and report it.
                    [unknown_byte, ..] => {
                        let position = self.input.current_position();
                        self.input.consume(1);

                        return Some(Err(SyntaxError::UnrecognizedToken(self.file_id(), *unknown_byte, position)));
                    }
                    [] => {
                        return None;
                    }
                };

                // Some tokens switch the lexer into another mode for the next call.
                self.mode = match token_kind {
                    TokenKind::DoubleQuote => LexerMode::DoubleQuoteString(Interpolation::None),
                    TokenKind::Backtick => LexerMode::ShellExecuteString(Interpolation::None),
                    TokenKind::CloseTag => LexerMode::Inline,
                    TokenKind::HaltCompiler => LexerMode::Halt(HaltStage::LookingForLeftParenthesis),
                    TokenKind::DocumentStart(document_kind) => {
                        LexerMode::DocumentString(document_kind, document_label, Interpolation::None)
                    }
                    _ => LexerMode::Script,
                };

                let buffer = self.input.consume(len);
                let end = self.input.current_position();

                Some(Ok(self.token(token_kind, buffer, start, end)))
            }
            // Body of a double-quoted string that contains interpolation.
            LexerMode::DoubleQuoteString(interpolation) => match &interpolation {
                Interpolation::None => {
                    let start = self.input.current_position();

                    let mut length = 0;
                    // Tracks whether the previous byte was an unescaped backslash.
                    let mut last_was_slash = false;
                    let mut token_kind = TokenKind::StringPart;
                    loop {
                        match self.input.peek(length, 2) {
                            // `$name`: stop the string part here and record where
                            // the interpolation ends.
                            [b'$', start_of_identifier!(), ..] if !last_was_slash => {
                                let until_offset = read_until_end_of_variable_interpolation(&self.input, length + 2);

                                self.mode =
                                    LexerMode::DoubleQuoteString(Interpolation::Until(start.offset + until_offset));

                                break;
                            }
                            // `{$` or `${`: brace interpolation.
                            [b'{', b'$', ..] | [b'$', b'{', ..] if !last_was_slash => {
                                let until_offset = read_until_end_of_brace_interpolation(&self.input, length + 2);

                                self.mode = LexerMode::DoubleQuoteString(Interpolation::BraceUntil(
                                    start.offset + until_offset,
                                ));

                                break;
                            }
                            // Toggling makes `\\` count as an escaped backslash.
                            [b'\\', ..] => {
                                length += 1;

                                last_was_slash = !last_was_slash;
                            }
                            // An unescaped quote ends the string. It is emitted as its
                            // own token only when no string part precedes it.
                            [b'"', ..] if !last_was_slash => {
                                if length == 0 {
                                    length += 1;
                                    token_kind = TokenKind::DoubleQuote;

                                    break;
                                }

                                break;
                            }
                            [_, ..] => {
                                length += 1;
                                last_was_slash = false;
                            }
                            [] => {
                                break;
                            }
                        }
                    }

                    let buffer = self.input.consume(length);
                    let end = self.input.current_position();

                    if TokenKind::DoubleQuote == token_kind {
                        self.mode = LexerMode::Script;
                    }

                    Some(Ok(self.token(token_kind, buffer, start, end)))
                }
                Interpolation::Until(offset) => {
                    self.interpolation(*offset, LexerMode::DoubleQuoteString(Interpolation::None), false)
                }
                Interpolation::BraceUntil(offset) => {
                    self.interpolation(*offset, LexerMode::DoubleQuoteString(Interpolation::None), true)
                }
            },
            // Body of a backtick (shell-execute) string; same logic as the
            // double-quote case, with the backtick as terminator.
            LexerMode::ShellExecuteString(interpolation) => match &interpolation {
                Interpolation::None => {
                    let start = self.input.current_position();

                    let mut length = 0;
                    let mut last_was_slash = false;
                    let mut token_kind = TokenKind::StringPart;
                    loop {
                        match self.input.peek(length, 2) {
                            [b'$', start_of_identifier!(), ..] if !last_was_slash => {
                                let until_offset = read_until_end_of_variable_interpolation(&self.input, length + 2);

                                self.mode =
                                    LexerMode::ShellExecuteString(Interpolation::Until(start.offset + until_offset));

                                break;
                            }
                            [b'{', b'$', ..] | [b'$', b'{', ..] if !last_was_slash => {
                                let until_offset = read_until_end_of_brace_interpolation(&self.input, length + 2);

                                self.mode = LexerMode::ShellExecuteString(Interpolation::BraceUntil(
                                    start.offset + until_offset,
                                ));

                                break;
                            }
                            [b'\\', ..] => {
                                length += 1;
                                last_was_slash = !last_was_slash;
                            }
                            [b'`', ..] if !last_was_slash => {
                                if length == 0 {
                                    length += 1;
                                    token_kind = TokenKind::Backtick;

                                    break;
                                }

                                break;
                            }
                            [_, ..] => {
                                length += 1;
                                last_was_slash = false;
                            }
                            [] => {
                                break;
                            }
                        }
                    }

                    let buffer = self.input.consume(length);
                    let end = self.input.current_position();

                    if TokenKind::Backtick == token_kind {
                        self.mode = LexerMode::Script;
                    }

                    Some(Ok(self.token(token_kind, buffer, start, end)))
                }
                Interpolation::Until(offset) => {
                    self.interpolation(*offset, LexerMode::ShellExecuteString(Interpolation::None), false)
                }
                Interpolation::BraceUntil(offset) => {
                    self.interpolation(*offset, LexerMode::ShellExecuteString(Interpolation::None), true)
                }
            },
            // Heredoc/nowdoc body. Each call emits at most one line (or the part
            // of a line up to an interpolation) so the closing label can be
            // recognized at the start of a line.
            LexerMode::DocumentString(kind, label, interpolation) => match &kind {
                DocumentKind::Heredoc => match &interpolation {
                    Interpolation::None => {
                        let start = self.input.current_position();

                        let mut length = 0;
                        let mut last_was_slash = false;
                        // `true` while only whitespace has been seen in this part.
                        let mut only_whitespaces = true;
                        let mut token_kind = TokenKind::StringPart;
                        loop {
                            match self.input.peek(length, 2) {
                                // A line break ends this part (and is included in it).
                                [b'\r', b'\n'] => {
                                    length += 2;

                                    break;
                                }
                                [b'\n' | b'\r', ..] => {
                                    length += 1;

                                    break;
                                }
                                [byte, ..] if byte.is_ascii_whitespace() => {
                                    length += 1;
                                }
                                [b'$', start_of_identifier!(), ..] if !last_was_slash => {
                                    let until_offset =
                                        read_until_end_of_variable_interpolation(&self.input, length + 2);

                                    self.mode = LexerMode::DocumentString(
                                        kind,
                                        label,
                                        Interpolation::Until(start.offset + until_offset),
                                    );

                                    break;
                                }
                                [b'{', b'$', ..] | [b'$', b'{', ..] if !last_was_slash => {
                                    let until_offset = read_until_end_of_brace_interpolation(&self.input, length + 2);

                                    self.mode = LexerMode::DocumentString(
                                        kind,
                                        label,
                                        Interpolation::BraceUntil(start.offset + until_offset),
                                    );

                                    break;
                                }
                                [b'\\', ..] => {
                                    length += 1;
                                    last_was_slash = !last_was_slash;
                                    only_whitespaces = false;
                                }
                                [_, ..] => {
                                    // Closing label: preceded only by whitespace and
                                    // not followed by another identifier byte.
                                    if only_whitespaces
                                        && self.input.peek(length, label.len()) == label
                                        && self
                                            .input
                                            .peek(length + label.len(), 1)
                                            .first()
                                            .is_none_or(|c| !is_part_of_identifier(c))
                                    {
                                        length += label.len();
                                        token_kind = TokenKind::DocumentEnd;

                                        break;
                                    }

                                    length += 1;
                                    last_was_slash = false;
                                    only_whitespaces = false;
                                }
                                [] => {
                                    break;
                                }
                            }
                        }

                        let buffer = self.input.consume(length);
                        let end = self.input.current_position();

                        if TokenKind::DocumentEnd == token_kind {
                            self.mode = LexerMode::Script;
                        }

                        Some(Ok(self.token(token_kind, buffer, start, end)))
                    }
                    Interpolation::Until(offset) => {
                        self.interpolation(*offset, LexerMode::DocumentString(kind, label, Interpolation::None), false)
                    }
                    Interpolation::BraceUntil(offset) => {
                        self.interpolation(*offset, LexerMode::DocumentString(kind, label, Interpolation::None), true)
                    }
                },
                // Nowdoc: no interpolation and no escapes, only the closing label matters.
                DocumentKind::Nowdoc => {
                    let start = self.input.current_position();

                    let mut length = 0;
                    let mut terminated = false;
                    let mut only_whitespaces = true;

                    loop {
                        match self.input.peek(length, 2) {
                            [b'\r', b'\n'] => {
                                length += 2;

                                break;
                            }
                            [b'\n' | b'\r', ..] => {
                                length += 1;

                                break;
                            }
                            [byte, ..] if byte.is_ascii_whitespace() => {
                                length += 1;
                            }
                            [_, ..] => {
                                // Same closing-label test as in the heredoc case.
                                if only_whitespaces
                                    && self.input.peek(length, label.len()) == label
                                    && self
                                        .input
                                        .peek(length + label.len(), 1)
                                        .first()
                                        .is_none_or(|c| !is_part_of_identifier(c))
                                {
                                    length += label.len();
                                    terminated = true;

                                    break;
                                }

                                only_whitespaces = false;
                                length += 1;
                            }
                            [] => {
                                break;
                            }
                        }
                    }

                    let buffer = self.input.consume(length);
                    let end = self.input.current_position();

                    if terminated {
                        self.mode = LexerMode::Script;

                        return Some(Ok(self.token(TokenKind::DocumentEnd, buffer, start, end)));
                    }

                    Some(Ok(self.token(TokenKind::StringPart, buffer, start, end)))
                }
            },
            // After `__halt_compiler`: expect `(`, `)` and then `;` or `?>`;
            // everything after that is emitted as one inline-text token.
            LexerMode::Halt(stage) => 'halt: {
                let start = self.input.current_position();
                if let HaltStage::End = stage {
                    let buffer = self.input.consume_remaining();
                    let end = self.input.current_position();

                    break 'halt Some(Ok(self.token(TokenKind::InlineText, buffer, start, end)));
                }

                let whitespaces = self.input.consume_whitespaces();
                if !whitespaces.is_empty() {
                    let end = self.input.current_position();

                    break 'halt Some(Ok(self.token(TokenKind::Whitespace, whitespaces, start, end)));
                }

                match &stage {
                    HaltStage::LookingForLeftParenthesis => {
                        if self.input.is_at(b"(", false) {
                            let buffer = self.input.consume(1);
                            let end = self.input.current_position();

                            self.mode = LexerMode::Halt(HaltStage::LookingForRightParenthesis);

                            Some(Ok(self.token(TokenKind::LeftParenthesis, buffer, start, end)))
                        } else {
                            // Unexpected byte: consume it and report it; the stage is unchanged.
                            let byte = self.input.read(1)[0];
                            let position = self.input.current_position();
                            self.input.consume(1);
                            Some(Err(SyntaxError::UnexpectedToken(self.file_id(), byte, position)))
                        }
                    }
                    HaltStage::LookingForRightParenthesis => {
                        if self.input.is_at(b")", false) {
                            let buffer = self.input.consume(1);
                            let end = self.input.current_position();

                            self.mode = LexerMode::Halt(HaltStage::LookingForTerminator);

                            Some(Ok(self.token(TokenKind::RightParenthesis, buffer, start, end)))
                        } else {
                            let byte = self.input.read(1)[0];
                            let position = self.input.current_position();
                            self.input.consume(1);
                            Some(Err(SyntaxError::UnexpectedToken(self.file_id(), byte, position)))
                        }
                    }
                    HaltStage::LookingForTerminator => {
                        if self.input.is_at(b";", false) {
                            let buffer = self.input.consume(1);
                            let end = self.input.current_position();

                            self.mode = LexerMode::Halt(HaltStage::End);

                            Some(Ok(self.token(TokenKind::Semicolon, buffer, start, end)))
                        } else if self.input.is_at(b"?>", false) {
                            let buffer = self.input.consume(2);
                            let end = self.input.current_position();

                            self.mode = LexerMode::Halt(HaltStage::End);

                            Some(Ok(self.token(TokenKind::CloseTag, buffer, start, end)))
                        } else {
                            let byte = self.input.read(1)[0];
                            let position = self.input.current_position();
                            self.input.consume(1);
                            Some(Err(SyntaxError::UnexpectedToken(self.file_id(), byte, position)))
                        }
                    }
                    // `End` was already handled by the early `break 'halt` above.
                    HaltStage::End => {
                        None
                    }
                }
            }
        }
    }
1050
1051 #[inline]
1055 fn scan_identifier_or_keyword_info(&self) -> (TokenKind, usize) {
1056 let (mut length, ended_with_slash) = self.input.scan_identifier(0);
1057
1058 if !ended_with_slash {
1059 match length {
1060 6 if self.input.is_at(b"public(set)", true) => {
1061 return (TokenKind::PublicSet, 11);
1062 }
1063 7 if self.input.is_at(b"private(set)", true) => {
1064 return (TokenKind::PrivateSet, 12);
1065 }
1066 9 if self.input.is_at(b"protected(set)", true) => {
1067 return (TokenKind::ProtectedSet, 14);
1068 }
1069 _ => {}
1070 }
1071 }
1072
1073 if !ended_with_slash && let Some(kind) = internal::keyword::lookup_keyword(self.input.read(length)) {
1074 return (kind, length);
1075 }
1076
1077 let mut slashes = 0;
1078 let mut last_was_slash = false;
1079 loop {
1080 match self.input.peek(length, 1) {
1081 [b'a'..=b'z' | b'A'..=b'Z' | b'_' | 0x80..=0xFF] if last_was_slash => {
1082 length += 1;
1083 last_was_slash = false;
1084 }
1085 [b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | 0x80..=0xFF] if !last_was_slash => {
1086 length += 1;
1087 }
1088 [b'\\'] if !self.interpolating || self.brace_interpolating => {
1089 if last_was_slash {
1090 length -= 1;
1091 slashes -= 1;
1092 last_was_slash = false;
1093 break;
1094 }
1095
1096 length += 1;
1097 slashes += 1;
1098 last_was_slash = true;
1099 }
1100 _ => {
1101 break;
1102 }
1103 }
1104 }
1105
1106 if last_was_slash {
1107 length -= 1;
1108 slashes -= 1;
1109 }
1110
1111 let kind = if slashes > 0 { TokenKind::QualifiedIdentifier } else { TokenKind::Identifier };
1112
1113 (kind, length)
1114 }
1115
    /// Builds a [`Token`] from its kind, its bytes and its start position.
    ///
    /// `_end` is accepted but not stored: the token only carries `kind`,
    /// `start` and `value`.
    #[inline]
    fn token(&self, kind: TokenKind, value: &'input [u8], start: Position, _end: Position) -> Token<'input> {
        Token { kind, start, value }
    }
1120
1121 #[inline]
1122 fn interpolation(
1123 &mut self,
1124 end_offset: u32,
1125 post_interpolation_mode: LexerMode<'input>,
1126 brace: bool,
1127 ) -> Option<Result<Token<'input>, SyntaxError>> {
1128 self.mode = LexerMode::Script;
1129
1130 let was_interpolating = self.interpolating;
1131 self.interpolating = true;
1132 let was_brace_interpolating = self.brace_interpolating;
1133 self.brace_interpolating = brace;
1135
1136 let pending_error = loop {
1137 match self.advance() {
1138 Some(Ok(token)) => {
1139 let token_start = token.start.offset;
1140 let token_end = token_start + token.value.len() as u32;
1141 let is_final_token = token_start <= end_offset && end_offset <= token_end;
1142
1143 self.buffer.push_back(token);
1144
1145 if is_final_token {
1146 break None;
1147 }
1148 }
1149 Some(Err(error)) => break Some(error),
1150 None => break None,
1151 }
1152 };
1153
1154 self.mode = post_interpolation_mode;
1155 self.interpolating = was_interpolating;
1156 self.brace_interpolating = was_brace_interpolating;
1157
1158 if let Some(error) = pending_error {
1159 return Some(Err(error));
1160 }
1161
1162 self.advance()
1163 }
1164}
1165
/// Exposes the file id of the input the lexer is reading.
impl HasFileId for Lexer<'_> {
    /// Returns the file id reported by the underlying input.
    #[inline]
    fn file_id(&self) -> FileId {
        self.input.file_id()
    }
}
1172
1173#[inline]
1174fn matches_start_of_heredoc_document(input: &Input, prefix_len: usize) -> bool {
1175 let total = input.len();
1176 let base = input.current_offset();
1177
1178 let mut length = 3 + prefix_len;
1180 while base + length < total && input.read_at(base + length).is_ascii_whitespace() {
1182 length += 1;
1183 }
1184
1185 if base + length >= total || !is_start_of_identifier(input.read_at(base + length)) {
1187 return false;
1188 }
1189 length += 1; loop {
1193 let pos = base + length;
1194 if pos >= total {
1195 return false; }
1197
1198 let byte = *input.read_at(pos);
1199 if byte == b'\n' {
1200 return true; } else if byte == b'\r' {
1202 return pos + 1 < total && *input.read_at(pos + 1) == b'\n';
1204 } else if is_part_of_identifier(input.read_at(pos)) {
1205 length += 1;
1206 } else {
1207 return false; }
1209 }
1210}
1211
1212#[inline]
1213fn matches_start_of_double_quote_heredoc_document(input: &Input, prefix_len: usize) -> bool {
1214 let total = input.len();
1215 let base = input.current_offset();
1216
1217 let mut length = 3 + prefix_len;
1219 while base + length < total && input.read_at(base + length).is_ascii_whitespace() {
1220 length += 1;
1221 }
1222
1223 if base + length >= total || *input.read_at(base + length) != b'"' {
1225 return false;
1226 }
1227 length += 1;
1228
1229 if base + length >= total || !is_start_of_identifier(input.read_at(base + length)) {
1231 return false;
1232 }
1233 length += 1;
1234
1235 let mut terminated = false;
1237 loop {
1238 let pos = base + length;
1239 if pos >= total {
1240 return false;
1241 }
1242 let byte = input.read_at(pos);
1243 if *byte == b'\n' {
1244 return terminated;
1246 } else if *byte == b'\r' {
1247 return terminated && pos + 1 < total && *input.read_at(pos + 1) == b'\n';
1249 } else if !terminated && is_part_of_identifier(byte) {
1250 length += 1;
1251 } else if !terminated && *byte == b'"' {
1252 terminated = true;
1253 length += 1;
1254 } else {
1255 return false;
1256 }
1257 }
1258}
1259
1260#[inline]
1261fn matches_start_of_nowdoc_document(input: &Input, prefix_len: usize) -> bool {
1262 let total = input.len();
1263 let base = input.current_offset();
1264
1265 let mut length = 3 + prefix_len;
1267 while base + length < total && input.read_at(base + length).is_ascii_whitespace() {
1268 length += 1;
1269 }
1270
1271 if base + length >= total || *input.read_at(base + length) != b'\'' {
1273 return false;
1274 }
1275 length += 1;
1276
1277 if base + length >= total || !is_start_of_identifier(input.read_at(base + length)) {
1279 return false;
1280 }
1281 length += 1;
1282
1283 let mut terminated = false;
1285 loop {
1286 let pos = base + length;
1287 if pos >= total {
1288 return false;
1289 }
1290 let byte = *input.read_at(pos);
1291 if byte == b'\n' {
1292 return terminated;
1293 } else if byte == b'\r' {
1294 return terminated && pos + 1 < total && *input.read_at(pos + 1) == b'\n';
1295 } else if !terminated && is_part_of_identifier(&byte) {
1296 length += 1;
1297 } else if !terminated && byte == b'\'' {
1298 terminated = true;
1299 length += 1;
1300 } else {
1301 return false;
1302 }
1303 }
1304}
1305
1306#[inline]
1307fn matches_literal_double_quote_string(input: &Input, prefix_len: usize) -> bool {
1308 let total = input.len();
1309 let base = input.current_offset();
1310
1311 let mut pos = base + 1 + prefix_len;
1313 loop {
1314 if pos >= total {
1315 return true;
1317 }
1318 let byte = *input.read_at(pos);
1319 if byte == b'"' {
1320 return true;
1322 }
1323 if byte == b'\\' {
1324 pos += 2;
1326 continue;
1327 }
1328
1329 if pos + 1 < total {
1332 let next = *input.read_at(pos + 1);
1333 if (byte == b'$' && (is_start_of_identifier(&next) || next == b'{')) || (byte == b'{' && next == b'$') {
1334 return false;
1335 }
1336 }
1337 pos += 1;
1338 }
1339}
1340
1341#[inline]
1342fn read_start_of_heredoc_document(input: &Input, double_quoted: bool, prefix_len: usize) -> (usize, usize, usize) {
1343 let total = input.len();
1344 let base = input.current_offset();
1345
1346 let mut pos = base + 3 + prefix_len;
1348 let mut whitespaces = 0;
1349 while pos < total && input.read_at(pos).is_ascii_whitespace() {
1350 whitespaces += 1;
1351 pos += 1;
1352 }
1353
1354 let mut length = 3 + prefix_len + whitespaces + if double_quoted { 2 } else { 1 };
1359
1360 let mut label_length = 1; let mut terminated = false; loop {
1363 let pos = base + length;
1364 if pos >= total {
1369 return (length, whitespaces, label_length);
1370 }
1371
1372 let byte = *input.read_at(pos);
1373 if byte == b'\n' {
1374 length += 1;
1376 return (length, whitespaces, label_length);
1377 } else if byte == b'\r' {
1378 if pos + 1 < total && *input.read_at(pos + 1) == b'\n' {
1380 length += 2;
1381 } else {
1382 length += 1;
1383 }
1384 return (length, whitespaces, label_length);
1385 } else if is_part_of_identifier(&byte) && (!double_quoted || !terminated) {
1386 length += 1;
1389 label_length += 1;
1390 } else if double_quoted && !terminated && byte == b'"' {
1391 length += 1;
1393 terminated = true;
1394 } else {
1395 return (length, whitespaces, label_length);
1397 }
1398 }
1399}
1400
1401#[inline]
1402fn read_start_of_nowdoc_document(input: &Input, prefix_len: usize) -> (usize, usize, usize) {
1403 let total = input.len();
1404 let base = input.current_offset();
1405
1406 let mut pos = base + 3 + prefix_len;
1407 let mut whitespaces = 0;
1408 while pos < total && input.read_at(pos).is_ascii_whitespace() {
1409 whitespaces += 1;
1410 pos += 1;
1411 }
1412
1413 let mut length = 3 + prefix_len + whitespaces + 2;
1415
1416 let mut label_length = 1;
1417 let mut terminated = false;
1418 loop {
1419 let pos = base + length;
1420 if pos >= total {
1421 return (length, whitespaces, label_length);
1424 }
1425 let byte = *input.read_at(pos);
1426
1427 if byte == b'\n' {
1428 length += 1;
1430 return (length, whitespaces, label_length);
1431 } else if byte == b'\r' {
1432 if pos + 1 < total && *input.read_at(pos + 1) == b'\n' {
1434 length += 2;
1435 } else {
1436 length += 1;
1437 }
1438 return (length, whitespaces, label_length);
1439 } else if is_part_of_identifier(&byte) && !terminated {
1440 length += 1;
1442 label_length += 1;
1443 } else if !terminated && byte == b'\'' {
1444 length += 1;
1446 terminated = true;
1447 } else {
1448 return (length, whitespaces, label_length);
1450 }
1451 }
1452}
1453
1454#[inline]
1455fn read_literal_string(input: &Input, quote: u8, prefix_len: usize) -> (TokenKind, usize) {
1456 let total = input.len();
1457 let start = input.current_offset();
1458 let skip = prefix_len + 1; let mut length = skip;
1460
1461 let bytes = input.peek(skip, total - start - skip);
1462 loop {
1463 let scan_start = length - skip;
1464 match memchr2(quote, b'\\', &bytes[scan_start..]) {
1465 Some(pos) => {
1466 let abs_pos = scan_start + pos;
1467 let byte = bytes[abs_pos];
1468
1469 if byte == b'\\' {
1470 length = skip + abs_pos + 2;
1471 if length > total - start {
1472 return (TokenKind::PartialLiteralString, total - start);
1473 }
1474 } else {
1475 length = skip + abs_pos + 1; return (TokenKind::LiteralString, length);
1477 }
1478 }
1479 None => {
1480 return (TokenKind::PartialLiteralString, total - start);
1482 }
1483 }
1484 }
1485}
1486
1487#[inline]
1488fn read_until_end_of_variable_interpolation(input: &Input, from: usize) -> u32 {
1489 let total = input.len();
1490 let base = input.current_offset();
1491 let mut offset = from;
1493
1494 loop {
1495 let abs = base + offset;
1496 if abs >= total {
1497 break;
1499 }
1500
1501 if is_part_of_identifier(input.read_at(abs)) {
1503 offset += 1;
1504 continue;
1505 }
1506
1507 if *input.read_at(abs) == b'[' {
1509 offset += 1;
1510 let mut nesting = 0;
1511 loop {
1512 let abs_inner = base + offset;
1513 if abs_inner >= total {
1514 break;
1515 }
1516 let b = input.read_at(abs_inner);
1517 if *b == b']' {
1518 offset += 1;
1519 if nesting == 0 {
1520 break;
1521 }
1522
1523 nesting -= 1;
1524 } else if *b == b'[' {
1525 offset += 1;
1526 nesting += 1;
1527 } else if b.is_ascii_whitespace() {
1528 break;
1530 } else {
1531 offset += 1;
1532 }
1533 }
1534 break;
1536 }
1537
1538 if base + offset + 2 < total
1540 && *input.read_at(abs) == b'-'
1541 && *input.read_at(base + offset + 1) == b'>'
1542 && is_start_of_identifier(input.read_at(base + offset + 2))
1543 {
1544 offset += 3;
1545 while base + offset < total && is_part_of_identifier(input.read_at(base + offset)) {
1547 offset += 1;
1548 }
1549 break;
1550 }
1551
1552 if base + offset + 3 < total
1554 && *input.read_at(abs) == b'?'
1555 && *input.read_at(base + offset + 1) == b'-'
1556 && *input.read_at(base + offset + 2) == b'>'
1557 && is_start_of_identifier(input.read_at(base + offset + 3))
1558 {
1559 offset += 4;
1560 while base + offset < total && is_part_of_identifier(input.read_at(base + offset)) {
1561 offset += 1;
1562 }
1563 break;
1564 }
1565
1566 break;
1568 }
1569
1570 offset as u32
1571}
1572
1573#[inline]
1574fn read_until_end_of_brace_interpolation(input: &Input, from: usize) -> u32 {
1575 let total = input.len();
1576 let base = input.current_offset();
1577 let mut offset = from;
1578 let mut nesting = 0;
1579
1580 loop {
1581 let abs = base + offset;
1582 if abs >= total {
1583 break;
1584 }
1585 match input.read_at(abs) {
1586 b'}' => {
1587 offset += 1;
1588 if nesting == 0 {
1589 break;
1590 }
1591
1592 nesting -= 1;
1593 }
1594 b'{' => {
1595 offset += 1;
1596 nesting += 1;
1597 }
1598 _ => {
1599 offset += 1;
1600 }
1601 }
1602 }
1603
1604 offset as u32
1605}
1606
1607#[inline]
1610fn scan_multi_line_comment(bytes: &[u8]) -> Option<usize> {
1611 memmem::find(bytes, b"*/").map(|pos| pos + 2)
1613}
1614
1615#[inline]
1619fn scan_single_line_comment(bytes: &[u8]) -> usize {
1620 let mut pos = 0;
1621 while pos < bytes.len() {
1622 match memchr::memchr3(b'\n', b'\r', b'?', &bytes[pos..]) {
1623 Some(offset) => {
1624 let found_pos = pos + offset;
1625 match bytes[found_pos] {
1626 b'\n' | b'\r' => return found_pos,
1627 b'?' => {
1628 if found_pos + 1 < bytes.len() && bytes[found_pos + 1] == b'>' {
1630 if found_pos > 0 && bytes[found_pos - 1].is_ascii_whitespace() {
1632 return found_pos - 1;
1633 }
1634 return found_pos;
1635 }
1636 pos = found_pos + 1;
1638 }
1639 _ => return found_pos,
1642 }
1643 }
1644 None => return bytes.len(),
1645 }
1646 }
1647
1648 bytes.len()
1649}