1use std::collections::VecDeque;
2use std::fmt::Debug;
3use std::hint::unreachable_unchecked;
4
5use memchr::memchr2;
6use memchr::memmem;
7
8const SIMPLE_TOKEN_TABLE: [Option<TokenKind>; 256] = {
12 let mut table: [Option<TokenKind>; 256] = [None; 256];
13 table[b';' as usize] = Some(TokenKind::Semicolon);
14 table[b',' as usize] = Some(TokenKind::Comma);
15 table[b')' as usize] = Some(TokenKind::RightParenthesis);
16 table[b'[' as usize] = Some(TokenKind::LeftBracket);
17 table[b']' as usize] = Some(TokenKind::RightBracket);
18 table[b'{' as usize] = Some(TokenKind::LeftBrace);
19 table[b'}' as usize] = Some(TokenKind::RightBrace);
20 table[b'~' as usize] = Some(TokenKind::Tilde);
21 table[b'@' as usize] = Some(TokenKind::At);
22 table
23};
24
/// Lookup table telling whether a byte may start an identifier.
///
/// An entry is `true` for ASCII letters, the underscore, and every byte in `0x80..=0xFF`.
const IDENT_START_TABLE: [bool; 256] = {
    let mut flags = [false; 256];

    // Walk every possible byte value; `for` loops are not available in const evaluation.
    let mut byte: u8 = 0;
    loop {
        flags[byte as usize] = match byte {
            b'a'..=b'z' | b'A'..=b'Z' | b'_' => true,
            other => other >= 0x80,
        };

        if byte == u8::MAX {
            break;
        }

        byte += 1;
    }

    flags
};
36
37use mago_database::file::FileId;
38use mago_database::file::HasFileId;
39use mago_span::Position;
40use mago_syntax_core::float_exponent;
41use mago_syntax_core::float_separator;
42use mago_syntax_core::input::Input;
43use mago_syntax_core::number_sign;
44use mago_syntax_core::start_of_binary_number;
45use mago_syntax_core::start_of_float_number;
46use mago_syntax_core::start_of_hexadecimal_number;
47use mago_syntax_core::start_of_identifier;
48use mago_syntax_core::start_of_number;
49use mago_syntax_core::start_of_octal_number;
50use mago_syntax_core::start_of_octal_or_float_number;
51use mago_syntax_core::utils::is_part_of_identifier;
52use mago_syntax_core::utils::is_start_of_identifier;
53use mago_syntax_core::utils::read_digits_of_base;
54
55use crate::error::SyntaxError;
56use crate::lexer::internal::mode::HaltStage;
57use crate::lexer::internal::mode::Interpolation;
58use crate::lexer::internal::mode::LexerMode;
59use crate::lexer::internal::utils::NumberKind;
60use crate::settings::LexerSettings;
61use crate::token::DocumentKind;
62use crate::token::Token;
63use crate::token::TokenKind;
64
65mod internal;
66
/// A mode-driven lexer that turns an [`Input`] into a stream of [`Token`]s.
///
/// Tokens are pulled one at a time with [`Lexer::advance`]. The lexer starts either in inline
/// mode ([`Lexer::new`]) or in script mode ([`Lexer::scripting`]).
#[derive(Debug)]
pub struct Lexer<'input> {
    /// The bytes being tokenized, together with the read cursor.
    input: Input<'input>,
    /// Lexer configuration; `advance` consults `enable_short_tags` when looking for open tags.
    settings: LexerSettings,
    /// The current lexing mode; selects which branch of `advance` runs.
    mode: LexerMode<'input>,
    /// `true` while `interpolation` is lexing an embedded expression. While it is set, `advance`
    /// does not hand out tokens from `buffer`.
    interpolating: bool,
    /// Set from the `brace` argument of `interpolation`. While interpolating, a `\` is only
    /// accepted inside an identifier when this flag is set.
    brace_interpolating: bool,
    /// Tokens collected by `interpolation`, returned by subsequent calls to `advance`.
    buffer: VecDeque<Token<'input>>,
}
88
89impl<'input> Lexer<'input> {
    /// Number of token slots pre-allocated for the interpolation buffer by the constructors.
    const BUFFER_INITIAL_CAPACITY: usize = 8;
93
94 pub fn new(input: Input<'input>, settings: LexerSettings) -> Lexer<'input> {
105 Lexer {
106 input,
107 settings,
108 mode: LexerMode::Inline,
109 interpolating: false,
110 brace_interpolating: false,
111 buffer: VecDeque::with_capacity(Self::BUFFER_INITIAL_CAPACITY),
112 }
113 }
114
115 pub fn scripting(input: Input<'input>, settings: LexerSettings) -> Lexer<'input> {
126 Lexer {
127 input,
128 settings,
129 mode: LexerMode::Script,
130 interpolating: false,
131 brace_interpolating: false,
132 buffer: VecDeque::with_capacity(Self::BUFFER_INITIAL_CAPACITY),
133 }
134 }
135
136 #[must_use]
140 pub fn has_reached_eof(&self) -> bool {
141 self.input.has_reached_eof()
142 }
143
144 #[inline]
146 pub const fn current_position(&self) -> Position {
147 self.input.current_position()
148 }
149
150 #[inline]
183 pub fn advance(&mut self) -> Option<Result<Token<'input>, SyntaxError>> {
184 if !self.interpolating
186 && let Some(token) = self.buffer.pop_front()
187 {
188 return Some(Ok(token));
189 }
190
191 if self.input.has_reached_eof() {
192 return None;
193 }
194
195 match self.mode {
196 LexerMode::Inline => {
197 let start = self.input.current_position();
198 let offset = self.input.current_offset();
199
200 if offset == 0
202 && self.input.len() >= 2
203 && unsafe { *self.input.read_at_unchecked(0) } == b'#'
204 && unsafe { *self.input.read_at_unchecked(1) } == b'!'
205 {
206 let buffer = self.input.consume_through(b'\n');
207 let end = self.input.current_position();
208
209 return Some(Ok(self.token(TokenKind::InlineShebang, buffer, start, end)));
210 }
211
212 let bytes = self.input.read_remaining();
214
215 if self.settings.enable_short_tags {
216 if let Some(pos) = memchr::memmem::find(bytes, b"<?") {
217 if pos > 0 {
218 let buffer = self.input.consume(pos);
219 let end = self.input.current_position();
220
221 return Some(Ok(self.token(TokenKind::InlineText, buffer, start, end)));
222 }
223
224 if self.input.is_at(b"<?php", true) {
225 let buffer = self.input.consume(5);
226 self.mode = LexerMode::Script;
227 return Some(Ok(self.token(
228 TokenKind::OpenTag,
229 buffer,
230 start,
231 self.input.current_position(),
232 )));
233 }
234
235 if self.input.is_at(b"<?=", false) {
236 let buffer = self.input.consume(3);
237 self.mode = LexerMode::Script;
238 return Some(Ok(self.token(
239 TokenKind::EchoTag,
240 buffer,
241 start,
242 self.input.current_position(),
243 )));
244 }
245
246 let buffer = self.input.consume(2);
247 self.mode = LexerMode::Script;
248 return Some(Ok(self.token(
249 TokenKind::ShortOpenTag,
250 buffer,
251 start,
252 self.input.current_position(),
253 )));
254 }
255 } else {
256 let iter = memchr::memmem::find_iter(bytes, b"<?");
257
258 for pos in iter {
259 let candidate = unsafe { bytes.get_unchecked(pos..) };
261
262 if candidate.len() >= 5
263 && (unsafe { *candidate.get_unchecked(2) } | 0x20) == b'p'
264 && (unsafe { *candidate.get_unchecked(3) } | 0x20) == b'h'
265 && (unsafe { *candidate.get_unchecked(4) } | 0x20) == b'p'
266 {
267 if pos > 0 {
268 let buffer = self.input.consume(pos);
269 let end = self.input.current_position();
270 return Some(Ok(self.token(TokenKind::InlineText, buffer, start, end)));
271 }
272
273 let buffer = self.input.consume(5);
274 self.mode = LexerMode::Script;
275 return Some(Ok(self.token(
276 TokenKind::OpenTag,
277 buffer,
278 start,
279 self.input.current_position(),
280 )));
281 }
282
283 if candidate.len() >= 3 && unsafe { *candidate.get_unchecked(2) } == b'=' {
284 if pos > 0 {
285 let buffer = self.input.consume(pos);
286 let end = self.input.current_position();
287 return Some(Ok(self.token(TokenKind::InlineText, buffer, start, end)));
288 }
289
290 let buffer = self.input.consume(3);
291 self.mode = LexerMode::Script;
292 return Some(Ok(self.token(
293 TokenKind::EchoTag,
294 buffer,
295 start,
296 self.input.current_position(),
297 )));
298 }
299 }
300 }
301
302 if self.input.has_reached_eof() {
303 return None;
304 }
305
306 let buffer = self.input.consume_remaining();
307 let end = self.input.current_position();
308 Some(Ok(self.token(TokenKind::InlineText, buffer, start, end)))
309 }
310 LexerMode::Script => {
311 let start = self.input.current_position();
312 let whitespaces = self.input.consume_whitespaces();
313 if !whitespaces.is_empty() {
314 return Some(Ok(self.token(
315 TokenKind::Whitespace,
316 whitespaces,
317 start,
318 self.input.current_position(),
319 )));
320 }
321
322 let first_byte = match self.input.read(1).first() {
323 Some(&b) => b,
324 None => {
325 unsafe { unreachable_unchecked() }
327 }
328 };
329
330 if let Some(kind) = SIMPLE_TOKEN_TABLE[first_byte as usize] {
331 let buffer = self.input.consume(1);
332 let end = self.input.current_position();
333 return Some(Ok(self.token(kind, buffer, start, end)));
334 }
335
336 if IDENT_START_TABLE[first_byte as usize] {
337 let is_binary_string_prefix = matches!(first_byte, b'b' | b'B')
339 && matches!(self.input.read(4), [_, b'\'' | b'"', ..] | [_, b'<', b'<', b'<']);
340
341 if !is_binary_string_prefix {
342 let (token_kind, len) = self.scan_identifier_or_keyword_info();
343
344 if token_kind == TokenKind::HaltCompiler {
345 self.mode = LexerMode::Halt(HaltStage::LookingForLeftParenthesis);
346 }
347
348 let buffer = self.input.consume(len);
349 let end = self.input.current_position();
350 return Some(Ok(self.token(token_kind, buffer, start, end)));
351 }
352
353 }
355
356 if first_byte == b'$'
357 && let Some(&next) = self.input.read(2).get(1)
358 && IDENT_START_TABLE[next as usize]
359 {
360 let (ident_len, _) = self.input.scan_identifier(1);
361 let buffer = self.input.consume(1 + ident_len);
362 let end = self.input.current_position();
363 return Some(Ok(self.token(TokenKind::Variable, buffer, start, end)));
364 }
365
366 let mut document_label: &[u8] = &[];
367
368 let (token_kind, len) = match self.input.read(3) {
369 [b'!', b'=', b'='] => (TokenKind::BangEqualEqual, 3),
370 [b'?', b'?', b'='] => (TokenKind::QuestionQuestionEqual, 3),
371 [b'?', b'-', b'>'] => (TokenKind::QuestionMinusGreaterThan, 3),
372 [b'=', b'=', b'='] => (TokenKind::EqualEqualEqual, 3),
373 [b'.', b'.', b'.'] => (TokenKind::DotDotDot, 3),
374 [b'<', b'=', b'>'] => (TokenKind::LessThanEqualGreaterThan, 3),
375 [b'<', b'<', b'='] => (TokenKind::LeftShiftEqual, 3),
376 [b'>', b'>', b'='] => (TokenKind::RightShiftEqual, 3),
377 [b'*', b'*', b'='] => (TokenKind::AsteriskAsteriskEqual, 3),
378 [b'<', b'<', b'<'] if matches_start_of_heredoc_document(&self.input, 0) => {
379 let (length, whitespaces, label_length) = read_start_of_heredoc_document(&self.input, false, 0);
380
381 document_label = self.input.peek(3 + whitespaces, label_length);
382
383 (TokenKind::DocumentStart(DocumentKind::Heredoc), length)
384 }
385 [b'<', b'<', b'<'] if matches_start_of_double_quote_heredoc_document(&self.input, 0) => {
386 let (length, whitespaces, label_length) = read_start_of_heredoc_document(&self.input, true, 0);
387
388 document_label = self.input.peek(4 + whitespaces, label_length);
389
390 (TokenKind::DocumentStart(DocumentKind::Heredoc), length)
391 }
392 [b'<', b'<', b'<'] if matches_start_of_nowdoc_document(&self.input, 0) => {
393 let (length, whitespaces, label_length) = read_start_of_nowdoc_document(&self.input, 0);
394
395 document_label = self.input.peek(4 + whitespaces, label_length);
396
397 (TokenKind::DocumentStart(DocumentKind::Nowdoc), length)
398 }
399 [b'!', b'=', ..] => (TokenKind::BangEqual, 2),
400 [b'&', b'&', ..] => (TokenKind::AmpersandAmpersand, 2),
401 [b'&', b'=', ..] => (TokenKind::AmpersandEqual, 2),
402 [b'.', b'=', ..] => (TokenKind::DotEqual, 2),
403 [b'?', b'?', ..] => (TokenKind::QuestionQuestion, 2),
404 [b'?', b'>', ..] => (TokenKind::CloseTag, 2),
405 [b'=', b'>', ..] => (TokenKind::EqualGreaterThan, 2),
406 [b'=', b'=', ..] => (TokenKind::EqualEqual, 2),
407 [b'+', b'+', ..] => (TokenKind::PlusPlus, 2),
408 [b'+', b'=', ..] => (TokenKind::PlusEqual, 2),
409 [b'%', b'=', ..] => (TokenKind::PercentEqual, 2),
410 [b'-', b'-', ..] => (TokenKind::MinusMinus, 2),
411 [b'-', b'>', ..] => (TokenKind::MinusGreaterThan, 2),
412 [b'-', b'=', ..] => (TokenKind::MinusEqual, 2),
413 [b'<', b'<', ..] => (TokenKind::LeftShift, 2),
414 [b'<', b'=', ..] => (TokenKind::LessThanEqual, 2),
415 [b'<', b'>', ..] => (TokenKind::LessThanGreaterThan, 2),
416 [b'>', b'>', ..] => (TokenKind::RightShift, 2),
417 [b'>', b'=', ..] => (TokenKind::GreaterThanEqual, 2),
418 [b':', b':', ..] => (TokenKind::ColonColon, 2),
419 [b'#', b'[', ..] => (TokenKind::HashLeftBracket, 2),
420 [b'|', b'=', ..] => (TokenKind::PipeEqual, 2),
421 [b'|', b'|', ..] => (TokenKind::PipePipe, 2),
422 [b'/', b'=', ..] => (TokenKind::SlashEqual, 2),
423 [b'^', b'=', ..] => (TokenKind::CaretEqual, 2),
424 [b'*', b'*', ..] => (TokenKind::AsteriskAsterisk, 2),
425 [b'*', b'=', ..] => (TokenKind::AsteriskEqual, 2),
426 [b'|', b'>', ..] => (TokenKind::PipeGreaterThan, 2),
427 [b'/', b'/', ..] => {
428 let remaining = self.input.peek(2, self.input.len() - self.input.current_offset());
429 let comment_len = scan_single_line_comment(remaining);
430 (TokenKind::SingleLineComment, 2 + comment_len)
431 }
432 [b'/', b'*', asterisk] => {
433 let remaining = self.input.peek(2, self.input.len() - self.input.current_offset());
434 match scan_multi_line_comment(remaining) {
435 Some(len) => {
436 let is_docblock = asterisk == &b'*' && len > 2;
437 if is_docblock {
438 (TokenKind::DocBlockComment, len + 2)
439 } else {
440 (TokenKind::MultiLineComment, len + 2)
441 }
442 }
443 None => {
444 self.input.consume(remaining.len() + 2);
445 return Some(Err(SyntaxError::UnexpectedEndOfFile(
446 self.file_id(),
447 self.input.current_position(),
448 )));
449 }
450 }
451 }
452 [b'\\', start_of_identifier!(), ..] => {
453 let mut length = 1;
454 loop {
455 let (ident_len, ends_with_ns) = self.input.scan_identifier(length);
456 length += ident_len;
457 if ends_with_ns {
458 length += 1; } else {
460 break;
461 }
462 }
463
464 (TokenKind::FullyQualifiedIdentifier, length)
465 }
466 [b'$', b'{', ..] => (TokenKind::DollarLeftBrace, 2),
467 [b'$', ..] => (TokenKind::Dollar, 1),
468 [b'!', ..] => (TokenKind::Bang, 1),
469 [b'&', ..] => (TokenKind::Ampersand, 1),
470 [b'?', ..] => (TokenKind::Question, 1),
471 [b'=', ..] => (TokenKind::Equal, 1),
472 [b'`', ..] => (TokenKind::Backtick, 1),
473 [b'+', ..] => (TokenKind::Plus, 1),
474 [b'%', ..] => (TokenKind::Percent, 1),
475 [b'-', ..] => (TokenKind::Minus, 1),
476 [b'<', ..] => (TokenKind::LessThan, 1),
477 [b'>', ..] => (TokenKind::GreaterThan, 1),
478 [b':', ..] => (TokenKind::Colon, 1),
479 [b'|', ..] => (TokenKind::Pipe, 1),
480 [b'^', ..] => (TokenKind::Caret, 1),
481 [b'*', ..] => (TokenKind::Asterisk, 1),
482 [b'/', ..] => (TokenKind::Slash, 1),
483 [b'b' | b'B', b'\'', ..] => read_literal_string(&self.input, b'\'', 1),
484 [b'b' | b'B', b'"', ..] if matches_literal_double_quote_string(&self.input, 1) => {
485 read_literal_string(&self.input, b'"', 1)
486 }
487 [b'b' | b'B', b'"', ..] => (TokenKind::DoubleQuote, 2),
488 [b'b' | b'B', b'<', b'<']
489 if self.input.read(4).len() == 4
490 && self.input.read(4)[3] == b'<'
491 && matches_start_of_heredoc_document(&self.input, 1) =>
492 {
493 let (length, whitespaces, label_length) = read_start_of_heredoc_document(&self.input, false, 1);
494
495 document_label = self.input.peek(4 + whitespaces, label_length);
496
497 (TokenKind::DocumentStart(DocumentKind::Heredoc), length)
498 }
499 [b'b' | b'B', b'<', b'<']
500 if self.input.read(4).len() == 4
501 && self.input.read(4)[3] == b'<'
502 && matches_start_of_double_quote_heredoc_document(&self.input, 1) =>
503 {
504 let (length, whitespaces, label_length) = read_start_of_heredoc_document(&self.input, true, 1);
505
506 document_label = self.input.peek(5 + whitespaces, label_length);
507
508 (TokenKind::DocumentStart(DocumentKind::Heredoc), length)
509 }
510 [b'b' | b'B', b'<', b'<']
511 if self.input.read(4).len() == 4
512 && self.input.read(4)[3] == b'<'
513 && matches_start_of_nowdoc_document(&self.input, 1) =>
514 {
515 let (length, whitespaces, label_length) = read_start_of_nowdoc_document(&self.input, 1);
516
517 document_label = self.input.peek(5 + whitespaces, label_length);
518
519 (TokenKind::DocumentStart(DocumentKind::Nowdoc), length)
520 }
521 [quote @ b'\'', ..] => read_literal_string(&self.input, *quote, 0),
523 [quote @ b'"', ..] if matches_literal_double_quote_string(&self.input, 0) => {
524 read_literal_string(&self.input, *quote, 0)
525 }
526 [b'"', ..] => (TokenKind::DoubleQuote, 1),
527 [b'(', ..] => 'parenthesis: {
528 let mut peek_offset = 1;
529 while let Some(&b) = self.input.read(peek_offset + 1).get(peek_offset) {
530 if b.is_ascii_whitespace() {
531 peek_offset += 1;
532 } else {
533 let lower = b | 0x20; if !matches!(lower, b'i' | b'b' | b'f' | b'd' | b'r' | b's' | b'a' | b'o' | b'u' | b'v')
536 {
537 break 'parenthesis (TokenKind::LeftParenthesis, 1);
538 }
539 break;
540 }
541 }
542
543 for (value, kind) in internal::consts::CAST_TYPES {
544 if let Some(length) = self.input.match_sequence_ignore_whitespace(value, true) {
545 break 'parenthesis (kind, length);
546 }
547 }
548
549 (TokenKind::LeftParenthesis, 1)
550 }
551 [b'#', ..] => {
552 let remaining = self.input.peek(1, self.input.len() - self.input.current_offset());
553 let comment_len = scan_single_line_comment(remaining);
554 (TokenKind::HashComment, 1 + comment_len)
555 }
556 [b'\\', ..] => (TokenKind::NamespaceSeparator, 1),
557 [b'.', start_of_number!(), ..] => {
558 let mut length = read_digits_of_base(&self.input, 2, 10);
559 if let float_exponent!() = self.input.peek(length, 1) {
560 let mut exp_length = length + 1;
561 if let number_sign!() = self.input.peek(exp_length, 1) {
562 exp_length += 1;
563 }
564
565 let after_exp = read_digits_of_base(&self.input, exp_length, 10);
566 if after_exp > exp_length {
567 length = after_exp;
568 }
569 }
570
571 (TokenKind::LiteralFloat, length)
572 }
573 [start_of_number!(), ..] => 'number: {
574 let mut length = 1;
575
576 let (base, kind): (u8, NumberKind) = match self.input.read(3) {
577 start_of_binary_number!() => {
578 length += 1;
579
580 (2, NumberKind::Integer)
581 }
582 start_of_octal_number!() => {
583 length += 1;
584
585 (8, NumberKind::Integer)
586 }
587 start_of_hexadecimal_number!() => {
588 length += 1;
589
590 (16, NumberKind::Integer)
591 }
592 start_of_octal_or_float_number!() => (10, NumberKind::OctalOrFloat),
593 start_of_float_number!() => (10, NumberKind::Float),
594 _ => (10, NumberKind::IntegerOrFloat),
595 };
596
597 if kind != NumberKind::Float {
598 length = read_digits_of_base(&self.input, length, base);
599
600 if kind == NumberKind::Integer {
601 break 'number (TokenKind::LiteralInteger, length);
602 }
603 }
604
605 let is_float = matches!(self.input.peek(length, 3), float_separator!());
606
607 if !is_float {
608 if kind == NumberKind::OctalOrFloat
609 && let Some(invalid_idx) =
610 (1..length).find(|&i| matches!(self.input.peek(i, 1), [b'8' | b'9']))
611 {
612 let invalid_byte = self.input.peek(invalid_idx, 1)[0];
613 let start = self.input.current_position();
614 let invalid_position = Position { offset: start.offset + invalid_idx as u32 };
615 self.input.consume(length);
616 return Some(Err(SyntaxError::UnexpectedToken(
617 self.file_id(),
618 invalid_byte,
619 invalid_position,
620 )));
621 }
622 break 'number (TokenKind::LiteralInteger, length);
623 }
624
625 if let [b'.'] = self.input.peek(length, 1) {
626 length += 1;
627 length = read_digits_of_base(&self.input, length, 10);
628 }
629
630 if let float_exponent!() = self.input.peek(length, 1) {
631 let mut exp_length = length + 1;
633 if let number_sign!() = self.input.peek(exp_length, 1) {
634 exp_length += 1;
635 }
636 let after_exp = read_digits_of_base(&self.input, exp_length, 10);
637 if after_exp > exp_length {
638 length = after_exp;
640 }
641 }
642
643 (TokenKind::LiteralFloat, length)
644 }
645 [b'.', ..] => (TokenKind::Dot, 1),
646 [unknown_byte, ..] => {
647 let position = self.input.current_position();
648 self.input.consume(1);
649
650 return Some(Err(SyntaxError::UnrecognizedToken(self.file_id(), *unknown_byte, position)));
651 }
652 [] => {
653 unreachable!()
656 }
657 };
658
659 self.mode = match token_kind {
660 TokenKind::DoubleQuote => LexerMode::DoubleQuoteString(Interpolation::None),
661 TokenKind::Backtick => LexerMode::ShellExecuteString(Interpolation::None),
662 TokenKind::CloseTag => LexerMode::Inline,
663 TokenKind::HaltCompiler => LexerMode::Halt(HaltStage::LookingForLeftParenthesis),
664 TokenKind::DocumentStart(document_kind) => {
665 LexerMode::DocumentString(document_kind, document_label, Interpolation::None)
666 }
667 _ => LexerMode::Script,
668 };
669
670 let buffer = self.input.consume(len);
671 let end = self.input.current_position();
672
673 Some(Ok(self.token(token_kind, buffer, start, end)))
674 }
675 LexerMode::DoubleQuoteString(interpolation) => match &interpolation {
676 Interpolation::None => {
677 let start = self.input.current_position();
678
679 let mut length = 0;
680 let mut last_was_slash = false;
681 let mut token_kind = TokenKind::StringPart;
682 loop {
683 match self.input.peek(length, 2) {
684 [b'$', start_of_identifier!(), ..] if !last_was_slash => {
685 let until_offset = read_until_end_of_variable_interpolation(&self.input, length + 2);
686
687 self.mode =
688 LexerMode::DoubleQuoteString(Interpolation::Until(start.offset + until_offset));
689
690 break;
691 }
692 [b'{', b'$', ..] | [b'$', b'{', ..] if !last_was_slash => {
693 let until_offset = read_until_end_of_brace_interpolation(&self.input, length + 2);
694
695 self.mode = LexerMode::DoubleQuoteString(Interpolation::BraceUntil(
696 start.offset + until_offset,
697 ));
698
699 break;
700 }
701 [b'\\', ..] => {
702 length += 1;
703
704 last_was_slash = !last_was_slash;
705 }
706 [b'"', ..] if !last_was_slash => {
707 if length == 0 {
708 length += 1;
709 token_kind = TokenKind::DoubleQuote;
710
711 break;
712 }
713
714 break;
715 }
716 [_, ..] => {
717 length += 1;
718 last_was_slash = false;
719 }
720 [] => {
721 break;
722 }
723 }
724 }
725
726 let buffer = self.input.consume(length);
727 let end = self.input.current_position();
728
729 if TokenKind::DoubleQuote == token_kind {
730 self.mode = LexerMode::Script;
731 }
732
733 Some(Ok(self.token(token_kind, buffer, start, end)))
734 }
735 Interpolation::Until(offset) => {
736 self.interpolation(*offset, LexerMode::DoubleQuoteString(Interpolation::None), false)
737 }
738 Interpolation::BraceUntil(offset) => {
739 self.interpolation(*offset, LexerMode::DoubleQuoteString(Interpolation::None), true)
740 }
741 },
742 LexerMode::ShellExecuteString(interpolation) => match &interpolation {
743 Interpolation::None => {
744 let start = self.input.current_position();
745
746 let mut length = 0;
747 let mut last_was_slash = false;
748 let mut token_kind = TokenKind::StringPart;
749 loop {
750 match self.input.peek(length, 2) {
751 [b'$', start_of_identifier!(), ..] if !last_was_slash => {
752 let until_offset = read_until_end_of_variable_interpolation(&self.input, length + 2);
753
754 self.mode =
755 LexerMode::ShellExecuteString(Interpolation::Until(start.offset + until_offset));
756
757 break;
758 }
759 [b'{', b'$', ..] | [b'$', b'{', ..] if !last_was_slash => {
760 let until_offset = read_until_end_of_brace_interpolation(&self.input, length + 2);
761
762 self.mode = LexerMode::ShellExecuteString(Interpolation::BraceUntil(
763 start.offset + until_offset,
764 ));
765
766 break;
767 }
768 [b'\\', ..] => {
769 length += 1;
770 last_was_slash = !last_was_slash;
771 }
772 [b'`', ..] if !last_was_slash => {
773 if length == 0 {
774 length += 1;
775 token_kind = TokenKind::Backtick;
776
777 break;
778 }
779
780 break;
781 }
782 [_, ..] => {
783 length += 1;
784 last_was_slash = false;
785 }
786 [] => {
787 break;
788 }
789 }
790 }
791
792 let buffer = self.input.consume(length);
793 let end = self.input.current_position();
794
795 if TokenKind::Backtick == token_kind {
796 self.mode = LexerMode::Script;
797 }
798
799 Some(Ok(self.token(token_kind, buffer, start, end)))
800 }
801 Interpolation::Until(offset) => {
802 self.interpolation(*offset, LexerMode::ShellExecuteString(Interpolation::None), false)
803 }
804 Interpolation::BraceUntil(offset) => {
805 self.interpolation(*offset, LexerMode::ShellExecuteString(Interpolation::None), true)
806 }
807 },
808 LexerMode::DocumentString(kind, label, interpolation) => match &kind {
809 DocumentKind::Heredoc => match &interpolation {
810 Interpolation::None => {
811 let start = self.input.current_position();
812
813 let mut length = 0;
814 let mut last_was_slash = false;
815 let mut only_whitespaces = true;
816 let mut token_kind = TokenKind::StringPart;
817 loop {
818 match self.input.peek(length, 2) {
819 [b'\r', b'\n'] => {
820 length += 2;
821
822 break;
823 }
824 [b'\n' | b'\r', ..] => {
825 length += 1;
826
827 break;
828 }
829 [byte, ..] if byte.is_ascii_whitespace() => {
830 length += 1;
831 }
832 [b'$', start_of_identifier!(), ..] if !last_was_slash => {
833 let until_offset =
834 read_until_end_of_variable_interpolation(&self.input, length + 2);
835
836 self.mode = LexerMode::DocumentString(
837 kind,
838 label,
839 Interpolation::Until(start.offset + until_offset),
840 );
841
842 break;
843 }
844 [b'{', b'$', ..] | [b'$', b'{', ..] if !last_was_slash => {
845 let until_offset = read_until_end_of_brace_interpolation(&self.input, length + 2);
846
847 self.mode = LexerMode::DocumentString(
848 kind,
849 label,
850 Interpolation::BraceUntil(start.offset + until_offset),
851 );
852
853 break;
854 }
855 [b'\\', ..] => {
856 length += 1;
857 last_was_slash = !last_was_slash;
858 only_whitespaces = false;
859 }
860 [_, ..] => {
861 if only_whitespaces
862 && self.input.peek(length, label.len()) == label
863 && self
864 .input
865 .peek(length + label.len(), 1)
866 .first()
867 .is_none_or(|c| !c.is_ascii_alphanumeric())
868 {
869 length += label.len();
870 token_kind = TokenKind::DocumentEnd;
871
872 break;
873 }
874
875 length += 1;
876 last_was_slash = false;
877 only_whitespaces = false;
878 }
879 [] => {
880 break;
881 }
882 }
883 }
884
885 let buffer = self.input.consume(length);
886 let end = self.input.current_position();
887
888 if TokenKind::DocumentEnd == token_kind {
889 self.mode = LexerMode::Script;
890 }
891
892 Some(Ok(self.token(token_kind, buffer, start, end)))
893 }
894 Interpolation::Until(offset) => {
895 self.interpolation(*offset, LexerMode::DocumentString(kind, label, Interpolation::None), false)
896 }
897 Interpolation::BraceUntil(offset) => {
898 self.interpolation(*offset, LexerMode::DocumentString(kind, label, Interpolation::None), true)
899 }
900 },
901 DocumentKind::Nowdoc => {
902 let start = self.input.current_position();
903
904 let mut length = 0;
905 let mut terminated = false;
906 let mut only_whitespaces = true;
907
908 loop {
909 match self.input.peek(length, 2) {
910 [b'\r', b'\n'] => {
911 length += 2;
912
913 break;
914 }
915 [b'\n' | b'\r', ..] => {
916 length += 1;
917
918 break;
919 }
920 [byte, ..] if byte.is_ascii_whitespace() => {
921 length += 1;
922 }
923 [_, ..] => {
924 if only_whitespaces
925 && self.input.peek(length, label.len()) == label
926 && self
927 .input
928 .peek(length + label.len(), 1)
929 .first()
930 .is_none_or(|c| !c.is_ascii_alphanumeric())
931 {
932 length += label.len();
933 terminated = true;
934
935 break;
936 }
937
938 only_whitespaces = false;
939 length += 1;
940 }
941 [] => {
942 break;
943 }
944 }
945 }
946
947 let buffer = self.input.consume(length);
948 let end = self.input.current_position();
949
950 if terminated {
951 self.mode = LexerMode::Script;
952
953 return Some(Ok(self.token(TokenKind::DocumentEnd, buffer, start, end)));
954 }
955
956 Some(Ok(self.token(TokenKind::StringPart, buffer, start, end)))
957 }
958 },
959 LexerMode::Halt(stage) => 'halt: {
960 let start = self.input.current_position();
961 if let HaltStage::End = stage {
962 let buffer = self.input.consume_remaining();
963 let end = self.input.current_position();
964
965 break 'halt Some(Ok(self.token(TokenKind::InlineText, buffer, start, end)));
966 }
967
968 let whitespaces = self.input.consume_whitespaces();
969 if !whitespaces.is_empty() {
970 let end = self.input.current_position();
971
972 break 'halt Some(Ok(self.token(TokenKind::Whitespace, whitespaces, start, end)));
973 }
974
975 match &stage {
976 HaltStage::LookingForLeftParenthesis => {
977 if self.input.is_at(b"(", false) {
978 let buffer = self.input.consume(1);
979 let end = self.input.current_position();
980
981 self.mode = LexerMode::Halt(HaltStage::LookingForRightParenthesis);
982
983 Some(Ok(self.token(TokenKind::LeftParenthesis, buffer, start, end)))
984 } else {
985 let byte = self.input.read(1)[0];
986 let position = self.input.current_position();
987 self.input.consume(1);
989 Some(Err(SyntaxError::UnexpectedToken(self.file_id(), byte, position)))
990 }
991 }
992 HaltStage::LookingForRightParenthesis => {
993 if self.input.is_at(b")", false) {
994 let buffer = self.input.consume(1);
995 let end = self.input.current_position();
996
997 self.mode = LexerMode::Halt(HaltStage::LookingForTerminator);
998
999 Some(Ok(self.token(TokenKind::RightParenthesis, buffer, start, end)))
1000 } else {
1001 let byte = self.input.read(1)[0];
1002 let position = self.input.current_position();
1003 self.input.consume(1);
1004 Some(Err(SyntaxError::UnexpectedToken(self.file_id(), byte, position)))
1005 }
1006 }
1007 HaltStage::LookingForTerminator => {
1008 if self.input.is_at(b";", false) {
1009 let buffer = self.input.consume(1);
1010 let end = self.input.current_position();
1011
1012 self.mode = LexerMode::Halt(HaltStage::End);
1013
1014 Some(Ok(self.token(TokenKind::Semicolon, buffer, start, end)))
1015 } else if self.input.is_at(b"?>", false) {
1016 let buffer = self.input.consume(2);
1017 let end = self.input.current_position();
1018
1019 self.mode = LexerMode::Halt(HaltStage::End);
1020
1021 Some(Ok(self.token(TokenKind::CloseTag, buffer, start, end)))
1022 } else {
1023 let byte = self.input.read(1)[0];
1024 let position = self.input.current_position();
1025 self.input.consume(1);
1026 Some(Err(SyntaxError::UnexpectedToken(self.file_id(), byte, position)))
1027 }
1028 }
1029 _ => unreachable!(),
1030 }
1031 }
1032 }
1033 }
1034
1035 #[inline]
1039 fn scan_identifier_or_keyword_info(&self) -> (TokenKind, usize) {
1040 let (mut length, ended_with_slash) = self.input.scan_identifier(0);
1041
1042 if !ended_with_slash {
1043 match length {
1044 6 if self.input.is_at(b"public(set)", true) => {
1045 return (TokenKind::PublicSet, 11);
1046 }
1047 7 if self.input.is_at(b"private(set)", true) => {
1048 return (TokenKind::PrivateSet, 12);
1049 }
1050 9 if self.input.is_at(b"protected(set)", true) => {
1051 return (TokenKind::ProtectedSet, 14);
1052 }
1053 _ => {}
1054 }
1055 }
1056
1057 if !ended_with_slash && let Some(kind) = internal::keyword::lookup_keyword(self.input.read(length)) {
1058 return (kind, length);
1059 }
1060
1061 let mut slashes = 0;
1062 let mut last_was_slash = false;
1063 loop {
1064 match self.input.peek(length, 1) {
1065 [b'a'..=b'z' | b'A'..=b'Z' | b'_' | 0x80..=0xFF] if last_was_slash => {
1066 length += 1;
1067 last_was_slash = false;
1068 }
1069 [b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | 0x80..=0xFF] if !last_was_slash => {
1070 length += 1;
1071 }
1072 [b'\\'] if !self.interpolating || self.brace_interpolating => {
1073 if last_was_slash {
1074 length -= 1;
1075 slashes -= 1;
1076 last_was_slash = false;
1077 break;
1078 }
1079
1080 length += 1;
1081 slashes += 1;
1082 last_was_slash = true;
1083 }
1084 _ => {
1085 break;
1086 }
1087 }
1088 }
1089
1090 if last_was_slash {
1091 length -= 1;
1092 slashes -= 1;
1093 }
1094
1095 let kind = if slashes > 0 { TokenKind::QualifiedIdentifier } else { TokenKind::Identifier };
1096
1097 (kind, length)
1098 }
1099
1100 #[inline]
1101 fn token(&self, kind: TokenKind, v: &'input [u8], start: Position, _end: Position) -> Token<'input> {
1102 let value = unsafe { std::str::from_utf8_unchecked(v) };
1107
1108 Token { kind, start, value }
1109 }
1110
1111 #[inline]
1112 fn interpolation(
1113 &mut self,
1114 end_offset: u32,
1115 post_interpolation_mode: LexerMode<'input>,
1116 brace: bool,
1117 ) -> Option<Result<Token<'input>, SyntaxError>> {
1118 self.mode = LexerMode::Script;
1119
1120 let was_interpolating = self.interpolating;
1121 self.interpolating = true;
1122 let was_brace_interpolating = self.brace_interpolating;
1123 self.brace_interpolating = brace;
1125
1126 loop {
1127 let subsequent_token = self.advance()?.ok()?;
1128 let token_start = subsequent_token.start.offset;
1130 let token_end = token_start + subsequent_token.value.len() as u32;
1131 let is_final_token = token_start <= end_offset && end_offset <= token_end;
1132
1133 self.buffer.push_back(subsequent_token);
1134
1135 if is_final_token {
1136 break;
1137 }
1138 }
1139
1140 self.mode = post_interpolation_mode;
1141 self.interpolating = was_interpolating;
1142 self.brace_interpolating = was_brace_interpolating;
1143
1144 self.advance()
1145 }
1146}
1147
1148impl HasFileId for Lexer<'_> {
1149 #[inline]
1150 fn file_id(&self) -> FileId {
1151 self.input.file_id()
1152 }
1153}
1154
1155#[inline]
1156fn matches_start_of_heredoc_document(input: &Input, prefix_len: usize) -> bool {
1157 let total = input.len();
1158 let base = input.current_offset();
1159
1160 let mut length = 3 + prefix_len;
1162 while base + length < total && input.read_at(base + length).is_ascii_whitespace() {
1164 length += 1;
1165 }
1166
1167 if base + length >= total || !is_start_of_identifier(input.read_at(base + length)) {
1169 return false;
1170 }
1171 length += 1; loop {
1175 let pos = base + length;
1176 if pos >= total {
1177 return false; }
1179
1180 let byte = *input.read_at(pos);
1181 if byte == b'\n' {
1182 return true; } else if byte == b'\r' {
1184 return pos + 1 < total && *input.read_at(pos + 1) == b'\n';
1186 } else if is_part_of_identifier(input.read_at(pos)) {
1187 length += 1;
1188 } else {
1189 return false; }
1191 }
1192}
1193
1194#[inline]
1195fn matches_start_of_double_quote_heredoc_document(input: &Input, prefix_len: usize) -> bool {
1196 let total = input.len();
1197 let base = input.current_offset();
1198
1199 let mut length = 3 + prefix_len;
1201 while base + length < total && input.read_at(base + length).is_ascii_whitespace() {
1202 length += 1;
1203 }
1204
1205 if base + length >= total || *input.read_at(base + length) != b'"' {
1207 return false;
1208 }
1209 length += 1;
1210
1211 if base + length >= total || !is_start_of_identifier(input.read_at(base + length)) {
1213 return false;
1214 }
1215 length += 1;
1216
1217 let mut terminated = false;
1219 loop {
1220 let pos = base + length;
1221 if pos >= total {
1222 return false;
1223 }
1224 let byte = input.read_at(pos);
1225 if *byte == b'\n' {
1226 return terminated;
1228 } else if *byte == b'\r' {
1229 return terminated && pos + 1 < total && *input.read_at(pos + 1) == b'\n';
1231 } else if !terminated && is_part_of_identifier(byte) {
1232 length += 1;
1233 } else if !terminated && *byte == b'"' {
1234 terminated = true;
1235 length += 1;
1236 } else {
1237 return false;
1238 }
1239 }
1240}
1241
1242#[inline]
1243fn matches_start_of_nowdoc_document(input: &Input, prefix_len: usize) -> bool {
1244 let total = input.len();
1245 let base = input.current_offset();
1246
1247 let mut length = 3 + prefix_len;
1249 while base + length < total && input.read_at(base + length).is_ascii_whitespace() {
1250 length += 1;
1251 }
1252
1253 if base + length >= total || *input.read_at(base + length) != b'\'' {
1255 return false;
1256 }
1257 length += 1;
1258
1259 if base + length >= total || !is_start_of_identifier(input.read_at(base + length)) {
1261 return false;
1262 }
1263 length += 1;
1264
1265 let mut terminated = false;
1267 loop {
1268 let pos = base + length;
1269 if pos >= total {
1270 return false;
1271 }
1272 let byte = *input.read_at(pos);
1273 if byte == b'\n' {
1274 return terminated;
1275 } else if byte == b'\r' {
1276 return terminated && pos + 1 < total && *input.read_at(pos + 1) == b'\n';
1277 } else if !terminated && is_part_of_identifier(&byte) {
1278 length += 1;
1279 } else if !terminated && byte == b'\'' {
1280 terminated = true;
1281 length += 1;
1282 } else {
1283 return false;
1284 }
1285 }
1286}
1287
1288#[inline]
1289fn matches_literal_double_quote_string(input: &Input, prefix_len: usize) -> bool {
1290 let total = input.len();
1291 let base = input.current_offset();
1292
1293 let mut pos = base + 1 + prefix_len;
1295 loop {
1296 if pos >= total {
1297 return true;
1299 }
1300 let byte = *input.read_at(pos);
1301 if byte == b'"' {
1302 return true;
1304 } else if byte == b'\\' {
1305 pos += 2;
1307 continue;
1308 }
1309
1310 if pos + 1 < total {
1313 let next = *input.read_at(pos + 1);
1314 if (byte == b'$' && (is_start_of_identifier(&next) || next == b'{')) || (byte == b'{' && next == b'$') {
1315 return false;
1316 }
1317 }
1318 pos += 1;
1319 }
1320}
1321
1322#[inline]
1323fn read_start_of_heredoc_document(input: &Input, double_quoted: bool, prefix_len: usize) -> (usize, usize, usize) {
1324 let total = input.len();
1325 let base = input.current_offset();
1326
1327 let mut pos = base + 3 + prefix_len;
1329 let mut whitespaces = 0;
1330 while pos < total && input.read_at(pos).is_ascii_whitespace() {
1331 whitespaces += 1;
1332 pos += 1;
1333 }
1334
1335 let mut length = 3 + prefix_len + whitespaces + if double_quoted { 2 } else { 1 };
1340
1341 let mut label_length = 1; let mut terminated = false; loop {
1344 let pos = base + length;
1345 if pos >= total {
1347 unreachable!("Unexpected end of input while reading heredoc label");
1348 }
1349
1350 let byte = *input.read_at(pos);
1351 if byte == b'\n' {
1352 length += 1;
1354 return (length, whitespaces, label_length);
1355 } else if byte == b'\r' {
1356 if pos + 1 < total && *input.read_at(pos + 1) == b'\n' {
1358 length += 2;
1359 } else {
1360 length += 1;
1361 }
1362 return (length, whitespaces, label_length);
1363 } else if is_part_of_identifier(&byte) && (!double_quoted || !terminated) {
1364 length += 1;
1367 label_length += 1;
1368 } else if double_quoted && !terminated && byte == b'"' {
1369 length += 1;
1371 terminated = true;
1372 } else {
1373 unreachable!("Unexpected character encountered in heredoc label");
1374 }
1375 }
1376}
1377
1378#[inline]
1379fn read_start_of_nowdoc_document(input: &Input, prefix_len: usize) -> (usize, usize, usize) {
1380 let total = input.len();
1381 let base = input.current_offset();
1382
1383 let mut pos = base + 3 + prefix_len;
1384 let mut whitespaces = 0;
1385 while pos < total && input.read_at(pos).is_ascii_whitespace() {
1386 whitespaces += 1;
1387 pos += 1;
1388 }
1389
1390 let mut length = 3 + prefix_len + whitespaces + 2;
1392
1393 let mut label_length = 1;
1394 let mut terminated = false;
1395 loop {
1396 let pos = base + length;
1397 if pos >= total {
1398 unreachable!("Unexpected end of input while reading nowdoc label");
1399 }
1400 let byte = *input.read_at(pos);
1401
1402 if byte == b'\n' {
1403 length += 1;
1405 return (length, whitespaces, label_length);
1406 } else if byte == b'\r' {
1407 if pos + 1 < total && *input.read_at(pos + 1) == b'\n' {
1409 length += 2;
1410 } else {
1411 length += 1;
1412 }
1413 return (length, whitespaces, label_length);
1414 } else if is_part_of_identifier(&byte) && !terminated {
1415 length += 1;
1417 label_length += 1;
1418 } else if !terminated && byte == b'\'' {
1419 length += 1;
1421 terminated = true;
1422 } else {
1423 unreachable!("Unexpected character encountered in nowdoc label");
1424 }
1425 }
1426}
1427
1428#[inline]
1429fn read_literal_string(input: &Input, quote: u8, prefix_len: usize) -> (TokenKind, usize) {
1430 let total = input.len();
1431 let start = input.current_offset();
1432 let skip = prefix_len + 1; let mut length = skip;
1434
1435 let bytes = input.peek(skip, total - start - skip);
1436 loop {
1437 let scan_start = length - skip;
1438 match memchr2(quote, b'\\', &bytes[scan_start..]) {
1439 Some(pos) => {
1440 let abs_pos = scan_start + pos;
1441 let byte = bytes[abs_pos];
1442
1443 if byte == b'\\' {
1444 length = skip + abs_pos + 2;
1445 if length > total - start {
1446 return (TokenKind::PartialLiteralString, total - start);
1447 }
1448 } else {
1449 length = skip + abs_pos + 1; return (TokenKind::LiteralString, length);
1451 }
1452 }
1453 None => {
1454 return (TokenKind::PartialLiteralString, total - start);
1456 }
1457 }
1458 }
1459}
1460
1461#[inline]
1462fn read_until_end_of_variable_interpolation(input: &Input, from: usize) -> u32 {
1463 let total = input.len();
1464 let base = input.current_offset();
1465 let mut offset = from;
1467
1468 loop {
1469 let abs = base + offset;
1470 if abs >= total {
1471 break;
1473 }
1474
1475 if is_part_of_identifier(input.read_at(abs)) {
1477 offset += 1;
1478 continue;
1479 }
1480
1481 if *input.read_at(abs) == b'[' {
1483 offset += 1;
1484 let mut nesting = 0;
1485 loop {
1486 let abs_inner = base + offset;
1487 if abs_inner >= total {
1488 break;
1489 }
1490 let b = input.read_at(abs_inner);
1491 if *b == b']' {
1492 offset += 1;
1493 if nesting == 0 {
1494 break;
1495 }
1496
1497 nesting -= 1;
1498 } else if *b == b'[' {
1499 offset += 1;
1500 nesting += 1;
1501 } else if b.is_ascii_whitespace() {
1502 break;
1504 } else {
1505 offset += 1;
1506 }
1507 }
1508 break;
1510 }
1511
1512 if base + offset + 2 < total
1514 && *input.read_at(abs) == b'-'
1515 && *input.read_at(base + offset + 1) == b'>'
1516 && is_start_of_identifier(input.read_at(base + offset + 2))
1517 {
1518 offset += 3;
1519 while base + offset < total && is_part_of_identifier(input.read_at(base + offset)) {
1521 offset += 1;
1522 }
1523 break;
1524 }
1525
1526 if base + offset + 3 < total
1528 && *input.read_at(abs) == b'?'
1529 && *input.read_at(base + offset + 1) == b'-'
1530 && *input.read_at(base + offset + 2) == b'>'
1531 && is_start_of_identifier(input.read_at(base + offset + 3))
1532 {
1533 offset += 4;
1534 while base + offset < total && is_part_of_identifier(input.read_at(base + offset)) {
1535 offset += 1;
1536 }
1537 break;
1538 }
1539
1540 break;
1542 }
1543
1544 offset as u32
1545}
1546
1547#[inline]
1548fn read_until_end_of_brace_interpolation(input: &Input, from: usize) -> u32 {
1549 let total = input.len();
1550 let base = input.current_offset();
1551 let mut offset = from;
1552 let mut nesting = 0;
1553
1554 loop {
1555 let abs = base + offset;
1556 if abs >= total {
1557 break;
1558 }
1559 match input.read_at(abs) {
1560 b'}' => {
1561 offset += 1;
1562 if nesting == 0 {
1563 break;
1564 }
1565
1566 nesting -= 1;
1567 }
1568 b'{' => {
1569 offset += 1;
1570 nesting += 1;
1571 }
1572 _ => {
1573 offset += 1;
1574 }
1575 }
1576 }
1577
1578 offset as u32
1579}
1580
1581#[inline]
1584fn scan_multi_line_comment(bytes: &[u8]) -> Option<usize> {
1585 memmem::find(bytes, b"*/").map(|pos| pos + 2)
1587}
1588
1589#[inline]
1593fn scan_single_line_comment(bytes: &[u8]) -> usize {
1594 let mut pos = 0;
1595 while pos < bytes.len() {
1596 match memchr::memchr3(b'\n', b'\r', b'?', &bytes[pos..]) {
1597 Some(offset) => {
1598 let found_pos = pos + offset;
1599 match bytes[found_pos] {
1600 b'\n' | b'\r' => return found_pos,
1601 b'?' => {
1602 if found_pos + 1 < bytes.len() && bytes[found_pos + 1] == b'>' {
1604 if found_pos > 0 && bytes[found_pos - 1].is_ascii_whitespace() {
1606 return found_pos - 1;
1607 }
1608 return found_pos;
1609 }
1610 pos = found_pos + 1;
1612 }
1613 _ => unreachable!(),
1614 }
1615 }
1616 None => return bytes.len(),
1617 }
1618 }
1619
1620 bytes.len()
1621}