1use crate::ident;
2use crate::ident_start;
3use crate::lexer::byte_string::ByteString;
4use crate::lexer::error::SyntaxError;
5use crate::lexer::error::SyntaxResult;
6use crate::lexer::state::source::Source;
7use crate::lexer::state::StackFrame;
8use crate::lexer::state::State;
9use crate::lexer::token::DocStringIndentationKind;
10use crate::lexer::token::DocStringKind;
11use crate::lexer::token::OpenTagKind;
12use crate::lexer::token::Token;
13use crate::lexer::token::TokenKind;
14
15pub mod byte_string;
16pub mod error;
17pub mod stream;
18pub mod token;
19
20mod state;
21
22mod macros;
23
/// Stateless lexer entry point.
///
/// The struct has no fields; the per-run lexing state is a `State` value
/// created inside [`Lexer::tokenize`], so one `Lexer` can be reused for any
/// number of inputs.
#[derive(Debug, PartialEq, Eq, Clone, Default)]
pub struct Lexer;
26
27impl Lexer {
28 pub const fn new() -> Self {
29 Self {}
30 }
31
32 pub fn tokenize<B: ?Sized + AsRef<[u8]>>(&self, input: &B) -> SyntaxResult<Vec<Token>> {
33 let mut state = State::new(Source::new(input.as_ref()));
34 let mut tokens = Vec::new();
35
36 while !state.source.eof() {
37 match state.frame()? {
38 StackFrame::Initial => self.initial(&mut state, &mut tokens)?,
42 StackFrame::Scripting => {
45 self.skip_whitespace(&mut state);
46
47 if state.source.eof() {
49 break;
50 }
51
52 tokens.push(self.scripting(&mut state)?);
53 }
54 StackFrame::Halted => {
58 tokens.push(Token {
59 kind: TokenKind::InlineHtml,
60 span: state.source.span(),
61 value: state.source.read_remaining().into(),
62 });
63 break;
64 }
65 StackFrame::DoubleQuote => self.double_quote(&mut state, &mut tokens)?,
68 StackFrame::ShellExec => self.shell_exec(&mut state, &mut tokens)?,
70 StackFrame::DocString(kind, label, ..) => {
72 let label = label.clone();
73
74 match kind {
75 DocStringKind::Heredoc => self.heredoc(&mut state, &mut tokens, label)?,
76 DocStringKind::Nowdoc => self.nowdoc(&mut state, &mut tokens, label)?,
77 }
78 }
79 StackFrame::LookingForVarname => {
83 if let Some(token) = self.looking_for_varname(&mut state)? {
84 tokens.push(token);
85 }
86 }
87 StackFrame::LookingForProperty => {
91 tokens.push(self.looking_for_property(&mut state)?);
92 }
93 StackFrame::VarOffset => {
94 if state.source.eof() {
95 break;
96 }
97
98 tokens.push(self.var_offset(&mut state)?);
99 }
100 }
101 }
102
103 tokens.push(Token {
104 kind: TokenKind::Eof,
105 span: state.source.span(),
106 value: ByteString::default(),
107 });
108
109 Ok(tokens)
110 }
111
112 fn skip_whitespace(&self, state: &mut State) {
113 while let Some(true) = state.source.current().map(|u: &u8| u.is_ascii_whitespace()) {
114 state.source.next();
115 }
116 }
117
118 fn read_and_skip_whitespace(&self, state: &mut State) -> Vec<u8> {
119 let mut buffer = Vec::new();
120 while let Some(true) = state.source.current().map(|u: &u8| u.is_ascii_whitespace()) {
121 buffer.push(*state.source.current().unwrap());
122 state.source.next();
123 }
124 buffer
125 }
126
127 fn initial(&self, state: &mut State, tokens: &mut Vec<Token>) -> SyntaxResult<()> {
128 let inline_span = state.source.span();
129 let mut buffer = Vec::new();
130 while let Some(char) = state.source.current() {
131 if state.source.at_case_insensitive(b"<?php", 5) {
132 let tag_span = state.source.span();
133
134 let tag = state.source.read_and_skip(5);
135 state.replace(StackFrame::Scripting);
136
137 if !buffer.is_empty() {
138 tokens.push(Token {
139 kind: TokenKind::InlineHtml,
140 span: inline_span,
141 value: buffer.into(),
142 });
143 }
144
145 tokens.push(Token {
146 kind: TokenKind::OpenTag(OpenTagKind::Full),
147 span: tag_span,
148 value: tag.into(),
149 });
150
151 return Ok(());
152 } else if state.source.at_case_insensitive(b"<?=", 3) {
153 let tag_span = state.source.span();
154
155 state.source.skip(3);
156 state.replace(StackFrame::Scripting);
157
158 if !buffer.is_empty() {
159 tokens.push(Token {
160 kind: TokenKind::InlineHtml,
161 span: inline_span,
162 value: buffer.into(),
163 });
164 }
165
166 tokens.push(Token {
167 kind: TokenKind::OpenTag(OpenTagKind::Echo),
168 span: tag_span,
169 value: b"<?=".into(),
170 });
171
172 return Ok(());
173 } else if state.source.at_case_insensitive(b"<?", 2) {
174 let tag_span = state.source.span();
175
176 state.source.skip(2);
177 state.replace(StackFrame::Scripting);
178
179 if !buffer.is_empty() {
180 tokens.push(Token {
181 kind: TokenKind::InlineHtml,
182 span: inline_span,
183 value: buffer.into(),
184 });
185 }
186
187 tokens.push(Token {
188 kind: TokenKind::OpenTag(OpenTagKind::Short),
189 span: tag_span,
190 value: b"<?".into(),
191 });
192
193 return Ok(());
194 }
195
196 state.source.next();
197 buffer.push(*char);
198 }
199
200 tokens.push(Token {
201 kind: TokenKind::InlineHtml,
202 span: inline_span,
203 value: buffer.into(),
204 });
205
206 Ok(())
207 }
208
209 fn scripting(&self, state: &mut State) -> SyntaxResult<Token> {
210 let span = state.source.span();
211 let (kind, value): (TokenKind, ByteString) = match state.source.read(3) {
212 [b'!', b'=', b'='] => {
213 state.source.skip(3);
214
215 (TokenKind::BangDoubleEquals, b"!==".into())
216 }
217 [b'?', b'?', b'='] => {
218 state.source.skip(3);
219 (TokenKind::DoubleQuestionEquals, b"??=".into())
220 }
221 [b'?', b'-', b'>'] => {
222 state.source.skip(3);
223 (TokenKind::QuestionArrow, b"?->".into())
224 }
225 [b'=', b'=', b'='] => {
226 state.source.skip(3);
227 (TokenKind::TripleEquals, b"===".into())
228 }
229 [b'.', b'.', b'.'] => {
230 state.source.skip(3);
231 (TokenKind::Ellipsis, b"...".into())
232 }
233 [b'`', ..] => {
234 state.source.next();
235 state.replace(StackFrame::ShellExec);
236 (TokenKind::Backtick, b"`".into())
237 }
238 [b'@', ..] => {
239 state.source.next();
240 (TokenKind::At, b"@".into())
241 }
242 [b'!', b'=', ..] => {
243 state.source.skip(2);
244 (TokenKind::BangEquals, b"!=".into())
245 }
246 [b'!', ..] => {
247 state.source.next();
248 (TokenKind::Bang, b"!".into())
249 }
250 [b'&', b'&', ..] => {
251 state.source.skip(2);
252 (TokenKind::BooleanAnd, b"&&".into())
253 }
254 [b'&', b'=', ..] => {
255 state.source.skip(2);
256 (TokenKind::AmpersandEquals, b"&=".into())
257 }
258 [b'&', ..] => {
259 state.source.next();
260 (TokenKind::Ampersand, b"&".into())
261 }
262 [b'?', b'>', ..] => {
263 state.source.skip(2);
265
266 state.replace(StackFrame::Initial);
267
268 (TokenKind::CloseTag, b"?>".into())
269 }
270 [b'?', b'?', ..] => {
271 state.source.skip(2);
272 (TokenKind::DoubleQuestion, b"??".into())
273 }
274 [b'?', b':', ..] => {
275 state.source.skip(2);
276 (TokenKind::QuestionColon, b"?:".into())
277 }
278 [b'?', ..] => {
279 state.source.next();
280 (TokenKind::Question, b"?".into())
281 }
282 [b'=', b'>', ..] => {
283 state.source.skip(2);
284 (TokenKind::DoubleArrow, b"=>".into())
285 }
286 [b'=', b'=', ..] => {
287 state.source.skip(2);
288 (TokenKind::DoubleEquals, b"==".into())
289 }
290 [b'=', ..] => {
291 state.source.next();
292 (TokenKind::Equals, b"=".into())
293 }
294 [b'\'', ..] => {
296 state.source.skip(1);
297 self.tokenize_single_quote_string(state)?
298 }
299 [b'b' | b'B', b'\'', ..] => {
300 state.source.skip(2);
301 self.tokenize_single_quote_string(state)?
302 }
303 [b'"', ..] => {
304 state.source.skip(1);
305 self.tokenize_double_quote_string(state)?
306 }
307 [b'b' | b'B', b'"', ..] => {
308 state.source.skip(2);
309 self.tokenize_double_quote_string(state)?
310 }
311 [b'$', ident_start!(), ..] => self.tokenize_variable(state),
312 [b'$', ..] => {
313 state.source.next();
314 (TokenKind::Dollar, b"$".into())
315 }
316 [b'.', b'=', ..] => {
317 state.source.skip(2);
318 (TokenKind::DotEquals, b".=".into())
319 }
320 [b'0'..=b'9', ..] => self.tokenize_number(state)?,
321 [b'.', b'0'..=b'9', ..] => self.tokenize_number(state)?,
322 [b'.', ..] => {
323 state.source.next();
324 (TokenKind::Dot, b".".into())
325 }
326 [b'\\', ident_start!(), ..] => {
327 state.source.next();
328
329 match self.scripting(state)? {
330 Token {
331 kind: TokenKind::Identifier | TokenKind::QualifiedIdentifier,
332 value,
333 ..
334 } => {
335 let mut bytes = value;
336 bytes.insert(0, b'\\');
337
338 (TokenKind::FullyQualifiedIdentifier, bytes)
339 }
340 Token {
341 kind: TokenKind::True,
342 ..
343 } => (TokenKind::FullyQualifiedIdentifier, b"\\true".into()),
344 Token {
345 kind: TokenKind::False,
346 ..
347 } => (TokenKind::FullyQualifiedIdentifier, b"\\false".into()),
348 Token {
349 kind: TokenKind::Null,
350 ..
351 } => (TokenKind::FullyQualifiedIdentifier, b"\\null".into()),
352 s => unreachable!("{:?}", s),
353 }
354 }
355 [b'\\', ..] => {
356 state.source.next();
357 (TokenKind::NamespaceSeparator, b"\\".into())
358 }
359 [b'/', b'*', ..] => {
360 state.source.next();
361 let mut buffer = vec![b'/'];
362
363 loop {
364 match state.source.read(2) {
365 [b'*', b'/'] => {
366 state.source.skip(2);
367 buffer.extend_from_slice(b"*/");
368 break;
369 }
370 &[t, ..] => {
371 state.source.next();
372 buffer.push(t);
373 }
374 _ => {
375 break;
376 }
377 }
378 }
379
380 if buffer.starts_with(b"/**") {
381 (TokenKind::DocumentComment, buffer.into())
382 } else {
383 (TokenKind::MultiLineComment, buffer.into())
384 }
385 }
386 [b'#', b'[', ..] => {
387 state.source.skip(2);
388 (TokenKind::Attribute, b"#[".into())
389 }
390 [ch @ b'/', b'/', ..] | [ch @ b'#', ..] => {
391 let mut buffer = if *ch == b'/' {
392 state.source.skip(2);
393 b"//".to_vec()
394 } else {
395 state.source.next();
396 b"#".to_vec()
397 };
398
399 while let Some(c) = state.source.current() {
400 if *c == b'\n' {
401 state.source.next();
402 break;
403 }
404
405 if state.source.read(2) == [b'?', b'>'] {
406 break;
407 }
408
409 buffer.push(*c);
410 state.source.next();
411 }
412
413 if buffer.starts_with(b"#") {
414 (TokenKind::HashMarkComment, buffer.into())
415 } else {
416 (TokenKind::SingleLineComment, buffer.into())
417 }
418 }
419 [b'/', b'=', ..] => {
420 state.source.skip(2);
421 (TokenKind::SlashEquals, b"/=".into())
422 }
423 [b'/', ..] => {
424 state.source.next();
425 (TokenKind::Slash, b"/".into())
426 }
427 [b'*', b'*', b'=', ..] => {
428 state.source.skip(3);
429 (TokenKind::PowEquals, b"**=".into())
430 }
431 [b'<', b'<', b'='] => {
432 state.source.skip(3);
433
434 (TokenKind::LeftShiftEquals, b"<<=".into())
435 }
436 [b'<', b'=', b'>'] => {
437 state.source.skip(3);
438 (TokenKind::Spaceship, b"<=>".into())
439 }
440 [b'>', b'>', b'='] => {
441 state.source.skip(3);
442 (TokenKind::RightShiftEquals, b">>=".into())
443 }
444 [b'<', b'<', b'<'] => {
445 state.source.skip(3);
446 let mut buffer = b"<<<".to_vec();
447 buffer.extend(self.read_and_skip_whitespace(state));
448
449 let doc_string_kind = match state.source.read(1) {
450 [b'\''] => {
451 buffer.push(b'\'');
452 state.source.next();
453 DocStringKind::Nowdoc
454 }
455 [b'"'] => {
456 buffer.push(b'"');
457 state.source.next();
458 DocStringKind::Heredoc
459 }
460 [_, ..] => DocStringKind::Heredoc,
461 [] => {
462 return Err(SyntaxError::UnexpectedEndOfFile(state.source.span()));
463 }
464 };
465
466 let label: ByteString = match self.peek_identifier(state) {
467 Some(_) => self.consume_identifier(state).into(),
468 None => {
469 return match state.source.current() {
470 Some(c) => {
471 Err(SyntaxError::UnexpectedCharacter(*c, state.source.span()))
472 }
473 None => Err(SyntaxError::UnexpectedEndOfFile(state.source.span())),
474 }
475 }
476 };
477
478 buffer.extend_from_slice(&label);
479
480 if doc_string_kind == DocStringKind::Nowdoc {
481 match state.source.current() {
482 Some(b'\'') => {
483 buffer.push(b'\'');
484 state.source.next();
485 }
486 _ => {
487 return Err(SyntaxError::UnexpectedCharacter(
489 *state.source.current().unwrap(),
490 state.source.span(),
491 ));
492 }
493 };
494 } else if let Some(b'"') = state.source.current() {
495 buffer.push(b'"');
496 state.source.next();
497 }
498
499 if !matches!(state.source.current(), Some(b'\n')) {
500 return Err(SyntaxError::UnexpectedCharacter(
501 *state.source.current().unwrap(),
502 state.source.span(),
503 ));
504 }
505
506 state.source.next();
507 state.replace(StackFrame::DocString(
508 doc_string_kind.clone(),
509 label.clone(),
510 DocStringIndentationKind::None,
511 0,
512 ));
513
514 (TokenKind::StartDocString(doc_string_kind), buffer.into())
515 }
516 [b'*', b'*', ..] => {
517 state.source.skip(2);
518 (TokenKind::Pow, b"**".into())
519 }
520 [b'*', b'=', ..] => {
521 state.source.skip(2);
522 (TokenKind::AsteriskEquals, b"*=".into())
523 }
524 [b'*', ..] => {
525 state.source.next();
526 (TokenKind::Asterisk, b"*".into())
527 }
528 [b'|', b'|', ..] => {
529 state.source.skip(2);
530 (TokenKind::BooleanOr, b"||".into())
531 }
532 [b'|', b'=', ..] => {
533 state.source.skip(2);
534 (TokenKind::PipeEquals, b"|=".into())
535 }
536 [b'|', ..] => {
537 state.source.next();
538 (TokenKind::Pipe, b"|".into())
539 }
540 [b'^', b'=', ..] => {
541 state.source.skip(2);
542 (TokenKind::CaretEquals, b"^=".into())
543 }
544 [b'^', ..] => {
545 state.source.next();
546 (TokenKind::Caret, b"^".into())
547 }
548 [b'{', ..] => {
549 state.source.next();
550 state.enter(StackFrame::Scripting);
551 (TokenKind::LeftBrace, b"{".into())
552 }
553 [b'}', ..] => {
554 state.source.next();
555 state.exit();
556 (TokenKind::RightBrace, b"}".into())
557 }
558 [b'(', ..] => {
559 state.source.next();
560 let mut buffer = b"(".to_vec();
561
562 while let Some(true) = state.source.current().map(|u: &u8| u.is_ascii_whitespace())
564 {
565 buffer.push(*state.source.current().unwrap());
566 state.source.next();
567 }
568
569 if state.source.at_case_insensitive(b"int", 3) {
570 if state.source.at_case_insensitive(b"integer", 7)
571 && state.source.peek_ignoring_whitespace(7, 1) == [b')']
572 {
573 buffer.extend(state.source.read_and_skip(7));
574 buffer.extend(self.read_and_skip_whitespace(state));
575 buffer.extend(state.source.read_and_skip(1));
576
577 (TokenKind::IntegerCast, buffer.into())
578 } else if state.source.peek_ignoring_whitespace(3, 1) == [b')'] {
579 buffer.extend(state.source.read_and_skip(3));
580 buffer.extend(self.read_and_skip_whitespace(state));
581 buffer.extend(state.source.read_and_skip(1));
582
583 (TokenKind::IntCast, buffer.into())
584 } else {
585 (TokenKind::LeftParen, buffer.into())
586 }
587 } else if state.source.at_case_insensitive(b"bool", 4) {
588 if state.source.at_case_insensitive(b"boolean", 7)
589 && state.source.peek_ignoring_whitespace(7, 1) == [b')']
590 {
591 buffer.extend(state.source.read_and_skip(7));
592 buffer.extend(self.read_and_skip_whitespace(state));
593 buffer.extend(state.source.read_and_skip(1));
594
595 (TokenKind::BooleanCast, buffer.into())
596 } else if state.source.peek_ignoring_whitespace(4, 1) == [b')'] {
597 buffer.extend(state.source.read_and_skip(4));
598 buffer.extend(self.read_and_skip_whitespace(state));
599 buffer.extend(state.source.read_and_skip(1));
600
601 (TokenKind::BoolCast, buffer.into())
602 } else {
603 (TokenKind::LeftParen, buffer.into())
604 }
605 } else if state.source.at_case_insensitive(b"float", 5) {
606 if state.source.peek_ignoring_whitespace(5, 1) == [b')'] {
607 buffer.extend(state.source.read_and_skip(5));
608 buffer.extend(self.read_and_skip_whitespace(state));
609 buffer.extend(state.source.read_and_skip(1));
610
611 (TokenKind::FloatCast, buffer.into())
612 } else {
613 (TokenKind::LeftParen, buffer.into())
614 }
615 } else if state.source.at_case_insensitive(b"double", 6) {
616 if state.source.peek_ignoring_whitespace(6, 1) == [b')'] {
617 buffer.extend(state.source.read_and_skip(6));
618 buffer.extend(self.read_and_skip_whitespace(state));
619 buffer.extend(state.source.read_and_skip(1));
620
621 (TokenKind::DoubleCast, buffer.into())
622 } else {
623 (TokenKind::LeftParen, buffer.into())
624 }
625 } else if state.source.at_case_insensitive(b"real", 4) {
626 if state.source.peek_ignoring_whitespace(4, 1) == [b')'] {
627 buffer.extend(state.source.read_and_skip(4));
628 buffer.extend(self.read_and_skip_whitespace(state));
629 buffer.extend(state.source.read_and_skip(1));
630
631 (TokenKind::RealCast, buffer.into())
632 } else {
633 (TokenKind::LeftParen, buffer.into())
634 }
635 } else if state.source.at_case_insensitive(b"string", 6) {
636 if state.source.peek_ignoring_whitespace(6, 1) == [b')'] {
637 buffer.extend(state.source.read_and_skip(6));
638 buffer.extend(self.read_and_skip_whitespace(state));
639 buffer.extend(state.source.read_and_skip(1));
640
641 (TokenKind::StringCast, buffer.into())
642 } else {
643 (TokenKind::LeftParen, buffer.into())
644 }
645 } else if state.source.at_case_insensitive(b"binary", 6) {
646 if state.source.peek_ignoring_whitespace(6, 1) == [b')'] {
647 buffer.extend(state.source.read_and_skip(6));
648 buffer.extend(self.read_and_skip_whitespace(state));
649 buffer.extend(state.source.read_and_skip(1));
650
651 (TokenKind::BinaryCast, buffer.into())
652 } else {
653 (TokenKind::LeftParen, buffer.into())
654 }
655 } else if state.source.at_case_insensitive(b"array", 5) {
656 if state.source.peek_ignoring_whitespace(5, 1) == [b')'] {
657 buffer.extend(state.source.read_and_skip(5));
658 buffer.extend(self.read_and_skip_whitespace(state));
659 buffer.extend(state.source.read_and_skip(1));
660
661 (TokenKind::ArrayCast, buffer.into())
662 } else {
663 (TokenKind::LeftParen, buffer.into())
664 }
665 } else if state.source.at_case_insensitive(b"object", 6) {
666 if state.source.peek_ignoring_whitespace(6, 1) == [b')'] {
667 buffer.extend(state.source.read_and_skip(6));
668 buffer.extend(self.read_and_skip_whitespace(state));
669 buffer.extend(state.source.read_and_skip(1));
670
671 (TokenKind::ObjectCast, buffer.into())
672 } else {
673 (TokenKind::LeftParen, buffer.into())
674 }
675 } else if state.source.at_case_insensitive(b"unset", 5) {
676 if state.source.peek_ignoring_whitespace(5, 1) == [b')'] {
677 buffer.extend(state.source.read_and_skip(5));
678 buffer.extend(self.read_and_skip_whitespace(state));
679 buffer.extend(state.source.read_and_skip(1));
680
681 (TokenKind::UnsetCast, buffer.into())
682 } else {
683 (TokenKind::LeftParen, buffer.into())
684 }
685 } else {
686 (TokenKind::LeftParen, buffer.into())
687 }
688 }
689 [b')', ..] => {
690 state.source.next();
691 (TokenKind::RightParen, b")".into())
692 }
693 [b';', ..] => {
694 state.source.next();
695 (TokenKind::SemiColon, b";".into())
696 }
697 [b'+', b'+', ..] => {
698 state.source.skip(2);
699 (TokenKind::Increment, b"++".into())
700 }
701 [b'+', b'=', ..] => {
702 state.source.skip(2);
703 (TokenKind::PlusEquals, b"+=".into())
704 }
705 [b'+', ..] => {
706 state.source.next();
707 (TokenKind::Plus, b"+".into())
708 }
709 [b'%', b'=', ..] => {
710 state.source.skip(2);
711 (TokenKind::PercentEquals, b"%=".into())
712 }
713 [b'%', ..] => {
714 state.source.next();
715 (TokenKind::Percent, b"%".into())
716 }
717 [b'-', b'-', ..] => {
718 state.source.skip(2);
719 (TokenKind::Decrement, b"--".into())
720 }
721 [b'-', b'>', ..] => {
722 state.source.skip(2);
723 (TokenKind::Arrow, b"->".into())
724 }
725 [b'-', b'=', ..] => {
726 state.source.skip(2);
727 (TokenKind::MinusEquals, b"-=".into())
728 }
729 [b'-', ..] => {
730 state.source.next();
731 (TokenKind::Minus, b"-".into())
732 }
733 [b'<', b'<', ..] => {
734 state.source.skip(2);
735 (TokenKind::LeftShift, b"<<".into())
736 }
737 [b'<', b'=', ..] => {
738 state.source.skip(2);
739 (TokenKind::LessThanEquals, b"<=".into())
740 }
741 [b'<', b'>', ..] => {
742 state.source.skip(2);
743 (TokenKind::AngledLeftRight, b"<>".into())
744 }
745 [b'<', ..] => {
746 state.source.next();
747 (TokenKind::LessThan, b"<".into())
748 }
749 [b'>', b'>', ..] => {
750 state.source.skip(2);
751 (TokenKind::RightShift, b">>".into())
752 }
753 [b'>', b'=', ..] => {
754 state.source.skip(2);
755 (TokenKind::GreaterThanEquals, b">=".into())
756 }
757 [b'>', ..] => {
758 state.source.next();
759 (TokenKind::GreaterThan, b">".into())
760 }
761 [b',', ..] => {
762 state.source.next();
763 (TokenKind::Comma, b",".into())
764 }
765 [b'[', ..] => {
766 state.source.next();
767 (TokenKind::LeftBracket, b"[".into())
768 }
769 [b']', ..] => {
770 state.source.next();
771 (TokenKind::RightBracket, b"]".into())
772 }
773 [b':', b':', ..] => {
774 state.source.skip(2);
775 (TokenKind::DoubleColon, b"::".into())
776 }
777 [b':', ..] => {
778 state.source.next();
779 (TokenKind::Colon, b":".into())
780 }
781 [b'~', ..] => {
782 state.source.next();
783 (TokenKind::BitwiseNot, b"~".into())
784 }
785 [b @ ident_start!(), ..] => {
786 state.source.next();
787 let mut qualified = false;
788 let mut last_was_slash = false;
789
790 let mut buffer = vec![*b];
791 while let Some(next @ ident!() | next @ b'\\') = state.source.current() {
792 if matches!(next, ident!()) {
793 buffer.push(*next);
794 state.source.next();
795 last_was_slash = false;
796 continue;
797 }
798
799 if *next == b'\\' && !last_was_slash {
800 qualified = true;
801 last_was_slash = true;
802 buffer.push(*next);
803 state.source.next();
804 continue;
805 }
806
807 break;
808 }
809
810 if qualified {
811 (TokenKind::QualifiedIdentifier, buffer.into())
812 } else {
813 let kind = identifier_to_keyword(&buffer).unwrap_or(TokenKind::Identifier);
814
815 if kind == TokenKind::HaltCompiler {
816 match state.source.read(3) {
817 [b'(', b')', b';'] => {
818 state.source.skip(3);
819 state.replace(StackFrame::Halted);
820 }
821 _ => return Err(SyntaxError::InvalidHaltCompiler(state.source.span())),
822 }
823 }
824
825 (kind, buffer.into())
826 }
827 }
828 [b, ..] => unimplemented!(
829 "<scripting> char: {}, line: {}, col: {}",
830 *b as char,
831 state.source.span().line,
832 state.source.span().column
833 ),
834 [] => return Err(SyntaxError::UnexpectedEndOfFile(state.source.span())),
837 };
838
839 Ok(Token { kind, span, value })
840 }
841
842 fn double_quote(&self, state: &mut State, tokens: &mut Vec<Token>) -> SyntaxResult<()> {
843 let span = state.source.span();
844 let mut buffer = Vec::new();
845 let (kind, value) = loop {
846 match state.source.read(3) {
847 [b'$', b'{', ..] => {
848 state.source.skip(2);
849 state.enter(StackFrame::LookingForVarname);
850 break (TokenKind::DollarLeftBrace, b"${".into());
851 }
852 [b'{', b'$', ..] => {
853 state.source.next();
855 state.enter(StackFrame::Scripting);
856 break (TokenKind::LeftBrace, b"{".into());
857 }
858 [b'"', ..] => {
859 state.source.next();
860 state.replace(StackFrame::Scripting);
861 break (TokenKind::DoubleQuote, b'"'.into());
862 }
863 &[b'\\', b @ (b'"' | b'\\' | b'$'), ..] => {
864 state.source.skip(2);
865 buffer.push(b);
866 }
867 &[b'\\', b'n', ..] => {
868 state.source.skip(2);
869 buffer.push(b'\n');
870 }
871 &[b'\\', b'r', ..] => {
872 state.source.skip(2);
873 buffer.push(b'\r');
874 }
875 &[b'\\', b't', ..] => {
876 state.source.skip(2);
877 buffer.push(b'\t');
878 }
879 &[b'\\', b'v', ..] => {
880 state.source.skip(2);
881 buffer.push(b'\x0b');
882 }
883 &[b'\\', b'e', ..] => {
884 state.source.skip(2);
885 buffer.push(b'\x1b');
886 }
887 &[b'\\', b'f', ..] => {
888 state.source.skip(2);
889 buffer.push(b'\x0c');
890 }
891 &[b'\\', b'x', b @ (b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')] => {
892 state.source.skip(3);
893
894 let mut hex = String::from(b as char);
895 if let Some(b @ (b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) =
896 state.source.current()
897 {
898 state.source.next();
899 hex.push(*b as char);
900 }
901
902 let b = u8::from_str_radix(&hex, 16).unwrap();
903 buffer.push(b);
904 }
905 &[b'\\', b'u', b'{'] => {
906 state.source.skip(3);
907
908 let mut code_point = String::new();
909 while let Some(b @ (b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) =
910 state.source.current()
911 {
912 state.source.next();
913 code_point.push(*b as char);
914 }
915
916 if code_point.is_empty() || state.source.current() != Some(&b'}') {
917 return Err(SyntaxError::InvalidUnicodeEscape(state.source.span()));
918 }
919 state.source.next();
920
921 let c = if let Ok(c) = u32::from_str_radix(&code_point, 16) {
922 c
923 } else {
924 return Err(SyntaxError::InvalidUnicodeEscape(state.source.span()));
925 };
926
927 if let Some(c) = char::from_u32(c) {
928 let mut tmp = [0; 4];
929 let bytes = c.encode_utf8(&mut tmp);
930 buffer.extend(bytes.as_bytes());
931 } else {
932 return Err(SyntaxError::InvalidUnicodeEscape(state.source.span()));
933 }
934 }
935 &[b'\\', b @ b'0'..=b'7', ..] => {
936 state.source.skip(2);
937
938 let mut octal = String::from(b as char);
939 if let Some(b @ b'0'..=b'7') = state.source.current() {
940 state.source.next();
941 octal.push(*b as char);
942 }
943 if let Some(b @ b'0'..=b'7') = state.source.current() {
944 state.source.next();
945 octal.push(*b as char);
946 }
947
948 if let Ok(b) = u8::from_str_radix(&octal, 8) {
949 buffer.push(b);
950 } else {
951 return Err(SyntaxError::InvalidOctalEscape(state.source.span()));
952 }
953 }
954 [b'$', ident_start!(), ..] => {
955 let mut var = state.source.read_and_skip(1).to_vec();
956 var.extend(self.consume_identifier(state));
957
958 match state.source.read(4) {
959 [b'[', ..] => state.enter(StackFrame::VarOffset),
960 [b'-', b'>', ident_start!(), ..] | [b'?', b'-', b'>', ident_start!()] => {
961 state.enter(StackFrame::LookingForProperty)
962 }
963 _ => {}
964 }
965
966 break (TokenKind::Variable, var.into());
967 }
968 &[b, ..] => {
969 state.source.next();
970 buffer.push(b);
971 }
972 [] => return Err(SyntaxError::UnexpectedEndOfFile(state.source.span())),
973 }
974 };
975
976 if !buffer.is_empty() {
977 tokens.push(Token {
978 kind: TokenKind::StringPart,
979 span,
980 value: buffer.into(),
981 })
982 }
983
984 tokens.push(Token { kind, span, value });
985 Ok(())
986 }
987
988 fn shell_exec(&self, state: &mut State, tokens: &mut Vec<Token>) -> SyntaxResult<()> {
989 let span = state.source.span();
990 let mut buffer = Vec::new();
991 let (kind, value) = loop {
992 match state.source.read(2) {
993 [b'$', b'{'] => {
994 state.source.skip(2);
995 state.enter(StackFrame::LookingForVarname);
996 break (TokenKind::DollarLeftBrace, b"${".into());
997 }
998 [b'{', b'$'] => {
999 state.source.next();
1001 state.enter(StackFrame::Scripting);
1002 break (TokenKind::LeftBrace, b"{".into());
1003 }
1004 [b'`', ..] => {
1005 state.source.next();
1006 state.replace(StackFrame::Scripting);
1007 break (TokenKind::Backtick, b"`".into());
1008 }
1009 [b'$', ident_start!()] => {
1010 let mut var = state.source.read_and_skip(1).to_vec();
1011 var.extend(self.consume_identifier(state));
1012
1013 match state.source.read(4) {
1014 [b'[', ..] => state.enter(StackFrame::VarOffset),
1015 [b'-', b'>', ident_start!(), ..] | [b'?', b'-', b'>', ident_start!()] => {
1016 state.enter(StackFrame::LookingForProperty)
1017 }
1018 _ => {}
1019 }
1020
1021 break (TokenKind::Variable, var.into());
1022 }
1023 &[b, ..] => {
1024 state.source.next();
1025 buffer.push(b);
1026 }
1027 [] => return Err(SyntaxError::UnexpectedEndOfFile(state.source.span())),
1028 }
1029 };
1030
1031 if !buffer.is_empty() {
1032 tokens.push(Token {
1033 kind: TokenKind::StringPart,
1034 span,
1035 value: buffer.into(),
1036 })
1037 }
1038
1039 tokens.push(Token { kind, span, value });
1040
1041 Ok(())
1042 }
1043
1044 fn heredoc(
1045 &self,
1046 state: &mut State,
1047 tokens: &mut Vec<Token>,
1048 label: ByteString,
1049 ) -> SyntaxResult<()> {
1050 let span = state.source.span();
1051 let mut buffer: Vec<u8> = Vec::new();
1052
1053 let (kind, value) = loop {
1054 match state.source.read(3) {
1055 [b'$', b'{', ..] => {
1056 state.source.skip(2);
1057 state.enter(StackFrame::LookingForVarname);
1058 break (TokenKind::DollarLeftBrace, b"${".into());
1059 }
1060 [b'{', b'$', ..] => {
1061 state.source.next();
1063 state.enter(StackFrame::Scripting);
1064 break (TokenKind::LeftBrace, b"{".into());
1065 }
1066 &[b'\\', b @ (b'"' | b'\\' | b'$'), ..] => {
1067 state.source.skip(2);
1068 buffer.push(b);
1069 }
1070 &[b'\\', b'n', ..] => {
1071 state.source.skip(2);
1072 buffer.push(b'\n');
1073 }
1074 &[b'\\', b'r', ..] => {
1075 state.source.skip(2);
1076 buffer.push(b'\r');
1077 }
1078 &[b'\\', b't', ..] => {
1079 state.source.skip(2);
1080 buffer.push(b'\t');
1081 }
1082 &[b'\\', b'v', ..] => {
1083 state.source.skip(2);
1084 buffer.push(b'\x0b');
1085 }
1086 &[b'\\', b'e', ..] => {
1087 state.source.skip(2);
1088 buffer.push(b'\x1b');
1089 }
1090 &[b'\\', b'f', ..] => {
1091 state.source.skip(2);
1092 buffer.push(b'\x0c');
1093 }
1094 &[b'\\', b'x', b @ (b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')] => {
1095 state.source.skip(3);
1096
1097 let mut hex = String::from(b as char);
1098 if let Some(b @ (b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) =
1099 state.source.current()
1100 {
1101 state.source.next();
1102 hex.push(*b as char);
1103 }
1104
1105 let b = u8::from_str_radix(&hex, 16).unwrap();
1106 buffer.push(b);
1107 }
1108 &[b'\\', b'u', b'{'] => {
1109 state.source.skip(3);
1110
1111 let mut code_point = String::new();
1112 while let Some(b @ (b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) =
1113 state.source.current()
1114 {
1115 state.source.next();
1116 code_point.push(*b as char);
1117 }
1118
1119 if code_point.is_empty() || state.source.current() != Some(&b'}') {
1120 return Err(SyntaxError::InvalidUnicodeEscape(state.source.span()));
1121 }
1122 state.source.next();
1123
1124 let c = if let Ok(c) = u32::from_str_radix(&code_point, 16) {
1125 c
1126 } else {
1127 return Err(SyntaxError::InvalidUnicodeEscape(state.source.span()));
1128 };
1129
1130 if let Some(c) = char::from_u32(c) {
1131 let mut tmp = [0; 4];
1132 let bytes = c.encode_utf8(&mut tmp);
1133 buffer.extend(bytes.as_bytes());
1134 } else {
1135 return Err(SyntaxError::InvalidUnicodeEscape(state.source.span()));
1136 }
1137 }
1138 &[b'\\', b @ b'0'..=b'7', ..] => {
1139 state.source.skip(2);
1140
1141 let mut octal = String::from(b as char);
1142 if let Some(b @ b'0'..=b'7') = state.source.current() {
1143 state.source.next();
1144 octal.push(*b as char);
1145 }
1146 if let Some(b @ b'0'..=b'7') = state.source.current() {
1147 state.source.next();
1148 octal.push(*b as char);
1149 }
1150
1151 if let Ok(b) = u8::from_str_radix(&octal, 8) {
1152 buffer.push(b);
1153 } else {
1154 return Err(SyntaxError::InvalidOctalEscape(state.source.span()));
1155 }
1156 }
1157 [b'$', ident_start!(), ..] => {
1158 let mut var = state.source.read_and_skip(1).to_vec();
1159 var.extend(self.consume_identifier(state));
1160
1161 match state.source.read(4) {
1162 [b'[', ..] => state.enter(StackFrame::VarOffset),
1163 [b'-', b'>', ident_start!(), ..] | [b'?', b'-', b'>', ident_start!()] => {
1164 state.enter(StackFrame::LookingForProperty)
1165 }
1166 _ => {}
1167 }
1168
1169 break (TokenKind::Variable, var.into());
1170 }
1171 [b'\n', ..] => {
1173 buffer.push(b'\n');
1174 state.source.next();
1175
1176 if state.source.at(&label, label.len()) {
1178 state.source.skip(label.len());
1179 state.replace(StackFrame::Scripting);
1180 break (
1181 TokenKind::EndDocString(DocStringIndentationKind::None, 0),
1182 label,
1183 );
1184 }
1185
1186 let (whitespace_kind, whitespace_amount) = match state.source.read(1) {
1188 [b' '] => {
1189 let mut amount = 0;
1190 while state.source.read(1) == [b' '] {
1191 amount += 1;
1192 state.source.next();
1193 }
1194 (DocStringIndentationKind::Space, amount)
1195 }
1196 [b'\t'] => {
1197 let mut amount = 0;
1198 while state.source.read(1) == [b'\t'] {
1199 amount += 1;
1200 state.source.next();
1201 }
1202 (DocStringIndentationKind::Tab, amount)
1203 }
1204 _ => (DocStringIndentationKind::None, 0),
1205 };
1206
1207 let mut extra_whitespace_buffer = Vec::new();
1211 while let [b @ b' ' | b @ b'\t'] = state.source.read(1) {
1212 extra_whitespace_buffer.push(b);
1213 state.source.next();
1214 }
1215
1216 if state.source.at(&label, label.len()) {
1219 if whitespace_kind != DocStringIndentationKind::None
1222 && !extra_whitespace_buffer.is_empty()
1223 {
1224 return Err(SyntaxError::InvalidDocIndentation(state.source.span()));
1225 }
1226
1227 state.source.skip(label.len());
1231 state.replace(StackFrame::Scripting);
1232 break (
1233 TokenKind::EndDocString(whitespace_kind, whitespace_amount),
1234 label,
1235 );
1236 } else {
1237 if whitespace_kind != DocStringIndentationKind::None {
1241 let whitespace_char: u8 = whitespace_kind.into();
1242 for _ in 0..whitespace_amount {
1243 buffer.push(whitespace_char);
1244 }
1245 }
1246
1247 buffer.extend(extra_whitespace_buffer);
1248 }
1249 }
1250 &[b, ..] => {
1251 state.source.next();
1252 buffer.push(b);
1253 }
1254 [] => return Err(SyntaxError::UnexpectedEndOfFile(state.source.span())),
1255 }
1256 };
1257
1258 if buffer.last() == Some(&b'\n') {
1260 buffer.pop();
1261 }
1262
1263 if !buffer.is_empty() {
1264 tokens.push(Token {
1265 kind: TokenKind::StringPart,
1266 span,
1267 value: buffer.into(),
1268 })
1269 }
1270
1271 tokens.push(Token { kind, span, value });
1272
1273 Ok(())
1274 }
1275
1276 fn nowdoc(
1277 &self,
1278 state: &mut State,
1279 tokens: &mut Vec<Token>,
1280 label: ByteString,
1281 ) -> SyntaxResult<()> {
1282 let span = state.source.span();
1283 let mut buffer: Vec<u8> = Vec::new();
1284
1285 let (kind, value) = loop {
1286 match state.source.read(3) {
1287 [b'\n', ..] => {
1289 buffer.push(b'\n');
1290 state.source.next();
1291
1292 if state.source.at(&label, label.len()) {
1294 state.source.skip(label.len());
1295 state.replace(StackFrame::Scripting);
1296 break (
1297 TokenKind::EndDocString(DocStringIndentationKind::None, 0),
1298 label,
1299 );
1300 }
1301
1302 let (whitespace_kind, whitespace_amount) = match state.source.read(1) {
1304 [b' '] => {
1305 let mut amount = 0;
1306 while state.source.read(1) == [b' '] {
1307 amount += 1;
1308 state.source.next();
1309 }
1310 (DocStringIndentationKind::Space, amount)
1311 }
1312 [b'\t'] => {
1313 let mut amount = 0;
1314 while state.source.read(1) == [b'\t'] {
1315 amount += 1;
1316 state.source.next();
1317 }
1318 (DocStringIndentationKind::Tab, amount)
1319 }
1320 _ => (DocStringIndentationKind::None, 0),
1321 };
1322
1323 let mut extra_whitespace_buffer = Vec::new();
1327 while let [b @ b' ' | b @ b'\t'] = state.source.read(1) {
1328 extra_whitespace_buffer.push(b);
1329 state.source.next();
1330 }
1331
1332 if state.source.at(&label, label.len()) {
1335 if whitespace_kind != DocStringIndentationKind::None
1338 && !extra_whitespace_buffer.is_empty()
1339 {
1340 return Err(SyntaxError::InvalidDocIndentation(state.source.span()));
1341 }
1342
1343 state.source.skip(label.len());
1347 state.replace(StackFrame::Scripting);
1348 break (
1349 TokenKind::EndDocString(whitespace_kind, whitespace_amount),
1350 label,
1351 );
1352 } else {
1353 if whitespace_kind != DocStringIndentationKind::None {
1357 let whitespace_char: u8 = whitespace_kind.into();
1358 for _ in 0..whitespace_amount {
1359 buffer.push(whitespace_char);
1360 }
1361 }
1362
1363 buffer.extend(extra_whitespace_buffer);
1364 }
1365 }
1366 &[b, ..] => {
1367 state.source.next();
1368 buffer.push(b);
1369 }
1370 [] => return Err(SyntaxError::UnexpectedEndOfFile(state.source.span())),
1371 }
1372 };
1373
1374 if buffer.last() == Some(&b'\n') {
1376 buffer.pop();
1377 }
1378
1379 if !buffer.is_empty() {
1380 tokens.push(Token {
1381 kind: TokenKind::StringPart,
1382 span,
1383 value: buffer.into(),
1384 })
1385 }
1386
1387 tokens.push(Token { kind, span, value });
1388
1389 Ok(())
1390 }
1391
1392 fn looking_for_varname(&self, state: &mut State) -> SyntaxResult<Option<Token>> {
1393 let identifier = self.peek_identifier(state);
1394
1395 if let Some(ident) = identifier {
1396 if let [b'[' | b'}'] = state.source.peek(ident.len(), 1) {
1397 let ident = ident.to_vec();
1398 let span = state.source.span();
1399 state.source.skip(ident.len());
1400 state.replace(StackFrame::Scripting);
1401 return Ok(Some(Token {
1402 kind: TokenKind::Identifier,
1403 span,
1404 value: ident.into(),
1405 }));
1406 }
1407 }
1408
1409 state.replace(StackFrame::Scripting);
1410
1411 Ok(None)
1412 }
1413
1414 fn looking_for_property(&self, state: &mut State) -> SyntaxResult<Token> {
1415 let span = state.source.span();
1416 let (kind, value) = match state.source.read(3) {
1417 [b'?', b'-', b'>'] => {
1418 state.source.skip(3);
1419 (TokenKind::QuestionArrow, b"?->".into())
1420 }
1421 [b'-', b'>', ..] => {
1422 state.source.skip(2);
1423 (TokenKind::Arrow, b"->".into())
1424 }
1425 &[ident_start!(), ..] => {
1426 let buffer = self.consume_identifier(state);
1427 state.exit();
1428 (TokenKind::Identifier, buffer.into())
1429 }
1430 _ => unreachable!(),
1432 };
1433
1434 Ok(Token { kind, span, value })
1435 }
1436
1437 fn var_offset(&self, state: &mut State) -> SyntaxResult<Token> {
1438 let span = state.source.span();
1439 let (kind, value) = match state.source.read(2) {
1440 [b'$', ident_start!()] => self.tokenize_variable(state),
1441 [b'0'..=b'9', ..] => {
1442 self.tokenize_number(state)?
1446 }
1447 [b'[', ..] => {
1448 state.source.next();
1449 (TokenKind::LeftBracket, b"[".into())
1450 }
1451 [b'-', ..] => {
1452 state.source.next();
1453 (TokenKind::Minus, b"-".into())
1454 }
1455 [b']', ..] => {
1456 state.source.next();
1457 state.exit();
1458 (TokenKind::RightBracket, b"]".into())
1459 }
1460 &[ident_start!(), ..] => {
1461 let label = self.consume_identifier(state);
1462 (TokenKind::Identifier, label.into())
1463 }
1464 &[b, ..] => return Err(SyntaxError::UnrecognisedToken(b, state.source.span())),
1465 [] => return Err(SyntaxError::UnexpectedEndOfFile(state.source.span())),
1466 };
1467 Ok(Token { kind, span, value })
1468 }
1469
1470 fn tokenize_single_quote_string(
1471 &self,
1472 state: &mut State,
1473 ) -> SyntaxResult<(TokenKind, ByteString)> {
1474 let mut buffer = vec![];
1475
1476 loop {
1477 match state.source.read(2) {
1478 [b'\'', ..] => {
1479 state.source.next();
1480 break;
1481 }
1482 &[b'\\', b @ b'\'' | b @ b'\\'] => {
1483 state.source.skip(2);
1484 buffer.push(b);
1485 }
1486 &[b, ..] => {
1487 state.source.next();
1488 buffer.push(b);
1489 }
1490 [] => return Err(SyntaxError::UnexpectedEndOfFile(state.source.span())),
1491 }
1492 }
1493
1494 Ok((TokenKind::LiteralSingleQuotedString, buffer.into()))
1495 }
1496
1497 fn tokenize_double_quote_string(
1498 &self,
1499 state: &mut State,
1500 ) -> SyntaxResult<(TokenKind, ByteString)> {
1501 let mut buffer = vec![];
1502
1503 let constant = loop {
1504 match state.source.read(3) {
1505 [b'"', ..] => {
1506 state.source.next();
1507 break true;
1508 }
1509 &[b'\\', b @ (b'"' | b'\\' | b'$'), ..] => {
1510 state.source.skip(2);
1511 buffer.push(b);
1512 }
1513 &[b'\\', b'n', ..] => {
1514 state.source.skip(2);
1515 buffer.push(b'\n');
1516 }
1517 &[b'\\', b'r', ..] => {
1518 state.source.skip(2);
1519 buffer.push(b'\r');
1520 }
1521 &[b'\\', b't', ..] => {
1522 state.source.skip(2);
1523 buffer.push(b'\t');
1524 }
1525 &[b'\\', b'v', ..] => {
1526 state.source.skip(2);
1527 buffer.push(b'\x0b');
1528 }
1529 &[b'\\', b'e', ..] => {
1530 state.source.skip(2);
1531 buffer.push(b'\x1b');
1532 }
1533 &[b'\\', b'f', ..] => {
1534 state.source.skip(2);
1535 buffer.push(b'\x0c');
1536 }
1537 &[b'\\', b'x', b @ (b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')] => {
1538 state.source.skip(3);
1539
1540 let mut hex = String::from(b as char);
1541 if let Some(b @ (b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) =
1542 state.source.current()
1543 {
1544 state.source.next();
1545 hex.push(*b as char);
1546 }
1547
1548 let b = u8::from_str_radix(&hex, 16).unwrap();
1549 buffer.push(b);
1550 }
1551 &[b'\\', b'u', b'{'] => {
1552 state.source.skip(3);
1553
1554 let mut code_point = String::new();
1555 while let Some(b @ (b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) =
1556 state.source.current()
1557 {
1558 state.source.next();
1559 code_point.push(*b as char);
1560 }
1561
1562 if code_point.is_empty() || state.source.current() != Some(&b'}') {
1563 return Err(SyntaxError::InvalidUnicodeEscape(state.source.span()));
1564 }
1565 state.source.next();
1566
1567 let c = if let Ok(c) = u32::from_str_radix(&code_point, 16) {
1568 c
1569 } else {
1570 return Err(SyntaxError::InvalidUnicodeEscape(state.source.span()));
1571 };
1572
1573 if let Some(c) = char::from_u32(c) {
1574 let mut tmp = [0; 4];
1575 let bytes = c.encode_utf8(&mut tmp);
1576 buffer.extend(bytes.as_bytes());
1577 } else {
1578 return Err(SyntaxError::InvalidUnicodeEscape(state.source.span()));
1579 }
1580 }
1581 &[b'\\', b @ b'0'..=b'7', ..] => {
1582 state.source.skip(2);
1583
1584 let mut octal = String::from(b as char);
1585 if let Some(b @ b'0'..=b'7') = state.source.current() {
1586 state.source.next();
1587 octal.push(*b as char);
1588 }
1589
1590 if let Some(b @ b'0'..=b'7') = state.source.current() {
1591 state.source.next();
1592 octal.push(*b as char);
1593 }
1594
1595 if let Ok(b) = u8::from_str_radix(&octal, 8) {
1596 buffer.push(b);
1597 } else {
1598 return Err(SyntaxError::InvalidOctalEscape(state.source.span()));
1599 }
1600 }
1601 [b'$', ident_start!(), ..] | [b'{', b'$', ..] | [b'$', b'{', ..] => {
1602 break false;
1603 }
1604 &[b, ..] => {
1605 state.source.next();
1606 buffer.push(b);
1607 }
1608 [] => return Err(SyntaxError::UnexpectedEndOfFile(state.source.span())),
1609 }
1610 };
1611
1612 Ok(if constant {
1613 (TokenKind::LiteralDoubleQuotedString, buffer.into())
1614 } else {
1615 state.replace(StackFrame::DoubleQuote);
1616 (TokenKind::StringPart, buffer.into())
1617 })
1618 }
1619
1620 fn peek_identifier<'a>(&'a self, state: &'a State) -> Option<&'a [u8]> {
1621 let mut size = 0;
1622
1623 if let [ident_start!()] = state.source.read(1) {
1624 size += 1;
1625 while let [ident!()] = state.source.peek(size, 1) {
1626 size += 1;
1627 }
1628
1629 Some(state.source.read(size))
1630 } else {
1631 None
1632 }
1633 }
1634
1635 fn consume_identifier(&self, state: &mut State) -> Vec<u8> {
1636 let ident = self.peek_identifier(state).unwrap().to_vec();
1637 state.source.skip(ident.len());
1638
1639 ident
1640 }
1641
1642 fn tokenize_variable(&self, state: &mut State) -> (TokenKind, ByteString) {
1643 let mut var = state.source.read_and_skip(1).to_vec();
1644 var.extend(self.consume_identifier(state));
1645 (TokenKind::Variable, var.into())
1646 }
1647
1648 fn tokenize_number(&self, state: &mut State) -> SyntaxResult<(TokenKind, ByteString)> {
1649 let mut buffer = Vec::new();
1650
1651 let (base, kind) = match state.source.read(2) {
1652 [a @ b'0', b @ b'B' | b @ b'b'] => {
1653 buffer.push(*a);
1654 buffer.push(*b);
1655 state.source.skip(2);
1656 (2, NumberKind::Int)
1657 }
1658 [a @ b'0', b @ b'O' | b @ b'o'] => {
1659 buffer.push(*a);
1660 buffer.push(*b);
1661 state.source.skip(2);
1662 (8, NumberKind::Int)
1663 }
1664 [a @ b'0', b @ b'X' | b @ b'x'] => {
1665 buffer.push(*a);
1666 buffer.push(*b);
1667 state.source.skip(2);
1668 (16, NumberKind::Int)
1669 }
1670 [b'0', ..] => (10, NumberKind::OctalOrFloat),
1671 [b'.', ..] => (10, NumberKind::Float),
1672 _ => (10, NumberKind::IntOrFloat),
1673 };
1674
1675 if kind != NumberKind::Float {
1676 self.read_digits(state, &mut buffer, base);
1677 if kind == NumberKind::Int {
1678 return parse_int(&buffer);
1679 }
1680 }
1681
1682 let is_float = matches!(
1684 state.source.read(3),
1685 [b'.', ..] | [b'e' | b'E', b'-' | b'+', b'0'..=b'9'] | [b'e' | b'E', b'0'..=b'9', ..]
1686 );
1687
1688 if !is_float {
1689 return parse_int(&buffer);
1690 }
1691
1692 if let Some(b'.') = state.source.current() {
1693 buffer.push(b'.');
1694 state.source.next();
1695 self.read_digits(state, &mut buffer, 10);
1696 }
1697
1698 if let Some(b'e' | b'E') = state.source.current() {
1699 buffer.push(b'e');
1700 state.source.next();
1701 if let Some(b @ (b'-' | b'+')) = state.source.current() {
1702 buffer.push(*b);
1703 state.source.next();
1704 }
1705 self.read_digits(state, &mut buffer, 10);
1706 }
1707
1708 Ok((TokenKind::LiteralFloat, buffer.into()))
1709 }
1710
1711 fn read_digits(&self, state: &mut State, buffer: &mut Vec<u8>, base: usize) {
1712 if base == 16 {
1713 self.read_digits_fn(state, buffer, u8::is_ascii_hexdigit);
1714 } else {
1715 let max = b'0' + base as u8;
1716 self.read_digits_fn(state, buffer, |b| (b'0'..max).contains(b));
1717 };
1718 }
1719
1720 fn read_digits_fn<F: Fn(&u8) -> bool>(
1721 &self,
1722 state: &mut State,
1723 buffer: &mut Vec<u8>,
1724 is_digit: F,
1725 ) {
1726 if let Some(b) = state.source.current() {
1727 if is_digit(b) {
1728 state.source.next();
1729 buffer.push(*b);
1730 } else {
1731 return;
1732 }
1733 }
1734
1735 loop {
1736 match state.source.read(2) {
1737 [b, ..] if is_digit(b) => {
1738 state.source.next();
1739 buffer.push(*b);
1740 }
1741 [b'_', b] if is_digit(b) => {
1742 state.source.next();
1743 state.source.next();
1744 buffer.push(*b);
1745 }
1746 _ => {
1747 break;
1748 }
1749 }
1750 }
1751 }
1752}
1753
1754fn parse_int(buffer: &[u8]) -> SyntaxResult<(TokenKind, ByteString)> {
1757 Ok((TokenKind::LiteralInteger, buffer.into()))
1758}
1759
1760#[inline(always)]
1761fn identifier_to_keyword(ident: &[u8]) -> Option<TokenKind> {
1762 Some(match ident.to_ascii_lowercase().as_slice() {
1763 b"eval" => TokenKind::Eval,
1764 b"die" => TokenKind::Die,
1765 b"empty" => TokenKind::Empty,
1766 b"isset" => TokenKind::Isset,
1767 b"unset" => TokenKind::Unset,
1768 b"exit" => TokenKind::Exit,
1769 b"enddeclare" => TokenKind::EndDeclare,
1770 b"endswitch" => TokenKind::EndSwitch,
1771 b"endfor" => TokenKind::EndFor,
1772 b"endwhile" => TokenKind::EndWhile,
1773 b"endforeach" => TokenKind::EndForeach,
1774 b"endif" => TokenKind::EndIf,
1775 b"from" => TokenKind::From,
1776 b"and" => TokenKind::LogicalAnd,
1777 b"or" => TokenKind::LogicalOr,
1778 b"xor" => TokenKind::LogicalXor,
1779 b"print" => TokenKind::Print,
1780 b"__halt_compiler" => TokenKind::HaltCompiler,
1781 b"readonly" => TokenKind::Readonly,
1782 b"global" => TokenKind::Global,
1783 b"match" => TokenKind::Match,
1784 b"abstract" => TokenKind::Abstract,
1785 b"array" => TokenKind::Array,
1786 b"as" => TokenKind::As,
1787 b"break" => TokenKind::Break,
1788 b"case" => TokenKind::Case,
1789 b"catch" => TokenKind::Catch,
1790 b"class" => TokenKind::Class,
1791 b"clone" => TokenKind::Clone,
1792 b"continue" => TokenKind::Continue,
1793 b"const" => TokenKind::Const,
1794 b"declare" => TokenKind::Declare,
1795 b"default" => TokenKind::Default,
1796 b"do" => TokenKind::Do,
1797 b"echo" => TokenKind::Echo,
1798 b"else" => TokenKind::Else,
1799 b"elseif" => TokenKind::ElseIf,
1800 b"enum" => TokenKind::Enum,
1801 b"extends" => TokenKind::Extends,
1802 b"false" => TokenKind::False,
1803 b"final" => TokenKind::Final,
1804 b"finally" => TokenKind::Finally,
1805 b"fn" => TokenKind::Fn,
1806 b"for" => TokenKind::For,
1807 b"foreach" => TokenKind::Foreach,
1808 b"function" => TokenKind::Function,
1809 b"goto" => TokenKind::Goto,
1810 b"if" => TokenKind::If,
1811 b"include" => TokenKind::Include,
1812 b"include_once" => TokenKind::IncludeOnce,
1813 b"implements" => TokenKind::Implements,
1814 b"interface" => TokenKind::Interface,
1815 b"instanceof" => TokenKind::Instanceof,
1816 b"namespace" => TokenKind::Namespace,
1817 b"new" => TokenKind::New,
1818 b"null" => TokenKind::Null,
1819 b"private" => TokenKind::Private,
1820 b"protected" => TokenKind::Protected,
1821 b"public" => TokenKind::Public,
1822 b"require" => TokenKind::Require,
1823 b"require_once" => TokenKind::RequireOnce,
1824 b"return" => TokenKind::Return,
1825 b"static" => TokenKind::Static,
1826 b"switch" => TokenKind::Switch,
1827 b"throw" => TokenKind::Throw,
1828 b"trait" => TokenKind::Trait,
1829 b"true" => TokenKind::True,
1830 b"try" => TokenKind::Try,
1831 b"use" => TokenKind::Use,
1832 b"var" => TokenKind::Var,
1833 b"yield" => TokenKind::Yield,
1834 b"__dir__" => TokenKind::DirConstant,
1835 b"__file__" => TokenKind::FileConstant,
1836 b"__line__" => TokenKind::LineConstant,
1837 b"__function__" => TokenKind::FunctionConstant,
1838 b"__class__" => TokenKind::ClassConstant,
1839 b"__method__" => TokenKind::MethodConstant,
1840 b"__trait__" => TokenKind::TraitConstant,
1841 b"__namespace__" => TokenKind::NamespaceConstant,
1842 b"__compiler_halt_offset__" => TokenKind::CompilerHaltOffsetConstant,
1843 b"while" => TokenKind::While,
1844 b"insteadof" => TokenKind::Insteadof,
1845 b"list" => TokenKind::List,
1846 b"self" => TokenKind::Self_,
1847 b"parent" => TokenKind::Parent,
1848 _ => return None,
1849 })
1850}
1851
/// Classification of a numeric literal, decided from its first bytes while
/// lexing.
#[derive(PartialEq, Eq, Debug)]
enum NumberKind {
    /// A literal with a base prefix (`0b`, `0o`, `0x`); always an integer.
    Int,
    /// A literal that starts with `.`; always a float.
    Float,
    /// A literal that starts with a non-zero digit; an integer unless a
    /// fractional part or exponent follows.
    IntOrFloat,
    /// A literal that starts with `0` and has no base prefix; a legacy octal
    /// integer unless a fractional part or exponent follows.
    OctalOrFloat,
}