Skip to main content

mago_syntax/token/
mod.rs

1use serde::Serialize;
2use strum::Display;
3
4use mago_database::file::FileId;
5use mago_span::HasPosition;
6use mago_span::Position;
7use mago_span::Span;
8
9use crate::T;
10
/// The flavour of document string introduced by a [`TokenKind::DocumentStart`] token.
///
/// Serialized with the serde `tag = "type"`, `content = "value"` representation.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, PartialOrd, Ord, Display)]
#[serde(tag = "type", content = "value")]
pub enum DocumentKind {
    /// A heredoc document, e.g. one opened with `<<<abc`.
    Heredoc,
    /// A nowdoc document, e.g. one opened with `<<<'abc'`.
    Nowdoc,
}
17
/// The associativity reported for a [`Precedence`] level by `Precedence::associativity`.
///
/// Serialized with the serde `tag = "type"`, `content = "value"` representation.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, PartialOrd, Ord, Display)]
#[serde(tag = "type", content = "value")]
pub enum Associativity {
    /// The level is explicitly marked as not associating in either direction.
    NonAssociative,
    /// The level associates to the left.
    Left,
    /// The level associates to the right.
    Right,
}
25
/// Operator precedence levels.
///
/// Variants are declared from [`Precedence::Lowest`] to [`Precedence::Highest`]; because
/// `PartialOrd`/`Ord` are derived, comparing two levels compares their declaration order.
/// Do not reorder variants without intending to change those comparisons.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, PartialOrd, Ord, Display)]
#[serde(tag = "type", content = "value")]
pub enum Precedence {
    /// The weakest level; also the fallback returned by `Precedence::infix` and
    /// `Precedence::postfix` for token kinds they do not list.
    Lowest,
    Print,
    YieldFrom,
    Yield,
    KeyOr,
    KeyXor,
    KeyAnd,
    Assignment,
    ElvisOrConditional,
    NullCoalesce,
    Or,
    And,
    BitwiseOr,
    BitwiseXor,
    BitwiseAnd,
    Equality,
    Comparison,
    // NOTE(azjezz): the RFC does not really specify the precedence of the `|>` operator
    // clearly, the current precedence position handles the examples shown in the RFC,
    // but will need to be verified with the actual implementation once it's merged into php-src.
    //
    // RFC: https://wiki.php.net/rfc/pipe-operator-v3
    // PR: https://github.com/php/php-src/pull/17118
    Pipe,
    Concat,
    BitShift,
    AddSub,
    MulDivMod,
    Unary,
    Instanceof,
    ErrorControl,
    Pow,
    Clone,
    IncDec,
    Reference,
    CallDim,
    New,
    ArrayDim,
    ObjectAccess,
    /// The strongest level.
    Highest,
}
70
/// Implemented by types that can report the [`Precedence`] they bind with.
pub trait GetPrecedence {
    /// Returns the precedence level of `self`.
    fn precedence(&self) -> Precedence;
}
74
/// Every kind of token that a [`Token`] can carry in its `kind` field.
///
/// The trailing comment on each variant shows the source text the variant stands for.
/// `PartialOrd`/`Ord` are derived, so the relative order of variants follows their
/// declaration order here; the variants are serialized with the serde
/// `tag = "type"`, `content = "value"` representation.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, PartialOrd, Ord, Display)]
#[serde(tag = "type", content = "value")]
pub enum TokenKind {
    Whitespace,                  // ` `
    Eval,                        // `eval`
    Die,                         // `die`
    Self_,                       // `self`
    Parent,                      // `parent`
    Backtick,                    // `` ` ``
    DocumentStart(DocumentKind), // `<<<abc`, or `<<<'abc'`
    DocumentEnd,                 // `abc`
    From,                        // `from`
    Print,                       // `print`
    Dollar,                      // `$`
    HaltCompiler,                // `__halt_compiler`
    Readonly,                    // `readonly`
    Global,                      // `global`
    Abstract,                    // `abstract`
    Ampersand,                   // `&`
    AmpersandEqual,              // `&=`
    AmpersandAmpersand,          // `&&`
    AmpersandAmpersandEqual,     // `&&=`
    Array,                       // `array`
    ArrayCast,                   // `(array)`
    MinusGreaterThan,            // `->`
    QuestionMinusGreaterThan,    // `?->`
    At,                          // `@`
    As,                          // `as`
    Asterisk,                    // `*`
    HashLeftBracket,             // `#[`
    Bang,                        // `!`
    BangEqual,                   // `!=`
    LessThanGreaterThan,         // `<>`
    BangEqualEqual,              // `!==`
    LessThanEqualGreaterThan,    // `<=>`
    BoolCast,                    // `(bool)`
    BooleanCast,                 // `(boolean)`
    And,                         // `and`
    Or,                          // `or`
    Break,                       // `break`
    Callable,                    // `callable`
    Caret,                       // `^`
    CaretEqual,                  // `^=`
    Case,                        // `case`
    Catch,                       // `catch`
    Class,                       // `class`
    ClassConstant,               // `__CLASS__`
    TraitConstant,               // `__TRAIT__`
    FunctionConstant,            // `__FUNCTION__`
    MethodConstant,              // `__METHOD__`
    LineConstant,                // `__LINE__`
    FileConstant,                // `__FILE__`
    Clone,                       // `clone`
    MinusEqual,                  // `-=`
    CloseTag,                    // `?>`
    QuestionQuestion,            // `??`
    QuestionQuestionEqual,       // `??=`
    AsteriskEqual,               // `*=`
    Colon,                       // `:`
    Comma,                       // `,`
    SingleLineComment,           // `// comment`
    HashComment,                 // `# comment`
    MultiLineComment,            // `/* comment */`
    DocBlockComment,             // `/** comment */`
    Const,                       // `const`
    PartialLiteralString,        // `"string` or `'string`, missing closing quote
    LiteralString,               // `"string"` or `'string'`
    Continue,                    // `continue`
    Declare,                     // `declare`
    MinusMinus,                  // `--`
    Default,                     // `default`
    DirConstant,                 // `__DIR__`
    SlashEqual,                  // `/=`
    Do,                          // `do`
    DollarLeftBrace,             // `${`
    Dot,                         // `.`
    DotEqual,                    // `.=`
    EqualGreaterThan,            // `=>`
    DoubleCast,                  // `(double)`
    RealCast,                    // `(real)`
    FloatCast,                   // `(float)`
    ColonColon,                  // `::`
    EqualEqual,                  // `==`
    DoubleQuote,                 // `"`
    Else,                        // `else`
    Echo,                        // `echo`
    DotDotDot,                   // `...`
    ElseIf,                      // `elseif`
    Empty,                       // `empty`
    EndDeclare,                  // `enddeclare`
    EndFor,                      // `endfor`
    EndForeach,                  // `endforeach`
    EndIf,                       // `endif`
    EndSwitch,                   // `endswitch`
    EndWhile,                    // `endwhile`
    Enum,                        // `enum`
    Equal,                       // `=`
    Extends,                     // `extends`
    False,                       // `false`
    Final,                       // `final`
    Finally,                     // `finally`
    LiteralFloat,                // `1.0`
    Fn,                          // `fn`
    For,                         // `for`
    Foreach,                     // `foreach`
    FullyQualifiedIdentifier,    // `\Namespace\Class`
    Function,                    // `function`
    Goto,                        // `goto`
    GreaterThan,                 // `>`
    GreaterThanEqual,            // `>=`
    Identifier,                  // `name`
    If,                          // `if`
    Implements,                  // `implements`
    Include,                     // `include`
    IncludeOnce,                 // `include_once`
    PlusPlus,                    // `++`
    InlineText,                  // inline text outside of PHP tags, also referred to as "HTML"
    InlineShebang,               // `#!...`
    Instanceof,                  // `instanceof`
    Insteadof,                   // `insteadof`
    Exit,                        // `exit`
    Unset,                       // `unset`
    Isset,                       // `isset`
    List,                        // `list`
    LiteralInteger,              // `1`
    IntCast,                     // `(int)`
    IntegerCast,                 // `(integer)`
    Interface,                   // `interface`
    LeftBrace,                   // `{`
    LeftBracket,                 // `[`
    LeftParenthesis,             // `(`
    LeftShift,                   // `<<`
    LeftShiftEqual,              // `<<=`
    RightShift,                  // `>>`
    RightShiftEqual,             // `>>=`
    LessThan,                    // `<`
    LessThanEqual,               // `<=`
    Match,                       // `match`
    Minus,                       // `-`
    Namespace,                   // `namespace`
    NamespaceSeparator,          // `\`
    NamespaceConstant,           // `__NAMESPACE__`
    PropertyConstant,            // `__PROPERTY__`
    New,                         // `new`
    Null,                        // `null`
    ObjectCast,                  // `(object)`
    UnsetCast,                   // `(unset)`
    OpenTag,                     // `<?php`
    EchoTag,                     // `<?=`
    ShortOpenTag,                // `<?`
    Percent,                     // `%`
    PercentEqual,                // `%=`
    Pipe,                        // `|`
    PipeEqual,                   // `|=`
    Plus,                        // `+`
    PlusEqual,                   // `+=`
    AsteriskAsterisk,            // `**`
    AsteriskAsteriskEqual,       // `**=`
    Private,                     // `private`
    PrivateSet,                  // `private(set)`
    Protected,                   // `protected`
    ProtectedSet,                // `protected(set)`
    Public,                      // `public`
    PublicSet,                   // `public(set)`
    QualifiedIdentifier,         // `Namespace\Class`
    Question,                    // `?`
    Require,                     // `require`
    RequireOnce,                 // `require_once`
    Return,                      // `return`
    RightBrace,                  // `}`
    RightBracket,                // `]`
    RightParenthesis,            // `)`
    Semicolon,                   // `;`
    Slash,                       // `/`
    Static,                      // `static`
    StringCast,                  // `(string)`
    BinaryCast,                  // `(binary)`
    VoidCast,                    // `(void)`
    StringPart,                  // `string` inside a double-quoted string, or a document string
    Switch,                      // `switch`
    Throw,                       // `throw`
    Trait,                       // `trait`
    EqualEqualEqual,             // `===`
    True,                        // `true`
    Try,                         // `try`
    Use,                         // `use`
    Var,                         // `var`
    Variable,                    // `$name`
    Yield,                       // `yield`
    While,                       // `while`
    Tilde,                       // `~`
    PipePipe,                    // `||`
    Xor,                         // `xor`
    PipeGreaterThan,             // `|>`
}
270
/// A single token: its kind, the position where it starts, and the text it carries.
///
/// The token does not store its end; `Token::span_for` derives it from `start` and the
/// byte length of `value`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, PartialOrd, Ord)]
pub struct Token<'src> {
    /// What kind of token this is.
    pub kind: TokenKind,
    /// Position at which the token begins.
    pub start: Position,
    /// The token's text, borrowed for the `'src` lifetime.
    pub value: &'src str,
}
277
278impl HasPosition for Token<'_> {
279    #[inline]
280    fn position(&self) -> Position {
281        self.start
282    }
283}
284
285impl Precedence {
286    #[inline]
287    #[must_use]
288    pub const fn infix(kind: &TokenKind) -> Precedence {
289        match kind {
290            T!["**"] => Precedence::Pow,
291            T!["instanceof"] => Precedence::Instanceof,
292            T!["*" | "/" | "%"] => Precedence::MulDivMod,
293            T!["+" | "-"] => Precedence::AddSub,
294            T!["<<"] | T![">>"] => Precedence::BitShift,
295            T!["."] => Precedence::Concat,
296            T!["<" | "<=" | ">" | ">="] => Precedence::Comparison,
297            T!["==" | "!=" | "===" | "!==" | "<>" | "<=>"] => Precedence::Equality,
298            T!["&"] => Precedence::BitwiseAnd,
299            T!["^"] => Precedence::BitwiseXor,
300            T!["|"] => Precedence::BitwiseOr,
301            T!["&&"] => Precedence::And,
302            T!["||"] => Precedence::Or,
303            T!["??"] => Precedence::NullCoalesce,
304            T!["?"] => Precedence::ElvisOrConditional,
305            T!["="
306                | "+="
307                | "-="
308                | "*="
309                | "**="
310                | "/="
311                | ".="
312                | "&&="
313                | "??="
314                | "%="
315                | "&="
316                | "|="
317                | "^="
318                | "<<="
319                | ">>="] => Precedence::Assignment,
320            T!["yield"] => Precedence::Yield,
321            T!["and"] => Precedence::KeyAnd,
322            T!["or"] => Precedence::KeyOr,
323            T!["xor"] => Precedence::KeyXor,
324            T!["print"] => Precedence::Print,
325            T!["|>"] => Precedence::Pipe,
326            _ => Precedence::Lowest,
327        }
328    }
329
330    #[inline]
331    #[must_use]
332    pub const fn postfix(kind: &TokenKind) -> Self {
333        match kind {
334            T!["++" | "--"] => Self::IncDec,
335            T!["("] => Self::CallDim,
336            T!["["] => Self::ArrayDim,
337            T!["->" | "?->" | "::"] => Self::ObjectAccess,
338            _ => Self::Lowest,
339        }
340    }
341
342    #[inline]
343    #[must_use]
344    pub const fn associativity(&self) -> Option<Associativity> {
345        Some(match self {
346            Self::MulDivMod
347            | Self::AddSub
348            | Self::Concat
349            | Self::BitShift
350            | Self::BitwiseAnd
351            | Self::BitwiseOr
352            | Self::BitwiseXor
353            | Self::And
354            | Self::Or
355            | Self::KeyAnd
356            | Self::KeyXor
357            | Self::KeyOr
358            | Self::Pipe
359            | Self::ElvisOrConditional
360            | Self::ObjectAccess => Associativity::Left,
361            Self::Pow | Self::NullCoalesce | Self::Assignment | Self::Unary | Self::New => Associativity::Right,
362            Self::Equality | Self::Comparison | Self::Instanceof => Associativity::NonAssociative,
363            _ => return None,
364        })
365    }
366
367    #[inline]
368    #[must_use]
369    pub const fn is_associative(&self) -> bool {
370        self.associativity().is_some()
371    }
372
373    #[inline]
374    #[must_use]
375    pub const fn is_right_associative(&self) -> bool {
376        matches!(self.associativity(), Some(Associativity::Right))
377    }
378
379    #[inline]
380    #[must_use]
381    pub const fn is_left_associative(&self) -> bool {
382        matches!(self.associativity(), Some(Associativity::Left))
383    }
384
385    #[inline]
386    #[must_use]
387    pub const fn is_non_associative(&self) -> bool {
388        matches!(self.associativity(), Some(Associativity::NonAssociative))
389    }
390}
391
392impl TokenKind {
393    #[inline]
394    #[must_use]
395    pub const fn is_keyword(&self) -> bool {
396        matches!(
397            self,
398            TokenKind::Eval
399                | TokenKind::Die
400                | TokenKind::Empty
401                | TokenKind::Isset
402                | TokenKind::Unset
403                | TokenKind::Exit
404                | TokenKind::EndDeclare
405                | TokenKind::EndSwitch
406                | TokenKind::EndWhile
407                | TokenKind::EndForeach
408                | TokenKind::EndFor
409                | TokenKind::EndIf
410                | TokenKind::From
411                | TokenKind::And
412                | TokenKind::Or
413                | TokenKind::Xor
414                | TokenKind::Print
415                | TokenKind::Readonly
416                | TokenKind::Global
417                | TokenKind::Match
418                | TokenKind::Abstract
419                | TokenKind::Array
420                | TokenKind::As
421                | TokenKind::Break
422                | TokenKind::Case
423                | TokenKind::Catch
424                | TokenKind::Class
425                | TokenKind::Clone
426                | TokenKind::Continue
427                | TokenKind::Const
428                | TokenKind::Declare
429                | TokenKind::Default
430                | TokenKind::Do
431                | TokenKind::Echo
432                | TokenKind::ElseIf
433                | TokenKind::Else
434                | TokenKind::Enum
435                | TokenKind::Extends
436                | TokenKind::False
437                | TokenKind::Finally
438                | TokenKind::Final
439                | TokenKind::Fn
440                | TokenKind::Foreach
441                | TokenKind::For
442                | TokenKind::Function
443                | TokenKind::Goto
444                | TokenKind::If
445                | TokenKind::IncludeOnce
446                | TokenKind::Include
447                | TokenKind::Implements
448                | TokenKind::Interface
449                | TokenKind::Instanceof
450                | TokenKind::Namespace
451                | TokenKind::New
452                | TokenKind::Null
453                | TokenKind::Private
454                | TokenKind::PrivateSet
455                | TokenKind::Protected
456                | TokenKind::Public
457                | TokenKind::RequireOnce
458                | TokenKind::Require
459                | TokenKind::Return
460                | TokenKind::Static
461                | TokenKind::Switch
462                | TokenKind::Throw
463                | TokenKind::Trait
464                | TokenKind::True
465                | TokenKind::Try
466                | TokenKind::Use
467                | TokenKind::Var
468                | TokenKind::Yield
469                | TokenKind::While
470                | TokenKind::Insteadof
471                | TokenKind::List
472                | TokenKind::Self_
473                | TokenKind::Parent
474                | TokenKind::DirConstant
475                | TokenKind::FileConstant
476                | TokenKind::LineConstant
477                | TokenKind::FunctionConstant
478                | TokenKind::ClassConstant
479                | TokenKind::MethodConstant
480                | TokenKind::TraitConstant
481                | TokenKind::NamespaceConstant
482                | TokenKind::PropertyConstant
483                | TokenKind::HaltCompiler
484        )
485    }
486
487    #[inline]
488    #[must_use]
489    pub const fn is_infix(&self) -> bool {
490        matches!(
491            self,
492            T!["**"
493                | ">>="
494                | "<<="
495                | "^="
496                | "&="
497                | "|="
498                | "%="
499                | "**="
500                | "and"
501                | "or"
502                | "xor"
503                | "<=>"
504                | "<<"
505                | ">>"
506                | "&"
507                | "|"
508                | "^"
509                | "%"
510                | "instanceof"
511                | "*"
512                | "/"
513                | "+"
514                | "-"
515                | "."
516                | "<"
517                | ">"
518                | "<="
519                | ">="
520                | "=="
521                | "==="
522                | "!="
523                | "!=="
524                | "<>"
525                | "?"
526                | "&&"
527                | "||"
528                | "="
529                | "+="
530                | "-="
531                | ".="
532                | "??="
533                | "/="
534                | "*="
535                | "??"
536                | "|>"]
537        )
538    }
539
540    #[inline]
541    #[must_use]
542    pub const fn is_postfix(&self) -> bool {
543        matches!(self, T!["++" | "--" | "(" | "[" | "->" | "?->" | "::"])
544    }
545
546    #[inline]
547    #[must_use]
548    pub const fn is_visibility_modifier(&self) -> bool {
549        matches!(self, T!["public" | "protected" | "private" | "private(set)" | "protected(set)" | "public(set)"])
550    }
551
552    #[inline]
553    #[must_use]
554    pub const fn is_modifier(&self) -> bool {
555        matches!(
556            self,
557            T!["public"
558                | "protected"
559                | "private"
560                | "private(set)"
561                | "protected(set)"
562                | "public(set)"
563                | "static"
564                | "final"
565                | "abstract"
566                | "readonly"]
567        )
568    }
569
570    #[inline]
571    #[must_use]
572    pub const fn is_identifier_maybe_soft_reserved(&self) -> bool {
573        if let TokenKind::Identifier = self { true } else { self.is_soft_reserved_identifier() }
574    }
575
576    #[inline]
577    #[must_use]
578    pub const fn is_identifier_maybe_reserved(&self) -> bool {
579        if let TokenKind::Identifier = self { true } else { self.is_reserved_identifier() }
580    }
581
582    #[inline]
583    #[must_use]
584    pub const fn is_soft_reserved_identifier(&self) -> bool {
585        matches!(
586            self,
587            T!["parent" | "self" | "true" | "false" | "list" | "null" | "enum" | "from" | "readonly" | "match"]
588        )
589    }
590
591    #[inline]
592    #[must_use]
593    pub const fn is_reserved_identifier(&self) -> bool {
594        if self.is_soft_reserved_identifier() {
595            return true;
596        }
597
598        matches!(
599            self,
600            T!["static"
601                | "abstract"
602                | "final"
603                | "for"
604                | "private"
605                | "private(set)"
606                | "protected"
607                | "protected(set)"
608                | "public"
609                | "public(set)"
610                | "include"
611                | "include_once"
612                | "eval"
613                | "require"
614                | "require_once"
615                | "or"
616                | "xor"
617                | "and"
618                | "instanceof"
619                | "new"
620                | "clone"
621                | "exit"
622                | "die"
623                | "if"
624                | "elseif"
625                | "else"
626                | "endif"
627                | "echo"
628                | "do"
629                | "while"
630                | "endwhile"
631                | "endfor"
632                | "foreach"
633                | "endforeach"
634                | "declare"
635                | "enddeclare"
636                | "as"
637                | "try"
638                | "catch"
639                | "finally"
640                | "throw"
641                | "use"
642                | "insteadof"
643                | "global"
644                | "var"
645                | "unset"
646                | "isset"
647                | "empty"
648                | "continue"
649                | "goto"
650                | "function"
651                | "const"
652                | "return"
653                | "print"
654                | "yield"
655                | "list"
656                | "switch"
657                | "endswitch"
658                | "case"
659                | "default"
660                | "break"
661                | "array"
662                | "callable"
663                | "extends"
664                | "implements"
665                | "namespace"
666                | "trait"
667                | "interface"
668                | "class"
669                | "__CLASS__"
670                | "__TRAIT__"
671                | "__FUNCTION__"
672                | "__METHOD__"
673                | "__LINE__"
674                | "__FILE__"
675                | "__DIR__"
676                | "__NAMESPACE__"
677                | "__PROPERTY__"
678                | "__halt_compiler"
679                | "fn"
680                | "match"]
681        )
682    }
683
684    #[inline]
685    #[must_use]
686    pub const fn is_literal(&self) -> bool {
687        matches!(
688            self,
689            T!["true" | "false" | "null" | LiteralFloat | LiteralInteger | LiteralString | PartialLiteralString]
690        )
691    }
692
693    #[inline]
694    #[must_use]
695    pub const fn is_magic_constant(&self) -> bool {
696        matches!(
697            self,
698            T!["__CLASS__"
699                | "__DIR__"
700                | "__FILE__"
701                | "__FUNCTION__"
702                | "__LINE__"
703                | "__METHOD__"
704                | "__NAMESPACE__"
705                | "__PROPERTY__"
706                | "__TRAIT__"]
707        )
708    }
709
710    #[inline]
711    #[must_use]
712    pub const fn is_cast(&self) -> bool {
713        matches!(
714            self,
715            T!["(string)"
716                | "(binary)"
717                | "(int)"
718                | "(integer)"
719                | "(float)"
720                | "(double)"
721                | "(real)"
722                | "(bool)"
723                | "(boolean)"
724                | "(array)"
725                | "(object)"
726                | "(unset)"
727                | "(void)"]
728        )
729    }
730
731    #[inline]
732    #[must_use]
733    pub const fn is_unary_prefix(&self) -> bool {
734        if self.is_cast() {
735            return true;
736        }
737
738        matches!(self, T!["@" | "!" | "~" | "-" | "+" | "++" | "--"])
739    }
740
741    #[inline]
742    #[must_use]
743    pub const fn is_trivia(&self) -> bool {
744        matches!(self, T![SingleLineComment | MultiLineComment | DocBlockComment | HashComment | Whitespace])
745    }
746
747    #[inline]
748    #[must_use]
749    pub const fn is_comment(&self) -> bool {
750        matches!(self, T![SingleLineComment | MultiLineComment | DocBlockComment | HashComment])
751    }
752
753    #[inline]
754    #[must_use]
755    pub const fn is_comma(&self) -> bool {
756        matches!(self, T![","])
757    }
758
759    #[inline]
760    #[must_use]
761    pub const fn is_construct(&self) -> bool {
762        matches!(
763            self,
764            T!["isset"
765                | "empty"
766                | "eval"
767                | "include"
768                | "include_once"
769                | "require"
770                | "require_once"
771                | "print"
772                | "unset"
773                | "exit"
774                | "die"]
775        )
776    }
777}
778
779impl<'arena> Token<'arena> {
780    #[inline]
781    #[must_use]
782    pub const fn new(kind: TokenKind, value: &'arena str, start: Position) -> Self {
783        Self { kind, start, value }
784    }
785
786    /// Constructs a `Span` for this token given the file ID.
787    ///
788    /// The span is computed from the token's start position and its value length.
789    #[inline]
790    #[must_use]
791    pub const fn span_for(&self, file_id: FileId) -> Span {
792        let end = Position::new(self.start.offset + self.value.len() as u32);
793        Span::new(file_id, self.start, end)
794    }
795}
796
797impl std::fmt::Display for Token<'_> {
798    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
799        write!(f, "{}({})", self.kind, self.value)
800    }
801}