// Source file: mago_syntax/token/mod.rs

1use serde::Serialize;
2use strum::Display;
3
4use mago_database::file::FileId;
5use mago_span::Position;
6use mago_span::Span;
7
8use crate::T;
9
10#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, PartialOrd, Ord, Display)]
11#[serde(tag = "type", content = "value")]
12pub enum DocumentKind {
13    Heredoc,
14    Nowdoc,
15}
16
17#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, PartialOrd, Ord, Display)]
18#[serde(tag = "type", content = "value")]
19pub enum Associativity {
20    NonAssociative,
21    Left,
22    Right,
23}
24
25#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, PartialOrd, Ord, Display)]
26#[serde(tag = "type", content = "value")]
27pub enum Precedence {
28    Lowest,
29    Print,
30    YieldFrom,
31    Yield,
32    KeyOr,
33    KeyXor,
34    KeyAnd,
35    Assignment,
36    ElvisOrConditional,
37    NullCoalesce,
38    Or,
39    And,
40    BitwiseOr,
41    BitwiseXor,
42    BitwiseAnd,
43    Equality,
44    Comparison,
45    // NOTE(azjezz): the RFC does not really specify the precedence of the `|>` operator
46    // clearly, the current precedence position handles the examples shown in the RFC,
47    // but will need to be verified with the actual implementation once its merged into php-src.
48    //
49    // RFC: https://wiki.php.net/rfc/pipe-operator-v3
50    // PR: https://github.com/php/php-src/pull/17118
51    Pipe,
52    Concat,
53    BitShift,
54    AddSub,
55    MulDivMod,
56    Unary,
57    Instanceof,
58    ErrorControl,
59    Pow,
60    Clone,
61    IncDec,
62    Reference,
63    CallDim,
64    New,
65    ArrayDim,
66    ObjectAccess,
67    Highest,
68}
69
70pub trait GetPrecedence {
71    fn precedence(&self) -> Precedence;
72}
73
74#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, PartialOrd, Ord, Display)]
75#[serde(tag = "type", content = "value")]
76pub enum TokenKind {
77    Whitespace,                  // ` `
78    Eval,                        // `eval`
79    Die,                         // `die`
80    Self_,                       // `self`
81    Parent,                      // `parent`
82    Backtick,                    // `` ` ``
83    DocumentStart(DocumentKind), // `<<<abc`, or `<<<'abc'`
84    DocumentEnd,                 // `abc`
85    From,                        // `from`
86    Print,                       // `print`
87    Dollar,                      // `$`
88    HaltCompiler,                // `__halt_compiler`
89    Readonly,                    // `readonly`
90    Global,                      // `global`
91    Abstract,                    // `abstract`
92    Ampersand,                   // `&`
93    AmpersandEqual,              // `&=`
94    AmpersandAmpersand,          // `&&`
95    AmpersandAmpersandEqual,     // `&&=`
96    Array,                       // `array`
97    ArrayCast,                   // `(array)`
98    MinusGreaterThan,            // `->`
99    QuestionMinusGreaterThan,    // `?->`
100    At,                          // `@`
101    As,                          // `as`
102    Asterisk,                    // `*`
103    HashLeftBracket,             // `#[`
104    Bang,                        // `!`
105    BangEqual,                   // `!=`
106    LessThanGreaterThan,         // `<>`
107    BangEqualEqual,              // `!==`
108    LessThanEqualGreaterThan,    // `<=>`
109    BoolCast,                    // `(bool)`
110    BooleanCast,                 // `(boolean)`
111    And,                         // `and`
112    Or,                          // `or`
113    Break,                       // `break`
114    Callable,                    // `callable`
115    Caret,                       // `^`
116    CaretEqual,                  // `^=`
117    Case,                        // `case`
118    Catch,                       // `catch`
119    Class,                       // `class`
120    ClassConstant,               // `__CLASS__`
121    TraitConstant,               // `__TRAIT__`
122    FunctionConstant,            // `__FUNCTION__`
123    MethodConstant,              // `__METHOD__`
124    LineConstant,                // `__LINE__`
125    FileConstant,                // `__FILE__`
126    Clone,                       // `clone`
127    MinusEqual,                  // `-=`
128    CloseTag,                    // `?>`
129    QuestionQuestion,            // `??`
130    QuestionQuestionEqual,       // `??=`
131    AsteriskEqual,               // `*=`
132    Colon,                       // `:`
133    Comma,                       // `,`
134    SingleLineComment,           // `// comment`
135    HashComment,                 // `# comment`
136    MultiLineComment,            // `/* comment */`
137    DocBlockComment,             // `/** comment */`
138    Const,                       // `const`
139    PartialLiteralString,        // `"string` or `'string`, missing closing quote
140    LiteralString,               // `"string"` or `'string'`
141    Continue,                    // `continue`
142    Declare,                     // `declare`
143    MinusMinus,                  // `--`
144    Default,                     // `default`
145    DirConstant,                 // `__DIR__`
146    SlashEqual,                  // `/=`
147    Do,                          // `do`
148    DollarLeftBrace,             // `${`
149    Dot,                         // `.`
150    DotEqual,                    // `.=`
151    EqualGreaterThan,            // `=>`
152    DoubleCast,                  // `(double)`
153    RealCast,                    // `(real)`
154    FloatCast,                   // `(float)`
155    ColonColon,                  // `::`
156    EqualEqual,                  // `==`
157    DoubleQuote,                 // `"`
158    Else,                        // `else`
159    Echo,                        // `echo`
160    DotDotDot,                   // `...`
161    ElseIf,                      // `elseif`
162    Empty,                       // `empty`
163    EndDeclare,                  // `enddeclare`
164    EndFor,                      // `endfor`
165    EndForeach,                  // `endforeach`
166    EndIf,                       // `endif`
167    EndSwitch,                   // `endswitch`
168    EndWhile,                    // `endwhile`
169    Enum,                        // `enum`
170    Equal,                       // `=`
171    Extends,                     // `extends`
172    False,                       // `false`
173    Final,                       // `final`
174    Finally,                     // `finally`
175    LiteralFloat,                // `1.0`
176    Fn,                          // `fn`
177    For,                         // `for`
178    Foreach,                     // `foreach`
179    FullyQualifiedIdentifier,    // `\Namespace\Class`
180    Function,                    // `function`
181    Goto,                        // `goto`
182    GreaterThan,                 // `>`
183    GreaterThanEqual,            // `>=`
184    Identifier,                  // `name`
185    If,                          // `if`
186    Implements,                  // `implements`
187    Include,                     // `include`
188    IncludeOnce,                 // `include_once`
189    PlusPlus,                    // `++`
190    InlineText,                  // inline text outside of PHP tags, also referred to as "HTML"
191    InlineShebang,               // `#!...`
192    Instanceof,                  // `instanceof`
193    Insteadof,                   // `insteadof`
194    Exit,                        // `exit`
195    Unset,                       // `unset`
196    Isset,                       // `isset`
197    List,                        // `list`
198    LiteralInteger,              // `1`
199    IntCast,                     // `(int)`
200    IntegerCast,                 // `(integer)`
201    Interface,                   // `interface`
202    LeftBrace,                   // `{`
203    LeftBracket,                 // `[`
204    LeftParenthesis,             // `(`
205    LeftShift,                   // `<<`
206    LeftShiftEqual,              // `<<=`
207    RightShift,                  // `>>`
208    RightShiftEqual,             // `>>=`
209    LessThan,                    // `<`
210    LessThanEqual,               // `<=`
211    Match,                       // `match`
212    Minus,                       // `-`
213    Namespace,                   // `namespace`
214    NamespaceSeparator,          // `\`
215    NamespaceConstant,           // `__NAMESPACE__`
216    PropertyConstant,            // `__PROPERTY__`
217    New,                         // `new`
218    Null,                        // `null`
219    ObjectCast,                  // `(object)`
220    UnsetCast,                   // `(unset)`
221    OpenTag,                     // `<?php`
222    EchoTag,                     // `<?=`
223    ShortOpenTag,                // `<?`
224    Percent,                     // `%`
225    PercentEqual,                // `%=`
226    Pipe,                        // `|`
227    PipeEqual,                   // `|=`
228    Plus,                        // `+`
229    PlusEqual,                   // `+=`
230    AsteriskAsterisk,            // `**`
231    AsteriskAsteriskEqual,       // `**=`
232    Private,                     // `private`
233    PrivateSet,                  // `private(set)`
234    Protected,                   // `protected`
235    ProtectedSet,                // `protected(set)`
236    Public,                      // `public`
237    PublicSet,                   // `public(set)`
238    QualifiedIdentifier,         // `Namespace\Class`
239    Question,                    // `?`
240    Require,                     // `require`
241    RequireOnce,                 // `require_once`
242    Return,                      // `return`
243    RightBrace,                  // `}`
244    RightBracket,                // `]`
245    RightParenthesis,            // `)`
246    Semicolon,                   // `;`
247    Slash,                       // `/`
248    Static,                      // `static`
249    StringCast,                  // `(string)`
250    BinaryCast,                  // `(binary)`
251    VoidCast,                    // `(void)`
252    StringPart,                  // `string` inside a double-quoted string, or a document string
253    Switch,                      // `switch`
254    Throw,                       // `throw`
255    Trait,                       // `trait`
256    EqualEqualEqual,             // `===`
257    True,                        // `true`
258    Try,                         // `try`
259    Use,                         // `use`
260    Var,                         // `var`
261    Variable,                    // `$name`
262    Yield,                       // `yield`
263    While,                       // `while`
264    Tilde,                       // `~`
265    PipePipe,                    // `||`
266    Xor,                         // `xor`
267    PipeGreaterThan,             // `|>`
268}
269
270#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, PartialOrd, Ord)]
271pub struct Token<'a> {
272    pub kind: TokenKind,
273    pub start: Position,
274    pub value: &'a str,
275}
276
277impl Precedence {
278    #[inline]
279    #[must_use]
280    pub const fn infix(kind: &TokenKind) -> Precedence {
281        match kind {
282            T!["**"] => Precedence::Pow,
283            T!["instanceof"] => Precedence::Instanceof,
284            T!["*" | "/" | "%"] => Precedence::MulDivMod,
285            T!["+" | "-"] => Precedence::AddSub,
286            T!["<<"] | T![">>"] => Precedence::BitShift,
287            T!["."] => Precedence::Concat,
288            T!["<" | "<=" | ">" | ">="] => Precedence::Comparison,
289            T!["==" | "!=" | "===" | "!==" | "<>" | "<=>"] => Precedence::Equality,
290            T!["&"] => Precedence::BitwiseAnd,
291            T!["^"] => Precedence::BitwiseXor,
292            T!["|"] => Precedence::BitwiseOr,
293            T!["&&"] => Precedence::And,
294            T!["||"] => Precedence::Or,
295            T!["??"] => Precedence::NullCoalesce,
296            T!["?"] => Precedence::ElvisOrConditional,
297            T!["="
298                | "+="
299                | "-="
300                | "*="
301                | "**="
302                | "/="
303                | ".="
304                | "&&="
305                | "??="
306                | "%="
307                | "&="
308                | "|="
309                | "^="
310                | "<<="
311                | ">>="] => Precedence::Assignment,
312            T!["yield"] => Precedence::Yield,
313            T!["and"] => Precedence::KeyAnd,
314            T!["or"] => Precedence::KeyOr,
315            T!["xor"] => Precedence::KeyXor,
316            T!["print"] => Precedence::Print,
317            T!["|>"] => Precedence::Pipe,
318            _ => Precedence::Lowest,
319        }
320    }
321
322    #[inline]
323    #[must_use]
324    pub const fn postfix(kind: &TokenKind) -> Self {
325        match kind {
326            T!["++" | "--"] => Self::IncDec,
327            T!["("] => Self::CallDim,
328            T!["["] => Self::ArrayDim,
329            T!["->" | "?->" | "::"] => Self::ObjectAccess,
330            _ => Self::Lowest,
331        }
332    }
333
334    #[inline]
335    #[must_use]
336    pub const fn associativity(&self) -> Option<Associativity> {
337        Some(match self {
338            Self::MulDivMod
339            | Self::AddSub
340            | Self::Concat
341            | Self::BitShift
342            | Self::BitwiseAnd
343            | Self::BitwiseOr
344            | Self::BitwiseXor
345            | Self::And
346            | Self::Or
347            | Self::KeyAnd
348            | Self::KeyXor
349            | Self::KeyOr
350            | Self::Pipe
351            | Self::ElvisOrConditional
352            | Self::ObjectAccess => Associativity::Left,
353            Self::Pow | Self::NullCoalesce | Self::Assignment | Self::Unary | Self::New => Associativity::Right,
354            Self::Equality | Self::Comparison | Self::Instanceof => Associativity::NonAssociative,
355            _ => return None,
356        })
357    }
358
359    #[inline]
360    #[must_use]
361    pub const fn is_associative(&self) -> bool {
362        self.associativity().is_some()
363    }
364
365    #[inline]
366    #[must_use]
367    pub const fn is_right_associative(&self) -> bool {
368        matches!(self.associativity(), Some(Associativity::Right))
369    }
370
371    #[inline]
372    #[must_use]
373    pub const fn is_left_associative(&self) -> bool {
374        matches!(self.associativity(), Some(Associativity::Left))
375    }
376
377    #[inline]
378    #[must_use]
379    pub const fn is_non_associative(&self) -> bool {
380        matches!(self.associativity(), Some(Associativity::NonAssociative))
381    }
382}
383
384impl TokenKind {
385    #[inline]
386    #[must_use]
387    pub const fn is_keyword(&self) -> bool {
388        matches!(
389            self,
390            TokenKind::Eval
391                | TokenKind::Die
392                | TokenKind::Empty
393                | TokenKind::Isset
394                | TokenKind::Unset
395                | TokenKind::Exit
396                | TokenKind::EndDeclare
397                | TokenKind::EndSwitch
398                | TokenKind::EndWhile
399                | TokenKind::EndForeach
400                | TokenKind::EndFor
401                | TokenKind::EndIf
402                | TokenKind::From
403                | TokenKind::And
404                | TokenKind::Or
405                | TokenKind::Xor
406                | TokenKind::Print
407                | TokenKind::Readonly
408                | TokenKind::Global
409                | TokenKind::Match
410                | TokenKind::Abstract
411                | TokenKind::Array
412                | TokenKind::As
413                | TokenKind::Break
414                | TokenKind::Case
415                | TokenKind::Catch
416                | TokenKind::Class
417                | TokenKind::Clone
418                | TokenKind::Continue
419                | TokenKind::Const
420                | TokenKind::Declare
421                | TokenKind::Default
422                | TokenKind::Do
423                | TokenKind::Echo
424                | TokenKind::ElseIf
425                | TokenKind::Else
426                | TokenKind::Enum
427                | TokenKind::Extends
428                | TokenKind::False
429                | TokenKind::Finally
430                | TokenKind::Final
431                | TokenKind::Fn
432                | TokenKind::Foreach
433                | TokenKind::For
434                | TokenKind::Function
435                | TokenKind::Goto
436                | TokenKind::If
437                | TokenKind::IncludeOnce
438                | TokenKind::Include
439                | TokenKind::Implements
440                | TokenKind::Interface
441                | TokenKind::Instanceof
442                | TokenKind::Namespace
443                | TokenKind::New
444                | TokenKind::Null
445                | TokenKind::Private
446                | TokenKind::PrivateSet
447                | TokenKind::Protected
448                | TokenKind::Public
449                | TokenKind::RequireOnce
450                | TokenKind::Require
451                | TokenKind::Return
452                | TokenKind::Static
453                | TokenKind::Switch
454                | TokenKind::Throw
455                | TokenKind::Trait
456                | TokenKind::True
457                | TokenKind::Try
458                | TokenKind::Use
459                | TokenKind::Var
460                | TokenKind::Yield
461                | TokenKind::While
462                | TokenKind::Insteadof
463                | TokenKind::List
464                | TokenKind::Self_
465                | TokenKind::Parent
466                | TokenKind::DirConstant
467                | TokenKind::FileConstant
468                | TokenKind::LineConstant
469                | TokenKind::FunctionConstant
470                | TokenKind::ClassConstant
471                | TokenKind::MethodConstant
472                | TokenKind::TraitConstant
473                | TokenKind::NamespaceConstant
474                | TokenKind::PropertyConstant
475                | TokenKind::HaltCompiler
476        )
477    }
478
479    #[inline]
480    #[must_use]
481    pub const fn is_infix(&self) -> bool {
482        matches!(
483            self,
484            T!["**"
485                | ">>="
486                | "<<="
487                | "^="
488                | "&="
489                | "|="
490                | "%="
491                | "**="
492                | "and"
493                | "or"
494                | "xor"
495                | "<=>"
496                | "<<"
497                | ">>"
498                | "&"
499                | "|"
500                | "^"
501                | "%"
502                | "instanceof"
503                | "*"
504                | "/"
505                | "+"
506                | "-"
507                | "."
508                | "<"
509                | ">"
510                | "<="
511                | ">="
512                | "=="
513                | "==="
514                | "!="
515                | "!=="
516                | "<>"
517                | "?"
518                | "&&"
519                | "||"
520                | "="
521                | "+="
522                | "-="
523                | ".="
524                | "??="
525                | "/="
526                | "*="
527                | "??"
528                | "|>"]
529        )
530    }
531
532    #[inline]
533    #[must_use]
534    pub const fn is_postfix(&self) -> bool {
535        matches!(self, T!["++" | "--" | "(" | "[" | "->" | "?->" | "::"])
536    }
537
538    #[inline]
539    #[must_use]
540    pub const fn is_visibility_modifier(&self) -> bool {
541        matches!(self, T!["public" | "protected" | "private" | "private(set)" | "protected(set)" | "public(set)"])
542    }
543
544    #[inline]
545    #[must_use]
546    pub const fn is_modifier(&self) -> bool {
547        matches!(
548            self,
549            T!["public"
550                | "protected"
551                | "private"
552                | "private(set)"
553                | "protected(set)"
554                | "public(set)"
555                | "static"
556                | "final"
557                | "abstract"
558                | "readonly"]
559        )
560    }
561
562    #[inline]
563    #[must_use]
564    pub const fn is_identifier_maybe_soft_reserved(&self) -> bool {
565        if let TokenKind::Identifier = self { true } else { self.is_soft_reserved_identifier() }
566    }
567
568    #[inline]
569    #[must_use]
570    pub const fn is_identifier_maybe_reserved(&self) -> bool {
571        if let TokenKind::Identifier = self { true } else { self.is_reserved_identifier() }
572    }
573
574    #[inline]
575    #[must_use]
576    pub const fn is_soft_reserved_identifier(&self) -> bool {
577        matches!(
578            self,
579            T!["parent" | "self" | "true" | "false" | "list" | "null" | "enum" | "from" | "readonly" | "match"]
580        )
581    }
582
583    #[inline]
584    #[must_use]
585    pub const fn is_reserved_identifier(&self) -> bool {
586        if self.is_soft_reserved_identifier() {
587            return true;
588        }
589
590        matches!(
591            self,
592            T!["static"
593                | "abstract"
594                | "final"
595                | "for"
596                | "private"
597                | "private(set)"
598                | "protected"
599                | "protected(set)"
600                | "public"
601                | "public(set)"
602                | "include"
603                | "include_once"
604                | "eval"
605                | "require"
606                | "require_once"
607                | "or"
608                | "xor"
609                | "and"
610                | "instanceof"
611                | "new"
612                | "clone"
613                | "exit"
614                | "die"
615                | "if"
616                | "elseif"
617                | "else"
618                | "endif"
619                | "echo"
620                | "do"
621                | "while"
622                | "endwhile"
623                | "endfor"
624                | "foreach"
625                | "endforeach"
626                | "declare"
627                | "enddeclare"
628                | "as"
629                | "try"
630                | "catch"
631                | "finally"
632                | "throw"
633                | "use"
634                | "insteadof"
635                | "global"
636                | "var"
637                | "unset"
638                | "isset"
639                | "empty"
640                | "continue"
641                | "goto"
642                | "function"
643                | "const"
644                | "return"
645                | "print"
646                | "yield"
647                | "list"
648                | "switch"
649                | "endswitch"
650                | "case"
651                | "default"
652                | "break"
653                | "array"
654                | "callable"
655                | "extends"
656                | "implements"
657                | "namespace"
658                | "trait"
659                | "interface"
660                | "class"
661                | "__CLASS__"
662                | "__TRAIT__"
663                | "__FUNCTION__"
664                | "__METHOD__"
665                | "__LINE__"
666                | "__FILE__"
667                | "__DIR__"
668                | "__NAMESPACE__"
669                | "__PROPERTY__"
670                | "__halt_compiler"
671                | "fn"
672                | "match"]
673        )
674    }
675
676    #[inline]
677    #[must_use]
678    pub const fn is_literal(&self) -> bool {
679        matches!(
680            self,
681            T!["true" | "false" | "null" | LiteralFloat | LiteralInteger | LiteralString | PartialLiteralString]
682        )
683    }
684
685    #[inline]
686    #[must_use]
687    pub const fn is_magic_constant(&self) -> bool {
688        matches!(
689            self,
690            T!["__CLASS__"
691                | "__DIR__"
692                | "__FILE__"
693                | "__FUNCTION__"
694                | "__LINE__"
695                | "__METHOD__"
696                | "__NAMESPACE__"
697                | "__PROPERTY__"
698                | "__TRAIT__"]
699        )
700    }
701
702    #[inline]
703    #[must_use]
704    pub const fn is_cast(&self) -> bool {
705        matches!(
706            self,
707            T!["(string)"
708                | "(binary)"
709                | "(int)"
710                | "(integer)"
711                | "(float)"
712                | "(double)"
713                | "(real)"
714                | "(bool)"
715                | "(boolean)"
716                | "(array)"
717                | "(object)"
718                | "(unset)"
719                | "(void)"]
720        )
721    }
722
723    #[inline]
724    #[must_use]
725    pub const fn is_unary_prefix(&self) -> bool {
726        if self.is_cast() {
727            return true;
728        }
729
730        matches!(self, T!["@" | "!" | "~" | "-" | "+" | "++" | "--" | "&"])
731    }
732
733    #[inline]
734    #[must_use]
735    pub const fn is_trivia(&self) -> bool {
736        matches!(self, T![SingleLineComment | MultiLineComment | DocBlockComment | HashComment | Whitespace])
737    }
738
739    #[inline]
740    #[must_use]
741    pub const fn is_comment(&self) -> bool {
742        matches!(self, T![SingleLineComment | MultiLineComment | DocBlockComment | HashComment])
743    }
744
745    #[inline]
746    #[must_use]
747    pub const fn is_comma(&self) -> bool {
748        matches!(self, T![","])
749    }
750
751    #[inline]
752    #[must_use]
753    pub const fn is_construct(&self) -> bool {
754        matches!(
755            self,
756            T!["isset"
757                | "empty"
758                | "eval"
759                | "include"
760                | "include_once"
761                | "require"
762                | "require_once"
763                | "print"
764                | "unset"
765                | "exit"
766                | "die"]
767        )
768    }
769}
770
771impl<'arena> Token<'arena> {
772    #[inline]
773    #[must_use]
774    pub const fn new(kind: TokenKind, value: &'arena str, start: Position) -> Self {
775        Self { kind, start, value }
776    }
777
778    /// Constructs a `Span` for this token given the file ID.
779    ///
780    /// The span is computed from the token's start position and its value length.
781    #[inline]
782    #[must_use]
783    pub const fn span_for(&self, file_id: FileId) -> Span {
784        let end = Position::new(self.start.offset + self.value.len() as u32);
785        Span::new(file_id, self.start, end)
786    }
787}
788
789impl std::fmt::Display for Token<'_> {
790    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
791        write!(f, "{}({})", self.kind, self.value)
792    }
793}