// perl_token/kind.rs

/// Token classification for Perl parsing.
///
/// The set is intentionally simplified for fast parser matching while covering
/// keywords, operators, delimiters, literals, identifiers, and special tokens.
///
/// Use [`TokenKind::display_name`] to get a human-readable string suitable for
/// error messages shown to the user.
///
/// `Hash` is derived alongside `Eq` so token kinds can be used directly as keys
/// in hashed collections (`HashMap`, `HashSet`).
///
/// # Categories
///
/// | Group | Examples |
/// |-------|----------|
/// | Keywords | [`My`](Self::My), [`Sub`](Self::Sub), [`If`](Self::If), ... |
/// | Operators | [`Plus`](Self::Plus), [`Arrow`](Self::Arrow), [`And`](Self::And), ... |
/// | Delimiters | [`LeftParen`](Self::LeftParen), [`LeftBrace`](Self::LeftBrace), ... |
/// | Literals | [`Number`](Self::Number), [`String`](Self::String), [`Regex`](Self::Regex), ... |
/// | Identifiers | [`Identifier`](Self::Identifier), [`ScalarSigil`](Self::ScalarSigil), ... |
/// | Special | [`Eof`](Self::Eof), [`Unknown`](Self::Unknown) |
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
    // ===== Keywords =====
    /// Lexical variable declaration: `my $x`
    My,
    /// Package variable declaration: `our $x`
    Our,
    /// Dynamic scoping: `local $x`
    Local,
    /// Persistent variable: `state $x`
    State,
    /// Subroutine declaration: `sub foo`
    Sub,
    /// Conditional: `if (cond)`
    If,
    /// Else-if conditional: `elsif (cond)`
    Elsif,
    /// Else branch: `else { }`
    Else,
    /// Negated conditional: `unless (cond)`
    Unless,
    /// While loop: `while (cond)`
    While,
    /// Until loop: `until (cond)`
    Until,
    /// C-style for loop: `for (init; cond; update)`
    For,
    /// Iterator loop: `foreach $x (@list)`
    Foreach,
    /// Return statement: `return $value`
    Return,
    /// Package declaration: `package Foo`
    Package,
    /// Module import: `use Module`
    Use,
    /// Disable pragma/module: `no strict`
    No,
    /// Compile-time block: `BEGIN { }`
    Begin,
    /// Exit-time block: `END { }`
    End,
    /// Check phase block: `CHECK { }`
    Check,
    /// Init phase block: `INIT { }`
    Init,
    /// Unit check block: `UNITCHECK { }`
    Unitcheck,
    /// Exception handling: `eval { }`
    Eval,
    /// Block execution: `do { }` or `do "file"`
    Do,
    /// Switch expression: `given ($x)`
    Given,
    /// Case clause: `when ($pattern)`
    When,
    /// Default case: `default { }`
    Default,
    /// Try block: `try { }`
    Try,
    /// Catch block: `catch ($e) { }`
    Catch,
    /// Finally block: `finally { }`
    Finally,
    /// Continue block: `continue { }`
    Continue,
    /// Loop control: `next`
    Next,
    /// Loop control: `last`
    Last,
    /// Loop control: `redo`
    Redo,
    /// Goto statement: `goto LABEL`, `goto &sub`, `goto EXPR`
    Goto,
    /// Class declaration (5.38+): `class Foo`
    Class,
    /// Method declaration (5.38+): `method foo`
    Method,
    /// Class field declaration (5.38+): `field $name`
    Field,
    /// Format declaration: `format STDOUT =`
    Format,
    /// Undefined value: `undef`
    Undef,
    /// Defer block: `defer { ... }` (Perl 5.36+ experimental, stable in 5.40)
    Defer,

    // ===== Operators =====
    /// Assignment: `=`
    Assign,
    /// Addition: `+`
    Plus,
    /// Subtraction: `-`
    Minus,
    /// Multiplication: `*`
    Star,
    /// Division: `/`
    Slash,
    /// Modulo: `%`
    Percent,
    /// Exponentiation: `**`
    Power,
    /// Left bit shift: `<<`
    LeftShift,
    /// Right bit shift: `>>`
    RightShift,
    /// Bitwise AND: `&`
    BitwiseAnd,
    /// Bitwise OR: `|`
    BitwiseOr,
    /// Bitwise XOR: `^`
    BitwiseXor,
    /// Bitwise NOT: `~`
    BitwiseNot,
    /// Add and assign: `+=`
    PlusAssign,
    /// Subtract and assign: `-=`
    MinusAssign,
    /// Multiply and assign: `*=`
    StarAssign,
    /// Divide and assign: `/=`
    SlashAssign,
    /// Modulo and assign: `%=`
    PercentAssign,
    /// Concatenate and assign: `.=`
    DotAssign,
    /// Bitwise AND and assign: `&=`
    AndAssign,
    /// Bitwise OR and assign: `|=`
    OrAssign,
    /// Bitwise XOR and assign: `^=`
    XorAssign,
    /// Power and assign: `**=`
    PowerAssign,
    /// Left shift and assign: `<<=`
    LeftShiftAssign,
    /// Right shift and assign: `>>=`
    RightShiftAssign,
    /// Logical AND and assign: `&&=`
    LogicalAndAssign,
    /// Logical OR and assign: `||=`
    LogicalOrAssign,
    /// Defined-or and assign: `//=`
    DefinedOrAssign,
    /// Numeric equality: `==`
    Equal,
    /// Numeric inequality: `!=`
    NotEqual,
    /// Pattern match binding: `=~`
    Match,
    /// Negated pattern match: `!~`
    NotMatch,
    /// Smart match: `~~`
    SmartMatch,
    /// Less than: `<`
    Less,
    /// Greater than: `>`
    Greater,
    /// Less than or equal: `<=`
    LessEqual,
    /// Greater than or equal: `>=`
    GreaterEqual,
    /// Numeric comparison (spaceship): `<=>`
    Spaceship,
    /// String comparison: `cmp`
    StringCompare,
    /// Logical AND: `&&`
    And,
    /// Logical OR: `||`
    Or,
    /// Logical NOT: `!`
    Not,
    /// Defined-or: `//`
    DefinedOr,
    /// Word AND operator: `and`
    WordAnd,
    /// Word OR operator: `or`
    WordOr,
    /// Word NOT operator: `not`
    WordNot,
    /// Word XOR operator: `xor`
    WordXor,
    /// Method/dereference arrow: `->`
    Arrow,
    /// Hash key separator: `=>`
    FatArrow,
    /// String concatenation: `.`
    Dot,
    /// Range operator: `..`
    Range,
    /// Yada-yada (unimplemented): `...`
    Ellipsis,
    /// Increment: `++`
    Increment,
    /// Decrement: `--`
    Decrement,
    /// Package separator: `::`
    DoubleColon,
    /// Ternary condition: `?`
    Question,
    /// Ternary/label separator: `:`
    Colon,
    /// Reference operator: `\`
    Backslash,

    // ===== Delimiters =====
    /// Left parenthesis: `(`
    LeftParen,
    /// Right parenthesis: `)`
    RightParen,
    /// Left brace: `{`
    LeftBrace,
    /// Right brace: `}`
    RightBrace,
    /// Left bracket: `[`
    LeftBracket,
    /// Right bracket: `]`
    RightBracket,
    /// Statement terminator: `;`
    Semicolon,
    /// List separator: `,`
    Comma,

    // ===== Literals =====
    // NOTE: `UnknownRest` and `HeredocDepthLimit` are budget/error tokens, but
    // they are declared in this group and `TokenKind::category` reports them
    // as literals.
    /// Numeric literal: `42`, `3.14`, `0xFF`
    Number,
    /// String literal: `"hello"` or `'world'`
    String,
    /// Regular expression: `/pattern/flags`
    Regex,
    /// Substitution: `s/pattern/replacement/flags`
    Substitution,
    /// Transliteration: `tr/abc/xyz/` or `y///`
    Transliteration,
    /// Single-quoted string: `q/text/`
    QuoteSingle,
    /// Double-quoted string: `qq/text/`
    QuoteDouble,
    /// Quote words: `qw(list of words)`
    QuoteWords,
    /// Backtick command: `` `cmd` `` or `qx/cmd/`
    QuoteCommand,
    /// Heredoc start marker: `<<EOF`
    HeredocStart,
    /// Heredoc content body
    HeredocBody,
    /// Format specification body
    FormatBody,
    /// Data section marker: `__DATA__` or `__END__`
    DataMarker,
    /// Data section content
    DataBody,
    /// Version string literal: `v5.26.0`, `v5.10`
    VString,
    /// Unparsed remainder (budget exceeded)
    UnknownRest,
    /// Heredoc depth limit exceeded (special error token)
    HeredocDepthLimit,

    // ===== Identifiers and Variables =====
    /// Bareword identifier or function name
    Identifier,
    /// Scalar sigil: `$`
    ScalarSigil,
    /// Array sigil: `@`
    ArraySigil,
    /// Hash sigil: `%`
    HashSigil,
    /// Subroutine sigil: `&`
    SubSigil,
    /// Glob/typeglob sigil: `*`
    GlobSigil,

    // ===== Special =====
    /// End of file/input
    Eof,
    /// Unknown/unrecognized token
    Unknown,
}
297
/// Broad classification used for token metadata and conformance checks.
///
/// This enum is `#[non_exhaustive]`: external code must include a wildcard `_`
/// arm when matching on it. This allows new categories to be added in future
/// releases without breaking downstream crates.
///
/// `Hash` is derived alongside `Eq` so categories can be used as keys in
/// hashed collections, for example when bucketing tokens by category.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenCategory {
    /// Reserved words and language keywords.
    Keyword,
    /// Operators and symbolic/word forms.
    Operator,
    /// Grouping and punctuation delimiters.
    Delimiter,
    /// Literal-like lexical forms.
    Literal,
    /// Identifiers and sigils.
    Identifier,
    /// Special sentinel and recovery tokens.
    Special,
}
319
320/// Metadata associated with each [`TokenKind`] variant.
321///
322/// This struct is `#[non_exhaustive]`: external code must not construct it
323/// using struct literal syntax. Use [`TokenKind::metadata`] to obtain
324/// instances. Additional fields may be added in future releases without
325/// constituting a breaking change.
326#[non_exhaustive]
327#[derive(Debug, Clone, Copy, PartialEq, Eq)]
328pub struct TokenKindMetadata {
329    /// Stable category used in docs/tests/gates.
330    pub category: TokenCategory,
331    /// User-facing display label for diagnostics.
332    pub display_name: &'static str,
333}
334
335/// Canonical lexer keyword spellings and their parser-facing token kinds.
336///
337/// Word-form operators (`and`, `or`, `not`, `xor`, `cmp`) are included here
338/// because the lexer emits them as keyword tokens before the parser maps them
339/// to their operator roles.
340pub const KEYWORD_SPELLINGS: &[(&str, TokenKind)] = &[
341    ("my", TokenKind::My),
342    ("our", TokenKind::Our),
343    ("local", TokenKind::Local),
344    ("state", TokenKind::State),
345    ("sub", TokenKind::Sub),
346    ("if", TokenKind::If),
347    ("elsif", TokenKind::Elsif),
348    ("else", TokenKind::Else),
349    ("unless", TokenKind::Unless),
350    ("while", TokenKind::While),
351    ("until", TokenKind::Until),
352    ("for", TokenKind::For),
353    ("foreach", TokenKind::Foreach),
354    ("return", TokenKind::Return),
355    ("package", TokenKind::Package),
356    ("use", TokenKind::Use),
357    ("no", TokenKind::No),
358    ("BEGIN", TokenKind::Begin),
359    ("END", TokenKind::End),
360    ("CHECK", TokenKind::Check),
361    ("INIT", TokenKind::Init),
362    ("UNITCHECK", TokenKind::Unitcheck),
363    ("eval", TokenKind::Eval),
364    ("do", TokenKind::Do),
365    ("given", TokenKind::Given),
366    ("when", TokenKind::When),
367    ("default", TokenKind::Default),
368    ("try", TokenKind::Try),
369    ("catch", TokenKind::Catch),
370    ("finally", TokenKind::Finally),
371    ("continue", TokenKind::Continue),
372    ("next", TokenKind::Next),
373    ("last", TokenKind::Last),
374    ("redo", TokenKind::Redo),
375    ("goto", TokenKind::Goto),
376    ("class", TokenKind::Class),
377    ("method", TokenKind::Method),
378    ("field", TokenKind::Field),
379    ("format", TokenKind::Format),
380    ("undef", TokenKind::Undef),
381    ("defer", TokenKind::Defer),
382    ("and", TokenKind::WordAnd),
383    ("or", TokenKind::WordOr),
384    ("not", TokenKind::WordNot),
385    ("xor", TokenKind::WordXor),
386    ("cmp", TokenKind::StringCompare),
387];
388
389/// Canonical symbolic operator spellings and their parser-facing token kinds.
390pub const OPERATOR_SPELLINGS: &[(&str, TokenKind)] = &[
391    ("=", TokenKind::Assign),
392    ("+", TokenKind::Plus),
393    ("-", TokenKind::Minus),
394    ("*", TokenKind::Star),
395    ("/", TokenKind::Slash),
396    ("%", TokenKind::Percent),
397    ("**", TokenKind::Power),
398    ("<<", TokenKind::LeftShift),
399    (">>", TokenKind::RightShift),
400    ("&", TokenKind::BitwiseAnd),
401    ("|", TokenKind::BitwiseOr),
402    ("^", TokenKind::BitwiseXor),
403    ("~", TokenKind::BitwiseNot),
404    ("+=", TokenKind::PlusAssign),
405    ("-=", TokenKind::MinusAssign),
406    ("*=", TokenKind::StarAssign),
407    ("/=", TokenKind::SlashAssign),
408    ("%=", TokenKind::PercentAssign),
409    (".=", TokenKind::DotAssign),
410    ("&=", TokenKind::AndAssign),
411    ("|=", TokenKind::OrAssign),
412    ("^=", TokenKind::XorAssign),
413    ("**=", TokenKind::PowerAssign),
414    ("<<=", TokenKind::LeftShiftAssign),
415    (">>=", TokenKind::RightShiftAssign),
416    ("&&=", TokenKind::LogicalAndAssign),
417    ("||=", TokenKind::LogicalOrAssign),
418    ("//=", TokenKind::DefinedOrAssign),
419    ("==", TokenKind::Equal),
420    ("!=", TokenKind::NotEqual),
421    ("=~", TokenKind::Match),
422    ("!~", TokenKind::NotMatch),
423    ("~~", TokenKind::SmartMatch),
424    ("<", TokenKind::Less),
425    (">", TokenKind::Greater),
426    ("<=", TokenKind::LessEqual),
427    (">=", TokenKind::GreaterEqual),
428    ("<=>", TokenKind::Spaceship),
429    ("&&", TokenKind::And),
430    ("||", TokenKind::Or),
431    ("!", TokenKind::Not),
432    ("//", TokenKind::DefinedOr),
433    ("->", TokenKind::Arrow),
434    ("=>", TokenKind::FatArrow),
435    (".", TokenKind::Dot),
436    ("..", TokenKind::Range),
437    ("...", TokenKind::Ellipsis),
438    ("++", TokenKind::Increment),
439    ("--", TokenKind::Decrement),
440    ("::", TokenKind::DoubleColon),
441    ("?", TokenKind::Question),
442    (":", TokenKind::Colon),
443    ("\\", TokenKind::Backslash),
444];
445
446/// Canonical delimiter spellings and their parser-facing token kinds.
447pub const DELIMITER_SPELLINGS: &[(&str, TokenKind)] = &[
448    ("(", TokenKind::LeftParen),
449    (")", TokenKind::RightParen),
450    ("{", TokenKind::LeftBrace),
451    ("}", TokenKind::RightBrace),
452    ("[", TokenKind::LeftBracket),
453    ("]", TokenKind::RightBracket),
454    (";", TokenKind::Semicolon),
455    (",", TokenKind::Comma),
456];
457
458/// Canonical sigil spellings and their parser-facing token kinds.
459pub const SIGIL_SPELLINGS: &[(&str, TokenKind)] = &[
460    ("$", TokenKind::ScalarSigil),
461    ("@", TokenKind::ArraySigil),
462    ("%", TokenKind::HashSigil),
463    ("&", TokenKind::SubSigil),
464    ("*", TokenKind::GlobSigil),
465];
466
467impl TokenKind {
468    /// Return every [`TokenKind`] variant in stable declaration order.
469    pub const fn all() -> &'static [TokenKind] {
470        &TOKEN_KIND_ALL
471    }
472
473    /// Number of token kinds expected to have metadata coverage.
474    pub const fn metadata_count() -> usize {
475        TOKEN_KIND_ALL.len()
476    }
477
478    /// Return compact metadata for this token kind.
479    pub fn metadata(self) -> TokenKindMetadata {
480        TokenKindMetadata { category: self.category(), display_name: self.display_name() }
481    }
482
483    /// Return the high-level category for this token kind.
484    pub const fn category(self) -> TokenCategory {
485        match self {
486            TokenKind::My
487            | TokenKind::Our
488            | TokenKind::Local
489            | TokenKind::State
490            | TokenKind::Sub
491            | TokenKind::If
492            | TokenKind::Elsif
493            | TokenKind::Else
494            | TokenKind::Unless
495            | TokenKind::While
496            | TokenKind::Until
497            | TokenKind::For
498            | TokenKind::Foreach
499            | TokenKind::Return
500            | TokenKind::Package
501            | TokenKind::Use
502            | TokenKind::No
503            | TokenKind::Begin
504            | TokenKind::End
505            | TokenKind::Check
506            | TokenKind::Init
507            | TokenKind::Unitcheck
508            | TokenKind::Eval
509            | TokenKind::Do
510            | TokenKind::Given
511            | TokenKind::When
512            | TokenKind::Default
513            | TokenKind::Try
514            | TokenKind::Catch
515            | TokenKind::Finally
516            | TokenKind::Continue
517            | TokenKind::Next
518            | TokenKind::Last
519            | TokenKind::Redo
520            | TokenKind::Goto
521            | TokenKind::Class
522            | TokenKind::Method
523            | TokenKind::Field
524            | TokenKind::Format
525            | TokenKind::Undef
526            | TokenKind::Defer => TokenCategory::Keyword,
527            TokenKind::Assign
528            | TokenKind::Plus
529            | TokenKind::Minus
530            | TokenKind::Star
531            | TokenKind::Slash
532            | TokenKind::Percent
533            | TokenKind::Power
534            | TokenKind::LeftShift
535            | TokenKind::RightShift
536            | TokenKind::BitwiseAnd
537            | TokenKind::BitwiseOr
538            | TokenKind::BitwiseXor
539            | TokenKind::BitwiseNot
540            | TokenKind::PlusAssign
541            | TokenKind::MinusAssign
542            | TokenKind::StarAssign
543            | TokenKind::SlashAssign
544            | TokenKind::PercentAssign
545            | TokenKind::DotAssign
546            | TokenKind::AndAssign
547            | TokenKind::OrAssign
548            | TokenKind::XorAssign
549            | TokenKind::PowerAssign
550            | TokenKind::LeftShiftAssign
551            | TokenKind::RightShiftAssign
552            | TokenKind::LogicalAndAssign
553            | TokenKind::LogicalOrAssign
554            | TokenKind::DefinedOrAssign
555            | TokenKind::Equal
556            | TokenKind::NotEqual
557            | TokenKind::Match
558            | TokenKind::NotMatch
559            | TokenKind::SmartMatch
560            | TokenKind::Less
561            | TokenKind::Greater
562            | TokenKind::LessEqual
563            | TokenKind::GreaterEqual
564            | TokenKind::Spaceship
565            | TokenKind::StringCompare
566            | TokenKind::And
567            | TokenKind::Or
568            | TokenKind::Not
569            | TokenKind::DefinedOr
570            | TokenKind::WordAnd
571            | TokenKind::WordOr
572            | TokenKind::WordNot
573            | TokenKind::WordXor
574            | TokenKind::Arrow
575            | TokenKind::FatArrow
576            | TokenKind::Dot
577            | TokenKind::Range
578            | TokenKind::Ellipsis
579            | TokenKind::Increment
580            | TokenKind::Decrement
581            | TokenKind::DoubleColon
582            | TokenKind::Question
583            | TokenKind::Colon
584            | TokenKind::Backslash => TokenCategory::Operator,
585            TokenKind::LeftParen
586            | TokenKind::RightParen
587            | TokenKind::LeftBrace
588            | TokenKind::RightBrace
589            | TokenKind::LeftBracket
590            | TokenKind::RightBracket
591            | TokenKind::Semicolon
592            | TokenKind::Comma => TokenCategory::Delimiter,
593            TokenKind::Number
594            | TokenKind::String
595            | TokenKind::Regex
596            | TokenKind::Substitution
597            | TokenKind::Transliteration
598            | TokenKind::QuoteSingle
599            | TokenKind::QuoteDouble
600            | TokenKind::QuoteWords
601            | TokenKind::QuoteCommand
602            | TokenKind::HeredocStart
603            | TokenKind::HeredocBody
604            | TokenKind::FormatBody
605            | TokenKind::DataMarker
606            | TokenKind::DataBody
607            | TokenKind::VString
608            | TokenKind::UnknownRest
609            | TokenKind::HeredocDepthLimit => TokenCategory::Literal,
610            TokenKind::Identifier
611            | TokenKind::ScalarSigil
612            | TokenKind::ArraySigil
613            | TokenKind::HashSigil
614            | TokenKind::SubSigil
615            | TokenKind::GlobSigil => TokenCategory::Identifier,
616            TokenKind::Eof | TokenKind::Unknown => TokenCategory::Special,
617        }
618    }
619
    // --- Category-based predicates (classify by TokenCategory) ---
621
622    /// Returns `true` if this token kind is a keyword.
623    pub const fn is_keyword(self) -> bool {
624        matches!(self.category(), TokenCategory::Keyword)
625    }
626
627    /// Returns `true` if this token kind is an operator.
628    pub const fn is_operator(self) -> bool {
629        matches!(self.category(), TokenCategory::Operator)
630    }
631
632    /// Returns `true` if this token kind is a literal.
633    pub const fn is_literal(self) -> bool {
634        matches!(self.category(), TokenCategory::Literal)
635    }
636
637    /// Returns `true` if this token kind is a delimiter.
638    pub const fn is_delimiter(self) -> bool {
639        matches!(self.category(), TokenCategory::Delimiter)
640    }
641
642    /// Returns `true` if this token kind is an identifier or sigil.
643    pub const fn is_identifier(self) -> bool {
644        matches!(self.category(), TokenCategory::Identifier)
645    }
646
647    /// Returns `true` if this token kind is a special sentinel/recovery token.
648    pub const fn is_special(self) -> bool {
649        matches!(self.category(), TokenCategory::Special)
650    }
651
    // --- Parser-facing role predicates (specific semantic roles) ---
653
654    /// Return whether this token is an assignment operator.
655    #[inline]
656    pub fn is_assignment_operator(self) -> bool {
657        matches!(
658            self,
659            TokenKind::Assign
660                | TokenKind::PlusAssign
661                | TokenKind::MinusAssign
662                | TokenKind::StarAssign
663                | TokenKind::SlashAssign
664                | TokenKind::PercentAssign
665                | TokenKind::DotAssign
666                | TokenKind::AndAssign
667                | TokenKind::OrAssign
668                | TokenKind::XorAssign
669                | TokenKind::PowerAssign
670                | TokenKind::LeftShiftAssign
671                | TokenKind::RightShiftAssign
672                | TokenKind::LogicalAndAssign
673                | TokenKind::LogicalOrAssign
674                | TokenKind::DefinedOrAssign
675        )
676    }
677
678    /// Return whether this token is a comparison operator.
679    #[inline]
680    pub fn is_comparison_operator(self) -> bool {
681        matches!(
682            self,
683            TokenKind::Equal
684                | TokenKind::NotEqual
685                | TokenKind::Less
686                | TokenKind::Greater
687                | TokenKind::LessEqual
688                | TokenKind::GreaterEqual
689                | TokenKind::Spaceship
690                | TokenKind::StringCompare
691                | TokenKind::Match
692                | TokenKind::NotMatch
693                | TokenKind::SmartMatch
694        )
695    }
696
697    /// Return whether this token is a logical operator.
698    #[inline]
699    pub fn is_logical_operator(self) -> bool {
700        matches!(
701            self,
702            TokenKind::And
703                | TokenKind::Or
704                | TokenKind::Not
705                | TokenKind::DefinedOr
706                | TokenKind::WordAnd
707                | TokenKind::WordOr
708                | TokenKind::WordNot
709                | TokenKind::WordXor
710        )
711    }
712
713    /// Return whether this token is a word-form operator token.
714    #[inline]
715    pub fn is_word_operator(self) -> bool {
716        matches!(
717            self,
718            TokenKind::StringCompare
719                | TokenKind::WordAnd
720                | TokenKind::WordOr
721                | TokenKind::WordNot
722                | TokenKind::WordXor
723        )
724    }
725
726    /// Return whether this token is a low-precedence word operator.
727    #[inline]
728    pub fn is_low_precedence_word_operator(self) -> bool {
729        matches!(
730            self,
731            TokenKind::WordAnd | TokenKind::WordOr | TokenKind::WordNot | TokenKind::WordXor
732        )
733    }
734
735    /// Return whether this token is an opening paired delimiter.
736    #[inline]
737    pub fn is_open_delimiter(self) -> bool {
738        matches!(self, TokenKind::LeftParen | TokenKind::LeftBrace | TokenKind::LeftBracket)
739    }
740
741    /// Return whether this token is a closing paired delimiter.
742    #[inline]
743    pub fn is_close_delimiter(self) -> bool {
744        matches!(self, TokenKind::RightParen | TokenKind::RightBrace | TokenKind::RightBracket)
745    }
746
747    /// Return the matching paired delimiter for this token, if any.
748    #[inline]
749    pub fn matching_delimiter(self) -> Option<Self> {
750        match self {
751            TokenKind::LeftParen => Some(TokenKind::RightParen),
752            TokenKind::RightParen => Some(TokenKind::LeftParen),
753            TokenKind::LeftBrace => Some(TokenKind::RightBrace),
754            TokenKind::RightBrace => Some(TokenKind::LeftBrace),
755            TokenKind::LeftBracket => Some(TokenKind::RightBracket),
756            TokenKind::RightBracket => Some(TokenKind::LeftBracket),
757            _ => None,
758        }
759    }
760
761    /// Return whether this token is quote-like syntax.
762    #[inline]
763    pub fn is_quote_like(self) -> bool {
764        matches!(
765            self,
766            TokenKind::Regex
767                | TokenKind::Substitution
768                | TokenKind::Transliteration
769                | TokenKind::QuoteSingle
770                | TokenKind::QuoteDouble
771                | TokenKind::QuoteWords
772                | TokenKind::QuoteCommand
773                | TokenKind::HeredocStart
774        )
775    }
776
777    /// Return whether this token is a hard recovery boundary.
778    #[inline]
779    pub fn is_recovery_boundary(self) -> bool {
780        self == TokenKind::Semicolon || self.is_close_delimiter() || self == TokenKind::Eof
781    }
782
783    /// Map a canonical keyword spelling to its [`TokenKind`].
784    ///
785    /// This mapping is case-sensitive and only recognizes canonical Perl
786    /// spellings used by the lexer/parser pipeline.
787    pub fn from_keyword(spelling: &str) -> Option<TokenKind> {
788        match spelling {
789            "my" => Some(TokenKind::My),
790            "our" => Some(TokenKind::Our),
791            "local" => Some(TokenKind::Local),
792            "state" => Some(TokenKind::State),
793            "sub" => Some(TokenKind::Sub),
794            "if" => Some(TokenKind::If),
795            "elsif" => Some(TokenKind::Elsif),
796            "else" => Some(TokenKind::Else),
797            "unless" => Some(TokenKind::Unless),
798            "while" => Some(TokenKind::While),
799            "until" => Some(TokenKind::Until),
800            "for" => Some(TokenKind::For),
801            "foreach" => Some(TokenKind::Foreach),
802            "return" => Some(TokenKind::Return),
803            "package" => Some(TokenKind::Package),
804            "use" => Some(TokenKind::Use),
805            "no" => Some(TokenKind::No),
806            "BEGIN" => Some(TokenKind::Begin),
807            "END" => Some(TokenKind::End),
808            "CHECK" => Some(TokenKind::Check),
809            "INIT" => Some(TokenKind::Init),
810            "UNITCHECK" => Some(TokenKind::Unitcheck),
811            "eval" => Some(TokenKind::Eval),
812            "do" => Some(TokenKind::Do),
813            "given" => Some(TokenKind::Given),
814            "when" => Some(TokenKind::When),
815            "default" => Some(TokenKind::Default),
816            "try" => Some(TokenKind::Try),
817            "catch" => Some(TokenKind::Catch),
818            "finally" => Some(TokenKind::Finally),
819            "continue" => Some(TokenKind::Continue),
820            "next" => Some(TokenKind::Next),
821            "last" => Some(TokenKind::Last),
822            "redo" => Some(TokenKind::Redo),
823            "goto" => Some(TokenKind::Goto),
824            "class" => Some(TokenKind::Class),
825            "method" => Some(TokenKind::Method),
826            "field" => Some(TokenKind::Field),
827            "format" => Some(TokenKind::Format),
828            "undef" => Some(TokenKind::Undef),
829            "defer" => Some(TokenKind::Defer),
830            // Word operators are emitted as Keyword tokens by the lexer.
831            "and" => Some(TokenKind::WordAnd),
832            "or" => Some(TokenKind::WordOr),
833            "not" => Some(TokenKind::WordNot),
834            "xor" => Some(TokenKind::WordXor),
835            "cmp" => Some(TokenKind::StringCompare),
836            _ => None,
837        }
838    }
839
840    /// Map a canonical operator spelling to its [`TokenKind`].
841    ///
842    /// This mapping is case-sensitive.
843    pub fn from_operator(spelling: &str) -> Option<TokenKind> {
844        match spelling {
845            "=" => Some(TokenKind::Assign),
846            "+" => Some(TokenKind::Plus),
847            "-" => Some(TokenKind::Minus),
848            "*" => Some(TokenKind::Star),
849            "/" => Some(TokenKind::Slash),
850            "%" => Some(TokenKind::Percent),
851            "**" => Some(TokenKind::Power),
852            "<<" => Some(TokenKind::LeftShift),
853            ">>" => Some(TokenKind::RightShift),
854            "&" => Some(TokenKind::BitwiseAnd),
855            "|" => Some(TokenKind::BitwiseOr),
856            "^" => Some(TokenKind::BitwiseXor),
857            "~" => Some(TokenKind::BitwiseNot),
858            "+=" => Some(TokenKind::PlusAssign),
859            "-=" => Some(TokenKind::MinusAssign),
860            "*=" => Some(TokenKind::StarAssign),
861            "/=" => Some(TokenKind::SlashAssign),
862            "%=" => Some(TokenKind::PercentAssign),
863            ".=" => Some(TokenKind::DotAssign),
864            "&=" => Some(TokenKind::AndAssign),
865            "|=" => Some(TokenKind::OrAssign),
866            "^=" => Some(TokenKind::XorAssign),
867            "**=" => Some(TokenKind::PowerAssign),
868            "<<=" => Some(TokenKind::LeftShiftAssign),
869            ">>=" => Some(TokenKind::RightShiftAssign),
870            "&&=" => Some(TokenKind::LogicalAndAssign),
871            "||=" => Some(TokenKind::LogicalOrAssign),
872            "//=" => Some(TokenKind::DefinedOrAssign),
873            "==" => Some(TokenKind::Equal),
874            "!=" => Some(TokenKind::NotEqual),
875            "=~" => Some(TokenKind::Match),
876            "!~" => Some(TokenKind::NotMatch),
877            "~~" => Some(TokenKind::SmartMatch),
878            "<" => Some(TokenKind::Less),
879            ">" => Some(TokenKind::Greater),
880            "<=" => Some(TokenKind::LessEqual),
881            ">=" => Some(TokenKind::GreaterEqual),
882            "<=>" => Some(TokenKind::Spaceship),
883            "&&" => Some(TokenKind::And),
884            "||" => Some(TokenKind::Or),
885            "!" => Some(TokenKind::Not),
886            "//" => Some(TokenKind::DefinedOr),
887            "->" => Some(TokenKind::Arrow),
888            "=>" => Some(TokenKind::FatArrow),
889            "." => Some(TokenKind::Dot),
890            ".." => Some(TokenKind::Range),
891            "..." => Some(TokenKind::Ellipsis),
892            "++" => Some(TokenKind::Increment),
893            "--" => Some(TokenKind::Decrement),
894            "::" => Some(TokenKind::DoubleColon),
895            "?" => Some(TokenKind::Question),
896            ":" => Some(TokenKind::Colon),
897            "\\" => Some(TokenKind::Backslash),
898            _ => None,
899        }
900    }
901
902    /// Map a delimiter spelling to its [`TokenKind`].
903    pub fn from_delimiter(spelling: &str) -> Option<TokenKind> {
904        match spelling {
905            "(" => Some(TokenKind::LeftParen),
906            ")" => Some(TokenKind::RightParen),
907            "{" => Some(TokenKind::LeftBrace),
908            "}" => Some(TokenKind::RightBrace),
909            "[" => Some(TokenKind::LeftBracket),
910            "]" => Some(TokenKind::RightBracket),
911            ";" => Some(TokenKind::Semicolon),
912            "," => Some(TokenKind::Comma),
913            _ => None,
914        }
915    }
916
917    /// Map a sigil spelling to its [`TokenKind`].
918    pub fn from_sigil(spelling: &str) -> Option<TokenKind> {
919        match spelling {
920            "$" => Some(TokenKind::ScalarSigil),
921            "@" => Some(TokenKind::ArraySigil),
922            "%" => Some(TokenKind::HashSigil),
923            "&" => Some(TokenKind::SubSigil),
924            "*" => Some(TokenKind::GlobSigil),
925            _ => None,
926        }
927    }
928
929    /// Return the canonical spelling for fixed-spelling tokens.
930    ///
931    /// Tokens whose spelling depends on source text, such as identifiers,
932    /// strings, regexes, heredocs, and recovery tokens, return `None`.
933    ///
934    /// # Examples
935    ///
936    /// ```rust
937    /// use perl_token::TokenKind;
938    ///
939    /// assert_eq!(TokenKind::Sub.canonical_spelling(), Some("sub"));
940    /// assert_eq!(TokenKind::LeftBrace.canonical_spelling(), Some("{"));
941    /// assert_eq!(TokenKind::Identifier.canonical_spelling(), None);
942    /// ```
943    pub fn canonical_spelling(self) -> Option<&'static str> {
944        spelling_for_kind(self, KEYWORD_SPELLINGS)
945            .or_else(|| spelling_for_kind(self, OPERATOR_SPELLINGS))
946            .or_else(|| spelling_for_kind(self, DELIMITER_SPELLINGS))
947            .or_else(|| spelling_for_kind(self, SIGIL_SPELLINGS))
948    }
949
950    /// Return a user-friendly display name for this token kind.
951    ///
952    /// These names appear in parser error messages shown in the editor.
953    /// They use the actual Perl syntax (e.g. `}` instead of `RightBrace`)
954    /// so users can immediately understand what the parser expected.
955    ///
956    /// # Examples
957    ///
958    /// ```rust
959    /// use perl_token::TokenKind;
960    ///
961    /// assert_eq!(TokenKind::Semicolon.display_name(), "';'");
962    /// assert_eq!(TokenKind::Sub.display_name(), "'sub'");
963    /// assert_eq!(TokenKind::Number.display_name(), "number");
964    /// ```
965    pub fn display_name(self) -> &'static str {
966        match self {
967            // Keywords
968            TokenKind::My => "'my'",
969            TokenKind::Our => "'our'",
970            TokenKind::Local => "'local'",
971            TokenKind::State => "'state'",
972            TokenKind::Sub => "'sub'",
973            TokenKind::If => "'if'",
974            TokenKind::Elsif => "'elsif'",
975            TokenKind::Else => "'else'",
976            TokenKind::Unless => "'unless'",
977            TokenKind::While => "'while'",
978            TokenKind::Until => "'until'",
979            TokenKind::For => "'for'",
980            TokenKind::Foreach => "'foreach'",
981            TokenKind::Return => "'return'",
982            TokenKind::Package => "'package'",
983            TokenKind::Use => "'use'",
984            TokenKind::No => "'no'",
985            TokenKind::Begin => "'BEGIN'",
986            TokenKind::End => "'END'",
987            TokenKind::Check => "'CHECK'",
988            TokenKind::Init => "'INIT'",
989            TokenKind::Unitcheck => "'UNITCHECK'",
990            TokenKind::Eval => "'eval'",
991            TokenKind::Do => "'do'",
992            TokenKind::Given => "'given'",
993            TokenKind::When => "'when'",
994            TokenKind::Default => "'default'",
995            TokenKind::Try => "'try'",
996            TokenKind::Catch => "'catch'",
997            TokenKind::Finally => "'finally'",
998            TokenKind::Continue => "'continue'",
999            TokenKind::Next => "'next'",
1000            TokenKind::Last => "'last'",
1001            TokenKind::Redo => "'redo'",
1002            TokenKind::Goto => "'goto'",
1003            TokenKind::Class => "'class'",
1004            TokenKind::Method => "'method'",
1005            TokenKind::Field => "'field'",
1006            TokenKind::Format => "'format'",
1007            TokenKind::Undef => "'undef'",
1008            TokenKind::Defer => "'defer'",
1009
1010            // Operators
1011            TokenKind::Assign => "'='",
1012            TokenKind::Plus => "'+'",
1013            TokenKind::Minus => "'-'",
1014            TokenKind::Star => "'*'",
1015            TokenKind::Slash => "'/'",
1016            TokenKind::Percent => "'%'",
1017            TokenKind::Power => "'**'",
1018            TokenKind::LeftShift => "'<<'",
1019            TokenKind::RightShift => "'>>'",
1020            TokenKind::BitwiseAnd => "'&'",
1021            TokenKind::BitwiseOr => "'|'",
1022            TokenKind::BitwiseXor => "'^'",
1023            TokenKind::BitwiseNot => "'~'",
1024            TokenKind::PlusAssign => "'+='",
1025            TokenKind::MinusAssign => "'-='",
1026            TokenKind::StarAssign => "'*='",
1027            TokenKind::SlashAssign => "'/='",
1028            TokenKind::PercentAssign => "'%='",
1029            TokenKind::DotAssign => "'.='",
1030            TokenKind::AndAssign => "'&='",
1031            TokenKind::OrAssign => "'|='",
1032            TokenKind::XorAssign => "'^='",
1033            TokenKind::PowerAssign => "'**='",
1034            TokenKind::LeftShiftAssign => "'<<='",
1035            TokenKind::RightShiftAssign => "'>>='",
1036            TokenKind::LogicalAndAssign => "'&&='",
1037            TokenKind::LogicalOrAssign => "'||='",
1038            TokenKind::DefinedOrAssign => "'//='",
1039            TokenKind::Equal => "'=='",
1040            TokenKind::NotEqual => "'!='",
1041            TokenKind::Match => "'=~'",
1042            TokenKind::NotMatch => "'!~'",
1043            TokenKind::SmartMatch => "'~~'",
1044            TokenKind::Less => "'<'",
1045            TokenKind::Greater => "'>'",
1046            TokenKind::LessEqual => "'<='",
1047            TokenKind::GreaterEqual => "'>='",
1048            TokenKind::Spaceship => "'<=>'",
1049            TokenKind::StringCompare => "'cmp'",
1050            TokenKind::And => "'&&'",
1051            TokenKind::Or => "'||'",
1052            TokenKind::Not => "'!'",
1053            TokenKind::DefinedOr => "'//'",
1054            TokenKind::WordAnd => "'and'",
1055            TokenKind::WordOr => "'or'",
1056            TokenKind::WordNot => "'not'",
1057            TokenKind::WordXor => "'xor'",
1058            TokenKind::Arrow => "'->'",
1059            TokenKind::FatArrow => "'=>'",
1060            TokenKind::Dot => "'.'",
1061            TokenKind::Range => "'..'",
1062            TokenKind::Ellipsis => "'...'",
1063            TokenKind::Increment => "'++'",
1064            TokenKind::Decrement => "'--'",
1065            TokenKind::DoubleColon => "'::'",
1066            TokenKind::Question => "'?'",
1067            TokenKind::Colon => "':'",
1068            TokenKind::Backslash => "'\\'",
1069
1070            // Delimiters
1071            TokenKind::LeftParen => "'('",
1072            TokenKind::RightParen => "')'",
1073            TokenKind::LeftBrace => "'{'",
1074            TokenKind::RightBrace => "'}'",
1075            TokenKind::LeftBracket => "'['",
1076            TokenKind::RightBracket => "']'",
1077            TokenKind::Semicolon => "';'",
1078            TokenKind::Comma => "','",
1079
1080            // Literals
1081            TokenKind::Number => "number",
1082            TokenKind::String => "string",
1083            TokenKind::Regex => "regex",
1084            TokenKind::Substitution => "substitution (s///)",
1085            TokenKind::Transliteration => "transliteration (tr///)",
1086            TokenKind::QuoteSingle => "q// string",
1087            TokenKind::QuoteDouble => "qq// string",
1088            TokenKind::QuoteWords => "qw() word list",
1089            TokenKind::QuoteCommand => "qx// command",
1090            TokenKind::HeredocStart => "heredoc (<<)",
1091            TokenKind::HeredocBody => "heredoc body",
1092            TokenKind::FormatBody => "format body",
1093            TokenKind::DataMarker => "data marker (__DATA__ or __END__)",
1094            TokenKind::DataBody => "data section body",
1095            TokenKind::VString => "version string",
1096            TokenKind::UnknownRest => "unparsed remainder",
1097            TokenKind::HeredocDepthLimit => "heredoc depth limit exceeded",
1098
1099            // Identifiers and variables
1100            TokenKind::Identifier => "identifier",
1101            TokenKind::ScalarSigil => "'$'",
1102            TokenKind::ArraySigil => "'@'",
1103            TokenKind::HashSigil => "'%'",
1104            TokenKind::SubSigil => "'&'",
1105            TokenKind::GlobSigil => "'*'",
1106
1107            // Special
1108            TokenKind::Eof => "end of input",
1109            TokenKind::Unknown => "unknown token",
1110        }
1111    }
1112}
1113
1114fn spelling_for_kind(
1115    kind: TokenKind,
1116    spellings: &'static [(&'static str, TokenKind)],
1117) -> Option<&'static str> {
1118    spellings.iter().find_map(|&(spelling, candidate)| (candidate == kind).then_some(spelling))
1119}
1120
1121const TOKEN_KIND_ALL: [TokenKind; 132] = [
1122    TokenKind::My,
1123    TokenKind::Our,
1124    TokenKind::Local,
1125    TokenKind::State,
1126    TokenKind::Sub,
1127    TokenKind::If,
1128    TokenKind::Elsif,
1129    TokenKind::Else,
1130    TokenKind::Unless,
1131    TokenKind::While,
1132    TokenKind::Until,
1133    TokenKind::For,
1134    TokenKind::Foreach,
1135    TokenKind::Return,
1136    TokenKind::Package,
1137    TokenKind::Use,
1138    TokenKind::No,
1139    TokenKind::Begin,
1140    TokenKind::End,
1141    TokenKind::Check,
1142    TokenKind::Init,
1143    TokenKind::Unitcheck,
1144    TokenKind::Eval,
1145    TokenKind::Do,
1146    TokenKind::Given,
1147    TokenKind::When,
1148    TokenKind::Default,
1149    TokenKind::Try,
1150    TokenKind::Catch,
1151    TokenKind::Finally,
1152    TokenKind::Continue,
1153    TokenKind::Next,
1154    TokenKind::Last,
1155    TokenKind::Redo,
1156    TokenKind::Goto,
1157    TokenKind::Class,
1158    TokenKind::Method,
1159    TokenKind::Field,
1160    TokenKind::Format,
1161    TokenKind::Undef,
1162    TokenKind::Defer,
1163    TokenKind::Assign,
1164    TokenKind::Plus,
1165    TokenKind::Minus,
1166    TokenKind::Star,
1167    TokenKind::Slash,
1168    TokenKind::Percent,
1169    TokenKind::Power,
1170    TokenKind::LeftShift,
1171    TokenKind::RightShift,
1172    TokenKind::BitwiseAnd,
1173    TokenKind::BitwiseOr,
1174    TokenKind::BitwiseXor,
1175    TokenKind::BitwiseNot,
1176    TokenKind::PlusAssign,
1177    TokenKind::MinusAssign,
1178    TokenKind::StarAssign,
1179    TokenKind::SlashAssign,
1180    TokenKind::PercentAssign,
1181    TokenKind::DotAssign,
1182    TokenKind::AndAssign,
1183    TokenKind::OrAssign,
1184    TokenKind::XorAssign,
1185    TokenKind::PowerAssign,
1186    TokenKind::LeftShiftAssign,
1187    TokenKind::RightShiftAssign,
1188    TokenKind::LogicalAndAssign,
1189    TokenKind::LogicalOrAssign,
1190    TokenKind::DefinedOrAssign,
1191    TokenKind::Equal,
1192    TokenKind::NotEqual,
1193    TokenKind::Match,
1194    TokenKind::NotMatch,
1195    TokenKind::SmartMatch,
1196    TokenKind::Less,
1197    TokenKind::Greater,
1198    TokenKind::LessEqual,
1199    TokenKind::GreaterEqual,
1200    TokenKind::Spaceship,
1201    TokenKind::StringCompare,
1202    TokenKind::And,
1203    TokenKind::Or,
1204    TokenKind::Not,
1205    TokenKind::DefinedOr,
1206    TokenKind::WordAnd,
1207    TokenKind::WordOr,
1208    TokenKind::WordNot,
1209    TokenKind::WordXor,
1210    TokenKind::Arrow,
1211    TokenKind::FatArrow,
1212    TokenKind::Dot,
1213    TokenKind::Range,
1214    TokenKind::Ellipsis,
1215    TokenKind::Increment,
1216    TokenKind::Decrement,
1217    TokenKind::DoubleColon,
1218    TokenKind::Question,
1219    TokenKind::Colon,
1220    TokenKind::Backslash,
1221    TokenKind::LeftParen,
1222    TokenKind::RightParen,
1223    TokenKind::LeftBrace,
1224    TokenKind::RightBrace,
1225    TokenKind::LeftBracket,
1226    TokenKind::RightBracket,
1227    TokenKind::Semicolon,
1228    TokenKind::Comma,
1229    TokenKind::Number,
1230    TokenKind::String,
1231    TokenKind::Regex,
1232    TokenKind::Substitution,
1233    TokenKind::Transliteration,
1234    TokenKind::QuoteSingle,
1235    TokenKind::QuoteDouble,
1236    TokenKind::QuoteWords,
1237    TokenKind::QuoteCommand,
1238    TokenKind::HeredocStart,
1239    TokenKind::HeredocBody,
1240    TokenKind::FormatBody,
1241    TokenKind::DataMarker,
1242    TokenKind::DataBody,
1243    TokenKind::VString,
1244    TokenKind::UnknownRest,
1245    TokenKind::HeredocDepthLimit,
1246    TokenKind::Identifier,
1247    TokenKind::ScalarSigil,
1248    TokenKind::ArraySigil,
1249    TokenKind::HashSigil,
1250    TokenKind::SubSigil,
1251    TokenKind::GlobSigil,
1252    TokenKind::Eof,
1253    TokenKind::Unknown,
1254];