Skip to main content

perl_token/
lib.rs

1//! Perl token definitions shared across the parser ecosystem.
2//!
3//! This crate defines [`Token`] and [`TokenKind`], the fundamental types that
4//! flow from the lexer (`perl-lexer`) into the parser (`perl-parser-core`).
5//! Downstream crates re-export these types so consumers rarely need to depend
6//! on `perl-token` directly.
7//!
8//! # Examples
9//!
10//! Create and inspect tokens:
11//!
12//! ```rust
13//! use perl_token::{Token, TokenKind};
14//!
15//! // Create a keyword token for `my`
16//! let token = Token::new(TokenKind::My, "my", 0, 2);
17//! assert_eq!(token.kind, TokenKind::My);
18//! assert_eq!(&*token.text, "my");
19//! assert_eq!(token.start, 0);
20//! assert_eq!(token.end, 2);
21//!
22//! // Create a numeric literal token
23//! let num = Token::new(TokenKind::Number, "42", 7, 9);
24//! assert_eq!(num.kind, TokenKind::Number);
25//! assert_eq!(&*num.text, "42");
26//! ```
27//!
28//! Use [`TokenKind::display_name`] for user-facing error messages:
29//!
30//! ```rust
31//! use perl_token::TokenKind;
32//!
33//! assert_eq!(TokenKind::LeftBrace.display_name(), "'{'");
34//! assert_eq!(TokenKind::Identifier.display_name(), "identifier");
35//! assert_eq!(TokenKind::Eof.display_name(), "end of input");
36//! ```
37
38use std::{ops::Range, sync::Arc};
39
/// Byte span carried by a [`Token`].
///
/// The span covers the half-open byte range `start..end`: `start` is the
/// offset of the first byte and `end` is the offset one past the last byte.
///
/// `Hash` is derived alongside the equality traits so spans can be used
/// directly as `HashMap`/`HashSet` keys (e.g. for de-duplicating diagnostics).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TokenSpan {
    /// Starting byte position (inclusive).
    pub start: usize,
    /// Ending byte position (exclusive).
    pub end: usize,
}
48
49impl TokenSpan {
50    /// Create a span from raw byte positions.
51    pub const fn new(start: usize, end: usize) -> Self {
52        Self { start, end }
53    }
54
55    /// Create a span, returning an error when `end < start`.
56    pub fn try_new(start: usize, end: usize) -> Result<Self, TokenSpanError> {
57        if end < start {
58            return Err(TokenSpanError::EndBeforeStart { start, end });
59        }
60
61        Ok(Self { start, end })
62    }
63
64    /// Span length in bytes.
65    pub const fn len(self) -> usize {
66        self.end.saturating_sub(self.start)
67    }
68
69    /// Whether the span length is zero bytes.
70    pub const fn is_empty(self) -> bool {
71        self.len() == 0
72    }
73
74    /// Convert this span to a standard `Range`.
75    pub const fn range(self) -> Range<usize> {
76        self.start..self.end
77    }
78}
79
/// Error type for checked token/span constructors.
///
/// Produced by [`TokenSpan::try_new`] and by the `try_new` / `new_checked`
/// constructors on [`Token`] and [`TokenRef`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenSpanError {
    /// End offset is before start offset.
    ///
    /// `start` and `end` are the offsets that were passed to the constructor.
    EndBeforeStart { start: usize, end: usize },
    /// Empty span is only valid for EOF or explicit synthetic tokens.
    ///
    /// `kind` is the rejected token kind and `at` is the start offset of the
    /// zero-length span.
    EmptySpanNotAllowed { kind: TokenKind, at: usize },
}
88
89impl std::fmt::Display for TokenSpanError {
90    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
91        match self {
92            Self::EndBeforeStart { start, end } => {
93                write!(f, "token span invariant violated: end ({end}) < start ({start})")
94            }
95            Self::EmptySpanNotAllowed { kind, at } => {
96                write!(f, "empty span not allowed for token kind {kind:?} at byte {at}")
97            }
98        }
99    }
100}
101
102impl std::error::Error for TokenSpanError {}
103
/// Borrowed view over token data for allocation-sensitive paths.
///
/// Unlike [`Token`], this type borrows source text and does not allocate.
/// Convert to [`Token`] explicitly with [`TokenRef::to_owned_token`] or `From`.
///
/// The view is `Copy`, so it can be passed around by value; the `'src`
/// lifetime ties it to the text it borrows from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TokenRef<'src> {
    /// Token classification for parser decision making
    pub kind: TokenKind,
    /// Borrowed source text slice
    pub text: &'src str,
    /// Starting byte position for error reporting and location tracking
    pub start: usize,
    /// Ending byte position for span calculation and navigation
    /// (the span length is computed as `end - start`)
    pub end: usize,
}
119
120impl<'src> TokenRef<'src> {
121    /// Create a borrowed token view with the given kind, source text, and byte span.
122    pub fn new(kind: TokenKind, text: &'src str, start: usize, end: usize) -> Self {
123        Self { kind, text, start, end }
124    }
125
126    /// Create a borrowed token view with checked span ordering.
127    ///
128    /// Unlike [`TokenRef::new`], this rejects spans where `end < start`.
129    pub fn try_new(
130        kind: TokenKind,
131        text: &'src str,
132        start: usize,
133        end: usize,
134    ) -> Result<Self, TokenSpanError> {
135        let span = TokenSpan::try_new(start, end)?;
136        Ok(Self { kind, text, start: span.start, end: span.end })
137    }
138
139    /// Create a borrowed token view while enforcing span invariants.
140    ///
141    /// Rules:
142    /// - `start <= end`
143    /// - zero-length spans are accepted for EOF and explicit synthetic unknown tokens
144    pub fn new_checked(
145        kind: TokenKind,
146        text: &'src str,
147        start: usize,
148        end: usize,
149    ) -> Result<Self, TokenSpanError> {
150        let token = Self::try_new(kind, text, start, end)?;
151        if token.is_empty() && !matches!(token.kind, TokenKind::Eof | TokenKind::Unknown) {
152            return Err(TokenSpanError::EmptySpanNotAllowed { kind: token.kind, at: token.start });
153        }
154
155        Ok(token)
156    }
157
158    /// Return the token span length in bytes.
159    pub fn len(self) -> usize {
160        self.end.saturating_sub(self.start)
161    }
162
163    /// Return whether the token span is empty.
164    pub fn is_empty(self) -> bool {
165        self.len() == 0
166    }
167
168    /// Return the token span as `(start, end)`.
169    pub fn span(self) -> (usize, usize) {
170        (self.start, self.end)
171    }
172
173    /// Return a human-readable display name for this token.
174    pub fn display_name(self) -> &'static str {
175        self.kind.display_name()
176    }
177
178    /// Convert this borrowed token view into an owned [`Token`].
179    pub fn to_owned_token(self) -> Token {
180        Token::new(self.kind, self.text, self.start, self.end)
181    }
182}
183
184/// Token produced by the lexer and consumed by the parser.
185///
186/// Stores the token kind, original source text, and byte span. The text is kept
187/// in an `Arc<str>` so buffering and lookahead can clone tokens cheaply.
188#[derive(Debug, Clone, PartialEq)]
189pub struct Token {
190    /// Token classification for parser decision making
191    pub kind: TokenKind,
192    /// Original source text for precise reconstruction
193    pub text: Arc<str>,
194    /// Starting byte position for error reporting and location tracking
195    pub start: usize,
196    /// Ending byte position for span calculation and navigation
197    pub end: usize,
198}
199
200impl Token {
201    /// Create a new token with the given kind, source text, and byte span.
202    ///
203    /// # Examples
204    ///
205    /// ```rust
206    /// use perl_token::{Token, TokenKind};
207    ///
208    /// let tok = Token::new(TokenKind::Sub, "sub", 0, 3);
209    /// assert_eq!(tok.kind, TokenKind::Sub);
210    /// assert_eq!(&*tok.text, "sub");
211    /// ```
212    pub fn new(kind: TokenKind, text: impl Into<Arc<str>>, start: usize, end: usize) -> Self {
213        Token { kind, text: text.into(), start, end }
214    }
215
216    /// Create a token with checked span ordering.
217    ///
218    /// Unlike [`Token::new`], this rejects spans where `end < start`.
219    pub fn try_new(
220        kind: TokenKind,
221        text: impl Into<Arc<str>>,
222        start: usize,
223        end: usize,
224    ) -> Result<Self, TokenSpanError> {
225        let span = TokenSpan::try_new(start, end)?;
226        Ok(Self { kind, text: text.into(), start: span.start, end: span.end })
227    }
228
229    /// Create a token while enforcing span invariants.
230    ///
231    /// Rules:
232    /// - `start <= end`
233    /// - zero-length spans are accepted for EOF and explicit synthetic unknown tokens
234    pub fn new_checked(
235        kind: TokenKind,
236        text: impl Into<Arc<str>>,
237        start: usize,
238        end: usize,
239    ) -> Result<Self, TokenSpanError> {
240        let token = Self::try_new(kind, text, start, end)?;
241        if token.is_empty() && !matches!(token.kind, TokenKind::Eof | TokenKind::Unknown) {
242            return Err(TokenSpanError::EmptySpanNotAllowed { kind: token.kind, at: token.start });
243        }
244
245        Ok(token)
246    }
247
248    /// Create an EOF token at `pos`.
249    pub fn eof_at(pos: usize) -> Self {
250        Self::new(TokenKind::Eof, "", pos, pos)
251    }
252
253    /// Create an unknown (synthetic) token at `start..end`.
254    pub fn unknown_at(text: impl Into<Arc<str>>, start: usize, end: usize) -> Self {
255        let bounded_end = end.max(start);
256        Self::new(TokenKind::Unknown, text, start, bounded_end)
257    }
258
259    /// Return this token's byte span.
260    pub fn span(&self) -> TokenSpan {
261        TokenSpan::new(self.start, self.end)
262    }
263
264    /// Return this token's byte span as `Range<usize>`.
265    pub fn range(&self) -> Range<usize> {
266        self.span().range()
267    }
268
269    /// Clone this token with a new checked span.
270    pub fn with_span(&self, start: usize, end: usize) -> Result<Self, TokenSpanError> {
271        Self::new_checked(self.kind, self.text.clone(), start, end)
272    }
273
274    /// Clone this token with a new token kind.
275    pub fn with_kind(&self, kind: TokenKind) -> Self {
276        Self::new(kind, self.text.clone(), self.start, self.end)
277    }
278
279    /// Return the token span length in bytes.
280    ///
281    /// This uses saturating subtraction so malformed spans (where `end < start`)
282    /// are treated as zero-length instead of underflowing.
283    ///
284    /// # Examples
285    ///
286    /// ```rust
287    /// use perl_token::{Token, TokenKind};
288    ///
289    /// let tok = Token::new(TokenKind::Identifier, "foo", 10, 13);
290    /// assert_eq!(tok.len(), 3);
291    /// ```
292    pub fn len(&self) -> usize {
293        self.end.saturating_sub(self.start)
294    }
295
296    /// Return whether the token span is empty.
297    ///
298    /// # Examples
299    ///
300    /// ```rust
301    /// use perl_token::{Token, TokenKind};
302    ///
303    /// let tok = Token::new(TokenKind::Eof, "", 8, 8);
304    /// assert!(tok.is_empty());
305    /// ```
306    pub fn is_empty(&self) -> bool {
307        self.len() == 0
308    }
309
310    /// Return a human-readable display name for this token.
311    pub fn display_name(&self) -> &'static str {
312        self.kind.display_name()
313    }
314
315    /// Return a borrowed token view over this token.
316    pub fn as_ref_token(&self) -> TokenRef<'_> {
317        TokenRef { kind: self.kind, text: self.text.as_ref(), start: self.start, end: self.end }
318    }
319}
320
321impl From<TokenRef<'_>> for Token {
322    fn from(value: TokenRef<'_>) -> Self {
323        value.to_owned_token()
324    }
325}
326
/// Token classification for Perl parsing.
///
/// The set is intentionally simplified for fast parser matching while covering
/// keywords, operators, delimiters, literals, identifiers, and special tokens.
///
/// Use [`TokenKind::display_name`] to get a human-readable string suitable for
/// error messages shown to the user.
///
/// The type is a plain fieldless `Copy` enum and derives `Hash` in addition to
/// the equality traits, so kinds can be used as `HashMap`/`HashSet` keys (for
/// example in parser dispatch or statistics tables).
///
/// # Categories
///
/// | Group | Examples |
/// |-------|----------|
/// | Keywords | [`My`](Self::My), [`Sub`](Self::Sub), [`If`](Self::If), ... |
/// | Operators | [`Plus`](Self::Plus), [`Arrow`](Self::Arrow), [`And`](Self::And), ... |
/// | Delimiters | [`LeftParen`](Self::LeftParen), [`LeftBrace`](Self::LeftBrace), ... |
/// | Literals | [`Number`](Self::Number), [`String`](Self::String), [`Regex`](Self::Regex), ... |
/// | Identifiers | [`Identifier`](Self::Identifier), [`ScalarSigil`](Self::ScalarSigil), ... |
/// | Special | [`Eof`](Self::Eof), [`Unknown`](Self::Unknown) |
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
    // ===== Keywords =====
    /// Lexical variable declaration: `my $x`
    My,
    /// Package variable declaration: `our $x`
    Our,
    /// Dynamic scoping: `local $x`
    Local,
    /// Persistent variable: `state $x`
    State,
    /// Subroutine declaration: `sub foo`
    Sub,
    /// Conditional: `if (cond)`
    If,
    /// Else-if conditional: `elsif (cond)`
    Elsif,
    /// Else branch: `else { }`
    Else,
    /// Negated conditional: `unless (cond)`
    Unless,
    /// While loop: `while (cond)`
    While,
    /// Until loop: `until (cond)`
    Until,
    /// C-style for loop: `for (init; cond; update)`
    For,
    /// Iterator loop: `foreach $x (@list)`
    Foreach,
    /// Return statement: `return $value`
    Return,
    /// Package declaration: `package Foo`
    Package,
    /// Module import: `use Module`
    Use,
    /// Disable pragma/module: `no strict`
    No,
    /// Compile-time block: `BEGIN { }`
    Begin,
    /// Exit-time block: `END { }`
    End,
    /// Check phase block: `CHECK { }`
    Check,
    /// Init phase block: `INIT { }`
    Init,
    /// Unit check block: `UNITCHECK { }`
    Unitcheck,
    /// Exception handling: `eval { }`
    Eval,
    /// Block execution: `do { }` or `do "file"`
    Do,
    /// Switch expression: `given ($x)`
    Given,
    /// Case clause: `when ($pattern)`
    When,
    /// Default case: `default { }`
    Default,
    /// Try block: `try { }`
    Try,
    /// Catch block: `catch ($e) { }`
    Catch,
    /// Finally block: `finally { }`
    Finally,
    /// Continue block: `continue { }`
    Continue,
    /// Loop control: `next`
    Next,
    /// Loop control: `last`
    Last,
    /// Loop control: `redo`
    Redo,
    /// Goto statement: `goto LABEL`, `goto &sub`, `goto EXPR`
    Goto,
    /// Class declaration (5.38+): `class Foo`
    Class,
    /// Method declaration (5.38+): `method foo`
    Method,
    /// Class field declaration (5.38+): `field $name`
    Field,
    /// Format declaration: `format STDOUT =`
    Format,
    /// Undefined value: `undef`
    Undef,
    /// Defer block: `defer { ... }` (Perl 5.36+ experimental, stable in 5.40)
    Defer,

    // ===== Operators =====
    /// Assignment: `=`
    Assign,
    /// Addition: `+`
    Plus,
    /// Subtraction: `-`
    Minus,
    /// Multiplication: `*`
    Star,
    /// Division: `/`
    Slash,
    /// Modulo: `%`
    Percent,
    /// Exponentiation: `**`
    Power,
    /// Left bit shift: `<<`
    LeftShift,
    /// Right bit shift: `>>`
    RightShift,
    /// Bitwise AND: `&`
    BitwiseAnd,
    /// Bitwise OR: `|`
    BitwiseOr,
    /// Bitwise XOR: `^`
    BitwiseXor,
    /// Bitwise NOT: `~`
    BitwiseNot,
    /// Add and assign: `+=`
    PlusAssign,
    /// Subtract and assign: `-=`
    MinusAssign,
    /// Multiply and assign: `*=`
    StarAssign,
    /// Divide and assign: `/=`
    SlashAssign,
    /// Modulo and assign: `%=`
    PercentAssign,
    /// Concatenate and assign: `.=`
    DotAssign,
    /// Bitwise AND and assign: `&=`
    AndAssign,
    /// Bitwise OR and assign: `|=`
    OrAssign,
    /// Bitwise XOR and assign: `^=`
    XorAssign,
    /// Power and assign: `**=`
    PowerAssign,
    /// Left shift and assign: `<<=`
    LeftShiftAssign,
    /// Right shift and assign: `>>=`
    RightShiftAssign,
    /// Logical AND and assign: `&&=`
    LogicalAndAssign,
    /// Logical OR and assign: `||=`
    LogicalOrAssign,
    /// Defined-or and assign: `//=`
    DefinedOrAssign,
    /// Numeric equality: `==`
    Equal,
    /// Numeric inequality: `!=`
    NotEqual,
    /// Pattern match binding: `=~`
    Match,
    /// Negated pattern match: `!~`
    NotMatch,
    /// Smart match: `~~`
    SmartMatch,
    /// Less than: `<`
    Less,
    /// Greater than: `>`
    Greater,
    /// Less than or equal: `<=`
    LessEqual,
    /// Greater than or equal: `>=`
    GreaterEqual,
    /// Numeric comparison (spaceship): `<=>`
    Spaceship,
    /// String comparison: `cmp`
    StringCompare,
    /// Logical AND: `&&`
    And,
    /// Logical OR: `||`
    Or,
    /// Logical NOT: `!`
    Not,
    /// Defined-or: `//`
    DefinedOr,
    /// Word AND operator: `and`
    WordAnd,
    /// Word OR operator: `or`
    WordOr,
    /// Word NOT operator: `not`
    WordNot,
    /// Word XOR operator: `xor`
    WordXor,
    /// Method/dereference arrow: `->`
    Arrow,
    /// Hash key separator: `=>`
    FatArrow,
    /// String concatenation: `.`
    Dot,
    /// Range operator: `..`
    Range,
    /// Yada-yada (unimplemented): `...`
    Ellipsis,
    /// Increment: `++`
    Increment,
    /// Decrement: `--`
    Decrement,
    /// Package separator: `::`
    DoubleColon,
    /// Ternary condition: `?`
    Question,
    /// Ternary/label separator: `:`
    Colon,
    /// Reference operator: `\`
    Backslash,

    // ===== Delimiters =====
    /// Left parenthesis: `(`
    LeftParen,
    /// Right parenthesis: `)`
    RightParen,
    /// Left brace: `{`
    LeftBrace,
    /// Right brace: `}`
    RightBrace,
    /// Left bracket: `[`
    LeftBracket,
    /// Right bracket: `]`
    RightBracket,
    /// Statement terminator: `;`
    Semicolon,
    /// List separator: `,`
    Comma,

    // ===== Literals =====
    /// Numeric literal: `42`, `3.14`, `0xFF`
    Number,
    /// String literal: `"hello"` or `'world'`
    String,
    /// Regular expression: `/pattern/flags`
    Regex,
    /// Substitution: `s/pattern/replacement/flags`
    Substitution,
    /// Transliteration: `tr/abc/xyz/` or `y///`
    Transliteration,
    /// Single-quoted string: `q/text/`
    QuoteSingle,
    /// Double-quoted string: `qq/text/`
    QuoteDouble,
    /// Quote words: `qw(list of words)`
    QuoteWords,
    /// Backtick command: `` `cmd` `` or `qx/cmd/`
    QuoteCommand,
    /// Heredoc start marker: `<<EOF`
    HeredocStart,
    /// Heredoc content body
    HeredocBody,
    /// Format specification body
    FormatBody,
    /// Data section marker: `__DATA__` or `__END__`
    DataMarker,
    /// Data section content
    DataBody,
    /// Version string literal: `v5.26.0`, `v5.10`
    VString,
    /// Unparsed remainder (budget exceeded)
    UnknownRest,
    /// Heredoc depth limit exceeded (special error token)
    HeredocDepthLimit,

    // ===== Identifiers and Variables =====
    /// Bareword identifier or function name
    Identifier,
    /// Scalar sigil: `$`
    ScalarSigil,
    /// Array sigil: `@`
    ArraySigil,
    /// Hash sigil: `%`
    HashSigil,
    /// Subroutine sigil: `&`
    SubSigil,
    /// Glob/typeglob sigil: `*`
    GlobSigil,

    // ===== Special =====
    /// End of file/input
    Eof,
    /// Unknown/unrecognized token
    Unknown,
}
623
/// Broad classification used for token metadata and conformance checks.
///
/// Derives `Hash` together with the equality traits so categories can be used
/// as `HashMap`/`HashSet` keys, e.g. when grouping token kinds by category.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenCategory {
    /// Reserved words and language keywords.
    Keyword,
    /// Operators and symbolic/word forms.
    Operator,
    /// Grouping and punctuation delimiters.
    Delimiter,
    /// Literal-like lexical forms.
    Literal,
    /// Identifiers and sigils.
    Identifier,
    /// Special sentinel and recovery tokens.
    Special,
}
640
/// Metadata associated with each [`TokenKind`] variant.
///
/// Values are produced by [`TokenKind::metadata`], which pairs the kind's
/// [`TokenKind::category`] with its [`TokenKind::display_name`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TokenKindMetadata {
    /// Stable category used in docs/tests/gates.
    pub category: TokenCategory,
    /// User-facing display label for diagnostics.
    pub display_name: &'static str,
}
649
650impl TokenKind {
    /// Return every [`TokenKind`] variant in stable declaration order.
    ///
    /// The returned slice borrows the `TOKEN_KIND_ALL` table.
    // NOTE(review): ordering and completeness rely on `TOKEN_KIND_ALL` (defined
    // outside this method) being kept in sync with the enum — confirm there.
    pub const fn all() -> &'static [TokenKind] {
        &TOKEN_KIND_ALL
    }
655
    /// Number of token kinds expected to have metadata coverage.
    ///
    /// This is the length of the `TOKEN_KIND_ALL` table, i.e. the same value
    /// as `TokenKind::all().len()`.
    pub const fn metadata_count() -> usize {
        TOKEN_KIND_ALL.len()
    }
660
661    /// Return compact metadata for this token kind.
662    pub fn metadata(self) -> TokenKindMetadata {
663        TokenKindMetadata { category: self.category(), display_name: self.display_name() }
664    }
665
666    /// Return the high-level category for this token kind.
667    pub const fn category(self) -> TokenCategory {
668        match self {
669            TokenKind::My
670            | TokenKind::Our
671            | TokenKind::Local
672            | TokenKind::State
673            | TokenKind::Sub
674            | TokenKind::If
675            | TokenKind::Elsif
676            | TokenKind::Else
677            | TokenKind::Unless
678            | TokenKind::While
679            | TokenKind::Until
680            | TokenKind::For
681            | TokenKind::Foreach
682            | TokenKind::Return
683            | TokenKind::Package
684            | TokenKind::Use
685            | TokenKind::No
686            | TokenKind::Begin
687            | TokenKind::End
688            | TokenKind::Check
689            | TokenKind::Init
690            | TokenKind::Unitcheck
691            | TokenKind::Eval
692            | TokenKind::Do
693            | TokenKind::Given
694            | TokenKind::When
695            | TokenKind::Default
696            | TokenKind::Try
697            | TokenKind::Catch
698            | TokenKind::Finally
699            | TokenKind::Continue
700            | TokenKind::Next
701            | TokenKind::Last
702            | TokenKind::Redo
703            | TokenKind::Goto
704            | TokenKind::Class
705            | TokenKind::Method
706            | TokenKind::Field
707            | TokenKind::Format
708            | TokenKind::Undef
709            | TokenKind::Defer => TokenCategory::Keyword,
710            TokenKind::Assign
711            | TokenKind::Plus
712            | TokenKind::Minus
713            | TokenKind::Star
714            | TokenKind::Slash
715            | TokenKind::Percent
716            | TokenKind::Power
717            | TokenKind::LeftShift
718            | TokenKind::RightShift
719            | TokenKind::BitwiseAnd
720            | TokenKind::BitwiseOr
721            | TokenKind::BitwiseXor
722            | TokenKind::BitwiseNot
723            | TokenKind::PlusAssign
724            | TokenKind::MinusAssign
725            | TokenKind::StarAssign
726            | TokenKind::SlashAssign
727            | TokenKind::PercentAssign
728            | TokenKind::DotAssign
729            | TokenKind::AndAssign
730            | TokenKind::OrAssign
731            | TokenKind::XorAssign
732            | TokenKind::PowerAssign
733            | TokenKind::LeftShiftAssign
734            | TokenKind::RightShiftAssign
735            | TokenKind::LogicalAndAssign
736            | TokenKind::LogicalOrAssign
737            | TokenKind::DefinedOrAssign
738            | TokenKind::Equal
739            | TokenKind::NotEqual
740            | TokenKind::Match
741            | TokenKind::NotMatch
742            | TokenKind::SmartMatch
743            | TokenKind::Less
744            | TokenKind::Greater
745            | TokenKind::LessEqual
746            | TokenKind::GreaterEqual
747            | TokenKind::Spaceship
748            | TokenKind::StringCompare
749            | TokenKind::And
750            | TokenKind::Or
751            | TokenKind::Not
752            | TokenKind::DefinedOr
753            | TokenKind::WordAnd
754            | TokenKind::WordOr
755            | TokenKind::WordNot
756            | TokenKind::WordXor
757            | TokenKind::Arrow
758            | TokenKind::FatArrow
759            | TokenKind::Dot
760            | TokenKind::Range
761            | TokenKind::Ellipsis
762            | TokenKind::Increment
763            | TokenKind::Decrement
764            | TokenKind::DoubleColon
765            | TokenKind::Question
766            | TokenKind::Colon
767            | TokenKind::Backslash => TokenCategory::Operator,
768            TokenKind::LeftParen
769            | TokenKind::RightParen
770            | TokenKind::LeftBrace
771            | TokenKind::RightBrace
772            | TokenKind::LeftBracket
773            | TokenKind::RightBracket
774            | TokenKind::Semicolon
775            | TokenKind::Comma => TokenCategory::Delimiter,
776            TokenKind::Number
777            | TokenKind::String
778            | TokenKind::Regex
779            | TokenKind::Substitution
780            | TokenKind::Transliteration
781            | TokenKind::QuoteSingle
782            | TokenKind::QuoteDouble
783            | TokenKind::QuoteWords
784            | TokenKind::QuoteCommand
785            | TokenKind::HeredocStart
786            | TokenKind::HeredocBody
787            | TokenKind::FormatBody
788            | TokenKind::DataMarker
789            | TokenKind::DataBody
790            | TokenKind::VString
791            | TokenKind::UnknownRest
792            | TokenKind::HeredocDepthLimit => TokenCategory::Literal,
793            TokenKind::Identifier
794            | TokenKind::ScalarSigil
795            | TokenKind::ArraySigil
796            | TokenKind::HashSigil
797            | TokenKind::SubSigil
798            | TokenKind::GlobSigil => TokenCategory::Identifier,
799            TokenKind::Eof | TokenKind::Unknown => TokenCategory::Special,
800        }
801    }
802
803    // --- Category-based predicates (classify by TokenCategory) ---
804
805    /// Returns `true` if this token kind is a keyword.
806    pub const fn is_keyword(self) -> bool {
807        matches!(self.category(), TokenCategory::Keyword)
808    }
809
810    /// Returns `true` if this token kind is an operator.
811    pub const fn is_operator(self) -> bool {
812        matches!(self.category(), TokenCategory::Operator)
813    }
814
815    /// Returns `true` if this token kind is a literal.
816    pub const fn is_literal(self) -> bool {
817        matches!(self.category(), TokenCategory::Literal)
818    }
819
820    /// Returns `true` if this token kind is a delimiter.
821    pub const fn is_delimiter(self) -> bool {
822        matches!(self.category(), TokenCategory::Delimiter)
823    }
824
825    /// Returns `true` if this token kind is an identifier or sigil.
826    pub const fn is_identifier(self) -> bool {
827        matches!(self.category(), TokenCategory::Identifier)
828    }
829
830    /// Returns `true` if this token kind is a special sentinel/recovery token.
831    pub const fn is_special(self) -> bool {
832        matches!(self.category(), TokenCategory::Special)
833    }
834
835    // --- Parser-facing role predicates (specific semantic roles) ---
836
837    /// Return whether this token is an assignment operator.
838    #[inline]
839    pub fn is_assignment_operator(self) -> bool {
840        matches!(
841            self,
842            TokenKind::Assign
843                | TokenKind::PlusAssign
844                | TokenKind::MinusAssign
845                | TokenKind::StarAssign
846                | TokenKind::SlashAssign
847                | TokenKind::PercentAssign
848                | TokenKind::DotAssign
849                | TokenKind::AndAssign
850                | TokenKind::OrAssign
851                | TokenKind::XorAssign
852                | TokenKind::PowerAssign
853                | TokenKind::LeftShiftAssign
854                | TokenKind::RightShiftAssign
855                | TokenKind::LogicalAndAssign
856                | TokenKind::LogicalOrAssign
857                | TokenKind::DefinedOrAssign
858        )
859    }
860
861    /// Return whether this token is a comparison operator.
862    #[inline]
863    pub fn is_comparison_operator(self) -> bool {
864        matches!(
865            self,
866            TokenKind::Equal
867                | TokenKind::NotEqual
868                | TokenKind::Less
869                | TokenKind::Greater
870                | TokenKind::LessEqual
871                | TokenKind::GreaterEqual
872                | TokenKind::Spaceship
873                | TokenKind::StringCompare
874                | TokenKind::Match
875                | TokenKind::NotMatch
876                | TokenKind::SmartMatch
877        )
878    }
879
880    /// Return whether this token is a logical operator.
881    #[inline]
882    pub fn is_logical_operator(self) -> bool {
883        matches!(
884            self,
885            TokenKind::And
886                | TokenKind::Or
887                | TokenKind::Not
888                | TokenKind::DefinedOr
889                | TokenKind::WordAnd
890                | TokenKind::WordOr
891                | TokenKind::WordNot
892                | TokenKind::WordXor
893        )
894    }
895
896    /// Return whether this token is a word-form operator token.
897    #[inline]
898    pub fn is_word_operator(self) -> bool {
899        matches!(
900            self,
901            TokenKind::StringCompare
902                | TokenKind::WordAnd
903                | TokenKind::WordOr
904                | TokenKind::WordNot
905                | TokenKind::WordXor
906        )
907    }
908
909    /// Return whether this token is a low-precedence word operator.
910    #[inline]
911    pub fn is_low_precedence_word_operator(self) -> bool {
912        matches!(
913            self,
914            TokenKind::WordAnd | TokenKind::WordOr | TokenKind::WordNot | TokenKind::WordXor
915        )
916    }
917
918    /// Return whether this token is an opening paired delimiter.
919    #[inline]
920    pub fn is_open_delimiter(self) -> bool {
921        matches!(self, TokenKind::LeftParen | TokenKind::LeftBrace | TokenKind::LeftBracket)
922    }
923
924    /// Return whether this token is a closing paired delimiter.
925    #[inline]
926    pub fn is_close_delimiter(self) -> bool {
927        matches!(self, TokenKind::RightParen | TokenKind::RightBrace | TokenKind::RightBracket)
928    }
929
930    /// Return the matching paired delimiter for this token, if any.
931    #[inline]
932    pub fn matching_delimiter(self) -> Option<Self> {
933        match self {
934            TokenKind::LeftParen => Some(TokenKind::RightParen),
935            TokenKind::RightParen => Some(TokenKind::LeftParen),
936            TokenKind::LeftBrace => Some(TokenKind::RightBrace),
937            TokenKind::RightBrace => Some(TokenKind::LeftBrace),
938            TokenKind::LeftBracket => Some(TokenKind::RightBracket),
939            TokenKind::RightBracket => Some(TokenKind::LeftBracket),
940            _ => None,
941        }
942    }
943
944    /// Return whether this token is quote-like syntax.
945    #[inline]
946    pub fn is_quote_like(self) -> bool {
947        matches!(
948            self,
949            TokenKind::Regex
950                | TokenKind::Substitution
951                | TokenKind::Transliteration
952                | TokenKind::QuoteSingle
953                | TokenKind::QuoteDouble
954                | TokenKind::QuoteWords
955                | TokenKind::QuoteCommand
956                | TokenKind::HeredocStart
957        )
958    }
959
960    /// Return whether this token is a hard recovery boundary.
961    #[inline]
962    pub fn is_recovery_boundary(self) -> bool {
963        self == TokenKind::Semicolon || self.is_close_delimiter() || self == TokenKind::Eof
964    }
965
966    /// Map a canonical keyword spelling to its [`TokenKind`].
967    ///
968    /// This mapping is case-sensitive and only recognizes canonical Perl
969    /// spellings used by the lexer/parser pipeline.
970    pub fn from_keyword(spelling: &str) -> Option<TokenKind> {
971        match spelling {
972            "my" => Some(TokenKind::My),
973            "our" => Some(TokenKind::Our),
974            "local" => Some(TokenKind::Local),
975            "state" => Some(TokenKind::State),
976            "sub" => Some(TokenKind::Sub),
977            "if" => Some(TokenKind::If),
978            "elsif" => Some(TokenKind::Elsif),
979            "else" => Some(TokenKind::Else),
980            "unless" => Some(TokenKind::Unless),
981            "while" => Some(TokenKind::While),
982            "until" => Some(TokenKind::Until),
983            "for" => Some(TokenKind::For),
984            "foreach" => Some(TokenKind::Foreach),
985            "return" => Some(TokenKind::Return),
986            "package" => Some(TokenKind::Package),
987            "use" => Some(TokenKind::Use),
988            "no" => Some(TokenKind::No),
989            "BEGIN" => Some(TokenKind::Begin),
990            "END" => Some(TokenKind::End),
991            "CHECK" => Some(TokenKind::Check),
992            "INIT" => Some(TokenKind::Init),
993            "UNITCHECK" => Some(TokenKind::Unitcheck),
994            "eval" => Some(TokenKind::Eval),
995            "do" => Some(TokenKind::Do),
996            "given" => Some(TokenKind::Given),
997            "when" => Some(TokenKind::When),
998            "default" => Some(TokenKind::Default),
999            "try" => Some(TokenKind::Try),
1000            "catch" => Some(TokenKind::Catch),
1001            "finally" => Some(TokenKind::Finally),
1002            "continue" => Some(TokenKind::Continue),
1003            "next" => Some(TokenKind::Next),
1004            "last" => Some(TokenKind::Last),
1005            "redo" => Some(TokenKind::Redo),
1006            "goto" => Some(TokenKind::Goto),
1007            "class" => Some(TokenKind::Class),
1008            "method" => Some(TokenKind::Method),
1009            "field" => Some(TokenKind::Field),
1010            "format" => Some(TokenKind::Format),
1011            "undef" => Some(TokenKind::Undef),
1012            "defer" => Some(TokenKind::Defer),
1013            // Word operators are emitted as Keyword tokens by the lexer.
1014            "and" => Some(TokenKind::WordAnd),
1015            "or" => Some(TokenKind::WordOr),
1016            "not" => Some(TokenKind::WordNot),
1017            "xor" => Some(TokenKind::WordXor),
1018            "cmp" => Some(TokenKind::StringCompare),
1019            _ => None,
1020        }
1021    }
1022
1023    /// Map a canonical operator spelling to its [`TokenKind`].
1024    ///
1025    /// This mapping is case-sensitive.
1026    pub fn from_operator(spelling: &str) -> Option<TokenKind> {
1027        match spelling {
1028            "=" => Some(TokenKind::Assign),
1029            "+" => Some(TokenKind::Plus),
1030            "-" => Some(TokenKind::Minus),
1031            "*" => Some(TokenKind::Star),
1032            "/" => Some(TokenKind::Slash),
1033            "%" => Some(TokenKind::Percent),
1034            "**" => Some(TokenKind::Power),
1035            "<<" => Some(TokenKind::LeftShift),
1036            ">>" => Some(TokenKind::RightShift),
1037            "&" => Some(TokenKind::BitwiseAnd),
1038            "|" => Some(TokenKind::BitwiseOr),
1039            "^" => Some(TokenKind::BitwiseXor),
1040            "~" => Some(TokenKind::BitwiseNot),
1041            "+=" => Some(TokenKind::PlusAssign),
1042            "-=" => Some(TokenKind::MinusAssign),
1043            "*=" => Some(TokenKind::StarAssign),
1044            "/=" => Some(TokenKind::SlashAssign),
1045            "%=" => Some(TokenKind::PercentAssign),
1046            ".=" => Some(TokenKind::DotAssign),
1047            "&=" => Some(TokenKind::AndAssign),
1048            "|=" => Some(TokenKind::OrAssign),
1049            "^=" => Some(TokenKind::XorAssign),
1050            "**=" => Some(TokenKind::PowerAssign),
1051            "<<=" => Some(TokenKind::LeftShiftAssign),
1052            ">>=" => Some(TokenKind::RightShiftAssign),
1053            "&&=" => Some(TokenKind::LogicalAndAssign),
1054            "||=" => Some(TokenKind::LogicalOrAssign),
1055            "//=" => Some(TokenKind::DefinedOrAssign),
1056            "==" => Some(TokenKind::Equal),
1057            "!=" => Some(TokenKind::NotEqual),
1058            "=~" => Some(TokenKind::Match),
1059            "!~" => Some(TokenKind::NotMatch),
1060            "~~" => Some(TokenKind::SmartMatch),
1061            "<" => Some(TokenKind::Less),
1062            ">" => Some(TokenKind::Greater),
1063            "<=" => Some(TokenKind::LessEqual),
1064            ">=" => Some(TokenKind::GreaterEqual),
1065            "<=>" => Some(TokenKind::Spaceship),
1066            "&&" => Some(TokenKind::And),
1067            "||" => Some(TokenKind::Or),
1068            "!" => Some(TokenKind::Not),
1069            "//" => Some(TokenKind::DefinedOr),
1070            "->" => Some(TokenKind::Arrow),
1071            "=>" => Some(TokenKind::FatArrow),
1072            "." => Some(TokenKind::Dot),
1073            ".." => Some(TokenKind::Range),
1074            "..." => Some(TokenKind::Ellipsis),
1075            "++" => Some(TokenKind::Increment),
1076            "--" => Some(TokenKind::Decrement),
1077            "::" => Some(TokenKind::DoubleColon),
1078            "?" => Some(TokenKind::Question),
1079            ":" => Some(TokenKind::Colon),
1080            "\\" => Some(TokenKind::Backslash),
1081            _ => None,
1082        }
1083    }
1084
1085    /// Map a delimiter spelling to its [`TokenKind`].
1086    pub fn from_delimiter(spelling: &str) -> Option<TokenKind> {
1087        match spelling {
1088            "(" => Some(TokenKind::LeftParen),
1089            ")" => Some(TokenKind::RightParen),
1090            "{" => Some(TokenKind::LeftBrace),
1091            "}" => Some(TokenKind::RightBrace),
1092            "[" => Some(TokenKind::LeftBracket),
1093            "]" => Some(TokenKind::RightBracket),
1094            ";" => Some(TokenKind::Semicolon),
1095            "," => Some(TokenKind::Comma),
1096            _ => None,
1097        }
1098    }
1099
1100    /// Map a sigil spelling to its [`TokenKind`].
1101    pub fn from_sigil(spelling: &str) -> Option<TokenKind> {
1102        match spelling {
1103            "$" => Some(TokenKind::ScalarSigil),
1104            "@" => Some(TokenKind::ArraySigil),
1105            "%" => Some(TokenKind::HashSigil),
1106            "&" => Some(TokenKind::SubSigil),
1107            "*" => Some(TokenKind::GlobSigil),
1108            _ => None,
1109        }
1110    }
1111
1112    /// Return a user-friendly display name for this token kind.
1113    ///
1114    /// These names appear in parser error messages shown in the editor.
1115    /// They use the actual Perl syntax (e.g. `}` instead of `RightBrace`)
1116    /// so users can immediately understand what the parser expected.
1117    ///
1118    /// # Examples
1119    ///
1120    /// ```rust
1121    /// use perl_token::TokenKind;
1122    ///
1123    /// assert_eq!(TokenKind::Semicolon.display_name(), "';'");
1124    /// assert_eq!(TokenKind::Sub.display_name(), "'sub'");
1125    /// assert_eq!(TokenKind::Number.display_name(), "number");
1126    /// ```
1127    pub fn display_name(self) -> &'static str {
1128        match self {
1129            // Keywords
1130            TokenKind::My => "'my'",
1131            TokenKind::Our => "'our'",
1132            TokenKind::Local => "'local'",
1133            TokenKind::State => "'state'",
1134            TokenKind::Sub => "'sub'",
1135            TokenKind::If => "'if'",
1136            TokenKind::Elsif => "'elsif'",
1137            TokenKind::Else => "'else'",
1138            TokenKind::Unless => "'unless'",
1139            TokenKind::While => "'while'",
1140            TokenKind::Until => "'until'",
1141            TokenKind::For => "'for'",
1142            TokenKind::Foreach => "'foreach'",
1143            TokenKind::Return => "'return'",
1144            TokenKind::Package => "'package'",
1145            TokenKind::Use => "'use'",
1146            TokenKind::No => "'no'",
1147            TokenKind::Begin => "'BEGIN'",
1148            TokenKind::End => "'END'",
1149            TokenKind::Check => "'CHECK'",
1150            TokenKind::Init => "'INIT'",
1151            TokenKind::Unitcheck => "'UNITCHECK'",
1152            TokenKind::Eval => "'eval'",
1153            TokenKind::Do => "'do'",
1154            TokenKind::Given => "'given'",
1155            TokenKind::When => "'when'",
1156            TokenKind::Default => "'default'",
1157            TokenKind::Try => "'try'",
1158            TokenKind::Catch => "'catch'",
1159            TokenKind::Finally => "'finally'",
1160            TokenKind::Continue => "'continue'",
1161            TokenKind::Next => "'next'",
1162            TokenKind::Last => "'last'",
1163            TokenKind::Redo => "'redo'",
1164            TokenKind::Goto => "'goto'",
1165            TokenKind::Class => "'class'",
1166            TokenKind::Method => "'method'",
1167            TokenKind::Field => "'field'",
1168            TokenKind::Format => "'format'",
1169            TokenKind::Undef => "'undef'",
1170            TokenKind::Defer => "'defer'",
1171
1172            // Operators
1173            TokenKind::Assign => "'='",
1174            TokenKind::Plus => "'+'",
1175            TokenKind::Minus => "'-'",
1176            TokenKind::Star => "'*'",
1177            TokenKind::Slash => "'/'",
1178            TokenKind::Percent => "'%'",
1179            TokenKind::Power => "'**'",
1180            TokenKind::LeftShift => "'<<'",
1181            TokenKind::RightShift => "'>>'",
1182            TokenKind::BitwiseAnd => "'&'",
1183            TokenKind::BitwiseOr => "'|'",
1184            TokenKind::BitwiseXor => "'^'",
1185            TokenKind::BitwiseNot => "'~'",
1186            TokenKind::PlusAssign => "'+='",
1187            TokenKind::MinusAssign => "'-='",
1188            TokenKind::StarAssign => "'*='",
1189            TokenKind::SlashAssign => "'/='",
1190            TokenKind::PercentAssign => "'%='",
1191            TokenKind::DotAssign => "'.='",
1192            TokenKind::AndAssign => "'&='",
1193            TokenKind::OrAssign => "'|='",
1194            TokenKind::XorAssign => "'^='",
1195            TokenKind::PowerAssign => "'**='",
1196            TokenKind::LeftShiftAssign => "'<<='",
1197            TokenKind::RightShiftAssign => "'>>='",
1198            TokenKind::LogicalAndAssign => "'&&='",
1199            TokenKind::LogicalOrAssign => "'||='",
1200            TokenKind::DefinedOrAssign => "'//='",
1201            TokenKind::Equal => "'=='",
1202            TokenKind::NotEqual => "'!='",
1203            TokenKind::Match => "'=~'",
1204            TokenKind::NotMatch => "'!~'",
1205            TokenKind::SmartMatch => "'~~'",
1206            TokenKind::Less => "'<'",
1207            TokenKind::Greater => "'>'",
1208            TokenKind::LessEqual => "'<='",
1209            TokenKind::GreaterEqual => "'>='",
1210            TokenKind::Spaceship => "'<=>'",
1211            TokenKind::StringCompare => "'cmp'",
1212            TokenKind::And => "'&&'",
1213            TokenKind::Or => "'||'",
1214            TokenKind::Not => "'!'",
1215            TokenKind::DefinedOr => "'//'",
1216            TokenKind::WordAnd => "'and'",
1217            TokenKind::WordOr => "'or'",
1218            TokenKind::WordNot => "'not'",
1219            TokenKind::WordXor => "'xor'",
1220            TokenKind::Arrow => "'->'",
1221            TokenKind::FatArrow => "'=>'",
1222            TokenKind::Dot => "'.'",
1223            TokenKind::Range => "'..'",
1224            TokenKind::Ellipsis => "'...'",
1225            TokenKind::Increment => "'++'",
1226            TokenKind::Decrement => "'--'",
1227            TokenKind::DoubleColon => "'::'",
1228            TokenKind::Question => "'?'",
1229            TokenKind::Colon => "':'",
1230            TokenKind::Backslash => "'\\'",
1231
1232            // Delimiters
1233            TokenKind::LeftParen => "'('",
1234            TokenKind::RightParen => "')'",
1235            TokenKind::LeftBrace => "'{'",
1236            TokenKind::RightBrace => "'}'",
1237            TokenKind::LeftBracket => "'['",
1238            TokenKind::RightBracket => "']'",
1239            TokenKind::Semicolon => "';'",
1240            TokenKind::Comma => "','",
1241
1242            // Literals
1243            TokenKind::Number => "number",
1244            TokenKind::String => "string",
1245            TokenKind::Regex => "regex",
1246            TokenKind::Substitution => "substitution (s///)",
1247            TokenKind::Transliteration => "transliteration (tr///)",
1248            TokenKind::QuoteSingle => "q// string",
1249            TokenKind::QuoteDouble => "qq// string",
1250            TokenKind::QuoteWords => "qw() word list",
1251            TokenKind::QuoteCommand => "qx// command",
1252            TokenKind::HeredocStart => "heredoc (<<)",
1253            TokenKind::HeredocBody => "heredoc body",
1254            TokenKind::FormatBody => "format body",
1255            TokenKind::DataMarker => "data marker (__DATA__ or __END__)",
1256            TokenKind::DataBody => "data section body",
1257            TokenKind::VString => "version string",
1258            TokenKind::UnknownRest => "unparsed remainder",
1259            TokenKind::HeredocDepthLimit => "heredoc depth limit exceeded",
1260
1261            // Identifiers and variables
1262            TokenKind::Identifier => "identifier",
1263            TokenKind::ScalarSigil => "'$'",
1264            TokenKind::ArraySigil => "'@'",
1265            TokenKind::HashSigil => "'%'",
1266            TokenKind::SubSigil => "'&'",
1267            TokenKind::GlobSigil => "'*'",
1268
1269            // Special
1270            TokenKind::Eof => "end of input",
1271            TokenKind::Unknown => "unknown token",
1272        }
1273    }
1274}
1275
1276const TOKEN_KIND_ALL: [TokenKind; 132] = [
1277    TokenKind::My,
1278    TokenKind::Our,
1279    TokenKind::Local,
1280    TokenKind::State,
1281    TokenKind::Sub,
1282    TokenKind::If,
1283    TokenKind::Elsif,
1284    TokenKind::Else,
1285    TokenKind::Unless,
1286    TokenKind::While,
1287    TokenKind::Until,
1288    TokenKind::For,
1289    TokenKind::Foreach,
1290    TokenKind::Return,
1291    TokenKind::Package,
1292    TokenKind::Use,
1293    TokenKind::No,
1294    TokenKind::Begin,
1295    TokenKind::End,
1296    TokenKind::Check,
1297    TokenKind::Init,
1298    TokenKind::Unitcheck,
1299    TokenKind::Eval,
1300    TokenKind::Do,
1301    TokenKind::Given,
1302    TokenKind::When,
1303    TokenKind::Default,
1304    TokenKind::Try,
1305    TokenKind::Catch,
1306    TokenKind::Finally,
1307    TokenKind::Continue,
1308    TokenKind::Next,
1309    TokenKind::Last,
1310    TokenKind::Redo,
1311    TokenKind::Goto,
1312    TokenKind::Class,
1313    TokenKind::Method,
1314    TokenKind::Field,
1315    TokenKind::Format,
1316    TokenKind::Undef,
1317    TokenKind::Defer,
1318    TokenKind::Assign,
1319    TokenKind::Plus,
1320    TokenKind::Minus,
1321    TokenKind::Star,
1322    TokenKind::Slash,
1323    TokenKind::Percent,
1324    TokenKind::Power,
1325    TokenKind::LeftShift,
1326    TokenKind::RightShift,
1327    TokenKind::BitwiseAnd,
1328    TokenKind::BitwiseOr,
1329    TokenKind::BitwiseXor,
1330    TokenKind::BitwiseNot,
1331    TokenKind::PlusAssign,
1332    TokenKind::MinusAssign,
1333    TokenKind::StarAssign,
1334    TokenKind::SlashAssign,
1335    TokenKind::PercentAssign,
1336    TokenKind::DotAssign,
1337    TokenKind::AndAssign,
1338    TokenKind::OrAssign,
1339    TokenKind::XorAssign,
1340    TokenKind::PowerAssign,
1341    TokenKind::LeftShiftAssign,
1342    TokenKind::RightShiftAssign,
1343    TokenKind::LogicalAndAssign,
1344    TokenKind::LogicalOrAssign,
1345    TokenKind::DefinedOrAssign,
1346    TokenKind::Equal,
1347    TokenKind::NotEqual,
1348    TokenKind::Match,
1349    TokenKind::NotMatch,
1350    TokenKind::SmartMatch,
1351    TokenKind::Less,
1352    TokenKind::Greater,
1353    TokenKind::LessEqual,
1354    TokenKind::GreaterEqual,
1355    TokenKind::Spaceship,
1356    TokenKind::StringCompare,
1357    TokenKind::And,
1358    TokenKind::Or,
1359    TokenKind::Not,
1360    TokenKind::DefinedOr,
1361    TokenKind::WordAnd,
1362    TokenKind::WordOr,
1363    TokenKind::WordNot,
1364    TokenKind::WordXor,
1365    TokenKind::Arrow,
1366    TokenKind::FatArrow,
1367    TokenKind::Dot,
1368    TokenKind::Range,
1369    TokenKind::Ellipsis,
1370    TokenKind::Increment,
1371    TokenKind::Decrement,
1372    TokenKind::DoubleColon,
1373    TokenKind::Question,
1374    TokenKind::Colon,
1375    TokenKind::Backslash,
1376    TokenKind::LeftParen,
1377    TokenKind::RightParen,
1378    TokenKind::LeftBrace,
1379    TokenKind::RightBrace,
1380    TokenKind::LeftBracket,
1381    TokenKind::RightBracket,
1382    TokenKind::Semicolon,
1383    TokenKind::Comma,
1384    TokenKind::Number,
1385    TokenKind::String,
1386    TokenKind::Regex,
1387    TokenKind::Substitution,
1388    TokenKind::Transliteration,
1389    TokenKind::QuoteSingle,
1390    TokenKind::QuoteDouble,
1391    TokenKind::QuoteWords,
1392    TokenKind::QuoteCommand,
1393    TokenKind::HeredocStart,
1394    TokenKind::HeredocBody,
1395    TokenKind::FormatBody,
1396    TokenKind::DataMarker,
1397    TokenKind::DataBody,
1398    TokenKind::VString,
1399    TokenKind::UnknownRest,
1400    TokenKind::HeredocDepthLimit,
1401    TokenKind::Identifier,
1402    TokenKind::ScalarSigil,
1403    TokenKind::ArraySigil,
1404    TokenKind::HashSigil,
1405    TokenKind::SubSigil,
1406    TokenKind::GlobSigil,
1407    TokenKind::Eof,
1408    TokenKind::Unknown,
1409];
1410
#[cfg(test)]
mod tests {
    //! Unit tests for spans, tokens, borrowed tokens and the `TokenKind`
    //! lookup/metadata helpers.

    use super::*;

    // --- TokenSpan ---

    #[test]
    fn token_span_new_and_accessors() {
        let s = TokenSpan::new(5, 10);
        assert_eq!((s.start, s.end), (5, 10));
        assert_eq!(s.len(), 5);
        assert!(!s.is_empty());
        assert_eq!(s.range(), 5..10);
    }

    #[test]
    fn token_span_is_empty_when_zero_length() {
        let s = TokenSpan::new(3, 3);
        assert!(s.is_empty());
        assert_eq!(s.len(), 0);
    }

    #[test]
    fn token_span_try_new_ok() {
        let s = TokenSpan::try_new(0, 5).unwrap();
        assert_eq!((s.start, s.end), (0, 5));
    }

    #[test]
    fn token_span_try_new_end_before_start_errors() {
        let failure = TokenSpan::try_new(10, 5).unwrap_err();
        assert_eq!(failure, TokenSpanError::EndBeforeStart { start: 10, end: 5 });
    }

    #[test]
    fn token_span_error_display_end_before_start() {
        let rendered = TokenSpanError::EndBeforeStart { start: 10, end: 5 }.to_string();
        // Both offending offsets must appear in the message.
        for needle in ["10", "5"] {
            assert!(rendered.contains(needle));
        }
    }

    #[test]
    fn token_span_error_display_empty_span_not_allowed() {
        let rendered =
            TokenSpanError::EmptySpanNotAllowed { kind: TokenKind::Identifier, at: 7 }.to_string();
        // The message names the offending kind and its position.
        for needle in ["Identifier", "7"] {
            assert!(rendered.contains(needle));
        }
    }

    // --- Token ---

    #[test]
    fn token_new_stores_fields() {
        let t = Token::new(TokenKind::My, "my", 0, 2);
        assert_eq!(t.kind, TokenKind::My);
        assert_eq!(&*t.text, "my");
        assert_eq!((t.start, t.end), (0, 2));
    }

    #[test]
    fn token_len_and_is_empty() {
        let ident = Token::new(TokenKind::Identifier, "foo", 10, 13);
        assert_eq!(ident.len(), 3);
        assert!(!ident.is_empty());

        // An EOF token is a zero-width marker.
        let end_marker = Token::eof_at(8);
        assert_eq!(end_marker.len(), 0);
        assert!(end_marker.is_empty());
    }

    #[test]
    fn token_span_and_range() {
        let t = Token::new(TokenKind::Number, "42", 5, 7);
        assert_eq!(t.span(), TokenSpan::new(5, 7));
        assert_eq!(t.range(), 5..7);
    }

    #[test]
    fn token_try_new_rejects_end_before_start() {
        let failure = Token::try_new(TokenKind::Identifier, "x", 10, 5).unwrap_err();
        assert_eq!(failure, TokenSpanError::EndBeforeStart { start: 10, end: 5 });
    }

    #[test]
    fn token_new_checked_rejects_empty_non_eof() {
        let failure = Token::new_checked(TokenKind::Identifier, "", 5, 5).unwrap_err();
        assert_eq!(
            failure,
            TokenSpanError::EmptySpanNotAllowed { kind: TokenKind::Identifier, at: 5 }
        );
    }

    #[test]
    fn token_new_checked_allows_empty_eof() {
        let t = Token::new_checked(TokenKind::Eof, "", 5, 5).unwrap();
        assert_eq!(t.kind, TokenKind::Eof);
        assert_eq!(t.start, 5);
    }

    #[test]
    fn token_eof_at() {
        let end_marker = Token::eof_at(42);
        assert_eq!(end_marker.kind, TokenKind::Eof);
        assert_eq!((end_marker.start, end_marker.end), (42, 42));
        assert!(end_marker.is_empty());
    }

    #[test]
    fn token_unknown_at_normalises_inverted_span() {
        // The requested end (3) lies before the start (5).
        let t = Token::unknown_at("?", 5, 3);
        assert_eq!(t.kind, TokenKind::Unknown);
        assert_eq!(t.start, 5);
        // The end is clamped up to the start rather than kept inverted.
        assert_eq!(t.end, 5);
    }

    #[test]
    fn token_with_kind() {
        let before = Token::new(TokenKind::Identifier, "sub", 0, 3);
        let after = before.with_kind(TokenKind::Sub);
        assert_eq!(after.kind, TokenKind::Sub);
        assert_eq!(&*after.text, "sub");
        assert_eq!((after.start, after.end), (0, 3));
    }

    #[test]
    fn token_with_span_ok() {
        let before = Token::new(TokenKind::String, "hello", 0, 5);
        let after = before.with_span(10, 15).unwrap();
        assert_eq!((after.start, after.end), (10, 15));
    }

    #[test]
    fn token_display_name_delegates_to_kind() {
        let t = Token::new(TokenKind::LeftBrace, "{", 0, 1);
        assert_eq!(t.display_name(), "'{'");
    }

    #[test]
    fn token_as_ref_token_round_trip() {
        let original = Token::new(TokenKind::Sub, "sub", 0, 3);

        // Owned -> borrowed view keeps every field.
        let borrowed = original.as_ref_token();
        assert_eq!(borrowed.kind, TokenKind::Sub);
        assert_eq!(borrowed.text, "sub");
        assert_eq!((borrowed.start, borrowed.end), (0, 3));

        // Borrowed -> owned again.
        let rebuilt: Token = borrowed.into();
        assert_eq!(rebuilt.kind, TokenKind::Sub);
        assert_eq!(&*rebuilt.text, "sub");
    }

    // --- TokenRef ---

    #[test]
    fn token_ref_accessors() {
        let view = TokenRef::new(TokenKind::Number, "99", 4, 6);
        assert_eq!(view.len(), 2);
        assert!(!view.is_empty());
        assert_eq!(view.span(), (4, 6));
        assert_eq!(view.display_name(), "number");
    }

    #[test]
    fn token_ref_to_owned_token() {
        let view = TokenRef::new(TokenKind::Identifier, "foo", 1, 4);
        let rebuilt = view.to_owned_token();
        assert_eq!(rebuilt.kind, TokenKind::Identifier);
        assert_eq!(&*rebuilt.text, "foo");
    }

    // --- TokenKind::from_keyword ---

    #[test]
    fn from_keyword_recognises_perl_keywords() {
        let cases = [
            ("my", TokenKind::My),
            ("sub", TokenKind::Sub),
            ("if", TokenKind::If),
            ("elsif", TokenKind::Elsif),
            ("else", TokenKind::Else),
            ("while", TokenKind::While),
            ("for", TokenKind::For),
            ("foreach", TokenKind::Foreach),
            ("return", TokenKind::Return),
            ("package", TokenKind::Package),
            ("use", TokenKind::Use),
            ("BEGIN", TokenKind::Begin),
            ("END", TokenKind::End),
            ("eval", TokenKind::Eval),
            ("class", TokenKind::Class),
            ("defer", TokenKind::Defer),
            ("and", TokenKind::WordAnd),
            ("or", TokenKind::WordOr),
            ("not", TokenKind::WordNot),
            ("xor", TokenKind::WordXor),
            ("cmp", TokenKind::StringCompare),
        ];
        for (spelling, expected) in cases {
            assert_eq!(TokenKind::from_keyword(spelling), Some(expected));
        }
    }

    #[test]
    fn from_keyword_unknown_returns_none() {
        // Wrong case, a non-keyword word, and the empty string.
        for spelling in ["MY", "Sub", "unknown", ""] {
            assert_eq!(TokenKind::from_keyword(spelling), None);
        }
    }

    // --- TokenKind::from_operator ---

    #[test]
    fn from_operator_recognises_operators() {
        let cases = [
            ("=", TokenKind::Assign),
            ("+", TokenKind::Plus),
            ("**", TokenKind::Power),
            ("->", TokenKind::Arrow),
            ("=>", TokenKind::FatArrow),
            ("<=>", TokenKind::Spaceship),
            ("//=", TokenKind::DefinedOrAssign),
            ("...", TokenKind::Ellipsis),
            ("~~", TokenKind::SmartMatch),
        ];
        for (spelling, expected) in cases {
            assert_eq!(TokenKind::from_operator(spelling), Some(expected));
        }
    }

    #[test]
    fn from_operator_unknown_returns_none() {
        for spelling in ["", "xyz"] {
            assert_eq!(TokenKind::from_operator(spelling), None);
        }
    }

    // --- TokenKind::from_delimiter ---

    #[test]
    fn from_delimiter_recognises_all() {
        let cases = [
            ("(", TokenKind::LeftParen),
            (")", TokenKind::RightParen),
            ("{", TokenKind::LeftBrace),
            ("}", TokenKind::RightBrace),
            ("[", TokenKind::LeftBracket),
            ("]", TokenKind::RightBracket),
            (";", TokenKind::Semicolon),
            (",", TokenKind::Comma),
        ];
        for (spelling, expected) in cases {
            assert_eq!(TokenKind::from_delimiter(spelling), Some(expected));
        }
        assert_eq!(TokenKind::from_delimiter("x"), None);
    }

    // --- TokenKind::from_sigil ---

    #[test]
    fn from_sigil_recognises_all() {
        let cases = [
            ("$", TokenKind::ScalarSigil),
            ("@", TokenKind::ArraySigil),
            ("%", TokenKind::HashSigil),
            ("&", TokenKind::SubSigil),
            ("*", TokenKind::GlobSigil),
        ];
        for (spelling, expected) in cases {
            assert_eq!(TokenKind::from_sigil(spelling), Some(expected));
        }
        assert_eq!(TokenKind::from_sigil("!"), None);
    }

    // --- TokenKind::category ---

    #[test]
    fn category_keyword_variants() {
        for kind in [TokenKind::My, TokenKind::Sub, TokenKind::Defer] {
            assert_eq!(kind.category(), TokenCategory::Keyword);
        }
    }

    #[test]
    fn category_operator_variants() {
        // Word operators count as operators, not keywords.
        for kind in [TokenKind::Plus, TokenKind::Spaceship, TokenKind::WordAnd] {
            assert_eq!(kind.category(), TokenCategory::Operator);
        }
    }

    #[test]
    fn category_delimiter_variants() {
        for kind in [TokenKind::LeftParen, TokenKind::Comma] {
            assert_eq!(kind.category(), TokenCategory::Delimiter);
        }
    }

    #[test]
    fn category_literal_variants() {
        for kind in [TokenKind::Number, TokenKind::HeredocStart, TokenKind::DataMarker] {
            assert_eq!(kind.category(), TokenCategory::Literal);
        }
    }

    #[test]
    fn category_identifier_variants() {
        // Sigils are grouped with identifiers.
        for kind in [TokenKind::Identifier, TokenKind::ScalarSigil, TokenKind::GlobSigil] {
            assert_eq!(kind.category(), TokenCategory::Identifier);
        }
    }

    #[test]
    fn category_special_variants() {
        for kind in [TokenKind::Eof, TokenKind::Unknown] {
            assert_eq!(kind.category(), TokenCategory::Special);
        }
    }

    // --- TokenKind::display_name ---

    #[test]
    fn display_name_selected_variants() {
        let cases = [
            (TokenKind::LeftBrace, "'{'"),
            (TokenKind::RightBrace, "'}'"),
            (TokenKind::Identifier, "identifier"),
            (TokenKind::Eof, "end of input"),
            (TokenKind::Number, "number"),
            (TokenKind::Sub, "'sub'"),
            (TokenKind::Semicolon, "';'"),
            (TokenKind::HeredocStart, "heredoc (<<)"),
            (TokenKind::DataMarker, "data marker (__DATA__ or __END__)"),
        ];
        for (kind, expected) in cases {
            assert_eq!(kind.display_name(), expected);
        }
    }

    // --- TokenKind::all / metadata_count ---

    #[test]
    fn all_returns_132_variants() {
        assert_eq!(TokenKind::all().len(), 132);
        assert_eq!(TokenKind::metadata_count(), 132);
    }

    #[test]
    fn metadata_round_trips_through_kind() {
        let meta = TokenKind::Sub.metadata();
        assert_eq!(meta.category, TokenCategory::Keyword);
        assert_eq!(meta.display_name, "'sub'");
    }
}