Skip to main content

perl_token/
lib.rs

1//! Perl token definitions shared across the parser ecosystem.
2//!
3//! This crate defines [`Token`] and [`TokenKind`], the fundamental types that
4//! flow from the lexer (`perl-lexer`) into the parser (`perl-parser-core`).
5//! Downstream crates re-export these types so consumers rarely need to depend
6//! on `perl-token` directly.
7//!
8//! # Examples
9//!
10//! Create and inspect tokens:
11//!
12//! ```rust
13//! use perl_token::{Token, TokenKind};
14//!
15//! // Create a keyword token for `my`
16//! let token = Token::new(TokenKind::My, "my", 0, 2);
17//! assert_eq!(token.kind, TokenKind::My);
18//! assert_eq!(&*token.text, "my");
19//! assert_eq!(token.start, 0);
20//! assert_eq!(token.end, 2);
21//!
22//! // Create a numeric literal token
23//! let num = Token::new(TokenKind::Number, "42", 7, 9);
24//! assert_eq!(num.kind, TokenKind::Number);
25//! assert_eq!(&*num.text, "42");
26//! ```
27//!
28//! Use [`TokenKind::display_name`] for user-facing error messages:
29//!
30//! ```rust
31//! use perl_token::TokenKind;
32//!
33//! assert_eq!(TokenKind::LeftBrace.display_name(), "'{'");
34//! assert_eq!(TokenKind::Identifier.display_name(), "identifier");
35//! assert_eq!(TokenKind::Eof.display_name(), "end of input");
36//! ```
37
38use std::sync::Arc;
39
40/// Token produced by the lexer and consumed by the parser.
41///
42/// Stores the token kind, original source text, and byte span. The text is kept
43/// in an `Arc<str>` so buffering and lookahead can clone tokens cheaply.
44#[derive(Debug, Clone, PartialEq)]
45pub struct Token {
46    /// Token classification for parser decision making
47    pub kind: TokenKind,
48    /// Original source text for precise reconstruction
49    pub text: Arc<str>,
50    /// Starting byte position for error reporting and location tracking
51    pub start: usize,
52    /// Ending byte position for span calculation and navigation
53    pub end: usize,
54}
55
56impl Token {
57    /// Create a new token with the given kind, source text, and byte span.
58    ///
59    /// # Examples
60    ///
61    /// ```rust
62    /// use perl_token::{Token, TokenKind};
63    ///
64    /// let tok = Token::new(TokenKind::Sub, "sub", 0, 3);
65    /// assert_eq!(tok.kind, TokenKind::Sub);
66    /// assert_eq!(&*tok.text, "sub");
67    /// ```
68    pub fn new(kind: TokenKind, text: impl Into<Arc<str>>, start: usize, end: usize) -> Self {
69        Token { kind, text: text.into(), start, end }
70    }
71}
72
/// Token classification for Perl parsing.
///
/// The set is intentionally simplified for fast parser matching while covering
/// keywords, operators, delimiters, literals, identifiers, and special tokens.
///
/// Use [`TokenKind::display_name`] to get a human-readable string suitable for
/// error messages shown to the user.
///
/// The enum is `Copy`, `Eq`, and `Hash`, so kinds can be passed by value and
/// used directly as keys in `HashMap`/`HashSet` (e.g. expected-token sets).
///
/// # Categories
///
/// | Group | Examples |
/// |-------|----------|
/// | Keywords | [`My`](Self::My), [`Sub`](Self::Sub), [`If`](Self::If), ... |
/// | Operators | [`Plus`](Self::Plus), [`Arrow`](Self::Arrow), [`And`](Self::And), ... |
/// | Delimiters | [`LeftParen`](Self::LeftParen), [`LeftBrace`](Self::LeftBrace), ... |
/// | Literals | [`Number`](Self::Number), [`String`](Self::String), [`Regex`](Self::Regex), ... |
/// | Identifiers | [`Identifier`](Self::Identifier), [`ScalarSigil`](Self::ScalarSigil), ... |
/// | Special | [`Eof`](Self::Eof), [`Unknown`](Self::Unknown) |
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
    // ===== Keywords =====
    /// Lexical variable declaration: `my $x`
    My,
    /// Package variable declaration: `our $x`
    Our,
    /// Dynamic scoping: `local $x`
    Local,
    /// Persistent variable: `state $x`
    State,
    /// Subroutine declaration: `sub foo`
    Sub,
    /// Conditional: `if (cond)`
    If,
    /// Else-if conditional: `elsif (cond)`
    Elsif,
    /// Else branch: `else { }`
    Else,
    /// Negated conditional: `unless (cond)`
    Unless,
    /// While loop: `while (cond)`
    While,
    /// Until loop: `until (cond)`
    Until,
    /// C-style for loop: `for (init; cond; update)`
    For,
    /// Iterator loop: `foreach $x (@list)`
    Foreach,
    /// Return statement: `return $value`
    Return,
    /// Package declaration: `package Foo`
    Package,
    /// Module import: `use Module`
    Use,
    /// Disable pragma/module: `no strict`
    No,
    /// Compile-time block: `BEGIN { }`
    Begin,
    /// Exit-time block: `END { }`
    End,
    /// Check phase block: `CHECK { }`
    Check,
    /// Init phase block: `INIT { }`
    Init,
    /// Unit check block: `UNITCHECK { }`
    Unitcheck,
    /// Exception handling: `eval { }`
    Eval,
    /// Block execution: `do { }` or `do "file"`
    Do,
    /// Switch expression: `given ($x)`
    Given,
    /// Case clause: `when ($pattern)`
    When,
    /// Default case: `default { }`
    Default,
    /// Try block: `try { }`
    Try,
    /// Catch block: `catch ($e) { }`
    Catch,
    /// Finally block: `finally { }`
    Finally,
    /// Continue block: `continue { }`
    Continue,
    /// Loop control: `next`
    Next,
    /// Loop control: `last`
    Last,
    /// Loop control: `redo`
    Redo,
    /// Goto statement: `goto LABEL`, `goto &sub`, `goto EXPR`
    Goto,
    /// Class declaration (5.38+): `class Foo`
    Class,
    /// Method declaration (5.38+): `method foo`
    Method,
    /// Class field declaration (5.38+): `field $name`
    Field,
    /// Format declaration: `format STDOUT =`
    Format,
    /// Undefined value: `undef`
    Undef,

    // ===== Operators =====
    /// Assignment: `=`
    Assign,
    /// Addition: `+`
    Plus,
    /// Subtraction: `-`
    Minus,
    /// Multiplication: `*`
    Star,
    /// Division: `/`
    Slash,
    /// Modulo: `%`
    Percent,
    /// Exponentiation: `**`
    Power,
    /// Left bit shift: `<<`
    LeftShift,
    /// Right bit shift: `>>`
    RightShift,
    /// Bitwise AND: `&`
    BitwiseAnd,
    /// Bitwise OR: `|`
    BitwiseOr,
    /// Bitwise XOR: `^`
    BitwiseXor,
    /// Bitwise NOT: `~`
    BitwiseNot,
    /// Add and assign: `+=`
    PlusAssign,
    /// Subtract and assign: `-=`
    MinusAssign,
    /// Multiply and assign: `*=`
    StarAssign,
    /// Divide and assign: `/=`
    SlashAssign,
    /// Modulo and assign: `%=`
    PercentAssign,
    /// Concatenate and assign: `.=`
    DotAssign,
    /// Bitwise AND and assign: `&=`
    AndAssign,
    /// Bitwise OR and assign: `|=`
    OrAssign,
    /// Bitwise XOR and assign: `^=`
    XorAssign,
    /// Power and assign: `**=`
    PowerAssign,
    /// Left shift and assign: `<<=`
    LeftShiftAssign,
    /// Right shift and assign: `>>=`
    RightShiftAssign,
    /// Logical AND and assign: `&&=`
    LogicalAndAssign,
    /// Logical OR and assign: `||=`
    LogicalOrAssign,
    /// Defined-or and assign: `//=`
    DefinedOrAssign,
    /// Numeric equality: `==`
    Equal,
    /// Numeric inequality: `!=`
    NotEqual,
    /// Pattern match binding: `=~`
    Match,
    /// Negated pattern match: `!~`
    NotMatch,
    /// Smart match: `~~`
    SmartMatch,
    /// Less than: `<`
    Less,
    /// Greater than: `>`
    Greater,
    /// Less than or equal: `<=`
    LessEqual,
    /// Greater than or equal: `>=`
    GreaterEqual,
    /// Numeric comparison (spaceship): `<=>`
    Spaceship,
    /// String comparison: `cmp`
    StringCompare,
    /// Logical AND: `&&`
    And,
    /// Logical OR: `||`
    Or,
    /// Logical NOT: `!`
    Not,
    /// Defined-or: `//`
    DefinedOr,
    /// Word AND operator: `and`
    WordAnd,
    /// Word OR operator: `or`
    WordOr,
    /// Word NOT operator: `not`
    WordNot,
    /// Word XOR operator: `xor`
    WordXor,
    /// Method/dereference arrow: `->`
    Arrow,
    /// Hash key separator: `=>`
    FatArrow,
    /// String concatenation: `.`
    Dot,
    /// Range operator: `..`
    Range,
    /// Yada-yada (unimplemented): `...`
    Ellipsis,
    /// Increment: `++`
    Increment,
    /// Decrement: `--`
    Decrement,
    /// Package separator: `::`
    DoubleColon,
    /// Ternary condition: `?`
    Question,
    /// Ternary/label separator: `:`
    Colon,
    /// Reference operator: `\`
    Backslash,

    // ===== Delimiters =====
    /// Left parenthesis: `(`
    LeftParen,
    /// Right parenthesis: `)`
    RightParen,
    /// Left brace: `{`
    LeftBrace,
    /// Right brace: `}`
    RightBrace,
    /// Left bracket: `[`
    LeftBracket,
    /// Right bracket: `]`
    RightBracket,
    /// Statement terminator: `;`
    Semicolon,
    /// List separator: `,`
    Comma,

    // ===== Literals =====
    /// Numeric literal: `42`, `3.14`, `0xFF`
    Number,
    /// String literal: `"hello"` or `'world'`
    String,
    /// Regular expression: `/pattern/flags`
    Regex,
    /// Substitution: `s/pattern/replacement/flags`
    Substitution,
    /// Transliteration: `tr/abc/xyz/` or `y///`
    Transliteration,
    /// Single-quoted string: `q/text/`
    QuoteSingle,
    /// Double-quoted string: `qq/text/`
    QuoteDouble,
    /// Quote words: `qw(list of words)`
    QuoteWords,
    /// Backtick command: `` `cmd` `` or `qx/cmd/`
    QuoteCommand,
    /// Heredoc start marker: `<<EOF`
    HeredocStart,
    /// Heredoc content body
    HeredocBody,
    /// Format specification body
    FormatBody,
    /// Data section marker: `__DATA__` or `__END__`
    DataMarker,
    /// Data section content
    DataBody,
    /// Version string literal: `v5.26.0`, `v5.10`
    VString,
    /// Unparsed remainder (budget exceeded)
    UnknownRest,
    /// Heredoc depth limit exceeded (special error token)
    HeredocDepthLimit,

    // ===== Identifiers and Variables =====
    /// Bareword identifier or function name
    Identifier,
    /// Scalar sigil: `$`
    ScalarSigil,
    /// Array sigil: `@`
    ArraySigil,
    /// Hash sigil: `%`
    HashSigil,
    /// Subroutine sigil: `&`
    SubSigil,
    /// Glob/typeglob sigil: `*`
    GlobSigil,

    // ===== Special =====
    /// End of file/input
    Eof,
    /// Unknown/unrecognized token
    Unknown,
}
367
368impl TokenKind {
369    /// Return a user-friendly display name for this token kind.
370    ///
371    /// These names appear in parser error messages shown in the editor.
372    /// They use the actual Perl syntax (e.g. `}` instead of `RightBrace`)
373    /// so users can immediately understand what the parser expected.
374    ///
375    /// # Examples
376    ///
377    /// ```rust
378    /// use perl_token::TokenKind;
379    ///
380    /// assert_eq!(TokenKind::Semicolon.display_name(), "';'");
381    /// assert_eq!(TokenKind::Sub.display_name(), "'sub'");
382    /// assert_eq!(TokenKind::Number.display_name(), "number");
383    /// ```
384    pub fn display_name(self) -> &'static str {
385        match self {
386            // Keywords
387            TokenKind::My => "'my'",
388            TokenKind::Our => "'our'",
389            TokenKind::Local => "'local'",
390            TokenKind::State => "'state'",
391            TokenKind::Sub => "'sub'",
392            TokenKind::If => "'if'",
393            TokenKind::Elsif => "'elsif'",
394            TokenKind::Else => "'else'",
395            TokenKind::Unless => "'unless'",
396            TokenKind::While => "'while'",
397            TokenKind::Until => "'until'",
398            TokenKind::For => "'for'",
399            TokenKind::Foreach => "'foreach'",
400            TokenKind::Return => "'return'",
401            TokenKind::Package => "'package'",
402            TokenKind::Use => "'use'",
403            TokenKind::No => "'no'",
404            TokenKind::Begin => "'BEGIN'",
405            TokenKind::End => "'END'",
406            TokenKind::Check => "'CHECK'",
407            TokenKind::Init => "'INIT'",
408            TokenKind::Unitcheck => "'UNITCHECK'",
409            TokenKind::Eval => "'eval'",
410            TokenKind::Do => "'do'",
411            TokenKind::Given => "'given'",
412            TokenKind::When => "'when'",
413            TokenKind::Default => "'default'",
414            TokenKind::Try => "'try'",
415            TokenKind::Catch => "'catch'",
416            TokenKind::Finally => "'finally'",
417            TokenKind::Continue => "'continue'",
418            TokenKind::Next => "'next'",
419            TokenKind::Last => "'last'",
420            TokenKind::Redo => "'redo'",
421            TokenKind::Goto => "'goto'",
422            TokenKind::Class => "'class'",
423            TokenKind::Method => "'method'",
424            TokenKind::Field => "'field'",
425            TokenKind::Format => "'format'",
426            TokenKind::Undef => "'undef'",
427
428            // Operators
429            TokenKind::Assign => "'='",
430            TokenKind::Plus => "'+'",
431            TokenKind::Minus => "'-'",
432            TokenKind::Star => "'*'",
433            TokenKind::Slash => "'/'",
434            TokenKind::Percent => "'%'",
435            TokenKind::Power => "'**'",
436            TokenKind::LeftShift => "'<<'",
437            TokenKind::RightShift => "'>>'",
438            TokenKind::BitwiseAnd => "'&'",
439            TokenKind::BitwiseOr => "'|'",
440            TokenKind::BitwiseXor => "'^'",
441            TokenKind::BitwiseNot => "'~'",
442            TokenKind::PlusAssign => "'+='",
443            TokenKind::MinusAssign => "'-='",
444            TokenKind::StarAssign => "'*='",
445            TokenKind::SlashAssign => "'/='",
446            TokenKind::PercentAssign => "'%='",
447            TokenKind::DotAssign => "'.='",
448            TokenKind::AndAssign => "'&='",
449            TokenKind::OrAssign => "'|='",
450            TokenKind::XorAssign => "'^='",
451            TokenKind::PowerAssign => "'**='",
452            TokenKind::LeftShiftAssign => "'<<='",
453            TokenKind::RightShiftAssign => "'>>='",
454            TokenKind::LogicalAndAssign => "'&&='",
455            TokenKind::LogicalOrAssign => "'||='",
456            TokenKind::DefinedOrAssign => "'//='",
457            TokenKind::Equal => "'=='",
458            TokenKind::NotEqual => "'!='",
459            TokenKind::Match => "'=~'",
460            TokenKind::NotMatch => "'!~'",
461            TokenKind::SmartMatch => "'~~'",
462            TokenKind::Less => "'<'",
463            TokenKind::Greater => "'>'",
464            TokenKind::LessEqual => "'<='",
465            TokenKind::GreaterEqual => "'>='",
466            TokenKind::Spaceship => "'<=>'",
467            TokenKind::StringCompare => "'cmp'",
468            TokenKind::And => "'&&'",
469            TokenKind::Or => "'||'",
470            TokenKind::Not => "'!'",
471            TokenKind::DefinedOr => "'//'",
472            TokenKind::WordAnd => "'and'",
473            TokenKind::WordOr => "'or'",
474            TokenKind::WordNot => "'not'",
475            TokenKind::WordXor => "'xor'",
476            TokenKind::Arrow => "'->'",
477            TokenKind::FatArrow => "'=>'",
478            TokenKind::Dot => "'.'",
479            TokenKind::Range => "'..'",
480            TokenKind::Ellipsis => "'...'",
481            TokenKind::Increment => "'++'",
482            TokenKind::Decrement => "'--'",
483            TokenKind::DoubleColon => "'::'",
484            TokenKind::Question => "'?'",
485            TokenKind::Colon => "':'",
486            TokenKind::Backslash => "'\\'",
487
488            // Delimiters
489            TokenKind::LeftParen => "'('",
490            TokenKind::RightParen => "')'",
491            TokenKind::LeftBrace => "'{'",
492            TokenKind::RightBrace => "'}'",
493            TokenKind::LeftBracket => "'['",
494            TokenKind::RightBracket => "']'",
495            TokenKind::Semicolon => "';'",
496            TokenKind::Comma => "','",
497
498            // Literals
499            TokenKind::Number => "number",
500            TokenKind::String => "string",
501            TokenKind::Regex => "regex",
502            TokenKind::Substitution => "substitution (s///)",
503            TokenKind::Transliteration => "transliteration (tr///)",
504            TokenKind::QuoteSingle => "q// string",
505            TokenKind::QuoteDouble => "qq// string",
506            TokenKind::QuoteWords => "qw() word list",
507            TokenKind::QuoteCommand => "qx// command",
508            TokenKind::HeredocStart => "heredoc (<<)",
509            TokenKind::HeredocBody => "heredoc body",
510            TokenKind::FormatBody => "format body",
511            TokenKind::DataMarker => "__DATA__",
512            TokenKind::DataBody => "data section",
513            TokenKind::VString => "version string",
514            TokenKind::UnknownRest => "unparsed content",
515            TokenKind::HeredocDepthLimit => "heredoc depth limit",
516
517            // Identifiers and variables
518            TokenKind::Identifier => "identifier",
519            TokenKind::ScalarSigil => "'$'",
520            TokenKind::ArraySigil => "'@'",
521            TokenKind::HashSigil => "'%'",
522            TokenKind::SubSigil => "'&'",
523            TokenKind::GlobSigil => "'*'",
524
525            // Special
526            TokenKind::Eof => "end of input",
527            TokenKind::Unknown => "unknown token",
528        }
529    }
530}