perl_lexer/token.rs
1//! Token types and structures for the Perl lexer
2
3use std::sync::Arc;
4
/// One segment of an interpolated string.
///
/// Every variant carries its text as a shared `Arc<str>`, so cloning a part
/// bumps a reference count rather than copying the string data.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so parts can be stored in
/// hashed collections and used as map keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum StringPart {
    /// Literal text
    Literal(Arc<str>),
    /// Variable interpolation: `$var`, `@array`, `%hash`
    Variable(Arc<str>),
    /// Expression interpolation: `${expr}`, `@{expr}`
    Expression(Arc<str>),
    /// Method call: `->method()`
    MethodCall(Arc<str>),
    /// Array slice: `[1..3]`
    ArraySlice(Arc<str>),
}
19
20/// Token types for Perl
21#[derive(Debug, Clone, PartialEq)]
22pub enum TokenType {
23 // Slash-derived tokens
24 /// Division operator: /
25 Division,
26 /// Regex match: m// or //
27 RegexMatch,
28 /// Substitution: s///
29 Substitution,
30 /// Transliteration: tr/// or y///
31 Transliteration,
32 /// Quote regex: qr//
33 QuoteRegex,
34
35 // String and quote tokens
36 /// String literal: "string" or 'string'
37 StringLiteral,
38 /// Single quote: q//
39 QuoteSingle,
40 /// Double quote: qq//
41 QuoteDouble,
42 /// Quote words: qw//
43 QuoteWords,
44 /// Quote command: qx// or `backticks`
45 QuoteCommand,
46
47 // String interpolation tokens
48 /// String with interpolated parts
49 InterpolatedString(Vec<StringPart>),
50
51 // Heredoc tokens
52 /// Heredoc start: <<EOF or <<'EOF'
53 HeredocStart,
54 /// Heredoc body content
55 HeredocBody(Arc<str>),
56
57 // Format declarations
58 /// Format body content
59 FormatBody(Arc<str>),
60
61 // Version strings
62 /// Version string: v5.32.0
63 Version(Arc<str>),
64
65 // POD documentation
66 /// POD documentation block
67 Pod,
68
69 // Data sections
70 /// Data section marker: __DATA__ or __END__
71 DataMarker(Arc<str>),
72 /// Data section body content
73 DataBody(Arc<str>),
74
75 // Error recovery
76 /// Unknown rest of input (used when budget exceeded)
77 UnknownRest,
78
79 // Identifiers and literals
80 /// Identifier or variable name
81 Identifier(Arc<str>),
82 /// Numeric literal
83 Number(Arc<str>),
84 /// Operator
85 Operator(Arc<str>),
86 /// Keyword
87 Keyword(Arc<str>),
88
89 // Delimiters
90 /// Left parenthesis: (
91 LeftParen,
92 /// Right parenthesis: )
93 RightParen,
94 /// Left bracket: [
95 LeftBracket,
96 /// Right bracket: ]
97 RightBracket,
98 /// Left brace: {
99 LeftBrace,
100 /// Right brace: }
101 RightBrace,
102
103 // Punctuation
104 /// Semicolon: ;
105 Semicolon,
106 /// Comma: ,
107 Comma,
108 /// Colon: :
109 Colon,
110 /// Arrow: ->
111 Arrow,
112 /// Fat comma: =>
113 FatComma,
114
115 // Whitespace and comments
116 /// Whitespace (usually not returned)
117 Whitespace,
118 /// Newline character
119 Newline,
120 /// Comment text
121 Comment(Arc<str>),
122
123 // Special tokens
124 /// End of file
125 EOF,
126 /// Error token for invalid input
127 Error(Arc<str>),
128}
129
130/// Token with position information
131#[derive(Debug, Clone)]
132pub struct Token {
133 /// The type of token
134 pub token_type: TokenType,
135 /// The actual text of the token
136 pub text: Arc<str>,
137 /// Start position in the input
138 pub start: usize,
139 /// End position in the input
140 pub end: usize,
141}
142
143impl Token {
144 /// Create a new token
145 pub fn new(token_type: TokenType, text: impl Into<Arc<str>>, start: usize, end: usize) -> Self {
146 Self { token_type, text: text.into(), start, end }
147 }
148
149 /// Get the length of the token
150 pub fn len(&self) -> usize {
151 self.end - self.start
152 }
153
154 /// Check if the token is empty
155 pub fn is_empty(&self) -> bool {
156 self.start == self.end
157 }
158}