frost_lexer/token.rs
1//! Token types for the zsh lexer.
2
3use compact_str::CompactString;
4
/// Byte offset span in source.
///
/// The span is half-open: [`Span::len`] is `end - start`, and a span whose
/// `start` equals its `end` is empty. Both fields are public and
/// [`Span::new`] performs no validation, so an inverted span (`end < start`)
/// can be constructed; such a span is treated as empty.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Offset at which the span begins.
    pub start: u32,
    /// Offset at which the span ends (not included in the span).
    pub end: u32,
}

impl Span {
    /// Creates a span covering `start..end`.
    ///
    /// The bounds are stored as given; no ordering check is performed.
    #[must_use]
    pub const fn new(start: u32, end: u32) -> Self {
        Self { start, end }
    }

    /// Returns the number of bytes covered by the span.
    ///
    /// An inverted span (`end < start`) reports a length of `0` rather than
    /// panicking (debug builds) or wrapping around (release builds).
    #[must_use]
    pub const fn len(&self) -> u32 {
        self.end.saturating_sub(self.start)
    }

    /// Returns `true` when the span covers no bytes.
    ///
    /// Defined in terms of [`Span::len`] so the two can never disagree;
    /// an inverted span is therefore reported as empty.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
25
26/// A token produced by the lexer.
27#[derive(Debug, Clone, PartialEq, Eq)]
28pub struct Token {
29 pub kind: TokenKind,
30 pub span: Span,
31 /// The raw text of the token (before any expansion).
32 pub text: CompactString,
33}
34
/// All token kinds recognized by the zsh lexer.
///
/// Variants are grouped by role (literals, operators, redirections, grouping,
/// expansion markers, reserved words, special). The enum carries no payload;
/// the token's text and position live on the surrounding `Token`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenKind {
    // ── Literals ────────────────────────────────────────────
    /// Unquoted word (command name, argument, etc.)
    Word,
    /// Single-quoted string (no expansion)
    SingleQuoted,
    /// Double-quoted string (expansion inside)
    DoubleQuoted,
    /// `$'...'` ANSI-C quoting
    DollarSingleQuoted,
    /// Integer or float literal (in arithmetic context)
    Number,

    // ── Operators ───────────────────────────────────────────
    /// `|`
    Pipe,
    /// `|&`
    PipeAmpersand,
    /// `||`
    OrOr,
    /// `&&`
    AndAnd,
    /// `&`
    Ampersand,
    /// `&!` or `&|` (disown)
    Disown,
    /// `;`
    Semi,
    /// `;;`
    DoubleSemi,
    /// `;&`
    SemiAnd,
    /// `;|`
    SemiPipe,
    /// `\n` (significant in shell grammar)
    Newline,

    // ── Redirections ────────────────────────────────────────
    /// `<` (stdin)
    Less,
    /// `>` (stdout, clobber)
    Greater,
    /// `>>` (append)
    DoubleGreater,
    /// `>|` (clobber, noclobber override)
    GreaterPipe,
    /// `>!` (clobber, noclobber override — zsh-specific)
    GreaterBang,
    /// `&>` or `>&` (stdout+stderr)
    AmpGreater,
    /// `&>>` (append stdout+stderr)
    AmpDoubleGreater,
    /// `<<` (heredoc)
    DoubleLess,
    /// `<<<` (herestring)
    TripleLess,
    /// `<<-` (heredoc, strip tabs)
    DoubleLessDash,
    /// `<>` (open read-write)
    LessGreater,
    /// `<(` (process substitution — read)
    ProcessSubIn,
    /// `>(` (process substitution — write)
    ProcessSubOut,
    /// `N>` (fd redirect, e.g. `2>`)
    FdGreater,
    /// `N<` (fd redirect, e.g. `0<`)
    FdLess,
    /// `N>>`
    FdDoubleGreater,
    /// `N>&M` (fd dup)
    FdDup,

    // ── Grouping ────────────────────────────────────────────
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// `[[` (conditional start)
    DoubleLeftBracket,
    /// `]]` (conditional end)
    DoubleRightBracket,

    // ── Expansion markers ───────────────────────────────────
    /// `$` (variable expansion start)
    Dollar,
    /// `${` (parameter expansion start)
    DollarBrace,
    /// `$(` (command substitution start)
    DollarParen,
    /// `$((` (arithmetic expansion start)
    DollarDoubleParen,
    /// Backtick (backtick command substitution)
    Backtick,
    /// `=` (assignment or test operator)
    Equals,
    /// `~` (tilde expansion)
    Tilde,
    /// `#` (comment start — lexer produces this then skips to newline)
    Comment,
    /// `!` (history expansion or negation)
    Bang,
    /// `@` (used in `${arr[@]}`, `$@` etc.)
    At,
    /// `*` (glob or arithmetic)
    Star,
    /// `?` (glob or ternary)
    Question,

    // ── Reserved words ──────────────────────────────────────
    /// `if`
    If,
    /// `then`
    Then,
    /// `elif`
    Elif,
    /// `else`
    Else,
    /// `fi`
    Fi,
    /// `for`
    For,
    /// `in` (for ... in)
    In,
    /// `while`
    While,
    /// `until`
    Until,
    /// `do`
    Do,
    /// `done`
    Done,
    /// `case`
    Case,
    /// `esac`
    Esac,
    /// `select`
    Select,
    /// `repeat`
    Repeat,
    /// `function`
    Function,
    /// `time`
    Time,
    /// `coproc`
    Coproc,
    /// `[[` (reserved word form)
    CondStart,
    /// `]]` (reserved word form)
    CondEnd,

    // ── Special ─────────────────────────────────────────────
    /// End of input
    Eof,
    /// Lexer error (unrecognized byte sequence)
    Error,
}

impl TokenKind {
    /// Whether this token kind is a reserved word.
    ///
    /// Returns `true` for every variant in the reserved-words group of the
    /// enum, including the reserved-word forms of `[[` and `]]`
    /// (`CondStart` / `CondEnd`). The grouping-token forms
    /// (`DoubleLeftBracket` / `DoubleRightBracket`) are not reserved words.
    #[must_use]
    pub const fn is_reserved_word(&self) -> bool {
        matches!(
            self,
            Self::If
                | Self::Then
                | Self::Elif
                | Self::Else
                | Self::Fi
                | Self::For
                | Self::In
                | Self::While
                | Self::Until
                | Self::Do
                | Self::Done
                | Self::Case
                | Self::Esac
                | Self::Select
                | Self::Repeat
                | Self::Function
                | Self::Time
                | Self::Coproc
                | Self::CondStart
                | Self::CondEnd
        )
    }

    /// Whether this token kind terminates a simple command.
    ///
    /// Covers list terminators (`;`, newline, `&`, and the disowning
    /// `&!` / `&|`), pipeline and logical connectors (`|`, `|&`, `||`, `&&`),
    /// and end of input. The case-arm terminators (`;;`, `;&`, `;|`) are
    /// deliberately left out, as in the original list.
    #[must_use]
    pub const fn is_separator(&self) -> bool {
        matches!(
            self,
            Self::Semi
                | Self::Newline
                | Self::Ampersand
                // `&!` / `&|` end a command exactly like `&` does.
                | Self::Disown
                | Self::Pipe
                | Self::PipeAmpersand
                | Self::OrOr
                | Self::AndAnd
                | Self::Eof
        )
    }
}