Skip to main content

frost_lexer/
token.rs

1//! Token types for the zsh lexer.
2
3use compact_str::CompactString;
4
/// Byte offset span in source.
///
/// `start` is inclusive and `end` is exclusive, so a well-formed span covers
/// `end - start` bytes. Both fields are public and [`Span::new`] performs no
/// validation, so an inverted span (`end < start`) can exist; the accessors
/// below treat such a span as empty.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Byte offset of the first byte covered by the span.
    pub start: u32,
    /// Byte offset one past the last byte covered by the span.
    pub end: u32,
}

impl Span {
    /// Creates a span running from `start` (inclusive) to `end` (exclusive).
    ///
    /// The bounds are stored exactly as given; their ordering is not checked.
    pub fn new(start: u32, end: u32) -> Self {
        Self { start, end }
    }

    /// Returns the number of bytes covered by the span.
    ///
    /// An inverted span (`end < start`) yields `0`. Plain subtraction would
    /// panic in debug builds and wrap to a huge length in release builds.
    pub fn len(&self) -> u32 {
        self.end.saturating_sub(self.start)
    }

    /// Returns `true` when the span covers no bytes, i.e. when `len()` is `0`.
    ///
    /// This holds for `start == end` and also for inverted spans, keeping
    /// `is_empty` consistent with [`Span::len`].
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
25
26/// A token produced by the lexer.
27#[derive(Debug, Clone, PartialEq, Eq)]
28pub struct Token {
29    pub kind: TokenKind,
30    pub span: Span,
31    /// The raw text of the token (before any expansion).
32    pub text: CompactString,
33}
34
/// All token kinds recognized by the zsh lexer.
///
/// Variants are grouped by role (literals, operators, redirections, grouping,
/// expansion markers, reserved words, special). The classification helpers in
/// `impl TokenKind` are defined in terms of these groups.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenKind {
    // ── Literals ────────────────────────────────────────────
    /// Unquoted word (command name, argument, etc.)
    Word,
    /// Single-quoted string (no expansion)
    SingleQuoted,
    /// Double-quoted string (expansion inside)
    DoubleQuoted,
    /// `$'...'` ANSI-C quoting
    DollarSingleQuoted,
    /// Integer or float literal (in arithmetic context)
    Number,

    // ── Operators ───────────────────────────────────────────
    /// `|`
    Pipe,
    /// `|&`
    PipeAmpersand,
    /// `||`
    OrOr,
    /// `&&`
    AndAnd,
    /// `&`
    Ampersand,
    /// `&!`  or  `&|`  (disown)
    Disown,
    /// `;`
    Semi,
    /// `;;`
    DoubleSemi,
    /// `;&`
    SemiAnd,
    /// `;|`
    SemiPipe,
    /// `\n` (significant in shell grammar)
    Newline,

    // ── Redirections ────────────────────────────────────────
    /// `<` (stdin)
    Less,
    /// `>` (stdout, clobber)
    Greater,
    /// `>>` (append)
    DoubleGreater,
    /// `>|` (clobber, noclobber override)
    GreaterPipe,
    /// `>!` (clobber, noclobber override — zsh-specific)
    GreaterBang,
    /// `&>` or `>&` (stdout+stderr)
    AmpGreater,
    /// `&>>` (append stdout+stderr)
    AmpDoubleGreater,
    /// `<<` (heredoc)
    DoubleLess,
    /// `<<<` (herestring)
    TripleLess,
    /// `<<-` (heredoc, strip tabs)
    DoubleLessDash,
    /// `<>` (open read-write)
    LessGreater,
    /// `<(` (process substitution — read)
    ProcessSubIn,
    /// `>(` (process substitution — write)
    ProcessSubOut,
    /// `N>` (fd redirect, e.g. `2>`)
    FdGreater,
    /// `N<` (fd redirect, e.g. `0<`)
    FdLess,
    /// `N>>`
    FdDoubleGreater,
    /// `N>&M` (fd dup)
    FdDup,

    // ── Grouping ────────────────────────────────────────────
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// `[[` (conditional start)
    DoubleLeftBracket,
    /// `]]` (conditional end)
    DoubleRightBracket,

    // ── Expansion markers ───────────────────────────────────
    /// `$` (variable expansion start)
    Dollar,
    /// `${` (parameter expansion start)
    DollarBrace,
    /// `$(` (command substitution start)
    DollarParen,
    /// `$((` (arithmetic expansion start)
    DollarDoubleParen,
    /// Backtick (backtick command substitution)
    Backtick,
    /// `=` (assignment or test operator)
    Equals,
    /// `~` (tilde expansion)
    Tilde,
    /// `#` (comment start — lexer produces this then skips to newline)
    Comment,
    /// `!` (history expansion or negation)
    Bang,
    /// `@` (used in `${arr[@]}`, `$@` etc.)
    At,
    /// `*` (glob or arithmetic)
    Star,
    /// `?` (glob or ternary)
    Question,

    // ── Reserved words ──────────────────────────────────────
    /// `if`
    If,
    /// `then`
    Then,
    /// `elif`
    Elif,
    /// `else`
    Else,
    /// `fi`
    Fi,
    /// `for`
    For,
    /// `in` (for ... in)
    In,
    /// `while`
    While,
    /// `until`
    Until,
    /// `do`
    Do,
    /// `done`
    Done,
    /// `case`
    Case,
    /// `esac`
    Esac,
    /// `select`
    Select,
    /// `repeat`
    Repeat,
    /// `function`
    Function,
    /// `time`
    Time,
    /// `coproc`
    Coproc,
    /// `[[` (reserved word form)
    CondStart,
    /// `]]` (reserved word form)
    CondEnd,

    // ── Special ─────────────────────────────────────────────
    /// End of input
    Eof,
    /// Lexer error (unrecognized byte sequence)
    Error,
}

impl TokenKind {
    /// Whether this token kind is a reserved word.
    ///
    /// Returns `true` for every variant in the "Reserved words" group of the
    /// enum, including [`TokenKind::CondStart`] and [`TokenKind::CondEnd`],
    /// which are the reserved-word forms of `[[` and `]]`. The grouping-operator
    /// forms ([`TokenKind::DoubleLeftBracket`] / [`TokenKind::DoubleRightBracket`])
    /// are not reserved words.
    pub fn is_reserved_word(&self) -> bool {
        matches!(
            self,
            Self::If
                | Self::Then
                | Self::Elif
                | Self::Else
                | Self::Fi
                | Self::For
                | Self::In
                | Self::While
                | Self::Until
                | Self::Do
                | Self::Done
                | Self::Case
                | Self::Esac
                | Self::Select
                | Self::Repeat
                | Self::Function
                | Self::Time
                | Self::Coproc
                | Self::CondStart
                | Self::CondEnd
        )
    }

    /// Whether this token kind terminates a simple command.
    ///
    /// Covers the list terminators (`;`, newline, `&`, `&!` / `&|`), the
    /// pipeline and logical connectors (`|`, `|&`, `||`, `&&`) and end of input.
    ///
    /// NOTE(review): the case-arm terminators (`;;`, `;&`, `;|`) are not
    /// included here, matching the previous behavior — presumably the `case`
    /// parser handles them itself; confirm against the caller.
    pub fn is_separator(&self) -> bool {
        matches!(
            self,
            Self::Semi
                | Self::Newline
                | Self::Ampersand
                // `&!` / `&|` background the command just like `&` (and disown
                // it), so they end the command in the same way.
                | Self::Disown
                | Self::Pipe
                | Self::PipeAmpersand
                | Self::OrOr
                | Self::AndAnd
                | Self::Eof
        )
    }
}