rush_sh/lexer/token.rs
1//! Token definitions for the Rush shell lexer.
2//!
3//! This module contains the core token types that represent the lexical elements
4//! of shell syntax. Tokens are the building blocks produced by the lexer and
5//! consumed by the parser to construct an Abstract Syntax Tree (AST).
6//!
7//! # Token Categories
8//!
9//! ## Keywords
10//! Control flow and structural keywords like `if`, `then`, `else`, `fi`, `case`,
11//! `for`, `while`, `until`, `do`, `done`, `local`, `return`, `break`, `continue`.
12//!
13//! ## Operators
//! - **Pipe and list operators**: `|` (pipe), `||` (OR), `&&` (AND), `&` (background)
15//! - **Redirection operators**: `>`, `>>`, `<`, `<<`, `<<<`, `>|`, `<>`
16//! - **File descriptor operators**: `N>`, `N<`, `N>>`, `N>&M`, `N<&M`, `N>&-`, `N<&-`, `N<>`
17//! - **Structural operators**: `;`, `;;`, `(`, `)`, `{`, `}`, `!`
18//!
19//! ## Words
20//! Command names, arguments, variable names, and other textual content.
21//!
22//! ## Special Tokens
23//! - **Newline**: Line terminators
24//! - **Here-documents**: Multi-line input redirection
25//! - **Here-strings**: Single-line input redirection
26//!
27//! # Examples
28//!
29//! ```
30//! use rush_sh::lexer::Token;
31//!
32//! // A simple command token sequence
33//! let tokens = vec![
34//! Token::Word("echo".to_string()),
35//! Token::Word("hello".to_string()),
36//! ];
37//!
38//! // A pipeline with redirection
39//! let pipeline = vec![
40//! Token::Word("cat".to_string()),
41//! Token::RedirIn,
42//! Token::Word("input.txt".to_string()),
43//! Token::Pipe,
44//! Token::Word("grep".to_string()),
45//! Token::Word("pattern".to_string()),
46//! ];
47//! ```
48
/// One lexical unit of shell input.
///
/// The lexer emits a sequence of `Token` values and the parser consumes that
/// sequence to build the AST. Every variant stands for exactly one syntactic
/// element: a plain word, an operator, a redirection, or a reserved word.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Token {
    // ----- Words ---------------------------------------------------------

    /// Plain text such as a command name or an argument.
    /// This is the most frequently produced variant.
    Word(String),

    // ----- Pipes and simple redirections ---------------------------------

    /// `|` - feeds stdout of the command on the left into stdin of the one
    /// on the right.
    Pipe,

    /// `>` - sends stdout to a file.
    RedirOut,

    /// `>|` - sends stdout to a file, overwriting it even when `noclobber`
    /// is set.
    RedirOutClobber,

    /// `<` - reads stdin from a file.
    RedirIn,

    /// `>>` - appends stdout to a file.
    RedirAppend,

    /// `<<DELIMITER` - here-document (multi-line input redirection).
    /// The `bool` records whether the delimiter was quoted, which affects
    /// expansion.
    RedirHereDoc(String, bool),

    /// `<<<"content"` - here-string (single-line input redirection).
    RedirHereString(String),

    // ----- Redirections with an explicit file descriptor -----------------

    /// `N<file` - file descriptor `N` reads from a file.
    RedirectFdIn(i32, String),

    /// `N>file` - file descriptor `N` writes to a file.
    RedirectFdOut(i32, String),

    /// `N>|file` - file descriptor `N` writes to a file, overriding
    /// `noclobber`.
    RedirectFdOutClobber(i32, String),

    /// `N>>file` - file descriptor `N` appends to a file.
    RedirectFdAppend(i32, String),

    /// `N>&M` or `N<&M` - duplicates a file descriptor.
    /// The two integers are presumably `N` then `M` - confirm against the
    /// lexer that constructs this variant.
    RedirectFdDup(i32, i32),

    /// `N>&-` or `N<&-` - closes a file descriptor.
    RedirectFdClose(i32),

    /// `N<>file` - opens a file descriptor for both reading and writing.
    RedirectFdInOut(i32, String),

    // ----- Conditional and case keywords ---------------------------------

    /// `if` - opens a conditional statement.
    If,

    /// `then` - introduces the consequent clause of an `if`.
    Then,

    /// `else` - introduces the alternative clause of an `if`.
    Else,

    /// `elif` - else-if, for chaining conditionals.
    Elif,

    /// `fi` - closes an `if` statement.
    Fi,

    /// `case` - opens a case statement.
    Case,

    /// `in` - appears in both `case` and `for` statements.
    In,

    /// `esac` - closes a case statement.
    Esac,

    // ----- Separators and grouping ---------------------------------------

    /// `;;` - terminates a single case clause.
    DoubleSemicolon,

    /// `;` - separates commands.
    Semicolon,

    /// `)` - appears in case patterns and closes subshells.
    RightParen,

    /// `(` - opens a subshell or a case pattern.
    LeftParen,

    /// `{` - opens a command group.
    LeftBrace,

    /// `}` - closes a command group.
    RightBrace,

    /// Line terminator; also separates commands.
    Newline,

    // ----- Function and loop keywords ------------------------------------

    /// `local` - declares variables local to a function.
    Local,

    /// `return` - leaves a function with an exit code.
    Return,

    /// `for` - opens a for loop.
    For,

    /// `do` - introduces a loop body.
    Do,

    /// `done` - closes a loop.
    Done,

    /// `while` - opens a while loop.
    While,

    /// `until` - opens an until loop.
    Until,

    /// `break` - exits the enclosing loop.
    Break,

    /// `continue` - jumps to the next iteration of the enclosing loop.
    Continue,

    // ----- List operators ------------------------------------------------

    /// `&&` - runs the next command only if the previous one succeeded.
    And,

    /// `||` - runs the next command only if the previous one failed.
    Or,

    /// `!` - negates the exit status of a command.
    Bang,

    /// `&` - runs the command in the background (asynchronously).
    Ampersand,
}
189
190/// Map a keyword string to its corresponding shell Token.
191///
192/// This function is used during lexical analysis to identify reserved words
193/// that should be treated as keywords rather than regular word tokens.
194///
195/// # Arguments
196///
197/// * `word` - The string to check for keyword status
198///
199/// # Returns
200///
201/// `Some(Token::X)` if `word` matches a recognized shell keyword (for example: `if`, `then`,
202/// `else`, `elif`, `fi`, `case`, `in`, `esac`, `local`, `return`, `for`, `while`, `until`,
203/// `break`, `continue`, `do`, `done`), `None` otherwise.
204///
205/// # Examples
206///
207/// ```
208/// // Note: is_keyword is a private function
209/// // This example is for documentation only
210/// ```
211pub(super) fn is_keyword(word: &str) -> Option<Token> {
212 match word {
213 "if" => Some(Token::If),
214 "then" => Some(Token::Then),
215 "else" => Some(Token::Else),
216 "elif" => Some(Token::Elif),
217 "fi" => Some(Token::Fi),
218 "case" => Some(Token::Case),
219 "in" => Some(Token::In),
220 "esac" => Some(Token::Esac),
221 "local" => Some(Token::Local),
222 "return" => Some(Token::Return),
223 "for" => Some(Token::For),
224 "while" => Some(Token::While),
225 "until" => Some(Token::Until),
226 "break" => Some(Token::Break),
227 "continue" => Some(Token::Continue),
228 "do" => Some(Token::Do),
229 "done" => Some(Token::Done),
230 _ => None,
231 }
232}
233
234/// Check if a word is a shell keyword (public API for builtins).
235///
236/// This includes both keywords recognized by the lexer and special tokens.
237/// Used by the `type` builtin to identify reserved words.
238///
239/// # Arguments
240///
241/// * `word` - The string to check
242///
243/// # Returns
244///
245/// `true` if the word is a shell keyword, `false` otherwise.
246///
247/// # Examples
248///
249/// ```
250/// use rush_sh::lexer::is_shell_keyword;
251///
252/// assert!(is_shell_keyword("if"));
253/// assert!(is_shell_keyword("while"));
254/// assert!(is_shell_keyword("{"));
255/// assert!(!is_shell_keyword("echo"));
256/// ```
257pub fn is_shell_keyword(word: &str) -> bool {
258 // Check lexer keywords first
259 if is_keyword(word).is_some() {
260 return true;
261 }
262
263 // Check additional POSIX keywords and special tokens
264 // These are handled as separate tokens but should be recognized as keywords by `type`
265 matches!(word, "until" | "{" | "}" | "!")
266}