rush_sh/lexer/
token.rs

//! Token definitions for the Rush shell lexer.
//!
//! This module contains the core token types that represent the lexical elements
//! of shell syntax. Tokens are the building blocks produced by the lexer and
//! consumed by the parser to construct an Abstract Syntax Tree (AST).
//!
//! # Token Categories
//!
//! ## Keywords
//! Control flow and structural keywords like `if`, `then`, `else`, `fi`, `case`,
//! `for`, `while`, `until`, `do`, `done`, `local`, `return`, `break`, `continue`.
//!
//! ## Operators
//! - **Pipe operators**: `|` (pipe), `||` (OR), `&&` (AND)
//! - **Redirection operators**: `>`, `>>`, `<`, `<<`, `<<<`, `>|`, `<>`
//! - **File descriptor operators**: `N>`, `N<`, `N>>`, `N>&M`, `N<&M`, `N>&-`, `N<&-`, `N<>`
//! - **Structural operators**: `;`, `;;`, `(`, `)`, `{`, `}`, `!`, `&`
//!
//! ## Words
//! Command names, arguments, variable names, and other textual content.
//!
//! ## Special Tokens
//! - **Newline**: Line terminators
//! - **Here-documents**: Multi-line input redirection
//! - **Here-strings**: Single-line input redirection
//!
//! # Examples
//!
//! ```
//! use rush_sh::lexer::Token;
//!
//! // A simple command token sequence
//! let tokens = vec![
//!     Token::Word("echo".to_string()),
//!     Token::Word("hello".to_string()),
//! ];
//!
//! // A pipeline with redirection
//! let pipeline = vec![
//!     Token::Word("cat".to_string()),
//!     Token::RedirIn,
//!     Token::Word("input.txt".to_string()),
//!     Token::Pipe,
//!     Token::Word("grep".to_string()),
//!     Token::Word("pattern".to_string()),
//! ];
//! ```

/// Represents a lexical token in shell syntax.
///
/// Each variant corresponds to a specific syntactic element that can appear
/// in shell commands. The lexer produces a stream of these tokens which the
/// parser then uses to build an AST.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// A word token representing command names, arguments, or other text.
    /// This is the most common token type.
    Word(String),

    /// Pipe operator `|` - connects stdout of one command to stdin of another.
    Pipe,

    /// Output redirection `>` - redirects stdout to a file.
    RedirOut,

    /// Output redirection with noclobber override `>|` - forces overwrite even with noclobber set.
    RedirOutClobber,

    /// Input redirection `<` - redirects stdin from a file.
    RedirIn,

    /// Append redirection `>>` - appends stdout to a file.
    RedirAppend,

    /// Here-document `<<DELIMITER` - multi-line input redirection.
    /// The `String` is the delimiter word; the `bool` indicates whether the
    /// delimiter was quoted (a quoted delimiter suppresses expansion of the body).
    RedirHereDoc(String, bool),

    /// Here-string `<<<"content"` - single-line input redirection.
    /// The `String` is the here-string content.
    RedirHereString(String),

    // File descriptor redirections
    /// Redirect file descriptor N from file: `N<file`
    /// Fields: (fd number, target filename).
    RedirectFdIn(i32, String),

    /// Redirect file descriptor N to file: `N>file`
    /// Fields: (fd number, target filename).
    RedirectFdOut(i32, String),

    /// Redirect file descriptor N to file with noclobber override: `N>|file`
    /// Fields: (fd number, target filename).
    RedirectFdOutClobber(i32, String),

    /// Append file descriptor N to file: `N>>file`
    /// Fields: (fd number, target filename).
    RedirectFdAppend(i32, String),

    /// Duplicate file descriptor: `N>&M` or `N<&M`
    /// Fields: (destination fd N, source fd M).
    RedirectFdDup(i32, i32),

    /// Close file descriptor: `N>&-` or `N<&-`
    /// Field: the fd number N being closed.
    RedirectFdClose(i32),

    /// Open file descriptor for read/write: `N<>file`
    /// Fields: (fd number, target filename).
    RedirectFdInOut(i32, String),

    // Control flow keywords
    /// `if` keyword - starts a conditional statement.
    If,

    /// `then` keyword - begins the consequent clause of an if statement.
    Then,

    /// `else` keyword - begins the alternative clause of an if statement.
    Else,

    /// `elif` keyword - else-if for chained conditionals.
    Elif,

    /// `fi` keyword - ends an if statement.
    Fi,

    /// `case` keyword - starts a case statement.
    Case,

    /// `in` keyword - used in case and for statements.
    In,

    /// `esac` keyword - ends a case statement.
    Esac,

    /// Double semicolon `;;` - terminates a case clause.
    DoubleSemicolon,

    /// Semicolon `;` - command separator.
    Semicolon,

    /// Right parenthesis `)` - used in case patterns and subshells.
    RightParen,

    /// Left parenthesis `(` - starts a subshell or case pattern.
    LeftParen,

    /// Left brace `{` - starts a command group.
    LeftBrace,

    /// Right brace `}` - ends a command group.
    RightBrace,

    /// Newline - line terminator, also acts as command separator.
    Newline,

    /// `local` keyword - declares local variables in functions.
    Local,

    /// `return` keyword - returns from a function with an exit code.
    Return,

    /// `for` keyword - starts a for loop.
    For,

    /// `do` keyword - begins the body of a loop.
    Do,

    /// `done` keyword - ends a loop.
    Done,

    /// `while` keyword - starts a while loop.
    While,

    /// `until` keyword - starts an until loop.
    Until,

    /// `break` keyword - exits from a loop.
    Break,

    /// `continue` keyword - skips to next iteration of a loop.
    Continue,

    /// AND operator `&&` - executes next command only if previous succeeded.
    And,

    /// OR operator `||` - executes next command only if previous failed.
    Or,

    /// Bang operator `!` - negates the exit status of a command.
    Bang,

    /// Ampersand `&` - runs command in background (async execution).
    Ampersand,
}
189
190/// Map a keyword string to its corresponding shell Token.
191///
192/// This function is used during lexical analysis to identify reserved words
193/// that should be treated as keywords rather than regular word tokens.
194///
195/// # Arguments
196///
197/// * `word` - The string to check for keyword status
198///
199/// # Returns
200///
201/// `Some(Token::X)` if `word` matches a recognized shell keyword (for example: `if`, `then`,
202/// `else`, `elif`, `fi`, `case`, `in`, `esac`, `local`, `return`, `for`, `while`, `until`,
203/// `break`, `continue`, `do`, `done`), `None` otherwise.
204///
205/// # Examples
206///
207/// ```
208/// // Note: is_keyword is a private function
209/// // This example is for documentation only
210/// ```
211pub(super) fn is_keyword(word: &str) -> Option<Token> {
212    match word {
213        "if" => Some(Token::If),
214        "then" => Some(Token::Then),
215        "else" => Some(Token::Else),
216        "elif" => Some(Token::Elif),
217        "fi" => Some(Token::Fi),
218        "case" => Some(Token::Case),
219        "in" => Some(Token::In),
220        "esac" => Some(Token::Esac),
221        "local" => Some(Token::Local),
222        "return" => Some(Token::Return),
223        "for" => Some(Token::For),
224        "while" => Some(Token::While),
225        "until" => Some(Token::Until),
226        "break" => Some(Token::Break),
227        "continue" => Some(Token::Continue),
228        "do" => Some(Token::Do),
229        "done" => Some(Token::Done),
230        _ => None,
231    }
232}
233
234/// Check if a word is a shell keyword (public API for builtins).
235///
236/// This includes both keywords recognized by the lexer and special tokens.
237/// Used by the `type` builtin to identify reserved words.
238///
239/// # Arguments
240///
241/// * `word` - The string to check
242///
243/// # Returns
244///
245/// `true` if the word is a shell keyword, `false` otherwise.
246///
247/// # Examples
248///
249/// ```
250/// use rush_sh::lexer::is_shell_keyword;
251///
252/// assert!(is_shell_keyword("if"));
253/// assert!(is_shell_keyword("while"));
254/// assert!(is_shell_keyword("{"));
255/// assert!(!is_shell_keyword("echo"));
256/// ```
257pub fn is_shell_keyword(word: &str) -> bool {
258    // Check lexer keywords first
259    if is_keyword(word).is_some() {
260        return true;
261    }
262
263    // Check additional POSIX keywords and special tokens
264    // These are handled as separate tokens but should be recognized as keywords by `type`
265    matches!(word, "until" | "{" | "}" | "!")
266}