/// Classification of a lexed shell token.
///
/// Punctuation and operators have dedicated variants, reserved words have one
/// variant each, and everything else is a [`TokenKind::Word`] carrying its text.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    /// A plain word, or the raw text found between quotes.
    Word(String),

    // --- Operators and punctuation ---
    /// `=`
    Assignment,
    /// `|`
    Pipe,
    /// `;`
    Semicolon,
    /// A line break.
    Newline,
    /// `&&`
    And,
    /// A single `&`.
    Background,
    /// `||`
    Or,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `<`
    Less,
    /// `>`
    Great,
    /// `>>`
    DGreat,
    /// A `$` that is not followed by `(`.
    Dollar,
    /// A double quote (opening or closing).
    Quote,
    /// A single quote (opening or closing).
    SingleQuote,
    /// A backtick.
    Backtick,
    /// A `#` comment running to the end of the line.
    Comment,
    /// The `$(` opener of a command substitution.
    CmdSubst,
    /// Extended-glob operator character.
    // NOTE(review): no lexer code visible in this file produces this variant — confirm it is used.
    ExtGlob(char),

    // --- Reserved words ---
    If,
    Then,
    Elif,
    Else,
    Fi,
    Function,
    For,
    While,
    Until,
    Do,
    Done,
    In,
    Break,
    Continue,
    Return,
    Export,

    /// End of input.
    EOF,
}
/// A single token produced by the lexer.
#[derive(Debug, Clone)]
pub struct Token {
    /// What kind of token this is.
    pub kind: TokenKind,
    /// Text associated with the token: the operator spelling, the word text,
    /// or an empty string for end of input.
    pub value: String,
    /// Line/column recorded when lexing of this token started.
    pub position: Position,
}
/// A line/column location in the lexer input.
///
/// The lexer in this file starts at line 1 and reports column 1 for the first
/// character of a line.
#[derive(Debug, Clone, Copy)]
pub struct Position {
    /// Line number.
    pub line: usize,
    /// Column number within the line.
    pub column: usize,
}
impl Position {
pub fn new(line: usize, column: usize) -> Self {
Self { line, column }
}
}
/// Returns `true` when `ch` is one of the characters that terminates a word
/// and is lexed as its own token (operators, brackets, quotes, `$`, `#`,
/// newline).
fn is_special_char(ch: char) -> bool {
    const SPECIAL_CHARS: [char; 16] = [
        '=', '|', ';', '\n', '&', '(', ')', '{', '}', '<', '>', '$', '"', '\'', '`', '#',
    ];
    SPECIAL_CHARS.contains(&ch)
}
/// Character-level lexer for shell input.
#[derive(Clone)]
pub struct Lexer {
    /// The whole input, one `char` per element.
    input: Vec<char>,
    /// Index of the current character (`ch`) in `input`.
    pub position: usize,
    /// Index of the next character to be read.
    read_position: usize,
    /// The current character; `'\0'` once the input is exhausted.
    ch: char,
    /// Current line number.
    line: usize,
    /// Current column number.
    column: usize,
    /// The quote character that is currently open, if any.
    in_quotes: Option<char>,
}
impl Lexer {
pub fn new(input: &str) -> Self {
let mut lexer = Self {
input: input.chars().collect(),
position: 0,
read_position: 0,
ch: '\0',
line: 1,
column: 0,
in_quotes: None,
};
lexer.read_char();
lexer
}
/// Advances the cursor by one character.
///
/// `ch` becomes the character at `read_position`, or `'\0'` when the input
/// is exhausted. The indices and the column counter are bumped on every
/// call, including calls made past the end of the input.
fn read_char(&mut self) {
    let next_index = self.read_position;
    self.ch = self.input.get(next_index).copied().unwrap_or('\0');
    self.position = next_index;
    self.read_position = next_index + 1;
    self.column += 1;
}
/// Returns the character after the current one without advancing, or
/// `'\0'` when there is none.
pub fn peek_char(&self) -> char {
    match self.input.get(self.read_position) {
        Some(&next) => next,
        None => '\0',
    }
}
/// Reports whether the character after the current one ends a word:
/// end of input, whitespace, or a special character.
fn is_word_boundary(&self) -> bool {
    match self.peek_char() {
        '\0' => true,
        next => next.is_whitespace() || is_special_char(next),
    }
}
/// Returns the token that `next_token` would produce, without consuming it.
///
/// The cursor (`position`, `read_position`, `ch`), the line/column counters
/// and the quote state are snapshotted before the look-ahead and restored
/// afterwards. Restoring `in_quotes` matters: `next_token` toggles it when it
/// lexes a quote character, so peeking at a quote without restoring it would
/// leave the lexer believing it is inside (or outside) a quoted string.
pub fn peek_next_token(&mut self) -> Token {
    let saved_position = self.position;
    let saved_read_position = self.read_position;
    let saved_ch = self.ch;
    let saved_line = self.line;
    let saved_column = self.column;
    let saved_in_quotes = self.in_quotes;

    let token = self.next_token();

    self.position = saved_position;
    self.read_position = saved_read_position;
    self.ch = saved_ch;
    self.line = saved_line;
    self.column = saved_column;
    self.in_quotes = saved_in_quotes;
    token
}
pub fn next_token(&mut self) -> Token {
if self.in_quotes.is_none() {
self.skip_whitespace();
}
let current_position = Position::new(self.line, self.column);
if (self.ch == '"' || self.ch == '\'') && self.in_quotes.is_none() {
let quote_type = self.ch;
let token = Token {
kind: if quote_type == '"' {
TokenKind::Quote
} else {
TokenKind::SingleQuote
},
value: quote_type.to_string(),
position: current_position,
};
self.in_quotes = Some(quote_type); self.read_char();
return token;
} else if self.in_quotes.is_some() && self.ch == self.in_quotes.unwrap() {
let quote_type = self.ch;
let token = Token {
kind: if quote_type == '"' {
TokenKind::Quote
} else {
TokenKind::SingleQuote
},
value: quote_type.to_string(),
position: current_position,
};
self.in_quotes = None; self.read_char();
return token;
} else if self.in_quotes.is_some() {
if self.ch == '$' && self.peek_char() == '(' {
self.read_char(); return Token {
kind: TokenKind::CmdSubst,
value: "$(".to_string(),
position: current_position,
};
} else {
return self.read_quoted_content();
}
}
let token = match self.ch {
'=' => Token {
kind: TokenKind::Assignment,
value: "=".to_string(),
position: current_position,
},
'|' => {
if self.peek_char() == '|' {
self.read_char();
Token {
kind: TokenKind::Or,
value: "||".to_string(),
position: current_position,
}
} else {
Token {
kind: TokenKind::Pipe,
value: "|".to_string(),
position: current_position,
}
}
}
';' => Token {
kind: TokenKind::Semicolon,
value: ";".to_string(),
position: current_position,
},
'&' => {
if self.peek_char() == '&' {
self.read_char();
Token {
kind: TokenKind::And,
value: "&&".to_string(),
position: current_position,
}
} else {
Token {
kind: TokenKind::Background,
value: "&".to_string(),
position: current_position,
}
}
}
'\n' => {
self.line += 1;
self.column = 0;
Token {
kind: TokenKind::Newline,
value: "\n".to_string(),
position: current_position,
}
}
'(' => Token {
kind: TokenKind::LParen,
value: "(".to_string(),
position: current_position,
},
')' => Token {
kind: TokenKind::RParen,
value: ")".to_string(),
position: current_position,
},
'{' => Token {
kind: TokenKind::LBrace,
value: "{".to_string(),
position: current_position,
},
'}' => Token {
kind: TokenKind::RBrace,
value: "}".to_string(),
position: current_position,
},
'<' => Token {
kind: TokenKind::Less,
value: "<".to_string(),
position: current_position,
},
'>' => {
if self.peek_char() == '>' {
self.read_char();
Token {
kind: TokenKind::DGreat,
value: ">>".to_string(),
position: current_position,
}
} else {
Token {
kind: TokenKind::Great,
value: ">".to_string(),
position: current_position,
}
}
}
'$' => {
if self.peek_char() == '(' {
self.read_char(); Token {
kind: TokenKind::CmdSubst,
value: "$(".to_string(),
position: current_position,
}
} else {
Token {
kind: TokenKind::Dollar,
value: "$".to_string(),
position: current_position,
}
}
}
'"' => Token {
kind: TokenKind::Quote,
value: "\"".to_string(),
position: current_position,
},
'\'' => Token {
kind: TokenKind::SingleQuote,
value: "'".to_string(),
position: current_position,
},
'`' => Token {
kind: TokenKind::Backtick,
value: "`".to_string(),
position: current_position,
},
'#' => self.read_comment(),
'\0' => Token {
kind: TokenKind::EOF,
value: "".to_string(),
position: current_position,
},
't' => {
if self.peek_char() == 'h'
&& self.position + 3 < self.input.len()
&& self.input[self.position + 1] == 'h'
&& self.input[self.position + 2] == 'e'
&& self.input[self.position + 3] == 'n'
{
self.read_char(); self.read_char(); self.read_char();
if self.is_word_boundary() {
Token {
kind: TokenKind::Then,
value: "then".to_string(),
position: current_position,
}
} else {
self.position -= 3;
self.read_position -= 3;
self.column -= 3;
self.ch = 't';
self.read_word()
}
} else {
self.read_word()
}
}
'e' => {
if self.peek_char() == 'l' && self.position + 3 < self.input.len() {
self.read_char();
if self.peek_char() == 's' {
self.read_char(); if self.peek_char() == 'e' {
self.read_char(); if self.is_word_boundary() {
Token {
kind: TokenKind::Else,
value: "else".to_string(),
position: current_position,
}
} else {
self.position -= 3;
self.read_position -= 3;
self.column -= 3;
self.ch = 'e';
self.read_word()
}
} else {
self.position -= 2;
self.read_position -= 2;
self.column -= 2;
self.ch = 'e';
self.read_word()
}
} else if self.peek_char() == 'i' {
self.read_char(); if self.peek_char() == 'f' {
self.read_char(); if self.is_word_boundary() {
Token {
kind: TokenKind::Elif,
value: "elif".to_string(),
position: current_position,
}
} else {
self.position -= 3;
self.read_position -= 3;
self.column -= 3;
self.ch = 'e';
self.read_word()
}
} else {
self.position -= 2;
self.read_position -= 2;
self.column -= 2;
self.ch = 'e';
self.read_word()
}
} else {
self.position -= 1;
self.read_position -= 1;
self.column -= 1;
self.ch = 'e';
self.read_word()
}
} else if self.position + 5 < self.input.len()
&& self.peek_char() == 'x'
&& self.input[self.position + 1] == 'x'
&& self.input[self.position + 2] == 'p'
&& self.input[self.position + 3] == 'o'
&& self.input[self.position + 4] == 'r'
&& self.input[self.position + 5] == 't'
{
self.read_char(); self.read_char(); self.read_char(); self.read_char(); self.read_char();
if self.is_word_boundary() {
Token {
kind: TokenKind::Export,
value: "export".to_string(),
position: current_position,
}
} else {
self.position -= 5;
self.read_position -= 5;
self.column -= 5;
self.ch = 'e';
self.read_word()
}
} else {
self.read_word()
}
}
'f' => {
if self.peek_char() == 'i' && self.position + 1 < self.input.len() {
self.read_char(); if self.is_word_boundary() {
Token {
kind: TokenKind::Fi,
value: "fi".to_string(),
position: current_position,
}
} else {
self.position -= 1;
self.read_position -= 1;
self.column -= 1;
self.ch = 'f';
self.read_word()
}
} else if self.position + 7 < self.input.len()
&& self.peek_char() == 'u'
&& self.input[self.position + 1] == 'u'
&& self.input[self.position + 2] == 'n'
&& self.input[self.position + 3] == 'c'
&& self.input[self.position + 4] == 't'
&& self.input[self.position + 5] == 'i'
&& self.input[self.position + 6] == 'o'
&& self.input[self.position + 7] == 'n'
{
self.read_char(); self.read_char(); self.read_char(); self.read_char(); self.read_char(); self.read_char(); self.read_char();
if self.is_word_boundary() {
Token {
kind: TokenKind::Function,
value: "function".to_string(),
position: current_position,
}
} else {
self.position -= 7;
self.read_position -= 7;
self.column -= 7;
self.ch = 'f';
self.read_word()
}
} else if self.position + 2 < self.input.len()
&& self.peek_char() == 'o'
&& self.input[self.position + 1] == 'o'
&& self.input[self.position + 2] == 'r'
{
self.read_char(); self.read_char();
if self.is_word_boundary() {
Token {
kind: TokenKind::For,
value: "for".to_string(),
position: current_position,
}
} else {
self.position -= 2;
self.read_position -= 2;
self.column -= 2;
self.ch = 'f';
self.read_word()
}
} else {
self.read_word()
}
}
'u' => {
if self.position + 4 < self.input.len()
&& self.peek_char() == 'n'
&& self.input[self.position + 1] == 'n'
&& self.input[self.position + 2] == 't'
&& self.input[self.position + 3] == 'i'
&& self.input[self.position + 4] == 'l'
{
self.read_char(); self.read_char(); self.read_char(); self.read_char();
if self.is_word_boundary() {
Token {
kind: TokenKind::Until,
value: "until".to_string(),
position: current_position,
}
} else {
self.position -= 4;
self.read_position -= 4;
self.column -= 4;
self.ch = 'u';
self.read_word()
}
} else {
self.read_word()
}
}
'r' => {
if self.position + 5 < self.input.len()
&& self.peek_char() == 'e'
&& self.input[self.position + 1] == 'e'
&& self.input[self.position + 2] == 't'
&& self.input[self.position + 3] == 'u'
&& self.input[self.position + 4] == 'r'
&& self.input[self.position + 5] == 'n'
{
self.read_char(); self.read_char(); self.read_char(); self.read_char(); self.read_char();
if self.is_word_boundary() {
Token {
kind: TokenKind::Return,
value: "return".to_string(),
position: current_position,
}
} else {
self.position -= 5;
self.read_position -= 5;
self.column -= 5;
self.ch = 'r';
self.read_word()
}
} else {
self.read_word()
}
}
'w' => {
if self.position + 4 < self.input.len()
&& self.peek_char() == 'h'
&& self.input[self.position + 1] == 'h'
&& self.input[self.position + 2] == 'i'
&& self.input[self.position + 3] == 'l'
&& self.input[self.position + 4] == 'e'
{
self.read_char(); self.read_char(); self.read_char(); self.read_char();
if self.is_word_boundary() {
Token {
kind: TokenKind::While,
value: "while".to_string(),
position: current_position,
}
} else {
self.position -= 4;
self.read_position -= 4;
self.column -= 4;
self.ch = 'w';
self.read_word()
}
} else {
self.read_word()
}
}
'd' => {
if self.peek_char() == 'o' && self.position + 1 < self.input.len() {
self.read_char();
if self.peek_char() == 'n'
&& self.position + 2 < self.input.len()
&& self.input[self.position + 1] == 'n'
&& self.input[self.position + 2] == 'e'
{
self.read_char(); self.read_char();
if self.is_word_boundary() {
Token {
kind: TokenKind::Done,
value: "done".to_string(),
position: current_position,
}
} else {
self.position -= 3;
self.read_position -= 3;
self.column -= 3;
self.ch = 'd';
self.read_word()
}
} else if self.is_word_boundary() {
Token {
kind: TokenKind::Do,
value: "do".to_string(),
position: current_position,
}
} else {
self.position -= 1;
self.read_position -= 1;
self.column -= 1;
self.ch = 'd';
self.read_word()
}
} else {
self.read_word()
}
}
'b' => {
if self.position + 4 < self.input.len()
&& self.peek_char() == 'r'
&& self.input[self.position + 1] == 'r'
&& self.input[self.position + 2] == 'e'
&& self.input[self.position + 3] == 'a'
&& self.input[self.position + 4] == 'k'
{
self.read_char(); self.read_char(); self.read_char(); self.read_char();
if self.is_word_boundary() {
Token {
kind: TokenKind::Break,
value: "break".to_string(),
position: current_position,
}
} else {
self.position -= 4;
self.read_position -= 4;
self.column -= 4;
self.ch = 'b';
self.read_word()
}
} else {
self.read_word()
}
}
'c' => {
if self.position + 7 < self.input.len()
&& self.peek_char() == 'o'
&& self.input[self.position + 1] == 'o'
&& self.input[self.position + 2] == 'n'
&& self.input[self.position + 3] == 't'
&& self.input[self.position + 4] == 'i'
&& self.input[self.position + 5] == 'n'
&& self.input[self.position + 6] == 'u'
&& self.input[self.position + 7] == 'e'
{
self.read_char(); self.read_char(); self.read_char(); self.read_char(); self.read_char(); self.read_char(); self.read_char();
if self.is_word_boundary() {
Token {
kind: TokenKind::Continue,
value: "continue".to_string(),
position: current_position,
}
} else {
self.position -= 7;
self.read_position -= 7;
self.column -= 7;
self.ch = 'c';
self.read_word()
}
} else {
self.read_word()
}
}
'i' => {
if self.peek_char() == 'f' && self.position + 1 < self.input.len() {
self.read_char(); if self.is_word_boundary() {
Token {
kind: TokenKind::If,
value: "if".to_string(),
position: current_position,
}
} else {
self.position -= 1;
self.read_position -= 1;
self.column -= 1;
self.ch = 'i';
self.read_word()
}
} else if self.position + 1 < self.input.len() &&
self.peek_char() == 'n'
{
self.read_char();
if self.is_word_boundary() {
Token {
kind: TokenKind::In,
value: "in".to_string(),
position: current_position,
}
} else {
self.position -= 1;
self.read_position -= 1;
self.column -= 1;
self.ch = 'i';
self.read_word()
}
} else {
self.read_word()
}
}
_ => self.read_word(),
};
if token.kind != TokenKind::Word(String::new()) {
self.read_char();
}
token
}
/// Reads a word starting at the current character and classifies it.
///
/// A word runs until whitespace, end of input, or a special character
/// (see `is_special_char`). If the word begins with an extended-glob
/// operator (`?`, `*`, `+`, `@` or `!`) immediately followed by `(`, the
/// parenthesised group is read up to its matching `)` first — special
/// characters such as `|` inside the group belong to the word.
///
/// On return the cursor rests on the LAST character of the word, because
/// `next_token` advances once more after every non-empty token. This holds
/// for extended-glob words too, so the character following the pattern is
/// not skipped.
///
/// Words that spell a reserved word exactly yield the matching keyword
/// kind; anything else yields `TokenKind::Word`.
fn read_word(&mut self) -> Token {
    let position = Position::new(self.line, self.column);
    let mut word = String::new();

    let starts_extglob =
        matches!(self.ch, '?' | '*' | '+' | '@' | '!') && self.peek_char() == '(';
    if starts_extglob {
        // Take the operator character and the opening parenthesis.
        word.push(self.ch);
        self.read_char();
        word.push(self.ch);
        self.read_char();

        // Copy everything up to the matching `)`, tracking nesting.
        let mut depth: usize = 1;
        while depth > 0 && self.ch != '\0' {
            match self.ch {
                '(' => depth += 1,
                ')' => depth -= 1,
                _ => {}
            }
            word.push(self.ch);
            self.read_char();
        }
    }

    // Ordinary word characters (this also picks up any suffix after an
    // extended-glob group, e.g. the `.txt` in `?(a|b).txt`).
    while !self.ch.is_whitespace() && self.ch != '\0' && !is_special_char(self.ch) {
        word.push(self.ch);
        self.read_char();
    }

    // Step back onto the last character of the word; `next_token` performs
    // the final advance.
    if self.position > 0 {
        self.position -= 1;
        self.read_position -= 1;
        self.column -= 1;
    }

    let kind = match word.as_str() {
        "if" => TokenKind::If,
        "then" => TokenKind::Then,
        "elif" => TokenKind::Elif,
        "else" => TokenKind::Else,
        "fi" => TokenKind::Fi,
        "for" => TokenKind::For,
        "while" => TokenKind::While,
        "until" => TokenKind::Until,
        "do" => TokenKind::Do,
        "done" => TokenKind::Done,
        "in" => TokenKind::In,
        "function" => TokenKind::Function,
        "break" => TokenKind::Break,
        "continue" => TokenKind::Continue,
        "return" => TokenKind::Return,
        "export" => TokenKind::Export,
        _ => TokenKind::Word(word.clone()),
    };

    Token {
        kind,
        value: word,
        position,
    }
}
/// Reads a `#` comment up to, but not including, the end of the line.
///
/// The returned value starts with `#`. The cursor is left on the last
/// character of the comment so the caller's advance lands on the newline
/// (or on end of input).
fn read_comment(&mut self) -> Token {
    let position = Position::new(self.line, self.column);
    let mut text = String::new();
    text.push('#');

    // Move past the `#` itself, then copy until the line or input ends.
    self.read_char();
    loop {
        match self.ch {
            '\n' | '\0' => break,
            other => text.push(other),
        }
        self.read_char();
    }

    if self.position > 0 {
        self.position -= 1;
        self.read_position -= 1;
        self.column -= 1;
    }

    Token {
        kind: TokenKind::Comment,
        value: text,
        position,
    }
}
/// Reads the text inside the currently open quote as a single `Word`.
///
/// Reading stops at the closing quote character (which is left for the next
/// `next_token` call) or at end of input. A backslash directly before the
/// quote character is dropped and the quote character is kept as content.
/// Newlines inside the quoted text advance the line counter.
///
/// Panics if called while no quote is open.
fn read_quoted_content(&mut self) -> Token {
    let position = Position::new(self.line, self.column);
    let mut text = String::new();
    let closing = self.in_quotes.unwrap();

    loop {
        if self.ch == closing || self.ch == '\0' {
            break;
        }
        let escapes_quote = self.ch == '\\' && self.peek_char() == closing;
        if escapes_quote {
            // Skip the backslash; the quote character itself is pushed below.
            self.read_char();
        }
        if self.ch == '\n' {
            self.line += 1;
            self.column = 0;
        }
        text.push(self.ch);
        self.read_char();
    }

    let kind = TokenKind::Word(text.clone());
    Token {
        kind,
        value: text,
        position,
    }
}
/// Skips whitespace up to the next newline or non-whitespace character.
/// Newlines are not skipped because they are tokens of their own.
fn skip_whitespace(&mut self) {
    loop {
        if self.ch == '\n' || !self.ch.is_whitespace() {
            return;
        }
        self.read_char();
    }
}
}
#[cfg(test)]
mod lexer_tests {
use crate::lexer::Lexer;
use crate::lexer::Token;
use crate::lexer::TokenKind;
/// Prints every token lexed from a quoted assignment; makes no assertions.
#[test]
fn debug_lexer_output() {
    let mut lexer = Lexer::new(r#"LOG_DIR="/var/log""#);
    println!("Tokens for 'LOG_DIR=\"/var/log\"':");
    loop {
        let token = lexer.next_token();
        if token.kind == TokenKind::EOF {
            break;
        }
        println!("Token: {:?}", token);
    }
}
/// Lexes `input` to completion and returns every token, with the final
/// `EOF` token included as the last element.
fn collect_tokens(input: &str) -> Vec<Token> {
    let mut lexer = Lexer::new(input);
    let mut collected: Vec<Token> = Vec::new();
    // Keep lexing until the token just pushed is the end-of-input marker.
    while collected
        .last()
        .map_or(true, |last| !matches!(last.kind, TokenKind::EOF))
    {
        collected.push(lexer.next_token());
    }
    collected
}
/// Asserts that lexing `input` yields exactly `expected_tokens`, in order,
/// followed by `EOF`.
fn test_tokens(input: &str, expected_tokens: Vec<TokenKind>) {
    let mut lexer = Lexer::new(input);

    for want in expected_tokens {
        let got = lexer.next_token();
        assert_eq!(
            got.kind, want,
            "Expected {:?} but got {:?} for input: {}",
            want, got.kind, input
        );
    }

    // Nothing may remain after the expected tokens.
    let trailing = lexer.next_token();
    assert_eq!(
        trailing.kind,
        TokenKind::EOF,
        "Expected EOF but got {:?}",
        trailing.kind
    );
}
/// A peeked token is still returned by the following `next_token` call.
#[test]
fn test_peek_without_advancing() {
    let mut lexer = Lexer::new("if then");

    let lookahead = lexer.peek_next_token();
    assert_eq!(lookahead.kind, TokenKind::If);
    assert_eq!(lookahead.value, "if");

    let first = lexer.next_token();
    assert_eq!(first.kind, TokenKind::If);
    assert_eq!(first.value, "if");

    let second = lexer.next_token();
    assert_eq!(second.kind, TokenKind::Then);
    assert_eq!(second.value, "then");
}
/// Repeated peeks return the same token until it is consumed.
#[test]
fn test_multiple_peeks() {
    let mut lexer = Lexer::new("for i in 1 2 3");

    let peek_one = lexer.peek_next_token();
    assert_eq!(peek_one.kind, TokenKind::For);
    let peek_two = lexer.peek_next_token();
    assert_eq!(peek_two.kind, TokenKind::For);

    let consumed = lexer.next_token();
    assert_eq!(consumed.kind, TokenKind::For);

    let peek_three = lexer.peek_next_token();
    assert_eq!(peek_three.kind, TokenKind::Word("i".to_string()));
}
/// Peeking after the last word reports `EOF`, and so does the next read.
#[test]
fn test_peek_at_end() {
    let mut lexer = Lexer::new("ls");

    let only_word = lexer.next_token();
    assert_eq!(only_word.kind, TokenKind::Word("ls".to_string()));

    let lookahead = lexer.peek_next_token();
    assert_eq!(lookahead.kind, TokenKind::EOF);

    let end = lexer.next_token();
    assert_eq!(end.kind, TokenKind::EOF);
}
/// Peeking works for bracket words and for the `=` operator inside a test
/// expression.
#[test]
fn test_peek_special_tokens() {
    let mut lexer = Lexer::new("if [ $a = 5 ]; then echo success; fi");

    let keyword = lexer.next_token();
    assert_eq!(keyword.kind, TokenKind::If);

    let bracket_lookahead = lexer.peek_next_token();
    assert_eq!(bracket_lookahead.kind, TokenKind::Word("[".to_string()));
    let bracket = lexer.next_token();
    assert_eq!(bracket.kind, TokenKind::Word("[".to_string()));

    // Consume `$` and `a`.
    for _ in 0..2 {
        lexer.next_token();
    }

    let assign_lookahead = lexer.peek_next_token();
    assert_eq!(assign_lookahead.kind, TokenKind::Assignment);
    assert_eq!(assign_lookahead.value, "=");
    let assign = lexer.next_token();
    assert_eq!(assign.kind, TokenKind::Assignment);
}
/// Peeking a two-character operator (`||`) does not consume it.
#[test]
fn test_peek_with_complex_tokens() {
    let mut lexer = Lexer::new("ls -l || echo 'failed'");

    // Consume `ls` and `-l`.
    for _ in 0..2 {
        lexer.next_token();
    }

    let or_lookahead = lexer.peek_next_token();
    assert_eq!(or_lookahead.kind, TokenKind::Or);
    assert_eq!(or_lookahead.value, "||");
    let or_consumed = lexer.next_token();
    assert_eq!(or_consumed.kind, TokenKind::Or);

    let echo_lookahead = lexer.peek_next_token();
    assert_eq!(echo_lookahead.kind, TokenKind::Word("echo".to_string()));
}
/// Peeking across a line break reports the `Newline` token and then the
/// first word of the next line.
#[test]
fn test_peek_with_newlines() {
    let mut lexer = Lexer::new("echo hello\necho world");

    // Consume `echo` and `hello`.
    for _ in 0..2 {
        lexer.next_token();
    }

    let newline_lookahead = lexer.peek_next_token();
    assert_eq!(newline_lookahead.kind, TokenKind::Newline);
    let newline = lexer.next_token();
    assert_eq!(newline.kind, TokenKind::Newline);

    let second_echo = lexer.peek_next_token();
    assert_eq!(second_echo.kind, TokenKind::Word("echo".to_string()));
}
/// A comment can be peeked and consumed, after which the newline follows.
#[test]
fn test_peek_with_comments() {
    let mut lexer = Lexer::new("# This is a comment\necho hello");

    let comment_lookahead = lexer.peek_next_token();
    assert_eq!(comment_lookahead.kind, TokenKind::Comment);
    let comment = lexer.next_token();
    assert_eq!(comment.kind, TokenKind::Comment);

    let newline_lookahead = lexer.peek_next_token();
    assert_eq!(newline_lookahead.kind, TokenKind::Newline);
}
/// `peek_next_token` leaves the cursor and line/column counters untouched,
/// while `next_token` moves the cursor.
#[test]
fn test_state_preservation() {
    let mut lexer = Lexer::new("if [ $? -eq 0 ]; then echo success; fi");

    let start_position = lexer.position;
    let start_read_position = lexer.read_position;
    let start_line = lexer.line;
    let start_column = lexer.column;

    lexer.peek_next_token();
    assert_eq!(lexer.position, start_position);
    assert_eq!(lexer.read_position, start_read_position);
    assert_eq!(lexer.line, start_line);
    assert_eq!(lexer.column, start_column);

    lexer.next_token();
    assert_ne!(lexer.position, start_position);
}
/// A simple pipeline lexes into words separated by a `Pipe`.
#[test]
fn test_basic_tokens() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "ls -l | grep file",
        vec![w("ls"), w("-l"), TokenKind::Pipe, w("grep"), w("file")],
    );
}
/// `NAME=value` lexes as word, `Assignment`, word.
#[test]
fn test_assignment() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens("VAR=value", vec![w("VAR"), TokenKind::Assignment, w("value")]);
}
/// Output redirections: `2>&1` is lexed as `2`, `>`, `&`, `1`.
#[test]
fn test_redirections() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "ls > output.txt 2>&1",
        vec![
            w("ls"),
            TokenKind::Great,
            w("output.txt"),
            w("2"),
            TokenKind::Great,
            TokenKind::Background,
            w("1"),
        ],
    );
}
/// Quoted text is returned as one word between a pair of quote tokens, for
/// both double and single quotes.
#[test]
fn test_quoted_strings() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        r#"echo "hello world" 'rio de janeiro'"#,
        vec![
            w("echo"),
            TokenKind::Quote,
            w("hello world"),
            TokenKind::Quote,
            TokenKind::SingleQuote,
            w("rio de janeiro"),
            TokenKind::SingleQuote,
        ],
    );
}
/// `$(` opens a command substitution whose body is lexed normally up to `)`.
#[test]
fn test_command_substitution() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "echo $(ls -l)",
        vec![
            w("echo"),
            TokenKind::CmdSubst,
            w("ls"),
            w("-l"),
            TokenKind::RParen,
        ],
    );
}
/// A command substitution may appear on the right-hand side of an
/// assignment.
#[test]
fn test_command_substitution_on_variable() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "NUMBER=$(echo 85)",
        vec![
            w("NUMBER"),
            TokenKind::Assignment,
            TokenKind::CmdSubst,
            w("echo"),
            w("85"),
            TokenKind::RParen,
        ],
    );
}
/// A command substitution wrapped in double quotes on the right-hand side
/// of an assignment.
// NOTE(review): the expected sequence puts the closing `Quote` before
// `RParen` and splits the quoted body into separate words. That does not
// appear to match the quote handling visible in `next_token`, which returns
// quoted text as a single word — confirm this test passes as written.
#[test]
fn test_command_substitution_on_variable_with_quotes() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "NUMBER=\"$(echo 85)\"",
        vec![
            w("NUMBER"),
            TokenKind::Assignment,
            TokenKind::Quote,
            TokenKind::CmdSubst,
            w("echo"),
            w("85"),
            TokenKind::Quote,
            TokenKind::RParen,
        ],
    );
}
/// `$NAME` lexes as `Dollar` followed by the variable name as a word.
#[test]
fn test_variable_expansion() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens("echo $HOME", vec![w("echo"), TokenKind::Dollar, w("HOME")]);
}
/// `&&` and `||` are lexed as single `And` / `Or` tokens.
#[test]
fn test_operators() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "cmd1 && cmd2 || cmd3",
        vec![w("cmd1"), TokenKind::And, w("cmd2"), TokenKind::Or, w("cmd3")],
    );
}
/// A trailing single `&` is lexed as `Background`.
#[test]
fn test_background_process() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens("sleep 10 &", vec![w("sleep"), w("10"), TokenKind::Background]);
}
/// Everything from `#` to the end of the input becomes one `Comment` token.
#[test]
fn test_comments() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "echo hello # this is a comment",
        vec![w("echo"), w("hello"), TokenKind::Comment],
    );
}
/// Line breaks between commands are reported as `Newline` tokens.
#[test]
fn test_newlines() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "cmd1\ncmd2\ncmd3",
        vec![
            w("cmd1"),
            TokenKind::Newline,
            w("cmd2"),
            TokenKind::Newline,
            w("cmd3"),
        ],
    );
}
/// A one-line `if ...; then ...; fi` yields the `If`, `Then` and `Fi`
/// keyword tokens.
#[test]
fn test_if_statement() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "if test -f file.txt; then echo found; fi",
        vec![
            TokenKind::If,
            w("test"),
            w("-f"),
            w("file.txt"),
            TokenKind::Semicolon,
            TokenKind::Then,
            w("echo"),
            w("found"),
            TokenKind::Semicolon,
            TokenKind::Fi,
        ],
    );
}
/// Keywords are recognised when separated by newlines instead of `;`.
#[test]
fn test_if_with_newlines() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "if true\nthen\necho yes\nfi",
        vec![
            TokenKind::If,
            w("true"),
            TokenKind::Newline,
            TokenKind::Then,
            TokenKind::Newline,
            w("echo"),
            w("yes"),
            TokenKind::Newline,
            TokenKind::Fi,
        ],
    );
}
/// An `if`/`else` with a bracketed test expression.
#[test]
fn test_if_else_statement() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "if [ $a -eq 5 ]; then echo equal; else echo not equal; fi",
        vec![
            TokenKind::If,
            w("["),
            TokenKind::Dollar,
            w("a"),
            w("-eq"),
            w("5"),
            w("]"),
            TokenKind::Semicolon,
            TokenKind::Then,
            w("echo"),
            w("equal"),
            TokenKind::Semicolon,
            TokenKind::Else,
            w("echo"),
            w("not"),
            w("equal"),
            TokenKind::Semicolon,
            TokenKind::Fi,
        ],
    );
}
/// An `if`/`elif`/`else` chain yields `If`, `Elif`, `Else` and `Fi`.
#[test]
fn test_if_elif_else_statement() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "if [ $a -eq 1 ]; then echo one; elif [ $a -eq 2 ]; then echo two; else echo other; fi",
        vec![
            TokenKind::If,
            w("["),
            TokenKind::Dollar,
            w("a"),
            w("-eq"),
            w("1"),
            w("]"),
            TokenKind::Semicolon,
            TokenKind::Then,
            w("echo"),
            w("one"),
            TokenKind::Semicolon,
            TokenKind::Elif,
            w("["),
            TokenKind::Dollar,
            w("a"),
            w("-eq"),
            w("2"),
            w("]"),
            TokenKind::Semicolon,
            TokenKind::Then,
            w("echo"),
            w("two"),
            TokenKind::Semicolon,
            TokenKind::Else,
            w("echo"),
            w("other"),
            TokenKind::Semicolon,
            TokenKind::Fi,
        ],
    );
}
/// Nested `if` statements produce two `If`/`Then`/`Fi` sequences.
#[test]
fn test_nested_if_statements() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "if true; then if false; then echo nested; fi; fi",
        vec![
            TokenKind::If,
            w("true"),
            TokenKind::Semicolon,
            TokenKind::Then,
            TokenKind::If,
            w("false"),
            TokenKind::Semicolon,
            TokenKind::Then,
            w("echo"),
            w("nested"),
            TokenKind::Semicolon,
            TokenKind::Fi,
            TokenKind::Semicolon,
            TokenKind::Fi,
        ],
    );
}
/// The condition of an `if` may be a command with several arguments.
#[test]
fn test_if_with_complex_command() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "if grep -q pattern file.txt; then echo found; fi",
        vec![
            TokenKind::If,
            w("grep"),
            w("-q"),
            w("pattern"),
            w("file.txt"),
            TokenKind::Semicolon,
            TokenKind::Then,
            w("echo"),
            w("found"),
            TokenKind::Semicolon,
            TokenKind::Fi,
        ],
    );
}
/// Words that merely start with a keyword (`ifconfig`, `thenext`, ...) stay
/// plain words.
#[test]
fn test_control_flow_keywords_as_prefix() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "ifconfig && thenext && elifprocess && elseware && fifile",
        vec![
            w("ifconfig"),
            TokenKind::And,
            w("thenext"),
            TokenKind::And,
            w("elifprocess"),
            TokenKind::And,
            w("elseware"),
            TokenKind::And,
            w("fifile"),
        ],
    );
}
/// Glob characters (`*`, `?`, `[...]`) are kept inside the word.
#[test]
fn test_words_with_glob_patterns() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "ls *.txt file?.log [abc]*.tmp",
        vec![w("ls"), w("*.txt"), w("file?.log"), w("[abc]*.tmp")],
    );
}
/// Extended-glob groups such as `?(a|b)` are lexed as a single word together
/// with their suffix.
#[test]
fn test_extglob_patterns() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "ls ?(file|temp).txt *(a|b|c).log +(1|2|3).dat",
        vec![
            w("ls"),
            w("?(file|temp).txt"),
            w("*(a|b|c).log"),
            w("+(1|2|3).dat"),
        ],
    );
}
/// Identifiers that begin with a keyword followed by `_` are words, while
/// the bare keywords around them are still recognised.
#[test]
fn test_mixed_keywords_and_words() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "if if_var=42; then echo then_var=42; fi",
        vec![
            TokenKind::If,
            w("if_var"),
            TokenKind::Assignment,
            w("42"),
            TokenKind::Semicolon,
            TokenKind::Then,
            w("echo"),
            w("then_var"),
            TokenKind::Assignment,
            w("42"),
            TokenKind::Semicolon,
            TokenKind::Fi,
        ],
    );
}
/// A command substitution can serve as the condition of an `if`.
#[test]
fn test_command_substitution_in_if() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "if $(test -d /tmp); then echo directory exists; fi",
        vec![
            TokenKind::If,
            TokenKind::CmdSubst,
            w("test"),
            w("-d"),
            w("/tmp"),
            TokenKind::RParen,
            TokenKind::Semicolon,
            TokenKind::Then,
            w("echo"),
            w("directory"),
            w("exists"),
            TokenKind::Semicolon,
            TokenKind::Fi,
        ],
    );
}
/// `function name() { ... }` yields the `Function` keyword followed by the
/// name, parentheses and a braced body.
#[test]
fn test_function_declaration() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "function greet() { echo hello; }",
        vec![
            TokenKind::Function,
            w("greet"),
            TokenKind::LParen,
            TokenKind::RParen,
            TokenKind::LBrace,
            w("echo"),
            w("hello"),
            TokenKind::Semicolon,
            TokenKind::RBrace,
        ],
    );
}
/// The `name() { ... }` form without the `function` keyword.
#[test]
fn test_function_declaration_alternate_syntax() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "greet() { echo hello; }",
        vec![
            w("greet"),
            TokenKind::LParen,
            TokenKind::RParen,
            TokenKind::LBrace,
            w("echo"),
            w("hello"),
            TokenKind::Semicolon,
            TokenKind::RBrace,
        ],
    );
}
/// Function calls, with and without arguments, are plain words.
#[test]
fn test_function_call() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "greet; greet arg1 arg2",
        vec![
            w("greet"),
            TokenKind::Semicolon,
            w("greet"),
            w("arg1"),
            w("arg2"),
        ],
    );
}
/// A function body containing an `if` and a `return` statement.
#[test]
fn test_function_with_return() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "function check() { if [ $1 -eq 0 ]; then return 1; fi; echo ok; }",
        vec![
            TokenKind::Function,
            w("check"),
            TokenKind::LParen,
            TokenKind::RParen,
            TokenKind::LBrace,
            TokenKind::If,
            w("["),
            TokenKind::Dollar,
            w("1"),
            w("-eq"),
            w("0"),
            w("]"),
            TokenKind::Semicolon,
            TokenKind::Then,
            TokenKind::Return,
            w("1"),
            TokenKind::Semicolon,
            TokenKind::Fi,
            TokenKind::Semicolon,
            w("echo"),
            w("ok"),
            TokenKind::Semicolon,
            TokenKind::RBrace,
        ],
    );
}
/// A function declaration spread over several lines, with a quoted string
/// in its body.
#[test]
fn test_function_multiline() {
    let w = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "function hello() {\n echo \"Hello, world!\"\n return 0\n}",
        vec![
            TokenKind::Function,
            w("hello"),
            TokenKind::LParen,
            TokenKind::RParen,
            TokenKind::LBrace,
            TokenKind::Newline,
            w("echo"),
            TokenKind::Quote,
            w("Hello, world!"),
            TokenKind::Quote,
            TokenKind::Newline,
            TokenKind::Return,
            w("0"),
            TokenKind::Newline,
            TokenKind::RBrace,
        ],
    );
}
/// A one-line `for ... in ...; do ...; done` loop over literal values.
///
/// `for`, `in`, `do` and `done` are expected as keyword tokens; `$i` as
/// `Dollar` followed by the word `i`.
#[test]
fn test_for_loop_basic() {
    // Shorthand for building `Word` tokens from string literals.
    let word = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "for i in 1 2 3; do echo $i; done",
        vec![
            TokenKind::For,
            word("i"),
            TokenKind::In,
            word("1"),
            word("2"),
            word("3"),
            TokenKind::Semicolon,
            TokenKind::Do,
            word("echo"),
            TokenKind::Dollar,
            word("i"),
            TokenKind::Semicolon,
            TokenKind::Done,
        ],
    );
}
/// A `for` loop whose item list is a glob; `*.txt` is expected to stay a
/// single `Word` token.
#[test]
fn test_for_loop_with_glob() {
    // Shorthand for building `Word` tokens from string literals.
    let word = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "for file in *.txt; do cat $file; done",
        vec![
            TokenKind::For,
            word("file"),
            TokenKind::In,
            word("*.txt"),
            TokenKind::Semicolon,
            TokenKind::Do,
            word("cat"),
            TokenKind::Dollar,
            word("file"),
            TokenKind::Semicolon,
            TokenKind::Done,
        ],
    );
}
/// A multi-line `for` loop iterating over a command substitution.
///
/// `$(` is expected as one `CmdSubst` token closed by an `RParen`, and every
/// line break as a `Newline`.
#[test]
fn test_for_loop_multiline() {
    // Shorthand for building `Word` tokens from string literals.
    let word = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "for i in $(seq 1 10)\ndo\n echo $i\ndone",
        vec![
            TokenKind::For,
            word("i"),
            TokenKind::In,
            TokenKind::CmdSubst,
            word("seq"),
            word("1"),
            word("10"),
            TokenKind::RParen,
            TokenKind::Newline,
            TokenKind::Do,
            TokenKind::Newline,
            word("echo"),
            TokenKind::Dollar,
            word("i"),
            TokenKind::Newline,
            TokenKind::Done,
        ],
    );
}
/// A `for` loop whose body is an `if` block containing `break`; `break` is
/// expected as its own keyword token.
#[test]
fn test_for_loop_with_break() {
    // Shorthand for building `Word` tokens from string literals.
    let word = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "for i in 1 2 3; do if [ $i -eq 2 ]; then break; fi; done",
        vec![
            TokenKind::For,
            word("i"),
            TokenKind::In,
            word("1"),
            word("2"),
            word("3"),
            TokenKind::Semicolon,
            TokenKind::Do,
            TokenKind::If,
            word("["),
            TokenKind::Dollar,
            word("i"),
            word("-eq"),
            word("2"),
            word("]"),
            TokenKind::Semicolon,
            TokenKind::Then,
            TokenKind::Break,
            TokenKind::Semicolon,
            TokenKind::Fi,
            TokenKind::Semicolon,
            TokenKind::Done,
        ],
    );
}
/// A `for` loop whose body is an `if` block containing `continue`, followed
/// by another command; `continue` is expected as its own keyword token.
#[test]
fn test_for_loop_with_continue() {
    // Shorthand for building `Word` tokens from string literals.
    let word = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "for i in 1 2 3; do if [ $i -eq 2 ]; then continue; fi; echo $i; done",
        vec![
            TokenKind::For,
            word("i"),
            TokenKind::In,
            word("1"),
            word("2"),
            word("3"),
            TokenKind::Semicolon,
            TokenKind::Do,
            TokenKind::If,
            word("["),
            TokenKind::Dollar,
            word("i"),
            word("-eq"),
            word("2"),
            word("]"),
            TokenKind::Semicolon,
            TokenKind::Then,
            TokenKind::Continue,
            TokenKind::Semicolon,
            TokenKind::Fi,
            TokenKind::Semicolon,
            word("echo"),
            TokenKind::Dollar,
            word("i"),
            TokenKind::Semicolon,
            TokenKind::Done,
        ],
    );
}
/// A C-style `for ((init; cond; step))` header with an increment.
///
/// The double parens are expected as two separate `LParen` / `RParen`
/// tokens, `=` as `Assignment`, `<` as `Less`, and `i++` as a single word.
#[test]
fn test_c_style_for_loop() {
    // Shorthand for building `Word` tokens from string literals.
    let word = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "for ((i=0; i<5; i++)); do echo $i; done",
        vec![
            TokenKind::For,
            TokenKind::LParen,
            TokenKind::LParen,
            word("i"),
            TokenKind::Assignment,
            word("0"),
            TokenKind::Semicolon,
            word("i"),
            TokenKind::Less,
            word("5"),
            TokenKind::Semicolon,
            word("i++"),
            TokenKind::RParen,
            TokenKind::RParen,
            TokenKind::Semicolon,
            TokenKind::Do,
            word("echo"),
            TokenKind::Dollar,
            word("i"),
            TokenKind::Semicolon,
            TokenKind::Done,
        ],
    );
}
/// A C-style `for` header counting down: `>` is expected as `Great` and
/// `i--` as a single word.
#[test]
fn test_c_style_for_loop_using_decrement() {
    // Shorthand for building `Word` tokens from string literals.
    let word = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "for ((i=5; i>0; i--)); do echo $i; done",
        vec![
            TokenKind::For,
            TokenKind::LParen,
            TokenKind::LParen,
            word("i"),
            TokenKind::Assignment,
            word("5"),
            TokenKind::Semicolon,
            word("i"),
            TokenKind::Great,
            word("0"),
            TokenKind::Semicolon,
            word("i--"),
            TokenKind::RParen,
            TokenKind::RParen,
            TokenKind::Semicolon,
            TokenKind::Do,
            word("echo"),
            TokenKind::Dollar,
            word("i"),
            TokenKind::Semicolon,
            TokenKind::Done,
        ],
    );
}
/// A one-line `while` loop with an arithmetic update in its body.
///
/// `$((i+1))` is expected as `CmdSubst`, `LParen`, the single word `i+1`,
/// and two `RParen` tokens.
#[test]
fn test_while_loop_basic() {
    // Shorthand for building `Word` tokens from string literals.
    let word = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "while [ $i -lt 10 ]; do echo $i; i=$((i+1)); done",
        vec![
            TokenKind::While,
            word("["),
            TokenKind::Dollar,
            word("i"),
            word("-lt"),
            word("10"),
            word("]"),
            TokenKind::Semicolon,
            TokenKind::Do,
            word("echo"),
            TokenKind::Dollar,
            word("i"),
            TokenKind::Semicolon,
            word("i"),
            TokenKind::Assignment,
            TokenKind::CmdSubst,
            TokenKind::LParen,
            word("i+1"),
            TokenKind::RParen,
            TokenKind::RParen,
            TokenKind::Semicolon,
            TokenKind::Done,
        ],
    );
}
/// A multi-line `while` loop containing a one-line `if ... break` block;
/// `true` is expected as a plain word and line breaks as `Newline` tokens.
#[test]
fn test_while_loop_multiline() {
    // Shorthand for building `Word` tokens from string literals.
    let word = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "while true\ndo\n echo looping\n if [ $count -gt 10 ]; then break; fi\ndone",
        vec![
            TokenKind::While,
            word("true"),
            TokenKind::Newline,
            TokenKind::Do,
            TokenKind::Newline,
            word("echo"),
            word("looping"),
            TokenKind::Newline,
            TokenKind::If,
            word("["),
            TokenKind::Dollar,
            word("count"),
            word("-gt"),
            word("10"),
            word("]"),
            TokenKind::Semicolon,
            TokenKind::Then,
            TokenKind::Break,
            TokenKind::Semicolon,
            TokenKind::Fi,
            TokenKind::Newline,
            TokenKind::Done,
        ],
    );
}
/// An array literal assignment `name=(a b c)` is expected as the name,
/// `Assignment`, and the elements as words between `LParen` and `RParen`.
#[test]
fn test_array_declaration() {
    // Shorthand for building `Word` tokens from string literals.
    let word = |text: &str| TokenKind::Word(text.to_string());
    test_tokens(
        "colors=(red green blue)",
        vec![
            word("colors"),
            TokenKind::Assignment,
            TokenKind::LParen,
            word("red"),
            word("green"),
            word("blue"),
            TokenKind::RParen,
        ],
    );
}
/// A bare `export` is expected to yield exactly two tokens, the first being
/// the `Export` keyword carrying the text `export`.
#[test]
fn test_export_keyword() {
    let lexed = collect_tokens("export");
    assert_eq!(lexed.len(), 2);

    let first = &lexed[0];
    assert!(matches!(first.kind, TokenKind::Export));
    assert_eq!(first.value, "export");
}
/// `export VAR=value` is expected to yield five tokens: `Export`, a word,
/// `Assignment`, a word and `EOF`, with the first four carrying the source
/// text `export`, `VAR`, `=` and `value`.
#[test]
fn test_export_assignment() {
    let tokens = collect_tokens("export VAR=value");

    let kinds: Vec<&TokenKind> = tokens.iter().map(|tok| &tok.kind).collect();
    assert_eq!(kinds.len(), 5);
    // One slice pattern checks the kind at every position.
    assert!(matches!(
        kinds.as_slice(),
        [
            TokenKind::Export,
            TokenKind::Word(_),
            TokenKind::Assignment,
            TokenKind::Word(_),
            TokenKind::EOF,
        ]
    ));

    // Only the first four tokens have their text checked.
    let texts: Vec<&str> = tokens.iter().take(4).map(|tok| tok.value.as_str()).collect();
    assert_eq!(texts, ["export", "VAR", "=", "value"]);
}
/// `export` with a double-quoted value: the first six kinds are expected to
/// be `Export`, word, `Assignment`, `Quote`, word, `Quote`, and the quoted
/// text is expected as the value of the token at index 4.
#[test]
fn test_export_with_quotes() {
    let tokens = collect_tokens("export PATH=\"/usr/bin:/bin\"");

    let kinds: Vec<&TokenKind> = tokens.iter().map(|tok| &tok.kind).collect();
    // Only the leading six kinds are constrained; anything after is ignored.
    assert!(matches!(
        kinds.as_slice(),
        [
            TokenKind::Export,
            TokenKind::Word(_),
            TokenKind::Assignment,
            TokenKind::Quote,
            TokenKind::Word(_),
            TokenKind::Quote,
            ..
        ]
    ));

    let keyword = &tokens[0];
    let name = &tokens[1];
    let quoted = &tokens[4];
    assert_eq!(keyword.value, "export");
    assert_eq!(name.value, "PATH");
    assert_eq!(quoted.value, "/usr/bin:/bin");
}
/// `export` followed by two assignments is expected to contain exactly one
/// `Export` token and four `Word` tokens (two names and two values).
#[test]
fn test_export_multiple_variables() {
    let tokens = collect_tokens("export VAR1=val1 VAR2=val2");

    // Tally both kinds of interest in a single pass over the tokens.
    let (exports, words) =
        tokens
            .iter()
            .fold((0usize, 0usize), |(exports, words), tok| match tok.kind {
                TokenKind::Export => (exports + 1, words),
                TokenKind::Word(_) => (exports, words + 1),
                _ => (exports, words),
            });

    assert_eq!(exports, 1);
    assert_eq!(words, 4);
}
/// Words that merely start with `export` are expected to lex as plain
/// `Word` tokens rather than the `Export` keyword.
#[test]
fn test_export_not_keyword_when_part_of_word() {
    let exported = collect_tokens("exported");
    assert_eq!(exported.len(), 2);
    let head = &exported[0];
    assert!(matches!(head.kind, TokenKind::Word(_)));
    assert_eq!(head.value, "exported");

    // The token count is only checked for the first input.
    let exportable = collect_tokens("exportable");
    let head = &exportable[0];
    assert!(matches!(head.kind, TokenKind::Word(_)));
    assert_eq!(head.value, "exportable");
}
/// An `export` statement followed by a second line: `Export` is expected at
/// index 0, `Newline` at index 4 and a `Word` at index 5.
#[test]
fn test_export_with_newline() {
    let tokens = collect_tokens("export VAR=value\necho $VAR");

    assert!(matches!(tokens[0].kind, TokenKind::Export));
    assert!(matches!(tokens[4].kind, TokenKind::Newline));
    assert!(matches!(tokens[5].kind, TokenKind::Word(_)));
}
/// An `export` statement followed by `;` and another command is expected to
/// contain at least one `Semicolon` token.
#[test]
fn test_export_with_semicolon() {
    let tokens = collect_tokens("export VAR=value; echo done");
    let has_semicolon = tokens
        .iter()
        .any(|tok| matches!(tok.kind, TokenKind::Semicolon));
    assert!(has_semicolon);
}
}