use super::*;
use super::word_builder::{WordSpan, WordSpanKind};
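
/// Lexes `source` to completion and collects every `(kind, value)` pair,
/// stopping at (and excluding) the EOF token. The `false` argument disables
/// extglob; see `extglob_disabled_does_not_absorb_paren` below.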
#[allow(clippy::unwrap_used)]
fn collect_tokens(source: &str) -> Vec<(TokenType, String)> {
    let mut lexer = Lexer::new(source, false);
    let mut tokens = Vec::new();
    loop {
        let tok = lexer.next_token().unwrap();
        if tok.kind == TokenType::Eof {
            break;
        }
        tokens.push((tok.kind, tok.value));
    }
    tokens
}
#[test]
fn simple_command() {
    let tokens = collect_tokens("echo hello world");
    assert_eq!(tokens.len(), 3);
    assert_eq!(tokens[0], (TokenType::Word, "echo".to_string()));
    assert_eq!(tokens[1], (TokenType::Word, "hello".to_string()));
    assert_eq!(tokens[2], (TokenType::Word, "world".to_string()));
}

#[test]
fn pipeline() {
    let tokens = collect_tokens("ls | grep foo");
    assert_eq!(tokens.len(), 4);
    assert_eq!(tokens[0].1, "ls");
    assert_eq!(tokens[1], (TokenType::Pipe, "|".to_string()));
    assert_eq!(tokens[2].1, "grep");
    assert_eq!(tokens[3].1, "foo");
}

#[test]
fn redirections() {
    let tokens = collect_tokens("echo hello > file.txt");
    assert_eq!(tokens.len(), 4);
    assert_eq!(tokens[2], (TokenType::Greater, ">".to_string()));
}

#[test]
fn reserved_words() {
    let tokens = collect_tokens("if true; then echo yes; fi");
    assert_eq!(tokens[0].0, TokenType::If);
    assert_eq!(tokens[2].0, TokenType::Semi);
    assert_eq!(tokens[3].0, TokenType::Then);
    assert_eq!(tokens[6].0, TokenType::Semi);
    assert_eq!(tokens[7].0, TokenType::Fi);
}

#[test]
fn single_quoted() {
    let tokens = collect_tokens("echo 'hello world'");
    assert_eq!(tokens.len(), 2);
    assert_eq!(tokens[1].1, "'hello world'");
}

#[test]
fn double_quoted() {
    let tokens = collect_tokens("echo \"hello $name\"");
    assert_eq!(tokens.len(), 2);
    assert_eq!(tokens[1].1, "\"hello $name\"");
}

#[test]
#[allow(clippy::literal_string_with_formatting_args)]
fn dollar_expansion() {
    let tokens = collect_tokens("echo ${foo:-bar}");
    assert_eq!(tokens.len(), 2);
    assert_eq!(tokens[1].1, "${foo:-bar}");
}

#[test]
fn command_substitution() {
    let tokens = collect_tokens("echo $(date)");
    assert_eq!(tokens.len(), 2);
    assert_eq!(tokens[1].1, "$(date)");
}

#[test]
fn and_or() {
    let tokens = collect_tokens("a && b || c");
    assert_eq!(tokens[1], (TokenType::And, "&&".to_string()));
    assert_eq!(tokens[3], (TokenType::Or, "||".to_string()));
}
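
// ---- assignment words ----
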
#[test]
fn assignment_word_simple() {
    let tokens = collect_tokens("FOO=bar");
    assert_eq!(tokens[0], (TokenType::AssignmentWord, "FOO=bar".to_string()));
}

#[test]
fn assignment_word_plus_equals() {
    let tokens = collect_tokens("FOO+=bar");
    assert_eq!(tokens[0], (TokenType::AssignmentWord, "FOO+=bar".to_string()));
}

#[test]
fn assignment_word_array() {
    let tokens = collect_tokens("arr=(a b)");
    assert_eq!(tokens[0], (TokenType::AssignmentWord, "arr=(a b)".to_string()));
}

#[test]
fn assignment_word_subscript() {
    let tokens = collect_tokens("arr[0]=val");
    assert_eq!(tokens[0], (TokenType::AssignmentWord, "arr[0]=val".to_string()));
}

#[test]
fn not_assignment_no_name() {
    let tokens = collect_tokens("=value");
    assert_eq!(tokens[0].0, TokenType::Word);
}

#[test]
fn not_assignment_regular_word() {
    let tokens = collect_tokens("echo");
    assert_eq!(tokens[0].0, TokenType::Word);
}
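
// ---- reserved-word recognition at command start ----
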
#[test]
fn assignment_before_command_keeps_command_start() {
    let tokens = collect_tokens("FOO=bar echo hello");
    assert_eq!(tokens[0].0, TokenType::AssignmentWord);
    assert_eq!(tokens[1].0, TokenType::Word);
    assert_eq!(tokens[2].0, TokenType::Word);
}

#[test]
fn reserved_word_after_assignment_is_plain_word() {
    let tokens = collect_tokens("foo= for x");
    assert_eq!(tokens[0].0, TokenType::AssignmentWord);
    assert_eq!(tokens[1], (TokenType::Word, "for".to_string()));
    assert_eq!(tokens[2], (TokenType::Word, "x".to_string()));

    let tokens = collect_tokens("arr[0]=$fo do o");
    assert_eq!(tokens[0].0, TokenType::AssignmentWord);
    assert_eq!(tokens[1], (TokenType::Word, "do".to_string()));

    let tokens = collect_tokens("x=$ then bar");
    assert_eq!(tokens[0].0, TokenType::AssignmentWord);
    assert_eq!(tokens[1], (TokenType::Word, "then".to_string()));
}

#[test]
fn reserved_word_re_armed_after_separator() {
    let tokens = collect_tokens("foo=bar baz; do");
    assert_eq!(tokens[0].0, TokenType::AssignmentWord);
    assert_eq!(tokens[3].0, TokenType::Do);

    let tokens = collect_tokens("foo= for | do");
    assert_eq!(tokens[1], (TokenType::Word, "for".to_string()));
    assert_eq!(tokens[3].0, TokenType::Do);
}

#[test]
fn list_separators_re_arm_reserved_words() {
    for src in [
        "foo= |& for",
        "foo= && for",
        "foo= || for",
        "foo= ;; for",
        "foo= ;& for",
        "foo= ;;& for",
    ] {
        let tokens = collect_tokens(src);
        assert_eq!(tokens.len(), 3, "`{src}`: unexpected token count");
        assert_eq!(
            tokens[2].0,
            TokenType::For,
            "`{src}`: list separator must re-arm reserved-word recognition"
        );
    }
}

#[test]
fn file_redirects_do_not_re_arm_reserved_words() {
    for src in [
        "foo= > f for",
        "foo= >> f for",
        "foo= &> f for",
        "foo= < f for",
        "foo= <<< f for",
    ] {
        let tokens = collect_tokens(src);
        assert_eq!(tokens.len(), 4, "`{src}`: unexpected token count");
        assert_eq!(
            tokens[3],
            (TokenType::Word, "for".to_string()),
            "`{src}`: file redirect must not re-arm reserved words"
        );
    }
}
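
// ---- extglob and paren-delimited constructs ----
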
#[test]
fn extglob_prefix_without_paren_is_ordinary_char() {
    for src in ["foo@bar", "foo?bar", "foo+bar", "foo!bar", "foo*bar"] {
        let tokens = collect_tokens(src);
        assert_eq!(tokens.len(), 1, "`{src}`: unexpected token count");
        assert_eq!(
            tokens[0],
            (TokenType::Word, src.to_string()),
            "`{src}`: prefix char without `(` must stay an ordinary word char"
        );
    }
}

#[test]
fn arithmetic_double_paren_closes_at_double_right_paren() {
    let tokens = collect_tokens("$((1+2))");
    assert_eq!(tokens.len(), 1, "tokens = {tokens:?}");
    assert_eq!(tokens[0].0, TokenType::Word);
    assert_eq!(tokens[0].1, "$((1+2))");
}

#[test]
fn extglob_with_alternation_is_one_word() {
    let tokens = collect_tokens("@(foo|bar|baz)");
    assert_eq!(tokens.len(), 1, "tokens = {tokens:?}");
    assert_eq!(tokens[0].0, TokenType::Word);
    assert_eq!(tokens[0].1, "@(foo|bar|baz)");
}

#[test]
fn empty_extglob_closes_at_immediate_right_paren() {
    let tokens = collect_tokens("@()");
    assert_eq!(tokens.len(), 1, "tokens = {tokens:?}");
    assert_eq!(tokens[0].0, TokenType::Word);
    assert_eq!(tokens[0].1, "@()");
}

#[test]
fn trailing_backslash_in_arith_is_graceful_error() {
    let mut lexer = Lexer::new("$(( \\", false);
    let result = lexer.next_token();
    assert!(result.is_err(), "expected error, got {result:?}");
}

#[test]
fn paren_content_comment_stops_at_newline() {
    let tokens = collect_tokens("$(( # comment\n 1+2 ))");
    assert_eq!(tokens.len(), 1, "tokens = {tokens:?}");
    assert_eq!(tokens[0].0, TokenType::Word);
    assert!(
        tokens[0].1.starts_with("$((") && tokens[0].1.ends_with("))"),
        "unexpected word value: {:?}",
        tokens[0].1
    );
    assert!(
        !tokens[0].1.contains("comment"),
        "comment text must be skipped: {:?}",
        tokens[0].1
    );
}

#[test]
fn extglob_disabled_does_not_absorb_paren() {
    for src in ["!(cmd)", "foo*(bar)"] {
        let tokens = collect_tokens(src);
        let has_left_paren = tokens.iter().any(|(k, _)| *k == TokenType::LeftParen);
        assert!(
            has_left_paren,
            "`{src}` with extglob=false: expected a LeftParen token, got {tokens:?}"
        );
    }
}
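
// ---- word spans ----

/// Lexes only the first token of `source` (extglob disabled) and returns its
/// value together with the spans recorded by the word builder.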
#[allow(clippy::unwrap_used)]
fn first_word_spans(source: &str) -> (String, Vec<WordSpan>) {
    let mut lexer = Lexer::new(source, false);
    let tok = lexer.next_token().unwrap();
    (tok.value, tok.spans)
}
#[test]
fn span_plain_word_no_spans() {
    let (_, spans) = first_word_spans("echo");
    assert!(spans.is_empty());
}

#[test]
fn span_command_sub() {
    let (val, spans) = first_word_spans("$(cmd)");
    assert_eq!(spans.len(), 1);
    assert_eq!(spans[0].kind, WordSpanKind::CommandSub);
    assert_eq!(spans[0].start, 0);
    assert_eq!(spans[0].end, val.len());
}

#[test]
fn span_command_sub_mid_word() {
    let (_, spans) = first_word_spans("hello$(world)end");
    assert_eq!(spans.len(), 1);
    assert_eq!(spans[0].kind, WordSpanKind::CommandSub);
    assert_eq!(spans[0].start, 5);
    assert_eq!(spans[0].end, 13);
}

#[test]
fn span_arithmetic_sub() {
    let (val, spans) = first_word_spans("$((1+2))");
    assert_eq!(spans.len(), 1);
    assert_eq!(spans[0].kind, WordSpanKind::ArithmeticSub);
    assert_eq!(spans[0].start, 0);
    assert_eq!(spans[0].end, val.len());
}

#[test]
fn span_param_expansion() {
    let (val, spans) = first_word_spans("${var:-default}");
    assert_eq!(spans.len(), 1);
    assert_eq!(spans[0].kind, WordSpanKind::ParamExpansion);
    assert_eq!(spans[0].start, 0);
    assert_eq!(spans[0].end, val.len());
}

#[test]
fn span_simple_var() {
    let (val, spans) = first_word_spans("$HOME");
    assert_eq!(spans.len(), 1);
    assert_eq!(spans[0].kind, WordSpanKind::SimpleVar);
    assert_eq!(spans[0].start, 0);
    assert_eq!(spans[0].end, val.len());
}

#[test]
fn span_ansi_c_quote() {
    let (val, spans) = first_word_spans("$'foo'");
    assert_eq!(spans.len(), 1);
    assert_eq!(spans[0].kind, WordSpanKind::AnsiCQuote);
    assert_eq!(spans[0].start, 0);
    assert_eq!(spans[0].end, val.len());
}

#[test]
fn span_locale_string() {
    let (val, spans) = first_word_spans("$\"hello\"");
    assert_eq!(spans.len(), 1);
    assert_eq!(spans[0].kind, WordSpanKind::LocaleString);
    assert_eq!(spans[0].start, 0);
    assert_eq!(spans[0].end, val.len());
}

#[test]
fn span_single_quoted() {
    let (val, spans) = first_word_spans("'quoted'");
    assert_eq!(spans.len(), 1);
    assert_eq!(spans[0].kind, WordSpanKind::SingleQuoted);
    assert_eq!(spans[0].start, 0);
    assert_eq!(spans[0].end, val.len());
}

#[test]
fn span_double_quoted() {
    let (val, spans) = first_word_spans("\"double\"");
    assert_eq!(spans.len(), 1);
    assert_eq!(spans[0].kind, WordSpanKind::DoubleQuoted);
    assert_eq!(spans[0].start, 0);
    assert_eq!(spans[0].end, val.len());
}
#[test]
fn span_backtick() {
    let (val, spans) = first_word_spans("`cmd`");
    assert_eq!(spans.len(), 1);
    assert_eq!(spans[0].kind, WordSpanKind::Backtick);
    assert_eq!(spans[0].start, 0);
    assert_eq!(spans[0].end, val.len());
}

#[test]
fn span_escape() {
    let (val, spans) = first_word_spans("\\n");
    assert_eq!(spans.len(), 1);
    assert_eq!(spans[0].kind, WordSpanKind::Escape);
    assert_eq!(spans[0].start, 0);
    assert_eq!(spans[0].end, val.len());
}

#[test]
fn span_line_continuation_no_span() {
    let (val, spans) = first_word_spans("hel\\\nlo");
    assert_eq!(val, "hello");
    assert!(spans.is_empty());
}

#[test]
fn span_bare_dollar_no_span() {
    let tokens = collect_tokens("echo $");
    assert_eq!(tokens[1].1, "$");
    let (_, spans) = first_word_spans("$");
    assert!(spans.is_empty());
}

#[test]
fn span_nested_double_quoted_with_cmdsub() {
    let (val, spans) = first_word_spans("\"$(cmd)\"");
    assert_eq!(val, "\"$(cmd)\"");
    assert_eq!(spans.len(), 2);
    assert_eq!(spans[0].kind, WordSpanKind::CommandSub);
    assert_eq!(spans[0].start, 1);
    assert_eq!(spans[0].end, 7);
    assert_eq!(spans[1].kind, WordSpanKind::DoubleQuoted);
    assert_eq!(spans[1].start, 0);
    assert_eq!(spans[1].end, 8);
}

#[test]
fn span_deprecated_arith() {
    let (val, spans) = first_word_spans("$[1+2]");
    assert_eq!(spans.len(), 1);
    assert_eq!(spans[0].kind, WordSpanKind::DeprecatedArith);
    assert_eq!(spans[0].start, 0);
    assert_eq!(spans[0].end, val.len());
}
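
// A sketch of a sibling-span case, assuming spans for adjacent constructs are
// recorded in source order (as `span_nested_double_quoted_with_cmdsub`
// suggests) and that `$HOME` terminates at the following non-identifier `$`.
#[test]
fn span_adjacent_var_and_cmdsub() {
    let (val, spans) = first_word_spans("$HOME$(cmd)");
    assert_eq!(val, "$HOME$(cmd)");
    assert_eq!(spans.len(), 2);
    assert_eq!(spans[0].kind, WordSpanKind::SimpleVar);
    assert_eq!((spans[0].start, spans[0].end), (0, 5));
    assert_eq!(spans[1].kind, WordSpanKind::CommandSub);
    assert_eq!((spans[1].start, spans[1].end), (5, 11));
}

// ---- raw arithmetic reads ----
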
#[test]
#[allow(clippy::expect_used)]
fn arith_command_strips_line_continuation() {
    let mut lexer = Lexer::new("1 + \\\n2))", false);
    let raw = lexer
        .read_until_double_paren()
        .expect("read_until_double_paren should succeed");
    assert_eq!(raw, "1 + 2");
}

#[test]
#[allow(clippy::expect_used)]
fn arith_command_preserves_other_backslash_escapes() {
    let mut lexer = Lexer::new("a\\b))", false);
    let raw = lexer
        .read_until_double_paren()
        .expect("read_until_double_paren should succeed");
    assert_eq!(raw, "a\\b");
}
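
// ---- brackets outside and inside `[[ ... ]]` ----
// The deliberately odd inputs below are regression cases for `]]` and `[`
// handling when no conditional expression is active.
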
#[test]
fn rbracket_outside_cond_is_plain_word() {
    let tokens = collect_tokens("]] foo");
    assert_eq!(tokens.len(), 2);
    assert_eq!(tokens[0], (TokenType::Word, "]]".to_string()));
    assert_eq!(tokens[1], (TokenType::Word, "foo".to_string()));
}

#[test]
fn rbracket_in_middle_of_command_is_plain_word() {
    let tokens = collect_tokens("Cdeclare -n ref=t ]] arget");
    let values: Vec<&str> = tokens.iter().map(|(_, v)| v.as_str()).collect();
    assert_eq!(values, vec!["Cdeclare", "-n", "ref=t", "]]", "arget"]);
    let rbracket = tokens.iter().find(|(_, v)| v == "]]").map(|(k, _)| *k);
    assert_eq!(rbracket, Some(TokenType::Word));
}

#[test]
fn rbracket_inside_cond_is_reserved() {
    let mut lexer = Lexer::new("]]", false);
    lexer.enter_cond_expr();
    #[allow(clippy::unwrap_used)]
    let tok = lexer.next_token().unwrap();
    assert_eq!(tok.kind, TokenType::DoubleRightBracket);
    assert_eq!(tok.value, "]]");
}

#[test]
fn leading_rbracket_bracket_bracket_stays_own_word() {
    let tokens = collect_tokens("][[ \"$file\" == *.txt ]]");
    let values: Vec<&str> = tokens.iter().map(|(_, v)| v.as_str()).collect();
    assert_eq!(values, vec!["][[", "\"$file\"", "==", "*.txt", "]]"]);
}

#[test]
fn leading_bracket_letter_bracket_stays_own_word() {
    let tokens = collect_tokens("[c[ $x =~ ]+[a-z] ]]");
    let values: Vec<&str> = tokens.iter().map(|(_, v)| v.as_str()).collect();
    assert_eq!(values, vec!["[c[", "$x", "=~", "]+[a-z]", "]]"]);
}

#[test]
fn pipe_inside_brackets_on_non_identifier_splits_command() {
    let tokens = collect_tokens("echo ho $$[a||b]");
    let values: Vec<&str> = tokens.iter().map(|(_, v)| v.as_str()).collect();
    assert_eq!(values, vec!["echo", "ho", "$$[a", "||", "b]"]);
}

#[test]
fn amp_inside_brackets_on_reserved_word_prefix_splits() {
    let tokens = collect_tokens("Decho $ case[a&&b]");
    let values: Vec<&str> = tokens.iter().map(|(_, v)| v.as_str()).collect();
    assert_eq!(values, vec!["Decho", "$", "case[a", "&&", "b]"]);
}

#[test]
fn caret_prefix_brackets_split_on_space() {
    let tokens = collect_tokens("foo^[a-echo ${foo^[a-z]}");
    let values: Vec<&str> = tokens.iter().map(|(_, v)| v.as_str()).collect();
    assert_eq!(values, vec!["foo^[a-echo", "${foo^[a-z]}"]);
}

#[test]
fn bare_bracket_test_then_bracket_splits_on_semi() {
    let tokens = collect_tokens("if [ $\"yes\" = yes9 ][ $; then echo ok; fi");
    let values: Vec<&str> = tokens.iter().map(|(_, v)| v.as_str()).collect();
    assert_eq!(
        values,
        vec![
            "if", "[", "$\"yes\"", "=", "yes9", "][", "$", ";", "then", "echo", "ok", ";", "fi",
        ],
    );
}

#[test]
fn arr_subscript_absorbs_space() {
    let tokens = collect_tokens("arr[0 foo]");
    assert_eq!(tokens.len(), 1);
    assert_eq!(tokens[0].1, "arr[0 foo]");
}

#[test]
fn regex_char_class_inside_cond_stays_one_word() {
    let mut lexer = Lexer::new("$x =~ [[:alpha:][:dig||it:]] ]]", false);
    lexer.enter_cond_expr();
    let mut values = Vec::new();
    loop {
        #[allow(clippy::unwrap_used)]
        let tok = lexer.next_token().unwrap();
        if tok.kind == TokenType::Eof {
            break;
        }
        values.push(tok.value);
    }
    assert_eq!(values, vec!["$x", "=~", "[[:alpha:][:dig||it:]]", "]]"]);
}