use std::vec::IntoIter;
/// Lexical class assigned to a single character while [`split`] scans a line.
#[derive(Debug, PartialEq)]
enum ShlexToken {
    /// A character for which `char::is_whitespace` returns `true`.
    Whitespace,
    /// Any character that has no special meaning to the splitter.
    NonWhitespace,
    /// The backslash character (`\`).
    Escape,
    /// The single-quote character (`'`).
    SingleQuote,
    /// The double-quote character (`"`).
    DoubleQuote,
    /// The statement separator (`;`).
    Semicolon,
    /// The ampersand character (`&`); two in a row form `&&`.
    And,
    /// The pipe character (`|`); two in a row form `||`.
    Pipe,
}
/// Location of one word inside a line.
///
/// The word is the half-open range `start..end` of the line it was found in;
/// [`unescape_word`] reads it back with `line[start..end]`. For quoted words
/// the range produced by [`split`] excludes the surrounding quotes.
#[derive(Debug)]
pub(crate) struct Word {
    /// Offset of the first character of the word.
    pub start: usize,
    /// Offset one past the last character of the word.
    pub end: usize,
}
/// How a statement is chained to the statement that precedes it.
#[derive(Debug, Copy, Clone, PartialEq)]
pub(crate) enum ShlexRunType {
    /// Run type every line starts out with in [`split`].
    Unconditional,
    /// Assigned by [`split`] to what follows a `&&` operator.
    ConditionalAnd,
    /// Assigned by [`split`] to what follows a `||` operator.
    ConditionalOr,
}
/// One statement of a split line: the words it consists of plus the way it is
/// chained to the statement before it.
#[derive(Debug)]
pub(crate) struct Statement {
    /// Words of the statement, in the order they appear in the line.
    pub(crate) words: Vec<Word>,
    /// Run type that was in effect when the statement was recorded.
    pub(crate) run_type: ShlexRunType,
}
impl Statement {
    /// Returns an iterator that borrows the words of this statement in order.
    pub fn iter(&self) -> impl Iterator<Item = &Word> {
        let Self { words, .. } = self;
        words.iter()
    }
}
impl IntoIterator for Statement {
type Item = Word;
type IntoIter = IntoIter<Word>;
fn into_iter(self) -> Self::IntoIter {
self.words.into_iter()
}
}
/// Returns the text of `word` within `line` with its escape sequences resolved.
///
/// The word is read as `line[word.start..word.end]` and rewritten as follows:
///
/// * `\ ` (backslash, space) becomes a single space,
/// * `\\` becomes a single backslash,
/// * a backslash followed by any other character is kept verbatim, backslash
///   included,
/// * a lone backslash at the very end of the word is dropped.
///
/// Every backslash consumes exactly one following character, so the second
/// half of `\\` never acts as an escape for whatever comes after it.
///
/// # Panics
///
/// Panics if `word.start..word.end` is not a valid range of `line`, i.e. it is
/// out of bounds, reversed, or does not fall on UTF-8 character boundaries.
pub(crate) fn unescape_word(line: &str, word: &Word) -> String {
    let raw = &line[word.start..word.end];
    // Fast path: nothing to unescape, a plain copy is enough.
    if !raw.contains('\\') {
        return raw.to_owned();
    }

    let mut result = String::with_capacity(raw.len());
    let mut chars = raw.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }
        // `c` is a backslash: decide based on the character it escapes and
        // consume that character so it cannot start a new escape itself.
        match chars.next() {
            Some(escaped @ (' ' | '\\')) => result.push(escaped),
            Some(other) => {
                result.push('\\');
                result.push(other);
            }
            // Incomplete escape at the end of the word: nothing to emit.
            None => {}
        }
    }
    result
}
/// Returns a copy of `word` in which every space character is preceded by a
/// backslash. All other characters, other whitespace included, are copied
/// unchanged.
pub(crate) fn escape_word(word: &str) -> String {
    let mut escaped = String::with_capacity(word.len());
    for ch in word.chars() {
        if ch == ' ' {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
/// Splits `line` into statements and locates the words of each statement.
///
/// Words are reported as byte ranges into `line` (see [`Word`]), so
/// `&line[word.start..word.end]` is always a valid slice, also for non-ASCII
/// input.
///
/// Rules applied while scanning:
///
/// * Unquoted whitespace separates words.
/// * `'…'` and `"…"` delimit a word when the quote opens it; the reported range
///   excludes the quotes. A quote in the middle of an unquoted word is an
///   ordinary character. An unterminated quote runs to the end of the line.
/// * A backslash makes the following character an ordinary part of the current
///   word (or starts a new word if none is open). The backslash itself stays
///   inside the reported range; use [`unescape_word`] to resolve it.
/// * `;` ends the current statement. Statements without any word are skipped,
///   and the statement after a `;` is [`ShlexRunType::Unconditional`].
/// * `&&` / `||` end the current statement and mark the following one as
///   [`ShlexRunType::ConditionalAnd`] / [`ShlexRunType::ConditionalOr`]. A
///   single `&` or `|` is an ordinary word character.
/// * If the line ends in unquoted whitespace, an empty word positioned at
///   `line.len()` is appended to the last statement.
pub(crate) fn split(line: &str) -> Vec<Statement> {
    use ShlexToken::*;

    /// What the scanner is in the middle of when it looks at the next character.
    #[derive(Clone, Copy)]
    enum State {
        /// Start of input, or directly after a separator or a closing quote.
        Boundary,
        /// Inside a run of unquoted whitespace.
        Blank,
        /// Inside an unquoted word.
        Bare,
        /// Inside a word opened by `'`.
        Single,
        /// Inside a word opened by `"`.
        Double,
        /// Inside an unquoted word whose previous character is an unescaped `&`.
        Amp,
        /// Inside an unquoted word whose previous character is an unescaped `|`.
        Bar,
    }

    /// Maps a character to its lexical class.
    fn classify(ch: char) -> ShlexToken {
        match ch {
            '\'' => SingleQuote,
            '"' => DoubleQuote,
            '\\' => Escape,
            ';' => Semicolon,
            '&' => And,
            '|' => Pipe,
            _ if ch.is_whitespace() => Whitespace,
            _ => NonWhitespace,
        }
    }

    /// Moves the collected `words` into a new statement, leaving `words` empty.
    fn close_statement(
        statements: &mut Vec<Statement>,
        words: &mut Vec<Word>,
        run_type: ShlexRunType,
    ) {
        statements.push(Statement {
            words: std::mem::take(words),
            run_type,
        });
    }

    let mut statements: Vec<Statement> = Vec::new();
    let mut words: Vec<Word> = Vec::new();
    let mut state = State::Boundary;
    // Set right after a backslash: the next character is taken literally.
    let mut escaped = false;
    let mut word_start = 0_usize;
    let mut run_type = ShlexRunType::Unconditional;

    // `char_indices` yields byte offsets, which is what `Word` ranges and
    // `line.len()` are expressed in.
    for (i, c) in line.char_indices() {
        if escaped {
            // The state was already switched to "inside a word" when the
            // backslash was seen, so the escaped character needs no handling.
            escaped = false;
            continue;
        }

        state = match (state, classify(c)) {
            // ---- inside quotes: only the matching quote or a backslash matter
            (State::Single, SingleQuote) | (State::Double, DoubleQuote) => {
                words.push(Word {
                    start: word_start,
                    end: i,
                });
                State::Boundary
            }
            (State::Single | State::Double, Escape) => {
                escaped = true;
                state
            }
            (State::Single | State::Double, _) => state,

            // ---- between words
            (State::Boundary | State::Blank, Whitespace) => State::Blank,
            (State::Boundary | State::Blank, Escape) => {
                word_start = i;
                escaped = true;
                State::Bare
            }
            (State::Boundary | State::Blank, SingleQuote) => {
                // The quote is one byte wide; the word starts right after it.
                word_start = i + 1;
                State::Single
            }
            (State::Boundary | State::Blank, DoubleQuote) => {
                word_start = i + 1;
                State::Double
            }
            (State::Boundary | State::Blank, NonWhitespace) => {
                word_start = i;
                State::Bare
            }
            (State::Boundary | State::Blank, And) => {
                word_start = i;
                State::Amp
            }
            (State::Boundary | State::Blank, Pipe) => {
                word_start = i;
                State::Bar
            }
            (State::Boundary | State::Blank, Semicolon) => {
                // No word is open here, so the statement may be empty
                // (leading `;`, `;;`, `; ;`): such statements are skipped.
                if !words.is_empty() {
                    close_statement(&mut statements, &mut words, run_type);
                }
                run_type = ShlexRunType::Unconditional;
                word_start = i + 1;
                State::Boundary
            }

            // ---- inside an unquoted word
            (State::Bare | State::Amp | State::Bar, NonWhitespace | SingleQuote | DoubleQuote) => {
                State::Bare
            }
            (State::Bare | State::Amp | State::Bar, Escape) => {
                // After the escaped character the previous `&`/`|` no longer
                // directly precedes the next one, hence `Bare`.
                escaped = true;
                State::Bare
            }
            (State::Bare | State::Amp | State::Bar, Whitespace) => {
                words.push(Word {
                    start: word_start,
                    end: i,
                });
                State::Blank
            }
            (State::Bare | State::Amp | State::Bar, Semicolon) => {
                words.push(Word {
                    start: word_start,
                    end: i,
                });
                close_statement(&mut statements, &mut words, run_type);
                run_type = ShlexRunType::Unconditional;
                word_start = i + 1;
                State::Boundary
            }
            (State::Bare | State::Bar, And) => State::Amp,
            (State::Bare | State::Amp, Pipe) => State::Bar,
            (State::Amp, And) => {
                // `i - 1` is the first `&` of `&&` (one byte wide); whatever
                // precedes it is the word that was being read.
                if word_start < i - 1 {
                    words.push(Word {
                        start: word_start,
                        end: i - 1,
                    });
                }
                close_statement(&mut statements, &mut words, run_type);
                run_type = ShlexRunType::ConditionalAnd;
                word_start = i + 1;
                State::Boundary
            }
            (State::Bar, Pipe) => {
                // Same as `&&` above, for `||`.
                if word_start < i - 1 {
                    words.push(Word {
                        start: word_start,
                        end: i - 1,
                    });
                }
                close_statement(&mut statements, &mut words, run_type);
                run_type = ShlexRunType::ConditionalOr;
                word_start = i + 1;
                State::Boundary
            }
        };
    }

    // Close whatever is still open at the end of the line.
    match state {
        State::Boundary => {}
        // Trailing whitespace: report an empty word at the end of the line.
        State::Blank => words.push(Word {
            start: line.len(),
            end: line.len(),
        }),
        State::Bare | State::Single | State::Double | State::Amp | State::Bar => {
            words.push(Word {
                start: word_start,
                end: line.len(),
            });
        }
    }
    if !words.is_empty() {
        statements.push(Statement { words, run_type });
    }
    statements
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Splits `line`, asserts the number of statements and returns them.
    fn split_checked(line: &str, expected_statements: usize) -> Vec<Statement> {
        let statements = split(line);
        assert_eq!(
            statements.len(),
            expected_statements,
            "didn't correctly determine number of statements for {:?}: {:#?}",
            line,
            statements,
        );
        statements
    }

    /// Asserts that `statement` consists of exactly the words with the given
    /// `(start, end)` offsets, in order.
    fn assert_words(statement: &Statement, expected: &[(usize, usize)]) {
        let actual: Vec<(usize, usize)> = statement
            .words
            .iter()
            .map(|word| (word.start, word.end))
            .collect();
        assert_eq!(
            actual, expected,
            "words parsed incorrectly, parse result: {:?}",
            statement
        );
    }

    #[test]
    fn no_quotes() {
        assert_words(&split_checked("a", 1)[0], &[(0, 1)]);
        assert_words(&split_checked("a b", 1)[0], &[(0, 1), (2, 3)]);
        assert_words(&split_checked("a bcd efg", 1)[0], &[(0, 1), (2, 5), (6, 9)]);
    }

    #[test]
    fn trailing_whitespace() {
        // Trailing whitespace yields an empty word at the end of the line.
        assert_words(&split_checked("a ", 1)[0], &[(0, 1), (2, 2)]);
        // NOTE: the amount of whitespace is significant: "a" + three spaces.
        assert_words(&split_checked("a   ", 1)[0], &[(0, 1), (4, 4)]);
    }

    #[test]
    fn double_quotes_match_correctly() {
        assert_words(&split_checked("a \"b ", 1)[0], &[(0, 1), (3, 5)]);
        assert_words(
            &split_checked("a \"b c\" d", 1)[0],
            &[(0, 1), (3, 6), (8, 9)],
        );
    }

    #[test]
    fn single_quotes_match_correctly() {
        assert_words(&split_checked("a 'b ", 1)[0], &[(0, 1), (3, 5)]);
        assert_words(&split_checked("a 'b c' d", 1)[0], &[(0, 1), (3, 6), (8, 9)]);
    }

    #[test]
    fn spaces_escaped_correctly() {
        assert_words(&split_checked("a b\\ c", 1)[0], &[(0, 1), (2, 6)]);
    }

    #[test]
    fn double_quotes_escaped_correctly() {
        assert_words(&split_checked("a\\\" b", 1)[0], &[(0, 3), (4, 5)]);
    }

    #[test]
    fn single_quotes_escaped_correctly() {
        assert_words(&split_checked("a\\' b", 1)[0], &[(0, 3), (4, 5)]);
    }

    #[test]
    fn escapes_escaped_correctly() {
        assert_words(&split_checked("a\\\\ b", 1)[0], &[(0, 3), (4, 5)]);
    }

    #[test]
    fn last_char_quote() {
        // An opening quote at the very end starts an (empty) word after it.
        assert_words(&split_checked("abc \"", 1)[0], &[(0, 3), (5, 5)]);
    }

    #[test]
    fn only_word_quoted() {
        let s = "\"a b";
        assert_words(&split_checked(s, 1)[0], &[(1, s.len())]);
    }

    #[test]
    fn first_word_quoted() {
        assert_words(&split_checked("\"a b\" c", 1)[0], &[(1, 4), (6, 7)]);
    }

    #[test]
    fn semicolon_with_space_separates_commands() {
        let statements = split_checked("a ; b", 2);
        assert_words(&statements[0], &[(0, 1)]);
        assert_words(&statements[1], &[(4, 5)]);
    }

    #[test]
    fn semicolon_without_space_separates_commands() {
        let statements = split_checked("a;b", 2);
        assert_words(&statements[0], &[(0, 1)]);
        assert_words(&statements[1], &[(2, 3)]);
    }

    #[test]
    fn semicolon_in_quotes_is_ignored() {
        assert_words(&split_checked("echo 'abc;d'", 1)[0], &[(0, 4), (6, 11)]);
    }

    #[test]
    fn semicolon_at_end_is_ignored() {
        assert_words(&split_checked("echo;", 1)[0], &[(0, 4)]);
    }

    #[test]
    fn semicolon_at_end_of_multiple_statements_is_ignored() {
        let statements = split_checked("a;b;c;", 3);
        for (i, statement) in statements.iter().enumerate() {
            assert_words(statement, &[(i * 2, i * 2 + 1)]);
        }
    }

    #[test]
    fn whitespace_between_semicolon_and_statement_is_ignored() {
        // NOTE: the amount of whitespace is significant: three spaces after `;`.
        let statements = split_checked("a;   b", 2);
        assert_words(&statements[0], &[(0, 1)]);
        assert_words(&statements[1], &[(5, 6)]);
    }

    #[test]
    fn conditional_and_detected_with_no_spacing() {
        let statements = split_checked("a&&b", 2);
        assert_words(&statements[0], &[(0, 1)]);
        assert_eq!(
            statements[1].run_type,
            ShlexRunType::ConditionalAnd,
            "wrong run type"
        );
        assert_words(&statements[1], &[(3, 4)]);
    }

    #[test]
    fn conditional_and_detected_with_spacing() {
        // NOTE: the amount of whitespace is significant: two spaces on each
        // side of `&&`.
        let statements = split_checked("a  &&  b", 2);
        assert_words(&statements[0], &[(0, 1)]);
        assert_eq!(
            statements[1].run_type,
            ShlexRunType::ConditionalAnd,
            "wrong run type"
        );
        assert_words(&statements[1], &[(7, 8)]);
    }

    #[test]
    fn conditional_and_not_detected_when_escaped() {
        assert_words(&split_checked("a\\&&b", 1)[0], &[(0, 5)]);
    }

    #[test]
    fn single_and_not_detected_as_conditional_and() {
        assert_words(&split_checked("a&b", 1)[0], &[(0, 3)]);
    }

    #[test]
    fn conditional_or_detected_with_no_spacing() {
        let statements = split_checked("a||b", 2);
        assert_words(&statements[0], &[(0, 1)]);
        assert_eq!(
            statements[1].run_type,
            ShlexRunType::ConditionalOr,
            "wrong run type"
        );
        assert_words(&statements[1], &[(3, 4)]);
    }

    #[test]
    fn conditional_or_detected_with_spacing() {
        // NOTE: the amount of whitespace is significant: two spaces on each
        // side of `||`.
        let statements = split_checked("a  ||  b", 2);
        assert_words(&statements[0], &[(0, 1)]);
        assert_eq!(
            statements[1].run_type,
            ShlexRunType::ConditionalOr,
            "wrong run type"
        );
        assert_words(&statements[1], &[(7, 8)]);
    }

    #[test]
    fn conditional_or_not_detected_when_escaped() {
        assert_words(&split_checked("a\\||b", 1)[0], &[(0, 5)]);
    }

    #[test]
    fn single_pipe_not_detected_as_conditional_or() {
        assert_words(&split_checked("a|b", 1)[0], &[(0, 3)]);
    }

    #[test]
    fn unescape_word_removes_space_sequences() {
        let s = "hello\\ world";
        let word = Word {
            start: 0,
            end: s.len(),
        };
        assert_eq!(unescape_word(s, &word), "hello world");
    }

    #[test]
    fn unescape_word_ignores_nonspace_sequences() {
        let s = "hello\\bworld";
        let word = Word {
            start: 0,
            end: s.len(),
        };
        assert_eq!(unescape_word(s, &word), "hello\\bworld");
    }

    #[test]
    fn escape_word_fixes_space_sequences() {
        assert_eq!(escape_word("hello world"), "hello\\ world");
    }
}