use super::ExpandedField;
use crate::env::ShellEnv;
/// Returns the separator set to use for field splitting.
///
/// Looks up `IFS` in `env.vars`. When the lookup yields nothing, the
/// fallback is space, tab and newline. A value that is present but empty is
/// returned as-is, so callers can tell "empty" apart from "missing".
fn get_ifs(env: &ShellEnv) -> String {
    const DEFAULT_IFS: &str = " \t\n";
    env.vars
        .get("IFS")
        .map_or_else(|| DEFAULT_IFS.to_string(), |value| value.to_string())
}
/// Applies `IFS` field splitting to every field in `fields`.
///
/// * With an empty `IFS` nothing is split; fields that are both empty and
///   never quoted are dropped and everything else is returned in order.
/// * Otherwise the bytes of `IFS` are sorted into two sets: whitespace
///   (space, tab, newline) and the remaining ASCII bytes. Non-ASCII bytes in
///   `IFS` are ignored.
/// * If no field contains an unquoted byte from either set, the input vector
///   is handed back untouched.
/// * Otherwise each field is passed through `split_field` and the pieces are
///   collected in order.
pub fn split(env: &ShellEnv, fields: Vec<ExpandedField>) -> Vec<ExpandedField> {
    let ifs = get_ifs(env);

    if ifs.is_empty() {
        let mut kept = fields;
        kept.retain(|f| f.was_quoted || !f.value.is_empty());
        return kept;
    }

    // Classify each IFS byte once: whitespace delimiters collapse, other
    // ASCII delimiters each terminate a field.
    let mut ifs_ws = Vec::new();
    let mut ifs_nws = Vec::new();
    for byte in ifs.bytes() {
        match byte {
            b' ' | b'\t' | b'\n' => ifs_ws.push(byte),
            _ if byte.is_ascii() => ifs_nws.push(byte),
            _ => {}
        }
    }

    // Fast path: nothing to split, so avoid rebuilding any field.
    let any_splittable = fields
        .iter()
        .any(|f| needs_splitting(f, &ifs_ws, &ifs_nws));
    if !any_splittable {
        return fields;
    }

    fields.into_iter().fold(Vec::new(), |mut pieces, field| {
        split_field(&field, &ifs_ws, &ifs_nws, &mut pieces);
        pieces
    })
}
/// Splits one field on unquoted IFS bytes and appends the pieces to `out`.
///
/// `ifs_ws` holds the whitespace delimiters and `ifs_nws` the other
/// delimiters. A byte only acts as a delimiter when `field.is_quoted` is
/// false at its index; everything else is copied one character at a time via
/// `append_char`, which keeps the per-character quoting.
///
/// Behaviour visible in the transitions below:
/// * an empty field that was quoted yields one empty, quoted field;
/// * runs of whitespace delimiters collapse and never create empty fields;
/// * whitespace around a non-whitespace delimiter is absorbed into it;
/// * a non-whitespace delimiter at the start, or directly following another
///   one (ignoring whitespace), yields an empty field marked `was_quoted`;
/// * a trailing delimiter does not add a field.
fn split_field(
    field: &ExpandedField,
    ifs_ws: &[u8],
    ifs_nws: &[u8],
    out: &mut Vec<ExpandedField>,
) {
    /// Where the scanner is relative to the last delimiter.
    #[derive(Clone, Copy)]
    enum Phase {
        Leading,
        InWord,
        PastWs,
        PastNws,
    }

    /// How the byte under the cursor is treated.
    enum Kind {
        Ws,
        Nws,
        Data,
    }

    let raw = field.value.as_bytes();
    if raw.is_empty() && field.was_quoted {
        out.push(ExpandedField {
            was_quoted: true,
            ..ExpandedField::new()
        });
        return;
    }

    let mut pending = ExpandedField::new();
    let mut phase = Phase::Leading;
    let mut pos = 0;

    while pos < raw.len() {
        let byte = raw[pos];
        let kind = if field.is_quoted(pos) {
            Kind::Data
        } else if ifs_ws.contains(&byte) {
            Kind::Ws
        } else if ifs_nws.contains(&byte) {
            Kind::Nws
        } else {
            Kind::Data
        };

        // Each arm updates the phase and yields how many bytes were consumed.
        let step = match (phase, kind) {
            (_, Kind::Data) => {
                phase = Phase::InWord;
                append_char(&mut pending, field, pos)
            }
            (Phase::InWord, Kind::Ws) => {
                emit(&mut pending, out);
                phase = Phase::PastWs;
                1
            }
            // Whitespace outside a word is skipped without changing phase.
            (Phase::Leading | Phase::PastWs | Phase::PastNws, Kind::Ws) => 1,
            (Phase::InWord, Kind::Nws) => {
                emit(&mut pending, out);
                phase = Phase::PastNws;
                1
            }
            // The word was already emitted when the whitespace was seen.
            (Phase::PastWs, Kind::Nws) => {
                phase = Phase::PastNws;
                1
            }
            // No word precedes this delimiter: it delimits an empty field.
            (Phase::Leading | Phase::PastNws, Kind::Nws) => {
                out.push(ExpandedField {
                    was_quoted: true,
                    ..ExpandedField::new()
                });
                phase = Phase::PastNws;
                1
            }
        };
        pos += step;
    }

    if !pending.is_empty() {
        emit(&mut pending, out);
    }
}
/// Reports whether `field` contains at least one unquoted byte that appears
/// in either delimiter set.
///
/// Bytes for which `field.is_quoted` is true are never considered, so a
/// field made only of quoted text always returns `false`.
fn needs_splitting(field: &ExpandedField, ifs_ws: &[u8], ifs_nws: &[u8]) -> bool {
    for (pos, byte) in field.value.bytes().enumerate() {
        if field.is_quoted(pos) {
            continue;
        }
        if ifs_ws.contains(&byte) || ifs_nws.contains(&byte) {
            return true;
        }
    }
    false
}
/// Copies the single character that starts at byte offset `i` of `source`
/// onto the end of `dest` and returns its UTF-8 length in bytes.
///
/// The character is pushed as quoted or unquoted according to
/// `source.is_quoted(i)`.
///
/// # Panics
///
/// Panics if `i` is not a character boundary inside `source.value`, or if
/// no character starts at `i`.
#[inline]
fn append_char(dest: &mut ExpandedField, source: &ExpandedField, i: usize) -> usize {
    let tail = &source.value[i..];
    let first = tail.chars().next().expect("i on char boundary");
    let width = first.len_utf8();
    let piece = &tail[..width];

    if source.is_quoted(i) {
        dest.push_quoted(piece);
    } else {
        dest.push_unquoted(piece);
    }
    width
}
/// Moves the field being built into `out`, leaving `current` reset to its
/// `Default` value so the next field can be accumulated in it.
#[inline]
fn emit(current: &mut ExpandedField, out: &mut Vec<ExpandedField>) {
    out.push(std::mem::take(current));
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::env::ShellEnv;

    /// Environment with `IFS` explicitly set to `ifs`.
    fn env_with_ifs(ifs: &str) -> ShellEnv {
        let mut env = ShellEnv::new("yosh", vec![]);
        env.vars.set("IFS", ifs).unwrap();
        env
    }

    /// Environment with `IFS` removed; a failed unset is ignored.
    fn env_no_ifs() -> ShellEnv {
        let mut env = ShellEnv::new("yosh", vec![]);
        env.vars.unset("IFS").ok();
        env
    }

    /// Builds one field from `(text, is_quoted)` segments, in order.
    fn field_of(segments: &[(&str, bool)]) -> ExpandedField {
        let mut field = ExpandedField::new();
        for &(text, is_quoted) in segments {
            if is_quoted {
                field.push_quoted(text);
            } else {
                field.push_unquoted(text);
            }
        }
        field
    }

    fn unquoted(s: &str) -> ExpandedField {
        field_of(&[(s, false)])
    }

    fn quoted_field(s: &str) -> ExpandedField {
        field_of(&[(s, true)])
    }

    /// Runs `split` and keeps only the text of each resulting field.
    fn split_values(env: &ShellEnv, fields: Vec<ExpandedField>) -> Vec<String> {
        split(env, fields).into_iter().map(|f| f.value).collect()
    }

    // --- basic splitting -------------------------------------------------

    #[test]
    fn test_split_spaces() {
        let env = env_with_ifs(" ");
        let got = split_values(&env, vec![unquoted("hello world foo")]);
        assert_eq!(got, ["hello", "world", "foo"]);
    }

    #[test]
    fn test_consecutive_whitespace() {
        let env = env_with_ifs(" \t\n");
        let got = split_values(&env, vec![unquoted(" hello world ")]);
        assert_eq!(got, ["hello", "world"]);
    }

    #[test]
    fn test_split_quoted_not_split() {
        let env = env_with_ifs(" ");
        let got = split_values(&env, vec![quoted_field("hello world")]);
        assert_eq!(got, ["hello world"]);
    }

    #[test]
    fn test_split_colon_delimiter() {
        let env = env_with_ifs(":");
        let got = split_values(&env, vec![unquoted("a:b:c")]);
        assert_eq!(got, ["a", "b", "c"]);
    }

    #[test]
    fn test_colon_with_surrounding_whitespace_absorbed() {
        let env = env_with_ifs(" :");
        let got = split_values(&env, vec![unquoted("a : b : c")]);
        assert_eq!(got, ["a", "b", "c"]);
    }

    // --- empty / unset IFS -----------------------------------------------

    #[test]
    fn test_empty_ifs_no_split() {
        let env = env_with_ifs("");
        let got = split_values(&env, vec![unquoted("hello world")]);
        assert_eq!(got, ["hello world"]);
    }

    #[test]
    fn test_empty_ifs_drops_empty_fields() {
        let env = env_with_ifs("");
        let got = split_values(&env, vec![unquoted(""), unquoted("hello")]);
        assert_eq!(got, ["hello"]);
    }

    #[test]
    fn test_unset_ifs_default() {
        let env = env_no_ifs();
        let got = split_values(&env, vec![unquoted("hello\tworld\nfoo")]);
        assert_eq!(got, ["hello", "world", "foo"]);
    }

    // --- quoting and empty fields ----------------------------------------

    #[test]
    fn test_mixed_quoted_unquoted() {
        let env = env_with_ifs(" ");
        let field = field_of(&[("foo ", false), ("bar baz", true), (" qux", false)]);
        assert_eq!(split_values(&env, vec![field]), ["foo", "bar baz", "qux"]);
    }

    #[test]
    fn test_double_colon_empty_field() {
        let env = env_with_ifs(":");
        let got = split_values(&env, vec![unquoted("a::b")]);
        assert_eq!(got, ["a", "", "b"]);
    }

    // --- fast path (no field contains an unquoted IFS byte) ---------------

    #[test]
    fn test_fast_path_single_field_no_ifs_chars() {
        let env = env_with_ifs(" \t\n");
        let got = split_values(&env, vec![unquoted("hello")]);
        assert_eq!(got, ["hello"]);
    }

    #[test]
    fn test_fast_path_multiple_fields_no_ifs_chars() {
        let env = env_with_ifs(" \t\n");
        let got = split_values(&env, vec![unquoted("hello"), unquoted("world")]);
        assert_eq!(got, ["hello", "world"]);
    }

    #[test]
    fn test_fast_path_mixed_quoted_unquoted_no_ifs() {
        let env = env_with_ifs(" ");
        let field = field_of(&[("foo", false), ("bar", true)]);
        assert_eq!(split_values(&env, vec![field]), ["foobar"]);
    }

    #[test]
    fn test_slow_path_triggered_by_one_splittable_field() {
        let env = env_with_ifs(" ");
        let got = split_values(&env, vec![unquoted("hello"), unquoted("a b")]);
        assert_eq!(got, ["hello", "a", "b"]);
    }

    #[test]
    fn test_fast_path_quoted_ifs_byte_stays_fast() {
        let env = env_with_ifs(" ");
        let got = split_values(&env, vec![quoted_field("a b c")]);
        assert_eq!(got, ["a b c"]);
    }

    #[test]
    fn test_fast_path_empty_unquoted_field_preserved() {
        let env = env_with_ifs(" \t\n");
        let result = split(&env, vec![unquoted(""), unquoted("hello")]);
        assert_eq!(result.len(), 2);
        assert!(result[0].value.is_empty());
        assert!(!result[0].was_quoted);
        assert_eq!(result[1].value, "hello");
    }

    // --- multi-byte UTF-8 content ----------------------------------------

    #[test]
    fn test_fast_path_utf8_no_false_positive() {
        let env = env_with_ifs(" \t\n");
        let got = split_values(&env, vec![unquoted("日本語")]);
        assert_eq!(got, ["日本語"]);
    }

    #[test]
    fn test_utf8_content_ascii_ifs_splits() {
        let env = env_with_ifs(" ");
        let got = split_values(&env, vec![unquoted("日本 語")]);
        assert_eq!(got, ["日本", "語"]);
    }

    #[test]
    fn test_utf8_content_colon_delimiter() {
        let env = env_with_ifs(":");
        let got = split_values(&env, vec![unquoted("a:日:b")]);
        assert_eq!(got, ["a", "日", "b"]);
    }

    #[test]
    fn test_utf8_quoted_not_split() {
        let env = env_with_ifs(" ");
        let got = split_values(&env, vec![quoted_field("日 本")]);
        assert_eq!(got, ["日 本"]);
    }

    #[test]
    fn test_utf8_leading_trailing_whitespace_around_multibyte() {
        let env = env_with_ifs(" \t\n");
        let got = split_values(&env, vec![unquoted(" 日本語 ")]);
        assert_eq!(got, ["日本語"]);
    }

    #[test]
    fn test_non_ascii_ifs_byte_ignored() {
        let env = env_with_ifs("\u{00c0}");
        let got = split_values(&env, vec![unquoted("À")]);
        assert_eq!(got, ["À"]);
    }
}