use std::mem;
/// Iterator that cuts a string into pieces on a delimiter character, treating
/// single- or double-quoted sections as part of the current piece.
pub(crate) struct Splitter<'a> {
/// Portion of the source string that has not been consumed yet.
input: &'a str,
/// Buffer collecting the characters of the piece currently being built.
output: String,
/// Character that ends a piece when it is seen outside of quotes.
delimiter: char,
/// Quote-tracking state carried across tokens.
state: SplitState,
}
/// Records which kind of quote, if any, the scanner is currently inside.
struct SplitState {
/// `true` between an opening `"` and its closing `"`.
in_double_quote: bool,
/// `true` between an opening `'` and its closing `'`.
in_single_quote: bool,
}
impl SplitState {
    /// Returns `true` while the scanner is inside either kind of quote.
    fn in_quote(&self) -> bool {
        self.in_double_quote || self.in_single_quote
    }

    /// Updates the quote flags for `token` and reports whether the token's
    /// character should be copied to the output.
    ///
    /// * Ordinary characters are always kept.
    /// * A delimiter is kept only while inside a quote.
    /// * A quote character that opens or closes its own kind of quote toggles
    ///   the matching flag and is dropped.
    /// * A quote character of the *other* kind, seen inside an open quote, is
    ///   kept as literal text.
    ///
    /// # Panics
    ///
    /// Panics if a quote token arrives while both flags are set. The method
    /// only ever sets a flag when both are clear, so that state is not
    /// expected to occur.
    fn should_add(&mut self, token: SplitToken) -> bool {
        match (token, self.in_single_quote, self.in_double_quote) {
            (SplitToken::Char(_), _, _) => true,
            (SplitToken::Delimiter, _, _) => self.in_quote(),
            (SplitToken::DoubleQuote | SplitToken::SingleQuote, true, true) => {
                unreachable!("Never have two quotes active at the same time.")
            }
            // Kept apart from the `Char` arm even though both yield `true`:
            // a foreign quote inside an open quote is a distinct case.
            #[allow(clippy::match_same_arms)]
            (SplitToken::DoubleQuote, true, false) | (SplitToken::SingleQuote, false, true) => {
                true
            }
            // Not inside a single quote: `"` opens or closes a double quote.
            (SplitToken::DoubleQuote, false, currently_open) => {
                self.in_double_quote = !currently_open;
                false
            }
            // Not inside a double quote: `'` opens or closes a single quote.
            (SplitToken::SingleQuote, currently_open, false) => {
                self.in_single_quote = !currently_open;
                false
            }
        }
    }
}
/// Classification of a single input character as seen by the splitter.
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Debug)]
enum SplitToken {
/// The `"` character.
DoubleQuote,
/// The `'` character.
SingleQuote,
/// The configured delimiter character.
Delimiter,
/// Any other character, carried verbatim.
Char(char),
}
impl SplitToken {
fn to_char(self, delimiter: char) -> char {
match self {
SplitToken::DoubleQuote => '"',
SplitToken::SingleQuote => '\'',
SplitToken::Delimiter => delimiter,
SplitToken::Char(val) => val,
}
}
}
impl Splitter<'_> {
    /// Builds and returns the next piece, or `None` when no input is left.
    ///
    /// Tokens are consumed one at a time. Each is removed from the input,
    /// then offered to the quote state, which decides whether its character
    /// is copied to the output. A delimiter seen outside of quotes ends the
    /// piece; any delimiters immediately following it are swallowed as well so
    /// a run of delimiters separates exactly two pieces. Running out of input
    /// also ends the piece, even if a quote is still open.
    fn next_part(&mut self) -> Option<String> {
        if self.input.is_empty() {
            return None;
        }
        while let Some(token) = self.next_token() {
            // Consume first so the state update always sees an advanced input.
            self.chomp(token.to_char(self.delimiter));
            if self.state.should_add(token) {
                self.add_to_output(token);
            }
            if token == SplitToken::Delimiter && !self.state.in_quote() {
                // Collapse the rest of this delimiter run.
                while self.next_token() == Some(SplitToken::Delimiter) {
                    self.chomp(self.delimiter);
                }
                break;
            }
        }
        Some(self.return_output())
    }

    /// Hands out the accumulated piece and leaves an empty buffer behind.
    fn return_output(&mut self) -> String {
        mem::take(&mut self.output)
    }

    /// Classifies the character at the front of the input without consuming it.
    ///
    /// The delimiter check comes first, so a delimiter that happens to be a
    /// quote character is reported as [`SplitToken::Delimiter`].
    fn next_token(&self) -> Option<SplitToken> {
        let delimiter = self.delimiter;
        self.peek_char().map(|front| match front {
            ch if ch == delimiter => SplitToken::Delimiter,
            '"' => SplitToken::DoubleQuote,
            '\'' => SplitToken::SingleQuote,
            other => SplitToken::Char(other),
        })
    }

    /// Returns the first character of the remaining input, if any.
    fn peek_char(&self) -> Option<char> {
        self.input.chars().next()
    }

    /// Drops `ch` from the front of the input.
    ///
    /// `ch` must be the character currently at the front: the input is
    /// advanced by `ch.len_utf8()` bytes, and slicing panics if that is out of
    /// range or not on a character boundary.
    fn chomp(&mut self, ch: char) {
        self.input = &self.input[ch.len_utf8()..];
    }

    /// Appends the character represented by `token` to the current piece.
    fn add_to_output(&mut self, token: SplitToken) {
        self.output.push(token.to_char(self.delimiter));
    }
}
impl<'a> Splitter<'a> {
    /// Creates a splitter over `src` that breaks pieces on `delimiter`.
    ///
    /// Delimiter characters at the very start and end of `src` are stripped
    /// up front, the output buffer starts empty, and scanning begins outside
    /// of any quote.
    pub(crate) fn new(src: &'a str, delimiter: char) -> Self {
        let state = SplitState {
            in_single_quote: false,
            in_double_quote: false,
        };
        let input = src.trim_matches(delimiter);
        Self {
            state,
            delimiter,
            output: String::new(),
            input,
        }
    }
}
impl Iterator for Splitter<'_> {
type Item = String;
fn next(&mut self) -> Option<String> {
self.next_part()
}
}
// Unit tests for `Splitter`, driven through its `Iterator` implementation.
#[cfg(test)]
mod split_unquoted_whitespace_test {
use super::*;
// Assertion helper: `t!(SRC -> [EXPECTED, ...])` splits SRC and checks that the
// iterator yields exactly the listed pieces, in order, followed by `None`.
macro_rules! t {
// Short form: uses a space as the delimiter and forwards to the `@with` form.
($src:literal -> [$($token:literal),* $(,)?]) => {
t!(@with ' ' $src -> [$($token),*])
};
// Explicit form: `t!(@with DELIMITER SRC -> [EXPECTED, ...])`.
(@with $delimiter:literal $src:literal -> [$($token:literal),* $(,)?]) => {
let mut split = Splitter::new($src, $delimiter);
// One assertion per expected piece.
$(
assert_eq!(split.next(), Some($token.to_owned()));
)*
// Nothing may be left over once the expected pieces are consumed.
assert_eq!(split.next(), None);
}
}
// Double-quoted phrases containing spaces come out as single pieces, without the quotes.
#[test]
fn test_previous_doc() {
t!(r#"Type "rhit -p blog" or "rhit --path blog""# -> ["Type", "rhit -p blog", "or", "rhit --path blog"]);
}
// Empty and delimiter-only inputs yield no pieces; a quoted delimiter is kept.
#[test]
fn test_multiple_delimitors() {
t!("" -> []);
t!(" " -> []);
t!(" \" \"" -> [" "]);
t!(@with ',' "" -> []);
t!(@with ',' ",,,," -> []);
}
// Quote characters are removed from the output, including an unmatched one;
// input consisting only of quote characters yields one empty piece.
#[test]
fn test_quote_trimming() {
t!("1234" -> ["1234"]);
t!("1234\"" -> ["1234"]);
t!(r#"""# -> [""]);
t!(r#""a""# -> ["a"]);
t!(r#" " "# -> [""]);
}
// A quote opening in the middle of a word continues the same piece, and
// non-ASCII text inside quotes is preserved.
#[test]
fn test_complex() {
t!(r#" a "2 * 试" x"x "z "# -> ["a", "2 * 试", "xx z"]);
}
// Runs of three and four double quotes each yield a single empty piece.
#[test]
fn test_many_quotes() {
t!(r#"""""# -> [""]);
t!(r#""""""# -> [""]);
}
// Multi-byte characters pass through intact.
#[test]
fn test_utf8_bytes() {
t!("e^iπ^ = 1" -> ["e^iπ^", "=", "1"]);
}
// Leading/trailing delimiters are ignored and quoted phrases stay together.
#[test]
fn test_multi_space_infix() {
t!(" a 试bc d " -> ["a", "试bc", "d"]);
t!(r#"a "deux mots" b"# -> ["a", "deux mots", "b"]);
}
// A non-space delimiter works the same way.
#[test]
fn test_commas() {
t!(@with ',' "1,2,3,4" -> ["1", "2", "3", "4"]);
}
// When the delimiter is itself a quote character it acts as a delimiter,
// not as a quote.
#[test]
fn test_quote_delimitor() {
t!(@with '"' "one\" two\"three" -> ["one", " two", "three"]);
t!(@with '\'' "one' two'three" -> ["one", " two", "three"]);
}
}