1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/// An iterator splitting a string on a delimiter character, except where
/// the delimiter is part of a double-quoted token.
///
/// Rules applied by [`Iterator::next`]:
/// - leading whitespace is trimmed before each token is read;
/// - a token starting with `"` is a quoted token: a delimiter ends it only
///   when it directly follows a closing `"`; if no such closing quote is
///   found, the token runs to the end of the input;
/// - when quote unwrapping is enabled (see [`SplitUnquotedChar::unwrap_quotes`])
///   the surrounding quotes of a properly closed quoted token are removed
///   from the returned slice;
/// - a non-whitespace delimiter may produce empty tokens (leading or
///   consecutive delimiters), but a trailing delimiter does not produce a
///   final empty token.
///
/// Every returned token is a slice of the original input: nothing is
/// allocated or copied.
#[derive(Debug, Clone)]
pub struct SplitUnquotedChar<'s> {
    /// The part of the input which has not been consumed yet.
    src: &'s str,
    /// Whether the surrounding quotes of quoted tokens are stripped.
    unwrap_quotes: bool,
    /// The character separating tokens.
    delimitor: char,
}

impl<'s> SplitUnquotedChar<'s> {
    /// Creates a splitter over `src` using `delimitor` as separator.
    ///
    /// Quote unwrapping is disabled by default: quoted tokens are returned
    /// with their quotes.
    pub fn new(src: &'s str, delimitor: char) -> Self {
        Self {
            src,
            unwrap_quotes: false,
            delimitor,
        }
    }

    /// Returns a new splitter over the same remaining input and delimiter,
    /// with quote unwrapping set to `b`.
    ///
    /// `self` is left untouched.
    pub fn unwrap_quotes(&self, b: bool) -> Self {
        Self {
            src: self.src,
            unwrap_quotes: b,
            delimitor: self.delimitor,
        }
    }
}

impl<'s> Iterator for SplitUnquotedChar<'s> {
    type Item = &'s str;

    /// Returns the next token, or `None` when only whitespace (or nothing)
    /// remains.
    fn next(&mut self) -> Option<&'s str> {
        self.src = self.src.trim_start();
        let c0 = self.src.chars().next()?;
        // Quoting can't be honored when the delimiter is the quote itself:
        // in that case `"` is just a separator like any other (this also
        // prevents the `bi - 1` below from underflowing at `bi == 0`).
        let quoted = c0 == '"' && self.delimitor != '"';
        let mut previous = c0;
        for (bi, c) in self.src.char_indices() {
            if c == self.delimitor {
                // Inside a quoted token, a delimiter only ends the token
                // when it directly follows a closing quote. At `bi == 1`
                // the previous char is the opening quote, not a closing one.
                if quoted && (bi == 1 || previous != '"') {
                    previous = c;
                    continue;
                }
                let token = if quoted && self.unwrap_quotes {
                    // Here `bi >= 2` and both quotes are one byte long, so
                    // the bounds are valid char boundaries.
                    &self.src[1..bi - 1]
                } else {
                    &self.src[..bi]
                };
                // Consume the delimiter too: `trim_start` would only skip
                // it when it happens to be a whitespace character.
                self.src = &self.src[bi + c.len_utf8()..];
                return Some(token);
            }
            previous = c;
        }
        // No terminating delimiter: the token is all that remains.
        let rest = self.src;
        self.src = "";
        let unwrap = self.unwrap_quotes && quoted && previous == '"' && rest.len() > 1;
        Some(if unwrap {
            &rest[1..rest.len() - 1]
        } else {
            rest
        })
    }
}
/// Builds a [`SplitUnquotedChar`] splitting `src` on the space character.
///
/// Despite the name, only `' '` is used as delimiter. Quoted tokens keep
/// their quotes unless `unwrap_quotes(true)` is called on the returned
/// splitter.
pub fn split_unquoted_whitespace<'s>(src: &'s str) -> SplitUnquotedChar<'s> {
    let space = ' ';
    SplitUnquotedChar::new(src, space)
}
/// Builds a [`SplitUnquotedChar`] splitting `src` on the given `delimitor`.
///
/// This is a free-function shorthand for `SplitUnquotedChar::new`: quoted
/// tokens keep their quotes unless `unwrap_quotes(true)` is called on the
/// returned splitter.
pub fn split_unquoted_char<'s>(src: &'s str, delimitor: char) -> SplitUnquotedChar<'s> {
    let splitter = SplitUnquotedChar::new(src, delimitor);
    splitter
}
#[cfg(test)]
mod split_unquoted_whitespace_test {
    use super::*;

    /// Splits `src` on spaces with quote unwrapping enabled and checks that
    /// the tokens produced are exactly `expected`, in order.
    fn check(src: &str, expected: &[&str]) {
        let tokens: Vec<&str> = SplitUnquotedChar::new(src, ' ')
            .unwrap_quotes(true)
            .collect();
        assert_eq!(tokens.as_slice(), expected);
    }

    #[test]
    fn test_split_unquoted_whitespace() {
        // empty and blank inputs produce no token
        check("", &[]);
        check(" ", &[]);
        // plain tokens, including multi-byte characters
        check(" a 试bc d ", &["a", "试bc", "d"]);
        check("e^iπ^ = 1", &["e^iπ^", "=", "1"]);
        check("1234", &["1234"]);
        // a quote which doesn't start the token is a normal character
        check("1234\"", &["1234\""]);
        // lone or unclosed quotes are kept as is
        check(r#"""#, &[r#"""#]);
        check(r#""a""#, &[r#"a"#]);
        check(r#" " "#, &[r#"" "#]);
        // properly quoted tokens are unwrapped and may contain spaces
        check(r#"a "deux mots" b"#, &["a", "deux mots", "b"]);
        check(r#" " ""#, &[" "]);
        check(r#" a "2 * 试" x"x "z "#, &["a", "2 * 试", "x\"x", "\"z "]);
        // only the outer quotes are removed
        check(r#"""""#, &["\""]);
        check(r#""""""#, &["\"\""]);
    }
}