// yz_string_utils/shwsplit.rs
use crate::{Cow, Shard};
2
/// Error yielded by the shellword splitter when the input is malformed.
///
/// The splitter in this file produces it in two situations: the input ends
/// directly after a backslash, or the input ends while a quoted section is
/// still open.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct SyntaxError;

impl core::fmt::Display for SyntaxError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str("shellword syntax error: unterminated quote or trailing backslash")
    }
}
5
/// Iterator that splits a string into shell-style words.
///
/// Construct it with `ShellwordSplitter::new` and iterate; every item is
/// either the next word or a `SyntaxError`.
#[derive(Clone, Debug)]
pub struct ShellwordSplitter<'a> {
    /// The part of the original input that has not been consumed yet.
    input: &'a str,
}
9
10impl<'a> ShellwordSplitter<'a> {
11 pub fn new(input: &'a str) -> Self {
12 Self { input }
13 }
14
15 fn skip_whitespace(&mut self) {
16 let mut it = self.input.char_indices();
17 self.input = loop {
18 break match it.next() {
19 None => "",
20 Some((pos, x)) if !x.is_whitespace() => &self.input[pos..],
21 _ => continue,
22 };
23 };
24 }
25}
26
/// Returns `true` when `ch` is a double quote (`"`) or a single quote (`'`).
fn ch_is_quote(ch: char) -> bool {
    ch == '"' || ch == '\''
}
30
31impl<'a> Iterator for ShellwordSplitter<'a> {
32 type Item = Result<Cow<'a, str>, SyntaxError>;
33
34 fn next(&mut self) -> Option<Self::Item> {
35 self.skip_whitespace();
36 let mut it = self.input.char_indices();
37 let mut quotec = None;
38 let mut ret = Shard::<'a>::new(self.input);
39 while let Some((cpos, cx)) = it.next() {
40 if cx == '\\' {
41 let x = match it.next() {
43 Some(i) => i.1,
44 None => {
45 self.input = "";
46 return Some(Err(SyntaxError));
47 }
48 };
49 ret.push_owned(match x {
50 'n' => '\n',
51 't' => '\t',
52 'r' => '\r',
53 _ if quotec.is_some() && x.is_whitespace() => continue,
54 _ => x,
55 });
56 continue;
57 }
58 if quotec.is_none() {
59 if ch_is_quote(cx) {
60 quotec = Some(cx);
62 ret.skip(1);
64 continue;
65 } else if cx.is_whitespace() {
66 self.input = &self.input[cpos..];
68 return ret.finish_cvg().map(Ok);
69 }
70 } else if Some(cx) == quotec {
71 quotec = None;
73 match it.next() {
74 Some((npos, nx)) if nx.is_whitespace() => {
75 self.input = &self.input[npos..];
78 return ret.finish_cvg().map(Ok);
79 }
80 Some((_, nx)) if ch_is_quote(nx) => {
81 quotec = Some(nx);
84 }
85 Some((_, nx)) => {
86 ret.push_owned(nx);
89 }
90 None => {
91 self.input = "";
93 return ret.finish_cvg().map(Ok);
94 }
95 }
96 continue;
97 }
98 ret.push(cx);
99 }
100 self.input = "";
101 if quotec.is_some() {
102 return Some(Err(SyntaxError));
103 }
104 ret.finish_cvg().map(Ok)
105 }
106}
107
#[cfg(test)]
mod tests {
    use alloc::{string::String, vec::Vec};
    use proptest::prelude::*;

    /// Runs the splitter over `x` and gathers every word as an owned string,
    /// stopping at the first error.
    fn split(x: &str) -> Result<Vec<String>, super::SyntaxError> {
        let mut words = Vec::new();
        for item in super::ShellwordSplitter::new(x) {
            words.push(item?.into_owned());
        }
        Ok(words)
    }

    /// Asserts that `input` splits successfully into exactly `expected`.
    fn expect_words(input: &str, expected: &[&str]) {
        assert_eq!(split(input).unwrap(), expected);
    }

    /// Asserts that splitting `input` reports a syntax error.
    fn expect_error(input: &str) {
        split(input).unwrap_err();
    }

    #[test]
    fn nothing_special() {
        expect_words("a b c d", &["a", "b", "c", "d"]);
    }

    #[test]
    fn quoted_strings() {
        expect_words("a \"b b\" a", &["a", "b b", "a"]);
    }

    #[test]
    fn escaped_double_quotes() {
        expect_words("a \"\\\"b\\\" c\" d", &["a", "\"b\" c", "d"]);
    }

    #[test]
    fn escaped_single_quotes() {
        expect_words("a \"'b' c\" d", &["a", "'b' c", "d"]);
    }

    #[test]
    fn escaped_spaces() {
        expect_words("a b\\ c d", &["a", "b c", "d"]);
    }

    #[test]
    fn start_with_qspaces() {
        expect_words("\" \" b c", &[" ", "b", "c"]);
    }

    #[test]
    fn bad_double_quotes() {
        expect_error("a \"b c d e");
    }

    #[test]
    fn bad_single_quotes() {
        expect_error("a 'b c d e");
    }

    #[test]
    fn bad_quotes() {
        expect_error("one '\"\"\"");
    }

    #[test]
    fn trailing_whitespace() {
        expect_words("a b c d ", &["a", "b", "c", "d"]);
    }

    proptest! {
        // Arbitrary printable input must never make the splitter panic.
        #[test]
        fn doesnt_crash(s in "\\PC*") {
            let _: Vec<_> = super::ShellwordSplitter::new(&s).collect();
        }
    }
}