1use std::fmt;
2use std::iter;
3use crate::wtf8like::{IsWtf8Slice, IsWtf8Buf};
4
/// Iterator over the arguments of an already-parsed command line.
///
/// The arguments are stored in a `Vec<S>` that has been turned into its owning
/// iterator, so they are handed out by value.
pub(crate) struct ArgsWtf8<S> {
    /// Owning iterator over the arguments that have not been yielded yet.
    inner: <Vec<S> as IntoIterator>::IntoIter,
}
8
9impl<S: IsWtf8Buf> ArgsWtf8<S> {
10 pub(crate) fn parse_cmd<I: IsWtf8Slice + ?Sized>(input: &I) -> Self {
11 let mut wide: Vec<_> = input.encode_wide();
12 wide.push(0);
13
14 ArgsWtf8 { inner: parse_lp_cmd_line(&wide).into_iter() }
15 }
16}
17
18fn parse_lp_cmd_line<S: IsWtf8Buf>(
28 lp_cmd_line: &[u16],
29) -> Vec<S> {
30 const BACKSLASH: u16 = '\\' as u16;
31 const QUOTE: u16 = '"' as u16;
32 const TAB: u16 = '\t' as u16;
33 const SPACE: u16 = ' ' as u16;
34
35 let mut ret_val = Vec::new();
36 if lp_cmd_line[0] == 0 {
37 ret_val.push(S::from_str(""));
43 return ret_val;
44 }
45 let mut cmd_line = {
46 let mut end = 0;
47 while lp_cmd_line[end] != 0 {
48 end += 1;
49 }
50 &lp_cmd_line[..end]
51 };
52 cmd_line = match cmd_line[0] {
54 QUOTE => {
57 let args = {
58 let mut cut = cmd_line[1..].splitn(2, |&c| c == QUOTE);
59 if let Some(exe) = cut.next() {
60 ret_val.push(S::from_wide(exe));
61 }
62 cut.next()
63 };
64 if let Some(args) = args {
65 args
66 } else {
67 return ret_val;
68 }
69 }
70 0..=SPACE => {
76 ret_val.push(S::from_str(""));
77 &cmd_line[1..]
78 },
79 _ => {
82 let args = {
83 let mut cut = cmd_line.splitn(2, |&c| c > 0 && c <= SPACE);
84 if let Some(exe) = cut.next() {
85 ret_val.push(S::from_wide(exe));
86 }
87 cut.next()
88 };
89 if let Some(args) = args {
90 args
91 } else {
92 return ret_val;
93 }
94 }
95 };
96 let mut cur = Vec::new();
97 let mut in_quotes = false;
98 let mut was_in_quotes = false;
99 let mut backslash_count: usize = 0;
100 for &c in cmd_line {
101 match c {
102 BACKSLASH => {
104 backslash_count += 1;
105 was_in_quotes = false;
106 },
107 QUOTE if backslash_count % 2 == 0 => {
108 cur.extend(iter::repeat(b'\\' as u16).take(backslash_count / 2));
109 backslash_count = 0;
110 if was_in_quotes {
111 cur.push('"' as u16);
112 was_in_quotes = false;
113 } else {
114 was_in_quotes = in_quotes;
115 in_quotes = !in_quotes;
116 }
117 }
118 QUOTE if backslash_count % 2 != 0 => {
119 cur.extend(iter::repeat(b'\\' as u16).take(backslash_count / 2));
120 backslash_count = 0;
121 was_in_quotes = false;
122 cur.push(b'"' as u16);
123 }
124 SPACE | TAB if !in_quotes => {
125 cur.extend(iter::repeat(b'\\' as u16).take(backslash_count));
126 if !cur.is_empty() || was_in_quotes {
127 ret_val.push(S::from_wide(&cur[..]));
128 cur.truncate(0);
129 }
130 backslash_count = 0;
131 was_in_quotes = false;
132 }
133 _ => {
134 cur.extend(iter::repeat(b'\\' as u16).take(backslash_count));
135 backslash_count = 0;
136 was_in_quotes = false;
137 cur.push(c);
138 }
139 }
140 }
141 cur.extend(iter::repeat(b'\\' as u16).take(backslash_count));
142 if !cur.is_empty() || was_in_quotes || in_quotes {
144 ret_val.push(S::from_wide(&cur[..]));
145 }
146 ret_val
147}
148
149pub(crate) struct ArgsInnerDebug<'a, S> {
150 args: &'a ArgsWtf8<S>,
151}
152
153impl<'a, S: fmt::Debug> fmt::Debug for ArgsInnerDebug<'a, S> {
154 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155 self.args.inner.as_slice().fmt(f)
156 }
157}
158
159impl<S> ArgsWtf8<S> {
160 pub(crate) fn inner_debug(&self) -> ArgsInnerDebug<'_, S> {
161 ArgsInnerDebug {
162 args: self
163 }
164 }
165}
166
167impl<S> Iterator for ArgsWtf8<S> {
168 type Item = S;
169 fn next(&mut self) -> Option<S> { self.inner.next() }
170 fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
171}
172
173impl<S> DoubleEndedIterator for ArgsWtf8<S> {
174 fn next_back(&mut self) -> Option<S> { self.inner.next_back() }
175}
176
177impl<S> ExactSizeIterator for ArgsWtf8<S> {
178 fn len(&self) -> usize { self.inner.len() }
179}
180
#[cfg(test)]
mod tests {
    use super::*;
    use wtf8::Wtf8Buf;

    /// Parses `string` (with a terminating `0` appended) and asserts that the
    /// resulting arguments equal `parts`.
    fn chk(string: &str, parts: &[&str]) {
        let wide: Vec<u16> = Wtf8Buf::from_str(string)
            .to_ill_formed_utf16()
            .chain(std::iter::once(0))
            .collect();
        let expected: Vec<Wtf8Buf> = parts.iter().map(|part| Wtf8Buf::from_str(part)).collect();
        let parsed = parse_lp_cmd_line::<Wtf8Buf>(&wide);
        assert_eq!(parsed.as_slice(), expected.as_slice());
    }

    /// Runs `chk` on every `(command line, expected arguments)` pair, in order.
    fn chk_all(cases: &[(&str, &[&str])]) {
        for &(string, parts) in cases {
            chk(string, parts);
        }
    }

    #[test]
    fn empty() {
        chk_all(&[
            ("", &[""]),
            ("\0", &[""]),
            (" ", &[""]),
        ]);
    }

    #[test]
    fn single_words() {
        chk_all(&[
            ("EXE one_word", &["EXE", "one_word"]),
            ("EXE a", &["EXE", "a"]),
            ("EXE 😅", &["EXE", "😅"]),
            ("EXE 😅🤦", &["EXE", "😅🤦"]),
        ]);
    }

    #[test]
    fn official_examples() {
        chk_all(&[
            (r#"EXE "abc" d e"#, &["EXE", "abc", "d", "e"]),
            (r#"EXE a\\\b d"e f"g h"#, &["EXE", r#"a\\\b"#, "de fg", "h"]),
            (r#"EXE a\\\"b c d"#, &["EXE", r#"a\"b"#, "c", "d"]),
            (r#"EXE a\\\\"b c" d e"#, &["EXE", r#"a\\b c"#, "d", "e"]),
        ]);
    }

    #[test]
    fn whitespace_behavior() {
        chk_all(&[
            (r#" test"#, &["", "test"]),
            (r#" test"#, &["", "test"]),
            (r#" test test2"#, &["", "test", "test2"]),
            (r#" test test2"#, &["", "test", "test2"]),
            (r#"test test2 "#, &["test", "test2"]),
            (r#"test test2 "#, &["test", "test2"]),
            (r#"test "#, &["test"]),
        ]);
    }

    #[test]
    fn genius_quotes() {
        chk_all(&[
            (r#"EXE "" """#, &["EXE", "", ""]),
            (r#"EXE "" """"#, &["EXE", "", "\""]),
            (
                r#"EXE "this is """all""" in the same argument""#,
                &["EXE", "this is \"all\" in the same argument"],
            ),
            (r#"EXE "a"""#, &["EXE", "a\""]),
            (r#"EXE "a"" a"#, &["EXE", "a\"", "a"]),
            // The quoting rules for the first argument are different.
            (r#""EXE" check"#, &["EXE", "check"]),
            (r#""EXE check""#, &["EXE check"]),
            (r#""EXE """for""" check"#, &["EXE ", r#"for""#, "check"]),
            (r#""EXE \"for\" check"#, &[r#"EXE \"#, r#"for""#, "check"]),
        ]);
    }
}