windows_args/
args.rs

1use std::fmt;
2use std::iter;
3use crate::wtf8like::{IsWtf8Slice, IsWtf8Buf};
4
/// Iterator over the arguments parsed from a Windows-style command line.
///
/// The arguments are parsed up front and stored as owned values of type `S`;
/// iterating simply drains that list.
pub(crate) struct ArgsWtf8<S> {
    /// Parsed arguments that have not been yielded yet.
    inner: std::vec::IntoIter<S>,
}
8
9impl<S: IsWtf8Buf> ArgsWtf8<S> {
10    pub(crate) fn parse_cmd<I: IsWtf8Slice + ?Sized>(input: &I) -> Self {
11        let mut wide: Vec<_> = input.encode_wide();
12        wide.push(0);
13
14        ArgsWtf8 { inner: parse_lp_cmd_line(&wide).into_iter() }
15    }
16}
17
18/// Implements the Windows command-line argument parsing algorithm.
19///
20/// Microsoft's documentation for the Windows CLI argument format can be found at
21/// <https://docs.microsoft.com/en-us/previous-versions//17w5ykft(v=vs.85)>.
22///
23/// Windows includes a function to do this in shell32.dll,
24/// but linking with that DLL causes the process to be registered as a GUI application.
25/// GUI applications add a bunch of overhead, even if no windows are drawn. See
26/// <https://randomascii.wordpress.com/2018/12/03/a-not-called-function-can-cause-a-5x-slowdown/>.
27fn parse_lp_cmd_line<S: IsWtf8Buf>(
28    lp_cmd_line: &[u16],
29) -> Vec<S> {
30    const BACKSLASH: u16 = '\\' as u16;
31    const QUOTE: u16 = '"' as u16;
32    const TAB: u16 = '\t' as u16;
33    const SPACE: u16 = ' ' as u16;
34
35    let mut ret_val = Vec::new();
36    if lp_cmd_line[0] == 0 {
37        // NOTE: Here, CommandLineToArgvW would produce the current executable name, as
38        //       given by GetModuleFileNameW.
39        //
40        //       For our purposes, it makes more sense to treat this the same way we would
41        //       treat a string consisting entirely of whitespace.
42        ret_val.push(S::from_str(""));
43        return ret_val;
44    }
45    let mut cmd_line = {
46        let mut end = 0;
47        while lp_cmd_line[end] != 0 {
48            end += 1;
49        }
50        &lp_cmd_line[..end]
51    };
52    // The executable name at the beginning is special.
53    cmd_line = match cmd_line[0] {
54        // The executable name ends at the next quote mark,
55        // no matter what.
56        QUOTE => {
57            let args = {
58                let mut cut = cmd_line[1..].splitn(2, |&c| c == QUOTE);
59                if let Some(exe) = cut.next() {
60                    ret_val.push(S::from_wide(exe));
61                }
62                cut.next()
63            };
64            if let Some(args) = args {
65                args
66            } else {
67                return ret_val;
68            }
69        }
70        // Implement quirk: when they say whitespace here,
71        // they include the entire ASCII control plane:
72        // "However, if lpCmdLine starts with any amount of whitespace, CommandLineToArgvW
73        // will consider the first argument to be an empty string. Excess whitespace at the
74        // end of lpCmdLine is ignored."
75        0..=SPACE => {
76            ret_val.push(S::from_str(""));
77            &cmd_line[1..]
78        },
79        // The executable name ends at the next whitespace,
80        // no matter what.
81        _ => {
82            let args = {
83                let mut cut = cmd_line.splitn(2, |&c| c > 0 && c <= SPACE);
84                if let Some(exe) = cut.next() {
85                    ret_val.push(S::from_wide(exe));
86                }
87                cut.next()
88            };
89            if let Some(args) = args {
90                args
91            } else {
92                return ret_val;
93            }
94        }
95    };
96    let mut cur = Vec::new();
97    let mut in_quotes = false;
98    let mut was_in_quotes = false;
99    let mut backslash_count: usize = 0;
100    for &c in cmd_line {
101        match c {
102            // backslash
103            BACKSLASH => {
104                backslash_count += 1;
105                was_in_quotes = false;
106            },
107            QUOTE if backslash_count % 2 == 0 => {
108                cur.extend(iter::repeat(b'\\' as u16).take(backslash_count / 2));
109                backslash_count = 0;
110                if was_in_quotes {
111                    cur.push('"' as u16);
112                    was_in_quotes = false;
113                } else {
114                    was_in_quotes = in_quotes;
115                    in_quotes = !in_quotes;
116                }
117            }
118            QUOTE if backslash_count % 2 != 0 => {
119                cur.extend(iter::repeat(b'\\' as u16).take(backslash_count / 2));
120                backslash_count = 0;
121                was_in_quotes = false;
122                cur.push(b'"' as u16);
123            }
124            SPACE | TAB if !in_quotes => {
125                cur.extend(iter::repeat(b'\\' as u16).take(backslash_count));
126                if !cur.is_empty() || was_in_quotes {
127                    ret_val.push(S::from_wide(&cur[..]));
128                    cur.truncate(0);
129                }
130                backslash_count = 0;
131                was_in_quotes = false;
132            }
133            _ => {
134                cur.extend(iter::repeat(b'\\' as u16).take(backslash_count));
135                backslash_count = 0;
136                was_in_quotes = false;
137                cur.push(c);
138            }
139        }
140    }
141    cur.extend(iter::repeat(b'\\' as u16).take(backslash_count));
142    // include empty quoted strings at the end of the arguments list
143    if !cur.is_empty() || was_in_quotes || in_quotes {
144        ret_val.push(S::from_wide(&cur[..]));
145    }
146    ret_val
147}
148
149pub(crate) struct ArgsInnerDebug<'a, S> {
150    args: &'a ArgsWtf8<S>,
151}
152
153impl<'a, S: fmt::Debug> fmt::Debug for ArgsInnerDebug<'a, S> {
154    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155        self.args.inner.as_slice().fmt(f)
156    }
157}
158
159impl<S> ArgsWtf8<S> {
160    pub(crate) fn inner_debug(&self) -> ArgsInnerDebug<'_, S> {
161        ArgsInnerDebug {
162            args: self
163        }
164    }
165}
166
167impl<S> Iterator for ArgsWtf8<S> {
168    type Item = S;
169    fn next(&mut self) -> Option<S> { self.inner.next() }
170    fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
171}
172
173impl<S> DoubleEndedIterator for ArgsWtf8<S> {
174    fn next_back(&mut self) -> Option<S> { self.inner.next_back() }
175}
176
177impl<S> ExactSizeIterator for ArgsWtf8<S> {
178    fn len(&self) -> usize { self.inner.len() }
179}
180
#[cfg(test)]
mod tests {
    use super::*;
    use wtf8::Wtf8Buf;

    /// Parses `string` (converted to wide code units and NUL-terminated) and asserts
    /// that the resulting argument list equals `parts`.
    fn chk(string: &str, parts: &[&str]) {
        let mut wide: Vec<u16> = Wtf8Buf::from_str(string).to_ill_formed_utf16().collect();
        wide.push(0);
        let parsed = parse_lp_cmd_line::<Wtf8Buf>(&wide);
        let expected: Vec<Wtf8Buf> = parts.iter().map(|k| Wtf8Buf::from_str(k)).collect();
        assert_eq!(parsed.as_slice(), expected.as_slice());
    }

    #[test]
    fn empty() {
        chk("", &[""]);
        chk("\0", &[""]);
        chk(" ", &[""]);
    }

    #[test]
    fn single_words() {
        chk("EXE one_word", &["EXE", "one_word"]);
        chk("EXE a", &["EXE", "a"]);
        chk("EXE 😅", &["EXE", "😅"]);
        chk("EXE 😅🤦", &["EXE", "😅🤦"]);
    }

    #[test]
    fn official_examples() {
        chk(r#"EXE "abc" d e"#, &["EXE", "abc", "d", "e"]);
        chk(r#"EXE a\\\b d"e f"g h"#, &["EXE", r#"a\\\b"#, "de fg", "h"]);
        chk(r#"EXE a\\\"b c d"#, &["EXE", r#"a\"b"#, "c", "d"]);
        chk(r#"EXE a\\\\"b c" d e"#, &["EXE", r#"a\\b c"#, "d", "e"]);
    }

    #[test]
    fn whitespace_behavior() {
        chk(r#" test"#, &["", "test"]);
        chk(r#"  test"#, &["", "test"]);
        chk(r#" test test2"#, &["", "test", "test2"]);
        chk(r#" test  test2"#, &["", "test", "test2"]);
        chk(r#"test test2 "#, &["test", "test2"]);
        chk(r#"test  test2 "#, &["test", "test2"]);
        chk(r#"test "#, &["test"]);
    }

    #[test]
    fn genius_quotes() {
        chk(r#"EXE "" """#, &["EXE", "", ""]);
        chk(r#"EXE "" """"#, &["EXE", "", "\""]);
        chk(
            r#"EXE "this is """all""" in the same argument""#,
            &["EXE", "this is \"all\" in the same argument"]
        );
        chk(r#"EXE "a"""#, &["EXE", "a\""]);
        chk(r#"EXE "a"" a"#, &["EXE", "a\"", "a"]);
        // quotes cannot be escaped in command names
        chk(r#""EXE" check"#, &["EXE", "check"]);
        chk(r#""EXE check""#, &["EXE check"]);
        chk(r#""EXE """for""" check"#, &["EXE ", r#"for""#, "check"]);
        chk(r#""EXE \"for\" check"#, &[r#"EXE \"#, r#"for""#,  "check"]);
    }
}