yz_string_utils/
shwsplit.rs

1use crate::{Cow, Shard};
2
/// Error yielded by [`ShellwordSplitter`] when the input is not well-formed.
///
/// The splitter reports it when the input ends directly after a backslash or while a
/// quotation is still open. It carries no further details.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct SyntaxError;
5
/// Splits a string into shell-like words.
///
/// The words are handed out one at a time through the type's `Iterator`
/// implementation; the splitter itself only remembers the part of the input that
/// has not been consumed yet.
#[derive(Clone, Debug)]
pub struct ShellwordSplitter<'a> {
    /// Remainder of the input that still has to be split.
    input: &'a str,
}

impl<'a> ShellwordSplitter<'a> {
    /// Creates a splitter that starts at the beginning of `input`.
    pub fn new(input: &'a str) -> Self {
        Self { input }
    }

    /// Removes all leading whitespace from the remaining input.
    ///
    /// Whitespace is what [`char::is_whitespace`] accepts, which is the same
    /// definition [`str::trim_start`] uses. The remaining input becomes empty if it
    /// consists of whitespace only.
    fn skip_whitespace(&mut self) {
        self.input = self.input.trim_start();
    }
}
26
/// Tells whether `ch` is one of the two quotation characters, `"` or `'`.
fn ch_is_quote(ch: char) -> bool {
    ch == '"' || ch == '\''
}
30
31impl<'a> Iterator for ShellwordSplitter<'a> {
32    type Item = Result<Cow<'a, str>, SyntaxError>;
33
34    fn next(&mut self) -> Option<Self::Item> {
35        self.skip_whitespace();
36        let mut it = self.input.char_indices();
37        let mut quotec = None;
38        let mut ret = Shard::<'a>::new(self.input);
39        while let Some((cpos, cx)) = it.next() {
40            if cx == '\\' {
41                // escape works the same, no matter if inside or outside of quotes
42                let x = match it.next() {
43                    Some(i) => i.1,
44                    None => {
45                        self.input = "";
46                        return Some(Err(SyntaxError));
47                    }
48                };
49                ret.push_owned(match x {
50                    'n' => '\n',
51                    't' => '\t',
52                    'r' => '\r',
53                    _ if quotec.is_some() && x.is_whitespace() => continue,
54                    _ => x,
55                });
56                continue;
57            }
58            if quotec.is_none() {
59                if ch_is_quote(cx) {
60                    // start of quotation
61                    quotec = Some(cx);
62                    // allow the algo to reuse simple, quoted args
63                    ret.skip(1);
64                    continue;
65                } else if cx.is_whitespace() {
66                    // argument separator, this will never happen on the first iteration
67                    self.input = &self.input[cpos..];
68                    return ret.finish_cvg().map(Ok);
69                }
70            } else if Some(cx) == quotec {
71                // end of quotation
72                quotec = None;
73                match it.next() {
74                    Some((npos, nx)) if nx.is_whitespace() => {
75                        // simple case: the ending quote is followed by an separator
76                        // we can thus skip the whitespace and return our item
77                        self.input = &self.input[npos..];
78                        return ret.finish_cvg().map(Ok);
79                    }
80                    Some((_, nx)) if ch_is_quote(nx) => {
81                        // medium case: the ending quote if directly followed by another quote
82                        // thus, remain in quote mode
83                        quotec = Some(nx);
84                    }
85                    Some((_, nx)) => {
86                        // complex case: the ending quote is followed by more data which
87                        // belongs to the same argument
88                        ret.push_owned(nx);
89                    }
90                    None => {
91                        // simple case: the ending quote is followed by EOF
92                        self.input = "";
93                        return ret.finish_cvg().map(Ok);
94                    }
95                }
96                continue;
97            }
98            ret.push(cx);
99        }
100        self.input = "";
101        if quotec.is_some() {
102            return Some(Err(SyntaxError));
103        }
104        ret.finish_cvg().map(Ok)
105    }
106}
107
#[cfg(test)]
mod tests {
    use super::{ShellwordSplitter, SyntaxError};
    use alloc::{string::String, vec::Vec};
    use proptest::prelude::*;

    // The split_shellwords tests below were taken from
    // https://docs.rs/shellwords/1.1.0/src/shellwords/lib.rs.html
    // License: MIT

    /// Splits `x` and gathers all words as owned strings; the first error aborts the run.
    fn split(x: &str) -> Result<Vec<String>, SyntaxError> {
        let mut words = Vec::new();
        for word in ShellwordSplitter::new(x) {
            words.push(word?.into_owned());
        }
        Ok(words)
    }

    #[test]
    fn nothing_special() {
        let words = split("a b c d").unwrap();
        assert_eq!(words, ["a", "b", "c", "d"]);
    }

    #[test]
    fn quoted_strings() {
        let words = split("a \"b b\" a").unwrap();
        assert_eq!(words, ["a", "b b", "a"]);
    }

    #[test]
    fn escaped_double_quotes() {
        let words = split("a \"\\\"b\\\" c\" d").unwrap();
        assert_eq!(words, ["a", "\"b\" c", "d"]);
    }

    #[test]
    fn escaped_single_quotes() {
        // single quotes inside of a double-quoted word are kept as they are
        let words = split("a \"'b' c\" d").unwrap();
        assert_eq!(words, ["a", "'b' c", "d"]);
    }

    #[test]
    fn escaped_spaces() {
        let words = split("a b\\ c d").unwrap();
        assert_eq!(words, ["a", "b c", "d"]);
    }

    #[test]
    fn start_with_qspaces() {
        let words = split("\"  \" b c").unwrap();
        assert_eq!(words, ["  ", "b", "c"]);
    }

    #[test]
    fn bad_double_quotes() {
        assert!(split("a \"b c d e").is_err());
    }

    #[test]
    fn bad_single_quotes() {
        assert!(split("a 'b c d e").is_err());
    }

    #[test]
    fn bad_quotes() {
        assert!(split("one '\"\"\"").is_err());
    }

    #[test]
    fn trailing_whitespace() {
        let words = split("a b c d ").unwrap();
        assert_eq!(words, ["a", "b", "c", "d"]);
    }

    proptest! {
        // generated input may be rejected with an error, but it must never
        // make the splitter panic
        #[test]
        fn doesnt_crash(s in "\\PC*") {
            for word in ShellwordSplitter::new(&s) {
                drop(word);
            }
        }
    }
}