cmdline_parser/
unix.rs

//! Bash-like command line parser.
2
3use std::collections::HashSet;
4use std::iter::Peekable;
5use std::ops::Range;
6use std::str::CharIndices;
7
/// State of the quoting/escaping state machine driven by `Parser`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ParsingState {
    /// Outside of any quoting; separators split arguments in this state.
    Normal,
    /// A backslash was just read outside of quotes; the next char is taken
    /// literally.
    Escaped,
    /// Inside `'...'`; every char up to the closing quote is literal.
    SingleQuoted,
    /// Inside `"..."`; a backslash may escape `"` or `\`.
    DoubleQuoted,
    /// A backslash was just read inside double quotes.
    DoubleQuotedEscaped,
}

/// Parser for bash-like command lines.
///
/// Supports parsing arguments which use escaping, single quotes and double
/// quotes (no expansion of `$` etc.). Splits on spaces by default.
///
/// Unfinished quotings at the end of a command line are parsed successfully
/// to support building of e.g. path completers.
///
/// The parser is an [`Iterator`]: each item is the byte range the argument
/// occupies in the original command line (including its quotes and escape
/// characters) together with the unquoted/unescaped argument text.
#[derive(Clone, Debug)]
pub struct Parser<'a> {
    /// Current state of the quoting state machine.
    state: ParsingState,
    /// Remaining chars of the command line paired with their byte offsets.
    cmdline: Peekable<CharIndices<'a>>,
    /// Byte length of the whole command line; used as the end of the range
    /// of an argument that runs up to the end of the input.
    cmdline_len: usize,
    /// Chars which terminate an argument when met outside of quotes/escapes.
    separators: HashSet<char>,
}

impl<'a> Parser<'a> {
    /// Creates a parser over `cmdline` which splits arguments on spaces.
    pub fn new(cmdline: &str) -> Parser<'_> {
        let mut separators = HashSet::new();
        separators.insert(' ');

        Parser {
            state: ParsingState::Normal,
            cmdline: cmdline.char_indices().peekable(),
            cmdline_len: cmdline.len(),
            separators,
        }
    }

    /// Change the list of chars which are used to separate arguments.
    ///
    /// Can be changed dynamically during parsing. The previous set of
    /// separators is discarded.
    pub fn set_separators<I: IntoIterator<Item = char>>(&mut self, separators: I) {
        self.separators.clear();
        self.separators.extend(separators);
    }
}

impl<'a> Iterator for Parser<'a> {
    /// Byte range of the argument within the command line and its parsed text.
    type Item = (Range<usize>, String);

    /// Parses and returns the next argument, or `None` once the input holds
    /// no further argument.
    ///
    /// Leading separators are skipped. An argument is yielded if it has any
    /// text or if a quoted section was closed (so `''` yields an empty
    /// argument).
    fn next(&mut self) -> Option<Self::Item> {
        use self::ParsingState::*;

        // Byte offset at which the argument currently being built begins.
        let mut start = self.cmdline.peek()?.0;
        let mut arg = String::new();
        // Set once a quote has been closed so that `''` / `""` still count
        // as (empty) arguments.
        let mut was_quoted = false;

        for (i, c) in self.cmdline.by_ref() {
            self.state = match (self.state, c) {
                (Normal, '\\') => Escaped,
                (Normal, '\'') => SingleQuoted,
                (Normal, '"') => DoubleQuoted,
                (Normal, _) if self.separators.contains(&c) => {
                    if !arg.is_empty() || was_quoted {
                        // The separator ends the argument; the state is
                        // already `Normal`, so it needs no update.
                        return Some((start..i, arg));
                    }
                    // Nothing collected yet: skip the separator. `i` is a
                    // byte offset, so advance by the separator's encoded
                    // width to stay on a char boundary.
                    start = i + c.len_utf8();
                    Normal
                }
                (Normal, _) | (Escaped, _) => {
                    arg.push(c);
                    Normal
                }
                (SingleQuoted, '\'') => {
                    was_quoted = true;
                    Normal
                }
                (SingleQuoted, _) => {
                    arg.push(c);
                    SingleQuoted
                }
                (DoubleQuoted, '"') => {
                    was_quoted = true;
                    Normal
                }
                (DoubleQuoted, '\\') => DoubleQuotedEscaped,
                (DoubleQuoted, _)
                | (DoubleQuotedEscaped, '"')
                | (DoubleQuotedEscaped, '\\') => {
                    arg.push(c);
                    DoubleQuoted
                }
                (DoubleQuotedEscaped, _) => {
                    // Inside double quotes a backslash only escapes `"` and
                    // `\`; before anything else it is kept verbatim.
                    arg.push('\\');
                    arg.push(c);
                    DoubleQuoted
                }
            };
        }

        // End of input: emit whatever was collected, even if a quote or an
        // escape was left unfinished.
        if !arg.is_empty() || was_quoted {
            Some((start..self.cmdline_len, arg))
        } else {
            None
        }
    }
}
104
#[cfg(test)]
mod tests {
    use std::ops::Range;

    /// One parsed argument: its byte range in the command line and its text.
    type Arg = (Range<usize>, String);

    /// Collects everything a default (space separated) parser yields.
    fn parse(cmdline: &str) -> Vec<Arg> {
        super::Parser::new(cmdline).collect()
    }

    /// Turns `(range, text)` pairs into the owned form the parser yields.
    fn expected(args: &[(Range<usize>, &str)]) -> Vec<Arg> {
        args.iter()
            .map(|&(ref span, text)| (span.clone(), String::from(text)))
            .collect()
    }

    #[test]
    fn parser() {
        // no quoting and simple escaping
        assert_eq!(
            parse(r"arg1 arg\2 arg3\ arg4  arg5 \a\r\g\\6"),
            expected(&[
                (0..4, r"arg1"),
                (5..10, r"arg2"),
                (11..21, r"arg3 arg4"),
                (23..27, r"arg5"),
                (28..37, r"arg\6"),
            ])
        );

        // single quoting
        assert_eq!(
            parse(r#"'arg 1' 'arg '2 'arg\3' 'arg\\4' 'arg"5' '\'arg6"#),
            expected(&[
                (0..7, r#"arg 1"#),
                (8..15, r#"arg 2"#),
                (16..23, r#"arg\3"#),
                (24..32, r#"arg\\4"#),
                (33..40, r#"arg"5"#),
                (41..48, r#"\arg6"#),
            ])
        );

        // double quoting
        assert_eq!(
            parse(r#""arg 1" "arg "2 "arg\3" "arg\\4" "arg'5" "arg\"6""#),
            expected(&[
                (0..7, r#"arg 1"#),
                (8..15, r#"arg 2"#),
                (16..23, r#"arg\3"#),
                (24..32, r#"arg\4"#),
                (33..40, r#"arg'5"#),
                (41..49, r#"arg"6"#),
            ])
        );

        // empty arguments
        assert_eq!(parse(r#"'' """#), expected(&[(0..2, r""), (3..5, r"")]));

        // unfinished escaping
        assert_eq!(parse(r#"a\"#), expected(&[(0..2, r"a")]));

        // unfinished quoting (causes an error in a real shell)
        assert_eq!(parse(r#""a"#), expected(&[(0..2, "a")]));
        assert_eq!(parse(r#"'a"#), expected(&[(0..2, "a")]));
    }

    #[test]
    fn multiple_separators() {
        let mut parser = super::Parser::new("arg1|arg 2:arg3");
        parser.set_separators(['|', ':'].iter().cloned());

        let parsed: Vec<Arg> = parser.collect();
        assert_eq!(
            parsed,
            expected(&[(0..4, "arg1"), (5..10, "arg 2"), (11..15, "arg3")])
        );
    }

    #[test]
    fn dynamic_separators() {
        let mut parser = super::Parser::new("arg1 arg 2:arg3");

        // The first argument is still split on the default space separator.
        assert_eq!(parser.next(), Some((0..4, String::from("arg1"))));

        // From here on only `:` separates arguments, so the space is kept.
        parser.set_separators([':'].iter().cloned());
        assert_eq!(parser.next(), Some((5..10, String::from("arg 2"))));
        assert_eq!(parser.next(), Some((11..15, String::from("arg3"))));
        assert_eq!(parser.next(), None);
    }
}
175