cmdline_parser/
windows.rs

1//! cmd-like cmdline parser
2
3use std::collections::HashSet;
4use std::iter::Peekable;
5use std::ops::Range;
6use std::str::CharIndices;
7
/// Quoting state the parser carries from one character to the next.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ParsingState {
    /// Outside of a double-quoted section.
    Normal,
    /// Inside a double-quoted section.
    Quoted,
    /// Inside a double-quoted section, directly after a backslash.
    QuotedEscaped,
}

/// Parser for cmd-like command lines.
///
/// Supports parsing arguments which use double quotes including escaping
/// the quote. Splits on spaces by default.
///
/// Escaping is only recognised inside a quoted section: `\"` yields `"` and
/// `\\` yields `\`. A backslash followed by any other character is kept
/// literally together with that character. Outside of quotes a backslash is
/// an ordinary character.
///
/// The parser is an iterator. Each item is the byte range the argument
/// occupies in the original command line (quotes included, the terminating
/// separator excluded) together with the unquoted, unescaped argument text.
#[derive(Clone, Debug)]
pub struct Parser<'a> {
    /// Quoting state after the most recently consumed character.
    state: ParsingState,
    /// Remaining characters of the command line with their byte offsets.
    cmdline: Peekable<CharIndices<'a>>,
    /// Length of the whole command line in bytes; end offset of the last argument.
    cmdline_len: usize,
    /// Characters which terminate an argument outside of quotes.
    separators: HashSet<char>,
}

impl<'a> Parser<'a> {
    /// Creates a parser over `cmdline` which splits arguments on spaces.
    pub fn new(cmdline: &str) -> Parser<'_> {
        Parser {
            state: ParsingState::Normal,
            cmdline: cmdline.char_indices().peekable(),
            cmdline_len: cmdline.len(),
            separators: [' '].iter().cloned().collect(),
        }
    }

    /// Change the list of chars which are used to separate arguments.
    ///
    /// The previous set of separators is discarded. Can be changed
    /// dynamically during parsing; the new set applies to every character
    /// which has not been consumed yet.
    pub fn set_separators<I: IntoIterator<Item = char>>(&mut self, separators: I) {
        self.separators.clear();
        self.separators.extend(separators);
    }
}

impl<'a> Iterator for Parser<'a> {
    /// Byte range of the argument in the command line and its parsed text.
    type Item = (Range<usize>, String);

    /// Parses and returns the next argument, or `None` once the command line
    /// holds no further argument.
    ///
    /// An unterminated quoted section extends to the end of the command
    /// line. A trailing backslash inside such a section is kept literally.
    fn next(&mut self) -> Option<Self::Item> {
        use self::ParsingState::*;

        // The argument tentatively starts at the next unread character.
        let mut start = match self.cmdline.peek() {
            Some(&(offset, _)) => offset,
            None => return None,
        };

        let mut arg = String::new();
        // Set once a quoted section was closed, so that `""` yields an empty argument.
        let mut was_quoted = false;

        for (i, c) in &mut self.cmdline {
            self.state = match (self.state, c) {
                (Normal, '"') => Quoted,
                (Normal, _) if self.separators.contains(&c) => {
                    if !arg.is_empty() || was_quoted {
                        // The separator terminates the argument and is not part of its range.
                        // The state already is `Normal`, so it needs no update.
                        return Some((start..i, arg));
                    }
                    // Leading or repeated separator: the argument starts behind it.
                    // Offsets are in bytes, so advance by the encoded width of the
                    // separator rather than by one.
                    start = i + c.len_utf8();
                    Normal
                }
                (Normal, _) => {
                    arg.push(c);
                    Normal
                }
                (Quoted, '"') => {
                    was_quoted = true;
                    Normal
                }
                (Quoted, '\\') => QuotedEscaped,
                (Quoted, _) => {
                    arg.push(c);
                    Quoted
                }
                (QuotedEscaped, '"') | (QuotedEscaped, '\\') => {
                    arg.push(c);
                    Quoted
                }
                (QuotedEscaped, _) => {
                    // Not an escape sequence: keep the backslash.
                    arg.push('\\');
                    arg.push(c);
                    Quoted
                }
            };
        }

        // The input ended directly after a backslash inside quotes: keep it literally.
        if self.state == QuotedEscaped {
            arg.push('\\');
        }

        if !arg.is_empty() || was_quoted {
            Some((start..self.cmdline_len, arg))
        } else {
            None
        }
    }
}
98
#[cfg(test)]
mod tests {
    use super::Parser;
    use std::ops::Range;

    /// A parsed argument: its byte range in the command line and its text.
    type Arg = (Range<usize>, String);

    /// Runs a parser with the default separators over `cmdline` and gathers every argument.
    fn parse_all(cmdline: &str) -> Vec<Arg> {
        Parser::new(cmdline).collect()
    }

    /// Builds the expected value for a single parsed argument.
    fn arg(range: Range<usize>, text: &str) -> Arg {
        (range, text.to_owned())
    }

    #[test]
    fn parser() {
        // Without quotes a backslash is an ordinary character.
        let unquoted = parse_all(r"arg1 arg\2 arg3\ arg4  arg5");
        let expected = vec![
            arg(0..4, r"arg1"),
            arg(5..10, r"arg\2"),
            arg(11..16, r"arg3\"),
            arg(17..21, r"arg4"),
            arg(23..27, r"arg5"),
        ];
        assert_eq!(unquoted, expected);

        // Quoted arguments, including escaped quotes and backslashes.
        let quoted = parse_all(r#""arg 1" "arg "2 "arg\3" "arg\\4" "arg\"5""#);
        let expected = vec![
            arg(0..7, r#"arg 1"#),
            arg(8..15, r#"arg 2"#),
            arg(16..23, r#"arg\3"#),
            arg(24..32, r#"arg\4"#),
            arg(33..41, r#"arg"5"#),
        ];
        assert_eq!(quoted, expected);

        // Empty arguments.
        assert_eq!(parse_all(r#""" """#), vec![arg(0..2, r""), arg(3..5, r"")]);

        // A quote which is never closed runs to the end of the input.
        assert_eq!(parse_all(r#""a"#), vec![arg(0..2, "a")]);

        // An escape sequence cut short by the end of the input.
        assert_eq!(parse_all(r#""a\"#), vec![arg(0..3, r"a\")]);
        assert_eq!(parse_all(r#""a\""#), vec![arg(0..4, r#"a""#)]);
    }

    #[test]
    fn multiple_separators() {
        let mut parser = Parser::new("arg1|arg 2:arg3");
        parser.set_separators(['|', ':'].iter().cloned());

        let parsed: Vec<Arg> = parser.collect();
        let expected = vec![
            arg(0..4, "arg1"),
            arg(5..10, "arg 2"),
            arg(11..15, "arg3"),
        ];
        assert_eq!(parsed, expected);
    }

    #[test]
    fn dynamic_separators() {
        let mut parser = Parser::new("arg1 arg 2:arg3");

        // The first argument is still split on the default separator.
        assert_eq!(parser.next(), Some(arg(0..4, "arg1")));

        // From here on only ':' separates arguments, so the space stays in the argument.
        parser.set_separators([':'].iter().cloned());
        assert_eq!(parser.next(), Some(arg(5..10, "arg 2")));
        assert_eq!(parser.next(), Some(arg(11..15, "arg3")));
        assert_eq!(parser.next(), None);
    }
}
157}