1use std::collections::HashSet;
4use std::iter::Peekable;
5use std::ops::Range;
6use std::str::CharIndices;
7
/// Lexer state of the command-line parser, carried from one character to the
/// next (and across `next` calls) while an argument is being scanned.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ParsingState {
    /// Outside of any quote or escape sequence.
    Normal,
    /// A backslash was seen outside quotes; the next character is taken literally.
    Escaped,
    /// Inside a `'...'` section, where every character up to the closing quote
    /// is literal.
    SingleQuoted,
    /// Inside a `"..."` section, where a backslash may escape `"` or `\`.
    DoubleQuoted,
    /// A backslash was seen inside a `"..."` section.
    DoubleQuotedEscaped,
}
16
17pub struct Parser<'a> {
25 state: ParsingState,
26 cmdline: Peekable<CharIndices<'a>>,
27 cmdline_len: usize,
28 separators: HashSet<char>,
29}
30
31impl<'a> Parser<'a> {
32 pub fn new(cmdline: &str) -> Parser {
33 Parser {
34 state: ParsingState::Normal,
35 cmdline: cmdline.char_indices().peekable(),
36 cmdline_len: cmdline.len(),
37 separators: [' '].iter().cloned().collect(),
38 }
39 }
40
41 pub fn set_separators<I: IntoIterator<Item=char>>(&mut self, separators: I) {
45 self.separators.clear();
46 self.separators.extend(separators);
47 }
48}
49
50impl<'a> Iterator for Parser<'a> {
51 type Item = (Range<usize>, String);
52
53 fn next(&mut self) -> Option<Self::Item> {
54 use self::ParsingState::*;
55
56 let mut arg = String::new();
57
58 if let Some(&(mut start, _)) = self.cmdline.peek() {
59 let mut yield_value = false;
60 let mut was_quoted = false;
61
62 for (i, c) in &mut self.cmdline {
63 self.state = match (self.state, c) {
64 (Normal, '\\') => Escaped,
65 (Normal, '\'') => SingleQuoted,
66 (Normal, '"') => DoubleQuoted,
67 (Normal, ref c) if self.separators.contains(c) => {
68 if arg.len() > 0 || was_quoted {
69 yield_value = true;
70 } else {
71 start = i + 1;
72 }
73 Normal
74 },
75 (Normal, _) |
76 (Escaped, _) => { arg.push(c); Normal },
77 (SingleQuoted, '\'') => { was_quoted = true; Normal },
78 (SingleQuoted, _) => { arg.push(c); SingleQuoted },
79 (DoubleQuoted, '"') => { was_quoted = true; Normal },
80 (DoubleQuoted, '\\') => DoubleQuotedEscaped,
81 (DoubleQuoted, _) |
82 (DoubleQuotedEscaped, '"') |
83 (DoubleQuotedEscaped, '\\') => { arg.push(c); DoubleQuoted },
84 (DoubleQuotedEscaped, _) => {
85 arg.push('\\');
86 arg.push(c);
87 DoubleQuoted
88 },
89 };
90
91 if yield_value {
92 return Some((start..i, arg));
93 }
94 }
95
96 if arg.len() > 0 || was_quoted {
97 return Some((start..self.cmdline_len, arg));
98 }
99 }
100
101 None
102 }
103}
104
#[cfg(test)]
mod tests {
    use super::Parser;
    use std::ops::Range;

    /// Runs a default-configured parser over `cmd` and collects every argument.
    fn parse(cmd: &str) -> Vec<(Range<usize>, String)> {
        Parser::new(cmd).collect()
    }

    /// Builds one expected `(byte range, argument text)` item.
    fn tok(range: Range<usize>, text: &str) -> (Range<usize>, String) {
        (range, text.to_string())
    }

    #[test]
    fn parser() {
        // Backslash escapes outside quotes. The TWO spaces between `arg4` and
        // `arg5` are intentional: they exercise the skipping of consecutive
        // separators and are what the expected offsets 23..27 / 28..37 assume.
        assert_eq!(
            parse(r"arg1 arg\2 arg3\ arg4  arg5 \a\r\g\\6"),
            [
                tok(0..4, r"arg1"),
                tok(5..10, r"arg2"),
                tok(11..21, r"arg3 arg4"),
                tok(23..27, r"arg5"),
                tok(28..37, r"arg\6"),
            ]
        );

        // Single quotes: everything up to the closing quote is literal.
        assert_eq!(
            parse(r#"'arg 1' 'arg '2 'arg\3' 'arg\\4' 'arg"5' '\'arg6"#),
            [
                tok(0..7, r#"arg 1"#),
                tok(8..15, r#"arg 2"#),
                tok(16..23, r#"arg\3"#),
                tok(24..32, r#"arg\\4"#),
                tok(33..40, r#"arg"5"#),
                tok(41..48, r#"\arg6"#),
            ]
        );

        // Double quotes: a backslash escapes only `"` and `\`.
        assert_eq!(
            parse(r#""arg 1" "arg "2 "arg\3" "arg\\4" "arg'5" "arg\"6""#),
            [
                tok(0..7, r#"arg 1"#),
                tok(8..15, r#"arg 2"#),
                tok(16..23, r#"arg\3"#),
                tok(24..32, r#"arg\4"#),
                tok(33..40, r#"arg'5"#),
                tok(41..49, r#"arg"6"#),
            ]
        );

        // Empty quoted sections still produce arguments.
        assert_eq!(parse(r#"'' """#), [tok(0..2, ""), tok(3..5, "")]);

        // A trailing backslash is dropped.
        assert_eq!(parse(r#"a\"#), [tok(0..2, "a")]);

        // Unterminated quotes run to the end of the input.
        assert_eq!(parse(r#""a"#), [tok(0..2, "a")]);
        assert_eq!(parse(r#"'a"#), [tok(0..2, "a")]);
    }

    #[test]
    fn multiple_separators() {
        let mut parser = Parser::new("arg1|arg 2:arg3");
        parser.set_separators(['|', ':'].iter().cloned());

        assert_eq!(
            parser.collect::<Vec<_>>(),
            [tok(0..4, "arg1"), tok(5..10, "arg 2"), tok(11..15, "arg3")]
        );
    }

    #[test]
    fn dynamic_separators() {
        let mut parser = Parser::new("arg1 arg 2:arg3");

        assert_eq!(parser.next(), Some(tok(0..4, "arg1")));

        // Separators may be changed between arguments.
        parser.set_separators([':'].iter().cloned());
        assert_eq!(parser.next(), Some(tok(5..10, "arg 2")));
        assert_eq!(parser.next(), Some(tok(11..15, "arg3")));
        assert_eq!(parser.next(), None);
    }
}
175