shtring/
parser.rs

1use crate::{
2    lexer::{Lexer, Token},
3    Error,
4};
5
6/// Iterator over the arguments in an input string.
7///
8/// The individual returned items for an input string `&'a str` are `Result<&'a str, Error>` (see
9/// [error-handling](#error-handling) below for notes on the individual results). Escape sequences in the format
10/// `\<character>` are parsed as normal characters. The iterator will return `None` once the input has been exhausted.
11///
12/// ```rust
13/// # use shtring::Parser;
14/// let input = "a \"b c\" \\\"d";
15/// let mut parser = Parser::new(input);
16/// assert_eq!(parser.next(), Some(Ok("a")));
17/// assert_eq!(parser.next(), Some(Ok("b c")));
18/// assert_eq!(parser.next(), Some(Ok("\\\"d")));
19/// assert_eq!(parser.next(), None);
20/// ```
21///
22/// # Error handling
23///
24/// The parser will recover from any errors encountered while parsing individual arguments. This means that if some
25/// argument fails to be parsed, there still may be more valid arguments after it, in the sense that the erroneous
26/// argument was ignored.
27///
28/// ```rust
29/// # use shtring::{Error, Parser};
30/// let input = "a b\" c";
31/// let mut parser = Parser::new(input);
32/// assert_eq!(parser.next(), Some(Ok("a")));
33/// assert_eq!(parser.next(), Some(Err(Error::UnexpectedToken(3, "\""))));
34/// assert_eq!(parser.next(), Some(Ok("c")));
35/// assert_eq!(parser.next(), None);
36/// ```
37#[derive(Debug)]
38pub struct Parser<'a> {
39    input: &'a str,
40    lexer: Lexer<'a>,
41}
42
43impl<'a> Parser<'a> {
44    /// Return a new [Parser](Parser) over a given input string.
45    pub fn new(input: &'a str) -> Self {
46        Self {
47            input,
48            lexer: Lexer::new(input),
49        }
50    }
51}
52
53impl<'a> Iterator for Parser<'a> {
54    type Item = Result<&'a str, Error<'a>>;
55
56    fn next(&mut self) -> Option<Self::Item> {
57        loop {
58            break match self.lexer.next() {
59                Some(Ok((idx, token))) => match token {
60                    Token::Whitespace(_) => continue,
61                    Token::Word(_) | Token::UnknownCharacter(_) | Token::Escape(_) => loop {
62                        match self.lexer.next() {
63                            Some(Ok((cont, Token::Whitespace(_)))) => break Some(Ok(&self.input[(idx..cont)])),
64                            Some(Ok((_, Token::Word(_))))
65                            | Some(Ok((_, Token::UnknownCharacter(_))))
66                            | Some(Ok((_, Token::Escape(_)))) => continue,
67                            Some(Ok((cont, token))) => {
68                                break Some(Err(Error::UnexpectedToken(cont, &self.input[cont..cont + token.len()])))
69                            }
70                            Some(Err(e)) => break Some(Err(e)),
71                            None => break Some(Ok(&self.input[(idx..)])),
72                        }
73                    },
74                    Token::SingleQuote | Token::DoubleQuote => loop {
75                        match self.lexer.next() {
76                            Some(Ok((cont, quote))) if quote == token => break Some(Ok(&self.input[idx + 1..cont])),
77                            Some(Ok((_, _))) => continue,
78                            Some(Err(Error::UnexpectedEndOfInput)) | None => {
79                                break Some(Err(Error::UnexpectedEndOfInput))
80                            }
81                            Some(Err(e)) => break Some(Err(e)),
82                        }
83                    },
84                },
85                Some(Err(e)) => Some(Err(e)),
86                None => None,
87            };
88        }
89    }
90}
91
#[cfg(test)]
mod tests {
    extern crate test;

    use super::*;
    use test::Bencher;

    /// Run a parser over `input` until it is exhausted and gather every item it yields.
    fn parse_all(input: &str) -> Vec<Result<&str, Error<'_>>> {
        Parser::new(input).collect()
    }

    /// Return only the first item a parser yields for `input`.
    fn parse_first(input: &str) -> Option<Result<&str, Error<'_>>> {
        Parser::new(input).next()
    }

    #[test]
    fn single_word() {
        let input = "a";
        assert_eq!(parse_first(input), Some(Ok(input)));
    }

    #[test]
    fn multiple_words() {
        assert_eq!(parse_all("a b c"), vec![Ok("a"), Ok("b"), Ok("c")]);
    }

    #[test]
    fn single_quoted_word() {
        assert_eq!(parse_all("'a b c'"), vec![Ok("a b c")]);
    }

    #[test]
    fn double_quoted_word() {
        assert_eq!(parse_all("\"a b c\""), vec![Ok("a b c")]);
    }

    #[test]
    fn escaped_quote() {
        let input = "\\\"a";
        assert_eq!(parse_first(input), Some(Ok(input)));
    }

    #[test]
    fn escaped_quotes() {
        assert_eq!(parse_all("\\\" a \\\""), vec![Ok("\\\""), Ok("a"), Ok("\\\"")]);
    }

    #[test]
    fn unterminated_single_quote() {
        assert_eq!(parse_first("'a"), Some(Err(Error::UnexpectedEndOfInput)));
    }

    #[test]
    fn unterminated_double_quote() {
        assert_eq!(parse_first("\"a"), Some(Err(Error::UnexpectedEndOfInput)));
    }

    #[test]
    fn mismatched_quote() {
        // A single quote does not close a double-quoted argument, so the input ends inside the quotes.
        assert_eq!(parse_first("\"a'"), Some(Err(Error::UnexpectedEndOfInput)));
    }

    #[test]
    fn unexpected_quote() {
        assert_eq!(parse_first("a\""), Some(Err(Error::UnexpectedToken(1, "\""))));
    }

    #[bench]
    fn multiple_words_with_escapes_and_quotes(b: &mut Bencher) {
        b.iter(|| {
            let arguments = parse_all("a \"b \\\"c d\" e 'f g'");
            assert_eq!(arguments, vec![Ok("a"), Ok("b \\\"c d"), Ok("e"), Ok("f g")]);
        });
    }
}