casper_node/cli/
arglang.rs

1//! TOML-inspired command-line argument language.
2//!
3//! Supports strings, booleans, integers and arrays (lists).
4//!
5//! * Booleans are expressed as `true` or `false`.
//! * Any integer must fit into `i64`; otherwise it will be parsed as a string.
//! * Strings can be quoted using double quotes. Inside quotes, a backslash `\` makes the character
//!   that follows it literal, so `\"` yields a quote.
9//! * Unquoted strings are terminated on whitespace.
10//! * Arrays are written using brackets and commas: `[1, 2, 3]`.
11//!
12//! ## Examples
13//!
14//! * `[127.0.0.1, 1.2.3.4, 6.7.8.9]` list of three strings
15//! * `"hello world"` string `hello world`
16//! * `["no\"de\"-1", node-2]` list of two strings (`no"de"-1` and `node-2`).
17
18use std::{iter::Peekable, str::FromStr};
19
20use thiserror::Error;
21use toml::Value;
22
/// A Token to be parsed.
///
/// Tokens are produced by `tokenize` and consumed by `parse_stream`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum Token {
    /// A string: either the contents of a double-quoted section, or an unquoted word that is
    /// neither an `i64` nor a boolean.
    String(String),
    /// An unquoted word that parsed successfully as an `i64`.
    I64(i64),
    /// An unquoted `true` or `false`.
    Boolean(bool),
    /// The `,` character outside of quotes.
    Comma,
    /// The `[` character outside of quotes.
    OpenBracket,
    /// The `]` character outside of quotes.
    CloseBracket,
}
33
/// Errors that can occur while tokenizing or parsing arglang input.
#[derive(Debug, Error, Eq, PartialEq)]
pub(crate) enum Error {
    /// A double-quoted string was opened, but the input ended before its closing quote.
    #[error("unterminated string in input")]
    UnterminatedString,
    /// A token showed up in a position where it is not allowed; carries the offending token.
    #[error("unexpected token {0:?}")]
    UnexpectedToken(Token),
    /// The token stream ended while a value or the remainder of a list was still expected.
    #[error("unexpected end of input")]
    UnexpectedEndOfInput,
    /// A complete value was parsed, but more tokens followed; carries the first extra token.
    #[error("trailing input {0:?}...")]
    TrailingInput(Token),
}
45
46impl Token {
47    /// Constructs a token from a string.
48    #[cfg(test)]
49    fn string(value: &str) -> Token {
50        Token::String(value.to_string())
51    }
52}
53
54/// Tokenizes a stream of characters.
55fn tokenize(input: &str) -> Result<Vec<Token>, Error> {
56    let mut chars = input.chars();
57    let mut tokens = Vec::new();
58
59    let mut buffer = String::new();
60
61    loop {
62        let ch = chars.next();
63
64        // Check if we need to complete a token.
65        if !buffer.is_empty() {
66            match ch {
67                Some(' ' | '"' | '[' | ']' | ',') | None => {
68                    // Try to parse as number or bool first.
69                    if let Ok(value) = i64::from_str(&buffer) {
70                        tokens.push(Token::I64(value));
71                    } else if let Ok(value) = bool::from_str(&buffer) {
72                        tokens.push(Token::Boolean(value));
73                    } else {
74                        tokens.push(Token::String(buffer.clone()));
75                    }
76
77                    buffer.clear();
78                }
79                _ => {
80                    // Handled in second match below.
81                }
82            }
83        }
84
85        match ch {
86            None => {
87                // On EOF, we break.
88                break;
89            }
90            Some(' ') => {
91                // Ignore whitespace.
92            }
93            Some('"') => {
94                // Quoted string.
95                let mut escaped = false;
96                let mut string = String::new();
97                loop {
98                    let c = chars.next();
99                    match c {
100                        Some(character) if escaped => {
101                            string.push(character);
102                            escaped = false;
103                        }
104                        Some('\\') => {
105                            escaped = true;
106                        }
107                        Some('"') => {
108                            break;
109                        }
110                        Some(character) => string.push(character),
111                        None => {
112                            return Err(Error::UnterminatedString);
113                        }
114                    }
115                }
116                tokens.push(Token::String(string));
117            }
118            Some('[') => tokens.push(Token::OpenBracket),
119            Some(']') => tokens.push(Token::CloseBracket),
120            Some(',') => tokens.push(Token::Comma),
121            Some(character) => buffer.push(character),
122        }
123    }
124
125    Ok(tokens)
126}
127
128/// Parse a stream of tokens of arglang.
129fn parse_stream<I>(tokens: &mut Peekable<I>) -> Result<Value, Error>
130where
131    I: Iterator<Item = Token>,
132{
133    match tokens.next() {
134        Some(Token::String(value)) => Ok(Value::String(value)),
135        Some(Token::I64(value)) => Ok(Value::Integer(value)),
136        Some(Token::Boolean(value)) => Ok(Value::Boolean(value)),
137        Some(Token::OpenBracket) => {
138            // Special case for empty list.
139            if tokens.peek() == Some(&Token::CloseBracket) {
140                tokens.next();
141                return Ok(Value::Array(Vec::new()));
142            }
143
144            let mut items = Vec::new();
145            loop {
146                items.push(parse_stream(tokens)?);
147
148                match tokens.next() {
149                    Some(Token::CloseBracket) => {
150                        return Ok(Value::Array(items));
151                    }
152                    Some(Token::Comma) => {
153                        // Continue parsing next time.
154                    }
155                    Some(t) => {
156                        return Err(Error::UnexpectedToken(t));
157                    }
158                    None => {
159                        return Err(Error::UnexpectedEndOfInput);
160                    }
161                }
162            }
163        }
164        Some(t @ (Token::CloseBracket | Token::Comma)) => Err(Error::UnexpectedToken(t)),
165        None => Err(Error::UnexpectedEndOfInput),
166    }
167}
168
169/// Parse string using arglang.
170pub(crate) fn parse(input: &str) -> Result<Value, Error> {
171    let mut tokens = tokenize(input)?.into_iter().peekable();
172    let value = parse_stream(&mut tokens)?;
173
174    // Check if there is trailing input.
175    if let Some(trailing) = tokens.next() {
176        return Err(Error::TrailingInput(trailing));
177    }
178
179    Ok(value)
180}
181
#[cfg(test)]
mod tests {
    use toml::Value;

    use super::{parse, tokenize, Error, Token};

    /// Builds an owned `Value::String` from a string slice.
    fn text(value: &str) -> Value {
        Value::String(value.to_owned())
    }

    /// Builds one `Token::String` per given word.
    fn strings(words: &[&str]) -> Vec<Token> {
        words.iter().copied().map(Token::string).collect()
    }

    /// Every standalone token is recognised bare, with a leading space, and with a space on both
    /// sides.
    #[test]
    fn tokenize_single() {
        let singles = vec![
            ("asdf", Token::string("asdf")),
            ("-123", Token::I64(-123)),
            ("123", Token::I64(123)),
            ("true", Token::Boolean(true)),
            ("false", Token::Boolean(false)),
            ("[", Token::OpenBracket),
            ("]", Token::CloseBracket),
            (",", Token::Comma),
        ];

        for (input, token) in singles {
            let expected = vec![token];
            let leading = [" ", input].concat();
            let surrounded = [" ", input, " "].concat();

            assert_eq!(tokenize(input).unwrap(), expected);
            assert_eq!(tokenize(&leading).unwrap(), expected);
            assert_eq!(tokenize(&surrounded).unwrap(), expected);
        }

        // Spaces on their own yield no tokens at all.
        assert!(tokenize("  ").unwrap().is_empty());
    }

    #[test]
    fn tokenize_strings() {
        // Unquoted words are split on spaces.
        assert_eq!(
            tokenize(" a1 b2 c3 ").unwrap(),
            strings(&["a1", "b2", "c3"])
        );

        // A quote both ends the preceding word and starts a new string.
        assert_eq!(
            tokenize("hello \"world\"!").unwrap(),
            strings(&["hello", "world", "!"])
        );

        // An escaped quote does not end the string.
        assert_eq!(
            tokenize("\"inner\\\"quote\"").unwrap(),
            strings(&["inner\"quote"])
        );

        // A missing closing quote is an error.
        assert_eq!(tokenize("\"asdf"), Err(Error::UnterminatedString));
    }

    #[test]
    fn tokenize_list() {
        let expected = vec![
            Token::OpenBracket,
            Token::string("a"),
            Token::Comma,
            Token::I64(1),
            Token::Comma,
            Token::I64(2),
            Token::CloseBracket,
        ];

        assert_eq!(tokenize("[a, 1, 2]").unwrap(), expected);
    }

    #[test]
    fn parse_simple() {
        // Strings, quoted and unquoted.
        assert_eq!(parse("\"hello\"").unwrap(), text("hello"));
        assert_eq!(parse("\"127.0.0.1\"").unwrap(), text("127.0.0.1"));
        assert_eq!(parse("127.0.0.1").unwrap(), text("127.0.0.1"));

        // Booleans.
        assert_eq!(parse("true").unwrap(), Value::Boolean(true));
        assert_eq!(parse("false").unwrap(), Value::Boolean(false));

        // Integers.
        assert_eq!(parse("123").unwrap(), Value::Integer(123));
        assert_eq!(parse("-123").unwrap(), Value::Integer(-123));

        // A number too large for `i64` falls back to a string.
        let oversized = "123456789012345678901234567890";
        assert_eq!(parse(oversized).unwrap(), text(oversized));
    }

    #[test]
    fn parse_arrays() {
        for empty in [" [ ] ", "[]"].iter() {
            assert_eq!(parse(empty).unwrap(), Value::Array(Vec::new()));
        }

        let flat = Value::Array(vec![text("a"), Value::Integer(1), Value::Integer(2)]);
        assert_eq!(parse("[a, 1, 2]").unwrap(), flat);

        let nested = Value::Array(vec![
            text("a"),
            Value::Array(vec![Value::Integer(1), Value::Integer(2)]),
            Value::Integer(3),
        ]);
        assert_eq!(parse("[a, [1, 2], 3]").unwrap(), nested);
    }

    /// The examples given in the module documentation.
    #[test]
    fn doc_examples() {
        let addresses = Value::Array(vec![text("127.0.0.1"), text("1.2.3.4"), text("6.7.8.9")]);
        assert_eq!(parse("[127.0.0.1, 1.2.3.4, 6.7.8.9]").unwrap(), addresses);

        assert_eq!(parse("\"hello world\"").unwrap(), text("hello world"));

        let nodes = Value::Array(vec![text("no\"de\"-1"), text("node-2")]);
        assert_eq!(parse("[\"no\\\"de\\\"-1\", node-2]").unwrap(), nodes);
    }
}