clapi/
tokenizer.rs

1use crate::context::Context;
2use crate::error::{Error, ErrorKind, Result};
3use crate::token::{Token, END_OF_OPTIONS};
4use std::borrow::Borrow;
5
/// Converts a collection of argument strings into `Token`s.
///
/// The type carries no state: every parsing rule used by `Tokenizer::tokenize`
/// (option prefixes, assign operators, delimiter) is read from the `Context`
/// passed to that call, so a value can be freely copied or default-constructed.
#[derive(Debug, Clone, Copy, Default)]
pub struct Tokenizer;
9
10impl Tokenizer {
11    pub fn tokenize<S, I>(&self, context: &Context, args: I) -> Result<Vec<Token>>
12    where
13        S: Borrow<str>,
14        I: IntoIterator<Item = S>,
15    {
16        let mut iterator = args
17            .into_iter()
18            .filter(|s| !s.borrow().is_empty())
19            .peekable();
20
21        // Quick path
22        if iterator.peek().is_none() {
23            return Ok(Vec::new());
24        }
25
26        let mut tokens = Vec::new();
27        let mut current_command = context.root();
28        let mut has_end_of_options = false;
29
30        // Finds the executing command
31        if iterator
32            .peek()
33            .map_or(false, |s| crate::is_help_command(context, s.borrow()))
34        {
35            let s = iterator.next().unwrap().borrow().to_string();
36            tokens.push(Token::Cmd(s))
37        } else {
38            while let Some(arg) = iterator.peek() {
39                if let Some(child) = current_command.find_subcommand(arg.borrow()) {
40                    current_command = child;
41                    tokens.push(Token::Cmd(child.get_name().to_string()));
42                    iterator.next();
43                } else {
44                    // If the current don't take args, have subcommands and is not an option
45                    // the next should be an unknown subcommand
46                    if !current_command.take_args()
47                        && current_command.get_subcommands().len() > 0
48                        && !is_prefixed_option(context, arg.borrow())
49                    {
50                        tokens.push(Token::Cmd(arg.borrow().to_string()));
51                        iterator.next();
52                    }
53
54                    break;
55                }
56            }
57        }
58
59        // Check for options
60        while let Some(arg) = iterator.peek() {
61            let value: &str = arg.borrow();
62
63            // End of the options
64            if value == END_OF_OPTIONS {
65                tokens.push(Token::EOO);
66                has_end_of_options = true;
67                iterator.next();
68                break;
69            }
70
71            if is_prefixed_option(context, value) {
72                let OptionAndArgs {
73                    prefixed_option,
74                    args,
75                    assign_op,
76                } = try_split_option_and_args(context, value)?;
77
78                // Moves to the next value
79                iterator.next();
80
81                // Adds the option
82                tokens.push(Token::Opt(prefixed_option.clone()));
83
84                // Adds the assign operator if any
85                if let Some(c) = assign_op {
86                    tokens.push(Token::AssignOp(c));
87                }
88
89                if let Some(args) = args {
90                    tokens.extend(args.into_iter().map(Token::Arg));
91                } else if let Some(opt) = current_command
92                    .get_options()
93                    .get(context.trim_prefix(&prefixed_option))
94                {
95                    for arg in opt.get_args() {
96                        let max_arg_count = arg.get_values_count().max_or_default();
97                        let mut count = 0;
98                        while count < max_arg_count {
99                            if let Some(value) = iterator.peek() {
100                                let s: &str = value.borrow();
101                                // If the token is prefixed as an option: exit
102                                if is_prefixed_option(context, s) || s == END_OF_OPTIONS {
103                                    break;
104                                } else {
105                                    // Adds the next argument
106                                    tokens.push(Token::Arg(s.to_string()));
107                                    iterator.next();
108                                    count += 1;
109                                }
110                            } else {
111                                break;
112                            }
113                        }
114                    }
115                }
116            } else {
117                break;
118            }
119        }
120
121        if has_end_of_options {
122            // The rest if considered arguments
123            tokens.extend(iterator.map(|s| Token::Arg(s.borrow().to_string())));
124        } else {
125            for value in iterator {
126                let s: String = value.borrow().to_string();
127                if s == END_OF_OPTIONS && !has_end_of_options {
128                    tokens.push(Token::EOO);
129                    has_end_of_options = true;
130                } else {
131                    tokens.push(Token::Arg(s));
132                }
133            }
134        }
135
136        Ok(tokens)
137    }
138}
139
/// The pieces of a single option expression such as `--times=1`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct OptionAndArgs {
    /// The option as it was written, prefix included (e.g. `--times`).
    prefixed_option: String,
    /// The values attached through an assign operator, or `None` when the
    /// expression contains no assign operator.
    args: Option<Vec<String>>,
    /// The assign operator found in the expression, if any.
    assign_op: Option<char>,
}
145
146// Given an option returns the option and its args (if any)
147fn try_split_option_and_args(context: &Context, value: &str) -> Result<OptionAndArgs> {
148    // Check if the value contains an assign operator like: --times=1
149    if let Some(assign_op) = context
150        .assign_operators()
151        .cloned()
152        .find(|d| value.contains(*d))
153    {
154        let option_and_args = value
155            .split(assign_op)
156            .map(|s| s.to_string())
157            .filter(|s| !s.is_empty())
158            .collect::<Vec<String>>();
159
160        // We expect 2 parts: `option` = `arg1`
161        return if option_and_args.len() != 2 {
162            Err(Error::from(ErrorKind::InvalidExpression))
163        } else {
164            // We use the unprefixed option to do checks
165            let unprefixed_option = context.trim_prefix(&option_and_args[0]);
166
167            // --values=" hello world","good day","bye, amigo"
168            let args = split_option_args(&option_and_args[1], context);
169
170            // Error when: `=1,2,3`
171            if unprefixed_option.is_empty() {
172                return Err(Error::new(
173                    ErrorKind::InvalidExpression,
174                    "no option specified",
175                ));
176            }
177
178            // Error when: `--option=`
179            if args.is_empty() {
180                return Err(Error::new(
181                    ErrorKind::InvalidExpression,
182                    format!("no arguments specified: `{}`", value),
183                ));
184            }
185
186            // Error when: `--option=1,,,3`
187            if args.iter().any(|s| s.is_empty()) {
188                return Err(Error::new(ErrorKind::InvalidExpression, value));
189            }
190
191            Ok(OptionAndArgs {
192                prefixed_option: option_and_args[0].clone(),
193                args: Some(args),
194                assign_op: Some(assign_op),
195            })
196        };
197    } else {
198        if context.trim_prefix(value).is_empty() {
199            return Err(Error::new(
200                ErrorKind::InvalidExpression,
201                "no option specified",
202            ));
203        }
204
205        Ok(OptionAndArgs {
206            prefixed_option: value.to_owned(),
207            args: None,
208            assign_op: None,
209        })
210    }
211}
212
213fn split_option_args(args: &str, context: &Context) -> Vec<String> {
214    const QUOTE_ESCAPE: char = '\\';
215
216    let mut result = Vec::new();
217    let delimiter = context.delimiter();
218    let mut chars = args.chars().peekable();
219    let mut temp = String::new();
220    let mut in_quote = false;
221
222    while let Some(c) = chars.next() {
223        match c {
224            '"' => {
225                if in_quote {
226                    result.push(temp.drain(..).collect());
227                }
228
229                in_quote = !in_quote;
230            }
231            QUOTE_ESCAPE if chars.peek() == Some(&'"') => {
232                temp.push(chars.next().unwrap());
233            }
234            _ if c == delimiter => {
235                if in_quote {
236                    temp.push(c);
237                } else {
238                    result.push(temp.drain(..).collect());
239                }
240            }
241            _ => {
242                temp.push(c);
243            }
244        }
245    }
246
247    // Add the last value
248    result.push(temp);
249
250    result
251}
252
253// Returns `true` if the specified value starts with an option prefix.
254fn is_prefixed_option(context: &Context, value: &str) -> bool {
255    context
256        .name_prefixes()
257        .chain(context.alias_prefixes())
258        .any(|prefix| value.starts_with(prefix))
259}
260
#[cfg(test)]
mod tests {
    use crate::{split_into_args, ArgSplitter, Argument, Command, CommandOption, ContextBuilder};

    use super::*;

    /// Tokenizes `value` for `command` with the default context settings.
    fn tokenize(command: Command, value: &str) -> crate::Result<Vec<Token>> {
        let context = Context::new(command);
        let args = split_into_args(value);
        Tokenizer.tokenize(&context, args)
    }

    /// Tokenizes `value` for `command` using `delimiter` to separate the values.
    fn tokenize_with_delimiter(
        command: Command,
        value: &str,
        delimiter: char,
    ) -> crate::Result<Vec<Token>> {
        let args = ArgSplitter::new().delimiter(delimiter).split(value);
        let context = ContextBuilder::new(command).delimiter(delimiter).build();
        Tokenizer.tokenize(&context, args)
    }

    // Shorthands to build the expected tokens
    fn cmd(name: &str) -> Token {
        Token::Cmd(name.to_owned())
    }

    fn opt(name: &str) -> Token {
        Token::Opt(name.to_owned())
    }

    fn arg(value: &str) -> Token {
        Token::Arg(value.to_owned())
    }

    /// A command with a `--times|-t` and a `--numbers|-n` option.
    fn times_and_numbers_command() -> Command {
        Command::new("MyApp")
            .arg(Argument::zero_or_one("values"))
            .option(
                CommandOption::new("times")
                    .alias("t")
                    .arg(Argument::with_name("times")),
            )
            .option(
                CommandOption::new("numbers")
                    .alias("n")
                    .arg(Argument::zero_or_one("N")),
            )
    }

    #[test]
    fn tokenize_test() {
        let command = Command::new("MyApp")
            .arg(Argument::one_or_more("args"))
            .option(CommandOption::new("enable").alias("e"))
            .option(
                CommandOption::new("range").arg(Argument::with_name("range").values_count(1..=2)),
            )
            .subcommand(Command::new("version"));

        // No input, no tokens
        assert_eq!(tokenize(command.clone(), "").unwrap(), Vec::new());

        // An option with a value followed by an alias
        assert_eq!(
            tokenize(command.clone(), "--range 1 -e").unwrap(),
            vec![opt("--range"), arg("1"), opt("-e")]
        );

        // A subcommand
        assert_eq!(
            tokenize(command.clone(), "version").unwrap(),
            vec![cmd("version")]
        );

        // Everything after the end of options is an argument
        assert_eq!(
            tokenize(command.clone(), "--range 0 10 -- a b c").unwrap(),
            vec![
                opt("--range"),
                arg("0"),
                arg("10"),
                Token::EOO,
                arg("a"),
                arg("b"),
                arg("c"),
            ]
        );
    }

    #[test]
    fn tokenize_test2() {
        let tokens = tokenize(times_and_numbers_command(), "-t=1 --numbers=2,4,6 --").unwrap();

        assert_eq!(
            tokens,
            vec![
                opt("-t"),
                Token::AssignOp('='),
                arg("1"),
                opt("--numbers"),
                Token::AssignOp('='),
                arg("2"),
                arg("4"),
                arg("6"),
                Token::EOO,
            ]
        );
    }

    #[test]
    fn invalid_expression_test() {
        let command = times_and_numbers_command();

        let invalid_inputs = [
            "-",
            "--numbers=",
            "--numbers=,",
            "--numbers=1,,,2",
            "--numbers=1,2,3,",
            "--numbers=,1,2,3",
        ];

        for input in invalid_inputs.iter() {
            assert!(
                tokenize(command.clone(), *input).is_err(),
                "expected an error for `{}`",
                input
            );
        }
    }

    #[test]
    fn split_with_spaces_test() {
        let command = Command::new("MyApp")
            .option(CommandOption::new("words").arg(Argument::one_or_more("words")));

        let tokens = tokenize_with_delimiter(
            command.clone(),
            "--words=\"hello world\"|\"good night\"|\"right, bye\"",
            '|',
        )
        .unwrap();

        assert_eq!(
            tokens,
            vec![
                opt("--words"),
                Token::AssignOp('='),
                arg("hello world"),
                arg("good night"),
                arg("right, bye"),
            ]
        );
    }
}
386}