noshell_parser/
lexer.rs

1//! A lexer for generating tokens from a command line.
2
/// A single token read from the command line.
///
/// String-carrying variants borrow their text for the lifetime `'a`; nothing is copied.
/// The type is `Eq` and `Hash` in addition to `PartialEq`, so tokens can be compared
/// exactly and stored in hashed collections.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "defmt", derive(defmt::Format))]
pub enum Token<'a> {
    /// Short flag (e.g. `-f`), holding the character that names the flag.
    ShortFlag(char),

    /// Long flag (e.g. `--flag`), holding the flag name.
    LongFlag(&'a str),

    /// Value (i.e. everything that is not a short or long flag).
    Value(&'a str),
}
16
/// A `Lexer` streams tokens out of the command line input, one argument at a time.
///
/// It behaves like a forward iterator: arguments are visited in order, and the cursor
/// only ever moves towards the end of the input.
#[derive(Clone, Debug)]
pub struct Lexer<'a> {
    // Command line arguments, in the order they were given.
    args: &'a [&'a str],
    // Index into `args` of the next argument to be turned into a token.
    cursor: usize,
}
25
26impl<'a> Lexer<'a> {
27    /// Create a new lexer from the command line input.
28    pub fn new(args: &'a [&'a str]) -> Self {
29        Lexer { args, cursor: 0 }
30    }
31
32    /// Retrieve the next token on the command line if any.
33    pub fn next_token(&mut self) -> Option<Token<'a>> {
34        if self.cursor >= self.args.len() {
35            return None;
36        }
37
38        let arg = self.args[self.cursor];
39        self.cursor += 1;
40
41        // Long flag.
42        if arg.starts_with("--") && arg.len() >= 3 {
43            let (_, name) = arg.split_at(2);
44            return Some(Token::LongFlag(name));
45        }
46
47        // Numbers.
48        if arg.starts_with('-') && is_number(arg) {
49            return Some(Token::Value(arg));
50        }
51
52        // Short flag.
53        if arg.starts_with('-') && arg.len() == 2 {
54            let (_, name) = arg.split_at(1);
55            return Some(Token::ShortFlag(name.chars().nth(0).unwrap_or_default()));
56        }
57
58        Some(Token::Value(arg))
59    }
60}
61
62impl<'a> Iterator for Lexer<'a> {
63    type Item = Token<'a>;
64
65    fn next(&mut self) -> Option<Self::Item> {
66        self.next_token()
67    }
68}
69
/// Tell whether `input` looks like a decimal number, optionally preceded by dashes.
///
/// After dropping every leading `-`, the remaining bytes must be ASCII digits, with:
///
/// - at most one `.`, not in first position and not after an exponent marker;
/// - at most one `e`/`E`, not in first position and not in last position.
///
/// Signs inside the number (e.g. `2e-3`) and a leading `+` are rejected.
///
/// Note: an empty input, or one made only of dashes, contains no rejected character
/// and is therefore reported as a number.
fn is_number(input: &str) -> bool {
    // Remove the front sign(s) if any.
    let digits = input.trim_start_matches('-').as_bytes();

    let mut exponent_at: Option<usize> = None;
    let mut dot_seen = false;

    for (index, &byte) in digits.iter().enumerate() {
        if byte.is_ascii_digit() {
            continue;
        }

        let is_first = index == 0;
        match byte {
            // Exponent marker: only one allowed, never as the first character.
            b'e' | b'E' => {
                if is_first || exponent_at.is_some() {
                    return false;
                }
                exponent_at = Some(index);
            }

            // Dot: only one allowed, never first, and never after the exponent marker.
            b'.' => {
                if is_first || dot_seen || exponent_at.is_some() {
                    return false;
                }
                dot_seen = true;
            }

            _ => return false,
        }
    }

    // An exponent marker must be followed by at least one more character.
    match exponent_at {
        Some(index) => index + 1 != digits.len(),
        None => true,
    }
}
102
#[cfg(test)]
mod tests {
    use googletest::prelude::*;

    use super::*;

    // A dash followed by a single character lexes as a short flag.
    #[test]
    fn it_should_match_short_flag() {
        let token = Lexer::new(&["-f"]).next_token();

        assert_that!(token.is_some(), eq(true));
        assert_that!(token.unwrap(), eq(Token::ShortFlag('f')));
    }

    // A single dash followed by several characters is a plain value, not a flag.
    #[test]
    fn it_should_match_value_starting_with_dash() {
        let token = Lexer::new(&["-flag"]).next_token();

        assert_that!(token.is_some(), eq(true));
        assert_that!(token.unwrap(), eq(Token::Value("-flag")));
    }

    // A double dash followed by a name lexes as a long flag carrying that name.
    #[test]
    fn it_should_match_long_flag() {
        let token = Lexer::new(&["--flag"]).next_token();

        assert_that!(token.is_some(), eq(true));
        assert_that!(token.unwrap(), eq(Token::LongFlag("flag")));
    }

    // Numbers, negative or not, always come out as values.
    #[test]
    fn it_should_match_numbers() {
        let args = ["-2", "2", "-2.", "2.", "-2.e1", "2.e1", "-2e1", "2e1"];

        for token in Lexer::new(&args) {
            assert_that!(token, matches_pattern!(&Token::Value(_)));
        }
    }
}