// noshell_parser/lexer.rs

//! A lexer for generating tokens from a command line.

/// Variant of `Flag` token. Only store the identifier, not the hyphens.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "defmt", derive(defmt::Format))]
pub enum Flag<'a> {
    /// A short flag (e.g. -v).
    Short(char),

    /// A long flag (e.g. --verbose).
    Long(&'a str),
}

impl PartialEq<Flag<'_>> for &str {
    /// Compare a bare identifier (without hyphens) against a parsed flag.
    fn eq(&self, other: &Flag<'_>) -> bool {
        match other {
            // A short flag only matches a one-character identifier. The
            // original accepted any string whose *first* character matched,
            // so e.g. "verbose" wrongly equalled `Short('v')`.
            Flag::Short(id) => {
                let mut chars = self.chars();
                chars.next() == Some(*id) && chars.next().is_none()
            }
            // Compare the identifiers. The original wrote `*id == *other`,
            // which compares a `&str` against a `Flag` and therefore
            // re-enters this very impl: infinite recursion and a stack
            // overflow on the first long-flag comparison.
            Flag::Long(id) => id == self,
        }
    }
}

/// Defines a `Token` that has been read from the command line.
#[derive(Clone, Copy, Debug, PartialEq)]
#[cfg_attr(feature = "defmt", derive(defmt::Format))]
pub enum Token<'a> {
    /// Flag.
    Flag(Flag<'a>),

    /// Value (i.e. everything that is not a short or long flag).
    Value(&'a str),
}

impl Token<'_> {
    /// Evaluate if the token string is a short flag (e.g. `-v`).
    ///
    /// Anything starting with `-` that is not a negative number is
    /// considered flag-like here; the lexer's short-flag branch itself is
    /// stricter (exactly one character after the hyphen).
    #[inline(always)]
    pub fn is_short_flag(input: &str) -> bool {
        input.starts_with('-') && input.len() > 1 && !Self::is_number(input)
    }

    /// Evaluate if the token string is a long flag (e.g. `--verbose`).
    #[inline(always)]
    pub fn is_long_flag(input: &str) -> bool {
        input.starts_with("--") && input.len() > 2
    }

    /// Evaluate if the token string is a flag (short or long).
    #[inline(always)]
    pub fn is_flag(input: &str) -> bool {
        Self::is_short_flag(input) || Self::is_long_flag(input)
    }

    /// Evaluate if the token string represents a number: an optional single
    /// leading `-`, at least one digit, at most one `.`, and at most one
    /// non-leading, non-trailing exponent marker (`e`/`E`).
    ///
    /// Fixes over the original:
    /// - `""`, `-`, `--`, `---` are no longer classified as numbers (a
    ///   number must contain at least one digit);
    /// - only a single leading `-` is stripped (the original removed *all*
    ///   leading hyphens, so `--5` passed as a number).
    pub fn is_number(input: &str) -> bool {
        // Remove the front sign if any (exactly one).
        let unsigned = input.strip_prefix('-').unwrap_or(input);

        let mut position_of_e = None;
        let mut have_seen_dot = false;
        let mut have_seen_digit = false;

        for (i, c) in unsigned.bytes().enumerate() {
            match c {
                // Digits, OK.
                b'0'..=b'9' => have_seen_digit = true,

                // Exponent, OK if unique and not the first character.
                b'e' | b'E' if position_of_e.is_none() && i > 0 => {
                    position_of_e = Some(i);
                }

                // Dot is valid if unique, not the first character and before
                // any exponent.
                b'.' if !have_seen_dot && position_of_e.is_none() && i > 0 => {
                    have_seen_dot = true;
                }

                _ => return false,
            }
        }

        // At least one digit, and the exponent marker (if any) must not be
        // the last character. `have_seen_digit` short-circuits the length
        // arithmetic, so the empty string never underflows.
        have_seen_digit && position_of_e != Some(unsigned.len() - 1)
    }
}

94/// Defines a `Lexer` that is responsible for streaming tokens from the command line input.
95///
96/// A lexer acts like an forward iterator.
97#[derive(Clone, Debug)]
98#[cfg_attr(feature = "defmt", derive(defmt::Format))]
99pub struct Tokens<'a> {
100    argv: &'a [&'a str],
101    cursor: usize,
102}
103
104impl<'a> Tokens<'a> {
105    /// Create a new lexer from the command line input.
106    pub fn new(argv: &'a [&'a str]) -> Self {
107        Tokens { argv, cursor: 0 }
108    }
109
110    /// Retreive an iterator to the next value tokens.
111    #[inline(always)]
112    pub fn values(&self) -> Values<'a> {
113        Values::new(&self.argv[self.cursor..])
114    }
115
116    /// Retreive an iterator to the next tokens.
117    #[inline(always)]
118    pub fn tokens(&self) -> Self {
119        Tokens::new(&self.argv[self.cursor..])
120    }
121}
122
123impl<'a> Iterator for Tokens<'a> {
124    type Item = Token<'a>;
125
126    fn next(&mut self) -> Option<Self::Item> {
127        if self.cursor >= self.argv.len() {
128            return None;
129        }
130
131        let arg = self.argv[self.cursor];
132        self.cursor += 1;
133
134        // Long flag.
135        if arg.starts_with("--") && arg.len() >= 3 {
136            let (_, name) = arg.split_at(2);
137            return Some(Token::Flag(Flag::Long(name)));
138        }
139
140        // Numbers.
141        if arg.starts_with('-') && Token::is_number(arg) {
142            return Some(Token::Value(arg));
143        }
144
145        // Short flag.
146        if arg.starts_with('-') && arg.len() == 2 {
147            let (_, name) = arg.split_at(1);
148            return Some(Token::Flag(Flag::Short(
149                name.chars().nth(0).unwrap_or_default(),
150            )));
151        }
152
153        Some(Token::Value(arg))
154    }
155}
156
/// A trait for creating a [`Tokens`] iterator from other types.
pub trait IntoTokens<'a> {
    /// Convert into the iterator.
    fn into_tokens(self) -> Tokens<'a>;
}

// A `Tokens` lexer is trivially convertible into itself.
impl<'a> IntoTokens<'a> for Tokens<'a> {
    fn into_tokens(self) -> Tokens<'a> {
        self
    }
}

// A borrowed argument slice (e.g. `argv`) converts by wrapping it in a
// fresh lexer positioned at the first argument.
impl<'a> IntoTokens<'a> for &'a [&'a str] {
    fn into_tokens(self) -> Tokens<'a> {
        Tokens::new(self)
    }
}

/// An iterator over value tokens.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "defmt", derive(defmt::Format))]
pub struct Values<'a> {
    // The raw command-line arguments being scanned.
    argv: &'a [&'a str],
    // Index of the next argument to inspect.
    cursor: usize,
    // Latched once a flag has been reached; the iterator stays exhausted.
    done: bool,
}

impl<'a> Values<'a> {
    /// Create a value iterator from the given cursor.
    pub fn new(argv: &'a [&'a str]) -> Self {
        Self {
            argv,
            cursor: 0,
            done: false,
        }
    }
}

195impl<'a> Iterator for Values<'a> {
196    type Item = &'a str;
197
198    fn next(&mut self) -> Option<Self::Item> {
199        if self.done || self.cursor >= self.argv.len() {
200            return None;
201        }
202
203        let arg = self.argv[self.cursor];
204        self.cursor += 1;
205
206        if Token::is_flag(arg) {
207            self.done = true;
208            None
209        } else {
210            Some(arg)
211        }
212    }
213}
214
#[cfg(test)]
mod tests {
    use googletest::prelude::*;

    use super::*;

    /// A lone `-f` must lex as the short flag `f`.
    #[test]
    fn it_should_match_short_flag() {
        let mut lexer = Tokens::new(&["-f"]);

        let token = lexer.next();
        assert_that!(token.is_some(), eq(true));
        assert_that!(token.unwrap(), eq(Token::Flag(Flag::Short('f'))));
    }

    /// A multi-character argument with a single dash is not a short flag;
    /// it must be lexed as a plain value, dash included.
    #[test]
    fn it_should_match_value_starting_with_dash() {
        let mut lexer = Tokens::new(&["-flag"]);

        let token = lexer.next();
        assert_that!(token.is_some(), eq(true));
        assert_that!(token.unwrap(), eq(Token::Value("-flag")));
    }

    /// `--flag` must lex as a long flag whose identifier drops the hyphens.
    #[test]
    fn it_should_match_long_flag() {
        let mut lexer = Tokens::new(&["--flag"]);

        let token = lexer.next();
        assert_that!(token.is_some(), eq(true));
        assert_that!(token.unwrap(), eq(Token::Flag(Flag::Long("flag"))));
    }

    /// Signed, dotted and exponent-bearing numbers are values, never flags.
    #[test]
    fn it_should_match_numbers() {
        let lexer = Tokens::new(&["-2", "2", "-2.", "2.", "-2.e1", "2.e1", "-2e1", "2e1"]);

        for token in lexer {
            assert_that!(token, matches_pattern!(&Token::Value(_)));
        }
    }
}
256}