// noshell_parser/lexer.rs
1//! A lexer for generating tokens from a command line.
2
3use core::marker::PhantomData;
4use core::ops::Deref;
5
/// Variant of [`Token::Flag`]. Only stores the identifier, not the hyphens.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "defmt", derive(defmt::Format))]
pub enum Flag<'a> {
    /// A short, single-character flag (e.g. `-v` is stored as `'v'`).
    Short(char),

    /// A long flag; borrows its name from the input (e.g. `--verbose` is stored as `"verbose"`).
    Long(&'a str),
}

/// Defines a `Token` that has been read from the command line.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "defmt", derive(defmt::Format))]
pub enum Token<'a> {
    /// A short or long flag (see [`Flag`]).
    Flag(Flag<'a>),

    /// A plain value: any argument that is not a short or long flag.
    /// Negative numbers such as `-2` are values, not flags.
    Value(&'a str),
}

impl<'a> Token<'a> {
    /// Evaluate if the token string is a short flag: a hyphen followed by
    /// exactly one character (e.g. `-v`).
    ///
    /// Negative numbers (`-2`) and the bare `--` separator are not short flags.
    #[inline(always)]
    pub fn is_short_flag(input: &str) -> bool {
        input.len() == 2
            && input.starts_with('-')
            && !input.starts_with("--")
            && !Self::is_number(input)
    }

    /// Evaluate if the token string is a short flag, and then return it.
    pub fn as_short_flag(input: &str) -> Option<Self> {
        if Self::is_short_flag(input) {
            // `is_short_flag` guarantees exactly one character follows the hyphen.
            let name = input.chars().nth(1)?;
            Some(Token::Flag(Flag::Short(name)))
        } else {
            None
        }
    }

    /// Evaluate if the token string is a long flag: `--` followed by at least
    /// one character (e.g. `--verbose`).
    #[inline(always)]
    pub fn is_long_flag(input: &str) -> bool {
        input.starts_with("--") && input.len() > 2
    }

    /// Evaluate if the token string is a long flag, and then return it.
    pub fn as_long_flag(input: &'a str) -> Option<Self> {
        if Self::is_long_flag(input) {
            let (_, name) = input.split_at(2);
            Some(Token::Flag(Flag::Long(name)))
        } else {
            None
        }
    }

    /// Evaluate if the token string is a flag (short or long).
    #[inline(always)]
    pub fn is_flag(input: &str) -> bool {
        Self::is_short_flag(input) || Self::is_long_flag(input)
    }

    /// Evaluate if the token string represents a number.
    ///
    /// Accepts an optional single leading minus sign, digits, at most one dot
    /// before any exponent, and a non-leading, non-trailing `e`/`E` exponent
    /// marker. The empty string and a bare sign (`-`) are not numbers.
    pub fn is_number(input: &str) -> bool {
        // Remove the front sign, if any. A number carries at most one sign,
        // so strip a single hyphen only (`--2` is not a number).
        let input = input.strip_prefix('-').unwrap_or(input);

        // A bare sign or an empty string is not a number.
        if input.is_empty() {
            return false;
        }

        let mut position_of_e = None;
        let mut have_seen_dot = false;

        for (i, c) in input.as_bytes().iter().enumerate() {
            match c {
                // Digits, OK.
                b'0'..=b'9' => {}

                // Exponential, OK if unique and not the first character.
                b'e' | b'E' if position_of_e.is_none() && i > 0 => {
                    position_of_e = Some(i);
                }

                // Dot is valid if unique, not the first character and before any exponential.
                b'.' if !have_seen_dot && position_of_e.is_none() && i > 0 => {
                    have_seen_dot = true;
                }

                _ => return false,
            }
        }

        // An exponent marker must not be the last character.
        match position_of_e {
            Some(pos) => pos != input.len() - 1,
            None => true,
        }
    }

    /// Convert an input string into a token.
    ///
    /// Anything that is neither a short nor a long flag becomes a [`Token::Value`].
    pub fn tokenize(input: &'a str) -> Self {
        if let Some(flag) = Self::as_short_flag(input) {
            return flag;
        }

        if let Some(flag) = Self::as_long_flag(input) {
            return flag;
        }

        Token::Value(input)
    }
}
115
/// Defines a `Lexer` that is responsible for streaming tokens from the command line input.
///
/// A lexer behaves like a forward iterator over the underlying argument source.
#[derive(Debug)]
#[cfg_attr(feature = "defmt", derive(defmt::Format))]
pub struct TokenIterator<'a, InnerTy> {
    inner: InnerTy,
    _marker: PhantomData<&'a ()>,
}

impl<'a, InnerTy> TokenIterator<'a, InnerTy> {
    /// Create a new lexer from the command line input.
    pub fn new(inner: InnerTy) -> Self {
        let _marker = PhantomData;
        Self { inner, _marker }
    }
}
135
136impl<'a, InnerTy> Iterator for TokenIterator<'a, InnerTy>
137where
138    InnerTy: Iterator,
139    <InnerTy as Iterator>::Item: Deref<Target = &'a str>,
140{
141    type Item = Token<'a>;
142
143    fn next(&mut self) -> Option<Self::Item> {
144        let item = self.inner.next()?;
145        Some(Token::tokenize(item.deref()))
146    }
147}
148
149impl<'a> From<&'a [&'a str]> for TokenIterator<'a, core::slice::Iter<'a, &'a str>> {
150    fn from(value: &'a [&'a str]) -> Self {
151        TokenIterator::new(value.iter())
152    }
153}
154
#[cfg(test)]
mod tests {
    use speculoos::prelude::*;

    use super::*;

    #[test]
    fn it_should_match_short_flag() {
        // A lone hyphen plus one character lexes as a short flag.
        let mut tokens = TokenIterator::new(["-f"].iter());

        assert_that!(tokens.next())
            .is_some()
            .is_equal_to(Token::Flag(Flag::Short('f')));
    }

    #[test]
    fn it_should_match_value_starting_with_dash() {
        // A single hyphen followed by several characters is not a flag.
        let mut tokens = TokenIterator::new(["-flag"].iter());

        assert_that!(tokens.next())
            .is_some()
            .is_equal_to(Token::Value("-flag"));
    }

    #[test]
    fn it_should_match_long_flag() {
        // A double hyphen plus a name lexes as a long flag.
        let mut tokens = TokenIterator::new(["--flag"].iter());

        assert_that!(tokens.next())
            .is_some()
            .is_equal_to(Token::Flag(Flag::Long("flag")));
    }

    #[test]
    fn it_should_match_numbers() {
        // Numeric arguments, signed or not, must never be mistaken for flags.
        let inputs = ["-2", "2", "-2.", "2.", "-2.e1", "2.e1", "-2e1", "2e1"];

        for token in TokenIterator::new(inputs.iter()) {
            assert_that!(token).matches(|x| matches!(x, &Token::Value(_)));
        }
    }
}