kal/lex/
lexer.rs

//! Generate command fragments (tokens) from a command string.
2
3use std::{fmt, iter::Peekable, str::CharIndices};
4
5use super::{CommandToken, RawStringPattern};
6
/// An error that can appear while lexing a command.
///
/// Every variant carries the byte position the error is reported at and the
/// slice of the source command starting at that position.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CommandLexError<'a> {
    /// There is an unclosed quote.
    ///
    /// Fields: byte position of the opening quote, and the source from that
    /// position to the end of the command.
    UnclosedQuote(usize, &'a str),

    /// There is whitespace before/after the equal sign of a named argument.
    ///
    /// Fields: byte position where the named argument starts, and the source
    /// from that position to the end of the command.
    NamedProhibitsWhitespace(usize, &'a str),

    /// There is another named-assignment syntax after the equal sign.
    ///
    /// Fields: byte position where the outer named argument starts, and the
    /// source from that position to the end of the command.
    NamedCannotContainNamed(usize, &'a str),
}
19
20impl fmt::Display for CommandLexError<'_> {
21    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
22        match self {
23            CommandLexError::UnclosedQuote(pos, src_part) => {
24                write!(
25                    f,
26                    "Unclosed quote at position {} in command: {}",
27                    pos, src_part
28                )
29            }
30            CommandLexError::NamedProhibitsWhitespace(pos, src_part) => write!(
31                f,
32                "Named argument prohibits whitespace at position {} in command: {}",
33                pos, src_part
34            ),
35            CommandLexError::NamedCannotContainNamed(pos, src_part) => write!(
36                f,
37                "Named argument cannot contain named argument at position {} in command: {}",
38                pos, src_part
39            ),
40        }
41    }
42}
43
44impl std::error::Error for CommandLexError<'_> {}
45
/// A lexer transforming a command string into a sequence of [`CommandToken`].
///
/// Tokens are produced through the [`Iterator`] implementation. Once an error
/// has been yielded, the lexer stops and yields nothing further.
#[derive(Debug)]
pub struct CommandLexer<'a> {
    /// The complete command string; tokens borrow slices of it.
    source: &'a str,
    /// Cursor over `source`, yielding `(byte index, char)` pairs with
    /// one-item lookahead.
    iter: Peekable<CharIndices<'a>>,

    /// Set while the value part of a named argument (after `=`) is lexed.
    in_named: bool,
    /// Set once an error has been returned; later calls to `next` yield `None`.
    failed: bool,
}
54
55impl<'a> CommandLexer<'a> {
56    /// Create a new `CommandLexer` instance from a command string.
57    pub fn new(source: &'a str) -> Self {
58        CommandLexer {
59            source,
60            iter: source.char_indices().peekable(),
61            in_named: false,
62            failed: false,
63        }
64    }
65}
66
67impl<'a> CommandLexer<'a> {
68    fn next_whitespace(&mut self) -> Option<Result<CommandToken<'a>, CommandLexError<'a>>> {
69        let (first, _) = self.iter.peek()?;
70        let first = *first;
71
72        let last = loop {
73            match self.iter.peek() {
74                Some((_, ch)) if ch.is_whitespace() => {
75                    self.iter.next();
76                }
77                Some((i, _)) => break *i,
78                None => break self.source.len(),
79            }
80        };
81        Some(Ok(CommandToken::Whitespace(&self.source[first..last])))
82    }
83    fn next_quote(&mut self) -> Option<Result<CommandToken<'a>, CommandLexError<'a>>> {
84        let (first, ch) = self.iter.next()?;
85        let mut src = String::new();
86
87        let src_first = match self.iter.next() {
88            None => {
89                return Some(Err(CommandLexError::UnclosedQuote(
90                    first,
91                    &self.source[first..],
92                )))
93            }
94            Some((last, c)) if c == ch => {
95                return Some(Ok(CommandToken::QuotedString(
96                    &self.source[first..=last],
97                    "".to_string(),
98                    &self.source[last..=self
99                        .iter
100                        .peek()
101                        .map(|(next_i, _)| *next_i)
102                        .unwrap_or(self.source.len())],
103                )))
104            }
105            Some((src_first, ch)) => {
106                src.push(ch);
107                src_first
108            }
109        };
110
111        let mut previous_backslash = false;
112        let src_last = loop {
113            match self.iter.next() {
114                Some((src_last, c)) if c == ch => {
115                    if previous_backslash {
116                        src.push(c);
117                        previous_backslash = false;
118                    } else {
119                        break src_last;
120                    }
121                }
122                Some((_, '\\')) => {
123                    if previous_backslash {
124                        src.push('\\');
125                        previous_backslash = false;
126                    } else {
127                        previous_backslash = true;
128                    }
129                }
130                Some((_, ch)) => {
131                    src.push(ch);
132                    if previous_backslash {
133                        previous_backslash = false;
134                    }
135                }
136                None => {
137                    return Some(Err(CommandLexError::UnclosedQuote(
138                        first,
139                        &self.source[first..],
140                    )))
141                }
142            }
143        };
144
145        Some(Ok(CommandToken::QuotedString(
146            &self.source[first..src_first],
147            src,
148            &self.source[src_last
149                ..self
150                    .iter
151                    .peek()
152                    .map(|(next_i, _)| *next_i)
153                    .unwrap_or(self.source.len())],
154        )))
155    }
156    fn next_raw_string_or_named(
157        &mut self,
158    ) -> Option<Result<CommandToken<'a>, CommandLexError<'a>>> {
159        let (first, _) = self.iter.peek()?;
160        let first = *first;
161
162        let mut is_numeric = true;
163        let mut met_float_dot = false;
164        let last = loop {
165            match self.iter.peek() {
166                Some((i, ch)) if ch.is_whitespace() => break *i,
167                Some((i, ch)) => {
168                    let i = *i;
169                    match ch {
170                        '=' => {
171                            let name = &self.source[first..i];
172                            if self.in_named {
173                                return Some(Ok(CommandToken::Named(
174                                    name,
175                                    Box::new(CommandToken::Whitespace("")),
176                                )));
177                            }
178                            if name.is_empty() {
179                                return Some(Err(CommandLexError::NamedProhibitsWhitespace(
180                                    first,
181                                    &self.source[first..],
182                                )));
183                            }
184                            self.iter.next();
185                            self.in_named = true;
186                            let token = match self.next() {
187                                Some(Ok(CommandToken::Whitespace(_))) | None => {
188                                    return Some(Err(CommandLexError::NamedProhibitsWhitespace(
189                                        first,
190                                        &self.source[first..],
191                                    )))
192                                }
193                                Some(Ok(CommandToken::Named(..))) => {
194                                    return Some(Err(CommandLexError::NamedCannotContainNamed(
195                                        first,
196                                        &self.source[first..],
197                                    )))
198                                }
199                                Some(Ok(expr)) => expr,
200                                otherwise => return otherwise,
201                            };
202                            self.in_named = false;
203                            return Some(Ok(CommandToken::Named(name, Box::new(token))));
204                        }
205                        '0'..='9' => {
206                            self.iter.next();
207                        }
208                        '.' => {
209                            if met_float_dot {
210                                is_numeric = false;
211                            } else {
212                                met_float_dot = true;
213                            }
214                            self.iter.next();
215                        }
216                        '-' | '+' => {
217                            if first != i {
218                                is_numeric = false;
219                                met_float_dot = false;
220                            }
221                            self.iter.next();
222                        }
223                        _ => {
224                            is_numeric = false;
225                            met_float_dot = false;
226                            self.iter.next();
227                        }
228                    }
229                }
230                None => break self.source.len(),
231            }
232        };
233
234        let pattern = match (is_numeric, met_float_dot) {
235            (false, _) => RawStringPattern::Unrecognized,
236            (true, true) => RawStringPattern::Float,
237            (true, false) => RawStringPattern::Integer,
238        };
239
240        Some(Ok(CommandToken::RawString(
241            &self.source[first..last],
242            pattern,
243        )))
244    }
245}
246
247impl<'a> Iterator for CommandLexer<'a> {
248    type Item = Result<CommandToken<'a>, CommandLexError<'a>>;
249
250    fn next(&mut self) -> Option<Self::Item> {
251        if self.failed {
252            return None;
253        }
254
255        let (_, ch) = self.iter.peek()?;
256
257        let res = match ch {
258            ch if ch.is_whitespace() => self.next_whitespace(),
259            '"' | '\'' => self.next_quote(),
260            _ => self.next_raw_string_or_named(),
261        };
262
263        if res.as_ref().map(|res| res.is_err()).unwrap_or(false) {
264            self.failed = true;
265        }
266
267        res
268    }
269}