evalexpr/token/
mod.rs

1use std::str::FromStr;
2
3use crate::{
4    error::{EvalexprError, EvalexprResult},
5    value::numeric_types::{
6        default_numeric_types::DefaultNumericTypes, EvalexprInt, EvalexprNumericTypes,
7    },
8};
9
10mod display;
11
12#[derive(Clone, PartialEq, Debug)]
13pub enum Token<NumericTypes: EvalexprNumericTypes = DefaultNumericTypes> {
14    // Arithmetic
15    Plus,
16    Minus,
17    Star,
18    Slash,
19    Percent,
20    Hat,
21
22    // Logic
23    Eq,
24    Neq,
25    Gt,
26    Lt,
27    Geq,
28    Leq,
29    And,
30    Or,
31    Not,
32
33    // Precedence
34    LBrace,
35    RBrace,
36
37    // Assignment
38    Assign,
39    PlusAssign,
40    MinusAssign,
41    StarAssign,
42    SlashAssign,
43    PercentAssign,
44    HatAssign,
45    AndAssign,
46    OrAssign,
47
48    // Special
49    Comma,
50    Semicolon,
51
52    // Values, Variables and Functions
53    Identifier(String),
54    Float(NumericTypes::Float),
55    Int(NumericTypes::Int),
56    Boolean(bool),
57    String(String),
58}
59
60/// A partial token is an input character whose meaning depends on the characters around it.
61#[derive(Clone, Debug, PartialEq)]
62pub enum PartialToken<NumericTypes: EvalexprNumericTypes = DefaultNumericTypes> {
63    /// A partial token that unambiguously maps to a single token.
64    Token(Token<NumericTypes>),
65    /// A partial token that is a literal.
66    Literal(String),
67    /// A plus character '+'.
68    Plus,
69    /// A minus character '-'.
70    Minus,
71    /// A star character '*'.
72    Star,
73    /// A slash character '/'.
74    Slash,
75    /// A percent character '%'.
76    Percent,
77    /// A hat character '^'.
78    Hat,
79    /// A whitespace character, e.g. ' '.
80    Whitespace,
81    /// An equal-to character '='.
82    Eq,
83    /// An exclamation mark character '!'.
84    ExclamationMark,
85    /// A greater-than character '>'.
86    Gt,
87    /// A lower-than character '<'.
88    Lt,
89    /// An ampersand character '&'.
90    Ampersand,
91    /// A vertical bar character '|'.
92    VerticalBar,
93}
94
95// Make this a const fn as soon as is_whitespace and to_string get stable (issue #57563)
96fn char_to_partial_token<NumericTypes: EvalexprNumericTypes>(
97    c: char,
98) -> PartialToken<NumericTypes> {
99    match c {
100        '+' => PartialToken::Plus,
101        '-' => PartialToken::Minus,
102        '*' => PartialToken::Star,
103        '/' => PartialToken::Slash,
104        '%' => PartialToken::Percent,
105        '^' => PartialToken::Hat,
106
107        '(' => PartialToken::Token(Token::LBrace),
108        ')' => PartialToken::Token(Token::RBrace),
109
110        ',' => PartialToken::Token(Token::Comma),
111        ';' => PartialToken::Token(Token::Semicolon),
112
113        '=' => PartialToken::Eq,
114        '!' => PartialToken::ExclamationMark,
115        '>' => PartialToken::Gt,
116        '<' => PartialToken::Lt,
117        '&' => PartialToken::Ampersand,
118        '|' => PartialToken::VerticalBar,
119
120        c => {
121            if c.is_whitespace() {
122                PartialToken::Whitespace
123            } else {
124                PartialToken::Literal(c.to_string())
125            }
126        },
127    }
128}
129
130impl<NumericTypes: EvalexprNumericTypes> Token<NumericTypes> {
131    #[cfg(not(tarpaulin_include))]
132    pub(crate) const fn is_leftsided_value(&self) -> bool {
133        match self {
134            Token::Plus => false,
135            Token::Minus => false,
136            Token::Star => false,
137            Token::Slash => false,
138            Token::Percent => false,
139            Token::Hat => false,
140
141            Token::Eq => false,
142            Token::Neq => false,
143            Token::Gt => false,
144            Token::Lt => false,
145            Token::Geq => false,
146            Token::Leq => false,
147            Token::And => false,
148            Token::Or => false,
149            Token::Not => false,
150
151            Token::LBrace => true,
152            Token::RBrace => false,
153
154            Token::Comma => false,
155            Token::Semicolon => false,
156
157            Token::Assign => false,
158            Token::PlusAssign => false,
159            Token::MinusAssign => false,
160            Token::StarAssign => false,
161            Token::SlashAssign => false,
162            Token::PercentAssign => false,
163            Token::HatAssign => false,
164            Token::AndAssign => false,
165            Token::OrAssign => false,
166
167            Token::Identifier(_) => true,
168            Token::Float(_) => true,
169            Token::Int(_) => true,
170            Token::Boolean(_) => true,
171            Token::String(_) => true,
172        }
173    }
174
175    #[cfg(not(tarpaulin_include))]
176    pub(crate) const fn is_rightsided_value(&self) -> bool {
177        match self {
178            Token::Plus => false,
179            Token::Minus => false,
180            Token::Star => false,
181            Token::Slash => false,
182            Token::Percent => false,
183            Token::Hat => false,
184
185            Token::Eq => false,
186            Token::Neq => false,
187            Token::Gt => false,
188            Token::Lt => false,
189            Token::Geq => false,
190            Token::Leq => false,
191            Token::And => false,
192            Token::Or => false,
193            Token::Not => false,
194
195            Token::LBrace => false,
196            Token::RBrace => true,
197
198            Token::Comma => false,
199            Token::Semicolon => false,
200
201            Token::Assign => false,
202            Token::PlusAssign => false,
203            Token::MinusAssign => false,
204            Token::StarAssign => false,
205            Token::SlashAssign => false,
206            Token::PercentAssign => false,
207            Token::HatAssign => false,
208            Token::AndAssign => false,
209            Token::OrAssign => false,
210
211            Token::Identifier(_) => true,
212            Token::Float(_) => true,
213            Token::Int(_) => true,
214            Token::Boolean(_) => true,
215            Token::String(_) => true,
216        }
217    }
218
219    #[cfg(not(tarpaulin_include))]
220    pub(crate) fn is_assignment(&self) -> bool {
221        use Token::*;
222        matches!(
223            self,
224            Assign
225                | PlusAssign
226                | MinusAssign
227                | StarAssign
228                | SlashAssign
229                | PercentAssign
230                | HatAssign
231                | AndAssign
232                | OrAssign
233        )
234    }
235}
236
237/// Parses an escape sequence within a string literal.
238fn parse_escape_sequence<Iter: Iterator<Item = char>, NumericTypes: EvalexprNumericTypes>(
239    iter: &mut Iter,
240) -> EvalexprResult<char, NumericTypes> {
241    match iter.next() {
242        Some('"') => Ok('"'),
243        Some('\\') => Ok('\\'),
244        Some(c) => Err(EvalexprError::IllegalEscapeSequence(format!("\\{}", c))),
245        None => Err(EvalexprError::IllegalEscapeSequence("\\".to_string())),
246    }
247}
248
249/// Parses a string value from the given character iterator.
250///
251/// The first character from the iterator is interpreted as first character of the string.
252/// The string is terminated by a double quote `"`.
253/// Occurrences of `"` within the string can be escaped with `\`.
254/// The backslash needs to be escaped with another backslash `\`.
255fn parse_string_literal<Iter: Iterator<Item = char>, NumericTypes: EvalexprNumericTypes>(
256    mut iter: &mut Iter,
257) -> EvalexprResult<PartialToken<NumericTypes>, NumericTypes> {
258    let mut result = String::new();
259
260    while let Some(c) = iter.next() {
261        match c {
262            '"' => return Ok(PartialToken::Token(Token::String(result))),
263            '\\' => result.push(parse_escape_sequence(&mut iter)?),
264            c => result.push(c),
265        }
266    }
267
268    Err(EvalexprError::UnmatchedDoubleQuote)
269}
270
271fn try_skip_comment<NumericTypes: EvalexprNumericTypes>(
272    iter: &mut std::iter::Peekable<std::str::Chars<'_>>,
273) -> EvalexprResult<bool, NumericTypes> {
274    let mut matched = false;
275    if let Some(lookahead) = iter.peek() {
276        if *lookahead == '/' {
277            matched = true;
278            iter.next();
279            // line comment
280            for c in iter {
281                if c == '\n' {
282                    break;
283                }
284            }
285        } else if *lookahead == '*' {
286            // inline commment
287            iter.next();
288            while let Some(c) = iter.next() {
289                if let Some(next) = iter.peek() {
290                    if c == '*' && *next == '/' {
291                        matched = true;
292                        iter.next();
293                        break;
294                    }
295                }
296            }
297            if !matched {
298                return Err(EvalexprError::CustomMessage(
299                    "unmatched inline comment".into(),
300                ));
301            }
302        }
303    }
304
305    Ok(matched)
306}
307
308/// Converts a string to a vector of partial tokens.
309fn str_to_partial_tokens<NumericTypes: EvalexprNumericTypes>(
310    string: &str,
311) -> EvalexprResult<Vec<PartialToken<NumericTypes>>, NumericTypes> {
312    let mut result = Vec::new();
313    let mut iter = string.chars().peekable();
314
315    while let Some(c) = iter.next() {
316        if c == '"' {
317            result.push(parse_string_literal(&mut iter)?);
318        } else {
319            let partial_token = char_to_partial_token(c);
320
321            if let PartialToken::Slash = partial_token {
322                if try_skip_comment(&mut iter)? {
323                    continue;
324                }
325            }
326
327            let if_let_successful =
328                if let (Some(PartialToken::Literal(last)), PartialToken::Literal(literal)) =
329                    (result.last_mut(), &partial_token)
330                {
331                    last.push_str(literal);
332                    true
333                } else {
334                    false
335                };
336
337            if !if_let_successful {
338                result.push(partial_token);
339            }
340        }
341    }
342    Ok(result)
343}
344
345/// Resolves all partial tokens by converting them to complex tokens.
346fn partial_tokens_to_tokens<NumericTypes: EvalexprNumericTypes>(
347    mut tokens: &[PartialToken<NumericTypes>],
348) -> EvalexprResult<Vec<Token<NumericTypes>>, NumericTypes> {
349    let mut result = Vec::new();
350    while !tokens.is_empty() {
351        let first = tokens[0].clone();
352        let second = tokens.get(1).cloned();
353        let third = tokens.get(2).cloned();
354        let mut cutoff = 2;
355
356        result.extend(match first {
357            PartialToken::Token(token) => {
358                cutoff = 1;
359                Some(token)
360            },
361            PartialToken::Plus => match second {
362                Some(PartialToken::Eq) => Some(Token::PlusAssign),
363                _ => {
364                    cutoff = 1;
365                    Some(Token::Plus)
366                },
367            },
368            PartialToken::Minus => match second {
369                Some(PartialToken::Eq) => Some(Token::MinusAssign),
370                _ => {
371                    cutoff = 1;
372                    Some(Token::Minus)
373                },
374            },
375            PartialToken::Star => match second {
376                Some(PartialToken::Eq) => Some(Token::StarAssign),
377                _ => {
378                    cutoff = 1;
379                    Some(Token::Star)
380                },
381            },
382            PartialToken::Slash => match second {
383                Some(PartialToken::Eq) => Some(Token::SlashAssign),
384                _ => {
385                    cutoff = 1;
386                    Some(Token::Slash)
387                },
388            },
389            PartialToken::Percent => match second {
390                Some(PartialToken::Eq) => Some(Token::PercentAssign),
391                _ => {
392                    cutoff = 1;
393                    Some(Token::Percent)
394                },
395            },
396            PartialToken::Hat => match second {
397                Some(PartialToken::Eq) => Some(Token::HatAssign),
398                _ => {
399                    cutoff = 1;
400                    Some(Token::Hat)
401                },
402            },
403            PartialToken::Literal(literal) => {
404                cutoff = 1;
405                if let Ok(number) = parse_integer::<NumericTypes>(&literal) {
406                    Some(Token::Int(number))
407                } else if let Ok(number) = literal.parse::<NumericTypes::Float>() {
408                    Some(Token::Float(number))
409                } else if let Ok(boolean) = literal.parse::<bool>() {
410                    Some(Token::Boolean(boolean))
411                } else {
412                    // If there are two tokens following this one, check if the next one is
413                    // a plus or a minus. If so, then attempt to parse all three tokens as a
414                    // scientific notation number of the form `<coefficient>e{+,-}<exponent>`,
415                    // for example [Literal("10e"), Minus, Literal("3")] => "1e-3".parse().
416                    match (second, third) {
417                        (Some(second), Some(third))
418                            if second == PartialToken::Minus || second == PartialToken::Plus =>
419                        {
420                            if let Ok(number) = format!("{}{}{}", literal, second, third)
421                                .parse::<NumericTypes::Float>()
422                            {
423                                cutoff = 3;
424                                Some(Token::Float(number))
425                            } else {
426                                Some(Token::Identifier(literal.to_string()))
427                            }
428                        },
429                        _ => Some(Token::Identifier(literal.to_string())),
430                    }
431                }
432            },
433            PartialToken::Whitespace => {
434                cutoff = 1;
435                None
436            },
437            PartialToken::Eq => match second {
438                Some(PartialToken::Eq) => Some(Token::Eq),
439                _ => {
440                    cutoff = 1;
441                    Some(Token::Assign)
442                },
443            },
444            PartialToken::ExclamationMark => match second {
445                Some(PartialToken::Eq) => Some(Token::Neq),
446                _ => {
447                    cutoff = 1;
448                    Some(Token::Not)
449                },
450            },
451            PartialToken::Gt => match second {
452                Some(PartialToken::Eq) => Some(Token::Geq),
453                _ => {
454                    cutoff = 1;
455                    Some(Token::Gt)
456                },
457            },
458            PartialToken::Lt => match second {
459                Some(PartialToken::Eq) => Some(Token::Leq),
460                _ => {
461                    cutoff = 1;
462                    Some(Token::Lt)
463                },
464            },
465            PartialToken::Ampersand => match second {
466                Some(PartialToken::Ampersand) => match third {
467                    Some(PartialToken::Eq) => {
468                        cutoff = 3;
469                        Some(Token::AndAssign)
470                    },
471                    _ => Some(Token::And),
472                },
473                _ => return Err(EvalexprError::unmatched_partial_token(first, second)),
474            },
475            PartialToken::VerticalBar => match second {
476                Some(PartialToken::VerticalBar) => match third {
477                    Some(PartialToken::Eq) => {
478                        cutoff = 3;
479                        Some(Token::OrAssign)
480                    },
481                    _ => Some(Token::Or),
482                },
483                _ => return Err(EvalexprError::unmatched_partial_token(first, second)),
484            },
485        });
486
487        tokens = &tokens[cutoff..];
488    }
489    Ok(result)
490}
491
492pub(crate) fn tokenize<NumericTypes: EvalexprNumericTypes>(
493    string: &str,
494) -> EvalexprResult<Vec<Token<NumericTypes>>, NumericTypes> {
495    partial_tokens_to_tokens(&str_to_partial_tokens(string)?)
496}
497
498/// Can parse decimal (base 10), hexadecimal (base 16), binary (base 2), or octal (base 8).
499fn parse_integer<NumericTypes: EvalexprNumericTypes>(
500    literal: &str,
501) -> Result<NumericTypes::Int, ()> {
502    if let Some(literal) = literal.strip_prefix("0x") {
503        NumericTypes::Int::from_hex_str(literal)
504    } else if let Some(literal) = literal.strip_prefix("0b") {
505        NumericTypes::Int::from_binary_str(literal)
506    } else if let Some(literal) = literal.strip_prefix("0o") {
507        NumericTypes::Int::from_octal_str(literal)
508    } else {
509        NumericTypes::Int::from_str(literal).map_err(|_| ())
510    }
511}
512
#[cfg(test)]
mod tests {
    use crate::{
        token::{char_to_partial_token, tokenize, Token},
        value::numeric_types::default_numeric_types::DefaultNumericTypes,
    };
    use std::fmt::Write;

    /// Every operator and separator token, each followed by exactly one space.
    const ALL_OPERATORS: &str =
        "+ - * / % ^ == != > < >= <= && || ! ( ) = += -= *= /= %= ^= &&= ||= , ; ";

    /// Tokenizes `source` and renders every resulting token followed by a single space.
    fn tokenize_and_render(source: &str) -> String {
        let mut rendered = String::new();

        for token in tokenize::<DefaultNumericTypes>(source).unwrap() {
            write!(rendered, "{} ", token).unwrap();
        }

        rendered
    }

    // Every single-character partial token must be displayed as the character it was made from.
    #[test]
    fn test_partial_token_display() {
        for c in "+-*/%^(),;=!><&| ".chars() {
            assert_eq!(
                c.to_string(),
                char_to_partial_token::<DefaultNumericTypes>(c).to_string()
            );
        }
    }

    // Displaying the tokens of the operator list must reproduce the operator list.
    #[test]
    fn test_token_display() {
        assert_eq!(ALL_OPERATORS, tokenize_and_render(ALL_OPERATORS));
    }

    // Line comments and inline comments must not leave any trace in the token stream.
    #[test]
    fn test_skip_comment() {
        let with_comments = r"+ - * / % ^ == != >
            < >= <= && /* inline comment */ || ! ( )
            = += -= *= /= %= ^=
            // line comment
            &&= ||= , ;
            ";

        assert_eq!(ALL_OPERATORS, tokenize_and_render(with_comments));
    }

    #[test]
    fn assignment_lhs_is_identifier() {
        let expected: Vec<Token<DefaultNumericTypes>> = vec![
            Token::Identifier("a".to_string()),
            Token::Assign,
            Token::Int(1),
        ];

        assert_eq!(tokenize::<DefaultNumericTypes>("a = 1").unwrap(), expected);
    }
}