simple_interpreter/
lexer.rs
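//! A lexer for the simple interpreter: it splits an input expression into
//! pieces and turns each piece into a `Token`, using a caller-supplied map
//! for operator and brace literals and parsing everything else as a
//! hexadecimal `u8` constant.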

use crate::token::*;
use std::collections::HashMap;

/// Splits an expression string into [`Token`]s.
#[derive(Clone)]
pub struct Lexer {
    /// Maps literal token strings such as "&" or "(" to their `Token` values.
    literal_token_map: HashMap<&'static str, Token>,
}

impl Lexer {
    /// Creates a lexer that recognizes the literals in `literal_token_map`.
    pub fn new(literal_token_map: HashMap<&'static str, Token>) -> Self {
        Lexer { literal_token_map }
    }

    /// Tokenizes `expr`, returning an error on the first unrecognized piece.
    pub fn tokenize(&self, expr: &str) -> Result<Vec<Token>, &'static str> {
        let parts = Self::split(expr);
        let mut tokens: Vec<Token> = Vec::with_capacity(parts.len());
        for part in parts.iter() {
            tokens.push(self.try_token_from(part)?);
        }
        Ok(tokens)
    }

    /// Splits an expression into pieces: runs of hex digits form one piece,
    /// any other non-whitespace character is a piece of its own, and
    /// whitespace only terminates the piece being read.
    fn split(expr: &str) -> Vec<String> {
        let mut parts: Vec<String> = Vec::new();
        let mut reading_num = false;
        for ch in expr.chars() {
            match ch {
                c if c.is_whitespace() => {
                    // Whitespace ends the current constant but emits nothing.
                    reading_num = false;
                }
                '0'..='9' | 'a'..='f' | 'A'..='F' => {
                    if !reading_num {
                        // Start a new hex constant.
                        parts.push(ch.to_string());
                        reading_num = true;
                    } else {
                        // Extend the constant currently being read.
                        parts.last_mut().unwrap().push(ch);
                    }
                }
                _ => {
                    // Operators and braces are always single-character pieces.
                    reading_num = false;
                    parts.push(ch.to_string());
                }
            }
        }
        parts
    }

    /// Looks up `token_str` among the literal tokens, falling back to
    /// parsing it as a hexadecimal `u8` constant.
    fn try_token_from(&self, token_str: &str) -> Result<Token, &'static str> {
        match self.literal_token_map.get(token_str) {
            Some(token) => Ok(token.clone()),
            None => u8::from_str_radix(token_str, 16)
                .map(Token::ConstVal)
                .map_err(|_| "Incorrect token found!"),
        }
    }
}
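
// A minimal usage sketch, assuming `crate::token::Token` provides at least
// the `AndOp` and `ConstVal(u8)` variants exercised by the tests below:
//
//     use std::collections::HashMap;
//     use crate::token::Token;
//
//     let lexer = Lexer::new(HashMap::from([("&", Token::AndOp)]));
//     assert_eq!(
//         lexer.tokenize("a & b").unwrap(),
//         vec![Token::ConstVal(0xA), Token::AndOp, Token::ConstVal(0xB)]
//     );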

#[cfg(test)]
mod tests {
    use crate::lexer::*;
    use crate::token::Token::*;
    use lazy_static::lazy_static;

    lazy_static! {
        static ref LITERAL_TOKEN_MAP: HashMap<&'static str, Token> = HashMap::from([
            ("&", AndOp),
            ("|", OrOp),
            ("^", XorOp),
            ("~", NotOp),
            ("(", LeftBrace),
            (")", RightBrace),
        ]);
    }

    #[test]
    fn tokenize_from_strings_vec_works() {
        let lexer = Lexer::new(LITERAL_TOKEN_MAP.clone());

        assert_eq!(
            lexer.tokenize("ab &(c5 ^10 ) ").unwrap(),
            vec![ConstVal(0xAB), AndOp, LeftBrace, ConstVal(0xC5), XorOp, ConstVal(0x10), RightBrace]
        );
        assert!(lexer.tokenize("a b").is_ok());
        assert!(lexer.tokenize("g5").is_err());
        assert!(lexer.tokenize("ff,33").is_err());
    }

    #[test]
    fn split_on_strings_works() {
        assert_eq!(
            Lexer::split("~3f|ab &( c5^10 ) "),
            vec!["~", "3f", "|", "ab", "&", "(", "c5", "^", "10", ")"]
        );
        // Uppercase hex digits are grouped the same way as lowercase ones.
        assert_eq!(Lexer::split("AB cd"), vec!["AB", "cd"]);
    }

    #[test]
    fn try_token_from_str_works() {
        let lexer = Lexer::new(LITERAL_TOKEN_MAP.clone());

        assert_eq!(lexer.try_token_from("&").unwrap(), Token::AndOp);
        assert_eq!(lexer.try_token_from("~").unwrap(), Token::NotOp);
        assert_eq!(lexer.try_token_from("ff").unwrap(), Token::ConstVal(0xff));
        assert!(lexer.try_token_from("m55").is_err());
    }
}