simple_interpreter/lexer.rs

use crate::token::*;
use std::collections::HashMap;
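
/// Turns an expression string into a sequence of `Token`s. Operator and
/// brace lexemes are looked up in the injected `literal_token_map`; every
/// other lexeme is parsed as a hexadecimal `u8` constant.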
#[derive(Clone)]
pub struct Lexer {
    literal_token_map: HashMap<&'static str, Token>
}

impl Lexer {
    pub fn new(literal_token_map: HashMap<&'static str, Token>) -> Self {
        Lexer {
            literal_token_map
        }
    }
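
    /// Tokenizes `expr`, stopping at the first unrecognized lexeme and
    /// returning its error message.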
    pub fn tokenize(&self, expr: &str) -> Result<Vec<Token>, &str> {
        let lexemes = Self::split(expr);
        let mut tokens: Vec<Token> = Vec::with_capacity(lexemes.len());
        for lexeme in &lexemes {
            tokens.push(self.try_token_from(lexeme)?);
        }
        Ok(tokens)
    }
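
    /// Splits `expr` into lexemes: consecutive hex digits (lowercase only)
    /// are grouped into one lexeme, spaces are dropped, and any other
    /// character becomes its own single-character lexeme.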
    fn split(expr: &str) -> Vec<String> {
        let mut lexemes: Vec<String> = Vec::new();
        let mut reading_num = false;
        for ch in expr.chars() {
            match ch {
                // Whitespace ends the current number and is discarded.
                ' ' => {
                    reading_num = false;
                }
                // A hex digit either starts a new lexeme or extends the
                // number currently being read.
                '0'..='9' | 'a'..='f' => {
                    if !reading_num {
                        lexemes.push(ch.to_string());
                        reading_num = true;
                    } else {
                        lexemes.last_mut().unwrap().push(ch);
                    }
                }
                // Any other character is a single-character lexeme.
                _ => {
                    reading_num = false;
                    lexemes.push(ch.to_string());
                }
            }
        }
        lexemes
    }
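
    /// Maps a single lexeme to a `Token`: literals are looked up in the
    /// map; anything else must parse as a hexadecimal `u8`.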
    fn try_token_from(&self, token_str: &str) -> Result<Token, &str> {
        match self.literal_token_map.get(token_str) {
            Some(token) => Ok(token.clone()),
            None => match u8::from_str_radix(token_str, 16) {
                Ok(val) => Ok(Token::ConstVal(val)),
                Err(_) => Err("Unrecognized token!")
            }
        }
    }
}
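
// A minimal usage sketch (illustrative, not part of the original module),
// assuming `crate::token::Token` provides at least the `AndOp` and
// `ConstVal(u8)` variants exercised by the tests below:
//
//     let map = HashMap::from([("&", Token::AndOp)]);
//     let lexer = Lexer::new(map);
//     assert_eq!(
//         lexer.tokenize("ff & 0a").unwrap(),
//         vec![Token::ConstVal(0xFF), Token::AndOp, Token::ConstVal(0x0A)]
//     );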

#[cfg(test)]
mod tests {
    use crate::lexer::*;
    use crate::token::Token;
    use crate::token::Token::*;
    use std::collections::HashMap;
    use lazy_static::lazy_static;

    lazy_static! {
        static ref LITERAL_TOKEN_MAP: HashMap<&'static str, Token> = HashMap::from([
            ("&", AndOp),
            ("|", OrOp),
            ("^", XorOp),
            ("~", NotOp),
            ("(", LeftBrace),
            (")", RightBrace)
        ]);
    }

    #[test]
    fn tokenize_from_strings_vec_works() {
        let lexer = Lexer::new(LITERAL_TOKEN_MAP.clone());

        assert_eq!(
            lexer.tokenize("ab &(c5 ^10 ) ").unwrap(),
            vec![ConstVal(0xAB), AndOp, LeftBrace, ConstVal(0xC5), XorOp, ConstVal(0x10), RightBrace]
        );
        assert!(lexer.tokenize("a b").is_ok());
        assert!(lexer.tokenize("g5").is_err());
        assert!(lexer.tokenize("ff,33").is_err());
    }

    #[test]
    fn split_on_strings_works() {
        assert_eq!(
            Lexer::split("~3f|ab &( c5^10 ) "),
            vec!["~", "3f", "|", "ab", "&", "(", "c5", "^", "10", ")"]
        );
    }

    #[test]
    fn try_token_from_str_works() {
        let lexer = Lexer::new(LITERAL_TOKEN_MAP.clone());

        assert_eq!(lexer.try_token_from("&").unwrap(), Token::AndOp);
        assert_eq!(lexer.try_token_from("~").unwrap(), Token::NotOp);
        assert_eq!(lexer.try_token_from("ff").unwrap(), Token::ConstVal(0xff));
        assert!(lexer.try_token_from("m55").is_err());
    }
}