// rexpr/lexer.rs — expression tokenizer
1use crate::ast::Node;
2use crate::error::Error;
3use crate::parser::parse;
4use crate::token::TokenMap;
5use std::collections::LinkedList;
6
7///lexer
8pub fn lexer(express: &str, token_map: &TokenMap) -> Result<Vec<String>, Error> {
9    let express = express.replace("none", "null").replace("None", "null");
10    let mut tokens = parse_tokens(&express, token_map)?;
11    loop_fill_lost_token(0, &mut tokens, token_map);
12    return Ok(tokens);
13}
14
15//fill lost node to  '+1'  =>  ['(','null',"+",'1',')']
16fn loop_fill_lost_token(start_index: usize, arg: &mut Vec<String>, opt_map: &TokenMap) {
17    let len = arg.len();
18    let mut last = "".to_string();
19    for index in start_index..len {
20        let item = arg[index].clone();
21        if index == 0 && item != "(" && opt_map.is_token(&item) {
22            let mut right = "null".to_string();
23            if arg.get((index + 1) as usize).is_some() {
24                right = arg.remove((index + 1) as usize);
25            }
26            let current = arg.remove(0);
27            arg.insert(0, ")".to_string());
28            arg.insert(0, right);
29            arg.insert(0, current);
30            arg.insert(0, "null".to_string());
31            arg.insert(0, "(".to_string());
32            return loop_fill_lost_token(4, arg, opt_map);
33        }
34        if index >= 1
35            && last != ")"
36            && item != "("
37            && item != ")"
38            && (opt_map.is_token(&last))
39            && opt_map.is_token(&item)
40        {
41            let mut right = "null".to_string();
42            if arg.get((index + 1) as usize).is_some() {
43                right = arg.remove((index + 1) as usize);
44            }
45            let current = arg.remove(index);
46            arg.insert(index, ")".to_string());
47            arg.insert(index, right);
48            arg.insert(index, current);
49            arg.insert(index, "null".to_string());
50            arg.insert(index, "(".to_string());
51            return loop_fill_lost_token(index + 5, arg, opt_map);
52        }
53        if (index + 1) as usize == len && item != ")" && opt_map.is_token(&item) {
54            let right = "null".to_string();
55            let current = arg.remove(index);
56            let last;
57            if (index - 1) as i32 >= 0 {
58                last = arg.remove(index - 1);
59            } else {
60                last = "null".to_string();
61            }
62            let index = index - 1;
63            arg.insert(index, ")".to_string());
64            arg.insert(index, right);
65            arg.insert(index, current);
66            arg.insert(index, last);
67            arg.insert(index, "(".to_string());
68        }
69        last = item.to_string();
70    }
71}
72
73/// lexer and parse
74pub fn lexer_parse_node(express: &str, token_map: &TokenMap) -> Result<Node, Error> {
75    let tokens = lexer(express, token_map)?;
76    return Ok(parse(token_map, &tokens, express)?);
77}
78
79///parse token to vec
80pub fn parse_tokens(s: &str, token_map: &TokenMap) -> Result<Vec<String>, Error> {
81    let chars = s.chars();
82    let chars_len = s.len() as i32;
83    let mut result = LinkedList::new();
84    //str
85    let mut is_find_str = false;
86    let mut temp_str = String::new();
87
88    let empty_string = String::new();
89    //token
90    let mut temp_arg = String::new();
91    let mut index: i32 = -1;
92    for item in chars {
93        index = index + 1;
94        let is_token = token_map.is_token(item.to_string().as_str());
95        if item == '\'' || item == '`' {
96            if is_find_str {
97                //第二次找到
98                is_find_str = false;
99                temp_str.push(item);
100                trim_push_back(&temp_str, &mut result);
101                temp_str.clear();
102                continue;
103            }
104            is_find_str = true;
105            temp_str.push(item);
106            continue;
107        }
108        if is_find_str {
109            temp_str.push(item);
110            continue;
111        }
112        if item != '`' && item != '\'' && is_token == false && !is_find_str {
113            //need reset
114            temp_arg.push(item);
115            if (index + 1) == chars_len {
116                trim_push_back(&temp_arg, &mut result);
117            }
118        } else {
119            trim_push_back(&temp_arg, &mut result);
120            temp_arg.clear();
121        }
122        //token node
123        if is_token {
124            if result.len() > 0 {
125                let back = result.back().unwrap_or(&empty_string);
126                if token_map.is_token(&format!("{}{}", back, &item)) == false {
127                    trim_push_back(&item.to_string(), &mut result);
128                    continue;
129                }
130                if back != "" && token_map.is_token(back) {
131                    let mut new_item = back.to_owned();
132                    result.pop_back();
133                    new_item.push(item);
134                    trim_push_back(&new_item, &mut result);
135                    continue;
136                }
137            }
138            trim_push_back(&item.to_string(), &mut result);
139            continue;
140        }
141    }
142    if is_find_str {
143        return Err(Error::from(format!(
144            "[rexpr] find string expr not end! express:{}",
145            s
146        )));
147    }
148    let mut v = vec![];
149    for item in result {
150        v.push(item);
151    }
152    return Ok(v);
153}
154
/// Trims `arg` and appends it to `list`; whitespace-only input is discarded.
fn trim_push_back(arg: &str, list: &mut LinkedList<String>) {
    let trimmed = arg.trim();
    if !trimmed.is_empty() {
        list.push_back(trimmed.to_string());
    }
}
162
#[cfg(test)]
mod test {
    use crate::bencher::QPS;
    use crate::lexer::{lexer, parse_tokens};
    use crate::token::TokenMap;

    // Unary operators on both sides of a binary comparison get wrapped as
    // ( null - x ) groups.
    #[test]
    fn test_fill() {
        let l = lexer("-1 == -a", &TokenMap::new()).unwrap();
        println!("{:?}", &l);
        assert_eq!(
            l,
            vec!["(", "null", "-", "1", ")", "==", "(", "null", "-", "a", ")"]
        )
    }

    // Leading unary operator (first token of the expression) is filled.
    #[test]
    fn test_fill_first() {
        let l = lexer("-1 == -1", &TokenMap::new()).unwrap();
        println!("{:?}", &l);
        assert_eq!(
            l,
            vec!["(", "null", "-", "1", ")", "==", "(", "null", "-", "1", ")"]
        )
    }

    // Trailing operator gets a "null" RIGHT operand: "1-" => ( 1 - null ).
    #[test]
    fn test_fill_last() {
        let l = lexer("-1 == 1-", &TokenMap::new()).unwrap();
        println!("{:?}", &l);
        assert_eq!(
            l,
            vec!["(", "null", "-", "1", ")", "==", "(", "1", "-", "null", ")"]
        )
    }

    // Unary fills in the middle of a longer boolean expression.
    #[test]
    fn test_fill_center() {
        let l = lexer("-1 == -1 && -1 == -2", &TokenMap::new()).unwrap();
        println!("{:?}", &l);
        assert_eq!(
            l,
            vec![
                "(", "null", "-", "1", ")", "==", "(", "null", "-", "1", ")", "&&", "(", "null",
                "-", "1", ")", "==", "(", "null", "-", "2", ")"
            ]
        )
    }

    // Repeated "-" runs: only a leading "-" or a doubled "--" is treated as
    // unary; the ones between operands stay binary.
    #[test]
    fn test_fill_center_n() {
        let l = lexer("-1 -1 -1 --1", &TokenMap::new()).unwrap();
        println!("{:?}", &l);
        assert_eq!(
            l,
            vec!["(", "null", "-", "1", ")", "-", "1", "-", "1", "-", "(", "null", "-", "1", ")"]
        )
    }

    //cargo test --release --package rexpr --lib lexer::test::test_bench_lexer --no-fail-fast -- --exact -Z unstable-options --show-output
    // Throughput benchmark: tokenizes "1+1" a million times and reports QPS
    // via the crate's bencher extension on Instant.
    #[test]
    fn test_bench_lexer() {
        let token_map = TokenMap::new();
        let now = std::time::Instant::now();
        let total = 1000000;
        for _ in 0..total {
            parse_tokens("1+1", &token_map).unwrap();
        }
        now.time(total);
    }
}
233}