specmc_base/
tokenize.rs

//! Module for tokenizing strings.

/// Tokenize `$input` and collect the tokens in reverse order.
#[macro_export]
macro_rules! tokenize {
    ($input:expr) => {
        $crate::tokenize::tokenize($input)
            .into_iter()
            .rev()
            .collect()
    };
}

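// Illustrative usage (not part of the original file): the macro reverses the
// token stream, presumably so callers can consume tokens back to front with
// `Vec::pop`. The collected container is chosen by the caller's type
// annotation, e.g.:
//
//     let tokens: Vec<String> = tokenize!("a == b");
//     assert_eq!(tokens, ["b", "==", "a"]);
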
// Note: multi-character tokens (e.g. "==") must appear before their
// single-character prefixes (e.g. "="), because the list is matched in order.
const SPECIAL_CHARS: &[&str] = &[
    " ", "\t", "\n", "\r", "==", "!=", "||", "&&", "**", "(", ")", "{", "}", "[", "]", ",", "=",
    "-", "+", ";", "\"",
    // ".", ":", "*", "/", "%", "!", "&", "|", "^", "~", removed because useless
];

/// Split a string into tokens.
/// Special characters are emitted as tokens of their own; whitespace only
/// separates tokens and is not emitted.
pub fn tokenize(input: &str) -> Vec<String> {
    let mut tokens: Vec<String> = vec![];
    let mut current_token: String = "".to_string();

    let mut i: usize = 0;
    while i < input.len() {
        // NOTE: byte-wise scanning assumes ASCII input; a multi-byte UTF-8
        // character would be misread here, and `input[i..]` below could panic
        // on a non-character boundary.
        let ch: char = input.as_bytes()[i] as char;

        // Check whether the remaining input starts with a special character.
        let mut found_special_char: bool = false;
        for special_char in SPECIAL_CHARS {
            if input[i..].starts_with(special_char) {
                found_special_char = true;
                // Flush the token accumulated so far, if any.
                if !current_token.is_empty() {
                    tokens.push(current_token);
                    current_token = "".to_string();
                }
                // Whitespace separates tokens but is not itself a token.
                if !special_char.trim().is_empty() {
                    tokens.push(special_char.to_string());
                }
                // Skip the matched special character; the `i += 1` at the end
                // of the loop accounts for its last byte.
                i += special_char.len() - 1;
                break;
            }
        }

        // Ordinary character: accumulate it into the current token.
        if !found_special_char {
            current_token.push(ch);
        }

        i += 1;
    }

    // Flush the trailing token, if any.
    if !current_token.is_empty() {
        tokens.push(current_token);
    }

    tokens
}
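
// A minimal sketch of tests (not part of the original file) illustrating the
// behaviour above: special characters become their own tokens, whitespace only
// separates, and `tokenize!` yields the same tokens in reverse order.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn splits_on_special_chars_and_whitespace() {
        assert_eq!(
            tokenize("foo(1, 2) == bar"),
            ["foo", "(", "1", ",", "2", ")", "==", "bar"]
        );
    }

    #[test]
    fn macro_reverses_token_order() {
        let tokens: Vec<String> = tokenize!("a = b");
        assert_eq!(tokens, ["b", "=", "a"]);
    }
}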