mws_compiler/
lib.rs

1use flate2::write::GzEncoder;
2use flate2::Compression;
3use regex::Regex;
4// use std::fs::File;
5use std::io::prelude::*;
6// use std::io::BufReader;
7
8// fn main() -> std::io::Result<()> {
9//     let test_files = 4;
10
11//     for i in 1..=test_files {
12//         let target_num: String;
13//         match i {
14//             _ if i < 10 => target_num = "0".to_owned() + &i.to_owned().to_string(),
15//             _ => target_num = i.to_owned().to_string(),
16//         }
17//         println!(
18//             "================== Test: {}.mws ==================",
19//             target_num
20//         );
21
22//         let file = format!("./test/{}.mws", target_num);
23//         let f = File::open(file)?;
24//         let reader = BufReader::new(f);
25
26//         for line in reader.lines() {
27//             let line = line?;
28//             parse_line(&line);
29//         }
30//     }
31
32//     Ok(())
33// }
34
35
36//? Split a string using regex. Keep the matched string.
37fn split_keep<'a>(r: &Regex, text: &'a str) -> Vec<&'a str> {
38    let mut result = Vec::new();
39    let mut last = 0;
40    for (index, matched) in text.match_indices(r) {
41        if last != index {
42            result.push(&text[last..index]);
43        }
44        result.push(matched);
45        last = index + matched.len();
46    }
47    if last < text.len() {
48        result.push(&text[last..]);
49    }
50    result
51}
52
53pub fn parse_line(line: &str) -> String {
54    let split_re = Regex::new(r"([ ,:;]+)").expect("Invalid regex");
55    let tokens = split_keep(&split_re, line)
56        .into_iter()
57        .map(|n| n.trim())
58        .filter(|n| !n.is_empty())
59        .collect::<Vec<_>>();
60    let mut tokenized_line: Vec<String> = vec![];
61    let mut minimized_line: Vec<String> = vec![];
62    for token in tokens {
63        let token = tokenizer(token);
64        println!("{} = \"{}\"", token.token_type, token.value);
65        if token.token_type == "unknown" {
66            println!("================== FAILED! ==================");
67            println!("Unknown token: {}", token.value);
68            println!("================== FAILED! ==================");
69        } else {
70            tokenized_line.push(token.token_type);
71            minimized_line.push(token.value);
72        }
73    }
74
75    return minimized_line.join(" ");
76
77}
78
/// A single classified piece of source text.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Token {
    /// Category label assigned by `tokenizer`, e.g. `"keyword"`,
    /// `"operator"`, `"identifier"` or `"unknown"`.
    token_type: String,
    /// The token text exactly as it was passed to `tokenizer`.
    value: String,
}
83
84fn tokenizer(token: &str) -> Token {
85    let token_type;
86
87    //? Regex:
88    let operators_re = Regex::new(r"=|\+|-|\\").unwrap();
89    let keywords_re = Regex::new(r"let|pub").unwrap();
90    let identifier_re = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap();
91    let punctuator_re = Regex::new(r"\(|\)|\{|\}|;|:").unwrap();
92    // literals
93    let string_re = Regex::new(r#""[^"]*""#).unwrap();
94    let number_re = Regex::new(r"^\d+$").unwrap();
95    let bool_re = Regex::new(r"true|false").unwrap();
96    let null_re = Regex::new(r"null").unwrap();
97    let float_re = Regex::new(r"^\d+\.\d+$").unwrap();
98    // types
99    let float_type_re = Regex::new(r"float").unwrap();
100    let int_type_re = Regex::new(r"int").unwrap();
101    let string_type_re = Regex::new(r"string").unwrap();
102    let bool_type_re = Regex::new(r"bool").unwrap();
103    let null_type_re = Regex::new(r"null").unwrap();
104    let object_type_re = Regex::new(r"object").unwrap();
105    let array_type_re = Regex::new(r"array").unwrap();
106    //whitespace
107    let whitespace_re = Regex::new(r"^\s+$").unwrap();
108    //comment
109    let comment_re = Regex::new(r"//.*").unwrap();
110    let comment_block_re = Regex::new(r"/\*.*\*/").unwrap();
111    //? End of regex
112
113    match token {
114        _ if keywords_re.is_match(token) => token_type = "keyword",
115        _ if operators_re.is_match(token) => token_type = "operator",
116        _ if punctuator_re.is_match(token) => token_type = "punctuator",
117
118        _ if number_re.is_match(token) => token_type = "number",
119        _ if float_re.is_match(token) => token_type = "float",
120        _ if bool_re.is_match(token) => token_type = "bool",
121        _ if null_re.is_match(token) => token_type = "null",
122        _ if string_re.is_match(token) => token_type = "string",
123
124        _ if float_type_re.is_match(token) => token_type = "type",
125        _ if int_type_re.is_match(token) => token_type = "type",
126        _ if string_type_re.is_match(token) => token_type = "type",
127        _ if bool_type_re.is_match(token) => token_type = "type",
128        _ if null_type_re.is_match(token) => token_type = "type",
129        _ if object_type_re.is_match(token) => token_type = "type",
130        _ if array_type_re.is_match(token) => token_type = "type",
131
132        _ if whitespace_re.is_match(token) => token_type = "whitespace",
133
134        _ if comment_re.is_match(token) => token_type = "comment",
135        _ if comment_block_re.is_match(token) => token_type = "comment",
136
137        _ if identifier_re.is_match(token) => token_type = "identifier",
138        _ => token_type = "unknown",
139    }
140
141    return Token {
142        token_type: token_type.to_string(),
143        value: token.to_string(),
144    };
145}
146
147fn compress_string(input: &str) -> Vec<u8> {
148    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
149    encoder.write_all(input.as_bytes()).unwrap();
150    encoder.finish().unwrap()
151}