1use flate2::write::GzEncoder;
2use flate2::Compression;
3use regex::Regex;
4use std::io::prelude::*;
6fn split_keep<'a>(r: &Regex, text: &'a str) -> Vec<&'a str> {
38 let mut result = Vec::new();
39 let mut last = 0;
40 for (index, matched) in text.match_indices(r) {
41 if last != index {
42 result.push(&text[last..index]);
43 }
44 result.push(matched);
45 last = index + matched.len();
46 }
47 if last < text.len() {
48 result.push(&text[last..]);
49 }
50 result
51}
52
53pub fn parse_line(line: &str) -> String {
54 let split_re = Regex::new(r"([ ,:;]+)").expect("Invalid regex");
55 let tokens = split_keep(&split_re, line)
56 .into_iter()
57 .map(|n| n.trim())
58 .filter(|n| !n.is_empty())
59 .collect::<Vec<_>>();
60 let mut tokenized_line: Vec<String> = vec![];
61 let mut minimized_line: Vec<String> = vec![];
62 for token in tokens {
63 let token = tokenizer(token);
64 println!("{} = \"{}\"", token.token_type, token.value);
65 if token.token_type == "unknown" {
66 println!("================== FAILED! ==================");
67 println!("Unknown token: {}", token.value);
68 println!("================== FAILED! ==================");
69 } else {
70 tokenized_line.push(token.token_type);
71 minimized_line.push(token.value);
72 }
73 }
74
75 return minimized_line.join(" ");
76
77}
78
/// A single classified piece of input text.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Token {
    /// Name of the token class, e.g. `"keyword"`, `"identifier"` or `"unknown"`.
    token_type: String,
    /// The token text exactly as it was passed to the tokenizer.
    value: String,
}
83
84fn tokenizer(token: &str) -> Token {
85 let token_type;
86
87 let operators_re = Regex::new(r"=|\+|-|\\").unwrap();
89 let keywords_re = Regex::new(r"let|pub").unwrap();
90 let identifier_re = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap();
91 let punctuator_re = Regex::new(r"\(|\)|\{|\}|;|:").unwrap();
92 let string_re = Regex::new(r#""[^"]*""#).unwrap();
94 let number_re = Regex::new(r"^\d+$").unwrap();
95 let bool_re = Regex::new(r"true|false").unwrap();
96 let null_re = Regex::new(r"null").unwrap();
97 let float_re = Regex::new(r"^\d+\.\d+$").unwrap();
98 let float_type_re = Regex::new(r"float").unwrap();
100 let int_type_re = Regex::new(r"int").unwrap();
101 let string_type_re = Regex::new(r"string").unwrap();
102 let bool_type_re = Regex::new(r"bool").unwrap();
103 let null_type_re = Regex::new(r"null").unwrap();
104 let object_type_re = Regex::new(r"object").unwrap();
105 let array_type_re = Regex::new(r"array").unwrap();
106 let whitespace_re = Regex::new(r"^\s+$").unwrap();
108 let comment_re = Regex::new(r"//.*").unwrap();
110 let comment_block_re = Regex::new(r"/\*.*\*/").unwrap();
111 match token {
114 _ if keywords_re.is_match(token) => token_type = "keyword",
115 _ if operators_re.is_match(token) => token_type = "operator",
116 _ if punctuator_re.is_match(token) => token_type = "punctuator",
117
118 _ if number_re.is_match(token) => token_type = "number",
119 _ if float_re.is_match(token) => token_type = "float",
120 _ if bool_re.is_match(token) => token_type = "bool",
121 _ if null_re.is_match(token) => token_type = "null",
122 _ if string_re.is_match(token) => token_type = "string",
123
124 _ if float_type_re.is_match(token) => token_type = "type",
125 _ if int_type_re.is_match(token) => token_type = "type",
126 _ if string_type_re.is_match(token) => token_type = "type",
127 _ if bool_type_re.is_match(token) => token_type = "type",
128 _ if null_type_re.is_match(token) => token_type = "type",
129 _ if object_type_re.is_match(token) => token_type = "type",
130 _ if array_type_re.is_match(token) => token_type = "type",
131
132 _ if whitespace_re.is_match(token) => token_type = "whitespace",
133
134 _ if comment_re.is_match(token) => token_type = "comment",
135 _ if comment_block_re.is_match(token) => token_type = "comment",
136
137 _ if identifier_re.is_match(token) => token_type = "identifier",
138 _ => token_type = "unknown",
139 }
140
141 return Token {
142 token_type: token_type.to_string(),
143 value: token.to_string(),
144 };
145}
146
147fn compress_string(input: &str) -> Vec<u8> {
148 let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
149 encoder.write_all(input.as_bytes()).unwrap();
150 encoder.finish().unwrap()
151}