use flate2::write::GzEncoder;
use flate2::Compression;
use regex::Regex;
use std::io::prelude::*;
/// Splits `text` at every match of `r`, keeping the matched delimiters.
///
/// The returned slices appear in source order: each non-empty stretch of text
/// that precedes a match, then the match itself, and finally any non-empty
/// tail after the last match. All slices borrow from `text`.
fn split_keep<'a>(r: &Regex, text: &'a str) -> Vec<&'a str> {
    let mut result = Vec::new();
    // Byte offset just past the previous match (start of the pending gap).
    let mut last = 0;
    // `Regex::find_iter` is the stable way to walk matches; handing a `&Regex`
    // to `str::match_indices` relies on the crate's optional, nightly-only
    // `pattern` feature.
    for matched in r.find_iter(text) {
        if last != matched.start() {
            result.push(&text[last..matched.start()]);
        }
        result.push(matched.as_str());
        last = matched.end();
    }
    if last < text.len() {
        result.push(&text[last..]);
    }
    result
}
/// Tokenizes one line of text and prints a diagnostic report to stdout.
///
/// The line is cut on runs of spaces, commas, colons and semicolons (the
/// separator runs themselves are kept as pieces); every piece is trimmed and
/// empty pieces are discarded. Each remaining piece is classified by
/// `tokenizer` and echoed as `type = "value"`. Pieces classified as
/// `"unknown"` are reported in a failure banner and left out of the collected
/// results. Finally the collected type labels, the collected values, and the
/// gzip-compressed bytes of the values joined by single spaces are printed.
///
/// # Panics
///
/// Panics with "Invalid regex" if the separator pattern fails to compile.
pub fn parse_line(line: &str) {
    let separators = Regex::new(r"([ ,:;]+)").expect("Invalid regex");
    let pieces = split_keep(&separators, line);

    let mut kinds: Vec<String> = Vec::new();
    let mut values: Vec<String> = Vec::new();

    for piece in pieces.into_iter().map(str::trim).filter(|p| !p.is_empty()) {
        let classified = tokenizer(piece);
        println!("{} = \"{}\"", classified.token_type, classified.value);

        if classified.token_type != "unknown" {
            kinds.push(classified.token_type);
            values.push(classified.value);
            continue;
        }

        // Unrecognised piece: report it and keep going with the next one.
        println!("================== FAILED! ==================");
        println!("Unknown token: {}", classified.value);
        println!("================== FAILED! ==================");
    }

    let joined = values.join(" ");
    println!("{:?}", kinds);
    println!("{:?}", values);
    println!("{:?}", compress_string(&joined));
}
/// A classified piece of input text.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Token {
    /// Category label assigned to the text (for example `"keyword"`,
    /// `"identifier"` or `"unknown"`).
    token_type: String,
    /// The token text itself.
    value: String,
}
/// Classifies a single token and returns it with its category label.
///
/// Reserved words are recognised by exact comparison, so an identifier that
/// merely contains one of them (`letter`, `print`, `nullable`, ...) is no
/// longer mistaken for a keyword, type or literal. The remaining categories
/// are tried in this order, first hit wins: operator, punctuator, number,
/// float, string, whitespace, line comment, block comment, identifier.
/// Anything else is labelled `"unknown"`.
///
/// The operator, punctuator, string and comment patterns are unanchored, so
/// they match when the token merely *contains* such a construct.
///
/// Returns a `Token` whose `token_type` is the label and whose `value` is a
/// copy of `token`.
fn tokenizer(token: &str) -> Token {
    let operators_re = Regex::new(r"=|\+|-|\\").unwrap();
    let punctuator_re = Regex::new(r"\(|\)|\{|\}|;|:").unwrap();
    let number_re = Regex::new(r"^\d+$").unwrap();
    let float_re = Regex::new(r"^\d+\.\d+$").unwrap();
    let string_re = Regex::new(r#""[^"]*""#).unwrap();
    let whitespace_re = Regex::new(r"^\s+$").unwrap();
    let comment_re = Regex::new(r"//.*").unwrap();
    let comment_block_re = Regex::new(r"/\*.*\*/").unwrap();
    let identifier_re = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap();

    let token_type = match token {
        // Exact reserved words. None of them can match the operator,
        // punctuator, numeric or string patterns below, so checking them
        // first does not change the relative priority of the categories.
        "let" | "pub" => "keyword",
        "true" | "false" => "bool",
        // `null` is reported as the literal, never as a type name.
        "null" => "null",
        "float" | "int" | "string" | "bool" | "object" | "array" => "type",
        _ if operators_re.is_match(token) => "operator",
        _ if punctuator_re.is_match(token) => "punctuator",
        _ if number_re.is_match(token) => "number",
        _ if float_re.is_match(token) => "float",
        _ if string_re.is_match(token) => "string",
        _ if whitespace_re.is_match(token) => "whitespace",
        _ if comment_re.is_match(token) => "comment",
        _ if comment_block_re.is_match(token) => "comment",
        _ if identifier_re.is_match(token) => "identifier",
        _ => "unknown",
    };

    Token {
        token_type: token_type.to_string(),
        value: token.to_string(),
    }
}
/// Gzip-compresses `input` at the default compression level.
///
/// Returns the complete gzip stream as a byte vector.
///
/// # Panics
///
/// Panics if writing to, or finishing, the in-memory encoder reports an error.
fn compress_string(input: &str) -> Vec<u8> {
    let level = Compression::default();
    let sink: Vec<u8> = Vec::new();
    let mut gz = GzEncoder::new(sink, level);

    let payload = input.as_bytes();
    gz.write_all(payload).unwrap();

    // `finish` writes the gzip trailer and hands back the underlying buffer.
    let compressed = gz.finish();
    compressed.unwrap()
}