use std::collections::{HashSet};
use std::fs::{File, read_to_string};
use std::io::{Write};
use std::env;
use std::path::Path;
fn hash_generate_list<P: AsRef<Path>>(path: P, name: &str, code: & mut String, all_lengths: & mut HashSet<usize>) -> HashSet<usize> {
use std::fmt::Write;
let s = read_to_string(path.as_ref()).unwrap();
let mut lengths = HashSet::new();
let mut builder = phf_codegen::OrderedMap::new();
let mut count = 0;
let mut slices = Vec::new();
for lemma in s.lines() {
slices.push(percent_encoding::percent_decode_str(lemma).decode_utf8().unwrap());
}
for (i, lemma) in slices.iter().enumerate() {
builder.entry(lemma.as_bytes(), i.to_string().as_str());
lengths.insert(lemma.len());
all_lengths.insert(lemma.len());
count += 1;
}
write!(code, "
pub (crate) struct {};
impl {} {{
pub (crate) const fn get_length() -> usize {{
{}
}}
pub (crate) fn get_index(index: usize) -> & 'static str {{
unsafe {{ std::str::from_utf8_unchecked(Self::get_map().index(index).unwrap().0) }} //This unsafe is justified as the build script checks the lists for invalid utf-8 strings
}}
pub (crate) fn get_map() -> & 'static phf::OrderedMap<&'static [u8], usize> {{
static MAP: phf::OrderedMap<&'static [u8], usize> = {};
&MAP
}}
}}", name, name, count, builder.build()).unwrap();
lengths
}
fn main() {
use std::fmt::Write;
println!("cargo:rerun-if-changed=./lists/obw.txt");
println!("cargo:rerun-if-changed=./lists/tbc.txt");
println!("cargo:rerun-if-changed=./lists/tbu.txt");
println!("cargo:rerun-if-changed=./lists/controls.txt");
println!("cargo:rerun-if-changed=./lists/repetitions.txt");
let mut all_lengths = HashSet::new();
let mut code = String::new();
hash_generate_list("./lists/tbc.txt", "TwoByteMap", & mut code, & mut all_lengths);
hash_generate_list("./lists/tbu.txt", "ThreeByteMap", & mut code, & mut all_lengths);
hash_generate_list("./lists/obw.txt", "OneByteMap", & mut code, & mut all_lengths);
hash_generate_list("./lists/controls.txt", "Controls", & mut code, & mut all_lengths);
let rep_lengths = hash_generate_list("./lists/repetitions.txt", "Repetitions", & mut code, & mut all_lengths);
let mut rep_lengths: Vec<_> = rep_lengths.iter().collect();
rep_lengths.sort();
rep_lengths.reverse();
all_lengths.remove(&1usize);
let mut all_lengths: Vec<_> = all_lengths.iter().collect();
all_lengths.sort();
all_lengths.reverse();
write!(& mut code, "pub (crate) const TOTAL_LENGTHS: [usize; {}] = [", all_lengths.len()).unwrap();
for length in all_lengths {
write!(& mut code, "{}usize, ", *length).unwrap();
}
write!(& mut code, "];\n\n").unwrap();
write!(& mut code, "pub (crate) const REPETITION_LENGTHS: [usize; {}] = [", rep_lengths.len()).unwrap();
for length in rep_lengths {
write!(& mut code, "{}usize, ", *length).unwrap();
}
write!(& mut code, "];").unwrap();
let mut fs = File::create(Path::new(&env::var("OUT_DIR").unwrap()).join("maps.rs")).unwrap();
fs.write_all(code.as_bytes()).unwrap();
}