use std::{collections::HashSet, env, fmt::Write as _, fs, path::Path};
/// Path of the JSON input file that `main` reads and reports to Cargo via
/// `cargo:rerun-if-changed`.
const DATA: &str = "src/validation_data.json";
/// Build-script entry point: converts the JSON word lists in `DATA` into Rust
/// constants written to `$OUT_DIR/validation_data.rs`.
///
/// Generated items, in order: `PAST_TENSE_MAP` (present/past pairs),
/// `IRREGULAR_PAST` (sorted, de-duplicated), then one string slice per plain
/// word list.
///
/// # Panics
///
/// Panics (failing the build) when the data file cannot be read or parsed,
/// when any section has the wrong JSON shape, when a value is not lowercase
/// and trimmed, when a present-tense key is duplicated, when a past form
/// contains a space, when an irregular past form also appears in one of the
/// blocklists, or when the output file cannot be written.
fn main() {
    // Re-run this script only when the data file or the script itself changes.
    for watched in [DATA, "build.rs"] {
        println!("cargo:rerun-if-changed={watched}");
    }

    let raw = match fs::read_to_string(DATA) {
        Ok(text) => text,
        Err(e) => panic!("read {DATA}: {e}"),
    };
    let data: serde_json::Value = match serde_json::from_str(&raw) {
        Ok(value) => value,
        Err(e) => panic!("parse {DATA}: {e}"),
    };

    let mut generated =
        String::from("// @generated by build.rs from src/validation_data.json - do not edit.\n");

    // --- PAST_TENSE_MAP -----------------------------------------------------
    let entries = data["past_tense"]
        .as_array()
        .expect("past_tense must be an array");
    let mut irregular_forms: Vec<&str> = Vec::new();
    let mut known_presents: HashSet<&str> = HashSet::new();
    generated.push_str("pub const PAST_TENSE_MAP: &[(&str, &str)] = &[\n");
    for entry in entries {
        let fields = entry
            .as_array()
            .expect("past_tense entry must be [present, past]");
        assert_eq!(
            fields.len(),
            2,
            "past_tense entry must have exactly 2 elements: {entry}"
        );
        let (present_value, past_value) = (&fields[0], &fields[1]);

        let present = lc(
            present_value.as_str().expect("present must be a string"),
            "past_tense present",
        );
        // `insert` returns false when the key was already present.
        if !known_presents.insert(present) {
            panic!("duplicate present key in past_tense: {present:?}");
        }

        let past = lc(
            past_value.as_str().expect("past must be a string"),
            "past_tense past",
        );
        if past.contains(' ') {
            panic!("past form must be a single token, got {past:?}");
        }

        writeln!(generated, " ({present:?}, {past:?}),").unwrap();

        // Pairs whose past form is identical to the present form are also
        // recorded in the irregular list.
        if past == present {
            irregular_forms.push(past);
        }
    }
    generated.push_str("];\n");

    // --- IRREGULAR_PAST -----------------------------------------------------
    let explicit_irregular = data["irregular_past"]
        .as_array()
        .expect("irregular_past must be an array");
    irregular_forms.extend(explicit_irregular.iter().map(|value| {
        lc(
            value
                .as_str()
                .expect("irregular_past entry must be a string"),
            "irregular_past",
        )
    }));
    // Sorting first makes `dedup` remove every duplicate, not just adjacent ones.
    irregular_forms.sort_unstable();
    irregular_forms.dedup();

    // An irregular past form must not also be listed in either blocklist.
    let mut blocked: HashSet<&str> = HashSet::new();
    for key in ["ed_blocklist", "d_blocklist"] {
        let list = match data[key].as_array() {
            Some(list) => list,
            None => panic!("{key} must be an array"),
        };
        blocked.extend(
            list.iter()
                .map(|value| value.as_str().expect("blocklist entry must be a string")),
        );
    }
    if let Some(clash) = irregular_forms.iter().find(|form| blocked.contains(**form)) {
        panic!("irregular {clash:?} must not appear in a morphology blocklist");
    }
    emit_slice(&mut generated, "IRREGULAR_PAST", irregular_forms.into_iter());

    // --- Plain word lists: (generated constant name, JSON key) ---------------
    let plain_lists = [
        ("ED_BLOCKLIST", "ed_blocklist"),
        ("D_BLOCKLIST", "d_blocklist"),
        ("CODE_EXTENSIONS", "code_extensions"),
        ("DOC_EXTENSIONS", "doc_extensions"),
        ("FILLER_WORDS", "filler_words"),
        ("META_PHRASES", "meta_phrases"),
        ("BODY_PRESENT_TENSE", "body_present_tense"),
    ];
    for (const_name, key) in plain_lists {
        let list = match data[key].as_array() {
            Some(list) => list,
            None => panic!("{key} must be an array"),
        };
        let words = list.iter().map(|value| match value.as_str() {
            Some(text) => lc(text, key),
            None => panic!("{key} entries must be strings"),
        });
        emit_slice(&mut generated, const_name, words);
    }

    // --- Write the generated source into Cargo's output directory -----------
    let dest = Path::new(&env::var("OUT_DIR").expect("OUT_DIR not set")).join("validation_data.rs");
    if let Err(e) = fs::write(&dest, generated) {
        panic!("write {}: {e}", dest.display());
    }
}
/// Appends a `const NAME: &[&str] = &[ ... ];` declaration to `out`.
///
/// Each item is written on its own line using its `Debug` representation
/// (a quoted, escaped string) followed by a comma. An empty iterator produces
/// an empty slice declaration.
fn emit_slice<'a>(out: &mut String, name: &str, items: impl Iterator<Item = &'a str>) {
    // Opening line of the declaration.
    out.push_str("const ");
    out.push_str(name);
    out.push_str(": &[&str] = &[\n");

    // One quoted entry per line.
    items.for_each(|entry| {
        writeln!(out, " {entry:?},").unwrap();
    });

    out.push_str("];\n");
}
/// Checks that `s` has no leading/trailing whitespace and is already
/// lowercase, then returns it unchanged.
///
/// `ctx` names the data section being validated and is only used in the
/// panic message.
///
/// # Panics
///
/// Panics if `s` differs from its trimmed form or from its lowercased form.
fn lc<'a>(s: &'a str, ctx: &str) -> &'a str {
    // The trim check runs first; the lowercase comparison is only evaluated
    // when the value is already trimmed.
    if s.trim() != s || s.to_lowercase() != s {
        panic!("{ctx} value must be lowercase and trimmed: {s:?}");
    }
    s
}