use std::{
collections::HashSet,
env,
ffi::OsStr,
fs::File,
io::{BufRead, BufReader, Write},
path::{Path, PathBuf},
};
use anyhow::{Context, Result, bail, ensure};
/// Build-script entry point: generates one Rust source file per enabled wordlist.
///
/// The English list is always generated. Every other language is generated only when
/// its `CARGO_FEATURE_*` environment variable is present. Inputs are read from the
/// `words` directory under `CARGO_MANIFEST_DIR`; outputs are written to `OUT_DIR`.
///
/// # Errors
///
/// Fails if `OUT_DIR` or `CARGO_MANIFEST_DIR` is not set, or if generating any enabled
/// wordlist fails.
fn main() -> Result<()> {
    // (language name, wordlist file name, env var that gates it; `None` = always built)
    const LANGUAGES: [(&str, &str, Option<&str>); 10] = [
        ("english", "english.txt", None),
        ("chinese_simplified", "chinese_simplified.txt", Some("CARGO_FEATURE_CHINESE_SIMPLIFIED")),
        (
            "chinese_traditional",
            "chinese_traditional.txt",
            Some("CARGO_FEATURE_CHINESE_TRADITIONAL"),
        ),
        ("czech", "czech.txt", Some("CARGO_FEATURE_CZECH")),
        ("french", "french.txt", Some("CARGO_FEATURE_FRENCH")),
        ("italian", "italian.txt", Some("CARGO_FEATURE_ITALIAN")),
        ("japanese", "japanese.txt", Some("CARGO_FEATURE_JAPANESE")),
        ("korean", "korean.txt", Some("CARGO_FEATURE_KOREAN")),
        ("portuguese", "portuguese.txt", Some("CARGO_FEATURE_PORTUGUESE")),
        ("spanish", "spanish.txt", Some("CARGO_FEATURE_SPANISH")),
    ];

    println!("cargo:rerun-if-changed=words");

    let out_dir = env::var("OUT_DIR")
        .map(PathBuf::from)
        .context("OUT_DIR must be set by Cargo")?;
    let words_dir = env::var("CARGO_MANIFEST_DIR")
        .map(PathBuf::from)
        .context("CARGO_MANIFEST_DIR must be set by Cargo")?
        .join("words");

    for (lang, filename, feature_env) in LANGUAGES {
        // A language without a gating variable is unconditional.
        let enabled = match feature_env {
            None => true,
            Some(var) => env::var_os(var).is_some(),
        };
        if !enabled {
            continue;
        }

        let path = words_dir.join(filename);
        println!("cargo:rerun-if-changed={}", path.display());
        generate_one(&out_dir, lang, &path).with_context(|| {
            format!("failed generating wordlist for '{lang}' from {}", path.display())
        })?;
    }

    Ok(())
}
/// Generates `bip0039_wordlist_{lang}.rs` inside `out_dir` from the wordlist at `input_path`.
///
/// The generated file defines three statics:
/// - `WORDS`: the words, in file order;
/// - `INDEX`: a `phf` map from each word to its position in `WORDS`, as a `u16`;
/// - `WORDLIST`: a `crate::language::Wordlist` referencing both.
///
/// # Errors
///
/// Fails if the wordlist cannot be read or validated, if it does not contain exactly
/// 2048 words, or if the output file cannot be created, written, or flushed.
fn generate_one(out_dir: &Path, lang: &str, input_path: &Path) -> Result<()> {
    // Required number of words; also the length of the generated `WORDS` array.
    const WORD_COUNT: usize = 2048;

    let words = read_wordlist(input_path)
        .with_context(|| format!("failed reading wordlist {}", input_path.display()))?;
    ensure!(
        words.len() == WORD_COUNT,
        "word list '{}' must contain exactly {} lines, got {}",
        input_path.display(),
        WORD_COUNT,
        words.len()
    );

    // Map values are emitted verbatim into the generated source, so each index is
    // rendered as a `u16` literal (e.g. `17u16`).
    let index_values: Vec<String> = (0..words.len()).map(|i| format!("{i}u16")).collect();
    let mut index_map = phf_codegen::Map::new();
    for (w, v) in words.iter().zip(index_values.iter()) {
        index_map.entry(w, v.as_str());
    }

    let out_name = format!("bip0039_wordlist_{lang}.rs");
    let out_path = out_dir.join(&out_name);
    let file = File::create(&out_path)
        .with_context(|| format!("failed to create {}", out_path.display()))?;
    // Buffer the output: the loop below issues one small write per word, which would
    // otherwise be one syscall each.
    let mut out = std::io::BufWriter::new(file);

    writeln!(
        out,
        "// AUTO-GENERATED by build.rs. DO NOT EDIT.\n\
         // Source: {}\n\
         \n\
         ",
        input_path.file_name().and_then(OsStr::to_str).unwrap_or("<unknown>")
    )
    .context("failed writing generated file header")?;

    writeln!(out, "pub static WORDS: [&str; {WORD_COUNT}] = [")
        .context("failed writing WORDS header")?;
    for w in &words {
        // `{:?}` emits the word as a quoted, escaped string literal.
        writeln!(out, " {:?},", w).context("failed writing a WORDS entry")?;
    }
    writeln!(out, "];\n").context("failed writing WORDS footer")?;

    writeln!(out, "pub static INDEX: ::phf::Map<&'static str, u16> = {};\n", index_map.build())
        .context("failed writing INDEX")?;

    writeln!(
        out,
        "pub static WORDLIST: crate::language::Wordlist = crate::language::Wordlist {{\n\
         \twords: &WORDS,\n\
         \tindex: &INDEX,\n\
         }};"
    )
    .context("failed writing WORDLIST")?;

    // Flush explicitly: dropping a `BufWriter` ignores any error from the final write.
    out.flush()
        .with_context(|| format!("failed to flush {}", out_path.display()))?;

    Ok(())
}
fn read_wordlist(path: &Path) -> Result<Vec<String>> {
let file = File::open(path).with_context(|| format!("failed to open {}", path.display()))?;
let reader = BufReader::new(file);
let mut words = Vec::with_capacity(2048);
for (line_no, line) in reader.lines().enumerate() {
let line_no_1 = line_no + 1;
let mut s = line
.with_context(|| format!("failed reading {} at line {}", path.display(), line_no_1))?;
if s.ends_with('\r') {
s.pop();
}
let trimmed = s.trim();
if trimmed != s {
bail!("invalid whitespace in {} at line {}", path.display(), line_no_1);
}
if s.is_empty() {
bail!("empty line in {} at line {}", path.display(), line_no_1);
}
words.push(s);
}
let mut set = HashSet::with_capacity(words.len());
for (i, w) in words.iter().enumerate() {
if !set.insert(w) {
bail!("duplicate word '{}' in {} (at input line {})", w, path.display(), i + 1);
}
}
Ok(words)
}