bip0039 0.13.2

Another Rust implementation of the BIP-0039 standard
Documentation
//! Build script to generate BIP-0039 wordlists and PHF indices from `words/*.txt`.
//!
//! This script generates one Rust source file per language into `OUT_DIR`:
//! - `bip0039_wordlist_<lang>.rs`
//!
//! Each generated file defines:
//! - `pub static WORDS: [&'static str; 2048]` (BIP-0039 order)
//! - `pub static INDEX: phf::Map<&'static str, u16>` (word -> index)
//! - `pub static WORDLIST: crate::language::Wordlist` (stores references to `WORDS` and `INDEX`)

use std::{
    collections::HashSet,
    env,
    ffi::OsStr,
    fs::File,
    io::{BufRead, BufReader, Write},
    path::{Path, PathBuf},
};

use anyhow::{Context, Result, bail, ensure};

/// Build-script entry point.
///
/// Emits the Cargo re-run directives, resolves `OUT_DIR` and the crate's `words`
/// directory, then generates one wordlist source file for English and for every
/// language whose `CARGO_FEATURE_*` environment variable is present.
///
/// # Errors
///
/// Fails if `OUT_DIR` or `CARGO_MANIFEST_DIR` is unset, or if generating any of the
/// selected wordlists fails.
fn main() -> Result<()> {
    // Re-run if any wordlist changes or if the words directory changes.
    println!("cargo:rerun-if-changed=words");

    let out_dir = env::var("OUT_DIR")
        .map(PathBuf::from)
        .context("OUT_DIR must be set by Cargo")?;

    let words_dir = env::var("CARGO_MANIFEST_DIR")
        .map(PathBuf::from)
        .context("CARGO_MANIFEST_DIR must be set by Cargo")?
        .join("words");

    // (language name, wordlist file name, gating feature environment variable).
    // English carries no gate, so it is always generated.
    let langs: [(&str, &str, Option<&str>); 10] = [
        ("english", "english.txt", None),
        ("chinese_simplified", "chinese_simplified.txt", Some("CARGO_FEATURE_CHINESE_SIMPLIFIED")),
        (
            "chinese_traditional",
            "chinese_traditional.txt",
            Some("CARGO_FEATURE_CHINESE_TRADITIONAL"),
        ),
        ("czech", "czech.txt", Some("CARGO_FEATURE_CZECH")),
        ("french", "french.txt", Some("CARGO_FEATURE_FRENCH")),
        ("italian", "italian.txt", Some("CARGO_FEATURE_ITALIAN")),
        ("japanese", "japanese.txt", Some("CARGO_FEATURE_JAPANESE")),
        ("korean", "korean.txt", Some("CARGO_FEATURE_KOREAN")),
        ("portuguese", "portuguese.txt", Some("CARGO_FEATURE_PORTUGUESE")),
        ("spanish", "spanish.txt", Some("CARGO_FEATURE_SPANISH")),
    ];

    // The filter is lazy, so each gate is inspected right before its language is
    // processed.
    let selected = langs.into_iter().filter(|(_, _, gate)| match gate {
        None => true,
        Some(var) => env::var_os(var).is_some(),
    });

    for (lang, filename, _) in selected {
        let source = words_dir.join(filename);
        println!("cargo:rerun-if-changed={}", source.display());

        let outcome = generate_one(&out_dir, lang, &source);
        outcome.with_context(|| {
            format!("failed generating wordlist for '{lang}' from {}", source.display())
        })?;
    }

    Ok(())
}

/// Generates `bip0039_wordlist_<lang>.rs` inside `out_dir` from the wordlist at
/// `input_path`.
///
/// The generated file defines three statics: `WORDS` (the words in input order),
/// `INDEX` (a `phf::Map` from word to its position) and `WORDLIST` (a struct that
/// references both).
///
/// # Errors
///
/// Fails if the wordlist cannot be read, does not contain exactly 2048 entries, or
/// if the output file cannot be created, written, or flushed.
fn generate_one(out_dir: &Path, lang: &str, input_path: &Path) -> Result<()> {
    let words = read_wordlist(input_path)
        .with_context(|| format!("failed reading wordlist {}", input_path.display()))?;

    ensure!(
        words.len() == 2048,
        "word list '{}' must contain exactly 2048 lines, got {}",
        input_path.display(),
        words.len()
    );

    // Create PHF map (word -> index).
    //
    // `phf_codegen::Map::entry` stores borrowed `&str` values internally while we build it.
    // So we must ensure backing strings live long enough and are not moved/reallocated
    // during insertion. We do that by precomputing all value strings first, then inserting.
    let index_values: Vec<String> = (0..words.len()).map(|i| format!("{i}u16")).collect();

    let mut index_map = phf_codegen::Map::new();
    for (w, v) in words.iter().zip(index_values.iter()) {
        index_map.entry(w, v.as_str());
    }

    let out_name = format!("bip0039_wordlist_{lang}.rs");
    let out_path = out_dir.join(&out_name);

    let file = File::create(&out_path)
        .with_context(|| format!("failed to create {}", out_path.display()))?;
    // Buffer the output: the file is produced by thousands of small `writeln!` calls,
    // each of which would otherwise hit the OS directly.
    let mut f = std::io::BufWriter::new(file);

    // Keep generated files stable and readable.
    writeln!(
        f,
        "// AUTO-GENERATED by build.rs. DO NOT EDIT.\n\
         // Source: {}\n\
         \n\
         ",
        input_path.file_name().and_then(OsStr::to_str).unwrap_or("<unknown>")
    )
    .context("failed writing generated file header")?;

    // WORDS
    writeln!(f, "pub static WORDS: [&str; 2048] = [").context("failed writing WORDS header")?;
    for w in &words {
        // `{:?}` emits a quoted, escaped literal for each word.
        writeln!(f, "    {:?},", w).context("failed writing a WORDS entry")?;
    }
    writeln!(f, "];\n").context("failed writing WORDS footer")?;

    // INDEX
    writeln!(f, "pub static INDEX: ::phf::Map<&'static str, u16> = {};\n", index_map.build())
        .context("failed writing INDEX")?;

    // WORDLIST
    writeln!(
        f,
        "pub static WORDLIST: crate::language::Wordlist = crate::language::Wordlist {{\n\
         \twords: &WORDS,\n\
         \tindex: &INDEX,\n\
         }};"
    )
    .context("failed writing WORDLIST")?;

    // Flush explicitly: dropping a `BufWriter` ignores write errors, which could
    // leave a truncated generated file behind without failing the build.
    f.flush().with_context(|| format!("failed to flush {}", out_path.display()))?;

    Ok(())
}

fn read_wordlist(path: &Path) -> Result<Vec<String>> {
    let file = File::open(path).with_context(|| format!("failed to open {}", path.display()))?;
    let reader = BufReader::new(file);

    let mut words = Vec::with_capacity(2048);

    for (line_no, line) in reader.lines().enumerate() {
        let line_no_1 = line_no + 1;
        let mut s = line
            .with_context(|| format!("failed reading {} at line {}", path.display(), line_no_1))?;

        if s.ends_with('\r') {
            s.pop();
        }

        // Reject leading/trailing whitespace; BIP-0039 wordlists are line-oriented.
        let trimmed = s.trim();
        if trimmed != s {
            bail!("invalid whitespace in {} at line {}", path.display(), line_no_1);
        }

        // Reject empty lines.
        if s.is_empty() {
            bail!("empty line in {} at line {}", path.display(), line_no_1);
        }

        words.push(s);
    }

    // Ensure uniqueness (helps catch accidental duplicates early).
    let mut set = HashSet::with_capacity(words.len());
    for (i, w) in words.iter().enumerate() {
        if !set.insert(w) {
            bail!("duplicate word '{}' in {} (at input line {})", w, path.display(), i + 1);
        }
    }

    Ok(words)
}