wubi 0.4.0

Self-developed Wubi 86 encoder, dictionary, and dataset (PHF + FST, WASM-ready).
Documentation
//! Generate a Wubi 86 dictionary TSV from wubi's seed data.
//!
//!     cargo run --release --bin wubi-gen-dict > dict.txt
//!
//! Writes three `#`-prefixed header lines followed by one entry per line
//! (`<code>\t<word>`), sorted by code and then by word; the entry lines are
//! meant to be compatible with `lab8-core::WubiTable::parse`.

use std::io::{self, BufWriter, Write};

use wubi::{embedded_seed, encode, iter_jianma1};

fn main() {
    let mut entries: Vec<(String, String)> = Vec::new();

    // 一级简码: 1-letter codes for 25 most-frequent characters.
    for (letter, ch) in iter_jianma1() {
        entries.push(((letter as char).to_string(), ch.to_string()));
    }

    // Algorithmic encoding from seed decompositions.
    for (ch, decomp) in embedded_seed() {
        match encode(&decomp) {
            Ok(code) => entries.push((code.as_str().to_string(), ch.to_string())),
            Err(e) => eprintln!("# WARN: failed to encode {ch}: {e}"),
        }
    }

    entries.sort_by(|a, b| a.0.cmp(&b.0).then(a.1.cmp(&b.1)));

    println!("# Generated by wubi-gen-dict.");
    println!("# Sources: wubi/data/{{zigen86,jianma1,seed}}.txt");
    println!("# License: MIT OR Apache-2.0 (wubi).");
    for (code, word) in entries {
        println!("{code}\t{word}");
    }
}