# wubi 0.4.0
#
# Self-developed Wubi 86 encoder, dictionary, and dataset (PHF + FST, WASM-ready).
# Documentation
# Wubi weight provenance — generated by wubi-build-weights.
# This file documents the exact inputs that produced the
# accompanying weights.tsv. Re-running the pipeline against
# the same SHAs MUST yield byte-identical weights.tsv.

# Hex SHA-256 digests recorded for the generator side of this run.
# NOTE(review): the key names suggest the build script, its manifest, and
# its rules file — confirm against wubi-build-weights.
[generator]
script_sha256 = "27fe6715e36e3fcf345cbc6f513f8b0c66ef563b97e9f8f8546ae289b3ca9690"
manifest_sha256 = "5daba84f301e75193323ad5c0b5cde86ff6e6b7f97d6f1f19304e7b91775f6ab"
rules_sha256 = "e5d0b50e54352b185616f05dced3900261d5a063a5360668e4413904ca6ab519"

# Figures describing the generated output.
[output]
# NOTE(review): presumably the number of entries written to weights.tsv — confirm.
entries = 135822
# Matches the three corpora visible under [corpora].
corpora = 3

# Input corpora. Each corpus contributes a dotted-key pair: `sha256` is the
# hex digest recorded for that corpus and `weight` is the numeric weight
# recorded for it (how the weight is applied is not visible in this file).
# NOTE(review): the weights mix integer literals (10, 1) with a float (0.6),
# so a TOML parser yields different value types — confirm the consumer
# accepts both.
[corpora]
zho_news_2020_100k.sha256 = "ba219ac3a5b9e13f2653690ab67a03a184cab6019cbbb374791fd4cc7446a7e1"
zho_news_2020_100k.weight = 10
zho_subtlex_ch_wf.sha256 = "086536450b1f77d0c7ff3ac0fc8375897162ace807d3167bec48b4c493434077"
zho_subtlex_ch_wf.weight = 0.6
zho_wikipedia_2018_1m.sha256 = "1fa674cd44842c16ee724f9c57d6b3a3cc4493e83e59b4fba2768213184fdfd4"
zho_wikipedia_2018_1m.weight = 1