use std::collections::BTreeSet;
use std::io::{self, BufWriter, Write};
fn main() {
let manifest = env!("CARGO_MANIFEST_DIR");
let ref_path = format!("{manifest}/../../data/wubi86_full.txt");
let reference = std::fs::read_to_string(&ref_path).expect("read rime");
let mut entries: BTreeSet<(String, String)> = BTreeSet::new();
for raw in reference.lines() {
let line = raw.trim();
if line.is_empty() || line.starts_with('#') {
continue;
}
let mut parts = line.splitn(2, '\t');
let (Some(code), Some(word)) = (parts.next(), parts.next()) else {
continue;
};
let code = code.trim();
let word = word.trim();
if word.chars().count() != 1 {
continue;
}
let ch = word.chars().next().unwrap();
let cp = ch as u32;
if !(0x4E00..=0x9FFF).contains(&cp) {
continue;
}
if !(2..=3).contains(&code.len()) {
continue;
}
if !code.bytes().all(|b| (b'a'..=b'y').contains(&b)) {
continue;
}
entries.insert((code.to_string(), ch.to_string()));
}
println!(
"# Auto-imported by golia-import-simplified.\n\
# Format: <code>\\t<char>\n\
# Source: Wubi 86 standard 简码 table (公开规范).\n\
# Counts: see footer."
);
let mut n2 = 0usize;
let mut n3 = 0usize;
for (code, ch) in &entries {
if code.len() == 2 {
n2 += 1;
} else {
n3 += 1;
}
println!("{code}\t{ch}");
}
eprintln!(
"[import-simplified] 2-letter: {n2}, 3-letter: {n3}, total: {}",
entries.len()
);
}