mod generated;
#[cfg(target_arch = "wasm32")]
mod wasm;
/// Resolves free-form `text` to its canonical key in `generated::INDEX`.
///
/// Surrounding whitespace is ignored. Probes are tried in this order and the
/// first hit is returned:
///
/// 1. the trimmed input exactly as written,
/// 2. the trimmed input lowercased (skipped when lowercasing changes nothing),
/// 3. each whitespace-separated token of the lowercased input, left to right.
///
/// Returns `None` for empty or whitespace-only input, or when no probe matches.
pub fn lookup_key(text: &str) -> Option<&'static str> {
    // Trim up front, as `lookup_candidates` does, so padded input can match on
    // the whole-string probes instead of only through one of its tokens.
    let text = text.trim();
    if text.is_empty() {
        return None;
    }

    if let Some(key) = generated::INDEX.get(text) {
        return Some(key);
    }

    let lower = text.to_lowercase();
    // An already-lowercase input was probed just above; don't repeat the lookup.
    if lower != text {
        if let Some(key) = generated::INDEX.get(lower.as_str()) {
            return Some(key);
        }
    }

    // Last resort: try each word on its own; the first token found in the
    // index decides the result.
    for token in lower.split_whitespace() {
        if let Some(key) = generated::INDEX.get(token) {
            return Some(key);
        }
    }

    None
}
/// Resolves `text` through [`lookup_key`] and pairs the resulting canonical
/// key with that key's entry in `generated::VARIANTS`.
///
/// Returns `None` when `text` does not resolve to a key, or when the resolved
/// key has no entry in the variants table.
pub fn lookup_all(text: &str) -> Option<(&'static str, &'static [&'static str])> {
    if let Some(canonical) = lookup_key(text) {
        if let Some(forms) = generated::VARIANTS.get(canonical) {
            return Some((canonical, forms));
        }
    }
    None
}
/// Looks up `canonical_key` in both `generated::LANGUAGE` and
/// `generated::VARIANTS` and returns the two entries as a
/// `(language, forms)` pair.
///
/// Returns `None` unless the key is present in both tables. The key is used
/// as given; no trimming or case folding is applied here.
pub fn get_cluster_info(canonical_key: &str) -> Option<(&'static str, &'static [&'static str])> {
    match (
        generated::LANGUAGE.get(canonical_key),
        generated::VARIANTS.get(canonical_key),
    ) {
        (Some(language), Some(forms)) => Some((language, forms)),
        _ => None,
    }
}
/// Collects every candidate listed in `generated::CANDIDATES` for `text`.
///
/// The table is probed, in order, with:
///
/// 1. the trimmed input as written,
/// 2. the trimmed input lowercased (only when that differs from probe 1),
/// 3. each whitespace-separated token of the lowercased input.
///
/// Candidates are returned in first-seen order with duplicates removed.
/// An empty `text`, or one with no matching probe, yields an empty vector.
pub fn lookup_candidates(text: &str) -> Vec<&'static str> {
    use std::collections::HashSet;

    if text.is_empty() {
        return Vec::new();
    }

    let trimmed = text.trim();
    let lowered = trimmed.to_lowercase();
    // The lowercased whole string is a separate probe only when it actually
    // differs from the trimmed input.
    let lowered_whole = if lowered != trimmed {
        Some(lowered.as_str())
    } else {
        None
    };

    let probes = std::iter::once(trimmed)
        .chain(lowered_whole)
        .chain(lowered.split_whitespace());

    let mut seen: HashSet<&'static str> = HashSet::new();
    let mut ordered: Vec<&'static str> = Vec::new();
    for probe in probes {
        if let Some(cands) = generated::CANDIDATES.get(probe) {
            // `insert` returns false for repeats, so only new candidates are kept.
            ordered.extend(cands.iter().copied().filter(|&cand| seen.insert(cand)));
        }
    }
    ordered
}
// Unit tests for the public lookup functions. They call the real functions,
// which read the tables in `generated`, so every expected value below depends
// on the generated data.
#[cfg(test)]
mod tests {
use super::*;
// --- Different romanizations must resolve to the same canonical key. ---
#[test]
fn chan_and_chen_same_key() {
assert_eq!(lookup_key("chan"), lookup_key("chen"));
}
// Pins the actual canonical key, not just equality between two inputs.
#[test]
fn chan_resolves_to_simplified_chinese() {
assert_eq!(lookup_key("chan"), Some("陈"));
}
#[test]
fn hui_and_xu_same_key() {
assert_eq!(lookup_key("hui"), lookup_key("xu"));
}
#[test]
fn wang_and_wong_same_key() {
assert_eq!(lookup_key("wang"), lookup_key("wong"));
}
#[test]
fn park_and_bak_same_key() {
assert_eq!(lookup_key("park"), lookup_key("bak"));
}
// NOTE(review): the test name says "lee" but the input queried is "yi".
// Possibly intentional if "lee" resolves to a different cluster — confirm.
#[test]
fn lee_and_rhee_same_key() {
assert_eq!(lookup_key("yi"), lookup_key("rhee"));
}
#[test]
fn muhammad_and_mohammed_same_key() {
assert_eq!(lookup_key("muhammad"), lookup_key("mohammed"));
}
#[test]
fn ivanov_and_ivanoff_same_key() {
assert_eq!(lookup_key("ivanov"), lookup_key("ivanoff"));
}
// --- Case handling: input that is not lowercase must still resolve. ---
#[test]
fn uppercase_input_matches() {
assert_eq!(lookup_key("CHAN"), Some("陈"));
}
#[test]
fn mixed_case_input_matches() {
assert_eq!(lookup_key("Chan"), Some("陈"));
}
// --- Multi-word input: resolved through the per-token fallback. ---
#[test]
fn full_name_resolves_via_token_split() {
assert_eq!(lookup_key("Chan Wai Ming"), Some("陈"));
}
#[test]
fn korean_full_name_resolves() {
assert_eq!(lookup_key("Park Ji-sung"), lookup_key("park"));
}
// --- Misses: unknown names and the empty string yield `None`. ---
#[test]
fn unknown_returns_none() {
assert_eq!(lookup_key("Smith"), None);
assert_eq!(lookup_key("Kowalski"), None);
assert_eq!(lookup_key(""), None);
}
// The token fallback must not produce a hit when no token is known.
#[test]
fn all_unknown_tokens_returns_none() {
assert_eq!(lookup_key("Kowalski Smith"), None);
}
// --- lookup_all: canonical key together with its variant list. ---
#[test]
fn lookup_all_chan_returns_variants() {
let (key, variants) = lookup_all("Chan").unwrap();
assert_eq!(key, "陈");
assert!(variants.contains(&"chen"));
assert!(variants.contains(&"陳"));
}
#[test]
fn lookup_all_unknown_returns_none() {
assert!(lookup_all("Smith").is_none());
}
// --- lookup_candidates: all candidate keys for an input. ---
#[test]
fn lookup_candidates_lee_returns_multiple_scripts() {
let result = lookup_candidates("Lee");
assert!(result.contains(&"李"), "should contain Chinese 李");
assert!(result.contains(&"이"), "should contain Korean 이");
}
#[test]
fn lookup_candidates_unknown_returns_empty() {
assert!(lookup_candidates("Smith").is_empty());
assert!(lookup_candidates("").is_empty());
}
// NOTE(review): the name says "returns_one" but the assertion only checks
// that the result is non-empty — confirm whether an exact length is intended.
#[test]
fn lookup_candidates_unambiguous_returns_one() {
let result = lookup_candidates("Nguyen");
assert!(!result.is_empty());
}
}