use alloc::collections::BTreeMap;
use alloc::string::String;
use alloc::vec::Vec;
/// Raw text of `../data/romanization_th.tsv` (path relative to this source file),
/// embedded at compile time by `include_str!`.
///
/// [`RomanizationMap::builtin`] hands this string to [`RomanizationMap::from_tsv`].
static BUILTIN_ROMANIZATION: &str = include_str!("../data/romanization_th.tsv");
/// Word-level lookup table from a source word to its romanized spelling.
///
/// The table is an ordered map keyed by the exact source word; lookups are
/// exact string matches with no normalization of the query.
#[derive(Debug, Clone)]
pub struct RomanizationMap(BTreeMap<String, String>);

impl RomanizationMap {
    /// Builds the table from the TSV data embedded in `BUILTIN_ROMANIZATION`.
    pub fn builtin() -> Self {
        Self::from_tsv(BUILTIN_ROMANIZATION)
    }

    /// Parses a tab-separated table into a map.
    ///
    /// Each data line has the form `word<TAB>romanization`. Parsing is
    /// lenient and never fails:
    ///
    /// * blank lines and lines whose first non-whitespace character is `#`
    ///   are skipped;
    /// * lines without a tab, or where either column is empty after
    ///   trimming, are skipped;
    /// * surrounding whitespace is trimmed from **both** columns;
    /// * a leading UTF-8 byte-order mark is ignored;
    /// * when a word appears more than once, the last occurrence wins.
    pub fn from_tsv(data: &str) -> Self {
        // U+FEFF is not classified as whitespace, so `trim` would leave it
        // glued to the first key of a BOM-prefixed file.
        let data = data.trim_start_matches('\u{feff}');

        let mut map = BTreeMap::new();
        for line in data.lines() {
            let line = line.trim();
            if line.is_empty() || line.starts_with('#') {
                continue;
            }

            // Only the first tab separates the columns; trim each column
            // before the emptiness check so `"word \troman"` is stored under
            // `"word"` rather than `"word "`.
            let mut fields = line.splitn(2, '\t');
            let word = fields.next().map(str::trim).unwrap_or("");
            let roman = fields.next().map(str::trim).unwrap_or("");
            if word.is_empty() || roman.is_empty() {
                continue;
            }

            // Explicit `insert` keeps the "last duplicate wins" behavior.
            map.insert(String::from(word), String::from(roman));
        }
        RomanizationMap(map)
    }

    /// Returns the romanization stored for `word`, or `None` if the word is
    /// not in the table.
    pub fn romanize(&self, word: &str) -> Option<&str> {
        self.0.get(word).map(String::as_str)
    }

    /// Returns the romanization stored for `word`, falling back to `word`
    /// itself when the table has no entry for it.
    ///
    /// The result borrows from either the map or the argument, hence the
    /// shared lifetime.
    pub fn romanize_or_raw<'a>(&'a self, word: &'a str) -> &'a str {
        self.romanize(word).unwrap_or(word)
    }

    /// Romanizes every token, producing one output string per input token in
    /// the same order. Unknown tokens are copied through unchanged.
    pub fn romanize_tokens(&self, tokens: &[&str]) -> Vec<String> {
        tokens
            .iter()
            .map(|&token| String::from(self.romanize_or_raw(token)))
            .collect()
    }

    /// Number of distinct words in the table.
    #[inline]
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns `true` when the table holds no entries.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;

    /// A few everyday words must resolve through the bundled table.
    #[test]
    fn builtin_common_words() {
        let table = RomanizationMap::builtin();
        let expected = [("กิน", "kin"), ("ข้าว", "khao"), ("น้ำ", "nam"), ("ปลา", "pla")];
        for &(word, roman) in expected.iter() {
            assert_eq!(table.romanize(word), Some(roman));
        }
    }

    /// A word absent from the bundled table yields `None`.
    #[test]
    fn unknown_word_returns_none() {
        let table = RomanizationMap::builtin();
        let looked_up = table.romanize("เปปซี่");
        assert_eq!(looked_up, None);
    }

    /// The fallback variant echoes an unknown word back unchanged.
    #[test]
    fn romanize_or_raw_fallback() {
        let table = RomanizationMap::builtin();
        let unknown = "เปปซี่";
        assert_eq!(table.romanize_or_raw(unknown), unknown);
    }

    /// The fallback variant still returns the mapping for a known word.
    #[test]
    fn romanize_or_raw_hit() {
        let table = RomanizationMap::builtin();
        let result = table.romanize_or_raw("กิน");
        assert_eq!(result, "kin");
    }

    /// When a key repeats, the later row replaces the earlier one.
    #[test]
    fn from_tsv_last_duplicate_wins() {
        let tsv = "กิน\tkin\nกิน\tgin\n";
        let table = RomanizationMap::from_tsv(tsv);
        assert_eq!(table.romanize("กิน"), Some("gin"));
    }

    /// Output tokens line up one-to-one with input tokens.
    #[test]
    fn romanize_tokens_aligned() {
        let table = RomanizationMap::from_tsv("กิน\tkin\nปลา\tpla\n");
        let tokens = ["กิน", "ปลา"];
        let romanized = table.romanize_tokens(&tokens);
        assert_eq!(romanized, vec!["kin", "pla"]);
    }

    /// Tokens missing from the table are passed through verbatim.
    #[test]
    fn romanize_tokens_unknown_passthrough() {
        let table = RomanizationMap::from_tsv("กิน\tkin\n");
        let tokens = ["กิน", "xyz"];
        let romanized = table.romanize_tokens(&tokens);
        assert_eq!(romanized, vec!["kin", "xyz"]);
    }

    /// `#` comment lines and blank lines contribute no entries.
    #[test]
    fn comment_and_blank_lines_skipped() {
        let tsv = "# comment\n\nกิน\tkin\n";
        let table = RomanizationMap::from_tsv(tsv);
        assert_eq!(table.len(), 1);
        assert_eq!(table.romanize("กิน"), Some("kin"));
    }

    /// A row with no tab separator is dropped.
    #[test]
    fn line_without_tab_skipped() {
        let table = RomanizationMap::from_tsv("กิน\n");
        assert!(table.is_empty());
    }

    /// Padding around the romanization column is stripped.
    #[test]
    fn whitespace_trimmed_from_romanization() {
        let table = RomanizationMap::from_tsv("กิน\t kin \n");
        assert_eq!(table.romanize("กิน"), Some("kin"));
    }

    /// Parsing the empty string gives an empty table.
    #[test]
    fn empty_input_produces_empty_map() {
        let table = RomanizationMap::from_tsv("");
        assert!(table.is_empty());
    }

    /// An empty token slice produces an empty result.
    #[test]
    fn romanize_tokens_empty_slice() {
        let table = RomanizationMap::builtin();
        let romanized = table.romanize_tokens(&[]);
        assert!(romanized.is_empty());
    }
}