// NOTE(review): presumably needed for numeric literals inside the generated file
// included below — TODO confirm and narrow the scope if possible.
#![allow(clippy::unreadable_literal)]
// Build-script output (`$OUT_DIR/reverse_translit_phf.rs`). It presumably defines the
// `REVERSE_RU`, `REVERSE_UK` and `REVERSE_EL` maps that `reverse_lookup` reads, since
// they are not declared anywhere else in this file.
include!(concat!(env!("OUT_DIR"), "/reverse_translit_phf.rs"));
/// Upper bound, in bytes, on the length of a romanized key tried at each input position
/// by `reverse_transliterate_impl`.
// NOTE(review): keys longer than this in the generated tables would never be matched —
// verify against the build script.
const MAX_KEY_LEN: usize = 4;
fn reverse_lookup(key: &str, lang: &str) -> Option<&'static str> {
let table: &phf::Map<&'static str, &'static str> = match lang {
"ru" => &REVERSE_RU,
"uk" => &REVERSE_UK,
"el" => &REVERSE_EL,
_ => return None,
};
table.get(key).copied()
}
/// Language codes advertised as supporting reverse transliteration; this is the list
/// behind `supports_reverse` and `reverse_langs`.
// NOTE(review): `reverse_lookup` matches on its own hard-coded set of codes — the two
// lists must be kept in sync by hand.
const REVERSE_LANGS: &[&str] = &["el", "ru", "uk"];
/// Reports whether `lang` is one of the codes listed in `REVERSE_LANGS`.
///
/// The comparison is an exact, case-sensitive string match.
pub(crate) fn supports_reverse(lang: &str) -> bool {
    REVERSE_LANGS.iter().copied().any(|code| code == lang)
}
/// Returns an owned copy of every code in `REVERSE_LANGS`, in declaration order.
pub(crate) fn reverse_langs() -> Vec<String> {
    REVERSE_LANGS.iter().copied().map(String::from).collect()
}
/// Rewrites romanized `text` into the native script of `lang` by greedy longest match.
///
/// At every position the candidates are the prefixes of the remaining input that are at
/// most `MAX_KEY_LEN` bytes long, tried from longest to shortest:
///
/// 1. A prefix with an exact entry (per `reverse_lookup`) is replaced by that entry.
/// 2. Otherwise, a prefix longer than one byte that consists solely of ASCII uppercase
///    letters is retried in lowercase; on a hit the replacement is emitted uppercased.
///
/// A character for which no candidate matches is copied to the output unchanged.
///
/// `reverse_lookup` misses on every key for a language code it does not recognise, so
/// for such a `lang` the result is equal to `text`.
pub(crate) fn reverse_transliterate_impl(text: &str, lang: &str) -> String {
    // Starting hint only: a replacement may occupy more bytes than the key it replaces.
    let mut result = String::with_capacity(text.len());
    // Unconsumed tail of the input. It always begins on a character boundary because it
    // only ever advances by a whole matched key or by one whole character.
    let mut rest = text;

    'scan: while let Some(ch) = rest.chars().next() {
        let max_try = rest.len().min(MAX_KEY_LEN);
        for key_len in (1..=max_try).rev() {
            // `str::get` returns `None` when `key_len` would cut a multi-byte character in
            // half, so such lengths are skipped without re-validating the bytes.
            if let Some(candidate) = rest.get(..key_len) {
                if let Some(native) = reverse_lookup(candidate, lang) {
                    result.push_str(native);
                    rest = &rest[key_len..];
                    continue 'scan;
                }
                // All-caps multi-letter keys reuse the lowercase entry.
                if key_len > 1 && candidate.bytes().all(|b| b.is_ascii_uppercase()) {
                    if let Some(native) = reverse_lookup(&candidate.to_ascii_lowercase(), lang) {
                        result.push_str(&native.to_uppercase());
                        rest = &rest[key_len..];
                        continue 'scan;
                    }
                }
            }
        }
        // Nothing matched at this position: pass the character through untouched.
        result.push(ch);
        rest = &rest[ch.len_utf8()..];
    }
    result
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A mixed-case word maps letter by letter, preserving the leading capital.
    #[test]
    fn test_russian_basic() {
        assert_eq!(reverse_transliterate_impl("Moskva", "ru"), "Москва");
    }

    /// Two-letter keys win over their single-letter prefixes.
    #[test]
    fn test_russian_digraphs() {
        assert_eq!(reverse_transliterate_impl("zh", "ru"), "ж");
        assert_eq!(reverse_transliterate_impl("sh", "ru"), "ш");
        assert_eq!(reverse_transliterate_impl("ch", "ru"), "ч");
    }

    /// "shch" is `MAX_KEY_LEN` bytes long, so this exercises the longest key tried.
    #[test]
    fn test_russian_trigraphs() {
        assert_eq!(reverse_transliterate_impl("shch", "ru"), "щ");
    }

    /// All-caps multi-letter keys resolve to the uppercase native letter.
    #[test]
    fn test_russian_all_caps_keys() {
        assert_eq!(reverse_transliterate_impl("ZH", "ru"), "Ж");
        assert_eq!(reverse_transliterate_impl("SHCH", "ru"), "Щ");
    }

    /// Characters with no table entry are copied through unchanged.
    #[test]
    fn test_passthrough() {
        assert_eq!(reverse_transliterate_impl("123!", "ru"), "123!");
    }

    /// Empty input yields empty output.
    #[test]
    fn test_empty_input() {
        assert_eq!(reverse_transliterate_impl("", "ru"), "");
    }

    /// An unrecognised language code leaves the text untouched, including multi-byte
    /// characters whose bytes straddle the candidate-key window.
    #[test]
    fn test_unsupported_lang_is_identity() {
        assert_eq!(reverse_transliterate_impl("Moskva shch", "de"), "Moskva shch");
        assert_eq!(reverse_transliterate_impl("Ünïcödé ✓", "de"), "Ünïcödé ✓");
    }

    #[test]
    fn test_greek_upsilon_no_latin_leak() {
        assert_eq!(reverse_transliterate_impl("psychi", "el"), "ψυχη");
        assert_eq!(reverse_transliterate_impl("oyzo", "el"), "ουζο");
        assert_eq!(reverse_transliterate_impl("Y", "el"), "Υ");
        assert_eq!(reverse_transliterate_impl("y", "el"), "υ");
        for s in ["psychi", "oyzo", "ayrio", "Kypros"] {
            let rev = reverse_transliterate_impl(s, "el");
            assert!(
                !rev.chars().any(|c| c.is_ascii_alphabetic()),
                "reverse el leaked a Latin letter: {s:?} -> {rev:?}"
            );
        }
    }

    #[test]
    fn test_supports_reverse() {
        assert!(supports_reverse("ru"));
        assert!(supports_reverse("uk"));
        assert!(supports_reverse("el"));
        assert!(!supports_reverse("de"));
    }

    /// The advertised list and the membership check agree with each other.
    #[test]
    fn test_reverse_langs_matches_supports_reverse() {
        let langs = reverse_langs();
        assert_eq!(langs, ["el", "ru", "uk"]);
        assert!(langs.iter().all(|lang| supports_reverse(lang)));
    }
}