use super::Transliterator;
/// Romanization scheme used by [`CyrillicTransliterator`].
///
/// Each variant selects one romanization column of the mapping table.
/// `Hash` is derived alongside `Eq` so a scheme can be used as a key in
/// hashed collections (for example a per-scheme cache).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CyrillicScheme {
    /// GOST-style romanization (first romanization column of the table).
    // NOTE(review): which GOST revision the name refers to is not visible
    // from this file — confirm against the project documentation.
    Gost2005,
    /// BGN/PCGN-style romanization (second romanization column of the table).
    BgnPcgn,
    /// ALA-LC-style romanization (third romanization column of the table);
    /// its entries use diacritics and combining marks.
    AlaLc,
}
/// Romanization table, one row per Cyrillic character.
///
/// Row layout: `(source, gost2005, bgn_pcgn, ala_lc)` — the three string
/// columns correspond, in order, to the `Gost2005`, `BgnPcgn` and `AlaLc`
/// schemes. Upper- and lower-case letters have separate rows, so letter case
/// is carried by the table itself. The table is not sorted; it is meant to be
/// searched by source character.
static CYRILLIC_TABLE: &[(char, &str, &str, &str)] = &[
    // ---- Russian alphabet -------------------------------------------------
    ('А', "A", "A", "A"),
    ('а', "a", "a", "a"),
    ('Б', "B", "B", "B"),
    ('б', "b", "b", "b"),
    ('В', "V", "V", "V"),
    ('в', "v", "v", "v"),
    ('Г', "G", "G", "G"),
    ('г', "g", "g", "g"),
    ('Д', "D", "D", "D"),
    ('д', "d", "d", "d"),
    ('Е', "E", "Ye", "E"),
    ('е', "e", "ye", "e"),
    ('Ё', "Yo", "Yo", "Ë"),
    ('ё', "yo", "yo", "ë"),
    ('Ж', "Zh", "Zh", "Zh"),
    ('ж', "zh", "zh", "zh"),
    ('З', "Z", "Z", "Z"),
    ('з', "z", "z", "z"),
    ('И', "I", "I", "I"),
    ('и', "i", "i", "i"),
    // U+012C / U+012D: Latin I / i with breve.
    ('Й', "J", "Y", "\u{012C}"),
    ('й', "j", "y", "\u{012D}"),
    ('К', "K", "K", "K"),
    ('к', "k", "k", "k"),
    ('Л', "L", "L", "L"),
    ('л', "l", "l", "l"),
    ('М', "M", "M", "M"),
    ('м', "m", "m", "m"),
    ('Н', "N", "N", "N"),
    ('н', "n", "n", "n"),
    ('О', "O", "O", "O"),
    ('о', "o", "o", "o"),
    ('П', "P", "P", "P"),
    ('п', "p", "p", "p"),
    ('Р', "R", "R", "R"),
    ('р', "r", "r", "r"),
    ('С', "S", "S", "S"),
    ('с', "s", "s", "s"),
    ('Т', "T", "T", "T"),
    ('т', "t", "t", "t"),
    ('У', "U", "U", "U"),
    ('у', "u", "u", "u"),
    ('Ф', "F", "F", "F"),
    ('ф', "f", "f", "f"),
    ('Х', "Kh", "Kh", "Kh"),
    ('х', "kh", "kh", "kh"),
    // U+0361: combining double inverted breve (tie over two letters).
    ('Ц', "C", "Ts", "T\u{0361}s"),
    ('ц', "c", "ts", "t\u{0361}s"),
    ('Ч', "Ch", "Ch", "Ch"),
    ('ч', "ch", "ch", "ch"),
    ('Ш', "Sh", "Sh", "Sh"),
    ('ш', "sh", "sh", "sh"),
    ('Щ', "Sch", "Shch", "Shch"),
    ('щ', "sch", "shch", "shch"),
    // Hard sign: U+2033 double prime / dropped / U+02BA modifier double prime.
    ('Ъ', "\u{2033}", "", "\u{02BA}"),
    ('ъ', "\u{2033}", "", "\u{02BA}"),
    ('Ы', "Y", "Y", "Y"),
    ('ы', "y", "y", "y"),
    // Soft sign: U+2032 prime / ASCII apostrophe / U+02B9 modifier prime.
    ('Ь', "\u{2032}", "'", "\u{02B9}"),
    ('ь', "\u{2032}", "'", "\u{02B9}"),
    // U+0116 / U+0117: Latin E / e with dot above.
    ('Э', "Eh", "E", "\u{0116}"),
    ('э', "eh", "e", "\u{0117}"),
    ('Ю', "Yu", "Yu", "I\u{0361}u"),
    ('ю', "yu", "yu", "i\u{0361}u"),
    ('Я', "Ya", "Ya", "I\u{0361}a"),
    ('я', "ya", "ya", "i\u{0361}a"),
    // ---- Additional (Ukrainian) letters -----------------------------------
    ('\u{0407}', "Yi", "Yi", "Yi"), // Ї
    ('\u{0457}', "yi", "yi", "yi"), // ї
    ('\u{0406}', "I", "I", "I"),    // І
    ('\u{0456}', "i", "i", "i"),    // і
    ('\u{0404}', "Ye", "Ye", "Ie"), // Є
    ('\u{0454}', "ye", "ye", "ie"), // є
    ('\u{0490}', "G", "G", "G"),    // Ґ
    ('\u{0491}', "g", "g", "g"),    // ґ
];
/// Table-driven romanizer for Cyrillic characters.
///
/// The romanization column that is read from [`CYRILLIC_TABLE`] is fixed at
/// construction time by the chosen [`CyrillicScheme`].
#[derive(Debug, Clone)]
pub struct CyrillicTransliterator {
    /// Scheme that decides which table column `lookup` returns.
    scheme: CyrillicScheme,
}

impl CyrillicTransliterator {
    /// Creates a transliterator that romanizes according to `scheme`.
    pub fn new(scheme: CyrillicScheme) -> Self {
        CyrillicTransliterator { scheme }
    }

    /// Returns the scheme this transliterator was created with.
    pub fn scheme(&self) -> CyrillicScheme {
        self.scheme
    }

    /// Finds the romanization of a single character under the active scheme.
    ///
    /// Returns `None` when `ch` has no row in [`CYRILLIC_TABLE`]. The result
    /// may be an empty string for characters the scheme drops.
    fn lookup(&self, ch: char) -> Option<&'static str> {
        // Linear search; bail out with `None` as soon as we know there is no row.
        let &(_, gost, bgn_pcgn, ala_lc) = CYRILLIC_TABLE.iter().find(|row| row.0 == ch)?;

        let romanized = match self.scheme {
            CyrillicScheme::Gost2005 => gost,
            CyrillicScheme::BgnPcgn => bgn_pcgn,
            CyrillicScheme::AlaLc => ala_lc,
        };
        Some(romanized)
    }
}
impl Transliterator for CyrillicTransliterator {
fn transliterate(&self, input: &str) -> String {
let mut result = String::with_capacity(input.len() * 2);
for ch in input.chars() {
if let Some(roman) = self.lookup(ch) {
result.push_str(roman);
} else {
result.push(ch);
}
}
result
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::transliteration::Transliterator;

    /// Romanizes `text` with a freshly built transliterator for `scheme`.
    fn romanize(scheme: CyrillicScheme, text: &str) -> String {
        CyrillicTransliterator::new(scheme).transliterate(text)
    }

    /// Single letters, including digraph outputs, under the GOST column.
    #[test]
    fn test_gost_basic() {
        for (cyrillic, latin) in [("а", "a"), ("б", "b"), ("ш", "sh"), ("ж", "zh")] {
            assert_eq!(romanize(CyrillicScheme::Gost2005, cyrillic), latin);
        }
    }

    /// A whole word under the GOST column (compared case-insensitively).
    #[test]
    fn test_gost_moskva() {
        let r = romanize(CyrillicScheme::Gost2005, "Москва");
        assert!(r.to_lowercase().contains("moskva"), "got: {r}");
    }

    /// A whole word under the BGN/PCGN column (compared case-insensitively).
    #[test]
    fn test_bgn_rossiya() {
        let r = romanize(CyrillicScheme::BgnPcgn, "Россия");
        assert!(r.to_lowercase().contains("rossiya"), "got: {r}");
    }

    /// ALA-LC outputs that carry diacritics.
    #[test]
    fn test_ala_lc_known() {
        assert_eq!(romanize(CyrillicScheme::AlaLc, "ё"), "ë");
        assert_eq!(romanize(CyrillicScheme::AlaLc, "й"), "\u{012D}");
    }

    /// An upper-case source letter yields an upper-case romanization.
    #[test]
    fn test_uppercase_preservation() {
        let r = romanize(CyrillicScheme::BgnPcgn, "А");
        assert_eq!(r, "A");
    }

    /// Text without table entries is returned unchanged.
    #[test]
    fn test_passthrough_latin() {
        assert_eq!(romanize(CyrillicScheme::Gost2005, "Hello"), "Hello");
    }

    /// Cyrillic and Latin segments can be mixed in one input.
    #[test]
    fn test_mixed_cyrillic_latin() {
        let r = romanize(CyrillicScheme::BgnPcgn, "Москва (Moscow)");
        assert!(r.contains("oskva"), "got: {r}");
        assert!(r.contains("Moscow"), "got: {r}");
    }

    /// The hard sign is dropped under BGN/PCGN.
    #[test]
    fn test_hard_sign_bgn_empty() {
        assert_eq!(romanize(CyrillicScheme::BgnPcgn, "ъ"), "");
    }

    /// The soft sign becomes an ASCII apostrophe under BGN/PCGN.
    #[test]
    fn test_soft_sign_bgn_apostrophe() {
        assert_eq!(romanize(CyrillicScheme::BgnPcgn, "ь"), "'");
    }
}