use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
/// The six x-system digraphs, spelled in lowercase.
///
/// `x_system_to_utf8` searches for these with ASCII case-insensitivity, so
/// only the lowercase spelling needs to be listed.
const FROM_X_CI: &[&str] = &[
    "cx",
    "gx",
    "hx",
    "jx",
    "sx",
    "ux",
];
/// The Esperanto letters that carry a diacritic: the six lowercase forms
/// followed by their six uppercase counterparts.
const FROM_UTF8: &[&str] = &[
    // Lowercase.
    "ĉ", "ĝ", "ĥ", "ĵ", "ŝ", "ŭ",
    // Uppercase.
    "Ĉ", "Ĝ", "Ĥ", "Ĵ", "Ŝ", "Ŭ",
];
/// Search patterns for `h_system_to_utf8`, all spelled in lowercase (the
/// search itself is ASCII case-insensitive).
///
/// The short entries at the end are the letter pairs that get converted.
/// Every longer entry is an exception: `h_system_to_utf8` has no replacement
/// for it, so a match on one of them is copied to the output unchanged.
/// Because that function searches leftmost-longest, an exception wins over
/// the short pair embedded in it.
const FROM_H_CI: &[&str] = &[
    // Exceptions containing "ch", "gh", "jh" or "sh" that must stay as typed
    // (presumably the two letters belong to different word parts — the list
    // itself does not say).
    "komenchor", "kuracherb", "potenchav", "prononchelp", "senchav",
    "pruchelp", "drogherb", "flughaven", "longhar",
    "lesvigholstini", "vanghar", "gajhumor", "amashisteri",
    "tobushaltej", "bushaltej", "ashund", "dishak",
    "disharmoni", "dishelig", "dishirtig", "fikshejm", "grashav",
    "grashepata", "invershav", "kashal", "misharmoni", "mishelp",
    "mishumor", "neinvershav", "plushor", "sekshontem", "seshektar",
    "seshor", "sukceshav",
    // Exceptions containing an "au" that must stay as typed.
    "blankaurs", "doganauni", "ropauni", "grandaursin",
    "imaginaraunu", "kakauj", "malgrandaursin", "matricaunu",
    "naur", "praul", "saudaarabuj", "tiaul", "traurb", "unuaul",
    // The h-system digraphs themselves.
    "ch", "gh", "hh", "jh", "sh",
    // "au" is rewritten with a breve on the u.
    "au",
];
/// Rewrites Esperanto text so that every letter with a diacritic is spelled
/// in the ASCII x-system (`ĉ` becomes `cx`, `Ŭ` becomes `UX`, and so on).
///
/// Lowercase letters produce a lowercase digraph and uppercase letters an
/// all-uppercase digraph. Every other character is copied through unchanged;
/// this includes a base letter followed by a separate combining accent, since
/// only the single-code-point letters are recognised.
///
/// Returns a newly allocated `String`; the input is not modified.
pub fn utf8_to_x_system(s: &str) -> String {
    // Each accented letter is two bytes in UTF-8 and becomes two ASCII bytes,
    // so the output is exactly as long as the input.
    let mut result = String::with_capacity(s.len());
    for ch in s.chars() {
        // Every source letter is a single code point, so a plain per-character
        // match is enough; no multi-pattern searcher has to be built per call.
        let digraph = match ch {
            'ĉ' => "cx",
            'ĝ' => "gx",
            'ĥ' => "hx",
            'ĵ' => "jx",
            'ŝ' => "sx",
            'ŭ' => "ux",
            'Ĉ' => "CX",
            'Ĝ' => "GX",
            'Ĥ' => "HX",
            'Ĵ' => "JX",
            'Ŝ' => "SX",
            'Ŭ' => "UX",
            _ => {
                result.push(ch);
                continue;
            }
        };
        result.push_str(digraph);
    }
    result
}
/// Rewrites Esperanto text so that every letter with a diacritic is spelled
/// in the ASCII h-system (`ĉ` becomes `ch`, `Ŝ` becomes `SH`, and so on).
///
/// `ŭ` / `Ŭ` lose their breve and become a plain `u` / `U`. Lowercase letters
/// produce a lowercase digraph and uppercase letters an all-uppercase one.
/// Every other character is copied through unchanged; this includes a base
/// letter followed by a separate combining accent, since only the
/// single-code-point letters are recognised.
///
/// Returns a newly allocated `String`; the input is not modified.
pub fn utf8_to_h_system(s: &str) -> String {
    // No replacement is longer (in bytes) than the letter it replaces, so the
    // input length is an upper bound for the output length.
    let mut result = String::with_capacity(s.len());
    for ch in s.chars() {
        // Every source letter is a single code point, so a plain per-character
        // match is enough; no multi-pattern searcher has to be built per call.
        let ascii = match ch {
            'ĉ' => "ch",
            'ĝ' => "gh",
            'ĥ' => "hh",
            'ĵ' => "jh",
            'ŝ' => "sh",
            'ŭ' => "u",
            'Ĉ' => "CH",
            'Ĝ' => "GH",
            'Ĥ' => "HH",
            'Ĵ' => "JH",
            'Ŝ' => "SH",
            'Ŭ' => "U",
            _ => {
                result.push(ch);
                continue;
            }
        };
        result.push_str(ascii);
    }
    result
}
/// Converts x-system spelling back to proper Esperanto letters
/// (`cx` becomes `ĉ`, `ux` becomes `ŭ`, and so on).
///
/// Digraphs are recognised regardless of ASCII case. An all-lowercase digraph
/// yields the lowercase letter; a digraph with a capital in either position
/// (`CX`, `Cx` or `cX`) yields the uppercase letter. All other text is copied
/// through unchanged.
///
/// Returns a newly allocated `String`; the input is not modified.
pub fn x_system_to_utf8(s: &str) -> String {
    // (lowercase digraph, lowercase letter, uppercase letter)
    const LETTERS: [(&str, &str, &str); 6] = [
        ("cx", "ĉ", "Ĉ"),
        ("gx", "ĝ", "Ĝ"),
        ("hx", "ĥ", "Ĥ"),
        ("jx", "ĵ", "Ĵ"),
        ("sx", "ŝ", "Ŝ"),
        ("ux", "ŭ", "Ŭ"),
    ];

    let matcher = AhoCorasickBuilder::new()
        .ascii_case_insensitive(true)
        .build(FROM_X_CI);

    let mut converted = String::new();
    matcher.replace_all_with(s, &mut converted, |_, hit, out| {
        let letter = LETTERS
            .iter()
            .find(|entry| hit.eq_ignore_ascii_case(entry.0))
            // An exact match means the digraph was typed in lowercase; any
            // other casing of the same digraph contains a capital.
            .map(|entry| if hit == entry.0 { entry.1 } else { entry.2 })
            // Anything unrecognised is passed through as it was found.
            .unwrap_or(hit);
        out.push_str(letter);
        // `true` asks the searcher to keep replacing further matches.
        true
    });
    converted
}
/// Converts h-system spelling back to proper Esperanto letters
/// (`ch` becomes `ĉ`, `sh` becomes `ŝ`, and `au` becomes `aŭ`).
///
/// Matching ignores ASCII case. For the h-digraphs, an all-lowercase pair
/// yields the lowercase letter and a pair with a capital in either position
/// yields the uppercase letter. For `au`, the `a` keeps its case and the `u`
/// becomes `ŭ` or `Ŭ` according to its own case.
///
/// The search is leftmost-longest over `FROM_H_CI`, which also lists longer
/// exception words. When one of those matches, it takes precedence over the
/// short pair inside it and is copied to the output unchanged.
///
/// Returns a newly allocated `String`; the input is not modified.
pub fn h_system_to_utf8(s: &str) -> String {
    // (lowercase digraph, lowercase letter, uppercase letter)
    const DIGRAPHS: [(&str, &str, &str); 5] = [
        ("ch", "ĉ", "Ĉ"),
        ("gh", "ĝ", "Ĝ"),
        ("hh", "ĥ", "Ĥ"),
        ("jh", "ĵ", "Ĵ"),
        ("sh", "ŝ", "Ŝ"),
    ];

    let matcher = AhoCorasickBuilder::new()
        .match_kind(MatchKind::LeftmostLongest)
        .ascii_case_insensitive(true)
        .build(FROM_H_CI);

    let mut converted = String::new();
    matcher.replace_all_with(s, &mut converted, |_, hit, out| {
        if hit.eq_ignore_ascii_case("au") {
            // Keep the `a` exactly as typed and put a breve on the `u`,
            // preserving the case of each letter independently.
            out.extend(hit.chars().map(|c| match c {
                'u' => 'ŭ',
                'U' => 'Ŭ',
                other => other,
            }));
            return true;
        }

        let text = DIGRAPHS
            .iter()
            .find(|entry| hit.eq_ignore_ascii_case(entry.0))
            // An exact match means the digraph was typed in lowercase; any
            // other casing of the same digraph contains a capital.
            .map(|entry| if hit == entry.0 { entry.1 } else { entry.2 })
            // Exception words have no table entry and are kept verbatim.
            .unwrap_or(hit);
        out.push_str(text);
        // `true` asks the searcher to keep replacing further matches.
        true
    });
    converted
}
#[cfg(test)]
mod tests {
    use super::*;

    /// English sentence that contains `x` and `h` but none of the letter
    /// pairs or accented letters the converters act on, so every converter
    /// must return it unchanged.
    const PLAIN_ENGLISH: &str = "The quick brown fox jumps over the lazy dog. And my axe.";

    // ---- x-system -> UTF-8 ----

    #[test]
    fn test_x_system_to_utf8_noop() {
        assert_eq!(x_system_to_utf8(PLAIN_ENGLISH), PLAIN_ENGLISH);
    }

    #[test]
    fn test_x_system_to_utf8_echo_change() {
        let converted = x_system_to_utf8("ehxosxangxo cxiujxauxde EHXOSXANGXO CXIUJXAUXDE");
        assert_eq!(converted, "eĥoŝanĝo ĉiuĵaŭde EĤOŜANĜO ĈIUĴAŬDE");
    }

    #[test]
    fn test_x_system_to_utf8_mixed_case() {
        // A capital in either position of the digraph gives a capital letter.
        let converted = x_system_to_utf8("eHxoSxanGxo CxiuJxaUxde ehXosXangXo cXiujXauXde");
        assert_eq!(converted, "eĤoŜanĜo ĈiuĴaŬde eĤoŜanĜo ĈiuĴaŬde");
    }

    // ---- UTF-8 -> x-system ----

    #[test]
    fn test_utf8_to_x_system_noop() {
        assert_eq!(utf8_to_x_system(PLAIN_ENGLISH), PLAIN_ENGLISH);
    }

    #[test]
    fn test_utf8_to_x_system_echo_change() {
        let converted = utf8_to_x_system("eĥoŝanĝo ĉiuĵaŭde EĤOŜANĜO ĈIUĴAŬDE");
        assert_eq!(converted, "ehxosxangxo cxiujxauxde EHXOSXANGXO CXIUJXAUXDE");
    }

    // ---- UTF-8 -> h-system ----

    #[test]
    fn test_utf8_to_h_system_noop() {
        assert_eq!(utf8_to_h_system(PLAIN_ENGLISH), PLAIN_ENGLISH);
    }

    #[test]
    fn test_utf8_to_h_system_echo_change() {
        let converted = utf8_to_h_system("eĥoŝanĝo ĉiuĵaŭde EĤOŜANĜO ĈIUĴAŬDE");
        assert_eq!(converted, "ehhoshangho chiujhaude EHHOSHANGHO CHIUJHAUDE");
    }

    // ---- h-system -> UTF-8 ----

    #[test]
    fn test_h_system_to_utf8_noop() {
        assert_eq!(h_system_to_utf8(PLAIN_ENGLISH), PLAIN_ENGLISH);
    }

    #[test]
    fn test_h_system_to_utf8_echo_change() {
        let converted = h_system_to_utf8("ehhoshangho chiujhaude EHHOSHANGHO CHIUJHAUDE");
        assert_eq!(converted, "eĥoŝanĝo ĉiuĵaŭde EĤOŜANĜO ĈIUĴAŬDE");
    }

    #[test]
    fn test_h_system_to_utf8_mixed_case() {
        // For "au" each letter keeps its own case; for the h-digraphs a
        // capital in either position gives a capital letter.
        let converted = h_system_to_utf8("eHhoShanGho ChiuJhAUde ehHosHangHo cHiujHaUde");
        assert_eq!(converted, "eĤoŜanĜo ĈiuĴAŬde eĤoŜanĜo ĈiuĴaŬde");
    }

    #[test]
    fn test_h_system_ambiguous_h() {
        // "senchav" is on the exception list, so its "ch" must survive.
        let converted = h_system_to_utf8("Chiuj estas senchavaj ideoj.");
        assert_eq!(converted, "Ĉiuj estas senchavaj ideoj.");
    }

    #[test]
    fn test_h_system_ambiguous_u() {
        // "naur" is on the exception list, so "Nauron" keeps its plain "u".
        let converted = h_system_to_utf8("Hierau mi vizitis Nauron.");
        assert_eq!(converted, "Hieraŭ mi vizitis Nauron.");
    }
}