mod dict;
/// Transliterates the Latin-script words in `text` into Persian script.
///
/// A word is a maximal run of ASCII letters and apostrophes. Each word is
/// converted with `convert_word`; every other character (whitespace,
/// punctuation, digits, non-ASCII text, ...) is copied to the output
/// unchanged and in its original position.
///
/// Returns the converted text as a new `String`; an empty input yields an
/// empty output.
#[must_use]
pub fn to_persian(text: &str) -> String {
    // The Persian letters produced here take two bytes each in UTF-8, so
    // twice the input length is a reasonable first capacity estimate.
    let mut out = String::with_capacity(text.len() * 2);
    let mut current_word = String::new();
    for c in text.chars() {
        if c.is_ascii_alphabetic() || c == '\'' {
            current_word.push(c);
            continue;
        }
        // A non-word character terminates the pending word, if there is one.
        if !current_word.is_empty() {
            out.push_str(&convert_word(&current_word));
            current_word.clear();
        }
        out.push(c);
    }
    // Flush a word that runs up to the very end of the input.
    if !current_word.is_empty() {
        out.push_str(&convert_word(&current_word));
    }
    out
}
/// Converts a single Latin-script word into Persian script.
///
/// The word is lowercased, then looked up via `dict::lookup`. When the lookup
/// returns a value it is used as-is; otherwise the lowercased word is
/// transliterated by `char_level`.
fn convert_word(word: &str) -> String {
    let normalized = word.to_lowercase();
    match dict::lookup(&normalized) {
        Some(entry) => entry.to_owned(),
        None => char_level(&normalized),
    }
}
/// Transliterates `word` one grapheme at a time, scanning left to right.
///
/// At each position the next two characters are tried as a digraph first
/// (via `digraph`); if they do not form one, the single character at that
/// position is converted with `monograph` instead.
fn char_level(word: &str) -> String {
    let letters: Vec<char> = word.chars().collect();
    let mut persian = String::new();
    let mut idx = 0;
    while let Some(&current) = letters.get(idx) {
        // `get` yields `None` when fewer than two characters remain, so the
        // digraph attempt is skipped for the final character.
        let two_letter = letters
            .get(idx..idx + 2)
            .and_then(|pair| digraph(&pair.iter().collect::<String>()));
        match two_letter {
            Some(mapped) => {
                persian.push_str(mapped);
                idx += 2;
            }
            None => {
                persian.push_str(monograph(current, idx, &letters));
                idx += 1;
            }
        }
    }
    persian
}
/// Looks up the Persian spelling of a two-letter Latin combination.
///
/// Returns `None` when `s` is not one of the known digraphs. The comparison
/// is exact, so callers must pass lowercase input.
fn digraph(s: &str) -> Option<&'static str> {
    /// Latin digraph paired with its Persian replacement.
    const TABLE: [(&str, &str); 9] = [
        ("kh", "خ"),
        ("sh", "ش"),
        ("ch", "چ"),
        ("zh", "ژ"),
        ("gh", "ق"),
        ("oo", "و"),
        ("ou", "و"),
        ("aa", "ا"),
        ("ee", "ی"),
    ];
    TABLE
        .iter()
        .find(|(latin, _)| *latin == s)
        .map(|&(_, persian)| persian)
}
/// Maps a single lowercase Latin letter (or an apostrophe) to Persian script.
///
/// `position` is the index of `c` inside `chars`, the word being converted.
/// Two letters depend on where they sit in the word:
///
/// * `a` becomes "آ" at the start of a word longer than one character, and
///   "ا" everywhere else;
/// * `e` becomes "ه" as the last character of the word and is dropped
///   (empty string) everywhere else.
///
/// Any character without a mapping, including uppercase letters, yields an
/// empty string.
fn monograph(c: char, position: usize, chars: &[char]) -> &'static str {
    // `checked_sub` instead of `len() - 1`: an empty `chars` must not
    // underflow (which panics in debug builds); it simply has no last index.
    let is_last = chars.len().checked_sub(1) == Some(position);
    match c {
        'a' if position == 0 && chars.len() > 1 => "آ",
        'a' => "ا",
        'b' => "ب",
        'c' | 'k' => "ک",
        'd' => "د",
        'e' if is_last => "ه",
        'e' => "",
        'f' => "ف",
        'g' => "گ",
        'h' => "ه",
        'i' | 'y' => "ی",
        'j' => "ج",
        'l' => "ل",
        'm' => "م",
        'n' => "ن",
        'o' | 'u' | 'v' | 'w' => "و",
        'p' => "پ",
        'q' => "ق",
        'r' => "ر",
        's' => "س",
        't' => "ت",
        'x' => "خ",
        'z' => "ز",
        '\'' => "ع",
        _ => "",
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `to_persian` on each input and compares it with the paired
    /// expected output.
    fn check(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(to_persian(input), expected);
        }
    }

    #[test]
    fn dict_words_take_priority() {
        check(&[("salam", "سلام"), ("merci", "مرسی")]);
    }

    #[test]
    fn case_insensitive() {
        check(&[("Salam", "سلام"), ("MERCI", "مرسی")]);
    }

    #[test]
    fn punctuation_passes_through() {
        check(&[("salam!", "سلام!"), ("chetori?", "چطوری?")]);
    }

    #[test]
    fn whitespace_preserved() {
        check(&[("man khoobam", "من خوبم")]);
    }

    #[test]
    fn char_level_fallback() {
        check(&[("ketab", "کتاب")]);
    }

    #[test]
    fn digraphs_kh_sh_ch() {
        check(&[("khosh", "خوش"), ("char", "چار")]);
    }

    #[test]
    fn empty_string() {
        check(&[("", "")]);
    }

    #[test]
    fn mixed_persian_passes_through() {
        check(&[("سلام salam", "سلام سلام")]);
    }
}