use crate::conversion::latin::CONSONANTS;
use crate::syllable::separate;
use crate::util::{remove_acute_accent, IsLetter, SplitIntoWords};
use unicode_normalization::UnicodeNormalization;
/// Converts Ainu text written in the Latin alphabet into katakana.
///
/// The input is split with `split_into_words`. A word is transliterated only
/// when every one of its characters satisfies `is_ainu_letter`; any other word
/// is copied to the output unchanged. The resulting pieces are concatenated
/// without a separator.
pub fn convert_latn_to_kana(latn: &str) -> String {
    /// Looks up the kana for the part of a syllable that precedes its coda
    /// (an optional onset followed by a vowel).
    ///
    /// Returns `None` when the string is not in the table.
    fn body_to_kana(body: &str) -> Option<&'static str> {
        Some(match body {
            // A leading apostrophe (ASCII or typographic) does not change the kana.
            "a" | "'a" | "’a" => "ア",
            "i" | "'i" | "’i" => "イ",
            "u" | "'u" | "’u" => "ウ",
            "e" | "'e" | "’e" => "エ",
            "o" | "'o" | "’o" => "オ",
            "ka" => "カ",
            "ki" => "キ",
            "ku" => "ク",
            "ke" => "ケ",
            "ko" => "コ",
            "sa" => "サ",
            "si" => "シ",
            "su" => "ス",
            "se" => "セ",
            "so" => "ソ",
            "ta" => "タ",
            "tu" => "ト゚",
            "te" => "テ",
            "to" => "ト",
            "ca" => "チャ",
            "ci" => "チ",
            "cu" => "チュ",
            "ce" => "チェ",
            "co" => "チョ",
            "na" => "ナ",
            "ni" => "ニ",
            "nu" => "ヌ",
            "ne" => "ネ",
            "no" => "ノ",
            "ha" => "ハ",
            "hi" => "ヒ",
            "hu" => "フ",
            "he" => "ヘ",
            "ho" => "ホ",
            "pa" => "パ",
            "pi" => "ピ",
            "pu" => "プ",
            "pe" => "ペ",
            "po" => "ポ",
            "ma" => "マ",
            "mi" => "ミ",
            "mu" => "ム",
            "me" => "メ",
            "mo" => "モ",
            "ya" => "ヤ",
            "yi" => "イ",
            "yu" => "ユ",
            "ye" => "イェ",
            "yo" => "ヨ",
            "ra" => "ラ",
            "ri" => "リ",
            "ru" => "ル",
            "re" => "レ",
            "ro" => "ロ",
            // ヰ / ヱ / ヲ are placeholders; `convert_word` spells them out afterwards.
            "wa" => "ワ",
            "wi" => "ヰ",
            "we" => "ヱ",
            "wo" => "ヲ",
            "nn" => "ン",
            "tt" => "ッ",
            _ => return None,
        })
    }

    /// Looks up the kana for a syllable-final consonant.
    ///
    /// `vowel` is the last character of the part of the syllable that precedes
    /// the coda; the kana chosen for `r`, `h` and `x` depends on it. Returns
    /// `None` for anything not in the table, including the empty string.
    fn coda_to_kana(coda: &str, vowel: Option<char>) -> Option<&'static str> {
        Some(match coda {
            "w" => "ゥ",
            "y" => "ィ",
            "m" => "ㇺ",
            "n" => "ㇴ",
            "s" => "ㇱ",
            "p" => "ㇷ゚",
            "t" => "ッ",
            // NOTE(review): the word is ASCII-lowercased before it is split into
            // syllables, so this arm presumably never matches — confirm whether
            // `separate` can yield upper-case letters.
            "T" => "ㇳ",
            "k" => "ㇰ",
            "r" => match vowel {
                Some('a') => "ㇻ",
                Some('i') => "ㇼ",
                Some('e') => "ㇾ",
                Some('o') => "ㇿ",
                // `u` and every other character share the same kana.
                _ => "ㇽ",
            },
            "h" | "x" => match vowel {
                Some('a') => "ㇵ",
                Some('i') => "ㇶ",
                Some('e') => "ㇸ",
                Some('o') => "ㇹ",
                // `u` and every other character share the same kana.
                _ => "ㇷ",
            },
            _ => return None,
        })
    }

    /// Transliterates a single word.
    fn convert_word(word: &str) -> String {
        // Normalise: drop `=`, strip acute accents, lower-case ASCII letters.
        let stripped = word.replace('=', "");
        let unaccented = remove_acute_accent(&stripped);
        let lowered = unaccented.to_ascii_lowercase();

        let syllables = separate(&lowered);
        let mut kana = String::new();
        for syllable in syllables.iter() {
            if syllable.is_empty() {
                continue;
            }

            // A trailing consonant is the coda; everything before it is the body.
            // The split point is computed from the character's UTF-8 width so it
            // always falls on a character boundary.
            let coda_start = match syllable.chars().last() {
                Some(last) if CONSONANTS.contains(last) => syllable.len() - last.len_utf8(),
                _ => syllable.len(),
            };
            let (body, coda) = syllable.split_at(coda_start);

            // Anything the tables do not know is passed through unchanged. Only
            // the body is passed through here: the coda is emitted by the next
            // statement, so falling back to the whole syllable would write the
            // coda twice.
            kana.push_str(body_to_kana(body).unwrap_or(body));
            kana.push_str(coda_to_kana(coda, body.chars().last()).unwrap_or(coda));
        }

        // Final spelling pass: the small ィ / ゥ / ㇴ are written with full-size
        // kana, ヱ / ヰ / ヲ are spelled out as ウェ / ウィ / ウォ, and typographic
        // apostrophes are dropped.
        kana.replace('ィ', "イ")
            .replace('ゥ', "ウ")
            .replace("ㇴ", "ン")
            .replace("ヱ", "ウェ")
            .replace("ヰ", "ウィ")
            .replace("ヲ", "ウォ")
            .replace("’", "")
    }

    latn.split_into_words()
        .into_iter()
        .map(|word| {
            if word.chars().all(|c| c.is_ainu_letter()) {
                convert_word(&word)
            } else {
                word.to_owned()
            }
        })
        .collect::<String>()
}
/// Converts Ainu text written in kana into the Latin alphabet.
///
/// The input is split with `split_into_words`. A word whose characters all
/// satisfy `is_ainu_letter` is romanised; in any other word the listed
/// Japanese punctuation marks are replaced by Latin punctuation padded with
/// spaces and every other character is kept. The resulting pieces are
/// concatenated without a separator.
pub fn convert_kana_to_latn(kana: &str) -> String {
    /// Returns `true` for the five lower-case Latin vowels.
    fn is_vowel(c: char) -> bool {
        matches!(c, 'a' | 'e' | 'i' | 'o' | 'u')
    }

    /// Romanises a kana together with the character that follows it, when the
    /// pair is one of the recognised two-character sequences.
    fn digraph_to_latn(first: char, second: Option<char>) -> Option<&'static str> {
        match (first, second) {
            ('イ', Some('ェ')) => Some("ye"),
            ('ウ', Some('ェ')) => Some("we"),
            ('ウ', Some('ィ')) => Some("wi"),
            ('ウ', Some('ォ')) => Some("wo"),
            ('ト', Some('ゥ')) => Some("tu"),
            ('ㇷ', Some('゚')) => Some("p"),
            ('フ', Some('\u{ff9f}')) => Some("p"),
            ('ト', Some('゚')) => Some("tu"),
            ('チ', Some('ャ')) => Some("ca"),
            ('チ', Some('ュ')) => Some("cu"),
            ('チ', Some('ェ')) => Some("ce"),
            ('チ', Some('ョ')) => Some("co"),
            ('オ', Some('イ')) => Some("oy"),
            ('エ', Some('イ')) => Some("ey"),
            ('ウ', Some('イ')) => Some("uy"),
            _ => None,
        }
    }

    /// Romanises a single kana; returns `None` for characters not in the table.
    fn kana_to_latn(c: char) -> Option<&'static str> {
        Some(match c {
            // Full-size katakana.
            'ア' => "a",
            'イ' => "i",
            'ウ' => "u",
            'エ' => "e",
            'オ' => "o",
            'カ' => "ka",
            'キ' => "ki",
            'ク' => "ku",
            'ケ' => "ke",
            'コ' => "ko",
            'サ' => "sa",
            'シ' => "si",
            'ス' => "su",
            'セ' => "se",
            'ソ' => "so",
            'タ' => "ta",
            'チ' => "ci",
            'テ' => "te",
            'ト' => "to",
            'ナ' => "na",
            'ニ' => "ni",
            'ヌ' => "nu",
            'ネ' => "ne",
            'ノ' => "no",
            'ハ' => "ha",
            'ヒ' => "hi",
            'フ' => "hu",
            'ヘ' => "he",
            'ホ' => "ho",
            'パ' => "pa",
            'ピ' => "pi",
            'プ' => "pu",
            'ペ' => "pe",
            'ポ' => "po",
            'マ' => "ma",
            'ミ' => "mi",
            'ム' => "mu",
            'メ' => "me",
            'モ' => "mo",
            'ヤ' => "ya",
            'ユ' => "yu",
            'ヨ' => "yo",
            'ラ' => "ra",
            'リ' => "ri",
            'ル' => "ru",
            'レ' => "re",
            'ロ' => "ro",
            'ワ' => "wa",
            'ヲ' => "wo",
            'ン' => "n",
            // Full-size ツ is read as a bare `t`, like the small ッ.
            'ツ' => "t",
            // Small katakana.
            'ㇺ' => "m",
            'ㇴ' => "n",
            'ゥ' => "w",
            'ィ' => "y",
            'ㇷ' => "h",
            'ㇱ' => "s",
            'ッ' | 'ㇳ' => "t",
            'ㇰ' => "k",
            'ㇵ' | 'ㇶ' | 'ㇸ' | 'ㇹ' => "x",
            'ァ' => "a",
            'ェ' => "e",
            'ォ' => "o",
            'ㇻ' | 'ㇼ' | 'ㇽ' | 'ㇾ' | 'ㇿ' => "r",
            // Hiragana.
            'あ' => "a",
            'い' => "i",
            'う' => "u",
            'え' => "e",
            'お' => "o",
            'か' => "ka",
            'き' => "ki",
            'く' => "ku",
            'け' => "ke",
            'こ' => "ko",
            'さ' => "sa",
            'し' => "si",
            'す' => "su",
            'せ' => "se",
            'そ' => "so",
            'た' => "ta",
            'ち' => "ci",
            'つ' => "tu",
            'て' => "te",
            'と' => "to",
            'な' => "na",
            'に' => "ni",
            'ぬ' => "nu",
            'ね' => "ne",
            'の' => "no",
            'は' => "ha",
            'ひ' => "hi",
            'ふ' => "hu",
            'へ' => "he",
            'ほ' => "ho",
            'ぱ' => "pa",
            'ぴ' => "pi",
            'ぷ' => "pu",
            'ぺ' => "pe",
            'ぽ' => "po",
            'ま' => "ma",
            'み' => "mi",
            'む' => "mu",
            'め' => "me",
            'も' => "mo",
            'や' => "ya",
            'ゆ' => "yu",
            'よ' => "yo",
            'ら' => "ra",
            'り' => "ri",
            'る' => "ru",
            'れ' => "re",
            'ろ' => "ro",
            'わ' => "wa",
            'ゐ' => "wi",
            'ゑ' => "we",
            'を' => "wo",
            'ん' => "n",
            'っ' => "t",
            _ => return None,
        })
    }

    /// Romanises a single word.
    fn convert_word(word: &str) -> String {
        // Map the listed sound-mark variants to the combining marks U+309A /
        // U+3099, then apply NFC so a mark is composed with the preceding kana
        // wherever a precomposed character exists.
        let mut chars = word
            .chars()
            .map(|c| match c {
                '゜' | '゚' => '\u{309A}',
                '゛' | '゙' => '\u{3099}',
                _ => c,
            })
            .nfc()
            .peekable();

        // One entry per romanised unit; unknown characters are kept as they are.
        let mut pieces: Vec<String> = Vec::new();
        while let Some(current) = chars.next() {
            let upcoming = chars.peek().copied();
            if let Some(digraph) = digraph_to_latn(current, upcoming) {
                pieces.push(digraph.to_owned());
                // The digraph consumed the following character as well.
                chars.next();
                continue;
            }
            pieces.push(match kana_to_latn(current) {
                Some(latn) => latn.to_owned(),
                None => current.to_string(),
            });
        }

        // Provisionally separate all units with `’`, then drop every `’` that
        // follows a vowel or precedes a non-vowel. Working on a `Vec<char>`
        // keeps the neighbour look-ups O(1) and keeps every index in character
        // units; mixing character indices with the byte length of the string
        // made the look-ahead run past the end when `’` was the last character.
        let joined: Vec<char> = pieces.join("’").chars().collect();
        let mut latn = String::new();
        for (i, &c) in joined.iter().enumerate() {
            if c == '’' {
                let follows_vowel = i > 0 && is_vowel(joined[i - 1]);
                let precedes_non_vowel = joined.get(i + 1).map_or(false, |&next| !is_vowel(next));
                if follows_vowel || precedes_non_vowel {
                    continue;
                }
            }
            latn.push(c);
        }
        latn
    }

    kana.split_into_words()
        .into_iter()
        .map(|word| {
            if word.chars().all(|c| c.is_ainu_letter()) {
                convert_word(&word)
            } else {
                // Not an Ainu word: swap Japanese punctuation for spaced Latin
                // punctuation and keep everything else.
                let mut out = String::new();
                for c in word.chars() {
                    match c {
                        '。' => out.push_str(". "),
                        '「' => out.push_str(" \""),
                        '」' => out.push_str("\" "),
                        '『' => out.push_str(" '"),
                        '』' => out.push_str("' "),
                        '!' => out.push_str("! "),
                        '?' => out.push_str("? "),
                        '、' => out.push_str(", "),
                        ' ' => out.push_str(" "),
                        _ => out.push(c),
                    }
                }
                out
            }
        })
        .collect::<String>()
}