/// Reports whether `ch` is a precomposed Hangul syllable that ends in a final
/// consonant (jongseong / batchim).
///
/// Any character outside the Hangul Syllables range `U+AC00..=U+D7A3` yields
/// `false`.
#[must_use]
pub(super) fn has_jongseong(ch: char) -> bool {
    match u32::from(ch) {
        // Within the syllable range the final-consonant slot is the offset
        // modulo 28; slot 0 means "no final consonant".
        cp @ 0xAC00..=0xD7A3 => (cp - 0xAC00) % 28 != 0,
        _ => false,
    }
}
/// Strips a final ㄹ from a precomposed Hangul syllable.
///
/// Returns `Some` with the same syllable minus its final consonant when `ch`
/// lies in `U+AC00..=U+D7A3` and its final-consonant slot is 8 (ㄹ).
/// Returns `None` for every other character, including syllables with a
/// different final consonant or none at all.
#[must_use]
pub(super) fn remove_jongseong_rieul(ch: char) -> Option<char> {
    /// Final-consonant slot occupied by ㄹ.
    const RIEUL: u32 = 8;

    match u32::from(ch) {
        // Subtracting the slot index lands on the slot-0 (no final) form.
        cp @ 0xAC00..=0xD7A3 if (cp - 0xAC00) % 28 == RIEUL => char::from_u32(cp - RIEUL),
        _ => None,
    }
}
/// Strips a final ㄴ from a precomposed Hangul syllable.
///
/// Returns `Some` with the same syllable minus its final consonant when `ch`
/// lies in `U+AC00..=U+D7A3` and its final-consonant slot is 4 (ㄴ).
/// Returns `None` for every other character, including syllables with a
/// different final consonant or none at all.
#[must_use]
pub(super) fn remove_jongseong_nieun(ch: char) -> Option<char> {
    /// Final-consonant slot occupied by ㄴ.
    const NIEUN: u32 = 4;

    match u32::from(ch) {
        // Subtracting the slot index lands on the slot-0 (no final) form.
        cp @ 0xAC00..=0xD7A3 if (cp - 0xAC00) % 28 == NIEUN => char::from_u32(cp - NIEUN),
        _ => None,
    }
}
/// Strips a final ㅂ from a precomposed Hangul syllable.
///
/// Returns `Some` with the same syllable minus its final consonant when `ch`
/// lies in `U+AC00..=U+D7A3` and its final-consonant slot is 17 (ㅂ).
/// Returns `None` for every other character, including syllables with a
/// different final consonant or none at all.
#[must_use]
pub(super) fn remove_jongseong_bieup(ch: char) -> Option<char> {
    /// Final-consonant slot occupied by ㅂ.
    const BIEUP: u32 = 17;

    match u32::from(ch) {
        // Subtracting the slot index lands on the slot-0 (no final) form.
        cp @ 0xAC00..=0xD7A3 if (cp - 0xAC00) % 28 == BIEUP => char::from_u32(cp - BIEUP),
        _ => None,
    }
}
/// Returns the medial vowel of a precomposed Hangul syllable as a
/// compatibility jamo.
///
/// Characters outside `U+AC00..=U+D7A3` are returned unchanged.
#[must_use]
pub(super) fn extract_vowel(ch: char) -> char {
    /// Compatibility jamo for the 21 medial vowels, in syllable-composition order.
    const MEDIALS: [char; 21] = [
        'ㅏ', 'ㅐ', 'ㅑ', 'ㅒ', 'ㅓ', 'ㅔ', 'ㅕ', 'ㅖ', 'ㅗ', 'ㅘ', 'ㅙ', 'ㅚ', 'ㅛ', 'ㅜ', 'ㅝ',
        'ㅞ', 'ㅟ', 'ㅠ', 'ㅡ', 'ㅢ', 'ㅣ',
    ];

    let cp = u32::from(ch);
    if !(0xAC00..=0xD7A3).contains(&cp) {
        return ch;
    }

    // Dropping the 28 final-consonant slots leaves initial * 21 + medial;
    // the remainder modulo 21 is the medial index (always < 21).
    let medial = (cp - 0xAC00) / 28 % 21;
    MEDIALS[medial as usize]
}
/// Rewrites conjoining final-consonant jamo in `text` as their Hangul
/// compatibility jamo counterparts (for example `U+11AF` becomes `ㄹ`).
///
/// Only the 27 final-consonant jamo listed below are translated; every other
/// character, including precomposed syllables and non-Hangul text, is copied
/// through unchanged.
#[must_use]
pub(super) fn normalize_jamo(text: &str) -> String {
    // A `match` keeps the table in static code: no per-call map construction,
    // no heap allocation beyond the returned `String`, and no hashing per char.
    text.chars()
        .map(|c| match c {
            'ᆨ' => 'ㄱ',
            'ᆩ' => 'ㄲ',
            'ᆪ' => 'ㄳ',
            'ᆫ' => 'ㄴ',
            'ᆬ' => 'ㄵ',
            'ᆭ' => 'ㄶ',
            'ᆮ' => 'ㄷ',
            'ᆯ' => 'ㄹ',
            'ᆰ' => 'ㄺ',
            'ᆱ' => 'ㄻ',
            'ᆲ' => 'ㄼ',
            'ᆳ' => 'ㄽ',
            'ᆴ' => 'ㄾ',
            'ᆵ' => 'ㄿ',
            'ᆶ' => 'ㅀ',
            'ᆷ' => 'ㅁ',
            'ᆸ' => 'ㅂ',
            'ᆹ' => 'ㅄ',
            'ᆺ' => 'ㅅ',
            'ᆻ' => 'ㅆ',
            'ᆼ' => 'ㅇ',
            'ᆽ' => 'ㅈ',
            'ᆾ' => 'ㅊ',
            'ᆿ' => 'ㅋ',
            'ᇀ' => 'ㅌ',
            'ᇁ' => 'ㅍ',
            'ᇂ' => 'ㅎ',
            other => other,
        })
        .collect()
}
/// Unit tests for the Hangul syllable and jamo helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_has_jongseong_with_batchim() {
        assert!(has_jongseong('밥'));
        assert!(has_jongseong('닭'));
        assert!(has_jongseong('한'));
    }

    #[test]
    fn test_has_jongseong_without_batchim() {
        assert!(!has_jongseong('가'));
        assert!(!has_jongseong('나'));
        assert!(!has_jongseong('보'));
        assert!(!has_jongseong('a'));
    }

    #[test]
    fn test_remove_jongseong_rieul_removes_rieul() {
        assert_eq!(remove_jongseong_rieul('할'), Some('하'));
        assert_eq!(remove_jongseong_rieul('갈'), Some('가'));
        assert_eq!(remove_jongseong_rieul('볼'), Some('보'));
    }

    #[test]
    fn test_remove_jongseong_rieul_returns_none_for_other_jongseong() {
        assert_eq!(remove_jongseong_rieul('가'), None);
        assert_eq!(remove_jongseong_rieul('밥'), None);
        assert_eq!(remove_jongseong_rieul('한'), None);
    }

    #[test]
    fn test_remove_jongseong_nieun_removes_nieun() {
        assert_eq!(remove_jongseong_nieun('한'), Some('하'));
        assert_eq!(remove_jongseong_nieun('간'), Some('가'));
    }

    #[test]
    fn test_remove_jongseong_nieun_returns_none_for_other_input() {
        assert_eq!(remove_jongseong_nieun('가'), None);
        assert_eq!(remove_jongseong_nieun('할'), None);
        assert_eq!(remove_jongseong_nieun('a'), None);
    }

    #[test]
    fn test_remove_jongseong_bieup_removes_bieup() {
        assert_eq!(remove_jongseong_bieup('밥'), Some('바'));
        assert_eq!(remove_jongseong_bieup('합'), Some('하'));
    }

    #[test]
    fn test_remove_jongseong_bieup_returns_none_for_other_input() {
        assert_eq!(remove_jongseong_bieup('가'), None);
        assert_eq!(remove_jongseong_bieup('한'), None);
        assert_eq!(remove_jongseong_bieup('a'), None);
    }

    #[test]
    fn test_extract_vowel_returns_medial_of_syllable() {
        assert_eq!(extract_vowel('가'), 'ㅏ');
        assert_eq!(extract_vowel('보'), 'ㅗ');
        assert_eq!(extract_vowel('한'), 'ㅏ');
        assert_eq!(extract_vowel('기'), 'ㅣ');
    }

    #[test]
    fn test_extract_vowel_passes_through_non_syllables() {
        assert_eq!(extract_vowel('a'), 'a');
        assert_eq!(extract_vowel('ㅏ'), 'ㅏ');
    }

    #[test]
    fn test_normalize_jamo_converts_jongseong_to_compat() {
        let jongseong_rieul = '\u{11AF}';
        assert_eq!(normalize_jamo(&jongseong_rieul.to_string()), "ㄹ");
        let jongseong_nieun = '\u{11AB}';
        assert_eq!(normalize_jamo(&jongseong_nieun.to_string()), "ㄴ");
    }

    #[test]
    fn test_normalize_jamo_passes_through_regular_syllables() {
        assert_eq!(normalize_jamo("가나다"), "가나다");
        assert_eq!(normalize_jamo("hello"), "hello");
    }
}