/// Extension trait that wraps a source of characters in a [`Disassemble`] iterator.
///
/// `T` is the `char` iterator that the returned adapter reads from.
pub trait HangulDissassemble<T: Iterator<Item = char>> {
/// Consumes `self` and returns a [`Disassemble`] adapter over its characters.
fn disassemble(self) -> Disassemble<T>;
}
/// Iterator adapter that yields the characters of `input`, with Hangul broken
/// down into individual compatibility jamo.
pub struct Disassemble<T>
where
T: Iterator<Item = char>,
{
/// Underlying character source.
input: T,
/// Jamo produced by the most recent decomposition that have not been yielded yet.
buffer: Vec<char>,
}
impl<'a, T: Iterator<Item = char> + 'a> Iterator for Disassemble<T> {
    type Item = char;

    /// Yields the next jamo (or untouched non-Hangul character).
    ///
    /// Jamo left over from the previously decomposed character are drained
    /// first; only when none remain is a new character pulled from the input.
    fn next(&mut self) -> Option<Self::Item> {
        if self.buffer.is_empty() {
            // Nothing pending: read one more character, or finish when the input is done.
            let source = self.input.next()?;
            // `decompose` returns the first piece and queues the rest in `buffer`;
            // characters it does not recognise are passed through unchanged.
            let head = decompose(source, &mut self.buffer);
            return Some(head.unwrap_or(source));
        }
        Some(self.buffer.remove(0))
    }
}
/// Lets a string slice be disassembled directly, iterating over its `chars()`.
impl<'a> HangulDissassemble<std::str::Chars<'a>> for &'a str {
    fn disassemble(self) -> Disassemble<std::str::Chars<'a>> {
        let input = self.chars();
        // Start with no pending jamo.
        Disassemble {
            input,
            buffer: Vec::new(),
        }
    }
}
/// Lets a borrowed `String` be disassembled directly, iterating over its `chars()`.
impl<'a> HangulDissassemble<std::str::Chars<'a>> for &'a String {
    fn disassemble(self) -> Disassemble<std::str::Chars<'a>> {
        let input = self.chars();
        // Start with no pending jamo.
        Disassemble {
            input,
            buffer: Vec::new(),
        }
    }
}
/// Lets any `char` iterator be disassembled by wrapping it as-is.
impl<'a, T: Iterator<Item = char>> HangulDissassemble<T> for T {
    fn disassemble(self) -> Disassemble<T> {
        // Start with no pending jamo.
        let buffer = Vec::new();
        Disassemble {
            input: self,
            buffer,
        }
    }
}
/// Extension trait that consumes a value and assembles it into a `String`.
pub trait HangulAssemble {
/// Consumes `self` and returns the assembled text.
fn assemble(self) -> String;
}
/// Pending, not-yet-emitted jamo tracked by the assembler while it scans its input.
#[derive(Clone, Copy)]
enum CombineStage {
/// Nothing is pending.
None,
/// A lone initial consonant is pending.
ChosungOnly(char),
/// A lone vowel (possibly already a compound vowel) is pending.
JungsungOnly(char),
/// An initial consonant followed by a vowel is pending.
ChosungJungsung(char, char),
/// Initial consonant, vowel and a single final consonant are pending.
NonCombinedFull(char, char, char),
/// Initial consonant, vowel and a compound final consonant are pending.
CombinedFull {
/// Initial consonant.
cho: char,
/// Vowel.
jung: char,
/// First consonant of the compound final.
jong: char,
/// Second consonant of the compound final.
secondary_jong: char,
/// The compound final built from `jong` and `secondary_jong`.
combined_jong: char,
},
}
/// Assembles any `char` iterator of compatibility jamo into composed syllables.
///
/// Characters that cannot take part in a syllable are copied to the output unchanged.
impl<T: Iterator<Item = char>> HangulAssemble for T {
fn assemble(self) -> String {
// `stage` holds the jamo collected for the syllable currently being built;
// `output` receives finished syllables and pass-through characters.
let mut stage = CombineStage::None;
let mut output = String::new();
for letter in self {
// Arm order matters: for each stage the guarded consonant arm comes first,
// then the vowel arm, then a catch-all that flushes the pending jamo and
// copies `letter` through (this also catches compound consonants such as 'ㄳ').
match (stage, letter) {
// --- Nothing pending ---
(CombineStage::None, 'ㄱ'..='ㅎ') if !is_combined(letter) => {
stage = CombineStage::ChosungOnly(letter);
}
(CombineStage::None, 'ㅏ'..='ㅣ') => {
stage = CombineStage::JungsungOnly(letter);
}
(CombineStage::None, _) => {
output.push(letter);
}
// --- A lone consonant is pending ---
(CombineStage::ChosungOnly(prev_cho), 'ㄱ'..='ㅎ') if !is_combined(letter) => {
// Two consonants in a row: emit the first, keep the second pending.
stage = CombineStage::ChosungOnly(letter);
output.push(prev_cho);
}
(CombineStage::ChosungOnly(prev_cho), 'ㅏ'..='ㅣ') => {
stage = CombineStage::ChosungJungsung(prev_cho, letter);
}
(CombineStage::ChosungOnly(prev_cho), _) => {
stage = CombineStage::None;
output.push(prev_cho);
output.push(letter);
}
// --- A lone vowel is pending ---
(CombineStage::JungsungOnly(prev_jung), 'ㄱ'..='ㅎ') if !is_combined(letter) => {
stage = CombineStage::ChosungOnly(letter);
output.push(prev_jung);
}
(CombineStage::JungsungOnly(prev_jung), 'ㅏ'..='ㅣ') => {
// Merge into a compound vowel when possible; otherwise emit the old vowel.
if let Some(combined) = combine_jungsung(prev_jung, letter) {
stage = CombineStage::JungsungOnly(combined);
} else {
stage = CombineStage::JungsungOnly(letter);
output.push(prev_jung);
}
}
(CombineStage::JungsungOnly(prev_jung), _) => {
stage = CombineStage::None;
output.push(prev_jung);
output.push(letter);
}
// --- Initial consonant + vowel are pending ---
(CombineStage::ChosungJungsung(cho, jung), 'ㄱ'..='ㅎ') if !is_combined(letter) => {
// A consonant that can be a final is attached tentatively; one that cannot
// ('ㄸ', 'ㅃ', 'ㅉ') closes the syllable and starts a new one.
if is_jongsung(letter) {
stage = CombineStage::NonCombinedFull(cho, jung, letter);
} else {
stage = CombineStage::ChosungOnly(letter);
output.push(build_hangul(cho, jung, None));
}
}
(CombineStage::ChosungJungsung(cho, jung), 'ㅏ'..='ㅣ') => {
if let Some(combined) = combine_jungsung(jung, letter) {
stage = CombineStage::ChosungJungsung(cho, combined);
} else {
stage = CombineStage::JungsungOnly(letter);
output.push(build_hangul(cho, jung, None));
}
}
(CombineStage::ChosungJungsung(cho, jung), _) => {
stage = CombineStage::None;
output.push(build_hangul(cho, jung, None));
output.push(letter);
}
// --- Initial + vowel + single final consonant are pending ---
(CombineStage::NonCombinedFull(cho, jung, jong), 'ㄱ'..='ㅎ')
if !is_combined(letter) =>
{
// Try to extend the final into a compound final; otherwise the syllable
// is complete and `letter` starts the next one.
if let Some(combined) = combine_jongsung(jong, letter) {
stage = CombineStage::CombinedFull {
cho,
jung,
jong,
secondary_jong: letter,
combined_jong: combined,
};
} else {
stage = CombineStage::ChosungOnly(letter);
output.push(build_hangul(cho, jung, Some(jong)));
}
}
(CombineStage::NonCombinedFull(cho, jung, jong), 'ㅏ'..='ㅣ') => {
// A vowel follows: the tentative final becomes the initial consonant of
// the next syllable, so the current one is emitted without a final.
stage = CombineStage::ChosungJungsung(jong, letter);
output.push(build_hangul(cho, jung, None));
}
(CombineStage::NonCombinedFull(cho, jung, jong), _) => {
stage = CombineStage::None;
output.push(build_hangul(cho, jung, Some(jong)));
output.push(letter);
}
// --- Initial + vowel + compound final consonant are pending ---
(
CombineStage::CombinedFull {
cho,
jung,
combined_jong,
..
},
'ㄱ'..='ㅎ',
) if !is_combined(letter) => {
stage = CombineStage::ChosungOnly(letter);
output.push(build_hangul(cho, jung, Some(combined_jong)));
}
(
CombineStage::CombinedFull {
cho,
jung,
jong,
secondary_jong,
..
},
'ㅏ'..='ㅣ',
) => {
// A vowel follows: split the compound final, keeping its first half on this
// syllable and moving its second half to the start of the next syllable.
stage = CombineStage::ChosungJungsung(secondary_jong, letter);
output.push(build_hangul(cho, jung, Some(jong)));
}
(
CombineStage::CombinedFull {
cho,
jung,
combined_jong,
..
},
_,
) => {
stage = CombineStage::None;
output.push(build_hangul(cho, jung, Some(combined_jong)));
output.push(letter);
}
}
}
// Input exhausted: flush whatever is still pending.
match stage {
CombineStage::None => {}
CombineStage::ChosungOnly(cho) => {
output.push(cho);
}
CombineStage::JungsungOnly(jung) => {
output.push(jung);
}
CombineStage::ChosungJungsung(cho, jung) => {
output.push(build_hangul(cho, jung, None));
}
CombineStage::NonCombinedFull(cho, jung, jong) => {
output.push(build_hangul(cho, jung, Some(jong)));
}
CombineStage::CombinedFull {
cho,
jung,
combined_jong,
..
} => {
output.push(build_hangul(cho, jung, Some(combined_jong)));
}
}
output
}
}
/// A single Hangul syllable stored as three indices, in order: initial consonant,
/// vowel, and final consonant (where `0` means "no final consonant").
#[derive(Debug)]
pub struct Hangul(u32, u32, u32);
impl Hangul {
    /// Builds a syllable from compatibility jamo.
    ///
    /// Returns `None` when `cho` is not a valid initial consonant, when `jung`
    /// is outside `'ㅏ'..='ㅣ'`, or when `jong` is `Some` character that is not
    /// a valid final consonant. `jong == None` means "no final consonant".
    pub fn new(cho: char, jung: char, jong: Option<char>) -> Option<Self> {
        let cho = try_calculate_chosung_index(cho)?;
        let jong = try_calculate_jongsung_index(jong)?;
        if ('ㅏ'..='ㅣ').contains(&jung) {
            // Vowel index is the offset from 'ㅏ' (U+314F).
            Some(Hangul(cho, jung as u32 - 0x314F, jong))
        } else {
            None
        }
    }

    /// Returns the syllable's parts as compatibility jamo:
    /// `(initial consonant, vowel, optional final consonant)`.
    pub fn parts(&self) -> (char, char, Option<char>) {
        (
            HANGUL_CHOSUNG_TO_COMPATIBILITY[self.0 as usize],
            HANGUL_JUNGSUNG_TO_COMPATIBILITY[self.1 as usize],
            if self.2 == 0 {
                None
            } else {
                // Final-consonant indices are 1-based; the table is 0-based.
                Some(HANGUL_JONGSUNG_TO_COMPATIBILITY[self.2 as usize - 1])
            },
        )
    }

    /// Replaces the initial consonant.
    ///
    /// # Errors
    /// Returns `Err(cho)` and leaves the syllable untouched when `cho` is not
    /// a valid initial consonant.
    pub fn set_cho(&mut self, cho: char) -> Result<(), char> {
        self.0 = try_calculate_chosung_index(cho).ok_or(cho)?;
        Ok(())
    }

    /// Replaces the vowel.
    ///
    /// # Errors
    /// Returns `Err(jung)` and leaves the syllable untouched when `jung` is
    /// outside `'ㅏ'..='ㅣ'`.
    pub fn set_jung(&mut self, jung: char) -> Result<(), char> {
        if ('ㅏ'..='ㅣ').contains(&jung) {
            self.1 = jung as u32 - 0x314F;
            Ok(())
        } else {
            Err(jung)
        }
    }

    /// Replaces the final consonant; `None` removes it.
    ///
    /// # Errors
    /// Returns `Err(c)` and leaves the syllable untouched when `jong` is
    /// `Some(c)` and `c` is not a valid final consonant.
    pub fn set_jong(&mut self, jong: Option<char>) -> Result<(), char> {
        match (try_calculate_jongsung_index(jong), jong) {
            (Some(index), _) => {
                // The final consonant lives in field 2. Writing it to field 1
                // (the vowel) would corrupt the syllable and could push the
                // composed code point outside the Hangul syllable block.
                self.2 = index;
                Ok(())
            }
            (None, Some(rejected)) => Err(rejected),
            (None, None) => unreachable!("a missing final consonant always maps to index 0"),
        }
    }
}
/// Splits a precomposed syllable (`'가'..='힣'`) into its three indices.
///
/// Any other character is handed back unchanged as the error value.
impl std::convert::TryFrom<char> for Hangul {
    type Error = char;

    fn try_from(letter: char) -> Result<Self, char> {
        if letter < '가' || letter > '힣' {
            return Err(letter);
        }
        // Syllables are laid out as ((initial * 21) + vowel) * 28 + final from U+AC00.
        let offset = letter as u32 - 0xAC00;
        let jongsung = offset % 28;
        let jungsung = (offset / 28) % 21;
        let chosung = offset / 28 / 21;
        Ok(Hangul(chosung, jungsung, jongsung))
    }
}
impl From<Hangul> for char {
fn from(hangul: Hangul) -> char {
std::char::from_u32(0xAC00 + (hangul.0 * 21 + hangul.1) * 28 + hangul.2).unwrap()
}
}
/// Compatibility jamo for each initial-consonant index (0..=18).
const HANGUL_CHOSUNG_TO_COMPATIBILITY: [char; 19] = [
'ㄱ', 'ㄲ', 'ㄴ', 'ㄷ', 'ㄸ', 'ㄹ', 'ㅁ', 'ㅂ', 'ㅃ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅉ', 'ㅊ', 'ㅋ',
'ㅌ', 'ㅍ', 'ㅎ',
];
/// Compatibility jamo for each vowel index (0..=20).
const HANGUL_JUNGSUNG_TO_COMPATIBILITY: [char; 21] = [
'ㅏ', 'ㅐ', 'ㅑ', 'ㅒ', 'ㅓ', 'ㅔ', 'ㅕ', 'ㅖ', 'ㅗ', 'ㅘ', 'ㅙ', 'ㅚ', 'ㅛ', 'ㅜ', 'ㅝ', 'ㅞ',
'ㅟ', 'ㅠ', 'ㅡ', 'ㅢ', 'ㅣ',
];
/// Compatibility jamo for each final-consonant index, shifted by one:
/// entry `i` belongs to final-consonant index `i + 1` (index 0 means "no final").
const HANGUL_JONGSUNG_TO_COMPATIBILITY: [char; 27] = [
'ㄱ', 'ㄲ', 'ㄳ', 'ㄴ', 'ㄵ', 'ㄶ', 'ㄷ', 'ㄹ', 'ㄺ', 'ㄻ', 'ㄼ', 'ㄽ', 'ㄾ', 'ㄿ', 'ㅀ', 'ㅁ',
'ㅂ', 'ㅄ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ',
];
/// Splits a compound vowel into its two component vowels.
///
/// Returns `None` for anything that is not one of the seven compound vowels.
fn break_jungsung(letter: char) -> Option<[char; 2]> {
    const COMPOUND_VOWELS: [(char, [char; 2]); 7] = [
        ('ㅘ', ['ㅗ', 'ㅏ']),
        ('ㅙ', ['ㅗ', 'ㅐ']),
        ('ㅚ', ['ㅗ', 'ㅣ']),
        ('ㅝ', ['ㅜ', 'ㅓ']),
        ('ㅞ', ['ㅜ', 'ㅔ']),
        ('ㅟ', ['ㅜ', 'ㅣ']),
        ('ㅢ', ['ㅡ', 'ㅣ']),
    ];
    COMPOUND_VOWELS
        .iter()
        .find(|entry| entry.0 == letter)
        .map(|entry| entry.1)
}
/// Splits a compound consonant into its two component consonants.
///
/// Returns `None` for anything that is not one of the eleven compound consonants.
fn break_jongsung(letter: char) -> Option<[char; 2]> {
    let parts = match letter {
        'ㄳ' => ['ㄱ', 'ㅅ'],
        'ㄵ' => ['ㄴ', 'ㅈ'],
        'ㄶ' => ['ㄴ', 'ㅎ'],
        'ㄺ' => ['ㄹ', 'ㄱ'],
        'ㄻ' => ['ㄹ', 'ㅁ'],
        'ㄼ' => ['ㄹ', 'ㅂ'],
        'ㄽ' => ['ㄹ', 'ㅅ'],
        'ㄾ' => ['ㄹ', 'ㅌ'],
        'ㄿ' => ['ㄹ', 'ㅍ'],
        'ㅀ' => ['ㄹ', 'ㅎ'],
        'ㅄ' => ['ㅂ', 'ㅅ'],
        _ => return None,
    };
    Some(parts)
}
/// Breaks `c` into jamo, returning the first one and appending the rest to `buffer`.
///
/// * Compatibility consonants and vowels are split when they are compounds and
///   returned as-is otherwise.
/// * Precomposed syllables yield their initial consonant; the vowel and any
///   final consonant (each split if compound) are appended to `buffer`.
/// * Every other character returns `None` and leaves `buffer` untouched.
fn decompose(c: char, buffer: &mut Vec<char>) -> Option<char> {
    match c {
        // Compatibility consonants, U+3131..=U+314E.
        'ㄱ'..='ㅎ' => match break_jongsung(c) {
            Some(parts) => {
                buffer.push(parts[1]);
                Some(parts[0])
            }
            None => Some(c),
        },
        // Compatibility vowels, U+314F..=U+3163.
        'ㅏ'..='ㅣ' => match break_jungsung(c) {
            Some(parts) => {
                buffer.push(parts[1]);
                Some(parts[0])
            }
            None => Some(c),
        },
        // Precomposed syllables, U+AC00..=U+D7A3.
        '가'..='힣' => {
            let offset = c as usize - 0xAC00;
            let jong_index = offset % 28;
            let jung_index = (offset / 28) % 21;
            let cho_index = offset / 28 / 21;

            let vowel = HANGUL_JUNGSUNG_TO_COMPATIBILITY[jung_index];
            match break_jungsung(vowel) {
                Some(parts) => buffer.extend_from_slice(&parts),
                None => buffer.push(vowel),
            }

            // Index 0 means the syllable has no final consonant.
            if jong_index > 0 {
                let last = HANGUL_JONGSUNG_TO_COMPATIBILITY[jong_index - 1];
                match break_jongsung(last) {
                    Some(parts) => buffer.extend_from_slice(&parts),
                    None => buffer.push(last),
                }
            }

            Some(HANGUL_CHOSUNG_TO_COMPATIBILITY[cho_index])
        }
        _ => None,
    }
}
/// Composes a syllable character from compatibility jamo.
///
/// # Panics
/// Panics when `Hangul::new` rejects the given jamo.
fn build_hangul(cho: char, jung: char, jong: Option<char>) -> char {
    let syllable = Hangul::new(cho, jung, jong).unwrap();
    char::from(syllable)
}
/// Joins two vowels into a compound vowel, if that pair forms one.
///
/// Order matters: `a` is the first vowel, `b` the second.
fn combine_jungsung(a: char, b: char) -> Option<char> {
    const COMPOUND_VOWELS: [(char, char, char); 7] = [
        ('ㅗ', 'ㅏ', 'ㅘ'),
        ('ㅗ', 'ㅐ', 'ㅙ'),
        ('ㅗ', 'ㅣ', 'ㅚ'),
        ('ㅜ', 'ㅓ', 'ㅝ'),
        ('ㅜ', 'ㅔ', 'ㅞ'),
        ('ㅜ', 'ㅣ', 'ㅟ'),
        ('ㅡ', 'ㅣ', 'ㅢ'),
    ];
    COMPOUND_VOWELS
        .iter()
        .find(|entry| entry.0 == a && entry.1 == b)
        .map(|entry| entry.2)
}
/// Joins two consonants into a compound final consonant, if that pair forms one.
///
/// Order matters: `a` is the first consonant, `b` the second. This is the
/// inverse of `break_jongsung`, so it covers all eleven compound consonants,
/// including `'ㄹ' + 'ㅍ' = 'ㄿ'` (needed to reassemble syllables such as "읊").
fn combine_jongsung(a: char, b: char) -> Option<char> {
    match (a, b) {
        ('ㄱ', 'ㅅ') => Some('ㄳ'),
        ('ㄴ', 'ㅈ') => Some('ㄵ'),
        ('ㄴ', 'ㅎ') => Some('ㄶ'),
        ('ㄹ', 'ㄱ') => Some('ㄺ'),
        ('ㄹ', 'ㅁ') => Some('ㄻ'),
        ('ㄹ', 'ㅂ') => Some('ㄼ'),
        ('ㄹ', 'ㅅ') => Some('ㄽ'),
        ('ㄹ', 'ㅌ') => Some('ㄾ'),
        ('ㄹ', 'ㅍ') => Some('ㄿ'),
        ('ㄹ', 'ㅎ') => Some('ㅀ'),
        ('ㅂ', 'ㅅ') => Some('ㅄ'),
        _ => None,
    }
}
/// Reports whether `a` is one of the eleven compound consonants.
fn is_combined(a: char) -> bool {
    const COMPOUND_CONSONANTS: [char; 11] = [
        'ㄳ', 'ㄵ', 'ㄶ', 'ㄺ', 'ㄻ', 'ㄼ', 'ㄽ', 'ㄾ', 'ㄿ', 'ㅀ', 'ㅄ',
    ];
    COMPOUND_CONSONANTS.contains(&a)
}
/// Reports whether `a` can be used as a final consonant: any compatibility
/// consonant in `'ㄱ'..='ㅎ'` except 'ㄸ', 'ㅃ' and 'ㅉ'.
fn is_jongsung(a: char) -> bool {
    const NEVER_FINAL: [char; 3] = ['ㄸ', 'ㅃ', 'ㅉ'];
    ('ㄱ'..='ㅎ').contains(&a) && !NEVER_FINAL.contains(&a)
}
/// Maps a compatibility consonant to its initial-consonant index (0..=18).
///
/// Returns `None` for compound consonants and for any non-consonant character.
fn try_calculate_chosung_index(cho: char) -> Option<u32> {
    // Number of compatibility-block characters before `cho` that are not
    // initial consonants and must be skipped.
    let skipped = match cho {
        'ㄱ'..='ㄲ' => 0,
        'ㄴ' => 1,
        'ㄷ'..='ㄹ' => 3,
        'ㅁ'..='ㅃ' => 10,
        'ㅅ'..='ㅎ' => 11,
        _ => return None,
    };
    Some(cho as u32 - 0x3131 - skipped)
}
/// Maps an optional compatibility consonant to its final-consonant index.
///
/// `None` (no final consonant) maps to `Some(0)`; valid finals map to `1..=27`.
/// Returns `None` for 'ㄸ', 'ㅃ', 'ㅉ' and for any non-consonant character.
fn try_calculate_jongsung_index(jong: Option<char>) -> Option<u32> {
    let jong = match jong {
        Some(letter) => letter,
        None => return Some(0),
    };
    // Number of compatibility-block characters before `jong` that are not
    // final consonants and must be skipped.
    let skipped = match jong {
        'ㄱ'..='ㄷ' => 0,
        'ㄹ'..='ㅂ' => 1,
        'ㅄ'..='ㅈ' => 2,
        'ㅊ'..='ㅎ' => 3,
        _ => return None,
    };
    Some(jong as u32 - 0x3131 + 1 - skipped)
}
// Unit and property tests for disassembly, assembly and `Hangul` conversions.
#[cfg(test)]
mod test {
use super::*;
use proptest::*;
// --- String disassembly ---
#[test]
fn disassemble_no_jongsung() {
assert_eq!("ㅇㅣㅅㅏ", "이사".disassemble().collect::<String>());
}
#[test]
fn disassemble_jongsung() {
assert_eq!("ㅇㅣㅅㅏㅇ", "이상".disassemble().collect::<String>());
}
#[test]
fn disassemble_compound_jongsung() {
assert_eq!("ㅇㅏㄴㅈㄷㅏ", "앉다".disassemble().collect::<String>());
}
#[test]
fn disassemble_compound_jungsung() {
assert_eq!("ㅊㅏㅁㅇㅗㅣ", "참외".disassemble().collect::<String>());
}
// Standalone compound consonants are split into their two parts.
#[test]
fn disassmble_consonant() {
assert_eq!(
"ㄱㅅㄴㅈㄴㅎㄹㄱㄹㅁㄹㅂㄹㅅㄹㅌㄹㅍㄹㅎㅂㅅ",
"ㄳㄵㄶㄺㄻㄼㄽㄾㄿㅀㅄ".disassemble().collect::<String>()
);
}
// Standalone compound vowels are split into their two parts.
#[test]
fn disassmble_vowel() {
assert_eq!(
"ㅗㅏㅗㅐㅗㅣㅜㅓㅜㅔㅜㅣㅡㅣ",
"ㅘㅙㅚㅝㅞㅟㅢ".disassemble().collect::<String>()
);
}
// --- String assembly ---
#[test]
fn assemble_no_jongsung() {
assert_eq!("고구마", "ㄱㅗㄱㅜㅁㅏ".chars().assemble());
}
#[test]
fn assemble_jongsung() {
assert_eq!("감자", "ㄱㅏㅁㅈㅏ".chars().assemble());
}
#[test]
fn assemble_compound_jungsung() {
assert_eq!("휘발유", "ㅎㅜㅣㅂㅏㄹㅇㅠ".chars().assemble());
}
#[test]
fn assemble_compound_jongsung() {
assert_eq!("훑개", "ㅎㅜㄹㅌㄱㅐ".chars().assemble());
}
// --- Single-syllable conversions through `Hangul` ---
#[test]
fn disassemble_single() {
use std::convert::*;
let disassembled: Hangul = '닭'.try_into().unwrap();
assert_eq!(('ㄷ', 'ㅏ', Some('ㄺ')), disassembled.parts());
}
#[test]
fn disassemble_single_no_jongsung() {
use std::convert::*;
let disassembled: Hangul = '깨'.try_into().unwrap();
assert_eq!(('ㄲ', 'ㅐ', None), disassembled.parts());
}
#[test]
fn disassemble_nonhangul_error() {
use std::convert::*;
let disassembled: Result<Hangul, _> = 'a'.try_into();
assert!(disassembled.is_err());
}
#[test]
fn assemble_single() {
let assembled: char = Hangul::new('ㅂ', 'ㅏ', Some('ㅌ')).unwrap().into();
assert_eq!('밭', assembled);
}
#[test]
fn assemble_single_no_jongsung() {
let assembled: char = Hangul::new('ㅊ', 'ㅟ', None).unwrap().into();
assert_eq!('취', assembled);
}
#[test]
fn assemble_nonhangul_error() {
assert!(Hangul::new('a', 'b', None).is_none());
}
// --- Property tests ---
proptest! {
// Disassembling then assembling arbitrary Hangul text must not panic.
#[test]
fn no_panic(korean in "[가-힣ㄱ-ㅎㅏ-ㅣ]+") {
korean.disassemble().assemble();
}
// Converting any `char` to `Hangul` and back must not panic.
#[test]
fn no_panic_single(letter in proptest::arbitrary::any::<char>()) {
use std::convert::*;
let _: Option<char> = letter.try_into().ok().and_then(|hangul: Hangul| hangul.try_into().ok());
}
// Every generated syllable must survive a `char` -> `Hangul` -> `char` round trip.
#[test]
fn hangul_convert_redundant(letter in proptest::char::range('가', '\u{D7A4}')) {
use std::convert::*;
let disassembled: Hangul = letter.try_into().unwrap();
let processed = disassembled.into();
assert_eq!(letter, processed);
}
}
}