use crate::data::normalization::{
canonical_composition, canonical_decomposition, ccc, compatibility_decomposition,
};
/// First code point of the precomposed Hangul syllable range handled algorithmically.
const S_BASE: u32 = 0xAC00;
/// First leading-consonant (L) jamo.
const L_BASE: u32 = 0x1100;
/// First vowel (V) jamo.
const V_BASE: u32 = 0x1161;
/// Base for trailing-consonant (T) jamo; `T_BASE` itself stands for "no trailing consonant".
const T_BASE: u32 = 0x11A7;
/// Number of leading-consonant jamo.
const L_COUNT: u32 = 19;
/// Number of vowel jamo.
const V_COUNT: u32 = 21;
/// Number of trailing-consonant slots, including the "none" slot at index 0.
const T_COUNT: u32 = 28;
/// Number of syllables that share one leading consonant.
const N_COUNT: u32 = V_COUNT * T_COUNT;
/// Total number of precomposed Hangul syllables.
const S_COUNT: u32 = L_COUNT * N_COUNT;
/// Returns `s` in Normalization Form D (canonical decomposition).
///
/// The text is decomposed with canonical mappings only and the resulting
/// code points are put into canonical order by `decompose`.
///
/// Code points are converted back to `char` with a checked conversion: a
/// value that is not a Unicode scalar value (which would indicate corrupt
/// table data) becomes U+FFFD instead of triggering undefined behaviour.
#[must_use]
pub fn nfd(s: &str) -> String {
    decompose(s, false)
        .into_iter()
        .map(|cp| char::from_u32(cp).unwrap_or(char::REPLACEMENT_CHARACTER))
        .collect()
}
/// Returns `s` in Normalization Form C (canonical decomposition followed by
/// canonical composition).
///
/// The text is first decomposed with canonical mappings only (`decompose`),
/// then recombined by `compose`.
///
/// Code points are converted back to `char` with a checked conversion: a
/// value that is not a Unicode scalar value (which would indicate corrupt
/// table data) becomes U+FFFD instead of triggering undefined behaviour.
#[must_use]
pub fn nfc(s: &str) -> String {
    let decomposed = decompose(s, false);
    compose(&decomposed)
        .into_iter()
        .map(|cp| char::from_u32(cp).unwrap_or(char::REPLACEMENT_CHARACTER))
        .collect()
}
/// Returns `s` in Normalization Form KD (compatibility decomposition).
///
/// The text is decomposed with compatibility mappings and the resulting
/// code points are put into canonical order by `decompose`.
///
/// Code points are converted back to `char` with a checked conversion: a
/// value that is not a Unicode scalar value (which would indicate corrupt
/// table data) becomes U+FFFD instead of triggering undefined behaviour.
#[must_use]
pub fn nfkd(s: &str) -> String {
    decompose(s, true)
        .into_iter()
        .map(|cp| char::from_u32(cp).unwrap_or(char::REPLACEMENT_CHARACTER))
        .collect()
}
/// Returns `s` in Normalization Form KC (compatibility decomposition followed
/// by canonical composition).
///
/// The text is first decomposed with compatibility mappings (`decompose`),
/// then recombined by `compose`.
///
/// Code points are converted back to `char` with a checked conversion: a
/// value that is not a Unicode scalar value (which would indicate corrupt
/// table data) becomes U+FFFD instead of triggering undefined behaviour.
#[must_use]
pub fn nfkc(s: &str) -> String {
    let decomposed = decompose(s, true);
    compose(&decomposed)
        .into_iter()
        .map(|cp| char::from_u32(cp).unwrap_or(char::REPLACEMENT_CHARACTER))
        .collect()
}
/// Decomposes every character of `s` and returns the code points in
/// canonical order.
///
/// `compat` selects compatibility mappings (`true`) or canonical mappings
/// only (`false`); it is forwarded unchanged to `decompose_char`.
fn decompose(s: &str, compat: bool) -> Vec<u32> {
    // The UTF-8 byte length is used as the initial capacity, as a cheap
    // estimate that avoids counting characters up front.
    let mut code_points: Vec<u32> = Vec::with_capacity(s.len());
    s.chars()
        .for_each(|ch| decompose_char(u32::from(ch), compat, &mut code_points));
    // Reordering runs once over the complete buffer, after all characters
    // have been expanded.
    canonical_order(&mut code_points);
    code_points
}
/// Appends the full decomposition of `cp` to `out`.
///
/// Precomposed Hangul syllables are split arithmetically into their
/// leading-consonant, vowel and (optional) trailing-consonant jamo. Every
/// other code point is looked up in the compatibility table when `compat` is
/// `true`, otherwise in the canonical table; a mapping is expanded
/// recursively, and a code point without a mapping is appended unchanged.
fn decompose_char(cp: u32, compat: bool, out: &mut Vec<u32>) {
    if (S_BASE..S_BASE + S_COUNT).contains(&cp) {
        let syllable = cp - S_BASE;
        let trailing = syllable % T_COUNT;
        out.push(L_BASE + syllable / N_COUNT);
        out.push(V_BASE + (syllable % N_COUNT) / T_COUNT);
        // Trailing index 0 means the syllable has no final consonant.
        if trailing != 0 {
            out.push(T_BASE + trailing);
        }
        return;
    }

    let mapping = if compat {
        compatibility_decomposition(cp)
    } else {
        canonical_decomposition(cp)
    };

    if let Some(parts) = mapping {
        // Mapped code points may themselves have mappings, so recurse.
        for &part in parts {
            decompose_char(part, compat, out);
        }
    } else {
        out.push(cp);
    }
}
/// Puts `cps` into canonical order in place.
///
/// Code points whose canonical combining class (`ccc`) is 0 never move; they
/// act as boundaries. Each run of code points with non-zero class between two
/// such boundaries is sorted by class. The sort is stable, so code points of
/// equal class keep their relative order, which is exactly the result of
/// repeatedly swapping adjacent pairs `(a, b)` with `ccc(a) > ccc(b) > 0`.
///
/// Sorting run by run costs O(k log k) for a run of `k` marks, where the
/// pairwise-swap formulation over the whole buffer degrades to quadratic time
/// on long runs of combining marks.
fn canonical_order(cps: &mut [u32]) {
    // `split_mut` drops the class-0 separators from the yielded sub-slices,
    // so every `run` contains only reorderable code points (possibly none).
    for run in cps.split_mut(|&cp| ccc(cp) == 0) {
        run.sort_by_key(|&cp| ccc(cp));
    }
}
/// Recombines a canonically ordered sequence of code points.
///
/// Every code point with combining class 0 that has not been absorbed by an
/// earlier one becomes the current starter. The code points after it are
/// scanned, and each one that is not blocked and forms a pair with the
/// current starter (Hangul arithmetic first, then the `canonical_composition`
/// lookup) is merged into the starter and marked as consumed. Scanning for a
/// starter stops at the first class-0 code point that could not be merged.
/// Code points that are neither merged nor starters are emitted unchanged in
/// their original position.
///
/// The input is only read, so it is indexed directly rather than copied.
fn compose(cps: &[u32]) -> Vec<u32> {
    let len = cps.len();
    let mut result: Vec<u32> = Vec::with_capacity(len);
    // consumed[k] is true once cps[k] has been merged into an earlier starter.
    let mut consumed = vec![false; len];

    for i in 0..len {
        if consumed[i] {
            continue;
        }
        let head = cps[i];
        if ccc(head) != 0 {
            // A combining mark that was not absorbed is passed through.
            result.push(head);
            continue;
        }

        let mut starter = head;
        // Combining class of the most recent mark that was left in place.
        let mut last_ccc: u8 = 0;
        for j in (i + 1)..len {
            if consumed[j] {
                continue;
            }
            let c = cps[j];
            let c_ccc = ccc(c);
            let blocked = if c_ccc == 0 {
                // A class-0 code point may only merge when nothing unconsumed
                // sits between it and the starter.
                last_ccc > 0 || (j != i + 1 && has_non_consumed_between(i, j, &consumed))
            } else {
                // A mark is blocked by a preceding unconsumed mark of equal
                // or higher class.
                last_ccc >= c_ccc
            };
            if !blocked {
                if let Some(composite) =
                    hangul_compose(starter, c).or_else(|| canonical_composition(starter, c))
                {
                    starter = composite;
                    consumed[j] = true;
                    continue;
                }
            }
            if c_ccc == 0 {
                // An unmerged class-0 code point ends this starter's scan; it
                // is handled as its own starter by the outer loop.
                break;
            }
            last_ccc = c_ccc;
        }
        result.push(starter);
    }
    result
}
/// Composes a pair of Hangul code points arithmetically.
///
/// Two cases produce a result:
/// * a leading-consonant jamo followed by a vowel jamo yields the LV syllable;
/// * an LV syllable (one without a trailing consonant) followed by a
///   trailing-consonant jamo yields the LVT syllable.
///
/// Every other pair returns `None`.
fn hangul_compose(first: u32, second: u32) -> Option<u32> {
    let leading = L_BASE..L_BASE + L_COUNT;
    let vowels = V_BASE..V_BASE + V_COUNT;
    if leading.contains(&first) && vowels.contains(&second) {
        let l_offset = (first - L_BASE) * N_COUNT;
        let v_offset = (second - V_BASE) * T_COUNT;
        return Some(S_BASE + l_offset + v_offset);
    }

    let syllables = S_BASE..S_BASE + S_COUNT;
    // `T_BASE` itself is the "no trailing consonant" slot, so it is excluded.
    let trailing = (T_BASE + 1)..T_BASE + T_COUNT;
    // The subtraction is only evaluated once `first` is known to be >= S_BASE.
    let is_lv = syllables.contains(&first) && (first - S_BASE) % T_COUNT == 0;
    if is_lv && trailing.contains(&second) {
        Some(first + (second - T_BASE))
    } else {
        None
    }
}
/// Reports whether any entry of `consumed` strictly between the indices
/// `start` and `end` is still `false`.
///
/// Returns `false` when there is no index strictly between the two.
fn has_non_consumed_between(start: usize, end: usize, consumed: &[bool]) -> bool {
    ((start + 1)..end).any(|k| !consumed[k])
}