use crate::unicode::CodePoint;
use crate::unicode::Ucd;
use crate::unicode::props::Gc;
use crate::unicode::props::Dt;
pub(crate) fn starter(cp: u32) -> bool {
let cp = CodePoint::new(cp).unwrap();
let cp_ccc = cp.ccc();
let cp_gc = cp.gc();
if cp_gc == Gc::Mn {
if cp_ccc as u8 == 0 {
return true;
} else if cp_ccc as u8 > 0 {
return false;
}
}
if cp_gc == Gc::Mc {
if cp_ccc as u8 == 0 {
return true;
} else if cp_ccc as u8 > 0 {
return false;
}
}
if cp_gc == Gc::Me {
if cp_ccc as u8 == 0 {
return true;
}
}
if cp_ccc as u8 == 0 {
return true;
}
false
}
/// Reports whether the two adjacent code points in `pair` must be exchanged
/// during canonical ordering.
///
/// That is the case when the combining class of the first is strictly greater
/// than the combining class of the second, and the second one is not zero.
///
/// # Panics
///
/// Panics if `CodePoint::new` does not yield a code point for either value.
pub(super) fn reorderable_pair(pair: (u32, u32)) -> bool {
    let (first, second) = pair;
    let first_ccc = CodePoint::new(first).unwrap().ccc() as u8;
    let second_ccc = CodePoint::new(second).unwrap().ccc() as u8;
    second_ccc > 0 && first_ccc > second_ccc
}
pub(super) fn canonical_ordering(sequence: &mut Vec<char>) {
if sequence.len() == 0 {
return ();
}
let mut last_idx = sequence.len() - 1;
while last_idx > 0 {
for i in 0..=last_idx {
if i + 1 == last_idx + 1 {
continue;
}
if reorderable_pair((sequence[i] as u32, sequence[i + 1] as u32)) {
sequence.swap(i, i + 1);
}
}
last_idx -= 1;
}
}
pub(super) fn compatibility_decomposition(s: &Vec<char>) -> Vec<char> {
let mut count = 0;
let mut decomposed = vec![];
for ch in s.iter() {
if ch.dm() == "" {
decomposed.push(*ch);
} else {
match ch.dt() {
Dt::None => {
decomposed.push(*ch);
}
_ => {
for decomposed_char in ch.dm().chars() {
decomposed.push(decomposed_char);
count += 1;
}
}
}
}
}
if count == 0 {
return decomposed;
}
compatibility_decomposition(&decomposed)
}
pub(super) fn canonical_decomposition(s: Vec<char>) -> Vec<char>{
let mut count = 0;
let mut decomposed = vec![];
for ch in s.iter() {
if ch.dm() == "" {
decomposed.push(*ch);
} else {
if ch.dt() == Dt::Can {
for decomposed_char in ch.dm().chars() {
decomposed.push(decomposed_char);
count += 1;
}
} else {
decomposed.push(*ch);
}
}
}
if count == 0 {
return decomposed;
}
canonical_decomposition(decomposed)
}
/// Reports whether `cp` has a singleton decomposition: a canonical
/// decomposition mapping that consists of exactly one character other than
/// `cp` itself.
///
/// Returns `false` when the mapping is empty, has more than one character,
/// maps the code point to itself, or when the decomposition type is not
/// `Dt::Can`.
///
/// The mapped character is compared numerically, so no `char` has to be built
/// from `cp`; a value that is not a Unicode scalar value therefore yields
/// `false` instead of a panic (relevant only if `CodePoint` admits such
/// values — not visible from here).
///
/// # Panics
///
/// Panics if `CodePoint::new` does not yield a code point for `cp`.
pub(crate) fn singleton_decomposition(cp: u32) -> bool {
    let code_point = CodePoint::new(cp).unwrap();
    let mapping = code_point.dm();
    let mut mapped = mapping.chars();
    match (mapped.next(), mapped.next()) {
        // Exactly one character in the mapping.
        (Some(only), None) => {
            only as u32 != code_point.to_u32() && code_point.dt() == Dt::Can
        }
        _ => false,
    }
}
/// Reports whether the full canonical decomposition of `cp` is longer than one
/// character and begins with a character that is not a starter.
///
/// # Panics
///
/// Panics if `CodePoint::new` does not yield a code point for `cp`, or if the
/// resulting value cannot be converted to a `char`.
pub(crate) fn non_starter_decomposition(cp: u32) -> bool {
    let scalar = CodePoint::new(cp).unwrap().to_u32();
    let original = std::char::from_u32(scalar).unwrap();
    let full = canonical_decomposition(vec![original]);
    full.len() > 1 && !starter(full[0] as u32)
}
/// Reports whether `cp` is a primary composite: its decomposition type is
/// `Dt::Can` and `comp_ex()` is false for it.
///
/// NOTE(review): `comp_ex()` presumably reports the
/// Full_Composition_Exclusion property — confirm against the `Ucd` trait.
///
/// # Panics
///
/// Panics if `CodePoint::new` does not yield a code point for `cp`.
fn primary_composite(cp: u32) -> bool {
    let candidate = CodePoint::new(cp).unwrap();
    candidate.dt() == Dt::Can && !candidate.comp_ex()
}
/// Reports whether the last character of `sequence` is blocked from the first
/// one.
///
/// The first character must have combining class zero, otherwise the result
/// is `false`. With nothing between the first and the last character (fewer
/// than three characters, including an empty or one-element slice) nothing can
/// block, so the result is `false` as well.
///
/// Only the character immediately before the last one is inspected: the last
/// character is blocked when that neighbour has combining class zero or a
/// combining class greater than or equal to the last character's. This is
/// sufficient when the characters in between are in canonical order, because
/// the neighbour then carries the highest combining class among them; callers
/// are expected to pass canonically ordered input.
fn blocked(sequence: &[char]) -> bool {
    // Covers the empty and one-element cases too, which would otherwise index
    // out of bounds or underflow below.
    if sequence.len() < 3 {
        return false;
    }
    if sequence[0].ccc() as u8 != 0 {
        return false;
    }
    let last = sequence.len() - 1;
    let neighbour_ccc = sequence[last - 1].ccc() as u8;
    neighbour_ccc == 0 || neighbour_ccc >= sequence[last].ccc() as u8
}
/// Composes `s` in place.
///
/// For every position after the first, the nearest preceding starter (or
/// index 0 when there is none) is looked up, and the pair formed by that
/// character and the current one is passed to `rdm`. When the earlier
/// character is a starter, the current character is not blocked from it, and
/// `rdm` returned a non-zero value that is a primary composite, the earlier
/// character is replaced by the composite and the current character is
/// removed. The same position is then examined again, since a new character
/// has shifted into it.
///
/// `s` is expected to be decomposed and canonically ordered already.
pub(super) fn canonical_composition(s: &mut Vec<char>) {
    if s.len() == 1 {
        return;
    }
    let mut pos = 1;
    while pos < s.len() {
        // Walk back to the closest starter; the walk stops at index 0 even if
        // that character is not a starter, which is rechecked below.
        let mut anchor = pos - 1;
        while anchor != 0 && !starter(s[anchor] as u32) {
            anchor -= 1;
        }

        let pair: String = [s[anchor], s[pos]].iter().collect();
        let composite = crate::unicode::ucd::dm::rdm(&pair);
        let composite_is_primary = primary_composite(composite);

        let combinable = starter(s[anchor] as u32)
            && !blocked(&s[anchor..=pos])
            && composite != 0x0
            && composite_is_primary;

        if combinable {
            s[anchor] = std::char::from_u32(composite).unwrap();
            s.remove(pos);
        } else {
            pos += 1;
        }
    }
}
/// Converts `s` to Normalization Form D: canonical decomposition followed by
/// canonical ordering. The result is returned as a vector of characters.
pub(crate) fn nfd(s: &str) -> Vec<char> {
    let mut decomposed = canonical_decomposition(s.chars().collect::<Vec<char>>());
    canonical_ordering(&mut decomposed);
    decomposed
}
/// Converts `s` to Normalization Form KD: compatibility decomposition followed
/// by canonical ordering. The result is returned as a vector of characters.
pub(crate) fn nfkd(s: &str) -> Vec<char> {
    let input: Vec<char> = s.chars().collect();
    let mut decomposed = compatibility_decomposition(&input);
    canonical_ordering(&mut decomposed);
    decomposed
}
/// Converts `s` to Normalization Form C: the NFD form of `s` with canonical
/// composition applied to it.
pub(crate) fn nfc(s: &str) -> Vec<char> {
    let mut normalized = nfd(s);
    canonical_composition(&mut normalized);
    normalized
}
/// Converts `s` to Normalization Form KC: the NFKD form of `s` with canonical
/// composition applied to it.
pub(crate) fn nfkc(s: &str) -> Vec<char> {
    let mut normalized = nfkd(s);
    canonical_composition(&mut normalized);
    normalized
}
// Unit tests for the ordering and blocking helpers; compiled for test builds
// only.
#[cfg(test)]
mod tests {
    use super::{blocked, canonical_ordering};

    /// The combining marks between `a` and `b` are rearranged, while the two
    /// letters keep their positions and the two marks that share a combining
    /// class (U+0305, U+0300) keep their relative order.
    #[test]
    fn test_canonical_ordering() {
        let mut marks = vec!['a', '\u{0305}', '\u{0315}', '\u{0300}', '\u{05AE}', 'b'];
        canonical_ordering(&mut marks);
        assert_eq!(
            marks,
            vec!['a', '\u{05AE}', '\u{0305}', '\u{0300}', '\u{0315}', 'b']
        );
    }

    /// Two adjacent characters never block; a trailing mark is blocked when
    /// the mark right before it has an equal combining class.
    #[test]
    fn test_blocked() {
        assert!(!blocked(&['A', 'B']));
        assert!(blocked(&['a', '\u{05AE}', '\u{0305}', '\u{0300}']));
    }
}