use crate::char_from_u16;
use crate::error::NormalizerError;
use crate::in_inclusive_range;
use crate::provider::CanonicalCompositionsV1Marker;
use crate::provider::CanonicalDecompositionDataV1Marker;
use crate::provider::CanonicalDecompositionTablesV1Marker;
use crate::provider::NonRecursiveDecompositionSupplementV1Marker;
use crate::trie_value_has_ccc;
use crate::trie_value_indicates_special_non_starter_decomposition;
use crate::BACKWARD_COMBINING_STARTER_MARKER;
use crate::FDFA_MARKER;
use crate::HANGUL_L_BASE;
use crate::HANGUL_N_COUNT;
use crate::HANGUL_S_BASE;
use crate::HANGUL_S_COUNT;
use crate::HANGUL_T_BASE;
use crate::HANGUL_T_COUNT;
use crate::HANGUL_V_BASE;
use crate::NON_ROUND_TRIP_MARKER;
use crate::SPECIAL_NON_STARTER_DECOMPOSITION_MARKER_U16;
use icu_properties::CanonicalCombiningClass;
use icu_provider::prelude::*;
/// Owner of the data needed to combine two characters into one.
///
/// The payload is loaded once by `try_new_unstable` and then read by
/// `compose`, which hands an iterator over it to `crate::compose`.
pub struct CanonicalComposition {
/// Composition data obtained from the data provider at construction time.
canonical_compositions: DataPayload<CanonicalCompositionsV1Marker>,
}
impl CanonicalComposition {
    /// Tries to combine `starter` and `second` into a single character.
    ///
    /// The work is delegated to `crate::compose`, which receives an iterator
    /// over the loaded composition data together with both characters. Its
    /// `Option<char>` result is passed through unchanged.
    #[inline(always)]
    pub fn compose(&self, starter: char, second: char) -> Option<char> {
        let data = self.canonical_compositions.get();
        crate::compose(data.canonical_compositions.iter(), starter, second)
    }

    /// Builds a `CanonicalComposition` from `data_provider`.
    ///
    /// # Errors
    ///
    /// Any failure reported while loading the response or extracting its
    /// payload is converted into a `NormalizerError` by `?` and returned.
    pub fn try_new_unstable<D>(data_provider: &D) -> Result<Self, NormalizerError>
    where
        D: DataProvider<CanonicalCompositionsV1Marker> + ?Sized,
    {
        let response = data_provider.load(Default::default())?;
        let payload: DataPayload<CanonicalCompositionsV1Marker> = response.take_payload()?;
        Ok(Self {
            canonical_compositions: payload,
        })
    }

    // Further constructors are produced by this `icu_provider` macro.
    // NOTE(review): presumably the any-/buffer-provider variants that forward
    // to `try_new_unstable` -- confirm against the macro documentation.
    icu_provider::gen_any_buffer_constructors!(locale: skip, options: skip, error: NormalizerError);
}
// The exhaustive-enum lint is silenced on purpose.
// NOTE(review): presumably because these three shapes are regarded as the
// complete, stable set of outcomes -- confirm with the crate's API policy.
#[allow(clippy::exhaustive_enums)]
#[derive(Debug, PartialEq, Eq)]
/// Outcome of decomposing one character with `CanonicalDecomposition::decompose`.
pub enum Decomposed {
/// Nothing to report for the character: it has no decomposition in the
/// data (this is also the fallback returned when the data is inconsistent).
Default,
/// The character decomposes to exactly one other character.
Singleton(char),
/// The character decomposes to two characters, given in order.
Expansion(char, char),
}
/// Owner of the three data payloads that `decompose` consults.
///
/// All three are loaded together by `try_new_unstable`.
pub struct CanonicalDecomposition {
/// Main per-character trie; its value is inspected first for every
/// character that is not handled arithmetically.
decompositions: DataPayload<CanonicalDecompositionDataV1Marker>,
/// Out-of-line storage (`scalars16` followed logically by `scalars24`)
/// that trie values can point into with a 12-bit offset.
tables: DataPayload<CanonicalDecompositionTablesV1Marker>,
/// Supplementary trie plus `scalars24` table, used for the characters
/// whose answer cannot be read directly from the main data.
non_recursive: DataPayload<NonRecursiveDecompositionSupplementV1Marker>,
}
impl CanonicalDecomposition {
/// Decomposes `c` by one step and reports the shape of the result.
///
/// Characters whose offset from `HANGUL_S_BASE` is below `HANGUL_S_COUNT`
/// are split arithmetically, without any data lookup, into a pair of
/// characters. Every other character is forwarded to
/// `decompose_non_hangul`.
#[inline]
pub fn decompose(&self, c: char) -> Decomposed {
// `wrapping_sub` maps code points below `HANGUL_S_BASE` to very large
// values, so the single `>=` test rejects both sides of the range.
let lvt = u32::from(c).wrapping_sub(HANGUL_S_BASE);
if lvt >= HANGUL_S_COUNT {
return self.decompose_non_hangul(c);
}
// Index of the trailing part within the syllable; zero means there is none.
let t = lvt % HANGUL_T_COUNT;
if t == 0 {
// No trailing part: split into the leading part and the vowel part.
let l = lvt / HANGUL_N_COUNT;
let v = (lvt % HANGUL_N_COUNT) / HANGUL_T_COUNT;
// SAFETY: `lvt < HANGUL_S_COUNT`, so `l` and `v` are bounded by the
// ratios of the HANGUL_* counts and the sums stay close to their bases.
// NOTE(review): soundness relies on the crate's HANGUL_* constants
// (defined outside this file) keeping these sums valid scalar values,
// i.e. away from the surrogate range -- confirm.
return Decomposed::Expansion(
unsafe { char::from_u32_unchecked(HANGUL_L_BASE + l) },
unsafe { char::from_u32_unchecked(HANGUL_V_BASE + v) },
);
}
// A trailing part exists: peel it off, leaving the syllable without it.
let lv = lvt - t;
// SAFETY: `lv < lvt < HANGUL_S_COUNT`, so the first value lies between
// `HANGUL_S_BASE` and `c` itself; `t` is in `1..HANGUL_T_COUNT`.
// NOTE(review): same assumption about the HANGUL_* constants as above.
Decomposed::Expansion(
unsafe { char::from_u32_unchecked(HANGUL_S_BASE + lv) },
unsafe { char::from_u32_unchecked(HANGUL_T_BASE + t) },
)
}
/// Data-driven counterpart of `decompose` for characters that were not
/// split arithmetically.
///
/// The main trie value is decoded first. When it cannot answer directly,
/// the supplementary trie in `non_recursive` is consulted. Inconsistent
/// data trips a `debug_assert!` and otherwise yields `Decomposed::Default`.
#[inline(always)]
fn decompose_non_hangul(&self, c: char) -> Decomposed {
let decomposition = self.decompositions.get().trie.get(c);
// Values up to and including this marker carry no decomposition.
if decomposition <= BACKWARD_COMBINING_STARTER_MARKER {
return Decomposed::Default;
}
// This `loop` never iterates: every path through its body either
// returns or hits `break`. `break` is used as a forward jump to the
// supplementary lookup that follows the loop.
#[allow(clippy::never_loop)]
loop {
// Split the trie value: bits 16.. and the low 16 bits.
let trail_or_complex = (decomposition >> 16) as u16;
let lead = decomposition as u16;
if lead > NON_ROUND_TRIP_MARKER && trail_or_complex != 0 {
// Both halves are characters: a two-character expansion stored inline.
if in_inclusive_range(c, '\u{1F71}', '\u{1FFB}') {
// Characters in this range are answered from the supplement instead.
// NOTE(review): rationale is not visible in this file -- presumably
// the Greek oxia characters here have a one-step (singleton) mapping
// that differs from the two-character value stored inline; confirm.
break;
}
return Decomposed::Expansion(char_from_u16(lead), char_from_u16(trail_or_complex));
}
if lead > NON_ROUND_TRIP_MARKER {
// Only the low half is in use (the high half is zero here).
// Debug-only sanity check; per its message, FDFA_MARKER is an NFKD
// marker and is not expected in this data.
debug_assert_ne!(
lead, FDFA_MARKER,
"How come we got the U+FDFA NFKD marker here?"
);
if lead == SPECIAL_NON_STARTER_DECOMPOSITION_MARKER_U16 {
// The marker means the mapping is hard-coded below rather than stored.
// Cheap range test first: nothing outside U+0340..=U+0F81 is listed.
if !in_inclusive_range(c, '\u{0340}', '\u{0F81}') {
return Decomposed::Default;
}
return match c {
'\u{0340}' => {
Decomposed::Singleton('\u{0300}')
}
'\u{0341}' => {
Decomposed::Singleton('\u{0301}')
}
'\u{0343}' => {
Decomposed::Singleton('\u{0313}')
}
'\u{0344}' => {
Decomposed::Expansion('\u{0308}', '\u{0301}')
}
'\u{0F73}' => {
Decomposed::Expansion('\u{0F71}', '\u{0F72}')
}
'\u{0F75}' => {
Decomposed::Expansion('\u{0F71}', '\u{0F74}')
}
'\u{0F81}' => {
Decomposed::Expansion('\u{0F71}', '\u{0F80}')
}
// Marker present but character not listed: report no decomposition.
_ => Decomposed::Default,
};
}
// Ordinary case: the low half is the single replacement character.
return Decomposed::Singleton(char_from_u16(lead));
}
// From here on `lead` is at or below NON_ROUND_TRIP_MARKER and
// `trail_or_complex` is decoded as a packed table reference.
// U+212B is special-cased to U+00C5 before the table reference is decoded.
if c == '\u{212B}' {
return Decomposed::Singleton('\u{00C5}');
}
// Low 12 bits: offset into the logical concatenation of `scalars16`
// followed by `scalars24`. Bits 13..=15: a length field.
let offset = usize::from(trail_or_complex & 0xFFF);
let tables = self.tables.get();
if offset < tables.scalars16.len() {
// Offset lands in the 16-bit table. A zero length field is the only
// case answered here (exactly two units are read); anything else is
// left to the supplement.
if usize::from(trail_or_complex >> 13) != 0 {
break;
}
if let Some(first) = tables.scalars16.get(offset) {
if let Some(second) = tables.scalars16.get(offset + 1) {
return Decomposed::Expansion(char_from_u16(first), char_from_u16(second));
}
}
// The second unit is missing: the data is inconsistent.
debug_assert!(false);
return Decomposed::Default;
}
// Offset lands in the second table; there the length is the field plus one.
let len = usize::from(trail_or_complex >> 13) + 1;
if len > 2 {
// Longer than two characters: leave it to the supplement.
break;
}
// Cannot underflow: this point is reached only when
// `offset >= tables.scalars16.len()`.
let offset24 = offset - tables.scalars16.len();
if let Some(first_c) = tables.scalars24.get(offset24) {
if len == 1 {
return Decomposed::Singleton(first_c);
}
if let Some(second_c) = tables.scalars24.get(offset24 + 1) {
return Decomposed::Expansion(first_c, second_c);
}
}
// The offset points outside the table: the data is inconsistent.
debug_assert!(false);
return Decomposed::Default;
}
// Reached only via `break`: look the character up in the supplement.
let non_recursive = self.non_recursive.get();
let non_recursive_decomposition = non_recursive.trie.get(c);
if non_recursive_decomposition == 0 {
// The main data deferred to the supplement, yet it has no entry.
debug_assert!(false);
return Decomposed::Default;
}
// Same split as above: bits 16.. and the low 16 bits.
let trail_or_complex = (non_recursive_decomposition >> 16) as u16;
let lead = non_recursive_decomposition as u16;
if lead != 0 && trail_or_complex != 0 {
// Two characters stored inline.
return Decomposed::Expansion(char_from_u16(lead), char_from_u16(trail_or_complex));
}
if lead != 0 {
// One character stored inline.
return Decomposed::Singleton(char_from_u16(lead));
}
// `lead` is zero and the whole value is non-zero, so `trail_or_complex`
// is at least 1 and the subtraction cannot underflow. It is a one-based
// index of a pair in the supplement's `scalars24` table.
let offset = usize::from(trail_or_complex - 1);
if let Some(first) = non_recursive.scalars24.get(offset) {
if let Some(second) = non_recursive.scalars24.get(offset + 1) {
return Decomposed::Expansion(first, second);
}
}
// The index points outside the table: the data is inconsistent.
debug_assert!(false);
Decomposed::Default
}
/// Builds a `CanonicalDecomposition` by loading its three payloads from
/// `data_provider`.
///
/// # Errors
///
/// Failures while loading or extracting any payload are converted into a
/// `NormalizerError` by `?`. `NormalizerError::FutureExtension` is
/// returned when the two tables together hold more than `0xFFF` entries.
pub fn try_new_unstable<D>(data_provider: &D) -> Result<Self, NormalizerError>
where
D: DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<NonRecursiveDecompositionSupplementV1Marker>
+ ?Sized,
{
let decompositions: DataPayload<CanonicalDecompositionDataV1Marker> =
data_provider.load(Default::default())?.take_payload()?;
let tables: DataPayload<CanonicalDecompositionTablesV1Marker> =
data_provider.load(Default::default())?.take_payload()?;
// `decompose_non_hangul` masks table offsets with `0xFFF`, so larger
// tables could not be addressed by the hard-coded bit layout.
if tables.get().scalars16.len() + tables.get().scalars24.len() > 0xFFF {
return Err(NormalizerError::FutureExtension);
}
let non_recursive: DataPayload<NonRecursiveDecompositionSupplementV1Marker> =
data_provider.load(Default::default())?.take_payload()?;
Ok(CanonicalDecomposition {
decompositions,
tables,
non_recursive,
})
}
// Further constructors are produced by this `icu_provider` macro.
// NOTE(review): presumably the any-/buffer-provider variants that forward
// to `try_new_unstable` -- confirm against the macro documentation.
icu_provider::gen_any_buffer_constructors!(locale: skip, options: skip, error: NormalizerError);
}
/// Lookup of a character's `CanonicalCombiningClass`.
///
/// It is backed by a payload of the same marker type that
/// `CanonicalDecomposition` loads for its main trie.
pub struct CanonicalCombiningClassMap {
/// Trie whose values are decoded by `get32`.
decompositions: DataPayload<CanonicalDecompositionDataV1Marker>,
}
impl CanonicalCombiningClassMap {
    /// Returns the canonical combining class of `c`.
    ///
    /// Convenience wrapper that converts `c` to `u32` and calls `get32`.
    #[inline(always)]
    pub fn get(&self, c: char) -> CanonicalCombiningClass {
        let scalar = u32::from(c);
        self.get32(scalar)
    }

    /// Returns the canonical combining class for the code point `c`.
    ///
    /// The trie value for `c` is decoded as follows:
    /// * if `trie_value_has_ccc` accepts it, its low eight bits are the class;
    /// * if it carries the special non-starter decomposition marker and `c`
    ///   is U+0340, U+0341, U+0343 or U+0344, the class is `Above`;
    /// * in every other case the class is `NotReordered`.
    pub fn get32(&self, c: u32) -> CanonicalCombiningClass {
        let trie_value = self.decompositions.get().trie.get32(c);

        // Guard clause: the class is stored directly in the trie value.
        if trie_value_has_ccc(trie_value) {
            // Truncation to the low byte is intentional.
            return CanonicalCombiningClass(trie_value as u8);
        }

        // Only the four listed characters get `Above` under the marker;
        // everything else, marked or not, is `NotReordered`.
        let is_above = trie_value_indicates_special_non_starter_decomposition(trie_value)
            && matches!(c, 0x0340 | 0x0341 | 0x0343 | 0x0344);
        if is_above {
            CanonicalCombiningClass::Above
        } else {
            CanonicalCombiningClass::NotReordered
        }
    }

    /// Builds a `CanonicalCombiningClassMap` from `data_provider`.
    ///
    /// # Errors
    ///
    /// Any failure reported while loading the response or extracting its
    /// payload is converted into a `NormalizerError` by `?` and returned.
    pub fn try_new_unstable<D>(data_provider: &D) -> Result<Self, NormalizerError>
    where
        D: DataProvider<CanonicalDecompositionDataV1Marker> + ?Sized,
    {
        let response = data_provider.load(Default::default())?;
        let payload: DataPayload<CanonicalDecompositionDataV1Marker> = response.take_payload()?;
        Ok(Self {
            decompositions: payload,
        })
    }

    // Further constructors are produced by this `icu_provider` macro.
    // NOTE(review): presumably the any-/buffer-provider variants that forward
    // to `try_new_unstable` -- confirm against the macro documentation.
    icu_provider::gen_any_buffer_constructors!(locale: skip, options: skip, error: NormalizerError);
}