use alloc::vec::Vec;
use icu_normalizer::properties::Decomposed;
use crate::analysis::AnalysisDataSources;
const MAX_CLUSTER_SIZE: usize = 32;
#[derive(Debug, Default)]
pub(crate) struct CharCluster {
pub chars: Vec<Char>,
pub is_emoji: bool,
pub map_len: u8,
pub start: u32,
pub end: u32,
pub force_normalize: bool,
comp: Form,
decomp: Form,
form: FormKind,
best_ratio: f32,
}
impl CharCluster {
pub(crate) fn range(&self) -> SourceRange {
SourceRange {
start: self.start,
end: self.end,
}
}
}
#[derive(Copy, Clone)]
pub(crate) struct SourceRange {
pub start: u32,
pub end: u32,
}
#[derive(Copy, Clone, Debug, Default)]
pub(crate) struct Char {
pub ch: char,
pub is_control_character: bool,
pub contributes_to_shaping: bool,
pub glyph_id: GlyphId,
pub style_index: u16,
}
pub(crate) type GlyphId = u16;
#[derive(Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Debug)]
#[repr(u8)]
pub(crate) enum Whitespace {
None = 0,
Space = 1,
NoBreakSpace = 2,
Tab = 3,
Newline = 4,
}
impl Whitespace {
pub(crate) fn is_space_or_nbsp(self) -> bool {
matches!(self, Self::Space | Self::NoBreakSpace)
}
}
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) enum Status {
Discard,
Keep,
Complete,
}
impl CharCluster {
pub(crate) fn clear(&mut self) {
self.chars.clear();
self.is_emoji = false;
self.map_len = 0;
self.start = 0;
self.end = 0;
self.force_normalize = false;
self.comp.clear();
self.decomp.clear();
self.form = FormKind::Original;
self.best_ratio = 0.;
}
#[inline(always)]
fn len(&self) -> usize {
self.chars.len()
}
#[inline(always)]
pub(crate) fn style_index(&self) -> u16 {
self.chars[0].style_index
}
#[inline(always)]
fn contributes_to_shaping(ch: char, analysis_data_sources: &AnalysisDataSources) -> bool {
let props = analysis_data_sources.properties(ch);
crate::analysis::contributes_to_shaping(props.general_category(), props.script())
}
fn decomposed(&mut self, analysis_data_sources: &AnalysisDataSources) -> Option<&[Char]> {
match self.decomp.state {
FormState::Invalid => None,
FormState::None => {
self.decomp.state = FormState::Invalid;
if self.chars.len() != 1 {
return None;
}
let decomposer = analysis_data_sources.decomposing_normalizer();
let decomp = decomposer.decompose(self.chars[0].ch);
match decomp {
Decomposed::Default | Decomposed::Singleton(_) => {
return None;
}
Decomposed::Expansion(a, b) => {
let mut copy = self.chars[0];
copy.ch = a;
copy.contributes_to_shaping =
Self::contributes_to_shaping(a, analysis_data_sources);
self.decomp.chars[0] = copy;
copy.ch = b;
copy.contributes_to_shaping =
Self::contributes_to_shaping(b, analysis_data_sources);
self.decomp.chars[1] = copy;
self.decomp.len = 2;
}
}
self.decomp.state = FormState::Valid;
self.decomp.setup();
Some(self.decomp.chars())
}
FormState::Valid => Some(self.decomp.chars()),
}
}
fn composed(&mut self, analysis_data_sources: &AnalysisDataSources) -> Option<&[Char]> {
match self.comp.state {
FormState::Invalid => None,
FormState::None => {
self.comp.state = FormState::Invalid;
if self.chars.len() != 2 {
return None;
}
let composer = analysis_data_sources.composing_normalizer();
let comp = composer.compose(self.chars[0].ch, self.chars[1].ch);
match comp {
None => {}
Some(ch) => {
let mut copy = self.chars[0];
copy.ch = ch;
copy.contributes_to_shaping =
Self::contributes_to_shaping(ch, analysis_data_sources);
self.comp.chars[0] = copy;
self.comp.len = 1;
}
}
self.comp.state = FormState::Valid;
self.comp.setup();
Some(self.comp.chars())
}
FormState::Valid => Some(self.comp.chars()),
}
}
pub(crate) fn map(
&mut self,
f: impl Fn(char) -> GlyphId,
analysis_data_sources: &AnalysisDataSources,
) -> Status {
let len = self.len();
if len == 0 {
return Status::Complete;
}
let mut glyph_ids = [0_u16; MAX_CLUSTER_SIZE];
let prev_ratio = self.best_ratio;
let mut ratio;
if self.force_normalize && self.composed(analysis_data_sources).is_some() {
ratio = self.comp.map(&f, &mut glyph_ids, self.best_ratio);
if ratio > self.best_ratio {
self.best_ratio = ratio;
self.form = FormKind::NFC;
if ratio >= 1. {
return Status::Complete;
}
}
}
ratio = Mapper {
chars: &mut self.chars[..len],
map_len: self.map_len.max(1),
}
.map(&f, &mut glyph_ids, self.best_ratio);
if ratio > self.best_ratio {
self.best_ratio = ratio;
self.form = FormKind::Original;
if ratio >= 1. {
return Status::Complete;
}
}
if self.decomposed(analysis_data_sources).is_some() {
ratio = self.decomp.map(&f, &mut glyph_ids, self.best_ratio);
if ratio > self.best_ratio {
self.best_ratio = ratio;
self.form = FormKind::NFD;
if ratio >= 1. {
return Status::Complete;
}
}
if !self.force_normalize && self.composed(analysis_data_sources).is_some() {
ratio = self.comp.map(&f, &mut glyph_ids, self.best_ratio);
if ratio > self.best_ratio {
self.best_ratio = ratio;
self.form = FormKind::NFC;
if ratio >= 1. {
return Status::Complete;
}
}
}
}
if self.best_ratio > prev_ratio {
Status::Keep
} else {
Status::Discard
}
}
}
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
#[allow(clippy::upper_case_acronyms)]
enum FormKind {
#[default]
Original,
NFD,
NFC,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum FormState {
None,
Valid,
Invalid,
}
#[derive(Clone, Debug)]
pub(crate) struct Form {
chars: [Char; 2],
len: u8,
map_len: u8,
state: FormState,
}
impl Default for Form {
fn default() -> Self {
Self::new()
}
}
impl Form {
fn new() -> Self {
Self {
chars: [Char::default(), Char::default()],
len: 0,
map_len: 0,
state: FormState::None,
}
}
fn clear(&mut self) {
self.chars = [Char::default(), Char::default()];
self.len = 0;
self.map_len = 0;
self.state = FormState::None;
}
#[inline(always)]
fn chars(&self) -> &[Char] {
&self.chars[..self.len as usize]
}
#[inline(always)]
fn setup(&mut self) {
self.map_len = (self
.chars()
.iter()
.filter(|c| !c.is_control_character)
.count() as u8)
.max(1);
}
#[inline(always)]
fn map(
&mut self,
f: &impl Fn(char) -> u16,
glyphs: &mut [u16; MAX_CLUSTER_SIZE],
best_ratio: f32,
) -> f32 {
Mapper {
chars: &mut self.chars[..self.len as usize],
map_len: self.map_len,
}
.map(f, glyphs, best_ratio)
}
}
struct Mapper<'a> {
chars: &'a mut [Char],
map_len: u8,
}
impl<'a> Mapper<'a> {
fn map(
&mut self,
f: &impl Fn(char) -> u16,
glyphs: &mut [u16; MAX_CLUSTER_SIZE],
best_ratio: f32,
) -> f32 {
if self.map_len == 0 {
return 1.;
}
let mut mapped = 0;
for (c, g) in self.chars.iter().zip(glyphs.iter_mut()) {
if !c.contributes_to_shaping {
*g = f(c.ch);
if self.map_len == 1 {
mapped += 1;
}
} else {
let gid = f(c.ch);
*g = gid;
if gid != 0 {
mapped += 1;
}
}
}
let ratio = mapped as f32 / self.map_len as f32;
if ratio > best_ratio {
for (ch, glyph) in self.chars.iter_mut().zip(glyphs) {
ch.glyph_id = *glyph;
}
}
ratio
}
}