use std::borrow::Cow;
use super::{
filter::{CodepointFilter, FilterAction, FilterIterator},
page_table::{
StrReplaceFindIter, decode_page_table, page_table_lookup, replace_spans, unpack_str_ref,
},
};
/// Borrowed lookup state used as a [`CodepointFilter`].
///
/// A lookup passes a code point together with `l1` and `l2` to
/// `page_table_lookup`; when that yields a value, `unpack_str_ref` resolves it
/// against `strings` to obtain the replacement text.
pub(crate) struct NormalizeFilter<'a> {
/// First table argument handed to `page_table_lookup`.
l1: &'a [u16],
/// Second table argument handed to `page_table_lookup`.
l2: &'a [u32],
/// String data that `unpack_str_ref` resolves looked-up values against.
strings: &'a str,
}
impl<'a> CodepointFilter<'a> for NormalizeFilter<'a> {
    /// Filters a single ASCII byte.
    ///
    /// Only ASCII uppercase bytes are looked up; every other byte is kept
    /// without touching the tables.
    #[inline(always)]
    fn filter_ascii(&self, byte: u8) -> FilterAction<'a> {
        if !byte.is_ascii_uppercase() {
            return FilterAction::Keep;
        }
        // From here on the work is exactly the generic code point lookup.
        self.filter_codepoint(u32::from(byte))
    }

    /// Filters an arbitrary code point.
    ///
    /// Returns `FilterAction::ReplaceBytes` with the bytes of the resolved
    /// string when both `page_table_lookup` and `unpack_str_ref` succeed, and
    /// `FilterAction::Keep` otherwise.
    #[inline(always)]
    fn filter_codepoint(&self, cp: u32) -> FilterAction<'a> {
        let Some(packed) = page_table_lookup(cp, self.l1, self.l2) else {
            return FilterAction::Keep;
        };
        match unpack_str_ref(packed, self.strings) {
            Some(replacement) => FilterAction::ReplaceBytes(replacement.as_bytes()),
            None => FilterAction::Keep,
        }
    }
}
/// Owning holder of the tables and string data used for normalization.
///
/// `new` fills `l1` and `l2` from the output of `decode_page_table` and stores
/// the `'static` string it is given as a borrowed `Cow`. The methods lend
/// these fields out to `StrReplaceFindIter` and [`NormalizeFilter`].
#[derive(Clone)]
pub(crate) struct NormalizeMatcher {
/// First table returned by `decode_page_table`.
l1: Box<[u16]>,
/// Second table returned by `decode_page_table`.
l2: Box<[u32]>,
/// String data; `new` always stores it as `Cow::Borrowed`.
strings: Cow<'static, str>,
}
impl NormalizeMatcher {
    /// Builds a matcher from encoded table bytes and a `'static` string.
    ///
    /// `l1` and `l2` are passed to `decode_page_table`; the two decoded tables
    /// are stored alongside `strings`, which is kept borrowed (no copy).
    pub(crate) fn new(l1: &'static [u8], l2: &'static [u8], strings: &'static str) -> Self {
        let (level1, level2) = decode_page_table(l1, l2);
        Self {
            l1: level1,
            l2: level2,
            strings: Cow::Borrowed(strings),
        }
    }

    /// Runs `replace_spans` over `text` using this matcher's tables.
    ///
    /// The result is whatever `replace_spans` returns.
    // NOTE(review): `None` presumably means "nothing to replace" — confirm
    // against `replace_spans`.
    pub(crate) fn replace(&self, text: &str) -> Option<String> {
        let spans = self.iter(text);
        replace_spans(text, spans)
    }

    /// Wraps `text` in a `FilterIterator` driven by a [`NormalizeFilter`] that
    /// borrows this matcher's tables and string data.
    #[inline(always)]
    pub(crate) fn filter_bytes<'a>(
        &'a self,
        text: &'a str,
    ) -> FilterIterator<'a, NormalizeFilter<'a>> {
        let filter = NormalizeFilter {
            l1: &self.l1,
            l2: &self.l2,
            strings: &*self.strings,
        };
        FilterIterator::new(text, filter)
    }

    /// Creates the span iterator consumed by `replace`, positioned at byte
    /// offset 0 of `text`.
    // NOTE(review): the meaning of the `true` const argument is defined by
    // `StrReplaceFindIter` and is not visible here.
    #[inline(always)]
    fn iter<'a>(&'a self, text: &'a str) -> StrReplaceFindIter<'a, true> {
        StrReplaceFindIter {
            l1: &self.l1,
            l2: &self.l2,
            strings: &*self.strings,
            text,
            byte_offset: 0,
        }
    }
}