use super::{simd::skip_ascii_simd, utf8::decode_utf8_raw};
pub(super) struct StrReplaceFindIter<'a, const CHECK_ASCII: bool> {
pub(super) l1: &'a [u16],
pub(super) l2: &'a [u32],
pub(super) strings: &'a str,
pub(super) text: &'a str,
pub(super) byte_offset: usize,
}
impl<'a, const CHECK_ASCII: bool> Iterator for StrReplaceFindIter<'a, CHECK_ASCII> {
type Item = (usize, usize, &'a str);
#[inline]
fn next(&mut self) -> Option<Self::Item> {
let bytes = self.text.as_bytes();
let len = bytes.len();
loop {
if !CHECK_ASCII {
self.byte_offset = skip_ascii_simd(bytes, self.byte_offset);
}
if self.byte_offset >= len {
return None;
}
let start = self.byte_offset;
let b = bytes[start];
if b < 0x80 {
self.byte_offset += 1;
if CHECK_ASCII
&& b.is_ascii_uppercase()
&& let Some(value) = page_table_lookup(b as u32, self.l1, self.l2)
&& let Some(s) = unpack_str_ref(value, self.strings)
{
return Some((start, start + 1, s));
}
continue;
}
let (cp, char_len) = unsafe { decode_utf8_raw(bytes, start) };
self.byte_offset += char_len;
if let Some(value) = page_table_lookup(cp, self.l1, self.l2)
&& let Some(s) = unpack_str_ref(value, self.strings)
{
return Some((start, self.byte_offset, s));
}
}
}
}
#[inline(always)]
pub(crate) fn replace_scan<I>(text: &str, mut iter: I) -> Option<String>
where
I: Iterator<Item = (usize, usize, char)>,
{
if let Some((start, end, ch)) = iter.next() {
let mut result = String::with_capacity(text.len());
result.push_str(&text[..start]);
result.push(ch);
let mut last_end = end;
for (start, end, ch) in iter {
result.push_str(&text[last_end..start]);
result.push(ch);
last_end = end;
}
result.push_str(&text[last_end..]);
Some(result)
} else {
None
}
}
#[inline(always)]
pub(crate) fn replace_spans<'a, I>(text: &str, mut iter: I) -> Option<String>
where
I: Iterator<Item = (usize, usize, &'a str)>,
{
if let Some((start, end, replacement)) = iter.next() {
let mut result = String::with_capacity(text.len());
result.push_str(&text[..start]);
result.push_str(replacement);
let mut last_end = end;
for (start, end, replacement) in iter {
result.push_str(&text[last_end..start]);
result.push_str(replacement);
last_end = end;
}
result.push_str(&text[last_end..]);
Some(result)
} else {
None
}
}
#[inline(always)]
pub(crate) fn page_table_lookup(cp: u32, l1: &[u16], l2: &[u32]) -> Option<u32> {
let page_idx = (cp >> 8) as usize;
let char_idx = (cp & 0xFF) as usize;
if page_idx >= l1.len() {
return None;
}
unsafe { core::hint::assert_unchecked(page_idx < l1.len()) };
let page = l1[page_idx] as usize;
if page == 0 {
return None;
}
unsafe { core::hint::assert_unchecked(page * 256 + char_idx < l2.len()) };
let value = l2[page * 256 + char_idx];
(value != 0).then_some(value)
}
fn decode_u16_table(bytes: &[u8]) -> Box<[u16]> {
debug_assert_eq!(bytes.len() % 2, 0);
bytes
.chunks_exact(2)
.map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
.collect::<Vec<_>>()
.into_boxed_slice()
}
fn decode_u32_table(bytes: &[u8]) -> Box<[u32]> {
debug_assert_eq!(bytes.len() % 4, 0);
bytes
.chunks_exact(4)
.map(|chunk| u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
.collect::<Vec<_>>()
.into_boxed_slice()
}
pub(crate) fn decode_page_table(l1: &[u8], l2: &[u8]) -> (Box<[u16]>, Box<[u32]>) {
(decode_u16_table(l1), decode_u32_table(l2))
}
#[inline(always)]
pub(crate) fn unpack_str_ref(value: u32, strings: &str) -> Option<&str> {
let offset = (value >> 8) as usize;
let len = (value & 0xFF) as usize;
if offset + len <= strings.len() {
Some(&strings[offset..offset + len])
} else {
None
}
}
pub(crate) fn trim_romanize_packed(value: u32, strings: &str) -> u32 {
if value == 0 {
return 0;
}
let mut start = (value >> 8) as usize;
let mut end = start + (value & 0xFF) as usize;
let bytes = strings.as_bytes();
while start < end && bytes[start] == b' ' {
start += 1;
}
while end > start && bytes[end - 1] == b' ' {
end -= 1;
}
((start as u32) << 8) | ((end - start) as u32)
}