#[inline]
const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
(byte & (0x7F >> width)) as u32
}
#[inline]
const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
(ch << 6) | (byte & CONT_MASK) as u32
}
#[inline]
pub(super) const fn utf8_is_cont_byte(byte: u8) -> bool {
(byte as i8) < -64
}
#[inline]
pub unsafe fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
let x = *bytes.next()?;
if x < 128 {
return Some(x as u32);
}
let init = utf8_first_byte(x, 2);
let y = unsafe { *bytes.next().unwrap_unchecked() };
let mut ch = utf8_acc_cont_byte(init, y);
if x >= 0xE0 {
let z = unsafe { *bytes.next().unwrap_unchecked() };
let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
ch = init << 12 | y_z;
if x >= 0xF0 {
let w = unsafe { *bytes.next().unwrap_unchecked() };
ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
}
}
Some(ch)
}
#[inline]
pub unsafe fn next_code_point_reverse<'a, I>(bytes: &mut I) -> Option<u32>
where
I: DoubleEndedIterator<Item = &'a u8>,
{
let w = match *bytes.next_back()? {
next_byte if next_byte < 128 => return Some(next_byte as u32),
back_byte => back_byte,
};
let mut ch;
let z = unsafe { *bytes.next_back().unwrap_unchecked() };
ch = utf8_first_byte(z, 2);
if utf8_is_cont_byte(z) {
let y = unsafe { *bytes.next_back().unwrap_unchecked() };
ch = utf8_first_byte(y, 3);
if utf8_is_cont_byte(y) {
let x = unsafe { *bytes.next_back().unwrap_unchecked() };
ch = utf8_first_byte(x, 4);
ch = utf8_acc_cont_byte(ch, y);
}
ch = utf8_acc_cont_byte(ch, z);
}
ch = utf8_acc_cont_byte(ch, w);
Some(ch)
}
const CONT_MASK: u8 = 0b0011_1111;