/// Returns true if and only if the given byte is an ASCII word byte, i.e.
/// one of `_`, `0-9`, `A-Z` or `a-z`.
///
/// The answer comes from a 256-entry lookup table that is computed at
/// compile time, so the check at runtime is a single indexed load.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn is_word_byte(b: u8) -> bool {
    /// Builds the membership table: entry `i` is true exactly when the byte
    /// value `i` is a word byte.
    const fn build_table() -> [bool; 256] {
        let mut table = [false; 256];
        let mut idx = 0usize;
        // `for` loops are not usable in `const fn`, hence the manual counter.
        while idx < table.len() {
            // `idx` is always below 256 here, so the cast never truncates.
            table[idx] = matches!(
                idx as u8,
                b'_' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z'
            );
            idx += 1;
        }
        table
    }
    const IS_WORD: [bool; 256] = build_table();
    IS_WORD[usize::from(b)]
}
/// Decodes the first UTF-8 encoded codepoint from the start of `bytes`.
///
/// * Returns `None` when `bytes` is empty.
/// * Returns `Some(Ok(ch))` when `bytes` begins with a valid UTF-8 encoding
///   of `ch`.
/// * Returns `Some(Err(b))`, where `b` is the first byte of `bytes`, when the
///   slice does not begin with a valid encoding (invalid leading byte,
///   truncated sequence, or a sequence rejected by `core::str::from_utf8`).
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn decode(bytes: &[u8]) -> Option<Result<char, u8>> {
    let first = *bytes.first()?;
    let width = match len(first) {
        // ASCII fast path: a single byte is its own codepoint.
        Some(1) => return Some(Ok(char::from(first))),
        // Multi-byte sequence that fits entirely inside the slice.
        Some(n) if n <= bytes.len() => n,
        // Either not a valid leading byte, or the sequence is truncated.
        _ => return Some(Err(first)),
    };
    // Full validation (continuation bytes, overlong forms, surrogates, ...)
    // is delegated to the standard library.
    let decoded = core::str::from_utf8(&bytes[..width])
        .ok()
        .and_then(|s| s.chars().next())
        .ok_or(first);
    Some(decoded)
}
/// Decodes the last UTF-8 encoded codepoint from the end of `bytes`.
///
/// * Returns `None` when `bytes` is empty.
/// * Returns `Some(Ok(ch))` when `bytes` ends with a valid UTF-8 encoding of
///   `ch`, i.e. the encoding's final byte is the final byte of the slice.
/// * Returns `Some(Err(b))`, where `b` is the last byte of `bytes`, when no
///   valid encoding ends at the end of the slice.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn decode_last(bytes: &[u8]) -> Option<Result<char, u8>> {
    let last = *bytes.last()?;
    // Walk backwards over continuation bytes to find where the final
    // codepoint could begin. An encoding is at most 4 bytes long, so never
    // look back further than that.
    let mut start = bytes.len() - 1;
    let limit = bytes.len().saturating_sub(4);
    while start > limit && !is_leading_or_invalid_byte(bytes[start]) {
        start -= 1;
    }
    match decode(&bytes[start..]) {
        None => None,
        // `decode` only looks at the front of the slice it is given, so a
        // successful decode may stop short of the end (e.g. `b"a\x80"` decodes
        // `'a'` and leaves a stray continuation byte behind). Such a codepoint
        // is not the *last* one; only accept a decode that consumes everything
        // up to the end of `bytes`.
        Some(Ok(ch)) if start + ch.len_utf8() == bytes.len() => Some(Ok(ch)),
        Some(_) => Some(Err(last)),
    }
}
/// Given a byte, returns the length in bytes of the UTF-8 sequence it would
/// start, judged only by the byte's high bits.
///
/// Returns `None` for bytes that cannot start a sequence: continuation bytes
/// (`10xxxxxx`) and bytes with five or more leading one bits (`11111xxx`).
/// A `Some` result does not guarantee the sequence is valid; the remaining
/// bytes are not inspected here.
#[cfg_attr(feature = "perf-inline", inline(always))]
fn len(byte: u8) -> Option<usize> {
    // The number of leading one bits encodes the sequence length:
    //   0xxxxxxx -> 1 byte (ASCII)
    //   10xxxxxx -> continuation byte, never a leading byte
    //   110xxxxx -> 2 bytes
    //   1110xxxx -> 3 bytes
    //   11110xxx -> 4 bytes
    match byte.leading_ones() {
        0 => Some(1),
        2 => Some(2),
        3 => Some(3),
        4 => Some(4),
        _ => None,
    }
}
/// Returns true if and only if offset `i` in `bytes` is a position at which
/// a UTF-8 sequence could begin.
///
/// `i == bytes.len()` counts as a boundary; any larger `i` does not. For an
/// in-range `i`, the position is a boundary exactly when `bytes[i]` is not a
/// UTF-8 continuation byte (`10xxxxxx`). Only that single byte is examined;
/// the rest of the slice is not validated.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn is_boundary(bytes: &[u8], i: usize) -> bool {
    bytes
        .get(i)
        .map_or(i == bytes.len(), |&b| (b & 0b1100_0000) != 0b1000_0000)
}
/// Returns true if and only if the given byte is anything other than a UTF-8
/// continuation byte (`10xxxxxx`, i.e. `0x80..=0xBF`).
///
/// That covers ASCII bytes, leading bytes of multi-byte sequences, and bytes
/// that never appear in valid UTF-8.
#[cfg_attr(feature = "perf-inline", inline(always))]
fn is_leading_or_invalid_byte(b: u8) -> bool {
    !matches!(b, 0x80..=0xBF)
}