use core::convert::TryFrom;
use core::str;
/// U+FFFD REPLACEMENT CHARACTER as a one-character string slice.
// NOTE(review): presumably emitted in place of undecodable bytes; the use
// sites are not visible from this part of the file — confirm.
pub(crate) const REPLACEMENT_CHAR_STR: &str = "\u{FFFD}";
/// U+FFFD REPLACEMENT CHARACTER as a `char`.
pub(crate) const REPLACEMENT_CHAR: char = '\u{FFFD}';
/// Returns the length in bytes of a UTF-8 sequence that starts with `first`.
///
/// The decision is made solely from the upper four bits of `first`:
///
/// * `0x0..=0x7` (ASCII): 1
/// * `0x8..=0xB` (continuation byte, cannot start a character): 0
/// * `0xC..=0xD`: 2
/// * `0xE`: 3
/// * `0xF`: 4
///
/// Because only the high nibble is inspected, bytes that can never begin a
/// well-formed sequence (such as `0xC0`, `0xC1`, and `0xF8..=0xFF`) are still
/// classified as 2- or 4-byte leads. Full validation is left to the caller.
#[inline]
pub(crate) fn expected_char_len(first: u8) -> u8 {
    match first >> 4 {
        0x0..=0x7 => 1,
        0x8..=0xB => 0,
        0xC | 0xD => 2,
        0xE => 3,
        // Only 0xF remains, since a `u8` shifted right by four is at most 15.
        _ => 4,
    }
}
/// Decodes one UTF-8 encoded character from the front of a byte iterator.
///
/// On success, returns the decoded character together with the number of
/// bytes it occupied (1 to 4).
///
/// Returns `None` when:
///
/// * the iterator is empty,
/// * the first byte is a continuation byte,
/// * the iterator ends before the sequence announced by the first byte is
///   complete, or
/// * the collected bytes are not valid UTF-8.
///
/// For a multi-byte lead, all announced trailing bytes are pulled from the
/// iterator before validation, so a `None` result may still have consumed
/// several items.
pub(crate) fn take_char<I>(mut bytes: I) -> Option<(char, u8)>
where
    I: Iterator<Item = u8>,
{
    let lead = bytes.next()?;
    let char_len = expected_char_len(lead);

    match char_len {
        // A continuation byte cannot start a character.
        0 => None,
        1 => {
            debug_assert!(lead.is_ascii());
            let ch = char::try_from(lead).expect("[consistency] the byte is valid ASCII character");
            Some((ch, 1))
        }
        2..=4 => {
            let width = usize::from(char_len);
            let mut encoded = [lead, 0, 0, 0];
            // Gather the trailing bytes; running out of input aborts decoding.
            for slot in encoded[1..width].iter_mut() {
                *slot = bytes.next()?;
            }
            // Let the standard library do the actual well-formedness check.
            let decoded = str::from_utf8(&encoded[..width]).ok()?;
            let ch = decoded
                .chars()
                .next()
                .expect("[consistency] the string is not empty");
            Some((ch, char_len))
        }
        _ => unreachable!(
            "[validity] `expected_char_len()` must return the value less than or equal to 4"
        ),
    }
}
/// Splits `bytes` into its valid UTF-8 prefix and an optional trailing
/// incomplete character.
///
/// The first element of the result is the part of `bytes` that precedes any
/// incomplete final character, as a `&str`. The second element is
/// `Some(rest)` holding the bytes of the unfinished final character, or
/// `None` when the input ends on a character boundary.
///
/// # Panics
///
/// Panics if `bytes` is not valid UTF-8 apart from a possibly incomplete
/// final character.
pub(crate) fn split_incomplete_suffix(bytes: &[u8]) -> (&str, Option<&[u8]>) {
    let tail = last_char_len_in_last_4bytes(bytes).expect(
        "[consistency] bytes should be valid UTF-8 sequence, \
         except for the possible trailing incomplete character",
    );

    // `len_incomplete()` is zero when the last character is whole, in which
    // case the split point is the end of the input.
    let (head, rest) = bytes.split_at(bytes.len() - tail.len_incomplete());
    let valid = str::from_utf8(head).expect(
        "[consistency] bytes should be valid UTF-8 sequence, \
         except for the possible trailing incomplete character",
    );

    let incomplete = if tail.is_complete() { None } else { Some(rest) };
    (valid, incomplete)
}
/// Byte-length information about the last (possibly incomplete) character of
/// a byte sequence.
///
/// Both fields are zero when the inspected sequence was empty.
#[derive(Debug, Clone, Copy)]
pub(super) struct LastCharLen {
/// Number of bytes actually present, counted from the last character's
/// leading byte to the end of the input.
pub(super) available: u8,
/// Number of bytes the last character's leading byte calls for.
pub(super) expected: u8,
}
impl LastCharLen {
    /// Returns `true` if exactly as many bytes are available as expected,
    /// i.e. the last character is not cut short.
    ///
    /// This is also `true` for the all-zero value describing empty input.
    #[inline]
    #[must_use]
    pub(super) fn is_complete(self) -> bool {
        self.available == self.expected
    }

    /// Returns the number of bytes belonging to an incomplete last character.
    ///
    /// The result is `0` unless fewer bytes are available than expected.
    #[inline]
    #[must_use]
    pub(super) fn len_incomplete(self) -> usize {
        if self.available < self.expected {
            usize::from(self.available)
        } else {
            0
        }
    }

    /// Returns how many more bytes are needed to complete the last character.
    ///
    /// The result is `0` when nothing is missing. The subtraction saturates
    /// so that a value with `available > expected` (which malformed input can
    /// produce) yields `0` rather than wrapping around to a large count or
    /// panicking on overflow, in line with how `len_incomplete()` treats the
    /// same state.
    #[inline]
    #[must_use]
    pub(super) fn len_missing(self) -> usize {
        usize::from(self.expected.saturating_sub(self.available))
    }
}
/// Measures the last character of `bytes` by looking at no more than its
/// final four bytes.
///
/// Scanning backwards from the end, the first byte that is not a UTF-8
/// continuation byte (`0b10xx_xxxx`) is taken as the leading byte of the last
/// character. The result reports how many bytes are present from that byte to
/// the end of the input (`available`) and how many the leading byte announces
/// (`expected`).
///
/// Returns `Some` with both lengths set to zero for empty input, and `None`
/// when no leading byte is found among the inspected bytes.
///
/// The bytes are not validated beyond this; in particular, builds with debug
/// assertions enabled panic if more bytes are available than the leading byte
/// announces.
pub(crate) fn last_char_len_in_last_4bytes(bytes: &[u8]) -> Option<LastCharLen> {
    if bytes.is_empty() {
        return Some(LastCharLen {
            available: 0,
            expected: 0,
        });
    }

    let is_continuation = |byte: &u8| (byte & 0b1100_0000) == 0x80;

    // Number of continuation bytes at the very end, before the leading byte.
    let trailing_continuations = bytes
        .iter()
        .rev()
        .take(4)
        .position(|byte| !is_continuation(byte))?;
    let available = trailing_continuations + 1;
    debug_assert!(
        (1..=4).contains(&available),
        "[validity] the iterator is limited to emit at most 4 elements"
    );

    let lead = bytes[bytes.len() - available];
    let expected = expected_char_len(lead);
    // `available` is within 1..=4 here, so narrowing to `u8` is lossless.
    let available = available as u8;
    debug_assert!(
        available <= expected,
        "[consistency] the character must not be longer than expected"
    );

    Some(LastCharLen {
        available,
        expected,
    })
}