use std::str;
#[cfg(doc)]
use crate::*;
/// An iterator that splits a byte slice into valid and invalid UTF-8 pieces.
///
/// Each item is either `Ok(&str)` for a run of well-formed UTF-8 or
/// `Err(&[u8])` for a byte sequence that could not be decoded. The items
/// borrow from the original slice; nothing is copied.
#[derive(Copy, Clone, Debug)]
pub struct Utf8Chunks<'a> {
    /// The bytes that have not been yielded yet.
    buf: &'a [u8],
    /// Length of the invalid sequence at the front of `buf` that must be
    /// yielded as the next `Err` item, or `None` when nothing is pending.
    invalid_prefix: Option<usize>,
}
impl<'a> Utf8Chunks<'a> {
pub fn rest(self) -> &'a [u8] {
self.buf
}
pub(crate) fn new(buf: &'a [u8]) -> Self {
Self { buf, invalid_prefix: None }
}
unsafe fn take(&mut self, len: usize) -> &'a [u8] {
debug_assert!(len <= self.buf.len());
let pre = self.buf.get_unchecked(..len);
self.buf = self.buf.get_unchecked(len..);
pre
}
}
impl<'a> Iterator for Utf8Chunks<'a> {
    /// `Ok` carries a run of valid UTF-8; `Err` carries one invalid byte
    /// sequence exactly as it appears in the input.
    type Item = Result<&'a str, &'a [u8]>;

    /// Yields the next piece of the buffer.
    ///
    /// Valid and invalid pieces are returned in input order, and an empty
    /// `Ok` string is never produced. Returns `None` once every byte has been
    /// yielded.
    fn next(&mut self) -> Option<Self::Item> {
        // An invalid sequence discovered by the previous call is reported
        // before any further decoding happens.
        if let Some(pending) = self.invalid_prefix.take() {
            // SAFETY: `pending` was derived from the remaining buffer (either
            // the decoder's error length or the full remaining length) and the
            // buffer has not been advanced since.
            return Some(Err(unsafe { self.take(pending) }));
        }
        if self.buf.is_empty() {
            return None;
        }

        let error = match str::from_utf8(self.buf) {
            Ok(everything) => {
                // The whole remainder is valid; nothing is left afterwards.
                self.buf = &[];
                return Some(Ok(everything));
            }
            Err(error) => error,
        };

        // SAFETY: `valid_up_to` is an index into the slice that was just
        // validated, which is exactly the remaining buffer.
        let valid_bytes = unsafe { self.take(error.valid_up_to()) };
        let valid = if cfg!(debug_assertions) {
            // Re-validate in debug builds so a broken invariant panics
            // instead of producing an ill-formed `str`.
            str::from_utf8(valid_bytes).unwrap()
        } else {
            // SAFETY: The decoder reported these bytes as valid UTF-8.
            unsafe { str::from_utf8_unchecked(valid_bytes) }
        };

        // Without an error length the input ended in the middle of a
        // sequence, so everything that is left counts as invalid.
        let invalid_len = error.error_len().unwrap_or(self.buf.len());

        if valid.is_empty() {
            // Nothing valid precedes the error, so report the invalid bytes
            // right away rather than yielding an empty string.
            // SAFETY: `invalid_len` is at most the length of the remaining
            // buffer, which now starts at the invalid sequence.
            return Some(Err(unsafe { self.take(invalid_len) }));
        }
        self.invalid_prefix = Some(invalid_len);
        Some(Ok(valid))
    }
}
/// Encodes `c` as UTF-8; usable in `const` contexts.
///
/// Returns a four-byte buffer together with the number of leading bytes that
/// make up the encoding. Bytes past that length are zero.
pub const fn encode_utf8(c: char) -> ([u8; 4], usize) {
    // Continuation bytes look like `10xx_xxxx`: a fixed tag plus six payload
    // bits.
    const TRAIL_TAG: u8 = 0b1000_0000;
    const TRAIL_BITS: u8 = 0b0011_1111;

    /// Builds a leading byte: the payload bits of `code` found at `shift`,
    /// limited to `bits`, combined with the length marker `tag`.
    const fn lead(code: u32, shift: u32, bits: u8, tag: u8) -> u8 {
        ((code >> shift) as u8 & bits) | tag
    }

    /// Builds the continuation byte carrying the six bits of `code` found at
    /// `shift`.
    const fn trail(code: u32, shift: u32) -> u8 {
        ((code >> shift) as u8 & TRAIL_BITS) | TRAIL_TAG
    }

    let code = c as u32;
    // The ranges below are the scalar values that need one, two, three and
    // four bytes respectively.
    match code {
        // `0xxx_xxxx`: the value is below 0x80 and is stored unchanged.
        0..=0x7F => ([code as u8, 0, 0, 0], 1),
        // `110x_xxxx 10xx_xxxx`
        0x80..=0x7FF => (
            [lead(code, 6, 0b0001_1111, 0b1100_0000), trail(code, 0), 0, 0],
            2,
        ),
        // `1110_xxxx 10xx_xxxx 10xx_xxxx`
        0x800..=0xFFFF => (
            [
                lead(code, 12, 0b0000_1111, 0b1110_0000),
                trail(code, 6),
                trail(code, 0),
                0,
            ],
            3,
        ),
        // `1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx`
        _ => (
            [
                lead(code, 18, 0b0000_0111, 0b1111_0000),
                trail(code, 12),
                trail(code, 6),
                trail(code, 0),
            ],
            4,
        ),
    }
}