use ascii::validate_ascii;
/// Error value produced when a byte slice fails UTF-8 validation.
///
/// The only payload is a byte offset, readable through `valid_up_to()`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Utf8Error {
    /// Byte offset recorded by the validator when it rejects the input.
    valid_up_to: usize,
}
impl Utf8Error {
pub fn valid_up_to(&self) -> usize {
self.valid_up_to
}
}
/// Checks whether `v` is well-formed UTF-8.
///
/// Runs of ASCII are skipped with `validate_ascii` (used here as returning `None` when
/// the rest of the slice is ASCII, otherwise the first non-ASCII byte together with its
/// index in the slice it was given). Each non-ASCII sequence is then checked byte by
/// byte: the lead byte selects the allowed range for the second byte (rejecting overlong
/// forms, surrogates and code points above U+10FFFF), and every further byte must be a
/// continuation byte (`10xx_xxxx`).
///
/// # Errors
///
/// Returns a `Utf8Error` whose `valid_up_to()` is the index of the first byte of the
/// first ill-formed or truncated sequence, so `v[..valid_up_to]` is valid UTF-8.
// `next!()` mutates `offset` inside tuple expressions; the operands are evaluated left
// to right, which is exactly the order the bytes must be consumed in, so the lint is
// silenced deliberately.
#[cfg_attr(feature = "cargo-clippy", allow(eval_order_dependence))]
#[inline(always)]
pub fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
    let mut offset = 0;
    let len = v.len();
    'outer: loop {
        // Skip the ASCII prefix of what is left; `first` is the lead byte found after it.
        let mut first = {
            let remaining = &v[offset..];
            match validate_ascii(remaining) {
                None => {
                    // Nothing but ASCII remains: the whole input is valid.
                    break 'outer;
                }
                Some((non_ascii, consumed)) => {
                    offset += consumed;
                    non_ascii
                }
            }
        };
        // Start of the sequence currently being validated. It has to be refreshed for
        // every sequence handled by the inner loop, otherwise an error in a later
        // sequence would be reported at the start of an earlier, valid one.
        let mut old_offset = offset;
        macro_rules! err {
            () => {{
                return Err(Utf8Error {
                    valid_up_to: old_offset,
                })
            }};
        }
        macro_rules! next {
            () => {{
                offset += 1;
                // Input ended in the middle of a multi-byte sequence.
                if offset >= len {
                    err!()
                }
                v[offset]
            }};
        }
        'inner: loop {
            let second = next!();
            match first {
                // Two-byte sequences.
                0xC2..=0xDF => {
                    if second & !CONT_MASK != TAG_CONT_U8 {
                        err!()
                    }
                }
                // Three-byte sequences; 0xE0 and 0xED restrict the second byte to
                // exclude overlong encodings and surrogates respectively.
                0xE0 => {
                    match (second, next!() & !CONT_MASK) {
                        (0xA0..=0xBF, TAG_CONT_U8) => {}
                        _ => err!(),
                    }
                }
                0xE1..=0xEC | 0xEE..=0xEF => {
                    match (second & !CONT_MASK, next!() & !CONT_MASK) {
                        (TAG_CONT_U8, TAG_CONT_U8) => {}
                        _ => err!(),
                    }
                }
                0xED => {
                    match (second, next!() & !CONT_MASK) {
                        (0x80..=0x9F, TAG_CONT_U8) => {}
                        _ => err!(),
                    }
                }
                // Four-byte sequences; 0xF0 excludes overlong encodings and 0xF4
                // excludes code points above U+10FFFF.
                0xF0 => {
                    match (second, next!() & !CONT_MASK, next!() & !CONT_MASK) {
                        (0x90..=0xBF, TAG_CONT_U8, TAG_CONT_U8) => {}
                        _ => err!(),
                    }
                }
                0xF1..=0xF3 => {
                    match (second & !CONT_MASK, next!() & !CONT_MASK, next!() & !CONT_MASK) {
                        (TAG_CONT_U8, TAG_CONT_U8, TAG_CONT_U8) => {}
                        _ => err!(),
                    }
                }
                0xF4 => {
                    match (second, next!() & !CONT_MASK, next!() & !CONT_MASK) {
                        (0x80..=0x8F, TAG_CONT_U8, TAG_CONT_U8) => {}
                        _ => err!(),
                    }
                }
                // Stray continuation bytes, 0xC0/0xC1 and 0xF5..=0xFF never start a
                // well-formed sequence.
                _ => err!(),
            }
            // Step past the last byte of the sequence that was just accepted.
            offset += 1;
            if offset == len {
                break 'outer;
            }
            first = v[offset];
            if first < 0x80 {
                // Back in ASCII: consume this byte and return to the ASCII fast path.
                offset += 1;
                continue 'outer;
            }
            // Another non-ASCII sequence follows immediately; it starts here.
            old_offset = offset;
            continue 'inner;
        }
    }
    Ok(())
}
/// Low six bits of a byte (`0011_1111`); `!CONT_MASK` isolates the top two tag bits.
const CONT_MASK: u8 = 0x3F;
/// Tag a continuation byte must carry in its top two bits (`1000_0000`).
const TAG_CONT_U8: u8 = 0x80;