#![allow(clippy::unnested_or_patterns)]
#![allow(clippy::cast_ptr_alignment)]
pub mod position;
/// Describes where, and why, a byte slice failed UTF-8 validation.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Utf8Error {
    /// Index of the first byte of the offending sequence.
    pub valid_up_to: usize,
    /// Length in bytes of the invalid sequence, or `None` when the input
    /// stopped in the middle of a sequence.
    pub error_len: Option<u8>,
}

impl core::fmt::Display for Utf8Error {
    /// Writes a one-line, human-readable description of the failure.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let start = self.valid_up_to;
        match self.error_len {
            Some(bad_len) => write!(
                f,
                "invalid utf-8 sequence of {} bytes from index {}",
                bad_len, start
            ),
            None => write!(f, "incomplete utf-8 byte sequence from index {}", start),
        }
    }
}

// The default `Error` methods are sufficient: nothing is overridden here.
impl core::error::Error for Utf8Error {}
use core::intrinsics::const_eval_select;
/// Picks between a const-evaluation body and a runtime body for the same computation.
///
/// Invocation shape accepted by the matchers below:
///
/// ```text
/// const_eval_select!(
///     @capture[optional generic binders] { arg: Type = value, ... } -> Ret:
///     if const { /* body for const evaluation */ }
///     else { /* body for runtime */ }
/// )
/// ```
///
/// Each body becomes a nested function that takes the captured arguments; both
/// functions and the argument values are then passed to the `const_eval_select`
/// intrinsic. The `= value` part may be left out, in which case the variable
/// named like the argument is passed.
pub macro const_eval_select {
// Full form: every captured argument is written `name: Type = value`.
(
@capture$([$($binders:tt)*])? { $($arg:ident : $ty:ty = $val:expr),* $(,)? } $( -> $ret:ty )? :
if const
$(#[$compiletime_attr:meta])* $compiletime:block
else
$(#[$runtime_attr:meta])* $runtime:block
) => {{
// Runtime implementation, carrying any attributes written before the `else` block.
#[inline]
$(#[$runtime_attr])*
fn runtime$(<$($binders)*>)?($($arg: $ty),*) $( -> $ret )? {
$runtime
}
// Const-evaluation implementation with the same parameter list and return type.
#[inline]
$(#[$compiletime_attr])*
const fn compiletime$(<$($binders)*>)?($($arg: $ty),*) $( -> $ret )? {
// Mentions every argument; presumably so a compile-time body that ignores
// some of them does not produce unused-variable warnings.
$(let _ = $arg;)*
$compiletime
}
// The argument values are packed into a single tuple for the intrinsic.
const_eval_select(($($val,)*), compiletime, runtime)
}},
// Shorthand form without `= value`: re-invokes the macro using each argument's own
// identifier as its value.
(
@capture$([$($binders:tt)*])? { $($arg:ident : $ty:ty),* $(,)? } $( -> $ret:ty )? :
if const
$(#[$compiletime_attr:meta])* $compiletime:block
else
$(#[$runtime_attr:meta])* $runtime:block
) => {
// NOTE(review): this path assumes the macro is reachable as
// `crate::text::const_eval_select` — confirm against the module layout.
$crate::text::const_eval_select!(
@capture$([$($binders)*])? { $($arg : $ty = $arg),* } $(-> $ret)? :
if const
$(#[$compiletime_attr])* $compiletime
else
$(#[$runtime_attr])* $runtime
)
},
}
/// Machine word whose every byte is `0x80`, i.e. only the top bit of each byte is set.
const NONASCII_MASK: usize = usize::from_ne_bytes([0x80_u8; size_of::<usize>()]);

/// Reports whether at least one byte of the word `x` has its top bit set (value >= 128).
#[inline]
const fn contains_nonascii(x: usize) -> bool {
    let high_bits = x & NONASCII_MASK;
    high_bits != 0
}
use core::str::utf8_char_width;
/// Checks that `v` is well-formed UTF-8 and reports where validation failed otherwise.
///
/// Multi-byte sequences are checked byte by byte. Runs of ASCII are skipped two machine
/// words at a time whenever the current position is word-aligned; during const evaluation
/// that word-wise path is disabled and ASCII bytes are stepped over one at a time.
///
/// # Errors
///
/// Returns a [`Utf8Error`] whose `valid_up_to` is the index of the first byte of the
/// offending sequence, and whose `error_len` is
/// * `None` when `v` ends in the middle of a multi-byte sequence,
/// * `Some(n)` with `n` in `1..=3`: the number of bytes, starting at `valid_up_to`,
///   that make up the invalid sequence.
#[allow(clippy::missing_errors_doc)]
#[inline(always)]
#[rustc_allow_const_fn_unstable(const_eval_select)] pub const fn is_utf8_or_range(v: &[u8]) -> Result<(), Utf8Error> {
const USIZE_BYTES: usize = size_of::<usize>();
let mut index = 0;
let len = v.len();
// The fast ASCII path reads two words per step.
let ascii_block_size = 2 * USIZE_BYTES;
// Exclusive upper bound for `index` such that a full two-word block still fits in `v`.
let blocks_end = if len >= ascii_block_size {
len - ascii_block_size + 1
} else {
0
};
// Offset from the start of `v` to the first word-aligned address. `usize::MAX` is the
// "never aligned" sentinel: always used in const evaluation, and checked for again below.
let align = const_eval_select!(
@capture { v: &[u8] } -> usize:
if const {
usize::MAX
} else {
v.as_ptr().align_offset(USIZE_BYTES)
}
);
while index < len {
// Start of the sequence currently being validated; reported as `valid_up_to` on error.
let old_offset = index;
// Bails out of the whole function with the given `error_len`.
macro_rules! err {
($error_len: expr) => {
return Err(Utf8Error {
valid_up_to: old_offset,
error_len: $error_len,
})
};
}
// Advances to the next byte and yields it; running off the end of `v` means the
// sequence is incomplete, which is reported with `error_len: None`.
macro_rules! next {
() => {{
index += 1;
if index >= len {
err!(None)
}
v[index]
}};
}
let first = v[index];
if first >= 128 {
// Sequence length implied by the leading byte; only 2, 3 and 4 are accepted below.
let w = utf8_char_width(first);
match w {
2 => {
// As `i8`, the continuation bytes 0x80..=0xBF are exactly the values below -64,
// so `>= -64` means "not a continuation byte".
if next!() as i8 >= -64 {
err!(Some(1))
}
}
3 => {
// The second byte's range depends on the first: this rejects overlong encodings
// (0xE0 followed by 0x80..=0x9F) and surrogates (0xED followed by 0xA0..=0xBF).
match (first, next!()) {
(0xE0, 0xA0..=0xBF)
| (0xE1..=0xEC, 0x80..=0xBF)
| (0xED, 0x80..=0x9F)
| (0xEE..=0xEF, 0x80..=0xBF) => {}
_ => err!(Some(1)),
}
if next!() as i8 >= -64 {
err!(Some(2))
}
}
4 => {
// Rejects overlong encodings (0xF0 followed by 0x80..=0x8F) and code points above
// U+10FFFF (0xF4 followed by 0x90 and up; leading bytes above 0xF4 never match).
match (first, next!()) {
(0xF0, 0x90..=0xBF)
| (0xF1..=0xF3, 0x80..=0xBF)
| (0xF4, 0x80..=0x8F) => {}
_ => err!(Some(1)),
}
if next!() as i8 >= -64 {
err!(Some(2))
}
if next!() as i8 >= -64 {
err!(Some(3))
}
}
// Any other width: the leading byte itself is invalid.
_ => err!(Some(1)),
}
// Step past the last byte of the sequence that was just validated.
index += 1;
} else {
// ASCII byte. Take the word-wise fast path only when `index` sits on a word boundary,
// i.e. its distance to the first aligned offset is a multiple of the word size.
if align != usize::MAX
&& align.wrapping_sub(index).is_multiple_of(USIZE_BYTES)
{
let ptr = v.as_ptr();
while index < blocks_end {
// SAFETY: `align - index` is a multiple of `USIZE_BYTES` (checked above) and
// `index` only grows by `ascii_block_size`, itself a multiple of `USIZE_BYTES`,
// so `ptr.add(index)` stays `usize`-aligned. `index < blocks_end` implies
// `index + 2 * USIZE_BYTES <= len`, so both word reads stay inside `v`.
unsafe {
let block = ptr.add(index).cast::<usize>();
let zu = contains_nonascii(*block);
let zv = contains_nonascii(*block.add(1));
// Stop at the first block holding a non-ASCII byte; it is re-scanned bytewise.
if zu || zv {
break;
}
}
index += ascii_block_size;
}
// Finish bytewise from wherever the word-wise loop stopped.
while index < len && v[index] < 128 {
index += 1;
}
} else {
index += 1;
}
}
}
Ok(())
}
#[allow(clippy::missing_errors_doc)]
#[inline(always)]
#[rustc_allow_const_fn_unstable(const_eval_select)] pub const fn is_utf8(v: &[u8]) -> bool {
const USIZE_BYTES: usize = size_of::<usize>();
let mut index = 0;
let len = v.len();
let ascii_block_size = 2 * USIZE_BYTES;
let blocks_end = if len >= ascii_block_size {
len - ascii_block_size + 1
} else {
0
};
let align = const_eval_select!(
@capture { v: &[u8] } -> usize:
if const {
usize::MAX
} else {
v.as_ptr().align_offset(USIZE_BYTES)
}
);
while index < len {
macro_rules! next {
() => {{
index += 1;
if index >= len {
return false;
}
v[index]
}};
}
let first = v[index];
if first >= 128 {
let w = utf8_char_width(first);
match w {
2 => {
if next!() as i8 >= -64 {
return false;
}
}
3 => {
match (first, next!()) {
(0xE0, 0xA0..=0xBF)
| (0xE1..=0xEC, 0x80..=0xBF)
| (0xED, 0x80..=0x9F)
| (0xEE..=0xEF, 0x80..=0xBF) => {}
_ => return false,
}
if next!() as i8 >= -64 {
return false;
}
}
4 => {
match (first, next!()) {
(0xF0, 0x90..=0xBF)
| (0xF1..=0xF3, 0x80..=0xBF)
| (0xF4, 0x80..=0x8F) => {}
_ => return false,
}
if next!() as i8 >= -64 {
return false;
}
if next!() as i8 >= -64 {
return false;
}
}
_ => return false,
}
index += 1;
} else {
if align != usize::MAX
&& align.wrapping_sub(index).is_multiple_of(USIZE_BYTES)
{
let ptr = v.as_ptr();
while index < blocks_end {
unsafe {
let block = ptr.add(index).cast::<usize>();
let zu = contains_nonascii(*block);
let zv = contains_nonascii(*block.add(1));
if zu || zv {
break;
}
}
index += ascii_block_size;
}
while index < len && v[index] < 128 {
index += 1;
}
} else {
index += 1;
}
}
}
true
}