#![deny(unsafe_op_in_unsafe_fn)]
use std::fmt;
#[cfg(target_arch = "aarch64")]
use std::arch::aarch64::vceqq_u8;
#[cfg(target_arch = "aarch64")]
use std::arch::aarch64::vdupq_n_u8;
#[cfg(target_arch = "aarch64")]
use std::arch::aarch64::vld1q_u8;
#[cfg(target_arch = "aarch64")]
use std::arch::aarch64::vminvq_u8;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::__m128i;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::_mm_cmpeq_epi8;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::_mm_loadu_si128;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::_mm_movemask_epi8;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::_mm_set1_epi8;
/// Writes `bytes` to `f`, copying valid UTF-8 through unchanged and rendering
/// every byte that is not part of valid UTF-8 as a `\xNN` escape, where `NN`
/// is the byte in two uppercase hexadecimal digits.
///
/// # Errors
///
/// Returns the first error reported by the formatter.
pub fn write_escaped(f: &mut fmt::Formatter<'_>, bytes: &[u8]) -> fmt::Result {
    let mut remaining = bytes;
    loop {
        if remaining.is_empty() {
            return Ok(());
        }
        let error = match std::str::from_utf8(remaining) {
            // Everything that is left is valid: emit it in one go and finish.
            Ok(text) => return f.write_str(text),
            Err(error) => error,
        };
        let (good, tail) = remaining.split_at(error.valid_up_to());
        if !good.is_empty() {
            // SAFETY: `valid_up_to` is the length of the prefix that
            // `from_utf8` has just verified to be valid UTF-8.
            f.write_str(unsafe { std::str::from_utf8_unchecked(good) })?;
        }
        // `error_len` is `None` when the input ends inside a multi-byte
        // sequence; in that case every remaining byte is escaped.
        let bad_len = error.error_len().unwrap_or(tail.len());
        let (bad, after) = tail.split_at(bad_len);
        for byte in bad {
            write!(f, "\\x{byte:02X}")?;
        }
        remaining = after;
    }
}
/// Borrowed byte slice whose [`fmt::Display`] output is produced by
/// [`write_escaped`].
#[derive(Debug, Clone, Copy)]
#[repr(transparent)]
pub struct BytesDisplay<'src>(pub &'src [u8]);

impl fmt::Display for BytesDisplay<'_> {
    /// Formats the wrapped bytes by delegating to [`write_escaped`].
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(bytes) = *self;
        write_escaped(f, bytes)
    }
}
/// Returns the suffix of `s` that remains after removing every leading
/// occurrence of `byte`.
#[inline]
#[must_use]
pub fn trim_start_byte(s: &[u8], byte: u8) -> &[u8] {
    // Let the platform helper skip what it can, then finish byte by byte.
    let tail = &s[simd_skip_leading(s, byte)..];
    let extra = tail.iter().take_while(|&&b| b == byte).count();
    &tail[extra..]
}
/// Returns the prefix of `s` that remains after removing every trailing
/// occurrence of `byte`.
#[inline]
#[must_use]
pub fn trim_end_byte(s: &[u8], byte: u8) -> &[u8] {
    // Let the platform helper drop what it can, then finish byte by byte.
    let head = &s[..simd_skip_trailing(s, byte)];
    let keep = head
        .iter()
        .rposition(|&b| b != byte)
        .map_or(0, |last| last + 1);
    &head[..keep]
}
/// Returns `s` with every leading and trailing occurrence of `byte` removed.
#[inline]
#[must_use]
pub fn trim_byte(s: &[u8], byte: u8) -> &[u8] {
    let without_prefix = trim_start_byte(s, byte);
    trim_end_byte(without_prefix, byte)
}
/// x86_64 fast path: scans `s` from the front in 16-byte blocks and returns
/// how many leading bytes equal `byte`.
///
/// When a block contains a mismatch, the exact index of that mismatch is
/// returned. When every full block matches, the result is the number of bytes
/// covered by full blocks; a tail shorter than 16 bytes is not inspected.
#[cfg(target_arch = "x86_64")]
#[inline]
fn simd_skip_leading(s: &[u8], byte: u8) -> usize {
    let mut skipped = 0;
    #[allow(clippy::multiple_unsafe_ops_per_block)]
    // SAFETY: `chunks_exact(16)` only yields slices that are exactly 16 bytes
    // long, so each unaligned 16-byte load stays inside `s`. The intrinsics
    // used are SSE2, assumed available on every x86_64 target.
    unsafe {
        let needle = _mm_set1_epi8(byte as i8);
        for block in s.chunks_exact(16) {
            // `_mm_loadu_si128` performs an unaligned load, so the pointer
            // cast does not need 16-byte alignment.
            #[allow(clippy::cast_ptr_alignment)]
            let lanes = _mm_loadu_si128(block.as_ptr().cast::<__m128i>());
            // Bit `k` of `mask` is set when byte `k` of the block matches.
            let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(lanes, needle)) as u32;
            if mask != 0xFFFF {
                return skipped + mask.trailing_ones() as usize;
            }
            skipped += 16;
        }
    }
    skipped
}
/// aarch64 fast path: scans `s` from the front in 16-byte blocks and returns
/// the number of bytes covered by leading blocks made up entirely of `byte`.
///
/// The scan stops at the first block containing any other byte without
/// locating the mismatch inside it, and a tail shorter than 16 bytes is not
/// inspected, so the result is always a multiple of 16.
#[cfg(target_arch = "aarch64")]
#[inline]
fn simd_skip_leading(s: &[u8], byte: u8) -> usize {
    let mut skipped = 0;
    #[allow(clippy::multiple_unsafe_ops_per_block)]
    // SAFETY: `chunks_exact(16)` only yields slices that are exactly 16 bytes
    // long, so each 16-byte load stays inside `s`. The intrinsics used are
    // NEON, assumed available on every aarch64 target.
    unsafe {
        let needle = vdupq_n_u8(byte);
        for block in s.chunks_exact(16) {
            let lanes = vld1q_u8(block.as_ptr());
            // Matching lanes compare to 0xFF, so a minimum below 0xFF means
            // at least one byte in the block differs.
            if vminvq_u8(vceqq_u8(lanes, needle)) != 0xFF {
                break;
            }
            skipped += 16;
        }
    }
    skipped
}
/// Fallback for targets other than x86_64 and aarch64: performs no scan and
/// reports that zero leading bytes were skipped, leaving all of the work to
/// the caller.
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
#[inline]
fn simd_skip_leading(_s: &[u8], _byte: u8) -> usize {
// Both arguments are intentionally unused.
0
}
/// x86_64 fast path: scans `s` from the back in 16-byte blocks and returns
/// the length of `s` once trailing bytes equal to `byte` are dropped.
///
/// When a block contains a mismatch, the returned length ends just after the
/// last mismatching byte in that block. When every full block matches, the
/// result is what is left in front of those blocks; a head shorter than 16
/// bytes is not inspected.
#[cfg(target_arch = "x86_64")]
#[inline]
fn simd_skip_trailing(s: &[u8], byte: u8) -> usize {
    let mut kept = s.len();
    #[allow(clippy::multiple_unsafe_ops_per_block)]
    // SAFETY: `rchunks_exact(16)` only yields slices that are exactly 16 bytes
    // long, so each unaligned 16-byte load stays inside `s`. The intrinsics
    // used are SSE2, assumed available on every x86_64 target.
    unsafe {
        let needle = _mm_set1_epi8(byte as i8);
        for block in s.rchunks_exact(16) {
            // `_mm_loadu_si128` performs an unaligned load, so the pointer
            // cast does not need 16-byte alignment.
            #[allow(clippy::cast_ptr_alignment)]
            let lanes = _mm_loadu_si128(block.as_ptr().cast::<__m128i>());
            // Bit `k` of `mask` is set when byte `k` of the block matches.
            let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(lanes, needle)) as u32;
            if mask != 0xFFFF {
                // Move the 16 lane bits to the top of the word so that
                // `leading_ones` counts matches from the end of the block.
                return kept - (mask << 16).leading_ones() as usize;
            }
            kept -= 16;
        }
    }
    kept
}
/// aarch64 fast path: scans `s` from the back in 16-byte blocks and returns
/// the length of `s` once trailing blocks made up entirely of `byte` are
/// dropped.
///
/// The scan stops at the first block (counting from the end) containing any
/// other byte without locating the mismatch inside it, and a head shorter
/// than 16 bytes is not inspected.
#[cfg(target_arch = "aarch64")]
#[inline]
fn simd_skip_trailing(s: &[u8], byte: u8) -> usize {
    let mut kept = s.len();
    #[allow(clippy::multiple_unsafe_ops_per_block)]
    // SAFETY: `rchunks_exact(16)` only yields slices that are exactly 16 bytes
    // long, so each 16-byte load stays inside `s`. The intrinsics used are
    // NEON, assumed available on every aarch64 target.
    unsafe {
        let needle = vdupq_n_u8(byte);
        for block in s.rchunks_exact(16) {
            let lanes = vld1q_u8(block.as_ptr());
            // Matching lanes compare to 0xFF, so a minimum below 0xFF means
            // at least one byte in the block differs.
            if vminvq_u8(vceqq_u8(lanes, needle)) != 0xFF {
                break;
            }
            kept -= 16;
        }
    }
    kept
}
/// Fallback for targets other than x86_64 and aarch64: performs no scan and
/// returns the full length of `s`, i.e. no trailing bytes are dropped here
/// and all of the work is left to the caller.
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
#[inline]
fn simd_skip_trailing(s: &[u8], _byte: u8) -> usize {
// `_byte` is intentionally unused.
s.len()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Renders `bytes` through the `Display` impl of `BytesDisplay`.
    fn escaped(bytes: &[u8]) -> String {
        let shown = BytesDisplay(bytes);
        shown.to_string()
    }

    /// Concatenates two byte slices into an owned buffer.
    fn cat(head: &[u8], tail: &[u8]) -> Vec<u8> {
        let mut joined = head.to_vec();
        joined.extend_from_slice(tail);
        joined
    }

    #[test]
    fn escape_pure_ascii_is_verbatim() {
        for &text in &["", "hello", "a b\tc"] {
            assert_eq!(escaped(text.as_bytes()), text);
        }
    }

    #[test]
    fn escape_valid_utf8_is_verbatim() {
        for &text in &["café", "日本語"] {
            assert_eq!(escaped(text.as_bytes()), text);
        }
    }

    #[test]
    fn escape_invalid_bytes() {
        let cases: [(&[u8], &str); 5] = [
            (b"Caf\xC9\xE9\xFF", "Caf\\xC9\\xE9\\xFF"),
            (b"\xFF\xFE", "\\xFF\\xFE"),
            (b"a\xFFb", "a\\xFFb"),
            (b"\xC3\xA9\xFF", "é\\xFF"),
            (b"ab\xC9", "ab\\xC9"),
        ];
        for &(input, expected) in &cases {
            assert_eq!(escaped(input), expected);
        }
    }

    #[test]
    fn trim_start_byte_basic() {
        let cases: [(&[u8], &[u8]); 5] = [
            (b"", b""),
            (b"xxx", b""),
            (b"xxxa", b"a"),
            (b"axxx", b"axxx"),
            (b"abc", b"abc"),
        ];
        for &(input, expected) in &cases {
            assert_eq!(trim_start_byte(input, b'x'), expected);
        }
    }

    #[test]
    fn trim_start_byte_long() {
        // Inputs long enough to reach the 16-byte block path where one exists.
        let one_block = cat(b"xxxxxxxxxxxxxxxx", b"abc");
        assert_eq!(trim_start_byte(&one_block, b'x'), b"abc");

        let two_blocks = cat(b"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", b"yz");
        assert_eq!(trim_start_byte(&two_blocks, b'x'), b"yz");

        let block_plus_one = cat(b"xxxxxxxxxxxxxxxxx", b"q");
        assert_eq!(trim_start_byte(&block_plus_one, b'x'), b"q");

        let all_matching = vec![b'x'; 64];
        assert_eq!(trim_start_byte(&all_matching, b'x'), b"");

        let mut last_differs = vec![b'x'; 16];
        last_differs[15] = b'q';
        assert_eq!(trim_start_byte(&last_differs, b'x'), b"q");
    }

    #[test]
    fn trim_end_byte_basic() {
        let cases: [(&[u8], &[u8]); 5] = [
            (b"", b""),
            (b"xxx", b""),
            (b"axxx", b"a"),
            (b"xxxa", b"xxxa"),
            (b"abc", b"abc"),
        ];
        for &(input, expected) in &cases {
            assert_eq!(trim_end_byte(input, b'x'), expected);
        }
    }

    #[test]
    fn trim_end_byte_long() {
        // Inputs long enough to reach the 16-byte block path where one exists.
        let one_block = cat(b"abc", b"xxxxxxxxxxxxxxxx");
        assert_eq!(trim_end_byte(&one_block, b'x'), b"abc");

        let two_blocks = cat(b"yz", b"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
        assert_eq!(trim_end_byte(&two_blocks, b'x'), b"yz");

        let block_plus_one = cat(b"q", b"xxxxxxxxxxxxxxxxx");
        assert_eq!(trim_end_byte(&block_plus_one, b'x'), b"q");

        let all_matching = vec![b'x'; 64];
        assert_eq!(trim_end_byte(&all_matching, b'x'), b"");

        let mut first_differs = vec![b'x'; 16];
        first_differs[0] = b'q';
        assert_eq!(trim_end_byte(&first_differs, b'x'), b"q");
    }

    #[test]
    fn trim_byte_both_sides() {
        let cases: [(&[u8], &[u8]); 5] = [
            (b"xxxabcxxx", b"abc"),
            (
                b"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxabcxxxxxxxxxxxxxxxx",
                b"abc",
            ),
            (b"abc", b"abc"),
            (b"", b""),
            (b"xxxx", b""),
        ];
        for &(input, expected) in &cases {
            assert_eq!(trim_byte(input, b'x'), expected);
        }
    }
}