#![cfg_attr(feature = "safe", deny(unsafe_code))]
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(feature = "std", doc = include_str!("../README.md"))]
pub(crate) mod decoder;
pub(crate) mod encoder;
pub mod iter;
#[cfg(feature = "alloc")]
extern crate alloc;
#[cfg(feature = "alloc")]
use alloc::{borrow::Cow, string::String, vec::Vec};
/// Lookup table indexed by byte value: `true` when the byte must be escaped.
///
/// Tab, line feed, carriage return and the printable ASCII range (space
/// through `~`) pass through unescaped, with the single exception of the
/// backtick, which is the escape introducer. Every other byte is `true`.
const REQUIRES_ESCAPE_TABLE: [bool; 256] = {
    let mut lookup = [false; 256];
    let mut index = 0usize;
    // `for` loops are not available in const evaluation, hence the manual counter.
    while index < 256 {
        let byte = index as u8;
        let passes_through =
            matches!(byte, b'\t' | b'\n' | b'\r' | b' '..=b'~') && byte != b'`';
        lookup[index] = !passes_through;
        index += 1;
    }
    lookup
};
/// Sentinel stored in [`HEX_NIBBLE_DECODE_TABLE`] for bytes that are not hex digits at all.
const HEX_NIBBLE_DECODE_INVALID_ERR: u8 = 0xFF;
/// Sentinel stored in [`HEX_NIBBLE_DECODE_TABLE`] for the lowercase digits `a`..=`f`.
const HEX_NIBBLE_DECODE_LOWERCASE_ERR: u8 = 0xFE;
/// Lookup table indexed by an ASCII byte, yielding its hex nibble value.
///
/// `0`..=`9` and uppercase `A`..=`F` map to `0..=15`. Lowercase `a`..=`f` map
/// to [`HEX_NIBBLE_DECODE_LOWERCASE_ERR`] so they can be reported separately,
/// and everything else maps to [`HEX_NIBBLE_DECODE_INVALID_ERR`].
const HEX_NIBBLE_DECODE_TABLE: [u8; 256] = {
    let mut lookup = [HEX_NIBBLE_DECODE_INVALID_ERR; 256];
    let mut index = 0usize;
    // `for` loops are not available in const evaluation, hence the manual counter.
    while index < 256 {
        let byte = index as u8;
        lookup[index] = match byte {
            b'0'..=b'9' => byte - b'0',
            b'A'..=b'F' => byte - b'A' + 10,
            b'a'..=b'f' => HEX_NIBBLE_DECODE_LOWERCASE_ERR,
            _ => HEX_NIBBLE_DECODE_INVALID_ERR,
        };
        index += 1;
    }
    lookup
};
/// Encodes `input` into its escaped textual form.
///
/// When no byte of `input` requires escaping (see [`requires_escape`]) the
/// input is returned borrowed, reinterpreted as a `str`, with no allocation.
/// Otherwise the leading run of bytes that need no escaping is copied verbatim
/// into a new `String` and the remainder is escaped via [`encode_to_string`].
#[cfg(feature = "alloc")]
#[must_use]
pub fn encode(input: &[u8]) -> Cow<'_, str> {
    let Some(first_escape) = input.iter().position(|&byte| requires_escape(byte)) else {
        // Nothing to escape: every byte is plain ASCII, so the slice is valid UTF-8.
        debug_assert!(input.is_ascii());
        return Cow::Borrowed(from_utf8_unchecked_potentially_unsafe(input));
    };

    let (clean_prefix, needs_encoding) = input.split_at(first_escape);
    debug_assert!(clean_prefix.is_ascii());
    let clean_prefix = from_utf8_unchecked_potentially_unsafe(clean_prefix);

    // At least one byte will expand, so start with room for one extra byte.
    let mut encoded = String::with_capacity(input.len() + 1);
    encoded.push_str(clean_prefix);
    encode_to_string(needs_encoding, &mut encoded);
    Cow::Owned(encoded)
}
/// Wraps any iterable of bytes in an [`iter::EncodeIter`].
///
/// The argument is converted with `IntoIterator::into_iter` and handed to
/// `iter::EncodeIter::new`.
pub fn encode_iter<I>(iter: I) -> iter::EncodeIter<I::IntoIter>
where
    I: IntoIterator<Item = u8>,
{
    let bytes = IntoIterator::into_iter(iter);
    iter::EncodeIter::new(bytes)
}
/// Decodes escaped `input` back into raw bytes.
///
/// When `input` contains no byte for which [`requires_escape`] is true (and
/// therefore no backtick) it is returned borrowed and unchanged. Otherwise the
/// leading literal run is copied into a new `Vec` and the remainder is decoded
/// with [`decode_to_vec`].
///
/// # Errors
///
/// Propagates any [`DecodeError`] reported by [`decode_to_vec`].
#[cfg(feature = "alloc")]
pub fn decode(input: &[u8]) -> Result<Cow<'_, [u8]>, DecodeError> {
    let Some(first_special) = input.iter().position(|&byte| requires_escape(byte)) else {
        return Ok(Cow::Borrowed(input));
    };

    let (literal_prefix, encoded_rest) = input.split_at(first_special);
    // Estimate assumes the densest case, where every remaining output byte
    // comes from a three-byte escape sequence.
    let capacity_hint = literal_prefix.len() + (encoded_rest.len() + 2) / 3;
    let mut decoded = Vec::with_capacity(capacity_hint);
    decoded.extend_from_slice(literal_prefix);
    decode_to_vec(encoded_rest, &mut decoded)?;
    Ok(Cow::Owned(decoded))
}
/// Wraps any iterable of bytes in an [`iter::DecodeIter`].
///
/// The argument is converted with `IntoIterator::into_iter` and handed to
/// `iter::DecodeIter::new`.
pub fn decode_iter<I>(iter: I) -> iter::DecodeIter<I::IntoIter>
where
    I: IntoIterator<Item = u8>,
{
    let bytes = IntoIterator::into_iter(iter);
    iter::DecodeIter::new(bytes)
}
/// Decodes escaped `input` in place and returns the decoded prefix of the buffer.
///
/// Decoded output is never longer than its encoded form, so the result is
/// written over the front of `input` and a sub-slice of it is returned. When
/// no byte requires escaping, the whole buffer is returned untouched.
///
/// # Errors
///
/// * [`DecodeError::UnexpectedEnd`] if the input stops in the middle of an
///   escape sequence.
/// * [`DecodeError::InvalidByte`] if a byte other than a backtick appears that
///   should have been escaped.
/// * Any error produced by `hex_bytes_to_byte` for a malformed hex escape.
///
/// On error the part of the buffer that was already processed has been
/// overwritten with decoded bytes.
pub fn decode_in_place(input: &mut [u8]) -> Result<&mut [u8], DecodeError> {
    let Some(first_special) = input.iter().position(|&byte| requires_escape(byte)) else {
        return Ok(input);
    };

    // Everything before `first_special` is already in its decoded position.
    let mut write = first_special;
    let mut read = first_special;
    while read < input.len() {
        let current = input[read];
        // Work out the decoded byte and how many encoded bytes it consumed.
        let (decoded, consumed) = if current == b'`' {
            match input.get(read + 1).copied() {
                None => return Err(DecodeError::UnexpectedEnd),
                // A doubled backtick stands for one literal backtick.
                Some(b'`') => (b'`', 2),
                Some(high) => {
                    let low = input
                        .get(read + 2)
                        .copied()
                        .ok_or(DecodeError::UnexpectedEnd)?;
                    (hex_bytes_to_byte(high, low)?, 3)
                }
            }
        } else if requires_escape(current) {
            return Err(DecodeError::InvalidByte(current));
        } else {
            (current, 1)
        };
        // `write <= read` always holds, so this never clobbers unread input.
        input[write] = decoded;
        write += 1;
        read += consumed;
    }
    Ok(&mut input[..write])
}
/// Returns `true` when `byte` cannot appear literally in encoded output.
///
/// The answer is a single lookup in `REQUIRES_ESCAPE_TABLE`.
#[inline]
#[must_use]
pub const fn requires_escape(byte: u8) -> bool {
    let index = byte as usize;
    REQUIRES_ESCAPE_TABLE[index]
}
/// Appends the escaped form of `input` to `output` and returns the number of
/// bytes appended.
///
/// A backtick becomes two backticks. Any other byte for which
/// [`requires_escape`] is true becomes a backtick followed by two uppercase
/// hex digits. Every remaining byte is appended unchanged.
#[cfg(feature = "alloc")]
pub fn encode_to_string(input: &[u8], output: &mut String) -> usize {
    // The encoded form is at least as long as the input.
    output.reserve(input.len());
    let length_before = output.len();
    for &byte in input {
        match byte {
            b'`' => output.push_str("``"),
            _ if requires_escape(byte) => {
                let [high, low] = byte_to_hex_chars(byte);
                output.push('`');
                output.push(high);
                output.push(low);
            }
            _ => output.push(char::from(byte)),
        }
    }
    // Every appended character is ASCII, so the byte growth equals the
    // number of characters written.
    output.len() - length_before
}
/// Appends the escaped form of `input` to `output` and returns the number of
/// bytes appended.
///
/// A backtick becomes two backticks. Any other byte for which
/// [`requires_escape`] is true becomes a backtick followed by two uppercase
/// hex digits. Every remaining byte is appended unchanged.
#[cfg(feature = "alloc")]
pub fn encode_to_vec(input: &[u8], output: &mut Vec<u8>) -> usize {
    // The encoded form is at least as long as the input.
    output.reserve(input.len());
    let length_before = output.len();
    for &byte in input {
        match byte {
            b'`' => output.extend_from_slice(b"``"),
            _ if requires_escape(byte) => {
                let [high, low] = byte_to_hex_bytes(byte);
                output.extend_from_slice(&[b'`', high, low]);
            }
            _ => output.push(byte),
        }
    }
    output.len() - length_before
}
/// Decodes escaped `input`, appending the raw bytes to `output`, and returns
/// the number of bytes appended.
///
/// Two backticks decode to one backtick. A backtick followed by two uppercase
/// hex digits decodes to the byte they spell. Every other byte must be one
/// that does not require escaping, and is copied through unchanged.
///
/// # Errors
///
/// * [`DecodeError::UnexpectedEnd`] if the input stops in the middle of an
///   escape sequence.
/// * [`DecodeError::InvalidByte`] if a byte other than a backtick appears that
///   should have been escaped.
/// * Any error produced by `hex_bytes_to_byte` for a malformed hex escape.
///
/// On error, bytes decoded before the failure remain appended to `output`.
#[cfg(feature = "alloc")]
pub fn decode_to_vec(input: &[u8], output: &mut Vec<u8>) -> Result<usize, DecodeError> {
    // Each decoded byte consumes at most three input bytes, so valid input
    // yields at least ceil(len / 3) bytes. Reserving that lower bound up front
    // avoids repeated regrowth without over-allocating on success.
    output.reserve((input.len() + 2) / 3);

    let mut written = 0;
    let mut bytes = input.iter().copied();
    // `while let` rather than `for`: escape sequences pull extra bytes from
    // the same iterator inside the loop body.
    while let Some(byte) = bytes.next() {
        let decoded = if byte == b'`' {
            match bytes.next().ok_or(DecodeError::UnexpectedEnd)? {
                // A doubled backtick stands for one literal backtick.
                b'`' => b'`',
                high => {
                    let low = bytes.next().ok_or(DecodeError::UnexpectedEnd)?;
                    hex_bytes_to_byte(high, low)?
                }
            }
        } else if requires_escape(byte) {
            return Err(DecodeError::InvalidByte(byte));
        } else {
            byte
        };
        output.push(decoded);
        written += 1;
    }
    Ok(written)
}
/// Converts a nibble (`0..=15`) to its uppercase ASCII hex digit.
///
/// Callers in this file only pass values in `0..=15`.
#[inline]
const fn nibble_to_hex(n: u8) -> u8 {
    match n {
        0..=9 => b'0' + n,
        // `b'A' - 10` places 10 at 'A', 11 at 'B', and so on.
        _ => b'A' - 10 + n,
    }
}
/// Splits `byte` into two uppercase ASCII hex digits, high nibble first.
#[inline]
const fn byte_to_hex_bytes(byte: u8) -> [u8; 2] {
    let high_nibble = byte >> 4;
    let low_nibble = byte & 0x0F;
    [nibble_to_hex(high_nibble), nibble_to_hex(low_nibble)]
}
/// Same as `byte_to_hex_bytes`, but returns the two digits as `char`s.
const fn byte_to_hex_chars(byte: u8) -> [char; 2] {
    let digits = byte_to_hex_bytes(byte);
    [digits[0] as char, digits[1] as char]
}
#[inline]
const fn hex_bytes_to_byte(high: u8, low: u8) -> Result<u8, DecodeError> {
let high_value = HEX_NIBBLE_DECODE_TABLE[high as usize];
let low_value = HEX_NIBBLE_DECODE_TABLE[low as usize];
match (high_value, low_value) {
(0..=0x0F, 0..=0x0F) => {
let byte = (high_value << 4) | low_value;
if byte == b'`' || !requires_escape(byte) {
return Err(DecodeError::UnexpectedEscape(
EscapedHex(high, low),
byte as char,
));
}
Ok(byte)
}
(HEX_NIBBLE_DECODE_INVALID_ERR, _) | (_, HEX_NIBBLE_DECODE_INVALID_ERR) => {
Err(DecodeError::InvalidHex(EscapedHex(high, low)))
}
_ => Err(DecodeError::LowercaseHex(EscapedHex(high, low))),
}
}
/// Reinterprets `bytes` as a `str`.
///
/// With the `safe` feature enabled the bytes are validated and the call
/// panics if they are not valid UTF-8.
#[cfg(feature = "safe")]
fn from_utf8_unchecked_potentially_unsafe(bytes: &[u8]) -> &str {
    let checked = core::str::from_utf8(bytes);
    checked.unwrap()
}
/// Reinterprets `bytes` as a `str`.
///
/// Without the `safe` feature no validation is performed in release builds;
/// callers must pass only ASCII bytes (checked in debug builds).
#[cfg(not(feature = "safe"))]
fn from_utf8_unchecked_potentially_unsafe(bytes: &[u8]) -> &str {
    debug_assert!(bytes.is_ascii());
    // SAFETY: the callers visible in this file pass only slices that contain
    // no byte for which `requires_escape` is true. That excludes every byte
    // >= 0x7F, so the slice is pure ASCII and therefore valid UTF-8.
    let text = unsafe { core::str::from_utf8_unchecked(bytes) };
    text
}
/// Reasons decoding can fail.
///
/// With the `std` feature enabled the `thiserror` derive is applied, using
/// the messages given on each variant.
#[derive(Debug)]
#[cfg_attr(feature = "std", derive(thiserror::Error))]
pub enum DecodeError {
    /// The input contained a byte, other than a backtick, that requires
    /// escaping and therefore may not appear literally. Carries that byte.
    #[cfg_attr(feature = "std", error("invalid encoded byte 0x{0:02x}"))]
    InvalidByte(u8),
    /// The input ended after a backtick, before the escape sequence was complete.
    #[cfg_attr(feature = "std", error("unexpected end after `"))]
    UnexpectedEnd,
    /// A hex escape spelled a backtick or a byte that does not require
    /// escaping. Carries the offending pair and the character it decodes to.
    #[cfg_attr(feature = "std", error("unexpected escape {0}, expected {1}"))]
    UnexpectedEscape(EscapedHex, char),
    /// A hex escape used a lowercase `a`..=`f` digit; only uppercase is accepted.
    #[cfg_attr(feature = "std", error("expected uppercase hex sequence, found {0}"))]
    LowercaseHex(EscapedHex),
    /// At least one of the two bytes after a backtick was not a hex digit.
    #[cfg_attr(feature = "std", error("invalid hex sequence {0}"))]
    InvalidHex(EscapedHex),
}
/// The two raw bytes that followed a backtick in a rejected escape sequence,
/// stored as `(high, low)` exactly as they appeared in the input.
pub struct EscapedHex(pub u8, pub u8);
impl core::fmt::Debug for EscapedHex {
    /// Formats as an `EscapedHex(..)` tuple.
    ///
    /// The two bytes are shown as characters when neither requires escaping,
    /// and as numeric `u8` values otherwise.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let Self(high, low) = *self;
        let mut tuple = f.debug_tuple("EscapedHex");
        if requires_escape(high) || requires_escape(low) {
            tuple.field(&high).field(&low);
        } else {
            tuple.field(&char::from(high)).field(&char::from(low));
        }
        tuple.finish()
    }
}
impl core::fmt::Display for EscapedHex {
    /// Writes the sequence as it appeared in the input (a backtick followed
    /// by the two characters) when neither byte requires escaping.
    /// Otherwise writes the two raw byte values in hex.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let Self(high, low) = *self;
        if !requires_escape(high) && !requires_escape(low) {
            return write!(f, "`{}{}", char::from(high), char::from(low));
        }
        write!(f, "0x{high:02X} 0x{low:02X}")
    }
}