#![no_std]
extern crate alloc;
use alloc::string::String;
use alloc::vec::Vec;
// The six 7-bit group values that are never emitted as a raw output byte
// (NUL, LF, CR, `"`, `&`, `\`). The position of a value in this array is the
// index carried by the lead byte of its two-byte escape sequence.
const ILLEGALS: [u8; 6] = [0, 10, 13, 34, 38, 92];
// Lead-byte index that marks a two-byte sequence carrying a single 7-bit group
// instead of an (illegal, next) pair.
const SHORTENED: u8 = 0b111;
// Bit 7 of every byte lane of a u64.
const ASCII_MASK_8: u64 = 0x8080_8080_8080_8080;
// Low seven bits of every byte lane of a u64.
const LO7_8: u64 = 0x7F7F_7F7F_7F7F_7F7F;
// One byte-broadcast pattern per entry of ILLEGALS; XOR-ing a word with a pattern
// zeroes exactly the lanes that equal that value (see `illegal_high_mask`).
const Z0_MASK: u64 = 0x0000_0000_0000_0000;
const Z10_MASK: u64 = 0x0A0A_0A0A_0A0A_0A0A;
const Z13_MASK: u64 = 0x0D0D_0D0D_0D0D_0D0D;
const Z34_MASK: u64 = 0x2222_2222_2222_2222;
const Z38_MASK: u64 = 0x2626_2626_2626_2626;
const Z92_MASK: u64 = 0x5C5C_5C5C_5C5C_5C5C;
/// Escape class of every 7-bit group value.
///
/// `0` means the group can be written as a plain byte; `1..=6` identifies one of the six
/// special values (class minus one is the index stored in the escape lead byte).
const CLASS: [u8; 128] = {
    // Special values in class order: NUL, LF, CR, `"`, `&`, `\`.
    let specials = [0u8, 10, 13, 34, 38, 92];
    let mut classes = [0u8; 128];
    let mut slot = 0usize;
    while slot < specials.len() {
        classes[specials[slot] as usize] = (slot as u8) + 1;
        slot += 1;
    }
    classes
};
/// Precomputed two-byte escape sequences, indexed as `[row][following 7-bit group]`.
///
/// Rows `0..=5` use the row number as the lead-byte index; row `6` uses the `SHORTENED`
/// marker. Each entry is packed with `u16::from_le_bytes([lead, trail])`, so a little-endian
/// store writes the lead byte first.
const ESCAPE_TABLE: [[u16; 128]; 7] = {
    let mut rows = [[0u16; 128]; 7];
    let mut row = 0usize;
    while row < 7 {
        // The last row is the shortened form; every other row tags itself.
        let tag = if row < 6 { row as u8 } else { SHORTENED };
        let mut group = 0usize;
        while group < 128 {
            let value = group as u8;
            // Lead: 110 ttt 1 b, where ttt is the tag and b is bit 6 of the group.
            let lead = 0b1100_0010 | (tag << 2) | (value >> 6);
            // Trail: 10 bbbbbb, the low six bits of the group.
            let trail = 0x80 | (value & 0x3F);
            rows[row][group] = u16::from_le_bytes([lead, trail]);
            group += 1;
        }
        row += 1;
    }
    rows
};
const LEAD_DECODE: [u8; 256] = {
let mut arr = [0xFFu8; 256];
let mut idx = 0usize;
while idx < 6 {
let mut bit = 0usize;
while bit < 2 {
let lead = 0b1100_0010 | ((idx as u8) << 2) | (bit as u8);
arr[lead as usize] = ((idx as u8) << 1) | (bit as u8);
bit += 1;
}
idx += 1;
}
let mut bit = 0usize;
while bit < 2 {
let lead = 0b1100_0010 | (SHORTENED << 2) | (bit as u8);
arr[lead as usize] = (SHORTENED << 1) | (bit as u8);
bit += 1;
}
arr
};
/// Writes `value` at `dst` as eight little-endian bytes; `dst` may be unaligned.
///
/// # Safety
///
/// `dst` must be valid for writes of 8 bytes.
#[inline(always)]
unsafe fn store_u64_le(dst: *mut u8, value: u64) {
    let image = value.to_le_bytes();
    // SAFETY: the caller guarantees 8 writable bytes at `dst`; `[u8; 8]` has alignment 1.
    unsafe { dst.cast::<[u8; 8]>().write(image) };
}
/// Writes `value` at `dst` as two little-endian bytes; `dst` may be unaligned.
///
/// # Safety
///
/// `dst` must be valid for writes of 2 bytes.
#[inline(always)]
unsafe fn store_u16_le(dst: *mut u8, value: u16) {
    let image = value.to_le_bytes();
    // SAFETY: the caller guarantees 2 writable bytes at `dst`; `[u8; 2]` has alignment 1.
    unsafe { dst.cast::<[u8; 2]>().write(image) };
}
/// Writes the low `len` bytes of `value` at `dst` in big-endian order.
///
/// A full 8-byte store is always performed: the `len` meaningful bytes come first and the
/// remaining `8 - len` bytes are written as zero.
///
/// # Safety
///
/// `dst` must be valid for writes of 8 bytes regardless of `len`, and `len` must be in
/// `1..=8`.
#[inline(always)]
unsafe fn store_be_partial(dst: *mut u8, value: u64, len: usize) {
    debug_assert!((1..=8).contains(&len));
    // Left-justify the meaningful bytes so they land at the start of the store.
    let unused_bits = (8 - len) * 8;
    let image = (value << unused_bits).to_be_bytes();
    // SAFETY: the caller guarantees 8 writable bytes at `dst`; `[u8; 8]` has alignment 1.
    unsafe { dst.cast::<[u8; 8]>().write(image) };
}
/// Reads eight bytes at `ptr` as a little-endian `u64`; `ptr` may be unaligned.
///
/// # Safety
///
/// `ptr` must be valid for reads of 8 bytes.
#[inline(always)]
unsafe fn load_u64_le(ptr: *const u8) -> u64 {
    // SAFETY: the caller guarantees 8 readable bytes at `ptr`; `[u8; 8]` has alignment 1.
    let image = unsafe { ptr.cast::<[u8; 8]>().read() };
    u64::from_le_bytes(image)
}
/// Returns the seven bytes at `ptr` as a big-endian 56-bit value in bits `55..=0`.
///
/// Eight bytes are actually read; the eighth is shifted out and discarded.
///
/// # Safety
///
/// `ptr` must be valid for reads of 8 bytes (one more than the seven that are used).
#[inline(always)]
unsafe fn load56_be_overread1(ptr: *const u8) -> u64 {
    // SAFETY: the caller guarantees 8 readable bytes at `ptr`; `[u8; 8]` has alignment 1.
    let image = unsafe { ptr.cast::<[u8; 8]>().read() };
    u64::from_be_bytes(image) >> 8
}
/// Returns the seven bytes at `ptr` as a big-endian 56-bit value in bits `55..=0`,
/// touching exactly those seven bytes.
///
/// This is the variant to use when the seven bytes are the last bytes of a buffer. The
/// previous non-Miri implementation performed an 8-byte read here, which read one byte past
/// the end of the input and additionally assumed a little-endian host; copying exactly seven
/// bytes is correct on every target.
///
/// # Safety
///
/// `ptr` must be valid for reads of 7 bytes.
#[inline(always)]
unsafe fn load56_be_exact(ptr: *const u8) -> u64 {
    // Byte 0 stays zero so the big-endian interpretation is already the 56-bit value.
    let mut image = [0u8; 8];
    // SAFETY: the caller guarantees 7 readable bytes at `ptr`; the destination is the last
    // seven bytes of a local array, which cannot overlap the source.
    unsafe { core::ptr::copy_nonoverlapping(ptr, image.as_mut_ptr().add(1), 7) };
    u64::from_be_bytes(image)
}
/// Splits the low 56 bits of `bits56` into eight 7-bit groups, one per byte lane.
///
/// The most significant group (bits `55..=49`) lands in the lowest byte of the result and
/// the least significant group (bits `6..=0`) in the highest byte, so a little-endian store
/// of the result writes the groups in stream order. Bit 7 of every lane is zero.
#[inline(always)]
fn split56_to_groups_le(bits56: u64) -> u64 {
    // `_pdep_u64` only exists in `core::arch::x86_64`; gating on 32-bit x86 as well would
    // fail to compile there, so the fast path is restricted to x86_64.
    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2"))]
    {
        // SAFETY: the `cfg` above guarantees BMI2 is enabled for this compilation.
        let scattered = unsafe { core::arch::x86_64::_pdep_u64(bits56, LO7_8) };
        // PDEP places the lowest group in lane 0; reverse the lanes to get stream order.
        return scattered.swap_bytes();
    }
    // Portable fallback; unreachable only when the BMI2 path above is compiled in.
    #[allow(unreachable_code)]
    {
        ((bits56 >> 49) & 0x7F)
            | (((bits56 >> 42) & 0x7F) << 8)
            | (((bits56 >> 35) & 0x7F) << 16)
            | (((bits56 >> 28) & 0x7F) << 24)
            | (((bits56 >> 21) & 0x7F) << 32)
            | (((bits56 >> 14) & 0x7F) << 40)
            | (((bits56 >> 7) & 0x7F) << 48)
            | ((bits56 & 0x7F) << 56)
    }
}
/// Packs eight 7-bit groups (one per byte lane of `chunk_le`) into a 56-bit value.
///
/// The group in the lowest byte becomes bits `55..=49` of the result and the group in the
/// highest byte becomes bits `6..=0`. Only the low seven bits of each lane are used; both
/// the BMI2 path and the portable path now mask with `0x7F` so they agree on every input.
#[inline(always)]
fn gather_groups_to_bits56(chunk_le: u64) -> u64 {
    // `_pext_u64` only exists in `core::arch::x86_64`; gating on 32-bit x86 as well would
    // fail to compile there, so the fast path is restricted to x86_64.
    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2"))]
    {
        // SAFETY: the `cfg` above guarantees BMI2 is enabled for this compilation.
        return unsafe { core::arch::x86_64::_pext_u64(chunk_le.swap_bytes(), LO7_8) };
    }
    // Portable fallback; unreachable only when the BMI2 path above is compiled in.
    #[allow(unreachable_code)]
    {
        let lane = |index: u32| (chunk_le >> (index * 8)) & 0x7F;
        (lane(0) << 49)
            | (lane(1) << 42)
            | (lane(2) << 35)
            | (lane(3) << 28)
            | (lane(4) << 21)
            | (lane(5) << 14)
            | (lane(6) << 7)
            | lane(7)
    }
}
/// Flags the byte lanes of `x` that hold one of the six special group values.
///
/// The result has bit 7 set in each lane whose value equals 0, 10, 13, 34, 38 or 92 and is
/// zero everywhere else. The per-lane arithmetic relies on every lane of `x` being below
/// `0x80` (true for words produced from 7-bit groups): XOR with a broadcast pattern zeroes
/// matching lanes, adding `0x7F` sets bit 7 in every non-zero lane without carrying into
/// the next lane, and the complement leaves bit 7 set only where the lane matched.
#[inline(always)]
fn illegal_high_mask(x: u64) -> u64 {
    let patterns = [Z0_MASK, Z10_MASK, Z13_MASK, Z34_MASK, Z38_MASK, Z92_MASK];
    let hits = patterns
        .iter()
        .fold(0u64, |seen, &pattern| seen | !(x ^ pattern).wrapping_add(LO7_8));
    hits & ASCII_MASK_8
}
/// Writes the two-byte escape sequence for the special group `first` followed by the group
/// `next`, and advances `*out_pos` by 2.
///
/// # Safety
///
/// - `first` must be a special group (its `CLASS` entry is `1..=6`) and both `first` and
///   `next` must be below 128, because the tables are indexed without bounds checks.
/// - `out_ptr.add(*out_pos)` must be valid for writes of 2 bytes.
#[inline(always)]
unsafe fn emit_escape_pair(out_ptr: *mut u8, out_pos: &mut usize, first: u8, next: u8) {
    // SAFETY: the caller guarantees `first < 128`.
    let class = unsafe { *CLASS.get_unchecked(usize::from(first)) };
    debug_assert!((1..=6).contains(&class));
    let row = usize::from(class - 1);
    // SAFETY: `row` is in `0..=5` for a special group and the caller guarantees `next < 128`.
    let encoded = unsafe { *ESCAPE_TABLE.get_unchecked(row).get_unchecked(usize::from(next)) };
    let at = *out_pos;
    // SAFETY: the caller guarantees two writable bytes at the current output position.
    unsafe { store_u16_le(out_ptr.add(at), encoded) };
    *out_pos = at + 2;
}
/// Writes the two-byte shortened sequence carrying the single group `bits`, and advances
/// `*out_pos` by 2.
///
/// # Safety
///
/// - `bits` must be below 128, because the table is indexed without bounds checks.
/// - `out_ptr.add(*out_pos)` must be valid for writes of 2 bytes.
#[inline(always)]
unsafe fn emit_shortened(out_ptr: *mut u8, out_pos: &mut usize, bits: u8) {
    // Row 6 of the table holds the shortened encodings.
    // SAFETY: row 6 exists and the caller guarantees `bits < 128`.
    let encoded = unsafe { *ESCAPE_TABLE.get_unchecked(6).get_unchecked(usize::from(bits)) };
    let at = *out_pos;
    // SAFETY: the caller guarantees two writable bytes at the current output position.
    unsafe { store_u16_le(out_ptr.add(at), encoded) };
    *out_pos = at + 2;
}
/// Pulls the next 7-bit group out of `tail`, most significant bits first.
///
/// `pos` is the index of the next unread byte, `acc` holds bits that were read but not yet
/// returned (right-aligned), and `acc_bits` is how many of them are valid. When fewer than
/// seven bits remain and the input is exhausted, the leftover bits are returned once,
/// left-aligned in the group and padded with zero bits. Returns `None` when no bits remain.
#[inline(always)]
fn pull7_tail(tail: &[u8], pos: &mut usize, acc: &mut u64, acc_bits: &mut u32) -> Option<u8> {
    // Top up the accumulator until a full group is available or the input runs out.
    while *acc_bits < 7 {
        match tail.get(*pos) {
            Some(&byte) => {
                *acc = (*acc << 8) | u64::from(byte);
                *pos += 1;
                *acc_bits += 8;
            }
            None => break,
        }
    }
    match *acc_bits {
        0 => None,
        partial @ 1..=6 => {
            // Final, incomplete group: left-align what is left and zero-pad the rest.
            let group = ((*acc << (7 - partial)) & 0x7F) as u8;
            *acc = 0;
            *acc_bits = 0;
            Some(group)
        }
        buffered => {
            let leftover = buffered - 7;
            let group = ((*acc >> leftover) & 0x7F) as u8;
            // Keep only the bits that were not consumed (none when `leftover` is 0).
            *acc &= (1_u64 << leftover) - 1;
            *acc_bits = leftover;
            Some(group)
        }
    }
}
/// Number of 7-bit groups needed to carry `input_len` bytes, i.e. `ceil(8 * len / 7)`.
///
/// The arithmetic saturates instead of overflowing for very large lengths.
#[inline(always)]
fn group_count(input_len: usize) -> usize {
    match input_len {
        0 => 0,
        len => len.saturating_mul(8).saturating_add(6) / 7,
    }
}
/// Output buffer size reserved for encoding `input_len` bytes.
///
/// Two bytes are budgeted per 7-bit group, plus eight extra bytes of slack. The arithmetic
/// saturates instead of overflowing.
#[inline(always)]
fn encoded_capacity(input_len: usize) -> usize {
    let groups = group_count(input_len);
    let two_per_group = groups.saturating_mul(2);
    two_per_group.saturating_add(8)
}
/// Output buffer size reserved for decoding `encoded_len` input bytes.
///
/// Each input byte is budgeted as seven payload bits, rounded up to whole bytes, plus eight
/// extra bytes of slack. The arithmetic saturates instead of overflowing.
#[inline(always)]
fn decoded_capacity(encoded_len: usize) -> usize {
    let payload_bits = encoded_len.saturating_mul(7);
    let whole_bytes = payload_bits.saturating_add(7) / 8;
    whole_bytes.saturating_add(8)
}
/// Encodes one word of eight 7-bit groups that may contain special groups.
///
/// `groups_le` holds one group per byte lane, first group in the lowest byte, and
/// `illegal_mask` has bit 7 set in every lane whose group needs escaping. Plain groups are
/// copied to the output as single bytes; a special group is combined with the group that
/// follows it into a two-byte escape sequence. If a special group is the last one in the
/// word, it is parked in `pending_illegal_bits` / `has_pending_illegal` so that the next
/// call (or the caller's tail handling) can supply its follower.
///
/// # Safety
///
/// - Every lane of `groups_le` must be below 128 and `illegal_mask` must flag only lanes
///   whose group is special (the escape tables are indexed without bounds checks).
/// - If `*has_pending_illegal` is set, `*pending_illegal_bits` must be a special group.
/// - `out_ptr.add(*out_pos)` must have room for the emitted bytes plus the overshoot of the
///   8-byte stores used below, which always write a full word even when fewer bytes count.
#[inline(always)]
unsafe fn process_groups8_masked(
mut groups_le: u64,
mut illegal_mask: u64,
out_ptr: *mut u8,
out_pos: &mut usize,
pending_illegal_bits: &mut u8,
has_pending_illegal: &mut bool,
) {
// Number of groups of this word that have not been consumed yet.
let mut remaining = 8usize;
// A special group left over from the previous word takes this word's first group as its
// follower, whatever that group's value is.
if *has_pending_illegal {
let next = groups_le as u8;
unsafe { emit_escape_pair(out_ptr, out_pos, *pending_illegal_bits, next) };
*has_pending_illegal = false;
groups_le >>= 8;
illegal_mask >>= 8;
remaining -= 1;
}
// Nothing left to escape: one wide store, then count only the groups that are real.
if illegal_mask == 0 {
unsafe { store_u64_le(out_ptr.add(*out_pos), groups_le) };
*out_pos += remaining;
return;
}
// Exactly one flagged lane (a non-zero mask with a single bit set): handled without a loop.
if (illegal_mask & (illegal_mask - 1)) == 0 {
// The flag sits in bit 7 of its lane, so trailing_zeros / 8 is the lane index.
let prefix = (illegal_mask.trailing_zeros() >> 3) as usize;
if prefix != 0 {
// Copy the plain groups in front of the special one; bytes written past `prefix`
// are overwritten by whatever is emitted next.
unsafe { store_u64_le(out_ptr.add(*out_pos), groups_le) };
*out_pos += prefix;
groups_le >>= prefix * 8;
remaining -= prefix;
}
let cur = groups_le as u8;
if remaining > 1 {
let next = (groups_le >> 8) as u8;
unsafe { emit_escape_pair(out_ptr, out_pos, cur, next) };
groups_le >>= 16;
remaining -= 2;
// Everything after the pair is plain, because only one lane was flagged.
if remaining != 0 {
unsafe { store_u64_le(out_ptr.add(*out_pos), groups_le) };
*out_pos += remaining;
}
} else {
// The special group is the last one of the word: defer it to the next call.
*pending_illegal_bits = cur;
*has_pending_illegal = true;
}
return;
}
// General case: several flagged lanes. Alternate between copying a run of plain groups
// and emitting one escape pair.
while remaining != 0 {
if illegal_mask == 0 {
unsafe { store_u64_le(out_ptr.add(*out_pos), groups_le) };
*out_pos += remaining;
break;
}
let prefix = (illegal_mask.trailing_zeros() >> 3) as usize;
if prefix != 0 {
unsafe { store_u64_le(out_ptr.add(*out_pos), groups_le) };
*out_pos += prefix;
groups_le >>= prefix * 8;
illegal_mask >>= prefix * 8;
remaining -= prefix;
}
let cur = groups_le as u8;
if remaining > 1 {
let next = (groups_le >> 8) as u8;
unsafe { emit_escape_pair(out_ptr, out_pos, cur, next) };
groups_le >>= 16;
// The follower is consumed as raw payload, so its own flag (if any) is dropped too.
illegal_mask >>= 16;
remaining -= 2;
} else {
*pending_illegal_bits = cur;
*has_pending_illegal = true;
break;
}
}
}
/// Encodes `data` into a string.
///
/// The input is treated as a big-endian bit stream and cut into 7-bit groups. A group that
/// is not one of the six special values (0, 10, 13, 34, 38, 92) is written as a single
/// byte. A special group is written together with the group that follows it as a two-byte
/// sequence; if the very last group is special and has no follower, the shortened two-byte
/// form is used. A final incomplete group is padded with zero bits.
///
/// Returns an empty string for empty input.
pub fn encode(data: &[u8]) -> String {
if data.is_empty() {
return String::new();
}
// The buffer is sized up front and written through a raw pointer; its length is only set
// once, at the end. The capacity includes slack for the 8-byte stores that overshoot.
let mut out = Vec::<u8>::with_capacity(encoded_capacity(data.len()));
let out_ptr = out.as_mut_ptr();
let len = data.len();
let ptr = data.as_ptr();
let mut out_pos = 0usize;
let mut i = 0usize;
// A special group seen at the end of one word waits here for its follower.
let mut pending_illegal_bits = 0u8;
let mut has_pending_illegal = false;
// 28 input bytes -> 32 groups per iteration. The last load starts at i + 21 and reads
// 8 bytes, hence the `i + 29 <= len` bound.
while i + 29 <= len {
let bits56_a = unsafe { load56_be_overread1(ptr.add(i)) };
let bits56_b = unsafe { load56_be_overread1(ptr.add(i + 7)) };
let bits56_c = unsafe { load56_be_overread1(ptr.add(i + 14)) };
let bits56_d = unsafe { load56_be_overread1(ptr.add(i + 21)) };
let groups_a = split56_to_groups_le(bits56_a);
let groups_b = split56_to_groups_le(bits56_b);
let groups_c = split56_to_groups_le(bits56_c);
let groups_d = split56_to_groups_le(bits56_d);
let mask_a = illegal_high_mask(groups_a);
let mask_b = illegal_high_mask(groups_b);
let mask_c = illegal_high_mask(groups_c);
let mask_d = illegal_high_mask(groups_d);
// Fast path: no special group anywhere and nothing pending, so all 32 groups are
// copied verbatim.
if !has_pending_illegal && (mask_a | mask_b | mask_c | mask_d) == 0 {
unsafe {
store_u64_le(out_ptr.add(out_pos), groups_a);
store_u64_le(out_ptr.add(out_pos + 8), groups_b);
store_u64_le(out_ptr.add(out_pos + 16), groups_c);
store_u64_le(out_ptr.add(out_pos + 24), groups_d);
}
out_pos += 32;
i += 28;
continue;
}
// Slow path: each word is handled in order so a pending group carries across words.
unsafe {
process_groups8_masked(
groups_a,
mask_a,
out_ptr,
&mut out_pos,
&mut pending_illegal_bits,
&mut has_pending_illegal,
);
process_groups8_masked(
groups_b,
mask_b,
out_ptr,
&mut out_pos,
&mut pending_illegal_bits,
&mut has_pending_illegal,
);
process_groups8_masked(
groups_c,
mask_c,
out_ptr,
&mut out_pos,
&mut pending_illegal_bits,
&mut has_pending_illegal,
);
process_groups8_masked(
groups_d,
mask_d,
out_ptr,
&mut out_pos,
&mut pending_illegal_bits,
&mut has_pending_illegal,
);
}
i += 28;
}
// Same scheme with 14 input bytes -> 16 groups; the second load reads up to i + 15.
while i + 15 <= len {
let bits56_a = unsafe { load56_be_overread1(ptr.add(i)) };
let bits56_b = unsafe { load56_be_overread1(ptr.add(i + 7)) };
let groups_a = split56_to_groups_le(bits56_a);
let groups_b = split56_to_groups_le(bits56_b);
let mask_a = illegal_high_mask(groups_a);
let mask_b = illegal_high_mask(groups_b);
if !has_pending_illegal && (mask_a | mask_b) == 0 {
unsafe {
store_u64_le(out_ptr.add(out_pos), groups_a);
store_u64_le(out_ptr.add(out_pos + 8), groups_b);
}
out_pos += 16;
i += 14;
continue;
}
unsafe {
process_groups8_masked(
groups_a,
mask_a,
out_ptr,
&mut out_pos,
&mut pending_illegal_bits,
&mut has_pending_illegal,
);
process_groups8_masked(
groups_b,
mask_b,
out_ptr,
&mut out_pos,
&mut pending_illegal_bits,
&mut has_pending_illegal,
);
}
i += 14;
}
// 7 input bytes -> 8 groups, while a whole 8-byte load still fits in the input.
while i + 8 <= len {
let bits56 = unsafe { load56_be_overread1(ptr.add(i)) };
let groups_le = split56_to_groups_le(bits56);
let mask = illegal_high_mask(groups_le);
if !has_pending_illegal && mask == 0 {
unsafe { store_u64_le(out_ptr.add(out_pos), groups_le) };
out_pos += 8;
i += 7;
continue;
}
unsafe {
process_groups8_masked(
groups_le,
mask,
out_ptr,
&mut out_pos,
&mut pending_illegal_bits,
&mut has_pending_illegal,
);
}
i += 7;
}
// After the loop above `i + 8 > len`, so this branch runs only when exactly seven bytes
// are left; the load here is expected to touch just those seven bytes.
if i + 7 <= len {
let bits56 = unsafe { load56_be_exact(ptr.add(i)) };
let groups_le = split56_to_groups_le(bits56);
let mask = illegal_high_mask(groups_le);
if !has_pending_illegal && mask == 0 {
unsafe { store_u64_le(out_ptr.add(out_pos), groups_le) };
out_pos += 8;
i += 7;
} else {
unsafe {
process_groups8_masked(
groups_le,
mask,
out_ptr,
&mut out_pos,
&mut pending_illegal_bits,
&mut has_pending_illegal,
);
}
i += 7;
}
}
// Scalar tail: fewer than seven bytes remain; pull one 7-bit group at a time.
let tail = &data[i..];
let mut tail_pos = 0usize;
let mut acc = 0u64;
let mut acc_bits = 0u32;
// Resolve a special group carried over from the word loops: pair it with the first tail
// group, or use the shortened form when the input is exhausted.
if has_pending_illegal {
if let Some(nb) = pull7_tail(tail, &mut tail_pos, &mut acc, &mut acc_bits) {
unsafe { emit_escape_pair(out_ptr, &mut out_pos, pending_illegal_bits, nb) };
} else {
unsafe { emit_shortened(out_ptr, &mut out_pos, pending_illegal_bits) };
}
}
while let Some(cur) = pull7_tail(tail, &mut tail_pos, &mut acc, &mut acc_bits) {
let class = unsafe { *CLASS.get_unchecked(cur as usize) };
if class == 0 {
// Plain group: emitted as a single byte.
unsafe { *out_ptr.add(out_pos) = cur };
out_pos += 1;
} else if let Some(nb) = pull7_tail(tail, &mut tail_pos, &mut acc, &mut acc_bits) {
unsafe { emit_escape_pair(out_ptr, &mut out_pos, cur, nb) };
} else {
// Special group with no follower: shortened form, and the input is finished.
unsafe { emit_shortened(out_ptr, &mut out_pos, cur) };
break;
}
}
// Every emitted byte is either below 0x80 or part of a lead/continuation pair built by
// ESCAPE_TABLE, which is what the unchecked UTF-8 conversion relies on.
unsafe {
out.set_len(out_pos);
String::from_utf8_unchecked(out)
}
}
/// Decodes a word of eight plain groups (56 payload bits) into seven output bytes.
///
/// The `*acc_bits` bits already buffered in `*acc` are placed in front of the new bits;
/// seven whole bytes are written and the same number of low bits stays buffered, so
/// `*acc_bits` is left unchanged. `*out_pos` advances by 7.
///
/// # Safety
///
/// - `out_ptr.add(*out_pos)` must be valid for writes of 8 bytes (the store writes a full
///   word although only seven bytes count).
/// - `*acc_bits` must be below 8 and `*acc` must hold no bits above that width.
#[inline(always)]
unsafe fn unpack8groups_chunk_le(
    chunk_le: u64,
    out_ptr: *mut u8,
    out_pos: &mut usize,
    acc: &mut u64,
    acc_bits: &mut u32,
) {
    let carry_bits = *acc_bits;
    // Buffered bits first, then the 56 fresh ones.
    let window = (*acc << 56) | gather_groups_to_bits56(chunk_le);
    let start = *out_pos;
    // SAFETY: the caller guarantees 8 writable bytes at the current output position.
    unsafe { store_be_partial(out_ptr.add(start), window >> carry_bits, 7) };
    *out_pos = start + 7;
    // Keep the low `carry_bits` bits for the next call (none when `carry_bits` is 0).
    *acc = window & ((1u64 << carry_bits).wrapping_sub(1));
}
/// Decodes the first `count` plain groups of `chunk_le` (lowest byte lane first).
///
/// The groups are appended to the bits buffered in `*acc`; every complete byte is written
/// to the output and the remaining `0..=7` bits stay buffered. `*out_pos`, `*acc` and
/// `*acc_bits` are updated accordingly.
///
/// # Safety
///
/// - `count` must be at most 7 and the first `count` lanes must be below 128.
/// - `out_ptr.add(*out_pos)` must be valid for writes of 8 bytes (the store writes a full
///   word even when fewer bytes count).
/// - `*acc_bits` must be below 8 and `*acc` must hold no bits above that width.
#[inline(always)]
unsafe fn push_ascii_prefix_le(
    chunk_le: u64,
    count: usize,
    out_ptr: *mut u8,
    out_pos: &mut usize,
    acc: &mut u64,
    acc_bits: &mut u32,
) {
    debug_assert!(count <= 7);
    // Concatenate the groups, earliest group in the most significant position.
    let mut rest = chunk_le;
    let mut packed = 0u64;
    for _ in 0..count {
        packed = (packed << 7) | u64::from(rest as u8);
        rest >>= 8;
    }
    let add_bits = 7 * (count as u32);
    let total_bits = *acc_bits + add_bits;
    let carry_bits = total_bits % 8;
    let whole_bytes = (total_bits / 8) as usize;
    let window = (*acc << add_bits) | packed;
    if whole_bytes > 0 {
        // SAFETY: the caller guarantees 8 writable bytes at the current output position.
        unsafe { store_be_partial(out_ptr.add(*out_pos), window >> carry_bits, whole_bytes) };
        *out_pos += whole_bytes;
    }
    *acc_bits = carry_bits;
    *acc = window & ((1u64 << carry_bits).wrapping_sub(1));
}
/// Appends one 7-bit group to the decoder's bit buffer, writing a byte when one completes.
///
/// `*acc` holds `*acc_bits` buffered bits. After the call either a byte has been written at
/// `*out_pos` (which then advances by 1) or the group has simply been buffered.
///
/// # Safety
///
/// `out_ptr.add(*out_pos)` must be valid for a 1-byte write.
#[inline(always)]
unsafe fn push7_scalar(
    out_ptr: *mut u8,
    out_pos: &mut usize,
    acc: &mut u64,
    acc_bits: &mut u32,
    bits: u8,
) {
    let window = (*acc << 7) | u64::from(bits);
    let have = *acc_bits + 7;
    if have < 8 {
        // Still short of a full byte: just keep buffering.
        *acc_bits = have;
        *acc = window;
        return;
    }
    let carry = have - 8;
    // SAFETY: the caller guarantees one writable byte at the current output position.
    unsafe { out_ptr.add(*out_pos).write((window >> carry) as u8) };
    *out_pos += 1;
    *acc_bits = carry;
    *acc = window & ((1u64 << carry).wrapping_sub(1));
}
/// Appends two 7-bit groups (`hi7` first, then `lo7`) to the decoder's bit buffer.
///
/// Fourteen new bits always complete at least one byte. Two bytes are written (big-endian)
/// when exactly two are complete, otherwise a single byte is written; `*out_pos` advances
/// by the number of bytes written and the leftover bits stay in `*acc` / `*acc_bits`.
///
/// # Safety
///
/// `out_ptr.add(*out_pos)` must be valid for writes of 2 bytes.
#[inline(always)]
unsafe fn push14_scalar(
    out_ptr: *mut u8,
    out_pos: &mut usize,
    acc: &mut u64,
    acc_bits: &mut u32,
    hi7: u8,
    lo7: u8,
) {
    let window = (*acc << 14) | (u64::from(hi7) << 7) | u64::from(lo7);
    let have = *acc_bits + 14;
    let carry = have % 8;
    let ready = window >> carry;
    // SAFETY: the caller guarantees two writable bytes at the current output position.
    let dst = unsafe { out_ptr.add(*out_pos) };
    if have / 8 == 2 {
        // SAFETY: two bytes are writable at `dst`; `[u8; 2]` has alignment 1.
        unsafe { dst.cast::<[u8; 2]>().write((ready as u16).to_be_bytes()) };
        *out_pos += 2;
    } else {
        // SAFETY: at least one byte is writable at `dst`.
        unsafe { dst.write(ready as u8) };
        *out_pos += 1;
    }
    *acc_bits = carry;
    *acc = window & ((1u64 << carry).wrapping_sub(1));
}
/// Builds the `Err` value for a decoding failure.
///
/// Marked cold and never inlined so that error construction stays out of the hot decode
/// loops.
#[cold]
#[inline(never)]
fn decode_err<T>(msg: &'static str) -> Result<T, &'static str> {
    Result::Err(msg)
}
/// Decodes the two-byte sequence that starts at `ptr.add(*i)` and advances `*i` by 2.
///
/// A regular escape sequence yields the special group named by the lead byte followed by
/// the 7-bit group carried in the sequence; a shortened sequence yields only the carried
/// group. The decoded groups are appended to the bit buffer / output.
///
/// # Errors
///
/// - `"Unexpected end of input"` if fewer than two bytes remain.
/// - `"Invalid lead byte"` if the first byte is not a recognised lead byte.
/// - `"Invalid continuation byte"` if the second byte is not of the form `10xxxxxx`.
///
/// # Safety
///
/// - `ptr` must be valid for reads of `len` bytes.
/// - `out_ptr.add(*out_pos)` must be valid for writes of 2 bytes.
#[inline(always)]
unsafe fn break_to_scalar(
    ptr: *const u8,
    i: &mut usize,
    len: usize,
    out_ptr: *mut u8,
    out_pos: &mut usize,
    acc: &mut u64,
    acc_bits: &mut u32,
) -> Result<(), &'static str> {
    let at = *i;
    if at + 2 > len {
        return decode_err("Unexpected end of input");
    }
    // SAFETY: `at + 2 <= len`, so both `at` and `at + 1` are in bounds.
    let lead = unsafe { *ptr.add(at) };
    let code = LEAD_DECODE[usize::from(lead)];
    if code == 0xFF {
        return decode_err("Invalid lead byte");
    }
    // SAFETY: see above.
    let trail = unsafe { *ptr.add(at + 1) };
    if trail & 0xC0 != 0x80 {
        return decode_err("Invalid continuation byte");
    }
    *i = at + 2;
    // Bit 0 of the code is bit 6 of the carried group; the trail byte holds the low six.
    let carried = ((code & 1) << 6) | (trail & 0x3F);
    match code >> 1 {
        slot @ 0..=5 => {
            let special = ILLEGALS[usize::from(slot)];
            // SAFETY: the caller guarantees two writable output bytes.
            unsafe { push14_scalar(out_ptr, out_pos, acc, acc_bits, special, carried) };
        }
        marker => {
            debug_assert_eq!(marker, SHORTENED);
            // SAFETY: the caller guarantees at least one writable output byte.
            unsafe { push7_scalar(out_ptr, out_pos, acc, acc_bits, carried) };
        }
    }
    Ok(())
}
/// Decodes a string produced by `encode` back into bytes.
///
/// Bytes below 0x80 each contribute one 7-bit group. A recognised lead byte followed by a
/// continuation byte contributes either a special group plus the carried group, or (for the
/// shortened form) only the carried group. Groups are concatenated into a bit stream and
/// written out as whole bytes; bits left over at the end that do not fill a byte are
/// discarded.
///
/// Returns an empty vector for empty input.
///
/// # Errors
///
/// - `"Invalid lead byte"` for a byte >= 0x80 that is not a recognised lead byte.
/// - `"Unexpected end of input"` when a lead byte is the last byte of the input.
/// - `"Invalid continuation byte"` when the byte after a lead byte is not `10xxxxxx`.
pub fn decode(encoded: &str) -> Result<Vec<u8>, &'static str> {
if encoded.is_empty() {
return Ok(Vec::new());
}
let bytes = encoded.as_bytes();
let len = bytes.len();
// The buffer is sized up front (with slack for the 8-byte stores) and written through a
// raw pointer; its length is only set once, at the end.
let mut out = Vec::<u8>::with_capacity(decoded_capacity(len));
let out_ptr = out.as_mut_ptr();
let ptr = bytes.as_ptr();
let mut out_pos = 0usize;
// Bit buffer: `acc` holds `acc_bits` decoded bits that do not yet form a whole byte.
let mut acc = 0u64;
let mut acc_bits = 0u32;
let mut i = 0usize;
// Word loop: look at eight input bytes at a time while a full 8-byte load fits.
while i + 8 <= len {
let chunk = unsafe { load_u64_le(ptr.add(i)) };
let high = chunk & ASCII_MASK_8;
// All eight bytes are below 0x80: eight groups become seven output bytes in one step.
if high == 0 {
unsafe {
unpack8groups_chunk_le(chunk, out_ptr, &mut out_pos, &mut acc, &mut acc_bits);
}
i += 8;
continue;
}
// Number of plain bytes in front of the first byte with its high bit set.
let ascii_prefix = (high.trailing_zeros() >> 3) as usize;
if ascii_prefix != 0 {
unsafe {
push_ascii_prefix_le(
chunk,
ascii_prefix,
out_ptr,
&mut out_pos,
&mut acc,
&mut acc_bits,
);
}
// The next iteration starts exactly at the non-ASCII byte.
i += ascii_prefix;
continue;
}
// The word starts with a non-ASCII byte: decode one two-byte sequence.
(unsafe {
break_to_scalar(
ptr,
&mut i,
len,
out_ptr,
&mut out_pos,
&mut acc,
&mut acc_bits,
)
})?;
}
// Scalar tail: fewer than eight bytes remain, handled one byte or one sequence at a time.
while i < len {
let b = unsafe { *ptr.add(i) };
if b < 128 {
i += 1;
unsafe { push7_scalar(out_ptr, &mut out_pos, &mut acc, &mut acc_bits, b) };
continue;
}
// Note: here the lead byte is validated before the end-of-input check, so a lone invalid
// byte at the very end reports "Invalid lead byte".
let code = LEAD_DECODE[b as usize];
if code == 0xFF {
return decode_err("Invalid lead byte");
}
if i + 1 >= len {
return decode_err("Unexpected end of input");
}
let b2 = unsafe { *ptr.add(i + 1) };
if (b2 & 0xC0) != 0x80 {
return decode_err("Invalid continuation byte");
}
i += 2;
// `code` is (index << 1) | bit, where `bit` is bit 6 of the carried group.
let illegal_index = code >> 1;
let first_bit = code & 1;
let next = (first_bit << 6) | (b2 & 0x3F);
unsafe {
if illegal_index < 6 {
// Regular escape: the special group itself, then the carried group.
push14_scalar(
out_ptr,
&mut out_pos,
&mut acc,
&mut acc_bits,
ILLEGALS[illegal_index as usize],
next,
);
} else {
// Shortened form: only the carried group.
debug_assert_eq!(illegal_index, SHORTENED);
push7_scalar(out_ptr, &mut out_pos, &mut acc, &mut acc_bits, next);
}
}
}
// Bits still buffered in `acc` are dropped here; only whole bytes count.
unsafe { out.set_len(out_pos) };
Ok(out)
}
// Round-trip and error-path tests for `encode` / `decode`.
#[cfg(test)]
mod tests {
use super::*;
use alloc::{format, vec};
// Empty input maps to empty output in both directions.
#[test]
fn test_empty() {
assert_eq!(encode(b""), "");
assert_eq!(decode("").unwrap(), b"");
}
// Plain ASCII text survives a round trip.
#[test]
fn test_hello_world() {
let data = b"hello world";
let enc = encode(data);
let dec = decode(&enc).expect("decoding failed");
assert_eq!(dec, data);
}
// Every possible single-byte input survives a round trip.
#[test]
fn test_single_byte_values() {
for b in 0..=255u8 {
let data = vec![b];
let enc = encode(&data);
let dec = decode(&enc).expect(&format!("decoding failed for byte {}", b));
assert_eq!(dec, data, "failed for byte {}", b);
}
}
// Lengths chosen around the 7-, 14- and 28-byte strides used by the encoder loops.
#[test]
fn test_various_lengths_roundtrip() {
for len in [
0, 1, 2, 3, 6, 7, 8, 9, 14, 15, 16, 17, 31, 32, 33, 100, 255, 256, 511, 512,
] {
let data: Vec<u8> = (0..len).map(|i| (i % 251) as u8).collect();
let enc = encode(&data);
let dec = decode(&enc).expect("decoding failed");
assert_eq!(dec, data, "roundtrip failed for length {}", len);
}
}
// Input bytes equal to the six special values (as bytes, not as 7-bit groups).
#[test]
fn test_all_illegal_bytes_handling() {
let data = b"\x00\x0A\x0D\x22\x26\x5C";
let enc = encode(data);
let dec = decode(&enc).expect("decoding failed");
assert_eq!(dec, data.as_ref());
}
// All 256 byte values in sequence.
#[test]
fn test_mixed_content() {
let data: Vec<u8> = (0..=255).collect();
let enc = encode(&data);
let dec = decode(&enc).expect("decoding failed");
assert_eq!(dec, data);
}
// All-zero input makes every 7-bit group special, exercising back-to-back escapes.
#[test]
fn test_repeated_illegal_bytes() {
let data = vec![0u8; 100];
let enc = encode(&data);
let dec = decode(&enc).expect("decoding failed");
assert_eq!(dec, data);
}
// NOTE(review): the tests below build `String`s from bytes that are not valid UTF-8 via
// `from_utf8_unchecked`, which breaks `String`'s validity invariant; consider whether the
// decoder should expose a byte-slice entry point for this kind of test.
#[test]
fn test_decode_invalid_lead_byte() {
let invalid = vec![0x80u8];
let s = unsafe { String::from_utf8_unchecked(invalid) };
assert!(decode(&s).is_err());
let invalid2 = vec![0xFFu8];
let s2 = unsafe { String::from_utf8_unchecked(invalid2) };
assert!(decode(&s2).is_err());
}
// NOTE(review): 0xC0 has no entry in LEAD_DECODE, so both inputs are rejected as an
// invalid lead byte before any length check; the "Unexpected end of input" path is not
// actually reached by this test.
#[test]
fn test_decode_truncated_escape() {
let mut data = vec![0xC0u8];
let s = unsafe { String::from_utf8_unchecked(data.clone()) };
assert!(decode(&s).is_err());
data.push(0x40);
let s2 = unsafe { String::from_utf8_unchecked(data) };
assert!(decode(&s2).is_err());
}
// 0xC2 is a valid lead byte; 0xFF is not of the form 10xxxxxx.
#[test]
fn test_decode_invalid_continuation_byte() {
let data = vec![0xC2u8, 0xFF];
let s = unsafe { String::from_utf8_unchecked(data) };
assert!(decode(&s).is_err());
}
// A single zero byte: the first group is special and is followed by one padded group.
#[test]
fn test_shortened_at_end() {
let data = vec![0u8];
let enc = encode(&data);
let dec = decode(&enc).expect("decode failed");
assert_eq!(dec, data);
}
// Large random round trip; requires the external `rand` crate as a dev-dependency.
#[test]
fn test_very_long_input() {
use rand::Rng;
const DATA_SIZE: usize = 5_000_000;
let mut data = vec![0u8; DATA_SIZE];
let mut rng = rand::rng();
rng.fill_bytes(&mut data);
let enc = encode(&data);
let dec = decode(&enc).expect("decode failed");
assert_eq!(dec, data);
}
}