use std::{borrow::Cow, fmt::Display, string::FromUtf8Error};
/// Byte-indexed membership table for characters that are emitted verbatim
/// by the encoder.
///
/// An entry is `true` for ASCII digits, ASCII letters and the four marks
/// `-`, `.`, `_`, `~` (the RFC 3986 "unreserved" set); every other byte
/// value, including all non-ASCII bytes, maps to `false`.
const UNRESERVED: [bool; 256] = {
    let mut allowed = [false; 256];
    // `for` loops are not available in const evaluation, so walk the byte
    // range with a manual counter.
    let mut code = 0usize;
    while code < 256 {
        allowed[code] = matches!(
            code as u8,
            b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'-' | b'.' | b'_' | b'~'
        );
        code += 1;
    }
    allowed
};
/// Precomputed two-character hexadecimal spelling of every byte value.
///
/// `ENCODE_PAIR[b]` holds the upper-case ASCII hex digits of `b`, most
/// significant nibble first, so an escape is `%` followed by this pair.
const ENCODE_PAIR: [[u8; 2]; 256] = {
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";
    let mut pairs = [[0u8; 2]; 256];
    let mut value = 0usize;
    while value < 256 {
        let high = HEX_UPPER[value >> 4];
        let low = HEX_UPPER[value & 0x0f];
        pairs[value] = [high, low];
        value += 1;
    }
    pairs
};
/// Maps an ASCII hex digit (either case) to its numeric value `0..=15`.
///
/// Every byte that is not a hex digit maps to the sentinel `0xFF`, whose
/// high nibble is set, so callers can detect an invalid digit by checking
/// for a value above 15.
const DECODE_NIBBLE: [u8; 256] = {
    let mut nibbles = [0xFFu8; 256];
    let mut code = 0usize;
    while code < 256 {
        let byte = code as u8;
        nibbles[code] = match byte {
            b'0'..=b'9' => byte - b'0',
            b'a'..=b'f' => byte - b'a' + 10,
            b'A'..=b'F' => byte - b'A' + 10,
            _ => 0xFF,
        };
        code += 1;
    }
    nibbles
};
/// Percent-encodes a string, escaping every byte of its UTF-8 form that is
/// not an unreserved character.
///
/// Returns `Cow::Borrowed(input)` when nothing needs escaping, otherwise a
/// freshly allocated string in which each escaped byte appears as `%XX`
/// with upper-case hex digits.
#[must_use]
pub fn encode(input: &str) -> Cow<'_, str> {
    let escaped = match encode_bytes(input.as_bytes()) {
        // Nothing changed: hand the caller's own slice back, no allocation.
        Cow::Borrowed(_) => return Cow::Borrowed(input),
        Cow::Owned(escaped) => escaped,
    };
    // The encoder emits only unreserved ASCII, `%` and hex digits, so this
    // conversion is an invariant check rather than a real failure path.
    Cow::Owned(String::from_utf8(escaped).expect("encoded output is ASCII"))
}
/// Percent-encodes arbitrary bytes.
///
/// Bytes flagged in `UNRESERVED` are copied through unchanged; every other
/// byte becomes `%` followed by its two upper-case hex digits. When the
/// input contains no byte that needs escaping it is returned borrowed and
/// nothing is allocated.
#[must_use]
pub fn encode_bytes(input: &[u8]) -> Cow<'_, [u8]> {
    let must_escape = |byte: &u8| !UNRESERVED[usize::from(*byte)];

    // Fast path: no byte needs escaping.
    let Some(prefix_len) = input.iter().position(must_escape) else {
        return Cow::Borrowed(input);
    };

    // Reserve roughly 1.5x the input plus a little slack for the escapes.
    let mut encoded = Vec::with_capacity(input.len() + input.len() / 2 + 16);
    let (clean_prefix, mut pending) = input.split_at(prefix_len);
    encoded.extend_from_slice(clean_prefix);

    while let Some((&byte, after)) = pending.split_first() {
        if must_escape(&byte) {
            encoded.push(b'%');
            encoded.extend_from_slice(&ENCODE_PAIR[usize::from(byte)]);
            pending = after;
        } else {
            // Copy the whole run of unreserved bytes in one go.
            let run_len = pending
                .iter()
                .position(must_escape)
                .unwrap_or(pending.len());
            let (run, remainder) = pending.split_at(run_len);
            encoded.extend_from_slice(run);
            pending = remainder;
        }
    }

    Cow::Owned(encoded)
}
/// Percent-decodes a string.
///
/// Returns `Cow::Borrowed(input)` when `decode_bytes` had nothing to do
/// (the input contains no `%`); otherwise the decoded bytes are validated
/// as UTF-8 and returned as an owned string.
///
/// # Errors
///
/// Returns [`DecodeError::InvalidUtf8`] when the decoded byte sequence is
/// not valid UTF-8.
pub fn decode(input: &str) -> Result<Cow<'_, str>, DecodeError> {
    let Cow::Owned(raw) = decode_bytes(input.as_bytes()) else {
        // Untouched input is still the caller's valid `&str`.
        return Ok(Cow::Borrowed(input));
    };
    match String::from_utf8(raw) {
        Ok(text) => Ok(Cow::Owned(text)),
        Err(invalid) => Err(DecodeError::InvalidUtf8(invalid)),
    }
}
/// Percent-decodes arbitrary bytes.
///
/// Each `%` followed by two hex digits (either case) is replaced by the
/// byte those digits spell. A `%` that is not followed by two hex digits
/// is copied through literally, so this function never fails. Input with
/// no `%` at all is returned borrowed without allocating.
#[must_use]
pub fn decode_bytes(input: &[u8]) -> Cow<'_, [u8]> {
    let Some(first_percent) = input.iter().position(|&b| b == b'%') else {
        return Cow::Borrowed(input);
    };

    // Decoding never grows the data, so the input length is an upper bound.
    let mut decoded = Vec::with_capacity(input.len());
    decoded.extend_from_slice(&input[..first_percent]);

    let mut rest = &input[first_percent..];
    while let Some((&lead, tail)) = rest.split_first() {
        if lead != b'%' {
            // Copy everything up to the next `%` (or the end) in one go.
            let run_len = rest
                .iter()
                .position(|&b| b == b'%')
                .unwrap_or(rest.len());
            decoded.extend_from_slice(&rest[..run_len]);
            rest = &rest[run_len..];
            continue;
        }

        if let [hi_digit, lo_digit, after @ ..] = tail {
            let hi = DECODE_NIBBLE[usize::from(*hi_digit)];
            let lo = DECODE_NIBBLE[usize::from(*lo_digit)];
            // Valid digits decode to 0..=15; the table's 0xFF sentinel fails this.
            if hi < 0x10 && lo < 0x10 {
                decoded.push((hi << 4) | lo);
                rest = after;
                continue;
            }
        }

        // Malformed or truncated escape: keep the `%` and rescan what follows.
        decoded.push(b'%');
        rest = tail;
    }

    Cow::Owned(decoded)
}
/// Error produced when percent-decoding to a string fails.
#[derive(Debug)]
pub enum DecodeError {
    /// The decoded bytes were not valid UTF-8; wraps the underlying
    /// conversion error.
    InvalidUtf8(FromUtf8Error),
}

impl Display for DecodeError {
    /// Writes a human-readable message that includes the wrapped error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self::InvalidUtf8(cause) = self;
        write!(f, "invalid UTF-8 in decoded bytes: {cause}")
    }
}

impl std::error::Error for DecodeError {
    /// Exposes the wrapped UTF-8 conversion error as the error source.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let Self::InvalidUtf8(cause) = self;
        Some(cause)
    }
}

impl From<FromUtf8Error> for DecodeError {
    /// Wraps a UTF-8 conversion failure in [`DecodeError::InvalidUtf8`].
    fn from(cause: FromUtf8Error) -> Self {
        DecodeError::InvalidUtf8(cause)
    }
}
// Unit and property tests for the percent-encoding and decoding functions
// defined in the parent module.
#[cfg(test)]
mod tests {
use rstest::rstest;
use super::*;
// Every character the encoder is expected to leave untouched.
const UNRESERVED_CHARS: &str =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~";
// Punctuation that the encoder is expected to escape, one `%XX` triple each.
const RESERVED_CHARS: &str = ":/?#[]@!$&'()*+,;=";
// Fixed ASCII input/output pairs for `encode`.
#[rstest]
#[case("", "")]
#[case("abc", "abc")]
#[case("ABC-xyz_0.9~", "ABC-xyz_0.9~")]
#[case(" ", "%20")]
#[case("+", "%2B")]
#[case("/", "%2F")]
#[case("?", "%3F")]
#[case("#", "%23")]
#[case("&", "%26")]
#[case("=", "%3D")]
#[case("%", "%25")]
#[case("hello world", "hello%20world")]
#[case("a b+c/d", "a%20b%2Bc%2Fd")]
#[case("\x7f", "%7F")]
fn test_encode_ascii_vectors(#[case] input: &str, #[case] expected: &str) {
assert_eq!(encode(input), expected);
}
// Unreserved-only input comes back unchanged and without allocating.
#[rstest]
fn test_encode_all_unreserved_unchanged() {
let out = encode(UNRESERVED_CHARS);
assert_eq!(out, UNRESERVED_CHARS);
assert!(matches!(out, Cow::Borrowed(_)));
}
// Every reserved character expands to exactly three bytes drawn from
// `%` and upper-case hex digits.
#[rstest]
fn test_encode_all_reserved_percent_encoded() {
let out = encode(RESERVED_CHARS);
assert_eq!(out.len(), RESERVED_CHARS.len() * 3);
for byte in out.bytes() {
assert!(
matches!(byte, b'%' | b'0'..=b'9' | b'A'..=b'F'),
"unexpected byte {byte:#04x} in encoded reserved output"
);
}
}
// Escapes use upper-case hex digits.
#[rstest]
fn test_encode_hex_is_uppercase() {
let out = encode("/");
assert_eq!(out, "%2F");
assert!(!out.contains('f'));
}
// Exhaustive single-byte check: unreserved bytes are borrowed verbatim,
// every other byte becomes its `%XX` escape.
#[rstest]
fn test_encode_every_byte_position() {
for byte in 0u8..=255 {
let input = [byte];
let out = encode_bytes(&input);
if UNRESERVED[byte as usize] {
assert!(
matches!(out, Cow::Borrowed(_)),
"unreserved byte {byte:#04x} should not allocate"
);
assert_eq!(out.as_ref(), &[byte]);
} else {
let expected = format!("%{byte:02X}").into_bytes();
assert_eq!(out.as_ref(), expected.as_slice(), "byte {byte:#04x}");
}
}
}
// Multi-byte UTF-8 characters are escaped one byte at a time
// (2-, 3- and 4-byte sequences).
#[rstest]
fn test_encode_utf8_multibyte() {
assert_eq!(encode("\u{00E9}"), "%C3%A9");
assert_eq!(encode("\u{4E2D}"), "%E4%B8%AD");
assert_eq!(encode("\u{1F600}"), "%F0%9F%98%80");
}
// ASCII and non-ASCII characters mixed in one input.
#[rstest]
fn test_encode_mixed_ascii_and_utf8() {
assert_eq!(encode("a é/"), "a%20%C3%A9%2F");
}
// No escaping needed => borrowed result.
#[rstest]
fn test_encode_returns_borrowed_when_no_work() {
let out = encode("safe-string_123.xyz~");
assert!(matches!(out, Cow::Borrowed(_)));
}
// Any escaping => owned result.
#[rstest]
fn test_encode_returns_owned_when_encoding_needed() {
let out = encode("needs encoding");
assert!(matches!(out, Cow::Owned(_)));
}
// Fixed input/output pairs for `decode`, including lower-case hex digits
// and multi-byte UTF-8 sequences.
#[rstest]
#[case("", "")]
#[case("abc", "abc")]
#[case("%20", " ")]
#[case("%2F", "/")]
#[case("%2f", "/")] #[case("%2b", "+")]
#[case("%25", "%")]
#[case("hello%20world", "hello world")]
#[case("a%20b%2Bc%2Fd", "a b+c/d")]
#[case("%C3%A9", "\u{00E9}")]
#[case("%E4%B8%AD", "\u{4E2D}")]
#[case("%F0%9F%98%80", "\u{1F600}")]
fn test_decode_ascii_and_utf8_vectors(#[case] input: &str, #[case] expected: &str) {
assert_eq!(decode(input).unwrap(), expected);
}
// A `%` that is truncated or followed by non-hex characters is kept
// literally instead of producing an error.
#[rstest]
#[case("%", "%")] #[case("%2", "%2")] #[case("%GG", "%GG")] #[case("%2G", "%2G")] #[case("%G2", "%G2")] #[case("%%20", "% ")] #[case("100%", "100%")] fn test_decode_malformed_percent_passes_through(#[case] input: &str, #[case] expected: &str) {
assert_eq!(decode(input).unwrap(), expected);
}
// No `%` in the input => borrowed result.
#[rstest]
fn test_decode_returns_borrowed_when_no_percent() {
let out = decode("no-percent-here").unwrap();
assert!(matches!(out, Cow::Borrowed(_)));
}
// Any `%` in the input => owned result.
#[rstest]
fn test_decode_returns_owned_when_percent_present() {
let out = decode("a%20b").unwrap();
assert!(matches!(out, Cow::Owned(_)));
}
// A malformed escape must not prevent later valid escapes from decoding.
#[rstest]
#[case("this%2x%26that", "this%2x&that")]
#[case("%%25", "%%")]
#[case("%2%26", "%2&")]
#[case("a%2Zb%20c", "a%2Zb c")]
fn test_decode_malformed_then_valid(#[case] input: &str, #[case] expected: &str) {
assert_eq!(decode(input).unwrap(), expected);
}
// `decode` rejects decoded bytes that are not valid UTF-8.
#[rstest]
fn test_decode_invalid_utf8_errors() {
let err = decode("%FF").unwrap_err();
assert!(matches!(err, DecodeError::InvalidUtf8(_)));
}
// `decode_bytes` performs no UTF-8 validation and returns the raw byte.
#[rstest]
fn test_decode_invalid_utf8_bytes_ok() {
let out = decode_bytes(b"%FF");
assert_eq!(out.as_ref(), &[0xFF]);
}
// Back-to-back lower-case escapes forming a single 3-byte character.
#[rstest]
fn test_decode_consecutive_percent_triples() {
assert_eq!(decode("%e2%98%83").unwrap(), "\u{2603}"); }
// `%00` decodes to an embedded NUL byte.
#[rstest]
fn test_decode_nul_byte() {
let decoded = decode("a%00b").unwrap();
assert_eq!(decoded.as_bytes(), &[b'a', 0x00, b'b']);
}
// Encoding then decoding any single byte returns that byte.
#[rstest]
fn test_roundtrip_every_byte() {
for byte in 0u8..=255 {
let input = [byte];
let encoded = encode_bytes(&input);
let decoded = decode_bytes(encoded.as_ref());
assert_eq!(
decoded.as_ref(),
input.as_slice(),
"round-trip failed for byte {byte:#04x}"
);
}
}
// String-level round trips over ASCII, URL-like and non-ASCII inputs.
#[rstest]
#[case("hello")]
#[case("a b c")]
#[case("https://example.com/path?q=1&x=2")]
#[case("\u{00E9}\u{00E0}\u{00FC}")]
#[case("\u{4E2D}\u{6587}\u{6D4B}\u{8BD5}")]
#[case("mix 123 !@# %^&*()")]
#[case("\u{1F600}\u{1F680}\u{1F3C6}")]
fn test_roundtrip_string(#[case] input: &str) {
let encoded = encode(input);
let decoded = decode(&encoded).unwrap();
assert_eq!(decoded, input);
}
// Encoded output of non-ASCII input contains only ASCII.
#[rstest]
fn test_encoded_output_only_ascii() {
let encoded = encode("\u{00E9}\u{4E2D}\u{1F600}");
assert!(encoded.is_ascii(), "encoded output must be ASCII-only");
}
// All 256 byte values in one buffer: ASCII-only output that round-trips.
#[rstest]
fn test_encode_bytes_arbitrary_binary() {
let input: Vec<u8> = (0u8..=255).collect();
let encoded = encode_bytes(&input);
assert!(encoded.iter().all(u8::is_ascii));
let decoded = decode_bytes(encoded.as_ref());
assert_eq!(decoded.as_ref(), input.as_slice());
}
// The error's `Display` text and `source()` chain are both populated.
#[rstest]
fn test_decode_error_display_and_source() {
let err = decode("%FF").unwrap_err();
let msg = err.to_string();
assert!(msg.starts_with("invalid UTF-8"), "got: {msg}");
assert!(std::error::Error::source(&err).is_some());
}
// Straightforward byte-at-a-time encoder, written without the lookup
// tables, used as the oracle for the property tests below.
fn reference_encode(input: &[u8]) -> Vec<u8> {
let mut out = Vec::with_capacity(input.len());
for &b in input {
let is_unreserved =
b.is_ascii_alphanumeric() || b == b'-' || b == b'.' || b == b'_' || b == b'~';
if is_unreserved {
out.push(b);
} else {
out.push(b'%');
out.extend_from_slice(format!("{b:02X}").as_bytes());
}
}
out
}
// Straightforward decoder, written without the lookup table, used as the
// oracle for the property tests below. A `%` not followed by two hex
// digits is copied through unchanged.
fn reference_decode(input: &[u8]) -> Vec<u8> {
let mut out = Vec::with_capacity(input.len());
let mut i = 0;
while i < input.len() {
if input[i] == b'%' && i + 2 < input.len() {
let a = input[i + 1];
let b = input[i + 2];
if a.is_ascii_hexdigit() && b.is_ascii_hexdigit() {
// `| 0x20` folds an upper-case hex letter to lower case before
// subtracting `b'a'`.
let hi = if a.is_ascii_digit() {
a - b'0'
} else {
(a | 0x20) - b'a' + 10
};
let lo = if b.is_ascii_digit() {
b - b'0'
} else {
(b | 0x20) - b'a' + 10
};
out.push((hi << 4) | lo);
i += 3;
continue;
}
}
out.push(input[i]);
i += 1;
}
out
}
// Property tests over generated inputs.
proptest::proptest! {
// `encode_bytes` agrees with the reference encoder.
#[rstest]
fn prop_encode_matches_reference(input: Vec<u8>) {
let actual = encode_bytes(&input);
let expected = reference_encode(&input);
proptest::prop_assert_eq!(actual.as_ref(), expected.as_slice());
}
// `decode_bytes` agrees with the reference decoder.
#[rstest]
fn prop_decode_matches_reference(input: Vec<u8>) {
let actual = decode_bytes(&input);
let expected = reference_decode(&input);
proptest::prop_assert_eq!(actual.as_ref(), expected.as_slice());
}
// Byte-level encode followed by decode is the identity.
#[rstest]
fn prop_bytes_roundtrip(input: Vec<u8>) {
let encoded = encode_bytes(&input);
let decoded = decode_bytes(encoded.as_ref());
proptest::prop_assert_eq!(decoded.as_ref(), input.as_slice());
}
// String-level encode followed by decode is the identity.
#[rstest]
fn prop_string_roundtrip(input: String) {
let encoded = encode(&input);
let decoded = decode(&encoded).unwrap();
proptest::prop_assert_eq!(decoded.as_ref(), input.as_str());
}
// Encoded output is always pure ASCII.
#[rstest]
fn prop_encoded_output_ascii(input: Vec<u8>) {
let encoded = encode_bytes(&input);
proptest::prop_assert!(encoded.iter().all(u8::is_ascii));
}
}
}