use std::{
borrow::Borrow,
cmp::Ordering,
fmt::{self, Display, Formatter},
hash::{Hash, Hasher},
str::FromStr,
};
use crate::{Encoder, InvalidPctString, PctStr, scan::scan_keep_run, util::find_percent};
pub struct PctString(Vec<u8>);
impl PctString {
pub fn new<B: Into<Vec<u8>>>(bytes: B) -> Result<Self, InvalidPctString<Vec<u8>>> {
let bytes = bytes.into();
if find_percent(&bytes).is_none() {
return match core::str::from_utf8(&bytes) {
Ok(_) => Ok(Self(bytes)),
Err(_) => Err(InvalidPctString(bytes)),
};
}
if PctStr::validate(bytes.iter().copied()) {
Ok(Self(bytes))
} else {
Err(InvalidPctString(bytes))
}
}
pub fn from_string(string: String) -> Result<Self, InvalidPctString<String>> {
Self::new(string).map_err(|e| {
e.map(|bytes| unsafe {
String::from_utf8_unchecked(bytes)
})
})
}
pub unsafe fn new_unchecked<B: Into<Vec<u8>>>(bytes: B) -> Self {
Self(bytes.into())
}
pub fn encode<E: Encoder>(src: impl IntoIterator<Item = char>, encoder: E) -> PctString {
static HEX: &[u8; 16] = b"0123456789ABCDEF";
let iter = src.into_iter();
let mut out = Vec::with_capacity(iter.size_hint().0);
let mut ubuf = [0u8; 4];
for c in iter {
if encoder.encode(c) || c == '%' {
let s = c.encode_utf8(&mut ubuf);
for &b in s.as_bytes() {
out.push(b'%');
out.push(HEX[(b >> 4) as usize]);
out.push(HEX[(b & 0x0F) as usize]);
}
} else if c.is_ascii() {
out.push(c as u8);
} else {
out.extend_from_slice(c.encode_utf8(&mut ubuf).as_bytes());
}
}
PctString(out)
}
pub fn encode_bytes<E: Encoder>(src: &str, encoder: E) -> PctString {
static HEX: &[u8; 16] = b"0123456789ABCDEF";
let bytes = src.as_bytes();
let mut out: Vec<u8> = Vec::with_capacity(bytes.len());
if let Some(table) = encoder.ascii_keep_table() {
let lo_shuf = encoder.ascii_keep_lo_shuf();
let mut i = 0usize;
while i < bytes.len() {
let run_end = scan_keep_run(bytes, i, table, lo_shuf);
if run_end > i {
out.extend_from_slice(&bytes[i..run_end]);
i = run_end;
if i >= bytes.len() {
break;
}
}
let b = bytes[i];
if b < 0x80 {
out.push(b'%');
out.push(HEX[(b >> 4) as usize]);
out.push(HEX[(b & 0x0F) as usize]);
i += 1;
} else {
let start = i;
i += 1;
while i < bytes.len() && (bytes[i] & 0xC0) == 0x80 {
i += 1;
}
let seq = &bytes[start..i];
let c = unsafe { core::str::from_utf8_unchecked(seq) }.chars().next().unwrap();
if encoder.encode(c) {
for &byte in seq {
out.push(b'%');
out.push(HEX[(byte >> 4) as usize]);
out.push(HEX[(byte & 0x0F) as usize]);
}
} else {
out.extend_from_slice(seq);
}
}
}
} else {
let mut ubuf = [0u8; 4];
for c in src.chars() {
if encoder.encode(c) || c == '%' {
let s = c.encode_utf8(&mut ubuf);
for &b in s.as_bytes() {
out.push(b'%');
out.push(HEX[(b >> 4) as usize]);
out.push(HEX[(b & 0x0F) as usize]);
}
} else if c.is_ascii() {
out.push(c as u8);
} else {
out.extend_from_slice(c.encode_utf8(&mut ubuf).as_bytes());
}
}
}
PctString(out)
}
#[inline]
pub fn as_pct_str(&self) -> &PctStr {
unsafe {
PctStr::new_unchecked(&self.0)
}
}
#[inline]
pub fn into_string(self) -> String {
unsafe {
String::from_utf8_unchecked(self.0)
}
}
#[inline]
pub fn into_bytes(self) -> Vec<u8> {
self.0
}
}
impl std::ops::Deref for PctString {
type Target = PctStr;
#[inline]
fn deref(&self) -> &PctStr {
self.as_pct_str()
}
}
impl Borrow<PctStr> for PctString {
fn borrow(&self) -> &PctStr {
self.as_pct_str()
}
}
impl AsRef<PctStr> for PctString {
fn as_ref(&self) -> &PctStr {
self.as_pct_str()
}
}
impl AsRef<str> for PctString {
fn as_ref(&self) -> &str {
self.as_str()
}
}
impl AsRef<[u8]> for PctString {
fn as_ref(&self) -> &[u8] {
self.as_bytes()
}
}
impl PartialEq for PctString {
#[inline]
fn eq(&self, other: &PctString) -> bool {
**self == **other
}
}
impl Eq for PctString {}
impl PartialEq<PctStr> for PctString {
#[inline]
fn eq(&self, other: &PctStr) -> bool {
**self == *other
}
}
impl PartialEq<&str> for PctString {
#[inline]
fn eq(&self, other: &&str) -> bool {
**self == **other
}
}
impl PartialEq<str> for PctString {
#[inline]
fn eq(&self, other: &str) -> bool {
**self == *other
}
}
impl PartialOrd for PctString {
fn partial_cmp(&self, other: &PctString) -> Option<Ordering> {
self.as_pct_str().partial_cmp(other.as_pct_str())
}
}
impl PartialOrd<PctStr> for PctString {
fn partial_cmp(&self, other: &PctStr) -> Option<Ordering> {
self.as_pct_str().partial_cmp(other)
}
}
impl Hash for PctString {
#[inline]
fn hash<H: Hasher>(&self, hasher: &mut H) {
(**self).hash(hasher)
}
}
impl Display for PctString {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
Display::fmt(self.as_str(), f)
}
}
impl fmt::Debug for PctString {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(self.as_str(), f)
}
}
impl FromStr for PctString {
type Err = InvalidPctString<String>;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::from_string(s.to_string())
}
}
impl TryFrom<String> for PctString {
type Error = InvalidPctString<String>;
fn try_from(value: String) -> Result<Self, Self::Error> {
Self::from_string(value)
}
}
impl<'a> TryFrom<&'a str> for PctString {
type Error = InvalidPctString<String>;
fn try_from(value: &'a str) -> Result<Self, Self::Error> {
Self::from_string(value.to_owned())
}
}
impl<'a> TryFrom<&'a str> for &'a PctStr {
type Error = InvalidPctString<&'a str>;
fn try_from(value: &'a str) -> Result<Self, Self::Error> {
PctStr::new(value)
}
}
#[cfg(test)]
mod encode_bytes_tests {
use super::*;
use crate::{IriReserved, UriReserved};
fn check<E: Encoder + Copy>(src: &str, enc: E) {
let per_char = PctString::encode(src.chars(), enc);
let byte_path = PctString::encode_bytes(src, enc);
assert_eq!(per_char.as_str(), byte_path.as_str(), "src={:?}", src);
}
#[test]
fn parity_ascii_mixed() {
check("Hello, world!", UriReserved::Any);
check("foo/bar?baz=qux&x=y", UriReserved::Path);
check("abc 123 !@#$%^&*()", UriReserved::Query);
check("", UriReserved::Any);
check("%already%encoded%20", UriReserved::Any);
}
#[test]
fn parity_unicode() {
check("традиционное польское блюдо", UriReserved::Any);
check("中文 日本語 한국어", UriReserved::Any);
check("😀🚀🌍", UriReserved::Any);
check("традиционное польское блюдо", IriReserved::Path);
check("?test=中文&private=\u{10FFFD}", IriReserved::Query);
check("?test=中文&private=\u{10FFFD}", IriReserved::Fragment);
}
#[test]
fn parity_custom_encoder() {
struct UppercaseToo;
impl Encoder for UppercaseToo {
fn encode(&self, c: char) -> bool {
UriReserved::Any.encode(c) || c.is_uppercase()
}
}
let src = "Hello World!";
let per_char = PctString::encode(src.chars(), UppercaseToo);
let byte_path = PctString::encode_bytes(src, UppercaseToo);
assert_eq!(per_char.as_str(), byte_path.as_str());
}
}