pub mod encoder;
#[cfg(feature = "alloc")]
mod estring;
pub(crate) mod table;
#[cfg(feature = "alloc")]
pub use estring::EString;
pub use table::Table;
use crate::imp::PathEncoder;
use core::{cmp::Ordering, hash, iter::FusedIterator, marker::PhantomData, str};
use ref_cast::{ref_cast_custom, RefCastCustom};
#[cfg(feature = "alloc")]
use alloc::{
borrow::{Cow, ToOwned},
string::String,
vec::Vec,
};
/// Associates an encoder type with the [`Table`] used to validate its strings.
///
/// `EStr<E>` consults `E::TABLE` in [`EStr::new`] to decide whether a
/// string slice is acceptable for the encoder `E`.
pub trait Encoder: 'static {
/// The table consulted when validating strings for this encoder.
const TABLE: &'static Table;
}
/// A string slice tagged at the type level with an encoder `E`.
///
/// The type is `#[repr(transparent)]` over `str`; the encoder is carried
/// only through a zero-sized `PhantomData` marker. Values are created by
/// [`EStr::new`] (validating) or the crate-internal `new_validated`.
#[derive(RefCastCustom)]
#[repr(transparent)]
pub struct EStr<E: Encoder> {
// Zero-sized marker that records the encoder type.
encoder: PhantomData<E>,
// The underlying string data.
inner: str,
}
/// Carrier type for compile-time checks that relate two encoders.
///
/// It must be compiled whenever something can name it. `EStr::upcast` is
/// gated on `cfg(fluent_uri_unstable)` alone, so gating this type only on
/// the `alloc` feature would break a build that sets `fluent_uri_unstable`
/// without `alloc`. Other users are presumably `alloc`-gated code elsewhere
/// in the crate (not visible from this file).
#[cfg(any(feature = "alloc", fluent_uri_unstable))]
struct Assert<L: Encoder, R: Encoder> {
    _marker: PhantomData<(L, R)>,
}

#[cfg(any(feature = "alloc", fluent_uri_unstable))]
impl<L: Encoder, R: Encoder> Assert<L, R> {
    /// Evaluating this constant fails compilation with "not a sub-encoder"
    /// unless `L::TABLE` is a subset of `R::TABLE`.
    const L_IS_SUB_ENCODER_OF_R: () = assert!(L::TABLE.is_subset(R::TABLE), "not a sub-encoder");
}
impl<E: Encoder> EStr<E> {
    /// Compile-time guard: evaluating this constant fails the build when the
    /// encoder's table does not allow percent-encoded octets.
    const ASSERT_ALLOWS_PCT_ENCODED: () = assert!(
        E::TABLE.allows_pct_encoded(),
        "table does not allow percent-encoded octets"
    );

    /// Reinterprets a string slice as an `EStr` slice without validating it.
    #[ref_cast_custom]
    pub(crate) const fn new_validated(s: &str) -> &Self;

    /// An empty `EStr` slice.
    pub const EMPTY: &'static Self = Self::new_validated("");

    /// Reinterprets this slice under another encoder `F` without validation.
    pub(crate) fn cast<F: Encoder>(&self) -> &EStr<F> {
        let raw: &str = &self.inner;
        EStr::new_validated(raw)
    }

    /// Converts a string slice to an `EStr` slice.
    ///
    /// # Panics
    ///
    /// Panics with "improperly encoded string" when [`EStr::new`] rejects
    /// the input.
    #[must_use]
    pub const fn new_or_panic(s: &str) -> &Self {
        if let Some(valid) = Self::new(s) {
            valid
        } else {
            panic!("improperly encoded string")
        }
    }

    /// Converts a string slice to an `EStr` slice, returning `None` when
    /// `E::TABLE` does not validate its bytes.
    #[must_use]
    pub const fn new(s: &str) -> Option<&Self> {
        if !E::TABLE.validate(s.as_bytes()) {
            return None;
        }
        Some(Self::new_validated(s))
    }

    /// Returns the percent-encoded form (`%XX`) of a byte as an `EStr` slice.
    ///
    /// Fails to compile if the table does not allow percent-encoded octets.
    #[must_use]
    pub fn encode_byte(x: u8) -> &'static Self {
        let () = Self::ASSERT_ALLOWS_PCT_ENCODED;
        let triplet = encode_byte(x);
        Self::new_validated(triplet)
    }

    /// Yields the underlying string slice.
    #[must_use]
    pub fn as_str(&self) -> &str {
        let raw: &str = &self.inner;
        raw
    }

    /// Returns the length of the slice in bytes.
    #[must_use]
    pub fn len(&self) -> usize {
        self.as_str().len()
    }

    /// Returns `true` when the slice has a length of zero bytes.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.as_str().is_empty()
    }

    /// Reinterprets this slice under a super-encoder.
    ///
    /// Fails to compile unless `E::TABLE` is a subset of `SuperE::TABLE`.
    #[cfg(fluent_uri_unstable)]
    #[must_use]
    pub fn upcast<SuperE: Encoder>(&self) -> &EStr<SuperE> {
        let () = Assert::<E, SuperE>::L_IS_SUB_ENCODER_OF_R;
        EStr::new_validated(&self.inner)
    }

    /// Returns `true` when the slice holds no percent-encoded octets: either
    /// the table does not allow them at all, or the slice contains no `%`.
    #[cfg(fluent_uri_unstable)]
    #[must_use]
    pub fn is_unencoded(&self) -> bool {
        !E::TABLE.allows_pct_encoded() || !self.inner.contains('%')
    }

    /// Returns an iterator that decodes the percent-encoded octets in the
    /// slice.
    ///
    /// Fails to compile if the table does not allow percent-encoded octets.
    pub fn decode(&self) -> Decode<'_> {
        let () = Self::ASSERT_ALLOWS_PCT_ENCODED;
        Decode::new(self.as_str())
    }

    /// Returns an iterator over the sub-slices separated by `delim`.
    ///
    /// # Panics
    ///
    /// Panics if `delim` is not ASCII or is not allowed by
    /// `table::RESERVED`.
    pub fn split(&self, delim: char) -> Split<'_, E> {
        let is_reserved = delim.is_ascii() && table::RESERVED.allows(delim);
        assert!(is_reserved, "splitting with non-reserved character");

        let inner = self.inner.split(delim);
        Split {
            inner,
            encoder: PhantomData,
        }
    }

    /// Splits the slice at the first occurrence of `delim`, returning the
    /// parts before and after it, or `None` when `delim` does not occur.
    ///
    /// # Panics
    ///
    /// Panics if `delim` is not ASCII or is not allowed by
    /// `table::RESERVED`.
    #[must_use]
    pub fn split_once(&self, delim: char) -> Option<(&Self, &Self)> {
        let is_reserved = delim.is_ascii() && table::RESERVED.allows(delim);
        assert!(is_reserved, "splitting with non-reserved character");

        let (head, tail) = self.inner.split_once(delim)?;
        Some((Self::new_validated(head), Self::new_validated(tail)))
    }

    /// Splits the slice at the last occurrence of `delim`, returning the
    /// parts before and after it, or `None` when `delim` does not occur.
    ///
    /// # Panics
    ///
    /// Panics if `delim` is not ASCII or is not allowed by
    /// `table::RESERVED`.
    #[must_use]
    pub fn rsplit_once(&self, delim: char) -> Option<(&Self, &Self)> {
        let is_reserved = delim.is_ascii() && table::RESERVED.allows(delim);
        assert!(is_reserved, "splitting with non-reserved character");

        let (head, tail) = self.inner.rsplit_once(delim)?;
        Some((Self::new_validated(head), Self::new_validated(tail)))
    }
}
// Identity conversion: an `EStr` slice can be borrowed as itself.
impl<E: Encoder> AsRef<Self> for EStr<E> {
fn as_ref(&self) -> &Self {
self
}
}
// Borrows the underlying string data.
impl<E: Encoder> AsRef<str> for EStr<E> {
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
// Two `EStr` slices of the same encoder are equal when their string data is
// equal.
impl<E: Encoder> PartialEq for EStr<E> {
    fn eq(&self, other: &Self) -> bool {
        self.as_str() == other.as_str()
    }
}
// Compares the underlying string data against a plain string slice.
impl<E: Encoder> PartialEq<str> for EStr<E> {
    fn eq(&self, other: &str) -> bool {
        self.as_str() == other
    }
}
// Mirror of `PartialEq<str> for EStr<E>` so `str == EStr` also works.
impl<E: Encoder> PartialEq<EStr<E>> for str {
    fn eq(&self, other: &EStr<E>) -> bool {
        self == other.as_str()
    }
}
// Equality delegates to `str` equality (see the `PartialEq` impl), which is a
// full equivalence relation.
impl<E: Encoder> Eq for EStr<E> {}
// Hashes exactly like the underlying `str`.
impl<E: Encoder> hash::Hash for EStr<E> {
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
        hash::Hash::hash(&self.inner, state);
    }
}
// The ordering is total, so this simply wraps the `Ord` result.
impl<E: Encoder> PartialOrd for EStr<E> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
// Orders `EStr` slices by their underlying string data.
impl<E: Encoder> Ord for EStr<E> {
    fn cmp(&self, other: &Self) -> Ordering {
        Ord::cmp(&self.inner, &other.inner)
    }
}
// The default `&EStr` is the empty slice.
impl<E: Encoder> Default for &EStr<E> {
    fn default() -> Self {
        <EStr<E>>::EMPTY
    }
}
// Owned counterpart of `EStr<E>` is `EString<E>`; the string data is copied
// and wrapped without revalidation.
#[cfg(feature = "alloc")]
impl<E: Encoder> ToOwned for EStr<E> {
    type Owned = EString<E>;

    fn to_owned(&self) -> EString<E> {
        let copy = ToOwned::to_owned(&self.inner);
        EString::new_validated(copy)
    }

    // Writes into the target's existing buffer instead of building a new
    // `EString`.
    fn clone_into(&self, target: &mut EString<E>) {
        ToOwned::clone_into(&self.inner, &mut target.buf);
    }
}
impl<E: PathEncoder> EStr<E> {
    /// Returns `true` when the path starts with `'/'`.
    #[inline]
    #[must_use]
    pub fn is_absolute(&self) -> bool {
        matches!(self.inner.as_bytes().first(), Some(b'/'))
    }

    /// Returns `true` when the path does not start with `'/'`.
    #[inline]
    #[must_use]
    pub fn is_rootless(&self) -> bool {
        !self.is_absolute()
    }

    /// Returns an iterator over the `'/'`-separated segments that follow
    /// the leading `'/'`, or `None` when the path does not start with one.
    #[inline]
    #[must_use]
    pub fn segments_if_absolute(&self) -> Option<Split<'_, E>> {
        let after_root = self.inner.strip_prefix('/')?;
        Some(Self::new_validated(after_root).split('/'))
    }
}
/// Builds a 256-entry lookup table from a byte to the value of the hex digit
/// it represents.
///
/// Entries for `0-9`, `A-F` and `a-f` hold the digit value, shifted into the
/// high nibble when `hi` is `true`. Every other entry is `0xff`.
const fn gen_octet_table(hi: bool) -> [u8; 256] {
    const UPPER_HEX: &[u8; 16] = b"0123456789ABCDEF";

    let shift: u32 = if hi { 4 } else { 0 };
    let mut table = [0xff; 256];

    let mut value: u8 = 0;
    while value < 16 {
        let entry = value << shift;
        let digit = UPPER_HEX[value as usize];
        table[digit as usize] = entry;
        // For '0'..='9' this rewrites the same slot; for 'A'..='F' it fills
        // the lowercase slot as well.
        table[digit.to_ascii_lowercase() as usize] = entry;
        value += 1;
    }
    table
}
/// Maps an ASCII hex digit to its value shifted into the high nibble; every
/// other byte maps to `0xff`.
const OCTET_TABLE_HI: &[u8; 256] = &gen_octet_table(true);
/// Maps an ASCII hex digit to its value in the low nibble; every other byte
/// maps to `0xff`.
pub(crate) const OCTET_TABLE_LO: &[u8; 256] = &gen_octet_table(false);
/// Combines two ASCII hex digits into the octet they denote.
///
/// Both arguments are expected to be hex digits. This is only checked in
/// debug builds; otherwise a non-digit contributes the `0xff` table entry.
pub(crate) fn decode_octet(hi: u8, lo: u8) -> u8 {
    debug_assert!(hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit());
    let high_nibble = OCTET_TABLE_HI[usize::from(hi)];
    let low_nibble = OCTET_TABLE_LO[usize::from(lo)];
    high_nibble | low_nibble
}
/// An iterator that walks a string and yields it as [`DecodedChunk`]s:
/// runs without `%` are yielded as-is, and each `%XX` triplet is yielded as
/// the decoded octet.
#[derive(Clone, Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Decode<'a> {
// The part of the input that has not been yielded yet.
source: &'a str,
}
/// An item yielded by the [`Decode`] iterator.
#[derive(Clone, Copy, Debug)]
pub enum DecodedChunk<'a> {
/// A non-empty run of the input that contains no `%`.
Unencoded(&'a str),
/// The octet decoded from one `%XX` triplet.
PctDecoded(u8),
}
impl<'a> Decode<'a> {
    /// Creates a decoder over `source`.
    ///
    /// NOTE(review): `Iterator::next` takes three bytes after each `%`, so
    /// the input is presumably already validated by the caller.
    pub(crate) fn new(source: &'a str) -> Self {
        Self { source }
    }

    /// Consumes and returns the run of input before the next `%` (or the
    /// whole remaining input if there is none). Returns `None`, consuming
    /// nothing, when that run is empty.
    fn next_if_unencoded(&mut self) -> Option<&'a str> {
        let run_len = match self.source.bytes().position(|b| b == b'%') {
            Some(pct_at) => pct_at,
            None => self.source.len(),
        };
        if run_len == 0 {
            return None;
        }

        let (run, rest) = self.source.split_at(run_len);
        self.source = rest;
        Some(run)
    }
}
impl<'a> Iterator for Decode<'a> {
    type Item = DecodedChunk<'a>;

    /// Yields the next unencoded run, or the octet decoded from the next
    /// `%XX` triplet; `None` once the input is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        if self.source.is_empty() {
            return None;
        }
        if let Some(run) = self.next_if_unencoded() {
            return Some(DecodedChunk::Unencoded(run));
        }

        // The input now starts with '%': take the triplet and decode its two
        // hex digits.
        let (triplet, rest) = self.source.split_at(3);
        self.source = rest;
        let digits = triplet.as_bytes();
        let octet = decode_octet(digits[1], digits[2]);
        Some(DecodedChunk::PctDecoded(octet))
    }
}
// `next` returns `None` only when the remaining input is empty and never
// refills it, so it keeps returning `None` afterwards.
impl FusedIterator for Decode<'_> {}
/// A chunk passed to the callback of `Decode::decode_utf8`.
#[cfg(feature = "alloc")]
pub(crate) enum DecodedUtf8Chunk<'a, 'b> {
/// A run of the input that contained no percent-encoded octets.
Unencoded(&'a str),
/// Decoded octets, as split by `Utf8Chunks`: a `valid` string part followed
/// by `invalid` bytes (which may be empty).
Decoded { valid: &'b str, invalid: &'b [u8] },
}
#[cfg(feature = "alloc")]
impl<'a> Decode<'a> {
    /// Decodes the input and reports it to `handle_chunk` piece by piece.
    ///
    /// Unencoded runs are passed through directly. Decoded octets are
    /// collected in a fixed 32-byte buffer and split with `Utf8Chunks`
    /// before being reported.
    pub(crate) fn decode_utf8(self, mut handle_chunk: impl FnMut(DecodedUtf8Chunk<'a, '_>)) {
        use crate::utf8::Utf8Chunks;

        const CAPACITY: usize = 32;
        let mut pending = [0u8; CAPACITY];
        let mut filled = 0;

        'decode: for piece in self {
            let octet = match piece {
                DecodedChunk::Unencoded(text) => {
                    // Flush the buffered octets before the unencoded run so
                    // the output keeps the input order.
                    if filled > 0 {
                        for run in Utf8Chunks::new(&pending[..filled]) {
                            handle_chunk(DecodedUtf8Chunk::Decoded {
                                valid: run.valid(),
                                invalid: run.invalid(),
                            });
                        }
                        filled = 0;
                    }
                    handle_chunk(DecodedUtf8Chunk::Unencoded(text));
                    continue;
                }
                DecodedChunk::PctDecoded(octet) => octet,
            };

            pending[filled] = octet;
            filled += 1;
            if filled < CAPACITY {
                continue;
            }

            // The buffer is full: report it, carrying an incomplete trailing
            // sequence over to the front of the buffer.
            for run in Utf8Chunks::new(&pending[..filled]) {
                if run.incomplete() {
                    handle_chunk(DecodedUtf8Chunk::Decoded {
                        valid: run.valid(),
                        invalid: &[],
                    });

                    let carried = run.invalid().len();
                    pending.copy_within(filled - carried..filled, 0);
                    filled = carried;
                    continue 'decode;
                }
                handle_chunk(DecodedUtf8Chunk::Decoded {
                    valid: run.valid(),
                    invalid: run.invalid(),
                });
            }
            filled = 0;
        }

        // Report whatever is still buffered at the end of the input.
        for run in Utf8Chunks::new(&pending[..filled]) {
            handle_chunk(DecodedUtf8Chunk::Decoded {
                valid: run.valid(),
                invalid: run.invalid(),
            });
        }
    }

    /// Length in bytes of the remaining input once decoded: each `%XX`
    /// triplet shrinks from three bytes to one.
    fn decoded_len(&self) -> usize {
        let pct_count = self.source.bytes().filter(|&b| b == b'%').count();
        self.source.len() - pct_count * 2
    }

    /// Returns `Ok` with the whole input when it contains no `%`. Otherwise
    /// returns `Err` with a buffer sized for the decoded output and already
    /// holding the leading unencoded run, which is consumed from `self`.
    fn borrow_all_or_prep_buf(&mut self) -> Result<&'a str, String> {
        let prefix = match self.next_if_unencoded() {
            Some(all) if self.source.is_empty() => return Ok(all),
            Some(prefix) => prefix,
            None => "",
        };

        let mut buf = String::with_capacity(prefix.len() + self.decoded_len());
        buf.push_str(prefix);
        Err(buf)
    }

    /// Decodes the input to bytes, borrowing when nothing needs decoding.
    #[must_use]
    pub fn to_bytes(mut self) -> Cow<'a, [u8]> {
        if self.source.is_empty() {
            return Cow::Borrowed(&[]);
        }

        let mut out = match self.borrow_all_or_prep_buf() {
            Ok(plain) => return Cow::Borrowed(plain.as_bytes()),
            Err(prefix) => prefix.into_bytes(),
        };

        self.for_each(|piece| match piece {
            DecodedChunk::Unencoded(text) => out.extend_from_slice(text.as_bytes()),
            DecodedChunk::PctDecoded(octet) => out.push(octet),
        });
        Cow::Owned(out)
    }

    /// Decodes the input to a string, borrowing when nothing needs decoding.
    ///
    /// # Errors
    ///
    /// Returns the decoded bytes as `Err` if they contain invalid bytes.
    pub fn to_string(mut self) -> Result<Cow<'a, str>, Vec<u8>> {
        if self.source.is_empty() {
            return Ok(Cow::Borrowed(""));
        }

        // `Ok` while everything decoded so far is valid; switches to `Err`
        // (raw bytes) at the first invalid piece and stays there.
        let mut state: Result<String, Vec<u8>> = match self.borrow_all_or_prep_buf() {
            Ok(plain) => return Ok(Cow::Borrowed(plain)),
            Err(prefix) => Ok(prefix),
        };

        self.decode_utf8(|piece| {
            let (valid, invalid): (&str, &[u8]) = match piece {
                DecodedUtf8Chunk::Unencoded(text) => (text, &[]),
                DecodedUtf8Chunk::Decoded { valid, invalid } => (valid, invalid),
            };

            match &mut state {
                Ok(text) => {
                    text.push_str(valid);
                    if !invalid.is_empty() {
                        let mut raw = core::mem::take(text).into_bytes();
                        raw.extend_from_slice(invalid);
                        state = Err(raw);
                    }
                }
                Err(raw) => {
                    raw.extend_from_slice(valid.as_bytes());
                    raw.extend_from_slice(invalid);
                }
            }
        });

        state.map(Cow::Owned)
    }

    /// Decodes the input to a string, writing U+FFFD once for each decoded
    /// chunk that reports invalid bytes. Borrows when nothing needs
    /// decoding.
    #[must_use]
    pub fn to_string_lossy(mut self) -> Cow<'a, str> {
        if self.source.is_empty() {
            return Cow::Borrowed("");
        }

        let mut out = match self.borrow_all_or_prep_buf() {
            Ok(plain) => return Cow::Borrowed(plain),
            Err(prefix) => prefix,
        };

        self.decode_utf8(|piece| {
            let (text, has_invalid) = match piece {
                DecodedUtf8Chunk::Unencoded(text) => (text, false),
                DecodedUtf8Chunk::Decoded { valid, invalid } => (valid, !invalid.is_empty()),
            };
            out.push_str(text);
            if has_invalid {
                out.push(char::REPLACEMENT_CHARACTER);
            }
        });
        Cow::Owned(out)
    }
}
/// Returns the percent-encoded form of `x`: `%` followed by two uppercase
/// hex digits.
///
/// The result is a slice of a table computed at compile time.
pub(crate) fn encode_byte(x: u8) -> &'static str {
    const UPPER_HEX: &[u8; 16] = b"0123456789ABCDEF";
    const TRIPLET_LEN: usize = 3;

    // All 256 triplets back to back: "%00%01...%FF".
    const ENCODED: &[u8; 256 * TRIPLET_LEN] = &{
        let mut bytes = [b'%'; 256 * TRIPLET_LEN];
        let mut octet = 0;
        while octet < 256 {
            let base = octet * TRIPLET_LEN;
            bytes[base + 1] = UPPER_HEX[octet / 16];
            bytes[base + 2] = UPPER_HEX[octet % 16];
            octet += 1;
        }
        bytes
    };

    const ENCODED_STR: &str = match str::from_utf8(ENCODED) {
        Ok(text) => text,
        Err(_) => unreachable!(),
    };

    let start = usize::from(x) * TRIPLET_LEN;
    &ENCODED_STR[start..start + TRIPLET_LEN]
}
/// An iterator that percent-encodes a string against a [`Table`], yielding
/// [`EncodedChunk`]s.
#[cfg(feature = "alloc")]
#[derive(Clone, Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub(crate) struct Encode<'t, 's> {
// Table deciding which characters may be left unencoded.
table: &'t Table,
// Remaining input; while a character is being encoded it is still at the
// front of this slice.
source: &'s str,
// Byte length of the character at the front of `source` that is being
// percent-encoded (0 when there is none).
enc_len: usize,
// Index of the next byte of that character to emit.
enc_i: usize,
}
#[cfg(feature = "alloc")]
impl<'t, 's> Encode<'t, 's> {
/// Creates an encoder over `source` that leaves characters allowed by
/// `table` unencoded. It starts with no character pending.
pub(crate) fn new(table: &'t Table, source: &'s str) -> Self {
Self {
table,
source,
enc_len: 0,
enc_i: 0,
}
}
}
/// An item yielded by the [`Encode`] iterator.
#[cfg(feature = "alloc")]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum EncodedChunk<'a> {
/// A run of the input whose characters are all allowed by the table.
Unencoded(&'a str),
/// The `%XX` form of one byte of a character not allowed by the table.
PctEncoded(&'static str),
}
#[cfg(feature = "alloc")]
impl<'a> EncodedChunk<'a> {
    /// Returns the chunk's text, whichever variant it is.
    #[must_use]
    pub fn as_str(self) -> &'a str {
        match self {
            Self::Unencoded(text) => text,
            Self::PctEncoded(triplet) => triplet,
        }
    }
}
#[cfg(feature = "alloc")]
impl<'t, 's> Iterator for Encode<'t, 's> {
    type Item = EncodedChunk<'s>;

    /// Yields either the next run of allowed characters or the `%XX` form
    /// of the next byte of a character that the table does not allow.
    fn next(&mut self) -> Option<Self::Item> {
        // Still emitting the bytes of a character that must be encoded.
        if self.enc_i < self.enc_len {
            let byte = self.source.as_bytes()[self.enc_i];
            self.enc_i += 1;
            return Some(EncodedChunk::PctEncoded(encode_byte(byte)));
        }

        // The pending character (if any) is fully emitted: drop it.
        self.source = &self.source[self.enc_len..];
        self.enc_len = 0;
        if self.source.is_empty() {
            return None;
        }

        let mut chars = self.source.char_indices();
        let offending = chars.find_map(|(at, ch)| (!self.table.allows(ch)).then_some(at));
        // `chars` now sits just past the offending character, or at the end
        // of the input, so this is the number of bytes it has walked over.
        let consumed = self.source.len() - chars.as_str().len();
        let clean_len = offending.unwrap_or(self.source.len());

        if clean_len == 0 {
            // The input starts with a character to encode: emit its first
            // byte now and remember how many bytes it spans.
            self.enc_len = consumed;
            self.enc_i = 1;
            let first = self.source.as_bytes()[0];
            Some(EncodedChunk::PctEncoded(encode_byte(first)))
        } else {
            let (clean, rest) = self.source.split_at(clean_len);
            self.source = rest;
            // Length of the offending character now at the front of `rest`,
            // or 0 when the run reached the end of the input.
            self.enc_len = consumed - clean_len;
            self.enc_i = 0;
            Some(EncodedChunk::Unencoded(clean))
        }
    }
}
// After `next` returns `None`, `source` is empty and `enc_len` is 0, so
// further calls take the same path and return `None` again.
#[cfg(feature = "alloc")]
impl FusedIterator for Encode<'_, '_> {}
/// An iterator over the sub-slices of an [`EStr`] slice separated by a
/// delimiter, as created by [`EStr::split`].
#[derive(Clone, Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Split<'a, E: Encoder> {
// The underlying `str` splitter.
inner: str::Split<'a, char>,
// Zero-sized marker recording the encoder of the yielded slices.
encoder: PhantomData<E>,
}
impl<'a, E: Encoder> Iterator for Split<'a, E> {
    type Item = &'a EStr<E>;

    /// Yields the next piece from the front, wrapped without revalidation.
    fn next(&mut self) -> Option<&'a EStr<E>> {
        let piece = self.inner.next()?;
        Some(EStr::new_validated(piece))
    }
}
impl<'a, E: Encoder> DoubleEndedIterator for Split<'a, E> {
    /// Yields the next piece from the back, wrapped without revalidation.
    fn next_back(&mut self) -> Option<&'a EStr<E>> {
        let piece = self.inner.next_back()?;
        Some(EStr::new_validated(piece))
    }
}
// Every call is forwarded to the wrapped `str::Split`, which is itself a
// fused iterator.
impl<E: Encoder> FusedIterator for Split<'_, E> {}