pub mod encoder;
#[cfg(feature = "alloc")]
mod estring;
pub(crate) mod table;
#[cfg(feature = "alloc")]
pub use estring::EString;
pub use table::Table;
use crate::imp::PathEncoder;
use core::{cmp::Ordering, hash, iter::FusedIterator, marker::PhantomData, str};
use ref_cast::{ref_cast_custom, RefCastCustom};
#[cfg(feature = "alloc")]
use alloc::{
borrow::{Cow, ToOwned},
string::String,
vec::Vec,
};
/// Marker trait that ties a type to the [`Table`] used to validate strings for it.
///
/// `EStr<E>` stores the encoder only as `PhantomData`, so implementors carry no data.
pub trait Encoder: 'static {
/// The table for this encoder; `EStr::<Self>::new` accepts a string only if
/// `TABLE.validate` returns `true` for its bytes.
const TABLE: &'static Table;
}
/// A string slice tagged at the type level with an [`Encoder`] `E`.
///
/// The type is `#[repr(transparent)]` with `str` as its only non-zero-sized field; `new_validated`
/// relies on `ref_cast` to reinterpret a `&str` as `&EStr<E>`.
/// The public constructors (`new`, `new_or_panic`) only hand out values whose bytes passed
/// `E::TABLE.validate`.
#[derive(RefCastCustom)]
#[repr(transparent)]
pub struct EStr<E: Encoder> {
// Zero-sized marker recording which encoder this slice belongs to.
encoder: PhantomData<E>,
// The underlying text, stored unsized so that `EStr` is itself a slice type.
inner: str,
}
/// Carrier for compile-time assertions relating two encoders.
///
/// The type is never constructed; it only exists so that an associated constant can be
/// generic over both `L` and `R`.
///
/// It is compiled whenever the `alloc` feature is enabled *or* the crate is built with
/// `--cfg fluent_uri_unstable`: `EStr::upcast` is gated on `fluent_uri_unstable` alone and
/// references this type, so gating it on `alloc` only would break unstable builds without `alloc`.
#[cfg(any(feature = "alloc", fluent_uri_unstable))]
struct Assert<L: Encoder, R: Encoder> {
    _marker: PhantomData<(L, R)>,
}

#[cfg(any(feature = "alloc", fluent_uri_unstable))]
impl<L: Encoder, R: Encoder> Assert<L, R> {
    /// Evaluating this constant fails compilation with "not a sub-encoder" unless
    /// `L::TABLE.is_subset(R::TABLE)` holds.
    const L_IS_SUB_ENCODER_OF_R: () = assert!(L::TABLE.is_subset(R::TABLE), "not a sub-encoder");
}
impl<E: Encoder> EStr<E> {
    /// Compile-time check that `E::TABLE` allows percent-encoded octets.
    ///
    /// The assertion is only evaluated for an `E` whose code references this constant
    /// (see [`decode`](Self::decode)).
    const ASSERT_ALLOWS_PCT_ENCODED: () = assert!(
        E::TABLE.allows_pct_encoded(),
        "table does not allow percent-encoded octets"
    );

    /// Reinterprets `s` as an `EStr` without checking it against `E::TABLE`.
    ///
    /// The body is generated by `ref_cast`; callers are responsible for validity.
    #[ref_cast_custom]
    pub(crate) const fn new_validated(s: &str) -> &Self;

    /// The empty `EStr` slice.
    pub const EMPTY: &'static Self = Self::new_validated("");

    /// Re-tags this slice with another encoder `F` without any validation.
    pub(crate) fn cast<F: Encoder>(&self) -> &EStr<F> {
        EStr::<F>::new_validated(self.as_str())
    }

    /// Converts `s` to an `EStr` slice.
    ///
    /// # Panics
    ///
    /// Panics with "improperly encoded string" if [`new`](Self::new) rejects `s`.
    #[must_use]
    pub const fn new_or_panic(s: &str) -> &Self {
        if let Some(estr) = Self::new(s) {
            estr
        } else {
            panic!("improperly encoded string")
        }
    }

    /// Converts `s` to an `EStr` slice, returning `None` if `E::TABLE.validate`
    /// rejects its bytes.
    #[must_use]
    pub const fn new(s: &str) -> Option<&Self> {
        if !E::TABLE.validate(s.as_bytes()) {
            return None;
        }
        Some(Self::new_validated(s))
    }

    /// Returns the underlying string slice.
    #[must_use]
    pub fn as_str(&self) -> &str {
        &self.inner
    }

    /// Returns the length of the underlying string in bytes.
    #[must_use]
    pub fn len(&self) -> usize {
        self.as_str().len()
    }

    /// Returns `true` if the underlying string has a length of zero bytes.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.as_str().is_empty()
    }

    /// Re-tags this slice with `SuperE`.
    ///
    /// Compilation fails with "not a sub-encoder" unless `E::TABLE.is_subset(SuperE::TABLE)`.
    #[cfg(fluent_uri_unstable)]
    #[must_use]
    pub fn upcast<SuperE: Encoder>(&self) -> &EStr<SuperE> {
        // Referencing the constant forces the subset assertion for this pair of encoders.
        let () = Assert::<E, SuperE>::L_IS_SUB_ENCODER_OF_R;
        EStr::<SuperE>::new_validated(&self.inner)
    }

    /// Returns `true` unless `E::TABLE` allows percent-encoded octets and the
    /// string contains a `'%'`.
    #[cfg(fluent_uri_unstable)]
    #[must_use]
    pub fn is_unencoded(&self) -> bool {
        !E::TABLE.allows_pct_encoded() || !self.inner.contains('%')
    }

    /// Returns an iterator over the decoded chunks of this slice.
    ///
    /// Compilation fails if `E::TABLE` does not allow percent-encoded octets.
    pub fn decode(&self) -> Decode<'_> {
        // Referencing the constant forces the table assertion for this `E`.
        let () = Self::ASSERT_ALLOWS_PCT_ENCODED;
        Decode {
            source: self.as_str(),
        }
    }

    /// Returns an iterator over the subslices separated by `delim`.
    ///
    /// # Panics
    ///
    /// Panics if `delim` is not ASCII or is not allowed by `table::RESERVED`.
    pub fn split(&self, delim: char) -> Split<'_, E> {
        assert!(
            delim.is_ascii() && table::RESERVED.allows(delim),
            "splitting with non-reserved character"
        );
        let inner = self.inner.split(delim);
        Split {
            inner,
            encoder: PhantomData,
        }
    }

    /// Splits the slice at the first occurrence of `delim`, returning the parts
    /// before and after it, or `None` if `delim` does not occur.
    ///
    /// # Panics
    ///
    /// Panics if `delim` is not ASCII or is not allowed by `table::RESERVED`.
    #[must_use]
    pub fn split_once(&self, delim: char) -> Option<(&Self, &Self)> {
        assert!(
            delim.is_ascii() && table::RESERVED.allows(delim),
            "splitting with non-reserved character"
        );
        let (head, tail) = self.inner.split_once(delim)?;
        Some((Self::new_validated(head), Self::new_validated(tail)))
    }

    /// Splits the slice at the last occurrence of `delim`, returning the parts
    /// before and after it, or `None` if `delim` does not occur.
    ///
    /// # Panics
    ///
    /// Panics if `delim` is not ASCII or is not allowed by `table::RESERVED`.
    #[must_use]
    pub fn rsplit_once(&self, delim: char) -> Option<(&Self, &Self)> {
        assert!(
            delim.is_ascii() && table::RESERVED.allows(delim),
            "splitting with non-reserved character"
        );
        let (head, tail) = self.inner.rsplit_once(delim)?;
        Some((Self::new_validated(head), Self::new_validated(tail)))
    }
}
/// Identity conversion, so APIs taking `impl AsRef<EStr<E>>` also accept `&EStr<E>`.
impl<E: Encoder> AsRef<Self> for EStr<E> {
fn as_ref(&self) -> &Self {
self
}
}
/// Borrows the underlying (still encoded) text as a plain `str`.
impl<E: Encoder> AsRef<str> for EStr<E> {
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
/// Two `EStr` slices are equal when their underlying strings are byte-for-byte equal;
/// no percent-decoding or normalization is performed.
impl<E: Encoder> PartialEq for EStr<E> {
    fn eq(&self, other: &Self) -> bool {
        self.as_str() == other.as_str()
    }
}
/// Compares the underlying string with a plain `str`.
impl<E: Encoder> PartialEq<str> for EStr<E> {
    fn eq(&self, other: &str) -> bool {
        self.as_str() == other
    }
}
/// Symmetric counterpart of `EStr == str`: compares a plain `str` with the underlying string.
impl<E: Encoder> PartialEq<EStr<E>> for str {
    fn eq(&self, other: &EStr<E>) -> bool {
        self == other.as_str()
    }
}
// `PartialEq` for `EStr` delegates to `str` equality, which is a full equivalence relation.
impl<E: Encoder> Eq for EStr<E> {}
/// Hashes exactly like the underlying `str`, keeping `Hash` consistent with `Eq`.
impl<E: Encoder> hash::Hash for EStr<E> {
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
        hash::Hash::hash(self.as_str(), state);
    }
}
/// Ordering is total; this simply wraps the result of [`Ord::cmp`].
impl<E: Encoder> PartialOrd for EStr<E> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
/// Orders `EStr` slices by comparing their underlying strings as `str` does.
impl<E: Encoder> Ord for EStr<E> {
    fn cmp(&self, other: &Self) -> Ordering {
        Ord::cmp(self.as_str(), other.as_str())
    }
}
/// The default `&EStr` is the empty slice, [`EStr::EMPTY`].
impl<E: Encoder> Default for &EStr<E> {
    fn default() -> Self {
        <EStr<E>>::EMPTY
    }
}
/// Owned counterpart of `EStr<E>` is `EString<E>`.
#[cfg(feature = "alloc")]
impl<E: Encoder> ToOwned for EStr<E> {
    type Owned = EString<E>;

    /// Copies the text into a new `EString`; no re-validation is needed because
    /// the source is already an `EStr<E>`.
    fn to_owned(&self) -> Self::Owned {
        EString::new_validated(String::from(self.as_str()))
    }

    /// Overwrites `target` with this slice's text by delegating to `str::clone_into`
    /// on the target's buffer.
    fn clone_into(&self, target: &mut Self::Owned) {
        ToOwned::clone_into(self.as_str(), &mut target.buf);
    }
}
/// Path-specific helpers, available only for encoders that implement `PathEncoder`.
impl<E: PathEncoder> EStr<E> {
    /// Returns `true` if the path starts with `'/'`.
    #[inline]
    #[must_use]
    pub fn is_absolute(&self) -> bool {
        self.as_str().starts_with('/')
    }

    /// Returns `true` if the path does not start with `'/'`.
    ///
    /// Note that this is also `true` for the empty path.
    #[inline]
    #[must_use]
    pub fn is_rootless(&self) -> bool {
        !self.is_absolute()
    }

    /// Returns an iterator over the `'/'`-separated segments that follow the leading
    /// `'/'`, or `None` if the path does not start with `'/'`.
    #[inline]
    #[must_use]
    pub fn segments_if_absolute(&self) -> Option<Split<'_, E>> {
        let after_root = self.inner.strip_prefix('/')?;
        Some(Self::new_validated(after_root).split('/'))
    }
}
/// Builds a 256-entry lookup table from an ASCII byte to the value of the hex digit it spells.
///
/// When `hi` is `true` the digit value is stored in the high nibble (`value << 4`),
/// otherwise in the low nibble. Both upper- and lowercase `A`-`F` are recognised.
/// Every byte that is not a hex digit maps to `0xff`.
const fn gen_octet_table(hi: bool) -> [u8; 256] {
    const UPPER: &[u8; 16] = b"0123456789ABCDEF";
    const LOWER: &[u8; 16] = b"0123456789abcdef";

    let shift = if hi { 4 } else { 0 };
    let mut table = [0xff_u8; 256];

    // `while` rather than `for`: iterators are not usable in a `const fn`.
    let mut value = 0usize;
    while value < 16 {
        let entry = (value as u8) << shift;
        table[UPPER[value] as usize] = entry;
        table[LOWER[value] as usize] = entry;
        value += 1;
    }
    table
}
// Lookup for the first hex digit of a `%XX` triplet: indexed by ASCII byte, yields the digit's
// value shifted into the high nibble, or 0xff for bytes that are not hex digits.
const OCTET_TABLE_HI: &[u8; 256] = &gen_octet_table(true);
// Lookup for the second hex digit of a `%XX` triplet: indexed by ASCII byte, yields the digit's
// value in the low nibble, or 0xff for bytes that are not hex digits.
pub(crate) const OCTET_TABLE_LO: &[u8; 256] = &gen_octet_table(false);
/// Decodes the two hex digits of a percent-encoded octet into the byte they represent.
///
/// `hi` and `lo` are the ASCII bytes of the first and second digit. Both must be hex digits;
/// this is only checked in debug builds. In release builds a non-hex input does not panic
/// and produces a meaningless value.
pub(crate) fn decode_octet(hi: u8, lo: u8) -> u8 {
    debug_assert!(hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit());
    let high_nibble = OCTET_TABLE_HI[usize::from(hi)];
    let low_nibble = OCTET_TABLE_LO[usize::from(lo)];
    high_nibble | low_nibble
}
#[cfg(feature = "alloc")]
/// Appends the percent-encoded form of `x` to `buf`: a `'%'` followed by two
/// uppercase hexadecimal digits.
pub(crate) fn encode_byte(x: u8, buf: &mut alloc::string::String) {
    // Precomputed (high digit, low digit) pair for every possible byte value.
    const HEX_PAIRS: [[u8; 2]; 256] = {
        const DIGITS: &[u8; 16] = b"0123456789ABCDEF";
        let mut pairs = [[0u8; 2]; 256];
        let mut byte = 0;
        while byte < 256 {
            pairs[byte] = [DIGITS[byte / 16], DIGITS[byte % 16]];
            byte += 1;
        }
        pairs
    };

    let [high, low] = HEX_PAIRS[usize::from(x)];
    buf.push('%');
    buf.push(char::from(high));
    buf.push(char::from(low));
}
/// An iterator over the decoded chunks of a percent-encoded string.
///
/// Created by `EStr::decode`. Yields [`DecodedChunk`]s in input order.
#[derive(Clone, Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Decode<'a> {
// The part of the input that has not been yielded yet; shrinks from the front on each `next`.
source: &'a str,
}
/// One item produced by the [`Decode`] iterator.
#[derive(Clone, Copy, Debug)]
pub enum DecodedChunk<'a> {
/// A non-empty run of the input that contains no `'%'`, passed through as-is.
Unencoded(&'a str),
/// The byte decoded from a single `%XX` triplet.
PctDecoded(u8),
}
impl<'a> Iterator for Decode<'a> {
    type Item = DecodedChunk<'a>;

    /// Yields the next chunk: either the text up to the next `'%'`, or the byte decoded
    /// from the `%XX` triplet at the front of the remaining input.
    ///
    /// Assumes every `'%'` in the input is followed by two hex digits; slicing panics
    /// if fewer than three bytes remain at a `'%'`.
    fn next(&mut self) -> Option<Self::Item> {
        let remaining = self.source;
        if remaining.is_empty() {
            return None;
        }

        match remaining.find('%') {
            // The input starts with a triplet: consume exactly `%XX`.
            Some(0) => {
                let (triplet, tail) = remaining.split_at(3);
                let digits = triplet.as_bytes();
                let octet = decode_octet(digits[1], digits[2]);
                self.source = tail;
                Some(DecodedChunk::PctDecoded(octet))
            }
            // Plain text up to the next `'%'`, or to the end if there is none.
            found => {
                let end = found.unwrap_or(remaining.len());
                let (plain, tail) = remaining.split_at(end);
                self.source = tail;
                Some(DecodedChunk::Unencoded(plain))
            }
        }
    }
}
// `next` returns `None` exactly when `source` is empty and never grows `source`,
// so the iterator keeps returning `None` once exhausted.
impl FusedIterator for Decode<'_> {}
// A chunk handed to the callback of `decode_utf8`.
#[cfg(feature = "alloc")]
enum DecodedUtf8Chunk<'a, 'b> {
// Text from the input that was not percent-encoded, forwarded unchanged.
Unencoded(&'a str),
// Percent-decoded bytes split by `Utf8Chunks` into a valid UTF-8 part and the bytes
// reported as invalid after it (`invalid` may be empty).
Decoded { valid: &'b str, invalid: &'b [u8] },
}
// Feeds the chunks of `iter` to `handle_chunk` in input order, running consecutive
// percent-decoded bytes through `Utf8Chunks` so that the callback receives them as
// valid-UTF-8 / invalid-bytes pairs instead of single bytes.
//
// Decoded bytes are collected in a fixed 32-byte stack buffer and flushed when unencoded
// text arrives, when the buffer is full, and once more at the end of the input.
#[cfg(feature = "alloc")]
fn decode_utf8<'a>(
iter: impl Iterator<Item = DecodedChunk<'a>>,
mut handle_chunk: impl FnMut(DecodedUtf8Chunk<'a, '_>),
) {
use crate::utf8::Utf8Chunks;
// Pending percent-decoded bytes and how many of them are in use.
let mut buf = [0; 32];
let mut cnt = 0;
'decode: for chunk in iter {
match chunk {
DecodedChunk::Unencoded(s) => {
// Flush pending decoded bytes first so the callback sees chunks in input order.
if cnt > 0 {
for chunk in Utf8Chunks::new(&buf[..cnt]) {
handle_chunk(DecodedUtf8Chunk::Decoded {
valid: chunk.valid(),
invalid: chunk.invalid(),
});
}
cnt = 0;
}
handle_chunk(DecodedUtf8Chunk::Unencoded(s));
}
DecodedChunk::PctDecoded(x) => {
buf[cnt] = x;
cnt += 1;
// Buffer full: flush it, but keep an unfinished trailing sequence for later.
if cnt >= buf.len() {
for chunk in Utf8Chunks::new(&buf[..cnt]) {
// NOTE(review): `incomplete()` presumably marks bytes at the end of the buffer that
// could still become valid once more input arrives — confirm against `crate::utf8`.
if chunk.incomplete() {
// Report only the valid part now; the unfinished bytes are not an error yet.
handle_chunk(DecodedUtf8Chunk::Decoded {
valid: chunk.valid(),
invalid: &[],
});
// Move the unfinished bytes to the front of the buffer and keep collecting.
// This treats them as the last `invalid_len` bytes of the buffered data.
let invalid_len = chunk.invalid().len();
buf.copy_within(cnt - invalid_len..cnt, 0);
cnt = invalid_len;
continue 'decode;
}
handle_chunk(DecodedUtf8Chunk::Decoded {
valid: chunk.valid(),
invalid: chunk.invalid(),
});
}
cnt = 0;
}
}
}
}
// End of input: flush whatever is left. No more bytes can follow, so an unfinished
// sequence is reported through `invalid` like any other invalid bytes.
for chunk in Utf8Chunks::new(&buf[..cnt]) {
handle_chunk(DecodedUtf8Chunk::Decoded {
valid: chunk.valid(),
invalid: chunk.invalid(),
});
}
}
#[cfg(feature = "alloc")]
impl<'a> Decode<'a> {
    /// Decodes the remaining input into bytes.
    ///
    /// Borrows from the input when it contains no percent-encoded octets;
    /// otherwise allocates a buffer holding the decoded bytes.
    #[must_use]
    pub fn to_bytes(self) -> Cow<'a, [u8]> {
        let source = self.source;
        if source.is_empty() {
            return Cow::Borrowed(&[]);
        }
        if !source.contains('%') {
            // A single unencoded chunk: nothing to decode.
            return Cow::Borrowed(source.as_bytes());
        }

        // The decoded output is never longer than the encoded input.
        let mut bytes = Vec::with_capacity(source.len());
        for chunk in self {
            match chunk {
                DecodedChunk::Unencoded(text) => bytes.extend_from_slice(text.as_bytes()),
                DecodedChunk::PctDecoded(octet) => bytes.push(octet),
            }
        }
        Cow::Owned(bytes)
    }

    /// Decodes the remaining input into a string.
    ///
    /// Borrows from the input when it contains no percent-encoded octets.
    ///
    /// # Errors
    ///
    /// Returns `Err` with the complete decoded bytes if they are not valid UTF-8.
    pub fn to_string(self) -> Result<Cow<'a, str>, Vec<u8>> {
        let source = self.source;
        if source.is_empty() {
            return Ok(Cow::Borrowed(""));
        }
        if !source.contains('%') {
            return Ok(Cow::Borrowed(source));
        }

        // `Ok` while everything decoded so far is valid UTF-8; switches to `Err` (raw bytes)
        // at the first invalid sequence and stays there.
        let mut decoded: Result<String, Vec<u8>> = Ok(String::with_capacity(source.len()));
        decode_utf8(self, |chunk| {
            // Unencoded text is handled like a decoded chunk with no invalid bytes.
            let (text, trailing) = match chunk {
                DecodedUtf8Chunk::Unencoded(s) => (s, &[] as &[u8]),
                DecodedUtf8Chunk::Decoded { valid, invalid } => (valid, invalid),
            };
            match &mut decoded {
                Ok(string) => {
                    string.push_str(text);
                    if !trailing.is_empty() {
                        let mut bytes = core::mem::take(string).into_bytes();
                        bytes.extend_from_slice(trailing);
                        decoded = Err(bytes);
                    }
                }
                Err(bytes) => {
                    bytes.extend_from_slice(text.as_bytes());
                    bytes.extend_from_slice(trailing);
                }
            }
        });
        decoded.map(Cow::Owned)
    }

    /// Decodes the remaining input into a string, replacing each invalid sequence
    /// reported by the UTF-8 chunking with U+FFFD (`char::REPLACEMENT_CHARACTER`).
    ///
    /// Borrows from the input when it contains no percent-encoded octets.
    #[must_use]
    pub fn to_string_lossy(self) -> Cow<'a, str> {
        let source = self.source;
        if source.is_empty() {
            return Cow::Borrowed("");
        }
        if !source.contains('%') {
            return Cow::Borrowed(source);
        }

        let mut out = String::with_capacity(source.len());
        decode_utf8(self, |chunk| match chunk {
            DecodedUtf8Chunk::Unencoded(text) => out.push_str(text),
            DecodedUtf8Chunk::Decoded { valid, invalid } => {
                out.push_str(valid);
                if !invalid.is_empty() {
                    out.push(char::REPLACEMENT_CHARACTER);
                }
            }
        });
        Cow::Owned(out)
    }
}
/// An iterator over the subslices of an `EStr` separated by a delimiter character.
///
/// Created by `EStr::split` (and by `segments_if_absolute` for paths).
#[derive(Clone, Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Split<'a, E: Encoder> {
// The underlying `str` splitter that does the actual work.
inner: str::Split<'a, char>,
// Zero-sized marker so the yielded items can be typed as `EStr<E>`.
encoder: PhantomData<E>,
}
impl<'a, E: Encoder> Iterator for Split<'a, E> {
    type Item = &'a EStr<E>;

    /// Yields the next subslice from the front, re-tagged as `EStr<E>` without re-validation.
    fn next(&mut self) -> Option<&'a EStr<E>> {
        let piece = self.inner.next()?;
        Some(EStr::new_validated(piece))
    }
}
impl<'a, E: Encoder> DoubleEndedIterator for Split<'a, E> {
    /// Yields the next subslice from the back, re-tagged as `EStr<E>` without re-validation.
    fn next_back(&mut self) -> Option<&'a EStr<E>> {
        let piece = self.inner.next_back()?;
        Some(EStr::new_validated(piece))
    }
}
// `Split` only forwards to `str::Split`, which is itself a `FusedIterator`.
impl<E: Encoder> FusedIterator for Split<'_, E> {}