use core::char::REPLACEMENT_CHARACTER;
use core::fmt;
use core::fmt::Display;
use core::fmt::Write;
use core::iter::FusedIterator;
use core::str::{from_utf8, from_utf8_unchecked};
#[cfg(feature = "alloc")]
use alloc::borrow::Cow;
#[cfg(feature = "alloc")]
use alloc::string::{String, ToString};
#[cfg(feature = "alloc")]
use core::convert::TryInto;
// Lookup table with one entry per possible byte value.
//
// `UnescapeUri::next` indexes it with the first byte of a percent-decoded
// non-ASCII sequence and keeps collecting bytes until that many have been
// gathered before attempting UTF-8 validation.
//
// NOTE(review): the values look like the usual "UTF-8 sequence width by lead
// byte" table (1 for ASCII, 2/3/4 for multi-byte lead bytes, 0 for bytes that
// cannot start a sequence) — confirm against the UTF-8 specification before
// editing any entry.
static UTF8_CHAR_WIDTH: [u8; 256] = [
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ];
/// Iterator that walks a percent-encoded string and yields its unescaped
/// characters.
///
/// Decoding problems do not stop iteration: the iterator keeps producing
/// characters (substituting or passing through the offending input) and
/// records the most recent problem in `had_error`.
#[derive(Debug, Clone)]
pub struct UnescapeUri<'a> {
/// Characters of the escaped input that have not been consumed yet.
pub(super) iter: core::str::Chars<'a>,
/// Count of input characters (not bytes) pulled from `iter` so far; exposed
/// through `index()`.
pub(super) iter_index: usize,
/// Up to two characters that are already decided and must be returned by the
/// following calls to `next` before any more input is read: the first element
/// is returned first, the optional second element afterwards.
pub(super) next_c: Option<(char, Option<char>)>,
/// Most recent decoding error recorded by `next`. Nothing in this file resets
/// it back to `None`, so callers compare it before/after a call or start from
/// a fresh iterator.
pub(super) had_error: Option<DecodingError>,
/// When `true`, an escaped slash (`%2F`) is passed through still escaped
/// instead of being decoded to `/`.
pub(super) skip_slashes: bool,
}
impl<'a> Display for UnescapeUri<'a> {
    /// Writes the unescaped form of the remaining input to `f`.
    ///
    /// Iteration happens on a clone, so `self` is left where it was. Only
    /// failures of the underlying writer are reported; decoding errors recorded
    /// by the iterator are not surfaced here.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        for ch in self.clone() {
            f.write_char(ch)?;
        }
        Ok(())
    }
}
#[cfg(feature = "alloc")]
impl<'a> From<UnescapeUri<'a>> for Cow<'a, str> {
    /// Converts the iterator into a `Cow<str>` by delegating to
    /// `UnescapeUri::to_cow`.
    fn from(iter: UnescapeUri<'a>) -> Self {
        UnescapeUri::to_cow(&iter)
    }
}
// Marker impl promising that `next` keeps returning `None` once it has
// returned `None`.
// NOTE(review): this relies on `next` only drawing from the queued characters
// and the inner `Chars`; re-check the promise whenever `next` gains new state.
impl<'a> FusedIterator for UnescapeUri<'a> {}
impl<'a> UnescapeUri<'a> {
    /// Returns the first decoding error found by iterating over a clone of this
    /// iterator, or `None` when iteration finishes without recording one.
    ///
    /// `self` is not advanced. The reported index is the number of input
    /// characters the clone consumed up to and including the failure, minus one
    /// (saturating at zero) — i.e. it is relative to the current position of
    /// `self`, not to the start of the original string.
    pub fn first_error(&self) -> Option<UnescapeError> {
        let mut iter = self.clone();
        let begin = iter.index();

        while iter.next().is_some() {
            if iter.had_error.is_some() {
                break;
            }
        }

        // Looked at after the loop (not only inside it) because the iterator can
        // record an error and then run out of input without yielding anything.
        let err = iter.had_error?;
        let consumed = iter.index() - begin;
        Some(UnescapeError::new(err, consumed.saturating_sub(1)))
    }

    /// Returns the number of input characters consumed so far.
    pub fn index(&self) -> usize {
        self.iter_index
    }

    /// Returns this iterator reconfigured so that escaped slashes (`%2F`) are
    /// passed through still escaped instead of being decoded.
    pub fn skip_slashes(mut self) -> Self {
        self.skip_slashes = true;
        self
    }

    /// Returns the unescaped remainder as a `Cow<str>`, borrowing from the
    /// input when nothing needs to be decoded and allocating otherwise.
    ///
    /// Decoding errors are not reported; see [`UnescapeUri::try_to_cow`].
    #[cfg(feature = "alloc")]
    pub fn to_cow(&self) -> Cow<'a, str> {
        let as_str = self.iter.as_str();

        // Borrowing is only correct when the remaining input is the complete
        // output. Characters queued in `next_c` are not part of `as_str`, so
        // their presence forces the decoding path as well.
        let must_decode = self.next_c.is_some()
            || as_str.contains(|c: char| !c.is_ascii_graphic() || c == '%');

        if must_decode {
            Cow::from(self.to_string())
        } else {
            Cow::from(as_str)
        }
    }

    /// Like [`UnescapeUri::to_cow`], but returns the decoding error instead of
    /// a lossy result when the remaining input cannot be unescaped cleanly.
    ///
    /// # Errors
    ///
    /// Returns the [`UnescapeError`] produced by
    /// [`UnescapeUri::try_to_string`] when decoding is required and fails.
    #[cfg(feature = "alloc")]
    pub fn try_to_cow(&self) -> Result<Cow<'a, str>, UnescapeError> {
        let as_str = self.iter.as_str();

        // Same rule as in `to_cow`: queued characters are not in `as_str`.
        let must_decode = self.next_c.is_some()
            || as_str.contains(|c: char| !c.is_ascii_graphic() || c == '%');

        if must_decode {
            self.try_to_string().map(Cow::from)
        } else {
            Ok(Cow::from(as_str))
        }
    }

    /// Unescapes the remaining input into a newly allocated `String`.
    ///
    /// # Errors
    ///
    /// Returns an [`UnescapeError`] if the conversion of a clone of this
    /// iterator into a `String` fails.
    #[cfg(feature = "alloc")]
    pub fn try_to_string(&self) -> Result<String, UnescapeError> {
        self.clone().try_into()
    }

    /// Checks whether the unescaped output begins with `unescaped_prefix`.
    ///
    /// Returns `Some(index)` on a match, where `index` is the value `index()`
    /// has after the prefix was consumed from a clone of `self`. Returns `None`
    /// when a character differs, when the input ends first, or when a decoding
    /// error is recorded while (or before) reading the prefix. An empty prefix
    /// always matches.
    pub fn starts_with<T: AsRef<str>>(&self, unescaped_prefix: T) -> Option<usize> {
        let mut iter_self = self.clone();

        for expected in unescaped_prefix.as_ref().chars() {
            let actual = iter_self.next();
            if iter_self.had_error.is_some() || actual != Some(expected) {
                return None;
            }
        }

        Some(iter_self.index())
    }
}
#[cfg(feature = "alloc")]
impl<'a> TryInto<String> for UnescapeUri<'a> {
    type Error = UnescapeError;

    /// Consumes the iterator and collects the unescaped characters into a
    /// `String`.
    ///
    /// # Errors
    ///
    /// Returns an [`UnescapeError`] carrying the recorded decoding error and the
    /// iterator's `index()` as soon as the iterator reports a problem —
    /// including a problem recorded on the final call that yields nothing more.
    fn try_into(mut self) -> Result<String, Self::Error> {
        let mut buffer = String::with_capacity(self.size_hint().0);

        // A `for` loop would move `self`; the error flag has to be inspected
        // after every step, so the iterator is driven by hand.
        while let Some(ch) = self.next() {
            if let Some(e) = self.had_error {
                return Err(UnescapeError::new(e, self.index()));
            }
            buffer.push(ch);
        }

        // The iterator can record an error and then finish without yielding
        // another character (an unescaped control character at the very end of
        // the input is dropped, for example). Without this check such input
        // would be accepted silently.
        if let Some(e) = self.had_error {
            return Err(UnescapeError::new(e, self.index()));
        }

        buffer.shrink_to_fit();
        Ok(buffer)
    }
}
/// Error returned when unescaping fails: the underlying decoding problem plus
/// the position it was associated with.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct UnescapeError {
/// The decoding failure itself; its `Display` output is what this error
/// prints.
inner: DecodingError,
/// Character index supplied by the code that built the error. The exact
/// convention (absolute vs. relative to where iteration started) depends on
/// the constructing call site.
pub index: usize,
}
// Uses only the trait's default methods, so no `source()` is exposed for the
// wrapped `DecodingError`.
impl ::core::error::Error for UnescapeError {}
impl fmt::Display for UnescapeError {
    /// Prints the message of the wrapped decoding error; the index is not part
    /// of the output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("{}", self.inner))
    }
}
impl UnescapeError {
pub(crate) fn new(inner: DecodingError, index: usize) -> Self {
Self { inner, index }
}
}
/// The individual problems the unescaping iterator can record while decoding.
///
/// The derived ordering and hashing follow the declaration order of the
/// variants, so reordering them changes `PartialOrd`/`Ord` results.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum DecodingError {
/// An ASCII control character appeared literally (unescaped) in the input;
/// carries that character.
UnescapedAsciiControl(char),
/// A literal space appeared in the input.
Space,
/// The input ended right after a `%`; carries how many of the two expected
/// characters were still missing.
MissingChar(u8),
/// A character following `%` was not an ASCII hex digit; carries the
/// offending character.
InvalidEscape(char),
/// A percent-escape decoded to an ASCII control character; carries the
/// decoded character.
AsciiControl(char),
/// The bytes collected for a multi-byte sequence were not valid UTF-8.
/// `Display` prints `buf[..len]`.
InvalidUtf8 {
buf: [u8; 4],
len: u8,
},
/// A multi-byte sequence was interrupted before enough bytes had been
/// collected. `Display` prints `buf[..len]`.
UnfinishedUtf8 {
buf: [u8; 4],
len: u8,
},
}
// Uses only the trait's default methods; the message comes from the `Display`
// impl.
impl core::error::Error for DecodingError {}
impl fmt::Display for DecodingError {
    /// Writes a human-readable description of the decoding problem.
    ///
    /// For the UTF-8 variants the first `len` bytes of `buf` are printed. `len`
    /// is clamped to the buffer size, so a value larger than the buffer prints
    /// the whole buffer instead of panicking while an error is being reported.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Self::UnescapedAsciiControl(c) => {
                write!(f, "unescaped ascii control character `{c:?}`")
            }
            Self::Space => f.write_str("unescaped space ` `"),
            Self::MissingChar(n) => write!(f, "missing {n} char after `%`"),
            Self::InvalidEscape(c) => write!(
                f,
                "the 2 char after `%` must be a valid hex character `{c:?}`"
            ),
            Self::AsciiControl(c) => write!(
                f,
                "ascii control chars are forbidden for security reasons `{c:?}`"
            ),
            Self::InvalidUtf8 { buf, len } => {
                let shown = usize::from(len).min(buf.len());
                write!(f, "invalid utf8 {:?}", &buf[..shown])
            }
            Self::UnfinishedUtf8 { buf, len } => {
                let shown = usize::from(len).min(buf.len());
                write!(f, "unfinished utf8 {:?}", &buf[..shown])
            }
        }
    }
}
impl<'a> Iterator for UnescapeUri<'a> {
    type Item = char;

    /// Produces the next unescaped character.
    ///
    /// Decoding is lossy: malformed input never stops iteration. Instead the
    /// problem is stored in `had_error` and either the offending characters are
    /// passed through or U+FFFD is emitted in their place:
    ///
    /// * unescaped ASCII control characters are dropped;
    /// * a literal space is passed through (and flagged);
    /// * `%` followed by something that is not two hex digits is passed through;
    /// * an escaped ASCII control character is replaced by its glyph from the
    ///   Unicode "Control Pictures" block;
    /// * a percent-encoded multi-byte UTF-8 sequence that is invalid, or that is
    ///   interrupted by other input or by the end of the input, yields U+FFFD.
    ///
    /// When `skip_slashes` is set, `%2F` is passed through still escaped.
    #[inline]
    fn next(&mut self) -> Option<char> {
        // Bytes of the percent-encoded multi-byte UTF-8 sequence collected
        // during this call. They never outlive the call: a sequence either
        // completes, or is reported as broken, before returning.
        let mut utf8_buf = [0u8; 4];
        let mut utf8_len = 0usize;

        // Characters decided by an earlier call go out first.
        if let Some((c, next_c)) = self.next_c.take() {
            if let Some(x) = next_c {
                self.next_c = Some((x, None));
            }
            return Some(c);
        }

        while let Some(c) = self.iter.next() {
            self.iter_index += 1;

            if c.is_ascii_control() {
                // Dropped from the output; only the error flag remembers it.
                self.had_error = Some(DecodingError::UnescapedAsciiControl(c));
                continue;
            }

            match c {
                ' ' => {
                    self.had_error = Some(DecodingError::Space);
                    if utf8_len == 0 {
                        return Some(c);
                    } else {
                        // The space interrupted a multi-byte sequence: emit the
                        // replacement now and the space on the next call.
                        self.next_c = Some((c, None));
                        return Some(REPLACEMENT_CHARACTER);
                    }
                }
                '%' => {
                    // Most significant nibble. The index is bumped up front and
                    // rolled back if the input turns out to be exhausted.
                    self.iter_index += 1;
                    let msn = match self.iter.next() {
                        Some(c) if c.is_ascii_hexdigit() => c as u8,
                        Some(c) => {
                            self.had_error = Some(DecodingError::InvalidEscape(c));
                            self.next_c = Some((c, None));
                            return Some(if utf8_len == 0 {
                                '%'
                            } else {
                                REPLACEMENT_CHARACTER
                            });
                        }
                        None => {
                            self.iter_index -= 1;
                            self.had_error = Some(DecodingError::MissingChar(2));
                            // `c` is the `%` itself here (the inner bindings
                            // above are scoped to their own arms).
                            self.next_c = Some((c, None));
                            return Some(REPLACEMENT_CHARACTER);
                        }
                    };

                    // Least significant nibble, same bookkeeping.
                    self.iter_index += 1;
                    let lsn = match self.iter.next() {
                        Some(c) if c.is_ascii_hexdigit() => c as u8,
                        Some(c) => {
                            self.had_error = Some(DecodingError::InvalidEscape(c));
                            if utf8_len == 0 {
                                self.next_c = Some((msn as char, Some(c)));
                                return Some('%');
                            } else {
                                self.next_c = Some((c, None));
                                return Some(REPLACEMENT_CHARACTER);
                            }
                        }
                        None => {
                            self.iter_index -= 1;
                            self.had_error = Some(DecodingError::MissingChar(1));
                            self.next_c = Some((c, None));
                            return Some(REPLACEMENT_CHARACTER);
                        }
                    };

                    let buf = [msn, lsn];
                    // SAFETY: both bytes passed `is_ascii_hexdigit`, so they are
                    // ASCII and therefore valid UTF-8.
                    let buf_str = unsafe { from_utf8_unchecked(&buf) };
                    let decoded = u8::from_str_radix(buf_str, 16)
                        .expect("two ASCII hex digits always parse as a u8");

                    if self.skip_slashes && decoded == b'/' {
                        // Pass `%2F` through untouched.
                        // NOTE(review): bytes of an unfinished multi-byte
                        // sequence collected before this point are dropped
                        // without an error; `next_c` has no room to queue a
                        // replacement character in addition to the two digits.
                        self.next_c = Some((msn as char, Some(lsn as char)));
                        return Some('%');
                    }

                    if (decoded as char).is_ascii_control() {
                        const CONTROL_PICTURES: u32 = 0x2400;
                        const SYMBOL_FOR_DELETE: u32 = 0x2421;
                        // C0 controls map linearly into the Control Pictures
                        // block; DEL (0x7F) does not — its picture is U+2421.
                        let picture = if decoded == 0x7F {
                            SYMBOL_FOR_DELETE
                        } else {
                            u32::from(decoded) + CONTROL_PICTURES
                        };
                        let c = core::char::from_u32(picture)
                            .expect("control picture code points are valid scalar values");
                        self.had_error = Some(DecodingError::AsciiControl(decoded as char));
                        if utf8_len == 0 {
                            return Some(c);
                        } else {
                            self.next_c = Some((c, None));
                            return Some(REPLACEMENT_CHARACTER);
                        }
                    } else if decoded & 0x80 == 0x80 {
                        // Part of a multi-byte sequence. The expected width comes
                        // from the first collected byte and is at most 4, so the
                        // buffer index stays in bounds.
                        utf8_buf[utf8_len] = decoded;
                        utf8_len += 1;
                        if utf8_len >= UTF8_CHAR_WIDTH[utf8_buf[0] as usize] as usize {
                            if let Ok(utf8_str) = from_utf8(&utf8_buf[..utf8_len]) {
                                return Some(utf8_str.chars().next().unwrap());
                            } else {
                                self.had_error = Some(DecodingError::InvalidUtf8 {
                                    buf: utf8_buf,
                                    len: utf8_len as u8,
                                });
                                return Some(REPLACEMENT_CHARACTER);
                            }
                        }
                    } else if utf8_len != 0 {
                        // An escaped ASCII character interrupted the sequence.
                        // `len` is the number of bytes actually collected
                        // (at most 3 here), which is what `Display` slices by.
                        self.had_error = Some(DecodingError::UnfinishedUtf8 {
                            buf: utf8_buf,
                            len: utf8_len as u8,
                        });
                        self.next_c = Some((decoded as char, None));
                        return Some(REPLACEMENT_CHARACTER);
                    } else {
                        return Some(decoded as char);
                    }
                }
                c => {
                    if utf8_len != 0 {
                        // A plain character interrupted the sequence.
                        self.next_c = Some((c, None));
                        self.had_error = Some(DecodingError::UnfinishedUtf8 {
                            buf: utf8_buf,
                            len: utf8_len as u8,
                        });
                        return Some(REPLACEMENT_CHARACTER);
                    }
                    return Some(c);
                }
            }
        }

        // End of input in the middle of a multi-byte sequence: report it the
        // same way as any other interruption instead of dropping the bytes.
        if utf8_len != 0 {
            self.had_error = Some(DecodingError::UnfinishedUtf8 {
                buf: utf8_buf,
                len: utf8_len as u8,
            });
            return Some(REPLACEMENT_CHARACTER);
        }

        None
    }

    /// Returns conservative bounds on the number of characters still to come.
    ///
    /// * Lower bound: only the already queued characters are certain, because
    ///   any amount of remaining input may be dropped (unescaped control
    ///   characters) or collapse into fewer characters (escapes).
    /// * Upper bound: every remaining input character produces at most one
    ///   output character, except a dangling `%` at the very end, which yields
    ///   two — hence the extra one.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let pending = match self.next_c {
            None => 0,
            Some((_, None)) => 1,
            Some((_, Some(_))) => 2,
        };
        let upper = self
            .iter
            .size_hint()
            .1
            .and_then(|remaining| remaining.checked_add(pending + 1));
        (pending, upper)
    }
}