use std::char::REPLACEMENT_CHARACTER;
use std::str::{from_utf8, from_utf8_unchecked};
/// Length in bytes of the UTF-8 sequence introduced by each possible first
/// byte, indexed by that byte's value.
///
/// An entry of `0` marks a byte that cannot start a sequence (continuation
/// bytes and bytes that never appear in well-formed UTF-8).
static UTF8_CHAR_WIDTH: [u8; 256] = [
    // 0x00..=0x7F: ASCII, one byte each.
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x5F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x7F
    // 0x80..=0xBF: continuation bytes, never a first byte.
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x9F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xAF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xBF
    // 0xC0 and 0xC1 are invalid; 0xC2..=0xDF start two-byte sequences.
    0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xCF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xDF
    // 0xE0..=0xEF start three-byte sequences.
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF
    // 0xF0..=0xF4 start four-byte sequences; 0xF5..=0xFF are invalid.
    4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF
];
/// Iterator that walks a string and yields its characters with `%XX`
/// percent-escapes decoded.
///
/// Malformed input does not stop iteration; instead `had_error` is set and
/// iteration continues.
#[derive(Debug, Clone)]
pub struct UnescapeUri<'a> {
/// Characters of the input that have not been consumed yet.
pub(super) iter: std::str::Chars<'a>,
/// Position counter, advanced as characters are consumed from `iter`.
pub(super) iter_index: usize,
/// Up to two characters queued by a previous call to `next()`; they are
/// returned, in order, before any more input is read.
pub(super) next_c: Option<(char, Option<char>)>,
/// Set to `true` once malformed input has been encountered.
pub(super) had_error: bool,
/// When `true`, an escaped slash (an escape decoding to `/`) is passed
/// through still escaped instead of being decoded.
pub(super) skip_slashes: bool,
}
impl<'a> UnescapeUri<'a> {
    /// Builds an unescaping iterator positioned at the start of `string`,
    /// with no error recorded and slash-skipping disabled.
    pub fn new(string: &str) -> UnescapeUri<'_> {
        let iter = string.chars();
        UnescapeUri {
            iter,
            iter_index: 0,
            next_c: None,
            had_error: false,
            skip_slashes: false,
        }
    }

    /// Scans ahead on a copy of this iterator and reports where an error is
    /// first flagged.
    ///
    /// Returns the distance, in units of [`index`](Self::index), between the
    /// current position and the position reached when the error flag was
    /// observed, or `None` if the remaining input is consumed without the
    /// flag being set. `self` is not advanced.
    pub fn first_error(&self) -> Option<usize> {
        let mut probe = self.clone();
        let start = probe.index();

        // Always pull at least one item before looking at the flag, then stop
        // at whichever comes first: exhaustion or a recorded error.
        loop {
            let exhausted = probe.next().is_none();
            if exhausted || probe.had_error {
                break;
            }
        }

        if probe.had_error {
            Some(probe.index() - start)
        } else {
            None
        }
    }

    /// Returns the current value of the internal position counter.
    pub fn index(&self) -> usize {
        self.iter_index
    }
}
/// Yields the characters of the input with `%XX` escapes decoded.
///
/// Decoding is lossy rather than fallible: every problem sets `had_error`
/// and iteration carries on.
///
/// * Raw ASCII control characters are dropped.
/// * Raw spaces are passed through (but flagged).
/// * Escapes that decode to an ASCII control byte are shown as the matching
///   Unicode "control picture" symbol.
/// * Escapes with missing or non-hex digits are passed through as text.
/// * Percent-encoded bytes with the high bit set are collected and decoded as
///   one UTF-8 sequence; an invalid or truncated sequence yields U+FFFD.
/// * When `skip_slashes` is set, an escape decoding to `/` is emitted still
///   escaped.
impl<'a> Iterator for UnescapeUri<'a> {
    type Item = char;

    #[inline]
    fn next(&mut self) -> Option<char> {
        // Bytes of the percent-encoded multi-byte UTF-8 sequence gathered so
        // far during this call.
        let mut utf8_buf = [0u8; 4];
        let mut utf8_len = 0usize;

        // Hand out characters queued by an earlier call before reading input.
        if let Some((c, next_c)) = self.next_c.take() {
            if let Some(x) = next_c {
                self.next_c = Some((x, None));
            }
            return Some(c);
        }

        // Not a `for` loop: the body pulls further items from `self.iter`.
        while let Some(c) = self.iter.next() {
            self.iter_index += 1;

            // Raw control characters are dropped from the output.
            if c.is_ascii_control() {
                self.had_error = true;
                continue;
            }

            match c {
                ' ' => {
                    // An unescaped space is flagged but still passed through.
                    self.had_error = true;
                    if utf8_len == 0 {
                        return Some(c);
                    } else {
                        self.next_c = Some((c, None));
                        return Some(REPLACEMENT_CHARACTER);
                    }
                }
                '%' => {
                    // Most significant nibble.
                    self.iter_index += 1;
                    let msn = match self.iter.next() {
                        Some(c) if c.is_ascii_hexdigit() => c as u8,
                        Some(c) => {
                            self.had_error = true;
                            if utf8_len == 0 {
                                self.next_c = Some((c, None));
                                return Some('%');
                            } else {
                                // U+FFFD stands in for the interrupted UTF-8
                                // sequence; the malformed escape itself is
                                // still passed through in full ('%' then `c`).
                                self.next_c = Some(('%', Some(c)));
                                return Some(REPLACEMENT_CHARACTER);
                            }
                        }
                        None => {
                            // Nothing was consumed, so undo the increment.
                            self.iter_index -= 1;
                            self.had_error = true;
                            self.next_c = Some(('%', None));
                            return Some(REPLACEMENT_CHARACTER);
                        }
                    };

                    // Least significant nibble.
                    self.iter_index += 1;
                    let lsn = match self.iter.next() {
                        Some(c) if c.is_ascii_hexdigit() => c as u8,
                        Some(c) => {
                            self.had_error = true;
                            if utf8_len == 0 {
                                self.next_c = Some((msn as char, Some(c)));
                                return Some('%');
                            } else {
                                // NOTE(review): the queue holds only two
                                // characters, so '%' and the first digit are
                                // dropped here. Fixing this needs a wider
                                // `next_c`.
                                self.next_c = Some((c, None));
                                return Some(REPLACEMENT_CHARACTER);
                            }
                        }
                        None => {
                            // Nothing was consumed, so undo the increment.
                            self.iter_index -= 1;
                            self.had_error = true;
                            // Keep the digit that was read: queue '%' and it.
                            self.next_c = Some(('%', Some(msn as char)));
                            return Some(REPLACEMENT_CHARACTER);
                        }
                    };

                    let buf = [msn, lsn];
                    // SAFETY: both bytes passed `is_ascii_hexdigit`, so `buf`
                    // is pure ASCII and therefore valid UTF-8.
                    let buf_str = unsafe { from_utf8_unchecked(&buf) };
                    let decoded = u8::from_str_radix(buf_str, 16)
                        .expect("two ASCII hex digits always parse as a u8");

                    if self.skip_slashes && decoded == b'/' {
                        // The escape is emitted untouched. Any partial UTF-8
                        // sequence before it cannot be represented in the
                        // two-slot queue, so at least record the error.
                        if utf8_len != 0 {
                            self.had_error = true;
                        }
                        self.next_c = Some((msn as char, Some(lsn as char)));
                        return Some('%');
                    }

                    if (decoded as char).is_ascii_control() {
                        const CONTROL_PICTURES: u32 = 0x2400;
                        const SYMBOL_FOR_DELETE: u32 = 0x2421;
                        // The C0 controls map linearly onto U+2400..=U+241F,
                        // but DEL (0x7F) has its symbol at U+2421, not at
                        // U+2400 + 0x7F.
                        let code = if decoded == 0x7F {
                            SYMBOL_FOR_DELETE
                        } else {
                            CONTROL_PICTURES + u32::from(decoded)
                        };
                        let c = core::char::from_u32(code).unwrap_or(REPLACEMENT_CHARACTER);
                        self.had_error = true;
                        if utf8_len == 0 {
                            return Some(c);
                        } else {
                            self.next_c = Some((c, None));
                            return Some(REPLACEMENT_CHARACTER);
                        }
                    } else if decoded & 0x80 == 0x80 {
                        // Part of a multi-byte UTF-8 sequence. A first byte
                        // with table width 0 makes the check below fire at
                        // once, and `from_utf8` then rejects it.
                        utf8_buf[utf8_len] = decoded;
                        utf8_len += 1;
                        if utf8_len >= UTF8_CHAR_WIDTH[utf8_buf[0] as usize] as usize {
                            if let Ok(utf8_str) = from_utf8(&utf8_buf[..utf8_len]) {
                                return Some(utf8_str.chars().next().unwrap());
                            } else {
                                self.had_error = true;
                                return Some(REPLACEMENT_CHARACTER);
                            }
                        }
                        // Otherwise keep reading to collect the rest.
                    } else if utf8_len != 0 {
                        // An ASCII byte interrupted a multi-byte sequence.
                        self.had_error = true;
                        self.next_c = Some((decoded as char, None));
                        return Some(REPLACEMENT_CHARACTER);
                    } else {
                        return Some(decoded as char);
                    }
                }
                c => {
                    if utf8_len != 0 {
                        // A literal character interrupted a multi-byte sequence.
                        self.next_c = Some((c, None));
                        self.had_error = true;
                        return Some(REPLACEMENT_CHARACTER);
                    }
                    return Some(c);
                }
            }
        }

        // Input ended in the middle of a percent-encoded UTF-8 sequence:
        // report it instead of silently discarding the collected bytes.
        if utf8_len != 0 {
            self.had_error = true;
            return Some(REPLACEMENT_CHARACTER);
        }
        None
    }
}