use std::io::{Result, Write};
/// Lowercase hexadecimal digits, indexed by nibble value (0-15).
///
/// Shared by the escape writers in this file to render the two hex digits of a byte.
const HEXDIGITS: &[u8; 16] = b"0123456789abcdef";
/// Adapter around a mutable reference to another writer.
///
/// Its [`Write`] implementation forwards data to the wrapped writer (field `0`) after
/// replacing `"`, `\` and control bytes with backslash escape sequences.
pub(crate) struct BackslashEscapeWriter<'a, W: ?Sized + Write>(pub &'a mut W);
impl<W> Write for BackslashEscapeWriter<'_, W>
where
W: ?Sized + Write,
{
fn write(&mut self, buf: &[u8]) -> Result<usize> {
let mut quoted = [b'\\', b'u', b'0', b'0', b'0', b'0'];
let mut start_unquoted = 0;
for (n, c) in buf.iter().enumerate() {
let quoted = match c {
b'"' => &br#"\""#[..],
b'\\' => &br#"\\"#[..],
b'\x08' => &br#"\b"#[..],
b'\x0c' => &br#"\f"#[..],
b'\n' => &br#"\n"#[..],
b'\r' => &br#"\r"#[..],
b'\t' => &br#"\t"#[..],
c if *c < 32 => {
quoted[4] = HEXDIGITS[((*c & 0xf0) >> 4) as usize];
quoted[5] = HEXDIGITS[(*c & 0x0f) as usize];
"ed
}
_ => continue,
};
self.0.write_all(&buf[start_unquoted..n])?;
self.0.write_all(quoted)?;
start_unquoted = n + 1;
}
self.0.write_all(&buf[start_unquoted..])?;
Ok(buf.len())
}
fn flush(&mut self) -> Result<()> {
self.0.flush()
}
}
/// Writes `value` to `writer` as a three-byte percent escape: `%` followed by two
/// lowercase hexadecimal digits (high nibble first).
///
/// # Errors
///
/// Propagates any error returned by `writer`.
fn write_quoted_byte<W>(writer: &mut W, value: u8) -> Result<()>
where
    W: ?Sized + Write,
{
    let high = HEXDIGITS[usize::from(value >> 4)];
    let low = HEXDIGITS[usize::from(value & 0x0f)];
    writer.write_all(&[b'%', high, low])
}
/// Adapter around a mutable reference to another writer.
///
/// Its [`Write`] implementation forwards data to the wrapped writer (field `0`) after
/// percent-escaping bytes that are not allowed through verbatim.
pub(crate) struct URIEscapeWriter<'a, W: ?Sized + Write>(pub &'a mut W);
impl<W> Write for URIEscapeWriter<'_, W>
where
    W: ?Sized + Write,
{
    /// Forwards `buf` to the wrapped writer, percent-escaping where needed.
    ///
    /// * Bytes in `0x20..0x7f` other than `%` and `+` are passed through unchanged.
    /// * A multi-byte sequence that is valid UTF-8 and lies entirely inside `buf` is passed
    ///   through unchanged, except U+FEFF, which is escaped.
    /// * Every other byte is written as `%xx` with lowercase hex digits.
    ///
    /// No decoding state is kept between calls: a multi-byte sequence that is still
    /// incomplete at the end of `buf` is escaped byte by byte.
    ///
    /// The whole buffer is always consumed, so on success the return value is `buf.len()`.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by the wrapped writer; part of the output may
    /// already have been written at that point.
    fn write(&mut self, buf: &[u8]) -> Result<usize> {
        /// Percent-escapes every byte of `bytes` into `out`.
        fn quote_each<Out>(out: &mut Out, bytes: &[u8]) -> Result<()>
        where
            Out: ?Sized + Write,
        {
            bytes
                .iter()
                .try_for_each(|b| write_quoted_byte(&mut *out, *b))
        }

        // Number of continuation bytes still expected for the sequence held in `pending`,
        // or `None` when no multi-byte sequence is in progress.
        let mut remaining: Option<u8> = None;
        // Bytes of the multi-byte sequence currently being collected.
        let mut pending = tinyvec::array_vec!([u8; 4]);
        // Index of the first input byte that has not been forwarded yet.
        let mut run_start = 0;

        for (idx, &byte) in buf.iter().enumerate() {
            if let Some(left) = remaining {
                if byte & 0b1100_0000 == 0b1000_0000 {
                    // Continuation byte: it belongs to the pending sequence.
                    run_start = idx + 1;
                    pending.push(byte);
                    if left > 1 {
                        remaining = Some(left - 1);
                        continue;
                    }
                    // Sequence complete: keep it only if it decodes and is not U+FEFF.
                    remaining = None;
                    match std::str::from_utf8(&pending) {
                        Ok(s) if s != "\u{feff}" => self.0.write_all(&pending)?,
                        _ => quote_each(&mut *self.0, &pending)?,
                    }
                    continue;
                }
                // The sequence was cut short: escape what was collected, then handle
                // the current byte below as the start of something new.
                quote_each(&mut *self.0, &pending)?;
                remaining = None;
            }

            let passes_through = (32..127).contains(&byte) && !matches!(byte, b'%' | b'+');
            if passes_through {
                continue;
            }

            // Emit the literal run that precedes this byte.
            self.0.write_all(&buf[run_start..idx])?;
            run_start = idx + 1;

            // The count of leading one bits identifies a UTF-8 lead byte and its length.
            let expected: u8 = match byte.leading_ones() {
                2 => 1,
                3 => 2,
                4 => 3,
                _ => {
                    write_quoted_byte(self.0, byte)?;
                    continue;
                }
            };
            pending.clear();
            pending.push(byte);
            remaining = Some(expected);
        }

        if remaining.is_some() {
            // Input ended in the middle of a multi-byte sequence.
            quote_each(&mut *self.0, &pending)?;
        } else {
            // Trailing literal run (possibly empty).
            self.0.write_all(&buf[run_start..])?;
        }
        Ok(buf.len())
    }

    /// Flushes the wrapped writer.
    fn flush(&mut self) -> Result<()> {
        self.0.flush()
    }
}
#[cfg(test)]
mod test {
    use super::URIEscapeWriter;
    use std::io::Write;

    /// Runs `value` through [`URIEscapeWriter`] in one call and returns the output as text.
    fn uri_escaped(value: &[u8]) -> String {
        let mut buf = Vec::with_capacity(value.len());
        // `write_all` rather than `write`: the byte count returned by a bare `write` must
        // not be discarded, since `Write::write` is allowed to accept only part of the input.
        URIEscapeWriter(&mut buf).write_all(value).unwrap();
        String::from_utf8(buf).unwrap()
    }

    #[test]
    fn uri_escape() {
        // Plain ASCII and the two reserved characters.
        assert_eq!(" ", uri_escaped(b" "));
        assert_eq!("asdf", uri_escaped(b"asdf"));
        assert_eq!("%2b", uri_escaped(b"+"));
        assert_eq!("%25", uri_escaped(b"%"));
        assert_eq!("%2b%2b%2b", uri_escaped(b"+++"));
        assert_eq!("%25%25%25", uri_escaped(b"%%%"));
        assert_eq!("%25%2b%25", uri_escaped(b"%+%"));

        // Well-formed multi-byte sequences pass through unchanged.
        assert_eq!("ä", uri_escaped(b"\xc3\xa4"));
        assert_eq!("€", uri_escaped(b"\xe2\x82\xac"));
        assert_eq!("💖", uri_escaped(b"\xf0\x9f\x92\x96"));
        assert_eq!("äöü", uri_escaped(b"\xc3\xa4\xc3\xb6\xc3\xbc"));
        assert_eq!(
            "abcdäöüefgh",
            uri_escaped(b"abcd\xc3\xa4\xc3\xb6\xc3\xbcefgh")
        );
        assert_eq!("🄻🄰🅄🅁🄴🄻", uri_escaped(b"\xf0\x9f\x84\xbb\xf0\x9f\x84\xb0\xf0\x9f\x85\x84\xf0\x9f\x85\x81\xf0\x9f\x84\xb4\xf0\x9f\x84\xbb"));

        // Broken sequences are escaped; decoding resumes at the next lead byte.
        assert_eq!("%c3ä", uri_escaped(b"\xc3\xc3\xa4"));
        assert_eq!("%f0💖", uri_escaped(b"\xf0\xf0\x9f\x92\x96"));
        assert_eq!("%f0💖%f0", uri_escaped(b"\xf0\xf0\x9f\x92\x96\xf0"));
        assert_eq!("%f0💖asdf", uri_escaped(b"\xf0\xf0\x9f\x92\x96asdf"));
        assert_eq!("%f0%9f💖", uri_escaped(b"\xf0\x9f\xf0\x9f\x92\x96"));
        assert_eq!("%f0%9f%92💖", uri_escaped(b"\xf0\x9f\x92\xf0\x9f\x92\x96"));

        // U+FEFF is always escaped.
        assert_eq!("%ef%bb%bf", uri_escaped(b"\xEF\xBB\xBF"));
    }

    #[test]
    fn uri_escape_control_and_truncated() {
        // Control bytes and DEL are escaped.
        assert_eq!("%0a", uri_escaped(b"\n"));
        assert_eq!("%7f", uri_escaped(b"\x7f"));
        // A lead byte with nothing after it is escaped at end of input.
        assert_eq!("a%c3", uri_escaped(b"a\xc3"));
    }
}