use crate::scanner::{
error::{ScanError, ScanResult as Result},
flag::Flags,
};
/// Unescape one backslash escape sequence at the start of
/// `base`, appending the bytes it represents to `scratch`.
///
/// Returns the number of bytes of `base` that were consumed.
/// If `base` does not start with a backslash nothing is
/// written and `Ok(0)` is returned.
///
/// # Errors
///
/// - `ScanError::UnexpectedEOF` if nothing follows the
///   backslash
/// - `ScanError::UnknownEscape` if the byte after the
///   backslash is not a recognised escape, or a numeric
///   escape is malformed
/// - any error produced by the `cache!` checks is propagated
///   unchanged
pub(in crate::scanner) fn flow_unescape(
    opts: Flags,
    base: &str,
    scratch: &mut Vec<u8>,
) -> Result<usize>
{
    let mut cursor = base;

    cache!(~cursor, 1, opts)?;

    // Not an escape sequence at all, nothing to consume
    if !check!(~cursor => b'\\')
    {
        return Ok(0);
    }

    // Step over the backslash itself
    advance!(cursor, 1);
    cache!(~cursor, 1, opts)?;

    let kind = match cursor.as_bytes().first()
    {
        Some(&byte) => byte,
        None => return Err(ScanError::UnexpectedEOF),
    };

    // Numeric escapes yield the count of hex digits that must
    // follow; every other escape maps to a fixed byte sequence
    // which is written immediately
    let hex_digits: Option<u8> = match kind
    {
        b'x' => Some(2),
        b'u' => Some(4),
        b'U' => Some(8),
        simple =>
        {
            let literal: &[u8] = match simple
            {
                b'0' => b"\0",
                b'a' => b"\x07",
                b'b' => b"\x08",
                b't' | b'\t' => b"\x09",
                b'n' => b"\x0A",
                b'v' => b"\x0B",
                b'f' => b"\x0C",
                b'r' => b"\x0D",
                b'e' => b"\x1B",
                b' ' => b"\x20",
                b'"' => b"\"",
                b'/' => b"/",
                b'\\' => b"\\",
                b'N' => &NEL,
                b'_' => &NBS,
                b'L' => &LS,
                b'P' => &PS,
                _ => return Err(ScanError::UnknownEscape),
            };

            scratch.extend_from_slice(literal);

            None
        },
    };

    // Step over the escape's identifying byte
    advance!(cursor, 1);

    if let Some(digits) = hex_digits
    {
        cache!(~cursor, digits, opts)?;

        let amt = write_unicode_point(cursor, scratch, digits)?;
        advance!(cursor, amt);
    }

    Ok(base.len() - cursor.len())
}
/// Unescape one percent encoded UTF-8 code point (a run of
/// one to four `%XX` triplets) from the start of `base`,
/// appending the decoded bytes to `scratch`.
///
/// Returns the number of bytes of `base` that were consumed.
/// `scratch` is only modified on success; on any error it is
/// left exactly as it was passed in.
///
/// `_directive` is currently unused.
///
/// # Errors
///
/// - `ScanError::UnexpectedEOF` if fewer than three bytes
///   remain where a `%XX` triplet is required
/// - `ScanError::UnknownEscape` if a triplet is malformed,
///   the lead or continuation octets have the wrong bit
///   pattern, or the completed octet sequence is not valid
///   UTF-8 (overlong forms, surrogates, values beyond
///   U+10FFFF)
/// - any error produced by the `cache!` check is propagated
///   unchanged
pub(in crate::scanner) fn tag_uri_unescape(
    opts: Flags,
    base: &str,
    scratch: &mut Vec<u8>,
    _directive: bool,
) -> Result<usize>
{
    let mut buffer = base;

    // Octets are staged locally and committed to scratch only
    // once the whole sequence has been validated, so a failure
    // part way through never leaves partial output behind
    let mut octets = [0u8; 4];
    let mut filled: usize = 0;
    let mut expected: usize = 0;

    loop
    {
        cache!(~buffer, 3, opts)?;

        if buffer.len() < 3
        {
            return Err(ScanError::UnexpectedEOF);
        }

        if !(check!(~buffer => b'%') && isHex!(~buffer, 1) && isHex!(~buffer, 2))
        {
            return Err(ScanError::UnknownEscape);
        }

        let octet: u8 = (as_hex(buffer.as_bytes()[1]) << 4) + as_hex(buffer.as_bytes()[2]);

        if filled == 0
        {
            // The lead octet's high bits declare the sequence length
            expected = match octet
            {
                o if (o & 0x80) == 0x00 => 1,
                o if (o & 0xE0) == 0xC0 => 2,
                o if (o & 0xF0) == 0xE0 => 3,
                o if (o & 0xF8) == 0xF0 => 4,
                _ => return Err(ScanError::UnknownEscape),
            };
        }
        else if (octet & 0xC0) != 0x80
        {
            // Every trailing octet must look like 10xxxxxx
            return Err(ScanError::UnknownEscape);
        }

        octets[filled] = octet;
        filled += 1;
        advance!(buffer, 3);

        if filled == expected
        {
            break;
        }
    }

    // The bit pattern checks above still admit overlong
    // encodings, surrogates and lead octets 0xF5..=0xF7, so
    // confirm the sequence is well formed before emitting it
    let decoded = &octets[..filled];

    if std::str::from_utf8(decoded).is_err()
    {
        return Err(ScanError::UnknownEscape);
    }

    scratch.extend_from_slice(decoded);

    Ok(base.len() - buffer.len())
}
/// Parse exactly `codepoint_len` hexadecimal digits from the
/// start of `base` as a Unicode code point and append its
/// UTF-8 encoding to `scratch`.
///
/// Returns the number of bytes read from `base`, which is
/// always `codepoint_len` on success. A `codepoint_len` of 0
/// reads nothing, writes nothing and returns `Ok(0)`.
///
/// `scratch` is only modified on success.
///
/// # Errors
///
/// - `ScanError::UnexpectedEOF` if `base` ends before
///   `codepoint_len` digits were read
/// - `ScanError::UnknownEscape` if a non hexadecimal byte is
///   encountered, or the digits do not name a Unicode scalar
///   value (a surrogate, or anything above U+10FFFF)
fn write_unicode_point(base: &str, scratch: &mut Vec<u8>, codepoint_len: u8) -> Result<usize>
{
    let digits = usize::from(codepoint_len);

    if digits == 0
    {
        return Ok(0);
    }

    let bytes = base.as_bytes();
    let mut value: u32 = 0;

    for index in 0..digits
    {
        let nibble = match bytes.get(index)
        {
            None => return Err(ScanError::UnexpectedEOF),
            // Bytes >= 0x80 map to non ASCII chars, for which
            // to_digit(16) is always None
            Some(&byte) => match char::from(byte).to_digit(16)
            {
                Some(nibble) => nibble,
                None => return Err(ScanError::UnknownEscape),
            },
        };

        value = (value << 4) + nibble;
    }

    // Hand rolled encoding would silently truncate values past
    // U+10FFFF and emit surrogates, both of which are invalid
    // UTF-8; char::from_u32 rejects exactly those values
    let point = char::from_u32(value).ok_or(ScanError::UnknownEscape)?;

    let mut utf8 = [0u8; 4];
    scratch.extend_from_slice(point.encode_utf8(&mut utf8).as_bytes());

    Ok(digits)
}
/// Convert an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`)
/// into its numeric value.
///
/// No validation is performed: any byte outside the two
/// letter ranges is treated as a decimal digit and has `b'0'`
/// subtracted from it, so callers must check the byte is a
/// hex digit first.
#[inline]
fn as_hex(b: u8) -> u8
{
    match b
    {
        b'A'..=b'F' => b - b'A' + 10,
        b'a'..=b'f' => b - b'a' + 10,
        _ => b - b'0',
    }
}
/// UTF-8 encoding of U+0085 (next line)
const NEL: [u8; 2] = [0xC2, 0x85];
/// UTF-8 encoding of U+00A0 (no-break space)
const NBS: [u8; 2] = [0xC2, 0xA0];
/// UTF-8 encoding of U+2028 (line separator)
const LS: [u8; 3] = [0xE2, 0x80, 0xA8];
/// UTF-8 encoding of U+2029 (paragraph separator)
const PS: [u8; 3] = [0xE2, 0x80, 0xA9];
#[cfg(test)]
mod tests
{
    use anyhow::{anyhow, bail};
    use pretty_assertions::assert_eq;

    use super::*;
    use crate::scanner::flag::O_ZEROED;

    type TestResult = anyhow::Result<()>;

    /// Numeric escape sequences shared by the decoding and the
    /// consumed length tests
    #[rustfmt::skip]
    const HEX_ESCAPES: [&str; 11] = [
        r#"\x64"#,
        r#"\x65"#,
        r#"\x7f"#,
        r#"\xF7"#,
        r#"\xB6"#,
        r#"\xFF"#,
        r#"\xC6"#,
        r#"\u2c61"#,
        r#"\u30C4"#,
        r#"\ua026"#,
        r#"\U000111E1"#,
    ];

    /// Assert that unescaping `data` as a tag URI fails with
    /// exactly the `expected` error
    fn expect_tag_uri_error(data: &str, expected: ScanError) -> TestResult
    {
        let scratch = &mut Vec::new();

        match tag_uri_unescape(O_ZEROED, data, scratch, true)
        {
            Err(e) if e == expected => Ok(()),
            Err(e) => bail!("expected error: {}, got different error: {}", expected, e),
            Ok(amt) => bail!(
                "expected error: {}, got unexpected value: {}",
                expected,
                amt
            ),
        }
    }

    /// Every single character escape decodes to its fixed byte
    /// sequence
    #[test]
    fn flow_escape_special() -> TestResult
    {
        let mut s = Vec::new();
        let scratch = &mut s;

        // The fifth entry is a backslash followed by a literal
        // TAB. It is written with explicit escapes because a raw
        // string would hide the tab and let whitespace
        // normalisation silently turn it into a space.
        #[rustfmt::skip]
        let data = &[
            r#"\0"#,
            r#"\a"#,
            r#"\b"#,
            r#"\t"#,
            "\\\t",
            r#"\n"#,
            r#"\v"#,
            r#"\f"#,
            r#"\r"#,
            r#"\e"#,
            r#"\ "#,
            r#"\""#,
            r#"\/"#,
            r#"\\"#,
            r#"\N"#,
            r#"\_"#,
            r#"\L"#,
            r#"\P"#,
        ];
        #[rustfmt::skip]
        let expected: &[&[u8]] = &[
            &[b'\0'],
            &[b'\x07'],
            &[b'\x08'],
            &[b'\x09'],
            &[b'\x09'],
            &[b'\x0A'],
            &[b'\x0B'],
            &[b'\x0C'],
            &[b'\x0D'],
            &[b'\x1B'],
            &[b'\x20'],
            &[b'"'],
            &[b'/'],
            &[b'\\'],
            &NEL,
            &NBS,
            &LS,
            &PS,
        ];

        assert_eq!(
            data.len(),
            expected.len(),
            "test data length != expected length"
        );

        for (i, (&t, &ex)) in data.iter().zip(expected).enumerate()
        {
            scratch.clear();

            flow_unescape(O_ZEROED, t, scratch)
                .map_err(|e| anyhow!("on iteration {}, test errored with {}", i, e))?;

            assert_eq!(scratch, ex, "on iteration {}", i);
        }

        Ok(())
    }

    /// Numeric escapes decode to the UTF-8 encoding of the code
    /// point they name
    #[test]
    fn flow_escape_hex() -> TestResult
    {
        let mut s = Vec::new();
        let scratch = &mut s;

        #[rustfmt::skip]
        let expected = &[
            'd', 'e', '\u{7f}', '÷', '¶', 'ÿ', 'Æ', 'ⱡ', 'ツ', 'ꀦ', '𑇡'
        ];

        assert_eq!(
            HEX_ESCAPES.len(),
            expected.len(),
            "test data length != expected length"
        );

        for (i, (&t, &ex)) in HEX_ESCAPES.iter().zip(expected).enumerate()
        {
            let mut c: [u8; 4] = [0; 4];
            scratch.clear();

            flow_unescape(O_ZEROED, t, scratch)
                .map_err(|e| anyhow!("on iteration {}, test errored with {}", i, e))?;

            assert_eq!(
                scratch,
                ex.encode_utf8(&mut c).as_bytes(),
                "on iteration {}, codepoint '{}'",
                i,
                ex
            );
        }

        Ok(())
    }

    /// Numeric escapes report the whole escape sequence as
    /// consumed
    #[test]
    fn flow_escape_consumed() -> TestResult
    {
        let mut s = Vec::new();
        let scratch = &mut s;

        for (i, &t) in HEX_ESCAPES.iter().enumerate()
        {
            scratch.clear();

            let consumed = flow_unescape(O_ZEROED, t, scratch)
                .map_err(|e| anyhow!("on iteration {}, test errored with {}", i, e))?;

            assert_eq!(
                consumed,
                t.len(),
                "on iteration {}, expected to consume {}, got {}",
                i,
                t.len(),
                consumed
            );
        }

        Ok(())
    }

    /// Percent encoded code points of two, three and four octets
    /// decode to those octets and are fully consumed
    #[test]
    fn tag_uri_unescape_codepoint() -> TestResult
    {
        let data = &[
            r#"%C2%85"#,
            r#"%c5%b4"#,
            r#"%E2%B1%bf"#,
            r#"%E2%B8%BF"#,
            r#"%f0%90%8f%95"#,
            r#"%F0%90%AD%81"#,
        ];
        let expected: &[&[u8]] = &[
            &[0xC2, 0x85],
            &[0xC5, 0xB4],
            &[0xE2, 0xB1, 0xBF],
            &[0xE2, 0xB8, 0xBF],
            &[0xF0, 0x90, 0x8F, 0x95],
            &[0xF0, 0x90, 0xAD, 0x81],
        ];
        let scratch = &mut Vec::new();

        assert_eq!(
            data.len(),
            expected.len(),
            "test data and expected data are not the same length"
        );

        for (i, (&t, &e)) in data.iter().zip(expected).enumerate()
        {
            scratch.clear();

            let consumed = tag_uri_unescape(O_ZEROED, t, scratch, true)
                .map_err(|e| anyhow!("on iteration {}, test errored with {}", i, e))?;

            assert_eq!(
                scratch, e,
                "on iteration {}, expected byte sequence {:?}, got {:?}",
                i, e, &*scratch
            );

            assert_eq!(
                consumed,
                t.len(),
                "on iteration {}, expected to consume {}, got {}",
                i,
                t.len(),
                consumed
            );
        }

        Ok(())
    }

    /// A truncated trailing triplet is reported as end of input
    #[test]
    fn tag_uri_unescape_eof() -> TestResult
    {
        expect_tag_uri_error(r#"%C2%8"#, ScanError::UnexpectedEOF)
    }

    /// Input that is not percent encoded is rejected
    #[test]
    fn tag_uri_unescape_invalid() -> TestResult
    {
        expect_tag_uri_error(r#"\xC285"#, ScanError::UnknownEscape)
    }
}