use crate::http_token_utils::TokenSimilarity::{Distant, Identical, SameLetters};
use crate::MESSAGE_MAX_SIZE;
use std::fmt::Write;
/// A raw HTTP token (for example a header name or a header value), borrowed as bytes.
///
/// It is a plain byte slice, so it is not required to be valid UTF-8.
pub type HttpToken<'a> = &'a [u8];
/// Views a string as an [`HttpToken`] (its underlying bytes) without copying.
///
/// It is a `const fn`, so it can be used to initialise constants such as [`TE`] and [`CL`].
pub const fn http_token(v: &str) -> HttpToken {
v.as_bytes()
}
// Textually includes `char_tables.rs` from the build output directory (`OUT_DIR`).
// NOTE(review): presumably this file is produced by the build script and defines the
// lookup tables indexed below (`VALID_URI_CHARS`, `TCHAR_TABLE`, `HEADER_VALUE_CHARS`,
// `VCHAR`, `BAD_CHARACTERS`, `RFC_WHITE_SPACE`) — confirm against the build script.
include!(concat!(env!("OUT_DIR"), "/char_tables.rs"));
/// Lower-case name of the `Transfer-Encoding` header.
pub const TE: HttpToken = http_token("transfer-encoding");
/// Lower-case name of the `Content-Length` header.
pub const CL: HttpToken = http_token("content-length");
/// ASCII space.
const SP: u8 = b' ';
/// ASCII colon.
const COLON: u8 = b':';
/// Lower-case name of the `chunked` transfer coding.
pub const CHUNKED: HttpToken = http_token("chunked");
/// Transfer codings accepted by [`is_valid_te`], all spelled in lower case.
///
/// `is_valid_te` returns one of these entries as the canonical form of a matched value.
const VALID_TE: &[HttpToken] = &[
CHUNKED,
http_token("compress"),
http_token("deflate"),
http_token("gzip"),
http_token("identity"),
];
/// Result of comparing a header name with an important header name
/// (see [`determine_similarity`]).
///
/// The derived `PartialOrd` follows declaration order:
/// `Identical < SameLetters < Distant`.
#[derive(PartialOrd, PartialEq, Debug)]
pub enum TokenSimilarity {
/// Same length and every character (delimiters included) matches, ignoring ASCII case.
Identical,
/// The letters match, but delimiters differ and/or extra non-alphanumeric bytes
/// (whitespace, control or non-ASCII bytes) surround or separate them.
SameLetters,
/// Anything else.
Distant,
}
/// Validates a `Transfer-Encoding` value and returns its canonical spelling.
///
/// Surrounding RFC whitespace is trimmed, everything from the first `;` onwards is
/// dropped, the remainder is trimmed again and then compared, ignoring ASCII case,
/// against the entries of `VALID_TE`.
///
/// Returns the matching `VALID_TE` entry, or `None` when the coding is not recognised.
/// The returned token is a `'static` constant: it never borrows from `value`, so it can
/// be kept after the input buffer is gone.
pub fn is_valid_te(value: HttpToken) -> Option<HttpToken<'static>> {
    let trimmed = rfc_whitespace_trim(value);
    // Only the coding name is validated; whatever follows the first `;` is ignored.
    let name_len = trimmed
        .iter()
        .position(|&b| b == b';')
        .unwrap_or(trimmed.len());
    let te = rfc_whitespace_trim(&trimmed[..name_len]);
    VALID_TE
        .iter()
        .copied()
        .find(|known| known.eq_ignore_ascii_case(te))
}
/// Returns the `VALID_URI_CHARS` table entry for byte `b`.
#[inline(always)]
pub fn is_valid_uri_char(b: u8) -> bool {
    VALID_URI_CHARS[usize::from(b)]
}
/// Tells whether `b` is the ASCII space byte (`SP`).
#[inline(always)]
pub fn is_space(b: u8) -> bool {
    SP == b
}
/// Tells whether `b` is the ASCII colon byte (`COLON`).
#[inline(always)]
pub fn is_colon(b: u8) -> bool {
    COLON == b
}
/// Returns the index of the first byte at or after `current` that is greater than `SP`,
/// i.e. skips spaces and every byte below the space character.
///
/// Yields `value.len()` when no such byte remains, and `current` unchanged when it
/// already lies past the end of `value`.
#[inline(always)]
pub fn find_next_header_symbol(value: HttpToken, current: usize) -> usize {
    match value.get(current..) {
        Some(rest) => current + rest.iter().take_while(|&&b| b <= SP).count(),
        // `current` is beyond the end: nothing to skip.
        None => current,
    }
}
/// Returns the index of the first ASCII alphanumeric byte at or after `current`.
///
/// Yields `value.len()` when no such byte remains, and `current` unchanged when it
/// already lies past the end of `value`.
#[inline(always)]
pub fn find_next_alphanumeric(value: HttpToken, current: usize) -> usize {
    match value.get(current..) {
        Some(rest) => {
            let skipped = rest
                .iter()
                .take_while(|b| !b.is_ascii_alphanumeric())
                .count();
            current + skipped
        }
        // `current` is beyond the end: nothing to skip.
        None => current,
    }
}
/// Returns the `TCHAR_TABLE` entry for byte `b`.
///
/// The unit test in this file expects the table to accept ASCII alphanumerics and the
/// delimiters ``-_.!#$%&'*+^`|~``.
#[inline(always)]
pub fn is_rfc_tchar(b: u8) -> bool {
    TCHAR_TABLE[usize::from(b)]
}
/// Returns the `HEADER_VALUE_CHARS` table entry for byte `b`.
///
/// The unit test in this file expects the table to agree with
/// `is_rfc_vchar(b) || is_rfc_obs_text(b) || is_rfc_whitespace(b)`.
#[inline(always)]
pub fn is_valid_header_value_char(b: u8) -> bool {
    HEADER_VALUE_CHARS[usize::from(b)]
}
/// Returns the `VCHAR` table entry for byte `b`.
#[inline(always)]
pub fn is_rfc_vchar(b: u8) -> bool {
    VCHAR[usize::from(b)]
}
/// Tells whether `b` lies in the `0x80..=0xFF` range, i.e. is not an ASCII byte.
#[inline(always)]
pub fn is_rfc_obs_text(b: u8) -> bool {
    !b.is_ascii()
}
/// Returns the `BAD_CHARACTERS` table entry for byte `b`.
#[inline(always)]
pub fn is_bad_http_character(b: u8) -> bool {
    BAD_CHARACTERS[usize::from(b)]
}
/// Renders raw bytes as a double-quoted, escaped string.
///
/// * `\r`, `\n`, `\t` and `\` are written as `\r`, `\n`, `\t` and `\\`;
/// * bytes accepted by `is_rfc_vchar` are copied verbatim;
/// * every other byte is written as a backslash followed by its `0xNN` hex form.
///
/// No further input is consumed once the output has reached
/// `MESSAGE_MAX_SIZE / 2 - 4` bytes; the closing quote is always appended.
pub fn to_quoted_ascii(raw_data: HttpToken) -> String {
    let limit = MESSAGE_MAX_SIZE / 2;
    let mut quoted = String::with_capacity(raw_data.len().min(limit));
    quoted.push('"');
    for &byte in raw_data {
        if quoted.len() >= limit - 4 {
            break;
        }
        let named_escape = match byte {
            b'\r' => Some("\\r"),
            b'\n' => Some("\\n"),
            b'\t' => Some("\\t"),
            b'\\' => Some("\\\\"),
            _ => None,
        };
        if let Some(escape) = named_escape {
            quoted.push_str(escape);
        } else if is_rfc_vchar(byte) {
            quoted.push(char::from(byte));
        } else {
            write!(quoted, "\\{:#04x}", byte).expect("Writing to strings is infallible");
        }
    }
    quoted.push('"');
    quoted
}
/// Renders raw bytes as a double-quoted string with the actual content masked.
///
/// * `\r`, `\n`, `\t` and `\` are written as `\r`, `\n`, `\t` and `\\`;
/// * a space is kept as a space;
/// * bytes that are neither `is_rfc_vchar` nor `is_rfc_obs_text` are written as a
///   backslash followed by their `0xNN` hex form;
/// * every run of remaining bytes collapses into a single `_`.
///
/// No further input is consumed once the output has reached
/// `MESSAGE_MAX_SIZE / 2 - 4` bytes; the closing quote is always appended.
pub fn obfuscate_value(raw_data: HttpToken) -> String {
    // Size the buffer from the input (capped), as `to_quoted_ascii` does, rather than
    // always reserving the maximum for short values.
    let mut masked = String::with_capacity(raw_data.len().min(MESSAGE_MAX_SIZE / 2));
    masked.push('"');
    for &byte in raw_data {
        if masked.len() >= MESSAGE_MAX_SIZE / 2 - 4 {
            break;
        }
        match byte {
            b'\r' => masked.push_str("\\r"),
            b'\n' => masked.push_str("\\n"),
            b'\t' => masked.push_str("\\t"),
            b'\\' => masked.push_str("\\\\"),
            b' ' => masked.push(' '),
            _ if !is_rfc_vchar(byte) && !is_rfc_obs_text(byte) => {
                write!(masked, "\\{:#04x}", byte).expect("Writing to strings is infallible");
            }
            // First masked byte of a run: emit the placeholder once.
            _ if !masked.ends_with('_') => masked.push('_'),
            // The run is already represented by the preceding `_`.
            _ => {}
        }
    }
    masked.push('"');
    masked
}
/// Returns the `RFC_WHITE_SPACE` table entry for byte `b`.
#[inline(always)]
pub fn is_rfc_whitespace(b: u8) -> bool {
    RFC_WHITE_SPACE[usize::from(b)]
}
pub fn parse_num(value: HttpToken) -> Result<u64, &str> {
let trimmed = rfc_whitespace_trim(value);
let mut result: u64 = 0;
for c in trimmed.iter() {
if !c.is_ascii_digit() {
return Err("Not an integer");
}
let digit = (*c - b'0') as u64;
result = result.checked_mul(10).ok_or("64-bits overflow")?;
result = result.checked_add(digit).ok_or("64-bits overflow")?;
}
Ok(result)
}
/// Strips leading and trailing bytes accepted by `is_rfc_whitespace`.
///
/// Returns a sub-slice of `value`; it is empty when `value` is empty or consists of
/// whitespace only.
#[inline]
pub fn rfc_whitespace_trim(value: HttpToken) -> HttpToken {
    let mut rest = value;
    // Peel whitespace off the front...
    while let [first, tail @ ..] = rest {
        if !is_rfc_whitespace(*first) {
            break;
        }
        rest = tail;
    }
    // ...and then off the back.
    while let [head @ .., last] = rest {
        if !is_rfc_whitespace(*last) {
            break;
        }
        rest = head;
    }
    rest
}
/// Compares `original_char` (expected to be lower case) with the character of `header`
/// starting at `index`.
///
/// A match is either the same ASCII letter in any case (one byte is consumed), or one of
/// two byte pairs treated as look-alikes (two bytes are consumed):
/// `0xC5 0xBF` for `s` and `0xC4 0xB1` for `i` — the UTF-8 encodings of U+017F and U+0131,
/// which the tests in this file exercise.
///
/// Returns `(matched, next_index)`; on a mismatch `next_index` is `index + 1`.
///
/// # Panics
///
/// Panics when `index` is out of bounds for `header`.
#[inline]
fn same_char(original_char: u8, header: HttpToken, index: usize) -> (bool, usize) {
    let header_char = header[index];
    if header_char.to_ascii_lowercase() == original_char {
        return (true, index + 1);
    }
    let following = header.get(index + 1).copied();
    let lookalike = match (original_char, header_char, following) {
        (b's', 0xc5, Some(0xbf)) | (b'i', 0xc4, Some(0xb1)) => true,
        _ => false,
    };
    if lookalike {
        (true, index + 2)
    } else {
        (false, index + 1)
    }
}
/// Classifies how closely `header` resembles `important_header`.
///
/// `important_header` must consist of lower-case ASCII letters and `-` only (checked with
/// `debug_assert!`). `header` is the raw name to classify.
///
/// The header is walked alongside the important header:
/// * leading non-alphanumeric bytes of `header` are skipped;
/// * a letter must match via `same_char`; after it, bytes not greater than a space are
///   skipped;
/// * a `-` may be matched by any single byte, after which all non-alphanumeric bytes are
///   skipped;
/// * once every character is matched, trailing non-alphanumeric bytes are tolerated.
///
/// Returns
/// * `Identical` — same length, nothing skipped at the start, every character (including
///   delimiters) matched and the whole header consumed;
/// * `SameLetters` — every letter matched, but delimiters or surrounding bytes differ;
/// * `Distant` — a letter did not match, the header ended early, or alphanumeric bytes
///   remain after the match.
pub fn determine_similarity(important_header: HttpToken, header: HttpToken) -> TokenSimilarity {
    debug_assert!(important_header
        .iter()
        .all(|c| *c == c.to_ascii_lowercase()));
    debug_assert!(important_header
        .iter()
        .all(|c| c.is_ascii_alphabetic() || *c == b'-'));
    if header.len() < important_header.len() {
        return Distant;
    }
    let mut header_index = find_next_alphanumeric(header, 0);
    let mut identical = header_index == 0 && important_header.len() == header.len();
    for &expected in important_header {
        if header_index == header.len() {
            // The header ran out before the important header was fully matched.
            return Distant;
        }
        let delimiter = expected == b'-';
        let (same_symbol, new_index) = same_char(expected, header, header_index);
        if !same_symbol {
            if !delimiter {
                // A letter is missing. Falling through to the trailing-bytes check here
                // would misreport e.g. `t_` as having the same letters as `te`.
                return Distant;
            }
            // Any byte may stand in for a delimiter, but then it is not identical.
            identical = false;
        }
        header_index = if delimiter {
            find_next_alphanumeric(header, new_index)
        } else {
            find_next_header_symbol(header, new_index)
        };
    }
    let exhausted_header = header_index == header.len();
    if exhausted_header && identical {
        Identical
    } else if exhausted_header || find_next_alphanumeric(header, header_index) == header.len() {
        SameLetters
    } else {
        Distant
    }
}
#[cfg(test)]
mod tests {
use crate::http_token_utils::*;
use crate::MESSAGE_MAX_SIZE;
use smallvec::alloc::str::from_utf8;
/// `determine_similarity` requires its important header to be lower case, so the
/// essential header names must be defined in lower case.
#[test]
fn verify_essential_headers_lowercase() {
    for header in [TE, CL].iter() {
        for b in header.iter() {
            assert_eq!(b.to_ascii_lowercase(), *b);
        }
    }
}
/// `is_valid_te` returns `VALID_TE` entries as canonical names, so every entry must be
/// spelled in lower case.
#[test]
fn verify_te_options_lowercase() {
    for coding in VALID_TE {
        for &b in coding.iter() {
            assert_eq!(b.to_ascii_lowercase(), b);
        }
    }
}
/// Spot checks for `same_char`: plain ASCII in both cases, the two special byte pairs,
/// and a truncated pair.
#[test]
fn test_same_char() {
    // (expected result, important-header character, header bytes)
    let cases: [((bool, usize), u8, &[u8]); 6] = [
        ((true, 2), b's', &[0xc5, 0xbf]),
        ((true, 2), b'i', &[0xc4, 0xb1]),
        ((true, 1), b's', &[b's']),
        ((true, 1), b's', &[b'S']),
        ((false, 1), b's', &[b'a']),
        ((false, 1), b's', &[0xc5]),
    ];
    for (expected, original, header) in cases.iter() {
        assert_eq!(*expected, same_char(*original, header, 0));
    }
}
// Brute-force check that `same_char` handles every multi-byte UTF-8 character whose
// upper- or lower-case form is a single byte. All 2-, 3- and 4-byte candidates built
// from bytes >= 0x80 are fed to `check_utf8_character`, which ignores sequences that are
// not valid UTF-8.
#[test]
fn test_same_char_covers_all_utf8() {
let important_headers = &[
String::from_utf8(TE.to_vec()).expect("ASCII -> UTF8 is infallible"),
String::from_utf8(CL.to_vec()).expect("ASCII -> UTF8 is infallible"),
];
let mut chars_matched = 0;
let mut chars_checked = 0;
for d in 0x80..=0xFF {
for c in 0x80..=0xFF {
// Two-byte candidates.
let (matched, checked) = check_utf8_character(important_headers, &[c, d]);
chars_checked += checked;
chars_matched += matched;
for b in 0x80..=0xFF {
// Three-byte candidates.
let (matched, checked) = check_utf8_character(important_headers, &[b, c, d]);
chars_checked += checked;
chars_matched += matched;
// Four-byte candidates: only lead bytes from 0xF0 upwards are tried.
for a in 0xF0..=0xFF {
let (matched, checked) =
check_utf8_character(important_headers, &[a, b, c, d]);
chars_checked += checked;
chars_matched += matched;
}
}
}
}
assert_eq!(
chars_checked, 3,
"We know there are three such characters, that convert from UTF8 to ASCII"
);
assert_eq!(
chars_matched, 2,
"We know there are two of them that should match TE/CL headers"
);
}
/// Helper for `test_same_char_covers_all_utf8`.
///
/// Returns `(matched, checked)` for the byte sequence `utf8_char`:
/// * `(0, 0)` when it is not valid UTF-8;
/// * `checked` is 1 when its upper- or lower-case form is a single byte;
/// * `matched` counts how many of those one-byte forms occur in `important_headers`;
///   for each of them `same_char` must accept the whole sequence.
///
/// When nothing matched, `same_char` must reject the sequence for every letter `a..=z`.
fn check_utf8_character(important_headers: &[String], utf8_char: &[u8]) -> (usize, usize) {
    let s = match from_utf8(utf8_char) {
        Ok(s) => s,
        Err(_) => return (0, 0),
    };
    let mut checked = 0;
    let mut matched = 0;
    let upper = s.to_uppercase();
    let lower = s.to_lowercase();
    if upper.len() == 1 || lower.len() == 1 {
        checked += 1;
        let upper_in_headers = upper.len() == 1
            && important_headers
                .iter()
                .any(|header| header.to_ascii_uppercase().contains(upper.as_str()));
        if upper_in_headers {
            assert_eq!(
                same_char(upper.to_ascii_lowercase().as_bytes()[0], utf8_char, 0),
                (true, utf8_char.len()),
                "{} is not covered",
                s
            );
            matched += 1;
        }
        let lower_in_headers = lower.len() == 1
            && important_headers
                .iter()
                .any(|header| header.contains(lower.as_str()));
        if lower_in_headers {
            assert_eq!(
                same_char(lower.as_bytes()[0], utf8_char, 0),
                (true, utf8_char.len()),
                "{} is not covered",
                s
            );
            matched += 1;
        }
    }
    if matched == 0 {
        for c in b'a'..=b'z' {
            assert_eq!(
                same_char(c, utf8_char, 0),
                (false, 1),
                "{} must not match any characters, but it did: {}",
                s,
                to_quoted_ascii(&[c])
            );
        }
    }
    (matched, checked)
}
// Table-driven test for `determine_similarity`.
// Each case is (important header, header under test, expected similarity).
#[test]
fn test_same_letters() {
let test_cases = vec![
// Short synthetic cases: delimiter variations and trailing bytes.
("t-e", "t-e", TokenSimilarity::Identical),
("t-e", "t_e", TokenSimilarity::SameLetters),
("t-e", "t_e ", TokenSimilarity::SameLetters),
("t-e", "t____e", TokenSimilarity::SameLetters),
("t-e", "t----e", TokenSimilarity::SameLetters),
("t", "tp", TokenSimilarity::Distant),
("t", "t0", TokenSimilarity::Distant),
("t", "t_", TokenSimilarity::SameLetters),
("t", "p", TokenSimilarity::Distant),
("t", "", TokenSimilarity::Distant),
(
"transfer-encoding",
"transfer-encoding1",
TokenSimilarity::Distant,
),
// Multi-byte characters that `same_char` accepts in place of `s` and `i`.
(
"transfer-encoding",
"tran\u{017f}fer-encoding",
TokenSimilarity::SameLetters,
),
(
"transfer-encoding",
"transfer-encod\u{0131}ng",
TokenSimilarity::SameLetters,
),
(
"transfer-encoding",
"transfer-encoding",
TokenSimilarity::Identical,
),
(
"transfer-encoding",
"Transfer-Encoding",
TokenSimilarity::Identical,
),
(
"x-my-custom-important-header",
"X-My-Custom-Important-Header",
TokenSimilarity::Identical,
),
(
"x-my-custom-important-header",
"X_My_Custom_Important_Header",
TokenSimilarity::SameLetters,
),
(
"transfer-encoding",
"transfer-encodin",
TokenSimilarity::Distant,
),
("transfer", "transfer-encoding", TokenSimilarity::Distant),
(
"transfer-encoding",
"transfer-encoding1",
TokenSimilarity::Distant,
),
(
"transfer-encoding",
"TRANSFER-ENCODING",
TokenSimilarity::Identical,
),
// Whitespace and control bytes inside or around the name.
(
"transfer-encoding",
"transfer-\tencoding",
TokenSimilarity::SameLetters,
),
(
"transfer-encoding",
"\r\ntransfer-\t\te\tn\tc o d i ng\t",
TokenSimilarity::SameLetters,
),
(
"transfer-encoding",
"transfer_encoding",
TokenSimilarity::SameLetters,
),
(
"transfer-encoding",
"transfer---encoding",
TokenSimilarity::SameLetters,
),
(
"transfer-encoding",
"x_transfer_encoding",
TokenSimilarity::Distant,
),
(
"transfer-encoding",
"x_transfer_encodings",
TokenSimilarity::Distant,
),
("transfer-encoding", "gibberish", TokenSimilarity::Distant),
(
"transfer-encoding",
"\t\x01\x02transfer-encoding\t\x0f\x0c",
TokenSimilarity::SameLetters,
),
// Long enough, but contains no letters at all.
(
"transfer-encoding",
"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
TokenSimilarity::Distant,
),
(
"transfer-encoding",
"accept-encoding",
TokenSimilarity::Distant,
),
];
test_cases.iter().for_each(|(e, c, expected_distance)| {
let important_header = http_token(e);
let header = http_token(c);
let actual_distance = determine_similarity(important_header, header);
assert_eq!(
actual_distance, *expected_distance,
"similarity({}, {:?}) != {:?}",
e, c, *expected_distance
);
});
}
// Wraps `Transfer-Encoding` in each listed character (ASCII whitespace plus a range of
// multi-byte code points) on both sides and expects `SameLetters` every time.
#[test]
fn test_utf8_spaces() {
let test_cases = vec![
"\u{0009}", "\u{000A}", "\u{000B}", "\u{000C}", "\u{000D}", "\u{0020}", "\u{0085}", "\u{00A0}", "\u{1680}", "\u{180E}", "\u{2000}", "\u{2001}", "\u{2002}", "\u{2003}", "\u{2004}", "\u{2005}", "\u{2006}", "\u{2007}", "\u{2008}", "\u{2009}", "\u{200A}", "\u{200B}", "\u{200C}", "\u{200D}", "\u{2028}", "\u{2029}", "\u{202F}", "\u{205F}", "\u{2060}", "\u{3000}", "\u{FEFF}", ];
test_cases.iter().for_each(|e| {
let important_header = http_token("transfer-encoding");
// The same character is used as prefix and suffix.
let header_name = format!("{}Transfer-Encoding{}", e, e);
let header = http_token(header_name.as_str());
let actual_distance = determine_similarity(important_header, header);
assert_eq!(
actual_distance,
TokenSimilarity::SameLetters,
"similarity({}) != TokenSimilarity::SameLetters",
to_quoted_ascii(e.as_bytes()),
);
});
}
// Replaces the `-` of `Transfer-Encoding` with a multi-byte character in each case and
// expects `SameLetters` every time.
#[test]
fn test_utf8_delimiters() {
let test_cases = vec![
"Transfer\u{058A}Encoding", "Transfer\u{05BE}Encoding", "Transfer\u{1400}Encoding", "Transfer\u{1806}Encoding", "Transfer\u{2010}Encoding", "Transfer\u{2011}Encoding", "Transfer\u{2012}Encoding", "Transfer\u{2013}Encoding", "Transfer\u{2014}Encoding", "Transfer\u{2015}Encoding", "Transfer\u{2E17}Encoding", "Transfer\u{2E1A}Encoding", "Transfer\u{2E3A}Encoding", "Transfer\u{2E3B}Encoding", "Transfer\u{2E40}Encoding", "Transfer\u{301C}Encoding", "Transfer\u{3030}Encoding", "Transfer\u{30A0}Encoding", "Transfer\u{FE31}Encoding", "Transfer\u{FE32}Encoding", "Transfer\u{FE58}Encoding", "Transfer\u{FE63}Encoding", "Transfer\u{FF0D}Encoding", "Transfer\u{10EAD}Encoding", ];
test_cases.iter().for_each(|e| {
let important_header = http_token("transfer-encoding");
let header = http_token(e);
let actual_distance = determine_similarity(important_header, header);
assert_eq!(
actual_distance,
TokenSimilarity::SameLetters,
"similarity({}) != TokenSimilarity::SameLetters",
to_quoted_ascii(e.as_bytes()),
);
});
}
/// Headers containing byte sequences that are not valid UTF-8 must still be classified
/// as `SameLetters`.
///
/// Each case starts as a valid string; every byte in `0xc0..=0xcf` is then replaced with
/// `0x99` before the comparison. The failure message prints the unmodified case.
#[test]
fn test_arbitrary_non_utf8_bytes() {
    let cases = [
        "Transfer\u{88}Encoding",
        "Transfer\u{88}\u{88}Encoding",
        "Transfer-Encoding\u{99}",
        "Transfer-Encoding\u{99}\u{99}\u{99}",
        "\u{a0}Transfer-Encoding",
        "\u{a0}\u{a0}\u{a0}\u{a0}Transfer-Encoding",
    ];
    let important_header = http_token("transfer-encoding");
    for e in cases.iter() {
        let header: Vec<u8> = e
            .bytes()
            .map(|x| match x {
                0xc0..=0xcf => 0x99,
                other => other,
            })
            .collect();
        let actual_distance = determine_similarity(important_header, header.as_slice());
        assert_eq!(
            actual_distance,
            TokenSimilarity::SameLetters,
            "similarity({}) != TokenSimilarity::SameLetters",
            to_quoted_ascii(e.as_bytes()),
        );
    }
}
/// Table-driven test for `is_valid_te`.
/// Each case is (input value, expected canonical name, whether the input is valid).
#[test]
fn test_valid_te() {
    let cases: [(&str, &str, bool); 13] = [
        ("chunked", "chunked", true),
        ("compress", "compress", true),
        ("deflate", "deflate", true),
        ("gzip", "gzip", true),
        ("identity", "identity", true),
        ("chunked;", "chunked", true),
        ("chunked;extension1;extension2;", "chunked", true),
        (" CHUNKED ", "chunked", true),
        (" chunked", "chunked", true),
        (" chunked\t", "chunked", true),
        (" chunked\t\t;\t\t", "chunked", true),
        ("xchunked", "unreachable", false),
        ("x_chunked", "unreachable", false),
    ];
    for (te, canonical, valid) in cases.iter() {
        match is_valid_te(http_token(te)) {
            Some(v) => {
                assert!(*valid, "Expected {}={}", to_quoted_ascii(v), *valid);
                assert!(
                    v.eq_ignore_ascii_case(http_token(canonical)),
                    "{} != {}",
                    to_quoted_ascii(v),
                    canonical
                );
            }
            None => assert!(!*valid, "Expected {}={}", te, *valid),
        }
    }
}
/// Table-driven test for `rfc_whitespace_trim`: (input, expected trimmed value).
#[test]
fn test_trim() {
    let cases = [
        (" abc \t\t\t", "abc"),
        ("\t \t \t abc \t\t\t", "abc"),
        (" \t\t\t", ""),
        ("", ""),
    ];
    for (value, result) in cases.iter() {
        assert_eq!(rfc_whitespace_trim(http_token(value)), http_token(result));
    }
}
/// Table-driven test for `parse_num`: plain numbers, surrounding whitespace, the `u64`
/// boundary, overflow and non-digit input.
#[test]
fn test_parse_num() {
    let cases: [(&str, Result<u64, &str>); 10] = [
        ("0", Ok(0)),
        ("1000", Ok(1000)),
        ("\t\t\t1000 ", Ok(1000)),
        ("123456789", Ok(123456789)),
        ("18446744073709551615", Ok(18446744073709551615)),
        ("18446744073709551616", Err("64-bits overflow")),
        (
            "123456789123456789123456789123456789",
            Err("64-bits overflow"),
        ),
        ("123,123", Err("Not an integer")),
        ("123.123", Err("Not an integer")),
        ("-1", Err("Not an integer")),
    ];
    for (num_str, result) in cases.iter() {
        assert_eq!(parse_num(http_token(num_str)), *result);
    }
}
/// Table-driven test for `to_quoted_ascii`: (input, expected text between the quotes),
/// followed by one case with bytes that are not valid UTF-8.
#[test]
fn test_to_ascii() {
    let cases = [
        ("abc", "abc"),
        ("abc ", "abc "),
        ("abc\x01", "abc\\0x01"),
        ("abc\\x01", "abc\\\\x01"),
        ("\raaaa\tbbb\nccc", "\\raaaa\\tbbb\\nccc"),
        ("\nabc\u{7f}", "\\nabc\\0x7f"),
        ("\u{0}abc", "\\0x00abc"),
        (
            "\u{01}abc\u{02}abcefd901\u{03}",
            "\\0x01abc\\0x02abcefd901\\0x03",
        ),
    ];
    for (value, result) in cases.iter() {
        let expected = format!("\"{}\"", result);
        assert_eq!(to_quoted_ascii(http_token(value)), expected);
    }
    let bytes = [0x81, b'a', b'b', b'c', 0x82, b'0', b'1', b'2', 0x83];
    let expected = format!("\"{}\"", "\\0x81abc\\0x82012\\0x83");
    assert_eq!(to_quoted_ascii(&bytes), expected);
}
/// Table-driven test for `obfuscate_value`: (input, expected text between the quotes).
#[test]
fn test_obfuscate_value() {
    let cases = [
        ("A private message", "_ _ _"),
        ("A private message\\r", "_ _ _\\\\_"),
        ("\rSensitive\tuser\ndata", "\\r_\\t_\\n_"),
        ("\nPassword", "\\n_"),
        ("\rPIN", "\\r_"),
        ("\u{0}DL-ID", "\\0x00_"),
        ("\u{01}SSN\u{02}card-number\u{03}", "\\0x01_\\0x02_\\0x03"),
    ];
    for (value, result) in cases.iter() {
        let expected = format!("\"{}\"", result);
        assert_eq!(obfuscate_value(http_token(value)), expected);
    }
}
/// The quoted form of a long run of bytes must stay below `MESSAGE_MAX_SIZE / 2`.
#[test]
fn test_to_ascii_large_value() {
    let large_value = "\u{1}".repeat(500);
    let quoted = to_quoted_ascii(http_token(large_value.as_str()));
    assert!(quoted.len() < MESSAGE_MAX_SIZE / 2);
}
/// The obfuscated form of a long run of bytes must stay below `MESSAGE_MAX_SIZE / 2`.
#[test]
fn test_obfuscate_large_value() {
    let large_value = "\u{1}".repeat(500);
    let obfuscated = obfuscate_value(http_token(large_value.as_str()));
    assert!(obfuscated.len() < MESSAGE_MAX_SIZE / 2);
}
/// `is_rfc_tchar` must accept exactly the ASCII alphanumerics and the listed delimiters.
#[test]
fn test_tchar() {
    let delims = b"-_.!#$%&'*+^`|~";
    for c in 0..=u8::MAX {
        let expected = c.is_ascii_alphanumeric() || delims.contains(&c);
        assert_eq!(
            is_rfc_tchar(c),
            expected,
            "Didn't work out for {}",
            char::from(c)
        );
    }
}
/// `is_valid_header_value_char` must agree with the union of `is_rfc_vchar`,
/// `is_rfc_obs_text` and `is_rfc_whitespace` for every byte.
#[test]
fn test_header_value() {
    for c in 0..=u8::MAX {
        let expected = is_rfc_vchar(c) || is_rfc_obs_text(c) || is_rfc_whitespace(c);
        assert_eq!(
            is_valid_header_value_char(c),
            expected,
            "Didn't work out for {}",
            char::from(c)
        );
    }
}
}