use crate::{
header::{FieldBody, FieldName, HeaderFields},
signature::CanonicalizationAlgorithm,
};
use std::{borrow::Cow, collections::HashSet};
// Byte constants used throughout header/body canonicalization.
const SP: u8 = b' ';
const CR: u8 = b'\r';
const LF: u8 = b'\n';
// CRLF line terminator as required by RFC 5322 / RFC 6376.
const CRLF: [u8; 2] = [CR, LF];
/// Canonicalizes the selected headers with the given algorithm, returning
/// the bytes to be fed into the header hash.
///
/// Headers are selected from the last header upwards; an instance already
/// consumed by an earlier occurrence of the same name in `selected_headers`
/// is not selected again, and a name with no remaining instance contributes
/// nothing (RFC 6376, section 5.4.2).
pub fn canonicalize_headers(
    algorithm: CanonicalizationAlgorithm,
    headers: &HeaderFields,
    selected_headers: &[FieldName],
) -> Vec<u8> {
    let mut output = Vec::new();
    // Indexes (counted from the *last* header) already consumed.
    let mut used = HashSet::with_capacity(selected_headers.len());
    for wanted in selected_headers {
        // Scan bottom-up for the next not-yet-consumed instance of this name.
        let hit = headers
            .as_ref()
            .iter()
            .rev()
            .enumerate()
            .find(|(idx, (name, _))| name == wanted && !used.contains(idx));
        if let Some((idx, (name, value))) = hit {
            used.insert(idx);
            canonicalize_header(&mut output, algorithm, name, value);
            output.extend(CRLF);
        }
    }
    output
}
/// Appends one canonicalized header field (`name:value`, without a trailing
/// CRLF) to `result`, using the given canonicalization algorithm.
pub fn canonicalize_header(
    result: &mut Vec<u8>,
    algorithm: CanonicalizationAlgorithm,
    name: impl AsRef<str>,
    value: impl AsRef<[u8]>,
) {
    let name = name.as_ref();
    let value = value.as_ref();
    if let CanonicalizationAlgorithm::Relaxed = algorithm {
        // Relaxed: the field name is lowercased and the value's whitespace
        // is normalized. Lowercasing byte-by-byte only affects ASCII bytes,
        // identical to lowercasing the whole string.
        for b in name.bytes() {
            result.push(b.to_ascii_lowercase());
        }
        result.push(b':');
        canonicalize_header_relaxed(result, value);
    } else {
        // Simple: name and value are used verbatim.
        result.extend(name.bytes());
        result.push(b':');
        result.extend(value);
    }
}
/// Appends the "relaxed"-canonicalized form of a header field value to
/// `result`: whitespace at both ends is removed, and every interior run of
/// whitespace collapses to a single SP (RFC 6376, section 3.4.2).
fn canonicalize_header_relaxed(result: &mut Vec<u8>, value: &[u8]) {
    // The characters making up folding whitespace: WSP plus CR/LF.
    // (Deliberately narrower than `u8::is_ascii_whitespace`, which would
    // also match form feed.)
    fn is_space(b: u8) -> bool {
        matches!(b, b' ' | b'\t' | b'\r' | b'\n')
    }

    debug_assert!(FieldBody::new(value).is_ok());

    // Locate the trimmed extent of the value; an all-whitespace value
    // yields an empty range.
    let start = value
        .iter()
        .position(|&b| !is_space(b))
        .unwrap_or(value.len());
    let end = value
        .iter()
        .rposition(|&b| !is_space(b))
        .map_or(start, |i| i + 1);

    // Copy the trimmed value, emitting one SP per whitespace run.
    let mut in_run = false;
    for &b in &value[start..end] {
        if is_space(b) {
            if !in_run {
                result.push(SP);
                in_run = true;
            }
        } else {
            result.push(b);
            in_run = false;
        }
    }
}
// State of the incremental body canonicalization state machine, recording
// what the most recently consumed input bytes were.
#[derive(Clone, Copy)]
enum CState {
    // No input consumed yet.
    Init,
    // The bytes last consumed were CRLF (a completed line).
    CrLf,
    // The byte last consumed was CR (possibly the start of a CRLF).
    Cr,
    // Inside a run of WSP (relaxed canonicalization only).
    Wsp,
    // A CR following a WSP run (relaxed canonicalization only).
    WspCr,
    // The byte last consumed was an ordinary content byte.
    Byte,
}
// Incremental canonicalizer for message bodies: feed arbitrary chunks
// through `canonicalize_chunk` and terminate the stream with `finish`.
#[derive(Clone)]
pub struct BodyCanonicalizer {
    kind: CanonicalizationAlgorithm,
    state: CState,
    // True while the line currently being read has no content bytes yet, so
    // a following CRLF completes an *empty* line (buffered, not emitted).
    blank_line: bool,
    // Count of buffered empty lines: flushed only when further content
    // proves they are not trailing, dropped at the end of the body.
    empty_lines: usize,
}
impl BodyCanonicalizer {
    /// Creates a body canonicalizer for the given algorithm.
    pub fn new(kind: CanonicalizationAlgorithm) -> Self {
        // Relaxed starts on a "blank" line so leading empty lines are
        // buffered (and dropped if no content ever follows); simple starts
        // non-blank so an initial CRLF is emitted immediately (an empty
        // body canonicalizes to a single CRLF in simple).
        let blank_line = match kind {
            CanonicalizationAlgorithm::Simple => false,
            CanonicalizationAlgorithm::Relaxed => true,
        };
        Self {
            kind,
            state: CState::Init,
            blank_line,
            empty_lines: 0,
        }
    }

    /// Canonicalizes the next chunk of body input, returning the bytes to
    /// feed into the body hash. May return a borrow of `bytes` when no
    /// rewriting is needed. Call `finish` after the final chunk.
    pub fn canonicalize_chunk<'a>(&mut self, bytes: &'a [u8]) -> Cow<'a, [u8]> {
        match self.kind {
            CanonicalizationAlgorithm::Simple => self.canon_chunk_simple(bytes),
            CanonicalizationAlgorithm::Relaxed => self.canon_chunk_relaxed(bytes),
        }
    }

    // "Simple" body canonicalization: the body is kept as-is except that
    // trailing empty lines are removed. Empty lines are therefore buffered
    // in `empty_lines` and only flushed once later content shows they are
    // not trailing.
    fn canon_chunk_simple<'a>(&mut self, mut bytes: &'a [u8]) -> Cow<'a, [u8]> {
        // Fast path for the first chunk: when it ends in CRLF, strip and
        // buffer the trailing empty lines, and borrow the rest unchanged.
        if matches!(self.state, CState::Init) {
            if let Some(mut bnext) = bytes.strip_suffix(&CRLF) {
                while let Some(bx) = bnext.strip_suffix(&CRLF) {
                    (bytes, bnext) = (bnext, bx);
                    self.empty_lines += 1;
                }
                self.blank_line = true;
                self.state = CState::CrLf;
                return bytes.into();
            }
        }
        let mut result = Vec::with_capacity(bytes.len());
        for &b in bytes {
            match self.state {
                // At the start of a line.
                CState::Init | CState::CrLf => {
                    if b == CR {
                        self.state = CState::Cr;
                    } else {
                        // Content byte: buffered empty lines are for real.
                        self.flush_empty_lines(&mut result);
                        result.push(b);
                        self.state = CState::Byte;
                    }
                }
                CState::Cr => {
                    if b == LF {
                        // CRLF completes a line; an empty line is buffered
                        // rather than emitted, since trailing empty lines
                        // must be dropped at the end of the body.
                        if self.blank_line {
                            self.empty_lines += 1;
                        } else {
                            result.extend(CRLF);
                            self.blank_line = true;
                        }
                        self.state = CState::CrLf;
                        continue;
                    }
                    // A stray CR (not part of CRLF) is ordinary content.
                    self.flush_empty_lines(&mut result);
                    result.push(CR);
                    if b != CR {
                        result.push(b);
                        self.state = CState::Byte;
                    }
                    // (If b == CR, stay in state Cr for the new CR.)
                }
                CState::Byte => {
                    if b == CR {
                        self.state = CState::Cr;
                    } else {
                        result.push(b);
                    }
                }
                // Whitespace states only occur in relaxed canonicalization.
                CState::Wsp | CState::WspCr => unreachable!(),
            }
        }
        result.into()
    }

    // "Relaxed" body canonicalization: like simple, but whitespace at line
    // ends is removed and interior whitespace runs collapse to a single SP.
    fn canon_chunk_relaxed<'a>(&mut self, bytes: &'a [u8]) -> Cow<'a, [u8]> {
        fn is_wsp(b: u8) -> bool {
            matches!(b, b'\t' | b' ')
        }
        let mut result = Vec::with_capacity(bytes.len());
        for &b in bytes {
            match self.state {
                // At the start of a line.
                CState::Init | CState::CrLf => {
                    if is_wsp(b) {
                        self.state = CState::Wsp;
                    } else if b == CR {
                        self.state = CState::Cr;
                    } else {
                        self.flush_empty_lines(&mut result);
                        result.push(b);
                        self.state = CState::Byte;
                    }
                }
                // Inside a WSP run whose fate is not yet known: dropped if
                // the line ends here, otherwise collapsed to a single SP.
                CState::Wsp => {
                    if b == CR {
                        self.state = CState::WspCr;
                    } else if !is_wsp(b) {
                        self.flush_empty_lines(&mut result);
                        result.push(SP);
                        result.push(b);
                        self.state = CState::Byte;
                    }
                }
                CState::Cr => {
                    if b == LF {
                        // CRLF completes a line; empty lines are buffered
                        // so that trailing ones can be dropped at the end.
                        if self.blank_line {
                            self.empty_lines += 1;
                        } else {
                            result.extend(CRLF);
                            self.blank_line = true;
                        }
                        self.state = CState::CrLf;
                        continue;
                    }
                    // A stray CR is ordinary content.
                    self.flush_empty_lines(&mut result);
                    result.push(CR);
                    if is_wsp(b) {
                        self.state = CState::Wsp;
                    } else if b != CR {
                        result.push(b);
                        self.state = CState::Byte;
                    }
                }
                // A CR after a WSP run: if an LF follows, the whitespace
                // was at the line end and is discarded with the CRLF.
                CState::WspCr => {
                    if b == LF {
                        if self.blank_line {
                            self.empty_lines += 1;
                        } else {
                            result.extend(CRLF);
                            self.blank_line = true;
                        }
                        self.state = CState::CrLf;
                        continue;
                    }
                    // Not a line ending: the WSP run (as one SP) and the
                    // stray CR are both content after all.
                    self.flush_empty_lines(&mut result);
                    result.push(SP);
                    result.push(CR);
                    if b == CR {
                        self.state = CState::Cr;
                    } else if is_wsp(b) {
                        self.state = CState::Wsp;
                    } else {
                        result.push(b);
                        self.state = CState::Byte;
                    }
                }
                CState::Byte => {
                    if is_wsp(b) {
                        self.state = CState::Wsp;
                    } else if b == CR {
                        self.state = CState::Cr;
                    } else {
                        result.push(b);
                    }
                }
            }
        }
        result.into()
    }

    /// Finalizes the body: emits any bytes still pending in the state
    /// machine and ensures a non-empty canonicalized body ends with CRLF.
    /// Buffered trailing empty lines are dropped. Under simple, an empty
    /// body yields a lone CRLF; under relaxed it yields nothing.
    pub fn finish(mut self) -> Cow<'static, [u8]> {
        match self.kind {
            CanonicalizationAlgorithm::Simple => {
                match self.state {
                    // Terminate the final, unterminated line (for Init this
                    // produces the lone CRLF of an empty body).
                    CState::Init | CState::Byte => (&CRLF[..]).into(),
                    // Already ends with CRLF; trailing empty lines dropped.
                    CState::CrLf => (&[][..]).into(),
                    // A trailing stray CR is content; terminate its line.
                    CState::Cr => {
                        let mut result = vec![];
                        self.flush_empty_lines(&mut result);
                        result.push(CR);
                        result.extend(CRLF);
                        result.into()
                    }
                    CState::Wsp | CState::WspCr => unreachable!(),
                }
            }
            CanonicalizationAlgorithm::Relaxed => {
                match self.state {
                    // Empty body, or body ending in CRLF.
                    CState::Init | CState::CrLf => (&[][..]).into(),
                    // A trailing stray CR is content; terminate its line.
                    CState::Cr => {
                        let mut result = vec![];
                        self.flush_empty_lines(&mut result);
                        result.push(CR);
                        result.extend(CRLF);
                        result.into()
                    }
                    // Trailing WSP on the final line is discarded; the line
                    // still needs its CRLF unless it was entirely blank.
                    CState::Wsp => {
                        if self.blank_line {
                            (&[][..]).into()
                        } else {
                            (&CRLF[..]).into()
                        }
                    }
                    // Pending WSP followed by a stray CR: both are content.
                    CState::WspCr => {
                        let mut result = vec![];
                        self.flush_empty_lines(&mut result);
                        result.push(SP);
                        result.push(CR);
                        result.extend(CRLF);
                        result.into()
                    }
                    CState::Byte => (&CRLF[..]).into(),
                }
            }
        }
    }

    // Emits the buffered empty lines; called right before emitting a content
    // byte that proves they are not trailing. Also marks the current line as
    // no longer blank.
    fn flush_empty_lines(&mut self, result: &mut Vec<u8>) {
        for _ in 0..self.empty_lines {
            result.extend(CRLF);
        }
        self.empty_lines = 0;
        self.blank_line = false;
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::signature::CanonicalizationAlgorithm::*;
use rand::{
distributions::{Distribution, Slice},
Rng,
};
use std::{ops::RangeInclusive, str};
#[test]
fn canonicalize_headers_relaxed_ok() {
let headers = HeaderFields::from_vec(vec![
("from".to_owned(), b" Good \t ".to_vec()),
("to".to_owned(), b" see me".to_vec()),
("Date".to_owned(), b" Fri 24\r\n\tfoo".to_vec()),
("To".to_owned(), b" another one".to_vec()),
])
.unwrap();
let selected_headers = vec![
FieldName::new("to").unwrap(),
FieldName::new("from").unwrap(),
FieldName::new("to").unwrap(),
];
assert_eq!(
canonicalize_headers(Relaxed, &headers, &selected_headers),
&b"to:another one\r\nfrom:Good\r\nto:see me\r\n"[..],
);
}
#[test]
fn canonicalize_header_relaxed_dkim_sig() {
let example = "v=1; a=rsa-sha256; d=example.net; s=brisbane;
c=simple; q=dns/txt; i=@eng.example.net;
h=from:to:subject:date;
bh=MTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTI=;
b=dzdV... ";
let example = example.replace('\n', "\r\n");
let mut result = vec![];
canonicalize_header(&mut result, Relaxed, "Dkim-Signature", &example);
assert_eq!(
result,
b"dkim-signature:v=1; a=rsa-sha256; d=example.net; \
s=brisbane; c=simple; q=dns/txt; i=@eng.example.net; h=from:to:subject:date; \
bh=MTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTI=; b=dzdV..."[..]
);
}
#[test]
fn body_canon_simple_ok() {
let bc = BodyCanonicalizer::new(Simple);
let body = canonicalize_chunks(
bc,
&[b"well hello \r\n", b"\r\n\r? what's up \r\n\r\n", b"\r\n"],
);
assert_eq!(body, b"well hello \r\n\r\n\r? what's up \r\n");
}
#[test]
fn body_canon_simple_cases() {
fn c(bs: &[u8]) -> Vec<u8> {
let bc = BodyCanonicalizer::new(Simple);
canonicalize_chunks(bc, &[bs])
}
assert_eq!(c(b""), b"\r\n");
assert_eq!(c(b"\r"), b"\r\r\n");
assert_eq!(c(b"\rx"), b"\rx\r\n");
assert_eq!(c(b"\r\r"), b"\r\r\r\n");
assert_eq!(c(b"x"), b"x\r\n");
assert_eq!(c(b"x\r"), b"x\r\r\n");
assert_eq!(c(b"x\r\n"), b"x\r\n");
assert_eq!(c(b"x\r\n\r\n"), b"x\r\n");
assert_eq!(c(b"x\r\n\r\n\rx"), b"x\r\n\r\n\rx\r\n");
assert_eq!(c(b"\n"), b"\n\r\n");
assert_eq!(c(b"\r\n"), b"\r\n");
assert_eq!(c(b"\r\n\r"), b"\r\n\r\r\n");
assert_eq!(c(b"\r\n\r\n"), b"\r\n");
assert_eq!(c(b"\r\n\r\nx"), b"\r\n\r\nx\r\n");
}
#[test]
fn body_canon_simple_optimized() {
fn c(chunk1: &[u8], chunk2: &[u8]) -> Vec<u8> {
let bc = BodyCanonicalizer::new(Simple);
canonicalize_chunks(bc, &[chunk1, chunk2])
}
assert_eq!(c(b"", b"x"), b"x\r\n");
assert_eq!(c(b"\r\n", b"\r\n"), b"\r\n");
assert_eq!(c(b"x\r\n", b"\r\nx"), b"x\r\n\r\nx\r\n");
assert_eq!(c(b"\r\n\r\n\r\n", b""), b"\r\n");
assert_eq!(c(b"\r\n\r\n\r\n", b"\r\n"), b"\r\n");
assert_eq!(c(b"\r\n\r\n\r\n", b"\r"), b"\r\n\r\n\r\n\r\r\n");
}
#[test]
fn body_canon_relaxed_basic() {
let bc = BodyCanonicalizer::new(Relaxed);
let body = canonicalize_chunks(
bc,
&[b"well hello \r\n", b"\r\n what's up \r\n\r\n", b"\r\n"],
);
assert_eq!(body, b"well hello\r\n\r\n what's up\r\n");
}
#[test]
fn body_canon_relaxed_small_chunks() {
let bc = BodyCanonicalizer::new(Relaxed);
let body = canonicalize_chunks(
bc,
&[
b"well ",
b" hello ",
b"\r",
b"\n\r",
b"\n what's up \r\n\r\n",
b"\r\n",
],
);
assert_eq!(body, b"well hello\r\n\r\n what's up\r\n");
}
#[test]
fn body_canon_relaxed_initial_empty_lines() {
let bc = BodyCanonicalizer::new(Relaxed);
let body = canonicalize_chunks(bc, &[b"\r\n\r\n", b"\ra \r", b"\nb ", b"c"]);
assert_eq!(body, b"\r\n\r\n\ra\r\nb c\r\n");
}
#[test]
fn body_canon_relaxed_cases() {
fn c(bs: &[u8]) -> Vec<u8> {
let bc = BodyCanonicalizer::new(Relaxed);
canonicalize_chunks(bc, &[bs])
}
assert_eq!(c(b""), b"");
assert_eq!(c(b" "), b"");
assert_eq!(c(b" "), b"");
assert_eq!(c(b" x"), b" x\r\n");
assert_eq!(c(b" x"), b" x\r\n");
assert_eq!(c(b" x "), b" x\r\n");
assert_eq!(c(b" \r"), b" \r\r\n");
assert_eq!(c(b" \r "), b" \r\r\n");
assert_eq!(c(b" \r\r"), b" \r\r\r\n");
assert_eq!(c(b" \rx"), b" \rx\r\n");
assert_eq!(c(b" \r\n"), b"");
assert_eq!(c(b" \r\n\r\n \r\n"), b"");
assert_eq!(c(b" \r\n\r\n "), b"");
assert_eq!(c(b"\r"), b"\r\r\n");
assert_eq!(c(b"\r\r"), b"\r\r\r\n");
assert_eq!(c(b"\rx"), b"\rx\r\n");
assert_eq!(c(b"\r "), b"\r\r\n");
assert_eq!(c(b"\r "), b"\r\r\n");
assert_eq!(c(b"x"), b"x\r\n");
assert_eq!(c(b"xy"), b"xy\r\n");
assert_eq!(c(b"x\r\n"), b"x\r\n");
assert_eq!(c(b"x\r\n\r\n"), b"x\r\n");
assert_eq!(c(b"x "), b"x\r\n");
assert_eq!(c(b"x\r\ny"), b"x\r\ny\r\n");
assert_eq!(c(b"x\r\n\ry"), b"x\r\n\ry\r\n");
assert_eq!(c(b"x\r\n\r\ny"), b"x\r\n\r\ny\r\n");
assert_eq!(c(b"x\r\n\r\n\ry"), b"x\r\n\r\n\ry\r\n");
assert_eq!(c(b"x\r\n \r\ny"), b"x\r\n\r\ny\r\n");
assert_eq!(c(b"x\r\n \r\ny"), b"x\r\n\r\ny\r\n");
assert_eq!(c(b"x\r\n z\r\ny"), b"x\r\n z\r\ny\r\n");
assert_eq!(c(b"x\r\n z \r\ny"), b"x\r\n z\r\ny\r\n");
assert_eq!(c(b"x\r\n z \r\ny"), b"x\r\n z\r\ny\r\n");
assert_eq!(c(b"x y z\r\n"), b"x y z\r\n");
assert_eq!(c(b"x y z\r\n"), b"x y z\r\n");
assert_eq!(c(b"x y z \r\n"), b"x y z\r\n");
assert_eq!(c(b"\r\n"), b"");
assert_eq!(c(b"\r\n\r"), b"\r\n\r\r\n");
assert_eq!(c(b"\r\n\r "), b"\r\n\r\r\n");
assert_eq!(c(b"\r\n\r\r"), b"\r\n\r\r\r\n");
assert_eq!(c(b"\r\n\r\n"), b"");
assert_eq!(c(b"\r\n \r\n"), b"");
assert_eq!(c(b"\r\n \r\n"), b"");
assert_eq!(c(b"\r\n\r\nx"), b"\r\n\r\nx\r\n");
assert_eq!(c(b"\r\n \r\nx"), b"\r\n\r\nx\r\n");
}
fn canonicalize_chunks(mut bc: BodyCanonicalizer, chunks: &[&[u8]]) -> Vec<u8> {
let mut result = vec![];
for c in chunks {
result.extend(bc.canonicalize_chunk(c).into_owned());
}
result.extend(bc.finish().into_owned());
result
}
#[test]
#[ignore = "randomly generated test inputs"]
fn fuzz_body_canonicalizer_simple() {
let alt_impl = |bytes: &_| {
str::from_utf8(bytes)
.unwrap()
.trim_end_matches("\r\n")
.bytes()
.chain(*b"\r\n")
.collect()
};
let s_elems = ["x", "\r", "\n", "\r\n"];
let s_len = 0..=9;
run_fuzz(300, Simple, &s_elems, s_len, alt_impl);
}
#[test]
#[ignore = "randomly generated test inputs"]
fn fuzz_body_canonicalizer_relaxed() {
let alt_impl = |bytes: &_| {
let s = str::from_utf8(bytes).unwrap();
let s = s.split("\r\n")
.map(|s| {
let mut sp = false;
s.trim_end_matches(' ')
.chars()
.filter(|&c| {
if c == ' ' {
let ret = !sp;
sp = true;
ret
} else {
sp = false;
true
}
})
.collect::<String>()
})
.collect::<Vec<_>>()
.join("\r\n");
let s = s.trim_end_matches("\r\n");
let mut s = s.to_owned();
if !s.is_empty() {
s.push_str("\r\n");
}
s.bytes().collect()
};
let s_elems = ["x", " ", "\r", "\n", "\r\n"];
let s_len = 0..=12;
run_fuzz(1000, Relaxed, &s_elems, s_len, alt_impl);
}
fn run_fuzz(
repetitions: usize,
alg: CanonicalizationAlgorithm,
s_elems: &[&str],
s_len: RangeInclusive<u8>,
alt_impl: impl Fn(&[u8]) -> Vec<u8>,
) {
let elems = Slice::new(s_elems).unwrap();
let mut rng = rand::thread_rng();
for _ in 0..repetitions {
let n = rng.gen_range(s_len.clone()).into();
let s: String = elems.sample_iter(&mut rng).copied().take(n).collect();
let bytes = s.as_bytes();
let r1 = canonicalize_chunks(BodyCanonicalizer::new(alg), &[bytes]);
let r2 = alt_impl(bytes);
assert_eq!(
r1,
r2,
"divergent results for input {s:?}: {:?} != {:?}",
str::from_utf8(&r1).unwrap(),
str::from_utf8(&r2).unwrap()
);
}
}
}