eml-codec 0.4.0

Email enCOder DECoder in Rust. Supports the Internet Message Format and MIME (RFC 822, 5322, 2045, 2046, 2047, 2048, 2049, 6532).
Documentation
#[cfg(feature = "arbitrary")]
use arbitrary::Arbitrary;
use bounded_static::ToStatic;
use nom::{
    branch::alt,
    bytes::complete::{tag, take_while1},
    character::complete::space0,
    combinator::{all_consuming, consumed, eof, map, rest},
    multi::many0,
    sequence::{pair, terminated, tuple},
    IResult, Parser,
};
use std::borrow::Cow;
use std::fmt;
#[cfg(feature = "tracing")]
use tracing::warn;

use crate::i18n::ContainsUtf8;
use crate::print::{Formatter, Print};
use crate::raw_input::RawInput;
use crate::text::misc_token;
use crate::text::whitespace::{foldable_line, obs_crlf};
#[cfg(any(feature = "tracing-recover", feature = "tracing-unsupported"))]
use crate::utils::bytes_to_trace_string;
#[cfg(feature = "arbitrary")]
use crate::{arbitrary_utils::arbitrary_vec_nonempty_where, fuzz_eq::FuzzEq};

/// A valid header field name.
///
/// Wraps the raw bytes of the name, either borrowed from the parsed input or
/// owned. The `field_name` parser only produces non-empty names made of
/// `ftext` bytes (see `is_ftext`); because the inner field is public, that
/// invariant is not enforced by construction.
// NOTE(review): `#[contains_utf8(false)]` presumably makes the derived
// `ContainsUtf8` impl always report `false` for names -- confirm against the
// derive macro.
#[derive(PartialEq, Clone, ContainsUtf8, ToStatic)]
#[contains_utf8(false)]
pub struct FieldName<'a>(pub Cow<'a, [u8]>);
impl<'a> FieldName<'a> {
    /// Returns the raw bytes of the field name.
    pub fn bytes(&'a self) -> &'a [u8] {
        self.0.as_ref()
    }
}
impl<'a> fmt::Debug for FieldName<'a> {
    /// Formats the name as a `FieldName(..)` tuple, showing the bytes as
    /// text (lossily decoded as UTF-8) rather than as a list of integers.
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        let readable = String::from_utf8_lossy(&self.0);
        let mut tuple = fmt.debug_tuple("FieldName");
        tuple.field(&readable);
        tuple.finish()
    }
}
impl<'a> Print for FieldName<'a> {
    fn print(&self, fmt: &mut impl Formatter) {
        fmt.write_bytes(&self.0)
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for FieldName<'a> {
    /// Generates an owned field name whose bytes all satisfy `is_ftext`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        // NOTE(review): `b'X'` is presumably the byte used when the helper
        // needs a guaranteed-valid element -- confirm against
        // `arbitrary_vec_nonempty_where`.
        let generated: Vec<u8> = arbitrary_vec_nonempty_where(u, |byte| is_ftext(*byte), b'X')?;
        let name = Cow::Owned(generated);
        Ok(FieldName(name))
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> FuzzEq for FieldName<'a> {
    /// Two names are fuzz-equal exactly when their bytes are equal.
    fn fuzz_eq(&self, other: &Self) -> bool {
        let (FieldName(mine), FieldName(theirs)) = (self, other);
        mine == theirs
    }
}

/// Intermediate AST for two-step parsing of header fields. Structured headers
/// are then parsed from this.
///
/// A `FieldRaw` corresponds to a header field after performing "framing", i.e.
/// identifying header field boundaries: it is the raw data found between two
/// header boundaries, split into a valid name and an arbitrary body.
///
/// It does not say anything about the validity of the body. The body is stored
/// as a raw slice because it will be parsed further.
///
/// Header lines that cannot be split into a name and a body have no `FieldRaw`
/// representation: `field_raw_opt` yields `None` for them and `header_kv`
/// drops them.
#[derive(PartialEq, Clone)]
pub struct FieldRaw<'a> {
    /// The field name (the part before the `:`).
    pub name: FieldName<'a>,
    /// The unparsed field body (what follows the `:`).
    pub body: &'a [u8],
}
impl<'a> fmt::Debug for FieldRaw<'a> {
    /// Formats the field as a `header::FieldRaw { .. }` struct, showing the
    /// body as text (lossily decoded as UTF-8) rather than as raw bytes.
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        let readable_body = String::from_utf8_lossy(self.body);
        let mut repr = fmt.debug_struct("header::FieldRaw");
        repr.field("name", &self.name);
        repr.field("body", &readable_body);
        repr.finish()
    }
}
impl<'a> ContainsUtf8 for FieldRaw<'a> {
    /// Reports whether the body holds at least one non-ASCII byte.
    /// Only the body is inspected; the name is not.
    fn contains_utf8(&self) -> bool {
        !self.body.is_ascii()
    }
}

/// Parse headers as raw key/values.
/// Stop at an empty line or at EOF.
///
/// Returns the input left after the header section together with every field
/// that could be split into a name and a body, in input order. Lines that
/// could not be split are dropped.
#[cfg_attr(feature = "tracing", tracing::instrument(skip_all))]
pub fn header_kv(input: &[u8]) -> (&[u8], Vec<FieldRaw<'_>>) {
    // SAFETY: `field_raw_opt` only accepts non-empty inputs
    let (after_fields, parsed) = many0(field_raw_opt)(input).unwrap();

    // SAFETY: `rest` (last case) always succeeds.
    let (remaining, trailing) = alt((
        // An empty line ends the header section.
        map(obs_crlf, |_| None),
        // When no body follows the headers the empty line is optional, so
        // EOF is accepted as a terminator too.
        map(eof, |_| None),
        // Best effort: bytes left before EOF are parsed as a last field, as
        // if EOF were a CRLF.
        map(consumed(pair(field_name, rest)), |(_i, (name, body))| {
            #[cfg(feature = "tracing-recover")]
            warn!(input = %bytes_to_trace_string(_i), "raw field before EOF");
            Some(FieldRaw { name, body })
        }),
        map(rest, |_i: &[u8]| {
            #[cfg(feature = "tracing-unsupported")]
            warn!(input = %bytes_to_trace_string(_i), "raw bytes before EOF");
            None
        }),
    ))(after_fields)
    .unwrap();

    // Keep only the well-formed fields (`None`s are "bad" lines), then append
    // the trailing field recovered before EOF, if there is one.
    let fields = parsed.into_iter().flatten().chain(trailing).collect();

    (remaining, fields)
}

// Parses one header field: a field name (including its `:`) followed by a
// foldable line, which becomes the raw body.
// NOTE: field_raw only recognizes non-empty inputs.
fn field_raw(input: &[u8]) -> IResult<&[u8], FieldRaw<'_>> {
    let (remaining, (name, body)) = pair(field_name, foldable_line(false))(input)?;
    Ok((remaining, FieldRaw { name, body }))
}

// Best-effort counterpart of `field_raw`: it also accepts lines that cannot be
// parsed as a field name and body, and yields `None` for those.
// NOTE: `field_raw_opt` only recognizes non-empty inputs.
// NOTE: in the best-effort case, `foldable_line` only recognizes non-empty
// lines. This matters: otherwise it would consume the final empty line
// (obs_crlf) that terminates `header_kv`.
fn field_raw_opt(input: &[u8]) -> IResult<&[u8], Option<FieldRaw<'_>>> {
    // Preferred alternative: a proper `name: body` field.
    let well_formed = map(field_raw, Some);

    // Fallback: a (non-empty) foldable line that cannot even be parsed as a
    // field name and body. It is dropped afterwards.
    let malformed = map(foldable_line(true), |_i| {
        #[cfg(feature = "tracing-unsupported")]
        warn!(input = %bytes_to_trace_string(_i), "malformed raw header line");
        None
    });

    alt((well_formed, malformed))(input)
}

/// Header field name
/// ```abnf
/// field-name =   1*ftext
/// ftext      =   %d33-57 /          ; Printable US-ASCII
///                %d59-126           ;  characters not including
///                                   ;  ":".
/// followed by *WSP in the obsolete syntax
/// ```
pub fn field_name(input: &[u8]) -> IResult<&[u8], FieldName<'_>> {
    terminated(
        take_while1(is_ftext).map(|s| FieldName(Cow::Borrowed(s))),
        tuple((space0, tag(b":"))),
    )(input)
}

// `ftext`: printable US-ASCII (0x21..=0x7E) except the colon (0x3A).
fn is_ftext(c: u8) -> bool {
    matches!(c, 0x21..=0x39 | 0x3B..=0x7E)
}

// Parse a raw header field as an unstructured header

/// A header field whose body is interpreted as unstructured text.
///
/// Built from a `FieldRaw` by `Unstructured::from_raw`.
#[derive(Debug, PartialEq, Clone, ContainsUtf8, ToStatic)]
#[cfg_attr(feature = "arbitrary", derive(Arbitrary, FuzzEq))]
pub struct Unstructured<'a> {
    /// The field name, copied from the raw field.
    pub name: FieldName<'a>,
    /// The body as parsed by `misc_token::unstructured`.
    pub body: misc_token::Unstructured<'a>,
    /// The body bytes as they appear in the raw field.
    pub raw_body: RawInput<'a>,
}

impl<'a> Unstructured<'a> {
    /// Interprets the body of a raw field as unstructured text.
    ///
    /// Returns `None` when `misc_token::unstructured` cannot consume the
    /// whole body.
    // TODO: don't throw away the errors
    pub fn from_raw(f: &FieldRaw<'a>) -> Option<Self> {
        let parsed = all_consuming(misc_token::unstructured)(f.body);
        parsed.ok().map(|(_, body)| Unstructured {
            name: f.name.clone(),
            body,
            raw_body: f.body.into(),
        })
    }
}
impl<'a> Print for Unstructured<'a> {
    fn print(&self, fmt: &mut impl Formatter) {
        print_unstructured(fmt, &self.name.0, &self.body)
    }
}

// Helper to print structured headers

pub fn print<T: Print>(fmt: &mut impl Formatter, name: &[u8], body: T) {
    fmt.write_bytes(name);
    fmt.write_bytes(b":");
    fmt.write_fws();
    body.print(fmt);
    fmt.write_crlf();
}

/// Writes one unstructured header field: `name`, a `:`, the printed `body`,
/// and a terminating CRLF, in that order.
///
/// Unlike `print`, no folding whitespace is written after the colon.
// NOTE(review): presumably the unstructured body carries its own leading
// whitespace (the parsed body in `test_unstructured` starts with an `Fws`
// token) -- confirm.
pub fn print_unstructured<'a>(
    fmt: &mut impl Formatter,
    name: &[u8],
    body: &misc_token::Unstructured<'a>,
) {
    fmt.write_bytes(name);
    fmt.write_bytes(b":");
    body.print(fmt);
    fmt.write_crlf();
}

// Unit tests for raw header framing (`field_raw`, `header_kv`) and for the
// conversion of a raw field into an `Unstructured` header.
#[cfg(test)]
mod tests {
    use super::*;
    use misc_token::{UnstrToken, UnstrTxtKind};

    // `field_raw` splits a well-formed field into name and raw body; the
    // terminating CRLF is consumed but is not part of the body.
    #[test]
    fn test_field_raw_good() {
        let (rest, f) = field_raw(b"X-Unknown: something something\r\n").unwrap();
        assert!(rest.is_empty());
        assert_eq!(
            f,
            FieldRaw {
                name: FieldName(b"X-Unknown".into()),
                body: &b" something something"[..],
            }
        );

        // an empty body is accepted
        let (rest, f) = field_raw(b"X-Foo:\r\n").unwrap();
        assert!(rest.is_empty());
        assert_eq!(
            f,
            FieldRaw {
                name: FieldName(b"X-Foo".into()),
                body: &b""[..],
            }
        );

        // with line folding
        // (the folds stay in the raw body; only the final CRLF is stripped)
        let (rest, f) = field_raw(b"From:\r\n foo@example.com\r\n abcd\r\n").unwrap();
        assert!(rest.is_empty());
        assert_eq!(
            f,
            FieldRaw {
                name: FieldName(b"From".into()),
                body: &b"\r\n foo@example.com\r\n abcd"[..],
            }
        );
    }

    // `Unstructured::from_raw` tokenizes the body and keeps the raw bytes
    // alongside the parsed tokens.
    #[test]
    fn test_unstructured() {
        let u = Unstructured::from_raw(&FieldRaw {
            name: FieldName(b"X-Unknown".into()),
            body: &b" something something"[..],
        })
        .unwrap();
        assert_eq!(
            u,
            Unstructured {
                name: FieldName(b"X-Unknown".into()),
                body: misc_token::Unstructured(vec![
                    UnstrToken::from_plain(" ", UnstrTxtKind::Fws),
                    UnstrToken::from_plain("something", UnstrTxtKind::Txt),
                    UnstrToken::from_plain(" ", UnstrTxtKind::Fws),
                    UnstrToken::from_plain("something", UnstrTxtKind::Txt),
                ]),
                raw_body: b" something something".into(),
            }
        )
    }

    // Headers ending at EOF, with no empty line and no body after them.
    #[test]
    fn test_no_body() {
        let (rest, fields) = header_kv(b"X-Foo: something something\r\nX-Bar: something else\r\n");
        assert!(rest.is_empty());
        assert_eq!(
            fields,
            vec![
                FieldRaw {
                    name: FieldName(b"X-Foo".into()),
                    body: b" something something"
                },
                FieldRaw {
                    name: FieldName(b"X-Bar".into()),
                    body: b" something else"
                },
            ]
        )
    }

    // An empty line (CRLF or bare LF) at the very start ends the header
    // section immediately; everything after it is returned untouched, even
    // when it starts with whitespace.
    #[test]
    fn test_no_headers() {
        let (rest, fields) = header_kv(b"\r\nthe rest");
        assert_eq!(rest, b"the rest");
        assert_eq!(fields, vec![]);

        let (rest, fields) = header_kv(b"\nthe rest");
        assert_eq!(rest, b"the rest");
        assert_eq!(fields, vec![]);

        let (rest, fields) = header_kv(b"\n\t\t\t");
        assert_eq!(rest, b"\t\t\t");
        assert_eq!(fields, vec![]);

        let (rest, fields) = header_kv(b"\n\t\t\t\r\n");
        assert_eq!(rest, b"\t\t\t\r\n");
        assert_eq!(fields, vec![]);
    }

    // Best effort: a well-formed last field without its terminating CRLF is
    // still returned.
    #[test]
    fn test_best_effort_good_before_eof() {
        let (rest, fields) = header_kv(b"X-Foo: something something\r\nX-Bar: something else");
        assert!(rest.is_empty());
        assert_eq!(
            fields,
            vec![
                FieldRaw {
                    name: FieldName(b"X-Foo".into()),
                    body: b" something something"
                },
                FieldRaw {
                    name: FieldName(b"X-Bar".into()),
                    body: b" something else"
                },
            ]
        )
    }

    // Best effort: trailing bytes that are not a field are consumed and
    // dropped; the fields parsed before them are kept.
    #[test]
    fn test_best_effort_bad_before_eof() {
        let (rest, fields) = header_kv(b"X-Foo: something something\r\nrandom junk");
        assert!(rest.is_empty());
        assert_eq!(
            fields,
            vec![FieldRaw {
                name: FieldName(b"X-Foo".into()),
                body: b" something something"
            },]
        )
    }
}