email-parser 0.5.0

The fastest and lightest email parsing Rust library. Supports MIME.
Documentation
use crate::{parsing::fields::unknown, prelude::*};
use std::borrow::Cow;
use std::collections::HashMap;

use super::multipart;

pub fn raw_entity(mut input: Cow<[u8]>) -> Result<RawEntity, Error> {
    let (
        encoding,
        mime_type,
        subtype,
        parameters,
        additional_headers,
        id,
        description,
        disposition,
    ) = match input {
        Cow::Borrowed(ref mut input) => {
            let (len, r) = header_part(input)?;
            *input = &input[input.len() - len..];
            r
        }
        Cow::Owned(ref mut input) => {
            let (len, r) = header_part_owned(input)?;
            input.drain(..input.len() - len);
            r
        }
    };
    let value = decode_value(input, encoding)?;

    Ok(RawEntity {
        mime_type,
        subtype,
        parameters,
        id,
        description,
        value,
        additional_headers,
        #[cfg(feature = "content-disposition")]
        disposition,
    })
}

pub fn entity(mut raw_entity: RawEntity) -> Result<Entity, Error> {
    if raw_entity.mime_type == ContentType::Multipart {
        match raw_entity.value {
            Cow::Borrowed(value) => {
                return Ok(Entity::Multipart {
                    subtype: raw_entity.subtype,
                    content: multipart::parse_multipart(value, raw_entity.parameters)?,
                })
            }
            Cow::Owned(value) => {
                return Ok(Entity::Multipart {
                    subtype: raw_entity.subtype,
                    content: multipart::parse_multipart_owned(value, raw_entity.parameters)?,
                })
            }
        }
    }

    if raw_entity.mime_type == ContentType::Text {
        use textcode::*;

        if raw_entity.subtype == "plain" && raw_entity.parameters.get("charset").is_none() {
            raw_entity
                .parameters
                .insert("charset".into(), "us-ascii".into());
        }

        if let Some(charset) = raw_entity.parameters.get("charset") {
            let charset = charset.to_lowercase();

            let value: Cow<str> = match charset.as_str() {
                "utf-8" | "us-ascii" => match raw_entity.value {
                    Cow::Borrowed(value) => Cow::Borrowed(
                        std::str::from_utf8(value)
                            .map_err(|_| Error::Unknown("Invalid text encoding"))?,
                    ),
                    Cow::Owned(value) => Cow::Owned(
                        String::from_utf8(value)
                            .map_err(|_| Error::Unknown("Invalid text encoding"))?,
                    ),
                },
                "iso-8859-1" => Cow::Owned(iso8859_1::decode_to_string(&raw_entity.value)),
                "iso-8859-2" => Cow::Owned(iso8859_2::decode_to_string(&raw_entity.value)),
                "iso-8859-3" => Cow::Owned(iso8859_3::decode_to_string(&raw_entity.value)),
                "iso-8859-4" => Cow::Owned(iso8859_4::decode_to_string(&raw_entity.value)),
                "iso-8859-5" => Cow::Owned(iso8859_5::decode_to_string(&raw_entity.value)),
                "iso-8859-6" => Cow::Owned(iso8859_6::decode_to_string(&raw_entity.value)),
                "iso-8859-7" => Cow::Owned(iso8859_7::decode_to_string(&raw_entity.value)),
                "iso-8859-8" => Cow::Owned(iso8859_8::decode_to_string(&raw_entity.value)),
                "iso-8859-9" => Cow::Owned(iso8859_9::decode_to_string(&raw_entity.value)),
                "iso-8859-10" => Cow::Owned(iso8859_10::decode_to_string(&raw_entity.value)),
                "iso-8859-11" => Cow::Owned(iso8859_11::decode_to_string(&raw_entity.value)),
                "iso-8859-13" => Cow::Owned(iso8859_13::decode_to_string(&raw_entity.value)),
                "iso-8859-14" => Cow::Owned(iso8859_14::decode_to_string(&raw_entity.value)),
                "iso-8859-15" => Cow::Owned(iso8859_15::decode_to_string(&raw_entity.value)),
                "iso-8859-16" => Cow::Owned(iso8859_16::decode_to_string(&raw_entity.value)),
                "iso-6937" => Cow::Owned(iso6937::decode_to_string(&raw_entity.value)),
                "gb2312" => Cow::Owned(gb2312::decode_to_string(&raw_entity.value)),
                _ => return Ok(Entity::Unknown(Box::new(raw_entity))),
            };

            return Ok(Entity::Text {
                subtype: raw_entity.subtype,
                value,
            });
        }
    }

    Ok(Entity::Unknown(Box::new(raw_entity)))
}

pub fn header_part_owned(
    input: &[u8],
) -> Result<
    (
        usize,
        (
            ContentTransferEncoding<'static>,
            ContentType<'static>,
            Cow<'static, str>,
            HashMap<Cow<'static, str>, Cow<'static, str>>,
            Vec<(Cow<'static, str>, Cow<'static, str>)>,
            Option<(Cow<'static, str>, Cow<'static, str>)>,
            Option<Cow<'static, str>>,
            Option<Disposition<'static>>,
        ),
    ),
    Error,
> {
    let (r, (ct, mt, st, p, ah, id, desc, disp)) = header_part(&input)?;

    Ok((
        r,
        (
            ct.into_owned(),
            mt.into_owned(),
            Cow::Owned(st.into_owned()),
            p.into_iter()
                .map(|(n, v)| (Cow::Owned(n.into_owned()), Cow::Owned(v.into_owned())))
                .collect(),
            ah.into_iter()
                .map(|(n, v)| (Cow::Owned(n.into_owned()), Cow::Owned(v.into_owned())))
                .collect(),
            id.map(|(f, l)| (Cow::Owned(f.into_owned()), Cow::Owned(l.into_owned()))),
            desc.map(|desc| (Cow::Owned(desc.into_owned()))),
            disp.map(|disp| disp.into_owned()),
        ),
    ))
}

pub fn header_part(
    mut input: &[u8],
) -> Result<
    (
        usize,
        (
            ContentTransferEncoding,
            ContentType,
            Cow<str>,
            HashMap<Cow<str>, Cow<str>>,
            Vec<(Cow<str>, Cow<str>)>,
            Option<(Cow<str>, Cow<str>)>,
            Option<Cow<str>>,
            Option<Disposition>,
        ),
    ),
    Error,
> {
    let mut encoding = None;
    let mut mime_type = None;
    let mut id = None;
    let mut description = None;
    let mut disposition = None;
    let mut additional_headers = Vec::new();

    loop {
        if let Ok((new_input, content_transfer_encoding)) = content_transfer_encoding(input) {
            input = new_input;
            encoding = Some(content_transfer_encoding);
        } else if let Ok((new_input, content_type)) = content_type(input) {
            input = new_input;
            mime_type = Some(content_type);
        } else if let Ok((new_input, cid)) = content_id(input) {
            input = new_input;
            id = Some(cid);
        } else if let Ok((new_input, cdescription)) = content_description(input) {
            input = new_input;
            description = Some(cdescription);
        } else {
            if cfg!(feature = "content-disposition") {
                if let Ok((new_input, cdisposition)) = content_disposition(input) {
                    input = new_input;
                    disposition = Some(cdisposition);
                    continue;
                }
            }

            if let Ok((new_input, (name, value))) = unknown(input) {
                input = new_input;
                additional_headers.push((Cow::Borrowed(name), value));
                continue;
            }

            break;
        }
    }

    let encoding = encoding.unwrap_or(ContentTransferEncoding::SevenBit);
    let (mime_type, subtype, parameters) = mime_type.unwrap_or((
        ContentType::Text,
        Cow::Borrowed("plain"),
        vec![(Cow::Borrowed("charset"), Cow::Borrowed("us-ascii"))]
            .into_iter()
            .collect(),
    ));

    if input.is_empty() {
        return Ok((
            input.len(),
            (
                encoding,
                mime_type,
                subtype,
                parameters,
                additional_headers,
                id,
                description,
                disposition,
            ),
        ));
    }

    let (input, _) = tag(&input, b"\r\n")?;

    Ok((
        input.len(),
        (
            encoding,
            mime_type,
            subtype,
            parameters,
            additional_headers,
            id,
            description,
            disposition,
        ),
    ))
}

pub fn decode_value<'a>(
    value: Cow<'a, [u8]>,
    encoding: ContentTransferEncoding,
) -> Result<Cow<'a, [u8]>, Error> {
    Ok(match encoding {
        ContentTransferEncoding::Base64 => {
            Cow::Owned(super::base64::decode_base64(value.into_owned())?)
        }
        ContentTransferEncoding::SevenBit => value, // No need to check, we have to be tolerant
        ContentTransferEncoding::HeightBit => value,
        ContentTransferEncoding::QuotedPrintable => {
            Cow::Owned(super::quoted_printables::decode_qp(value.into_owned()))
        }
        ContentTransferEncoding::Unknown(_) => {
            return Err(Error::Unknown("Unknown format")); // FIXME: Allow user to get this data
        }
        ContentTransferEncoding::Binary => value,
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn raw_entity_test() {
        assert_eq!(
            RawEntity {
                mime_type: ContentType::Text,
                subtype: "plain".into(),
                description: None,
                id: None,
                parameters: vec![(Cow::Borrowed("charset"), Cow::Borrowed("us-ascii"))]
                    .into_iter()
                    .collect(),
                value: Cow::Borrowed(&[84, 101, 120, 116]),
                #[cfg(feature = "content-disposition")]
                disposition: None,
                additional_headers: vec![]
            },
            raw_entity(Cow::Borrowed(b"\r\nText")).unwrap()
        );
        assert_eq!(
            RawEntity {
                mime_type: ContentType::Text,
                subtype: "plain".into(),
                description: None,
                id: None,
                parameters: vec![(Cow::Borrowed("charset"), Cow::Borrowed("us-ascii"))]
                    .into_iter()
                    .collect(),
                value: Cow::Borrowed(&[84, 101, 120, 116]),
                #[cfg(feature = "content-disposition")]
                disposition: None,
                additional_headers: vec![]
            },
            raw_entity(Cow::Owned(b"\r\nText".to_vec())).unwrap()
        );
        assert_eq!(
            RawEntity {
                mime_type: ContentType::Text,
                subtype: "html".into(),
                description: None,
                id: None,
                parameters: vec![(Cow::Borrowed("charset"), Cow::Borrowed("utf-8"))]
                    .into_iter()
                    .collect(),
                value: Cow::Borrowed(&[60, 112, 62, 84, 101, 120, 116, 60, 47, 112, 62]),
                #[cfg(feature = "content-disposition")]
                disposition: None,
                additional_headers: vec![("Unknown".into(), " Test".into())]
            },
            raw_entity(Cow::Owned(
                b"Content-type: text/html; charset=utf-8\r\nUnknown: Test\r\n\r\n<p>Text</p>"
                    .to_vec()
            ))
            .unwrap()
        );

        println!("{:?}", raw_entity(Cow::Borrowed(b"Content-type: multipart/alternative; boundary=\"simple boundary\"\r\n\r\nThis is the preamble.  It is to be ignored, though it\r\nis a handy place for composition agents to include an\r\nexplanatory note to non-MIME conformant readers.\r\n\r\n--simple boundary\r\n\r\nThis is implicitly typed plain US-ASCII text.\r\nIt does NOT end with a linebreak.\r\n--simple boundary\r\nContent-type: text/plain; charset=us-ascii\r\n\r\nThis is explicitly typed plain US-ASCII text.\r\nIt DOES end with a linebreak.\r\n\r\n--simple boundary--\r\n\r\nThis is the epilogue.  It is also to be ignored.")).unwrap());
    }

    #[test]
    fn entity_test() {
        assert_eq!(Entity::Text {
            subtype: "html".into(),
            value: "<p>Testé</p>".into()
        }, raw_entity(Cow::Owned(b"Content-type: text/html; charset=utf-8\r\nContent-Transfer-Encoding: quoted-printable\r\n\r\n<p>Test=C3=A9</p>".to_vec())).unwrap().parse().unwrap());
    }
}