mailparse 0.9.0

A simple parser for MIME e-mail messages
Documentation
/// A single parsed e-mail address together with its optional display name.
///
/// The parser stores the address text as it appears in the input; it does not
/// check that the text is a well-formed `local@domain` address.
#[derive(Debug, PartialEq)]
pub struct SingleInfo {
    /// The display name that preceded the address, if one was present.
    pub display_name: Option<String>,
    /// The address text itself.
    pub addr: String,
}

impl SingleInfo {
    /// Builds a `SingleInfo` from an optional display name and an address string.
    fn new(name: Option<String>, addr: String) -> Self {
        Self { display_name: name, addr }
    }
}

/// A named group of addresses, written in a header as `group-name: member, member;`.
#[derive(Debug, PartialEq)]
pub struct GroupInfo {
    /// The name that appeared before the `:` introducing the group.
    pub group_name: String,
    /// The member addresses listed between the `:` and the terminating `;`.
    /// May be empty.
    pub addrs: Vec<SingleInfo>,
}

impl GroupInfo {
    /// Builds a `GroupInfo` from a group name and its member addresses.
    fn new(name: String, addrs: Vec<SingleInfo>) -> Self {
        Self { group_name: name, addrs }
    }
}

/// One entry of a parsed address list: either a named group of addresses or a
/// single address.
#[derive(Debug, PartialEq)]
pub enum MailAddr {
    /// A named group together with its (possibly empty) list of member addresses.
    Group(GroupInfo),
    /// A single address with an optional display name.
    Single(SingleInfo),
}

/// States of the character-by-character state machine used by `addrparse_inner`.
#[derive(Debug)]
enum AddrParseState {
    /// At the start of an entry: leading whitespace is skipped and the first
    /// significant character decides which state comes next.
    Initial,
    /// Inside a double-quoted display name.
    QuotedName,
    /// Directly after a backslash inside a quoted name; the next character is
    /// appended to the name literally.
    EscapedChar,
    /// After the closing quote of a display name, waiting for `<` (address) or
    /// `:` (group). Other text here is tolerated and appended to the name.
    AfterQuotedName,
    /// Inside `<...>`, accumulating the address until the closing `>`.
    BracketedAddr,
    /// After the closing `>`; only whitespace, `,`, `;` or a `(` comment may follow.
    AfterBracketedAddr,
    /// Accumulating unquoted text, which becomes a bare address, a display name
    /// (if `<` follows) or a group name (if `:` follows).
    Unquoted,
    /// Inside a parenthesised comment following a bracketed address; characters
    /// are discarded until `)`.
    TrailerComment,
}

/// Parses the value of an address header (such as `To` or `From`) into a list
/// of single addresses and named groups.
///
/// An empty string yields an empty list.
///
/// # Errors
///
/// Returns a static description of the problem when the input is malformed,
/// for example when a quoted name or bracketed address is left unterminated,
/// or when a group terminator `;` appears where no address or group precedes it.
pub fn addrparse(addrs: &str) -> Result<Vec<MailAddr>, &'static str> {
    // Top-level parse: we are not inside a group yet.
    addrparse_inner(&mut addrs.chars(), false)
}

fn addrparse_inner(it: &mut std::str::Chars, in_group: bool) -> Result<Vec<MailAddr>, &'static str> {
    let mut result = vec![];
    let mut state = AddrParseState::Initial;

    let mut c = match it.next() {
        None => return Ok(vec![]),
        Some(v) => v,
    };

    let mut name = None;
    let mut addr = None;
    let mut post_quote_ws = None;

    loop {
        match state {
            AddrParseState::Initial => {
                if c.is_whitespace() {
                    // continue in same state
                } else if c == '"' {
                    state = AddrParseState::QuotedName;
                    name = Some(String::new());
                } else if c == '<' {
                    state = AddrParseState::BracketedAddr;
                    addr = Some(String::new());
                } else if c == ';' {
                    if !in_group {
                        return Err("Unexpected group terminator found in initial list");
                    }
                    return Ok(result);
                } else {
                    state = AddrParseState::Unquoted;
                    addr = Some(String::new());
                    addr.as_mut().unwrap().push(c);
                }
            }
            AddrParseState::QuotedName => {
                if c == '\\' {
                    state = AddrParseState::EscapedChar;
                } else if c == '"' {
                    state = AddrParseState::AfterQuotedName;
                } else {
                    name.as_mut().unwrap().push(c);
                }
            }
            AddrParseState::EscapedChar => {
                state = AddrParseState::QuotedName;
                name.as_mut().unwrap().push(c);
            }
            AddrParseState::AfterQuotedName => {
                if c.is_whitespace() {
                    if post_quote_ws.is_none() {
                        post_quote_ws = Some(String::new());
                    }
                    post_quote_ws.as_mut().unwrap().push(c);
                } else if c == '<' {
                    state = AddrParseState::BracketedAddr;
                    addr = Some(String::new());
                } else if c == ':' {
                    if in_group {
                        return Err("Found unexpected nested group");
                    }
                    let group_addrs = try!(addrparse_inner(it, true));
                    state = AddrParseState::Initial;
                    result.push(MailAddr::Group(GroupInfo::new(
                        name.unwrap(),
                        group_addrs.into_iter().map(|addr| {
                            match addr {
                                MailAddr::Single(s) => s,
                                MailAddr::Group(_) => panic!("Unexpected nested group encountered"),
                            }
                        }).collect()
                    )));
                    name = None;
                } else {
                    // I think technically not valid, but this occurs in real-world corpus, so
                    // handle gracefully
                    if c == '"' {
                        post_quote_ws.map(|ws| name.as_mut().unwrap().push_str(&ws));
                        state = AddrParseState::QuotedName;
                    } else {
                        post_quote_ws.map(|ws| name.as_mut().unwrap().push_str(&ws));
                        name.as_mut().unwrap().push(c);
                    }
                    post_quote_ws = None;
                }
            }
            AddrParseState::BracketedAddr => {
                if c == '>' {
                    state = AddrParseState::AfterBracketedAddr;
                    result.push(MailAddr::Single(SingleInfo::new(name, addr.unwrap())));
                    name = None;
                    addr = None;
                } else {
                    addr.as_mut().unwrap().push(c);
                }
            }
            AddrParseState::AfterBracketedAddr => {
                if c.is_whitespace() {
                    // continue in same state
                } else if c == ',' {
                    state = AddrParseState::Initial;
                } else if c == ';' {
                    if in_group {
                        return Ok(result);
                    }
                    // Technically not valid, but a similar case occurs in real-world corpus, so handle it gracefully
                    state = AddrParseState::Initial;
                } else if c == '(' {
                    state = AddrParseState::TrailerComment;
                } else {
                    return Err("Unexpected char found after bracketed address");
                }
            }
            AddrParseState::Unquoted => {
                if c == '<' {
                    state = AddrParseState::BracketedAddr;
                    name = addr.map(|s| s.trim_end().to_owned());
                    addr = Some(String::new());
                } else if c == ',' {
                    state = AddrParseState::Initial;
                    result.push(MailAddr::Single(SingleInfo::new(None, addr.unwrap().trim_end().to_owned())));
                    addr = None;
                } else if c == ';' {
                    result.push(MailAddr::Single(SingleInfo::new(None, addr.unwrap().trim_end().to_owned())));
                    if in_group {
                        return Ok(result);
                    }
                    // Technically not valid, but occurs in real-world corpus, so handle it gracefully
                    state = AddrParseState::Initial;
                    addr = None;
                } else if c == ':' {
                    if in_group {
                        return Err("Found unexpected nested group");
                    }
                    let group_addrs = try!(addrparse_inner(it, true));
                    state = AddrParseState::Initial;
                    result.push(MailAddr::Group(GroupInfo::new(
                        addr.unwrap().trim_end().to_owned(),
                        group_addrs.into_iter().map(|addr| {
                            match addr {
                                MailAddr::Single(s) => s,
                                MailAddr::Group(_) => panic!("Unexpected nested group encountered"),
                            }
                        }).collect()
                    )));
                    addr = None;
                } else {
                    addr.as_mut().unwrap().push(c);
                }
            }
            AddrParseState::TrailerComment => {
                if c == ')' {
                    state = AddrParseState::AfterBracketedAddr;
                }
            }
        }

        c = match it.next() {
            None => break,
            Some(v) => v,
        };
    }

    if in_group {
        return Err("Found unterminated group address");
    }

    match state {
        AddrParseState::QuotedName |
        AddrParseState::EscapedChar |
        AddrParseState::AfterQuotedName |
        AddrParseState::BracketedAddr |
        AddrParseState::TrailerComment => {
            Err("Address string unexpected terminated")
        }
        AddrParseState::Unquoted => {
            result.push(MailAddr::Single(SingleInfo::new(None, addr.unwrap().trim_end().to_owned())));
            Ok(result)
        }
        _ => {
            Ok(result)
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Expected `SingleInfo` for an address with an optional display name.
    fn info(name: Option<&str>, addr: &str) -> SingleInfo {
        SingleInfo::new(name.map(String::from), String::from(addr))
    }

    /// Expected top-level single-address entry.
    fn single(name: Option<&str>, addr: &str) -> MailAddr {
        MailAddr::Single(info(name, addr))
    }

    /// Expected top-level group entry.
    fn group(name: &str, members: Vec<SingleInfo>) -> MailAddr {
        MailAddr::Group(GroupInfo::new(String::from(name), members))
    }

    /// Parses `input`, panicking if the parser reports an error.
    fn parsed(input: &str) -> Vec<MailAddr> {
        addrparse(input).unwrap()
    }

    #[test]
    fn parse_basic() {
        // (input, expected display name, expected address)
        let cases: [(&str, Option<&str>, &str); 6] = [
            ("foo bar <foo@bar.com>", Some("foo bar"), "foo@bar.com"),
            ("\"foo bar\" <foo@bar.com>", Some("foo bar"), "foo@bar.com"),
            ("foo@bar.com ", None, "foo@bar.com"),
            ("foo <bar>", Some("foo"), "bar"),
            ("\"foo\" <bar>", Some("foo"), "bar"),
            ("\"foo \" <bar>", Some("foo "), "bar"),
        ];
        for &(input, name, addr) in cases.iter() {
            assert_eq!(parsed(input), vec![single(name, addr)]);
        }
    }

    #[test]
    fn parse_backslashes() {
        // Inside quotes the backslash escapes are resolved...
        assert_eq!(
            parsed(r#" "First \"nick\" Last" <user@host.tld> "#),
            vec![single(Some("First \"nick\" Last"), "user@host.tld")]
        );
        // ...while in an unquoted name they are kept verbatim.
        assert_eq!(
            parsed(r#" First \"nick\" Last <user@host.tld> "#),
            vec![single(Some("First \\\"nick\\\" Last"), "user@host.tld")]
        );
    }

    #[test]
    fn parse_multi() {
        let expected = vec![
            single(Some("foo"), "bar"),
            single(None, "joe"),
            single(Some("baz"), "quux"),
        ];
        assert_eq!(parsed("foo <bar>, joe, baz <quux>"), expected);
    }

    #[test]
    fn parse_empty_group() {
        assert_eq!(parsed("empty-group:;"), vec![group("empty-group", vec![])]);
        assert_eq!(parsed(" empty-group : ; "), vec![group("empty-group", vec![])]);
    }

    #[test]
    fn parse_simple_group() {
        assert_eq!(
            parsed("bar-group: foo <foo@bar.com>;"),
            vec![group("bar-group", vec![info(Some("foo"), "foo@bar.com")])]
        );
        assert_eq!(
            parsed("bar-group: foo <foo@bar.com>, baz@bar.com;"),
            vec![group(
                "bar-group",
                vec![info(Some("foo"), "foo@bar.com"), info(None, "baz@bar.com")]
            )]
        );
    }

    #[test]
    fn parse_mixed() {
        assert_eq!(
            parsed("joe@bloe.com, bar-group: foo <foo@bar.com>;"),
            vec![
                single(None, "joe@bloe.com"),
                group("bar-group", vec![info(Some("foo"), "foo@bar.com")]),
            ]
        );
        assert_eq!(
            parsed("bar-group: foo <foo@bar.com>; joe@bloe.com"),
            vec![
                group("bar-group", vec![info(Some("foo"), "foo@bar.com")]),
                single(None, "joe@bloe.com"),
            ]
        );
        assert_eq!(
            parsed("flim@flam.com, bar-group: foo <foo@bar.com>; joe@bloe.com"),
            vec![
                single(None, "flim@flam.com"),
                group("bar-group", vec![info(Some("foo"), "foo@bar.com")]),
                single(None, "joe@bloe.com"),
            ]
        );
        assert_eq!(
            parsed("first-group:; flim@flam.com, bar-group: foo <foo@bar.com>; joe@bloe.com, final-group: zip, zap, \"Zaphod\" <zaphod@beeblebrox>;"),
            vec![
                group("first-group", vec![]),
                single(None, "flim@flam.com"),
                group("bar-group", vec![info(Some("foo"), "foo@bar.com")]),
                single(None, "joe@bloe.com"),
                group(
                    "final-group",
                    vec![
                        info(None, "zip"),
                        info(None, "zap"),
                        info(Some("Zaphod"), "zaphod@beeblebrox"),
                    ]
                ),
            ]
        );
    }

    #[test]
    fn real_world_examples() {
        // taken from a real "From" header. This might not be valid according to the RFC
        // but obviously made it through the internet so we should at least not crash.
        assert_eq!(
            parsed("\"The Foo of Bar\" Course Staff <foo-no-reply@bar.edx.org>"),
            vec![single(Some("The Foo of Bar Course Staff"), "foo-no-reply@bar.edx.org")]
        );

        // This one has a comment tacked on to the end. Adding proper support for comments seems
        // complicated so I just added trailer comment support.
        assert_eq!(
            parsed("John Doe <support@github.com> (GitHub Staff)"),
            vec![single(Some("John Doe"), "support@github.com")]
        );

        // Taken from a real world "To" header. It was spam, but still...
        assert_eq!(parsed("foo@bar.com;"), vec![single(None, "foo@bar.com")]);
    }
}