cargo_author 1.0.7

Parse author fields (name <email> (url)) used by Cargo and npm
Documentation
//! Extract structured information from `authors` fields used in `Cargo.toml`
//!
//! It's tweaked to handle actual odd/broken cases found in Cargo crates.
//!
//! Names are normalized to Unicode NFC (emails and URLs are only trimmed), since some tools tend to insert decomposed names that expose rendering bugs in software.
#![allow(clippy::needless_raw_string_hashes)]

use regex::{Captures, Regex};
use std::str::FromStr;
use unicode_normalization::UnicodeNormalization;

/// The pieces of a single `authors` entry, e.g. `Name <email> (url)`.
///
/// Every field is optional because real-world entries frequently omit
/// (or mangle) any of the three parts.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct Author {
    /// Display name of the author, if any text could be interpreted as one.
    pub name: Option<String>,
    /// Email address, without the surrounding `<`/`>`.
    pub email: Option<String>,
    /// Homepage or profile URL, without the surrounding parentheses.
    pub url: Option<String>,
}

impl Author {
    /// Parse an author string of the general form `name <email> (url)`.
    ///
    /// The pieces are pulled out of `s` in a fixed order, each at most once,
    /// and whatever text is left over becomes the name:
    ///
    /// 1. a leading double-quoted name (`"Name" ...`),
    /// 2. a URL, either `(http...)` in parentheses or a bare `http://` / `https://` run,
    /// 3. an email in angle brackets (`<user@host>`; the closing `>` may be missing),
    /// 4. failing that, a bare `user@host.tld` at the start/end of the text or
    ///    delimited by punctuation.
    ///
    /// The name is trimmed and normalized to Unicode NFC; the email and URL are
    /// only trimmed. An empty `<>` placeholder left at the end of the name is dropped.
    ///
    /// This function can't fail, because anything unrecognizable is assumed to be the name.
    #[must_use]
    pub fn new(s: &str) -> Self {
        // Compiled once per process; the patterns are constants, so `unwrap` can only
        // fail on a programming error in the pattern itself.
        static REGEXES: std::sync::OnceLock<(Regex,Regex,Regex,Regex)> = std::sync::OnceLock::new();
        let (qname_regex, email_regex, raw_email_regex, url_regex) = REGEXES.get_or_init(|| (
            Regex::new(r#"^\s*"([^"]+)""#).unwrap(),
            Regex::new(r#"<([^>@]*@[^>]+)(?:>|$)"#).unwrap(), // Also fix "Foo <bar@baz"
            // NOTE(review): `.`, `-` and `_` are themselves `\p{P}`, so for a
            // whitespace-preceded address such as `John john.doe@example.com` this
            // looks like it would match from the inner `.` and yield `doe@example.com`.
            // Confirm against real data before tightening the delimiter.
            Regex::new(r#"(?:^|\p{P})([^()>,@\s]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9.-]+)(?:$|\p{P})"#).unwrap(),
            Regex::new(r#"\((http[^)]+)\)|https?://[^<>()"\s]+"#).unwrap(),
        ));

        // 1. Quoted name. It is trimmed like the unquoted fallback below, and a
        //    whitespace-only quoted name is discarded so the fallback can still apply.
        let mut name = None;
        let rest = qname_regex.replacen(s, 1, |m: &Captures<'_>| {
            name = Some(nfc(m[1].trim())).filter(|n| !n.is_empty());
            String::new()
        });

        // 2. URL: prefer the parenthesized capture, otherwise take the whole bare match.
        let mut url = None;
        let rest = url_regex.replacen(&rest, 1, |m: &Captures<'_>| {
            url = m.get(1).or_else(|| m.get(0)).map(|m| m.as_str().trim().to_owned());
            String::new()
        });

        // 3. Email in angle brackets.
        let mut email = None;
        let rest = email_regex.replacen(&rest, 1, |m: &Captures<'_>| {
            email = Some(m[1].trim().to_owned());
            String::new()
        });

        // 4. Bare email, only when no bracketed one was found.
        let rest = if email.is_none() {
            raw_email_regex.replacen(&rest, 1, |m: &Captures<'_>| {
                email = Some(m[1].trim().to_owned());
                String::new()
            })
        } else {rest};

        Author {
            // Leftover text is the name unless a quoted name was already found.
            name: name.or_else(|| {
                let rest = rest.trim();
                // Drop an empty `<>` email placeholder. Trimming first means trailing
                // whitespace or a missing space before `<>` no longer hides it.
                let rest = rest.strip_suffix("<>").map_or(rest, str::trim_end);
                (!rest.is_empty()).then(|| nfc(rest))
            }),
            email,
            url,
        }
    }
}

/// Returns a copy of `s` normalized to Unicode Normalization Form C.
///
/// Marked `#[inline(never)]` — presumably so the normalization code is not
/// duplicated at every call site; confirm before changing.
#[inline(never)]
fn nfc(s: &str) -> String {
    let mut composed = String::new();
    composed.extend(s.nfc());
    composed
}

impl FromStr for Author {
    type Err = std::convert::Infallible;

    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(Author::new(s))
    }
}

/// Exercises `Author::new` on odd real-world shapes: bare emails, a broken
/// `<>` suffix, decomposed Unicode, and every ordering/spacing of
/// name, email and URL.
#[test]
fn author_parse() {
    // Email wrapped in parentheses instead of angle brackets.
    let parenthesized = Author::new("Foo Bar-Baz! (foo.bar.baz@gmail.com)");
    assert_eq!(parenthesized.email.as_deref(), Some("foo.bar.baz@gmail.com"));
    assert_eq!(parenthesized.name.as_deref(), Some("Foo Bar-Baz!"));

    // A bare email is an email, not a name.
    let bare = Author::new("raw@email.com");
    assert_eq!(bare.email.as_deref(), Some("raw@email.com"));
    assert!(Author::new("raw@email.com").name.is_none());
    assert!(Author::new("contact@company.com").name.is_none());

    // Malformed or not-quite-email inputs.
    assert_eq!(Author::new("stupid sufix <>").name.unwrap(), "stupid sufix");
    assert_eq!(Author::new("eh <@badmail>").email.unwrap(), "@badmail");
    assert_eq!(Author::new("not raw@email").name.unwrap(), "not raw@email");

    // Decomposed input differs from the precomposed form until normalized.
    assert_ne!("A\u{0308}ffin", "Äffin");
    assert_eq!(Author::new("A\u{0308}ffin").name.unwrap(), "Äffin");

    // Every variant below must yield the same three parts.
    let variants: &[&str] = &[
        "\"Name\" <ema@il> (https://url)",
        "Name <ema@il> (https://url)",
        "Name <ema@il>(https://url)",
        " Name<ema@il> (https://url)",
        "  Name<ema@il>(https://url)",
        "  Name<ema@il>(https://url )",
        "Name (https://url) <ema@il>   ",
        "Name (https://url)<ema@il> ",
        "Name(https://url) <ema@il>",
        "Name(https://url) <ema@il",
        "Name(https://url)<ema@il>",
        " \"Name\" <ema@il>  https://url ",
        " Name <ema@il>  https://url",
        " Name <ema@il> https://url ",
        "Name<ema@il>  https://url",
        "Name<ema@il> https://url ",
        " Name  https://url  <ema@il>   ",
        " Name  https://url <ema@il> ",
        " Name https://url  <ema@il>",
        " Name https://url <ema@il>",
        " Name https://url < ema@il >",
    ];
    for &input in variants {
        let parsed = Author::new(input);
        // The input doubles as the panic message so a failing variant is identifiable.
        assert_eq!("Name", parsed.name.expect(input));
        assert_eq!("ema@il", parsed.email.expect(input));
        assert_eq!("https://url", parsed.url.expect(input));
    }
}