1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
//! Extract structured information from `authors` fields used in `Cargo.toml`
//!
//! The parser is deliberately lenient: it is tuned to cope with the odd and malformed `authors` entries that actually occur in published Cargo crates.
//!
//! Strings are normalized to Unicode NFC, since some tools tend to insert decomposed names that expose rendering bugs in software.

extern crate unicode_normalization;
extern crate regex;
#[macro_use] extern crate lazy_static;

use regex::{Regex, Captures};
use unicode_normalization::UnicodeNormalization;

/// One entry of a `Cargo.toml` `authors` list, split into its components.
///
/// Every part is optional: a field is `None` when the corresponding piece
/// was not present in the parsed string.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
pub struct Author {
    /// Display name of the author.
    pub name: Option<String>,
    /// Email address, stored without the surrounding `<` `>`.
    pub email: Option<String>,
    /// URL associated with the author, stored without surrounding parentheses.
    pub url: Option<String>,
}

impl Author {
    /// Parse a single `authors` entry into its name, email and URL parts.
    ///
    /// The pieces are peeled off the NFC-normalized input in this order:
    ///
    /// 1. a leading double-quoted name,
    /// 2. the first URL, either `(http…)` in parentheses or a bare
    ///    `http://` / `https://` token,
    /// 3. the first `<user@host>` email (a missing closing `>` at the end of
    ///    the input is tolerated),
    /// 4. failing that, a remainder that consists solely of a bare
    ///    `user@host.tld` address.
    ///
    /// Unless a quoted name was found, the text that is left over (minus an
    /// empty `<>` email placeholder and surrounding whitespace) becomes the
    /// name; if nothing is left, the name is `None`.
    ///
    /// This function can't fail, because anything unrecognizable is assumed to be the name.
    pub fn new(s: &str) -> Self {
        lazy_static! {
            static ref QNAME: Regex = Regex::new(r#"^\s*"([^"]+)""#).unwrap();
            static ref EMAIL: Regex = Regex::new(r#"<([^>@]+@[^>]+)(?:>|$)"#).unwrap(); // Also fix "Foo <bar@baz"
            static ref RAWEMAIL: Regex = Regex::new(r#"^[^>@\s]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9.-]+$"#).unwrap();
            static ref URL: Regex = Regex::new(r#"\((http[^)]+)\)|https?://[^<>()"\s]+"#).unwrap();
        }

        // Normalize first so that every extracted component is in NFC.
        let s = s.nfc().collect::<String>();

        // Each step cuts the recognized piece out of the text and hands the
        // remainder to the next step.
        let mut name = None;
        let rest = QNAME.replacen(&s, 1, |m: &Captures| {
            name = Some(m[1].to_owned());
            String::new()
        });

        let mut url = None;
        let rest = URL.replacen(&rest, 1, |m: &Captures| {
            // Group 1 is the parenthesized form; otherwise take the whole bare URL.
            url = m.get(1).or_else(|| m.get(0)).map(|m| m.as_str().to_owned());
            String::new()
        });

        let mut email = None;
        let rest = EMAIL.replacen(&rest, 1, |m: &Captures| {
            email = Some(m[1].to_owned());
            String::new()
        });

        // RAWEMAIL is anchored at both ends, so it has to see the remainder
        // without the whitespace left behind by the removals above (or present
        // in the input); otherwise "raw@email.com " would be taken as a name.
        let rest = rest.trim();
        let rest = if email.is_none() && RAWEMAIL.is_match(rest) {
            email = Some(rest.to_owned());
            ""
        } else {
            rest
        };

        // Drop an empty "<>" email placeholder, whether it stands alone or
        // follows the name after whitespace.
        let rest = if rest.ends_with("<>") {
            let head = &rest[..rest.len() - 2];
            if head.is_empty() || head.ends_with(char::is_whitespace) {
                head.trim()
            } else {
                rest
            }
        } else {
            rest
        };

        Author {
            // A quoted name wins; otherwise whatever text remains is the name.
            name: name.or_else(|| {
                if rest.is_empty() {
                    None
                } else {
                    Some(rest.to_owned())
                }
            }),
            email,
            url,
        }
    }
}

/// Exercises `Author::new` on bare emails, name-only strings, NFC
/// normalization, and many spacing/ordering variants of "name, email, URL".
#[test]
fn author_parse() {
    // A lone address is recognized as the email and leaves no name behind.
    let bare = Author::new("raw@email.com");
    assert_eq!(bare.email, Some("raw@email.com".to_owned()));
    assert_eq!(bare.name, None);
    assert_eq!(Author::new("contact@company.com").name, None);

    // Text that is not an email stays in the name; a trailing empty "<>" is dropped.
    let placeholder = Author::new("stupid sufix <>");
    assert_eq!(placeholder.name.unwrap(), "stupid sufix");
    let not_email = Author::new("not raw@email");
    assert_eq!(not_email.name.unwrap(), "not raw@email");

    // A decomposed "A" + combining diaeresis must come back precomposed.
    assert_ne!("A\u{0308}ffin", "Äffin");
    let normalized = Author::new("A\u{0308}ffin");
    assert_eq!(normalized.name.unwrap(), "Äffin");

    // Every variant below must yield the same three components.
    let variants = [
        "\"Name\" <ema@il> (https://url)",
        "Name <ema@il> (https://url)",
        "Name <ema@il>(https://url)",
        " Name<ema@il> (https://url)",
        "  Name<ema@il>(https://url)",
        "Name (https://url) <ema@il>   ",
        "Name (https://url)<ema@il> ",
        "Name(https://url) <ema@il>",
        "Name(https://url) <ema@il",
        "Name(https://url)<ema@il>",
        " \"Name\" <ema@il>  https://url ",
        " Name <ema@il>  https://url",
        " Name <ema@il> https://url ",
        "Name<ema@il>  https://url",
        "Name<ema@il> https://url ",
        " Name  https://url  <ema@il>   ",
        " Name  https://url <ema@il> ",
        " Name https://url  <ema@il>",
        " Name https://url <ema@il>",
    ];
    for input in variants.iter() {
        let Author { name, email, url } = Author::new(input);
        // The input doubles as the panic message so a failure names its case.
        assert_eq!("Name", name.expect(input));
        assert_eq!("ema@il", email.expect(input));
        assert_eq!("https://url", url.expect(input));
    }
}