#![allow(clippy::needless_raw_string_hashes)]
use regex::{Captures, Regex};
use std::str::FromStr;
use unicode_normalization::UnicodeNormalization;
/// Author details split into their individual parts.
///
/// Every part is optional; a part is `None` when it is not known.
/// Values produced by `Author::new` are trimmed, and the name is
/// additionally NFC-normalized.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct Author {
    /// Display name of the author.
    pub name: Option<String>,
    /// E-mail address of the author.
    pub email: Option<String>,
    /// URL (e.g. a homepage) associated with the author.
    pub url: Option<String>,
}
impl Author {
    /// Parses a free-form author string such as `Name <email> (url)`.
    ///
    /// Parsing is best-effort and never fails; parts that cannot be found are `None`.
    /// The parts are extracted in the order below, and each one is removed from the
    /// input before the next step runs:
    ///
    /// 1. a leading double-quoted name (`"Name" …`);
    /// 2. a URL, either `(http://…)` / `(https://…)` in parentheses or a bare
    ///    `http://…` / `https://…` token;
    /// 3. an e-mail in angle brackets (`<user@host>`; the closing `>` may be missing
    ///    at the end of the input);
    /// 4. if step 3 found nothing, a bare `user@host.tld` that sits at the start/end
    ///    of the input or is delimited by punctuation;
    /// 5. if no quoted name was found, the remaining text (without a trailing empty
    ///    `<>`) becomes the name.
    ///
    /// All parts are trimmed and the name is NFC-normalized.
    ///
    /// # Panics
    ///
    /// Only if one of the built-in regex patterns fails to compile.
    #[must_use]
    pub fn new(s: &str) -> Self {
        // Compiled once and shared by all calls.
        static REGEXES: std::sync::OnceLock<(Regex, Regex, Regex, Regex)> =
            std::sync::OnceLock::new();
        let (qname_regex, email_regex, raw_email_regex, url_regex) = REGEXES.get_or_init(|| {
            (
                // Leading `"quoted name"`.
                Regex::new(r#"^\s*"([^"]+)""#).unwrap(),
                // `<something@something>`; the closing bracket is optional at end of input.
                Regex::new(r#"<([^>@]*@[^>]+)(?:>|$)"#).unwrap(),
                // Bare address bounded by start/end of input or punctuation.
                // NOTE(review): the leading delimiter must be punctuation, so in
                // `Name first.last@example.com` the `.` is taken as the delimiter and
                // only `last@example.com` is captured. Left as-is; confirm the intended
                // behaviour before tightening this pattern.
                Regex::new(
                    r#"(?:^|\p{P})([^()>,@\s]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9.-]+)(?:$|\p{P})"#,
                )
                .unwrap(),
                // The parenthesised form must start with a real `http://` or `https://`
                // scheme, otherwise text like `(httpd maintainer)` would be taken as a URL.
                Regex::new(r#"\((https?://[^)]+)\)|https?://[^<>()"\s]+"#).unwrap(),
            )
        });

        let mut name = None;
        let rest = qname_regex.replacen(s, 1, |m: &Captures<'_>| {
            // Trim the quoted name so it is consistent with the unquoted fallback;
            // a whitespace-only quoted name counts as missing.
            let quoted = m[1].trim();
            name = (!quoted.is_empty()).then(|| nfc(quoted));
            String::new()
        });

        let mut url = None;
        let rest = url_regex.replacen(&rest, 1, |m: &Captures<'_>| {
            // Group 1 is the parenthesised form; otherwise the whole match is the URL.
            url = m
                .get(1)
                .or_else(|| m.get(0))
                .map(|m| m.as_str().trim().to_owned());
            String::new()
        });

        let mut email = None;
        let rest = email_regex.replacen(&rest, 1, |m: &Captures<'_>| {
            email = Some(m[1].trim().to_owned());
            String::new()
        });
        // Fall back to a bare address only when no bracketed one was present.
        let rest = if email.is_none() {
            raw_email_regex.replacen(&rest, 1, |m: &Captures<'_>| {
                email = Some(m[1].trim().to_owned());
                String::new()
            })
        } else {
            rest
        };

        // Drop an empty `<>` e-mail placeholder. Trimming first makes this work
        // regardless of whitespace around the placeholder.
        let rest = rest.trim();
        let rest = rest.strip_suffix("<>").map_or(rest, str::trim_end);

        Author {
            // A quoted name wins; otherwise use whatever text is left over.
            name: name.or_else(|| (!rest.is_empty()).then(|| nfc(rest))),
            email,
            url,
        }
    }
}
/// Returns `s` converted to Unicode Normalization Form C.
#[inline(never)]
fn nfc(s: &str) -> String {
    let mut normalized = String::new();
    normalized.extend(s.nfc());
    normalized
}
impl FromStr for Author {
type Err = std::convert::Infallible;
#[inline]
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(Author::new(s))
}
}
/// Checks name / e-mail / URL extraction by `Author::new` on a range of input shapes.
#[test]
fn author_parse() {
    // A bare address wrapped in parentheses next to a name.
    let parsed = Author::new("Foo Bar-Baz! (foo.bar.baz@gmail.com)");
    assert_eq!(parsed.email.as_deref(), Some("foo.bar.baz@gmail.com"));
    assert_eq!(parsed.name.as_deref(), Some("Foo Bar-Baz!"));

    // Input that is nothing but an address yields no name.
    let raw = Author::new("raw@email.com");
    assert_eq!(raw.email.as_deref(), Some("raw@email.com"));
    assert!(raw.name.is_none());
    assert!(Author::new("contact@company.com").name.is_none());

    // Odd or malformed bracket content.
    assert_eq!(Author::new("stupid sufix <>").name.as_deref(), Some("stupid sufix"));
    assert_eq!(Author::new("eh <@badmail>").email.as_deref(), Some("@badmail"));
    assert_eq!(Author::new("not raw@email").name.as_deref(), Some("not raw@email"));

    // Names are NFC-normalized: the decomposed spelling becomes the composed one.
    assert_ne!("A\u{0308}ffin", "Äffin");
    assert_eq!(Author::new("A\u{0308}ffin").name.as_deref(), Some("Äffin"));

    // Every spelling below must produce the same three parts.
    let variants: &[&str] = &[
        "\"Name\" <ema@il> (https://url)",
        "Name <ema@il> (https://url)",
        "Name <ema@il>(https://url)",
        " Name<ema@il> (https://url)",
        " Name<ema@il>(https://url)",
        " Name<ema@il>(https://url )",
        "Name (https://url) <ema@il> ",
        "Name (https://url)<ema@il> ",
        "Name(https://url) <ema@il>",
        "Name(https://url) <ema@il",
        "Name(https://url)<ema@il>",
        " \"Name\" <ema@il> https://url ",
        " Name <ema@il> https://url",
        " Name <ema@il> https://url ",
        "Name<ema@il> https://url",
        "Name<ema@il> https://url ",
        " Name https://url <ema@il> ",
        " Name https://url <ema@il> ",
        " Name https://url <ema@il>",
        " Name https://url <ema@il>",
        " Name https://url < ema@il >",
    ];
    for &input in variants {
        let author = Author::new(input);
        assert_eq!(author.name.expect(input), "Name");
        assert_eq!(author.email.expect(input), "ema@il");
        assert_eq!(author.url.expect(input), "https://url");
    }
}