1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
extern crate unicode_normalization;
extern crate regex;
#[macro_use] extern crate lazy_static;
use regex::{Regex, Captures};
use unicode_normalization::UnicodeNormalization;
/// Author information split out of a single free-form string.
///
/// Every field is optional: `Author::new` leaves a field as `None` when the
/// corresponding part could not be found in the input.
#[derive(Debug, Clone)]
pub struct Author {
/// Display name: either the leading double-quoted text or whatever remains
/// of the input once the URL and e-mail have been removed.
pub name: Option<String>,
/// E-mail address, stored without the surrounding `<` `>`.
pub email: Option<String>,
/// URL starting with `http`, stored without surrounding parentheses.
pub url: Option<String>,
}
impl Author {
    /// Parses a free-form author string into name, e-mail and URL.
    ///
    /// The input is first normalized to Unicode NFC. The recognized pieces
    /// may appear in any order and with or without separating whitespace:
    ///
    /// * a name in double quotes at the very start (`"Name"`),
    /// * a URL, either in parentheses (`(https://url)`) or bare
    ///   (`https://url`),
    /// * an e-mail in angle brackets (`<user@host>`); the closing `>` may be
    ///   missing when the address runs to the end of the string,
    /// * a bare e-mail (`user@host.tld`) when it is the only thing left,
    ///   ignoring surrounding whitespace.
    ///
    /// When there is no quoted name, the text remaining after the URL and
    /// e-mail have been removed (minus a trailing empty ` <>`) becomes the
    /// name. When a quoted name is present, that remaining text is discarded.
    ///
    /// Never fails: parts that are not found are returned as `None`.
    pub fn new(s: &str) -> Self {
        lazy_static! {
            // Double-quoted name at the start of the string.
            static ref QNAME: Regex = Regex::new(r#"^\s*"([^"]+)""#).unwrap();
            // `<local@domain>`; the closing bracket is optional at end of input.
            static ref EMAIL: Regex = Regex::new(r#"<([^>@]+@[^>]+)(?:>|$)"#).unwrap();
            // The whole text is a bare address with a dotted domain.
            static ref RAWEMAIL: Regex = Regex::new(r#"^[^>@\s]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9.-]+$"#).unwrap();
            // `(http…)` in parentheses, or a bare `http://` / `https://` URL.
            static ref URL: Regex = Regex::new(r#"\((http[^)]+)\)|https?://[^<>()"\s]+"#).unwrap();
        }

        let s = s.nfc().collect::<String>();

        let mut name = None;
        let rest = QNAME.replacen(&s, 1, |m: &Captures| {
            name = Some(m[1].to_owned());
            String::new()
        });

        let mut url = None;
        let rest = URL.replacen(&rest, 1, |m: &Captures| {
            // Group 1 is set only for the parenthesized form; otherwise take
            // the whole match.
            url = m.get(1).or_else(|| m.get(0)).map(|m| m.as_str().to_owned());
            String::new()
        });

        let mut email = None;
        let rest = EMAIL.replacen(&rest, 1, |m: &Captures| {
            email = Some(m[1].to_owned());
            String::new()
        });

        let rest: &str = &rest;

        // RAWEMAIL is anchored at both ends, so it has to be tried against the
        // trimmed text; otherwise stray whitespace (e.g. the gap left behind
        // by a removed quoted name) would hide a bare address.
        let candidate = rest.trim();
        let rest = if email.is_none() && RAWEMAIL.is_match(candidate) {
            email = Some(candidate.to_owned());
            ""
        } else {
            rest
        };

        // Drop an empty " <>" placeholder. Only trailing whitespace is removed
        // before the check so that an input consisting of just " <>" still
        // counts as having the suffix.
        let rest = rest.trim_end();
        let rest = if rest.ends_with(" <>") {
            &rest[..rest.len() - 3]
        } else {
            rest
        };
        let rest = rest.trim();

        Author {
            name: name.or_else(|| {
                if rest.is_empty() {
                    None
                } else {
                    Some(rest.to_owned())
                }
            }),
            email,
            url,
        }
    }
}
/// Exercises `Author::new` on bare e-mails, plain names, Unicode
/// normalization, and many orderings/spacings of name, e-mail and URL.
#[test]
fn author_parse() {
    // Shorthand: parse the input and keep only the name.
    let name_of = |input: &str| Author::new(input).name;

    // A lone address is an e-mail, not a name.
    let bare = Author::new("raw@email.com");
    assert_eq!(bare.email, Some("raw@email.com".to_owned()));
    assert_eq!(bare.name, None);
    assert_eq!(name_of("contact@company.com"), None);

    // Empty angle brackets are dropped; text that merely contains `@` stays a name.
    assert_eq!(name_of("stupid sufix <>").unwrap(), "stupid sufix");
    assert_eq!(name_of("not raw@email").unwrap(), "not raw@email");

    // Decomposed input comes back in composed (NFC) form.
    assert_ne!("A\u{0308}ffin", "Äffin");
    assert_eq!(name_of("A\u{0308}ffin").unwrap(), "Äffin");

    // Every variant below must yield the same three parts.
    let variants = [
        "\"Name\" <ema@il> (https://url)",
        "Name <ema@il> (https://url)",
        "Name <ema@il>(https://url)",
        " Name<ema@il> (https://url)",
        " Name<ema@il>(https://url)",
        "Name (https://url) <ema@il> ",
        "Name (https://url)<ema@il> ",
        "Name(https://url) <ema@il>",
        "Name(https://url) <ema@il",
        "Name(https://url)<ema@il>",
        " \"Name\" <ema@il> https://url ",
        " Name <ema@il> https://url",
        " Name <ema@il> https://url ",
        "Name<ema@il> https://url",
        "Name<ema@il> https://url ",
        " Name https://url <ema@il> ",
        " Name https://url <ema@il> ",
        " Name https://url <ema@il>",
        " Name https://url <ema@il>",
    ];
    for input in variants.iter() {
        let parsed = Author::new(input);
        assert_eq!("Name", parsed.name.expect(input));
        assert_eq!("ema@il", parsed.email.expect(input));
        assert_eq!("https://url", parsed.url.expect(input));
    }
}