commonmeta/
author_utils.rs1use serde_json::Value;
2
3use crate::data::Affiliation;
4use crate::utils::{normalize_id, validate_orcid, validate_ror};
5use crate::constants::CONTRIBUTOR_ROLES;
6
/// Substrings that mark a name as belonging to an organization.
///
/// `is_personal_name` rejects any name that contains one of these entries.
/// The comparison is a plain, case-sensitive substring test, so an entry also
/// matches when it appears inside a longer word.
const ORG_HINT_WORDS: &[&str] = &[
    // Academic and cultural institutions.
    "University",
    "College",
    "Institute",
    "School",
    "Center",
    "Department",
    "Laboratory",
    "Library",
    "Museum",
    // Foundations, societies and companies.
    "Foundation",
    "Society",
    "Association",
    "Company",
    "Corporation",
    "Collaboration",
    "Consortium",
    "Incorporated",
    "Inc.",
    // Remaining hints, kept in their original order.
    "Institut",
    "Research",
    "Science",
    "Team",
    "Ministry",
    "Government",
    "Count",
    "Reviewers",
    "Staff",
    "Lab",
    "Redaktion",
    "Group",
    "area",
];
40
/// Normalizes the whitespace of an author string.
///
/// Returns `None` when `author` is `None`, is empty after trimming, or starts
/// with a comma once trimmed. Otherwise every `" - "` is collapsed to `"-"`
/// and all runs of whitespace are reduced to a single space.
pub fn cleanup_author(author: Option<&str>) -> Option<String> {
    let raw = author?.trim();
    if raw.is_empty() || raw.starts_with(',') {
        return None;
    }

    // Join hyphenated parts first so the spaces around the dash are not kept
    // as separate tokens below.
    let dehyphenated = raw.replace(" - ", "-");

    let mut normalized = String::new();
    for token in dehyphenated.split_whitespace() {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(token);
    }

    (!normalized.is_empty()).then_some(normalized)
}
61
62pub fn to_ror_id(id: Option<&str>) -> Option<String> {
63 let Some(id) = id else {
64 return None;
65 };
66 validate_ror(id).map(|ror| format!("https://ror.org/{}", ror))
67}
68
69pub fn is_personal_name(name: &str) -> bool {
70 if name.contains(';') {
71 return false;
72 }
73
74 if name.split_whitespace().count() == 1 && !name.contains(',') {
75 return false;
76 }
77
78 if ORG_HINT_WORDS.iter().any(|word| name.contains(word)) {
79 return false;
80 }
81
82 if let Some(last) = name.rsplit(", ").next()
83 && matches!(last, "MD" | "PhD" | "BS")
84 {
85 return true;
86 }
87
88 name.contains(',') || name.split_whitespace().count() >= 2
89}
90
/// Splits a display name into `(given_name, family_name, name)`.
///
/// * `"Family, Given"`: the text before the first comma becomes the family
///   name and the rest the given name.
/// * `"Given ... Family"`: the text after the last space becomes the family
///   name and everything before it the given name.
/// * Anything else (for example a single word) is returned unsplit in the
///   third slot, with empty given and family names.
///
/// An empty or whitespace-only input yields three empty strings.
pub fn split_person_name(name: &str) -> (String, String, String) {
    let full = name.trim();
    if full.is_empty() {
        return Default::default();
    }

    if let Some((family, given)) = full.split_once(',') {
        return (
            given.trim().to_owned(),
            family.trim().to_owned(),
            String::new(),
        );
    }

    match full.rsplit_once(' ') {
        Some((given, family)) if !given.trim().is_empty() && !family.trim().is_empty() => (
            given.trim().to_owned(),
            family.trim().to_owned(),
            String::new(),
        ),
        _ => (String::new(), String::new(), full.to_owned()),
    }
}
113
114pub fn infer_contributor_type(
115 raw_type: &str,
116 id: &str,
117 given_name: &str,
118 family_name: &str,
119 name: &str,
120 via: Option<&str>,
121) -> String {
122 let mut type_ = raw_type.to_string();
123 if type_.ends_with("al") {
124 type_.truncate(type_.len() - 2);
125 }
126
127 if type_.is_empty() && validate_ror(id).is_some() {
128 return "Organization".to_string();
129 }
130 if type_.is_empty() && validate_orcid(id).is_some() {
131 return "Person".to_string();
132 }
133 if type_.is_empty() && (!given_name.is_empty() || !family_name.is_empty()) {
134 return "Person".to_string();
135 }
136 if type_.is_empty() && !name.is_empty() && via == Some("crossref") {
137 return "Organization".to_string();
138 }
139 if type_.is_empty() && is_personal_name(name) {
140 return "Person".to_string();
141 }
142 if type_.is_empty() && !name.is_empty() {
143 return "Organization".to_string();
144 }
145 type_
146}
147
148pub fn normalize_contributor_roles(raw_roles: &[String], default_role: &str) -> Vec<String> {
149 let filtered: Vec<String> = raw_roles
150 .iter()
151 .filter(|r| CONTRIBUTOR_ROLES.contains(&r.as_str()))
152 .cloned()
153 .collect();
154 if filtered.is_empty() {
155 vec![default_role.to_string()]
156 } else {
157 filtered
158 }
159}
160
161pub fn parse_affiliation_value(v: &Value) -> Option<Affiliation> {
162 if let Some(name) = v.as_str() {
163 if name.is_empty() {
164 return None;
165 }
166 return Some(Affiliation {
167 name: name.to_string(),
168 ..Default::default()
169 });
170 }
171
172 let obj = v.as_object()?;
173 let mut affiliation_identifier = String::new();
174 let name = obj
175 .get("name")
176 .and_then(Value::as_str)
177 .or_else(|| obj.get("#text").and_then(Value::as_str))
178 .unwrap_or("")
179 .to_string();
180
181 if let Some(raw_aff_id) = obj.get("affiliationIdentifier").and_then(Value::as_str) {
182 let normalized = if !raw_aff_id.starts_with("https://") {
183 if let Some(scheme_uri) = obj.get("schemeURI").and_then(Value::as_str) {
184 let normalized_scheme = if scheme_uri.ends_with('/') {
185 scheme_uri.to_string()
186 } else {
187 format!("{}/", scheme_uri)
188 };
189 normalize_id(&format!("{}{}", normalized_scheme, raw_aff_id))
190 } else {
191 normalize_id(raw_aff_id)
192 }
193 } else {
194 normalize_id(raw_aff_id)
195 };
196 affiliation_identifier = normalized;
197 } else if let Some(id_val) = obj
198 .get("id")
199 .and_then(Value::as_str)
200 .or_else(|| obj.get("@id").and_then(Value::as_str))
201 {
202 if id_val.starts_with("http://") || id_val.starts_with("https://") {
203 affiliation_identifier = id_val.to_string();
204 } else if let Some(ror) = to_ror_id(Some(id_val)) {
205 affiliation_identifier = ror;
207 }
208 } else if let Some(same_as) = obj.get("sameAs").and_then(Value::as_str)
209 && (same_as.starts_with("http://") || same_as.starts_with("https://"))
210 {
211 affiliation_identifier = same_as.to_string();
212 }
213
214 if name.is_empty() && affiliation_identifier.is_empty() {
215 return None;
216 }
217
218 let id = to_ror_id(Some(&affiliation_identifier)).unwrap_or_default();
219 if id.is_empty() && name.is_empty() {
220 return None;
221 }
222 Some(Affiliation {
223 id,
224 name,
225 ..Default::default()
226 })
227}
228
229pub fn parse_affiliations(values: &[Value]) -> Vec<Affiliation> {
230 let mut out = Vec::new();
231 for value in values {
232 if let Some(aff) = parse_affiliation_value(value) {
233 let duplicate = out
234 .iter()
235 .any(|existing: &Affiliation| existing.id == aff.id && existing.name == aff.name);
236 if !duplicate {
237 out.push(aff);
238 }
239 }
240 }
241 out
242}
243
#[cfg(test)]
mod tests {
    use super::*;

    /// Comma-separated and two-word names are personal; a name containing an
    /// organization hint word is not.
    #[test]
    fn detects_personal_name() {
        for personal in ["Doe, Jane", "Jane Doe"] {
            assert!(is_personal_name(personal));
        }
        assert!(!is_personal_name("Big Science Collaboration"));
    }

    /// With no declared type and no names, the identifier alone decides.
    #[test]
    fn infers_type_from_orcid_and_ror() {
        let cases = [
            ("https://orcid.org/0000-0001-5000-0007", "Person"),
            ("https://ror.org/05dxps055", "Organization"),
        ];
        for (id, expected) in cases {
            assert_eq!(infer_contributor_type("", id, "", "", "", None), expected);
        }
    }

    /// A bare string and an object with the same name but an extra id are
    /// both kept, because the duplicate check compares id and name together.
    #[test]
    fn parses_affiliation_from_string_and_object() {
        let from_string = Value::String("Example University".to_string());
        let from_object =
            serde_json::json!({"id": "https://ror.org/05dxps055", "name": "Example University"});

        let affiliations = parse_affiliations(&[from_string, from_object]);

        assert_eq!(affiliations.len(), 2);
        assert_eq!(affiliations[0].name, "Example University");
        assert_eq!(affiliations[1].id, "https://ror.org/05dxps055");
    }
}