use lazy_static::lazy_static;
use regex::Regex;
use reqwest::Client;
use std::error::Error;
use std::string::ToString;
use std::time::Duration;
use url::Url;
/// Extracts the DOI prefix (the first path segment) from a `doi.org` URL.
///
/// Returns `Ok` with an empty string when `s` parses as a URL but its host is
/// not exactly `doi.org` or its path does not begin with `/10.`.
///
/// # Errors
///
/// Propagates the parse error when `s` is not a valid URL.
pub fn prefix_from_url(s: &str) -> Result<String, Box<dyn Error>> {
    let parsed = Url::parse(s)?;
    let is_doi_url = parsed.host_str() == Some("doi.org") && parsed.path().starts_with("/10.");
    if !is_doi_url {
        return Ok(String::new());
    }
    // The path starts with '/', so segment 0 is empty and segment 1 is the prefix.
    let prefix = parsed
        .path()
        .split('/')
        .nth(1)
        .map(str::to_owned)
        .unwrap_or_default();
    Ok(prefix)
}
/// Normalizes a DOI into a full resolver URL with a lowercased DOI name.
///
/// The input is checked with `validate_doi`; the resolver base is obtained
/// from `doi_resolver`, called on the original input with `sandbox` set to
/// `false`. Returns an empty string when the input is not a valid DOI.
pub fn normalize_doi(doi: &str) -> String {
    match validate_doi(doi) {
        Some(name) => {
            let base = doi_resolver(doi, false);
            format!("{}{}", base, name.to_lowercase())
        }
        None => String::new(),
    }
}
/// Validates a DOI and returns its bare name (`10.<registrant>/<suffix>`).
///
/// The whole input must match. Accepted forms are, in order: an optional
/// `http`/`https` resolver URL prefix (one or two slashes after the colon,
/// optional `dx.`, host `doi.org`, `handle.stage.datacite.org` or
/// `handle.test.datacite.org`), an optional `doi:` label, then `10.` followed
/// by 4–5 digits, a `/`, and one or more non-whitespace characters.
///
/// Returns `None` when the input does not match.
pub fn validate_doi(doi: &str) -> Option<String> {
    lazy_static! {
        static ref DOI_REGEX: Regex = Regex::new(
            r"^(?:(http|https):/(/)?(dx\.)?(doi\.org|handle\.stage\.datacite\.org|handle\.test\.datacite\.org)/)?(doi:)?(10\.\d{4,5}/[^\s]+)$"
        ).unwrap();
    }
    let captures = DOI_REGEX.captures(doi)?;
    // Capture group 6 holds the DOI name without any resolver prefix or label.
    captures.get(6).map(|name| name.as_str().to_owned())
}
/// Returns the validated DOI name with every `/` replaced by `%2F`.
///
/// The input is first reduced to its bare DOI name via `validate_doi`; an
/// empty string is returned when validation fails.
pub fn escape_doi(doi: &str) -> String {
    validate_doi(doi)
        .map(|name| name.replace("/", "%2F"))
        .unwrap_or_default()
}
/// Builds a `https://doi.org/<prefix>/<suffix>` URL for the given prefix.
///
/// The suffix is whatever `crate::crockford::generate(10, 5, true)` returns;
/// `prefix` is inserted verbatim without validation.
pub fn encode_doi(prefix: &str) -> String {
    format!(
        "https://doi.org/{}/{}",
        prefix,
        crate::crockford::generate(10, 5, true)
    )
}
/// Decodes the suffix of a DOI into a number via `crate::crockford::decode`.
///
/// Only the segment between the first and (if present) second `/` of the
/// validated DOI name is decoded. Returns `0` when the DOI is invalid, has no
/// such segment, or decoding fails; a decoding failure is also reported on
/// standard error.
pub fn decode_doi(doi: &str) -> i64 {
    let Some(name) = validate_doi(doi) else {
        return 0;
    };
    // Segment 0 is the prefix (`10.xxxx`); segment 1 is the part to decode.
    let Some(suffix) = name.split('/').nth(1) else {
        return 0;
    };
    crate::crockford::decode(suffix, true).unwrap_or_else(|e| {
        eprintln!("Error decoding DOI suffix: {}", e);
        0
    })
}
pub async fn is_registered_doi(doi: &str) -> bool {
let url = normalize_doi(doi);
if url.is_empty() {
return false;
}
let client = Client::builder()
.timeout(Duration::from_secs(10))
.build()
.unwrap_or_default();
match client.head(&url).send().await {
Ok(resp) => resp.status().as_u16() <= 308,
Err(_) => false,
}
}
/// Extracts the DOI prefix (`10.` followed by 4–5 digits) from a DOI or DOI URL.
///
/// The match is anchored at the start of the input only, so anything after the
/// prefix is ignored. Before the prefix, an optional `http`/`https` resolver
/// URL prefix (one or two slashes after the colon, optional `dx.`, host
/// `doi.org`, `handle.stage.datacite.org` or `handle.test.datacite.org`) and
/// an optional `doi:` label are accepted.
///
/// Returns `None` when the input does not start with such a prefix.
pub fn validate_prefix(doi: &str) -> Option<String> {
    lazy_static! {
        static ref PREFIX_REGEX: Regex = Regex::new(
            r"^(?:(http|https):/(/)?(dx\.)?(doi\.org|handle\.stage\.datacite\.org|handle\.test\.datacite\.org)/)?(doi:)?(10\.\d{4,5})"
        ).unwrap();
    }
    let captures = PREFIX_REGEX.captures(doi)?;
    // Capture group 6 holds the prefix itself.
    captures.get(6).map(|prefix| prefix.as_str().to_owned())
}
pub fn doi_resolver(doi: &str, sandbox: bool) -> String {
if let Ok(d) = Url::parse(doi)
&& (d.host_str() == Some("stage.datacite.org") || sandbox) {
return "https://handle.stage.datacite.org/".to_string();
}
"https://doi.org/".to_string()
}
pub fn get_doi_ra_sync(doi: &str) -> Option<String> {
let prefix = validate_prefix(doi)?;
let url = format!("https://doi.org/ra/{}", prefix);
#[derive(serde::Deserialize)]
struct RaEntry {
#[serde(rename = "RA", default)]
ra: String,
}
let client = reqwest::blocking::Client::builder()
.timeout(std::time::Duration::from_secs(10))
.build()
.ok()?;
let entries: Vec<RaEntry> = client.get(&url).send().ok()?.json().ok()?;
let ra = entries.into_iter().next()?.ra;
if ra.is_empty() { None } else { Some(ra) }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `validate_doi` returns the bare DOI name for valid input and `None`
    /// for prefix-only, multi-DOI, and empty input.
    #[test]
    fn test_validate_doi_parity_cases() {
        let cases: [(&str, Option<&str>); 6] = [
            ("10.7554/elife.01567", Some("10.7554/elife.01567")),
            (
                "https://doi.org/10.7554/elife.01567",
                Some("10.7554/elife.01567"),
            ),
            ("https://doi.org/10.7554", None),
            ("10.7554", None),
            ("10.3201/eid1503.081203 10.1083/jcb.1843iti1", None),
            ("", None),
        ];
        for (input, want) in cases {
            let got = validate_doi(input);
            assert_eq!(got.as_deref(), want, "input: {input}");
        }
    }

    /// `validate_prefix` returns the prefix for DOIs, DOI URLs and bare
    /// prefixes, and `None` for empty input.
    #[test]
    fn test_validate_prefix_parity_cases() {
        let cases: [(&str, Option<&str>); 5] = [
            ("10.7554/elife.01567", Some("10.7554")),
            ("https://doi.org/10.7554/elife.01567", Some("10.7554")),
            ("https://doi.org/10.7554", Some("10.7554")),
            ("10.7554", Some("10.7554")),
            ("", None),
        ];
        for (input, want) in cases {
            let got = validate_prefix(input);
            assert_eq!(got.as_deref(), want, "input: {input}");
        }
    }

    /// Valid DOIs are lowercased and prefixed (normalize) or have `/`
    /// escaped (escape); invalid input yields an empty string from both.
    #[test]
    fn test_normalize_and_escape_doi() {
        let normalized = normalize_doi("10.7554/eLife.01567");
        assert_eq!(normalized, "https://doi.org/10.7554/elife.01567");

        let escaped = escape_doi("https://doi.org/10.7554/elife.01567");
        assert_eq!(escaped, "10.7554%2Felife.01567");

        assert_eq!(normalize_doi("not-a-doi"), "");
        assert_eq!(escape_doi("not-a-doi"), "");
    }

    /// `prefix_from_url` returns the prefix for `doi.org` URLs and an empty
    /// string for other hosts.
    #[test]
    fn test_prefix_from_url() {
        let from_doi_org = prefix_from_url("https://doi.org/10.7554/elife.01567").ok();
        assert_eq!(from_doi_org.as_deref(), Some("10.7554"));

        let from_other_host = prefix_from_url("https://example.org/10.7554/elife.01567").ok();
        assert_eq!(from_other_host.as_deref(), Some(""));
    }
}