use anyhow::Result;
use regex::Regex;
use std::path::Path;
/// Scans the file at `path` for a DOI and returns the first usable one.
///
/// The file is read in full and decoded as UTF-8 with invalid sequences
/// replaced, so non-text files can be scanned as well. Three patterns are
/// tried in order, from most to least specific:
///
/// 1. a `doi:` / `doi ` label (case-insensitive) followed by the identifier,
/// 2. a `doi.org` / `dx.doi.org` URL,
/// 3. a bare `10.NNNN/suffix` token.
///
/// Within each pattern every match is considered in document order. Trailing
/// sentence punctuation and closing brackets are stripped from a candidate,
/// and a candidate is accepted only if something is still left after the
/// first `/` (i.e. the DOI has a non-empty suffix).
///
/// # Errors
///
/// Returns an error if the file cannot be read or if one of the patterns
/// fails to compile.
///
/// # Returns
///
/// `Ok(Some(doi))` for the first accepted candidate, `Ok(None)` if no pattern
/// produced one.
pub fn extract_doi(path: &Path) -> Result<Option<String>> {
    let bytes = std::fs::read(path)?;
    // Lossy decoding: invalid UTF-8 becomes U+FFFD instead of failing the scan.
    let content = String::from_utf8_lossy(&bytes);

    // In every pattern the DOI itself is the LAST capture group; the lookup
    // below relies on that.
    let patterns = [
        r#"(?i)doi[:\s]+(['"]?)(10\.\d{4,}/[^\s\x00-\x1f\)\]>"']+)"#,
        r#"https?://(?:dx\.)?doi\.org/(10\.\d{4,}/[^\s\x00-\x1f\)\]>"']+)"#,
        r#"\b(10\.\d{4,}/[^\s\x00-\x1f\)\]>"',;]+)"#,
    ];

    for pattern in &patterns {
        let re = Regex::new(pattern)?;
        // Walk all matches rather than only the first: a degenerate early hit
        // (e.g. "10.1234/." which trims down to the bare prefix) must not hide
        // a real DOI that appears later in the file.
        for caps in re.captures_iter(&content) {
            let candidate = caps
                .get(caps.len() - 1)
                .map_or("", |m| m.as_str())
                .trim_end_matches(['.', ',', ';', ')', ']', '>']);

            // The registrant prefix ("10.NNNN") never contains '/', so the
            // first slash separates prefix from suffix. Trimming may have
            // consumed the whole suffix; such a candidate is not a DOI.
            let has_suffix = candidate
                .find('/')
                .map_or(false, |slash| slash + 1 < candidate.len());

            if has_suffix {
                return Ok(Some(candidate.to_string()));
            }
        }
    }

    Ok(None)
}