use async_trait::async_trait;
use serde::Deserialize;
use std::sync::Arc;
use crate::models::{Paper, PaperBuilder, SearchQuery, SearchResponse, SourceType};
use crate::sources::{Source, SourceCapabilities, SourceError};
use crate::utils::{api_retry_config, with_retry, HttpClient};
const ZENODO_API_BASE: &str = "https://zenodo.org/api/records";
#[derive(Debug, Clone)]
pub struct ZenodoSource {
client: Arc<HttpClient>,
}
impl ZenodoSource {
pub fn new() -> Result<Self, SourceError> {
Ok(Self {
client: Arc::new(HttpClient::new()?),
})
}
}
impl Default for ZenodoSource {
fn default() -> Self {
Self::new().expect("Failed to create ZenodoSource")
}
}
#[async_trait]
impl Source for ZenodoSource {
fn id(&self) -> &str {
"zenodo"
}
fn name(&self) -> &str {
"Zenodo"
}
fn capabilities(&self) -> SourceCapabilities {
SourceCapabilities::SEARCH | SourceCapabilities::DOI_LOOKUP
}
async fn search(&self, query: &SearchQuery) -> Result<SearchResponse, SourceError> {
let max_results = query.max_results.min(1000);
let url = format!(
"{}?q={}&size={}&type=publication",
ZENODO_API_BASE,
urlencoding::encode(&query.query),
max_results
);
let client = Arc::clone(&self.client);
let url_for_retry = url.clone();
let response = with_retry(api_retry_config(), || {
let client = Arc::clone(&client);
let url = url_for_retry.clone();
async move {
let request = client.get(&url);
let response = request
.send()
.await
.map_err(|e| SourceError::Network(format!("Failed to search Zenodo: {}", e)))?;
if !response.status().is_success() {
let status = response.status();
if status == reqwest::StatusCode::TOO_MANY_REQUESTS {
tracing::debug!("Zenodo API rate-limited - returning empty results");
return Err(SourceError::Api("Zenodo rate-limited".to_string()));
}
let text = response.text().await.unwrap_or_default();
return Err(SourceError::Api(format!(
"Zenodo API returned status {}: {}",
status, text
)));
}
let content_type = response
.headers()
.get(reqwest::header::CONTENT_TYPE)
.and_then(|v| v.to_str().ok())
.unwrap_or_default();
if !content_type.contains("application/json") {
tracing::debug!(
"Zenodo returned non-JSON content-type: {} - rate-limited?",
content_type
);
return Err(SourceError::Api("Zenodo rate-limited".to_string()));
}
let json: ZenodoResponse = response.json().await.map_err(|e| {
SourceError::Parse(format!("Failed to parse Zenodo response: {}", e))
})?;
Ok(json)
}
})
.await;
let response = match response {
Ok(r) => r,
Err(SourceError::Api(msg)) if msg.contains("rate-limited") => {
tracing::debug!("Zenodo rate-limited - returning empty results");
return Ok(SearchResponse::new(vec![], "Zenodo", &query.query));
}
Err(e) => return Err(e),
};
let total = response.hits.total.value;
let papers: Result<Vec<Paper>, SourceError> = response
.hits
.hits
.into_iter()
.map(|item| self.parse_result(&item))
.collect();
let papers = papers?;
let mut response = SearchResponse::new(papers, "Zenodo", &query.query);
response.total_results = Some(total);
Ok(response)
}
async fn get_by_doi(&self, doi: &str) -> Result<Paper, SourceError> {
let clean_doi = doi
.replace("https://doi.org/", "")
.replace("doi:", "")
.trim()
.to_string();
let url = format!(
"{}?q=doi:\"{}\"",
ZENODO_API_BASE,
urlencoding::encode(&clean_doi)
);
let client = Arc::clone(&self.client);
let url_for_retry = url.clone();
let response: ZenodoResponse = with_retry(api_retry_config(), || {
let client = Arc::clone(&client);
let url = url_for_retry.clone();
async move {
let request = client.get(&url);
let response = request.send().await.map_err(|e| {
SourceError::Network(format!("Failed to lookup DOI in Zenodo: {}", e))
})?;
if !response.status().is_success() {
return Err(SourceError::NotFound(format!(
"Paper not found in Zenodo: {}",
doi
)));
}
response.json().await.map_err(|e| {
SourceError::Parse(format!("Failed to parse Zenodo response: {}", e))
})
}
})
.await?;
if let Some(hit) = response.hits.hits.into_iter().next() {
self.parse_result(&hit)
} else {
Err(SourceError::NotFound(format!(
"Paper not found in Zenodo: {}",
doi
)))
}
}
}
impl ZenodoSource {
fn parse_result(&self, item: &ZenodoHit) -> Result<Paper, SourceError> {
let id = item.id.to_string();
let title = item.metadata.title.clone().unwrap_or_default();
let abstract_text = item.metadata.description.clone().unwrap_or_default();
let doi = item.metadata.doi.clone().unwrap_or_default();
let authors: String = item
.metadata
.creators
.iter()
.filter_map(|c| c.name.clone())
.collect::<Vec<_>>()
.join("; ");
let year = item.metadata.publication_date.clone().unwrap_or_default();
let url = item.links.html.clone().unwrap_or_else(|| {
if !doi.is_empty() {
format!("https://doi.org/{}", doi)
} else {
format!("https://zenodo.org/record/{}", id)
}
});
Ok(
PaperBuilder::new(id, title, url, SourceType::Other("zenodo".to_string()))
.authors(&authors)
.published_date(&year)
.abstract_text(&abstract_text)
.doi(&doi)
.build(),
)
}
}
#[derive(Debug, Deserialize)]
struct ZenodoResponse {
hits: ZenodoHits,
}
#[derive(Debug, Deserialize)]
struct ZenodoHits {
total: ZenodoTotal,
hits: Vec<ZenodoHit>,
}
#[derive(Debug, Deserialize)]
struct ZenodoTotal {
value: usize,
}
#[derive(Debug, Deserialize)]
struct ZenodoHit {
id: usize,
metadata: ZenodoMetadata,
links: ZenodoLinks,
}
#[derive(Debug, Deserialize)]
struct ZenodoMetadata {
title: Option<String>,
description: Option<String>,
doi: Option<String>,
publication_date: Option<String>,
creators: Vec<ZenodoCreator>,
}
#[derive(Debug, Deserialize)]
struct ZenodoCreator {
name: Option<String>,
}
#[derive(Debug, Deserialize)]
struct ZenodoLinks {
html: Option<String>,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_source_creation() {
let source = ZenodoSource::new();
assert!(source.is_ok());
}
}