#![cfg(feature = "tdm-springer")]
use async_trait::async_trait;
use secrecy::ExposeSecret;
use url::Url;
use crate::provenance::{Capability, LogEvent, LogResult, RowInput};
use crate::source::{FetchContext, FetchError, FetchResult, Source};
use crate::{CapabilityProfile, Ref};
/// Base URL parsed by `TdmSpringerSource::new` when no override is supplied.
const DEFAULT_BASE: &str = "https://api.springernature.com";
/// Name of the query parameter that carries the API key in request URLs.
const API_KEY_PARAM: &str = "api_key";
/// Placeholder value that `redact_api_key_in_url` writes in place of the API key.
const REDACTED: &str = "REDACTED";
/// Source that looks up a DOI through the `/openaccess/json` endpoint of the
/// Springer Nature API and returns the first matching record as metadata.
#[derive(Clone, Debug)]
pub struct TdmSpringerSource {
/// Base URL that the `/openaccess/json` path is joined onto when building a request.
base: Url,
}
impl TdmSpringerSource {
    /// Creates a source that targets the built-in `DEFAULT_BASE` host.
    #[must_use]
    pub fn new() -> Self {
        // `DEFAULT_BASE` is a fixed literal, so a parse failure here would be a
        // programming error rather than a runtime condition.
        #[allow(clippy::expect_used)]
        let base = Url::parse(DEFAULT_BASE).expect("hard-coded base URL is valid");
        Self { base }
    }

    /// Creates a source that resolves its requests against `base` instead of
    /// the built-in host.
    pub fn with_base(base: Url) -> Self {
        Self { base }
    }

    /// Builds the lookup URL for `doi`.
    ///
    /// The result is `<base>/openaccess/json` with two query pairs, in this
    /// order: `q=doi:<doi>` and `api_key=<api_key>`.
    ///
    /// # Errors
    ///
    /// Returns [`FetchError::SourceSchema`] when the endpoint path cannot be
    /// joined onto the configured base URL.
    fn request_url(&self, doi: &crate::Doi, api_key: &str) -> Result<Url, FetchError> {
        let mut endpoint = match self.base.join("/openaccess/json") {
            Ok(joined) => joined,
            Err(e) => {
                return Err(FetchError::SourceSchema {
                    hint: format!("tdm-springer URL construction failed: {e}"),
                });
            }
        };

        let doi_query = format!("doi:{}", doi.as_str());
        {
            // The serializer writes back into `endpoint` when it is dropped at
            // the end of this scope.
            let mut query = endpoint.query_pairs_mut();
            query.append_pair("q", &doi_query);
            query.append_pair(API_KEY_PARAM, api_key);
        }
        Ok(endpoint)
    }
}
/// Returns a copy of `url` in which the value of every `api_key` query pair is
/// replaced by the `REDACTED` placeholder.
///
/// All other query pairs keep their names, values and relative order. A URL
/// that carries no `api_key` pair is returned as an unmodified clone, so its
/// query string is not re-serialized.
fn redact_api_key_in_url(url: &Url) -> Url {
    let carries_key = url.query_pairs().any(|(name, _)| name == API_KEY_PARAM);
    if !carries_key {
        return url.clone();
    }

    let scrubbed: Vec<(String, String)> = url
        .query_pairs()
        .map(|(name, value)| {
            let value = if name == API_KEY_PARAM {
                REDACTED.to_string()
            } else {
                value.into_owned()
            };
            (name.into_owned(), value)
        })
        .collect();

    let mut out = url.clone();
    out.query_pairs_mut().clear().extend_pairs(scrubbed);
    out
}
/// `Default` delegates to [`TdmSpringerSource::new`], i.e. the built-in base URL.
impl Default for TdmSpringerSource {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl Source for TdmSpringerSource {
    /// Identifier of this source; also passed to the rate limiter, the HTTP
    /// client and the provenance log inside `fetch`.
    fn name(&self) -> &str {
        "tdm-springer"
    }

    /// A reference can be served only when it is a DOI and the profile holds
    /// a Springer grant.
    fn can_serve(&self, profile: &CapabilityProfile, ref_: &Ref) -> bool {
        let is_doi = matches!(ref_, Ref::Doi(_));
        is_doi && profile.tdm_springer.is_some()
    }

    /// Looks up the DOI and returns the first record of the response as
    /// `metadata_json`. No PDF bytes are ever returned by this source.
    ///
    /// # Errors
    ///
    /// * [`FetchError::NotEligible`] when the reference is not a DOI, the
    ///   profile has no Springer grant, or the grant's API key is empty.
    /// * [`FetchError::SourceSchema`] when the URL cannot be built, the body
    ///   is not JSON, the `records` array is missing, or it is empty.
    /// * Whatever the HTTP client or the provenance log report.
    async fn fetch(
        &self,
        ref_: &Ref,
        profile: &CapabilityProfile,
        ctx: &FetchContext,
    ) -> Result<FetchResult, FetchError> {
        // Single construction site for every "this source does not apply" exit.
        fn not_eligible() -> FetchError {
            FetchError::NotEligible {
                source_key: "tdm-springer".into(),
            }
        }

        // Eligibility checks: all of these run before any rate-limit permit is
        // taken or any request is sent.
        let doi = match ref_ {
            Ref::Arxiv(_) => return Err(not_eligible()),
            Ref::Doi(inner) => inner,
        };
        let grant = match profile.tdm_springer.as_ref() {
            Some(found) => found,
            None => return Err(not_eligible()),
        };
        let api_key = grant.api_key.expose_secret();
        if api_key.is_empty() {
            return Err(not_eligible());
        }

        let _permit = ctx.rate_limiter.acquire(self.name()).await;

        let request = self.request_url(doi, api_key)?;
        let (body, landed_url) = ctx.http.fetch_bytes(self.name(), request).await?;
        // The request URL carries the key as a query pair, so scrub the URL
        // before it is handed back to the caller.
        let public_url = redact_api_key_in_url(&landed_url);

        let envelope = match serde_json::from_slice::<serde_json::Value>(&body) {
            Ok(parsed) => parsed,
            Err(e) => {
                return Err(FetchError::SourceSchema {
                    hint: format!("tdm-springer returned non-JSON: {e}"),
                });
            }
        };

        let records = match envelope
            .get("records")
            .and_then(serde_json::Value::as_array)
        {
            Some(list) => list,
            None => {
                return Err(FetchError::SourceSchema {
                    hint: format!(
                        "tdm-springer response missing `records` array (got: {})",
                        truncate_for_hint(&body)
                    ),
                });
            }
        };

        let first = match records.first() {
            Some(record) => record,
            None => {
                return Err(FetchError::SourceSchema {
                    hint: "tdm-springer returned 0 records for this DOI".to_string(),
                });
            }
        };

        // Record the successful fetch before returning the result.
        let canonical = ref_.promote(self.name(), None).digest_hex();
        let row = RowInput {
            event: LogEvent::Fetch,
            result: LogResult::Ok,
            capability: Capability::TdmSpringer,
            ref_: Some(doi.as_str()),
            source: Some(self.name()),
            error_code: None,
            size_bytes: Some(body.len() as u64),
            license: None,
            store_path: None,
            canonical_digest: Some(&canonical),
        };
        ctx.log.append(row)?;

        Ok(FetchResult {
            source: self.name().to_string(),
            license: "unknown".into(),
            pdf_bytes: None,
            final_url: Some(public_url),
            metadata_json: Some(first.clone()),
        })
    }
}
/// Renders a response body as a short, printable preview for error hints.
///
/// The bytes are decoded lossily as UTF-8 (invalid sequences become U+FFFD).
/// If the decoded text is at most 200 bytes long it is returned unchanged;
/// otherwise it is cut to at most 200 bytes and `…` is appended.
///
/// The cut never lands inside a multi-byte character: when byte 200 is not a
/// character boundary the cut backs off to the nearest earlier boundary.
/// Slicing at the raw byte offset would panic on such input.
fn truncate_for_hint(body: &[u8]) -> String {
    const MAX: usize = 200;
    let text = String::from_utf8_lossy(body);
    if text.len() <= MAX {
        return text.into_owned();
    }

    // Offset 0 is always a boundary, so this loop terminates.
    let mut cut = MAX;
    while !text.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}…", &text[..cut])
}
// Tests for the Springer source: wiremock-backed `fetch` round trips, the
// eligibility rejections, and URL redaction.
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
use super::*;
use std::sync::Arc;
use camino::Utf8PathBuf;
use tempfile::TempDir;
use wiremock::matchers::{method, path, query_param};
use wiremock::{Mock, MockServer, ResponseTemplate};
use crate::http::HttpClient;
use crate::provenance::ProvenanceLog;
use crate::rate_limiter::RateLimiter;
use crate::{CapabilityProfile, Doi, RateLimits, Ref, TdmGrant};
/// Response body with exactly one entry in `records`; used for the success path.
const SAMPLE_ENVELOPE_HIT: &str = r#"{
"apiMessage": "ok",
"query": "doi:10.1234/example",
"records": [
{
"identifier": "doi:10.1234/example",
"title": "Example Springer OA Article",
"publicationName": "Example Journal",
"openaccess": "true"
}
]
}"#;
/// Response body whose `records` array is present but empty.
const SAMPLE_ENVELOPE_EMPTY: &str = r#"{
"apiMessage": "ok",
"query": "doi:10.1234/example",
"records": []
}"#;
/// Builds a `FetchContext` for tests.
///
/// The context uses an HTTP client created with `new_for_tests_allow_http`
/// for `wiremock_host`, a rate limiter configured with `RateLimits::HARD_CODED`,
/// and a provenance log written to `test.jsonl` inside a fresh temp dir.
/// The `TempDir` is returned alongside the context; presumably so the caller
/// keeps the log directory alive for the duration of the test.
fn build_test_context(wiremock_host: &str) -> (TempDir, FetchContext) {
let td = TempDir::new().expect("tempdir");
let log_dir =
Utf8PathBuf::try_from(td.path().to_path_buf()).expect("temp dir path must be UTF-8");
let log_path = log_dir.join("test.jsonl");
let http = Arc::new(HttpClient::new_for_tests_allow_http(
"tdm-springer",
wiremock_host,
));
let rate_limiter = Arc::new(RateLimiter::new(RateLimits::HARD_CODED));
let session_id = "01J0000000000000000000TEST".to_string();
let log = Arc::new(
ProvenanceLog::open(log_path, session_id.clone()).expect("provenance log opens"),
);
let ctx = FetchContext {
http,
rate_limiter,
log,
session_id,
};
(td, ctx)
}
/// API key placed in the test grant; the success test asserts it never shows
/// up in the returned `final_url`.
const TEST_KEY: &str = "test-key-xyz";
/// Profile from `CapabilityProfile::from_env` with a Springer grant carrying
/// `TEST_KEY` attached.
fn profile_with_springer_grant() -> CapabilityProfile {
let mut p = CapabilityProfile::from_env().expect("clean env never errors");
p.tdm_springer = Some(TdmGrant {
api_key: secrecy::SecretString::from(TEST_KEY.to_string()),
agree_env_var: "DOIGET_AGREE_TDM_SPRINGER".to_string(),
..Default::default()
});
p
}
/// Profile with a Springer grant whose API key is the empty string, used to
/// exercise the empty-key rejection in `fetch`.
fn profile_with_empty_key_grant() -> CapabilityProfile {
let mut p = CapabilityProfile::from_env().expect("clean env never errors");
p.tdm_springer = Some(TdmGrant {
api_key: secrecy::SecretString::from(String::new()),
agree_env_var: "DOIGET_AGREE_TDM_SPRINGER".to_string(),
..Default::default()
});
p
}
/// Success path: the mock only matches when both `q` and `api_key` arrive as
/// query parameters, and the returned `final_url` must carry the redaction
/// placeholder instead of the real key.
#[tokio::test]
#[serial_test::serial]
async fn fetch_doi_returns_first_record_and_passes_key_in_query() {
let server = MockServer::start().await;
Mock::given(method("GET"))
.and(path("/openaccess/json"))
.and(query_param("q", "doi:10.1234/example"))
.and(query_param("api_key", TEST_KEY))
.respond_with(ResponseTemplate::new(200).set_body_string(SAMPLE_ENVELOPE_HIT))
.mount(&server)
.await;
let (_td, ctx) = build_test_context(&server.uri());
let src =
TdmSpringerSource::with_base(Url::parse(&server.uri()).expect("wiremock URI parses"));
let profile = profile_with_springer_grant();
let ref_ = Ref::Doi(Doi::parse("10.1234/example").expect("DOI parses"));
let result = src.fetch(&ref_, &profile, &ctx).await.expect("fetch ok");
assert_eq!(result.source, "tdm-springer");
assert!(result.pdf_bytes.is_none(), "metadata-only contract");
let final_url = result.final_url.expect("final_url present");
// The raw key must not survive anywhere in the serialized URL.
assert!(
!final_url.as_str().contains(TEST_KEY),
"api_key must be redacted out of final_url: {final_url}"
);
assert!(
final_url
.query_pairs()
.any(|(k, v)| k == "api_key" && v == "REDACTED"),
"redacted api_key sentinel must be present: {final_url}"
);
let meta = result.metadata_json.expect("metadata_json present");
assert_eq!(meta["title"], "Example Springer OA Article");
}
/// A grant with an empty key must be rejected as `NotEligible`.
#[tokio::test]
#[serial_test::serial]
async fn fetch_with_empty_grant_key_is_not_eligible() {
// No mock server is started: `fetch` checks the key before it issues any
// request, so the address is never contacted.
let (_td, ctx) = build_test_context("http://127.0.0.1:1");
let src = TdmSpringerSource::with_base(Url::parse("http://127.0.0.1:1").expect("parses"));
let profile = profile_with_empty_key_grant();
let ref_ = Ref::Doi(Doi::parse("10.1234/example").expect("DOI parses"));
let err = src
.fetch(&ref_, &profile, &ctx)
.await
.expect_err("empty grant key must fail-close");
assert!(matches!(err, FetchError::NotEligible { .. }));
}
/// Redaction replaces only the `api_key` value, keeps other pairs, and leaves
/// a URL without `api_key` equal to its input.
#[test]
fn redact_api_key_in_url_replaces_only_the_key() {
let u = Url::parse(
"https://api.springernature.com/openaccess/json?q=doi:10.1/x&api_key=SUPERSECRET",
)
.expect("parses");
let r = redact_api_key_in_url(&u);
assert!(!r.as_str().contains("SUPERSECRET"), "key must be gone: {r}");
assert!(
r.query_pairs().any(|(k, v)| k == "q" && v == "doi:10.1/x"),
"other pairs preserved: {r}"
);
assert!(r
.query_pairs()
.any(|(k, v)| k == "api_key" && v == "REDACTED"));
// A URL with no key must come back unchanged.
let clean = Url::parse("https://api.springernature.com/openaccess/json?q=doi:10.1/x")
.expect("parses");
assert_eq!(redact_api_key_in_url(&clean), clean);
}
/// Without a Springer grant, `can_serve` is false and `fetch` returns
/// `NotEligible`.
#[tokio::test]
#[serial_test::serial]
async fn fetch_without_grant_is_not_eligible() {
// No mock server is started: `fetch` checks for the grant before it
// issues any request, so the address is never contacted.
let (_td, ctx) = build_test_context("http://127.0.0.1:1");
let src = TdmSpringerSource::with_base(Url::parse("http://127.0.0.1:1").expect("parses"));
// NOTE(review): assumes the environment does not itself configure a
// Springer grant when `from_env` runs — confirm against `from_env`.
let profile = CapabilityProfile::from_env().expect("clean env never errors");
let ref_ = Ref::Doi(Doi::parse("10.1234/example").expect("DOI parses"));
assert!(
!src.can_serve(&profile, &ref_),
"can_serve must be false without TdmGrant"
);
let err = src
.fetch(&ref_, &profile, &ctx)
.await
.expect_err("fetch must reject when grant is absent");
assert!(matches!(err, FetchError::NotEligible { .. }));
}
/// A well-formed response with an empty `records` array surfaces as
/// `SourceSchema`.
#[tokio::test]
#[serial_test::serial]
async fn fetch_empty_records_returns_source_schema() {
let server = MockServer::start().await;
// This mock matches on method and path only; query parameters are not checked.
Mock::given(method("GET"))
.and(path("/openaccess/json"))
.respond_with(ResponseTemplate::new(200).set_body_string(SAMPLE_ENVELOPE_EMPTY))
.mount(&server)
.await;
let (_td, ctx) = build_test_context(&server.uri());
let src =
TdmSpringerSource::with_base(Url::parse(&server.uri()).expect("wiremock URI parses"));
let profile = profile_with_springer_grant();
let ref_ = Ref::Doi(Doi::parse("10.1234/example").expect("DOI parses"));
let result = src.fetch(&ref_, &profile, &ctx).await;
let err = result.expect_err("empty records must surface as SourceSchema");
assert!(matches!(err, FetchError::SourceSchema { .. }));
}
}