use std::sync::Arc;
use async_trait::async_trait;
use bytes::Bytes;
use thiserror::Error;
use crate::http::{HttpClient, HttpError};
use crate::provenance::{LogError, ProvenanceLog};
use crate::rate_limiter::RateLimiter;
use crate::{CapabilityProfile, Ref, RefParseError};
/// Outcome of a successful [`Source::fetch`] call.
///
/// Marked `#[non_exhaustive]`, so code outside this crate cannot build it with
/// a struct literal and must tolerate new fields being added later.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct FetchResult {
    /// Identifier of the source that produced this result
    /// (presumably matches `Source::name` — confirm against implementors).
    pub source: String,
    /// License string reported for the fetched item; the exact vocabulary is
    /// defined by the producing source.
    pub license: String,
    /// Raw PDF payload, when one was retrieved.
    pub pdf_bytes: Option<Bytes>,
    /// URL associated with the result, when known
    /// (presumably the post-redirect location — confirm against callers).
    pub final_url: Option<url::Url>,
    /// Source-specific metadata as free-form JSON, when available.
    pub metadata_json: Option<serde_json::Value>,
}
/// Shared handles passed to every [`Source::fetch`] call.
///
/// The three foundation handles are reference-counted, so cloning the context
/// only bumps the `Arc` counts and copies the session id string.
#[derive(Clone)]
pub struct FetchContext {
    /// Shared HTTP client.
    pub http: Arc<HttpClient>,
    /// Shared rate limiter.
    pub rate_limiter: Arc<RateLimiter>,
    /// Shared provenance log.
    pub log: Arc<ProvenanceLog>,
    /// Identifier of the current session; the only field shown by `Debug`.
    pub session_id: String,
}
/// Hand-written `Debug` that prints only `session_id`.
///
/// The `http`, `rate_limiter` and `log` handles are deliberately left out;
/// `finish_non_exhaustive` renders a trailing `..` to signal the omission.
impl std::fmt::Debug for FetchContext {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("FetchContext");
        out.field("session_id", &self.session_id);
        out.finish_non_exhaustive()
    }
}
/// Errors a [`Source`] can return from `fetch`.
///
/// Variants carrying `#[from]` get a generated `From` impl, so the underlying
/// error can be propagated with `?`. The enum is `#[non_exhaustive]`: code
/// outside this crate must include a wildcard arm when matching.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum FetchError {
    /// The named source cannot serve the requested ref.
    #[error("source {source_key} cannot serve this ref")]
    NotEligible { source_key: String },

    /// No open-access URL was reported for the ref.
    #[error("Tier 1 sources reported no OA URL for this ref")]
    NoOaAvailable,

    /// Failure surfaced by the HTTP layer.
    #[error("network error: {0}")]
    Http(#[from] HttpError),

    /// Failure surfaced by the provenance log.
    #[error("provenance log error: {0}")]
    Log(#[from] LogError),

    /// The ref could not be parsed.
    #[error("invalid ref: {0}")]
    InvalidRef(#[from] RefParseError),

    /// The source's response did not have the expected shape; `hint` carries
    /// a human-readable description of the mismatch.
    #[error("source-side schema error: {hint}")]
    SourceSchema { hint: String },

    /// More refs were supplied (`got`) than the allowed maximum (`max`).
    #[error("too many refs: got {got}, max {max}")]
    TooManyRefs { got: usize, max: usize },
}
impl From<FetchError> for crate::ErrorCode {
fn from(e: FetchError) -> crate::ErrorCode {
crate::ErrorCode::from(&e)
}
}
/// Collapses a [`FetchError`] into its coarse `ErrorCode`, discarding payloads.
///
/// The match is intentionally exhaustive (no wildcard arm) so that adding a
/// `FetchError` variant forces this table to be revisited.
impl From<&FetchError> for crate::ErrorCode {
    fn from(err: &FetchError) -> Self {
        match err {
            FetchError::NotEligible { .. } => Self::CapabilityDenied,
            FetchError::NoOaAvailable => Self::NoOaAvailable,
            FetchError::Http(_) => Self::NetworkError,
            FetchError::Log(_) => Self::LogError,
            // Both a malformed ref and an over-long ref list report `InvalidRef`.
            FetchError::InvalidRef(_) | FetchError::TooManyRefs { .. } => Self::InvalidRef,
            FetchError::SourceSchema { .. } => Self::InternalError,
        }
    }
}
/// Extracts structured denial details from a [`FetchError`], if it has any.
///
/// * `NotEligible` yields a `CapabilityNotGranted` context naming the source.
/// * `Http` defers to the `&HttpError -> Option<DenialContext>` conversion.
/// * Every other variant yields `None`.
impl From<&FetchError> for Option<crate::DenialContext> {
    fn from(err: &FetchError) -> Self {
        use crate::{DenialContext, DenialReason};

        match err {
            // The HTTP layer owns its own denial mapping; just forward to it.
            FetchError::Http(inner) => inner.into(),
            FetchError::NotEligible { source_key } => {
                let denial = DenialContext {
                    reason: DenialReason::CapabilityNotGranted,
                    source: Some(source_key.clone()),
                    attempted: None,
                    expected: None,
                    hop_index: None,
                    cap: None,
                    actual: None,
                };
                Some(denial)
            }
            // Listed explicitly (no wildcard) so new variants must be classified.
            FetchError::NoOaAvailable
            | FetchError::Log(_)
            | FetchError::InvalidRef(_)
            | FetchError::SourceSchema { .. }
            | FetchError::TooManyRefs { .. } => None,
        }
    }
}
/// A backend that can resolve a [`Ref`] into a [`FetchResult`].
///
/// The `Send + Sync` supertraits let implementors be shared across threads,
/// and the trait is usable as `dyn Source`.
#[async_trait]
pub trait Source: Send + Sync {
    /// Short identifier for this source.
    fn name(&self) -> &str;

    /// Reports whether this source is able to serve `ref_` under `profile`.
    ///
    /// NOTE(review): presumably callers consult this before `fetch`; nothing
    /// in this trait enforces that ordering — confirm against the dispatcher.
    fn can_serve(&self, profile: &CapabilityProfile, ref_: &Ref) -> bool;

    /// Fetches `ref_` using the shared handles in `ctx`.
    ///
    /// # Errors
    ///
    /// Returns a [`FetchError`] describing why the fetch could not complete.
    async fn fetch(
        &self,
        ref_: &Ref,
        profile: &CapabilityProfile,
        ctx: &FetchContext,
    ) -> Result<FetchResult, FetchError>;
}
#[cfg(test)]
// Test code may panic freely: a failed `expect`/`unwrap` is the failure signal.
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use camino::Utf8PathBuf;
    use tempfile::TempDir;

    use crate::http::{tier_1_allowlist, HttpClient};
    use crate::provenance::ProvenanceLog;
    use crate::rate_limiter::RateLimiter;
    use crate::{CapabilityProfile, Doi, ErrorCode, RateLimits, Ref};

    /// Minimal `Source` that accepts every ref and returns an empty result.
    struct MockSource;

    #[async_trait]
    impl Source for MockSource {
        fn name(&self) -> &str {
            "mock"
        }

        fn can_serve(&self, _: &CapabilityProfile, _: &Ref) -> bool {
            true
        }

        async fn fetch(
            &self,
            _: &Ref,
            _: &CapabilityProfile,
            _: &FetchContext,
        ) -> Result<FetchResult, FetchError> {
            Ok(FetchResult {
                source: "mock".into(),
                license: "unknown".into(),
                pdf_bytes: None,
                final_url: None,
                metadata_json: None,
            })
        }
    }

    /// Builds a `FetchContext` whose provenance log lives in a fresh temp dir.
    ///
    /// The `TempDir` is returned alongside the context so the caller keeps it
    /// alive (bound to `_td`) for as long as the context is in use.
    fn build_test_context() -> (TempDir, FetchContext) {
        let td = TempDir::new().expect("tempdir");
        let log_dir =
            Utf8PathBuf::try_from(td.path().to_path_buf()).expect("temp dir path must be UTF-8");
        let log_path = log_dir.join("test.jsonl");
        let http = Arc::new(HttpClient::new(tier_1_allowlist()).expect("http client builds"));
        let rate_limiter = Arc::new(RateLimiter::new(RateLimits::HARD_CODED));
        let session_id = "01J0000000000000000000TEST".to_string();
        let log = Arc::new(
            ProvenanceLog::open(log_path, session_id.clone()).expect("provenance log opens"),
        );
        (
            td,
            FetchContext {
                http,
                rate_limiter,
                log,
                session_id,
            },
        )
    }

    /// `Source` must stay usable as a boxed trait object.
    #[tokio::test]
    async fn mock_source_compiles_as_trait_object() {
        let s: Box<dyn Source> = Box::new(MockSource);
        assert_eq!(s.name(), "mock");
        let profile = CapabilityProfile::from_env().expect("Phase 0 stub");
        let r = Ref::Doi(Doi("10.1234/example".to_string()));
        assert!(s.can_serve(&profile, &r));
        let (_td, ctx) = build_test_context();
        let res = s.fetch(&r, &profile, &ctx).await.expect("fetch ok");
        assert_eq!(res.source, "mock");
    }

    /// Every field of the mock's `FetchResult` comes back as constructed.
    #[tokio::test]
    async fn mock_source_fetch_returns_result() {
        let s = MockSource;
        let profile = CapabilityProfile::from_env().expect("Phase 0 stub");
        let r = Ref::Doi(Doi("10.1234/example".to_string()));
        let (_td, ctx) = build_test_context();
        let res = s.fetch(&r, &profile, &ctx).await.expect("fetch ok");
        assert_eq!(res.source, "mock");
        assert_eq!(res.license, "unknown");
        assert!(res.pdf_bytes.is_none());
        assert!(res.final_url.is_none());
        assert!(res.metadata_json.is_none());
    }

    /// Pins the `FetchError -> ErrorCode` table, one assertion per variant.
    #[test]
    fn fetch_error_collapses_to_error_code() {
        let e: ErrorCode = FetchError::NotEligible {
            source_key: "mock".into(),
        }
        .into();
        assert_eq!(e, ErrorCode::CapabilityDenied);

        let e: ErrorCode = FetchError::NoOaAvailable.into();
        assert_eq!(e, ErrorCode::NoOaAvailable);

        let e: ErrorCode = FetchError::Http(HttpError::UnknownSource {
            source_key: "mock".into(),
        })
        .into();
        assert_eq!(e, ErrorCode::NetworkError);

        let e: ErrorCode = FetchError::Log(LogError::Io(std::io::Error::other("synthetic"))).into();
        assert_eq!(e, ErrorCode::LogError);

        let e: ErrorCode = FetchError::InvalidRef(RefParseError::Empty).into();
        assert_eq!(e, ErrorCode::InvalidRef);

        let e: ErrorCode = FetchError::SourceSchema {
            hint: "missing field 'license'".into(),
        }
        .into();
        assert_eq!(e, ErrorCode::InternalError);

        let e: ErrorCode = FetchError::TooManyRefs { got: 101, max: 100 }.into();
        assert_eq!(e, ErrorCode::InvalidRef);
    }

    /// `Debug` output shows the session id and nothing about the shared handles.
    #[test]
    fn fetch_context_debug_redacts_internals() {
        let (_td, ctx) = build_test_context();
        let s = format!("{:?}", ctx);
        assert!(
            s.contains("session_id"),
            "session_id must be in Debug: {}",
            s
        );
        assert!(s.contains("01J0000000000000000000TEST"));
        assert!(
            !s.contains("HttpClient") && !s.contains("RateLimiter") && !s.contains("ProvenanceLog"),
            "FetchContext Debug must not dump foundation internals: {}",
            s,
        );
    }

    /// `NotEligible` yields a context naming the source, all other fields unset.
    #[test]
    fn denial_from_not_eligible_carries_source_key() {
        use crate::{DenialContext, DenialReason};
        let e = FetchError::NotEligible {
            source_key: "tdm-elsevier".to_string(),
        };
        let dc: Option<DenialContext> = (&e).into();
        let dc = dc.expect("NotEligible -> Some(DenialContext)");
        assert_eq!(dc.reason, DenialReason::CapabilityNotGranted);
        assert_eq!(dc.source.as_deref(), Some("tdm-elsevier"));
        assert!(dc.attempted.is_none());
        assert!(dc.expected.is_none());
        // The conversion also leaves the remaining optional fields unset.
        assert!(dc.hop_index.is_none());
        assert!(dc.cap.is_none());
        assert!(dc.actual.is_none());
    }

    /// `Http` errors are forwarded to the HTTP layer's own denial mapping.
    #[test]
    fn denial_from_http_delegates_to_http_mapping() {
        use crate::http::HttpError;
        use crate::{DenialContext, DenialReason, PDF_MAX_BYTES};
        let e = FetchError::Http(HttpError::OversizedBody {
            actual: 209_715_200,
            cap: PDF_MAX_BYTES,
        });
        let dc: Option<DenialContext> = (&e).into();
        let dc = dc.expect("Http(OversizedBody) -> Some(DenialContext)");
        assert_eq!(dc.reason, DenialReason::SizeCapExceeded);
        assert_eq!(dc.cap, Some(PDF_MAX_BYTES));
        assert_eq!(dc.actual, Some(209_715_200));
    }

    /// Every variant in the `None` arm of the denial conversion is exercised.
    #[test]
    fn denial_from_non_denial_variants_returns_none() {
        use crate::DenialContext;

        let e = FetchError::NoOaAvailable;
        let dc: Option<DenialContext> = (&e).into();
        assert!(dc.is_none(), "NoOaAvailable must not produce DenialContext");

        let e = FetchError::Log(LogError::Io(std::io::Error::other("synthetic")));
        let dc: Option<DenialContext> = (&e).into();
        assert!(dc.is_none(), "Log must not produce DenialContext");

        let e = FetchError::InvalidRef(RefParseError::Empty);
        let dc: Option<DenialContext> = (&e).into();
        assert!(dc.is_none(), "InvalidRef must not produce DenialContext");

        let e = FetchError::SourceSchema {
            hint: "missing field 'license'".into(),
        };
        let dc: Option<DenialContext> = (&e).into();
        assert!(dc.is_none(), "SourceSchema must not produce DenialContext");

        let e = FetchError::TooManyRefs { got: 101, max: 100 };
        let dc: Option<DenialContext> = (&e).into();
        assert!(dc.is_none(), "TooManyRefs must not produce DenialContext");
    }
}