1use async_trait::async_trait;
7use serde::Deserialize;
8use url::Url;
9
10use crate::provenance::{Capability, LogEvent, LogResult, RowInput};
11use crate::source::{FetchContext, FetchError, FetchResult, Source};
12use crate::{CapabilityProfile, Ref};
13
/// Base URL that [`CrossrefSource::new`] parses and stores as the request base.
const DEFAULT_BASE: &str = "https://api.crossref.org";
17
18#[derive(Clone, Debug)]
22pub struct CrossrefSource {
23 base: Url,
28 #[allow(dead_code)]
33 contact_email: String,
34}
35
36impl CrossrefSource {
37 #[must_use]
42 pub fn new(contact_email: String) -> Self {
43 Self {
44 #[allow(clippy::expect_used)]
48 base: Url::parse(DEFAULT_BASE).expect("hard-coded base URL is valid"),
49 contact_email,
50 }
51 }
52
53 pub fn with_base(base: Url, contact_email: String) -> Self {
60 Self {
61 base,
62 contact_email,
63 }
64 }
65
66 fn request_url(&self, doi: &crate::Doi) -> Result<Url, FetchError> {
71 let path = format!("/works/{}", doi.as_str());
76 self.base.join(&path).map_err(|e| FetchError::SourceSchema {
77 hint: format!("crossref URL construction failed: {e}"),
78 })
79 }
80}
81
82#[async_trait]
83impl Source for CrossrefSource {
84 fn name(&self) -> &str {
85 "crossref"
86 }
87
88 fn can_serve(&self, _profile: &CapabilityProfile, ref_: &Ref) -> bool {
89 matches!(ref_, Ref::Doi(_))
90 }
91
92 async fn fetch(
93 &self,
94 ref_: &Ref,
95 _profile: &CapabilityProfile,
96 ctx: &FetchContext,
97 ) -> Result<FetchResult, FetchError> {
98 let doi = match ref_ {
99 Ref::Doi(d) => d,
100 Ref::Arxiv(_) => {
101 return Err(FetchError::NotEligible {
102 source_key: "crossref".into(),
103 });
104 }
105 };
106
107 let _permit = ctx.rate_limiter.acquire(self.name()).await;
109
110 let url = self.request_url(doi)?;
114 let (body, final_url) = ctx.http.fetch_bytes(self.name(), url).await?;
115
116 let envelope: CrossrefEnvelope =
120 serde_json::from_slice(&body).map_err(|e| FetchError::SourceSchema {
121 hint: format!("crossref returned non-JSON: {e}"),
122 })?;
123 if envelope.status != "ok" {
124 return Err(FetchError::SourceSchema {
125 hint: format!("crossref status = {}", envelope.status),
126 });
127 }
128
129 let canonical = ref_.promote(self.name(), None).digest_hex();
134 ctx.log.append(RowInput {
135 event: LogEvent::Fetch,
136 result: LogResult::Ok,
137 capability: Capability::Oa,
138 ref_: Some(doi.as_str()),
139 source: Some(self.name()),
140 error_code: None,
141 size_bytes: Some(body.len() as u64),
142 license: None,
143 store_path: None,
144 canonical_digest: Some(&canonical),
145 })?;
146
147 Ok(FetchResult {
148 source: self.name().to_string(),
149 license: "unknown".into(),
150 pdf_bytes: None,
153 final_url: Some(final_url),
154 metadata_json: Some(envelope.message),
155 })
156 }
157}
158
159#[derive(Debug, Deserialize)]
162struct CrossrefEnvelope {
163 status: String,
164 message: serde_json::Value,
165}
166
#[cfg(test)]
// Tests signal failure by panicking, so the panic-related lints are relaxed
// for this module only.
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;

    use std::sync::Arc;

    use camino::Utf8PathBuf;
    use tempfile::TempDir;
    use wiremock::matchers::{method, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};

    use crate::http::HttpClient;
    use crate::provenance::ProvenanceLog;
    use crate::rate_limiter::RateLimiter;
    use crate::{ArxivId, CapabilityProfile, Doi, RateLimits, Ref};

    /// DOI used by every test that needs a servable reference.
    const EXAMPLE_DOI: &str = "10.1234/example";
    /// Request path the mock server expects for [`EXAMPLE_DOI`].
    const EXAMPLE_WORKS_PATH: &str = "/works/10.1234/example";
    /// Well-formed envelope with an `ok` status.
    const OK_BODY: &str = r#"{"status":"ok","message":{"title":["Example"]}}"#;
    /// File name of the provenance log inside the per-test temp directory.
    const LOG_FILE_NAME: &str = "test.jsonl";

    /// Builds a `FetchContext` whose provenance log lives in a fresh temp dir.
    ///
    /// The `TempDir` is returned alongside the context so the caller keeps the
    /// directory (and therefore the log file) alive for the test's duration.
    fn build_test_context(wiremock_host: &str) -> (TempDir, FetchContext) {
        let td = TempDir::new().expect("tempdir");
        let log_dir =
            Utf8PathBuf::try_from(td.path().to_path_buf()).expect("temp dir path must be UTF-8");
        let log_path = log_dir.join(LOG_FILE_NAME);

        let http = Arc::new(HttpClient::new_for_tests_allow_http(
            "crossref",
            wiremock_host,
        ));
        let rate_limiter = Arc::new(RateLimiter::new(RateLimits::HARD_CODED));
        let session_id = "01J0000000000000000000TEST".to_string();
        let log = Arc::new(
            ProvenanceLog::open(log_path, session_id.clone()).expect("provenance log opens"),
        );

        (
            td,
            FetchContext {
                http,
                rate_limiter,
                log,
                session_id,
            },
        )
    }

    /// Extracts the host component of the mock server's URI.
    fn server_host(server: &MockServer) -> String {
        server
            .uri()
            .parse::<Url>()
            .expect("wiremock uri parses")
            .host_str()
            .expect("wiremock uri has host")
            .to_string()
    }

    /// Builds a source whose base URL points at the mock server.
    fn crossref_for(server: &MockServer) -> CrossrefSource {
        let base = server.uri().parse::<Url>().expect("wiremock uri parses");
        CrossrefSource::with_base(base, "test@example.org".to_string())
    }

    /// The reference for [`EXAMPLE_DOI`].
    fn example_doi_ref() -> Ref {
        Ref::Doi(Doi::parse(EXAMPLE_DOI).unwrap())
    }

    /// Makes the mock server answer `GET` on [`EXAMPLE_WORKS_PATH`] with `response`.
    async fn mount_example_response(server: &MockServer, response: ResponseTemplate) {
        Mock::given(method("GET"))
            .and(path(EXAMPLE_WORKS_PATH))
            .respond_with(response)
            .mount(server)
            .await;
    }

    #[test]
    fn crossref_can_serve_returns_true_for_doi() {
        let s = CrossrefSource::new("test@example.org".into());
        let profile = CapabilityProfile::from_env().expect("clean env");
        let r = example_doi_ref();
        assert!(s.can_serve(&profile, &r));
    }

    #[test]
    fn crossref_can_serve_returns_false_for_arxiv() {
        let s = CrossrefSource::new("test@example.org".into());
        let profile = CapabilityProfile::from_env().expect("clean env");
        let r = Ref::Arxiv(ArxivId::parse("2401.12345").unwrap());
        assert!(!s.can_serve(&profile, &r));
    }

    #[tokio::test]
    async fn crossref_fetch_returns_envelope_message() {
        let server = MockServer::start().await;
        mount_example_response(
            &server,
            ResponseTemplate::new(200).set_body_string(OK_BODY),
        )
        .await;

        let host = server_host(&server);
        let s = crossref_for(&server);
        let (_td, ctx) = build_test_context(&host);
        let profile = CapabilityProfile::from_env().expect("clean env");
        let r = example_doi_ref();

        let res = s.fetch(&r, &profile, &ctx).await.expect("fetch ok");
        assert_eq!(res.source, "crossref");
        assert_eq!(
            res.metadata_json,
            Some(serde_json::json!({ "title": ["Example"] })),
        );
        assert!(res.pdf_bytes.is_none());
        assert!(res.final_url.is_some());
    }

    #[tokio::test]
    async fn crossref_fetch_with_arxiv_ref_errors_not_eligible() {
        // The base URL is never contacted: the eligibility check returns
        // before any request is built.
        let s = CrossrefSource::with_base(
            Url::parse("http://127.0.0.1:1/").unwrap(),
            "test@example.org".into(),
        );
        let (_td, ctx) = build_test_context("127.0.0.1");
        let profile = CapabilityProfile::from_env().expect("clean env");
        let r = Ref::Arxiv(ArxivId::parse("2401.12345").unwrap());

        let err = s.fetch(&r, &profile, &ctx).await.expect_err("not eligible");
        match err {
            FetchError::NotEligible { source_key } => {
                assert_eq!(source_key, "crossref");
            }
            other => panic!("expected NotEligible, got {other:?}"),
        }
    }

    #[tokio::test]
    async fn crossref_fetch_writes_log_row() {
        let server = MockServer::start().await;
        mount_example_response(
            &server,
            ResponseTemplate::new(200).set_body_string(OK_BODY),
        )
        .await;

        let host = server_host(&server);
        let s = crossref_for(&server);
        // Named without a leading underscore because the directory is read
        // back below.
        let (td, ctx) = build_test_context(&host);
        let profile = CapabilityProfile::from_env().expect("clean env");
        let r = example_doi_ref();

        let _res = s.fetch(&r, &profile, &ctx).await.expect("fetch ok");

        let log_path = td.path().join(LOG_FILE_NAME);
        let raw = std::fs::read_to_string(&log_path).expect("log file readable");
        let lines: Vec<&str> = raw.lines().filter(|l| !l.is_empty()).collect();
        assert_eq!(lines.len(), 1, "expected exactly one row, got {lines:?}");
        let row: serde_json::Value = serde_json::from_str(lines[0]).expect("row is valid JSON");
        assert_eq!(row["event"], "fetch");
        assert_eq!(row["result"], "ok");
        assert_eq!(row["source"], "crossref");
        assert_eq!(row["ref"], "10.1234/example");
    }

    #[tokio::test]
    async fn crossref_404_maps_to_http_error() {
        let server = MockServer::start().await;
        mount_example_response(&server, ResponseTemplate::new(404)).await;

        let host = server_host(&server);
        let s = crossref_for(&server);
        let (_td, ctx) = build_test_context(&host);
        let profile = CapabilityProfile::from_env().expect("clean env");
        let r = example_doi_ref();

        let err = s.fetch(&r, &profile, &ctx).await.expect_err("404 errors");
        match err {
            FetchError::Http(_) => {}
            other => panic!("expected Http(_) on 404, got {other:?}"),
        }
    }

    #[tokio::test]
    async fn crossref_non_ok_status_field_errors_source_schema() {
        let server = MockServer::start().await;
        mount_example_response(
            &server,
            ResponseTemplate::new(200).set_body_string(r#"{"status":"error","message":{}}"#),
        )
        .await;

        let host = server_host(&server);
        let s = crossref_for(&server);
        let (_td, ctx) = build_test_context(&host);
        let profile = CapabilityProfile::from_env().expect("clean env");
        let r = example_doi_ref();

        let err = s
            .fetch(&r, &profile, &ctx)
            .await
            .expect_err("non-ok status errors");
        match err {
            FetchError::SourceSchema { hint } => {
                assert!(
                    hint.contains("status"),
                    "expected status mention in hint, got {hint}"
                );
            }
            other => panic!("expected SourceSchema, got {other:?}"),
        }
    }
}