doiget_core/source.rs
1//! Source abstraction. Each Tier 1/2/3 fetcher implements this trait.
2//!
3//! Binding spec: `docs/PUBLIC_API.md` §2 (trait surface),
4//! `docs/ARCHITECTURE.md` §6 (per-fetch data flow), and
5//! `docs/PROVENANCE_LOG.md` §3 (the `Fetch` row source impls emit).
6//!
7//! Phase 1 ships the trait + supporting types; concrete impls (Crossref,
8//! Unpaywall, arXiv) land in follow-up PRs (see `docs/SOURCES.md` for the
9//! source matrix and tiering).
10
11use std::sync::Arc;
12
13use async_trait::async_trait;
14use bytes::Bytes;
15use thiserror::Error;
16
17use crate::http::{HttpClient, HttpError};
18use crate::provenance::{LogError, ProvenanceLog};
19use crate::rate_limiter::RateLimiter;
20use crate::{CapabilityProfile, Ref, RefParseError};
21
22/// What a successful fetch returns to the caller.
23///
24/// Whether `pdf_bytes` is `None` depends on the source: metadata-only
25/// sources (Phase 4) leave it unset; OA sources (Phase 1) return PDF bytes
26/// when an OA URL was discovered.
27#[derive(Debug, Clone)]
28#[non_exhaustive]
29pub struct FetchResult {
30 /// Source's name (matches `Source::name()`); set for the audit trail.
31 pub source: String,
32 /// OA license string (`"CC-BY-4.0"`, `"unknown"`, etc.).
33 pub license: String,
34 /// PDF bytes; `None` for metadata-only sources.
35 pub pdf_bytes: Option<Bytes>,
36 /// Final URL after redirect resolution; useful for the metadata
37 /// `[doiget].url` field.
38 pub final_url: Option<url::Url>,
39 /// Source-side metadata payload as a serde_json value. The Source impl
40 /// is responsible for the shape; the caller (Phase 1+ orchestrator)
41 /// maps it into `Metadata` when one exists (Phase 1+).
42 pub metadata_json: Option<serde_json::Value>,
43}
44
45/// Per-fetch context shared by all `Source` impls.
46///
47/// Held by the orchestrator (CLI / MCP server) and passed by reference into
48/// each [`Source::fetch`]. Sources MUST NOT construct their own
49/// [`HttpClient`] / [`RateLimiter`] / [`ProvenanceLog`] — they go through
50/// this context for uniform politeness, redirect allowlisting, and audit
51/// logging.
52#[derive(Clone)]
53pub struct FetchContext {
54 /// Shared, allowlist-aware HTTP client. See [`HttpClient`].
55 pub http: Arc<HttpClient>,
56 /// Process-wide async rate limiter. See [`RateLimiter`].
57 pub rate_limiter: Arc<RateLimiter>,
58 /// Append-only, hash-chained provenance log. Source impls MUST emit
59 /// one `LogEvent::Fetch` row per attempt via `log.append`. See
60 /// [`ProvenanceLog`].
61 pub log: Arc<ProvenanceLog>,
62 /// 26-char ULID identifying this process invocation. Mirrors the
63 /// `session_id` stamped into every provenance row by the writer; held
64 /// here so source impls can include it in their own structured logs
65 /// without re-reading the env.
66 pub session_id: String,
67 /// Resolver cache root (`<cache_root>/resolver/<safekey>.toml`, see
68 /// `docs/CACHE.md` and [`crate::resolver_cache`]). `Some` enables the
69 /// metadata-only resolve cache (repeat resolves served from disk,
70 /// avoiding upstream rate limits); `None` disables it (tests, or a
71 /// caller that opts out). Only `metadata_only` consults it — per-PDF
72 /// fetches are never cached.
73 pub cache_root: Option<camino::Utf8PathBuf>,
74}
75
76impl std::fmt::Debug for FetchContext {
77 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
78 // Avoid printing the full HTTP / rate-limiter / log internals; only
79 // the session_id is human-meaningful for log breadcrumbs.
80 f.debug_struct("FetchContext")
81 .field("session_id", &self.session_id)
82 .finish_non_exhaustive()
83 }
84}
85
86/// Errors returned by [`Source::fetch`].
87///
88/// At the public CLI / MCP boundary, every variant collapses to an
89/// [`crate::ErrorCode`] via the `From<FetchError>` impl below — mirroring
90/// the [`RefParseError`] → [`crate::ErrorCode::InvalidRef`] collapse from
91/// PR #55.
92#[derive(Debug, Error)]
93#[non_exhaustive]
94pub enum FetchError {
95 /// The source does not handle the given ref under the runtime
96 /// capability profile (covers both `can_serve = false` outcomes and
97 /// runtime denials raised inside `fetch`).
98 #[error("source {source_key} cannot serve this ref")]
99 NotEligible {
100 /// The source key that declined.
101 source_key: String,
102 },
103 /// Tier 1 sources reported no OA URL for this ref.
104 #[error("Tier 1 sources reported no OA URL for this ref")]
105 NoOaAvailable,
106 /// A metadata source authoritatively reported that the identifier does
107 /// not exist — distinct from a transport failure. Surfaces as
108 /// [`crate::ErrorCode::NotFound`]. Used for sources whose
109 /// "absent" signal is NOT an HTTP 404/410 (e.g. the arXiv Atom API
110 /// returns HTTP 200 with an empty `<feed>` for an unknown id).
111 #[error("identifier not found: {hint}")]
112 NotFound {
113 /// Human-readable detail (which source, and how it signalled
114 /// absence); not parsed.
115 hint: String,
116 },
117 /// A name filter (author / venue / publisher) matched MORE than one
118 /// OpenAlex entity with no clear winner. Carries a candidate listing
119 /// so the caller can narrow the name (or pass an explicit id).
120 /// Collapses to [`crate::ErrorCode::Ambiguous`] (wire `"AMBIGUOUS"`) —
121 /// distinct from `NotFound` so an agent narrows rather than gives up.
122 /// Used by [`crate::discovery`].
123 #[error("{hint}")]
124 Ambiguous {
125 /// Human-readable candidate listing; not parsed.
126 hint: String,
127 },
128 /// Underlying HTTP / network failure. See [`HttpError`].
129 #[error("network error: {0}")]
130 Http(#[from] HttpError),
131 /// Provenance log write failed. Per `docs/SECURITY.md` §1.8 this is a
132 /// fail-closed signal; the surrounding fetch MUST be aborted.
133 #[error("provenance log error: {0}")]
134 Log(#[from] LogError),
135 /// Ref re-parse / validation failed inside the source (e.g. when a
136 /// source receives a borrowed string from upstream and re-validates).
137 #[error("invalid ref: {0}")]
138 InvalidRef(#[from] RefParseError),
139 /// Source-side schema mismatch (unexpected JSON shape, missing
140 /// required field). Surfaces to [`crate::ErrorCode::InternalError`]
141 /// at the public boundary.
142 #[error("source-side schema error: {hint}")]
143 SourceSchema {
144 /// Human-readable hint at the offending field/path; not parsed.
145 hint: String,
146 },
147 /// Batch orchestrator received more refs than
148 /// [`crate::MAX_BATCH_REFS`]. Surfaced to the MCP `doiget_batch_fetch`
149 /// tool as `ErrorCode::InvalidRef` (closest closed-set fit — the
150 /// request shape itself is invalid; no `denial_context` channel
151 /// applies). Slice 2 / `docs/MCP_TOOLS.md` §1.
152 #[error("too many refs: got {got}, max {max}")]
153 TooManyRefs {
154 /// Number of refs the batch orchestrator was handed.
155 got: usize,
156 /// The hard cap ([`crate::MAX_BATCH_REFS`]).
157 max: usize,
158 },
159 /// A source returned a successful response that contained no usable
160 /// representation of the requested kind — currently `doiget text`'s
161 /// ar5iv leg returning a 200 with no extractable prose (the paper was
162 /// never converted to HTML). The identifier is valid; only this one
163 /// representation is missing. Surfaces as
164 /// [`crate::ErrorCode::TextUnavailable`] so an agent fetches the PDF
165 /// instead of concluding the reference is wrong (issue #302) — NOT
166 /// [`Self::NotFound`], which means the id itself does not exist.
167 #[error(
168 "no readable text for arXiv:{arxiv_id} (no ar5iv HTML render); \
169 the PDF may be fetchable instead"
170 )]
171 TextUnavailable {
172 /// The arXiv id whose ar5iv render was empty; echoed into the
173 /// human/MCP message so the actionable `doiget fetch <id>` hint is
174 /// self-contained. A validated [`crate::ArxivId`] (review #318) —
175 /// the id was already parsed, so the error cannot carry a malformed
176 /// string into the actionable `doiget fetch <id>` hint.
177 arxiv_id: crate::ArxivId,
178 },
179 /// A source returned a successful response that contained no file of the
180 /// requested kind for `doiget source` — a PDF-only / single-file
181 /// submission (no multi-file bundle), or `--figures-only` on a submission
182 /// with no image files. The identifier is valid; only the bundle / figure
183 /// representation is absent. Surfaces as
184 /// [`crate::ErrorCode::TextUnavailable`] (same "this representation is
185 /// missing; the PDF may be fetchable" class as [`Self::TextUnavailable`]),
186 /// but as a DISTINCT variant so the message is not ar5iv-specific
187 /// (issue #343 / ADR-0034; PR review).
188 #[error("no source files for arXiv:{arxiv_id} ({kind}); the PDF may be fetchable instead")]
189 SourceUnavailable {
190 /// The arXiv id whose source bundle / figures were absent.
191 arxiv_id: crate::ArxivId,
192 /// Which representation was requested: `"source bundle"` or `"figures"`.
193 kind: &'static str,
194 },
195}
196
197/// Map [`FetchError`] to the closed [`crate::ErrorCode`] set surfaced at
198/// the public CLI / MCP boundary. Mirrors the
199/// `From<RefParseError> for ErrorCode` collapse from PR #55.
200impl From<FetchError> for crate::ErrorCode {
201 fn from(e: FetchError) -> crate::ErrorCode {
202 crate::ErrorCode::from(&e)
203 }
204}
205
206/// Borrow-form of the collapse above, so a caller that still needs the
207/// error for its `Display` message / `denial_context` side-channel
208/// (notably the CLI human-persona renderer, issue #119) can obtain the
209/// closed code without consuming it. The owned impl delegates here so
210/// the mapping table lives in exactly one place.
211impl From<&FetchError> for crate::ErrorCode {
212 fn from(e: &FetchError) -> crate::ErrorCode {
213 match e {
214 FetchError::NotEligible { .. } => crate::ErrorCode::CapabilityDenied,
215 FetchError::NoOaAvailable => crate::ErrorCode::NoOaAvailable,
216 FetchError::NotFound { .. } => crate::ErrorCode::NotFound,
217 // A name filter that matched several entities is its own wire
218 // code so agents can distinguish "narrow the name" from
219 // "does not exist" (ADR-0031 D5).
220 FetchError::Ambiguous { .. } => crate::ErrorCode::Ambiguous,
221 // 404 / 410 / 451 are authoritative "this id does not exist"
222 // signals → `NotFound` (not retriable). 401 / 403 mean the
223 // server understood the request but denied access (IP block, auth
224 // required) — `CapabilityDenied` lets agents distinguish access
225 // denial from a transient connectivity failure. Everything else
226 // is treated as transient.
227 FetchError::Http(HttpError::HttpStatus {
228 status: 404 | 410 | 451,
229 ..
230 }) => crate::ErrorCode::NotFound,
231 FetchError::Http(HttpError::HttpStatus {
232 status: 401 | 403, ..
233 }) => crate::ErrorCode::CapabilityDenied,
234 FetchError::Http(_) => crate::ErrorCode::NetworkError,
235 FetchError::Log(_) => crate::ErrorCode::LogError,
236 FetchError::InvalidRef(_) => crate::ErrorCode::InvalidRef,
237 FetchError::SourceSchema { .. } => crate::ErrorCode::InternalError,
238 // Slice 2: a too-large batch is a request-shape failure, so
239 // collapse to `INVALID_REF` (closest closed-set fit). The
240 // `#[non_exhaustive]` wildcard below would otherwise route
241 // it to `INTERNAL_ERROR`, which would mislead agents.
242 FetchError::TooManyRefs { .. } => crate::ErrorCode::InvalidRef,
243 // The id resolved; only the ar5iv text representation is
244 // missing. Its own code so an agent fetches the PDF rather
245 // than conclude the reference is wrong (issue #302).
246 FetchError::TextUnavailable { .. } => crate::ErrorCode::TextUnavailable,
247 // The id resolved; only the source-bundle / figure representation
248 // is absent. Same wire code as TextUnavailable (representation
249 // missing → fetch the PDF), distinct variant for a correct message.
250 FetchError::SourceUnavailable { .. } => crate::ErrorCode::TextUnavailable,
251 }
252 }
253}
254
255/// Map a [`FetchError`] reference to the structured [`crate::DenialContext`]
256/// channel introduced by ADR-0023 §4.
257///
258/// `&FetchError` (rather than `FetchError`) so the orchestrator can
259/// produce the structured side-channel without consuming the error it
260/// still needs for `error.message` and the `From<FetchError> for
261/// ErrorCode` collapse above. The `Http` arm delegates to the
262/// `From<&HttpError> for Option<DenialContext>` impl in [`crate::http`].
263impl From<&FetchError> for Option<crate::DenialContext> {
264 fn from(e: &FetchError) -> Self {
265 use crate::{DenialContext, DenialReason};
266 match e {
267 FetchError::NotEligible { source_key } => Some(DenialContext {
268 reason: DenialReason::CapabilityNotGranted,
269 source: Some(source_key.clone()),
270 attempted: None,
271 // CapabilityNotGranted has no allowlist channel: the
272 // producer leaves `expected` at `None` (NOT `Some(vec![])`).
273 // See `DenialContext::expected` for the disambiguation.
274 expected: None,
275 hop_index: None,
276 cap: None,
277 actual: None,
278 }),
279 // Delegate to the HttpError mapping (ADR-0023 §4 mapping table).
280 FetchError::Http(http_err) => http_err.into(),
281 // Non-denial variants map to None per ADR-0023 §4. (Slice 2:
282 // `TooManyRefs` is a request-shape failure, not a denial —
283 // adding it to the None arm keeps the mapping table consistent.)
284 FetchError::NoOaAvailable
285 | FetchError::NotFound { .. }
286 | FetchError::Ambiguous { .. }
287 | FetchError::Log(_)
288 | FetchError::InvalidRef(_)
289 | FetchError::SourceSchema { .. }
290 | FetchError::TooManyRefs { .. }
291 | FetchError::TextUnavailable { .. }
292 | FetchError::SourceUnavailable { .. } => None,
293 }
294 }
295}
296
297/// The trait implemented by every Tier 1 / 2 / 3 fetcher.
298///
299/// Binding signature: `docs/PUBLIC_API.md` §2 (NORMATIVE — the wire shape
300/// of these three methods is semver-locked).
301#[async_trait]
302pub trait Source: Send + Sync {
303 /// Stable name used in metadata (`[doiget].source`) and provenance
304 /// rows. Conventional values: `"crossref"`, `"unpaywall"`, `"arxiv"`,
305 /// `"openalex"`, `"semantic-scholar"`, `"doaj"`, `"tdm-elsevier"`,
306 /// etc. (see `docs/SOURCES.md`).
307 fn name(&self) -> &str;
308
309 /// True if this source can plausibly serve the given ref under the
310 /// runtime capability profile. Implementations MUST be fast and
311 /// non-blocking; the orchestrator calls `can_serve` to decide whether
312 /// to invoke `fetch` at all.
313 fn can_serve(&self, profile: &CapabilityProfile, ref_: &Ref) -> bool;
314
315 /// Perform the source-specific fetch.
316 ///
317 /// Implementations:
318 /// 1. acquire `ctx.rate_limiter.acquire(self.name()).await`,
319 /// 2. fetch via `ctx.http.fetch_bytes` / `ctx.http.fetch_pdf`,
320 /// 3. emit one `LogEvent::Fetch` row via `ctx.log.append`,
321 /// 4. return a [`FetchResult`].
322 ///
323 /// The trait does NOT enforce these steps; it documents the protocol
324 /// so concrete impls produce uniform audit trails (per
325 /// `docs/ARCHITECTURE.md` §6 and `docs/PROVENANCE_LOG.md` §3).
326 async fn fetch(
327 &self,
328 ref_: &Ref,
329 profile: &CapabilityProfile,
330 ctx: &FetchContext,
331 ) -> Result<FetchResult, FetchError>;
332}
333
334// ---------------------------------------------------------------------------
335// Tests
336// ---------------------------------------------------------------------------
337
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;

    use camino::Utf8PathBuf;
    use tempfile::TempDir;

    use crate::http::{tier_1_allowlist, HttpClient};
    use crate::provenance::ProvenanceLog;
    use crate::rate_limiter::RateLimiter;
    use crate::{CapabilityProfile, Doi, ErrorCode, RateLimits, Ref};

    /// Minimal `Source` impl exercised purely to pin the trait shape and
    /// verify dispatch through `Box<dyn Source>`. Concrete sources land in
    /// follow-up PRs (Crossref / Unpaywall / arXiv).
    struct MockSource;

    #[async_trait]
    impl Source for MockSource {
        fn name(&self) -> &str {
            "mock"
        }
        fn can_serve(&self, _: &CapabilityProfile, _: &Ref) -> bool {
            true
        }
        async fn fetch(
            &self,
            _: &Ref,
            _: &CapabilityProfile,
            _: &FetchContext,
        ) -> Result<FetchResult, FetchError> {
            Ok(FetchResult {
                source: "mock".into(),
                license: "unknown".into(),
                pdf_bytes: None,
                final_url: None,
                metadata_json: None,
            })
        }
    }

    /// Build a `FetchContext` backed by real (but inert) Round-A
    /// foundation modules: a `HttpClient` over the Tier-1 allowlist, a
    /// `RateLimiter` at hard-coded politeness, and a `ProvenanceLog` in
    /// a tempdir. Returns the dir as well so the caller keeps it alive
    /// for the duration of the test.
    fn build_test_context() -> (TempDir, FetchContext) {
        let td = TempDir::new().expect("tempdir");
        // Workspace lints ban `std::path::PathBuf` for log paths; convert
        // via camino's `Utf8PathBuf::try_from`.
        let log_dir =
            Utf8PathBuf::try_from(td.path().to_path_buf()).expect("temp dir path must be UTF-8");
        let log_path = log_dir.join("test.jsonl");

        let http = Arc::new(HttpClient::new(tier_1_allowlist()).expect("http client builds"));
        let rate_limiter = Arc::new(RateLimiter::new(RateLimits::HARD_CODED));
        let session_id = "01J0000000000000000000TEST".to_string();
        let log = Arc::new(
            ProvenanceLog::open(log_path, session_id.clone()).expect("provenance log opens"),
        );

        (
            td,
            FetchContext {
                http,
                rate_limiter,
                log,
                session_id,
                cache_root: None,
            },
        )
    }

    /// A validated arXiv id for the variants that carry one. Goes through
    /// `Ref::parse` so the tests never hand-build an `ArxivId`.
    fn sample_arxiv_id() -> crate::ArxivId {
        match Ref::parse("arxiv:2401.12345").expect("parse arxiv id") {
            Ref::Arxiv(a) => a,
            Ref::Doi(_) => unreachable!("parsed an arxiv id"),
        }
    }

    /// `FetchError::Http` wrapping an upstream HTTP status for `url`.
    fn http_status_error(status: u16, url: &'static str) -> FetchError {
        FetchError::Http(HttpError::HttpStatus {
            status,
            url: url.into(),
        })
    }

    #[tokio::test]
    async fn mock_source_compiles_as_trait_object() {
        // Trait-shape pin: a `Source` impl is dyn-safe and can be boxed.
        let s: Box<dyn Source> = Box::new(MockSource);
        assert_eq!(s.name(), "mock");
        let profile = CapabilityProfile::from_env().expect("Phase 0 stub");
        let r = Ref::Doi(Doi("10.1234/example".to_string()));
        assert!(s.can_serve(&profile, &r));

        let (_td, ctx) = build_test_context();
        let res = s.fetch(&r, &profile, &ctx).await.expect("fetch ok");
        assert_eq!(res.source, "mock");
    }

    #[tokio::test]
    async fn mock_source_fetch_returns_result() {
        // Direct dispatch (not through `dyn`) to exercise the async fn
        // body and assert the populated FetchResult fields.
        let s = MockSource;
        let profile = CapabilityProfile::from_env().expect("Phase 0 stub");
        let r = Ref::Doi(Doi("10.1234/example".to_string()));
        let (_td, ctx) = build_test_context();

        let res = s.fetch(&r, &profile, &ctx).await.expect("fetch ok");
        assert_eq!(res.source, "mock");
        assert_eq!(res.license, "unknown");
        assert!(res.pdf_bytes.is_none());
        assert!(res.final_url.is_none());
        assert!(res.metadata_json.is_none());
    }

    #[test]
    fn fetch_error_collapses_to_error_code() {
        // Mirrors `docs/PUBLIC_API.md` §4 / PR #55 boundary collapse.
        // Each variant must map to its documented code.
        let e: ErrorCode = FetchError::NotEligible {
            source_key: "mock".into(),
        }
        .into();
        assert_eq!(e, ErrorCode::CapabilityDenied);

        let e: ErrorCode = FetchError::NoOaAvailable.into();
        assert_eq!(e, ErrorCode::NoOaAvailable);

        let e: ErrorCode = FetchError::Http(HttpError::UnknownSource {
            source_key: "mock".into(),
        })
        .into();
        assert_eq!(e, ErrorCode::NetworkError);

        // 404 / 410 / 451 from a metadata source are authoritative "id does
        // not exist" → NotFound (network-independent), NOT NetworkError.
        for status in [404u16, 410, 451] {
            let e: ErrorCode =
                http_status_error(status, "https://api.crossref.org/works/10.5555/absent").into();
            assert_eq!(
                e,
                ErrorCode::NotFound,
                "status {status} should map to NotFound"
            );
        }
        // A non-HTTP authoritative absence (e.g. arXiv's empty Atom feed)
        // also maps to NotFound.
        let e: ErrorCode = FetchError::NotFound {
            hint: "arxiv empty feed".into(),
        }
        .into();
        assert_eq!(e, ErrorCode::NotFound);
        // A transient upstream status (e.g. 503) stays NetworkError so
        // `doiget verify` tolerates it rather than failing a live id.
        let e: ErrorCode =
            http_status_error(503, "https://api.crossref.org/works/10.5555/down").into();
        assert_eq!(e, ErrorCode::NetworkError);

        let e: ErrorCode = FetchError::Log(LogError::Io(std::io::Error::other("synthetic"))).into();
        assert_eq!(e, ErrorCode::LogError);

        let e: ErrorCode = FetchError::InvalidRef(RefParseError::Empty).into();
        assert_eq!(e, ErrorCode::InvalidRef);

        let e: ErrorCode = FetchError::SourceSchema {
            hint: "missing field 'license'".into(),
        }
        .into();
        assert_eq!(e, ErrorCode::InternalError);

        // Slice 2 — TooManyRefs is a request-shape failure and collapses to
        // INVALID_REF, NOT InternalError.
        let e: ErrorCode = FetchError::TooManyRefs { got: 101, max: 100 }.into();
        assert_eq!(e, ErrorCode::InvalidRef);

        // #343 / ADR-0034 — SourceUnavailable shares the TextUnavailable wire
        // code (representation missing; the PDF may be fetchable), distinct
        // variant for a non-ar5iv message.
        let e: ErrorCode = FetchError::SourceUnavailable {
            arxiv_id: sample_arxiv_id(),
            kind: "figures",
        }
        .into();
        assert_eq!(e, ErrorCode::TextUnavailable);
    }

    #[test]
    fn http_access_denial_statuses_collapse_to_capability_denied() {
        // 401 / 403 mean the server understood the request but refused
        // access; they must not be reported as a transient NetworkError
        // nor as NotFound.
        for status in [401u16, 403] {
            let e: ErrorCode =
                http_status_error(status, "https://api.crossref.org/works/10.5555/denied").into();
            assert_eq!(
                e,
                ErrorCode::CapabilityDenied,
                "status {status} should map to CapabilityDenied"
            );
        }
    }

    #[test]
    fn ambiguous_and_text_unavailable_keep_their_own_codes() {
        // Ambiguous must stay distinct from NotFound so an agent narrows
        // the name instead of giving up.
        let e: ErrorCode = FetchError::Ambiguous {
            hint: "2 candidates".into(),
        }
        .into();
        assert_eq!(e, ErrorCode::Ambiguous);

        // Issue #302 — a missing ar5iv render is not a missing id.
        let e: ErrorCode = FetchError::TextUnavailable {
            arxiv_id: sample_arxiv_id(),
        }
        .into();
        assert_eq!(e, ErrorCode::TextUnavailable);
    }

    #[test]
    fn borrowed_and_owned_collapse_agree() {
        // The owned impl forwards to the borrow-form; pin that both entry
        // points yield the same code.
        let cases = [
            FetchError::NotEligible {
                source_key: "mock".into(),
            },
            FetchError::NoOaAvailable,
            FetchError::NotFound {
                hint: "arxiv empty feed".into(),
            },
            http_status_error(404, "https://api.crossref.org/works/10.5555/absent"),
            http_status_error(403, "https://api.crossref.org/works/10.5555/denied"),
            FetchError::TooManyRefs { got: 101, max: 100 },
        ];
        for e in cases {
            let borrowed = ErrorCode::from(&e);
            let owned = ErrorCode::from(e);
            assert_eq!(borrowed, owned);
        }
    }

    #[test]
    fn representation_missing_messages_are_distinct() {
        // TextUnavailable and SourceUnavailable share a wire code but must
        // not share a message: only the former is about ar5iv.
        let text_msg = FetchError::TextUnavailable {
            arxiv_id: sample_arxiv_id(),
        }
        .to_string();
        assert!(text_msg.contains("ar5iv"), "unexpected message: {text_msg}");

        let source_msg = FetchError::SourceUnavailable {
            arxiv_id: sample_arxiv_id(),
            kind: "figures",
        }
        .to_string();
        assert!(
            source_msg.starts_with("no source files for arXiv:"),
            "unexpected message: {source_msg}"
        );
        assert!(
            source_msg.contains("(figures)"),
            "requested kind must be echoed: {source_msg}"
        );
        assert!(
            !source_msg.contains("ar5iv"),
            "SourceUnavailable message must not be ar5iv-specific: {source_msg}"
        );

        let batch_msg = FetchError::TooManyRefs { got: 101, max: 100 }.to_string();
        assert_eq!(batch_msg, "too many refs: got 101, max 100");
    }

    #[test]
    fn fetch_context_debug_redacts_internals() {
        // Pin the Debug shape — only `session_id` is printed, the rest is
        // elided. Prevents accidental log leakage when a context is
        // included in a `tracing::debug!` event.
        let (_td, ctx) = build_test_context();
        let s = format!("{:?}", ctx);
        assert!(
            s.contains("session_id"),
            "session_id must be in Debug: {}",
            s
        );
        assert!(s.contains("01J0000000000000000000TEST"));
        assert!(
            !s.contains("HttpClient") && !s.contains("RateLimiter") && !s.contains("ProvenanceLog"),
            "FetchContext Debug must not dump foundation internals: {}",
            s,
        );
    }

    // ---------------------------------------------------------------
    // FetchError -> Option<DenialContext> (ADR-0023 §4)
    // ---------------------------------------------------------------

    #[test]
    fn denial_from_not_eligible_carries_source_key() {
        use crate::{DenialContext, DenialReason};
        let e = FetchError::NotEligible {
            source_key: "tdm-elsevier".to_string(),
        };
        let dc: Option<DenialContext> = (&e).into();
        let dc = dc.expect("NotEligible -> Some(DenialContext)");
        assert_eq!(dc.reason, DenialReason::CapabilityNotGranted);
        assert_eq!(dc.source.as_deref(), Some("tdm-elsevier"));
        assert!(dc.attempted.is_none());
        // Post-refinement: `expected: None` ("producer did not populate")
        // rather than `Some(vec![])` ("explicit empty allowlist"). See
        // `DenialContext::expected` field doc for the disambiguation.
        assert!(dc.expected.is_none());
        // The redirect / size-cap channels do not apply to this reason.
        assert!(dc.hop_index.is_none());
        assert!(dc.cap.is_none());
        assert!(dc.actual.is_none());
    }

    #[test]
    fn denial_from_http_delegates_to_http_mapping() {
        use crate::http::HttpError;
        use crate::{DenialContext, DenialReason, PDF_MAX_BYTES};
        // The Http arm must delegate to the HttpError mapping rather than
        // reinventing it, so an OversizedBody surfaces with cap/actual
        // populated and the SizeCapExceeded reason — proving delegation
        // works without per-variant duplication.
        let e = FetchError::Http(HttpError::OversizedBody {
            actual: 209_715_200,
            cap: PDF_MAX_BYTES,
        });
        let dc: Option<DenialContext> = (&e).into();
        let dc = dc.expect("Http(OversizedBody) -> Some(DenialContext)");
        assert_eq!(dc.reason, DenialReason::SizeCapExceeded);
        assert_eq!(dc.cap, Some(PDF_MAX_BYTES));
        assert_eq!(dc.actual, Some(209_715_200));
    }

    #[test]
    fn denial_from_non_denial_variants_returns_none() {
        use crate::DenialContext;
        // Every variant other than `NotEligible` and `Http` is a
        // non-denial and maps to None per ADR-0023 §4. All nine are listed
        // so a variant moved out of the None arm by mistake is caught.
        let cases = [
            FetchError::NoOaAvailable,
            FetchError::NotFound {
                hint: "arxiv empty feed".into(),
            },
            FetchError::Ambiguous {
                hint: "2 candidates".into(),
            },
            FetchError::Log(LogError::Io(std::io::Error::other("synthetic"))),
            FetchError::InvalidRef(RefParseError::Empty),
            FetchError::SourceSchema {
                hint: "missing field 'license'".into(),
            },
            FetchError::TooManyRefs { got: 101, max: 100 },
            FetchError::TextUnavailable {
                arxiv_id: sample_arxiv_id(),
            },
            FetchError::SourceUnavailable {
                arxiv_id: sample_arxiv_id(),
                kind: "source bundle",
            },
        ];
        for e in &cases {
            let dc: Option<DenialContext> = e.into();
            assert!(dc.is_none(), "{e:?} must not produce DenialContext");
        }
    }
}
608}