Skip to main content

doiget_core/
source.rs

//! Source abstraction. Each Tier 1/2/3 fetcher implements this trait.
//!
//! Binding spec: `docs/PUBLIC_API.md` §2 (trait surface),
//! `docs/ARCHITECTURE.md` §6 (per-fetch data flow), and
//! `docs/PROVENANCE_LOG.md` §3 (the `Fetch` row source impls emit).
//!
//! Phase 1 ships the trait + supporting types; concrete impls (Crossref,
//! Unpaywall, arXiv) land in follow-up PRs (see `docs/SOURCES.md` for the
//! source matrix and tiering).
10
11use std::sync::Arc;
12
13use async_trait::async_trait;
14use bytes::Bytes;
15use thiserror::Error;
16
17use crate::http::{HttpClient, HttpError};
18use crate::provenance::{LogError, ProvenanceLog};
19use crate::rate_limiter::RateLimiter;
20use crate::{CapabilityProfile, Ref, RefParseError};
21
22/// What a successful fetch returns to the caller.
23///
24/// Whether `pdf_bytes` is `None` depends on the source: metadata-only
25/// sources (Phase 4) leave it unset; OA sources (Phase 1) return PDF bytes
26/// when an OA URL was discovered.
27#[derive(Debug, Clone)]
28#[non_exhaustive]
29pub struct FetchResult {
30    /// Source's name (matches `Source::name()`); set for the audit trail.
31    pub source: String,
32    /// OA license string (`"CC-BY-4.0"`, `"unknown"`, etc.).
33    pub license: String,
34    /// PDF bytes; `None` for metadata-only sources.
35    pub pdf_bytes: Option<Bytes>,
36    /// Final URL after redirect resolution; useful for the metadata
37    /// `[doiget].url` field.
38    pub final_url: Option<url::Url>,
39    /// Source-side metadata payload as a serde_json value. The Source impl
40    /// is responsible for the shape; the caller (Phase 1+ orchestrator)
41    /// maps it into `Metadata` when one exists (Phase 1+).
42    pub metadata_json: Option<serde_json::Value>,
43}
44
45/// Per-fetch context shared by all `Source` impls.
46///
47/// Held by the orchestrator (CLI / MCP server) and passed by reference into
48/// each [`Source::fetch`]. Sources MUST NOT construct their own
49/// [`HttpClient`] / [`RateLimiter`] / [`ProvenanceLog`] — they go through
50/// this context for uniform politeness, redirect allowlisting, and audit
51/// logging.
52#[derive(Clone)]
53pub struct FetchContext {
54    /// Shared, allowlist-aware HTTP client. See [`HttpClient`].
55    pub http: Arc<HttpClient>,
56    /// Process-wide async rate limiter. See [`RateLimiter`].
57    pub rate_limiter: Arc<RateLimiter>,
58    /// Append-only, hash-chained provenance log. Source impls MUST emit
59    /// one `LogEvent::Fetch` row per attempt via `log.append`. See
60    /// [`ProvenanceLog`].
61    pub log: Arc<ProvenanceLog>,
62    /// 26-char ULID identifying this process invocation. Mirrors the
63    /// `session_id` stamped into every provenance row by the writer; held
64    /// here so source impls can include it in their own structured logs
65    /// without re-reading the env.
66    pub session_id: String,
67    /// Resolver cache root (`<cache_root>/resolver/<safekey>.toml`, see
68    /// `docs/CACHE.md` and [`crate::resolver_cache`]). `Some` enables the
69    /// metadata-only resolve cache (repeat resolves served from disk,
70    /// avoiding upstream rate limits); `None` disables it (tests, or a
71    /// caller that opts out). Only `metadata_only` consults it — per-PDF
72    /// fetches are never cached.
73    pub cache_root: Option<camino::Utf8PathBuf>,
74}
75
76impl std::fmt::Debug for FetchContext {
77    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
78        // Avoid printing the full HTTP / rate-limiter / log internals; only
79        // the session_id is human-meaningful for log breadcrumbs.
80        f.debug_struct("FetchContext")
81            .field("session_id", &self.session_id)
82            .finish_non_exhaustive()
83    }
84}
85
86/// Errors returned by [`Source::fetch`].
87///
88/// At the public CLI / MCP boundary, every variant collapses to an
89/// [`crate::ErrorCode`] via the `From<FetchError>` impl below — mirroring
90/// the [`RefParseError`] → [`crate::ErrorCode::InvalidRef`] collapse from
91/// PR #55.
92#[derive(Debug, Error)]
93#[non_exhaustive]
94pub enum FetchError {
95    /// The source does not handle the given ref under the runtime
96    /// capability profile (covers both `can_serve = false` outcomes and
97    /// runtime denials raised inside `fetch`).
98    #[error("source {source_key} cannot serve this ref")]
99    NotEligible {
100        /// The source key that declined.
101        source_key: String,
102    },
103    /// Tier 1 sources reported no OA URL for this ref.
104    #[error("Tier 1 sources reported no OA URL for this ref")]
105    NoOaAvailable,
106    /// A metadata source authoritatively reported that the identifier does
107    /// not exist — distinct from a transport failure. Surfaces as
108    /// [`crate::ErrorCode::NotFound`]. Used for sources whose
109    /// "absent" signal is NOT an HTTP 404/410 (e.g. the arXiv Atom API
110    /// returns HTTP 200 with an empty `<feed>` for an unknown id).
111    #[error("identifier not found: {hint}")]
112    NotFound {
113        /// Human-readable detail (which source, and how it signalled
114        /// absence); not parsed.
115        hint: String,
116    },
117    /// A name filter (author / venue / publisher) matched MORE than one
118    /// OpenAlex entity with no clear winner. Carries a candidate listing
119    /// so the caller can narrow the name (or pass an explicit id).
120    /// Collapses to [`crate::ErrorCode::Ambiguous`] (wire `"AMBIGUOUS"`) —
121    /// distinct from `NotFound` so an agent narrows rather than gives up.
122    /// Used by [`crate::discovery`].
123    #[error("{hint}")]
124    Ambiguous {
125        /// Human-readable candidate listing; not parsed.
126        hint: String,
127    },
128    /// Underlying HTTP / network failure. See [`HttpError`].
129    #[error("network error: {0}")]
130    Http(#[from] HttpError),
131    /// Provenance log write failed. Per `docs/SECURITY.md` §1.8 this is a
132    /// fail-closed signal; the surrounding fetch MUST be aborted.
133    #[error("provenance log error: {0}")]
134    Log(#[from] LogError),
135    /// Ref re-parse / validation failed inside the source (e.g. when a
136    /// source receives a borrowed string from upstream and re-validates).
137    #[error("invalid ref: {0}")]
138    InvalidRef(#[from] RefParseError),
139    /// Source-side schema mismatch (unexpected JSON shape, missing
140    /// required field). Surfaces to [`crate::ErrorCode::InternalError`]
141    /// at the public boundary.
142    #[error("source-side schema error: {hint}")]
143    SourceSchema {
144        /// Human-readable hint at the offending field/path; not parsed.
145        hint: String,
146    },
147    /// Batch orchestrator received more refs than
148    /// [`crate::MAX_BATCH_REFS`]. Surfaced to the MCP `doiget_batch_fetch`
149    /// tool as `ErrorCode::InvalidRef` (closest closed-set fit — the
150    /// request shape itself is invalid; no `denial_context` channel
151    /// applies). Slice 2 / `docs/MCP_TOOLS.md` §1.
152    #[error("too many refs: got {got}, max {max}")]
153    TooManyRefs {
154        /// Number of refs the batch orchestrator was handed.
155        got: usize,
156        /// The hard cap ([`crate::MAX_BATCH_REFS`]).
157        max: usize,
158    },
159    /// A source returned a successful response that contained no usable
160    /// representation of the requested kind — currently `doiget text`'s
161    /// ar5iv leg returning a 200 with no extractable prose (the paper was
162    /// never converted to HTML). The identifier is valid; only this one
163    /// representation is missing. Surfaces as
164    /// [`crate::ErrorCode::TextUnavailable`] so an agent fetches the PDF
165    /// instead of concluding the reference is wrong (issue #302) — NOT
166    /// [`Self::NotFound`], which means the id itself does not exist.
167    #[error(
168        "no readable text for arXiv:{arxiv_id} (no ar5iv HTML render); \
169         the PDF may be fetchable instead"
170    )]
171    TextUnavailable {
172        /// The arXiv id whose ar5iv render was empty; echoed into the
173        /// human/MCP message so the actionable `doiget fetch <id>` hint is
174        /// self-contained. A validated [`crate::ArxivId`] (review #318) —
175        /// the id was already parsed, so the error cannot carry a malformed
176        /// string into the actionable `doiget fetch <id>` hint.
177        arxiv_id: crate::ArxivId,
178    },
179    /// A source returned a successful response that contained no file of the
180    /// requested kind for `doiget source` — a PDF-only / single-file
181    /// submission (no multi-file bundle), or `--figures-only` on a submission
182    /// with no image files. The identifier is valid; only the bundle / figure
183    /// representation is absent. Surfaces as
184    /// [`crate::ErrorCode::TextUnavailable`] (same "this representation is
185    /// missing; the PDF may be fetchable" class as [`Self::TextUnavailable`]),
186    /// but as a DISTINCT variant so the message is not ar5iv-specific
187    /// (issue #343 / ADR-0034; PR review).
188    #[error("no source files for arXiv:{arxiv_id} ({kind}); the PDF may be fetchable instead")]
189    SourceUnavailable {
190        /// The arXiv id whose source bundle / figures were absent.
191        arxiv_id: crate::ArxivId,
192        /// Which representation was requested: `"source bundle"` or `"figures"`.
193        kind: &'static str,
194    },
195}
196
197/// Map [`FetchError`] to the closed [`crate::ErrorCode`] set surfaced at
198/// the public CLI / MCP boundary. Mirrors the
199/// `From<RefParseError> for ErrorCode` collapse from PR #55.
200impl From<FetchError> for crate::ErrorCode {
201    fn from(e: FetchError) -> crate::ErrorCode {
202        crate::ErrorCode::from(&e)
203    }
204}
205
206/// Borrow-form of the collapse above, so a caller that still needs the
207/// error for its `Display` message / `denial_context` side-channel
208/// (notably the CLI human-persona renderer, issue #119) can obtain the
209/// closed code without consuming it. The owned impl delegates here so
210/// the mapping table lives in exactly one place.
211impl From<&FetchError> for crate::ErrorCode {
212    fn from(e: &FetchError) -> crate::ErrorCode {
213        match e {
214            FetchError::NotEligible { .. } => crate::ErrorCode::CapabilityDenied,
215            FetchError::NoOaAvailable => crate::ErrorCode::NoOaAvailable,
216            FetchError::NotFound { .. } => crate::ErrorCode::NotFound,
217            // A name filter that matched several entities is its own wire
218            // code so agents can distinguish "narrow the name" from
219            // "does not exist" (ADR-0031 D5).
220            FetchError::Ambiguous { .. } => crate::ErrorCode::Ambiguous,
221            // 404 / 410 / 451 are authoritative "this id does not exist"
222            // signals → `NotFound` (not retriable). 401 / 403 mean the
223            // server understood the request but denied access (IP block, auth
224            // required) — `CapabilityDenied` lets agents distinguish access
225            // denial from a transient connectivity failure. Everything else
226            // is treated as transient.
227            FetchError::Http(HttpError::HttpStatus {
228                status: 404 | 410 | 451,
229                ..
230            }) => crate::ErrorCode::NotFound,
231            FetchError::Http(HttpError::HttpStatus {
232                status: 401 | 403, ..
233            }) => crate::ErrorCode::CapabilityDenied,
234            FetchError::Http(_) => crate::ErrorCode::NetworkError,
235            FetchError::Log(_) => crate::ErrorCode::LogError,
236            FetchError::InvalidRef(_) => crate::ErrorCode::InvalidRef,
237            FetchError::SourceSchema { .. } => crate::ErrorCode::InternalError,
238            // Slice 2: a too-large batch is a request-shape failure, so
239            // collapse to `INVALID_REF` (closest closed-set fit). The
240            // `#[non_exhaustive]` wildcard below would otherwise route
241            // it to `INTERNAL_ERROR`, which would mislead agents.
242            FetchError::TooManyRefs { .. } => crate::ErrorCode::InvalidRef,
243            // The id resolved; only the ar5iv text representation is
244            // missing. Its own code so an agent fetches the PDF rather
245            // than conclude the reference is wrong (issue #302).
246            FetchError::TextUnavailable { .. } => crate::ErrorCode::TextUnavailable,
247            // The id resolved; only the source-bundle / figure representation
248            // is absent. Same wire code as TextUnavailable (representation
249            // missing → fetch the PDF), distinct variant for a correct message.
250            FetchError::SourceUnavailable { .. } => crate::ErrorCode::TextUnavailable,
251        }
252    }
253}
254
255/// Map a [`FetchError`] reference to the structured [`crate::DenialContext`]
256/// channel introduced by ADR-0023 §4.
257///
258/// `&FetchError` (rather than `FetchError`) so the orchestrator can
259/// produce the structured side-channel without consuming the error it
260/// still needs for `error.message` and the `From<FetchError> for
261/// ErrorCode` collapse above. The `Http` arm delegates to the
262/// `From<&HttpError> for Option<DenialContext>` impl in [`crate::http`].
263impl From<&FetchError> for Option<crate::DenialContext> {
264    fn from(e: &FetchError) -> Self {
265        use crate::{DenialContext, DenialReason};
266        match e {
267            FetchError::NotEligible { source_key } => Some(DenialContext {
268                reason: DenialReason::CapabilityNotGranted,
269                source: Some(source_key.clone()),
270                attempted: None,
271                // CapabilityNotGranted has no allowlist channel: the
272                // producer leaves `expected` at `None` (NOT `Some(vec![])`).
273                // See `DenialContext::expected` for the disambiguation.
274                expected: None,
275                hop_index: None,
276                cap: None,
277                actual: None,
278            }),
279            // Delegate to the HttpError mapping (ADR-0023 §4 mapping table).
280            FetchError::Http(http_err) => http_err.into(),
281            // Non-denial variants map to None per ADR-0023 §4. (Slice 2:
282            // `TooManyRefs` is a request-shape failure, not a denial —
283            // adding it to the None arm keeps the mapping table consistent.)
284            FetchError::NoOaAvailable
285            | FetchError::NotFound { .. }
286            | FetchError::Ambiguous { .. }
287            | FetchError::Log(_)
288            | FetchError::InvalidRef(_)
289            | FetchError::SourceSchema { .. }
290            | FetchError::TooManyRefs { .. }
291            | FetchError::TextUnavailable { .. }
292            | FetchError::SourceUnavailable { .. } => None,
293        }
294    }
295}
296
297/// The trait implemented by every Tier 1 / 2 / 3 fetcher.
298///
299/// Binding signature: `docs/PUBLIC_API.md` §2 (NORMATIVE — the wire shape
300/// of these three methods is semver-locked).
301#[async_trait]
302pub trait Source: Send + Sync {
303    /// Stable name used in metadata (`[doiget].source`) and provenance
304    /// rows. Conventional values: `"crossref"`, `"unpaywall"`, `"arxiv"`,
305    /// `"openalex"`, `"semantic-scholar"`, `"doaj"`, `"tdm-elsevier"`,
306    /// etc. (see `docs/SOURCES.md`).
307    fn name(&self) -> &str;
308
309    /// True if this source can plausibly serve the given ref under the
310    /// runtime capability profile. Implementations MUST be fast and
311    /// non-blocking; the orchestrator calls `can_serve` to decide whether
312    /// to invoke `fetch` at all.
313    fn can_serve(&self, profile: &CapabilityProfile, ref_: &Ref) -> bool;
314
315    /// Perform the source-specific fetch.
316    ///
317    /// Implementations:
318    ///   1. acquire `ctx.rate_limiter.acquire(self.name()).await`,
319    ///   2. fetch via `ctx.http.fetch_bytes` / `ctx.http.fetch_pdf`,
320    ///   3. emit one `LogEvent::Fetch` row via `ctx.log.append`,
321    ///   4. return a [`FetchResult`].
322    ///
323    /// The trait does NOT enforce these steps; it documents the protocol
324    /// so concrete impls produce uniform audit trails (per
325    /// `docs/ARCHITECTURE.md` §6 and `docs/PROVENANCE_LOG.md` §3).
326    async fn fetch(
327        &self,
328        ref_: &Ref,
329        profile: &CapabilityProfile,
330        ctx: &FetchContext,
331    ) -> Result<FetchResult, FetchError>;
332}
333
334// ---------------------------------------------------------------------------
335// Tests
336// ---------------------------------------------------------------------------
337
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;

    use camino::Utf8PathBuf;
    use tempfile::TempDir;

    use crate::http::{tier_1_allowlist, HttpClient};
    use crate::provenance::ProvenanceLog;
    use crate::rate_limiter::RateLimiter;
    use crate::{CapabilityProfile, Doi, ErrorCode, RateLimits, Ref};

    /// Minimal `Source` impl exercised purely to pin the trait shape and
    /// verify dispatch through `Box<dyn Source>`. Concrete sources land in
    /// follow-up PRs (Crossref / Unpaywall / arXiv).
    struct MockSource;

    #[async_trait]
    impl Source for MockSource {
        fn name(&self) -> &str {
            "mock"
        }
        fn can_serve(&self, _: &CapabilityProfile, _: &Ref) -> bool {
            true
        }
        async fn fetch(
            &self,
            _: &Ref,
            _: &CapabilityProfile,
            _: &FetchContext,
        ) -> Result<FetchResult, FetchError> {
            Ok(FetchResult {
                source: "mock".into(),
                license: "unknown".into(),
                pdf_bytes: None,
                final_url: None,
                metadata_json: None,
            })
        }
    }

    /// Build a `FetchContext` backed by real (but inert) Round-A
    /// foundation modules: a `HttpClient` over the Tier-1 allowlist, a
    /// `RateLimiter` at hard-coded politeness, and a `ProvenanceLog` in
    /// a tempdir. Returns the dir as well so the caller keeps it alive
    /// for the duration of the test.
    fn build_test_context() -> (TempDir, FetchContext) {
        let td = TempDir::new().expect("tempdir");
        // Workspace lints ban `std::path::PathBuf` for log paths; convert
        // via camino's `Utf8PathBuf::try_from`.
        let log_dir =
            Utf8PathBuf::try_from(td.path().to_path_buf()).expect("temp dir path must be UTF-8");
        let log_path = log_dir.join("test.jsonl");

        let http = Arc::new(HttpClient::new(tier_1_allowlist()).expect("http client builds"));
        let rate_limiter = Arc::new(RateLimiter::new(RateLimits::HARD_CODED));
        let session_id = "01J0000000000000000000TEST".to_string();
        let log = Arc::new(
            ProvenanceLog::open(log_path, session_id.clone()).expect("provenance log opens"),
        );

        (
            td,
            FetchContext {
                http,
                rate_limiter,
                log,
                session_id,
                cache_root: None,
            },
        )
    }

    /// A validated arXiv id for the variants that carry one
    /// (`TextUnavailable`, `SourceUnavailable`). Goes through `Ref::parse`
    /// so the tests never hand-build an unvalidated id.
    fn sample_arxiv_id() -> crate::ArxivId {
        match Ref::parse("arxiv:2401.12345").expect("parse arxiv id") {
            Ref::Arxiv(id) => id,
            Ref::Doi(_) => unreachable!("parsed an arxiv id"),
        }
    }

    /// An upstream HTTP-status failure with the given status code.
    fn http_status(status: u16) -> FetchError {
        FetchError::Http(HttpError::HttpStatus {
            status,
            url: "https://api.crossref.org/works/10.5555/absent".into(),
        })
    }

    #[tokio::test]
    async fn mock_source_compiles_as_trait_object() {
        // Trait-shape pin: a `Source` impl is dyn-safe and can be boxed.
        let s: Box<dyn Source> = Box::new(MockSource);
        assert_eq!(s.name(), "mock");
        let profile = CapabilityProfile::from_env().expect("Phase 0 stub");
        let r = Ref::Doi(Doi("10.1234/example".to_string()));
        assert!(s.can_serve(&profile, &r));

        let (_td, ctx) = build_test_context();
        let res = s.fetch(&r, &profile, &ctx).await.expect("fetch ok");
        assert_eq!(res.source, "mock");
    }

    #[tokio::test]
    async fn mock_source_fetch_returns_result() {
        // Direct dispatch (not through `dyn`) to exercise the async fn
        // body and assert the populated FetchResult fields.
        let s = MockSource;
        let profile = CapabilityProfile::from_env().expect("Phase 0 stub");
        let r = Ref::Doi(Doi("10.1234/example".to_string()));
        let (_td, ctx) = build_test_context();

        let res = s.fetch(&r, &profile, &ctx).await.expect("fetch ok");
        assert_eq!(res.source, "mock");
        assert_eq!(res.license, "unknown");
        assert!(res.pdf_bytes.is_none());
        assert!(res.final_url.is_none());
        assert!(res.metadata_json.is_none());
    }

    #[test]
    fn fetch_error_collapses_to_error_code() {
        // Mirrors `docs/PUBLIC_API.md` §4 / PR #55 boundary collapse.
        // Each variant must map to its documented code.
        let e: ErrorCode = FetchError::NotEligible {
            source_key: "mock".into(),
        }
        .into();
        assert_eq!(e, ErrorCode::CapabilityDenied);

        let e: ErrorCode = FetchError::NoOaAvailable.into();
        assert_eq!(e, ErrorCode::NoOaAvailable);

        let e: ErrorCode = FetchError::Http(HttpError::UnknownSource {
            source_key: "mock".into(),
        })
        .into();
        assert_eq!(e, ErrorCode::NetworkError);

        // 404 / 410 / 451 from a metadata source are authoritative "id does
        // not exist" → NotFound (network-independent), NOT NetworkError.
        for status in [404u16, 410, 451] {
            let e: ErrorCode = http_status(status).into();
            assert_eq!(
                e,
                ErrorCode::NotFound,
                "status {status} should map to NotFound"
            );
        }
        // 401 / 403 mean the server denied access → CapabilityDenied, so
        // agents can tell access denial from a connectivity failure.
        for status in [401u16, 403] {
            let e: ErrorCode = http_status(status).into();
            assert_eq!(
                e,
                ErrorCode::CapabilityDenied,
                "status {status} should map to CapabilityDenied"
            );
        }
        // A non-HTTP authoritative absence (e.g. arXiv's empty Atom feed)
        // also maps to NotFound.
        let e: ErrorCode = FetchError::NotFound {
            hint: "arxiv empty feed".into(),
        }
        .into();
        assert_eq!(e, ErrorCode::NotFound);
        // A transient upstream status (e.g. 503) stays NetworkError so
        // `doiget verify` tolerates it rather than failing a live id.
        let e: ErrorCode = http_status(503).into();
        assert_eq!(e, ErrorCode::NetworkError);

        // An ambiguous name filter has its own wire code, distinct from
        // NotFound, so an agent narrows the name instead of giving up.
        let e: ErrorCode = FetchError::Ambiguous {
            hint: "2 candidates".into(),
        }
        .into();
        assert_eq!(e, ErrorCode::Ambiguous);

        let e: ErrorCode = FetchError::Log(LogError::Io(std::io::Error::other("synthetic"))).into();
        assert_eq!(e, ErrorCode::LogError);

        let e: ErrorCode = FetchError::InvalidRef(RefParseError::Empty).into();
        assert_eq!(e, ErrorCode::InvalidRef);

        let e: ErrorCode = FetchError::SourceSchema {
            hint: "missing field 'license'".into(),
        }
        .into();
        assert_eq!(e, ErrorCode::InternalError);

        // Slice 2 — TooManyRefs is a request-shape failure and collapses to
        // INVALID_REF, NOT InternalError (which would mislead agents).
        let e: ErrorCode = FetchError::TooManyRefs { got: 101, max: 100 }.into();
        assert_eq!(e, ErrorCode::InvalidRef);

        // #302 — the id resolved but the ar5iv text render is missing.
        let e: ErrorCode = FetchError::TextUnavailable {
            arxiv_id: sample_arxiv_id(),
        }
        .into();
        assert_eq!(e, ErrorCode::TextUnavailable);

        // #343 / ADR-0034 — SourceUnavailable shares the TextUnavailable wire
        // code (representation missing; the PDF may be fetchable), distinct
        // variant for a non-ar5iv message.
        let e: ErrorCode = FetchError::SourceUnavailable {
            arxiv_id: sample_arxiv_id(),
            kind: "figures",
        }
        .into();
        assert_eq!(e, ErrorCode::TextUnavailable);
    }

    #[test]
    fn borrowed_collapse_leaves_error_usable() {
        // The `From<&FetchError>` form exists so a caller can read the
        // closed code and still render the error's message afterwards
        // (issue #119).
        let err = FetchError::NotFound {
            hint: "arxiv empty feed".into(),
        };
        assert_eq!(ErrorCode::from(&err), ErrorCode::NotFound);
        assert_eq!(err.to_string(), "identifier not found: arxiv empty feed");

        let err = FetchError::TooManyRefs { got: 101, max: 100 };
        assert_eq!(ErrorCode::from(&err), ErrorCode::InvalidRef);
        assert_eq!(err.to_string(), "too many refs: got 101, max 100");
    }

    #[test]
    fn fetch_context_debug_redacts_internals() {
        // Pin the Debug shape — only `session_id` is printed, the rest is
        // elided. Prevents accidental log leakage when a context is
        // included in a `tracing::debug!` event.
        let (_td, ctx) = build_test_context();
        let s = format!("{ctx:?}");
        assert!(s.contains("session_id"), "session_id must be in Debug: {s}");
        assert!(s.contains("01J0000000000000000000TEST"));
        assert!(
            !s.contains("HttpClient") && !s.contains("RateLimiter") && !s.contains("ProvenanceLog"),
            "FetchContext Debug must not dump foundation internals: {s}",
        );
    }

    // ---------------------------------------------------------------
    // FetchError -> Option<DenialContext>  (ADR-0023 §4)
    // ---------------------------------------------------------------

    #[test]
    fn denial_from_not_eligible_carries_source_key() {
        use crate::{DenialContext, DenialReason};
        let e = FetchError::NotEligible {
            source_key: "tdm-elsevier".to_string(),
        };
        let dc: Option<DenialContext> = (&e).into();
        let dc = dc.expect("NotEligible -> Some(DenialContext)");
        assert_eq!(dc.reason, DenialReason::CapabilityNotGranted);
        assert_eq!(dc.source.as_deref(), Some("tdm-elsevier"));
        assert!(dc.attempted.is_none());
        // Post-refinement: `expected: None` ("producer did not populate")
        // rather than `Some(vec![])` ("explicit empty allowlist"). See
        // `DenialContext::expected` field doc for the disambiguation.
        assert!(dc.expected.is_none());
    }

    #[test]
    fn denial_from_http_delegates_to_http_mapping() {
        use crate::{DenialContext, DenialReason, PDF_MAX_BYTES};
        // The Http arm must delegate to the HttpError mapping rather than
        // reinventing it, so an OversizedBody surfaces with cap/actual
        // populated and the SizeCapExceeded reason — proving delegation
        // works without per-variant duplication.
        let e = FetchError::Http(HttpError::OversizedBody {
            actual: 209_715_200,
            cap: PDF_MAX_BYTES,
        });
        let dc: Option<DenialContext> = (&e).into();
        let dc = dc.expect("Http(OversizedBody) -> Some(DenialContext)");
        assert_eq!(dc.reason, DenialReason::SizeCapExceeded);
        assert_eq!(dc.cap, Some(PDF_MAX_BYTES));
        assert_eq!(dc.actual, Some(209_715_200));
    }

    #[test]
    fn denial_from_non_denial_variants_returns_none() {
        use crate::DenialContext;
        // Every non-denial FetchError variant maps to None per ADR-0023 §4.
        // The list covers all variants other than `NotEligible` and `Http`,
        // so a variant added to the None arm should be added here too.
        let cases: Vec<(&str, FetchError)> = vec![
            ("NoOaAvailable", FetchError::NoOaAvailable),
            (
                "NotFound",
                FetchError::NotFound {
                    hint: "arxiv empty feed".into(),
                },
            ),
            (
                "Ambiguous",
                FetchError::Ambiguous {
                    hint: "2 candidates".into(),
                },
            ),
            (
                "Log",
                FetchError::Log(LogError::Io(std::io::Error::other("synthetic"))),
            ),
            ("InvalidRef", FetchError::InvalidRef(RefParseError::Empty)),
            (
                "SourceSchema",
                FetchError::SourceSchema {
                    hint: "missing field 'license'".into(),
                },
            ),
            ("TooManyRefs", FetchError::TooManyRefs { got: 101, max: 100 }),
            (
                "TextUnavailable",
                FetchError::TextUnavailable {
                    arxiv_id: sample_arxiv_id(),
                },
            ),
            (
                "SourceUnavailable",
                FetchError::SourceUnavailable {
                    arxiv_id: sample_arxiv_id(),
                    kind: "source bundle",
                },
            ),
        ];

        for (label, err) in &cases {
            let dc: Option<DenialContext> = err.into();
            assert!(dc.is_none(), "{label} must not produce DenialContext");
        }
    }
}