Skip to main content

doiget_cli/commands/
fetch.rs

//! `doiget fetch <ref>` subcommand.
//!
//! Phase 1 scope:
//!
//! - **arXiv refs** — full end-to-end: PDF bytes are fetched via the
//!   `doiget_core::sources::arxiv::ArxivSource`, the `[doiget]`
//!   extension table is populated with the resolved license, source,
//!   size, and `fetched_at`, and the result is written to the on-disk
//!   store with both the metadata TOML and the PDF.
//! - **DOI refs** — Crossref metadata + Unpaywall license enrichment + an
//!   OA PDF fetch when Unpaywall's `best_oa_location.url_for_pdf` (or
//!   `best_oa_location.url`) resolves to a host on the synthetic
//!   `"oa-publisher"` allowlist (`docs/REDIRECT_ALLOWLIST.md` §3). The OA
//!   URL host check is informed-best-effort; if the host is not on the
//!   allowlist or the body fails the magic-byte check, the orchestrator
//!   logs a `Fetch err` row under `source = "oa-publisher"` and falls back
//!   to metadata-only success — the metadata is still useful.
//!
//! ## Provenance contract
//!
//! Per `docs/PROVENANCE_LOG.md` §3, every invocation emits at least one
//! `SessionStart`, one or more `Fetch` rows (one per source consulted), one
//! `StoreWrite` row on success, and one `SessionEnd`. Each `Fetch` row is
//! appended by the underlying `Source` impl; the orchestrator owns the
//! session-bookend rows and the `StoreWrite` row.
//!
//! ## Configuration surface
//!
//! Hard-coded paths with env-var overrides; full `config.toml` plumbing
//! arrives in a follow-up. See `docs/CONFIG.md` for the eventual surface.
//!
//! | Env var | Default | Purpose |
//! |---|---|---|
//! | `DOIGET_STORE_ROOT` | `$HOME/papers` (or `%USERPROFILE%\papers` on Windows) | Filesystem store root |
//! | `DOIGET_LOG_PATH` | `<config>/doiget/access.jsonl` | Provenance log file |
//! | `DOIGET_CONTACT_EMAIL` | `doiget@localhost` | Polite-pool contact email (User-Agent and Crossref) |
//! | `DOIGET_UNPAYWALL_EMAIL` | (= contact email) | Unpaywall query-string email |
//! | `DOIGET_ARXIV_BASE` | `https://arxiv.org` | arXiv source base (test override) |
//! | `DOIGET_CROSSREF_BASE` | `https://api.crossref.org` | Crossref source base (test override) |
//! | `DOIGET_UNPAYWALL_BASE` | `https://api.unpaywall.org/v2` | Unpaywall source base (test override) |
//! | `DOIGET_OA_PUBLISHER_BASE` | (production allowlist) | OA publisher host allowlist override (test override) |
//! | `DOIGET_OPENALEX_BASE` | (production allowlist) | OpenAlex host override for the citation graph (test override; only meaningful with the `citation` feature) |

43use std::sync::Arc;
44
45use anyhow::{anyhow, Context, Result};
46use camino::Utf8PathBuf;
47
48#[cfg(feature = "citation")]
49use doiget_core::http::tier_2_allowlist;
50use doiget_core::http::{oa_publisher_allowlist, tier_1_allowlist, HttpClient};
51use doiget_core::orchestrator::{fetch_paper as core_fetch_paper, FetchPaperOutcome};
52use doiget_core::provenance::{Capability, LogEvent, LogResult, ProvenanceLog, RowInput};
53use doiget_core::rate_limiter::RateLimiter;
54use doiget_core::source::FetchContext;
55use doiget_core::store::FsStore;
56use doiget_core::{CapabilityProfile, RateLimits, Ref};
57
58/// Defer to docs/PROVENANCE_LOG.md §3: 26-char ULID per process invocation.
59fn new_session_id() -> String {
60    ulid::Ulid::new().to_string()
61}
62
// ---------------------------------------------------------------------------
// Dry-run plan / preview (ADR-0022)
// ---------------------------------------------------------------------------

// The structured `FetchPlan` shape, the `build_fetch_plan` builder, and
// the `build_dry_run_envelope` JSON-shape helper live in `doiget-core`
// so the MCP server can produce a bit-identical envelope without
// depending on `doiget-cli`. The CLI re-exports them here for callers
// that already `use doiget_cli::commands::fetch`.
72pub use doiget_core::dry_run::{
73    build_dry_run_envelope, build_fetch_plan, FetchPlan, PdfSourcePlan, RateLimitBudget,
74};
75
76/// Serialize the dry-run envelope and write it to stdout. Used by the
77/// `--dry-run` flag on `doiget fetch` and `doiget batch`. The envelope
78/// shape matches ADR-0022 §1 / `docs/MCP_TOOLS.md` §10.
79///
80/// `pub` so `commands::batch` (multi-ref dry-run) can reuse it. The
81/// function lives in `doiget-cli` (not `doiget-core`) because `println!`
82/// is a CLI concern; the MCP server uses [`build_dry_run_envelope`]
83/// directly and routes the bytes via JSON-RPC.
84///
85/// `print_stdout` is workspace-deny for MCP stdio safety (ADR-0001 /
86/// `docs/SECURITY.md` §3); `--dry-run` is a CLI-only path that never
87/// runs under the MCP server, so the localized `#[allow]` is the
88/// minimal intervention — same pattern used by `commands::config`,
89/// `commands::info`, etc.
90#[allow(clippy::print_stdout)]
91pub fn emit_dry_run_plan_to_stdout(ref_: &Ref, plan: &FetchPlan) -> Result<()> {
92    let envelope = build_dry_run_envelope(ref_, plan);
93    let s = serde_json::to_string(&envelope).context("serializing dry-run envelope to JSON")?;
94    println!("{s}");
95    Ok(())
96}
97
98/// Resolve the provenance log path. `DOIGET_LOG_PATH` wins; otherwise
99/// fall back to `<config>/doiget/access.jsonl` per `docs/PROVENANCE_LOG.md`
100/// §1.
101fn resolve_log_path() -> Result<Utf8PathBuf> {
102    if let Some(s) = read_env_utf8("DOIGET_LOG_PATH")? {
103        return Ok(Utf8PathBuf::from(s));
104    }
105    let cfg = config_dir_utf8()?;
106    Ok(cfg.join("doiget").join("access.jsonl"))
107}
108
109/// Read an env var and assert it is valid UTF-8. Returns `Ok(None)` if
110/// unset; `Ok(Some(s))` if set and UTF-8; `Err(...)` if set but non-UTF-8.
111/// `std::env::var` already requires UTF-8 (returns `VarError::NotUnicode`
112/// otherwise); we wrap it to surface a friendlier error and avoid the
113/// banned `std::path::PathBuf` round-trip.
114fn read_env_utf8(key: &str) -> Result<Option<String>> {
115    match std::env::var(key) {
116        Ok(s) => Ok(Some(s)),
117        Err(std::env::VarError::NotPresent) => Ok(None),
118        Err(std::env::VarError::NotUnicode(_)) => Err(anyhow!("{key} is not valid UTF-8")),
119    }
120}
121
122/// Best-effort home-dir resolution without depending on the `dirs` crate
123/// (every new dep adds cargo-vet exemption churn). Honors `HOME` first
124/// (POSIX + most CI), then `USERPROFILE` (Windows).
125fn home_dir_utf8() -> Result<Utf8PathBuf> {
126    if let Some(s) = read_env_utf8("HOME")? {
127        return Ok(Utf8PathBuf::from(s));
128    }
129    if let Some(s) = read_env_utf8("USERPROFILE")? {
130        return Ok(Utf8PathBuf::from(s));
131    }
132    Err(anyhow!("neither HOME nor USERPROFILE is set"))
133}
134
135/// Best-effort config-dir resolution. Honors `XDG_CONFIG_HOME` first
136/// (POSIX), then `APPDATA` (Windows), then falls back to `$HOME/.config`.
137fn config_dir_utf8() -> Result<Utf8PathBuf> {
138    if let Some(s) = read_env_utf8("XDG_CONFIG_HOME")? {
139        return Ok(Utf8PathBuf::from(s));
140    }
141    if let Some(s) = read_env_utf8("APPDATA")? {
142        return Ok(Utf8PathBuf::from(s));
143    }
144    let home = home_dir_utf8()?;
145    Ok(home.join(".config"))
146}
147
148/// Construct the workspace-wide [`HttpClient`].
149///
150/// Production path: `HttpClient::new(tier_1_allowlist() ∪ oa_publisher_allowlist())` —
151/// strict HTTPS-only with the canonical Tier-1 redirect allowlist (Crossref,
152/// Unpaywall, arXiv) plus the synthetic `"oa-publisher"` allowlist used for
153/// the OA PDF leg of the DOI fetch path (`fetch_doi` issues
154/// `HttpClient::fetch_pdf("oa-publisher", url)` against the URL Unpaywall
155/// returned in `best_oa_location`). The OA-publisher list is
156/// informed-best-effort per `docs/REDIRECT_ALLOWLIST.md` §3.
157///
158/// Test path: when any of the three `DOIGET_*_BASE` env vars is set, build a
159/// multi-source relaxed-`https_only` client whose per-source allowlist is
160/// derived from the corresponding env-var hosts. The `oa-publisher` source
161/// key is registered against the same host (typically the wiremock origin)
162/// when `DOIGET_OA_PUBLISHER_BASE` is set — this lets the integration tests
163/// under `tests/fetch_doi_oa_pdf_e2e.rs` exercise the full PDF leg without
164/// touching the real network.
165fn build_http_client() -> Result<HttpClient> {
166    let arxiv = std::env::var("DOIGET_ARXIV_BASE").ok();
167    let crossref = std::env::var("DOIGET_CROSSREF_BASE").ok();
168    let unpaywall = std::env::var("DOIGET_UNPAYWALL_BASE").ok();
169    let oa_publisher = std::env::var("DOIGET_OA_PUBLISHER_BASE").ok();
170    // Slice 16: `DOIGET_OPENALEX_BASE` selects a wiremock host for the
171    // citation-graph BFS. Only meaningful with `--features citation`,
172    // but reading the env unconditionally keeps the branch logic
173    // simple and is harmless for default builds.
174    let openalex_base = std::env::var("DOIGET_OPENALEX_BASE").ok();
175
176    if arxiv.is_none()
177        && crossref.is_none()
178        && unpaywall.is_none()
179        && oa_publisher.is_none()
180        && openalex_base.is_none()
181    {
182        let mut allowlists = tier_1_allowlist();
183        allowlists.extend(oa_publisher_allowlist());
184        // Slice 16: when the `citation` feature is compiled in, the
185        // graph subcommand walks OpenAlex Work IDs via
186        // `ctx.http.fetch_bytes("openalex", ...)`. The Tier 2
187        // allowlist registers the `api.openalex.org` host under
188        // that source key. CapabilityProfile.metadata.openalex is
189        // the runtime gate; the allowlist is the transport gate.
190        #[cfg(feature = "citation")]
191        allowlists.extend(tier_2_allowlist());
192        return HttpClient::new(allowlists).context("building HTTP client");
193    }
194
195    // Test-base mode: build a relaxed client per overridden source.
196    let mut owned: Vec<(String, String)> = Vec::new();
197    for (source, base) in [
198        ("arxiv", arxiv.as_deref()),
199        ("crossref", crossref.as_deref()),
200        ("unpaywall", unpaywall.as_deref()),
201        ("oa-publisher", oa_publisher.as_deref()),
202        ("openalex", openalex_base.as_deref()),
203    ] {
204        if let Some(b) = base {
205            let url = url::Url::parse(b)
206                .with_context(|| format!("DOIGET_*_BASE for {source} is not a URL: {b}"))?;
207            let host = url
208                .host_str()
209                .ok_or_else(|| anyhow!("base URL has no host: {b}"))?;
210            owned.push((source.to_string(), host.to_string()));
211        }
212    }
213    let entries: Vec<(&str, &str)> = owned
214        .iter()
215        .map(|(s, h)| (s.as_str(), h.as_str()))
216        .collect();
217    Ok(HttpClient::new_for_tests_allow_http_multi(&entries))
218}
219
// Slice 2: the per-source env-aware constructors that used to live here
// (`build_arxiv_source`, `build_crossref_source`, `build_unpaywall_source`)
// moved into `doiget-core::orchestrator` so the core `fetch_paper`
// orchestrator and the MCP server both honor the same `DOIGET_*_BASE`
// test-override surface. The CLI no longer constructs sources directly —
// it builds the `FetchContext` + `FsStore` and hands them to the core
// orchestrator.
227
/// Resolved configuration derived from the environment.
///
/// Slice 2: `contact_email` / `unpaywall_email` are now read by the
/// `doiget-core::orchestrator::fetch_paper` orchestrator directly from
/// the env (`contact_email_from_env` / `unpaywall_email_from_env` in
/// that module), so the CLI no longer threads them through. The fields
/// stay here so a future slice that adds CLI-flag overrides has a
/// natural attachment point — the `#[allow(dead_code)]` is the minimal
/// intervention until that slice lands.
#[allow(dead_code)]
pub(crate) struct OrchestratorConfig {
    /// Filesystem store root, as resolved by `super::resolve_store_root`.
    pub(crate) store_root: Utf8PathBuf,
    /// Provenance log file path, as resolved by `resolve_log_path`.
    pub(crate) log_path: Utf8PathBuf,
    /// Value of `DOIGET_CONTACT_EMAIL`, or `doiget@localhost` when that
    /// variable cannot be read.
    pub(crate) contact_email: String,
    /// Value of `DOIGET_UNPAYWALL_EMAIL`, or a copy of `contact_email`
    /// when that variable cannot be read.
    pub(crate) unpaywall_email: String,
}
244
245impl OrchestratorConfig {
246    fn from_env() -> Result<Self> {
247        let store_root = super::resolve_store_root()?;
248        let log_path = resolve_log_path()?;
249        let contact_email =
250            std::env::var("DOIGET_CONTACT_EMAIL").unwrap_or_else(|_| "doiget@localhost".into());
251        let unpaywall_email =
252            std::env::var("DOIGET_UNPAYWALL_EMAIL").unwrap_or_else(|_| contact_email.clone());
253        Ok(Self {
254            store_root,
255            log_path,
256            contact_email,
257            unpaywall_email,
258        })
259    }
260}
261
/// Reusable fetch harness shared by `doiget fetch <ref>` (single ref) and
/// `doiget batch <path>` (many refs). Owns the shared foundation modules
/// (`HttpClient` / `RateLimiter` / `ProvenanceLog`), the on-disk store, and
/// the resolved capability profile, plus the session bookkeeping required by
/// `docs/PROVENANCE_LOG.md` §3 (the 26-char ULID `session_id`).
///
/// Construction is performed once via [`FetchHarness::from_env`]. Per-ref
/// orchestration runs through [`FetchHarness::fetch_one`]; bookend rows go
/// via [`FetchHarness::log_session_start`] / [`FetchHarness::log_session_end`]
/// so the orchestrator can frame either one fetch or many.
pub(crate) struct FetchHarness {
    /// Shared HTTP client produced by `build_http_client`.
    pub(crate) http: Arc<HttpClient>,
    /// Shared rate limiter, constructed from `RateLimits::HARD_CODED`.
    pub(crate) rate_limiter: Arc<RateLimiter>,
    /// Shared provenance log, opened at the configured log path with this
    /// harness's `session_id`.
    pub(crate) log: Arc<ProvenanceLog>,
    /// On-disk store opened at the configured store root.
    pub(crate) store: FsStore,
    /// Capability profile resolved via `CapabilityProfile::from_env`.
    pub(crate) profile: CapabilityProfile,
    /// ULID allocated once when the harness is built; the same value is
    /// handed to the provenance log and to every `FetchContext`.
    pub(crate) session_id: String,
    /// Resolved config; Slice 2 keeps this on the harness for the
    /// CLI-only env diagnostics path (`commands::config::doctor`), even
    /// though `fetch_one` no longer needs it (the core orchestrator
    /// re-reads contact email from env directly).
    #[allow(dead_code)]
    pub(crate) cfg: OrchestratorConfig,
}
286
287impl FetchHarness {
288    /// Build a harness from the same env-var surface documented at the top
289    /// of this module. Creates the log parent directory if missing, opens
290    /// the provenance log (allocating a fresh `session_id`), and constructs
291    /// the HTTP client honoring `DOIGET_*_BASE` overrides for tests.
292    pub(crate) fn from_env() -> Result<Self> {
293        let cfg = OrchestratorConfig::from_env()?;
294        if let Some(parent) = cfg.log_path.parent() {
295            if !parent.as_str().is_empty() {
296                std::fs::create_dir_all(parent.as_std_path())
297                    .with_context(|| format!("creating log dir {parent}"))?;
298            }
299        }
300        let session_id = new_session_id();
301        let log = Arc::new(
302            ProvenanceLog::open(cfg.log_path.clone(), session_id.clone())
303                .context("opening provenance log")?,
304        );
305        let http = Arc::new(build_http_client()?);
306        let rate_limiter = Arc::new(RateLimiter::new(RateLimits::HARD_CODED));
307        let store = FsStore::new(cfg.store_root.clone()).context("opening store")?;
308        let profile = CapabilityProfile::from_env().context("resolving capability profile")?;
309
310        Ok(Self {
311            http,
312            rate_limiter,
313            log,
314            store,
315            profile,
316            session_id,
317            cfg,
318        })
319    }
320
321    /// Build a [`FetchContext`] view over this harness's foundation modules.
322    /// Creating one is cheap (cloning three `Arc`s + a `String`); per-ref
323    /// orchestration constructs one on demand.
324    pub(crate) fn fetch_context(&self) -> FetchContext {
325        FetchContext {
326            http: self.http.clone(),
327            rate_limiter: self.rate_limiter.clone(),
328            log: self.log.clone(),
329            session_id: self.session_id.clone(),
330        }
331    }
332
333    /// Append a `SessionStart` row. `ref_input` is the raw user-supplied ref
334    /// string (single-fetch path); pass `None` for batch sessions where no
335    /// single ref attributes the session.
336    pub(crate) fn log_session_start(&self, ref_input: Option<&str>) -> Result<()> {
337        self.log
338            .append(RowInput {
339                event: LogEvent::SessionStart,
340                result: LogResult::Ok,
341                capability: Capability::Oa,
342                ref_: ref_input,
343                source: None,
344                error_code: None,
345                size_bytes: None,
346                license: None,
347                store_path: None,
348                // Session bookend — no audit identity (ADR-0021 §1).
349                canonical_digest: None,
350            })
351            .context("appending SessionStart row")?;
352        Ok(())
353    }
354
355    /// Append a `SessionEnd` row. `ref_input` mirrors the `log_session_start`
356    /// argument; pass `None` for batch sessions. The result is best-effort —
357    /// if this append fails, the caller already has the underlying fetch
358    /// error (if any) and we don't override it.
359    pub(crate) fn log_session_end(&self, ok: bool, ref_input: Option<&str>) {
360        let result = if ok { LogResult::Ok } else { LogResult::Err };
361        let _ = self.log.append(RowInput {
362            event: LogEvent::SessionEnd,
363            result,
364            capability: Capability::Oa,
365            ref_: ref_input,
366            source: None,
367            error_code: None,
368            size_bytes: None,
369            license: None,
370            store_path: None,
371            // Session bookend — no audit identity (ADR-0021 §1).
372            canonical_digest: None,
373        });
374    }
375
376    /// Run a single ref through the per-kind orchestration (arxiv → PDF +
377    /// metadata; doi → metadata-only via Crossref + Unpaywall, with an
378    /// informed-best-effort OA PDF leg). Errors here are scoped to this
379    /// one ref — the caller decides whether to abort the surrounding
380    /// session.
381    ///
382    /// Slice 2: delegates to
383    /// [`doiget_core::orchestrator::fetch_paper`] for the actual work
384    /// (which both CLI and MCP now share). This function keeps the
385    /// CLI-only stderr success-line print.
386    pub(crate) async fn fetch_one(&self, ref_: &Ref) -> Result<()> {
387        let ctx = self.fetch_context();
388        let outcome =
389            core_fetch_paper(ref_, &self.profile, &ctx, &self.store, self.store.root()).await?;
390        emit_success_line(ref_, &outcome);
391        Ok(())
392    }
393}
394
395/// CLI-only one-line success message on stderr (ADR-0001 stdio
396/// convention). Renders the [`FetchPaperOutcome`] in the same form the
397/// pre-Slice-2 CLI emitted: a full-PDF success names the PDF path; a
398/// metadata-only DOI fallback (size_bytes == 0) names the metadata TOML
399/// path the orchestrator wrote.
400fn emit_success_line(ref_: &Ref, outcome: &FetchPaperOutcome) {
401    let label = match ref_ {
402        Ref::Arxiv(id) => format!("arxiv:{}", id.as_str()),
403        Ref::Doi(doi) => format!("doi:{}", doi.as_str()),
404    };
405    if outcome.size_bytes == 0 {
406        print_success(format_args!(
407            "fetched {} (metadata-only) -> {}",
408            label, outcome.path
409        ));
410    } else {
411        print_success(format_args!(
412            "fetched {} ({} bytes) -> {}",
413            label, outcome.size_bytes, outcome.path
414        ));
415    }
416}
417
418/// Run the `doiget fetch <ref>` subcommand.
419///
420/// `dry_run` (ADR-0022 §1): when `true`, build a [`FetchPlan`] from the
421/// parsed [`Ref`] and the configured store root, serialize it as JSON to
422/// stdout, and return `Ok(())` immediately, **without** building a
423/// `FetchHarness` (no provenance log open), without contacting the
424/// network, without writing to the store, and without appending a
425/// provenance row.
426///
427/// When `dry_run` is `false`, the function runs the normal end-to-end
428/// orchestration path: open the provenance log, dispatch the per-kind
429/// orchestrator, emit a `SessionStart` / `SessionEnd` bookend pair.
430///
431/// On success returns `Ok(())` and writes a one-line success message to
432/// stderr (per ADR-0001 stdio convention — no stdout writes from `fetch`
433/// on the normal path). On failure, returns an `anyhow::Error` and emits
434/// a `SessionEnd` row with `result=err` to the provenance log before
435/// returning.
436///
437/// # History
438///
439/// Slice 5 (PR #84 advisory item A2/A3 refactor): the previous
440/// `FetchOptions { dry_run: bool }` single-field option bundle plus the
441/// thin `run(input)` backwards-compat wrapper were collapsed into this
442/// single `dry_run: bool` parameter — the option bundle's single-bool
443/// shape was YAGNI, and the wrapper only existed to spare integration
444/// tests a `FetchOptions::default()` literal.
445pub async fn run_with_options(input: String, dry_run: bool) -> Result<()> {
446    // Step 1: parse + safekey. Granular `RefParseError` collapses to anyhow
447    // via `?`; the higher-level CLI binary maps the error to its exit code.
448    let ref_ = Ref::parse(&input).with_context(|| format!("invalid ref: {input}"))?;
449
450    // Dry-run branch: build the plan and emit it. NO harness, NO network,
451    // NO store write, NO provenance row. Posture-lint ADR-0022 §5 will
452    // verify this branch never reaches `HttpClient::fetch_*`,
453    // `FsStore::write_*`, or `ProvenanceLog::append`.
454    if dry_run {
455        // Resolve store root for path projections. Failures here surface
456        // as a normal CLI error (not as a denial) — same behaviour the
457        // non-dry-run path would exhibit on a misconfigured environment.
458        let store_root = super::resolve_store_root()?;
459        let plan = build_fetch_plan(&ref_, &store_root);
460        emit_dry_run_plan_to_stdout(&ref_, &plan)?;
461        return Ok(());
462    }
463
464    // Step 2: build harness (foundation modules + provenance log).
465    let harness = FetchHarness::from_env()?;
466
467    // Step 3: emit SessionStart. Fail-closed if the log write fails — the
468    // surrounding fetch MUST NOT proceed (`docs/PROVENANCE_LOG.md` §5).
469    harness.log_session_start(Some(ref_.as_input_str()))?;
470
471    // Step 4: dispatch on ref kind.
472    let result = harness.fetch_one(&ref_).await;
473
474    // Step 5: emit SessionEnd regardless of outcome. Best-effort: if this
475    // append also fails, surface the underlying fetch error (or a fresh one
476    // if the fetch was Ok).
477    harness.log_session_end(result.is_ok(), Some(ref_.as_input_str()));
478
479    result
480}
481
/// Single-line user-visible success message, written to stderr per ADR-0001
/// (stdio convention — the CLI never writes a success line to stdout). This
/// is the one place where `eprintln!` is intentional; the workspace
/// `clippy::print_stderr` lint is `warn` so the localized `#[allow]` is the
/// minimal intervention.
///
/// `args` is the pre-built format payload (callers pass `format_args!(..)`);
/// it is printed verbatim followed by a newline.
#[allow(clippy::print_stderr)]
fn print_success(args: std::fmt::Arguments<'_>) {
    eprintln!("{args}");
}
491
492// ---------------------------------------------------------------------------
493// Tests
494// ---------------------------------------------------------------------------
495
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;

    /// The 32 symbols of Crockford base32 as used by the ULID text form:
    /// digits plus uppercase letters, with `I`, `L`, `O`, `U` excluded.
    const CROCKFORD_ALPHABET: &str = "0123456789ABCDEFGHJKMNPQRSTVWXYZ";

    #[test]
    fn new_session_id_is_26_chars() {
        // ULID textual form is fixed-width 26 chars (Crockford base32).
        // `docs/PROVENANCE_LOG.md` §3 requires this exact length.
        let id = new_session_id();
        assert_eq!(id.len(), 26, "session id must be 26 chars: {id:?}");
        // Check membership in the actual Crockford alphabet rather than
        // "any ASCII alphanumeric", so lowercase output or an excluded
        // letter (I, L, O, U) would be caught.
        assert!(
            id.chars().all(|c| CROCKFORD_ALPHABET.contains(c)),
            "ulid must use the Crockford base32 alphabet: {id:?}"
        );
    }

    // Slice 2: the `extract_crossref_fields_*` unit tests moved to
    // `doiget_core::orchestrator::tests` along with the function they
    // covered. The CLI no longer owns those helpers; the marker test
    // below keeps the CLI's `fetch::tests` non-empty after the helper
    // migration so a future regression that nukes the delegation path
    // surfaces as a build failure (the `FetchPaperOutcome` path would
    // stop resolving).
    #[test]
    fn fetch_paper_outcome_is_reachable_from_cli() {
        let _ = std::any::type_name::<doiget_core::orchestrator::FetchPaperOutcome>();
    }
}