doiget_cli/commands/fetch.rs
1//! `doiget fetch <ref>` subcommand.
2//!
3//! Phase 1 scope:
4//!
5//! - **arXiv refs** — full end-to-end: PDF bytes are fetched via the
6//! `doiget_core::sources::arxiv::ArxivSource`, the `[doiget]`
7//! extension table is populated with the resolved license, source,
8//! size, and `fetched_at`, and the result is written to the on-disk
9//! store with both the metadata TOML and the PDF.
10//! - **DOI refs** — Crossref metadata + Unpaywall license enrichment + an
11//! OA PDF fetch when Unpaywall's `best_oa_location.url_for_pdf` (or
12//! `best_oa_location.url`) resolves to a host on the synthetic
13//! `"oa-publisher"` allowlist (`docs/REDIRECT_ALLOWLIST.md` §3). The OA
14//! URL host check is informed-best-effort; if the host is not on the
15//! allowlist or the body fails the magic-byte check, the orchestrator
16//! logs a `Fetch err` row under `source = "oa-publisher"` and falls back
17//! to metadata-only success — the metadata is still useful.
18//!
19//! ## Provenance contract
20//!
21//! Per `docs/PROVENANCE_LOG.md` §3, every invocation emits at least one
22//! `SessionStart`, one or more `Fetch` rows (one per source consulted), one
23//! `StoreWrite` row on success, and one `SessionEnd`. Each `Fetch` row is
24//! appended by the underlying `Source` impl; the orchestrator owns the
25//! session-bookend rows and the `StoreWrite` row.
26//!
27//! ## Configuration surface
28//!
29//! Hard-coded paths with env-var overrides; full `config.toml` plumbing
30//! arrives in a follow-up. See `docs/CONFIG.md` for the eventual surface.
31//!
32//! | Env var | Default | Purpose |
33//! |---|---|---|
34//! | `DOIGET_STORE_ROOT` | `$HOME/papers` (or `%USERPROFILE%\papers` on Windows) | Filesystem store root |
35//! | `DOIGET_LOG_PATH` | `<config>/doiget/access.jsonl` | Provenance log file |
36//! | `DOIGET_CONTACT_EMAIL` | `doiget@localhost` | Polite-pool contact email (User-Agent and Crossref) |
37//! | `DOIGET_UNPAYWALL_EMAIL` | (= contact email) | Unpaywall query-string email |
38//! | `DOIGET_ARXIV_BASE` | `https://arxiv.org` | arXiv source base (test override) |
39//! | `DOIGET_CROSSREF_BASE` | `https://api.crossref.org` | Crossref source base (test override) |
40//! | `DOIGET_UNPAYWALL_BASE` | `https://api.unpaywall.org/v2` | Unpaywall source base (test override) |
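//! | `DOIGET_OA_PUBLISHER_BASE` | (production allowlist) | OA publisher host allowlist override (test override) |
//! | `DOIGET_OPENALEX_BASE` | (production allowlist) | OpenAlex host override for the citation-graph path (test override) |
//! | `DOIGET_CACHE_ROOT` | `$XDG_CACHE_HOME/doiget`, `%LOCALAPPDATA%\doiget\cache`, or `$HOME/.cache/doiget` | Resolver-cache root |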
42
43use std::sync::Arc;
44
45use anyhow::{anyhow, Context, Result};
46use camino::Utf8PathBuf;
47
48#[cfg(feature = "citation")]
49use doiget_core::http::tier_2_allowlist;
50use doiget_core::http::{oa_publisher_allowlist, tier_1_allowlist, HttpClient};
51use doiget_core::orchestrator::{fetch_paper as core_fetch_paper, FetchPaperOutcome, PdfLegStatus};
52use doiget_core::provenance::{Capability, LogEvent, LogResult, ProvenanceLog, RowInput};
53use doiget_core::rate_limiter::RateLimiter;
54use doiget_core::source::{FetchContext, FetchError};
55use doiget_core::store::FsStore;
56use doiget_core::{CapabilityProfile, DenialContext, DenialReason, ErrorCode, RateLimits, Ref};
57
58/// Defer to docs/PROVENANCE_LOG.md §3: 26-char ULID per process invocation.
59pub(crate) fn new_session_id() -> String {
60 ulid::Ulid::new().to_string()
61}
62
63// ---------------------------------------------------------------------------
64// Dry-run plan / preview (ADR-0022)
65// ---------------------------------------------------------------------------
66
67// The structured `FetchPlan` shape, the `build_fetch_plan` builder, and
68// the `build_dry_run_envelope` JSON-shape helper live in `doiget-core`
69// so the MCP server can produce a bit-identical envelope without
70// depending on `doiget-cli`. The CLI re-exports them here for callers
71// that already `use doiget_cli::commands::fetch`.
72pub use doiget_core::dry_run::{
73 build_dry_run_envelope, build_fetch_plan, FetchPlan, PdfSourcePlan, RateLimitBudget,
74};
75
76/// Serialize the dry-run envelope and write it to stdout. Used by the
77/// `--dry-run` flag on `doiget fetch` and `doiget batch`. The envelope
78/// shape matches ADR-0022 §1 / `docs/MCP_TOOLS.md` §10.
79///
80/// `pub` so `commands::batch` (multi-ref dry-run) can reuse it. The
81/// function lives in `doiget-cli` (not `doiget-core`) because `println!`
82/// is a CLI concern; the MCP server uses [`build_dry_run_envelope`]
83/// directly and routes the bytes via JSON-RPC.
84///
85/// `print_stdout` is workspace-deny for MCP stdio safety (ADR-0001 /
86/// `docs/SECURITY.md` §3); `--dry-run` is a CLI-only path that never
87/// runs under the MCP server, so the localized `#[allow]` is the
88/// minimal intervention — same pattern used by `commands::config`,
89/// `commands::info`, etc.
90#[allow(clippy::print_stdout)]
91pub fn emit_dry_run_plan_to_stdout(ref_: &Ref, plan: &FetchPlan) -> Result<()> {
92 let envelope = build_dry_run_envelope(ref_, plan);
93 let s = serde_json::to_string(&envelope).context("serializing dry-run envelope to JSON")?;
94 println!("{s}");
95 Ok(())
96}
97
98/// Resolve the provenance log path. `DOIGET_LOG_PATH` wins; otherwise
99/// fall back to `<config>/doiget/access.jsonl` per `docs/PROVENANCE_LOG.md`
100/// §1.
101pub(crate) fn resolve_log_path() -> Result<Utf8PathBuf> {
102 if let Some(s) = read_env_utf8("DOIGET_LOG_PATH")? {
103 return Ok(Utf8PathBuf::from(s));
104 }
105 let cfg = config_dir_utf8()?;
106 Ok(cfg.join("doiget").join("access.jsonl"))
107}
108
109/// Read an env var and assert it is valid UTF-8. Returns `Ok(None)` if
110/// unset; `Ok(Some(s))` if set and UTF-8; `Err(...)` if set but non-UTF-8.
111/// `std::env::var` already requires UTF-8 (returns `VarError::NotUnicode`
112/// otherwise); we wrap it to surface a friendlier error and avoid the
113/// banned `std::path::PathBuf` round-trip.
114fn read_env_utf8(key: &str) -> Result<Option<String>> {
115 match std::env::var(key) {
116 Ok(s) => Ok(Some(s)),
117 Err(std::env::VarError::NotPresent) => Ok(None),
118 Err(std::env::VarError::NotUnicode(_)) => Err(anyhow!("{key} is not valid UTF-8")),
119 }
120}
121
122/// Best-effort home-dir resolution without depending on the `dirs` crate
123/// (every new dep adds cargo-vet exemption churn). Honors `HOME` first
124/// (POSIX + most CI), then `USERPROFILE` (Windows).
125fn home_dir_utf8() -> Result<Utf8PathBuf> {
126 if let Some(s) = read_env_utf8("HOME")? {
127 return Ok(Utf8PathBuf::from(s));
128 }
129 if let Some(s) = read_env_utf8("USERPROFILE")? {
130 return Ok(Utf8PathBuf::from(s));
131 }
132 Err(anyhow!("neither HOME nor USERPROFILE is set"))
133}
134
135/// Best-effort config-dir resolution. Honors `XDG_CONFIG_HOME` first
136/// (POSIX), then `APPDATA` (Windows), then falls back to `$HOME/.config`.
137///
138/// Crate-visible so sibling modules (`commands::capabilities`,
139/// `commands::config`) can resolve the same `<config_dir>/doiget/`
140/// path the production HTTP-client builder reads from. Keep the
141/// signature stable: any divergence between this and the MCP-side
142/// copy (`crates/doiget-mcp/src/lib.rs::config_dir_utf8`) would
143/// silently desync the user-extension allowlist surfaces.
144pub(crate) fn config_dir_utf8() -> Result<Utf8PathBuf> {
145 if let Some(s) = read_env_utf8("XDG_CONFIG_HOME")? {
146 return Ok(Utf8PathBuf::from(s));
147 }
148 if let Some(s) = read_env_utf8("APPDATA")? {
149 return Ok(Utf8PathBuf::from(s));
150 }
151 let home = home_dir_utf8()?;
152 Ok(home.join(".config"))
153}
154
155/// Best-effort resolver-cache root (`docs/CACHE.md`). Honors
156/// `DOIGET_CACHE_ROOT` first, then `XDG_CACHE_HOME/doiget` (POSIX), then
157/// `LOCALAPPDATA\doiget\cache` (Windows), then `$HOME/.cache/doiget`.
158/// Crate-visible so the `verify` command can enable the resolve cache.
159pub(crate) fn cache_dir_utf8() -> Result<Utf8PathBuf> {
160 if let Some(s) = read_env_utf8("DOIGET_CACHE_ROOT")? {
161 return Ok(Utf8PathBuf::from(s));
162 }
163 if let Some(s) = read_env_utf8("XDG_CACHE_HOME")? {
164 return Ok(Utf8PathBuf::from(s).join("doiget"));
165 }
166 if let Some(s) = read_env_utf8("LOCALAPPDATA")? {
167 return Ok(Utf8PathBuf::from(s).join("doiget").join("cache"));
168 }
169 let home = home_dir_utf8()?;
170 Ok(home.join(".cache").join("doiget"))
171}
172
173/// Build a metadata-resolution [`FetchContext`]: HTTP client, rate
174/// limiter, and provenance log resolved from the environment, with the
175/// resolver cache (`docs/CACHE.md`) enabled best-effort.
176///
177/// This is the shared context for the read-only resolve commands
178/// (`verify`, `cite`) — neither persists to the store, so no store
179/// handle is constructed. Enabling `cache_root` means repeat resolves of
180/// the same ref are served from disk, avoiding upstream rate limits; if
181/// the cache dir can't be resolved the run simply proceeds without it.
182pub(crate) fn build_resolve_context() -> Result<FetchContext> {
183 let session_id = new_session_id();
184 let log_path = resolve_log_path()?;
185 let http = Arc::new(build_http_client()?);
186 let rate_limiter = Arc::new(RateLimiter::new(RateLimits::HARD_CODED));
187 let log = Arc::new(
188 ProvenanceLog::open(log_path, session_id.clone())
189 .context("failed to open provenance log")?,
190 );
191 let cache_root = cache_dir_utf8().ok();
192 Ok(FetchContext {
193 http,
194 rate_limiter,
195 log,
196 session_id,
197 cache_root,
198 })
199}
200
201/// Construct the workspace-wide [`HttpClient`].
202///
203/// Production path: `HttpClient::new(tier_1_allowlist() ∪ oa_publisher_allowlist())` —
204/// strict HTTPS-only with the canonical Tier-1 redirect allowlist (Crossref,
205/// Unpaywall, arXiv) plus the synthetic `"oa-publisher"` allowlist used for
206/// the OA PDF leg of the DOI fetch path (`fetch_doi` issues
207/// `HttpClient::fetch_pdf("oa-publisher", url)` against the URL Unpaywall
208/// returned in `best_oa_location`). The OA-publisher list is
209/// informed-best-effort per `docs/REDIRECT_ALLOWLIST.md` §3.
210///
211/// Test path: when any of the three `DOIGET_*_BASE` env vars is set, build a
212/// multi-source relaxed-`https_only` client whose per-source allowlist is
213/// derived from the corresponding env-var hosts. The `oa-publisher` source
214/// key is registered against the same host (typically the wiremock origin)
215/// when `DOIGET_OA_PUBLISHER_BASE` is set — this lets the integration tests
216/// under `tests/fetch_doi_oa_pdf_e2e.rs` exercise the full PDF leg without
217/// touching the real network.
218pub(crate) fn build_http_client() -> Result<HttpClient> {
219 let arxiv = std::env::var("DOIGET_ARXIV_BASE").ok();
220 let crossref = std::env::var("DOIGET_CROSSREF_BASE").ok();
221 let unpaywall = std::env::var("DOIGET_UNPAYWALL_BASE").ok();
222 let oa_publisher = std::env::var("DOIGET_OA_PUBLISHER_BASE").ok();
223 // Slice 16: `DOIGET_OPENALEX_BASE` selects a wiremock host for the
224 // citation-graph BFS. Only meaningful with `--features citation`,
225 // but reading the env unconditionally keeps the branch logic
226 // simple and is harmless for default builds.
227 let openalex_base = std::env::var("DOIGET_OPENALEX_BASE").ok();
228
229 if arxiv.is_none()
230 && crossref.is_none()
231 && unpaywall.is_none()
232 && oa_publisher.is_none()
233 && openalex_base.is_none()
234 {
235 let mut allowlists = tier_1_allowlist();
236 allowlists.extend(oa_publisher_allowlist());
237 // Slice 16: when the `citation` feature is compiled in, the
238 // graph subcommand walks OpenAlex Work IDs via
239 // `ctx.http.fetch_bytes("openalex", ...)`. The Tier 2
240 // allowlist registers the `api.openalex.org` host under
241 // that source key. CapabilityProfile.metadata.openalex is
242 // the runtime gate; the allowlist is the transport gate.
243 #[cfg(feature = "citation")]
244 allowlists.extend(tier_2_allowlist());
245
246 // ADR-0028 D2: merge user-extension hosts from
247 // `<config_dir>/doiget/config.toml`. See
248 // `doiget_core::user_extension` for the wire contract and
249 // the (deferred) S3b provenance / doctor / capabilities
250 // surfaces.
251 //
252 // Failure handling is opt-in-convenience: a missing config
253 // is silent (Ok-empty), a malformed config emits
254 // `tracing::warn!` and continues with the curated allowlist,
255 // and an unresolvable config dir emits `tracing::debug!`
256 // (only happens in stripped envs with no HOME / XDG /
257 // APPDATA — review pass I3 / A1).
258 match config_dir_utf8() {
259 Ok(cfg_dir) => {
260 let path = cfg_dir.join("doiget").join("config.toml");
261 match doiget_core::user_extension::load(&path) {
262 Ok(user_hosts) if !user_hosts.is_empty() => {
263 tracing::info!(
264 count = user_hosts.len(),
265 path = %path,
266 "merging user-extension allowlist hosts (ADR-0028 D2)"
267 );
268 doiget_core::user_extension::merge_into_allowlists(
269 &mut allowlists,
270 &user_hosts,
271 );
272 }
273 Ok(_) => {}
274 Err(e) => {
275 tracing::warn!(
276 error = %e,
277 path = %path,
278 "failed to load user-extension allowlist; \
279 falling back to curated set only"
280 );
281 }
282 }
283 }
284 Err(e) => {
285 tracing::debug!(
286 error = %e,
287 "config dir unresolvable; \
288 user-extension allowlist disabled (curated set only)"
289 );
290 }
291 }
292
293 return HttpClient::new(allowlists).context("building HTTP client");
294 }
295
296 // Test-base mode: build a relaxed client per overridden source.
297 let mut owned: Vec<(String, String)> = Vec::new();
298 for (source, base) in [
299 ("arxiv", arxiv.as_deref()),
300 ("crossref", crossref.as_deref()),
301 ("unpaywall", unpaywall.as_deref()),
302 ("oa-publisher", oa_publisher.as_deref()),
303 ("openalex", openalex_base.as_deref()),
304 ] {
305 if let Some(b) = base {
306 let url = url::Url::parse(b)
307 .with_context(|| format!("DOIGET_*_BASE for {source} is not a URL: {b}"))?;
308 let host = url
309 .host_str()
310 .ok_or_else(|| anyhow!("base URL has no host: {b}"))?;
311 owned.push((source.to_string(), host.to_string()));
312 }
313 }
314 let entries: Vec<(&str, &str)> = owned
315 .iter()
316 .map(|(s, h)| (s.as_str(), h.as_str()))
317 .collect();
318 Ok(HttpClient::new_for_tests_allow_http_multi(&entries))
319}
320
321// Slice 2: the per-source env-aware constructors that used to live here
322// (`build_arxiv_source`, `build_crossref_source`, `build_unpaywall_source`)
323// moved into `doiget-core::orchestrator` so the core `fetch_paper`
324// orchestrator and the MCP server both honor the same `DOIGET_*_BASE`
325// test-override surface. The CLI no longer constructs sources directly —
326// it builds the `FetchContext` + `FsStore` and hands them to the core
327// orchestrator.
328
329/// Resolved configuration derived from the environment.
330///
331/// Slice 2: `contact_email` / `unpaywall_email` are now read by the
332/// `doiget-core::orchestrator::fetch_paper` orchestrator directly from
333/// the env (`contact_email_from_env` / `unpaywall_email_from_env` in
334/// that module), so the CLI no longer threads them through. The fields
335/// stay here so a future slice that adds CLI-flag overrides has a
336/// natural attachment point — the `#[allow(dead_code)]` is the minimal
337/// intervention until that slice lands.
338#[allow(dead_code)]
339pub(crate) struct OrchestratorConfig {
340 pub(crate) store_root: Utf8PathBuf,
341 pub(crate) log_path: Utf8PathBuf,
342 pub(crate) contact_email: String,
343 pub(crate) unpaywall_email: String,
344}
345
346impl OrchestratorConfig {
347 fn from_env() -> Result<Self> {
348 let store_root = super::resolve_store_root()?;
349 let log_path = resolve_log_path()?;
350 let contact_email =
351 std::env::var("DOIGET_CONTACT_EMAIL").unwrap_or_else(|_| "doiget@localhost".into());
352 let unpaywall_email =
353 std::env::var("DOIGET_UNPAYWALL_EMAIL").unwrap_or_else(|_| contact_email.clone());
354 Ok(Self {
355 store_root,
356 log_path,
357 contact_email,
358 unpaywall_email,
359 })
360 }
361}
362
363/// Reusable fetch harness shared by `doiget fetch <ref>` (single ref) and
364/// `doiget batch <path>` (many refs). Owns the shared foundation modules
365/// (`HttpClient` / `RateLimiter` / `ProvenanceLog`), the on-disk store, and
366/// the resolved capability profile, plus the session bookkeeping required by
367/// `docs/PROVENANCE_LOG.md` §3 (the 26-char ULID `session_id`).
368///
369/// Construction is performed once via [`FetchHarness::from_env`]. Per-ref
370/// orchestration runs through [`FetchHarness::fetch_one`]; bookend rows go
371/// via [`FetchHarness::log_session_start`] / [`FetchHarness::log_session_end`]
372/// so the orchestrator can frame either one fetch or many.
373pub(crate) struct FetchHarness {
374 pub(crate) http: Arc<HttpClient>,
375 pub(crate) rate_limiter: Arc<RateLimiter>,
376 pub(crate) log: Arc<ProvenanceLog>,
377 pub(crate) store: FsStore,
378 pub(crate) profile: CapabilityProfile,
379 pub(crate) session_id: String,
380 /// Resolved config; Slice 2 keeps this on the harness for the
381 /// CLI-only env diagnostics path (`commands::config::doctor`), even
382 /// though `fetch_one` no longer needs it (the core orchestrator
383 /// re-reads contact email from env directly).
384 #[allow(dead_code)]
385 pub(crate) cfg: OrchestratorConfig,
386}
387
388impl FetchHarness {
389 /// Build a harness from the same env-var surface documented at the top
390 /// of this module. Creates the log parent directory if missing, opens
391 /// the provenance log (allocating a fresh `session_id`), and constructs
392 /// the HTTP client honoring `DOIGET_*_BASE` overrides for tests.
393 pub(crate) fn from_env() -> Result<Self> {
394 let cfg = OrchestratorConfig::from_env()?;
395 if let Some(parent) = cfg.log_path.parent() {
396 if !parent.as_str().is_empty() {
397 std::fs::create_dir_all(parent.as_std_path())
398 .with_context(|| format!("creating log dir {parent}"))?;
399 }
400 }
401 let session_id = new_session_id();
402 let log = Arc::new(
403 ProvenanceLog::open(cfg.log_path.clone(), session_id.clone())
404 .context("opening provenance log")?,
405 );
406 let http = Arc::new(build_http_client()?);
407 let rate_limiter = Arc::new(RateLimiter::new(RateLimits::HARD_CODED));
408 let store = FsStore::new(cfg.store_root.clone()).context("opening store")?;
409 let profile = CapabilityProfile::from_env().context("resolving capability profile")?;
410
411 Ok(Self {
412 http,
413 rate_limiter,
414 log,
415 store,
416 profile,
417 session_id,
418 cfg,
419 })
420 }
421
422 /// Build a [`FetchContext`] view over this harness's foundation modules.
423 /// Creating one is cheap (cloning three `Arc`s + a `String`); per-ref
424 /// orchestration constructs one on demand.
425 pub(crate) fn fetch_context(&self) -> FetchContext {
426 FetchContext {
427 http: self.http.clone(),
428 rate_limiter: self.rate_limiter.clone(),
429 log: self.log.clone(),
430 session_id: self.session_id.clone(),
431 cache_root: None,
432 }
433 }
434
435 /// Append a `SessionStart` row. `ref_input` is the raw user-supplied ref
436 /// string (single-fetch path); pass `None` for batch sessions where no
437 /// single ref attributes the session.
438 pub(crate) fn log_session_start(&self, ref_input: Option<&str>) -> Result<()> {
439 self.log
440 .append(RowInput {
441 event: LogEvent::SessionStart,
442 result: LogResult::Ok,
443 capability: Capability::Oa,
444 ref_: ref_input,
445 source: None,
446 error_code: None,
447 size_bytes: None,
448 license: None,
449 store_path: None,
450 // Session bookend — no audit identity (ADR-0021 §1).
451 canonical_digest: None,
452 })
453 .context("appending SessionStart row")?;
454 Ok(())
455 }
456
457 /// Append a `SessionEnd` row. `ref_input` mirrors the `log_session_start`
458 /// argument; pass `None` for batch sessions. The result is best-effort —
459 /// if this append fails, the caller already has the underlying fetch
460 /// error (if any) and we don't override it.
461 pub(crate) fn log_session_end(&self, ok: bool, ref_input: Option<&str>) {
462 let result = if ok { LogResult::Ok } else { LogResult::Err };
463 let _ = self.log.append(RowInput {
464 event: LogEvent::SessionEnd,
465 result,
466 capability: Capability::Oa,
467 ref_: ref_input,
468 source: None,
469 error_code: None,
470 size_bytes: None,
471 license: None,
472 store_path: None,
473 // Session bookend — no audit identity (ADR-0021 §1).
474 canonical_digest: None,
475 });
476 }
477
478 /// Run a single ref through the per-kind orchestration (arxiv → PDF +
479 /// metadata; doi → metadata-only via Crossref + Unpaywall, with an
480 /// informed-best-effort OA PDF leg). Errors here are scoped to this
481 /// one ref — the caller decides whether to abort the surrounding
482 /// session.
483 ///
484 /// Slice 2: delegates to
485 /// [`doiget_core::orchestrator::fetch_paper`] for the actual work
486 /// (which both CLI and MCP now share). This function keeps the
487 /// CLI-only stderr success-line print.
488 pub(crate) async fn fetch_one(&self, ref_: &Ref) -> Result<FetchPaperOutcome, FetchError> {
489 // Pure data path: return the typed outcome (or typed error)
490 // without any CLI-only rendering or exit-code synthesis. The
491 // single-fetch caller (`run_with_options`) and the batch
492 // caller (`commands::batch::classify_joined`) each render the
493 // human / JSON surface and map to `CliExit` themselves — see
494 // #210 for the rationale (batch's `--json` JSONL needs the
495 // structured `FetchPaperOutcome` to emit `result.{safekey,
496 // store_path, canonical_digest}` on success and
497 // `denial_context` on a `PdfLegStatus::Blocked` outcome, which
498 // was unreachable through the previous `Result<()>`
499 // signature).
500 let ctx = self.fetch_context();
501 core_fetch_paper(ref_, &self.profile, &ctx, &self.store, self.store.root()).await
502 }
503}
504
505/// `true` iff the outcome represents a clean fetch: `Fetched` (full
506/// PDF) or `NoOaUrl` (metadata-only by design). A `Blocked` PDF leg
507/// is a failure for SessionEnd / exit-code purposes — an OA PDF was
508/// discovered but could not be retrieved — even though the metadata
509/// TOML did land on disk. Pulled out so both `run_with_options` and
510/// `commands::batch` agree on the failure boundary.
511pub(crate) fn outcome_is_clean_success(outcome: &FetchPaperOutcome) -> bool {
512 !matches!(outcome.pdf_leg, PdfLegStatus::Blocked { .. })
513}
514
515/// CLI-only one-line success message on stderr (ADR-0001 stdio
516/// convention). Renders the [`FetchPaperOutcome`] in the same form the
517/// pre-Slice-2 CLI emitted: a full-PDF success names the PDF path; a
518/// metadata-only DOI fallback (size_bytes == 0) names the metadata TOML
519/// path the orchestrator wrote.
520fn emit_success_line(ref_: &Ref, outcome: &FetchPaperOutcome) {
521 let label = match ref_ {
522 Ref::Arxiv(id) => format!("arxiv:{}", id.as_str()),
523 Ref::Doi(doi) => format!("doi:{}", doi.as_str()),
524 };
525 match &outcome.pdf_leg {
526 PdfLegStatus::Fetched => {
527 print_success(format_args!(
528 "fetched {} ({} bytes) -> {}",
529 label, outcome.size_bytes, outcome.path
530 ));
531 }
532 PdfLegStatus::NoOaUrl => {
533 print_success(format_args!(
534 "fetched {} (metadata-only: no OA PDF available) -> {}",
535 label, outcome.path
536 ));
537 }
538 // Issue #145: `Blocked` is NO LONGER a success outcome. It is
539 // intercepted in `fetch_one` BEFORE `emit_success_line` is
540 // called and rendered via `render_blocked_error` with a
541 // non-zero exit (`docs/ERRORS.md` §3/§6 — no silent failures).
542 // Reaching this arm would mean the interception regressed, so we
543 // fail closed: surface the `error[CODE]:` line here too rather
544 // than printing a misleading success line.
545 PdfLegStatus::Blocked {
546 code,
547 message,
548 denial,
549 suggested_arxiv_id,
550 } => {
551 // Same #145 reclassification as the primary interception in
552 // `fetch_one`, so this fail-closed fallback stays consistent.
553 let effective = effective_blocked_code(*code, denial.as_ref());
554 render_blocked_error(
555 ref_,
556 outcome,
557 effective,
558 message,
559 denial.as_ref(),
560 suggested_arxiv_id.as_deref(),
561 );
562 }
563 // `PdfLegStatus` is `#[non_exhaustive]`; a future variant
564 // degrades to the size-based wording rather than failing the
565 // downstream-crate build.
566 _ => {
567 if outcome.size_bytes == 0 {
568 print_success(format_args!(
569 "fetched {} (metadata-only) -> {}",
570 label, outcome.path
571 ));
572 } else {
573 print_success(format_args!(
574 "fetched {} ({} bytes) -> {}",
575 label, outcome.size_bytes, outcome.path
576 ));
577 }
578 }
579 }
580}
581
582/// Run the `doiget fetch <ref>` subcommand.
583///
584/// `dry_run` (ADR-0022 §1): when `true`, build a [`FetchPlan`] from the
585/// parsed [`Ref`] and the configured store root, serialize it as JSON to
586/// stdout, and return `Ok(())` immediately, **without** building a
587/// `FetchHarness` (no provenance log open), without contacting the
588/// network, without writing to the store, and without appending a
589/// provenance row.
590///
591/// When `dry_run` is `false`, the function runs the normal end-to-end
592/// orchestration path: open the provenance log, dispatch the per-kind
593/// orchestrator, emit a `SessionStart` / `SessionEnd` bookend pair.
594///
595/// On success returns `Ok(())` and writes a one-line success message to
596/// stderr (per ADR-0001 stdio convention — no stdout writes from `fetch`
597/// on the normal path). On failure, returns an `anyhow::Error` and emits
598/// a `SessionEnd` row with `result=err` to the provenance log before
599/// returning.
600///
601/// # History
602///
603/// Slice 5 (PR #84 advisory item A2/A3 refactor): the previous
604/// `FetchOptions { dry_run: bool }` single-field option bundle plus the
605/// thin `run(input)` backwards-compat wrapper were collapsed into this
606/// single `dry_run: bool` parameter — the option bundle's single-bool
607/// shape was YAGNI, and the wrapper only existed to spare integration
608/// tests a `FetchOptions::default()` literal.
609pub async fn run_with_options(
610 input: String,
611 dry_run: bool,
612 _mode: super::output::OutputMode,
613) -> Result<()> {
614 // `_mode` is threaded per ADR-0017 / #144. Quiet-suppression of the
615 // success line is tracked in #203. The dry-run plan envelope is
616 // product output (the requested artifact) and is unaffected by
617 // mode.
618 // Step 1: parse + safekey. Issue #119: render the cargo-style
619 // `error[INVALID_REF]:` line + carry the exit code, rather than
620 // letting the granular `RefParseError` fall out as an opaque
621 // anyhow `{:?}` dump.
622 let ref_ = match Ref::parse(&input) {
623 Ok(r) => r,
624 Err(e) => {
625 print_err(format_args!(
626 "error[{}]: invalid ref: {e}",
627 ErrorCode::InvalidRef.as_wire()
628 ));
629 return Err(anyhow::Error::new(CliExit(cli_exit_code(
630 ErrorCode::InvalidRef,
631 ))));
632 }
633 };
634
635 // Dry-run branch: build the plan and emit it. NO harness, NO network,
636 // NO store write, NO provenance row. Posture-lint ADR-0022 §5 will
637 // verify this branch never reaches `HttpClient::fetch_*`,
638 // `FsStore::write_*`, or `ProvenanceLog::append`.
639 if dry_run {
640 // Resolve store root for path projections. Failures here surface
641 // as a normal CLI error (not as a denial) — same behaviour the
642 // non-dry-run path would exhibit on a misconfigured environment.
643 let store_root = super::resolve_store_root()?;
644 let plan = build_fetch_plan(&ref_, &store_root);
645 emit_dry_run_plan_to_stdout(&ref_, &plan)?;
646 return Ok(());
647 }
648
649 // Step 2: build harness (foundation modules + provenance log).
650 let harness = FetchHarness::from_env()?;
651
652 // Step 3: emit SessionStart. Fail-closed if the log write fails — the
653 // surrounding fetch MUST NOT proceed (`docs/PROVENANCE_LOG.md` §5).
654 harness.log_session_start(Some(ref_.as_input_str()))?;
655
656 // Step 4: dispatch on ref kind. `fetch_one` now returns the
657 // typed `FetchPaperOutcome` / `FetchError` per #210; the
658 // single-fetch caller (this fn) owns rendering + exit code.
659 let result = harness.fetch_one(&ref_).await;
660
661 // Step 5: emit SessionEnd regardless of outcome. A `Blocked` PDF
662 // leg is NOT a clean success even though the typed `Result` is
663 // `Ok` — `outcome_is_clean_success` collapses both halves so the
664 // SessionEnd `is_ok` field matches the user-facing exit code.
665 let session_ok = match &result {
666 Ok(o) => outcome_is_clean_success(o),
667 Err(_) => false,
668 };
669 harness.log_session_end(session_ok, Some(ref_.as_input_str()));
670
671 // Step 6: render the user-facing surface and map to `CliExit`.
672 // The Blocked-PDF reclassification logic that used to live inside
673 // `fetch_one` was lifted here verbatim so the batch caller can
674 // share the same `effective_blocked_code` / `render_blocked_error`
675 // helpers (issue #210 / #145).
676 match result {
677 Ok(outcome) => {
678 if let PdfLegStatus::Blocked {
679 code,
680 message,
681 denial,
682 suggested_arxiv_id,
683 } = &outcome.pdf_leg
684 {
685 let effective = effective_blocked_code(*code, denial.as_ref());
686 render_blocked_error(
687 &ref_,
688 &outcome,
689 effective,
690 message,
691 denial.as_ref(),
692 suggested_arxiv_id.as_deref(),
693 );
694 return Err(anyhow::Error::new(CliExit(cli_exit_code(effective))));
695 }
696 emit_success_line(&ref_, &outcome);
697 Ok(())
698 }
699 Err(e) => {
700 render_fetch_error(&e);
701 let code: ErrorCode = (&e).into();
702 Err(anyhow::Error::new(CliExit(cli_exit_code(code))))
703 }
704 }
705}
706
/// Write one user-visible success line to stderr.
///
/// Per ADR-0001 (stdio convention) the CLI never writes a success line
/// to stdout, so `eprintln!` is intentional here. The workspace
/// `clippy::print_stderr` lint is `warn`; the localized `#[allow]` is
/// the minimal intervention.
///
/// `args` is the pre-built message (callers pass `format_args!(...)`);
/// `eprintln!` appends the trailing newline.
#[allow(clippy::print_stderr)]
fn print_success(args: std::fmt::Arguments<'_>) {
    eprintln!("{args}");
}
716
/// Stderr sink for the `docs/ERRORS.md` §3 human-error lines.
///
/// Mirrors [`print_success`]: `args` is the pre-built message (callers
/// pass `format_args!(...)`) and `eprintln!` appends the trailing
/// newline. The localized `#[allow]` is the minimal intervention for
/// the workspace `clippy::print_stderr` lint.
#[allow(clippy::print_stderr)]
fn print_err(args: std::fmt::Arguments<'_>) {
    eprintln!("{args}");
}
724
/// Error type that ferries a `docs/ERRORS.md` §4 process exit code from
/// a CLI command up to `main`.
///
/// `main` owns the real `std::process::exit`; calling it from inside
/// `run_with_options` would kill in-process integration tests. By the
/// time a `CliExit` is constructed, the human-readable `error[CODE]: …`
/// line has ALREADY been written to stderr by the rendering helpers, so
/// `main` must NOT print it again. Issue #119.
///
/// The single field is the exit status to hand to the OS.
#[derive(Debug)]
pub struct CliExit(pub i32);

impl std::fmt::Display for CliExit {
    /// Renders as `exiting with status <code>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(status) = self;
        write!(f, "exiting with status {status}")
    }
}

// No `source()`: a `CliExit` is a terminal marker, not a wrapper.
impl std::error::Error for CliExit {}
741
742/// Reclassify a `PdfLegStatus::Blocked` code at the CLI layer (issue
743/// #145 / `docs/ERRORS.md` §2 "NETWORK_ERROR" vs §3.1 / §6).
744///
745/// The core maps *every* `FetchError::Http(_)` to
746/// [`ErrorCode::NetworkError`] (`doiget_core::source`'s
747/// `From<&FetchError> for ErrorCode`). `docs/ERRORS.md` §2 defines
748/// `NETWORK_ERROR` as a transport / DNS / TLS fault where "retry usually
749/// fine" — true for a real network blip, but **false** for a deliberate
750/// supply-chain policy block (off-allowlist redirect, insecure-scheme
751/// redirect, host-blocklist hit): retrying such a block never helps, so
752/// surfacing it as `NETWORK_ERROR` (generic exit 1) misrepresents a flaky
753/// network to humans and agents.
754///
755/// The orchestrator already preserves the true reason on the
756/// [`DenialContext`] side-channel (the `From<&HttpError> for
757/// Option<DenialContext>` impl walks reqwest's `source()` chain, so even
758/// a redirect denial wrapped as `HttpError::Network` still yields
759/// [`DenialReason::RedirectNotInAllowlist`]). When that reason is one of
760/// the closed-set *policy* denials, promote the surface code to
761/// [`ErrorCode::CapabilityDenied`] so the CLI renders
762/// `error[CAPABILITY_DENIED]:` and [`cli_exit_code`] returns exit 3 —
763/// the same code `fetch` / `graph` already use for capability denials.
764/// Non-policy blocks (no `denial`, or a non-policy reason such as
765/// `SizeCapExceeded` / `ContentTypeMismatch`) keep the core's code so a
766/// genuine transport failure still reads as `NETWORK_ERROR`.
767pub(crate) fn effective_blocked_code(code: ErrorCode, denial: Option<&DenialContext>) -> ErrorCode {
768 match denial.map(|d| d.reason) {
769 Some(
770 DenialReason::RedirectNotInAllowlist
771 | DenialReason::InsecureScheme
772 | DenialReason::HostInBlockList,
773 ) => ErrorCode::CapabilityDenied,
774 _ => code,
775 }
776}
777
778/// Snake-case wire token for a [`DenialReason`], matching the
779/// `#[serde(rename_all = "snake_case")]` JSON/MCP surface (ADR-0023 §2)
780/// so the CLI human line uses the SAME vocabulary as the machine
781/// envelope (`docs/ERRORS.md` §3.1). Only the policy-denial reasons the
782/// CLI inlines are enumerated; everything else degrades to a generic
783/// token rather than drifting from the serde form.
784fn denial_reason_wire(reason: DenialReason) -> &'static str {
785 match reason {
786 DenialReason::RedirectNotInAllowlist => "redirect_not_in_allowlist",
787 DenialReason::InsecureScheme => "insecure_scheme",
788 DenialReason::HostInBlockList => "host_in_block_list",
789 _ => "policy_denied",
790 }
791}
792
793/// `docs/ERRORS.md` §4 closed-code → process exit code. Anything not
794/// individually listed falls under "at least one fetch failed" (1).
795///
796/// `pub(crate)` so sibling subcommands (`commands::graph`, …) route
797/// their typed denials through the SAME centralized mapping instead of
798/// open-coding magic exit numbers — keeps the `ErrorCode`→exit contract
799/// single-sourced (issue #149).
800pub(crate) fn cli_exit_code(code: ErrorCode) -> i32 {
801 match code {
802 ErrorCode::CapabilityDenied => 3,
803 ErrorCode::StoreError | ErrorCode::LogError => 4,
804 ErrorCode::FetchTimeout => 124,
805 _ => 1,
806 }
807}
808
809/// Render a terminal [`FetchError`] in the `docs/ERRORS.md` §3
810/// "Researcher (CLI human)" form: `error[CODE]: message` on stderr,
811/// plus an actionable `= note:` line carrying the ADR-0023
812/// `denial_context` (attempted / expected hosts) when the failure was
813/// a denial class. stdout stays clean (ADR-0001).
814fn render_fetch_error(e: &FetchError) {
815 let code: ErrorCode = e.into();
816 print_err(format_args!("error[{}]: {}", code.as_wire(), e));
817 if let Some(dc) = Option::<DenialContext>::from(e) {
818 let attempted = dc.attempted.as_deref().unwrap_or("(unknown)");
819 match &dc.expected {
820 Some(exp) if !exp.is_empty() => {
821 print_err(format_args!(
822 " = note: attempted {attempted}; allowed: {}",
823 exp.join(", ")
824 ));
825 }
826 _ => {
827 print_err(format_args!(" = note: attempted {attempted}"));
828 }
829 }
830 }
831}
832
833/// Render a `PdfLegStatus::Blocked` outcome in the `docs/ERRORS.md` §3
834/// "Researcher (CLI human)" form. Issue #145: an OA PDF was discovered
835/// but could not be retrieved — the metadata WAS written, but this is a
836/// denial, not a clean success. We emit the same `error[CODE]:` stderr
837/// shape as [`render_fetch_error`] (so pipelines and humans see an
838/// unambiguous failure), name the metadata path that DID land so the
839/// partial result is still discoverable, and surface the ADR-0023
840/// `denial_context` note when present. stdout stays clean (ADR-0001).
841fn render_blocked_error(
842 ref_: &Ref,
843 outcome: &FetchPaperOutcome,
844 code: ErrorCode,
845 message: &str,
846 denial: Option<&DenialContext>,
847 suggested_arxiv_id: Option<&str>,
848) {
849 let label = match ref_ {
850 Ref::Arxiv(id) => format!("arxiv:{}", id.as_str()),
851 Ref::Doi(doi) => format!("doi:{}", doi.as_str()),
852 };
853 // Issue #145: when the block is a deliberate policy denial, name the
854 // closed-set reason inline so a human/agent reading the
855 // `error[CAPABILITY_DENIED]:` line immediately sees this is a
856 // supply-chain policy block (retrying is futile), not a flaky network.
857 match denial.map(|d| d.reason) {
858 Some(
859 reason @ (DenialReason::RedirectNotInAllowlist
860 | DenialReason::InsecureScheme
861 | DenialReason::HostInBlockList),
862 ) => {
863 print_err(format_args!(
864 "error[{}]: {label}: an OA PDF was found but its host is blocked by \
865 supply-chain policy ({}): {message}",
866 code.as_wire(),
867 denial_reason_wire(reason)
868 ));
869 }
870 _ => {
871 print_err(format_args!(
872 "error[{}]: {label}: an OA PDF was found but could not be retrieved: {message}",
873 code.as_wire()
874 ));
875 }
876 }
877 if let Some(dc) = denial {
878 let attempted = dc.attempted.as_deref().unwrap_or("(unknown)");
879 match &dc.expected {
880 Some(exp) if !exp.is_empty() => {
881 print_err(format_args!(
882 " = note: attempted {attempted}; allowed: {}",
883 exp.join(", ")
884 ));
885 }
886 _ => {
887 print_err(format_args!(" = note: attempted {attempted}"));
888 }
889 }
890 }
891 // The metadata TOML still landed; point the user at it so the
892 // partial result is not lost (it is still useful), without
893 // pretending the fetch succeeded.
894 print_err(format_args!(
895 " = note: metadata-only record written to {}",
896 outcome.path
897 ));
898 if let Some(arxiv_id) = suggested_arxiv_id {
899 print_err(format_args!(
900 " = suggest: Try fetching the arXiv version: doiget fetch arxiv:{}",
901 arxiv_id
902 ));
903 }
904}
905
906// ---------------------------------------------------------------------------
907// Tests
908// ---------------------------------------------------------------------------
909
910#[cfg(test)]
911#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
912mod tests {
913 use super::*;
914 use serial_test::serial;
915
916 #[test]
917 fn new_session_id_is_26_chars() {
918 // ULID textual form is fixed-width 26 chars (Crockford base32).
919 // `docs/PROVENANCE_LOG.md` §3 requires this exact length.
920 let id = new_session_id();
921 assert_eq!(id.len(), 26, "session id must be 26 chars: {:?}", id);
922 // Crockford base32 uses uppercase letters and digits; specifically
923 // I, L, O, U are excluded. Every char must be ASCII alphanumeric.
924 assert!(
925 id.chars().all(|c| c.is_ascii_alphanumeric()),
926 "ulid must be ASCII alphanumeric: {:?}",
927 id
928 );
929 }
930
931 /// Review pass C2: end-to-end coverage of the user-extension
932 /// merge inside `build_http_client`. Without this test the
933 /// production path that turns a `config.toml`
934 /// `[[network.additional_hosts]]` entry into a passing
935 /// allowlist match is unexercised — every existing e2e sets
936 /// `DOIGET_*_BASE` and short-circuits into the test-mode
937 /// builder above.
938 #[test]
939 #[serial]
940 fn build_http_client_merges_user_extension_into_oa_publisher_allowlist() {
941 use std::io::Write;
942
943 // Construct a tempdir + minimal config.toml under it.
944 let td = tempfile::TempDir::new().expect("tempdir");
945 let cfg_dir = td.path().join("doiget");
946 std::fs::create_dir_all(&cfg_dir).expect("mkdir doiget/");
947 let cfg_path = cfg_dir.join("config.toml");
948 let mut f = std::fs::File::create(&cfg_path).expect("create config.toml");
949 f.write_all(
950 br#"
951[[network.additional_hosts]]
952host = "ruj.uj.edu.pl"
953note = "Jagiellonian"
954
955[[network.additional_hosts]]
956host = "*.uj.edu.pl"
957"#,
958 )
959 .expect("write config.toml");
960 drop(f);
961
962 // Save + override env so `config_dir_utf8()` lands on the
963 // tempdir. Restored on Drop by EnvGuard. We also clear the
964 // five `DOIGET_*_BASE` env vars to force the production
965 // branch of `build_http_client`.
966 struct EnvGuard {
967 key: &'static str,
968 prev: Option<String>,
969 }
970 impl EnvGuard {
971 fn save(key: &'static str) -> Self {
972 Self {
973 key,
974 prev: std::env::var(key).ok(),
975 }
976 }
977 }
978 impl Drop for EnvGuard {
979 fn drop(&mut self) {
980 match &self.prev {
981 Some(v) => std::env::set_var(self.key, v),
982 None => std::env::remove_var(self.key),
983 }
984 }
985 }
986 let _g0 = EnvGuard::save("XDG_CONFIG_HOME");
987 let _g1 = EnvGuard::save("APPDATA");
988 let _g2 = EnvGuard::save("HOME");
989 let _g3 = EnvGuard::save("USERPROFILE");
990 let _g4 = EnvGuard::save("DOIGET_ARXIV_BASE");
991 let _g5 = EnvGuard::save("DOIGET_CROSSREF_BASE");
992 let _g6 = EnvGuard::save("DOIGET_UNPAYWALL_BASE");
993 let _g7 = EnvGuard::save("DOIGET_OA_PUBLISHER_BASE");
994 let _g8 = EnvGuard::save("DOIGET_OPENALEX_BASE");
995 std::env::set_var("XDG_CONFIG_HOME", td.path());
996 std::env::set_var("APPDATA", td.path());
997 std::env::set_var("HOME", td.path());
998 std::env::set_var("USERPROFILE", td.path());
999 std::env::remove_var("DOIGET_ARXIV_BASE");
1000 std::env::remove_var("DOIGET_CROSSREF_BASE");
1001 std::env::remove_var("DOIGET_UNPAYWALL_BASE");
1002 std::env::remove_var("DOIGET_OA_PUBLISHER_BASE");
1003 std::env::remove_var("DOIGET_OPENALEX_BASE");
1004
1005 let client = build_http_client().expect("HttpClient builds");
1006 let oa = client
1007 .source_allowlist("oa-publisher")
1008 .expect("oa-publisher source registered");
1009
1010 // Pre-existing curated allowlist still effective.
1011 assert!(
1012 oa.redirect_hosts.iter().any(|p| p == "*.aps.org"),
1013 "curated *.aps.org MUST still be present after merge; got {:?}",
1014 oa.redirect_hosts
1015 );
1016 // User-added literal host passes match.
1017 assert!(
1018 oa.matches("ruj.uj.edu.pl"),
1019 "literal `ruj.uj.edu.pl` from user config MUST match"
1020 );
1021 // User-added wildcard passes match for a subdomain.
1022 assert!(
1023 oa.matches("alpha.uj.edu.pl"),
1024 "wildcard `*.uj.edu.pl` from user config MUST match alpha.uj.edu.pl"
1025 );
1026 // Unrelated host MUST still fail.
1027 assert!(
1028 !oa.matches("ruj.uj.edu.ru"),
1029 "host outside the suffix MUST NOT match"
1030 );
1031 }
1032
1033 // Slice 2: the `extract_crossref_fields_*` unit tests moved to
1034 // `doiget_core::orchestrator::tests` along with the function they
1035 // covered. The CLI no longer owns those helpers; the marker test
1036 // below keeps the CLI's `fetch::tests` non-empty after the helper
1037 // migration so a future regression that nukes the delegation path
1038 // surfaces as a build failure (the `FetchPaperOutcome` re-import
1039 // would stop resolving).
1040 #[test]
1041 fn fetch_paper_outcome_is_reachable_from_cli() {
1042 let _ = std::any::type_name::<doiget_core::orchestrator::FetchPaperOutcome>();
1043 }
1044
1045 /// Minimal `DenialContext` carrying only `reason`; every other field
1046 /// is optional (ADR-0023 §3) so `None`/empty is a valid producer
1047 /// shape for the reclassification decision under test.
1048 fn denial(reason: DenialReason) -> DenialContext {
1049 DenialContext {
1050 reason,
1051 source: None,
1052 attempted: None,
1053 expected: None,
1054 hop_index: None,
1055 cap: None,
1056 actual: None,
1057 }
1058 }
1059
1060 /// Issue #145 / `docs/ERRORS.md` §6.1: a policy-class denial reason
1061 /// on a `Blocked` OA-PDF leg must be reclassified from the core's
1062 /// blanket `NetworkError` to `CapabilityDenied` at the CLI layer, so
1063 /// the user-facing exit becomes 3 (not the generic 1) and a flaky
1064 /// network is not implied for a deliberate supply-chain block.
1065 #[test]
1066 fn policy_denials_reclassify_network_error_to_capability_denied() {
1067 for r in [
1068 DenialReason::RedirectNotInAllowlist,
1069 DenialReason::InsecureScheme,
1070 DenialReason::HostInBlockList,
1071 ] {
1072 let d = denial(r);
1073 assert_eq!(
1074 effective_blocked_code(ErrorCode::NetworkError, Some(&d)),
1075 ErrorCode::CapabilityDenied,
1076 "policy reason {r:?} must promote NetworkError -> CapabilityDenied"
1077 );
1078 assert_eq!(
1079 cli_exit_code(effective_blocked_code(ErrorCode::NetworkError, Some(&d))),
1080 3,
1081 "policy reason {r:?} must map to exit 3 (docs/ERRORS.md §4/§6.1)"
1082 );
1083 }
1084 }
1085
1086 /// A genuine transport fault carries NO `DenialContext`; it must stay
1087 /// `NetworkError` / exit 1 — `docs/ERRORS.md` §2 "retry usually fine"
1088 /// is the correct signal there. (This is exactly the e2e
1089 /// `..._host_off_allowlist` path: first-leg connect failure, no
1090 /// redirect hop, so no allowlist denial is produced.)
1091 #[test]
1092 fn absent_denial_context_keeps_network_error() {
1093 assert_eq!(
1094 effective_blocked_code(ErrorCode::NetworkError, None),
1095 ErrorCode::NetworkError
1096 );
1097 assert_eq!(
1098 cli_exit_code(effective_blocked_code(ErrorCode::NetworkError, None)),
1099 1
1100 );
1101 }
1102
1103 /// Non-policy denial reasons (size cap, content-type mismatch) are
1104 /// NOT supply-chain policy blocks; they keep the core's code so a
1105 /// genuine cap/transport class is not masked as a capability denial.
1106 #[test]
1107 fn non_policy_denials_keep_core_code() {
1108 for r in [
1109 DenialReason::SizeCapExceeded,
1110 DenialReason::ContentTypeMismatch,
1111 ] {
1112 let d = denial(r);
1113 assert_eq!(
1114 effective_blocked_code(ErrorCode::NetworkError, Some(&d)),
1115 ErrorCode::NetworkError,
1116 "non-policy reason {r:?} must NOT be reclassified"
1117 );
1118 }
1119 }
1120
1121 /// The closed-set wire token used in the human `error[...]:` line
1122 /// must match the serde `snake_case` form so the CLI vocabulary does
1123 /// not drift from the JSON/MCP envelope (`docs/ERRORS.md` §3.1).
1124 #[test]
1125 fn denial_reason_wire_matches_serde_snake_case() {
1126 for r in [
1127 DenialReason::RedirectNotInAllowlist,
1128 DenialReason::InsecureScheme,
1129 DenialReason::HostInBlockList,
1130 ] {
1131 let serde_form = serde_json::to_string(&r).expect("serialize DenialReason");
1132 // serde_json wraps the enum unit variant in quotes.
1133 let serde_token = serde_form.trim_matches('"');
1134 assert_eq!(
1135 denial_reason_wire(r),
1136 serde_token,
1137 "CLI wire token for {r:?} must equal the serde snake_case form"
1138 );
1139 }
1140 }
1141}