Skip to main content

doiget_core/
lib.rs

1//! # doiget-core
2//!
3//! Core library for [doiget](https://github.com/sotashimozono/doiget): an Open Access
4//! first paper-fetcher with strict capability gating, fail-closed provenance logging,
5//! and a BiblioFetch.jl-compatible store layout.
6//!
7//! Phase 0 ships only this skeleton. Real implementations land in Phase 1.
8//! See `docs/PUBLIC_API.md` for the semver-locked surface and `docs/ARCHITECTURE.md`
9//! for the high-level design.
10
11#![warn(missing_docs)]
12#![forbid(unsafe_code)]
13
14use serde::{Deserialize, Serialize};
15use sha2::Digest;
16
17// --- Modules ---
18pub mod canonical;
19pub mod dry_run;
20pub mod http;
21pub mod orchestrator;
22pub mod provenance;
23pub mod rate_limiter;
24pub mod source;
25pub mod sources;
26pub mod store;
27
28// Phase 4 citation graph (ADR-0010). Compile-gated by the `citation`
29// Cargo feature, which itself enables the `metadata` feature so the
30// Tier-2 source impls are available.
31#[cfg(feature = "citation")]
32pub mod citation_graph;
33
34// Re-export the canonical-tuple audit-identity types at the crate root
35// per ADR-0024 / `docs/PUBLIC_API.md` §1. The types themselves live in
36// the [`canonical`] submodule.
37pub use crate::canonical::{CanonicalRef, SourceType};
38
/// Crate version, captured at compile time from `CARGO_PKG_VERSION`.
/// Used by `doiget-cli --version` and `doiget_health`.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");

/// TOML schema version this build writes. See `docs/STORE.md` §3.
pub const SCHEMA_VERSION: &str = "1.0";

/// Hard-coded rate limit: ceiling on the number of concurrent fetches.
/// See `docs/LEGAL.md` §6 safeguard 8.
pub const MAX_CONCURRENT_FETCHES: u32 = 5;

/// Hard-coded rate limit: ceiling on fetches started per second.
/// See `docs/LEGAL.md` §6 safeguard 8.
pub const MAX_FETCHES_PER_SECOND: f32 = 5.0;

/// Maximum batch size for `doiget batch` and `doiget_batch_fetch`.
pub const MCP_BATCH_MAX_SIZE: usize = 100;

/// Slice 2 alias for [`MCP_BATCH_MAX_SIZE`] using the
/// spec-language name (`docs/MCP_TOOLS.md` §1 / Slice 2 plan). The
/// numeric value MUST equal [`MCP_BATCH_MAX_SIZE`]; it is defined directly
/// in terms of that constant, and an internal test pins the equivalence so
/// the two constants cannot drift.
pub const MAX_BATCH_REFS: usize = MCP_BATCH_MAX_SIZE;

/// Maximum queued MCP requests beyond `MAX_CONCURRENT_FETCHES`. Excess returns
/// `ErrorCode::RateLimited`. See `docs/SECURITY.md` §1.4 / `docs/MCP_TOOLS.md`.
pub const MCP_QUEUE_DEPTH_MAX: usize = 100;

/// MCP server stdin-EOF graceful-shutdown deadline, in seconds. See ADR-0001
/// and `docs/MCP_TOOLS.md` §8.
pub const MCP_STDIN_EOF_SHUTDOWN_SEC: u64 = 5;

/// Maximum DOI suffix length accepted at validation, in bytes (the part
/// after the first `/`). See `docs/SECURITY.md` §1.1.
pub const DOI_SUFFIX_MAX_LEN: usize = 256;

/// Maximum PDF body size accepted by the fetcher, in bytes (100 000 000,
/// i.e. decimal megabytes rather than MiB). See `docs/SECURITY.md` §1.2
/// (Oversized PDF).
pub const PDF_MAX_BYTES: u64 = 100_000_000;

/// Time-to-live, in days, for entries in `~/.cache/doiget/resolver/`. See
/// `docs/CACHE.md` §3.
pub const RESOLVER_CACHE_TTL_DAYS: u32 = 7;

/// Time-to-live, in days, for entries in `~/.cache/doiget/citations/`. See
/// `docs/CACHE.md` §3.
pub const CITATION_CACHE_TTL_DAYS: u32 = 30;
82
83// ---------------------------------------------------------------------------
84// Ref
85// ---------------------------------------------------------------------------
86
/// A reference to a paper, either by DOI or arXiv id.
///
/// See `docs/SECURITY.md` §1.1 for input-validation rules.
///
/// Wire format: adjacently tagged, with a lowercase variant name under
/// `kind` and the bare identifier under `id`, e.g.
/// `{"kind":"doi","id":"10.1234/example"}`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase", tag = "kind", content = "id")]
pub enum Ref {
    /// A DOI (e.g., `10.1234/example`).
    Doi(Doi),
    /// An arXiv id (e.g., `2401.12345`).
    Arxiv(ArxivId),
}
98
/// A validated DOI string.
///
/// Construct via `Doi::parse(s)` (Phase 1+). The inner field is intentionally
/// `pub(crate)` to forbid bypass construction; tests inside `doiget-core` may
/// still use `Doi(s)` for fixture purposes.
///
/// Wire format: bare string (`#[serde(transparent)]`), e.g. `"10.1234/example"`.
///
/// NOTE(review): the derived, transparent `Deserialize` wraps whatever string
/// is on the wire without running `Doi::parse`, so a deserialized value is
/// not guaranteed to be validated — confirm that every deserialization site
/// reads trusted data or re-validates.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(transparent)]
pub struct Doi(pub(crate) String);
109
/// A validated arXiv id string.
///
/// Construct via `ArxivId::parse(s)` (Phase 1+). Inner field is `pub(crate)`.
///
/// Wire format: bare string (`#[serde(transparent)]`), e.g. `"2401.12345"`.
///
/// NOTE(review): the derived, transparent `Deserialize` wraps whatever string
/// is on the wire without running `ArxivId::parse` — confirm that every
/// deserialization site reads trusted data or re-validates.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(transparent)]
pub struct ArxivId(pub(crate) String);
118
119impl Doi {
120    /// Returns the DOI as a string slice.
121    pub fn as_str(&self) -> &str {
122        &self.0
123    }
124
125    /// Parses and validates a DOI string per `docs/SECURITY.md` §1.1.
126    ///
127    /// Accepts:
128    /// - Bare DOIs: `10.<registrant>/<suffix>` where `<registrant>` is 4–9
129    ///   digits and `<suffix>` is a non-empty sequence of characters drawn
130    ///   from `[A-Za-z0-9._/()-]`.
131    /// - The `doi:` URI scheme prefix; it is stripped before validation, so
132    ///   the stored value never carries a scheme. (Matches the convention
133    ///   established in `docs/SAFEKEY.md` §3 step 0.)
134    ///
135    /// Rejects:
136    /// - Inputs missing the literal `10.` prefix (after optional scheme
137    ///   strip).
138    /// - Suffixes longer than [`DOI_SUFFIX_MAX_LEN`] bytes.
139    /// - Empty suffixes.
140    /// - Any character outside the suffix charset above (including control
141    ///   characters, whitespace, and non-ASCII).
142    ///
143    /// # Errors
144    ///
145    /// Returns a [`RefParseError`] variant that names the specific rejection
146    /// category. Tier 1+ callers should map any [`RefParseError`] to
147    /// [`ErrorCode::InvalidRef`] when surfacing to MCP / CLI.
148    pub fn parse(s: &str) -> Result<Self, RefParseError> {
149        let stripped = parse::strip_doi_scheme(s);
150        parse::validate_doi(stripped)?;
151        Ok(Doi(stripped.to_string()))
152    }
153}
154
155impl ArxivId {
156    /// Returns the arXiv id as a string slice.
157    pub fn as_str(&self) -> &str {
158        &self.0
159    }
160
161    /// Parses and validates an arXiv id per `docs/SECURITY.md` §1.1 and the
162    /// pattern published in `docs/MCP_TOOLS.md`.
163    ///
164    /// Accepts:
165    /// - New-style ids: `YYMM.NNNNN[vN]` where the date block is 4 digits, the
166    ///   sequence number is 4–5 digits, and the optional version `vN` is one
167    ///   or more digits. Examples: `2401.12345`, `2401.12345v2`.
168    /// - Old-style ids: `subject-class/YYMMNNN[vN]` where the subject class
169    ///   is a lowercase token (with optional internal hyphens and an
170    ///   optional `.XX` two-uppercase-letter group), and the numeric body
171    ///   is exactly 7 digits with optional `vN`. Examples:
172    ///   `cond-mat/9501001`, `astro-ph.CO/0703123v2`.
173    /// - The `arxiv:` / `arXiv:` URI scheme prefix; it is stripped before
174    ///   validation.
175    ///
176    /// Rejects:
177    /// - Inputs that match neither the new-style nor old-style shape.
178    /// - Inputs containing characters outside the per-shape charset
179    ///   (control chars, whitespace, non-ASCII).
180    /// - Empty input.
181    ///
182    /// # Errors
183    ///
184    /// Returns a [`RefParseError`] variant that names the specific rejection
185    /// category.
186    pub fn parse(s: &str) -> Result<Self, RefParseError> {
187        let stripped = parse::strip_arxiv_scheme(s);
188        parse::validate_arxiv(stripped)?;
189        Ok(ArxivId(stripped.to_string()))
190    }
191}
192
193impl Ref {
194    /// Parses a string into a [`Ref`], auto-detecting DOI vs arXiv.
195    ///
196    /// Detection rules:
197    /// 1. If the input begins with the case-insensitive `doi:` scheme, the
198    ///    remainder is parsed as a DOI.
199    /// 2. If the input begins with the `arxiv:` or `arXiv:` scheme, the
200    ///    remainder is parsed as an arXiv id.
201    /// 3. Otherwise, if the input starts with `10.` it is treated as a bare
202    ///    DOI; this matches the heuristic in `docs/SAFEKEY.md` §4 (Julia
203    ///    reference) and is stable because DOIs always begin `10.`.
204    /// 4. Failing all of the above, parsing falls back to arXiv.
205    ///
206    /// The returned [`Ref`] never carries the URI scheme — `as_str()` on the
207    /// inner `Doi` / `ArxivId` is always the bare identifier.
208    ///
209    /// # Errors
210    ///
211    /// Returns a [`RefParseError`] from the underlying [`Doi::parse`] or
212    /// [`ArxivId::parse`] call. When the input has an explicit scheme
213    /// (`doi:` / `arxiv:`), the matching parser is dispatched and its error
214    /// surfaces directly. When the input is bare and ambiguous, the
215    /// heuristic in rule 3/4 selects the parser; an unparsable bare input
216    /// surfaces the arXiv parser's error (a non-`10.` ref that also fails
217    /// arXiv validation is never a valid DOI).
218    pub fn parse(s: &str) -> Result<Self, RefParseError> {
219        // Reject empty up front so all three parsers see a meaningful slice;
220        // without this, `strip_*_scheme("")` returns "" and we'd get a
221        // confusing "missing 10. prefix" error for empty input.
222        if s.is_empty() {
223            return Err(RefParseError::Empty);
224        }
225
226        if parse::has_doi_scheme(s) {
227            return Doi::parse(s).map(Ref::Doi);
228        }
229        if parse::has_arxiv_scheme(s) {
230            return ArxivId::parse(s).map(Ref::Arxiv);
231        }
232        if s.starts_with("10.") {
233            return Doi::parse(s).map(Ref::Doi);
234        }
235        ArxivId::parse(s).map(Ref::Arxiv)
236    }
237}
238
239// ---------------------------------------------------------------------------
240// Parser internals
241// ---------------------------------------------------------------------------
242
243mod parse {
244    use super::{RefParseError, DOI_SUFFIX_MAX_LEN};
245
246    /// Case-insensitive `doi:` prefix detector. Matches both `doi:` and
247    /// `DOI:` (and any case mix); the spec in `docs/SAFEKEY.md` §3 only
248    /// names the lowercase form, but the field convention is to be lenient
249    /// in what we accept (the scheme is dropped at the boundary anyway).
250    pub(crate) fn has_doi_scheme(s: &str) -> bool {
251        s.len() >= 4 && s.is_char_boundary(4) && s[..4].eq_ignore_ascii_case("doi:")
252    }
253
254    /// Case-insensitive `arxiv:` prefix detector. Accepts `arxiv:`,
255    /// `arXiv:` (the form used in `docs/MCP_TOOLS.md`), and any other case
256    /// mix.
257    pub(crate) fn has_arxiv_scheme(s: &str) -> bool {
258        s.len() >= 6 && s.is_char_boundary(6) && s[..6].eq_ignore_ascii_case("arxiv:")
259    }
260
261    pub(crate) fn strip_doi_scheme(s: &str) -> &str {
262        if has_doi_scheme(s) {
263            &s[4..]
264        } else {
265            s
266        }
267    }
268
269    pub(crate) fn strip_arxiv_scheme(s: &str) -> &str {
270        if has_arxiv_scheme(s) {
271            &s[6..]
272        } else {
273            s
274        }
275    }
276
277    /// DOI suffix charset per `docs/SECURITY.md` §1.1:
278    /// `[A-Za-z0-9._/()-]`. The forward slash is permitted inside the
279    /// suffix (e.g. `10.1016/...`); the registrant separator is the
280    /// *first* `/` and the suffix is everything after it.
281    fn is_doi_suffix_char(c: char) -> bool {
282        matches!(c,
283            'A'..='Z' | 'a'..='z' | '0'..='9'
284            | '.' | '_' | '/' | '(' | ')' | '-'
285        )
286    }
287
288    pub(crate) fn validate_doi(s: &str) -> Result<(), RefParseError> {
289        if s.is_empty() {
290            return Err(RefParseError::Empty);
291        }
292
293        // Must begin with literal "10."; the registrant is 4–9 digits up
294        // to the first '/'. After that, everything is suffix.
295        let rest = s
296            .strip_prefix("10.")
297            .ok_or(RefParseError::MissingDoiPrefix)?;
298        let slash_idx = rest
299            .find('/')
300            .ok_or(RefParseError::MissingDoiSuffixSeparator)?;
301        let registrant = &rest[..slash_idx];
302        let suffix = &rest[slash_idx + 1..];
303
304        // Registrant: 4–9 ASCII digits.
305        if registrant.len() < 4
306            || registrant.len() > 9
307            || !registrant.chars().all(|c| c.is_ascii_digit())
308        {
309            return Err(RefParseError::InvalidDoiRegistrant);
310        }
311
312        // Suffix: non-empty, charset-restricted, length-bounded.
313        if suffix.is_empty() {
314            return Err(RefParseError::EmptyDoiSuffix);
315        }
316        if suffix.len() > DOI_SUFFIX_MAX_LEN {
317            return Err(RefParseError::DoiSuffixTooLong {
318                len: suffix.len(),
319                max: DOI_SUFFIX_MAX_LEN,
320            });
321        }
322        if let Some(bad) = suffix.chars().find(|c| !is_doi_suffix_char(*c)) {
323            return Err(RefParseError::InvalidDoiSuffixChar { ch: bad });
324        }
325        Ok(())
326    }
327
328    /// Validates an arXiv id (with the `arxiv:` / `arXiv:` scheme already
329    /// stripped). Tries the new-style shape first, then the old-style.
330    pub(crate) fn validate_arxiv(s: &str) -> Result<(), RefParseError> {
331        if s.is_empty() {
332            return Err(RefParseError::Empty);
333        }
334        if validate_arxiv_new(s).is_ok() || validate_arxiv_old(s).is_ok() {
335            return Ok(());
336        }
337        Err(RefParseError::InvalidArxivShape)
338    }
339
340    /// New-style arXiv id: `YYMM.NNNNN[vN]`.
341    fn validate_arxiv_new(s: &str) -> Result<(), ()> {
342        let dot_idx = s.find('.').ok_or(())?;
343        let head = &s[..dot_idx];
344        let tail = &s[dot_idx + 1..];
345
346        // Head: exactly 4 ASCII digits.
347        if head.len() != 4 || !head.chars().all(|c| c.is_ascii_digit()) {
348            return Err(());
349        }
350
351        // Tail: 4–5 digits, then optional `v` followed by ≥1 digits.
352        let bytes = tail.as_bytes();
353        let mut i = 0;
354        while i < bytes.len() && bytes[i].is_ascii_digit() {
355            i += 1;
356        }
357        let digits_len = i;
358        if !(4..=5).contains(&digits_len) {
359            return Err(());
360        }
361        if i == bytes.len() {
362            return Ok(());
363        }
364        // Optional version suffix.
365        if bytes[i] != b'v' {
366            return Err(());
367        }
368        i += 1;
369        let v_start = i;
370        while i < bytes.len() && bytes[i].is_ascii_digit() {
371            i += 1;
372        }
373        if i == v_start || i != bytes.len() {
374            return Err(());
375        }
376        Ok(())
377    }
378
379    /// Old-style arXiv id: `subject-class/YYMMNNN[vN]`.
380    /// Subject class: `[a-z]([a-z-]*[a-z])?(\.[A-Z]{2})?`.
381    fn validate_arxiv_old(s: &str) -> Result<(), ()> {
382        let slash_idx = s.find('/').ok_or(())?;
383        let class = &s[..slash_idx];
384        let id = &s[slash_idx + 1..];
385
386        // Class: starts with [a-z], body is [a-z-], optional `.XX` (two
387        // ASCII upper).
388        let (core_class, dot_part) = match class.find('.') {
389            Some(d) => (&class[..d], Some(&class[d + 1..])),
390            None => (class, None),
391        };
392        if core_class.is_empty()
393            || !core_class
394                .chars()
395                .all(|c| c.is_ascii_lowercase() || c == '-')
396            || core_class.starts_with('-')
397            || core_class.ends_with('-')
398        {
399            return Err(());
400        }
401        if let Some(dp) = dot_part {
402            if dp.len() != 2 || !dp.chars().all(|c| c.is_ascii_uppercase()) {
403                return Err(());
404            }
405        }
406
407        // Id: 7 digits, optional `vN`.
408        let bytes = id.as_bytes();
409        let mut i = 0;
410        while i < bytes.len() && bytes[i].is_ascii_digit() {
411            i += 1;
412        }
413        if i != 7 {
414            return Err(());
415        }
416        if i == bytes.len() {
417            return Ok(());
418        }
419        if bytes[i] != b'v' {
420            return Err(());
421        }
422        i += 1;
423        let v_start = i;
424        while i < bytes.len() && bytes[i].is_ascii_digit() {
425            i += 1;
426        }
427        if i == v_start || i != bytes.len() {
428            return Err(());
429        }
430        Ok(())
431    }
432}
433
434// ---------------------------------------------------------------------------
435// RefParseError
436// ---------------------------------------------------------------------------
437
/// Reasons a `Doi::parse` / `ArxivId::parse` / `Ref::parse` call can fail.
///
/// Each variant maps to one rejection category in `docs/SECURITY.md` §1.1.
/// All variants funnel to [`ErrorCode::InvalidRef`] when surfacing to MCP /
/// CLI; the granular shape is preserved for tests and for future log
/// breadcrumbs. The `From<RefParseError> for ErrorCode` impl below makes
/// `?` propagation collapse to `INVALID_REF` automatically, satisfying
/// `docs/PUBLIC_API.md` §4.
///
/// The `#[error(...)]` strings are the `Display` text of each variant.
///
/// Marked `#[non_exhaustive]` so adding new categories is a non-breaking
/// change. Pattern-match with a wildcard arm.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
#[non_exhaustive]
pub enum RefParseError {
    /// Input was empty — either as given, or after a `doi:` / `arxiv:`
    /// scheme was stripped.
    #[error("empty input")]
    Empty,
    /// Input did not begin with the required `10.` literal (after any
    /// scheme strip).
    #[error("DOI must begin with '10.'")]
    MissingDoiPrefix,
    /// Input started with `10.` but had no `/` separator between
    /// registrant and suffix.
    #[error("DOI must contain '/' between registrant and suffix")]
    MissingDoiSuffixSeparator,
    /// Registrant (the text between `10.` and the first `/`) was not 4–9
    /// ASCII digits.
    #[error("DOI registrant must be 4–9 ASCII digits")]
    InvalidDoiRegistrant,
    /// DOI suffix (the text after the first `/`) was empty.
    #[error("DOI suffix is empty")]
    EmptyDoiSuffix,
    /// DOI suffix exceeded `DOI_SUFFIX_MAX_LEN` bytes. Checked before the
    /// suffix charset, so it takes precedence over
    /// [`Self::InvalidDoiSuffixChar`].
    #[error("DOI suffix is {len} bytes; maximum is {max}")]
    DoiSuffixTooLong {
        /// Observed suffix length, in bytes.
        len: usize,
        /// Hard upper bound (always [`DOI_SUFFIX_MAX_LEN`]).
        max: usize,
    },
    /// DOI suffix contained a character outside `[A-Za-z0-9._/()-]`.
    #[error("DOI suffix contains invalid character {ch:?}")]
    InvalidDoiSuffixChar {
        /// The first offending character.
        ch: char,
    },
    /// Input matched neither the new-style nor old-style arXiv shape.
    #[error("input does not match any known arXiv id shape")]
    InvalidArxivShape,
}
487
488impl From<RefParseError> for ErrorCode {
489    fn from(_: RefParseError) -> Self {
490        // All parse failures collapse to INVALID_REF at the public boundary,
491        // matching `docs/PUBLIC_API.md` §4 and `docs/SECURITY.md` §1.1.
492        ErrorCode::InvalidRef
493    }
494}
495
496// ---------------------------------------------------------------------------
497// Safekey
498// ---------------------------------------------------------------------------
499
/// A filesystem-safe key derived deterministically from a `Ref`.
///
/// See `docs/SAFEKEY.md` for the full algorithm and reference test vectors.
/// Construct via `Ref::safekey()` (Phase 1+); inner field is `pub(crate)`.
///
/// Wire format: bare string (`#[serde(transparent)]`), e.g. `"doi_10.1234_example"`.
///
/// NOTE(review): the derived, transparent `Deserialize` accepts any string,
/// not only output of `Ref::safekey()` — confirm that deserialized keys are
/// not used to build filesystem paths without a re-check.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(transparent)]
pub struct Safekey(pub(crate) String);
509
510impl Safekey {
511    /// Returns the safekey as a string slice.
512    pub fn as_str(&self) -> &str {
513        &self.0
514    }
515}
516
517impl Ref {
518    /// Returns the bare identifier string usable as a provenance `ref` field.
519    ///
520    /// Equivalent to `Doi::as_str` / `ArxivId::as_str` dispatched on the
521    /// variant — the URI scheme (`doi:` / `arxiv:`) is never present in the
522    /// inner identifiers (it is stripped at parse time), so the result is
523    /// always the bare DOI or arXiv id. Used by the CLI / MCP orchestrators
524    /// to populate the `ref` column of provenance log rows
525    /// (`docs/PROVENANCE_LOG.md` §3) without re-matching the variant.
526    pub fn as_input_str(&self) -> &str {
527        match self {
528            Ref::Doi(d) => d.as_str(),
529            Ref::Arxiv(a) => a.as_str(),
530        }
531    }
532
533    /// Derives a deterministic, filesystem-safe key from this reference.
534    ///
535    /// The algorithm is the NORMATIVE binding spec in `docs/SAFEKEY.md` §3.
536    /// Both Rust and Julia implementations MUST produce bit-identical output
537    /// for every entry in `tests/fixtures/safekey/vectors.json`.
538    ///
539    /// # Algorithm summary
540    ///
541    /// 1. Prefix with `doi_` or `arxiv_` (per variant).
542    /// 2. Replace any character outside `[A-Za-z0-9._-]` with `_`.
543    /// 3. Collapse consecutive `_` runs to a single `_`.
544    /// 4. Trim leading/trailing `_`.
545    /// 5. If the result exceeds 192 bytes, take the first 192 bytes plus
546    ///    `_` plus the first 8 hex chars of `SHA-256(raw)` (where `raw` is
547    ///    the step-1 output, before escaping).
548    ///
549    /// The bound on `as_str()` after step 4 is pure ASCII (steps 1-3 produce
550    /// only ASCII bytes), so the byte-slice in step 5 cannot split a
551    /// multibyte char.
552    pub fn safekey(&self) -> Safekey {
553        // Step 0: prefix per variant. Doi/ArxivId hold the bare identifier
554        // (no `doi:` / `arxiv:` URI scheme — that is stripped by Ref::parse,
555        // not relevant here).
556        let raw = match self {
557            Ref::Doi(d) => format!("doi_{}", d.as_str()),
558            Ref::Arxiv(a) => format!("arxiv_{}", a.as_str()),
559        };
560
561        // Step 1: replace unsafe chars with '_'. Non-ASCII chars (emitted by
562        // String::chars() as full Unicode code points) all hit the wildcard
563        // arm and become a single '_'.
564        let escaped: String = raw
565            .chars()
566            .map(|c| match c {
567                'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '-' | '_' => c,
568                _ => '_',
569            })
570            .collect();
571
572        // Step 2: collapse consecutive '_' runs to a single '_'.
573        let mut collapsed = String::with_capacity(escaped.len());
574        let mut last_was_underscore = false;
575        for c in escaped.chars() {
576            if c == '_' {
577                if !last_was_underscore {
578                    collapsed.push('_');
579                }
580                last_was_underscore = true;
581            } else {
582                collapsed.push(c);
583                last_was_underscore = false;
584            }
585        }
586
587        // Step 3: trim leading/trailing '_'.
588        let trimmed = collapsed.trim_matches('_');
589
590        // Step 4: length-bound. After steps 1-3 `trimmed` is pure ASCII, so
591        // `len()` (bytes) == char count and `&trimmed[..192]` is char-safe.
592        let key = if trimmed.len() > 192 {
593            let digest = sha2::Sha256::digest(raw.as_bytes());
594            let hash = hex::encode(&digest[..4]);
595            format!("{}_{}", &trimmed[..192], hash)
596        } else {
597            trimmed.to_string()
598        };
599
600        Safekey(key)
601    }
602}
603
604// ---------------------------------------------------------------------------
605// ErrorCode
606// ---------------------------------------------------------------------------
607
/// The closed set of error codes doiget surfaces.
///
/// See `docs/ERRORS.md` for the persona × code matrix.
///
/// Serialized through serde with `SCREAMING_SNAKE_CASE` variant names
/// (e.g. `InvalidRef` → `"INVALID_REF"`); `ErrorCode::as_wire` returns the
/// same tokens as `&'static str`.
///
/// Marked `#[non_exhaustive]` so adding new variants is a minor (not major)
/// version bump.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
#[non_exhaustive]
pub enum ErrorCode {
    /// DOI / arXiv id failed validation. Every [`RefParseError`] converts
    /// to this code.
    InvalidRef,
    /// Tier 1 sources reported no OA URL.
    NoOaAvailable,
    /// Internal rate cap or upstream 429.
    RateLimited,
    /// Transport / DNS / TLS failure.
    NetworkError,
    /// Filesystem write failed.
    StoreError,
    /// Provenance log write failed; the fetch was aborted.
    LogError,
    /// Source not granted by the runtime `CapabilityProfile`.
    CapabilityDenied,
    /// Per-request timeout exceeded.
    FetchTimeout,
    /// Store entry's `schema_version` is ahead of this build.
    SchemaTooNew,
    /// Could not acquire `flock` within 5 s.
    LockTimeout,
    /// Bug — please open an issue.
    InternalError,
    /// Feature is spec'd but not yet wired in this Phase. Distinct from
    /// [`Self::InternalError`] (which signals a bug) and
    /// [`Self::CapabilityDenied`] (which signals a runtime config gate).
    /// Returned by stubs that exist to pin the public surface ahead of
    /// orchestrator implementation, so an agent can react with "wait for
    /// next minor release" rather than "report a bug" or "tweak my
    /// capability profile". Wire form: `"NOT_IMPLEMENTED"`.
    NotImplemented,
}
649
650impl ErrorCode {
651    /// The `SCREAMING_SNAKE_CASE` wire token for this code, as a
652    /// `&'static str`. Identical to the serde representation but
653    /// allocation-free and usable where a borrowed string with a
654    /// `'static` lifetime is required — notably the provenance log
655    /// `error_code` column (`docs/PROVENANCE_LOG.md` §3), so a failure
656    /// row records the *actual* mapped code instead of a hand-written
657    /// literal that can drift from this enum (issue #118).
658    #[must_use]
659    pub fn as_wire(&self) -> &'static str {
660        match self {
661            ErrorCode::InvalidRef => "INVALID_REF",
662            ErrorCode::NoOaAvailable => "NO_OA_AVAILABLE",
663            ErrorCode::RateLimited => "RATE_LIMITED",
664            ErrorCode::NetworkError => "NETWORK_ERROR",
665            ErrorCode::StoreError => "STORE_ERROR",
666            ErrorCode::LogError => "LOG_ERROR",
667            ErrorCode::CapabilityDenied => "CAPABILITY_DENIED",
668            ErrorCode::FetchTimeout => "FETCH_TIMEOUT",
669            ErrorCode::SchemaTooNew => "SCHEMA_TOO_NEW",
670            ErrorCode::LockTimeout => "LOCK_TIMEOUT",
671            ErrorCode::InternalError => "INTERNAL_ERROR",
672            ErrorCode::NotImplemented => "NOT_IMPLEMENTED",
673        }
674    }
675}
676
677// ---------------------------------------------------------------------------
678// DenialReason / DenialContext (ADR-0023)
679// ---------------------------------------------------------------------------
680
/// Closed-set reasons a denial-class error envelope can carry on its
/// optional `denial_context.reason` field.
///
/// Wire form (JSON / MCP) is `snake_case` — e.g. `"redirect_not_in_allowlist"`.
/// The set is **closed** per ADR-0023 §2: adding a new variant is a minor
/// semver bump; renaming or repurposing one is a breaking change. Mirrors
/// the stability rule that already governs [`ErrorCode`].
///
/// NOTE(review): unlike [`ErrorCode`], this enum is not marked
/// `#[non_exhaustive]`, so downstream crates can match it exhaustively and
/// adding a variant would break them at compile time. Confirm against
/// ADR-0023 / `docs/PUBLIC_API.md` §8 whether that is intended given the
/// "minor semver bump" promise above.
///
/// See [`DenialContext`] for the surrounding struct, `docs/ERRORS.md` §3.1
/// for the wire surface, and `docs/PUBLIC_API.md` §8 for the
/// semver-locked surface contract.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DenialReason {
    /// Redirect target host did not match the source's allowlist
    /// (`HttpError::RedirectDenied`).
    RedirectNotInAllowlist,
    /// Redirect target had a non-HTTPS scheme (`HttpError::InsecureRedirect`).
    InsecureScheme,
    /// Source produced a URL whose host is on a future blocklist.
    ///
    /// Reserved — no producer wired yet. Will be emitted by the future
    /// per-source URL host-blocklist guard once that component lands
    /// (post-Phase-1 supply-chain hardening; see
    /// `docs/REDIRECT_ALLOWLIST.md` §4 for the staging plan).
    HostInBlockList,
    /// Body exceeded [`PDF_MAX_BYTES`] (`HttpError::OversizedBody`).
    SizeCapExceeded,
    /// Store entry's `schema_version` is ahead of this binary.
    ///
    /// Reserved — no producer wired yet. Will be emitted by the
    /// `FsStore` schema-rejection path once the read-side bump check
    /// lands (it currently only writes the current `SCHEMA_VERSION`).
    SchemaDrift,
    /// Source not in the runtime [`CapabilityProfile`]
    /// (`FetchError::NotEligible`).
    CapabilityNotGranted,
    /// Rate limiter rejected the call inside the current window.
    ///
    /// Reserved — no producer wired yet. Will be emitted by
    /// [`RateLimiter`](crate::rate_limiter::RateLimiter) once the
    /// limiter surfaces structured denials (Phase 2+; today the
    /// limiter only sleeps to enforce the window).
    RateLimitWindow,
    /// SSRF guard rejected a private / link-local / cloud-metadata address.
    ///
    /// Reserved — no producer wired yet. Will be emitted by the
    /// future SSRF pre-flight check (post-Phase-1 supply-chain
    /// hardening; the workspace currently relies on rustls + the
    /// HTTPS-only redirect policy to keep the attack surface small).
    SsrfPrivateAddress,
    /// Response Content-Type / magic-byte mismatch (`HttpError::NotAPdf`).
    ContentTypeMismatch,
}
735
/// Structured, machine-parseable companion to `error.message` for
/// recoverable denials.
///
/// The field is **optional and additive** on the public error envelope —
/// every previously-shipped `{code, message}` envelope remains valid, and
/// agents that ignore this struct continue to work. When present, it
/// carries the concrete parameters an LLM agent can use to plan a recovery
/// (e.g. "the redirect to `evil.example.com` was denied because it is not
/// in the crossref allowlist") without text-mining `error.message`.
///
/// ## Wire shape
///
/// The struct is `#[serde(deny_unknown_fields)]`, so a payload carrying a
/// field this build does not know is rejected on deserialize. Adding a
/// field to this struct is therefore a **breaking** change, which is why
/// the type is deliberately **not** `#[non_exhaustive]` (per
/// `docs/PUBLIC_API.md` §8): the Rust-level rule (struct construction
/// outside the crate) and the wire-level rule (extension) must agree.
///
/// All fields except `reason` are optional. Producers populate the fields
/// relevant to the reason and leave the rest at `None`; consumers MUST
/// tolerate any subset of fields being present. Every optional field is
/// annotated `#[serde(default, skip_serializing_if = "Option::is_none")]`:
/// `None` is omitted on serialize, and a missing field deserializes to
/// `None`.
///
/// [`Self::expected`] is `Option<Vec<String>>` rather than `Vec<String>`
/// so the producer can distinguish "this reason has no allowlist channel"
/// (`None` → field absent on the wire) from "this is the explicit list of
/// acceptable values, possibly empty" (`Some(vec![])` → `"expected":[]` on
/// the wire). The previous `Vec<String>` shape collapsed both states
/// into "field omitted", which an LLM agent could not safely disambiguate.
///
/// Mapping table: see ADR-0023 §4, plus the
/// `From<&HttpError> for Option<DenialContext>` and
/// `From<&FetchError> for Option<DenialContext>` impls in
/// [`crate::http`] / [`crate::source`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct DenialContext {
    /// Closed-enum reason code; the only required field.
    pub reason: DenialReason,
    /// Resolver source key (e.g. `"crossref"`) when one is in scope.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    /// Concrete value the producer attempted (host, path, hex magic bytes,
    /// scheme prefix). Shape is reason-specific; consumers MUST treat it
    /// as opaque text.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub attempted: Option<String>,
    /// Allowlist entries / acceptable values.
    ///
    /// `None` (field absent on the wire) means "this reason has no
    /// allowlist channel"; `Some(vec![])` (`"expected":[]` on the wire)
    /// means "this is the explicit list of acceptable values, and it is
    /// empty". The inner `Vec<String>` is used even when only one value is
    /// meaningful (e.g. `Some(vec!["%PDF-".into()])`) so the format does
    /// not have to flip when multiple values are acceptable.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub expected: Option<Vec<String>>,
    /// Redirect-chain hop position, 0-indexed. `u8` because the chain is
    /// hard-capped at [`crate::http`]'s `MAX_REDIRECTS` (= 10) and any
    /// larger value indicates a bug.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub hop_index: Option<u8>,
    /// Size or rate cap value (e.g. [`PDF_MAX_BYTES`]).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cap: Option<u64>,
    /// Observed value to compare against [`Self::cap`] (e.g. response
    /// bytes when [`Self::cap`] is the byte cap, or row schema_version
    /// when [`Self::cap`] is the binary's).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub actual: Option<u64>,
}
806
// ---------------------------------------------------------------------------
// CapabilityProfile and supporting types (runtime capability gating)
// ---------------------------------------------------------------------------
810
/// Marker for the always-on Open Access tier. See `docs/CAPABILITY.md`.
///
/// A data-less unit struct, used as the type of [`CapabilityProfile::oa`].
#[derive(Debug, Clone, Copy)]
pub struct AlwaysOn;
814
/// Which Tier 2 metadata sources are enabled this session. See `docs/CAPABILITY.md`.
///
/// [`CapabilityProfile::from_env`] sets a flag to `true` only when the
/// listed env var is present **and** the `metadata` Cargo feature is
/// compiled in. The derived `Default` leaves every source disabled.
#[derive(Debug, Clone, Default)]
#[non_exhaustive]
pub struct MetadataAccess {
    /// Phase 4+; requested via `DOIGET_ENABLE_OPENALEX`.
    pub openalex: bool,
    /// Phase 4+; requested via `DOIGET_ENABLE_S2`.
    pub semantic_scholar: bool,
    /// Phase 4+; requested via `DOIGET_ENABLE_DOAJ`.
    pub doaj: bool,
}
826
/// Process-wide rate limits. Hard-coded; not configurable.
///
/// Construct only via [`RateLimits::HARD_CODED`]. The struct fields are
/// `pub(crate)` so downstream code cannot synthesize a `RateLimits` with
/// different values, which would weaken `docs/LEGAL.md` §6 safeguard 8.
/// Read the values through the accessor methods of the same names.
#[derive(Debug, Clone, Copy)]
#[non_exhaustive]
pub struct RateLimits {
    // Maximum number of concurrent fetches in flight.
    pub(crate) max_concurrent_fetches: u32,
    // Maximum fetch attempts per second across all sources.
    pub(crate) max_fetches_per_second: f32,
    // Per-source backoff between consecutive requests, in milliseconds.
    pub(crate) per_source_backoff_ms: u64,
}
839
840impl RateLimits {
841    /// The single, hard-coded set of rate limits. There is no other public
842    /// constructor — see the type-level docs.
843    pub const HARD_CODED: Self = Self {
844        max_concurrent_fetches: MAX_CONCURRENT_FETCHES,
845        max_fetches_per_second: MAX_FETCHES_PER_SECOND,
846        per_source_backoff_ms: 200,
847    };
848
849    /// Maximum number of concurrent fetches in flight.
850    pub const fn max_concurrent_fetches(&self) -> u32 {
851        self.max_concurrent_fetches
852    }
853
854    /// Maximum fetch attempts per second across all sources.
855    pub const fn max_fetches_per_second(&self) -> f32 {
856        self.max_fetches_per_second
857    }
858
859    /// Per-source backoff in milliseconds between consecutive requests.
860    pub const fn per_source_backoff_ms(&self) -> u64 {
861        self.per_source_backoff_ms
862    }
863}
864
/// A successful TDM grant.
///
/// Carries the validated API key (`docs/CAPABILITY.md` §1) so that the key
/// flows from the startup capability gate into the source, rather than each
/// TDM source re-reading the env var at fetch time (issue #153 — an env
/// mutation between startup and fetch is otherwise undetectable).
///
/// The `api_key` field exists only when at least one `tdm-*` Cargo feature
/// is compiled in (the `secrecy` dependency is `optional = true` and gated
/// on those features per ADR-0002, so default release binaries contain no
/// TDM code path at all). The struct is `#[non_exhaustive]`; the
/// `tdm-*`-gated `api_key` field is therefore additive, not breaking, for
/// builds that toggle the feature set.
///
/// `docs/CAPABILITY.md` §1 specifies the type as `Secret<String>`; that is
/// the `secrecy` 0.9 spelling. The workspace pins `secrecy` 0.10, whose
/// equivalent owned-string secret type is `secrecy::SecretString`
/// (`= SecretBox<str>`). CAPABILITY.md §1 has been updated to match the
/// 0.10 API. `Debug` redacts the value.
///
/// Implements `Default` so in-crate test fixtures using
/// `TdmGrant { agree_env_var: ..., ..Default::default() }` keep compiling;
/// the default `api_key` is an empty secret.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct TdmGrant {
    /// The publisher API key, validated present at startup by
    /// [`CapabilityProfile::from_env`]. Wrapped in
    /// `secrecy::SecretString` so `Debug` never prints it; use
    /// `secrecy::ExposeSecret::expose_secret` at the point of use.
    ///
    /// Only present when a `tdm-*` feature is compiled in (see the
    /// type-level docs and ADR-0002).
    #[cfg(any(
        feature = "tdm-elsevier",
        feature = "tdm-aps",
        feature = "tdm-springer"
    ))]
    pub api_key: secrecy::SecretString,
    /// Name of the env var the user set to acknowledge the publisher's ToS
    /// (e.g. `DOIGET_AGREE_TDM_ELSEVIER`).
    pub agree_env_var: String,
    /// UTC timestamp taken when this grant was built, i.e. when the
    /// agreement env var was observed at startup.
    pub agreed_at: chrono::DateTime<chrono::Utc>,
}
909
910impl Default for TdmGrant {
911    fn default() -> Self {
912        Self {
913            #[cfg(any(
914                feature = "tdm-elsevier",
915                feature = "tdm-aps",
916                feature = "tdm-springer"
917            ))]
918            api_key: secrecy::SecretString::from(String::new()),
919            agree_env_var: String::new(),
920            agreed_at: chrono::Utc::now(),
921        }
922    }
923}
924
/// Runtime gate for which sources may be invoked. See `docs/CAPABILITY.md`.
///
/// Marked `#[non_exhaustive]` so adding new capability classes (fields) is
/// non-breaking. Code outside this crate that destructures the struct must
/// name only the documented fields and end the pattern with `..`.
///
/// **Construction**: external callers use [`CapabilityProfile::from_env()`].
/// Struct-literal construction is blocked outside this crate by
/// `#[non_exhaustive]`; this is intentional — the type's safety guarantees
/// rely on the resolution rules in `from_env`. `Default` is **not yet**
/// implemented; Phase 1 will add it once the field set stabilizes.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct CapabilityProfile {
    /// Tier 1 OA sources are always permitted.
    pub oa: AlwaysOn,
    /// Tier 2 metadata access (Phase 4+).
    pub metadata: MetadataAccess,
    /// Elsevier Tier 3 grant. `Some` only when `DOIGET_AGREE_TDM_ELSEVIER`
    /// is `"1"`, `DOIGET_KEY_ELSEVIER` is non-empty, and the `tdm-elsevier`
    /// feature is compiled in.
    pub tdm_elsevier: Option<TdmGrant>,
    /// APS Tier 3 grant. `Some` only when `DOIGET_AGREE_TDM_APS` is `"1"`,
    /// `DOIGET_KEY_APS` is non-empty, and the `tdm-aps` feature is
    /// compiled in.
    pub tdm_aps: Option<TdmGrant>,
    /// Springer Tier 3 grant. `Some` only when `DOIGET_AGREE_TDM_SPRINGER`
    /// is `"1"`, `DOIGET_KEY_SPRINGER` is non-empty, and the `tdm-springer`
    /// feature is compiled in.
    pub tdm_springer: Option<TdmGrant>,
    /// Hard-coded rate limits for this process.
    pub rate_limits: RateLimits,
}
951
/// Errors that can arise during `CapabilityProfile::from_env`.
///
/// Both variants describe a misconfigured TDM env-var pair for a single
/// publisher; resolution stops at the first publisher that fails.
#[derive(Debug, thiserror::Error)]
pub enum CapabilityError {
    /// User set the agree env var to `1` but provided no key. An empty key
    /// value counts as "no key". See `docs/CAPABILITY.md` §2.
    #[error("env {agree_var} is set but {key_var} is missing")]
    AgreedButNoKey {
        /// The agreement env var the user set.
        agree_var: String,
        /// The key env var that should accompany it.
        key_var: String,
    },
    /// A non-empty key env var is set but the agree env var is unset or not
    /// exactly `1`. See `docs/CAPABILITY.md` §2.
    #[error("key for {agree_var} is present but {agree_var} is not set to '1'")]
    KeyButNotAgreed {
        /// The agreement env var the user must set to `1` before the key takes effect.
        agree_var: String,
    },
}
970
971impl CapabilityProfile {
972    /// Read the runtime profile from environment variables.
973    ///
974    /// Implements the resolution algorithm specified in
975    /// [`docs/CAPABILITY.md`](../../../docs/CAPABILITY.md) §2.
976    ///
977    /// # Tier 1 (Open Access)
978    ///
979    /// Always permitted; not gated on any env var or feature.
980    ///
981    /// # Tier 2 (metadata)
982    ///
983    /// Each metadata source becomes available when its env var is set
984    /// (presence-checked, value ignored) **and** the `metadata` Cargo feature
985    /// was compiled in. If the env var is set but the feature is not compiled
986    /// in, a `tracing::warn!` is emitted and the source is left disabled —
987    /// this is not an error so that users can move binaries between machines
988    /// (or switch feature sets between cargo invocations) without breaking
989    /// startup. See `docs/CAPABILITY.md` §3 for the env var list.
990    ///
991    /// # Tier 3 (TDM)
992    ///
993    /// For each publisher in `{ELSEVIER, APS, SPRINGER}`, the
994    /// `DOIGET_AGREE_TDM_<X>` agreement env var is paired with
995    /// `DOIGET_KEY_<X>`. Resolution rules (per `docs/CAPABILITY.md` §2):
996    ///
997    /// - both unset → `tdm_<x> = None` (no error);
998    /// - `agree == "1"` and key set → `Some(TdmGrant { .. })` (subject to the
999    ///   feature gate below);
1000    /// - `agree == "1"` and key unset → [`CapabilityError::AgreedButNoKey`];
1001    /// - key set but `agree` unset (or `agree != "1"`) →
1002    ///   [`CapabilityError::KeyButNotAgreed`].
1003    ///
1004    /// When both env vars are set correctly **but** the corresponding
1005    /// `tdm-<x>` Cargo feature is not compiled in, this function emits a
1006    /// `tracing::warn!` and sets the grant to `None` rather than returning an
1007    /// error — same rationale as for the Tier 2 warn-and-skip behavior.
1008    ///
1009    /// # Precondition: tracing subscriber must be installed first
1010    ///
1011    /// Warn breadcrumbs are delivered via `tracing::warn!`. Callers MUST
1012    /// install a `tracing-subscriber` (or equivalent) **before** invoking
1013    /// this function, otherwise warnings are silently dropped. The
1014    /// `doiget-cli` binary does this in `main.rs`.
1015    ///
1016    /// # Errors
1017    ///
1018    /// Returns [`CapabilityError::AgreedButNoKey`] or
1019    /// [`CapabilityError::KeyButNotAgreed`] when the TDM env-var pair for any
1020    /// publisher is misconfigured. See the variant docs for the precise
1021    /// trigger conditions.
1022    ///
1023    /// # Note on `api_key` storage
1024    ///
1025    /// When a `tdm-*` feature is compiled in, [`TdmGrant`] carries the
1026    /// validated key as `secrecy::SecretString` (issue #153). The key is
1027    /// read exactly once here, at startup; TDM sources consume it from the
1028    /// grant and never re-read the env var at fetch time. This makes the
1029    /// grant a true startup attestation — an env mutation between startup
1030    /// and fetch can no longer silently change the credential in flight.
1031    /// See the [`TdmGrant`] doc-comment and `docs/CAPABILITY.md` §1/§2.
1032    pub fn from_env() -> Result<Self, CapabilityError> {
1033        // Issue #153: the validated API key is now threaded through
1034        // `TdmGrant` (as `secrecy::SecretString`, behind the `tdm-*`
1035        // features) by `resolve_tdm_grant` below — sources no longer
1036        // re-read the key env var at fetch time. See the `TdmGrant`
1037        // doc-comment and `docs/CAPABILITY.md` §1/§2.
1038
1039        // -- Tier 2 metadata -------------------------------------------------
1040        let metadata = MetadataAccess {
1041            openalex: resolve_metadata_flag(
1042                "DOIGET_ENABLE_OPENALEX",
1043                "metadata",
1044                cfg!(feature = "metadata"),
1045            ),
1046            semantic_scholar: resolve_metadata_flag(
1047                "DOIGET_ENABLE_S2",
1048                "metadata",
1049                cfg!(feature = "metadata"),
1050            ),
1051            doaj: resolve_metadata_flag(
1052                "DOIGET_ENABLE_DOAJ",
1053                "metadata",
1054                cfg!(feature = "metadata"),
1055            ),
1056        };
1057
1058        // -- Tier 3 TDM grants ----------------------------------------------
1059        let tdm_elsevier = resolve_tdm_grant(
1060            "DOIGET_AGREE_TDM_ELSEVIER",
1061            "DOIGET_KEY_ELSEVIER",
1062            "tdm-elsevier",
1063            cfg!(feature = "tdm-elsevier"),
1064        )?;
1065        let tdm_aps = resolve_tdm_grant(
1066            "DOIGET_AGREE_TDM_APS",
1067            "DOIGET_KEY_APS",
1068            "tdm-aps",
1069            cfg!(feature = "tdm-aps"),
1070        )?;
1071        let tdm_springer = resolve_tdm_grant(
1072            "DOIGET_AGREE_TDM_SPRINGER",
1073            "DOIGET_KEY_SPRINGER",
1074            "tdm-springer",
1075            cfg!(feature = "tdm-springer"),
1076        )?;
1077
1078        Ok(Self {
1079            oa: AlwaysOn,
1080            metadata,
1081            tdm_elsevier,
1082            tdm_aps,
1083            tdm_springer,
1084            rate_limits: RateLimits::HARD_CODED,
1085        })
1086    }
1087}
1088
1089/// Resolve a Tier 2 metadata flag from its env var and compile-in feature.
1090///
1091/// Returns `true` only when both the env var is present and the feature is
1092/// compiled in. When the env var is set without the feature, emits a
1093/// `tracing::warn!` and returns `false` — see [`CapabilityProfile::from_env`]
1094/// for the rationale (binaries may move between hosts / feature sets).
1095fn resolve_metadata_flag(env_var: &str, feature: &str, feature_enabled: bool) -> bool {
1096    let env_set = std::env::var_os(env_var).is_some();
1097    match (env_set, feature_enabled) {
1098        (true, true) => true,
1099        (true, false) => {
1100            tracing::warn!(
1101                env_var,
1102                feature,
1103                "{} is set but feature {} was not compiled in; the source will be unavailable",
1104                env_var,
1105                feature
1106            );
1107            false
1108        }
1109        (false, _) => false,
1110    }
1111}
1112
1113/// Resolve a Tier 3 TDM grant from the `agree`/`key` env-var pair and the
1114/// per-publisher Cargo feature.
1115///
1116/// Implements the rules in `docs/CAPABILITY.md` §2:
1117///
1118/// - both unset → `Ok(None)`.
1119/// - `agree == "1"` and `key` set → `Ok(Some(TdmGrant { .. }))` (when the
1120///   feature is enabled), or warn-and-`Ok(None)` (when the feature is not
1121///   compiled in).
1122/// - `agree == "1"` and `key` unset →
1123///   [`CapabilityError::AgreedButNoKey`].
1124/// - `key` set and `agree` unset OR `agree` set to anything other than `"1"`
1125///   → [`CapabilityError::KeyButNotAgreed`].
1126fn resolve_tdm_grant(
1127    agree_var: &str,
1128    key_var: &str,
1129    feature: &str,
1130    feature_enabled: bool,
1131) -> Result<Option<TdmGrant>, CapabilityError> {
1132    // `agree` is "agreed" iff the value is exactly the literal "1"; any other
1133    // value (including "true", "yes", empty) is treated as not-agreed per
1134    // `docs/CAPABILITY.md` §2.
1135    let agree_raw = std::env::var(agree_var).ok();
1136    let agreed = matches!(agree_raw.as_deref(), Some("1"));
1137    let agree_present = agree_raw.is_some();
1138    // Read the key value once, at startup, so the validated key flows
1139    // through `TdmGrant` and sources never re-read the env (issue #153).
1140    // An empty value is treated as "not set" — an empty API key cannot
1141    // authenticate, and silently constructing a grant around it would
1142    // mask the misconfiguration the AgreedButNoKey rule exists to surface.
1143    let key_value = std::env::var(key_var).ok().filter(|v| !v.is_empty());
1144
1145    match (agreed, agree_present, key_value) {
1146        (true, _, Some(key)) => {
1147            if feature_enabled {
1148                Ok(Some(build_tdm_grant(agree_var, key)))
1149            } else {
1150                // `key` is dropped here; under no-tdm builds it is the only
1151                // consumer of the owned `String`, which is intended.
1152                let _ = key;
1153                tracing::warn!(
1154                    env_var = agree_var,
1155                    feature,
1156                    "{} is set but feature {} was not compiled in; the source will be unavailable",
1157                    agree_var,
1158                    feature
1159                );
1160                Ok(None)
1161            }
1162        }
1163        (true, _, None) => Err(CapabilityError::AgreedButNoKey {
1164            agree_var: agree_var.to_string(),
1165            key_var: key_var.to_string(),
1166        }),
1167        // agree set to non-"1", key also set: KeyButNotAgreed (the key would
1168        // otherwise authorize the source without an explicit agreement).
1169        (false, true, Some(_)) => Err(CapabilityError::KeyButNotAgreed {
1170            agree_var: agree_var.to_string(),
1171        }),
1172        // agree unset, key set: KeyButNotAgreed (same rule).
1173        (false, false, Some(_)) => Err(CapabilityError::KeyButNotAgreed {
1174            agree_var: agree_var.to_string(),
1175        }),
1176        // agree set to non-"1" and no key: treat as no-grant. The user
1177        // expressed something but did not opt in and provided no credential,
1178        // so silent skip is the safe default (no source enabled).
1179        (false, true, None) => Ok(None),
1180        // Neither env var set: no grant, no error.
1181        (false, false, None) => Ok(None),
1182    }
1183}
1184
1185/// Construct a [`TdmGrant`] from the validated agreement var and key value.
1186///
1187/// Split out so the `tdm-*`-gated `api_key` field is populated in exactly
1188/// one place. When no `tdm-*` feature is compiled in the `key` is consumed
1189/// (dropped) here — the grant is still produced so that startup attestation
1190/// behavior (the warn-and-skip path) does not change shape between feature
1191/// sets.
1192fn build_tdm_grant(agree_var: &str, key: String) -> TdmGrant {
1193    #[cfg(any(
1194        feature = "tdm-elsevier",
1195        feature = "tdm-aps",
1196        feature = "tdm-springer"
1197    ))]
1198    {
1199        TdmGrant {
1200            api_key: secrecy::SecretString::from(key),
1201            agree_env_var: agree_var.to_string(),
1202            agreed_at: chrono::Utc::now(),
1203        }
1204    }
1205    #[cfg(not(any(
1206        feature = "tdm-elsevier",
1207        feature = "tdm-aps",
1208        feature = "tdm-springer"
1209    )))]
1210    {
1211        let _ = key;
1212        TdmGrant {
1213            agree_env_var: agree_var.to_string(),
1214            agreed_at: chrono::Utc::now(),
1215        }
1216    }
1217}
1218
1219// ---------------------------------------------------------------------------
1220// Tests — one smoke test per legally-load-bearing constant. See
1221// `docs/LEGAL.md` §6 safeguard 8 and `docs/PHASES.md` §4. These also keep the
1222// `cargo test --workspace` job from being a false-green during Phase 0.
1223// ---------------------------------------------------------------------------
1224
1225// `expect`/`unwrap` are idiomatic in tests where panics double as assertions.
1226// The workspace lints deny them in production code; relax for the test module
1227// only.
1228#[cfg(test)]
1229#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
1230mod tests {
1231    use super::*;
1232
1233    #[test]
1234    fn rate_limits_hard_coded_match_legal_safeguards() {
1235        // docs/LEGAL.md §6 safeguard 8 names these exact values.
1236        assert_eq!(RateLimits::HARD_CODED.max_concurrent_fetches(), 5);
1237        assert!((RateLimits::HARD_CODED.max_fetches_per_second() - 5.0).abs() < f32::EPSILON);
1238        assert_eq!(RateLimits::HARD_CODED.per_source_backoff_ms(), 200);
1239    }
1240
    /// Pins the numeric values of the batch-size, queue-depth, DOI-suffix
    /// length and stdin-EOF shutdown constants, and checks that the
    /// `MAX_BATCH_REFS` alias equals `MCP_BATCH_MAX_SIZE`.
    #[test]
    fn batch_size_caps_match_security_doc() {
        // docs/SECURITY.md §1.4 + docs/MCP_TOOLS.md.
        assert_eq!(MCP_BATCH_MAX_SIZE, 100);
        assert_eq!(MCP_QUEUE_DEPTH_MAX, 100);
        assert_eq!(DOI_SUFFIX_MAX_LEN, 256);
        assert_eq!(MCP_STDIN_EOF_SHUTDOWN_SEC, 5);
        // Slice 2: spec-language alias for MCP_BATCH_MAX_SIZE must
        // numerically agree with the original constant.
        assert_eq!(MAX_BATCH_REFS, MCP_BATCH_MAX_SIZE);
    }
1252
    /// Guards against an accidental change of the schema version string
    /// this build writes.
    #[test]
    fn schema_version_is_pinned_to_1_0() {
        // docs/STORE.md §3 — Phase 0/1 writes 1.0 exactly.
        // A bump to 1.1 (minor, backward-compat additions) requires updating
        // both this test and the cross-tool compat fixtures simultaneously.
        assert_eq!(SCHEMA_VERSION, "1.0");
    }
1260
1261    // -----------------------------------------------------------------
1262    // CapabilityProfile::from_env — Phase 1 resolution algorithm tests.
1263    //
1264    // These tests mutate process-global env state via std::env::set_var /
1265    // remove_var, so each test holds an `EnvGuard` RAII drop guard that
1266    // captures the pre-test value of every env var it touches and restores
1267    // it on drop (even on panic). They also use `#[serial_test::serial]` so
1268    // that no two tests in this module touch env state concurrently — the
1269    // workspace's test runner defaults to multi-threaded.
1270    //
1271    // Spec: docs/CAPABILITY.md §2 (resolution algorithm) and §3 (env var
1272    // reference table).
1273    // -----------------------------------------------------------------
1274
1275    /// RAII guard that captures the prior value of an env var on construction
1276    /// and restores it on drop. Use one guard per touched var per test.
1277    struct EnvGuard {
1278        var: &'static str,
1279        prior: Option<std::ffi::OsString>,
1280    }
1281
1282    impl EnvGuard {
1283        /// Capture and clear `var`. Use `set` afterwards to install a value.
1284        fn unset(var: &'static str) -> Self {
1285            let prior = std::env::var_os(var);
1286            // SAFETY (env mutation): tests are serialized via
1287            // `#[serial_test::serial]`. `remove_var` is sound when no other
1288            // thread reads or writes the environment concurrently.
1289            std::env::remove_var(var);
1290            EnvGuard { var, prior }
1291        }
1292
1293        /// Capture, then set `var` to `value`.
1294        fn set(var: &'static str, value: &str) -> Self {
1295            let prior = std::env::var_os(var);
1296            std::env::set_var(var, value);
1297            EnvGuard { var, prior }
1298        }
1299    }
1300
1301    impl Drop for EnvGuard {
1302        fn drop(&mut self) {
1303            match &self.prior {
1304                Some(v) => std::env::set_var(self.var, v),
1305                None => std::env::remove_var(self.var),
1306            }
1307        }
1308    }
1309
1310    /// Convenience: unset every Tier 2 / Tier 3 env var the resolution
1311    /// algorithm reads, returning a vector of guards that restore them on
1312    /// drop. Callers can then `EnvGuard::set` individual vars on top.
1313    fn unset_all_capability_env_vars() -> Vec<EnvGuard> {
1314        [
1315            "DOIGET_ENABLE_OPENALEX",
1316            "DOIGET_ENABLE_S2",
1317            "DOIGET_ENABLE_DOAJ",
1318            "DOIGET_AGREE_TDM_ELSEVIER",
1319            "DOIGET_KEY_ELSEVIER",
1320            "DOIGET_AGREE_TDM_APS",
1321            "DOIGET_KEY_APS",
1322            "DOIGET_AGREE_TDM_SPRINGER",
1323            "DOIGET_KEY_SPRINGER",
1324        ]
1325        .iter()
1326        .map(|v| EnvGuard::unset(v))
1327        .collect()
1328    }
1329
1330    #[test]
1331    #[serial_test::serial]
1332    fn from_env_no_env_vars_set_returns_tier_1_only() {
1333        // Rule: with every relevant env var unset, the resolved profile has
1334        // all TDM grants `None` and all metadata flags `false`. Hard-coded
1335        // rate limits still apply. (Replaces the old Phase 0 stub test.)
1336        let _g = unset_all_capability_env_vars();
1337
1338        let p = CapabilityProfile::from_env().expect("clean env never errors");
1339        assert!(p.tdm_elsevier.is_none());
1340        assert!(p.tdm_aps.is_none());
1341        assert!(p.tdm_springer.is_none());
1342        assert!(!p.metadata.openalex);
1343        assert!(!p.metadata.semantic_scholar);
1344        assert!(!p.metadata.doaj);
1345        assert_eq!(p.rate_limits.max_concurrent_fetches(), 5);
1346    }
1347
1348    #[test]
1349    #[serial_test::serial]
1350    fn from_env_no_tdm_returns_tier_1_profile() {
1351        // Rule (CAPABILITY.md §2): with every TDM env var unset, all
1352        // `tdm_*` fields are `None` and no error is produced.
1353        let _g = unset_all_capability_env_vars();
1354
1355        let p = CapabilityProfile::from_env().expect("no TDM env -> Ok");
1356        assert!(p.tdm_elsevier.is_none());
1357        assert!(p.tdm_aps.is_none());
1358        assert!(p.tdm_springer.is_none());
1359    }
1360
1361    #[test]
1362    #[serial_test::serial]
1363    fn from_env_agreed_but_no_key_errs() {
1364        // Rule (CAPABILITY.md §2): agree=1 + key unset -> AgreedButNoKey.
1365        let _g = unset_all_capability_env_vars();
1366        let _agree = EnvGuard::set("DOIGET_AGREE_TDM_ELSEVIER", "1");
1367
1368        let result = CapabilityProfile::from_env();
1369        match result {
1370            Err(CapabilityError::AgreedButNoKey { agree_var, key_var }) => {
1371                assert_eq!(agree_var, "DOIGET_AGREE_TDM_ELSEVIER");
1372                assert_eq!(key_var, "DOIGET_KEY_ELSEVIER");
1373            }
1374            other => panic!("expected AgreedButNoKey, got {:?}", other),
1375        }
1376    }
1377
1378    #[test]
1379    #[serial_test::serial]
1380    fn from_env_agreed_but_empty_key_errs() {
1381        // Security-adjacent (PR #161 review): an *empty* key string is
1382        // treated as "not set" by `resolve_tdm_grant`. With agree=1 and
1383        // DOIGET_KEY_ELSEVIER="" the misconfiguration must surface as
1384        // AgreedButNoKey, not silently build a grant around an empty
1385        // secret that could never authenticate.
1386        let _g = unset_all_capability_env_vars();
1387        let _agree = EnvGuard::set("DOIGET_AGREE_TDM_ELSEVIER", "1");
1388        let _key = EnvGuard::set("DOIGET_KEY_ELSEVIER", "");
1389
1390        let result = CapabilityProfile::from_env();
1391        match result {
1392            Err(CapabilityError::AgreedButNoKey { agree_var, key_var }) => {
1393                assert_eq!(agree_var, "DOIGET_AGREE_TDM_ELSEVIER");
1394                assert_eq!(key_var, "DOIGET_KEY_ELSEVIER");
1395            }
1396            other => panic!("expected AgreedButNoKey for empty key, got {:?}", other),
1397        }
1398    }
1399
1400    #[test]
1401    #[serial_test::serial]
1402    fn from_env_empty_key_without_agree_is_no_grant() {
1403        // Security-adjacent (PR #161 review): an empty key with the
1404        // agree var unset is indistinguishable from "no key at all".
1405        // It must resolve to Ok(None) (no grant, no error) — an empty
1406        // string must NOT trip the KeyButNotAgreed leaked-credential
1407        // rule, since there is no credential.
1408        let _g = unset_all_capability_env_vars();
1409        let _key = EnvGuard::set("DOIGET_KEY_ELSEVIER", "");
1410
1411        let p = CapabilityProfile::from_env()
1412            .expect("empty key + agree unset must be Ok(None), not an error");
1413        assert!(
1414            p.tdm_elsevier.is_none(),
1415            "empty DOIGET_KEY_ELSEVIER with no agree var must yield no grant"
1416        );
1417        assert!(p.tdm_aps.is_none());
1418        assert!(p.tdm_springer.is_none());
1419    }
1420
1421    #[test]
1422    #[serial_test::serial]
1423    fn from_env_key_but_not_agreed_errs() {
1424        // Rule (CAPABILITY.md §2): key set + agree unset -> KeyButNotAgreed.
1425        // A leaked DOIGET_KEY_ELSEVIER must not silently enable a source.
1426        let _g = unset_all_capability_env_vars();
1427        let _key = EnvGuard::set("DOIGET_KEY_ELSEVIER", "sk-test");
1428
1429        let result = CapabilityProfile::from_env();
1430        match result {
1431            Err(CapabilityError::KeyButNotAgreed { agree_var }) => {
1432                assert_eq!(agree_var, "DOIGET_AGREE_TDM_ELSEVIER");
1433            }
1434            other => panic!("expected KeyButNotAgreed, got {:?}", other),
1435        }
1436    }
1437
1438    #[test]
1439    #[serial_test::serial]
1440    fn from_env_agree_not_one_errs() {
1441        // Rule (CAPABILITY.md §2): the agree var must be exactly "1". Any
1442        // other value (here: "true") is treated as not-agreed; combined
1443        // with a key set, that triggers KeyButNotAgreed.
1444        let _g = unset_all_capability_env_vars();
1445        let _agree = EnvGuard::set("DOIGET_AGREE_TDM_ELSEVIER", "true");
1446        let _key = EnvGuard::set("DOIGET_KEY_ELSEVIER", "sk-test");
1447
1448        let result = CapabilityProfile::from_env();
1449        match result {
1450            Err(CapabilityError::KeyButNotAgreed { agree_var }) => {
1451                assert_eq!(agree_var, "DOIGET_AGREE_TDM_ELSEVIER");
1452            }
1453            other => panic!("expected KeyButNotAgreed, got {:?}", other),
1454        }
1455    }
1456
1457    #[test]
1458    #[serial_test::serial]
1459    fn from_env_both_set_correctly_returns_grant() {
1460        // Rule (CAPABILITY.md §2): agree=1 + key set -> Some(TdmGrant) when
1461        // the corresponding feature is compiled in; else None (warn-and-skip).
1462        // The feature gate for elsevier is `tdm-elsevier`; this test asserts
1463        // both branches via `cfg!`.
1464        let _g = unset_all_capability_env_vars();
1465        let _agree = EnvGuard::set("DOIGET_AGREE_TDM_ELSEVIER", "1");
1466        let _key = EnvGuard::set("DOIGET_KEY_ELSEVIER", "sk-test");
1467
1468        let p = CapabilityProfile::from_env().expect("agree=1 + key -> Ok");
1469
1470        if cfg!(feature = "tdm-elsevier") {
1471            let grant = p
1472                .tdm_elsevier
1473                .as_ref()
1474                .expect("feature tdm-elsevier compiled in -> Some(TdmGrant)");
1475            assert_eq!(grant.agree_env_var, "DOIGET_AGREE_TDM_ELSEVIER");
1476            // Issue #153 / PR #161 review: prove the key was actually
1477            // threaded into TdmGrant::api_key at startup (not just that
1478            // the agree var was recorded). The field is cfg-gated to
1479            // the same `tdm-*` set as the assertion below, so gate the
1480            // check identically.
1481            #[cfg(any(
1482                feature = "tdm-elsevier",
1483                feature = "tdm-aps",
1484                feature = "tdm-springer"
1485            ))]
1486            {
1487                use secrecy::ExposeSecret as _;
1488                assert_eq!(
1489                    grant.api_key.expose_secret(),
1490                    "sk-test",
1491                    "the DOIGET_KEY_ELSEVIER value must be threaded into \
1492                     TdmGrant::api_key (issue #153)"
1493                );
1494            }
1495        } else {
1496            assert!(
1497                p.tdm_elsevier.is_none(),
1498                "feature tdm-elsevier NOT compiled in -> None (warn-and-skip)"
1499            );
1500        }
1501    }
1502
1503    #[test]
1504    #[serial_test::serial]
1505    fn from_env_metadata_env_warns_without_feature() {
1506        // Rule (CAPABILITY.md §2): metadata env var without the `metadata`
1507        // feature -> source disabled (warn-and-skip, not an error).
1508        // We don't capture the tracing warn here; we just assert the field
1509        // is `false` when the feature is absent and `true` when present.
1510        let _g = unset_all_capability_env_vars();
1511        let _enable = EnvGuard::set("DOIGET_ENABLE_OPENALEX", "1");
1512
1513        let p = CapabilityProfile::from_env().expect("metadata env never errors");
1514
1515        if cfg!(feature = "metadata") {
1516            assert!(p.metadata.openalex);
1517        } else {
1518            assert!(!p.metadata.openalex);
1519        }
1520    }
1521
    // -----------------------------------------------------------------
    // Safekey reference vectors (docs/SAFEKEY.md §3, NORMATIVE).
    //
    // The vectors.json file is the binding cross-tool contract with
    // BiblioFetch.jl: every entry MUST round-trip identically through
    // both implementations. Earlier revisions shipped a 13-entry subset
    // while the BiblioFetch.jl pre-flight (ADR-0007) was pending; the
    // fixture now carries the full 100-entry set, and the reference-vector
    // test in this section asserts that exact count.
    //
    // The reference-vector test deliberately does not go through
    // `Ref::parse`: it branches on the input prefix (`doi:` / `arxiv:`)
    // and constructs the variant directly via the in-crate `pub(crate)`
    // tuple constructor, so it exercises the derivation algorithm only.
    // -----------------------------------------------------------------
1536
    /// One entry of the safekey reference-vector fixture.
    #[derive(Deserialize)]
    struct SafekeyVector {
        /// Scheme-prefixed reference as written in the fixture; it is handed
        /// to `ref_from_vector_input` to build the `Ref` under test.
        input: String,
        /// Safekey string the derivation must produce for `input`.
        expected: String,
    }
1542
    /// Top-level shape of the safekey fixture file: an object whose
    /// `vectors` key holds the list of reference entries.
    #[derive(Deserialize)]
    struct SafekeyVectorFile {
        /// All reference vectors, in file order.
        vectors: Vec<SafekeyVector>,
    }
1547
1548    /// In-crate test helper: build a `Ref` from the user-facing form used
1549    /// in the vectors file, by stripping the `doi:` / `arxiv:` URI scheme
1550    /// and wrapping the remainder. This bypasses validation; it is fine
1551    /// here because the vectors are hand-curated and the test asserts the
1552    /// derivation algorithm, not parser semantics.
1553    fn ref_from_vector_input(input: &str) -> Ref {
1554        if let Some(rest) = input.strip_prefix("doi:") {
1555            Ref::Doi(Doi(rest.to_string()))
1556        } else if let Some(rest) = input.strip_prefix("arxiv:") {
1557            Ref::Arxiv(ArxivId(rest.to_string()))
1558        } else {
1559            panic!(
1560                "vectors.json entry has unknown ref scheme (expected doi: or arxiv: prefix): {}",
1561                input
1562            );
1563        }
1564    }
1565
1566    #[test]
1567    fn safekey_matches_reference_vectors() {
1568        // include_str! resolves relative to the file containing this macro
1569        // call (crates/doiget-core/src/lib.rs), so we go up three levels
1570        // to reach the workspace root, then down to tests/fixtures.
1571        let raw = include_str!("../../../tests/fixtures/safekey/vectors.json");
1572        let parsed: SafekeyVectorFile =
1573            serde_json::from_str(raw).expect("vectors.json is valid JSON matching schema");
1574
1575        // Phase 0 final ships the full NORMATIVE 100-entry set
1576        // (docs/SAFEKEY.md §5). The fixture is the binding cross-tool
1577        // contract with BiblioFetch.jl; tightening the count guard to
1578        // `== 100` ensures the set cannot silently grow or shrink without
1579        // a coordinated ADR bump (per docs/SAFEKEY.md status block).
1580        assert_eq!(
1581            parsed.vectors.len(),
1582            100,
1583            "vectors.json MUST be exactly 100 entries (NORMATIVE per docs/SAFEKEY.md §5); got {}",
1584            parsed.vectors.len()
1585        );
1586
1587        let mut failures: Vec<String> = Vec::new();
1588        for v in &parsed.vectors {
1589            let r = ref_from_vector_input(&v.input);
1590            let got = r.safekey().as_str().to_string();
1591            if got != v.expected {
1592                failures.push(format!(
1593                    "input={:?}\n  expected={:?}\n  got     ={:?}",
1594                    v.input, v.expected, got
1595                ));
1596            }
1597        }
1598
1599        assert!(
1600            failures.is_empty(),
1601            "{}/{} safekey reference vectors failed:\n{}",
1602            failures.len(),
1603            parsed.vectors.len(),
1604            failures.join("\n")
1605        );
1606    }
1607
1608    #[test]
1609    fn safekey_truncates_long_inputs_with_sha256_suffix() {
1610        // Construct a synthetic DOI whose suffix produces a `trimmed` longer than
1611        // 192 chars after step 3. 220 ASCII-safe chars + the `doi_10.1234/`
1612        // prefix easily exceeds 192. The resulting key must be exactly 201 chars:
1613        // 192 (trimmed prefix) + 1 (`_` separator) + 8 (hex of first 4 bytes of
1614        // SHA-256(raw)). Per docs/SAFEKEY.md §3 step 5.
1615        let suffix = "a".repeat(220);
1616        let doi = Doi(format!("10.1234/{}", suffix));
1617        let key = Ref::Doi(doi).safekey();
1618        let s = key.as_str();
1619
1620        // Shape: <192 ASCII chars from {A-Za-z0-9._-}> + "_" + <8 hex chars>
1621        assert_eq!(
1622            s.len(),
1623            201,
1624            "expected 201-char truncated key, got {}: {}",
1625            s.len(),
1626            s
1627        );
1628        assert_eq!(&s[192..193], "_", "expected '_' separator at byte 192");
1629        let hash_part = &s[193..];
1630        assert_eq!(hash_part.len(), 8, "hash suffix must be 8 hex chars");
1631        assert!(
1632            hash_part
1633                .chars()
1634                .all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase()),
1635            "hash suffix must be lowercase hex: {}",
1636            hash_part
1637        );
1638
1639        // Determinism: same input twice must produce the same key.
1640        let key2 = Ref::Doi(Doi(format!("10.1234/{}", "a".repeat(220)))).safekey();
1641        assert_eq!(s, key2.as_str(), "safekey must be deterministic");
1642
1643        // Hash content: must equal hex(sha256(raw)[..4]) where raw is the
1644        // pre-escape prefixed form per docs/SAFEKEY.md §3 step 5.
1645        use sha2::Digest;
1646        let raw = format!("doi_10.1234/{}", "a".repeat(220));
1647        let expected_hash = {
1648            let digest = sha2::Sha256::digest(raw.as_bytes());
1649            format!(
1650                "{:02x}{:02x}{:02x}{:02x}",
1651                digest[0], digest[1], digest[2], digest[3]
1652            )
1653        };
1654        assert_eq!(
1655            hash_part, expected_hash,
1656            "hash must match SHA-256 of raw form"
1657        );
1658    }
1659
1660    // -----------------------------------------------------------------
1661    // Doi::parse / ArxivId::parse / Ref::parse — Phase 1 W3-A.
1662    // Spec: docs/SECURITY.md §1.1 (input validation). The rejection
1663    // category set is the binding contract; each test case below names
1664    // which rule it exercises in a comment.
1665    // -----------------------------------------------------------------
1666
1667    // ---- Doi::parse happy paths (≥6) --------------------------------
1668
1669    #[test]
1670    fn doi_parse_accepts_bare_canonical_form() {
1671        // Rule: "10.<registrant>/<suffix>" is the canonical bare form.
1672        let d = Doi::parse("10.1234/example").expect("canonical bare DOI");
1673        assert_eq!(d.as_str(), "10.1234/example");
1674    }
1675
1676    #[test]
1677    fn doi_parse_accepts_doi_uri_scheme() {
1678        // Rule: the `doi:` scheme is stripped at construction; as_str
1679        // never carries it (matches docs/SAFEKEY.md §3 step 0).
1680        let d = Doi::parse("doi:10.1234/example").expect("doi: scheme accepted");
1681        assert_eq!(d.as_str(), "10.1234/example");
1682    }
1683
1684    #[test]
1685    fn doi_parse_accepts_complex_real_world_suffix() {
1686        // Rule: suffix charset includes `.`, `(`, `)`, `-`. From a real
1687        // PhysRevLett DOI used elsewhere in the test fixture set.
1688        let d = Doi::parse("10.1103/PhysRevLett.130.200601").expect("real-world PhysRev DOI");
1689        assert_eq!(d.as_str(), "10.1103/PhysRevLett.130.200601");
1690    }
1691
1692    #[test]
1693    fn doi_parse_accepts_parens_in_suffix() {
1694        // Rule: `(` and `)` are explicitly listed in the spec charset.
1695        let d = Doi::parse("10.1016/S0370-1573(98)00122-3").expect("parens in suffix");
1696        assert_eq!(d.as_str(), "10.1016/S0370-1573(98)00122-3");
1697    }
1698
1699    #[test]
1700    fn doi_parse_accepts_nested_slashes_in_suffix() {
1701        // Rule: `/` is a suffix character; only the first `/` is the
1702        // registrant/suffix separator.
1703        let d = Doi::parse("10.1234/foo/bar/baz").expect("nested slashes");
1704        assert_eq!(d.as_str(), "10.1234/foo/bar/baz");
1705    }
1706
1707    #[test]
1708    fn doi_parse_accepts_suffix_at_max_len_boundary() {
1709        // Rule: a suffix of exactly DOI_SUFFIX_MAX_LEN bytes is accepted;
1710        // 1 byte more is rejected (covered separately below).
1711        let suffix = "a".repeat(DOI_SUFFIX_MAX_LEN);
1712        let input = format!("10.1234/{}", suffix);
1713        let d = Doi::parse(&input).expect("suffix at max len");
1714        assert_eq!(d.as_str().len(), "10.1234/".len() + DOI_SUFFIX_MAX_LEN);
1715    }
1716
1717    #[test]
1718    fn doi_parse_uri_scheme_is_case_insensitive() {
1719        // Rule: be lenient on scheme casing; the scheme is stripped
1720        // either way so the stored form is identical.
1721        let d = Doi::parse("DOI:10.1234/example").expect("uppercase scheme");
1722        assert_eq!(d.as_str(), "10.1234/example");
1723    }
1724
1725    // ---- Doi::parse rejection paths (≥6) ----------------------------
1726
1727    #[test]
1728    fn doi_parse_rejects_missing_10_prefix() {
1729        // Rule: must start with "10." literal.
1730        assert_eq!(
1731            Doi::parse("11.1234/example"),
1732            Err(RefParseError::MissingDoiPrefix)
1733        );
1734    }
1735
1736    #[test]
1737    fn doi_parse_rejects_empty_input() {
1738        // Rule: empty inputs are not valid DOIs.
1739        assert_eq!(Doi::parse(""), Err(RefParseError::Empty));
1740    }
1741
1742    #[test]
1743    fn doi_parse_rejects_missing_suffix_separator() {
1744        // Rule: must contain a `/` between registrant and suffix.
1745        assert_eq!(
1746            Doi::parse("10.1234"),
1747            Err(RefParseError::MissingDoiSuffixSeparator)
1748        );
1749    }
1750
1751    #[test]
1752    fn doi_parse_rejects_empty_suffix() {
1753        // Rule: suffix must be non-empty.
1754        assert_eq!(Doi::parse("10.1234/"), Err(RefParseError::EmptyDoiSuffix));
1755    }
1756
1757    #[test]
1758    fn doi_parse_rejects_invalid_registrant_too_short() {
1759        // Rule: registrant must be 4–9 digits.
1760        assert_eq!(
1761            Doi::parse("10.12/example"),
1762            Err(RefParseError::InvalidDoiRegistrant)
1763        );
1764    }
1765
1766    #[test]
1767    fn doi_parse_rejects_non_digit_registrant() {
1768        // Rule: registrant chars must all be ASCII digits.
1769        assert_eq!(
1770            Doi::parse("10.12ab/example"),
1771            Err(RefParseError::InvalidDoiRegistrant)
1772        );
1773    }
1774
1775    #[test]
1776    fn doi_parse_rejects_control_char_in_suffix() {
1777        // Rule (from docs/SECURITY.md §1.1, log-injection mitigation):
1778        // control chars are not in the suffix charset; reject before they
1779        // can reach the provenance log.
1780        let result = Doi::parse("10.1234/foo\nbar");
1781        assert!(
1782            matches!(
1783                result,
1784                Err(RefParseError::InvalidDoiSuffixChar { ch: '\n' })
1785            ),
1786            "got {:?}",
1787            result
1788        );
1789    }
1790
1791    #[test]
1792    fn doi_parse_rejects_suffix_over_max_len() {
1793        // Rule: DOI_SUFFIX_MAX_LEN + 1 bytes is rejected.
1794        let suffix = "a".repeat(DOI_SUFFIX_MAX_LEN + 1);
1795        let input = format!("10.1234/{}", suffix);
1796        let result = Doi::parse(&input);
1797        match result {
1798            Err(RefParseError::DoiSuffixTooLong { len, max }) => {
1799                assert_eq!(len, DOI_SUFFIX_MAX_LEN + 1);
1800                assert_eq!(max, DOI_SUFFIX_MAX_LEN);
1801            }
1802            other => panic!("expected DoiSuffixTooLong, got {:?}", other),
1803        }
1804    }
1805
1806    #[test]
1807    fn doi_parse_rejects_non_ascii_in_suffix() {
1808        // Rule: spec charset is ASCII-only; non-ASCII becomes an
1809        // InvalidDoiSuffixChar (consistent with safekey behavior of
1810        // collapsing such chars to '_', which is a downstream concern).
1811        let result = Doi::parse("10.1234/物理学");
1812        assert!(
1813            matches!(result, Err(RefParseError::InvalidDoiSuffixChar { .. })),
1814            "got {:?}",
1815            result
1816        );
1817    }
1818
1819    // ---- ArxivId::parse happy paths (≥6) ----------------------------
1820
1821    #[test]
1822    fn arxiv_parse_accepts_new_style_4_digit_seq() {
1823        // Rule: new-style YYMM.NNNN (4-digit sequence number).
1824        let a = ArxivId::parse("0704.0001").expect("new-style 4-digit seq");
1825        assert_eq!(a.as_str(), "0704.0001");
1826    }
1827
1828    #[test]
1829    fn arxiv_parse_accepts_new_style_5_digit_seq() {
1830        // Rule: new-style YYMM.NNNNN (5-digit sequence number, post-2015).
1831        let a = ArxivId::parse("2401.12345").expect("new-style 5-digit seq");
1832        assert_eq!(a.as_str(), "2401.12345");
1833    }
1834
1835    #[test]
1836    fn arxiv_parse_accepts_new_style_with_version() {
1837        // Rule: optional `vN` version suffix.
1838        let a = ArxivId::parse("2401.12345v2").expect("with version");
1839        assert_eq!(a.as_str(), "2401.12345v2");
1840    }
1841
1842    #[test]
1843    fn arxiv_parse_accepts_old_style() {
1844        // Rule: old-style subject-class/YYMMNNN.
1845        let a = ArxivId::parse("cond-mat/9501001").expect("old-style cond-mat");
1846        assert_eq!(a.as_str(), "cond-mat/9501001");
1847    }
1848
1849    #[test]
1850    fn arxiv_parse_accepts_old_style_with_subclass_and_version() {
1851        // Rule: old-style subject-class may have a `.XX` two-upper subclass
1852        // and an optional `vN` suffix.
1853        let a = ArxivId::parse("astro-ph.CO/0703123v2").expect("old-style with subclass + version");
1854        assert_eq!(a.as_str(), "astro-ph.CO/0703123v2");
1855    }
1856
1857    #[test]
1858    fn arxiv_parse_accepts_arxiv_uri_scheme() {
1859        // Rule: `arxiv:` / `arXiv:` scheme is stripped at construction.
1860        let a = ArxivId::parse("arxiv:2401.12345").expect("arxiv: scheme");
1861        assert_eq!(a.as_str(), "2401.12345");
1862    }
1863
1864    #[test]
1865    fn arxiv_parse_accepts_arxiv_uri_scheme_mixed_case() {
1866        // Rule: scheme case-insensitive; matches the `arXiv:` form named
1867        // in docs/MCP_TOOLS.md.
1868        let a = ArxivId::parse("arXiv:2401.12345v2").expect("arXiv: scheme");
1869        assert_eq!(a.as_str(), "2401.12345v2");
1870    }
1871
1872    // ---- ArxivId::parse rejection paths (≥6) ------------------------
1873
1874    #[test]
1875    fn arxiv_parse_rejects_empty_input() {
1876        // Rule: empty rejected up-front.
1877        assert_eq!(ArxivId::parse(""), Err(RefParseError::Empty));
1878    }
1879
1880    #[test]
1881    fn arxiv_parse_rejects_no_dot_or_slash() {
1882        // Rule: must contain `.` (new-style) or `/` (old-style).
1883        assert_eq!(
1884            ArxivId::parse("notanarxivid"),
1885            Err(RefParseError::InvalidArxivShape)
1886        );
1887    }
1888
1889    #[test]
1890    fn arxiv_parse_rejects_new_style_wrong_head_length() {
1891        // Rule: head must be exactly 4 digits.
1892        assert_eq!(
1893            ArxivId::parse("240.12345"),
1894            Err(RefParseError::InvalidArxivShape)
1895        );
1896    }
1897
1898    #[test]
1899    fn arxiv_parse_rejects_new_style_seq_too_short() {
1900        // Rule: seq must be 4–5 digits.
1901        assert_eq!(
1902            ArxivId::parse("2401.123"),
1903            Err(RefParseError::InvalidArxivShape)
1904        );
1905    }
1906
1907    #[test]
1908    fn arxiv_parse_rejects_old_style_wrong_id_length() {
1909        // Rule: old-style id is exactly 7 digits.
1910        assert_eq!(
1911            ArxivId::parse("cond-mat/95001"),
1912            Err(RefParseError::InvalidArxivShape)
1913        );
1914    }
1915
1916    #[test]
1917    fn arxiv_parse_rejects_invalid_version_suffix() {
1918        // Rule: version suffix is `v` followed by ≥1 digits, nothing else.
1919        assert_eq!(
1920            ArxivId::parse("2401.12345v"),
1921            Err(RefParseError::InvalidArxivShape)
1922        );
1923    }
1924
1925    #[test]
1926    fn arxiv_parse_rejects_control_char() {
1927        // Rule (docs/SECURITY.md §1.1 log-injection): no control chars.
1928        assert_eq!(
1929            ArxivId::parse("2401.12345\n"),
1930            Err(RefParseError::InvalidArxivShape)
1931        );
1932    }
1933
1934    #[test]
1935    fn arxiv_parse_rejects_non_ascii() {
1936        // Rule: ASCII-only.
1937        assert_eq!(
1938            ArxivId::parse("2401.物理"),
1939            Err(RefParseError::InvalidArxivShape)
1940        );
1941    }
1942
1943    // ---- Ref::parse happy paths (≥6) --------------------------------
1944
1945    #[test]
1946    fn ref_parse_dispatches_doi_scheme_to_doi() {
1947        // Detection rule 1: explicit `doi:` scheme.
1948        match Ref::parse("doi:10.1234/example").expect("doi: dispatched to Doi") {
1949            Ref::Doi(d) => assert_eq!(d.as_str(), "10.1234/example"),
1950            other => panic!("expected Ref::Doi, got {:?}", other),
1951        }
1952    }
1953
1954    #[test]
1955    fn ref_parse_dispatches_arxiv_scheme_to_arxiv() {
1956        // Detection rule 2: explicit `arxiv:` scheme.
1957        match Ref::parse("arxiv:2401.12345").expect("arxiv: dispatched to Arxiv") {
1958            Ref::Arxiv(a) => assert_eq!(a.as_str(), "2401.12345"),
1959            other => panic!("expected Ref::Arxiv, got {:?}", other),
1960        }
1961    }
1962
1963    #[test]
1964    fn ref_parse_dispatches_arxiv_mixed_case_scheme() {
1965        // Detection rule 2 (case-insensitive): `arXiv:` form.
1966        match Ref::parse("arXiv:cond-mat/9501001").expect("arXiv: dispatched") {
1967            Ref::Arxiv(a) => assert_eq!(a.as_str(), "cond-mat/9501001"),
1968            other => panic!("expected Ref::Arxiv, got {:?}", other),
1969        }
1970    }
1971
1972    #[test]
1973    fn ref_parse_bare_doi_resolves_to_doi() {
1974        // Detection rule 3: bare input starting with `10.` is a DOI.
1975        match Ref::parse("10.1234/foo").expect("bare DOI") {
1976            Ref::Doi(d) => assert_eq!(d.as_str(), "10.1234/foo"),
1977            other => panic!("expected Ref::Doi, got {:?}", other),
1978        }
1979    }
1980
1981    #[test]
1982    fn ref_parse_bare_arxiv_new_resolves_to_arxiv() {
1983        // Detection rule 4: bare input not starting with `10.` falls
1984        // through to arXiv. Tests the ambiguous-input branch named in the
1985        // PR brief: `2401.12345` should resolve to ArxivId.
1986        match Ref::parse("2401.12345").expect("bare new-style arXiv") {
1987            Ref::Arxiv(a) => assert_eq!(a.as_str(), "2401.12345"),
1988            other => panic!("expected Ref::Arxiv, got {:?}", other),
1989        }
1990    }
1991
1992    #[test]
1993    fn ref_parse_bare_arxiv_old_resolves_to_arxiv() {
1994        // Detection rule 4: bare old-style arXiv id.
1995        match Ref::parse("cond-mat/9501001").expect("bare old-style arXiv") {
1996            Ref::Arxiv(a) => assert_eq!(a.as_str(), "cond-mat/9501001"),
1997            other => panic!("expected Ref::Arxiv, got {:?}", other),
1998        }
1999    }
2000
2001    // ---- Ref::parse rejection paths (≥6) ----------------------------
2002
2003    #[test]
2004    fn ref_parse_rejects_empty() {
2005        // Rule: empty up-front.
2006        assert_eq!(Ref::parse(""), Err(RefParseError::Empty));
2007    }
2008
2009    #[test]
2010    fn ref_parse_doi_scheme_with_invalid_doi_propagates_doi_error() {
2011        // When the scheme is explicit, we surface the parser's error
2012        // verbatim — not a generic "shape mismatch".
2013        assert_eq!(
2014            Ref::parse("doi:10.1234"),
2015            Err(RefParseError::MissingDoiSuffixSeparator)
2016        );
2017    }
2018
2019    #[test]
2020    fn ref_parse_arxiv_scheme_with_invalid_arxiv_propagates_arxiv_error() {
2021        assert_eq!(
2022            Ref::parse("arxiv:notanid"),
2023            Err(RefParseError::InvalidArxivShape)
2024        );
2025    }
2026
2027    #[test]
2028    fn ref_parse_bare_with_10_prefix_uses_doi_errors() {
2029        // Bare `10.…` heuristic: DOI parser is dispatched and its error
2030        // surfaces (here: bad registrant).
2031        assert_eq!(
2032            Ref::parse("10.12/x"),
2033            Err(RefParseError::InvalidDoiRegistrant)
2034        );
2035    }
2036
2037    #[test]
2038    fn ref_parse_bare_without_10_prefix_uses_arxiv_errors() {
2039        // Bare ambiguous fallback: ArxivId parser is dispatched and its
2040        // error surfaces. `1.2.3` is neither a DOI nor an arXiv shape.
2041        assert_eq!(Ref::parse("1.2.3"), Err(RefParseError::InvalidArxivShape));
2042    }
2043
2044    #[test]
2045    fn ref_parse_rejects_doi_scheme_with_oversized_suffix() {
2046        // Length-bound: DOI suffix > DOI_SUFFIX_MAX_LEN through Ref::parse
2047        // surfaces DoiSuffixTooLong, not a generic InvalidArxivShape.
2048        let suffix = "a".repeat(DOI_SUFFIX_MAX_LEN + 5);
2049        let input = format!("doi:10.1234/{}", suffix);
2050        match Ref::parse(&input) {
2051            Err(RefParseError::DoiSuffixTooLong { .. }) => {}
2052            other => panic!("expected DoiSuffixTooLong, got {:?}", other),
2053        }
2054    }
2055
2056    #[test]
2057    fn ref_parse_round_trip_via_serde_preserves_inner_string() {
2058        // Wire-format check: Doi/ArxivId are #[serde(transparent)], and a
2059        // round-trip through Ref::parse → serde_json → Ref must preserve
2060        // the inner identifier. Guards against accidental scheme leakage
2061        // into the stored form.
2062        let r = Ref::parse("doi:10.1234/example").expect("parse ok");
2063        let json = serde_json::to_string(&r).expect("serialize");
2064        // The transparent inner value is the bare identifier (no `doi:`).
2065        assert!(
2066            json.contains("10.1234/example") && !json.contains("doi:"),
2067            "scheme leaked into wire form: {}",
2068            json
2069        );
2070    }
2071
2072    #[test]
2073    fn ref_parse_error_maps_to_invalid_ref_error_code() {
2074        // Public-API contract (docs/PUBLIC_API.md §4): all parse failures
2075        // collapse to ErrorCode::InvalidRef at the public boundary.
2076        let err: ErrorCode = RefParseError::Empty.into();
2077        assert_eq!(err, ErrorCode::InvalidRef);
2078        let err2: ErrorCode = RefParseError::MissingDoiPrefix.into();
2079        assert_eq!(err2, ErrorCode::InvalidRef);
2080    }
2081
2082    // -----------------------------------------------------------------
2083    // DenialReason / DenialContext (ADR-0023) — wire-shape tests.
2084    // -----------------------------------------------------------------
2085
2086    #[test]
2087    fn denial_reason_serializes_snake_case() {
2088        // ADR-0023 §2 / docs/PUBLIC_API.md §8: wire form is snake_case.
2089        let s = serde_json::to_string(&DenialReason::RedirectNotInAllowlist).expect("ser");
2090        assert_eq!(s, "\"redirect_not_in_allowlist\"");
2091        let s = serde_json::to_string(&DenialReason::SizeCapExceeded).expect("ser");
2092        assert_eq!(s, "\"size_cap_exceeded\"");
2093        let s = serde_json::to_string(&DenialReason::ContentTypeMismatch).expect("ser");
2094        assert_eq!(s, "\"content_type_mismatch\"");
2095    }
2096
2097    #[test]
2098    fn denial_reason_round_trip_via_serde() {
2099        // Round-trip every closed-set variant so adding a new variant
2100        // forces this test to be updated (the closed-set contract).
2101        for r in [
2102            DenialReason::RedirectNotInAllowlist,
2103            DenialReason::InsecureScheme,
2104            DenialReason::HostInBlockList,
2105            DenialReason::SizeCapExceeded,
2106            DenialReason::SchemaDrift,
2107            DenialReason::CapabilityNotGranted,
2108            DenialReason::RateLimitWindow,
2109            DenialReason::SsrfPrivateAddress,
2110            DenialReason::ContentTypeMismatch,
2111        ] {
2112            let s = serde_json::to_string(&r).expect("ser");
2113            let back: DenialReason = serde_json::from_str(&s).expect("de");
2114            assert_eq!(back, r, "round-trip mismatch for {:?} -> {}", r, s);
2115        }
2116    }
2117
2118    #[test]
2119    fn denial_context_round_trips_full_shape() {
2120        // A populated context (the redirect-denied case from ADR-0023 §1
2121        // example) survives a JSON round-trip. Whole-struct equality
2122        // exercises the `PartialEq` derive added per ADR-0023 §3 (added
2123        // in the multi-agent review feedback PR — see ADR-0023 history).
2124        let dc = DenialContext {
2125            reason: DenialReason::RedirectNotInAllowlist,
2126            source: Some("crossref".to_string()),
2127            attempted: Some("evil.example.com".to_string()),
2128            expected: Some(vec![
2129                "api.crossref.org".to_string(),
2130                "*.crossref.org".to_string(),
2131            ]),
2132            hop_index: Some(1),
2133            cap: None,
2134            actual: None,
2135        };
2136        let s = serde_json::to_string(&dc).expect("ser");
2137        let back: DenialContext = serde_json::from_str(&s).expect("de");
2138        assert_eq!(back, dc);
2139    }
2140
2141    #[test]
2142    fn denial_context_serialize_elides_empty_fields() {
2143        // `skip_serializing_if = "Option::is_none"` must keep the wire form
2144        // lean: every `None` field MUST NOT appear on the wire. Reason is
2145        // always present.
2146        let dc = DenialContext {
2147            reason: DenialReason::CapabilityNotGranted,
2148            source: None,
2149            attempted: None,
2150            expected: None,
2151            hop_index: None,
2152            cap: None,
2153            actual: None,
2154        };
2155        let s = serde_json::to_string(&dc).expect("ser");
2156        assert_eq!(s, "{\"reason\":\"capability_not_granted\"}");
2157    }
2158
2159    #[test]
2160    fn denial_context_expected_some_empty_vec_preserves_explicit_empty_allowlist() {
2161        // Post-refinement disambiguation: `expected: Some(vec![])` is the
2162        // "explicit empty allowlist" signal and MUST survive the wire as
2163        // `"expected":[]`. Only `expected: None` is skipped on serialize.
2164        // This is the bug the previous `Vec<String>` shape masked.
2165        let dc = DenialContext {
2166            reason: DenialReason::RedirectNotInAllowlist,
2167            source: Some("crossref".to_string()),
2168            attempted: Some("evil.example.com".to_string()),
2169            expected: Some(Vec::new()),
2170            hop_index: None,
2171            cap: None,
2172            actual: None,
2173        };
2174        let s = serde_json::to_string(&dc).expect("ser");
2175        assert!(
2176            s.contains("\"expected\":[]"),
2177            "expected:[] must survive on the wire (got: {s})"
2178        );
2179        let back: DenialContext = serde_json::from_str(&s).expect("de");
2180        assert_eq!(back.expected, Some(Vec::new()));
2181    }
2182
2183    #[test]
2184    fn denial_context_deserialize_tolerates_missing_optional_fields() {
2185        // Consumer-side contract (ADR-0023 §3): consumers MUST tolerate
2186        // any subset of fields being present. Missing optional fields
2187        // deserialize to their defaults via `#[serde(default)]`.
2188        let wire = r#"{"reason":"size_cap_exceeded","cap":104857600,"actual":209715200}"#;
2189        let dc: DenialContext = serde_json::from_str(wire).expect("de");
2190        assert_eq!(dc.reason, DenialReason::SizeCapExceeded);
2191        assert_eq!(dc.cap, Some(104857600));
2192        assert_eq!(dc.actual, Some(209715200));
2193        assert!(dc.source.is_none());
2194        assert!(dc.attempted.is_none());
2195        assert!(dc.expected.is_none());
2196        assert!(dc.hop_index.is_none());
2197    }
2198
2199    #[test]
2200    fn full_error_envelope_with_denial_context_serializes_to_pinned_json() {
2201        // Pins the byte-exact wire shape of the full failure envelope
2202        // documented in docs/ERRORS.md §3 + §3.1 and ADR-0023 §1. A
2203        // future regression that flips key order or skip-rules anywhere
2204        // in the chain breaks this test loudly.
2205        //
2206        // Note: serde_json's `Map` (used by `json!`) sorts keys
2207        // alphabetically when the `preserve_order` feature is NOT
2208        // enabled (we do not enable it). Embedding a `DenialContext`
2209        // via `json!` first re-serialises it through the same alphabet-
2210        // sorted Map path, so the inner field order is also alphabetical
2211        // here — NOT the struct field-order produced by direct
2212        // `to_string(&DenialContext)`. This is by design: the public
2213        // wire shape is canonicalised by serde_json's Map ordering, so
2214        // the byte-exact pin below documents that exact canonicalisation.
2215        let denial = DenialContext {
2216            reason: DenialReason::RedirectNotInAllowlist,
2217            source: Some("crossref".into()),
2218            attempted: Some("evil.example.com".into()),
2219            expected: Some(vec!["api.crossref.org".into(), "*.crossref.org".into()]),
2220            hop_index: Some(1),
2221            cap: None,
2222            actual: None,
2223        };
2224        let envelope = serde_json::json!({
2225            "ok": false,
2226            "error": {
2227                "code": ErrorCode::NetworkError,
2228                "message": "redirect target evil.example.com not in allowlist for source crossref",
2229                "denial_context": denial,
2230            }
2231        });
2232        let actual = serde_json::to_string(&envelope).expect("serialize envelope");
2233        let expected = r#"{"error":{"code":"NETWORK_ERROR","denial_context":{"attempted":"evil.example.com","expected":["api.crossref.org","*.crossref.org"],"hop_index":1,"reason":"redirect_not_in_allowlist","source":"crossref"},"message":"redirect target evil.example.com not in allowlist for source crossref"},"ok":false}"#;
2234        assert_eq!(actual, expected);
2235    }
2236
2237    #[test]
2238    fn denial_context_rejects_unknown_fields() {
2239        // `#[serde(deny_unknown_fields)]` (ADR-0023 §3, PUBLIC_API.md §8):
2240        // an unknown field on the wire MUST be a deserialize error so
2241        // forward-compat field additions stay a breaking change.
2242        let wire = r#"{"reason":"capability_not_granted","banana":1}"#;
2243        let result: Result<DenialContext, _> = serde_json::from_str(wire);
2244        assert!(
2245            result.is_err(),
2246            "deny_unknown_fields must reject 'banana': {:?}",
2247            result.map(|d| d.reason),
2248        );
2249    }
2250}