Skip to main content

doiget_cli/commands/
capabilities.rs

1//! `doiget capabilities` — single-shot inventory JSON for LLM cold-boot
2//! (#214).
3//!
4//! Emits a single JSON value describing the **full surface** of this
5//! `doiget` binary: subcommands (walked from the live `clap::Command`
6//! tree so the inventory cannot drift from the parser), positional args
7//! and named flags per subcommand, global flags, the four
8//! [`super::output::OutputMode`] values, hand-maintained env-var + example tables, the
9//! `doiget_*` MCP tool list, compile-time features, and a `docs` map
10//! pointing at the canonical spec files.
11//!
12//! Design rationale: the existing `--help` output lists subcommand
13//! names but the rest of doiget's surface (env vars, MCP tools, JSON
14//! schemas, ADR refs) is scattered across `docs/`. An LLM cold-booted
15//! into doiget — no repo access, no follow-up doc reads — cannot
16//! discover those via `--help` alone. This subcommand closes that gap
17//! with one round-trip.
18//!
19//! # Output mode
20//!
21//! `doiget capabilities` is a **product-output** command per the
22//! ADR-0017 convention (`--mode` is informational; the JSON inventory
23//! is the artefact). `--mode quiet` is the one mode that suppresses
24//! stdout (#203 / CONFIG.md §5); every other mode emits the same JSON.
25//!
26//! # Wire-format stability (whole module)
27//!
28//! Every `pub` struct / enum below carries `#[non_exhaustive]`. Adding
29//! a field is non-breaking; renaming or removing one is a
30//! compile-time break for downstream Rust consumers and a
31//! `[BREAKING]`-class change for JSON consumers (CHANGELOG must call
32//! it out). The per-item `#[non_exhaustive]` attributes intentionally
33//! carry no inline comment; this module-doc says it once.
34
35use anyhow::{Context, Result};
36use serde::Serialize;
37
38/// Top-level capability inventory. Serialised to stdout as one JSON
39/// value. Field names are part of the public wire format: renaming
40/// any field is a semver minor with a CHANGELOG `\[BREAKING\]` callout
41/// (same discipline as `EntryInfo` / `MigrationReport` in #213).
42#[allow(missing_docs)] // Field names ARE the schema; documented externally in #214.
43#[non_exhaustive]
44#[derive(Debug, Serialize)]
45pub struct Capabilities {
46    /// `CARGO_PKG_VERSION` for this build.
47    pub version: &'static str,
48    /// Cargo features compiled into this binary. Contains `"oa-only"`
49    /// in stock release builds (the default feature). Empty only when
50    /// the crate was built with `--no-default-features` and **no
51    /// other features enabled**; a build like
52    /// `cargo build --no-default-features --features citation`
53    /// yields `["citation"]`, not `[]`.
54    pub features: Vec<&'static str>,
55    /// All four [`super::output::OutputMode`] values; the parser accepts these for
56    /// `--mode`. Mirrors `CONFIG.md` §5 (CLI flags).
57    pub modes: &'static [&'static str],
58    /// Global flags that apply to every subcommand.
59    pub global_flags: Vec<FlagSpec>,
60    /// One entry per CLI subcommand (clap-walked).
61    pub subcommands: Vec<SubcommandSpec>,
62    /// `DOIGET_*` env vars from CONFIG.md §4.
63    pub env_vars: &'static [EnvVar],
64    /// MCP tools exposed by `doiget serve` (hand-coded; the source of
65    /// truth is `docs/MCP_TOOLS.md` §1).
66    pub mcp_tools: &'static [McpTool],
67    /// Canonical doc paths an LLM can pull for deeper context.
68    pub docs: Docs,
69}
70
71/// What kind of value (if any) a [`FlagSpec`] carries.
72///
73/// Typed (not `&'static str`) so a typo can't slip into the wire
74/// format and the `Enum`-implies-`values`-present invariant is
75/// expressible at the type layer (see #215 for the design pass). Serialises
76/// as the lowercased variant name: `"bool"`, `"enum"`, `"string"`.
77#[non_exhaustive]
78#[derive(Debug, Serialize)]
79#[serde(rename_all = "lowercase")]
80pub enum FlagKind {
81    /// Boolean switch (no value).
82    Bool,
83    /// Value-bounded flag — `values` carries the accepted set.
84    Enum,
85    /// Any non-`Bool`, non-`Enum` flag. Today every such flag emits
86    /// `"string"`; richer typing (`Path` / `Int` etc.) is intentionally
87    /// out of scope until a real consumer needs it — `#[non_exhaustive]`
88    /// reserves space without commitment.
89    String,
90}
91
92#[allow(missing_docs)] // Field names ARE the schema; documented externally in #214.
93#[non_exhaustive]
94#[derive(Debug, Serialize)]
95pub struct FlagSpec {
96    /// e.g. `--mode`, `--json`, `-q`.
97    pub name: String,
98    /// Boolean / enum / free-string discriminator. See [`FlagKind`].
99    pub kind: FlagKind,
100    /// `clap` `help` text.
101    pub help: Option<String>,
102    /// For `kind == FlagKind::Enum`: the accepted values, harvested
103    /// from clap's `PossibleValuesParser`. Owned (not `&'static`) so
104    /// the helper works for any future enum flag, not just `--mode`
105    /// (see #215).
106    #[serde(skip_serializing_if = "Option::is_none")]
107    pub values: Option<Vec<String>>,
108}
109
110#[allow(missing_docs)] // Field names ARE the schema; documented externally in #214.
111#[non_exhaustive]
112#[derive(Debug, Serialize)]
113pub struct SubcommandSpec {
114    pub name: String,
115    pub summary: Option<String>,
116    pub args: Vec<ArgSpec>,
117    pub flags: Vec<FlagSpec>,
118    /// Hand-maintained canonical invocations.
119    pub examples: &'static [&'static str],
120    /// How this command interacts with `--mode json`. See [`JsonMode`].
121    pub json_mode: JsonMode,
122    /// Cargo feature this subcommand is gated behind, if any.
123    #[serde(skip_serializing_if = "Option::is_none")]
124    pub feature_gated: Option<&'static str>,
125}
126
127/// What kind of positional argument an [`ArgSpec`] describes.
128///
129/// Currently every entry is `Positional`; the typed enum reserves
130/// space for future variants (e.g. `Stdin` markers) without breaking
131/// existing JSON consumers. Serialises as `"positional"`.
132#[non_exhaustive]
133#[derive(Debug, Serialize)]
134#[serde(rename_all = "lowercase")]
135pub enum ArgKind {
136    /// A required-or-optional positional argument on the subcommand.
137    Positional,
138}
139
140#[allow(missing_docs)] // Field names ARE the schema; documented externally in #214.
141#[non_exhaustive]
142#[derive(Debug, Serialize)]
143pub struct ArgSpec {
144    pub name: String,
145    /// Always [`ArgKind::Positional`] today. Kept as a discriminator
146    /// so the JSON shape can grow new arg kinds later without
147    /// renaming fields (see #215 for the design pass).
148    pub kind: ArgKind,
149    pub help: Option<String>,
150    /// `true` when the arg has no default and no `Option<T>` wrapper.
151    pub required: bool,
152}
153
154/// How a subcommand interacts with `--mode json`.
155///
156/// Wire shape: every variant serialises to an object with a `status`
157/// discriminant, so a consumer sees uniform `{"status":"…", …}`
158/// records (`#[serde(tag = "status")]`). Before #215 the previous
159/// mixed string/object representation forced consumers to handle two
160/// JSON shapes for sibling variants.
161///
162/// **Tuple variants not permitted.** `#[serde(tag = "status")]`
163/// requires the tag to live in the same flat object as variant
164/// fields; tuple variants are incompatible with internally-tagged
165/// representation. Future variants MUST use named fields.
166#[non_exhaustive] // Adding a future variant is non-breaking for JSON consumers.
167#[derive(Debug, Serialize)]
168#[serde(tag = "status", rename_all = "lowercase")]
169pub enum JsonMode {
170    /// The command's primary output IS the requested artifact, not
171    /// informational chatter. `--mode` is informational here; the
172    /// exact stdout shape (e.g. JSON for `csl` / `graph` /
173    /// `capabilities` and the JSON-RPC stream from `serve`; BibTeX
174    /// for `bib`; PDF-on-disk + stderr summary for `fetch`; a
175    /// `--dry-run` JSON plan in the dry-run variants) is fixed by
176    /// the subcommand and may vary across flags. **Consult
177    /// `examples` for the per-flag stdout form** rather than
178    /// assuming JSON.
179    Artifact,
180    /// Under `--mode json` the command emits a structured JSON body
181    /// on stdout; otherwise the human form (e.g. `info`,
182    /// `list-recent`, `audit-log`, `provenance migrate`, `batch`).
183    Supported,
184    // NOTE: a `Deferred { tracking: &'static str }` variant was
185    // sketched during #214's design phase but never instantiated by
186    // any subcommand. Removed in the #215 self-review pass to avoid
187    // shipping an unused wire shape; `#[non_exhaustive]` keeps the
188    // door open to add it back non-breakingly when a real consumer
189    // emerges.
190}
191
192#[allow(missing_docs)] // Field names ARE the schema; documented externally in #214.
193#[non_exhaustive]
194#[derive(Debug, Serialize)]
195pub struct EnvVar {
196    pub name: &'static str,
197    /// `(none)` when no built-in default.
198    pub default: &'static str,
199    pub help: &'static str,
200}
201
202#[allow(missing_docs)] // Field names ARE the schema; documented externally in #214.
203#[non_exhaustive]
204#[derive(Debug, Serialize)]
205pub struct McpTool {
206    pub name: &'static str,
207    /// Anchor-style reference into `docs/MCP_TOOLS.md`.
208    pub schema_ref: &'static str,
209}
210
211#[allow(missing_docs)] // Field names ARE the schema; documented externally in #214.
212#[non_exhaustive]
213#[derive(Debug, Serialize)]
214pub struct Docs {
215    pub config: &'static str,
216    pub errors: &'static str,
217    pub scope: &'static str,
218    pub mcp: &'static str,
219    pub sources: &'static str,
220    pub redirect_allowlist: &'static str,
221    pub provenance_log: &'static str,
222}
223
224// ---------------------------------------------------------------------------
225// Static tables
226// ---------------------------------------------------------------------------
227
/// The `--mode` values advertised in the inventory's `modes` field, in
/// the order they are emitted.
const MODES: &[&str] = &[
    "human", //
    "json",  //
    "quiet", //
    "mcp",   //
];
229
230const ENV_VARS: &[EnvVar] = &[
231    EnvVar {
232        name: "DOIGET_STORE_ROOT",
233        default: "$HOME/papers",
234        help: "Root of the on-disk paper store. CONFIG.md §4.",
235    },
236    EnvVar {
237        name: "DOIGET_CACHE_ROOT",
238        default: "$HOME/.cache/doiget",
239        help: "Root of the on-disk HTTP / metadata cache. CONFIG.md §4.",
240    },
241    EnvVar {
242        name: "DOIGET_LOG_PATH",
243        default: "<config_dir>/doiget/access.jsonl",
244        help: "JSON-Lines provenance log file path (PROVENANCE_LOG.md §3).",
245    },
246    EnvVar {
247        name: "DOIGET_LOG_RETENTION_DAYS",
248        default: "90",
249        help: "Rotated-segment retention window (0 disables pruning). #140 / PROVENANCE_LOG.md §6.",
250    },
251    EnvVar {
252        name: "DOIGET_MODE",
253        default: "(none)",
254        help: "Output mode (`human`/`json`/`quiet`/`mcp`). ADR-0017 ladder rung 3.",
255    },
256    EnvVar {
257        name: "DOIGET_CONTACT_EMAIL",
258        default: "(none)",
259        help: "Contact email for polite User-Agent header (CONFIG.md §4).",
260    },
261    EnvVar {
262        name: "DOIGET_UNPAYWALL_EMAIL",
263        default: "(falls back to DOIGET_CONTACT_EMAIL)",
264        help: "Unpaywall-specific contact email.",
265    },
266    EnvVar {
267        name: "DOIGET_USER_AGENT",
268        default: "(default polite UA)",
269        help: "Override the User-Agent header for all outbound requests.",
270    },
271    EnvVar {
272        name: "DOIGET_ENABLE_OPENALEX",
273        default: "(off)",
274        help: "Enable the OpenAlex citation graph source (graph subcommand prerequisite).",
275    },
276    EnvVar {
277        name: "DOIGET_ARXIV_BASE",
278        default: "https://export.arxiv.org/",
279        help: "arXiv API base URL — primarily for testing/wiremock override.",
280    },
281    EnvVar {
282        name: "DOIGET_CROSSREF_BASE",
283        default: "https://api.crossref.org/",
284        help: "Crossref API base URL.",
285    },
286    EnvVar {
287        name: "DOIGET_UNPAYWALL_BASE",
288        default: "https://api.unpaywall.org/",
289        help: "Unpaywall API base URL.",
290    },
291];
292
293const MCP_TOOLS: &[McpTool] = &[
294    McpTool {
295        name: "doiget_resolve_paper",
296        schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
297    },
298    McpTool {
299        name: "doiget_fetch_paper",
300        schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
301    },
302    McpTool {
303        name: "doiget_metadata_only",
304        schema_ref: "docs/MCP_TOOLS.md#11-doiget_metadata_only-normative",
305    },
306    McpTool {
307        name: "doiget_batch_fetch",
308        schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
309    },
310    McpTool {
311        name: "doiget_info",
312        schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
313    },
314    McpTool {
315        name: "doiget_search_local",
316        schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
317    },
318    McpTool {
319        name: "doiget_list_recent",
320        schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
321    },
322    McpTool {
323        name: "doiget_paper_pdf_path",
324        schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
325    },
326    McpTool {
327        name: "doiget_capability_profile",
328        schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
329    },
330    McpTool {
331        name: "doiget_health",
332        schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
333    },
334    McpTool {
335        name: "doiget_expand_citation_graph",
336        schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
337    },
338    McpTool {
339        name: "doiget_bibtex_export",
340        schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
341    },
342    McpTool {
343        name: "doiget_csl_export",
344        schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
345    },
346];
347
348const DOCS: Docs = Docs {
349    config: "docs/CONFIG.md",
350    errors: "docs/ERRORS.md",
351    scope: "docs/SCOPE.md",
352    mcp: "docs/MCP_TOOLS.md",
353    sources: "docs/SOURCES.md",
354    redirect_allowlist: "docs/REDIRECT_ALLOWLIST.md",
355    provenance_log: "docs/PROVENANCE_LOG.md",
356};
357
358/// Per-subcommand hand-maintained metadata. The clap walk provides
359/// name + summary + args + flags; this table adds examples,
360/// `json_mode` semantics, and feature-gating that clap doesn't
361/// expose. A regression unit test asserts every clap-visible
362/// subcommand has an entry here (otherwise the test fails loudly).
363///
364/// **Maintenance:** `feature_gated` MUST be kept in sync with the
365/// corresponding `#[cfg(feature = …)]` annotation in `main.rs`. There
366/// is no compile-time check; the `every_test_cli_subcommand_has_metadata`
367/// regression test does not cover feature-gating directly — it only
368/// asserts metadata exists. Add a CI matrix entry (`--features
369/// citation`) when introducing new gated subcommands so the e2e
370/// assertion list catches drift (see #215). Alternatively, add a
371/// unit test that asserts `metadata_for("graph").unwrap().feature_gated
372/// == Some("citation")` to lock the gate at the lib-test layer.
373struct SubcommandMeta {
374    examples: &'static [&'static str],
375    json_mode: JsonMode,
376    feature_gated: Option<&'static str>,
377}
378
379fn metadata_for(subcommand: &str) -> Option<SubcommandMeta> {
380    let m = match subcommand {
381        "fetch" => SubcommandMeta {
382            examples: &[
383                "doiget fetch 10.1234/foo",
384                "doiget fetch arxiv:2401.12345",
385                "doiget fetch 10.1234/foo --dry-run",
386            ],
387            // The success summary is on stderr (ADR-0001); the
388            // dry-run plan is JSON product output (ADR-0022).
389            json_mode: JsonMode::Artifact,
390            feature_gated: None,
391        },
392        "batch" => SubcommandMeta {
393            examples: &[
394                "doiget batch refs.txt",
395                "doiget batch refs.txt --dry-run",
396                "doiget batch refs.txt --json",
397            ],
398            // `--json` emits the ERRORS.md §3 JSONL per-ref shape (#205).
399            json_mode: JsonMode::Supported,
400            feature_gated: None,
401        },
402        "info" => SubcommandMeta {
403            examples: &[
404                "doiget info 10.1234/foo",
405                "doiget info arxiv:2401.12345 --json",
406            ],
407            json_mode: JsonMode::Supported,
408            feature_gated: None,
409        },
410        "list-recent" => SubcommandMeta {
411            examples: &[
412                "doiget list-recent",
413                "doiget list-recent 20",
414                "doiget list-recent --json",
415            ],
416            json_mode: JsonMode::Supported,
417            feature_gated: None,
418        },
419        "search" => SubcommandMeta {
420            examples: &[
421                "doiget search 'quantum entanglement'",
422                "doiget search renormalization --json",
423            ],
424            json_mode: JsonMode::Supported,
425            feature_gated: None,
426        },
427        "bib" => SubcommandMeta {
428            examples: &["doiget bib 10.1234/foo", "doiget bib arxiv:2401.12345"],
429            // BibTeX output is the product; `--mode` is informational.
430            json_mode: JsonMode::Artifact,
431            feature_gated: None,
432        },
433        "csl" => SubcommandMeta {
434            examples: &["doiget csl 10.1234/foo"],
435            json_mode: JsonMode::Artifact,
436            feature_gated: None,
437        },
438        "audit-log" => SubcommandMeta {
439            examples: &[
440                "doiget audit-log --verify",
441                "doiget audit-log --verify --json",
442                "doiget --quiet audit-log --verify   # exit code only",
443            ],
444            json_mode: JsonMode::Supported,
445            feature_gated: None,
446        },
447        "provenance" => SubcommandMeta {
448            examples: &[
449                "doiget provenance migrate --dry-run",
450                "doiget provenance migrate",
451                "doiget provenance migrate --dry-run --json",
452            ],
453            json_mode: JsonMode::Supported,
454            feature_gated: None,
455        },
456        "config" => SubcommandMeta {
457            examples: &[
458                "doiget config show",
459                "doiget config show --json",
460                "doiget config path",
461                "doiget config doctor",
462            ],
463            json_mode: JsonMode::Supported,
464            feature_gated: None,
465        },
466        "serve" => SubcommandMeta {
467            examples: &["doiget serve   # stdio MCP server (ADR-0001)"],
468            // serve always runs in mcp mode; the protocol output is
469            // JSON-RPC, which is product.
470            json_mode: JsonMode::Artifact,
471            feature_gated: None,
472        },
473        "graph" => SubcommandMeta {
474            examples: &[
475                "DOIGET_ENABLE_OPENALEX=1 doiget graph 10.1234/foo",
476                "DOIGET_ENABLE_OPENALEX=1 doiget graph 10.1234/foo --depth 2 --total 50",
477            ],
478            json_mode: JsonMode::Artifact,
479            feature_gated: Some("citation"),
480        },
481        "capabilities" => SubcommandMeta {
482            examples: &["doiget capabilities | jq ."],
483            // The whole point of capabilities IS JSON output.
484            json_mode: JsonMode::Artifact,
485            feature_gated: None,
486        },
487        // clap auto-adds `help`; we silently ignore it (it's not a
488        // domain subcommand).
489        "help" => return None,
490        _ => return None,
491    };
492    Some(m)
493}
494
495// ---------------------------------------------------------------------------
496// Build
497// ---------------------------------------------------------------------------
498
499/// Build the [`Capabilities`] inventory from `cli` (the clap parser
500/// for this binary, supplied by the caller because the `Cli` struct
501/// lives in `main.rs` and is not exposed in the library crate). The
502/// caller is `commands::main::run_dispatch` via `Cli::command()`.
503pub fn build_capabilities(cli: &clap::Command) -> Capabilities {
504    let global_flags = collect_global_flags(cli);
505    let subcommands = cli
506        .get_subcommands()
507        .filter_map(|sub| build_subcommand(sub, cli))
508        .collect::<Vec<_>>();
509    Capabilities {
510        version: env!("CARGO_PKG_VERSION"),
511        features: compile_time_features(),
512        modes: MODES,
513        global_flags,
514        subcommands,
515        env_vars: ENV_VARS,
516        mcp_tools: MCP_TOOLS,
517        docs: DOCS,
518    }
519}
520
/// List the Cargo features this binary was compiled with, in a fixed
/// order. Returns an empty vector when none of the known features is
/// enabled.
fn compile_time_features() -> Vec<&'static str> {
    // `cfg!` needs a literal feature name, so each row spells the name
    // twice: once for the compile-time probe, once for the emitted value.
    let probes: [(bool, &'static str); 6] = [
        (cfg!(feature = "oa-only"), "oa-only"),
        (cfg!(feature = "metadata"), "metadata"),
        (cfg!(feature = "citation"), "citation"),
        (cfg!(feature = "tdm-elsevier"), "tdm-elsevier"),
        (cfg!(feature = "tdm-aps"), "tdm-aps"),
        (cfg!(feature = "tdm-springer"), "tdm-springer"),
    ];
    probes
        .iter()
        .filter(|(enabled, _)| *enabled)
        .map(|&(_, name)| name)
        .collect()
}
543
544fn collect_global_flags(cmd: &clap::Command) -> Vec<FlagSpec> {
545    cmd.get_arguments()
546        .filter(|a| a.is_global_set())
547        .map(arg_to_flag_spec)
548        .collect()
549}
550
551fn build_subcommand(sub: &clap::Command, root: &clap::Command) -> Option<SubcommandSpec> {
552    let name = sub.get_name();
553    let meta = metadata_for(name)?;
554    let (args, flags) = split_args_and_flags(sub, root);
555    Some(SubcommandSpec {
556        name: name.to_string(),
557        summary: sub.get_about().map(|s| s.to_string()),
558        args,
559        flags,
560        examples: meta.examples,
561        json_mode: meta.json_mode,
562        feature_gated: meta.feature_gated,
563    })
564}
565
566fn split_args_and_flags(
567    sub: &clap::Command,
568    root: &clap::Command,
569) -> (Vec<ArgSpec>, Vec<FlagSpec>) {
570    // The root's global args appear in every subcommand's iterator;
571    // suppress them from per-subcommand `flags` (they're already in
572    // `global_flags`).
573    let global_names: std::collections::HashSet<&str> = root
574        .get_arguments()
575        .filter(|a| a.is_global_set())
576        .map(|a| a.get_id().as_str())
577        .collect();
578    let mut args = Vec::new();
579    let mut flags = Vec::new();
580    for a in sub.get_arguments() {
581        if global_names.contains(a.get_id().as_str()) {
582            continue;
583        }
584        // Clap auto-adds `--help` (and `--version` on the root) to
585        // every subcommand. They're not positional and not
586        // `is_global_set()`, so they would otherwise leak into every
587        // subcommand's `flags[]` as `kind: "string"`. Filter on the
588        // action against the known built-in variants.
589        //
590        // **Maintenance:** `clap::ArgAction` is itself
591        // `#[non_exhaustive]` upstream. A future clap release that
592        // adds a new built-in action (e.g. a hypothetical
593        // `HelpMarkdown`) would fall through this `matches!` and
594        // reappear in `flags[]`. Re-audit this filter on every clap
595        // minor-version bump.
596        if matches!(
597            a.get_action(),
598            clap::ArgAction::Help
599                | clap::ArgAction::HelpShort
600                | clap::ArgAction::HelpLong
601                | clap::ArgAction::Version
602        ) {
603            continue;
604        }
605        if a.is_positional() {
606            args.push(ArgSpec {
607                name: a.get_id().to_string(),
608                kind: ArgKind::Positional,
609                help: a.get_help().map(|s| s.to_string()),
610                required: a.is_required_set(),
611            });
612        } else {
613            flags.push(arg_to_flag_spec(a));
614        }
615    }
616    (args, flags)
617}
618
619fn arg_to_flag_spec(a: &clap::Arg) -> FlagSpec {
620    let name = a
621        .get_long()
622        .map(|s| format!("--{s}"))
623        .or_else(|| a.get_short().map(|c| format!("-{c}")))
624        .unwrap_or_else(|| a.get_id().to_string());
625    // Boolean switches → `Bool`; value-enum flags → `Enum` with the
626    // accepted values harvested from clap directly; everything else
627    // → `String`. The `possible_values()` harvest covers any future
628    // enum flag without code change (see #215).
629    let possible: Option<Vec<String>> = a
630        .get_value_parser()
631        .possible_values()
632        .map(|it| it.map(|pv| pv.get_name().to_owned()).collect());
633    let (kind, values) = if matches!(
634        a.get_action(),
635        clap::ArgAction::SetTrue | clap::ArgAction::SetFalse
636    ) {
637        (FlagKind::Bool, None)
638    } else if let Some(vs) = possible {
639        (FlagKind::Enum, Some(vs))
640    } else {
641        (FlagKind::String, None)
642    };
643    FlagSpec {
644        name,
645        kind,
646        help: a.get_help().map(|s| s.to_string()),
647        values,
648    }
649}
650
651// ---------------------------------------------------------------------------
652// Entry point
653// ---------------------------------------------------------------------------
654
655/// Run the `doiget capabilities` subcommand. Honors [`super::output::OutputMode`]:
656/// `Quiet` suppresses stdout (#203); every other mode emits the same
657/// pretty-printed JSON inventory. The caller passes the live
658/// `clap::Command` so the clap walk operates on the binary's actual
659/// `Cli` tree (which the lib half of this crate can't reach
660/// directly — the `Cli` struct lives in `main.rs`).
661pub fn run(cli: &clap::Command, mode: super::output::OutputMode) -> Result<()> {
662    // `Quiet` is the one mode that suppresses (per ADR-0017 / #203).
663    // Every other mode emits the same pretty JSON: `capabilities` is a
664    // product-output command.
665    if mode == super::output::OutputMode::Quiet {
666        return Ok(());
667    }
668    let caps = build_capabilities(cli);
669    let s = serde_json::to_string_pretty(&caps).context("serialise capabilities inventory")?;
670    // `print_stdout` workspace-deny; localised allow at the
671    // sanctioned product-output sink. See `commands/csl.rs`'s pattern.
672    #[allow(clippy::print_stdout)]
673    {
674        println!("{s}");
675    }
676    Ok(())
677}
678
679// ---------------------------------------------------------------------------
680// Tests
681// ---------------------------------------------------------------------------
682
683#[cfg(test)]
684#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
685mod tests {
686    use super::*;
687
688    /// Mirrors the `Cli` struct in `main.rs` for lib-test reach.
689    /// `commands::capabilities` is library-level; the binary-only
690    /// `Cli` struct can't be reached from here, so we re-derive a
691    /// shadow whose subcommand list is identical. The
692    /// `cli_shadow_matches_main_cli` integration test in
693    /// `tests/capabilities_e2e.rs` runs the real binary and asserts
694    /// the wire output matches.
695    fn test_cli() -> clap::Command {
696        use clap::{Arg, ArgAction, Command};
697        let mode_values = ["human", "json", "quiet", "mcp"];
698        let cmd = Command::new("doiget")
699            .arg(
700                Arg::new("mode")
701                    .long("mode")
702                    .global(true)
703                    .value_parser(clap::builder::PossibleValuesParser::new(mode_values))
704                    .help("Output mode (human|json|quiet|mcp)."),
705            )
706            .arg(
707                Arg::new("json")
708                    .long("json")
709                    .global(true)
710                    .action(ArgAction::SetTrue)
711                    .help("Short for `--mode json`."),
712            )
713            .arg(
714                Arg::new("quiet")
715                    .long("quiet")
716                    .short('q')
717                    .global(true)
718                    .action(ArgAction::SetTrue)
719                    .help("Short for `--mode quiet`."),
720            )
721            .subcommand(
722                Command::new("fetch")
723                    .about("Fetch a single paper PDF")
724                    .arg(Arg::new("ref").required(true))
725                    .arg(
726                        Arg::new("dry-run")
727                            .long("dry-run")
728                            .action(ArgAction::SetTrue),
729                    ),
730            )
731            .subcommand(
732                Command::new("batch")
733                    .about("Fetch many refs")
734                    .arg(Arg::new("path").required(true))
735                    .arg(
736                        Arg::new("dry-run")
737                            .long("dry-run")
738                            .action(ArgAction::SetTrue),
739                    ),
740            )
741            .subcommand(
742                Command::new("info")
743                    .about("Show metadata")
744                    .arg(Arg::new("ref").required(true)),
745            )
746            .subcommand(Command::new("list-recent").about("List recent"))
747            .subcommand(
748                Command::new("search")
749                    .about("Search local")
750                    .arg(Arg::new("query").required(true)),
751            )
752            .subcommand(
753                Command::new("bib")
754                    .about("BibTeX export")
755                    .arg(Arg::new("ref").required(true)),
756            )
757            .subcommand(
758                Command::new("csl")
759                    .about("CSL export")
760                    .arg(Arg::new("ref").required(true)),
761            )
762            .subcommand(
763                Command::new("audit-log")
764                    .about("Audit log")
765                    .arg(Arg::new("verify").long("verify").action(ArgAction::SetTrue)),
766            )
767            .subcommand(Command::new("provenance").about("Provenance ops"))
768            .subcommand(
769                Command::new("config")
770                    .about("Config")
771                    .arg(Arg::new("action").required(true)),
772            )
773            .subcommand(Command::new("serve").about("MCP server"));
774        // `graph` is `#[cfg(feature = "citation")]` in main.rs; mirror
775        // the gate so the shadow CLI matches the production surface
776        // (see #215).
777        #[cfg(feature = "citation")]
778        let cmd = cmd.subcommand(
779            Command::new("graph")
780                .about("Citation graph")
781                .arg(Arg::new("ref").required(true)),
782        );
783        cmd.subcommand(Command::new("capabilities").about("Capabilities"))
784    }
785
786    fn caps() -> Capabilities {
787        build_capabilities(&test_cli())
788    }
789
790    #[test]
791    fn capabilities_serialises_to_valid_json() {
792        let s = serde_json::to_string_pretty(&caps()).expect("serialise");
793        let v: serde_json::Value = serde_json::from_str(&s).expect("parse round-trip");
794        for key in [
795            "version",
796            "features",
797            "modes",
798            "global_flags",
799            "subcommands",
800            "env_vars",
801            "mcp_tools",
802            "docs",
803        ] {
804            assert!(
805                v.get(key).is_some(),
806                "top-level key `{key}` missing from capabilities JSON: {v}"
807            );
808        }
809    }
810
811    #[test]
812    fn modes_field_matches_output_mode_enum() {
813        // Tied to `OutputMode { Human, Json, Quiet, Mcp }`.
814        assert_eq!(caps().modes, &["human", "json", "quiet", "mcp"]);
815    }
816
817    #[test]
818    fn env_vars_all_use_doiget_prefix() {
819        for ev in ENV_VARS {
820            assert!(
821                ev.name.starts_with("DOIGET_"),
822                "env var name MUST use DOIGET_ prefix, got `{}`",
823                ev.name
824            );
825        }
826    }
827
828    #[test]
829    fn mcp_tools_all_use_doiget_prefix() {
830        for t in MCP_TOOLS {
831            assert!(
832                t.name.starts_with("doiget_"),
833                "MCP tool name MUST use doiget_ prefix, got `{}`",
834                t.name
835            );
836        }
837    }
838
839    #[test]
840    fn subcommand_examples_reference_the_subcommand_name() {
841        for sub in &caps().subcommands {
842            for ex in sub.examples {
843                // `graph` examples carry a `DOIGET_ENABLE_OPENALEX=1`
844                // env prefix before `doiget …`. Allow either form.
845                assert!(
846                    ex.starts_with("doiget ") || ex.contains(" doiget "),
847                    "example `{ex}` for `{}` must invoke `doiget` somewhere",
848                    sub.name
849                );
850                assert!(
851                    ex.contains(&sub.name),
852                    "example `{ex}` does not mention subcommand `{}`",
853                    sub.name
854                );
855            }
856        }
857    }
858
859    // Exact-set parity guard against drift between the static
860    // `ENV_VARS` table and the documented surface (#215). The expected set is the SOURCE OF TRUTH at test time;
861    // adding a new DOIGET_* env var requires updating both ENV_VARS
862    // and this list in lockstep. CHANGELOG records cross-PR changes.
863    #[test]
864    fn env_vars_exact_set_matches_expected() {
865        let actual: std::collections::BTreeSet<&str> = ENV_VARS.iter().map(|ev| ev.name).collect();
866        let expected: std::collections::BTreeSet<&str> = [
867            // CONFIG.md §4 documented:
868            "DOIGET_STORE_ROOT",
869            "DOIGET_CACHE_ROOT",
870            "DOIGET_LOG_PATH",
871            "DOIGET_LOG_RETENTION_DAYS",
872            "DOIGET_USER_AGENT",
873            "DOIGET_UNPAYWALL_EMAIL",
874            "DOIGET_MODE",
875            // Code-reachable but documented in code-level docs or
876            // CAPABILITY.md (not CONFIG.md §4):
877            "DOIGET_CONTACT_EMAIL",
878            "DOIGET_ENABLE_OPENALEX",
879            // Test/wiremock-override base URLs:
880            "DOIGET_ARXIV_BASE",
881            "DOIGET_CROSSREF_BASE",
882            "DOIGET_UNPAYWALL_BASE",
883        ]
884        .into_iter()
885        .collect();
886        assert_eq!(
887            actual, expected,
888            "ENV_VARS table drifted from the expected canonical set; \
889             update both `ENV_VARS` and this test together (and CONFIG.md §4 \
890             if the new var is user-documented)."
891        );
892    }
893
894    // Exact-set parity guard against drift between the static
895    // `MCP_TOOLS` table and `docs/MCP_TOOLS.md` §1 (#215).
896    #[test]
897    fn mcp_tools_exact_set_matches_expected() {
898        let actual: std::collections::BTreeSet<&str> = MCP_TOOLS.iter().map(|t| t.name).collect();
899        let expected: std::collections::BTreeSet<&str> = [
900            "doiget_resolve_paper",
901            "doiget_fetch_paper",
902            "doiget_metadata_only",
903            "doiget_batch_fetch",
904            "doiget_info",
905            "doiget_search_local",
906            "doiget_list_recent",
907            "doiget_paper_pdf_path",
908            "doiget_capability_profile",
909            "doiget_health",
910            "doiget_expand_citation_graph",
911            "doiget_bibtex_export",
912            "doiget_csl_export",
913        ]
914        .into_iter()
915        .collect();
916        assert_eq!(
917            actual, expected,
918            "MCP_TOOLS table drifted from the expected set; update both \
919             `MCP_TOOLS` and this test together (and docs/MCP_TOOLS.md §1)."
920        );
921    }
922
923    // Pin the `#[serde(tag = "status")]` wire shape: every variant
924    // serialises to a `{"status":"…", …}` object. Accidentally
925    // removing the `tag` attribute (or renaming the discriminant)
926    // would silently degrade the wire format; this test catches it
927    // (#215 N1).
928    #[test]
929    fn json_mode_serialises_with_status_discriminant() {
930        let s = serde_json::to_string(&JsonMode::Artifact).expect("serialise");
931        assert_eq!(
932            s, r#"{"status":"artifact"}"#,
933            "Artifact must emit a status-tagged object"
934        );
935        let s = serde_json::to_string(&JsonMode::Supported).expect("serialise");
936        assert_eq!(s, r#"{"status":"supported"}"#);
937    }
938
939    // `arg_to_flag_spec` was generalised in #215 to harvest the
940    // accepted values from clap's `PossibleValuesParser` instead of
941    // hard-coding `--mode`. Pin the contract: the `--mode` entry in
942    // `global_flags` MUST report `kind: Enum` with all four mode
943    // strings. A future regression that silently degrades `--mode`
944    // to `kind: String, values: None` would otherwise pass every
945    // existing test (#215 N3).
946    #[test]
947    fn mode_flag_carries_enum_kind_and_all_four_values() {
948        let global = &caps().global_flags;
949        let mode = global
950            .iter()
951            .find(|f| f.name == "--mode")
952            .expect("--mode flag is in global_flags");
953        assert!(
954            matches!(mode.kind, FlagKind::Enum),
955            "--mode kind MUST be Enum, got {:?}",
956            mode.kind
957        );
958        let vs = mode.values.as_ref().expect("--mode carries values");
959        let mut sorted = vs.clone();
960        sorted.sort();
961        assert_eq!(sorted, vec!["human", "json", "mcp", "quiet"]);
962    }
963
964    // `compile_time_features()` pushes string literals that must
965    // exactly match the Cargo feature names in `Cargo.toml`. A
966    // typo in the literal (`"oa_only"` vs `"oa-only"`) would
967    // silently invert the inventory's `features` field for every
968    // consumer. The default build has `oa-only` active; assert
969    // the literal round-trips (#215 A9).
970    #[test]
971    fn compile_time_features_contains_oa_only_under_default() {
972        // `cfg!(feature = "oa-only")` is true in the default test
973        // build; if a future maintainer disables the default feature
974        // for the test target, this test becomes meaningless but
975        // does not cause a false failure.
976        if cfg!(feature = "oa-only") {
977            let f = compile_time_features();
978            assert!(
979                f.contains(&"oa-only"),
980                "oa-only feature was enabled at compile time but \
981                 `compile_time_features()` did not list it: {f:?}"
982            );
983        }
984    }
985
986    #[test]
987    fn version_is_cargo_pkg_version() {
988        assert_eq!(caps().version, env!("CARGO_PKG_VERSION"));
989    }
990
991    #[test]
992    fn every_test_cli_subcommand_has_metadata() {
993        // Regression at the lib layer: anything we add to the shadow
994        // `test_cli` must also be in `metadata_for`. The real
995        // `Cli::command()` is exercised by the e2e test in
996        // `tests/capabilities_e2e.rs`.
997        for sub in test_cli().get_subcommands() {
998            let name = sub.get_name();
999            if name == "help" {
1000                continue;
1001            }
1002            assert!(
1003                metadata_for(name).is_some(),
1004                "subcommand `{name}` lacks metadata in `metadata_for`"
1005            );
1006        }
1007    }
1008}