doiget_cli/commands/capabilities.rs
1//! `doiget capabilities` — single-shot inventory JSON for LLM cold-boot
2//! (#214).
3//!
4//! Emits a single JSON value describing the **full surface** of this
5//! `doiget` binary: subcommands (walked from the live `clap::Command`
6//! tree so the inventory cannot drift from the parser), positional args
7//! and named flags per subcommand, global flags, the four
8//! [`super::output::OutputMode`] values, hand-maintained env-var + example tables, the
9//! `doiget_*` MCP tool list, compile-time features, and a `docs` map
10//! pointing at the canonical spec files.
11//!
12//! Design rationale: the existing `--help` output lists subcommand
13//! names but the rest of doiget's surface (env vars, MCP tools, JSON
14//! schemas, ADR refs) is scattered across `docs/`. An LLM cold-booted
15//! into doiget — no repo access, no follow-up doc reads — cannot
16//! discover those via `--help` alone. This subcommand closes that gap
17//! with one round-trip.
18//!
19//! # Output mode
20//!
21//! `doiget capabilities` is a **product-output** command per the
22//! ADR-0017 convention (`--mode` is informational; the JSON inventory
23//! is the artefact). `--mode quiet` is the one mode that suppresses
24//! stdout (#203 / CONFIG.md §5); every other mode emits the same JSON.
25//!
26//! # Wire-format stability (whole module)
27//!
28//! Every `pub` struct / enum below carries `#[non_exhaustive]`. Adding
29//! a field is non-breaking; renaming or removing one is a
30//! compile-time break for downstream Rust consumers and a
31//! `[BREAKING]`-class change for JSON consumers (CHANGELOG must call
32//! it out). The per-item `#[non_exhaustive]` attributes intentionally
33//! carry no inline comment; this module-doc says it once.
34
35use anyhow::{Context, Result};
36use serde::Serialize;
37
38/// Top-level capability inventory. Serialised to stdout as one JSON
39/// value. Field names are part of the public wire format: renaming
40/// any field is a semver minor with a CHANGELOG `\[BREAKING\]` callout
41/// (same discipline as `EntryInfo` / `MigrationReport` in #213).
42#[allow(missing_docs)] // Field names ARE the schema; documented externally in #214.
43#[non_exhaustive]
44#[derive(Debug, Serialize)]
45pub struct Capabilities {
46 /// `CARGO_PKG_VERSION` for this build.
47 pub version: &'static str,
48 /// Cargo features compiled into this binary. Contains `"oa-only"`
49 /// in stock release builds (the default feature). Empty only when
50 /// the crate was built with `--no-default-features` and **no
51 /// other features enabled**; a build like
52 /// `cargo build --no-default-features --features citation`
53 /// yields `["citation"]`, not `[]`.
54 pub features: Vec<&'static str>,
55 /// All four [`super::output::OutputMode`] values; the parser accepts these for
56 /// `--mode`. Mirrors `CONFIG.md` §5 (CLI flags).
57 pub modes: &'static [&'static str],
58 /// Global flags that apply to every subcommand.
59 pub global_flags: Vec<FlagSpec>,
60 /// One entry per CLI subcommand (clap-walked).
61 pub subcommands: Vec<SubcommandSpec>,
62 /// `DOIGET_*` env vars from CONFIG.md §4.
63 pub env_vars: &'static [EnvVar],
64 /// MCP tools exposed by `doiget serve` (hand-coded; the source of
65 /// truth is `docs/MCP_TOOLS.md` §1).
66 pub mcp_tools: &'static [McpTool],
67 /// Canonical doc paths an LLM can pull for deeper context.
68 pub docs: Docs,
69}
70
71/// What kind of value (if any) a [`FlagSpec`] carries.
72///
73/// Typed (not `&'static str`) so a typo can't slip into the wire
74/// format and the `Enum`-implies-`values`-present invariant is
75/// expressible at the type layer (see #215 for the design pass). Serialises
76/// as the lowercased variant name: `"bool"`, `"enum"`, `"string"`.
77#[non_exhaustive]
78#[derive(Debug, Serialize)]
79#[serde(rename_all = "lowercase")]
80pub enum FlagKind {
81 /// Boolean switch (no value).
82 Bool,
83 /// Value-bounded flag — `values` carries the accepted set.
84 Enum,
85 /// Any non-`Bool`, non-`Enum` flag. Today every such flag emits
86 /// `"string"`; richer typing (`Path` / `Int` etc.) is intentionally
87 /// out of scope until a real consumer needs it — `#[non_exhaustive]`
88 /// reserves space without commitment.
89 String,
90}
91
92#[allow(missing_docs)] // Field names ARE the schema; documented externally in #214.
93#[non_exhaustive]
94#[derive(Debug, Serialize)]
95pub struct FlagSpec {
96 /// e.g. `--mode`, `--json`, `-q`.
97 pub name: String,
98 /// Boolean / enum / free-string discriminator. See [`FlagKind`].
99 pub kind: FlagKind,
100 /// `clap` `help` text.
101 pub help: Option<String>,
102 /// For `kind == FlagKind::Enum`: the accepted values, harvested
103 /// from clap's `PossibleValuesParser`. Owned (not `&'static`) so
104 /// the helper works for any future enum flag, not just `--mode`
105 /// (see #215).
106 #[serde(skip_serializing_if = "Option::is_none")]
107 pub values: Option<Vec<String>>,
108}
109
110#[allow(missing_docs)] // Field names ARE the schema; documented externally in #214.
111#[non_exhaustive]
112#[derive(Debug, Serialize)]
113pub struct SubcommandSpec {
114 pub name: String,
115 pub summary: Option<String>,
116 pub args: Vec<ArgSpec>,
117 pub flags: Vec<FlagSpec>,
118 /// Hand-maintained canonical invocations.
119 pub examples: &'static [&'static str],
120 /// How this command interacts with `--mode json`. See [`JsonMode`].
121 pub json_mode: JsonMode,
122 /// Cargo feature this subcommand is gated behind, if any.
123 #[serde(skip_serializing_if = "Option::is_none")]
124 pub feature_gated: Option<&'static str>,
125}
126
127/// What kind of positional argument an [`ArgSpec`] describes.
128///
129/// Currently every entry is `Positional`; the typed enum reserves
130/// space for future variants (e.g. `Stdin` markers) without breaking
131/// existing JSON consumers. Serialises as `"positional"`.
132#[non_exhaustive]
133#[derive(Debug, Serialize)]
134#[serde(rename_all = "lowercase")]
135pub enum ArgKind {
136 /// A required-or-optional positional argument on the subcommand.
137 Positional,
138}
139
140#[allow(missing_docs)] // Field names ARE the schema; documented externally in #214.
141#[non_exhaustive]
142#[derive(Debug, Serialize)]
143pub struct ArgSpec {
144 pub name: String,
145 /// Always [`ArgKind::Positional`] today. Kept as a discriminator
146 /// so the JSON shape can grow new arg kinds later without
147 /// renaming fields (see #215 for the design pass).
148 pub kind: ArgKind,
149 pub help: Option<String>,
150 /// `true` when the arg has no default and no `Option<T>` wrapper.
151 pub required: bool,
152}
153
154/// How a subcommand interacts with `--mode json`.
155///
156/// Wire shape: every variant serialises to an object with a `status`
157/// discriminant, so a consumer sees uniform `{"status":"…", …}`
158/// records (`#[serde(tag = "status")]`). Before #215 the previous
159/// mixed string/object representation forced consumers to handle two
160/// JSON shapes for sibling variants.
161///
162/// **Tuple variants not permitted.** `#[serde(tag = "status")]`
163/// requires the tag to live in the same flat object as variant
164/// fields; tuple variants are incompatible with internally-tagged
165/// representation. Future variants MUST use named fields.
166#[non_exhaustive] // Adding a future variant is non-breaking for JSON consumers.
167#[derive(Debug, Serialize)]
168#[serde(tag = "status", rename_all = "lowercase")]
169pub enum JsonMode {
170 /// The command's primary output IS the requested artifact, not
171 /// informational chatter. `--mode` is informational here; the
172 /// exact stdout shape (e.g. JSON for `csl` / `graph` /
173 /// `capabilities` and the JSON-RPC stream from `serve`; BibTeX
174 /// for `bib`; PDF-on-disk + stderr summary for `fetch`; a
175 /// `--dry-run` JSON plan in the dry-run variants) is fixed by
176 /// the subcommand and may vary across flags. **Consult
177 /// `examples` for the per-flag stdout form** rather than
178 /// assuming JSON.
179 Artifact,
180 /// Under `--mode json` the command emits a structured JSON body
181 /// on stdout; otherwise the human form (e.g. `info`,
182 /// `list-recent`, `audit-log`, `provenance migrate`, `batch`).
183 Supported,
184 // NOTE: a `Deferred { tracking: &'static str }` variant was
185 // sketched during #214's design phase but never instantiated by
186 // any subcommand. Removed in the #215 self-review pass to avoid
187 // shipping an unused wire shape; `#[non_exhaustive]` keeps the
188 // door open to add it back non-breakingly when a real consumer
189 // emerges.
190}
191
192#[allow(missing_docs)] // Field names ARE the schema; documented externally in #214.
193#[non_exhaustive]
194#[derive(Debug, Serialize)]
195pub struct EnvVar {
196 pub name: &'static str,
197 /// `(none)` when no built-in default.
198 pub default: &'static str,
199 pub help: &'static str,
200}
201
202#[allow(missing_docs)] // Field names ARE the schema; documented externally in #214.
203#[non_exhaustive]
204#[derive(Debug, Serialize)]
205pub struct McpTool {
206 pub name: &'static str,
207 /// Anchor-style reference into `docs/MCP_TOOLS.md`.
208 pub schema_ref: &'static str,
209}
210
211#[allow(missing_docs)] // Field names ARE the schema; documented externally in #214.
212#[non_exhaustive]
213#[derive(Debug, Serialize)]
214pub struct Docs {
215 pub config: &'static str,
216 pub errors: &'static str,
217 pub scope: &'static str,
218 pub mcp: &'static str,
219 pub sources: &'static str,
220 pub redirect_allowlist: &'static str,
221 pub provenance_log: &'static str,
222}
223
224// ---------------------------------------------------------------------------
225// Static tables
226// ---------------------------------------------------------------------------
227
/// Output-mode names emitted verbatim as the inventory's `modes`
/// field, in this order.
const MODES: &[&str] = &["human", "json", "quiet", "mcp"];
229
230const ENV_VARS: &[EnvVar] = &[
231 EnvVar {
232 name: "DOIGET_STORE_ROOT",
233 default: "$HOME/papers",
234 help: "Root of the on-disk paper store. CONFIG.md §4.",
235 },
236 EnvVar {
237 name: "DOIGET_CACHE_ROOT",
238 default: "$HOME/.cache/doiget",
239 help: "Root of the on-disk HTTP / metadata cache. CONFIG.md §4.",
240 },
241 EnvVar {
242 name: "DOIGET_LOG_PATH",
243 default: "<config_dir>/doiget/access.jsonl",
244 help: "JSON-Lines provenance log file path (PROVENANCE_LOG.md §3).",
245 },
246 EnvVar {
247 name: "DOIGET_LOG_RETENTION_DAYS",
248 default: "90",
249 help: "Rotated-segment retention window (0 disables pruning). #140 / PROVENANCE_LOG.md §6.",
250 },
251 EnvVar {
252 name: "DOIGET_MODE",
253 default: "(none)",
254 help: "Output mode (`human`/`json`/`quiet`/`mcp`). ADR-0017 ladder rung 3.",
255 },
256 EnvVar {
257 name: "DOIGET_CONTACT_EMAIL",
258 default: "(none)",
259 help: "Contact email for polite User-Agent header (CONFIG.md §4).",
260 },
261 EnvVar {
262 name: "DOIGET_UNPAYWALL_EMAIL",
263 default: "(falls back to DOIGET_CONTACT_EMAIL)",
264 help: "Unpaywall-specific contact email.",
265 },
266 EnvVar {
267 name: "DOIGET_USER_AGENT",
268 default: "(default polite UA)",
269 help: "Override the User-Agent header for all outbound requests.",
270 },
271 EnvVar {
272 name: "DOIGET_ENABLE_OPENALEX",
273 default: "(off)",
274 help: "Enable the OpenAlex citation graph source (graph subcommand prerequisite).",
275 },
276 EnvVar {
277 name: "DOIGET_ARXIV_BASE",
278 default: "https://export.arxiv.org/",
279 help: "arXiv API base URL — primarily for testing/wiremock override.",
280 },
281 EnvVar {
282 name: "DOIGET_CROSSREF_BASE",
283 default: "https://api.crossref.org/",
284 help: "Crossref API base URL.",
285 },
286 EnvVar {
287 name: "DOIGET_UNPAYWALL_BASE",
288 default: "https://api.unpaywall.org/",
289 help: "Unpaywall API base URL.",
290 },
291];
292
293const MCP_TOOLS: &[McpTool] = &[
294 McpTool {
295 name: "doiget_resolve_paper",
296 schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
297 },
298 McpTool {
299 name: "doiget_fetch_paper",
300 schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
301 },
302 McpTool {
303 name: "doiget_metadata_only",
304 schema_ref: "docs/MCP_TOOLS.md#11-doiget_metadata_only-normative",
305 },
306 McpTool {
307 name: "doiget_batch_fetch",
308 schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
309 },
310 McpTool {
311 name: "doiget_info",
312 schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
313 },
314 McpTool {
315 name: "doiget_search_local",
316 schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
317 },
318 McpTool {
319 name: "doiget_list_recent",
320 schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
321 },
322 McpTool {
323 name: "doiget_paper_pdf_path",
324 schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
325 },
326 McpTool {
327 name: "doiget_capability_profile",
328 schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
329 },
330 McpTool {
331 name: "doiget_health",
332 schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
333 },
334 McpTool {
335 name: "doiget_expand_citation_graph",
336 schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
337 },
338 McpTool {
339 name: "doiget_bibtex_export",
340 schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
341 },
342 McpTool {
343 name: "doiget_csl_export",
344 schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
345 },
346];
347
348const DOCS: Docs = Docs {
349 config: "docs/CONFIG.md",
350 errors: "docs/ERRORS.md",
351 scope: "docs/SCOPE.md",
352 mcp: "docs/MCP_TOOLS.md",
353 sources: "docs/SOURCES.md",
354 redirect_allowlist: "docs/REDIRECT_ALLOWLIST.md",
355 provenance_log: "docs/PROVENANCE_LOG.md",
356};
357
358/// Per-subcommand hand-maintained metadata. The clap walk provides
359/// name + summary + args + flags; this table adds examples,
360/// `json_mode` semantics, and feature-gating that clap doesn't
361/// expose. A regression unit test asserts every clap-visible
362/// subcommand has an entry here (otherwise the test fails loudly).
363///
364/// **Maintenance:** `feature_gated` MUST be kept in sync with the
365/// corresponding `#[cfg(feature = …)]` annotation in `main.rs`. There
366/// is no compile-time check; the `every_test_cli_subcommand_has_metadata`
367/// regression test does not cover feature-gating directly — it only
368/// asserts metadata exists. Add a CI matrix entry (`--features
369/// citation`) when introducing new gated subcommands so the e2e
370/// assertion list catches drift (see #215). Alternatively, add a
371/// unit test that asserts `metadata_for("graph").unwrap().feature_gated
372/// == Some("citation")` to lock the gate at the lib-test layer.
373struct SubcommandMeta {
374 examples: &'static [&'static str],
375 json_mode: JsonMode,
376 feature_gated: Option<&'static str>,
377}
378
379fn metadata_for(subcommand: &str) -> Option<SubcommandMeta> {
380 let m = match subcommand {
381 "fetch" => SubcommandMeta {
382 examples: &[
383 "doiget fetch 10.1234/foo",
384 "doiget fetch arxiv:2401.12345",
385 "doiget fetch 10.1234/foo --dry-run",
386 ],
387 // The success summary is on stderr (ADR-0001); the
388 // dry-run plan is JSON product output (ADR-0022).
389 json_mode: JsonMode::Artifact,
390 feature_gated: None,
391 },
392 "batch" => SubcommandMeta {
393 examples: &[
394 "doiget batch refs.txt",
395 "doiget batch refs.txt --dry-run",
396 "doiget batch refs.txt --json",
397 ],
398 // `--json` emits the ERRORS.md §3 JSONL per-ref shape (#205).
399 json_mode: JsonMode::Supported,
400 feature_gated: None,
401 },
402 "info" => SubcommandMeta {
403 examples: &[
404 "doiget info 10.1234/foo",
405 "doiget info arxiv:2401.12345 --json",
406 ],
407 json_mode: JsonMode::Supported,
408 feature_gated: None,
409 },
410 "list-recent" => SubcommandMeta {
411 examples: &[
412 "doiget list-recent",
413 "doiget list-recent 20",
414 "doiget list-recent --json",
415 ],
416 json_mode: JsonMode::Supported,
417 feature_gated: None,
418 },
419 "search" => SubcommandMeta {
420 examples: &[
421 "doiget search 'quantum entanglement'",
422 "doiget search renormalization --json",
423 ],
424 json_mode: JsonMode::Supported,
425 feature_gated: None,
426 },
427 "bib" => SubcommandMeta {
428 examples: &["doiget bib 10.1234/foo", "doiget bib arxiv:2401.12345"],
429 // BibTeX output is the product; `--mode` is informational.
430 json_mode: JsonMode::Artifact,
431 feature_gated: None,
432 },
433 "csl" => SubcommandMeta {
434 examples: &["doiget csl 10.1234/foo"],
435 json_mode: JsonMode::Artifact,
436 feature_gated: None,
437 },
438 "audit-log" => SubcommandMeta {
439 examples: &[
440 "doiget audit-log --verify",
441 "doiget audit-log --verify --json",
442 "doiget --quiet audit-log --verify # exit code only",
443 ],
444 json_mode: JsonMode::Supported,
445 feature_gated: None,
446 },
447 "provenance" => SubcommandMeta {
448 examples: &[
449 "doiget provenance migrate --dry-run",
450 "doiget provenance migrate",
451 "doiget provenance migrate --dry-run --json",
452 ],
453 json_mode: JsonMode::Supported,
454 feature_gated: None,
455 },
456 "config" => SubcommandMeta {
457 examples: &[
458 "doiget config show",
459 "doiget config show --json",
460 "doiget config path",
461 "doiget config doctor",
462 ],
463 json_mode: JsonMode::Supported,
464 feature_gated: None,
465 },
466 "serve" => SubcommandMeta {
467 examples: &["doiget serve # stdio MCP server (ADR-0001)"],
468 // serve always runs in mcp mode; the protocol output is
469 // JSON-RPC, which is product.
470 json_mode: JsonMode::Artifact,
471 feature_gated: None,
472 },
473 "graph" => SubcommandMeta {
474 examples: &[
475 "DOIGET_ENABLE_OPENALEX=1 doiget graph 10.1234/foo",
476 "DOIGET_ENABLE_OPENALEX=1 doiget graph 10.1234/foo --depth 2 --total 50",
477 ],
478 json_mode: JsonMode::Artifact,
479 feature_gated: Some("citation"),
480 },
481 "capabilities" => SubcommandMeta {
482 examples: &["doiget capabilities | jq ."],
483 // The whole point of capabilities IS JSON output.
484 json_mode: JsonMode::Artifact,
485 feature_gated: None,
486 },
487 // clap auto-adds `help`; we silently ignore it (it's not a
488 // domain subcommand).
489 "help" => return None,
490 _ => return None,
491 };
492 Some(m)
493}
494
495// ---------------------------------------------------------------------------
496// Build
497// ---------------------------------------------------------------------------
498
499/// Build the [`Capabilities`] inventory from `cli` (the clap parser
500/// for this binary, supplied by the caller because the `Cli` struct
501/// lives in `main.rs` and is not exposed in the library crate). The
502/// caller is `commands::main::run_dispatch` via `Cli::command()`.
503pub fn build_capabilities(cli: &clap::Command) -> Capabilities {
504 let global_flags = collect_global_flags(cli);
505 let subcommands = cli
506 .get_subcommands()
507 .filter_map(|sub| build_subcommand(sub, cli))
508 .collect::<Vec<_>>();
509 Capabilities {
510 version: env!("CARGO_PKG_VERSION"),
511 features: compile_time_features(),
512 modes: MODES,
513 global_flags,
514 subcommands,
515 env_vars: ENV_VARS,
516 mcp_tools: MCP_TOOLS,
517 docs: DOCS,
518 }
519}
520
/// List the cargo features this binary was compiled with, in a fixed
/// order. Each literal must match the feature name exactly, because
/// it appears twice per row: once in `cfg!` and once as the emitted
/// string.
fn compile_time_features() -> Vec<&'static str> {
    // (enabled at compile time, name emitted in the inventory)
    let known: [(bool, &'static str); 6] = [
        (cfg!(feature = "oa-only"), "oa-only"),
        (cfg!(feature = "metadata"), "metadata"),
        (cfg!(feature = "citation"), "citation"),
        (cfg!(feature = "tdm-elsevier"), "tdm-elsevier"),
        (cfg!(feature = "tdm-aps"), "tdm-aps"),
        (cfg!(feature = "tdm-springer"), "tdm-springer"),
    ];
    known
        .iter()
        .filter(|(enabled, _)| *enabled)
        .map(|(_, name)| *name)
        .collect()
}
543
544fn collect_global_flags(cmd: &clap::Command) -> Vec<FlagSpec> {
545 cmd.get_arguments()
546 .filter(|a| a.is_global_set())
547 .map(arg_to_flag_spec)
548 .collect()
549}
550
551fn build_subcommand(sub: &clap::Command, root: &clap::Command) -> Option<SubcommandSpec> {
552 let name = sub.get_name();
553 let meta = metadata_for(name)?;
554 let (args, flags) = split_args_and_flags(sub, root);
555 Some(SubcommandSpec {
556 name: name.to_string(),
557 summary: sub.get_about().map(|s| s.to_string()),
558 args,
559 flags,
560 examples: meta.examples,
561 json_mode: meta.json_mode,
562 feature_gated: meta.feature_gated,
563 })
564}
565
566fn split_args_and_flags(
567 sub: &clap::Command,
568 root: &clap::Command,
569) -> (Vec<ArgSpec>, Vec<FlagSpec>) {
570 // The root's global args appear in every subcommand's iterator;
571 // suppress them from per-subcommand `flags` (they're already in
572 // `global_flags`).
573 let global_names: std::collections::HashSet<&str> = root
574 .get_arguments()
575 .filter(|a| a.is_global_set())
576 .map(|a| a.get_id().as_str())
577 .collect();
578 let mut args = Vec::new();
579 let mut flags = Vec::new();
580 for a in sub.get_arguments() {
581 if global_names.contains(a.get_id().as_str()) {
582 continue;
583 }
584 // Clap auto-adds `--help` (and `--version` on the root) to
585 // every subcommand. They're not positional and not
586 // `is_global_set()`, so they would otherwise leak into every
587 // subcommand's `flags[]` as `kind: "string"`. Filter on the
588 // action against the known built-in variants.
589 //
590 // **Maintenance:** `clap::ArgAction` is itself
591 // `#[non_exhaustive]` upstream. A future clap release that
592 // adds a new built-in action (e.g. a hypothetical
593 // `HelpMarkdown`) would fall through this `matches!` and
594 // reappear in `flags[]`. Re-audit this filter on every clap
595 // minor-version bump.
596 if matches!(
597 a.get_action(),
598 clap::ArgAction::Help
599 | clap::ArgAction::HelpShort
600 | clap::ArgAction::HelpLong
601 | clap::ArgAction::Version
602 ) {
603 continue;
604 }
605 if a.is_positional() {
606 args.push(ArgSpec {
607 name: a.get_id().to_string(),
608 kind: ArgKind::Positional,
609 help: a.get_help().map(|s| s.to_string()),
610 required: a.is_required_set(),
611 });
612 } else {
613 flags.push(arg_to_flag_spec(a));
614 }
615 }
616 (args, flags)
617}
618
619fn arg_to_flag_spec(a: &clap::Arg) -> FlagSpec {
620 let name = a
621 .get_long()
622 .map(|s| format!("--{s}"))
623 .or_else(|| a.get_short().map(|c| format!("-{c}")))
624 .unwrap_or_else(|| a.get_id().to_string());
625 // Boolean switches → `Bool`; value-enum flags → `Enum` with the
626 // accepted values harvested from clap directly; everything else
627 // → `String`. The `possible_values()` harvest covers any future
628 // enum flag without code change (see #215).
629 let possible: Option<Vec<String>> = a
630 .get_value_parser()
631 .possible_values()
632 .map(|it| it.map(|pv| pv.get_name().to_owned()).collect());
633 let (kind, values) = if matches!(
634 a.get_action(),
635 clap::ArgAction::SetTrue | clap::ArgAction::SetFalse
636 ) {
637 (FlagKind::Bool, None)
638 } else if let Some(vs) = possible {
639 (FlagKind::Enum, Some(vs))
640 } else {
641 (FlagKind::String, None)
642 };
643 FlagSpec {
644 name,
645 kind,
646 help: a.get_help().map(|s| s.to_string()),
647 values,
648 }
649}
650
651// ---------------------------------------------------------------------------
652// Entry point
653// ---------------------------------------------------------------------------
654
655/// Run the `doiget capabilities` subcommand. Honors [`super::output::OutputMode`]:
656/// `Quiet` suppresses stdout (#203); every other mode emits the same
657/// pretty-printed JSON inventory. The caller passes the live
658/// `clap::Command` so the clap walk operates on the binary's actual
659/// `Cli` tree (which the lib half of this crate can't reach
660/// directly — the `Cli` struct lives in `main.rs`).
661pub fn run(cli: &clap::Command, mode: super::output::OutputMode) -> Result<()> {
662 // `Quiet` is the one mode that suppresses (per ADR-0017 / #203).
663 // Every other mode emits the same pretty JSON: `capabilities` is a
664 // product-output command.
665 if mode == super::output::OutputMode::Quiet {
666 return Ok(());
667 }
668 let caps = build_capabilities(cli);
669 let s = serde_json::to_string_pretty(&caps).context("serialise capabilities inventory")?;
670 // `print_stdout` workspace-deny; localised allow at the
671 // sanctioned product-output sink. See `commands/csl.rs`'s pattern.
672 #[allow(clippy::print_stdout)]
673 {
674 println!("{s}");
675 }
676 Ok(())
677}
678
679// ---------------------------------------------------------------------------
680// Tests
681// ---------------------------------------------------------------------------
682
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;

    /// Mirrors the `Cli` struct in `main.rs` for lib-test reach.
    /// `commands::capabilities` is library-level; the binary-only
    /// `Cli` struct can't be reached from here, so we re-derive a
    /// shadow whose subcommand list is identical. The
    /// `cli_shadow_matches_main_cli` integration test in
    /// `tests/capabilities_e2e.rs` runs the real binary and asserts
    /// the wire output matches.
    fn test_cli() -> clap::Command {
        use clap::{Arg, ArgAction, Command};
        let mode_values = ["human", "json", "quiet", "mcp"];
        let cmd = Command::new("doiget")
            .arg(
                Arg::new("mode")
                    .long("mode")
                    .global(true)
                    .value_parser(clap::builder::PossibleValuesParser::new(mode_values))
                    .help("Output mode (human|json|quiet|mcp)."),
            )
            .arg(
                Arg::new("json")
                    .long("json")
                    .global(true)
                    .action(ArgAction::SetTrue)
                    .help("Short for `--mode json`."),
            )
            .arg(
                Arg::new("quiet")
                    .long("quiet")
                    .short('q')
                    .global(true)
                    .action(ArgAction::SetTrue)
                    .help("Short for `--mode quiet`."),
            )
            .subcommand(
                Command::new("fetch")
                    .about("Fetch a single paper PDF")
                    .arg(Arg::new("ref").required(true))
                    .arg(
                        Arg::new("dry-run")
                            .long("dry-run")
                            .action(ArgAction::SetTrue),
                    ),
            )
            .subcommand(
                Command::new("batch")
                    .about("Fetch many refs")
                    .arg(Arg::new("path").required(true))
                    .arg(
                        Arg::new("dry-run")
                            .long("dry-run")
                            .action(ArgAction::SetTrue),
                    ),
            )
            .subcommand(
                Command::new("info")
                    .about("Show metadata")
                    .arg(Arg::new("ref").required(true)),
            )
            .subcommand(Command::new("list-recent").about("List recent"))
            .subcommand(
                Command::new("search")
                    .about("Search local")
                    .arg(Arg::new("query").required(true)),
            )
            .subcommand(
                Command::new("bib")
                    .about("BibTeX export")
                    .arg(Arg::new("ref").required(true)),
            )
            .subcommand(
                Command::new("csl")
                    .about("CSL export")
                    .arg(Arg::new("ref").required(true)),
            )
            .subcommand(
                Command::new("audit-log")
                    .about("Audit log")
                    .arg(Arg::new("verify").long("verify").action(ArgAction::SetTrue)),
            )
            .subcommand(Command::new("provenance").about("Provenance ops"))
            .subcommand(
                Command::new("config")
                    .about("Config")
                    .arg(Arg::new("action").required(true)),
            )
            .subcommand(Command::new("serve").about("MCP server"));
        // `graph` is `#[cfg(feature = "citation")]` in main.rs; mirror
        // the gate so the shadow CLI matches the production surface
        // (see #215).
        #[cfg(feature = "citation")]
        let cmd = cmd.subcommand(
            Command::new("graph")
                .about("Citation graph")
                .arg(Arg::new("ref").required(true)),
        );
        cmd.subcommand(Command::new("capabilities").about("Capabilities"))
    }

    /// Inventory built from the shadow CLI.
    fn caps() -> Capabilities {
        build_capabilities(&test_cli())
    }

    #[test]
    fn capabilities_serialises_to_valid_json() {
        let s = serde_json::to_string_pretty(&caps()).expect("serialise");
        let v: serde_json::Value = serde_json::from_str(&s).expect("parse round-trip");
        for key in [
            "version",
            "features",
            "modes",
            "global_flags",
            "subcommands",
            "env_vars",
            "mcp_tools",
            "docs",
        ] {
            assert!(
                v.get(key).is_some(),
                "top-level key `{key}` missing from capabilities JSON: {v}"
            );
        }
    }

    #[test]
    fn modes_field_matches_output_mode_enum() {
        // Tied to `OutputMode { Human, Json, Quiet, Mcp }`.
        assert_eq!(caps().modes, &["human", "json", "quiet", "mcp"]);
    }

    #[test]
    fn env_vars_all_use_doiget_prefix() {
        for ev in ENV_VARS {
            assert!(
                ev.name.starts_with("DOIGET_"),
                "env var name MUST use DOIGET_ prefix, got `{}`",
                ev.name
            );
        }
    }

    #[test]
    fn mcp_tools_all_use_doiget_prefix() {
        for t in MCP_TOOLS {
            assert!(
                t.name.starts_with("doiget_"),
                "MCP tool name MUST use doiget_ prefix, got `{}`",
                t.name
            );
        }
    }

    #[test]
    fn subcommand_examples_reference_the_subcommand_name() {
        for sub in &caps().subcommands {
            for ex in sub.examples {
                // `graph` examples carry a `DOIGET_ENABLE_OPENALEX=1`
                // env prefix before `doiget …`. Allow either form.
                assert!(
                    ex.starts_with("doiget ") || ex.contains(" doiget "),
                    "example `{ex}` for `{}` must invoke `doiget` somewhere",
                    sub.name
                );
                assert!(
                    ex.contains(&sub.name),
                    "example `{ex}` does not mention subcommand `{}`",
                    sub.name
                );
            }
        }
    }

    // Exact-set parity guard against drift between the static
    // `ENV_VARS` table and the documented surface (#215). The expected
    // set is the SOURCE OF TRUTH at test time; adding a new DOIGET_*
    // env var requires updating both ENV_VARS and this list in
    // lockstep. CHANGELOG records cross-PR changes.
    #[test]
    fn env_vars_exact_set_matches_expected() {
        let actual: std::collections::BTreeSet<&str> = ENV_VARS.iter().map(|ev| ev.name).collect();
        let expected: std::collections::BTreeSet<&str> = [
            // CONFIG.md §4 documented:
            "DOIGET_STORE_ROOT",
            "DOIGET_CACHE_ROOT",
            "DOIGET_LOG_PATH",
            "DOIGET_LOG_RETENTION_DAYS",
            "DOIGET_USER_AGENT",
            "DOIGET_UNPAYWALL_EMAIL",
            "DOIGET_MODE",
            // Code-reachable but documented in code-level docs or
            // CAPABILITY.md (not CONFIG.md §4):
            "DOIGET_CONTACT_EMAIL",
            "DOIGET_ENABLE_OPENALEX",
            // Test/wiremock-override base URLs:
            "DOIGET_ARXIV_BASE",
            "DOIGET_CROSSREF_BASE",
            "DOIGET_UNPAYWALL_BASE",
        ]
        .into_iter()
        .collect();
        assert_eq!(
            actual, expected,
            "ENV_VARS table drifted from the expected canonical set; \
             update both `ENV_VARS` and this test together (and CONFIG.md §4 \
             if the new var is user-documented)."
        );
    }

    // Exact-set parity guard against drift between the static
    // `MCP_TOOLS` table and `docs/MCP_TOOLS.md` §1 (#215).
    #[test]
    fn mcp_tools_exact_set_matches_expected() {
        let actual: std::collections::BTreeSet<&str> = MCP_TOOLS.iter().map(|t| t.name).collect();
        let expected: std::collections::BTreeSet<&str> = [
            "doiget_resolve_paper",
            "doiget_fetch_paper",
            "doiget_metadata_only",
            "doiget_batch_fetch",
            "doiget_info",
            "doiget_search_local",
            "doiget_list_recent",
            "doiget_paper_pdf_path",
            "doiget_capability_profile",
            "doiget_health",
            "doiget_expand_citation_graph",
            "doiget_bibtex_export",
            "doiget_csl_export",
        ]
        .into_iter()
        .collect();
        assert_eq!(
            actual, expected,
            "MCP_TOOLS table drifted from the expected set; update both \
             `MCP_TOOLS` and this test together (and docs/MCP_TOOLS.md §1)."
        );
    }

    // Pin the `#[serde(tag = "status")]` wire shape: every variant
    // serialises to a `{"status":"…", …}` object. Accidentally
    // removing the `tag` attribute (or renaming the discriminant)
    // would silently degrade the wire format; this test catches it
    // (#215 N1).
    #[test]
    fn json_mode_serialises_with_status_discriminant() {
        let s = serde_json::to_string(&JsonMode::Artifact).expect("serialise");
        assert_eq!(
            s, r#"{"status":"artifact"}"#,
            "Artifact must emit a status-tagged object"
        );
        let s = serde_json::to_string(&JsonMode::Supported).expect("serialise");
        assert_eq!(s, r#"{"status":"supported"}"#);
    }

    // `arg_to_flag_spec` was generalised in #215 to harvest the
    // accepted values from clap's `PossibleValuesParser` instead of
    // hard-coding `--mode`. Pin the contract: the `--mode` entry in
    // `global_flags` MUST report `kind: Enum` with all four mode
    // strings. A future regression that silently degrades `--mode`
    // to `kind: String, values: None` would otherwise pass every
    // existing test (#215 N3).
    #[test]
    fn mode_flag_carries_enum_kind_and_all_four_values() {
        let global = &caps().global_flags;
        let mode = global
            .iter()
            .find(|f| f.name == "--mode")
            .expect("--mode flag is in global_flags");
        assert!(
            matches!(mode.kind, FlagKind::Enum),
            "--mode kind MUST be Enum, got {:?}",
            mode.kind
        );
        let vs = mode.values.as_ref().expect("--mode carries values");
        let mut sorted = vs.clone();
        sorted.sort();
        assert_eq!(sorted, vec!["human", "json", "mcp", "quiet"]);
    }

    // `compile_time_features()` pushes string literals that must
    // exactly match the Cargo feature names in `Cargo.toml`. A
    // typo in the literal (`"oa_only"` vs `"oa-only"`) would
    // silently invert the inventory's `features` field for every
    // consumer. The default build has `oa-only` active; assert
    // the literal round-trips (#215 A9).
    #[test]
    fn compile_time_features_contains_oa_only_under_default() {
        // `cfg!(feature = "oa-only")` is true in the default test
        // build; if a future maintainer disables the default feature
        // for the test target, this test becomes meaningless but
        // does not cause a false failure.
        if cfg!(feature = "oa-only") {
            let f = compile_time_features();
            assert!(
                f.contains(&"oa-only"),
                "oa-only feature was enabled at compile time but \
                 `compile_time_features()` did not list it: {f:?}"
            );
        }
    }

    #[test]
    fn version_is_cargo_pkg_version() {
        assert_eq!(caps().version, env!("CARGO_PKG_VERSION"));
    }

    #[test]
    fn every_test_cli_subcommand_has_metadata() {
        // Regression at the lib layer: anything we add to the shadow
        // `test_cli` must also be in `metadata_for`. The real
        // `Cli::command()` is exercised by the e2e test in
        // `tests/capabilities_e2e.rs`.
        for sub in test_cli().get_subcommands() {
            let name = sub.get_name();
            if name == "help" {
                continue;
            }
            assert!(
                metadata_for(name).is_some(),
                "subcommand `{name}` lacks metadata in `metadata_for`"
            );
        }
    }

    // Locks the `graph` feature gate at the lib-test layer. The
    // metadata table is not itself `cfg`-gated, so this runs whether
    // or not the `citation` feature is compiled in. The gate must
    // still be kept in sync with the `#[cfg(feature = …)]` annotation
    // in `main.rs` by hand.
    #[test]
    fn graph_metadata_is_gated_behind_citation_feature() {
        let meta = metadata_for("graph").expect("`graph` has a metadata entry");
        assert_eq!(
            meta.feature_gated,
            Some("citation"),
            "`graph` must report the `citation` feature gate"
        );
    }

    // clap's auto-added `help` pseudo-subcommand must stay out of the
    // inventory: `build_subcommand` drops anything without metadata.
    #[test]
    fn help_pseudo_subcommand_has_no_metadata() {
        assert!(metadata_for("help").is_none());
    }
}
1008}