use anyhow::{Context, Result};
use serde::Serialize;
/// Machine-readable inventory of what this build of the CLI exposes.
///
/// Assembled by [`build_capabilities`] and printed as pretty JSON by [`run`].
#[allow(missing_docs)] #[non_exhaustive]
#[derive(Debug, Serialize)]
pub struct Capabilities {
/// Crate version, taken from `CARGO_PKG_VERSION` at compile time.
pub version: &'static str,
/// Names of the Cargo features this binary was compiled with.
pub features: Vec<&'static str>,
/// Output mode names (the static `MODES` table).
pub modes: &'static [&'static str],
/// Root-level clap arguments that are marked global.
pub global_flags: Vec<FlagSpec>,
/// One entry per root subcommand that has an entry in `metadata_for`.
pub subcommands: Vec<SubcommandSpec>,
/// Environment variables listed in the static `ENV_VARS` table.
pub env_vars: &'static [EnvVar],
/// MCP tools listed in the static `MCP_TOOLS` table.
pub mcp_tools: &'static [McpTool],
/// Repository-relative paths of the reference documents.
pub docs: Docs,
/// Number of user-extension hosts loaded from `<config_dir>/doiget/config.toml`;
/// `0` when the config directory or the file cannot be loaded.
pub user_extension_count: usize,
}
/// Value shape of a flag; serialised as a lowercase string.
#[non_exhaustive]
#[derive(Debug, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum FlagKind {
/// Switch whose clap action is `SetTrue` or `SetFalse`.
Bool,
/// Flag whose value parser advertises a fixed list of possible values.
Enum,
/// Any other flag.
String,
}
/// Description of one non-positional clap argument.
#[allow(missing_docs)] #[non_exhaustive]
#[derive(Debug, Serialize)]
pub struct FlagSpec {
/// `--long` when the argument has a long name, else `-s` for a short name,
/// else the raw clap id.
pub name: String,
/// Value shape derived from the argument's action and value parser.
pub kind: FlagKind,
/// Help text registered with clap, if any.
pub help: Option<String>,
/// Accepted values; populated only for [`FlagKind::Enum`] and left out of
/// the JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub values: Option<Vec<String>>,
}
/// Description of one top-level subcommand: clap-derived shape plus the
/// hand-maintained metadata from `metadata_for`.
#[allow(missing_docs)] #[non_exhaustive]
#[derive(Debug, Serialize)]
pub struct SubcommandSpec {
/// Subcommand name as registered with clap.
pub name: String,
/// The clap `about` text, if any.
pub summary: Option<String>,
/// Positional arguments.
pub args: Vec<ArgSpec>,
/// Non-positional arguments, excluding root-level global flags and the
/// built-in help/version actions.
pub flags: Vec<FlagSpec>,
/// Example invocations.
pub examples: &'static [&'static str],
/// How the subcommand relates to JSON output.
pub json_mode: JsonMode,
/// Name of the Cargo feature the subcommand depends on; left out of the
/// JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub feature_gated: Option<&'static str>,
}
/// Kind of a subcommand argument; serialised as a lowercase string.
#[non_exhaustive]
#[derive(Debug, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum ArgKind {
/// Argument that clap reports as positional.
Positional,
}
/// Description of one positional argument of a subcommand.
#[allow(missing_docs)] #[non_exhaustive]
#[derive(Debug, Serialize)]
pub struct ArgSpec {
/// The clap argument id.
pub name: String,
/// Argument kind (currently always [`ArgKind::Positional`]).
pub kind: ArgKind,
/// Help text registered with clap, if any.
pub help: Option<String>,
/// Whether clap has the argument marked as required.
pub required: bool,
}
/// How a subcommand relates to JSON output.
///
/// Serialised as an object tagged by `status`, i.e. `{"status":"artifact"}`
/// or `{"status":"supported"}`.
///
/// NOTE(review): the precise meaning of each variant is not spelled out in
/// this file — presumably `Supported` marks subcommands that honour `--json`
/// and `Artifact` marks those whose output is itself the artifact; confirm
/// against the project docs.
#[non_exhaustive] #[derive(Debug, Serialize)]
#[serde(tag = "status", rename_all = "lowercase")]
pub enum JsonMode {
/// Serialised with `status` = `artifact`.
Artifact,
/// Serialised with `status` = `supported`.
Supported,
}
/// One environment variable advertised in the inventory.
#[allow(missing_docs)] #[non_exhaustive]
#[derive(Debug, Serialize)]
pub struct EnvVar {
/// Variable name.
pub name: &'static str,
/// Display text describing the default; not necessarily a literal value
/// (the table uses placeholders such as `(none)`).
pub default: &'static str,
/// One-line description.
pub help: &'static str,
}
/// One MCP tool advertised in the inventory.
#[allow(missing_docs)] #[non_exhaustive]
#[derive(Debug, Serialize)]
pub struct McpTool {
/// Tool name.
pub name: &'static str,
/// Document path plus anchor pointing at the tool's schema description.
pub schema_ref: &'static str,
}
/// Paths of the project's reference documents, one field per document.
#[allow(missing_docs)] #[non_exhaustive]
#[derive(Debug, Serialize)]
pub struct Docs {
/// Configuration reference.
pub config: &'static str,
/// Error reference.
pub errors: &'static str,
/// Scope document.
pub scope: &'static str,
/// MCP tools reference.
pub mcp: &'static str,
/// Sources reference.
pub sources: &'static str,
/// Redirect allowlist document.
pub redirect_allowlist: &'static str,
/// Provenance log document.
pub provenance_log: &'static str,
}
// Output mode names reported in `Capabilities::modes`. The unit test
// `modes_field_matches_output_mode_enum` pins this exact list and order.
const MODES: &[&str] = &["human", "json", "quiet", "mcp"];
// Environment variables reported in `Capabilities::env_vars`, in the order
// they are serialised. The unit test `env_vars_exact_set_matches_expected`
// pins the exact set of names, so adding or removing an entry here requires
// updating that test as well.
const ENV_VARS: &[EnvVar] = &[
EnvVar {
name: "DOIGET_STORE_ROOT",
default: "$HOME/papers",
help: "Root of the on-disk paper store. CONFIG.md §4.",
},
EnvVar {
name: "DOIGET_CACHE_ROOT",
default: "$HOME/.cache/doiget",
help: "Root of the on-disk HTTP / metadata cache. CONFIG.md §4.",
},
EnvVar {
name: "DOIGET_LOG_PATH",
default: "<config_dir>/doiget/access.jsonl",
help: "JSON-Lines provenance log file path (PROVENANCE_LOG.md §3).",
},
EnvVar {
name: "DOIGET_LOG_RETENTION_DAYS",
default: "90",
help: "Rotated-segment retention window (0 disables pruning). #140 / PROVENANCE_LOG.md §6.",
},
EnvVar {
name: "DOIGET_MODE",
default: "(none)",
help: "Output mode (`human`/`json`/`quiet`/`mcp`). ADR-0017 ladder rung 3.",
},
EnvVar {
name: "DOIGET_CONTACT_EMAIL",
default: "(none)",
help: "Contact email for polite User-Agent header (CONFIG.md §4).",
},
EnvVar {
name: "DOIGET_UNPAYWALL_EMAIL",
default: "(falls back to DOIGET_CONTACT_EMAIL)",
help: "Unpaywall-specific contact email.",
},
EnvVar {
name: "DOIGET_USER_AGENT",
default: "(default polite UA)",
help: "Override the User-Agent header for all outbound requests.",
},
EnvVar {
name: "DOIGET_ENABLE_OPENALEX",
default: "(off)",
help: "Enable the OpenAlex citation graph source (graph subcommand prerequisite).",
},
// The three `*_BASE` entries below describe API base URLs.
EnvVar {
name: "DOIGET_ARXIV_BASE",
default: "https://export.arxiv.org/",
help: "arXiv API base URL — primarily for testing/wiremock override.",
},
EnvVar {
name: "DOIGET_CROSSREF_BASE",
default: "https://api.crossref.org/",
help: "Crossref API base URL.",
},
EnvVar {
name: "DOIGET_UNPAYWALL_BASE",
default: "https://api.unpaywall.org/",
help: "Unpaywall API base URL.",
},
];
// MCP tools reported in `Capabilities::mcp_tools`, in the order they are
// serialised. The unit test `mcp_tools_exact_set_matches_expected` pins the
// exact set of names, so adding or removing an entry here requires updating
// that test as well.
const MCP_TOOLS: &[McpTool] = &[
McpTool {
name: "doiget_resolve_paper",
schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
},
McpTool {
name: "doiget_fetch_paper",
schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
},
// The only entry whose `schema_ref` points at a dedicated section rather
// than the shared tool list.
McpTool {
name: "doiget_metadata_only",
schema_ref: "docs/MCP_TOOLS.md#11-doiget_metadata_only-normative",
},
McpTool {
name: "doiget_batch_fetch",
schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
},
McpTool {
name: "doiget_info",
schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
},
McpTool {
name: "doiget_search_local",
schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
},
McpTool {
name: "doiget_list_recent",
schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
},
McpTool {
name: "doiget_paper_pdf_path",
schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
},
McpTool {
name: "doiget_capability_profile",
schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
},
McpTool {
name: "doiget_health",
schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
},
McpTool {
name: "doiget_expand_citation_graph",
schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
},
McpTool {
name: "doiget_bibtex_export",
schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
},
McpTool {
name: "doiget_csl_export",
schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
},
McpTool {
name: "doiget_batch_from_bibliography",
schema_ref: "docs/MCP_TOOLS.md#1-tool-list",
},
];
// Reference-document paths reported in `Capabilities::docs`; every path sits
// under `docs/`.
const DOCS: Docs = Docs {
config: "docs/CONFIG.md",
errors: "docs/ERRORS.md",
scope: "docs/SCOPE.md",
mcp: "docs/MCP_TOOLS.md",
sources: "docs/SOURCES.md",
redirect_allowlist: "docs/REDIRECT_ALLOWLIST.md",
provenance_log: "docs/PROVENANCE_LOG.md",
};
/// Hand-maintained, per-subcommand metadata returned by `metadata_for` and
/// copied into `SubcommandSpec` by `build_subcommand`.
struct SubcommandMeta {
/// Example invocations.
examples: &'static [&'static str],
/// How the subcommand relates to JSON output.
json_mode: JsonMode,
/// Name of the Cargo feature the subcommand depends on, if any.
feature_gated: Option<&'static str>,
}
fn metadata_for(subcommand: &str) -> Option<SubcommandMeta> {
let m = match subcommand {
"fetch" => SubcommandMeta {
examples: &[
"doiget fetch 10.1234/foo",
"doiget fetch arxiv:2401.12345",
"doiget fetch 10.1234/foo --dry-run",
],
json_mode: JsonMode::Artifact,
feature_gated: None,
},
"batch" => SubcommandMeta {
examples: &[
"doiget batch refs.txt",
"doiget batch refs.txt --dry-run",
"doiget batch refs.txt --json",
],
json_mode: JsonMode::Supported,
feature_gated: None,
},
"info" => SubcommandMeta {
examples: &[
"doiget info 10.1234/foo",
"doiget info arxiv:2401.12345 --json",
],
json_mode: JsonMode::Supported,
feature_gated: None,
},
"list-recent" => SubcommandMeta {
examples: &[
"doiget list-recent",
"doiget list-recent 20",
"doiget list-recent --json",
],
json_mode: JsonMode::Supported,
feature_gated: None,
},
"search" => SubcommandMeta {
examples: &[
"doiget search 'quantum entanglement'",
"doiget search renormalization --json",
],
json_mode: JsonMode::Supported,
feature_gated: None,
},
"bib" => SubcommandMeta {
examples: &["doiget bib 10.1234/foo", "doiget bib arxiv:2401.12345"],
json_mode: JsonMode::Artifact,
feature_gated: None,
},
"csl" => SubcommandMeta {
examples: &["doiget csl 10.1234/foo"],
json_mode: JsonMode::Artifact,
feature_gated: None,
},
"audit-log" => SubcommandMeta {
examples: &[
"doiget audit-log --verify",
"doiget audit-log --verify --json",
"doiget --quiet audit-log --verify # exit code only",
],
json_mode: JsonMode::Supported,
feature_gated: None,
},
"provenance" => SubcommandMeta {
examples: &[
"doiget provenance migrate --dry-run",
"doiget provenance migrate",
"doiget provenance migrate --dry-run --json",
],
json_mode: JsonMode::Supported,
feature_gated: None,
},
"config" => SubcommandMeta {
examples: &[
"doiget config show",
"doiget config show --json",
"doiget config path",
"doiget config doctor",
],
json_mode: JsonMode::Supported,
feature_gated: None,
},
"serve" => SubcommandMeta {
examples: &["doiget serve # stdio MCP server (ADR-0001)"],
json_mode: JsonMode::Artifact,
feature_gated: None,
},
"graph" => SubcommandMeta {
examples: &[
"DOIGET_ENABLE_OPENALEX=1 doiget graph 10.1234/foo",
"DOIGET_ENABLE_OPENALEX=1 doiget graph 10.1234/foo --depth 2 --total 50",
],
json_mode: JsonMode::Artifact,
feature_gated: Some("citation"),
},
"capabilities" => SubcommandMeta {
examples: &["doiget capabilities | jq ."],
json_mode: JsonMode::Artifact,
feature_gated: None,
},
"help" => return None,
_ => return None,
};
Some(m)
}
/// Builds the full [`Capabilities`] inventory for the given root command.
///
/// Flags and subcommand shapes are read from `cli`; the remaining fields come
/// from the static tables in this module, the enabled Cargo features and the
/// user's `config.toml`. Subcommands without an entry in `metadata_for` are
/// left out.
pub fn build_capabilities(cli: &clap::Command) -> Capabilities {
    let global_flags = collect_global_flags(cli);

    let mut subcommands = Vec::new();
    for sub in cli.get_subcommands() {
        if let Some(spec) = build_subcommand(sub, cli) {
            subcommands.push(spec);
        }
    }

    let features = compile_time_features();
    let extension_hosts = user_extension_count();

    Capabilities {
        version: env!("CARGO_PKG_VERSION"),
        features,
        modes: MODES,
        global_flags,
        subcommands,
        env_vars: ENV_VARS,
        mcp_tools: MCP_TOOLS,
        docs: DOCS,
        user_extension_count: extension_hosts,
    }
}
/// Counts the user-extension hosts declared in `<config_dir>/doiget/config.toml`.
///
/// Best-effort: yields `0` when the config directory cannot be resolved or
/// the file cannot be loaded.
fn user_extension_count() -> usize {
    super::fetch::config_dir_utf8()
        .ok()
        .map(|dir| dir.join("doiget").join("config.toml"))
        .and_then(|path| doiget_core::user_extension::load(&path).ok())
        .map_or(0, |hosts| hosts.len())
}
/// Lists the Cargo features this binary was compiled with.
///
/// Only the features named in the table below are considered, and the result
/// keeps the table's order.
fn compile_time_features() -> Vec<&'static str> {
    // Each `cfg!` is resolved at compile time; the table pairs a feature name
    // with whether that feature is enabled in this build.
    [
        ("oa-only", cfg!(feature = "oa-only")),
        ("metadata", cfg!(feature = "metadata")),
        ("citation", cfg!(feature = "citation")),
        ("tdm-elsevier", cfg!(feature = "tdm-elsevier")),
        ("tdm-aps", cfg!(feature = "tdm-aps")),
        ("tdm-springer", cfg!(feature = "tdm-springer")),
    ]
    .iter()
    .filter(|(_, enabled)| *enabled)
    .map(|(name, _)| *name)
    .collect()
}
fn collect_global_flags(cmd: &clap::Command) -> Vec<FlagSpec> {
cmd.get_arguments()
.filter(|a| a.is_global_set())
.map(arg_to_flag_spec)
.collect()
}
fn build_subcommand(sub: &clap::Command, root: &clap::Command) -> Option<SubcommandSpec> {
let name = sub.get_name();
let meta = metadata_for(name)?;
let (args, flags) = split_args_and_flags(sub, root);
Some(SubcommandSpec {
name: name.to_string(),
summary: sub.get_about().map(|s| s.to_string()),
args,
flags,
examples: meta.examples,
json_mode: meta.json_mode,
feature_gated: meta.feature_gated,
})
}
/// Partitions the arguments of `sub` into positionals and flags.
///
/// Skipped entirely: arguments whose id matches a global argument of `root`
/// (those are reported once, as global flags) and clap's built-in
/// help/version actions.
fn split_args_and_flags(
    sub: &clap::Command,
    root: &clap::Command,
) -> (Vec<ArgSpec>, Vec<FlagSpec>) {
    // Ids of the root command's global arguments.
    let mut inherited = std::collections::HashSet::new();
    for global in root.get_arguments() {
        if global.is_global_set() {
            inherited.insert(global.get_id().as_str());
        }
    }

    let own_arguments = sub.get_arguments().filter(|arg| {
        let builtin = matches!(
            arg.get_action(),
            clap::ArgAction::Help
                | clap::ArgAction::HelpShort
                | clap::ArgAction::HelpLong
                | clap::ArgAction::Version
        );
        !builtin && !inherited.contains(arg.get_id().as_str())
    });

    let mut positionals = Vec::new();
    let mut options = Vec::new();
    for arg in own_arguments {
        if !arg.is_positional() {
            options.push(arg_to_flag_spec(arg));
            continue;
        }
        positionals.push(ArgSpec {
            name: arg.get_id().to_string(),
            kind: ArgKind::Positional,
            help: arg.get_help().map(|text| text.to_string()),
            required: arg.is_required_set(),
        });
    }
    (positionals, options)
}
/// Converts a clap argument into its [`FlagSpec`].
///
/// The display name prefers `--long`, then `-s`, then the raw id. The kind is
/// `Bool` for `SetTrue`/`SetFalse` actions, `Enum` (with the value list) when
/// the value parser reports possible values, and `String` otherwise.
fn arg_to_flag_spec(a: &clap::Arg) -> FlagSpec {
    let name = match (a.get_long(), a.get_short()) {
        (Some(long), _) => format!("--{long}"),
        (None, Some(short)) => format!("-{short}"),
        (None, None) => a.get_id().to_string(),
    };

    let is_switch = matches!(
        a.get_action(),
        clap::ArgAction::SetTrue | clap::ArgAction::SetFalse
    );
    // The action is checked first, so a switch is always `Bool` even if its
    // value parser would report possible values.
    let (kind, values) = if is_switch {
        (FlagKind::Bool, None)
    } else {
        match a.get_value_parser().possible_values() {
            Some(candidates) => {
                let names = candidates
                    .map(|pv| pv.get_name().to_owned())
                    .collect::<Vec<String>>();
                (FlagKind::Enum, Some(names))
            }
            None => (FlagKind::String, None),
        }
    };

    FlagSpec {
        name,
        kind,
        help: a.get_help().map(|text| text.to_string()),
        values,
    }
}
pub fn run(
cli: &clap::Command,
mode: super::output::OutputMode,
quiet_was_explicit: bool,
) -> Result<()> {
if mode == super::output::OutputMode::Quiet && quiet_was_explicit {
return Ok(());
}
let caps = build_capabilities(cli);
let s = serde_json::to_string_pretty(&caps).context("serialise capabilities inventory")?;
#[allow(clippy::print_stdout)]
{
println!("{s}");
}
Ok(())
}
// Unit tests for the capabilities inventory. They run against a hand-built
// clap command tree (`test_cli`) rather than the real CLI definition.
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
use super::*;
// Builds a stand-in command tree: three global flags (`--mode`, `--json`,
// `--quiet`/`-q`) plus one subcommand per name known to `metadata_for`.
// `graph` is only registered when the `citation` feature is enabled.
fn test_cli() -> clap::Command {
use clap::{Arg, ArgAction, Command};
let mode_values = ["human", "json", "quiet", "mcp"];
let cmd = Command::new("doiget")
.arg(
Arg::new("mode")
.long("mode")
.global(true)
.value_parser(clap::builder::PossibleValuesParser::new(mode_values))
.help("Output mode (human|json|quiet|mcp)."),
)
.arg(
Arg::new("json")
.long("json")
.global(true)
.action(ArgAction::SetTrue)
.help("Short for `--mode json`."),
)
.arg(
Arg::new("quiet")
.long("quiet")
.short('q')
.global(true)
.action(ArgAction::SetTrue)
.help("Short for `--mode quiet`."),
)
.subcommand(
Command::new("fetch")
.about("Fetch a single paper PDF")
.arg(Arg::new("ref").required(true))
.arg(
Arg::new("dry-run")
.long("dry-run")
.action(ArgAction::SetTrue),
),
)
.subcommand(
Command::new("batch")
.about("Fetch many refs")
.arg(Arg::new("path").required(true))
.arg(
Arg::new("dry-run")
.long("dry-run")
.action(ArgAction::SetTrue),
),
)
.subcommand(
Command::new("info")
.about("Show metadata")
.arg(Arg::new("ref").required(true)),
)
.subcommand(Command::new("list-recent").about("List recent"))
.subcommand(
Command::new("search")
.about("Search local")
.arg(Arg::new("query").required(true)),
)
.subcommand(
Command::new("bib")
.about("BibTeX export")
.arg(Arg::new("ref").required(true)),
)
.subcommand(
Command::new("csl")
.about("CSL export")
.arg(Arg::new("ref").required(true)),
)
.subcommand(
Command::new("audit-log")
.about("Audit log")
.arg(Arg::new("verify").long("verify").action(ArgAction::SetTrue)),
)
.subcommand(Command::new("provenance").about("Provenance ops"))
.subcommand(
Command::new("config")
.about("Config")
.arg(Arg::new("action").required(true)),
)
.subcommand(Command::new("serve").about("MCP server"));
#[cfg(feature = "citation")]
let cmd = cmd.subcommand(
Command::new("graph")
.about("Citation graph")
.arg(Arg::new("ref").required(true)),
);
cmd.subcommand(Command::new("capabilities").about("Capabilities"))
}
// Convenience: the inventory built from the stand-in command tree.
fn caps() -> Capabilities {
build_capabilities(&test_cli())
}
// The serialised inventory parses back as JSON and carries every top-level key.
#[test]
fn capabilities_serialises_to_valid_json() {
let s = serde_json::to_string_pretty(&caps()).expect("serialise");
let v: serde_json::Value = serde_json::from_str(&s).expect("parse round-trip");
for key in [
"version",
"features",
"modes",
"global_flags",
"subcommands",
"env_vars",
"mcp_tools",
"docs",
"user_extension_count",
] {
assert!(
v.get(key).is_some(),
"top-level key `{key}` missing from capabilities JSON: {v}"
);
}
}
// Pins the exact list and order of output mode names.
#[test]
fn modes_field_matches_output_mode_enum() {
assert_eq!(caps().modes, &["human", "json", "quiet", "mcp"]);
}
// Naming convention: every advertised env var starts with `DOIGET_`.
#[test]
fn env_vars_all_use_doiget_prefix() {
for ev in ENV_VARS {
assert!(
ev.name.starts_with("DOIGET_"),
"env var name MUST use DOIGET_ prefix, got `{}`",
ev.name
);
}
}
// Naming convention: every advertised MCP tool starts with `doiget_`.
#[test]
fn mcp_tools_all_use_doiget_prefix() {
for t in MCP_TOOLS {
assert!(
t.name.starts_with("doiget_"),
"MCP tool name MUST use doiget_ prefix, got `{}`",
t.name
);
}
}
// Every example must invoke `doiget` (at the start, or after a prefix such
// as an env assignment) and mention the subcommand it belongs to.
#[test]
fn subcommand_examples_reference_the_subcommand_name() {
for sub in &caps().subcommands {
for ex in sub.examples {
assert!(
ex.starts_with("doiget ") || ex.contains(" doiget "),
"example `{ex}` for `{}` must invoke `doiget` somewhere",
sub.name
);
assert!(
ex.contains(&sub.name),
"example `{ex}` does not mention subcommand `{}`",
sub.name
);
}
}
}
// Drift guard: compares the set of names in `ENV_VARS` with a literal list
// (order-insensitive, via `BTreeSet`).
#[test]
fn env_vars_exact_set_matches_expected() {
let actual: std::collections::BTreeSet<&str> = ENV_VARS.iter().map(|ev| ev.name).collect();
let expected: std::collections::BTreeSet<&str> = [
"DOIGET_STORE_ROOT",
"DOIGET_CACHE_ROOT",
"DOIGET_LOG_PATH",
"DOIGET_LOG_RETENTION_DAYS",
"DOIGET_USER_AGENT",
"DOIGET_UNPAYWALL_EMAIL",
"DOIGET_MODE",
"DOIGET_CONTACT_EMAIL",
"DOIGET_ENABLE_OPENALEX",
"DOIGET_ARXIV_BASE",
"DOIGET_CROSSREF_BASE",
"DOIGET_UNPAYWALL_BASE",
]
.into_iter()
.collect();
assert_eq!(
actual, expected,
"ENV_VARS table drifted from the expected canonical set; \
update both `ENV_VARS` and this test together (and CONFIG.md §4 \
if the new var is user-documented)."
);
}
// Drift guard: compares the set of names in `MCP_TOOLS` with a literal list
// (order-insensitive, via `BTreeSet`).
#[test]
fn mcp_tools_exact_set_matches_expected() {
let actual: std::collections::BTreeSet<&str> = MCP_TOOLS.iter().map(|t| t.name).collect();
let expected: std::collections::BTreeSet<&str> = [
"doiget_resolve_paper",
"doiget_fetch_paper",
"doiget_metadata_only",
"doiget_batch_fetch",
"doiget_info",
"doiget_search_local",
"doiget_list_recent",
"doiget_paper_pdf_path",
"doiget_capability_profile",
"doiget_health",
"doiget_expand_citation_graph",
"doiget_bibtex_export",
"doiget_csl_export",
"doiget_batch_from_bibliography",
]
.into_iter()
.collect();
assert_eq!(
actual, expected,
"MCP_TOOLS table drifted from the expected set; update both \
`MCP_TOOLS` and this test together (and docs/MCP_TOOLS.md §1)."
);
}
// Pins the wire format of `JsonMode`: an object tagged by `status`.
#[test]
fn json_mode_serialises_with_status_discriminant() {
let s = serde_json::to_string(&JsonMode::Artifact).expect("serialise");
assert_eq!(
s, r#"{"status":"artifact"}"#,
"Artifact must emit a status-tagged object"
);
let s = serde_json::to_string(&JsonMode::Supported).expect("serialise");
assert_eq!(s, r#"{"status":"supported"}"#);
}
// `--mode` uses a possible-values parser, so it must be reported as an enum
// flag carrying all four values (compared after sorting).
#[test]
fn mode_flag_carries_enum_kind_and_all_four_values() {
let global = &caps().global_flags;
let mode = global
.iter()
.find(|f| f.name == "--mode")
.expect("--mode flag is in global_flags");
assert!(
matches!(mode.kind, FlagKind::Enum),
"--mode kind MUST be Enum, got {:?}",
mode.kind
);
let vs = mode.values.as_ref().expect("--mode carries values");
let mut sorted = vs.clone();
sorted.sort();
assert_eq!(sorted, vec!["human", "json", "mcp", "quiet"]);
}
// Only asserts anything when the `oa-only` feature is enabled for the test
// build; otherwise the body is skipped.
#[test]
fn compile_time_features_contains_oa_only_under_default() {
if cfg!(feature = "oa-only") {
let f = compile_time_features();
assert!(
f.contains(&"oa-only"),
"oa-only feature was enabled at compile time but \
`compile_time_features()` did not list it: {f:?}"
);
}
}
// The reported version is the crate version baked in at compile time.
#[test]
fn version_is_cargo_pkg_version() {
assert_eq!(caps().version, env!("CARGO_PKG_VERSION"));
}
// Writes a `config.toml` with two `network.additional_hosts` entries under a
// temp dir and expects a count of 2. Marked serial because it mutates
// process-wide environment variables.
#[test]
#[serial_test::serial]
fn user_extension_count_reflects_config_toml_entries() {
let tmp = tempfile::TempDir::new().expect("tempdir");
let cfg_root = camino::Utf8Path::from_path(tmp.path()).expect("utf8 tempdir");
let doiget_dir = cfg_root.join("doiget");
std::fs::create_dir_all(doiget_dir.as_std_path()).expect("mk dir");
let config_toml = doiget_dir.join("config.toml");
std::fs::write(
config_toml.as_std_path(),
"[[network.additional_hosts]]\n\
host = \"example.org\"\n\
\n\
[[network.additional_hosts]]\n\
host = \"*.example.net\"\n\
note = \"university OA mirror\"\n",
)
.expect("write config.toml");
// Presumably steers config-dir resolution to the temp dir on every
// platform — confirm against `config_dir_utf8`. The guards restore the
// previous values when they go out of scope at the end of the test.
let _x = EnvGuard::set("XDG_CONFIG_HOME", cfg_root.as_str());
let _a = EnvGuard::unset("APPDATA");
let _h = EnvGuard::unset("HOME");
let _u = EnvGuard::unset("USERPROFILE");
let cli = test_cli();
let caps = build_capabilities(&cli);
assert_eq!(
caps.user_extension_count, 2,
"expected 2 user-extension hosts, got {}",
caps.user_extension_count
);
}
// Same environment setup but with no `config.toml` present: the count must
// be 0.
#[test]
#[serial_test::serial]
fn user_extension_count_is_zero_without_config_toml() {
let tmp = tempfile::TempDir::new().expect("tempdir");
let cfg_root = camino::Utf8Path::from_path(tmp.path()).expect("utf8 tempdir");
let _x = EnvGuard::set("XDG_CONFIG_HOME", cfg_root.as_str());
let _a = EnvGuard::unset("APPDATA");
let _h = EnvGuard::unset("HOME");
let _u = EnvGuard::unset("USERPROFILE");
let caps = build_capabilities(&test_cli());
assert_eq!(caps.user_extension_count, 0);
}
// Scoped environment-variable override: remembers the value present at
// construction and puts it back (or removes the variable) on drop.
struct EnvGuard {
var: &'static str,
prior: Option<std::ffi::OsString>,
}
impl EnvGuard {
// Sets `var` to `value`, remembering what was there before.
fn set(var: &'static str, value: &str) -> Self {
let prior = std::env::var_os(var);
std::env::set_var(var, value);
EnvGuard { var, prior }
}
// Removes `var`, remembering what was there before.
fn unset(var: &'static str) -> Self {
let prior = std::env::var_os(var);
std::env::remove_var(var);
EnvGuard { var, prior }
}
}
impl Drop for EnvGuard {
fn drop(&mut self) {
match &self.prior {
Some(v) => std::env::set_var(self.var, v),
None => std::env::remove_var(self.var),
}
}
}
// Guards against the stand-in tree and `metadata_for` drifting apart: every
// subcommand except `help` must have metadata.
#[test]
fn every_test_cli_subcommand_has_metadata() {
for sub in test_cli().get_subcommands() {
let name = sub.get_name();
if name == "help" {
continue;
}
assert!(
metadata_for(name).is_some(),
"subcommand `{name}` lacks metadata in `metadata_for`"
);
}
}
}