use camino::Utf8PathBuf;
use clap::{Parser, Subcommand, ValueEnum};
use doiget_cli::commands::output::{self, FlagInput, OutputMode};
// Value set accepted by the global `--color` flag (see `Cli::color`).
//
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into user-visible help text, which would change the CLI.
//
// `rename_all = "lower"` makes clap accept the lowercase spellings of the
// variant names; `OutputColor::as_env_value` returns matching strings, and a
// unit test in this file pins that the two stay in sync.
//
// This file only forwards the selected value to the `DOIGET_COLOR`
// environment variable; how each value is interpreted is decided by whatever
// reads that variable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
#[clap(rename_all = "lower")]
enum OutputColor {
Auto,
Always,
Never,
}
impl OutputColor {
fn as_env_value(self) -> &'static str {
match self {
OutputColor::Auto => "auto",
OutputColor::Always => "always",
OutputColor::Never => "never",
}
}
}
/// clap `value_parser` for path-valued flags.
///
/// Accepts any non-empty string without an embedded NUL and wraps it in a
/// [`Utf8PathBuf`]; the path is not checked for existence or normalised.
///
/// # Errors
///
/// Returns a human-readable message when `raw` is empty or contains a NUL
/// byte.
fn parse_utf8_path(raw: &str) -> Result<Utf8PathBuf, String> {
    match raw {
        "" => Err(String::from("path must not be empty")),
        candidate if candidate.contains('\0') => {
            Err(String::from("path must not contain NUL bytes"))
        }
        candidate => Ok(Utf8PathBuf::from(candidate)),
    }
}
// Nested subcommands of `doiget provenance` (see `Command::Provenance`).
//
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into user-visible help text, which would change the CLI.
#[derive(Subcommand, Debug)]
enum ProvenanceAction {
// `provenance migrate`: dispatched to
// `doiget_cli::commands::provenance::migrate(dry_run, mode)` by `run_dispatch`.
Migrate {
// `--dry-run` switch; forwarded unchanged to the migrate command.
#[arg(long)]
dry_run: bool,
},
}
// Top-level command-line interface of the `doiget` binary, parsed with clap's
// derive API.
//
// Plain `//` comments are used throughout on purpose: clap's derive macros
// turn `///` doc comments into user-visible help text, which would change the
// CLI (and the inventory the `capabilities` subcommand builds from it).
//
// Every flag declared on this struct is `global = true`, so clap accepts it
// after a subcommand name as well as before it.
//
// In `long_about`, each line ends with `\n\` (newline + line continuation) and
// table rows start with the `\x20` escape so that the leading space survives
// the continuation, which otherwise skips leading whitespace on the next line.
#[derive(Parser, Debug)]
#[command(
name = "doiget",
version,
about = "Fetch academic papers via official Open Access APIs.",
long_about = "doiget is an OA-first paper fetcher and stdio MCP server.\n\
\n\
Subcommands:\n\
\x20 fetch Fetch a single paper PDF by DOI or arXiv id\n\
\x20 batch Fetch many refs from a newline-separated file\n\
\x20 bib Export a stored entry as BibTeX\n\
\x20 csl Export a stored entry as CSL JSON\n\
\x20 info Show metadata for a stored entry\n\
\x20 search Search the local store by title / authors / venue\n\
\x20 list-recent List the most recently fetched entries\n\
\x20 audit-log Inspect or verify the provenance log\n\
\x20 provenance Provenance-log lifecycle ops (migrate v1 -> v2)\n\
\x20 config Show or doctor the resolved configuration\n\
\x20 serve Run as an MCP server over stdio\n\
\x20 graph Expand a DOI's citation neighborhood via OpenAlex\n\
\x20 (requires --features citation + DOIGET_ENABLE_OPENALEX)\n\
\x20 capabilities Emit a JSON inventory of the binary's full surface\n\
\x20 (for LLM cold-boot; #214)\n\
\n\
See README.md and docs/ for the full specification."
)]
struct Cli {
// `--mode <MODE>`: explicit output mode. Mutually exclusive with `--json`
// and `--quiet`; surfaced as `FlagInput::Explicit` by `flag_input_from`.
#[arg(
long,
global = true,
value_enum,
conflicts_with_all = ["json", "quiet"],
)]
mode: Option<OutputMode>,
// `--json`: shorthand switch, surfaced as `FlagInput::JsonShort`.
// Mutually exclusive with `--mode` and `--quiet`.
#[arg(long, global = true, conflicts_with_all = ["mode", "quiet"])]
json: bool,
// `-q` / `--quiet`: shorthand switch, surfaced as `FlagInput::QuietShort`.
// Mutually exclusive with `--mode` and `--json`.
#[arg(short = 'q', long, global = true, conflicts_with_all = ["mode", "json"])]
quiet: bool,
// `--store-root <PATH>`: validated by `parse_utf8_path`; when given,
// `apply_global_overrides` exports it as `DOIGET_STORE_ROOT`.
#[arg(long, global = true, value_name = "PATH", value_parser = parse_utf8_path)]
store_root: Option<Utf8PathBuf>,
// `--log-path <PATH>`: validated by `parse_utf8_path`; when given,
// `apply_global_overrides` exports it as `DOIGET_LOG_PATH`.
#[arg(long, global = true, value_name = "PATH", value_parser = parse_utf8_path)]
log_path: Option<Utf8PathBuf>,
// `--color <auto|always|never>`: when given, exported as `DOIGET_COLOR`.
#[arg(long, global = true, value_enum)]
color: Option<OutputColor>,
// `--progress` / `--no-progress`: mutually exclusive switches, collapsed
// into a tri-state by `Cli::progress_choice` and exported as
// `DOIGET_PROGRESS` ("1" / "0") when either one is present.
#[arg(long, global = true, conflicts_with = "no_progress")]
progress: bool,
#[arg(long, global = true, conflicts_with = "progress")]
no_progress: bool,
// Selected subcommand. Optional at the clap level; `run_dispatch` reports
// an error when it is absent.
#[command(subcommand)]
command: Option<Command>,
}
impl Cli {
    /// Collapses the `--progress` / `--no-progress` switches into a tri-state.
    ///
    /// Returns `Some(true)` when `--progress` was given, `Some(false)` when
    /// `--no-progress` was given, and `None` when neither was. `--progress`
    /// is checked first, so it wins if both fields are ever set.
    fn progress_choice(&self) -> Option<bool> {
        if self.progress {
            Some(true)
        } else if self.no_progress {
            Some(false)
        } else {
            None
        }
    }
}
// Top-level subcommands of `doiget`; `run_dispatch` maps each variant onto
// its implementation in `doiget_cli::commands` (or `doiget_mcp` for `Serve`).
//
// Plain `//` comments are used on purpose: clap's derive macros turn `///`
// doc comments into user-visible help text, which would change the CLI.
//
// Fields that carry no `#[arg(long)]` / `short` attribute (`ref_`, `path`,
// `query`, `action`, `limit`) are positional arguments under clap's derive
// rules; their declaration order is therefore significant.
#[derive(Subcommand, Debug)]
enum Command {
// `fetch <REF> [--dry-run]`: runs `commands::fetch::run_with_options`.
Fetch {
ref_: String,
#[arg(long)]
dry_run: bool,
},
// `batch <PATH> [--dry-run]`: runs `commands::batch::run_with_options`.
Batch {
path: String,
#[arg(long)]
dry_run: bool,
},
// `info <REF>`: runs `commands::info::run`.
Info {
ref_: String,
},
// `list-recent [LIMIT]`: runs `commands::list_recent::run`; the limit
// defaults to 10 when omitted.
ListRecent {
#[arg(default_value_t = 10)]
limit: usize,
},
// `search <QUERY>`: runs `commands::search::run`.
Search {
query: String,
},
// `bib <REF>`: runs `commands::bib::run`.
Bib {
ref_: String,
},
// `csl <REF>`: runs `commands::csl::run`.
Csl {
ref_: String,
},
// `audit-log [--verify]`: runs `commands::audit_log::run`.
AuditLog {
#[arg(long)]
verify: bool,
},
// `provenance <ACTION>`: nested subcommands, see `ProvenanceAction`.
Provenance {
#[command(subcommand)]
action: ProvenanceAction,
},
// `serve`: starts `doiget_mcp::Server`; `forced_implicit_for` pins the
// output mode to `OutputMode::Mcp` for this variant.
Serve,
// `capabilities`: runs `commands::capabilities::run` against the clap
// command definition generated from `Cli`.
Capabilities,
// `config <ACTION>`: the action is passed through as a free-form string to
// `commands::config::run`; it is not validated in this file.
Config {
action: String,
},
// `graph <REF> [--depth N] [--total N] [--per-paper N]`: only compiled when
// the `citation` cargo feature is enabled. The three limits are optional
// and forwarded as-is to `commands::graph::run`.
#[cfg(feature = "citation")]
Graph {
ref_: String,
#[arg(long)]
depth: Option<u32>,
#[arg(long)]
total: Option<u32>,
#[arg(long)]
per_paper: Option<u32>,
},
}
/// Binary entry point: installs logging, parses the CLI, and runs the
/// selected subcommand.
///
/// Tracing output goes to stderr and is filtered by the default
/// `EnvFilter` environment configuration, keeping stdout free for command
/// output.
///
/// A failure carrying a `CliExit` terminates the process immediately with the
/// embedded exit code; any other error is returned to the runtime wrapper
/// unchanged.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    use doiget_cli::commands::fetch::CliExit;

    tracing_subscriber::fmt()
        .with_writer(std::io::stderr)
        .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
        .init();

    let failure = match run_dispatch(Cli::parse()).await {
        Ok(()) => return Ok(()),
        Err(failure) => failure,
    };

    // A `CliExit` is a request for a specific exit status, not an error to
    // be reported.
    if let Some(CliExit(code)) = failure.downcast_ref::<CliExit>() {
        std::process::exit(*code);
    }
    Err(failure)
}
/// Translates the three output-related flags into a single [`FlagInput`].
///
/// Precedence is `--mode`, then `--json`, then `--quiet`. clap already
/// rejects combinations of these flags, so at most one is expected to be set.
fn flag_input_from(cli: &Cli) -> FlagInput {
    match (cli.mode, cli.json, cli.quiet) {
        (Some(explicit), _, _) => FlagInput::Explicit(explicit),
        (None, true, _) => FlagInput::JsonShort,
        (None, false, true) => FlagInput::QuietShort,
        (None, false, false) => FlagInput::None,
    }
}
/// Returns the output mode a subcommand imposes implicitly, if any.
///
/// Only `serve` forces a mode ([`OutputMode::Mcp`]); every other subcommand,
/// and the absence of one, yields `None`.
fn forced_implicit_for(command: &Option<Command>) -> Option<OutputMode> {
    if matches!(command, Some(Command::Serve)) {
        Some(OutputMode::Mcp)
    } else {
        None
    }
}
fn apply_global_overrides(cli: &Cli) {
if let Some(v) = cli.store_root.as_deref() {
std::env::set_var("DOIGET_STORE_ROOT", v.as_str());
}
if let Some(v) = cli.log_path.as_deref() {
std::env::set_var("DOIGET_LOG_PATH", v.as_str());
}
if let Some(c) = cli.color {
std::env::set_var("DOIGET_COLOR", c.as_env_value());
}
if let Some(on) = cli.progress_choice() {
std::env::set_var("DOIGET_PROGRESS", if on { "1" } else { "0" });
}
}
async fn run_dispatch(cli: Cli) -> anyhow::Result<()> {
apply_global_overrides(&cli);
let out = output::resolve(
forced_implicit_for(&cli.command),
flag_input_from(&cli),
std::env::var("DOIGET_MODE").ok().as_deref(),
output::stdout_is_tty(),
);
let mode = out.mode;
match cli.command {
None => {
anyhow::bail!("no subcommand. Run `doiget --help` for available commands.");
}
Some(Command::AuditLog { verify }) => doiget_cli::commands::audit_log::run(verify, mode),
Some(Command::Provenance { action }) => match action {
ProvenanceAction::Migrate { dry_run } => {
doiget_cli::commands::provenance::migrate(dry_run, mode)
}
},
Some(Command::Config { action }) => doiget_cli::commands::config::run(action, mode),
Some(Command::Info { ref_ }) => doiget_cli::commands::info::run(ref_, mode),
Some(Command::ListRecent { limit }) => doiget_cli::commands::list_recent::run(limit, mode),
Some(Command::Search { query }) => doiget_cli::commands::search::run(query, mode),
Some(Command::Fetch { ref_, dry_run }) => {
doiget_cli::commands::fetch::run_with_options(ref_, dry_run, mode).await
}
Some(Command::Batch { path, dry_run }) => {
doiget_cli::commands::batch::run_with_options(path, dry_run, mode).await
}
Some(Command::Bib { ref_ }) => doiget_cli::commands::bib::run(ref_, mode),
Some(Command::Csl { ref_ }) => doiget_cli::commands::csl::run(ref_, mode),
Some(Command::Serve) => {
debug_assert_eq!(mode, OutputMode::Mcp, "serve must resolve to Mcp");
let profile = doiget_core::CapabilityProfile::from_env()?;
doiget_mcp::Server::new(profile).run().await
}
Some(Command::Capabilities) => {
let cli_cmd = <Cli as clap::CommandFactory>::command();
doiget_cli::commands::capabilities::run(&cli_cmd, mode, out.quiet_was_explicit)
}
#[cfg(feature = "citation")]
Some(Command::Graph {
ref_,
depth,
total,
per_paper,
}) => doiget_cli::commands::graph::run(ref_, depth, total, per_paper, mode).await,
}
}
#[cfg(test)]
// Test code may unwrap/expect/panic: a violated assumption should fail the test.
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use clap::Parser;
    use serial_test::serial;

    /// Restores one environment variable to its pre-test state on drop.
    ///
    /// The snapshot is taken with `std::env::var_os`, so a pre-existing value
    /// that is not valid Unicode is put back verbatim. (`std::env::var` would
    /// report such a value as an error, and the guard would then delete it.)
    struct EnvGuard {
        key: &'static str,
        prev: Option<std::ffi::OsString>,
    }

    impl EnvGuard {
        /// Snapshots the current value of `key`; keep the guard alive for the
        /// duration of the test.
        fn save(key: &'static str) -> Self {
            Self {
                key,
                prev: std::env::var_os(key),
            }
        }
    }

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            match &self.prev {
                Some(v) => std::env::set_var(self.key, v),
                None => std::env::remove_var(self.key),
            }
        }
    }

    /// Parses `args` as if they followed the program name `doiget`.
    ///
    /// Uses `Cli::parse_from`, so invalid arguments terminate the test
    /// process; tests that expect a parse failure call `Cli::try_parse_from`
    /// directly instead.
    fn parse_cli(args: &[&str]) -> Cli {
        let mut argv = vec!["doiget"];
        argv.extend_from_slice(args);
        Cli::parse_from(argv)
    }

    // --- `--store-root` / `--log-path` -> environment -----------------------
    // Tests that mutate the process environment are `#[serial]`.

    #[test]
    #[serial]
    fn store_root_flag_overwrites_env() {
        let _g = EnvGuard::save("DOIGET_STORE_ROOT");
        std::env::set_var("DOIGET_STORE_ROOT", "/env/path");
        let cli = parse_cli(&["--store-root", "/flag/path", "capabilities"]);
        apply_global_overrides(&cli);
        assert_eq!(
            std::env::var("DOIGET_STORE_ROOT").unwrap(),
            "/flag/path",
            "--store-root MUST win over DOIGET_STORE_ROOT env"
        );
    }

    #[test]
    #[serial]
    fn store_root_env_preserved_when_no_flag() {
        let _g = EnvGuard::save("DOIGET_STORE_ROOT");
        std::env::set_var("DOIGET_STORE_ROOT", "/env/path");
        let cli = parse_cli(&["capabilities"]);
        apply_global_overrides(&cli);
        assert_eq!(
            std::env::var("DOIGET_STORE_ROOT").unwrap(),
            "/env/path",
            "DOIGET_STORE_ROOT env MUST survive when --store-root is not given"
        );
    }

    #[test]
    #[serial]
    fn log_path_flag_overwrites_env() {
        let _g = EnvGuard::save("DOIGET_LOG_PATH");
        std::env::set_var("DOIGET_LOG_PATH", "/env/log.jsonl");
        let cli = parse_cli(&["--log-path", "/flag/log.jsonl", "capabilities"]);
        apply_global_overrides(&cli);
        assert_eq!(
            std::env::var("DOIGET_LOG_PATH").unwrap(),
            "/flag/log.jsonl",
            "--log-path MUST win over DOIGET_LOG_PATH env"
        );
    }

    #[test]
    #[serial]
    fn log_path_env_preserved_when_no_flag() {
        let _g = EnvGuard::save("DOIGET_LOG_PATH");
        std::env::set_var("DOIGET_LOG_PATH", "/env/log.jsonl");
        let cli = parse_cli(&["capabilities"]);
        apply_global_overrides(&cli);
        assert_eq!(
            std::env::var("DOIGET_LOG_PATH").unwrap(),
            "/env/log.jsonl",
            "DOIGET_LOG_PATH env MUST survive when --log-path is not given"
        );
    }

    // --- `--color` -> DOIGET_COLOR ------------------------------------------

    #[test]
    #[serial]
    fn color_flag_writes_doiget_color_env() {
        let _g = EnvGuard::save("DOIGET_COLOR");
        std::env::remove_var("DOIGET_COLOR");
        for (arg, expected) in [("auto", "auto"), ("always", "always"), ("never", "never")] {
            let cli = parse_cli(&["--color", arg, "capabilities"]);
            apply_global_overrides(&cli);
            assert_eq!(
                std::env::var("DOIGET_COLOR").unwrap(),
                expected,
                "--color {arg} MUST write {expected} to DOIGET_COLOR"
            );
        }
    }

    #[test]
    #[serial]
    fn color_unset_when_no_flag_leaves_env_untouched() {
        let _g = EnvGuard::save("DOIGET_COLOR");
        std::env::remove_var("DOIGET_COLOR");
        let cli = parse_cli(&["capabilities"]);
        apply_global_overrides(&cli);
        assert!(
            std::env::var("DOIGET_COLOR").is_err(),
            "absent --color MUST leave DOIGET_COLOR unset"
        );
    }

    #[test]
    #[serial]
    fn color_env_preserved_when_no_flag() {
        let _g = EnvGuard::save("DOIGET_COLOR");
        std::env::set_var("DOIGET_COLOR", "sentinel");
        let cli = parse_cli(&["capabilities"]);
        apply_global_overrides(&cli);
        assert_eq!(
            std::env::var("DOIGET_COLOR").unwrap(),
            "sentinel",
            "absent --color MUST NOT clobber a user-set DOIGET_COLOR env"
        );
    }

    /// Guards against `as_env_value` drifting from the names clap derives
    /// for the `--color` values.
    #[test]
    fn output_color_env_strings_match_clap_parser_side() {
        for variant in [OutputColor::Auto, OutputColor::Always, OutputColor::Never] {
            let parser_side = variant
                .to_possible_value()
                .expect("clap value-enum exposes every non-skipped variant");
            assert_eq!(
                variant.as_env_value(),
                parser_side.get_name(),
                "as_env_value MUST mirror clap's rename_all-driven name for {variant:?}"
            );
        }
    }

    // --- `--progress` / `--no-progress` -> DOIGET_PROGRESS ------------------

    #[test]
    #[serial]
    fn progress_flag_writes_one() {
        let _g = EnvGuard::save("DOIGET_PROGRESS");
        std::env::remove_var("DOIGET_PROGRESS");
        let cli = parse_cli(&["--progress", "capabilities"]);
        apply_global_overrides(&cli);
        assert_eq!(std::env::var("DOIGET_PROGRESS").unwrap(), "1");
    }

    #[test]
    #[serial]
    fn no_progress_flag_writes_zero() {
        let _g = EnvGuard::save("DOIGET_PROGRESS");
        std::env::remove_var("DOIGET_PROGRESS");
        let cli = parse_cli(&["--no-progress", "capabilities"]);
        apply_global_overrides(&cli);
        assert_eq!(std::env::var("DOIGET_PROGRESS").unwrap(), "0");
    }

    #[test]
    #[serial]
    fn neither_progress_nor_no_progress_leaves_env_untouched() {
        let _g = EnvGuard::save("DOIGET_PROGRESS");
        std::env::set_var("DOIGET_PROGRESS", "sentinel");
        let cli = parse_cli(&["capabilities"]);
        apply_global_overrides(&cli);
        assert_eq!(
            std::env::var("DOIGET_PROGRESS").unwrap(),
            "sentinel",
            "absent --progress/--no-progress MUST NOT clobber a user-set DOIGET_PROGRESS env"
        );
    }

    // --- `parse_utf8_path` --------------------------------------------------

    #[test]
    fn parse_utf8_path_accepts_normal_path() {
        let p = parse_utf8_path("/tmp/papers").expect("normal path");
        assert_eq!(p.as_str(), "/tmp/papers");
    }

    #[test]
    fn parse_utf8_path_accepts_windows_style_path() {
        let p = parse_utf8_path("C:\\Users\\me\\papers").expect("windows path");
        assert_eq!(p.as_str(), "C:\\Users\\me\\papers");
    }

    #[test]
    fn parse_utf8_path_rejects_empty_string() {
        let err = parse_utf8_path("").expect_err("empty rejected");
        assert!(
            err.contains("empty"),
            "error message MUST identify the empty-string condition, got: {err}"
        );
    }

    #[test]
    fn parse_utf8_path_rejects_nul_byte() {
        let err = parse_utf8_path("a/b\0/c").expect_err("NUL rejected");
        assert!(
            err.to_ascii_lowercase().contains("nul"),
            "error message MUST identify the NUL condition, got: {err}"
        );
    }

    // --- the validator is wired into clap -----------------------------------

    #[test]
    fn cli_rejects_empty_path_flag_value_at_parse_time() {
        let res = Cli::try_parse_from(["doiget", "--store-root", "", "capabilities"]);
        assert!(
            res.is_err(),
            "--store-root with empty value MUST parse-fail"
        );
    }

    #[test]
    fn cli_rejects_nul_in_path_flag_value_at_parse_time() {
        let res = Cli::try_parse_from(["doiget", "--log-path", "a/b\0/c", "capabilities"]);
        assert!(res.is_err(), "--log-path with NUL byte MUST parse-fail");
    }
}