doiget_cli/commands/config.rs
1//! `doiget config <action>` — config introspection.
2//!
3//! This subcommand is intentionally read-only and does NOT touch the network
4//! or instantiate the Store. Phase 1 resolves config from environment
5//! variables only with default fallbacks; the user `config.toml` reader
6//! lands in a follow-up. See `docs/CONFIG.md` for the canonical schema.
7//!
8//! `print_stdout` is denied workspace-wide for MCP stdio safety (ADR-0001 /
9//! `docs/SECURITY.md` §3). The `config show` and `config path` actions are
10//! the *spec'd* stdout channel for human-facing introspection — they are
11//! never invoked from inside an MCP session (`doiget serve` runs a
12//! different code path), so the lint is locally relaxed below.
13
14use anyhow::{bail, Result};
15use camino::Utf8PathBuf;
16
17/// Snapshot of the env-var + default-fallback config that `doiget` would
18/// use on the current machine.
19///
20/// Phase 1 surface: env vars only (`DOIGET_STORE_ROOT`, `DOIGET_LOG_DIR`,
21/// `DOIGET_CONTACT_EMAIL`, `DOIGET_UNPAYWALL_EMAIL`) layered over
22/// XDG / known-folder defaults. Phase 2 will layer the user config.toml
23/// underneath the env vars per `docs/CONFIG.md` §1.
24#[derive(Debug, serde::Serialize)]
25pub struct ResolvedConfig {
26 /// Root of the on-disk paper store. Default: `$HOME/papers`.
27 pub store_root: Utf8PathBuf,
28 /// Directory holding doiget's append-only logs.
29 pub log_dir: Utf8PathBuf,
30 /// JSON-Lines provenance log file path. Always `<log_dir>/access.jsonl`.
31 pub log_path: Utf8PathBuf,
32 /// Directory holding `config.toml` and `credentials.toml`.
33 pub config_dir: Utf8PathBuf,
34 /// Path of the user config file (may not exist on disk yet).
35 pub config_path: Utf8PathBuf,
36 /// Contact email for the polite User-Agent header (and Unpaywall fallback).
37 #[serde(skip_serializing_if = "Option::is_none")]
38 pub contact_email: Option<String>,
39 /// Unpaywall-specific contact email; falls back to `contact_email` when unset.
40 #[serde(skip_serializing_if = "Option::is_none")]
41 pub unpaywall_email: Option<String>,
42}
43
44impl ResolvedConfig {
45 /// Resolve the live config from process environment + platform defaults.
46 ///
47 /// Errors only if neither a home directory nor a config directory can
48 /// be determined for the current user (e.g. an unknown / locked-down
49 /// platform); on every realistic POSIX or Windows host this returns
50 /// `Ok` even with no `DOIGET_*` env vars set.
51 pub fn from_env() -> Result<Self> {
52 // `dirs::home_dir()` / `dirs::config_dir()` return `std::path::PathBuf`;
53 // hoist them into `Utf8PathBuf` immediately at the OS boundary so the
54 // rest of the function (and the public struct) stays UTF-8-only per
55 // the workspace `disallowed-types` clippy rule. A non-UTF-8 home dir
56 // is exotic and unsupported; surface it as an explicit error.
57 let home =
58 Utf8PathBuf::try_from(dirs::home_dir().ok_or_else(|| anyhow::anyhow!("no home dir"))?)?;
59 let cfg = Utf8PathBuf::try_from(
60 dirs::config_dir().ok_or_else(|| anyhow::anyhow!("no config dir"))?,
61 )?;
62
63 let store_root = std::env::var("DOIGET_STORE_ROOT")
64 .map(Utf8PathBuf::from)
65 .unwrap_or_else(|_| home.join("papers"));
66 let log_dir = std::env::var("DOIGET_LOG_DIR")
67 .map(Utf8PathBuf::from)
68 .unwrap_or_else(|_| cfg.join("doiget"));
69
70 let log_path = log_dir.join("access.jsonl");
71 let config_dir = cfg.join("doiget");
72 let config_path = config_dir.join("config.toml");
73
74 Ok(Self {
75 store_root,
76 log_dir,
77 log_path,
78 config_dir,
79 config_path,
80 contact_email: std::env::var("DOIGET_CONTACT_EMAIL").ok(),
81 unpaywall_email: std::env::var("DOIGET_UNPAYWALL_EMAIL").ok(),
82 })
83 }
84}
85
86/// Dispatch entrypoint for `doiget config <action>`.
87///
88/// `action` is one of `show`, `path`, `doctor`. Anything else returns
89/// `Err`; clap currently passes the raw string through.
90//
91// `print_stdout` and `print_stderr` are workspace-deny / workspace-warn for
92// MCP stdio safety. The `config` subcommand is the explicit human-facing
93// stdout channel for the resolved config; `doctor`'s checklist lines also
94// belong on stderr by design (stdout stays clean for `| jq` style pipes
95// when we add `--json` later).
96#[allow(clippy::print_stdout, clippy::print_stderr)]
97pub fn run(action: String) -> Result<()> {
98 let cfg = ResolvedConfig::from_env()?;
99 match action.as_str() {
100 "show" => {
101 let s = toml::to_string_pretty(&cfg)?;
102 print!("{s}");
103 }
104 "path" => {
105 println!("{}", cfg.config_path);
106 }
107 "doctor" => {
108 let mut all_ok = true;
109 check(
110 "store_root parent exists",
111 cfg.store_root.parent().map(|p| p.exists()).unwrap_or(true),
112 &mut all_ok,
113 );
114 check(
115 "log_dir parent exists",
116 cfg.log_dir.parent().map(|p| p.exists()).unwrap_or(true),
117 &mut all_ok,
118 );
119 check(
120 "contact_email set",
121 cfg.contact_email.is_some(),
122 &mut all_ok,
123 );
124 // Trying to actually create the dirs would have side-effects;
125 // keep doctor read-only and just check existence of parents.
126 if !all_ok {
127 bail!("config doctor: one or more checks failed");
128 }
129 }
130 other => bail!("unknown config action: {other}; expected `show` / `path` / `doctor`"),
131 }
132 Ok(())
133}
134
/// Report a single doctor check on stderr and fold its result into the
/// overall verdict.
///
/// Writes `[ ok ] <label>` when `ok` is true and `[FAIL] <label>` otherwise.
/// A failing check clears `*all_ok`; a passing check never sets it back.
/// Stderr is used so that `doiget config doctor` stdout stays empty for
/// green runs (script-friendly).
#[allow(clippy::print_stderr)]
fn check(label: &str, ok: bool, all_ok: &mut bool) {
    if ok {
        eprintln!("[ ok ] {label}");
    } else {
        eprintln!("[FAIL] {label}");
        *all_ok = false;
    }
}
146
147// ---------------------------------------------------------------------------
148// Tests — env-mutating, serialized via serial_test (same convention as
149// `doiget-core::tests`). Each test resets the four env vars it touches via
150// an EnvGuard RAII drop guard so that prior values are restored on panic.
151// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    // Tests may panic / unwrap freely; the workspace lints target
    // production code paths only.
    #![allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]

    use super::*;

    /// Guard that captures the prior value of an env var on construction
    /// and restores it on drop. Mirrors the convention in
    /// `crates/doiget-core/src/lib.rs::tests`.
    struct EnvGuard {
        var: &'static str,
        prior: Option<std::ffi::OsString>,
    }

    impl EnvGuard {
        /// Remove `var` from the environment, remembering its old value.
        fn unset(var: &'static str) -> Self {
            let prior = std::env::var_os(var);
            // Mutating the process environment is only sound here because
            // tests are serialized via `#[serial_test::serial]`; no other
            // thread reads/writes env state concurrently.
            std::env::remove_var(var);
            EnvGuard { var, prior }
        }

        /// Set `var` to `value`, remembering its old value.
        fn set(var: &'static str, value: &str) -> Self {
            let prior = std::env::var_os(var);
            std::env::set_var(var, value);
            EnvGuard { var, prior }
        }
    }

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            match &self.prior {
                Some(v) => std::env::set_var(self.var, v),
                None => std::env::remove_var(self.var),
            }
        }
    }

    /// Unset every env var the `config` subcommand reads. Returns guards
    /// that restore prior values on drop.
    fn unset_all_doiget_config_env() -> Vec<EnvGuard> {
        [
            "DOIGET_STORE_ROOT",
            "DOIGET_LOG_DIR",
            "DOIGET_CONTACT_EMAIL",
            "DOIGET_UNPAYWALL_EMAIL",
        ]
        .iter()
        .map(|v| EnvGuard::unset(v))
        .collect()
    }

    #[test]
    #[serial_test::serial]
    fn from_env_uses_home_default_when_unset() {
        let _g = unset_all_doiget_config_env();
        let cfg = ResolvedConfig::from_env().expect("home dir must resolve on test host");
        assert!(
            cfg.store_root.as_str().ends_with("papers"),
            "store_root should fall back to <home>/papers when DOIGET_STORE_ROOT is unset; got {}",
            cfg.store_root
        );
        assert_eq!(cfg.contact_email, None);
        assert_eq!(cfg.unpaywall_email, None);
    }

    #[test]
    #[serial_test::serial]
    fn from_env_overrides_via_env() {
        let _g = unset_all_doiget_config_env();
        // The override is taken verbatim via `Utf8PathBuf::from`, so the
        // exact string must round-trip on every platform. (On Windows
        // "/tmp/foo" is merely relative to the current drive, which does
        // not matter for this assertion.)
        let _override = EnvGuard::set("DOIGET_STORE_ROOT", "/tmp/foo");
        let cfg = ResolvedConfig::from_env().expect("home dir must resolve on test host");
        assert_eq!(cfg.store_root.as_str(), "/tmp/foo");
    }

    #[test]
    #[serial_test::serial]
    fn doctor_fails_without_contact_email() {
        let _g = unset_all_doiget_config_env();
        let err = run("doctor".into())
            .expect_err("doctor should fail when DOIGET_CONTACT_EMAIL is unset");
        let msg = format!("{err}");
        assert!(
            msg.contains("config doctor"),
            "unexpected error message: {msg}"
        );
    }

    #[test]
    #[serial_test::serial]
    fn doctor_passes_with_contact_email() {
        let _g = unset_all_doiget_config_env();
        let _email = EnvGuard::set("DOIGET_CONTACT_EMAIL", "alice@example.org");
        // Point both directories below the system temp dir so the
        // "parent exists" checks do not depend on whether this host has
        // the home directory / platform config directory on disk (a fresh
        // container may lack e.g. `~/.config`).
        let tmp = std::env::temp_dir();
        let store = tmp.join("doiget-test-store");
        let logs = tmp.join("doiget-test-logs");
        let _store = EnvGuard::set(
            "DOIGET_STORE_ROOT",
            store.to_str().expect("temp dir must be valid UTF-8"),
        );
        let _logs = EnvGuard::set(
            "DOIGET_LOG_DIR",
            logs.to_str().expect("temp dir must be valid UTF-8"),
        );
        run("doctor".into()).expect("doctor should pass with contact email + existing parent dirs");
    }

    #[test]
    #[serial_test::serial]
    fn unknown_action_errors() {
        let _g = unset_all_doiget_config_env();
        let err = run("bogus".into()).expect_err("bogus action should error");
        let msg = format!("{err}");
        assert!(
            msg.contains("unknown config action"),
            "unexpected error message: {msg}"
        );
    }
}
265}