doiget_cli/commands/config.rs
1//! `doiget config <action>` — config introspection.
2//!
3//! This subcommand is intentionally read-only and does NOT touch the network
4//! or instantiate the Store. Phase 1 resolves config from environment
5//! variables only with default fallbacks; the user `config.toml` reader
6//! lands in a follow-up. See `docs/CONFIG.md` for the canonical schema.
7//!
8//! `print_stdout` is denied workspace-wide for MCP stdio safety (ADR-0001 /
9//! `docs/SECURITY.md` §3). The `config show` and `config path` actions are
10//! the *spec'd* stdout channel for human-facing introspection — they are
11//! never invoked from inside an MCP session (`doiget serve` runs a
12//! different code path), so the lint is locally relaxed below.
13
14use anyhow::Result;
15use camino::Utf8PathBuf;
16
17use super::fetch::CliExit;
18
/// Snapshot of the env-var + default-fallback config that `doiget` would
/// use on the current machine.
///
/// Phase 1 surface: env vars only (`DOIGET_STORE_ROOT`, `DOIGET_LOG_PATH`,
/// `DOIGET_CONTACT_EMAIL`, `DOIGET_UNPAYWALL_EMAIL`) layered over
/// XDG / known-folder defaults. Phase 2 will layer the user config.toml
/// underneath the env vars per `docs/CONFIG.md` §1.
///
/// Issue #142: `log_path` is resolved from `DOIGET_LOG_PATH` — the ONLY
/// log env var `docs/CONFIG.md` §4 documents — using the exact same
/// resolution the provenance-log *writer*
/// (`commands::fetch::resolve_log_path` / `commands::audit_log`) uses, so
/// `config show` reports the path the writer actually uses. The previously
/// read, undocumented `DOIGET_LOG_DIR` has been dropped.
///
/// The struct derives `serde::Serialize` because `config show` renders it
/// with `toml::to_string_pretty`; field names therefore double as the TOML
/// keys users see.
/// NOTE(review): serde's derive is expected to emit fields in declaration
/// order, so reordering fields would reorder the `config show` output —
/// confirm before shuffling them.
#[derive(Debug, serde::Serialize)]
pub struct ResolvedConfig {
    /// Root of the on-disk paper store: `DOIGET_STORE_ROOT` when set,
    /// otherwise `<home>/papers`.
    pub store_root: Utf8PathBuf,
    /// Directory holding doiget's append-only logs. Derived from
    /// `log_path`'s parent so it always agrees with the writer.
    pub log_dir: Utf8PathBuf,
    /// JSON-Lines provenance log file path. `DOIGET_LOG_PATH` when set
    /// (and non-empty), otherwise `<config_dir>/doiget/access.jsonl`
    /// (`docs/CONFIG.md` §4).
    pub log_path: Utf8PathBuf,
    /// Directory holding `config.toml` and `credentials.toml`
    /// (`<config_dir>/doiget`).
    pub config_dir: Utf8PathBuf,
    /// Path of the user config file, `<config_dir>/doiget/config.toml`
    /// (may not exist on disk yet — resolution never touches the file).
    pub config_path: Utf8PathBuf,
    /// Contact email for the polite User-Agent header (and Unpaywall fallback).
    /// Taken verbatim from `DOIGET_CONTACT_EMAIL`; the key is omitted from
    /// serialized output when the value is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub contact_email: Option<String>,
    /// Unpaywall-specific contact email, taken verbatim from
    /// `DOIGET_UNPAYWALL_EMAIL`; omitted from serialized output when `None`.
    /// Resolution in this module does NOT copy `contact_email` into this
    /// field — the documented "falls back to `contact_email` when unset"
    /// behavior is presumably applied by the consumer; verify against the
    /// Unpaywall client.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub unpaywall_email: Option<String>,
}
54
55impl ResolvedConfig {
56 /// Resolve the live config from process environment + platform defaults.
57 ///
58 /// Errors only if neither a home directory nor a config directory can
59 /// be determined for the current user (e.g. an unknown / locked-down
60 /// platform); on every realistic POSIX or Windows host this returns
61 /// `Ok` even with no `DOIGET_*` env vars set.
62 pub fn from_env() -> Result<Self> {
63 // `dirs::home_dir()` / `dirs::config_dir()` return `std::path::PathBuf`;
64 // hoist them into `Utf8PathBuf` immediately at the OS boundary so the
65 // rest of the function (and the public struct) stays UTF-8-only per
66 // the workspace `disallowed-types` clippy rule. A non-UTF-8 home dir
67 // is exotic and unsupported; surface it as an explicit error.
68 let home =
69 Utf8PathBuf::try_from(dirs::home_dir().ok_or_else(|| anyhow::anyhow!("no home dir"))?)?;
70 let cfg = Utf8PathBuf::try_from(
71 dirs::config_dir().ok_or_else(|| anyhow::anyhow!("no config dir"))?,
72 )?;
73
74 let store_root = std::env::var("DOIGET_STORE_ROOT")
75 .map(Utf8PathBuf::from)
76 .unwrap_or_else(|_| home.join("papers"));
77
78 // Issue #142: resolve the log path the SAME way the writer does
79 // (`commands::fetch::resolve_log_path` / `commands::audit_log`):
80 // `DOIGET_LOG_PATH` (the only log env var documented in
81 // `docs/CONFIG.md` §4) when set, otherwise
82 // `<config_dir>/doiget/access.jsonl`. The undocumented
83 // `DOIGET_LOG_DIR` is no longer read, so `config show` can no
84 // longer disagree with the path the provenance log is written to.
85 let log_path = match std::env::var("DOIGET_LOG_PATH") {
86 Ok(s) if !s.is_empty() => Utf8PathBuf::from(s),
87 _ => cfg.join("doiget").join("access.jsonl"),
88 };
89 // `log_dir` is purely derived from `log_path` so the two can never
90 // drift; fall back to the config dir for a path with no parent.
91 let log_dir = log_path
92 .parent()
93 .map(Utf8PathBuf::from)
94 .unwrap_or_else(|| cfg.join("doiget"));
95
96 let config_dir = cfg.join("doiget");
97 let config_path = config_dir.join("config.toml");
98
99 Ok(Self {
100 store_root,
101 log_dir,
102 log_path,
103 config_dir,
104 config_path,
105 contact_email: std::env::var("DOIGET_CONTACT_EMAIL").ok(),
106 unpaywall_email: std::env::var("DOIGET_UNPAYWALL_EMAIL").ok(),
107 })
108 }
109}
110
111/// Dispatch entrypoint for `doiget config <action>`.
112///
113/// `action` is one of `show`, `path`, `doctor`. Anything else returns
114/// `Err`; clap currently passes the raw string through.
115//
116// `print_stdout` and `print_stderr` are workspace-deny / workspace-warn for
117// MCP stdio safety. The `config` subcommand is the explicit human-facing
118// stdout channel for the resolved config; `doctor`'s checklist lines also
119// belong on stderr by design (stdout stays clean for `| jq` style pipes
120// when we add `--json` later).
121#[allow(clippy::print_stdout, clippy::print_stderr)]
122pub fn run(action: String) -> Result<()> {
123 let cfg = ResolvedConfig::from_env()?;
124 match action.as_str() {
125 "show" => {
126 let s = toml::to_string_pretty(&cfg)?;
127 print!("{s}");
128 }
129 "path" => {
130 println!("{}", cfg.config_path);
131 }
132 "doctor" => {
133 let mut all_ok = true;
134 check(
135 "store_root parent exists",
136 cfg.store_root.parent().map(|p| p.exists()).unwrap_or(true),
137 &mut all_ok,
138 );
139 check(
140 "log_dir parent exists",
141 cfg.log_dir.parent().map(|p| p.exists()).unwrap_or(true),
142 &mut all_ok,
143 );
144 check(
145 "contact_email set",
146 cfg.contact_email.is_some(),
147 &mut all_ok,
148 );
149 // Trying to actually create the dirs would have side-effects;
150 // keep doctor read-only and just check existence of parents.
151 if !all_ok {
152 // Issue #149: a failing doctor means missing/invalid
153 // config — `docs/ERRORS.md` §4 classes "missing config"
154 // as misuse → exit 2 (the per-check `[FAIL]` lines were
155 // already written to stderr by `check`).
156 eprintln_err("error: config doctor: one or more checks failed");
157 return Err(anyhow::Error::new(CliExit(2)));
158 }
159 }
160 other => {
161 // Issue #149: an unknown subcommand action is clear argument
162 // misuse → `docs/ERRORS.md` §4 exit 2, not the generic exit 1
163 // a bare `bail!` produced.
164 eprintln_err(&format!(
165 "error: unknown config action: {other}; expected `show` / `path` / `doctor`"
166 ));
167 return Err(anyhow::Error::new(CliExit(2)));
168 }
169 }
170 Ok(())
171}
172
/// Stderr sink for the `docs/ERRORS.md` §3 human-error lines.
///
/// Writes `msg` followed by a newline to stderr via `eprintln!`; nothing is
/// written to stdout and nothing is returned. Funnelling the error lines
/// through this one helper keeps the lint exemption below in a single place.
///
/// The localized `#[allow]` is the minimal intervention for the workspace
/// `clippy::print_stderr` lint (same pattern as `commands::fetch`).
#[allow(clippy::print_stderr)]
fn eprintln_err(msg: &str) {
    eprintln!("{msg}");
}
180
/// Report the outcome of a single doctor check and fold it into the
/// overall verdict.
///
/// Writes `[ ok ] <label>` or `[FAIL] <label>` to stderr. A failing check
/// clears `*all_ok`; a passing check never sets it back to `true`, so the
/// flag ends up `false` as soon as any check has failed. Stderr is used so
/// that `doiget config doctor` stdout stays empty for green runs
/// (script-friendly).
#[allow(clippy::print_stderr)]
fn check(label: &str, ok: bool, all_ok: &mut bool) {
    if ok {
        eprintln!("[ ok ] {label}");
    } else {
        eprintln!("[FAIL] {label}");
        *all_ok = false;
    }
}
192
193// ---------------------------------------------------------------------------
194// Tests — env-mutating, serialized via serial_test (same convention as
195// `doiget-core::tests`). Each test resets the four env vars it touches via
196// an EnvGuard RAII drop guard so that prior values are restored on panic.
197// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    #![allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]

    use super::*;

    /// Every environment variable the `config` subcommand reads.
    const CONFIG_ENV_VARS: [&str; 4] = [
        "DOIGET_STORE_ROOT",
        "DOIGET_LOG_PATH",
        "DOIGET_CONTACT_EMAIL",
        "DOIGET_UNPAYWALL_EMAIL",
    ];

    /// Drop guard for one environment variable: remembers the value the
    /// variable had when the guard was created and puts it back when the
    /// guard goes out of scope (including during a panic unwind). Mirrors
    /// the convention in `crates/doiget-core/src/lib.rs::tests`.
    struct EnvGuard {
        name: &'static str,
        saved: Option<std::ffi::OsString>,
    }

    impl EnvGuard {
        /// Snapshot the current value of `name` without modifying it.
        fn capture(name: &'static str) -> Self {
            EnvGuard {
                name,
                saved: std::env::var_os(name),
            }
        }

        /// Remove `var` from the environment for the guard's lifetime.
        fn unset(var: &'static str) -> Self {
            let guard = Self::capture(var);
            // SAFETY: tests are serialized via `#[serial_test::serial]`;
            // no other thread reads/writes env state concurrently.
            std::env::remove_var(var);
            guard
        }

        /// Set `var` to `value` for the guard's lifetime.
        fn set(var: &'static str, value: &str) -> Self {
            let guard = Self::capture(var);
            std::env::set_var(var, value);
            guard
        }
    }

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            if let Some(previous) = &self.saved {
                std::env::set_var(self.name, previous);
            } else {
                std::env::remove_var(self.name);
            }
        }
    }

    /// Clear all of `CONFIG_ENV_VARS`, handing back the guards that will
    /// reinstate the previous values once dropped.
    fn unset_all_doiget_config_env() -> Vec<EnvGuard> {
        CONFIG_ENV_VARS
            .iter()
            .copied()
            .map(EnvGuard::unset)
            .collect()
    }

    #[test]
    #[serial_test::serial]
    fn from_env_uses_home_default_when_unset() {
        let _env = unset_all_doiget_config_env();
        let resolved = ResolvedConfig::from_env().expect("home dir must resolve on test host");
        let root_is_default = resolved.store_root.as_str().ends_with("papers");
        assert!(
            root_is_default,
            "store_root should fall back to <home>/papers when DOIGET_STORE_ROOT is unset; got {}",
            resolved.store_root
        );
        assert_eq!(resolved.contact_email, None);
        assert_eq!(resolved.unpaywall_email, None);
    }

    #[test]
    #[serial_test::serial]
    fn from_env_overrides_via_env() {
        let _env = unset_all_doiget_config_env();
        // The override is taken verbatim, so the exact string must round-trip
        // on every platform (on Windows "/tmp/foo" is merely relative to the
        // current drive — still UTF-8, still fine for this assertion).
        let _store_root = EnvGuard::set("DOIGET_STORE_ROOT", "/tmp/foo");
        let resolved = ResolvedConfig::from_env().expect("home dir must resolve on test host");
        assert_eq!(resolved.store_root.as_str(), "/tmp/foo");
    }

    /// Issue #142: `config show` MUST report the same `log_path` the
    /// provenance-log writer uses. The writer keys off `DOIGET_LOG_PATH`
    /// (the only log env var documented in `docs/CONFIG.md` §4); the
    /// resolver must do the same, and `log_dir` must be that path's
    /// parent — never an independently-resolved (and divergent) value.
    #[test]
    #[serial_test::serial]
    fn log_path_follows_doiget_log_path_env() {
        let _env = unset_all_doiget_config_env();
        let _log_path = EnvGuard::set("DOIGET_LOG_PATH", "/var/lib/doiget/access.jsonl");
        let resolved = ResolvedConfig::from_env().expect("home dir must resolve on test host");
        assert_eq!(
            resolved.log_path.as_str(),
            "/var/lib/doiget/access.jsonl",
            "config show must echo DOIGET_LOG_PATH verbatim (issue #142)"
        );
        assert_eq!(
            resolved.log_dir.as_str(),
            "/var/lib/doiget",
            "log_dir must be derived from log_path's parent so the two cannot drift"
        );
    }

    #[test]
    #[serial_test::serial]
    fn doctor_fails_without_contact_email() {
        // Issue #149: a failing doctor is "missing config" → exit 2.
        // The human-readable line goes to stderr; the returned error is a
        // `CliExit(2)` rather than a Display-formatted anyhow string.
        let _env = unset_all_doiget_config_env();
        let failure = run(String::from("doctor"))
            .expect_err("doctor should fail when DOIGET_CONTACT_EMAIL is unset");
        let exit = failure
            .downcast_ref::<CliExit>()
            .expect("failing doctor must carry a CliExit (issue #149)");
        assert_eq!(
            exit.0, 2,
            "missing/invalid config is misuse → exit 2, not the generic exit 1"
        );
    }

    #[test]
    #[serial_test::serial]
    fn doctor_passes_with_contact_email() {
        let _env = unset_all_doiget_config_env();
        let _email = EnvGuard::set("DOIGET_CONTACT_EMAIL", "alice@example.org");
        // home_dir() / config_dir() resolve to real, existing parents on
        // every supported test host (CI runners always have $HOME).
        run(String::from("doctor")).expect("doctor should pass with contact email + real home dir");
    }

    #[test]
    #[serial_test::serial]
    fn unknown_action_errors() {
        // Issue #149: an unknown action is clear argument misuse →
        // `docs/ERRORS.md` §4 exit 2. The descriptive line goes to stderr;
        // the returned error is a `CliExit(2)`.
        let _env = unset_all_doiget_config_env();
        let failure = run(String::from("bogus")).expect_err("bogus action should error");
        let exit = failure
            .downcast_ref::<CliExit>()
            .expect("unknown config action must carry a CliExit (issue #149)");
        assert_eq!(
            exit.0, 2,
            "unknown config action is misuse → exit 2, not the generic exit 1"
        );
    }
}