Skip to main content

doiget_cli/commands/
config.rs

1//! `doiget config <action>` — config introspection.
2//!
3//! This subcommand is intentionally read-only and does NOT touch the network
4//! or instantiate the Store. Phase 1 resolves config from environment
5//! variables only with default fallbacks; the user `config.toml` reader
6//! lands in a follow-up. See `docs/CONFIG.md` for the canonical schema.
7//!
8//! `print_stdout` is denied workspace-wide for MCP stdio safety (ADR-0001 /
9//! `docs/SECURITY.md` §3). The `config show` and `config path` actions are
10//! the *spec'd* stdout channel for human-facing introspection — they are
11//! never invoked from inside an MCP session (`doiget serve` runs a
12//! different code path), so the lint is locally relaxed below.
13
14use anyhow::Result;
15use camino::Utf8PathBuf;
16
17use super::fetch::CliExit;
18
19/// Snapshot of the env-var + default-fallback config that `doiget` would
20/// use on the current machine.
21///
22/// Phase 1 surface: env vars only (`DOIGET_STORE_ROOT`, `DOIGET_LOG_PATH`,
23/// `DOIGET_CONTACT_EMAIL`, `DOIGET_UNPAYWALL_EMAIL`) layered over
24/// XDG / known-folder defaults. Phase 2 will layer the user config.toml
25/// underneath the env vars per `docs/CONFIG.md` §1.
26///
27/// Issue #142: `log_path` is resolved from `DOIGET_LOG_PATH` — the ONLY
28/// log env var `docs/CONFIG.md` §4 documents — using the exact same
29/// resolution the provenance-log *writer*
30/// (`commands::fetch::resolve_log_path` / `commands::audit_log`) uses, so
31/// `config show` reports the path the writer actually uses. The previously
32/// read, undocumented `DOIGET_LOG_DIR` has been dropped.
33#[derive(Debug, serde::Serialize)]
34pub struct ResolvedConfig {
35    /// Root of the on-disk paper store. Default: `$HOME/papers`.
36    pub store_root: Utf8PathBuf,
37    /// Directory holding doiget's append-only logs. Derived from
38    /// `log_path`'s parent so it always agrees with the writer.
39    pub log_dir: Utf8PathBuf,
40    /// JSON-Lines provenance log file path. `DOIGET_LOG_PATH` when set,
41    /// otherwise `<config_dir>/doiget/access.jsonl` (`docs/CONFIG.md` §4).
42    pub log_path: Utf8PathBuf,
43    /// Directory holding `config.toml` and `credentials.toml`.
44    pub config_dir: Utf8PathBuf,
45    /// Path of the user config file (may not exist on disk yet).
46    pub config_path: Utf8PathBuf,
47    /// Contact email for the polite User-Agent header (and Unpaywall fallback).
48    #[serde(skip_serializing_if = "Option::is_none")]
49    pub contact_email: Option<String>,
50    /// Unpaywall-specific contact email; falls back to `contact_email` when unset.
51    #[serde(skip_serializing_if = "Option::is_none")]
52    pub unpaywall_email: Option<String>,
53}
54
55impl ResolvedConfig {
56    /// Resolve the live config from process environment + platform defaults.
57    ///
58    /// Errors only if neither a home directory nor a config directory can
59    /// be determined for the current user (e.g. an unknown / locked-down
60    /// platform); on every realistic POSIX or Windows host this returns
61    /// `Ok` even with no `DOIGET_*` env vars set.
62    pub fn from_env() -> Result<Self> {
63        // `dirs::home_dir()` / `dirs::config_dir()` return `std::path::PathBuf`;
64        // hoist them into `Utf8PathBuf` immediately at the OS boundary so the
65        // rest of the function (and the public struct) stays UTF-8-only per
66        // the workspace `disallowed-types` clippy rule. A non-UTF-8 home dir
67        // is exotic and unsupported; surface it as an explicit error.
68        let home =
69            Utf8PathBuf::try_from(dirs::home_dir().ok_or_else(|| anyhow::anyhow!("no home dir"))?)?;
70        let cfg = Utf8PathBuf::try_from(
71            dirs::config_dir().ok_or_else(|| anyhow::anyhow!("no config dir"))?,
72        )?;
73
74        let store_root = std::env::var("DOIGET_STORE_ROOT")
75            .map(Utf8PathBuf::from)
76            .unwrap_or_else(|_| home.join("papers"));
77
78        // Issue #142: resolve the log path the SAME way the writer does
79        // (`commands::fetch::resolve_log_path` / `commands::audit_log`):
80        // `DOIGET_LOG_PATH` (the only log env var documented in
81        // `docs/CONFIG.md` §4) when set, otherwise
82        // `<config_dir>/doiget/access.jsonl`. The undocumented
83        // `DOIGET_LOG_DIR` is no longer read, so `config show` can no
84        // longer disagree with the path the provenance log is written to.
85        let log_path = match std::env::var("DOIGET_LOG_PATH") {
86            Ok(s) if !s.is_empty() => Utf8PathBuf::from(s),
87            _ => cfg.join("doiget").join("access.jsonl"),
88        };
89        // `log_dir` is purely derived from `log_path` so the two can never
90        // drift; fall back to the config dir for a path with no parent.
91        let log_dir = log_path
92            .parent()
93            .map(Utf8PathBuf::from)
94            .unwrap_or_else(|| cfg.join("doiget"));
95
96        let config_dir = cfg.join("doiget");
97        let config_path = config_dir.join("config.toml");
98
99        Ok(Self {
100            store_root,
101            log_dir,
102            log_path,
103            config_dir,
104            config_path,
105            contact_email: std::env::var("DOIGET_CONTACT_EMAIL").ok(),
106            unpaywall_email: std::env::var("DOIGET_UNPAYWALL_EMAIL").ok(),
107        })
108    }
109}
110
111/// Dispatch entrypoint for `doiget config <action>`.
112///
113/// `action` is one of `show`, `path`, `doctor`. Anything else returns
114/// `Err`; clap currently passes the raw string through.
115//
116// `print_stdout` and `print_stderr` are workspace-deny / workspace-warn for
117// MCP stdio safety. The `config` subcommand is the explicit human-facing
118// stdout channel for the resolved config; `doctor`'s checklist lines also
119// belong on stderr by design (stdout stays clean for `| jq` style pipes
120// when we add `--json` later).
121#[allow(clippy::print_stdout, clippy::print_stderr)]
122pub fn run(action: String, mode: super::output::OutputMode) -> Result<()> {
123    // `mode` honors ADR-0017: `Quiet` suppresses the TOML dump (`show`)
124    // and the path println! (`path`); `doctor` is unaffected because its
125    // per-check output is on stderr and only the failure/success exit
126    // code is the user-visible signal (#203). Json body for `show` is
127    // tracked in #204.
128    let cfg = ResolvedConfig::from_env()?;
129    match action.as_str() {
130        "show" => match mode {
131            super::output::OutputMode::Quiet => {}
132            super::output::OutputMode::Json => {
133                // #204: `ResolvedConfig` is `Serialize` (already used for
134                // the TOML branch).
135                let s = serde_json::to_string_pretty(&cfg)
136                    .map_err(|e| anyhow::anyhow!("serialise config to JSON: {e}"))?;
137                println!("{s}");
138            }
139            _ => {
140                let s = toml::to_string_pretty(&cfg)?;
141                print!("{s}");
142            }
143        },
144        "path" => match mode {
145            super::output::OutputMode::Quiet => {}
146            super::output::OutputMode::Json => {
147                // Minimal JSON object so callers can parse the path
148                // uniformly; no trailing-newline ambiguity vs the raw
149                // `path` form.
150                println!(
151                    "{}",
152                    serde_json::json!({ "config_path": cfg.config_path.as_str() })
153                );
154            }
155            _ => {
156                println!("{}", cfg.config_path);
157            }
158        },
159        "doctor" => {
160            let mut all_ok = true;
161            check(
162                "store_root parent exists",
163                cfg.store_root.parent().map(|p| p.exists()).unwrap_or(true),
164                &mut all_ok,
165            );
166            check(
167                "log_dir parent exists",
168                cfg.log_dir.parent().map(|p| p.exists()).unwrap_or(true),
169                &mut all_ok,
170            );
171            check(
172                "contact_email set",
173                cfg.contact_email.is_some(),
174                &mut all_ok,
175            );
176            // ADR-0028 D2: surface user-extension allowlist health. A
177            // missing config.toml is normal (curated set only); a
178            // present-but-malformed config.toml is a doctor failure so
179            // the operator finds out before fetch attempts silently
180            // skip the extension path. `user_extension::load` returns
181            // `Ok(vec![])` for not-found, so the OK arm always reports
182            // a count.
183            match doiget_core::user_extension::load(&cfg.config_path) {
184                Ok(hosts) => check(
185                    &format!("user-extension hosts loaded: {}", hosts.len()),
186                    true,
187                    &mut all_ok,
188                ),
189                Err(e) => check(
190                    &format!("user-extension config invalid: {e}"),
191                    false,
192                    &mut all_ok,
193                ),
194            }
195            // Trying to actually create the dirs would have side-effects;
196            // keep doctor read-only and just check existence of parents.
197            if !all_ok {
198                // Issue #149: a failing doctor means missing/invalid
199                // config — `docs/ERRORS.md` §4 classes "missing config"
200                // as misuse → exit 2 (the per-check `[FAIL]` lines were
201                // already written to stderr by `check`).
202                eprintln_err("error: config doctor: one or more checks failed");
203                return Err(anyhow::Error::new(CliExit(2)));
204            }
205        }
206        other => {
207            // Issue #149: an unknown subcommand action is clear argument
208            // misuse → `docs/ERRORS.md` §4 exit 2, not the generic exit 1
209            // a bare `bail!` produced.
210            eprintln_err(&format!(
211                "error: unknown config action: {other}; expected `show` / `path` / `doctor`"
212            ));
213            return Err(anyhow::Error::new(CliExit(2)));
214        }
215    }
216    Ok(())
217}
218
/// Write one human-readable error line (`docs/ERRORS.md` §3) to stderr.
///
/// Exists so the `clippy::print_stderr` exemption is confined to a single
/// small function instead of being spread across call sites (same
/// pattern as `commands::fetch`).
#[allow(clippy::print_stderr)]
fn eprintln_err(msg: &str) {
    eprintln!("{msg}");
}
226
/// Print a single doctor checklist line to stderr and record a failure.
///
/// A passing check prints `[ ok ] <label>` and leaves `all_ok` untouched;
/// a failing one prints `[FAIL] <label>` and clears `all_ok`. The flag is
/// never set back to `true`, so one failure sticks for the whole run.
/// Output goes to stderr so stdout stays empty on green runs
/// (script-friendly).
#[allow(clippy::print_stderr)]
fn check(label: &str, ok: bool, all_ok: &mut bool) {
    if ok {
        eprintln!("[ ok ] {label}");
    } else {
        eprintln!("[FAIL] {label}");
        *all_ok = false;
    }
}
238
239// ---------------------------------------------------------------------------
240// Tests — env-mutating, serialized via serial_test (same convention as
241// `doiget-core::tests`). Each test resets the four env vars it touches via
242// an EnvGuard RAII drop guard so that prior values are restored on panic.
243// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    #![allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]

    use super::*;

    /// The `DOIGET_*` variables that `ResolvedConfig::from_env` reads.
    const CONFIG_ENV_VARS: [&str; 4] = [
        "DOIGET_STORE_ROOT",
        "DOIGET_LOG_PATH",
        "DOIGET_CONTACT_EMAIL",
        "DOIGET_UNPAYWALL_EMAIL",
    ];

    /// Remembers what an env var held when the guard was created and puts
    /// that value back (or removes the var) when the guard is dropped, so
    /// a panicking test cannot leak env state. Mirrors the convention in
    /// `crates/doiget-core/src/lib.rs::tests`.
    struct EnvGuard {
        var: &'static str,
        prior: Option<std::ffi::OsString>,
    }

    impl EnvGuard {
        /// Remove `var` for the lifetime of the returned guard.
        fn unset(var: &'static str) -> Self {
            let guard = EnvGuard {
                var,
                prior: std::env::var_os(var),
            };
            // SAFETY: tests are serialized via `#[serial_test::serial]`;
            // no other thread reads/writes env state concurrently.
            std::env::remove_var(var);
            guard
        }

        /// Set `var` to `value` for the lifetime of the returned guard.
        fn set(var: &'static str, value: &str) -> Self {
            let guard = EnvGuard {
                var,
                prior: std::env::var_os(var),
            };
            // SAFETY: same serialization argument as in `unset`.
            std::env::set_var(var, value);
            guard
        }
    }

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            if let Some(previous) = &self.prior {
                std::env::set_var(self.var, previous);
            } else {
                std::env::remove_var(self.var);
            }
        }
    }

    /// Clear every env var the `config` subcommand reads; the returned
    /// guards restore the previous values when dropped.
    fn unset_all_doiget_config_env() -> Vec<EnvGuard> {
        CONFIG_ENV_VARS
            .iter()
            .copied()
            .map(EnvGuard::unset)
            .collect()
    }

    /// Invoke `run` for `action` in human output mode.
    fn run_human(action: &str) -> Result<()> {
        run(action.into(), crate::commands::output::OutputMode::Human)
    }

    /// Pull the `CliExit` out of `err`, panicking with `why` if the error
    /// carries none.
    fn cli_exit<'e>(err: &'e anyhow::Error, why: &str) -> &'e CliExit {
        err.downcast_ref::<CliExit>().expect(why)
    }

    #[test]
    #[serial_test::serial]
    fn from_env_uses_home_default_when_unset() {
        let _env = unset_all_doiget_config_env();
        let resolved = ResolvedConfig::from_env().expect("home dir must resolve on test host");
        assert!(
            resolved.store_root.as_str().ends_with("papers"),
            "store_root should fall back to <home>/papers when DOIGET_STORE_ROOT is unset; got {}",
            resolved.store_root
        );
        assert_eq!(resolved.contact_email, None);
        assert_eq!(resolved.unpaywall_email, None);
    }

    #[test]
    #[serial_test::serial]
    fn from_env_overrides_via_env() {
        let _env = unset_all_doiget_config_env();
        // `/tmp/foo` is plain UTF-8, so it round-trips verbatim on every
        // platform. On Windows it is merely drive-relative, which does not
        // matter for this string comparison.
        let _store_root = EnvGuard::set("DOIGET_STORE_ROOT", "/tmp/foo");
        let resolved = ResolvedConfig::from_env().expect("home dir must resolve on test host");
        assert_eq!(resolved.store_root.as_str(), "/tmp/foo");
    }

    /// Issue #142: `config show` MUST report the same `log_path` the
    /// provenance-log writer uses. The writer keys off `DOIGET_LOG_PATH`
    /// (the only log env var documented in `docs/CONFIG.md` §4); the
    /// resolver must do the same, and `log_dir` must be that path's
    /// parent — never an independently-resolved (and divergent) value.
    #[test]
    #[serial_test::serial]
    fn log_path_follows_doiget_log_path_env() {
        let _env = unset_all_doiget_config_env();
        let _log_path = EnvGuard::set("DOIGET_LOG_PATH", "/var/lib/doiget/access.jsonl");
        let resolved = ResolvedConfig::from_env().expect("home dir must resolve on test host");
        assert_eq!(
            resolved.log_path.as_str(),
            "/var/lib/doiget/access.jsonl",
            "config show must echo DOIGET_LOG_PATH verbatim (issue #142)"
        );
        assert_eq!(
            resolved.log_dir.as_str(),
            "/var/lib/doiget",
            "log_dir must be derived from log_path's parent so the two cannot drift"
        );
    }

    #[test]
    #[serial_test::serial]
    fn doctor_fails_without_contact_email() {
        // Issue #149: a failing doctor is "missing config" → exit 2.
        // The human-readable line goes to stderr; the error itself only
        // carries a `CliExit(2)`, not a Display-formatted anyhow string.
        let _env = unset_all_doiget_config_env();
        let err = run_human("doctor")
            .expect_err("doctor should fail when DOIGET_CONTACT_EMAIL is unset");
        let exit = cli_exit(&err, "failing doctor must carry a CliExit (issue #149)");
        assert_eq!(
            exit.0, 2,
            "missing/invalid config is misuse → exit 2, not the generic exit 1"
        );
    }

    #[test]
    #[serial_test::serial]
    fn doctor_passes_with_contact_email() {
        let _env = unset_all_doiget_config_env();
        let _email = EnvGuard::set("DOIGET_CONTACT_EMAIL", "alice@example.org");
        // home_dir() / config_dir() resolve to real, existing parents on
        // every supported test host (CI runners always have $HOME).
        run_human("doctor").expect("doctor should pass with contact email + real home dir");
    }

    /// ADR-0028 D2: a malformed `<config_dir>/doiget/config.toml`
    /// causes `doiget config doctor` to FAIL (exit 2). Linux-only
    /// because `dirs::config_dir()` resolves differently on each
    /// platform:
    ///   - Linux: `$XDG_CONFIG_HOME` or `$HOME/.config` (env-driven,
    ///     testable).
    ///   - macOS: `~/Library/Application Support` (Known Folder via
    ///     `NSSearchPathForDirectoriesInDomains`, ignores
    ///     `XDG_CONFIG_HOME`).
    ///   - Windows: `%FOLDERID_RoamingAppData%` (Known Folder API,
    ///     ignores `APPDATA` env in child processes via
    ///     `assert_cmd`).
    /// The malformed-config FAIL path is platform-independent; this
    /// test covers the wiring on the one platform where it CAN be
    /// exercised in a hermetic test.
    #[cfg(target_os = "linux")]
    #[test]
    #[serial_test::serial]
    fn doctor_fails_with_malformed_user_extension_config() {
        let _env = unset_all_doiget_config_env();
        let _email = EnvGuard::set("DOIGET_CONTACT_EMAIL", "alice@example.org");

        let tmp = tempfile::TempDir::new().expect("tempdir");
        let xdg_root = camino::Utf8Path::from_path(tmp.path()).expect("utf8 tempdir");
        let doiget_dir = xdg_root.join("doiget");
        std::fs::create_dir_all(doiget_dir.as_std_path()).expect("mk dir");
        // Empty `host` value triggers `PatternError::Empty`, which
        // the doctor surfaces as a FAIL. The document itself is valid
        // TOML, so the top-level parse succeeds — only the pattern
        // validation path produces the error we're pinning.
        let malformed = "[[network.additional_hosts]]\nhost = \"\"\n";
        std::fs::write(doiget_dir.join("config.toml").as_std_path(), malformed)
            .expect("write config.toml");

        // POSIX `dirs::config_dir()` honors `XDG_CONFIG_HOME` first,
        // so pointing it at our tempdir routes `cfg.config_path` to
        // our crafted file.
        let _xdg = EnvGuard::set("XDG_CONFIG_HOME", xdg_root.as_str());

        let err = run_human("doctor")
            .expect_err("doctor should fail when user-extension config is malformed");
        let exit = cli_exit(&err, "failing doctor must carry a CliExit");
        assert_eq!(exit.0, 2);
    }

    #[test]
    #[serial_test::serial]
    fn unknown_action_errors() {
        // Issue #149: an unknown action is clear argument misuse →
        // `docs/ERRORS.md` §4 exit 2. The descriptive line goes to
        // stderr; the error carries `CliExit(2)`.
        let _env = unset_all_doiget_config_env();
        let err = run_human("bogus").expect_err("bogus action should error");
        let exit = cli_exit(
            &err,
            "unknown config action must carry a CliExit (issue #149)",
        );
        assert_eq!(
            exit.0, 2,
            "unknown config action is misuse → exit 2, not the generic exit 1"
        );
    }
}