Skip to main content

rover/doctor/
mod.rs

1//! `rover doctor` — diagnostic checks.
2
3pub mod checks;
4
5use std::sync::Arc;
6use thiserror::Error;
7
8use crate::config::Config;
9use crate::storage::Db;
10
/// Error type for the `doctor` subsystem.
///
/// Nothing visible in this file constructs it; individual checks report
/// their outcome through [`CheckReport`] instead of returning an error.
#[derive(Debug, Error)]
pub enum DoctorError {
    /// A failure described by a free-form message, rendered as
    /// `doctor check infrastructure error: <message>`.
    #[error("doctor check infrastructure error: {0}")]
    Infrastructure(String),
}
16
/// Outcome of a single diagnostic check, also used as the overall summary
/// returned by [`run_all`].
///
/// Serialises as the lowercase variant name (`"ok"`, `"fail"`, `"skip"`).
#[derive(Debug, Clone, Copy, serde::Serialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum CheckStatus {
    /// The check passed.
    Ok,
    /// The check failed; a single `Fail` makes the [`run_all`] summary `Fail`.
    Fail,
    /// The check did not apply and was not counted as a failure.
    Skip,
}
24
/// Result record produced by one [`Check::run`] invocation.
#[derive(Debug, Clone, serde::Serialize)]
pub struct CheckReport {
    /// Static identifier of the check this report belongs to
    /// (presumably the value of [`Check::name`] — confirm in `checks`).
    pub check: &'static str,
    /// Whether the check passed, failed, or was skipped.
    pub status: CheckStatus,
    /// Optional human-readable explanation; left out of the serialised
    /// output entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
}
32
/// Inputs handed by reference to every [`Check::run`] call.
pub struct CheckCtx {
    /// Shared, reference-counted configuration the checks inspect.
    pub config: Arc<Config>,
    /// Storage handle the checks inspect.
    pub db: Db,
}
37
/// A single diagnostic that `rover doctor` can execute.
///
/// Implementors must be `Send + Sync`; [`run_all`] stores them as
/// `Box<dyn Check>` trait objects.
#[async_trait::async_trait]
pub trait Check: Send + Sync {
    /// Static identifier for this check.
    fn name(&self) -> &'static str;
    /// Execute the check against `ctx`.
    ///
    /// The signature is infallible: every outcome, including failure, is
    /// expressed through the returned [`CheckReport`].
    async fn run(&self, ctx: &CheckCtx) -> CheckReport;
}
43
44/// Run every built-in check sequentially. Order: cheap → expensive.
45/// Returns the full report list and a summary status (`Fail` if any
46/// check failed; `Ok` otherwise — `Skip` is non-failing).
47pub async fn run_all(ctx: &CheckCtx) -> (Vec<CheckReport>, CheckStatus) {
48    #[allow(unused_mut)]
49    let mut checks: Vec<Box<dyn Check>> = vec![
50        Box::new(checks::SqliteOpen),
51        Box::new(checks::SqliteWalMode),
52        Box::new(checks::SqliteSchemaVersion),
53        Box::new(checks::OutputDirWritable),
54        Box::new(checks::NetworkReachable),
55        Box::new(checks::ExtractiveSynthesis),
56        Box::new(checks::BackendsAuthenticate),
57        Box::new(checks::CaptionersAuthenticate),
58    ];
59    #[cfg(feature = "local-inference")]
60    checks.push(Box::new(checks::LocalInferenceModelCached));
61    #[cfg(feature = "local-inference")]
62    checks.push(Box::new(checks::LocalModelIntegrity));
63    #[cfg(feature = "injection-model")]
64    checks.push(Box::new(checks::PromptInjectionModelCached));
65    #[cfg(feature = "headless")]
66    checks.push(Box::new(checks::HeadlessBrowserLaunches));
67    let mut reports = Vec::with_capacity(checks.len());
68    let mut summary = CheckStatus::Ok;
69    for c in &checks {
70        let r = c.run(ctx).await;
71        if r.status == CheckStatus::Fail {
72            summary = CheckStatus::Fail;
73        }
74        reports.push(r);
75    }
76    (reports, summary)
77}
78
#[cfg(test)]
mod tests {
    use super::*;

    /// Puts an environment variable back the way it was when dropped.
    ///
    /// Used so that a panicking assertion cannot leak a test-local override
    /// of a process-global variable into tests that run afterwards. The
    /// prior value is captured with `var_os`, so a non-UTF-8 value is
    /// restored faithfully instead of being discarded.
    ///
    /// The guard must be declared *after* the mutex guard that serialises
    /// access to the variable: locals drop in reverse declaration order, so
    /// the restore then happens while the lock is still held.
    #[cfg(feature = "local-inference")]
    struct EnvVarGuard {
        key: &'static str,
        prior: Option<std::ffi::OsString>,
    }

    #[cfg(feature = "local-inference")]
    impl EnvVarGuard {
        /// Remember the current value of `key` (or its absence).
        fn capture(key: &'static str) -> Self {
            Self {
                key,
                prior: std::env::var_os(key),
            }
        }
    }

    #[cfg(feature = "local-inference")]
    impl Drop for EnvVarGuard {
        fn drop(&mut self) {
            // SAFETY: the guard is only created while the test mutex for this
            // variable is held and is dropped before that mutex guard, so no
            // other mutex-respecting test touches the variable concurrently.
            unsafe {
                match self.prior.take() {
                    Some(value) => std::env::set_var(self.key, value),
                    None => std::env::remove_var(self.key),
                }
            }
        }
    }

    /// Build a context backed by a fresh temp directory: a new `rover.db`
    /// inside it, a default config whose output dir points at it, and then
    /// whatever `customise` changes on top.
    ///
    /// The returned `TempDir` must be kept alive for as long as the context
    /// is used; dropping it deletes the directory.
    async fn ctx_with(customise: impl FnOnce(&mut Config)) -> (CheckCtx, tempfile::TempDir) {
        let tmp = tempfile::tempdir().unwrap();
        let db = Db::open(tmp.path().join("rover.db")).await.unwrap();
        let mut cfg = Config::default();
        cfg.output.dir = Some(tmp.path().to_path_buf());
        customise(&mut cfg);
        let ctx = CheckCtx {
            config: Arc::new(cfg),
            db,
        };
        (ctx, tmp)
    }

    /// Context with an otherwise-default configuration.
    async fn fresh_ctx() -> (CheckCtx, tempfile::TempDir) {
        ctx_with(|_| {}).await
    }

    #[tokio::test]
    async fn sqlite_open_passes_on_fresh_db() {
        let (ctx, _tmp) = fresh_ctx().await;
        let report = checks::SqliteOpen.run(&ctx).await;
        assert_eq!(report.status, CheckStatus::Ok);
    }

    #[tokio::test]
    async fn sqlite_wal_mode_passes_on_fresh_db() {
        let (ctx, _tmp) = fresh_ctx().await;
        let report = checks::SqliteWalMode.run(&ctx).await;
        assert_eq!(report.status, CheckStatus::Ok, "{:?}", report.detail);
    }

    #[tokio::test]
    async fn sqlite_schema_version_passes_on_fresh_db() {
        let (ctx, _tmp) = fresh_ctx().await;
        let report = checks::SqliteSchemaVersion.run(&ctx).await;
        assert_eq!(report.status, CheckStatus::Ok, "{:?}", report.detail);
    }

    #[tokio::test]
    async fn output_dir_writable_passes_on_writable_temp() {
        let (ctx, _tmp) = fresh_ctx().await;
        let report = checks::OutputDirWritable.run(&ctx).await;
        assert_eq!(report.status, CheckStatus::Ok, "{:?}", report.detail);
    }

    #[tokio::test]
    async fn backends_authenticate_skips_when_no_cloud_configured() {
        let (ctx, _tmp) = fresh_ctx().await;
        let report = checks::BackendsAuthenticate.run(&ctx).await;
        assert_eq!(report.status, CheckStatus::Skip);
    }

    #[tokio::test]
    async fn extractive_synthesis_produces_output() {
        let (ctx, _tmp) = fresh_ctx().await;
        let report = checks::ExtractiveSynthesis.run(&ctx).await;
        assert_eq!(report.status, CheckStatus::Ok, "{:?}", report.detail);
    }

    /// Regression: the check used to invoke the extractive backend without
    /// loading the tokenizer first, which made the `target_tokens` budget
    /// code fall back to a chars/4 heuristic and emit a confusing
    /// `tracing::warn!` immediately before printing a green ✓. Clearing
    /// the registry pre-test then asserting it's repopulated post-test
    /// proves the fix's `ensure_loaded` call ran.
    ///
    /// We hold the tokenizer test mutex across `.await` to keep parallel
    /// tests out of the process-global registry; this is a test-only path
    /// with no production callers, and the alternative (dropping then
    /// re-acquiring the lock) introduces a real race.
    #[tokio::test]
    #[allow(clippy::await_holding_lock)]
    async fn extractive_synthesis_loads_tokenizer() {
        // A poisoned mutex only means an earlier test panicked; the lock is
        // still usable for serialisation, so recover the guard.
        let _registry_lock = crate::tokenizer::_test_mutex()
            .lock()
            .unwrap_or_else(|e| e.into_inner());
        crate::tokenizer::_clear_registry_for_tests();
        let (ctx, _tmp) = fresh_ctx().await;
        let report = checks::ExtractiveSynthesis.run(&ctx).await;
        assert_eq!(report.status, CheckStatus::Ok, "{:?}", report.detail);
        assert!(
            crate::tokenizer::count("hello", crate::tokenizer::Tokenizer::O200k).is_ok(),
            "ExtractiveSynthesis check must leave the tokenizer registry populated",
        );
    }

    #[tokio::test]
    async fn captioners_authenticate_skips_when_no_cloud_configured() {
        let (ctx, _tmp) = fresh_ctx().await;
        let report = checks::CaptionersAuthenticate.run(&ctx).await;
        assert_eq!(report.status, CheckStatus::Skip);
    }

    #[tokio::test]
    async fn captioners_authenticate_probes_keyless_openai_compat() {
        use wiremock::matchers::{method, path};
        use wiremock::{Mock, MockServer, ResponseTemplate};

        // A fake OpenAI-compatible server that answers the caption probe.
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .and(path("/v1/chat/completions"))
            .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({
                "id": "probe",
                "object": "chat.completion",
                "created": 0,
                "model": "probe-model",
                "choices": [{
                    "index": 0,
                    "message": {"role": "assistant", "content": "a small blue square"},
                    "finish_reason": "stop"
                }],
                "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2}
            })))
            .mount(&server)
            .await;

        // Keyless: no api_key_env. Trailing slash so this test is independent
        // of Fix A.
        let base_url = format!("{}/v1/", server.uri());
        let (ctx, _tmp) = ctx_with(|cfg| {
            cfg.captioners.insert(
                "ollama".to_string(),
                crate::config::CaptionerConfig {
                    kind: "cloud".into(),
                    provider: Some("openai_compat".into()),
                    model: Some("probe-model".into()),
                    base_url: Some(base_url),
                    api_key_env: None,
                },
            );
        })
        .await;

        let report = checks::CaptionersAuthenticate.run(&ctx).await;
        // Before Fix B this returned Skip (the keyless captioner was filtered
        // out). Now it must be probed and pass.
        assert_eq!(
            report.status,
            CheckStatus::Ok,
            "detail: {:?}",
            report.detail
        );
    }

    #[test]
    fn caption_probe_constants_are_sane() {
        // Non-degenerate image and a budget that leaves room for output.
        // Evaluated at compile time (const block) so the invariant is a build
        // guarantee, not just a runtime check.
        const {
            assert!(
                checks::CAPTION_PROBE_PNG.len() > 67,
                "probe image must be larger than the old 1x1"
            );
            assert!(
                checks::CAPTION_PROBE_MAX_TOKENS > 1,
                "probe budget must exceed 1 token"
            );
        }
    }

    #[cfg(feature = "local-inference")]
    #[tokio::test]
    async fn local_inference_model_cached_skips_when_no_local_configured() {
        let (ctx, _tmp) = fresh_ctx().await;
        let report = checks::LocalInferenceModelCached.run(&ctx).await;
        assert_eq!(report.status, CheckStatus::Skip);
    }

    #[cfg(feature = "injection-model")]
    #[tokio::test]
    async fn prompt_injection_model_check_skips_when_disabled() {
        let (ctx, _tmp) = fresh_ctx().await; // default config: model = "disabled"
        let report = checks::PromptInjectionModelCached.run(&ctx).await;
        assert_eq!(report.status, CheckStatus::Skip);
    }

    /// Builds a minimal Hugging Face cache under a temporary `HF_HOME`,
    /// records its manifest, and checks that the integrity check passes on
    /// the intact file and fails (naming the file) once it is overwritten.
    ///
    /// The mutex guard is held across `.await` on purpose: `HF_HOME` is
    /// process-global, so other tests that mutate it must stay out for the
    /// whole test body.
    #[cfg(feature = "local-inference")]
    #[tokio::test]
    #[allow(clippy::await_holding_lock)]
    async fn local_model_integrity_passes_intact_and_fails_tampered() {
        // Serialised against other HF_HOME-mutating tests.
        let _lock = crate::model_integrity::HF_HOME_TEST_MUTEX
            .lock()
            .unwrap_or_else(|e| e.into_inner());
        let tmp = tempfile::tempdir().unwrap();
        // Declared after `_lock` and `tmp`, so on any exit — including a
        // failed assertion — HF_HOME is restored first, before the temp dir
        // is deleted and before the lock is released.
        let _restore_hf_home = EnvVarGuard::capture("HF_HOME");
        // SAFETY: serialised by HF_HOME_TEST_MUTEX; undone by `_restore_hf_home`.
        unsafe { std::env::set_var("HF_HOME", tmp.path()) };

        // Build a minimal cache and record its manifest.
        let repo = tmp.path().join("hub").join("models--Acme--tiny");
        let refs = repo.join("refs");
        let snap = repo.join("snapshots").join("rev1");
        let weights = snap.join("model.safetensors");
        std::fs::create_dir_all(&refs).unwrap();
        std::fs::write(refs.join("main"), "rev1").unwrap();
        std::fs::create_dir_all(&snap).unwrap();
        std::fs::write(&weights, b"weights").unwrap();
        crate::model_integrity::bootstrap("Acme/tiny").unwrap();

        let (ctx, _ctx_tmp) = fresh_ctx().await;
        let intact = checks::LocalModelIntegrity.run(&ctx).await;
        assert_eq!(intact.status, CheckStatus::Ok, "intact: {:?}", intact.detail);

        // Tamper → Fail, with the failing file named in the detail.
        std::fs::write(&weights, b"tampered").unwrap();
        let tampered = checks::LocalModelIntegrity.run(&ctx).await;
        assert_eq!(
            tampered.status,
            CheckStatus::Fail,
            "tampered: {:?}",
            tampered.detail
        );
        assert!(
            tampered
                .detail
                .as_deref()
                .is_some_and(|detail| detail.contains("model.safetensors")),
            "failure detail must name the tampered file: {:?}",
            tampered.detail
        );
    }
}