Skip to main content

sqlite_graphrag/commands/
health.rs

1//! Handler for the `health` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use serde::Serialize;
8use std::fs;
9use std::time::Instant;
10
11#[derive(clap::Args)]
12#[command(after_long_help = "EXAMPLES:\n  \
13    # Check database health (connectivity, integrity, vector index)\n  \
14    sqlite-graphrag health\n\n  \
15    # Check health of a database at a custom path\n  \
16    sqlite-graphrag health --db /path/to/graphrag.sqlite\n\n  \
17    # Use SQLITE_GRAPHRAG_DB_PATH env var\n  \
18    SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag health")]
19pub struct HealthArgs {
20    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
21    pub db: Option<String>,
22    /// Explicit JSON flag. Accepted as a no-op because output is already JSON by default.
23    #[arg(long, default_value_t = false)]
24    pub json: bool,
25    /// Output format: `json` or `text`. JSON is always emitted on stdout regardless of the value.
26    #[arg(long, value_parser = ["json", "text"], hide = true)]
27    pub format: Option<String>,
28}
29
30#[derive(Serialize)]
31struct HealthCounts {
32    memories: i64,
33    /// Alias of `memories` for the documented contract in AGENT_PROTOCOL.md.
34    memories_total: i64,
35    entities: i64,
36    relationships: i64,
37    vec_memories: i64,
38}
39
40#[derive(Serialize)]
41struct HealthCheck {
42    name: String,
43    ok: bool,
44    #[serde(skip_serializing_if = "Option::is_none")]
45    detail: Option<String>,
46}
47
48#[derive(Serialize)]
49struct HealthResponse {
50    status: String,
51    integrity: String,
52    integrity_ok: bool,
53    schema_ok: bool,
54    vec_memories_ok: bool,
55    vec_entities_ok: bool,
56    vec_chunks_ok: bool,
57    fts_ok: bool,
58    /// Whether a live FTS5 MATCH query against fts_memories succeeded.
59    fts_query_ok: bool,
60    model_ok: bool,
61    counts: HealthCounts,
62    db_path: String,
63    db_size_bytes: u64,
64    /// MAX(version) from refinery_schema_history — number of the last applied migration.
65    /// Distinct from PRAGMA schema_version (SQLite DDL counter) and PRAGMA user_version
66    /// (canonical SCHEMA_USER_VERSION from __debug_schema).
67    schema_version: u32,
68    /// List of entities referenced by memories but absent from the entities table.
69    /// Empty in a healthy DB. Per the contract documented in AGENT_PROTOCOL.md.
70    missing_entities: Vec<String>,
71    /// WAL file size in MB (0.0 if WAL does not exist or journal_mode != wal).
72    wal_size_mb: f64,
73    /// SQLite journaling mode (wal, delete, truncate, persist, memory, off).
74    journal_mode: String,
75    /// SQLite version string, e.g. `"3.46.0"`.
76    sqlite_version: String,
77    /// Fraction of relationships that use the `mentions` relation type (0.0–1.0).
78    /// Omitted when there are no relationships in the database.
79    #[serde(skip_serializing_if = "Option::is_none")]
80    mentions_ratio: Option<f64>,
81    /// Human-readable warning when `mentions` relationships dominate the graph (ratio > 0.5).
82    /// Omitted when the ratio is within acceptable bounds or there are no relationships.
83    #[serde(skip_serializing_if = "Option::is_none")]
84    mentions_warning: Option<String>,
85    checks: Vec<HealthCheck>,
86    elapsed_ms: u64,
87}
88
89/// Checks whether a table (including virtual ones) exists in sqlite_master.
90fn table_exists(conn: &rusqlite::Connection, table_name: &str) -> bool {
91    conn.query_row(
92        "SELECT COUNT(*) FROM sqlite_master WHERE type IN ('table', 'shadow') AND name = ?1",
93        rusqlite::params![table_name],
94        |r| r.get::<_, i64>(0),
95    )
96    .unwrap_or(0)
97        > 0
98}
99
100pub fn run(args: HealthArgs) -> Result<(), AppError> {
101    let start = Instant::now();
102    let _ = args.json; // --json is a no-op because output is already JSON by default
103    let _ = args.format; // --format is a no-op; JSON is always emitted on stdout
104    let paths = AppPaths::resolve(args.db.as_deref())?;
105
106    crate::storage::connection::ensure_db_ready(&paths)?;
107
108    let conn = open_ro(&paths.db)?;
109
110    let integrity: String = conn.query_row("PRAGMA integrity_check;", [], |r| r.get(0))?;
111    let integrity_ok = integrity == "ok";
112    tracing::info!(integrity_ok = %integrity_ok, "PRAGMA integrity_check complete");
113
114    if !integrity_ok {
115        let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
116        output::emit_json(&HealthResponse {
117            status: "degraded".to_string(),
118            integrity: integrity.clone(),
119            integrity_ok: false,
120            schema_ok: false,
121            vec_memories_ok: false,
122            vec_entities_ok: false,
123            vec_chunks_ok: false,
124            fts_ok: false,
125            fts_query_ok: false,
126            model_ok: false,
127            counts: HealthCounts {
128                memories: 0,
129                memories_total: 0,
130                entities: 0,
131                relationships: 0,
132                vec_memories: 0,
133            },
134            db_path: paths.db.display().to_string(),
135            db_size_bytes,
136            schema_version: 0,
137            sqlite_version: "unknown".to_string(),
138            missing_entities: vec![],
139            wal_size_mb: 0.0,
140            journal_mode: "unknown".to_string(),
141            mentions_ratio: None,
142            mentions_warning: None,
143            checks: vec![HealthCheck {
144                name: "integrity".to_string(),
145                ok: false,
146                detail: Some(integrity),
147            }],
148            elapsed_ms: start.elapsed().as_millis() as u64,
149        })?;
150        return Err(AppError::Database(rusqlite::Error::SqliteFailure(
151            rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_CORRUPT),
152            Some("integrity check failed".to_string()),
153        )));
154    }
155
156    let memories_count: i64 = conn.query_row(
157        "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
158        [],
159        |r| r.get(0),
160    )?;
161    let entities_count: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
162    let relationships_count: i64 =
163        conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?;
164    let vec_memories_count: i64 =
165        conn.query_row("SELECT COUNT(*) FROM vec_memories", [], |r| r.get(0))?;
166
167    let mentions_count: i64 = conn.query_row(
168        "SELECT COUNT(*) FROM relationships WHERE relation = 'mentions'",
169        [],
170        |r| r.get(0),
171    )?;
172    let (mentions_ratio, mentions_warning) = if relationships_count > 0 {
173        let ratio = mentions_count as f64 / relationships_count as f64;
174        let warning = if ratio > 0.5 {
175            Some(format!(
176                "mentions relationships dominate graph at {:.1}% ({}/{} total); consider running prune-relations --relation mentions --dry-run",
177                ratio * 100.0,
178                mentions_count,
179                relationships_count
180            ))
181        } else {
182            None
183        };
184        (Some(ratio), warning)
185    } else {
186        (None, None)
187    };
188
189    let status = "ok";
190
191    let schema_version: u32 = conn
192        .query_row(
193            "SELECT COALESCE(MAX(version), 0) FROM refinery_schema_history",
194            [],
195            |r| r.get::<_, i64>(0),
196        )
197        .unwrap_or(0) as u32;
198
199    let schema_ok = schema_version > 0;
200
201    // Checks vector tables via sqlite_master
202    let vec_memories_ok = table_exists(&conn, "vec_memories");
203    let vec_entities_ok = table_exists(&conn, "vec_entities");
204    let vec_chunks_ok = table_exists(&conn, "vec_chunks");
205    tracing::info!(vec_memories_ok = %vec_memories_ok, vec_entities_ok = %vec_entities_ok, "vector table checks complete");
206    let fts_ok = table_exists(&conn, "fts_memories");
207
208    // Verifies that FTS5 can execute a MATCH query (catches index corruption distinct from table absence).
209    let fts_query_ok = if fts_ok {
210        conn.query_row(
211            "SELECT COUNT(*) FROM fts_memories WHERE fts_memories MATCH 'a' LIMIT 1",
212            [],
213            |r| r.get::<_, i64>(0),
214        )
215        .is_ok()
216    } else {
217        false
218    };
219
220    tracing::info!(fts_ok = %fts_ok, fts_query_ok = %fts_query_ok, "FTS5 checks complete");
221
222    // Captures the SQLite runtime version for observability.
223    let sqlite_version: String = conn
224        .query_row("SELECT sqlite_version()", [], |r| r.get(0))
225        .unwrap_or_else(|_| "unknown".to_string());
226
227    // Detects orphan entities referenced by memories but absent from the entities table.
228    let mut missing_entities: Vec<String> = Vec::new();
229    let mut stmt = conn.prepare(
230        "SELECT DISTINCT me.entity_id
231         FROM memory_entities me
232         LEFT JOIN entities e ON e.id = me.entity_id
233         WHERE e.id IS NULL",
234    )?;
235    let orphans: Vec<i64> = stmt
236        .query_map([], |r| r.get(0))?
237        .collect::<Result<Vec<_>, _>>()?;
238    for id in orphans {
239        missing_entities.push(format!("entity_id={id}"));
240    }
241
242    let journal_mode: String = conn
243        .query_row("PRAGMA journal_mode", [], |row| row.get::<_, String>(0))
244        .unwrap_or_else(|_| "unknown".to_string());
245
246    let wal_size_mb = fs::metadata(format!("{}-wal", paths.db.display()))
247        .map(|m| m.len() as f64 / 1024.0 / 1024.0)
248        .unwrap_or(0.0);
249
250    // Database file size in bytes
251    let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
252
253    // Checks whether the ONNX model is present in the cache
254    let model_dir = paths.models.join("models--intfloat--multilingual-e5-small");
255    let model_ok = model_dir.exists();
256
257    // Builds the checks array for detailed diagnostics
258    let mut checks: Vec<HealthCheck> = Vec::with_capacity(8);
259
260    // At this point integrity_ok is always true (corrupt DB returned early above).
261    checks.push(HealthCheck {
262        name: "integrity".to_string(),
263        ok: true,
264        detail: None,
265    });
266
267    checks.push(HealthCheck {
268        name: "schema_version".to_string(),
269        ok: schema_ok,
270        detail: if schema_ok {
271            None
272        } else {
273            Some(format!("schema_version={schema_version} (expected >0)"))
274        },
275    });
276
277    checks.push(HealthCheck {
278        name: "vec_memories".to_string(),
279        ok: vec_memories_ok,
280        detail: if vec_memories_ok {
281            None
282        } else {
283            Some("vec_memories table missing from sqlite_master".to_string())
284        },
285    });
286
287    checks.push(HealthCheck {
288        name: "vec_entities".to_string(),
289        ok: vec_entities_ok,
290        detail: if vec_entities_ok {
291            None
292        } else {
293            Some("vec_entities table missing from sqlite_master".to_string())
294        },
295    });
296
297    checks.push(HealthCheck {
298        name: "vec_chunks".to_string(),
299        ok: vec_chunks_ok,
300        detail: if vec_chunks_ok {
301            None
302        } else {
303            Some("vec_chunks table missing from sqlite_master".to_string())
304        },
305    });
306
307    checks.push(HealthCheck {
308        name: "fts_memories".to_string(),
309        ok: fts_ok,
310        detail: if fts_ok {
311            None
312        } else {
313            Some("fts_memories table missing from sqlite_master".to_string())
314        },
315    });
316
317    checks.push(HealthCheck {
318        name: "fts_query".to_string(),
319        ok: fts_query_ok,
320        detail: if fts_query_ok {
321            None
322        } else {
323            Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string())
324        },
325    });
326
327    checks.push(HealthCheck {
328        name: "model_onnx".to_string(),
329        ok: model_ok,
330        detail: if model_ok {
331            None
332        } else {
333            Some(format!(
334                "model missing at {}; run 'sqlite-graphrag models download'",
335                model_dir.display()
336            ))
337        },
338    });
339
340    let response = HealthResponse {
341        status: status.to_string(),
342        integrity,
343        integrity_ok,
344        schema_ok,
345        vec_memories_ok,
346        vec_entities_ok,
347        vec_chunks_ok,
348        fts_ok,
349        fts_query_ok,
350        model_ok,
351        counts: HealthCounts {
352            memories: memories_count,
353            memories_total: memories_count,
354            entities: entities_count,
355            relationships: relationships_count,
356            vec_memories: vec_memories_count,
357        },
358        db_path: paths.db.display().to_string(),
359        db_size_bytes,
360        schema_version,
361        sqlite_version,
362        missing_entities,
363        wal_size_mb,
364        journal_mode,
365        mentions_ratio,
366        mentions_warning,
367        checks,
368        elapsed_ms: start.elapsed().as_millis() as u64,
369    };
370
371    output::emit_json(&response)?;
372
373    Ok(())
374}
375
376#[cfg(test)]
377mod tests {
378    use super::*;
379
380    #[test]
381    fn health_check_serializes_all_new_fields() {
382        let response = HealthResponse {
383            status: "ok".to_string(),
384            integrity: "ok".to_string(),
385            integrity_ok: true,
386            schema_ok: true,
387            vec_memories_ok: true,
388            vec_entities_ok: true,
389            vec_chunks_ok: true,
390            fts_ok: true,
391            fts_query_ok: true,
392            model_ok: false,
393            counts: HealthCounts {
394                memories: 5,
395                memories_total: 5,
396                entities: 3,
397                relationships: 2,
398                vec_memories: 5,
399            },
400            db_path: "/tmp/test.sqlite".to_string(),
401            db_size_bytes: 4096,
402            schema_version: 6,
403            sqlite_version: "3.46.0".to_string(),
404            elapsed_ms: 0,
405            missing_entities: vec![],
406            wal_size_mb: 0.0,
407            journal_mode: "wal".to_string(),
408            mentions_ratio: None,
409            mentions_warning: None,
410            checks: vec![
411                HealthCheck {
412                    name: "integrity".to_string(),
413                    ok: true,
414                    detail: None,
415                },
416                HealthCheck {
417                    name: "model_onnx".to_string(),
418                    ok: false,
419                    detail: Some("model missing".to_string()),
420                },
421            ],
422        };
423
424        let json = serde_json::to_value(&response).unwrap();
425        assert_eq!(json["status"], "ok");
426        assert_eq!(json["integrity_ok"], true);
427        assert_eq!(json["schema_ok"], true);
428        assert_eq!(json["vec_memories_ok"], true);
429        assert_eq!(json["vec_entities_ok"], true);
430        assert_eq!(json["vec_chunks_ok"], true);
431        assert_eq!(json["fts_ok"], true);
432        assert_eq!(json["model_ok"], false);
433        assert_eq!(json["db_size_bytes"], 4096u64);
434        assert!(json["checks"].is_array());
435        assert_eq!(json["checks"].as_array().unwrap().len(), 2);
436
437        // Verifies that detail is absent when ok=true (skip_serializing_if)
438        let integrity_check = &json["checks"][0];
439        assert_eq!(integrity_check["name"], "integrity");
440        assert_eq!(integrity_check["ok"], true);
441        assert!(integrity_check.get("detail").is_none());
442
443        // Verifies that detail is present when ok=false
444        let model_check = &json["checks"][1];
445        assert_eq!(model_check["name"], "model_onnx");
446        assert_eq!(model_check["ok"], false);
447        assert_eq!(model_check["detail"], "model missing");
448    }
449
450    #[test]
451    fn health_check_without_detail_omits_field() {
452        let check = HealthCheck {
453            name: "vec_memories".to_string(),
454            ok: true,
455            detail: None,
456        };
457        let json = serde_json::to_value(&check).unwrap();
458        assert!(
459            json.get("detail").is_none(),
460            "detail field must be omitted when None"
461        );
462    }
463
464    #[test]
465    fn health_check_with_detail_serializes_field() {
466        let check = HealthCheck {
467            name: "fts_memories".to_string(),
468            ok: false,
469            detail: Some("fts_memories table missing from sqlite_master".to_string()),
470        };
471        let json = serde_json::to_value(&check).unwrap();
472        assert_eq!(
473            json["detail"],
474            "fts_memories table missing from sqlite_master"
475        );
476    }
477
478    #[test]
479    fn health_response_fts_query_ok_and_sqlite_version_serialize() {
480        // Verifies that fts_query_ok and sqlite_version appear in the serialized JSON
481        // with the expected keys and values.
482        let response = HealthResponse {
483            status: "ok".to_string(),
484            integrity: "ok".to_string(),
485            integrity_ok: true,
486            schema_ok: true,
487            vec_memories_ok: true,
488            vec_entities_ok: true,
489            vec_chunks_ok: true,
490            fts_ok: true,
491            fts_query_ok: true,
492            model_ok: true,
493            counts: HealthCounts {
494                memories: 0,
495                memories_total: 0,
496                entities: 0,
497                relationships: 0,
498                vec_memories: 0,
499            },
500            db_path: "/tmp/test.sqlite".to_string(),
501            db_size_bytes: 0,
502            schema_version: 1,
503            sqlite_version: "3.45.1".to_string(),
504            elapsed_ms: 0,
505            missing_entities: vec![],
506            wal_size_mb: 0.0,
507            journal_mode: "wal".to_string(),
508            mentions_ratio: None,
509            mentions_warning: None,
510            checks: vec![],
511        };
512
513        let json = serde_json::to_value(&response).unwrap();
514
515        // fts_query_ok must appear at the top level
516        assert_eq!(
517            json["fts_query_ok"], true,
518            "fts_query_ok must be present and true in serialized JSON"
519        );
520
521        // sqlite_version must appear at the top level with the exact string
522        assert_eq!(
523            json["sqlite_version"], "3.45.1",
524            "sqlite_version must be present and match the provided string"
525        );
526
527        // Verify fts_query_ok=false path includes the expected detail message
528        let check_fail = HealthCheck {
529            name: "fts_query".to_string(),
530            ok: false,
531            detail: Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string()),
532        };
533        let check_json = serde_json::to_value(&check_fail).unwrap();
534        assert_eq!(check_json["name"], "fts_query");
535        assert_eq!(check_json["ok"], false);
536        assert_eq!(
537            check_json["detail"],
538            "FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'"
539        );
540    }
541}