Skip to main content

sqlite_graphrag/commands/
health.rs

1//! Handler for the `health` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use serde::Serialize;
8use std::fs;
9use std::time::Instant;
10
11#[derive(clap::Args)]
12#[command(after_long_help = "EXAMPLES:\n  \
13    # Check database health (connectivity, integrity, vector index)\n  \
14    sqlite-graphrag health\n\n  \
15    # Check health of a database at a custom path\n  \
16    sqlite-graphrag health --db /path/to/graphrag.sqlite\n\n  \
17    # Use SQLITE_GRAPHRAG_DB_PATH env var\n  \
18    SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag health")]
19pub struct HealthArgs {
20    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
21    pub db: Option<String>,
22    /// Explicit JSON flag. Accepted as a no-op because output is already JSON by default.
23    #[arg(long, default_value_t = false)]
24    pub json: bool,
25    /// Output format: `json` or `text`. JSON is always emitted on stdout regardless of the value.
26    #[arg(long, value_parser = ["json", "text"], hide = true)]
27    pub format: Option<String>,
28}
29
30#[derive(Serialize)]
31struct HealthCounts {
32    memories: i64,
33    /// Alias of `memories` for the documented contract in AGENT_PROTOCOL.md.
34    memories_total: i64,
35    entities: i64,
36    relationships: i64,
37    vec_memories: i64,
38}
39
40#[derive(Serialize)]
41struct HealthCheck {
42    name: String,
43    ok: bool,
44    #[serde(skip_serializing_if = "Option::is_none")]
45    detail: Option<String>,
46}
47
48#[derive(Serialize)]
49struct HealthResponse {
50    status: String,
51    integrity: String,
52    integrity_ok: bool,
53    schema_ok: bool,
54    vec_memories_ok: bool,
55    vec_entities_ok: bool,
56    vec_chunks_ok: bool,
57    fts_ok: bool,
58    /// Whether a live FTS5 MATCH query against fts_memories succeeded.
59    fts_query_ok: bool,
60    model_ok: bool,
61    counts: HealthCounts,
62    db_path: String,
63    db_size_bytes: u64,
64    /// MAX(version) from refinery_schema_history — number of the last applied migration.
65    /// Distinct from PRAGMA schema_version (SQLite DDL counter) and PRAGMA user_version
66    /// (canonical SCHEMA_USER_VERSION from __debug_schema).
67    schema_version: u32,
68    /// List of entities referenced by memories but absent from the entities table.
69    /// Empty in a healthy DB. Per the contract documented in AGENT_PROTOCOL.md.
70    missing_entities: Vec<String>,
71    /// WAL file size in MB (0.0 if WAL does not exist or journal_mode != wal).
72    wal_size_mb: f64,
73    /// SQLite journaling mode (wal, delete, truncate, persist, memory, off).
74    journal_mode: String,
75    /// SQLite version string, e.g. `"3.46.0"`.
76    sqlite_version: String,
77    /// Fraction of relationships that use the `mentions` relation type (0.0–1.0).
78    /// Omitted when there are no relationships in the database.
79    #[serde(skip_serializing_if = "Option::is_none")]
80    mentions_ratio: Option<f64>,
81    /// Human-readable warning when `mentions` relationships dominate the graph (ratio > 0.5).
82    /// Omitted when the ratio is within acceptable bounds or there are no relationships.
83    #[serde(skip_serializing_if = "Option::is_none")]
84    mentions_warning: Option<String>,
85    checks: Vec<HealthCheck>,
86    elapsed_ms: u64,
87}
88
89/// Checks whether a table (including virtual ones) exists in sqlite_master.
90fn table_exists(conn: &rusqlite::Connection, table_name: &str) -> bool {
91    conn.query_row(
92        "SELECT COUNT(*) FROM sqlite_master WHERE type IN ('table', 'shadow') AND name = ?1",
93        rusqlite::params![table_name],
94        |r| r.get::<_, i64>(0),
95    )
96    .unwrap_or(0)
97        > 0
98}
99
100pub fn run(args: HealthArgs) -> Result<(), AppError> {
101    let start = Instant::now();
102    let _ = args.json; // --json is a no-op because output is already JSON by default
103    let _ = args.format; // --format is a no-op; JSON is always emitted on stdout
104    let paths = AppPaths::resolve(args.db.as_deref())?;
105
106    crate::storage::connection::ensure_db_ready(&paths)?;
107
108    let conn = open_ro(&paths.db)?;
109
110    let integrity: String = conn.query_row("PRAGMA integrity_check;", [], |r| r.get(0))?;
111    let integrity_ok = integrity == "ok";
112
113    if !integrity_ok {
114        let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
115        output::emit_json(&HealthResponse {
116            status: "degraded".to_string(),
117            integrity: integrity.clone(),
118            integrity_ok: false,
119            schema_ok: false,
120            vec_memories_ok: false,
121            vec_entities_ok: false,
122            vec_chunks_ok: false,
123            fts_ok: false,
124            fts_query_ok: false,
125            model_ok: false,
126            counts: HealthCounts {
127                memories: 0,
128                memories_total: 0,
129                entities: 0,
130                relationships: 0,
131                vec_memories: 0,
132            },
133            db_path: paths.db.display().to_string(),
134            db_size_bytes,
135            schema_version: 0,
136            sqlite_version: "unknown".to_string(),
137            missing_entities: vec![],
138            wal_size_mb: 0.0,
139            journal_mode: "unknown".to_string(),
140            mentions_ratio: None,
141            mentions_warning: None,
142            checks: vec![HealthCheck {
143                name: "integrity".to_string(),
144                ok: false,
145                detail: Some(integrity),
146            }],
147            elapsed_ms: start.elapsed().as_millis() as u64,
148        })?;
149        return Err(AppError::Database(rusqlite::Error::SqliteFailure(
150            rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_CORRUPT),
151            Some("integrity check failed".to_string()),
152        )));
153    }
154
155    let memories_count: i64 = conn.query_row(
156        "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
157        [],
158        |r| r.get(0),
159    )?;
160    let entities_count: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
161    let relationships_count: i64 =
162        conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?;
163    let vec_memories_count: i64 =
164        conn.query_row("SELECT COUNT(*) FROM vec_memories", [], |r| r.get(0))?;
165
166    let mentions_count: i64 = conn.query_row(
167        "SELECT COUNT(*) FROM relationships WHERE relation = 'mentions'",
168        [],
169        |r| r.get(0),
170    )?;
171    let (mentions_ratio, mentions_warning) = if relationships_count > 0 {
172        let ratio = mentions_count as f64 / relationships_count as f64;
173        let warning = if ratio > 0.5 {
174            Some(format!(
175                "mentions relationships dominate graph at {:.1}% ({}/{} total); consider running prune-relations --relation mentions --dry-run",
176                ratio * 100.0,
177                mentions_count,
178                relationships_count
179            ))
180        } else {
181            None
182        };
183        (Some(ratio), warning)
184    } else {
185        (None, None)
186    };
187
188    let status = "ok";
189
190    let schema_version: u32 = conn
191        .query_row(
192            "SELECT COALESCE(MAX(version), 0) FROM refinery_schema_history",
193            [],
194            |r| r.get::<_, i64>(0),
195        )
196        .unwrap_or(0) as u32;
197
198    let schema_ok = schema_version > 0;
199
200    // Checks vector tables via sqlite_master
201    let vec_memories_ok = table_exists(&conn, "vec_memories");
202    let vec_entities_ok = table_exists(&conn, "vec_entities");
203    let vec_chunks_ok = table_exists(&conn, "vec_chunks");
204    let fts_ok = table_exists(&conn, "fts_memories");
205
206    // Verifies that FTS5 can execute a MATCH query (catches index corruption distinct from table absence).
207    let fts_query_ok = if fts_ok {
208        conn.query_row(
209            "SELECT COUNT(*) FROM fts_memories WHERE fts_memories MATCH 'a' LIMIT 1",
210            [],
211            |r| r.get::<_, i64>(0),
212        )
213        .is_ok()
214    } else {
215        false
216    };
217
218    // Captures the SQLite runtime version for observability.
219    let sqlite_version: String = conn
220        .query_row("SELECT sqlite_version()", [], |r| r.get(0))
221        .unwrap_or_else(|_| "unknown".to_string());
222
223    // Detects orphan entities referenced by memories but absent from the entities table.
224    let mut missing_entities: Vec<String> = Vec::new();
225    let mut stmt = conn.prepare(
226        "SELECT DISTINCT me.entity_id
227         FROM memory_entities me
228         LEFT JOIN entities e ON e.id = me.entity_id
229         WHERE e.id IS NULL",
230    )?;
231    let orphans: Vec<i64> = stmt
232        .query_map([], |r| r.get(0))?
233        .collect::<Result<Vec<_>, _>>()?;
234    for id in orphans {
235        missing_entities.push(format!("entity_id={id}"));
236    }
237
238    let journal_mode: String = conn
239        .query_row("PRAGMA journal_mode", [], |row| row.get::<_, String>(0))
240        .unwrap_or_else(|_| "unknown".to_string());
241
242    let wal_size_mb = fs::metadata(format!("{}-wal", paths.db.display()))
243        .map(|m| m.len() as f64 / 1024.0 / 1024.0)
244        .unwrap_or(0.0);
245
246    // Database file size in bytes
247    let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
248
249    // Checks whether the ONNX model is present in the cache
250    let model_dir = paths.models.join("models--intfloat--multilingual-e5-small");
251    let model_ok = model_dir.exists();
252
253    // Builds the checks array for detailed diagnostics
254    let mut checks: Vec<HealthCheck> = Vec::with_capacity(8);
255
256    // At this point integrity_ok is always true (corrupt DB returned early above).
257    checks.push(HealthCheck {
258        name: "integrity".to_string(),
259        ok: true,
260        detail: None,
261    });
262
263    checks.push(HealthCheck {
264        name: "schema_version".to_string(),
265        ok: schema_ok,
266        detail: if schema_ok {
267            None
268        } else {
269            Some(format!("schema_version={schema_version} (expected >0)"))
270        },
271    });
272
273    checks.push(HealthCheck {
274        name: "vec_memories".to_string(),
275        ok: vec_memories_ok,
276        detail: if vec_memories_ok {
277            None
278        } else {
279            Some("vec_memories table missing from sqlite_master".to_string())
280        },
281    });
282
283    checks.push(HealthCheck {
284        name: "vec_entities".to_string(),
285        ok: vec_entities_ok,
286        detail: if vec_entities_ok {
287            None
288        } else {
289            Some("vec_entities table missing from sqlite_master".to_string())
290        },
291    });
292
293    checks.push(HealthCheck {
294        name: "vec_chunks".to_string(),
295        ok: vec_chunks_ok,
296        detail: if vec_chunks_ok {
297            None
298        } else {
299            Some("vec_chunks table missing from sqlite_master".to_string())
300        },
301    });
302
303    checks.push(HealthCheck {
304        name: "fts_memories".to_string(),
305        ok: fts_ok,
306        detail: if fts_ok {
307            None
308        } else {
309            Some("fts_memories table missing from sqlite_master".to_string())
310        },
311    });
312
313    checks.push(HealthCheck {
314        name: "fts_query".to_string(),
315        ok: fts_query_ok,
316        detail: if fts_query_ok {
317            None
318        } else {
319            Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string())
320        },
321    });
322
323    checks.push(HealthCheck {
324        name: "model_onnx".to_string(),
325        ok: model_ok,
326        detail: if model_ok {
327            None
328        } else {
329            Some(format!(
330                "model missing at {}; run 'sqlite-graphrag models download'",
331                model_dir.display()
332            ))
333        },
334    });
335
336    let response = HealthResponse {
337        status: status.to_string(),
338        integrity,
339        integrity_ok,
340        schema_ok,
341        vec_memories_ok,
342        vec_entities_ok,
343        vec_chunks_ok,
344        fts_ok,
345        fts_query_ok,
346        model_ok,
347        counts: HealthCounts {
348            memories: memories_count,
349            memories_total: memories_count,
350            entities: entities_count,
351            relationships: relationships_count,
352            vec_memories: vec_memories_count,
353        },
354        db_path: paths.db.display().to_string(),
355        db_size_bytes,
356        schema_version,
357        sqlite_version,
358        missing_entities,
359        wal_size_mb,
360        journal_mode,
361        mentions_ratio,
362        mentions_warning,
363        checks,
364        elapsed_ms: start.elapsed().as_millis() as u64,
365    };
366
367    output::emit_json(&response)?;
368
369    Ok(())
370}
371
372#[cfg(test)]
373mod tests {
374    use super::*;
375
376    #[test]
377    fn health_check_serializes_all_new_fields() {
378        let response = HealthResponse {
379            status: "ok".to_string(),
380            integrity: "ok".to_string(),
381            integrity_ok: true,
382            schema_ok: true,
383            vec_memories_ok: true,
384            vec_entities_ok: true,
385            vec_chunks_ok: true,
386            fts_ok: true,
387            fts_query_ok: true,
388            model_ok: false,
389            counts: HealthCounts {
390                memories: 5,
391                memories_total: 5,
392                entities: 3,
393                relationships: 2,
394                vec_memories: 5,
395            },
396            db_path: "/tmp/test.sqlite".to_string(),
397            db_size_bytes: 4096,
398            schema_version: 6,
399            sqlite_version: "3.46.0".to_string(),
400            elapsed_ms: 0,
401            missing_entities: vec![],
402            wal_size_mb: 0.0,
403            journal_mode: "wal".to_string(),
404            mentions_ratio: None,
405            mentions_warning: None,
406            checks: vec![
407                HealthCheck {
408                    name: "integrity".to_string(),
409                    ok: true,
410                    detail: None,
411                },
412                HealthCheck {
413                    name: "model_onnx".to_string(),
414                    ok: false,
415                    detail: Some("model missing".to_string()),
416                },
417            ],
418        };
419
420        let json = serde_json::to_value(&response).unwrap();
421        assert_eq!(json["status"], "ok");
422        assert_eq!(json["integrity_ok"], true);
423        assert_eq!(json["schema_ok"], true);
424        assert_eq!(json["vec_memories_ok"], true);
425        assert_eq!(json["vec_entities_ok"], true);
426        assert_eq!(json["vec_chunks_ok"], true);
427        assert_eq!(json["fts_ok"], true);
428        assert_eq!(json["model_ok"], false);
429        assert_eq!(json["db_size_bytes"], 4096u64);
430        assert!(json["checks"].is_array());
431        assert_eq!(json["checks"].as_array().unwrap().len(), 2);
432
433        // Verifies that detail is absent when ok=true (skip_serializing_if)
434        let integrity_check = &json["checks"][0];
435        assert_eq!(integrity_check["name"], "integrity");
436        assert_eq!(integrity_check["ok"], true);
437        assert!(integrity_check.get("detail").is_none());
438
439        // Verifies that detail is present when ok=false
440        let model_check = &json["checks"][1];
441        assert_eq!(model_check["name"], "model_onnx");
442        assert_eq!(model_check["ok"], false);
443        assert_eq!(model_check["detail"], "model missing");
444    }
445
446    #[test]
447    fn health_check_without_detail_omits_field() {
448        let check = HealthCheck {
449            name: "vec_memories".to_string(),
450            ok: true,
451            detail: None,
452        };
453        let json = serde_json::to_value(&check).unwrap();
454        assert!(
455            json.get("detail").is_none(),
456            "detail field must be omitted when None"
457        );
458    }
459
460    #[test]
461    fn health_check_with_detail_serializes_field() {
462        let check = HealthCheck {
463            name: "fts_memories".to_string(),
464            ok: false,
465            detail: Some("fts_memories table missing from sqlite_master".to_string()),
466        };
467        let json = serde_json::to_value(&check).unwrap();
468        assert_eq!(
469            json["detail"],
470            "fts_memories table missing from sqlite_master"
471        );
472    }
473
474    #[test]
475    fn health_response_fts_query_ok_and_sqlite_version_serialize() {
476        // Verifies that fts_query_ok and sqlite_version appear in the serialized JSON
477        // with the expected keys and values.
478        let response = HealthResponse {
479            status: "ok".to_string(),
480            integrity: "ok".to_string(),
481            integrity_ok: true,
482            schema_ok: true,
483            vec_memories_ok: true,
484            vec_entities_ok: true,
485            vec_chunks_ok: true,
486            fts_ok: true,
487            fts_query_ok: true,
488            model_ok: true,
489            counts: HealthCounts {
490                memories: 0,
491                memories_total: 0,
492                entities: 0,
493                relationships: 0,
494                vec_memories: 0,
495            },
496            db_path: "/tmp/test.sqlite".to_string(),
497            db_size_bytes: 0,
498            schema_version: 1,
499            sqlite_version: "3.45.1".to_string(),
500            elapsed_ms: 0,
501            missing_entities: vec![],
502            wal_size_mb: 0.0,
503            journal_mode: "wal".to_string(),
504            mentions_ratio: None,
505            mentions_warning: None,
506            checks: vec![],
507        };
508
509        let json = serde_json::to_value(&response).unwrap();
510
511        // fts_query_ok must appear at the top level
512        assert_eq!(
513            json["fts_query_ok"], true,
514            "fts_query_ok must be present and true in serialized JSON"
515        );
516
517        // sqlite_version must appear at the top level with the exact string
518        assert_eq!(
519            json["sqlite_version"], "3.45.1",
520            "sqlite_version must be present and match the provided string"
521        );
522
523        // Verify fts_query_ok=false path includes the expected detail message
524        let check_fail = HealthCheck {
525            name: "fts_query".to_string(),
526            ok: false,
527            detail: Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string()),
528        };
529        let check_json = serde_json::to_value(&check_fail).unwrap();
530        assert_eq!(check_json["name"], "fts_query");
531        assert_eq!(check_json["ok"], false);
532        assert_eq!(
533            check_json["detail"],
534            "FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'"
535        );
536    }
537}