Skip to main content

sqlite_graphrag/commands/
health.rs

1//! Handler for the `health` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use serde::Serialize;
8use std::fs;
9use std::time::Instant;
10
11#[derive(clap::Args)]
12#[command(after_long_help = "EXAMPLES:\n  \
13    # Check database health (connectivity, integrity, vector index)\n  \
14    sqlite-graphrag health\n\n  \
15    # Check health of a database at a custom path\n  \
16    sqlite-graphrag health --db /path/to/graphrag.sqlite\n\n  \
17    # Use SQLITE_GRAPHRAG_DB_PATH env var\n  \
18    SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag health")]
19pub struct HealthArgs {
20    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
21    pub db: Option<String>,
22    /// Explicit JSON flag. Accepted as a no-op because output is already JSON by default.
23    #[arg(long, default_value_t = false)]
24    pub json: bool,
25    /// Output format: `json` or `text`. JSON is always emitted on stdout regardless of the value.
26    #[arg(long, value_parser = ["json", "text"], hide = true)]
27    pub format: Option<String>,
28}
29
30#[derive(Serialize)]
31struct HealthCounts {
32    memories: i64,
33    /// Alias of `memories` for the documented contract in AGENT_PROTOCOL.md.
34    memories_total: i64,
35    entities: i64,
36    relationships: i64,
37    vec_memories: i64,
38}
39
40#[derive(Serialize)]
41struct HealthCheck {
42    name: String,
43    ok: bool,
44    #[serde(skip_serializing_if = "Option::is_none")]
45    detail: Option<String>,
46}
47
48#[derive(Serialize)]
49struct HealthResponse {
50    status: String,
51    integrity: String,
52    integrity_ok: bool,
53    schema_ok: bool,
54    vec_memories_ok: bool,
55    vec_entities_ok: bool,
56    vec_chunks_ok: bool,
57    fts_ok: bool,
58    /// Whether a live FTS5 MATCH query against fts_memories succeeded.
59    fts_query_ok: bool,
60    model_ok: bool,
61    counts: HealthCounts,
62    db_path: String,
63    db_size_bytes: u64,
64    /// MAX(version) from refinery_schema_history — number of the last applied migration.
65    /// Distinct from PRAGMA schema_version (SQLite DDL counter) and PRAGMA user_version
66    /// (canonical SCHEMA_USER_VERSION from __debug_schema).
67    schema_version: u32,
68    /// List of entities referenced by memories but absent from the entities table.
69    /// Empty in a healthy DB. Per the contract documented in AGENT_PROTOCOL.md.
70    missing_entities: Vec<String>,
71    /// WAL file size in MB (0.0 if WAL does not exist or journal_mode != wal).
72    wal_size_mb: f64,
73    /// SQLite journaling mode (wal, delete, truncate, persist, memory, off).
74    journal_mode: String,
75    /// SQLite version string, e.g. `"3.46.0"`.
76    sqlite_version: String,
77    /// Fraction of relationships that use the `mentions` relation type (0.0–1.0).
78    /// Omitted when there are no relationships in the database.
79    #[serde(skip_serializing_if = "Option::is_none")]
80    mentions_ratio: Option<f64>,
81    /// Human-readable warning when `mentions` relationships dominate the graph (ratio > 0.5).
82    /// Omitted when the ratio is within acceptable bounds or there are no relationships.
83    #[serde(skip_serializing_if = "Option::is_none")]
84    mentions_warning: Option<String>,
85    checks: Vec<HealthCheck>,
86    elapsed_ms: u64,
87}
88
89/// Checks whether a table (including virtual ones) exists in sqlite_master.
90fn table_exists(conn: &rusqlite::Connection, table_name: &str) -> bool {
91    conn.query_row(
92        "SELECT COUNT(*) FROM sqlite_master WHERE type IN ('table', 'shadow') AND name = ?1",
93        rusqlite::params![table_name],
94        |r| r.get::<_, i64>(0),
95    )
96    .unwrap_or(0)
97        > 0
98}
99
100pub fn run(args: HealthArgs) -> Result<(), AppError> {
101    let start = Instant::now();
102    let _ = args.json; // --json is a no-op because output is already JSON by default
103    let _ = args.format; // --format is a no-op; JSON is always emitted on stdout
104    let paths = AppPaths::resolve(args.db.as_deref())?;
105
106    crate::storage::connection::ensure_db_ready(&paths)?;
107
108    let conn = open_ro(&paths.db)?;
109
110    let integrity: String = conn.query_row("PRAGMA integrity_check;", [], |r| r.get(0))?;
111    let integrity_ok = integrity == "ok";
112    tracing::info!(integrity_ok = %integrity_ok, "PRAGMA integrity_check complete");
113
114    if !integrity_ok {
115        let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
116        output::emit_json(&HealthResponse {
117            status: "degraded".to_string(),
118            integrity: integrity.clone(),
119            integrity_ok: false,
120            schema_ok: false,
121            vec_memories_ok: false,
122            vec_entities_ok: false,
123            vec_chunks_ok: false,
124            fts_ok: false,
125            fts_query_ok: false,
126            model_ok: false,
127            counts: HealthCounts {
128                memories: 0,
129                memories_total: 0,
130                entities: 0,
131                relationships: 0,
132                vec_memories: 0,
133            },
134            db_path: paths.db.display().to_string(),
135            db_size_bytes,
136            schema_version: 0,
137            sqlite_version: "unknown".to_string(),
138            missing_entities: vec![],
139            wal_size_mb: 0.0,
140            journal_mode: "unknown".to_string(),
141            mentions_ratio: None,
142            mentions_warning: None,
143            checks: vec![HealthCheck {
144                name: "integrity".to_string(),
145                ok: false,
146                detail: Some(integrity),
147            }],
148            elapsed_ms: start.elapsed().as_millis() as u64,
149        })?;
150        return Err(AppError::Database(rusqlite::Error::SqliteFailure(
151            rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_CORRUPT),
152            Some("integrity check failed".to_string()),
153        )));
154    }
155
156    let memories_count: i64 = conn.query_row(
157        "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
158        [],
159        |r| r.get(0),
160    )?;
161    let entities_count: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
162    let relationships_count: i64 =
163        conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?;
164    let vec_memories_count: i64 =
165        conn.query_row("SELECT COUNT(*) FROM vec_memories", [], |r| r.get(0))?;
166
167    let mentions_count: i64 = conn.query_row(
168        "SELECT COUNT(*) FROM relationships WHERE relation = 'mentions'",
169        [],
170        |r| r.get(0),
171    )?;
172    let (mentions_ratio, mentions_warning) = if relationships_count > 0 {
173        let ratio = mentions_count as f64 / relationships_count as f64;
174        let warning = if ratio > 0.5 {
175            Some(format!(
176                "mentions relationships dominate graph at {:.1}% ({}/{} total); consider running prune-relations --relation mentions --dry-run",
177                ratio * 100.0,
178                mentions_count,
179                relationships_count
180            ))
181        } else {
182            None
183        };
184        (Some(ratio), warning)
185    } else {
186        (None, None)
187    };
188
189    let status = "ok";
190
191    let schema_version: u32 = conn
192        .query_row(
193            "SELECT COALESCE(MAX(version), 0) FROM refinery_schema_history",
194            [],
195            |r| r.get::<_, i64>(0),
196        )
197        .unwrap_or(0) as u32;
198
199    let schema_ok = schema_version > 0;
200
201    // Checks vector tables via sqlite_master
202    let vec_memories_ok = table_exists(&conn, "vec_memories");
203    let vec_entities_ok = table_exists(&conn, "vec_entities");
204    let vec_chunks_ok = table_exists(&conn, "vec_chunks");
205    tracing::info!(vec_memories_ok = %vec_memories_ok, vec_entities_ok = %vec_entities_ok, "vector table checks complete");
206    let fts_ok = table_exists(&conn, "fts_memories");
207
208    // Verifies that FTS5 can execute a MATCH query (catches index corruption distinct from table absence).
209    let fts_query_ok = if fts_ok {
210        conn.query_row(
211            "SELECT COUNT(*) FROM fts_memories WHERE fts_memories MATCH 'a' LIMIT 1",
212            [],
213            |r| r.get::<_, i64>(0),
214        )
215        .is_ok()
216    } else {
217        false
218    };
219
220    tracing::info!(fts_ok = %fts_ok, fts_query_ok = %fts_query_ok, "FTS5 checks complete");
221
222    // Captures the SQLite runtime version for observability.
223    let sqlite_version: String = conn
224        .query_row("SELECT sqlite_version()", [], |r| r.get(0))
225        .unwrap_or_else(|_| "unknown".to_string());
226
227    // Detects orphan entities referenced by memories but absent from the entities table.
228    let mut missing_entities: Vec<String> = Vec::new();
229    let mut stmt = conn.prepare(
230        "SELECT DISTINCT me.entity_id
231         FROM memory_entities me
232         LEFT JOIN entities e ON e.id = me.entity_id
233         WHERE e.id IS NULL",
234    )?;
235    let orphans: Vec<i64> = stmt
236        .query_map([], |r| r.get(0))?
237        .collect::<Result<Vec<_>, _>>()?;
238    for id in orphans {
239        missing_entities.push(format!("entity_id={id}"));
240    }
241
242    let journal_mode: String = conn
243        .query_row("PRAGMA journal_mode", [], |row| row.get::<_, String>(0))
244        .unwrap_or_else(|_| "unknown".to_string());
245
246    let wal_size_mb = fs::metadata(format!("{}-wal", paths.db.display()))
247        .map(|m| m.len() as f64 / 1024.0 / 1024.0)
248        .unwrap_or(0.0);
249
250    // Database file size in bytes
251    let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
252
253    // Checks whether the ONNX model is present in the cache
254    let model_dir = paths.models.join("models--intfloat--multilingual-e5-small");
255    let model_ok = model_dir.exists();
256    tracing::info!(model_ok = %model_ok, "embedding model check complete");
257
258    // Builds the checks array for detailed diagnostics
259    let mut checks: Vec<HealthCheck> = Vec::with_capacity(8);
260
261    // At this point integrity_ok is always true (corrupt DB returned early above).
262    checks.push(HealthCheck {
263        name: "integrity".to_string(),
264        ok: true,
265        detail: None,
266    });
267
268    checks.push(HealthCheck {
269        name: "schema_version".to_string(),
270        ok: schema_ok,
271        detail: if schema_ok {
272            None
273        } else {
274            Some(format!("schema_version={schema_version} (expected >0)"))
275        },
276    });
277
278    checks.push(HealthCheck {
279        name: "vec_memories".to_string(),
280        ok: vec_memories_ok,
281        detail: if vec_memories_ok {
282            None
283        } else {
284            Some("vec_memories table missing from sqlite_master".to_string())
285        },
286    });
287
288    checks.push(HealthCheck {
289        name: "vec_entities".to_string(),
290        ok: vec_entities_ok,
291        detail: if vec_entities_ok {
292            None
293        } else {
294            Some("vec_entities table missing from sqlite_master".to_string())
295        },
296    });
297
298    checks.push(HealthCheck {
299        name: "vec_chunks".to_string(),
300        ok: vec_chunks_ok,
301        detail: if vec_chunks_ok {
302            None
303        } else {
304            Some("vec_chunks table missing from sqlite_master".to_string())
305        },
306    });
307
308    checks.push(HealthCheck {
309        name: "fts_memories".to_string(),
310        ok: fts_ok,
311        detail: if fts_ok {
312            None
313        } else {
314            Some("fts_memories table missing from sqlite_master".to_string())
315        },
316    });
317
318    checks.push(HealthCheck {
319        name: "fts_query".to_string(),
320        ok: fts_query_ok,
321        detail: if fts_query_ok {
322            None
323        } else {
324            Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string())
325        },
326    });
327
328    checks.push(HealthCheck {
329        name: "model_onnx".to_string(),
330        ok: model_ok,
331        detail: if model_ok {
332            None
333        } else {
334            Some(format!(
335                "model missing at {}; run 'sqlite-graphrag models download'",
336                model_dir.display()
337            ))
338        },
339    });
340
341    let response = HealthResponse {
342        status: status.to_string(),
343        integrity,
344        integrity_ok,
345        schema_ok,
346        vec_memories_ok,
347        vec_entities_ok,
348        vec_chunks_ok,
349        fts_ok,
350        fts_query_ok,
351        model_ok,
352        counts: HealthCounts {
353            memories: memories_count,
354            memories_total: memories_count,
355            entities: entities_count,
356            relationships: relationships_count,
357            vec_memories: vec_memories_count,
358        },
359        db_path: paths.db.display().to_string(),
360        db_size_bytes,
361        schema_version,
362        sqlite_version,
363        missing_entities,
364        wal_size_mb,
365        journal_mode,
366        mentions_ratio,
367        mentions_warning,
368        checks,
369        elapsed_ms: start.elapsed().as_millis() as u64,
370    };
371
372    output::emit_json(&response)?;
373
374    Ok(())
375}
376
377#[cfg(test)]
378mod tests {
379    use super::*;
380
381    #[test]
382    fn health_check_serializes_all_new_fields() {
383        let response = HealthResponse {
384            status: "ok".to_string(),
385            integrity: "ok".to_string(),
386            integrity_ok: true,
387            schema_ok: true,
388            vec_memories_ok: true,
389            vec_entities_ok: true,
390            vec_chunks_ok: true,
391            fts_ok: true,
392            fts_query_ok: true,
393            model_ok: false,
394            counts: HealthCounts {
395                memories: 5,
396                memories_total: 5,
397                entities: 3,
398                relationships: 2,
399                vec_memories: 5,
400            },
401            db_path: "/tmp/test.sqlite".to_string(),
402            db_size_bytes: 4096,
403            schema_version: 6,
404            sqlite_version: "3.46.0".to_string(),
405            elapsed_ms: 0,
406            missing_entities: vec![],
407            wal_size_mb: 0.0,
408            journal_mode: "wal".to_string(),
409            mentions_ratio: None,
410            mentions_warning: None,
411            checks: vec![
412                HealthCheck {
413                    name: "integrity".to_string(),
414                    ok: true,
415                    detail: None,
416                },
417                HealthCheck {
418                    name: "model_onnx".to_string(),
419                    ok: false,
420                    detail: Some("model missing".to_string()),
421                },
422            ],
423        };
424
425        let json = serde_json::to_value(&response).unwrap();
426        assert_eq!(json["status"], "ok");
427        assert_eq!(json["integrity_ok"], true);
428        assert_eq!(json["schema_ok"], true);
429        assert_eq!(json["vec_memories_ok"], true);
430        assert_eq!(json["vec_entities_ok"], true);
431        assert_eq!(json["vec_chunks_ok"], true);
432        assert_eq!(json["fts_ok"], true);
433        assert_eq!(json["model_ok"], false);
434        assert_eq!(json["db_size_bytes"], 4096u64);
435        assert!(json["checks"].is_array());
436        assert_eq!(json["checks"].as_array().unwrap().len(), 2);
437
438        // Verifies that detail is absent when ok=true (skip_serializing_if)
439        let integrity_check = &json["checks"][0];
440        assert_eq!(integrity_check["name"], "integrity");
441        assert_eq!(integrity_check["ok"], true);
442        assert!(integrity_check.get("detail").is_none());
443
444        // Verifies that detail is present when ok=false
445        let model_check = &json["checks"][1];
446        assert_eq!(model_check["name"], "model_onnx");
447        assert_eq!(model_check["ok"], false);
448        assert_eq!(model_check["detail"], "model missing");
449    }
450
451    #[test]
452    fn health_check_without_detail_omits_field() {
453        let check = HealthCheck {
454            name: "vec_memories".to_string(),
455            ok: true,
456            detail: None,
457        };
458        let json = serde_json::to_value(&check).unwrap();
459        assert!(
460            json.get("detail").is_none(),
461            "detail field must be omitted when None"
462        );
463    }
464
465    #[test]
466    fn health_check_with_detail_serializes_field() {
467        let check = HealthCheck {
468            name: "fts_memories".to_string(),
469            ok: false,
470            detail: Some("fts_memories table missing from sqlite_master".to_string()),
471        };
472        let json = serde_json::to_value(&check).unwrap();
473        assert_eq!(
474            json["detail"],
475            "fts_memories table missing from sqlite_master"
476        );
477    }
478
479    #[test]
480    fn health_response_fts_query_ok_and_sqlite_version_serialize() {
481        // Verifies that fts_query_ok and sqlite_version appear in the serialized JSON
482        // with the expected keys and values.
483        let response = HealthResponse {
484            status: "ok".to_string(),
485            integrity: "ok".to_string(),
486            integrity_ok: true,
487            schema_ok: true,
488            vec_memories_ok: true,
489            vec_entities_ok: true,
490            vec_chunks_ok: true,
491            fts_ok: true,
492            fts_query_ok: true,
493            model_ok: true,
494            counts: HealthCounts {
495                memories: 0,
496                memories_total: 0,
497                entities: 0,
498                relationships: 0,
499                vec_memories: 0,
500            },
501            db_path: "/tmp/test.sqlite".to_string(),
502            db_size_bytes: 0,
503            schema_version: 1,
504            sqlite_version: "3.45.1".to_string(),
505            elapsed_ms: 0,
506            missing_entities: vec![],
507            wal_size_mb: 0.0,
508            journal_mode: "wal".to_string(),
509            mentions_ratio: None,
510            mentions_warning: None,
511            checks: vec![],
512        };
513
514        let json = serde_json::to_value(&response).unwrap();
515
516        // fts_query_ok must appear at the top level
517        assert_eq!(
518            json["fts_query_ok"], true,
519            "fts_query_ok must be present and true in serialized JSON"
520        );
521
522        // sqlite_version must appear at the top level with the exact string
523        assert_eq!(
524            json["sqlite_version"], "3.45.1",
525            "sqlite_version must be present and match the provided string"
526        );
527
528        // Verify fts_query_ok=false path includes the expected detail message
529        let check_fail = HealthCheck {
530            name: "fts_query".to_string(),
531            ok: false,
532            detail: Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string()),
533        };
534        let check_json = serde_json::to_value(&check_fail).unwrap();
535        assert_eq!(check_json["name"], "fts_query");
536        assert_eq!(check_json["ok"], false);
537        assert_eq!(
538            check_json["detail"],
539            "FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'"
540        );
541    }
542}