// sqlite_graphrag/commands/health.rs

//! Handler for the `health` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use serde::Serialize;
8use std::fs;
9use std::time::Instant;
10
11#[derive(clap::Args)]
12#[command(after_long_help = "EXAMPLES:\n  \
13    # Check database health (connectivity, integrity, vector index)\n  \
14    sqlite-graphrag health\n\n  \
15    # Check health of a database at a custom path\n  \
16    sqlite-graphrag health --db /path/to/graphrag.sqlite\n\n  \
17    # Use SQLITE_GRAPHRAG_DB_PATH env var\n  \
18    SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag health")]
19pub struct HealthArgs {
20    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
21    pub db: Option<String>,
22    /// Explicit JSON flag. Accepted as a no-op because output is already JSON by default.
23    #[arg(long, default_value_t = false)]
24    pub json: bool,
25    /// Output format: `json` or `text`. JSON is always emitted on stdout regardless of the value.
26    #[arg(long, value_parser = ["json", "text"], hide = true)]
27    pub format: Option<String>,
28}
29
30#[derive(Serialize)]
31struct HealthCounts {
32    memories: i64,
33    /// Alias of `memories` for the documented contract in AGENT_PROTOCOL.md.
34    memories_total: i64,
35    entities: i64,
36    relationships: i64,
37    vec_memories: i64,
38}
39
40#[derive(Serialize)]
41struct HealthCheck {
42    name: String,
43    ok: bool,
44    #[serde(skip_serializing_if = "Option::is_none")]
45    detail: Option<String>,
46}
47
48#[derive(Serialize)]
49struct HealthResponse {
50    status: String,
51    integrity: String,
52    integrity_ok: bool,
53    schema_ok: bool,
54    vec_memories_ok: bool,
55    vec_entities_ok: bool,
56    vec_chunks_ok: bool,
57    fts_ok: bool,
58    model_ok: bool,
59    counts: HealthCounts,
60    db_path: String,
61    db_size_bytes: u64,
62    /// MAX(version) from refinery_schema_history — number of the last applied migration.
63    /// Distinct from PRAGMA schema_version (SQLite DDL counter) and PRAGMA user_version
64    /// (canonical SCHEMA_USER_VERSION from __debug_schema).
65    schema_version: u32,
66    /// List of entities referenced by memories but absent from the entities table.
67    /// Empty in a healthy DB. Per the contract documented in AGENT_PROTOCOL.md.
68    missing_entities: Vec<String>,
69    /// WAL file size in MB (0.0 if WAL does not exist or journal_mode != wal).
70    wal_size_mb: f64,
71    /// SQLite journaling mode (wal, delete, truncate, persist, memory, off).
72    journal_mode: String,
73    /// Fraction of relationships that use the `mentions` relation type (0.0–1.0).
74    /// Omitted when there are no relationships in the database.
75    #[serde(skip_serializing_if = "Option::is_none")]
76    mentions_ratio: Option<f64>,
77    /// Human-readable warning when `mentions` relationships dominate the graph (ratio > 0.5).
78    /// Omitted when the ratio is within acceptable bounds or there are no relationships.
79    #[serde(skip_serializing_if = "Option::is_none")]
80    mentions_warning: Option<String>,
81    checks: Vec<HealthCheck>,
82    elapsed_ms: u64,
83}
84
85/// Checks whether a table (including virtual ones) exists in sqlite_master.
86fn table_exists(conn: &rusqlite::Connection, table_name: &str) -> bool {
87    conn.query_row(
88        "SELECT COUNT(*) FROM sqlite_master WHERE type IN ('table', 'shadow') AND name = ?1",
89        rusqlite::params![table_name],
90        |r| r.get::<_, i64>(0),
91    )
92    .unwrap_or(0)
93        > 0
94}
95
96pub fn run(args: HealthArgs) -> Result<(), AppError> {
97    let start = Instant::now();
98    let _ = args.json; // --json is a no-op because output is already JSON by default
99    let _ = args.format; // --format is a no-op; JSON is always emitted on stdout
100    let paths = AppPaths::resolve(args.db.as_deref())?;
101
102    crate::storage::connection::ensure_db_ready(&paths)?;
103
104    let conn = open_ro(&paths.db)?;
105
106    let integrity: String = conn.query_row("PRAGMA integrity_check;", [], |r| r.get(0))?;
107    let integrity_ok = integrity == "ok";
108
109    if !integrity_ok {
110        let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
111        output::emit_json(&HealthResponse {
112            status: "degraded".to_string(),
113            integrity: integrity.clone(),
114            integrity_ok: false,
115            schema_ok: false,
116            vec_memories_ok: false,
117            vec_entities_ok: false,
118            vec_chunks_ok: false,
119            fts_ok: false,
120            model_ok: false,
121            counts: HealthCounts {
122                memories: 0,
123                memories_total: 0,
124                entities: 0,
125                relationships: 0,
126                vec_memories: 0,
127            },
128            db_path: paths.db.display().to_string(),
129            db_size_bytes,
130            schema_version: 0,
131            missing_entities: vec![],
132            wal_size_mb: 0.0,
133            journal_mode: "unknown".to_string(),
134            mentions_ratio: None,
135            mentions_warning: None,
136            checks: vec![HealthCheck {
137                name: "integrity".to_string(),
138                ok: false,
139                detail: Some(integrity),
140            }],
141            elapsed_ms: start.elapsed().as_millis() as u64,
142        })?;
143        return Err(AppError::Database(rusqlite::Error::SqliteFailure(
144            rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_CORRUPT),
145            Some("integrity check failed".to_string()),
146        )));
147    }
148
149    let memories_count: i64 = conn.query_row(
150        "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
151        [],
152        |r| r.get(0),
153    )?;
154    let entities_count: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
155    let relationships_count: i64 =
156        conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?;
157    let vec_memories_count: i64 =
158        conn.query_row("SELECT COUNT(*) FROM vec_memories", [], |r| r.get(0))?;
159
160    let mentions_count: i64 = conn.query_row(
161        "SELECT COUNT(*) FROM relationships WHERE relation = 'mentions'",
162        [],
163        |r| r.get(0),
164    )?;
165    let (mentions_ratio, mentions_warning) = if relationships_count > 0 {
166        let ratio = mentions_count as f64 / relationships_count as f64;
167        let warning = if ratio > 0.5 {
168            Some(format!(
169                "mentions relationships dominate graph at {:.1}% ({}/{} total); consider running prune-relations --relation mentions --dry-run",
170                ratio * 100.0,
171                mentions_count,
172                relationships_count
173            ))
174        } else {
175            None
176        };
177        (Some(ratio), warning)
178    } else {
179        (None, None)
180    };
181
182    let status = "ok";
183
184    let schema_version: u32 = conn
185        .query_row(
186            "SELECT COALESCE(MAX(version), 0) FROM refinery_schema_history",
187            [],
188            |r| r.get::<_, i64>(0),
189        )
190        .unwrap_or(0) as u32;
191
192    let schema_ok = schema_version > 0;
193
194    // Checks vector tables via sqlite_master
195    let vec_memories_ok = table_exists(&conn, "vec_memories");
196    let vec_entities_ok = table_exists(&conn, "vec_entities");
197    let vec_chunks_ok = table_exists(&conn, "vec_chunks");
198    let fts_ok = table_exists(&conn, "fts_memories");
199
200    // Detects orphan entities referenced by memories but absent from the entities table.
201    let mut missing_entities: Vec<String> = Vec::new();
202    let mut stmt = conn.prepare(
203        "SELECT DISTINCT me.entity_id
204         FROM memory_entities me
205         LEFT JOIN entities e ON e.id = me.entity_id
206         WHERE e.id IS NULL",
207    )?;
208    let orphans: Vec<i64> = stmt
209        .query_map([], |r| r.get(0))?
210        .collect::<Result<Vec<_>, _>>()?;
211    for id in orphans {
212        missing_entities.push(format!("entity_id={id}"));
213    }
214
215    let journal_mode: String = conn
216        .query_row("PRAGMA journal_mode", [], |row| row.get::<_, String>(0))
217        .unwrap_or_else(|_| "unknown".to_string());
218
219    let wal_size_mb = fs::metadata(format!("{}-wal", paths.db.display()))
220        .map(|m| m.len() as f64 / 1024.0 / 1024.0)
221        .unwrap_or(0.0);
222
223    // Database file size in bytes
224    let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
225
226    // Checks whether the ONNX model is present in the cache
227    let model_dir = paths.models.join("models--intfloat--multilingual-e5-small");
228    let model_ok = model_dir.exists();
229
230    // Builds the checks array for detailed diagnostics
231    let mut checks: Vec<HealthCheck> = Vec::with_capacity(7);
232
233    // At this point integrity_ok is always true (corrupt DB returned early above).
234    checks.push(HealthCheck {
235        name: "integrity".to_string(),
236        ok: true,
237        detail: None,
238    });
239
240    checks.push(HealthCheck {
241        name: "schema_version".to_string(),
242        ok: schema_ok,
243        detail: if schema_ok {
244            None
245        } else {
246            Some(format!("schema_version={schema_version} (expected >0)"))
247        },
248    });
249
250    checks.push(HealthCheck {
251        name: "vec_memories".to_string(),
252        ok: vec_memories_ok,
253        detail: if vec_memories_ok {
254            None
255        } else {
256            Some("vec_memories table missing from sqlite_master".to_string())
257        },
258    });
259
260    checks.push(HealthCheck {
261        name: "vec_entities".to_string(),
262        ok: vec_entities_ok,
263        detail: if vec_entities_ok {
264            None
265        } else {
266            Some("vec_entities table missing from sqlite_master".to_string())
267        },
268    });
269
270    checks.push(HealthCheck {
271        name: "vec_chunks".to_string(),
272        ok: vec_chunks_ok,
273        detail: if vec_chunks_ok {
274            None
275        } else {
276            Some("vec_chunks table missing from sqlite_master".to_string())
277        },
278    });
279
280    checks.push(HealthCheck {
281        name: "fts_memories".to_string(),
282        ok: fts_ok,
283        detail: if fts_ok {
284            None
285        } else {
286            Some("fts_memories table missing from sqlite_master".to_string())
287        },
288    });
289
290    checks.push(HealthCheck {
291        name: "model_onnx".to_string(),
292        ok: model_ok,
293        detail: if model_ok {
294            None
295        } else {
296            Some(format!(
297                "model missing at {}; run 'sqlite-graphrag models download'",
298                model_dir.display()
299            ))
300        },
301    });
302
303    let response = HealthResponse {
304        status: status.to_string(),
305        integrity,
306        integrity_ok,
307        schema_ok,
308        vec_memories_ok,
309        vec_entities_ok,
310        vec_chunks_ok,
311        fts_ok,
312        model_ok,
313        counts: HealthCounts {
314            memories: memories_count,
315            memories_total: memories_count,
316            entities: entities_count,
317            relationships: relationships_count,
318            vec_memories: vec_memories_count,
319        },
320        db_path: paths.db.display().to_string(),
321        db_size_bytes,
322        schema_version,
323        missing_entities,
324        wal_size_mb,
325        journal_mode,
326        mentions_ratio,
327        mentions_warning,
328        checks,
329        elapsed_ms: start.elapsed().as_millis() as u64,
330    };
331
332    output::emit_json(&response)?;
333
334    Ok(())
335}
336
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a `HealthCheck` from borrowed strings.
    fn check(name: &str, ok: bool, detail: Option<&str>) -> HealthCheck {
        HealthCheck {
            name: name.to_string(),
            ok,
            detail: detail.map(str::to_string),
        }
    }

    /// A fully populated response must expose every flag, the size and the
    /// checks array, and each check must honour `skip_serializing_if`.
    #[test]
    fn health_check_serializes_all_new_fields() {
        let response = HealthResponse {
            status: "ok".to_string(),
            integrity: "ok".to_string(),
            integrity_ok: true,
            schema_ok: true,
            vec_memories_ok: true,
            vec_entities_ok: true,
            vec_chunks_ok: true,
            fts_ok: true,
            model_ok: false,
            counts: HealthCounts {
                memories: 5,
                memories_total: 5,
                entities: 3,
                relationships: 2,
                vec_memories: 5,
            },
            db_path: "/tmp/test.sqlite".to_string(),
            db_size_bytes: 4096,
            schema_version: 6,
            elapsed_ms: 0,
            missing_entities: vec![],
            wal_size_mb: 0.0,
            journal_mode: "wal".to_string(),
            mentions_ratio: None,
            mentions_warning: None,
            checks: vec![
                check("integrity", true, None),
                check("model_onnx", false, Some("modelo ausente")),
            ],
        };

        let json = serde_json::to_value(&response).unwrap();
        assert_eq!(json["status"], "ok");
        for passing_flag in [
            "integrity_ok",
            "schema_ok",
            "vec_memories_ok",
            "vec_entities_ok",
            "vec_chunks_ok",
            "fts_ok",
        ] {
            assert_eq!(json[passing_flag], true);
        }
        assert_eq!(json["model_ok"], false);
        assert_eq!(json["db_size_bytes"], 4096u64);

        let checks = &json["checks"];
        assert!(checks.is_array());
        assert_eq!(checks.as_array().unwrap().len(), 2);

        // A passing check carries no `detail` key (skip_serializing_if).
        let passing = &checks[0];
        assert_eq!(passing["name"], "integrity");
        assert_eq!(passing["ok"], true);
        assert!(passing.get("detail").is_none());

        // A failing check keeps its `detail` text.
        let failing = &checks[1];
        assert_eq!(failing["name"], "model_onnx");
        assert_eq!(failing["ok"], false);
        assert_eq!(failing["detail"], "modelo ausente");
    }

    /// `detail: None` must not produce a `detail` key at all.
    #[test]
    fn health_check_without_detail_omits_field() {
        let json = serde_json::to_value(&check("vec_memories", true, None)).unwrap();
        assert!(
            json.get("detail").is_none(),
            "campo detail deve ser omitido quando None"
        );
    }

    /// `detail: Some(..)` must be serialized as a plain string.
    #[test]
    fn health_check_with_detail_serializes_field() {
        let failing = check("fts_memories", false, Some("tabela fts_memories ausente"));
        let json = serde_json::to_value(&failing).unwrap();
        assert_eq!(json["detail"], "tabela fts_memories ausente");
    }
}