Skip to main content

sqlite_graphrag/commands/
health.rs

1//! Handler for the `health` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use serde::Serialize;
8use std::fs;
9use std::time::Instant;
10
// Command-line arguments accepted by the `health` subcommand.
//
// NOTE(review): the `///` comments on the fields below are presumably picked up
// by clap as user-facing help text, so maintainer notes in this struct use plain
// `//` comments to keep the CLI output unchanged.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Check database health (connectivity, integrity, vector index)\n  \
    sqlite-graphrag health\n\n  \
    # Check health of a database at a custom path\n  \
    sqlite-graphrag health --db /path/to/graphrag.sqlite\n\n  \
    # Use SQLITE_GRAPHRAG_DB_PATH env var\n  \
    SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag health")]
pub struct HealthArgs {
    // Optional database location, taken from `--db` or, failing that, from the
    // `SQLITE_GRAPHRAG_DB_PATH` environment variable. The value (or its absence)
    // is handed to `AppPaths::resolve` by `run`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
    /// Explicit JSON flag. Accepted as a no-op because output is already JSON by default.
    #[arg(long, default_value_t = false)]
    pub json: bool,
    /// Output format: `json` or `text`. JSON is always emitted on stdout regardless of the value.
    // Hidden from help output (`hide = true`); only `json` and `text` are accepted values.
    #[arg(long, value_parser = ["json", "text"], hide = true)]
    pub format: Option<String>,
}
29
/// Row counts reported under the `counts` key of the health response.
#[derive(Serialize)]
struct HealthCounts {
    /// Number of rows in `memories` whose `deleted_at` is NULL.
    memories: i64,
    /// Alias of `memories`, kept for the contract documented in AGENT_PROTOCOL.md.
    memories_total: i64,
    /// Number of rows in the `entities` table.
    entities: i64,
    /// Number of rows in the `relationships` table.
    relationships: i64,
    /// Number of rows in the `vec_memories` table.
    vec_memories: i64,
}
39
/// One named diagnostic entry in the `checks` array of the health response.
#[derive(Serialize)]
struct HealthCheck {
    /// Identifier of the check (for example `integrity` or `vec_memories`).
    name: String,
    /// Whether the check passed.
    ok: bool,
    /// Explanation attached to the check; left out of the serialized JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<String>,
}
47
/// JSON document emitted on stdout by the `health` subcommand.
///
/// Fields are serialized in declaration order.
#[derive(Serialize)]
struct HealthResponse {
    /// Overall status string: `"ok"`, or `"degraded"` when the integrity check fails.
    status: String,
    /// First row returned by `PRAGMA integrity_check` (`"ok"` when no problem was reported).
    integrity: String,
    /// `true` when `integrity` equals `"ok"`.
    integrity_ok: bool,
    /// `true` when `schema_version` is greater than zero.
    schema_ok: bool,
    /// `true` when `vec_memories` is listed in `sqlite_master`.
    vec_memories_ok: bool,
    /// `true` when `vec_entities` is listed in `sqlite_master`.
    vec_entities_ok: bool,
    /// `true` when `vec_chunks` is listed in `sqlite_master`.
    vec_chunks_ok: bool,
    /// `true` when `fts_memories` is listed in `sqlite_master`.
    fts_ok: bool,
    /// `true` when the embedding model directory exists in the models cache.
    model_ok: bool,
    /// Row counts for the main tables.
    counts: HealthCounts,
    /// Path of the inspected database file, rendered for display.
    db_path: String,
    /// Size of the database file in bytes (0 when its metadata cannot be read).
    db_size_bytes: u64,
    /// MAX(version) from refinery_schema_history — number of the last applied migration.
    /// Distinct from PRAGMA schema_version (SQLite DDL counter) and PRAGMA user_version
    /// (canonical SCHEMA_USER_VERSION from __debug_schema).
    schema_version: u32,
    /// List of entities referenced by memories but absent from the entities table.
    /// Empty in a healthy DB. Per the contract documented in AGENT_PROTOCOL.md.
    /// Each entry has the form `entity_id=<id>`.
    missing_entities: Vec<String>,
    /// WAL file size in MB (0.0 if WAL does not exist or journal_mode != wal).
    wal_size_mb: f64,
    /// SQLite journaling mode (wal, delete, truncate, persist, memory, off).
    journal_mode: String,
    /// Per-check diagnostics backing the boolean flags above.
    checks: Vec<HealthCheck>,
    /// Milliseconds elapsed between the start of the command and the creation of this response.
    elapsed_ms: u64,
}
76
77/// Checks whether a table (including virtual ones) exists in sqlite_master.
78fn table_exists(conn: &rusqlite::Connection, table_name: &str) -> bool {
79    conn.query_row(
80        "SELECT COUNT(*) FROM sqlite_master WHERE type IN ('table', 'shadow') AND name = ?1",
81        rusqlite::params![table_name],
82        |r| r.get::<_, i64>(0),
83    )
84    .unwrap_or(0)
85        > 0
86}
87
88pub fn run(args: HealthArgs) -> Result<(), AppError> {
89    let start = Instant::now();
90    let _ = args.json; // --json is a no-op because output is already JSON by default
91    let _ = args.format; // --format is a no-op; JSON is always emitted on stdout
92    let paths = AppPaths::resolve(args.db.as_deref())?;
93
94    crate::storage::connection::ensure_db_ready(&paths)?;
95
96    let conn = open_ro(&paths.db)?;
97
98    let integrity: String = conn.query_row("PRAGMA integrity_check;", [], |r| r.get(0))?;
99    let integrity_ok = integrity == "ok";
100
101    if !integrity_ok {
102        let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
103        output::emit_json(&HealthResponse {
104            status: "degraded".to_string(),
105            integrity: integrity.clone(),
106            integrity_ok: false,
107            schema_ok: false,
108            vec_memories_ok: false,
109            vec_entities_ok: false,
110            vec_chunks_ok: false,
111            fts_ok: false,
112            model_ok: false,
113            counts: HealthCounts {
114                memories: 0,
115                memories_total: 0,
116                entities: 0,
117                relationships: 0,
118                vec_memories: 0,
119            },
120            db_path: paths.db.display().to_string(),
121            db_size_bytes,
122            schema_version: 0,
123            missing_entities: vec![],
124            wal_size_mb: 0.0,
125            journal_mode: "unknown".to_string(),
126            checks: vec![HealthCheck {
127                name: "integrity".to_string(),
128                ok: false,
129                detail: Some(integrity),
130            }],
131            elapsed_ms: start.elapsed().as_millis() as u64,
132        })?;
133        return Err(AppError::Database(rusqlite::Error::SqliteFailure(
134            rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_CORRUPT),
135            Some("integrity check failed".to_string()),
136        )));
137    }
138
139    let memories_count: i64 = conn.query_row(
140        "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
141        [],
142        |r| r.get(0),
143    )?;
144    let entities_count: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
145    let relationships_count: i64 =
146        conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?;
147    let vec_memories_count: i64 =
148        conn.query_row("SELECT COUNT(*) FROM vec_memories", [], |r| r.get(0))?;
149
150    let status = "ok";
151
152    let schema_version: u32 = conn
153        .query_row(
154            "SELECT COALESCE(MAX(version), 0) FROM refinery_schema_history",
155            [],
156            |r| r.get::<_, i64>(0),
157        )
158        .unwrap_or(0) as u32;
159
160    let schema_ok = schema_version > 0;
161
162    // Checks vector tables via sqlite_master
163    let vec_memories_ok = table_exists(&conn, "vec_memories");
164    let vec_entities_ok = table_exists(&conn, "vec_entities");
165    let vec_chunks_ok = table_exists(&conn, "vec_chunks");
166    let fts_ok = table_exists(&conn, "fts_memories");
167
168    // Detects orphan entities referenced by memories but absent from the entities table.
169    let mut missing_entities: Vec<String> = Vec::new();
170    let mut stmt = conn.prepare(
171        "SELECT DISTINCT me.entity_id
172         FROM memory_entities me
173         LEFT JOIN entities e ON e.id = me.entity_id
174         WHERE e.id IS NULL",
175    )?;
176    let orphans: Vec<i64> = stmt
177        .query_map([], |r| r.get(0))?
178        .collect::<Result<Vec<_>, _>>()?;
179    for id in orphans {
180        missing_entities.push(format!("entity_id={id}"));
181    }
182
183    let journal_mode: String = conn
184        .query_row("PRAGMA journal_mode", [], |row| row.get::<_, String>(0))
185        .unwrap_or_else(|_| "unknown".to_string());
186
187    let wal_size_mb = fs::metadata(format!("{}-wal", paths.db.display()))
188        .map(|m| m.len() as f64 / 1024.0 / 1024.0)
189        .unwrap_or(0.0);
190
191    // Database file size in bytes
192    let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
193
194    // Checks whether the ONNX model is present in the cache
195    let model_dir = paths.models.join("models--intfloat--multilingual-e5-small");
196    let model_ok = model_dir.exists();
197
198    // Builds the checks array for detailed diagnostics
199    let mut checks: Vec<HealthCheck> = Vec::new();
200
201    // At this point integrity_ok is always true (corrupt DB returned early above).
202    checks.push(HealthCheck {
203        name: "integrity".to_string(),
204        ok: true,
205        detail: None,
206    });
207
208    checks.push(HealthCheck {
209        name: "schema_version".to_string(),
210        ok: schema_ok,
211        detail: if schema_ok {
212            None
213        } else {
214            Some(format!("schema_version={schema_version} (expected >0)"))
215        },
216    });
217
218    checks.push(HealthCheck {
219        name: "vec_memories".to_string(),
220        ok: vec_memories_ok,
221        detail: if vec_memories_ok {
222            None
223        } else {
224            Some("vec_memories table missing from sqlite_master".to_string())
225        },
226    });
227
228    checks.push(HealthCheck {
229        name: "vec_entities".to_string(),
230        ok: vec_entities_ok,
231        detail: if vec_entities_ok {
232            None
233        } else {
234            Some("vec_entities table missing from sqlite_master".to_string())
235        },
236    });
237
238    checks.push(HealthCheck {
239        name: "vec_chunks".to_string(),
240        ok: vec_chunks_ok,
241        detail: if vec_chunks_ok {
242            None
243        } else {
244            Some("vec_chunks table missing from sqlite_master".to_string())
245        },
246    });
247
248    checks.push(HealthCheck {
249        name: "fts_memories".to_string(),
250        ok: fts_ok,
251        detail: if fts_ok {
252            None
253        } else {
254            Some("fts_memories table missing from sqlite_master".to_string())
255        },
256    });
257
258    checks.push(HealthCheck {
259        name: "model_onnx".to_string(),
260        ok: model_ok,
261        detail: if model_ok {
262            None
263        } else {
264            Some(format!(
265                "model missing at {}; run 'sqlite-graphrag models download'",
266                model_dir.display()
267            ))
268        },
269    });
270
271    let response = HealthResponse {
272        status: status.to_string(),
273        integrity,
274        integrity_ok,
275        schema_ok,
276        vec_memories_ok,
277        vec_entities_ok,
278        vec_chunks_ok,
279        fts_ok,
280        model_ok,
281        counts: HealthCounts {
282            memories: memories_count,
283            memories_total: memories_count,
284            entities: entities_count,
285            relationships: relationships_count,
286            vec_memories: vec_memories_count,
287        },
288        db_path: paths.db.display().to_string(),
289        db_size_bytes,
290        schema_version,
291        missing_entities,
292        wal_size_mb,
293        journal_mode,
294        checks,
295        elapsed_ms: start.elapsed().as_millis() as u64,
296    };
297
298    output::emit_json(&response)?;
299
300    Ok(())
301}
302
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a single diagnostic entry.
    fn make_check(name: &str, ok: bool, detail: Option<&str>) -> HealthCheck {
        HealthCheck {
            name: name.to_string(),
            ok,
            detail: detail.map(str::to_string),
        }
    }

    /// A fully populated response must expose every flag, the size field and the
    /// checks array, with `detail` present only on the failing check.
    #[test]
    fn health_check_serializes_all_new_fields() {
        let response = HealthResponse {
            status: "ok".to_string(),
            integrity: "ok".to_string(),
            integrity_ok: true,
            schema_ok: true,
            vec_memories_ok: true,
            vec_entities_ok: true,
            vec_chunks_ok: true,
            fts_ok: true,
            model_ok: false,
            counts: HealthCounts {
                memories: 5,
                memories_total: 5,
                entities: 3,
                relationships: 2,
                vec_memories: 5,
            },
            db_path: "/tmp/test.sqlite".to_string(),
            db_size_bytes: 4096,
            schema_version: 6,
            missing_entities: vec![],
            wal_size_mb: 0.0,
            journal_mode: "wal".to_string(),
            checks: vec![
                make_check("integrity", true, None),
                make_check("model_onnx", false, Some("modelo ausente")),
            ],
            elapsed_ms: 0,
        };

        let json = serde_json::to_value(&response).unwrap();

        assert_eq!(json["status"], "ok");
        let expected_flags = [
            ("integrity_ok", true),
            ("schema_ok", true),
            ("vec_memories_ok", true),
            ("vec_entities_ok", true),
            ("vec_chunks_ok", true),
            ("fts_ok", true),
            ("model_ok", false),
        ];
        for (key, expected) in expected_flags {
            assert_eq!(json[key], expected);
        }
        assert_eq!(json["db_size_bytes"], 4096u64);

        assert!(json["checks"].is_array());
        let entries = json["checks"].as_array().unwrap();
        assert_eq!(entries.len(), 2);

        // A passing check carries no `detail` key at all (skip_serializing_if).
        let passing = &entries[0];
        assert_eq!(passing["name"], "integrity");
        assert_eq!(passing["ok"], true);
        assert!(passing.get("detail").is_none());

        // A failing check keeps its explanation.
        let failing = &entries[1];
        assert_eq!(failing["name"], "model_onnx");
        assert_eq!(failing["ok"], false);
        assert_eq!(failing["detail"], "modelo ausente");
    }

    /// `detail: None` must not produce a `detail` key in the JSON output.
    #[test]
    fn health_check_without_detail_omits_field() {
        let json = serde_json::to_value(make_check("vec_memories", true, None)).unwrap();
        assert!(
            json.get("detail").is_none(),
            "campo detail deve ser omitido quando None"
        );
    }

    /// `detail: Some(..)` must be serialized verbatim.
    #[test]
    fn health_check_with_detail_serializes_field() {
        let entry = make_check("fts_memories", false, Some("tabela fts_memories ausente"));
        let json = serde_json::to_value(&entry).unwrap();
        assert_eq!(json["detail"], "tabela fts_memories ausente");
    }
}