1use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use serde::Serialize;
8use std::fs;
9use std::time::Instant;
10
// Command-line arguments accepted by the `health` subcommand.
//
// Plain `//` comments are used on this struct deliberately: clap's derive
// turns `///` doc comments into help text, which would change `--help` output.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n \
    # Check database health (connectivity, integrity, vector index)\n \
    sqlite-graphrag health\n\n \
    # Check health of a database at a custom path\n \
    sqlite-graphrag health --db /path/to/graphrag.sqlite\n\n \
    # Use SQLITE_GRAPHRAG_DB_PATH env var\n \
    SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag health")]
pub struct HealthArgs {
    // Optional database path; also read from the SQLITE_GRAPHRAG_DB_PATH
    // environment variable. `run` hands it to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
    // Accepted on the command line; `run` in this file ignores its value.
    #[arg(long, default_value_t = false)]
    pub json: bool,
    // Hidden option restricted to "json" or "text"; `run` in this file
    // ignores its value.
    #[arg(long, value_parser = ["json", "text"], hide = true)]
    pub format: Option<String>,
}
29
/// Row counts reported under the `counts` key of the health response.
#[derive(Serialize)]
struct HealthCounts {
    /// Number of rows in `memories` whose `deleted_at` is NULL.
    memories: i64,
    /// Currently populated by `run` with the same value as `memories`.
    memories_total: i64,
    /// Number of rows in `entities`.
    entities: i64,
    /// Number of rows in `relationships`.
    relationships: i64,
    /// Number of rows in `vec_memories`.
    vec_memories: i64,
}
39
/// One named entry of the `checks` array in the health response.
#[derive(Serialize)]
struct HealthCheck {
    /// Identifier of the check (for example `"integrity"` or `"fts_query"`).
    name: String,
    /// Whether the check passed.
    ok: bool,
    /// Explanation attached to a failed check; the key is left out of the
    /// serialized JSON entirely when this is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<String>,
}
47
/// JSON document emitted by the `health` subcommand.
///
/// Fields are serialized in declaration order. Every `Option` field is
/// dropped from the output when it is `None`.
#[derive(Serialize)]
struct HealthResponse {
    /// `"ok"` on the normal path, `"degraded"` when the integrity check fails.
    status: String,
    /// First row returned by `PRAGMA integrity_check` (`"ok"` when healthy).
    integrity: String,
    /// True when `integrity` equals `"ok"`.
    integrity_ok: bool,
    /// True when `schema_version` is greater than zero.
    schema_ok: bool,
    /// True when `vec_memories` is listed in `sqlite_master`.
    vec_memories_ok: bool,
    /// True when `vec_entities` is listed in `sqlite_master`.
    vec_entities_ok: bool,
    /// True when `vec_chunks` is listed in `sqlite_master`.
    vec_chunks_ok: bool,
    /// True when `fts_memories` is listed in `sqlite_master`.
    fts_ok: bool,
    /// True when a probe `MATCH` query against `fts_memories` succeeded.
    fts_query_ok: bool,
    /// True when the embedding model directory exists under the models path.
    model_ok: bool,
    /// Row counts for the main tables.
    counts: HealthCounts,
    /// Display form of the resolved database path.
    db_path: String,
    /// Size of the database file in bytes; 0 when its metadata is unreadable.
    db_size_bytes: u64,
    /// Highest `version` in `refinery_schema_history`; 0 when unavailable.
    schema_version: u32,
    /// `entity_id=N` labels for ids referenced by `memory_entities` that have
    /// no matching row in `entities`.
    missing_entities: Vec<String>,
    /// Size of the `<db>-wal` file (bytes / 1024 / 1024); 0.0 when unreadable.
    wal_size_mb: f64,
    /// Result of `PRAGMA journal_mode`, or `"unknown"` when the query fails.
    journal_mode: String,
    /// Result of `sqlite_version()`, or `"unknown"` when the query fails.
    sqlite_version: String,
    /// Fraction of relationships whose relation is `mentions`; `None` when
    /// there are no relationships.
    #[serde(skip_serializing_if = "Option::is_none")]
    mentions_ratio: Option<f64>,
    /// Advisory text set when `mentions_ratio` exceeds 0.5.
    #[serde(skip_serializing_if = "Option::is_none")]
    mentions_warning: Option<String>,
    /// Most frequent relation; `None` when there are no relationships.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_relation: Option<String>,
    /// Fraction of relationships that use `top_relation`.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_relation_ratio: Option<f64>,
    /// Fraction of relationships whose relation is `applies_to`; `None` when
    /// there are none.
    #[serde(skip_serializing_if = "Option::is_none")]
    applies_to_ratio: Option<f64>,
    /// Advisory text set when `top_relation_ratio` exceeds 0.40.
    #[serde(skip_serializing_if = "Option::is_none")]
    relation_concentration_warning: Option<String>,
    /// Per-check results, one entry per probe that was run.
    checks: Vec<HealthCheck>,
    /// Wall-clock time spent producing the response, in milliseconds.
    elapsed_ms: u64,
}
104
105fn table_exists(conn: &rusqlite::Connection, table_name: &str) -> bool {
107 conn.query_row(
108 "SELECT COUNT(*) FROM sqlite_master WHERE type IN ('table', 'shadow') AND name = ?1",
109 rusqlite::params![table_name],
110 |r| r.get::<_, i64>(0),
111 )
112 .unwrap_or(0)
113 > 0
114}
115
116pub fn run(args: HealthArgs) -> Result<(), AppError> {
117 let start = Instant::now();
118 let _ = args.json; let _ = args.format; let paths = AppPaths::resolve(args.db.as_deref())?;
121
122 crate::storage::connection::ensure_db_ready(&paths)?;
123
124 let conn = open_ro(&paths.db)?;
125
126 let integrity: String = conn.query_row("PRAGMA integrity_check;", [], |r| r.get(0))?;
127 let integrity_ok = integrity == "ok";
128 tracing::info!(integrity_ok = %integrity_ok, "PRAGMA integrity_check complete");
129
130 if !integrity_ok {
131 let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
132 output::emit_json(&HealthResponse {
133 status: "degraded".to_string(),
134 integrity: integrity.clone(),
135 integrity_ok: false,
136 schema_ok: false,
137 vec_memories_ok: false,
138 vec_entities_ok: false,
139 vec_chunks_ok: false,
140 fts_ok: false,
141 fts_query_ok: false,
142 model_ok: false,
143 counts: HealthCounts {
144 memories: 0,
145 memories_total: 0,
146 entities: 0,
147 relationships: 0,
148 vec_memories: 0,
149 },
150 db_path: paths.db.display().to_string(),
151 db_size_bytes,
152 schema_version: 0,
153 sqlite_version: "unknown".to_string(),
154 missing_entities: vec![],
155 wal_size_mb: 0.0,
156 journal_mode: "unknown".to_string(),
157 mentions_ratio: None,
158 mentions_warning: None,
159 top_relation: None,
160 top_relation_ratio: None,
161 applies_to_ratio: None,
162 relation_concentration_warning: None,
163 checks: vec![HealthCheck {
164 name: "integrity".to_string(),
165 ok: false,
166 detail: Some(integrity),
167 }],
168 elapsed_ms: start.elapsed().as_millis() as u64,
169 })?;
170 return Err(AppError::Database(rusqlite::Error::SqliteFailure(
171 rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_CORRUPT),
172 Some("integrity check failed".to_string()),
173 )));
174 }
175
176 let memories_count: i64 = conn.query_row(
177 "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
178 [],
179 |r| r.get(0),
180 )?;
181 let entities_count: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
182 let relationships_count: i64 =
183 conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?;
184 let vec_memories_count: i64 =
185 conn.query_row("SELECT COUNT(*) FROM vec_memories", [], |r| r.get(0))?;
186
187 let mentions_count: i64 = conn.query_row(
188 "SELECT COUNT(*) FROM relationships WHERE relation = 'mentions'",
189 [],
190 |r| r.get(0),
191 )?;
192 let (mentions_ratio, mentions_warning) = if relationships_count > 0 {
193 let ratio = mentions_count as f64 / relationships_count as f64;
194 let warning = if ratio > 0.5 {
195 Some(format!(
196 "mentions relationships dominate graph at {:.1}% ({}/{} total); consider running prune-relations --relation mentions --dry-run",
197 ratio * 100.0,
198 mentions_count,
199 relationships_count
200 ))
201 } else {
202 None
203 };
204 (Some(ratio), warning)
205 } else {
206 (None, None)
207 };
208
209 let (top_relation, top_relation_ratio, applies_to_ratio, relation_concentration_warning) =
211 if relationships_count > 0 {
212 let (top_rel, top_count): (String, i64) = conn
214 .query_row(
215 "SELECT relation, COUNT(*) AS cnt
216 FROM relationships
217 GROUP BY relation
218 ORDER BY cnt DESC
219 LIMIT 1",
220 [],
221 |r| Ok((r.get::<_, String>(0)?, r.get::<_, i64>(1)?)),
222 )
223 .unwrap_or_else(|_| ("unknown".to_string(), 0));
224
225 let top_ratio = top_count as f64 / relationships_count as f64;
226
227 let applies_count: i64 = conn
229 .query_row(
230 "SELECT COUNT(*) FROM relationships WHERE relation = 'applies_to'",
231 [],
232 |r| r.get(0),
233 )
234 .unwrap_or(0);
235 let at_ratio = if applies_count > 0 {
236 Some(applies_count as f64 / relationships_count as f64)
237 } else {
238 None
239 };
240
241 let concentration_warning = if top_ratio > 0.40 {
242 Some(format!(
243 "relation '{}' dominates graph at {:.1}% ({}/{} total); consider running prune-relations --relation {} --dry-run",
244 top_rel,
245 top_ratio * 100.0,
246 top_count,
247 relationships_count,
248 top_rel,
249 ))
250 } else {
251 None
252 };
253
254 (
255 Some(top_rel),
256 Some(top_ratio),
257 at_ratio,
258 concentration_warning,
259 )
260 } else {
261 (None, None, None, None)
262 };
263
264 let status = "ok";
265
266 let schema_version: u32 = conn
267 .query_row(
268 "SELECT COALESCE(MAX(version), 0) FROM refinery_schema_history",
269 [],
270 |r| r.get::<_, i64>(0),
271 )
272 .unwrap_or(0) as u32;
273
274 let schema_ok = schema_version > 0;
275
276 let vec_memories_ok = table_exists(&conn, "vec_memories");
278 let vec_entities_ok = table_exists(&conn, "vec_entities");
279 let vec_chunks_ok = table_exists(&conn, "vec_chunks");
280 tracing::info!(vec_memories_ok = %vec_memories_ok, vec_entities_ok = %vec_entities_ok, "vector table checks complete");
281 let fts_ok = table_exists(&conn, "fts_memories");
282
283 let fts_query_ok = if fts_ok {
285 conn.query_row(
286 "SELECT COUNT(*) FROM fts_memories WHERE fts_memories MATCH 'a' LIMIT 1",
287 [],
288 |r| r.get::<_, i64>(0),
289 )
290 .is_ok()
291 } else {
292 false
293 };
294
295 tracing::info!(fts_ok = %fts_ok, fts_query_ok = %fts_query_ok, "FTS5 checks complete");
296
297 let sqlite_version: String = conn
299 .query_row("SELECT sqlite_version()", [], |r| r.get(0))
300 .unwrap_or_else(|_| "unknown".to_string());
301
302 let mut missing_entities: Vec<String> = Vec::new();
304 let mut stmt = conn.prepare(
305 "SELECT DISTINCT me.entity_id
306 FROM memory_entities me
307 LEFT JOIN entities e ON e.id = me.entity_id
308 WHERE e.id IS NULL",
309 )?;
310 let orphans: Vec<i64> = stmt
311 .query_map([], |r| r.get(0))?
312 .collect::<Result<Vec<_>, _>>()?;
313 for id in orphans {
314 missing_entities.push(format!("entity_id={id}"));
315 }
316
317 let journal_mode: String = conn
318 .query_row("PRAGMA journal_mode", [], |row| row.get::<_, String>(0))
319 .unwrap_or_else(|_| "unknown".to_string());
320
321 let wal_size_mb = fs::metadata(format!("{}-wal", paths.db.display()))
322 .map(|m| m.len() as f64 / 1024.0 / 1024.0)
323 .unwrap_or(0.0);
324
325 let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
327
328 let model_dir = paths.models.join("models--intfloat--multilingual-e5-small");
330 let model_ok = model_dir.exists();
331 tracing::info!(model_ok = %model_ok, "embedding model check complete");
332
333 let mut checks: Vec<HealthCheck> = Vec::with_capacity(8);
335
336 checks.push(HealthCheck {
338 name: "integrity".to_string(),
339 ok: true,
340 detail: None,
341 });
342
343 checks.push(HealthCheck {
344 name: "schema_version".to_string(),
345 ok: schema_ok,
346 detail: if schema_ok {
347 None
348 } else {
349 Some(format!("schema_version={schema_version} (expected >0)"))
350 },
351 });
352
353 checks.push(HealthCheck {
354 name: "vec_memories".to_string(),
355 ok: vec_memories_ok,
356 detail: if vec_memories_ok {
357 None
358 } else {
359 Some("vec_memories table missing from sqlite_master".to_string())
360 },
361 });
362
363 checks.push(HealthCheck {
364 name: "vec_entities".to_string(),
365 ok: vec_entities_ok,
366 detail: if vec_entities_ok {
367 None
368 } else {
369 Some("vec_entities table missing from sqlite_master".to_string())
370 },
371 });
372
373 checks.push(HealthCheck {
374 name: "vec_chunks".to_string(),
375 ok: vec_chunks_ok,
376 detail: if vec_chunks_ok {
377 None
378 } else {
379 Some("vec_chunks table missing from sqlite_master".to_string())
380 },
381 });
382
383 checks.push(HealthCheck {
384 name: "fts_memories".to_string(),
385 ok: fts_ok,
386 detail: if fts_ok {
387 None
388 } else {
389 Some("fts_memories table missing from sqlite_master".to_string())
390 },
391 });
392
393 checks.push(HealthCheck {
394 name: "fts_query".to_string(),
395 ok: fts_query_ok,
396 detail: if fts_query_ok {
397 None
398 } else {
399 Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string())
400 },
401 });
402
403 checks.push(HealthCheck {
404 name: "model_onnx".to_string(),
405 ok: model_ok,
406 detail: if model_ok {
407 None
408 } else {
409 Some(format!(
410 "model missing at {}; run 'sqlite-graphrag models download'",
411 model_dir.display()
412 ))
413 },
414 });
415
416 let response = HealthResponse {
417 status: status.to_string(),
418 integrity,
419 integrity_ok,
420 schema_ok,
421 vec_memories_ok,
422 vec_entities_ok,
423 vec_chunks_ok,
424 fts_ok,
425 fts_query_ok,
426 model_ok,
427 counts: HealthCounts {
428 memories: memories_count,
429 memories_total: memories_count,
430 entities: entities_count,
431 relationships: relationships_count,
432 vec_memories: vec_memories_count,
433 },
434 db_path: paths.db.display().to_string(),
435 db_size_bytes,
436 schema_version,
437 sqlite_version,
438 missing_entities,
439 wal_size_mb,
440 journal_mode,
441 mentions_ratio,
442 mentions_warning,
443 top_relation,
444 top_relation_ratio,
445 applies_to_ratio,
446 relation_concentration_warning,
447 checks,
448 elapsed_ms: start.elapsed().as_millis() as u64,
449 };
450
451 output::emit_json(&response)?;
452
453 Ok(())
454}
455
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fully healthy response in which only the
    /// relation-concentration fields are supplied by the caller.
    fn make_full_response(
        top_relation: Option<String>,
        top_relation_ratio: Option<f64>,
        applies_to_ratio: Option<f64>,
        relation_concentration_warning: Option<String>,
    ) -> HealthResponse {
        HealthResponse {
            status: "ok".to_string(),
            integrity: "ok".to_string(),
            integrity_ok: true,
            schema_ok: true,
            vec_memories_ok: true,
            vec_entities_ok: true,
            vec_chunks_ok: true,
            fts_ok: true,
            fts_query_ok: true,
            model_ok: true,
            counts: HealthCounts {
                memories: 10,
                memories_total: 10,
                entities: 5,
                relationships: 20,
                vec_memories: 10,
            },
            db_path: "/tmp/test.sqlite".to_string(),
            db_size_bytes: 8192,
            schema_version: 3,
            sqlite_version: "3.46.0".to_string(),
            elapsed_ms: 1,
            missing_entities: vec![],
            wal_size_mb: 0.0,
            journal_mode: "wal".to_string(),
            mentions_ratio: None,
            mentions_warning: None,
            top_relation,
            top_relation_ratio,
            applies_to_ratio,
            relation_concentration_warning,
            checks: vec![],
        }
    }

    /// Serializes a value to a `serde_json::Value`, panicking on failure.
    fn to_json<T: serde::Serialize>(value: &T) -> serde_json::Value {
        serde_json::to_value(value).unwrap()
    }

    /// Top-level flags and the `checks` array survive serialization.
    #[test]
    fn health_check_serializes_all_new_fields() {
        let response = HealthResponse {
            model_ok: false,
            counts: HealthCounts {
                memories: 5,
                memories_total: 5,
                entities: 3,
                relationships: 2,
                vec_memories: 5,
            },
            db_size_bytes: 4096,
            schema_version: 6,
            elapsed_ms: 0,
            checks: vec![
                HealthCheck {
                    name: "integrity".to_string(),
                    ok: true,
                    detail: None,
                },
                HealthCheck {
                    name: "model_onnx".to_string(),
                    ok: false,
                    detail: Some("model missing".to_string()),
                },
            ],
            ..make_full_response(None, None, None, None)
        };

        let json = to_json(&response);
        assert_eq!(json["status"], "ok");
        for flag in [
            "integrity_ok",
            "schema_ok",
            "vec_memories_ok",
            "vec_entities_ok",
            "vec_chunks_ok",
            "fts_ok",
        ] {
            assert_eq!(json[flag], true);
        }
        assert_eq!(json["model_ok"], false);
        assert_eq!(json["db_size_bytes"], 4096u64);
        assert!(json["checks"].is_array());
        assert_eq!(json["checks"].as_array().unwrap().len(), 2);

        let integrity_check = &json["checks"][0];
        assert_eq!(integrity_check["name"], "integrity");
        assert_eq!(integrity_check["ok"], true);
        assert!(integrity_check.get("detail").is_none());

        let model_check = &json["checks"][1];
        assert_eq!(model_check["name"], "model_onnx");
        assert_eq!(model_check["ok"], false);
        assert_eq!(model_check["detail"], "model missing");
    }

    /// A passing check must not carry a `detail` key.
    #[test]
    fn health_check_without_detail_omits_field() {
        let json = to_json(&HealthCheck {
            name: "vec_memories".to_string(),
            ok: true,
            detail: None,
        });
        assert!(
            json.get("detail").is_none(),
            "detail field must be omitted when None"
        );
    }

    /// A failing check serializes its `detail` text verbatim.
    #[test]
    fn health_check_with_detail_serializes_field() {
        let json = to_json(&HealthCheck {
            name: "fts_memories".to_string(),
            ok: false,
            detail: Some("fts_memories table missing from sqlite_master".to_string()),
        });
        assert_eq!(
            json["detail"],
            "fts_memories table missing from sqlite_master"
        );
    }

    /// `fts_query_ok` and `sqlite_version` are always present in the output.
    #[test]
    fn health_response_fts_query_ok_and_sqlite_version_serialize() {
        let response = HealthResponse {
            counts: HealthCounts {
                memories: 0,
                memories_total: 0,
                entities: 0,
                relationships: 0,
                vec_memories: 0,
            },
            db_size_bytes: 0,
            schema_version: 1,
            sqlite_version: "3.45.1".to_string(),
            elapsed_ms: 0,
            ..make_full_response(None, None, None, None)
        };

        let json = to_json(&response);

        assert_eq!(
            json["fts_query_ok"], true,
            "fts_query_ok must be present and true in serialized JSON"
        );

        assert_eq!(
            json["sqlite_version"], "3.45.1",
            "sqlite_version must be present and match the provided string"
        );

        let check_json = to_json(&HealthCheck {
            name: "fts_query".to_string(),
            ok: false,
            detail: Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string()),
        });
        assert_eq!(check_json["name"], "fts_query");
        assert_eq!(check_json["ok"], false);
        assert_eq!(
            check_json["detail"],
            "FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'"
        );
    }

    /// With every concentration field `None`, none of the keys are emitted.
    #[test]
    fn health_concentration_fields_omitted_when_no_relationships() {
        let json = to_json(&make_full_response(None, None, None, None));
        assert!(
            json.get("top_relation").is_none(),
            "top_relation must be omitted when None"
        );
        assert!(
            json.get("top_relation_ratio").is_none(),
            "top_relation_ratio must be omitted when None"
        );
        assert!(
            json.get("applies_to_ratio").is_none(),
            "applies_to_ratio must be omitted when None"
        );
        assert!(
            json.get("relation_concentration_warning").is_none(),
            "relation_concentration_warning must be omitted when None"
        );
    }

    /// Populated concentration fields round-trip into the JSON output.
    #[test]
    fn health_concentration_fields_present_with_data() {
        let json = to_json(&make_full_response(
            Some("mentions".to_string()),
            Some(0.60),
            Some(0.10),
            Some("relation 'mentions' dominates graph at 60.0%".to_string()),
        ));
        assert_eq!(json["top_relation"], "mentions");
        assert!((json["top_relation_ratio"].as_f64().unwrap() - 0.60).abs() < 1e-9);
        assert!((json["applies_to_ratio"].as_f64().unwrap() - 0.10).abs() < 1e-9);
        assert!(json["relation_concentration_warning"]
            .as_str()
            .unwrap()
            .contains("60.0%"));
    }

    /// A response built without a warning does not serialize the warning key.
    #[test]
    fn health_concentration_warning_absent_when_ratio_below_threshold() {
        let json = to_json(&make_full_response(
            Some("uses".to_string()),
            Some(0.39),
            None,
            None,
        ));
        assert_eq!(json["top_relation"], "uses");
        assert!(
            json.get("relation_concentration_warning").is_none(),
            "warning must be absent when ratio <= 0.40"
        );
    }

    /// A response built with a warning serializes it as a string.
    #[test]
    fn health_concentration_warning_present_at_threshold() {
        let json = to_json(&make_full_response(
            Some("depends_on".to_string()),
            Some(0.41),
            None,
            Some("relation 'depends_on' dominates graph at 41.0%".to_string()),
        ));
        assert!(
            json["relation_concentration_warning"].is_string(),
            "warning must be present when top_relation_ratio > 0.40"
        );
    }

    /// `applies_to_ratio` is dropped from the output when it is `None`.
    #[test]
    fn health_applies_to_ratio_omitted_when_none() {
        let json = to_json(&make_full_response(
            Some("related".to_string()),
            Some(0.30),
            None,
            None,
        ));
        assert!(
            json.get("applies_to_ratio").is_none(),
            "applies_to_ratio must be omitted when None"
        );
    }
}