1use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use serde::Serialize;
8use std::fs;
9use std::time::Instant;
10
11#[derive(clap::Args)]
12#[command(after_long_help = "EXAMPLES:\n \
13 # Check database health (connectivity, integrity, vector index)\n \
14 sqlite-graphrag health\n\n \
15 # Check health of a database at a custom path\n \
16 sqlite-graphrag health --db /path/to/graphrag.sqlite\n\n \
17 # Use SQLITE_GRAPHRAG_DB_PATH env var\n \
18 SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag health")]
19pub struct HealthArgs {
20 #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
21 pub db: Option<String>,
22 #[arg(long, default_value_t = false)]
24 pub json: bool,
25 #[arg(long, value_parser = ["json", "text"], hide = true)]
27 pub format: Option<String>,
28}
29
30#[derive(Serialize)]
31struct HealthCounts {
32 memories: i64,
33 memories_total: i64,
35 entities: i64,
36 relationships: i64,
37 vec_memories: i64,
38}
39
40#[derive(Serialize)]
41struct HealthCheck {
42 name: String,
43 ok: bool,
44 #[serde(skip_serializing_if = "Option::is_none")]
45 detail: Option<String>,
46}
47
48#[derive(Serialize)]
49struct HealthResponse {
50 status: String,
51 integrity: String,
52 integrity_ok: bool,
53 schema_ok: bool,
54 vec_memories_ok: bool,
55 vec_memories_missing: i64,
56 vec_memories_orphaned: i64,
57 vec_entities_ok: bool,
58 vec_chunks_ok: bool,
59 fts_ok: bool,
60 fts_query_ok: bool,
62 model_ok: bool,
63 counts: HealthCounts,
64 db_path: String,
65 db_size_bytes: u64,
66 schema_version: u32,
70 missing_entities: Vec<String>,
73 wal_size_mb: f64,
75 journal_mode: String,
77 sqlite_version: String,
79 #[serde(skip_serializing_if = "Option::is_none")]
82 mentions_ratio: Option<f64>,
83 #[serde(skip_serializing_if = "Option::is_none")]
86 mentions_warning: Option<String>,
87 #[serde(skip_serializing_if = "Option::is_none")]
90 top_relation: Option<String>,
91 #[serde(skip_serializing_if = "Option::is_none")]
94 top_relation_ratio: Option<f64>,
95 #[serde(skip_serializing_if = "Option::is_none")]
98 applies_to_ratio: Option<f64>,
99 #[serde(skip_serializing_if = "Option::is_none")]
102 relation_concentration_warning: Option<String>,
103 checks: Vec<HealthCheck>,
104 elapsed_ms: u64,
105}
106
107fn table_exists(conn: &rusqlite::Connection, table_name: &str) -> bool {
109 conn.query_row(
110 "SELECT COUNT(*) FROM sqlite_master WHERE type IN ('table', 'shadow') AND name = ?1",
111 rusqlite::params![table_name],
112 |r| r.get::<_, i64>(0),
113 )
114 .unwrap_or(0)
115 > 0
116}
117
118pub fn run(args: HealthArgs) -> Result<(), AppError> {
119 let start = Instant::now();
120 let _ = args.json; let _ = args.format; let paths = AppPaths::resolve(args.db.as_deref())?;
123
124 crate::storage::connection::ensure_db_ready(&paths)?;
125
126 let conn = open_ro(&paths.db)?;
127
128 let integrity: String = conn.query_row("PRAGMA integrity_check;", [], |r| r.get(0))?;
129 let integrity_ok = integrity == "ok";
130 tracing::info!(integrity_ok = %integrity_ok, "PRAGMA integrity_check complete");
131
132 if !integrity_ok {
133 let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
134 output::emit_json(&HealthResponse {
135 status: "degraded".to_string(),
136 integrity: integrity.clone(),
137 integrity_ok: false,
138 schema_ok: false,
139 vec_memories_ok: false,
140 vec_memories_missing: 0,
141 vec_memories_orphaned: 0,
142 vec_entities_ok: false,
143 vec_chunks_ok: false,
144 fts_ok: false,
145 fts_query_ok: false,
146 model_ok: false,
147 counts: HealthCounts {
148 memories: 0,
149 memories_total: 0,
150 entities: 0,
151 relationships: 0,
152 vec_memories: 0,
153 },
154 db_path: paths.db.display().to_string(),
155 db_size_bytes,
156 schema_version: 0,
157 sqlite_version: "unknown".to_string(),
158 missing_entities: vec![],
159 wal_size_mb: 0.0,
160 journal_mode: "unknown".to_string(),
161 mentions_ratio: None,
162 mentions_warning: None,
163 top_relation: None,
164 top_relation_ratio: None,
165 applies_to_ratio: None,
166 relation_concentration_warning: None,
167 checks: vec![HealthCheck {
168 name: "integrity".to_string(),
169 ok: false,
170 detail: Some(integrity),
171 }],
172 elapsed_ms: start.elapsed().as_millis() as u64,
173 })?;
174 return Err(AppError::Database(rusqlite::Error::SqliteFailure(
175 rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_CORRUPT),
176 Some("integrity check failed".to_string()),
177 )));
178 }
179
180 let memories_count: i64 = conn.query_row(
181 "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
182 [],
183 |r| r.get(0),
184 )?;
185 let entities_count: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
186 let relationships_count: i64 =
187 conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?;
188 let vec_memories_count: i64 =
189 conn.query_row("SELECT COUNT(*) FROM vec_memories", [], |r| r.get(0))?;
190
191 let mentions_count: i64 = conn.query_row(
192 "SELECT COUNT(*) FROM relationships WHERE relation = 'mentions'",
193 [],
194 |r| r.get(0),
195 )?;
196 let (mentions_ratio, mentions_warning) = if relationships_count > 0 {
197 let ratio = mentions_count as f64 / relationships_count as f64;
198 let warning = if ratio > 0.5 {
199 Some(format!(
200 "mentions relationships dominate graph at {:.1}% ({}/{} total); consider running prune-relations --relation mentions --dry-run",
201 ratio * 100.0,
202 mentions_count,
203 relationships_count
204 ))
205 } else {
206 None
207 };
208 (Some(ratio), warning)
209 } else {
210 (None, None)
211 };
212
213 let (top_relation, top_relation_ratio, applies_to_ratio, relation_concentration_warning) =
215 if relationships_count > 0 {
216 let (top_rel, top_count): (String, i64) = conn
218 .query_row(
219 "SELECT relation, COUNT(*) AS cnt
220 FROM relationships
221 GROUP BY relation
222 ORDER BY cnt DESC
223 LIMIT 1",
224 [],
225 |r| Ok((r.get::<_, String>(0)?, r.get::<_, i64>(1)?)),
226 )
227 .unwrap_or_else(|_| ("unknown".to_string(), 0));
228
229 let top_ratio = top_count as f64 / relationships_count as f64;
230
231 let applies_count: i64 = conn
233 .query_row(
234 "SELECT COUNT(*) FROM relationships WHERE relation = 'applies_to'",
235 [],
236 |r| r.get(0),
237 )
238 .unwrap_or(0);
239 let at_ratio = if applies_count > 0 {
240 Some(applies_count as f64 / relationships_count as f64)
241 } else {
242 None
243 };
244
245 let concentration_warning = if top_ratio > 0.40 {
246 Some(format!(
247 "relation '{}' dominates graph at {:.1}% ({}/{} total); consider running prune-relations --relation {} --dry-run",
248 top_rel,
249 top_ratio * 100.0,
250 top_count,
251 relationships_count,
252 top_rel,
253 ))
254 } else {
255 None
256 };
257
258 (
259 Some(top_rel),
260 Some(top_ratio),
261 at_ratio,
262 concentration_warning,
263 )
264 } else {
265 (None, None, None, None)
266 };
267
268 let status = "ok";
269
270 let schema_version: u32 = conn
271 .query_row(
272 "SELECT COALESCE(MAX(version), 0) FROM refinery_schema_history",
273 [],
274 |r| r.get::<_, i64>(0),
275 )
276 .unwrap_or(0) as u32;
277
278 let schema_ok = schema_version > 0;
279
280 let vec_memories_ok = table_exists(&conn, "vec_memories");
282 let vec_entities_ok = table_exists(&conn, "vec_entities");
283 let vec_chunks_ok = table_exists(&conn, "vec_chunks");
284
285 let vec_memories_missing: i64 = if vec_memories_ok {
286 conn.query_row(
287 "SELECT COUNT(*) FROM memories m LEFT JOIN vec_memories v ON v.memory_id = m.id WHERE v.memory_id IS NULL AND m.deleted_at IS NULL",
288 [], |r| r.get(0),
289 ).unwrap_or(0)
290 } else {
291 0
292 };
293
294 let vec_memories_orphaned: i64 = if vec_memories_ok {
295 conn.query_row(
296 "SELECT COUNT(*) FROM vec_memories v LEFT JOIN memories m ON m.id = v.memory_id WHERE m.id IS NULL",
297 [], |r| r.get(0),
298 ).unwrap_or(0)
299 } else {
300 0
301 };
302
303 tracing::info!(vec_memories_ok = %vec_memories_ok, vec_entities_ok = %vec_entities_ok, vec_missing = vec_memories_missing, vec_orphaned = vec_memories_orphaned, "vector table checks complete");
304 let fts_ok = table_exists(&conn, "fts_memories");
305
306 let fts_query_ok = if fts_ok {
308 conn.query_row(
309 "SELECT COUNT(*) FROM fts_memories WHERE fts_memories MATCH 'a' LIMIT 1",
310 [],
311 |r| r.get::<_, i64>(0),
312 )
313 .is_ok()
314 } else {
315 false
316 };
317
318 tracing::info!(fts_ok = %fts_ok, fts_query_ok = %fts_query_ok, "FTS5 checks complete");
319
320 let sqlite_version: String = conn
322 .query_row("SELECT sqlite_version()", [], |r| r.get(0))
323 .unwrap_or_else(|_| "unknown".to_string());
324
325 let mut missing_entities: Vec<String> = Vec::new();
327 let mut stmt = conn.prepare(
328 "SELECT DISTINCT me.entity_id
329 FROM memory_entities me
330 LEFT JOIN entities e ON e.id = me.entity_id
331 WHERE e.id IS NULL",
332 )?;
333 let orphans: Vec<i64> = stmt
334 .query_map([], |r| r.get(0))?
335 .collect::<Result<Vec<_>, _>>()?;
336 for id in orphans {
337 missing_entities.push(format!("entity_id={id}"));
338 }
339
340 let journal_mode: String = conn
341 .query_row("PRAGMA journal_mode", [], |row| row.get::<_, String>(0))
342 .unwrap_or_else(|_| "unknown".to_string());
343
344 let wal_size_mb = fs::metadata(format!("{}-wal", paths.db.display()))
345 .map(|m| m.len() as f64 / 1024.0 / 1024.0)
346 .unwrap_or(0.0);
347
348 let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
350
351 let model_dir = paths.models.join("models--intfloat--multilingual-e5-small");
353 let model_ok = model_dir.exists();
354 tracing::info!(model_ok = %model_ok, "embedding model check complete");
355
356 let mut checks: Vec<HealthCheck> = Vec::with_capacity(8);
358
359 checks.push(HealthCheck {
361 name: "integrity".to_string(),
362 ok: true,
363 detail: None,
364 });
365
366 checks.push(HealthCheck {
367 name: "schema_version".to_string(),
368 ok: schema_ok,
369 detail: if schema_ok {
370 None
371 } else {
372 Some(format!("schema_version={schema_version} (expected >0)"))
373 },
374 });
375
376 checks.push(HealthCheck {
377 name: "vec_memories".to_string(),
378 ok: vec_memories_ok,
379 detail: if vec_memories_ok {
380 None
381 } else {
382 Some("vec_memories table missing from sqlite_master".to_string())
383 },
384 });
385
386 checks.push(HealthCheck {
387 name: "vec_entities".to_string(),
388 ok: vec_entities_ok,
389 detail: if vec_entities_ok {
390 None
391 } else {
392 Some("vec_entities table missing from sqlite_master".to_string())
393 },
394 });
395
396 checks.push(HealthCheck {
397 name: "vec_chunks".to_string(),
398 ok: vec_chunks_ok,
399 detail: if vec_chunks_ok {
400 None
401 } else {
402 Some("vec_chunks table missing from sqlite_master".to_string())
403 },
404 });
405
406 checks.push(HealthCheck {
407 name: "fts_memories".to_string(),
408 ok: fts_ok,
409 detail: if fts_ok {
410 None
411 } else {
412 Some("fts_memories table missing from sqlite_master".to_string())
413 },
414 });
415
416 checks.push(HealthCheck {
417 name: "fts_query".to_string(),
418 ok: fts_query_ok,
419 detail: if fts_query_ok {
420 None
421 } else {
422 Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string())
423 },
424 });
425
426 checks.push(HealthCheck {
427 name: "model_onnx".to_string(),
428 ok: model_ok,
429 detail: if model_ok {
430 None
431 } else {
432 Some(format!(
433 "model missing at {}; run 'sqlite-graphrag models download'",
434 model_dir.display()
435 ))
436 },
437 });
438
439 let response = HealthResponse {
440 status: status.to_string(),
441 integrity,
442 integrity_ok,
443 schema_ok,
444 vec_memories_ok,
445 vec_memories_missing,
446 vec_memories_orphaned,
447 vec_entities_ok,
448 vec_chunks_ok,
449 fts_ok,
450 fts_query_ok,
451 model_ok,
452 counts: HealthCounts {
453 memories: memories_count,
454 memories_total: memories_count,
455 entities: entities_count,
456 relationships: relationships_count,
457 vec_memories: vec_memories_count,
458 },
459 db_path: paths.db.display().to_string(),
460 db_size_bytes,
461 schema_version,
462 sqlite_version,
463 missing_entities,
464 wal_size_mb,
465 journal_mode,
466 mentions_ratio,
467 mentions_warning,
468 top_relation,
469 top_relation_ratio,
470 applies_to_ratio,
471 relation_concentration_warning,
472 checks,
473 elapsed_ms: start.elapsed().as_millis() as u64,
474 };
475
476 output::emit_json(&response)?;
477
478 Ok(())
479}
480
#[cfg(test)]
mod tests {
    use super::*;

    /// Serializes any report type to a `serde_json::Value` for inspection.
    fn to_json<T: serde::Serialize>(value: &T) -> serde_json::Value {
        serde_json::to_value(value).unwrap()
    }

    /// Baseline healthy report; only the relation-concentration fields are
    /// supplied by the caller.
    fn make_full_response(
        top_relation: Option<String>,
        top_relation_ratio: Option<f64>,
        applies_to_ratio: Option<f64>,
        relation_concentration_warning: Option<String>,
    ) -> HealthResponse {
        let counts = HealthCounts {
            memories: 10,
            memories_total: 10,
            entities: 5,
            relationships: 20,
            vec_memories: 10,
        };
        HealthResponse {
            status: "ok".to_string(),
            integrity: "ok".to_string(),
            integrity_ok: true,
            schema_ok: true,
            vec_memories_ok: true,
            vec_memories_missing: 0,
            vec_memories_orphaned: 0,
            vec_entities_ok: true,
            vec_chunks_ok: true,
            fts_ok: true,
            fts_query_ok: true,
            model_ok: true,
            counts,
            db_path: "/tmp/test.sqlite".to_string(),
            db_size_bytes: 8192,
            schema_version: 3,
            missing_entities: vec![],
            wal_size_mb: 0.0,
            journal_mode: "wal".to_string(),
            sqlite_version: "3.46.0".to_string(),
            mentions_ratio: None,
            mentions_warning: None,
            top_relation,
            top_relation_ratio,
            applies_to_ratio,
            relation_concentration_warning,
            checks: vec![],
            elapsed_ms: 1,
        }
    }

    #[test]
    fn health_check_serializes_all_new_fields() {
        // Override only what differs from the baseline report.
        let report = HealthResponse {
            model_ok: false,
            counts: HealthCounts {
                memories: 5,
                memories_total: 5,
                entities: 3,
                relationships: 2,
                vec_memories: 5,
            },
            db_size_bytes: 4096,
            schema_version: 6,
            elapsed_ms: 0,
            checks: vec![
                HealthCheck {
                    name: "integrity".to_string(),
                    ok: true,
                    detail: None,
                },
                HealthCheck {
                    name: "model_onnx".to_string(),
                    ok: false,
                    detail: Some("model missing".to_string()),
                },
            ],
            ..make_full_response(None, None, None, None)
        };

        let json = to_json(&report);
        assert_eq!(json["status"], "ok");
        let expected_flags = [
            ("integrity_ok", true),
            ("schema_ok", true),
            ("vec_memories_ok", true),
            ("vec_entities_ok", true),
            ("vec_chunks_ok", true),
            ("fts_ok", true),
            ("model_ok", false),
        ];
        for (flag, expected) in expected_flags {
            assert_eq!(json[flag], expected);
        }
        assert_eq!(json["db_size_bytes"], 4096u64);

        let entries = json["checks"]
            .as_array()
            .expect("checks must serialize as an array");
        assert_eq!(entries.len(), 2);

        let integrity_entry = &entries[0];
        assert_eq!(integrity_entry["name"], "integrity");
        assert_eq!(integrity_entry["ok"], true);
        assert!(integrity_entry.get("detail").is_none());

        let model_entry = &entries[1];
        assert_eq!(model_entry["name"], "model_onnx");
        assert_eq!(model_entry["ok"], false);
        assert_eq!(model_entry["detail"], "model missing");
    }

    #[test]
    fn health_check_without_detail_omits_field() {
        let passing = HealthCheck {
            name: "vec_memories".to_string(),
            ok: true,
            detail: None,
        };
        let encoded = to_json(&passing);
        assert!(
            encoded.get("detail").is_none(),
            "detail field must be omitted when None"
        );
    }

    #[test]
    fn health_check_with_detail_serializes_field() {
        let message = "fts_memories table missing from sqlite_master";
        let failing = HealthCheck {
            name: "fts_memories".to_string(),
            ok: false,
            detail: Some(message.to_string()),
        };
        let encoded = to_json(&failing);
        assert_eq!(encoded["detail"], message);
    }

    #[test]
    fn health_response_fts_query_ok_and_sqlite_version_serialize() {
        // Empty database variant of the baseline report.
        let report = HealthResponse {
            counts: HealthCounts {
                memories: 0,
                memories_total: 0,
                entities: 0,
                relationships: 0,
                vec_memories: 0,
            },
            db_size_bytes: 0,
            schema_version: 1,
            sqlite_version: "3.45.1".to_string(),
            elapsed_ms: 0,
            ..make_full_response(None, None, None, None)
        };

        let json = to_json(&report);

        assert_eq!(
            json["fts_query_ok"], true,
            "fts_query_ok must be present and true in serialized JSON"
        );
        assert_eq!(
            json["sqlite_version"], "3.45.1",
            "sqlite_version must be present and match the provided string"
        );

        let failure_text = "FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'";
        let failing = HealthCheck {
            name: "fts_query".to_string(),
            ok: false,
            detail: Some(failure_text.to_string()),
        };
        let encoded = to_json(&failing);
        assert_eq!(encoded["name"], "fts_query");
        assert_eq!(encoded["ok"], false);
        assert_eq!(encoded["detail"], failure_text);
    }

    #[test]
    fn health_concentration_fields_omitted_when_no_relationships() {
        let json = to_json(&make_full_response(None, None, None, None));
        for key in [
            "top_relation",
            "top_relation_ratio",
            "applies_to_ratio",
            "relation_concentration_warning",
        ] {
            assert!(json.get(key).is_none(), "{key} must be omitted when None");
        }
    }

    #[test]
    fn health_concentration_fields_present_with_data() {
        let report = make_full_response(
            Some("mentions".to_string()),
            Some(0.60),
            Some(0.10),
            Some("relation 'mentions' dominates graph at 60.0%".to_string()),
        );
        let json = to_json(&report);

        assert_eq!(json["top_relation"], "mentions");

        let top_ratio = json["top_relation_ratio"].as_f64().unwrap();
        assert!((top_ratio - 0.60).abs() < 1e-9);

        let applies_ratio = json["applies_to_ratio"].as_f64().unwrap();
        assert!((applies_ratio - 0.10).abs() < 1e-9);

        let warning = json["relation_concentration_warning"].as_str().unwrap();
        assert!(warning.contains("60.0%"));
    }

    #[test]
    fn health_concentration_warning_absent_when_ratio_below_threshold() {
        let report = make_full_response(Some("uses".to_string()), Some(0.39), None, None);
        let json = to_json(&report);
        assert_eq!(json["top_relation"], "uses");
        assert!(
            json.get("relation_concentration_warning").is_none(),
            "warning must be absent when ratio <= 0.40"
        );
    }

    #[test]
    fn health_concentration_warning_present_at_threshold() {
        let warning_text = "relation 'depends_on' dominates graph at 41.0%".to_string();
        let report = make_full_response(
            Some("depends_on".to_string()),
            Some(0.41),
            None,
            Some(warning_text),
        );
        let json = to_json(&report);
        assert!(
            json["relation_concentration_warning"].is_string(),
            "warning must be present when top_relation_ratio > 0.40"
        );
    }

    #[test]
    fn health_applies_to_ratio_omitted_when_none() {
        let report = make_full_response(Some("related".to_string()), Some(0.30), None, None);
        let json = to_json(&report);
        assert!(
            json.get("applies_to_ratio").is_none(),
            "applies_to_ratio must be omitted when None"
        );
    }
}