1use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use serde::Serialize;
8use std::fs;
9use std::time::Instant;
10
/// Candidate tables holding memory embeddings, probed in order by
/// `first_existing_table`; the first one present in the database is used.
/// The `vec_*` name is the fallback when the first name is absent.
const MEMORY_EMBEDDING_TABLES: &[&str] = &["memory_embeddings", "vec_memories"];
/// Candidate tables holding entity embeddings, probed in the same order-sensitive way.
const ENTITY_EMBEDDING_TABLES: &[&str] = &["entity_embeddings", "vec_entities"];
/// Candidate tables holding chunk embeddings, probed in the same order-sensitive way.
const CHUNK_EMBEDDING_TABLES: &[&str] = &["chunk_embeddings", "vec_chunks"];
14
// Command-line arguments of the `health` subcommand (consumed by `run`).
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n \
    # Check database health (connectivity, integrity, vector index)\n \
    sqlite-graphrag health\n\n \
    # Check health of a database at a custom path\n \
    sqlite-graphrag health --db /path/to/graphrag.sqlite\n\n \
    # Use SQLITE_GRAPHRAG_DB_PATH env var\n \
    SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag health")]
pub struct HealthArgs {
    // Database path; may also be supplied through SQLITE_GRAPHRAG_DB_PATH.
    // `None` is handed to `AppPaths::resolve`, which picks the location.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
    // Boolean flag, off by default. `run` reads and discards it: the report is
    // always emitted as JSON.
    #[arg(long, default_value_t = false)]
    pub json: bool,
    // Hidden option restricted to "json" or "text". `run` reads and discards it.
    #[arg(long, value_parser = ["json", "text"], hide = true)]
    pub format: Option<String>,
    // Optional namespace. When present, `run` resolves it and uses it to scope
    // the live-memory count; the other counts are not filtered by it.
    #[arg(long)]
    pub namespace: Option<String>,
}
38
// Row counts reported under the `counts` key of the health response.
#[derive(Serialize, schemars::JsonSchema)]
pub struct HealthCounts {
    // Memories with `deleted_at IS NULL`; restricted to the namespace filter
    // when one was supplied to `run`.
    memories: i64,
    // `run` currently fills this with the same value as `memories`.
    // NOTE(review): confirm whether an unfiltered total was intended here.
    memories_total: i64,
    // `SELECT COUNT(*) FROM entities` (not namespace-filtered).
    entities: i64,
    // `SELECT COUNT(*) FROM relationships` (not namespace-filtered).
    relationships: i64,
    // Row count of whichever memory-embedding table was found (0 if none).
    vec_memories: i64,
}
48
// Outcome of a single named probe, listed under `checks` in the response.
#[derive(Serialize, schemars::JsonSchema)]
pub struct HealthCheck {
    // Identifier of the probe, e.g. "integrity" or "fts_query".
    name: String,
    // Whether the probe passed.
    ok: bool,
    // Explanation attached by `run` when the probe fails; left out of the
    // serialized JSON entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<String>,
}
56
// JSON document emitted by the `health` command. Every `Option` field is
// omitted from the output when it is `None`.
#[derive(Serialize, schemars::JsonSchema)]
pub struct HealthResponse {
    // "ok" once the integrity check passes, "degraded" when it fails.
    status: String,
    // Resolved namespace filter, when `--namespace` was given.
    #[serde(skip_serializing_if = "Option::is_none")]
    namespace: Option<String>,
    // Raw text returned by `PRAGMA integrity_check`.
    integrity: String,
    // True when `integrity` equals "ok".
    integrity_ok: bool,
    // True when `schema_version` is greater than zero.
    schema_ok: bool,
    // A memory-embedding table exists.
    vec_memories_ok: bool,
    // Live memories that have no row in the memory-embedding table.
    vec_memories_missing: i64,
    // Embedding rows whose memory is absent or soft-deleted.
    vec_memories_orphaned: i64,
    // An entity-embedding table exists.
    vec_entities_ok: bool,
    // Entities that have no row in the entity-embedding table.
    vec_entities_missing: i64,
    // A chunk-embedding table exists.
    vec_chunks_ok: bool,
    // Memory chunks that have no row in the chunk-embedding table.
    vec_chunks_missing: i64,
    // Coverage percentages produced by `coverage_pct` for each embedding kind.
    vec_memories_coverage_pct: f64,
    vec_entities_coverage_pct: f64,
    vec_chunks_coverage_pct: f64,
    // The `fts_memories` table is listed in `sqlite_master`.
    fts_ok: bool,
    // A probe `MATCH` query against `fts_memories` executed without error.
    fts_query_ok: bool,
    // At least one of the `claude` / `codex` binaries was located.
    model_ok: bool,
    // Row counts; see `HealthCounts`.
    counts: HealthCounts,
    // Database path as displayed, and its size on disk (0 if metadata is unreadable).
    db_path: String,
    db_size_bytes: u64,
    // Highest `version` in `refinery_schema_history`, or 0 when unavailable.
    schema_version: u32,
    // One "entity_id=<id>" entry per `memory_entities` reference to a missing entity.
    missing_entities: Vec<String>,
    // Size of the "<db>-wal" file in MiB (0.0 when it cannot be read).
    wal_size_mb: f64,
    // `PRAGMA journal_mode` result, or "unknown".
    journal_mode: String,
    // `sqlite_version()` result, or "unknown".
    sqlite_version: String,
    // Share of relationships whose relation is 'mentions'; absent when there
    // are no relationships.
    #[serde(skip_serializing_if = "Option::is_none")]
    mentions_ratio: Option<f64>,
    // Set when `mentions_ratio` exceeds 0.5.
    #[serde(skip_serializing_if = "Option::is_none")]
    mentions_warning: Option<String>,
    // Most frequent relation and its share of all relationships.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_relation: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    top_relation_ratio: Option<f64>,
    // Share of 'applies_to' relationships; absent when there are none.
    #[serde(skip_serializing_if = "Option::is_none")]
    applies_to_ratio: Option<f64>,
    // Set when `top_relation_ratio` exceeds 0.40.
    #[serde(skip_serializing_if = "Option::is_none")]
    relation_concentration_warning: Option<String>,
    // Entities whose name differs from its normalized form.
    #[serde(skip_serializing_if = "Option::is_none")]
    non_normalized_count: Option<i64>,
    // Set when `non_normalized_count` is positive.
    #[serde(skip_serializing_if = "Option::is_none")]
    normalization_warning: Option<String>,
    // Number of entities with degree above 50; the query returns at most 5 rows.
    #[serde(skip_serializing_if = "Option::is_none")]
    super_hub_count: Option<i64>,
    // Lists the super-hubs found, when there are any.
    #[serde(skip_serializing_if = "Option::is_none")]
    super_hub_warning: Option<String>,
    // Highest-degree entity and its degree.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_hub_entity: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    top_hub_degree: Option<i64>,
    // Set when `top_hub_degree` exceeds 50.
    #[serde(skip_serializing_if = "Option::is_none")]
    hub_warning: Option<String>,
    // LLM slot figures from `llm_slot_info`: maximum, active and stale slots.
    #[serde(skip_serializing_if = "Option::is_none")]
    llm_slots_total: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    llm_slots_occupied: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    llm_slots_stale: Option<u32>,
    // Per-probe results; see `HealthCheck`.
    checks: Vec<HealthCheck>,
    // Wall-clock time spent inside `run`, in milliseconds.
    elapsed_ms: u64,
}
163
164fn llm_slot_info() -> (u32, u32, u32) {
165 let max = crate::llm_slots::default_max_concurrency();
166 let status = crate::llm_slots::read_status(max);
167 let stale = crate::llm_slots::find_stale_slots(max);
168 (status.max, status.active, stale.len() as u32)
169}
170
171fn table_exists(conn: &rusqlite::Connection, table_name: &str) -> bool {
173 conn.query_row(
174 "SELECT COUNT(*) FROM sqlite_master WHERE type IN ('table', 'shadow') AND name = ?1",
175 rusqlite::params![table_name],
176 |r| r.get::<_, i64>(0),
177 )
178 .unwrap_or(0)
179 > 0
180}
181
182fn first_existing_table<'a>(
183 conn: &rusqlite::Connection,
184 candidates: &'a [&'a str],
185) -> Option<&'a str> {
186 candidates
187 .iter()
188 .copied()
189 .find(|name| table_exists(conn, name))
190}
191
192fn count_rows(conn: &rusqlite::Connection, table_name: &str) -> i64 {
193 conn.query_row(&format!("SELECT COUNT(*) FROM {table_name}"), [], |r| {
194 r.get(0)
195 })
196 .unwrap_or(0)
197}
198
199fn memory_embedding_health(conn: &rusqlite::Connection) -> (bool, i64, i64, i64) {
200 let Some(table_name) = first_existing_table(conn, MEMORY_EMBEDDING_TABLES) else {
201 return (false, 0, 0, 0);
202 };
203
204 let total = count_rows(conn, table_name);
205 let missing = conn
206 .query_row(
207 &format!(
208 "SELECT COUNT(*)
209 FROM memories m
210 LEFT JOIN {table_name} me ON me.memory_id = m.id
211 WHERE me.memory_id IS NULL AND m.deleted_at IS NULL"
212 ),
213 [],
214 |r| r.get(0),
215 )
216 .unwrap_or(0);
217 let orphaned = conn
218 .query_row(
219 &format!(
220 "SELECT COUNT(*)
221 FROM {table_name} me
222 LEFT JOIN memories m ON m.id = me.memory_id
223 WHERE m.id IS NULL OR m.deleted_at IS NOT NULL"
224 ),
225 [],
226 |r| r.get(0),
227 )
228 .unwrap_or(0);
229
230 (true, total, missing, orphaned)
231}
232
233fn entity_embedding_health(conn: &rusqlite::Connection) -> (bool, i64) {
239 let Some(table_name) = first_existing_table(conn, ENTITY_EMBEDDING_TABLES) else {
240 return (false, 0);
241 };
242 let missing = conn
243 .query_row(
244 &format!(
245 "SELECT COUNT(*)
246 FROM entities e
247 LEFT JOIN {table_name} ee ON ee.entity_id = e.id
248 WHERE ee.entity_id IS NULL"
249 ),
250 [],
251 |r| r.get(0),
252 )
253 .unwrap_or(0);
254 (true, missing)
255}
256
257fn chunk_embedding_health(conn: &rusqlite::Connection) -> (bool, i64) {
261 let Some(table_name) = first_existing_table(conn, CHUNK_EMBEDDING_TABLES) else {
262 return (false, 0);
263 };
264 let missing = conn
265 .query_row(
266 &format!(
267 "SELECT COUNT(*)
268 FROM memory_chunks c
269 LEFT JOIN {table_name} ce ON ce.chunk_id = c.id
270 WHERE ce.chunk_id IS NULL"
271 ),
272 [],
273 |r| r.get(0),
274 )
275 .unwrap_or(0);
276 (true, missing)
277}
278
/// Percentage of `total` source rows that have an embedding.
///
/// - `total <= 0` → `100.0` (nothing to cover), regardless of `table_ok`;
/// - otherwise, a missing table → `0.0`;
/// - otherwise `(total - missing) / total * 100`, with the covered count
///   floored at zero.
fn coverage_pct(table_ok: bool, total: i64, missing: i64) -> f64 {
    match (total > 0, table_ok) {
        (false, _) => 100.0,
        (true, false) => 0.0,
        (true, true) => {
            let covered = (total - missing).max(0) as f64;
            (covered / total as f64) * 100.0
        }
    }
}
292
293pub fn run(args: HealthArgs) -> Result<(), AppError> {
294 let start = Instant::now();
295 let _ = args.json; let _ = args.format; let paths = AppPaths::resolve(args.db.as_deref())?;
298 let namespace_filter = match args.namespace.as_deref() {
301 Some(ns) => Some(crate::namespace::resolve_namespace(Some(ns))?),
302 None => None,
303 };
304
305 if !paths.db.exists() {
309 let msg = format!(
310 "database not found at {}; `health` does not auto-create the database — \
311 run `sqlite-graphrag init --db {}` first or pass an existing path",
312 paths.db.display(),
313 paths.db.display(),
314 );
315 tracing::warn!(target: "health", db_path = %paths.db.display(), "database path does not exist; refusing to bootstrap");
316 output::emit_json(&serde_json::json!({
317 "error": true,
318 "code": 4,
319 "message": msg,
320 "db_path": paths.db.display().to_string(),
321 }))?;
322 return Err(AppError::NotFound(msg));
323 }
324
325 let conn = open_ro(&paths.db)?;
326
327 let integrity: String = conn.query_row("PRAGMA integrity_check;", [], |r| r.get(0))?;
328 let integrity_ok = integrity == "ok";
329 tracing::info!(target: "health", integrity_ok = %integrity_ok, "PRAGMA integrity_check complete");
330
331 if !integrity_ok {
332 let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
333 output::emit_json(&HealthResponse {
334 status: "degraded".to_string(),
335 namespace: None,
336 integrity: integrity.clone(),
337 integrity_ok: false,
338 schema_ok: false,
339 vec_memories_ok: false,
340 vec_memories_missing: 0,
341 vec_memories_orphaned: 0,
342 vec_entities_ok: false,
343 vec_entities_missing: 0,
344 vec_chunks_ok: false,
345 vec_chunks_missing: 0,
346 vec_memories_coverage_pct: 0.0,
347 vec_entities_coverage_pct: 0.0,
348 vec_chunks_coverage_pct: 0.0,
349 fts_ok: false,
350 fts_query_ok: false,
351 model_ok: false,
352 counts: HealthCounts {
353 memories: 0,
354 memories_total: 0,
355 entities: 0,
356 relationships: 0,
357 vec_memories: 0,
358 },
359 db_path: paths.db.display().to_string(),
360 db_size_bytes,
361 schema_version: 0,
362 sqlite_version: "unknown".to_string(),
363 missing_entities: vec![],
364 wal_size_mb: 0.0,
365 journal_mode: "unknown".to_string(),
366 mentions_ratio: None,
367 mentions_warning: None,
368 top_relation: None,
369 top_relation_ratio: None,
370 applies_to_ratio: None,
371 relation_concentration_warning: None,
372 non_normalized_count: None,
373 normalization_warning: None,
374 super_hub_count: None,
375 super_hub_warning: None,
376 top_hub_entity: None,
377 top_hub_degree: None,
378 hub_warning: None,
379 llm_slots_total: None,
380 llm_slots_occupied: None,
381 llm_slots_stale: None,
382 checks: vec![HealthCheck {
383 name: "integrity".to_string(),
384 ok: false,
385 detail: Some(integrity),
386 }],
387 elapsed_ms: start.elapsed().as_millis() as u64,
388 })?;
389 return Err(AppError::Database(rusqlite::Error::SqliteFailure(
390 rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_CORRUPT),
391 Some("integrity check failed".to_string()),
392 )));
393 }
394
395 let memories_count: i64 = match &namespace_filter {
397 Some(ns) => conn.query_row(
398 "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL AND namespace = ?1",
399 rusqlite::params![ns],
400 |r| r.get(0),
401 )?,
402 None => conn.query_row(
403 "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
404 [],
405 |r| r.get(0),
406 )?,
407 };
408 let entities_count: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
409 let relationships_count: i64 =
410 conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?;
411 let (vec_memories_ok, vec_memories_count, vec_memories_missing, vec_memories_orphaned) =
412 memory_embedding_health(&conn);
413
414 let mentions_count: i64 = conn.query_row(
415 "SELECT COUNT(*) FROM relationships WHERE relation = 'mentions'",
416 [],
417 |r| r.get(0),
418 )?;
419 let (mentions_ratio, mentions_warning) = if relationships_count > 0 {
420 let ratio = mentions_count as f64 / relationships_count as f64;
421 let warning = if ratio > 0.5 {
422 Some(format!(
423 "mentions relationships dominate graph at {:.1}% ({}/{} total); consider running prune-relations --relation mentions --dry-run",
424 ratio * 100.0,
425 mentions_count,
426 relationships_count
427 ))
428 } else {
429 None
430 };
431 (Some(ratio), warning)
432 } else {
433 (None, None)
434 };
435
436 let (top_relation, top_relation_ratio, applies_to_ratio, relation_concentration_warning) =
438 if relationships_count > 0 {
439 let (top_rel, top_count): (String, i64) = conn
441 .query_row(
442 "SELECT relation, COUNT(*) AS cnt
443 FROM relationships
444 GROUP BY relation
445 ORDER BY cnt DESC
446 LIMIT 1",
447 [],
448 |r| Ok((r.get::<_, String>(0)?, r.get::<_, i64>(1)?)),
449 )
450 .unwrap_or_else(|_| ("unknown".to_string(), 0));
451
452 let top_ratio = top_count as f64 / relationships_count as f64;
453
454 let applies_count: i64 = conn
456 .query_row(
457 "SELECT COUNT(*) FROM relationships WHERE relation = 'applies_to'",
458 [],
459 |r| r.get(0),
460 )
461 .unwrap_or(0);
462 let at_ratio = if applies_count > 0 {
463 Some(applies_count as f64 / relationships_count as f64)
464 } else {
465 None
466 };
467
468 let concentration_warning = if top_ratio > 0.40 {
469 Some(format!(
470 "relation '{}' dominates graph at {:.1}% ({}/{} total); consider running prune-relations --relation {} --dry-run",
471 top_rel,
472 top_ratio * 100.0,
473 top_count,
474 relationships_count,
475 top_rel,
476 ))
477 } else {
478 None
479 };
480
481 (
482 Some(top_rel),
483 Some(top_ratio),
484 at_ratio,
485 concentration_warning,
486 )
487 } else {
488 (None, None, None, None)
489 };
490
491 let status = "ok";
492
493 let schema_version: u32 = conn
494 .query_row(
495 "SELECT COALESCE(MAX(version), 0) FROM refinery_schema_history",
496 [],
497 |r| r.get::<_, i64>(0),
498 )
499 .unwrap_or(0) as u32;
500
501 let schema_ok = schema_version > 0;
502
503 let (vec_entities_ok, vec_entities_missing) = entity_embedding_health(&conn);
506 let (vec_chunks_ok, vec_chunks_missing) = chunk_embedding_health(&conn);
507
508 let memories_total_global: i64 = conn.query_row(
511 "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
512 [],
513 |r| r.get(0),
514 )?;
515 let chunks_total: i64 = conn
516 .query_row("SELECT COUNT(*) FROM memory_chunks", [], |r| r.get(0))
517 .unwrap_or(0);
518 let vec_memories_coverage_pct =
519 coverage_pct(vec_memories_ok, memories_total_global, vec_memories_missing);
520 let vec_entities_coverage_pct =
521 coverage_pct(vec_entities_ok, entities_count, vec_entities_missing);
522 let vec_chunks_coverage_pct = coverage_pct(vec_chunks_ok, chunks_total, vec_chunks_missing);
523
524 tracing::info!(target: "health", vec_memories_ok = %vec_memories_ok, vec_entities_ok = %vec_entities_ok, vec_missing = vec_memories_missing, vec_orphaned = vec_memories_orphaned, "vector table checks complete");
525 let fts_ok = table_exists(&conn, "fts_memories");
526
527 let fts_query_ok = if fts_ok {
529 conn.query_row(
530 "SELECT COUNT(*) FROM fts_memories WHERE fts_memories MATCH 'a' LIMIT 1",
531 [],
532 |r| r.get::<_, i64>(0),
533 )
534 .is_ok()
535 } else {
536 false
537 };
538
539 tracing::info!(target: "health", fts_ok = %fts_ok, fts_query_ok = %fts_query_ok, "FTS5 checks complete");
540
541 let sqlite_version: String = conn
543 .query_row("SELECT sqlite_version()", [], |r| r.get(0))
544 .unwrap_or_else(|_| "unknown".to_string());
545
546 let mut missing_entities: Vec<String> = Vec::with_capacity(4);
548 let mut stmt = conn.prepare_cached(
549 "SELECT DISTINCT me.entity_id
550 FROM memory_entities me
551 LEFT JOIN entities e ON e.id = me.entity_id
552 WHERE e.id IS NULL",
553 )?;
554 let orphans: Vec<i64> = stmt
555 .query_map([], |r| r.get(0))?
556 .collect::<Result<Vec<_>, _>>()?;
557 for id in orphans {
558 missing_entities.push(format!("entity_id={id}"));
559 }
560
561 let journal_mode: String = conn
562 .query_row("PRAGMA journal_mode", [], |row| row.get::<_, String>(0))
563 .unwrap_or_else(|_| "unknown".to_string());
564
565 let wal_size_mb = fs::metadata(format!("{}-wal", paths.db.display()))
566 .map(|m| m.len() as f64 / 1024.0 / 1024.0)
567 .unwrap_or(0.0);
568
569 let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
571
572 let model_ok = crate::commands::ingest_claude::find_claude_binary(None).is_ok()
576 || crate::commands::ingest_codex::find_codex_binary(None).is_ok();
577 tracing::info!(target: "health", model_ok = %model_ok, "LLM CLI availability check complete");
578
579 let mut checks: Vec<HealthCheck> = Vec::with_capacity(8);
581
582 checks.push(HealthCheck {
584 name: "integrity".to_string(),
585 ok: true,
586 detail: None,
587 });
588
589 checks.push(HealthCheck {
590 name: "schema_version".to_string(),
591 ok: schema_ok,
592 detail: if schema_ok {
593 None
594 } else {
595 Some(format!("schema_version={schema_version} (expected >0)"))
596 },
597 });
598
599 checks.push(HealthCheck {
600 name: "vec_memories".to_string(),
601 ok: vec_memories_ok,
602 detail: if vec_memories_ok {
603 None
604 } else {
605 Some("memory_embeddings/vec_memories table missing from sqlite_master".to_string())
606 },
607 });
608
609 checks.push(HealthCheck {
610 name: "vec_entities".to_string(),
611 ok: vec_entities_ok,
612 detail: if vec_entities_ok {
613 None
614 } else {
615 Some("entity_embeddings/vec_entities table missing from sqlite_master".to_string())
616 },
617 });
618
619 checks.push(HealthCheck {
620 name: "vec_chunks".to_string(),
621 ok: vec_chunks_ok,
622 detail: if vec_chunks_ok {
623 None
624 } else {
625 Some("chunk_embeddings/vec_chunks table missing from sqlite_master".to_string())
626 },
627 });
628
629 checks.push(HealthCheck {
630 name: "fts_memories".to_string(),
631 ok: fts_ok,
632 detail: if fts_ok {
633 None
634 } else {
635 Some("fts_memories table missing from sqlite_master".to_string())
636 },
637 });
638
639 checks.push(HealthCheck {
640 name: "fts_query".to_string(),
641 ok: fts_query_ok,
642 detail: if fts_query_ok {
643 None
644 } else {
645 Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string())
646 },
647 });
648
649 checks.push(HealthCheck {
650 name: "llm_cli".to_string(),
651 ok: model_ok,
652 detail: if model_ok {
653 None
654 } else {
655 Some(
656 "no LLM CLI found on PATH; install 'claude' (Claude Code) or 'codex' \
657 (Codex CLI) — required for embedding generation since v1.0.76"
658 .to_string(),
659 )
660 },
661 });
662
663 let (non_normalized_count, normalization_warning) = {
665 let mut stmt = conn.prepare_cached("SELECT name FROM entities")?;
666 let names: Vec<String> = stmt
667 .query_map([], |r| r.get(0))?
668 .filter_map(|r| r.ok())
669 .collect();
670 let count = names
671 .iter()
672 .filter(|n| crate::parsers::normalize_entity_name(n) != **n)
673 .count() as i64;
674 let warning = if count > 0 {
675 Some(format!(
676 "run 'normalize-entities --yes' to fix {count} non-normalized entities"
677 ))
678 } else {
679 None
680 };
681 (Some(count), warning)
682 };
683
684 let (super_hub_count, super_hub_warning) = {
686 let mut stmt = conn.prepare_cached(
687 "SELECT e.name, COUNT(r.id) as deg FROM entities e \
688 LEFT JOIN relationships r ON e.id = r.source_id OR e.id = r.target_id \
689 GROUP BY e.id HAVING deg > 50 ORDER BY deg DESC LIMIT 5",
690 )?;
691 let hubs: Vec<(String, i64)> = stmt
692 .query_map([], |r| Ok((r.get(0)?, r.get(1)?)))?
693 .filter_map(|r| r.ok())
694 .collect();
695 let count = hubs.len() as i64;
696 let warning = if count > 0 {
697 let names: Vec<String> = hubs
698 .iter()
699 .map(|(n, d)| format!("{n} (degree {d})"))
700 .collect();
701 Some(format!("super-hubs detected: {}", names.join(", ")))
702 } else {
703 None
704 };
705 (Some(count), warning)
706 };
707
708 let (top_hub_entity, top_hub_degree, hub_warning) = {
710 let result: Option<(String, i64)> = conn
711 .query_row(
712 "SELECT e.name, COUNT(r.id) AS degree
713 FROM entities e
714 LEFT JOIN relationships r ON e.id = r.source_id OR e.id = r.target_id
715 GROUP BY e.id
716 ORDER BY degree DESC
717 LIMIT 1",
718 [],
719 |r| Ok((r.get::<_, String>(0)?, r.get::<_, i64>(1)?)),
720 )
721 .ok();
722 match result {
723 Some((name, degree)) => {
724 let warning = if degree > 50 {
725 Some(format!(
726 "entity '{name}' has {degree} connections; consider splitting or using --max-neighbors-per-hop"
727 ))
728 } else {
729 None
730 };
731 (Some(name), Some(degree), warning)
732 }
733 None => (None, None, None),
734 }
735 };
736
737 let llm_slots = llm_slot_info();
738 let response = HealthResponse {
739 status: status.to_string(),
740 namespace: namespace_filter.clone(),
741 integrity,
742 integrity_ok,
743 schema_ok,
744 vec_memories_ok,
745 vec_memories_missing,
746 vec_memories_orphaned,
747 vec_entities_ok,
748 vec_entities_missing,
749 vec_chunks_ok,
750 vec_chunks_missing,
751 vec_memories_coverage_pct,
752 vec_entities_coverage_pct,
753 vec_chunks_coverage_pct,
754 fts_ok,
755 fts_query_ok,
756 model_ok,
757 counts: HealthCounts {
758 memories: memories_count,
759 memories_total: memories_count,
760 entities: entities_count,
761 relationships: relationships_count,
762 vec_memories: vec_memories_count,
763 },
764 db_path: paths.db.display().to_string(),
765 db_size_bytes,
766 schema_version,
767 sqlite_version,
768 missing_entities,
769 wal_size_mb,
770 journal_mode,
771 mentions_ratio,
772 mentions_warning,
773 top_relation,
774 top_relation_ratio,
775 applies_to_ratio,
776 relation_concentration_warning,
777 non_normalized_count,
778 normalization_warning,
779 super_hub_count,
780 super_hub_warning,
781 top_hub_entity,
782 top_hub_degree,
783 hub_warning,
784 llm_slots_total: Some(llm_slots.0),
785 llm_slots_occupied: Some(llm_slots.1),
786 llm_slots_stale: Some(llm_slots.2),
787 checks,
788 elapsed_ms: start.elapsed().as_millis() as u64,
789 };
790
791 output::emit_json(&response)?;
792
793 Ok(())
794}
795
796#[cfg(test)]
797mod tests {
798 use super::*;
799 use rusqlite::Connection;
800
801 fn open_health_test_db() -> Connection {
802 let conn = Connection::open_in_memory().unwrap();
803 conn.execute_batch(
804 "CREATE TABLE memories (
805 id INTEGER PRIMARY KEY,
806 deleted_at INTEGER
807 );
808 CREATE TABLE memory_embeddings (
809 memory_id INTEGER PRIMARY KEY,
810 namespace TEXT NOT NULL,
811 embedding BLOB NOT NULL,
812 source TEXT NOT NULL,
813 model TEXT NOT NULL,
814 dim INTEGER NOT NULL DEFAULT 384,
815 created_at TEXT NOT NULL DEFAULT '0'
816 );
817 CREATE TABLE vec_memories (
818 memory_id INTEGER PRIMARY KEY,
819 embedding BLOB NOT NULL,
820 created_at INTEGER NOT NULL DEFAULT 0
821 );",
822 )
823 .unwrap();
824 conn
825 }
826
827 #[test]
828 fn memory_embedding_health_prefers_memory_embeddings_and_counts_soft_deleted_as_orphaned() {
829 let conn = open_health_test_db();
830 conn.execute("INSERT INTO memories (id, deleted_at) VALUES (1, NULL)", [])
831 .unwrap();
832 conn.execute("INSERT INTO memories (id, deleted_at) VALUES (2, NULL)", [])
833 .unwrap();
834 conn.execute("INSERT INTO memories (id, deleted_at) VALUES (3, 123)", [])
835 .unwrap();
836 conn.execute(
837 "INSERT INTO memory_embeddings(memory_id, namespace, embedding, source, model, dim, created_at)
838 VALUES (1, 'global', X'00', 'llm', 'm', 384, '1')",
839 [],
840 )
841 .unwrap();
842 conn.execute(
843 "INSERT INTO memory_embeddings(memory_id, namespace, embedding, source, model, dim, created_at)
844 VALUES (3, 'global', X'00', 'llm', 'm', 384, '2')",
845 [],
846 )
847 .unwrap();
848 conn.execute(
849 "INSERT INTO memory_embeddings(memory_id, namespace, embedding, source, model, dim, created_at)
850 VALUES (99, 'global', X'00', 'llm', 'm', 384, '3')",
851 [],
852 )
853 .unwrap();
854 conn.execute(
855 "INSERT INTO vec_memories(memory_id, embedding, created_at) VALUES (777, X'00', 0)",
856 [],
857 )
858 .unwrap();
859
860 let (ok, total, missing, orphaned) = memory_embedding_health(&conn);
861 assert!(ok);
862 assert_eq!(total, 3);
863 assert_eq!(missing, 1);
864 assert_eq!(orphaned, 2);
865 }
866
867 #[test]
870 fn entity_and_chunk_embedding_health_count_missing_rows() {
871 let conn = Connection::open_in_memory().unwrap();
872 conn.execute_batch(
873 "CREATE TABLE entities (id INTEGER PRIMARY KEY, name TEXT);
874 CREATE TABLE entity_embeddings (
875 entity_id INTEGER PRIMARY KEY,
876 namespace TEXT NOT NULL,
877 embedding BLOB NOT NULL
878 );
879 CREATE TABLE memory_chunks (id INTEGER PRIMARY KEY, memory_id INTEGER);
880 CREATE TABLE chunk_embeddings (
881 chunk_id INTEGER PRIMARY KEY,
882 memory_id INTEGER NOT NULL,
883 embedding BLOB NOT NULL
884 );",
885 )
886 .unwrap();
887 conn.execute(
888 "INSERT INTO entities (id, name) VALUES (1, 'a'), (2, 'b'), (3, 'c')",
889 [],
890 )
891 .unwrap();
892 conn.execute(
893 "INSERT INTO entity_embeddings (entity_id, namespace, embedding)
894 VALUES (1, 'global', X'00')",
895 [],
896 )
897 .unwrap();
898 conn.execute(
899 "INSERT INTO memory_chunks (id, memory_id) VALUES (10, 1), (11, 1)",
900 [],
901 )
902 .unwrap();
903
904 let (e_ok, e_missing) = entity_embedding_health(&conn);
905 assert!(e_ok, "entity_embeddings exists");
906 assert_eq!(e_missing, 2, "2 of 3 entities lack a vector row");
907 let (c_ok, c_missing) = chunk_embedding_health(&conn);
908 assert!(c_ok, "chunk_embeddings exists");
909 assert_eq!(c_missing, 2, "both chunks lack a vector row");
910 }
911
912 #[test]
913 fn entity_embedding_health_absent_table_reports_not_ok() {
914 let conn = Connection::open_in_memory().unwrap();
915 conn.execute_batch("CREATE TABLE entities (id INTEGER PRIMARY KEY, name TEXT);")
916 .unwrap();
917 let (ok, missing) = entity_embedding_health(&conn);
918 assert!(!ok, "no entity vector table exists");
919 assert_eq!(missing, 0);
920 }
921
922 #[test]
923 fn coverage_pct_boundaries() {
924 assert!((coverage_pct(true, 0, 0) - 100.0).abs() < 1e-9);
925 assert!((coverage_pct(false, 5, 0) - 0.0).abs() < 1e-9);
926 assert!((coverage_pct(true, 4, 1) - 75.0).abs() < 1e-9);
927 assert!((coverage_pct(true, 4, 0) - 100.0).abs() < 1e-9);
928 }
929
930 #[test]
931 fn first_existing_table_falls_back_to_legacy_vec_name() {
932 let conn = Connection::open_in_memory().unwrap();
933 conn.execute_batch(
934 "CREATE TABLE vec_memories (
935 memory_id INTEGER PRIMARY KEY,
936 embedding BLOB NOT NULL,
937 created_at INTEGER NOT NULL DEFAULT 0
938 );",
939 )
940 .unwrap();
941
942 let resolved = first_existing_table(&conn, MEMORY_EMBEDDING_TABLES);
943 assert_eq!(resolved, Some("vec_memories"));
944 }
945
946 #[test]
947 fn health_check_serializes_all_new_fields() {
948 let response = HealthResponse {
949 status: "ok".to_string(),
950 namespace: None,
951 integrity: "ok".to_string(),
952 integrity_ok: true,
953 schema_ok: true,
954 vec_memories_ok: true,
955 vec_memories_missing: 0,
956 vec_memories_orphaned: 0,
957 vec_entities_ok: true,
958 vec_entities_missing: 0,
959 vec_chunks_ok: true,
960 vec_chunks_missing: 0,
961 vec_memories_coverage_pct: 100.0,
962 vec_entities_coverage_pct: 100.0,
963 vec_chunks_coverage_pct: 100.0,
964 fts_ok: true,
965 fts_query_ok: true,
966 model_ok: false,
967 counts: HealthCounts {
968 memories: 5,
969 memories_total: 5,
970 entities: 3,
971 relationships: 2,
972 vec_memories: 5,
973 },
974 db_path: "/tmp/test.sqlite".to_string(),
975 db_size_bytes: 4096,
976 schema_version: 6,
977 sqlite_version: "3.46.0".to_string(),
978 elapsed_ms: 0,
979 missing_entities: vec![],
980 wal_size_mb: 0.0,
981 journal_mode: "wal".to_string(),
982 mentions_ratio: None,
983 mentions_warning: None,
984 top_relation: None,
985 top_relation_ratio: None,
986 applies_to_ratio: None,
987 relation_concentration_warning: None,
988 non_normalized_count: None,
989 normalization_warning: None,
990 super_hub_count: None,
991 super_hub_warning: None,
992 top_hub_entity: None,
993 top_hub_degree: None,
994 hub_warning: None,
995 llm_slots_total: None,
996 llm_slots_occupied: None,
997 llm_slots_stale: None,
998 checks: vec![
999 HealthCheck {
1000 name: "integrity".to_string(),
1001 ok: true,
1002 detail: None,
1003 },
1004 HealthCheck {
1005 name: "model_onnx".to_string(),
1006 ok: false,
1007 detail: Some("model missing".to_string()),
1008 },
1009 ],
1010 };
1011
1012 let json = serde_json::to_value(&response).unwrap();
1013 assert_eq!(json["status"], "ok");
1014 assert_eq!(json["integrity_ok"], true);
1015 assert_eq!(json["schema_ok"], true);
1016 assert_eq!(json["vec_memories_ok"], true);
1017 assert_eq!(json["vec_entities_ok"], true);
1018 assert_eq!(json["vec_chunks_ok"], true);
1019 assert_eq!(json["vec_entities_missing"], 0);
1021 assert_eq!(json["vec_chunks_missing"], 0);
1022 assert!((json["vec_memories_coverage_pct"].as_f64().unwrap() - 100.0).abs() < 1e-9);
1023 assert!((json["vec_entities_coverage_pct"].as_f64().unwrap() - 100.0).abs() < 1e-9);
1024 assert!((json["vec_chunks_coverage_pct"].as_f64().unwrap() - 100.0).abs() < 1e-9);
1025 assert_eq!(json["fts_ok"], true);
1026 assert_eq!(json["model_ok"], false);
1027 assert_eq!(json["db_size_bytes"], 4096u64);
1028 assert!(json["checks"].is_array());
1029 assert_eq!(json["checks"].as_array().unwrap().len(), 2);
1030
1031 let integrity_check = &json["checks"][0];
1033 assert_eq!(integrity_check["name"], "integrity");
1034 assert_eq!(integrity_check["ok"], true);
1035 assert!(integrity_check.get("detail").is_none());
1036
1037 let model_check = &json["checks"][1];
1039 assert_eq!(model_check["name"], "model_onnx");
1040 assert_eq!(model_check["ok"], false);
1041 assert_eq!(model_check["detail"], "model missing");
1042 }
1043
1044 #[test]
1045 fn health_check_without_detail_omits_field() {
1046 let check = HealthCheck {
1047 name: "vec_memories".to_string(),
1048 ok: true,
1049 detail: None,
1050 };
1051 let json = serde_json::to_value(&check).unwrap();
1052 assert!(
1053 json.get("detail").is_none(),
1054 "detail field must be omitted when None"
1055 );
1056 }
1057
1058 #[test]
1059 fn health_check_with_detail_serializes_field() {
1060 let check = HealthCheck {
1061 name: "fts_memories".to_string(),
1062 ok: false,
1063 detail: Some("fts_memories table missing from sqlite_master".to_string()),
1064 };
1065 let json = serde_json::to_value(&check).unwrap();
1066 assert_eq!(
1067 json["detail"],
1068 "fts_memories table missing from sqlite_master"
1069 );
1070 }
1071
1072 #[test]
1073 fn health_response_fts_query_ok_and_sqlite_version_serialize() {
1074 let response = HealthResponse {
1077 status: "ok".to_string(),
1078 namespace: Some("test-ns".to_string()),
1079 integrity: "ok".to_string(),
1080 integrity_ok: true,
1081 schema_ok: true,
1082 vec_memories_ok: true,
1083 vec_memories_missing: 0,
1084 vec_memories_orphaned: 0,
1085 vec_entities_ok: true,
1086 vec_entities_missing: 0,
1087 vec_chunks_ok: true,
1088 vec_chunks_missing: 0,
1089 vec_memories_coverage_pct: 100.0,
1090 vec_entities_coverage_pct: 100.0,
1091 vec_chunks_coverage_pct: 100.0,
1092 fts_ok: true,
1093 fts_query_ok: true,
1094 model_ok: true,
1095 counts: HealthCounts {
1096 memories: 0,
1097 memories_total: 0,
1098 entities: 0,
1099 relationships: 0,
1100 vec_memories: 0,
1101 },
1102 db_path: "/tmp/test.sqlite".to_string(),
1103 db_size_bytes: 0,
1104 schema_version: 1,
1105 sqlite_version: "3.45.1".to_string(),
1106 elapsed_ms: 0,
1107 missing_entities: vec![],
1108 wal_size_mb: 0.0,
1109 journal_mode: "wal".to_string(),
1110 mentions_ratio: None,
1111 mentions_warning: None,
1112 top_relation: None,
1113 top_relation_ratio: None,
1114 applies_to_ratio: None,
1115 relation_concentration_warning: None,
1116 non_normalized_count: None,
1117 normalization_warning: None,
1118 super_hub_count: None,
1119 super_hub_warning: None,
1120 top_hub_entity: None,
1121 top_hub_degree: None,
1122 hub_warning: None,
1123 llm_slots_total: None,
1124 llm_slots_occupied: None,
1125 llm_slots_stale: None,
1126 checks: vec![],
1127 };
1128
1129 let json = serde_json::to_value(&response).unwrap();
1130
1131 assert_eq!(
1133 json["fts_query_ok"], true,
1134 "fts_query_ok must be present and true in serialized JSON"
1135 );
1136
1137 assert_eq!(
1139 json["sqlite_version"], "3.45.1",
1140 "sqlite_version must be present and match the provided string"
1141 );
1142
1143 let check_fail = HealthCheck {
1145 name: "fts_query".to_string(),
1146 ok: false,
1147 detail: Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string()),
1148 };
1149 let check_json = serde_json::to_value(&check_fail).unwrap();
1150 assert_eq!(check_json["name"], "fts_query");
1151 assert_eq!(check_json["ok"], false);
1152 assert_eq!(
1153 check_json["detail"],
1154 "FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'"
1155 );
1156 }
1157
1158 fn make_full_response(
1159 top_relation: Option<String>,
1160 top_relation_ratio: Option<f64>,
1161 applies_to_ratio: Option<f64>,
1162 relation_concentration_warning: Option<String>,
1163 ) -> HealthResponse {
1164 HealthResponse {
1165 status: "ok".to_string(),
1166 namespace: None,
1167 integrity: "ok".to_string(),
1168 integrity_ok: true,
1169 schema_ok: true,
1170 vec_memories_ok: true,
1171 vec_memories_missing: 0,
1172 vec_memories_orphaned: 0,
1173 vec_entities_ok: true,
1174 vec_entities_missing: 0,
1175 vec_chunks_ok: true,
1176 vec_chunks_missing: 0,
1177 vec_memories_coverage_pct: 100.0,
1178 vec_entities_coverage_pct: 100.0,
1179 vec_chunks_coverage_pct: 100.0,
1180 fts_ok: true,
1181 fts_query_ok: true,
1182 model_ok: true,
1183 counts: HealthCounts {
1184 memories: 10,
1185 memories_total: 10,
1186 entities: 5,
1187 relationships: 20,
1188 vec_memories: 10,
1189 },
1190 db_path: "/tmp/test.sqlite".to_string(),
1191 db_size_bytes: 8192,
1192 schema_version: 3,
1193 sqlite_version: "3.46.0".to_string(),
1194 elapsed_ms: 1,
1195 missing_entities: vec![],
1196 wal_size_mb: 0.0,
1197 journal_mode: "wal".to_string(),
1198 mentions_ratio: None,
1199 mentions_warning: None,
1200 top_relation,
1201 top_relation_ratio,
1202 applies_to_ratio,
1203 relation_concentration_warning,
1204 non_normalized_count: None,
1205 normalization_warning: None,
1206 super_hub_count: None,
1207 super_hub_warning: None,
1208 top_hub_entity: None,
1209 top_hub_degree: None,
1210 hub_warning: None,
1211 llm_slots_total: None,
1212 llm_slots_occupied: None,
1213 llm_slots_stale: None,
1214 checks: vec![],
1215 }
1216 }
1217
1218 #[test]
1219 fn health_concentration_fields_omitted_when_no_relationships() {
1220 let resp = make_full_response(None, None, None, None);
1222 let json = serde_json::to_value(&resp).unwrap();
1223 assert!(
1224 json.get("top_relation").is_none(),
1225 "top_relation must be omitted when None"
1226 );
1227 assert!(
1228 json.get("top_relation_ratio").is_none(),
1229 "top_relation_ratio must be omitted when None"
1230 );
1231 assert!(
1232 json.get("applies_to_ratio").is_none(),
1233 "applies_to_ratio must be omitted when None"
1234 );
1235 assert!(
1236 json.get("relation_concentration_warning").is_none(),
1237 "relation_concentration_warning must be omitted when None"
1238 );
1239 }
1240
1241 #[test]
1242 fn health_concentration_fields_present_with_data() {
1243 let resp = make_full_response(
1244 Some("mentions".to_string()),
1245 Some(0.60),
1246 Some(0.10),
1247 Some("relation 'mentions' dominates graph at 60.0%".to_string()),
1248 );
1249 let json = serde_json::to_value(&resp).unwrap();
1250 assert_eq!(json["top_relation"], "mentions");
1251 assert!((json["top_relation_ratio"].as_f64().unwrap() - 0.60).abs() < 1e-9);
1252 assert!((json["applies_to_ratio"].as_f64().unwrap() - 0.10).abs() < 1e-9);
1253 assert!(json["relation_concentration_warning"]
1254 .as_str()
1255 .unwrap()
1256 .contains("60.0%"));
1257 }
1258
1259 #[test]
1260 fn health_concentration_warning_absent_when_ratio_below_threshold() {
1261 let resp = make_full_response(Some("uses".to_string()), Some(0.39), None, None);
1263 let json = serde_json::to_value(&resp).unwrap();
1264 assert_eq!(json["top_relation"], "uses");
1265 assert!(
1266 json.get("relation_concentration_warning").is_none(),
1267 "warning must be absent when ratio <= 0.40"
1268 );
1269 }
1270
1271 #[test]
1272 fn health_concentration_warning_present_at_threshold() {
1273 let resp = make_full_response(
1275 Some("depends_on".to_string()),
1276 Some(0.41),
1277 None,
1278 Some("relation 'depends_on' dominates graph at 41.0%".to_string()),
1279 );
1280 let json = serde_json::to_value(&resp).unwrap();
1281 assert!(
1282 json["relation_concentration_warning"].is_string(),
1283 "warning must be present when top_relation_ratio > 0.40"
1284 );
1285 }
1286
1287 #[test]
1288 fn health_applies_to_ratio_omitted_when_none() {
1289 let resp = make_full_response(Some("related".to_string()), Some(0.30), None, None);
1291 let json = serde_json::to_value(&resp).unwrap();
1292 assert!(
1293 json.get("applies_to_ratio").is_none(),
1294 "applies_to_ratio must be omitted when None"
1295 );
1296 }
1297}