Skip to main content

sqlite_graphrag/commands/
health.rs

1//! Handler for the `health` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use serde::Serialize;
8use std::fs;
9use std::time::Instant;
10
// Candidate table names for each embedding store, listed in lookup order.
// `first_existing_table` returns the first candidate that exists, so the
// `*_embeddings` name wins over the `vec_*` name when both are present.

/// Tables that may hold per-memory embeddings.
const MEMORY_EMBEDDING_TABLES: &[&str] = &["memory_embeddings", "vec_memories"];
/// Tables that may hold per-entity embeddings.
const ENTITY_EMBEDDING_TABLES: &[&str] = &["entity_embeddings", "vec_entities"];
/// Tables that may hold per-chunk embeddings.
const CHUNK_EMBEDDING_TABLES: &[&str] = &["chunk_embeddings", "vec_chunks"];
14
// Command-line arguments accepted by the `health` subcommand.
// NOTE: additions here are plain `//` comments on purpose — `///` doc comments on a
// clap-derived struct are picked up as help text, which would change CLI output.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Check database health (connectivity, integrity, vector index)\n  \
    sqlite-graphrag health\n\n  \
    # Check health of a database at a custom path\n  \
    sqlite-graphrag health --db /path/to/graphrag.sqlite\n\n  \
    # Use SQLITE_GRAPHRAG_DB_PATH env var\n  \
    SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag health")]
pub struct HealthArgs {
    // Database path override; falls back to the SQLITE_GRAPHRAG_DB_PATH environment
    // variable. `run` hands the value to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
    /// Explicit JSON flag. Accepted as a no-op because output is already JSON by default.
    #[arg(long, default_value_t = false)]
    pub json: bool,
    /// Output format: `json` or `text`. JSON is always emitted on stdout regardless of the value.
    #[arg(long, value_parser = ["json", "text"], hide = true)]
    pub format: Option<String>,
    /// Filter health report counts to a specific namespace.
    /// When omitted, counts are global (sum across all namespaces).
    /// Global checks (integrity, schema_version, journal_mode) are always reported.
    #[arg(long)]
    pub namespace: Option<String>,
}
38
// Row counts embedded in the health report under `counts`.
// New notes use `//` rather than `///` so the derived JSON schema is left untouched
// (presumably schemars turns doc comments into descriptions — confirm before converting).
#[derive(Serialize, schemars::JsonSchema)]
pub struct HealthCounts {
    // Memories with `deleted_at IS NULL`; restricted to the namespace when one is given.
    memories: i64,
    /// Alias of `memories` for the documented contract in SKILL.md.
    memories_total: i64,
    // Row count of the `entities` table.
    entities: i64,
    // Row count of the `relationships` table.
    relationships: i64,
    // Row count of the memory embedding table that was found (0 when none exists).
    vec_memories: i64,
}
48
// One entry of the `checks` array: the outcome of a single named diagnostic.
#[derive(Serialize, schemars::JsonSchema)]
pub struct HealthCheck {
    // Identifier of the check (e.g. "integrity", "fts_query").
    name: String,
    // Whether the check passed.
    ok: bool,
    // Explanation attached to a failing check; left out of the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<String>,
}
56
// JSON payload emitted by the `health` subcommand.
// Field order is the serialization order, and every `Option` field is dropped from the
// output when `None`. Notes added here are plain `//` comments so the derived schema
// output is unchanged.
#[derive(Serialize, schemars::JsonSchema)]
pub struct HealthResponse {
    // "ok" on the normal path; "degraded" when the integrity check fails.
    status: String,
    /// Namespace filter applied to the counts. None means global (sum across all namespaces).
    #[serde(skip_serializing_if = "Option::is_none")]
    namespace: Option<String>,
    // First row returned by `PRAGMA integrity_check`.
    integrity: String,
    // True when `integrity` equals "ok".
    integrity_ok: bool,
    // True when `schema_version` is greater than zero.
    schema_ok: bool,
    // True when a memory embedding table (`memory_embeddings` or `vec_memories`) exists.
    vec_memories_ok: bool,
    // Non-deleted memories that have no row in the memory embedding table.
    vec_memories_missing: i64,
    // Embedding rows whose memory is absent or soft-deleted.
    vec_memories_orphaned: i64,
    // True when an entity embedding table (`entity_embeddings` or `vec_entities`) exists.
    vec_entities_ok: bool,
    // True when a chunk embedding table (`chunk_embeddings` or `vec_chunks`) exists.
    vec_chunks_ok: bool,
    // True when `fts_memories` is listed in sqlite_master.
    fts_ok: bool,
    /// Whether a live FTS5 MATCH query against fts_memories succeeded.
    fts_query_ok: bool,
    // True when a `claude` or `codex` binary lookup succeeded.
    model_ok: bool,
    // Row counts; see `HealthCounts`.
    counts: HealthCounts,
    // Resolved database path, rendered with `Path::display`.
    db_path: String,
    // Size of the database file in bytes (0 when its metadata cannot be read).
    db_size_bytes: u64,
    /// MAX(version) from refinery_schema_history — number of the last applied migration.
    /// Distinct from PRAGMA schema_version (SQLite DDL counter) and PRAGMA user_version
    /// (canonical SCHEMA_USER_VERSION from __debug_schema).
    schema_version: u32,
    /// List of entities referenced by memories but absent from the entities table.
    /// Empty in a healthy DB. Per the contract documented in SKILL.md.
    missing_entities: Vec<String>,
    /// WAL file size in MB (0.0 if WAL does not exist or journal_mode != wal).
    wal_size_mb: f64,
    /// SQLite journaling mode (wal, delete, truncate, persist, memory, off).
    journal_mode: String,
    /// SQLite version string, e.g. `"3.46.0"`.
    sqlite_version: String,
    /// Fraction of relationships that use the `mentions` relation type (0.0–1.0).
    /// Omitted when there are no relationships in the database.
    #[serde(skip_serializing_if = "Option::is_none")]
    mentions_ratio: Option<f64>,
    /// Human-readable warning when `mentions` relationships dominate the graph (ratio > 0.5).
    /// Omitted when the ratio is within acceptable bounds or there are no relationships.
    #[serde(skip_serializing_if = "Option::is_none")]
    mentions_warning: Option<String>,
    /// The relation type with the highest edge count in the namespace.
    /// Omitted when there are no relationships in the database.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_relation: Option<String>,
    /// Fraction of all edges occupied by `top_relation` (0.0–1.0).
    /// Omitted when there are no relationships in the database.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_relation_ratio: Option<f64>,
    /// Fraction of relationships that use the `applies_to` relation type (0.0–1.0).
    /// Omitted when there are no relationships or when `applies_to` is absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    applies_to_ratio: Option<f64>,
    /// Human-readable warning when a single relation type occupies more than 40 % of edges.
    /// Omitted when concentration is within acceptable bounds or there are no relationships.
    #[serde(skip_serializing_if = "Option::is_none")]
    relation_concentration_warning: Option<String>,
    /// Number of entities whose name differs from its normalized kebab-case form.
    #[serde(skip_serializing_if = "Option::is_none")]
    non_normalized_count: Option<i64>,
    /// Warning when non-normalized entities are detected.
    #[serde(skip_serializing_if = "Option::is_none")]
    normalization_warning: Option<String>,
    /// Number of entities with degree exceeding the super-hub threshold (default 50).
    #[serde(skip_serializing_if = "Option::is_none")]
    super_hub_count: Option<i64>,
    /// Warning listing top super-hub entity names.
    #[serde(skip_serializing_if = "Option::is_none")]
    super_hub_warning: Option<String>,
    /// Name of the entity with the highest connection count in the namespace.
    /// Omitted when there are no entities in the database.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_hub_entity: Option<String>,
    /// Number of connections (degree) of `top_hub_entity`.
    /// Omitted when there are no entities in the database.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_hub_degree: Option<i64>,
    /// Human-readable warning when `top_hub_entity` exceeds 50 connections.
    /// Omitted when degree is within acceptable bounds or there are no entities.
    #[serde(skip_serializing_if = "Option::is_none")]
    hub_warning: Option<String>,
    /// Total LLM embedding slots available on this host.
    #[serde(skip_serializing_if = "Option::is_none")]
    llm_slots_total: Option<u32>,
    /// LLM embedding slots currently occupied (slot file exists).
    #[serde(skip_serializing_if = "Option::is_none")]
    llm_slots_occupied: Option<u32>,
    /// LLM embedding slots held by dead processes (stale).
    #[serde(skip_serializing_if = "Option::is_none")]
    llm_slots_stale: Option<u32>,
    // Per-check diagnostics; see `HealthCheck`.
    checks: Vec<HealthCheck>,
    // Milliseconds elapsed between the start of `run` and building this response.
    elapsed_ms: u64,
}
151
152fn llm_slot_info() -> (u32, u32, u32) {
153    let max = crate::llm_slots::default_max_concurrency();
154    let status = crate::llm_slots::read_status(max);
155    let stale = crate::llm_slots::find_stale_slots(max);
156    (status.max, status.active, stale.len() as u32)
157}
158
159/// Checks whether a table (including virtual ones) exists in sqlite_master.
160fn table_exists(conn: &rusqlite::Connection, table_name: &str) -> bool {
161    conn.query_row(
162        "SELECT COUNT(*) FROM sqlite_master WHERE type IN ('table', 'shadow') AND name = ?1",
163        rusqlite::params![table_name],
164        |r| r.get::<_, i64>(0),
165    )
166    .unwrap_or(0)
167        > 0
168}
169
170fn first_existing_table<'a>(
171    conn: &rusqlite::Connection,
172    candidates: &'a [&'a str],
173) -> Option<&'a str> {
174    candidates
175        .iter()
176        .copied()
177        .find(|name| table_exists(conn, name))
178}
179
180fn count_rows(conn: &rusqlite::Connection, table_name: &str) -> i64 {
181    conn.query_row(&format!("SELECT COUNT(*) FROM {table_name}"), [], |r| {
182        r.get(0)
183    })
184    .unwrap_or(0)
185}
186
187fn memory_embedding_health(conn: &rusqlite::Connection) -> (bool, i64, i64, i64) {
188    let Some(table_name) = first_existing_table(conn, MEMORY_EMBEDDING_TABLES) else {
189        return (false, 0, 0, 0);
190    };
191
192    let total = count_rows(conn, table_name);
193    let missing = conn
194        .query_row(
195            &format!(
196                "SELECT COUNT(*)
197                 FROM memories m
198                 LEFT JOIN {table_name} me ON me.memory_id = m.id
199                 WHERE me.memory_id IS NULL AND m.deleted_at IS NULL"
200            ),
201            [],
202            |r| r.get(0),
203        )
204        .unwrap_or(0);
205    let orphaned = conn
206        .query_row(
207            &format!(
208                "SELECT COUNT(*)
209                 FROM {table_name} me
210                 LEFT JOIN memories m ON m.id = me.memory_id
211                 WHERE m.id IS NULL OR m.deleted_at IS NOT NULL"
212            ),
213            [],
214            |r| r.get(0),
215        )
216        .unwrap_or(0);
217
218    (true, total, missing, orphaned)
219}
220
221pub fn run(args: HealthArgs) -> Result<(), AppError> {
222    let start = Instant::now();
223    let _ = args.json; // --json is a no-op because output is already JSON by default
224    let _ = args.format; // --format is a no-op; JSON is always emitted on stdout
225    let paths = AppPaths::resolve(args.db.as_deref())?;
226    // GAP-E2E-002: resolve --namespace for counts filtering.
227    // Global checks (integrity, schema_version, journal_mode) remain namespace-agnostic.
228    let namespace_filter = match args.namespace.as_deref() {
229        Some(ns) => Some(crate::namespace::resolve_namespace(Some(ns))?),
230        None => None,
231    };
232
233    // BUG-AUDIT-1 (v1.0.88): refuse to silently bootstrap an empty database
234    // when the operator passes a typo'd or non-existent path. `health` must
235    // observe the database as-is, never mutate it.
236    if !paths.db.exists() {
237        let msg = format!(
238            "database not found at {}; `health` does not auto-create the database — \
239             run `sqlite-graphrag init --db {}` first or pass an existing path",
240            paths.db.display(),
241            paths.db.display(),
242        );
243        tracing::warn!(target: "health", db_path = %paths.db.display(), "database path does not exist; refusing to bootstrap");
244        output::emit_json(&serde_json::json!({
245            "error": true,
246            "code": 4,
247            "message": msg,
248            "db_path": paths.db.display().to_string(),
249        }))?;
250        return Err(AppError::NotFound(msg));
251    }
252
253    let conn = open_ro(&paths.db)?;
254
255    let integrity: String = conn.query_row("PRAGMA integrity_check;", [], |r| r.get(0))?;
256    let integrity_ok = integrity == "ok";
257    tracing::info!(target: "health", integrity_ok = %integrity_ok, "PRAGMA integrity_check complete");
258
259    if !integrity_ok {
260        let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
261        output::emit_json(&HealthResponse {
262            status: "degraded".to_string(),
263            namespace: None,
264            integrity: integrity.clone(),
265            integrity_ok: false,
266            schema_ok: false,
267            vec_memories_ok: false,
268            vec_memories_missing: 0,
269            vec_memories_orphaned: 0,
270            vec_entities_ok: false,
271            vec_chunks_ok: false,
272            fts_ok: false,
273            fts_query_ok: false,
274            model_ok: false,
275            counts: HealthCounts {
276                memories: 0,
277                memories_total: 0,
278                entities: 0,
279                relationships: 0,
280                vec_memories: 0,
281            },
282            db_path: paths.db.display().to_string(),
283            db_size_bytes,
284            schema_version: 0,
285            sqlite_version: "unknown".to_string(),
286            missing_entities: vec![],
287            wal_size_mb: 0.0,
288            journal_mode: "unknown".to_string(),
289            mentions_ratio: None,
290            mentions_warning: None,
291            top_relation: None,
292            top_relation_ratio: None,
293            applies_to_ratio: None,
294            relation_concentration_warning: None,
295            non_normalized_count: None,
296            normalization_warning: None,
297            super_hub_count: None,
298            super_hub_warning: None,
299            top_hub_entity: None,
300            top_hub_degree: None,
301            hub_warning: None,
302            llm_slots_total: None,
303            llm_slots_occupied: None,
304            llm_slots_stale: None,
305            checks: vec![HealthCheck {
306                name: "integrity".to_string(),
307                ok: false,
308                detail: Some(integrity),
309            }],
310            elapsed_ms: start.elapsed().as_millis() as u64,
311        })?;
312        return Err(AppError::Database(rusqlite::Error::SqliteFailure(
313            rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_CORRUPT),
314            Some("integrity check failed".to_string()),
315        )));
316    }
317
318    // GAP-E2E-002: filter memory count by namespace when --namespace is set.
319    let memories_count: i64 = match &namespace_filter {
320        Some(ns) => conn.query_row(
321            "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL AND namespace = ?1",
322            rusqlite::params![ns],
323            |r| r.get(0),
324        )?,
325        None => conn.query_row(
326            "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
327            [],
328            |r| r.get(0),
329        )?,
330    };
331    let entities_count: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
332    let relationships_count: i64 =
333        conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?;
334    let (vec_memories_ok, vec_memories_count, vec_memories_missing, vec_memories_orphaned) =
335        memory_embedding_health(&conn);
336
337    let mentions_count: i64 = conn.query_row(
338        "SELECT COUNT(*) FROM relationships WHERE relation = 'mentions'",
339        [],
340        |r| r.get(0),
341    )?;
342    let (mentions_ratio, mentions_warning) = if relationships_count > 0 {
343        let ratio = mentions_count as f64 / relationships_count as f64;
344        let warning = if ratio > 0.5 {
345            Some(format!(
346                "mentions relationships dominate graph at {:.1}% ({}/{} total); consider running prune-relations --relation mentions --dry-run",
347                ratio * 100.0,
348                mentions_count,
349                relationships_count
350            ))
351        } else {
352            None
353        };
354        (Some(ratio), warning)
355    } else {
356        (None, None)
357    };
358
359    // Relation concentration: find the most frequent relation type and check threshold.
360    let (top_relation, top_relation_ratio, applies_to_ratio, relation_concentration_warning) =
361        if relationships_count > 0 {
362            // Identify the relation with the highest edge count.
363            let (top_rel, top_count): (String, i64) = conn
364                .query_row(
365                    "SELECT relation, COUNT(*) AS cnt
366                     FROM relationships
367                     GROUP BY relation
368                     ORDER BY cnt DESC
369                     LIMIT 1",
370                    [],
371                    |r| Ok((r.get::<_, String>(0)?, r.get::<_, i64>(1)?)),
372                )
373                .unwrap_or_else(|_| ("unknown".to_string(), 0));
374
375            let top_ratio = top_count as f64 / relationships_count as f64;
376
377            // Compute applies_to ratio separately (may be 0 if absent).
378            let applies_count: i64 = conn
379                .query_row(
380                    "SELECT COUNT(*) FROM relationships WHERE relation = 'applies_to'",
381                    [],
382                    |r| r.get(0),
383                )
384                .unwrap_or(0);
385            let at_ratio = if applies_count > 0 {
386                Some(applies_count as f64 / relationships_count as f64)
387            } else {
388                None
389            };
390
391            let concentration_warning = if top_ratio > 0.40 {
392                Some(format!(
393                    "relation '{}' dominates graph at {:.1}% ({}/{} total); consider running prune-relations --relation {} --dry-run",
394                    top_rel,
395                    top_ratio * 100.0,
396                    top_count,
397                    relationships_count,
398                    top_rel,
399                ))
400            } else {
401                None
402            };
403
404            (
405                Some(top_rel),
406                Some(top_ratio),
407                at_ratio,
408                concentration_warning,
409            )
410        } else {
411            (None, None, None, None)
412        };
413
414    let status = "ok";
415
416    let schema_version: u32 = conn
417        .query_row(
418            "SELECT COALESCE(MAX(version), 0) FROM refinery_schema_history",
419            [],
420            |r| r.get::<_, i64>(0),
421        )
422        .unwrap_or(0) as u32;
423
424    let schema_ok = schema_version > 0;
425
426    // Checks vector tables via sqlite_master
427    let vec_entities_ok = first_existing_table(&conn, ENTITY_EMBEDDING_TABLES).is_some();
428    let vec_chunks_ok = first_existing_table(&conn, CHUNK_EMBEDDING_TABLES).is_some();
429
430    tracing::info!(target: "health", vec_memories_ok = %vec_memories_ok, vec_entities_ok = %vec_entities_ok, vec_missing = vec_memories_missing, vec_orphaned = vec_memories_orphaned, "vector table checks complete");
431    let fts_ok = table_exists(&conn, "fts_memories");
432
433    // Verifies that FTS5 can execute a MATCH query (catches index corruption distinct from table absence).
434    let fts_query_ok = if fts_ok {
435        conn.query_row(
436            "SELECT COUNT(*) FROM fts_memories WHERE fts_memories MATCH 'a' LIMIT 1",
437            [],
438            |r| r.get::<_, i64>(0),
439        )
440        .is_ok()
441    } else {
442        false
443    };
444
445    tracing::info!(target: "health", fts_ok = %fts_ok, fts_query_ok = %fts_query_ok, "FTS5 checks complete");
446
447    // Captures the SQLite runtime version for observability.
448    let sqlite_version: String = conn
449        .query_row("SELECT sqlite_version()", [], |r| r.get(0))
450        .unwrap_or_else(|_| "unknown".to_string());
451
452    // Detects orphan entities referenced by memories but absent from the entities table.
453    let mut missing_entities: Vec<String> = Vec::with_capacity(4);
454    let mut stmt = conn.prepare_cached(
455        "SELECT DISTINCT me.entity_id
456         FROM memory_entities me
457         LEFT JOIN entities e ON e.id = me.entity_id
458         WHERE e.id IS NULL",
459    )?;
460    let orphans: Vec<i64> = stmt
461        .query_map([], |r| r.get(0))?
462        .collect::<Result<Vec<_>, _>>()?;
463    for id in orphans {
464        missing_entities.push(format!("entity_id={id}"));
465    }
466
467    let journal_mode: String = conn
468        .query_row("PRAGMA journal_mode", [], |row| row.get::<_, String>(0))
469        .unwrap_or_else(|_| "unknown".to_string());
470
471    let wal_size_mb = fs::metadata(format!("{}-wal", paths.db.display()))
472        .map(|m| m.len() as f64 / 1024.0 / 1024.0)
473        .unwrap_or(0.0);
474
475    // Database file size in bytes
476    let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
477
478    // G46: the ONNX model cache no longer exists in the LLM-only build
479    // (v1.0.76+). model_ok now reports whether an LLM CLI (claude or codex)
480    // is reachable on PATH — the real prerequisite for embedding generation.
481    let model_ok = crate::commands::ingest_claude::find_claude_binary(None).is_ok()
482        || crate::commands::ingest_codex::find_codex_binary(None).is_ok();
483    tracing::info!(target: "health", model_ok = %model_ok, "LLM CLI availability check complete");
484
485    // Builds the checks array for detailed diagnostics
486    let mut checks: Vec<HealthCheck> = Vec::with_capacity(8);
487
488    // At this point integrity_ok is always true (corrupt DB returned early above).
489    checks.push(HealthCheck {
490        name: "integrity".to_string(),
491        ok: true,
492        detail: None,
493    });
494
495    checks.push(HealthCheck {
496        name: "schema_version".to_string(),
497        ok: schema_ok,
498        detail: if schema_ok {
499            None
500        } else {
501            Some(format!("schema_version={schema_version} (expected >0)"))
502        },
503    });
504
505    checks.push(HealthCheck {
506        name: "vec_memories".to_string(),
507        ok: vec_memories_ok,
508        detail: if vec_memories_ok {
509            None
510        } else {
511            Some("memory_embeddings/vec_memories table missing from sqlite_master".to_string())
512        },
513    });
514
515    checks.push(HealthCheck {
516        name: "vec_entities".to_string(),
517        ok: vec_entities_ok,
518        detail: if vec_entities_ok {
519            None
520        } else {
521            Some("entity_embeddings/vec_entities table missing from sqlite_master".to_string())
522        },
523    });
524
525    checks.push(HealthCheck {
526        name: "vec_chunks".to_string(),
527        ok: vec_chunks_ok,
528        detail: if vec_chunks_ok {
529            None
530        } else {
531            Some("chunk_embeddings/vec_chunks table missing from sqlite_master".to_string())
532        },
533    });
534
535    checks.push(HealthCheck {
536        name: "fts_memories".to_string(),
537        ok: fts_ok,
538        detail: if fts_ok {
539            None
540        } else {
541            Some("fts_memories table missing from sqlite_master".to_string())
542        },
543    });
544
545    checks.push(HealthCheck {
546        name: "fts_query".to_string(),
547        ok: fts_query_ok,
548        detail: if fts_query_ok {
549            None
550        } else {
551            Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string())
552        },
553    });
554
555    checks.push(HealthCheck {
556        name: "llm_cli".to_string(),
557        ok: model_ok,
558        detail: if model_ok {
559            None
560        } else {
561            Some(
562                "no LLM CLI found on PATH; install 'claude' (Claude Code) or 'codex' \
563                 (Codex CLI) — required for embedding generation since v1.0.76"
564                    .to_string(),
565            )
566        },
567    });
568
569    // G24: detect non-normalized entity names
570    let (non_normalized_count, normalization_warning) = {
571        let mut stmt = conn.prepare_cached("SELECT name FROM entities")?;
572        let names: Vec<String> = stmt
573            .query_map([], |r| r.get(0))?
574            .filter_map(|r| r.ok())
575            .collect();
576        let count = names
577            .iter()
578            .filter(|n| crate::parsers::normalize_entity_name(n) != **n)
579            .count() as i64;
580        let warning = if count > 0 {
581            Some(format!(
582                "run 'normalize-entities --yes' to fix {count} non-normalized entities"
583            ))
584        } else {
585            None
586        };
587        (Some(count), warning)
588    };
589
590    // G25: detect super-hub entities (degree > 50)
591    let (super_hub_count, super_hub_warning) = {
592        let mut stmt = conn.prepare_cached(
593            "SELECT e.name, COUNT(r.id) as deg FROM entities e \
594             LEFT JOIN relationships r ON e.id = r.source_id OR e.id = r.target_id \
595             GROUP BY e.id HAVING deg > 50 ORDER BY deg DESC LIMIT 5",
596        )?;
597        let hubs: Vec<(String, i64)> = stmt
598            .query_map([], |r| Ok((r.get(0)?, r.get(1)?)))?
599            .filter_map(|r| r.ok())
600            .collect();
601        let count = hubs.len() as i64;
602        let warning = if count > 0 {
603            let names: Vec<String> = hubs
604                .iter()
605                .map(|(n, d)| format!("{n} (degree {d})"))
606                .collect();
607            Some(format!("super-hubs detected: {}", names.join(", ")))
608        } else {
609            None
610        };
611        (Some(count), warning)
612    };
613
614    // G25 (extended): identify the single highest-degree entity for programmatic use.
615    let (top_hub_entity, top_hub_degree, hub_warning) = {
616        let result: Option<(String, i64)> = conn
617            .query_row(
618                "SELECT e.name, COUNT(r.id) AS degree
619                 FROM entities e
620                 LEFT JOIN relationships r ON e.id = r.source_id OR e.id = r.target_id
621                 GROUP BY e.id
622                 ORDER BY degree DESC
623                 LIMIT 1",
624                [],
625                |r| Ok((r.get::<_, String>(0)?, r.get::<_, i64>(1)?)),
626            )
627            .ok();
628        match result {
629            Some((name, degree)) => {
630                let warning = if degree > 50 {
631                    Some(format!(
632                        "entity '{name}' has {degree} connections; consider splitting or using --max-neighbors-per-hop"
633                    ))
634                } else {
635                    None
636                };
637                (Some(name), Some(degree), warning)
638            }
639            None => (None, None, None),
640        }
641    };
642
643    let llm_slots = llm_slot_info();
644    let response = HealthResponse {
645        status: status.to_string(),
646        namespace: namespace_filter.clone(),
647        integrity,
648        integrity_ok,
649        schema_ok,
650        vec_memories_ok,
651        vec_memories_missing,
652        vec_memories_orphaned,
653        vec_entities_ok,
654        vec_chunks_ok,
655        fts_ok,
656        fts_query_ok,
657        model_ok,
658        counts: HealthCounts {
659            memories: memories_count,
660            memories_total: memories_count,
661            entities: entities_count,
662            relationships: relationships_count,
663            vec_memories: vec_memories_count,
664        },
665        db_path: paths.db.display().to_string(),
666        db_size_bytes,
667        schema_version,
668        sqlite_version,
669        missing_entities,
670        wal_size_mb,
671        journal_mode,
672        mentions_ratio,
673        mentions_warning,
674        top_relation,
675        top_relation_ratio,
676        applies_to_ratio,
677        relation_concentration_warning,
678        non_normalized_count,
679        normalization_warning,
680        super_hub_count,
681        super_hub_warning,
682        top_hub_entity,
683        top_hub_degree,
684        hub_warning,
685        llm_slots_total: Some(llm_slots.0),
686        llm_slots_occupied: Some(llm_slots.1),
687        llm_slots_stale: Some(llm_slots.2),
688        checks,
689        elapsed_ms: start.elapsed().as_millis() as u64,
690    };
691
692    output::emit_json(&response)?;
693
694    Ok(())
695}
696
697#[cfg(test)]
698mod tests {
699    use super::*;
700    use rusqlite::Connection;
701
702    fn open_health_test_db() -> Connection {
703        let conn = Connection::open_in_memory().unwrap();
704        conn.execute_batch(
705            "CREATE TABLE memories (
706                id INTEGER PRIMARY KEY,
707                deleted_at INTEGER
708            );
709            CREATE TABLE memory_embeddings (
710                memory_id INTEGER PRIMARY KEY,
711                namespace TEXT NOT NULL,
712                embedding BLOB NOT NULL,
713                source TEXT NOT NULL,
714                model TEXT NOT NULL,
715                dim INTEGER NOT NULL DEFAULT 384,
716                created_at TEXT NOT NULL DEFAULT '0'
717            );
718            CREATE TABLE vec_memories (
719                memory_id INTEGER PRIMARY KEY,
720                embedding BLOB NOT NULL,
721                created_at INTEGER NOT NULL DEFAULT 0
722            );",
723        )
724        .unwrap();
725        conn
726    }
727
728    #[test]
729    fn memory_embedding_health_prefers_memory_embeddings_and_counts_soft_deleted_as_orphaned() {
730        let conn = open_health_test_db();
731        conn.execute("INSERT INTO memories (id, deleted_at) VALUES (1, NULL)", [])
732            .unwrap();
733        conn.execute("INSERT INTO memories (id, deleted_at) VALUES (2, NULL)", [])
734            .unwrap();
735        conn.execute("INSERT INTO memories (id, deleted_at) VALUES (3, 123)", [])
736            .unwrap();
737        conn.execute(
738            "INSERT INTO memory_embeddings(memory_id, namespace, embedding, source, model, dim, created_at)
739             VALUES (1, 'global', X'00', 'llm', 'm', 384, '1')",
740            [],
741        )
742        .unwrap();
743        conn.execute(
744            "INSERT INTO memory_embeddings(memory_id, namespace, embedding, source, model, dim, created_at)
745             VALUES (3, 'global', X'00', 'llm', 'm', 384, '2')",
746            [],
747        )
748        .unwrap();
749        conn.execute(
750            "INSERT INTO memory_embeddings(memory_id, namespace, embedding, source, model, dim, created_at)
751             VALUES (99, 'global', X'00', 'llm', 'm', 384, '3')",
752            [],
753        )
754        .unwrap();
755        conn.execute(
756            "INSERT INTO vec_memories(memory_id, embedding, created_at) VALUES (777, X'00', 0)",
757            [],
758        )
759        .unwrap();
760
761        let (ok, total, missing, orphaned) = memory_embedding_health(&conn);
762        assert!(ok);
763        assert_eq!(total, 3);
764        assert_eq!(missing, 1);
765        assert_eq!(orphaned, 2);
766    }
767
768    #[test]
769    fn first_existing_table_falls_back_to_legacy_vec_name() {
770        let conn = Connection::open_in_memory().unwrap();
771        conn.execute_batch(
772            "CREATE TABLE vec_memories (
773                memory_id INTEGER PRIMARY KEY,
774                embedding BLOB NOT NULL,
775                created_at INTEGER NOT NULL DEFAULT 0
776            );",
777        )
778        .unwrap();
779
780        let resolved = first_existing_table(&conn, MEMORY_EMBEDDING_TABLES);
781        assert_eq!(resolved, Some("vec_memories"));
782    }
783
784    #[test]
785    fn health_check_serializes_all_new_fields() {
786        let response = HealthResponse {
787            status: "ok".to_string(),
788            namespace: None,
789            integrity: "ok".to_string(),
790            integrity_ok: true,
791            schema_ok: true,
792            vec_memories_ok: true,
793            vec_memories_missing: 0,
794            vec_memories_orphaned: 0,
795            vec_entities_ok: true,
796            vec_chunks_ok: true,
797            fts_ok: true,
798            fts_query_ok: true,
799            model_ok: false,
800            counts: HealthCounts {
801                memories: 5,
802                memories_total: 5,
803                entities: 3,
804                relationships: 2,
805                vec_memories: 5,
806            },
807            db_path: "/tmp/test.sqlite".to_string(),
808            db_size_bytes: 4096,
809            schema_version: 6,
810            sqlite_version: "3.46.0".to_string(),
811            elapsed_ms: 0,
812            missing_entities: vec![],
813            wal_size_mb: 0.0,
814            journal_mode: "wal".to_string(),
815            mentions_ratio: None,
816            mentions_warning: None,
817            top_relation: None,
818            top_relation_ratio: None,
819            applies_to_ratio: None,
820            relation_concentration_warning: None,
821            non_normalized_count: None,
822            normalization_warning: None,
823            super_hub_count: None,
824            super_hub_warning: None,
825            top_hub_entity: None,
826            top_hub_degree: None,
827            hub_warning: None,
828            llm_slots_total: None,
829            llm_slots_occupied: None,
830            llm_slots_stale: None,
831            checks: vec![
832                HealthCheck {
833                    name: "integrity".to_string(),
834                    ok: true,
835                    detail: None,
836                },
837                HealthCheck {
838                    name: "model_onnx".to_string(),
839                    ok: false,
840                    detail: Some("model missing".to_string()),
841                },
842            ],
843        };
844
845        let json = serde_json::to_value(&response).unwrap();
846        assert_eq!(json["status"], "ok");
847        assert_eq!(json["integrity_ok"], true);
848        assert_eq!(json["schema_ok"], true);
849        assert_eq!(json["vec_memories_ok"], true);
850        assert_eq!(json["vec_entities_ok"], true);
851        assert_eq!(json["vec_chunks_ok"], true);
852        assert_eq!(json["fts_ok"], true);
853        assert_eq!(json["model_ok"], false);
854        assert_eq!(json["db_size_bytes"], 4096u64);
855        assert!(json["checks"].is_array());
856        assert_eq!(json["checks"].as_array().unwrap().len(), 2);
857
858        // Verifies that detail is absent when ok=true (skip_serializing_if)
859        let integrity_check = &json["checks"][0];
860        assert_eq!(integrity_check["name"], "integrity");
861        assert_eq!(integrity_check["ok"], true);
862        assert!(integrity_check.get("detail").is_none());
863
864        // Verifies that detail is present when ok=false
865        let model_check = &json["checks"][1];
866        assert_eq!(model_check["name"], "model_onnx");
867        assert_eq!(model_check["ok"], false);
868        assert_eq!(model_check["detail"], "model missing");
869    }
870
871    #[test]
872    fn health_check_without_detail_omits_field() {
873        let check = HealthCheck {
874            name: "vec_memories".to_string(),
875            ok: true,
876            detail: None,
877        };
878        let json = serde_json::to_value(&check).unwrap();
879        assert!(
880            json.get("detail").is_none(),
881            "detail field must be omitted when None"
882        );
883    }
884
885    #[test]
886    fn health_check_with_detail_serializes_field() {
887        let check = HealthCheck {
888            name: "fts_memories".to_string(),
889            ok: false,
890            detail: Some("fts_memories table missing from sqlite_master".to_string()),
891        };
892        let json = serde_json::to_value(&check).unwrap();
893        assert_eq!(
894            json["detail"],
895            "fts_memories table missing from sqlite_master"
896        );
897    }
898
899    #[test]
900    fn health_response_fts_query_ok_and_sqlite_version_serialize() {
901        // Verifies that fts_query_ok and sqlite_version appear in the serialized JSON
902        // with the expected keys and values.
903        let response = HealthResponse {
904            status: "ok".to_string(),
905            namespace: Some("test-ns".to_string()),
906            integrity: "ok".to_string(),
907            integrity_ok: true,
908            schema_ok: true,
909            vec_memories_ok: true,
910            vec_memories_missing: 0,
911            vec_memories_orphaned: 0,
912            vec_entities_ok: true,
913            vec_chunks_ok: true,
914            fts_ok: true,
915            fts_query_ok: true,
916            model_ok: true,
917            counts: HealthCounts {
918                memories: 0,
919                memories_total: 0,
920                entities: 0,
921                relationships: 0,
922                vec_memories: 0,
923            },
924            db_path: "/tmp/test.sqlite".to_string(),
925            db_size_bytes: 0,
926            schema_version: 1,
927            sqlite_version: "3.45.1".to_string(),
928            elapsed_ms: 0,
929            missing_entities: vec![],
930            wal_size_mb: 0.0,
931            journal_mode: "wal".to_string(),
932            mentions_ratio: None,
933            mentions_warning: None,
934            top_relation: None,
935            top_relation_ratio: None,
936            applies_to_ratio: None,
937            relation_concentration_warning: None,
938            non_normalized_count: None,
939            normalization_warning: None,
940            super_hub_count: None,
941            super_hub_warning: None,
942            top_hub_entity: None,
943            top_hub_degree: None,
944            hub_warning: None,
945            llm_slots_total: None,
946            llm_slots_occupied: None,
947            llm_slots_stale: None,
948            checks: vec![],
949        };
950
951        let json = serde_json::to_value(&response).unwrap();
952
953        // fts_query_ok must appear at the top level
954        assert_eq!(
955            json["fts_query_ok"], true,
956            "fts_query_ok must be present and true in serialized JSON"
957        );
958
959        // sqlite_version must appear at the top level with the exact string
960        assert_eq!(
961            json["sqlite_version"], "3.45.1",
962            "sqlite_version must be present and match the provided string"
963        );
964
965        // Verify fts_query_ok=false path includes the expected detail message
966        let check_fail = HealthCheck {
967            name: "fts_query".to_string(),
968            ok: false,
969            detail: Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string()),
970        };
971        let check_json = serde_json::to_value(&check_fail).unwrap();
972        assert_eq!(check_json["name"], "fts_query");
973        assert_eq!(check_json["ok"], false);
974        assert_eq!(
975            check_json["detail"],
976            "FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'"
977        );
978    }
979
980    fn make_full_response(
981        top_relation: Option<String>,
982        top_relation_ratio: Option<f64>,
983        applies_to_ratio: Option<f64>,
984        relation_concentration_warning: Option<String>,
985    ) -> HealthResponse {
986        HealthResponse {
987            status: "ok".to_string(),
988            namespace: None,
989            integrity: "ok".to_string(),
990            integrity_ok: true,
991            schema_ok: true,
992            vec_memories_ok: true,
993            vec_memories_missing: 0,
994            vec_memories_orphaned: 0,
995            vec_entities_ok: true,
996            vec_chunks_ok: true,
997            fts_ok: true,
998            fts_query_ok: true,
999            model_ok: true,
1000            counts: HealthCounts {
1001                memories: 10,
1002                memories_total: 10,
1003                entities: 5,
1004                relationships: 20,
1005                vec_memories: 10,
1006            },
1007            db_path: "/tmp/test.sqlite".to_string(),
1008            db_size_bytes: 8192,
1009            schema_version: 3,
1010            sqlite_version: "3.46.0".to_string(),
1011            elapsed_ms: 1,
1012            missing_entities: vec![],
1013            wal_size_mb: 0.0,
1014            journal_mode: "wal".to_string(),
1015            mentions_ratio: None,
1016            mentions_warning: None,
1017            top_relation,
1018            top_relation_ratio,
1019            applies_to_ratio,
1020            relation_concentration_warning,
1021            non_normalized_count: None,
1022            normalization_warning: None,
1023            super_hub_count: None,
1024            super_hub_warning: None,
1025            top_hub_entity: None,
1026            top_hub_degree: None,
1027            hub_warning: None,
1028            llm_slots_total: None,
1029            llm_slots_occupied: None,
1030            llm_slots_stale: None,
1031            checks: vec![],
1032        }
1033    }
1034
1035    #[test]
1036    fn health_concentration_fields_omitted_when_no_relationships() {
1037        // Represents a DB with zero relationships.
1038        let resp = make_full_response(None, None, None, None);
1039        let json = serde_json::to_value(&resp).unwrap();
1040        assert!(
1041            json.get("top_relation").is_none(),
1042            "top_relation must be omitted when None"
1043        );
1044        assert!(
1045            json.get("top_relation_ratio").is_none(),
1046            "top_relation_ratio must be omitted when None"
1047        );
1048        assert!(
1049            json.get("applies_to_ratio").is_none(),
1050            "applies_to_ratio must be omitted when None"
1051        );
1052        assert!(
1053            json.get("relation_concentration_warning").is_none(),
1054            "relation_concentration_warning must be omitted when None"
1055        );
1056    }
1057
1058    #[test]
1059    fn health_concentration_fields_present_with_data() {
1060        let resp = make_full_response(
1061            Some("mentions".to_string()),
1062            Some(0.60),
1063            Some(0.10),
1064            Some("relation 'mentions' dominates graph at 60.0%".to_string()),
1065        );
1066        let json = serde_json::to_value(&resp).unwrap();
1067        assert_eq!(json["top_relation"], "mentions");
1068        assert!((json["top_relation_ratio"].as_f64().unwrap() - 0.60).abs() < 1e-9);
1069        assert!((json["applies_to_ratio"].as_f64().unwrap() - 0.10).abs() < 1e-9);
1070        assert!(json["relation_concentration_warning"]
1071            .as_str()
1072            .unwrap()
1073            .contains("60.0%"));
1074    }
1075
1076    #[test]
1077    fn health_concentration_warning_absent_when_ratio_below_threshold() {
1078        // top_relation_ratio of 0.39 is below the 0.40 threshold — no warning.
1079        let resp = make_full_response(Some("uses".to_string()), Some(0.39), None, None);
1080        let json = serde_json::to_value(&resp).unwrap();
1081        assert_eq!(json["top_relation"], "uses");
1082        assert!(
1083            json.get("relation_concentration_warning").is_none(),
1084            "warning must be absent when ratio <= 0.40"
1085        );
1086    }
1087
1088    #[test]
1089    fn health_concentration_warning_present_at_threshold() {
1090        // Exactly at 0.41 (above 0.40) — warning must appear.
1091        let resp = make_full_response(
1092            Some("depends_on".to_string()),
1093            Some(0.41),
1094            None,
1095            Some("relation 'depends_on' dominates graph at 41.0%".to_string()),
1096        );
1097        let json = serde_json::to_value(&resp).unwrap();
1098        assert!(
1099            json["relation_concentration_warning"].is_string(),
1100            "warning must be present when top_relation_ratio > 0.40"
1101        );
1102    }
1103
1104    #[test]
1105    fn health_applies_to_ratio_omitted_when_none() {
1106        // applies_to_ratio is None when there are no applies_to edges.
1107        let resp = make_full_response(Some("related".to_string()), Some(0.30), None, None);
1108        let json = serde_json::to_value(&resp).unwrap();
1109        assert!(
1110            json.get("applies_to_ratio").is_none(),
1111            "applies_to_ratio must be omitted when None"
1112        );
1113    }
1114}