Skip to main content

sqlite_graphrag/commands/
health.rs

//! Handler for the `health` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use serde::Serialize;
8use std::fs;
9use std::time::Instant;
10
// Candidate table names for each embedding store. `first_existing_table` scans a
// list front to back and returns the first name that exists, so the order below
// is the lookup priority (the `vec_*` names are the fallback).

/// Tables that may hold per-memory embeddings.
const MEMORY_EMBEDDING_TABLES: &[&str] = &["memory_embeddings", "vec_memories"];
/// Tables that may hold per-entity embeddings.
const ENTITY_EMBEDDING_TABLES: &[&str] = &["entity_embeddings", "vec_entities"];
/// Tables that may hold per-chunk embeddings.
const CHUNK_EMBEDDING_TABLES: &[&str] = &["chunk_embeddings", "vec_chunks"];
14
// Command-line arguments accepted by the `health` subcommand.
//
// NOTE(review): reviewer notes in this struct deliberately use plain `//` comments.
// clap's derive presumably surfaces `///` comments as `--help` text, so rewording
// or adding those would be user-visible — confirm before converting any of these.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Check database health (connectivity, integrity, vector index)\n  \
    sqlite-graphrag health\n\n  \
    # Check health of a database at a custom path\n  \
    sqlite-graphrag health --db /path/to/graphrag.sqlite\n\n  \
    # Use SQLITE_GRAPHRAG_DB_PATH env var\n  \
    SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag health")]
pub struct HealthArgs {
    // Database path; when the flag is absent the value is read from the
    // SQLITE_GRAPHRAG_DB_PATH environment variable. `run` hands it to
    // `AppPaths::resolve`, which decides the final location.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
    /// Explicit JSON flag. Accepted as a no-op because output is already JSON by default.
    #[arg(long, default_value_t = false)]
    pub json: bool,
    /// Output format: `json` or `text`. JSON is always emitted on stdout regardless of the value.
    // Hidden from help output (`hide = true`); `run` ignores the value.
    #[arg(long, value_parser = ["json", "text"], hide = true)]
    pub format: Option<String>,
    /// Filter health report counts to a specific namespace.
    /// When omitted, counts are global (sum across all namespaces).
    /// Global checks (integrity, schema_version, journal_mode) are always reported.
    // In `run` only the `memories` count query takes the namespace as a parameter;
    // the entity and relationship counts are computed without it.
    #[arg(long)]
    pub namespace: Option<String>,
}
38
// Row counts reported under the `counts` key of the health report.
//
// NOTE(review): `///` comments on these fields presumably end up as `description`
// entries in the schemars-generated JSON schema, so the notes added here are plain
// `//` comments — confirm before promoting them to doc comments.
#[derive(Serialize, schemars::JsonSchema)]
pub struct HealthCounts {
    // Memories with `deleted_at IS NULL`, restricted to the namespace when one is given.
    memories: i64,
    /// Alias of `memories` for the documented contract in SKILL.md.
    memories_total: i64,
    // `COUNT(*)` over the `entities` table.
    entities: i64,
    // `COUNT(*)` over the `relationships` table.
    relationships: i64,
    // Row count of whichever memory-embedding table was found (0 when none exists).
    vec_memories: i64,
}
48
// One entry of the `checks` array in the health report.
#[derive(Serialize, schemars::JsonSchema)]
pub struct HealthCheck {
    // Identifier of the check, e.g. "integrity" or "fts_query".
    name: String,
    // Whether the check passed.
    ok: bool,
    // Explanation attached to a failing check; the key is left out of the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<String>,
}
56
// Full JSON payload emitted by the `health` subcommand.
//
// Field order is the serialization order. Every `Option` field is dropped from the
// JSON when it is `None` (`skip_serializing_if`).
//
// NOTE(review): `///` comments here presumably flow into the schemars-generated JSON
// schema as descriptions, so reviewer notes are written as plain `//` comments.
#[derive(Serialize, schemars::JsonSchema)]
pub struct HealthResponse {
    // "ok" on the normal path; "degraded" when the integrity check fails.
    status: String,
    /// Namespace filter applied to the counts. None means global (sum across all namespaces).
    #[serde(skip_serializing_if = "Option::is_none")]
    namespace: Option<String>,
    // First row returned by `PRAGMA integrity_check`.
    integrity: String,
    // True when `integrity` equals "ok".
    integrity_ok: bool,
    // True when `schema_version` is greater than zero.
    schema_ok: bool,
    // True when a memory-embedding table exists.
    vec_memories_ok: bool,
    // Live (not soft-deleted) memories that have no embedding row.
    vec_memories_missing: i64,
    // Embedding rows whose memory is absent or soft-deleted.
    vec_memories_orphaned: i64,
    // True when an entity-embedding table exists.
    vec_entities_ok: bool,
    // True when a chunk-embedding table exists.
    vec_chunks_ok: bool,
    // True when `fts_memories` is listed in sqlite_master.
    fts_ok: bool,
    /// Whether a live FTS5 MATCH query against fts_memories succeeded.
    fts_query_ok: bool,
    // True when a `claude` or `codex` binary lookup succeeds (see `run`).
    model_ok: bool,
    counts: HealthCounts,
    // Resolved database path, rendered with `Path::display`.
    db_path: String,
    // Size of the database file; 0 when its metadata cannot be read.
    db_size_bytes: u64,
    /// MAX(version) from refinery_schema_history — number of the last applied migration.
    /// Distinct from PRAGMA schema_version (SQLite DDL counter) and PRAGMA user_version
    /// (canonical SCHEMA_USER_VERSION from __debug_schema).
    schema_version: u32,
    /// List of entities referenced by memories but absent from the entities table.
    /// Empty in a healthy DB. Per the contract documented in SKILL.md.
    // Each entry is formatted as `entity_id=<id>`.
    missing_entities: Vec<String>,
    /// WAL file size in MB (0.0 if WAL does not exist or journal_mode != wal).
    wal_size_mb: f64,
    /// SQLite journaling mode (wal, delete, truncate, persist, memory, off).
    journal_mode: String,
    /// SQLite version string, e.g. `"3.46.0"`.
    sqlite_version: String,
    /// Fraction of relationships that use the `mentions` relation type (0.0–1.0).
    /// Omitted when there are no relationships in the database.
    #[serde(skip_serializing_if = "Option::is_none")]
    mentions_ratio: Option<f64>,
    /// Human-readable warning when `mentions` relationships dominate the graph (ratio > 0.5).
    /// Omitted when the ratio is within acceptable bounds or there are no relationships.
    #[serde(skip_serializing_if = "Option::is_none")]
    mentions_warning: Option<String>,
    /// The relation type with the highest edge count in the namespace.
    /// Omitted when there are no relationships in the database.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_relation: Option<String>,
    /// Fraction of all edges occupied by `top_relation` (0.0–1.0).
    /// Omitted when there are no relationships in the database.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_relation_ratio: Option<f64>,
    /// Fraction of relationships that use the `applies_to` relation type (0.0–1.0).
    /// Omitted when there are no relationships or when `applies_to` is absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    applies_to_ratio: Option<f64>,
    /// Human-readable warning when a single relation type occupies more than 40 % of edges.
    /// Omitted when concentration is within acceptable bounds or there are no relationships.
    #[serde(skip_serializing_if = "Option::is_none")]
    relation_concentration_warning: Option<String>,
    /// Number of entities whose name differs from its normalized kebab-case form.
    #[serde(skip_serializing_if = "Option::is_none")]
    non_normalized_count: Option<i64>,
    /// Warning when non-normalized entities are detected.
    #[serde(skip_serializing_if = "Option::is_none")]
    normalization_warning: Option<String>,
    /// Number of entities with degree exceeding the super-hub threshold (default 50).
    #[serde(skip_serializing_if = "Option::is_none")]
    super_hub_count: Option<i64>,
    /// Warning listing top super-hub entity names.
    #[serde(skip_serializing_if = "Option::is_none")]
    super_hub_warning: Option<String>,
    /// Name of the entity with the highest connection count in the namespace.
    /// Omitted when there are no entities in the database.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_hub_entity: Option<String>,
    /// Number of connections (degree) of `top_hub_entity`.
    /// Omitted when there are no entities in the database.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_hub_degree: Option<i64>,
    /// Human-readable warning when `top_hub_entity` exceeds 50 connections.
    /// Omitted when degree is within acceptable bounds or there are no entities.
    #[serde(skip_serializing_if = "Option::is_none")]
    hub_warning: Option<String>,
    // Per-check pass/fail entries for detailed diagnostics.
    checks: Vec<HealthCheck>,
    // Wall-clock time spent producing the report, in milliseconds.
    elapsed_ms: u64,
}
142
143/// Checks whether a table (including virtual ones) exists in sqlite_master.
144fn table_exists(conn: &rusqlite::Connection, table_name: &str) -> bool {
145    conn.query_row(
146        "SELECT COUNT(*) FROM sqlite_master WHERE type IN ('table', 'shadow') AND name = ?1",
147        rusqlite::params![table_name],
148        |r| r.get::<_, i64>(0),
149    )
150    .unwrap_or(0)
151        > 0
152}
153
154fn first_existing_table<'a>(
155    conn: &rusqlite::Connection,
156    candidates: &'a [&'a str],
157) -> Option<&'a str> {
158    candidates
159        .iter()
160        .copied()
161        .find(|name| table_exists(conn, name))
162}
163
164fn count_rows(conn: &rusqlite::Connection, table_name: &str) -> i64 {
165    conn.query_row(&format!("SELECT COUNT(*) FROM {table_name}"), [], |r| {
166        r.get(0)
167    })
168    .unwrap_or(0)
169}
170
171fn memory_embedding_health(conn: &rusqlite::Connection) -> (bool, i64, i64, i64) {
172    let Some(table_name) = first_existing_table(conn, MEMORY_EMBEDDING_TABLES) else {
173        return (false, 0, 0, 0);
174    };
175
176    let total = count_rows(conn, table_name);
177    let missing = conn
178        .query_row(
179            &format!(
180                "SELECT COUNT(*)
181                 FROM memories m
182                 LEFT JOIN {table_name} me ON me.memory_id = m.id
183                 WHERE me.memory_id IS NULL AND m.deleted_at IS NULL"
184            ),
185            [],
186            |r| r.get(0),
187        )
188        .unwrap_or(0);
189    let orphaned = conn
190        .query_row(
191            &format!(
192                "SELECT COUNT(*)
193                 FROM {table_name} me
194                 LEFT JOIN memories m ON m.id = me.memory_id
195                 WHERE m.id IS NULL OR m.deleted_at IS NOT NULL"
196            ),
197            [],
198            |r| r.get(0),
199        )
200        .unwrap_or(0);
201
202    (true, total, missing, orphaned)
203}
204
205pub fn run(args: HealthArgs) -> Result<(), AppError> {
206    let start = Instant::now();
207    let _ = args.json; // --json is a no-op because output is already JSON by default
208    let _ = args.format; // --format is a no-op; JSON is always emitted on stdout
209    let paths = AppPaths::resolve(args.db.as_deref())?;
210    // GAP-E2E-002: resolve --namespace for counts filtering.
211    // Global checks (integrity, schema_version, journal_mode) remain namespace-agnostic.
212    let namespace_filter = match args.namespace.as_deref() {
213        Some(ns) => Some(crate::namespace::resolve_namespace(Some(ns))?),
214        None => None,
215    };
216
217    // BUG-AUDIT-1 (v1.0.88): refuse to silently bootstrap an empty database
218    // when the operator passes a typo'd or non-existent path. `health` must
219    // observe the database as-is, never mutate it.
220    if !paths.db.exists() {
221        let msg = format!(
222            "database not found at {}; `health` does not auto-create the database — \
223             run `sqlite-graphrag init --db {}` first or pass an existing path",
224            paths.db.display(),
225            paths.db.display(),
226        );
227        tracing::warn!(target: "health", db_path = %paths.db.display(), "database path does not exist; refusing to bootstrap");
228        output::emit_json(&serde_json::json!({
229            "error": true,
230            "code": 4,
231            "message": msg,
232            "db_path": paths.db.display().to_string(),
233        }))?;
234        return Err(AppError::NotFound(msg));
235    }
236
237    let conn = open_ro(&paths.db)?;
238
239    let integrity: String = conn.query_row("PRAGMA integrity_check;", [], |r| r.get(0))?;
240    let integrity_ok = integrity == "ok";
241    tracing::info!(target: "health", integrity_ok = %integrity_ok, "PRAGMA integrity_check complete");
242
243    if !integrity_ok {
244        let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
245        output::emit_json(&HealthResponse {
246            status: "degraded".to_string(),
247            namespace: None,
248            integrity: integrity.clone(),
249            integrity_ok: false,
250            schema_ok: false,
251            vec_memories_ok: false,
252            vec_memories_missing: 0,
253            vec_memories_orphaned: 0,
254            vec_entities_ok: false,
255            vec_chunks_ok: false,
256            fts_ok: false,
257            fts_query_ok: false,
258            model_ok: false,
259            counts: HealthCounts {
260                memories: 0,
261                memories_total: 0,
262                entities: 0,
263                relationships: 0,
264                vec_memories: 0,
265            },
266            db_path: paths.db.display().to_string(),
267            db_size_bytes,
268            schema_version: 0,
269            sqlite_version: "unknown".to_string(),
270            missing_entities: vec![],
271            wal_size_mb: 0.0,
272            journal_mode: "unknown".to_string(),
273            mentions_ratio: None,
274            mentions_warning: None,
275            top_relation: None,
276            top_relation_ratio: None,
277            applies_to_ratio: None,
278            relation_concentration_warning: None,
279            non_normalized_count: None,
280            normalization_warning: None,
281            super_hub_count: None,
282            super_hub_warning: None,
283            top_hub_entity: None,
284            top_hub_degree: None,
285            hub_warning: None,
286            checks: vec![HealthCheck {
287                name: "integrity".to_string(),
288                ok: false,
289                detail: Some(integrity),
290            }],
291            elapsed_ms: start.elapsed().as_millis() as u64,
292        })?;
293        return Err(AppError::Database(rusqlite::Error::SqliteFailure(
294            rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_CORRUPT),
295            Some("integrity check failed".to_string()),
296        )));
297    }
298
299    // GAP-E2E-002: filter memory count by namespace when --namespace is set.
300    let memories_count: i64 = match &namespace_filter {
301        Some(ns) => conn.query_row(
302            "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL AND namespace = ?1",
303            rusqlite::params![ns],
304            |r| r.get(0),
305        )?,
306        None => conn.query_row(
307            "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
308            [],
309            |r| r.get(0),
310        )?,
311    };
312    let entities_count: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
313    let relationships_count: i64 =
314        conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?;
315    let (vec_memories_ok, vec_memories_count, vec_memories_missing, vec_memories_orphaned) =
316        memory_embedding_health(&conn);
317
318    let mentions_count: i64 = conn.query_row(
319        "SELECT COUNT(*) FROM relationships WHERE relation = 'mentions'",
320        [],
321        |r| r.get(0),
322    )?;
323    let (mentions_ratio, mentions_warning) = if relationships_count > 0 {
324        let ratio = mentions_count as f64 / relationships_count as f64;
325        let warning = if ratio > 0.5 {
326            Some(format!(
327                "mentions relationships dominate graph at {:.1}% ({}/{} total); consider running prune-relations --relation mentions --dry-run",
328                ratio * 100.0,
329                mentions_count,
330                relationships_count
331            ))
332        } else {
333            None
334        };
335        (Some(ratio), warning)
336    } else {
337        (None, None)
338    };
339
340    // Relation concentration: find the most frequent relation type and check threshold.
341    let (top_relation, top_relation_ratio, applies_to_ratio, relation_concentration_warning) =
342        if relationships_count > 0 {
343            // Identify the relation with the highest edge count.
344            let (top_rel, top_count): (String, i64) = conn
345                .query_row(
346                    "SELECT relation, COUNT(*) AS cnt
347                     FROM relationships
348                     GROUP BY relation
349                     ORDER BY cnt DESC
350                     LIMIT 1",
351                    [],
352                    |r| Ok((r.get::<_, String>(0)?, r.get::<_, i64>(1)?)),
353                )
354                .unwrap_or_else(|_| ("unknown".to_string(), 0));
355
356            let top_ratio = top_count as f64 / relationships_count as f64;
357
358            // Compute applies_to ratio separately (may be 0 if absent).
359            let applies_count: i64 = conn
360                .query_row(
361                    "SELECT COUNT(*) FROM relationships WHERE relation = 'applies_to'",
362                    [],
363                    |r| r.get(0),
364                )
365                .unwrap_or(0);
366            let at_ratio = if applies_count > 0 {
367                Some(applies_count as f64 / relationships_count as f64)
368            } else {
369                None
370            };
371
372            let concentration_warning = if top_ratio > 0.40 {
373                Some(format!(
374                    "relation '{}' dominates graph at {:.1}% ({}/{} total); consider running prune-relations --relation {} --dry-run",
375                    top_rel,
376                    top_ratio * 100.0,
377                    top_count,
378                    relationships_count,
379                    top_rel,
380                ))
381            } else {
382                None
383            };
384
385            (
386                Some(top_rel),
387                Some(top_ratio),
388                at_ratio,
389                concentration_warning,
390            )
391        } else {
392            (None, None, None, None)
393        };
394
395    let status = "ok";
396
397    let schema_version: u32 = conn
398        .query_row(
399            "SELECT COALESCE(MAX(version), 0) FROM refinery_schema_history",
400            [],
401            |r| r.get::<_, i64>(0),
402        )
403        .unwrap_or(0) as u32;
404
405    let schema_ok = schema_version > 0;
406
407    // Checks vector tables via sqlite_master
408    let vec_entities_ok = first_existing_table(&conn, ENTITY_EMBEDDING_TABLES).is_some();
409    let vec_chunks_ok = first_existing_table(&conn, CHUNK_EMBEDDING_TABLES).is_some();
410
411    tracing::info!(target: "health", vec_memories_ok = %vec_memories_ok, vec_entities_ok = %vec_entities_ok, vec_missing = vec_memories_missing, vec_orphaned = vec_memories_orphaned, "vector table checks complete");
412    let fts_ok = table_exists(&conn, "fts_memories");
413
414    // Verifies that FTS5 can execute a MATCH query (catches index corruption distinct from table absence).
415    let fts_query_ok = if fts_ok {
416        conn.query_row(
417            "SELECT COUNT(*) FROM fts_memories WHERE fts_memories MATCH 'a' LIMIT 1",
418            [],
419            |r| r.get::<_, i64>(0),
420        )
421        .is_ok()
422    } else {
423        false
424    };
425
426    tracing::info!(target: "health", fts_ok = %fts_ok, fts_query_ok = %fts_query_ok, "FTS5 checks complete");
427
428    // Captures the SQLite runtime version for observability.
429    let sqlite_version: String = conn
430        .query_row("SELECT sqlite_version()", [], |r| r.get(0))
431        .unwrap_or_else(|_| "unknown".to_string());
432
433    // Detects orphan entities referenced by memories but absent from the entities table.
434    let mut missing_entities: Vec<String> = Vec::with_capacity(4);
435    let mut stmt = conn.prepare_cached(
436        "SELECT DISTINCT me.entity_id
437         FROM memory_entities me
438         LEFT JOIN entities e ON e.id = me.entity_id
439         WHERE e.id IS NULL",
440    )?;
441    let orphans: Vec<i64> = stmt
442        .query_map([], |r| r.get(0))?
443        .collect::<Result<Vec<_>, _>>()?;
444    for id in orphans {
445        missing_entities.push(format!("entity_id={id}"));
446    }
447
448    let journal_mode: String = conn
449        .query_row("PRAGMA journal_mode", [], |row| row.get::<_, String>(0))
450        .unwrap_or_else(|_| "unknown".to_string());
451
452    let wal_size_mb = fs::metadata(format!("{}-wal", paths.db.display()))
453        .map(|m| m.len() as f64 / 1024.0 / 1024.0)
454        .unwrap_or(0.0);
455
456    // Database file size in bytes
457    let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
458
459    // G46: the ONNX model cache no longer exists in the LLM-only build
460    // (v1.0.76+). model_ok now reports whether an LLM CLI (claude or codex)
461    // is reachable on PATH — the real prerequisite for embedding generation.
462    let model_ok = crate::commands::ingest_claude::find_claude_binary(None).is_ok()
463        || crate::commands::ingest_codex::find_codex_binary(None).is_ok();
464    tracing::info!(target: "health", model_ok = %model_ok, "LLM CLI availability check complete");
465
466    // Builds the checks array for detailed diagnostics
467    let mut checks: Vec<HealthCheck> = Vec::with_capacity(8);
468
469    // At this point integrity_ok is always true (corrupt DB returned early above).
470    checks.push(HealthCheck {
471        name: "integrity".to_string(),
472        ok: true,
473        detail: None,
474    });
475
476    checks.push(HealthCheck {
477        name: "schema_version".to_string(),
478        ok: schema_ok,
479        detail: if schema_ok {
480            None
481        } else {
482            Some(format!("schema_version={schema_version} (expected >0)"))
483        },
484    });
485
486    checks.push(HealthCheck {
487        name: "vec_memories".to_string(),
488        ok: vec_memories_ok,
489        detail: if vec_memories_ok {
490            None
491        } else {
492            Some("memory_embeddings/vec_memories table missing from sqlite_master".to_string())
493        },
494    });
495
496    checks.push(HealthCheck {
497        name: "vec_entities".to_string(),
498        ok: vec_entities_ok,
499        detail: if vec_entities_ok {
500            None
501        } else {
502            Some("entity_embeddings/vec_entities table missing from sqlite_master".to_string())
503        },
504    });
505
506    checks.push(HealthCheck {
507        name: "vec_chunks".to_string(),
508        ok: vec_chunks_ok,
509        detail: if vec_chunks_ok {
510            None
511        } else {
512            Some("chunk_embeddings/vec_chunks table missing from sqlite_master".to_string())
513        },
514    });
515
516    checks.push(HealthCheck {
517        name: "fts_memories".to_string(),
518        ok: fts_ok,
519        detail: if fts_ok {
520            None
521        } else {
522            Some("fts_memories table missing from sqlite_master".to_string())
523        },
524    });
525
526    checks.push(HealthCheck {
527        name: "fts_query".to_string(),
528        ok: fts_query_ok,
529        detail: if fts_query_ok {
530            None
531        } else {
532            Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string())
533        },
534    });
535
536    checks.push(HealthCheck {
537        name: "llm_cli".to_string(),
538        ok: model_ok,
539        detail: if model_ok {
540            None
541        } else {
542            Some(
543                "no LLM CLI found on PATH; install 'claude' (Claude Code) or 'codex' \
544                 (Codex CLI) — required for embedding generation since v1.0.76"
545                    .to_string(),
546            )
547        },
548    });
549
550    // G24: detect non-normalized entity names
551    let (non_normalized_count, normalization_warning) = {
552        let mut stmt = conn.prepare_cached("SELECT name FROM entities")?;
553        let names: Vec<String> = stmt
554            .query_map([], |r| r.get(0))?
555            .filter_map(|r| r.ok())
556            .collect();
557        let count = names
558            .iter()
559            .filter(|n| crate::parsers::normalize_entity_name(n) != **n)
560            .count() as i64;
561        let warning = if count > 0 {
562            Some(format!(
563                "run 'normalize-entities --yes' to fix {count} non-normalized entities"
564            ))
565        } else {
566            None
567        };
568        (Some(count), warning)
569    };
570
571    // G25: detect super-hub entities (degree > 50)
572    let (super_hub_count, super_hub_warning) = {
573        let mut stmt = conn.prepare_cached(
574            "SELECT e.name, COUNT(r.id) as deg FROM entities e \
575             LEFT JOIN relationships r ON e.id = r.source_id OR e.id = r.target_id \
576             GROUP BY e.id HAVING deg > 50 ORDER BY deg DESC LIMIT 5",
577        )?;
578        let hubs: Vec<(String, i64)> = stmt
579            .query_map([], |r| Ok((r.get(0)?, r.get(1)?)))?
580            .filter_map(|r| r.ok())
581            .collect();
582        let count = hubs.len() as i64;
583        let warning = if count > 0 {
584            let names: Vec<String> = hubs
585                .iter()
586                .map(|(n, d)| format!("{n} (degree {d})"))
587                .collect();
588            Some(format!("super-hubs detected: {}", names.join(", ")))
589        } else {
590            None
591        };
592        (Some(count), warning)
593    };
594
595    // G25 (extended): identify the single highest-degree entity for programmatic use.
596    let (top_hub_entity, top_hub_degree, hub_warning) = {
597        let result: Option<(String, i64)> = conn
598            .query_row(
599                "SELECT e.name, COUNT(r.id) AS degree
600                 FROM entities e
601                 LEFT JOIN relationships r ON e.id = r.source_id OR e.id = r.target_id
602                 GROUP BY e.id
603                 ORDER BY degree DESC
604                 LIMIT 1",
605                [],
606                |r| Ok((r.get::<_, String>(0)?, r.get::<_, i64>(1)?)),
607            )
608            .ok();
609        match result {
610            Some((name, degree)) => {
611                let warning = if degree > 50 {
612                    Some(format!(
613                        "entity '{name}' has {degree} connections; consider splitting or using --max-neighbors-per-hop"
614                    ))
615                } else {
616                    None
617                };
618                (Some(name), Some(degree), warning)
619            }
620            None => (None, None, None),
621        }
622    };
623
624    let response = HealthResponse {
625        status: status.to_string(),
626        namespace: namespace_filter.clone(),
627        integrity,
628        integrity_ok,
629        schema_ok,
630        vec_memories_ok,
631        vec_memories_missing,
632        vec_memories_orphaned,
633        vec_entities_ok,
634        vec_chunks_ok,
635        fts_ok,
636        fts_query_ok,
637        model_ok,
638        counts: HealthCounts {
639            memories: memories_count,
640            memories_total: memories_count,
641            entities: entities_count,
642            relationships: relationships_count,
643            vec_memories: vec_memories_count,
644        },
645        db_path: paths.db.display().to_string(),
646        db_size_bytes,
647        schema_version,
648        sqlite_version,
649        missing_entities,
650        wal_size_mb,
651        journal_mode,
652        mentions_ratio,
653        mentions_warning,
654        top_relation,
655        top_relation_ratio,
656        applies_to_ratio,
657        relation_concentration_warning,
658        non_normalized_count,
659        normalization_warning,
660        super_hub_count,
661        super_hub_warning,
662        top_hub_entity,
663        top_hub_degree,
664        hub_warning,
665        checks,
666        elapsed_ms: start.elapsed().as_millis() as u64,
667    };
668
669    output::emit_json(&response)?;
670
671    Ok(())
672}
673
674#[cfg(test)]
675mod tests {
676    use super::*;
677    use rusqlite::Connection;
678
679    fn open_health_test_db() -> Connection {
680        let conn = Connection::open_in_memory().unwrap();
681        conn.execute_batch(
682            "CREATE TABLE memories (
683                id INTEGER PRIMARY KEY,
684                deleted_at INTEGER
685            );
686            CREATE TABLE memory_embeddings (
687                memory_id INTEGER PRIMARY KEY,
688                namespace TEXT NOT NULL,
689                embedding BLOB NOT NULL,
690                source TEXT NOT NULL,
691                model TEXT NOT NULL,
692                dim INTEGER NOT NULL DEFAULT 384,
693                created_at TEXT NOT NULL DEFAULT '0'
694            );
695            CREATE TABLE vec_memories (
696                memory_id INTEGER PRIMARY KEY,
697                embedding BLOB NOT NULL,
698                created_at INTEGER NOT NULL DEFAULT 0
699            );",
700        )
701        .unwrap();
702        conn
703    }
704
705    #[test]
706    fn memory_embedding_health_prefers_memory_embeddings_and_counts_soft_deleted_as_orphaned() {
707        let conn = open_health_test_db();
708        conn.execute("INSERT INTO memories (id, deleted_at) VALUES (1, NULL)", [])
709            .unwrap();
710        conn.execute("INSERT INTO memories (id, deleted_at) VALUES (2, NULL)", [])
711            .unwrap();
712        conn.execute("INSERT INTO memories (id, deleted_at) VALUES (3, 123)", [])
713            .unwrap();
714        conn.execute(
715            "INSERT INTO memory_embeddings(memory_id, namespace, embedding, source, model, dim, created_at)
716             VALUES (1, 'global', X'00', 'llm', 'm', 384, '1')",
717            [],
718        )
719        .unwrap();
720        conn.execute(
721            "INSERT INTO memory_embeddings(memory_id, namespace, embedding, source, model, dim, created_at)
722             VALUES (3, 'global', X'00', 'llm', 'm', 384, '2')",
723            [],
724        )
725        .unwrap();
726        conn.execute(
727            "INSERT INTO memory_embeddings(memory_id, namespace, embedding, source, model, dim, created_at)
728             VALUES (99, 'global', X'00', 'llm', 'm', 384, '3')",
729            [],
730        )
731        .unwrap();
732        conn.execute(
733            "INSERT INTO vec_memories(memory_id, embedding, created_at) VALUES (777, X'00', 0)",
734            [],
735        )
736        .unwrap();
737
738        let (ok, total, missing, orphaned) = memory_embedding_health(&conn);
739        assert!(ok);
740        assert_eq!(total, 3);
741        assert_eq!(missing, 1);
742        assert_eq!(orphaned, 2);
743    }
744
745    #[test]
746    fn first_existing_table_falls_back_to_legacy_vec_name() {
747        let conn = Connection::open_in_memory().unwrap();
748        conn.execute_batch(
749            "CREATE TABLE vec_memories (
750                memory_id INTEGER PRIMARY KEY,
751                embedding BLOB NOT NULL,
752                created_at INTEGER NOT NULL DEFAULT 0
753            );",
754        )
755        .unwrap();
756
757        let resolved = first_existing_table(&conn, MEMORY_EMBEDDING_TABLES);
758        assert_eq!(resolved, Some("vec_memories"));
759    }
760
#[test]
/// Serializes a fully populated `HealthResponse` and spot-checks the top-level
/// flags, the size field and the shape of the `checks` array.
fn health_check_serializes_all_new_fields() {
    // One passing check (no detail) and one failing check (with detail).
    let passing = HealthCheck {
        name: String::from("integrity"),
        ok: true,
        detail: None,
    };
    let failing = HealthCheck {
        name: String::from("model_onnx"),
        ok: false,
        detail: Some(String::from("model missing")),
    };
    let counts = HealthCounts {
        memories: 5,
        memories_total: 5,
        entities: 3,
        relationships: 2,
        vec_memories: 5,
    };

    let report = HealthResponse {
        status: String::from("ok"),
        namespace: None,
        integrity: String::from("ok"),
        integrity_ok: true,
        schema_ok: true,
        vec_memories_ok: true,
        vec_memories_missing: 0,
        vec_memories_orphaned: 0,
        vec_entities_ok: true,
        vec_chunks_ok: true,
        fts_ok: true,
        fts_query_ok: true,
        model_ok: false,
        counts,
        db_path: String::from("/tmp/test.sqlite"),
        db_size_bytes: 4096,
        schema_version: 6,
        sqlite_version: String::from("3.46.0"),
        elapsed_ms: 0,
        missing_entities: Vec::new(),
        wal_size_mb: 0.0,
        journal_mode: String::from("wal"),
        mentions_ratio: None,
        mentions_warning: None,
        top_relation: None,
        top_relation_ratio: None,
        applies_to_ratio: None,
        relation_concentration_warning: None,
        non_normalized_count: None,
        normalization_warning: None,
        super_hub_count: None,
        super_hub_warning: None,
        top_hub_entity: None,
        top_hub_degree: None,
        hub_warning: None,
        checks: vec![passing, failing],
    };

    let serialized = serde_json::to_value(&report).unwrap();

    assert_eq!(serialized["status"], "ok");
    // Every flag set to `true` above must round-trip as JSON `true`.
    for flag in [
        "integrity_ok",
        "schema_ok",
        "vec_memories_ok",
        "vec_entities_ok",
        "vec_chunks_ok",
        "fts_ok",
    ] {
        assert_eq!(serialized[flag], true);
    }
    assert_eq!(serialized["model_ok"], false);
    assert_eq!(serialized["db_size_bytes"], 4096u64);

    assert!(serialized["checks"].is_array());
    let checks = serialized["checks"].as_array().unwrap();
    assert_eq!(checks.len(), 2);

    // The passing check was built with `detail: None`; the key must not appear.
    let first = &checks[0];
    assert_eq!(first["name"], "integrity");
    assert_eq!(first["ok"], true);
    assert!(first.get("detail").is_none());

    // The failing check carries its detail string through to the JSON.
    let second = &checks[1];
    assert_eq!(second["name"], "model_onnx");
    assert_eq!(second["ok"], false);
    assert_eq!(second["detail"], "model missing");
}
844
#[test]
/// A `HealthCheck` built with `detail: None` must serialize without a
/// `detail` key at all.
fn health_check_without_detail_omits_field() {
    let serialized = serde_json::to_value(&HealthCheck {
        name: String::from("vec_memories"),
        ok: true,
        detail: None,
    })
    .unwrap();

    let detail = serialized.get("detail");
    assert!(detail.is_none(), "detail field must be omitted when None");
}
858
#[test]
/// A `HealthCheck` built with `detail: Some(..)` must expose that exact string
/// under the `detail` key.
fn health_check_with_detail_serializes_field() {
    let expected_detail = "fts_memories table missing from sqlite_master";

    let failing = HealthCheck {
        name: String::from("fts_memories"),
        ok: false,
        detail: Some(expected_detail.to_string()),
    };

    let serialized = serde_json::to_value(&failing).unwrap();
    assert_eq!(serialized["detail"], expected_detail);
}
872
#[test]
/// Checks that `fts_query_ok` and `sqlite_version` surface as top-level JSON
/// keys, and that a failing `fts_query` check serializes its detail message.
fn health_response_fts_query_ok_and_sqlite_version_serialize() {
    let empty_counts = HealthCounts {
        memories: 0,
        memories_total: 0,
        entities: 0,
        relationships: 0,
        vec_memories: 0,
    };

    let report = HealthResponse {
        status: String::from("ok"),
        namespace: Some(String::from("test-ns")),
        integrity: String::from("ok"),
        integrity_ok: true,
        schema_ok: true,
        vec_memories_ok: true,
        vec_memories_missing: 0,
        vec_memories_orphaned: 0,
        vec_entities_ok: true,
        vec_chunks_ok: true,
        fts_ok: true,
        fts_query_ok: true,
        model_ok: true,
        counts: empty_counts,
        db_path: String::from("/tmp/test.sqlite"),
        db_size_bytes: 0,
        schema_version: 1,
        sqlite_version: String::from("3.45.1"),
        elapsed_ms: 0,
        missing_entities: Vec::new(),
        wal_size_mb: 0.0,
        journal_mode: String::from("wal"),
        mentions_ratio: None,
        mentions_warning: None,
        top_relation: None,
        top_relation_ratio: None,
        applies_to_ratio: None,
        relation_concentration_warning: None,
        non_normalized_count: None,
        normalization_warning: None,
        super_hub_count: None,
        super_hub_warning: None,
        top_hub_entity: None,
        top_hub_degree: None,
        hub_warning: None,
        checks: Vec::new(),
    };

    let serialized = serde_json::to_value(&report).unwrap();

    // Top-level flag for the FTS query probe.
    assert_eq!(
        serialized["fts_query_ok"], true,
        "fts_query_ok must be present and true in serialized JSON"
    );

    // Top-level SQLite version string, compared verbatim.
    assert_eq!(
        serialized["sqlite_version"], "3.45.1",
        "sqlite_version must be present and match the provided string"
    );

    // A failing `fts_query` check must keep its name, status and detail text.
    let expected_detail = "FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'";
    let failing = HealthCheck {
        name: String::from("fts_query"),
        ok: false,
        detail: Some(expected_detail.to_string()),
    };
    let failing_json = serde_json::to_value(&failing).unwrap();
    assert_eq!(failing_json["name"], "fts_query");
    assert_eq!(failing_json["ok"], false);
    assert_eq!(failing_json["detail"], expected_detail);
}
950
951    fn make_full_response(
952        top_relation: Option<String>,
953        top_relation_ratio: Option<f64>,
954        applies_to_ratio: Option<f64>,
955        relation_concentration_warning: Option<String>,
956    ) -> HealthResponse {
957        HealthResponse {
958            status: "ok".to_string(),
959            namespace: None,
960            integrity: "ok".to_string(),
961            integrity_ok: true,
962            schema_ok: true,
963            vec_memories_ok: true,
964            vec_memories_missing: 0,
965            vec_memories_orphaned: 0,
966            vec_entities_ok: true,
967            vec_chunks_ok: true,
968            fts_ok: true,
969            fts_query_ok: true,
970            model_ok: true,
971            counts: HealthCounts {
972                memories: 10,
973                memories_total: 10,
974                entities: 5,
975                relationships: 20,
976                vec_memories: 10,
977            },
978            db_path: "/tmp/test.sqlite".to_string(),
979            db_size_bytes: 8192,
980            schema_version: 3,
981            sqlite_version: "3.46.0".to_string(),
982            elapsed_ms: 1,
983            missing_entities: vec![],
984            wal_size_mb: 0.0,
985            journal_mode: "wal".to_string(),
986            mentions_ratio: None,
987            mentions_warning: None,
988            top_relation,
989            top_relation_ratio,
990            applies_to_ratio,
991            relation_concentration_warning,
992            non_normalized_count: None,
993            normalization_warning: None,
994            super_hub_count: None,
995            super_hub_warning: None,
996            top_hub_entity: None,
997            top_hub_degree: None,
998            hub_warning: None,
999            checks: vec![],
1000        }
1001    }
1002
#[test]
/// With all four concentration inputs set to `None` (the original test
/// describes this as a DB with zero relationships), none of the corresponding
/// keys may appear in the serialized JSON.
fn health_concentration_fields_omitted_when_no_relationships() {
    let serialized = serde_json::to_value(&make_full_response(None, None, None, None)).unwrap();

    for key in [
        "top_relation",
        "top_relation_ratio",
        "applies_to_ratio",
        "relation_concentration_warning",
    ] {
        assert!(
            serialized.get(key).is_none(),
            "{} must be omitted when None",
            key
        );
    }
}
1025
#[test]
/// When all four concentration inputs are populated, each one must be present
/// in the JSON with the value that was supplied.
fn health_concentration_fields_present_with_data() {
    let report = make_full_response(
        Some(String::from("mentions")),
        Some(0.60),
        Some(0.10),
        Some(String::from("relation 'mentions' dominates graph at 60.0%")),
    );
    let serialized = serde_json::to_value(&report).unwrap();

    // Reads a numeric field back as f64; panics if the key is absent or not a number.
    let ratio_of = |key: &str| serialized[key].as_f64().unwrap();

    assert_eq!(serialized["top_relation"], "mentions");
    assert!((ratio_of("top_relation_ratio") - 0.60).abs() < 1e-9);
    assert!((ratio_of("applies_to_ratio") - 0.10).abs() < 1e-9);

    let warning = serialized["relation_concentration_warning"]
        .as_str()
        .unwrap();
    assert!(warning.contains("60.0%"));
}
1043
#[test]
/// A response with ratio 0.39 (below the 0.40 threshold named in the assertion
/// message) and no warning supplied must serialize without the warning key.
/// The fixture passes the warning through unchanged, so this exercises
/// serialization only.
fn health_concentration_warning_absent_when_ratio_below_threshold() {
    let report = make_full_response(Some(String::from("uses")), Some(0.39), None, None);
    let serialized = serde_json::to_value(&report).unwrap();

    assert_eq!(serialized["top_relation"], "uses");

    let warning = serialized.get("relation_concentration_warning");
    assert!(
        warning.is_none(),
        "warning must be absent when ratio <= 0.40"
    );
}
1055
#[test]
/// A response with ratio 0.41 (just above the 0.40 threshold named in the
/// assertion message) and a warning supplied must serialize the warning as a
/// JSON string. The warning text is provided by the test itself.
fn health_concentration_warning_present_at_threshold() {
    let relation = Some(String::from("depends_on"));
    let warning = Some(String::from(
        "relation 'depends_on' dominates graph at 41.0%",
    ));
    let report = make_full_response(relation, Some(0.41), None, warning);

    let serialized = serde_json::to_value(&report).unwrap();
    let warning_is_string = serialized["relation_concentration_warning"].is_string();
    assert!(
        warning_is_string,
        "warning must be present when top_relation_ratio > 0.40"
    );
}
1071
#[test]
/// `applies_to_ratio` left as `None` (described by the original test as "no
/// applies_to edges") must not appear in the JSON, even when the other
/// concentration fields are populated.
fn health_applies_to_ratio_omitted_when_none() {
    let report = make_full_response(Some(String::from("related")), Some(0.30), None, None);
    let serialized = serde_json::to_value(&report).unwrap();

    let ratio = serialized.get("applies_to_ratio");
    assert!(ratio.is_none(), "applies_to_ratio must be omitted when None");
}
1082}