Skip to main content

sqlite_graphrag/commands/
health.rs

1//! Handler for the `health` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use serde::Serialize;
8use std::fs;
9use std::time::Instant;
10
11const MEMORY_EMBEDDING_TABLES: &[&str] = &["memory_embeddings", "vec_memories"];
12const ENTITY_EMBEDDING_TABLES: &[&str] = &["entity_embeddings", "vec_entities"];
13const CHUNK_EMBEDDING_TABLES: &[&str] = &["chunk_embeddings", "vec_chunks"];
14
15#[derive(clap::Args)]
16#[command(after_long_help = "EXAMPLES:\n  \
17    # Check database health (connectivity, integrity, vector index)\n  \
18    sqlite-graphrag health\n\n  \
19    # Check health of a database at a custom path\n  \
20    sqlite-graphrag health --db /path/to/graphrag.sqlite\n\n  \
21    # Use SQLITE_GRAPHRAG_DB_PATH env var\n  \
22    SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag health")]
23pub struct HealthArgs {
24    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
25    pub db: Option<String>,
26    /// Explicit JSON flag. Accepted as a no-op because output is already JSON by default.
27    #[arg(long, default_value_t = false)]
28    pub json: bool,
29    /// Output format: `json` or `text`. JSON is always emitted on stdout regardless of the value.
30    #[arg(long, value_parser = ["json", "text"], hide = true)]
31    pub format: Option<String>,
32}
33
34#[derive(Serialize)]
35struct HealthCounts {
36    memories: i64,
37    /// Alias of `memories` for the documented contract in SKILL.md.
38    memories_total: i64,
39    entities: i64,
40    relationships: i64,
41    vec_memories: i64,
42}
43
44#[derive(Serialize)]
45struct HealthCheck {
46    name: String,
47    ok: bool,
48    #[serde(skip_serializing_if = "Option::is_none")]
49    detail: Option<String>,
50}
51
52#[derive(Serialize)]
53struct HealthResponse {
54    status: String,
55    integrity: String,
56    integrity_ok: bool,
57    schema_ok: bool,
58    vec_memories_ok: bool,
59    vec_memories_missing: i64,
60    vec_memories_orphaned: i64,
61    vec_entities_ok: bool,
62    vec_chunks_ok: bool,
63    fts_ok: bool,
64    /// Whether a live FTS5 MATCH query against fts_memories succeeded.
65    fts_query_ok: bool,
66    model_ok: bool,
67    counts: HealthCounts,
68    db_path: String,
69    db_size_bytes: u64,
70    /// MAX(version) from refinery_schema_history — number of the last applied migration.
71    /// Distinct from PRAGMA schema_version (SQLite DDL counter) and PRAGMA user_version
72    /// (canonical SCHEMA_USER_VERSION from __debug_schema).
73    schema_version: u32,
74    /// List of entities referenced by memories but absent from the entities table.
75    /// Empty in a healthy DB. Per the contract documented in SKILL.md.
76    missing_entities: Vec<String>,
77    /// WAL file size in MB (0.0 if WAL does not exist or journal_mode != wal).
78    wal_size_mb: f64,
79    /// SQLite journaling mode (wal, delete, truncate, persist, memory, off).
80    journal_mode: String,
81    /// SQLite version string, e.g. `"3.46.0"`.
82    sqlite_version: String,
83    /// Fraction of relationships that use the `mentions` relation type (0.0–1.0).
84    /// Omitted when there are no relationships in the database.
85    #[serde(skip_serializing_if = "Option::is_none")]
86    mentions_ratio: Option<f64>,
87    /// Human-readable warning when `mentions` relationships dominate the graph (ratio > 0.5).
88    /// Omitted when the ratio is within acceptable bounds or there are no relationships.
89    #[serde(skip_serializing_if = "Option::is_none")]
90    mentions_warning: Option<String>,
91    /// The relation type with the highest edge count in the namespace.
92    /// Omitted when there are no relationships in the database.
93    #[serde(skip_serializing_if = "Option::is_none")]
94    top_relation: Option<String>,
95    /// Fraction of all edges occupied by `top_relation` (0.0–1.0).
96    /// Omitted when there are no relationships in the database.
97    #[serde(skip_serializing_if = "Option::is_none")]
98    top_relation_ratio: Option<f64>,
99    /// Fraction of relationships that use the `applies_to` relation type (0.0–1.0).
100    /// Omitted when there are no relationships or when `applies_to` is absent.
101    #[serde(skip_serializing_if = "Option::is_none")]
102    applies_to_ratio: Option<f64>,
103    /// Human-readable warning when a single relation type occupies more than 40 % of edges.
104    /// Omitted when concentration is within acceptable bounds or there are no relationships.
105    #[serde(skip_serializing_if = "Option::is_none")]
106    relation_concentration_warning: Option<String>,
107    /// Number of entities whose name differs from its normalized kebab-case form.
108    #[serde(skip_serializing_if = "Option::is_none")]
109    non_normalized_count: Option<i64>,
110    /// Warning when non-normalized entities are detected.
111    #[serde(skip_serializing_if = "Option::is_none")]
112    normalization_warning: Option<String>,
113    /// Number of entities with degree exceeding the super-hub threshold (default 50).
114    #[serde(skip_serializing_if = "Option::is_none")]
115    super_hub_count: Option<i64>,
116    /// Warning listing top super-hub entity names.
117    #[serde(skip_serializing_if = "Option::is_none")]
118    super_hub_warning: Option<String>,
119    /// Name of the entity with the highest connection count in the namespace.
120    /// Omitted when there are no entities in the database.
121    #[serde(skip_serializing_if = "Option::is_none")]
122    top_hub_entity: Option<String>,
123    /// Number of connections (degree) of `top_hub_entity`.
124    /// Omitted when there are no entities in the database.
125    #[serde(skip_serializing_if = "Option::is_none")]
126    top_hub_degree: Option<i64>,
127    /// Human-readable warning when `top_hub_entity` exceeds 50 connections.
128    /// Omitted when degree is within acceptable bounds or there are no entities.
129    #[serde(skip_serializing_if = "Option::is_none")]
130    hub_warning: Option<String>,
131    checks: Vec<HealthCheck>,
132    elapsed_ms: u64,
133}
134
135/// Checks whether a table (including virtual ones) exists in sqlite_master.
136fn table_exists(conn: &rusqlite::Connection, table_name: &str) -> bool {
137    conn.query_row(
138        "SELECT COUNT(*) FROM sqlite_master WHERE type IN ('table', 'shadow') AND name = ?1",
139        rusqlite::params![table_name],
140        |r| r.get::<_, i64>(0),
141    )
142    .unwrap_or(0)
143        > 0
144}
145
146fn first_existing_table<'a>(
147    conn: &rusqlite::Connection,
148    candidates: &'a [&'a str],
149) -> Option<&'a str> {
150    candidates
151        .iter()
152        .copied()
153        .find(|name| table_exists(conn, name))
154}
155
156fn count_rows(conn: &rusqlite::Connection, table_name: &str) -> i64 {
157    conn.query_row(&format!("SELECT COUNT(*) FROM {table_name}"), [], |r| {
158        r.get(0)
159    })
160    .unwrap_or(0)
161}
162
163fn memory_embedding_health(conn: &rusqlite::Connection) -> (bool, i64, i64, i64) {
164    let Some(table_name) = first_existing_table(conn, MEMORY_EMBEDDING_TABLES) else {
165        return (false, 0, 0, 0);
166    };
167
168    let total = count_rows(conn, table_name);
169    let missing = conn
170        .query_row(
171            &format!(
172                "SELECT COUNT(*)
173                 FROM memories m
174                 LEFT JOIN {table_name} me ON me.memory_id = m.id
175                 WHERE me.memory_id IS NULL AND m.deleted_at IS NULL"
176            ),
177            [],
178            |r| r.get(0),
179        )
180        .unwrap_or(0);
181    let orphaned = conn
182        .query_row(
183            &format!(
184                "SELECT COUNT(*)
185                 FROM {table_name} me
186                 LEFT JOIN memories m ON m.id = me.memory_id
187                 WHERE m.id IS NULL OR m.deleted_at IS NOT NULL"
188            ),
189            [],
190            |r| r.get(0),
191        )
192        .unwrap_or(0);
193
194    (true, total, missing, orphaned)
195}
196
197pub fn run(args: HealthArgs) -> Result<(), AppError> {
198    let start = Instant::now();
199    let _ = args.json; // --json is a no-op because output is already JSON by default
200    let _ = args.format; // --format is a no-op; JSON is always emitted on stdout
201    let paths = AppPaths::resolve(args.db.as_deref())?;
202
203    crate::storage::connection::ensure_db_ready(&paths)?;
204
205    let conn = open_ro(&paths.db)?;
206
207    let integrity: String = conn.query_row("PRAGMA integrity_check;", [], |r| r.get(0))?;
208    let integrity_ok = integrity == "ok";
209    tracing::info!(target: "health", integrity_ok = %integrity_ok, "PRAGMA integrity_check complete");
210
211    if !integrity_ok {
212        let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
213        output::emit_json(&HealthResponse {
214            status: "degraded".to_string(),
215            integrity: integrity.clone(),
216            integrity_ok: false,
217            schema_ok: false,
218            vec_memories_ok: false,
219            vec_memories_missing: 0,
220            vec_memories_orphaned: 0,
221            vec_entities_ok: false,
222            vec_chunks_ok: false,
223            fts_ok: false,
224            fts_query_ok: false,
225            model_ok: false,
226            counts: HealthCounts {
227                memories: 0,
228                memories_total: 0,
229                entities: 0,
230                relationships: 0,
231                vec_memories: 0,
232            },
233            db_path: paths.db.display().to_string(),
234            db_size_bytes,
235            schema_version: 0,
236            sqlite_version: "unknown".to_string(),
237            missing_entities: vec![],
238            wal_size_mb: 0.0,
239            journal_mode: "unknown".to_string(),
240            mentions_ratio: None,
241            mentions_warning: None,
242            top_relation: None,
243            top_relation_ratio: None,
244            applies_to_ratio: None,
245            relation_concentration_warning: None,
246            non_normalized_count: None,
247            normalization_warning: None,
248            super_hub_count: None,
249            super_hub_warning: None,
250            top_hub_entity: None,
251            top_hub_degree: None,
252            hub_warning: None,
253            checks: vec![HealthCheck {
254                name: "integrity".to_string(),
255                ok: false,
256                detail: Some(integrity),
257            }],
258            elapsed_ms: start.elapsed().as_millis() as u64,
259        })?;
260        return Err(AppError::Database(rusqlite::Error::SqliteFailure(
261            rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_CORRUPT),
262            Some("integrity check failed".to_string()),
263        )));
264    }
265
266    let memories_count: i64 = conn.query_row(
267        "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
268        [],
269        |r| r.get(0),
270    )?;
271    let entities_count: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
272    let relationships_count: i64 =
273        conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?;
274    let (vec_memories_ok, vec_memories_count, vec_memories_missing, vec_memories_orphaned) =
275        memory_embedding_health(&conn);
276
277    let mentions_count: i64 = conn.query_row(
278        "SELECT COUNT(*) FROM relationships WHERE relation = 'mentions'",
279        [],
280        |r| r.get(0),
281    )?;
282    let (mentions_ratio, mentions_warning) = if relationships_count > 0 {
283        let ratio = mentions_count as f64 / relationships_count as f64;
284        let warning = if ratio > 0.5 {
285            Some(format!(
286                "mentions relationships dominate graph at {:.1}% ({}/{} total); consider running prune-relations --relation mentions --dry-run",
287                ratio * 100.0,
288                mentions_count,
289                relationships_count
290            ))
291        } else {
292            None
293        };
294        (Some(ratio), warning)
295    } else {
296        (None, None)
297    };
298
299    // Relation concentration: find the most frequent relation type and check threshold.
300    let (top_relation, top_relation_ratio, applies_to_ratio, relation_concentration_warning) =
301        if relationships_count > 0 {
302            // Identify the relation with the highest edge count.
303            let (top_rel, top_count): (String, i64) = conn
304                .query_row(
305                    "SELECT relation, COUNT(*) AS cnt
306                     FROM relationships
307                     GROUP BY relation
308                     ORDER BY cnt DESC
309                     LIMIT 1",
310                    [],
311                    |r| Ok((r.get::<_, String>(0)?, r.get::<_, i64>(1)?)),
312                )
313                .unwrap_or_else(|_| ("unknown".to_string(), 0));
314
315            let top_ratio = top_count as f64 / relationships_count as f64;
316
317            // Compute applies_to ratio separately (may be 0 if absent).
318            let applies_count: i64 = conn
319                .query_row(
320                    "SELECT COUNT(*) FROM relationships WHERE relation = 'applies_to'",
321                    [],
322                    |r| r.get(0),
323                )
324                .unwrap_or(0);
325            let at_ratio = if applies_count > 0 {
326                Some(applies_count as f64 / relationships_count as f64)
327            } else {
328                None
329            };
330
331            let concentration_warning = if top_ratio > 0.40 {
332                Some(format!(
333                    "relation '{}' dominates graph at {:.1}% ({}/{} total); consider running prune-relations --relation {} --dry-run",
334                    top_rel,
335                    top_ratio * 100.0,
336                    top_count,
337                    relationships_count,
338                    top_rel,
339                ))
340            } else {
341                None
342            };
343
344            (
345                Some(top_rel),
346                Some(top_ratio),
347                at_ratio,
348                concentration_warning,
349            )
350        } else {
351            (None, None, None, None)
352        };
353
354    let status = "ok";
355
356    let schema_version: u32 = conn
357        .query_row(
358            "SELECT COALESCE(MAX(version), 0) FROM refinery_schema_history",
359            [],
360            |r| r.get::<_, i64>(0),
361        )
362        .unwrap_or(0) as u32;
363
364    let schema_ok = schema_version > 0;
365
366    // Checks vector tables via sqlite_master
367    let vec_entities_ok = first_existing_table(&conn, ENTITY_EMBEDDING_TABLES).is_some();
368    let vec_chunks_ok = first_existing_table(&conn, CHUNK_EMBEDDING_TABLES).is_some();
369
370    tracing::info!(target: "health", vec_memories_ok = %vec_memories_ok, vec_entities_ok = %vec_entities_ok, vec_missing = vec_memories_missing, vec_orphaned = vec_memories_orphaned, "vector table checks complete");
371    let fts_ok = table_exists(&conn, "fts_memories");
372
373    // Verifies that FTS5 can execute a MATCH query (catches index corruption distinct from table absence).
374    let fts_query_ok = if fts_ok {
375        conn.query_row(
376            "SELECT COUNT(*) FROM fts_memories WHERE fts_memories MATCH 'a' LIMIT 1",
377            [],
378            |r| r.get::<_, i64>(0),
379        )
380        .is_ok()
381    } else {
382        false
383    };
384
385    tracing::info!(target: "health", fts_ok = %fts_ok, fts_query_ok = %fts_query_ok, "FTS5 checks complete");
386
387    // Captures the SQLite runtime version for observability.
388    let sqlite_version: String = conn
389        .query_row("SELECT sqlite_version()", [], |r| r.get(0))
390        .unwrap_or_else(|_| "unknown".to_string());
391
392    // Detects orphan entities referenced by memories but absent from the entities table.
393    let mut missing_entities: Vec<String> = Vec::with_capacity(4);
394    let mut stmt = conn.prepare_cached(
395        "SELECT DISTINCT me.entity_id
396         FROM memory_entities me
397         LEFT JOIN entities e ON e.id = me.entity_id
398         WHERE e.id IS NULL",
399    )?;
400    let orphans: Vec<i64> = stmt
401        .query_map([], |r| r.get(0))?
402        .collect::<Result<Vec<_>, _>>()?;
403    for id in orphans {
404        missing_entities.push(format!("entity_id={id}"));
405    }
406
407    let journal_mode: String = conn
408        .query_row("PRAGMA journal_mode", [], |row| row.get::<_, String>(0))
409        .unwrap_or_else(|_| "unknown".to_string());
410
411    let wal_size_mb = fs::metadata(format!("{}-wal", paths.db.display()))
412        .map(|m| m.len() as f64 / 1024.0 / 1024.0)
413        .unwrap_or(0.0);
414
415    // Database file size in bytes
416    let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
417
418    // G46: the ONNX model cache no longer exists in the LLM-only build
419    // (v1.0.76+). model_ok now reports whether an LLM CLI (claude or codex)
420    // is reachable on PATH — the real prerequisite for embedding generation.
421    let model_ok = crate::commands::ingest_claude::find_claude_binary(None).is_ok()
422        || crate::commands::ingest_codex::find_codex_binary(None).is_ok();
423    tracing::info!(target: "health", model_ok = %model_ok, "LLM CLI availability check complete");
424
425    // Builds the checks array for detailed diagnostics
426    let mut checks: Vec<HealthCheck> = Vec::with_capacity(8);
427
428    // At this point integrity_ok is always true (corrupt DB returned early above).
429    checks.push(HealthCheck {
430        name: "integrity".to_string(),
431        ok: true,
432        detail: None,
433    });
434
435    checks.push(HealthCheck {
436        name: "schema_version".to_string(),
437        ok: schema_ok,
438        detail: if schema_ok {
439            None
440        } else {
441            Some(format!("schema_version={schema_version} (expected >0)"))
442        },
443    });
444
445    checks.push(HealthCheck {
446        name: "vec_memories".to_string(),
447        ok: vec_memories_ok,
448        detail: if vec_memories_ok {
449            None
450        } else {
451            Some("memory_embeddings/vec_memories table missing from sqlite_master".to_string())
452        },
453    });
454
455    checks.push(HealthCheck {
456        name: "vec_entities".to_string(),
457        ok: vec_entities_ok,
458        detail: if vec_entities_ok {
459            None
460        } else {
461            Some("entity_embeddings/vec_entities table missing from sqlite_master".to_string())
462        },
463    });
464
465    checks.push(HealthCheck {
466        name: "vec_chunks".to_string(),
467        ok: vec_chunks_ok,
468        detail: if vec_chunks_ok {
469            None
470        } else {
471            Some("chunk_embeddings/vec_chunks table missing from sqlite_master".to_string())
472        },
473    });
474
475    checks.push(HealthCheck {
476        name: "fts_memories".to_string(),
477        ok: fts_ok,
478        detail: if fts_ok {
479            None
480        } else {
481            Some("fts_memories table missing from sqlite_master".to_string())
482        },
483    });
484
485    checks.push(HealthCheck {
486        name: "fts_query".to_string(),
487        ok: fts_query_ok,
488        detail: if fts_query_ok {
489            None
490        } else {
491            Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string())
492        },
493    });
494
495    checks.push(HealthCheck {
496        name: "llm_cli".to_string(),
497        ok: model_ok,
498        detail: if model_ok {
499            None
500        } else {
501            Some(
502                "no LLM CLI found on PATH; install 'claude' (Claude Code) or 'codex' \
503                 (Codex CLI) — required for embedding generation since v1.0.76"
504                    .to_string(),
505            )
506        },
507    });
508
509    // G24: detect non-normalized entity names
510    let (non_normalized_count, normalization_warning) = {
511        let mut stmt = conn.prepare_cached("SELECT name FROM entities")?;
512        let names: Vec<String> = stmt
513            .query_map([], |r| r.get(0))?
514            .filter_map(|r| r.ok())
515            .collect();
516        let count = names
517            .iter()
518            .filter(|n| crate::parsers::normalize_entity_name(n) != **n)
519            .count() as i64;
520        let warning = if count > 0 {
521            Some(format!(
522                "run 'normalize-entities --yes' to fix {count} non-normalized entities"
523            ))
524        } else {
525            None
526        };
527        (Some(count), warning)
528    };
529
530    // G25: detect super-hub entities (degree > 50)
531    let (super_hub_count, super_hub_warning) = {
532        let mut stmt = conn.prepare_cached(
533            "SELECT e.name, COUNT(r.id) as deg FROM entities e \
534             LEFT JOIN relationships r ON e.id = r.source_id OR e.id = r.target_id \
535             GROUP BY e.id HAVING deg > 50 ORDER BY deg DESC LIMIT 5",
536        )?;
537        let hubs: Vec<(String, i64)> = stmt
538            .query_map([], |r| Ok((r.get(0)?, r.get(1)?)))?
539            .filter_map(|r| r.ok())
540            .collect();
541        let count = hubs.len() as i64;
542        let warning = if count > 0 {
543            let names: Vec<String> = hubs
544                .iter()
545                .map(|(n, d)| format!("{n} (degree {d})"))
546                .collect();
547            Some(format!("super-hubs detected: {}", names.join(", ")))
548        } else {
549            None
550        };
551        (Some(count), warning)
552    };
553
554    // G25 (extended): identify the single highest-degree entity for programmatic use.
555    let (top_hub_entity, top_hub_degree, hub_warning) = {
556        let result: Option<(String, i64)> = conn
557            .query_row(
558                "SELECT e.name, COUNT(r.id) AS degree
559                 FROM entities e
560                 LEFT JOIN relationships r ON e.id = r.source_id OR e.id = r.target_id
561                 GROUP BY e.id
562                 ORDER BY degree DESC
563                 LIMIT 1",
564                [],
565                |r| Ok((r.get::<_, String>(0)?, r.get::<_, i64>(1)?)),
566            )
567            .ok();
568        match result {
569            Some((name, degree)) => {
570                let warning = if degree > 50 {
571                    Some(format!(
572                        "entity '{name}' has {degree} connections; consider splitting or using --max-neighbors-per-hop"
573                    ))
574                } else {
575                    None
576                };
577                (Some(name), Some(degree), warning)
578            }
579            None => (None, None, None),
580        }
581    };
582
583    let response = HealthResponse {
584        status: status.to_string(),
585        integrity,
586        integrity_ok,
587        schema_ok,
588        vec_memories_ok,
589        vec_memories_missing,
590        vec_memories_orphaned,
591        vec_entities_ok,
592        vec_chunks_ok,
593        fts_ok,
594        fts_query_ok,
595        model_ok,
596        counts: HealthCounts {
597            memories: memories_count,
598            memories_total: memories_count,
599            entities: entities_count,
600            relationships: relationships_count,
601            vec_memories: vec_memories_count,
602        },
603        db_path: paths.db.display().to_string(),
604        db_size_bytes,
605        schema_version,
606        sqlite_version,
607        missing_entities,
608        wal_size_mb,
609        journal_mode,
610        mentions_ratio,
611        mentions_warning,
612        top_relation,
613        top_relation_ratio,
614        applies_to_ratio,
615        relation_concentration_warning,
616        non_normalized_count,
617        normalization_warning,
618        super_hub_count,
619        super_hub_warning,
620        top_hub_entity,
621        top_hub_degree,
622        hub_warning,
623        checks,
624        elapsed_ms: start.elapsed().as_millis() as u64,
625    };
626
627    output::emit_json(&response)?;
628
629    Ok(())
630}
631
632#[cfg(test)]
633mod tests {
634    use super::*;
635    use rusqlite::Connection;
636
637    fn open_health_test_db() -> Connection {
638        let conn = Connection::open_in_memory().unwrap();
639        conn.execute_batch(
640            "CREATE TABLE memories (
641                id INTEGER PRIMARY KEY,
642                deleted_at INTEGER
643            );
644            CREATE TABLE memory_embeddings (
645                memory_id INTEGER PRIMARY KEY,
646                namespace TEXT NOT NULL,
647                embedding BLOB NOT NULL,
648                source TEXT NOT NULL,
649                model TEXT NOT NULL,
650                dim INTEGER NOT NULL DEFAULT 384,
651                created_at TEXT NOT NULL DEFAULT '0'
652            );
653            CREATE TABLE vec_memories (
654                memory_id INTEGER PRIMARY KEY,
655                embedding BLOB NOT NULL,
656                created_at INTEGER NOT NULL DEFAULT 0
657            );",
658        )
659        .unwrap();
660        conn
661    }
662
663    #[test]
664    fn memory_embedding_health_prefers_memory_embeddings_and_counts_soft_deleted_as_orphaned() {
665        let conn = open_health_test_db();
666        conn.execute("INSERT INTO memories (id, deleted_at) VALUES (1, NULL)", [])
667            .unwrap();
668        conn.execute("INSERT INTO memories (id, deleted_at) VALUES (2, NULL)", [])
669            .unwrap();
670        conn.execute("INSERT INTO memories (id, deleted_at) VALUES (3, 123)", [])
671            .unwrap();
672        conn.execute(
673            "INSERT INTO memory_embeddings(memory_id, namespace, embedding, source, model, dim, created_at)
674             VALUES (1, 'global', X'00', 'llm', 'm', 384, '1')",
675            [],
676        )
677        .unwrap();
678        conn.execute(
679            "INSERT INTO memory_embeddings(memory_id, namespace, embedding, source, model, dim, created_at)
680             VALUES (3, 'global', X'00', 'llm', 'm', 384, '2')",
681            [],
682        )
683        .unwrap();
684        conn.execute(
685            "INSERT INTO memory_embeddings(memory_id, namespace, embedding, source, model, dim, created_at)
686             VALUES (99, 'global', X'00', 'llm', 'm', 384, '3')",
687            [],
688        )
689        .unwrap();
690        conn.execute(
691            "INSERT INTO vec_memories(memory_id, embedding, created_at) VALUES (777, X'00', 0)",
692            [],
693        )
694        .unwrap();
695
696        let (ok, total, missing, orphaned) = memory_embedding_health(&conn);
697        assert!(ok);
698        assert_eq!(total, 3);
699        assert_eq!(missing, 1);
700        assert_eq!(orphaned, 2);
701    }
702
703    #[test]
704    fn first_existing_table_falls_back_to_legacy_vec_name() {
705        let conn = Connection::open_in_memory().unwrap();
706        conn.execute_batch(
707            "CREATE TABLE vec_memories (
708                memory_id INTEGER PRIMARY KEY,
709                embedding BLOB NOT NULL,
710                created_at INTEGER NOT NULL DEFAULT 0
711            );",
712        )
713        .unwrap();
714
715        let resolved = first_existing_table(&conn, MEMORY_EMBEDDING_TABLES);
716        assert_eq!(resolved, Some("vec_memories"));
717    }
718
719    #[test]
720    fn health_check_serializes_all_new_fields() {
721        let response = HealthResponse {
722            status: "ok".to_string(),
723            integrity: "ok".to_string(),
724            integrity_ok: true,
725            schema_ok: true,
726            vec_memories_ok: true,
727            vec_memories_missing: 0,
728            vec_memories_orphaned: 0,
729            vec_entities_ok: true,
730            vec_chunks_ok: true,
731            fts_ok: true,
732            fts_query_ok: true,
733            model_ok: false,
734            counts: HealthCounts {
735                memories: 5,
736                memories_total: 5,
737                entities: 3,
738                relationships: 2,
739                vec_memories: 5,
740            },
741            db_path: "/tmp/test.sqlite".to_string(),
742            db_size_bytes: 4096,
743            schema_version: 6,
744            sqlite_version: "3.46.0".to_string(),
745            elapsed_ms: 0,
746            missing_entities: vec![],
747            wal_size_mb: 0.0,
748            journal_mode: "wal".to_string(),
749            mentions_ratio: None,
750            mentions_warning: None,
751            top_relation: None,
752            top_relation_ratio: None,
753            applies_to_ratio: None,
754            relation_concentration_warning: None,
755            non_normalized_count: None,
756            normalization_warning: None,
757            super_hub_count: None,
758            super_hub_warning: None,
759            top_hub_entity: None,
760            top_hub_degree: None,
761            hub_warning: None,
762            checks: vec![
763                HealthCheck {
764                    name: "integrity".to_string(),
765                    ok: true,
766                    detail: None,
767                },
768                HealthCheck {
769                    name: "model_onnx".to_string(),
770                    ok: false,
771                    detail: Some("model missing".to_string()),
772                },
773            ],
774        };
775
776        let json = serde_json::to_value(&response).unwrap();
777        assert_eq!(json["status"], "ok");
778        assert_eq!(json["integrity_ok"], true);
779        assert_eq!(json["schema_ok"], true);
780        assert_eq!(json["vec_memories_ok"], true);
781        assert_eq!(json["vec_entities_ok"], true);
782        assert_eq!(json["vec_chunks_ok"], true);
783        assert_eq!(json["fts_ok"], true);
784        assert_eq!(json["model_ok"], false);
785        assert_eq!(json["db_size_bytes"], 4096u64);
786        assert!(json["checks"].is_array());
787        assert_eq!(json["checks"].as_array().unwrap().len(), 2);
788
789        // Verifies that detail is absent when ok=true (skip_serializing_if)
790        let integrity_check = &json["checks"][0];
791        assert_eq!(integrity_check["name"], "integrity");
792        assert_eq!(integrity_check["ok"], true);
793        assert!(integrity_check.get("detail").is_none());
794
795        // Verifies that detail is present when ok=false
796        let model_check = &json["checks"][1];
797        assert_eq!(model_check["name"], "model_onnx");
798        assert_eq!(model_check["ok"], false);
799        assert_eq!(model_check["detail"], "model missing");
800    }
801
802    #[test]
803    fn health_check_without_detail_omits_field() {
804        let check = HealthCheck {
805            name: "vec_memories".to_string(),
806            ok: true,
807            detail: None,
808        };
809        let json = serde_json::to_value(&check).unwrap();
810        assert!(
811            json.get("detail").is_none(),
812            "detail field must be omitted when None"
813        );
814    }
815
816    #[test]
817    fn health_check_with_detail_serializes_field() {
818        let check = HealthCheck {
819            name: "fts_memories".to_string(),
820            ok: false,
821            detail: Some("fts_memories table missing from sqlite_master".to_string()),
822        };
823        let json = serde_json::to_value(&check).unwrap();
824        assert_eq!(
825            json["detail"],
826            "fts_memories table missing from sqlite_master"
827        );
828    }
829
830    #[test]
831    fn health_response_fts_query_ok_and_sqlite_version_serialize() {
832        // Verifies that fts_query_ok and sqlite_version appear in the serialized JSON
833        // with the expected keys and values.
834        let response = HealthResponse {
835            status: "ok".to_string(),
836            integrity: "ok".to_string(),
837            integrity_ok: true,
838            schema_ok: true,
839            vec_memories_ok: true,
840            vec_memories_missing: 0,
841            vec_memories_orphaned: 0,
842            vec_entities_ok: true,
843            vec_chunks_ok: true,
844            fts_ok: true,
845            fts_query_ok: true,
846            model_ok: true,
847            counts: HealthCounts {
848                memories: 0,
849                memories_total: 0,
850                entities: 0,
851                relationships: 0,
852                vec_memories: 0,
853            },
854            db_path: "/tmp/test.sqlite".to_string(),
855            db_size_bytes: 0,
856            schema_version: 1,
857            sqlite_version: "3.45.1".to_string(),
858            elapsed_ms: 0,
859            missing_entities: vec![],
860            wal_size_mb: 0.0,
861            journal_mode: "wal".to_string(),
862            mentions_ratio: None,
863            mentions_warning: None,
864            top_relation: None,
865            top_relation_ratio: None,
866            applies_to_ratio: None,
867            relation_concentration_warning: None,
868            non_normalized_count: None,
869            normalization_warning: None,
870            super_hub_count: None,
871            super_hub_warning: None,
872            top_hub_entity: None,
873            top_hub_degree: None,
874            hub_warning: None,
875            checks: vec![],
876        };
877
878        let json = serde_json::to_value(&response).unwrap();
879
880        // fts_query_ok must appear at the top level
881        assert_eq!(
882            json["fts_query_ok"], true,
883            "fts_query_ok must be present and true in serialized JSON"
884        );
885
886        // sqlite_version must appear at the top level with the exact string
887        assert_eq!(
888            json["sqlite_version"], "3.45.1",
889            "sqlite_version must be present and match the provided string"
890        );
891
892        // Verify fts_query_ok=false path includes the expected detail message
893        let check_fail = HealthCheck {
894            name: "fts_query".to_string(),
895            ok: false,
896            detail: Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string()),
897        };
898        let check_json = serde_json::to_value(&check_fail).unwrap();
899        assert_eq!(check_json["name"], "fts_query");
900        assert_eq!(check_json["ok"], false);
901        assert_eq!(
902            check_json["detail"],
903            "FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'"
904        );
905    }
906
907    fn make_full_response(
908        top_relation: Option<String>,
909        top_relation_ratio: Option<f64>,
910        applies_to_ratio: Option<f64>,
911        relation_concentration_warning: Option<String>,
912    ) -> HealthResponse {
913        HealthResponse {
914            status: "ok".to_string(),
915            integrity: "ok".to_string(),
916            integrity_ok: true,
917            schema_ok: true,
918            vec_memories_ok: true,
919            vec_memories_missing: 0,
920            vec_memories_orphaned: 0,
921            vec_entities_ok: true,
922            vec_chunks_ok: true,
923            fts_ok: true,
924            fts_query_ok: true,
925            model_ok: true,
926            counts: HealthCounts {
927                memories: 10,
928                memories_total: 10,
929                entities: 5,
930                relationships: 20,
931                vec_memories: 10,
932            },
933            db_path: "/tmp/test.sqlite".to_string(),
934            db_size_bytes: 8192,
935            schema_version: 3,
936            sqlite_version: "3.46.0".to_string(),
937            elapsed_ms: 1,
938            missing_entities: vec![],
939            wal_size_mb: 0.0,
940            journal_mode: "wal".to_string(),
941            mentions_ratio: None,
942            mentions_warning: None,
943            top_relation,
944            top_relation_ratio,
945            applies_to_ratio,
946            relation_concentration_warning,
947            non_normalized_count: None,
948            normalization_warning: None,
949            super_hub_count: None,
950            super_hub_warning: None,
951            top_hub_entity: None,
952            top_hub_degree: None,
953            hub_warning: None,
954            checks: vec![],
955        }
956    }
957
958    #[test]
959    fn health_concentration_fields_omitted_when_no_relationships() {
960        // Represents a DB with zero relationships.
961        let resp = make_full_response(None, None, None, None);
962        let json = serde_json::to_value(&resp).unwrap();
963        assert!(
964            json.get("top_relation").is_none(),
965            "top_relation must be omitted when None"
966        );
967        assert!(
968            json.get("top_relation_ratio").is_none(),
969            "top_relation_ratio must be omitted when None"
970        );
971        assert!(
972            json.get("applies_to_ratio").is_none(),
973            "applies_to_ratio must be omitted when None"
974        );
975        assert!(
976            json.get("relation_concentration_warning").is_none(),
977            "relation_concentration_warning must be omitted when None"
978        );
979    }
980
981    #[test]
982    fn health_concentration_fields_present_with_data() {
983        let resp = make_full_response(
984            Some("mentions".to_string()),
985            Some(0.60),
986            Some(0.10),
987            Some("relation 'mentions' dominates graph at 60.0%".to_string()),
988        );
989        let json = serde_json::to_value(&resp).unwrap();
990        assert_eq!(json["top_relation"], "mentions");
991        assert!((json["top_relation_ratio"].as_f64().unwrap() - 0.60).abs() < 1e-9);
992        assert!((json["applies_to_ratio"].as_f64().unwrap() - 0.10).abs() < 1e-9);
993        assert!(json["relation_concentration_warning"]
994            .as_str()
995            .unwrap()
996            .contains("60.0%"));
997    }
998
999    #[test]
1000    fn health_concentration_warning_absent_when_ratio_below_threshold() {
1001        // top_relation_ratio of 0.39 is below the 0.40 threshold — no warning.
1002        let resp = make_full_response(Some("uses".to_string()), Some(0.39), None, None);
1003        let json = serde_json::to_value(&resp).unwrap();
1004        assert_eq!(json["top_relation"], "uses");
1005        assert!(
1006            json.get("relation_concentration_warning").is_none(),
1007            "warning must be absent when ratio <= 0.40"
1008        );
1009    }
1010
1011    #[test]
1012    fn health_concentration_warning_present_at_threshold() {
1013        // Exactly at 0.41 (above 0.40) — warning must appear.
1014        let resp = make_full_response(
1015            Some("depends_on".to_string()),
1016            Some(0.41),
1017            None,
1018            Some("relation 'depends_on' dominates graph at 41.0%".to_string()),
1019        );
1020        let json = serde_json::to_value(&resp).unwrap();
1021        assert!(
1022            json["relation_concentration_warning"].is_string(),
1023            "warning must be present when top_relation_ratio > 0.40"
1024        );
1025    }
1026
1027    #[test]
1028    fn health_applies_to_ratio_omitted_when_none() {
1029        // applies_to_ratio is None when there are no applies_to edges.
1030        let resp = make_full_response(Some("related".to_string()), Some(0.30), None, None);
1031        let json = serde_json::to_value(&resp).unwrap();
1032        assert!(
1033            json.get("applies_to_ratio").is_none(),
1034            "applies_to_ratio must be omitted when None"
1035        );
1036    }
1037}