Skip to main content

sqlite_graphrag/commands/
health.rs

1//! Handler for the `health` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use serde::Serialize;
8use std::fs;
9use std::time::Instant;
10
/// Candidate names for the table holding memory embeddings, in lookup order: callers take
/// the first name that exists, so `memory_embeddings` wins over `vec_memories`.
const MEMORY_EMBEDDING_TABLES: &[&str] = &["memory_embeddings", "vec_memories"];
/// Candidate names for the table holding entity embeddings, in lookup order.
const ENTITY_EMBEDDING_TABLES: &[&str] = &["entity_embeddings", "vec_entities"];
/// Candidate names for the table holding chunk embeddings, in lookup order.
const CHUNK_EMBEDDING_TABLES: &[&str] = &["chunk_embeddings", "vec_chunks"];
14
15#[derive(clap::Args)]
16#[command(after_long_help = "EXAMPLES:\n  \
17    # Check database health (connectivity, integrity, vector index)\n  \
18    sqlite-graphrag health\n\n  \
19    # Check health of a database at a custom path\n  \
20    sqlite-graphrag health --db /path/to/graphrag.sqlite\n\n  \
21    # Use SQLITE_GRAPHRAG_DB_PATH env var\n  \
22    SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag health")]
23pub struct HealthArgs {
24    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
25    pub db: Option<String>,
26    /// Explicit JSON flag. Accepted as a no-op because output is already JSON by default.
27    #[arg(long, default_value_t = false)]
28    pub json: bool,
29    /// Output format: `json` or `text`. JSON is always emitted on stdout regardless of the value.
30    #[arg(long, value_parser = ["json", "text"], hide = true)]
31    pub format: Option<String>,
32}
33
/// Row counts reported under the `counts` key of the health response.
#[derive(Serialize)]
struct HealthCounts {
    /// Number of rows in `memories` that are not soft-deleted (`deleted_at IS NULL`).
    memories: i64,
    /// Alias of `memories` for the documented contract in SKILL.md.
    memories_total: i64,
    /// Number of rows in `entities`.
    entities: i64,
    /// Number of rows in `relationships`.
    relationships: i64,
    /// Number of rows in the memory-embedding table that was found (0 when none exists).
    vec_memories: i64,
}
43
/// One named diagnostic in the `checks` array of the health response.
#[derive(Serialize)]
struct HealthCheck {
    /// Identifier of the check, e.g. `integrity` or `fts_memories`.
    name: String,
    /// Whether the check passed.
    ok: bool,
    /// Explanation of a failure; the key is left out of the JSON when this is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<String>,
}
51
/// JSON document emitted by the `health` subcommand.
///
/// Fields are serialized in declaration order; `Option` fields marked with
/// `skip_serializing_if` are left out of the JSON entirely when they are `None`.
#[derive(Serialize)]
struct HealthResponse {
    /// Overall status: `"ok"`, or `"degraded"` when the integrity check failed.
    status: String,
    /// First row returned by `PRAGMA integrity_check` (`"ok"` for a sound database).
    integrity: String,
    /// Whether `integrity` equals `"ok"`.
    integrity_ok: bool,
    /// Whether `schema_version` is greater than zero.
    schema_ok: bool,
    /// Whether a memory-embedding table (`memory_embeddings` or `vec_memories`) exists.
    vec_memories_ok: bool,
    /// Number of live (not soft-deleted) memories that have no embedding row.
    vec_memories_missing: i64,
    /// Number of embedding rows whose memory is absent or soft-deleted.
    vec_memories_orphaned: i64,
    /// Whether an entity-embedding table (`entity_embeddings` or `vec_entities`) exists.
    vec_entities_ok: bool,
    /// Whether a chunk-embedding table (`chunk_embeddings` or `vec_chunks`) exists.
    vec_chunks_ok: bool,
    /// Whether `fts_memories` is listed in `sqlite_master`.
    fts_ok: bool,
    /// Whether a live FTS5 MATCH query against fts_memories succeeded.
    fts_query_ok: bool,
    /// Whether the embedding model directory exists in the models cache.
    model_ok: bool,
    /// Row counts for the main tables.
    counts: HealthCounts,
    /// Path of the database file that was inspected.
    db_path: String,
    /// Size of the database file in bytes (0 when its metadata cannot be read).
    db_size_bytes: u64,
    /// MAX(version) from refinery_schema_history — number of the last applied migration.
    /// Distinct from PRAGMA schema_version (SQLite DDL counter) and PRAGMA user_version
    /// (canonical SCHEMA_USER_VERSION from __debug_schema).
    schema_version: u32,
    /// List of entities referenced by memories but absent from the entities table.
    /// Empty in a healthy DB. Per the contract documented in SKILL.md.
    missing_entities: Vec<String>,
    /// Size in MB of the `-wal` file next to the database (0.0 when its metadata cannot be
    /// read, e.g. because the file does not exist).
    wal_size_mb: f64,
    /// SQLite journaling mode (wal, delete, truncate, persist, memory, off).
    journal_mode: String,
    /// SQLite version string, e.g. `"3.46.0"`.
    sqlite_version: String,
    /// Fraction of relationships that use the `mentions` relation type (0.0–1.0).
    /// Omitted when there are no relationships in the database.
    #[serde(skip_serializing_if = "Option::is_none")]
    mentions_ratio: Option<f64>,
    /// Human-readable warning when `mentions` relationships dominate the graph (ratio > 0.5).
    /// Omitted when the ratio is within acceptable bounds or there are no relationships.
    #[serde(skip_serializing_if = "Option::is_none")]
    mentions_warning: Option<String>,
    /// The relation type with the highest edge count in the namespace.
    /// Omitted when there are no relationships in the database.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_relation: Option<String>,
    /// Fraction of all edges occupied by `top_relation` (0.0–1.0).
    /// Omitted when there are no relationships in the database.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_relation_ratio: Option<f64>,
    /// Fraction of relationships that use the `applies_to` relation type (0.0–1.0).
    /// Omitted when there are no relationships or when `applies_to` is absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    applies_to_ratio: Option<f64>,
    /// Human-readable warning when a single relation type occupies more than 40 % of edges.
    /// Omitted when concentration is within acceptable bounds or there are no relationships.
    #[serde(skip_serializing_if = "Option::is_none")]
    relation_concentration_warning: Option<String>,
    /// Number of entities whose name differs from its normalized kebab-case form.
    #[serde(skip_serializing_if = "Option::is_none")]
    non_normalized_count: Option<i64>,
    /// Warning when non-normalized entities are detected.
    #[serde(skip_serializing_if = "Option::is_none")]
    normalization_warning: Option<String>,
    /// Number of entities with degree exceeding the super-hub threshold (50).
    #[serde(skip_serializing_if = "Option::is_none")]
    super_hub_count: Option<i64>,
    /// Warning listing top super-hub entity names.
    #[serde(skip_serializing_if = "Option::is_none")]
    super_hub_warning: Option<String>,
    /// Name of the entity with the highest connection count in the namespace.
    /// Omitted when there are no entities in the database.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_hub_entity: Option<String>,
    /// Number of connections (degree) of `top_hub_entity`.
    /// Omitted when there are no entities in the database.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_hub_degree: Option<i64>,
    /// Human-readable warning when `top_hub_entity` exceeds 50 connections.
    /// Omitted when degree is within acceptable bounds or there are no entities.
    #[serde(skip_serializing_if = "Option::is_none")]
    hub_warning: Option<String>,
    /// Per-check diagnostics, one entry per individual health check.
    checks: Vec<HealthCheck>,
    /// Wall-clock time spent producing this report, in milliseconds.
    elapsed_ms: u64,
}
134
135/// Checks whether a table (including virtual ones) exists in sqlite_master.
136fn table_exists(conn: &rusqlite::Connection, table_name: &str) -> bool {
137    conn.query_row(
138        "SELECT COUNT(*) FROM sqlite_master WHERE type IN ('table', 'shadow') AND name = ?1",
139        rusqlite::params![table_name],
140        |r| r.get::<_, i64>(0),
141    )
142    .unwrap_or(0)
143        > 0
144}
145
146fn first_existing_table<'a>(
147    conn: &rusqlite::Connection,
148    candidates: &'a [&'a str],
149) -> Option<&'a str> {
150    candidates
151        .iter()
152        .copied()
153        .find(|name| table_exists(conn, name))
154}
155
156fn count_rows(conn: &rusqlite::Connection, table_name: &str) -> i64 {
157    conn.query_row(&format!("SELECT COUNT(*) FROM {table_name}"), [], |r| {
158        r.get(0)
159    })
160    .unwrap_or(0)
161}
162
163fn memory_embedding_health(conn: &rusqlite::Connection) -> (bool, i64, i64, i64) {
164    let Some(table_name) = first_existing_table(conn, MEMORY_EMBEDDING_TABLES) else {
165        return (false, 0, 0, 0);
166    };
167
168    let total = count_rows(conn, table_name);
169    let missing = conn
170        .query_row(
171            &format!(
172                "SELECT COUNT(*)
173                 FROM memories m
174                 LEFT JOIN {table_name} me ON me.memory_id = m.id
175                 WHERE me.memory_id IS NULL AND m.deleted_at IS NULL"
176            ),
177            [],
178            |r| r.get(0),
179        )
180        .unwrap_or(0);
181    let orphaned = conn
182        .query_row(
183            &format!(
184                "SELECT COUNT(*)
185                 FROM {table_name} me
186                 LEFT JOIN memories m ON m.id = me.memory_id
187                 WHERE m.id IS NULL OR m.deleted_at IS NOT NULL"
188            ),
189            [],
190            |r| r.get(0),
191        )
192        .unwrap_or(0);
193
194    (true, total, missing, orphaned)
195}
196
197pub fn run(args: HealthArgs) -> Result<(), AppError> {
198    let start = Instant::now();
199    let _ = args.json; // --json is a no-op because output is already JSON by default
200    let _ = args.format; // --format is a no-op; JSON is always emitted on stdout
201    let paths = AppPaths::resolve(args.db.as_deref())?;
202
203    crate::storage::connection::ensure_db_ready(&paths)?;
204
205    let conn = open_ro(&paths.db)?;
206
207    let integrity: String = conn.query_row("PRAGMA integrity_check;", [], |r| r.get(0))?;
208    let integrity_ok = integrity == "ok";
209    tracing::info!(target: "health", integrity_ok = %integrity_ok, "PRAGMA integrity_check complete");
210
211    if !integrity_ok {
212        let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
213        output::emit_json(&HealthResponse {
214            status: "degraded".to_string(),
215            integrity: integrity.clone(),
216            integrity_ok: false,
217            schema_ok: false,
218            vec_memories_ok: false,
219            vec_memories_missing: 0,
220            vec_memories_orphaned: 0,
221            vec_entities_ok: false,
222            vec_chunks_ok: false,
223            fts_ok: false,
224            fts_query_ok: false,
225            model_ok: false,
226            counts: HealthCounts {
227                memories: 0,
228                memories_total: 0,
229                entities: 0,
230                relationships: 0,
231                vec_memories: 0,
232            },
233            db_path: paths.db.display().to_string(),
234            db_size_bytes,
235            schema_version: 0,
236            sqlite_version: "unknown".to_string(),
237            missing_entities: vec![],
238            wal_size_mb: 0.0,
239            journal_mode: "unknown".to_string(),
240            mentions_ratio: None,
241            mentions_warning: None,
242            top_relation: None,
243            top_relation_ratio: None,
244            applies_to_ratio: None,
245            relation_concentration_warning: None,
246            non_normalized_count: None,
247            normalization_warning: None,
248            super_hub_count: None,
249            super_hub_warning: None,
250            top_hub_entity: None,
251            top_hub_degree: None,
252            hub_warning: None,
253            checks: vec![HealthCheck {
254                name: "integrity".to_string(),
255                ok: false,
256                detail: Some(integrity),
257            }],
258            elapsed_ms: start.elapsed().as_millis() as u64,
259        })?;
260        return Err(AppError::Database(rusqlite::Error::SqliteFailure(
261            rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_CORRUPT),
262            Some("integrity check failed".to_string()),
263        )));
264    }
265
266    let memories_count: i64 = conn.query_row(
267        "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
268        [],
269        |r| r.get(0),
270    )?;
271    let entities_count: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
272    let relationships_count: i64 =
273        conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?;
274    let (vec_memories_ok, vec_memories_count, vec_memories_missing, vec_memories_orphaned) =
275        memory_embedding_health(&conn);
276
277    let mentions_count: i64 = conn.query_row(
278        "SELECT COUNT(*) FROM relationships WHERE relation = 'mentions'",
279        [],
280        |r| r.get(0),
281    )?;
282    let (mentions_ratio, mentions_warning) = if relationships_count > 0 {
283        let ratio = mentions_count as f64 / relationships_count as f64;
284        let warning = if ratio > 0.5 {
285            Some(format!(
286                "mentions relationships dominate graph at {:.1}% ({}/{} total); consider running prune-relations --relation mentions --dry-run",
287                ratio * 100.0,
288                mentions_count,
289                relationships_count
290            ))
291        } else {
292            None
293        };
294        (Some(ratio), warning)
295    } else {
296        (None, None)
297    };
298
299    // Relation concentration: find the most frequent relation type and check threshold.
300    let (top_relation, top_relation_ratio, applies_to_ratio, relation_concentration_warning) =
301        if relationships_count > 0 {
302            // Identify the relation with the highest edge count.
303            let (top_rel, top_count): (String, i64) = conn
304                .query_row(
305                    "SELECT relation, COUNT(*) AS cnt
306                     FROM relationships
307                     GROUP BY relation
308                     ORDER BY cnt DESC
309                     LIMIT 1",
310                    [],
311                    |r| Ok((r.get::<_, String>(0)?, r.get::<_, i64>(1)?)),
312                )
313                .unwrap_or_else(|_| ("unknown".to_string(), 0));
314
315            let top_ratio = top_count as f64 / relationships_count as f64;
316
317            // Compute applies_to ratio separately (may be 0 if absent).
318            let applies_count: i64 = conn
319                .query_row(
320                    "SELECT COUNT(*) FROM relationships WHERE relation = 'applies_to'",
321                    [],
322                    |r| r.get(0),
323                )
324                .unwrap_or(0);
325            let at_ratio = if applies_count > 0 {
326                Some(applies_count as f64 / relationships_count as f64)
327            } else {
328                None
329            };
330
331            let concentration_warning = if top_ratio > 0.40 {
332                Some(format!(
333                    "relation '{}' dominates graph at {:.1}% ({}/{} total); consider running prune-relations --relation {} --dry-run",
334                    top_rel,
335                    top_ratio * 100.0,
336                    top_count,
337                    relationships_count,
338                    top_rel,
339                ))
340            } else {
341                None
342            };
343
344            (
345                Some(top_rel),
346                Some(top_ratio),
347                at_ratio,
348                concentration_warning,
349            )
350        } else {
351            (None, None, None, None)
352        };
353
354    let status = "ok";
355
356    let schema_version: u32 = conn
357        .query_row(
358            "SELECT COALESCE(MAX(version), 0) FROM refinery_schema_history",
359            [],
360            |r| r.get::<_, i64>(0),
361        )
362        .unwrap_or(0) as u32;
363
364    let schema_ok = schema_version > 0;
365
366    // Checks vector tables via sqlite_master
367    let vec_entities_ok = first_existing_table(&conn, ENTITY_EMBEDDING_TABLES).is_some();
368    let vec_chunks_ok = first_existing_table(&conn, CHUNK_EMBEDDING_TABLES).is_some();
369
370    tracing::info!(target: "health", vec_memories_ok = %vec_memories_ok, vec_entities_ok = %vec_entities_ok, vec_missing = vec_memories_missing, vec_orphaned = vec_memories_orphaned, "vector table checks complete");
371    let fts_ok = table_exists(&conn, "fts_memories");
372
373    // Verifies that FTS5 can execute a MATCH query (catches index corruption distinct from table absence).
374    let fts_query_ok = if fts_ok {
375        conn.query_row(
376            "SELECT COUNT(*) FROM fts_memories WHERE fts_memories MATCH 'a' LIMIT 1",
377            [],
378            |r| r.get::<_, i64>(0),
379        )
380        .is_ok()
381    } else {
382        false
383    };
384
385    tracing::info!(target: "health", fts_ok = %fts_ok, fts_query_ok = %fts_query_ok, "FTS5 checks complete");
386
387    // Captures the SQLite runtime version for observability.
388    let sqlite_version: String = conn
389        .query_row("SELECT sqlite_version()", [], |r| r.get(0))
390        .unwrap_or_else(|_| "unknown".to_string());
391
392    // Detects orphan entities referenced by memories but absent from the entities table.
393    let mut missing_entities: Vec<String> = Vec::with_capacity(4);
394    let mut stmt = conn.prepare_cached(
395        "SELECT DISTINCT me.entity_id
396         FROM memory_entities me
397         LEFT JOIN entities e ON e.id = me.entity_id
398         WHERE e.id IS NULL",
399    )?;
400    let orphans: Vec<i64> = stmt
401        .query_map([], |r| r.get(0))?
402        .collect::<Result<Vec<_>, _>>()?;
403    for id in orphans {
404        missing_entities.push(format!("entity_id={id}"));
405    }
406
407    let journal_mode: String = conn
408        .query_row("PRAGMA journal_mode", [], |row| row.get::<_, String>(0))
409        .unwrap_or_else(|_| "unknown".to_string());
410
411    let wal_size_mb = fs::metadata(format!("{}-wal", paths.db.display()))
412        .map(|m| m.len() as f64 / 1024.0 / 1024.0)
413        .unwrap_or(0.0);
414
415    // Database file size in bytes
416    let db_size_bytes = fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
417
418    // Checks whether the ONNX model is present in the cache
419    let model_dir = paths.models.join("models--intfloat--multilingual-e5-small");
420    let model_ok = model_dir.exists();
421    tracing::info!(target: "health", model_ok = %model_ok, "embedding model check complete");
422
423    // Builds the checks array for detailed diagnostics
424    let mut checks: Vec<HealthCheck> = Vec::with_capacity(8);
425
426    // At this point integrity_ok is always true (corrupt DB returned early above).
427    checks.push(HealthCheck {
428        name: "integrity".to_string(),
429        ok: true,
430        detail: None,
431    });
432
433    checks.push(HealthCheck {
434        name: "schema_version".to_string(),
435        ok: schema_ok,
436        detail: if schema_ok {
437            None
438        } else {
439            Some(format!("schema_version={schema_version} (expected >0)"))
440        },
441    });
442
443    checks.push(HealthCheck {
444        name: "vec_memories".to_string(),
445        ok: vec_memories_ok,
446        detail: if vec_memories_ok {
447            None
448        } else {
449            Some("memory_embeddings/vec_memories table missing from sqlite_master".to_string())
450        },
451    });
452
453    checks.push(HealthCheck {
454        name: "vec_entities".to_string(),
455        ok: vec_entities_ok,
456        detail: if vec_entities_ok {
457            None
458        } else {
459            Some("entity_embeddings/vec_entities table missing from sqlite_master".to_string())
460        },
461    });
462
463    checks.push(HealthCheck {
464        name: "vec_chunks".to_string(),
465        ok: vec_chunks_ok,
466        detail: if vec_chunks_ok {
467            None
468        } else {
469            Some("chunk_embeddings/vec_chunks table missing from sqlite_master".to_string())
470        },
471    });
472
473    checks.push(HealthCheck {
474        name: "fts_memories".to_string(),
475        ok: fts_ok,
476        detail: if fts_ok {
477            None
478        } else {
479            Some("fts_memories table missing from sqlite_master".to_string())
480        },
481    });
482
483    checks.push(HealthCheck {
484        name: "fts_query".to_string(),
485        ok: fts_query_ok,
486        detail: if fts_query_ok {
487            None
488        } else {
489            Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string())
490        },
491    });
492
493    checks.push(HealthCheck {
494        name: "model_onnx".to_string(),
495        ok: model_ok,
496        detail: if model_ok {
497            None
498        } else {
499            Some(format!(
500                "model missing at {}; run 'sqlite-graphrag models download'",
501                model_dir.display()
502            ))
503        },
504    });
505
506    // G24: detect non-normalized entity names
507    let (non_normalized_count, normalization_warning) = {
508        let mut stmt = conn.prepare_cached("SELECT name FROM entities")?;
509        let names: Vec<String> = stmt
510            .query_map([], |r| r.get(0))?
511            .filter_map(|r| r.ok())
512            .collect();
513        let count = names
514            .iter()
515            .filter(|n| crate::parsers::normalize_entity_name(n) != **n)
516            .count() as i64;
517        let warning = if count > 0 {
518            Some(format!(
519                "run 'normalize-entities --yes' to fix {count} non-normalized entities"
520            ))
521        } else {
522            None
523        };
524        (Some(count), warning)
525    };
526
527    // G25: detect super-hub entities (degree > 50)
528    let (super_hub_count, super_hub_warning) = {
529        let mut stmt = conn.prepare_cached(
530            "SELECT e.name, COUNT(r.id) as deg FROM entities e \
531             LEFT JOIN relationships r ON e.id = r.source_id OR e.id = r.target_id \
532             GROUP BY e.id HAVING deg > 50 ORDER BY deg DESC LIMIT 5",
533        )?;
534        let hubs: Vec<(String, i64)> = stmt
535            .query_map([], |r| Ok((r.get(0)?, r.get(1)?)))?
536            .filter_map(|r| r.ok())
537            .collect();
538        let count = hubs.len() as i64;
539        let warning = if count > 0 {
540            let names: Vec<String> = hubs
541                .iter()
542                .map(|(n, d)| format!("{n} (degree {d})"))
543                .collect();
544            Some(format!("super-hubs detected: {}", names.join(", ")))
545        } else {
546            None
547        };
548        (Some(count), warning)
549    };
550
551    // G25 (extended): identify the single highest-degree entity for programmatic use.
552    let (top_hub_entity, top_hub_degree, hub_warning) = {
553        let result: Option<(String, i64)> = conn
554            .query_row(
555                "SELECT e.name, COUNT(r.id) AS degree
556                 FROM entities e
557                 LEFT JOIN relationships r ON e.id = r.source_id OR e.id = r.target_id
558                 GROUP BY e.id
559                 ORDER BY degree DESC
560                 LIMIT 1",
561                [],
562                |r| Ok((r.get::<_, String>(0)?, r.get::<_, i64>(1)?)),
563            )
564            .ok();
565        match result {
566            Some((name, degree)) => {
567                let warning = if degree > 50 {
568                    Some(format!(
569                        "entity '{name}' has {degree} connections; consider splitting or using --max-neighbors-per-hop"
570                    ))
571                } else {
572                    None
573                };
574                (Some(name), Some(degree), warning)
575            }
576            None => (None, None, None),
577        }
578    };
579
580    let response = HealthResponse {
581        status: status.to_string(),
582        integrity,
583        integrity_ok,
584        schema_ok,
585        vec_memories_ok,
586        vec_memories_missing,
587        vec_memories_orphaned,
588        vec_entities_ok,
589        vec_chunks_ok,
590        fts_ok,
591        fts_query_ok,
592        model_ok,
593        counts: HealthCounts {
594            memories: memories_count,
595            memories_total: memories_count,
596            entities: entities_count,
597            relationships: relationships_count,
598            vec_memories: vec_memories_count,
599        },
600        db_path: paths.db.display().to_string(),
601        db_size_bytes,
602        schema_version,
603        sqlite_version,
604        missing_entities,
605        wal_size_mb,
606        journal_mode,
607        mentions_ratio,
608        mentions_warning,
609        top_relation,
610        top_relation_ratio,
611        applies_to_ratio,
612        relation_concentration_warning,
613        non_normalized_count,
614        normalization_warning,
615        super_hub_count,
616        super_hub_warning,
617        top_hub_entity,
618        top_hub_degree,
619        hub_warning,
620        checks,
621        elapsed_ms: start.elapsed().as_millis() as u64,
622    };
623
624    output::emit_json(&response)?;
625
626    Ok(())
627}
628
629#[cfg(test)]
630mod tests {
631    use super::*;
632    use rusqlite::Connection;
633
634    fn open_health_test_db() -> Connection {
635        let conn = Connection::open_in_memory().unwrap();
636        conn.execute_batch(
637            "CREATE TABLE memories (
638                id INTEGER PRIMARY KEY,
639                deleted_at INTEGER
640            );
641            CREATE TABLE memory_embeddings (
642                memory_id INTEGER PRIMARY KEY,
643                namespace TEXT NOT NULL,
644                embedding BLOB NOT NULL,
645                source TEXT NOT NULL,
646                model TEXT NOT NULL,
647                dim INTEGER NOT NULL DEFAULT 384,
648                created_at TEXT NOT NULL DEFAULT '0'
649            );
650            CREATE TABLE vec_memories (
651                memory_id INTEGER PRIMARY KEY,
652                embedding BLOB NOT NULL,
653                created_at INTEGER NOT NULL DEFAULT 0
654            );",
655        )
656        .unwrap();
657        conn
658    }
659
660    #[test]
661    fn memory_embedding_health_prefers_memory_embeddings_and_counts_soft_deleted_as_orphaned() {
662        let conn = open_health_test_db();
663        conn.execute("INSERT INTO memories (id, deleted_at) VALUES (1, NULL)", [])
664            .unwrap();
665        conn.execute("INSERT INTO memories (id, deleted_at) VALUES (2, NULL)", [])
666            .unwrap();
667        conn.execute("INSERT INTO memories (id, deleted_at) VALUES (3, 123)", [])
668            .unwrap();
669        conn.execute(
670            "INSERT INTO memory_embeddings(memory_id, namespace, embedding, source, model, dim, created_at)
671             VALUES (1, 'global', X'00', 'llm', 'm', 384, '1')",
672            [],
673        )
674        .unwrap();
675        conn.execute(
676            "INSERT INTO memory_embeddings(memory_id, namespace, embedding, source, model, dim, created_at)
677             VALUES (3, 'global', X'00', 'llm', 'm', 384, '2')",
678            [],
679        )
680        .unwrap();
681        conn.execute(
682            "INSERT INTO memory_embeddings(memory_id, namespace, embedding, source, model, dim, created_at)
683             VALUES (99, 'global', X'00', 'llm', 'm', 384, '3')",
684            [],
685        )
686        .unwrap();
687        conn.execute(
688            "INSERT INTO vec_memories(memory_id, embedding, created_at) VALUES (777, X'00', 0)",
689            [],
690        )
691        .unwrap();
692
693        let (ok, total, missing, orphaned) = memory_embedding_health(&conn);
694        assert!(ok);
695        assert_eq!(total, 3);
696        assert_eq!(missing, 1);
697        assert_eq!(orphaned, 2);
698    }
699
700    #[test]
701    fn first_existing_table_falls_back_to_legacy_vec_name() {
702        let conn = Connection::open_in_memory().unwrap();
703        conn.execute_batch(
704            "CREATE TABLE vec_memories (
705                memory_id INTEGER PRIMARY KEY,
706                embedding BLOB NOT NULL,
707                created_at INTEGER NOT NULL DEFAULT 0
708            );",
709        )
710        .unwrap();
711
712        let resolved = first_existing_table(&conn, MEMORY_EMBEDDING_TABLES);
713        assert_eq!(resolved, Some("vec_memories"));
714    }
715
716    #[test]
717    fn health_check_serializes_all_new_fields() {
718        let response = HealthResponse {
719            status: "ok".to_string(),
720            integrity: "ok".to_string(),
721            integrity_ok: true,
722            schema_ok: true,
723            vec_memories_ok: true,
724            vec_memories_missing: 0,
725            vec_memories_orphaned: 0,
726            vec_entities_ok: true,
727            vec_chunks_ok: true,
728            fts_ok: true,
729            fts_query_ok: true,
730            model_ok: false,
731            counts: HealthCounts {
732                memories: 5,
733                memories_total: 5,
734                entities: 3,
735                relationships: 2,
736                vec_memories: 5,
737            },
738            db_path: "/tmp/test.sqlite".to_string(),
739            db_size_bytes: 4096,
740            schema_version: 6,
741            sqlite_version: "3.46.0".to_string(),
742            elapsed_ms: 0,
743            missing_entities: vec![],
744            wal_size_mb: 0.0,
745            journal_mode: "wal".to_string(),
746            mentions_ratio: None,
747            mentions_warning: None,
748            top_relation: None,
749            top_relation_ratio: None,
750            applies_to_ratio: None,
751            relation_concentration_warning: None,
752            non_normalized_count: None,
753            normalization_warning: None,
754            super_hub_count: None,
755            super_hub_warning: None,
756            top_hub_entity: None,
757            top_hub_degree: None,
758            hub_warning: None,
759            checks: vec![
760                HealthCheck {
761                    name: "integrity".to_string(),
762                    ok: true,
763                    detail: None,
764                },
765                HealthCheck {
766                    name: "model_onnx".to_string(),
767                    ok: false,
768                    detail: Some("model missing".to_string()),
769                },
770            ],
771        };
772
773        let json = serde_json::to_value(&response).unwrap();
774        assert_eq!(json["status"], "ok");
775        assert_eq!(json["integrity_ok"], true);
776        assert_eq!(json["schema_ok"], true);
777        assert_eq!(json["vec_memories_ok"], true);
778        assert_eq!(json["vec_entities_ok"], true);
779        assert_eq!(json["vec_chunks_ok"], true);
780        assert_eq!(json["fts_ok"], true);
781        assert_eq!(json["model_ok"], false);
782        assert_eq!(json["db_size_bytes"], 4096u64);
783        assert!(json["checks"].is_array());
784        assert_eq!(json["checks"].as_array().unwrap().len(), 2);
785
786        // Verifies that detail is absent when ok=true (skip_serializing_if)
787        let integrity_check = &json["checks"][0];
788        assert_eq!(integrity_check["name"], "integrity");
789        assert_eq!(integrity_check["ok"], true);
790        assert!(integrity_check.get("detail").is_none());
791
792        // Verifies that detail is present when ok=false
793        let model_check = &json["checks"][1];
794        assert_eq!(model_check["name"], "model_onnx");
795        assert_eq!(model_check["ok"], false);
796        assert_eq!(model_check["detail"], "model missing");
797    }
798
799    #[test]
800    fn health_check_without_detail_omits_field() {
801        let check = HealthCheck {
802            name: "vec_memories".to_string(),
803            ok: true,
804            detail: None,
805        };
806        let json = serde_json::to_value(&check).unwrap();
807        assert!(
808            json.get("detail").is_none(),
809            "detail field must be omitted when None"
810        );
811    }
812
813    #[test]
814    fn health_check_with_detail_serializes_field() {
815        let check = HealthCheck {
816            name: "fts_memories".to_string(),
817            ok: false,
818            detail: Some("fts_memories table missing from sqlite_master".to_string()),
819        };
820        let json = serde_json::to_value(&check).unwrap();
821        assert_eq!(
822            json["detail"],
823            "fts_memories table missing from sqlite_master"
824        );
825    }
826
827    #[test]
828    fn health_response_fts_query_ok_and_sqlite_version_serialize() {
829        // Verifies that fts_query_ok and sqlite_version appear in the serialized JSON
830        // with the expected keys and values.
831        let response = HealthResponse {
832            status: "ok".to_string(),
833            integrity: "ok".to_string(),
834            integrity_ok: true,
835            schema_ok: true,
836            vec_memories_ok: true,
837            vec_memories_missing: 0,
838            vec_memories_orphaned: 0,
839            vec_entities_ok: true,
840            vec_chunks_ok: true,
841            fts_ok: true,
842            fts_query_ok: true,
843            model_ok: true,
844            counts: HealthCounts {
845                memories: 0,
846                memories_total: 0,
847                entities: 0,
848                relationships: 0,
849                vec_memories: 0,
850            },
851            db_path: "/tmp/test.sqlite".to_string(),
852            db_size_bytes: 0,
853            schema_version: 1,
854            sqlite_version: "3.45.1".to_string(),
855            elapsed_ms: 0,
856            missing_entities: vec![],
857            wal_size_mb: 0.0,
858            journal_mode: "wal".to_string(),
859            mentions_ratio: None,
860            mentions_warning: None,
861            top_relation: None,
862            top_relation_ratio: None,
863            applies_to_ratio: None,
864            relation_concentration_warning: None,
865            non_normalized_count: None,
866            normalization_warning: None,
867            super_hub_count: None,
868            super_hub_warning: None,
869            top_hub_entity: None,
870            top_hub_degree: None,
871            hub_warning: None,
872            checks: vec![],
873        };
874
875        let json = serde_json::to_value(&response).unwrap();
876
877        // fts_query_ok must appear at the top level
878        assert_eq!(
879            json["fts_query_ok"], true,
880            "fts_query_ok must be present and true in serialized JSON"
881        );
882
883        // sqlite_version must appear at the top level with the exact string
884        assert_eq!(
885            json["sqlite_version"], "3.45.1",
886            "sqlite_version must be present and match the provided string"
887        );
888
889        // Verify fts_query_ok=false path includes the expected detail message
890        let check_fail = HealthCheck {
891            name: "fts_query".to_string(),
892            ok: false,
893            detail: Some("FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'".to_string()),
894        };
895        let check_json = serde_json::to_value(&check_fail).unwrap();
896        assert_eq!(check_json["name"], "fts_query");
897        assert_eq!(check_json["ok"], false);
898        assert_eq!(
899            check_json["detail"],
900            "FTS5 MATCH query failed — run 'sqlite-graphrag fts rebuild'"
901        );
902    }
903
904    fn make_full_response(
905        top_relation: Option<String>,
906        top_relation_ratio: Option<f64>,
907        applies_to_ratio: Option<f64>,
908        relation_concentration_warning: Option<String>,
909    ) -> HealthResponse {
910        HealthResponse {
911            status: "ok".to_string(),
912            integrity: "ok".to_string(),
913            integrity_ok: true,
914            schema_ok: true,
915            vec_memories_ok: true,
916            vec_memories_missing: 0,
917            vec_memories_orphaned: 0,
918            vec_entities_ok: true,
919            vec_chunks_ok: true,
920            fts_ok: true,
921            fts_query_ok: true,
922            model_ok: true,
923            counts: HealthCounts {
924                memories: 10,
925                memories_total: 10,
926                entities: 5,
927                relationships: 20,
928                vec_memories: 10,
929            },
930            db_path: "/tmp/test.sqlite".to_string(),
931            db_size_bytes: 8192,
932            schema_version: 3,
933            sqlite_version: "3.46.0".to_string(),
934            elapsed_ms: 1,
935            missing_entities: vec![],
936            wal_size_mb: 0.0,
937            journal_mode: "wal".to_string(),
938            mentions_ratio: None,
939            mentions_warning: None,
940            top_relation,
941            top_relation_ratio,
942            applies_to_ratio,
943            relation_concentration_warning,
944            non_normalized_count: None,
945            normalization_warning: None,
946            super_hub_count: None,
947            super_hub_warning: None,
948            top_hub_entity: None,
949            top_hub_degree: None,
950            hub_warning: None,
951            checks: vec![],
952        }
953    }
954
955    #[test]
956    fn health_concentration_fields_omitted_when_no_relationships() {
957        // Represents a DB with zero relationships.
958        let resp = make_full_response(None, None, None, None);
959        let json = serde_json::to_value(&resp).unwrap();
960        assert!(
961            json.get("top_relation").is_none(),
962            "top_relation must be omitted when None"
963        );
964        assert!(
965            json.get("top_relation_ratio").is_none(),
966            "top_relation_ratio must be omitted when None"
967        );
968        assert!(
969            json.get("applies_to_ratio").is_none(),
970            "applies_to_ratio must be omitted when None"
971        );
972        assert!(
973            json.get("relation_concentration_warning").is_none(),
974            "relation_concentration_warning must be omitted when None"
975        );
976    }
977
978    #[test]
979    fn health_concentration_fields_present_with_data() {
980        let resp = make_full_response(
981            Some("mentions".to_string()),
982            Some(0.60),
983            Some(0.10),
984            Some("relation 'mentions' dominates graph at 60.0%".to_string()),
985        );
986        let json = serde_json::to_value(&resp).unwrap();
987        assert_eq!(json["top_relation"], "mentions");
988        assert!((json["top_relation_ratio"].as_f64().unwrap() - 0.60).abs() < 1e-9);
989        assert!((json["applies_to_ratio"].as_f64().unwrap() - 0.10).abs() < 1e-9);
990        assert!(json["relation_concentration_warning"]
991            .as_str()
992            .unwrap()
993            .contains("60.0%"));
994    }
995
996    #[test]
997    fn health_concentration_warning_absent_when_ratio_below_threshold() {
998        // top_relation_ratio of 0.39 is below the 0.40 threshold — no warning.
999        let resp = make_full_response(Some("uses".to_string()), Some(0.39), None, None);
1000        let json = serde_json::to_value(&resp).unwrap();
1001        assert_eq!(json["top_relation"], "uses");
1002        assert!(
1003            json.get("relation_concentration_warning").is_none(),
1004            "warning must be absent when ratio <= 0.40"
1005        );
1006    }
1007
1008    #[test]
1009    fn health_concentration_warning_present_at_threshold() {
1010        // Exactly at 0.41 (above 0.40) — warning must appear.
1011        let resp = make_full_response(
1012            Some("depends_on".to_string()),
1013            Some(0.41),
1014            None,
1015            Some("relation 'depends_on' dominates graph at 41.0%".to_string()),
1016        );
1017        let json = serde_json::to_value(&resp).unwrap();
1018        assert!(
1019            json["relation_concentration_warning"].is_string(),
1020            "warning must be present when top_relation_ratio > 0.40"
1021        );
1022    }
1023
1024    #[test]
1025    fn health_applies_to_ratio_omitted_when_none() {
1026        // applies_to_ratio is None when there are no applies_to edges.
1027        let resp = make_full_response(Some("related".to_string()), Some(0.30), None, None);
1028        let json = serde_json::to_value(&resp).unwrap();
1029        assert!(
1030            json.get("applies_to_ratio").is_none(),
1031            "applies_to_ratio must be omitted when None"
1032        );
1033    }
1034}