use crate::errors::AppError;
use crate::output;
use crate::paths::AppPaths;
use crate::storage::connection::{open_ro, open_rw};
use serde::Serialize;
// Top-level arguments for the `vec` command group. The only payload is the
// chosen subcommand; the help text (`about`, `after_long_help`) is declared
// explicitly in the `#[command(...)]` attribute below.
// NOTE: plain `//` comments are used on purpose — clap's derive macros pick up
// `///` doc comments as help text, which would change the CLI output.
#[derive(clap::Args)]
#[command(
about = "Vector index maintenance (orphan detection, purge, stats)",
after_long_help = "EXAMPLES:\n \
# List orphan vec_memories rows whose memory_id is gone\n \
sqlite-graphrag vec orphan-list\n\n \
# Dry-run the purge (does not delete)\n \
sqlite-graphrag vec purge-orphan --dry-run\n\n \
# Actually purge orphans\n \
sqlite-graphrag vec purge-orphan --yes\n\n \
# Show stats for all vec0 tables\n \
sqlite-graphrag vec stats --json"
)]
pub struct VecArgs {
// Which maintenance operation to run; dispatched by `run`.
#[command(subcommand)]
pub command: VecSubcommand,
}
// Subcommands of the `vec` group. Each variant carries its own argument struct
// and is routed to a dedicated handler by `run`.
#[derive(clap::Subcommand)]
pub enum VecSubcommand {
// Handled by `run_orphan_list`: lists `vec_memories` rows with no matching `memories` row.
OrphanList(VecOrphanListArgs),
// Handled by `run_purge_orphan`: deletes orphan vector rows (requires `--yes`, or previews with `--dry-run`).
PurgeOrphan(VecPurgeOrphanArgs),
// Handled by `run_stats`: reports row counts and orphan coverage.
Stats(VecStatsArgs),
}
// Arguments accepted by the orphan-listing subcommand.
#[derive(clap::Args)]
pub struct VecOrphanListArgs {
// Hidden `--json` flag. The handler in this file never reads it and always
// emits JSON; presumably kept so callers passing `--json` are accepted.
#[arg(long, hide = true)]
pub json: bool,
// Optional database path (`--db`), falling back to the
// `SQLITE_GRAPHRAG_DB_PATH` environment variable. Passed to `AppPaths::resolve`.
#[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
pub db: Option<String>,
}
// Attribute-free twin of `VecOrphanListArgs` (same two fields, no `#[arg]`
// configuration). NOTE(review): nothing in this file references this type —
// confirm whether it is used elsewhere in the crate or is leftover code.
#[derive(clap::Args)]
pub struct VecOrphanListInner {
// Mirrors `VecOrphanListArgs::json`.
pub json: bool,
// Mirrors `VecOrphanListArgs::db`.
pub db: Option<String>,
}
// Arguments accepted by the orphan-purge subcommand.
#[derive(clap::Args)]
pub struct VecPurgeOrphanArgs {
// Hidden `--json` flag. The handler in this file never reads it and always
// emits JSON.
#[arg(long, hide = true)]
pub json: bool,
// Optional database path (`--db`), falling back to the
// `SQLITE_GRAPHRAG_DB_PATH` environment variable. Passed to `AppPaths::resolve`.
#[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
pub db: Option<String>,
// Explicit confirmation (`--yes`). Without it (and without `--dry-run`) the
// handler refuses to delete and returns a validation error.
#[arg(long, default_value_t = false)]
pub yes: bool,
// Preview mode (`--dry-run`): orphan counts are computed and logged but
// nothing is deleted. Checked before `--yes`, so it wins when both are given.
#[arg(long, default_value_t = false)]
pub dry_run: bool,
}
// Arguments accepted by the stats subcommand.
#[derive(clap::Args)]
pub struct VecStatsArgs {
// Hidden `--json` flag. The handler in this file never reads it and always
// emits JSON.
#[arg(long, hide = true)]
pub json: bool,
// Optional database path (`--db`), falling back to the
// `SQLITE_GRAPHRAG_DB_PATH` environment variable. Passed to `AppPaths::resolve`.
#[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
pub db: Option<String>,
}
/// One orphan row reported by the orphan-listing subcommand: a `vec_memories`
/// row whose `memory_id` has no matching row in `memories`.
#[derive(Serialize)]
struct VecOrphanListItem {
/// Value of the `memory_id` column of the orphan `vec_memories` row.
memory_id: i64,
/// Hex-encoded BLAKE3 hash of the row's raw `embedding` blob, so the vector
/// can be identified without dumping its bytes.
vector_hash: String,
/// Value of the row's `created_at` column (unit not visible here —
/// presumably a Unix timestamp; confirm against the schema).
created_at: i64,
}
/// JSON payload emitted by the orphan-listing subcommand.
#[derive(Serialize)]
struct VecOrphanListResponse {
/// Operation tag; the handler always sets it to `"orphan_list"`.
action: String,
/// Number of entries in `items`.
count: i64,
/// Orphan rows, ordered by `memory_id` (empty when `vec_memories` is absent).
items: Vec<VecOrphanListItem>,
/// Wall-clock time spent in the handler, in milliseconds.
elapsed_ms: u64,
}
/// JSON payload emitted by the orphan-purge subcommand.
#[derive(Serialize)]
struct VecPurgeOrphanResponse {
/// Operation tag set by the handler: `"purge_orphan"` when `vec_memories`
/// does not exist, `"purge_orphan_dry_run"` for a dry run, and
/// `"purged_orphan"` after a real purge.
action: String,
/// Rows deleted from `vec_memories` (always 0 for a dry run).
deleted: i64,
/// Rows deleted from `vec_entities` (0 when the table is absent or on a dry run).
deleted_entities: i64,
/// Rows deleted from `vec_chunks` (0 when the table is absent or on a dry run).
deleted_chunks: i64,
/// Whether this response describes a dry run.
dry_run: bool,
/// Wall-clock time spent in the handler, in milliseconds.
elapsed_ms: u64,
}
/// JSON payload emitted by the stats subcommand.
#[derive(Serialize)]
struct VecStatsResponse {
/// Total number of rows in `vec_memories` (0 when the table is absent).
total_rows: i64,
/// Number of `vec_memories` rows with no matching row in `memories`.
orphaned: i64,
/// Share of non-orphan rows: `(total_rows - orphaned) / total_rows * 100`;
/// the handler reports `100.0` when `total_rows` is 0.
coverage_percent: f64,
/// Row count of `vec_entities`; omitted from the JSON when `None`
/// (table absent or count query failed).
#[serde(skip_serializing_if = "Option::is_none")]
vec_entities_rows: Option<i64>,
/// Row count of `vec_chunks`; omitted from the JSON when `None`
/// (table absent or count query failed).
#[serde(skip_serializing_if = "Option::is_none")]
vec_chunks_rows: Option<i64>,
/// Row count of `fts_memories` (the handler falls back to 0 if the query fails).
fts_memories_rows: i64,
/// Wall-clock time spent in the handler, in milliseconds.
elapsed_ms: u64,
}
pub fn run(args: VecArgs) -> Result<(), AppError> {
match args.command {
VecSubcommand::OrphanList(a) => run_orphan_list(a),
VecSubcommand::PurgeOrphan(a) => run_purge_orphan(a),
VecSubcommand::Stats(a) => run_stats(a),
}
}
/// Lists every `vec_memories` row whose `memory_id` has no counterpart in
/// `memories` and emits the result as JSON.
///
/// The database is opened read-only. When the `vec_memories` table does not
/// exist (or the existence probe itself fails) an empty listing is emitted.
/// Each orphan is reported with the BLAKE3 hex digest of its embedding blob
/// instead of the raw bytes.
///
/// # Errors
/// Propagates failures from path resolution, the readiness check, opening the
/// database, preparing/running the orphan query, and JSON emission.
fn run_orphan_list(args: VecOrphanListArgs) -> Result<(), AppError> {
    const ORPHAN_ROWS_SQL: &str = "SELECT v.memory_id, v.embedding, v.created_at\n\
        FROM vec_memories v\n\
        LEFT JOIN memories m ON m.id = v.memory_id\n\
        WHERE m.id IS NULL\n\
        ORDER BY v.memory_id";

    let timer = std::time::Instant::now();
    let paths = AppPaths::resolve(args.db.as_deref())?;
    crate::storage::connection::ensure_db_ready(&paths)?;
    let conn = open_ro(&paths.db)?;

    // Any error while probing sqlite_master is treated as "table missing".
    let has_vec_memories = matches!(
        conn.query_row(
            "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='vec_memories'",
            [],
            |row| row.get::<_, i64>(0),
        ),
        Ok(hits) if hits > 0
    );

    // Stays empty when the table is absent; otherwise filled from the query.
    let mut items: Vec<VecOrphanListItem> = Vec::new();
    if has_vec_memories {
        let mut stmt = conn.prepare(ORPHAN_ROWS_SQL)?;
        let mapped = stmt.query_map([], |row| {
            let memory_id: i64 = row.get(0)?;
            let embedding: Vec<u8> = row.get(1)?;
            let created_at: i64 = row.get(2)?;
            Ok(VecOrphanListItem {
                memory_id,
                vector_hash: blake3::hash(&embedding).to_hex().to_string(),
                created_at,
            })
        })?;
        for item in mapped {
            items.push(item?);
        }
    }

    let count = items.len() as i64;
    output::emit_json(&VecOrphanListResponse {
        action: String::from("orphan_list"),
        count,
        items,
        elapsed_ms: timer.elapsed().as_millis() as u64,
    })
}
fn run_purge_orphan(args: VecPurgeOrphanArgs) -> Result<(), AppError> {
let start = std::time::Instant::now();
let paths = AppPaths::resolve(args.db.as_deref())?;
crate::storage::connection::ensure_db_ready(&paths)?;
let conn = open_rw(&paths.db)?;
let table_exists: bool = conn
.query_row(
"SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='vec_memories'",
[],
|r| r.get::<_, i64>(0).map(|v| v > 0),
)
.unwrap_or(false);
if !table_exists {
return output::emit_json(&VecPurgeOrphanResponse {
action: "purge_orphan".to_string(),
deleted: 0,
deleted_entities: 0,
deleted_chunks: 0,
dry_run: args.dry_run,
elapsed_ms: start.elapsed().as_millis() as u64,
});
}
let orphan_count: i64 = conn
.query_row(
"SELECT COUNT(*) FROM vec_memories v
LEFT JOIN memories m ON m.id = v.memory_id
WHERE m.id IS NULL",
[],
|r| r.get(0),
)
.unwrap_or(0);
let orphan_entities_count: i64 = if vec_table_exists(&conn, "vec_entities") {
conn.query_row(
"SELECT COUNT(*) FROM vec_entities v
LEFT JOIN memories m ON m.id = v.memory_id
WHERE m.id IS NULL",
[],
|r| r.get(0),
)
.unwrap_or(0)
} else {
0
};
let orphan_chunks_count: i64 = if vec_table_exists(&conn, "vec_chunks") {
conn.query_row(
"SELECT COUNT(*) FROM vec_chunks v
LEFT JOIN memories m ON m.id = v.memory_id
WHERE m.id IS NULL",
[],
|r| r.get(0),
)
.unwrap_or(0)
} else {
0
};
if args.dry_run {
tracing::info!(target: "vec", orphan_count, orphan_entities_count, orphan_chunks_count, "dry-run: would delete orphans");
return output::emit_json(&VecPurgeOrphanResponse {
action: "purge_orphan_dry_run".to_string(),
deleted: 0,
deleted_entities: 0,
deleted_chunks: 0,
dry_run: true,
elapsed_ms: start.elapsed().as_millis() as u64,
});
}
if !args.yes {
return Err(AppError::Validation(format!(
"refusing to delete {orphan_count} vec_memories + {orphan_entities_count} vec_entities + {orphan_chunks_count} vec_chunks orphan rows without --yes (use --dry-run to preview)"
)));
}
let deleted: i64 = conn.execute(
"DELETE FROM vec_memories
WHERE memory_id NOT IN (SELECT id FROM memories)",
[],
)? as i64;
let deleted_entities: i64 = if vec_table_exists(&conn, "vec_entities") {
conn.execute(
"DELETE FROM vec_entities
WHERE memory_id NOT IN (SELECT id FROM memories)",
[],
)
.unwrap_or(0) as i64
} else {
0
};
let deleted_chunks: i64 = if vec_table_exists(&conn, "vec_chunks") {
conn.execute(
"DELETE FROM vec_chunks
WHERE memory_id NOT IN (SELECT id FROM memories)",
[],
)
.unwrap_or(0) as i64
} else {
0
};
tracing::info!(target: "vec", deleted, deleted_entities, deleted_chunks, "purged orphan vec rows");
output::emit_json(&VecPurgeOrphanResponse {
action: "purged_orphan".to_string(),
deleted,
deleted_entities,
deleted_chunks,
dry_run: false,
elapsed_ms: start.elapsed().as_millis() as u64,
})?;
Ok(())
}
/// Reports row counts for the vector / FTS tables and the orphan coverage of
/// `vec_memories`, emitted as JSON.
///
/// The database is opened read-only. All counts are best-effort:
/// - `total_rows` / `orphaned` are 0 when `vec_memories` is absent or a count
///   query fails;
/// - `vec_entities_rows` / `vec_chunks_rows` are `None` (omitted from the
///   JSON) when the table is absent or its count query fails;
/// - `fts_memories_rows` falls back to 0 when its count query fails.
///
/// `coverage_percent` is the share of `vec_memories` rows that still have a
/// matching `memories` row, and is reported as `100.0` when there are no rows.
///
/// # Errors
/// Propagates failures from path resolution, the readiness check, opening the
/// database, and JSON emission.
fn run_stats(args: VecStatsArgs) -> Result<(), AppError> {
    let start = std::time::Instant::now();
    let paths = AppPaths::resolve(args.db.as_deref())?;
    crate::storage::connection::ensure_db_ready(&paths)?;
    let conn = open_ro(&paths.db)?;

    // `SELECT COUNT(*)` for one table; `None` when the query fails. Only
    // called with the fixed table names below, so interpolation is safe.
    let count_rows = |table: &str| -> Option<i64> {
        conn.query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |r| r.get(0))
            .ok()
    };
    // Same, but `None` up front when the (optional) table does not exist.
    let count_rows_if_present = |table: &str| -> Option<i64> {
        if vec_table_exists(&conn, table) {
            count_rows(table)
        } else {
            None
        }
    };

    let (total_rows, orphaned): (i64, i64) = if vec_table_exists(&conn, "vec_memories") {
        let total = count_rows("vec_memories").unwrap_or(0);
        let orph: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM vec_memories v
                 LEFT JOIN memories m ON m.id = v.memory_id
                 WHERE m.id IS NULL",
                [],
                |r| r.get(0),
            )
            .unwrap_or(0);
        (total, orph)
    } else {
        (0, 0)
    };

    // An empty (or missing) index has nothing uncovered, hence 100%.
    let coverage_percent = if total_rows > 0 {
        ((total_rows - orphaned) as f64 / total_rows as f64) * 100.0
    } else {
        100.0
    };

    let vec_entities_rows = count_rows_if_present("vec_entities");
    let vec_chunks_rows = count_rows_if_present("vec_chunks");
    let fts_memories_rows = count_rows("fts_memories").unwrap_or(0);

    output::emit_json(&VecStatsResponse {
        total_rows,
        orphaned,
        coverage_percent,
        vec_entities_rows,
        vec_chunks_rows,
        fts_memories_rows,
        elapsed_ms: start.elapsed().as_millis() as u64,
    })
}
/// Returns `true` when `sqlite_master` lists a table called `name`.
///
/// Any error while running the probe is reported as `false`, so callers can
/// treat "cannot tell" the same as "not there".
fn vec_table_exists(conn: &rusqlite::Connection, name: &str) -> bool {
    const PROBE_SQL: &str = "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?1";
    match conn.query_row(PROBE_SQL, rusqlite::params![name], |row| {
        row.get::<_, i64>(0)
    }) {
        Ok(hits) => hits > 0,
        Err(_) => false,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty orphan listing still serializes every top-level field.
    #[test]
    fn vec_orphan_list_response_serializes_all_fields() {
        let json = serde_json::to_value(&VecOrphanListResponse {
            action: String::from("orphan_list"),
            count: 0,
            items: vec![],
            elapsed_ms: 5,
        })
        .unwrap();

        assert_eq!(json["action"], "orphan_list");
        assert_eq!(json["count"], 0i64);
        assert_eq!(json["elapsed_ms"], 5u64);
        assert!(json["items"].is_array());
    }

    /// The dry-run flag and the zero deletion count survive serialization.
    #[test]
    fn vec_purge_orphan_response_serializes_dry_run_flag() {
        let json = serde_json::to_value(&VecPurgeOrphanResponse {
            action: String::from("purge_orphan_dry_run"),
            deleted: 0,
            deleted_entities: 0,
            deleted_chunks: 0,
            dry_run: true,
            elapsed_ms: 1,
        })
        .unwrap();

        assert_eq!(json["dry_run"], true);
        assert_eq!(json["deleted"], 0i64);
    }

    /// Present optional counts are serialized, absent ones are omitted, and
    /// the coverage value is passed through unchanged.
    #[test]
    fn vec_stats_response_computes_coverage() {
        let json = serde_json::to_value(&VecStatsResponse {
            total_rows: 100,
            orphaned: 25,
            coverage_percent: 75.0,
            vec_entities_rows: Some(50),
            vec_chunks_rows: None,
            fts_memories_rows: 100,
            elapsed_ms: 10,
        })
        .unwrap();

        assert_eq!(json["coverage_percent"], 75.0);
        assert_eq!(json["vec_entities_rows"], 50i64);
        assert!(json.get("vec_chunks_rows").is_none());
    }
}