1use crate::errors::AppError;
16use crate::output;
17use crate::paths::AppPaths;
18use crate::storage::connection::{open_ro, open_rw};
19use serde::Serialize;
20
21#[derive(clap::Args)]
23#[command(
24 about = "Vector index maintenance (orphan detection, purge, stats)",
25 after_long_help = "EXAMPLES:\n \
26 # List orphan vec_memories rows whose memory_id is gone\n \
27 sqlite-graphrag vec orphan-list\n\n \
28 # Dry-run the purge (does not delete)\n \
29 sqlite-graphrag vec purge-orphan --dry-run\n\n \
30 # Actually purge orphans\n \
31 sqlite-graphrag vec purge-orphan --yes\n\n \
32 # Show stats for all vec0 tables\n \
33 sqlite-graphrag vec stats --json"
34)]
35pub struct VecArgs {
36 #[command(subcommand)]
37 pub command: VecSubcommand,
38}
39
40#[derive(clap::Subcommand)]
42pub enum VecSubcommand {
43 OrphanList(VecOrphanListArgs),
45 PurgeOrphan(VecPurgeOrphanArgs),
47 Stats(VecStatsArgs),
49}
50
51#[derive(clap::Args)]
53pub struct VecOrphanListArgs {
54 #[arg(long, hide = true)]
56 pub json: bool,
57 #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
59 pub db: Option<String>,
60}
61
62#[derive(clap::Args)]
64pub struct VecOrphanListInner {
65 pub json: bool,
66 pub db: Option<String>,
67}
68
69#[derive(clap::Args)]
71pub struct VecPurgeOrphanArgs {
72 #[arg(long, hide = true)]
74 pub json: bool,
75 #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
77 pub db: Option<String>,
78 #[arg(long, default_value_t = false)]
80 pub yes: bool,
81 #[arg(long, default_value_t = false)]
83 pub dry_run: bool,
84}
85
86#[derive(clap::Args)]
88pub struct VecStatsArgs {
89 #[arg(long, hide = true)]
91 pub json: bool,
92 #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
94 pub db: Option<String>,
95}
96
97#[derive(Serialize)]
98struct VecOrphanListItem {
99 memory_id: i64,
101 vector_hash: String,
103 created_at: i64,
105}
106
107#[derive(Serialize)]
108struct VecOrphanListResponse {
109 action: String,
110 count: i64,
111 items: Vec<VecOrphanListItem>,
112 elapsed_ms: u64,
113}
114
115#[derive(Serialize)]
116struct VecPurgeOrphanResponse {
117 action: String,
118 deleted: i64,
119 deleted_entities: i64,
121 deleted_chunks: i64,
123 dry_run: bool,
124 elapsed_ms: u64,
125}
126
127#[derive(Serialize)]
128struct VecStatsResponse {
129 total_rows: i64,
130 orphaned: i64,
131 coverage_percent: f64,
132 #[serde(skip_serializing_if = "Option::is_none")]
133 vec_entities_rows: Option<i64>,
134 #[serde(skip_serializing_if = "Option::is_none")]
135 vec_chunks_rows: Option<i64>,
136 fts_memories_rows: i64,
137 elapsed_ms: u64,
138}
139
140pub fn run(args: VecArgs) -> Result<(), AppError> {
145 match args.command {
146 VecSubcommand::OrphanList(a) => run_orphan_list(a),
147 VecSubcommand::PurgeOrphan(a) => run_purge_orphan(a),
148 VecSubcommand::Stats(a) => run_stats(a),
149 }
150}
151
152fn run_orphan_list(args: VecOrphanListArgs) -> Result<(), AppError> {
153 let start = std::time::Instant::now();
154 let paths = AppPaths::resolve(args.db.as_deref())?;
155 crate::storage::connection::ensure_db_ready(&paths)?;
156 let conn = open_ro(&paths.db)?;
157
158 let table_exists: bool = conn
161 .query_row(
162 "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='vec_memories'",
163 [],
164 |r| r.get::<_, i64>(0).map(|v| v > 0),
165 )
166 .unwrap_or(false);
167 if !table_exists {
168 return output::emit_json(&VecOrphanListResponse {
169 action: "orphan_list".to_string(),
170 count: 0,
171 items: Vec::new(),
172 elapsed_ms: start.elapsed().as_millis() as u64,
173 });
174 }
175
176 let mut stmt = conn.prepare(
181 "SELECT v.memory_id, v.embedding, v.created_at
182 FROM vec_memories v
183 LEFT JOIN memories m ON m.id = v.memory_id
184 WHERE m.id IS NULL
185 ORDER BY v.memory_id",
186 )?;
187 let rows: Vec<VecOrphanListItem> = stmt
188 .query_map([], |r| {
189 let memory_id: i64 = r.get(0)?;
190 let blob: Vec<u8> = r.get(1)?;
191 let created_at: i64 = r.get(2)?;
192 let vector_hash = blake3::hash(&blob).to_hex().to_string();
193 Ok(VecOrphanListItem {
194 memory_id,
195 vector_hash,
196 created_at,
197 })
198 })?
199 .collect::<Result<Vec<_>, _>>()?;
200 let count = rows.len() as i64;
201
202 output::emit_json(&VecOrphanListResponse {
203 action: "orphan_list".to_string(),
204 count,
205 items: rows,
206 elapsed_ms: start.elapsed().as_millis() as u64,
207 })?;
208 Ok(())
209}
210
211fn run_purge_orphan(args: VecPurgeOrphanArgs) -> Result<(), AppError> {
212 let start = std::time::Instant::now();
213 let paths = AppPaths::resolve(args.db.as_deref())?;
214 crate::storage::connection::ensure_db_ready(&paths)?;
215 let conn = open_rw(&paths.db)?;
216
217 let table_exists: bool = conn
219 .query_row(
220 "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='vec_memories'",
221 [],
222 |r| r.get::<_, i64>(0).map(|v| v > 0),
223 )
224 .unwrap_or(false);
225 if !table_exists {
226 return output::emit_json(&VecPurgeOrphanResponse {
227 action: "purge_orphan".to_string(),
228 deleted: 0,
229 deleted_entities: 0,
230 deleted_chunks: 0,
231 dry_run: args.dry_run,
232 elapsed_ms: start.elapsed().as_millis() as u64,
233 });
234 }
235
236 let orphan_count: i64 = conn
237 .query_row(
238 "SELECT COUNT(*) FROM vec_memories v
239 LEFT JOIN memories m ON m.id = v.memory_id
240 WHERE m.id IS NULL",
241 [],
242 |r| r.get(0),
243 )
244 .unwrap_or(0);
245
246 let orphan_entities_count: i64 = if vec_table_exists(&conn, "vec_entities") {
250 conn.query_row(
251 "SELECT COUNT(*) FROM vec_entities v
252 LEFT JOIN memories m ON m.id = v.memory_id
253 WHERE m.id IS NULL",
254 [],
255 |r| r.get(0),
256 )
257 .unwrap_or(0)
258 } else {
259 0
260 };
261 let orphan_chunks_count: i64 = if vec_table_exists(&conn, "vec_chunks") {
262 conn.query_row(
263 "SELECT COUNT(*) FROM vec_chunks v
264 LEFT JOIN memories m ON m.id = v.memory_id
265 WHERE m.id IS NULL",
266 [],
267 |r| r.get(0),
268 )
269 .unwrap_or(0)
270 } else {
271 0
272 };
273
274 if args.dry_run {
275 tracing::info!(target: "vec", orphan_count, orphan_entities_count, orphan_chunks_count, "dry-run: would delete orphans");
276 return output::emit_json(&VecPurgeOrphanResponse {
277 action: "purge_orphan_dry_run".to_string(),
278 deleted: 0,
279 deleted_entities: 0,
280 deleted_chunks: 0,
281 dry_run: true,
282 elapsed_ms: start.elapsed().as_millis() as u64,
283 });
284 }
285
286 if !args.yes {
287 return Err(AppError::Validation(format!(
288 "refusing to delete {orphan_count} vec_memories + {orphan_entities_count} vec_entities + {orphan_chunks_count} vec_chunks orphan rows without --yes (use --dry-run to preview)"
289 )));
290 }
291
292 let deleted: i64 = conn.execute(
293 "DELETE FROM vec_memories
294 WHERE memory_id NOT IN (SELECT id FROM memories)",
295 [],
296 )? as i64;
297
298 let deleted_entities: i64 = if vec_table_exists(&conn, "vec_entities") {
299 conn.execute(
300 "DELETE FROM vec_entities
301 WHERE memory_id NOT IN (SELECT id FROM memories)",
302 [],
303 )
304 .unwrap_or(0) as i64
305 } else {
306 0
307 };
308 let deleted_chunks: i64 = if vec_table_exists(&conn, "vec_chunks") {
309 conn.execute(
310 "DELETE FROM vec_chunks
311 WHERE memory_id NOT IN (SELECT id FROM memories)",
312 [],
313 )
314 .unwrap_or(0) as i64
315 } else {
316 0
317 };
318
319 tracing::info!(target: "vec", deleted, deleted_entities, deleted_chunks, "purged orphan vec rows");
320
321 output::emit_json(&VecPurgeOrphanResponse {
322 action: "purged_orphan".to_string(),
323 deleted,
324 deleted_entities,
325 deleted_chunks,
326 dry_run: false,
327 elapsed_ms: start.elapsed().as_millis() as u64,
328 })?;
329 Ok(())
330}
331
332fn run_stats(args: VecStatsArgs) -> Result<(), AppError> {
333 let start = std::time::Instant::now();
334 let paths = AppPaths::resolve(args.db.as_deref())?;
335 crate::storage::connection::ensure_db_ready(&paths)?;
336 let conn = open_ro(&paths.db)?;
337
338 let vec_memories_exists: bool = conn
339 .query_row(
340 "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='vec_memories'",
341 [],
342 |r| r.get::<_, i64>(0).map(|v| v > 0),
343 )
344 .unwrap_or(false);
345 let (total_rows, orphaned) = if vec_memories_exists {
346 let total: i64 = conn
347 .query_row("SELECT COUNT(*) FROM vec_memories", [], |r| r.get(0))
348 .unwrap_or(0);
349 let orph: i64 = conn
350 .query_row(
351 "SELECT COUNT(*) FROM vec_memories v
352 LEFT JOIN memories m ON m.id = v.memory_id
353 WHERE m.id IS NULL",
354 [],
355 |r| r.get(0),
356 )
357 .unwrap_or(0);
358 (total, orph)
359 } else {
360 (0, 0)
361 };
362 let coverage_percent = if total_rows > 0 {
363 ((total_rows - orphaned) as f64 / total_rows as f64) * 100.0
364 } else {
365 100.0
366 };
367
368 let vec_entities_rows = if vec_table_exists(&conn, "vec_entities") {
369 conn.query_row("SELECT COUNT(*) FROM vec_entities", [], |r| r.get(0))
370 .ok()
371 } else {
372 None
373 };
374 let vec_chunks_rows = if vec_table_exists(&conn, "vec_chunks") {
375 conn.query_row("SELECT COUNT(*) FROM vec_chunks", [], |r| r.get(0))
376 .ok()
377 } else {
378 None
379 };
380 let fts_memories_rows = conn
381 .query_row("SELECT COUNT(*) FROM fts_memories", [], |r| r.get(0))
382 .unwrap_or(0);
383
384 output::emit_json(&VecStatsResponse {
385 total_rows,
386 orphaned,
387 coverage_percent,
388 vec_entities_rows,
389 vec_chunks_rows,
390 fts_memories_rows,
391 elapsed_ms: start.elapsed().as_millis() as u64,
392 })?;
393 Ok(())
394}
395
396fn vec_table_exists(conn: &rusqlite::Connection, name: &str) -> bool {
397 conn.query_row(
398 "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?1",
399 rusqlite::params![name],
400 |r| r.get::<_, i64>(0).map(|v| v > 0),
401 )
402 .unwrap_or(false)
403}
404
#[cfg(test)]
mod tests {
    use super::*;

    // The orphan-list payload exposes every field under its own key.
    #[test]
    fn vec_orphan_list_response_serializes_all_fields() {
        let json = serde_json::to_value(VecOrphanListResponse {
            action: String::from("orphan_list"),
            count: 0,
            items: vec![],
            elapsed_ms: 5,
        })
        .unwrap();

        assert_eq!(json["action"], "orphan_list");
        assert_eq!(json["count"], 0i64);
        assert_eq!(json["elapsed_ms"], 5u64);
        assert!(json["items"].is_array());
    }

    // A dry-run purge payload carries the flag and a zero delete count.
    #[test]
    fn vec_purge_orphan_response_serializes_dry_run_flag() {
        let json = serde_json::to_value(VecPurgeOrphanResponse {
            action: String::from("purge_orphan_dry_run"),
            deleted: 0,
            deleted_entities: 0,
            deleted_chunks: 0,
            dry_run: true,
            elapsed_ms: 1,
        })
        .unwrap();

        assert_eq!(json["dry_run"], true);
        assert_eq!(json["deleted"], 0i64);
    }

    // Despite its name, this test only checks serialization: the coverage
    // value is supplied by hand, and a `None` count must be left out.
    #[test]
    fn vec_stats_response_computes_coverage() {
        let json = serde_json::to_value(VecStatsResponse {
            total_rows: 100,
            orphaned: 25,
            coverage_percent: 75.0,
            vec_entities_rows: Some(50),
            vec_chunks_rows: None,
            fts_memories_rows: 100,
            elapsed_ms: 10,
        })
        .unwrap();

        assert_eq!(json["coverage_percent"], 75.0);
        assert_eq!(json["vec_entities_rows"], 50i64);
        assert!(json.get("vec_chunks_rows").is_none());
    }
}