1use crate::errors::AppError;
4use crate::i18n::errors_msg;
5use crate::output;
6use crate::paths::AppPaths;
7use crate::storage::connection::open_rw;
8use serde::Serialize;
9
10#[derive(clap::Args)]
11#[command(after_long_help = "EXAMPLES:\n \
12 # Permanently delete soft-deleted memories older than 90 days (default retention)\n \
13 sqlite-graphrag purge\n\n \
14 # Custom retention window in days\n \
15 sqlite-graphrag purge --retention-days 30\n\n \
16 # Purge ALL soft-deleted memories regardless of age\n \
17 sqlite-graphrag purge --retention-days 0\n\n \
18 # Preview what would be purged without deleting\n \
19 sqlite-graphrag purge --dry-run\n\n \
20 # Purge a specific memory by name\n \
21 sqlite-graphrag purge --name old-memory --namespace my-project\n\n\
22NOTES:\n \
23 `--yes` only confirms intent and does NOT override `--retention-days`.\n \
24 To wipe every soft-deleted memory immediately, pair `--yes` with `--retention-days 0`.")]
25pub struct PurgeArgs {
26 #[arg(long)]
27 pub name: Option<String>,
28 #[arg(long)]
30 pub namespace: Option<String>,
31 #[arg(
35 long,
36 alias = "days",
37 alias = "max-age-days",
38 value_name = "DAYS",
39 default_value_t = crate::constants::PURGE_RETENTION_DAYS_DEFAULT
40 )]
41 pub retention_days: u32,
42 #[arg(long, hide = true)]
44 pub older_than_seconds: Option<u64>,
45 #[arg(long, default_value_t = false)]
47 pub dry_run: bool,
48 #[arg(long, default_value_t = false)]
52 pub yes: bool,
53 #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
54 pub json: bool,
55 #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
56 pub db: Option<String>,
57}
58
59#[derive(Serialize)]
60pub struct PurgeResponse {
61 pub action: String,
62 pub purged_count: usize,
63 pub bytes_freed: i64,
64 pub oldest_deleted_at: Option<i64>,
65 pub retention_days_used: u32,
66 pub dry_run: bool,
67 pub namespace: Option<String>,
68 pub cutoff_epoch: i64,
69 pub warnings: Vec<String>,
70 pub elapsed_ms: u64,
72 #[serde(skip_serializing_if = "Option::is_none")]
77 pub message: Option<String>,
78}
79
80pub fn run(args: PurgeArgs) -> Result<(), AppError> {
85 let inicio = std::time::Instant::now();
86 let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
87 let paths = AppPaths::resolve(args.db.as_deref())?;
88
89 crate::storage::connection::ensure_db_ready(&paths)?;
90
91 let mut warnings: Vec<String> = Vec::with_capacity(1);
92 let now = current_epoch()?;
93
94 let cutoff_epoch = if let Some(secs) = args.older_than_seconds {
95 warnings.push(
96 "--older-than-seconds is deprecated; use --retention-days in v2.0.0+".to_string(),
97 );
98 now - secs as i64
99 } else {
100 now - (args.retention_days as i64) * 86_400
101 };
102
103 let namespace_opt: Option<&str> = Some(namespace.as_str());
104
105 let mut conn = open_rw(&paths.db)?;
106
107 let (bytes_freed, oldest_deleted_at, candidates_count) =
108 compute_metrics(&conn, cutoff_epoch, namespace_opt, args.name.as_deref())?;
109
110 if candidates_count == 0 && args.name.is_some() {
111 return Err(AppError::NotFound(
112 errors_msg::soft_deleted_memory_not_found(
113 args.name.as_deref().unwrap_or_default(),
114 &namespace,
115 ),
116 ));
117 }
118
119 if !args.dry_run && !args.yes {
120 return Err(AppError::Validation(
121 "destructive operation: pass --yes to confirm purge (use --dry-run to preview)"
122 .to_string(),
123 ));
124 }
125
126 if !args.dry_run {
127 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
128 execute_purge(
129 &tx,
130 &paths.db,
131 &namespace,
132 args.name.as_deref(),
133 cutoff_epoch,
134 &mut warnings,
135 )?;
136 tx.commit()?;
137 conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
138 }
139
140 let message = if candidates_count == 0 {
141 Some(format!(
142 "no soft-deleted memories older than {retention_days} day(s); use --retention-days 0 to purge all soft-deleted memories regardless of age",
143 retention_days = args.retention_days
144 ))
145 } else {
146 None
147 };
148
149 output::emit_json(&PurgeResponse {
150 action: if args.dry_run {
151 "dry_run".to_string()
152 } else {
153 "purged".to_string()
154 },
155 purged_count: candidates_count,
156 bytes_freed,
157 oldest_deleted_at,
158 retention_days_used: args.retention_days,
159 dry_run: args.dry_run,
160 namespace: Some(namespace),
161 cutoff_epoch,
162 warnings,
163 elapsed_ms: inicio.elapsed().as_millis() as u64,
164 message,
165 })?;
166
167 Ok(())
168}
169
170fn current_epoch() -> Result<i64, AppError> {
171 let now = std::time::SystemTime::now()
172 .duration_since(std::time::UNIX_EPOCH)
173 .map_err(|err| AppError::Internal(anyhow::anyhow!("system clock error: {err}")))?;
174 Ok(now.as_secs() as i64)
175}
176
177fn compute_metrics(
178 conn: &rusqlite::Connection,
179 cutoff_epoch: i64,
180 namespace_opt: Option<&str>,
181 name: Option<&str>,
182) -> Result<(i64, Option<i64>, usize), AppError> {
183 let (bytes_freed, oldest_deleted_at): (i64, Option<i64>) = if let Some(name) = name {
184 conn.query_row(
185 "SELECT COALESCE(SUM(LENGTH(COALESCE(body,'')) + LENGTH(COALESCE(description,'')) + LENGTH(name)), 0),
186 MIN(deleted_at)
187 FROM memories
188 WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
189 AND (?2 IS NULL OR namespace = ?2)
190 AND name = ?3",
191 rusqlite::params![cutoff_epoch, namespace_opt, name],
192 |r| Ok((r.get::<_, i64>(0)?, r.get::<_, Option<i64>>(1)?)),
193 )?
194 } else {
195 conn.query_row(
196 "SELECT COALESCE(SUM(LENGTH(COALESCE(body,'')) + LENGTH(COALESCE(description,'')) + LENGTH(name)), 0),
197 MIN(deleted_at)
198 FROM memories
199 WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
200 AND (?2 IS NULL OR namespace = ?2)",
201 rusqlite::params![cutoff_epoch, namespace_opt],
202 |r| Ok((r.get::<_, i64>(0)?, r.get::<_, Option<i64>>(1)?)),
203 )?
204 };
205
206 let count: usize = if let Some(name) = name {
207 conn.query_row(
208 "SELECT COUNT(*) FROM memories
209 WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
210 AND (?2 IS NULL OR namespace = ?2)
211 AND name = ?3",
212 rusqlite::params![cutoff_epoch, namespace_opt, name],
213 |r| r.get::<_, usize>(0),
214 )?
215 } else {
216 conn.query_row(
217 "SELECT COUNT(*) FROM memories
218 WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
219 AND (?2 IS NULL OR namespace = ?2)",
220 rusqlite::params![cutoff_epoch, namespace_opt],
221 |r| r.get::<_, usize>(0),
222 )?
223 };
224
225 Ok((bytes_freed, oldest_deleted_at, count))
226}
227
228fn execute_purge(
229 tx: &rusqlite::Transaction,
230 db_path: &std::path::Path,
231 namespace: &str,
232 name: Option<&str>,
233 cutoff_epoch: i64,
234 warnings: &mut Vec<String>,
235) -> Result<(), AppError> {
236 let candidates = select_candidates(tx, namespace, name, cutoff_epoch)?;
237
238 for (memory_id, name) in &candidates {
239 crate::commands::enrich::cleanup_queue_entry(db_path, *memory_id, name);
242 if let Err(err) = tx.execute(
243 "DELETE FROM vec_chunks WHERE memory_id = ?1",
244 rusqlite::params![memory_id],
245 ) {
246 warnings.push(format!(
247 "failed to clean vec_chunks for memory_id {memory_id}: {err}"
248 ));
249 }
250 if let Err(err) = tx.execute(
251 "DELETE FROM vec_memories WHERE memory_id = ?1",
252 rusqlite::params![memory_id],
253 ) {
254 warnings.push(format!(
255 "failed to clean vec_memories for memory_id {memory_id}: {err}"
256 ));
257 }
258 tx.execute(
259 "DELETE FROM memories WHERE id = ?1 AND namespace = ?2 AND deleted_at IS NOT NULL",
260 rusqlite::params![memory_id, namespace],
261 )?;
262 }
263
264 Ok(())
265}
266
267fn select_candidates(
268 conn: &rusqlite::Connection,
269 namespace: &str,
270 name: Option<&str>,
271 cutoff_epoch: i64,
272) -> Result<Vec<(i64, String)>, AppError> {
273 let query = if name.is_some() {
274 "SELECT id, name FROM memories
275 WHERE namespace = ?1 AND name = ?2 AND deleted_at IS NOT NULL AND deleted_at <= ?3
276 ORDER BY deleted_at ASC"
277 } else {
278 "SELECT id, name FROM memories
279 WHERE namespace = ?1 AND deleted_at IS NOT NULL AND deleted_at <= ?2
280 ORDER BY deleted_at ASC"
281 };
282
283 let mut stmt = conn.prepare_cached(query)?;
284 let rows = if let Some(name) = name {
285 stmt.query_map(rusqlite::params![namespace, name, cutoff_epoch], |row| {
286 Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
287 })?
288 .collect::<Result<Vec<_>, _>>()?
289 } else {
290 stmt.query_map(rusqlite::params![namespace, cutoff_epoch], |row| {
291 Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
292 })?
293 .collect::<Result<Vec<_>, _>>()?
294 };
295 Ok(rows)
296}
297
#[cfg(test)]
mod tests {
    use super::*;
    use rusqlite::Connection;

    /// Seconds in one day; used to express ages relative to "now".
    const SECS_PER_DAY: i64 = 86_400;

    /// Returns the epoch that lies `days` whole days before `now`.
    fn days_before(now: i64, days: i64) -> i64 {
        now - days * SECS_PER_DAY
    }

    /// Opens an in-memory database holding the minimal schema the purge
    /// queries touch.
    fn setup_test_db() -> Connection {
        let db = Connection::open_in_memory().expect("failed to open in-memory db");
        let schema = "CREATE TABLE memories (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                namespace TEXT NOT NULL DEFAULT 'global',
                description TEXT,
                body TEXT,
                deleted_at INTEGER
            );
            CREATE TABLE IF NOT EXISTS vec_chunks (memory_id INTEGER);
            CREATE TABLE IF NOT EXISTS vec_memories (memory_id INTEGER);";
        db.execute_batch(schema)
            .expect("failed to create test tables");
        db
    }

    /// Inserts an already soft-deleted memory and returns its row id.
    fn insert_deleted_memory(
        conn: &Connection,
        name: &str,
        namespace: &str,
        body: &str,
        deleted_at: i64,
    ) -> i64 {
        let inserted = conn.execute(
            "INSERT INTO memories (name, namespace, body, deleted_at) VALUES (?1, ?2, ?3, ?4)",
            rusqlite::params![name, namespace, body, deleted_at],
        );
        inserted.expect("failed to insert test memory");
        conn.last_insert_rowid()
    }

    /// Baseline response used by the serialization tests; individual tests
    /// override the fields they care about.
    fn base_response() -> PurgeResponse {
        PurgeResponse {
            action: "purged".to_string(),
            purged_count: 3,
            bytes_freed: 1024,
            oldest_deleted_at: Some(1_700_000_000),
            retention_days_used: 90,
            dry_run: false,
            namespace: Some("global".to_string()),
            cutoff_epoch: 1_710_000_000,
            warnings: vec![],
            elapsed_ms: 42,
            message: None,
        }
    }

    #[test]
    fn retention_days_used_default_is_90() {
        assert_eq!(crate::constants::PURGE_RETENTION_DAYS_DEFAULT, 90u32);
    }

    #[test]
    fn compute_metrics_bytes_freed_positive_for_populated_body() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        insert_deleted_memory(
            &conn,
            "mem-test",
            "global",
            "memory body",
            days_before(now, 100),
        );

        let (bytes, oldest, count) =
            compute_metrics(&conn, days_before(now, 30), Some("global"), None)
                .expect("compute_metrics failed");

        assert!(bytes > 0, "bytes_freed must be > 0 for populated body");
        assert!(oldest.is_some(), "oldest_deleted_at must be Some");
        assert_eq!(count, 1);
    }

    #[test]
    fn compute_metrics_returns_zero_without_candidates() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");

        let (bytes, oldest, count) =
            compute_metrics(&conn, days_before(now, 90), Some("global"), None)
                .expect("compute_metrics failed");

        assert_eq!(bytes, 0);
        assert!(oldest.is_none());
        assert_eq!(count, 0);
    }

    // NOTE(review): this test only calls `compute_metrics` twice; it never goes
    // through `run` with `--dry-run`, so it shows that computing metrics is
    // read-only rather than exercising the dry-run path itself.
    #[test]
    fn dry_run_does_not_delete_records() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        insert_deleted_memory(
            &conn,
            "mem-dry",
            "global",
            "dry run content",
            days_before(now, 200),
        );
        let cutoff = days_before(now, 30);

        let (_, _, count_before) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");
        assert_eq!(count_before, 1, "must have 1 candidate before dry run");

        let (_, _, count_after) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");
        assert_eq!(
            count_after, 1,
            "dry_run must not remove records: count must remain 1"
        );
    }

    #[test]
    fn oldest_deleted_at_returns_smallest_epoch() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let epoch_old = days_before(now, 300);
        let epoch_recent = days_before(now, 200);

        for (name, body, deleted_at) in [
            ("mem-a", "body-a", epoch_old),
            ("mem-b", "body-b", epoch_recent),
        ] {
            insert_deleted_memory(&conn, name, "global", body, deleted_at);
        }

        let (_, oldest, count) =
            compute_metrics(&conn, days_before(now, 30), Some("global"), None)
                .expect("compute_metrics failed");

        assert_eq!(count, 2);
        assert_eq!(
            oldest,
            Some(epoch_old),
            "oldest_deleted_at must be the oldest epoch"
        );
    }

    #[test]
    fn purge_args_namespace_accepts_none_without_default() {
        let resolved = crate::namespace::resolve_namespace(None)
            .expect("resolve_namespace(None) must return Ok");
        assert_eq!(
            resolved, "global",
            "without env var, resolve_namespace(None) must fall back to 'global'"
        );
    }

    #[test]
    fn purge_response_serializes_all_new_fields() {
        let json = serde_json::to_string(&base_response()).expect("serialization failed");

        for field in [
            "bytes_freed",
            "oldest_deleted_at",
            "retention_days_used",
            "dry_run",
            "elapsed_ms",
        ] {
            assert!(json.contains(field));
        }
        // `message` is `None` here and must therefore be left out entirely.
        assert!(!json.contains("\"message\""));
    }

    #[test]
    fn purge_response_serializes_message_when_present() {
        let resp = PurgeResponse {
            purged_count: 0,
            bytes_freed: 0,
            oldest_deleted_at: None,
            elapsed_ms: 5,
            message: Some(
                "no soft-deleted memories older than 90 day(s); use --retention-days 0 to purge all soft-deleted memories regardless of age"
                    .to_string(),
            ),
            ..base_response()
        };

        let json = serde_json::to_string(&resp).expect("serialization failed");
        assert!(json.contains("\"message\""));
        assert!(json.contains("--retention-days 0"));
    }
}