1use crate::errors::AppError;
4use crate::i18n::errors_msg;
5use crate::output;
6use crate::paths::AppPaths;
7use crate::storage::connection::open_rw;
8use serde::Serialize;
9
// Command-line arguments of the `purge` subcommand, which permanently deletes
// soft-deleted memories (see `run`).
//
// Plain `//` comments are used on this struct and its fields so that the
// `--help` text generated from the derive attributes is left untouched.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n \
    # Permanently delete soft-deleted memories older than 90 days (default retention)\n \
    sqlite-graphrag purge\n\n \
    # Custom retention window in days\n \
    sqlite-graphrag purge --retention-days 30\n\n \
    # Purge ALL soft-deleted memories regardless of age\n \
    sqlite-graphrag purge --retention-days 0\n\n \
    # Preview what would be purged without deleting\n \
    sqlite-graphrag purge --dry-run\n\n \
    # Purge a specific memory by name\n \
    sqlite-graphrag purge --name old-memory --namespace my-project\n\n\
NOTES:\n \
    `--yes` only confirms intent and does NOT override `--retention-days`.\n \
    To wipe every soft-deleted memory immediately, pair `--yes` with `--retention-days 0`.")]
pub struct PurgeArgs {
    // Restricts the purge to soft-deleted memories with exactly this name.
    // When set and nothing matches, `run` returns `AppError::NotFound`.
    #[arg(long)]
    pub name: Option<String>,
    // Namespace to purge. `run` passes it (including `None`) to
    // `crate::namespace::resolve_namespace` to obtain the effective namespace.
    #[arg(long)]
    pub namespace: Option<String>,
    // Retention window in days (`--retention-days`, aliases `--days` and
    // `--max-age-days`). `run` treats rows whose `deleted_at` is at or before
    // `now - retention_days * 86_400` as purge candidates.
    #[arg(
        long,
        alias = "days",
        alias = "max-age-days",
        value_name = "DAYS",
        default_value_t = crate::constants::PURGE_RETENTION_DAYS_DEFAULT
    )]
    pub retention_days: u32,
    // Hidden, deprecated alternative to `retention_days`, expressed in seconds.
    // When present it takes precedence for the cutoff and `run` adds a
    // deprecation warning to the response.
    #[arg(long, hide = true)]
    pub older_than_seconds: Option<u64>,
    // When true, `run` only reports the candidates and skips the delete
    // transaction entirely.
    #[arg(long, default_value_t = false)]
    pub dry_run: bool,
    // Confirmation flag. NOTE(review): `run` in this file never reads it; per
    // the NOTES help text above it only confirms intent and does not change
    // the retention window.
    #[arg(long, default_value_t = false)]
    pub yes: bool,
    // Hidden no-op flag; per its help string, JSON is always emitted on stdout.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Optional database path, also settable through the
    // `SQLITE_GRAPHRAG_DB_PATH` environment variable. Passed to
    // `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
58
/// JSON payload emitted on stdout by `run` after a purge (or a dry run).
#[derive(Serialize)]
pub struct PurgeResponse {
    /// Number of soft-deleted rows matching the purge criteria, as counted by
    /// `compute_metrics` before any deletion takes place. In a dry run this is
    /// the number of rows that would be purged.
    pub purged_count: usize,
    /// Size estimate for the matched rows, as summed by `compute_metrics` over
    /// the `body`, `description` and `name` columns.
    pub bytes_freed: i64,
    /// Smallest `deleted_at` value among the matched rows; `None` when nothing
    /// matched.
    pub oldest_deleted_at: Option<i64>,
    /// Value of `PurgeArgs::retention_days` for this invocation. It is
    /// reported as-is even when `older_than_seconds` determined the cutoff.
    pub retention_days_used: u32,
    /// Mirrors `PurgeArgs::dry_run`: when true, nothing was deleted.
    pub dry_run: bool,
    /// Effective namespace the purge was restricted to (`run` always sets it).
    pub namespace: Option<String>,
    /// Cutoff in seconds since the Unix epoch; rows with
    /// `deleted_at <= cutoff_epoch` are purge candidates.
    pub cutoff_epoch: i64,
    /// Non-fatal notices collected during the run, such as the deprecation
    /// notice for `--older-than-seconds` or vector-table cleanup failures.
    pub warnings: Vec<String>,
    /// Wall-clock duration of the command in milliseconds.
    pub elapsed_ms: u64,
    /// Human-readable hint set when no candidates were found; omitted from the
    /// JSON output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub message: Option<String>,
}
78
79pub fn run(args: PurgeArgs) -> Result<(), AppError> {
84 let inicio = std::time::Instant::now();
85 let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
86 let paths = AppPaths::resolve(args.db.as_deref())?;
87
88 crate::storage::connection::ensure_db_ready(&paths)?;
89
90 let mut warnings: Vec<String> = Vec::with_capacity(1);
91 let now = current_epoch()?;
92
93 let cutoff_epoch = if let Some(secs) = args.older_than_seconds {
94 warnings.push(
95 "--older-than-seconds is deprecated; use --retention-days in v2.0.0+".to_string(),
96 );
97 now - secs as i64
98 } else {
99 now - (args.retention_days as i64) * 86_400
100 };
101
102 let namespace_opt: Option<&str> = Some(namespace.as_str());
103
104 let mut conn = open_rw(&paths.db)?;
105
106 let (bytes_freed, oldest_deleted_at, candidates_count) =
107 compute_metrics(&conn, cutoff_epoch, namespace_opt, args.name.as_deref())?;
108
109 if candidates_count == 0 && args.name.is_some() {
110 return Err(AppError::NotFound(
111 errors_msg::soft_deleted_memory_not_found(
112 args.name.as_deref().unwrap_or_default(),
113 &namespace,
114 ),
115 ));
116 }
117
118 if !args.dry_run {
119 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
120 execute_purge(
121 &tx,
122 &namespace,
123 args.name.as_deref(),
124 cutoff_epoch,
125 &mut warnings,
126 )?;
127 tx.commit()?;
128 conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
129 }
130
131 let message = if candidates_count == 0 {
132 Some(format!(
133 "no soft-deleted memories older than {retention_days} day(s); use --retention-days 0 to purge all soft-deleted memories regardless of age",
134 retention_days = args.retention_days
135 ))
136 } else {
137 None
138 };
139
140 output::emit_json(&PurgeResponse {
141 purged_count: candidates_count,
142 bytes_freed,
143 oldest_deleted_at,
144 retention_days_used: args.retention_days,
145 dry_run: args.dry_run,
146 namespace: Some(namespace),
147 cutoff_epoch,
148 warnings,
149 elapsed_ms: inicio.elapsed().as_millis() as u64,
150 message,
151 })?;
152
153 Ok(())
154}
155
156fn current_epoch() -> Result<i64, AppError> {
157 let now = std::time::SystemTime::now()
158 .duration_since(std::time::UNIX_EPOCH)
159 .map_err(|err| AppError::Internal(anyhow::anyhow!("system clock error: {err}")))?;
160 Ok(now.as_secs() as i64)
161}
162
163fn compute_metrics(
164 conn: &rusqlite::Connection,
165 cutoff_epoch: i64,
166 namespace_opt: Option<&str>,
167 name: Option<&str>,
168) -> Result<(i64, Option<i64>, usize), AppError> {
169 let (bytes_freed, oldest_deleted_at): (i64, Option<i64>) = if let Some(name) = name {
170 conn.query_row(
171 "SELECT COALESCE(SUM(LENGTH(COALESCE(body,'')) + LENGTH(COALESCE(description,'')) + LENGTH(name)), 0),
172 MIN(deleted_at)
173 FROM memories
174 WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
175 AND (?2 IS NULL OR namespace = ?2)
176 AND name = ?3",
177 rusqlite::params![cutoff_epoch, namespace_opt, name],
178 |r| Ok((r.get::<_, i64>(0)?, r.get::<_, Option<i64>>(1)?)),
179 )?
180 } else {
181 conn.query_row(
182 "SELECT COALESCE(SUM(LENGTH(COALESCE(body,'')) + LENGTH(COALESCE(description,'')) + LENGTH(name)), 0),
183 MIN(deleted_at)
184 FROM memories
185 WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
186 AND (?2 IS NULL OR namespace = ?2)",
187 rusqlite::params![cutoff_epoch, namespace_opt],
188 |r| Ok((r.get::<_, i64>(0)?, r.get::<_, Option<i64>>(1)?)),
189 )?
190 };
191
192 let count: usize = if let Some(name) = name {
193 conn.query_row(
194 "SELECT COUNT(*) FROM memories
195 WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
196 AND (?2 IS NULL OR namespace = ?2)
197 AND name = ?3",
198 rusqlite::params![cutoff_epoch, namespace_opt, name],
199 |r| r.get::<_, usize>(0),
200 )?
201 } else {
202 conn.query_row(
203 "SELECT COUNT(*) FROM memories
204 WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
205 AND (?2 IS NULL OR namespace = ?2)",
206 rusqlite::params![cutoff_epoch, namespace_opt],
207 |r| r.get::<_, usize>(0),
208 )?
209 };
210
211 Ok((bytes_freed, oldest_deleted_at, count))
212}
213
214fn execute_purge(
215 tx: &rusqlite::Transaction,
216 namespace: &str,
217 name: Option<&str>,
218 cutoff_epoch: i64,
219 warnings: &mut Vec<String>,
220) -> Result<(), AppError> {
221 let candidates = select_candidates(tx, namespace, name, cutoff_epoch)?;
222
223 for (memory_id, _name) in &candidates {
224 if let Err(err) = tx.execute(
225 "DELETE FROM vec_chunks WHERE memory_id = ?1",
226 rusqlite::params![memory_id],
227 ) {
228 warnings.push(format!(
229 "failed to clean vec_chunks for memory_id {memory_id}: {err}"
230 ));
231 }
232 if let Err(err) = tx.execute(
233 "DELETE FROM vec_memories WHERE memory_id = ?1",
234 rusqlite::params![memory_id],
235 ) {
236 warnings.push(format!(
237 "failed to clean vec_memories for memory_id {memory_id}: {err}"
238 ));
239 }
240 tx.execute(
241 "DELETE FROM memories WHERE id = ?1 AND namespace = ?2 AND deleted_at IS NOT NULL",
242 rusqlite::params![memory_id, namespace],
243 )?;
244 }
245
246 Ok(())
247}
248
249fn select_candidates(
250 conn: &rusqlite::Connection,
251 namespace: &str,
252 name: Option<&str>,
253 cutoff_epoch: i64,
254) -> Result<Vec<(i64, String)>, AppError> {
255 let query = if name.is_some() {
256 "SELECT id, name FROM memories
257 WHERE namespace = ?1 AND name = ?2 AND deleted_at IS NOT NULL AND deleted_at <= ?3
258 ORDER BY deleted_at ASC"
259 } else {
260 "SELECT id, name FROM memories
261 WHERE namespace = ?1 AND deleted_at IS NOT NULL AND deleted_at <= ?2
262 ORDER BY deleted_at ASC"
263 };
264
265 let mut stmt = conn.prepare(query)?;
266 let rows = if let Some(name) = name {
267 stmt.query_map(rusqlite::params![namespace, name, cutoff_epoch], |row| {
268 Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
269 })?
270 .collect::<Result<Vec<_>, _>>()?
271 } else {
272 stmt.query_map(rusqlite::params![namespace, cutoff_epoch], |row| {
273 Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
274 })?
275 .collect::<Result<Vec<_>, _>>()?
276 };
277 Ok(rows)
278}
279
#[cfg(test)]
mod tests {
    use super::*;
    use rusqlite::Connection;

    /// Number of seconds in one day, used to build epochs relative to "now".
    const SECONDS_PER_DAY: i64 = 86_400;

    /// Builds an in-memory database with the minimal schema the purge code touches.
    fn setup_test_db() -> Connection {
        let db = Connection::open_in_memory().expect("failed to open in-memory db");
        db.execute_batch(
            "CREATE TABLE memories (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                namespace TEXT NOT NULL DEFAULT 'global',
                description TEXT,
                body TEXT,
                deleted_at INTEGER
            );
            CREATE TABLE IF NOT EXISTS vec_chunks (memory_id INTEGER);
            CREATE TABLE IF NOT EXISTS vec_memories (memory_id INTEGER);",
        )
        .expect("failed to create test tables");
        db
    }

    /// Inserts an already soft-deleted memory and returns its row id.
    fn insert_deleted_memory(
        conn: &Connection,
        name: &str,
        namespace: &str,
        body: &str,
        deleted_at: i64,
    ) -> i64 {
        let inserted = conn.execute(
            "INSERT INTO memories (name, namespace, body, deleted_at) VALUES (?1, ?2, ?3, ?4)",
            rusqlite::params![name, namespace, body, deleted_at],
        );
        inserted.expect("failed to insert test memory");
        conn.last_insert_rowid()
    }

    /// Current epoch as seen by the code under test.
    fn epoch_now() -> i64 {
        current_epoch().expect("epoch failed")
    }

    /// Epoch that lies `days` days before `now`.
    fn days_before(now: i64, days: i64) -> i64 {
        now - days * SECONDS_PER_DAY
    }

    /// Runs `compute_metrics` for the `global` namespace without a name filter.
    fn global_metrics(conn: &Connection, cutoff: i64) -> (i64, Option<i64>, usize) {
        compute_metrics(conn, cutoff, Some("global"), None).expect("compute_metrics failed")
    }

    #[test]
    fn retention_days_used_default_is_90() {
        assert_eq!(crate::constants::PURGE_RETENTION_DAYS_DEFAULT, 90u32);
    }

    #[test]
    fn compute_metrics_bytes_freed_positive_for_populated_body() {
        let db = setup_test_db();
        let now = epoch_now();
        insert_deleted_memory(
            &db,
            "mem-test",
            "global",
            "memory body",
            days_before(now, 100),
        );

        let (bytes, oldest, count) = global_metrics(&db, days_before(now, 30));

        assert!(bytes > 0, "bytes_freed must be > 0 for populated body");
        assert!(oldest.is_some(), "oldest_deleted_at must be Some");
        assert_eq!(count, 1);
    }

    #[test]
    fn compute_metrics_returns_zero_without_candidates() {
        let db = setup_test_db();
        let cutoff = days_before(epoch_now(), 90);

        let (bytes, oldest, count) = global_metrics(&db, cutoff);

        assert_eq!(bytes, 0);
        assert!(oldest.is_none());
        assert_eq!(count, 0);
    }

    #[test]
    fn dry_run_does_not_delete_records() {
        let db = setup_test_db();
        let now = epoch_now();
        insert_deleted_memory(
            &db,
            "mem-dry",
            "global",
            "dry run content",
            days_before(now, 200),
        );
        let cutoff = days_before(now, 30);

        // The dry-run path only gathers metrics, so gathering them twice must
        // observe the same single candidate.
        let (_, _, count_before) = global_metrics(&db, cutoff);
        assert_eq!(count_before, 1, "must have 1 candidate before dry run");

        let (_, _, count_after) = global_metrics(&db, cutoff);
        assert_eq!(
            count_after, 1,
            "dry_run must not remove records: count must remain 1"
        );
    }

    #[test]
    fn oldest_deleted_at_returns_smallest_epoch() {
        let db = setup_test_db();
        let now = epoch_now();
        let epoch_old = days_before(now, 300);
        let epoch_recent = days_before(now, 200);
        for (name, body, deleted_at) in [
            ("mem-a", "body-a", epoch_old),
            ("mem-b", "body-b", epoch_recent),
        ] {
            insert_deleted_memory(&db, name, "global", body, deleted_at);
        }

        let (_, oldest, count) = global_metrics(&db, days_before(now, 30));

        assert_eq!(count, 2);
        assert_eq!(
            oldest,
            Some(epoch_old),
            "oldest_deleted_at must be the oldest epoch"
        );
    }

    #[test]
    fn purge_args_namespace_accepts_none_without_default() {
        let resolved = crate::namespace::resolve_namespace(None)
            .expect("resolve_namespace(None) must return Ok");
        assert_eq!(
            resolved, "global",
            "without env var, resolve_namespace(None) must fall back to 'global'"
        );
    }

    #[test]
    fn purge_response_serializes_all_new_fields() {
        let response = PurgeResponse {
            purged_count: 3,
            bytes_freed: 1024,
            oldest_deleted_at: Some(1_700_000_000),
            retention_days_used: 90,
            dry_run: false,
            namespace: Some("global".to_string()),
            cutoff_epoch: 1_710_000_000,
            warnings: vec![],
            elapsed_ms: 42,
            message: None,
        };
        let json = serde_json::to_string(&response).expect("serialization failed");

        for field in [
            "bytes_freed",
            "oldest_deleted_at",
            "retention_days_used",
            "dry_run",
            "elapsed_ms",
        ] {
            assert!(json.contains(field));
        }
        // `message` is skipped entirely when it is `None`.
        assert!(!json.contains("\"message\""));
    }

    #[test]
    fn purge_response_serializes_message_when_present() {
        let hint = "no soft-deleted memories older than 90 day(s); use --retention-days 0 to purge all soft-deleted memories regardless of age"
            .to_string();
        let response = PurgeResponse {
            purged_count: 0,
            bytes_freed: 0,
            oldest_deleted_at: None,
            retention_days_used: 90,
            dry_run: false,
            namespace: Some("global".to_string()),
            cutoff_epoch: 1_710_000_000,
            warnings: vec![],
            elapsed_ms: 5,
            message: Some(hint),
        };
        let json = serde_json::to_string(&response).expect("serialization failed");

        assert!(json.contains("\"message\""));
        assert!(json.contains("--retention-days 0"));
    }
}