1use crate::errors::AppError;
4use crate::i18n::errors_msg;
5use crate::output;
6use crate::paths::AppPaths;
7use crate::storage::connection::open_rw;
8use serde::Serialize;
9
// Command-line arguments for the `purge` subcommand, which permanently removes
// soft-deleted memories (rows whose `deleted_at` is set) once they are older
// than the retention window.
//
// NOTE(review): plain `//` comments are used on purpose here; clap derive turns
// `///` doc comments into help text, which would change the CLI output.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n \
    # Permanently delete soft-deleted memories older than 90 days (default retention)\n \
    sqlite-graphrag purge\n\n \
    # Custom retention window in days\n \
    sqlite-graphrag purge --retention-days 30\n\n \
    # Purge ALL soft-deleted memories regardless of age\n \
    sqlite-graphrag purge --retention-days 0\n\n \
    # Preview what would be purged without deleting\n \
    sqlite-graphrag purge --dry-run\n\n \
    # Purge a specific memory by name\n \
    sqlite-graphrag purge --name old-memory --namespace my-project\n\n\
NOTES:\n \
    `--yes` only confirms intent and does NOT override `--retention-days`.\n \
    To wipe every soft-deleted memory immediately, pair `--yes` with `--retention-days 0`.")]
pub struct PurgeArgs {
    // Restrict the purge to the soft-deleted memory with this exact name.
    // `run` returns `AppError::NotFound` when a name is given and nothing matches.
    #[arg(long)]
    pub name: Option<String>,
    // Namespace to purge; `run` passes it through `resolve_namespace`, which
    // supplies the effective namespace when this is omitted.
    #[arg(long)]
    pub namespace: Option<String>,
    // Retention window in days (also accepted as `--days` / `--max-age-days`).
    // `run` converts it to seconds (x 86_400) and subtracts it from "now" to get
    // the cutoff; 0 therefore matches every soft-deleted memory.
    #[arg(
        long,
        alias = "days",
        alias = "max-age-days",
        value_name = "DAYS",
        default_value_t = crate::constants::PURGE_RETENTION_DAYS_DEFAULT
    )]
    pub retention_days: u32,
    // Hidden, deprecated override: when present it replaces `retention_days` for
    // the cutoff computation and `run` adds a deprecation warning to the response.
    #[arg(long, hide = true)]
    pub older_than_seconds: Option<u64>,
    // Report what would be purged without deleting anything.
    #[arg(long, default_value_t = false)]
    pub dry_run: bool,
    // Confirmation flag. It is not read by `run` in this file; per the help text
    // above it only confirms intent and never overrides `--retention-days`.
    #[arg(long, default_value_t = false)]
    pub yes: bool,
    // Accepted for compatibility only; the response is always emitted as JSON.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override (flag or `SQLITE_GRAPHRAG_DB_PATH`), handed to
    // `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
58
/// JSON payload emitted on stdout by the `purge` subcommand.
///
/// Fields are serialized in declaration order.
#[derive(Serialize)]
pub struct PurgeResponse {
    /// `"dry_run"` when nothing was deleted, `"purged"` otherwise.
    pub action: String,
    /// Number of soft-deleted memories matching the cutoff, as counted by
    /// `compute_metrics` before any deletion takes place.
    pub purged_count: usize,
    /// Sum of SQL `LENGTH()` over `body`, `description` and `name` of the
    /// matching rows (an estimate of the reclaimed payload size).
    pub bytes_freed: i64,
    /// Smallest `deleted_at` among the matching rows, or `None` when there are none.
    pub oldest_deleted_at: Option<i64>,
    /// Value of `--retention-days` as parsed. Note that `run` reports this value
    /// even when the cutoff was derived from `--older-than-seconds`.
    pub retention_days_used: u32,
    /// Mirrors the `--dry-run` flag.
    pub dry_run: bool,
    /// Resolved namespace the purge was applied to.
    pub namespace: Option<String>,
    /// Unix epoch (seconds); rows with `deleted_at <= cutoff_epoch` are eligible.
    pub cutoff_epoch: i64,
    /// Non-fatal problems (deprecation notices, failed vector-table cleanups).
    pub warnings: Vec<String>,
    /// Wall-clock duration of the command in milliseconds.
    pub elapsed_ms: u64,
    /// Human-readable hint, set only when nothing matched; omitted from the
    /// JSON entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub message: Option<String>,
}
79
80pub fn run(args: PurgeArgs) -> Result<(), AppError> {
85 let inicio = std::time::Instant::now();
86 let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
87 let paths = AppPaths::resolve(args.db.as_deref())?;
88
89 crate::storage::connection::ensure_db_ready(&paths)?;
90
91 let mut warnings: Vec<String> = Vec::with_capacity(1);
92 let now = current_epoch()?;
93
94 let cutoff_epoch = if let Some(secs) = args.older_than_seconds {
95 warnings.push(
96 "--older-than-seconds is deprecated; use --retention-days in v2.0.0+".to_string(),
97 );
98 now - secs as i64
99 } else {
100 now - (args.retention_days as i64) * 86_400
101 };
102
103 let namespace_opt: Option<&str> = Some(namespace.as_str());
104
105 let mut conn = open_rw(&paths.db)?;
106
107 let (bytes_freed, oldest_deleted_at, candidates_count) =
108 compute_metrics(&conn, cutoff_epoch, namespace_opt, args.name.as_deref())?;
109
110 if candidates_count == 0 && args.name.is_some() {
111 return Err(AppError::NotFound(
112 errors_msg::soft_deleted_memory_not_found(
113 args.name.as_deref().unwrap_or_default(),
114 &namespace,
115 ),
116 ));
117 }
118
119 if !args.dry_run {
120 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
121 execute_purge(
122 &tx,
123 &namespace,
124 args.name.as_deref(),
125 cutoff_epoch,
126 &mut warnings,
127 )?;
128 tx.commit()?;
129 conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
130 }
131
132 let message = if candidates_count == 0 {
133 Some(format!(
134 "no soft-deleted memories older than {retention_days} day(s); use --retention-days 0 to purge all soft-deleted memories regardless of age",
135 retention_days = args.retention_days
136 ))
137 } else {
138 None
139 };
140
141 output::emit_json(&PurgeResponse {
142 action: if args.dry_run {
143 "dry_run".to_string()
144 } else {
145 "purged".to_string()
146 },
147 purged_count: candidates_count,
148 bytes_freed,
149 oldest_deleted_at,
150 retention_days_used: args.retention_days,
151 dry_run: args.dry_run,
152 namespace: Some(namespace),
153 cutoff_epoch,
154 warnings,
155 elapsed_ms: inicio.elapsed().as_millis() as u64,
156 message,
157 })?;
158
159 Ok(())
160}
161
162fn current_epoch() -> Result<i64, AppError> {
163 let now = std::time::SystemTime::now()
164 .duration_since(std::time::UNIX_EPOCH)
165 .map_err(|err| AppError::Internal(anyhow::anyhow!("system clock error: {err}")))?;
166 Ok(now.as_secs() as i64)
167}
168
169fn compute_metrics(
170 conn: &rusqlite::Connection,
171 cutoff_epoch: i64,
172 namespace_opt: Option<&str>,
173 name: Option<&str>,
174) -> Result<(i64, Option<i64>, usize), AppError> {
175 let (bytes_freed, oldest_deleted_at): (i64, Option<i64>) = if let Some(name) = name {
176 conn.query_row(
177 "SELECT COALESCE(SUM(LENGTH(COALESCE(body,'')) + LENGTH(COALESCE(description,'')) + LENGTH(name)), 0),
178 MIN(deleted_at)
179 FROM memories
180 WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
181 AND (?2 IS NULL OR namespace = ?2)
182 AND name = ?3",
183 rusqlite::params![cutoff_epoch, namespace_opt, name],
184 |r| Ok((r.get::<_, i64>(0)?, r.get::<_, Option<i64>>(1)?)),
185 )?
186 } else {
187 conn.query_row(
188 "SELECT COALESCE(SUM(LENGTH(COALESCE(body,'')) + LENGTH(COALESCE(description,'')) + LENGTH(name)), 0),
189 MIN(deleted_at)
190 FROM memories
191 WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
192 AND (?2 IS NULL OR namespace = ?2)",
193 rusqlite::params![cutoff_epoch, namespace_opt],
194 |r| Ok((r.get::<_, i64>(0)?, r.get::<_, Option<i64>>(1)?)),
195 )?
196 };
197
198 let count: usize = if let Some(name) = name {
199 conn.query_row(
200 "SELECT COUNT(*) FROM memories
201 WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
202 AND (?2 IS NULL OR namespace = ?2)
203 AND name = ?3",
204 rusqlite::params![cutoff_epoch, namespace_opt, name],
205 |r| r.get::<_, usize>(0),
206 )?
207 } else {
208 conn.query_row(
209 "SELECT COUNT(*) FROM memories
210 WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
211 AND (?2 IS NULL OR namespace = ?2)",
212 rusqlite::params![cutoff_epoch, namespace_opt],
213 |r| r.get::<_, usize>(0),
214 )?
215 };
216
217 Ok((bytes_freed, oldest_deleted_at, count))
218}
219
220fn execute_purge(
221 tx: &rusqlite::Transaction,
222 namespace: &str,
223 name: Option<&str>,
224 cutoff_epoch: i64,
225 warnings: &mut Vec<String>,
226) -> Result<(), AppError> {
227 let candidates = select_candidates(tx, namespace, name, cutoff_epoch)?;
228
229 for (memory_id, _name) in &candidates {
230 if let Err(err) = tx.execute(
231 "DELETE FROM vec_chunks WHERE memory_id = ?1",
232 rusqlite::params![memory_id],
233 ) {
234 warnings.push(format!(
235 "failed to clean vec_chunks for memory_id {memory_id}: {err}"
236 ));
237 }
238 if let Err(err) = tx.execute(
239 "DELETE FROM vec_memories WHERE memory_id = ?1",
240 rusqlite::params![memory_id],
241 ) {
242 warnings.push(format!(
243 "failed to clean vec_memories for memory_id {memory_id}: {err}"
244 ));
245 }
246 tx.execute(
247 "DELETE FROM memories WHERE id = ?1 AND namespace = ?2 AND deleted_at IS NOT NULL",
248 rusqlite::params![memory_id, namespace],
249 )?;
250 }
251
252 Ok(())
253}
254
255fn select_candidates(
256 conn: &rusqlite::Connection,
257 namespace: &str,
258 name: Option<&str>,
259 cutoff_epoch: i64,
260) -> Result<Vec<(i64, String)>, AppError> {
261 let query = if name.is_some() {
262 "SELECT id, name FROM memories
263 WHERE namespace = ?1 AND name = ?2 AND deleted_at IS NOT NULL AND deleted_at <= ?3
264 ORDER BY deleted_at ASC"
265 } else {
266 "SELECT id, name FROM memories
267 WHERE namespace = ?1 AND deleted_at IS NOT NULL AND deleted_at <= ?2
268 ORDER BY deleted_at ASC"
269 };
270
271 let mut stmt = conn.prepare(query)?;
272 let rows = if let Some(name) = name {
273 stmt.query_map(rusqlite::params![namespace, name, cutoff_epoch], |row| {
274 Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
275 })?
276 .collect::<Result<Vec<_>, _>>()?
277 } else {
278 stmt.query_map(rusqlite::params![namespace, cutoff_epoch], |row| {
279 Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
280 })?
281 .collect::<Result<Vec<_>, _>>()?
282 };
283 Ok(rows)
284}
285
#[cfg(test)]
mod tests {
    use super::*;
    use rusqlite::Connection;

    /// Creates an in-memory database with the minimal schema the purge code touches.
    fn setup_test_db() -> Connection {
        let conn = Connection::open_in_memory().expect("failed to open in-memory db");
        conn.execute_batch(
            "CREATE TABLE memories (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                namespace TEXT NOT NULL DEFAULT 'global',
                description TEXT,
                body TEXT,
                deleted_at INTEGER
            );
            CREATE TABLE IF NOT EXISTS vec_chunks (memory_id INTEGER);
            CREATE TABLE IF NOT EXISTS vec_memories (memory_id INTEGER);",
        )
        .expect("failed to create test tables");
        conn
    }

    /// Inserts a soft-deleted memory (no description) and returns its row id.
    fn insert_deleted_memory(
        conn: &Connection,
        name: &str,
        namespace: &str,
        body: &str,
        deleted_at: i64,
    ) -> i64 {
        conn.execute(
            "INSERT INTO memories (name, namespace, body, deleted_at) VALUES (?1, ?2, ?3, ?4)",
            rusqlite::params![name, namespace, body, deleted_at],
        )
        .expect("failed to insert test memory");
        conn.last_insert_rowid()
    }

    /// Names of all rows left in `memories`, in insertion order.
    fn remaining_names(conn: &Connection) -> Vec<String> {
        let mut stmt = conn
            .prepare("SELECT name FROM memories ORDER BY id")
            .expect("failed to prepare name query");
        let names = stmt
            .query_map(rusqlite::params![], |row| row.get::<_, String>(0))
            .expect("failed to query names")
            .collect::<Result<Vec<_>, _>>()
            .expect("failed to read names");
        names
    }

    /// Number of rows in `table` (test-only; `table` is always a literal).
    fn row_count(conn: &Connection, table: &str) -> i64 {
        conn.query_row(
            &format!("SELECT COUNT(*) FROM {table}"),
            rusqlite::params![],
            |row| row.get(0),
        )
        .expect("failed to count rows")
    }

    #[test]
    fn retention_days_used_default_is_90() {
        assert_eq!(crate::constants::PURGE_RETENTION_DAYS_DEFAULT, 90u32);
    }

    #[test]
    fn compute_metrics_bytes_freed_positive_for_populated_body() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let old_epoch = now - 100 * 86_400;
        insert_deleted_memory(&conn, "mem-test", "global", "memory body", old_epoch);

        let cutoff = now - 30 * 86_400;
        let (bytes, oldest, count) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");

        assert!(bytes > 0, "bytes_freed must be > 0 for populated body");
        assert!(oldest.is_some(), "oldest_deleted_at must be Some");
        assert_eq!(count, 1);
    }

    #[test]
    fn compute_metrics_returns_zero_without_candidates() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let cutoff = now - 90 * 86_400;

        let (bytes, oldest, count) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");

        assert_eq!(bytes, 0);
        assert!(oldest.is_none());
        assert_eq!(count, 0);
    }

    #[test]
    fn compute_metrics_with_name_counts_only_that_memory() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let epoch_a = now - 300 * 86_400;
        let epoch_b = now - 200 * 86_400;
        insert_deleted_memory(&conn, "mem-a", "global", "body-a", epoch_a);
        insert_deleted_memory(&conn, "mem-b", "global", "body-b", epoch_b);

        let cutoff = now - 30 * 86_400;
        let (bytes, oldest, count) = compute_metrics(&conn, cutoff, Some("global"), Some("mem-a"))
            .expect("compute_metrics failed");

        assert_eq!(count, 1);
        // LENGTH("body-a") = 6, NULL description counts as 0, LENGTH("mem-a") = 5.
        assert_eq!(bytes, 11);
        assert_eq!(oldest, Some(epoch_a));
    }

    #[test]
    fn compute_metrics_cutoff_is_inclusive() {
        let conn = setup_test_db();
        let cutoff = 1_700_000_000;
        insert_deleted_memory(&conn, "mem-at-cutoff", "global", "x", cutoff);
        insert_deleted_memory(&conn, "mem-after-cutoff", "global", "x", cutoff + 1);

        let (_, oldest, count) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");

        assert_eq!(count, 1, "only the row at the cutoff must match");
        assert_eq!(oldest, Some(cutoff));
    }

    /// `run` skips `execute_purge` on a dry run and only calls `compute_metrics`,
    /// so this verifies that computing metrics never modifies the table.
    #[test]
    fn dry_run_does_not_delete_records() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let old_epoch = now - 200 * 86_400;
        insert_deleted_memory(&conn, "mem-dry", "global", "dry run content", old_epoch);

        let cutoff = now - 30 * 86_400;
        let (_, _, count_before) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");
        assert_eq!(count_before, 1, "must have 1 candidate before dry run");

        let (_, _, count_after) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");
        assert_eq!(
            count_after, 1,
            "dry_run must not remove records: count must remain 1"
        );
        assert_eq!(row_count(&conn, "memories"), 1);
    }

    #[test]
    fn oldest_deleted_at_returns_smallest_epoch() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let epoch_old = now - 300 * 86_400;
        let epoch_recent = now - 200 * 86_400;

        insert_deleted_memory(&conn, "mem-a", "global", "body-a", epoch_old);
        insert_deleted_memory(&conn, "mem-b", "global", "body-b", epoch_recent);

        let cutoff = now - 30 * 86_400;
        let (_, oldest, count) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");

        assert_eq!(count, 2);
        assert_eq!(
            oldest,
            Some(epoch_old),
            "oldest_deleted_at must be the oldest epoch"
        );
    }

    #[test]
    fn select_candidates_orders_by_oldest_deletion_first() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        // Inserted newest-first so insertion order differs from deletion order.
        let id_b = insert_deleted_memory(&conn, "mem-b", "global", "body-b", now - 200 * 86_400);
        let id_a = insert_deleted_memory(&conn, "mem-a", "global", "body-a", now - 300 * 86_400);
        insert_deleted_memory(&conn, "mem-recent", "global", "body-c", now - 86_400);

        let cutoff = now - 30 * 86_400;
        let candidates =
            select_candidates(&conn, "global", None, cutoff).expect("select_candidates failed");

        assert_eq!(
            candidates,
            vec![(id_a, "mem-a".to_string()), (id_b, "mem-b".to_string())]
        );
    }

    #[test]
    fn execute_purge_removes_only_expired_rows_in_namespace() {
        let mut conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let expired = now - 200 * 86_400;

        let expired_id = insert_deleted_memory(&conn, "mem-expired", "global", "old", expired);
        insert_deleted_memory(&conn, "mem-recent", "global", "recent", now - 86_400);
        insert_deleted_memory(&conn, "mem-other-ns", "other", "old", expired);
        conn.execute(
            "INSERT INTO memories (name, namespace, body) VALUES ('mem-live', 'global', 'live')",
            rusqlite::params![],
        )
        .expect("failed to insert live memory");
        conn.execute(
            "INSERT INTO vec_chunks (memory_id) VALUES (?1)",
            rusqlite::params![expired_id],
        )
        .expect("failed to insert vec_chunks row");
        conn.execute(
            "INSERT INTO vec_memories (memory_id) VALUES (?1)",
            rusqlite::params![expired_id],
        )
        .expect("failed to insert vec_memories row");

        let cutoff = now - 30 * 86_400;
        let mut warnings: Vec<String> = Vec::new();
        let tx = conn.transaction().expect("failed to open transaction");
        execute_purge(&tx, "global", None, cutoff, &mut warnings).expect("execute_purge failed");
        tx.commit().expect("failed to commit");

        assert!(warnings.is_empty(), "unexpected warnings: {warnings:?}");
        assert_eq!(
            remaining_names(&conn),
            vec!["mem-recent", "mem-other-ns", "mem-live"]
        );
        assert_eq!(row_count(&conn, "vec_chunks"), 0);
        assert_eq!(row_count(&conn, "vec_memories"), 0);
    }

    #[test]
    fn execute_purge_with_name_removes_only_that_memory() {
        let mut conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let expired = now - 200 * 86_400;
        insert_deleted_memory(&conn, "mem-keep", "global", "keep", expired);
        insert_deleted_memory(&conn, "mem-drop", "global", "drop", expired);

        let cutoff = now - 30 * 86_400;
        let mut warnings: Vec<String> = Vec::new();
        let tx = conn.transaction().expect("failed to open transaction");
        execute_purge(&tx, "global", Some("mem-drop"), cutoff, &mut warnings)
            .expect("execute_purge failed");
        tx.commit().expect("failed to commit");

        assert!(warnings.is_empty(), "unexpected warnings: {warnings:?}");
        assert_eq!(remaining_names(&conn), vec!["mem-keep"]);
    }

    // NOTE(review): this presumably relies on no namespace override being set in
    // the test environment; confirm against `resolve_namespace`.
    #[test]
    fn purge_args_namespace_accepts_none_without_default() {
        let resolved = crate::namespace::resolve_namespace(None)
            .expect("resolve_namespace(None) must return Ok");
        assert_eq!(
            resolved, "global",
            "without env var, resolve_namespace(None) must fall back to 'global'"
        );
    }

    #[test]
    fn purge_response_serializes_all_new_fields() {
        let resp = PurgeResponse {
            action: "purged".to_string(),
            purged_count: 3,
            bytes_freed: 1024,
            oldest_deleted_at: Some(1_700_000_000),
            retention_days_used: 90,
            dry_run: false,
            namespace: Some("global".to_string()),
            cutoff_epoch: 1_710_000_000,
            warnings: vec![],
            elapsed_ms: 42,
            message: None,
        };
        let json = serde_json::to_string(&resp).expect("serialization failed");
        assert!(json.contains("bytes_freed"));
        assert!(json.contains("oldest_deleted_at"));
        assert!(json.contains("retention_days_used"));
        assert!(json.contains("dry_run"));
        assert!(json.contains("elapsed_ms"));
        // `message` is skipped entirely when it is `None`.
        assert!(!json.contains("\"message\""));
    }

    #[test]
    fn purge_response_serializes_message_when_present() {
        let resp = PurgeResponse {
            action: "purged".to_string(),
            purged_count: 0,
            bytes_freed: 0,
            oldest_deleted_at: None,
            retention_days_used: 90,
            dry_run: false,
            namespace: Some("global".to_string()),
            cutoff_epoch: 1_710_000_000,
            warnings: vec![],
            elapsed_ms: 5,
            message: Some(
                "no soft-deleted memories older than 90 day(s); use --retention-days 0 to purge all soft-deleted memories regardless of age"
                    .to_string(),
            ),
        };
        let json = serde_json::to_string(&resp).expect("serialization failed");
        assert!(json.contains("\"message\""));
        assert!(json.contains("--retention-days 0"));
    }
}