Skip to main content

sqlite_graphrag/commands/
purge.rs

1//! Handler for the `purge` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::i18n::errors_msg;
5use crate::output;
6use crate::paths::AppPaths;
7use crate::storage::connection::open_rw;
8use serde::Serialize;
9
10#[derive(clap::Args)]
11#[command(after_long_help = "EXAMPLES:\n  \
12    # Permanently delete soft-deleted memories older than 90 days (default retention)\n  \
13    sqlite-graphrag purge\n\n  \
14    # Custom retention window in days\n  \
15    sqlite-graphrag purge --retention-days 30\n\n  \
16    # Purge ALL soft-deleted memories regardless of age\n  \
17    sqlite-graphrag purge --retention-days 0\n\n  \
18    # Preview what would be purged without deleting\n  \
19    sqlite-graphrag purge --dry-run\n\n  \
20    # Purge a specific memory by name\n  \
21    sqlite-graphrag purge --name old-memory --namespace my-project\n\n\
22NOTES:\n  \
23    `--yes` only confirms intent and does NOT override `--retention-days`.\n  \
24    To wipe every soft-deleted memory immediately, pair `--yes` with `--retention-days 0`.")]
25pub struct PurgeArgs {
26    #[arg(long)]
27    pub name: Option<String>,
28    /// Namespace to purge. Defaults to the contextual namespace (SQLITE_GRAPHRAG_NAMESPACE env var or "global").
29    #[arg(long)]
30    pub namespace: Option<String>,
31    /// Retention days: memories with deleted_at older than (now - retention_days*86400) will be
32    /// permanently removed. Default: PURGE_RETENTION_DAYS_DEFAULT (90). Use 0 to purge all
33    /// soft-deleted memories regardless of age. Alias: `--max-age-days`.
34    #[arg(
35        long,
36        alias = "days",
37        alias = "max-age-days",
38        value_name = "DAYS",
39        default_value_t = crate::constants::PURGE_RETENTION_DAYS_DEFAULT
40    )]
41    pub retention_days: u32,
42    /// [DEPRECATED in v2.0.0] Legacy alias — use --retention-days instead.
43    #[arg(long, hide = true)]
44    pub older_than_seconds: Option<u64>,
45    /// Does not execute DELETE: computes and reports what WOULD be purged.
46    #[arg(long, default_value_t = false)]
47    pub dry_run: bool,
48    /// Confirms destructive intent for tools that require explicit acknowledgement.
49    /// Does NOT override `--retention-days`: combine with `--retention-days 0` to wipe
50    /// every soft-deleted memory regardless of age.
51    #[arg(long, default_value_t = false)]
52    pub yes: bool,
53    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
54    pub json: bool,
55    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
56    pub db: Option<String>,
57}
58
59#[derive(Serialize)]
60pub struct PurgeResponse {
61    pub purged_count: usize,
62    pub bytes_freed: i64,
63    pub oldest_deleted_at: Option<i64>,
64    pub retention_days_used: u32,
65    pub dry_run: bool,
66    pub namespace: Option<String>,
67    pub cutoff_epoch: i64,
68    pub warnings: Vec<String>,
69    /// Total execution time in milliseconds from handler start to serialisation.
70    pub elapsed_ms: u64,
71    /// Human-readable explanation surfaced when nothing was purged so callers
72    /// understand the retention semantics. Present only when
73    /// `purged_count == 0` (M2 in v1.0.32) — kept absent otherwise to preserve
74    /// the existing JSON contract.
75    #[serde(skip_serializing_if = "Option::is_none")]
76    pub message: Option<String>,
77}
78
79/// Permanently delete soft-deleted memories that have exceeded the retention window.
80///
81/// Only memories with `deleted_at IS NOT NULL AND deleted_at <= cutoff_epoch` are affected.
82/// When `--dry-run` is set the DELETE is skipped and the response reflects candidates only.
83pub fn run(args: PurgeArgs) -> Result<(), AppError> {
84    let inicio = std::time::Instant::now();
85    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
86    let paths = AppPaths::resolve(args.db.as_deref())?;
87
88    crate::storage::connection::ensure_db_ready(&paths)?;
89
90    let mut warnings: Vec<String> = Vec::with_capacity(1);
91    let now = current_epoch()?;
92
93    let cutoff_epoch = if let Some(secs) = args.older_than_seconds {
94        warnings.push(
95            "--older-than-seconds is deprecated; use --retention-days in v2.0.0+".to_string(),
96        );
97        now - secs as i64
98    } else {
99        now - (args.retention_days as i64) * 86_400
100    };
101
102    let namespace_opt: Option<&str> = Some(namespace.as_str());
103
104    let mut conn = open_rw(&paths.db)?;
105
106    let (bytes_freed, oldest_deleted_at, candidates_count) =
107        compute_metrics(&conn, cutoff_epoch, namespace_opt, args.name.as_deref())?;
108
109    if candidates_count == 0 && args.name.is_some() {
110        return Err(AppError::NotFound(
111            errors_msg::soft_deleted_memory_not_found(
112                args.name.as_deref().unwrap_or_default(),
113                &namespace,
114            ),
115        ));
116    }
117
118    if !args.dry_run {
119        let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
120        execute_purge(
121            &tx,
122            &namespace,
123            args.name.as_deref(),
124            cutoff_epoch,
125            &mut warnings,
126        )?;
127        tx.commit()?;
128        conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
129    }
130
131    let message = if candidates_count == 0 {
132        Some(format!(
133            "no soft-deleted memories older than {retention_days} day(s); use --retention-days 0 to purge all soft-deleted memories regardless of age",
134            retention_days = args.retention_days
135        ))
136    } else {
137        None
138    };
139
140    output::emit_json(&PurgeResponse {
141        purged_count: candidates_count,
142        bytes_freed,
143        oldest_deleted_at,
144        retention_days_used: args.retention_days,
145        dry_run: args.dry_run,
146        namespace: Some(namespace),
147        cutoff_epoch,
148        warnings,
149        elapsed_ms: inicio.elapsed().as_millis() as u64,
150        message,
151    })?;
152
153    Ok(())
154}
155
156fn current_epoch() -> Result<i64, AppError> {
157    let now = std::time::SystemTime::now()
158        .duration_since(std::time::UNIX_EPOCH)
159        .map_err(|err| AppError::Internal(anyhow::anyhow!("system clock error: {err}")))?;
160    Ok(now.as_secs() as i64)
161}
162
163fn compute_metrics(
164    conn: &rusqlite::Connection,
165    cutoff_epoch: i64,
166    namespace_opt: Option<&str>,
167    name: Option<&str>,
168) -> Result<(i64, Option<i64>, usize), AppError> {
169    let (bytes_freed, oldest_deleted_at): (i64, Option<i64>) = if let Some(name) = name {
170        conn.query_row(
171            "SELECT COALESCE(SUM(LENGTH(COALESCE(body,'')) + LENGTH(COALESCE(description,'')) + LENGTH(name)), 0),
172                    MIN(deleted_at)
173             FROM memories
174             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
175                   AND (?2 IS NULL OR namespace = ?2)
176                   AND name = ?3",
177            rusqlite::params![cutoff_epoch, namespace_opt, name],
178            |r| Ok((r.get::<_, i64>(0)?, r.get::<_, Option<i64>>(1)?)),
179        )?
180    } else {
181        conn.query_row(
182            "SELECT COALESCE(SUM(LENGTH(COALESCE(body,'')) + LENGTH(COALESCE(description,'')) + LENGTH(name)), 0),
183                    MIN(deleted_at)
184             FROM memories
185             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
186                   AND (?2 IS NULL OR namespace = ?2)",
187            rusqlite::params![cutoff_epoch, namespace_opt],
188            |r| Ok((r.get::<_, i64>(0)?, r.get::<_, Option<i64>>(1)?)),
189        )?
190    };
191
192    let count: usize = if let Some(name) = name {
193        conn.query_row(
194            "SELECT COUNT(*) FROM memories
195             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
196                   AND (?2 IS NULL OR namespace = ?2)
197                   AND name = ?3",
198            rusqlite::params![cutoff_epoch, namespace_opt, name],
199            |r| r.get::<_, usize>(0),
200        )?
201    } else {
202        conn.query_row(
203            "SELECT COUNT(*) FROM memories
204             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
205                   AND (?2 IS NULL OR namespace = ?2)",
206            rusqlite::params![cutoff_epoch, namespace_opt],
207            |r| r.get::<_, usize>(0),
208        )?
209    };
210
211    Ok((bytes_freed, oldest_deleted_at, count))
212}
213
214fn execute_purge(
215    tx: &rusqlite::Transaction,
216    namespace: &str,
217    name: Option<&str>,
218    cutoff_epoch: i64,
219    warnings: &mut Vec<String>,
220) -> Result<(), AppError> {
221    let candidates = select_candidates(tx, namespace, name, cutoff_epoch)?;
222
223    for (memory_id, _name) in &candidates {
224        if let Err(err) = tx.execute(
225            "DELETE FROM vec_chunks WHERE memory_id = ?1",
226            rusqlite::params![memory_id],
227        ) {
228            warnings.push(format!(
229                "failed to clean vec_chunks for memory_id {memory_id}: {err}"
230            ));
231        }
232        if let Err(err) = tx.execute(
233            "DELETE FROM vec_memories WHERE memory_id = ?1",
234            rusqlite::params![memory_id],
235        ) {
236            warnings.push(format!(
237                "failed to clean vec_memories for memory_id {memory_id}: {err}"
238            ));
239        }
240        tx.execute(
241            "DELETE FROM memories WHERE id = ?1 AND namespace = ?2 AND deleted_at IS NOT NULL",
242            rusqlite::params![memory_id, namespace],
243        )?;
244    }
245
246    Ok(())
247}
248
249fn select_candidates(
250    conn: &rusqlite::Connection,
251    namespace: &str,
252    name: Option<&str>,
253    cutoff_epoch: i64,
254) -> Result<Vec<(i64, String)>, AppError> {
255    let query = if name.is_some() {
256        "SELECT id, name FROM memories
257         WHERE namespace = ?1 AND name = ?2 AND deleted_at IS NOT NULL AND deleted_at <= ?3
258         ORDER BY deleted_at ASC"
259    } else {
260        "SELECT id, name FROM memories
261         WHERE namespace = ?1 AND deleted_at IS NOT NULL AND deleted_at <= ?2
262         ORDER BY deleted_at ASC"
263    };
264
265    let mut stmt = conn.prepare(query)?;
266    let rows = if let Some(name) = name {
267        stmt.query_map(rusqlite::params![namespace, name, cutoff_epoch], |row| {
268            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
269        })?
270        .collect::<Result<Vec<_>, _>>()?
271    } else {
272        stmt.query_map(rusqlite::params![namespace, cutoff_epoch], |row| {
273            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
274        })?
275        .collect::<Result<Vec<_>, _>>()?
276    };
277    Ok(rows)
278}
279
#[cfg(test)]
mod tests {
    use super::*;
    use rusqlite::Connection;

    /// Opens an in-memory database with the minimal schema the purge helpers touch.
    /// The vector tables are plain tables here, which is enough for DELETE statements.
    fn setup_test_db() -> Connection {
        let conn = Connection::open_in_memory().expect("failed to open in-memory db");
        conn.execute_batch(
            "CREATE TABLE memories (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                namespace TEXT NOT NULL DEFAULT 'global',
                description TEXT,
                body TEXT,
                deleted_at INTEGER
            );
            CREATE TABLE IF NOT EXISTS vec_chunks (memory_id INTEGER);
            CREATE TABLE IF NOT EXISTS vec_memories (memory_id INTEGER);",
        )
        .expect("failed to create test tables");
        conn
    }

    /// Inserts a soft-deleted memory and returns its row id.
    fn insert_deleted_memory(
        conn: &Connection,
        name: &str,
        namespace: &str,
        body: &str,
        deleted_at: i64,
    ) -> i64 {
        conn.execute(
            "INSERT INTO memories (name, namespace, body, deleted_at) VALUES (?1, ?2, ?3, ?4)",
            rusqlite::params![name, namespace, body, deleted_at],
        )
        .expect("failed to insert test memory");
        conn.last_insert_rowid()
    }

    /// Counts the rows of `table` without applying any filter.
    fn count_rows(conn: &Connection, table: &str) -> i64 {
        conn.query_row(
            &format!("SELECT COUNT(*) FROM {table}"),
            rusqlite::params![],
            |r| r.get(0),
        )
        .expect("failed to count rows")
    }

    #[test]
    fn retention_days_used_default_is_90() {
        assert_eq!(crate::constants::PURGE_RETENTION_DAYS_DEFAULT, 90u32);
    }

    #[test]
    fn compute_metrics_bytes_freed_positive_for_populated_body() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let old_epoch = now - 100 * 86_400;
        insert_deleted_memory(&conn, "mem-test", "global", "memory body", old_epoch);

        let cutoff = now - 30 * 86_400;
        let (bytes, oldest, count) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");

        assert!(bytes > 0, "bytes_freed must be > 0 for populated body");
        assert!(oldest.is_some(), "oldest_deleted_at must be Some");
        assert_eq!(count, 1);
    }

    #[test]
    fn compute_metrics_returns_zero_without_candidates() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let cutoff = now - 90 * 86_400;

        let (bytes, oldest, count) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");

        assert_eq!(bytes, 0);
        assert!(oldest.is_none());
        assert_eq!(count, 0);
    }

    #[test]
    fn dry_run_does_not_delete_records() {
        // A dry run only goes through `compute_metrics`, so this pins that the metrics
        // path is read-only: repeated calls see the same candidate and the row stays.
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let old_epoch = now - 200 * 86_400;
        insert_deleted_memory(&conn, "mem-dry", "global", "dry run content", old_epoch);

        let cutoff = now - 30 * 86_400;
        let (_, _, count_before) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");
        assert_eq!(count_before, 1, "must have 1 candidate before dry run");

        let (_, _, count_after) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");
        assert_eq!(
            count_after, 1,
            "dry_run must not remove records: count must remain 1"
        );
        assert_eq!(
            count_rows(&conn, "memories"),
            1,
            "the row must still be stored after computing metrics"
        );
    }

    #[test]
    fn oldest_deleted_at_returns_smallest_epoch() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let epoch_old = now - 300 * 86_400;
        let epoch_recent = now - 200 * 86_400;

        insert_deleted_memory(&conn, "mem-a", "global", "body-a", epoch_old);
        insert_deleted_memory(&conn, "mem-b", "global", "body-b", epoch_recent);

        let cutoff = now - 30 * 86_400;
        let (_, oldest, count) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");

        assert_eq!(count, 2);
        assert_eq!(
            oldest,
            Some(epoch_old),
            "oldest_deleted_at must be the oldest epoch"
        );
    }

    #[test]
    fn select_candidates_filters_by_name_and_orders_oldest_first() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        // Inserted newest-first so the ordering assertion cannot pass by insertion order.
        insert_deleted_memory(&conn, "mem-mid", "global", "mid", now - 200 * 86_400);
        insert_deleted_memory(&conn, "mem-old", "global", "old", now - 300 * 86_400);
        insert_deleted_memory(&conn, "mem-new", "global", "new", now - 86_400);

        let cutoff = now - 30 * 86_400;

        let all = select_candidates(&conn, "global", None, cutoff)
            .expect("select_candidates failed");
        let names: Vec<&str> = all.iter().map(|(_, name)| name.as_str()).collect();
        assert_eq!(names, vec!["mem-old", "mem-mid"]);

        let named = select_candidates(&conn, "global", Some("mem-mid"), cutoff)
            .expect("select_candidates failed");
        assert_eq!(named.len(), 1);
        assert_eq!(named[0].1, "mem-mid");

        let too_recent = select_candidates(&conn, "global", Some("mem-new"), cutoff)
            .expect("select_candidates failed");
        assert!(
            too_recent.is_empty(),
            "a named memory inside the retention window must not be a candidate"
        );
    }

    #[test]
    fn execute_purge_removes_only_expired_rows_in_namespace() {
        let mut conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let expired = now - 100 * 86_400;
        let recent = now - 86_400;

        let purged_id = insert_deleted_memory(&conn, "mem-expired", "global", "old", expired);
        insert_deleted_memory(&conn, "mem-recent", "global", "new", recent);
        insert_deleted_memory(&conn, "mem-other-ns", "other", "old", expired);
        conn.execute(
            "INSERT INTO vec_chunks (memory_id) VALUES (?1)",
            rusqlite::params![purged_id],
        )
        .expect("failed to insert vec_chunks row");
        conn.execute(
            "INSERT INTO vec_memories (memory_id) VALUES (?1)",
            rusqlite::params![purged_id],
        )
        .expect("failed to insert vec_memories row");

        let cutoff = now - 30 * 86_400;
        let mut warnings = Vec::new();
        let tx = conn.transaction().expect("failed to open transaction");
        execute_purge(&tx, "global", None, cutoff, &mut warnings).expect("execute_purge failed");
        tx.commit().expect("commit failed");

        assert!(warnings.is_empty(), "unexpected warnings: {warnings:?}");

        let mut stmt = conn
            .prepare("SELECT name FROM memories ORDER BY id")
            .expect("failed to prepare query");
        let remaining: Vec<String> = stmt
            .query_map(rusqlite::params![], |r| r.get(0))
            .expect("failed to query remaining rows")
            .collect::<Result<_, _>>()
            .expect("failed to read remaining rows");
        assert_eq!(remaining, vec!["mem-recent", "mem-other-ns"]);

        assert_eq!(count_rows(&conn, "vec_chunks"), 0);
        assert_eq!(count_rows(&conn, "vec_memories"), 0);
    }

    #[test]
    fn purge_args_namespace_accepts_none_without_default() {
        // P1-C: namespace must be None when not provided, allowing resolve_namespace
        // to consult SQLITE_GRAPHRAG_NAMESPACE before falling back to "global".
        // The field was `default_value = "global"` before P1-C; with that removed,
        // resolve_namespace(None) consults the env var correctly.
        // NOTE(review): this presumably fails when SQLITE_GRAPHRAG_NAMESPACE is set in
        // the test environment — confirm against resolve_namespace.
        let resolved = crate::namespace::resolve_namespace(None)
            .expect("resolve_namespace(None) must return Ok");
        assert_eq!(
            resolved, "global",
            "without env var, resolve_namespace(None) must fall back to 'global'"
        );
    }

    #[test]
    fn purge_response_serializes_all_new_fields() {
        let resp = PurgeResponse {
            purged_count: 3,
            bytes_freed: 1024,
            oldest_deleted_at: Some(1_700_000_000),
            retention_days_used: 90,
            dry_run: false,
            namespace: Some("global".to_string()),
            cutoff_epoch: 1_710_000_000,
            warnings: vec![],
            elapsed_ms: 42,
            message: None,
        };
        let json = serde_json::to_string(&resp).expect("serialization failed");
        assert!(json.contains("bytes_freed"));
        assert!(json.contains("oldest_deleted_at"));
        assert!(json.contains("retention_days_used"));
        assert!(json.contains("dry_run"));
        assert!(json.contains("elapsed_ms"));
        // M2: `message` is `None` whenever rows were purged, and a `None` message is
        // omitted from the JSON to keep payloads stable.
        assert!(!json.contains("\"message\""));
    }

    #[test]
    fn purge_response_serializes_message_when_present() {
        // M2 (v1.0.32): zero purges include a human-readable hint message.
        let resp = PurgeResponse {
            purged_count: 0,
            bytes_freed: 0,
            oldest_deleted_at: None,
            retention_days_used: 90,
            dry_run: false,
            namespace: Some("global".to_string()),
            cutoff_epoch: 1_710_000_000,
            warnings: vec![],
            elapsed_ms: 5,
            message: Some(
                "no soft-deleted memories older than 90 day(s); use --retention-days 0 to purge all soft-deleted memories regardless of age"
                    .to_string(),
            ),
        };
        let json = serde_json::to_string(&resp).expect("serialization failed");
        assert!(json.contains("\"message\""));
        assert!(json.contains("--retention-days 0"));
    }
}