Skip to main content

sqlite_graphrag/commands/
purge.rs

1//! Handler for the `purge` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::i18n::errors_msg;
5use crate::output;
6use crate::paths::AppPaths;
7use crate::storage::connection::open_rw;
8use serde::Serialize;
9
10#[derive(clap::Args)]
11#[command(after_long_help = "EXAMPLES:\n  \
12    # Permanently delete soft-deleted memories older than 90 days (default retention)\n  \
13    sqlite-graphrag purge\n\n  \
14    # Custom retention window in days\n  \
15    sqlite-graphrag purge --retention-days 30\n\n  \
16    # Purge ALL soft-deleted memories regardless of age\n  \
17    sqlite-graphrag purge --retention-days 0\n\n  \
18    # Preview what would be purged without deleting\n  \
19    sqlite-graphrag purge --dry-run\n\n  \
20    # Purge a specific memory by name\n  \
21    sqlite-graphrag purge --name old-memory --namespace my-project\n\n\
22NOTES:\n  \
23    `--yes` only confirms intent and does NOT override `--retention-days`.\n  \
24    To wipe every soft-deleted memory immediately, pair `--yes` with `--retention-days 0`.")]
25pub struct PurgeArgs {
26    #[arg(long)]
27    pub name: Option<String>,
28    /// Namespace to purge. Defaults to the contextual namespace (SQLITE_GRAPHRAG_NAMESPACE env var or "global").
29    #[arg(long)]
30    pub namespace: Option<String>,
31    /// Retention days: memories with deleted_at older than (now - retention_days*86400) will be
32    /// permanently removed. Default: PURGE_RETENTION_DAYS_DEFAULT (90). Use 0 to purge all
33    /// soft-deleted memories regardless of age. Alias: `--max-age-days`.
34    #[arg(
35        long,
36        alias = "days",
37        alias = "max-age-days",
38        value_name = "DAYS",
39        default_value_t = crate::constants::PURGE_RETENTION_DAYS_DEFAULT
40    )]
41    pub retention_days: u32,
42    /// [DEPRECATED in v2.0.0] Legacy alias — use --retention-days instead.
43    #[arg(long, hide = true)]
44    pub older_than_seconds: Option<u64>,
45    /// Does not execute DELETE: computes and reports what WOULD be purged.
46    #[arg(long, default_value_t = false)]
47    pub dry_run: bool,
48    /// Confirms destructive intent for tools that require explicit acknowledgement.
49    /// Does NOT override `--retention-days`: combine with `--retention-days 0` to wipe
50    /// every soft-deleted memory regardless of age.
51    #[arg(long, default_value_t = false)]
52    pub yes: bool,
53    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
54    pub json: bool,
55    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
56    pub db: Option<String>,
57}
58
59#[derive(Serialize)]
60pub struct PurgeResponse {
61    pub action: String,
62    pub purged_count: usize,
63    pub bytes_freed: i64,
64    pub oldest_deleted_at: Option<i64>,
65    pub retention_days_used: u32,
66    pub dry_run: bool,
67    pub namespace: Option<String>,
68    pub cutoff_epoch: i64,
69    pub warnings: Vec<String>,
70    /// Total execution time in milliseconds from handler start to serialisation.
71    pub elapsed_ms: u64,
72    /// Human-readable explanation surfaced when nothing was purged so callers
73    /// understand the retention semantics. Present only when
74    /// `purged_count == 0` (M2 in v1.0.32) — kept absent otherwise to preserve
75    /// the existing JSON contract.
76    #[serde(skip_serializing_if = "Option::is_none")]
77    pub message: Option<String>,
78}
79
80/// Permanently delete soft-deleted memories that have exceeded the retention window.
81///
82/// Only memories with `deleted_at IS NOT NULL AND deleted_at <= cutoff_epoch` are affected.
83/// When `--dry-run` is set the DELETE is skipped and the response reflects candidates only.
84pub fn run(args: PurgeArgs) -> Result<(), AppError> {
85    let inicio = std::time::Instant::now();
86    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
87    let paths = AppPaths::resolve(args.db.as_deref())?;
88
89    crate::storage::connection::ensure_db_ready(&paths)?;
90
91    let mut warnings: Vec<String> = Vec::with_capacity(1);
92    let now = current_epoch()?;
93
94    let cutoff_epoch = if let Some(secs) = args.older_than_seconds {
95        warnings.push(
96            "--older-than-seconds is deprecated; use --retention-days in v2.0.0+".to_string(),
97        );
98        now - secs as i64
99    } else {
100        now - (args.retention_days as i64) * 86_400
101    };
102
103    let namespace_opt: Option<&str> = Some(namespace.as_str());
104
105    let mut conn = open_rw(&paths.db)?;
106
107    let (bytes_freed, oldest_deleted_at, candidates_count) =
108        compute_metrics(&conn, cutoff_epoch, namespace_opt, args.name.as_deref())?;
109
110    if candidates_count == 0 && args.name.is_some() {
111        return Err(AppError::NotFound(
112            errors_msg::soft_deleted_memory_not_found(
113                args.name.as_deref().unwrap_or_default(),
114                &namespace,
115            ),
116        ));
117    }
118
119    if !args.dry_run && !args.yes {
120        return Err(AppError::Validation(
121            "destructive operation: pass --yes to confirm purge (use --dry-run to preview)"
122                .to_string(),
123        ));
124    }
125
126    if !args.dry_run {
127        let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
128        execute_purge(
129            &tx,
130            &paths.db,
131            &namespace,
132            args.name.as_deref(),
133            cutoff_epoch,
134            &mut warnings,
135        )?;
136        tx.commit()?;
137        conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
138    }
139
140    let message = if candidates_count == 0 {
141        Some(format!(
142            "no soft-deleted memories older than {retention_days} day(s); use --retention-days 0 to purge all soft-deleted memories regardless of age",
143            retention_days = args.retention_days
144        ))
145    } else {
146        None
147    };
148
149    output::emit_json(&PurgeResponse {
150        action: if args.dry_run {
151            "dry_run".to_string()
152        } else {
153            "purged".to_string()
154        },
155        purged_count: candidates_count,
156        bytes_freed,
157        oldest_deleted_at,
158        retention_days_used: args.retention_days,
159        dry_run: args.dry_run,
160        namespace: Some(namespace),
161        cutoff_epoch,
162        warnings,
163        elapsed_ms: inicio.elapsed().as_millis() as u64,
164        message,
165    })?;
166
167    Ok(())
168}
169
170fn current_epoch() -> Result<i64, AppError> {
171    let now = std::time::SystemTime::now()
172        .duration_since(std::time::UNIX_EPOCH)
173        .map_err(|err| AppError::Internal(anyhow::anyhow!("system clock error: {err}")))?;
174    Ok(now.as_secs() as i64)
175}
176
177fn compute_metrics(
178    conn: &rusqlite::Connection,
179    cutoff_epoch: i64,
180    namespace_opt: Option<&str>,
181    name: Option<&str>,
182) -> Result<(i64, Option<i64>, usize), AppError> {
183    let (bytes_freed, oldest_deleted_at): (i64, Option<i64>) = if let Some(name) = name {
184        conn.query_row(
185            "SELECT COALESCE(SUM(LENGTH(COALESCE(body,'')) + LENGTH(COALESCE(description,'')) + LENGTH(name)), 0),
186                    MIN(deleted_at)
187             FROM memories
188             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
189                   AND (?2 IS NULL OR namespace = ?2)
190                   AND name = ?3",
191            rusqlite::params![cutoff_epoch, namespace_opt, name],
192            |r| Ok((r.get::<_, i64>(0)?, r.get::<_, Option<i64>>(1)?)),
193        )?
194    } else {
195        conn.query_row(
196            "SELECT COALESCE(SUM(LENGTH(COALESCE(body,'')) + LENGTH(COALESCE(description,'')) + LENGTH(name)), 0),
197                    MIN(deleted_at)
198             FROM memories
199             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
200                   AND (?2 IS NULL OR namespace = ?2)",
201            rusqlite::params![cutoff_epoch, namespace_opt],
202            |r| Ok((r.get::<_, i64>(0)?, r.get::<_, Option<i64>>(1)?)),
203        )?
204    };
205
206    let count: usize = if let Some(name) = name {
207        conn.query_row(
208            "SELECT COUNT(*) FROM memories
209             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
210                   AND (?2 IS NULL OR namespace = ?2)
211                   AND name = ?3",
212            rusqlite::params![cutoff_epoch, namespace_opt, name],
213            |r| r.get::<_, usize>(0),
214        )?
215    } else {
216        conn.query_row(
217            "SELECT COUNT(*) FROM memories
218             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
219                   AND (?2 IS NULL OR namespace = ?2)",
220            rusqlite::params![cutoff_epoch, namespace_opt],
221            |r| r.get::<_, usize>(0),
222        )?
223    };
224
225    Ok((bytes_freed, oldest_deleted_at, count))
226}
227
228fn execute_purge(
229    tx: &rusqlite::Transaction,
230    db_path: &std::path::Path,
231    namespace: &str,
232    name: Option<&str>,
233    cutoff_epoch: i64,
234    warnings: &mut Vec<String>,
235) -> Result<(), AppError> {
236    let candidates = select_candidates(tx, namespace, name, cutoff_epoch)?;
237
238    for (memory_id, name) in &candidates {
239        // GAP-SG-13: cascade-clean the enrich-queue sidecar for each purged
240        // memory (best-effort; no-op when the queue file is absent).
241        crate::commands::enrich::cleanup_queue_entry(db_path, *memory_id, name);
242        if let Err(err) = tx.execute(
243            "DELETE FROM vec_chunks WHERE memory_id = ?1",
244            rusqlite::params![memory_id],
245        ) {
246            warnings.push(format!(
247                "failed to clean vec_chunks for memory_id {memory_id}: {err}"
248            ));
249        }
250        if let Err(err) = tx.execute(
251            "DELETE FROM vec_memories WHERE memory_id = ?1",
252            rusqlite::params![memory_id],
253        ) {
254            warnings.push(format!(
255                "failed to clean vec_memories for memory_id {memory_id}: {err}"
256            ));
257        }
258        tx.execute(
259            "DELETE FROM memories WHERE id = ?1 AND namespace = ?2 AND deleted_at IS NOT NULL",
260            rusqlite::params![memory_id, namespace],
261        )?;
262    }
263
264    Ok(())
265}
266
267fn select_candidates(
268    conn: &rusqlite::Connection,
269    namespace: &str,
270    name: Option<&str>,
271    cutoff_epoch: i64,
272) -> Result<Vec<(i64, String)>, AppError> {
273    let query = if name.is_some() {
274        "SELECT id, name FROM memories
275         WHERE namespace = ?1 AND name = ?2 AND deleted_at IS NOT NULL AND deleted_at <= ?3
276         ORDER BY deleted_at ASC"
277    } else {
278        "SELECT id, name FROM memories
279         WHERE namespace = ?1 AND deleted_at IS NOT NULL AND deleted_at <= ?2
280         ORDER BY deleted_at ASC"
281    };
282
283    let mut stmt = conn.prepare_cached(query)?;
284    let rows = if let Some(name) = name {
285        stmt.query_map(rusqlite::params![namespace, name, cutoff_epoch], |row| {
286            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
287        })?
288        .collect::<Result<Vec<_>, _>>()?
289    } else {
290        stmt.query_map(rusqlite::params![namespace, cutoff_epoch], |row| {
291            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
292        })?
293        .collect::<Result<Vec<_>, _>>()?
294    };
295    Ok(rows)
296}
297
#[cfg(test)]
mod tests {
    use super::*;
    use rusqlite::Connection;

    /// Seconds in one day; keeps the epoch arithmetic in the tests readable.
    const DAY: i64 = 86_400;

    /// Creates an in-memory database holding the subset of the schema these tests touch.
    fn setup_test_db() -> Connection {
        let conn = Connection::open_in_memory().expect("failed to open in-memory db");
        conn.execute_batch(
            "CREATE TABLE memories (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                namespace TEXT NOT NULL DEFAULT 'global',
                description TEXT,
                body TEXT,
                deleted_at INTEGER
            );
            CREATE TABLE IF NOT EXISTS vec_chunks (memory_id INTEGER);
            CREATE TABLE IF NOT EXISTS vec_memories (memory_id INTEGER);",
        )
        .expect("failed to create test tables");
        conn
    }

    /// Inserts a soft-deleted memory and returns its row id.
    fn insert_deleted_memory(
        conn: &Connection,
        name: &str,
        namespace: &str,
        body: &str,
        deleted_at: i64,
    ) -> i64 {
        conn.execute(
            "INSERT INTO memories (name, namespace, body, deleted_at) VALUES (?1, ?2, ?3, ?4)",
            rusqlite::params![name, namespace, body, deleted_at],
        )
        .expect("failed to insert test memory");
        conn.last_insert_rowid()
    }

    /// Counts every row in `memories`, independent of any purge filter.
    fn total_rows(conn: &Connection) -> i64 {
        conn.query_row("SELECT COUNT(*) FROM memories", rusqlite::params![], |r| {
            r.get::<_, i64>(0)
        })
        .expect("failed to count memories")
    }

    #[test]
    fn retention_days_used_default_is_90() {
        assert_eq!(crate::constants::PURGE_RETENTION_DAYS_DEFAULT, 90u32);
    }

    #[test]
    fn compute_metrics_bytes_freed_positive_for_populated_body() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let old_epoch = now - 100 * DAY;
        insert_deleted_memory(&conn, "mem-test", "global", "memory body", old_epoch);

        let cutoff = now - 30 * DAY;
        let (bytes, oldest, count) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");

        assert!(bytes > 0, "bytes_freed must be > 0 for populated body");
        assert!(oldest.is_some(), "oldest_deleted_at must be Some");
        assert_eq!(count, 1);
    }

    #[test]
    fn compute_metrics_returns_zero_without_candidates() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let cutoff = now - 90 * DAY;

        let (bytes, oldest, count) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");

        assert_eq!(bytes, 0);
        assert!(oldest.is_none());
        assert_eq!(count, 0);
    }

    #[test]
    fn compute_metrics_honours_name_and_namespace_filters() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let old_epoch = now - 200 * DAY;
        insert_deleted_memory(&conn, "mem-a", "global", "body-a", old_epoch);
        insert_deleted_memory(&conn, "mem-b", "global", "body-b", old_epoch);
        insert_deleted_memory(&conn, "mem-a", "other", "body-c", old_epoch);

        let cutoff = now - 30 * DAY;

        let (_, _, by_name) = compute_metrics(&conn, cutoff, Some("global"), Some("mem-a"))
            .expect("compute_metrics failed");
        assert_eq!(by_name, 1, "name filter must narrow to a single memory");

        let (_, _, by_namespace) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");
        assert_eq!(by_namespace, 2, "namespace filter must exclude 'other'");

        let (_, _, unscoped) =
            compute_metrics(&conn, cutoff, None, None).expect("compute_metrics failed");
        assert_eq!(unscoped, 3, "a None namespace must match every namespace");
    }

    #[test]
    fn compute_metrics_cutoff_is_inclusive() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let cutoff = now - 30 * DAY;
        insert_deleted_memory(&conn, "mem-edge", "global", "edge", cutoff);
        insert_deleted_memory(&conn, "mem-newer", "global", "newer", cutoff + 1);

        let (_, oldest, count) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");

        assert_eq!(count, 1, "only deleted_at <= cutoff qualifies");
        assert_eq!(oldest, Some(cutoff));
    }

    #[test]
    fn dry_run_does_not_delete_records() {
        // A dry run executes `compute_metrics` and nothing else against the data,
        // so the property to pin is that computing metrics leaves the table untouched.
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let old_epoch = now - 200 * DAY;
        insert_deleted_memory(&conn, "mem-dry", "global", "dry run content", old_epoch);
        let rows_before = total_rows(&conn);

        let cutoff = now - 30 * DAY;
        let (_, _, count_before) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");
        assert_eq!(count_before, 1, "must have 1 candidate before dry run");

        let (_, _, count_after) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");
        assert_eq!(
            count_after, 1,
            "dry_run must not remove records: count must remain 1"
        );
        assert_eq!(
            total_rows(&conn),
            rows_before,
            "computing metrics must not modify the memories table"
        );
    }

    #[test]
    fn oldest_deleted_at_returns_smallest_epoch() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let epoch_old = now - 300 * DAY;
        let epoch_recent = now - 200 * DAY;

        insert_deleted_memory(&conn, "mem-a", "global", "body-a", epoch_old);
        insert_deleted_memory(&conn, "mem-b", "global", "body-b", epoch_recent);

        let cutoff = now - 30 * DAY;
        let (_, oldest, count) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics failed");

        assert_eq!(count, 2);
        assert_eq!(
            oldest,
            Some(epoch_old),
            "oldest_deleted_at must be the oldest epoch"
        );
    }

    #[test]
    fn select_candidates_orders_oldest_first_and_applies_filters() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch failed");
        let cutoff = now - 30 * DAY;

        // Inserted newest-first so the expected order cannot come from insertion order.
        let recent_id = insert_deleted_memory(&conn, "mem-recent", "global", "r", now - 100 * DAY);
        let oldest_id = insert_deleted_memory(&conn, "mem-oldest", "global", "o", now - 300 * DAY);
        insert_deleted_memory(&conn, "mem-other-ns", "other", "x", now - 300 * DAY);
        insert_deleted_memory(&conn, "mem-too-new", "global", "n", now - DAY);

        let all = select_candidates(&conn, "global", None, cutoff)
            .expect("select_candidates failed");
        assert_eq!(
            all,
            vec![
                (oldest_id, "mem-oldest".to_string()),
                (recent_id, "mem-recent".to_string()),
            ],
            "candidates must be namespace-scoped, past the cutoff, and oldest first"
        );

        let named = select_candidates(&conn, "global", Some("mem-recent"), cutoff)
            .expect("select_candidates failed");
        assert_eq!(named, vec![(recent_id, "mem-recent".to_string())]);

        let missing = select_candidates(&conn, "global", Some("mem-too-new"), cutoff)
            .expect("select_candidates failed");
        assert!(
            missing.is_empty(),
            "a memory newer than the cutoff must not be selected even by name"
        );
    }

    #[test]
    fn purge_args_namespace_accepts_none_without_default() {
        // P1-C: namespace must be None when not provided, allowing resolve_namespace
        // to consult SQLITE_GRAPHRAG_NAMESPACE before falling back to "global".
        // The field was `default_value = "global"` before P1-C; with that removed,
        // resolve_namespace(None) consults the env var correctly.
        if std::env::var_os("SQLITE_GRAPHRAG_NAMESPACE").is_some() {
            // The "global" fallback is only observable when the override is unset;
            // skip instead of failing on machines that export it.
            return;
        }
        let resolved = crate::namespace::resolve_namespace(None)
            .expect("resolve_namespace(None) must return Ok");
        assert_eq!(
            resolved, "global",
            "without env var, resolve_namespace(None) must fall back to 'global'"
        );
    }

    #[test]
    fn purge_response_serializes_all_new_fields() {
        let resp = PurgeResponse {
            action: "purged".to_string(),
            purged_count: 3,
            bytes_freed: 1024,
            oldest_deleted_at: Some(1_700_000_000),
            retention_days_used: 90,
            dry_run: false,
            namespace: Some("global".to_string()),
            cutoff_epoch: 1_710_000_000,
            warnings: vec![],
            elapsed_ms: 42,
            message: None,
        };
        let json = serde_json::to_string(&resp).expect("serialization failed");
        assert!(json.contains("bytes_freed"));
        assert!(json.contains("oldest_deleted_at"));
        assert!(json.contains("retention_days_used"));
        assert!(json.contains("dry_run"));
        assert!(json.contains("elapsed_ms"));
        // M2: `message` is None whenever something was purged, and a None message is
        // omitted from the JSON to keep payloads stable.
        assert!(!json.contains("\"message\""));
    }

    #[test]
    fn purge_response_serializes_message_when_present() {
        // M2 (v1.0.32): zero purges include a human-readable hint message.
        let resp = PurgeResponse {
            action: "purged".to_string(),
            purged_count: 0,
            bytes_freed: 0,
            oldest_deleted_at: None,
            retention_days_used: 90,
            dry_run: false,
            namespace: Some("global".to_string()),
            cutoff_epoch: 1_710_000_000,
            warnings: vec![],
            elapsed_ms: 5,
            message: Some(
                "no soft-deleted memories older than 90 day(s); use --retention-days 0 to purge all soft-deleted memories regardless of age"
                    .to_string(),
            ),
        };
        let json = serde_json::to_string(&resp).expect("serialization failed");
        assert!(json.contains("\"message\""));
        assert!(json.contains("--retention-days 0"));
    }
}