Skip to main content

sqlite_graphrag/commands/
purge.rs

1//! Handler for the `purge` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::i18n::errors_msg;
5use crate::output;
6use crate::paths::AppPaths;
7use crate::storage::connection::open_rw;
8use serde::Serialize;
9
// Command-line arguments accepted by the `purge` subcommand.
//
// NOTE(review): the additions below are plain `//` comments on purpose. clap's derive
// macros turn `///` doc comments into user-visible help text, so adding or rewording a
// `///` line here would change the program's `--help` output.
#[derive(clap::Args)]
pub struct PurgeArgs {
    // Restricts the purge to soft-deleted memories with exactly this name. When it is set
    // and no candidate matches, `run` returns a not-found error instead of an empty report.
    #[arg(long)]
    pub name: Option<String>,
    /// Namespace to purge. Defaults to the contextual namespace (SQLITE_GRAPHRAG_NAMESPACE env var or "global").
    #[arg(long)]
    pub namespace: Option<String>,
    /// Retention days: memories with deleted_at older than (now - retention_days*86400) will be
    /// permanently removed. Default: PURGE_RETENTION_DAYS_DEFAULT (90).
    #[arg(long, alias = "days", value_name = "DAYS", default_value_t = crate::constants::PURGE_RETENTION_DAYS_DEFAULT)]
    pub retention_days: u32,
    /// [DEPRECATED em v2.0.0] Alias legado — use --retention-days em vez disso.
    // English: deprecated legacy alias; prefer --retention-days. When present, `run` uses
    // `now - older_than_seconds` as the cutoff instead of `retention_days` and adds a
    // deprecation warning to the response. The flag is hidden from help output.
    #[arg(long, hide = true)]
    pub older_than_seconds: Option<u64>,
    /// Does not execute DELETE: computes and reports what WOULD be purged.
    #[arg(long, default_value_t = false)]
    pub dry_run: bool,
    /// Compatibility with tools that pass --yes to confirm destructive operations.
    // `run` never reads this field; the flag is accepted only so such invocations parse.
    #[arg(long, hide = true, default_value_t = false)]
    pub yes: bool,
    // `run` never reads this field either; the response is emitted as JSON regardless.
    #[arg(long, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Optional database path override (flag or SQLITE_GRAPHRAG_DB_PATH); it is handed to
    // `AppPaths::resolve`, which decides the final location.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
35
/// JSON payload emitted on stdout by the `purge` subcommand.
///
/// Fields are serialised in declaration order.
#[derive(Serialize)]
pub struct PurgeResponse {
    /// Number of soft-deleted memories that matched the filter. It is counted before any
    /// DELETE runs, so in dry-run mode it is the number of rows that *would* be purged.
    pub purged_count: usize,
    /// Combined size of the `body`, `description` and `name` columns of the matching rows,
    /// as computed by `compute_metrics`.
    pub bytes_freed: i64,
    /// Smallest `deleted_at` among the matching rows; `None` when nothing matched.
    pub oldest_deleted_at: Option<i64>,
    /// Value of `--retention-days` as parsed. NOTE(review): this is reported even when the
    /// cutoff was derived from the deprecated `--older-than-seconds` flag.
    pub retention_days_used: u32,
    /// Mirrors the `--dry-run` flag: `true` means no row was deleted.
    pub dry_run: bool,
    /// Namespace the purge was scoped to (`run` always fills this with the resolved namespace).
    pub namespace: Option<String>,
    /// Unix timestamp in seconds; rows with `deleted_at <= cutoff_epoch` are eligible.
    pub cutoff_epoch: i64,
    /// Non-fatal problems collected during the run (deprecation notices, failed vector-table
    /// cleanups).
    pub warnings: Vec<String>,
    /// Total execution time in milliseconds from handler start to serialisation.
    pub elapsed_ms: u64,
}
49
50/// Permanently delete soft-deleted memories that have exceeded the retention window.
51///
52/// Only memories with `deleted_at IS NOT NULL AND deleted_at <= cutoff_epoch` are affected.
53/// When `--dry-run` is set the DELETE is skipped and the response reflects candidates only.
54pub fn run(args: PurgeArgs) -> Result<(), AppError> {
55    let inicio = std::time::Instant::now();
56    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
57    let paths = AppPaths::resolve(args.db.as_deref())?;
58
59    if !paths.db.exists() {
60        return Err(AppError::NotFound(errors_msg::database_not_found(
61            &paths.db.display().to_string(),
62        )));
63    }
64
65    let mut warnings: Vec<String> = Vec::new();
66    let now = current_epoch()?;
67
68    let cutoff_epoch = if let Some(secs) = args.older_than_seconds {
69        warnings.push(
70            "--older-than-seconds está deprecado; use --retention-days em v2.0.0+".to_string(),
71        );
72        now - secs as i64
73    } else {
74        now - (args.retention_days as i64) * 86_400
75    };
76
77    let namespace_opt: Option<&str> = Some(namespace.as_str());
78
79    let mut conn = open_rw(&paths.db)?;
80
81    let (bytes_freed, oldest_deleted_at, candidates_count) =
82        compute_metrics(&conn, cutoff_epoch, namespace_opt, args.name.as_deref())?;
83
84    if candidates_count == 0 && args.name.is_some() {
85        return Err(AppError::NotFound(
86            errors_msg::soft_deleted_memory_not_found(
87                args.name.as_deref().unwrap_or_default(),
88                &namespace,
89            ),
90        ));
91    }
92
93    if !args.dry_run {
94        let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
95        execute_purge(
96            &tx,
97            &namespace,
98            args.name.as_deref(),
99            cutoff_epoch,
100            &mut warnings,
101        )?;
102        tx.commit()?;
103    }
104
105    output::emit_json(&PurgeResponse {
106        purged_count: candidates_count,
107        bytes_freed,
108        oldest_deleted_at,
109        retention_days_used: args.retention_days,
110        dry_run: args.dry_run,
111        namespace: Some(namespace),
112        cutoff_epoch,
113        warnings,
114        elapsed_ms: inicio.elapsed().as_millis() as u64,
115    })?;
116
117    Ok(())
118}
119
120fn current_epoch() -> Result<i64, AppError> {
121    let now = std::time::SystemTime::now()
122        .duration_since(std::time::UNIX_EPOCH)
123        .map_err(|err| AppError::Internal(anyhow::anyhow!("erro de relógio do sistema: {err}")))?;
124    Ok(now.as_secs() as i64)
125}
126
127fn compute_metrics(
128    conn: &rusqlite::Connection,
129    cutoff_epoch: i64,
130    namespace_opt: Option<&str>,
131    name: Option<&str>,
132) -> Result<(i64, Option<i64>, usize), AppError> {
133    let (bytes_freed, oldest_deleted_at): (i64, Option<i64>) = if let Some(name) = name {
134        conn.query_row(
135            "SELECT COALESCE(SUM(LENGTH(COALESCE(body,'')) + LENGTH(COALESCE(description,'')) + LENGTH(name)), 0),
136                    MIN(deleted_at)
137             FROM memories
138             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
139                   AND (?2 IS NULL OR namespace = ?2)
140                   AND name = ?3",
141            rusqlite::params![cutoff_epoch, namespace_opt, name],
142            |r| Ok((r.get::<_, i64>(0)?, r.get::<_, Option<i64>>(1)?)),
143        )?
144    } else {
145        conn.query_row(
146            "SELECT COALESCE(SUM(LENGTH(COALESCE(body,'')) + LENGTH(COALESCE(description,'')) + LENGTH(name)), 0),
147                    MIN(deleted_at)
148             FROM memories
149             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
150                   AND (?2 IS NULL OR namespace = ?2)",
151            rusqlite::params![cutoff_epoch, namespace_opt],
152            |r| Ok((r.get::<_, i64>(0)?, r.get::<_, Option<i64>>(1)?)),
153        )?
154    };
155
156    let count: usize = if let Some(name) = name {
157        conn.query_row(
158            "SELECT COUNT(*) FROM memories
159             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
160                   AND (?2 IS NULL OR namespace = ?2)
161                   AND name = ?3",
162            rusqlite::params![cutoff_epoch, namespace_opt, name],
163            |r| r.get::<_, usize>(0),
164        )?
165    } else {
166        conn.query_row(
167            "SELECT COUNT(*) FROM memories
168             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
169                   AND (?2 IS NULL OR namespace = ?2)",
170            rusqlite::params![cutoff_epoch, namespace_opt],
171            |r| r.get::<_, usize>(0),
172        )?
173    };
174
175    Ok((bytes_freed, oldest_deleted_at, count))
176}
177
178fn execute_purge(
179    tx: &rusqlite::Transaction,
180    namespace: &str,
181    name: Option<&str>,
182    cutoff_epoch: i64,
183    warnings: &mut Vec<String>,
184) -> Result<(), AppError> {
185    let candidates = select_candidates(tx, namespace, name, cutoff_epoch)?;
186
187    for (memory_id, _name) in &candidates {
188        if let Err(err) = tx.execute(
189            "DELETE FROM vec_chunks WHERE memory_id = ?1",
190            rusqlite::params![memory_id],
191        ) {
192            warnings.push(format!(
193                "falha ao limpar vec_chunks para memory_id {memory_id}: {err}"
194            ));
195        }
196        if let Err(err) = tx.execute(
197            "DELETE FROM vec_memories WHERE memory_id = ?1",
198            rusqlite::params![memory_id],
199        ) {
200            warnings.push(format!(
201                "falha ao limpar vec_memories para memory_id {memory_id}: {err}"
202            ));
203        }
204        tx.execute(
205            "DELETE FROM memories WHERE id = ?1 AND namespace = ?2 AND deleted_at IS NOT NULL",
206            rusqlite::params![memory_id, namespace],
207        )?;
208    }
209
210    Ok(())
211}
212
213fn select_candidates(
214    conn: &rusqlite::Connection,
215    namespace: &str,
216    name: Option<&str>,
217    cutoff_epoch: i64,
218) -> Result<Vec<(i64, String)>, AppError> {
219    let query = if name.is_some() {
220        "SELECT id, name FROM memories
221         WHERE namespace = ?1 AND name = ?2 AND deleted_at IS NOT NULL AND deleted_at <= ?3
222         ORDER BY deleted_at ASC"
223    } else {
224        "SELECT id, name FROM memories
225         WHERE namespace = ?1 AND deleted_at IS NOT NULL AND deleted_at <= ?2
226         ORDER BY deleted_at ASC"
227    };
228
229    let mut stmt = conn.prepare(query)?;
230    let rows = if let Some(name) = name {
231        stmt.query_map(rusqlite::params![namespace, name, cutoff_epoch], |row| {
232            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
233        })?
234        .collect::<Result<Vec<_>, _>>()?
235    } else {
236        stmt.query_map(rusqlite::params![namespace, cutoff_epoch], |row| {
237            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
238        })?
239        .collect::<Result<Vec<_>, _>>()?
240    };
241    Ok(rows)
242}
243
#[cfg(test)]
mod tests {
    use super::*;
    use rusqlite::Connection;

    /// Builds an in-memory database with the minimal schema the purge code touches:
    /// `memories` plus plain-table stand-ins for `vec_chunks` and `vec_memories`.
    fn setup_test_db() -> Connection {
        let conn = Connection::open_in_memory().expect("falha ao abrir banco em memória");
        conn.execute_batch(
            "CREATE TABLE memories (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                namespace TEXT NOT NULL DEFAULT 'global',
                description TEXT,
                body TEXT,
                deleted_at INTEGER
            );
            CREATE TABLE IF NOT EXISTS vec_chunks (memory_id INTEGER);
            CREATE TABLE IF NOT EXISTS vec_memories (memory_id INTEGER);",
        )
        .expect("falha ao criar tabelas de teste");
        conn
    }

    /// Inserts a soft-deleted memory and returns its row id.
    fn insert_deleted_memory(
        conn: &Connection,
        name: &str,
        namespace: &str,
        body: &str,
        deleted_at: i64,
    ) -> i64 {
        conn.execute(
            "INSERT INTO memories (name, namespace, body, deleted_at) VALUES (?1, ?2, ?3, ?4)",
            rusqlite::params![name, namespace, body, deleted_at],
        )
        .expect("falha ao inserir memória de teste");
        conn.last_insert_rowid()
    }

    /// Counts the rows of `table` directly, independently of the code under test.
    fn count_rows(conn: &Connection, table: &str) -> i64 {
        conn.query_row(
            &format!("SELECT COUNT(*) FROM {table}"),
            rusqlite::params![],
            |row| row.get::<_, i64>(0),
        )
        .expect("failed to count rows")
    }

    #[test]
    fn retention_days_used_padrao_eh_90() {
        assert_eq!(crate::constants::PURGE_RETENTION_DAYS_DEFAULT, 90u32);
    }

    #[test]
    fn compute_metrics_bytes_freed_positivo_para_body_populado() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch falhou");
        let old_epoch = now - 100 * 86_400;
        insert_deleted_memory(&conn, "mem-teste", "global", "corpo da memória", old_epoch);

        let cutoff = now - 30 * 86_400;
        let (bytes, oldest, count) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics falhou");

        assert!(bytes > 0, "bytes_freed deve ser > 0 para body populado");
        assert!(oldest.is_some(), "oldest_deleted_at deve ser Some");
        assert_eq!(count, 1);
    }

    #[test]
    fn compute_metrics_retorna_zero_sem_candidatos() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch falhou");
        let cutoff = now - 90 * 86_400;

        let (bytes, oldest, count) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics falhou");

        assert_eq!(bytes, 0);
        assert!(oldest.is_none());
        assert_eq!(count, 0);
    }

    // NOTE(review): `run` cannot be driven from here, so this test covers the read-only
    // half of a dry run: gathering metrics must leave the candidate row in place.
    #[test]
    fn dry_run_nao_deleta_registros() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch falhou");
        let old_epoch = now - 200 * 86_400;
        insert_deleted_memory(&conn, "mem-dry", "global", "conteúdo dry run", old_epoch);

        let cutoff = now - 30 * 86_400;
        let (_, _, count_antes) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics falhou");
        assert_eq!(count_antes, 1, "deve haver 1 candidato antes do dry run");

        let (_, _, count_depois) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics falhou");
        assert_eq!(
            count_depois, 1,
            "dry_run não deve remover registros: count deve permanecer 1"
        );
        // Cross-check against the table itself rather than the function under test.
        assert_eq!(count_rows(&conn, "memories"), 1);
    }

    #[test]
    fn oldest_deleted_at_retorna_menor_epoch() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch falhou");
        let epoch_antigo = now - 300 * 86_400;
        let epoch_recente = now - 200 * 86_400;

        insert_deleted_memory(&conn, "mem-a", "global", "corpo-a", epoch_antigo);
        insert_deleted_memory(&conn, "mem-b", "global", "corpo-b", epoch_recente);

        let cutoff = now - 30 * 86_400;
        let (_, oldest, count) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics falhou");

        assert_eq!(count, 2);
        assert_eq!(
            oldest,
            Some(epoch_antigo),
            "oldest_deleted_at deve ser o epoch mais antigo"
        );
    }

    #[test]
    fn purge_args_namespace_aceita_none_sem_default() {
        // P1-C: the namespace must be None when not supplied, so that resolve_namespace
        // can consult SQLITE_GRAPHRAG_NAMESPACE before falling back to "global".
        // The field used to carry `default_value = "global"` before P1-C; with that
        // removed, resolve_namespace(None) reads the env var correctly.
        // NOTE(review): this assertion presumably fails when SQLITE_GRAPHRAG_NAMESPACE is
        // set in the environment running the tests.
        let resolved = crate::namespace::resolve_namespace(None)
            .expect("resolve_namespace(None) deve retornar Ok");
        assert_eq!(
            resolved, "global",
            "sem env var, resolve_namespace(None) deve cair em 'global'"
        );
    }

    #[test]
    fn purge_response_serializa_todos_campos_novos() {
        let resp = PurgeResponse {
            purged_count: 3,
            bytes_freed: 1024,
            oldest_deleted_at: Some(1_700_000_000),
            retention_days_used: 90,
            dry_run: false,
            namespace: Some("global".to_string()),
            cutoff_epoch: 1_710_000_000,
            warnings: vec![],
            elapsed_ms: 42,
        };
        let json = serde_json::to_string(&resp).expect("serialização falhou");
        assert!(json.contains("bytes_freed"));
        assert!(json.contains("oldest_deleted_at"));
        assert!(json.contains("retention_days_used"));
        assert!(json.contains("dry_run"));
        assert!(json.contains("elapsed_ms"));
    }

    #[test]
    fn select_candidates_filters_by_name_and_orders_oldest_first() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch falhou");
        let newer = insert_deleted_memory(&conn, "mem-a", "global", "body-a", now - 200 * 86_400);
        let older = insert_deleted_memory(&conn, "mem-b", "global", "body-b", now - 300 * 86_400);
        // Deleted too recently to be a candidate.
        insert_deleted_memory(&conn, "mem-c", "global", "body-c", now - 86_400);

        let cutoff = now - 30 * 86_400;

        let all = select_candidates(&conn, "global", None, cutoff)
            .expect("select_candidates failed");
        assert_eq!(
            all,
            vec![(older, "mem-b".to_string()), (newer, "mem-a".to_string())]
        );

        let named = select_candidates(&conn, "global", Some("mem-a"), cutoff)
            .expect("select_candidates failed");
        assert_eq!(named, vec![(newer, "mem-a".to_string())]);
    }

    #[test]
    fn execute_purge_removes_only_expired_rows_of_namespace() {
        let mut conn = setup_test_db();
        let now = current_epoch().expect("epoch falhou");
        let cutoff = now - 30 * 86_400;

        let expired =
            insert_deleted_memory(&conn, "mem-expired", "global", "old", now - 100 * 86_400);
        insert_deleted_memory(&conn, "mem-recent", "global", "new", now - 86_400);
        insert_deleted_memory(&conn, "mem-other", "other", "old", now - 100 * 86_400);
        conn.execute(
            "INSERT INTO vec_chunks (memory_id) VALUES (?1)",
            rusqlite::params![expired],
        )
        .expect("failed to insert vec_chunks row");
        conn.execute(
            "INSERT INTO vec_memories (memory_id) VALUES (?1)",
            rusqlite::params![expired],
        )
        .expect("failed to insert vec_memories row");

        let mut warnings: Vec<String> = Vec::new();
        let tx = conn.transaction().expect("failed to open transaction");
        execute_purge(&tx, "global", None, cutoff, &mut warnings).expect("execute_purge failed");
        tx.commit().expect("failed to commit");

        assert!(warnings.is_empty(), "no warnings expected: {warnings:?}");
        // Only the expired row of the target namespace is gone.
        assert_eq!(count_rows(&conn, "memories"), 2);
        let expired_left: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM memories WHERE id = ?1",
                rusqlite::params![expired],
                |row| row.get::<_, i64>(0),
            )
            .expect("failed to look up purged row");
        assert_eq!(expired_left, 0);
        assert_eq!(count_rows(&conn, "vec_chunks"), 0);
        assert_eq!(count_rows(&conn, "vec_memories"), 0);
    }
}
393}