Skip to main content

sqlite_graphrag/commands/
purge.rs

1use crate::errors::AppError;
2use crate::i18n::erros;
3use crate::output;
4use crate::paths::AppPaths;
5use crate::storage::connection::open_rw;
6use serde::Serialize;
7
// Command-line arguments accepted by the purge command (consumed by `run`).
//
// NOTE: clap's derive macro turns the `///` comments on the fields below into help text,
// so they are runtime strings and are intentionally left untouched; explanatory notes are
// written as plain `//` comments instead.
#[derive(clap::Args)]
pub struct PurgeArgs {
    // Optional `--name`: `run` forwards it to the candidate queries so that only memories
    // with this exact name are considered, and returns `AppError::NotFound` when no
    // soft-deleted candidate matches.
    #[arg(long)]
    pub name: Option<String>,
    /// Namespace to purge. Defaults to the contextual namespace (SQLITE_GRAPHRAG_NAMESPACE env var or "global").
    #[arg(long)]
    pub namespace: Option<String>,
    // English: retention in days — memories whose `deleted_at` is older than
    // (now - retention_days * 86400) are permanently removed. Also accepted as `--days`.
    /// Dias de retenção: memórias com deleted_at mais antigo que (now - retention_days*86400) serão
    /// permanentemente removidas. Default: PURGE_RETENTION_DAYS_DEFAULT (90).
    #[arg(long, alias = "days", value_name = "DAYS", default_value_t = crate::constants::PURGE_RETENTION_DAYS_DEFAULT)]
    pub retention_days: u32,
    // English: legacy alias, deprecated in v2.0.0 — use `--retention-days` instead.
    // Hidden from help. When present, `run` derives the cutoff from it instead of
    // `retention_days` and adds a deprecation warning to the response.
    /// [DEPRECATED em v2.0.0] Alias legado — use --retention-days em vez disso.
    #[arg(long, hide = true)]
    pub older_than_seconds: Option<u64>,
    // English: do not execute the DELETE; compute and report what WOULD be purged.
    /// Não executa DELETE: calcula e reporta o que SERIA purgado.
    #[arg(long, default_value_t = false)]
    pub dry_run: bool,
    // English: compatibility with tools that pass `--yes` to confirm destructive
    // operations. Hidden from help; `run` never reads this field.
    /// Compatibilidade com ferramentas que passam --yes para confirmar operações destrutivas.
    #[arg(long, hide = true, default_value_t = false)]
    pub yes: bool,
    // Accepted only so callers may pass `--json`; `run` never reads this field.
    #[arg(long, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override; clap falls back to the SQLITE_GRAPHRAG_DB_PATH environment
    // variable. `run` passes the value to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
33
/// JSON payload emitted on stdout by `run` once the purge (or dry run) has finished.
#[derive(Serialize)]
pub struct PurgeResponse {
    /// Number of candidate memories counted by `compute_metrics` before any deletion;
    /// in a dry run this is how many rows would have been purged.
    pub purged_count: usize,
    /// Sum of the SQL `LENGTH` of `name`, `description` and `body` over the candidates.
    pub bytes_freed: i64,
    /// Smallest `deleted_at` among the candidates, or `None` when there are none.
    pub oldest_deleted_at: Option<i64>,
    /// Value of `--retention-days` as parsed; `run` reports it unchanged even when the
    /// deprecated `--older-than-seconds` flag supplied the cutoff.
    pub retention_days_used: u32,
    /// Echo of the `--dry-run` flag.
    pub dry_run: bool,
    /// Resolved namespace the purge was restricted to (`run` always sets `Some`).
    pub namespace: Option<String>,
    /// Unix timestamp in seconds; rows with `deleted_at <= cutoff_epoch` were candidates.
    pub cutoff_epoch: i64,
    /// Non-fatal notices collected during the run (deprecation notice, vector cleanup failures).
    pub warnings: Vec<String>,
    /// Total execution time in milliseconds, from handler start until serialization.
    pub elapsed_ms: u64,
}
47
48/// Permanently delete soft-deleted memories that have exceeded the retention window.
49///
50/// Only memories with `deleted_at IS NOT NULL AND deleted_at <= cutoff_epoch` are affected.
51/// When `--dry-run` is set the DELETE is skipped and the response reflects candidates only.
52pub fn run(args: PurgeArgs) -> Result<(), AppError> {
53    let inicio = std::time::Instant::now();
54    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
55    let paths = AppPaths::resolve(args.db.as_deref())?;
56
57    if !paths.db.exists() {
58        return Err(AppError::NotFound(erros::banco_nao_encontrado(
59            &paths.db.display().to_string(),
60        )));
61    }
62
63    let mut warnings: Vec<String> = Vec::new();
64    let now = current_epoch()?;
65
66    let cutoff_epoch = if let Some(secs) = args.older_than_seconds {
67        warnings.push(
68            "--older-than-seconds está deprecado; use --retention-days em v2.0.0+".to_string(),
69        );
70        now - secs as i64
71    } else {
72        now - (args.retention_days as i64) * 86_400
73    };
74
75    let namespace_opt: Option<&str> = Some(namespace.as_str());
76
77    let mut conn = open_rw(&paths.db)?;
78
79    let (bytes_freed, oldest_deleted_at, candidates_count) =
80        compute_metrics(&conn, cutoff_epoch, namespace_opt, args.name.as_deref())?;
81
82    if candidates_count == 0 && args.name.is_some() {
83        return Err(AppError::NotFound(
84            erros::memoria_soft_deleted_nao_encontrada(
85                args.name.as_deref().unwrap_or_default(),
86                &namespace,
87            ),
88        ));
89    }
90
91    if !args.dry_run {
92        let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
93        execute_purge(
94            &tx,
95            &namespace,
96            args.name.as_deref(),
97            cutoff_epoch,
98            &mut warnings,
99        )?;
100        tx.commit()?;
101    }
102
103    output::emit_json(&PurgeResponse {
104        purged_count: candidates_count,
105        bytes_freed,
106        oldest_deleted_at,
107        retention_days_used: args.retention_days,
108        dry_run: args.dry_run,
109        namespace: Some(namespace),
110        cutoff_epoch,
111        warnings,
112        elapsed_ms: inicio.elapsed().as_millis() as u64,
113    })?;
114
115    Ok(())
116}
117
118fn current_epoch() -> Result<i64, AppError> {
119    let now = std::time::SystemTime::now()
120        .duration_since(std::time::UNIX_EPOCH)
121        .map_err(|err| AppError::Internal(anyhow::anyhow!("erro de relógio do sistema: {err}")))?;
122    Ok(now.as_secs() as i64)
123}
124
125fn compute_metrics(
126    conn: &rusqlite::Connection,
127    cutoff_epoch: i64,
128    namespace_opt: Option<&str>,
129    name: Option<&str>,
130) -> Result<(i64, Option<i64>, usize), AppError> {
131    let (bytes_freed, oldest_deleted_at): (i64, Option<i64>) = if let Some(name) = name {
132        conn.query_row(
133            "SELECT COALESCE(SUM(LENGTH(COALESCE(body,'')) + LENGTH(COALESCE(description,'')) + LENGTH(name)), 0),
134                    MIN(deleted_at)
135             FROM memories
136             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
137                   AND (?2 IS NULL OR namespace = ?2)
138                   AND name = ?3",
139            rusqlite::params![cutoff_epoch, namespace_opt, name],
140            |r| Ok((r.get::<_, i64>(0)?, r.get::<_, Option<i64>>(1)?)),
141        )?
142    } else {
143        conn.query_row(
144            "SELECT COALESCE(SUM(LENGTH(COALESCE(body,'')) + LENGTH(COALESCE(description,'')) + LENGTH(name)), 0),
145                    MIN(deleted_at)
146             FROM memories
147             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
148                   AND (?2 IS NULL OR namespace = ?2)",
149            rusqlite::params![cutoff_epoch, namespace_opt],
150            |r| Ok((r.get::<_, i64>(0)?, r.get::<_, Option<i64>>(1)?)),
151        )?
152    };
153
154    let count: usize = if let Some(name) = name {
155        conn.query_row(
156            "SELECT COUNT(*) FROM memories
157             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
158                   AND (?2 IS NULL OR namespace = ?2)
159                   AND name = ?3",
160            rusqlite::params![cutoff_epoch, namespace_opt, name],
161            |r| r.get::<_, usize>(0),
162        )?
163    } else {
164        conn.query_row(
165            "SELECT COUNT(*) FROM memories
166             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
167                   AND (?2 IS NULL OR namespace = ?2)",
168            rusqlite::params![cutoff_epoch, namespace_opt],
169            |r| r.get::<_, usize>(0),
170        )?
171    };
172
173    Ok((bytes_freed, oldest_deleted_at, count))
174}
175
176fn execute_purge(
177    tx: &rusqlite::Transaction,
178    namespace: &str,
179    name: Option<&str>,
180    cutoff_epoch: i64,
181    warnings: &mut Vec<String>,
182) -> Result<(), AppError> {
183    let candidates = select_candidates(tx, namespace, name, cutoff_epoch)?;
184
185    for (memory_id, _name) in &candidates {
186        if let Err(err) = tx.execute(
187            "DELETE FROM vec_chunks WHERE memory_id = ?1",
188            rusqlite::params![memory_id],
189        ) {
190            warnings.push(format!(
191                "falha ao limpar vec_chunks para memory_id {memory_id}: {err}"
192            ));
193        }
194        if let Err(err) = tx.execute(
195            "DELETE FROM vec_memories WHERE memory_id = ?1",
196            rusqlite::params![memory_id],
197        ) {
198            warnings.push(format!(
199                "falha ao limpar vec_memories para memory_id {memory_id}: {err}"
200            ));
201        }
202        tx.execute(
203            "DELETE FROM memories WHERE id = ?1 AND namespace = ?2 AND deleted_at IS NOT NULL",
204            rusqlite::params![memory_id, namespace],
205        )?;
206    }
207
208    Ok(())
209}
210
211fn select_candidates(
212    conn: &rusqlite::Connection,
213    namespace: &str,
214    name: Option<&str>,
215    cutoff_epoch: i64,
216) -> Result<Vec<(i64, String)>, AppError> {
217    let query = if name.is_some() {
218        "SELECT id, name FROM memories
219         WHERE namespace = ?1 AND name = ?2 AND deleted_at IS NOT NULL AND deleted_at <= ?3
220         ORDER BY deleted_at ASC"
221    } else {
222        "SELECT id, name FROM memories
223         WHERE namespace = ?1 AND deleted_at IS NOT NULL AND deleted_at <= ?2
224         ORDER BY deleted_at ASC"
225    };
226
227    let mut stmt = conn.prepare(query)?;
228    let rows = if let Some(name) = name {
229        stmt.query_map(rusqlite::params![namespace, name, cutoff_epoch], |row| {
230            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
231        })?
232        .collect::<Result<Vec<_>, _>>()?
233    } else {
234        stmt.query_map(rusqlite::params![namespace, cutoff_epoch], |row| {
235            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
236        })?
237        .collect::<Result<Vec<_>, _>>()?
238    };
239    Ok(rows)
240}
241
#[cfg(test)]
mod tests {
    use super::*;
    use rusqlite::Connection;

    /// Builds an in-memory database with the minimal schema the purge code touches.
    /// The vector tables are plain tables here; only their `memory_id` column matters.
    fn setup_test_db() -> Connection {
        let conn = Connection::open_in_memory().expect("falha ao abrir banco em memória");
        conn.execute_batch(
            "CREATE TABLE memories (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                namespace TEXT NOT NULL DEFAULT 'global',
                description TEXT,
                body TEXT,
                deleted_at INTEGER
            );
            CREATE TABLE IF NOT EXISTS vec_chunks (memory_id INTEGER);
            CREATE TABLE IF NOT EXISTS vec_memories (memory_id INTEGER);",
        )
        .expect("falha ao criar tabelas de teste");
        conn
    }

    /// Inserts a soft-deleted memory and returns its row id.
    fn insert_deleted_memory(
        conn: &Connection,
        name: &str,
        namespace: &str,
        body: &str,
        deleted_at: i64,
    ) -> i64 {
        conn.execute(
            "INSERT INTO memories (name, namespace, body, deleted_at) VALUES (?1, ?2, ?3, ?4)",
            rusqlite::params![name, namespace, body, deleted_at],
        )
        .expect("falha ao inserir memória de teste");
        conn.last_insert_rowid()
    }

    /// Inserts a live memory (`deleted_at` stays NULL) and returns its row id.
    fn insert_live_memory(conn: &Connection, name: &str, namespace: &str) -> i64 {
        conn.execute(
            "INSERT INTO memories (name, namespace, body) VALUES (?1, ?2, 'live')",
            rusqlite::params![name, namespace],
        )
        .expect("failed to insert live memory");
        conn.last_insert_rowid()
    }

    /// Counts the rows of one of the test tables.
    fn count_rows(conn: &Connection, table: &str) -> i64 {
        conn.query_row(
            &format!("SELECT COUNT(*) FROM {table}"),
            rusqlite::params![],
            |row| row.get(0),
        )
        .expect("failed to count rows")
    }

    /// Runs `execute_purge` inside a committed transaction and returns its warnings.
    fn run_purge(
        conn: &mut Connection,
        namespace: &str,
        name: Option<&str>,
        cutoff: i64,
    ) -> Vec<String> {
        let mut warnings = Vec::new();
        let tx = conn.transaction().expect("failed to open transaction");
        execute_purge(&tx, namespace, name, cutoff, &mut warnings).expect("execute_purge failed");
        tx.commit().expect("failed to commit purge");
        warnings
    }

    #[test]
    fn retention_days_used_padrao_eh_90() {
        assert_eq!(crate::constants::PURGE_RETENTION_DAYS_DEFAULT, 90u32);
    }

    #[test]
    fn compute_metrics_bytes_freed_positivo_para_body_populado() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch falhou");
        let old_epoch = now - 100 * 86_400;
        insert_deleted_memory(&conn, "mem-teste", "global", "corpo da memória", old_epoch);

        let cutoff = now - 30 * 86_400;
        let (bytes, oldest, count) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics falhou");

        assert!(bytes > 0, "bytes_freed deve ser > 0 para body populado");
        assert!(oldest.is_some(), "oldest_deleted_at deve ser Some");
        assert_eq!(count, 1);
    }

    #[test]
    fn compute_metrics_retorna_zero_sem_candidatos() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch falhou");
        let cutoff = now - 90 * 86_400;

        let (bytes, oldest, count) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics falhou");

        assert_eq!(bytes, 0);
        assert!(oldest.is_none());
        assert_eq!(count, 0);
    }

    #[test]
    fn compute_metrics_applies_name_and_namespace_filters() {
        let conn = setup_test_db();
        let cutoff = 1_000_000;
        insert_deleted_memory(&conn, "mem-a", "global", "corpo-a", cutoff - 20);
        insert_deleted_memory(&conn, "mem-b", "global", "corpo-b", cutoff - 10);
        insert_deleted_memory(&conn, "mem-a", "other", "corpo-c", cutoff - 5);

        // ASCII-only content: "mem-a" (5) + "corpo-a" (7); description is NULL.
        let (bytes, oldest, count) = compute_metrics(&conn, cutoff, Some("global"), Some("mem-a"))
            .expect("compute_metrics failed");
        assert_eq!((bytes, oldest, count), (12, Some(cutoff - 20), 1));

        // Without a namespace filter every namespace is considered.
        let (_, _, all) =
            compute_metrics(&conn, cutoff, None, None).expect("compute_metrics failed");
        assert_eq!(all, 3);
    }

    #[test]
    fn dry_run_nao_deleta_registros() {
        // A dry run only executes `compute_metrics`, so it must be a read-only operation.
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch falhou");
        let old_epoch = now - 200 * 86_400;
        insert_deleted_memory(&conn, "mem-dry", "global", "conteúdo dry run", old_epoch);

        let cutoff = now - 30 * 86_400;
        let (_, _, count_antes) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics falhou");
        assert_eq!(count_antes, 1, "deve haver 1 candidato antes do dry run");

        let (_, _, count_depois) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics falhou");
        assert_eq!(
            count_depois, 1,
            "dry_run não deve remover registros: count deve permanecer 1"
        );
        assert_eq!(count_rows(&conn, "memories"), 1);
    }

    #[test]
    fn oldest_deleted_at_retorna_menor_epoch() {
        let conn = setup_test_db();
        let now = current_epoch().expect("epoch falhou");
        let epoch_antigo = now - 300 * 86_400;
        let epoch_recente = now - 200 * 86_400;

        insert_deleted_memory(&conn, "mem-a", "global", "corpo-a", epoch_antigo);
        insert_deleted_memory(&conn, "mem-b", "global", "corpo-b", epoch_recente);

        let cutoff = now - 30 * 86_400;
        let (_, oldest, count) =
            compute_metrics(&conn, cutoff, Some("global"), None).expect("compute_metrics falhou");

        assert_eq!(count, 2);
        assert_eq!(
            oldest,
            Some(epoch_antigo),
            "oldest_deleted_at deve ser o epoch mais antigo"
        );
    }

    #[test]
    fn purge_args_namespace_aceita_none_sem_default() {
        // P1-C: `namespace` must be None when not supplied so that resolve_namespace can
        // consult SQLITE_GRAPHRAG_NAMESPACE before falling back to "global". The field
        // used to carry `default_value = "global"` before P1-C.
        //
        // The "global" expectation only holds when the variable is unset, so skip the
        // assertion in environments that export it instead of failing spuriously.
        if std::env::var_os("SQLITE_GRAPHRAG_NAMESPACE").is_some() {
            return;
        }
        let resolved = crate::namespace::resolve_namespace(None)
            .expect("resolve_namespace(None) deve retornar Ok");
        assert_eq!(
            resolved, "global",
            "sem env var, resolve_namespace(None) deve cair em 'global'"
        );
    }

    #[test]
    fn purge_response_serializa_todos_campos_novos() {
        let resp = PurgeResponse {
            purged_count: 3,
            bytes_freed: 1024,
            oldest_deleted_at: Some(1_700_000_000),
            retention_days_used: 90,
            dry_run: false,
            namespace: Some("global".to_string()),
            cutoff_epoch: 1_710_000_000,
            warnings: vec![],
            elapsed_ms: 42,
        };
        let json = serde_json::to_string(&resp).expect("serialização falhou");
        assert!(json.contains("bytes_freed"));
        assert!(json.contains("oldest_deleted_at"));
        assert!(json.contains("retention_days_used"));
        assert!(json.contains("dry_run"));
        assert!(json.contains("elapsed_ms"));
    }

    #[test]
    fn select_candidates_orders_by_deleted_at_and_includes_cutoff() {
        let conn = setup_test_db();
        let cutoff = 1_000_000;
        let at_cutoff = insert_deleted_memory(&conn, "boundary", "global", "b", cutoff);
        let older = insert_deleted_memory(&conn, "older", "global", "o", cutoff - 10);
        insert_deleted_memory(&conn, "too-recent", "global", "r", cutoff + 1);
        insert_live_memory(&conn, "alive", "global");

        let all = select_candidates(&conn, "global", None, cutoff)
            .expect("select_candidates failed");
        assert_eq!(
            all,
            vec![
                (older, "older".to_string()),
                (at_cutoff, "boundary".to_string()),
            ]
        );

        let named = select_candidates(&conn, "global", Some("boundary"), cutoff)
            .expect("select_candidates failed");
        assert_eq!(named, vec![(at_cutoff, "boundary".to_string())]);
    }

    #[test]
    fn execute_purge_removes_expired_rows_and_vector_data() {
        let mut conn = setup_test_db();
        let cutoff = 1_000_000;
        let id = insert_deleted_memory(&conn, "mem-old", "global", "body", cutoff - 1);
        conn.execute(
            "INSERT INTO vec_chunks (memory_id) VALUES (?1)",
            rusqlite::params![id],
        )
        .expect("failed to insert vec_chunks row");
        conn.execute(
            "INSERT INTO vec_memories (memory_id) VALUES (?1)",
            rusqlite::params![id],
        )
        .expect("failed to insert vec_memories row");

        let warnings = run_purge(&mut conn, "global", None, cutoff);

        assert!(warnings.is_empty(), "unexpected warnings: {warnings:?}");
        assert_eq!(count_rows(&conn, "memories"), 0);
        assert_eq!(count_rows(&conn, "vec_chunks"), 0);
        assert_eq!(count_rows(&conn, "vec_memories"), 0);
    }

    #[test]
    fn execute_purge_keeps_live_recent_and_foreign_namespace_rows() {
        let mut conn = setup_test_db();
        let cutoff = 1_000_000;
        insert_live_memory(&conn, "alive", "global");
        insert_deleted_memory(&conn, "recent", "global", "body", cutoff + 1);
        insert_deleted_memory(&conn, "foreign", "other", "body", cutoff - 1);

        let warnings = run_purge(&mut conn, "global", None, cutoff);

        assert!(warnings.is_empty(), "unexpected warnings: {warnings:?}");
        assert_eq!(count_rows(&conn, "memories"), 3);
    }

    #[test]
    fn execute_purge_with_name_only_removes_that_memory() {
        let mut conn = setup_test_db();
        let cutoff = 1_000_000;
        insert_deleted_memory(&conn, "mem-a", "global", "corpo-a", cutoff - 2);
        let kept = insert_deleted_memory(&conn, "mem-b", "global", "corpo-b", cutoff - 1);

        run_purge(&mut conn, "global", Some("mem-a"), cutoff);

        let remaining = select_candidates(&conn, "global", None, cutoff)
            .expect("select_candidates failed");
        assert_eq!(remaining, vec![(kept, "mem-b".to_string())]);
    }
}