Skip to main content

sqlite_graphrag/commands/
purge.rs

1use crate::errors::AppError;
2use crate::i18n::erros;
3use crate::output;
4use crate::paths::AppPaths;
5use crate::storage::connection::open_rw;
6use serde::Serialize;
7
// Command-line arguments accepted by the purge command (consumed by `run`).
//
// NOTE: the `///` comments on the fields below are intentionally left in their
// original wording. clap's derive macro turns field doc comments into `--help`
// text, so they are user-facing strings rather than plain comments. English
// explanations are given in the `//` comments next to each field.
#[derive(clap::Args)]
pub struct PurgeArgs {
    // Optional memory name. When set, `run` restricts the purge to soft-deleted
    // rows with this name and fails with `AppError::NotFound` if none match.
    #[arg(long)]
    pub name: Option<String>,
    // Namespace to purge; defaults to "global" and is passed through
    // `crate::namespace::resolve_namespace` before use.
    #[arg(long, default_value = "global")]
    pub namespace: Option<String>,
    // Retention window in days (also accepted as `--days`). Rows whose
    // `deleted_at` is at or before `now - retention_days * 86_400` are purged.
    // Defaults to `crate::constants::PURGE_RETENTION_DAYS_DEFAULT`.
    /// Dias de retenção: memórias com deleted_at mais antigo que (now - retention_days*86400) serão
    /// permanentemente removidas. Default: PURGE_RETENTION_DAYS_DEFAULT (90).
    #[arg(long, alias = "days", value_name = "DAYS", default_value_t = crate::constants::PURGE_RETENTION_DAYS_DEFAULT)]
    pub retention_days: u32,
    // Deprecated, hidden legacy flag. When present it takes precedence over
    // `retention_days` for the cutoff and `run` adds a deprecation warning.
    /// [DEPRECATED em v2.0.0] Alias legado — use --retention-days em vez disso.
    #[arg(long, hide = true)]
    pub older_than_seconds: Option<u64>,
    // Dry run: `run` computes and reports what would be purged but executes no DELETE.
    /// Não executa DELETE: calcula e reporta o que SERIA purgado.
    #[arg(long, default_value_t = false)]
    pub dry_run: bool,
    // Hidden compatibility flag for tools that pass `--yes` to confirm destructive
    // operations; `run` in this file never reads it.
    /// Compatibilidade com ferramentas que passam --yes para confirmar operações destrutivas.
    #[arg(long, hide = true, default_value_t = false)]
    pub yes: bool,
    // Hidden no-op flag; `run` always emits JSON regardless of its value.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Optional database path override, also readable from the
    // `SQLITE_GRAPHRAG_DB_PATH` environment variable; handed to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
32
/// JSON payload emitted by `run` describing a purge (or a dry run of one).
#[derive(Serialize)]
pub struct PurgeResponse {
    /// Number of soft-deleted rows that matched the purge filters, as counted by
    /// `compute_metrics` before any deletion takes place.
    pub purged_count: usize,
    /// Combined size of the `body`, `description` and `name` columns across the
    /// matching rows.
    pub bytes_freed: i64,
    /// Smallest `deleted_at` among the matching rows; `None` when nothing matched.
    pub oldest_deleted_at: Option<i64>,
    /// Value of `--retention-days` as received. It is echoed unchanged even when
    /// the deprecated `--older-than-seconds` flag determined the cutoff.
    pub retention_days_used: u32,
    /// Whether this was a dry run (no DELETE executed).
    pub dry_run: bool,
    /// Namespace the purge was applied to; `run` always fills this in.
    pub namespace: Option<String>,
    /// Cutoff in seconds since the Unix epoch: rows with `deleted_at <= cutoff_epoch`
    /// are purge candidates.
    pub cutoff_epoch: i64,
    /// Non-fatal messages collected during the run (deprecation notices, failed
    /// cleanup of the vector tables).
    pub warnings: Vec<String>,
    /// Total execution time in milliseconds, from handler start until serialization.
    pub elapsed_ms: u64,
}
46
47pub fn run(args: PurgeArgs) -> Result<(), AppError> {
48    let inicio = std::time::Instant::now();
49    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
50    let paths = AppPaths::resolve(args.db.as_deref())?;
51
52    if !paths.db.exists() {
53        return Err(AppError::NotFound(erros::banco_nao_encontrado(
54            &paths.db.display().to_string(),
55        )));
56    }
57
58    let mut warnings: Vec<String> = Vec::new();
59    let now = current_epoch()?;
60
61    let cutoff_epoch = if let Some(secs) = args.older_than_seconds {
62        warnings.push(
63            "--older-than-seconds está deprecado; use --retention-days em v2.0.0+".to_string(),
64        );
65        now - secs as i64
66    } else {
67        now - (args.retention_days as i64) * 86_400
68    };
69
70    let namespace_opt: Option<&str> = Some(namespace.as_str());
71
72    let mut conn = open_rw(&paths.db)?;
73
74    let (bytes_freed, oldest_deleted_at, candidates_count) =
75        compute_metrics(&conn, cutoff_epoch, namespace_opt, args.name.as_deref())?;
76
77    if candidates_count == 0 && args.name.is_some() {
78        return Err(AppError::NotFound(
79            erros::memoria_soft_deleted_nao_encontrada(
80                args.name.as_deref().unwrap_or_default(),
81                &namespace,
82            ),
83        ));
84    }
85
86    if !args.dry_run {
87        let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
88        execute_purge(
89            &tx,
90            &namespace,
91            args.name.as_deref(),
92            cutoff_epoch,
93            &mut warnings,
94        )?;
95        tx.commit()?;
96    }
97
98    output::emit_json(&PurgeResponse {
99        purged_count: candidates_count,
100        bytes_freed,
101        oldest_deleted_at,
102        retention_days_used: args.retention_days,
103        dry_run: args.dry_run,
104        namespace: Some(namespace),
105        cutoff_epoch,
106        warnings,
107        elapsed_ms: inicio.elapsed().as_millis() as u64,
108    })?;
109
110    Ok(())
111}
112
113fn current_epoch() -> Result<i64, AppError> {
114    let now = std::time::SystemTime::now()
115        .duration_since(std::time::UNIX_EPOCH)
116        .map_err(|err| AppError::Internal(anyhow::anyhow!("erro de relógio do sistema: {err}")))?;
117    Ok(now.as_secs() as i64)
118}
119
120fn compute_metrics(
121    conn: &rusqlite::Connection,
122    cutoff_epoch: i64,
123    namespace_opt: Option<&str>,
124    name: Option<&str>,
125) -> Result<(i64, Option<i64>, usize), AppError> {
126    let (bytes_freed, oldest_deleted_at): (i64, Option<i64>) = if let Some(name) = name {
127        conn.query_row(
128            "SELECT COALESCE(SUM(LENGTH(COALESCE(body,'')) + LENGTH(COALESCE(description,'')) + LENGTH(name)), 0),
129                    MIN(deleted_at)
130             FROM memories
131             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
132                   AND (?2 IS NULL OR namespace = ?2)
133                   AND name = ?3",
134            rusqlite::params![cutoff_epoch, namespace_opt, name],
135            |r| Ok((r.get::<_, i64>(0)?, r.get::<_, Option<i64>>(1)?)),
136        )?
137    } else {
138        conn.query_row(
139            "SELECT COALESCE(SUM(LENGTH(COALESCE(body,'')) + LENGTH(COALESCE(description,'')) + LENGTH(name)), 0),
140                    MIN(deleted_at)
141             FROM memories
142             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
143                   AND (?2 IS NULL OR namespace = ?2)",
144            rusqlite::params![cutoff_epoch, namespace_opt],
145            |r| Ok((r.get::<_, i64>(0)?, r.get::<_, Option<i64>>(1)?)),
146        )?
147    };
148
149    let count: usize = if let Some(name) = name {
150        conn.query_row(
151            "SELECT COUNT(*) FROM memories
152             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
153                   AND (?2 IS NULL OR namespace = ?2)
154                   AND name = ?3",
155            rusqlite::params![cutoff_epoch, namespace_opt, name],
156            |r| r.get::<_, usize>(0),
157        )?
158    } else {
159        conn.query_row(
160            "SELECT COUNT(*) FROM memories
161             WHERE deleted_at IS NOT NULL AND deleted_at <= ?1
162                   AND (?2 IS NULL OR namespace = ?2)",
163            rusqlite::params![cutoff_epoch, namespace_opt],
164            |r| r.get::<_, usize>(0),
165        )?
166    };
167
168    Ok((bytes_freed, oldest_deleted_at, count))
169}
170
171fn execute_purge(
172    tx: &rusqlite::Transaction,
173    namespace: &str,
174    name: Option<&str>,
175    cutoff_epoch: i64,
176    warnings: &mut Vec<String>,
177) -> Result<(), AppError> {
178    let candidates = select_candidates(tx, namespace, name, cutoff_epoch)?;
179
180    for (memory_id, _name) in &candidates {
181        if let Err(err) = tx.execute(
182            "DELETE FROM vec_chunks WHERE memory_id = ?1",
183            rusqlite::params![memory_id],
184        ) {
185            warnings.push(format!(
186                "falha ao limpar vec_chunks para memory_id {memory_id}: {err}"
187            ));
188        }
189        if let Err(err) = tx.execute(
190            "DELETE FROM vec_memories WHERE memory_id = ?1",
191            rusqlite::params![memory_id],
192        ) {
193            warnings.push(format!(
194                "falha ao limpar vec_memories para memory_id {memory_id}: {err}"
195            ));
196        }
197        tx.execute(
198            "DELETE FROM memories WHERE id = ?1 AND namespace = ?2 AND deleted_at IS NOT NULL",
199            rusqlite::params![memory_id, namespace],
200        )?;
201    }
202
203    Ok(())
204}
205
206fn select_candidates(
207    conn: &rusqlite::Connection,
208    namespace: &str,
209    name: Option<&str>,
210    cutoff_epoch: i64,
211) -> Result<Vec<(i64, String)>, AppError> {
212    let query = if name.is_some() {
213        "SELECT id, name FROM memories
214         WHERE namespace = ?1 AND name = ?2 AND deleted_at IS NOT NULL AND deleted_at <= ?3
215         ORDER BY deleted_at ASC"
216    } else {
217        "SELECT id, name FROM memories
218         WHERE namespace = ?1 AND deleted_at IS NOT NULL AND deleted_at <= ?2
219         ORDER BY deleted_at ASC"
220    };
221
222    let mut stmt = conn.prepare(query)?;
223    let rows = if let Some(name) = name {
224        stmt.query_map(rusqlite::params![namespace, name, cutoff_epoch], |row| {
225            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
226        })?
227        .collect::<Result<Vec<_>, _>>()?
228    } else {
229        stmt.query_map(rusqlite::params![namespace, cutoff_epoch], |row| {
230            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
231        })?
232        .collect::<Result<Vec<_>, _>>()?
233    };
234    Ok(rows)
235}
236
#[cfg(test)]
mod tests {
    use super::*;
    use rusqlite::Connection;

    /// Seconds in one day; lets the tests express ages in days.
    const DAY_SECS: i64 = 86_400;

    /// Opens an in-memory database containing the tables the purge code touches.
    fn setup_test_db() -> Connection {
        let db = Connection::open_in_memory().expect("falha ao abrir banco em memória");
        let created = db.execute_batch(
            "CREATE TABLE memories (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                namespace TEXT NOT NULL DEFAULT 'global',
                description TEXT,
                body TEXT,
                deleted_at INTEGER
            );
            CREATE TABLE IF NOT EXISTS vec_chunks (memory_id INTEGER);
            CREATE TABLE IF NOT EXISTS vec_memories (memory_id INTEGER);",
        );
        created.expect("falha ao criar tabelas de teste");
        db
    }

    /// Inserts a soft-deleted memory and returns its row id.
    fn insert_deleted_memory(
        conn: &Connection,
        name: &str,
        namespace: &str,
        body: &str,
        deleted_at: i64,
    ) -> i64 {
        let sql = "INSERT INTO memories (name, namespace, body, deleted_at) VALUES (?1, ?2, ?3, ?4)";
        conn.execute(sql, rusqlite::params![name, namespace, body, deleted_at])
            .expect("falha ao inserir memória de teste");
        conn.last_insert_rowid()
    }

    /// Current epoch in seconds, panicking if the clock cannot be read.
    fn epoch_now() -> i64 {
        current_epoch().expect("epoch falhou")
    }

    /// Runs `compute_metrics` for the "global" namespace without a name filter.
    fn global_metrics(conn: &Connection, cutoff: i64) -> (i64, Option<i64>, usize) {
        compute_metrics(conn, cutoff, Some("global"), None).expect("compute_metrics falhou")
    }

    #[test]
    fn retention_days_used_padrao_eh_90() {
        let default_days: u32 = crate::constants::PURGE_RETENTION_DAYS_DEFAULT;
        assert_eq!(default_days, 90u32);
    }

    #[test]
    fn compute_metrics_bytes_freed_positivo_para_body_populado() {
        let conn = setup_test_db();
        let now = epoch_now();
        insert_deleted_memory(
            &conn,
            "mem-teste",
            "global",
            "corpo da memória",
            now - 100 * DAY_SECS,
        );

        let (bytes, oldest, count) = global_metrics(&conn, now - 30 * DAY_SECS);

        assert!(bytes > 0, "bytes_freed deve ser > 0 para body populado");
        assert!(oldest.is_some(), "oldest_deleted_at deve ser Some");
        assert_eq!(count, 1);
    }

    #[test]
    fn compute_metrics_retorna_zero_sem_candidatos() {
        let conn = setup_test_db();
        let cutoff = epoch_now() - 90 * DAY_SECS;

        let (bytes, oldest, count) = global_metrics(&conn, cutoff);

        assert_eq!(bytes, 0);
        assert!(oldest.is_none());
        assert_eq!(count, 0);
    }

    #[test]
    fn dry_run_nao_deleta_registros() {
        let conn = setup_test_db();
        let now = epoch_now();
        insert_deleted_memory(
            &conn,
            "mem-dry",
            "global",
            "conteúdo dry run",
            now - 200 * DAY_SECS,
        );
        let cutoff = now - 30 * DAY_SECS;

        // Only the read-only metrics path is exercised here: computing the metrics
        // twice must leave the candidate in place.
        let count_antes = global_metrics(&conn, cutoff).2;
        assert_eq!(count_antes, 1, "deve haver 1 candidato antes do dry run");

        let count_depois = global_metrics(&conn, cutoff).2;
        assert_eq!(
            count_depois, 1,
            "dry_run não deve remover registros: count deve permanecer 1"
        );
    }

    #[test]
    fn oldest_deleted_at_retorna_menor_epoch() {
        let conn = setup_test_db();
        let now = epoch_now();
        let epoch_antigo = now - 300 * DAY_SECS;
        let epoch_recente = now - 200 * DAY_SECS;

        let fixtures = [
            ("mem-a", "corpo-a", epoch_antigo),
            ("mem-b", "corpo-b", epoch_recente),
        ];
        for (name, body, deleted_at) in fixtures {
            insert_deleted_memory(&conn, name, "global", body, deleted_at);
        }

        let (_, oldest, count) = global_metrics(&conn, now - 30 * DAY_SECS);

        assert_eq!(count, 2);
        assert_eq!(
            oldest,
            Some(epoch_antigo),
            "oldest_deleted_at deve ser o epoch mais antigo"
        );
    }

    #[test]
    fn purge_response_serializa_todos_campos_novos() {
        let resp = PurgeResponse {
            purged_count: 3,
            bytes_freed: 1024,
            oldest_deleted_at: Some(1_700_000_000),
            retention_days_used: 90,
            dry_run: false,
            namespace: Some("global".to_string()),
            cutoff_epoch: 1_710_000_000,
            warnings: vec![],
            elapsed_ms: 42,
        };
        let json = serde_json::to_string(&resp).expect("serialização falhou");

        let expected_keys = [
            "bytes_freed",
            "oldest_deleted_at",
            "retention_days_used",
            "dry_run",
            "elapsed_ms",
        ];
        for key in expected_keys {
            assert!(json.contains(key));
        }
    }
}