Skip to main content

sqlite_graphrag/commands/
cleanup_orphans.rs

1//! Handler for the `cleanup-orphans` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output::{self, OutputFormat};
5use crate::paths::AppPaths;
6use crate::storage::connection::open_rw;
7use crate::storage::entities;
8use serde::Serialize;
9
// Command-line arguments accepted by the `cleanup-orphans` subcommand.
//
// Plain `//` comments are used here on purpose: clap's derive macros turn `///` doc comments
// into help text, so doc comments would change the generated `--help` output.
//
// NOTE(review): the first example below says that omitting `--namespace` targets the "global"
// namespace, while `run` labels a missing namespace as `<all>` in text output — confirm which
// description matches `entities::find_orphan_entity_ids`.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Remove orphan entities (no memories, no relationships) from the global namespace\n  \
    sqlite-graphrag cleanup-orphans\n\n  \
    # Preview which entities would be removed without deleting\n  \
    sqlite-graphrag cleanup-orphans --dry-run\n\n  \
    # Cleanup within a specific namespace\n  \
    sqlite-graphrag cleanup-orphans --namespace my-project --yes")]
pub struct CleanupOrphansArgs {
    // Optional namespace; forwarded unchanged to `entities::find_orphan_entity_ids`.
    #[arg(long)]
    pub namespace: Option<String>,
    // When set, `run` only counts orphans and reports `deleted = 0`; nothing is removed.
    #[arg(long)]
    pub dry_run: bool,
    // Suppresses the progress notice `run` emits before deleting. It is not a confirmation
    // gate: deletion proceeds whether or not this flag is given.
    #[arg(long)]
    pub yes: bool,
    // Output format for the final summary; defaults to `json`. `run` renders `Text` and
    // `Markdown` identically.
    #[arg(long, value_enum, default_value = "json")]
    pub format: OutputFormat,
    // Hidden flag that `run` never reads.
    // NOTE(review): the help string says JSON is always emitted, yet `--format text` produces
    // plain text — confirm the wording.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override, also readable from the `SQLITE_GRAPHRAG_DB_PATH` environment
    // variable; passed to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
32
/// Summary of one `cleanup-orphans` invocation.
///
/// `run` either serialises this as JSON or renders it as a single text line, depending on the
/// selected output format.
#[derive(Serialize)]
struct CleanupResponse {
    /// Number of orphan entity ids returned by the orphan lookup.
    orphan_count: usize,
    /// Number of entities reported as removed; `run` sets this to 0 in dry-run mode.
    deleted: usize,
    /// Echo of the `--dry-run` flag.
    dry_run: bool,
    /// Echo of the `--namespace` argument; `None` serialises as JSON `null`.
    namespace: Option<String>,
    /// Milliseconds elapsed since the handler started, sampled when the response is built
    /// (just before it is emitted).
    elapsed_ms: u64,
}
42
43pub fn run(args: CleanupOrphansArgs) -> Result<(), AppError> {
44    let inicio = std::time::Instant::now();
45    let paths = AppPaths::resolve(args.db.as_deref())?;
46
47    crate::storage::connection::ensure_db_ready(&paths)?;
48
49    let mut conn = open_rw(&paths.db)?;
50
51    let orphan_ids = entities::find_orphan_entity_ids(&conn, args.namespace.as_deref())?;
52    let orphan_count = orphan_ids.len();
53
54    let deleted = if args.dry_run {
55        0
56    } else {
57        if orphan_count > 0 && !args.yes {
58            output::emit_progress(&format!(
59                "removing {orphan_count} orphan entities (use --yes to skip this notice)"
60            ));
61        }
62        let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
63        let removed = entities::delete_entities_by_ids(&tx, &orphan_ids)?;
64        tx.commit()?;
65        conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
66        removed
67    };
68
69    let response = CleanupResponse {
70        orphan_count,
71        deleted,
72        dry_run: args.dry_run,
73        namespace: args.namespace.clone(),
74        elapsed_ms: inicio.elapsed().as_millis() as u64,
75    };
76
77    match args.format {
78        OutputFormat::Json => output::emit_json(&response)?,
79        OutputFormat::Text | OutputFormat::Markdown => {
80            let ns = response.namespace.as_deref().unwrap_or("<all>");
81            output::emit_text(&format!(
82                "orphans: {} found, {} deleted (dry_run={}) [{}]",
83                response.orphan_count, response.deleted, response.dry_run, ns
84            ));
85        }
86    }
87
88    Ok(())
89}
90
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `CleanupResponse` fixture from plain values.
    fn fixture(
        orphan_count: usize,
        deleted: usize,
        dry_run: bool,
        namespace: Option<&str>,
        elapsed_ms: u64,
    ) -> CleanupResponse {
        CleanupResponse {
            orphan_count,
            deleted,
            dry_run,
            namespace: namespace.map(str::to_string),
            elapsed_ms,
        }
    }

    /// Serialises a fixture into a generic JSON value, panicking if serialisation fails.
    fn as_json(resp: &CleanupResponse) -> serde_json::Value {
        serde_json::to_value(resp).expect("serialization failed")
    }

    /// Every field of a dry-run response shows up in the JSON with the expected value.
    #[test]
    fn cleanup_response_serializes_dry_run_true() {
        let json = as_json(&fixture(5, 0, true, Some("global"), 12));

        assert_eq!(json["orphan_count"], 5);
        assert_eq!(json["deleted"], 0);
        assert_eq!(json["dry_run"], true);
        assert_eq!(json["namespace"], "global");
        assert!(json["elapsed_ms"].is_number());
    }

    /// A dry-run response keeps `deleted` at zero while still reporting the orphan count.
    /// This only checks the fixture's own fields; it does not exercise `run`.
    #[test]
    fn cleanup_response_deleted_zero_when_dry_run() {
        let resp = fixture(10, 0, true, None, 5);

        assert_eq!(resp.deleted, 0, "dry_run must keep deleted at 0");
        assert_eq!(resp.orphan_count, 10);
    }

    /// A missing namespace is serialised as JSON `null`.
    #[test]
    fn cleanup_response_namespace_none_serializes_null() {
        let json = as_json(&fixture(0, 0, false, None, 1));

        assert!(
            json["namespace"].is_null(),
            "namespace None must serialize as null"
        );
    }

    /// An executed (non dry-run) response reports as many deletions as orphans found.
    /// This only checks the fixture's own fields; it does not exercise `run`.
    #[test]
    fn cleanup_response_deleted_equals_orphan_count_when_executed() {
        let resp = fixture(3, 3, false, Some("projeto"), 20);

        assert_eq!(
            resp.deleted, resp.orphan_count,
            "when running without dry_run, deleted must equal orphan_count"
        );
    }
}
155}