Skip to main content

sqlite_graphrag/commands/
cleanup_orphans.rs

1//! Handler for the `cleanup-orphans` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output::{self, OutputFormat};
5use crate::paths::AppPaths;
6use crate::storage::connection::open_rw;
7use crate::storage::entities;
8use serde::Serialize;
9
// Command-line arguments accepted by the `cleanup-orphans` subcommand.
//
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments into help/about text, which would change the `--help` output.
//
// NOTE(review): the first example below says "global namespace", while the
// text output in `run` labels a missing `--namespace` as "<all>" — confirm
// which one matches `entities::find_orphan_entity_ids`.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Remove orphan entities (no memories, no relationships) from the global namespace\n  \
    sqlite-graphrag cleanup-orphans\n\n  \
    # Preview which entities would be removed without deleting\n  \
    sqlite-graphrag cleanup-orphans --dry-run\n\n  \
    # Cleanup within a specific namespace\n  \
    sqlite-graphrag cleanup-orphans --namespace my-project --yes")]
pub struct CleanupOrphansArgs {
    // Optional namespace forwarded to the orphan lookup and echoed back in the
    // response.
    #[arg(long)]
    pub namespace: Option<String>,
    // When set, orphans are only counted; nothing is deleted.
    #[arg(long)]
    pub dry_run: bool,
    // Suppresses the progress notice emitted before deletion. Deletion happens
    // whether or not this flag is given.
    #[arg(long)]
    pub yes: bool,
    // Output format, defaulting to JSON. `run` renders Text and Markdown as the
    // same single summary line.
    #[arg(long, value_enum, default_value = "json")]
    pub format: OutputFormat,
    // Hidden flag that is accepted but never read by `run`.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override, also settable through the
    // SQLITE_GRAPHRAG_DB_PATH environment variable; passed to
    // `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
32
/// Result payload reported by the `cleanup-orphans` handler.
///
/// Serialized as-is for JSON output; the text/markdown output is a one-line
/// summary built from the same fields.
#[derive(Serialize)]
struct CleanupResponse {
    /// Number of orphan entity ids returned by the lookup.
    orphan_count: usize,
    /// Number of entities reported as removed by the delete step; always 0 on
    /// a dry run.
    deleted: usize,
    /// Echo of the `--dry-run` flag.
    dry_run: bool,
    /// Echo of the `--namespace` argument; serialized as JSON `null` when it
    /// was not given.
    namespace: Option<String>,
    /// Total execution time in milliseconds from handler start to serialisation.
    elapsed_ms: u64,
}
42
43pub fn run(args: CleanupOrphansArgs) -> Result<(), AppError> {
44    let inicio = std::time::Instant::now();
45    let paths = AppPaths::resolve(args.db.as_deref())?;
46
47    crate::storage::connection::ensure_db_ready(&paths)?;
48
49    let mut conn = open_rw(&paths.db)?;
50
51    let orphan_ids = entities::find_orphan_entity_ids(&conn, args.namespace.as_deref())?;
52    let orphan_count = orphan_ids.len();
53
54    let deleted = if args.dry_run {
55        0
56    } else {
57        if orphan_count > 0 && !args.yes {
58            output::emit_progress(&format!(
59                "removing {orphan_count} orphan entities (use --yes to skip this notice)"
60            ));
61        }
62        let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
63        let removed = entities::delete_entities_by_ids(&tx, &orphan_ids)?;
64        tx.commit()?;
65        removed
66    };
67
68    let response = CleanupResponse {
69        orphan_count,
70        deleted,
71        dry_run: args.dry_run,
72        namespace: args.namespace.clone(),
73        elapsed_ms: inicio.elapsed().as_millis() as u64,
74    };
75
76    match args.format {
77        OutputFormat::Json => output::emit_json(&response)?,
78        OutputFormat::Text | OutputFormat::Markdown => {
79            let ns = response.namespace.as_deref().unwrap_or("<all>");
80            output::emit_text(&format!(
81                "orphans: {} found, {} deleted (dry_run={}) [{}]",
82                response.orphan_count, response.deleted, response.dry_run, ns
83            ));
84        }
85    }
86
87    Ok(())
88}
89
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `CleanupResponse` from the given fields and serializes it to a
    /// JSON value, so every test asserts on the emitted payload rather than on
    /// the struct it has just constructed.
    fn to_json(
        orphan_count: usize,
        deleted: usize,
        dry_run: bool,
        namespace: Option<&str>,
    ) -> serde_json::Value {
        let resp = CleanupResponse {
            orphan_count,
            deleted,
            dry_run,
            namespace: namespace.map(str::to_string),
            elapsed_ms: 7,
        };
        serde_json::to_value(&resp).expect("serialization failed")
    }

    #[test]
    fn cleanup_response_serializes_dry_run_true() {
        let json = to_json(5, 0, true, Some("global"));
        assert_eq!(json["orphan_count"], 5);
        assert_eq!(json["deleted"], 0);
        assert_eq!(json["dry_run"], true);
        assert_eq!(json["namespace"], "global");
        assert!(json["elapsed_ms"].is_number());
    }

    #[test]
    fn cleanup_response_deleted_zero_when_dry_run() {
        let json = to_json(10, 0, true, None);
        assert_eq!(json["deleted"], 0, "dry_run must keep deleted at 0");
        assert_eq!(json["orphan_count"], 10);
        assert_eq!(json["dry_run"], true);
    }

    #[test]
    fn cleanup_response_namespace_none_serializes_null() {
        let json = to_json(0, 0, false, None);
        assert!(
            json["namespace"].is_null(),
            "namespace None must serialize as null"
        );
    }

    #[test]
    fn cleanup_response_deleted_equals_orphan_count_when_executed() {
        let json = to_json(3, 3, false, Some("projeto"));
        assert_eq!(
            json["deleted"], json["orphan_count"],
            "when running without dry_run, deleted must equal orphan_count"
        );
        assert_eq!(json["dry_run"], false);
        assert_eq!(json["namespace"], "projeto");
    }

    #[test]
    fn cleanup_response_exposes_exactly_the_documented_fields() {
        let json = to_json(1, 1, false, Some("global"));
        let obj = json.as_object().expect("response must be a JSON object");
        // Sort explicitly so the check does not depend on the map's key order.
        let mut keys: Vec<&str> = obj.keys().map(String::as_str).collect();
        keys.sort_unstable();
        assert_eq!(
            keys,
            ["deleted", "dry_run", "elapsed_ms", "namespace", "orphan_count"]
        );
    }
}