sqlite_graphrag/commands/
cleanup_orphans.rs1use crate::errors::AppError;
4use crate::output::{self, OutputFormat};
5use crate::paths::AppPaths;
6use crate::storage::connection::open_rw;
7use crate::storage::entities;
8use serde::Serialize;
9
// Command-line arguments for the `cleanup-orphans` subcommand.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive macros
// turn `///` doc comments into help/about text, so using them would change
// the CLI's `--help` output.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n \
    # Remove orphan entities (no memories, no relationships) from the global namespace\n \
    sqlite-graphrag cleanup-orphans\n\n \
    # Preview which entities would be removed without deleting\n \
    sqlite-graphrag cleanup-orphans --dry-run\n\n \
    # Cleanup within a specific namespace\n \
    sqlite-graphrag cleanup-orphans --namespace my-project --yes")]
pub struct CleanupOrphansArgs {
    // Optional namespace forwarded to the orphan lookup. When absent, the
    // text output labels the run `<all>`.
    // NOTE(review): the examples above describe the no-flag case as "the
    // global namespace" — confirm which wording matches the lookup.
    #[arg(long)]
    pub namespace: Option<String>,
    // Report the orphans that were found without deleting anything.
    #[arg(long)]
    pub dry_run: bool,
    // Suppresses the progress notice printed before deletion. Deletion is
    // performed whether or not this flag is given.
    #[arg(long)]
    pub yes: bool,
    // Output format for the final report; defaults to JSON.
    #[arg(long, value_enum, default_value = "json")]
    pub format: OutputFormat,
    // Hidden flag that `run` never reads; its help text describes it as a no-op.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override; may also be supplied through the
    // `SQLITE_GRAPHRAG_DB_PATH` environment variable.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
32
/// Report produced by the `cleanup-orphans` command.
///
/// Serialized with serde; fields are emitted in declaration order.
#[derive(Serialize)]
struct CleanupResponse {
    /// Number of orphan entity ids found by the lookup.
    orphan_count: usize,
    /// Number of entities removed; `0` for a dry run.
    deleted: usize,
    /// Whether the command ran with `--dry-run`.
    dry_run: bool,
    /// Namespace given on the command line, if any (`null` in JSON when absent).
    namespace: Option<String>,
    /// Milliseconds elapsed between the start of the handler and the moment
    /// this response was built.
    elapsed_ms: u64,
}
42
43pub fn run(args: CleanupOrphansArgs) -> Result<(), AppError> {
44 let inicio = std::time::Instant::now();
45 let paths = AppPaths::resolve(args.db.as_deref())?;
46
47 crate::storage::connection::ensure_db_ready(&paths)?;
48
49 let mut conn = open_rw(&paths.db)?;
50
51 let orphan_ids = entities::find_orphan_entity_ids(&conn, args.namespace.as_deref())?;
52 let orphan_count = orphan_ids.len();
53
54 let deleted = if args.dry_run {
55 0
56 } else {
57 if orphan_count > 0 && !args.yes {
58 output::emit_progress(&format!(
59 "removing {orphan_count} orphan entities (use --yes to skip this notice)"
60 ));
61 }
62 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
63 let removed = entities::delete_entities_by_ids(&tx, &orphan_ids)?;
64 tx.commit()?;
65 conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
66 removed
67 };
68
69 let response = CleanupResponse {
70 orphan_count,
71 deleted,
72 dry_run: args.dry_run,
73 namespace: args.namespace.clone(),
74 elapsed_ms: inicio.elapsed().as_millis() as u64,
75 };
76
77 match args.format {
78 OutputFormat::Json => output::emit_json(&response)?,
79 OutputFormat::Text | OutputFormat::Markdown => {
80 let ns = response.namespace.as_deref().unwrap_or("<all>");
81 output::emit_text(&format!(
82 "orphans: {} found, {} deleted (dry_run={}) [{}]",
83 response.orphan_count, response.deleted, response.dry_run, ns
84 ));
85 }
86 }
87
88 Ok(())
89}
90
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `CleanupResponse` fixture from plain values.
    fn fixture(
        orphan_count: usize,
        deleted: usize,
        dry_run: bool,
        namespace: Option<&str>,
        elapsed_ms: u64,
    ) -> CleanupResponse {
        CleanupResponse {
            orphan_count,
            deleted,
            dry_run,
            namespace: namespace.map(String::from),
            elapsed_ms,
        }
    }

    /// A dry-run response exposes every field under its own JSON key.
    #[test]
    fn cleanup_response_serializes_dry_run_true() {
        let response = fixture(5, 0, true, Some("global"), 12);
        let value = serde_json::to_value(&response).expect("serialization failed");

        assert_eq!(value["orphan_count"], 5);
        assert_eq!(value["deleted"], 0);
        assert_eq!(value["dry_run"], true);
        assert_eq!(value["namespace"], "global");
        assert!(value["elapsed_ms"].is_number());
    }

    /// A dry-run fixture keeps `deleted` at zero while still counting orphans.
    #[test]
    fn cleanup_response_deleted_zero_when_dry_run() {
        let response = fixture(10, 0, true, None, 5);

        assert_eq!(response.deleted, 0, "dry_run must keep deleted at 0");
        assert_eq!(response.orphan_count, 10);
    }

    /// A missing namespace is serialized as JSON `null`.
    #[test]
    fn cleanup_response_namespace_none_serializes_null() {
        let response = fixture(0, 0, false, None, 1);
        let value = serde_json::to_value(&response).expect("serialization failed");

        assert!(
            value["namespace"].is_null(),
            "namespace None must serialize as null"
        );
    }

    /// An executed (non-dry-run) fixture reports as many deletions as orphans.
    #[test]
    fn cleanup_response_deleted_equals_orphan_count_when_executed() {
        let response = fixture(3, 3, false, Some("projeto"), 20);

        assert_eq!(
            response.deleted, response.orphan_count,
            "when running without dry_run, deleted must equal orphan_count"
        );
    }
}