sqlite_graphrag/commands/
cleanup_orphans.rs1use crate::errors::AppError;
4use crate::output::{self, OutputFormat};
5use crate::paths::AppPaths;
6use crate::storage::connection::open_rw;
7use crate::storage::entities;
8use serde::Serialize;
9
// Command-line arguments for the `cleanup-orphans` subcommand.
//
// Plain `//` comments are used here on purpose: clap's derive macros read
// `///` doc comments as help text, so doc comments would alter `--help` output.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n \
 # Remove orphan entities (no memories, no relationships) from the global namespace\n \
 sqlite-graphrag cleanup-orphans\n\n \
 # Preview which entities would be removed without deleting\n \
 sqlite-graphrag cleanup-orphans --dry-run\n\n \
 # Cleanup within a specific namespace\n \
 sqlite-graphrag cleanup-orphans --namespace my-project --yes")]
pub struct CleanupOrphansArgs {
    // Namespace forwarded to the orphan lookup; `None` when the flag is omitted.
    #[arg(long)]
    pub namespace: Option<String>,
    // When set, `run` only counts orphans and deletes nothing.
    #[arg(long)]
    pub dry_run: bool,
    // Suppresses the progress notice `run` prints before a real deletion.
    // It does not gate the deletion itself.
    #[arg(long)]
    pub yes: bool,
    // Selects how `run` renders the result; defaults to JSON.
    #[arg(long, value_enum, default_value = "json")]
    pub format: OutputFormat,
    // Hidden flag that `run` never reads.
    // NOTE(review): the help string says JSON is always emitted, yet `run`
    // prints plain text for the Text/Markdown formats — confirm which is intended.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override; also read from the SQLITE_GRAPHRAG_DB_PATH
    // environment variable. Passed to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
32
/// Summary of one `cleanup-orphans` invocation, serialized for the JSON output
/// and reused to build the text output.
#[derive(Serialize)]
struct CleanupResponse {
    /// Number of orphan entity ids returned by the lookup.
    orphan_count: usize,
    /// Number of entities removed as reported by the delete call; `0` on a dry run.
    deleted: usize,
    /// Echo of the `--dry-run` flag.
    dry_run: bool,
    /// Namespace argument exactly as supplied; serializes as `null` when absent.
    namespace: Option<String>,
    /// Time elapsed since `run` started, in milliseconds.
    elapsed_ms: u64,
}
42
43pub fn run(args: CleanupOrphansArgs) -> Result<(), AppError> {
44 let inicio = std::time::Instant::now();
45 let paths = AppPaths::resolve(args.db.as_deref())?;
46
47 crate::storage::connection::ensure_db_ready(&paths)?;
48
49 let mut conn = open_rw(&paths.db)?;
50
51 let orphan_ids = entities::find_orphan_entity_ids(&conn, args.namespace.as_deref())?;
52 let orphan_count = orphan_ids.len();
53
54 let deleted = if args.dry_run {
55 0
56 } else {
57 if orphan_count > 0 && !args.yes {
58 output::emit_progress(&format!(
59 "removing {orphan_count} orphan entities (use --yes to skip this notice)"
60 ));
61 }
62 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
63 let removed = entities::delete_entities_by_ids(&tx, &orphan_ids)?;
64 tx.commit()?;
65 removed
66 };
67
68 let response = CleanupResponse {
69 orphan_count,
70 deleted,
71 dry_run: args.dry_run,
72 namespace: args.namespace.clone(),
73 elapsed_ms: inicio.elapsed().as_millis() as u64,
74 };
75
76 match args.format {
77 OutputFormat::Json => output::emit_json(&response)?,
78 OutputFormat::Text | OutputFormat::Markdown => {
79 let ns = response.namespace.as_deref().unwrap_or("<all>");
80 output::emit_text(&format!(
81 "orphans: {} found, {} deleted (dry_run={}) [{}]",
82 response.orphan_count, response.deleted, response.dry_run, ns
83 ));
84 }
85 }
86
87 Ok(())
88}
89
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `CleanupResponse` fixture so each test spells out only its values.
    fn make_response(
        orphan_count: usize,
        deleted: usize,
        dry_run: bool,
        namespace: Option<&str>,
        elapsed_ms: u64,
    ) -> CleanupResponse {
        CleanupResponse {
            orphan_count,
            deleted,
            dry_run,
            namespace: namespace.map(String::from),
            elapsed_ms,
        }
    }

    /// A dry-run response exposes every field under its own JSON key.
    #[test]
    fn cleanup_response_serializes_dry_run_true() {
        let fixture = make_response(5, 0, true, Some("global"), 12);
        let value = serde_json::to_value(&fixture).expect("serialization failed");

        assert_eq!(value["orphan_count"], 5);
        assert_eq!(value["deleted"], 0);
        assert_eq!(value["dry_run"], true);
        assert_eq!(value["namespace"], "global");
        assert!(value["elapsed_ms"].is_number());
    }

    /// A dry-run fixture keeps `deleted` at zero while still reporting orphans.
    #[test]
    fn cleanup_response_deleted_zero_when_dry_run() {
        let fixture = make_response(10, 0, true, None, 5);

        assert_eq!(fixture.deleted, 0, "dry_run must keep deleted at 0");
        assert_eq!(fixture.orphan_count, 10);
    }

    /// An absent namespace is rendered as JSON `null`.
    #[test]
    fn cleanup_response_namespace_none_serializes_null() {
        let fixture = make_response(0, 0, false, None, 1);
        let value = serde_json::to_value(&fixture).expect("serialization failed");

        assert!(
            value["namespace"].is_null(),
            "namespace None must serialize as null"
        );
    }

    /// A non-dry-run fixture reports as many deletions as orphans found.
    #[test]
    fn cleanup_response_deleted_equals_orphan_count_when_executed() {
        let fixture = make_response(3, 3, false, Some("projeto"), 20);

        assert_eq!(
            fixture.deleted, fixture.orphan_count,
            "when running without dry_run, deleted must equal orphan_count"
        );
    }
}