Skip to main content

sqlite_graphrag/commands/
cleanup_orphans.rs

1//! Handler for the `cleanup-orphans` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::i18n::errors_msg;
5use crate::output::{self, OutputFormat};
6use crate::paths::AppPaths;
7use crate::storage::connection::open_rw;
8use crate::storage::entities;
9use serde::Serialize;
10
11#[derive(clap::Args)]
12pub struct CleanupOrphansArgs {
13    #[arg(long)]
14    pub namespace: Option<String>,
15    #[arg(long)]
16    pub dry_run: bool,
17    #[arg(long)]
18    pub yes: bool,
19    #[arg(long, value_enum, default_value = "json")]
20    pub format: OutputFormat,
21    #[arg(long, help = "No-op; JSON is always emitted on stdout")]
22    pub json: bool,
23    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
24    pub db: Option<String>,
25}
26
27#[derive(Serialize)]
28struct CleanupResponse {
29    orphan_count: usize,
30    deleted: usize,
31    dry_run: bool,
32    namespace: Option<String>,
33    /// Total execution time in milliseconds from handler start to serialisation.
34    elapsed_ms: u64,
35}
36
37pub fn run(args: CleanupOrphansArgs) -> Result<(), AppError> {
38    let inicio = std::time::Instant::now();
39    let paths = AppPaths::resolve(args.db.as_deref())?;
40
41    if !paths.db.exists() {
42        return Err(AppError::NotFound(errors_msg::database_not_found(
43            &paths.db.display().to_string(),
44        )));
45    }
46
47    let mut conn = open_rw(&paths.db)?;
48
49    let orphan_ids = entities::find_orphan_entity_ids(&conn, args.namespace.as_deref())?;
50    let orphan_count = orphan_ids.len();
51
52    let deleted = if args.dry_run {
53        0
54    } else {
55        if orphan_count > 0 && !args.yes {
56            output::emit_progress(&format!(
57                "removing {orphan_count} orphan entities (use --yes to skip this notice)"
58            ));
59        }
60        let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
61        let removed = entities::delete_entities_by_ids(&tx, &orphan_ids)?;
62        tx.commit()?;
63        removed
64    };
65
66    let response = CleanupResponse {
67        orphan_count,
68        deleted,
69        dry_run: args.dry_run,
70        namespace: args.namespace.clone(),
71        elapsed_ms: inicio.elapsed().as_millis() as u64,
72    };
73
74    match args.format {
75        OutputFormat::Json => output::emit_json(&response)?,
76        OutputFormat::Text | OutputFormat::Markdown => {
77            let ns = response.namespace.as_deref().unwrap_or("<all>");
78            output::emit_text(&format!(
79                "orphans: {} found, {} deleted (dry_run={}) [{}]",
80                response.orphan_count, response.deleted, response.dry_run, ns
81            ));
82        }
83    }
84
85    Ok(())
86}
87
// Unit tests for the JSON shape of `CleanupResponse`.
//
// Every assertion goes through the serialised value, so the tests exercise the
// `Serialize` derive instead of reading back fields they have just set.
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialises a response into a JSON value for field-level assertions.
    fn to_json(resp: &CleanupResponse) -> serde_json::Value {
        serde_json::to_value(resp).expect("serialização falhou")
    }

    #[test]
    fn cleanup_response_serializa_dry_run_true() {
        let json = to_json(&CleanupResponse {
            orphan_count: 5,
            deleted: 0,
            dry_run: true,
            namespace: Some("global".to_string()),
            elapsed_ms: 12,
        });
        assert_eq!(json["orphan_count"], 5);
        assert_eq!(json["deleted"], 0);
        assert_eq!(json["dry_run"], true);
        assert_eq!(json["namespace"], "global");
        assert!(json["elapsed_ms"].is_number());
    }

    #[test]
    fn cleanup_response_deleted_zero_quando_dry_run() {
        let json = to_json(&CleanupResponse {
            orphan_count: 10,
            deleted: 0,
            dry_run: true,
            namespace: None,
            elapsed_ms: 5,
        });
        assert_eq!(json["deleted"], 0, "dry_run deve manter deleted em 0");
        assert_eq!(json["orphan_count"], 10);
        assert_eq!(json["dry_run"], true);
    }

    #[test]
    fn cleanup_response_namespace_none_serializa_null() {
        let json = to_json(&CleanupResponse {
            orphan_count: 0,
            deleted: 0,
            dry_run: false,
            namespace: None,
            elapsed_ms: 1,
        });
        assert!(
            json["namespace"].is_null(),
            "namespace None deve serializar como null"
        );
    }

    #[test]
    fn cleanup_response_deleted_igual_orphan_count_quando_executado() {
        let json = to_json(&CleanupResponse {
            orphan_count: 3,
            deleted: 3,
            dry_run: false,
            namespace: Some("projeto".to_string()),
            elapsed_ms: 20,
        });
        assert_eq!(
            json["deleted"], json["orphan_count"],
            "ao executar sem dry_run, deleted deve igualar orphan_count"
        );
        assert_eq!(json["dry_run"], false);
    }

    #[test]
    fn cleanup_response_exposes_exactly_the_expected_keys() {
        let json = to_json(&CleanupResponse {
            orphan_count: 1,
            deleted: 1,
            dry_run: false,
            namespace: None,
            elapsed_ms: 0,
        });
        let object = json
            .as_object()
            .expect("response must serialise as a JSON object");
        let mut keys: Vec<&str> = object.keys().map(String::as_str).collect();
        keys.sort_unstable();
        assert_eq!(
            keys,
            ["deleted", "dry_run", "elapsed_ms", "namespace", "orphan_count"]
        );
    }
}