Skip to main content

sqlite_graphrag/commands/
vacuum.rs

1//! Handler for the `vacuum` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::output::JsonOutputFormat;
6use crate::paths::AppPaths;
7use crate::storage::connection::open_rw;
8use serde::Serialize;
9
10#[derive(clap::Args)]
11#[command(after_long_help = "EXAMPLES:\n  \
12    # Run VACUUM after WAL checkpoint (default)\n  \
13    sqlite-graphrag vacuum\n\n  \
14    # Vacuum a database at a custom path\n  \
15    sqlite-graphrag vacuum --db /path/to/graphrag.sqlite\n\n  \
16    # Vacuum via SQLITE_GRAPHRAG_DB_PATH env var\n  \
17    SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag vacuum")]
18pub struct VacuumArgs {
19    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
20    pub json: bool,
21    /// Run a WAL checkpoint before and after `VACUUM`.
22    #[arg(long, default_value_t = true)]
23    pub checkpoint: bool,
24    /// Output format.
25    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
26    pub format: JsonOutputFormat,
27    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
28    pub db: Option<String>,
29}
30
31#[derive(Serialize)]
32struct VacuumResponse {
33    db_path: String,
34    size_before_bytes: u64,
35    size_after_bytes: u64,
36    /// Bytes reclaimed by VACUUM (size_before_bytes - size_after_bytes), saturating to zero.
37    /// Derived field added in v1.0.34 so callers do not have to compute the delta themselves.
38    reclaimed_bytes: u64,
39    status: String,
40    /// Total execution time in milliseconds from handler start to serialisation.
41    elapsed_ms: u64,
42}
43
44pub fn run(args: VacuumArgs) -> Result<(), AppError> {
45    let start = std::time::Instant::now();
46    let _ = args.format;
47    let paths = AppPaths::resolve(args.db.as_deref())?;
48
49    crate::storage::connection::ensure_db_ready(&paths)?;
50
51    let size_before_bytes = std::fs::metadata(&paths.db)
52        .map(|meta| meta.len())
53        .unwrap_or(0);
54    let conn = open_rw(&paths.db)?;
55    if args.checkpoint {
56        conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
57    }
58    conn.execute_batch("VACUUM;")?;
59    if args.checkpoint {
60        conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
61    }
62    drop(conn);
63    let size_after_bytes = std::fs::metadata(&paths.db)
64        .map(|meta| meta.len())
65        .unwrap_or(0);
66
67    output::emit_json(&VacuumResponse {
68        db_path: paths.db.display().to_string(),
69        size_before_bytes,
70        size_after_bytes,
71        reclaimed_bytes: size_before_bytes.saturating_sub(size_after_bytes),
72        status: "ok".to_string(),
73        elapsed_ms: start.elapsed().as_millis() as u64,
74    })?;
75
76    Ok(())
77}
78
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an `"ok"` response from the given values and serialises it to JSON.
    fn serialized(
        db_path: &str,
        size_before_bytes: u64,
        size_after_bytes: u64,
        reclaimed_bytes: u64,
        elapsed_ms: u64,
    ) -> serde_json::Value {
        let response = VacuumResponse {
            db_path: db_path.to_string(),
            size_before_bytes,
            size_after_bytes,
            reclaimed_bytes,
            status: "ok".to_string(),
            elapsed_ms,
        };
        serde_json::to_value(&response).expect("serialization failed")
    }

    // Every field of the response must appear in the JSON with its exact value.
    #[test]
    fn vacuum_response_serializes_all_fields() {
        const DB_PATH: &str = "/home/user/.local/share/sqlite-graphrag/db.sqlite";
        let json = serialized(DB_PATH, 32768, 16384, 16384, 55);

        assert_eq!(json["db_path"], DB_PATH);
        assert_eq!(json["size_before_bytes"], 32768u64);
        assert_eq!(json["size_after_bytes"], 16384u64);
        assert_eq!(json["reclaimed_bytes"], 16384u64);
        assert_eq!(json["status"], "ok");
        assert_eq!(json["elapsed_ms"], 55u64);
    }

    // The three size fields must round-trip as integers that stay mutually consistent.
    #[test]
    fn vacuum_response_size_after_less_than_or_equal_to_before() {
        let json = serialized("/data/db.sqlite", 65536, 32768, 32768, 100);

        let before = json["size_before_bytes"].as_u64().unwrap();
        let after = json["size_after_bytes"].as_u64().unwrap();
        let reclaimed = json["reclaimed_bytes"].as_u64().unwrap();

        assert!(
            after <= before,
            "size_after_bytes must be <= size_before_bytes after VACUUM"
        );
        assert_eq!(
            reclaimed,
            before - after,
            "reclaimed_bytes must equal size_before_bytes - size_after_bytes"
        );
    }

    // The status string is serialised verbatim.
    #[test]
    fn vacuum_response_status_ok() {
        let json = serialized("/data/db.sqlite", 0, 0, 0, 0);
        assert_eq!(json["status"], "ok");
    }

    // `elapsed_ms` must be present and encoded as an unsigned integer.
    #[test]
    fn vacuum_response_elapsed_ms_present_and_non_negative() {
        let json = serialized("/data/db.sqlite", 1024, 1024, 0, 0);

        assert!(
            json.get("elapsed_ms").is_some(),
            "elapsed_ms field must be present"
        );
        assert!(
            json["elapsed_ms"].as_u64().is_some(),
            "elapsed_ms must be a non-negative integer"
        );
    }
}