Skip to main content

sqlite_graphrag/commands/
vacuum.rs

1//! Handler for the `vacuum` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::output::JsonOutputFormat;
6use crate::paths::AppPaths;
7use crate::storage::connection::open_rw;
8use serde::Serialize;
9
10#[derive(clap::Args)]
11#[command(after_long_help = "EXAMPLES:\n  \
12    # Run VACUUM after WAL checkpoint (default)\n  \
13    sqlite-graphrag vacuum\n\n  \
14    # Vacuum a database at a custom path\n  \
15    sqlite-graphrag vacuum --db /path/to/graphrag.sqlite\n\n  \
16    # Vacuum via SQLITE_GRAPHRAG_DB_PATH env var\n  \
17    SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag vacuum\n\n\
18NOTE:\n  \
19    reclaimed_bytes may report 0 even after `purge` if removed memories did not\n  \
20    span entire SQLite pages (page size = 4 KB). Run `vacuum` regularly only on\n  \
21    large databases (> 10 MB) for measurable gains.")]
22pub struct VacuumArgs {
23    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
24    pub json: bool,
25    /// Run a WAL checkpoint before and after `VACUUM`.
26    #[arg(long, default_value_t = true)]
27    pub checkpoint: bool,
28    /// Output format.
29    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
30    pub format: JsonOutputFormat,
31    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
32    pub db: Option<String>,
33}
34
/// JSON payload that the `vacuum` handler emits on stdout.
///
/// Serialised through the `Serialize` derive with no rename attributes, so the
/// JSON keys are the Rust field names.
#[derive(Serialize)]
struct VacuumResponse {
    /// Database path the handler operated on, rendered with `.display()`.
    db_path: String,
    /// Size in bytes of the database file, read before the connection is opened
    /// (reported as 0 when the file metadata cannot be read).
    size_before_bytes: u64,
    /// Size in bytes of the database file, read after the connection is dropped
    /// (reported as 0 when the file metadata cannot be read).
    size_after_bytes: u64,
    /// Bytes reclaimed by VACUUM (size_before_bytes - size_after_bytes), saturating to zero.
    /// Derived field added in v1.0.34 so callers do not have to compute the delta themselves.
    reclaimed_bytes: u64,
    /// Outcome marker; the handler in this file always sets it to `"ok"`.
    status: String,
    /// Total execution time in milliseconds from handler start to serialisation.
    elapsed_ms: u64,
}
47
48pub fn run(args: VacuumArgs) -> Result<(), AppError> {
49    let start = std::time::Instant::now();
50    let _ = args.format;
51    let paths = AppPaths::resolve(args.db.as_deref())?;
52
53    crate::storage::connection::ensure_db_ready(&paths)?;
54
55    let size_before_bytes = std::fs::metadata(&paths.db)
56        .map(|meta| meta.len())
57        .unwrap_or(0);
58    let conn = open_rw(&paths.db)?;
59    if args.checkpoint {
60        conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
61    }
62    conn.execute_batch("VACUUM;")?;
63    if args.checkpoint {
64        conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
65    }
66    drop(conn);
67    let size_after_bytes = std::fs::metadata(&paths.db)
68        .map(|meta| meta.len())
69        .unwrap_or(0);
70
71    output::emit_json(&VacuumResponse {
72        db_path: paths.db.display().to_string(),
73        size_before_bytes,
74        size_after_bytes,
75        reclaimed_bytes: size_before_bytes.saturating_sub(size_after_bytes),
76        status: "ok".to_string(),
77        elapsed_ms: start.elapsed().as_millis() as u64,
78    })?;
79
80    Ok(())
81}
82
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a response with status `"ok"` whose `reclaimed_bytes` is derived
    /// from the two sizes the same way the handler derives it.
    fn ok_response(db_path: &str, before: u64, after: u64, elapsed_ms: u64) -> VacuumResponse {
        VacuumResponse {
            db_path: db_path.to_string(),
            size_before_bytes: before,
            size_after_bytes: after,
            reclaimed_bytes: before.saturating_sub(after),
            status: "ok".to_string(),
            elapsed_ms,
        }
    }

    /// Serialises a response into a JSON value, failing the test on error.
    fn as_json(resp: &VacuumResponse) -> serde_json::Value {
        serde_json::to_value(resp).expect("serialization failed")
    }

    /// Every field of the response appears in the JSON with its exact value.
    #[test]
    fn vacuum_response_serializes_all_fields() {
        let path = "/home/user/.local/share/sqlite-graphrag/db.sqlite";
        let json = as_json(&ok_response(path, 32768, 16384, 55));

        assert_eq!(
            json["db_path"],
            "/home/user/.local/share/sqlite-graphrag/db.sqlite"
        );
        assert_eq!(json["size_before_bytes"], 32768u64);
        assert_eq!(json["size_after_bytes"], 16384u64);
        assert_eq!(json["reclaimed_bytes"], 16384u64);
        assert_eq!(json["status"], "ok");
        assert_eq!(json["elapsed_ms"], 55u64);
    }

    /// The serialised sizes keep their ordering and `reclaimed_bytes` is their difference.
    #[test]
    fn vacuum_response_size_after_less_than_or_equal_to_before() {
        let json = as_json(&ok_response("/data/db.sqlite", 65536, 32768, 100));

        let before = json["size_before_bytes"].as_u64().unwrap();
        let after = json["size_after_bytes"].as_u64().unwrap();
        let reclaimed = json["reclaimed_bytes"].as_u64().unwrap();

        assert!(
            after <= before,
            "size_after_bytes must be <= size_before_bytes after VACUUM"
        );
        assert_eq!(
            reclaimed,
            before - after,
            "reclaimed_bytes must equal size_before_bytes - size_after_bytes"
        );
    }

    /// The status string survives serialisation unchanged.
    #[test]
    fn vacuum_response_status_ok() {
        let json = as_json(&ok_response("/data/db.sqlite", 0, 0, 0));
        assert_eq!(json["status"], "ok");
    }

    /// `elapsed_ms` is always present and is encoded as an unsigned integer.
    #[test]
    fn vacuum_response_elapsed_ms_present_and_non_negative() {
        let json = as_json(&ok_response("/data/db.sqlite", 1024, 1024, 0));

        assert!(
            json.get("elapsed_ms").is_some(),
            "elapsed_ms field must be present"
        );
        assert!(
            json["elapsed_ms"].as_u64().is_some(),
            "elapsed_ms must be a non-negative integer"
        );
    }
}