sqlite_graphrag/commands/
vacuum.rs1use crate::errors::AppError;
4use crate::output;
5use crate::output::JsonOutputFormat;
6use crate::paths::AppPaths;
7use crate::storage::connection::open_rw;
8use serde::Serialize;
9
10#[derive(clap::Args)]
11#[command(after_long_help = "EXAMPLES:\n \
12 # Run VACUUM after WAL checkpoint (default)\n \
13 sqlite-graphrag vacuum\n\n \
14 # Vacuum a database at a custom path\n \
15 sqlite-graphrag vacuum --db /path/to/graphrag.sqlite\n\n \
16 # Vacuum via SQLITE_GRAPHRAG_DB_PATH env var\n \
17 SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag vacuum\n\n\
18NOTE:\n \
19 reclaimed_bytes may report 0 even after `purge` if removed memories did not\n \
20 span entire SQLite pages (page size = 4 KB). Run `vacuum` regularly only on\n \
21 large databases (> 10 MB) for measurable gains.")]
22pub struct VacuumArgs {
23 #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
24 pub json: bool,
25 #[arg(long, default_value_t = true)]
27 pub checkpoint: bool,
28 #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
30 pub format: JsonOutputFormat,
31 #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
32 pub db: Option<String>,
33}
34
35#[derive(Serialize)]
36struct VacuumResponse {
37 db_path: String,
38 size_before_bytes: u64,
39 size_after_bytes: u64,
40 reclaimed_bytes: u64,
43 status: String,
44 elapsed_ms: u64,
46}
47
48pub fn run(args: VacuumArgs) -> Result<(), AppError> {
49 let start = std::time::Instant::now();
50 let _ = args.format;
51 let paths = AppPaths::resolve(args.db.as_deref())?;
52
53 crate::storage::connection::ensure_db_ready(&paths)?;
54
55 let size_before_bytes = std::fs::metadata(&paths.db)
56 .map(|meta| meta.len())
57 .unwrap_or(0);
58 let conn = open_rw(&paths.db)?;
59 if args.checkpoint {
60 conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
61 }
62 conn.execute_batch("VACUUM;")?;
63 if args.checkpoint {
64 conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
65 }
66 drop(conn);
67 let size_after_bytes = std::fs::metadata(&paths.db)
68 .map(|meta| meta.len())
69 .unwrap_or(0);
70
71 output::emit_json(&VacuumResponse {
72 db_path: paths.db.display().to_string(),
73 size_before_bytes,
74 size_after_bytes,
75 reclaimed_bytes: size_before_bytes.saturating_sub(size_after_bytes),
76 status: "ok".to_string(),
77 elapsed_ms: start.elapsed().as_millis() as u64,
78 })?;
79
80 Ok(())
81}
82
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `VacuumResponse` with status `"ok"` and serializes it to a JSON value.
    fn serialized(
        db_path: &str,
        size_before_bytes: u64,
        size_after_bytes: u64,
        reclaimed_bytes: u64,
        elapsed_ms: u64,
    ) -> serde_json::Value {
        let resp = VacuumResponse {
            db_path: db_path.to_string(),
            size_before_bytes,
            size_after_bytes,
            reclaimed_bytes,
            status: "ok".to_string(),
            elapsed_ms,
        };
        serde_json::to_value(&resp).expect("serialization failed")
    }

    /// Every field appears in the JSON under its Rust name with the value it was given.
    #[test]
    fn vacuum_response_serializes_all_fields() {
        let json = serialized(
            "/home/user/.local/share/sqlite-graphrag/db.sqlite",
            32768,
            16384,
            16384,
            55,
        );
        assert_eq!(
            json["db_path"],
            "/home/user/.local/share/sqlite-graphrag/db.sqlite"
        );
        assert_eq!(json["size_before_bytes"], 32768u64);
        assert_eq!(json["size_after_bytes"], 16384u64);
        assert_eq!(json["reclaimed_bytes"], 16384u64);
        assert_eq!(json["status"], "ok");
        assert_eq!(json["elapsed_ms"], 55u64);
    }

    /// The size fields round-trip as unsigned integers and stay mutually consistent.
    #[test]
    fn vacuum_response_size_after_less_than_or_equal_to_before() {
        let json = serialized("/data/db.sqlite", 65536, 32768, 32768, 100);
        let read = |key: &str| json[key].as_u64().unwrap();
        let before = read("size_before_bytes");
        let after = read("size_after_bytes");
        let reclaimed = read("reclaimed_bytes");
        assert!(
            after <= before,
            "size_after_bytes must be <= size_before_bytes after VACUUM"
        );
        assert_eq!(
            reclaimed,
            before - after,
            "reclaimed_bytes must equal size_before_bytes - size_after_bytes"
        );
    }

    /// The status string is serialized verbatim.
    #[test]
    fn vacuum_response_status_ok() {
        let json = serialized("/data/db.sqlite", 0, 0, 0, 0);
        assert_eq!(json["status"], "ok");
    }

    /// `elapsed_ms` is always present and is an unsigned integer.
    #[test]
    fn vacuum_response_elapsed_ms_present_and_non_negative() {
        let json = serialized("/data/db.sqlite", 1024, 1024, 0, 0);
        assert!(
            json.get("elapsed_ms").is_some(),
            "elapsed_ms field must be present"
        );
        assert!(
            json["elapsed_ms"].as_u64().is_some(),
            "elapsed_ms must be a non-negative integer"
        );
    }
}