// sqlite_graphrag/commands/optimize.rs
use crate::commands::fts::check_fts_functional;
4use crate::errors::AppError;
5use crate::output;
6use crate::paths::AppPaths;
7use crate::storage::connection::open_rw;
8use serde::Serialize;
9
10#[derive(clap::Args)]
11#[command(after_long_help = "EXAMPLES:\n \
12 # Run PRAGMA optimize on the default database\n \
13 sqlite-graphrag optimize\n\n \
14 # Optimize a database at a custom path\n \
15 sqlite-graphrag optimize --db /path/to/graphrag.sqlite\n\n \
16 # Skip the FTS5 rebuild even if the index looks unhealthy\n \
17 sqlite-graphrag optimize --skip-fts\n\n \
18 # Dry-run: only report FTS5 health status, do not rebuild\n \
19 sqlite-graphrag optimize --fts-dry-run\n\n \
20 # Run optimize non-interactively (skip confirmation prompts)\n \
21 sqlite-graphrag optimize --yes\n\n \
22 # Force a full FTS5 rebuild even if the index already passes integrity-check\n \
23 sqlite-graphrag optimize --no-fts-skip-when-functional\n\n \
24 # Optimize via SQLITE_GRAPHRAG_DB_PATH env var\n \
25 SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag optimize")]
26pub struct OptimizeArgs {
27 #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
28 pub json: bool,
29 #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
30 pub db: Option<String>,
31 #[arg(long, default_value_t = false, help = "Skip FTS5 index rebuild")]
32 pub skip_fts: bool,
33 #[arg(
37 long,
38 default_value_t = true,
39 help = "Skip FTS5 rebuild when index is already functional (saves minutes on big DBs)"
40 )]
41 pub fts_skip_when_functional: bool,
42 #[arg(
46 long,
47 default_value_t = false,
48 help = "G36: only run fts check + fts stats, do not rebuild (exit 1 if rebuild recommended)"
49 )]
50 pub fts_dry_run: bool,
51 #[arg(
56 long,
57 default_value_t = 30,
58 help = "G36: emit progress line every N seconds during FTS5 rebuild (0 to disable)"
59 )]
60 pub fts_progress: u64,
61 #[arg(
64 long,
65 default_value_t = false,
66 help = "G36: skip confirmation prompts (required for non-interactive CI)"
67 )]
68 pub yes: bool,
69}
70
71#[derive(Serialize)]
72struct OptimizeResponse {
73 db_path: String,
74 status: String,
75 fts_rebuilt: bool,
77 fts_skipped_functional: bool,
79 fts_unhealthy: bool,
81 fts_rows_indexed: Option<i64>,
83 elapsed_ms: u64,
85}
86
87pub fn run(args: OptimizeArgs) -> Result<(), AppError> {
88 let inicio = std::time::Instant::now();
89 let paths = AppPaths::resolve(args.db.as_deref())?;
90
91 crate::storage::connection::ensure_db_ready(&paths)?;
92
93 let conn = open_rw(&paths.db)?;
94 conn.execute_batch("PRAGMA optimize;")?;
95
96 let fts_functional = if !args.skip_fts {
98 check_fts_functional(&conn).unwrap_or(false)
99 } else {
100 false
101 };
102
103 if args.fts_dry_run {
106 let recommend_rebuild = !fts_functional;
107 output::emit_json(&OptimizeResponse {
108 db_path: paths.db.display().to_string(),
109 status: if recommend_rebuild {
110 "rebuild_recommended".to_string()
111 } else {
112 "ok".to_string()
113 },
114 fts_rebuilt: false,
115 fts_skipped_functional: false,
116 fts_unhealthy: !fts_functional,
117 fts_rows_indexed: None,
118 elapsed_ms: inicio.elapsed().as_millis() as u64,
119 })?;
120 if recommend_rebuild {
121 std::process::exit(1);
122 }
123 return Ok(());
124 }
125
126 let (fts_rebuilt, fts_skipped_functional, fts_unhealthy, fts_rows_indexed) = if args.skip_fts {
127 (false, false, false, None)
128 } else if args.fts_skip_when_functional && fts_functional {
129 tracing::info!(target: "optimize",
130 "FTS5 index already functional; skipping rebuild (use --no-fts-skip-when-functional to override)"
131 );
132 (false, true, false, None)
133 } else {
134 if !fts_functional {
135 tracing::warn!(target: "optimize",
136 "FTS5 index reported unhealthy; running full rebuild"
137 );
138 }
139 let before: i64 = conn
144 .query_row("SELECT COUNT(*) FROM fts_memories", [], |r| r.get(0))
145 .unwrap_or(0);
146 let progress_thread = if args.fts_progress > 0 {
155 let interval = std::time::Duration::from_secs(args.fts_progress);
156 let db_path = paths.db.clone();
157 let child = std::thread::spawn(move || loop {
158 std::thread::sleep(interval);
159 let count: i64 = match crate::storage::connection::open_ro(&db_path) {
160 Ok(c) => c
161 .query_row("SELECT COUNT(*) FROM fts_memories", [], |r| r.get(0))
162 .unwrap_or(-1),
163 Err(_) => -1,
164 };
165 tracing::info!(target: "optimize", fts_rows = count, "FTS5 rebuild progress sample");
166 });
167 Some(child)
168 } else {
169 None
170 };
171 let rebuilt_ok = conn
172 .execute_batch("INSERT INTO fts_memories(fts_memories) VALUES('rebuild');")
173 .is_ok();
174 if let Some(handle) = progress_thread {
175 std::mem::forget(handle);
180 }
181 let after: i64 = if rebuilt_ok {
182 conn.query_row("SELECT COUNT(*) FROM fts_memories", [], |r| r.get(0))
183 .unwrap_or(0)
184 } else {
185 0
186 };
187 tracing::info!(target: "optimize", before, after, "FTS5 rebuild complete");
191 (rebuilt_ok, false, !fts_functional, Some(after - before))
192 };
193
194 let _ = args.yes;
201
202 output::emit_json(&OptimizeResponse {
203 db_path: paths.db.display().to_string(),
204 status: "ok".to_string(),
205 fts_rebuilt,
206 fts_skipped_functional,
207 fts_unhealthy,
208 fts_rows_indexed,
209 elapsed_ms: inicio.elapsed().as_millis() as u64,
210 })?;
211
212 Ok(())
213}
214
#[cfg(test)]
mod tests {
    use super::*;
    use serial_test::serial;
    use tempfile::TempDir;

    /// Environment variables the auto-init test sets and must always unset.
    const TEST_ENV_KEYS: [&str; 2] = ["SQLITE_GRAPHRAG_DB_PATH", "LOG_LEVEL"];

    /// Unsets [`TEST_ENV_KEYS`] when dropped, so the variables are cleaned up
    /// even if an assertion panics halfway through the test.
    struct EnvCleanup;

    impl Drop for EnvCleanup {
        fn drop(&mut self) {
            for key in TEST_ENV_KEYS {
                // SAFETY: only created inside a `#[serial]` test; assumes every
                // other test that touches the process environment is serialized
                // too — TODO confirm across the crate.
                unsafe {
                    std::env::remove_var(key);
                }
            }
        }
    }

    /// The core fields serialize under their own names; an absent row count is `null`.
    #[test]
    fn optimize_response_serializes_required_fields() {
        let resp = OptimizeResponse {
            db_path: "/tmp/graphrag.sqlite".to_string(),
            status: "ok".to_string(),
            fts_rebuilt: false,
            fts_rows_indexed: None,
            fts_skipped_functional: false,
            fts_unhealthy: false,
            elapsed_ms: 5,
        };
        let json = serde_json::to_value(&resp).unwrap();
        assert_eq!(json["status"], "ok");
        assert_eq!(json["db_path"], "/tmp/graphrag.sqlite");
        assert_eq!(json["elapsed_ms"], 5);
        assert!(json["fts_rows_indexed"].is_null());
    }

    /// Running against a path that does not exist yet creates the database
    /// and completes successfully.
    #[test]
    #[serial]
    fn optimize_auto_inits_when_db_missing() {
        let dir = TempDir::new().unwrap();
        let db_path = dir.path().join("missing.sqlite");

        // Armed before the variables are set so cleanup happens on every exit path.
        let _env_cleanup = EnvCleanup;
        // SAFETY: this test is `#[serial]`; assumes no other thread reads or
        // writes the process environment concurrently — TODO confirm.
        unsafe {
            std::env::set_var("SQLITE_GRAPHRAG_DB_PATH", &db_path);
            std::env::set_var("LOG_LEVEL", "error");
        }

        let args = OptimizeArgs {
            json: false,
            db: Some(db_path.to_string_lossy().into_owned()),
            skip_fts: false,
            fts_skip_when_functional: true,
            fts_dry_run: false,
            fts_progress: 30,
            yes: true,
        };
        let result = run(args);
        assert!(
            result.is_ok(),
            "auto-init must succeed and PRAGMA optimize must run on the fresh database, got {result:?}"
        );
        assert!(
            db_path.exists(),
            "auto-init must create the database file at {}",
            db_path.display()
        );
    }

    /// The `status` value is passed through to the JSON output unchanged.
    #[test]
    fn optimize_response_status_ok_fixo() {
        let resp = OptimizeResponse {
            db_path: "/qualquer/caminho".to_string(),
            status: "ok".to_string(),
            fts_rebuilt: false,
            fts_rows_indexed: None,
            fts_skipped_functional: false,
            fts_unhealthy: false,
            elapsed_ms: 0,
        };
        let json = serde_json::to_value(&resp).unwrap();
        assert_eq!(json["status"], "ok", "status deve ser sempre 'ok'");
    }

    /// Every field, including a present row count, round-trips into JSON.
    #[test]
    fn optimize_response_serializes_all_fields() {
        let resp = OptimizeResponse {
            db_path: "/data/x.sqlite".into(),
            status: "ok".into(),
            fts_rebuilt: true,
            fts_rows_indexed: Some(0),
            fts_skipped_functional: false,
            fts_unhealthy: true,
            elapsed_ms: 120,
        };
        let v = serde_json::to_value(&resp).unwrap();
        assert_eq!(v["db_path"], "/data/x.sqlite");
        assert_eq!(v["status"], "ok");
        assert_eq!(v["fts_rebuilt"], true);
        assert_eq!(v["fts_rows_indexed"], 0);
        assert_eq!(v["fts_skipped_functional"], false);
        assert_eq!(v["fts_unhealthy"], true);
        assert_eq!(v["elapsed_ms"], 120u64);
    }

    /// The three FTS status flags are present in the JSON output.
    #[test]
    fn optimize_response_includes_fts_flags() {
        let resp = OptimizeResponse {
            db_path: "/x".into(),
            status: "ok".into(),
            fts_rebuilt: true,
            fts_rows_indexed: Some(0),
            fts_skipped_functional: false,
            fts_unhealthy: true,
            elapsed_ms: 1,
        };
        let v = serde_json::to_value(&resp).unwrap();
        assert_eq!(v["fts_rebuilt"], true);
        assert_eq!(v["fts_skipped_functional"], false);
        assert_eq!(v["fts_unhealthy"], true);
    }
}