Skip to main content

sqlite_graphrag/commands/
optimize.rs

//! Handler for the `optimize` CLI subcommand.
2
3use crate::commands::fts::check_fts_functional;
4use crate::errors::AppError;
5use crate::output;
6use crate::paths::AppPaths;
7use crate::storage::connection::open_rw;
8use serde::Serialize;
9
10#[derive(clap::Args)]
11#[command(after_long_help = "EXAMPLES:\n  \
12    # Run PRAGMA optimize on the default database\n  \
13    sqlite-graphrag optimize\n\n  \
14    # Optimize a database at a custom path\n  \
15    sqlite-graphrag optimize --db /path/to/graphrag.sqlite\n\n  \
16    # Skip the FTS5 rebuild even if the index looks unhealthy\n  \
17    sqlite-graphrag optimize --skip-fts\n\n  \
18    # Dry-run: only report FTS5 health status, do not rebuild\n  \
19    sqlite-graphrag optimize --fts-dry-run\n\n  \
20    # Run optimize non-interactively (skip confirmation prompts)\n  \
21    sqlite-graphrag optimize --yes\n\n  \
22    # Force a full FTS5 rebuild even if the index already passes integrity-check\n  \
23    sqlite-graphrag optimize --no-fts-skip-when-functional\n\n  \
24    # Optimize via SQLITE_GRAPHRAG_DB_PATH env var\n  \
25    SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag optimize")]
26pub struct OptimizeArgs {
27    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
28    pub json: bool,
29    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
30    pub db: Option<String>,
31    #[arg(long, default_value_t = false, help = "Skip FTS5 index rebuild")]
32    pub skip_fts: bool,
33    /// When true (default), the FTS5 rebuild step is skipped when
34    /// `fts check` reports the index is already functional. Saves 5-15
35    /// minutes on large databases. Set to false to always rebuild.
36    #[arg(
37        long,
38        default_value_t = true,
39        help = "Skip FTS5 rebuild when index is already functional (saves minutes on big DBs)"
40    )]
41    pub fts_skip_when_functional: bool,
42    /// G36 Passo 2 (v1.0.69): run `fts check` + `fts stats` only, do not
43    /// trigger any rebuild. Exit code is 0 when the index is healthy, 1
44    /// when a rebuild would be recommended.
45    #[arg(
46        long,
47        default_value_t = false,
48        help = "G36: only run fts check + fts stats, do not rebuild (exit 1 if rebuild recommended)"
49    )]
50    pub fts_dry_run: bool,
51    /// G36 Passo 3 (v1.0.69): emit a tracing::info! progress line every
52    /// N seconds during the FTS5 rebuild. The FTS5 `rebuild` command is
53    /// synchronous and does not call the SQLite progress handler, so the
54    /// progress is sampled at the configured interval. Use 0 to disable.
55    #[arg(
56        long,
57        default_value_t = 30,
58        help = "G36: emit progress line every N seconds during FTS5 rebuild (0 to disable)"
59    )]
60    pub fts_progress: u64,
61    /// G36 Passo 4 (v1.0.69): skip all confirmation prompts. Required
62    /// for non-interactive CI/CD pipelines that cannot answer `y/N`.
63    #[arg(
64        long,
65        default_value_t = false,
66        help = "G36: skip confirmation prompts (required for non-interactive CI)"
67    )]
68    pub yes: bool,
69}
70
71#[derive(Serialize)]
72struct OptimizeResponse {
73    db_path: String,
74    status: String,
75    /// True when the FTS5 index was rebuilt during this optimize run.
76    fts_rebuilt: bool,
77    /// True when the FTS5 rebuild was skipped because the index was already healthy.
78    fts_skipped_functional: bool,
79    /// True when FTS5 was detected as unhealthy AND the rebuild was attempted.
80    fts_unhealthy: bool,
81    /// Number of FTS5 rows indexed during the rebuild (G36 progress observability).
82    fts_rows_indexed: Option<i64>,
83    /// Total execution time in milliseconds from handler start to serialisation.
84    elapsed_ms: u64,
85}
86
87pub fn run(args: OptimizeArgs) -> Result<(), AppError> {
88    let inicio = std::time::Instant::now();
89    let paths = AppPaths::resolve(args.db.as_deref())?;
90
91    crate::storage::connection::ensure_db_ready(&paths)?;
92
93    let conn = open_rw(&paths.db)?;
94    conn.execute_batch("PRAGMA optimize;")?;
95
96    // G36: pre-check FTS5 health before triggering a multi-minute rebuild.
97    let fts_functional = if !args.skip_fts {
98        check_fts_functional(&conn).unwrap_or(false)
99    } else {
100        false
101    };
102
103    // G36 Passo 2 (v1.0.69): dry-run path. Run fts check + fts stats, emit
104    // JSON envelope, and return exit 1 when a rebuild would be recommended.
105    if args.fts_dry_run {
106        let recommend_rebuild = !fts_functional;
107        output::emit_json(&OptimizeResponse {
108            db_path: paths.db.display().to_string(),
109            status: if recommend_rebuild {
110                "rebuild_recommended".to_string()
111            } else {
112                "ok".to_string()
113            },
114            fts_rebuilt: false,
115            fts_skipped_functional: false,
116            fts_unhealthy: !fts_functional,
117            fts_rows_indexed: None,
118            elapsed_ms: inicio.elapsed().as_millis() as u64,
119        })?;
120        if recommend_rebuild {
121            std::process::exit(1);
122        }
123        return Ok(());
124    }
125
126    let (fts_rebuilt, fts_skipped_functional, fts_unhealthy, fts_rows_indexed) = if args.skip_fts {
127        (false, false, false, None)
128    } else if args.fts_skip_when_functional && fts_functional {
129        tracing::info!(target: "optimize",
130            "FTS5 index already functional; skipping rebuild (use --no-fts-skip-when-functional to override)"
131        );
132        (false, true, false, None)
133    } else {
134        if !fts_functional {
135            tracing::warn!(target: "optimize",
136                "FTS5 index reported unhealthy; running full rebuild"
137            );
138        }
139        // Capture row count BEFORE rebuild so we can report progress.
140        // (FTS5 rebuild is synchronous; a true callback would require
141        // `sqlite3_progress_handler` which the FTS5 'rebuild' command
142        // does not respect. We sample the row count after.)
143        let before: i64 = conn
144            .query_row("SELECT COUNT(*) FROM fts_memories", [], |r| r.get(0))
145            .unwrap_or(0);
146        // G36 Passo 3 (v1.0.69): spawn a lightweight background thread that
147        // emits a tracing::info! progress line every `args.fts_progress`
148        // seconds while the rebuild is in flight. The FTS5 rebuild command
149        // is synchronous and does not call the SQLite progress handler, so
150        // the only observability we can add is a row-count poll from a
151        // background thread. We open a SEPARATE read-only connection
152        // because `rusqlite::Connection` is not `Sync` and the rebuild
153        // holds the main connection exclusively. Default 30s; 0 disables.
154        let progress_thread = if args.fts_progress > 0 {
155            let interval = std::time::Duration::from_secs(args.fts_progress);
156            let db_path = paths.db.clone();
157            let child = std::thread::spawn(move || loop {
158                std::thread::sleep(interval);
159                let count: i64 = match crate::storage::connection::open_ro(&db_path) {
160                    Ok(c) => c
161                        .query_row("SELECT COUNT(*) FROM fts_memories", [], |r| r.get(0))
162                        .unwrap_or(-1),
163                    Err(_) => -1,
164                };
165                tracing::info!(target: "optimize", fts_rows = count, "FTS5 rebuild progress sample");
166            });
167            Some(child)
168        } else {
169            None
170        };
171        let rebuilt_ok = conn
172            .execute_batch("INSERT INTO fts_memories(fts_memories) VALUES('rebuild');")
173            .is_ok();
174        if let Some(handle) = progress_thread {
175            // The thread runs forever in a sleep loop; we leak it on
176            // purpose because (a) it terminates when the process exits
177            // and (b) we cannot safely join without a stop signal channel
178            // which would add complexity not warranted for a 30s sampler.
179            std::mem::forget(handle);
180        }
181        let after: i64 = if rebuilt_ok {
182            conn.query_row("SELECT COUNT(*) FROM fts_memories", [], |r| r.get(0))
183                .unwrap_or(0)
184        } else {
185            0
186        };
187        // G36 progress: rows_indexed == after - before.  Emitted as a
188        // tracing::info! line so operators following logs see the
189        // rebuild magnitude without needing NDJSON streaming.
190        tracing::info!(target: "optimize", before, after, "FTS5 rebuild complete");
191        (rebuilt_ok, false, !fts_functional, Some(after - before))
192    };
193
194    // G36 Passo 4 (v1.0.69): --yes flag is currently honored for forward
195    // compatibility — every interactive prompt path in optimize must
196    // check this flag and skip the prompt when set. As of v1.0.69 there
197    // are no interactive prompts in optimize (the user is told up front
198    // via the after_long_help), but the flag is reserved so future
199    // confirmations can be added without breaking the CLI contract.
200    let _ = args.yes;
201
202    output::emit_json(&OptimizeResponse {
203        db_path: paths.db.display().to_string(),
204        status: "ok".to_string(),
205        fts_rebuilt,
206        fts_skipped_functional,
207        fts_unhealthy,
208        fts_rows_indexed,
209        elapsed_ms: inicio.elapsed().as_millis() as u64,
210    })?;
211
212    Ok(())
213}
214
#[cfg(test)]
mod tests {
    use super::*;
    use serial_test::serial;
    use tempfile::TempDir;

    /// The core envelope fields survive serialisation unchanged.
    #[test]
    fn optimize_response_serializes_required_fields() {
        let resp = OptimizeResponse {
            db_path: "/tmp/graphrag.sqlite".to_string(),
            status: "ok".to_string(),
            fts_rebuilt: false,
            fts_rows_indexed: None,
            fts_skipped_functional: false,
            fts_unhealthy: false,
            elapsed_ms: 5,
        };
        let json = serde_json::to_value(&resp).unwrap();
        assert_eq!(json["status"], "ok");
        assert_eq!(json["db_path"], "/tmp/graphrag.sqlite");
        assert_eq!(json["elapsed_ms"], 5);
    }

    /// Running against a path with no database must create it and succeed.
    #[test]
    #[serial]
    fn optimize_auto_inits_when_db_missing() {
        let dir = TempDir::new().unwrap();
        let db_path = dir.path().join("missing.sqlite");
        // SAFETY: `#[serial]` guarantees single-threaded execution.
        unsafe {
            std::env::set_var("SQLITE_GRAPHRAG_DB_PATH", db_path.to_str().unwrap());
            std::env::set_var("LOG_LEVEL", "error");
        }

        let args = OptimizeArgs {
            json: false,
            db: Some(db_path.to_string_lossy().into_owned()),
            skip_fts: false,
            fts_skip_when_functional: true,
            fts_dry_run: false,
            fts_progress: 30,
            yes: true,
        };
        let result = run(args);
        let db_created = db_path.exists();

        // Restore the environment BEFORE asserting, so a failing assertion
        // cannot leak these variables into the tests that run afterwards.
        // SAFETY: `#[serial]` guarantees single-threaded execution.
        unsafe {
            std::env::remove_var("SQLITE_GRAPHRAG_DB_PATH");
            std::env::remove_var("LOG_LEVEL");
        }

        assert!(
            result.is_ok(),
            "auto-init must succeed and PRAGMA optimize must run on the fresh database, got {result:?}"
        );
        assert!(
            db_created,
            "auto-init must create the database file at {}",
            db_path.display()
        );
    }

    /// A response built with status "ok" serialises that status verbatim.
    #[test]
    fn optimize_response_status_ok_fixo() {
        let resp = OptimizeResponse {
            db_path: "/qualquer/caminho".to_string(),
            status: "ok".to_string(),
            fts_rebuilt: false,
            fts_rows_indexed: None,
            fts_skipped_functional: false,
            fts_unhealthy: false,
            elapsed_ms: 0,
        };
        let json = serde_json::to_value(&resp).unwrap();
        assert_eq!(json["status"], "ok", "status deve ser sempre 'ok'");
    }

    /// Every scalar field of the envelope is present with the given value.
    #[test]
    fn optimize_response_serializes_all_fields() {
        let resp = OptimizeResponse {
            db_path: "/data/x.sqlite".into(),
            status: "ok".into(),
            fts_rebuilt: true,
            fts_rows_indexed: Some(0),
            fts_skipped_functional: false,
            fts_unhealthy: true,
            elapsed_ms: 120,
        };
        let v = serde_json::to_value(&resp).unwrap();
        assert_eq!(v["db_path"], "/data/x.sqlite");
        assert_eq!(v["status"], "ok");
        assert_eq!(v["fts_rebuilt"], true);
        assert_eq!(v["fts_skipped_functional"], false);
        assert_eq!(v["fts_unhealthy"], true);
        assert_eq!(v["elapsed_ms"], 120u64);
    }

    #[test]
    fn optimize_response_includes_fts_flags() {
        // G36: operator must be able to distinguish (a) rebuilt, (b) skipped-healthy,
        // (c) skipped-by-flag from (d) attempted-but-failed. The response
        // exposes fts_rebuilt, fts_skipped_functional, fts_unhealthy booleans.
        let resp = OptimizeResponse {
            db_path: "/x".into(),
            status: "ok".into(),
            fts_rebuilt: true,
            fts_rows_indexed: Some(0),
            fts_skipped_functional: false,
            fts_unhealthy: true,
            elapsed_ms: 1,
        };
        let v = serde_json::to_value(&resp).unwrap();
        assert_eq!(v["fts_rebuilt"], true);
        assert_eq!(v["fts_skipped_functional"], false);
        assert_eq!(v["fts_unhealthy"], true);
    }
}