Skip to main content

sqlite_graphrag/commands/
fts.rs

1//! Handler for the `fts` CLI subcommand family.
2//!
//! Provides three maintenance operations for the FTS5 full-text search index:
//! - `rebuild`: drops and reconstructs the index from the `memories` table.
//! - `check`: runs the FTS5 integrity-check without modifying the index.
//! - `stats`: reports the indexed row count, shadow-table row count, and a liveness probe.
6
7use crate::errors::AppError;
8use crate::output;
9use crate::paths::AppPaths;
10use crate::storage::connection::{open_ro, open_rw};
11use serde::Serialize;
12
// NOTE(review): with clap's derive, `///` comments on derived items can surface
// as CLI help text, so they are user-visible output. Maintainer notes in this
// block therefore use plain `//` comments.
/// Arguments for the `fts` subcommand family.
#[derive(clap::Args)]
#[command(about = "FTS5 full-text search index management")]
pub struct FtsArgs {
    // The selected nested subcommand; `run` matches on this field to pick the
    // handler (`run_rebuild`, `run_check`, or `run_stats`).
    #[command(subcommand)]
    pub command: FtsSubcommand,
}
20
// Each variant wraps its own argument struct and is dispatched by `run`.
// NOTE(review): the `///` lines and `after_long_help` strings below are
// user-visible help text (the examples are presumably shown after the long
// `--help` output); treat edits to them as output changes.
/// Subcommands nested under `fts`.
#[derive(clap::Subcommand)]
pub enum FtsSubcommand {
    // Handled by `run_rebuild`.
    /// Rebuild the FTS5 index from the memories table.
    #[command(after_long_help = "EXAMPLES:\n  \
        # Rebuild the full-text search index\n  \
        sqlite-graphrag fts rebuild\n\n  \
        # Rebuild with custom database path\n  \
        sqlite-graphrag fts rebuild --db /path/to/graphrag.sqlite")]
    Rebuild(FtsRebuildArgs),
    // Handled by `run_check`.
    /// Run FTS5 integrity-check without modifying the index.
    #[command(after_long_help = "EXAMPLES:\n  \
        # Check FTS5 index integrity\n  \
        sqlite-graphrag fts check\n\n  \
        # Check with custom database path\n  \
        sqlite-graphrag fts check --db /path/to/graphrag.sqlite")]
    Check(FtsCheckArgs),
    // Handled by `run_stats`.
    /// Show FTS5 index statistics (row count, shadow pages, functional status).
    #[command(after_long_help = "EXAMPLES:\n  \
        # Show FTS5 index statistics\n  \
        sqlite-graphrag fts stats\n\n  \
        # Stats with custom database path\n  \
        sqlite-graphrag fts stats --db /path/to/graphrag.sqlite")]
    Stats(FtsStatsArgs),
}
46
/// Arguments for `fts rebuild`.
#[derive(clap::Args)]
pub struct FtsRebuildArgs {
    // Hidden flag that `run_rebuild` never reads; the handler always emits JSON.
    /// No-op; JSON is always emitted on stdout.
    #[arg(long, hide = true)]
    pub json: bool,
    // May also be supplied through the `SQLITE_GRAPHRAG_DB_PATH` environment
    // variable. Forwarded as `Option<&str>` to `AppPaths::resolve`; what happens
    // when it is `None` is decided there (not visible in this file).
    /// Path to the SQLite database file.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
57
/// Arguments for `fts check`.
#[derive(clap::Args)]
pub struct FtsCheckArgs {
    // Hidden flag that `run_check` never reads; the handler always emits JSON.
    /// No-op; JSON is always emitted on stdout.
    #[arg(long, hide = true)]
    pub json: bool,
    // May also be supplied through the `SQLITE_GRAPHRAG_DB_PATH` environment
    // variable. Forwarded as `Option<&str>` to `AppPaths::resolve`; what happens
    // when it is `None` is decided there (not visible in this file).
    /// Path to the SQLite database file.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
68
/// Arguments for `fts stats`.
#[derive(clap::Args)]
pub struct FtsStatsArgs {
    // Hidden flag that `run_stats` never reads; the handler always emits JSON.
    /// No-op; JSON is always emitted on stdout.
    #[arg(long, hide = true)]
    pub json: bool,
    // May also be supplied through the `SQLITE_GRAPHRAG_DB_PATH` environment
    // variable. Forwarded as `Option<&str>` to `AppPaths::resolve`; what happens
    // when it is `None` is decided there (not visible in this file).
    /// Path to the SQLite database file.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
79
/// JSON payload emitted by `run_rebuild` after the index has been rebuilt.
///
/// Field names are the serialized JSON keys, so renaming or reordering them
/// changes the command's output.
#[derive(Serialize)]
struct FtsRebuildResponse {
    /// Action label; `run_rebuild` always sets this to `"rebuilt"`.
    action: String,
    /// Result of `SELECT COUNT(*) FROM fts_memories` taken after the rebuild.
    rows_indexed: i64,
    /// Milliseconds elapsed since the handler started.
    elapsed_ms: u64,
}
86
/// JSON payload emitted by `run_check`.
///
/// Field names are the serialized JSON keys, so renaming or reordering them
/// changes the command's output.
#[derive(Serialize)]
struct FtsCheckResponse {
    /// Action label; `run_check` always sets this to `"checked"`.
    action: String,
    /// `true` when the integrity-check statement executed without error.
    integrity_ok: bool,
    /// Remediation hint, present only on failure; the key is omitted from the
    /// JSON entirely when this is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<String>,
    /// Milliseconds elapsed since the handler started.
    elapsed_ms: u64,
}
95
/// JSON payload emitted by `run_stats`.
///
/// Field names are the serialized JSON keys, so renaming or reordering them
/// changes the command's output.
#[derive(Serialize)]
struct FtsStatsResponse {
    /// Result of `SELECT COUNT(*) FROM fts_memories`.
    total_rows: i64,
    /// Result of `SELECT COUNT(*) FROM fts_memories_data`; `None` (and omitted
    /// from the JSON) when that query fails for any reason.
    #[serde(skip_serializing_if = "Option::is_none")]
    shadow_pages: Option<i64>,
    /// `true` when the probe `SELECT` against `fts_memories` succeeded.
    fts_functional: bool,
    /// Milliseconds elapsed since the handler started.
    elapsed_ms: u64,
}
104
105/// Dispatch entry point called from `main`.
106///
107/// # Errors
108/// Propagates any [`AppError`] raised by the underlying subcommand.
109pub fn run(args: FtsArgs) -> Result<(), AppError> {
110    match args.command {
111        FtsSubcommand::Rebuild(a) => run_rebuild(a),
112        FtsSubcommand::Check(a) => run_check(a),
113        FtsSubcommand::Stats(a) => run_stats(a),
114    }
115}
116
117/// Rebuilds the FTS5 index by issuing the `'rebuild'` special command.
118///
119/// The FTS5 `INSERT INTO fts_memories(fts_memories) VALUES('rebuild')` statement
120/// drops all index data and re-populates it from the content table in a single
121/// transaction. Use this after bulk imports or when `fts check` reports a failure.
122///
123/// # Errors
124/// Returns [`AppError::Database`] on any SQLite failure.
125fn run_rebuild(args: FtsRebuildArgs) -> Result<(), AppError> {
126    let start = std::time::Instant::now();
127    let paths = AppPaths::resolve(args.db.as_deref())?;
128    crate::storage::connection::ensure_db_ready(&paths)?;
129    let conn = open_rw(&paths.db)?;
130
131    conn.execute_batch("INSERT INTO fts_memories(fts_memories) VALUES('rebuild');")?;
132
133    let rows: i64 = conn.query_row("SELECT COUNT(*) FROM fts_memories", [], |r| r.get(0))?;
134
135    output::emit_json(&FtsRebuildResponse {
136        action: "rebuilt".to_string(),
137        rows_indexed: rows,
138        elapsed_ms: start.elapsed().as_millis() as u64,
139    })?;
140
141    Ok(())
142}
143
144/// Runs the FTS5 integrity-check without modifying the index.
145///
146/// The FTS5 integrity-check is triggered by:
147/// ```sql
148/// INSERT INTO fts_memories(fts_memories, rank) VALUES('integrity-check', 1);
149/// ```
150/// SQLite raises an error if the index is corrupt, so a successful `execute_batch`
151/// means the index is healthy. On failure, `integrity_ok` is `false` and the
152/// `detail` field carries an actionable hint.
153///
154/// # Errors
155/// Returns [`AppError`] only on unexpected I/O or path resolution failures;
156/// an FTS5 corruption is reported as `integrity_ok: false`, not as a Rust error.
157fn run_check(args: FtsCheckArgs) -> Result<(), AppError> {
158    let start = std::time::Instant::now();
159    let paths = AppPaths::resolve(args.db.as_deref())?;
160    crate::storage::connection::ensure_db_ready(&paths)?;
161    let conn = open_rw(&paths.db)?;
162
163    let integrity_ok = conn
164        .execute_batch("INSERT INTO fts_memories(fts_memories, rank) VALUES('integrity-check', 1);")
165        .is_ok();
166
167    output::emit_json(&FtsCheckResponse {
168        action: "checked".to_string(),
169        integrity_ok,
170        detail: if integrity_ok {
171            None
172        } else {
173            Some("FTS5 integrity-check failed — run 'sqlite-graphrag fts rebuild'".to_string())
174        },
175        elapsed_ms: start.elapsed().as_millis() as u64,
176    })?;
177
178    Ok(())
179}
180
181/// Returns FTS5 index statistics: total indexed rows, shadow table page count (best-effort),
182/// and a functional liveness check.
183///
184/// # Errors
185/// Returns [`AppError`] only on unexpected I/O or path resolution failures.
186fn run_stats(args: FtsStatsArgs) -> Result<(), AppError> {
187    let start = std::time::Instant::now();
188    let paths = AppPaths::resolve(args.db.as_deref())?;
189    crate::storage::connection::ensure_db_ready(&paths)?;
190    let conn = open_ro(&paths.db)?;
191
192    // 1. Total indexed rows in the FTS5 content table.
193    let total_rows: i64 = conn.query_row("SELECT COUNT(*) FROM fts_memories", [], |r| r.get(0))?;
194
195    // 2. Shadow pages — queries the internal `_data` shadow table.
196    //    This may not exist on all SQLite builds; treat any failure as None.
197    let shadow_pages: Option<i64> = conn
198        .query_row("SELECT COUNT(*) FROM fts_memories_data", [], |r| r.get(0))
199        .ok();
200
201    // 3. Functional liveness: SELECT with FTS5 match syntax against a wildcard.
202    //    A successful LIMIT 0 query confirms the FTS5 module is operational.
203    let fts_functional = conn
204        .execute_batch("SELECT * FROM fts_memories('*') LIMIT 0;")
205        .is_ok();
206
207    output::emit_json(&FtsStatsResponse {
208        total_rows,
209        shadow_pages,
210        fts_functional,
211        elapsed_ms: start.elapsed().as_millis() as u64,
212    })?;
213
214    Ok(())
215}
216
#[cfg(test)]
mod tests {
    use super::*;

    /// Serializes a response struct into a JSON value, panicking on failure.
    fn as_json<T: serde::Serialize>(response: &T) -> serde_json::Value {
        serde_json::to_value(response).expect("serialization failed")
    }

    /// Builds a rebuild response with the fixed `"rebuilt"` action label.
    fn rebuild_response(rows_indexed: i64, elapsed_ms: u64) -> FtsRebuildResponse {
        FtsRebuildResponse {
            action: String::from("rebuilt"),
            rows_indexed,
            elapsed_ms,
        }
    }

    /// Builds a check response with the fixed `"checked"` action label.
    fn check_response(
        integrity_ok: bool,
        detail: Option<String>,
        elapsed_ms: u64,
    ) -> FtsCheckResponse {
        FtsCheckResponse {
            action: String::from("checked"),
            integrity_ok,
            detail,
            elapsed_ms,
        }
    }

    // Every rebuild field must appear under its own key.
    #[test]
    fn fts_rebuild_response_serializes_all_fields() {
        let json = as_json(&rebuild_response(42, 10));
        assert_eq!(json["action"], "rebuilt");
        assert_eq!(json["rows_indexed"], 42i64);
        assert_eq!(json["elapsed_ms"], 10u64);
    }

    // A healthy check must not serialize the `detail` key at all.
    #[test]
    fn fts_check_response_integrity_ok_omits_detail() {
        let json = as_json(&check_response(true, None, 5));
        assert_eq!(json["action"], "checked");
        assert_eq!(json["integrity_ok"], true);
        assert!(
            json.get("detail").is_none(),
            "detail must be absent when integrity_ok is true"
        );
        assert_eq!(json["elapsed_ms"], 5u64);
    }

    // A failed check must carry the remediation hint.
    #[test]
    fn fts_check_response_corruption_includes_detail() {
        let hint = "FTS5 integrity-check failed — run 'sqlite-graphrag fts rebuild'".to_string();
        let json = as_json(&check_response(false, Some(hint), 3));
        assert_eq!(json["integrity_ok"], false);
        assert!(
            json["detail"].as_str().unwrap().contains("fts rebuild"),
            "detail must mention the remediation command"
        );
    }

    // `elapsed_ms` must serialize as an unsigned integer.
    #[test]
    fn fts_rebuild_response_elapsed_ms_non_negative() {
        let json = as_json(&rebuild_response(0, 0));
        assert!(json["elapsed_ms"].as_u64().is_some());
    }

    // Same unsigned-integer guarantee for the check response.
    #[test]
    fn fts_check_response_elapsed_ms_non_negative() {
        let json = as_json(&check_response(true, None, 0));
        assert!(json["elapsed_ms"].as_u64().is_some());
    }

    // Every stats field must appear when `shadow_pages` is populated.
    #[test]
    fn fts_stats_response_serializes_all_fields() {
        let json = as_json(&FtsStatsResponse {
            total_rows: 150,
            shadow_pages: Some(12),
            fts_functional: true,
            elapsed_ms: 8,
        });
        assert_eq!(json["total_rows"], 150i64);
        assert_eq!(json["shadow_pages"], 12i64);
        assert_eq!(json["fts_functional"], true);
        assert_eq!(json["elapsed_ms"], 8u64);
    }

    // A missing shadow-table count must drop the key from the output.
    #[test]
    fn fts_stats_response_omits_shadow_pages_when_none() {
        let json = as_json(&FtsStatsResponse {
            total_rows: 0,
            shadow_pages: None,
            fts_functional: false,
            elapsed_ms: 2,
        });
        assert!(
            json.get("shadow_pages").is_none(),
            "shadow_pages must be absent when None"
        );
        assert_eq!(json["fts_functional"], false);
    }

    // A non-functional index is still reported alongside the row count.
    #[test]
    fn fts_stats_response_fts_not_functional() {
        let json = as_json(&FtsStatsResponse {
            total_rows: 5,
            shadow_pages: None,
            fts_functional: false,
            elapsed_ms: 1,
        });
        assert_eq!(json["fts_functional"], false);
        assert_eq!(json["total_rows"], 5i64);
    }
}