Skip to main content

sqlite_graphrag/commands/
fts.rs

//! Handler for the `fts` CLI subcommand family.
//!
//! Provides three maintenance operations for the FTS5 full-text search index:
//! - `rebuild`: drops and reconstructs the index from the `memories` table.
//! - `check`: runs the FTS5 integrity-check without modifying the index.
//! - `stats`: reports the indexed row count, shadow-table size, and a liveness probe.
6
7use crate::errors::AppError;
8use crate::output;
9use crate::paths::AppPaths;
10use crate::storage::connection::{open_ro, open_rw};
11use serde::Serialize;
12
/// Arguments for the `fts` subcommand family.
// Thin clap wrapper: it declares no options of its own and only selects one of
// the nested `FtsSubcommand` variants. The `after_long_help` text below carries
// one example invocation for each nested subcommand (rebuild, check, stats).
#[derive(clap::Args)]
#[command(
    about = "FTS5 full-text search index management",
    after_long_help = "EXAMPLES:\n  \
        # Rebuild the full-text search index from memories table\n  \
        sqlite-graphrag fts rebuild\n\n  \
        # Check FTS5 index integrity\n  \
        sqlite-graphrag fts check --json\n\n  \
        # Show FTS5 index statistics\n  \
        sqlite-graphrag fts stats --json"
)]
pub struct FtsArgs {
    // The concrete operation to perform; `run` matches on this field.
    #[command(subcommand)]
    pub command: FtsSubcommand,
}
29
/// Subcommands nested under `fts`.
// NOTE: clap's derive API uses the `///` comments on the variants below as
// user-visible help text, so treat them as CLI copy rather than internal notes.
// Each variant wraps its own argument struct and is matched exhaustively in `run`.
#[derive(clap::Subcommand)]
pub enum FtsSubcommand {
    /// Rebuild the FTS5 index from the memories table.
    // Handled by `run_rebuild`.
    #[command(after_long_help = "EXAMPLES:\n  \
        # Rebuild the full-text search index\n  \
        sqlite-graphrag fts rebuild\n\n  \
        # Rebuild with custom database path\n  \
        sqlite-graphrag fts rebuild --db /path/to/graphrag.sqlite")]
    Rebuild(FtsRebuildArgs),
    /// Run FTS5 integrity-check without modifying the index.
    // Handled by `run_check`.
    #[command(after_long_help = "EXAMPLES:\n  \
        # Check FTS5 index integrity\n  \
        sqlite-graphrag fts check\n\n  \
        # Check with custom database path\n  \
        sqlite-graphrag fts check --db /path/to/graphrag.sqlite")]
    Check(FtsCheckArgs),
    /// Show FTS5 index statistics (row count, shadow pages, functional status).
    // Handled by `run_stats`.
    #[command(after_long_help = "EXAMPLES:\n  \
        # Show FTS5 index statistics\n  \
        sqlite-graphrag fts stats\n\n  \
        # Stats with custom database path\n  \
        sqlite-graphrag fts stats --db /path/to/graphrag.sqlite")]
    Stats(FtsStatsArgs),
}
55
/// Arguments for `fts rebuild`.
#[derive(clap::Args)]
pub struct FtsRebuildArgs {
    /// No-op; JSON is always emitted on stdout.
    // Accepted as `--json` but hidden from help (`hide = true`); `run_rebuild`
    // never reads this field.
    #[arg(long, hide = true)]
    pub json: bool,
    /// Path to the SQLite database file.
    // Settable with `--db` or the `SQLITE_GRAPHRAG_DB_PATH` environment variable.
    // When neither is given, `run_rebuild` passes `None` to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
66
/// Arguments for `fts check`.
#[derive(clap::Args)]
pub struct FtsCheckArgs {
    /// No-op; JSON is always emitted on stdout.
    // Accepted as `--json` but hidden from help (`hide = true`); `run_check`
    // never reads this field.
    #[arg(long, hide = true)]
    pub json: bool,
    /// Path to the SQLite database file.
    // Settable with `--db` or the `SQLITE_GRAPHRAG_DB_PATH` environment variable.
    // When neither is given, `run_check` passes `None` to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
77
/// Arguments for `fts stats`.
#[derive(clap::Args)]
pub struct FtsStatsArgs {
    /// No-op; JSON is always emitted on stdout.
    // Accepted as `--json` but hidden from help (`hide = true`); `run_stats`
    // never reads this field.
    #[arg(long, hide = true)]
    pub json: bool,
    /// Path to the SQLite database file.
    // Settable with `--db` or the `SQLITE_GRAPHRAG_DB_PATH` environment variable.
    // When neither is given, `run_stats` passes `None` to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
88
/// JSON payload emitted by `fts rebuild` once the index has been rebuilt.
#[derive(Serialize)]
struct FtsRebuildResponse {
    /// Operation label; `run_rebuild` always sets it to `"rebuilt"`.
    action: String,
    /// Result of `SELECT COUNT(*) FROM fts_memories`, taken after the rebuild.
    rows_indexed: i64,
    /// Time elapsed since the handler started, in milliseconds.
    elapsed_ms: u64,
}
95
/// JSON payload emitted by `fts check`.
#[derive(Serialize)]
struct FtsCheckResponse {
    /// Operation label; `run_check` always sets it to `"checked"`.
    action: String,
    /// `true` when the FTS5 integrity-check statement executed without error.
    integrity_ok: bool,
    /// Remediation hint for a failed check; the key is left out of the JSON
    /// entirely when this is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<String>,
    /// Time elapsed since the handler started, in milliseconds.
    elapsed_ms: u64,
}
104
/// JSON payload emitted by `fts stats`.
#[derive(Serialize)]
struct FtsStatsResponse {
    /// Result of `SELECT COUNT(*) FROM fts_memories`.
    total_rows: i64,
    /// Row count of the `fts_memories_data` shadow table; `None` when that
    /// query fails, in which case the key is left out of the JSON entirely.
    #[serde(skip_serializing_if = "Option::is_none")]
    shadow_pages: Option<i64>,
    /// Whether the probe query against `fts_memories` executed without error.
    fts_functional: bool,
    /// Time elapsed since the handler started, in milliseconds.
    elapsed_ms: u64,
}
113
114/// Dispatch entry point called from `main`.
115///
116/// # Errors
117/// Propagates any [`AppError`] raised by the underlying subcommand.
118pub fn run(args: FtsArgs) -> Result<(), AppError> {
119    match args.command {
120        FtsSubcommand::Rebuild(a) => run_rebuild(a),
121        FtsSubcommand::Check(a) => run_check(a),
122        FtsSubcommand::Stats(a) => run_stats(a),
123    }
124}
125
126/// Rebuilds the FTS5 index by issuing the `'rebuild'` special command.
127///
128/// The FTS5 `INSERT INTO fts_memories(fts_memories) VALUES('rebuild')` statement
129/// drops all index data and re-populates it from the content table in a single
130/// transaction. Use this after bulk imports or when `fts check` reports a failure.
131///
132/// # Errors
133/// Returns [`AppError::Database`] on any SQLite failure.
134fn run_rebuild(args: FtsRebuildArgs) -> Result<(), AppError> {
135    let start = std::time::Instant::now();
136    let paths = AppPaths::resolve(args.db.as_deref())?;
137    crate::storage::connection::ensure_db_ready(&paths)?;
138    let conn = open_rw(&paths.db)?;
139
140    let table_exists: bool = conn.query_row(
141        "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='fts_memories'",
142        [],
143        |r| r.get::<_, i64>(0).map(|v| v > 0),
144    )?;
145    if !table_exists {
146        return Err(AppError::Validation(
147            "FTS5 table 'fts_memories' does not exist — run 'sqlite-graphrag init' first"
148                .to_string(),
149        ));
150    }
151
152    conn.execute_batch("INSERT INTO fts_memories(fts_memories) VALUES('rebuild');")?;
153
154    let rows: i64 = conn.query_row("SELECT COUNT(*) FROM fts_memories", [], |r| r.get(0))?;
155
156    conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
157
158    output::emit_json(&FtsRebuildResponse {
159        action: "rebuilt".to_string(),
160        rows_indexed: rows,
161        elapsed_ms: start.elapsed().as_millis() as u64,
162    })?;
163
164    Ok(())
165}
166
167/// Runs the FTS5 integrity-check without modifying the index.
168///
169/// The FTS5 integrity-check is triggered by:
170/// ```sql
171/// INSERT INTO fts_memories(fts_memories, rank) VALUES('integrity-check', 1);
172/// ```
173/// SQLite raises an error if the index is corrupt, so a successful `execute_batch`
174/// means the index is healthy. On failure, `integrity_ok` is `false` and the
175/// `detail` field carries an actionable hint.
176///
177/// # Errors
178/// Returns [`AppError`] only on unexpected I/O or path resolution failures;
179/// an FTS5 corruption is reported as `integrity_ok: false`, not as a Rust error.
180fn run_check(args: FtsCheckArgs) -> Result<(), AppError> {
181    let start = std::time::Instant::now();
182    let paths = AppPaths::resolve(args.db.as_deref())?;
183    crate::storage::connection::ensure_db_ready(&paths)?;
184    let conn = open_rw(&paths.db)?;
185
186    let integrity_ok = conn
187        .execute_batch("INSERT INTO fts_memories(fts_memories, rank) VALUES('integrity-check', 1);")
188        .is_ok();
189
190    conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);").ok();
191
192    output::emit_json(&FtsCheckResponse {
193        action: "checked".to_string(),
194        integrity_ok,
195        detail: if integrity_ok {
196            None
197        } else {
198            Some("FTS5 integrity-check failed — run 'sqlite-graphrag fts rebuild'".to_string())
199        },
200        elapsed_ms: start.elapsed().as_millis() as u64,
201    })?;
202
203    Ok(())
204}
205
206/// Returns FTS5 index statistics: total indexed rows, shadow table page count (best-effort),
207/// and a functional liveness check.
208///
209/// # Errors
210/// Returns [`AppError`] only on unexpected I/O or path resolution failures.
211fn run_stats(args: FtsStatsArgs) -> Result<(), AppError> {
212    let start = std::time::Instant::now();
213    let paths = AppPaths::resolve(args.db.as_deref())?;
214    crate::storage::connection::ensure_db_ready(&paths)?;
215    let conn = open_ro(&paths.db)?;
216
217    // 1. Total indexed rows in the FTS5 content table.
218    let total_rows: i64 = conn.query_row("SELECT COUNT(*) FROM fts_memories", [], |r| r.get(0))?;
219
220    // 2. Shadow pages — queries the internal `_data` shadow table.
221    //    This may not exist on all SQLite builds; treat any failure as None.
222    let shadow_pages: Option<i64> = conn
223        .query_row("SELECT COUNT(*) FROM fts_memories_data", [], |r| r.get(0))
224        .ok();
225
226    // 3. Functional liveness: SELECT with FTS5 match syntax against a wildcard.
227    //    A successful LIMIT 0 query confirms the FTS5 module is operational.
228    let fts_functional = conn
229        .execute_batch("SELECT * FROM fts_memories('*') LIMIT 0;")
230        .is_ok();
231
232    output::emit_json(&FtsStatsResponse {
233        total_rows,
234        shadow_pages,
235        fts_functional,
236        elapsed_ms: start.elapsed().as_millis() as u64,
237    })?;
238
239    Ok(())
240}
241
#[cfg(test)]
mod tests {
    //! Serialization tests for the three response payloads. They only build
    //! the structs and inspect the resulting JSON; no database is touched.

    use super::*;

    /// Converts a response into a `serde_json::Value`, panicking on failure.
    fn to_json<T: serde::Serialize>(payload: &T) -> serde_json::Value {
        serde_json::to_value(payload).expect("serialization failed")
    }

    /// Builds a rebuild response with the fixed `"rebuilt"` action label.
    fn rebuild_response(rows_indexed: i64, elapsed_ms: u64) -> FtsRebuildResponse {
        FtsRebuildResponse {
            action: "rebuilt".to_string(),
            rows_indexed,
            elapsed_ms,
        }
    }

    /// Builds a check response with the fixed `"checked"` action label.
    fn check_response(
        integrity_ok: bool,
        detail: Option<String>,
        elapsed_ms: u64,
    ) -> FtsCheckResponse {
        FtsCheckResponse {
            action: "checked".to_string(),
            integrity_ok,
            detail,
            elapsed_ms,
        }
    }

    #[test]
    fn fts_rebuild_response_serializes_all_fields() {
        let value = to_json(&rebuild_response(42, 10));
        assert_eq!(value["action"], "rebuilt");
        assert_eq!(value["rows_indexed"], 42i64);
        assert_eq!(value["elapsed_ms"], 10u64);
    }

    #[test]
    fn fts_check_response_integrity_ok_omits_detail() {
        let value = to_json(&check_response(true, None, 5));
        assert_eq!(value["action"], "checked");
        assert_eq!(value["integrity_ok"], true);
        assert!(
            value.get("detail").is_none(),
            "detail must be absent when integrity_ok is true"
        );
        assert_eq!(value["elapsed_ms"], 5u64);
    }

    #[test]
    fn fts_check_response_corruption_includes_detail() {
        let hint = "FTS5 integrity-check failed — run 'sqlite-graphrag fts rebuild'".to_string();
        let value = to_json(&check_response(false, Some(hint), 3));
        assert_eq!(value["integrity_ok"], false);
        let detail = value["detail"].as_str().unwrap();
        assert!(
            detail.contains("fts rebuild"),
            "detail must mention the remediation command"
        );
    }

    #[test]
    fn fts_rebuild_response_elapsed_ms_non_negative() {
        let value = to_json(&rebuild_response(0, 0));
        assert!(value["elapsed_ms"].is_u64());
    }

    #[test]
    fn fts_check_response_elapsed_ms_non_negative() {
        let value = to_json(&check_response(true, None, 0));
        assert!(value["elapsed_ms"].is_u64());
    }

    #[test]
    fn fts_stats_response_serializes_all_fields() {
        let value = to_json(&FtsStatsResponse {
            total_rows: 150,
            shadow_pages: Some(12),
            fts_functional: true,
            elapsed_ms: 8,
        });
        assert_eq!(value["total_rows"], 150i64);
        assert_eq!(value["shadow_pages"], 12i64);
        assert_eq!(value["fts_functional"], true);
        assert_eq!(value["elapsed_ms"], 8u64);
    }

    #[test]
    fn fts_stats_response_omits_shadow_pages_when_none() {
        let value = to_json(&FtsStatsResponse {
            total_rows: 0,
            shadow_pages: None,
            fts_functional: false,
            elapsed_ms: 2,
        });
        assert!(
            value.get("shadow_pages").is_none(),
            "shadow_pages must be absent when None"
        );
        assert_eq!(value["fts_functional"], false);
    }

    #[test]
    fn fts_stats_response_fts_not_functional() {
        let value = to_json(&FtsStatsResponse {
            total_rows: 5,
            shadow_pages: None,
            fts_functional: false,
            elapsed_ms: 1,
        });
        assert_eq!(value["fts_functional"], false);
        assert_eq!(value["total_rows"], 5i64);
    }
}