Skip to main content

sqlite_graphrag/commands/
fts.rs

//! Handler for the `fts` CLI subcommand family.
//!
//! Provides three maintenance operations for the FTS5 full-text search index:
//! - `rebuild`: drops and reconstructs the index from the `memories` table.
//! - `check`: runs the FTS5 integrity-check without modifying the index.
//! - `stats`: reports the indexed row count, shadow-table page count, and a liveness probe.
6
7use crate::errors::AppError;
8use crate::output;
9use crate::paths::AppPaths;
10use crate::storage::connection::{open_ro, open_rw};
11use serde::Serialize;
12
/// Arguments for the `fts` subcommand family.
// NOTE: clap derives user-facing help from `///` comments and from the `about`
// attribute below, so maintainer-only notes in this block use plain `//` comments.
#[derive(clap::Args)]
#[command(about = "FTS5 full-text search index management")]
pub struct FtsArgs {
    // The selected `fts` subcommand; `run` matches on it to pick a handler.
    #[command(subcommand)]
    pub command: FtsSubcommand,
}
20
/// Subcommands nested under `fts`.
// NOTE: the `///` comments on the variants double as clap help text, and the
// `after_long_help` strings are shown verbatim in the long help output, so both
// are user-facing. Each variant wraps its own argument struct and is dispatched
// by `run`.
#[derive(clap::Subcommand)]
pub enum FtsSubcommand {
    /// Rebuild the FTS5 index from the memories table.
    #[command(after_long_help = "EXAMPLES:\n  \
        # Rebuild the full-text search index\n  \
        sqlite-graphrag fts rebuild\n\n  \
        # Rebuild with custom database path\n  \
        sqlite-graphrag fts rebuild --db /path/to/graphrag.sqlite")]
    Rebuild(FtsRebuildArgs),
    /// Run FTS5 integrity-check without modifying the index.
    #[command(after_long_help = "EXAMPLES:\n  \
        # Check FTS5 index integrity\n  \
        sqlite-graphrag fts check\n\n  \
        # Check with custom database path\n  \
        sqlite-graphrag fts check --db /path/to/graphrag.sqlite")]
    Check(FtsCheckArgs),
    /// Show FTS5 index statistics (row count, shadow pages, functional status).
    #[command(after_long_help = "EXAMPLES:\n  \
        # Show FTS5 index statistics\n  \
        sqlite-graphrag fts stats\n\n  \
        # Stats with custom database path\n  \
        sqlite-graphrag fts stats --db /path/to/graphrag.sqlite")]
    Stats(FtsStatsArgs),
}
46
/// Arguments for `fts rebuild`.
// NOTE: `///` comments on the fields below are clap help text; keep maintainer
// notes in `//` comments.
#[derive(clap::Args)]
pub struct FtsRebuildArgs {
    /// No-op; JSON is always emitted on stdout.
    // Accepted but never read by `run_rebuild`; `hide = true` keeps it out of help.
    #[arg(long, hide = true)]
    pub json: bool,
    /// Path to the SQLite database file.
    // Falls back to the `SQLITE_GRAPHRAG_DB_PATH` environment variable; when still
    // `None`, `AppPaths::resolve` decides the location.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
57
/// Arguments for `fts check`.
// NOTE: `///` comments on the fields below are clap help text; keep maintainer
// notes in `//` comments.
#[derive(clap::Args)]
pub struct FtsCheckArgs {
    /// No-op; JSON is always emitted on stdout.
    // Accepted but never read by `run_check`; `hide = true` keeps it out of help.
    #[arg(long, hide = true)]
    pub json: bool,
    /// Path to the SQLite database file.
    // Falls back to the `SQLITE_GRAPHRAG_DB_PATH` environment variable; when still
    // `None`, `AppPaths::resolve` decides the location.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
68
/// Arguments for `fts stats`.
// NOTE: `///` comments on the fields below are clap help text; keep maintainer
// notes in `//` comments.
#[derive(clap::Args)]
pub struct FtsStatsArgs {
    /// No-op; JSON is always emitted on stdout.
    // Accepted but never read by `run_stats`; `hide = true` keeps it out of help.
    #[arg(long, hide = true)]
    pub json: bool,
    /// Path to the SQLite database file.
    // Falls back to the `SQLITE_GRAPHRAG_DB_PATH` environment variable; when still
    // `None`, `AppPaths::resolve` decides the location.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
79
/// JSON payload emitted on stdout by `fts rebuild`.
///
/// Fields are serialized in declaration order.
#[derive(Serialize)]
struct FtsRebuildResponse {
    /// Operation label; `run_rebuild` always sets this to `"rebuilt"`.
    action: String,
    /// Result of `SELECT COUNT(*) FROM fts_memories` taken right after the rebuild.
    rows_indexed: i64,
    /// Wall-clock duration of the handler in milliseconds.
    elapsed_ms: u64,
}
86
/// JSON payload emitted on stdout by `fts check`.
///
/// Fields are serialized in declaration order.
#[derive(Serialize)]
struct FtsCheckResponse {
    /// Operation label; `run_check` always sets this to `"checked"`.
    action: String,
    /// `true` when the integrity-check statement executed without error.
    integrity_ok: bool,
    /// Remediation hint for a failed check; the key is omitted from the JSON
    /// entirely when this is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<String>,
    /// Wall-clock duration of the handler in milliseconds.
    elapsed_ms: u64,
}
95
/// JSON payload emitted on stdout by `fts stats`.
///
/// Fields are serialized in declaration order.
#[derive(Serialize)]
struct FtsStatsResponse {
    /// Result of `SELECT COUNT(*) FROM fts_memories`.
    total_rows: i64,
    /// Row count of the `fts_memories_data` shadow table, or `None` when that
    /// query failed; the key is omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    shadow_pages: Option<i64>,
    /// Whether the liveness probe query against `fts_memories` succeeded.
    fts_functional: bool,
    /// Wall-clock duration of the handler in milliseconds.
    elapsed_ms: u64,
}
104
105/// Dispatch entry point called from `main`.
106///
107/// # Errors
108/// Propagates any [`AppError`] raised by the underlying subcommand.
109pub fn run(args: FtsArgs) -> Result<(), AppError> {
110    match args.command {
111        FtsSubcommand::Rebuild(a) => run_rebuild(a),
112        FtsSubcommand::Check(a) => run_check(a),
113        FtsSubcommand::Stats(a) => run_stats(a),
114    }
115}
116
117/// Rebuilds the FTS5 index by issuing the `'rebuild'` special command.
118///
119/// The FTS5 `INSERT INTO fts_memories(fts_memories) VALUES('rebuild')` statement
120/// drops all index data and re-populates it from the content table in a single
121/// transaction. Use this after bulk imports or when `fts check` reports a failure.
122///
123/// # Errors
124/// Returns [`AppError::Database`] on any SQLite failure.
125fn run_rebuild(args: FtsRebuildArgs) -> Result<(), AppError> {
126    let start = std::time::Instant::now();
127    let paths = AppPaths::resolve(args.db.as_deref())?;
128    crate::storage::connection::ensure_db_ready(&paths)?;
129    let conn = open_rw(&paths.db)?;
130
131    let table_exists: bool = conn.query_row(
132        "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='fts_memories'",
133        [],
134        |r| r.get::<_, i64>(0).map(|v| v > 0),
135    )?;
136    if !table_exists {
137        return Err(AppError::Validation(
138            "FTS5 table 'fts_memories' does not exist — run 'sqlite-graphrag init' first"
139                .to_string(),
140        ));
141    }
142
143    conn.execute_batch("INSERT INTO fts_memories(fts_memories) VALUES('rebuild');")?;
144
145    let rows: i64 = conn.query_row("SELECT COUNT(*) FROM fts_memories", [], |r| r.get(0))?;
146
147    conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
148
149    output::emit_json(&FtsRebuildResponse {
150        action: "rebuilt".to_string(),
151        rows_indexed: rows,
152        elapsed_ms: start.elapsed().as_millis() as u64,
153    })?;
154
155    Ok(())
156}
157
158/// Runs the FTS5 integrity-check without modifying the index.
159///
160/// The FTS5 integrity-check is triggered by:
161/// ```sql
162/// INSERT INTO fts_memories(fts_memories, rank) VALUES('integrity-check', 1);
163/// ```
164/// SQLite raises an error if the index is corrupt, so a successful `execute_batch`
165/// means the index is healthy. On failure, `integrity_ok` is `false` and the
166/// `detail` field carries an actionable hint.
167///
168/// # Errors
169/// Returns [`AppError`] only on unexpected I/O or path resolution failures;
170/// an FTS5 corruption is reported as `integrity_ok: false`, not as a Rust error.
171fn run_check(args: FtsCheckArgs) -> Result<(), AppError> {
172    let start = std::time::Instant::now();
173    let paths = AppPaths::resolve(args.db.as_deref())?;
174    crate::storage::connection::ensure_db_ready(&paths)?;
175    let conn = open_rw(&paths.db)?;
176
177    let integrity_ok = conn
178        .execute_batch("INSERT INTO fts_memories(fts_memories, rank) VALUES('integrity-check', 1);")
179        .is_ok();
180
181    conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);").ok();
182
183    output::emit_json(&FtsCheckResponse {
184        action: "checked".to_string(),
185        integrity_ok,
186        detail: if integrity_ok {
187            None
188        } else {
189            Some("FTS5 integrity-check failed — run 'sqlite-graphrag fts rebuild'".to_string())
190        },
191        elapsed_ms: start.elapsed().as_millis() as u64,
192    })?;
193
194    Ok(())
195}
196
197/// Returns FTS5 index statistics: total indexed rows, shadow table page count (best-effort),
198/// and a functional liveness check.
199///
200/// # Errors
201/// Returns [`AppError`] only on unexpected I/O or path resolution failures.
202fn run_stats(args: FtsStatsArgs) -> Result<(), AppError> {
203    let start = std::time::Instant::now();
204    let paths = AppPaths::resolve(args.db.as_deref())?;
205    crate::storage::connection::ensure_db_ready(&paths)?;
206    let conn = open_ro(&paths.db)?;
207
208    // 1. Total indexed rows in the FTS5 content table.
209    let total_rows: i64 = conn.query_row("SELECT COUNT(*) FROM fts_memories", [], |r| r.get(0))?;
210
211    // 2. Shadow pages — queries the internal `_data` shadow table.
212    //    This may not exist on all SQLite builds; treat any failure as None.
213    let shadow_pages: Option<i64> = conn
214        .query_row("SELECT COUNT(*) FROM fts_memories_data", [], |r| r.get(0))
215        .ok();
216
217    // 3. Functional liveness: SELECT with FTS5 match syntax against a wildcard.
218    //    A successful LIMIT 0 query confirms the FTS5 module is operational.
219    let fts_functional = conn
220        .execute_batch("SELECT * FROM fts_memories('*') LIMIT 0;")
221        .is_ok();
222
223    output::emit_json(&FtsStatsResponse {
224        total_rows,
225        shadow_pages,
226        fts_functional,
227        elapsed_ms: start.elapsed().as_millis() as u64,
228    })?;
229
230    Ok(())
231}
232
#[cfg(test)]
mod tests {
    use super::*;

    /// Serializes a response payload into a `serde_json::Value` for inspection.
    fn to_json(payload: &impl serde::Serialize) -> serde_json::Value {
        serde_json::to_value(payload).expect("serialization failed")
    }

    /// Builds a rebuild response with the fixed `"rebuilt"` action label.
    fn rebuild_response(rows_indexed: i64, elapsed_ms: u64) -> FtsRebuildResponse {
        FtsRebuildResponse {
            action: "rebuilt".to_string(),
            rows_indexed,
            elapsed_ms,
        }
    }

    /// Builds a check response with the fixed `"checked"` action label.
    fn check_response(detail: Option<String>, elapsed_ms: u64) -> FtsCheckResponse {
        FtsCheckResponse {
            action: "checked".to_string(),
            integrity_ok: detail.is_none(),
            detail,
            elapsed_ms,
        }
    }

    #[test]
    fn fts_rebuild_response_serializes_all_fields() {
        let value = to_json(&rebuild_response(42, 10));
        assert_eq!(value["action"], "rebuilt");
        assert_eq!(value["rows_indexed"], 42i64);
        assert_eq!(value["elapsed_ms"], 10u64);
    }

    #[test]
    fn fts_check_response_integrity_ok_omits_detail() {
        let value = to_json(&check_response(None, 5));
        assert_eq!(value["action"], "checked");
        assert_eq!(value["integrity_ok"], true);
        assert!(
            value.get("detail").is_none(),
            "detail must be absent when integrity_ok is true"
        );
        assert_eq!(value["elapsed_ms"], 5u64);
    }

    #[test]
    fn fts_check_response_corruption_includes_detail() {
        let hint = "FTS5 integrity-check failed — run 'sqlite-graphrag fts rebuild'".to_string();
        let value = to_json(&check_response(Some(hint), 3));
        assert_eq!(value["integrity_ok"], false);
        let detail = value["detail"].as_str().unwrap();
        assert!(
            detail.contains("fts rebuild"),
            "detail must mention the remediation command"
        );
    }

    #[test]
    fn fts_rebuild_response_elapsed_ms_non_negative() {
        let value = to_json(&rebuild_response(0, 0));
        assert!(value["elapsed_ms"].as_u64().is_some());
    }

    #[test]
    fn fts_check_response_elapsed_ms_non_negative() {
        let value = to_json(&check_response(None, 0));
        assert!(value["elapsed_ms"].as_u64().is_some());
    }

    #[test]
    fn fts_stats_response_serializes_all_fields() {
        let value = to_json(&FtsStatsResponse {
            total_rows: 150,
            shadow_pages: Some(12),
            fts_functional: true,
            elapsed_ms: 8,
        });
        assert_eq!(value["total_rows"], 150i64);
        assert_eq!(value["shadow_pages"], 12i64);
        assert_eq!(value["fts_functional"], true);
        assert_eq!(value["elapsed_ms"], 8u64);
    }

    #[test]
    fn fts_stats_response_omits_shadow_pages_when_none() {
        let value = to_json(&FtsStatsResponse {
            total_rows: 0,
            shadow_pages: None,
            fts_functional: false,
            elapsed_ms: 2,
        });
        assert!(
            value.get("shadow_pages").is_none(),
            "shadow_pages must be absent when None"
        );
        assert_eq!(value["fts_functional"], false);
    }

    #[test]
    fn fts_stats_response_fts_not_functional() {
        let value = to_json(&FtsStatsResponse {
            total_rows: 5,
            shadow_pages: None,
            fts_functional: false,
            elapsed_ms: 1,
        });
        assert_eq!(value["fts_functional"], false);
        assert_eq!(value["total_rows"], 5i64);
    }
}