Skip to main content

bones_core/db/
fts.rs

//! FTS5 full-text search with BM25 ranking.
//!
//! This module provides search helpers on top of the `items_fts` FTS5 virtual
//! table defined in [`super::schema`]. The FTS5 table is automatically kept
//! in sync with the `items` table via INSERT/UPDATE/DELETE triggers.
//!
//! # Column Weights (BM25)
//!
//! | Column      | Weight | Rationale                          |
//! |-------------|--------|------------------------------------|
//! | title       | 3.0    | Most specific, short, high signal  |
//! | description | 2.0    | Detailed context, moderate signal  |
//! | labels      | 1.0    | Namespace tags, low cardinality    |
//!
//! # Tokenizer
//!
//! Porter stemmer + `unicode61` tokenizer with prefix indexes on 2 and 3
//! characters. This supports:
//! - **Stemming**: searching "running" matches "run" and "runs" (derived
//!   nouns such as "runner" keep their own stem and are not matched)
//! - **Prefix search**: "auth*" matches "authentication", "authorize"
//! - **Unicode**: full Unicode word-breaking
//!
//! # Performance
//!
//! Sub-1ms query time at Tier S (≤1k items). FTS5 lookups are O(log N) via
//! the b-tree index and prefix tables.
27
28use anyhow::{Context, Result};
29use rusqlite::{Connection, params};
30
31use super::query::SearchHit;
32
/// Default BM25 weight for the `title` column (highest of the three).
pub const BM25_WEIGHT_TITLE: f64 = 3.0;

/// Default BM25 weight for the `description` column.
pub const BM25_WEIGHT_DESCRIPTION: f64 = 2.0;

/// Default BM25 weight for the `labels` column (lowest of the three).
pub const BM25_WEIGHT_LABELS: f64 = 1.0;
37
38/// Search the FTS5 index with BM25 ranking and column weights.
39///
40/// This is the primary search entry point for the `bn search` command.
41/// It joins FTS5 results with the `items` table to exclude soft-deleted
42/// items and return full titles.
43///
44/// # Arguments
45///
46/// * `conn` — `SQLite` connection with the projection database open
47/// * `query` — FTS5 query string (supports stemming, prefix `*`, boolean ops)
48/// * `limit` — Maximum number of results
49///
50/// # BM25 Ranking
51///
52/// Results are sorted by BM25 relevance score (lower = better match).
53/// Column weights: title 3×, description 2×, labels 1×.
54///
55/// # Errors
56///
57/// Returns an error if the FTS5 query is malformed or the database is
58/// not properly initialized.
59pub fn search_bm25(conn: &Connection, query: &str, limit: u32) -> Result<Vec<SearchHit>> {
60    let sql = "SELECT f.item_id, i.title, bm25(items_fts, ?1, ?2, ?3) AS rank \
61               FROM items_fts f \
62               INNER JOIN items i ON i.item_id = f.item_id \
63               WHERE items_fts MATCH ?4 AND i.is_deleted = 0 \
64               ORDER BY rank \
65               LIMIT ?5";
66
67    let mut stmt = conn
68        .prepare(sql)
69        .context("prepare FTS5 BM25 search query")?;
70
71    let rows = stmt
72        .query_map(
73            params![
74                BM25_WEIGHT_TITLE,
75                BM25_WEIGHT_DESCRIPTION,
76                BM25_WEIGHT_LABELS,
77                query,
78                limit,
79            ],
80            |row| {
81                Ok(SearchHit {
82                    item_id: row.get(0)?,
83                    title: row.get(1)?,
84                    rank: row.get(2)?,
85                })
86            },
87        )
88        .with_context(|| format!("execute FTS5 search for '{query}'"))?;
89
90    let mut hits = Vec::new();
91    for row in rows {
92        hits.push(row.context("read FTS5 search hit")?);
93    }
94    Ok(hits)
95}
96
97/// Rebuild the FTS5 index from the current `items` table.
98///
99/// This drops and recreates all FTS5 index content. Useful after a full
100/// projection rebuild or when the FTS index is suspected to be out of sync.
101///
102/// # Errors
103///
104/// Returns an error if the rebuild SQL fails.
105pub fn rebuild_fts_index(conn: &Connection) -> Result<()> {
106    conn.execute_batch(
107        "DELETE FROM items_fts;
108         INSERT INTO items_fts(rowid, title, description, labels, item_id)
109         SELECT rowid, title, COALESCE(description, ''), COALESCE(search_labels, ''), item_id
110         FROM items;",
111    )
112    .context("rebuild FTS5 index from items table")?;
113    Ok(())
114}
115
116/// Return the number of rows in the FTS5 index.
117///
118/// Useful for diagnostics and health checks.
119///
120/// # Errors
121///
122/// Returns an error if the query fails.
123pub fn fts_row_count(conn: &Connection) -> Result<u64> {
124    let count: i64 = conn
125        .query_row("SELECT COUNT(*) FROM items_fts", [], |row| row.get(0))
126        .context("count FTS5 rows")?;
127    Ok(u64::try_from(count).unwrap_or(0))
128}
129
130/// Validate that the FTS5 index is in sync with the `items` table.
131///
132/// Returns `true` if the row counts match (excluding deleted items).
133///
134/// # Errors
135///
136/// Returns an error if the query fails.
137pub fn fts_in_sync(conn: &Connection) -> Result<bool> {
138    let items_count: i64 = conn
139        .query_row(
140            "SELECT COUNT(*) FROM items WHERE is_deleted = 0",
141            [],
142            |row| row.get(0),
143        )
144        .context("count active items")?;
145
146    let fts_count: i64 = conn
147        .query_row("SELECT COUNT(*) FROM items_fts", [], |row| row.get(0))
148        .context("count FTS5 rows")?;
149
150    // FTS includes deleted items until triggers fire on DELETE,
151    // but triggers fire on UPDATE too, so soft-deleted items stay in FTS.
152    // The items table has triggers that update FTS on every INSERT/UPDATE/DELETE,
153    // so the FTS count matches the total items count (including deleted).
154    let total_items: i64 = conn
155        .query_row("SELECT COUNT(*) FROM items", [], |row| row.get(0))
156        .context("count total items")?;
157
158    Ok(fts_count == total_items || fts_count == items_count)
159}
160
161// ---------------------------------------------------------------------------
162// Tests
163// ---------------------------------------------------------------------------
164
#[cfg(test)]
mod tests {
    use super::*;
    use crate::db::migrations;
    use crate::db::project::{Projector, ensure_tracking_table};
    use crate::event::data::*;
    use crate::event::types::EventType;
    use crate::event::{Event, EventData};
    use crate::model::item::{Kind, Size, Urgency};
    use crate::model::item_id::ItemId;
    use std::collections::BTreeMap;

    /// Open a fresh in-memory database, run the migrations and create the
    /// projection tracking table.
    fn test_db() -> Connection {
        let mut conn = Connection::open_in_memory().expect("open in-memory db");
        migrations::migrate(&mut conn).expect("migrate");
        ensure_tracking_table(&conn).expect("create tracking table");
        conn
    }

    /// Build a `Create` event for item `id` with the given title, optional
    /// description and labels. `hash` is appended to `blake3:` to form the
    /// event hash, so each event in a test needs a distinct value.
    fn make_create(
        id: &str,
        title: &str,
        desc: Option<&str>,
        labels: &[&str],
        hash: &str,
    ) -> Event {
        Event {
            wall_ts_us: 1000,
            agent: "test-agent".into(),
            itc: "itc:AQ".into(),
            parents: vec![],
            event_type: EventType::Create,
            item_id: ItemId::new_unchecked(id),
            data: EventData::Create(CreateData {
                title: title.into(),
                kind: Kind::Task,
                size: Some(Size::M),
                urgency: Urgency::Default,
                labels: labels.iter().map(|label| (*label).to_string()).collect(),
                parent: None,
                causation: None,
                description: desc.map(String::from),
                extra: BTreeMap::new(),
            }),
            event_hash: format!("blake3:{hash}"),
        }
    }

    #[test]
    fn search_bm25_finds_by_title() {
        let conn = test_db();
        let proj = Projector::new(&conn);
        proj.project_event(&make_create(
            "bn-001",
            "Authentication timeout regression",
            Some("Retries fail after 30 seconds"),
            &["auth", "backend"],
            "h1",
        ))
        .unwrap();
        proj.project_event(&make_create(
            "bn-002",
            "Update documentation",
            Some("Fix typos in README"),
            &["docs"],
            "h2",
        ))
        .unwrap();

        let hits = search_bm25(&conn, "authentication", 10).unwrap();
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].item_id, "bn-001");
    }

    #[test]
    fn search_bm25_stemming() {
        let conn = test_db();
        let proj = Projector::new(&conn);
        proj.project_event(&make_create(
            "bn-001",
            "Running tests slowly",
            None,
            &[],
            "h1",
        ))
        .unwrap();

        // Porter stemmer: "run" matches "running"
        let hits = search_bm25(&conn, "run", 10).unwrap();
        assert_eq!(hits.len(), 1);
    }

    #[test]
    fn search_bm25_prefix() {
        let conn = test_db();
        let proj = Projector::new(&conn);
        proj.project_event(&make_create(
            "bn-001",
            "Authentication service broken",
            None,
            &[],
            "h1",
        ))
        .unwrap();

        let hits = search_bm25(&conn, "auth*", 10).unwrap();
        assert_eq!(hits.len(), 1);
    }

    #[test]
    fn search_bm25_excludes_deleted() {
        let conn = test_db();
        let proj = Projector::new(&conn);
        proj.project_event(&make_create(
            "bn-001",
            "Important auth bug",
            None,
            &[],
            "h1",
        ))
        .unwrap();

        // Soft-delete the item; it must no longer show up in search results.
        proj.project_event(&Event {
            wall_ts_us: 2000,
            agent: "test-agent".into(),
            itc: "itc:AQ".into(),
            parents: vec![],
            event_type: EventType::Delete,
            item_id: ItemId::new_unchecked("bn-001"),
            data: EventData::Delete(DeleteData {
                reason: None,
                extra: BTreeMap::new(),
            }),
            event_hash: "blake3:del1".into(),
        })
        .unwrap();

        let hits = search_bm25(&conn, "auth", 10).unwrap();
        assert!(hits.is_empty());
    }

    #[test]
    fn search_bm25_title_weighted_higher() {
        let conn = test_db();
        let proj = Projector::new(&conn);

        // Item with "authentication" in the title
        proj.project_event(&make_create(
            "bn-title",
            "Authentication regression",
            Some("A minor bug"),
            &[],
            "h1",
        ))
        .unwrap();

        // Item with "authentication" only in the description
        proj.project_event(&make_create(
            "bn-desc",
            "Minor bug fix",
            Some("Related to authentication module"),
            &[],
            "h2",
        ))
        .unwrap();

        let hits = search_bm25(&conn, "authentication", 10).unwrap();
        assert_eq!(hits.len(), 2);
        // Title match should rank better (lower BM25 score)
        assert_eq!(hits[0].item_id, "bn-title");
    }

    #[test]
    fn search_bm25_label_match() {
        let conn = test_db();
        let proj = Projector::new(&conn);
        proj.project_event(&make_create(
            "bn-001",
            "Fix something",
            None,
            &["backend", "security"],
            "h1",
        ))
        .unwrap();

        let hits = search_bm25(&conn, "security", 10).unwrap();
        assert_eq!(hits.len(), 1);
    }

    #[test]
    fn search_bm25_limit() {
        let conn = test_db();
        let proj = Projector::new(&conn);
        for i in 0..20_u32 {
            proj.project_event(&make_create(
                &format!("bn-{i:03}"),
                &format!("Authentication bug {i}"),
                None,
                &[],
                &format!("h{i}"),
            ))
            .unwrap();
        }

        let hits = search_bm25(&conn, "authentication", 5).unwrap();
        assert_eq!(hits.len(), 5);
    }

    #[test]
    fn rebuild_fts_index_restores_data() {
        let conn = test_db();
        let proj = Projector::new(&conn);
        proj.project_event(&make_create("bn-001", "Auth bug", None, &[], "h1"))
            .unwrap();

        // Manually corrupt FTS
        conn.execute_batch("DELETE FROM items_fts").unwrap();
        let hits_before = search_bm25(&conn, "auth", 10).unwrap();
        assert!(hits_before.is_empty());

        // Rebuild
        rebuild_fts_index(&conn).unwrap();
        let hits_after = search_bm25(&conn, "auth", 10).unwrap();
        assert_eq!(hits_after.len(), 1);
    }

    #[test]
    fn fts_row_count_reports_correctly() {
        let conn = test_db();
        let proj = Projector::new(&conn);

        assert_eq!(fts_row_count(&conn).unwrap(), 0);

        proj.project_event(&make_create("bn-001", "Item 1", None, &[], "h1"))
            .unwrap();
        proj.project_event(&make_create("bn-002", "Item 2", None, &[], "h2"))
            .unwrap();

        assert_eq!(fts_row_count(&conn).unwrap(), 2);
    }

    #[test]
    fn fts_in_sync_after_projection() {
        let conn = test_db();
        let proj = Projector::new(&conn);
        proj.project_event(&make_create("bn-001", "Item", None, &[], "h1"))
            .unwrap();

        assert!(fts_in_sync(&conn).unwrap());
    }

    #[test]
    fn search_bm25_no_match_returns_empty() {
        let conn = test_db();
        let proj = Projector::new(&conn);
        proj.project_event(&make_create("bn-001", "Item", None, &[], "h1"))
            .unwrap();

        // A well-formed query whose term occurs in no indexed item must
        // succeed and yield no hits (this is not an empty-query test).
        let result = search_bm25(&conn, "nonexistent_term_xyz", 10).unwrap();
        assert!(result.is_empty());
    }
}