Skip to main content

assay/
search_fts5.rs

1use crate::search::{SearchEngine, SearchResult};
2use sqlx::sqlite::SqlitePoolOptions;
3use sqlx::{Pool, Row, Sqlite};
4use tokio::runtime::Runtime;
5
/// FTS5-backed search engine using SQLite's built-in full-text search.
///
/// Uses an in-memory SQLite database with FTS5 extension for BM25 ranking.
/// This is the high-quality search backend, enabled when the `db` feature is active.
///
/// Documents are stored in a single FTS5 virtual table named `modules` whose
/// `doc_id` column is `UNINDEXED` (stored, but not searchable) and whose
/// remaining columns are the searchable text fields.
///
/// Column weights for BM25 scoring (passed to `bm25()` at query time; `doc_id`
/// is given weight 0.0):
/// - name: 2.0
/// - description: 1.0
/// - keywords: 3.0
/// - functions: 1.0
pub struct FTS5Index {
    /// sqlx connection pool for the SQLite database that holds the `modules` table.
    pool: Pool<Sqlite>,
    /// Tokio runtime owned by the index; every sqlx future is driven to
    /// completion on it with `block_on`, which keeps the public API synchronous.
    rt: Runtime,
}
20
21impl std::fmt::Debug for FTS5Index {
22    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
23        f.debug_struct("FTS5Index").finish_non_exhaustive()
24    }
25}
26
27impl Default for FTS5Index {
28    fn default() -> Self {
29        Self::new()
30    }
31}
32
33impl FTS5Index {
34    /// Create a new FTS5 search index backed by an in-memory SQLite database.
35    ///
36    /// Initializes the FTS5 virtual table with columns for document fields
37    /// and unicode61 tokenization.
38    pub fn new() -> Self {
39        let rt = Runtime::new().expect("tokio runtime");
40        let pool = rt.block_on(async {
41            SqlitePoolOptions::new()
42                .max_connections(1)
43                .connect("sqlite::memory:")
44                .await
45                .expect("sqlite in-memory connection")
46        });
47
48        rt.block_on(async {
49            sqlx::query(
50                "CREATE VIRTUAL TABLE IF NOT EXISTS modules USING fts5(\
51                 doc_id UNINDEXED, \
52                 name, \
53                 description, \
54                 keywords, \
55                 functions, \
56                 tokenize=\"unicode61\"\
57                 )",
58            )
59            .execute(&pool)
60            .await
61            .expect("create FTS5 table");
62        });
63
64        Self { pool, rt }
65    }
66}
67
/// Turn free-form user input into a safe FTS5 `MATCH` expression.
///
/// The input is cut at every character that is neither alphanumeric nor `_`;
/// each non-empty piece is wrapped in double quotes and the pieces are joined
/// with single spaces. Quoting keeps FTS5 from interpreting special characters
/// (*, :, ^) or reserved keywords (OR, AND, NOT, NEAR) as query syntax.
///
/// Returns an empty string when the input contains no usable token.
fn sanitize_fts5_query(query: &str) -> String {
    let is_separator = |ch: char| !(ch.is_alphanumeric() || ch == '_');

    let mut quoted = String::new();
    for token in query.split(is_separator) {
        // Consecutive separators yield empty pieces; drop them.
        if token.is_empty() {
            continue;
        }
        if !quoted.is_empty() {
            quoted.push(' ');
        }
        quoted.push('"');
        quoted.push_str(token);
        quoted.push('"');
    }
    quoted
}
80
81impl SearchEngine for FTS5Index {
82    fn add_document(&mut self, id: &str, fields: &[(&str, &str, f64)]) {
83        let mut name_val = String::new();
84        let mut desc_val = String::new();
85        let mut kw_val = String::new();
86        let mut func_val = String::new();
87
88        for &(field_name, field_value, _) in fields {
89            match field_name {
90                "name" => name_val = field_value.to_string(),
91                "description" => desc_val = field_value.to_string(),
92                "keywords" => kw_val = field_value.to_string(),
93                _ => func_val = field_value.to_string(),
94            }
95        }
96
97        self.rt.block_on(async {
98            sqlx::query(
99                "INSERT INTO modules(doc_id, name, description, keywords, functions) \
100                 VALUES (?, ?, ?, ?, ?)",
101            )
102            .bind(id)
103            .bind(&name_val)
104            .bind(&desc_val)
105            .bind(&kw_val)
106            .bind(&func_val)
107            .execute(&self.pool)
108            .await
109            .expect("insert document");
110        });
111    }
112
113    fn search(&self, query: &str, limit: usize) -> Vec<SearchResult> {
114        if query.trim().is_empty() {
115            return Vec::new();
116        }
117
118        let sanitized = sanitize_fts5_query(query);
119        if sanitized.is_empty() {
120            return Vec::new();
121        }
122
123        self.rt.block_on(async {
124            // bm25 weights: doc_id=0 (UNINDEXED), name=2, description=1, keywords=3, functions=1
125            // bm25() returns negative values; more negative = better match.
126            // ORDER BY rank (ascending) puts best matches first.
127            let rows = sqlx::query(
128                "SELECT doc_id, bm25(modules, 0.0, 2.0, 1.0, 3.0, 1.0) as rank \
129                 FROM modules WHERE modules MATCH ? ORDER BY rank LIMIT ?",
130            )
131            .bind(&sanitized)
132            .bind(limit as i64)
133            .fetch_all(&self.pool)
134            .await;
135
136            match rows {
137                Ok(rows) => rows
138                    .iter()
139                    .map(|row| {
140                        let id: String = row.get("doc_id");
141                        let rank: f64 = row.get("rank");
142                        SearchResult {
143                            id,
144                            score: -rank, // negate: higher positive = better match
145                        }
146                    })
147                    .collect(),
148                Err(_) => Vec::new(),
149            }
150        })
151    }
152}