agentroot_core/search/
bm25.rs

1//! BM25 full-text search via FTS5
2
3use super::{SearchOptions, SearchResult, SearchSource};
4use crate::db::{docid_from_hash, Database};
5use crate::error::Result;
6
7impl Database {
8    /// Perform BM25 full-text search
9    pub fn search_fts(&self, query: &str, options: &SearchOptions) -> Result<Vec<SearchResult>> {
10        let mut sql = String::from(
11            r#"
12            SELECT
13                'agentroot://' || d.collection || '/' || d.path as filepath,
14                d.collection || '/' || d.path as display_path,
15                d.title,
16                d.hash,
17                d.collection,
18                d.modified_at,
19                c.doc,
20                LENGTH(c.doc),
21                1.0 / (1.0 + (-1.0 * bm25(documents_fts, 1.0, 10.0, 1.0))) as score,
22                d.llm_summary,
23                d.llm_title,
24                d.llm_keywords,
25                d.llm_category,
26                d.llm_difficulty,
27                d.user_metadata
28            FROM documents_fts fts
29            JOIN documents d ON d.id = fts.rowid
30            JOIN content c ON c.hash = d.hash
31            JOIN collections coll ON coll.name = d.collection
32            WHERE documents_fts MATCH ?1 AND d.active = 1
33        "#,
34        );
35
36        let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = vec![Box::new(query.to_string())];
37
38        if let Some(ref coll) = options.collection {
39            sql.push_str(" AND d.collection = ?");
40            sql.push_str(&(params_vec.len() + 1).to_string());
41            params_vec.push(Box::new(coll.clone()));
42        }
43
44        if let Some(ref provider) = options.provider {
45            sql.push_str(" AND coll.provider_type = ?");
46            sql.push_str(&(params_vec.len() + 1).to_string());
47            params_vec.push(Box::new(provider.clone()));
48        }
49
50        sql.push_str(" ORDER BY score DESC");
51
52        if options.limit > 0 {
53            sql.push_str(&format!(" LIMIT {}", options.limit));
54        }
55
56        let mut stmt = self.conn.prepare(&sql)?;
57        let results = stmt
58            .query_map(
59                rusqlite::params_from_iter(params_vec.iter().map(|p| p.as_ref())),
60                |row| {
61                    let score: f64 = row.get(8)?;
62                    let keywords_json: Option<String> = row.get(11)?;
63                    let keywords = keywords_json
64                        .and_then(|json| serde_json::from_str::<Vec<String>>(&json).ok());
65
66                    let user_metadata_json: Option<String> = row.get(14)?;
67                    let user_metadata = user_metadata_json
68                        .and_then(|json| crate::db::UserMetadata::from_json(&json).ok());
69
70                    Ok(SearchResult {
71                        filepath: row.get(0)?,
72                        display_path: row.get(1)?,
73                        title: row.get(2)?,
74                        hash: row.get(3)?,
75                        collection_name: row.get(4)?,
76                        modified_at: row.get(5)?,
77                        body: if options.full_content {
78                            Some(row.get(6)?)
79                        } else {
80                            None
81                        },
82                        body_length: row.get(7)?,
83                        docid: docid_from_hash(&row.get::<_, String>(3)?),
84                        context: None,
85                        score,
86                        source: SearchSource::Bm25,
87                        chunk_pos: None,
88                        llm_summary: row.get(9)?,
89                        llm_title: row.get(10)?,
90                        llm_keywords: keywords,
91                        llm_category: row.get(12)?,
92                        llm_difficulty: row.get(13)?,
93                        user_metadata,
94                    })
95                },
96            )?
97            .collect::<std::result::Result<Vec<_>, _>>()?;
98
99        // Filter by min_score
100        let filtered: Vec<SearchResult> = results
101            .into_iter()
102            .filter(|r| r.score >= options.min_score)
103            .collect();
104
105        Ok(filtered)
106    }
107}