agentroot_core/search/
bm25.rs1use super::{SearchOptions, SearchResult, SearchSource};
4use crate::db::{docid_from_hash, Database};
5use crate::error::Result;
6
7impl Database {
8 pub fn search_fts(&self, query: &str, options: &SearchOptions) -> Result<Vec<SearchResult>> {
10 let mut sql = String::from(
11 r#"
12 SELECT
13 'agentroot://' || d.collection || '/' || d.path as filepath,
14 d.collection || '/' || d.path as display_path,
15 d.title,
16 d.hash,
17 d.collection,
18 d.modified_at,
19 c.doc,
20 LENGTH(c.doc),
21 1.0 / (1.0 + (-1.0 * bm25(documents_fts, 1.0, 10.0, 1.0))) as score,
22 d.llm_summary,
23 d.llm_title,
24 d.llm_keywords,
25 d.llm_category,
26 d.llm_difficulty,
27 d.user_metadata
28 FROM documents_fts fts
29 JOIN documents d ON d.id = fts.rowid
30 JOIN content c ON c.hash = d.hash
31 JOIN collections coll ON coll.name = d.collection
32 WHERE documents_fts MATCH ?1 AND d.active = 1
33 "#,
34 );
35
36 let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = vec![Box::new(query.to_string())];
37
38 if let Some(ref coll) = options.collection {
39 sql.push_str(" AND d.collection = ?");
40 sql.push_str(&(params_vec.len() + 1).to_string());
41 params_vec.push(Box::new(coll.clone()));
42 }
43
44 if let Some(ref provider) = options.provider {
45 sql.push_str(" AND coll.provider_type = ?");
46 sql.push_str(&(params_vec.len() + 1).to_string());
47 params_vec.push(Box::new(provider.clone()));
48 }
49
50 sql.push_str(" ORDER BY score DESC");
51
52 if options.limit > 0 {
53 sql.push_str(&format!(" LIMIT {}", options.limit));
54 }
55
56 let mut stmt = self.conn.prepare(&sql)?;
57 let results = stmt
58 .query_map(
59 rusqlite::params_from_iter(params_vec.iter().map(|p| p.as_ref())),
60 |row| {
61 let score: f64 = row.get(8)?;
62 let keywords_json: Option<String> = row.get(11)?;
63 let keywords = keywords_json
64 .and_then(|json| serde_json::from_str::<Vec<String>>(&json).ok());
65
66 let user_metadata_json: Option<String> = row.get(14)?;
67 let user_metadata = user_metadata_json
68 .and_then(|json| crate::db::UserMetadata::from_json(&json).ok());
69
70 Ok(SearchResult {
71 filepath: row.get(0)?,
72 display_path: row.get(1)?,
73 title: row.get(2)?,
74 hash: row.get(3)?,
75 collection_name: row.get(4)?,
76 modified_at: row.get(5)?,
77 body: if options.full_content {
78 Some(row.get(6)?)
79 } else {
80 None
81 },
82 body_length: row.get(7)?,
83 docid: docid_from_hash(&row.get::<_, String>(3)?),
84 context: None,
85 score,
86 source: SearchSource::Bm25,
87 chunk_pos: None,
88 llm_summary: row.get(9)?,
89 llm_title: row.get(10)?,
90 llm_keywords: keywords,
91 llm_category: row.get(12)?,
92 llm_difficulty: row.get(13)?,
93 user_metadata,
94 })
95 },
96 )?
97 .collect::<std::result::Result<Vec<_>, _>>()?;
98
99 let filtered: Vec<SearchResult> = results
101 .into_iter()
102 .filter(|r| r.score >= options.min_score)
103 .collect();
104
105 Ok(filtered)
106 }
107}