seekr_code/search/
semantic.rs1use crate::embedder::traits::Embedder;
7use crate::error::SearchError;
8use crate::index::store::SeekrIndex;
9use crate::index::SearchHit;
10
/// Tuning knobs for [`search_semantic`].
///
/// Both fields are forwarded unchanged to `SeekrIndex::search_vector`.
#[derive(Clone, Debug)]
pub struct SemanticSearchOptions {
    /// How many top-ranked hits to request from the index.
    pub top_k: usize,

    /// Score threshold handed to the index.
    ///
    /// NOTE(review): presumably hits scoring below this value are dropped by
    /// the index — confirm against `SeekrIndex::search_vector`.
    pub score_threshold: f32,
}
20
21impl Default for SemanticSearchOptions {
22 fn default() -> Self {
23 Self {
24 top_k: 20,
25 score_threshold: 0.0,
26 }
27 }
28}
29
30pub fn search_semantic(
35 index: &SeekrIndex,
36 query: &str,
37 embedder: &dyn Embedder,
38 options: &SemanticSearchOptions,
39) -> Result<Vec<SearchHit>, SearchError> {
40 let query_embedding = embedder
42 .embed(query)
43 .map_err(SearchError::Embedder)?;
44
45 let results = index.search_vector(&query_embedding, options.top_k, options.score_threshold);
47
48 Ok(results)
49}
50
#[cfg(test)]
mod tests {
    use super::*;
    use crate::embedder::batch::DummyEmbedder;
    use crate::parser::{ChunkKind, CodeChunk};
    use std::path::PathBuf;

    // Builds a minimal function chunk whose only varying parts are its id and body.
    fn make_chunk(id: u64, body: &str) -> CodeChunk {
        let byte_range = 0..body.len();

        CodeChunk {
            id,
            file_path: PathBuf::from("test.rs"),
            language: String::from("rust"),
            kind: ChunkKind::Function,
            name: Some(String::from("test")),
            signature: None,
            doc_comment: None,
            body: body.to_owned(),
            byte_range,
            line_range: 0..1,
        }
    }

    // Embeds every chunk body with `embedder` and builds an 8-dimensional index
    // from the chunks and their embeddings.
    fn build_index(embedder: &DummyEmbedder, chunks: Vec<CodeChunk>) -> SeekrIndex {
        let embeddings: Vec<Vec<f32>> = chunks
            .iter()
            .map(|chunk| embedder.embed(&chunk.body).unwrap())
            .collect();

        SeekrIndex::build_from(&chunks, &embeddings, 8)
    }

    #[test]
    fn test_semantic_search() {
        let embedder = DummyEmbedder::new(8);
        let index = build_index(
            &embedder,
            vec![
                make_chunk(1, "fn authenticate(user: &str) {}"),
                make_chunk(2, "fn calculate(x: f64, y: f64) -> f64 {}"),
            ],
        );
        let options = SemanticSearchOptions {
            top_k: 10,
            score_threshold: 0.0,
        };

        // The query is byte-for-byte the body of chunk 1, so chunk 1 is
        // expected to rank first.
        let query = "fn authenticate(user: &str) {}";
        let hits = search_semantic(&index, query, &embedder, &options).unwrap();

        assert!(!hits.is_empty());
        assert_eq!(hits[0].chunk_id, 1);
    }

    #[test]
    fn test_semantic_search_with_threshold() {
        let embedder = DummyEmbedder::new(8);
        let index = build_index(&embedder, vec![make_chunk(1, "fn foo() {}")]);
        let options = SemanticSearchOptions {
            top_k: 10,
            score_threshold: 0.99,
        };

        let query = "completely unrelated text";
        let hits = search_semantic(&index, query, &embedder, &options).unwrap();

        // Only one chunk is indexed, so presumably at most one hit can come
        // back; whether it clears the 0.99 threshold depends on the embedder.
        assert!(hits.len() <= 1);
    }
}