// seekr_code/search/semantic.rs

//! Semantic vector search.
//!
//! Converts the query text into an embedding via an `Embedder`, then performs
//! a KNN search on the vector index to find semantically similar code chunks.
5
6use crate::embedder::traits::Embedder;
7use crate::error::SearchError;
8use crate::index::SearchHit;
9use crate::index::store::SeekrIndex;
10
/// Tuning knobs for a semantic (vector) search request.
///
/// The [`Default`] implementation yields `top_k = 20` and
/// `score_threshold = 0.0`.
#[derive(Clone, Debug)]
pub struct SemanticSearchOptions {
    /// Upper bound on the number of results.
    pub top_k: usize,

    /// Minimum cosine similarity score threshold handed to the vector index.
    pub score_threshold: f32,
}

impl Default for SemanticSearchOptions {
    /// Returns the stock configuration: up to 20 results with a score
    /// threshold of `0.0`.
    fn default() -> Self {
        const DEFAULT_TOP_K: usize = 20;
        const DEFAULT_SCORE_THRESHOLD: f32 = 0.0;

        SemanticSearchOptions {
            top_k: DEFAULT_TOP_K,
            score_threshold: DEFAULT_SCORE_THRESHOLD,
        }
    }
}
29
30/// Perform semantic vector search.
31///
32/// Embeds the query string using the provided embedder and searches
33/// the index for the most similar code chunks by cosine similarity.
34pub fn search_semantic(
35    index: &SeekrIndex,
36    query: &str,
37    embedder: &dyn Embedder,
38    options: &SemanticSearchOptions,
39) -> Result<Vec<SearchHit>, SearchError> {
40    // Embed the query
41    let query_embedding = embedder.embed(query).map_err(SearchError::Embedder)?;
42
43    // Search the vector index
44    let results = index.search_vector(&query_embedding, options.top_k, options.score_threshold);
45
46    Ok(results)
47}
48
#[cfg(test)]
mod tests {
    use super::*;
    use crate::embedder::batch::DummyEmbedder;
    use crate::parser::{ChunkKind, CodeChunk};
    use std::path::PathBuf;

    /// Builds a minimal function chunk located in `test.rs` whose body is `body`.
    fn make_chunk(id: u64, body: &str) -> CodeChunk {
        let body = body.to_string();
        let byte_range = 0..body.len();

        CodeChunk {
            id,
            file_path: PathBuf::from("test.rs"),
            language: String::from("rust"),
            kind: ChunkKind::Function,
            name: Some(String::from("test")),
            signature: None,
            doc_comment: None,
            body,
            byte_range,
            line_range: 0..1,
        }
    }

    /// Embeds the body of every chunk, in order, panicking if any embedding fails.
    fn embed_bodies(embedder: &dyn Embedder, chunks: &[CodeChunk]) -> Vec<Vec<f32>> {
        let mut vectors = Vec::with_capacity(chunks.len());
        for chunk in chunks {
            vectors.push(embedder.embed(&chunk.body).unwrap());
        }
        vectors
    }

    #[test]
    fn test_semantic_search() {
        let embedder = DummyEmbedder::new(8);

        // Index two chunks using embeddings produced by the same embedder
        // that will later embed the query.
        let chunks = vec![
            make_chunk(1, "fn authenticate(user: &str) {}"),
            make_chunk(2, "fn calculate(x: f64, y: f64) -> f64 {}"),
        ];
        let embeddings = embed_bodies(&embedder, &chunks);
        let index = SeekrIndex::build_from(&chunks, &embeddings, 8);

        let options = SemanticSearchOptions {
            top_k: 10,
            score_threshold: 0.0,
        };

        // Query with the exact body of chunk 1.
        let query = "fn authenticate(user: &str) {}";
        let results = search_semantic(&index, query, &embedder, &options).unwrap();

        assert!(!results.is_empty());
        // The top hit should be the authenticate function (most similar to itself).
        assert_eq!(results[0].chunk_id, 1);
    }

    #[test]
    fn test_semantic_search_with_threshold() {
        let embedder = DummyEmbedder::new(8);

        let chunks = vec![make_chunk(1, "fn foo() {}")];
        let embeddings = embed_bodies(&embedder, &chunks);
        let index = SeekrIndex::build_from(&chunks, &embeddings, 8);

        // A very high threshold should filter out most results.
        let options = SemanticSearchOptions {
            top_k: 10,
            score_threshold: 0.99,
        };

        let query = "completely unrelated text";
        let results = search_semantic(&index, query, &embedder, &options).unwrap();

        // Whether the single indexed chunk clears the threshold depends on the
        // dummy embedder, so only an upper bound on the hit count is asserted.
        assert!(results.len() <= 1);
    }
}