Skip to main content

seekr_code/search/
semantic.rs

//! Semantic vector search.
//!
//! Converts the query text to an embedding via an `Embedder`, then performs
//! a KNN search on the vector index for semantically similar code chunks.
5
6use crate::embedder::traits::Embedder;
7use crate::error::SearchError;
8use crate::index::store::SeekrIndex;
9use crate::index::SearchHit;
10
/// Tuning knobs for a semantic (embedding-based) search.
///
/// Both fields are handed to the vector index as-is by `search_semantic`.
/// The type derives `PartialEq` so option values can be compared directly,
/// for example in tests or when checking against the defaults.
#[derive(Debug, Clone, PartialEq)]
pub struct SemanticSearchOptions {
    /// Maximum number of results to request from the index.
    pub top_k: usize,

    /// Minimum cosine similarity score threshold passed to the index.
    pub score_threshold: f32,
}
20
21impl Default for SemanticSearchOptions {
22    fn default() -> Self {
23        Self {
24            top_k: 20,
25            score_threshold: 0.0,
26        }
27    }
28}
29
30/// Perform semantic vector search.
31///
32/// Embeds the query string using the provided embedder and searches
33/// the index for the most similar code chunks by cosine similarity.
34pub fn search_semantic(
35    index: &SeekrIndex,
36    query: &str,
37    embedder: &dyn Embedder,
38    options: &SemanticSearchOptions,
39) -> Result<Vec<SearchHit>, SearchError> {
40    // Embed the query
41    let query_embedding = embedder
42        .embed(query)
43        .map_err(SearchError::Embedder)?;
44
45    // Search the vector index
46    let results = index.search_vector(&query_embedding, options.top_k, options.score_threshold);
47
48    Ok(results)
49}
50
#[cfg(test)]
mod tests {
    use super::*;
    use crate::embedder::batch::DummyEmbedder;
    use crate::parser::{ChunkKind, CodeChunk};
    use std::path::PathBuf;

    /// Build a minimal function chunk located in `test.rs` whose body is `body`.
    fn make_chunk(id: u64, body: &str) -> CodeChunk {
        CodeChunk {
            id,
            file_path: PathBuf::from("test.rs"),
            language: String::from("rust"),
            kind: ChunkKind::Function,
            name: Some(String::from("test")),
            signature: None,
            doc_comment: None,
            body: body.to_owned(),
            byte_range: 0..body.len(),
            line_range: 0..1,
        }
    }

    /// Embed the body of every chunk, in order, panicking if any embedding fails.
    fn embed_bodies(embedder: &DummyEmbedder, chunks: &[CodeChunk]) -> Vec<Vec<f32>> {
        let mut embeddings = Vec::with_capacity(chunks.len());
        for chunk in chunks {
            embeddings.push(embedder.embed(&chunk.body).unwrap());
        }
        embeddings
    }

    #[test]
    fn test_semantic_search() {
        let embedder = DummyEmbedder::new(8);
        let auth_body = "fn authenticate(user: &str) {}";

        // Index two chunks using embeddings produced by the same embedder
        // that will later embed the query.
        let chunks = vec![
            make_chunk(1, auth_body),
            make_chunk(2, "fn calculate(x: f64, y: f64) -> f64 {}"),
        ];
        let embeddings = embed_bodies(&embedder, &chunks);
        let index = SeekrIndex::build_from(&chunks, &embeddings, 8);

        let options = SemanticSearchOptions {
            top_k: 10,
            score_threshold: 0.0,
        };

        // Query with the exact body of chunk 1: it should rank first, since
        // nothing is more similar to that text than the text itself.
        let hits = search_semantic(&index, auth_body, &embedder, &options).unwrap();
        assert!(!hits.is_empty());
        assert_eq!(hits[0].chunk_id, 1);
    }

    #[test]
    fn test_semantic_search_with_threshold() {
        let embedder = DummyEmbedder::new(8);

        let chunks = vec![make_chunk(1, "fn foo() {}")];
        let embeddings = embed_bodies(&embedder, &chunks);
        let index = SeekrIndex::build_from(&chunks, &embeddings, 8);

        // A very high threshold should filter out most results.
        let options = SemanticSearchOptions {
            top_k: 10,
            score_threshold: 0.99,
        };

        let hits =
            search_semantic(&index, "completely unrelated text", &embedder, &options).unwrap();
        // Whether the single indexed chunk clears the threshold depends on the
        // dummy embedder, so this only checks that the search succeeds and
        // returns no more hits than there are indexed chunks.
        assert!(hits.len() <= 1);
    }
}