codelens_engine/embedding/engine_impl/
search.rs1use anyhow::Result;
2
3use super::super::EmbeddingEngine;
4use super::super::prompt::split_identifier;
5use super::super::runtime::configured_rerank_blend;
6use crate::embedding_store::ScoredChunk;
7use crate::embedding_types::SemanticMatch;
8
9impl EmbeddingEngine {
10 pub fn search(&self, query: &str, max_results: usize) -> Result<Vec<SemanticMatch>> {
12 let results = self.search_scored(query, max_results)?;
13 Ok(results.into_iter().map(SemanticMatch::from).collect())
14 }
15
16 pub fn search_scored(&self, query: &str, max_results: usize) -> Result<Vec<ScoredChunk>> {
23 self.search_scored_inner(query, max_results, None)
24 }
25
26 pub fn search_scored_in_scope(
33 &self,
34 query: &str,
35 max_results: usize,
36 path_scope: Option<&str>,
37 ) -> Result<Vec<ScoredChunk>> {
38 self.search_scored_inner(query, max_results, path_scope)
39 }
40
41 fn search_scored_inner(
42 &self,
43 query: &str,
44 max_results: usize,
45 path_scope: Option<&str>,
46 ) -> Result<Vec<ScoredChunk>> {
47 let query_embedding = self.embed_query_cached(query)?;
48
49 let factor = std::env::var("CODELENS_RERANK_FACTOR")
53 .ok()
54 .and_then(|v| v.parse::<usize>().ok())
55 .unwrap_or(5);
56 let candidate_count = max_results.saturating_mul(factor).max(max_results);
57 let mut candidates =
58 self.store
59 .search_scoped(&query_embedding, candidate_count, path_scope)?;
60
61 if candidates.len() <= max_results {
62 return Ok(candidates);
63 }
64
65 let query_lower = query.to_lowercase();
68 let query_tokens: Vec<&str> = query_lower
69 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
70 .filter(|t| t.len() >= 2)
71 .collect();
72
73 if query_tokens.is_empty() {
74 candidates.truncate(max_results);
75 return Ok(candidates);
76 }
77
78 let blend = configured_rerank_blend();
79 for chunk in &mut candidates {
80 let split_name = split_identifier(&chunk.symbol_name);
85 let searchable = format!(
86 "{} {} {} {} {}",
87 chunk.symbol_name.to_lowercase(),
88 split_name.to_lowercase(),
89 chunk.name_path.to_lowercase(),
90 chunk.signature.to_lowercase(),
91 chunk.file_path.to_lowercase(),
92 );
93 let overlap = query_tokens
94 .iter()
95 .filter(|t| searchable.contains(**t))
96 .count() as f64;
97 let overlap_ratio = overlap / query_tokens.len().max(1) as f64;
98 chunk.score = chunk.score * blend + overlap_ratio * (1.0 - blend);
100 }
101
102 candidates.sort_by(|a, b| {
103 b.score
104 .partial_cmp(&a.score)
105 .unwrap_or(std::cmp::Ordering::Equal)
106 });
107 candidates.truncate(max_results);
108 Ok(candidates)
109 }
110}