impl HybridSearchEngine {
    /// Reciprocal-rank-fusion damping constant (`k` in `1 / (k + rank)`).
    /// 60 is the value recommended in the original RRF paper.
    const RRF_K: usize = 60;

    /// Creates a hybrid engine backed by a local semantic index at `db_path`,
    /// with keyword (ripgrep) searches rooted at `search_root`.
    ///
    /// # Errors
    /// Propagates any failure from opening the semantic index.
    pub async fn new(db_path: &str, search_root: &Path) -> Result<Self, String> {
        let semantic_engine = SemanticSearchEngine::new(db_path).await?;
        Ok(Self {
            semantic_engine: Arc::new(semantic_engine),
            search_root: search_root.to_path_buf(),
        })
    }

    /// Backward-compatible constructor; the API key is ignored because local
    /// embeddings do not require one.
    #[deprecated(note = "Use new() without api_key - local embeddings don't require API keys")]
    pub async fn new_with_key(
        _api_key: &str,
        db_path: &str,
        search_root: &Path,
    ) -> Result<Self, String> {
        Self::new(db_path, search_root).await
    }

    /// Dispatches to the keyword-only, vector-only, or hybrid path based on
    /// `query.mode`.
    ///
    /// # Errors
    /// Returns an error for an empty or whitespace-only query string, or when
    /// the selected search path fails.
    pub async fn search(
        &self,
        query: &HybridSearchQuery,
    ) -> Result<Vec<HybridSearchResult>, String> {
        if query.query.trim().is_empty() {
            return Err("Query cannot be empty".to_string());
        }
        match query.mode {
            HybridSearchMode::KeywordOnly => self.keyword_only_search(query).await,
            HybridSearchMode::VectorOnly => self.vector_only_search(query).await,
            HybridSearchMode::Hybrid => self.hybrid_search(query).await,
        }
    }

    /// Pure keyword (ripgrep) search. Each match is scored by RRF rank alone;
    /// the vector score is fixed at 0.
    async fn keyword_only_search(
        &self,
        query: &HybridSearchQuery,
    ) -> Result<Vec<HybridSearchResult>, String> {
        // Over-fetch (2x) so post-filtering still has enough candidates to
        // fill `limit`. saturating_mul avoids a debug-build overflow panic on
        // a pathological limit.
        let matches = self
            .keyword_search(&query.query, query.limit.saturating_mul(2))
            .await?;
        let mut results: Vec<HybridSearchResult> = matches
            .into_iter()
            .enumerate()
            .map(|(rank, m)| {
                // Ranks are 1-based for RRF.
                let keyword_score = Self::compute_rrf_score(rank + 1, Self::RRF_K);
                HybridSearchResult {
                    file_path: m.file_path.clone(),
                    chunk_name: Self::extract_chunk_name(&m.content),
                    // Keyword hits are line-level, so the chunk is the file.
                    chunk_type: "file".to_string(),
                    language: Self::detect_language(&m.file_path),
                    start_line: m.line_number,
                    end_line: m.line_number,
                    keyword_score,
                    vector_score: 0.0,
                    hybrid_score: keyword_score,
                    snippet: Self::truncate(&m.content, 200),
                }
            })
            .collect();
        results = Self::apply_filters(results, query);
        results.truncate(query.limit);
        Ok(results)
    }

    /// Pure semantic (vector) search, delegated to the semantic engine. Each
    /// result's hybrid score is its similarity score; keyword score is 0.
    async fn vector_only_search(
        &self,
        query: &HybridSearchQuery,
    ) -> Result<Vec<HybridSearchResult>, String> {
        let semantic_query = SearchQuery {
            query: query.query.clone(),
            mode: super::SearchMode::SemanticOnly,
            language_filter: query.language_filter.clone(),
            file_pattern: query.file_pattern.clone(),
            chunk_type_filter: None,
            limit: query.limit,
        };
        let semantic_results = self.semantic_engine.search(&semantic_query).await?;
        let results = semantic_results
            .into_iter()
            .map(|r| HybridSearchResult {
                file_path: r.file_path,
                chunk_name: r.chunk_name,
                chunk_type: r.chunk_type,
                language: r.language,
                start_line: r.start_line,
                end_line: r.end_line,
                keyword_score: 0.0,
                vector_score: r.similarity_score,
                hybrid_score: r.similarity_score,
                snippet: r.snippet,
            })
            .collect();
        Ok(results)
    }

    /// Runs keyword and semantic searches, merges them with weighted RRF
    /// scores, then filters and truncates to `query.limit`.
    async fn hybrid_search(
        &self,
        query: &HybridSearchQuery,
    ) -> Result<Vec<HybridSearchResult>, String> {
        // Over-fetch both sides so the merged, filtered list can fill `limit`.
        let fetch_limit = query.limit.saturating_mul(2);
        let keyword_matches = self.keyword_search(&query.query, fetch_limit).await?;
        let semantic_query = SearchQuery {
            query: query.query.clone(),
            mode: super::SearchMode::SemanticOnly,
            language_filter: query.language_filter.clone(),
            file_pattern: query.file_pattern.clone(),
            chunk_type_filter: None,
            limit: fetch_limit,
        };
        let semantic_results = self.semantic_engine.search(&semantic_query).await?;
        let merged = self.merge_results(
            keyword_matches,
            semantic_results,
            (query.keyword_weight, query.vector_weight),
        );
        let mut filtered = Self::apply_filters(merged, query);
        filtered.truncate(query.limit);
        Ok(filtered)
    }

    /// Runs ripgrep over `search_root` and parses its `file:line:content`
    /// output into [`KeywordMatch`]es.
    ///
    /// NOTE(review): `--max-count` caps matches *per file*, not in total; the
    /// overall cap is enforced by `.take(limit)` on the parsed lines below.
    ///
    /// # Errors
    /// Returns an error if the `rg` binary cannot be spawned.
    async fn keyword_search(&self, query: &str, limit: usize) -> Result<Vec<KeywordMatch>, String> {
        let output = Command::new("rg")
            .arg("--line-number")
            .arg("--no-heading")
            .arg("--max-count")
            .arg(limit.to_string())
            .arg(query)
            .arg(&self.search_root)
            .output()
            .map_err(|e| format!("Failed to run ripgrep: {e}"))?;
        // ripgrep exits with status 1 when there are simply no matches; treat
        // that (non-success, empty stdout) as an empty result set rather than
        // an error. (The original guard `!success && !stdout.is_empty()`
        // wrapped an inner `stdout.is_empty()` check that could never fire.)
        if !output.status.success() && output.stdout.is_empty() {
            return Ok(Vec::new());
        }
        let stdout = String::from_utf8_lossy(&output.stdout);
        let mut matches = Vec::new();
        for line in stdout.lines().take(limit) {
            // Expected shape: `path:line:content`. splitn(3, ..) keeps any
            // further ':' characters inside the content field.
            // NOTE(review): an absolute Windows path (`C:\...`) would break
            // this split — confirm search_root is always POSIX-style.
            let parts: Vec<&str> = line.splitn(3, ':').collect();
            if parts.len() == 3 {
                if let Ok(line_num) = parts[1].parse::<usize>() {
                    matches.push(KeywordMatch {
                        file_path: parts[0].to_string(),
                        line_number: line_num,
                        content: parts[2].to_string(),
                    });
                }
            }
        }
        Ok(matches)
    }

    /// Merges keyword and semantic result lists using weighted RRF scores and
    /// returns them sorted by descending hybrid score.
    ///
    /// NOTE(review): keyword entries are keyed `file:line` while semantic
    /// entries are keyed `file:chunk_name`, so the two sides only combine
    /// scores when a chunk name happens to equal a line number — effectively
    /// never. Preserved as-is to avoid a behavior change; consider keying both
    /// sides by overlapping line ranges instead.
    fn merge_results(
        &self,
        keyword_matches: Vec<KeywordMatch>,
        semantic_results: Vec<SearchResult>,
        weights: (f64, f64),
    ) -> Vec<HybridSearchResult> {
        let mut result_map: HashMap<String, HybridSearchResult> = HashMap::new();
        for (rank, km) in keyword_matches.iter().enumerate() {
            let keyword_score = Self::compute_rrf_score(rank + 1, Self::RRF_K);
            let key = format!("{}:{}", km.file_path, km.line_number);
            result_map.insert(
                key,
                HybridSearchResult {
                    file_path: km.file_path.clone(),
                    chunk_name: Self::extract_chunk_name(&km.content),
                    chunk_type: "file".to_string(),
                    language: Self::detect_language(&km.file_path),
                    start_line: km.line_number,
                    end_line: km.line_number,
                    keyword_score,
                    vector_score: 0.0,
                    hybrid_score: weights.0 * keyword_score,
                    snippet: Self::truncate(&km.content, 200),
                },
            );
        }
        for (rank, sr) in semantic_results.iter().enumerate() {
            let vector_score = Self::compute_rrf_score(rank + 1, Self::RRF_K);
            let key = format!("{}:{}", sr.file_path, sr.chunk_name);
            if let Some(existing) = result_map.get_mut(&key) {
                // Both sides matched: combine the weighted scores.
                existing.vector_score = vector_score;
                existing.hybrid_score =
                    weights.0 * existing.keyword_score + weights.1 * vector_score;
            } else {
                result_map.insert(
                    key,
                    HybridSearchResult {
                        file_path: sr.file_path.clone(),
                        chunk_name: sr.chunk_name.clone(),
                        chunk_type: sr.chunk_type.clone(),
                        language: sr.language.clone(),
                        start_line: sr.start_line,
                        end_line: sr.end_line,
                        keyword_score: 0.0,
                        vector_score,
                        hybrid_score: weights.1 * vector_score,
                        snippet: sr.snippet.clone(),
                    },
                );
            }
        }
        let mut results: Vec<HybridSearchResult> = result_map.into_values().collect();
        // total_cmp imposes a total order on f64, so caller-supplied NaN
        // weights can no longer panic the sort (the original used
        // partial_cmp + expect).
        results.sort_by(|a, b| b.hybrid_score.total_cmp(&a.hybrid_score));
        results
    }

    /// Reciprocal-rank-fusion score: `1 / (k + rank)`, with `rank` 1-based.
    pub fn compute_rrf_score(rank: usize, k: usize) -> f64 {
        1.0 / (k as f64 + rank as f64)
    }

    /// Drops results that fail the query's language or file-pattern filters.
    fn apply_filters(
        results: Vec<HybridSearchResult>,
        query: &HybridSearchQuery,
    ) -> Vec<HybridSearchResult> {
        results
            .into_iter()
            .filter(|r| {
                if let Some(ref lang) = query.language_filter {
                    if &r.language != lang {
                        return false;
                    }
                }
                if let Some(ref pattern) = query.file_pattern {
                    if !Self::matches_pattern(&r.file_path, pattern) {
                        return false;
                    }
                }
                true
            })
            .collect()
    }

    /// Indexes `path` into the semantic engine (keyword search needs no index).
    pub async fn index_directory(&self, path: &Path) -> Result<(), String> {
        self.semantic_engine.index_directory(path).await?;
        Ok(())
    }

    /// Maps a file path to a language name by extension; unrecognized
    /// extensions yield "unknown".
    fn detect_language(path: &str) -> String {
        if path.ends_with(".rs") {
            "rust".to_string()
        } else if path.ends_with(".ts") || path.ends_with(".tsx") {
            "typescript".to_string()
        } else if path.ends_with(".py") {
            "python".to_string()
        } else if path.ends_with(".go") {
            "go".to_string()
        } else if path.ends_with(".c") || path.ends_with(".h") {
            "c".to_string()
        } else if path.ends_with(".cpp")
            || path.ends_with(".hpp")
            || path.ends_with(".cc")
            || path.ends_with(".cxx")
            || path.ends_with(".cu")
            || path.ends_with(".cuh")
        {
            "cpp".to_string()
        } else {
            "unknown".to_string()
        }
    }

    /// Returns the first whitespace-separated token consisting solely of
    /// alphanumerics/underscores, or "unknown" when no such token exists.
    fn extract_chunk_name(content: &str) -> String {
        content
            .split_whitespace()
            .find(|s| s.chars().all(|c| c.is_alphanumeric() || c == '_'))
            .unwrap_or("unknown")
            .to_string()
    }

    /// Glob-lite matching: a leading '*' means suffix match; otherwise a
    /// plain substring match.
    fn matches_pattern(path: &str, pattern: &str) -> bool {
        if let Some(suffix) = pattern.strip_prefix('*') {
            path.ends_with(suffix)
        } else {
            path.contains(pattern)
        }
    }

    /// Truncates `s` to at most `max_len` bytes, appending "...". Backs up to
    /// the nearest UTF-8 char boundary so multi-byte text never splits; the
    /// original `s.get(..max_len).unwrap_or(s)` silently returned the WHOLE
    /// string whenever `max_len` landed mid-character.
    fn truncate(s: &str, max_len: usize) -> String {
        if s.len() <= max_len {
            s.to_string()
        } else {
            let mut end = max_len;
            while !s.is_char_boundary(end) {
                end -= 1;
            }
            format!("{}...", &s[..end])
        }
    }
}