use anyhow::Result;
use crate::retrieval::client::RetrievalClient;
/// Per-request tuning options for a retrieval search.
#[derive(Debug, Clone)]
pub struct SearchOpts {
    /// Maximum number of hits returned to the caller.
    pub limit: usize,
    /// Number of candidates requested from the store; the result is cut
    /// down to `limit` afterwards.
    pub overfetch: usize,
    /// Whether the candidates should be reranked before being cut to `limit`.
    pub rerank: bool,
    /// Languages handed to the store query as an exclusion list.
    pub exclude_languages: Vec<String>,
}

impl SearchOpts {
    /// Limit used by the [`Default`] implementation.
    const DEFAULT_LIMIT: usize = 10;

    /// Creates options that return at most `limit` hits.
    ///
    /// `overfetch` is set to twice `limit`, reranking is enabled and no
    /// languages are excluded.
    pub fn new(limit: usize) -> Self {
        Self {
            limit,
            // Saturate rather than overflow: a plain `limit * 2` panics in
            // debug builds and wraps in release builds for very large
            // limits, which could leave `overfetch` smaller than `limit`.
            overfetch: limit.saturating_mul(2),
            rerank: true,
            exclude_languages: Vec::new(),
        }
    }
}

impl Default for SearchOpts {
    /// Equivalent to `SearchOpts::new(10)`: limit 10, overfetch 20,
    /// reranking enabled, no excluded languages.
    fn default() -> Self {
        Self::new(Self::DEFAULT_LIMIT)
    }
}
/// One retrieved chunk, as returned by the `search_*` methods of
/// `RetrievalClient` in this file.
#[derive(Debug, Clone)]
pub struct Hit {
/// Identifier of the chunk (how it is assigned is not visible here).
pub chunk_id: String,
/// Path of the file the chunk belongs to (presumably relative to the
/// project root; TODO confirm against the indexer).
pub file_path: String,
/// First line of the chunk within `file_path`.
/// NOTE(review): base (0/1) and inclusivity are not established in this file.
pub start_line: i64,
/// Last line of the chunk within `file_path` (same caveat as `start_line`).
pub end_line: i64,
/// Text of the chunk; this is the string sent to the reranker.
pub content: String,
/// Score attached to the hit before reranking (presumably the store's
/// retrieval score; confirm against the store implementation).
pub score: f32,
/// Reranker score. The search code in this file sets it to `Some` only
/// for hits that went through a successful rerank; what the store puts
/// here beforehand is not visible in this file.
pub rerank_score: Option<f32>,
}
impl RetrievalClient {
    /// Searches one collection and, when enabled, reranks the candidates.
    ///
    /// Steps, in order:
    /// 1. Embed `query` with `self.embedder`.
    /// 2. Query `self.code_store` with the embedding's `dense` and `sparse`
    ///    parts, the boost/sparse settings from `self.config` and the
    ///    language exclusion list from `opts`.
    /// 3. Unless reranking is skipped (`opts.rerank` is false, `self.lite`
    ///    is set, or there are no candidates), score the candidate contents
    ///    with `self.reranker`, order them by descending score and record
    ///    the score in `Hit::rerank_score`.
    ///
    /// At most `opts.limit` hits are returned in every case.
    ///
    /// # Errors
    ///
    /// Propagates failures from the embedder and from the store query.
    /// A reranker failure is *not* an error: it is logged and the
    /// candidates are returned in store order.
    async fn search_in(
        &self,
        collection: &str,
        project_id: &str,
        query: &str,
        opts: SearchOpts,
    ) -> Result<Vec<Hit>> {
        let q = self.embedder.embed(query).await?;

        // Never request fewer candidates than we intend to return; the
        // fields of `SearchOpts` are public, so `overfetch < limit` can
        // happen and would otherwise silently cap the result size.
        let fetch = opts.overfetch.max(opts.limit);
        let candidates = self
            .code_store
            .query(
                collection,
                project_id,
                &q.dense,
                &q.sparse,
                fetch,
                self.config.bm25_boost,
                self.config.disable_sparse,
                &opts.exclude_languages,
            )
            .await?;

        if !opts.rerank || self.lite || candidates.is_empty() {
            return Ok(candidates.into_iter().take(opts.limit).collect());
        }

        let texts: Vec<String> = candidates.iter().map(|h| h.content.clone()).collect();
        let scores: Vec<f32> = match self.reranker.rerank(query, &texts).await {
            // Normalise to a `Vec` so the score count can be checked below.
            Ok(scores) => scores.into_iter().collect(),
            Err(e) => {
                // Best effort: fall back to the store's own ordering.
                tracing::warn!("reranker degraded: {e}");
                return Ok(candidates.into_iter().take(opts.limit).collect());
            }
        };

        // Scores that do not line up one-to-one with the candidates cannot
        // be trusted; zipping them would silently drop hits. Treat this the
        // same way as a failed rerank.
        if scores.len() != texts.len() {
            tracing::warn!(
                "reranker degraded: got {} scores for {} candidates",
                scores.len(),
                texts.len()
            );
            return Ok(candidates.into_iter().take(opts.limit).collect());
        }

        let mut ranked: Vec<(Hit, f32)> = candidates.into_iter().zip(scores).collect();

        // Descending by score. NaN is ranked last so that a bad score can
        // never outrank a real one, and `total_cmp` keeps the comparator a
        // total order (a non-total comparator makes `sort_by` unspecified
        // and may panic).
        let sort_key = |s: f32| if s.is_nan() { f32::NEG_INFINITY } else { s };
        ranked.sort_by(|a, b| sort_key(b.1).total_cmp(&sort_key(a.1)));

        Ok(ranked
            .into_iter()
            .take(opts.limit)
            .map(|(mut hit, score)| {
                hit.rerank_score = Some(score);
                hit
            })
            .collect())
    }

    /// Searches the `code_chunks` collection of `project_id` for `query`.
    ///
    /// # Errors
    ///
    /// Propagates embedding and store errors from the underlying search.
    pub async fn search_code(
        &self,
        project_id: &str,
        query: &str,
        opts: SearchOpts,
    ) -> Result<Vec<Hit>> {
        let collection = self.config.collection("code_chunks");
        self.search_in(&collection, project_id, query, opts).await
    }

    /// Searches the `memories` collection of `project_id` for `query`.
    ///
    /// # Errors
    ///
    /// Propagates embedding and store errors from the underlying search.
    pub async fn search_memories(
        &self,
        project_id: &str,
        query: &str,
        opts: SearchOpts,
    ) -> Result<Vec<Hit>> {
        let collection = self.config.collection("memories");
        self.search_in(&collection, project_id, query, opts).await
    }

    /// Searches the `library_chunks` collection for `query`.
    ///
    /// The project id passed to the store is the literal `"*"`
    /// (presumably a match-all marker; confirm against the store).
    ///
    /// # Errors
    ///
    /// Propagates embedding and store errors from the underlying search.
    pub async fn search_libraries(&self, query: &str, opts: SearchOpts) -> Result<Vec<Hit>> {
        let collection = self.config.collection("library_chunks");
        self.search_in(&collection, "*", query, opts).await
    }
}