vyctor 0.1.0

A fast CLI tool for semantic file search using vector embeddings
Documentation
//! Search query processing

use crate::config::VyctorConfig;
use crate::embeddings::{create_provider, EmbeddingProvider};
use crate::reranker::{create_reranker, DocumentToRerank, Reranker};
use crate::storage::{SearchResult, Storage};
use anyhow::Result;
use std::path::Path;
use std::sync::Arc;
use std::time::{Duration, Instant};

/// Search engine for querying the index.
///
/// Bundles the index storage, the embedding provider used to embed queries,
/// and an optional reranker that reorders the candidates returned by storage.
pub struct SearchEngine {
    /// Index storage that is searched with the query embedding.
    storage: Storage,
    /// Provider used to turn the query text into an embedding.
    embedder: Arc<dyn EmbeddingProvider>,
    /// Optional reranker; when `None`, results are only truncated to the
    /// requested limit.
    reranker: Option<Arc<dyn Reranker>>,
    /// Number of candidates to retrieve before reranking. The search fetches
    /// the larger of this value and the caller's requested limit.
    rerank_top_k: usize,
}

/// Timing breakdown for a search operation.
///
/// Each field is a wall-clock span measured with `Instant` inside
/// `SearchEngine::search`.
#[derive(Debug, Clone)]
pub struct SearchTiming {
    /// Time spent generating the embedding for the query.
    pub embed_time: Duration,
    /// Time spent retrieving the initial candidates from storage.
    pub search_time: Duration,
    /// Time spent in the reranking stage; `None` when no reranker is configured.
    pub rerank_time: Option<Duration>,
    /// Time from the start of the search until the timing record is built.
    pub total_time: Duration,
}

impl SearchEngine {
    /// Create a new search engine from configuration
    pub fn new(root: &Path, config: &VyctorConfig, verbose: bool) -> Result<Self> {
        let db_path = root.join(".vyctor").join("index.duckdb");
        let storage = Storage::new(&db_path, config.embedding.dimensions)?;
        let embedder = create_provider(&config.embedding, verbose)?;
        let reranker = create_reranker(&config.reranker)?;
        let rerank_top_k = config.reranker.top_k;

        Ok(Self {
            storage,
            embedder,
            reranker,
            rerank_top_k,
        })
    }

    /// Search for chunks matching a natural language query, returning timing info
    pub async fn search(
        &self,
        query: &str,
        limit: usize,
        folder_filter: Option<&str>,
    ) -> Result<(Vec<SearchResult>, SearchTiming)> {
        let total_start = Instant::now();

        // Generate embedding for the query
        let embed_start = Instant::now();
        let query_result = self.embedder.embed(query).await?;
        let embed_time = embed_start.elapsed();

        // Determine how many candidates to retrieve
        // If reranking, get more candidates; otherwise just get the requested limit
        let initial_limit = if self.reranker.is_some() {
            self.rerank_top_k.max(limit)
        } else {
            limit
        };

        // Search the storage for initial candidates
        let search_start = Instant::now();
        let mut results =
            self.storage
                .search(&query_result.embedding, initial_limit, folder_filter)?;
        let search_time = search_start.elapsed();

        // Apply reranking if enabled
        let rerank_time = if let Some(ref reranker) = self.reranker {
            let rerank_start = Instant::now();

            // Convert results to documents for reranking
            let documents: Vec<DocumentToRerank> = results
                .iter()
                .enumerate()
                .map(|(i, r)| DocumentToRerank {
                    id: i,
                    content: r.chunk_content.clone(),
                })
                .collect();

            // Rerank the documents
            let rerank_results = reranker.rerank(query, documents).await?;

            // Reorder results based on reranking scores
            let reranked_results: Vec<SearchResult> = rerank_results
                .into_iter()
                .take(limit)
                .map(|rr| {
                    let mut result = results[rr.id].clone();
                    // Update score to the reranker's relevance score
                    result.score = rr.relevance_score;
                    result
                })
                .collect();

            // Results are already sorted by the reranker
            results = reranked_results;

            Some(rerank_start.elapsed())
        } else {
            // No reranking, just truncate to limit
            results.truncate(limit);
            None
        };

        let timing = SearchTiming {
            embed_time,
            search_time,
            rerank_time,
            total_time: total_start.elapsed(),
        };

        Ok((results, timing))
    }

    /// Check if reranking is enabled
    #[allow(dead_code)]
    pub fn has_reranker(&self) -> bool {
        self.reranker.is_some()
    }

    /// Get the reranker model name if available
    pub fn reranker_model(&self) -> Option<&str> {
        self.reranker.as_ref().map(|r| r.model_name())
    }

    /// Get the storage for direct access (e.g., for stats)
    #[allow(dead_code)]
    pub fn storage(&self) -> &Storage {
        &self.storage
    }
}

#[cfg(test)]
mod tests {
    // Intentionally empty: tests for this module require a real database and
    // embeddings, so none are defined here.
}