// project_rag/vector_db/mod.rs

1// LanceDB is the default embedded vector database (stable, feature-rich)
2pub mod lance_client;
3pub use lance_client::LanceVectorDB;
4
5// Qdrant is optional (requires external server)
6#[cfg(feature = "qdrant-backend")]
7pub mod qdrant_client;
8#[cfg(feature = "qdrant-backend")]
9pub use qdrant_client::QdrantVectorDB;
10
11use crate::types::{ChunkMetadata, SearchResult};
12use anyhow::Result;
13
14/// Trait for vector database operations
15#[async_trait::async_trait]
16pub trait VectorDatabase: Send + Sync {
17    /// Initialize the database and create collections if needed
18    async fn initialize(&self, dimension: usize) -> Result<()>;
19
20    /// Store embeddings with metadata
21    /// root_path: The normalized root path being indexed (for per-project BM25 isolation)
22    async fn store_embeddings(
23        &self,
24        embeddings: Vec<Vec<f32>>,
25        metadata: Vec<ChunkMetadata>,
26        contents: Vec<String>,
27        root_path: &str,
28    ) -> Result<usize>;
29
30    /// Search for similar vectors
31    #[allow(clippy::too_many_arguments)]
32    async fn search(
33        &self,
34        query_vector: Vec<f32>,
35        query_text: &str,
36        limit: usize,
37        min_score: f32,
38        project: Option<String>,
39        root_path: Option<String>,
40        hybrid: bool,
41    ) -> Result<Vec<SearchResult>>;
42
43    /// Search with filters
44    #[allow(clippy::too_many_arguments)]
45    async fn search_filtered(
46        &self,
47        query_vector: Vec<f32>,
48        query_text: &str,
49        limit: usize,
50        min_score: f32,
51        project: Option<String>,
52        root_path: Option<String>,
53        hybrid: bool,
54        file_extensions: Vec<String>,
55        languages: Vec<String>,
56        path_patterns: Vec<String>,
57    ) -> Result<Vec<SearchResult>>;
58
59    /// Delete embeddings for a specific file
60    async fn delete_by_file(&self, file_path: &str) -> Result<usize>;
61
62    /// Clear all embeddings
63    async fn clear(&self) -> Result<()>;
64
65    /// Get statistics
66    async fn get_statistics(&self) -> Result<DatabaseStats>;
67
68    /// Flush/save changes to disk
69    async fn flush(&self) -> Result<()>;
70
71    /// Count embeddings for a specific root path
72    /// Used to validate dirty flags - if embeddings exist, the index may be valid
73    async fn count_by_root_path(&self, root_path: &str) -> Result<usize>;
74
75    /// Get unique file paths indexed for a specific root path
76    /// Returns a list of file paths that have embeddings in the database
77    async fn get_indexed_files(&self, root_path: &str) -> Result<Vec<String>>;
78}
79
/// Summary statistics reported by `VectorDatabase::get_statistics`.
#[derive(Debug, Clone)]
pub struct DatabaseStats {
    /// Total number of points in the database (presumably one point per
    /// stored chunk; confirm against the backends).
    pub total_points: usize,
    /// Total number of vectors in the database.
    pub total_vectors: usize,
    /// Per-language counts as `(language, count)` pairs (presumably the
    /// number of stored chunks per language; ordering is backend-defined,
    /// so confirm before relying on it).
    pub language_breakdown: Vec<(String, usize)>,
}