Skip to main content

cognee_vector/
vector_db_trait.rs

1use crate::error::VectorDBResult;
2use crate::models::{SearchResult, VectorPoint};
3use async_trait::async_trait;
4use uuid::Uuid;
5
6/// Vector database trait
7#[async_trait]
8pub trait VectorDB: Send + Sync {
9    /// Create a collection for (data_type, field_name) pair
10    ///
11    /// # Arguments
12    /// * `data_type` - Type name (e.g., "DocumentChunk", "Entity")
13    /// * `field_name` - Field name (e.g., "text", "name")
14    /// * `dimension` - Vector dimension (e.g., 384 for MiniLM)
15    ///
16    /// # Example
17    /// ```ignore
18    /// vector_db.create_collection("DocumentChunk", "text", 384).await?;
19    /// ```
20    async fn create_collection(
21        &self,
22        data_type: &str,
23        field_name: &str,
24        dimension: usize,
25    ) -> VectorDBResult<()>;
26
27    /// Check if collection exists
28    ///
29    /// # Arguments
30    /// * `data_type` - Type name
31    /// * `field_name` - Field name
32    async fn has_collection(&self, data_type: &str, field_name: &str) -> VectorDBResult<bool>;
33
34    /// Index data points (batch upsert with embeddings already generated)
35    ///
36    /// # Arguments
37    /// * `data_type` - Type name
38    /// * `field_name` - Field name
39    /// * `points` - Vector points with embeddings
40    ///
41    /// # Example
42    /// ```ignore
43    /// let points = vec![
44    ///     VectorPoint::new(chunk_id, embedding)
45    ///         .with_metadata("type", json!("DocumentChunk"))
46    ///         .with_metadata("field", json!("text")),
47    /// ];
48    /// vector_db.index_points("DocumentChunk", "text", &points).await?;
49    /// ```
50    async fn index_points(
51        &self,
52        data_type: &str,
53        field_name: &str,
54        points: &[VectorPoint],
55    ) -> VectorDBResult<()>;
56
57    /// Search for similar vectors
58    ///
59    /// # Arguments
60    /// * `data_type` - Type name
61    /// * `field_name` - Field name
62    /// * `query_vector` - Query embedding vector
63    /// * `top_k` - Number of results to return
64    ///
65    /// # Returns
66    /// Vector of search results sorted by similarity (descending)
67    async fn search_similar(
68        &self,
69        data_type: &str,
70        field_name: &str,
71        query_vector: &[f32],
72        top_k: usize,
73    ) -> VectorDBResult<Vec<SearchResult>>;
74
75    /// Delete collection
76    async fn delete_collection(&self, data_type: &str, field_name: &str) -> VectorDBResult<()>;
77
78    /// Delete points by IDs from an existing collection.
79    async fn delete_points(
80        &self,
81        data_type: &str,
82        field_name: &str,
83        point_ids: &[Uuid],
84    ) -> VectorDBResult<()> {
85        let _ = (data_type, field_name, point_ids);
86        Ok(())
87    }
88
89    /// Get collection statistics
90    async fn collection_size(&self, data_type: &str, field_name: &str) -> VectorDBResult<usize>;
91
92    /// List all existing vector collections as `(data_type, field_name)` pairs.
93    ///
94    /// Default implementation returns an empty list. Backends should override
95    /// to return the actual collections they hold.
96    async fn list_collections(&self) -> VectorDBResult<Vec<(String, String)>> {
97        Ok(vec![])
98    }
99
100    /// Remove all vector collections.
101    ///
102    /// Default implementation lists all collections and deletes each one.
103    /// Backends may override with a more efficient bulk operation.
104    ///
105    /// Equivalent to Python's `vector_engine.prune()`.
106    async fn prune(&self) -> VectorDBResult<()> {
107        let collections = self.list_collections().await?;
108        for (data_type, field_name) in collections {
109            self.delete_collection(&data_type, &field_name).await?;
110        }
111        Ok(())
112    }
113
114    /// Perform multiple vector similarity searches in sequence.
115    ///
116    /// Default implementation loops over [`search_similar`]. Backends may override
117    /// this with a native batch API for better performance.
118    async fn batch_search_similar(
119        &self,
120        data_type: &str,
121        field_name: &str,
122        query_vectors: &[Vec<f32>],
123        top_k: usize,
124    ) -> VectorDBResult<Vec<Vec<SearchResult>>> {
125        let mut results = Vec::with_capacity(query_vectors.len());
126        for query_vector in query_vectors {
127            results.push(
128                self.search_similar(data_type, field_name, query_vector, top_k)
129                    .await?,
130            );
131        }
132        Ok(results)
133    }
134}
135
#[cfg(all(test, feature = "testing"))]
mod tests {
    #![allow(
        clippy::unwrap_used,
        clippy::expect_used,
        reason = "test code — panics are acceptable"
    )]
    use super::*;
    use crate::mock_vector_db::MockVectorDB;

    /// `batch_search_similar` on a freshly created collection must hand back
    /// one result set per query vector, each of them empty.
    #[tokio::test]
    async fn batch_search_similar_returns_one_result_per_query() {
        let (data_type, field_name) = ("TestType", "field");

        let db = MockVectorDB::new();
        db.create_collection(data_type, field_name, 3)
            .await
            .unwrap();

        // Nothing is indexed before searching, so every search is expected to
        // come back empty.
        let query_vectors: Vec<Vec<f32>> = vec![vec![1.0, 0.0, 0.0], vec![0.0, 1.0, 0.0]];

        let results = db
            .batch_search_similar(data_type, field_name, &query_vectors, 5)
            .await
            .unwrap();

        assert_eq!(results.len(), 2, "one result set per query vector");
        for hits in &results {
            assert!(hits.is_empty(), "no indexed points → empty result");
        }
    }
}