cognee_vector/vector_db_trait.rs
1use crate::error::VectorDBResult;
2use crate::models::{SearchResult, VectorPoint};
3use async_trait::async_trait;
4use uuid::Uuid;
5
6/// Vector database trait
7#[async_trait]
8pub trait VectorDB: Send + Sync {
9 /// Create a collection for (data_type, field_name) pair
10 ///
11 /// # Arguments
12 /// * `data_type` - Type name (e.g., "DocumentChunk", "Entity")
13 /// * `field_name` - Field name (e.g., "text", "name")
14 /// * `dimension` - Vector dimension (e.g., 384 for MiniLM)
15 ///
16 /// # Example
17 /// ```ignore
18 /// vector_db.create_collection("DocumentChunk", "text", 384).await?;
19 /// ```
20 async fn create_collection(
21 &self,
22 data_type: &str,
23 field_name: &str,
24 dimension: usize,
25 ) -> VectorDBResult<()>;
26
27 /// Check if collection exists
28 ///
29 /// # Arguments
30 /// * `data_type` - Type name
31 /// * `field_name` - Field name
32 async fn has_collection(&self, data_type: &str, field_name: &str) -> VectorDBResult<bool>;
33
34 /// Index data points (batch upsert with embeddings already generated)
35 ///
36 /// # Arguments
37 /// * `data_type` - Type name
38 /// * `field_name` - Field name
39 /// * `points` - Vector points with embeddings
40 ///
41 /// # Example
42 /// ```ignore
43 /// let points = vec![
44 /// VectorPoint::new(chunk_id, embedding)
45 /// .with_metadata("type", json!("DocumentChunk"))
46 /// .with_metadata("field", json!("text")),
47 /// ];
48 /// vector_db.index_points("DocumentChunk", "text", &points).await?;
49 /// ```
50 async fn index_points(
51 &self,
52 data_type: &str,
53 field_name: &str,
54 points: &[VectorPoint],
55 ) -> VectorDBResult<()>;
56
57 /// Search for similar vectors
58 ///
59 /// # Arguments
60 /// * `data_type` - Type name
61 /// * `field_name` - Field name
62 /// * `query_vector` - Query embedding vector
63 /// * `top_k` - Number of results to return
64 ///
65 /// # Returns
66 /// Vector of search results sorted by similarity (descending)
67 async fn search_similar(
68 &self,
69 data_type: &str,
70 field_name: &str,
71 query_vector: &[f32],
72 top_k: usize,
73 ) -> VectorDBResult<Vec<SearchResult>>;
74
75 /// Delete collection
76 async fn delete_collection(&self, data_type: &str, field_name: &str) -> VectorDBResult<()>;
77
78 /// Delete points by IDs from an existing collection.
79 async fn delete_points(
80 &self,
81 data_type: &str,
82 field_name: &str,
83 point_ids: &[Uuid],
84 ) -> VectorDBResult<()> {
85 let _ = (data_type, field_name, point_ids);
86 Ok(())
87 }
88
89 /// Get collection statistics
90 async fn collection_size(&self, data_type: &str, field_name: &str) -> VectorDBResult<usize>;
91
92 /// List all existing vector collections as `(data_type, field_name)` pairs.
93 ///
94 /// Default implementation returns an empty list. Backends should override
95 /// to return the actual collections they hold.
96 async fn list_collections(&self) -> VectorDBResult<Vec<(String, String)>> {
97 Ok(vec![])
98 }
99
100 /// Remove all vector collections.
101 ///
102 /// Default implementation lists all collections and deletes each one.
103 /// Backends may override with a more efficient bulk operation.
104 ///
105 /// Equivalent to Python's `vector_engine.prune()`.
106 async fn prune(&self) -> VectorDBResult<()> {
107 let collections = self.list_collections().await?;
108 for (data_type, field_name) in collections {
109 self.delete_collection(&data_type, &field_name).await?;
110 }
111 Ok(())
112 }
113
114 /// Perform multiple vector similarity searches in sequence.
115 ///
116 /// Default implementation loops over [`search_similar`]. Backends may override
117 /// this with a native batch API for better performance.
118 async fn batch_search_similar(
119 &self,
120 data_type: &str,
121 field_name: &str,
122 query_vectors: &[Vec<f32>],
123 top_k: usize,
124 ) -> VectorDBResult<Vec<Vec<SearchResult>>> {
125 let mut results = Vec::with_capacity(query_vectors.len());
126 for query_vector in query_vectors {
127 results.push(
128 self.search_similar(data_type, field_name, query_vector, top_k)
129 .await?,
130 );
131 }
132 Ok(results)
133 }
134}
135
// Only built for test runs with the `testing` feature enabled, which is
// presumably what makes `MockVectorDB` available — confirm in Cargo.toml.
#[cfg(all(test, feature = "testing"))]
mod tests {
    #![allow(
        clippy::unwrap_used,
        clippy::expect_used,
        reason = "test code — panics are acceptable"
    )]
    use super::*;
    use crate::mock_vector_db::MockVectorDB;

    /// The batch search must yield exactly one result list per query vector,
    /// each of them empty when the collection holds no points.
    #[tokio::test]
    async fn batch_search_similar_returns_one_result_per_query() {
        let store = MockVectorDB::new();
        store
            .create_collection("TestType", "field", 3)
            .await
            .unwrap();

        // Nothing has been indexed, so every individual search comes back empty.
        let queries = vec![vec![1.0_f32, 0.0, 0.0], vec![0.0_f32, 1.0, 0.0]];

        let per_query_hits = store
            .batch_search_similar("TestType", "field", &queries, 5)
            .await
            .unwrap();

        assert_eq!(per_query_hits.len(), 2, "one result set per query vector");
        // Length is pinned to 2 above, so this visits exactly both result sets.
        for hits in &per_query_hits {
            assert!(hits.is_empty(), "no indexed points → empty result");
        }
    }
}
163}