1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
use crate::error::VectorDBResult;
use crate::models::{SearchResult, VectorPoint};
use async_trait::async_trait;
use uuid::Uuid;
/// Vector database trait
#[async_trait]
pub trait VectorDB: Send + Sync {
/// Create a collection for (data_type, field_name) pair
///
/// # Arguments
/// * `data_type` - Type name (e.g., "DocumentChunk", "Entity")
/// * `field_name` - Field name (e.g., "text", "name")
/// * `dimension` - Vector dimension (e.g., 384 for MiniLM)
///
/// # Example
/// ```ignore
/// vector_db.create_collection("DocumentChunk", "text", 384).await?;
/// ```
async fn create_collection(
&self,
data_type: &str,
field_name: &str,
dimension: usize,
) -> VectorDBResult<()>;
/// Check if collection exists
///
/// # Arguments
/// * `data_type` - Type name
/// * `field_name` - Field name
async fn has_collection(&self, data_type: &str, field_name: &str) -> VectorDBResult<bool>;
/// Index data points (batch upsert with embeddings already generated)
///
/// # Arguments
/// * `data_type` - Type name
/// * `field_name` - Field name
/// * `points` - Vector points with embeddings
///
/// # Example
/// ```ignore
/// let points = vec![
/// VectorPoint::new(chunk_id, embedding)
/// .with_metadata("type", json!("DocumentChunk"))
/// .with_metadata("field", json!("text")),
/// ];
/// vector_db.index_points("DocumentChunk", "text", &points).await?;
/// ```
async fn index_points(
&self,
data_type: &str,
field_name: &str,
points: &[VectorPoint],
) -> VectorDBResult<()>;
/// Search for similar vectors
///
/// # Arguments
/// * `data_type` - Type name
/// * `field_name` - Field name
/// * `query_vector` - Query embedding vector
/// * `top_k` - Number of results to return
///
/// # Returns
/// Vector of search results sorted by similarity (descending)
async fn search_similar(
&self,
data_type: &str,
field_name: &str,
query_vector: &[f32],
top_k: usize,
) -> VectorDBResult<Vec<SearchResult>>;
/// Delete collection
async fn delete_collection(&self, data_type: &str, field_name: &str) -> VectorDBResult<()>;
/// Delete points by IDs from an existing collection.
async fn delete_points(
&self,
data_type: &str,
field_name: &str,
point_ids: &[Uuid],
) -> VectorDBResult<()> {
let _ = (data_type, field_name, point_ids);
Ok(())
}
/// Get collection statistics
async fn collection_size(&self, data_type: &str, field_name: &str) -> VectorDBResult<usize>;
/// List all existing vector collections as `(data_type, field_name)` pairs.
///
/// Default implementation returns an empty list. Backends should override
/// to return the actual collections they hold.
async fn list_collections(&self) -> VectorDBResult<Vec<(String, String)>> {
Ok(vec![])
}
/// Remove all vector collections.
///
/// Default implementation lists all collections and deletes each one.
/// Backends may override with a more efficient bulk operation.
///
/// Equivalent to Python's `vector_engine.prune()`.
async fn prune(&self) -> VectorDBResult<()> {
let collections = self.list_collections().await?;
for (data_type, field_name) in collections {
self.delete_collection(&data_type, &field_name).await?;
}
Ok(())
}
/// Perform multiple vector similarity searches in sequence.
///
/// Default implementation loops over [`search_similar`]. Backends may override
/// this with a native batch API for better performance.
async fn batch_search_similar(
&self,
data_type: &str,
field_name: &str,
query_vectors: &[Vec<f32>],
top_k: usize,
) -> VectorDBResult<Vec<Vec<SearchResult>>> {
let mut results = Vec::with_capacity(query_vectors.len());
for query_vector in query_vectors {
results.push(
self.search_similar(data_type, field_name, query_vector, top_k)
.await?,
);
}
Ok(results)
}
}
#[cfg(all(test, feature = "testing"))]
mod tests {
    #![allow(
        clippy::unwrap_used,
        clippy::expect_used,
        reason = "test code — panics are acceptable"
    )]

    use super::*;
    use crate::mock_vector_db::MockVectorDB;

    /// Exercises `batch_search_similar` through `MockVectorDB`: two query
    /// vectors must produce exactly two result sets.
    #[tokio::test]
    async fn batch_search_similar_returns_one_result_per_query() {
        let mock = MockVectorDB::new();
        mock.create_collection("TestType", "field", 3).await.unwrap();

        // Nothing is indexed, so the test expects every result set to be empty.
        let queries = vec![vec![1.0_f32, 0.0, 0.0], vec![0.0_f32, 1.0, 0.0]];
        let per_query = mock
            .batch_search_similar("TestType", "field", &queries, 5)
            .await
            .unwrap();

        assert_eq!(per_query.len(), 2, "one result set per query vector");
        for hits in &per_query {
            assert!(hits.is_empty(), "no indexed points → empty result");
        }
    }
}