1pub mod memory;
52
53#[cfg(feature = "qdrant")]
54pub mod qdrant;
55
56#[cfg(feature = "pinecone")]
57pub mod pinecone;
58
59#[cfg(feature = "weaviate")]
60pub mod weaviate;
61
62use async_trait::async_trait;
63use serde::{Deserialize, Serialize};
64use std::collections::HashMap;
65use thiserror::Error;
66
67#[derive(Error, Debug)]
69pub enum VectorError {
70 #[error("Connection error: {0}")]
71 Connection(String),
72
73 #[error("Invalid vector dimension: expected {expected}, got {actual}")]
74 DimensionMismatch { expected: usize, actual: usize },
75
76 #[error("Record not found: {0}")]
77 NotFound(String),
78
79 #[error("Collection/index error: {0}")]
80 Collection(String),
81
82 #[error("Serialization error: {0}")]
83 Serialization(String),
84
85 #[error("API error: {0}")]
86 Api(String),
87
88 #[error("Configuration error: {0}")]
89 Config(String),
90}
91
92pub type VectorResult<T> = Result<T, VectorError>;
94
95#[derive(Debug, Clone, Serialize, Deserialize)]
97pub struct VectorRecord {
98 pub id: String,
100 pub vector: Vec<f32>,
102 pub metadata: HashMap<String, serde_json::Value>,
104}
105
106impl VectorRecord {
107 pub fn new(id: impl Into<String>, vector: Vec<f32>) -> Self {
109 Self {
110 id: id.into(),
111 vector,
112 metadata: HashMap::new(),
113 }
114 }
115
116 pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<serde_json::Value>) -> Self {
118 self.metadata.insert(key.into(), value.into());
119 self
120 }
121
122 pub fn dimension(&self) -> usize {
124 self.vector.len()
125 }
126}
127
128#[derive(Debug, Clone, Serialize, Deserialize)]
130pub struct SearchResult {
131 pub id: String,
133 pub score: f32,
135 pub vector: Option<Vec<f32>>,
137 pub metadata: HashMap<String, serde_json::Value>,
139}
140
141#[derive(Debug, Clone, Serialize, Deserialize)]
143pub struct VectorStoreConfig {
144 pub dimension: usize,
146 pub collection: String,
148 pub metric: DistanceMetric,
150 pub backend: BackendConfig,
152}
153
154impl Default for VectorStoreConfig {
155 fn default() -> Self {
156 Self {
157 dimension: 384,
158 collection: "phago".to_string(),
159 metric: DistanceMetric::Cosine,
160 backend: BackendConfig::InMemory,
161 }
162 }
163}
164
165#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
167pub enum DistanceMetric {
168 Cosine,
170 Euclidean,
172 DotProduct,
174}
175
176#[derive(Debug, Clone, Serialize, Deserialize)]
178#[serde(tag = "type")]
179pub enum BackendConfig {
180 InMemory,
182
183 #[cfg(feature = "qdrant")]
185 Qdrant {
186 url: String,
188 api_key: Option<String>,
190 },
191
192 #[cfg(feature = "pinecone")]
194 Pinecone {
195 api_key: String,
197 environment: String,
199 index: String,
201 },
202
203 #[cfg(feature = "weaviate")]
205 Weaviate {
206 url: String,
208 api_key: Option<String>,
210 class_name: String,
212 },
213}
214
215#[async_trait]
220pub trait VectorStore: Send + Sync {
221 fn name(&self) -> &str;
223
224 fn dimension(&self) -> usize;
226
227 fn metric(&self) -> DistanceMetric;
229
230 async fn upsert(&self, records: Vec<VectorRecord>) -> VectorResult<()>;
234
235 async fn search(&self, vector: &[f32], k: usize) -> VectorResult<Vec<SearchResult>>;
239
240 async fn search_with_filter(
244 &self,
245 vector: &[f32],
246 k: usize,
247 filter: &HashMap<String, serde_json::Value>,
248 ) -> VectorResult<Vec<SearchResult>>;
249
250 async fn get(&self, id: &str) -> VectorResult<Option<VectorRecord>>;
252
253 async fn get_batch(&self, ids: &[&str]) -> VectorResult<Vec<VectorRecord>>;
255
256 async fn delete(&self, id: &str) -> VectorResult<()>;
258
259 async fn delete_batch(&self, ids: &[&str]) -> VectorResult<()>;
261
262 async fn count(&self) -> VectorResult<usize>;
264
265 async fn clear(&self) -> VectorResult<()>;
267}
268
269pub async fn create_store(config: VectorStoreConfig) -> VectorResult<Box<dyn VectorStore>> {
271 match config.backend {
272 BackendConfig::InMemory => {
273 Ok(Box::new(memory::InMemoryStore::with_config(
274 config.dimension,
275 config.metric,
276 )))
277 }
278
279 #[cfg(feature = "qdrant")]
280 BackendConfig::Qdrant { url, api_key } => {
281 let store = qdrant::QdrantStore::connect(
282 &url,
283 api_key.as_deref(),
284 &config.collection,
285 config.dimension,
286 config.metric,
287 ).await?;
288 Ok(Box::new(store))
289 }
290
291 #[cfg(feature = "pinecone")]
292 BackendConfig::Pinecone { api_key, environment, index } => {
293 let store = pinecone::PineconeStore::connect(
294 &api_key,
295 &environment,
296 &index,
297 config.dimension,
298 ).await?;
299 Ok(Box::new(store))
300 }
301
302 #[cfg(feature = "weaviate")]
303 BackendConfig::Weaviate { url, api_key, class_name } => {
304 let store = weaviate::WeaviateStore::connect(
305 &url,
306 api_key.as_deref(),
307 &class_name,
308 config.dimension,
309 ).await?;
310 Ok(Box::new(store))
311 }
312 }
313}
314
315pub use memory::InMemoryStore;
317
318#[cfg(feature = "qdrant")]
319pub use qdrant::QdrantStore;
320
321#[cfg(feature = "pinecone")]
322pub use pinecone::PineconeStore;
323
324#[cfg(feature = "weaviate")]
325pub use weaviate::WeaviateStore;
326
/// Numeric helpers for `f32` vectors.
///
/// All arithmetic is carried out in `f32`, so components whose squares
/// overflow or underflow `f32` yield infinite or zero norms.
pub mod util {
    /// Euclidean (L2) norm of `v`, accumulated in `f32`.
    fn l2_norm(v: &[f32]) -> f32 {
        v.iter().map(|x| x * x).sum::<f32>().sqrt()
    }

    /// Cosine similarity of `a` and `b`.
    ///
    /// Returns `0.0` when either vector has zero norm. Otherwise the result
    /// is clamped to `[-1.0, 1.0]`: the quotient can land slightly outside
    /// that range through `f32` rounding (for example on identical vectors),
    /// which would break callers that feed the value to `acos` or treat
    /// `1.0` as an upper bound.
    ///
    /// # Panics
    ///
    /// Panics if `a` and `b` have different lengths.
    pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
        // `dot_product` performs the length check.
        let dot = dot_product(a, b);
        let norm_a = l2_norm(a);
        let norm_b = l2_norm(b);

        if norm_a == 0.0 || norm_b == 0.0 {
            0.0
        } else {
            // `clamp` leaves NaN untouched, so non-finite inputs still surface.
            (dot / (norm_a * norm_b)).clamp(-1.0, 1.0)
        }
    }

    /// Euclidean distance between `a` and `b`.
    ///
    /// # Panics
    ///
    /// Panics if `a` and `b` have different lengths.
    pub fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
        assert_eq!(a.len(), b.len(), "Vectors must have same dimension");

        a.iter()
            .zip(b.iter())
            .map(|(x, y)| (x - y).powi(2))
            .sum::<f32>()
            .sqrt()
    }

    /// Dot product of `a` and `b`.
    ///
    /// # Panics
    ///
    /// Panics if `a` and `b` have different lengths.
    pub fn dot_product(a: &[f32], b: &[f32]) -> f32 {
        assert_eq!(a.len(), b.len(), "Vectors must have same dimension");

        a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
    }

    /// Scales `v` in place to unit Euclidean length.
    ///
    /// Leaves `v` untouched when its norm is not greater than zero, which
    /// covers empty slices, all-zero vectors and a NaN norm.
    pub fn normalize(v: &mut [f32]) {
        let norm = l2_norm(v);
        if norm > 0.0 {
            for x in v.iter_mut() {
                *x /= norm;
            }
        }
    }
}
372
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance used when comparing `f32` results.
    const EPS: f32 = 1e-6;

    /// Returns `true` when `got` is within `EPS` of `want`.
    fn close(got: f32, want: f32) -> bool {
        (got - want).abs() < EPS
    }

    /// Identical unit vectors score 1, orthogonal ones score 0.
    #[test]
    fn test_cosine_similarity() {
        let x_axis = vec![1.0, 0.0, 0.0];
        let x_axis_again = vec![1.0, 0.0, 0.0];
        assert!(close(util::cosine_similarity(&x_axis, &x_axis_again), 1.0));

        let y_axis = vec![0.0, 1.0, 0.0];
        assert!(close(util::cosine_similarity(&x_axis, &y_axis), 0.0));
    }

    /// The origin and a unit vector are one unit apart.
    #[test]
    fn test_euclidean_distance() {
        let origin = vec![0.0, 0.0, 0.0];
        let unit_x = vec![1.0, 0.0, 0.0];
        assert!(close(util::euclidean_distance(&origin, &unit_x), 1.0));
    }

    /// A 3-4-5 triangle normalizes to (0.6, 0.8).
    #[test]
    fn test_normalize() {
        let mut v = vec![3.0, 4.0];
        util::normalize(&mut v);
        assert!(close(v[0], 0.6));
        assert!(close(v[1], 0.8));
    }

    /// The builder keeps the id, the vector length and each metadata entry.
    #[test]
    fn test_vector_record() {
        let record = VectorRecord::new("test", vec![0.1, 0.2, 0.3])
            .with_metadata("title", "Test Document")
            .with_metadata("score", 0.95);

        assert_eq!(record.id, "test");
        assert_eq!(record.dimension(), 3);
        assert_eq!(record.metadata.len(), 2);
    }
}