Skip to main content

phago_vectors/
lib.rs

1//! # Phago Vectors
2//!
//! Vector database adapters for the Phago biological computing framework.
4//!
5//! This crate provides a unified interface for storing and searching embeddings
6//! across different vector database backends.
7//!
8//! ## Supported Backends
9//!
10//! | Backend | Feature Flag | Description |
11//! |---------|--------------|-------------|
12//! | In-Memory | (default) | Simple brute-force search, good for testing |
13//! | Qdrant | `qdrant` | High-performance vector database |
14//! | Pinecone | `pinecone` | Managed vector database service |
15//! | Weaviate | `weaviate` | Open-source vector search engine |
16//!
17//! ## Quick Start
18//!
19//! ```rust,ignore
20//! use phago_vectors::{VectorStore, InMemoryStore, VectorRecord};
21//!
22//! #[tokio::main]
23//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
24//!     let store = InMemoryStore::new(384); // 384-dimensional vectors
25//!
26//!     // Store a vector
27//!     let record = VectorRecord::new("doc1", vec![0.1; 384])
28//!         .with_metadata("title", "Introduction to Cells");
29//!     store.upsert(vec![record]).await?;
30//!
31//!     // Search for similar vectors
32//!     let results = store.search(&[0.1; 384], 5).await?;
33//!     for result in results {
34//!         println!("{}: {:.3}", result.id, result.score);
35//!     }
36//!
37//!     Ok(())
38//! }
39//! ```
40//!
41//! ## Feature Flags
42//!
43//! ```toml
44//! # Use Qdrant backend
45//! phago-vectors = { version = "0.6", features = ["qdrant"] }
46//!
47//! # Use all backends
48//! phago-vectors = { version = "0.6", features = ["all"] }
49//! ```
50
51pub mod memory;
52
53#[cfg(feature = "qdrant")]
54pub mod qdrant;
55
56#[cfg(feature = "pinecone")]
57pub mod pinecone;
58
59#[cfg(feature = "weaviate")]
60pub mod weaviate;
61
62use async_trait::async_trait;
63use serde::{Deserialize, Serialize};
64use std::collections::HashMap;
65use thiserror::Error;
66
67/// Errors that can occur when working with vector stores.
68#[derive(Error, Debug)]
69pub enum VectorError {
70    #[error("Connection error: {0}")]
71    Connection(String),
72
73    #[error("Invalid vector dimension: expected {expected}, got {actual}")]
74    DimensionMismatch { expected: usize, actual: usize },
75
76    #[error("Record not found: {0}")]
77    NotFound(String),
78
79    #[error("Collection/index error: {0}")]
80    Collection(String),
81
82    #[error("Serialization error: {0}")]
83    Serialization(String),
84
85    #[error("API error: {0}")]
86    Api(String),
87
88    #[error("Configuration error: {0}")]
89    Config(String),
90}
91
92/// Result type for vector operations.
93pub type VectorResult<T> = Result<T, VectorError>;
94
95/// A vector record to store in the database.
96#[derive(Debug, Clone, Serialize, Deserialize)]
97pub struct VectorRecord {
98    /// Unique identifier for this record.
99    pub id: String,
100    /// The embedding vector.
101    pub vector: Vec<f32>,
102    /// Optional metadata associated with the record.
103    pub metadata: HashMap<String, serde_json::Value>,
104}
105
106impl VectorRecord {
107    /// Create a new vector record.
108    pub fn new(id: impl Into<String>, vector: Vec<f32>) -> Self {
109        Self {
110            id: id.into(),
111            vector,
112            metadata: HashMap::new(),
113        }
114    }
115
116    /// Add metadata to the record.
117    pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<serde_json::Value>) -> Self {
118        self.metadata.insert(key.into(), value.into());
119        self
120    }
121
122    /// Get the vector dimension.
123    pub fn dimension(&self) -> usize {
124        self.vector.len()
125    }
126}
127
128/// A search result from the vector store.
129#[derive(Debug, Clone, Serialize, Deserialize)]
130pub struct SearchResult {
131    /// The record ID.
132    pub id: String,
133    /// Similarity score (higher is more similar).
134    pub score: f32,
135    /// The vector (if requested).
136    pub vector: Option<Vec<f32>>,
137    /// Metadata associated with the record.
138    pub metadata: HashMap<String, serde_json::Value>,
139}
140
141/// Configuration for creating a vector store.
142#[derive(Debug, Clone, Serialize, Deserialize)]
143pub struct VectorStoreConfig {
144    /// Vector dimension.
145    pub dimension: usize,
146    /// Collection/index name.
147    pub collection: String,
148    /// Distance metric.
149    pub metric: DistanceMetric,
150    /// Backend-specific configuration.
151    pub backend: BackendConfig,
152}
153
154impl Default for VectorStoreConfig {
155    fn default() -> Self {
156        Self {
157            dimension: 384,
158            collection: "phago".to_string(),
159            metric: DistanceMetric::Cosine,
160            backend: BackendConfig::InMemory,
161        }
162    }
163}
164
165/// Distance metric for similarity search.
166#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
167pub enum DistanceMetric {
168    /// Cosine similarity (normalized dot product).
169    Cosine,
170    /// Euclidean distance (L2).
171    Euclidean,
172    /// Dot product (inner product).
173    DotProduct,
174}
175
176/// Backend-specific configuration.
177#[derive(Debug, Clone, Serialize, Deserialize)]
178#[serde(tag = "type")]
179pub enum BackendConfig {
180    /// In-memory vector store (for testing).
181    InMemory,
182
183    /// Qdrant vector database.
184    #[cfg(feature = "qdrant")]
185    Qdrant {
186        /// Qdrant server URL.
187        url: String,
188        /// API key (optional).
189        api_key: Option<String>,
190    },
191
192    /// Pinecone managed service.
193    #[cfg(feature = "pinecone")]
194    Pinecone {
195        /// Pinecone API key.
196        api_key: String,
197        /// Environment (e.g., "us-east-1-aws").
198        environment: String,
199        /// Index name.
200        index: String,
201    },
202
203    /// Weaviate vector search.
204    #[cfg(feature = "weaviate")]
205    Weaviate {
206        /// Weaviate server URL.
207        url: String,
208        /// API key (optional).
209        api_key: Option<String>,
210        /// Class name.
211        class_name: String,
212    },
213}
214
215/// Abstract interface for vector storage and search.
216///
217/// Implementations of this trait provide vector storage and similarity search
218/// capabilities, allowing Phago to use different vector database backends.
219#[async_trait]
220pub trait VectorStore: Send + Sync {
221    /// Get the name of this backend.
222    fn name(&self) -> &str;
223
224    /// Get the vector dimension.
225    fn dimension(&self) -> usize;
226
227    /// Get the distance metric.
228    fn metric(&self) -> DistanceMetric;
229
230    /// Insert or update records in the store.
231    ///
232    /// If a record with the same ID exists, it will be updated.
233    async fn upsert(&self, records: Vec<VectorRecord>) -> VectorResult<()>;
234
235    /// Search for similar vectors.
236    ///
237    /// Returns the top `k` most similar records.
238    async fn search(&self, vector: &[f32], k: usize) -> VectorResult<Vec<SearchResult>>;
239
240    /// Search with metadata filter.
241    ///
242    /// Only returns records matching the filter criteria.
243    async fn search_with_filter(
244        &self,
245        vector: &[f32],
246        k: usize,
247        filter: &HashMap<String, serde_json::Value>,
248    ) -> VectorResult<Vec<SearchResult>>;
249
250    /// Get a record by ID.
251    async fn get(&self, id: &str) -> VectorResult<Option<VectorRecord>>;
252
253    /// Get multiple records by ID.
254    async fn get_batch(&self, ids: &[&str]) -> VectorResult<Vec<VectorRecord>>;
255
256    /// Delete a record by ID.
257    async fn delete(&self, id: &str) -> VectorResult<()>;
258
259    /// Delete multiple records by ID.
260    async fn delete_batch(&self, ids: &[&str]) -> VectorResult<()>;
261
262    /// Get the total number of records in the store.
263    async fn count(&self) -> VectorResult<usize>;
264
265    /// Clear all records from the store.
266    async fn clear(&self) -> VectorResult<()>;
267}
268
269/// Create a vector store from configuration.
270pub async fn create_store(config: VectorStoreConfig) -> VectorResult<Box<dyn VectorStore>> {
271    match config.backend {
272        BackendConfig::InMemory => {
273            Ok(Box::new(memory::InMemoryStore::with_config(
274                config.dimension,
275                config.metric,
276            )))
277        }
278
279        #[cfg(feature = "qdrant")]
280        BackendConfig::Qdrant { url, api_key } => {
281            let store = qdrant::QdrantStore::connect(
282                &url,
283                api_key.as_deref(),
284                &config.collection,
285                config.dimension,
286                config.metric,
287            ).await?;
288            Ok(Box::new(store))
289        }
290
291        #[cfg(feature = "pinecone")]
292        BackendConfig::Pinecone { api_key, environment, index } => {
293            let store = pinecone::PineconeStore::connect(
294                &api_key,
295                &environment,
296                &index,
297                config.dimension,
298            ).await?;
299            Ok(Box::new(store))
300        }
301
302        #[cfg(feature = "weaviate")]
303        BackendConfig::Weaviate { url, api_key, class_name } => {
304            let store = weaviate::WeaviateStore::connect(
305                &url,
306                api_key.as_deref(),
307                &class_name,
308                config.dimension,
309            ).await?;
310            Ok(Box::new(store))
311        }
312    }
313}
314
315// Re-export commonly used types
316pub use memory::InMemoryStore;
317
318#[cfg(feature = "qdrant")]
319pub use qdrant::QdrantStore;
320
321#[cfg(feature = "pinecone")]
322pub use pinecone::PineconeStore;
323
324#[cfg(feature = "weaviate")]
325pub use weaviate::WeaviateStore;
326
/// Utility functions for vector operations.
///
/// All sums are accumulated in `f64` and narrowed to `f32` once at the end.
/// Squaring or multiplying `f32` components directly can overflow to infinity
/// or underflow to zero for finite inputs (e.g. `1e20 * 1e20`), which would
/// turn a well-defined result into `NaN`, infinity, or a spurious zero.
pub mod util {
    /// Euclidean (L2) norm of `v`, computed in `f64`.
    fn l2_norm(v: &[f32]) -> f64 {
        v.iter()
            .map(|&x| {
                let x = f64::from(x);
                x * x
            })
            .sum::<f64>()
            .sqrt()
    }

    /// Compute cosine similarity between two vectors.
    ///
    /// Returns `0.0` when either vector has zero norm.
    ///
    /// # Panics
    ///
    /// Panics if `a` and `b` have different lengths.
    pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
        assert_eq!(a.len(), b.len(), "Vectors must have same dimension");

        let dot: f64 = a
            .iter()
            .zip(b)
            .map(|(&x, &y)| f64::from(x) * f64::from(y))
            .sum();
        let norm_a = l2_norm(a);
        let norm_b = l2_norm(b);

        if norm_a == 0.0 || norm_b == 0.0 {
            0.0
        } else {
            // The ratio lies in (about) [-1, 1], so narrowing cannot overflow.
            (dot / (norm_a * norm_b)) as f32
        }
    }

    /// Compute Euclidean distance between two vectors.
    ///
    /// # Panics
    ///
    /// Panics if `a` and `b` have different lengths.
    pub fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
        assert_eq!(a.len(), b.len(), "Vectors must have same dimension");

        let sum_sq: f64 = a
            .iter()
            .zip(b)
            .map(|(&x, &y)| {
                let diff = f64::from(x) - f64::from(y);
                diff * diff
            })
            .sum();

        // Narrowing rounds to the nearest `f32` (infinity if out of range).
        sum_sq.sqrt() as f32
    }

    /// Compute dot product between two vectors.
    ///
    /// # Panics
    ///
    /// Panics if `a` and `b` have different lengths.
    pub fn dot_product(a: &[f32], b: &[f32]) -> f32 {
        assert_eq!(a.len(), b.len(), "Vectors must have same dimension");

        let dot: f64 = a
            .iter()
            .zip(b)
            .map(|(&x, &y)| f64::from(x) * f64::from(y))
            .sum();

        // Narrowing rounds to the nearest `f32` (infinity if out of range).
        dot as f32
    }

    /// Normalize a vector to unit length, in place.
    ///
    /// A vector whose norm is zero (or not a number) is left unchanged.
    pub fn normalize(v: &mut [f32]) {
        let norm = l2_norm(v);
        if norm > 0.0 {
            for x in v.iter_mut() {
                *x = (f64::from(*x) / norm) as f32;
            }
        }
    }
}
372
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance for every floating-point comparison in this module.
    const EPS: f32 = 1e-6;

    /// True when `actual` lies strictly within `EPS` of `expected`.
    fn close(actual: f32, expected: f32) -> bool {
        (actual - expected).abs() < EPS
    }

    /// Identical vectors score 1, orthogonal vectors score 0.
    #[test]
    fn test_cosine_similarity() {
        let x_axis = vec![1.0_f32, 0.0, 0.0];
        let x_axis_again = vec![1.0_f32, 0.0, 0.0];
        let y_axis = vec![0.0_f32, 1.0, 0.0];

        assert!(close(util::cosine_similarity(&x_axis, &x_axis_again), 1.0));
        assert!(close(util::cosine_similarity(&x_axis, &y_axis), 0.0));
    }

    /// The origin is at distance 1 from a unit basis vector.
    #[test]
    fn test_euclidean_distance() {
        let origin = vec![0.0_f32, 0.0, 0.0];
        let unit_x = vec![1.0_f32, 0.0, 0.0];

        assert!(close(util::euclidean_distance(&origin, &unit_x), 1.0));
    }

    /// A 3-4-5 triangle normalizes to (0.6, 0.8).
    #[test]
    fn test_normalize() {
        let mut v = vec![3.0_f32, 4.0];
        util::normalize(&mut v);

        assert!(close(v[0], 0.6));
        assert!(close(v[1], 0.8));
    }

    /// The builder keeps the ID, the vector length, and both metadata entries.
    #[test]
    fn test_vector_record() {
        let embedding = vec![0.1, 0.2, 0.3];
        let record = VectorRecord::new("test", embedding)
            .with_metadata("title", "Test Document")
            .with_metadata("score", 0.95);

        assert_eq!(record.id, "test");
        assert_eq!(record.dimension(), 3);
        assert_eq!(record.metadata.len(), 2);
    }
}