// skill_runtime/vector_store/mod.rs
1//! Vector Store abstraction for pluggable vector database backends
2//!
3//! This module provides a trait-based abstraction for vector storage,
4//! enabling different backends (in-memory, Qdrant, Pinecone, etc.)
5//! to be used interchangeably for semantic skill search.
6//!
7//! # Architecture
8//!
9//! ```text
10//! ┌──────────────────────────────────────────────────────────────┐
11//! │ VectorStore Trait │
12//! │ upsert, search, delete, get, count, health_check │
13//! └──────────────────────────────────────────────────────────────┘
14//! │
15//! ┌───────────────────┼───────────────────┐
16//! ▼ ▼ ▼
17//! ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
18//! │ InMemory │ │ Qdrant │ │ Pinecone │
19//! │ (default) │ │ (local/ │ │ (cloud) │
20//! │ │ │ cloud) │ │ │
21//! └─────────────┘ └─────────────┘ └─────────────┘
22//! ```
23//!
24//! # Example
25//!
26//! ```ignore
27//! use skill_runtime::vector_store::{VectorStore, InMemoryVectorStore, EmbeddedDocument};
28//!
29//! let store = InMemoryVectorStore::new();
30//!
31//! // Upsert documents
32//! let docs = vec![EmbeddedDocument::new("id1", vec![0.1, 0.2, 0.3])];
33//! store.upsert(docs).await?;
34//!
35//! // Search
36//! let results = store.search(vec![0.1, 0.2, 0.3], None, 5).await?;
37//! ```
38
39mod types;
40mod in_memory;
41mod file;
42
43#[cfg(feature = "qdrant")]
44mod qdrant;
45
46pub use types::*;
47pub use in_memory::InMemoryVectorStore;
48pub use file::{FileVectorStore, FileConfig};
49
50#[cfg(feature = "qdrant")]
51pub use qdrant::{QdrantVectorStore, QdrantConfig};
52
53use async_trait::async_trait;
54use anyhow::Result;
55
/// Trait for vector storage backends
///
/// Implementors provide vector similarity search with metadata filtering.
/// All operations are async to support both local and remote backends.
///
/// The `Send + Sync` supertraits allow a store to be shared across async
/// tasks (e.g. behind an `Arc<dyn VectorStore>`).
#[async_trait]
pub trait VectorStore: Send + Sync {
    /// Insert or update documents in the store
    ///
    /// Documents with existing IDs will be updated, new IDs will be inserted.
    /// Returns [`UpsertStats`] describing the outcome of the operation.
    async fn upsert(&self, documents: Vec<EmbeddedDocument>) -> Result<UpsertStats>;

    /// Search for similar vectors
    ///
    /// # Arguments
    /// * `query_embedding` - The query vector to find similar documents for
    /// * `filter` - Optional metadata filter to narrow results
    /// * `top_k` - Maximum number of results to return
    ///
    /// # Returns
    /// Results sorted by descending similarity score
    async fn search(
        &self,
        query_embedding: Vec<f32>,
        filter: Option<Filter>,
        top_k: usize,
    ) -> Result<Vec<SearchResult>>;

    /// Delete documents by ID
    ///
    /// Returns [`DeleteStats`] describing the outcome of the operation.
    async fn delete(&self, ids: Vec<String>) -> Result<DeleteStats>;

    /// Get documents by ID
    async fn get(&self, ids: Vec<String>) -> Result<Vec<EmbeddedDocument>>;

    /// Count documents, optionally filtered
    async fn count(&self, filter: Option<Filter>) -> Result<usize>;

    /// Check backend health/connectivity
    async fn health_check(&self) -> Result<HealthStatus>;

    /// Get the name of this backend (for logging/debugging)
    fn backend_name(&self) -> &'static str;

    /// Get configured vector dimensions (if known)
    ///
    /// Defaults to `None`; backends that enforce a fixed vector size
    /// should override this.
    fn dimensions(&self) -> Option<usize> {
        None
    }
}
103
/// Compute cosine similarity between two vectors
///
/// Returns a value between -1 and 1, where 1 means identical direction,
/// 0 means orthogonal, and -1 means opposite direction.
///
/// Mismatched lengths, empty inputs, and zero-magnitude vectors all
/// yield `0.0` rather than an error.
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // Degenerate inputs: no meaningful angle exists, report "orthogonal".
    if a.is_empty() || a.len() != b.len() {
        return 0.0;
    }

    // Accumulate dot product and squared magnitudes in a single pass.
    let mut dot = 0.0f32;
    let mut sq_a = 0.0f32;
    let mut sq_b = 0.0f32;
    for (x, y) in a.iter().zip(b.iter()) {
        dot += x * y;
        sq_a += x * x;
        sq_b += y * y;
    }

    // A zero-magnitude vector has no direction; avoid dividing by zero.
    let denom = sq_a.sqrt() * sq_b.sqrt();
    if denom == 0.0 {
        0.0
    } else {
        dot / denom
    }
}
123
/// Compute euclidean distance between two vectors
///
/// Returns `f32::MAX` as a sentinel when the slices have different
/// lengths, since no distance is defined in that case.
pub fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() {
        return f32::MAX;
    }

    // Sum of squared component differences, then the square root.
    let mut sum_sq = 0.0f32;
    for (x, y) in a.iter().zip(b.iter()) {
        let d = x - y;
        sum_sq += d * d;
    }
    sum_sq.sqrt()
}
136
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert two floats agree within a small absolute tolerance.
    fn assert_close(actual: f32, expected: f32) {
        assert!(
            (actual - expected).abs() < 1e-6,
            "expected {}, got {}",
            expected,
            actual
        );
    }

    #[test]
    fn test_cosine_similarity_identical() {
        // Same direction => similarity 1.
        assert_close(cosine_similarity(&[1.0, 0.0, 0.0], &[1.0, 0.0, 0.0]), 1.0);
    }

    #[test]
    fn test_cosine_similarity_orthogonal() {
        // Perpendicular vectors => similarity 0.
        assert_close(cosine_similarity(&[1.0, 0.0, 0.0], &[0.0, 1.0, 0.0]), 0.0);
    }

    #[test]
    fn test_cosine_similarity_opposite() {
        // Opposite direction => similarity -1.
        assert_close(cosine_similarity(&[1.0, 0.0, 0.0], &[-1.0, 0.0, 0.0]), -1.0);
    }

    #[test]
    fn test_cosine_similarity_empty() {
        // Empty inputs fall back to the 0.0 sentinel.
        let empty: Vec<f32> = vec![];
        assert_eq!(cosine_similarity(&empty, &empty), 0.0);
    }

    #[test]
    fn test_cosine_similarity_different_lengths() {
        // Mismatched lengths fall back to the 0.0 sentinel.
        assert_eq!(cosine_similarity(&[1.0, 0.0], &[1.0, 0.0, 0.0]), 0.0);
    }

    #[test]
    fn test_euclidean_distance_same_point() {
        assert_close(euclidean_distance(&[1.0, 2.0, 3.0], &[1.0, 2.0, 3.0]), 0.0);
    }

    #[test]
    fn test_euclidean_distance_unit() {
        assert_close(euclidean_distance(&[0.0, 0.0, 0.0], &[1.0, 0.0, 0.0]), 1.0);
    }
}
189}