Skip to main content

synaptic_pinecone/
vector_store.rs

1use std::collections::HashMap;
2
3use async_trait::async_trait;
4use serde_json::Value;
5use synaptic_core::{Document, Embeddings, SynapticError, VectorStore};
6
7// ---------------------------------------------------------------------------
8// PineconeConfig
9// ---------------------------------------------------------------------------
10
/// Configuration for connecting to a Pinecone index.
///
/// `Debug` is implemented by hand rather than derived so that the API key is
/// never written to logs or panic messages: formatting a config with `{:?}`
/// shows `[REDACTED]` in place of the secret. The host and namespace are
/// printed as-is.
#[derive(Clone)]
pub struct PineconeConfig {
    /// Pinecone API key.
    pub api_key: String,
    /// The index host URL (e.g. `https://my-index-abc123.svc.pinecone.io`).
    pub host: String,
    /// Optional namespace for partitioning vectors within the index.
    pub namespace: Option<String>,
}

impl std::fmt::Debug for PineconeConfig {
    /// Format the config for diagnostics, masking the API key.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("PineconeConfig")
            // The key is a credential; never echo it, not even partially.
            .field("api_key", &"[REDACTED]")
            .field("host", &self.host)
            .field("namespace", &self.namespace)
            .finish()
    }
}
21
22impl PineconeConfig {
23    /// Create a new config with the required parameters.
24    pub fn new(api_key: impl Into<String>, host: impl Into<String>) -> Self {
25        Self {
26            api_key: api_key.into(),
27            host: host.into(),
28            namespace: None,
29        }
30    }
31
32    /// Set the namespace for vector operations.
33    pub fn with_namespace(mut self, namespace: impl Into<String>) -> Self {
34        self.namespace = Some(namespace.into());
35        self
36    }
37}
38
39// ---------------------------------------------------------------------------
40// PineconeVectorStore
41// ---------------------------------------------------------------------------
42
43/// A [`VectorStore`] implementation backed by [Pinecone](https://www.pinecone.io/).
44///
45/// Uses the Pinecone REST API for all operations. Each document is stored as a
46/// vector with:
47/// - **id**: the document ID (auto-generated UUID v4 if empty)
48/// - **values**: the embedding vector
49/// - **metadata**: includes `content` (the document text) plus all document metadata
50pub struct PineconeVectorStore {
51    config: PineconeConfig,
52    client: reqwest::Client,
53}
54
55impl PineconeVectorStore {
56    /// Create a new store with the given configuration.
57    pub fn new(config: PineconeConfig) -> Self {
58        Self {
59            config,
60            client: reqwest::Client::new(),
61        }
62    }
63
64    /// Return a reference to the configuration.
65    pub fn config(&self) -> &PineconeConfig {
66        &self.config
67    }
68
69    /// Build the full URL for an API endpoint.
70    fn url(&self, path: &str) -> String {
71        let host = self.config.host.trim_end_matches('/');
72        format!("{host}{path}")
73    }
74
75    /// Create a JSON body that includes the namespace field if configured.
76    fn with_namespace(&self, mut body: serde_json::Value) -> serde_json::Value {
77        if let Some(ref ns) = self.config.namespace {
78            body["namespace"] = Value::String(ns.clone());
79        }
80        body
81    }
82
83    /// Send a POST request to the Pinecone API.
84    async fn post(&self, path: &str, body: serde_json::Value) -> Result<Value, SynapticError> {
85        let response = self
86            .client
87            .post(self.url(path))
88            .header("Api-Key", &self.config.api_key)
89            .header("Content-Type", "application/json")
90            .json(&body)
91            .send()
92            .await
93            .map_err(|e| SynapticError::VectorStore(format!("Pinecone request failed: {e}")))?;
94
95        let status = response.status();
96        let text = response
97            .text()
98            .await
99            .map_err(|e| SynapticError::VectorStore(format!("failed to read response: {e}")))?;
100
101        if !status.is_success() {
102            return Err(SynapticError::VectorStore(format!(
103                "Pinecone API error (HTTP {status}): {text}"
104            )));
105        }
106
107        serde_json::from_str(&text).map_err(|e| {
108            SynapticError::VectorStore(format!("failed to parse Pinecone response: {e}"))
109        })
110    }
111}
112
113// ---------------------------------------------------------------------------
114// VectorStore implementation
115// ---------------------------------------------------------------------------
116
117#[async_trait]
118impl VectorStore for PineconeVectorStore {
119    async fn add_documents(
120        &self,
121        docs: Vec<Document>,
122        embeddings: &dyn Embeddings,
123    ) -> Result<Vec<String>, SynapticError> {
124        if docs.is_empty() {
125            return Ok(Vec::new());
126        }
127
128        // Compute embeddings for all documents.
129        let texts: Vec<&str> = docs.iter().map(|d| d.content.as_str()).collect();
130        let vectors = embeddings.embed_documents(&texts).await?;
131
132        let mut ids = Vec::with_capacity(docs.len());
133        let mut pinecone_vectors = Vec::with_capacity(docs.len());
134
135        for (doc, vector) in docs.into_iter().zip(vectors) {
136            let id = if doc.id.is_empty() {
137                uuid::Uuid::new_v4().to_string()
138            } else {
139                doc.id.clone()
140            };
141
142            // Build metadata: store the document content plus all existing metadata.
143            let mut metadata = serde_json::Map::new();
144            metadata.insert("content".to_string(), Value::String(doc.content));
145            for (k, v) in doc.metadata {
146                metadata.insert(k, v);
147            }
148
149            pinecone_vectors.push(serde_json::json!({
150                "id": id,
151                "values": vector,
152                "metadata": metadata,
153            }));
154
155            ids.push(id);
156        }
157
158        let body = self.with_namespace(serde_json::json!({
159            "vectors": pinecone_vectors,
160        }));
161
162        self.post("/vectors/upsert", body).await?;
163
164        Ok(ids)
165    }
166
167    async fn similarity_search(
168        &self,
169        query: &str,
170        k: usize,
171        embeddings: &dyn Embeddings,
172    ) -> Result<Vec<Document>, SynapticError> {
173        let results = self
174            .similarity_search_with_score(query, k, embeddings)
175            .await?;
176        Ok(results.into_iter().map(|(doc, _)| doc).collect())
177    }
178
179    async fn similarity_search_with_score(
180        &self,
181        query: &str,
182        k: usize,
183        embeddings: &dyn Embeddings,
184    ) -> Result<Vec<(Document, f32)>, SynapticError> {
185        let query_vec = embeddings.embed_query(query).await?;
186        self.similarity_search_by_vector_with_score(&query_vec, k)
187            .await
188    }
189
190    async fn similarity_search_by_vector(
191        &self,
192        embedding: &[f32],
193        k: usize,
194    ) -> Result<Vec<Document>, SynapticError> {
195        let results = self
196            .similarity_search_by_vector_with_score(embedding, k)
197            .await?;
198        Ok(results.into_iter().map(|(doc, _)| doc).collect())
199    }
200
201    async fn delete(&self, ids: &[&str]) -> Result<(), SynapticError> {
202        if ids.is_empty() {
203            return Ok(());
204        }
205
206        let id_values: Vec<Value> = ids.iter().map(|id| Value::String(id.to_string())).collect();
207        let body = self.with_namespace(serde_json::json!({
208            "ids": id_values,
209        }));
210
211        self.post("/vectors/delete", body).await?;
212
213        Ok(())
214    }
215}
216
217impl PineconeVectorStore {
218    /// Search by vector and return documents with their similarity scores.
219    async fn similarity_search_by_vector_with_score(
220        &self,
221        embedding: &[f32],
222        k: usize,
223    ) -> Result<Vec<(Document, f32)>, SynapticError> {
224        let body = self.with_namespace(serde_json::json!({
225            "vector": embedding,
226            "topK": k,
227            "includeMetadata": true,
228        }));
229
230        let response = self.post("/query", body).await?;
231
232        let matches = response
233            .get("matches")
234            .and_then(|m| m.as_array())
235            .cloned()
236            .unwrap_or_default();
237
238        let mut results = Vec::with_capacity(matches.len());
239
240        for m in matches {
241            let id = m
242                .get("id")
243                .and_then(|v| v.as_str())
244                .unwrap_or("")
245                .to_string();
246
247            let score = m.get("score").and_then(|v| v.as_f64()).unwrap_or(0.0) as f32;
248
249            let metadata_obj = m
250                .get("metadata")
251                .and_then(|v| v.as_object())
252                .cloned()
253                .unwrap_or_default();
254
255            // Extract document content from metadata.
256            let content = metadata_obj
257                .get("content")
258                .and_then(|v| v.as_str())
259                .unwrap_or("")
260                .to_string();
261
262            // Build document metadata (exclude the "content" key).
263            let metadata: HashMap<String, Value> = metadata_obj
264                .into_iter()
265                .filter(|(k, _)| k != "content")
266                .collect();
267
268            let doc = Document::with_metadata(id, content, metadata);
269            results.push((doc, score));
270        }
271
272        Ok(results)
273    }
274}
275
276// ---------------------------------------------------------------------------
277// Tests
278// ---------------------------------------------------------------------------
279
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a store for `host` using the dummy API key `"key"` and no namespace.
    fn store_for(host: &str) -> PineconeVectorStore {
        PineconeVectorStore::new(PineconeConfig::new("key", host))
    }

    /// Minimal request body used by the namespace tests.
    fn sample_body() -> Value {
        serde_json::json!({"vector": [1.0]})
    }

    #[test]
    fn config_new_sets_fields() {
        let cfg = PineconeConfig::new("my-api-key", "https://my-index.svc.pinecone.io");

        assert_eq!(cfg.api_key, "my-api-key");
        assert_eq!(cfg.host, "https://my-index.svc.pinecone.io");
        assert!(cfg.namespace.is_none());
    }

    #[test]
    fn config_with_namespace() {
        let cfg = PineconeConfig::new("key", "https://host.pinecone.io");
        let cfg = cfg.with_namespace("my-namespace");

        assert_eq!(cfg.namespace.as_deref(), Some("my-namespace"));
    }

    #[test]
    fn config_builder_chain() {
        let cfg = PineconeConfig::new("key123", "https://idx.svc.pinecone.io")
            .with_namespace("production");

        assert_eq!(cfg.api_key, "key123");
        assert_eq!(cfg.host, "https://idx.svc.pinecone.io");
        assert_eq!(cfg.namespace.as_deref(), Some("production"));
    }

    #[test]
    fn store_new_creates_instance() {
        let store = store_for("https://host.pinecone.io");
        let cfg = store.config();

        assert_eq!(cfg.api_key, "key");
        assert_eq!(cfg.host, "https://host.pinecone.io");
    }

    #[test]
    fn url_construction() {
        let store = store_for("https://my-index.svc.pinecone.io");

        assert_eq!(
            store.url("/vectors/upsert"),
            "https://my-index.svc.pinecone.io/vectors/upsert"
        );
    }

    #[test]
    fn url_construction_trailing_slash() {
        // The trailing slash on the host must not produce `//` in the URL.
        let store = store_for("https://my-index.svc.pinecone.io/");

        assert_eq!(
            store.url("/vectors/query"),
            "https://my-index.svc.pinecone.io/vectors/query"
        );
    }

    #[test]
    fn with_namespace_adds_field() {
        let cfg = PineconeConfig::new("key", "https://host.pinecone.io").with_namespace("test-ns");
        let store = PineconeVectorStore::new(cfg);

        let body = store.with_namespace(sample_body());

        assert_eq!(body["namespace"], "test-ns");
    }

    #[test]
    fn with_namespace_omits_when_none() {
        let store = store_for("https://host.pinecone.io");

        let body = store.with_namespace(sample_body());

        assert!(body.get("namespace").is_none());
    }
}