Skip to main content

synaptic_pinecone/
vector_store.rs

1use std::collections::HashMap;
2
3use async_trait::async_trait;
4use serde_json::Value;
5use synaptic_core::{Document, Embeddings, SynapticError, VectorStore};
6
7// ---------------------------------------------------------------------------
8// PineconeConfig
9// ---------------------------------------------------------------------------
10
/// Configuration for connecting to a Pinecone index.
///
/// Holds the credentials and addressing information used for every request.
/// The [`Debug`](std::fmt::Debug) implementation deliberately redacts
/// `api_key` so that logging a config (or anything containing one) never
/// writes the secret to logs.
#[derive(Clone)]
pub struct PineconeConfig {
    /// Pinecone API key.
    pub api_key: String,
    /// The index host URL (e.g. `https://my-index-abc123.svc.pinecone.io`).
    pub host: String,
    /// Optional namespace for partitioning vectors within the index.
    pub namespace: Option<String>,
}

impl std::fmt::Debug for PineconeConfig {
    /// Format the config for diagnostics, replacing the API key with a
    /// placeholder. `host` and `namespace` are printed as-is.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("PineconeConfig")
            // Never print the secret itself; only signal that a value exists.
            .field("api_key", &format_args!("<redacted>"))
            .field("host", &self.host)
            .field("namespace", &self.namespace)
            .finish()
    }
}

impl PineconeConfig {
    /// Create a new config with the required parameters.
    ///
    /// `namespace` starts out as `None`; use [`with_namespace`](Self::with_namespace)
    /// to set one.
    #[must_use]
    pub fn new(api_key: impl Into<String>, host: impl Into<String>) -> Self {
        Self {
            api_key: api_key.into(),
            host: host.into(),
            namespace: None,
        }
    }

    /// Set the namespace for vector operations.
    ///
    /// Consumes `self` and returns the updated config so calls can be chained
    /// after [`new`](Self::new).
    #[must_use]
    pub fn with_namespace(mut self, namespace: impl Into<String>) -> Self {
        self.namespace = Some(namespace.into());
        self
    }
}
38
39// ---------------------------------------------------------------------------
40// PineconeVectorStore
41// ---------------------------------------------------------------------------
42
43/// A [`VectorStore`] implementation backed by [Pinecone](https://www.pinecone.io/).
44///
45/// Uses the Pinecone REST API for all operations. Each document is stored as a
46/// vector with:
47/// - **id**: the document ID (auto-generated UUID v4 if empty)
48/// - **values**: the embedding vector
49/// - **metadata**: includes `content` (the document text) plus all document metadata
50pub struct PineconeVectorStore {
51    config: PineconeConfig,
52    client: reqwest::Client,
53}
54
55impl PineconeVectorStore {
56    /// Create a new store with the given configuration.
57    pub fn new(config: PineconeConfig) -> Self {
58        Self {
59            config,
60            client: reqwest::Client::new(),
61        }
62    }
63
64    /// Return a reference to the configuration.
65    pub fn config(&self) -> &PineconeConfig {
66        &self.config
67    }
68
69    /// Build the full URL for an API endpoint.
70    fn url(&self, path: &str) -> String {
71        let host = self.config.host.trim_end_matches('/');
72        format!("{host}{path}")
73    }
74
75    /// Create a JSON body that includes the namespace field if configured.
76    fn with_namespace(&self, mut body: serde_json::Value) -> serde_json::Value {
77        if let Some(ref ns) = self.config.namespace {
78            body["namespace"] = Value::String(ns.clone());
79        }
80        body
81    }
82
83    /// Send a POST request to the Pinecone API.
84    async fn post(&self, path: &str, body: serde_json::Value) -> Result<Value, SynapticError> {
85        let response = self
86            .client
87            .post(self.url(path))
88            .header("Api-Key", &self.config.api_key)
89            .header("Content-Type", "application/json")
90            .json(&body)
91            .send()
92            .await
93            .map_err(|e| SynapticError::VectorStore(format!("Pinecone request failed: {e}")))?;
94
95        let status = response.status();
96        let text = response
97            .text()
98            .await
99            .map_err(|e| SynapticError::VectorStore(format!("failed to read response: {e}")))?;
100
101        if !status.is_success() {
102            return Err(SynapticError::VectorStore(format!(
103                "Pinecone API error (HTTP {status}): {text}"
104            )));
105        }
106
107        serde_json::from_str(&text).map_err(|e| {
108            SynapticError::VectorStore(format!("failed to parse Pinecone response: {e}"))
109        })
110    }
111}
112
113// ---------------------------------------------------------------------------
114// VectorStore implementation
115// ---------------------------------------------------------------------------
116
117#[async_trait]
118impl VectorStore for PineconeVectorStore {
119    async fn add_documents(
120        &self,
121        docs: Vec<Document>,
122        embeddings: &dyn Embeddings,
123    ) -> Result<Vec<String>, SynapticError> {
124        if docs.is_empty() {
125            return Ok(Vec::new());
126        }
127
128        // Compute embeddings for all documents.
129        let texts: Vec<&str> = docs.iter().map(|d| d.content.as_str()).collect();
130        let vectors = embeddings.embed_documents(&texts).await?;
131
132        let mut ids = Vec::with_capacity(docs.len());
133        let mut pinecone_vectors = Vec::with_capacity(docs.len());
134
135        for (doc, vector) in docs.into_iter().zip(vectors) {
136            let id = if doc.id.is_empty() {
137                uuid::Uuid::new_v4().to_string()
138            } else {
139                doc.id.clone()
140            };
141
142            // Build metadata: store the document content plus all existing metadata.
143            let mut metadata = serde_json::Map::new();
144            metadata.insert("content".to_string(), Value::String(doc.content));
145            for (k, v) in doc.metadata {
146                metadata.insert(k, v);
147            }
148
149            pinecone_vectors.push(serde_json::json!({
150                "id": id,
151                "values": vector,
152                "metadata": metadata,
153            }));
154
155            ids.push(id);
156        }
157
158        let body = self.with_namespace(serde_json::json!({
159            "vectors": pinecone_vectors,
160        }));
161
162        self.post("/vectors/upsert", body).await?;
163
164        Ok(ids)
165    }
166
167    async fn similarity_search(
168        &self,
169        query: &str,
170        k: usize,
171        embeddings: &dyn Embeddings,
172    ) -> Result<Vec<Document>, SynapticError> {
173        let results = self.similarity_search_with_score(query, k, embeddings).await?;
174        Ok(results.into_iter().map(|(doc, _)| doc).collect())
175    }
176
177    async fn similarity_search_with_score(
178        &self,
179        query: &str,
180        k: usize,
181        embeddings: &dyn Embeddings,
182    ) -> Result<Vec<(Document, f32)>, SynapticError> {
183        let query_vec = embeddings.embed_query(query).await?;
184        self.similarity_search_by_vector_with_score(&query_vec, k)
185            .await
186    }
187
188    async fn similarity_search_by_vector(
189        &self,
190        embedding: &[f32],
191        k: usize,
192    ) -> Result<Vec<Document>, SynapticError> {
193        let results = self
194            .similarity_search_by_vector_with_score(embedding, k)
195            .await?;
196        Ok(results.into_iter().map(|(doc, _)| doc).collect())
197    }
198
199    async fn delete(&self, ids: &[&str]) -> Result<(), SynapticError> {
200        if ids.is_empty() {
201            return Ok(());
202        }
203
204        let id_values: Vec<Value> = ids.iter().map(|id| Value::String(id.to_string())).collect();
205        let body = self.with_namespace(serde_json::json!({
206            "ids": id_values,
207        }));
208
209        self.post("/vectors/delete", body).await?;
210
211        Ok(())
212    }
213}
214
215impl PineconeVectorStore {
216    /// Search by vector and return documents with their similarity scores.
217    async fn similarity_search_by_vector_with_score(
218        &self,
219        embedding: &[f32],
220        k: usize,
221    ) -> Result<Vec<(Document, f32)>, SynapticError> {
222        let body = self.with_namespace(serde_json::json!({
223            "vector": embedding,
224            "topK": k,
225            "includeMetadata": true,
226        }));
227
228        let response = self.post("/query", body).await?;
229
230        let matches = response
231            .get("matches")
232            .and_then(|m| m.as_array())
233            .cloned()
234            .unwrap_or_default();
235
236        let mut results = Vec::with_capacity(matches.len());
237
238        for m in matches {
239            let id = m
240                .get("id")
241                .and_then(|v| v.as_str())
242                .unwrap_or("")
243                .to_string();
244
245            let score = m
246                .get("score")
247                .and_then(|v| v.as_f64())
248                .unwrap_or(0.0) as f32;
249
250            let metadata_obj = m
251                .get("metadata")
252                .and_then(|v| v.as_object())
253                .cloned()
254                .unwrap_or_default();
255
256            // Extract document content from metadata.
257            let content = metadata_obj
258                .get("content")
259                .and_then(|v| v.as_str())
260                .unwrap_or("")
261                .to_string();
262
263            // Build document metadata (exclude the "content" key).
264            let metadata: HashMap<String, Value> = metadata_obj
265                .into_iter()
266                .filter(|(k, _)| k != "content")
267                .collect();
268
269            let doc = Document::with_metadata(id, content, metadata);
270            results.push((doc, score));
271        }
272
273        Ok(results)
274    }
275}
276
277// ---------------------------------------------------------------------------
278// Tests
279// ---------------------------------------------------------------------------
280
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a store for `host` using the placeholder API key `"key"` and no namespace.
    fn store_for_host(host: &str) -> PineconeVectorStore {
        PineconeVectorStore::new(PineconeConfig::new("key", host))
    }

    /// A minimal request body used by the namespace tests.
    fn sample_body() -> serde_json::Value {
        serde_json::json!({"vector": [1.0]})
    }

    // `new` stores both required fields and leaves the namespace unset.
    #[test]
    fn config_new_sets_fields() {
        let cfg = PineconeConfig::new("my-api-key", "https://my-index.svc.pinecone.io");
        assert_eq!(cfg.api_key, "my-api-key");
        assert_eq!(cfg.host, "https://my-index.svc.pinecone.io");
        assert!(cfg.namespace.is_none());
    }

    // `with_namespace` records the namespace on the config.
    #[test]
    fn config_with_namespace() {
        let cfg = PineconeConfig::new("key", "https://host.pinecone.io");
        let cfg = cfg.with_namespace("my-namespace");
        assert_eq!(cfg.namespace.as_deref(), Some("my-namespace"));
    }

    // Chaining the builder keeps the values passed to `new`.
    #[test]
    fn config_builder_chain() {
        let cfg = PineconeConfig::new("key123", "https://idx.svc.pinecone.io")
            .with_namespace("production");

        assert_eq!(cfg.namespace.as_deref(), Some("production"));
        assert_eq!(cfg.host, "https://idx.svc.pinecone.io");
        assert_eq!(cfg.api_key, "key123");
    }

    // The store exposes the config it was built from.
    #[test]
    fn store_new_creates_instance() {
        let store = store_for_host("https://host.pinecone.io");
        let cfg = store.config();
        assert_eq!(cfg.api_key, "key");
        assert_eq!(cfg.host, "https://host.pinecone.io");
    }

    // Host and path are concatenated directly.
    #[test]
    fn url_construction() {
        let store = store_for_host("https://my-index.svc.pinecone.io");
        let built = store.url("/vectors/upsert");
        assert_eq!(built, "https://my-index.svc.pinecone.io/vectors/upsert");
    }

    // A trailing slash on the host does not produce a double slash.
    #[test]
    fn url_construction_trailing_slash() {
        let store = store_for_host("https://my-index.svc.pinecone.io/");
        let built = store.url("/vectors/query");
        assert_eq!(built, "https://my-index.svc.pinecone.io/vectors/query");
    }

    // A configured namespace is injected into the request body.
    #[test]
    fn with_namespace_adds_field() {
        let cfg = PineconeConfig::new("key", "https://host.pinecone.io")
            .with_namespace("test-ns");
        let store = PineconeVectorStore::new(cfg);

        let body = store.with_namespace(sample_body());
        assert_eq!(body["namespace"], "test-ns");
    }

    // Without a namespace the body gains no `namespace` key.
    #[test]
    fn with_namespace_omits_when_none() {
        let store = store_for_host("https://host.pinecone.io");

        let body = store.with_namespace(sample_body());
        assert!(body.get("namespace").is_none());
    }
}
356}