Skip to main content

synaptic_vectorstores/
multi_vector.rs

1use std::collections::HashMap;
2use std::sync::Arc;
3
4use async_trait::async_trait;
5use synaptic_core::{Document, Embeddings, Retriever, SynapticError};
6use tokio::sync::RwLock;
7
8use crate::VectorStore;
9
10/// A retriever that maps multiple child vectors back to parent documents.
11///
12/// Each parent document can have multiple sub-documents (e.g., summaries,
13/// smaller chunks) stored in the vector store. Retrieval finds relevant
14/// sub-documents, then returns the original parent documents.
15pub struct MultiVectorRetriever<S: VectorStore> {
16    vectorstore: Arc<S>,
17    embeddings: Arc<dyn Embeddings>,
18    /// Parent document store, keyed by document ID.
19    docstore: Arc<RwLock<HashMap<String, Document>>>,
20    /// Metadata key linking child documents to their parent.
21    id_key: String,
22    k: usize,
23}
24
25impl<S: VectorStore + 'static> MultiVectorRetriever<S> {
26    /// Create a new `MultiVectorRetriever`.
27    ///
28    /// - `vectorstore`: the vector store to search for child documents.
29    /// - `embeddings`: the embeddings provider for embedding child documents.
30    /// - `k`: the number of child documents to retrieve for parent lookup.
31    pub fn new(vectorstore: Arc<S>, embeddings: Arc<dyn Embeddings>, k: usize) -> Self {
32        Self {
33            vectorstore,
34            embeddings,
35            docstore: Arc::new(RwLock::new(HashMap::new())),
36            id_key: "parent_id".to_string(),
37            k,
38        }
39    }
40
41    /// Set a custom metadata key linking child documents to their parent ID.
42    /// Defaults to `"parent_id"`.
43    pub fn with_id_key(mut self, key: impl Into<String>) -> Self {
44        self.id_key = key.into();
45        self
46    }
47
48    /// Add parent documents and their associated child documents.
49    ///
50    /// Parents are stored in the internal docstore. Children are embedded and
51    /// added to the vector store. Each child document must have the `id_key`
52    /// metadata field set to the parent document's ID.
53    pub async fn add_documents(
54        &self,
55        parent_docs: Vec<Document>,
56        child_docs: Vec<Document>,
57    ) -> Result<(), SynapticError> {
58        // Store parents in the docstore
59        {
60            let mut store = self.docstore.write().await;
61            for doc in parent_docs {
62                store.insert(doc.id.clone(), doc);
63            }
64        }
65
66        // Add children to the vector store
67        self.vectorstore
68            .add_documents(child_docs, self.embeddings.as_ref())
69            .await?;
70
71        Ok(())
72    }
73}
74
75#[async_trait]
76impl<S: VectorStore + 'static> Retriever for MultiVectorRetriever<S> {
77    async fn retrieve(&self, query: &str, top_k: usize) -> Result<Vec<Document>, SynapticError> {
78        let k = if top_k > 0 { top_k } else { self.k };
79
80        // Search vectorstore for child documents
81        let children = self
82            .vectorstore
83            .similarity_search(query, k, self.embeddings.as_ref())
84            .await?;
85
86        // Look up parent documents from the docstore, deduplicating
87        let docstore = self.docstore.read().await;
88        let mut seen = std::collections::HashSet::new();
89        let mut parents = Vec::new();
90
91        for child in &children {
92            if let Some(parent_id_value) = child.metadata.get(&self.id_key) {
93                if let Some(parent_id) = parent_id_value.as_str() {
94                    if seen.insert(parent_id.to_string()) {
95                        if let Some(parent) = docstore.get(parent_id) {
96                            parents.push(parent.clone());
97                        }
98                    }
99                }
100            }
101        }
102
103        Ok(parents)
104    }
105}