rig/vector_store/
builder.rs

1use serde::Serialize;
2use std::collections::HashMap;
3
4use crate::{OneOrMany, embeddings::Embedding};
5
6use super::{IndexStrategy, in_memory_store::InMemoryVectorStore};
7
8/// Builder for creating an [InMemoryVectorStore] with custom configuration.
9pub struct InMemoryVectorStoreBuilder<D>
10where
11    D: Serialize,
12{
13    /// Embeddings of the documents.
14    embeddings: HashMap<String, (D, OneOrMany<Embedding>)>,
15
16    /// Index strategy for the vector store.
17    index_strategy: IndexStrategy,
18}
19
20impl<D> Default for InMemoryVectorStoreBuilder<D>
21where
22    D: Serialize + Eq,
23{
24    fn default() -> Self {
25        Self::new()
26    }
27}
28
29impl<D> InMemoryVectorStoreBuilder<D>
30where
31    D: Serialize + Eq,
32{
33    /// Create a new builder with default settings.
34    /// Default index strategy is BruteForce.
35    pub fn new() -> Self {
36        Self {
37            embeddings: HashMap::new(),
38            index_strategy: IndexStrategy::default(),
39        }
40    }
41
42    /// Set the index strategy for the vector store.
43    ///
44    /// # Examples
45    ///
46    /// ```ignore
47    /// use rig::vector_store::{InMemoryVectorStoreBuilder, IndexStrategy};
48    ///
49    /// let store = InMemoryVectorStoreBuilder::<String>::new()
50    ///     .index_strategy(IndexStrategy::LSH {
51    ///         num_tables: 5,
52    ///         num_hyperplanes: 10,
53    ///     })
54    ///     .build();
55    /// ```
56    pub fn index_strategy(mut self, index_strategy: IndexStrategy) -> Self {
57        self.index_strategy = index_strategy;
58        self
59    }
60
61    /// Add documents with auto-generated IDs.
62    /// IDs will have the form `"doc{n}"` where `n` is the index.
63    pub fn documents(
64        mut self,
65        documents: impl IntoIterator<Item = (D, OneOrMany<Embedding>)>,
66    ) -> Self {
67        let current_index = self.embeddings.len();
68        documents
69            .into_iter()
70            .enumerate()
71            .for_each(|(i, (doc, embeddings))| {
72                self.embeddings
73                    .insert(format!("doc{}", i + current_index), (doc, embeddings));
74            });
75        self
76    }
77
78    /// Add documents with explicit IDs.
79    pub fn documents_with_ids(
80        mut self,
81        documents: impl IntoIterator<Item = (impl ToString, D, OneOrMany<Embedding>)>,
82    ) -> Self {
83        documents.into_iter().for_each(|(id, doc, embeddings)| {
84            self.embeddings.insert(id.to_string(), (doc, embeddings));
85        });
86        self
87    }
88
89    /// Add documents with IDs generated by a function.
90    pub fn documents_with_id_f(
91        mut self,
92        documents: impl IntoIterator<Item = (D, OneOrMany<Embedding>)>,
93        f: fn(&D) -> String,
94    ) -> Self {
95        documents.into_iter().for_each(|(doc, embeddings)| {
96            let id = f(&doc);
97            self.embeddings.insert(id, (doc, embeddings));
98        });
99        self
100    }
101
102    /// Build the [InMemoryVectorStore] with the configured settings.
103    pub fn build(self) -> InMemoryVectorStore<D> {
104        InMemoryVectorStore::from_builder(self.embeddings, self.index_strategy)
105    }
106}