omendb_core/vector/store/options.rs
1//! `VectorStore` builder pattern configuration
2//!
3//! Follows `std::fs::OpenOptions` pattern for familiar, ergonomic API.
4
5use super::VectorStore;
6use crate::compression::RaBitQParams;
7use crate::omen::Metric;
8use crate::text::TextSearchConfig;
9use crate::vector::QuantizationMode;
10use anyhow::Result;
11use std::path::Path;
12
/// Builder-style configuration used to open or create a vector store.
///
/// Modeled on the `std::fs::OpenOptions` pattern: start from
/// `VectorStoreOptions::default()` (or `VectorStoreOptions::new()`), chain the
/// setters you need, then finish with `open` for a persistent store or `build`
/// for an in-memory one.
///
/// Every tunable except `dimensions` is stored as an `Option`; the defaults
/// quoted on each field are the documented ones and are applied by the store,
/// not by this struct.
///
/// # Examples
///
/// ```rust,no_run
/// use omendb::vector::store::VectorStoreOptions;
///
/// // Simple persistent store
/// let store = VectorStoreOptions::default()
///     .dimensions(384)
///     .open("./vectors")?;
///
/// // With custom HNSW parameters
/// let store = VectorStoreOptions::default()
///     .dimensions(384)
///     .m(32)
///     .ef_construction(400)
///     .ef_search(100)
///     .open("./vectors")?;
///
/// // In-memory store
/// let store = VectorStoreOptions::default()
///     .dimensions(384)
///     .build()?;
/// # Ok::<(), anyhow::Error>(())
/// ```
#[derive(Debug, Clone, Default)]
pub struct VectorStoreOptions {
 /// Vector dimensionality (0 = infer from first insert or existing data).
 /// `Default` leaves this at 0.
 pub(super) dimensions: usize,

 /// HNSW M parameter: neighbors per node (default: 16).
 pub(super) m: Option<usize>,

 /// HNSW `ef_construction`: build quality (default: 100).
 pub(super) ef_construction: Option<usize>,

 /// HNSW `ef_search`: search quality/speed tradeoff (default: 100).
 pub(super) ef_search: Option<usize>,

 /// Quantization mode (SQ8 or `RaBitQ` for asymmetric HNSW search).
 /// `None` means no quantization mode was requested.
 pub(super) quantization: Option<QuantizationMode>,

 /// Rescore candidates with original vectors (default: true when quantization enabled).
 /// When true, search fetches `k * oversample` candidates using quantized distance,
 /// then reranks with full precision distance for final k results.
 pub(super) rescore: Option<bool>,

 /// Oversampling factor for rescore (default: 3.0).
 /// Fetches `k * oversample` candidates during quantized search.
 /// The `oversample` setter never stores a value below 1.0.
 pub(super) oversample: Option<f32>,

 /// Distance metric for similarity search (default: L2).
 pub(super) metric: Option<Metric>,

 /// Text search configuration (None = disabled).
 pub(super) text_search_config: Option<TextSearchConfig>,
}
73
74impl VectorStoreOptions {
75 /// Create new options with defaults.
76 #[must_use]
77 pub fn new() -> Self {
78 Self::default()
79 }
80
81 /// Set vector dimensionality.
82 ///
83 /// If not set, dimensions will be inferred from:
84 /// 1. Existing data when opening a persistent store
85 /// 2. First inserted vector
86 #[must_use]
87 pub fn dimensions(mut self, dim: usize) -> Self {
88 self.dimensions = dim;
89 self
90 }
91
92 /// Set HNSW M parameter (neighbors per node).
93 ///
94 /// Higher M = better recall, more memory. Range: 4-64, default: 16.
95 #[must_use]
96 pub fn m(mut self, m: usize) -> Self {
97 self.m = Some(m);
98 self
99 }
100
101 /// Set HNSW `ef_construction` (build quality).
102 ///
103 /// Higher = better graph quality, slower build. Default: 100.
104 #[must_use]
105 pub fn ef_construction(mut self, ef: usize) -> Self {
106 self.ef_construction = Some(ef);
107 self
108 }
109
110 /// Set HNSW `ef_search` (search quality/speed tradeoff).
111 ///
112 /// Higher = better recall, slower search. Default: 100.
113 #[must_use]
114 pub fn ef_search(mut self, ef: usize) -> Self {
115 self.ef_search = Some(ef);
116 self
117 }
118
119 /// Enable quantization for memory-efficient storage.
120 ///
121 /// # Modes
122 /// - `QuantizationMode::SQ8`: 4x compression, similar speed, ~99% recall (default)
123 /// - `QuantizationMode::RaBitQ(params)`: 8x compression, ~0.5x speed, 93-99% recall
124 ///
125 /// # Example
126 /// ```ignore
127 /// // SQ8 (recommended for most cases)
128 /// let store = VectorStoreOptions::default()
129 /// .dimensions(768)
130 /// .quantization(QuantizationMode::sq8())
131 /// .open("./vectors")?;
132 ///
133 /// // RaBitQ for higher compression
134 /// let store = VectorStoreOptions::default()
135 /// .dimensions(768)
136 /// .quantization(QuantizationMode::rabitq())
137 /// .open("./vectors")?;
138 /// ```
139 #[must_use]
140 pub fn quantization(mut self, mode: QuantizationMode) -> Self {
141 self.quantization = Some(mode);
142 self
143 }
144
145 /// Enable SQ8 quantization (4x compression, similar speed, ~99% recall)
146 ///
147 /// Convenience method for the most common quantization mode.
148 #[must_use]
149 pub fn quantization_sq8(self) -> Self {
150 self.quantization(QuantizationMode::SQ8)
151 }
152
153 /// Enable `RaBitQ` quantization with default 4-bit parameters (8x compression)
154 #[must_use]
155 pub fn quantization_rabitq(self) -> Self {
156 self.quantization(QuantizationMode::rabitq())
157 }
158
159 /// Enable `RaBitQ` quantization with custom parameters
160 #[must_use]
161 pub fn quantization_rabitq_params(self, params: RaBitQParams) -> Self {
162 self.quantization(QuantizationMode::RaBitQ(params))
163 }
164
165 /// Enable/disable rescoring with original vectors (default: true when quantization enabled).
166 ///
167 /// When rescoring is enabled, search uses quantized vectors for fast candidate selection,
168 /// then reranks candidates using full-precision vectors for accuracy.
169 ///
170 /// # Arguments
171 /// * `enable` - Whether to rescore candidates
172 #[must_use]
173 pub fn rescore(mut self, enable: bool) -> Self {
174 self.rescore = Some(enable);
175 self
176 }
177
178 /// Set oversampling factor for rescoring (default: 3.0).
179 ///
180 /// When rescoring, fetches `k * oversample` candidates during quantized search,
181 /// then returns top k after reranking with full precision.
182 ///
183 /// Higher values improve recall but increase latency.
184 ///
185 /// # Arguments
186 /// * `factor` - Oversampling multiplier (must be >= 1.0)
187 #[must_use]
188 pub fn oversample(mut self, factor: f32) -> Self {
189 self.oversample = Some(factor.max(1.0));
190 self
191 }
192
193 /// Set distance metric for similarity search.
194 ///
195 /// # Metrics
196 /// - `"l2"` or `"euclidean"`: Euclidean distance (default)
197 /// - `"cosine"`: Cosine distance (1 - cosine similarity)
198 /// - `"dot"` or `"ip"`: Inner product (for MIPS)
199 ///
200 /// # Errors
201 /// Returns error if metric string is not recognized.
202 ///
203 /// # Example
204 /// ```ignore
205 /// let store = VectorStoreOptions::default()
206 /// .dimensions(768)
207 /// .metric("cosine")?
208 /// .open("./vectors")?;
209 /// ```
210 pub fn metric(mut self, m: &str) -> Result<Self, String> {
211 self.metric = Some(Metric::parse(m)?);
212 Ok(self)
213 }
214
215 /// Set distance metric directly (no parsing).
216 #[must_use]
217 pub fn metric_fn(mut self, m: Metric) -> Self {
218 self.metric = Some(m);
219 self
220 }
221
222 /// Enable tantivy-based full-text search with default configuration.
223 ///
224 /// When enabled, you can use `set_with_text()` to index text alongside vectors,
225 /// and `hybrid_search()` to search both with RRF fusion.
226 ///
227 /// Uses 50MB writer buffer by default. For custom memory settings,
228 /// use `text_search_config()` instead.
229 #[must_use]
230 pub fn text_search(mut self, enabled: bool) -> Self {
231 self.text_search_config = if enabled {
232 Some(TextSearchConfig::default())
233 } else {
234 None
235 };
236 self
237 }
238
239 /// Enable text search with custom configuration.
240 ///
241 /// # Example
242 /// ```ignore
243 /// // Mobile: lower memory
244 /// let store = VectorStoreOptions::default()
245 /// .text_search_config(TextSearchConfig { writer_buffer_mb: 15 })
246 /// .open("./db")?;
247 ///
248 /// // Cloud: higher throughput
249 /// let store = VectorStoreOptions::default()
250 /// .text_search_config(TextSearchConfig { writer_buffer_mb: 200 })
251 /// .open("./db")?;
252 /// ```
253 #[must_use]
254 pub fn text_search_config(mut self, config: TextSearchConfig) -> Self {
255 self.text_search_config = Some(config);
256 self
257 }
258
259 /// Open or create a persistent vector store at the given path.
260 ///
261 /// Creates the directory if it doesn't exist.
262 /// Loads existing data if the store already exists.
263 pub fn open(&self, path: impl AsRef<Path>) -> Result<VectorStore> {
264 VectorStore::open_with_options(path, self)
265 }
266
267 /// Build an in-memory vector store (no persistence).
268 pub fn build(&self) -> Result<VectorStore> {
269 VectorStore::build_with_options(self)
270 }
271}