omendb_core/vector/store/
options.rs

1//! `VectorStore` builder pattern configuration
2//!
3//! Follows `std::fs::OpenOptions` pattern for familiar, ergonomic API.
4
5use super::VectorStore;
6use crate::compression::RaBitQParams;
7use crate::omen::Metric;
8use crate::text::TextSearchConfig;
9use crate::vector::QuantizationMode;
10use anyhow::Result;
11use std::path::Path;
12
13/// Configuration options for opening or creating a vector store.
14///
15/// Follows the `std::fs::OpenOptions` pattern for familiar, ergonomic API.
16///
17/// # Examples
18///
19/// ```rust,no_run
20/// use omendb::vector::store::VectorStoreOptions;
21///
22/// // Simple persistent store
23/// let store = VectorStoreOptions::default()
24///     .dimensions(384)
25///     .open("./vectors")?;
26///
27/// // With custom HNSW parameters
28/// let store = VectorStoreOptions::default()
29///     .dimensions(384)
30///     .m(32)
31///     .ef_construction(400)
32///     .ef_search(100)
33///     .open("./vectors")?;
34///
35/// // In-memory store
36/// let store = VectorStoreOptions::default()
37///     .dimensions(384)
38///     .build()?;
39/// # Ok::<(), anyhow::Error>(())
40/// ```
41#[derive(Debug, Clone, Default)]
42pub struct VectorStoreOptions {
43    /// Vector dimensionality (0 = infer from first insert or existing data)
44    pub(super) dimensions: usize,
45
46    /// HNSW M parameter: neighbors per node (default: 16)
47    pub(super) m: Option<usize>,
48
49    /// HNSW `ef_construction`: build quality (default: 100)
50    pub(super) ef_construction: Option<usize>,
51
52    /// HNSW `ef_search`: search quality/speed tradeoff (default: 100)
53    pub(super) ef_search: Option<usize>,
54
55    /// Quantization mode (SQ8 or `RaBitQ` for asymmetric HNSW search)
56    pub(super) quantization: Option<QuantizationMode>,
57
58    /// Rescore candidates with original vectors (default: true when quantization enabled)
59    /// When true, search fetches `k * oversample` candidates using quantized distance,
60    /// then reranks with full precision distance for final k results.
61    pub(super) rescore: Option<bool>,
62
63    /// Oversampling factor for rescore (default: 3.0)
64    /// Fetches `k * oversample` candidates during quantized search.
65    pub(super) oversample: Option<f32>,
66
67    /// Distance metric for similarity search (default: L2)
68    pub(super) metric: Option<Metric>,
69
70    /// Text search configuration (None = disabled)
71    pub(super) text_search_config: Option<TextSearchConfig>,
72}
73
74impl VectorStoreOptions {
75    /// Create new options with defaults.
76    #[must_use]
77    pub fn new() -> Self {
78        Self::default()
79    }
80
81    /// Set vector dimensionality.
82    ///
83    /// If not set, dimensions will be inferred from:
84    /// 1. Existing data when opening a persistent store
85    /// 2. First inserted vector
86    #[must_use]
87    pub fn dimensions(mut self, dim: usize) -> Self {
88        self.dimensions = dim;
89        self
90    }
91
92    /// Set HNSW M parameter (neighbors per node).
93    ///
94    /// Higher M = better recall, more memory. Range: 4-64, default: 16.
95    #[must_use]
96    pub fn m(mut self, m: usize) -> Self {
97        self.m = Some(m);
98        self
99    }
100
101    /// Set HNSW `ef_construction` (build quality).
102    ///
103    /// Higher = better graph quality, slower build. Default: 100.
104    #[must_use]
105    pub fn ef_construction(mut self, ef: usize) -> Self {
106        self.ef_construction = Some(ef);
107        self
108    }
109
110    /// Set HNSW `ef_search` (search quality/speed tradeoff).
111    ///
112    /// Higher = better recall, slower search. Default: 100.
113    #[must_use]
114    pub fn ef_search(mut self, ef: usize) -> Self {
115        self.ef_search = Some(ef);
116        self
117    }
118
119    /// Enable quantization for memory-efficient storage.
120    ///
121    /// # Modes
122    /// - `QuantizationMode::SQ8`: 4x compression, similar speed, ~99% recall (default)
123    /// - `QuantizationMode::RaBitQ(params)`: 8x compression, ~0.5x speed, 93-99% recall
124    ///
125    /// # Example
126    /// ```ignore
127    /// // SQ8 (recommended for most cases)
128    /// let store = VectorStoreOptions::default()
129    ///     .dimensions(768)
130    ///     .quantization(QuantizationMode::sq8())
131    ///     .open("./vectors")?;
132    ///
133    /// // RaBitQ for higher compression
134    /// let store = VectorStoreOptions::default()
135    ///     .dimensions(768)
136    ///     .quantization(QuantizationMode::rabitq())
137    ///     .open("./vectors")?;
138    /// ```
139    #[must_use]
140    pub fn quantization(mut self, mode: QuantizationMode) -> Self {
141        self.quantization = Some(mode);
142        self
143    }
144
145    /// Enable SQ8 quantization (4x compression, similar speed, ~99% recall)
146    ///
147    /// Convenience method for the most common quantization mode.
148    #[must_use]
149    pub fn quantization_sq8(self) -> Self {
150        self.quantization(QuantizationMode::SQ8)
151    }
152
153    /// Enable `RaBitQ` quantization with default 4-bit parameters (8x compression)
154    #[must_use]
155    pub fn quantization_rabitq(self) -> Self {
156        self.quantization(QuantizationMode::rabitq())
157    }
158
159    /// Enable `RaBitQ` quantization with custom parameters
160    #[must_use]
161    pub fn quantization_rabitq_params(self, params: RaBitQParams) -> Self {
162        self.quantization(QuantizationMode::RaBitQ(params))
163    }
164
165    /// Enable/disable rescoring with original vectors (default: true when quantization enabled).
166    ///
167    /// When rescoring is enabled, search uses quantized vectors for fast candidate selection,
168    /// then reranks candidates using full-precision vectors for accuracy.
169    ///
170    /// # Arguments
171    /// * `enable` - Whether to rescore candidates
172    #[must_use]
173    pub fn rescore(mut self, enable: bool) -> Self {
174        self.rescore = Some(enable);
175        self
176    }
177
178    /// Set oversampling factor for rescoring (default: 3.0).
179    ///
180    /// When rescoring, fetches `k * oversample` candidates during quantized search,
181    /// then returns top k after reranking with full precision.
182    ///
183    /// Higher values improve recall but increase latency.
184    ///
185    /// # Arguments
186    /// * `factor` - Oversampling multiplier (must be >= 1.0)
187    #[must_use]
188    pub fn oversample(mut self, factor: f32) -> Self {
189        self.oversample = Some(factor.max(1.0));
190        self
191    }
192
193    /// Set distance metric for similarity search.
194    ///
195    /// # Metrics
196    /// - `"l2"` or `"euclidean"`: Euclidean distance (default)
197    /// - `"cosine"`: Cosine distance (1 - cosine similarity)
198    /// - `"dot"` or `"ip"`: Inner product (for MIPS)
199    ///
200    /// # Errors
201    /// Returns error if metric string is not recognized.
202    ///
203    /// # Example
204    /// ```ignore
205    /// let store = VectorStoreOptions::default()
206    ///     .dimensions(768)
207    ///     .metric("cosine")?
208    ///     .open("./vectors")?;
209    /// ```
210    pub fn metric(mut self, m: &str) -> Result<Self, String> {
211        self.metric = Some(Metric::parse(m)?);
212        Ok(self)
213    }
214
215    /// Set distance metric directly (no parsing).
216    #[must_use]
217    pub fn metric_fn(mut self, m: Metric) -> Self {
218        self.metric = Some(m);
219        self
220    }
221
222    /// Enable tantivy-based full-text search with default configuration.
223    ///
224    /// When enabled, you can use `set_with_text()` to index text alongside vectors,
225    /// and `hybrid_search()` to search both with RRF fusion.
226    ///
227    /// Uses 50MB writer buffer by default. For custom memory settings,
228    /// use `text_search_config()` instead.
229    #[must_use]
230    pub fn text_search(mut self, enabled: bool) -> Self {
231        self.text_search_config = if enabled {
232            Some(TextSearchConfig::default())
233        } else {
234            None
235        };
236        self
237    }
238
239    /// Enable text search with custom configuration.
240    ///
241    /// # Example
242    /// ```ignore
243    /// // Mobile: lower memory
244    /// let store = VectorStoreOptions::default()
245    ///     .text_search_config(TextSearchConfig { writer_buffer_mb: 15 })
246    ///     .open("./db")?;
247    ///
248    /// // Cloud: higher throughput
249    /// let store = VectorStoreOptions::default()
250    ///     .text_search_config(TextSearchConfig { writer_buffer_mb: 200 })
251    ///     .open("./db")?;
252    /// ```
253    #[must_use]
254    pub fn text_search_config(mut self, config: TextSearchConfig) -> Self {
255        self.text_search_config = Some(config);
256        self
257    }
258
259    /// Open or create a persistent vector store at the given path.
260    ///
261    /// Creates the directory if it doesn't exist.
262    /// Loads existing data if the store already exists.
263    pub fn open(&self, path: impl AsRef<Path>) -> Result<VectorStore> {
264        VectorStore::open_with_options(path, self)
265    }
266
267    /// Build an in-memory vector store (no persistence).
268    pub fn build(&self) -> Result<VectorStore> {
269        VectorStore::build_with_options(self)
270    }
271}