mod filter;
mod options;

pub use crate::omen::Metric;
pub use filter::MetadataFilter;
pub use options::VectorStoreOptions;

use super::hnsw::HNSWParams;
use super::hnsw_index::HNSWIndex;
use super::types::Vector;
use super::QuantizationMode;
use crate::compression::{QuantizationBits, RaBitQParams};
use crate::distance::l2_distance;
use crate::omen::{MetadataIndex, OmenFile};
use crate::text::{
    weighted_reciprocal_rank_fusion, weighted_reciprocal_rank_fusion_with_subscores, HybridResult,
    TextIndex, TextSearchConfig, DEFAULT_RRF_K,
};
use anyhow::Result;
use rayon::prelude::*;
use rustc_hash::FxHashMap;
use serde_json::Value as JsonValue;
use std::collections::HashMap;
use std::path::{Path, PathBuf};

const DEFAULT_HNSW_M: usize = 16;
const DEFAULT_HNSW_EF_CONSTRUCTION: usize = 100;
const DEFAULT_HNSW_EF_SEARCH: usize = 100;
const DEFAULT_OVERSAMPLE_FACTOR: f32 = 3.0;

#[inline]
fn compute_effective_ef(ef: Option<usize>, stored_ef: usize, k: usize) -> usize {
    ef.unwrap_or(stored_ef).max(k)
}

#[cfg(debug_assertions)]
fn debug_assert_mapping_consistency(
    id_to_index: &FxHashMap<String, usize>,
    index_to_id: &FxHashMap<usize, String>,
) {
    debug_assert_eq!(
        id_to_index.len(),
        index_to_id.len(),
        "ID mapping size mismatch: id_to_index={}, index_to_id={}",
        id_to_index.len(),
        index_to_id.len()
    );

    for (id, &idx) in id_to_index {
        debug_assert_eq!(
            index_to_id.get(&idx),
            Some(id),
            "Mapping inconsistency: id_to_index[{id}]={idx} but index_to_id[{idx}]={:?}",
            index_to_id.get(&idx)
        );
    }
}

#[cfg(not(debug_assertions))]
#[inline]
fn debug_assert_mapping_consistency(
    _id_to_index: &FxHashMap<String, usize>,
    _index_to_id: &FxHashMap<usize, String>,
) {
}

#[cfg(test)]
mod tests;

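/// Default oversampling factor for rescoring, keyed by quantization mode:
/// coarser codes fetch more candidates before exact re-ranking (Binary 5x,
/// RaBitQ 2-bit 4x, other RaBitQ widths 3x, SQ8 and RaBitQ 8-bit 2x, no
/// quantization 1x).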
fn default_oversample_for_quantization(mode: Option<&QuantizationMode>) -> f32 {
    match mode {
        None => 1.0,
        Some(QuantizationMode::Binary) => 5.0,
        Some(QuantizationMode::SQ8) => 2.0,
        Some(QuantizationMode::RaBitQ(params)) => match params.bits_per_dim.to_u8() {
            2 => 4.0,
            8 => 2.0,
            _ => 3.0,
        },
    }
}

fn quantization_mode_from_id(mode_id: u64) -> Option<QuantizationMode> {
    match mode_id {
        1 => Some(QuantizationMode::SQ8),
        2 => Some(QuantizationMode::RaBitQ(RaBitQParams {
            bits_per_dim: QuantizationBits::Bits4,
            ..RaBitQParams::default()
        })),
        3 => Some(QuantizationMode::RaBitQ(RaBitQParams {
            bits_per_dim: QuantizationBits::Bits2,
            ..RaBitQParams::default()
        })),
        4 => Some(QuantizationMode::RaBitQ(RaBitQParams {
            bits_per_dim: QuantizationBits::Bits8,
            ..RaBitQParams::default()
        })),
        5 => Some(QuantizationMode::Binary),
        _ => None,
    }
}

fn create_hnsw_index(
    dimensions: usize,
    hnsw_m: usize,
    hnsw_ef_construction: usize,
    hnsw_ef_search: usize,
    distance_metric: Metric,
    quantization_mode: Option<&QuantizationMode>,
    training_vectors: &[Vec<f32>],
) -> Result<HNSWIndex> {
    use super::hnsw_index::HNSWQuantization;

    let m = hnsw_m.max(DEFAULT_HNSW_M);
    let ef_construction = hnsw_ef_construction.max(DEFAULT_HNSW_EF_CONSTRUCTION);
    let ef_search = hnsw_ef_search.max(DEFAULT_HNSW_EF_SEARCH);

    let quantization = match quantization_mode {
        Some(QuantizationMode::Binary) => HNSWQuantization::Binary,
        Some(QuantizationMode::SQ8) => HNSWQuantization::SQ8,
        Some(QuantizationMode::RaBitQ(params)) => HNSWQuantization::RaBitQ(params.clone()),
        None => HNSWQuantization::None,
    };

    HNSWIndex::builder()
        .dimensions(dimensions)
        .max_elements(training_vectors.len().max(10_000))
        .m(m)
        .ef_construction(ef_construction)
        .ef_search(ef_search)
        .metric(distance_metric.into())
        .quantization(quantization)
        .build_with_training(training_vectors)
}

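/// A vector store combining an in-memory vector list, an optional HNSW index
/// (with optional Binary/SQ8/RaBitQ quantization), JSON metadata with a
/// filterable index, optional `OmenFile` persistence, and optional full-text
/// search for hybrid queries.
///
/// Minimal usage sketch (how `VectorStore` and `Vector` are re-exported from
/// this crate is an assumption; adjust the paths):
///
/// ```ignore
/// let mut store = VectorStore::new(3);
/// store.insert_with_metadata(
///     "doc-1".to_string(),
///     Vector::new(vec![0.1, 0.2, 0.3]),
///     serde_json::json!({ "lang": "en" }),
/// )?;
/// let neighbours = store.knn_search(&Vector::new(vec![0.1, 0.2, 0.3]), 1)?;
/// ```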
pub struct VectorStore {
    pub vectors: Vec<Vector>,

    pub hnsw_index: Option<HNSWIndex>,

    dimensions: usize,

    rescore_enabled: bool,

    oversample_factor: f32,

    metadata: HashMap<usize, JsonValue>,

    pub id_to_index: FxHashMap<String, usize>,

    index_to_id: FxHashMap<usize, String>,

    deleted: HashMap<usize, bool>,

    metadata_index: MetadataIndex,

    storage: Option<OmenFile>,

    storage_path: Option<PathBuf>,

    text_index: Option<TextIndex>,

    text_search_config: Option<TextSearchConfig>,

    pending_quantization: Option<QuantizationMode>,

    hnsw_m: usize,
    hnsw_ef_construction: usize,
    hnsw_ef_search: usize,

    distance_metric: Metric,

    next_index: usize,
}

impl VectorStore {
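    /// Creates an empty, in-memory store for vectors of `dimensions` length,
    /// using L2 distance, the default HNSW parameters, and no quantization or
    /// persistence. A minimal sketch:
    ///
    /// ```ignore
    /// let store = VectorStore::new(768);
    /// assert!(store.is_empty());
    /// ```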
    #[must_use]
    pub fn new(dimensions: usize) -> Self {
        Self {
            vectors: Vec::new(),
            hnsw_index: None,
            dimensions,
            rescore_enabled: false,
            oversample_factor: DEFAULT_OVERSAMPLE_FACTOR,
            metadata: HashMap::new(),
            id_to_index: FxHashMap::default(),
            index_to_id: FxHashMap::default(),
            deleted: HashMap::new(),
            metadata_index: MetadataIndex::new(),
            storage: None,
            storage_path: None,
            text_index: None,
            text_search_config: None,
            pending_quantization: None,
            hnsw_m: DEFAULT_HNSW_M,
            hnsw_ef_construction: DEFAULT_HNSW_EF_CONSTRUCTION,
            hnsw_ef_search: DEFAULT_HNSW_EF_SEARCH,
            distance_metric: Metric::L2,
            next_index: 0,
        }
    }

    #[must_use]
    pub fn new_with_quantization(dimensions: usize, mode: QuantizationMode) -> Self {
        Self {
            vectors: Vec::new(),
            hnsw_index: None,
            dimensions,
            rescore_enabled: true,
            oversample_factor: DEFAULT_OVERSAMPLE_FACTOR,
            metadata: HashMap::new(),
            id_to_index: FxHashMap::default(),
            index_to_id: FxHashMap::default(),
            deleted: HashMap::new(),
            metadata_index: MetadataIndex::new(),
            storage: None,
            storage_path: None,
            text_index: None,
            text_search_config: None,
            pending_quantization: Some(mode),
            hnsw_m: DEFAULT_HNSW_M,
            hnsw_ef_construction: DEFAULT_HNSW_EF_CONSTRUCTION,
            hnsw_ef_search: DEFAULT_HNSW_EF_SEARCH,
            distance_metric: Metric::L2,
            next_index: 0,
        }
    }

    pub fn new_with_params(
        dimensions: usize,
        m: usize,
        ef_construction: usize,
        ef_search: usize,
        distance_metric: Metric,
    ) -> Result<Self> {
        let hnsw_index = Some(HNSWIndex::new_with_params(
            1_000_000,
            dimensions,
            m,
            ef_construction,
            ef_search,
            distance_metric.into(),
        )?);

        Ok(Self {
            vectors: Vec::new(),
            hnsw_index,
            dimensions,
            rescore_enabled: false,
            oversample_factor: DEFAULT_OVERSAMPLE_FACTOR,
            metadata: HashMap::new(),
            id_to_index: FxHashMap::default(),
            index_to_id: FxHashMap::default(),
            deleted: HashMap::new(),
            metadata_index: MetadataIndex::new(),
            storage: None,
            storage_path: None,
            text_index: None,
            text_search_config: None,
            pending_quantization: None,
            hnsw_m: m,
            hnsw_ef_construction: ef_construction,
            hnsw_ef_search: ef_search,
            distance_metric,
            next_index: 0,
        })
    }

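    /// Opens a persistent store at `path`, creating the backing omen file if
    /// it does not exist. Metadata, ID mappings, and deletion markers are
    /// loaded eagerly; the HNSW index is deserialized from storage or rebuilt
    /// from the persisted vectors when the stored index looks stale.
    ///
    /// ```ignore
    /// let mut store = VectorStore::open("data/my_store")?;
    /// println!("{} live vectors", store.len());
    /// ```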
    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
        let path = path.as_ref();
        let omen_path = OmenFile::compute_omen_path(path);
        let storage = if omen_path.exists() {
            OmenFile::open(path)?
        } else {
            OmenFile::create(path, 0)?
        };

        let is_quantized = storage.is_quantized()?;
        let quantization_mode =
            quantization_mode_from_id(storage.get_quantization_mode()?.unwrap_or(0));

        let metadata = storage.load_all_metadata()?;
        let id_to_index: FxHashMap<String, usize> =
            storage.load_all_id_mappings()?.into_iter().collect();
        let deleted = storage.load_all_deleted()?;

        let dimensions = storage.get_config("dimensions")?.unwrap_or(0) as usize;

        let header = storage.header();
        let distance_metric = header.distance_fn;
        let hnsw_m = header.m as usize;
        let hnsw_ef_construction = header.ef_construction as usize;
        let hnsw_ef_search = header.ef_search as usize;

        let (vectors, real_indices) = if is_quantized {
            (Vec::new(), std::collections::HashSet::new())
        } else {
            let vectors_data = storage.load_all_vectors()?;
            let mut vectors: Vec<Vector> = Vec::new();
            let mut real_indices: std::collections::HashSet<usize> =
                std::collections::HashSet::new();

            for (id, data) in &vectors_data {
                while vectors.len() < *id {
                    vectors.push(Vector::new(vec![0.0; dimensions.max(1)]));
                }
                vectors.push(Vector::new(data.clone()));
                real_indices.insert(*id);
            }
            (vectors, real_indices)
        };

        let mut deleted = deleted;
        for idx in 0..vectors.len() {
            if !real_indices.contains(&idx) && !deleted.contains_key(&idx) {
                deleted.insert(idx, true);
            }
        }

        let active_vector_count = vectors
            .iter()
            .enumerate()
            .filter(|(i, _)| !deleted.contains_key(i))
            .count();

        let hnsw_index = if let Some(hnsw_bytes) = storage.get_hnsw_index() {
            match bincode::deserialize::<HNSWIndex>(hnsw_bytes) {
                Ok(index) => {
                    if index.len() != active_vector_count && !vectors.is_empty() {
                        tracing::info!(
                            "HNSW index count ({}) differs from vector count ({}), rebuilding",
                            index.len(),
                            active_vector_count
                        );
                        let vector_data: Vec<Vec<f32>> =
                            vectors.iter().map(|v| v.data.clone()).collect();
                        let mut new_index = create_hnsw_index(
                            dimensions,
                            hnsw_m,
                            hnsw_ef_construction,
                            hnsw_ef_search,
                            distance_metric,
                            quantization_mode.as_ref(),
                            &vector_data,
                        )?;
                        new_index.batch_insert(&vector_data)?;
                        Some(new_index)
                    } else {
                        Some(index)
                    }
                }
                Err(e) => {
                    tracing::warn!("Failed to deserialize HNSW index, rebuilding: {}", e);
                    None
                }
            }
        } else if !vectors.is_empty() {
            let vector_data: Vec<Vec<f32>> = vectors.iter().map(|v| v.data.clone()).collect();
            let mut index = create_hnsw_index(
                dimensions,
                hnsw_m,
                hnsw_ef_construction,
                hnsw_ef_search,
                distance_metric,
                quantization_mode.as_ref(),
                &vector_data,
            )?;
            index.batch_insert(&vector_data)?;
            Some(index)
        } else if is_quantized && dimensions > 0 {
            let vectors_data = storage.load_all_vectors()?;
            if vectors_data.is_empty() {
                None
            } else {
                let vector_data: Vec<Vec<f32>> =
                    vectors_data.iter().map(|(_, v)| v.clone()).collect();
                let mut index = create_hnsw_index(
                    dimensions,
                    hnsw_m,
                    hnsw_ef_construction,
                    hnsw_ef_search,
                    distance_metric,
                    quantization_mode.as_ref(),
                    &vector_data,
                )?;
                index.batch_insert(&vector_data)?;
                Some(index)
            }
        } else {
            None
        };

        let text_index_path = path.join("text_index");
        let text_index = if text_index_path.exists() {
            Some(TextIndex::open(&text_index_path)?)
        } else {
            None
        };

        let index_to_id: FxHashMap<usize, String> = id_to_index
            .iter()
            .map(|(id, &idx)| (idx, id.clone()))
            .collect();

        let mut metadata_index = MetadataIndex::new();
        for (&idx, meta) in &metadata {
            if !deleted.contains_key(&idx) {
                metadata_index.index_json(idx as u32, meta);
            }
        }

        let rescore_enabled = hnsw_index
            .as_ref()
            .is_some_and(super::hnsw_index::HNSWIndex::is_asymmetric);

        debug_assert_mapping_consistency(&id_to_index, &index_to_id);

        let next_index = id_to_index.values().max().map_or(0, |&max| max + 1);

        Ok(Self {
            vectors,
            hnsw_index,
            dimensions,
            rescore_enabled,
            oversample_factor: DEFAULT_OVERSAMPLE_FACTOR,
            metadata,
            id_to_index,
            index_to_id,
            deleted,
            metadata_index,
            storage: Some(storage),
            storage_path: Some(path.to_path_buf()),
            text_index,
            text_search_config: None,
            pending_quantization: None,
            hnsw_m: hnsw_m.max(DEFAULT_HNSW_M),
            hnsw_ef_construction: hnsw_ef_construction.max(DEFAULT_HNSW_EF_CONSTRUCTION),
            hnsw_ef_search: hnsw_ef_search.max(DEFAULT_HNSW_EF_SEARCH),
            distance_metric,
            next_index,
        })
    }

    pub fn open_with_dimensions(path: impl AsRef<Path>, dimensions: usize) -> Result<Self> {
        let mut store = Self::open(path)?;
        if store.dimensions == 0 {
            store.dimensions = dimensions;
            if let Some(ref mut storage) = store.storage {
                storage.put_config("dimensions", dimensions as u64)?;
            }
        }
        Ok(store)
    }

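    /// Opens the store at `path` if it already exists, otherwise creates one
    /// configured from `options` (dimensions, HNSW parameters, distance
    /// metric, quantization, and text-search configuration).
    ///
    /// Sketch assuming `VectorStoreOptions` exposes these fields and a
    /// `Default` implementation (not shown in this module):
    ///
    /// ```ignore
    /// let options = VectorStoreOptions {
    ///     dimensions: 384,
    ///     ..Default::default()
    /// };
    /// let store = VectorStore::open_with_options("data/my_store", &options)?;
    /// ```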
    pub fn open_with_options(path: impl AsRef<Path>, options: &VectorStoreOptions) -> Result<Self> {
        let path = path.as_ref();
        let omen_path = OmenFile::compute_omen_path(path);

        if path.exists() || omen_path.exists() {
            let mut store = Self::open(path)?;

            if store.dimensions == 0 && options.dimensions > 0 {
                store.dimensions = options.dimensions;
                if let Some(ref mut storage) = store.storage {
                    storage.put_config("dimensions", options.dimensions as u64)?;
                }
            }

            if let Some(ef) = options.ef_search {
                store.set_ef_search(ef);
            }

            return Ok(store);
        }

        let mut storage = OmenFile::create(path, options.dimensions as u32)?;
        let dimensions = options.dimensions;

        let m = options.m.unwrap_or(16);
        let ef_construction = options.ef_construction.unwrap_or(100);
        let ef_search = options.ef_search.unwrap_or(100);

        let distance_metric = options.metric.unwrap_or(Metric::L2);

        let (hnsw_index, pending_quantization) = if options.quantization.is_some() {
            (None, options.quantization.clone())
        } else if dimensions > 0 {
            if options.m.is_some() || options.ef_construction.is_some() {
                (
                    Some(HNSWIndex::new_with_params(
                        10_000,
                        dimensions,
                        m,
                        ef_construction,
                        ef_search,
                        distance_metric.into(),
                    )?),
                    None,
                )
            } else {
                (None, None)
            }
        } else {
            (None, None)
        };

        if dimensions > 0 {
            storage.put_config("dimensions", dimensions as u64)?;
        }

        let text_index = if let Some(ref config) = options.text_search_config {
            let text_path = path.join("text_index");
            Some(TextIndex::open_with_config(&text_path, config)?)
        } else {
            None
        };

        let rescore_enabled = options.rescore.unwrap_or(options.quantization.is_some());
        let oversample_factor = options
            .oversample
            .unwrap_or_else(|| default_oversample_for_quantization(options.quantization.as_ref()));

        let distance_metric = options.metric.unwrap_or(Metric::L2);

        Ok(Self {
            vectors: Vec::new(),
            hnsw_index,
            dimensions,
            rescore_enabled,
            oversample_factor,
            metadata: HashMap::new(),
            id_to_index: FxHashMap::default(),
            index_to_id: FxHashMap::default(),
            deleted: HashMap::new(),
            metadata_index: MetadataIndex::new(),
            storage: Some(storage),
            storage_path: Some(path.to_path_buf()),
            text_index,
            text_search_config: options.text_search_config.clone(),
            pending_quantization,
            hnsw_m: m,
            hnsw_ef_construction: ef_construction,
            hnsw_ef_search: ef_search,
            distance_metric,
            next_index: 0,
        })
    }

    pub fn build_with_options(options: &VectorStoreOptions) -> Result<Self> {
        let dimensions = options.dimensions;

        let m = options.m.unwrap_or(16);
        let ef_construction = options.ef_construction.unwrap_or(100);
        let ef_search = options.ef_search.unwrap_or(100);

        let distance_metric = options.metric.unwrap_or(Metric::L2);

        let (hnsw_index, pending_quantization) = if options.quantization.is_some() {
            (None, options.quantization.clone())
        } else if dimensions > 0 {
            if options.m.is_some() || options.ef_construction.is_some() {
                (
                    Some(HNSWIndex::new_with_params(
                        10_000,
                        dimensions,
                        m,
                        ef_construction,
                        ef_search,
                        distance_metric.into(),
                    )?),
                    None,
                )
            } else {
                (None, None)
            }
        } else {
            (None, None)
        };

        let text_index = if let Some(ref config) = options.text_search_config {
            Some(TextIndex::open_in_memory_with_config(config)?)
        } else {
            None
        };

        let rescore_enabled = options.rescore.unwrap_or(options.quantization.is_some());
        let oversample_factor = options
            .oversample
            .unwrap_or_else(|| default_oversample_for_quantization(options.quantization.as_ref()));

        Ok(Self {
            vectors: Vec::new(),
            hnsw_index,
            dimensions,
            rescore_enabled,
            oversample_factor,
            metadata: HashMap::new(),
            id_to_index: FxHashMap::default(),
            index_to_id: FxHashMap::default(),
            deleted: HashMap::new(),
            metadata_index: MetadataIndex::new(),
            storage: None,
            storage_path: None,
            text_index,
            text_search_config: options.text_search_config.clone(),
            pending_quantization,
            hnsw_m: m,
            hnsw_ef_construction: ef_construction,
            hnsw_ef_search: ef_search,
            distance_metric,
            next_index: 0,
        })
    }

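    /// Inserts a raw vector and returns its internal index. The first insert
    /// fixes the store's dimensionality (when it was created with 0) and
    /// lazily builds the HNSW index, honouring any pending quantization mode;
    /// later inserts must match the stored dimension.
    ///
    /// ```ignore
    /// let internal_id = store.insert(Vector::new(vec![0.0; 768]))?;
    /// ```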
    pub fn insert(&mut self, vector: Vector) -> Result<usize> {
        let id = self.next_index;

        if self.hnsw_index.is_none() {
            let dimensions = if self.dimensions == 0 {
                vector.dim()
            } else {
                if vector.dim() != self.dimensions {
                    anyhow::bail!(
                        "Vector dimension mismatch: store expects {}, got {}",
                        self.dimensions,
                        vector.dim()
                    );
                }
                self.dimensions
            };

            if let Some(quant_mode) = self.pending_quantization.take() {
                let hnsw_params = HNSWParams::default()
                    .with_m(self.hnsw_m)
                    .with_ef_construction(self.hnsw_ef_construction)
                    .with_ef_search(self.hnsw_ef_search);

                let quant_mode_id = match &quant_mode {
                    QuantizationMode::Binary => 5u64,
                    QuantizationMode::SQ8 => 1u64,
                    QuantizationMode::RaBitQ(p) => match p.bits_per_dim.to_u8() {
                        2 => 3u64,
                        8 => 4u64,
                        _ => 2u64,
                    },
                };
                if let Some(ref mut storage) = self.storage {
                    storage.put_quantization_mode(quant_mode_id)?;
                }

                let index = match quant_mode {
                    QuantizationMode::Binary => {
                        let mut idx = HNSWIndex::new_with_binary(
                            dimensions,
                            hnsw_params,
                            self.distance_metric.into(),
                        )?;
                        idx.train_quantizer(std::slice::from_ref(&vector.data))?;
                        idx
                    }
                    QuantizationMode::SQ8 => HNSWIndex::new_with_sq8(
                        dimensions,
                        hnsw_params,
                        self.distance_metric.into(),
                    )?,
                    QuantizationMode::RaBitQ(params) => {
                        let mut idx = HNSWIndex::new_with_asymmetric(
                            dimensions,
                            hnsw_params,
                            self.distance_metric.into(),
                            params,
                        )?;
                        idx.train_quantizer(std::slice::from_ref(&vector.data))?;
                        idx
                    }
                };
                self.hnsw_index = Some(index);
            } else {
                self.hnsw_index = Some(HNSWIndex::new_with_params(
                    10_000,
                    dimensions,
                    self.hnsw_m,
                    self.hnsw_ef_construction,
                    self.hnsw_ef_search,
                    self.distance_metric.into(),
                )?);
            }
            self.dimensions = dimensions;
        } else if vector.dim() != self.dimensions {
            anyhow::bail!(
                "Vector dimension mismatch: store expects {}, got {}. All vectors in the same store must have the same dimension.",
                self.dimensions,
                vector.dim()
            );
        }

        if let Some(ref mut index) = self.hnsw_index {
            index.insert(&vector.data)?;
        }

        if let Some(ref mut storage) = self.storage {
            storage.put_vector(id, &vector.data)?;
            storage.increment_count()?;
            if id == 0 {
                storage.put_config("dimensions", self.dimensions as u64)?;
            }
        }

        if !self.is_quantized() || self.storage.is_none() {
            self.vectors.push(vector);
        }

        self.next_index += 1;

        Ok(id)
    }

    pub fn insert_with_metadata(
        &mut self,
        id: String,
        vector: Vector,
        metadata: JsonValue,
    ) -> Result<usize> {
        if self.id_to_index.contains_key(&id) {
            anyhow::bail!("Vector with ID '{id}' already exists. Use set() to update.");
        }

        let index = self.insert(vector)?;

        self.metadata.insert(index, metadata.clone());
        self.metadata_index.index_json(index as u32, &metadata);
        self.id_to_index.insert(id.clone(), index);
        self.index_to_id.insert(index, id.clone());

        debug_assert_mapping_consistency(&self.id_to_index, &self.index_to_id);

        if let Some(ref mut storage) = self.storage {
            storage.put_metadata(index, &metadata)?;
            storage.put_id_mapping(&id, index)?;
        }

        Ok(index)
    }

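    /// Upserts by string ID: updates the vector and metadata in place when
    /// `id` already exists, otherwise inserts a new entry. Returns the
    /// internal index either way.
    ///
    /// ```ignore
    /// store.set(
    ///     "doc-1".to_string(),
    ///     Vector::new(vec![0.1, 0.2, 0.3]),
    ///     serde_json::json!({ "version": 2 }),
    /// )?;
    /// ```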
    pub fn set(&mut self, id: String, vector: Vector, metadata: JsonValue) -> Result<usize> {
        if let Some(&index) = self.id_to_index.get(&id) {
            self.update_by_index(index, Some(vector), Some(metadata))?;
            Ok(index)
        } else {
            self.insert_with_metadata(id, vector, metadata)
        }
    }

    pub fn set_batch(&mut self, batch: Vec<(String, Vector, JsonValue)>) -> Result<Vec<usize>> {
        if batch.is_empty() {
            return Ok(Vec::new());
        }

        let mut updates: Vec<(usize, Vector, JsonValue)> = Vec::new();
        let mut inserts: Vec<(String, Vector, JsonValue)> = Vec::new();

        for (id, vector, metadata) in batch {
            if let Some(&index) = self.id_to_index.get(&id) {
                updates.push((index, vector, metadata));
            } else {
                inserts.push((id, vector, metadata));
            }
        }

        let mut result_indices = Vec::new();

        for (index, vector, metadata) in updates {
            self.update_by_index(index, Some(vector), Some(metadata))?;
            result_indices.push(index);
        }

        if !inserts.is_empty() {
            if self.hnsw_index.is_none() {
                let dimensions = if self.dimensions == 0 {
                    inserts[0].1.dim()
                } else {
                    self.dimensions
                };

                if let Some(quant_mode) = self.pending_quantization.take() {
                    let hnsw_params = HNSWParams::default()
                        .with_m(self.hnsw_m)
                        .with_ef_construction(self.hnsw_ef_construction)
                        .with_ef_search(self.hnsw_ef_search);

                    let quant_mode_id = match &quant_mode {
                        QuantizationMode::Binary => 5u64,
                        QuantizationMode::SQ8 => 1u64,
                        QuantizationMode::RaBitQ(p) => match p.bits_per_dim.to_u8() {
                            2 => 3u64,
                            8 => 4u64,
                            _ => 2u64,
                        },
                    };
                    if let Some(ref mut storage) = self.storage {
                        storage.put_quantization_mode(quant_mode_id)?;
                    }

                    let index = match quant_mode {
                        QuantizationMode::Binary => {
                            let mut idx = HNSWIndex::new_with_binary(
                                dimensions,
                                hnsw_params,
                                self.distance_metric.into(),
                            )?;
                            let training_vectors: Vec<Vec<f32>> =
                                inserts.iter().map(|(_, v, _)| v.data.clone()).collect();
                            idx.train_quantizer(&training_vectors)?;
                            idx
                        }
                        QuantizationMode::SQ8 => HNSWIndex::new_with_sq8(
                            dimensions,
                            hnsw_params,
                            self.distance_metric.into(),
                        )?,
                        QuantizationMode::RaBitQ(params) => {
                            let mut idx = HNSWIndex::new_with_asymmetric(
                                dimensions,
                                hnsw_params,
                                self.distance_metric.into(),
                                params,
                            )?;
                            let training_vectors: Vec<Vec<f32>> =
                                inserts.iter().map(|(_, v, _)| v.data.clone()).collect();
                            idx.train_quantizer(&training_vectors)?;
                            idx
                        }
                    };

                    self.hnsw_index = Some(index);
                } else {
                    self.hnsw_index = Some(HNSWIndex::new_with_params(
                        10_000,
                        dimensions,
                        self.hnsw_m,
                        self.hnsw_ef_construction,
                        self.hnsw_ef_search,
                        self.distance_metric.into(),
                    )?);
                }
                self.dimensions = dimensions;
            }

            for (i, (_, vector, _)) in inserts.iter().enumerate() {
                if vector.dim() != self.dimensions {
                    anyhow::bail!(
                        "Vector {} dimension mismatch: expected {}, got {}",
                        i,
                        self.dimensions,
                        vector.dim()
                    );
                }
            }

            let vectors_data: Vec<Vec<f32>> =
                inserts.iter().map(|(_, v, _)| v.data.clone()).collect();

            let base_index = self.next_index;
            let insert_count = inserts.len();
            if let Some(ref mut index) = self.hnsw_index {
                index.batch_insert(&vectors_data)?;
            }

            if let Some(ref mut storage) = self.storage {
                if base_index == 0 {
                    storage.put_config("dimensions", self.dimensions as u64)?;
                }

                let batch_items: Vec<(usize, String, Vec<f32>, serde_json::Value)> = inserts
                    .iter()
                    .enumerate()
                    .map(|(i, (id, vector, metadata))| {
                        (
                            base_index + i,
                            id.clone(),
                            vector.data.clone(),
                            metadata.clone(),
                        )
                    })
                    .collect();

                storage.put_batch(batch_items)?;
            }

            let skip_ram = self.is_quantized() && self.storage.is_some();
            for (i, (id, vector, metadata)) in inserts.into_iter().enumerate() {
                let idx = base_index + i;
                if !skip_ram {
                    self.vectors.push(vector);
                }
                self.metadata.insert(idx, metadata.clone());
                self.metadata_index.index_json(idx as u32, &metadata);
                self.index_to_id.insert(idx, id.clone());
                self.id_to_index.insert(id, idx);
                result_indices.push(idx);
            }

            self.next_index += insert_count;

            debug_assert_mapping_consistency(&self.id_to_index, &self.index_to_id);
        }

        Ok(result_indices)
    }

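    /// Enables full-text search with the default configuration, creating the
    /// text index next to the store when it is persistent or in memory
    /// otherwise. Does nothing if a text index already exists.
    ///
    /// ```ignore
    /// store.enable_text_search()?;
    /// store.set_with_text(
    ///     "doc-1".to_string(),
    ///     Vector::new(vec![0.1, 0.2, 0.3]),
    ///     "a short document body",
    ///     serde_json::json!({}),
    /// )?;
    /// ```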
    pub fn enable_text_search(&mut self) -> Result<()> {
        self.enable_text_search_with_config(None)
    }

    pub fn enable_text_search_with_config(
        &mut self,
        config: Option<TextSearchConfig>,
    ) -> Result<()> {
        if self.text_index.is_some() {
            return Ok(());
        }

        let config = config
            .or_else(|| self.text_search_config.clone())
            .unwrap_or_default();

        self.text_index = if let Some(ref path) = self.storage_path {
            let text_path = path.join("text_index");
            Some(TextIndex::open_with_config(&text_path, &config)?)
        } else {
            Some(TextIndex::open_in_memory_with_config(&config)?)
        };

        Ok(())
    }

    #[must_use]
    pub fn has_text_search(&self) -> bool {
        self.text_index.is_some()
    }

    pub fn set_with_text(
        &mut self,
        id: String,
        vector: Vector,
        text: &str,
        metadata: JsonValue,
    ) -> Result<usize> {
        let Some(ref mut text_index) = self.text_index else {
            anyhow::bail!("Text search not enabled. Call enable_text_search() first.");
        };

        text_index.index_document(&id, text)?;
        self.set(id, vector, metadata)
    }

    pub fn set_batch_with_text(
        &mut self,
        batch: Vec<(String, Vector, String, JsonValue)>,
    ) -> Result<Vec<usize>> {
        let Some(ref mut text_index) = self.text_index else {
            anyhow::bail!("Text search not enabled. Call enable_text_search() first.");
        };

        for (id, _, text, _) in &batch {
            text_index.index_document(id, text)?;
        }

        let vector_batch: Vec<(String, Vector, JsonValue)> = batch
            .into_iter()
            .map(|(id, vector, _, metadata)| (id, vector, metadata))
            .collect();

        self.set_batch(vector_batch)
    }

    pub fn text_search(&self, query: &str, k: usize) -> Result<Vec<(String, f32)>> {
        let Some(ref text_index) = self.text_index else {
            anyhow::bail!("Text search not enabled. Call enable_text_search() first.");
        };

        text_index.search(query, k)
    }

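    /// Runs vector k-NN and full-text search for the same query and fuses the
    /// two ranked lists with weighted reciprocal rank fusion. `alpha` balances
    /// the two lists (0.5 when `None`) and the RRF constant defaults to
    /// `DEFAULT_RRF_K`; use `hybrid_search_with_rrf_k` to override it.
    ///
    /// ```ignore
    /// let hits = store.hybrid_search(&query_vector, "rust hnsw index", 10, Some(0.7))?;
    /// for (id, score, metadata) in hits {
    ///     println!("{id}: {score}");
    /// }
    /// ```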
    pub fn hybrid_search(
        &mut self,
        query_vector: &Vector,
        query_text: &str,
        k: usize,
        alpha: Option<f32>,
    ) -> Result<Vec<(String, f32, JsonValue)>> {
        self.hybrid_search_with_rrf_k(query_vector, query_text, k, alpha, None)
    }

    pub fn hybrid_search_with_rrf_k(
        &mut self,
        query_vector: &Vector,
        query_text: &str,
        k: usize,
        alpha: Option<f32>,
        rrf_k: Option<usize>,
    ) -> Result<Vec<(String, f32, JsonValue)>> {
        if query_vector.data.len() != self.dimensions {
            anyhow::bail!(
                "Query vector dimension {} does not match store dimension {}",
                query_vector.data.len(),
                self.dimensions
            );
        }
        if self.text_index.is_none() {
            anyhow::bail!("Text search not enabled. Call enable_text_search() first.");
        }

        let fetch_k = k * 2;

        let vector_results = self.knn_search(query_vector, fetch_k)?;
        let vector_results: Vec<(String, f32)> = vector_results
            .into_iter()
            .filter_map(|(idx, distance)| {
                self.index_to_id.get(&idx).map(|id| (id.clone(), distance))
            })
            .collect();

        let text_results = self.text_search(query_text, fetch_k)?;

        let fused = weighted_reciprocal_rank_fusion(
            vector_results,
            text_results,
            k,
            rrf_k.unwrap_or(DEFAULT_RRF_K),
            alpha.unwrap_or(0.5),
        );

        Ok(self.attach_metadata(fused))
    }

    pub fn hybrid_search_with_filter(
        &mut self,
        query_vector: &Vector,
        query_text: &str,
        k: usize,
        filter: &MetadataFilter,
        alpha: Option<f32>,
    ) -> Result<Vec<(String, f32, JsonValue)>> {
        self.hybrid_search_with_filter_rrf_k(query_vector, query_text, k, filter, alpha, None)
    }

    pub fn hybrid_search_with_filter_rrf_k(
        &mut self,
        query_vector: &Vector,
        query_text: &str,
        k: usize,
        filter: &MetadataFilter,
        alpha: Option<f32>,
        rrf_k: Option<usize>,
    ) -> Result<Vec<(String, f32, JsonValue)>> {
        if query_vector.data.len() != self.dimensions {
            anyhow::bail!(
                "Query vector dimension {} does not match store dimension {}",
                query_vector.data.len(),
                self.dimensions
            );
        }
        if self.text_index.is_none() {
            anyhow::bail!("Text search not enabled. Call enable_text_search() first.");
        }

        let fetch_k = k * 4;

        let vector_results = self.knn_search_with_filter(query_vector, fetch_k, filter)?;
        let vector_results: Vec<(String, f32)> = vector_results
            .into_iter()
            .filter_map(|(idx, distance, _)| {
                self.index_to_id.get(&idx).map(|id| (id.clone(), distance))
            })
            .collect();

        let text_results = self.text_search(query_text, fetch_k)?;
        let text_results: Vec<(String, f32)> = text_results
            .into_iter()
            .filter(|(id, _)| {
                self.id_to_index
                    .get(id)
                    .and_then(|&idx| self.metadata.get(&idx))
                    .is_some_and(|meta| filter.matches(meta))
            })
            .collect();

        let fused = weighted_reciprocal_rank_fusion(
            vector_results,
            text_results,
            k,
            rrf_k.unwrap_or(DEFAULT_RRF_K),
            alpha.unwrap_or(0.5),
        );

        Ok(self.attach_metadata(fused))
    }

    fn attach_metadata(&self, results: Vec<(String, f32)>) -> Vec<(String, f32, JsonValue)> {
        results
            .into_iter()
            .map(|(id, score)| {
                let metadata = self
                    .id_to_index
                    .get(&id)
                    .and_then(|&idx| self.metadata.get(&idx))
                    .cloned()
                    .unwrap_or(serde_json::json!({}));
                (id, score, metadata)
            })
            .collect()
    }

    pub fn hybrid_search_with_subscores(
        &mut self,
        query_vector: &Vector,
        query_text: &str,
        k: usize,
        alpha: Option<f32>,
        rrf_k: Option<usize>,
    ) -> Result<Vec<(HybridResult, JsonValue)>> {
        if query_vector.data.len() != self.dimensions {
            anyhow::bail!(
                "Query vector dimension {} does not match store dimension {}",
                query_vector.data.len(),
                self.dimensions
            );
        }
        if self.text_index.is_none() {
            anyhow::bail!("Text search not enabled. Call enable_text_search() first.");
        }

        let fetch_k = k * 2;

        let vector_results = self.knn_search(query_vector, fetch_k)?;
        let vector_results: Vec<(String, f32)> = vector_results
            .into_iter()
            .filter_map(|(idx, distance)| {
                self.index_to_id.get(&idx).map(|id| (id.clone(), distance))
            })
            .collect();

        let text_results = self.text_search(query_text, fetch_k)?;

        let fused = weighted_reciprocal_rank_fusion_with_subscores(
            vector_results,
            text_results,
            k,
            rrf_k.unwrap_or(DEFAULT_RRF_K),
            alpha.unwrap_or(0.5),
        );

        Ok(self.attach_metadata_to_hybrid_results(fused))
    }

    pub fn hybrid_search_with_filter_subscores(
        &mut self,
        query_vector: &Vector,
        query_text: &str,
        k: usize,
        filter: &MetadataFilter,
        alpha: Option<f32>,
        rrf_k: Option<usize>,
    ) -> Result<Vec<(HybridResult, JsonValue)>> {
        if query_vector.data.len() != self.dimensions {
            anyhow::bail!(
                "Query vector dimension {} does not match store dimension {}",
                query_vector.data.len(),
                self.dimensions
            );
        }
        if self.text_index.is_none() {
            anyhow::bail!("Text search not enabled. Call enable_text_search() first.");
        }

        let fetch_k = k * 4;

        let vector_results = self.knn_search_with_filter(query_vector, fetch_k, filter)?;
        let vector_results: Vec<(String, f32)> = vector_results
            .into_iter()
            .filter_map(|(idx, distance, _)| {
                self.index_to_id.get(&idx).map(|id| (id.clone(), distance))
            })
            .collect();

        let text_results = self.text_search(query_text, fetch_k)?;
        let text_results: Vec<(String, f32)> = text_results
            .into_iter()
            .filter(|(id, _)| {
                self.id_to_index
                    .get(id)
                    .and_then(|&idx| self.metadata.get(&idx))
                    .is_some_and(|meta| filter.matches(meta))
            })
            .collect();

        let fused = weighted_reciprocal_rank_fusion_with_subscores(
            vector_results,
            text_results,
            k,
            rrf_k.unwrap_or(DEFAULT_RRF_K),
            alpha.unwrap_or(0.5),
        );

        Ok(self.attach_metadata_to_hybrid_results(fused))
    }

    fn attach_metadata_to_hybrid_results(
        &self,
        results: Vec<HybridResult>,
    ) -> Vec<(HybridResult, JsonValue)> {
        results
            .into_iter()
            .map(|result| {
                let metadata = self
                    .id_to_index
                    .get(&result.id)
                    .and_then(|&idx| self.metadata.get(&idx))
                    .cloned()
                    .unwrap_or(serde_json::json!({}));
                (result, metadata)
            })
            .collect()
    }

    fn update_by_index(
        &mut self,
        index: usize,
        vector: Option<Vector>,
        metadata: Option<JsonValue>,
    ) -> Result<()> {
        if index >= self.next_index {
            anyhow::bail!("Vector index {index} does not exist");
        }
        if self.deleted.contains_key(&index) {
            anyhow::bail!("Vector index {index} has been deleted");
        }

        if let Some(new_vector) = vector {
            if new_vector.dim() != self.dimensions {
                anyhow::bail!(
                    "Vector dimension mismatch: expected {}, got {}",
                    self.dimensions,
                    new_vector.dim()
                );
            }

            if let Some(v) = self.vectors.get_mut(index) {
                *v = new_vector.clone();
            }

            if let Some(ref mut storage) = self.storage {
                storage.put_vector(index, &new_vector.data)?;
            }
        }

        if let Some(ref new_metadata) = metadata {
            self.metadata_index.remove(index as u32);
            self.metadata_index.index_json(index as u32, new_metadata);
            self.metadata.insert(index, new_metadata.clone());

            if let Some(ref mut storage) = self.storage {
                storage.put_metadata(index, new_metadata)?;
            }
        }

        Ok(())
    }

    pub fn update(
        &mut self,
        id: &str,
        vector: Option<Vector>,
        metadata: Option<JsonValue>,
    ) -> Result<()> {
        let index = self
            .id_to_index
            .get(id)
            .copied()
            .ok_or_else(|| anyhow::anyhow!("Vector with ID '{id}' not found"))?;

        self.update_by_index(index, vector, metadata)
    }

    pub fn delete(&mut self, id: &str) -> Result<()> {
        let index = self
            .id_to_index
            .get(id)
            .copied()
            .ok_or_else(|| anyhow::anyhow!("Vector with ID '{id}' not found"))?;

        self.deleted.insert(index, true);
        self.metadata_index.remove(index as u32);

        if let Some(ref mut hnsw) = self.hnsw_index {
            if let Err(e) = hnsw.mark_deleted(index as u32) {
                tracing::warn!(
                    id = id,
                    index = index,
                    error = ?e,
                    "Failed to repair HNSW graph after deletion"
                );
            }
        }

        if let Some(ref mut storage) = self.storage {
            storage.delete(id)?;
        }

        if let Some(ref mut text_index) = self.text_index {
            text_index.delete_document(id)?;
        }

        self.id_to_index.remove(id);
        self.index_to_id.remove(&index);

        debug_assert_mapping_consistency(&self.id_to_index, &self.index_to_id);

        Ok(())
    }

    pub fn delete_batch(&mut self, ids: &[String]) -> Result<usize> {
        let mut node_ids: Vec<u32> = Vec::with_capacity(ids.len());
        let mut valid_ids: Vec<String> = Vec::with_capacity(ids.len());

        for id in ids {
            if let Some(&index) = self.id_to_index.get(id) {
                self.deleted.insert(index, true);
                self.metadata_index.remove(index as u32);
                node_ids.push(index as u32);
                valid_ids.push(id.clone());
            }
        }

        if !node_ids.is_empty() {
            if let Some(ref mut hnsw) = self.hnsw_index {
                if let Err(e) = hnsw.mark_deleted_batch(&node_ids) {
                    tracing::warn!(
                        count = node_ids.len(),
                        error = ?e,
                        "Failed to batch repair HNSW graph after deletion"
                    );
                }
            }
        }

        for (id, &node_id) in valid_ids.iter().zip(node_ids.iter()) {
            if let Some(ref mut storage) = self.storage {
                if let Err(e) = storage.delete(id) {
                    tracing::warn!(id = %id, error = ?e, "Failed to persist deletion to storage");
                }
            }
            if let Some(ref mut text_index) = self.text_index {
                if let Err(e) = text_index.delete_document(id) {
                    tracing::warn!(id = %id, error = ?e, "Failed to delete from text index");
                }
            }
            self.id_to_index.remove(id);
            self.index_to_id.remove(&(node_id as usize));
        }

        debug_assert_mapping_consistency(&self.id_to_index, &self.index_to_id);

        Ok(valid_ids.len())
    }

    pub fn delete_by_filter(&mut self, filter: &MetadataFilter) -> Result<usize> {
        let ids_to_delete: Vec<String> = self
            .id_to_index
            .iter()
            .filter_map(|(id, &idx)| {
                if self.deleted.contains_key(&idx) {
                    return None;
                }
                let metadata = self.metadata.get(&idx)?;
                if filter.matches(metadata) {
                    Some(id.clone())
                } else {
                    None
                }
            })
            .collect();

        if ids_to_delete.is_empty() {
            return Ok(0);
        }

        self.delete_batch(&ids_to_delete)
    }

    #[must_use]
    pub fn count_by_filter(&self, filter: &MetadataFilter) -> usize {
        self.id_to_index
            .iter()
            .filter(|(_, &idx)| {
                if self.deleted.contains_key(&idx) {
                    return false;
                }
                self.metadata
                    .get(&idx)
                    .is_some_and(|metadata| filter.matches(metadata))
            })
            .count()
    }

    #[must_use]
    pub fn get(&self, id: &str) -> Option<(Vector, JsonValue)> {
        let &index = self.id_to_index.get(id)?;
        if self.deleted.contains_key(&index) {
            return None;
        }

        if let Some(vec) = self.vectors.get(index) {
            return self
                .metadata
                .get(&index)
                .map(|meta| (vec.clone(), meta.clone()));
        }

        if let Some(ref storage) = self.storage {
            if let Ok(Some(vec_data)) = storage.get_vector(index) {
                return self
                    .metadata
                    .get(&index)
                    .map(|meta| (Vector::new(vec_data), meta.clone()));
            }
        }

        None
    }

    #[must_use]
    pub fn get_batch(&self, ids: &[impl AsRef<str>]) -> Vec<Option<(Vector, JsonValue)>> {
        ids.iter().map(|id| self.get(id.as_ref())).collect()
    }

    #[must_use]
    pub fn get_metadata_by_id(&self, id: &str) -> Option<&JsonValue> {
        self.id_to_index.get(id).and_then(|&index| {
            if self.deleted.contains_key(&index) {
                return None;
            }
            self.metadata.get(&index)
        })
    }

    pub fn batch_insert(&mut self, vectors: Vec<Vector>) -> Result<Vec<usize>> {
        const CHUNK_SIZE: usize = 10_000;

        if vectors.is_empty() {
            return Ok(Vec::new());
        }

        for (i, vector) in vectors.iter().enumerate() {
            if vector.dim() != self.dimensions {
                anyhow::bail!(
                    "Vector {} dimension mismatch: expected {}, got {}",
                    i,
                    self.dimensions,
                    vector.dim()
                );
            }
        }

        if self.hnsw_index.is_none() {
            if let Some(quant_mode) = self.pending_quantization.take() {
                let hnsw_params = HNSWParams::default()
                    .with_m(self.hnsw_m)
                    .with_ef_construction(self.hnsw_ef_construction)
                    .with_ef_search(self.hnsw_ef_search);

                let index = match quant_mode {
                    QuantizationMode::Binary => {
                        let mut idx = HNSWIndex::new_with_binary(
                            self.dimensions,
                            hnsw_params,
                            self.distance_metric.into(),
                        )?;
                        let training_vectors: Vec<Vec<f32>> =
                            vectors.iter().map(|v| v.data.clone()).collect();
                        idx.train_quantizer(&training_vectors)?;
                        idx
                    }
                    QuantizationMode::SQ8 => HNSWIndex::new_with_sq8(
                        self.dimensions,
                        hnsw_params,
                        self.distance_metric.into(),
                    )?,
                    QuantizationMode::RaBitQ(params) => {
                        let mut idx = HNSWIndex::new_with_asymmetric(
                            self.dimensions,
                            hnsw_params,
                            self.distance_metric.into(),
                            params,
                        )?;
                        let training_vectors: Vec<Vec<f32>> =
                            vectors.iter().map(|v| v.data.clone()).collect();
                        idx.train_quantizer(&training_vectors)?;
                        idx
                    }
                };

                self.hnsw_index = Some(index);
            } else {
                let capacity = vectors.len().max(1_000_000);
                self.hnsw_index = Some(HNSWIndex::new_with_params(
                    capacity,
                    self.dimensions,
                    self.hnsw_m,
                    self.hnsw_ef_construction,
                    self.hnsw_ef_search,
                    self.distance_metric.into(),
                )?);
            }
        }

        let _start_id = self.vectors.len();
        let mut all_ids = Vec::with_capacity(vectors.len());

        for chunk in vectors.chunks(CHUNK_SIZE) {
            let vector_data: Vec<Vec<f32>> = chunk.iter().map(|v| v.data.clone()).collect();

            if let Some(ref mut index) = self.hnsw_index {
                let chunk_ids = index.batch_insert(&vector_data)?;
                all_ids.extend(chunk_ids);
            }
        }

        self.vectors.extend(vectors);
        Ok(all_ids)
    }

    pub fn rebuild_index(&mut self) -> Result<()> {
        if self.vectors.is_empty() {
            return Ok(());
        }

        let mut index = HNSWIndex::new_with_params(
            self.vectors.len().max(1_000_000),
            self.dimensions,
            self.hnsw_m,
            self.hnsw_ef_construction,
            self.hnsw_ef_search,
            self.distance_metric.into(),
        )?;

        for vector in &self.vectors {
            index.insert(&vector.data)?;
        }

        self.hnsw_index = Some(index);
        Ok(())
    }

    pub fn merge_from(&mut self, other: &VectorStore) -> Result<usize> {
        if other.dimensions != self.dimensions {
            anyhow::bail!(
                "Dimension mismatch: self={}, other={}",
                self.dimensions,
                other.dimensions
            );
        }

        if other.vectors.is_empty() {
            return Ok(0);
        }

        if self.hnsw_index.is_none() {
            let capacity = (self.vectors.len() + other.vectors.len()).max(1_000_000);
            self.hnsw_index = Some(HNSWIndex::new_with_params(
                capacity,
                self.dimensions,
                self.hnsw_m,
                self.hnsw_ef_construction,
                self.hnsw_ef_search,
                self.distance_metric.into(),
            )?);
        }

        let mut merged_count = 0;
        let base_index = self.vectors.len();

        for (other_idx, vector) in other.vectors.iter().enumerate() {
            let has_conflict = other
                .id_to_index
                .iter()
                .find(|(_, &idx)| idx == other_idx)
                .is_some_and(|(string_id, _)| self.id_to_index.contains_key(string_id));

            if has_conflict {
                continue;
            }

            self.vectors.push(vector.clone());

            if let Some(meta) = other.metadata.get(&other_idx) {
                self.metadata
                    .insert(base_index + merged_count, meta.clone());
            }

            if let Some((string_id, _)) =
                other.id_to_index.iter().find(|(_, &idx)| idx == other_idx)
            {
                self.id_to_index
                    .insert(string_id.clone(), base_index + merged_count);
            }

            merged_count += 1;
        }

        self.rebuild_index()?;

        Ok(merged_count)
    }

    #[inline]
    #[must_use]
    pub fn needs_index_rebuild(&self) -> bool {
        self.hnsw_index.is_none() && self.vectors.len() > 100
    }

    pub fn ensure_index_ready(&mut self) -> Result<()> {
        if self.needs_index_rebuild() {
            self.rebuild_index()?;
        }
        Ok(())
    }

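    /// Returns the `k` nearest neighbours of `query` as `(internal_index,
    /// distance)` pairs, building the HNSW index first if enough vectors were
    /// inserted without one.
    ///
    /// ```ignore
    /// let neighbours = store.knn_search(&Vector::new(vec![0.0; 768]), 5)?;
    /// ```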
    pub fn knn_search(&mut self, query: &Vector, k: usize) -> Result<Vec<(usize, f32)>> {
        self.knn_search_with_ef(query, k, None)
    }

    pub fn knn_search_with_ef(
        &mut self,
        query: &Vector,
        k: usize,
        ef: Option<usize>,
    ) -> Result<Vec<(usize, f32)>> {
        self.ensure_index_ready()?;
        self.knn_search_readonly(query, k, ef)
    }

    #[inline]
    pub fn knn_search_readonly(
        &self,
        query: &Vector,
        k: usize,
        ef: Option<usize>,
    ) -> Result<Vec<(usize, f32)>> {
        let effective_ef = compute_effective_ef(ef, self.hnsw_ef_search, k);
        self.knn_search_ef(query, k, effective_ef)
    }

    #[inline]
    pub fn knn_search_ef(&self, query: &Vector, k: usize, ef: usize) -> Result<Vec<(usize, f32)>> {
        if query.dim() != self.dimensions {
            anyhow::bail!(
                "Query dimension mismatch: expected {}, got {}",
                self.dimensions,
                query.dim()
            );
        }

        let has_data =
            !self.vectors.is_empty() || self.hnsw_index.as_ref().is_some_and(|idx| !idx.is_empty());

        if !has_data {
            return Ok(Vec::new());
        }

        if let Some(ref index) = self.hnsw_index {
            let results = if index.is_asymmetric() {
                let can_rescore = self.storage.is_some() || !self.vectors.is_empty();
                if self.rescore_enabled && can_rescore {
                    self.knn_search_with_rescore(query, k, ef)?
                } else {
                    index.search_asymmetric_ef(&query.data, k, ef)?
                }
            } else {
                index.search_ef(&query.data, k, ef)?
            };

            if results.is_empty() && self.has_live_vectors() {
                return self.knn_search_brute_force(query, k);
            }
            return Ok(results);
        }

        self.knn_search_brute_force(query, k)
    }

    fn knn_search_with_rescore(
        &self,
        query: &Vector,
        k: usize,
        ef: usize,
    ) -> Result<Vec<(usize, f32)>> {
        let index = self
            .hnsw_index
            .as_ref()
            .ok_or_else(|| anyhow::anyhow!("HNSW index required for rescore"))?;

        let oversample_k = ((k as f32) * self.oversample_factor).ceil() as usize;
        let candidates = index.search_asymmetric_ef(&query.data, oversample_k, ef)?;

        if candidates.is_empty() {
            return Ok(Vec::new());
        }

        let mut rescored: Vec<(usize, f32)> = candidates
            .iter()
            .filter_map(|&(id, _quantized_dist)| {
                if let Some(ref storage) = self.storage {
                    storage
                        .get_vector(id)
                        .ok()
                        .flatten()
                        .map(|data| (id, l2_distance(&query.data, &data)))
                } else {
                    self.vectors
                        .get(id)
                        .map(|v| (id, l2_distance(&query.data, &v.data)))
                }
            })
            .collect();

        rescored.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
        rescored.truncate(k);

        Ok(rescored)
    }

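    /// k-NN search restricted to vectors whose metadata matches `filter`;
    /// each hit carries its metadata. Uses the HNSW index with a filter
    /// callback (or a precomputed bitmap) and falls back to a brute-force
    /// scan when no index exists.
    ///
    /// Sketch; how a `MetadataFilter` is constructed is defined in the
    /// `filter` submodule and not shown here:
    ///
    /// ```ignore
    /// let hits = store.knn_search_with_filter(&query_vector, 10, &lang_filter)?;
    /// ```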
    pub fn knn_search_with_filter(
        &mut self,
        query: &Vector,
        k: usize,
        filter: &MetadataFilter,
    ) -> Result<Vec<(usize, f32, JsonValue)>> {
        self.ensure_index_ready()?;
        self.knn_search_with_filter_ef_readonly(query, k, filter, None)
    }

    pub fn knn_search_with_filter_ef(
        &mut self,
        query: &Vector,
        k: usize,
        filter: &MetadataFilter,
        ef: Option<usize>,
    ) -> Result<Vec<(usize, f32, JsonValue)>> {
        self.ensure_index_ready()?;
        self.knn_search_with_filter_ef_readonly(query, k, filter, ef)
    }

    pub fn knn_search_with_filter_ef_readonly(
        &self,
        query: &Vector,
        k: usize,
        filter: &MetadataFilter,
        ef: Option<usize>,
    ) -> Result<Vec<(usize, f32, JsonValue)>> {
        let effective_ef = compute_effective_ef(ef, self.hnsw_ef_search, k);

        let filter_bitmap = filter.evaluate_bitmap(&self.metadata_index);

        if let Some(ref hnsw) = self.hnsw_index {
            let metadata_map = &self.metadata;
            let deleted_map = &self.deleted;

            let search_results = if let Some(ref bitmap) = filter_bitmap {
                let filter_fn = |node_id: u32| -> bool {
                    let index = node_id as usize;
                    !deleted_map.contains_key(&index) && bitmap.contains(node_id)
                };
                hnsw.search_with_filter_ef(&query.data, k, Some(effective_ef), filter_fn)?
            } else {
                let filter_fn = |node_id: u32| -> bool {
                    let index = node_id as usize;
                    if deleted_map.contains_key(&index) {
                        return false;
                    }
                    let metadata = metadata_map
                        .get(&index)
                        .cloned()
                        .unwrap_or(serde_json::json!({}));
                    filter.matches(&metadata)
                };
                hnsw.search_with_filter_ef(&query.data, k, Some(effective_ef), filter_fn)?
            };

            let filtered_results: Vec<(usize, f32, JsonValue)> = search_results
                .into_iter()
                .map(|(index, distance)| {
                    let metadata = self
                        .metadata
                        .get(&index)
                        .cloned()
                        .unwrap_or(serde_json::json!({}));
                    (index, distance, metadata)
                })
                .collect();

            return Ok(filtered_results);
        }

        let mut all_results: Vec<(usize, f32, JsonValue)> = self
            .vectors
            .iter()
            .enumerate()
            .filter_map(|(index, vec)| {
                if self.deleted.contains_key(&index) {
                    return None;
                }

                let passes_filter = if let Some(ref bitmap) = filter_bitmap {
                    bitmap.contains(index as u32)
                } else {
                    let metadata = self
                        .metadata
                        .get(&index)
                        .cloned()
                        .unwrap_or(serde_json::json!({}));
                    filter.matches(&metadata)
                };

                if !passes_filter {
                    return None;
                }

                let metadata = self
                    .metadata
                    .get(&index)
                    .cloned()
                    .unwrap_or(serde_json::json!({}));
                let distance = query.l2_distance(vec).unwrap_or(f32::MAX);
                Some((index, distance, metadata))
            })
            .collect();

        all_results.sort_by(|a, b| a.1.total_cmp(&b.1));
        all_results.truncate(k);

        Ok(all_results)
    }

    pub fn search(
        &mut self,
        query: &Vector,
        k: usize,
        filter: Option<&MetadataFilter>,
    ) -> Result<Vec<(usize, f32, JsonValue)>> {
        self.search_with_options(query, k, filter, None, None)
    }

    pub fn search_with_ef(
        &mut self,
        query: &Vector,
        k: usize,
        filter: Option<&MetadataFilter>,
        ef: Option<usize>,
    ) -> Result<Vec<(usize, f32, JsonValue)>> {
        self.search_with_options(query, k, filter, ef, None)
    }

    pub fn search_with_options(
        &mut self,
        query: &Vector,
        k: usize,
        filter: Option<&MetadataFilter>,
        ef: Option<usize>,
        max_distance: Option<f32>,
    ) -> Result<Vec<(usize, f32, JsonValue)>> {
        self.ensure_index_ready()?;
        self.search_with_options_readonly(query, k, filter, ef, max_distance)
    }

    pub fn search_with_ef_readonly(
        &self,
        query: &Vector,
        k: usize,
        filter: Option<&MetadataFilter>,
        ef: Option<usize>,
    ) -> Result<Vec<(usize, f32, JsonValue)>> {
        self.search_with_options_readonly(query, k, filter, ef, None)
    }

    pub fn search_with_options_readonly(
        &self,
        query: &Vector,
        k: usize,
        filter: Option<&MetadataFilter>,
        ef: Option<usize>,
        max_distance: Option<f32>,
    ) -> Result<Vec<(usize, f32, JsonValue)>> {
        let mut results = if let Some(f) = filter {
            self.knn_search_with_filter_ef_readonly(query, k, f, ef)?
        } else {
            let results = self.knn_search_readonly(query, k, ef)?;
            let filtered: Vec<(usize, f32, JsonValue)> = results
                .into_iter()
                .filter_map(|(index, distance)| {
                    if self.deleted.contains_key(&index) {
                        return None;
                    }
                    let metadata = self
                        .metadata
                        .get(&index)
                        .cloned()
                        .unwrap_or(serde_json::json!({}));
                    Some((index, distance, metadata))
                })
                .collect();

            if filtered.is_empty() && self.has_live_vectors() {
                self.knn_search_brute_force_with_metadata(query, k)?
            } else {
                filtered
            }
        };

        if let Some(max_dist) = max_distance {
            results.retain(|(_, distance, _)| *distance <= max_dist);
        }

        Ok(results)
    }

    fn has_live_vectors(&self) -> bool {
        let total = self
            .vectors
            .len()
            .max(self.hnsw_index.as_ref().map_or(0, HNSWIndex::len));
        total > self.deleted.len()
    }

    fn is_quantized(&self) -> bool {
        self.pending_quantization.is_some()
            || self
                .hnsw_index
                .as_ref()
                .is_some_and(|idx| idx.is_asymmetric() || idx.is_sq8())
    }

    fn knn_search_brute_force_with_metadata(
        &self,
        query: &Vector,
        k: usize,
    ) -> Result<Vec<(usize, f32, JsonValue)>> {
        let results = self.knn_search_brute_force(query, k)?;
        Ok(results
            .into_iter()
            .filter_map(|(index, distance)| {
                if self.deleted.contains_key(&index) {
                    return None;
                }
                let metadata = self
                    .metadata
                    .get(&index)
                    .cloned()
                    .unwrap_or(serde_json::json!({}));
                Some((index, distance, metadata))
            })
            .collect())
    }

    #[must_use]
    pub fn search_batch(
        &self,
        queries: &[Vector],
        k: usize,
        ef: Option<usize>,
    ) -> Vec<Result<Vec<(usize, f32)>>> {
        let effective_ef = compute_effective_ef(ef, self.hnsw_ef_search, k);
        queries
            .par_iter()
            .map(|q| self.knn_search_ef(q, k, effective_ef))
            .collect()
    }

    #[must_use]
    pub fn search_batch_with_metadata(
        &self,
        queries: &[Vector],
        k: usize,
        ef: Option<usize>,
    ) -> Vec<Result<Vec<(usize, f32, JsonValue)>>> {
        queries
            .par_iter()
            .map(|q| self.search_with_ef_readonly(q, k, None, ef))
            .collect()
    }

    pub fn knn_search_brute_force(&self, query: &Vector, k: usize) -> Result<Vec<(usize, f32)>> {
        if query.dim() != self.dimensions {
            anyhow::bail!(
                "Query dimension mismatch: expected {}, got {}",
                self.dimensions,
                query.dim()
            );
        }

        let total_count = if !self.vectors.is_empty() {
            self.vectors.len()
        } else if let Some(ref idx) = self.hnsw_index {
            idx.len()
        } else {
            return Ok(Vec::new());
        };

        if total_count == 0 {
            return Ok(Vec::new());
        }

        let mut distances: Vec<(usize, f32)> = (0..total_count)
            .filter_map(|id| {
                let data = if let Some(vec) = self.vectors.get(id) {
                    Some(vec.data.clone())
                } else if let Some(ref storage) = self.storage {
                    storage.get_vector(id).ok().flatten()
                } else {
                    None
                };

                data.map(|vec_data| {
                    let dist = l2_distance(&query.data, &vec_data);
                    (id, dist)
                })
            })
            .collect();

        distances.sort_by(|a, b| a.1.total_cmp(&b.1));
        Ok(distances.into_iter().take(k).collect())
    }

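    /// Reorders the HNSW graph for cache locality and remaps the in-memory
    /// vectors, ID mappings, and deletion markers to the new layout. Returns
    /// the number of reordered nodes, or 0 when there is no index or nothing
    /// moved.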
    pub fn optimize(&mut self) -> Result<usize> {
        let Some(ref mut index) = self.hnsw_index else {
            return Ok(0);
        };

        let old_to_new = index
            .optimize_cache_locality()
            .map_err(|e| anyhow::anyhow!("Optimization failed: {e}"))?;

        if old_to_new.is_empty() {
            return Ok(0);
        }

        let num_reordered = old_to_new.len();

        if !self.vectors.is_empty() {
            let old_vectors = std::mem::take(&mut self.vectors);
            let mut new_vectors = Vec::with_capacity(old_vectors.len());
            new_vectors.resize_with(old_vectors.len(), || Vector::new(Vec::new()));

            for (old_idx, &new_idx) in old_to_new.iter().enumerate() {
                let new_idx = new_idx as usize;
                if old_idx < old_vectors.len() && new_idx < new_vectors.len() {
                    new_vectors[new_idx] = old_vectors[old_idx].clone();
                }
            }
            self.vectors = new_vectors;
        }

        let mut new_id_to_index: FxHashMap<String, usize> =
            FxHashMap::with_capacity_and_hasher(self.id_to_index.len(), rustc_hash::FxBuildHasher);
        let mut new_index_to_id: FxHashMap<usize, String> =
            FxHashMap::with_capacity_and_hasher(self.index_to_id.len(), rustc_hash::FxBuildHasher);

        for (string_id, &old_idx) in &self.id_to_index {
            if old_idx < old_to_new.len() {
                let new_idx = old_to_new[old_idx] as usize;
                new_id_to_index.insert(string_id.clone(), new_idx);
                new_index_to_id.insert(new_idx, string_id.clone());
            }
        }

        self.id_to_index = new_id_to_index;
        self.index_to_id = new_index_to_id;

        if !self.deleted.is_empty() {
            let mut new_deleted = HashMap::with_capacity(self.deleted.len());
            for (&old_idx, &is_deleted) in &self.deleted {
                if old_idx < old_to_new.len() {
                    let new_idx = old_to_new[old_idx] as usize;
                    new_deleted.insert(new_idx, is_deleted);
                }
            }
            self.deleted = new_deleted;
        }

        Ok(num_reordered)
    }

    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn get_by_internal_index(&self, idx: usize) -> Option<&Vector> {
        self.vectors.get(idx)
    }

    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn get_by_internal_index_owned(&self, idx: usize) -> Option<Vector> {
        if let Some(v) = self.vectors.get(idx) {
            return Some(v.clone());
        }

        if let Some(ref storage) = self.storage {
            if let Ok(Some(data)) = storage.get_vector(idx) {
                return Some(Vector::new(data));
            }
        }

        None
    }

    #[must_use]
    pub fn len(&self) -> usize {
        if let Some(ref index) = self.hnsw_index {
            let hnsw_len = index.len();
            if hnsw_len > 0 {
                return hnsw_len.saturating_sub(self.deleted.len());
            }
        }
        self.vectors.len().saturating_sub(self.deleted.len())
    }

    #[must_use]
    pub fn count(&self) -> usize {
        self.len()
    }

    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    #[must_use]
    pub fn ids(&self) -> Vec<String> {
        self.id_to_index
            .iter()
            .filter_map(|(id, &idx)| {
                if self.deleted.contains_key(&idx) {
                    None
                } else {
                    Some(id.clone())
                }
            })
            .collect()
    }

    #[must_use]
    pub fn items(&self) -> Vec<(String, Vec<f32>, JsonValue)> {
        self.id_to_index
            .iter()
            .filter_map(|(id, &idx)| {
                if self.deleted.contains_key(&idx) {
                    return None;
                }

                let vec_data = if let Some(vec) = self.vectors.get(idx) {
                    vec.data.clone()
                } else if let Some(ref storage) = self.storage {
                    storage.get_vector(idx).ok().flatten()?
                } else {
                    return None;
                };

                let metadata = self.metadata.get(&idx).cloned().unwrap_or_default();
                Some((id.clone(), vec_data, metadata))
            })
            .collect()
    }

    #[must_use]
    pub fn contains(&self, id: &str) -> bool {
        self.id_to_index
            .get(id)
            .is_some_and(|&idx| !self.deleted.contains_key(&idx))
    }

    #[must_use]
    pub fn memory_usage(&self) -> usize {
        self.vectors.iter().map(|v| v.dim() * 4).sum::<usize>()
    }

    #[must_use]
    pub fn bytes_per_vector(&self) -> f32 {
        if self.vectors.is_empty() {
            return 0.0;
        }
        self.memory_usage() as f32 / self.vectors.len() as f32
    }

    pub fn set_ef_search(&mut self, ef_search: usize) {
        self.hnsw_ef_search = ef_search;
        if let Some(ref mut index) = self.hnsw_index {
            index.set_ef_search(ef_search);
        }
    }

    #[must_use]
    pub fn get_ef_search(&self) -> Option<usize> {
        Some(self.hnsw_ef_search)
    }

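    /// Persists the HNSW parameters, the serialized HNSW index, and any
    /// pending text-index changes, so a later `open` can reuse the index
    /// instead of rebuilding it. Call this before dropping a persistent
    /// store.
    ///
    /// ```ignore
    /// store.flush()?;
    /// ```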
    pub fn flush(&mut self) -> Result<()> {
        let hnsw_bytes = self
            .hnsw_index
            .as_ref()
            .map(bincode::serialize)
            .transpose()?;

        if let Some(ref mut storage) = self.storage {
            storage.set_hnsw_params(
                self.hnsw_m as u16,
                self.hnsw_ef_construction as u16,
                self.hnsw_ef_search as u16,
            );

            if let Some(bytes) = hnsw_bytes {
                storage.put_hnsw_index(bytes);
            }
            storage.flush()?;
        }

        if let Some(ref mut text_index) = self.text_index {
            text_index.commit()?;
        }

        Ok(())
    }

    #[must_use]
    pub fn is_persistent(&self) -> bool {
        self.storage.is_some()
    }

    #[must_use]
    pub fn storage(&self) -> Option<&OmenFile> {
        self.storage.as_ref()
    }
}