1mod accessor;
82mod distance;
83mod mmr;
84pub mod quantization;
85mod simd;
86pub mod storage;
87pub mod zone_map;
88
89#[cfg(feature = "vector-index")]
90mod config;
91#[cfg(feature = "vector-index")]
92mod hnsw;
93#[cfg(feature = "vector-index")]
94mod quantized_hnsw;
95#[cfg(feature = "vector-index")]
96pub mod section;
97
98pub use accessor::{
99 PropertyVectorAccessor, SpillableVectorAccessor, VectorAccessor, VectorAccessorKind,
100};
101pub use distance::{
102 DistanceMetric, compute_distance, cosine_distance, cosine_similarity, dot_product,
103 euclidean_distance, euclidean_distance_squared, l2_norm, manhattan_distance, normalize,
104 simd_support,
105};
106pub use mmr::mmr_select;
107pub use quantization::{BinaryQuantizer, ProductQuantizer, QuantizationType, ScalarQuantizer};
108#[cfg(feature = "mmap")]
109pub use storage::MmapStorage;
110pub use storage::{RamStorage, StorageBackend, VectorStorage};
111pub use zone_map::VectorZoneMap;
112
113#[cfg(feature = "vector-index")]
114pub use config::HnswConfig;
115#[cfg(feature = "vector-index")]
116pub use hnsw::HnswIndex;
117#[cfg(feature = "vector-index")]
118pub use quantized_hnsw::QuantizedHnswIndex;
119#[cfg(feature = "vector-index")]
120pub use section::VectorStoreSection;
121use grafeo_common::types::NodeId;
124#[cfg(feature = "vector-index")]
125use std::collections::HashSet;
126
127#[cfg(feature = "vector-index")]
134pub enum VectorIndexKind {
135 Hnsw(HnswIndex),
137 Quantized(QuantizedHnswIndex),
139}
140
141#[cfg(feature = "vector-index")]
142impl VectorIndexKind {
143 #[must_use]
145 pub fn config(&self) -> &HnswConfig {
146 match self {
147 Self::Hnsw(idx) => idx.config(),
148 Self::Quantized(idx) => idx.config(),
149 }
150 }
151
152 #[must_use]
154 pub fn len(&self) -> usize {
155 match self {
156 Self::Hnsw(idx) => idx.len(),
157 Self::Quantized(idx) => idx.len(),
158 }
159 }
160
161 #[must_use]
163 pub fn is_empty(&self) -> bool {
164 match self {
165 Self::Hnsw(idx) => idx.is_empty(),
166 Self::Quantized(idx) => idx.is_empty(),
167 }
168 }
169
170 #[must_use]
172 pub fn contains(&self, id: NodeId) -> bool {
173 match self {
174 Self::Hnsw(idx) => idx.contains(id),
175 Self::Quantized(idx) => idx.contains(id),
176 }
177 }
178
179 pub fn remove(&self, id: NodeId) -> bool {
181 match self {
182 Self::Hnsw(idx) => idx.remove(id),
183 Self::Quantized(idx) => idx.remove(id),
184 }
185 }
186
187 pub fn insert(&self, id: NodeId, vector: &[f32], accessor: &impl VectorAccessor) {
192 match self {
193 Self::Hnsw(idx) => idx.insert(id, vector, accessor),
194 Self::Quantized(idx) => idx.insert(id, vector),
195 }
196 }
197
198 #[must_use]
200 pub fn search(
201 &self,
202 query: &[f32],
203 k: usize,
204 accessor: &impl VectorAccessor,
205 ) -> Vec<(NodeId, f32)> {
206 match self {
207 Self::Hnsw(idx) => idx.search(query, k, accessor),
208 Self::Quantized(idx) => idx.search(query, k),
209 }
210 }
211
212 #[must_use]
214 pub fn search_with_ef(
215 &self,
216 query: &[f32],
217 k: usize,
218 ef: usize,
219 accessor: &impl VectorAccessor,
220 ) -> Vec<(NodeId, f32)> {
221 match self {
222 Self::Hnsw(idx) => idx.search_with_ef(query, k, ef, accessor),
223 Self::Quantized(idx) => idx.search_with_ef(query, k, ef),
224 }
225 }
226
227 #[must_use]
229 pub fn search_with_filter(
230 &self,
231 query: &[f32],
232 k: usize,
233 allowlist: &HashSet<NodeId>,
234 accessor: &impl VectorAccessor,
235 ) -> Vec<(NodeId, f32)> {
236 match self {
237 Self::Hnsw(idx) => idx.search_with_filter(query, k, allowlist, accessor),
238 Self::Quantized(idx) => idx.search_with_filter(query, k, allowlist),
239 }
240 }
241
242 #[must_use]
244 pub fn search_with_ef_and_filter(
245 &self,
246 query: &[f32],
247 k: usize,
248 ef: usize,
249 allowlist: &HashSet<NodeId>,
250 accessor: &impl VectorAccessor,
251 ) -> Vec<(NodeId, f32)> {
252 match self {
253 Self::Hnsw(idx) => idx.search_with_ef_and_filter(query, k, ef, allowlist, accessor),
254 Self::Quantized(idx) => idx.search_with_ef_and_filter(query, k, ef, allowlist),
255 }
256 }
257
258 #[must_use]
260 pub fn batch_search(
261 &self,
262 queries: &[Vec<f32>],
263 k: usize,
264 accessor: &impl VectorAccessor,
265 ) -> Vec<Vec<(NodeId, f32)>> {
266 match self {
267 Self::Hnsw(idx) => idx.batch_search(queries, k, accessor),
268 Self::Quantized(idx) => idx.batch_search(queries, k),
269 }
270 }
271
272 #[must_use]
274 pub fn batch_search_with_ef(
275 &self,
276 queries: &[Vec<f32>],
277 k: usize,
278 ef: usize,
279 accessor: &impl VectorAccessor,
280 ) -> Vec<Vec<(NodeId, f32)>> {
281 match self {
282 Self::Hnsw(idx) => idx.batch_search_with_ef(queries, k, ef, accessor),
283 Self::Quantized(idx) => idx.batch_search_with_ef(queries, k, ef),
284 }
285 }
286
287 #[must_use]
289 pub fn batch_search_with_filter(
290 &self,
291 queries: &[Vec<f32>],
292 k: usize,
293 allowlist: &HashSet<NodeId>,
294 accessor: &impl VectorAccessor,
295 ) -> Vec<Vec<(NodeId, f32)>> {
296 match self {
297 Self::Hnsw(idx) => idx.batch_search_with_filter(queries, k, allowlist, accessor),
298 Self::Quantized(idx) => idx.batch_search_with_filter(queries, k, allowlist),
299 }
300 }
301
302 #[must_use]
304 pub fn batch_search_with_ef_and_filter(
305 &self,
306 queries: &[Vec<f32>],
307 k: usize,
308 ef: usize,
309 allowlist: &HashSet<NodeId>,
310 accessor: &impl VectorAccessor,
311 ) -> Vec<Vec<(NodeId, f32)>> {
312 match self {
313 Self::Hnsw(idx) => {
314 idx.batch_search_with_ef_and_filter(queries, k, ef, allowlist, accessor)
315 }
316 Self::Quantized(idx) => idx.batch_search_with_ef_and_filter(queries, k, ef, allowlist),
317 }
318 }
319
320 #[must_use]
322 pub fn snapshot_topology(&self) -> (Option<NodeId>, usize, Vec<(NodeId, Vec<Vec<NodeId>>)>) {
323 match self {
324 Self::Hnsw(idx) => idx.snapshot_topology(),
325 Self::Quantized(idx) => idx.snapshot_topology(),
326 }
327 }
328
329 pub fn restore_topology(
331 &self,
332 entry_point: Option<NodeId>,
333 max_level: usize,
334 node_data: Vec<(NodeId, Vec<Vec<NodeId>>)>,
335 ) {
336 match self {
337 Self::Hnsw(idx) => idx.restore_topology(entry_point, max_level, node_data),
338 Self::Quantized(idx) => idx.restore_topology(entry_point, max_level, node_data),
339 }
340 }
341
342 #[must_use]
344 pub fn heap_memory_bytes(&self) -> usize {
345 match self {
346 Self::Hnsw(idx) => idx.heap_memory_bytes(),
347 Self::Quantized(idx) => idx.heap_memory_bytes(),
348 }
349 }
350
351 #[must_use]
353 pub fn quantization_type(&self) -> Option<QuantizationType> {
354 match self {
355 Self::Hnsw(_) => None,
356 Self::Quantized(idx) => Some(idx.quantization_type()),
357 }
358 }
359
360 #[must_use]
362 pub fn as_hnsw(&self) -> Option<&HnswIndex> {
363 match self {
364 Self::Hnsw(idx) => Some(idx),
365 Self::Quantized(_) => None,
366 }
367 }
368
369 #[must_use]
371 pub fn as_quantized(&self) -> Option<&QuantizedHnswIndex> {
372 match self {
373 Self::Hnsw(_) => None,
374 Self::Quantized(idx) => Some(idx),
375 }
376 }
377}
378
379#[cfg(feature = "vector-index")]
380impl From<HnswIndex> for VectorIndexKind {
381 fn from(idx: HnswIndex) -> Self {
382 Self::Hnsw(idx)
383 }
384}
385
386#[cfg(feature = "vector-index")]
387impl From<QuantizedHnswIndex> for VectorIndexKind {
388 fn from(idx: QuantizedHnswIndex) -> Self {
389 Self::Quantized(idx)
390 }
391}
392
393#[derive(Debug, Clone)]
395pub struct VectorConfig {
396 pub dimensions: usize,
398 pub metric: DistanceMetric,
400}
401
402impl VectorConfig {
403 #[must_use]
405 pub const fn new(dimensions: usize, metric: DistanceMetric) -> Self {
406 Self { dimensions, metric }
407 }
408
409 #[must_use]
411 pub const fn cosine(dimensions: usize) -> Self {
412 Self::new(dimensions, DistanceMetric::Cosine)
413 }
414
415 #[must_use]
417 pub const fn euclidean(dimensions: usize) -> Self {
418 Self::new(dimensions, DistanceMetric::Euclidean)
419 }
420}
421
422impl Default for VectorConfig {
423 fn default() -> Self {
424 Self {
425 dimensions: 384, metric: DistanceMetric::default(),
427 }
428 }
429}
430
431pub fn brute_force_knn<'a, I>(
470 vectors: I,
471 query: &[f32],
472 k: usize,
473 metric: DistanceMetric,
474) -> Vec<(NodeId, f32)>
475where
476 I: Iterator<Item = (NodeId, &'a [f32])>,
477{
478 let mut results: Vec<(NodeId, f32)> = vectors
479 .map(|(id, vec)| (id, compute_distance(query, vec, metric)))
480 .collect();
481
482 results.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
484
485 results.truncate(k);
487 results
488}
489
490pub fn brute_force_knn_filtered<'a, I, F>(
506 vectors: I,
507 query: &[f32],
508 k: usize,
509 metric: DistanceMetric,
510 predicate: F,
511) -> Vec<(NodeId, f32)>
512where
513 I: Iterator<Item = (NodeId, &'a [f32])>,
514 F: Fn(NodeId) -> bool,
515{
516 let mut results: Vec<(NodeId, f32)> = vectors
517 .filter(|(id, _)| predicate(*id))
518 .map(|(id, vec)| (id, compute_distance(query, vec, metric)))
519 .collect();
520
521 results.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
522 results.truncate(k);
523 results
524}
525
526pub fn batch_distances<'a, I>(
534 vectors: I,
535 query: &[f32],
536 metric: DistanceMetric,
537) -> Vec<(NodeId, f32)>
538where
539 I: Iterator<Item = (NodeId, &'a [f32])>,
540{
541 vectors
542 .map(|(id, vec)| (id, compute_distance(query, vec, metric)))
543 .collect()
544}
545
546#[cfg(test)]
547mod tests {
548 use super::*;
549
550 #[test]
551 fn test_vector_config_default() {
552 let config = VectorConfig::default();
553 assert_eq!(config.dimensions, 384);
554 assert_eq!(config.metric, DistanceMetric::Cosine);
555 }
556
557 #[test]
558 fn test_vector_config_constructors() {
559 let cosine = VectorConfig::cosine(768);
560 assert_eq!(cosine.dimensions, 768);
561 assert_eq!(cosine.metric, DistanceMetric::Cosine);
562
563 let euclidean = VectorConfig::euclidean(1536);
564 assert_eq!(euclidean.dimensions, 1536);
565 assert_eq!(euclidean.metric, DistanceMetric::Euclidean);
566 }
567
568 #[test]
569 fn test_brute_force_knn() {
570 let vectors = vec![
571 (NodeId::new(1), [0.0f32, 0.0, 0.0].as_slice()),
572 (NodeId::new(2), [1.0f32, 0.0, 0.0].as_slice()),
573 (NodeId::new(3), [2.0f32, 0.0, 0.0].as_slice()),
574 (NodeId::new(4), [3.0f32, 0.0, 0.0].as_slice()),
575 ];
576
577 let query = [0.5f32, 0.0, 0.0];
578 let results = brute_force_knn(vectors.into_iter(), &query, 2, DistanceMetric::Euclidean);
579
580 assert_eq!(results.len(), 2);
581 assert!(results[0].0 == NodeId::new(1) || results[0].0 == NodeId::new(2));
583 }
584
585 #[test]
586 fn test_brute_force_knn_empty() {
587 let vectors: Vec<(NodeId, &[f32])> = vec![];
588 let query = [0.0f32, 0.0];
589 let results = brute_force_knn(vectors.into_iter(), &query, 10, DistanceMetric::Cosine);
590 assert!(results.is_empty());
591 }
592
593 #[test]
594 fn test_brute_force_knn_k_larger_than_n() {
595 let vectors = vec![
596 (NodeId::new(1), [0.0f32, 0.0].as_slice()),
597 (NodeId::new(2), [1.0f32, 0.0].as_slice()),
598 ];
599
600 let query = [0.0f32, 0.0];
601 let results = brute_force_knn(vectors.into_iter(), &query, 10, DistanceMetric::Euclidean);
602
603 assert_eq!(results.len(), 2);
605 }
606
607 #[test]
608 fn test_brute_force_knn_filtered() {
609 let vectors = vec![
610 (NodeId::new(1), [0.0f32, 0.0].as_slice()),
611 (NodeId::new(2), [1.0f32, 0.0].as_slice()),
612 (NodeId::new(3), [2.0f32, 0.0].as_slice()),
613 ];
614
615 let query = [0.0f32, 0.0];
616
617 let results = brute_force_knn_filtered(
619 vectors.into_iter(),
620 &query,
621 10,
622 DistanceMetric::Euclidean,
623 |id| id.as_u64() % 2 == 0,
624 );
625
626 assert_eq!(results.len(), 1);
627 assert_eq!(results[0].0, NodeId::new(2));
628 }
629
630 #[test]
631 fn test_batch_distances() {
632 let vectors = vec![
633 (NodeId::new(1), [0.0f32, 0.0].as_slice()),
634 (NodeId::new(2), [3.0f32, 4.0].as_slice()),
635 ];
636
637 let query = [0.0f32, 0.0];
638 let results = batch_distances(vectors.into_iter(), &query, DistanceMetric::Euclidean);
639
640 assert_eq!(results.len(), 2);
641 assert_eq!(results[0].0, NodeId::new(1));
642 assert!((results[0].1 - 0.0).abs() < 0.001);
643 assert_eq!(results[1].0, NodeId::new(2));
644 assert!((results[1].1 - 5.0).abs() < 0.001); }
646
647 #[cfg(feature = "vector-index")]
650 mod vector_index_kind_tests {
651 use super::super::*;
652 use std::collections::HashSet;
653
654 struct NoopAccessor;
657 impl VectorAccessor for NoopAccessor {
658 fn get_vector(&self, _id: NodeId) -> Option<std::sync::Arc<[f32]>> {
659 None
660 }
661 }
662
663 fn build_quantized_kind(n: usize) -> VectorIndexKind {
664 let config = HnswConfig::new(4, DistanceMetric::Euclidean);
665 let q = QuantizedHnswIndex::new(config, QuantizationType::Scalar);
666 for i in 0..n {
667 let vec: Vec<f32> = (0..4)
668 .map(|j| ((i * 4 + j) as f32) / (n * 4) as f32)
669 .collect();
670 q.insert(NodeId::new(i as u64 + 1), &vec);
671 }
672 VectorIndexKind::Quantized(q)
673 }
674
675 #[test]
676 fn quantized_kind_basic_ops() {
677 let kind = build_quantized_kind(20);
678 assert_eq!(kind.len(), 20);
679 assert!(!kind.is_empty());
680 assert!(kind.contains(NodeId::new(1)));
681 assert!(!kind.contains(NodeId::new(999)));
682 }
683
684 #[test]
685 fn quantized_kind_insert_and_search() {
686 let kind = build_quantized_kind(30);
687 let query = vec![0.5, 0.5, 0.0, 0.0];
688 let results = kind.search(&query, 3, &NoopAccessor);
689 assert_eq!(results.len(), 3);
690 }
691
692 #[test]
693 fn quantized_kind_search_with_ef() {
694 let kind = build_quantized_kind(30);
695 let query = vec![0.5, 0.5, 0.0, 0.0];
696 let results = kind.search_with_ef(&query, 3, 50, &NoopAccessor);
697 assert_eq!(results.len(), 3);
698 }
699
700 #[test]
701 fn quantized_kind_search_with_filter() {
702 let kind = build_quantized_kind(30);
703 let allowlist: HashSet<NodeId> = (1..=10).map(NodeId::new).collect();
704 let query = vec![0.1, 0.1, 0.0, 0.0];
705 let results = kind.search_with_filter(&query, 5, &allowlist, &NoopAccessor);
706 assert!(!results.is_empty());
707 for (id, _) in &results {
708 assert!(allowlist.contains(id));
709 }
710 }
711
712 #[test]
713 fn quantized_kind_search_with_ef_and_filter() {
714 let kind = build_quantized_kind(30);
715 let allowlist: HashSet<NodeId> = (5..=15).map(NodeId::new).collect();
716 let query = vec![0.3, 0.3, 0.0, 0.0];
717 let results = kind.search_with_ef_and_filter(&query, 3, 50, &allowlist, &NoopAccessor);
718 for (id, _) in &results {
719 assert!(allowlist.contains(id));
720 }
721 }
722
723 #[test]
724 fn quantized_kind_batch_search() {
725 let kind = build_quantized_kind(30);
726 let queries = vec![vec![0.1, 0.0, 0.0, 0.0], vec![0.9, 0.9, 0.0, 0.0]];
727 let results = kind.batch_search(&queries, 2, &NoopAccessor);
728 assert_eq!(results.len(), 2);
729 }
730
731 #[test]
732 fn quantized_kind_batch_search_with_ef() {
733 let kind = build_quantized_kind(30);
734 let queries = vec![vec![0.1, 0.0, 0.0, 0.0]];
735 let results = kind.batch_search_with_ef(&queries, 2, 50, &NoopAccessor);
736 assert_eq!(results.len(), 1);
737 }
738
739 #[test]
740 fn quantized_kind_batch_search_with_filter() {
741 let kind = build_quantized_kind(30);
742 let allowlist: HashSet<NodeId> = (1..=10).map(NodeId::new).collect();
743 let queries = vec![vec![0.1, 0.0, 0.0, 0.0]];
744 let results = kind.batch_search_with_filter(&queries, 5, &allowlist, &NoopAccessor);
745 assert_eq!(results.len(), 1);
746 for (id, _) in &results[0] {
747 assert!(allowlist.contains(id));
748 }
749 }
750
751 #[test]
752 fn quantized_kind_batch_search_with_ef_and_filter() {
753 let kind = build_quantized_kind(30);
754 let allowlist: HashSet<NodeId> = (1..=15).map(NodeId::new).collect();
755 let queries = vec![vec![0.2, 0.0, 0.0, 0.0]];
756 let results =
757 kind.batch_search_with_ef_and_filter(&queries, 3, 50, &allowlist, &NoopAccessor);
758 assert_eq!(results.len(), 1);
759 for (id, _) in &results[0] {
760 assert!(allowlist.contains(id));
761 }
762 }
763
764 #[test]
765 fn quantized_kind_remove() {
766 let kind = build_quantized_kind(5);
767 assert!(kind.remove(NodeId::new(1)));
768 assert_eq!(kind.len(), 4);
769 assert!(!kind.contains(NodeId::new(1)));
770 }
771
772 #[test]
773 #[allow(clippy::cast_sign_loss)]
775 fn quantized_kind_snapshot_restore() {
776 let kind = build_quantized_kind(10);
777 let (entry, level, nodes) = kind.snapshot_topology();
778
779 let config = HnswConfig::new(4, DistanceMetric::Euclidean);
780 let kind2 = VectorIndexKind::Quantized(QuantizedHnswIndex::new(
781 config,
782 QuantizationType::Scalar,
783 ));
784 for i in 0..10 {
785 let vec: Vec<f32> = (0..4).map(|j| ((i * 4 + j) as f32) / 40.0).collect();
786 kind2.insert(NodeId::new(i as u64 + 1), &vec, &NoopAccessor);
787 }
788 kind2.restore_topology(entry, level, nodes);
789 assert_eq!(kind2.len(), 10);
790 }
791
792 #[test]
793 fn quantized_kind_heap_memory() {
794 let kind = build_quantized_kind(10);
795 assert!(kind.heap_memory_bytes() > 0);
796 }
797
798 #[test]
799 fn quantized_kind_type_accessors() {
800 let kind = build_quantized_kind(1);
801 assert!(kind.as_quantized().is_some());
802 assert!(kind.as_hnsw().is_none());
803 assert_eq!(kind.quantization_type(), Some(QuantizationType::Scalar));
804 }
805
806 #[test]
807 fn from_trait_impls() {
808 let config = HnswConfig::new(4, DistanceMetric::Euclidean);
809
810 let hnsw = HnswIndex::new(config.clone());
811 let kind: VectorIndexKind = hnsw.into();
812 assert!(kind.as_hnsw().is_some());
813
814 let quantized = QuantizedHnswIndex::new(config, QuantizationType::Binary);
815 let kind2: VectorIndexKind = quantized.into();
816 assert!(kind2.as_quantized().is_some());
817 assert_eq!(kind2.quantization_type(), Some(QuantizationType::Binary));
818 }
819 }
820}