grafeo_core/index/vector/
mod.rs

1//! Vector similarity search support.
2//!
3//! This module provides infrastructure for storing and searching vector embeddings,
4//! enabling AI/ML use cases like RAG, semantic search, and recommendations.
5//!
6//! # Distance Metrics
7//!
8//! Choose the metric based on your embedding type:
9//!
10//! | Metric | Best For | Range |
11//! |--------|----------|-------|
12//! | [`Cosine`](DistanceMetric::Cosine) | Normalized embeddings (text) | [0, 2] |
13//! | [`Euclidean`](DistanceMetric::Euclidean) | Raw embeddings | [0, inf) |
14//! | [`DotProduct`](DistanceMetric::DotProduct) | Max inner product search | (-inf, inf) |
15//! | [`Manhattan`](DistanceMetric::Manhattan) | Outlier-resistant | [0, inf) |
16//!
17//! # Index Types
18//!
19//! | Index | Complexity | Use Case |
20//! |-------|------------|----------|
21//! | [`brute_force_knn`] | O(n) | Small datasets, exact results |
22//! | [`HnswIndex`] | O(log n) | Large datasets, approximate results |
23//!
24//! # Example
25//!
26//! ```
27//! use grafeo_core::index::vector::{compute_distance, DistanceMetric, brute_force_knn};
28//! use grafeo_common::types::NodeId;
29//!
30//! // Compute distance between two vectors
31//! let query = [0.1f32, 0.2, 0.3];
32//! let doc1 = [0.1f32, 0.2, 0.35];
33//! let doc2 = [0.5f32, 0.6, 0.7];
34//!
35//! let dist1 = compute_distance(&query, &doc1, DistanceMetric::Cosine);
36//! let dist2 = compute_distance(&query, &doc2, DistanceMetric::Cosine);
37//!
38//! // doc1 is more similar (smaller distance)
39//! assert!(dist1 < dist2);
40//!
41//! // Brute-force k-NN search
42//! let vectors = vec![
43//!     (NodeId::new(1), doc1.as_slice()),
44//!     (NodeId::new(2), doc2.as_slice()),
45//! ];
46//!
47//! let results = brute_force_knn(vectors.into_iter(), &query, 1, DistanceMetric::Cosine);
48//! assert_eq!(results[0].0, NodeId::new(1)); // doc1 is closest
49//! ```
50//!
51//! # HNSW Index (requires `vector-index` feature)
52//!
53//! For larger datasets, use the HNSW approximate nearest neighbor index:
54//!
55//! ```no_run
56//! # #[cfg(feature = "vector-index")]
57//! # {
58//! use grafeo_core::index::vector::{HnswIndex, HnswConfig, DistanceMetric, VectorAccessor};
59//! use grafeo_common::types::NodeId;
60//! use std::sync::Arc;
61//! use std::collections::HashMap;
62//!
63//! let config = HnswConfig::new(384, DistanceMetric::Cosine);
64//! let index = HnswIndex::new(config);
65//!
66//! // Build an accessor backed by a HashMap
67//! let mut map: HashMap<NodeId, Arc<[f32]>> = HashMap::new();
68//! let embedding: Arc<[f32]> = vec![0.1f32; 384].into();
69//! map.insert(NodeId::new(1), embedding.clone());
70//! let accessor = |id: NodeId| -> Option<Arc<[f32]>> { map.get(&id).cloned() };
71//!
72//! // Insert vectors (requires accessor for neighbor lookups)
73//! index.insert(NodeId::new(1), &embedding, &accessor);
74//!
75//! // Search (O(log n))
76//! let query = vec![0.15f32; 384];
77//! let results = index.search(&query, 10, &accessor);
78//! # }
79//! ```
80
81mod accessor;
82mod distance;
83mod mmr;
84pub mod quantization;
85mod simd;
86pub mod storage;
87pub mod zone_map;
88
89#[cfg(feature = "vector-index")]
90mod config;
91#[cfg(feature = "vector-index")]
92mod hnsw;
93#[cfg(feature = "vector-index")]
94mod quantized_hnsw;
95#[cfg(feature = "vector-index")]
96pub mod section;
97
98pub use accessor::{
99    PropertyVectorAccessor, SpillableVectorAccessor, VectorAccessor, VectorAccessorKind,
100};
101pub use distance::{
102    DistanceMetric, compute_distance, cosine_distance, cosine_similarity, dot_product,
103    euclidean_distance, euclidean_distance_squared, l2_norm, manhattan_distance, normalize,
104    simd_support,
105};
106pub use mmr::mmr_select;
107pub use quantization::{BinaryQuantizer, ProductQuantizer, QuantizationType, ScalarQuantizer};
108#[cfg(feature = "mmap")]
109pub use storage::MmapStorage;
110pub use storage::{RamStorage, StorageBackend, VectorStorage};
111pub use zone_map::VectorZoneMap;
112
113#[cfg(feature = "vector-index")]
114pub use config::HnswConfig;
115#[cfg(feature = "vector-index")]
116pub use hnsw::HnswIndex;
117#[cfg(feature = "vector-index")]
118pub use quantized_hnsw::QuantizedHnswIndex;
119#[cfg(feature = "vector-index")]
120pub use section::VectorStoreSection;
121// VectorIndexKind is defined below in this file (not in a sub-module).
122
123use grafeo_common::types::NodeId;
124#[cfg(feature = "vector-index")]
125use std::collections::HashSet;
126
127// ── VectorIndexKind ────────────────────────────────────────────────
128
129/// Unified enum for vector indexes stored in the LPG store.
130///
131/// Wraps either a plain [`HnswIndex`] or a [`QuantizedHnswIndex`],
132/// allowing the store and engine to handle both through a single type.
133#[cfg(feature = "vector-index")]
134pub enum VectorIndexKind {
135    /// Standard full-precision HNSW index.
136    Hnsw(HnswIndex),
137    /// Quantized HNSW index (scalar, binary, or product quantization).
138    Quantized(QuantizedHnswIndex),
139}
140
141#[cfg(feature = "vector-index")]
142impl VectorIndexKind {
143    /// Returns the HNSW configuration.
144    #[must_use]
145    pub fn config(&self) -> &HnswConfig {
146        match self {
147            Self::Hnsw(idx) => idx.config(),
148            Self::Quantized(idx) => idx.config(),
149        }
150    }
151
152    /// Returns the number of vectors in the index.
153    #[must_use]
154    pub fn len(&self) -> usize {
155        match self {
156            Self::Hnsw(idx) => idx.len(),
157            Self::Quantized(idx) => idx.len(),
158        }
159    }
160
161    /// Returns true if the index is empty.
162    #[must_use]
163    pub fn is_empty(&self) -> bool {
164        match self {
165            Self::Hnsw(idx) => idx.is_empty(),
166            Self::Quantized(idx) => idx.is_empty(),
167        }
168    }
169
170    /// Returns true if the index contains the given ID.
171    #[must_use]
172    pub fn contains(&self, id: NodeId) -> bool {
173        match self {
174            Self::Hnsw(idx) => idx.contains(id),
175            Self::Quantized(idx) => idx.contains(id),
176        }
177    }
178
179    /// Removes a vector from the index.
180    pub fn remove(&self, id: NodeId) -> bool {
181        match self {
182            Self::Hnsw(idx) => idx.remove(id),
183            Self::Quantized(idx) => idx.remove(id),
184        }
185    }
186
187    /// Inserts a vector into the index.
188    ///
189    /// For `Hnsw`, the accessor is used for neighbor distance lookups.
190    /// For `Quantized`, the vector is stored internally and the accessor is unused.
191    pub fn insert(&self, id: NodeId, vector: &[f32], accessor: &impl VectorAccessor) {
192        match self {
193            Self::Hnsw(idx) => idx.insert(id, vector, accessor),
194            Self::Quantized(idx) => idx.insert(id, vector),
195        }
196    }
197
198    /// Searches for the k nearest neighbors.
199    #[must_use]
200    pub fn search(
201        &self,
202        query: &[f32],
203        k: usize,
204        accessor: &impl VectorAccessor,
205    ) -> Vec<(NodeId, f32)> {
206        match self {
207            Self::Hnsw(idx) => idx.search(query, k, accessor),
208            Self::Quantized(idx) => idx.search(query, k),
209        }
210    }
211
212    /// Searches with a custom ef (beam width) parameter.
213    #[must_use]
214    pub fn search_with_ef(
215        &self,
216        query: &[f32],
217        k: usize,
218        ef: usize,
219        accessor: &impl VectorAccessor,
220    ) -> Vec<(NodeId, f32)> {
221        match self {
222            Self::Hnsw(idx) => idx.search_with_ef(query, k, ef, accessor),
223            Self::Quantized(idx) => idx.search_with_ef(query, k, ef),
224        }
225    }
226
227    /// Searches with an allowlist filter.
228    #[must_use]
229    pub fn search_with_filter(
230        &self,
231        query: &[f32],
232        k: usize,
233        allowlist: &HashSet<NodeId>,
234        accessor: &impl VectorAccessor,
235    ) -> Vec<(NodeId, f32)> {
236        match self {
237            Self::Hnsw(idx) => idx.search_with_filter(query, k, allowlist, accessor),
238            Self::Quantized(idx) => idx.search_with_filter(query, k, allowlist),
239        }
240    }
241
242    /// Searches with a custom ef and an allowlist filter.
243    #[must_use]
244    pub fn search_with_ef_and_filter(
245        &self,
246        query: &[f32],
247        k: usize,
248        ef: usize,
249        allowlist: &HashSet<NodeId>,
250        accessor: &impl VectorAccessor,
251    ) -> Vec<(NodeId, f32)> {
252        match self {
253            Self::Hnsw(idx) => idx.search_with_ef_and_filter(query, k, ef, allowlist, accessor),
254            Self::Quantized(idx) => idx.search_with_ef_and_filter(query, k, ef, allowlist),
255        }
256    }
257
258    /// Batch search for multiple queries.
259    #[must_use]
260    pub fn batch_search(
261        &self,
262        queries: &[Vec<f32>],
263        k: usize,
264        accessor: &impl VectorAccessor,
265    ) -> Vec<Vec<(NodeId, f32)>> {
266        match self {
267            Self::Hnsw(idx) => idx.batch_search(queries, k, accessor),
268            Self::Quantized(idx) => idx.batch_search(queries, k),
269        }
270    }
271
272    /// Batch search with custom ef for multiple queries.
273    #[must_use]
274    pub fn batch_search_with_ef(
275        &self,
276        queries: &[Vec<f32>],
277        k: usize,
278        ef: usize,
279        accessor: &impl VectorAccessor,
280    ) -> Vec<Vec<(NodeId, f32)>> {
281        match self {
282            Self::Hnsw(idx) => idx.batch_search_with_ef(queries, k, ef, accessor),
283            Self::Quantized(idx) => idx.batch_search_with_ef(queries, k, ef),
284        }
285    }
286
287    /// Batch search with an allowlist filter for multiple queries.
288    #[must_use]
289    pub fn batch_search_with_filter(
290        &self,
291        queries: &[Vec<f32>],
292        k: usize,
293        allowlist: &HashSet<NodeId>,
294        accessor: &impl VectorAccessor,
295    ) -> Vec<Vec<(NodeId, f32)>> {
296        match self {
297            Self::Hnsw(idx) => idx.batch_search_with_filter(queries, k, allowlist, accessor),
298            Self::Quantized(idx) => idx.batch_search_with_filter(queries, k, allowlist),
299        }
300    }
301
302    /// Batch search with custom ef and an allowlist filter.
303    #[must_use]
304    pub fn batch_search_with_ef_and_filter(
305        &self,
306        queries: &[Vec<f32>],
307        k: usize,
308        ef: usize,
309        allowlist: &HashSet<NodeId>,
310        accessor: &impl VectorAccessor,
311    ) -> Vec<Vec<(NodeId, f32)>> {
312        match self {
313            Self::Hnsw(idx) => {
314                idx.batch_search_with_ef_and_filter(queries, k, ef, allowlist, accessor)
315            }
316            Self::Quantized(idx) => idx.batch_search_with_ef_and_filter(queries, k, ef, allowlist),
317        }
318    }
319
320    /// Snapshot the HNSW topology for serialization.
321    #[must_use]
322    pub fn snapshot_topology(&self) -> (Option<NodeId>, usize, Vec<(NodeId, Vec<Vec<NodeId>>)>) {
323        match self {
324            Self::Hnsw(idx) => idx.snapshot_topology(),
325            Self::Quantized(idx) => idx.snapshot_topology(),
326        }
327    }
328
329    /// Restore topology from a snapshot.
330    pub fn restore_topology(
331        &self,
332        entry_point: Option<NodeId>,
333        max_level: usize,
334        node_data: Vec<(NodeId, Vec<Vec<NodeId>>)>,
335    ) {
336        match self {
337            Self::Hnsw(idx) => idx.restore_topology(entry_point, max_level, node_data),
338            Self::Quantized(idx) => idx.restore_topology(entry_point, max_level, node_data),
339        }
340    }
341
342    /// Returns estimated heap memory in bytes.
343    #[must_use]
344    pub fn heap_memory_bytes(&self) -> usize {
345        match self {
346            Self::Hnsw(idx) => idx.heap_memory_bytes(),
347            Self::Quantized(idx) => idx.heap_memory_bytes(),
348        }
349    }
350
351    /// Returns the quantization type, if this is a quantized index.
352    #[must_use]
353    pub fn quantization_type(&self) -> Option<QuantizationType> {
354        match self {
355            Self::Hnsw(_) => None,
356            Self::Quantized(idx) => Some(idx.quantization_type()),
357        }
358    }
359
360    /// Returns a reference to the inner `HnswIndex`, if this is a plain HNSW variant.
361    #[must_use]
362    pub fn as_hnsw(&self) -> Option<&HnswIndex> {
363        match self {
364            Self::Hnsw(idx) => Some(idx),
365            Self::Quantized(_) => None,
366        }
367    }
368
369    /// Returns a reference to the inner `QuantizedHnswIndex`, if quantized.
370    #[must_use]
371    pub fn as_quantized(&self) -> Option<&QuantizedHnswIndex> {
372        match self {
373            Self::Hnsw(_) => None,
374            Self::Quantized(idx) => Some(idx),
375        }
376    }
377}
378
379#[cfg(feature = "vector-index")]
380impl From<HnswIndex> for VectorIndexKind {
381    fn from(idx: HnswIndex) -> Self {
382        Self::Hnsw(idx)
383    }
384}
385
386#[cfg(feature = "vector-index")]
387impl From<QuantizedHnswIndex> for VectorIndexKind {
388    fn from(idx: QuantizedHnswIndex) -> Self {
389        Self::Quantized(idx)
390    }
391}
392
393/// Configuration for vector search operations.
394#[derive(Debug, Clone)]
395pub struct VectorConfig {
396    /// Expected vector dimensions (for validation).
397    pub dimensions: usize,
398    /// Distance metric for similarity computation.
399    pub metric: DistanceMetric,
400}
401
402impl VectorConfig {
403    /// Creates a new vector configuration.
404    #[must_use]
405    pub const fn new(dimensions: usize, metric: DistanceMetric) -> Self {
406        Self { dimensions, metric }
407    }
408
409    /// Creates a configuration for cosine similarity with the given dimensions.
410    #[must_use]
411    pub const fn cosine(dimensions: usize) -> Self {
412        Self::new(dimensions, DistanceMetric::Cosine)
413    }
414
415    /// Creates a configuration for Euclidean distance with the given dimensions.
416    #[must_use]
417    pub const fn euclidean(dimensions: usize) -> Self {
418        Self::new(dimensions, DistanceMetric::Euclidean)
419    }
420}
421
422impl Default for VectorConfig {
423    fn default() -> Self {
424        Self {
425            dimensions: 384, // Common embedding size (MiniLM, etc.)
426            metric: DistanceMetric::default(),
427        }
428    }
429}
430
431/// Performs brute-force k-nearest neighbor search.
432///
433/// This is O(n) where n is the number of vectors. Use this for:
434/// - Small datasets (< 10K vectors)
435/// - Baseline comparisons
436/// - Exact nearest neighbor search
437///
438/// For larger datasets, use an approximate index like HNSW.
439///
440/// # Arguments
441///
442/// * `vectors` - Iterator of (id, vector) pairs to search
443/// * `query` - The query vector
444/// * `k` - Number of nearest neighbors to return
445/// * `metric` - Distance metric to use
446///
447/// # Returns
448///
449/// Vector of (id, distance) pairs sorted by distance (ascending).
450///
451/// # Example
452///
453/// ```
454/// use grafeo_core::index::vector::{brute_force_knn, DistanceMetric};
455/// use grafeo_common::types::NodeId;
456///
457/// let vectors = vec![
458///     (NodeId::new(1), [0.1f32, 0.2, 0.3].as_slice()),
459///     (NodeId::new(2), [0.4f32, 0.5, 0.6].as_slice()),
460///     (NodeId::new(3), [0.7f32, 0.8, 0.9].as_slice()),
461/// ];
462///
463/// let query = [0.15f32, 0.25, 0.35];
464/// let results = brute_force_knn(vectors.into_iter(), &query, 2, DistanceMetric::Euclidean);
465///
466/// assert_eq!(results.len(), 2);
467/// assert_eq!(results[0].0, NodeId::new(1)); // Closest
468/// ```
469pub fn brute_force_knn<'a, I>(
470    vectors: I,
471    query: &[f32],
472    k: usize,
473    metric: DistanceMetric,
474) -> Vec<(NodeId, f32)>
475where
476    I: Iterator<Item = (NodeId, &'a [f32])>,
477{
478    let mut results: Vec<(NodeId, f32)> = vectors
479        .map(|(id, vec)| (id, compute_distance(query, vec, metric)))
480        .collect();
481
482    // Sort by distance (ascending)
483    results.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
484
485    // Truncate to k
486    results.truncate(k);
487    results
488}
489
490/// Performs brute-force k-nearest neighbor search with a filter predicate.
491///
492/// Only considers vectors where the predicate returns true.
493///
494/// # Arguments
495///
496/// * `vectors` - Iterator of (id, vector) pairs to search
497/// * `query` - The query vector
498/// * `k` - Number of nearest neighbors to return
499/// * `metric` - Distance metric to use
500/// * `predicate` - Filter function; only vectors where this returns true are considered
501///
502/// # Returns
503///
504/// Vector of (id, distance) pairs sorted by distance (ascending).
505pub fn brute_force_knn_filtered<'a, I, F>(
506    vectors: I,
507    query: &[f32],
508    k: usize,
509    metric: DistanceMetric,
510    predicate: F,
511) -> Vec<(NodeId, f32)>
512where
513    I: Iterator<Item = (NodeId, &'a [f32])>,
514    F: Fn(NodeId) -> bool,
515{
516    let mut results: Vec<(NodeId, f32)> = vectors
517        .filter(|(id, _)| predicate(*id))
518        .map(|(id, vec)| (id, compute_distance(query, vec, metric)))
519        .collect();
520
521    results.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
522    results.truncate(k);
523    results
524}
525
526/// Computes the distance between a query and multiple vectors in batch.
527///
528/// More efficient than computing distances one by one for large batches.
529///
530/// # Returns
531///
532/// Vector of (id, distance) pairs in the same order as input.
533pub fn batch_distances<'a, I>(
534    vectors: I,
535    query: &[f32],
536    metric: DistanceMetric,
537) -> Vec<(NodeId, f32)>
538where
539    I: Iterator<Item = (NodeId, &'a [f32])>,
540{
541    vectors
542        .map(|(id, vec)| (id, compute_distance(query, vec, metric)))
543        .collect()
544}
545
546#[cfg(test)]
547mod tests {
548    use super::*;
549
550    #[test]
551    fn test_vector_config_default() {
552        let config = VectorConfig::default();
553        assert_eq!(config.dimensions, 384);
554        assert_eq!(config.metric, DistanceMetric::Cosine);
555    }
556
557    #[test]
558    fn test_vector_config_constructors() {
559        let cosine = VectorConfig::cosine(768);
560        assert_eq!(cosine.dimensions, 768);
561        assert_eq!(cosine.metric, DistanceMetric::Cosine);
562
563        let euclidean = VectorConfig::euclidean(1536);
564        assert_eq!(euclidean.dimensions, 1536);
565        assert_eq!(euclidean.metric, DistanceMetric::Euclidean);
566    }
567
568    #[test]
569    fn test_brute_force_knn() {
570        let vectors = vec![
571            (NodeId::new(1), [0.0f32, 0.0, 0.0].as_slice()),
572            (NodeId::new(2), [1.0f32, 0.0, 0.0].as_slice()),
573            (NodeId::new(3), [2.0f32, 0.0, 0.0].as_slice()),
574            (NodeId::new(4), [3.0f32, 0.0, 0.0].as_slice()),
575        ];
576
577        let query = [0.5f32, 0.0, 0.0];
578        let results = brute_force_knn(vectors.into_iter(), &query, 2, DistanceMetric::Euclidean);
579
580        assert_eq!(results.len(), 2);
581        // Closest should be node 1 (dist 0.5) or node 2 (dist 0.5)
582        assert!(results[0].0 == NodeId::new(1) || results[0].0 == NodeId::new(2));
583    }
584
585    #[test]
586    fn test_brute_force_knn_empty() {
587        let vectors: Vec<(NodeId, &[f32])> = vec![];
588        let query = [0.0f32, 0.0];
589        let results = brute_force_knn(vectors.into_iter(), &query, 10, DistanceMetric::Cosine);
590        assert!(results.is_empty());
591    }
592
593    #[test]
594    fn test_brute_force_knn_k_larger_than_n() {
595        let vectors = vec![
596            (NodeId::new(1), [0.0f32, 0.0].as_slice()),
597            (NodeId::new(2), [1.0f32, 0.0].as_slice()),
598        ];
599
600        let query = [0.0f32, 0.0];
601        let results = brute_force_knn(vectors.into_iter(), &query, 10, DistanceMetric::Euclidean);
602
603        // Should return all 2 vectors, not 10
604        assert_eq!(results.len(), 2);
605    }
606
607    #[test]
608    fn test_brute_force_knn_filtered() {
609        let vectors = vec![
610            (NodeId::new(1), [0.0f32, 0.0].as_slice()),
611            (NodeId::new(2), [1.0f32, 0.0].as_slice()),
612            (NodeId::new(3), [2.0f32, 0.0].as_slice()),
613        ];
614
615        let query = [0.0f32, 0.0];
616
617        // Only consider even IDs
618        let results = brute_force_knn_filtered(
619            vectors.into_iter(),
620            &query,
621            10,
622            DistanceMetric::Euclidean,
623            |id| id.as_u64() % 2 == 0,
624        );
625
626        assert_eq!(results.len(), 1);
627        assert_eq!(results[0].0, NodeId::new(2));
628    }
629
630    #[test]
631    fn test_batch_distances() {
632        let vectors = vec![
633            (NodeId::new(1), [0.0f32, 0.0].as_slice()),
634            (NodeId::new(2), [3.0f32, 4.0].as_slice()),
635        ];
636
637        let query = [0.0f32, 0.0];
638        let results = batch_distances(vectors.into_iter(), &query, DistanceMetric::Euclidean);
639
640        assert_eq!(results.len(), 2);
641        assert_eq!(results[0].0, NodeId::new(1));
642        assert!((results[0].1 - 0.0).abs() < 0.001);
643        assert_eq!(results[1].0, NodeId::new(2));
644        assert!((results[1].1 - 5.0).abs() < 0.001); // 3-4-5 triangle
645    }
646
647    // ── VectorIndexKind Quantized dispatch ────────────────────────────
648
649    #[cfg(feature = "vector-index")]
650    mod vector_index_kind_tests {
651        use super::super::*;
652        use std::collections::HashSet;
653
654        /// Minimal accessor that always returns None (quantized indexes
655        /// store vectors internally so the accessor is unused).
656        struct NoopAccessor;
657        impl VectorAccessor for NoopAccessor {
658            fn get_vector(&self, _id: NodeId) -> Option<std::sync::Arc<[f32]>> {
659                None
660            }
661        }
662
663        fn build_quantized_kind(n: usize) -> VectorIndexKind {
664            let config = HnswConfig::new(4, DistanceMetric::Euclidean);
665            let q = QuantizedHnswIndex::new(config, QuantizationType::Scalar);
666            for i in 0..n {
667                let vec: Vec<f32> = (0..4)
668                    .map(|j| ((i * 4 + j) as f32) / (n * 4) as f32)
669                    .collect();
670                q.insert(NodeId::new(i as u64 + 1), &vec);
671            }
672            VectorIndexKind::Quantized(q)
673        }
674
675        #[test]
676        fn quantized_kind_basic_ops() {
677            let kind = build_quantized_kind(20);
678            assert_eq!(kind.len(), 20);
679            assert!(!kind.is_empty());
680            assert!(kind.contains(NodeId::new(1)));
681            assert!(!kind.contains(NodeId::new(999)));
682        }
683
684        #[test]
685        fn quantized_kind_insert_and_search() {
686            let kind = build_quantized_kind(30);
687            let query = vec![0.5, 0.5, 0.0, 0.0];
688            let results = kind.search(&query, 3, &NoopAccessor);
689            assert_eq!(results.len(), 3);
690        }
691
692        #[test]
693        fn quantized_kind_search_with_ef() {
694            let kind = build_quantized_kind(30);
695            let query = vec![0.5, 0.5, 0.0, 0.0];
696            let results = kind.search_with_ef(&query, 3, 50, &NoopAccessor);
697            assert_eq!(results.len(), 3);
698        }
699
700        #[test]
701        fn quantized_kind_search_with_filter() {
702            let kind = build_quantized_kind(30);
703            let allowlist: HashSet<NodeId> = (1..=10).map(NodeId::new).collect();
704            let query = vec![0.1, 0.1, 0.0, 0.0];
705            let results = kind.search_with_filter(&query, 5, &allowlist, &NoopAccessor);
706            assert!(!results.is_empty());
707            for (id, _) in &results {
708                assert!(allowlist.contains(id));
709            }
710        }
711
712        #[test]
713        fn quantized_kind_search_with_ef_and_filter() {
714            let kind = build_quantized_kind(30);
715            let allowlist: HashSet<NodeId> = (5..=15).map(NodeId::new).collect();
716            let query = vec![0.3, 0.3, 0.0, 0.0];
717            let results = kind.search_with_ef_and_filter(&query, 3, 50, &allowlist, &NoopAccessor);
718            for (id, _) in &results {
719                assert!(allowlist.contains(id));
720            }
721        }
722
723        #[test]
724        fn quantized_kind_batch_search() {
725            let kind = build_quantized_kind(30);
726            let queries = vec![vec![0.1, 0.0, 0.0, 0.0], vec![0.9, 0.9, 0.0, 0.0]];
727            let results = kind.batch_search(&queries, 2, &NoopAccessor);
728            assert_eq!(results.len(), 2);
729        }
730
731        #[test]
732        fn quantized_kind_batch_search_with_ef() {
733            let kind = build_quantized_kind(30);
734            let queries = vec![vec![0.1, 0.0, 0.0, 0.0]];
735            let results = kind.batch_search_with_ef(&queries, 2, 50, &NoopAccessor);
736            assert_eq!(results.len(), 1);
737        }
738
739        #[test]
740        fn quantized_kind_batch_search_with_filter() {
741            let kind = build_quantized_kind(30);
742            let allowlist: HashSet<NodeId> = (1..=10).map(NodeId::new).collect();
743            let queries = vec![vec![0.1, 0.0, 0.0, 0.0]];
744            let results = kind.batch_search_with_filter(&queries, 5, &allowlist, &NoopAccessor);
745            assert_eq!(results.len(), 1);
746            for (id, _) in &results[0] {
747                assert!(allowlist.contains(id));
748            }
749        }
750
751        #[test]
752        fn quantized_kind_batch_search_with_ef_and_filter() {
753            let kind = build_quantized_kind(30);
754            let allowlist: HashSet<NodeId> = (1..=15).map(NodeId::new).collect();
755            let queries = vec![vec![0.2, 0.0, 0.0, 0.0]];
756            let results =
757                kind.batch_search_with_ef_and_filter(&queries, 3, 50, &allowlist, &NoopAccessor);
758            assert_eq!(results.len(), 1);
759            for (id, _) in &results[0] {
760                assert!(allowlist.contains(id));
761            }
762        }
763
764        #[test]
765        fn quantized_kind_remove() {
766            let kind = build_quantized_kind(5);
767            assert!(kind.remove(NodeId::new(1)));
768            assert_eq!(kind.len(), 4);
769            assert!(!kind.contains(NodeId::new(1)));
770        }
771
772        #[test]
773        // reason: test indices 0..10 are non-negative
774        #[allow(clippy::cast_sign_loss)]
775        fn quantized_kind_snapshot_restore() {
776            let kind = build_quantized_kind(10);
777            let (entry, level, nodes) = kind.snapshot_topology();
778
779            let config = HnswConfig::new(4, DistanceMetric::Euclidean);
780            let kind2 = VectorIndexKind::Quantized(QuantizedHnswIndex::new(
781                config,
782                QuantizationType::Scalar,
783            ));
784            for i in 0..10 {
785                let vec: Vec<f32> = (0..4).map(|j| ((i * 4 + j) as f32) / 40.0).collect();
786                kind2.insert(NodeId::new(i as u64 + 1), &vec, &NoopAccessor);
787            }
788            kind2.restore_topology(entry, level, nodes);
789            assert_eq!(kind2.len(), 10);
790        }
791
792        #[test]
793        fn quantized_kind_heap_memory() {
794            let kind = build_quantized_kind(10);
795            assert!(kind.heap_memory_bytes() > 0);
796        }
797
798        #[test]
799        fn quantized_kind_type_accessors() {
800            let kind = build_quantized_kind(1);
801            assert!(kind.as_quantized().is_some());
802            assert!(kind.as_hnsw().is_none());
803            assert_eq!(kind.quantization_type(), Some(QuantizationType::Scalar));
804        }
805
806        #[test]
807        fn from_trait_impls() {
808            let config = HnswConfig::new(4, DistanceMetric::Euclidean);
809
810            let hnsw = HnswIndex::new(config.clone());
811            let kind: VectorIndexKind = hnsw.into();
812            assert!(kind.as_hnsw().is_some());
813
814            let quantized = QuantizedHnswIndex::new(config, QuantizationType::Binary);
815            let kind2: VectorIndexKind = quantized.into();
816            assert!(kind2.as_quantized().is_some());
817            assert_eq!(kind2.quantization_type(), Some(QuantizationType::Binary));
818        }
819    }
820}
grafeo_core/index/vector/mod.rs

grafeo_core/index/vector/
mod.rs