Skip to main content

oxirs_vec/
vector_index.rs

1//! In-memory vector index implementations and the `VectorIndex` trait.
2
3use anyhow::Result;
4use std::collections::HashMap;
5
6use crate::similarity;
7use crate::Vector;
8use crate::VectorId;
9
10/// Vector index trait for efficient similarity search
11pub trait VectorIndex: Send + Sync {
12    /// Insert a vector with associated URI
13    fn insert(&mut self, uri: String, vector: Vector) -> Result<()>;
14
15    /// Find k nearest neighbors
16    fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>>;
17
18    /// Find all vectors within threshold similarity
19    fn search_threshold(&self, query: &Vector, threshold: f32) -> Result<Vec<(String, f32)>>;
20
21    /// Get a vector by its URI
22    fn get_vector(&self, uri: &str) -> Option<&Vector>;
23
24    /// Add a vector with associated ID and metadata
25    fn add_vector(
26        &mut self,
27        id: VectorId,
28        vector: Vector,
29        _metadata: Option<HashMap<String, String>>,
30    ) -> Result<()> {
31        // Default implementation that delegates to insert
32        self.insert(id, vector)
33    }
34
35    /// Update an existing vector
36    fn update_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
37        // Default implementation that delegates to insert
38        self.insert(id, vector)
39    }
40
41    /// Update metadata for a vector
42    fn update_metadata(&mut self, _id: VectorId, _metadata: HashMap<String, String>) -> Result<()> {
43        // Default implementation (no-op)
44        Ok(())
45    }
46
47    /// Remove a vector by its ID
48    fn remove_vector(&mut self, _id: VectorId) -> Result<()> {
49        // Default implementation (no-op)
50        Ok(())
51    }
52
53    /// Iterate all stored (id, vector) pairs.
54    ///
55    /// The default returns an empty list; concrete index types that hold their
56    /// vectors in memory should override this to enable `save_to_disk`.
57    fn iter_vectors(&self) -> Vec<(String, Vector)> {
58        Vec::new()
59    }
60}
61
62/// In-memory vector index implementation
63pub struct MemoryVectorIndex {
64    vectors: Vec<(String, Vector)>,
65    similarity_config: similarity::SimilarityConfig,
66}
67
68impl MemoryVectorIndex {
69    /// Create a new empty in-memory vector index with default similarity config.
70    pub fn new() -> Self {
71        Self {
72            vectors: Vec::new(),
73            similarity_config: similarity::SimilarityConfig::default(),
74        }
75    }
76
77    /// Create a new in-memory vector index with a custom similarity configuration.
78    pub fn with_similarity_config(config: similarity::SimilarityConfig) -> Self {
79        Self {
80            vectors: Vec::new(),
81            similarity_config: config,
82        }
83    }
84}
85
86impl Default for MemoryVectorIndex {
87    fn default() -> Self {
88        Self::new()
89    }
90}
91
92impl VectorIndex for MemoryVectorIndex {
93    fn insert(&mut self, uri: String, vector: Vector) -> Result<()> {
94        // Check if vector already exists and update it
95        if let Some(pos) = self.vectors.iter().position(|(id, _)| id == &uri) {
96            self.vectors[pos] = (uri, vector);
97        } else {
98            self.vectors.push((uri, vector));
99        }
100        Ok(())
101    }
102
103    fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>> {
104        let metric = self.similarity_config.primary_metric;
105        let query_f32 = query.as_f32();
106        let mut similarities: Vec<(String, f32)> = self
107            .vectors
108            .iter()
109            .map(|(uri, vec)| {
110                let vec_f32 = vec.as_f32();
111                let sim = metric.similarity(&query_f32, &vec_f32).unwrap_or(0.0);
112                (uri.clone(), sim)
113            })
114            .collect();
115
116        similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
117        similarities.truncate(k);
118
119        Ok(similarities)
120    }
121
122    fn search_threshold(&self, query: &Vector, threshold: f32) -> Result<Vec<(String, f32)>> {
123        let metric = self.similarity_config.primary_metric;
124        let query_f32 = query.as_f32();
125        let similarities: Vec<(String, f32)> = self
126            .vectors
127            .iter()
128            .filter_map(|(uri, vec)| {
129                let vec_f32 = vec.as_f32();
130                let sim = metric.similarity(&query_f32, &vec_f32).unwrap_or(0.0);
131                if sim >= threshold {
132                    Some((uri.clone(), sim))
133                } else {
134                    None
135                }
136            })
137            .collect();
138
139        Ok(similarities)
140    }
141
142    fn get_vector(&self, uri: &str) -> Option<&Vector> {
143        self.vectors.iter().find(|(u, _)| u == uri).map(|(_, v)| v)
144    }
145
146    fn update_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
147        if let Some(pos) = self.vectors.iter().position(|(uri, _)| uri == &id) {
148            self.vectors[pos] = (id, vector);
149            Ok(())
150        } else {
151            Err(anyhow::anyhow!("Vector with id '{}' not found", id))
152        }
153    }
154
155    fn remove_vector(&mut self, id: VectorId) -> Result<()> {
156        if let Some(pos) = self.vectors.iter().position(|(uri, _)| uri == &id) {
157            self.vectors.remove(pos);
158            Ok(())
159        } else {
160            Err(anyhow::anyhow!("Vector with id '{}' not found", id))
161        }
162    }
163
164    fn iter_vectors(&self) -> Vec<(String, Vector)> {
165        self.vectors.clone()
166    }
167}