lumosai_vector_memory/
index.rs1use std::collections::HashMap;
4use chrono::{DateTime, Utc};
5
6use lumosai_vector_core::prelude::*;
7use lumosai_vector_core::traits::{similarity, filter::StandardFilterEvaluator};
8use crate::MemoryConfig;
9
10pub struct MemoryIndex {
12 config: IndexConfig,
14 created_at: DateTime<Utc>,
16 updated_at: DateTime<Utc>,
18 documents: HashMap<DocumentId, Document>,
20 similarity_calculator: Box<dyn SimilarityCalculator>,
22 filter_evaluator: StandardFilterEvaluator,
24 memory_usage_bytes: u64,
26}
27
28impl MemoryIndex {
29 pub fn new(config: IndexConfig, _memory_config: &MemoryConfig) -> Result<Self> {
31 let similarity_calculator = similarity::create_calculator(config.metric);
32
33 Ok(Self {
34 config,
35 created_at: Utc::now(),
36 updated_at: Utc::now(),
37 documents: HashMap::new(),
38 similarity_calculator,
39 filter_evaluator: StandardFilterEvaluator,
40 memory_usage_bytes: 0,
41 })
42 }
43
44 pub fn get_info(&self) -> IndexInfo {
46 IndexInfo {
47 name: self.config.name.clone(),
48 dimension: self.config.dimension,
49 metric: self.config.metric,
50 vector_count: self.documents.len(),
51 size_bytes: self.memory_usage_bytes,
52 created_at: Some(self.created_at),
53 updated_at: Some(self.updated_at),
54 metadata: HashMap::new(),
55 }
56 }
57
58 pub fn dimension(&self) -> usize {
60 self.config.dimension
61 }
62
63 pub fn vector_count(&self) -> usize {
65 self.documents.len()
66 }
67
68 pub fn memory_usage(&self) -> u64 {
70 self.memory_usage_bytes
71 }
72
73 pub fn estimate_document_memory(&self, document: &Document) -> u64 {
75 let mut size = 0u64;
76
77 size += document.id.len() as u64;
79
80 size += document.content.len() as u64;
82
83 if let Some(embedding) = &document.embedding {
85 size += embedding.len() as u64 * 4; }
87
88 for (key, value) in &document.metadata {
90 size += key.len() as u64;
91 size += self.estimate_metadata_value_size(value);
92 }
93
94 size
95 }
96
97 fn estimate_metadata_value_size(&self, value: &MetadataValue) -> u64 {
98 match value {
99 MetadataValue::String(s) => s.len() as u64,
100 MetadataValue::Integer(_) => 8,
101 MetadataValue::Float(_) => 8,
102 MetadataValue::Boolean(_) => 1,
103 MetadataValue::Array(arr) => {
104 arr.iter().map(|v| self.estimate_metadata_value_size(v)).sum()
105 },
106 MetadataValue::Object(obj) => {
107 obj.iter().map(|(k, v)| k.len() as u64 + self.estimate_metadata_value_size(v)).sum()
108 },
109 MetadataValue::Null => 0,
110 }
111 }
112
113 pub fn upsert_document(&mut self, document: Document) -> Result<bool> {
115 let was_new = !self.documents.contains_key(&document.id);
116
117 if was_new {
118 self.memory_usage_bytes += self.estimate_document_memory(&document);
119 } else {
120 if let Some(old_doc) = self.documents.get(&document.id) {
122 self.memory_usage_bytes -= self.estimate_document_memory(old_doc);
123 }
124 self.memory_usage_bytes += self.estimate_document_memory(&document);
125 }
126
127 self.documents.insert(document.id.clone(), document);
128 self.updated_at = Utc::now();
129
130 Ok(was_new)
131 }
132
133 pub fn update_document(&mut self, document: Document) -> Result<()> {
135 if !self.documents.contains_key(&document.id) {
136 return Err(VectorError::vector_not_found(&document.id));
137 }
138
139 if let Some(old_doc) = self.documents.get(&document.id) {
141 self.memory_usage_bytes -= self.estimate_document_memory(old_doc);
142 }
143 self.memory_usage_bytes += self.estimate_document_memory(&document);
144
145 self.documents.insert(document.id.clone(), document);
146 self.updated_at = Utc::now();
147
148 Ok(())
149 }
150
151 pub fn delete_document(&mut self, id: &DocumentId) -> Result<Option<Document>> {
153 if let Some(document) = self.documents.remove(id) {
154 self.memory_usage_bytes -= self.estimate_document_memory(&document);
155 self.updated_at = Utc::now();
156 Ok(Some(document))
157 } else {
158 Ok(None)
159 }
160 }
161
162 pub fn get_document(&self, id: &DocumentId) -> Result<Option<Document>> {
164 Ok(self.documents.get(id).cloned())
165 }
166
167 pub fn search(&self, request: &SearchRequest) -> Result<Vec<SearchResult>> {
169 let query_vector = match &request.query {
170 SearchQuery::Vector(vector) => {
171 if vector.len() != self.config.dimension {
172 return Err(VectorError::dimension_mismatch(self.config.dimension, vector.len()));
173 }
174 vector.clone()
175 },
176 SearchQuery::Text(_) => {
177 return Err(VectorError::NotSupported(
178 "Text queries require an embedding model".to_string()
179 ));
180 },
181 };
182
183 let mut results = Vec::new();
184
185 for (id, document) in &self.documents {
186 if let Some(filter) = &request.filter {
188 if !self.filter_evaluator.evaluate(filter, &document.metadata)? {
189 continue;
190 }
191 }
192
193 if let Some(embedding) = &document.embedding {
195 let score = self.similarity_calculator.calculate_similarity(&query_vector, embedding)?;
196
197 let mut result = SearchResult::new(id.clone(), score);
198
199 if request.include_vectors {
200 result = result.with_vector(embedding.clone());
201 }
202
203 if request.include_metadata {
204 result = result.with_metadata(document.metadata.clone());
205 }
206
207 result = result.with_content(document.content.clone());
209
210 results.push(result);
211 }
212 }
213
214 results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
216
217 results.truncate(request.top_k);
219
220 Ok(results)
221 }
222}