lumosai_vector_memory/
index.rs

1//! Memory index implementation
2
3use std::collections::HashMap;
4use chrono::{DateTime, Utc};
5
6use lumosai_vector_core::prelude::*;
7use lumosai_vector_core::traits::{similarity, filter::StandardFilterEvaluator};
8use crate::MemoryConfig;
9
10/// In-memory vector index
11pub struct MemoryIndex {
12    /// Index configuration
13    config: IndexConfig,
14    /// Index creation time
15    created_at: DateTime<Utc>,
16    /// Index last updated time
17    updated_at: DateTime<Utc>,
18    /// Documents stored in the index
19    documents: HashMap<DocumentId, Document>,
20    /// Similarity calculator
21    similarity_calculator: Box<dyn SimilarityCalculator>,
22    /// Filter evaluator
23    filter_evaluator: StandardFilterEvaluator,
24    /// Memory usage tracking
25    memory_usage_bytes: u64,
26}
27
28impl MemoryIndex {
29    /// Create a new memory index
30    pub fn new(config: IndexConfig, _memory_config: &MemoryConfig) -> Result<Self> {
31        let similarity_calculator = similarity::create_calculator(config.metric);
32        
33        Ok(Self {
34            config,
35            created_at: Utc::now(),
36            updated_at: Utc::now(),
37            documents: HashMap::new(),
38            similarity_calculator,
39            filter_evaluator: StandardFilterEvaluator,
40            memory_usage_bytes: 0,
41        })
42    }
43    
44    /// Get index information
45    pub fn get_info(&self) -> IndexInfo {
46        IndexInfo {
47            name: self.config.name.clone(),
48            dimension: self.config.dimension,
49            metric: self.config.metric,
50            vector_count: self.documents.len(),
51            size_bytes: self.memory_usage_bytes,
52            created_at: Some(self.created_at),
53            updated_at: Some(self.updated_at),
54            metadata: HashMap::new(),
55        }
56    }
57    
58    /// Get the dimension of vectors in this index
59    pub fn dimension(&self) -> usize {
60        self.config.dimension
61    }
62    
63    /// Get the number of vectors in this index
64    pub fn vector_count(&self) -> usize {
65        self.documents.len()
66    }
67    
68    /// Get memory usage in bytes
69    pub fn memory_usage(&self) -> u64 {
70        self.memory_usage_bytes
71    }
72    
73    /// Estimate memory usage for a document
74    pub fn estimate_document_memory(&self, document: &Document) -> u64 {
75        let mut size = 0u64;
76        
77        // Document ID
78        size += document.id.len() as u64;
79        
80        // Content
81        size += document.content.len() as u64;
82        
83        // Embedding (if present)
84        if let Some(embedding) = &document.embedding {
85            size += embedding.len() as u64 * 4; // f32 = 4 bytes
86        }
87        
88        // Metadata (rough estimate)
89        for (key, value) in &document.metadata {
90            size += key.len() as u64;
91            size += self.estimate_metadata_value_size(value);
92        }
93        
94        size
95    }
96    
97    fn estimate_metadata_value_size(&self, value: &MetadataValue) -> u64 {
98        match value {
99            MetadataValue::String(s) => s.len() as u64,
100            MetadataValue::Integer(_) => 8,
101            MetadataValue::Float(_) => 8,
102            MetadataValue::Boolean(_) => 1,
103            MetadataValue::Array(arr) => {
104                arr.iter().map(|v| self.estimate_metadata_value_size(v)).sum()
105            },
106            MetadataValue::Object(obj) => {
107                obj.iter().map(|(k, v)| k.len() as u64 + self.estimate_metadata_value_size(v)).sum()
108            },
109            MetadataValue::Null => 0,
110        }
111    }
112    
113    /// Insert or update a document
114    pub fn upsert_document(&mut self, document: Document) -> Result<bool> {
115        let was_new = !self.documents.contains_key(&document.id);
116        
117        if was_new {
118            self.memory_usage_bytes += self.estimate_document_memory(&document);
119        } else {
120            // For updates, we'll just recalculate (could be optimized)
121            if let Some(old_doc) = self.documents.get(&document.id) {
122                self.memory_usage_bytes -= self.estimate_document_memory(old_doc);
123            }
124            self.memory_usage_bytes += self.estimate_document_memory(&document);
125        }
126        
127        self.documents.insert(document.id.clone(), document);
128        self.updated_at = Utc::now();
129        
130        Ok(was_new)
131    }
132    
133    /// Update an existing document
134    pub fn update_document(&mut self, document: Document) -> Result<()> {
135        if !self.documents.contains_key(&document.id) {
136            return Err(VectorError::vector_not_found(&document.id));
137        }
138        
139        // Update memory usage
140        if let Some(old_doc) = self.documents.get(&document.id) {
141            self.memory_usage_bytes -= self.estimate_document_memory(old_doc);
142        }
143        self.memory_usage_bytes += self.estimate_document_memory(&document);
144        
145        self.documents.insert(document.id.clone(), document);
146        self.updated_at = Utc::now();
147        
148        Ok(())
149    }
150    
151    /// Delete a document
152    pub fn delete_document(&mut self, id: &DocumentId) -> Result<Option<Document>> {
153        if let Some(document) = self.documents.remove(id) {
154            self.memory_usage_bytes -= self.estimate_document_memory(&document);
155            self.updated_at = Utc::now();
156            Ok(Some(document))
157        } else {
158            Ok(None)
159        }
160    }
161    
162    /// Get a document by ID
163    pub fn get_document(&self, id: &DocumentId) -> Result<Option<Document>> {
164        Ok(self.documents.get(id).cloned())
165    }
166    
167    /// Search for similar documents
168    pub fn search(&self, request: &SearchRequest) -> Result<Vec<SearchResult>> {
169        let query_vector = match &request.query {
170            SearchQuery::Vector(vector) => {
171                if vector.len() != self.config.dimension {
172                    return Err(VectorError::dimension_mismatch(self.config.dimension, vector.len()));
173                }
174                vector.clone()
175            },
176            SearchQuery::Text(_) => {
177                return Err(VectorError::NotSupported(
178                    "Text queries require an embedding model".to_string()
179                ));
180            },
181        };
182        
183        let mut results = Vec::new();
184        
185        for (id, document) in &self.documents {
186            // Apply filter if provided
187            if let Some(filter) = &request.filter {
188                if !self.filter_evaluator.evaluate(filter, &document.metadata)? {
189                    continue;
190                }
191            }
192            
193            // Calculate similarity
194            if let Some(embedding) = &document.embedding {
195                let score = self.similarity_calculator.calculate_similarity(&query_vector, embedding)?;
196                
197                let mut result = SearchResult::new(id.clone(), score);
198                
199                if request.include_vectors {
200                    result = result.with_vector(embedding.clone());
201                }
202                
203                if request.include_metadata {
204                    result = result.with_metadata(document.metadata.clone());
205                }
206                
207                // Include content if available
208                result = result.with_content(document.content.clone());
209                
210                results.push(result);
211            }
212        }
213        
214        // Sort by score (descending)
215        results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
216        
217        // Limit to top_k
218        results.truncate(request.top_k);
219        
220        Ok(results)
221    }
222}