lumosai_vector_core/
types.rs

1//! Core types for the Lumos vector storage system
2
3use std::collections::HashMap;
4use uuid::Uuid;
5
6#[cfg(feature = "serde")]
7use serde::{Deserialize, Serialize};
8
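// chrono supplies the `DateTime<Utc>` timestamps stored in `IndexInfo`.
// NOTE(review): this import is gated on the `serde` feature, yet `IndexInfo`
// names `chrono::` paths unconditionally — confirm chrono is a non-optional dependency.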
10#[cfg(feature = "serde")]
11use chrono;
12
/// An embedding vector: a growable list of `f32` components.
pub type Vector = Vec<f32>;

/// Identifier of a document, held as an owned string.
pub type DocumentId = String;
18
/// Free-form metadata: a map from string keys to dynamically typed
/// [`MetadataValue`]s.
pub type Metadata = HashMap<String, MetadataValue>;

/// A dynamically typed value stored in [`Metadata`].
///
/// With the `serde` feature enabled the enum is (de)serialized *untagged*,
/// i.e. as the bare inner value without a variant name. Variant order is
/// significant for untagged deserialization (see serde's enum-representation
/// docs), so do not reorder the variants casually.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(untagged)
)]
pub enum MetadataValue {
    /// Owned text.
    String(String),
    /// Signed 64-bit integer.
    Integer(i64),
    /// 64-bit floating point number.
    Float(f64),
    /// `true` / `false`.
    Boolean(bool),
    /// Ordered list of nested values.
    Array(Vec<MetadataValue>),
    /// Nested string-keyed map of values.
    Object(HashMap<String, MetadataValue>),
    /// Explicit absence of a value.
    Null,
}

impl From<String> for MetadataValue {
    /// Wraps an owned string as [`MetadataValue::String`] without copying.
    fn from(text: String) -> Self {
        Self::String(text)
    }
}

impl From<&str> for MetadataValue {
    /// Copies the slice into a new [`MetadataValue::String`].
    fn from(text: &str) -> Self {
        Self::String(text.to_owned())
    }
}

impl From<i64> for MetadataValue {
    /// Wraps the integer as [`MetadataValue::Integer`].
    fn from(number: i64) -> Self {
        Self::Integer(number)
    }
}

impl From<f64> for MetadataValue {
    /// Wraps the float as [`MetadataValue::Float`].
    fn from(number: f64) -> Self {
        Self::Float(number)
    }
}

impl From<bool> for MetadataValue {
    /// Wraps the flag as [`MetadataValue::Boolean`].
    fn from(flag: bool) -> Self {
        Self::Boolean(flag)
    }
}
65
/// How two vectors are compared when ranking search results.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum SimilarityMetric {
    /// Cosine similarity, i.e. the normalized dot product.
    Cosine,
    /// Euclidean (L2) distance.
    Euclidean,
    /// Plain dot product.
    DotProduct,
    /// Manhattan (L1) distance.
    Manhattan,
    /// Hamming distance, intended for binary vectors.
    Hamming,
}

impl Default for SimilarityMetric {
    /// Cosine is the metric used when none is chosen explicitly.
    fn default() -> Self {
        Self::Cosine
    }
}
87
88/// Filter conditions for querying vectors
89#[derive(Debug, Clone, PartialEq)]
90#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
91pub enum FilterCondition {
92    /// Equality filter: field == value
93    Eq(String, MetadataValue),
94    /// Not equal filter: field != value
95    Ne(String, MetadataValue),
96    /// Greater than filter: field > value
97    Gt(String, MetadataValue),
98    /// Greater than or equal filter: field >= value
99    Gte(String, MetadataValue),
100    /// Less than filter: field < value
101    Lt(String, MetadataValue),
102    /// Less than or equal filter: field <= value
103    Lte(String, MetadataValue),
104    /// In filter: field in [values]
105    In(String, Vec<MetadataValue>),
106    /// Not in filter: field not in [values]
107    NotIn(String, Vec<MetadataValue>),
108    /// Exists filter: field exists
109    Exists(String),
110    /// Not exists filter: field does not exist
111    NotExists(String),
112    /// Text contains filter: field contains substring
113    Contains(String, String),
114    /// Text starts with filter: field starts with prefix
115    StartsWith(String, String),
116    /// Text ends with filter: field ends with suffix
117    EndsWith(String, String),
118    /// Regex match filter: field matches regex
119    Regex(String, String),
120    /// Logical AND: all conditions must be true
121    And(Vec<FilterCondition>),
122    /// Logical OR: at least one condition must be true
123    Or(Vec<FilterCondition>),
124    /// Logical NOT: condition must be false
125    Not(Box<FilterCondition>),
126}
127
128impl FilterCondition {
129    /// Create an equality filter
130    pub fn eq(field: impl Into<String>, value: impl Into<MetadataValue>) -> Self {
131        FilterCondition::Eq(field.into(), value.into())
132    }
133    
134    /// Create a not equal filter
135    pub fn ne(field: impl Into<String>, value: impl Into<MetadataValue>) -> Self {
136        FilterCondition::Ne(field.into(), value.into())
137    }
138    
139    /// Create a greater than filter
140    pub fn gt(field: impl Into<String>, value: impl Into<MetadataValue>) -> Self {
141        FilterCondition::Gt(field.into(), value.into())
142    }
143    
144    /// Create a less than filter
145    pub fn lt(field: impl Into<String>, value: impl Into<MetadataValue>) -> Self {
146        FilterCondition::Lt(field.into(), value.into())
147    }
148    
149    /// Create an in filter
150    pub fn in_values(field: impl Into<String>, values: Vec<impl Into<MetadataValue>>) -> Self {
151        FilterCondition::In(
152            field.into(),
153            values.into_iter().map(|v| v.into()).collect(),
154        )
155    }
156    
157    /// Create an AND filter
158    pub fn and(conditions: Vec<FilterCondition>) -> Self {
159        FilterCondition::And(conditions)
160    }
161    
162    /// Create an OR filter
163    pub fn or(conditions: Vec<FilterCondition>) -> Self {
164        FilterCondition::Or(conditions)
165    }
166    
167    /// Create a NOT filter
168    pub fn not(condition: FilterCondition) -> Self {
169        FilterCondition::Not(Box::new(condition))
170    }
171    
172    /// Create an exists filter
173    pub fn exists(field: impl Into<String>) -> Self {
174        FilterCondition::Exists(field.into())
175    }
176    
177    /// Create a contains filter
178    pub fn contains(field: impl Into<String>, substring: impl Into<String>) -> Self {
179        FilterCondition::Contains(field.into(), substring.into())
180    }
181}
182
183/// Index configuration
184#[derive(Debug, Clone)]
185#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
186pub struct IndexConfig {
187    /// Index name
188    pub name: String,
189    /// Vector dimension
190    pub dimension: usize,
191    /// Similarity metric
192    pub metric: SimilarityMetric,
193    /// Optional index-specific configuration
194    pub options: HashMap<String, MetadataValue>,
195}
196
197impl IndexConfig {
198    /// Create a new index configuration
199    pub fn new(name: impl Into<String>, dimension: usize) -> Self {
200        Self {
201            name: name.into(),
202            dimension,
203            metric: SimilarityMetric::default(),
204            options: HashMap::new(),
205        }
206    }
207    
208    /// Set the similarity metric
209    pub fn with_metric(mut self, metric: SimilarityMetric) -> Self {
210        self.metric = metric;
211        self
212    }
213    
214    /// Add an option
215    pub fn with_option(mut self, key: impl Into<String>, value: impl Into<MetadataValue>) -> Self {
216        self.options.insert(key.into(), value.into());
217        self
218    }
219}
220
221/// Index statistics and information
222#[derive(Debug, Clone)]
223#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
224pub struct IndexInfo {
225    /// Index name
226    pub name: String,
227    /// Vector dimension
228    pub dimension: usize,
229    /// Similarity metric
230    pub metric: SimilarityMetric,
231    /// Number of vectors in the index
232    pub vector_count: usize,
233    /// Index size in bytes
234    pub size_bytes: u64,
235    /// Index creation timestamp
236    pub created_at: Option<chrono::DateTime<chrono::Utc>>,
237    /// Index last updated timestamp
238    pub updated_at: Option<chrono::DateTime<chrono::Utc>>,
239    /// Additional index metadata
240    pub metadata: Metadata,
241}
242
243/// Document representation with embedding support
244#[derive(Debug, Clone)]
245#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
246pub struct Document {
247    /// Document ID
248    pub id: DocumentId,
249    /// Document content/text
250    pub content: String,
251    /// Vector embedding (optional)
252    pub embedding: Option<Vector>,
253    /// Document metadata
254    pub metadata: Metadata,
255}
256
257impl Document {
258    /// Create a new document
259    pub fn new(id: impl Into<DocumentId>, content: impl Into<String>) -> Self {
260        Self {
261            id: id.into(),
262            content: content.into(),
263            embedding: None,
264            metadata: HashMap::new(),
265        }
266    }
267    
268    /// Create a new document with auto-generated ID
269    pub fn with_content(content: impl Into<String>) -> Self {
270        Self::new(Uuid::new_v4().to_string(), content)
271    }
272    
273    /// Set the embedding
274    pub fn with_embedding(mut self, embedding: Vector) -> Self {
275        self.embedding = Some(embedding);
276        self
277    }
278    
279    /// Add metadata
280    pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<MetadataValue>) -> Self {
281        self.metadata.insert(key.into(), value.into());
282        self
283    }
284    
285    /// Set all metadata
286    pub fn with_all_metadata(mut self, metadata: Metadata) -> Self {
287        self.metadata = metadata;
288        self
289    }
290}
291
292/// Search request for querying vectors
293#[derive(Debug, Clone)]
294#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
295pub struct SearchRequest {
296    /// Index name to search
297    pub index_name: String,
298    /// Query vector or text
299    pub query: SearchQuery,
300    /// Number of results to return
301    pub top_k: usize,
302    /// Optional filter conditions
303    pub filter: Option<FilterCondition>,
304    /// Whether to include vectors in results
305    pub include_vectors: bool,
306    /// Whether to include metadata in results
307    pub include_metadata: bool,
308    /// Search options
309    pub options: HashMap<String, MetadataValue>,
310}
311
312impl SearchRequest {
313    /// Create a new search request with a vector query
314    pub fn new(index_name: impl Into<String>, vector: Vector) -> Self {
315        Self {
316            index_name: index_name.into(),
317            query: SearchQuery::Vector(vector),
318            top_k: 10,
319            filter: None,
320            include_vectors: false,
321            include_metadata: true,
322            options: HashMap::new(),
323        }
324    }
325
326    /// Create a new search request with a text query
327    pub fn new_text(index_name: impl Into<String>, text: impl Into<String>) -> Self {
328        Self {
329            index_name: index_name.into(),
330            query: SearchQuery::Text(text.into()),
331            top_k: 10,
332            filter: None,
333            include_vectors: false,
334            include_metadata: true,
335            options: HashMap::new(),
336        }
337    }
338
339    /// Set the number of results to return
340    pub fn with_top_k(mut self, top_k: usize) -> Self {
341        self.top_k = top_k;
342        self
343    }
344
345    /// Set the filter condition
346    pub fn with_filter(mut self, filter: FilterCondition) -> Self {
347        self.filter = Some(filter);
348        self
349    }
350
351    /// Set whether to include vectors in results
352    pub fn with_include_vectors(mut self, include: bool) -> Self {
353        self.include_vectors = include;
354        self
355    }
356
357    /// Set whether to include metadata in results
358    pub fn with_include_metadata(mut self, include: bool) -> Self {
359        self.include_metadata = include;
360        self
361    }
362
363    /// Add a search option
364    pub fn with_option(mut self, key: impl Into<String>, value: MetadataValue) -> Self {
365        self.options.insert(key.into(), value);
366        self
367    }
368}
369
370/// Search query can be either a vector or text
371#[derive(Debug, Clone)]
372#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
373pub enum SearchQuery {
374    /// Vector query
375    Vector(Vector),
376    /// Text query (requires embedding model)
377    Text(String),
378}
379
380/// Search result item
381#[derive(Debug, Clone)]
382#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
383pub struct SearchResult {
384    /// Document ID
385    pub id: DocumentId,
386    /// Similarity score
387    pub score: f32,
388    /// Document vector (if requested)
389    pub vector: Option<Vector>,
390    /// Document metadata (if requested)
391    pub metadata: Option<Metadata>,
392    /// Document content (if available)
393    pub content: Option<String>,
394}
395
396impl SearchResult {
397    /// Create a new search result
398    pub fn new(id: impl Into<DocumentId>, score: f32) -> Self {
399        Self {
400            id: id.into(),
401            score,
402            vector: None,
403            metadata: None,
404            content: None,
405        }
406    }
407    
408    /// Set the vector
409    pub fn with_vector(mut self, vector: Vector) -> Self {
410        self.vector = Some(vector);
411        self
412    }
413    
414    /// Set the metadata
415    pub fn with_metadata(mut self, metadata: Metadata) -> Self {
416        self.metadata = Some(metadata);
417        self
418    }
419    
420    /// Set the content
421    pub fn with_content(mut self, content: impl Into<String>) -> Self {
422        self.content = Some(content.into());
423        self
424    }
425}
426
427/// Search response
428#[derive(Debug, Clone)]
429#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
430pub struct SearchResponse {
431    /// Search results
432    pub results: Vec<SearchResult>,
433    /// Total number of results (before pagination)
434    pub total_count: Option<usize>,
435    /// Search execution time in milliseconds
436    pub execution_time_ms: Option<u64>,
437    /// Additional response metadata
438    pub metadata: Metadata,
439}
440
441impl SearchResponse {
442    /// Create a new search response
443    pub fn new(results: Vec<SearchResult>) -> Self {
444        Self {
445            results,
446            total_count: None,
447            execution_time_ms: None,
448            metadata: HashMap::new(),
449        }
450    }
451    
452    /// Set the total count
453    pub fn with_total_count(mut self, count: usize) -> Self {
454        self.total_count = Some(count);
455        self
456    }
457    
458    /// Set the execution time
459    pub fn with_execution_time(mut self, time_ms: u64) -> Self {
460        self.execution_time_ms = Some(time_ms);
461        self
462    }
463}