1use std::collections::HashMap;
8use std::time::Duration;
9
10use common::StorageConfig;
11use serde::{Deserialize, Serialize};
12
13pub use crate::serde::FieldType;
15pub use crate::serde::collection_meta::DistanceMetric;
16
/// Attribute name under which `Vector::new` and `Vector::builder` store the embedding values.
pub const VECTOR_FIELD_NAME: &str = "vector";
19
20#[derive(Debug, Clone)]
42pub struct Vector {
43 pub id: String,
45
46 pub attributes: Vec<Attribute>,
48}
49
50impl Vector {
51 pub fn new(id: impl Into<String>, values: Vec<f32>) -> Self {
55 Self {
56 id: id.into(),
57 attributes: vec![Attribute::new(
58 VECTOR_FIELD_NAME,
59 AttributeValue::Vector(values),
60 )],
61 }
62 }
63
64 pub fn builder(id: impl Into<String>, values: Vec<f32>) -> VectorBuilder {
68 VectorBuilder {
69 id: id.into(),
70 attributes: vec![Attribute::new(
71 VECTOR_FIELD_NAME,
72 AttributeValue::Vector(values),
73 )],
74 }
75 }
76
77 pub fn attribute(&self, name: &str) -> Option<&AttributeValue> {
78 self.attributes
79 .iter()
80 .filter(|a| a.name == name)
81 .map(|a| &a.value)
82 .next()
83 }
84}
85
86#[derive(Debug)]
88pub struct VectorBuilder {
89 id: String,
90 attributes: Vec<Attribute>,
91}
92
93impl VectorBuilder {
94 pub fn attribute(mut self, name: impl Into<String>, value: impl Into<AttributeValue>) -> Self {
96 self.attributes
97 .push(Attribute::new(name.into(), value.into()));
98 self
99 }
100
101 pub fn build(self) -> Vector {
103 Vector {
104 id: self.id,
105 attributes: self.attributes,
106 }
107 }
108}
109
110#[derive(Debug, Clone, PartialEq)]
116pub struct Attribute {
117 pub name: String,
118 pub value: AttributeValue,
119}
120
121impl Attribute {
122 pub fn new(name: impl Into<String>, value: AttributeValue) -> Self {
123 Self {
124 name: name.into(),
125 value,
126 }
127 }
128}
129
/// The value held by an attribute: one of a fixed set of scalar types or an `f32` vector.
#[derive(Debug, Clone, PartialEq)]
pub enum AttributeValue {
    /// Owned UTF-8 text.
    String(String),
    /// Signed 64-bit integer.
    Int64(i64),
    /// 64-bit floating point number.
    Float64(f64),
    /// Boolean flag.
    Bool(bool),
    /// Sequence of `f32` components.
    Vector(Vec<f32>),
}

/// Wraps an owned string without copying it.
impl From<String> for AttributeValue {
    fn from(s: String) -> Self {
        Self::String(s)
    }
}

/// Copies a borrowed string slice into an owned `String` variant.
impl From<&str> for AttributeValue {
    fn from(s: &str) -> Self {
        Self::String(s.to_owned())
    }
}

/// Wraps an `i64` in the `Int64` variant.
impl From<i64> for AttributeValue {
    fn from(v: i64) -> Self {
        Self::Int64(v)
    }
}

/// Wraps an `f64` in the `Float64` variant.
impl From<f64> for AttributeValue {
    fn from(v: f64) -> Self {
        Self::Float64(v)
    }
}

/// Wraps a `bool` in the `Bool` variant.
impl From<bool> for AttributeValue {
    fn from(v: bool) -> Self {
        Self::Bool(v)
    }
}
173
174impl From<crate::serde::FieldValue> for AttributeValue {
175 fn from(field: crate::serde::FieldValue) -> Self {
176 match field {
177 crate::serde::FieldValue::String(s) => AttributeValue::String(s),
178 crate::serde::FieldValue::Int64(v) => AttributeValue::Int64(v),
179 crate::serde::FieldValue::Float64(v) => AttributeValue::Float64(v),
180 crate::serde::FieldValue::Bool(v) => AttributeValue::Bool(v),
181 crate::serde::FieldValue::Vector(v) => AttributeValue::Vector(v),
182 }
183 }
184}
185
186#[derive(Debug, Clone, Serialize, Deserialize)]
188pub struct Config {
189 pub storage: StorageConfig,
194
195 pub dimensions: u16,
200
201 pub distance_metric: DistanceMetric,
203
204 #[serde(with = "duration_secs")]
206 pub flush_interval: Duration,
207
208 pub split_threshold_vectors: usize,
210
211 pub merge_threshold_vectors: usize,
213
214 pub split_search_neighbourhood: usize,
216
217 pub max_pending_and_running_rebalance_tasks: usize,
220
221 pub rebalance_backpressure_resume_threshold: usize,
224
225 pub max_rebalance_tasks: usize,
227
228 pub chunk_target: u16,
230
231 pub query_pruning_factor: Option<f32>,
241
242 pub metadata_fields: Vec<MetadataFieldSpec>,
248}
249
250impl Default for Config {
251 fn default() -> Self {
252 Self {
253 storage: StorageConfig::InMemory,
254 dimensions: 0, distance_metric: DistanceMetric::L2,
256 flush_interval: Duration::from_secs(60),
257 split_threshold_vectors: 2_000,
258 merge_threshold_vectors: 500,
259 split_search_neighbourhood: 16,
260 max_pending_and_running_rebalance_tasks: 16,
261 rebalance_backpressure_resume_threshold: 8,
262 max_rebalance_tasks: 8,
263 chunk_target: 4096,
264 query_pruning_factor: None,
265 metadata_fields: Vec::new(),
266 }
267 }
268}
269
/// Configuration carrying only `storage`, `dimensions`, `distance_metric`,
/// `query_pruning_factor` and `metadata_fields`; every field has a same-named, same-typed
/// counterpart on [`Config`].
///
/// NOTE(review): presumably used by a read-only component — confirm against its consumer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReaderConfig {
    /// Storage backend settings.
    pub storage: StorageConfig,

    /// Presumably the number of components per vector (confirm).
    pub dimensions: u16,

    /// Distance metric.
    pub distance_metric: DistanceMetric,

    /// Optional query pruning factor.
    pub query_pruning_factor: Option<f32>,

    /// Declared metadata fields.
    pub metadata_fields: Vec<MetadataFieldSpec>,
}
290
291#[derive(Debug, Clone, Serialize, Deserialize)]
293pub struct MetadataFieldSpec {
294 pub name: String,
296
297 pub field_type: FieldType,
299
300 pub indexed: bool,
303}
304
305impl MetadataFieldSpec {
306 pub fn new(name: impl Into<String>, field_type: FieldType, indexed: bool) -> Self {
307 Self {
308 name: name.into(),
309 field_type,
310 indexed,
311 }
312 }
313}
314
/// One search hit: a vector paired with its score.
#[derive(Debug, Clone)]
pub struct SearchResult {
    /// Score assigned to this hit.
    ///
    /// NOTE(review): whether larger or smaller is better is not established in this file
    /// and presumably depends on the distance metric — confirm.
    pub score: f32,
    /// The vector this result refers to.
    pub vector: Vector,
}
326
/// Describes a selection of fields: all of them, none of them, or an explicit list of names.
#[derive(Debug, Clone, PartialEq)]
pub enum FieldSelection {
    /// Every field.
    All,
    /// No fields.
    None,
    /// Only the fields with the listed names.
    Fields(Vec<String>),
}

/// `true` selects everything, `false` selects nothing.
impl From<bool> for FieldSelection {
    fn from(include: bool) -> Self {
        match include {
            true => Self::All,
            false => Self::None,
        }
    }
}

/// Copies each borrowed name into an owned `String` and selects exactly those fields.
impl From<Vec<&str>> for FieldSelection {
    fn from(fields: Vec<&str>) -> Self {
        let owned_names: Vec<String> = fields.into_iter().map(str::to_owned).collect();
        Self::Fields(owned_names)
    }
}

/// Selects exactly the given field names, taking ownership of the list.
impl From<Vec<String>> for FieldSelection {
    fn from(fields: Vec<String>) -> Self {
        Self::Fields(fields)
    }
}
363
364#[derive(Debug, Clone)]
375pub struct Query {
376 pub vector: Vec<f32>,
378 pub limit: usize,
380 pub filter: Option<Filter>,
382 pub include_fields: FieldSelection,
384}
385
386impl Query {
387 pub fn new(vector: Vec<f32>) -> Self {
389 Self {
390 vector,
391 limit: 10,
392 filter: None,
393 include_fields: FieldSelection::All,
394 }
395 }
396
397 pub fn with_limit(mut self, limit: usize) -> Self {
399 self.limit = limit;
400 self
401 }
402
403 pub fn with_filter(mut self, filter: Filter) -> Self {
405 self.filter = Some(filter);
406 self
407 }
408
409 pub fn with_fields(mut self, fields: impl Into<FieldSelection>) -> Self {
413 self.include_fields = fields.into();
414 self
415 }
416}
417
418#[derive(Debug, Clone, PartialEq)]
423pub enum Filter {
424 Eq(String, AttributeValue),
426 Neq(String, AttributeValue),
428 In(String, Vec<AttributeValue>),
430 And(Vec<Filter>),
432 Or(Vec<Filter>),
434}
435
436impl Filter {
437 pub fn eq(field: impl Into<String>, value: impl Into<AttributeValue>) -> Self {
439 Filter::Eq(field.into(), value.into())
440 }
441
442 pub fn neq(field: impl Into<String>, value: impl Into<AttributeValue>) -> Self {
444 Filter::Neq(field.into(), value.into())
445 }
446
447 pub fn in_set(field: impl Into<String>, values: Vec<AttributeValue>) -> Self {
449 Filter::In(field.into(), values)
450 }
451
452 pub fn and(filters: Vec<Filter>) -> Self {
454 Filter::And(filters)
455 }
456
457 pub fn or(filters: Vec<Filter>) -> Self {
459 Filter::Or(filters)
460 }
461}
462
463pub(crate) fn attributes_to_map(attributes: &[Attribute]) -> HashMap<String, AttributeValue> {
465 attributes
466 .iter()
467 .map(|attr| (attr.name.clone(), attr.value.clone()))
468 .collect()
469}
470
471mod duration_secs {
472 use std::time::Duration;
473
474 use serde::{Deserialize, Deserializer, Serializer};
475
476 pub fn serialize<S: Serializer>(d: &Duration, s: S) -> Result<S::Ok, S::Error> {
477 s.serialize_u64(d.as_secs())
478 }
479
480 pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<Duration, D::Error> {
481 let secs = u64::deserialize(d)?;
482 Ok(Duration::from_secs(secs))
483 }
484}
485
#[cfg(test)]
mod tests {
    use super::*;

    /// The builder keeps the embedding first and appends attributes in call order.
    #[test]
    fn should_create_vector_with_builder() {
        // given
        let values: Vec<f32> = vec![1.0, 2.0, 3.0];

        // when
        let built = Vector::builder("test-id", values.clone())
            .attribute("category", "test")
            .attribute("count", 42i64)
            .attribute("score", 0.95)
            .attribute("enabled", true)
            .build();

        // then
        assert_eq!(built.id, "test-id");
        assert_eq!(built.attributes.len(), 5);

        let embedding = &built.attributes[0];
        assert_eq!(embedding.name, "vector");
        assert_eq!(embedding.value, AttributeValue::Vector(values));

        let category = &built.attributes[1];
        assert_eq!(category.name, "category");
        assert_eq!(category.value, AttributeValue::String("test".to_string()));
    }

    /// `Vector::new` yields exactly one attribute: the embedding.
    #[test]
    fn should_create_vector_without_extra_attributes() {
        // given
        let values: Vec<f32> = vec![1.0, 2.0, 3.0];

        // when
        let created = Vector::new("test-id", values.clone());

        // then
        assert_eq!(created.id, "test-id");
        assert_eq!(created.attributes.len(), 1);

        let embedding = &created.attributes[0];
        assert_eq!(embedding.name, "vector");
        assert_eq!(embedding.value, AttributeValue::Vector(values));
    }

    /// A `&str` converts into the `String` variant.
    #[test]
    fn should_convert_str_to_attribute_value() {
        // when
        let converted: AttributeValue = "test".into();

        // then
        let expected = AttributeValue::String("test".to_string());
        assert_eq!(converted, expected);
    }

    /// An `i64` converts into the `Int64` variant.
    #[test]
    fn should_convert_int_to_attribute_value() {
        // when
        let converted: AttributeValue = 42i64.into();

        // then
        assert_eq!(converted, AttributeValue::Int64(42));
    }

    /// Every attribute ends up in the map under its own name.
    #[test]
    fn should_convert_attributes_to_map() {
        // given
        let name_attr = Attribute::new("name", AttributeValue::String("test".to_string()));
        let count_attr = Attribute::new("count", AttributeValue::Int64(42));
        let attributes = vec![name_attr, count_attr];

        // when
        let by_name = attributes_to_map(&attributes);

        // then
        assert_eq!(by_name.len(), 2);
        assert_eq!(
            by_name.get("name"),
            Some(&AttributeValue::String("test".to_string()))
        );
        assert_eq!(by_name.get("count"), Some(&AttributeValue::Int64(42)));
    }
}