use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::error::{VectorError, VectorResult};
/// Distance function used to compare two vectors.
///
/// Serialized with snake_case variant names (`cosine`, `euclidean`,
/// `dot_product`).
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DistanceMetric {
/// Cosine distance, i.e. `1 - cosine similarity`.
Cosine,
/// Euclidean (L2) distance.
Euclidean,
/// Negated dot product.
DotProduct,
}
impl DistanceMetric {
    /// Computes the distance between `a` and `b` under this metric.
    ///
    /// * `Cosine` — `1 - cos(a, b)`. The similarity is clamped to `[-1, 1]`
    ///   first, so the result always lies in `[0, 2]` even when `f32`
    ///   rounding pushes the raw ratio slightly past ±1 (e.g. for identical
    ///   vectors). If either vector has zero norm the result is `1.0`.
    /// * `Euclidean` — square root of the summed squared differences.
    /// * `DotProduct` — the negated dot product (presumably so that a larger
    ///   product ranks as a smaller distance).
    ///
    /// Slices of different lengths are not rejected: pairwise terms only
    /// cover the common prefix (`zip` stops at the shorter slice), while the
    /// cosine norms are taken over each full slice. Callers should pass
    /// vectors of equal dimensionality.
    pub fn compute(&self, a: &[f32], b: &[f32]) -> f32 {
        match self {
            DistanceMetric::Cosine => {
                let norm_a = Self::norm(a);
                let norm_b = Self::norm(b);
                if norm_a == 0.0 || norm_b == 0.0 {
                    // Direction is undefined for a zero vector; report the
                    // same distance as for orthogonal vectors.
                    1.0
                } else {
                    let similarity = Self::dot(a, b) / (norm_a * norm_b);
                    // Rounding can yield e.g. 1.0000001, which would make the
                    // distance negative; clamp keeps NaN as NaN.
                    1.0 - similarity.clamp(-1.0, 1.0)
                }
            }
            DistanceMetric::Euclidean => a
                .iter()
                .zip(b.iter())
                .map(|(x, y)| (x - y) * (x - y))
                .sum::<f32>()
                .sqrt(),
            DistanceMetric::DotProduct => -Self::dot(a, b),
        }
    }

    /// Dot product over the common prefix of `a` and `b`.
    fn dot(a: &[f32], b: &[f32]) -> f32 {
        a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
    }

    /// Euclidean (L2) norm of `v`.
    fn norm(v: &[f32]) -> f32 {
        v.iter().map(|x| x * x).sum::<f32>().sqrt()
    }
}
/// Kind of index backing a collection.
///
/// Serialized as `"hnsw"` or `"flat"`.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IndexType {
    /// HNSW index (presumably a Hierarchical Navigable Small World graph).
    // serde's `snake_case` rule inserts `_` before every uppercase letter, so
    // without an explicit name this variant is written as "h_n_s_w". Emit the
    // intended "hnsw" and keep accepting the old spelling so previously
    // serialized data still loads.
    #[serde(rename = "hnsw", alias = "h_n_s_w")]
    HNSW,
    /// Flat index.
    Flat,
}
impl IndexType {
pub fn auto_select(vector_count: usize) -> Self {
if vector_count < 1_000 {
IndexType::Flat
} else {
IndexType::HNSW
}
}
}
/// A stored vector together with its identifier, owning collection name,
/// JSON metadata, optional text and creation timestamp.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct VectorRecord {
/// Record identifier; `VectorRecord::new` assigns a random v4 UUID.
pub id: Uuid,
/// Name of the collection the record is associated with.
pub collection: String,
/// The vector components.
pub vector: Vec<f32>,
/// Arbitrary JSON metadata; `VectorRecord::new` starts with an empty object.
pub metadata: serde_json::Value,
/// Optional text attached to the record (`None` until `with_text` is used).
pub text: Option<String>,
/// Creation time; `VectorRecord::new` sets it to the current UTC time.
pub created_at: DateTime<Utc>,
}
impl VectorRecord {
    /// Creates a record for `collection` holding `vector`.
    ///
    /// The record gets a fresh random (v4) id, empty-object metadata, no
    /// text, and the current UTC time as its creation timestamp.
    pub fn new(collection: impl Into<String>, vector: Vec<f32>) -> Self {
        let id = Uuid::new_v4();
        let collection = collection.into();
        let metadata = serde_json::json!({});
        let created_at = Utc::now();
        Self {
            id,
            collection,
            vector,
            metadata,
            text: None,
            created_at,
        }
    }

    /// Builder-style setter: returns the record with `text` attached.
    pub fn with_text(self, text: impl Into<String>) -> Self {
        Self {
            text: Some(text.into()),
            ..self
        }
    }

    /// Builder-style setter: returns the record with its metadata replaced
    /// by `meta`.
    pub fn with_metadata(self, meta: serde_json::Value) -> Self {
        Self {
            metadata: meta,
            ..self
        }
    }

    /// Number of components in the stored vector.
    pub fn dimensions(&self) -> usize {
        let Self { vector, .. } = self;
        vector.len()
    }
}
/// Descriptor of a vector collection: identity, vector shape, distance
/// metric, index configuration and bookkeeping data.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Collection {
/// Identifier of the workspace the collection is associated with.
pub workspace_id: String,
/// Collection name.
pub name: String,
/// Vector dimensionality configured for the collection.
// NOTE(review): presumably every stored vector must have this length — confirm where it is enforced.
pub dimensions: usize,
/// Distance metric configured for the collection.
pub distance: DistanceMetric,
/// Index type configured for the collection.
pub index_type: IndexType,
/// Creation timestamp (UTC).
pub created_at: DateTime<Utc>,
/// Number of vectors recorded for the collection.
pub vector_count: u64,
/// Arbitrary JSON metadata attached to the collection.
pub metadata: serde_json::Value,
/// Index build parameter.
// NOTE(review): looks like the HNSW `ef_construction` setting — confirm against the index builder.
pub ef_construction: usize,
/// Index connectivity parameter.
// NOTE(review): looks like the HNSW `M` (links per node) setting — confirm against the index builder.
pub m_connections: usize,
}
/// One hit returned by a search.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SearchResult {
/// Identifier of the matched record.
pub id: Uuid,
/// Score assigned to the hit.
// NOTE(review): whether higher or lower is better is not visible in this file — confirm with the search code.
pub score: f32,
/// The matched vector, when included in the response.
// NOTE(review): presumably populated only when the query sets `include_vectors` — confirm.
pub vector: Option<Vec<f32>>,
/// JSON metadata of the matched record.
pub metadata: serde_json::Value,
/// Text of the matched record, if any.
pub text: Option<String>,
/// Creation timestamp (UTC) of the matched record.
pub created_at: DateTime<Utc>,
}
/// Counters describing how a search was executed. `Default` yields all zeros.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
pub struct SearchMetrics {
/// Dimensionality of the query vector.
pub query_vector_dims: usize,
/// Number of candidates evaluated.
pub candidates_evaluated: usize,
/// Number of candidates counted after filtering.
// NOTE(review): presumably the count that survived the metadata filter — confirm.
pub post_filter_count: usize,
/// Search latency.
// NOTE(review): the `_us` suffix suggests microseconds — confirm at the measurement site.
pub latency_us: u64,
}
/// Search output: the result list plus execution metrics.
/// `Default` yields an empty result list and zeroed metrics.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
pub struct SearchResponse {
/// Returned hits.
// NOTE(review): ordering (best first?) is not visible in this file — confirm.
pub results: Vec<SearchResult>,
/// Metrics collected for this search.
pub metrics: SearchMetrics,
}
/// Predicate over a record's JSON metadata.
///
/// Serialized as an internally tagged enum: the variant name, in snake_case,
/// is stored under the `"op"` key next to the variant's fields, e.g.
/// `{"op": "eq", "key": "...", "value": ...}`.
///
/// Evaluation is implemented by `crate::search::filters::apply_filter`, which
/// is not part of this file; the per-variant descriptions below are inferred
/// from the variant names and field types.
// NOTE(review): serde's internally tagged representation supports struct
// variants, unit variants and newtype variants wrapping a struct or map.
// `And(Vec<_>)` and `Or(Vec<_>)` wrap a sequence, which is expected to fail at
// runtime when (de)serialized, and `Not(Box<MetadataFilter>)` nests a second
// `"op"` tag inside the same map. Verify the JSON round-trip of these three
// variants; a fix needs either struct variants or a different tagging scheme,
// both of which change the public API or the wire format.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "op", rename_all = "snake_case")]
pub enum MetadataFilter {
/// Equality test of the metadata entry `key` against `value`.
Eq {
key: String,
value: serde_json::Value,
},
/// Numeric "greater than" test of the metadata entry `key` against `value`.
Gt {
key: String,
value: f64,
},
/// Numeric "less than" test of the metadata entry `key` against `value`.
Lt {
key: String,
value: f64,
},
/// Containment test of `value` within the metadata entry `key`.
Contains {
key: String,
value: String,
},
/// Membership test of the metadata entry `key` in `values`.
In {
key: String,
values: Vec<serde_json::Value>,
},
/// Presence test for the metadata entry `key`.
Exists {
key: String,
},
/// Conjunction of the nested filters.
And(Vec<MetadataFilter>),
/// Disjunction of the nested filters.
Or(Vec<MetadataFilter>),
/// Negation of the nested filter.
Not(Box<MetadataFilter>),
}
impl MetadataFilter {
/// Returns whether `metadata` satisfies this filter.
///
/// Thin wrapper that delegates to `crate::search::filters::apply_filter`.
pub fn matches(&self, metadata: &serde_json::Value) -> bool {
crate::search::filters::apply_filter(self, metadata)
}
}
/// Configuration of an optional re-ranking step.
///
/// Serialized as an internally tagged enum: the variant name, in snake_case,
/// is stored under the `"type"` key next to the variant's fields.
///
/// The re-ranking logic itself is not part of this file, so the meaning of
/// the numeric parameters is not documented here.
// NOTE(review): `Composite(Vec<_>)` is a newtype variant wrapping a sequence,
// which serde's internally tagged representation does not support (expected to
// fail at runtime when (de)serialized). Verify the JSON round-trip.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum RerankerConfig {
/// No re-ranking.
None,
/// Diversity-based re-ranking.
// NOTE(review): `lambda` presumably trades relevance against diversity (MMR-style) — confirm.
Diversity {
lambda: f32,
weight: f32,
},
/// Recency-based re-ranking.
// NOTE(review): `half_life_days` presumably controls how quickly the boost decays with age — confirm.
Recency {
boost: f32,
half_life_days: f32,
weight: f32,
},
/// A list of nested reranker configurations.
Composite(Vec<RerankerConfig>),
}
/// Parameters of a vector search. Use `validate` to check them before
/// running the search.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SearchQuery {
/// Name of the collection to search; `validate` rejects an empty name.
pub collection: String,
/// Query vector; `validate` rejects an empty vector.
pub vector: Vec<f32>,
/// Number of results requested; `validate` rejects `0`.
pub top_k: usize,
/// Optional metadata filter; checked by `validate` when present.
pub filter: Option<MetadataFilter>,
/// Whether result vectors are requested in the response.
pub include_vectors: bool,
/// Whether result metadata is requested in the response.
pub include_metadata: bool,
/// Optional search-time tuning parameter.
// NOTE(review): looks like the HNSW `ef_search` setting — confirm against the index code.
pub ef_search: Option<usize>,
/// Optional re-ranking configuration.
pub reranker: Option<RerankerConfig>,
}
impl SearchQuery {
    /// Checks that the query is well-formed.
    ///
    /// # Errors
    ///
    /// Returns [`VectorError::SearchError`] for the first rule that is
    /// violated, in this order: empty collection name, empty query vector,
    /// `top_k` of zero. When a filter is present, any error from
    /// `crate::search::filters::validate_filter` is propagated.
    pub fn validate(&self) -> VectorResult<()> {
        // (violated, message) pairs, listed in the order they are reported.
        let rules: [(bool, &'static str); 3] = [
            (self.collection.is_empty(), "collection name must not be empty"),
            (self.vector.is_empty(), "query vector must not be empty"),
            (self.top_k == 0, "top_k must be > 0"),
        ];
        for &(violated, message) in rules.iter() {
            if violated {
                return Err(VectorError::SearchError(message.into()));
            }
        }

        if let Some(filter) = self.filter.as_ref() {
            crate::search::filters::validate_filter(filter)?;
        }
        Ok(())
    }
}
/// Parameters of a hybrid search that carries a query vector and an optional
/// text query. Use `validate` to check them before running the search.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct HybridQuery {
/// Name of the collection to search; `validate` rejects an empty name.
pub collection: String,
/// Query vector; `validate` rejects an empty vector.
pub vector: Vec<f32>,
/// Optional text query.
pub text: Option<String>,
/// Number of results requested; `validate` rejects `0`.
pub top_k: usize,
/// Blend factor; `validate` requires it to lie in `0.0..=1.0`.
// NOTE(review): presumably the weight of the vector score versus the text score — confirm which end is which.
pub alpha: f32,
/// Optional metadata filter; checked by `validate` when present.
pub filter: Option<MetadataFilter>,
/// Whether result vectors are requested in the response.
pub include_vectors: bool,
/// Optional re-ranking configuration.
pub reranker: Option<RerankerConfig>,
}
impl HybridQuery {
    /// Checks that the query is well-formed.
    ///
    /// # Errors
    ///
    /// Returns [`VectorError::SearchError`] for the first rule that is
    /// violated, in this order: empty collection name, empty query vector,
    /// `top_k` of zero, `alpha` outside `0.0..=1.0` (a NaN `alpha` is
    /// rejected as well). When a filter is present, any error from
    /// `crate::search::filters::validate_filter` is propagated.
    pub fn validate(&self) -> VectorResult<()> {
        let alpha_in_range = (0.0..=1.0).contains(&self.alpha);

        // (violated, message) pairs, listed in the order they are reported.
        let rules: [(bool, &'static str); 4] = [
            (self.collection.is_empty(), "collection name must not be empty"),
            (self.vector.is_empty(), "query vector must not be empty"),
            (self.top_k == 0, "top_k must be > 0"),
            (!alpha_in_range, "hybrid alpha must be between 0.0 and 1.0"),
        ];
        for &(violated, message) in rules.iter() {
            if violated {
                return Err(VectorError::SearchError(message.into()));
            }
        }

        if let Some(filter) = self.filter.as_ref() {
            crate::search::filters::validate_filter(filter)?;
        }
        Ok(())
    }
}
/// Size statistics for a single collection. `Default` yields all zeros.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
pub struct CollectionStats {
/// Number of vectors in the collection.
pub vector_count: u64,
/// Size of the collection in bytes.
// NOTE(review): whether this is on-disk or in-memory size is not visible here — confirm.
pub size_bytes: u64,
}
/// Engine-wide counters. `Default` yields all zeros.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
pub struct EngineStats {
/// Number of collections.
pub collection_count: usize,
/// Total number of vectors across collections.
pub total_vectors: u64,
/// Number of indexes currently loaded.
pub loaded_indexes: usize,
/// Number of memory-mapped files currently loaded.
pub loaded_mmap_files: usize,
/// Number of embedding cache hits.
pub embedding_cache_hits: u64,
/// Number of embedding cache misses.
pub embedding_cache_misses: u64,
}