pub mod index;
pub mod embeddings;
pub mod client;
pub mod server;
pub mod persistence;
pub mod errors;
pub use index::flat::FlatIndex;
pub use index::hnsw::HNSWIndex;
pub use embeddings::{EmbeddingGenerator, EmbeddingFunction};
pub use client::{VectorLiteClient, Collection, Settings, IndexType};
pub use server::{create_app, start_server};
pub use persistence::{PersistenceError, save_collection_to_file, load_collection_from_file};
use serde::{Serialize, Deserialize};
/// Default number of components per vector (768).
// NOTE(review): not referenced anywhere in this file — presumably consumed by
// the client/embedding modules; confirm against its callers.
pub const DEFAULT_VECTOR_DIMENSION: usize = 768;
/// A single record handled by a [`VectorIndex`]: numeric components plus the
/// associated text and optional JSON metadata.
///
/// The type round-trips through serde (`Serialize` / `Deserialize`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Vector {
/// Identifier of the record; [`VectorIndex::delete`] and
/// [`VectorIndex::get_vector`] take an id of the same type.
pub id: u64,
/// Numeric components of the vector.
pub values: Vec<f64>,
/// Text stored alongside the vector.
// NOTE(review): presumably the text the values were embedded from — confirm.
pub text: String,
/// Optional free-form JSON metadata attached to the record.
pub metadata: Option<serde_json::Value>,
}
/// One hit returned by [`VectorIndex::search`].
///
/// Carries the same `id`, `text` and `metadata` field types as [`Vector`],
/// plus a score, and omits the raw vector values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
/// Identifier of the matching record.
pub id: u64,
/// Score assigned to the match.
// NOTE(review): presumably the value of the `SimilarityMetric` passed to
// `search` (higher = closer) — confirm in the index implementations.
pub score: f64,
/// Text of the matching record.
pub text: String,
/// Optional JSON metadata of the matching record.
pub metadata: Option<serde_json::Value>,
}
/// Common interface of the vector indexes in this crate.
///
/// [`VectorIndexWrapper`] implements it by delegating to the wrapped index.
pub trait VectorIndex {
/// Inserts `vector` into the index.
///
/// Failures are reported as an `Err` holding a `String` message.
// NOTE(review): the exact failure conditions (duplicate id? dimension
// mismatch?) are defined by the implementations — confirm there.
fn add(&mut self, vector: Vector) -> Result<(), String>;
/// Removes the vector identified by `id`.
///
/// Failures are reported as an `Err` holding a `String` message.
fn delete(&mut self, id: u64) -> Result<(), String>;
/// Searches the index for `query` and returns the matches as [`SearchResult`]s.
///
/// `k` is the requested number of results and `similarity_metric` selects
/// the scoring function.
// NOTE(review): result ordering and behaviour when fewer than `k` vectors
// are stored are implementation-defined — confirm.
fn search(&self, query: &[f64], k: usize, similarity_metric: SimilarityMetric) -> Vec<SearchResult>;
/// Number of vectors currently stored.
fn len(&self) -> usize;
/// Whether the index holds no vectors.
fn is_empty(&self) -> bool;
/// Borrows the stored vector with the given `id`, or `None` if there is none.
fn get_vector(&self, id: u64) -> Option<&Vector>;
/// Dimension of the index.
// NOTE(review): presumably the number of components every stored vector
// must have — confirm.
fn dimension(&self) -> usize;
}
/// Enum over the concrete index implementations, so that either kind can be
/// stored, cloned and (de)serialized through a single type.
///
/// Implements [`VectorIndex`] by forwarding each call to the wrapped index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum VectorIndexWrapper {
/// Wraps a [`FlatIndex`].
Flat(FlatIndex),
/// Wraps a heap-allocated [`HNSWIndex`].
// NOTE(review): presumably boxed to keep the enum small — confirm.
HNSW(Box<HNSWIndex>),
}
/// Every method forwards unchanged to whichever index the wrapper holds.
impl VectorIndex for VectorIndexWrapper {
    /// Inserts `vector` into the wrapped index.
    fn add(&mut self, vector: Vector) -> Result<(), String> {
        match self {
            Self::Flat(flat) => flat.add(vector),
            Self::HNSW(hnsw) => hnsw.add(vector),
        }
    }

    /// Removes the vector identified by `id` from the wrapped index.
    fn delete(&mut self, id: u64) -> Result<(), String> {
        match self {
            Self::Flat(flat) => flat.delete(id),
            Self::HNSW(hnsw) => hnsw.delete(id),
        }
    }

    /// Runs the search on the wrapped index and returns its results as-is.
    fn search(
        &self,
        query: &[f64],
        k: usize,
        similarity_metric: SimilarityMetric,
    ) -> Vec<SearchResult> {
        match self {
            Self::Flat(flat) => flat.search(query, k, similarity_metric),
            Self::HNSW(hnsw) => hnsw.search(query, k, similarity_metric),
        }
    }

    /// Number of vectors reported by the wrapped index.
    fn len(&self) -> usize {
        match self {
            Self::Flat(flat) => flat.len(),
            Self::HNSW(hnsw) => hnsw.len(),
        }
    }

    /// Whether the wrapped index reports itself as empty.
    fn is_empty(&self) -> bool {
        match self {
            Self::Flat(flat) => flat.is_empty(),
            Self::HNSW(hnsw) => hnsw.is_empty(),
        }
    }

    /// Looks up a stored vector by `id` in the wrapped index.
    fn get_vector(&self, id: u64) -> Option<&Vector> {
        match self {
            Self::Flat(flat) => flat.get_vector(id),
            Self::HNSW(hnsw) => hnsw.get_vector(id),
        }
    }

    /// Dimension reported by the wrapped index.
    fn dimension(&self) -> usize {
        match self {
            Self::Flat(flat) => flat.dimension(),
            Self::HNSW(hnsw) => hnsw.dimension(),
        }
    }
}
/// Selects the function used to score how similar two vectors are.
///
/// The enum is a plain, field-less tag, so it is `Copy`, totally comparable
/// (`Eq`) and hashable (`Hash`), and can be used as a map or set key.
/// [`SimilarityMetric::Cosine`] is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum SimilarityMetric {
    /// Cosine similarity; the default metric.
    #[default]
    Cosine,
    /// Similarity derived from the Euclidean (L2) distance.
    Euclidean,
    /// Similarity derived from the Manhattan (L1) distance.
    Manhattan,
    /// Raw dot product of the two vectors.
    DotProduct,
}
impl SimilarityMetric {
pub fn calculate(&self, a: &[f64], b: &[f64]) -> f64 {
assert_eq!(a.len(), b.len(), "Vectors must have the same length");
match self {
SimilarityMetric::Cosine => cosine_similarity(a, b),
SimilarityMetric::Euclidean => euclidean_similarity(a, b),
SimilarityMetric::Manhattan => manhattan_similarity(a, b),
SimilarityMetric::DotProduct => dot_product(a, b),
}
}
}
/// Cosine of the angle between `a` and `b`, always within `[-1.0, 1.0]`.
///
/// Returns `0.0` when either vector has zero magnitude, because the angle is
/// undefined in that case.
///
/// # Panics
///
/// Panics if `a` and `b` have different lengths.
pub fn cosine_similarity(a: &[f64], b: &[f64]) -> f64 {
    assert_eq!(a.len(), b.len(), "Vectors must have the same length");
    // Single pass: accumulate the dot product and both squared norms together.
    let (mut dot, mut norm_a_sq, mut norm_b_sq) = (0.0_f64, 0.0_f64, 0.0_f64);
    for (&x, &y) in a.iter().zip(b.iter()) {
        dot += x * y;
        norm_a_sq += x * x;
        norm_b_sq += y * y;
    }
    let norm_a = norm_a_sq.sqrt();
    let norm_b = norm_b_sq.sqrt();
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }
    // Rounding in `sqrt(x) * sqrt(y)` can push the quotient an ulp past ±1
    // (e.g. [1, 1, 1] against itself), so clamp to the valid cosine range.
    // A NaN quotient passes through `clamp` unchanged.
    (dot / (norm_a * norm_b)).clamp(-1.0, 1.0)
}
/// Similarity derived from the Euclidean (L2) distance `d` between `a` and
/// `b`, computed as `1 / (1 + d)`.
///
/// Identical vectors score `1.0`; the score shrinks towards `0.0` as the
/// distance grows.
///
/// # Panics
///
/// Panics if `a` and `b` have different lengths.
pub fn euclidean_similarity(a: &[f64], b: &[f64]) -> f64 {
    assert_eq!(a.len(), b.len(), "Vectors must have the same length");
    let mut squared_distance = 0.0_f64;
    for (lhs, rhs) in a.iter().zip(b) {
        squared_distance += (lhs - rhs).powi(2);
    }
    1.0 / (1.0 + squared_distance.sqrt())
}
/// Similarity derived from the Manhattan (L1) distance `d` between `a` and
/// `b`, computed as `1 / (1 + d)`.
///
/// Identical vectors score `1.0`; the score shrinks towards `0.0` as the
/// distance grows.
///
/// # Panics
///
/// Panics if `a` and `b` have different lengths.
pub fn manhattan_similarity(a: &[f64], b: &[f64]) -> f64 {
    assert_eq!(a.len(), b.len(), "Vectors must have the same length");
    let l1_distance = a
        .iter()
        .zip(b)
        .fold(0.0_f64, |total, (lhs, rhs)| total + (lhs - rhs).abs());
    (1.0 + l1_distance).recip()
}
/// Dot product of `a` and `b`: the sum of the element-wise products.
///
/// The result is unbounded and is not normalised by the vectors' magnitudes.
///
/// # Panics
///
/// Panics if `a` and `b` have different lengths.
pub fn dot_product(a: &[f64], b: &[f64]) -> f64 {
    assert_eq!(a.len(), b.len(), "Vectors must have the same length");
    let products = std::iter::zip(a, b).map(|(lhs, rhs)| lhs * rhs);
    products.sum()
}
/// Unit tests for the similarity functions, `SimilarityMetric`, the flat
/// index and serde round-tripping of the core types.
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing floating-point scores.
    const EPSILON: f64 = 1e-10;

    /// Asserts that `actual` lies within `EPSILON` of `expected`.
    #[track_caller]
    fn assert_close(actual: f64, expected: f64) {
        assert!(
            (actual - expected).abs() < EPSILON,
            "expected {expected}, got {actual}"
        );
    }

    /// Builds a metadata-free `Vector` for the index tests.
    fn plain_vector(id: u64, values: &[f64], text: &str) -> Vector {
        Vector {
            id,
            values: values.to_vec(),
            text: text.to_string(),
            metadata: None,
        }
    }

    #[test]
    fn test_cosine_similarity_identical_vectors() {
        let a = [1.0, 2.0, 3.0];
        let b = [1.0, 2.0, 3.0];
        assert_close(cosine_similarity(&a, &b), 1.0);
    }

    #[test]
    fn test_cosine_similarity_orthogonal_vectors() {
        let a = [1.0, 0.0];
        let b = [0.0, 1.0];
        assert_close(cosine_similarity(&a, &b), 0.0);
    }

    #[test]
    fn test_cosine_similarity_opposite_vectors() {
        let a = [1.0, 2.0, 3.0];
        let b = [-1.0, -2.0, -3.0];
        assert_close(cosine_similarity(&a, &b), -1.0);
    }

    #[test]
    fn test_cosine_similarity_zero_vector() {
        // A zero-magnitude operand has no direction; the function reports 0.0.
        let zero = [0.0, 0.0, 0.0];
        let other = [1.0, 2.0, 3.0];
        assert_close(cosine_similarity(&zero, &other), 0.0);
        assert_close(cosine_similarity(&other, &zero), 0.0);
    }

    #[test]
    fn test_euclidean_similarity_identical_vectors() {
        let a = [1.0, 2.0, 3.0];
        let b = [1.0, 2.0, 3.0];
        assert_close(euclidean_similarity(&a, &b), 1.0);
    }

    #[test]
    fn test_euclidean_similarity_different_vectors() {
        let a = [0.0, 0.0];
        let b = [3.0, 4.0];
        // 3-4-5 triangle: the Euclidean distance is exactly 5.
        let expected = 1.0 / (1.0 + 5.0);
        assert_close(euclidean_similarity(&a, &b), expected);
    }

    #[test]
    fn test_manhattan_similarity_identical_vectors() {
        let a = [1.0, 2.0, 3.0];
        let b = [1.0, 2.0, 3.0];
        assert_close(manhattan_similarity(&a, &b), 1.0);
    }

    #[test]
    fn test_manhattan_similarity_different_vectors() {
        let a = [0.0, 0.0];
        let b = [3.0, 4.0];
        // |3| + |4| = 7.
        let expected = 1.0 / (1.0 + 7.0);
        assert_close(manhattan_similarity(&a, &b), expected);
    }

    #[test]
    fn test_dot_product_identical_vectors() {
        let a = [1.0, 2.0, 3.0];
        let b = [1.0, 2.0, 3.0];
        let expected = 1.0 + 4.0 + 9.0;
        assert_close(dot_product(&a, &b), expected);
    }

    #[test]
    fn test_dot_product_orthogonal_vectors() {
        let a = [1.0, 0.0];
        let b = [0.0, 1.0];
        assert_close(dot_product(&a, &b), 0.0);
    }

    #[test]
    fn test_dot_product_opposite_vectors() {
        let a = [1.0, 2.0, 3.0];
        let b = [-1.0, -2.0, -3.0];
        let expected = -1.0 - 4.0 - 9.0;
        assert_close(dot_product(&a, &b), expected);
    }

    #[test]
    fn test_similarity_metric_enum() {
        let a = [1.0, 2.0, 3.0];
        let b = [1.0, 2.0, 3.0];
        assert_close(SimilarityMetric::Cosine.calculate(&a, &b), 1.0);
        assert_close(SimilarityMetric::Euclidean.calculate(&a, &b), 1.0);
        assert_close(SimilarityMetric::Manhattan.calculate(&a, &b), 1.0);
        assert_close(SimilarityMetric::DotProduct.calculate(&a, &b), 14.0);
    }

    #[test]
    fn test_similarity_metric_default() {
        assert_eq!(SimilarityMetric::default(), SimilarityMetric::Cosine);
    }

    #[test]
    #[should_panic(expected = "Vectors must have the same length")]
    fn test_similarity_metric_rejects_mismatched_lengths() {
        let _ = SimilarityMetric::Cosine.calculate(&[1.0, 2.0], &[1.0, 2.0, 3.0]);
    }

    #[test]
    fn test_vector_store_creation() {
        let vectors = vec![
            plain_vector(0, &[1.0, 2.0, 3.0], "test1"),
            plain_vector(1, &[4.0, 5.0, 6.0], "test2"),
        ];
        let store = FlatIndex::new(3, vectors);
        assert_eq!(store.len(), 2);
        assert!(!store.is_empty());
    }

    #[test]
    fn test_vector_store_search() {
        let vectors = vec![
            plain_vector(0, &[1.0, 0.0, 0.0], "test1"),
            plain_vector(1, &[0.0, 1.0, 0.0], "test2"),
            plain_vector(2, &[0.0, 0.0, 1.0], "test3"),
        ];
        let store = FlatIndex::new(3, vectors);
        let query = [1.0, 0.0, 0.0];
        let results = store.search(&query, 2, SimilarityMetric::Cosine);
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].id, 0);
        assert_close(results[0].score, 1.0);
    }

    #[test]
    fn test_vector_index_wrapper_serialization() {
        let vectors = vec![
            plain_vector(1, &[1.0, 0.0, 0.0], "test1"),
            plain_vector(2, &[0.0, 1.0, 0.0], "test2"),
        ];
        let wrapper = VectorIndexWrapper::Flat(FlatIndex::new(3, vectors));

        let serialized = serde_json::to_string(&wrapper).expect("Serialization should work");
        let deserialized: VectorIndexWrapper =
            serde_json::from_str(&serialized).expect("Deserialization should work");

        assert_eq!(deserialized.len(), 2);
        assert_eq!(deserialized.dimension(), 3);
        assert!(!deserialized.is_empty());

        // The round-tripped index must still answer queries.
        let query = [1.1, 0.1, 0.1];
        let results = deserialized.search(&query, 1, SimilarityMetric::Cosine);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].id, 1);
    }

    #[test]
    fn test_vector_metadata_functionality() {
        use serde_json::json;

        let metadata = json!({
            "title": "Test Document",
            "category": "example",
            "tags": ["test", "metadata"],
            "nested": {
                "value": 42,
                "enabled": true
            }
        });
        let vector = Vector {
            id: 1,
            values: vec![1.0, 2.0, 3.0],
            text: "Test document text".to_string(),
            metadata: Some(metadata),
        };

        let stored_metadata = vector
            .metadata
            .as_ref()
            .expect("metadata was set when the vector was built");
        assert_eq!(stored_metadata["title"], "Test Document");
        assert_eq!(stored_metadata["category"], "example");
        assert_eq!(stored_metadata["nested"]["value"], 42);
        assert_eq!(stored_metadata["nested"]["enabled"], true);

        let vector_no_metadata = Vector {
            id: 2,
            values: vec![4.0, 5.0, 6.0],
            text: "Test text".to_string(),
            metadata: None,
        };
        assert!(vector_no_metadata.metadata.is_none());

        // Every field, including the text, must survive a JSON round trip.
        let serialized = serde_json::to_string(&vector).expect("Serialization should work");
        let deserialized: Vector =
            serde_json::from_str(&serialized).expect("Deserialization should work");
        assert_eq!(deserialized.id, vector.id);
        assert_eq!(deserialized.values, vector.values);
        assert_eq!(deserialized.text, vector.text);
        assert_eq!(deserialized.metadata, vector.metadata);
    }
}