mod accessor;
mod distance;
mod mmr;
pub mod quantization;
mod simd;
pub mod storage;
pub mod zone_map;
#[cfg(feature = "vector-index")]
mod config;
#[cfg(feature = "vector-index")]
mod hnsw;
#[cfg(feature = "vector-index")]
mod quantized_hnsw;
pub use accessor::{PropertyVectorAccessor, VectorAccessor};
pub use distance::{
DistanceMetric, compute_distance, cosine_distance, cosine_similarity, dot_product,
euclidean_distance, euclidean_distance_squared, l2_norm, manhattan_distance, normalize,
simd_support,
};
pub use mmr::mmr_select;
pub use quantization::{BinaryQuantizer, ProductQuantizer, QuantizationType, ScalarQuantizer};
#[cfg(feature = "mmap")]
pub use storage::MmapStorage;
pub use storage::{RamStorage, StorageBackend, VectorStorage};
pub use zone_map::VectorZoneMap;
#[cfg(feature = "vector-index")]
pub use config::HnswConfig;
#[cfg(feature = "vector-index")]
pub use hnsw::HnswIndex;
#[cfg(feature = "vector-index")]
pub use quantized_hnsw::QuantizedHnswIndex;
use grafeo_common::types::NodeId;
/// Settings shared by a set of vectors: their dimensionality and the metric
/// used to compare them.
#[derive(Clone, Debug)]
pub struct VectorConfig {
    /// Dimensionality of the vectors.
    pub dimensions: usize,
    /// Distance metric to use when comparing vectors.
    pub metric: DistanceMetric,
}
impl VectorConfig {
#[must_use]
pub const fn new(dimensions: usize, metric: DistanceMetric) -> Self {
Self { dimensions, metric }
}
#[must_use]
pub const fn cosine(dimensions: usize) -> Self {
Self::new(dimensions, DistanceMetric::Cosine)
}
#[must_use]
pub const fn euclidean(dimensions: usize) -> Self {
Self::new(dimensions, DistanceMetric::Euclidean)
}
}
impl Default for VectorConfig {
    /// Returns a configuration with 384 dimensions and whatever metric
    /// `DistanceMetric::default()` yields.
    fn default() -> Self {
        // NOTE(review): 384 presumably matches a commonly used embedding
        // size; confirm before changing.
        const DEFAULT_DIMENSIONS: usize = 384;

        let metric = DistanceMetric::default();
        Self {
            dimensions: DEFAULT_DIMENSIONS,
            metric,
        }
    }
}
/// Finds the `k` nearest vectors to `query` by exhaustively scoring every
/// candidate.
///
/// The distance from `query` to each vector yielded by `vectors` is computed
/// with [`compute_distance`] under `metric`; all candidates are then sorted
/// by ascending distance and the first `k` are returned.
///
/// # Arguments
///
/// * `vectors` - Iterator of `(id, vector)` candidates.
/// * `query` - The vector to compare every candidate against.
/// * `k` - Maximum number of results to return.
/// * `metric` - Distance metric passed through to [`compute_distance`].
///
/// # Returns
///
/// At most `k` `(id, distance)` pairs ordered from nearest to farthest.
/// Candidates with equal distance keep the order in which the iterator
/// produced them. Candidates whose distance is NaN are ranked after every
/// candidate with a real distance, so they only appear in the result when
/// fewer than `k` comparable candidates exist.
pub fn brute_force_knn<'a, I>(
    vectors: I,
    query: &[f32],
    k: usize,
    metric: DistanceMetric,
) -> Vec<(NodeId, f32)>
where
    I: Iterator<Item = (NodeId, &'a [f32])>,
{
    let mut results: Vec<(NodeId, f32)> = vectors
        .map(|(id, vec)| (id, compute_distance(query, vec, metric)))
        .collect();

    // `partial_cmp` returns `None` only when at least one side is NaN.
    // Treating that case as "equal" is not a total order: it lets NaN
    // entries float ahead of real neighbours and violates the contract of
    // `sort_by`. Order NaN strictly after every real distance instead
    // (`false < true`), with all NaNs comparing equal to each other.
    results.sort_by(|a, b| {
        a.1.partial_cmp(&b.1)
            .unwrap_or_else(|| a.1.is_nan().cmp(&b.1.is_nan()))
    });
    results.truncate(k);
    results
}
/// Finds the `k` nearest vectors to `query` among the candidates accepted by
/// `predicate`, by exhaustively scoring every accepted candidate.
///
/// Candidates are filtered by id before any distance is computed, so
/// rejected vectors are never passed to [`compute_distance`].
///
/// # Arguments
///
/// * `vectors` - Iterator of `(id, vector)` candidates.
/// * `query` - The vector to compare every accepted candidate against.
/// * `k` - Maximum number of results to return.
/// * `metric` - Distance metric passed through to [`compute_distance`].
/// * `predicate` - Returns `true` for ids that may appear in the result.
///
/// # Returns
///
/// At most `k` `(id, distance)` pairs ordered from nearest to farthest.
/// Candidates with equal distance keep the order in which the iterator
/// produced them. Candidates whose distance is NaN are ranked after every
/// candidate with a real distance, so they only appear in the result when
/// fewer than `k` comparable candidates exist.
pub fn brute_force_knn_filtered<'a, I, F>(
    vectors: I,
    query: &[f32],
    k: usize,
    metric: DistanceMetric,
    predicate: F,
) -> Vec<(NodeId, f32)>
where
    I: Iterator<Item = (NodeId, &'a [f32])>,
    F: Fn(NodeId) -> bool,
{
    let mut results: Vec<(NodeId, f32)> = vectors
        .filter(|(id, _)| predicate(*id))
        .map(|(id, vec)| (id, compute_distance(query, vec, metric)))
        .collect();

    // `partial_cmp` returns `None` only when at least one side is NaN.
    // Treating that case as "equal" is not a total order: it lets NaN
    // entries float ahead of real neighbours and violates the contract of
    // `sort_by`. Order NaN strictly after every real distance instead
    // (`false < true`), with all NaNs comparing equal to each other.
    results.sort_by(|a, b| {
        a.1.partial_cmp(&b.1)
            .unwrap_or_else(|| a.1.is_nan().cmp(&b.1.is_nan()))
    });
    results.truncate(k);
    results
}
/// Computes the distance from `query` to every vector yielded by `vectors`.
///
/// Each candidate is scored with [`compute_distance`] under `metric`. No
/// sorting or truncation is applied.
///
/// # Returns
///
/// One `(id, distance)` pair per input vector, in the order the iterator
/// produced them.
pub fn batch_distances<'a, I>(
    vectors: I,
    query: &[f32],
    metric: DistanceMetric,
) -> Vec<(NodeId, f32)>
where
    I: Iterator<Item = (NodeId, &'a [f32])>,
{
    // Reserve up front using the iterator's guaranteed minimum length.
    let mut distances = Vec::with_capacity(vectors.size_hint().0);
    for (id, candidate) in vectors {
        let distance = compute_distance(query, candidate, metric);
        distances.push((id, distance));
    }
    distances
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration is 384-dimensional and uses the cosine metric.
    #[test]
    fn test_vector_config_default() {
        let VectorConfig { dimensions, metric } = VectorConfig::default();
        assert_eq!(dimensions, 384);
        assert_eq!(metric, DistanceMetric::Cosine);
    }

    /// Each shorthand constructor stores the given size and its own metric.
    #[test]
    fn test_vector_config_constructors() {
        let by_cosine = VectorConfig::cosine(768);
        assert_eq!(by_cosine.dimensions, 768);
        assert_eq!(by_cosine.metric, DistanceMetric::Cosine);

        let by_euclidean = VectorConfig::euclidean(1536);
        assert_eq!(by_euclidean.dimensions, 1536);
        assert_eq!(by_euclidean.metric, DistanceMetric::Euclidean);
    }

    /// With the query halfway between nodes 1 and 2, either may rank first.
    #[test]
    fn test_brute_force_knn() {
        let candidates = vec![
            (NodeId::new(1), [0.0f32, 0.0, 0.0].as_slice()),
            (NodeId::new(2), [1.0f32, 0.0, 0.0].as_slice()),
            (NodeId::new(3), [2.0f32, 0.0, 0.0].as_slice()),
            (NodeId::new(4), [3.0f32, 0.0, 0.0].as_slice()),
        ];
        let target = [0.5f32, 0.0, 0.0];

        let hits = brute_force_knn(candidates.into_iter(), &target, 2, DistanceMetric::Euclidean);

        assert_eq!(hits.len(), 2);
        let nearest = hits[0].0;
        assert!(nearest == NodeId::new(1) || nearest == NodeId::new(2));
    }

    /// An empty candidate set yields an empty result.
    #[test]
    fn test_brute_force_knn_empty() {
        let candidates: Vec<(NodeId, &[f32])> = Vec::new();
        let target = [0.0f32, 0.0];

        let hits = brute_force_knn(candidates.into_iter(), &target, 10, DistanceMetric::Cosine);

        assert!(hits.is_empty());
    }

    /// Asking for more neighbours than exist returns every candidate.
    #[test]
    fn test_brute_force_knn_k_larger_than_n() {
        let candidates = vec![
            (NodeId::new(1), [0.0f32, 0.0].as_slice()),
            (NodeId::new(2), [1.0f32, 0.0].as_slice()),
        ];
        let target = [0.0f32, 0.0];

        let hits = brute_force_knn(candidates.into_iter(), &target, 10, DistanceMetric::Euclidean);

        assert_eq!(hits.len(), 2);
    }

    /// Only ids accepted by the predicate (even ids here) are returned.
    #[test]
    fn test_brute_force_knn_filtered() {
        let candidates = vec![
            (NodeId::new(1), [0.0f32, 0.0].as_slice()),
            (NodeId::new(2), [1.0f32, 0.0].as_slice()),
            (NodeId::new(3), [2.0f32, 0.0].as_slice()),
        ];
        let target = [0.0f32, 0.0];
        let only_even = |id: NodeId| id.as_u64() % 2 == 0;

        let hits = brute_force_knn_filtered(
            candidates.into_iter(),
            &target,
            10,
            DistanceMetric::Euclidean,
            only_even,
        );

        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].0, NodeId::new(2));
    }

    /// Distances come back in input order with the expected Euclidean values.
    #[test]
    fn test_batch_distances() {
        let candidates = vec![
            (NodeId::new(1), [0.0f32, 0.0].as_slice()),
            (NodeId::new(2), [3.0f32, 4.0].as_slice()),
        ];
        let target = [0.0f32, 0.0];

        let distances = batch_distances(candidates.into_iter(), &target, DistanceMetric::Euclidean);

        let expected = [(NodeId::new(1), 0.0f32), (NodeId::new(2), 5.0f32)];
        assert_eq!(distances.len(), 2);
        for ((got_id, got_dist), (want_id, want_dist)) in distances.iter().zip(expected.iter()) {
            assert_eq!(got_id, want_id);
            assert!((got_dist - want_dist).abs() < 0.001);
        }
    }
}