use crate::error::{Result, RuvectorError};
use crate::types::{DbOptions, DistanceMetric, HnswConfig, SearchQuery, SearchResult, VectorEntry};
use crate::vector_db::VectorDB;
use std::collections::HashMap;
/// Adapter for storing layer embeddings in a [`VectorDB`] and searching them by
/// similarity.
///
/// Every method on this type delegates to the wrapped database.
pub struct FannAdapter {
/// Backing vector database that all operations are forwarded to.
db: VectorDB,
}
impl FannAdapter {
    /// Builds an adapter over a freshly constructed [`VectorDB`].
    ///
    /// The database is configured for `dimensions`-long vectors, cosine distance,
    /// no quantization, and an HNSW index with `m = 16`, `ef_construction = 100`,
    /// `ef_search = 100` and `max_elements = 100_000`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `VectorDB::new`.
    pub fn new(dimensions: usize, storage_path: impl Into<String>) -> Result<Self> {
        let hnsw = HnswConfig {
            m: 16,
            ef_construction: 100,
            ef_search: 100,
            max_elements: 100_000,
        };
        let db = VectorDB::new(DbOptions {
            dimensions,
            distance_metric: DistanceMetric::Cosine,
            storage_path: storage_path.into(),
            hnsw_config: Some(hnsw),
            quantization: None,
        })?;
        Ok(Self { db })
    }

    /// Inserts `embedding` under the id `layer_id`, with optional `metadata`.
    ///
    /// The embedding is copied into an owned vector before insertion. The return
    /// value is whatever `VectorDB::insert` reports for the entry.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `VectorDB::insert`.
    pub fn store_layer(
        &self,
        layer_id: impl Into<String>,
        embedding: &[f32],
        metadata: Option<HashMap<String, serde_json::Value>>,
    ) -> Result<String> {
        let entry = VectorEntry {
            id: Some(layer_id.into()),
            vector: embedding.to_vec(),
            metadata,
        };
        self.db.insert(entry)
    }

    /// Searches for up to `k` stored entries similar to `query`, without a
    /// metadata filter.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `VectorDB::search`.
    pub fn find_similar_layers(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
        self.run_search(query, k, None)
    }

    /// Same as [`find_similar_layers`](Self::find_similar_layers), but passes
    /// `filter` along with the query.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `VectorDB::search`.
    pub fn find_similar_layers_filtered(
        &self,
        query: &[f32],
        k: usize,
        filter: HashMap<String, serde_json::Value>,
    ) -> Result<Vec<SearchResult>> {
        self.run_search(query, k, Some(filter))
    }

    /// Deletes the entry stored under `layer_id`, returning the flag reported by
    /// `VectorDB::delete`.
    pub fn delete_layer(&self, layer_id: &str) -> Result<bool> {
        self.db.delete(layer_id)
    }

    /// Number of entries, as reported by the underlying database.
    pub fn len(&self) -> Result<usize> {
        self.db.len()
    }

    /// Whether the underlying database reports itself as empty.
    pub fn is_empty(&self) -> Result<bool> {
        self.db.is_empty()
    }

    /// Shared search path: builds the query (leaving `ef_search` unset) and
    /// hands it to the database.
    fn run_search(
        &self,
        query: &[f32],
        k: usize,
        filter: Option<HashMap<String, serde_json::Value>>,
    ) -> Result<Vec<SearchResult>> {
        let request = SearchQuery {
            vector: query.to_vec(),
            k,
            filter,
            ef_search: None,
        };
        self.db.search(request)
    }
}
/// Plain record with a file path, a description, and a dimension count.
///
/// NOTE(review): nothing in the visible part of this file constructs or reads
/// `FileEntry`; the field notes below are inferred from the names — confirm
/// against callers.
#[derive(Debug, Clone)]
pub struct FileEntry {
/// Path of the file (presumably the same string used as the index id — confirm).
pub path: String,
/// Free-form description of the file.
pub description: String,
/// Presumably the length of the file's embedding vector — TODO confirm.
pub dimensions: usize,
}
/// Adapter that indexes file embeddings in a [`VectorDB`] and searches them,
/// validating embedding length on the way in.
pub struct SemanticSearchAdapter {
/// Backing vector database that all operations are forwarded to.
db: VectorDB,
/// Expected embedding length; `index_file` and `search` reject slices of any
/// other length with `RuvectorError::DimensionMismatch`.
dimensions: usize,
}
impl SemanticSearchAdapter {
    /// Builds an adapter over a freshly constructed [`VectorDB`].
    ///
    /// The database is configured for `dimensions`-long vectors, cosine distance,
    /// no quantization, and an HNSW index with `m = 16`, `ef_construction = 100`,
    /// `ef_search = 100` and `max_elements = 500_000`. `dimensions` is also kept
    /// on the adapter for input validation.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `VectorDB::new`.
    pub fn new(dimensions: usize, storage_path: impl Into<String>) -> Result<Self> {
        let hnsw = HnswConfig {
            m: 16,
            ef_construction: 100,
            ef_search: 100,
            max_elements: 500_000,
        };
        let db = VectorDB::new(DbOptions {
            dimensions,
            distance_metric: DistanceMetric::Cosine,
            storage_path: storage_path.into(),
            hnsw_config: Some(hnsw),
            quantization: None,
        })?;
        Ok(Self { db, dimensions })
    }

    /// Indexes `embedding` under the id `path`.
    ///
    /// The entry's metadata carries two string values: `"description"` and
    /// `"path"`. The return value is whatever `VectorDB::insert` reports.
    ///
    /// # Errors
    ///
    /// Returns `RuvectorError::DimensionMismatch` when `embedding.len()` differs
    /// from the adapter's configured dimensions; otherwise propagates any error
    /// from `VectorDB::insert`.
    pub fn index_file(
        &self,
        path: impl Into<String>,
        description: impl Into<String>,
        embedding: &[f32],
    ) -> Result<String> {
        let file_path: String = path.into();
        self.ensure_dimensions(embedding.len())?;

        let metadata = HashMap::from([
            (
                "description".to_string(),
                serde_json::Value::String(description.into()),
            ),
            (
                "path".to_string(),
                serde_json::Value::String(file_path.clone()),
            ),
        ]);

        let entry = VectorEntry {
            id: Some(file_path),
            vector: embedding.to_vec(),
            metadata: Some(metadata),
        };
        self.db.insert(entry)
    }

    /// Deletes the entry stored under `path`, returning the flag reported by
    /// `VectorDB::delete`.
    pub fn remove_file(&self, path: &str) -> Result<bool> {
        self.db.delete(path)
    }

    /// Searches for up to `k` indexed entries similar to `query_embedding`.
    ///
    /// `_query_text` is accepted but not used; only the embedding drives the
    /// search. No metadata filter or `ef_search` override is applied.
    ///
    /// # Errors
    ///
    /// Returns `RuvectorError::DimensionMismatch` when `query_embedding.len()`
    /// differs from the adapter's configured dimensions; otherwise propagates any
    /// error from `VectorDB::search`.
    pub fn search(
        &self,
        _query_text: &str,
        query_embedding: &[f32],
        k: usize,
    ) -> Result<Vec<SearchResult>> {
        self.ensure_dimensions(query_embedding.len())?;
        let request = SearchQuery {
            vector: query_embedding.to_vec(),
            k,
            filter: None,
            ef_search: None,
        };
        self.db.search(request)
    }

    /// Number of entries, as reported by the underlying database.
    pub fn len(&self) -> Result<usize> {
        self.db.len()
    }

    /// Whether the underlying database reports itself as empty.
    pub fn is_empty(&self) -> Result<bool> {
        self.db.is_empty()
    }

    /// Ids currently held by the database, as returned by `VectorDB::keys`.
    pub fn list_files(&self) -> Result<Vec<String>> {
        self.db.keys()
    }

    /// Guard shared by `index_file` and `search`: accepts only slices whose
    /// length equals the configured dimension count.
    fn ensure_dimensions(&self, actual: usize) -> Result<()> {
        if actual == self.dimensions {
            Ok(())
        } else {
            Err(RuvectorError::DimensionMismatch {
                expected: self.dimensions,
                actual,
            })
        }
    }
}
/// Returns a copy of `v` scaled to unit Euclidean (L2) length.
///
/// The squared norm is accumulated in `f64`, so squaring very small or very
/// large `f32` components neither underflows to zero nor overflows to infinity.
/// Any finite, non-zero vector is therefore normalized regardless of its
/// magnitude.
///
/// Degenerate inputs are returned unchanged:
/// - an empty or all-zero vector (there is no direction to preserve), and
/// - a vector whose norm is not finite (it contains NaN or an infinity).
#[inline]
pub fn normalize(v: &[f32]) -> Vec<f32> {
    let norm_sq: f64 = v
        .iter()
        .map(|&x| {
            let x = f64::from(x);
            x * x
        })
        .sum();

    // Only an exactly-zero or non-finite norm makes the division meaningless;
    // a merely small norm is still a perfectly good direction.
    if norm_sq == 0.0 || !norm_sq.is_finite() {
        return v.to_vec();
    }

    let norm = norm_sq.sqrt();
    // Each quotient lies in [-1, 1], so narrowing back to f32 only rounds.
    v.iter().map(|&x| (f64::from(x) / norm) as f32).collect()
}
/// Cosine similarity of `a` and `b`, in the range `[-1.0, 1.0]`.
///
/// The dot product and both squared norms are accumulated in `f64`, so the
/// result does not depend on the magnitude of the inputs: very small vectors
/// are not mistaken for zero and very large ones do not overflow.
///
/// Returns `0.0` when the similarity is undefined, i.e. when either vector is
/// empty or all-zero, or when an input contains NaN or an infinity.
///
/// # Panics
///
/// In builds with debug assertions enabled, panics if the slices differ in
/// length. Without debug assertions the extra trailing elements of the longer
/// slice are ignored.
#[inline]
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len(), "cosine_similarity: length mismatch");

    let (dot, norm_a_sq, norm_b_sq) = a.iter().zip(b.iter()).fold(
        (0.0f64, 0.0f64, 0.0f64),
        |(dot, na, nb), (&ai, &bi)| {
            let (x, y) = (f64::from(ai), f64::from(bi));
            (dot + x * y, na + x * x, nb + y * y)
        },
    );

    let denom = norm_a_sq.sqrt() * norm_b_sq.sqrt();
    // `denom > 0.0` is false for NaN, so NaN inputs land in the fallback branch.
    if denom > 0.0 && denom.is_finite() {
        // Clamp away rounding noise so callers never see a value outside [-1, 1].
        (dot / denom).clamp(-1.0, 1.0) as f32
    } else {
        0.0
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Euclidean length of `v`, used to check normalization results.
    fn l2_norm(v: &[f32]) -> f32 {
        v.iter().map(|x| x * x).sum::<f32>().sqrt()
    }

    /// A 3-4-5 vector must come back with unit length.
    #[test]
    fn test_normalize_unit_vector() {
        let normalized = normalize(&[3.0f32, 4.0]);
        let norm = l2_norm(&normalized);
        assert!(
            (norm - 1.0).abs() < 1e-6,
            "Expected unit norm, got {}",
            norm
        );
    }

    /// The all-zero vector has no direction and must pass through untouched.
    #[test]
    fn test_normalize_zero_vector() {
        let zeros = vec![0.0f32; 3];
        let normalized = normalize(&zeros);
        assert_eq!(normalized, zeros, "Zero vector should be returned unchanged");
    }

    /// A vector compared with itself has similarity 1.
    #[test]
    fn test_cosine_similarity_identical() {
        let v = [1.0f32, 2.0, 3.0];
        let sim = cosine_similarity(&v, &v);
        assert!(
            (sim - 1.0).abs() < 1e-5,
            "Identical vectors: expected 1.0, got {}",
            sim
        );
    }

    /// Perpendicular axes have similarity 0.
    #[test]
    fn test_cosine_similarity_orthogonal() {
        let x_axis = [1.0f32, 0.0];
        let y_axis = [0.0f32, 1.0];
        let sim = cosine_similarity(&x_axis, &y_axis);
        assert!(
            sim.abs() < 1e-5,
            "Orthogonal vectors: expected 0.0, got {}",
            sim
        );
    }

    /// Index three files on distinct axes, then query with the first one.
    #[test]
    fn test_semantic_search_adapter_roundtrip() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("sparc.db").to_string_lossy().into_owned();
        let adapter = SemanticSearchAdapter::new(4, db_path).unwrap();

        let files: [(&str, &str, [f32; 4]); 3] = [
            ("src/auth.rs", "authentication", [1.0, 0.0, 0.0, 0.0]),
            ("src/user.rs", "user model", [0.0, 1.0, 0.0, 0.0]),
            ("src/storage.rs", "storage layer", [0.0, 0.0, 1.0, 0.0]),
        ];
        for (file, description, raw) in files.iter() {
            let embedding = normalize(raw);
            adapter.index_file(*file, *description, &embedding).unwrap();
        }
        assert_eq!(adapter.len().unwrap(), 3);

        let query = normalize(&files[0].2);
        let results = adapter.search("auth", &query, 2).unwrap();
        assert!(!results.is_empty());
        assert_eq!(results[0].id, "src/auth.rs");
    }

    /// Store three layer embeddings, then look up the nearest to the first one.
    #[test]
    fn test_fann_adapter_store_and_retrieve() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("fann.db").to_string_lossy().into_owned();
        let adapter = FannAdapter::new(4, db_path).unwrap();

        let layers: [(&str, [f32; 4]); 3] = [
            ("model_v1/layer_0", [1.0, 1.0, 0.0, 0.0]),
            ("model_v1/layer_1", [0.0, 0.0, 1.0, 1.0]),
            ("model_v1/layer_2", [1.0, 0.0, 1.0, 0.0]),
        ];
        for (layer_id, raw) in layers.iter() {
            let embedding = normalize(raw);
            adapter.store_layer(*layer_id, &embedding, None).unwrap();
        }

        let query = normalize(&layers[0].1);
        let results = adapter.find_similar_layers(&query, 1).unwrap();
        assert!(!results.is_empty());
        assert_eq!(results[0].id, "model_v1/layer_0");
    }
}