1use crate::error::{Result, RuvectorError};
16use crate::types::{DbOptions, DistanceMetric, HnswConfig, SearchQuery, SearchResult, VectorEntry};
17use crate::vector_db::VectorDB;
18use std::collections::HashMap;
19
20pub struct FannAdapter {
38 db: VectorDB,
39}
40
41impl FannAdapter {
42 pub fn new(dimensions: usize, storage_path: impl Into<String>) -> Result<Self> {
48 let options = DbOptions {
49 dimensions,
50 distance_metric: DistanceMetric::Cosine,
51 storage_path: storage_path.into(),
52 hnsw_config: Some(HnswConfig {
53 m: 16,
54 ef_construction: 100,
55 ef_search: 100,
56 max_elements: 100_000,
57 }),
58 quantization: None,
59 };
60 Ok(Self {
61 db: VectorDB::new(options)?,
62 })
63 }
64
65 pub fn store_layer(
70 &self,
71 layer_id: impl Into<String>,
72 embedding: &[f32],
73 metadata: Option<HashMap<String, serde_json::Value>>,
74 ) -> Result<String> {
75 let id = layer_id.into();
76 self.db.insert(VectorEntry {
77 id: Some(id),
78 vector: embedding.to_vec(),
79 metadata,
80 })
81 }
82
83 pub fn find_similar_layers(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
87 self.db.search(SearchQuery {
88 vector: query.to_vec(),
89 k,
90 filter: None,
91 ef_search: None,
92 })
93 }
94
95 pub fn find_similar_layers_filtered(
99 &self,
100 query: &[f32],
101 k: usize,
102 filter: HashMap<String, serde_json::Value>,
103 ) -> Result<Vec<SearchResult>> {
104 self.db.search(SearchQuery {
105 vector: query.to_vec(),
106 k,
107 filter: Some(filter),
108 ef_search: None,
109 })
110 }
111
112 pub fn delete_layer(&self, layer_id: &str) -> Result<bool> {
114 self.db.delete(layer_id)
115 }
116
117 pub fn len(&self) -> Result<usize> {
119 self.db.len()
120 }
121
122 pub fn is_empty(&self) -> Result<bool> {
124 self.db.is_empty()
125 }
126}
127
/// Plain record describing a file: its path, a description, and a vector
/// dimensionality.
///
/// Nothing in the visible part of this file constructs or consumes this type.
#[derive(Debug, Clone)]
pub struct FileEntry {
    /// Path of the file (presumably the value used as the entry id when the
    /// file is indexed — confirm with callers).
    pub path: String,
    /// Free-form description of the file.
    pub description: String,
    /// Dimensionality associated with the file (presumably the length of its
    /// embedding — confirm with callers).
    pub dimensions: usize,
}
140
141pub struct SemanticSearchAdapter {
165 db: VectorDB,
166 dimensions: usize,
167}
168
169impl SemanticSearchAdapter {
170 pub fn new(dimensions: usize, storage_path: impl Into<String>) -> Result<Self> {
175 let options = DbOptions {
176 dimensions,
177 distance_metric: DistanceMetric::Cosine,
178 storage_path: storage_path.into(),
179 hnsw_config: Some(HnswConfig {
180 m: 16,
181 ef_construction: 100,
182 ef_search: 100,
183 max_elements: 500_000,
184 }),
185 quantization: None,
186 };
187 Ok(Self {
188 db: VectorDB::new(options)?,
189 dimensions,
190 })
191 }
192
193 pub fn index_file(
199 &self,
200 path: impl Into<String>,
201 description: impl Into<String>,
202 embedding: &[f32],
203 ) -> Result<String> {
204 let path_str = path.into();
205 if embedding.len() != self.dimensions {
206 return Err(RuvectorError::DimensionMismatch {
207 expected: self.dimensions,
208 actual: embedding.len(),
209 });
210 }
211
212 let mut metadata = HashMap::new();
213 metadata.insert(
214 "description".to_string(),
215 serde_json::Value::String(description.into()),
216 );
217 metadata.insert(
218 "path".to_string(),
219 serde_json::Value::String(path_str.clone()),
220 );
221
222 self.db.insert(VectorEntry {
223 id: Some(path_str),
224 vector: embedding.to_vec(),
225 metadata: Some(metadata),
226 })
227 }
228
229 pub fn remove_file(&self, path: &str) -> Result<bool> {
231 self.db.delete(path)
232 }
233
234 pub fn search(
240 &self,
241 _query_text: &str,
242 query_embedding: &[f32],
243 k: usize,
244 ) -> Result<Vec<SearchResult>> {
245 if query_embedding.len() != self.dimensions {
246 return Err(RuvectorError::DimensionMismatch {
247 expected: self.dimensions,
248 actual: query_embedding.len(),
249 });
250 }
251 self.db.search(SearchQuery {
252 vector: query_embedding.to_vec(),
253 k,
254 filter: None,
255 ef_search: None,
256 })
257 }
258
259 pub fn len(&self) -> Result<usize> {
261 self.db.len()
262 }
263
264 pub fn is_empty(&self) -> Result<bool> {
266 self.db.is_empty()
267 }
268
269 pub fn list_files(&self) -> Result<Vec<String>> {
271 self.db.keys()
272 }
273}
274
/// Returns a copy of `v` scaled to unit Euclidean (L2) length.
///
/// The norm is accumulated in `f64`, so squaring very small `f32` components
/// does not underflow and squaring very large ones does not overflow. Any
/// vector with a finite, non-zero norm is therefore normalized, regardless of
/// its magnitude.
///
/// If the norm is exactly zero (empty input or all components zero) or is not
/// finite (some component is NaN or infinite), there is no meaningful
/// direction to preserve and the input is returned unchanged.
#[inline]
pub fn normalize(v: &[f32]) -> Vec<f32> {
    let norm = v
        .iter()
        .map(|&x| f64::from(x) * f64::from(x))
        .sum::<f64>()
        .sqrt();
    if norm == 0.0 || !norm.is_finite() {
        return v.to_vec();
    }
    // Divide in f64 and round once on the way back to f32; every result lies
    // in [-1, 1], so the narrowing cast cannot overflow.
    v.iter().map(|&x| (f64::from(x) / norm) as f32).collect()
}
290
/// Computes the cosine similarity of `a` and `b`, in the range `[-1.0, 1.0]`.
///
/// The dot product and both squared norms are accumulated in `f64`, so the
/// result does not depend on the overall magnitude of the inputs: very small
/// vectors are not mistaken for zero and very large ones do not overflow.
/// The quotient is clamped so rounding can never push it outside `[-1, 1]`.
///
/// Returns `0.0` when the similarity is undefined: either vector has zero
/// norm (including empty input), or the norms are not finite (a component is
/// NaN or infinite).
///
/// # Panics
///
/// In builds with debug assertions enabled, panics if `a` and `b` differ in
/// length. Without debug assertions, the extra trailing components of the
/// longer slice are ignored.
#[inline]
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len(), "cosine_similarity: length mismatch");
    let (dot, norm_a_sq, norm_b_sq) = a.iter().zip(b.iter()).fold(
        (0.0f64, 0.0f64, 0.0f64),
        |(dot, na, nb), (&ai, &bi)| {
            let (x, y) = (f64::from(ai), f64::from(bi));
            (dot + x * y, na + x * x, nb + y * y)
        },
    );
    // Take the roots separately so the product of the squared norms cannot
    // overflow before the square root is applied.
    let denom = norm_a_sq.sqrt() * norm_b_sq.sqrt();
    if denom > 0.0 && denom.is_finite() {
        (dot / denom).clamp(-1.0, 1.0) as f32
    } else {
        0.0
    }
}
311
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Euclidean length of `v`, computed in `f32`.
    fn l2_norm(v: &[f32]) -> f32 {
        v.iter().map(|x| x * x).sum::<f32>().sqrt()
    }

    /// Path of `file_name` inside `dir`, as an owned string.
    fn db_path(dir: &tempfile::TempDir, file_name: &str) -> String {
        dir.path().join(file_name).to_string_lossy().to_string()
    }

    /// A 3-4-5 vector must come back with unit length.
    #[test]
    fn test_normalize_unit_vector() {
        let unit = normalize(&[3.0f32, 4.0]);
        let norm = l2_norm(&unit);
        assert!(
            (norm - 1.0).abs() < 1e-6,
            "Expected unit norm, got {}",
            norm
        );
    }

    /// The all-zero vector has no direction and must be passed through.
    #[test]
    fn test_normalize_zero_vector() {
        let zeros = vec![0.0f32, 0.0, 0.0];
        assert_eq!(
            normalize(&zeros),
            zeros,
            "Zero vector should be returned unchanged"
        );
    }

    /// A vector compared with itself has similarity 1.
    #[test]
    fn test_cosine_similarity_identical() {
        let v = vec![1.0f32, 2.0, 3.0];
        let sim = cosine_similarity(&v, &v);
        assert!(
            (sim - 1.0).abs() < 1e-5,
            "Identical vectors: expected 1.0, got {}",
            sim
        );
    }

    /// Perpendicular axes have similarity 0.
    #[test]
    fn test_cosine_similarity_orthogonal() {
        let x_axis = vec![1.0f32, 0.0];
        let y_axis = vec![0.0f32, 1.0];
        let sim = cosine_similarity(&x_axis, &y_axis);
        assert!(
            sim.abs() < 1e-5,
            "Orthogonal vectors: expected 0.0, got {}",
            sim
        );
    }

    /// Index three files on distinct axes, then query with the first axis and
    /// expect the first file as the top hit.
    #[test]
    fn test_semantic_search_adapter_roundtrip() {
        let dir = tempdir().unwrap();
        let adapter = SemanticSearchAdapter::new(4, db_path(&dir, "sparc.db")).unwrap();

        let files = [
            (
                "src/auth.rs",
                "authentication",
                normalize(&[1.0, 0.0, 0.0, 0.0]),
            ),
            (
                "src/user.rs",
                "user model",
                normalize(&[0.0, 1.0, 0.0, 0.0]),
            ),
            (
                "src/storage.rs",
                "storage layer",
                normalize(&[0.0, 0.0, 1.0, 0.0]),
            ),
        ];
        for (path, description, embedding) in &files {
            adapter.index_file(*path, *description, embedding).unwrap();
        }

        assert_eq!(adapter.len().unwrap(), 3);

        let hits = adapter.search("auth", &files[0].2, 2).unwrap();
        assert!(!hits.is_empty());
        assert_eq!(hits[0].id, "src/auth.rs");
    }

    /// Store three layer embeddings, then query with the first one and expect
    /// it back as the single nearest neighbour.
    #[test]
    fn test_fann_adapter_store_and_retrieve() {
        let dir = tempdir().unwrap();
        let adapter = FannAdapter::new(4, db_path(&dir, "fann.db")).unwrap();

        let layers = [
            ("model_v1/layer_0", normalize(&[1.0, 1.0, 0.0, 0.0])),
            ("model_v1/layer_1", normalize(&[0.0, 0.0, 1.0, 1.0])),
            ("model_v1/layer_2", normalize(&[1.0, 0.0, 1.0, 0.0])),
        ];
        for (layer_id, embedding) in &layers {
            adapter.store_layer(*layer_id, embedding, None).unwrap();
        }

        let hits = adapter.find_similar_layers(&layers[0].1, 1).unwrap();
        assert!(!hits.is_empty());
        assert_eq!(hits[0].id, "model_v1/layer_0");
    }
}