1use std::cell::RefCell;
2use std::sync::Arc;
3
4use parking_lot::RwLock;
5use tracing::instrument;
6use usearch::{Index, IndexOptions, MetricKind, ScalarKind};
7
8use crate::error::VectorError;
9use issundb_core::{Graph, NodeId};
10
11pub struct Hit {
13 pub node: NodeId,
14 pub distance: f32,
15}
16
17#[derive(Debug, Clone)]
19pub struct VectorSearchOptions {
20 pub k: usize,
22 pub label: Option<String>,
24 pub properties: Option<std::collections::HashMap<String, serde_json::Value>>,
26 pub rescore_factor: Option<usize>,
34}
35
36impl Default for VectorSearchOptions {
37 fn default() -> Self {
38 Self {
39 k: 10,
40 label: None,
41 properties: None,
42 rescore_factor: None,
43 }
44 }
45}
46
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
49pub enum VectorMetric {
50 #[default]
52 Cosine,
53 L2,
55 Dot,
57}
58
59#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
61pub enum VectorQuantization {
62 #[default]
64 Float32,
65 Float16,
67 Int8,
69}
70
71impl std::str::FromStr for VectorMetric {
72 type Err = VectorError;
73
74 fn from_str(s: &str) -> Result<Self, Self::Err> {
78 match s.to_lowercase().as_str() {
79 "cosine" => Ok(Self::Cosine),
80 "l2" => Ok(Self::L2),
81 "dot" | "ip" => Ok(Self::Dot),
82 other => Err(VectorError::InvalidConfig(format!(
83 "unknown metric '{other}' (expected 'cosine', 'l2', or 'dot')"
84 ))),
85 }
86 }
87}
88
89impl std::str::FromStr for VectorQuantization {
90 type Err = VectorError;
91
92 fn from_str(s: &str) -> Result<Self, Self::Err> {
95 match s.to_lowercase().as_str() {
96 "float32" => Ok(Self::Float32),
97 "float16" => Ok(Self::Float16),
98 "int8" => Ok(Self::Int8),
99 other => Err(VectorError::InvalidConfig(format!(
100 "unknown quantization '{other}' (expected 'float32', 'float16', or 'int8')"
101 ))),
102 }
103 }
104}
105
106#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
108pub struct VectorIndexOptions {
109 pub metric: VectorMetric,
110 pub quantization: VectorQuantization,
111}
112
113enum Inner {
114 Empty,
115 Ready { index: Index, dims: usize },
116}
117
118pub(crate) struct VectorIndex {
124 opts: VectorIndexOptions,
125 inner: RwLock<Inner>,
126}
127
128impl Default for VectorIndex {
129 fn default() -> Self {
130 Self::new()
131 }
132}
133
134impl VectorIndex {
135 pub fn new() -> Self {
137 Self::new_with_options(VectorIndexOptions::default())
138 }
139
140 pub fn new_with_options(opts: VectorIndexOptions) -> Self {
142 Self {
143 opts,
144 inner: RwLock::new(Inner::Empty),
145 }
146 }
147
148 pub fn upsert(&self, node: NodeId, v: &[f32]) -> Result<(), VectorError> {
154 let dims = v.len();
155 if dims == 0 {
156 return Err(VectorError::IndexFault(
157 "embedding must not be empty".into(),
158 ));
159 }
160 let mut guard = self.inner.write();
161 match &mut *guard {
162 Inner::Empty => {
163 let opts = IndexOptions {
164 dimensions: dims,
165 metric: metric_to_usearch(self.opts.metric),
166 quantization: quantization_to_usearch(self.opts.quantization),
167 ..Default::default()
168 };
169 let index =
170 Index::new(&opts).map_err(|e| VectorError::IndexFault(e.to_string()))?;
171 index
172 .reserve(64)
173 .map_err(|e| VectorError::IndexFault(e.to_string()))?;
174 index
175 .add(node, v)
176 .map_err(|e| VectorError::IndexFault(e.to_string()))?;
177 *guard = Inner::Ready { index, dims };
178 }
179 Inner::Ready { index, dims: d } => {
180 if dims != *d {
181 return Err(VectorError::DimensionMismatch {
182 expected: *d,
183 got: dims,
184 });
185 }
186 if index.contains(node) {
187 index
188 .remove(node)
189 .map_err(|e| VectorError::IndexFault(e.to_string()))?;
190 }
191 if index.size() >= index.capacity() {
192 let new_cap = (index.capacity() * 2).max(64);
193 index
194 .reserve(new_cap)
195 .map_err(|e| VectorError::IndexFault(e.to_string()))?;
196 }
197 index
198 .add(node, v)
199 .map_err(|e| VectorError::IndexFault(e.to_string()))?;
200 }
201 }
202 Ok(())
203 }
204
205 pub fn remove(&self, node: NodeId) -> Result<(), VectorError> {
207 let mut guard = self.inner.write();
208 if let Inner::Ready { index, .. } = &mut *guard {
209 if index.contains(node) {
210 index
211 .remove(node)
212 .map_err(|e| VectorError::IndexFault(e.to_string()))?;
213 }
214 }
215 Ok(())
216 }
217
218 pub fn search(&self, q: &[f32], k: usize) -> Result<Vec<Hit>, VectorError> {
223 let guard = self.inner.read();
224 match &*guard {
225 Inner::Empty => Ok(vec![]),
226 Inner::Ready { index, dims } => {
227 if q.len() != *dims {
228 return Err(VectorError::DimensionMismatch {
229 expected: *dims,
230 got: q.len(),
231 });
232 }
233 if k == 0 || index.size() == 0 {
234 return Ok(vec![]);
235 }
236 let actual_k = k.min(index.size());
237 let matches = index
238 .search::<f32>(q, actual_k)
239 .map_err(|e| VectorError::IndexFault(e.to_string()))?;
240 Ok(matches
241 .keys
242 .iter()
243 .zip(matches.distances.iter())
244 .map(|(&node, &distance)| Hit { node, distance })
245 .collect())
246 }
247 }
248 }
249
250 pub fn search_filtered<F>(
258 &self,
259 q: &[f32],
260 k: usize,
261 predicate: F,
262 ) -> Result<Vec<Hit>, VectorError>
263 where
264 F: Fn(NodeId) -> bool,
265 {
266 let guard = self.inner.read();
267 match &*guard {
268 Inner::Empty => Ok(vec![]),
269 Inner::Ready { index, dims } => {
270 if q.len() != *dims {
271 return Err(VectorError::DimensionMismatch {
272 expected: *dims,
273 got: q.len(),
274 });
275 }
276 if k == 0 || index.size() == 0 {
277 return Ok(vec![]);
278 }
279 let actual_k = k.min(index.size());
280 let matches = index
281 .filtered_search::<f32, _>(q, actual_k, predicate)
282 .map_err(|e| VectorError::IndexFault(e.to_string()))?;
283 Ok(matches
284 .keys
285 .iter()
286 .zip(matches.distances.iter())
287 .map(|(&node, &distance)| Hit { node, distance })
288 .collect())
289 }
290 }
291 }
292}
293
294fn encode_vector(v: &[f32]) -> Result<Vec<u8>, VectorError> {
295 if v.is_empty() {
296 return Err(VectorError::IndexFault(
297 "embedding must not be empty".into(),
298 ));
299 }
300 Ok(v.iter().flat_map(|f| f.to_le_bytes()).collect())
301}
302
303fn decode_vector(bytes: &[u8]) -> Result<Vec<f32>, VectorError> {
304 if bytes.len() % 4 != 0 {
305 return Err(VectorError::IndexFault(format!(
306 "stored embedding byte length must be divisible by 4, got {}",
307 bytes.len()
308 )));
309 }
310 let vector = bytes
311 .chunks_exact(4)
312 .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
313 .collect();
314 Ok(vector)
315}
316
317pub trait VectorGraphExt {
319 fn configure_vector_index(&self, opts: VectorIndexOptions) -> Result<(), VectorError>;
330
331 fn reindex_vector_index(&self, opts: VectorIndexOptions) -> Result<(), VectorError>;
343
344 fn upsert_vector(&self, n: NodeId, v: &[f32]) -> Result<(), VectorError>;
346
347 fn remove_vector(&self, n: NodeId) -> Result<(), VectorError>;
349
350 fn vector_search(&self, q: &[f32], k: usize) -> Result<Vec<Hit>, VectorError>;
352
353 fn vector_search_with(
366 &self,
367 q: &[f32],
368 opts: &VectorSearchOptions,
369 ) -> Result<Vec<Hit>, VectorError>;
370}
371
372struct VectorIndexCache(VectorIndex);
374
375impl VectorGraphExt for Graph {
376 fn configure_vector_index(&self, opts: VectorIndexOptions) -> Result<(), VectorError> {
377 let current = load_config(self)?;
378 if current == Some(opts) {
379 return Ok(());
380 }
381 if !self.vector_bytes()?.is_empty() {
385 return Err(VectorError::AlreadyConfigured {
386 existing: format!("{:?}", current.unwrap_or_default()),
387 requested: format!("{opts:?}"),
388 });
389 }
390 self.put_vector_config(&encode_config(opts))?;
391 self.set_extension(Arc::new(VectorIndexCache(VectorIndex::new_with_options(
394 opts,
395 ))));
396 Ok(())
397 }
398
399 fn reindex_vector_index(&self, opts: VectorIndexOptions) -> Result<(), VectorError> {
400 self.put_vector_config(&encode_config(opts))?;
404 let rebuilt = build_index(self, opts)?;
405 self.set_extension(Arc::new(VectorIndexCache(rebuilt)));
406 Ok(())
407 }
408
409 #[instrument(skip(self, v), fields(node = %n, dims = v.len()))]
410 fn upsert_vector(&self, n: NodeId, v: &[f32]) -> Result<(), VectorError> {
411 let bytes = encode_vector(v)?;
412 let arc = get_or_init_cache(self)?;
421 arc.0.upsert(n, v)?;
422 self.put_vector_bytes(n, &bytes)?;
423 Ok(())
424 }
425
426 fn remove_vector(&self, n: NodeId) -> Result<(), VectorError> {
427 self.delete_vector_bytes(n)?;
428 if let Some(arc) = self.get_extension::<VectorIndexCache>() {
430 arc.0.remove(n)?;
431 }
432 Ok(())
433 }
434
435 #[instrument(skip(self, q), fields(k = %k, dims = q.len()))]
436 fn vector_search(&self, q: &[f32], k: usize) -> Result<Vec<Hit>, VectorError> {
437 let opts = VectorSearchOptions {
438 k,
439 ..Default::default()
440 };
441 self.vector_search_with(q, &opts)
442 }
443
444 #[instrument(skip(self, q), fields(k = %opts.k, label = ?opts.label, dims = q.len()))]
445 fn vector_search_with(
446 &self,
447 q: &[f32],
448 opts: &VectorSearchOptions,
449 ) -> Result<Vec<Hit>, VectorError> {
450 let arc = get_or_init_cache(self)?;
451
452 let index_quantization = arc.0.opts.quantization;
453 let rescore_factor =
454 opts.rescore_factor
455 .unwrap_or(if index_quantization != VectorQuantization::Float32 {
456 2
457 } else {
458 1
459 });
460
461 let fetch_k = if rescore_factor > 1 {
462 opts.k.saturating_mul(rescore_factor)
463 } else {
464 opts.k
465 };
466
467 let hits = if opts.label.is_some() || opts.properties.is_some() {
468 let pred_err: RefCell<Option<VectorError>> = RefCell::new(None);
479 let matches_filters = |node: NodeId| -> Result<bool, VectorError> {
480 if let Some(label) = &opts.label {
481 if self.label_filter(&[node], label)?.is_empty() {
482 return Ok(false);
483 }
484 }
485 if let Some(filters) = &opts.properties {
486 for (key, want) in filters {
487 match self.node_prop_json(node, key)? {
488 Some(got) if &got == want => {}
489 _ => return Ok(false),
490 }
491 }
492 }
493 Ok(true)
494 };
495 let predicate = |node: NodeId| -> bool {
496 if pred_err.borrow().is_some() {
497 return false;
498 }
499 match matches_filters(node) {
500 Ok(keep) => keep,
501 Err(e) => {
502 *pred_err.borrow_mut() = Some(e);
503 false
504 }
505 }
506 };
507
508 let results = arc.0.search_filtered(q, fetch_k, predicate)?;
509 if let Some(e) = pred_err.into_inner() {
510 return Err(e);
511 }
512 results
513 } else {
514 arc.0.search(q, fetch_k)?
515 };
516
517 let mut final_hits = if rescore_factor > 1 && !hits.is_empty() {
518 let byte_rows: Vec<(Hit, Option<Vec<u8>>)> = self.view(|txn| {
522 hits.into_iter()
523 .map(|hit| {
524 let bytes = txn.get_vector_bytes(hit.node)?;
525 Ok((hit, bytes))
526 })
527 .collect()
528 })?;
529 let mut rescored = Vec::with_capacity(byte_rows.len());
530 for (hit, bytes) in byte_rows {
531 rescored.push(match bytes {
532 Some(b) => Hit {
533 node: hit.node,
534 distance: exact_distance(q, &decode_vector(&b)?, arc.0.opts.metric),
535 },
536 None => hit,
537 });
538 }
539 rescored.sort_unstable_by(|a, b| {
540 a.distance
541 .partial_cmp(&b.distance)
542 .unwrap_or(std::cmp::Ordering::Equal)
543 });
544 rescored
545 } else {
546 hits
547 };
548
549 final_hits.truncate(opts.k);
550 Ok(final_hits)
551 }
552}
553
554fn exact_distance(q: &[f32], v: &[f32], metric: VectorMetric) -> f32 {
559 match metric {
560 VectorMetric::Cosine => {
561 let mut dot = 0.0;
562 let mut norm_q = 0.0;
563 let mut norm_v = 0.0;
564 for (&qi, &vi) in q.iter().zip(v.iter()) {
565 dot += qi * vi;
566 norm_q += qi * qi;
567 norm_v += vi * vi;
568 }
569 if norm_q > 0.0 && norm_v > 0.0 {
570 (1.0 - (dot / (norm_q.sqrt() * norm_v.sqrt()))).max(0.0)
572 } else {
573 1.0
574 }
575 }
576 VectorMetric::L2 => {
577 let mut sum = 0.0;
578 for (&qi, &vi) in q.iter().zip(v.iter()) {
579 let diff = qi - vi;
580 sum += diff * diff;
581 }
582 sum
583 }
584 VectorMetric::Dot => {
585 let mut dot = 0.0;
586 for (&qi, &vi) in q.iter().zip(v.iter()) {
587 dot += qi * vi;
588 }
589 1.0 - dot
590 }
591 }
592}
593
594fn get_or_init_cache(graph: &Graph) -> Result<Arc<VectorIndexCache>, VectorError> {
597 graph.get_or_init_extension_with(|| {
602 let opts = load_config(graph)?.unwrap_or_default();
603 Ok(Arc::new(VectorIndexCache(build_index(graph, opts)?)))
604 })
605}
606
607fn build_index(graph: &Graph, opts: VectorIndexOptions) -> Result<VectorIndex, VectorError> {
611 let idx = VectorIndex::new_with_options(opts);
612 for (node_id, bytes) in graph.vector_bytes()? {
613 let v = decode_vector(&bytes)?;
614 idx.upsert(node_id, &v)?;
615 }
616 Ok(idx)
617}
618
619fn load_config(graph: &Graph) -> Result<Option<VectorIndexOptions>, VectorError> {
622 match graph.get_vector_config()? {
623 Some(bytes) => Ok(Some(decode_config(&bytes)?)),
624 None => Ok(None),
625 }
626}
627
628fn encode_config(opts: VectorIndexOptions) -> [u8; 2] {
630 let metric = match opts.metric {
631 VectorMetric::Cosine => 0,
632 VectorMetric::L2 => 1,
633 VectorMetric::Dot => 2,
634 };
635 let quant = match opts.quantization {
636 VectorQuantization::Float32 => 0,
637 VectorQuantization::Float16 => 1,
638 VectorQuantization::Int8 => 2,
639 };
640 [metric, quant]
641}
642
643fn decode_config(bytes: &[u8]) -> Result<VectorIndexOptions, VectorError> {
645 let [metric, quant] = bytes.try_into().map_err(|_| {
646 VectorError::IndexFault(format!(
647 "vector config must be 2 bytes, got {}",
648 bytes.len()
649 ))
650 })?;
651 let metric = match metric {
652 0 => VectorMetric::Cosine,
653 1 => VectorMetric::L2,
654 2 => VectorMetric::Dot,
655 other => {
656 return Err(VectorError::IndexFault(format!(
657 "unknown vector metric tag {other}"
658 )));
659 }
660 };
661 let quantization = match quant {
662 0 => VectorQuantization::Float32,
663 1 => VectorQuantization::Float16,
664 2 => VectorQuantization::Int8,
665 other => {
666 return Err(VectorError::IndexFault(format!(
667 "unknown vector quantization tag {other}"
668 )));
669 }
670 };
671 Ok(VectorIndexOptions {
672 metric,
673 quantization,
674 })
675}
676
677fn metric_to_usearch(m: VectorMetric) -> MetricKind {
678 match m {
679 VectorMetric::Cosine => MetricKind::Cos,
680 VectorMetric::L2 => MetricKind::L2sq,
681 VectorMetric::Dot => MetricKind::IP,
682 }
683}
684
685fn quantization_to_usearch(q: VectorQuantization) -> ScalarKind {
686 match q {
687 VectorQuantization::Float32 => ScalarKind::F32,
688 VectorQuantization::Float16 => ScalarKind::F16,
689 VectorQuantization::Int8 => ScalarKind::I8,
690 }
691}
692
693#[cfg(test)]
694mod tests {
695 use serde_json::json;
696 use tempfile::TempDir;
697
698 use super::*;
699
700 fn open_tmp() -> (TempDir, Graph) {
701 let dir = TempDir::new().unwrap();
702 let graph = Graph::open(dir.path(), 1).unwrap();
703 (dir, graph)
704 }
705
706 #[test]
707 fn metric_from_str_is_case_insensitive_with_alias() {
708 assert_eq!(
709 "cosine".parse::<VectorMetric>().unwrap(),
710 VectorMetric::Cosine
711 );
712 assert_eq!("L2".parse::<VectorMetric>().unwrap(), VectorMetric::L2);
713 assert_eq!("Dot".parse::<VectorMetric>().unwrap(), VectorMetric::Dot);
714 assert_eq!("ip".parse::<VectorMetric>().unwrap(), VectorMetric::Dot);
715 assert!("hamming".parse::<VectorMetric>().is_err());
716 }
717
718 #[test]
719 fn quantization_from_str_is_case_insensitive() {
720 assert_eq!(
721 "float32".parse::<VectorQuantization>().unwrap(),
722 VectorQuantization::Float32
723 );
724 assert_eq!(
725 "Float16".parse::<VectorQuantization>().unwrap(),
726 VectorQuantization::Float16
727 );
728 assert_eq!(
729 "INT8".parse::<VectorQuantization>().unwrap(),
730 VectorQuantization::Int8
731 );
732 assert!("b1".parse::<VectorQuantization>().is_err());
733 }
734
735 #[test]
736 fn upsert_vector_and_search_finds_nearest() {
737 let (_dir, graph) = open_tmp();
738 let a = graph.add_node("N", &json!({})).unwrap();
739 let b = graph.add_node("N", &json!({})).unwrap();
740 let c = graph.add_node("N", &json!({})).unwrap();
741
742 graph.upsert_vector(a, &[1.0f32, 0.0, 0.0]).unwrap();
743 graph.upsert_vector(b, &[0.0f32, 1.0, 0.0]).unwrap();
744 graph.upsert_vector(c, &[0.0f32, 0.0, 1.0]).unwrap();
745
746 let hits = graph.vector_search(&[1.0f32, 0.0, 0.0], 1).unwrap();
747 assert_eq!(hits.len(), 1);
748 assert_eq!(hits[0].node, a);
749 }
750
751 #[test]
752 fn vector_search_empty_index_returns_empty() {
753 let (_dir, graph) = open_tmp();
754 let hits = graph.vector_search(&[1.0f32, 0.0, 0.0], 5).unwrap();
755 assert!(hits.is_empty());
756 }
757
758 #[test]
759 fn vector_search_k_larger_than_index_returns_all() {
760 let (_dir, graph) = open_tmp();
761 let a = graph.add_node("N", &json!({})).unwrap();
762 let b = graph.add_node("N", &json!({})).unwrap();
763 graph.upsert_vector(a, &[1.0f32, 0.0]).unwrap();
764 graph.upsert_vector(b, &[0.0f32, 1.0]).unwrap();
765
766 let hits = graph.vector_search(&[1.0f32, 0.0], 100).unwrap();
767 assert_eq!(hits.len(), 2);
768 }
769
770 #[test]
771 fn upsert_vector_overwrites_existing_embedding() {
772 let (_dir, graph) = open_tmp();
773 let a = graph.add_node("N", &json!({})).unwrap();
774 let b = graph.add_node("N", &json!({})).unwrap();
775
776 graph.upsert_vector(a, &[1.0f32, 0.0, 0.0]).unwrap();
777 graph.upsert_vector(b, &[0.0f32, 1.0, 0.0]).unwrap();
778 graph.upsert_vector(a, &[0.0f32, 1.0, 0.0]).unwrap();
779
780 let hits = graph.vector_search(&[0.0f32, 1.0, 0.0], 1).unwrap();
781 assert_eq!(hits.len(), 1);
782 assert!(
783 (hits[0].distance).abs() < 1e-5,
784 "distance to query should be near zero"
785 );
786 }
787
788 #[test]
789 fn vector_index_rebuilds_from_lmdb_on_reopen() {
790 let dir = TempDir::new().unwrap();
791 let a = {
792 let graph = Graph::open(dir.path(), 1).unwrap();
793 let a = graph.add_node("N", &json!({})).unwrap();
794 graph.upsert_vector(a, &[1.0f32, 0.0, 0.0]).unwrap();
795 a
796 };
797
798 let graph = Graph::open(dir.path(), 1).unwrap();
799 let hits = graph.vector_search(&[1.0f32, 0.0, 0.0], 1).unwrap();
800 assert_eq!(hits.len(), 1);
801 assert_eq!(hits[0].node, a);
802 }
803
804 #[test]
805 fn remove_vector_deletes_from_index_and_lmdb() {
806 let (_dir, graph) = open_tmp();
807 let a = graph.add_node("N", &json!({})).unwrap();
808 let b = graph.add_node("N", &json!({})).unwrap();
809 graph.upsert_vector(a, &[1.0f32, 0.0, 0.0]).unwrap();
810 graph.upsert_vector(b, &[0.0f32, 1.0, 0.0]).unwrap();
811
812 graph.remove_vector(a).unwrap();
813
814 let hits = graph.vector_search(&[1.0f32, 0.0, 0.0], 2).unwrap();
815 assert!(
816 hits.iter().all(|h| h.node != a),
817 "removed node must not appear in search results"
818 );
819 }
820
821 #[test]
822 fn vector_search_with_label_filter_excludes_other_labels() {
823 let (_dir, graph) = open_tmp();
824 let a = graph.add_node("Article", &json!({})).unwrap();
825 let b = graph.add_node("Person", &json!({})).unwrap();
826 let c = graph.add_node("Article", &json!({})).unwrap();
827 graph.upsert_vector(a, &[1.0f32, 0.0, 0.0]).unwrap();
828 graph.upsert_vector(b, &[1.0f32, 0.0, 0.0]).unwrap(); graph.upsert_vector(c, &[0.9f32, 0.1, 0.0]).unwrap();
830
831 let opts = VectorSearchOptions {
832 k: 3,
833 label: Some("Article".into()),
834 properties: None,
835 rescore_factor: None,
836 };
837 let hits = graph
838 .vector_search_with(&[1.0f32, 0.0, 0.0], &opts)
839 .unwrap();
840 assert!(
842 hits.iter().all(|h| h.node != b),
843 "Person node must be filtered out"
844 );
845 assert!(hits.len() <= 2);
846 assert!(hits.iter().any(|h| h.node == a));
847 }
848
849 #[test]
850 fn vector_search_with_selective_property_filter_finds_distant_matches() {
851 let (_dir, graph) = open_tmp();
857 for i in 0..200u32 {
859 let n = graph.add_node("N", &json!({ "team": "red" })).unwrap();
860 let jitter = (i as f32) * 1e-4;
861 graph.upsert_vector(n, &[1.0, jitter, 0.0]).unwrap();
862 }
863 let blue1 = graph.add_node("N", &json!({ "team": "blue" })).unwrap();
865 let blue2 = graph.add_node("N", &json!({ "team": "blue" })).unwrap();
866 graph.upsert_vector(blue1, &[0.6, 0.8, 0.0]).unwrap();
867 graph.upsert_vector(blue2, &[0.5, 0.85, 0.0]).unwrap();
868
869 let mut filters = std::collections::HashMap::new();
870 filters.insert("team".to_string(), json!("blue"));
871 let opts = VectorSearchOptions {
872 k: 2,
873 label: None,
874 properties: Some(filters),
875 rescore_factor: None,
876 };
877 let hits = graph
878 .vector_search_with(&[1.0f32, 0.0, 0.0], &opts)
879 .unwrap();
880
881 assert_eq!(hits.len(), 2, "both blue matches must be returned");
882 assert!(hits.iter().any(|h| h.node == blue1));
883 assert!(hits.iter().any(|h| h.node == blue2));
884 }
885
886 #[test]
887 fn rejected_upsert_does_not_persist_and_brick_reopen() {
888 let dir = TempDir::new().unwrap();
892 let a = {
893 let graph = Graph::open(dir.path(), 1).unwrap();
894 let a = graph.add_node("N", &json!({})).unwrap();
895 let b = graph.add_node("N", &json!({})).unwrap();
896 graph.upsert_vector(a, &[1.0f32, 0.0, 0.0]).unwrap();
897 let bad = graph.upsert_vector(b, &[1.0f32, 0.0]);
899 assert!(matches!(bad, Err(VectorError::DimensionMismatch { .. })));
900 a
901 };
902
903 let graph = Graph::open(dir.path(), 1).unwrap();
905 let hits = graph.vector_search(&[1.0f32, 0.0, 0.0], 1).unwrap();
906 assert_eq!(hits.len(), 1);
907 assert_eq!(hits[0].node, a);
908 }
909
910 #[test]
911 fn configure_vector_index_persists_metric_across_reopen() {
912 let dir = TempDir::new().unwrap();
913 let a = {
914 let graph = Graph::open(dir.path(), 1).unwrap();
915 graph
916 .configure_vector_index(VectorIndexOptions {
917 metric: VectorMetric::L2,
918 quantization: VectorQuantization::Float32,
919 })
920 .unwrap();
921 let a = graph.add_node("N", &json!({})).unwrap();
922 let b = graph.add_node("N", &json!({})).unwrap();
923 graph.upsert_vector(a, &[0.0f32, 0.0]).unwrap();
924 graph.upsert_vector(b, &[5.0f32, 5.0]).unwrap();
925 a
926 };
927
928 let graph = Graph::open(dir.path(), 1).unwrap();
930 let hits = graph.vector_search(&[0.1f32, 0.1], 1).unwrap();
931 assert_eq!(hits.len(), 1);
932 assert_eq!(
933 hits[0].node, a,
934 "nearest under L2 must be the origin vector"
935 );
936 }
937
938 #[test]
939 fn configure_vector_index_idempotent_with_same_options() {
940 let (_dir, graph) = open_tmp();
941 let opts = VectorIndexOptions {
942 metric: VectorMetric::Dot,
943 quantization: VectorQuantization::Float16,
944 };
945 graph.configure_vector_index(opts).unwrap();
946 let a = graph.add_node("N", &json!({})).unwrap();
947 graph.upsert_vector(a, &[1.0f32, 0.0]).unwrap();
948 graph.configure_vector_index(opts).unwrap();
950 }
951
952 #[test]
953 fn configure_vector_index_rejects_change_after_vectors_exist() {
954 let (_dir, graph) = open_tmp();
955 graph
956 .configure_vector_index(VectorIndexOptions {
957 metric: VectorMetric::Cosine,
958 quantization: VectorQuantization::Float32,
959 })
960 .unwrap();
961 let a = graph.add_node("N", &json!({})).unwrap();
962 graph.upsert_vector(a, &[1.0f32, 0.0]).unwrap();
963
964 let changed = graph.configure_vector_index(VectorIndexOptions {
965 metric: VectorMetric::L2,
966 quantization: VectorQuantization::Float32,
967 });
968 assert!(matches!(
969 changed,
970 Err(VectorError::AlreadyConfigured { .. })
971 ));
972 }
973
974 #[test]
975 fn reindex_vector_index_switches_metric_on_populated_graph() {
976 let dir = TempDir::new().unwrap();
977 let (a, b) = {
978 let graph = Graph::open(dir.path(), 1).unwrap();
979 let a = graph.add_node("N", &json!({})).unwrap();
981 let b = graph.add_node("N", &json!({})).unwrap();
982 graph.upsert_vector(a, &[0.0f32, 0.0]).unwrap();
983 graph.upsert_vector(b, &[5.0f32, 5.0]).unwrap();
984
985 let refused = graph.configure_vector_index(VectorIndexOptions {
987 metric: VectorMetric::L2,
988 quantization: VectorQuantization::Float32,
989 });
990 assert!(matches!(
991 refused,
992 Err(VectorError::AlreadyConfigured { .. })
993 ));
994
995 graph
997 .reindex_vector_index(VectorIndexOptions {
998 metric: VectorMetric::L2,
999 quantization: VectorQuantization::Float32,
1000 })
1001 .unwrap();
1002 (a, b)
1003 };
1004
1005 let graph = Graph::open(dir.path(), 1).unwrap();
1007 let hits = graph.vector_search(&[0.1f32, 0.1], 2).unwrap();
1008 assert_eq!(hits[0].node, a, "origin is nearest under L2");
1009 assert!(hits.iter().any(|h| h.node == b));
1010 }
1011
1012 #[test]
1013 fn vector_cache_is_reused_across_searches() {
1014 let (_dir, graph) = open_tmp();
1015 let a = graph.add_node("N", &json!({})).unwrap();
1016 graph.upsert_vector(a, &[1.0f32, 0.0, 0.0]).unwrap();
1017
1018 let h1 = graph.vector_search(&[1.0f32, 0.0, 0.0], 1).unwrap();
1020 let h2 = graph.vector_search(&[1.0f32, 0.0, 0.0], 1).unwrap();
1021 assert_eq!(h1.len(), 1);
1022 assert_eq!(h2.len(), 1);
1023 assert_eq!(h1[0].node, h2[0].node);
1024 }
1025
1026 #[test]
1027 fn test_concurrent_vector_searches() {
1028 let (_dir, graph) = open_tmp();
1029 let a = graph.add_node("N", &json!({})).unwrap();
1030 graph.upsert_vector(a, &[1.0f32, 0.0, 0.0]).unwrap();
1031
1032 let graph = Arc::new(graph);
1033 let mut handles = vec![];
1034 for _ in 0..10 {
1035 let g = Arc::clone(&graph);
1036 let target_node = a;
1037 handles.push(std::thread::spawn(move || {
1038 let hits = g.vector_search(&[1.0f32, 0.0, 0.0], 1).unwrap();
1039 assert_eq!(hits.len(), 1);
1040 assert_eq!(hits[0].node, target_node);
1041 }));
1042 }
1043
1044 for h in handles {
1045 h.join().unwrap();
1046 }
1047 }
1048
1049 #[test]
1050 fn vector_search_with_int8_quantization_finds_nearest() {
1051 let (_dir, graph) = open_tmp();
1054 graph
1055 .configure_vector_index(VectorIndexOptions {
1056 metric: VectorMetric::Cosine,
1057 quantization: VectorQuantization::Int8,
1058 })
1059 .unwrap();
1060 let a = graph.add_node("N", &json!({})).unwrap();
1061 let b = graph.add_node("N", &json!({})).unwrap();
1062 let c = graph.add_node("N", &json!({})).unwrap();
1063 graph.upsert_vector(a, &[1.0, 0.0, 0.0]).unwrap();
1064 graph.upsert_vector(b, &[0.0, 1.0, 0.0]).unwrap();
1065 graph.upsert_vector(c, &[0.0, 0.0, 1.0]).unwrap();
1066
1067 let hits = graph.vector_search(&[1.0, 0.0, 0.0], 1).unwrap();
1068 assert_eq!(hits.len(), 1);
1069 assert_eq!(hits[0].node, a);
1070 }
1071
1072 #[test]
1073 fn vector_search_with_multiple_property_filters_requires_all() {
1074 let (_dir, graph) = open_tmp();
1078 let near = graph
1079 .add_node("N", &json!({ "team": "blue", "role": "ic" }))
1080 .unwrap();
1081 let far = graph
1082 .add_node("N", &json!({ "team": "blue", "role": "lead" }))
1083 .unwrap();
1084 graph.upsert_vector(near, &[1.0, 0.0, 0.0]).unwrap();
1085 graph.upsert_vector(far, &[0.9, 0.1, 0.0]).unwrap();
1086
1087 let mut filters = std::collections::HashMap::new();
1088 filters.insert("team".to_string(), json!("blue"));
1089 filters.insert("role".to_string(), json!("lead"));
1090 let opts = VectorSearchOptions {
1091 k: 2,
1092 label: None,
1093 properties: Some(filters),
1094 rescore_factor: None,
1095 };
1096 let hits = graph.vector_search_with(&[1.0, 0.0, 0.0], &opts).unwrap();
1097
1098 assert_eq!(hits.len(), 1);
1100 assert_eq!(hits[0].node, far);
1101 }
1102
1103 #[test]
1104 fn vector_search_quantized_rescore() {
1105 let (_dir, graph) = open_tmp();
1106 graph
1107 .configure_vector_index(VectorIndexOptions {
1108 metric: VectorMetric::Cosine,
1109 quantization: VectorQuantization::Int8,
1110 })
1111 .unwrap();
1112
1113 let n1 = graph.add_node("N", &json!({})).unwrap();
1114 let n2 = graph.add_node("N", &json!({})).unwrap();
1115
1116 graph.upsert_vector(n1, &[0.9, 0.1]).unwrap();
1117 graph.upsert_vector(n2, &[0.95, 0.05]).unwrap();
1118
1119 let query = &[1.0, 0.0];
1120
1121 let opts = VectorSearchOptions {
1123 k: 2,
1124 rescore_factor: Some(2),
1125 ..Default::default()
1126 };
1127 let hits = graph.vector_search_with(query, &opts).unwrap();
1128 assert_eq!(hits.len(), 2);
1129 assert_eq!(hits[0].node, n2);
1130 assert_eq!(hits[1].node, n1);
1131
1132 assert!(hits[0].distance < hits[1].distance);
1134 }
1135}