1use std::{
2 borrow::Cow,
3 collections::HashMap,
4 path::Path,
5 sync::{
6 atomic::{AtomicUsize, Ordering},
7 Mutex, RwLock,
8 },
9};
10
11use usearch::{Index, IndexOptions, MetricKind, ScalarKind};
12
13use crate::{
14 error::MemoryError,
15 health::SubsystemReporter,
16 types::{validate_name, Scope, ScopeFilter},
17};
18
/// Number of vector slots reserved on a freshly created usearch index
/// (passed to `reserve` right after construction).
const INITIAL_INDEX_CAPACITY: usize = 1024;
24
/// Raw hits returned by a [`RawIndex`] search, before keys are resolved to names.
///
/// Callers zip `keys` with `distances`, so position `i` in both vectors
/// describes the same hit.
struct RawSearchResults {
    /// Index keys of the matched vectors.
    keys: Vec<u64>,
    /// Distance reported by the backend for the key at the same position.
    distances: Vec<f32>,
}
29
/// Boxed, thread-safe error returned by every fallible [`RawIndex`] operation.
type RawIndexError = Box<dyn std::error::Error + Send + Sync>;
31
/// Backend operations that [`VectorIndex`] needs from a vector index.
///
/// Keeping the backend behind a trait allows an alternative implementation
/// (for example one that injects failures) to stand in for usearch.
trait RawIndex: Send + Sync + Sized {
    /// Creates an empty index for vectors with `dimensions` components.
    fn create(dimensions: usize) -> Result<Self, RawIndexError>;
    /// Stores `vector` under `key`.
    fn add(&self, key: u64, vector: &[f32]) -> Result<(), RawIndexError>;
    /// Removes the vector stored under `key`.
    fn remove(&self, key: u64) -> Result<(), RawIndexError>;
    /// Searches for `query`, requesting `count` results.
    fn search(&self, query: &[f32], count: usize) -> Result<RawSearchResults, RawIndexError>;
    /// Persists the index to the file at `path`.
    fn save(&self, path: &str) -> Result<(), RawIndexError>;
    /// Asks the backend to make room for `capacity` vectors.
    fn reserve(&self, capacity: usize) -> Result<(), RawIndexError>;
    /// Number of vectors currently held, as reported by the backend.
    fn size(&self) -> usize;
    /// Current capacity, as reported by the backend.
    fn capacity(&self) -> usize;
}
47
/// [`RawIndex`] implementation that wraps a usearch [`Index`].
struct UsearchRawIndex {
    /// The wrapped usearch index; every trait method forwards to it.
    inner: Index,
}
55
56impl RawIndex for UsearchRawIndex {
57 fn create(dimensions: usize) -> Result<Self, RawIndexError> {
58 let options = IndexOptions {
59 dimensions,
60 metric: MetricKind::Cos,
61 quantization: ScalarKind::F32,
62 ..Default::default()
63 };
64 let inner = Index::new(&options)?;
65 inner.reserve(INITIAL_INDEX_CAPACITY)?;
66 Ok(Self { inner })
67 }
68
69 fn add(&self, key: u64, vector: &[f32]) -> Result<(), RawIndexError> {
70 self.inner.add(key, vector).map_err(|e| e.into())
71 }
72
73 fn remove(&self, key: u64) -> Result<(), RawIndexError> {
74 self.inner.remove(key).map(|_| ()).map_err(|e| e.into())
75 }
76
77 fn search(&self, query: &[f32], count: usize) -> Result<RawSearchResults, RawIndexError> {
78 let m = self.inner.search(query, count)?;
79 Ok(RawSearchResults {
80 keys: m.keys,
81 distances: m.distances,
82 })
83 }
84
85 fn save(&self, path: &str) -> Result<(), RawIndexError> {
86 self.inner.save(path).map_err(|e| e.into())
87 }
88
89 fn reserve(&self, capacity: usize) -> Result<(), RawIndexError> {
90 self.inner.reserve(capacity).map_err(|e| e.into())
91 }
92
93 fn size(&self) -> usize {
94 self.inner.size()
95 }
96
97 fn capacity(&self) -> usize {
98 self.inner.capacity()
99 }
100}
101
/// Mutable state of one [`VectorIndex`]; always accessed under that index's mutex.
struct VectorState<R: RawIndex> {
    /// Backend index holding the vectors.
    index: R,
    /// Index key -> entry name.
    key_map: HashMap<u64, String>,
    /// Entry name -> the key currently associated with that name.
    name_map: HashMap<String, u64>,
    /// Key that will be handed to the next added vector.
    next_key: u64,
    /// Commit SHA recorded alongside the index, if any.
    commit_sha: Option<String>,
}
118
/// A backend index plus the name/key bookkeeping around it.
struct VectorIndex<R: RawIndex = UsearchRawIndex> {
    /// Backend index and key/name maps, guarded by a single mutex.
    state: Mutex<VectorState<R>>,
    /// Copy of `key_map.len()`, refreshed after each mutation so that
    /// `key_count` can be answered without taking the mutex.
    entry_count: AtomicUsize,
}
127
128impl VectorIndex<UsearchRawIndex> {
129 fn load(path: &Path) -> Result<Self, MemoryError> {
131 let path_str = path.to_str().ok_or_else(|| MemoryError::InvalidInput {
132 reason: "non-UTF-8 index path".to_string(),
133 })?;
134
135 let options = IndexOptions {
139 dimensions: 1, metric: MetricKind::Cos,
141 quantization: ScalarKind::F32,
142 ..Default::default()
143 };
144 let inner = Index::new(&options)
145 .map_err(|e| MemoryError::Index(format!("init for load: {}", e)))?;
146 inner
147 .load(path_str)
148 .map_err(|e| MemoryError::Index(format!("load: {}", e)))?;
149
150 let keys_path = format!("{}.keys.json", path_str);
152 let (key_map, next_key, commit_sha): (HashMap<u64, String>, u64, Option<String>) =
153 if std::path::Path::new(&keys_path).exists() {
154 let json = std::fs::read_to_string(&keys_path)?;
155 let value: serde_json::Value = serde_json::from_str(&json)
157 .map_err(|e| MemoryError::Index(format!("keymap deserialise: {}", e)))?;
158 if value.is_object() && value.get("key_map").is_some() {
159 let km: HashMap<u64, String> = serde_json::from_value(value["key_map"].clone())
160 .map_err(|e| MemoryError::Index(format!("keymap deserialise: {}", e)))?;
161 let nk: u64 = value["next_key"]
162 .as_u64()
163 .unwrap_or_else(|| km.keys().max().map(|k| k + 1).unwrap_or(0));
164 let sha: Option<String> = value
165 .get("commit_sha")
166 .and_then(|v| v.as_str())
167 .map(|s| s.to_string());
168 (km, nk, sha)
169 } else {
170 let km: HashMap<u64, String> = serde_json::from_value(value)
172 .map_err(|e| MemoryError::Index(format!("keymap deserialise: {}", e)))?;
173 let nk = km.keys().max().map(|k| k + 1).unwrap_or(0);
174 (km, nk, None)
175 }
176 } else {
177 (HashMap::new(), 0, None)
178 };
179
180 let name_map: HashMap<String, u64> = key_map.iter().map(|(&k, v)| (v.clone(), k)).collect();
181 if key_map.len() != name_map.len() {
182 tracing::warn!(
183 key_map_len = key_map.len(),
184 name_map_len = name_map.len(),
185 "key_map and name_map have different sizes; index may contain duplicate names"
186 );
187 }
188
189 let count = key_map.len();
190 Ok(Self {
191 state: Mutex::new(VectorState {
192 index: UsearchRawIndex { inner },
193 key_map,
194 name_map,
195 next_key,
196 commit_sha,
197 }),
198 entry_count: AtomicUsize::new(count),
199 })
200 }
201}
202
203fn raw_err(e: RawIndexError) -> MemoryError {
206 MemoryError::Index(e.to_string())
207}
208
209impl<R: RawIndex> VectorIndex<R> {
210 fn new(dimensions: usize) -> Result<Self, MemoryError> {
211 let index = R::create(dimensions).map_err(raw_err)?;
212 Ok(Self {
213 state: Mutex::new(VectorState {
214 index,
215 key_map: HashMap::new(),
216 name_map: HashMap::new(),
217 next_key: 0,
218 commit_sha: None,
219 }),
220 entry_count: AtomicUsize::new(0),
221 })
222 }
223
224 fn grow_if_needed_inner(state: &VectorState<R>, additional: usize) -> Result<(), MemoryError> {
225 let current_capacity = state.index.capacity();
226 let current_size = state.index.size();
227 if current_size + additional > current_capacity {
228 let new_capacity = (current_capacity + additional).max(current_capacity * 2);
229 state.index.reserve(new_capacity).map_err(raw_err)?;
230 }
231 Ok(())
232 }
233
234 fn find_key_by_name(&self, name: &str) -> Option<u64> {
236 let state = self
237 .state
238 .lock()
239 .expect("lock poisoned — prior panic corrupted state");
240 state.name_map.get(name).copied()
241 }
242
243 fn add_with_next_key(&self, vector: &[f32], name: String) -> Result<u64, MemoryError> {
246 let mut state = self
247 .state
248 .lock()
249 .expect("lock poisoned — prior panic corrupted state");
250 Self::grow_if_needed_inner(&state, 1)?;
251 let key = state.next_key;
252 state.index.add(key, vector).map_err(raw_err)?;
253 state.name_map.insert(name.clone(), key);
254 state.key_map.insert(key, name);
255 state.next_key = state
256 .next_key
257 .checked_add(1)
258 .expect("vector key space exhausted");
259 self.entry_count
260 .store(state.key_map.len(), Ordering::Relaxed);
261 Ok(key)
262 }
263
264 fn search(&self, query: &[f32], limit: usize) -> Result<Vec<(u64, String, f32)>, MemoryError> {
268 let state = self
269 .state
270 .lock()
271 .expect("lock poisoned — prior panic corrupted state");
272 let raw = state.index.search(query, limit).map_err(raw_err)?;
273
274 let results = raw
275 .keys
276 .into_iter()
277 .zip(raw.distances)
278 .filter_map(|(key, dist)| {
279 state
280 .key_map
281 .get(&key)
282 .map(|name| (key, name.clone(), dist))
283 })
284 .collect();
285 Ok(results)
286 }
287
288 fn remove(&self, key: u64) -> Result<(), MemoryError> {
290 let mut state = self
291 .state
292 .lock()
293 .expect("lock poisoned — prior panic corrupted state");
294 state.index.remove(key).map_err(raw_err)?;
295 if let Some(name) = state.key_map.remove(&key) {
296 if state.name_map.get(&name).copied() == Some(key) {
299 state.name_map.remove(&name);
300 }
301 self.entry_count
302 .store(state.key_map.len(), Ordering::Relaxed);
303 }
304 Ok(())
305 }
306
307 fn rollback_add(&self, new_key: u64, old_key: Option<u64>, name: &str) {
315 let mut state = self
316 .state
317 .lock()
318 .expect("lock poisoned — prior panic corrupted state");
319 if let Err(e) = state.index.remove(new_key) {
321 tracing::warn!(error = %e, "rollback: raw index remove failed");
322 }
323 state.key_map.remove(&new_key);
325 if let Some(old) = old_key {
327 state.name_map.insert(name.to_owned(), old);
328 } else {
329 state.name_map.remove(name);
330 }
331 self.entry_count
332 .store(state.key_map.len(), Ordering::Relaxed);
333 }
334
335 fn key_count(&self) -> usize {
337 self.entry_count.load(Ordering::Relaxed)
338 }
339
340 fn commit_sha(&self) -> Option<String> {
342 let state = self
343 .state
344 .lock()
345 .expect("lock poisoned — prior panic corrupted state");
346 state.commit_sha.clone()
347 }
348
349 fn set_commit_sha(&self, sha: Option<&str>) {
351 let mut state = self
352 .state
353 .lock()
354 .expect("lock poisoned — prior panic corrupted state");
355 state.commit_sha = sha.map(|s| s.to_owned());
356 }
357
358 fn save(&self, path: &Path) -> Result<(), MemoryError> {
360 let path_str = path.to_str().ok_or_else(|| MemoryError::InvalidInput {
361 reason: "non-UTF-8 index path".to_string(),
362 })?;
363
364 let state = self
365 .state
366 .lock()
367 .expect("lock poisoned — prior panic corrupted state");
368 state.index.save(path_str).map_err(raw_err)?;
369
370 let keys_path = format!("{}.keys.json", path_str);
372 let payload = serde_json::json!({
373 "key_map": &state.key_map,
374 "next_key": state.next_key,
375 "commit_sha": state.commit_sha,
376 });
377 let json = serde_json::to_string(&payload)
378 .map_err(|e| MemoryError::Index(format!("keymap serialise: {}", e)))?;
379 std::fs::write(&keys_path, json)?;
380
381 Ok(())
382 }
383}
384
/// Public vector store backed by usearch indexes.
///
/// Wraps the scope-aware index set and reports the outcome of add, remove
/// and search calls to a [`SubsystemReporter`].
#[non_exhaustive]
pub struct UsearchStore {
    /// The per-scope indexes and the combined index.
    inner: UsearchStoreInner<UsearchRawIndex>,
    /// Receives ok/error reports for store operations.
    reporter: SubsystemReporter,
}
400
/// Backend-generic core of the store: one index per scope plus one index
/// that receives every entry.
struct UsearchStoreInner<R: RawIndex> {
    /// One index per scope; entries for new scopes are created on first add.
    scopes: RwLock<HashMap<Scope, VectorIndex<R>>>,
    /// Index that receives every entry regardless of scope.
    all: VectorIndex<R>,
    /// Vector length that `add` and `search` require.
    dimensions: usize,
}
410
411impl UsearchStore {
418 pub fn new(dimensions: usize) -> Result<Self, MemoryError> {
423 Self::new_with_reporter(dimensions, SubsystemReporter::new())
424 }
425
426 pub fn new_with_reporter(
428 dimensions: usize,
429 reporter: SubsystemReporter,
430 ) -> Result<Self, MemoryError> {
431 let global = VectorIndex::new(dimensions)?;
432 let all = VectorIndex::new(dimensions)?;
433 let mut scopes = HashMap::new();
434 scopes.insert(Scope::Global, global);
435 Ok(Self {
436 inner: UsearchStoreInner {
437 scopes: RwLock::new(scopes),
438 all,
439 dimensions,
440 },
441 reporter,
442 })
443 }
444
445 pub fn load(dir: &Path, dimensions: usize) -> Result<Self, MemoryError> {
450 Self::load_with_reporter(dir, dimensions, SubsystemReporter::new())
451 }
452
453 pub fn load_with_reporter(
455 dir: &Path,
456 dimensions: usize,
457 reporter: SubsystemReporter,
458 ) -> Result<Self, MemoryError> {
459 let span = tracing::info_span!("index.load", key_count = tracing::field::Empty,);
460 let _enter = span.enter();
461
462 let dirty_marker = dir.join(".save-in-progress");
467 if dirty_marker.exists() {
468 tracing::warn!("detected interrupted index save — discarding indexes");
469 let _ = std::fs::remove_file(&dirty_marker);
470 return Self::new_with_reporter(dimensions, reporter);
471 }
472
473 let all_path = dir.join("all").join("index.usearch");
475 let all = if all_path.exists() {
476 VectorIndex::load(&all_path)?
477 } else {
478 VectorIndex::new(dimensions)?
479 };
480
481 let mut scopes: HashMap<Scope, VectorIndex<UsearchRawIndex>> = HashMap::new();
482
483 let global_path = dir.join("global").join("index.usearch");
485 let global = if global_path.exists() {
486 VectorIndex::load(&global_path)?
487 } else {
488 VectorIndex::new(dimensions)?
489 };
490 scopes.insert(Scope::Global, global);
491
492 let projects_dir = dir.join("projects");
494 if projects_dir.is_dir() {
495 let entries = std::fs::read_dir(&projects_dir)
496 .map_err(|e| MemoryError::Index(format!("read projects dir: {}", e)))?;
497 for entry in entries {
498 let entry =
499 entry.map_err(|e| MemoryError::Index(format!("read dir entry: {}", e)))?;
500 let path = entry.path();
501 if path.is_dir() {
502 let project_name = path
503 .file_name()
504 .and_then(|n| n.to_str())
505 .map(|s| s.to_string())
506 .ok_or_else(|| {
507 MemoryError::Index("non-UTF-8 project directory name".to_string())
508 })?;
509 if let Err(e) = validate_name(&project_name) {
510 tracing::warn!(
511 project_name = %project_name,
512 error = %e,
513 "skipping project index with invalid name"
514 );
515 continue;
516 }
517 let index_path = path.join("index.usearch");
518 if index_path.exists() {
519 let idx = VectorIndex::load(&index_path)?;
520 scopes.insert(Scope::Project(project_name), idx);
521 }
522 }
523 }
524 }
525
526 let key_count = all.key_count();
527 span.record("key_count", key_count);
528
529 Ok(Self {
530 inner: UsearchStoreInner {
531 scopes: RwLock::new(scopes),
532 all,
533 dimensions,
534 },
535 reporter,
536 })
537 }
538}
539
540impl<R: RawIndex> UsearchStoreInner<R> {
542 fn add(
543 &self,
544 scope: &Scope,
545 vector: &[f32],
546 qualified_name: String,
547 ) -> Result<u64, MemoryError> {
548 let dimensions = vector.len();
549 let span = tracing::debug_span!(
550 "index.add",
551 scope = %scope.dir_prefix(),
552 dimensions,
553 key_count = tracing::field::Empty,
554 );
555 let _enter = span.enter();
556
557 if vector.len() != self.dimensions {
558 return Err(MemoryError::InvalidInput {
559 reason: format!(
560 "expected {} dimensions, got {}",
561 self.dimensions,
562 vector.len()
563 ),
564 });
565 }
566
567 let mut scopes = self.scopes.write().expect("scopes lock poisoned");
571
572 if !scopes.contains_key(scope) {
574 scopes.insert(scope.clone(), Self::new_index(self.dimensions)?);
575 }
576
577 let scope_idx = scopes
578 .get(scope)
579 .expect("scope index must exist after insert");
580
581 let old_scope_key = scope_idx.find_key_by_name(&qualified_name);
583 let old_all_key = self.all.find_key_by_name(&qualified_name);
584
585 let new_scope_key = scope_idx.add_with_next_key(vector, qualified_name.clone())?;
587
588 let all_key = match self.all.add_with_next_key(vector, qualified_name.clone()) {
591 Ok(key) => key,
592 Err(e) => {
593 scope_idx.rollback_add(new_scope_key, old_scope_key, &qualified_name);
594 return Err(e);
595 }
596 };
597
598 if let Some(key) = old_scope_key {
600 let _ = scope_idx.remove(key);
601 }
602 if let Some(key) = old_all_key {
603 let _ = self.all.remove(key);
604 }
605
606 span.record("key_count", self.all.key_count());
608
609 Ok(all_key)
610 }
611
612 fn remove(&self, scope: &Scope, qualified_name: &str) -> Result<(), MemoryError> {
613 let _span = tracing::debug_span!(
614 "index.remove",
615 scope = %scope.dir_prefix(),
616 )
617 .entered();
618
619 let scopes = self.scopes.write().expect("scopes lock poisoned");
621
622 if let Some(scope_idx) = scopes.get(scope) {
624 if let Some(key) = scope_idx.find_key_by_name(qualified_name) {
625 if let Err(e) = scope_idx.remove(key) {
626 tracing::warn!(
627 qualified_name = %qualified_name,
628 error = %e,
629 "scope index removal failed; continuing to all-index"
630 );
631 }
632 }
633 }
634
635 if let Some(key) = self.all.find_key_by_name(qualified_name) {
637 if let Err(e) = self.all.remove(key) {
638 tracing::warn!(
639 qualified_name = %qualified_name,
640 error = %e,
641 "all-index removal failed"
642 );
643 }
644 }
645
646 Ok(())
647 }
648
649 fn search(
650 &self,
651 filter: &ScopeFilter,
652 query: &[f32],
653 limit: usize,
654 ) -> Result<Vec<(u64, String, f32)>, MemoryError> {
655 let dimensions = query.len();
656 let scope_str: Cow<'_, str> = match filter {
657 ScopeFilter::GlobalOnly => "global".into(),
658 ScopeFilter::All => "all".into(),
659 ScopeFilter::ProjectAndGlobal(p) => format!("project+global:{p}").into(),
660 };
661 let span = tracing::debug_span!(
662 "index.search",
663 scope = %scope_str,
664 dimensions,
665 key_count = self.all.key_count(),
666 count = tracing::field::Empty,
667 );
668 let _enter = span.enter();
669
670 if query.len() != self.dimensions {
671 return Err(MemoryError::InvalidInput {
672 reason: format!(
673 "expected {} dimensions, got {}",
674 self.dimensions,
675 query.len()
676 ),
677 });
678 }
679
680 let results = match filter {
681 ScopeFilter::All => self.all.search(query, limit),
682
683 ScopeFilter::GlobalOnly => {
684 let scopes = self.scopes.read().expect("scopes lock poisoned");
685 match scopes.get(&Scope::Global) {
686 Some(global_idx) => global_idx.search(query, limit),
687 None => Ok(Vec::new()),
688 }
689 }
690
691 ScopeFilter::ProjectAndGlobal(project_name) => {
692 let scopes = self.scopes.read().expect("scopes lock poisoned");
693 let project_scope = Scope::Project(project_name.clone());
694
695 let mut combined: Vec<(u64, String, f32)> = Vec::new();
696
697 if let Some(global_idx) = scopes.get(&Scope::Global) {
698 let mut global_results = global_idx.search(query, limit)?;
699 combined.append(&mut global_results);
700 }
701
702 if let Some(proj_idx) = scopes.get(&project_scope) {
703 let mut proj_results = proj_idx.search(query, limit)?;
704 combined.append(&mut proj_results);
705 }
706
707 let mut seen = std::collections::HashSet::new();
709 combined.retain(|(_, name, _)| seen.insert(name.clone()));
710 combined.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal));
712 combined.truncate(limit);
713 Ok(combined)
714 }
715 };
716 if let Ok(ref r) = results {
717 span.record("count", r.len());
718 }
719 results
720 }
721
722 fn find_key_by_name(&self, qualified_name: &str) -> Option<u64> {
723 self.all.find_key_by_name(qualified_name)
724 }
725
726 fn save(&self, dir: &Path) -> Result<(), MemoryError> {
727 let span = tracing::debug_span!("index.save", key_count = tracing::field::Empty,);
728 let _enter = span.enter();
729
730 std::fs::create_dir_all(dir)?;
731
732 let marker = dir.join(".save-in-progress");
735 std::fs::write(&marker, b"")?;
736
737 let result = (|| -> Result<(), MemoryError> {
738 let scopes = self.scopes.read().expect("scopes lock poisoned");
740
741 let all_dir = dir.join("all");
743 std::fs::create_dir_all(&all_dir)?;
744 self.all.save(&all_dir.join("index.usearch"))?;
745
746 for (scope, idx) in scopes.iter() {
748 let scope_dir = dir.join(scope.dir_prefix());
749 std::fs::create_dir_all(&scope_dir)?;
750 idx.save(&scope_dir.join("index.usearch"))?;
751 }
752
753 let key_count = self.all.key_count();
755 span.record("key_count", key_count);
756
757 Ok(())
759 })();
760
761 let _ = std::fs::remove_file(&marker);
764
765 result
766 }
767
768 fn commit_sha(&self) -> Option<String> {
769 self.all.commit_sha()
770 }
771
772 fn set_commit_sha(&self, sha: Option<&str>) {
773 let scopes = self.scopes.read().expect("scopes lock poisoned");
774 self.all.set_commit_sha(sha);
775 for idx in scopes.values() {
776 idx.set_commit_sha(sha);
777 }
778 }
779}
780
781impl<R: RawIndex> UsearchStoreInner<R> {
782 fn new_index(dimensions: usize) -> Result<VectorIndex<R>, MemoryError> {
783 VectorIndex::new(dimensions)
784 }
785}
786
// Marks `UsearchStore` with the crate's `Sealed` trait.
// NOTE(review): presumably a prerequisite for implementing `VectorStore` — confirm.
impl crate::index::sealed::Sealed for UsearchStore {}
792
793impl crate::index::VectorStore for UsearchStore {
794 fn add(
795 &self,
796 scope: &Scope,
797 vector: &[f32],
798 qualified_name: String,
799 ) -> Result<u64, MemoryError> {
800 let result = self.inner.add(scope, vector, qualified_name);
801 match &result {
802 Ok(_) => self.reporter.report_ok(),
803 Err(_) => self.reporter.report_err("index add failed"),
804 }
805 result
806 }
807
808 fn remove(&self, scope: &Scope, qualified_name: &str) -> Result<(), MemoryError> {
809 let result = self.inner.remove(scope, qualified_name);
810 match &result {
811 Ok(_) => self.reporter.report_ok(),
812 Err(_) => self.reporter.report_err("index remove failed"),
813 }
814 result
815 }
816
817 fn search(
818 &self,
819 filter: &ScopeFilter,
820 query: &[f32],
821 limit: usize,
822 ) -> Result<Vec<(u64, String, f32)>, MemoryError> {
823 let result = self.inner.search(filter, query, limit);
824 match &result {
825 Ok(_) => self.reporter.report_ok(),
826 Err(_) => self.reporter.report_err("index search failed"),
827 }
828 result
829 }
830
831 fn find_by_name(&self, qualified_name: &str) -> Option<u64> {
832 self.inner.find_key_by_name(qualified_name)
833 }
834
835 fn save(&self, dir: &Path) -> Result<(), MemoryError> {
836 self.inner.save(dir)
837 }
838
839 fn is_ready(&self) -> bool {
840 true
841 }
842
843 fn dimensions(&self) -> usize {
844 self.inner.dimensions
845 }
846
847 fn commit_sha(&self) -> Option<String> {
848 self.inner.commit_sha()
849 }
850
851 fn set_commit_sha(&self, sha: Option<&str>) {
852 self.inner.set_commit_sha(sha)
853 }
854}
855
856#[cfg(test)]
861mod tests {
862 use super::*;
863 use crate::index::VectorStore;
864
 /// Selects which [`RawIndex`] operation a [`FailingRawIndex`] makes fail.
 #[derive(Debug, Clone, Copy, PartialEq)]
 enum FailOn {
     /// Fail `add` calls.
     Add,
     /// Fail `remove` calls.
     Remove,
     /// Fail `search` calls.
     Search,
     /// Fail `save` calls.
     Save,
     /// Fail `reserve` calls.
     Reserve,
     /// Inject no failure.
     None,
 }
879
 /// Test backend: a real usearch index whose selected operation can be made
 /// to fail on demand.
 struct FailingRawIndex {
     /// Real index that performs every call that is not failed.
     inner: Index,
     /// Operation that is subject to failure injection.
     fail_on: FailOn,
     /// How many calls of the selected operation have been let through so far.
     call_count: Mutex<usize>,
     /// Number of calls to let through before failing; `0` fails immediately.
     fail_after: usize,
 }
891
892 impl FailingRawIndex {
893 fn new(dimensions: usize, fail_on: FailOn, fail_after: usize) -> Self {
894 let options = IndexOptions {
895 dimensions,
896 metric: MetricKind::Cos,
897 quantization: ScalarKind::F32,
898 ..Default::default()
899 };
900 let inner = Index::new(&options).expect("create failing index");
901 inner.reserve(1024).expect("reserve");
902 Self {
903 inner,
904 fail_on,
905 call_count: Mutex::new(0),
906 fail_after,
907 }
908 }
909
910 fn should_fail(&self, op: FailOn) -> bool {
911 if self.fail_on != op {
912 return false;
913 }
914 let mut count = self.call_count.lock().unwrap();
915 if self.fail_after == 0 || *count >= self.fail_after {
916 return true;
917 }
918 *count += 1;
919 false
920 }
921
922 fn injected_error(op: &str) -> RawIndexError {
923 format!("injected {op} failure").into()
924 }
925 }
926
927 impl RawIndex for FailingRawIndex {
928 fn create(dimensions: usize) -> Result<Self, RawIndexError> {
929 Ok(FailingRawIndex::new(dimensions, FailOn::None, 0))
930 }
931
932 fn add(&self, key: u64, vector: &[f32]) -> Result<(), RawIndexError> {
933 if self.should_fail(FailOn::Add) {
934 return Err(Self::injected_error("add"));
935 }
936 self.inner.add(key, vector).map_err(|e| e.into())
937 }
938
939 fn remove(&self, key: u64) -> Result<(), RawIndexError> {
940 if self.should_fail(FailOn::Remove) {
941 return Err(Self::injected_error("remove"));
942 }
943 self.inner.remove(key).map(|_| ()).map_err(|e| e.into())
944 }
945
946 fn search(&self, query: &[f32], count: usize) -> Result<RawSearchResults, RawIndexError> {
947 if self.should_fail(FailOn::Search) {
948 return Err(Self::injected_error("search"));
949 }
950 let m = self.inner.search(query, count)?;
951 Ok(RawSearchResults {
952 keys: m.keys,
953 distances: m.distances,
954 })
955 }
956
957 fn save(&self, path: &str) -> Result<(), RawIndexError> {
958 if self.should_fail(FailOn::Save) {
959 return Err(Self::injected_error("save"));
960 }
961 self.inner.save(path).map_err(|e| e.into())
962 }
963
964 fn reserve(&self, capacity: usize) -> Result<(), RawIndexError> {
965 if self.should_fail(FailOn::Reserve) {
966 return Err(Self::injected_error("reserve"));
967 }
968 self.inner.reserve(capacity).map_err(|e| e.into())
969 }
970
971 fn size(&self) -> usize {
972 self.inner.size()
973 }
974
975 fn capacity(&self) -> usize {
976 self.inner.capacity()
977 }
978 }
979
980 fn make_failing_index(
982 dimensions: usize,
983 fail_on: FailOn,
984 fail_after: usize,
985 ) -> VectorIndex<FailingRawIndex> {
986 VectorIndex {
987 state: Mutex::new(VectorState {
988 index: FailingRawIndex::new(dimensions, fail_on, fail_after),
989 key_map: HashMap::new(),
990 name_map: HashMap::new(),
991 next_key: 0,
992 commit_sha: None,
993 }),
994 entry_count: AtomicUsize::new(0),
995 }
996 }
997
 /// Test-only store built on [`FailingRawIndex`], exposing the inner index
 /// set directly.
 struct FailableStore {
     /// Index set whose backends can inject failures.
     inner: UsearchStoreInner<FailingRawIndex>,
 }
1003
1004 fn make_failable_store(
1005 dimensions: usize,
1006 all_fail_on: FailOn,
1007 all_fail_after: usize,
1008 ) -> FailableStore {
1009 let all = make_failing_index(dimensions, all_fail_on, all_fail_after);
1010 let scope = make_failing_index(dimensions, FailOn::None, 0);
1011 let mut scopes = HashMap::new();
1012 scopes.insert(Scope::Global, scope);
1013 FailableStore {
1014 inner: UsearchStoreInner {
1015 scopes: RwLock::new(scopes),
1016 all,
1017 dimensions,
1018 },
1019 }
1020 }
1021
1022 fn make_index() -> VectorIndex {
1027 VectorIndex::new(4).expect("failed to create index")
1028 }
1029
1030 fn dummy_vec() -> Vec<f32> {
1031 vec![1.0, 0.0, 0.0, 0.0]
1032 }
1033
1034 #[test]
1037 fn remove_old_key_does_not_clobber_upserted_name_map_entry() {
1038 let index = make_index();
1039 let v = dummy_vec();
1040
1041 let old_key = index
1042 .add_with_next_key(&v, "global/foo".to_string())
1043 .expect("first add failed");
1044 let new_key = index
1045 .add_with_next_key(&v, "global/foo".to_string())
1046 .expect("second add failed");
1047
1048 assert_ne!(old_key, new_key, "keys must differ");
1049
1050 index.remove(old_key).expect("remove failed");
1051
1052 assert_eq!(
1053 index.find_key_by_name("global/foo"),
1054 Some(new_key),
1055 "name_map entry for new_key was incorrectly removed"
1056 );
1057 }
1058
1059 #[test]
1060 fn remove_only_key_clears_name_map() {
1061 let index = make_index();
1062 let v = dummy_vec();
1063
1064 let key = index
1065 .add_with_next_key(&v, "global/bar".to_string())
1066 .expect("add failed");
1067
1068 index.remove(key).expect("remove failed");
1069
1070 assert_eq!(
1071 index.find_key_by_name("global/bar"),
1072 None,
1073 "name_map entry should have been cleared"
1074 );
1075 }
1076
1077 fn make_store() -> UsearchStore {
1082 UsearchStore::new(8).expect("failed to create UsearchStore")
1083 }
1084
1085 fn vec_a() -> Vec<f32> {
1086 vec![1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
1087 }
1088
1089 fn vec_b() -> Vec<f32> {
1090 vec![0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
1091 }
1092
1093 fn vec_c() -> Vec<f32> {
1094 vec![0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]
1095 }
1096
1097 #[test]
1098 fn usearch_store_add_inserts_into_scope_and_all() {
1099 let si: &dyn VectorStore = &make_store();
1100 let scope = Scope::Global;
1101 let name = "global/memory-a".to_string();
1102
1103 si.add(&scope, &vec_a(), name.clone()).expect("add failed");
1104
1105 assert!(si.find_by_name(&name).is_some(), "should be in all-index");
1106
1107 let results = si
1108 .search(&ScopeFilter::GlobalOnly, &vec_a(), 5)
1109 .expect("search failed");
1110 assert!(
1111 results.iter().any(|(_, n, _)| n == &name),
1112 "should be found in global search"
1113 );
1114 }
1115
1116 #[test]
1117 fn usearch_store_remove_removes_from_both() {
1118 let si: &dyn VectorStore = &make_store();
1119 let scope = Scope::Global;
1120 let name = "global/memory-rm".to_string();
1121
1122 si.add(&scope, &vec_a(), name.clone()).expect("add failed");
1123 assert!(si.find_by_name(&name).is_some(), "should exist");
1124
1125 si.remove(&scope, &name).expect("remove failed");
1126
1127 assert!(
1128 si.find_by_name(&name).is_none(),
1129 "should be gone from all-index"
1130 );
1131
1132 let results = si
1133 .search(&ScopeFilter::GlobalOnly, &vec_a(), 5)
1134 .expect("search failed");
1135 assert!(
1136 !results.iter().any(|(_, n, _)| n == &name),
1137 "should not appear in global search after removal"
1138 );
1139 }
1140
1141 #[test]
1142 fn usearch_store_search_global_only() {
1143 let si: &dyn VectorStore = &make_store();
1144 let proj = Scope::Project("myproj".to_string());
1145
1146 si.add(&Scope::Global, &vec_a(), "global/mem-global".to_string())
1147 .expect("add global failed");
1148 si.add(&proj, &vec_b(), "projects/myproj/mem-proj".to_string())
1149 .expect("add project failed");
1150
1151 let results = si
1152 .search(&ScopeFilter::GlobalOnly, &vec_a(), 5)
1153 .expect("search failed");
1154
1155 let names: Vec<&str> = results.iter().map(|(_, n, _)| n.as_str()).collect();
1156 assert!(
1157 names.contains(&"global/mem-global"),
1158 "should contain global"
1159 );
1160 assert!(
1161 !names.contains(&"projects/myproj/mem-proj"),
1162 "should NOT contain project memory"
1163 );
1164 }
1165
1166 #[test]
1167 fn usearch_store_search_project_and_global() {
1168 let si: &dyn VectorStore = &make_store();
1169 let proj_a = Scope::Project("alpha".to_string());
1170 let proj_b = Scope::Project("beta".to_string());
1171
1172 si.add(&Scope::Global, &vec_a(), "global/g1".to_string())
1173 .expect("add global failed");
1174 si.add(&proj_a, &vec_b(), "projects/alpha/a1".to_string())
1175 .expect("add alpha failed");
1176 si.add(&proj_b, &vec_c(), "projects/beta/b1".to_string())
1177 .expect("add beta failed");
1178
1179 let results = si
1180 .search(
1181 &ScopeFilter::ProjectAndGlobal("alpha".to_string()),
1182 &vec_a(),
1183 10,
1184 )
1185 .expect("search failed");
1186
1187 let names: Vec<&str> = results.iter().map(|(_, n, _)| n.as_str()).collect();
1188 assert!(names.contains(&"global/g1"), "should contain global");
1189 assert!(names.contains(&"projects/alpha/a1"), "should contain alpha");
1190 assert!(
1191 !names.contains(&"projects/beta/b1"),
1192 "should NOT contain beta"
1193 );
1194 }
1195
1196 #[test]
1197 fn usearch_store_search_all() {
1198 let si: &dyn VectorStore = &make_store();
1199 let proj = Scope::Project("foo".to_string());
1200
1201 si.add(&Scope::Global, &vec_a(), "global/x".to_string())
1202 .expect("add global");
1203 si.add(&proj, &vec_b(), "projects/foo/y".to_string())
1204 .expect("add project");
1205
1206 let results = si
1207 .search(&ScopeFilter::All, &vec_a(), 10)
1208 .expect("search failed");
1209
1210 let names: Vec<&str> = results.iter().map(|(_, n, _)| n.as_str()).collect();
1211 assert!(names.contains(&"global/x"), "all should include global");
1212 assert!(
1213 names.contains(&"projects/foo/y"),
1214 "all should include project"
1215 );
1216 }
1217
1218 #[test]
1219 fn usearch_store_upsert_replaces_old_entry() {
1220 let si: &dyn VectorStore = &make_store();
1221 let name = "global/memo".to_string();
1222 si.add(&Scope::Global, &vec_a(), name.clone()).unwrap();
1223 si.add(&Scope::Global, &vec_b(), name.clone()).unwrap();
1224 let results = si.search(&ScopeFilter::All, &vec_b(), 10).unwrap();
1225 assert_eq!(
1226 results.iter().filter(|(_, n, _)| n == &name).count(),
1227 1,
1228 "upsert should leave exactly one entry for the name"
1229 );
1230 }
1231
1232 #[test]
1233 fn usearch_store_dirty_marker_discards_indexes() {
1234 let dir = tempfile::tempdir().expect("tempdir");
1235 let si = UsearchStore::new(8).expect("create");
1236 let store: &dyn VectorStore = &si;
1237 store
1238 .add(&Scope::Global, &vec_a(), "global/test-mem".to_string())
1239 .expect("add");
1240 store.set_commit_sha(Some("abc123"));
1241 store.save(dir.path()).expect("save");
1242
1243 std::fs::write(dir.path().join(".save-in-progress"), b"").unwrap();
1244
1245 let loaded = UsearchStore::load(dir.path(), 8).expect("load");
1246 let loaded: &dyn VectorStore = &loaded;
1247 assert!(
1248 loaded.commit_sha().is_none(),
1249 "dirty marker should result in no SHA"
1250 );
1251 assert!(
1252 loaded.find_by_name("global/test-mem").is_none(),
1253 "dirty marker should discard all indexed data"
1254 );
1255 assert!(
1256 !dir.path().join(".save-in-progress").exists(),
1257 "marker should be cleaned up"
1258 );
1259 }
1260
/// Saves a store containing one global and one project entry, loads it back
/// from the same directory, and checks that both entries can be found by name
/// and both appear in a `ProjectAndGlobal` search.
#[test]
fn usearch_store_save_load_round_trip() {
    let tmp = tempfile::tempdir().expect("tempdir");
    let original = UsearchStore::new(8).expect("create");
    let writer: &dyn VectorStore = &original;
    let project_scope = Scope::Project("rtrip".to_string());

    // Populate both scopes, then persist.
    writer
        .add(&Scope::Global, &vec_a(), "global/rt-global".to_string())
        .expect("add global");
    writer
        .add(&project_scope, &vec_b(), "projects/rtrip/rt-proj".to_string())
        .expect("add project");
    writer.save(tmp.path()).expect("save failed");

    let reloaded = UsearchStore::load(tmp.path(), 8).expect("load failed");
    let reader: &dyn VectorStore = &reloaded;

    // Name lookups must still resolve after the reload.
    let global_hit = reader.find_by_name("global/rt-global");
    assert!(
        global_hit.is_some(),
        "global memory should survive round-trip"
    );
    let project_hit = reader.find_by_name("projects/rtrip/rt-proj");
    assert!(
        project_hit.is_some(),
        "project memory should survive round-trip"
    );

    // A combined project + global search must surface both names.
    let filter = ScopeFilter::ProjectAndGlobal("rtrip".to_string());
    let hits = reader.search(&filter, &vec_a(), 10).expect("search failed");
    let found: Vec<&str> = hits.iter().map(|(_, name, _)| name.as_str()).collect();
    assert!(found.contains(&"global/rt-global"));
    assert!(found.contains(&"projects/rtrip/rt-proj"));
}
1300
/// Two entries that share the short name `foo` but live in different scopes
/// (global vs. project `p`) must both be stored and must resolve to
/// different lookup results.
#[test]
fn usearch_store_same_short_name_different_scopes_coexist() {
    let backing = make_store();
    let store: &dyn VectorStore = &backing;
    let project_scope = Scope::Project("p".to_string());

    store
        .add(&Scope::Global, &vec_a(), "global/foo".to_string())
        .unwrap();
    store
        .add(&project_scope, &vec_b(), "projects/p/foo".to_string())
        .unwrap();

    // Each qualified name must be individually resolvable.
    assert!(store.find_by_name("global/foo").is_some());
    assert!(store.find_by_name("projects/p/foo").is_some());

    // The two lookups must not collapse onto the same result.
    let global_key = store.find_by_name("global/foo");
    let project_key = store.find_by_name("projects/p/foo");
    assert_ne!(
        global_key, project_key,
        "different scopes should have distinct keys"
    );
}
1320
/// TC-03: an index built by `make_failing_index(4, FailOn::Add, 0)` must
/// return an error from `add_with_next_key`.
#[test]
fn tc03_failing_raw_index_is_injectable() {
    let failing = make_failing_index(4, FailOn::Add, 0);
    let unit_vector = vec![1.0_f32, 0.0, 0.0, 0.0];

    let outcome = failing.add_with_next_key(&unit_vector, String::from("test/name"));

    assert!(
        outcome.is_err(),
        "FailingRawIndex with FailOn::Add should return error"
    );
}
1335
/// TC-04a: when an add on a failable store returns an error, the global
/// scope index must report zero keys afterwards.
///
/// NOTE(review): the trailing `0` passed to `make_failable_store` presumably
/// means "fail on the very first add" — confirm against the helper.
#[test]
fn tc04a_rollback_on_all_index_failure_scope_count_unchanged() {
    let failable = make_failable_store(8, FailOn::Add, 0);
    let global = Scope::Global;

    let outcome = failable
        .inner
        .add(&global, &vec_a(), "global/rollback-test".to_string());
    assert!(outcome.is_err(), "add should fail when all-index fails");

    // Inspect the per-scope index directly under the read lock.
    let scopes_guard = failable.inner.scopes.read().unwrap();
    let global_index = scopes_guard
        .get(&global)
        .expect("global scope must exist");
    let remaining = global_index.key_count();
    assert_eq!(
        remaining, 0,
        "TC-04a: scope index entry count should be 0 after rollback (was: {})",
        remaining
    );
}
1362
/// TC-04b: after one successful add and one failing add in the same scope,
/// the first name must still resolve and the second must not.
///
/// NOTE(review): the trailing `1` passed to `make_failable_store` presumably
/// lets one add succeed before failures start — confirm against the helper.
#[test]
fn tc04b_rollback_does_not_corrupt_existing_entries() {
    let failable = make_failable_store(8, FailOn::Add, 1);
    let global = Scope::Global;
    let surviving_name = "global/existing".to_string();
    let rejected_name = "global/failing".to_string();

    failable
        .inner
        .add(&global, &vec_a(), surviving_name.clone())
        .expect("first add should succeed");

    let second_outcome = failable
        .inner
        .add(&global, &vec_b(), rejected_name.clone());
    assert!(second_outcome.is_err(), "second add should fail");

    let surviving_key = failable.inner.find_key_by_name(&surviving_name);
    assert!(
        surviving_key.is_some(),
        "TC-04b: existing entry should not be corrupted by rollback"
    );

    let rejected_key = failable.inner.find_key_by_name(&rejected_name);
    assert!(
        rejected_key.is_none(),
        "TC-04b: failed entry should not be in the index"
    );
}
1395
/// TC-04c: re-adding an existing name (an upsert) that fails must leave the
/// name mapped to the key returned by the original, successful add.
///
/// NOTE(review): the trailing `1` passed to `make_failable_store` presumably
/// lets one add succeed before failures start — confirm against the helper.
#[test]
fn tc04c_upsert_rollback_preserves_original_entry() {
    let failable = make_failable_store(8, FailOn::Add, 1);
    let global = Scope::Global;
    let name = String::from("global/upsert-rollback");

    // First insert goes through and yields the key we expect to survive.
    let first_outcome = failable.inner.add(&global, &vec_a(), name.clone());
    let original_key = first_outcome.expect("TC-04c: first add should succeed");

    // Second insert under the same name is expected to fail.
    let second_outcome = failable.inner.add(&global, &vec_b(), name.clone());
    assert!(
        second_outcome.is_err(),
        "TC-04c: second add should fail when all-index fails"
    );

    // `original_key` / `key_after` are captured by name in the message below.
    let key_after = failable.inner.find_key_by_name(&name);
    assert_eq!(
        key_after,
        Some(original_key),
        "TC-04c: original entry must survive upsert rollback (expected key {original_key}, got {key_after:?})"
    );
}
1431
/// TC-05a: the error returned by a failing `add_with_next_key` must be the
/// `MemoryError::Index` variant.
#[test]
fn tc05a_errors_are_memory_error_variants() {
    let failing = make_failing_index(4, FailOn::Add, 0);
    let unit_vector = vec![1.0_f32, 0.0, 0.0, 0.0];

    let outcome = failing.add_with_next_key(&unit_vector, String::from("test/name"));
    let err = outcome.unwrap_err();

    let is_index_variant = matches!(err, MemoryError::Index(_));
    assert!(
        is_index_variant,
        "TC-05a: error should be MemoryError::Index, got: {:?}",
        err
    );
}
1450
/// TC-05b: the `Display` text of a failing add must contain "index error"
/// and must not mention `usearch` or `cxx::Exception`.
#[test]
fn tc05b_error_display_has_no_raw_usearch_type_names() {
    let failing = make_failing_index(4, FailOn::Add, 0);
    let unit_vector = vec![1.0_f32, 0.0, 0.0, 0.0];

    let outcome = failing.add_with_next_key(&unit_vector, String::from("test/name"));
    let err = outcome.unwrap_err();
    let display = err.to_string();

    let has_index_prefix = display.contains("index error");
    assert!(
        has_index_prefix,
        "TC-05b: display should contain 'index error', got: {}",
        display
    );

    // Either substring appearing counts as a leak of the backend's types.
    let leaks_backend = display.contains("usearch") || display.contains("cxx::Exception");
    assert!(
        !leaks_backend,
        "TC-05b: display must not leak raw backend type names, got: {}",
        display
    );
}
1475
/// Adds a 3-element vector to a store created with `UsearchStore::new(8)`
/// and checks that the resulting error is `MemoryError::InvalidInput` and
/// that its `Display` text does not mention `usearch` or `cxx::Exception`.
#[test]
fn tc05b_dimension_mismatch_error_is_clean() {
    let store = UsearchStore::new(8).expect("create");
    // Deliberately shorter than what the store was created with.
    let wrong_dims = vec![1.0_f32, 0.0, 0.0];

    let outcome = store
        .inner
        .add(&Scope::Global, &wrong_dims, "global/bad-dims".to_string());
    let err = outcome.unwrap_err();
    let display = err.to_string();

    let leaks_backend = display.contains("usearch") || display.contains("cxx::Exception");
    assert!(
        !leaks_backend,
        "error display must not leak backend type names, got: {}",
        display
    );

    let is_invalid_input = matches!(err, MemoryError::InvalidInput { .. });
    assert!(
        is_invalid_input,
        "dimension mismatch should return InvalidInput, got: {:?}",
        err
    );
}
1500
/// TC-06a: a freshly created `UsearchStore` must report `is_ready() == true`.
#[test]
fn tc06a_usearch_store_is_ready() {
    let fresh_store = UsearchStore::new(4).expect("create");
    let ready = fresh_store.is_ready();
    assert!(
        ready,
        "TC-06a: UsearchStore::is_ready() should return true"
    );
}
1513}