use std::{
borrow::Cow,
collections::HashMap,
path::Path,
sync::{
atomic::{AtomicUsize, Ordering},
Mutex, RwLock,
},
};
use usearch::{Index, IndexOptions, MetricKind, ScalarKind};
use crate::{
error::MemoryError,
health::SubsystemReporter,
types::{validate_name, Scope, ScopeFilter},
};
const INITIAL_INDEX_CAPACITY: usize = 1024;
struct RawSearchResults {
keys: Vec<u64>,
distances: Vec<f32>,
}
type RawIndexError = Box<dyn std::error::Error + Send + Sync>;
trait RawIndex: Send + Sync + Sized {
fn create(dimensions: usize) -> Result<Self, RawIndexError>;
fn add(&self, key: u64, vector: &[f32]) -> Result<(), RawIndexError>;
fn remove(&self, key: u64) -> Result<(), RawIndexError>;
fn search(&self, query: &[f32], count: usize) -> Result<RawSearchResults, RawIndexError>;
fn save(&self, path: &str) -> Result<(), RawIndexError>;
fn reserve(&self, capacity: usize) -> Result<(), RawIndexError>;
fn size(&self) -> usize;
fn capacity(&self) -> usize;
}
struct UsearchRawIndex {
inner: Index,
}
impl RawIndex for UsearchRawIndex {
fn create(dimensions: usize) -> Result<Self, RawIndexError> {
let options = IndexOptions {
dimensions,
metric: MetricKind::Cos,
quantization: ScalarKind::F32,
..Default::default()
};
let inner = Index::new(&options)?;
inner.reserve(INITIAL_INDEX_CAPACITY)?;
Ok(Self { inner })
}
fn add(&self, key: u64, vector: &[f32]) -> Result<(), RawIndexError> {
self.inner.add(key, vector).map_err(|e| e.into())
}
fn remove(&self, key: u64) -> Result<(), RawIndexError> {
self.inner.remove(key).map(|_| ()).map_err(|e| e.into())
}
fn search(&self, query: &[f32], count: usize) -> Result<RawSearchResults, RawIndexError> {
let m = self.inner.search(query, count)?;
Ok(RawSearchResults {
keys: m.keys,
distances: m.distances,
})
}
fn save(&self, path: &str) -> Result<(), RawIndexError> {
self.inner.save(path).map_err(|e| e.into())
}
fn reserve(&self, capacity: usize) -> Result<(), RawIndexError> {
self.inner.reserve(capacity).map_err(|e| e.into())
}
fn size(&self) -> usize {
self.inner.size()
}
fn capacity(&self) -> usize {
self.inner.capacity()
}
}
struct VectorState<R: RawIndex> {
index: R,
key_map: HashMap<u64, String>,
name_map: HashMap<String, u64>,
next_key: u64,
commit_sha: Option<String>,
}
struct VectorIndex<R: RawIndex = UsearchRawIndex> {
state: Mutex<VectorState<R>>,
entry_count: AtomicUsize,
}
impl VectorIndex<UsearchRawIndex> {
fn load(path: &Path) -> Result<Self, MemoryError> {
let path_str = path.to_str().ok_or_else(|| MemoryError::InvalidInput {
reason: "non-UTF-8 index path".to_string(),
})?;
let options = IndexOptions {
dimensions: 1, metric: MetricKind::Cos,
quantization: ScalarKind::F32,
..Default::default()
};
let inner = Index::new(&options)
.map_err(|e| MemoryError::Index(format!("init for load: {}", e)))?;
inner
.load(path_str)
.map_err(|e| MemoryError::Index(format!("load: {}", e)))?;
let keys_path = format!("{}.keys.json", path_str);
let (key_map, next_key, commit_sha): (HashMap<u64, String>, u64, Option<String>) =
if std::path::Path::new(&keys_path).exists() {
let json = std::fs::read_to_string(&keys_path)?;
let value: serde_json::Value = serde_json::from_str(&json)
.map_err(|e| MemoryError::Index(format!("keymap deserialise: {}", e)))?;
if value.is_object() && value.get("key_map").is_some() {
let km: HashMap<u64, String> = serde_json::from_value(value["key_map"].clone())
.map_err(|e| MemoryError::Index(format!("keymap deserialise: {}", e)))?;
let nk: u64 = value["next_key"]
.as_u64()
.unwrap_or_else(|| km.keys().max().map(|k| k + 1).unwrap_or(0));
let sha: Option<String> = value
.get("commit_sha")
.and_then(|v| v.as_str())
.map(|s| s.to_string());
(km, nk, sha)
} else {
let km: HashMap<u64, String> = serde_json::from_value(value)
.map_err(|e| MemoryError::Index(format!("keymap deserialise: {}", e)))?;
let nk = km.keys().max().map(|k| k + 1).unwrap_or(0);
(km, nk, None)
}
} else {
(HashMap::new(), 0, None)
};
let name_map: HashMap<String, u64> = key_map.iter().map(|(&k, v)| (v.clone(), k)).collect();
if key_map.len() != name_map.len() {
tracing::warn!(
key_map_len = key_map.len(),
name_map_len = name_map.len(),
"key_map and name_map have different sizes; index may contain duplicate names"
);
}
let count = key_map.len();
Ok(Self {
state: Mutex::new(VectorState {
index: UsearchRawIndex { inner },
key_map,
name_map,
next_key,
commit_sha,
}),
entry_count: AtomicUsize::new(count),
})
}
}
fn raw_err(e: RawIndexError) -> MemoryError {
MemoryError::Index(e.to_string())
}
impl<R: RawIndex> VectorIndex<R> {
fn new(dimensions: usize) -> Result<Self, MemoryError> {
let index = R::create(dimensions).map_err(raw_err)?;
Ok(Self {
state: Mutex::new(VectorState {
index,
key_map: HashMap::new(),
name_map: HashMap::new(),
next_key: 0,
commit_sha: None,
}),
entry_count: AtomicUsize::new(0),
})
}
fn grow_if_needed_inner(state: &VectorState<R>, additional: usize) -> Result<(), MemoryError> {
let current_capacity = state.index.capacity();
let current_size = state.index.size();
if current_size + additional > current_capacity {
let new_capacity = (current_capacity + additional).max(current_capacity * 2);
state.index.reserve(new_capacity).map_err(raw_err)?;
}
Ok(())
}
fn find_key_by_name(&self, name: &str) -> Option<u64> {
let state = self
.state
.lock()
.expect("lock poisoned — prior panic corrupted state");
state.name_map.get(name).copied()
}
fn add_with_next_key(&self, vector: &[f32], name: String) -> Result<u64, MemoryError> {
let mut state = self
.state
.lock()
.expect("lock poisoned — prior panic corrupted state");
Self::grow_if_needed_inner(&state, 1)?;
let key = state.next_key;
state.index.add(key, vector).map_err(raw_err)?;
state.name_map.insert(name.clone(), key);
state.key_map.insert(key, name);
state.next_key = state
.next_key
.checked_add(1)
.expect("vector key space exhausted");
self.entry_count
.store(state.key_map.len(), Ordering::Relaxed);
Ok(key)
}
fn search(&self, query: &[f32], limit: usize) -> Result<Vec<(u64, String, f32)>, MemoryError> {
let state = self
.state
.lock()
.expect("lock poisoned — prior panic corrupted state");
let raw = state.index.search(query, limit).map_err(raw_err)?;
let results = raw
.keys
.into_iter()
.zip(raw.distances)
.filter_map(|(key, dist)| {
state
.key_map
.get(&key)
.map(|name| (key, name.clone(), dist))
})
.collect();
Ok(results)
}
fn remove(&self, key: u64) -> Result<(), MemoryError> {
let mut state = self
.state
.lock()
.expect("lock poisoned — prior panic corrupted state");
state.index.remove(key).map_err(raw_err)?;
if let Some(name) = state.key_map.remove(&key) {
if state.name_map.get(&name).copied() == Some(key) {
state.name_map.remove(&name);
}
self.entry_count
.store(state.key_map.len(), Ordering::Relaxed);
}
Ok(())
}
fn rollback_add(&self, new_key: u64, old_key: Option<u64>, name: &str) {
let mut state = self
.state
.lock()
.expect("lock poisoned — prior panic corrupted state");
if let Err(e) = state.index.remove(new_key) {
tracing::warn!(error = %e, "rollback: raw index remove failed");
}
state.key_map.remove(&new_key);
if let Some(old) = old_key {
state.name_map.insert(name.to_owned(), old);
} else {
state.name_map.remove(name);
}
self.entry_count
.store(state.key_map.len(), Ordering::Relaxed);
}
fn key_count(&self) -> usize {
self.entry_count.load(Ordering::Relaxed)
}
fn commit_sha(&self) -> Option<String> {
let state = self
.state
.lock()
.expect("lock poisoned — prior panic corrupted state");
state.commit_sha.clone()
}
fn set_commit_sha(&self, sha: Option<&str>) {
let mut state = self
.state
.lock()
.expect("lock poisoned — prior panic corrupted state");
state.commit_sha = sha.map(|s| s.to_owned());
}
fn save(&self, path: &Path) -> Result<(), MemoryError> {
let path_str = path.to_str().ok_or_else(|| MemoryError::InvalidInput {
reason: "non-UTF-8 index path".to_string(),
})?;
let state = self
.state
.lock()
.expect("lock poisoned — prior panic corrupted state");
state.index.save(path_str).map_err(raw_err)?;
let keys_path = format!("{}.keys.json", path_str);
let payload = serde_json::json!({
"key_map": &state.key_map,
"next_key": state.next_key,
"commit_sha": state.commit_sha,
});
let json = serde_json::to_string(&payload)
.map_err(|e| MemoryError::Index(format!("keymap serialise: {}", e)))?;
std::fs::write(&keys_path, json)?;
Ok(())
}
}
#[non_exhaustive]
pub struct UsearchStore {
inner: UsearchStoreInner<UsearchRawIndex>,
reporter: SubsystemReporter,
}
struct UsearchStoreInner<R: RawIndex> {
scopes: RwLock<HashMap<Scope, VectorIndex<R>>>,
all: VectorIndex<R>,
dimensions: usize,
}
impl UsearchStore {
pub fn new(dimensions: usize) -> Result<Self, MemoryError> {
Self::new_with_reporter(dimensions, SubsystemReporter::new())
}
pub fn new_with_reporter(
dimensions: usize,
reporter: SubsystemReporter,
) -> Result<Self, MemoryError> {
let global = VectorIndex::new(dimensions)?;
let all = VectorIndex::new(dimensions)?;
let mut scopes = HashMap::new();
scopes.insert(Scope::Global, global);
Ok(Self {
inner: UsearchStoreInner {
scopes: RwLock::new(scopes),
all,
dimensions,
},
reporter,
})
}
pub fn load(dir: &Path, dimensions: usize) -> Result<Self, MemoryError> {
Self::load_with_reporter(dir, dimensions, SubsystemReporter::new())
}
pub fn load_with_reporter(
dir: &Path,
dimensions: usize,
reporter: SubsystemReporter,
) -> Result<Self, MemoryError> {
let span = tracing::info_span!("index.load", key_count = tracing::field::Empty,);
let _enter = span.enter();
let dirty_marker = dir.join(".save-in-progress");
if dirty_marker.exists() {
tracing::warn!("detected interrupted index save — discarding indexes");
let _ = std::fs::remove_file(&dirty_marker);
return Self::new_with_reporter(dimensions, reporter);
}
let all_path = dir.join("all").join("index.usearch");
let all = if all_path.exists() {
VectorIndex::load(&all_path)?
} else {
VectorIndex::new(dimensions)?
};
let mut scopes: HashMap<Scope, VectorIndex<UsearchRawIndex>> = HashMap::new();
let global_path = dir.join("global").join("index.usearch");
let global = if global_path.exists() {
VectorIndex::load(&global_path)?
} else {
VectorIndex::new(dimensions)?
};
scopes.insert(Scope::Global, global);
let projects_dir = dir.join("projects");
if projects_dir.is_dir() {
let entries = std::fs::read_dir(&projects_dir)
.map_err(|e| MemoryError::Index(format!("read projects dir: {}", e)))?;
for entry in entries {
let entry =
entry.map_err(|e| MemoryError::Index(format!("read dir entry: {}", e)))?;
let path = entry.path();
if path.is_dir() {
let project_name = path
.file_name()
.and_then(|n| n.to_str())
.map(|s| s.to_string())
.ok_or_else(|| {
MemoryError::Index("non-UTF-8 project directory name".to_string())
})?;
if let Err(e) = validate_name(&project_name) {
tracing::warn!(
project_name = %project_name,
error = %e,
"skipping project index with invalid name"
);
continue;
}
let index_path = path.join("index.usearch");
if index_path.exists() {
let idx = VectorIndex::load(&index_path)?;
scopes.insert(Scope::Project(project_name), idx);
}
}
}
}
let key_count = all.key_count();
span.record("key_count", key_count);
Ok(Self {
inner: UsearchStoreInner {
scopes: RwLock::new(scopes),
all,
dimensions,
},
reporter,
})
}
}
impl<R: RawIndex> UsearchStoreInner<R> {
fn add(
&self,
scope: &Scope,
vector: &[f32],
qualified_name: String,
) -> Result<u64, MemoryError> {
let dimensions = vector.len();
let span = tracing::debug_span!(
"index.add",
scope = %scope.dir_prefix(),
dimensions,
key_count = tracing::field::Empty,
);
let _enter = span.enter();
if vector.len() != self.dimensions {
return Err(MemoryError::InvalidInput {
reason: format!(
"expected {} dimensions, got {}",
self.dimensions,
vector.len()
),
});
}
let mut scopes = self.scopes.write().expect("scopes lock poisoned");
if !scopes.contains_key(scope) {
scopes.insert(scope.clone(), Self::new_index(self.dimensions)?);
}
let scope_idx = scopes
.get(scope)
.expect("scope index must exist after insert");
let old_scope_key = scope_idx.find_key_by_name(&qualified_name);
let old_all_key = self.all.find_key_by_name(&qualified_name);
let new_scope_key = scope_idx.add_with_next_key(vector, qualified_name.clone())?;
let all_key = match self.all.add_with_next_key(vector, qualified_name.clone()) {
Ok(key) => key,
Err(e) => {
scope_idx.rollback_add(new_scope_key, old_scope_key, &qualified_name);
return Err(e);
}
};
if let Some(key) = old_scope_key {
let _ = scope_idx.remove(key);
}
if let Some(key) = old_all_key {
let _ = self.all.remove(key);
}
span.record("key_count", self.all.key_count());
Ok(all_key)
}
fn remove(&self, scope: &Scope, qualified_name: &str) -> Result<(), MemoryError> {
let _span = tracing::debug_span!(
"index.remove",
scope = %scope.dir_prefix(),
)
.entered();
let scopes = self.scopes.write().expect("scopes lock poisoned");
if let Some(scope_idx) = scopes.get(scope) {
if let Some(key) = scope_idx.find_key_by_name(qualified_name) {
if let Err(e) = scope_idx.remove(key) {
tracing::warn!(
qualified_name = %qualified_name,
error = %e,
"scope index removal failed; continuing to all-index"
);
}
}
}
if let Some(key) = self.all.find_key_by_name(qualified_name) {
if let Err(e) = self.all.remove(key) {
tracing::warn!(
qualified_name = %qualified_name,
error = %e,
"all-index removal failed"
);
}
}
Ok(())
}
fn search(
&self,
filter: &ScopeFilter,
query: &[f32],
limit: usize,
) -> Result<Vec<(u64, String, f32)>, MemoryError> {
let dimensions = query.len();
let scope_str: Cow<'_, str> = match filter {
ScopeFilter::GlobalOnly => "global".into(),
ScopeFilter::All => "all".into(),
ScopeFilter::ProjectAndGlobal(p) => format!("project+global:{p}").into(),
};
let span = tracing::debug_span!(
"index.search",
scope = %scope_str,
dimensions,
key_count = self.all.key_count(),
count = tracing::field::Empty,
);
let _enter = span.enter();
if query.len() != self.dimensions {
return Err(MemoryError::InvalidInput {
reason: format!(
"expected {} dimensions, got {}",
self.dimensions,
query.len()
),
});
}
let results = match filter {
ScopeFilter::All => self.all.search(query, limit),
ScopeFilter::GlobalOnly => {
let scopes = self.scopes.read().expect("scopes lock poisoned");
match scopes.get(&Scope::Global) {
Some(global_idx) => global_idx.search(query, limit),
None => Ok(Vec::new()),
}
}
ScopeFilter::ProjectAndGlobal(project_name) => {
let scopes = self.scopes.read().expect("scopes lock poisoned");
let project_scope = Scope::Project(project_name.clone());
let mut combined: Vec<(u64, String, f32)> = Vec::new();
if let Some(global_idx) = scopes.get(&Scope::Global) {
let mut global_results = global_idx.search(query, limit)?;
combined.append(&mut global_results);
}
if let Some(proj_idx) = scopes.get(&project_scope) {
let mut proj_results = proj_idx.search(query, limit)?;
combined.append(&mut proj_results);
}
let mut seen = std::collections::HashSet::new();
combined.retain(|(_, name, _)| seen.insert(name.clone()));
combined.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal));
combined.truncate(limit);
Ok(combined)
}
};
if let Ok(ref r) = results {
span.record("count", r.len());
}
results
}
fn find_key_by_name(&self, qualified_name: &str) -> Option<u64> {
self.all.find_key_by_name(qualified_name)
}
fn save(&self, dir: &Path) -> Result<(), MemoryError> {
let span = tracing::debug_span!("index.save", key_count = tracing::field::Empty,);
let _enter = span.enter();
std::fs::create_dir_all(dir)?;
let marker = dir.join(".save-in-progress");
std::fs::write(&marker, b"")?;
let result = (|| -> Result<(), MemoryError> {
let scopes = self.scopes.read().expect("scopes lock poisoned");
let all_dir = dir.join("all");
std::fs::create_dir_all(&all_dir)?;
self.all.save(&all_dir.join("index.usearch"))?;
for (scope, idx) in scopes.iter() {
let scope_dir = dir.join(scope.dir_prefix());
std::fs::create_dir_all(&scope_dir)?;
idx.save(&scope_dir.join("index.usearch"))?;
}
let key_count = self.all.key_count();
span.record("key_count", key_count);
Ok(())
})();
let _ = std::fs::remove_file(&marker);
result
}
fn commit_sha(&self) -> Option<String> {
self.all.commit_sha()
}
fn set_commit_sha(&self, sha: Option<&str>) {
let scopes = self.scopes.read().expect("scopes lock poisoned");
self.all.set_commit_sha(sha);
for idx in scopes.values() {
idx.set_commit_sha(sha);
}
}
}
impl<R: RawIndex> UsearchStoreInner<R> {
fn new_index(dimensions: usize) -> Result<VectorIndex<R>, MemoryError> {
VectorIndex::new(dimensions)
}
}
impl crate::index::sealed::Sealed for UsearchStore {}
impl crate::index::VectorStore for UsearchStore {
fn add(
&self,
scope: &Scope,
vector: &[f32],
qualified_name: String,
) -> Result<u64, MemoryError> {
let result = self.inner.add(scope, vector, qualified_name);
match &result {
Ok(_) => self.reporter.report_ok(),
Err(_) => self.reporter.report_err("index add failed"),
}
result
}
fn remove(&self, scope: &Scope, qualified_name: &str) -> Result<(), MemoryError> {
let result = self.inner.remove(scope, qualified_name);
match &result {
Ok(_) => self.reporter.report_ok(),
Err(_) => self.reporter.report_err("index remove failed"),
}
result
}
fn search(
&self,
filter: &ScopeFilter,
query: &[f32],
limit: usize,
) -> Result<Vec<(u64, String, f32)>, MemoryError> {
let result = self.inner.search(filter, query, limit);
match &result {
Ok(_) => self.reporter.report_ok(),
Err(_) => self.reporter.report_err("index search failed"),
}
result
}
fn find_by_name(&self, qualified_name: &str) -> Option<u64> {
self.inner.find_key_by_name(qualified_name)
}
fn save(&self, dir: &Path) -> Result<(), MemoryError> {
self.inner.save(dir)
}
fn is_ready(&self) -> bool {
true
}
fn dimensions(&self) -> usize {
self.inner.dimensions
}
fn commit_sha(&self) -> Option<String> {
self.inner.commit_sha()
}
fn set_commit_sha(&self, sha: Option<&str>) {
self.inner.set_commit_sha(sha)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::index::VectorStore;
#[derive(Debug, Clone, Copy, PartialEq)]
enum FailOn {
Add,
Remove,
Search,
Save,
Reserve,
None,
}
struct FailingRawIndex {
inner: Index,
fail_on: FailOn,
call_count: Mutex<usize>,
fail_after: usize,
}
impl FailingRawIndex {
fn new(dimensions: usize, fail_on: FailOn, fail_after: usize) -> Self {
let options = IndexOptions {
dimensions,
metric: MetricKind::Cos,
quantization: ScalarKind::F32,
..Default::default()
};
let inner = Index::new(&options).expect("create failing index");
inner.reserve(1024).expect("reserve");
Self {
inner,
fail_on,
call_count: Mutex::new(0),
fail_after,
}
}
fn should_fail(&self, op: FailOn) -> bool {
if self.fail_on != op {
return false;
}
let mut count = self.call_count.lock().unwrap();
if self.fail_after == 0 || *count >= self.fail_after {
return true;
}
*count += 1;
false
}
fn injected_error(op: &str) -> RawIndexError {
format!("injected {op} failure").into()
}
}
impl RawIndex for FailingRawIndex {
fn create(dimensions: usize) -> Result<Self, RawIndexError> {
Ok(FailingRawIndex::new(dimensions, FailOn::None, 0))
}
fn add(&self, key: u64, vector: &[f32]) -> Result<(), RawIndexError> {
if self.should_fail(FailOn::Add) {
return Err(Self::injected_error("add"));
}
self.inner.add(key, vector).map_err(|e| e.into())
}
fn remove(&self, key: u64) -> Result<(), RawIndexError> {
if self.should_fail(FailOn::Remove) {
return Err(Self::injected_error("remove"));
}
self.inner.remove(key).map(|_| ()).map_err(|e| e.into())
}
fn search(&self, query: &[f32], count: usize) -> Result<RawSearchResults, RawIndexError> {
if self.should_fail(FailOn::Search) {
return Err(Self::injected_error("search"));
}
let m = self.inner.search(query, count)?;
Ok(RawSearchResults {
keys: m.keys,
distances: m.distances,
})
}
fn save(&self, path: &str) -> Result<(), RawIndexError> {
if self.should_fail(FailOn::Save) {
return Err(Self::injected_error("save"));
}
self.inner.save(path).map_err(|e| e.into())
}
fn reserve(&self, capacity: usize) -> Result<(), RawIndexError> {
if self.should_fail(FailOn::Reserve) {
return Err(Self::injected_error("reserve"));
}
self.inner.reserve(capacity).map_err(|e| e.into())
}
fn size(&self) -> usize {
self.inner.size()
}
fn capacity(&self) -> usize {
self.inner.capacity()
}
}
fn make_failing_index(
dimensions: usize,
fail_on: FailOn,
fail_after: usize,
) -> VectorIndex<FailingRawIndex> {
VectorIndex {
state: Mutex::new(VectorState {
index: FailingRawIndex::new(dimensions, fail_on, fail_after),
key_map: HashMap::new(),
name_map: HashMap::new(),
next_key: 0,
commit_sha: None,
}),
entry_count: AtomicUsize::new(0),
}
}
struct FailableStore {
inner: UsearchStoreInner<FailingRawIndex>,
}
fn make_failable_store(
dimensions: usize,
all_fail_on: FailOn,
all_fail_after: usize,
) -> FailableStore {
let all = make_failing_index(dimensions, all_fail_on, all_fail_after);
let scope = make_failing_index(dimensions, FailOn::None, 0);
let mut scopes = HashMap::new();
scopes.insert(Scope::Global, scope);
FailableStore {
inner: UsearchStoreInner {
scopes: RwLock::new(scopes),
all,
dimensions,
},
}
}
fn make_index() -> VectorIndex {
VectorIndex::new(4).expect("failed to create index")
}
fn dummy_vec() -> Vec<f32> {
vec![1.0, 0.0, 0.0, 0.0]
}
#[test]
fn remove_old_key_does_not_clobber_upserted_name_map_entry() {
let index = make_index();
let v = dummy_vec();
let old_key = index
.add_with_next_key(&v, "global/foo".to_string())
.expect("first add failed");
let new_key = index
.add_with_next_key(&v, "global/foo".to_string())
.expect("second add failed");
assert_ne!(old_key, new_key, "keys must differ");
index.remove(old_key).expect("remove failed");
assert_eq!(
index.find_key_by_name("global/foo"),
Some(new_key),
"name_map entry for new_key was incorrectly removed"
);
}
#[test]
fn remove_only_key_clears_name_map() {
let index = make_index();
let v = dummy_vec();
let key = index
.add_with_next_key(&v, "global/bar".to_string())
.expect("add failed");
index.remove(key).expect("remove failed");
assert_eq!(
index.find_key_by_name("global/bar"),
None,
"name_map entry should have been cleared"
);
}
fn make_store() -> UsearchStore {
UsearchStore::new(8).expect("failed to create UsearchStore")
}
fn vec_a() -> Vec<f32> {
vec![1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
}
fn vec_b() -> Vec<f32> {
vec![0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
}
fn vec_c() -> Vec<f32> {
vec![0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]
}
#[test]
fn usearch_store_add_inserts_into_scope_and_all() {
let si: &dyn VectorStore = &make_store();
let scope = Scope::Global;
let name = "global/memory-a".to_string();
si.add(&scope, &vec_a(), name.clone()).expect("add failed");
assert!(si.find_by_name(&name).is_some(), "should be in all-index");
let results = si
.search(&ScopeFilter::GlobalOnly, &vec_a(), 5)
.expect("search failed");
assert!(
results.iter().any(|(_, n, _)| n == &name),
"should be found in global search"
);
}
#[test]
fn usearch_store_remove_removes_from_both() {
let si: &dyn VectorStore = &make_store();
let scope = Scope::Global;
let name = "global/memory-rm".to_string();
si.add(&scope, &vec_a(), name.clone()).expect("add failed");
assert!(si.find_by_name(&name).is_some(), "should exist");
si.remove(&scope, &name).expect("remove failed");
assert!(
si.find_by_name(&name).is_none(),
"should be gone from all-index"
);
let results = si
.search(&ScopeFilter::GlobalOnly, &vec_a(), 5)
.expect("search failed");
assert!(
!results.iter().any(|(_, n, _)| n == &name),
"should not appear in global search after removal"
);
}
#[test]
fn usearch_store_search_global_only() {
let si: &dyn VectorStore = &make_store();
let proj = Scope::Project("myproj".to_string());
si.add(&Scope::Global, &vec_a(), "global/mem-global".to_string())
.expect("add global failed");
si.add(&proj, &vec_b(), "projects/myproj/mem-proj".to_string())
.expect("add project failed");
let results = si
.search(&ScopeFilter::GlobalOnly, &vec_a(), 5)
.expect("search failed");
let names: Vec<&str> = results.iter().map(|(_, n, _)| n.as_str()).collect();
assert!(
names.contains(&"global/mem-global"),
"should contain global"
);
assert!(
!names.contains(&"projects/myproj/mem-proj"),
"should NOT contain project memory"
);
}
#[test]
fn usearch_store_search_project_and_global() {
let si: &dyn VectorStore = &make_store();
let proj_a = Scope::Project("alpha".to_string());
let proj_b = Scope::Project("beta".to_string());
si.add(&Scope::Global, &vec_a(), "global/g1".to_string())
.expect("add global failed");
si.add(&proj_a, &vec_b(), "projects/alpha/a1".to_string())
.expect("add alpha failed");
si.add(&proj_b, &vec_c(), "projects/beta/b1".to_string())
.expect("add beta failed");
let results = si
.search(
&ScopeFilter::ProjectAndGlobal("alpha".to_string()),
&vec_a(),
10,
)
.expect("search failed");
let names: Vec<&str> = results.iter().map(|(_, n, _)| n.as_str()).collect();
assert!(names.contains(&"global/g1"), "should contain global");
assert!(names.contains(&"projects/alpha/a1"), "should contain alpha");
assert!(
!names.contains(&"projects/beta/b1"),
"should NOT contain beta"
);
}
#[test]
fn usearch_store_search_all() {
let si: &dyn VectorStore = &make_store();
let proj = Scope::Project("foo".to_string());
si.add(&Scope::Global, &vec_a(), "global/x".to_string())
.expect("add global");
si.add(&proj, &vec_b(), "projects/foo/y".to_string())
.expect("add project");
let results = si
.search(&ScopeFilter::All, &vec_a(), 10)
.expect("search failed");
let names: Vec<&str> = results.iter().map(|(_, n, _)| n.as_str()).collect();
assert!(names.contains(&"global/x"), "all should include global");
assert!(
names.contains(&"projects/foo/y"),
"all should include project"
);
}
#[test]
fn usearch_store_upsert_replaces_old_entry() {
let si: &dyn VectorStore = &make_store();
let name = "global/memo".to_string();
si.add(&Scope::Global, &vec_a(), name.clone()).unwrap();
si.add(&Scope::Global, &vec_b(), name.clone()).unwrap();
let results = si.search(&ScopeFilter::All, &vec_b(), 10).unwrap();
assert_eq!(
results.iter().filter(|(_, n, _)| n == &name).count(),
1,
"upsert should leave exactly one entry for the name"
);
}
#[test]
fn usearch_store_dirty_marker_discards_indexes() {
let dir = tempfile::tempdir().expect("tempdir");
let si = UsearchStore::new(8).expect("create");
let store: &dyn VectorStore = &si;
store
.add(&Scope::Global, &vec_a(), "global/test-mem".to_string())
.expect("add");
store.set_commit_sha(Some("abc123"));
store.save(dir.path()).expect("save");
std::fs::write(dir.path().join(".save-in-progress"), b"").unwrap();
let loaded = UsearchStore::load(dir.path(), 8).expect("load");
let loaded: &dyn VectorStore = &loaded;
assert!(
loaded.commit_sha().is_none(),
"dirty marker should result in no SHA"
);
assert!(
loaded.find_by_name("global/test-mem").is_none(),
"dirty marker should discard all indexed data"
);
assert!(
!dir.path().join(".save-in-progress").exists(),
"marker should be cleaned up"
);
}
#[test]
fn usearch_store_save_load_round_trip() {
let dir = tempfile::tempdir().expect("tempdir");
let si = UsearchStore::new(8).expect("create");
let store: &dyn VectorStore = &si;
let proj = Scope::Project("rtrip".to_string());
store
.add(&Scope::Global, &vec_a(), "global/rt-global".to_string())
.expect("add global");
store
.add(&proj, &vec_b(), "projects/rtrip/rt-proj".to_string())
.expect("add project");
store.save(dir.path()).expect("save failed");
let loaded = UsearchStore::load(dir.path(), 8).expect("load failed");
let loaded: &dyn VectorStore = &loaded;
assert!(
loaded.find_by_name("global/rt-global").is_some(),
"global memory should survive round-trip"
);
assert!(
loaded.find_by_name("projects/rtrip/rt-proj").is_some(),
"project memory should survive round-trip"
);
let results = loaded
.search(
&ScopeFilter::ProjectAndGlobal("rtrip".to_string()),
&vec_a(),
10,
)
.expect("search failed");
let names: Vec<&str> = results.iter().map(|(_, n, _)| n.as_str()).collect();
assert!(names.contains(&"global/rt-global"));
assert!(names.contains(&"projects/rtrip/rt-proj"));
}
#[test]
fn usearch_store_same_short_name_different_scopes_coexist() {
let si: &dyn VectorStore = &make_store();
si.add(&Scope::Global, &vec_a(), "global/foo".to_string())
.unwrap();
si.add(
&Scope::Project("p".into()),
&vec_b(),
"projects/p/foo".to_string(),
)
.unwrap();
assert!(si.find_by_name("global/foo").is_some());
assert!(si.find_by_name("projects/p/foo").is_some());
assert_ne!(
si.find_by_name("global/foo"),
si.find_by_name("projects/p/foo"),
"different scopes should have distinct keys"
);
}
#[test]
fn tc03_failing_raw_index_is_injectable() {
let idx = make_failing_index(4, FailOn::Add, 0);
let v = vec![1.0_f32, 0.0, 0.0, 0.0];
let result = idx.add_with_next_key(&v, "test/name".to_string());
assert!(
result.is_err(),
"FailingRawIndex with FailOn::Add should return error"
);
}
#[test]
fn tc04a_rollback_on_all_index_failure_scope_count_unchanged() {
let fs = make_failable_store(8, FailOn::Add, 0);
let scope = Scope::Global;
let name = "global/rollback-test".to_string();
let result = fs.inner.add(&scope, &vec_a(), name.clone());
assert!(result.is_err(), "add should fail when all-index fails");
let scopes = fs.inner.scopes.read().unwrap();
let scope_idx = scopes.get(&scope).expect("global scope must exist");
assert_eq!(
scope_idx.key_count(),
0,
"TC-04a: scope index entry count should be 0 after rollback (was: {})",
scope_idx.key_count()
);
}
#[test]
fn tc04b_rollback_does_not_corrupt_existing_entries() {
let fs = make_failable_store(8, FailOn::Add, 1);
let scope = Scope::Global;
let first_name = "global/existing".to_string();
fs.inner
.add(&scope, &vec_a(), first_name.clone())
.expect("first add should succeed");
let second_name = "global/failing".to_string();
let result = fs.inner.add(&scope, &vec_b(), second_name.clone());
assert!(result.is_err(), "second add should fail");
assert!(
fs.inner.find_key_by_name(&first_name).is_some(),
"TC-04b: existing entry should not be corrupted by rollback"
);
assert!(
fs.inner.find_key_by_name(&second_name).is_none(),
"TC-04b: failed entry should not be in the index"
);
}
#[test]
fn tc04c_upsert_rollback_preserves_original_entry() {
let fs = make_failable_store(8, FailOn::Add, 1);
let scope = Scope::Global;
let name = "global/upsert-rollback".to_string();
let original_key = fs
.inner
.add(&scope, &vec_a(), name.clone())
.expect("TC-04c: first add should succeed");
let result = fs.inner.add(&scope, &vec_b(), name.clone());
assert!(
result.is_err(),
"TC-04c: second add should fail when all-index fails"
);
let key_after = fs.inner.find_key_by_name(&name);
assert_eq!(
key_after,
Some(original_key),
"TC-04c: original entry must survive upsert rollback (expected key {original_key}, got {key_after:?})"
);
}
#[test]
fn tc05a_errors_are_memory_error_variants() {
let idx = make_failing_index(4, FailOn::Add, 0);
let v = vec![1.0_f32, 0.0, 0.0, 0.0];
let err = idx
.add_with_next_key(&v, "test/name".to_string())
.unwrap_err();
assert!(
matches!(err, MemoryError::Index(_)),
"TC-05a: error should be MemoryError::Index, got: {:?}",
err
);
}
#[test]
fn tc05b_error_display_has_no_raw_usearch_type_names() {
let idx = make_failing_index(4, FailOn::Add, 0);
let v = vec![1.0_f32, 0.0, 0.0, 0.0];
let err = idx
.add_with_next_key(&v, "test/name".to_string())
.unwrap_err();
let display = format!("{}", err);
assert!(
display.contains("index error"),
"TC-05b: display should contain 'index error', got: {}",
display
);
assert!(
!display.contains("usearch") && !display.contains("cxx::Exception"),
"TC-05b: display must not leak raw backend type names, got: {}",
display
);
}
#[test]
fn tc05b_dimension_mismatch_error_is_clean() {
let store = UsearchStore::new(8).expect("create");
let wrong_dims = vec![1.0_f32, 0.0, 0.0]; let err = store
.inner
.add(&Scope::Global, &wrong_dims, "global/bad-dims".to_string())
.unwrap_err();
let display = format!("{}", err);
assert!(
!display.contains("usearch") && !display.contains("cxx::Exception"),
"error display must not leak backend type names, got: {}",
display
);
assert!(
matches!(err, MemoryError::InvalidInput { .. }),
"dimension mismatch should return InvalidInput, got: {:?}",
err
);
}
#[test]
fn tc06a_usearch_store_is_ready() {
let store = UsearchStore::new(4).expect("create");
assert!(
store.is_ready(),
"TC-06a: UsearchStore::is_ready() should return true"
);
}
}