Skip to main content

memory_mcp/
index.rs

1use std::{
2    collections::HashMap,
3    path::Path,
4    sync::{Mutex, RwLock},
5};
6
7use usearch::{Index, IndexOptions, MetricKind, ScalarKind};
8
9use crate::{
10    error::MemoryError,
11    types::{validate_name, Scope, ScopeFilter},
12};
13
14// ---------------------------------------------------------------------------
15// VectorIndex
16// ---------------------------------------------------------------------------
17
/// Internal state kept behind the mutex.
///
/// All fields are mutated together under `VectorIndex::state`, so they are
/// never observable out of sync with one another.
struct VectorState {
    /// The underlying usearch index.
    index: Index,
    /// Maps usearch u64 keys → memory name strings.
    key_map: HashMap<u64, String>,
    /// Reverse map: memory name strings → usearch u64 keys (derived from key_map).
    name_map: HashMap<String, u64>,
    /// Monotonic counter used to assign unique vector keys.
    next_key: u64,
    /// Commit SHA at the time this index was last saved/loaded.
    commit_sha: Option<String>,
}
30
/// Wraps `usearch::Index` and a key-map behind a single `std::sync::Mutex`.
///
/// `usearch::Index` is `Send + Sync`, and `HashMap` is `Send`, so
/// `VectorIndex` is `Send + Sync` via the mutex.
///
/// Every public method acquires the mutex for the duration of the call, so
/// each operation (add, remove, search, save) is atomic with respect to the
/// index and both key maps.
pub struct VectorIndex {
    /// Single lock guarding the index and both key maps together.
    state: Mutex<VectorState>,
}
38
39impl VectorIndex {
40    /// Initial capacity reserved when creating a new index.
41    const INITIAL_CAPACITY: usize = 1024;
42
43    /// Create a new HNSW index with cosine metric.
44    pub fn new(dimensions: usize) -> Result<Self, MemoryError> {
45        let options = IndexOptions {
46            dimensions,
47            metric: MetricKind::Cos,
48            quantization: ScalarKind::F32,
49            ..Default::default()
50        };
51        let index =
52            Index::new(&options).map_err(|e| MemoryError::Index(format!("create: {}", e)))?;
53        // usearch requires reserve() before any add() calls.
54        index
55            .reserve(Self::INITIAL_CAPACITY)
56            .map_err(|e| MemoryError::Index(format!("reserve: {}", e)))?;
57        Ok(Self {
58            state: Mutex::new(VectorState {
59                index,
60                key_map: HashMap::new(),
61                name_map: HashMap::new(),
62                next_key: 0,
63                commit_sha: None,
64            }),
65        })
66    }
67
68    /// Grow the index if it doesn't have room for `additional` more vectors.
69    ///
70    /// Operates on an already-locked `VectorState` reference so callers that
71    /// already hold the lock can call this without re-locking.
72    fn grow_if_needed_inner(state: &VectorState, additional: usize) -> Result<(), MemoryError> {
73        let current_capacity = state.index.capacity();
74        let current_size = state.index.size();
75        if current_size + additional > current_capacity {
76            let new_capacity = (current_capacity + additional).max(current_capacity * 2);
77            state
78                .index
79                .reserve(new_capacity)
80                .map_err(|e| MemoryError::Index(format!("reserve: {}", e)))?;
81        }
82        Ok(())
83    }
84
85    /// Ensure the index has capacity for at least `additional` more vectors.
86    pub fn grow_if_needed(&self, additional: usize) -> Result<(), MemoryError> {
87        let state = self
88            .state
89            .lock()
90            .expect("lock poisoned — prior panic corrupted state");
91        Self::grow_if_needed_inner(&state, additional)
92    }
93
94    /// Atomically increment and return the next unique vector key.
95    #[cfg(test)]
96    pub fn next_key(&self) -> u64 {
97        let mut state = self
98            .state
99            .lock()
100            .expect("lock poisoned — prior panic corrupted state");
101        let key = state.next_key;
102        state.next_key += 1;
103        key
104    }
105
106    /// Find the vector key associated with a qualified memory name.
107    pub fn find_key_by_name(&self, name: &str) -> Option<u64> {
108        let state = self
109            .state
110            .lock()
111            .expect("lock poisoned — prior panic corrupted state");
112        state.name_map.get(name).copied()
113    }
114
115    /// Add a vector under the given key, growing the index if necessary.
116    #[cfg(test)]
117    pub fn add(&self, key: u64, vector: &[f32], name: String) -> Result<(), MemoryError> {
118        let mut state = self
119            .state
120            .lock()
121            .expect("lock poisoned — prior panic corrupted state");
122        Self::grow_if_needed_inner(&state, 1)?;
123        state
124            .index
125            .add(key, vector)
126            .map_err(|e| MemoryError::Index(format!("add: {}", e)))?;
127        state.name_map.insert(name.clone(), key);
128        state.key_map.insert(key, name);
129        Ok(())
130    }
131
132    /// Atomically allocate the next key and add the vector in one lock acquisition.
133    /// Returns the assigned key on success. On failure the counter is not advanced.
134    pub fn add_with_next_key(&self, vector: &[f32], name: String) -> Result<u64, MemoryError> {
135        let mut state = self
136            .state
137            .lock()
138            .expect("lock poisoned — prior panic corrupted state");
139        Self::grow_if_needed_inner(&state, 1)?;
140        let key = state.next_key;
141        state
142            .index
143            .add(key, vector)
144            .map_err(|e| MemoryError::Index(format!("add: {}", e)))?;
145        state.name_map.insert(name.clone(), key);
146        state.key_map.insert(key, name);
147        state.next_key = state
148            .next_key
149            .checked_add(1)
150            .expect("vector key space exhausted");
151        Ok(key)
152    }
153
    /// Search for the `limit` nearest neighbours of `query`.
    ///
    /// Returns `(key, qualified_name, distance)` triples sorted by ascending
    /// distance. Keys present in the usearch index but missing from
    /// `key_map` are silently dropped from the results.
    pub fn search(
        &self,
        query: &[f32],
        limit: usize,
    ) -> Result<Vec<(u64, String, f32)>, MemoryError> {
        let state = self
            .state
            .lock()
            .expect("lock poisoned — prior panic corrupted state")
        let matches = state
            .index
            .search(query, limit)
            .map_err(|e| MemoryError::Index(format!("search: {}", e)))?;

        // usearch returns parallel `keys` / `distances` arrays; zip them back
        // together and resolve each key to its memory name.
        let results = matches
            .keys
            .into_iter()
            .zip(matches.distances)
            .filter_map(|(key, dist)| {
                state
                    .key_map
                    .get(&key)
                    .map(|name| (key, name.clone(), dist))
            })
            .collect();
        Ok(results)
    }
184
185    /// Remove a vector by key.
186    pub fn remove(&self, key: u64) -> Result<(), MemoryError> {
187        let mut state = self
188            .state
189            .lock()
190            .expect("lock poisoned — prior panic corrupted state");
191        state
192            .index
193            .remove(key)
194            .map_err(|e| MemoryError::Index(format!("remove: {}", e)))?;
195        if let Some(name) = state.key_map.remove(&key) {
196            // Only remove from name_map if it still points to this key.
197            // An upsert may have already updated name_map to point to a newer key.
198            if state.name_map.get(&name).copied() == Some(key) {
199                state.name_map.remove(&name);
200            }
201        }
202        Ok(())
203    }
204
205    /// Return the number of entries currently in the key map.
206    pub(crate) fn key_count(&self) -> usize {
207        let state = self
208            .state
209            .lock()
210            .expect("lock poisoned — prior panic corrupted state");
211        state.key_map.len()
212    }
213
214    /// Return the commit SHA stored in the index metadata (if any).
215    pub fn commit_sha(&self) -> Option<String> {
216        let state = self
217            .state
218            .lock()
219            .expect("lock poisoned — prior panic corrupted state");
220        state.commit_sha.clone()
221    }
222
223    /// Set the commit SHA in the index metadata.
224    pub fn set_commit_sha(&self, sha: Option<&str>) {
225        let mut state = self
226            .state
227            .lock()
228            .expect("lock poisoned — prior panic corrupted state");
229        state.commit_sha = sha.map(|s| s.to_owned());
230    }
231
232    /// Persist the index to `path`. Also writes `<path>.keys.json`.
233    ///
234    /// If `commit_sha` is `Some`, it is written to the metadata alongside the
235    /// key map so the next load can verify freshness.
236    pub fn save(&self, path: &Path) -> Result<(), MemoryError> {
237        let path_str = path.to_str().ok_or_else(|| MemoryError::InvalidInput {
238            reason: "non-UTF-8 index path".to_string(),
239        })?;
240
241        let state = self
242            .state
243            .lock()
244            .expect("lock poisoned — prior panic corrupted state");
245        state
246            .index
247            .save(path_str)
248            .map_err(|e| MemoryError::Index(format!("save: {}", e)))?;
249
250        // Persist the key map and counter alongside the index.
251        let keys_path = format!("{}.keys.json", path_str);
252        let payload = serde_json::json!({
253            "key_map": &state.key_map,
254            "next_key": state.next_key,
255            "commit_sha": state.commit_sha,
256        });
257        let json = serde_json::to_string(&payload)
258            .map_err(|e| MemoryError::Index(format!("keymap serialise: {}", e)))?;
259        std::fs::write(&keys_path, json)?;
260
261        Ok(())
262    }
263
    /// Load an existing index from `path`. Also reads `<path>.keys.json`.
    ///
    /// Understands two sidecar formats: the current object form
    /// `{key_map, next_key, commit_sha}` and the legacy bare-HashMap form.
    /// A missing sidecar yields an empty key map.
    pub fn load(path: &Path) -> Result<Self, MemoryError> {
        let path_str = path.to_str().ok_or_else(|| MemoryError::InvalidInput {
            reason: "non-UTF-8 index path".to_string(),
        })?;

        // We need to know dimensions to create the IndexOptions for load.
        // usearch::Index::load() restores dimensions from the file, so we
        // use placeholder options here — they are overwritten on load.
        let options = IndexOptions {
            dimensions: 1, // overwritten by load()
            metric: MetricKind::Cos,
            quantization: ScalarKind::F32,
            ..Default::default()
        };
        let index = Index::new(&options)
            .map_err(|e| MemoryError::Index(format!("init for load: {}", e)))?;
        index
            .load(path_str)
            .map_err(|e| MemoryError::Index(format!("load: {}", e)))?;

        // Load the key map and counter.
        let keys_path = format!("{}.keys.json", path_str);
        let (key_map, next_key, commit_sha): (HashMap<u64, String>, u64, Option<String>) =
            if std::path::Path::new(&keys_path).exists() {
                let json = std::fs::read_to_string(&keys_path)?;
                // Support both old format (bare HashMap) and new format ({key_map, next_key}).
                let value: serde_json::Value = serde_json::from_str(&json)
                    .map_err(|e| MemoryError::Index(format!("keymap deserialise: {}", e)))?;
                if value.is_object() && value.get("key_map").is_some() {
                    let km: HashMap<u64, String> = serde_json::from_value(value["key_map"].clone())
                        .map_err(|e| MemoryError::Index(format!("keymap deserialise: {}", e)))?;
                    // Older object-form files may lack next_key: fall back to
                    // (max key + 1) so freshly assigned keys never collide.
                    let nk: u64 = value["next_key"]
                        .as_u64()
                        .unwrap_or_else(|| km.keys().max().map(|k| k + 1).unwrap_or(0));
                    let sha: Option<String> = value
                        .get("commit_sha")
                        .and_then(|v| v.as_str())
                        .map(|s| s.to_string());
                    (km, nk, sha)
                } else {
                    // Legacy format: bare HashMap.
                    let km: HashMap<u64, String> = serde_json::from_value(value)
                        .map_err(|e| MemoryError::Index(format!("keymap deserialise: {}", e)))?;
                    let nk = km.keys().max().map(|k| k + 1).unwrap_or(0);
                    (km, nk, None)
                }
            } else {
                // No sidecar file — start with an empty map and counter.
                (HashMap::new(), 0, None)
            };

        // Rebuild the reverse map. Duplicate names (corruption) collapse to
        // one entry; warn rather than fail, since the index is a cache.
        let name_map: HashMap<String, u64> = key_map.iter().map(|(&k, v)| (v.clone(), k)).collect();
        if key_map.len() != name_map.len() {
            tracing::warn!(
                key_map_len = key_map.len(),
                name_map_len = name_map.len(),
                "key_map and name_map have different sizes; index may contain duplicate names"
            );
        }

        Ok(Self {
            state: Mutex::new(VectorState {
                index,
                key_map,
                name_map,
                next_key,
                commit_sha,
            }),
        })
    }
334}
335
336// ---------------------------------------------------------------------------
337// ScopedIndex
338// ---------------------------------------------------------------------------
339
/// Manages multiple `VectorIndex` instances — one per scope (global, each
/// project) plus a combined "all" index. Every memory exists in exactly two
/// indexes: its scope-specific index + the "all" index.
///
/// `ScopedIndex` is `Send + Sync` because all inner state is protected by
/// `RwLock` / `Mutex`.
pub struct ScopedIndex {
    /// Per-scope indexes (global + each project). Project entries are
    /// created lazily on first `add` for that scope.
    scopes: RwLock<HashMap<Scope, VectorIndex>>,
    /// Combined index containing all vectors.
    all: VectorIndex,
    /// Embedding dimensions (needed to create new scope indexes).
    dimensions: usize,
}
354
355// Locking order: `scopes` (RwLock) is always acquired before any
356// `VectorIndex::state` (Mutex). Never hold a VectorIndex Mutex while
357// acquiring `scopes`. The `all` index is accessed directly (not through
358// `scopes`), but always while `scopes` is already held or after it has
359// been released — never in the reverse order.
360
361impl ScopedIndex {
362    /// Create a new `ScopedIndex` with empty global + all indexes.
363    pub fn new(dimensions: usize) -> Result<Self, MemoryError> {
364        let global = VectorIndex::new(dimensions)?;
365        let all = VectorIndex::new(dimensions)?;
366        let mut scopes = HashMap::new();
367        scopes.insert(Scope::Global, global);
368        Ok(Self {
369            scopes: RwLock::new(scopes),
370            all,
371            dimensions,
372        })
373    }
374
    /// Insert `vector` into both the scope-specific index and the all-index.
    ///
    /// Handles upserts: if `qualified_name` already exists in either index, the
    /// old entry is removed after the new one is successfully inserted.
    ///
    /// Returns the key assigned in the all-index.
    ///
    /// # Errors
    ///
    /// Returns `MemoryError::Index` if either underlying insert fails; on an
    /// all-index failure the scope-index insert is rolled back (best-effort).
    pub fn add(
        &self,
        scope: &Scope,
        vector: &[f32],
        qualified_name: String,
    ) -> Result<u64, MemoryError> {
        let dimensions = vector.len();
        let _span = tracing::debug_span!(
            "index.add",
            scope = %scope.dir_prefix(),
            dimensions,
            key_count = tracing::field::Empty,
        )
        .entered();

        // Write lock serialises the full find→insert→remove composite so
        // concurrent upserts for the same name cannot interleave. Reads
        // (via `search`) use a read lock and are not blocked by other reads.
        let mut scopes = self.scopes.write().expect("scopes lock poisoned");

        // Ensure scope index exists (inline, since we already hold write lock).
        if !scopes.contains_key(scope) {
            scopes.insert(scope.clone(), VectorIndex::new(self.dimensions)?);
        }

        let scope_idx = scopes
            .get(scope)
            .expect("scope index must exist after insert");

        // Capture old keys before inserting new ones.
        let old_scope_key = scope_idx.find_key_by_name(&qualified_name);
        let old_all_key = self.all.find_key_by_name(&qualified_name);

        // Insert into scope index first.
        let new_scope_key = scope_idx.add_with_next_key(vector, qualified_name.clone())?;

        // Insert into all-index; if this fails, roll back scope insert.
        // Note: the rollback path is not unit-tested because usearch allocation
        // failures are not injectable without a mock layer. The logic is simple
        // (remove the key we just inserted) and covered by VectorIndex::remove's
        // existing tests.
        let all_key = match self.all.add_with_next_key(vector, qualified_name) {
            Ok(key) => key,
            Err(e) => {
                let _ = scope_idx.remove(new_scope_key);
                return Err(e);
            }
        };

        // Both succeeded — now clean up old entries. VectorIndex::remove only
        // clears name_map when it still points at the removed key, so removing
        // the old key here cannot clobber the new entry.
        if let Some(key) = old_scope_key {
            let _ = scope_idx.remove(key);
        }
        if let Some(key) = old_all_key {
            let _ = self.all.remove(key);
        }

        // Record key_count (all-index size) after insertion.
        tracing::Span::current().record("key_count", self.all.key_count());

        Ok(all_key)
    }
443
444    /// Remove a memory by qualified name from both the scope-specific index
445    /// and the all-index.
446    ///
447    /// Both removals are best-effort: an error in one does not prevent the
448    /// other from running. Returns `Ok(())` regardless of individual failures.
449    pub fn remove(&self, scope: &Scope, qualified_name: &str) -> Result<(), MemoryError> {
450        let _span = tracing::debug_span!(
451            "index.remove",
452            scope = %scope.dir_prefix(),
453        )
454        .entered();
455
456        // Write lock serialises with concurrent adds for the same name.
457        let scopes = self.scopes.write().expect("scopes lock poisoned");
458
459        // Remove from scope index (best-effort).
460        if let Some(scope_idx) = scopes.get(scope) {
461            if let Some(key) = scope_idx.find_key_by_name(qualified_name) {
462                if let Err(e) = scope_idx.remove(key) {
463                    tracing::warn!(
464                        qualified_name = %qualified_name,
465                        error = %e,
466                        "scope index removal failed; continuing to all-index"
467                    );
468                }
469            }
470        }
471
472        // Remove from all-index (best-effort).
473        if let Some(key) = self.all.find_key_by_name(qualified_name) {
474            if let Err(e) = self.all.remove(key) {
475                tracing::warn!(
476                    qualified_name = %qualified_name,
477                    error = %e,
478                    "all-index removal failed"
479                );
480            }
481        }
482
483        Ok(())
484    }
485
    /// Search for the nearest neighbours of `query`, routing to the correct
    /// indexes based on `filter`.
    ///
    /// | `filter`               | Indexes searched          | Merge strategy             |
    /// |------------------------|---------------------------|----------------------------|
    /// | `GlobalOnly`           | `global`                  | Direct top-k               |
    /// | `ProjectAndGlobal(p)`  | `global` + `projects/p`   | Merge by distance, top-k   |
    /// | `All`                  | `all` combined index      | Direct top-k               |
    ///
    /// Each sub-index is queried with the full `limit` before merging, so the
    /// merged top-k cannot miss a result a single-index query would return.
    pub fn search(
        &self,
        filter: &ScopeFilter,
        query: &[f32],
        limit: usize,
    ) -> Result<Vec<(u64, String, f32)>, MemoryError> {
        let dimensions = query.len();
        // Human-readable scope label for the tracing span.
        let scope_str = match filter {
            ScopeFilter::GlobalOnly => "global".to_owned(),
            ScopeFilter::All => "all".to_owned(),
            ScopeFilter::ProjectAndGlobal(p) => format!("project+global:{p}"),
        };
        let span = tracing::debug_span!(
            "index.search",
            scope = %scope_str,
            dimensions,
            count = tracing::field::Empty,
        );
        let _enter = span.entered();

        let results = match filter {
            ScopeFilter::All => self.all.search(query, limit),

            ScopeFilter::GlobalOnly => {
                let scopes = self.scopes.read().expect("scopes lock poisoned");
                match scopes.get(&Scope::Global) {
                    Some(global_idx) => global_idx.search(query, limit),
                    // No global index yet — nothing to return.
                    None => Ok(Vec::new()),
                }
            }

            ScopeFilter::ProjectAndGlobal(project_name) => {
                let scopes = self.scopes.read().expect("scopes lock poisoned");
                let project_scope = Scope::Project(project_name.clone());

                let mut combined: Vec<(u64, String, f32)> = Vec::new();

                if let Some(global_idx) = scopes.get(&Scope::Global) {
                    let mut global_results = global_idx.search(query, limit)?;
                    combined.append(&mut global_results);
                }

                if let Some(proj_idx) = scopes.get(&project_scope) {
                    let mut proj_results = proj_idx.search(query, limit)?;
                    combined.append(&mut proj_results);
                }

                // Deduplicate by qualified name (HashSet ensures non-adjacent dupes are caught).
                let mut seen = std::collections::HashSet::new();
                combined.retain(|(_, name, _)| seen.insert(name.clone()));
                // Sort by ascending distance and take top-k.
                // NOTE(review): partial_cmp falls back to Equal for NaN
                // distances, keeping the sort panic-free but leaving any NaN
                // entries at arbitrary positions — confirm NaN can't occur.
                combined.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal));
                combined.truncate(limit);
                Ok(combined)
            }
        };
        // Record the result count on the span (only on success).
        if let Ok(ref r) = results {
            tracing::Span::current().record("count", r.len());
        }
        results
    }
555
    /// Find the key for a given qualified name in the **all-index** (not scope-specific).
    ///
    /// This is the canonical lookup — the all-index contains every memory regardless of scope.
    /// Returns `None` when the name has never been indexed (or was removed).
    pub fn find_key_by_name(&self, qualified_name: &str) -> Option<u64> {
        self.all.find_key_by_name(qualified_name)
    }
562
563    /// Grow all indexes to accommodate `additional` more vectors.
564    ///
565    /// Reserved for future batch-insert operations; no production callers currently exist.
566    #[allow(dead_code)]
567    pub fn grow_if_needed(&self, additional: usize) -> Result<(), MemoryError> {
568        self.all.grow_if_needed(additional)?;
569        let scopes = self.scopes.read().expect("scopes lock poisoned");
570        for idx in scopes.values() {
571            idx.grow_if_needed(additional)?;
572        }
573        Ok(())
574    }
575
576    /// Persist all indexes to subdirectories under `dir`.
577    ///
578    /// Layout:
579    /// ```text
580    /// dir/
581    ///   all/index.usearch  (+ .keys.json)
582    ///   global/index.usearch
583    ///   projects/foo/index.usearch
584    /// ```
585    pub fn save(&self, dir: &Path) -> Result<(), MemoryError> {
586        let _span =
587            tracing::debug_span!("index.save", key_count = tracing::field::Empty,).entered();
588
589        std::fs::create_dir_all(dir)?;
590
591        // Write a dirty marker — if we crash mid-save, the next load will see
592        // this and ignore commit SHAs (forcing a fresh rebuild).
593        let marker = dir.join(".save-in-progress");
594        std::fs::write(&marker, b"")?;
595
596        // Persist all-index.
597        let all_dir = dir.join("all");
598        std::fs::create_dir_all(&all_dir)?;
599        self.all.save(&all_dir.join("index.usearch"))?;
600
601        // Persist per-scope indexes.
602        let scopes = self.scopes.read().expect("scopes lock poisoned");
603        for (scope, idx) in scopes.iter() {
604            let scope_dir = dir.join(scope.dir_prefix());
605            std::fs::create_dir_all(&scope_dir)?;
606            idx.save(&scope_dir.join("index.usearch"))?;
607        }
608
609        // Record total key count (all-index is authoritative — it holds every entry).
610        let key_count = self.all.key_count();
611        tracing::Span::current().record("key_count", key_count);
612
613        // Remove marker — save completed successfully.
614        let _ = std::fs::remove_file(&marker);
615
616        Ok(())
617    }
618
    /// Load all indexes from subdirectories under `dir`.
    ///
    /// Missing subdirectories are treated as empty — those scopes will be
    /// rebuilt incrementally on next use.
    ///
    /// If a dirty marker from an interrupted save is found, all on-disk
    /// indexes are discarded and a fresh empty `ScopedIndex` is returned.
    pub fn load(dir: &Path, dimensions: usize) -> Result<Self, MemoryError> {
        let span = tracing::info_span!("index.load", key_count = tracing::field::Empty,);
        let _enter = span.entered();

        // If a previous save was interrupted, the on-disk state may be
        // inconsistent (some indexes from current state, others from prior).
        // Rather than loading mixed data, start fresh — indexes are a cache
        // that can always be rebuilt from the source-of-truth markdown files.
        let dirty_marker = dir.join(".save-in-progress");
        if dirty_marker.exists() {
            tracing::warn!("detected interrupted index save — discarding indexes");
            let _ = std::fs::remove_file(&dirty_marker);
            return Self::new(dimensions);
        }

        // Load all-index.
        let all_path = dir.join("all").join("index.usearch");
        let all = if all_path.exists() {
            VectorIndex::load(&all_path)?
        } else {
            VectorIndex::new(dimensions)?
        };

        let mut scopes: HashMap<Scope, VectorIndex> = HashMap::new();

        // Load global index.
        let global_path = dir.join("global").join("index.usearch");
        let global = if global_path.exists() {
            VectorIndex::load(&global_path)?
        } else {
            VectorIndex::new(dimensions)?
        };
        scopes.insert(Scope::Global, global);

        // Scan for project indexes under projects/*/
        let projects_dir = dir.join("projects");
        if projects_dir.is_dir() {
            let entries = std::fs::read_dir(&projects_dir)
                .map_err(|e| MemoryError::Index(format!("read projects dir: {}", e)))?;
            for entry in entries {
                let entry =
                    entry.map_err(|e| MemoryError::Index(format!("read dir entry: {}", e)))?;
                let path = entry.path();
                if path.is_dir() {
                    // Directory name doubles as the project name; must be UTF-8.
                    let project_name = path
                        .file_name()
                        .and_then(|n| n.to_str())
                        .map(|s| s.to_string())
                        .ok_or_else(|| {
                            MemoryError::Index("non-UTF-8 project directory name".to_string())
                        })?;
                    // Skip invalid names rather than aborting the whole load.
                    if let Err(e) = validate_name(&project_name) {
                        tracing::warn!(
                            project_name = %project_name,
                            error = %e,
                            "skipping project index with invalid name"
                        );
                        continue;
                    }
                    let index_path = path.join("index.usearch");
                    if index_path.exists() {
                        let idx = VectorIndex::load(&index_path)?;
                        scopes.insert(Scope::Project(project_name), idx);
                    }
                }
            }
        }

        // The all-index holds every entry; record its key count on the span.
        let key_count = all.key_count();
        tracing::Span::current().record("key_count", key_count);

        Ok(Self {
            scopes: RwLock::new(scopes),
            all,
            dimensions,
        })
    }
700
    /// Read the commit SHA from the all-index metadata.
    ///
    /// The all-index is authoritative here; per-scope SHAs are written by
    /// `set_commit_sha` but never read back through this accessor.
    pub fn commit_sha(&self) -> Option<String> {
        self.all.commit_sha()
    }
705
706    /// Set the commit SHA on all sub-indexes.
707    pub fn set_commit_sha(&self, sha: Option<&str>) {
708        self.all.set_commit_sha(sha);
709        let scopes = self.scopes.read().expect("scopes lock poisoned");
710        for idx in scopes.values() {
711            idx.set_commit_sha(sha);
712        }
713    }
714}
715
716// ---------------------------------------------------------------------------
717// Tests
718// ---------------------------------------------------------------------------
719
720#[cfg(test)]
721mod tests {
722    use super::*;
723
    /// Build a small 4-dimensional `VectorIndex` for unit tests.
    fn make_index() -> VectorIndex {
        VectorIndex::new(4).expect("failed to create index")
    }
727
    /// A fixed 4-dimensional unit vector matching `make_index()`'s dimensions.
    fn dummy_vec() -> Vec<f32> {
        vec![1.0, 0.0, 0.0, 0.0]
    }
731
    /// Verify that `remove(old_key)` does NOT clobber `name_map` when an
    /// upsert has already updated `name_map` to point to a newer key.
    ///
    /// Pattern: add_with_next_key("name") → old_key
    ///          add_with_next_key("name") → new_key  (name_map now points to new_key)
    ///          remove(old_key)
    ///          find_key_by_name("name") must return new_key (not None)
    ///
    /// This exercises the guard inside `VectorIndex::remove` that only clears
    /// a name_map entry when it still refers to the key being removed.
    #[test]
    fn remove_old_key_does_not_clobber_upserted_name_map_entry() {
        let index = make_index();
        let v = dummy_vec();

        // First insert — establishes old_key.
        let old_key = index
            .add_with_next_key(&v, "global/foo".to_string())
            .expect("first add failed");

        // Upsert (second insert for same name) — name_map now points to new_key.
        let new_key = index
            .add_with_next_key(&v, "global/foo".to_string())
            .expect("second add failed");

        assert_ne!(old_key, new_key, "keys must differ");

        // Remove the OLD key — should not disturb name_map's entry for new_key.
        index.remove(old_key).expect("remove failed");

        // name_map must still resolve "global/foo" to new_key.
        assert_eq!(
            index.find_key_by_name("global/foo"),
            Some(new_key),
            "name_map entry for new_key was incorrectly removed"
        );
    }
766
    /// Removing the current (only) key should clear the name_map entry.
    ///
    /// Complements the upsert test above: with no newer key in play, the
    /// guard inside `remove` must actually delete the name_map entry.
    #[test]
    fn remove_only_key_clears_name_map() {
        let index = make_index();
        let v = dummy_vec();

        let key = index
            .add_with_next_key(&v, "global/bar".to_string())
            .expect("add failed");

        index.remove(key).expect("remove failed");

        assert_eq!(
            index.find_key_by_name("global/bar"),
            None,
            "name_map entry should have been cleared"
        );
    }
785
786    // -----------------------------------------------------------------------
787    // ScopedIndex tests
788    // -----------------------------------------------------------------------
789
790    fn make_scoped() -> ScopedIndex {
791        ScopedIndex::new(8).expect("failed to create scoped index")
792    }
793
794    fn vec_a() -> Vec<f32> {
795        vec![1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
796    }
797
798    fn vec_b() -> Vec<f32> {
799        vec![0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
800    }
801
802    fn vec_c() -> Vec<f32> {
803        vec![0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]
804    }
805
806    #[test]
807    fn scoped_index_add_inserts_into_scope_and_all() {
808        let si = make_scoped();
809        let scope = Scope::Global;
810        let name = "global/memory-a".to_string();
811
812        si.add(&scope, &vec_a(), name.clone()).expect("add failed");
813
814        // Should be findable in the all-index via find_key_by_name.
815        assert!(
816            si.find_key_by_name(&name).is_some(),
817            "should be in all-index"
818        );
819
820        // Should also be in scope-specific index — verify via search.
821        let results = si
822            .search(&ScopeFilter::GlobalOnly, &vec_a(), 5)
823            .expect("search failed");
824        assert!(
825            results.iter().any(|(_, n, _)| n == &name),
826            "should be found in global search"
827        );
828    }
829
830    #[test]
831    fn scoped_index_remove_removes_from_both() {
832        let si = make_scoped();
833        let scope = Scope::Global;
834        let name = "global/memory-rm".to_string();
835
836        si.add(&scope, &vec_a(), name.clone()).expect("add failed");
837        assert!(si.find_key_by_name(&name).is_some(), "should exist");
838
839        si.remove(&scope, &name).expect("remove failed");
840
841        assert!(
842            si.find_key_by_name(&name).is_none(),
843            "should be gone from all-index"
844        );
845
846        let results = si
847            .search(&ScopeFilter::GlobalOnly, &vec_a(), 5)
848            .expect("search failed");
849        assert!(
850            !results.iter().any(|(_, n, _)| n == &name),
851            "should not appear in global search after removal"
852        );
853    }
854
855    #[test]
856    fn scoped_index_search_global_only() {
857        let si = make_scoped();
858        let proj = Scope::Project("myproj".to_string());
859
860        si.add(&Scope::Global, &vec_a(), "global/mem-global".to_string())
861            .expect("add global failed");
862        si.add(&proj, &vec_b(), "projects/myproj/mem-proj".to_string())
863            .expect("add project failed");
864
865        let results = si
866            .search(&ScopeFilter::GlobalOnly, &vec_a(), 5)
867            .expect("search failed");
868
869        let names: Vec<&str> = results.iter().map(|(_, n, _)| n.as_str()).collect();
870        assert!(
871            names.contains(&"global/mem-global"),
872            "should contain global"
873        );
874        assert!(
875            !names.contains(&"projects/myproj/mem-proj"),
876            "should NOT contain project memory"
877        );
878    }
879
880    #[test]
881    fn scoped_index_search_project_and_global() {
882        let si = make_scoped();
883        let proj_a = Scope::Project("alpha".to_string());
884        let proj_b = Scope::Project("beta".to_string());
885
886        si.add(&Scope::Global, &vec_a(), "global/g1".to_string())
887            .expect("add global failed");
888        si.add(&proj_a, &vec_b(), "projects/alpha/a1".to_string())
889            .expect("add alpha failed");
890        si.add(&proj_b, &vec_c(), "projects/beta/b1".to_string())
891            .expect("add beta failed");
892
893        let results = si
894            .search(
895                &ScopeFilter::ProjectAndGlobal("alpha".to_string()),
896                &vec_a(),
897                10,
898            )
899            .expect("search failed");
900
901        let names: Vec<&str> = results.iter().map(|(_, n, _)| n.as_str()).collect();
902        assert!(names.contains(&"global/g1"), "should contain global");
903        assert!(names.contains(&"projects/alpha/a1"), "should contain alpha");
904        assert!(
905            !names.contains(&"projects/beta/b1"),
906            "should NOT contain beta"
907        );
908    }
909
910    #[test]
911    fn scoped_index_search_all() {
912        let si = make_scoped();
913        let proj = Scope::Project("foo".to_string());
914
915        si.add(&Scope::Global, &vec_a(), "global/x".to_string())
916            .expect("add global");
917        si.add(&proj, &vec_b(), "projects/foo/y".to_string())
918            .expect("add project");
919
920        let results = si
921            .search(&ScopeFilter::All, &vec_a(), 10)
922            .expect("search failed");
923
924        let names: Vec<&str> = results.iter().map(|(_, n, _)| n.as_str()).collect();
925        assert!(names.contains(&"global/x"), "all should include global");
926        assert!(
927            names.contains(&"projects/foo/y"),
928            "all should include project"
929        );
930    }
931
932    #[test]
933    fn scoped_index_upsert_replaces_old_entry() {
934        let si = make_scoped();
935        let name = "global/memo".to_string();
936        si.add(&Scope::Global, &vec_a(), name.clone()).unwrap();
937        si.add(&Scope::Global, &vec_b(), name.clone()).unwrap();
938        // Should have exactly one entry in all-index search.
939        let results = si.search(&ScopeFilter::All, &vec_b(), 10).unwrap();
940        assert_eq!(
941            results.iter().filter(|(_, n, _)| n == &name).count(),
942            1,
943            "upsert should leave exactly one entry for the name"
944        );
945    }
946
947    #[test]
948    fn scoped_index_dirty_marker_discards_indexes() {
949        let dir = tempfile::tempdir().expect("tempdir");
950        let si = ScopedIndex::new(8).expect("create");
951        si.add(&Scope::Global, &vec_a(), "global/test-mem".to_string())
952            .expect("add");
953        si.set_commit_sha(Some("abc123"));
954        si.save(dir.path()).expect("save");
955
956        // Simulate interrupted save by re-creating the marker.
957        std::fs::write(dir.path().join(".save-in-progress"), b"").unwrap();
958
959        // Load should discard all indexes and return fresh empty ones.
960        let loaded = ScopedIndex::load(dir.path(), 8).expect("load");
961        assert!(
962            loaded.commit_sha().is_none(),
963            "dirty marker should result in no SHA"
964        );
965        assert!(
966            loaded.find_key_by_name("global/test-mem").is_none(),
967            "dirty marker should discard all indexed data"
968        );
969        assert!(
970            !dir.path().join(".save-in-progress").exists(),
971            "marker should be cleaned up"
972        );
973    }
974
975    #[test]
976    fn scoped_index_save_load_round_trip() {
977        let dir = tempfile::tempdir().expect("tempdir");
978        let si = ScopedIndex::new(8).expect("create");
979        let proj = Scope::Project("rtrip".to_string());
980
981        si.add(&Scope::Global, &vec_a(), "global/rt-global".to_string())
982            .expect("add global");
983        si.add(&proj, &vec_b(), "projects/rtrip/rt-proj".to_string())
984            .expect("add project");
985
986        si.save(dir.path()).expect("save failed");
987
988        let loaded = ScopedIndex::load(dir.path(), 8).expect("load failed");
989
990        // Verify all-index finds both memories.
991        assert!(
992            loaded.find_key_by_name("global/rt-global").is_some(),
993            "global memory should survive round-trip"
994        );
995        assert!(
996            loaded.find_key_by_name("projects/rtrip/rt-proj").is_some(),
997            "project memory should survive round-trip"
998        );
999
1000        // Verify search still works after reload.
1001        let results = loaded
1002            .search(
1003                &ScopeFilter::ProjectAndGlobal("rtrip".to_string()),
1004                &vec_a(),
1005                10,
1006            )
1007            .expect("search failed");
1008        let names: Vec<&str> = results.iter().map(|(_, n, _)| n.as_str()).collect();
1009        assert!(names.contains(&"global/rt-global"));
1010        assert!(names.contains(&"projects/rtrip/rt-proj"));
1011    }
1012
1013    #[test]
1014    fn scoped_index_same_short_name_different_scopes_coexist() {
1015        let si = make_scoped();
1016        si.add(&Scope::Global, &vec_a(), "global/foo".to_string())
1017            .unwrap();
1018        si.add(
1019            &Scope::Project("p".into()),
1020            &vec_b(),
1021            "projects/p/foo".to_string(),
1022        )
1023        .unwrap();
1024        assert!(si.find_key_by_name("global/foo").is_some());
1025        assert!(si.find_key_by_name("projects/p/foo").is_some());
1026        assert_ne!(
1027            si.find_key_by_name("global/foo"),
1028            si.find_key_by_name("projects/p/foo"),
1029            "different scopes should have distinct keys"
1030        );
1031    }
1032}