Skip to main content

solo_core/
vector_index.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! `VectorIndex` and `VectorIndexFactory` traits. See ADR-0002 and ADR-0003.
4//!
5//! All methods take `&self`. Implementations MUST handle concurrency
6//! internally (e.g., `hnsw_rs` uses `parking_lot::RwLock` under the hood;
7//! search takes a read lock, insert takes a write lock). This shape lets the
8//! writer thread and read-pool tasks share a single
9//! `Arc<dyn VectorIndex + Send + Sync>` instance without application-level
10//! locking — see ADR-0003 § Operational invariants for the rationale.
11
12use crate::error::Result;
13use std::path::Path;
14
15/// Approximate nearest-neighbor index over FP32 vectors keyed by SQLite rowid.
16///
17/// All mutating methods take `&self`. The discipline that makes this safe is
18/// twofold: (1) the impl handles its own concurrency, and (2) only one task
19/// (the WriterActor on its dedicated OS thread) issues mutations — read tasks
20/// only call `search`. The trait does not enforce (2); callers must.
21pub trait VectorIndex: Send + Sync {
22    /// Add a vector keyed by SQLite rowid. Idempotent — adding an existing
23    /// rowid replaces the prior vector.
24    fn add(&self, rowid: i64, embedding: &[f32]) -> Result<()>;
25
26    /// Remove a vector by rowid. Idempotent — removing a missing rowid is OK.
27    /// May leave a tombstone (HNSW does); index is rebuilt periodically to
28    /// compact.
29    fn remove(&self, rowid: i64) -> Result<()>;
30
31    /// Approximate nearest-neighbor search. Returns up to k results sorted by
32    /// distance (ascending — smaller distance is more similar).
33    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(i64, f32)>>;
34
35    /// Snapshot the index to disk atomically. Implementations MUST write to a
36    /// `.tmp` file, fsync, then atomically rename over the target. The
37    /// previous snapshot should be preserved as a `.bak` file until the new
38    /// snapshot is fully in place. See `solo-v0-architecture.md §3.2` —
39    /// "the HNSW sidecar is a cache, not source-of-truth."
40    fn save(&self, path: &Path) -> Result<()>;
41
42    /// Number of vectors currently in the index. Used at startup to detect
43    /// drift against `SELECT COUNT(*) FROM episodes WHERE tier = 'hot'`. On
44    /// mismatch, the index is rebuilt from SQLite.
45    fn len(&self) -> usize;
46
47    /// True if the index contains no vectors.
48    fn is_empty(&self) -> bool {
49        self.len() == 0
50    }
51
52    /// Vector dimension. Must match the Embedder that produced the vectors.
53    fn dim(&self) -> usize;
54}
55
56/// Separates "create a fresh index" from "load existing index from disk."
57/// The factory holds the configuration (HNSW M, efConstruction, etc.) that
58/// is otherwise duplicated between the two operations.
59pub trait VectorIndexFactory: Send + Sync {
60    type Index: VectorIndex;
61
62    /// Create a new empty index with the given vector dimension.
63    fn create(&self, dim: usize) -> Result<Self::Index>;
64
65    /// Load an existing index from disk. Validates internal invariants on
66    /// load.
67    fn load(&self, path: &Path) -> Result<Self::Index>;
68}