solo_core/vector_index.rs
1// SPDX-License-Identifier: Apache-2.0
2
3//! `VectorIndex` and `VectorIndexFactory` traits. See ADR-0002 and ADR-0003.
4//!
5//! All methods take `&self`. Implementations MUST handle concurrency
6//! internally (e.g., `hnsw_rs` uses `parking_lot::RwLock` under the hood;
7//! search takes a read lock, insert takes a write lock). This shape lets the
8//! writer thread and read-pool tasks share a single
9//! `Arc<dyn VectorIndex + Send + Sync>` instance without application-level
10//! locking — see ADR-0003 § Operational invariants for the rationale.
11
12use crate::error::Result;
13use std::path::Path;
14
15/// Approximate nearest-neighbor index over FP32 vectors keyed by SQLite rowid.
16///
17/// All mutating methods take `&self`. The discipline that makes this safe is
18/// twofold: (1) the impl handles its own concurrency, and (2) only one task
19/// (the WriterActor on its dedicated OS thread) issues mutations — read tasks
20/// only call `search`. The trait does not enforce (2); callers must.
21pub trait VectorIndex: Send + Sync {
22 /// Add a vector keyed by SQLite rowid. Idempotent — adding an existing
23 /// rowid replaces the prior vector.
24 fn add(&self, rowid: i64, embedding: &[f32]) -> Result<()>;
25
26 /// Remove a vector by rowid. Idempotent — removing a missing rowid is OK.
27 /// May leave a tombstone (HNSW does); index is rebuilt periodically to
28 /// compact.
29 fn remove(&self, rowid: i64) -> Result<()>;
30
31 /// Approximate nearest-neighbor search. Returns up to k results sorted by
32 /// distance (ascending — smaller distance is more similar).
33 fn search(&self, query: &[f32], k: usize) -> Result<Vec<(i64, f32)>>;
34
35 /// Snapshot the index to disk atomically. Implementations MUST write to a
36 /// `.tmp` file, fsync, then atomically rename over the target. The
37 /// previous snapshot should be preserved as a `.bak` file until the new
38 /// snapshot is fully in place. See `solo-v0-architecture.md §3.2` —
39 /// "the HNSW sidecar is a cache, not source-of-truth."
40 fn save(&self, path: &Path) -> Result<()>;
41
42 /// Number of vectors currently in the index. Used at startup to detect
43 /// drift against `SELECT COUNT(*) FROM episodes WHERE tier = 'hot'`. On
44 /// mismatch, the index is rebuilt from SQLite.
45 fn len(&self) -> usize;
46
47 /// True if the index contains no vectors.
48 fn is_empty(&self) -> bool {
49 self.len() == 0
50 }
51
52 /// Vector dimension. Must match the Embedder that produced the vectors.
53 fn dim(&self) -> usize;
54}
55
56/// Separates "create a fresh index" from "load existing index from disk."
57/// The factory holds the configuration (HNSW M, efConstruction, etc.) that
58/// is otherwise duplicated between the two operations.
59pub trait VectorIndexFactory: Send + Sync {
60 type Index: VectorIndex;
61
62 /// Create a new empty index with the given vector dimension.
63 fn create(&self, dim: usize) -> Result<Self::Index>;
64
65 /// Load an existing index from disk. Validates internal invariants on
66 /// load.
67 fn load(&self, path: &Path) -> Result<Self::Index>;
68}