nodedb_vector/segment_backing/mod.rs
1// SPDX-License-Identifier: Apache-2.0
2
3pub mod plain;
4
5pub use plain::PlainMmapBacking;
6
/// Abstraction over the storage that holds an HNSW segment's vectors.
///
/// Vector retrieval is the implementation's job; HNSW graph traversal sits
/// on top of this trait and takes no storage-level decisions of its own.
///
/// # Implementations
///
/// Two implementations coexist:
/// - [`PlainMmapBacking`] — zero-copy mmap of a plaintext NDVS file (Origin).
/// - `PagedbBacking` — encrypted segment read via pagedb (Lite, task 2a.2).
///
/// # Why `Send + Sync`
///
/// With this bound a consumer can hold the backing as an
/// `Arc<dyn VectorSegmentBacking>` and dispatch it across tasks. Lite will
/// require that in task 2a.2.
///
/// # Why `get_vector` returns a borrowed slice
///
/// The `Option<&[f32]>` result borrows from `&self`. That fits both known
/// implementations:
/// - `PlainMmapBacking` slices into its mmap region, which lives as long as
///   the backing itself.
/// - The planned `PagedbBacking` will hold a long-lived decrypted vector slab
///   in a `MmapView` field on `self`.
///
/// Should a future backing genuinely be unable to hand out a `&self`-lifetime
/// slice without copying, the signature can move to `Cow<'_, [f32]>` in a
/// follow-up refactor. For now both known impls support the zero-copy path.
pub trait VectorSegmentBacking: Send + Sync {
    /// How many vectors this segment stores.
    fn len(&self) -> usize;

    /// Whether the segment holds no vectors at all.
    ///
    /// Provided method: derived from [`len`](Self::len), so implementors
    /// normally do not need to override it.
    fn is_empty(&self) -> bool {
        matches!(self.len(), 0)
    }

    /// Number of dimensions of each stored vector.
    fn dim(&self) -> usize;

    /// Look up a single vector by its local position id (0..len).
    ///
    /// Returns `None` if `id` is out of bounds. Backings should keep this
    /// cheap — zero-copy where possible, decrypted page lookup on cold paths.
    fn get_vector(&self, id: u32) -> Option<&[f32]>;

    /// Look up the surrogate id for a local position.
    ///
    /// Returns `None` if `id` is out of bounds.
    fn get_surrogate(&self, id: u32) -> Option<u64>;

    /// Optional hint that the given position may be read soon.
    ///
    /// The default does nothing. Implementations may call
    /// `madvise(MADV_WILLNEED)` (mmap) or warm a pagedb page cache entry.
    fn prefetch(&self, _id: u32) {}
}