nodedb_vector/segment_backing/plain.rs
1// SPDX-License-Identifier: Apache-2.0
2
3//! [`PlainMmapBacking`]: zero-copy [`VectorSegmentBacking`] over a plaintext
4//! NDVS mmap segment.
5
6use std::sync::Arc;
7
8use crate::mmap_segment::MmapVectorSegment;
9
10use super::VectorSegmentBacking;
11
12/// Zero-copy [`VectorSegmentBacking`] backed by a plaintext NDVS mmap segment.
13///
14/// Vectors and surrogate IDs are served as slices directly into the mmap
15/// region — no allocation on the read path.
16///
17/// # `Send + Sync` rationale
18///
19/// [`MmapVectorSegment`] is declared `!Send + !Sync` because it holds a
20/// `*const u8` field (`base`) pointing into the mmap region. Raw pointers are
21/// conservative: the compiler cannot know whether the pointee is safe to share.
22///
23/// The mmap region behind `base` is:
24/// - mapped with `PROT_READ | MAP_PRIVATE` — never mutated through this
25/// pointer after construction,
26/// - valid for exactly the lifetime of the [`MmapVectorSegment`] (the
27/// descriptor `_fd` keeps the file open; `munmap` runs in `Drop`),
28/// - not thread-affine — the OS virtual-memory subsystem treats it as a
29/// process-global read-only region.
30///
31/// All access to `base` goes through `get_vector` / `get_surrogate` /
32/// `prefetch`, which derive shared borrows (`&[f32]`, `&[u8]`) that live no
33/// longer than `&self`. No `&mut` path exists. Multiple threads reading
34/// distinct vectors concurrently is safe for the same reason `&[T]` is `Sync`.
35///
36/// The `Arc<MmapVectorSegment>` wrapper ensures the segment (and therefore
37/// the mmap region) outlives any `&[f32]` slice handed out through this type.
38///
39/// SAFETY: given the above invariants, treating `PlainMmapBacking` as
40/// `Send + Sync` is correct.
41pub struct PlainMmapBacking {
42 inner: Arc<MmapVectorSegment>,
43}
44
45// SAFETY: see struct-level doc comment. `MmapVectorSegment` holds a
46// `*const u8` (`base`) into a read-only MAP_PRIVATE mmap region. The region
47// is immutable after construction, process-global, and valid for the lifetime
48// of the Arc. No interior mutability exists; concurrent reads are safe.
49unsafe impl Send for PlainMmapBacking {}
50unsafe impl Sync for PlainMmapBacking {}
51
52impl PlainMmapBacking {
53 /// Wrap a [`MmapVectorSegment`] that is not yet reference-counted.
54 ///
55 /// `MmapVectorSegment` holds a `*const u8` raw pointer which makes it
56 /// `!Send + !Sync` by default. The `unsafe impl Send + Sync` on
57 /// `PlainMmapBacking` (see struct-level doc comment) establishes the safety
58 /// invariants; clippy cannot see through the `Arc` to the impl, so we
59 /// suppress the lint here.
60 #[allow(clippy::arc_with_non_send_sync)]
61 pub fn new(seg: MmapVectorSegment) -> Self {
62 Self {
63 inner: Arc::new(seg),
64 }
65 }
66
67 /// Wrap an already reference-counted segment.
68 ///
69 /// Useful when the same segment is shared with other consumers (e.g. a
70 /// [`crate::collection::VectorCollection`] that also owns the segment for
71 /// direct SIMD scan).
72 pub fn from_arc(seg: Arc<MmapVectorSegment>) -> Self {
73 Self { inner: seg }
74 }
75
76 /// Access the underlying segment.
77 pub fn segment(&self) -> &Arc<MmapVectorSegment> {
78 &self.inner
79 }
80}
81
82impl VectorSegmentBacking for PlainMmapBacking {
83 #[inline]
84 fn len(&self) -> usize {
85 self.inner.count()
86 }
87
88 #[inline]
89 fn dim(&self) -> usize {
90 self.inner.dim()
91 }
92
93 #[inline]
94 fn get_vector(&self, id: u32) -> Option<&[f32]> {
95 self.inner.get_vector(id)
96 }
97
98 #[inline]
99 fn get_surrogate(&self, id: u32) -> Option<u64> {
100 self.inner.get_surrogate_id(id)
101 }
102
103 #[inline]
104 fn prefetch(&self, id: u32) {
105 self.inner.prefetch(id);
106 }
107}
108
#[cfg(test)]
mod tests {
    use tempfile::tempdir;

    use super::*;
    use crate::mmap_segment::MmapVectorSegment;

    /// Write `vecs` to a fresh segment file in a temporary directory and wrap
    /// the mapped segment in a [`PlainMmapBacking`].
    ///
    /// Surrogate IDs are `0..vecs.len()`, so vector `i` carries surrogate `i`.
    fn make_backing(dim: usize, vecs: &[Vec<f32>]) -> PlainMmapBacking {
        let dir = tempdir().unwrap();
        let path = dir.path().join("test.ndvs");

        let refs: Vec<&[f32]> = vecs.iter().map(|v| v.as_slice()).collect();
        let surrogates: Vec<u64> = (0..vecs.len() as u64).collect();

        let seg =
            MmapVectorSegment::create_with_surrogates(&path, dim, &refs, &surrogates).unwrap();

        // The temp directory is deliberately NOT kept alive. The mapping is
        // expected to be self-contained once created (the segment keeps its
        // own descriptor open), so the directory and the file inside it can
        // be removed while the backing is still in use.
        drop(dir);

        PlainMmapBacking::new(seg)
    }

    /// Vectors and surrogate IDs read back through the backing match what was
    /// written, and `len` / `dim` / `is_empty` agree with the input.
    #[test]
    fn plain_backing_basic_roundtrip() {
        let dim = 4;
        let vecs = vec![
            vec![1.0_f32, 2.0, 3.0, 4.0],
            vec![5.0_f32, 6.0, 7.0, 8.0],
            vec![9.0_f32, 10.0, 11.0, 12.0],
        ];

        let backing = make_backing(dim, &vecs);

        assert_eq!(backing.len(), 3);
        assert_eq!(backing.dim(), 4);
        assert!(!backing.is_empty());

        // Count ids as `u32` directly so no narrowing cast is needed.
        for (i, expected) in (0u32..).zip(&vecs) {
            let got = backing.get_vector(i).expect("vector must be present");
            assert_eq!(got, expected.as_slice(), "vector {i} mismatch");

            let sid = backing
                .get_surrogate(i)
                .expect("surrogate must be present");
            assert_eq!(sid, u64::from(i), "surrogate {i} mismatch");
        }

        // prefetch must not panic
        backing.prefetch(0);
        backing.prefetch(1);
        backing.prefetch(2);
    }

    /// Compile-time proof that `PlainMmapBacking` satisfies `Send + Sync`.
    ///
    /// The check is purely type-level: if either bound were missing this
    /// function would fail to compile. No segment is created, so the test
    /// cannot fail because of file-system errors.
    #[test]
    fn plain_backing_is_send_sync() {
        fn assert_send_sync<T: Send + Sync>() {}

        assert_send_sync::<PlainMmapBacking>();
    }

    /// Ids past the end of the segment yield `None` and do not panic.
    #[test]
    fn plain_backing_out_of_bounds_returns_none() {
        let backing = make_backing(3, &[vec![1.0_f32, 2.0, 3.0]]);

        assert!(
            backing.get_vector(1).is_none(),
            "id=1 must be out of bounds"
        );
        assert!(
            backing.get_surrogate(1).is_none(),
            "id=1 surrogate must be out of bounds"
        );
        // prefetch on out-of-bounds must be a no-op (no panic)
        backing.prefetch(1);
    }

    /// A segment created with no vectors reports itself as empty and returns
    /// `None` for every lookup.
    #[test]
    fn plain_backing_empty_segment() {
        // `dir` is declared before `backing`, so it is dropped after it and
        // the file outlives the mapping in this test.
        let dir = tempdir().unwrap();
        let path = dir.path().join("empty.ndvs");
        let seg = MmapVectorSegment::create(&path, 4, &[]).unwrap();
        let backing = PlainMmapBacking::new(seg);

        assert_eq!(backing.len(), 0);
        assert!(backing.is_empty());
        assert!(backing.get_vector(0).is_none());
        assert!(backing.get_surrogate(0).is_none());
        backing.prefetch(0);
    }
}