Skip to main content

nodedb_vector/segment_backing/
plain.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! [`PlainMmapBacking`]: zero-copy [`VectorSegmentBacking`] over a plaintext
4//! NDVS mmap segment.
5
6use std::sync::Arc;
7
8use crate::mmap_segment::MmapVectorSegment;
9
10use super::VectorSegmentBacking;
11
12/// Zero-copy [`VectorSegmentBacking`] backed by a plaintext NDVS mmap segment.
13///
14/// Vectors and surrogate IDs are served as slices directly into the mmap
15/// region — no allocation on the read path.
16///
17/// # `Send + Sync` rationale
18///
19/// [`MmapVectorSegment`] is declared `!Send + !Sync` because it holds a
20/// `*const u8` field (`base`) pointing into the mmap region.  Raw pointers are
21/// conservative: the compiler cannot know whether the pointee is safe to share.
22///
23/// The mmap region behind `base` is:
24/// - mapped with `PROT_READ | MAP_PRIVATE` — never mutated through this
25///   pointer after construction,
26/// - valid for exactly the lifetime of the [`MmapVectorSegment`] (the
27///   descriptor `_fd` keeps the file open; `munmap` runs in `Drop`),
28/// - not thread-affine — the OS virtual-memory subsystem treats it as a
29///   process-global read-only region.
30///
31/// All access to `base` goes through `get_vector` / `get_surrogate` /
32/// `prefetch`, which derive shared borrows (`&[f32]`, `&[u8]`) that live no
33/// longer than `&self`.  No `&mut` path exists.  Multiple threads reading
34/// distinct vectors concurrently is safe for the same reason `&[T]` is `Sync`.
35///
36/// The `Arc<MmapVectorSegment>` wrapper ensures the segment (and therefore
37/// the mmap region) outlives any `&[f32]` slice handed out through this type.
38///
39/// SAFETY: given the above invariants, treating `PlainMmapBacking` as
40/// `Send + Sync` is correct.
41pub struct PlainMmapBacking {
42    inner: Arc<MmapVectorSegment>,
43}
44
45// SAFETY: see struct-level doc comment.  `MmapVectorSegment` holds a
46// `*const u8` (`base`) into a read-only MAP_PRIVATE mmap region.  The region
47// is immutable after construction, process-global, and valid for the lifetime
48// of the Arc.  No interior mutability exists; concurrent reads are safe.
49unsafe impl Send for PlainMmapBacking {}
50unsafe impl Sync for PlainMmapBacking {}
51
52impl PlainMmapBacking {
53    /// Wrap a [`MmapVectorSegment`] that is not yet reference-counted.
54    ///
55    /// `MmapVectorSegment` holds a `*const u8` raw pointer which makes it
56    /// `!Send + !Sync` by default. The `unsafe impl Send + Sync` on
57    /// `PlainMmapBacking` (see struct-level doc comment) establishes the safety
58    /// invariants; clippy cannot see through the `Arc` to the impl, so we
59    /// suppress the lint here.
60    #[allow(clippy::arc_with_non_send_sync)]
61    pub fn new(seg: MmapVectorSegment) -> Self {
62        Self {
63            inner: Arc::new(seg),
64        }
65    }
66
67    /// Wrap an already reference-counted segment.
68    ///
69    /// Useful when the same segment is shared with other consumers (e.g. a
70    /// [`crate::collection::VectorCollection`] that also owns the segment for
71    /// direct SIMD scan).
72    pub fn from_arc(seg: Arc<MmapVectorSegment>) -> Self {
73        Self { inner: seg }
74    }
75
76    /// Access the underlying segment.
77    pub fn segment(&self) -> &Arc<MmapVectorSegment> {
78        &self.inner
79    }
80}
81
82impl VectorSegmentBacking for PlainMmapBacking {
83    #[inline]
84    fn len(&self) -> usize {
85        self.inner.count()
86    }
87
88    #[inline]
89    fn dim(&self) -> usize {
90        self.inner.dim()
91    }
92
93    #[inline]
94    fn get_vector(&self, id: u32) -> Option<&[f32]> {
95        self.inner.get_vector(id)
96    }
97
98    #[inline]
99    fn get_surrogate(&self, id: u32) -> Option<u64> {
100        self.inner.get_surrogate_id(id)
101    }
102
103    #[inline]
104    fn prefetch(&self, id: u32) {
105        self.inner.prefetch(id);
106    }
107}
108
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use tempfile::tempdir;

    use super::*;
    use crate::mmap_segment::MmapVectorSegment;

    /// Write `vecs` (each expected to have `dim` components) to a fresh
    /// segment file, using the index of each vector as its surrogate ID, and
    /// wrap the resulting segment in a [`PlainMmapBacking`].
    fn make_backing(dim: usize, vecs: &[Vec<f32>]) -> PlainMmapBacking {
        let dir = tempdir().unwrap();
        let path = dir.path().join("test.ndvs");

        let refs: Vec<&[f32]> = vecs.iter().map(Vec::as_slice).collect();
        let surrogates: Vec<u64> = (0..vecs.len() as u64).collect();

        let seg =
            MmapVectorSegment::create_with_surrogates(&path, dim, &refs, &surrogates).unwrap();

        // The tempdir is deliberately dropped (and its files removed) before
        // the backing is returned: the segment is self-contained once
        // created — it keeps its own descriptor open and reads through the
        // mmap — so it does not need the directory to stay around.
        drop(dir);

        PlainMmapBacking::new(seg)
    }

    #[test]
    fn plain_backing_basic_roundtrip() {
        let dim = 4;
        let vecs = vec![
            vec![1.0_f32, 2.0, 3.0, 4.0],
            vec![5.0_f32, 6.0, 7.0, 8.0],
            vec![9.0_f32, 10.0, 11.0, 12.0],
        ];

        let backing = make_backing(dim, &vecs);

        assert_eq!(backing.len(), 3);
        assert_eq!(backing.dim(), 4);
        assert!(!backing.is_empty());

        for (i, expected) in vecs.iter().enumerate() {
            let got = backing
                .get_vector(i as u32)
                .expect("vector must be present");
            assert_eq!(got, expected.as_slice(), "vector {i} mismatch");

            let sid = backing
                .get_surrogate(i as u32)
                .expect("surrogate must be present");
            assert_eq!(sid, i as u64, "surrogate {i} mismatch");
        }

        // prefetch must not panic
        for id in 0..3 {
            backing.prefetch(id);
        }
    }

    /// Compile-time proof that `PlainMmapBacking` satisfies `Send + Sync`.
    ///
    /// The check is purely type-level, so no segment has to be created: the
    /// test fails to compile if either bound stops holding.
    #[test]
    fn plain_backing_is_send_sync() {
        fn assert_send_sync<T: Send + Sync>() {}

        assert_send_sync::<PlainMmapBacking>();
    }

    /// `from_arc` must wrap the caller's `Arc` itself (not a copy of the
    /// segment), and `segment` must hand that same `Arc` back.
    #[test]
    // The segment is `!Send + !Sync` by default; sharing it through an `Arc`
    // is exactly the scenario `from_arc` exists for.
    #[allow(clippy::arc_with_non_send_sync)]
    fn plain_backing_from_arc_shares_segment() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("shared.ndvs");
        let seg = Arc::new(MmapVectorSegment::create(&path, 2, &[&[1.0_f32, 2.0]]).unwrap());

        let backing = PlainMmapBacking::from_arc(Arc::clone(&seg));

        assert!(Arc::ptr_eq(backing.segment(), &seg));
        assert_eq!(backing.len(), 1);
    }

    #[test]
    fn plain_backing_out_of_bounds_returns_none() {
        let backing = make_backing(3, &[vec![1.0_f32, 2.0, 3.0]]);

        assert!(
            backing.get_vector(1).is_none(),
            "id=1 must be out of bounds"
        );
        assert!(
            backing.get_surrogate(1).is_none(),
            "id=1 surrogate must be out of bounds"
        );
        // prefetch on out-of-bounds must be a no-op (no panic)
        backing.prefetch(1);
    }

    #[test]
    fn plain_backing_empty_segment() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("empty.ndvs");
        let seg = MmapVectorSegment::create(&path, 4, &[]).unwrap();
        let backing = PlainMmapBacking::new(seg);

        assert_eq!(backing.len(), 0);
        assert!(backing.is_empty());
        assert!(backing.get_vector(0).is_none());
        assert!(backing.get_surrogate(0).is_none());
        backing.prefetch(0);
    }
}