Skip to main content

nodedb_vector/mmap_segment/
reader.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Memory-mapped reader for the NDVS v2 vector segment format.
4
5use std::os::fd::AsRawFd;
6use std::path::{Path, PathBuf};
7use std::sync::Arc;
8use std::sync::atomic::Ordering;
9
10use nodedb_mem::{BudgetGuard, EngineId, MemoryGovernor};
11
12use super::format::{
13    FOOTER_SIZE, FORMAT_VERSION, HEADER_SIZE, MAGIC, VectorSegmentCodec, VectorSegmentDropPolicy,
14    observability, vec_pad,
15};
16use super::writer::write_segment;
17use crate::error::VectorError;
18
19/// Memory-mapped vector segment file (v2 NDVS format).
20///
21/// Exposes a `&[f32]` view of the vector data block and a `&[u64]` view of
22/// the surrogate ID block — both zero-copy slices into the mmap region.
23///
24/// Not `Send` or `Sync` — owned by a single Data Plane core.
25#[derive(Debug)]
26pub struct MmapVectorSegment {
27    path: PathBuf,
28    _fd: std::fs::File,
29    base: *const u8,
30    mmap_size: usize,
31    dim: usize,
32    count: usize,
33    /// Byte offset of the vector data block within the mmap.
34    vec_offset: usize,
35    /// Byte offset of the surrogate ID block within the mmap.
36    sid_offset: usize,
37    drop_policy: VectorSegmentDropPolicy,
38    madvise_state: Option<libc::c_int>,
39    /// RAII budget guard for the mmap region.  Held for the lifetime of the
40    /// mapping; released automatically on `Drop` alongside `munmap`.
41    _budget_guard: Option<BudgetGuard>,
42}
43
44impl MmapVectorSegment {
45    // ── Constructors ──────────────────────────────────────────────────────────
46
47    /// Create a new segment file (surrogates default to 0) and open it.
48    pub fn create(path: &Path, dim: usize, vectors: &[&[f32]]) -> std::io::Result<Self> {
49        write_segment(path, dim, vectors, &[])?;
50        Self::open_with_policy(path, VectorSegmentDropPolicy::default())
51    }
52
53    /// Create a new segment file with explicit surrogate IDs and open it.
54    pub fn create_with_surrogates(
55        path: &Path,
56        dim: usize,
57        vectors: &[&[f32]],
58        surrogate_ids: &[u64],
59    ) -> std::io::Result<Self> {
60        write_segment(path, dim, vectors, surrogate_ids)?;
61        Self::open_with_policy(path, VectorSegmentDropPolicy::default())
62    }
63
64    /// Create a new segment with an explicit drop policy.
65    pub fn create_with_policy(
66        path: &Path,
67        dim: usize,
68        vectors: &[&[f32]],
69        policy: VectorSegmentDropPolicy,
70    ) -> std::io::Result<Self> {
71        write_segment(path, dim, vectors, &[])?;
72        Self::open_with_policy(path, policy)
73    }
74
75    /// Open an existing segment file and memory-map it.
76    pub fn open(path: &Path) -> std::io::Result<Self> {
77        Self::open_with_policy(path, VectorSegmentDropPolicy::default())
78    }
79
80    /// Open an existing segment with an explicit drop policy.
81    pub fn open_with_policy(path: &Path, policy: VectorSegmentDropPolicy) -> std::io::Result<Self> {
82        let fd = std::fs::OpenOptions::new().read(true).open(path)?;
83        let file_size = fd.metadata()?.len() as usize;
84
85        let min_size = HEADER_SIZE + FOOTER_SIZE;
86        if file_size < min_size {
87            return Err(std::io::Error::new(
88                std::io::ErrorKind::InvalidData,
89                format!("segment file too small: {file_size} < {min_size} bytes"),
90            ));
91        }
92
93        let base = unsafe {
94            libc::mmap(
95                std::ptr::null_mut(),
96                file_size,
97                libc::PROT_READ,
98                libc::MAP_PRIVATE,
99                fd.as_raw_fd(),
100                0,
101            )
102        };
103        if base == libc::MAP_FAILED {
104            return Err(std::io::Error::last_os_error());
105        }
106        let base = base as *const u8;
107
108        Self::validate_and_build(fd, base, file_size, path, policy, None).inspect_err(|_e| {
109            unsafe { libc::munmap(base as *mut libc::c_void, file_size) };
110        })
111    }
112
113    /// Open an existing segment with a memory governor.
114    ///
115    /// Reserves `file_size` bytes in the `EngineId::Vector` budget before
116    /// mapping the file.  Returns `VectorError::BudgetExhausted` if the
117    /// governor rejects the reservation.  The reservation is released
118    /// automatically when the segment is dropped (RAII via `BudgetGuard`).
119    pub fn open_with_governor(
120        path: &Path,
121        governor: &Arc<MemoryGovernor>,
122    ) -> Result<Self, VectorError> {
123        Self::open_with_governor_and_policy(path, governor, VectorSegmentDropPolicy::default())
124    }
125
126    /// Open an existing segment with a memory governor and explicit drop policy.
127    pub fn open_with_governor_and_policy(
128        path: &Path,
129        governor: &Arc<MemoryGovernor>,
130        policy: VectorSegmentDropPolicy,
131    ) -> Result<Self, VectorError> {
132        let fd = std::fs::OpenOptions::new().read(true).open(path)?;
133        let file_size = fd.metadata()?.len() as usize;
134
135        let budget_guard = governor.reserve(EngineId::Vector, file_size)?;
136
137        let min_size = HEADER_SIZE + FOOTER_SIZE;
138        if file_size < min_size {
139            // budget_guard dropped here → bytes returned to budget
140            return Err(std::io::Error::new(
141                std::io::ErrorKind::InvalidData,
142                format!("segment file too small: {file_size} < {min_size} bytes"),
143            )
144            .into());
145        }
146
147        let base = unsafe {
148            libc::mmap(
149                std::ptr::null_mut(),
150                file_size,
151                libc::PROT_READ,
152                libc::MAP_PRIVATE,
153                fd.as_raw_fd(),
154                0,
155            )
156        };
157        if base == libc::MAP_FAILED {
158            // budget_guard dropped here → bytes returned to budget
159            return Err(std::io::Error::last_os_error().into());
160        }
161        let base = base as *const u8;
162
163        Self::validate_and_build(fd, base, file_size, path, policy, Some(budget_guard))
164            .map_err(VectorError::from)
165            .inspect_err(|_| {
166                unsafe { libc::munmap(base as *mut libc::c_void, file_size) };
167            })
168    }
169
170    // ── Validation ────────────────────────────────────────────────────────────
171
172    fn validate_and_build(
173        fd: std::fs::File,
174        base: *const u8,
175        file_size: usize,
176        path: &Path,
177        policy: VectorSegmentDropPolicy,
178        budget_guard: Option<BudgetGuard>,
179    ) -> std::io::Result<Self> {
180        // Validate magic + format version.
181        let header = unsafe { std::slice::from_raw_parts(base, HEADER_SIZE) };
182        if &header[0..4] != MAGIC.as_slice() {
183            return Err(std::io::Error::new(
184                std::io::ErrorKind::InvalidData,
185                "invalid NDVS magic bytes",
186            ));
187        }
188        let fv = u16::from_le_bytes([header[4], header[5]]);
189        if fv != FORMAT_VERSION {
190            return Err(std::io::Error::new(
191                std::io::ErrorKind::InvalidData,
192                format!("unsupported segment format version {fv}; expected {FORMAT_VERSION}"),
193            ));
194        }
195
196        let dim = u32::from_le_bytes([header[8], header[9], header[10], header[11]]) as usize;
197        let count = u64::from_le_bytes([
198            header[12], header[13], header[14], header[15], header[16], header[17], header[18],
199            header[19],
200        ]) as usize;
201        let compression_byte = header[21];
202
203        // Codec dispatch — exhaustive match; non-None variants will be
204        // obvious when compression is wired in the future.
205        let codec = VectorSegmentCodec::from_u8(compression_byte)?;
206        match codec {
207            VectorSegmentCodec::None => { /* raw packed f32 — proceed */ }
208        }
209
210        if dim == 0 && count > 0 {
211            return Err(std::io::Error::new(
212                std::io::ErrorKind::InvalidData,
213                "segment has dim=0 with nonzero count",
214            ));
215        }
216
217        // Validate total file size with overflow-safe arithmetic.
218        let vec_bytes = dim
219            .checked_mul(count)
220            .and_then(|n| n.checked_mul(4))
221            .ok_or_else(|| {
222                std::io::Error::new(
223                    std::io::ErrorKind::InvalidData,
224                    format!("segment header overflow: dim={dim}, count={count}"),
225                )
226            })?;
227        let sid_bytes = count.checked_mul(8).ok_or_else(|| {
228            std::io::Error::new(
229                std::io::ErrorKind::InvalidData,
230                format!("surrogate block overflow: count={count}"),
231            )
232        })?;
233        let pad_bytes = vec_pad(vec_bytes);
234        let expected = HEADER_SIZE
235            .checked_add(vec_bytes)
236            .and_then(|n| n.checked_add(pad_bytes))
237            .and_then(|n| n.checked_add(sid_bytes))
238            .and_then(|n| n.checked_add(FOOTER_SIZE))
239            .ok_or_else(|| {
240                std::io::Error::new(
241                    std::io::ErrorKind::InvalidData,
242                    "total segment size overflow",
243                )
244            })?;
245        if file_size != expected {
246            return Err(std::io::Error::new(
247                std::io::ErrorKind::InvalidData,
248                format!("segment size mismatch: expected {expected} bytes, got {file_size}"),
249            ));
250        }
251
252        // Validate footer.
253        let footer_start = file_size - FOOTER_SIZE;
254        let footer = unsafe { std::slice::from_raw_parts(base.add(footer_start), FOOTER_SIZE) };
255
256        // Trailing magic — last 4 bytes of the file must be b"NDVS".
257        if &footer[42..46] != MAGIC.as_slice() {
258            return Err(std::io::Error::new(
259                std::io::ErrorKind::InvalidData,
260                "invalid NDVS trailing magic bytes",
261            ));
262        }
263
264        let footer_fv = u16::from_le_bytes([footer[0], footer[1]]);
265        if footer_fv != FORMAT_VERSION {
266            return Err(std::io::Error::new(
267                std::io::ErrorKind::InvalidData,
268                format!(
269                    "unsupported segment footer version {footer_fv}; expected {FORMAT_VERSION}"
270                ),
271            ));
272        }
273        let stored_footer_size =
274            u32::from_le_bytes([footer[38], footer[39], footer[40], footer[41]]) as usize;
275        if stored_footer_size != FOOTER_SIZE {
276            return Err(std::io::Error::new(
277                std::io::ErrorKind::InvalidData,
278                format!("footer_size field {stored_footer_size} != {FOOTER_SIZE}"),
279            ));
280        }
281
282        // CRC32C integrity check.
283        let body = unsafe { std::slice::from_raw_parts(base, footer_start) };
284        let computed = crc32c::crc32c(body);
285        let stored = u32::from_le_bytes([footer[34], footer[35], footer[36], footer[37]]);
286        if computed != stored {
287            return Err(std::io::Error::new(
288                std::io::ErrorKind::InvalidData,
289                format!("CRC32C mismatch: stored {stored:#010x}, computed {computed:#010x}"),
290            ));
291        }
292
293        let vec_offset = HEADER_SIZE;
294        let sid_offset = HEADER_SIZE + vec_bytes + pad_bytes;
295
296        // Advise MADV_RANDOM: HNSW traversal is non-sequential.
297        let mut madvise_state = None;
298        if vec_bytes + sid_bytes > 0 {
299            let rc =
300                unsafe { libc::madvise(base as *mut libc::c_void, file_size, libc::MADV_RANDOM) };
301            if rc == 0 {
302                madvise_state = Some(libc::MADV_RANDOM);
303                observability::RANDOM_COUNT.fetch_add(1, Ordering::Relaxed);
304            } else {
305                tracing::warn!(
306                    path = %path.display(),
307                    errno = std::io::Error::last_os_error().raw_os_error().unwrap_or(0),
308                    "madvise(MADV_RANDOM) failed on vector segment; continuing with kernel default",
309                );
310            }
311        }
312
313        Ok(Self {
314            path: path.to_path_buf(),
315            _fd: fd,
316            base,
317            mmap_size: file_size,
318            dim,
319            count,
320            vec_offset,
321            sid_offset,
322            drop_policy: policy,
323            madvise_state,
324            _budget_guard: budget_guard,
325        })
326    }
327
328    // ── Accessors ─────────────────────────────────────────────────────────────
329
330    /// The madvise hint set on this segment (if any).
331    pub fn madvise_state(&self) -> Option<libc::c_int> {
332        self.madvise_state
333    }
334
335    /// Get a vector by local index. Returns a slice into the mmap'd region.
336    #[inline]
337    pub fn get_vector(&self, id: u32) -> Option<&[f32]> {
338        let idx = id as usize;
339        if idx >= self.count {
340            return None;
341        }
342        let byte_len = self.dim.checked_mul(4)?;
343        let offset = self.vec_offset.checked_add(idx.checked_mul(byte_len)?)?;
344        let end = offset.checked_add(byte_len)?;
345        if end > self.sid_offset {
346            return None;
347        }
348        unsafe {
349            let ptr = self.base.add(offset) as *const f32;
350            Some(std::slice::from_raw_parts(ptr, self.dim))
351        }
352    }
353
354    /// Get the surrogate ID for a local index (0-based row in this segment).
355    #[inline]
356    pub fn get_surrogate_id(&self, id: u32) -> Option<u64> {
357        let idx = id as usize;
358        if idx >= self.count {
359            return None;
360        }
361        let offset = self.sid_offset.checked_add(idx.checked_mul(8)?)?;
362        let end = offset.checked_add(8)?;
363        let sid_end = self.mmap_size - FOOTER_SIZE;
364        if end > sid_end {
365            return None;
366        }
367        let bytes = unsafe { std::slice::from_raw_parts(self.base.add(offset), 8) };
368        Some(u64::from_le_bytes(bytes.try_into().expect("invariant: from_raw_parts constructed with len=8, so try_into::<[u8;8]> always succeeds")))
369    }
370
371    /// The full vector data block as a contiguous `&[f32]` of length `D × N`.
372    ///
373    /// Suitable for SIMD distance computation over all vectors.
374    #[inline]
375    pub fn all_vectors_flat(&self) -> &[f32] {
376        let float_count = self.dim * self.count;
377        unsafe {
378            let ptr = self.base.add(self.vec_offset) as *const f32;
379            std::slice::from_raw_parts(ptr, float_count)
380        }
381    }
382
383    /// The full surrogate ID block as a contiguous `&[u64]` of length `N`.
384    ///
385    /// Parallel to `all_vectors_flat`: row `i` in vectors ↔ `surrogate_ids[i]`.
386    #[inline]
387    pub fn all_surrogate_ids(&self) -> &[u64] {
388        unsafe {
389            let ptr = self.base.add(self.sid_offset) as *const u64;
390            std::slice::from_raw_parts(ptr, self.count)
391        }
392    }
393
394    /// Prefetch a vector's page into memory via `madvise(MADV_WILLNEED)`.
395    pub fn prefetch(&self, id: u32) {
396        let idx = id as usize;
397        if idx >= self.count {
398            return;
399        }
400        let byte_len = match self.dim.checked_mul(4) {
401            Some(v) => v,
402            None => return,
403        };
404        let Some(idx_bytes) = idx.checked_mul(byte_len) else {
405            return;
406        };
407        let Some(offset) = self.vec_offset.checked_add(idx_bytes) else {
408            return;
409        };
410        if offset
411            .checked_add(byte_len)
412            .is_none_or(|e| e > self.sid_offset)
413        {
414            return;
415        }
416        let page_start = offset & !(4095);
417        let len = (byte_len + 4095) & !(4095);
418        unsafe {
419            libc::madvise(
420                self.base.add(page_start) as *mut libc::c_void,
421                len,
422                libc::MADV_WILLNEED,
423            );
424        }
425    }
426
427    /// Prefetch a batch of vector IDs.
428    pub fn prefetch_batch(&self, ids: &[u32]) {
429        for &id in ids {
430            self.prefetch(id);
431        }
432    }
433
434    pub fn dim(&self) -> usize {
435        self.dim
436    }
437
438    pub fn count(&self) -> usize {
439        self.count
440    }
441
442    pub fn path(&self) -> &Path {
443        &self.path
444    }
445
446    pub fn mmap_bytes(&self) -> usize {
447        self.mmap_size
448    }
449
450    pub fn file_size(&self) -> usize {
451        self.mmap_size
452    }
453}
454
455impl Drop for MmapVectorSegment {
456    fn drop(&mut self) {
457        if !self.base.is_null() && self.mmap_size > 0 {
458            if self.drop_policy.dontneed_on_drop() {
459                let data_bytes = self.mmap_size.saturating_sub(HEADER_SIZE + FOOTER_SIZE);
460                if data_bytes > 0 {
461                    unsafe {
462                        libc::madvise(
463                            self.base as *mut libc::c_void,
464                            self.mmap_size,
465                            libc::MADV_DONTNEED,
466                        );
467                    }
468                    observability::DONTNEED_COUNT.fetch_add(1, Ordering::Relaxed);
469                }
470            }
471            unsafe {
472                libc::munmap(self.base as *mut libc::c_void, self.mmap_size);
473            }
474        }
475    }
476}
477
#[cfg(test)]
mod tests {
    use super::*;

    /// Create a temp directory and a path for `file_name` inside it.
    ///
    /// The returned `TempDir` must be kept alive for as long as the path is used.
    fn scratch(file_name: &str) -> (tempfile::TempDir, PathBuf) {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join(file_name);
        (dir, path)
    }

    /// Read the file at `path`, let `mutate` edit the bytes, and write them back.
    fn rewrite_with(path: &Path, mutate: impl FnOnce(&mut Vec<u8>)) {
        let mut data = std::fs::read(path).unwrap();
        mutate(&mut data);
        std::fs::write(path, &data).unwrap();
    }

    /// Hand-built header bytes: magic, version, two zero bytes, dim, count,
    /// dtype, codec, six reserved bytes.
    fn raw_header(dim: u32, count: u64) -> Vec<u8> {
        let mut buf = Vec::new();
        buf.extend_from_slice(&MAGIC);
        buf.extend_from_slice(&FORMAT_VERSION.to_le_bytes());
        buf.extend_from_slice(&0u16.to_le_bytes());
        buf.extend_from_slice(&dim.to_le_bytes());
        buf.extend_from_slice(&count.to_le_bytes());
        buf.push(0u8); // dtype
        buf.push(0u8); // codec None
        buf.extend_from_slice(&[0u8; 6]); // reserved
        buf
    }

    /// Write a one-vector segment (`[1.0, 2.0, 3.0]`, surrogate 42) to `path`.
    fn write_single_row(path: &Path) {
        let v = vec![1.0f32, 2.0, 3.0];
        write_segment(path, 3, &[&v], &[42]).unwrap();
    }

    #[test]
    fn create_and_read() {
        let (_dir, path) = scratch("test.vseg");

        let rows = [
            vec![1.0f32, 2.0, 3.0],
            vec![4.0f32, 5.0, 6.0],
            vec![7.0f32, 8.0, 9.0],
        ];
        let refs: Vec<&[f32]> = rows.iter().map(|r| r.as_slice()).collect();
        let surrogates = vec![10u64, 20, 30];

        let seg = MmapVectorSegment::create_with_surrogates(&path, 3, &refs, &surrogates).unwrap();

        assert_eq!(seg.dim(), 3);
        assert_eq!(seg.count(), 3);
        for (i, (row, sid)) in rows.iter().zip(surrogates.iter()).enumerate() {
            assert_eq!(seg.get_vector(i as u32).unwrap(), row.as_slice());
            assert_eq!(seg.get_surrogate_id(i as u32).unwrap(), *sid);
        }
        assert!(seg.get_vector(3).is_none());
        assert!(seg.get_surrogate_id(3).is_none());
    }

    #[test]
    fn flat_slices() {
        let (_dir, path) = scratch("flat.vseg");

        let first = vec![1.0f32, 2.0, 3.0];
        let second = vec![4.0f32, 5.0, 6.0];
        let sids = vec![100u64, 200];

        let seg =
            MmapVectorSegment::create_with_surrogates(&path, 3, &[&first, &second], &sids).unwrap();

        assert_eq!(seg.all_vectors_flat(), &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
        assert_eq!(seg.all_surrogate_ids(), &[100u64, 200]);
    }

    #[test]
    fn reopen_roundtrip() {
        let (_dir, path) = scratch("reopen.vseg");

        let mut vectors: Vec<Vec<f32>> = Vec::with_capacity(100);
        for i in 0..100 {
            vectors.push(vec![i as f32, (i as f32).sin(), (i as f32).cos()]);
        }
        let refs: Vec<&[f32]> = vectors.iter().map(|v| v.as_slice()).collect();
        let sids: Vec<u64> = (0u64..100).collect();

        MmapVectorSegment::create_with_surrogates(&path, 3, &refs, &sids).unwrap();

        // Open a second time, independently of the handle created above.
        let seg = MmapVectorSegment::open(&path).unwrap();
        assert_eq!(seg.count(), 100);
        for (i, v) in vectors.iter().enumerate() {
            assert_eq!(seg.get_vector(i as u32).unwrap(), v.as_slice());
            assert_eq!(seg.get_surrogate_id(i as u32).unwrap(), i as u64);
        }
    }

    #[test]
    fn no_surrogates_defaults_to_zero() {
        let (_dir, path) = scratch("nosid.vseg");

        let v = vec![1.0f32, 2.0];
        let seg = MmapVectorSegment::create(&path, 2, &[&v]).unwrap();
        assert_eq!(seg.get_surrogate_id(0).unwrap(), 0);
    }

    #[test]
    fn prefetch_does_not_crash() {
        let (_dir, path) = scratch("prefetch.vseg");

        let v = vec![1.0f32; 768];
        let seg = MmapVectorSegment::create(&path, 768, &[&v]).unwrap();
        // One valid id and one far out of range; neither may panic.
        seg.prefetch(0);
        seg.prefetch(999);
    }

    #[test]
    fn empty_segment() {
        let (_dir, path) = scratch("empty.vseg");

        let seg = MmapVectorSegment::create(&path, 3, &[]).unwrap();
        assert_eq!(seg.count(), 0);
        assert!(seg.get_vector(0).is_none());
        assert_eq!(seg.all_vectors_flat().len(), 0);
        assert_eq!(seg.all_surrogate_ids().len(), 0);
    }

    #[test]
    fn footer_golden_layout() {
        let (_dir, path) = scratch("golden.vseg");
        write_single_row(&path);
        let data = std::fs::read(&path).unwrap();

        // Footer starts at file_size - FOOTER_SIZE.
        let (body, footer) = data.split_at(data.len() - FOOTER_SIZE);

        // [0..2] format_version = FORMAT_VERSION.
        let fv = u16::from_le_bytes([footer[0], footer[1]]);
        assert_eq!(fv, FORMAT_VERSION);

        // [34..38] checksum matches body CRC32C.
        let expected_crc = crc32c::crc32c(body);
        let stored_crc = u32::from_le_bytes([footer[34], footer[35], footer[36], footer[37]]);
        assert_eq!(stored_crc, expected_crc);

        // [38..42] footer_size = FOOTER_SIZE (46).
        let fs = u32::from_le_bytes([footer[38], footer[39], footer[40], footer[41]]) as usize;
        assert_eq!(fs, FOOTER_SIZE);

        // [42..46] trailing magic = b"NDVS".
        assert_eq!(&footer[42..46], b"NDVS");
    }

    #[test]
    fn trailing_magic_corruption_rejected() {
        let (_dir, path) = scratch("trailmagic.vseg");
        write_single_row(&path);

        // Corrupt the last 4 bytes (trailing magic).
        rewrite_with(&path, |data| {
            let tail_start = data.len() - 4;
            data[tail_start..].copy_from_slice(&[0xde, 0xad, 0xbe, 0xef]);
        });

        let result = MmapVectorSegment::open(&path);
        assert!(result.is_err(), "expected trailing magic error");
        let err = result.unwrap_err();
        assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
        assert!(
            err.to_string().contains("trailing magic"),
            "expected trailing magic message, got: {err}"
        );
    }

    #[test]
    fn footer_version_mismatch_rejected() {
        let (_dir, path) = scratch("fvmismatch.vseg");
        write_single_row(&path);

        // Corrupt the footer format version bytes to 99.
        rewrite_with(&path, |data| {
            let footer_start = data.len() - FOOTER_SIZE;
            data[footer_start..footer_start + 2].copy_from_slice(&99u16.to_le_bytes());
        });

        let result = MmapVectorSegment::open(&path);
        assert!(result.is_err(), "expected footer version mismatch error");
        assert_eq!(result.unwrap_err().kind(), std::io::ErrorKind::InvalidData);
    }

    #[test]
    fn crc_corruption_rejected() {
        let (_dir, path) = scratch("corrupt.vseg");
        write_single_row(&path);

        // Flip every bit of the first payload byte.
        rewrite_with(&path, |data| data[HEADER_SIZE] ^= 0xff);

        let result = MmapVectorSegment::open(&path);
        assert!(result.is_err(), "expected CRC error");
        assert_eq!(result.unwrap_err().kind(), std::io::ErrorKind::InvalidData);
    }

    #[test]
    fn bad_magic_rejected() {
        let (_dir, path) = scratch("badmagic.vseg");

        let v = vec![1.0f32, 2.0];
        write_segment(&path, 2, &[&v], &[]).unwrap();

        rewrite_with(&path, |data| data[0] = b'X');

        let result = MmapVectorSegment::open(&path);
        assert!(result.is_err());
        assert_eq!(result.unwrap_err().kind(), std::io::ErrorKind::InvalidData);
    }

    #[test]
    fn overflow_dim_count_rejected() {
        let (_dir, path) = scratch("overflow.vseg");

        // Header bytes only, with an overflow-inducing dim/count pair.
        let buf = raw_header(0x40000001, 0x40000001);
        std::fs::write(&path, &buf).unwrap();

        let result = MmapVectorSegment::open(&path);
        assert!(
            result.is_err(),
            "expected Err for overflow-inducing dim/count"
        );
    }

    #[test]
    fn zero_dim_with_nonzero_count_rejected() {
        let (_dir, path) = scratch("zerodim.vseg");

        // Header followed by 64 zero bytes.
        let mut buf = raw_header(0, 1000);
        buf.extend_from_slice(&[0u8; 64]);
        std::fs::write(&path, &buf).unwrap();

        let result = MmapVectorSegment::open(&path);
        assert!(result.is_err(), "expected Err for dim=0 with nonzero count");
    }
}