Skip to main content

nodedb_vector/mmap_segment/
format.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! On-disk format constants and shared types for the NDVS vector segment.
4
5use std::sync::atomic::{AtomicU64, Ordering};
6
7// ── Layout constants ──────────────────────────────────────────────────────────
8
9pub(super) const MAGIC: [u8; 4] = *b"NDVS";
10pub(super) const FORMAT_VERSION: u16 = 1;
11pub(super) const DTYPE_F32: u8 = 0;
12
13/// Header size in bytes (32). Padded to 8-byte alignment so the surrogate
14/// ID block (u64) is naturally aligned regardless of vector dimension.
15pub(super) const HEADER_SIZE: usize = 32;
16
17/// Compute the 8-byte-aligned padding inserted after the vector data block
18/// so the surrogate ID block lands on an 8-byte boundary.
19///
20/// Returns 0 when `vec_bytes` is already a multiple of 8, else 4.
21#[inline]
22pub(super) const fn vec_pad(vec_bytes: usize) -> usize {
23    (8 - (vec_bytes % 8)) % 8
24}
25
26/// Footer size in bytes (46).
27///
28/// Layout:
29/// ```text
30/// [0..2]   format_version  (u16 LE)
31/// [2..34]  created_by      (32-byte null-padded version string)
32/// [34..38] checksum        (u32 LE CRC32C over header + data body)
33/// [38..42] footer_size     (u32 LE, always 46)
34/// [42..46] trailing_magic  (b"NDVS")
35/// ```
36pub(super) const FOOTER_SIZE: usize = 46;
37
38// ── Codec slot ────────────────────────────────────────────────────────────────
39
40/// At-rest compression codec for the vector data block.
41///
42/// Stored as a `u8` in the segment header (byte 21). Only `None` is supported
43/// in v2. The decode site in the reader matches exhaustively so the right
44/// hook for future compression codecs is obvious.
45#[derive(Debug, Clone, Copy, PartialEq, Eq)]
46#[repr(u8)]
47#[non_exhaustive]
48pub enum VectorSegmentCodec {
49    /// No compression — raw packed `[f32; D] × N`.
50    None = 0,
51}
52
53impl VectorSegmentCodec {
54    pub(super) fn from_u8(v: u8) -> std::io::Result<Self> {
55        match v {
56            0 => Ok(Self::None),
57            other => Err(std::io::Error::new(
58                std::io::ErrorKind::InvalidData,
59                format!("unknown VectorSegmentCodec byte {other}"),
60            )),
61        }
62    }
63}
64
65// ── Drop-time page-cache policy ───────────────────────────────────────────────
66
/// What to tell the kernel about a vector segment's pages when the segment
/// is dropped.
///
/// HNSW traversal touches only a small fraction of a segment's pages. When
/// the policy is enabled, dropping the segment hints the kernel that the
/// residual pages may be evicted, so they do not crowd the working sets of
/// hotter engines.
#[derive(Debug, Clone, Copy)]
pub struct VectorSegmentDropPolicy {
    /// `true` when the eviction hint should be issued on drop.
    dontneed_on_drop: bool,
}

impl VectorSegmentDropPolicy {
    /// Builds a policy with the eviction hint explicitly switched on or off.
    pub const fn new(dontneed_on_drop: bool) -> Self {
        Self { dontneed_on_drop }
    }

    /// Policy that issues no eviction hint on drop, leaving the segment's
    /// pages to the kernel's normal page-cache management.
    pub const fn keep_resident() -> Self {
        Self::new(false)
    }

    /// Reports whether the eviction hint is issued when the segment is
    /// dropped.
    pub const fn dontneed_on_drop(self) -> bool {
        self.dontneed_on_drop
    }
}

impl Default for VectorSegmentDropPolicy {
    /// The default policy issues the eviction hint on drop.
    fn default() -> Self {
        Self::new(true)
    }
}
98
99// ── Observability ─────────────────────────────────────────────────────────────
100
/// Process-wide counters exposing madvise activity on vector segments.
///
/// The counters are plain atomics that are always compiled in; Event-Plane
/// metrics read them at runtime to track page-cache hint activity.
pub mod observability {
    use super::{AtomicU64, Ordering};

    /// Counter behind [`dontneed_count`].
    ///
    /// NOTE(review): presumably bumped once per `MADV_DONTNEED` hint; the
    /// increment site is outside this file — confirm.
    pub(crate) static DONTNEED_COUNT: AtomicU64 = AtomicU64::new(0);

    /// Counter behind [`random_count`].
    ///
    /// NOTE(review): presumably bumped once per `MADV_RANDOM` hint; the
    /// increment site is outside this file — confirm.
    pub(crate) static RANDOM_COUNT: AtomicU64 = AtomicU64::new(0);

    /// Reads a counter with relaxed ordering; the value is a statistic and is
    /// not used to synchronise other memory.
    fn snapshot(counter: &AtomicU64) -> u64 {
        counter.load(Ordering::Relaxed)
    }

    /// Current value of the `DONTNEED` counter.
    pub fn dontneed_count() -> u64 {
        snapshot(&DONTNEED_COUNT)
    }

    /// Current value of the `RANDOM` counter.
    pub fn random_count() -> u64 {
        snapshot(&RANDOM_COUNT)
    }
}