nodedb_vector/mmap_segment/format.rs
1// SPDX-License-Identifier: Apache-2.0
2
3//! On-disk format constants and shared types for the NDVS vector segment.
4
5use std::sync::atomic::{AtomicU64, Ordering};
6
7// ── Layout constants ──────────────────────────────────────────────────────────
8
9pub(super) const MAGIC: [u8; 4] = *b"NDVS";
10pub(super) const FORMAT_VERSION: u16 = 1;
11pub(super) const DTYPE_F32: u8 = 0;
12
13/// Header size in bytes (32). Padded to 8-byte alignment so the surrogate
14/// ID block (u64) is naturally aligned regardless of vector dimension.
15pub(super) const HEADER_SIZE: usize = 32;
16
17/// Compute the 8-byte-aligned padding inserted after the vector data block
18/// so the surrogate ID block lands on an 8-byte boundary.
19///
20/// Returns 0 when `vec_bytes` is already a multiple of 8, else 4.
21#[inline]
22pub(super) const fn vec_pad(vec_bytes: usize) -> usize {
23 (8 - (vec_bytes % 8)) % 8
24}
25
26/// Footer size in bytes (46).
27///
28/// Layout:
29/// ```text
30/// [0..2] format_version (u16 LE)
31/// [2..34] created_by (32-byte null-padded version string)
32/// [34..38] checksum (u32 LE CRC32C over header + data body)
33/// [38..42] footer_size (u32 LE, always 46)
34/// [42..46] trailing_magic (b"NDVS")
35/// ```
36pub(super) const FOOTER_SIZE: usize = 46;
37
38// ── Codec slot ────────────────────────────────────────────────────────────────
39
40/// At-rest compression codec for the vector data block.
41///
42/// Stored as a `u8` in the segment header (byte 21). Only `None` is supported
43/// in v2. The decode site in the reader matches exhaustively so the right
44/// hook for future compression codecs is obvious.
45#[derive(Debug, Clone, Copy, PartialEq, Eq)]
46#[repr(u8)]
47#[non_exhaustive]
48pub enum VectorSegmentCodec {
49 /// No compression — raw packed `[f32; D] × N`.
50 None = 0,
51}
52
53impl VectorSegmentCodec {
54 pub(super) fn from_u8(v: u8) -> std::io::Result<Self> {
55 match v {
56 0 => Ok(Self::None),
57 other => Err(std::io::Error::new(
58 std::io::ErrorKind::InvalidData,
59 format!("unknown VectorSegmentCodec byte {other}"),
60 )),
61 }
62 }
63}
64
65// ── Drop-time page-cache policy ───────────────────────────────────────────────
66
/// Page-cache policy applied when a vector segment is dropped.
///
/// HNSW traversal touches only a small fraction of a segment's pages. With
/// the default policy, dropping the segment hints the kernel that the
/// residual pages may be evicted so they do not crowd hotter engines'
/// working sets. Use [`VectorSegmentDropPolicy::keep_resident`] to opt out.
#[derive(Debug, Clone, Copy)]
pub struct VectorSegmentDropPolicy {
    /// `true` when the eviction hint should be issued at drop time.
    dontneed_on_drop: bool,
}

impl VectorSegmentDropPolicy {
    /// Builds a policy with an explicit choice for the drop-time hint.
    pub const fn new(dontneed_on_drop: bool) -> Self {
        Self { dontneed_on_drop }
    }

    /// Policy that leaves the segment's pages alone on drop (hint disabled).
    pub const fn keep_resident() -> Self {
        Self::new(false)
    }

    /// Returns whether the eviction hint is enabled for this policy.
    pub const fn dontneed_on_drop(self) -> bool {
        self.dontneed_on_drop
    }
}

impl Default for VectorSegmentDropPolicy {
    /// The default policy enables the drop-time eviction hint.
    fn default() -> Self {
        Self::new(true)
    }
}
98
99// ── Observability ─────────────────────────────────────────────────────────────
100
/// Process-wide atomic counters that make madvise activity observable.
///
/// The counters are plain atomics that are always compiled in; Event-Plane
/// metrics read them at runtime to track page-cache hint activity on vector
/// segments. Incrementing happens elsewhere in the crate — this module only
/// owns the storage and the read accessors.
pub mod observability {
    use super::{AtomicU64, Ordering};

    /// Running total backing [`dontneed_count`].
    // NOTE(review): presumably bumped once per `MADV_DONTNEED` hint — confirm at the call site.
    pub(crate) static DONTNEED_COUNT: AtomicU64 = AtomicU64::new(0);

    /// Running total backing [`random_count`].
    // NOTE(review): presumably bumped once per `MADV_RANDOM` hint — confirm at the call site.
    pub(crate) static RANDOM_COUNT: AtomicU64 = AtomicU64::new(0);

    /// Current value of the "dontneed" counter.
    ///
    /// Read with relaxed ordering: the value is a statistic and is not used
    /// to synchronize other memory accesses.
    pub fn dontneed_count() -> u64 {
        DONTNEED_COUNT.load(Ordering::Relaxed)
    }

    /// Current value of the "random" counter.
    ///
    /// Read with relaxed ordering, like [`dontneed_count`].
    pub fn random_count() -> u64 {
        RANDOM_COUNT.load(Ordering::Relaxed)
    }
}