nodedb_vector/vamana/storage.rs
1// SPDX-License-Identifier: Apache-2.0
2
3//! Vamana on-disk layout description.
4//!
5//! **No I/O code lives here.** This module describes the byte layout that
6//! a future io_uring integration (in `nodedb-wal`) will read and write.
7//! Offset arithmetic is provided so callers can compute SSD byte positions
8//! for full-precision vector fetches without materialising any buffers.
9//!
10//! # File layout
11//!
12//! ```text
13//! Offset Size Field
14//! ------ ---- -----
15//! 0 8 magic = b"NDBVAMAN"
16//! 8 4 version: u32
17//! 12 4 dim: u32
18//! 16 4 r: u32 (max degree)
19//! 20 4 alpha: f32
20//! 24 8 num_nodes: u64
21//! 32 8 entry: u64 (entry-point node index)
22//! 40 8 adjacency_offset: u64 (byte offset to adjacency block)
23//! 48 8 vectors_offset: u64 (byte offset to vectors block)
24//! 56 … reserved / padding to 64-byte header boundary
25//!
26//! --- adjacency block (mmap-resident, hot path) ---
27//! [u32 × r × num_nodes] neighbors, zero-padded when degree < r
28//!
29//! --- vectors block (SSD-resident, io_uring fetch on demand) ---
30//! [f32 × dim × num_nodes] full-precision FP32 vectors
31//! ```
32//!
33//! The adjacency block fits in RAM alongside the compressed codec vectors.
34//! The vectors block is fetched on demand via io_uring at a fixed, computable
35//! offset (see `vector_offset`).
36
/// Magic bytes identifying a NodeDB Vamana index file.
///
/// Per the file layout table in the module docs, these 8 bytes occupy
/// header offset 0.
pub const VAMANA_MAGIC: &[u8; 8] = b"NDBVAMAN";

/// Current on-disk format version.
///
/// Per the file layout table in the module docs, the version is a `u32`
/// at header offset 8, directly after the magic bytes.
pub const VAMANA_VERSION: u32 = 1;
42
/// Size of the fixed file header in bytes.
///
/// [`VamanaStorageLayout::new`] places the adjacency block immediately after
/// the header, so this is also the `adjacency_offset` of every layout it
/// builds.
pub const HEADER_BYTES: u64 = 64;

/// Vamana on-disk layout descriptor.
///
/// Populated when opening or creating an index file; used to compute
/// byte offsets for io_uring scatter/gather reads.
///
/// All fields are public, so a descriptor can also be filled in field by
/// field. Only [`VamanaStorageLayout::new`] and
/// [`VamanaStorageLayout::try_new`] guarantee that `vectors_offset` is
/// consistent with `r` and `num_nodes`.
#[derive(Debug, Clone, PartialEq)]
pub struct VamanaStorageLayout {
    /// Vector dimensionality (header offset 12).
    pub dim: u32,
    /// Maximum out-degree per node (header offset 16).
    pub r: u32,
    /// α-pruning factor (stored for validation when reopening the file;
    /// header offset 20).
    pub alpha: f32,
    /// Total number of nodes (header offset 24).
    pub num_nodes: u64,
    /// Internal index of the entry-point node (header offset 32).
    pub entry: u64,
    /// Byte offset of the adjacency block from the start of the file
    /// (header offset 40).
    pub adjacency_offset: u64,
    /// Byte offset of the vectors block from the start of the file
    /// (header offset 48).
    pub vectors_offset: u64,
}

impl VamanaStorageLayout {
    /// Width in bytes of one stored element: adjacency slots are `u32` and
    /// vector components are `f32`.
    const ELEMENT_BYTES: u64 = 4;

    /// Size in bytes of a block that stores `per_node` 4-byte elements for
    /// each of `num_nodes` nodes, or `None` if the size does not fit in a
    /// `u64`.
    fn block_bytes(per_node: u32, num_nodes: u64) -> Option<u64> {
        // `per_node * 4` is below 2^34, so only the multiplication by
        // `num_nodes` can overflow.
        (u64::from(per_node) * Self::ELEMENT_BYTES).checked_mul(num_nodes)
    }

    /// Build a layout descriptor for a freshly written index.
    ///
    /// Offsets are computed deterministically from `r` and `num_nodes`:
    ///
    /// * `adjacency_offset` = `HEADER_BYTES`
    /// * `vectors_offset`   = `adjacency_offset + r * 4 * num_nodes`
    ///
    /// `dim`, `alpha` and `entry` are stored unchanged and are not validated.
    ///
    /// # Panics
    ///
    /// Panics if `vectors_offset` does not fit in a `u64`. This holds in
    /// every build profile; plain arithmetic would wrap silently in release
    /// builds and yield a bogus offset. Use [`VamanaStorageLayout::try_new`]
    /// when the inputs come from an untrusted header.
    pub fn new(dim: u32, r: u32, alpha: f32, num_nodes: u64, entry: u64) -> Self {
        Self::try_new(dim, r, alpha, num_nodes, entry)
            .expect("Vamana layout: vectors_offset does not fit in u64")
    }

    /// Fallible counterpart of [`VamanaStorageLayout::new`].
    ///
    /// Returns `None` instead of panicking when `vectors_offset` cannot be
    /// represented in a `u64`; otherwise the result is identical to `new`.
    pub fn try_new(dim: u32, r: u32, alpha: f32, num_nodes: u64, entry: u64) -> Option<Self> {
        let adjacency_offset = HEADER_BYTES;
        let adjacency_bytes = Self::block_bytes(r, num_nodes)?;
        let vectors_offset = adjacency_offset.checked_add(adjacency_bytes)?;
        Some(Self {
            dim,
            r,
            alpha,
            num_nodes,
            entry,
            adjacency_offset,
            vectors_offset,
        })
    }

    /// Total size of the adjacency block in bytes (`r * 4 * num_nodes`).
    ///
    /// # Panics
    ///
    /// Panics if the size does not fit in a `u64`. This cannot happen for a
    /// descriptor returned by `new` / `try_new` whose fields were not
    /// modified afterwards, because construction already computed this size.
    pub fn adjacency_bytes(&self) -> u64 {
        Self::block_bytes(self.r, self.num_nodes)
            .expect("Vamana layout: adjacency block size does not fit in u64")
    }

    /// Total size of the vectors block in bytes (`dim * 4 * num_nodes`).
    ///
    /// # Panics
    ///
    /// Panics if the size does not fit in a `u64`.
    pub fn vectors_bytes(&self) -> u64 {
        Self::block_bytes(self.dim, self.num_nodes)
            .expect("Vamana layout: vectors block size does not fit in u64")
    }
}
101
102/// Return the byte offset of the full-precision vector for node `idx`.
103///
104/// This is the offset to pass to an io_uring `IORING_OP_READ` or `pread`
105/// call to fetch `dim * 4` bytes of FP32 data for the given node.
106///
107/// # Example
108///
109/// ```
110/// use nodedb_vector::vamana::storage::{VamanaStorageLayout, vector_offset};
111///
112/// let layout = VamanaStorageLayout::new(4, 64, 1.2, 100, 0);
113/// let off = vector_offset(&layout, 5);
114/// // adjacency block: 64 * 4 * 100 = 25600 bytes
115/// // header: 64 bytes
116/// // vectors start: 64 + 25600 = 25664
117/// // vector 5: 25664 + 5 * 4 * 4 = 25664 + 80 = 25744
118/// assert_eq!(off, 25744);
119/// ```
120pub fn vector_offset(layout: &VamanaStorageLayout, idx: u64) -> u64 {
121 layout.vectors_offset + idx * layout.dim as u64 * 4
122}
123
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks every intermediate quantity of the documented example
    /// (dim = 4, r = 64, 100 nodes) against hand-computed values.
    #[test]
    fn vector_offset_manual_computation() {
        let layout = VamanaStorageLayout::new(4, 64, 1.2, 100, 0);

        // The adjacency block starts right after the 64-byte header.
        let adjacency_start = layout.adjacency_offset;
        assert_eq!(adjacency_start, 64);

        // 64 slots * 4 bytes * 100 nodes.
        let adjacency_size = layout.adjacency_bytes();
        assert_eq!(adjacency_size, 25_600);

        // Header (64) plus adjacency block (25_600).
        let vectors_start = layout.vectors_offset;
        assert_eq!(vectors_start, 25_664);

        // Five vectors of 4 * 4 bytes each precede node 5: 25_664 + 80.
        let fifth = vector_offset(&layout, 5);
        assert_eq!(fifth, 25_744);
    }

    /// Node 0 sits at the very start of the vectors block.
    #[test]
    fn vector_offset_idx_zero_equals_vectors_offset() {
        let layout = VamanaStorageLayout::new(8, 32, 1.2, 50, 0);
        let first = vector_offset(&layout, 0);
        assert_eq!(first, layout.vectors_offset);
    }

    /// The vectors block holds `dim` f32 values per node.
    #[test]
    fn vectors_bytes_correct() {
        let layout = VamanaStorageLayout::new(128, 64, 1.2, 1000, 0);
        // 128 dims * 4 bytes * 1000 nodes.
        let total = layout.vectors_bytes();
        assert_eq!(total, 512_000);
    }

    /// The magic occupies exactly the 8 bytes reserved for it in the header.
    #[test]
    fn magic_bytes_length() {
        let magic_len = VAMANA_MAGIC.len();
        assert_eq!(magic_len, 8);
    }
}
163}