omendb_core/omen/
vectors.rs

1//! Vector section - contiguous f32 array with O(1) access
2#![allow(clippy::cast_ptr_alignment)] // alignment verified before cast
3
4use memmap2::MmapMut;
5use std::io;
6
/// Read-only view over a contiguous run of `f32` vectors.
///
/// Every vector has the same number of components (`dimensions`) and the
/// vectors are stored back to back, so vector `i` starts at element
/// `i * dimensions` of `data`.
pub struct VectorSection {
    /// Number of `f32` components in each vector.
    dimensions: u32,
    /// Number of whole vectors available through this section.
    count: u64,
    /// Start of the vector data; null when the section is empty.
    ///
    /// The pointer is borrowed, not owned: the backing memory has to outlive
    /// this struct.
    data: *const f32,
    /// Length of `data` measured in `f32` elements (not bytes).
    data_len: usize,
}
15
16// Safety: VectorSection is read-only after creation
17unsafe impl Send for VectorSection {}
18unsafe impl Sync for VectorSection {}
19
20impl VectorSection {
21    /// Create from mmap region
22    ///
23    /// # Safety
24    /// The mmap must remain valid for the lifetime of this section.
25    /// The data at offset must be properly aligned f32 values.
26    pub unsafe fn from_mmap(
27        mmap: &MmapMut,
28        offset: usize,
29        length: usize,
30        dimensions: u32,
31    ) -> io::Result<Self> {
32        if length == 0 {
33            return Ok(Self {
34                dimensions,
35                count: 0,
36                data: std::ptr::null(),
37                data_len: 0,
38            });
39        }
40
41        let bytes_per_vector = dimensions as usize * std::mem::size_of::<f32>();
42        if bytes_per_vector == 0 {
43            return Err(io::Error::new(
44                io::ErrorKind::InvalidInput,
45                "dimensions must be > 0",
46            ));
47        }
48
49        let count = length / bytes_per_vector;
50
51        // Check alignment
52        let ptr = mmap.as_ptr().add(offset);
53        if !(ptr as usize).is_multiple_of(std::mem::align_of::<f32>()) {
54            return Err(io::Error::new(
55                io::ErrorKind::InvalidData,
56                "Vector data not properly aligned",
57            ));
58        }
59
60        Ok(Self {
61            dimensions,
62            count: count as u64,
63            data: ptr.cast::<f32>(),
64            data_len: length / std::mem::size_of::<f32>(),
65        })
66    }
67
68    /// Create empty section for building
69    #[must_use]
70    pub fn new(dimensions: u32) -> Self {
71        Self {
72            dimensions,
73            count: 0,
74            data: std::ptr::null(),
75            data_len: 0,
76        }
77    }
78
79    /// Get vector by index - O(1)
80    #[inline]
81    #[must_use]
82    pub fn get(&self, index: u32) -> Option<&[f32]> {
83        if index as u64 >= self.count || self.data.is_null() {
84            return None;
85        }
86
87        let offset = index as usize * self.dimensions as usize;
88        if offset + self.dimensions as usize > self.data_len {
89            return None;
90        }
91
92        // Safety: We checked bounds above
93        unsafe {
94            let ptr = self.data.add(offset);
95            Some(std::slice::from_raw_parts(ptr, self.dimensions as usize))
96        }
97    }
98
99    /// Prefetch vector for cache warming (VSAG-style)
100    #[inline]
101    pub fn prefetch(&self, index: u32) {
102        if index as u64 >= self.count || self.data.is_null() {
103            return;
104        }
105
106        let offset = index as usize * self.dimensions as usize;
107
108        // Safety: We checked bounds
109        #[cfg(target_arch = "x86_64")]
110        unsafe {
111            let ptr = self.data.add(offset).cast::<u8>();
112            // Prefetch multiple cache lines (64 bytes each)
113            let bytes_to_prefetch = self.dimensions as usize * std::mem::size_of::<f32>();
114            let cache_lines = bytes_to_prefetch.div_ceil(64);
115
116            for i in 0..cache_lines.min(8) {
117                use std::arch::x86_64::_mm_prefetch;
118                _mm_prefetch(ptr.add(i * 64).cast::<i8>(), 3); // _MM_HINT_T0
119            }
120        }
121
122        // aarch64 prefetch requires nightly feature, skip for now
123        #[cfg(not(target_arch = "x86_64"))]
124        {
125            let _ = offset; // Suppress unused warning
126        }
127    }
128
129    /// Get dimensions
130    #[must_use]
131    pub fn dimensions(&self) -> u32 {
132        self.dimensions
133    }
134
135    /// Get vector count
136    #[must_use]
137    pub fn count(&self) -> u64 {
138        self.count
139    }
140
141    /// Calculate size in bytes for given count
142    #[must_use]
143    pub fn size_for_count(dimensions: u32, count: u64) -> u64 {
144        count * dimensions as u64 * std::mem::size_of::<f32>() as u64
145    }
146
147    /// Serialize vectors to bytes (for writing)
148    pub fn write_vectors<W: io::Write>(writer: &mut W, vectors: &[&[f32]]) -> io::Result<()> {
149        for vector in vectors {
150            for &val in *vector {
151                writer.write_all(&val.to_le_bytes())?;
152            }
153        }
154        Ok(())
155    }
156}
157
#[cfg(test)]
mod tests {
    use super::VectorSection;

    /// 1000 vectors of 768 `f32` components occupy 768 * 1000 * 4 bytes.
    #[test]
    fn test_size_calculation() {
        const DIMENSIONS: u32 = 768;
        const VECTORS: u64 = 1000;
        const BYTES_PER_COMPONENT: u64 = 4;

        // 3,072,000 bytes in total.
        let expected_bytes = u64::from(DIMENSIONS) * VECTORS * BYTES_PER_COMPONENT;
        let actual_bytes = VectorSection::size_for_count(DIMENSIONS, VECTORS);

        assert_eq!(actual_bytes, expected_bytes);
    }
}
168}