// omendb_core/omen/vectors.rs
#![allow(clippy::cast_ptr_alignment)]

use memmap2::MmapMut;
use std::io;
/// Zero-copy view over a contiguous block of packed `f32` vectors living in a
/// memory-mapped file (constructed by `VectorSection::from_mmap`), or an empty
/// in-memory placeholder (constructed by `VectorSection::new`).
pub struct VectorSection {
 // Number of f32 components per vector.
 dimensions: u32,
 // Number of complete vectors addressable through `data`.
 count: u64,
 // Base pointer into the mmap'd region; null for an empty section.
 // NOTE(review): no lifetime ties this to the mmap — the creator must keep
 // the mapping alive (see the safety contract on `from_mmap`).
 data: *const f32,
 // Total number of f32 values reachable from `data` (bounds for `get`).
 data_len: usize,
}
15
// SAFETY: `data` is only ever read, never written through, and points into a
// mapping that the caller of `from_mmap` contractually keeps alive and
// unmodified for the section's lifetime. Under that external contract the
// raw pointer is safe to share and send across threads.
// NOTE(review): soundness hinges entirely on callers honoring the `from_mmap`
// contract — there is no compile-time enforcement here.
unsafe impl Send for VectorSection {}
unsafe impl Sync for VectorSection {}
19
20impl VectorSection {
21 pub unsafe fn from_mmap(
27 mmap: &MmapMut,
28 offset: usize,
29 length: usize,
30 dimensions: u32,
31 ) -> io::Result<Self> {
32 if length == 0 {
33 return Ok(Self {
34 dimensions,
35 count: 0,
36 data: std::ptr::null(),
37 data_len: 0,
38 });
39 }
40
41 let bytes_per_vector = dimensions as usize * std::mem::size_of::<f32>();
42 if bytes_per_vector == 0 {
43 return Err(io::Error::new(
44 io::ErrorKind::InvalidInput,
45 "dimensions must be > 0",
46 ));
47 }
48
49 let count = length / bytes_per_vector;
50
51 let ptr = mmap.as_ptr().add(offset);
53 if !(ptr as usize).is_multiple_of(std::mem::align_of::<f32>()) {
54 return Err(io::Error::new(
55 io::ErrorKind::InvalidData,
56 "Vector data not properly aligned",
57 ));
58 }
59
60 Ok(Self {
61 dimensions,
62 count: count as u64,
63 data: ptr.cast::<f32>(),
64 data_len: length / std::mem::size_of::<f32>(),
65 })
66 }
67
68 #[must_use]
70 pub fn new(dimensions: u32) -> Self {
71 Self {
72 dimensions,
73 count: 0,
74 data: std::ptr::null(),
75 data_len: 0,
76 }
77 }
78
79 #[inline]
81 #[must_use]
82 pub fn get(&self, index: u32) -> Option<&[f32]> {
83 if index as u64 >= self.count || self.data.is_null() {
84 return None;
85 }
86
87 let offset = index as usize * self.dimensions as usize;
88 if offset + self.dimensions as usize > self.data_len {
89 return None;
90 }
91
92 unsafe {
94 let ptr = self.data.add(offset);
95 Some(std::slice::from_raw_parts(ptr, self.dimensions as usize))
96 }
97 }
98
99 #[inline]
101 pub fn prefetch(&self, index: u32) {
102 if index as u64 >= self.count || self.data.is_null() {
103 return;
104 }
105
106 let offset = index as usize * self.dimensions as usize;
107
108 #[cfg(target_arch = "x86_64")]
110 unsafe {
111 let ptr = self.data.add(offset).cast::<u8>();
112 let bytes_to_prefetch = self.dimensions as usize * std::mem::size_of::<f32>();
114 let cache_lines = bytes_to_prefetch.div_ceil(64);
115
116 for i in 0..cache_lines.min(8) {
117 use std::arch::x86_64::_mm_prefetch;
118 _mm_prefetch(ptr.add(i * 64).cast::<i8>(), 3); }
120 }
121
122 #[cfg(not(target_arch = "x86_64"))]
124 {
125 let _ = offset; }
127 }
128
129 #[must_use]
131 pub fn dimensions(&self) -> u32 {
132 self.dimensions
133 }
134
135 #[must_use]
137 pub fn count(&self) -> u64 {
138 self.count
139 }
140
141 #[must_use]
143 pub fn size_for_count(dimensions: u32, count: u64) -> u64 {
144 count * dimensions as u64 * std::mem::size_of::<f32>() as u64
145 }
146
147 pub fn write_vectors<W: io::Write>(writer: &mut W, vectors: &[&[f32]]) -> io::Result<()> {
149 for vector in vectors {
150 for &val in *vector {
151 writer.write_all(&val.to_le_bytes())?;
152 }
153 }
154 Ok(())
155 }
156}
157
#[cfg(test)]
mod tests {
    use super::*;

    // 768-dim f32 vectors: 4 bytes per component.
    #[test]
    fn test_size_calculation() {
        let expected: u64 = 768 * 1000 * 4;
        assert_eq!(VectorSection::size_for_count(768, 1000), expected);
    }
}