ruvector_core/cache_optimized.rs

use std::alloc::{alloc, dealloc, Layout};
use std::ptr;

const CACHE_LINE_SIZE: usize = 64;
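
/// Structure-of-Arrays (SoA) storage for fixed-dimension f32 vectors.
///
/// Values are laid out dimension-major: element `i` of dimension `d` lives at
/// `data[d * capacity + i]`, so scanning one dimension across all stored
/// vectors is a single contiguous, cache-friendly read.
///
/// A minimal usage sketch (assuming this type is exported from the crate root
/// as `ruvector_core::SoAVectorStorage`; adjust the path to match):
///
/// ```ignore
/// use ruvector_core::SoAVectorStorage;
///
/// let mut store = SoAVectorStorage::new(3, 8);
/// store.push(&[1.0, 2.0, 3.0]);
///
/// let mut distances = vec![0.0; store.len()];
/// store.batch_euclidean_distances(&[0.0, 0.0, 0.0], &mut distances);
/// ```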
#[repr(align(64))]
pub struct SoAVectorStorage {
    count: usize,
    dimensions: usize,
    capacity: usize,
    data: *mut f32,
}

impl SoAVectorStorage {
    const MAX_DIMENSIONS: usize = 65536;
    const MAX_CAPACITY: usize = 1 << 24;

    pub fn new(dimensions: usize, initial_capacity: usize) -> Self {
        assert!(
            dimensions > 0 && dimensions <= Self::MAX_DIMENSIONS,
            "dimensions must be between 1 and {}",
            Self::MAX_DIMENSIONS
        );
        assert!(
            initial_capacity <= Self::MAX_CAPACITY,
            "initial_capacity exceeds maximum of {}",
            Self::MAX_CAPACITY
        );

        // Round up to a power of two so grow() can simply double the capacity.
        let capacity = initial_capacity.next_power_of_two();

        let total_elements = dimensions
            .checked_mul(capacity)
            .expect("dimensions * capacity overflow");
        let total_bytes = total_elements
            .checked_mul(std::mem::size_of::<f32>())
            .expect("total size overflow");

        let layout =
            Layout::from_size_align(total_bytes, CACHE_LINE_SIZE).expect("invalid memory layout");

        let data = unsafe { alloc(layout) as *mut f32 };
        if data.is_null() {
            std::alloc::handle_alloc_error(layout);
        }

        // Zero-initialize so every slot holds a valid f32 from the start.
        unsafe {
            ptr::write_bytes(data, 0, total_elements);
        }

        Self {
            count: 0,
            dimensions,
            capacity,
            data,
        }
    }

    pub fn push(&mut self, vector: &[f32]) {
        assert_eq!(vector.len(), self.dimensions);

        if self.count >= self.capacity {
            self.grow();
        }

        // SoA layout: one strided write per dimension row.
        for (dim_idx, &value) in vector.iter().enumerate() {
            let offset = dim_idx * self.capacity + self.count;
            unsafe {
                *self.data.add(offset) = value;
            }
        }

        self.count += 1;
    }

    pub fn get(&self, index: usize, output: &mut [f32]) {
        assert!(index < self.count);
        assert_eq!(output.len(), self.dimensions);

        for dim_idx in 0..self.dimensions {
            let offset = dim_idx * self.capacity + index;
            output[dim_idx] = unsafe { *self.data.add(offset) };
        }
    }

    /// Returns all stored values for dimension `dim_idx` as one contiguous slice.
    pub fn dimension_slice(&self, dim_idx: usize) -> &[f32] {
        assert!(dim_idx < self.dimensions);
        let offset = dim_idx * self.capacity;
        unsafe { std::slice::from_raw_parts(self.data.add(offset), self.count) }
    }

    /// Mutable variant of [`dimension_slice`](Self::dimension_slice).
    pub fn dimension_slice_mut(&mut self, dim_idx: usize) -> &mut [f32] {
        assert!(dim_idx < self.dimensions);
        let offset = dim_idx * self.capacity;
        unsafe { std::slice::from_raw_parts_mut(self.data.add(offset), self.count) }
    }

    pub fn len(&self) -> usize {
        self.count
    }

    pub fn is_empty(&self) -> bool {
        self.count == 0
    }

    pub fn dimensions(&self) -> usize {
        self.dimensions
    }

    fn grow(&mut self) {
        let new_capacity = self.capacity * 2;

        let new_total_elements = self
            .dimensions
            .checked_mul(new_capacity)
            .expect("dimensions * new_capacity overflow");
        let new_total_bytes = new_total_elements
            .checked_mul(std::mem::size_of::<f32>())
            .expect("total size overflow in grow");

        let new_layout = Layout::from_size_align(new_total_bytes, CACHE_LINE_SIZE)
            .expect("invalid memory layout in grow");

        let new_data = unsafe { alloc(new_layout) as *mut f32 };
        if new_data.is_null() {
            std::alloc::handle_alloc_error(new_layout);
        }

        // Copy each dimension row to its new, wider stride. Memory beyond
        // `count` within a row is never read, so it need not be zeroed.
        for dim_idx in 0..self.dimensions {
            let old_offset = dim_idx * self.capacity;
            let new_offset = dim_idx * new_capacity;

            unsafe {
                ptr::copy_nonoverlapping(
                    self.data.add(old_offset),
                    new_data.add(new_offset),
                    self.count,
                );
            }
        }

        let old_layout = Layout::from_size_align(
            self.dimensions * self.capacity * std::mem::size_of::<f32>(),
            CACHE_LINE_SIZE,
        )
        .unwrap();

        unsafe {
            dealloc(self.data as *mut u8, old_layout);
        }

        self.data = new_data;
        self.capacity = new_capacity;
    }

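    /// Computes the Euclidean distance from `query` to every stored vector:
    /// `output[j] = sqrt(sum over d of (row_d[j] - query[d])^2)`.
    ///
    /// The accumulation runs dimension-major: each pass streams one contiguous
    /// dimension row while broadcasting a single query value, keeping the
    /// inner loop sequential in memory and amenable to auto-vectorization.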
    pub fn batch_euclidean_distances(&self, query: &[f32], output: &mut [f32]) {
        assert_eq!(query.len(), self.dimensions);
        assert_eq!(output.len(), self.count);

        output.fill(0.0);

        for dim_idx in 0..self.dimensions {
            let dim_slice = self.dimension_slice(dim_idx);
            let query_val = query[dim_idx];

            for vec_idx in 0..self.count {
                let diff = dim_slice[vec_idx] - query_val;
                output[vec_idx] += diff * diff;
            }
        }

        for distance in output.iter_mut() {
            *distance = distance.sqrt();
        }
    }
}

impl Drop for SoAVectorStorage {
    fn drop(&mut self) {
        let layout = Layout::from_size_align(
            self.dimensions * self.capacity * std::mem::size_of::<f32>(),
            CACHE_LINE_SIZE,
        )
        .unwrap();

        unsafe {
            dealloc(self.data as *mut u8, layout);
        }
    }
}

// Safety: the allocation is uniquely owned, `&self` methods only read through
// the pointer, and mutation requires `&mut self`, so there is no interior
// mutability to race on.
unsafe impl Send for SoAVectorStorage {}
unsafe impl Sync for SoAVectorStorage {}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_soa_storage() {
        let mut storage = SoAVectorStorage::new(3, 4);

        storage.push(&[1.0, 2.0, 3.0]);
        storage.push(&[4.0, 5.0, 6.0]);

        assert_eq!(storage.len(), 2);

        let mut output = vec![0.0; 3];
        storage.get(0, &mut output);
        assert_eq!(output, vec![1.0, 2.0, 3.0]);

        storage.get(1, &mut output);
        assert_eq!(output, vec![4.0, 5.0, 6.0]);
    }

    #[test]
    fn test_dimension_slice() {
        let mut storage = SoAVectorStorage::new(3, 4);

        storage.push(&[1.0, 2.0, 3.0]);
        storage.push(&[4.0, 5.0, 6.0]);
        storage.push(&[7.0, 8.0, 9.0]);

        let dim0 = storage.dimension_slice(0);
        assert_eq!(dim0, &[1.0, 4.0, 7.0]);

        let dim1 = storage.dimension_slice(1);
        assert_eq!(dim1, &[2.0, 5.0, 8.0]);
    }

    #[test]
    fn test_batch_distances() {
        let mut storage = SoAVectorStorage::new(3, 4);

        storage.push(&[1.0, 0.0, 0.0]);
        storage.push(&[0.0, 1.0, 0.0]);
        storage.push(&[0.0, 0.0, 1.0]);

        let query = vec![1.0, 0.0, 0.0];
        let mut distances = vec![0.0; 3];

        storage.batch_euclidean_distances(&query, &mut distances);

        assert!((distances[0] - 0.0).abs() < 0.001);
        assert!((distances[1] - 1.414).abs() < 0.01);
        assert!((distances[2] - 1.414).abs() < 0.01);
    }
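
    // A sketch of a growth test: an initial capacity of 2 forces grow() on
    // the third push, so this checks that reallocation re-strides every
    // dimension row without losing data.
    #[test]
    fn test_grow_preserves_data() {
        let mut storage = SoAVectorStorage::new(2, 2);

        storage.push(&[1.0, 2.0]);
        storage.push(&[3.0, 4.0]);
        storage.push(&[5.0, 6.0]); // triggers grow() from capacity 2 to 4

        assert_eq!(storage.len(), 3);

        let mut output = vec![0.0; 2];
        storage.get(0, &mut output);
        assert_eq!(output, vec![1.0, 2.0]);

        storage.get(2, &mut output);
        assert_eq!(output, vec![5.0, 6.0]);
    }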
}