// ruvector_core/cache_optimized.rs

use std::alloc::{alloc, dealloc, Layout};
use std::ptr;

/// Allocation alignment target: one CPU cache line on common x86/ARM parts.
const CACHE_LINE_SIZE: usize = 64;
/// Structure-of-Arrays (SoA) storage for fixed-dimension f32 vectors,
/// aligned to a cache line.
///
/// Component `d` of vector `i` lives at `data[d * capacity + i]`, so all
/// values of one dimension are contiguous and can be streamed sequentially
/// (see `dimension_slice` / `batch_euclidean_distances`).
#[repr(align(64))]
pub struct SoAVectorStorage {
    // Number of vectors currently stored (valid columns per dimension row).
    count: usize,
    // Number of f32 components per vector; fixed at construction.
    dimensions: usize,
    // Allocated vector slots per dimension row; kept a power of two
    // (`new` rounds up, `grow` doubles).
    capacity: usize,
    // Raw 64-byte-aligned buffer of `dimensions * capacity` f32s,
    // allocated/freed manually via `std::alloc`.
    data: *mut f32,
}
28
impl SoAVectorStorage {
    /// Upper bound on per-vector dimensionality.
    const MAX_DIMENSIONS: usize = 65536;
    /// Upper bound on requested capacity (16M slots). A power of two, so the
    /// `next_power_of_two` rounding below can never push past it.
    const MAX_CAPACITY: usize = 1 << 24;

    /// Creates storage for `dimensions`-component vectors with room for at
    /// least `initial_capacity` vectors (rounded up to a power of two).
    /// The buffer is zero-initialized and cache-line aligned.
    ///
    /// # Panics
    /// Panics if `dimensions` is 0 or exceeds `MAX_DIMENSIONS`, if
    /// `initial_capacity` exceeds `MAX_CAPACITY`, or if the element count /
    /// byte size overflows `usize`. Diverges via `handle_alloc_error` when
    /// the allocation itself fails.
    pub fn new(dimensions: usize, initial_capacity: usize) -> Self {
        assert!(
            dimensions > 0 && dimensions <= Self::MAX_DIMENSIONS,
            "dimensions must be between 1 and {}",
            Self::MAX_DIMENSIONS
        );
        assert!(
            initial_capacity <= Self::MAX_CAPACITY,
            "initial_capacity exceeds maximum of {}",
            Self::MAX_CAPACITY
        );

        // 0 rounds up to 1, so the layout below is never zero-sized.
        let capacity = initial_capacity.next_power_of_two();

        let total_elements = dimensions
            .checked_mul(capacity)
            .expect("dimensions * capacity overflow");
        let total_bytes = total_elements
            .checked_mul(std::mem::size_of::<f32>())
            .expect("total size overflow");

        let layout =
            Layout::from_size_align(total_bytes, CACHE_LINE_SIZE).expect("invalid memory layout");

        // SAFETY: `layout` has non-zero size and a power-of-two alignment.
        let data = unsafe { alloc(layout) as *mut f32 };
        // BUG FIX: the original wrote through `data` unconditionally; a
        // failed allocation returns null and the write would be UB.
        if data.is_null() {
            std::alloc::handle_alloc_error(layout);
        }

        // SAFETY: `data` is non-null and owns `total_elements` f32 slots.
        unsafe {
            ptr::write_bytes(data, 0, total_elements);
        }

        Self {
            count: 0,
            dimensions,
            capacity,
            data,
        }
    }
79
80 pub fn push(&mut self, vector: &[f32]) {
82 assert_eq!(vector.len(), self.dimensions);
83
84 if self.count >= self.capacity {
85 self.grow();
86 }
87
88 for (dim_idx, &value) in vector.iter().enumerate() {
90 let offset = dim_idx * self.capacity + self.count;
91 unsafe {
92 *self.data.add(offset) = value;
93 }
94 }
95
96 self.count += 1;
97 }
98
99 pub fn get(&self, index: usize, output: &mut [f32]) {
101 assert!(index < self.count);
102 assert_eq!(output.len(), self.dimensions);
103
104 for dim_idx in 0..self.dimensions {
105 let offset = dim_idx * self.capacity + index;
106 output[dim_idx] = unsafe { *self.data.add(offset) };
107 }
108 }
109
110 pub fn dimension_slice(&self, dim_idx: usize) -> &[f32] {
113 assert!(dim_idx < self.dimensions);
114 let offset = dim_idx * self.capacity;
115 unsafe { std::slice::from_raw_parts(self.data.add(offset), self.count) }
116 }
117
118 pub fn dimension_slice_mut(&mut self, dim_idx: usize) -> &mut [f32] {
120 assert!(dim_idx < self.dimensions);
121 let offset = dim_idx * self.capacity;
122 unsafe { std::slice::from_raw_parts_mut(self.data.add(offset), self.count) }
123 }
124
    /// Number of vectors currently stored.
    pub fn len(&self) -> usize {
        self.count
    }

    /// Returns `true` when no vectors are stored.
    pub fn is_empty(&self) -> bool {
        self.count == 0
    }

    /// Number of f32 components per vector (fixed at construction).
    pub fn dimensions(&self) -> usize {
        self.dimensions
    }
139
140 fn grow(&mut self) {
142 let new_capacity = self.capacity * 2;
143 let new_total_elements = self.dimensions * new_capacity;
144
145 let new_layout = Layout::from_size_align(
146 new_total_elements * std::mem::size_of::<f32>(),
147 CACHE_LINE_SIZE,
148 )
149 .unwrap();
150
151 let new_data = unsafe { alloc(new_layout) as *mut f32 };
152
153 for dim_idx in 0..self.dimensions {
155 let old_offset = dim_idx * self.capacity;
156 let new_offset = dim_idx * new_capacity;
157
158 unsafe {
159 ptr::copy_nonoverlapping(
160 self.data.add(old_offset),
161 new_data.add(new_offset),
162 self.count,
163 );
164 }
165 }
166
167 let old_layout = Layout::from_size_align(
169 self.dimensions * self.capacity * std::mem::size_of::<f32>(),
170 CACHE_LINE_SIZE,
171 )
172 .unwrap();
173
174 unsafe {
175 dealloc(self.data as *mut u8, old_layout);
176 }
177
178 self.data = new_data;
179 self.capacity = new_capacity;
180 }
181
182 pub fn batch_euclidean_distances(&self, query: &[f32], output: &mut [f32]) {
185 assert_eq!(query.len(), self.dimensions);
186 assert_eq!(output.len(), self.count);
187
188 output.fill(0.0);
190
191 for dim_idx in 0..self.dimensions {
193 let dim_slice = self.dimension_slice(dim_idx);
194 let query_val = query[dim_idx];
195
196 for vec_idx in 0..self.count {
198 let diff = dim_slice[vec_idx] - query_val;
199 output[vec_idx] += diff * diff;
200 }
201 }
202
203 for distance in output.iter_mut() {
205 *distance = distance.sqrt();
206 }
207 }
208}
209
impl Drop for SoAVectorStorage {
    fn drop(&mut self) {
        // Reconstruct the layout the buffer was allocated with. `grow`
        // updates `capacity` together with `data`, so this size/alignment
        // always matches the live allocation.
        let layout = Layout::from_size_align(
            self.dimensions * self.capacity * std::mem::size_of::<f32>(),
            CACHE_LINE_SIZE,
        )
        .unwrap();

        // SAFETY: `self.data` was obtained from `alloc` with this exact
        // layout and is freed nowhere else.
        unsafe {
            dealloc(self.data as *mut u8, layout);
        }
    }
}
223
// SAFETY: the struct exclusively owns its heap buffer through `data`; no
// other aliasing handle to the allocation exists, and all mutation requires
// `&mut self`, so transferring ownership (Send) or sharing `&self` (Sync)
// across threads cannot race. NOTE(review): this relies on no method leaking
// the raw pointer — holds for the code visible here.
unsafe impl Send for SoAVectorStorage {}
unsafe impl Sync for SoAVectorStorage {}
226
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trips vectors through `push` / `get`.
    #[test]
    fn test_soa_storage() {
        let mut storage = SoAVectorStorage::new(3, 4);
        let rows = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]];
        for row in &rows {
            storage.push(row);
        }

        assert_eq!(storage.len(), 2);

        let mut fetched = vec![0.0; 3];
        for (idx, row) in rows.iter().enumerate() {
            storage.get(idx, &mut fetched);
            assert_eq!(fetched.as_slice(), &row[..]);
        }
    }

    /// Verifies the SoA layout: a dimension slice holds that component of
    /// every stored vector, in insertion order.
    #[test]
    fn test_dimension_slice() {
        let mut storage = SoAVectorStorage::new(3, 4);
        for row in &[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]] {
            storage.push(row);
        }

        assert_eq!(storage.dimension_slice(0), &[1.0, 4.0, 7.0]);
        assert_eq!(storage.dimension_slice(1), &[2.0, 5.0, 8.0]);
    }

    /// Distances from the x-axis basis vector to the three basis vectors:
    /// 0 to itself, sqrt(2) ≈ 1.414 to the other two.
    #[test]
    fn test_batch_distances() {
        let mut storage = SoAVectorStorage::new(3, 4);
        for basis in &[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]] {
            storage.push(basis);
        }

        let mut distances = vec![0.0; 3];
        storage.batch_euclidean_distances(&[1.0, 0.0, 0.0], &mut distances);

        assert!((distances[0] - 0.0).abs() < 0.001);
        assert!((distances[1] - 1.414).abs() < 0.01);
        assert!((distances[2] - 1.414).abs() < 0.01);
    }
}