1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
//! Reorder, batch distance, and lifecycle operations for `ContiguousVectors`.
//!
//! Extracted from `perf_optimizations.rs` to reduce NLOC.
//! Contains reorder permutation, dot-product batching, Drop, and free SIMD helpers.
use super::perf_optimizations::ContiguousVectors;
use std::alloc::dealloc;
use std::ptr::{self, NonNull};
// =============================================================================
// ContiguousVectors: Reorder + Dot-Product + Drop
// =============================================================================
impl ContiguousVectors {
/// Reorders vectors according to the given permutation.
///
/// `new_order[i]` contains the old index of the vector that should occupy
/// position `i` after reordering. The permutation must have exactly
/// `self.len()` elements and every index must be `< self.len()`.
///
/// # Errors
///
/// Returns an error if:
/// - `new_order.len() != self.len()`
/// - Any index in `new_order` is out of bounds
/// - The new buffer allocation fails
pub fn reorder(&mut self, new_order: &[usize]) -> crate::error::Result<()> {
if new_order.len() != self.count {
return Err(crate::error::Error::Internal(format!(
"Reorder permutation length {} != vector count {}",
new_order.len(),
self.count
)));
}
if self.count == 0 {
return Ok(());
}
self.reorder_copy(new_order)
}
/// Performs the out-of-place vector copy for reordering.
///
/// Allocates a temporary buffer, copies vectors in permuted order, then
/// swaps the buffer into place. Uses `AllocGuard` for panic-safety.
fn reorder_copy(&mut self, new_order: &[usize]) -> crate::error::Result<()> {
use crate::alloc_guard::AllocGuard;
let new_layout = Self::layout(self.dimension, self.count)?;
let guard = AllocGuard::new_zeroed(new_layout).ok_or_else(|| {
crate::error::Error::AllocationFailed(format!(
"Reorder: failed to allocate {} bytes",
new_layout.size()
))
})?;
let new_ptr = NonNull::new(guard.cast::<f32>()).ok_or_else(|| {
crate::error::Error::AllocationFailed(
"Reorder: AllocGuard returned null pointer".to_string(),
)
})?;
self.copy_permuted_vectors(new_ptr.as_ptr(), new_order)?;
// Transfer ownership — guard will not free on drop
let _ = guard.into_raw();
// Deallocate old buffer
let old_layout = Self::layout(self.dimension, self.capacity)?;
// SAFETY: self.data was allocated with old_layout, is non-null (NonNull invariant).
// - Condition 1: old_layout matches the allocation parameters.
// - Condition 2: Pointer is non-null per NonNull invariant.
// SAFETY: Free old buffer after data migration to reordered buffer.
unsafe { dealloc(self.data.as_ptr().cast::<u8>(), old_layout) };
self.data = new_ptr;
self.capacity = self.count;
Ok(())
}
/// Copies vectors from the current buffer to `dst` in permuted order.
fn copy_permuted_vectors(
&self,
dst: *mut f32,
new_order: &[usize],
) -> crate::error::Result<()> {
let dim = self.dimension;
for (new_idx, &old_idx) in new_order.iter().enumerate() {
if old_idx >= self.count {
return Err(crate::error::Error::Internal(format!(
"Reorder index {old_idx} out of bounds (count={})",
self.count
)));
}
// SAFETY: src is within the current allocation (old_idx < count, count <= capacity).
// dst is within the new allocation (new_idx < new_order.len() == count).
// Both buffers are distinct (non-overlapping) allocations with room for `dim` f32s.
// - Condition 1: old_idx < count ensures src offset is in bounds.
// - Condition 2: new_idx < count ensures dst offset is in bounds.
// SAFETY: Out-of-place copy for cache-locality reordering.
unsafe {
ptr::copy_nonoverlapping(
self.data.as_ptr().add(old_idx * dim),
dst.add(new_idx * dim),
dim,
);
}
}
Ok(())
}
/// Computes dot product with another vector using SIMD.
#[inline]
#[must_use]
pub fn dot_product(&self, index: usize, query: &[f32]) -> Option<f32> {
let vector = self.get(index)?;
Some(crate::simd_native::dot_product_native(vector, query))
}
/// Prefetch distance for cache warming.
const PREFETCH_DISTANCE: usize = 4;
/// Computes batch dot products with a query vector.
///
/// This is optimized for HNSW search with prefetching.
#[must_use]
pub fn batch_dot_products(&self, indices: &[usize], query: &[f32]) -> Vec<f32> {
let mut results = Vec::with_capacity(indices.len());
for (i, &idx) in indices.iter().enumerate() {
// Prefetch upcoming vectors
if i + Self::PREFETCH_DISTANCE < indices.len() {
self.prefetch(indices[i + Self::PREFETCH_DISTANCE]);
}
if let Some(score) = self.dot_product(idx, query) {
results.push(score);
}
}
results
}
}
impl Drop for ContiguousVectors {
fn drop(&mut self) {
// EPIC-032/US-002: No null check needed - NonNull guarantees non-null
// Layout was valid at construction; it must still be valid at drop.
let Ok(layout) = Self::layout(self.dimension, self.capacity) else {
// Layout was valid at construction; this branch is unreachable
// unless memory corruption occurred. Leak memory rather than abort.
tracing::error!(
"ContiguousVectors::drop: layout computation failed \
(dim={}, cap={}), leaking memory",
self.dimension,
self.capacity,
);
return;
};
// SAFETY: data was allocated with this layout, is non-null (NonNull invariant)
// - Condition 1: Layout matches original allocation parameters.
// - Condition 2: Pointer is non-null per NonNull invariant.
// SAFETY: Release allocated memory when ContiguousVectors is dropped.
unsafe {
dealloc(self.data.as_ptr().cast::<u8>(), layout);
}
}
}
// =============================================================================
// Batch Distance Computation (free functions)
// =============================================================================
/// Computes multiple dot products in a single pass (cache-optimized).
///
/// F-17: Delegates to `batch_dot_product_native` which includes `x86_64`
/// prefetch hints for upcoming candidate vectors.
#[must_use]
pub fn batch_dot_products_simd(vectors: &[&[f32]], query: &[f32]) -> Vec<f32> {
crate::simd_native::batch_dot_product_native(vectors, query)
}
// =============================================================================
// SIMD Padding Utility
// =============================================================================
/// AVX2 register width for `f32` lanes: 256 bits / 32 bits = 8 lanes.
const SIMD_WIDTH: usize = 8;
/// Pads a vector to the next multiple of 8 (AVX2 register width for `f32`).
///
/// Appending zeros does not affect distance computations (cosine, euclidean, dot)
/// when the query and stored vectors share the same padded length.
///
/// Returns an empty `Vec` when the input is empty (0 is already a multiple of 8).
///
/// # Examples
///
/// ```
/// use velesdb_core::contiguous_ops::pad_to_simd_width;
///
/// let v = vec![1.0_f32, 2.0, 3.0];
/// let padded = pad_to_simd_width(&v);
/// assert_eq!(padded.len(), 8);
/// assert_eq!(&padded[..3], &[1.0, 2.0, 3.0]);
/// ```
#[must_use]
pub fn pad_to_simd_width(vector: &[f32]) -> Vec<f32> {
let len = vector.len();
if len == 0 {
return Vec::new();
}
let padded_len = len.div_ceil(SIMD_WIDTH) * SIMD_WIDTH;
let mut padded = vec![0.0_f32; padded_len];
padded[..len].copy_from_slice(vector);
padded
}
/// Computes multiple cosine similarities in a single pass with prefetch.
#[must_use]
pub fn batch_cosine_similarities(vectors: &[&[f32]], query: &[f32]) -> Vec<f32> {
let prefetch_distance = crate::simd_native::calculate_prefetch_distance(query.len());
let mut results = Vec::with_capacity(vectors.len());
for (i, v) in vectors.iter().enumerate() {
if i + prefetch_distance < vectors.len() {
crate::simd_native::prefetch_vector(vectors[i + prefetch_distance]);
}
results.push(crate::simd_native::cosine_similarity_native(v, query));
}
results
}