oxiblas_core/memory/
aligned_vec.rs

1//! Memory management utilities for OxiBLAS.
2//!
3//! This module provides:
4//! - Aligned memory allocation
5//! - Stack-based temporary allocation (StackReq pattern)
6//! - Cache-aware data layout utilities
7//! - Prefetch hints for cache optimization
8//! - Memory pool for temporary allocations
9//! - Custom allocator support via the `Alloc` trait
10
11use core::alloc::Layout;
12use core::mem::{align_of, size_of};
13use core::ptr::NonNull;
14use std::alloc::handle_alloc_error;
15
16use super::alloc::*;
17
18// =============================================================================
19// AlignedVec - Aligned heap allocation
20// =============================================================================
21
22/// A vector with guaranteed alignment and custom allocator support.
23///
24/// Unlike `Vec<T>`, this type ensures the underlying buffer is aligned
25/// to at least `ALIGN` bytes, which is required for efficient SIMD operations.
26///
27/// # Type Parameters
28///
29/// - `T`: The element type
30/// - `ALIGN`: The minimum alignment in bytes (default: 64 for cache line alignment)
31/// - `A`: The allocator type (default: `Global`)
32///
33/// # Custom Allocators
34///
35/// You can use a custom allocator by specifying the third type parameter:
36///
37/// ```ignore
38/// use oxiblas_core::memory::{AlignedVec, Alloc, Global};
39///
40/// // Use global allocator (default)
41/// let vec: AlignedVec<f64> = AlignedVec::zeros(100);
42///
43/// // Use custom allocator
44/// let custom_vec: AlignedVec<f64, 64, MyAlloc> = AlignedVec::zeros_in(100, MyAlloc::new());
45/// ```
46pub struct AlignedVec<T, const ALIGN: usize = DEFAULT_ALIGN, A: Alloc = Global> {
47    ptr: NonNull<T>,
48    len: usize,
49    cap: usize,
50    alloc: A,
51}
52
53// Convenience methods using Global allocator
54impl<T, const ALIGN: usize> AlignedVec<T, ALIGN, Global> {
55    /// Creates a new empty aligned vector.
56    #[inline]
57    pub const fn new() -> Self {
58        AlignedVec {
59            ptr: NonNull::dangling(),
60            len: 0,
61            cap: 0,
62            alloc: Global,
63        }
64    }
65
66    /// Creates a new aligned vector with the given capacity.
67    pub fn with_capacity(capacity: usize) -> Self {
68        Self::with_capacity_in(capacity, Global)
69    }
70
71    /// Creates a new aligned vector filled with zeros.
72    ///
73    /// This is more efficient than creating and then filling, as it uses
74    /// zeroed allocation.
75    pub fn zeros(len: usize) -> Self
76    where
77        T: bytemuck::Zeroable,
78    {
79        Self::zeros_in(len, Global)
80    }
81
82    /// Creates a new aligned vector filled with a value.
83    pub fn filled(len: usize, value: T) -> Self
84    where
85        T: Clone,
86    {
87        Self::filled_in(len, value, Global)
88    }
89
90    /// Creates a new aligned vector from a slice.
91    pub fn from_slice(slice: &[T]) -> Self
92    where
93        T: Clone,
94    {
95        Self::from_slice_in(slice, Global)
96    }
97}
98
99// Methods that work with any allocator
100impl<T, const ALIGN: usize, A: Alloc> AlignedVec<T, ALIGN, A> {
101    /// Creates a new empty aligned vector with the specified allocator.
102    #[inline]
103    pub fn new_in(alloc: A) -> Self {
104        AlignedVec {
105            ptr: NonNull::dangling(),
106            len: 0,
107            cap: 0,
108            alloc,
109        }
110    }
111
112    /// Creates a new aligned vector with the given capacity and allocator.
113    pub fn with_capacity_in(capacity: usize, alloc: A) -> Self {
114        if capacity == 0 {
115            return Self::new_in(alloc);
116        }
117
118        let layout = Self::layout_for(capacity);
119        let ptr = alloc.allocate(layout) as *mut T;
120
121        if ptr.is_null() {
122            handle_alloc_error(layout);
123        }
124
125        AlignedVec {
126            ptr: unsafe { NonNull::new_unchecked(ptr) },
127            len: 0,
128            cap: capacity,
129            alloc,
130        }
131    }
132
133    /// Creates a new aligned vector filled with zeros using the specified allocator.
134    pub fn zeros_in(len: usize, alloc: A) -> Self
135    where
136        T: bytemuck::Zeroable,
137    {
138        if len == 0 {
139            return Self::new_in(alloc);
140        }
141
142        let layout = Self::layout_for(len);
143        let ptr = alloc.allocate_zeroed(layout) as *mut T;
144
145        if ptr.is_null() {
146            handle_alloc_error(layout);
147        }
148
149        AlignedVec {
150            ptr: unsafe { NonNull::new_unchecked(ptr) },
151            len,
152            cap: len,
153            alloc,
154        }
155    }
156
157    /// Creates a new aligned vector filled with a value using the specified allocator.
158    pub fn filled_in(len: usize, value: T, alloc: A) -> Self
159    where
160        T: Clone,
161    {
162        let mut vec = Self::with_capacity_in(len, alloc);
163        for _ in 0..len {
164            vec.push(value.clone());
165        }
166        vec
167    }
168
169    /// Creates a new aligned vector from a slice using the specified allocator.
170    pub fn from_slice_in(slice: &[T], alloc: A) -> Self
171    where
172        T: Clone,
173    {
174        let mut vec = Self::with_capacity_in(slice.len(), alloc);
175        for item in slice {
176            vec.push(item.clone());
177        }
178        vec
179    }
180
181    /// Returns a reference to the allocator.
182    #[inline]
183    pub fn allocator(&self) -> &A {
184        &self.alloc
185    }
186
187    /// Returns the layout for a given capacity.
188    fn layout_for(capacity: usize) -> Layout {
189        let size = capacity * size_of::<T>();
190        let align = ALIGN.max(align_of::<T>());
191        Layout::from_size_align(size, align).expect("Invalid layout")
192    }
193
194    /// Returns the length of the vector.
195    #[inline]
196    pub fn len(&self) -> usize {
197        self.len
198    }
199
200    /// Returns true if the vector is empty.
201    #[inline]
202    pub fn is_empty(&self) -> bool {
203        self.len == 0
204    }
205
206    /// Returns the capacity of the vector.
207    #[inline]
208    pub fn capacity(&self) -> usize {
209        self.cap
210    }
211
212    /// Returns a pointer to the first element.
213    #[inline]
214    pub fn as_ptr(&self) -> *const T {
215        self.ptr.as_ptr()
216    }
217
218    /// Returns a mutable pointer to the first element.
219    #[inline]
220    pub fn as_mut_ptr(&mut self) -> *mut T {
221        self.ptr.as_ptr()
222    }
223
224    /// Returns a slice of the vector.
225    #[inline]
226    pub fn as_slice(&self) -> &[T] {
227        unsafe { core::slice::from_raw_parts(self.ptr.as_ptr(), self.len) }
228    }
229
230    /// Returns a mutable slice of the vector.
231    #[inline]
232    pub fn as_mut_slice(&mut self) -> &mut [T] {
233        unsafe { core::slice::from_raw_parts_mut(self.ptr.as_ptr(), self.len) }
234    }
235
236    /// Pushes a value onto the vector.
237    ///
238    /// # Panics
239    /// Panics if the vector is at capacity.
240    pub fn push(&mut self, value: T) {
241        if self.len >= self.cap {
242            self.grow();
243        }
244
245        unsafe {
246            self.ptr.as_ptr().add(self.len).write(value);
247        }
248        self.len += 1;
249    }
250
251    /// Pops a value from the vector.
252    pub fn pop(&mut self) -> Option<T> {
253        if self.len == 0 {
254            return None;
255        }
256
257        self.len -= 1;
258        unsafe { Some(self.ptr.as_ptr().add(self.len).read()) }
259    }
260
261    /// Clears the vector.
262    pub fn clear(&mut self) {
263        while self.pop().is_some() {}
264    }
265
266    /// Resizes the vector to the given length.
267    pub fn resize(&mut self, new_len: usize, value: T)
268    where
269        T: Clone,
270    {
271        if new_len > self.len {
272            self.reserve(new_len - self.len);
273            for _ in self.len..new_len {
274                self.push(value.clone());
275            }
276        } else {
277            while self.len > new_len {
278                self.pop();
279            }
280        }
281    }
282
283    /// Reserves capacity for at least `additional` more elements.
284    pub fn reserve(&mut self, additional: usize) {
285        let required = self.len + additional;
286        if required > self.cap {
287            let new_cap = required.max(self.cap * 2).max(8);
288            self.realloc(new_cap);
289        }
290    }
291
292    fn grow(&mut self) {
293        let new_cap = if self.cap == 0 { 8 } else { self.cap * 2 };
294        self.realloc(new_cap);
295    }
296
297    fn realloc(&mut self, new_cap: usize) {
298        let new_layout = Self::layout_for(new_cap);
299        let new_ptr = self.alloc.allocate(new_layout) as *mut T;
300
301        if new_ptr.is_null() {
302            handle_alloc_error(new_layout);
303        }
304
305        // Copy existing data
306        if self.cap > 0 {
307            unsafe {
308                core::ptr::copy_nonoverlapping(self.ptr.as_ptr(), new_ptr, self.len);
309                let old_layout = Self::layout_for(self.cap);
310                self.alloc
311                    .deallocate(self.ptr.as_ptr() as *mut u8, old_layout);
312            }
313        }
314
315        self.ptr = unsafe { NonNull::new_unchecked(new_ptr) };
316        self.cap = new_cap;
317    }
318}
319
320impl<T, const ALIGN: usize, A: Alloc> Drop for AlignedVec<T, ALIGN, A> {
321    fn drop(&mut self) {
322        // Drop all elements
323        for i in 0..self.len {
324            unsafe {
325                core::ptr::drop_in_place(self.ptr.as_ptr().add(i));
326            }
327        }
328
329        // Deallocate
330        if self.cap > 0 {
331            let layout = Self::layout_for(self.cap);
332            unsafe {
333                self.alloc.deallocate(self.ptr.as_ptr() as *mut u8, layout);
334            }
335        }
336    }
337}
338
339impl<T, const ALIGN: usize> Default for AlignedVec<T, ALIGN, Global> {
340    fn default() -> Self {
341        Self::new()
342    }
343}
344
345impl<T: Clone, const ALIGN: usize, A: Alloc> Clone for AlignedVec<T, ALIGN, A> {
346    fn clone(&self) -> Self {
347        Self::from_slice_in(self.as_slice(), self.alloc.clone())
348    }
349}
350
351impl<T, const ALIGN: usize, A: Alloc> core::ops::Deref for AlignedVec<T, ALIGN, A> {
352    type Target = [T];
353
354    fn deref(&self) -> &Self::Target {
355        self.as_slice()
356    }
357}
358
359impl<T, const ALIGN: usize, A: Alloc> core::ops::DerefMut for AlignedVec<T, ALIGN, A> {
360    fn deref_mut(&mut self) -> &mut Self::Target {
361        self.as_mut_slice()
362    }
363}
364
365impl<T, const ALIGN: usize, A: Alloc> core::ops::Index<usize> for AlignedVec<T, ALIGN, A> {
366    type Output = T;
367
368    fn index(&self, index: usize) -> &Self::Output {
369        &self.as_slice()[index]
370    }
371}
372
373impl<T, const ALIGN: usize, A: Alloc> core::ops::IndexMut<usize> for AlignedVec<T, ALIGN, A> {
374    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
375        &mut self.as_mut_slice()[index]
376    }
377}
378
379// Safety: AlignedVec is Send/Sync if T and A are
380unsafe impl<T: Send, const ALIGN: usize, A: Alloc + Send> Send for AlignedVec<T, ALIGN, A> {}
381unsafe impl<T: Sync, const ALIGN: usize, A: Alloc + Sync> Sync for AlignedVec<T, ALIGN, A> {}