Skip to main content

oxiblas_core/memory/
aligned_vec.rs

//! Memory management utilities for OxiBLAS.
//!
//! This module provides:
//! - Aligned memory allocation
//! - Stack-based temporary allocation (StackReq pattern)
//! - Cache-aware data layout utilities
//! - Prefetch hints for cache optimization
//! - Memory pool for temporary allocations
//! - Custom allocator support via the `Alloc` trait
10
11use core::alloc::Layout;
12use core::mem::{align_of, size_of};
13use core::ptr::NonNull;
14
15#[cfg(not(feature = "std"))]
16use alloc::alloc::handle_alloc_error;
17#[cfg(feature = "std")]
18use std::alloc::handle_alloc_error;
19
20use super::alloc::*;
21
22// =============================================================================
23// AlignedVec - Aligned heap allocation
24// =============================================================================
25
26/// A vector with guaranteed alignment and custom allocator support.
27///
28/// Unlike `Vec<T>`, this type ensures the underlying buffer is aligned
29/// to at least `ALIGN` bytes, which is required for efficient SIMD operations.
30///
31/// # Type Parameters
32///
33/// - `T`: The element type
34/// - `ALIGN`: The minimum alignment in bytes (default: 64 for cache line alignment)
35/// - `A`: The allocator type (default: `Global`)
36///
37/// # Custom Allocators
38///
39/// You can use a custom allocator by specifying the third type parameter:
40///
41/// ```ignore
42/// use oxiblas_core::memory::{AlignedVec, Alloc, Global};
43///
44/// // Use global allocator (default)
45/// let vec: AlignedVec<f64> = AlignedVec::zeros(100);
46///
47/// // Use custom allocator
48/// let custom_vec: AlignedVec<f64, 64, MyAlloc> = AlignedVec::zeros_in(100, MyAlloc::new());
49/// ```
50pub struct AlignedVec<T, const ALIGN: usize = DEFAULT_ALIGN, A: Alloc = Global> {
51    ptr: NonNull<T>,
52    len: usize,
53    cap: usize,
54    alloc: A,
55}
56
57// Convenience methods using Global allocator
58impl<T, const ALIGN: usize> AlignedVec<T, ALIGN, Global> {
59    /// Creates a new empty aligned vector.
60    #[inline]
61    pub const fn new() -> Self {
62        AlignedVec {
63            ptr: NonNull::dangling(),
64            len: 0,
65            cap: 0,
66            alloc: Global,
67        }
68    }
69
70    /// Creates a new aligned vector with the given capacity.
71    pub fn with_capacity(capacity: usize) -> Self {
72        Self::with_capacity_in(capacity, Global)
73    }
74
75    /// Creates a new aligned vector filled with zeros.
76    ///
77    /// This is more efficient than creating and then filling, as it uses
78    /// zeroed allocation.
79    pub fn zeros(len: usize) -> Self
80    where
81        T: bytemuck::Zeroable,
82    {
83        Self::zeros_in(len, Global)
84    }
85
86    /// Creates a new aligned vector filled with a value.
87    pub fn filled(len: usize, value: T) -> Self
88    where
89        T: Clone,
90    {
91        Self::filled_in(len, value, Global)
92    }
93
94    /// Creates a new aligned vector from a slice.
95    pub fn from_slice(slice: &[T]) -> Self
96    where
97        T: Clone,
98    {
99        Self::from_slice_in(slice, Global)
100    }
101}
102
103// Methods that work with any allocator
104impl<T, const ALIGN: usize, A: Alloc> AlignedVec<T, ALIGN, A> {
105    /// Creates a new empty aligned vector with the specified allocator.
106    #[inline]
107    pub fn new_in(alloc: A) -> Self {
108        AlignedVec {
109            ptr: NonNull::dangling(),
110            len: 0,
111            cap: 0,
112            alloc,
113        }
114    }
115
116    /// Creates a new aligned vector with the given capacity and allocator.
117    pub fn with_capacity_in(capacity: usize, alloc: A) -> Self {
118        if capacity == 0 {
119            return Self::new_in(alloc);
120        }
121
122        let layout = Self::layout_for(capacity);
123        let ptr = alloc.allocate(layout) as *mut T;
124
125        if ptr.is_null() {
126            handle_alloc_error(layout);
127        }
128
129        AlignedVec {
130            ptr: unsafe { NonNull::new_unchecked(ptr) },
131            len: 0,
132            cap: capacity,
133            alloc,
134        }
135    }
136
137    /// Creates a new aligned vector filled with zeros using the specified allocator.
138    pub fn zeros_in(len: usize, alloc: A) -> Self
139    where
140        T: bytemuck::Zeroable,
141    {
142        if len == 0 {
143            return Self::new_in(alloc);
144        }
145
146        let layout = Self::layout_for(len);
147        let ptr = alloc.allocate_zeroed(layout) as *mut T;
148
149        if ptr.is_null() {
150            handle_alloc_error(layout);
151        }
152
153        AlignedVec {
154            ptr: unsafe { NonNull::new_unchecked(ptr) },
155            len,
156            cap: len,
157            alloc,
158        }
159    }
160
161    /// Creates a new aligned vector filled with a value using the specified allocator.
162    pub fn filled_in(len: usize, value: T, alloc: A) -> Self
163    where
164        T: Clone,
165    {
166        let mut vec = Self::with_capacity_in(len, alloc);
167        for _ in 0..len {
168            vec.push(value.clone());
169        }
170        vec
171    }
172
173    /// Creates a new aligned vector from a slice using the specified allocator.
174    pub fn from_slice_in(slice: &[T], alloc: A) -> Self
175    where
176        T: Clone,
177    {
178        let mut vec = Self::with_capacity_in(slice.len(), alloc);
179        for item in slice {
180            vec.push(item.clone());
181        }
182        vec
183    }
184
185    /// Returns a reference to the allocator.
186    #[inline]
187    pub fn allocator(&self) -> &A {
188        &self.alloc
189    }
190
191    /// Returns the layout for a given capacity.
192    fn layout_for(capacity: usize) -> Layout {
193        let size = capacity * size_of::<T>();
194        let align = ALIGN.max(align_of::<T>());
195        Layout::from_size_align(size, align).expect("Invalid layout")
196    }
197
198    /// Returns the length of the vector.
199    #[inline]
200    pub fn len(&self) -> usize {
201        self.len
202    }
203
204    /// Returns true if the vector is empty.
205    #[inline]
206    pub fn is_empty(&self) -> bool {
207        self.len == 0
208    }
209
210    /// Returns the capacity of the vector.
211    #[inline]
212    pub fn capacity(&self) -> usize {
213        self.cap
214    }
215
216    /// Returns a pointer to the first element.
217    #[inline]
218    pub fn as_ptr(&self) -> *const T {
219        self.ptr.as_ptr()
220    }
221
222    /// Returns a mutable pointer to the first element.
223    #[inline]
224    pub fn as_mut_ptr(&mut self) -> *mut T {
225        self.ptr.as_ptr()
226    }
227
228    /// Returns a slice of the vector.
229    #[inline]
230    pub fn as_slice(&self) -> &[T] {
231        unsafe { core::slice::from_raw_parts(self.ptr.as_ptr(), self.len) }
232    }
233
234    /// Returns a mutable slice of the vector.
235    #[inline]
236    pub fn as_mut_slice(&mut self) -> &mut [T] {
237        unsafe { core::slice::from_raw_parts_mut(self.ptr.as_ptr(), self.len) }
238    }
239
240    /// Pushes a value onto the vector.
241    ///
242    /// # Panics
243    /// Panics if the vector is at capacity.
244    pub fn push(&mut self, value: T) {
245        if self.len >= self.cap {
246            self.grow();
247        }
248
249        unsafe {
250            self.ptr.as_ptr().add(self.len).write(value);
251        }
252        self.len += 1;
253    }
254
255    /// Pops a value from the vector.
256    pub fn pop(&mut self) -> Option<T> {
257        if self.len == 0 {
258            return None;
259        }
260
261        self.len -= 1;
262        unsafe { Some(self.ptr.as_ptr().add(self.len).read()) }
263    }
264
265    /// Clears the vector.
266    pub fn clear(&mut self) {
267        while self.pop().is_some() {}
268    }
269
270    /// Resizes the vector to the given length.
271    pub fn resize(&mut self, new_len: usize, value: T)
272    where
273        T: Clone,
274    {
275        if new_len > self.len {
276            self.reserve(new_len - self.len);
277            for _ in self.len..new_len {
278                self.push(value.clone());
279            }
280        } else {
281            while self.len > new_len {
282                self.pop();
283            }
284        }
285    }
286
287    /// Reserves capacity for at least `additional` more elements.
288    pub fn reserve(&mut self, additional: usize) {
289        let required = self.len + additional;
290        if required > self.cap {
291            let new_cap = required.max(self.cap * 2).max(8);
292            self.realloc(new_cap);
293        }
294    }
295
296    fn grow(&mut self) {
297        let new_cap = if self.cap == 0 { 8 } else { self.cap * 2 };
298        self.realloc(new_cap);
299    }
300
301    fn realloc(&mut self, new_cap: usize) {
302        let new_layout = Self::layout_for(new_cap);
303        let new_ptr = self.alloc.allocate(new_layout) as *mut T;
304
305        if new_ptr.is_null() {
306            handle_alloc_error(new_layout);
307        }
308
309        // Copy existing data
310        if self.cap > 0 {
311            unsafe {
312                core::ptr::copy_nonoverlapping(self.ptr.as_ptr(), new_ptr, self.len);
313                let old_layout = Self::layout_for(self.cap);
314                self.alloc
315                    .deallocate(self.ptr.as_ptr() as *mut u8, old_layout);
316            }
317        }
318
319        self.ptr = unsafe { NonNull::new_unchecked(new_ptr) };
320        self.cap = new_cap;
321    }
322}
323
324impl<T, const ALIGN: usize, A: Alloc> Drop for AlignedVec<T, ALIGN, A> {
325    fn drop(&mut self) {
326        // Drop all elements
327        for i in 0..self.len {
328            unsafe {
329                core::ptr::drop_in_place(self.ptr.as_ptr().add(i));
330            }
331        }
332
333        // Deallocate
334        if self.cap > 0 {
335            let layout = Self::layout_for(self.cap);
336            unsafe {
337                self.alloc.deallocate(self.ptr.as_ptr() as *mut u8, layout);
338            }
339        }
340    }
341}
342
343impl<T, const ALIGN: usize> Default for AlignedVec<T, ALIGN, Global> {
344    fn default() -> Self {
345        Self::new()
346    }
347}
348
349impl<T: Clone, const ALIGN: usize, A: Alloc> Clone for AlignedVec<T, ALIGN, A> {
350    fn clone(&self) -> Self {
351        Self::from_slice_in(self.as_slice(), self.alloc.clone())
352    }
353}
354
355impl<T, const ALIGN: usize, A: Alloc> core::ops::Deref for AlignedVec<T, ALIGN, A> {
356    type Target = [T];
357
358    fn deref(&self) -> &Self::Target {
359        self.as_slice()
360    }
361}
362
363impl<T, const ALIGN: usize, A: Alloc> core::ops::DerefMut for AlignedVec<T, ALIGN, A> {
364    fn deref_mut(&mut self) -> &mut Self::Target {
365        self.as_mut_slice()
366    }
367}
368
369impl<T, const ALIGN: usize, A: Alloc> core::ops::Index<usize> for AlignedVec<T, ALIGN, A> {
370    type Output = T;
371
372    fn index(&self, index: usize) -> &Self::Output {
373        &self.as_slice()[index]
374    }
375}
376
377impl<T, const ALIGN: usize, A: Alloc> core::ops::IndexMut<usize> for AlignedVec<T, ALIGN, A> {
378    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
379        &mut self.as_mut_slice()[index]
380    }
381}
382
383// Safety: AlignedVec is Send/Sync if T and A are
384unsafe impl<T: Send, const ALIGN: usize, A: Alloc + Send> Send for AlignedVec<T, ALIGN, A> {}
385unsafe impl<T: Sync, const ALIGN: usize, A: Alloc + Sync> Sync for AlignedVec<T, ALIGN, A> {}