Skip to main content

sklears_simd/
allocator.rs

1//! Custom allocators optimized for SIMD operations
2//!
3//! This module provides specialized memory allocators that ensure proper alignment
4//! and memory layout for optimal SIMD performance.
5
6#[cfg(not(feature = "no-std"))]
7use std::alloc::{GlobalAlloc, Layout, System};
8#[cfg(not(feature = "no-std"))]
9use std::ptr::{self, NonNull};
10#[cfg(not(feature = "no-std"))]
11use std::sync::atomic::{AtomicUsize, Ordering};
12#[cfg(not(feature = "no-std"))]
13use std::{mem, slice};
14
15#[cfg(feature = "no-std")]
16use core::alloc::{GlobalAlloc, Layout};
17#[cfg(feature = "no-std")]
18use core::ptr::{self, NonNull};
19#[cfg(feature = "no-std")]
20use core::sync::atomic::{AtomicUsize, Ordering};
21#[cfg(feature = "no-std")]
22use core::{mem, slice};
23#[cfg(feature = "no-std")]
24extern crate alloc;
25#[cfg(feature = "no-std")]
26use alloc::alloc as global_alloc;
27#[cfg(feature = "no-std")]
28use alloc::vec::Vec;
29
/// Statistics for SIMD allocator performance monitoring.
///
/// Every counter is updated with relaxed atomic operations, so the struct can be
/// shared by reference; values read while other threads are still recording are a
/// best-effort snapshot rather than a consistent cut across all counters.
#[derive(Debug, Default)]
pub struct AllocatorStats {
    /// Number of allocations recorded so far.
    pub total_allocations: AtomicUsize,
    /// Number of deallocations recorded so far.
    pub total_deallocations: AtomicUsize,
    /// Sum of the sizes, in bytes, of every recorded allocation.
    pub bytes_allocated: AtomicUsize,
    /// Sum of the sizes, in bytes, of every recorded deallocation.
    pub bytes_deallocated: AtomicUsize,
    /// Number of recorded allocations that were flagged as aligned.
    pub aligned_allocations: AtomicUsize,
    /// Largest live byte count observed right after an allocation was recorded.
    pub peak_memory_usage: AtomicUsize,
}

impl AllocatorStats {
    /// Creates a statistics block with every counter at zero.
    pub fn new() -> Self {
        Self::default()
    }

    /// Records one allocation of `size` bytes.
    ///
    /// `aligned` states whether the allocation counts towards
    /// [`allocation_efficiency`](Self::allocation_efficiency). The peak usage is
    /// raised when the live byte count after this allocation exceeds it.
    pub fn record_allocation(&self, size: usize, aligned: bool) {
        self.total_allocations.fetch_add(1, Ordering::Relaxed);
        self.bytes_allocated.fetch_add(size, Ordering::Relaxed);

        if aligned {
            self.aligned_allocations.fetch_add(1, Ordering::Relaxed);
        }

        // `fetch_max` is the atomic "raise if greater" primitive; it replaces a manual
        // compare-exchange retry loop with a single call.
        self.peak_memory_usage
            .fetch_max(self.current_memory_usage(), Ordering::Relaxed);
    }

    /// Records one deallocation of `size` bytes.
    pub fn record_deallocation(&self, size: usize) {
        self.total_deallocations.fetch_add(1, Ordering::Relaxed);
        self.bytes_deallocated.fetch_add(size, Ordering::Relaxed);
    }

    /// Returns the number of bytes currently live (allocated minus deallocated).
    ///
    /// The subtraction saturates at zero, so unbalanced bookkeeping can never
    /// underflow.
    pub fn current_memory_usage(&self) -> usize {
        let allocated = self.bytes_allocated.load(Ordering::Relaxed);
        let deallocated = self.bytes_deallocated.load(Ordering::Relaxed);
        allocated.saturating_sub(deallocated)
    }

    /// Returns the fraction of recorded allocations that were flagged as aligned.
    ///
    /// Yields `1.0` when nothing has been recorded yet.
    pub fn allocation_efficiency(&self) -> f64 {
        let total_allocs = self.total_allocations.load(Ordering::Relaxed);
        if total_allocs == 0 {
            return 1.0;
        }

        let aligned_allocs = self.aligned_allocations.load(Ordering::Relaxed);
        aligned_allocs as f64 / total_allocs as f64
    }
}
92
93/// SIMD-optimized allocator with alignment guarantees
94pub struct SimdAllocator {
95    stats: AllocatorStats,
96    default_alignment: usize,
97}
98
99impl SimdAllocator {
100    /// Create a new SIMD allocator with default 32-byte alignment (AVX2)
101    pub const fn new() -> Self {
102        Self::with_alignment(32)
103    }
104
105    /// Create a new SIMD allocator with custom alignment
106    pub const fn with_alignment(alignment: usize) -> Self {
107        Self {
108            stats: AllocatorStats {
109                total_allocations: AtomicUsize::new(0),
110                total_deallocations: AtomicUsize::new(0),
111                bytes_allocated: AtomicUsize::new(0),
112                bytes_deallocated: AtomicUsize::new(0),
113                aligned_allocations: AtomicUsize::new(0),
114                peak_memory_usage: AtomicUsize::new(0),
115            },
116            default_alignment: alignment,
117        }
118    }
119
120    /// Get allocator statistics
121    pub fn stats(&self) -> &AllocatorStats {
122        &self.stats
123    }
124
125    /// Allocate aligned memory for SIMD operations
126    pub fn allocate_simd<T>(&self, count: usize) -> Option<NonNull<T>> {
127        let size = count * mem::size_of::<T>();
128        let align = self.default_alignment.max(mem::align_of::<T>());
129
130        let layout = Layout::from_size_align(size, align).ok()?;
131
132        // Use system allocator for the actual allocation
133        #[cfg(not(feature = "no-std"))]
134        let ptr = unsafe { System.alloc(layout) };
135        #[cfg(feature = "no-std")]
136        let ptr = unsafe { global_alloc::alloc(layout) };
137
138        if ptr.is_null() {
139            None
140        } else {
141            self.stats.record_allocation(size, true);
142            NonNull::new(ptr.cast())
143        }
144    }
145
146    /// Deallocate SIMD-aligned memory previously allocated by `allocate_simd`.
147    ///
148    /// # Safety
149    ///
150    /// `ptr` must have been allocated by this allocator with `count` elements of type `T`, and
151    /// must not be used after this call.
152    pub unsafe fn deallocate_simd<T>(&self, ptr: NonNull<T>, count: usize) {
153        let size = count * mem::size_of::<T>();
154        let align = self.default_alignment.max(mem::align_of::<T>());
155
156        if let Ok(layout) = Layout::from_size_align(size, align) {
157            #[cfg(not(feature = "no-std"))]
158            System.dealloc(ptr.cast().as_ptr(), layout);
159            #[cfg(feature = "no-std")]
160            global_alloc::dealloc(ptr.cast().as_ptr(), layout);
161            self.stats.record_deallocation(size);
162        }
163    }
164
165    /// Allocate zero-initialized SIMD memory
166    pub fn allocate_zeroed_simd<T>(&self, count: usize) -> Option<NonNull<T>>
167    where
168        T: Copy,
169    {
170        let size = count * mem::size_of::<T>();
171        let align = self.default_alignment.max(mem::align_of::<T>());
172
173        let layout = Layout::from_size_align(size, align).ok()?;
174
175        #[cfg(not(feature = "no-std"))]
176        let ptr = unsafe { System.alloc_zeroed(layout) };
177        #[cfg(feature = "no-std")]
178        let ptr = unsafe { global_alloc::alloc_zeroed(layout) };
179
180        if ptr.is_null() {
181            None
182        } else {
183            self.stats.record_allocation(size, true);
184            NonNull::new(ptr.cast())
185        }
186    }
187
188    /// Reallocate SIMD memory with preserved alignment.
189    ///
190    /// # Safety
191    ///
192    /// `ptr` must have been allocated by this allocator with `old_count` elements of type `T`.
193    /// The returned pointer (if `Some`) replaces `ptr`, which must not be used after this call.
194    pub unsafe fn reallocate_simd<T>(
195        &self,
196        ptr: NonNull<T>,
197        old_count: usize,
198        new_count: usize,
199    ) -> Option<NonNull<T>> {
200        let old_size = old_count * mem::size_of::<T>();
201        let new_size = new_count * mem::size_of::<T>();
202        let align = self.default_alignment.max(mem::align_of::<T>());
203
204        let old_layout = Layout::from_size_align(old_size, align).ok()?;
205
206        #[cfg(not(feature = "no-std"))]
207        let new_ptr = System.realloc(ptr.cast().as_ptr(), old_layout, new_size);
208        #[cfg(feature = "no-std")]
209        let new_ptr = global_alloc::realloc(ptr.cast().as_ptr(), old_layout, new_size);
210
211        if new_ptr.is_null() {
212            None
213        } else {
214            self.stats.record_deallocation(old_size);
215            self.stats.record_allocation(new_size, true);
216            NonNull::new(new_ptr.cast())
217        }
218    }
219}
220
221impl Default for SimdAllocator {
222    fn default() -> Self {
223        Self::new()
224    }
225}
226
227unsafe impl GlobalAlloc for SimdAllocator {
228    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
229        #[cfg(not(feature = "no-std"))]
230        let ptr = System.alloc(layout);
231        #[cfg(feature = "no-std")]
232        let ptr = global_alloc::alloc(layout);
233        if !ptr.is_null() {
234            let is_aligned = layout.align() >= self.default_alignment;
235            self.stats.record_allocation(layout.size(), is_aligned);
236        }
237        ptr
238    }
239
240    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
241        #[cfg(not(feature = "no-std"))]
242        System.dealloc(ptr, layout);
243        #[cfg(feature = "no-std")]
244        global_alloc::dealloc(ptr, layout);
245        self.stats.record_deallocation(layout.size());
246    }
247
248    unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
249        #[cfg(not(feature = "no-std"))]
250        let ptr = System.alloc_zeroed(layout);
251        #[cfg(feature = "no-std")]
252        let ptr = global_alloc::alloc_zeroed(layout);
253        if !ptr.is_null() {
254            let is_aligned = layout.align() >= self.default_alignment;
255            self.stats.record_allocation(layout.size(), is_aligned);
256        }
257        ptr
258    }
259
260    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
261        #[cfg(not(feature = "no-std"))]
262        let new_ptr = System.realloc(ptr, layout, new_size);
263        #[cfg(feature = "no-std")]
264        let new_ptr = global_alloc::realloc(ptr, layout, new_size);
265        if !new_ptr.is_null() {
266            self.stats.record_deallocation(layout.size());
267            self.stats
268                .record_allocation(new_size, layout.align() >= self.default_alignment);
269        }
270        new_ptr
271    }
272}
273
274/// SIMD-aligned vector type with custom allocator
275pub struct SimdVec<T> {
276    ptr: Option<NonNull<T>>,
277    len: usize,
278    capacity: usize,
279    allocator: SimdAllocator,
280}
281
282impl<T> SimdVec<T> {
283    /// Create a new SIMD vector with default alignment
284    pub fn new() -> Self {
285        Self::with_allocator(SimdAllocator::new())
286    }
287
288    /// Create a new SIMD vector with custom allocator
289    pub fn with_allocator(allocator: SimdAllocator) -> Self {
290        Self {
291            ptr: None,
292            len: 0,
293            capacity: 0,
294            allocator,
295        }
296    }
297
298    /// Create a SIMD vector with specified capacity
299    pub fn with_capacity(capacity: usize) -> Self {
300        let mut vec = Self::new();
301        vec.reserve(capacity);
302        vec
303    }
304
305    /// Reserve capacity for additional elements
306    pub fn reserve(&mut self, additional: usize) {
307        let new_capacity = self.len.checked_add(additional).expect("Capacity overflow");
308
309        if new_capacity <= self.capacity {
310            return;
311        }
312
313        let new_capacity = new_capacity.next_power_of_two().max(4);
314
315        if let Some(old_ptr) = self.ptr {
316            // Reallocate existing memory
317            let new_ptr = unsafe {
318                self.allocator
319                    .reallocate_simd(old_ptr, self.capacity, new_capacity)
320            };
321
322            if let Some(new_ptr) = new_ptr {
323                self.ptr = Some(new_ptr);
324                self.capacity = new_capacity;
325            } else {
326                panic!("Failed to reallocate SIMD memory");
327            }
328        } else {
329            // First allocation
330            let new_ptr = self.allocator.allocate_simd::<T>(new_capacity);
331
332            if let Some(new_ptr) = new_ptr {
333                self.ptr = Some(new_ptr);
334                self.capacity = new_capacity;
335            } else {
336                panic!("Failed to allocate SIMD memory");
337            }
338        }
339    }
340
341    /// Push an element to the vector
342    pub fn push(&mut self, value: T) {
343        if self.len == self.capacity {
344            self.reserve(1);
345        }
346
347        unsafe {
348            let ptr = self
349                .ptr
350                .expect("Vector should have allocated memory")
351                .as_ptr();
352            ptr::write(ptr.add(self.len), value);
353        }
354
355        self.len += 1;
356    }
357
358    /// Pop an element from the vector
359    pub fn pop(&mut self) -> Option<T> {
360        if self.len == 0 {
361            None
362        } else {
363            self.len -= 1;
364            unsafe {
365                let ptr = self
366                    .ptr
367                    .expect("Vector should have allocated memory")
368                    .as_ptr();
369                Some(ptr::read(ptr.add(self.len)))
370            }
371        }
372    }
373
374    /// Get the length of the vector
375    pub fn len(&self) -> usize {
376        self.len
377    }
378
379    /// Check if the vector is empty
380    pub fn is_empty(&self) -> bool {
381        self.len == 0
382    }
383
384    /// Get the capacity of the vector
385    pub fn capacity(&self) -> usize {
386        self.capacity
387    }
388
389    /// Get a slice view of the vector
390    pub fn as_slice(&self) -> &[T] {
391        if let Some(ptr) = self.ptr {
392            unsafe { slice::from_raw_parts(ptr.as_ptr(), self.len) }
393        } else {
394            &[]
395        }
396    }
397
398    /// Get a mutable slice view of the vector
399    pub fn as_mut_slice(&mut self) -> &mut [T] {
400        if let Some(ptr) = self.ptr {
401            unsafe { slice::from_raw_parts_mut(ptr.as_ptr(), self.len) }
402        } else {
403            &mut []
404        }
405    }
406
407    /// Clear the vector
408    pub fn clear(&mut self) {
409        if mem::needs_drop::<T>() {
410            for i in 0..self.len {
411                unsafe {
412                    let ptr = self
413                        .ptr
414                        .expect("Vector should have allocated memory")
415                        .as_ptr();
416                    ptr::drop_in_place(ptr.add(i));
417                }
418            }
419        }
420        self.len = 0;
421    }
422
423    /// Get allocator statistics
424    pub fn allocator_stats(&self) -> &AllocatorStats {
425        self.allocator.stats()
426    }
427
428    /// Check if the underlying memory is properly aligned for SIMD
429    pub fn is_simd_aligned(&self) -> bool {
430        if let Some(ptr) = self.ptr {
431            let addr = ptr.as_ptr() as usize;
432            addr.is_multiple_of(self.allocator.default_alignment)
433        } else {
434            true // Empty vector is considered aligned
435        }
436    }
437}
438
439impl<T> Default for SimdVec<T> {
440    fn default() -> Self {
441        Self::new()
442    }
443}
444
445impl<T> Drop for SimdVec<T> {
446    fn drop(&mut self) {
447        self.clear();
448
449        if let Some(ptr) = self.ptr.take() {
450            unsafe {
451                self.allocator.deallocate_simd(ptr, self.capacity);
452            }
453        }
454    }
455}
456
457impl<T: Clone> Clone for SimdVec<T> {
458    fn clone(&self) -> Self {
459        let mut new_vec = Self::with_allocator(SimdAllocator::with_alignment(
460            self.allocator.default_alignment,
461        ));
462
463        new_vec.reserve(self.len);
464
465        for item in self.as_slice() {
466            new_vec.push(item.clone());
467        }
468
469        new_vec
470    }
471}
472
473/// Memory pool for frequent SIMD allocations
474pub struct SimdMemoryPool<T> {
475    free_blocks: Vec<(NonNull<T>, usize)>, // (ptr, capacity)
476    allocator: SimdAllocator,
477    block_size: usize,
478}
479
480impl<T> SimdMemoryPool<T> {
481    pub fn new(block_size: usize) -> Self {
482        Self {
483            free_blocks: Vec::new(),
484            allocator: SimdAllocator::new(),
485            block_size,
486        }
487    }
488
489    pub fn acquire(&mut self, min_capacity: usize) -> Option<(NonNull<T>, usize)> {
490        // Try to find a suitable free block
491        for (i, (_ptr, capacity)) in self.free_blocks.iter().enumerate() {
492            if *capacity >= min_capacity {
493                return Some(self.free_blocks.swap_remove(i));
494            }
495        }
496
497        // Allocate a new block if no suitable free block found
498        let capacity = min_capacity.max(self.block_size);
499        let ptr = self.allocator.allocate_simd(capacity)?;
500        Some((ptr, capacity))
501    }
502
503    pub fn release(&mut self, ptr: NonNull<T>, capacity: usize) {
504        self.free_blocks.push((ptr, capacity));
505    }
506
507    pub fn clear(&mut self) {
508        for (ptr, capacity) in self.free_blocks.drain(..) {
509            unsafe {
510                self.allocator.deallocate_simd(ptr, capacity);
511            }
512        }
513    }
514
515    pub fn stats(&self) -> &AllocatorStats {
516        self.allocator.stats()
517    }
518}
519
520impl<T> Drop for SimdMemoryPool<T> {
521    fn drop(&mut self) {
522        self.clear();
523    }
524}
525
// These tests exercise the system allocator directly, so they are only built for
// the std configuration.
#[cfg(all(test, not(feature = "no-std")))]
mod tests {
    use super::*;

    /// Numeric address of an allocation, used for alignment assertions.
    fn address_of<T>(ptr: NonNull<T>) -> usize {
        ptr.as_ptr() as usize
    }

    #[test]
    fn test_simd_allocator_basic() {
        let allocator = SimdAllocator::new();

        let ptr = allocator
            .allocate_simd::<f32>(16)
            .expect("allocation should succeed");
        assert_eq!(address_of(ptr) % 32, 0, "Memory should be 32-byte aligned");

        // SAFETY: `ptr` was allocated above for 16 `f32`s and is not used afterwards.
        unsafe { allocator.deallocate_simd(ptr, 16) };

        let stats = allocator.stats();
        assert_eq!(stats.total_allocations.load(Ordering::Relaxed), 1);
        assert_eq!(stats.total_deallocations.load(Ordering::Relaxed), 1);
    }

    #[test]
    fn test_simd_vec_basic_operations() {
        let mut vec = SimdVec::<i32>::new();

        assert!(vec.is_empty());
        assert_eq!(vec.len(), 0);
        assert!(vec.is_simd_aligned());

        for value in 1..=3 {
            vec.push(value);
        }

        assert_eq!(vec.len(), 3);
        assert!(!vec.is_empty());
        assert_eq!(vec.as_slice(), &[1, 2, 3]);

        assert_eq!(vec.pop(), Some(3));
        assert_eq!(vec.len(), 2);

        vec.clear();
        assert!(vec.is_empty());
    }

    #[test]
    fn test_simd_vec_capacity_growth() {
        let mut vec = SimdVec::<u64>::new();

        for i in 0..100 {
            vec.push(i);
        }

        assert_eq!(vec.len(), 100);
        assert!(vec.capacity() >= 100);
        assert!(vec.is_simd_aligned());

        // Every element must have survived the reallocations unchanged.
        for (index, &value) in vec.as_slice().iter().enumerate() {
            assert_eq!(value, index as u64);
        }
    }

    #[test]
    fn test_simd_vec_with_capacity() {
        let vec = SimdVec::<f64>::with_capacity(50);

        assert_eq!(vec.len(), 0);
        assert!(vec.capacity() >= 50);
        assert!(vec.is_simd_aligned());
    }

    #[test]
    fn test_allocator_stats() {
        let allocator = SimdAllocator::new();

        let ptr1 = allocator
            .allocate_simd::<f32>(16)
            .expect("operation should succeed");
        let ptr2 = allocator
            .allocate_simd::<f64>(8)
            .expect("operation should succeed");

        let stats = allocator.stats();
        assert_eq!(stats.total_allocations.load(Ordering::Relaxed), 2);
        assert_eq!(stats.aligned_allocations.load(Ordering::Relaxed), 2);
        assert!(stats.current_memory_usage() > 0);
        assert_eq!(stats.allocation_efficiency(), 1.0);

        // SAFETY: both pointers come from `allocator` with the element counts given
        // here and are not used afterwards.
        unsafe {
            allocator.deallocate_simd(ptr1, 16);
            allocator.deallocate_simd(ptr2, 8);
        }

        assert_eq!(stats.total_deallocations.load(Ordering::Relaxed), 2);
    }

    #[test]
    fn test_memory_pool() {
        let mut pool = SimdMemoryPool::<i32>::new(64);

        let (ptr1, cap1) = pool.acquire(32).expect("operation should succeed");
        assert!(cap1 >= 32);

        let (ptr2, cap2) = pool.acquire(16).expect("operation should succeed");
        assert!(cap2 >= 16);

        pool.release(ptr1, cap1);

        // Should reuse the released block
        let (ptr3, cap3) = pool.acquire(30).expect("operation should succeed");
        assert_eq!(ptr3, ptr1);
        assert_eq!(cap3, cap1);

        // Hand everything back so the pool frees it when it is dropped.
        pool.release(ptr2, cap2);
        pool.release(ptr3, cap3);
    }

    #[test]
    fn test_zeroed_allocation() {
        let allocator = SimdAllocator::new();

        let ptr = allocator
            .allocate_zeroed_simd::<u32>(16)
            .expect("operation should succeed");

        // SAFETY: `ptr` addresses 16 zero-initialised `u32`s owned by this test and is
        // released exactly once, after the last read.
        unsafe {
            let values = slice::from_raw_parts(ptr.as_ptr(), 16);
            assert!(values.iter().all(|&value| value == 0));

            allocator.deallocate_simd(ptr, 16);
        }
    }

    #[test]
    fn test_custom_alignment() {
        let allocator = SimdAllocator::with_alignment(64); // AVX-512 alignment

        let ptr = allocator
            .allocate_simd::<f32>(16)
            .expect("allocation should succeed");
        assert_eq!(address_of(ptr) % 64, 0, "Memory should be 64-byte aligned");

        // SAFETY: `ptr` was allocated above for 16 `f32`s and is not used afterwards.
        unsafe { allocator.deallocate_simd(ptr, 16) };
    }

    #[test]
    fn test_simd_vec_clone() {
        let mut vec1 = SimdVec::<i32>::new();
        for value in 1..=3 {
            vec1.push(value);
        }

        let vec2 = vec1.clone();

        assert_eq!(vec1.as_slice(), vec2.as_slice());
        assert!(vec2.is_simd_aligned());
    }
}