amari_core/aligned_alloc.rs

//! Aligned memory allocation for SIMD-optimized operations
//!
//! This module provides memory alignment utilities specifically designed
//! for AVX2/SSE SIMD operations in geometric algebra computations.

use alloc::alloc::{alloc, dealloc, Layout};
use alloc::boxed::Box;
use alloc::vec;
use alloc::vec::Vec;
use core::ptr::NonNull;

/// 32-byte aligned allocation for AVX2 operations
pub const AVX2_ALIGNMENT: usize = 32;

/// 16-byte aligned allocation for SSE operations
pub const SSE_ALIGNMENT: usize = 16;

/// Cache line size for modern CPUs
pub const CACHE_LINE_SIZE: usize = 64;

/// Aligned memory block for SIMD operations
#[repr(C)]
pub struct AlignedMemory<T> {
    ptr: NonNull<T>,
    layout: Layout,
}

impl<T> AlignedMemory<T> {
    /// Allocate aligned memory for the given number of elements
    pub fn new(count: usize, alignment: usize) -> Result<Self, &'static str> {
        let size = count
            .checked_mul(core::mem::size_of::<T>())
            .ok_or("Allocation size overflows usize")?;
        if size == 0 {
            // The global allocator must not be called with a zero-sized layout.
            return Err("Cannot allocate a zero-sized block");
        }

        // Never request less alignment than `T` itself requires.
        let alignment = alignment.max(core::mem::align_of::<T>());
        let layout = Layout::from_size_align(size, alignment)
            .map_err(|_| "Invalid layout for aligned allocation")?;

        let ptr = unsafe { alloc(layout) as *mut T };
        if ptr.is_null() {
            return Err("Failed to allocate aligned memory");
        }

        let ptr = unsafe { NonNull::new_unchecked(ptr) };

        Ok(Self { ptr, layout })
    }

    /// Get a raw pointer to the allocated memory
    pub fn as_ptr(&self) -> *const T {
        self.ptr.as_ptr()
    }

    /// Get a mutable raw pointer to the allocated memory
    pub fn as_mut_ptr(&mut self) -> *mut T {
        self.ptr.as_ptr()
    }

    /// Get the number of elements that can fit in this allocation
    pub fn capacity(&self) -> usize {
        self.layout.size() / core::mem::size_of::<T>()
    }
}

impl<T> Drop for AlignedMemory<T> {
    fn drop(&mut self) {
        unsafe {
            dealloc(self.ptr.as_ptr() as *mut u8, self.layout);
        }
    }
}

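// Illustrative sketch (not part of the original API): how a caller might use
// `AlignedMemory` as an AVX2-friendly coefficient buffer. The helper name is
// an assumption; the memory returned by `new` is uninitialized, so every
// element is written before it is read.
#[allow(dead_code)]
fn aligned_block_example() -> Result<f64, &'static str> {
    let mut block = AlignedMemory::<f64>::new(8, AVX2_ALIGNMENT)?;
    for i in 0..block.capacity() {
        // Safety: `i` stays within the 8-element allocation.
        unsafe { block.as_mut_ptr().add(i).write(1.0) };
    }
    // Safety: all elements are initialized, and the pointer is valid and aligned.
    let coeffs = unsafe { core::slice::from_raw_parts(block.as_ptr(), block.capacity()) };
    Ok(coeffs.iter().sum())
}
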
/// Create a vector of `f64` coefficients for SIMD-oriented code paths
///
/// A `Vec<f64>` can only promise the alignment of `f64` itself (8 bytes).
/// On most 64-bit platforms the global allocator returns at least 16-byte
/// aligned blocks, but 32-byte AVX2 alignment is not guaranteed, and shifting
/// elements inside the vector cannot move the allocation's base address, so
/// the buffer cannot be "realigned" in place after the fact. Callers that
/// require strict 32-byte alignment should use [`AlignedMemory`]; SIMD
/// kernels should otherwise fall back to unaligned loads.
pub fn create_aligned_f64_vec(count: usize) -> Vec<f64> {
    vec![0.0; count]
}

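// Illustrative sketch (not part of the original module): one way a SIMD kernel
// might consume the alignment this module cares about, using the aligned AVX2
// load only when the buffer actually sits on a 32-byte boundary. The function
// name and shape are assumptions for illustration only.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
#[allow(dead_code)]
unsafe fn sum4_avx2(coeffs: &[f64; 4]) -> f64 {
    use core::arch::x86_64::{_mm256_load_pd, _mm256_loadu_pd, _mm256_storeu_pd};

    let ptr = coeffs.as_ptr();
    // `_mm256_load_pd` requires a 32-byte aligned address; fall back to the
    // unaligned load when the buffer did not land on such a boundary.
    let v = if (ptr as usize).is_multiple_of(AVX2_ALIGNMENT) {
        unsafe { _mm256_load_pd(ptr) }
    } else {
        unsafe { _mm256_loadu_pd(ptr) }
    };
    let mut lanes = [0.0f64; 4];
    unsafe { _mm256_storeu_pd(lanes.as_mut_ptr(), v) };
    lanes.iter().sum()
}
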
/// Cache-friendly memory pool for frequent allocations
pub struct MemoryPool {
    blocks: Vec<AlignedMemory<f64>>,
    block_size: usize,
    alignment: usize,
}

impl MemoryPool {
    /// Create a new memory pool with specified block size and alignment
    pub fn new(block_size: usize, alignment: usize) -> Self {
        Self {
            blocks: Vec::new(),
            block_size,
            alignment,
        }
    }

    /// Create a pool optimized for 3D Clifford algebra operations
    pub fn for_3d_clifford() -> Self {
        // 8 coefficients per multivector, 32-byte alignment for AVX2
        Self::new(8, AVX2_ALIGNMENT)
    }

    /// Allocate a block from the pool
    pub fn allocate(&mut self) -> Result<Box<[f64]>, &'static str> {
        // For now, use regular allocation
        // In a production system, this would maintain a pool of reusable blocks
        let coefficients = create_aligned_f64_vec(self.block_size);
        Ok(coefficients.into_boxed_slice())
    }

    /// Pre-allocate blocks for better performance
    pub fn pre_allocate(&mut self, count: usize) -> Result<(), &'static str> {
        for _ in 0..count {
            let block = AlignedMemory::new(self.block_size, self.alignment)?;
            self.blocks.push(block);
        }
        Ok(())
    }
}

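// Illustrative sketch (not part of the original API): how a caller might draw
// multivector storage from the pool. `allocate` currently hands out a fresh
// block on every call; the pre-allocated blocks are held for a future reuse
// scheme. The helper name is an assumption.
#[allow(dead_code)]
fn pool_example() -> Result<Box<[f64]>, &'static str> {
    let mut pool = MemoryPool::for_3d_clifford();
    pool.pre_allocate(4)?;
    let mut block = pool.allocate()?;
    block[0] = 1.0; // scalar coefficient of a 3D multivector
    Ok(block)
}
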
/// RAII wrapper for aligned coefficient arrays
pub struct AlignedCoefficients {
    data: Box<[f64]>,
}

impl AlignedCoefficients {
    /// Create aligned coefficients for the given count
    pub fn new(count: usize) -> Self {
        Self {
            data: create_aligned_f64_vec(count).into_boxed_slice(),
        }
    }

    /// Create zero-initialized aligned coefficients
    pub fn zero(count: usize) -> Self {
        let mut coeffs = Self::new(count);
        coeffs.data.fill(0.0);
        coeffs
    }

    /// Get the underlying data
    pub fn as_slice(&self) -> &[f64] {
        &self.data
    }

    /// Get mutable access to the underlying data
    pub fn as_mut_slice(&mut self) -> &mut [f64] {
        &mut self.data
    }

    /// Convert to boxed slice
    pub fn into_boxed_slice(self) -> Box<[f64]> {
        self.data
    }

    /// Check if the memory is properly aligned for SIMD
    pub fn is_simd_aligned(&self) -> bool {
        let ptr = self.data.as_ptr() as usize;
        ptr.is_multiple_of(AVX2_ALIGNMENT)
    }
}

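// Illustrative sketch (not part of the original API): typical use of
// `AlignedCoefficients` as multivector storage, with a runtime check of
// whether the buffer happened to land on an AVX2-friendly boundary. The
// helper name is an assumption.
#[allow(dead_code)]
fn coefficients_example() -> bool {
    let mut coeffs = AlignedCoefficients::zero(8);
    coeffs.as_mut_slice()[0] = 1.0; // scalar part of the multivector
    coeffs.is_simd_aligned()
}
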
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_aligned_memory() {
        let mem = AlignedMemory::<f64>::new(8, AVX2_ALIGNMENT).unwrap();
        let ptr = mem.as_ptr() as usize;
        assert_eq!(ptr % AVX2_ALIGNMENT, 0);
        assert_eq!(mem.capacity(), 8);
    }

    #[test]
    fn test_aligned_coefficients() {
        let coeffs = AlignedCoefficients::zero(8);
        assert_eq!(coeffs.as_slice().len(), 8);
        assert!(coeffs.as_slice().iter().all(|&x| x == 0.0));
    }

    #[test]
    fn test_memory_pool() {
        let mut pool = MemoryPool::for_3d_clifford();
        let _block = pool.allocate().unwrap();
        assert_eq!(pool.block_size, 8);
        assert_eq!(pool.alignment, AVX2_ALIGNMENT);
    }

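    // Additional test sketch (an assumption beyond the original suite):
    // `pre_allocate` should stash one properly aligned block per request.
    #[test]
    fn test_pre_allocate() {
        let mut pool = MemoryPool::for_3d_clifford();
        pool.pre_allocate(4).unwrap();
        assert_eq!(pool.blocks.len(), 4);
        for block in &pool.blocks {
            assert_eq!(block.as_ptr() as usize % AVX2_ALIGNMENT, 0);
            assert_eq!(block.capacity(), 8);
        }
    }
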
    #[test]
    fn test_aligned_f64_vec() {
        let vec = create_aligned_f64_vec(8);
        assert_eq!(vec.len(), 8);

        // For 3D Clifford algebra, we want good alignment
        let ptr = vec.as_ptr() as usize;
        // Should be at least 8-byte aligned for f64
        assert_eq!(ptr % 8, 0);
    }
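
    // Additional test sketch (an assumption beyond the original suite): the
    // allocator should honor the SSE and cache-line alignments as well.
    #[test]
    fn test_other_alignments() {
        let sse = AlignedMemory::<f64>::new(2, SSE_ALIGNMENT).unwrap();
        assert_eq!(sse.as_ptr() as usize % SSE_ALIGNMENT, 0);

        let line = AlignedMemory::<f64>::new(8, CACHE_LINE_SIZE).unwrap();
        assert_eq!(line.as_ptr() as usize % CACHE_LINE_SIZE, 0);
    }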
}