amari_core/aligned_alloc.rs

//! Aligned memory allocation for SIMD-optimized operations
//!
//! This module provides memory alignment utilities specifically designed
//! for AVX2/SSE SIMD operations in geometric algebra computations.
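//!
//! # Example
//!
//! Illustrative usage sketch (the `use` path is assumed from this file's
//! location and may need adjusting to the actual crate layout):
//!
//! ```ignore
//! use amari_core::aligned_alloc::{AlignedMemory, AVX2_ALIGNMENT};
//!
//! // Eight f64 coefficients (one 3D Clifford multivector), 32-byte aligned for AVX2.
//! let mem = AlignedMemory::<f64>::new(8, AVX2_ALIGNMENT).expect("allocation failed");
//! assert_eq!(mem.as_ptr() as usize % AVX2_ALIGNMENT, 0);
//! assert_eq!(mem.capacity(), 8);
//! ```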

use alloc::alloc::{alloc, dealloc, Layout};
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::ptr::NonNull;

/// 32-byte alignment for AVX2 operations
pub const AVX2_ALIGNMENT: usize = 32;

/// 16-byte alignment for SSE operations
pub const SSE_ALIGNMENT: usize = 16;

/// Cache line size in bytes on typical modern x86-64 CPUs
pub const CACHE_LINE_SIZE: usize = 64;

/// Aligned memory block for SIMD operations
#[repr(C)]
pub struct AlignedMemory<T> {
    ptr: NonNull<T>,
    layout: Layout,
}

impl<T> AlignedMemory<T> {
    /// Allocate aligned, uninitialized memory for `count` elements of `T`
    pub fn new(count: usize, alignment: usize) -> Result<Self, &'static str> {
        let size = count
            .checked_mul(core::mem::size_of::<T>())
            .ok_or("Allocation size overflows usize")?;
        if size == 0 {
            // The global allocator must not be called with a zero-sized layout.
            return Err("Cannot allocate zero bytes");
        }

        // Never request less than T's natural alignment.
        let alignment = alignment.max(core::mem::align_of::<T>());
        let layout = Layout::from_size_align(size, alignment)
            .map_err(|_| "Invalid layout for aligned allocation")?;

        // SAFETY: `layout` has non-zero size and a valid power-of-two alignment.
        let ptr = unsafe { alloc(layout) as *mut T };
        if ptr.is_null() {
            return Err("Failed to allocate aligned memory");
        }

        // SAFETY: `ptr` was just checked to be non-null.
        let ptr = unsafe { NonNull::new_unchecked(ptr) };

        Ok(Self { ptr, layout })
    }

    /// Get a raw pointer to the allocated memory
    pub fn as_ptr(&self) -> *const T {
        self.ptr.as_ptr()
    }

    /// Get a mutable raw pointer to the allocated memory
    pub fn as_mut_ptr(&mut self) -> *mut T {
        self.ptr.as_ptr()
    }

    /// Get the number of elements that can fit in this allocation
    pub fn capacity(&self) -> usize {
        self.layout.size() / core::mem::size_of::<T>()
    }
}

impl<T> Drop for AlignedMemory<T> {
    fn drop(&mut self) {
        // SAFETY: the pointer was allocated in `new` with exactly this `layout`.
        unsafe {
            dealloc(self.ptr.as_ptr() as *mut u8, self.layout);
        }
    }
}

/// Create a zero-initialized `Vec<f64>` for multivector coefficients
///
/// `Vec` allocates through the global allocator with `f64`'s natural 8-byte
/// alignment, so 32-byte AVX2 alignment is not guaranteed here. Callers that
/// need guaranteed SIMD alignment should use [`AlignedMemory`], or verify the
/// result with [`AlignedCoefficients::is_simd_aligned`].
pub fn create_aligned_f64_vec(count: usize) -> Vec<f64> {
    let mut vec = Vec::with_capacity(count);
    vec.resize(count, 0.0);
    vec
}

/// Cache-friendly memory pool for frequent allocations
pub struct MemoryPool {
    blocks: Vec<AlignedMemory<f64>>,
    block_size: usize,
    alignment: usize,
}

impl MemoryPool {
    /// Create a new memory pool with the specified block size and alignment
    pub fn new(block_size: usize, alignment: usize) -> Self {
        Self {
            blocks: Vec::new(),
            block_size,
            alignment,
        }
    }

    /// Create a pool optimized for 3D Clifford algebra operations
    pub fn for_3d_clifford() -> Self {
        // 8 coefficients per multivector, 32-byte alignment for AVX2
        Self::new(8, AVX2_ALIGNMENT)
    }

    /// Allocate a block from the pool
    pub fn allocate(&mut self) -> Result<Box<[f64]>, &'static str> {
        // For now this hands out a fresh allocation; a production pool would
        // reuse and reclaim the blocks held in `self.blocks`.
        let coefficients = create_aligned_f64_vec(self.block_size);
        Ok(coefficients.into_boxed_slice())
    }

    /// Pre-allocate aligned blocks; they are held by the pool but `allocate`
    /// does not yet hand them out
    pub fn pre_allocate(&mut self, count: usize) -> Result<(), &'static str> {
        for _ in 0..count {
            let block = AlignedMemory::new(self.block_size, self.alignment)?;
            self.blocks.push(block);
        }
        Ok(())
    }
}

/// RAII wrapper for aligned coefficient arrays
pub struct AlignedCoefficients {
    data: Box<[f64]>,
}

impl AlignedCoefficients {
    /// Create aligned coefficients for the given count
    pub fn new(count: usize) -> Self {
        Self {
            data: create_aligned_f64_vec(count).into_boxed_slice(),
        }
    }

    /// Create zero-initialized aligned coefficients
    pub fn zero(count: usize) -> Self {
        let mut coeffs = Self::new(count);
        // `new` already zero-initializes; the fill keeps the intent explicit.
        coeffs.data.fill(0.0);
        coeffs
    }

    /// Get the underlying data
    pub fn as_slice(&self) -> &[f64] {
        &self.data
    }

    /// Get mutable access to the underlying data
    pub fn as_mut_slice(&mut self) -> &mut [f64] {
        &mut self.data
    }

    /// Convert to a boxed slice
    pub fn into_boxed_slice(self) -> Box<[f64]> {
        self.data
    }

    /// Check whether the memory is properly aligned for AVX2 SIMD
    pub fn is_simd_aligned(&self) -> bool {
        let ptr = self.data.as_ptr() as usize;
        ptr.is_multiple_of(AVX2_ALIGNMENT)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_aligned_memory() {
        let mem = AlignedMemory::<f64>::new(8, AVX2_ALIGNMENT).unwrap();
        let ptr = mem.as_ptr() as usize;
        assert_eq!(ptr % AVX2_ALIGNMENT, 0);
        assert_eq!(mem.capacity(), 8);
    }

    #[test]
    fn test_aligned_coefficients() {
        let coeffs = AlignedCoefficients::zero(8);
        assert_eq!(coeffs.as_slice().len(), 8);
        assert!(coeffs.as_slice().iter().all(|&x| x == 0.0));
    }

    #[test]
    fn test_memory_pool() {
        let mut pool = MemoryPool::for_3d_clifford();
        let _block = pool.allocate().unwrap();
        assert_eq!(pool.block_size, 8);
        assert_eq!(pool.alignment, AVX2_ALIGNMENT);
    }
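
    // Illustrative addition (not in the original suite): exercises
    // `MemoryPool::pre_allocate` and checks that every pre-allocated block
    // honours the pool's requested alignment.
    #[test]
    fn test_memory_pool_pre_allocate() {
        let mut pool = MemoryPool::for_3d_clifford();
        pool.pre_allocate(4).unwrap();
        assert_eq!(pool.blocks.len(), 4);
        for block in &pool.blocks {
            assert_eq!(block.as_ptr() as usize % AVX2_ALIGNMENT, 0);
        }
    }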

    #[test]
    fn test_aligned_f64_vec() {
        let vec = create_aligned_f64_vec(8);
        assert_eq!(vec.len(), 8);

        // The Vec comes from the global allocator, so we only rely on the
        // natural 8-byte alignment of f64 here.
        let ptr = vec.as_ptr() as usize;
        assert_eq!(ptr % 8, 0);
    }
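
    // Illustrative sketch (not in the original suite): `AlignedMemory` hands
    // out uninitialized storage, so every element is written before anything
    // is read back through the slice.
    #[test]
    fn test_aligned_memory_write_read() {
        let mut mem = AlignedMemory::<f64>::new(8, AVX2_ALIGNMENT).unwrap();
        // SAFETY: the pointer is valid for `capacity()` elements and uniquely borrowed.
        let slice = unsafe { core::slice::from_raw_parts_mut(mem.as_mut_ptr(), mem.capacity()) };
        for (i, value) in slice.iter_mut().enumerate() {
            *value = i as f64;
        }
        assert_eq!(slice[0], 0.0);
        assert_eq!(slice[7], 7.0);
    }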
}