use crate::error::{Result, ZiporaError};
use crate::memory::cache_layout::{CacheOptimizedAllocator, CacheLayoutConfig, PrefetchHint};
use crate::memory::simd_ops::{fast_fill, fast_prefetch};
use super::CachePadded;
use std::alloc::{Layout, alloc, dealloc};
use std::ptr::NonNull;
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
use std::sync::{Arc, Mutex};
use std::thread;
use std::time::Duration;
/// All user sizes are rounded up to this alignment.
const ALIGN_SIZE: usize = 8;
/// log2(ALIGN_SIZE); reserved for offset-packing schemes.
#[allow(dead_code)]
const OFFSET_SHIFT: u32 = 3;
const SKIP_LIST_MAX_LEVELS: usize = 8;
/// Number of fast bins; must equal `FAST_BIN_SIZES.len()`.
const FAST_BIN_COUNT: usize = 64;
/// Aligned requests at or below this size are served from the fast bins.
const FAST_BIN_THRESHOLD: usize = 8192;
/// Sentinel offset marking the end of a free list. Offset 0 is never handed
/// out because `next_offset` starts at `ALIGN_SIZE`.
const LIST_TAIL: u32 = 0;
const FAST_BIN_SIZES: &[usize] = &[
8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128,
144, 160, 176, 192, 208, 224, 240, 256, 288, 320, 352, 384, 416, 448, 480, 512,
576, 640, 704, 768, 832, 896, 960, 1024, 1152, 1280, 1408, 1536, 1664, 1792, 1920, 2048,
2304, 2560, 2816, 3072, 3328, 3584, 3840, 4096, 4608, 5120, 5632, 6144, 6656, 7168, 7680, 8192,
];
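/// Head of one fast-bin free list. The `head` word packs a 32-bit offset and
/// a 32-bit generation counter so compare-exchange updates can detect ABA
/// reuse; the struct is aligned and padded to one cache line so adjacent bin
/// heads never share a line (no false sharing).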
#[derive(Debug)]
#[repr(align(64))]
struct LockFreeHead {
/// Packed `(generation << 32) | offset`; see `pack_head`/`unpack_head`.
head: AtomicU64,
count: AtomicU32,
/// Pads the 12 bytes of live data out to a full 64-byte cache line.
_padding: [u8; 64 - 12],
}
impl LockFreeHead {
fn new() -> Self {
Self {
head: AtomicU64::new(LIST_TAIL as u64),
count: AtomicU32::new(0),
_padding: [0; 64 - 12],
}
}
}
/// Node for the large-block skip list. The skip list is not wired up yet, so
/// this type is currently unused and kept as a placeholder.
#[derive(Debug)]
#[allow(dead_code)]
struct SkipListNode {
size: u32,
forward: [AtomicU32; SKIP_LIST_MAX_LEVELS],
}
#[allow(dead_code)]
impl SkipListNode {
fn new(size: u32) -> Self {
const ATOMIC_U32_INIT: AtomicU32 = AtomicU32::new(LIST_TAIL);
Self {
size,
forward: [ATOMIC_U32_INIT; SKIP_LIST_MAX_LEVELS],
}
}
}
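/// Configuration for `LockFreeMemoryPool`. The default favors throughput with
/// statistics enabled; see `high_performance()` and `compact()` for presets.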
#[derive(Debug, Clone)]
pub struct LockFreePoolConfig {
pub memory_size: usize,
pub enable_stats: bool,
pub max_cas_retries: u32,
pub backoff_strategy: BackoffStrategy,
pub enable_cache_alignment: bool,
pub cache_config: Option<CacheLayoutConfig>,
pub enable_numa_awareness: bool,
pub enable_huge_pages: bool,
pub huge_page_threshold: usize,
pub enable_simd_optimization: bool,
pub zero_on_free: bool,
}
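/// Backoff applied between failed CAS attempts on a bin head.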
#[derive(Debug, Clone, Copy)]
pub enum BackoffStrategy {
None,
Linear,
Exponential { max_delay_us: u64 },
}
impl Default for LockFreePoolConfig {
fn default() -> Self {
Self {
memory_size: 64 * 1024 * 1024,
enable_stats: true,
max_cas_retries: 1000,
backoff_strategy: BackoffStrategy::Exponential { max_delay_us: 1000 },
enable_cache_alignment: true,
cache_config: Some(CacheLayoutConfig::new()),
enable_numa_awareness: true,
enable_huge_pages: cfg!(target_os = "linux"),
huge_page_threshold: 2 * 1024 * 1024,
enable_simd_optimization: true,
zero_on_free: false,
}
}
}
impl LockFreePoolConfig {
pub fn high_performance() -> Self {
Self {
memory_size: 256 * 1024 * 1024,
enable_stats: false,
max_cas_retries: 10000,
backoff_strategy: BackoffStrategy::Exponential { max_delay_us: 100 },
enable_cache_alignment: true,
cache_config: Some(CacheLayoutConfig::sequential()),
enable_numa_awareness: true,
enable_huge_pages: true,
huge_page_threshold: 1024 * 1024,
enable_simd_optimization: true,
zero_on_free: false,
}
}
pub fn compact() -> Self {
Self {
memory_size: 16 * 1024 * 1024,
enable_stats: true,
max_cas_retries: 500,
backoff_strategy: BackoffStrategy::Linear,
enable_cache_alignment: false,
cache_config: None,
enable_numa_awareness: false,
enable_huge_pages: false,
huge_page_threshold: 4 * 1024 * 1024,
enable_simd_optimization: false,
zero_on_free: false,
}
}
}
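/// Atomic counters describing pool behavior. All counters use relaxed
/// ordering, so concurrent reads are approximate snapshots. The NUMA and
/// huge-page counters are reserved for features not yet wired in.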
#[derive(Debug, Default)]
pub struct LockFreePoolStats {
pub fast_allocs: AtomicU64,
pub skip_allocs: AtomicU64,
pub fast_deallocs: AtomicU64,
pub skip_deallocs: AtomicU64,
pub cas_failures: AtomicU64,
pub cas_successes: AtomicU64,
pub memory_usage: AtomicU64,
pub cache_aligned_allocs: AtomicU64,
pub numa_local_allocs: AtomicU64,
pub huge_page_allocs: AtomicU64,
}
impl LockFreePoolStats {
/// Running total of allocations served (fast-bin plus large-block). Note:
/// despite the name this is a count, not a per-second rate; divide by an
/// elapsed time measured by the caller to obtain a rate.
pub fn allocation_rate(&self) -> f64 {
let total_allocs = self.fast_allocs.load(Ordering::Relaxed)
+ self.skip_allocs.load(Ordering::Relaxed);
total_allocs as f64
}
/// Fraction of CAS attempts that failed; values near 0.0 mean low contention.
pub fn contention_ratio(&self) -> f64 {
let failures = self.cas_failures.load(Ordering::Relaxed);
let successes = self.cas_successes.load(Ordering::Relaxed);
let total = failures + successes;
if total == 0 { 0.0 } else { failures as f64 / total as f64 }
}
}
pub struct LockFreeMemoryPool {
config: LockFreePoolConfig,
memory: NonNull<u8>,
memory_layout: Layout,
fast_bins: Vec<CachePadded<LockFreeHead>>,
// Placeholder for the large-block skip list; not consulted on any path yet.
#[allow(dead_code)]
skip_list_head: Mutex<[AtomicU32; SKIP_LIST_MAX_LEVELS]>,
next_offset: AtomicU32,
stats: Option<Arc<LockFreePoolStats>>,
cache_allocator: Option<CacheOptimizedAllocator>,
}
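// SAFETY: shared state is either atomic (`fast_bins`, `next_offset`) or
// Mutex-guarded (`skip_list_head`), and the raw `memory` region is owned
// exclusively by the pool for its entire lifetime.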
unsafe impl Send for LockFreeMemoryPool {}
unsafe impl Sync for LockFreeMemoryPool {}
impl LockFreeMemoryPool {
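/// Packs a free-list offset and an ABA generation tag into a single CAS-able
/// u64: the low 32 bits hold the offset, the high 32 bits the generation.
/// For example, `pack_head(24, 3)` yields `0x0000_0003_0000_0018`.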
#[inline]
fn pack_head(offset: u32, generation: u32) -> u64 {
((generation as u64) << 32) | (offset as u64)
}
#[inline]
fn unpack_head(packed: u64) -> (u32, u32) {
let offset = (packed & 0xFFFFFFFF) as u32;
let generation = (packed >> 32) as u32;
(offset, generation)
}
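/// Creates a pool backed by one up-front allocation of `config.memory_size`
/// bytes. A minimal usage sketch (marked `ignore` since it is illustrative,
/// not a compiled doc-test):
///
/// ```ignore
/// let pool = LockFreeMemoryPool::new(LockFreePoolConfig::default())?;
/// let ptr = pool.allocate(64)?;
/// pool.deallocate(ptr, 64)?;
/// ```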
pub fn new(config: LockFreePoolConfig) -> Result<Self> {
if config.memory_size == 0 {
return Err(ZiporaError::invalid_data("memory_size must be non-zero"));
}
let layout = Layout::from_size_align(config.memory_size, ALIGN_SIZE)
.map_err(|e| ZiporaError::invalid_data(&format!("Invalid layout: {}", e)))?;
let memory = NonNull::new(unsafe { alloc(layout) })
.ok_or_else(|| ZiporaError::out_of_memory(config.memory_size))?;
let mut fast_bins = Vec::with_capacity(FAST_BIN_COUNT);
for _ in 0..FAST_BIN_COUNT {
fast_bins.push(CachePadded::new(LockFreeHead::new()));
}
const ATOMIC_U32_INIT: AtomicU32 = AtomicU32::new(LIST_TAIL);
let skip_list_head = Mutex::new([ATOMIC_U32_INIT; SKIP_LIST_MAX_LEVELS]);
let stats = if config.enable_stats {
Some(Arc::new(LockFreePoolStats::default()))
} else {
None
};
let cache_allocator = if config.enable_cache_alignment {
config.cache_config.clone().map(CacheOptimizedAllocator::new)
} else {
None
};
Ok(Self {
config,
memory,
memory_layout: layout,
fast_bins,
skip_list_head,
// Start at ALIGN_SIZE so that offset 0 stays free to serve as LIST_TAIL.
next_offset: AtomicU32::new(ALIGN_SIZE as u32),
stats,
cache_allocator,
})
}
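/// Allocates `size` bytes, rounded up to `ALIGN_SIZE`. Requests at or below
/// `FAST_BIN_THRESHOLD` take the lock-free fast-bin path; larger requests
/// fall through to the (placeholder) skip-list path.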
pub fn allocate(&self, size: usize) -> Result<NonNull<u8>> {
if size == 0 {
return Err(ZiporaError::invalid_data("Cannot allocate zero bytes"));
}
let aligned_size = self.align_size(size);
if aligned_size <= FAST_BIN_THRESHOLD {
self.allocate_from_fast_bin(aligned_size)
} else {
self.allocate_from_skip_list(aligned_size)
}
}
pub fn deallocate(&self, ptr: NonNull<u8>, size: usize) -> Result<()> {
if size == 0 {
return Ok(());
}
let aligned_size = self.align_size(size);
if aligned_size <= FAST_BIN_THRESHOLD {
self.deallocate_to_fast_bin(ptr, aligned_size)
} else {
self.deallocate_to_skip_list(ptr, aligned_size)
}
}
/// Deallocates `ptr`, first zeroing the block when `zero_on_free` is set
/// (SIMD `fast_fill` when enabled, scalar fill otherwise).
pub fn deallocate_with_zero(&self, ptr: NonNull<u8>, size: usize) -> Result<()> {
if size == 0 {
return Ok(());
}
if self.config.zero_on_free {
let slice = unsafe { std::slice::from_raw_parts_mut(ptr.as_ptr(), size) };
if self.config.enable_simd_optimization {
fast_fill(slice, 0);
} else {
slice.fill(0);
}
}
self.deallocate(ptr, size)
}
/// Allocates a batch of blocks, prefetching the fast-bin head a fixed
/// distance ahead to hide cache-miss latency on bin metadata. On failure,
/// blocks already allocated in this batch are returned to the pool.
pub fn allocate_bulk_simd(&self, sizes: &[usize]) -> Result<Vec<NonNull<u8>>> {
const PREFETCH_DISTANCE: usize = 8;
let mut results = Vec::with_capacity(sizes.len());
for (i, &size) in sizes.iter().enumerate() {
if i + PREFETCH_DISTANCE < sizes.len() && self.config.enable_simd_optimization {
let future_size = sizes[i + PREFETCH_DISTANCE];
let aligned_future_size = self.align_size(future_size);
if aligned_future_size <= FAST_BIN_THRESHOLD {
if let Ok(bin_idx) = self.size_to_bin_index(aligned_future_size) {
#[cfg(target_arch = "x86_64")]
{
fast_prefetch(&self.fast_bins[bin_idx], PrefetchHint::T0);
}
}
}
}
match self.allocate(size) {
Ok(ptr) => results.push(ptr),
Err(e) => {
// Roll back partial progress so the batch is all-or-nothing.
for (ptr, &s) in results.iter().zip(sizes.iter()) {
let _ = self.deallocate(*ptr, s);
}
return Err(e);
}
}
}
Ok(results)
}
pub fn stats(&self) -> Option<Arc<LockFreePoolStats>> {
self.stats.clone()
}
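/// Pops a block from the bin's free list, a Treiber-style lock-free stack.
/// Every successful CAS bumps the generation tag, so a concurrent pop/push
/// that reinstates the same offset cannot be mistaken for an unchanged head
/// (the ABA problem). An empty bin, or running out of CAS retries, falls
/// back to bump allocation of a fresh block.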
fn allocate_from_fast_bin(&self, size: usize) -> Result<NonNull<u8>> {
let bin_index = self.size_to_bin_index(size)?;
let bin = &self.fast_bins[bin_index];
for retry in 0..self.config.max_cas_retries {
let packed = bin.head.load(Ordering::Acquire);
let (current_offset, current_gen) = Self::unpack_head(packed);
if current_offset == LIST_TAIL {
return self.allocate_new_block(size);
}
let next_offset = unsafe {
let current_ptr = self.offset_to_ptr(current_offset)?;
*(current_ptr.as_ptr() as *const u32)
};
let next_packed = Self::pack_head(next_offset, current_gen.wrapping_add(1));
match bin.head.compare_exchange_weak(
packed, next_packed, Ordering::AcqRel,
Ordering::Acquire,
) {
Ok(_) => {
bin.count.fetch_sub(1, Ordering::Release);
if let Some(stats) = &self.stats {
stats.fast_allocs.fetch_add(1, Ordering::Relaxed);
stats.cas_successes.fetch_add(1, Ordering::Relaxed);
}
return self.offset_to_ptr(current_offset);
}
Err(_) => {
if let Some(stats) = &self.stats {
stats.cas_failures.fetch_add(1, Ordering::Relaxed);
}
self.backoff(retry);
}
}
}
self.allocate_new_block(size)
}
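/// Pushes a block onto the bin's free list: the block's first four bytes are
/// overwritten with the current head offset to form the intrusive `next`
/// link, then the head is swung to this block with a tagged CAS.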
fn deallocate_to_fast_bin(&self, ptr: NonNull<u8>, size: usize) -> Result<()> {
let bin_index = self.size_to_bin_index(size)?;
let bin = &self.fast_bins[bin_index];
let offset = self.ptr_to_offset(ptr)?;
for retry in 0..self.config.max_cas_retries {
let packed = bin.head.load(Ordering::Acquire);
let (current_offset, current_gen) = Self::unpack_head(packed);
unsafe {
*(ptr.as_ptr() as *mut u32) = current_offset;
}
let new_packed = Self::pack_head(offset, current_gen.wrapping_add(1));
match bin.head.compare_exchange_weak(
packed, new_packed, Ordering::Release,
Ordering::Relaxed,
) {
Ok(_) => {
bin.count.fetch_add(1, Ordering::Relaxed);
if let Some(stats) = &self.stats {
stats.fast_deallocs.fetch_add(1, Ordering::Relaxed);
stats.cas_successes.fetch_add(1, Ordering::Relaxed);
}
return Ok(());
}
Err(_) => {
if let Some(stats) = &self.stats {
stats.cas_failures.fetch_add(1, Ordering::Relaxed);
}
self.backoff(retry);
}
}
}
Err(ZiporaError::invalid_data("Failed to deallocate after max retries"))
}
fn allocate_from_skip_list(&self, size: usize) -> Result<NonNull<u8>> {
// The skip-list free list is not implemented yet; large blocks are carved
// directly from the bump region.
if let Some(stats) = &self.stats {
stats.skip_allocs.fetch_add(1, Ordering::Relaxed);
}
self.allocate_new_block(size)
}
fn deallocate_to_skip_list(&self, _ptr: NonNull<u8>, _size: usize) -> Result<()> {
// Large blocks are not reclaimed yet; only the statistic is recorded.
if let Some(stats) = &self.stats {
stats.skip_deallocs.fetch_add(1, Ordering::Relaxed);
}
Ok(())
}
fn allocate_new_block(&self, size: usize) -> Result<NonNull<u8>> {
let aligned_size = self.align_size(size);
// Claim a range with a single fetch_add. On failure the offset is not
// rolled back, so exhaustion can waste at most the tail of the region.
let offset = self.next_offset.fetch_add(aligned_size as u32, Ordering::Relaxed);
if offset as usize + aligned_size > self.config.memory_size {
return Err(ZiporaError::out_of_memory(aligned_size));
}
let ptr = self.offset_to_ptr(offset)?;
if self.cache_allocator.is_some() && self.config.enable_cache_alignment {
if let Some(stats) = &self.stats {
stats.cache_aligned_allocs.fetch_add(1, Ordering::Relaxed);
}
// Warm the first cache line of the new block before handing it out.
#[cfg(target_arch = "x86_64")]
unsafe {
std::arch::x86_64::_mm_prefetch(ptr.as_ptr() as *const i8, std::arch::x86_64::_MM_HINT_T0);
}
}
if let Some(stats) = &self.stats {
stats.memory_usage.fetch_add(aligned_size as u64, Ordering::Relaxed);
}
Ok(ptr)
}
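/// Maps an aligned size to the smallest fast bin that fits it. Linear scan:
/// `FAST_BIN_SIZES` is sorted ascending, so the first bin that fits is the
/// best fit.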
fn size_to_bin_index(&self, size: usize) -> Result<usize> {
for (index, &bin_size) in FAST_BIN_SIZES.iter().enumerate() {
if size <= bin_size {
return Ok(index);
}
}
Err(ZiporaError::invalid_data("Size too large for fast bins"))
}
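/// Rounds `size` up to the next multiple of `ALIGN_SIZE`.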
fn align_size(&self, size: usize) -> usize {
(size + ALIGN_SIZE - 1) & !(ALIGN_SIZE - 1)
}
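/// Converts a pool-relative offset back to a pointer; rejects `LIST_TAIL`.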
fn offset_to_ptr(&self, offset: u32) -> Result<NonNull<u8>> {
if offset == LIST_TAIL {
return Err(ZiporaError::invalid_data("Invalid offset"));
}
let addr = unsafe { self.memory.as_ptr().add(offset as usize) };
NonNull::new(addr).ok_or_else(|| ZiporaError::invalid_data("Invalid pointer"))
}
fn ptr_to_offset(&self, ptr: NonNull<u8>) -> Result<u32> {
let base = self.memory.as_ptr() as usize;
let addr = ptr.as_ptr() as usize;
if addr < base || addr >= base + self.config.memory_size {
return Err(ZiporaError::invalid_data("Pointer outside pool memory"));
}
Ok((addr - base) as u32)
}
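/// Sleeps according to the configured strategy after a failed CAS; the
/// exponential variant doubles the delay per retry up to `max_delay_us`.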
fn backoff(&self, retry_count: u32) {
match self.config.backoff_strategy {
BackoffStrategy::None => {},
BackoffStrategy::Linear => {
thread::sleep(Duration::from_micros(retry_count as u64));
},
BackoffStrategy::Exponential { max_delay_us } => {
// Cap the shift amount so large retry counts cannot overflow the u64.
let delay = std::cmp::min(1u64 << retry_count.min(63), max_delay_us);
thread::sleep(Duration::from_micros(delay));
},
}
}
/// Peeks at a bin's free-list head without modifying it, dispatching to an
/// AVX2 variant when available. Both paths currently read the same single
/// atomic word, so the AVX2 variant simply delegates to the scalar path.
#[inline]
#[allow(dead_code)]
fn find_free_slot_simd(&self, bin: &LockFreeHead) -> Option<u32> {
#[cfg(target_arch = "x86_64")]
{
use crate::system::cpu_features::get_cpu_features;
if self.config.enable_simd_optimization && get_cpu_features().has_avx2 {
return self.find_free_slot_avx2_readonly(bin);
}
}
self.find_free_slot_scalar(bin)
}
#[inline]
#[allow(dead_code)]
fn find_free_slot_scalar(&self, bin: &LockFreeHead) -> Option<u32> {
let packed = bin.head.load(Ordering::Acquire);
let (offset, _gen) = Self::unpack_head(packed);
if offset == LIST_TAIL {
None
} else {
Some(offset)
}
}
#[cfg(target_arch = "x86_64")]
#[inline]
#[allow(dead_code)]
fn find_free_slot_avx2_readonly(&self, bin: &LockFreeHead) -> Option<u32> {
self.find_free_slot_scalar(bin)
}
#[inline]
fn count_free_blocks_simd(&self, bitmap: &[u64]) -> usize {
if !self.config.enable_simd_optimization {
return bitmap.iter().map(|&bits| bits.count_ones() as usize).sum();
}
#[cfg(target_arch = "x86_64")]
{
use crate::system::cpu_features::get_cpu_features;
if get_cpu_features().has_popcnt {
return self.count_free_blocks_popcnt(bitmap);
}
}
bitmap.iter().map(|&bits| bits.count_ones() as usize).sum()
}
#[cfg(target_arch = "x86_64")]
#[inline]
fn count_free_blocks_popcnt(&self, bitmap: &[u64]) -> usize {
use std::arch::x86_64::_popcnt64;
let mut count = 0;
for &bits in bitmap {
count += unsafe { _popcnt64(bits as i64) } as usize;
}
count
}
/// Placeholder for a prefetch-assisted scan over large free blocks; the
/// large-block free list does not exist yet, so this always returns None.
#[inline]
#[allow(dead_code)]
fn find_large_block_with_prefetch(&self, _size: usize) -> Option<*mut u8> {
None
}
}
impl Drop for LockFreeMemoryPool {
fn drop(&mut self) {
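// SAFETY: `memory` was allocated in `new` with exactly `memory_layout`.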
unsafe {
dealloc(self.memory.as_ptr(), self.memory_layout);
}
}
}
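/// RAII handle that returns its block to the pool when dropped. A minimal
/// sketch (marked `ignore` since it is illustrative, not a compiled doc-test):
///
/// ```ignore
/// let pool = Arc::new(LockFreeMemoryPool::new(LockFreePoolConfig::default())?);
/// let ptr = pool.allocate(128)?;
/// let mut block = LockFreeAllocation::new(ptr, 128, Arc::clone(&pool));
/// block.as_mut_slice().fill(0xAB);
/// // block dropped here; the memory returns to the pool
/// ```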
pub struct LockFreeAllocation {
ptr: NonNull<u8>,
size: usize,
pool: Arc<LockFreeMemoryPool>,
}
impl LockFreeAllocation {
pub fn new(ptr: NonNull<u8>, size: usize, pool: Arc<LockFreeMemoryPool>) -> Self {
Self { ptr, size, pool }
}
#[inline]
pub fn as_ptr(&self) -> *mut u8 {
self.ptr.as_ptr()
}
#[inline]
pub fn size(&self) -> usize {
self.size
}
#[inline]
pub fn as_mut_slice(&mut self) -> &mut [u8] {
unsafe { std::slice::from_raw_parts_mut(self.ptr.as_ptr(), self.size) }
}
#[inline]
pub fn as_slice(&self) -> &[u8] {
unsafe { std::slice::from_raw_parts(self.ptr.as_ptr(), self.size) }
}
}
impl Drop for LockFreeAllocation {
fn drop(&mut self) {
if let Err(e) = self.pool.deallocate(self.ptr, self.size) {
log::error!("Failed to deallocate lock-free memory: {}", e);
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::sync::Arc;
use std::thread;
#[test]
fn test_lockfree_pool_creation() {
let config = LockFreePoolConfig::default();
let pool = LockFreeMemoryPool::new(config).unwrap();
assert!(pool.stats.is_some());
}
#[test]
fn test_basic_allocation_deallocation() {
let config = LockFreePoolConfig::default();
let pool = Arc::new(LockFreeMemoryPool::new(config).unwrap());
let ptr = pool.allocate(64).unwrap();
assert!(!ptr.as_ptr().is_null());
pool.deallocate(ptr, 64).unwrap();
}
#[test]
fn test_fast_bin_allocation() {
let config = LockFreePoolConfig::default();
let pool = Arc::new(LockFreeMemoryPool::new(config).unwrap());
let mut ptrs = Vec::new();
for i in 0..10 {
let size = (i + 1) * 64;
let ptr = pool.allocate(size).unwrap();
ptrs.push((ptr, size));
}
for (ptr, size) in ptrs {
pool.deallocate(ptr, size).unwrap();
}
}
#[test]
#[ignore]
fn test_concurrent_allocation() {
let config = LockFreePoolConfig::high_performance();
let pool = Arc::new(LockFreeMemoryPool::new(config).unwrap());
let mut handles = Vec::new();
for thread_id in 0..2 {
let pool_clone = Arc::clone(&pool);
let handle = thread::spawn(move || {
let mut allocations = Vec::new();
for i in 0..10 {
let size = (thread_id + 1) * 32 + i;
if let Ok(ptr) = pool_clone.allocate(size) {
allocations.push((ptr, size));
}
}
for (ptr, size) in allocations {
let _ = pool_clone.deallocate(ptr, size);
}
});
handles.push(handle);
}
for handle in handles {
handle.join().unwrap();
}
if let Some(stats) = pool.stats() {
let contention = stats.contention_ratio();
println!("CAS contention ratio: {:.2}%", contention * 100.0);
assert!(contention < 0.5);
}
}
#[test]
fn test_raii_allocation() {
let config = LockFreePoolConfig::default();
let pool = Arc::new(LockFreeMemoryPool::new(config).unwrap());
{
let ptr = pool.allocate(128).unwrap();
let _alloc = LockFreeAllocation::new(ptr, 128, Arc::clone(&pool));
}
if let Some(stats) = pool.stats() {
assert!(stats.fast_deallocs.load(Ordering::Relaxed) > 0);
}
}
#[test]
fn test_size_alignment() {
let config = LockFreePoolConfig::default();
let pool = LockFreeMemoryPool::new(config).unwrap();
assert_eq!(pool.align_size(1), 8);
assert_eq!(pool.align_size(8), 8);
assert_eq!(pool.align_size(9), 16);
assert_eq!(pool.align_size(15), 16);
assert_eq!(pool.align_size(16), 16);
}
#[test]
#[ignore]
fn test_pool_exhaustion() {
use std::time::{Duration, Instant};
let config = LockFreePoolConfig {
memory_size: 1024,
max_cas_retries: 3,
backoff_strategy: BackoffStrategy::None,
enable_cache_alignment: false,
cache_config: None,
enable_numa_awareness: false,
enable_huge_pages: false,
enable_stats: false,
enable_simd_optimization: false,
zero_on_free: false,
..LockFreePoolConfig::default()
};
let pool = LockFreeMemoryPool::new(config).unwrap();
let mut allocations = Vec::new();
let start = Instant::now();
let timeout = Duration::from_secs(5);
loop {
if start.elapsed() > timeout {
panic!("Test timed out after 5 seconds with {} allocations", allocations.len());
}
match pool.allocate(64) {
Ok(ptr) => {
allocations.push(ptr);
if allocations.len() > 20 {
panic!("Too many allocations: {} - possible infinite loop", allocations.len());
}
}
Err(_) => break,
}
}
assert!(allocations.len() > 0, "Should have allocated at least one block");
assert!(allocations.len() < 20, "Should be limited by small pool size, got {}", allocations.len());
println!("Pool exhaustion test completed in {:?} with {} allocations", start.elapsed(), allocations.len());
}
#[test]
fn test_simd_free_block_scanning() {
let config = LockFreePoolConfig {
enable_simd_optimization: true,
..LockFreePoolConfig::default()
};
let pool = Arc::new(LockFreeMemoryPool::new(config).unwrap());
let ptrs: Vec<_> = (0..10)
.map(|_| pool.allocate(128).unwrap())
.collect();
for ptr in &ptrs[0..5] {
pool.deallocate(*ptr, 128).unwrap();
}
let new_ptr = pool.allocate(128).unwrap();
assert!(!new_ptr.as_ptr().is_null());
pool.deallocate(new_ptr, 128).unwrap();
for ptr in &ptrs[5..10] {
pool.deallocate(*ptr, 128).unwrap();
}
}
#[test]
fn test_popcnt_bitmap_operations() {
let config = LockFreePoolConfig {
enable_simd_optimization: true,
..LockFreePoolConfig::default()
};
let pool = LockFreeMemoryPool::new(config).unwrap();
let bitmap = vec![0xFFFFFFFFFFFFFFFF_u64, 0x0000000000000000_u64];
let count = pool.count_free_blocks_simd(&bitmap);
assert_eq!(count, 64); // 64 set bits in the first word, none in the second.
}
#[test]
fn test_bulk_allocation_simd() {
let config = LockFreePoolConfig {
enable_simd_optimization: true,
..LockFreePoolConfig::default()
};
let pool = Arc::new(LockFreeMemoryPool::new(config).unwrap());
let sizes = vec![64, 128, 256, 512, 1024];
let ptrs = pool.allocate_bulk_simd(&sizes).unwrap();
assert_eq!(ptrs.len(), sizes.len());
assert!(ptrs.iter().all(|&ptr| !ptr.as_ptr().is_null()));
for (ptr, size) in ptrs.iter().zip(&sizes) {
pool.deallocate(*ptr, *size).unwrap();
}
}
#[test]
fn test_concurrent_simd_operations() {
let config = LockFreePoolConfig {
enable_simd_optimization: true,
..LockFreePoolConfig::default()
};
let pool = Arc::new(LockFreeMemoryPool::new(config).unwrap());
let mut handles = vec![];
for _ in 0..4 {
let pool_clone = Arc::clone(&pool);
handles.push(thread::spawn(move || {
let sizes = vec![64, 128, 256];
let ptrs = pool_clone.allocate_bulk_simd(&sizes).unwrap();
assert!(ptrs.iter().all(|&ptr| !ptr.as_ptr().is_null()));
for (ptr, size) in ptrs.iter().zip(&sizes) {
pool_clone.deallocate(*ptr, *size).unwrap();
}
}));
}
for handle in handles {
handle.join().unwrap();
}
}
#[test]
fn test_simd_zeroing_on_free() {
let config = LockFreePoolConfig {
enable_simd_optimization: true,
zero_on_free: true,
..LockFreePoolConfig::default()
};
let pool = LockFreeMemoryPool::new(config).unwrap();
let ptr = pool.allocate(256).unwrap();
unsafe {
std::ptr::write_bytes(ptr.as_ptr(), 0xFF, 256);
}
pool.deallocate_with_zero(ptr, 256).unwrap();
let new_ptr = pool.allocate(256).unwrap();
assert!(!new_ptr.as_ptr().is_null());
pool.deallocate(new_ptr, 256).unwrap();
}
#[test]
fn test_simd_optimization_disabled() {
let config = LockFreePoolConfig {
enable_simd_optimization: false,
..LockFreePoolConfig::default()
};
let pool = Arc::new(LockFreeMemoryPool::new(config).unwrap());
let sizes = vec![64, 128, 256];
let ptrs = pool.allocate_bulk_simd(&sizes).unwrap();
assert_eq!(ptrs.len(), sizes.len());
assert!(ptrs.iter().all(|&ptr| !ptr.as_ptr().is_null()));
for (ptr, size) in ptrs.iter().zip(&sizes) {
pool.deallocate(*ptr, *size).unwrap();
}
}
#[test]
fn test_bulk_allocation_with_prefetch() {
let config = LockFreePoolConfig {
enable_simd_optimization: true,
enable_cache_alignment: true,
..LockFreePoolConfig::default()
};
let pool = Arc::new(LockFreeMemoryPool::new(config).unwrap());
let sizes: Vec<usize> = (0..20).map(|i| 64 + i * 32).collect();
let ptrs = pool.allocate_bulk_simd(&sizes).unwrap();
assert_eq!(ptrs.len(), sizes.len());
assert!(ptrs.iter().all(|&ptr| !ptr.as_ptr().is_null()));
for (ptr, size) in ptrs.iter().zip(&sizes) {
pool.deallocate(*ptr, *size).unwrap();
}
}
#[test]
fn test_simd_zeroing_performance() {
let config = LockFreePoolConfig {
enable_simd_optimization: true,
zero_on_free: true,
enable_stats: true,
..LockFreePoolConfig::default()
};
let pool = LockFreeMemoryPool::new(config).unwrap();
let sizes = vec![64, 256, 1024, 4096];
for size in sizes {
let ptr = pool.allocate(size).unwrap();
unsafe {
std::ptr::write_bytes(ptr.as_ptr(), 0xAA, size);
}
pool.deallocate_with_zero(ptr, size).unwrap();
}
if let Some(stats) = pool.stats() {
assert!(stats.fast_deallocs.load(Ordering::Relaxed) > 0);
}
}
#[test]
fn test_bitmap_counting_accuracy() {
let config = LockFreePoolConfig {
enable_simd_optimization: true,
..LockFreePoolConfig::default()
};
let pool = LockFreeMemoryPool::new(config).unwrap();
let test_cases = vec![
(vec![0xFFFFFFFFFFFFFFFF_u64], 64),
(vec![0x0000000000000000_u64], 0),
(vec![0xAAAAAAAAAAAAAAAA_u64], 32),
(vec![0x5555555555555555_u64], 32),
(vec![0xFFFFFFFFFFFFFFFF_u64, 0xFFFFFFFFFFFFFFFF_u64], 128),
(vec![0x0000000000000001_u64], 1),
];
for (bitmap, expected_count) in test_cases {
let count = pool.count_free_blocks_simd(&bitmap);
assert_eq!(count, expected_count, "Failed for bitmap: {:?}", bitmap);
}
}
#[test]
fn test_concurrent_bulk_allocations() {
let config = LockFreePoolConfig {
enable_simd_optimization: true,
..LockFreePoolConfig::default()
};
let pool = Arc::new(LockFreeMemoryPool::new(config).unwrap());
let mut handles = vec![];
for thread_id in 0..4 {
let pool_clone = Arc::clone(&pool);
handles.push(thread::spawn(move || {
let sizes: Vec<usize> = (0..10).map(|i| 64 + (thread_id + i) * 16).collect();
let ptrs = pool_clone.allocate_bulk_simd(&sizes).unwrap();
assert_eq!(ptrs.len(), sizes.len());
for (ptr, size) in ptrs.iter().zip(&sizes) {
pool_clone.deallocate(*ptr, *size).unwrap();
}
}));
}
for handle in handles {
handle.join().unwrap();
}
}
#[test]
fn test_zero_on_free_with_reuse() {
let config = LockFreePoolConfig {
enable_simd_optimization: true,
zero_on_free: true,
..LockFreePoolConfig::default()
};
let pool = LockFreeMemoryPool::new(config).unwrap();
let ptr1 = pool.allocate(128).unwrap();
unsafe {
std::ptr::write_bytes(ptr1.as_ptr(), 0xFF, 128);
}
pool.deallocate_with_zero(ptr1, 128).unwrap();
let ptr2 = pool.allocate(128).unwrap();
assert!(!ptr2.as_ptr().is_null());
pool.deallocate(ptr2, 128).unwrap();
}
}