memkit 0.1.1-beta.1

Deterministic, intent-driven memory allocation for systems requiring predictable performance
//! Turbo Arena - Thread-local bump arena with minimal per-allocation overhead.
//!
//! This combines the speed of MkFastArena with automatic thread-local storage,
//! eliminating RefCell overhead by using raw pointers and careful lifetime
//! management. Because each thread owns its own arena, no locks or atomics
//! are needed.

use std::alloc::{alloc, dealloc, Layout};
use std::cell::UnsafeCell;
use std::ptr::NonNull;

use super::hints::{likely, unlikely};
use super::simd;

/// Thread-local turbo arena storage.
struct TurboArenaInner {
    /// Start of the arena memory.
    base: NonNull<u8>,
    /// End of the arena memory.
    end: *const u8,
    /// Current allocation pointer.
    ptr: *mut u8,
    /// Arena size for deallocation.
    size: usize,
}

impl TurboArenaInner {
    fn new(size: usize) -> Self {
        // std::alloc::alloc is undefined behavior for zero-sized layouts.
        assert!(size > 0, "arena size must be non-zero");
        let layout = Layout::from_size_align(size, 4096).expect("Invalid arena size");
        let base = unsafe {
            let ptr = alloc(layout);
            NonNull::new(ptr).expect("Failed to allocate arena")
        };
        let end = unsafe { base.as_ptr().add(size) };
        
        Self {
            base,
            end,
            ptr: base.as_ptr(),
            size,
        }
    }
    
    #[inline(always)]
    fn alloc_raw(&mut self, layout: Layout) -> *mut u8 {
        let align = layout.align();
        let size = layout.size();
        
        let addr = self.ptr as usize;
        // Align up: round addr to the next multiple of `align`
        // (e.g. addr = 0x1003, align = 8 -> aligned = 0x1008).
        let aligned = (addr + align - 1) & !(align - 1);
        // Checked add so an oversized request cannot wrap around the
        // address space and slip past the bounds check below.
        let new_addr = match aligned.checked_add(size) {
            Some(a) => a,
            None => return std::ptr::null_mut(),
        };
        
        // Bounds check with likely hint (usually succeeds)
        if likely(new_addr <= self.end as usize) {
            self.ptr = new_addr as *mut u8;
            aligned as *mut u8
        } else {
            std::ptr::null_mut()
        }
    }
    
    #[inline(always)]
    fn reset(&mut self) {
        self.ptr = self.base.as_ptr();
    }
}

impl Drop for TurboArenaInner {
    fn drop(&mut self) {
        unsafe {
            let layout = Layout::from_size_align_unchecked(self.size, 4096);
            dealloc(self.base.as_ptr(), layout);
        }
    }
}

/// A turbo-charged thread-local arena.
///
/// Allocation is a single pointer bump, kept fast by:
/// - Using raw pointers instead of RefCell
/// - Branch prediction hints on hot paths
/// - SIMD-accelerated slice initialization
///
/// # Safety
///
/// This is sound because:
/// - Each thread gets its own arena via thread_local!, so the inner state is
///   never shared across threads
/// - UnsafeCell provides interior mutability without RefCell's runtime checks
/// - The unsafe allocation methods return references the borrow checker does
///   not track; callers must not use them after reset()
///
/// # Example
///
/// ```rust,ignore
/// use memkit::MkTurboArena;
///
/// MkTurboArena::with(|arena| {
///     unsafe {
///         let x = arena.alloc(42u64).unwrap();
///         let slice = arena.alloc_slice_zero::<f32>(1000).unwrap();
///     }
///     arena.reset();
/// });
/// ```
pub struct MkTurboArena {
    inner: UnsafeCell<TurboArenaInner>,
}

// SAFETY: the arena is only ever reached through thread_local!, so the
// UnsafeCell inside is never actually shared across threads.
unsafe impl Sync for MkTurboArena {}

impl MkTurboArena {
    /// Create a new turbo arena with the given size.
    pub fn new(size: usize) -> Self {
        Self {
            inner: UnsafeCell::new(TurboArenaInner::new(size)),
        }
    }
    
    /// Access the thread-local turbo arena.
    ///
    /// The arena is lazily initialized on first access with 4MB capacity.
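    ///
    /// A minimal usage sketch (an ignored doc-test, since the allocation
    /// methods are unsafe and their results must not outlive a reset()):
    ///
    /// ```rust,ignore
    /// use memkit::MkTurboArena;
    ///
    /// MkTurboArena::with(|arena| {
    ///     let buf = unsafe { arena.alloc_slice_zero::<u8>(256) }.unwrap();
    ///     assert_eq!(buf.len(), 256);
    ///     arena.reset();
    /// });
    /// ```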
    #[inline(always)]
    pub fn with<F, R>(f: F) -> R
    where
        F: FnOnce(&MkTurboArena) -> R,
    {
        thread_local! {
            static ARENA: MkTurboArena = MkTurboArena::new(4 * 1024 * 1024);
        }
        ARENA.with(f)
    }
    
    /// Run the closure with a freshly created arena of the given size.
    ///
    /// Unlike [`with`](Self::with), this does not touch the thread-local
    /// arena; the new arena is dropped when the closure returns.
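    ///
    /// A minimal sketch: a one-off 64 KiB arena for a short-lived task.
    ///
    /// ```rust,ignore
    /// use memkit::MkTurboArena;
    ///
    /// MkTurboArena::with_size(64 * 1024, |arena| {
    ///     let buf = unsafe { arena.alloc_slice_zero::<u8>(1024) }.unwrap();
    ///     assert_eq!(buf.len(), 1024);
    /// });
    /// ```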
    #[inline]
    pub fn with_size<F, R>(size: usize, f: F) -> R
    where
        F: FnOnce(&MkTurboArena) -> R,
    {
        let arena = MkTurboArena::new(size);
        f(&arena)
    }
    
    /// Allocate and initialize a value.
    ///
    /// Note that Drop implementations are never run for arena-allocated
    /// values; reset() and the arena's own drop only reclaim the memory.
    ///
    /// # Safety
    ///
    /// The caller must ensure that the returned reference does not outlive
    /// the arena and is not used after reset().
    #[allow(clippy::mut_from_ref)]
    #[inline(always)]
    pub unsafe fn alloc<T>(&self, value: T) -> Option<&mut T> {
        let ptr = self.alloc_raw::<T>()?;
        unsafe {
            ptr.write(value);
            Some(&mut *ptr)
        }
    }
    
    /// Allocate memory for a T without initializing it.
    ///
    /// Returns None if the arena is out of space; the pointer stays valid
    /// for writes until the next reset().
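    ///
    /// A minimal sketch; the caller is responsible for initializing the
    /// memory before reading it:
    ///
    /// ```rust,ignore
    /// MkTurboArena::with(|arena| {
    ///     let p = arena.alloc_raw::<u32>().expect("arena full");
    ///     unsafe { p.write(7) };
    /// });
    /// ```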
    #[inline(always)]
    pub fn alloc_raw<T>(&self) -> Option<*mut T> {
        let layout = Layout::new::<T>();
        let ptr = unsafe { (*self.inner.get()).alloc_raw(layout) };
        if likely(!ptr.is_null()) {
            Some(ptr as *mut T)
        } else {
            None
        }
    }
    
    /// Allocate a slice and fill with zeros using SIMD.
    ///
    /// # Safety
    ///
    /// The caller must ensure that the returned slice does not outlive the
    /// arena, and that T is valid for the all-zeros bit pattern (true for
    /// integers and floats, but not for references or NonNull).
    #[allow(clippy::mut_from_ref)]
    #[inline(always)]
    pub unsafe fn alloc_slice_zero<T>(&self, len: usize) -> Option<&mut [T]> {
        if unlikely(len == 0) {
            return Some(&mut []);
        }
        
        let layout = Layout::array::<T>(len).ok()?;
        let ptr = unsafe { (*self.inner.get()).alloc_raw(layout) };
        
        if likely(!ptr.is_null()) {
            // SIMD zero fill
            simd::fill_zero(ptr, layout.size());
            Some(unsafe { std::slice::from_raw_parts_mut(ptr as *mut T, len) })
        } else {
            None
        }
    }
    
    /// Allocate a slice and fill with a value.
    ///
    /// # Safety
    ///
    /// The caller must ensure that the returned slice does not outlive the arena.
    #[allow(clippy::mut_from_ref)]
    #[inline(always)]
    pub unsafe fn alloc_slice_fill<T: Copy>(&self, len: usize, value: T) -> Option<&mut [T]> {
        if unlikely(len == 0) {
            return Some(&mut []);
        }
        
        let layout = Layout::array::<T>(len).ok()?;
        let ptr = unsafe { (*self.inner.get()).alloc_raw(layout) };
        
        if likely(!ptr.is_null()) {
            let typed_ptr = ptr as *mut T;
            
            // Use SIMD fills for 4- and 8-byte types, but only when the
            // type's alignment matches the wider store; e.g. [u16; 2] is
            // 4 bytes yet only 2-aligned, so it takes the scalar path to
            // avoid misaligned stores.
            let size = std::mem::size_of::<T>();
            let align = std::mem::align_of::<T>();
            if size == 4 && align >= 4 {
                let bits = unsafe { std::mem::transmute_copy::<T, u32>(&value) };
                simd::fill_u32(typed_ptr as *mut u32, bits, len);
            } else if size == 8 && align >= 8 {
                let bits = unsafe { std::mem::transmute_copy::<T, u64>(&value) };
                simd::fill_u64(typed_ptr as *mut u64, bits, len);
            } else {
                // Scalar fallback for every other size.
                for i in 0..len {
                    unsafe { typed_ptr.add(i).write(value) };
                }
            }
            
            Some(unsafe { std::slice::from_raw_parts_mut(typed_ptr, len) })
        } else {
            None
        }
    }
    
    /// Allocate a slice of f32 and fill with a value using SIMD.
    ///
    /// # Safety
    ///
    /// The caller must ensure that the returned slice does not outlive the arena.
    #[allow(clippy::mut_from_ref)]
    #[inline(always)]
    pub unsafe fn alloc_slice_f32(&self, len: usize, value: f32) -> Option<&mut [f32]> {
        if unlikely(len == 0) {
            return Some(&mut []);
        }
        
        let layout = Layout::array::<f32>(len).ok()?;
        let ptr = unsafe { (*self.inner.get()).alloc_raw(layout) };
        
        if likely(!ptr.is_null()) {
            simd::fill_f32(ptr as *mut f32, value, len);
            Some(unsafe { std::slice::from_raw_parts_mut(ptr as *mut f32, len) })
        } else {
            None
        }
    }
    
    /// Allocate a slice of f64 and fill with a value using SIMD.
    ///
    /// # Safety
    ///
    /// The caller must ensure that the returned slice does not outlive the arena.
    #[allow(clippy::mut_from_ref)]
    #[inline(always)]
    pub unsafe fn alloc_slice_f64(&self, len: usize, value: f64) -> Option<&mut [f64]> {
        if unlikely(len == 0) {
            return Some(&mut []);
        }
        
        let layout = Layout::array::<f64>(len).ok()?;
        let ptr = unsafe { (*self.inner.get()).alloc_raw(layout) };
        
        if likely(!ptr.is_null()) {
            simd::fill_f64(ptr as *mut f64, value, len);
            Some(unsafe { std::slice::from_raw_parts_mut(ptr as *mut f64, len) })
        } else {
            None
        }
    }
    
    /// Reset the arena, invalidating all allocations.
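    ///
    /// All references previously returned by the allocation methods are
    /// invalidated and must not be used afterwards; upholding this is the
    /// caller's obligation under those methods' safety contracts.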
    #[inline(always)]
    pub fn reset(&self) {
        unsafe { (*self.inner.get()).reset() };
    }
    
    /// Get the number of bytes currently allocated from the arena.
    #[inline]
    pub fn allocated(&self) -> usize {
        unsafe {
            let inner = &*self.inner.get();
            inner.ptr as usize - inner.base.as_ptr() as usize
        }
    }
    
    /// Get the number of bytes remaining in the arena.
    #[inline]
    pub fn remaining(&self) -> usize {
        unsafe {
            let inner = &*self.inner.get();
            inner.end as usize - inner.ptr as usize
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_turbo_alloc() {
        MkTurboArena::with(|arena| {
            unsafe {
                let x = arena.alloc(42u64).unwrap();
                assert_eq!(*x, 42);
                
                let y = arena.alloc(123u32).unwrap();
                assert_eq!(*y, 123);
            }
            
            arena.reset();
        });
    }

    #[test]
    fn test_turbo_slice_zero() {
        MkTurboArena::with(|arena| {
            unsafe {
                let slice = arena.alloc_slice_zero::<u64>(100).unwrap();
                assert_eq!(slice.len(), 100);
                assert!(slice.iter().all(|&v| v == 0));
            }
            arena.reset();
        });
    }

    #[test]
    fn test_turbo_slice_fill() {
        MkTurboArena::with(|arena| {
            unsafe {
                let slice = arena.alloc_slice_fill(100, 42u64).unwrap();
                assert_eq!(slice.len(), 100);
                assert!(slice.iter().all(|&v| v == 42));
            }
            arena.reset();
        });
    }

    #[test]
    fn test_turbo_slice_f32() {
        MkTurboArena::with(|arena| {
            unsafe {
                let slice = arena.alloc_slice_f32(100, 3.14).unwrap();
                assert_eq!(slice.len(), 100);
                assert!(slice.iter().all(|&v| (v - 3.14).abs() < f32::EPSILON));
            }
            arena.reset();
        });
    }
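
    #[test]
    fn test_turbo_reset_accounting() {
        // A minimal sketch exercising allocated()/remaining() and the
        // reset semantics; assumes only that the default thread-local
        // arena fits one u64.
        MkTurboArena::with(|arena| {
            arena.reset();
            assert_eq!(arena.allocated(), 0);
            let before = arena.remaining();
            unsafe {
                let x = arena.alloc(7u64).unwrap();
                assert_eq!(*x, 7);
            }
            assert!(arena.allocated() >= std::mem::size_of::<u64>());
            assert!(arena.remaining() < before);
            arena.reset();
            assert_eq!(arena.allocated(), 0);
        });
    }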
}