use core::sync::atomic::{AtomicU64, AtomicUsize, AtomicBool};
use core::sync::atomic::Ordering::{Acquire, Relaxed, Release};
use core::cell::UnsafeCell;
use core::alloc::{GlobalAlloc, Layout};
use core::ptr::{copy_nonoverlapping, null_mut};
use plat::p::sys_alloc;
#[cfg(target_os = "windows")]
use plat::p::sys_commit;
/// A lock-free global allocator backed by a single large virtual-memory
/// reservation that is made lazily on first allocation.
pub struct Smalloc {
// All mutable state lives behind an UnsafeCell so the GlobalAlloc methods
// (which take `&self`) can hand out a shared reference to the inner atomics.
inner: UnsafeCell<SmallocInner>,
}
impl Smalloc {
pub const fn new() -> Self { Self {
inner: UnsafeCell::new(SmallocInner {
smbp: AtomicUsize::new(0),
initlock: AtomicBool::new(false),
}),
} }
}
// GlobalAlloc front-end: translates a `Layout` into a size class (`sc`) and
// defers the real work to `SmallocInner`. Callers must uphold the standard
// GlobalAlloc contract (non-zero layout sizes, matching layout on dealloc);
// those preconditions are checked here in debug builds only.
unsafe impl GlobalAlloc for Smalloc {
#[inline(always)]
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
let reqsiz = layout.size();
let reqalign = layout.align();
// GlobalAlloc requires a non-zero-sized, power-of-two-aligned layout.
debug_assert!(reqsiz > 0);
debug_assert!(reqalign > 0);
debug_assert!(reqalign.is_power_of_two());
// Size and alignment jointly select the size class.
let sc = reqali_to_sc(reqsiz, reqalign);
// Requests larger than the biggest size class cannot be served.
if sc >= NUM_SCS {
null_mut()
} else {
let inner = self.inner();
// Lazily reserve the backing virtual-memory region on first use.
inner.idempotent_init();
inner.alloc(sc, false)
}
}
#[inline(always)]
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
debug_assert!(layout.align().is_power_of_two());
// The layout is not otherwise needed: size class, slab number, and slot
// number are all encoded in the pointer's address bits.
self.inner().dealloc(ptr.addr());
}
#[inline(always)]
unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
let reqsiz = layout.size();
let reqalign = layout.align();
debug_assert!(reqsiz > 0);
debug_assert!(reqalign > 0);
debug_assert!(reqalign.is_power_of_two());
let sc = reqali_to_sc(reqsiz, reqalign);
if sc >= NUM_SCS {
null_mut()
} else {
let inner = self.inner();
inner.idempotent_init();
// `true` => zero the slot if it was previously used (fresh pages from
// the OS are already zeroed, so only "touched" slots need clearing).
inner.alloc(sc, true)
}
}
#[inline(always)]
unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, reqsize: usize) -> *mut u8 {
let inner = self.inner();
let p_addr = ptr.addr();
debug_assert!(inner.is_smalloc_ptr(p_addr));
let oldsize = layout.size();
debug_assert!(oldsize > 0);
let oldalignment = layout.align();
debug_assert!(oldalignment > 0);
debug_assert!(oldalignment.is_power_of_two());
debug_assert!(reqsize > 0);
debug_assert!(reqali_to_sc(oldsize, oldalignment) >= NUM_UNUSED_SCS);
debug_assert!(reqali_to_sc(oldsize, oldalignment) < NUM_SCS);
// The slot's actual size class comes from the address, which may exceed
// the class the original layout implied (e.g. after a previous grow).
let oldsc = ptr_to_sc(p_addr);
debug_assert!(oldsc >= NUM_UNUSED_SCS);
debug_assert!(oldsc < NUM_SCS);
debug_assert!(oldsc >= reqali_to_sc(oldsize, oldalignment));
let reqsc = reqali_to_sc(reqsize, oldalignment);
debug_assert!(reqsc >= NUM_UNUSED_SCS);
// The current slot already fits the request: shrink/equal is a no-op.
if reqsc <= oldsc {
return ptr;
}
if reqsc >= NUM_SCS {
null_mut()
} else {
// Mid-sized growing allocations are promoted straight to GROWERS_SC —
// presumably so repeatedly-grown buffers stop moving between classes;
// TODO(review): confirm this heuristic against the design notes.
let reqsc = if (plat::p::SC_FOR_PAGE..GROWERS_SC).contains(&reqsc) { GROWERS_SC } else { reqsc };
let newp = inner.alloc(reqsc, false);
// Per the GlobalAlloc contract, failure leaves the old allocation intact.
if newp.is_null() {
return newp;
}
// SAFETY: `newp` is a fresh slot distinct from `ptr`, and both are valid
// for at least `oldsize` bytes.
unsafe { copy_nonoverlapping(ptr, newp, oldsize); }
inner.dealloc(p_addr);
newp
}
}
}
macro_rules! gen_mask { ($bits:expr, $ty:ty) => { ((!0 as $ty) >> (<$ty>::BITS - ($bits) as u32)) }; }
#[doc(hidden)]
pub mod i {
use crate::*;
pub mod plat;
/// Shared allocator state; all mutation happens through these atomics.
pub struct SmallocInner {
// Aligned base address of the reserved region; 0 means "not initialized".
pub smbp: AtomicUsize,
// Spin lock guarding one-time initialization in `idempotent_init`.
pub initlock: AtomicBool
}
impl SmallocInner {
// Debug-only sanity predicate: does `p_addr` look like a pointer this
// allocator handed out? Checks region bounds, a usable size class, slot
// alignment, that it is not the sentinel slot, and that the bit position
// reused for the "next touched" flag is clear in the address.
#[inline(always)]
pub fn is_smalloc_ptr(&self, p_addr: usize) -> bool {
let smbp = self.smbp.load(Relaxed);
p_addr >= smbp + LOWEST_SMALLOC_SLOT_ADDR && p_addr <= smbp + HIGHEST_SMALLOC_SLOT_ADDR &&
ptr_to_sc(p_addr) >= NUM_UNUSED_SCS &&
p_addr.trailing_zeros() >= ptr_to_sc(p_addr) as u32 &&
ptr_to_slotnum(p_addr) != sc_to_sentinel_slotnum(ptr_to_sc(p_addr)) &&
p_addr & ADDR_NEXT_TOUCHED_BIT == 0
}
// One-time lazy initialization: reserve the whole virtual-memory region and
// publish its aligned base in `smbp`. Double-checked under a spin lock so
// concurrent first allocations initialize exactly once.
#[inline(always)]
pub fn idempotent_init(&self) {
// Fast path: already initialized (alloc re-loads smbp with Acquire).
let smbpval = self.smbp.load(Relaxed);
if smbpval == 0 {
// Spin until we own the init lock; Acquire pairs with the Release
// unlock below so the winner's stores are visible to losers.
loop {
if self.initlock.compare_exchange_weak(false, true, Acquire, Relaxed).is_ok() {
break;
}
}
// Re-check under the lock: another thread may have initialized already.
let smbpval = self.smbp.load(Relaxed);
if smbpval == 0 {
let sysbp = sys_alloc(TOTAL_VIRTUAL_MEMORY).unwrap().addr();
assert!(sysbp != 0);
// Round up so the address bit-fields (slab/sc/slot) start at a
// BASEPTR_ALIGN boundary; the reservation includes slack for this.
let smbp = sysbp.next_multiple_of(BASEPTR_ALIGN);
#[cfg(any(target_os = "windows", doc))]
{
// Windows reserves without committing: commit the free-list-head
// (FLH) words up front since they are touched on every alloc.
let size_of_flh_area = (1usize << FLHWORD_SIZE_BITS) * (1usize << NUM_SLABS_BITS) * NUM_SCS as usize;
sys_commit(smbp as *mut u8, size_of_flh_area).unwrap();
}
// Release-publish the base pointer before dropping the lock.
self.smbp.store(smbp, Release);
}
self.initlock.store(false, Release);
}
}
// Pop one slot from the free list for (current thread's slab, `orig_sc`).
// Each (slabnum, sc) pair has a 64-bit free-list head (FLH) word: low 32
// bits are the first free entry (slot number plus a "next touched" flag),
// high 32 bits are a push counter — presumably an ABA guard for the CAS
// loop; TODO(review): confirm. On contention or a full slab we fail over
// to other slabs, and only after a full lap escalates to a larger sc.
#[inline(always)]
pub fn alloc(&self, orig_sc: u8, zeromem: bool) -> *mut u8 {
debug_assert!(orig_sc >= NUM_UNUSED_SCS);
debug_assert!(orig_sc < NUM_SCS);
let orig_slabnum = get_slabnum();
let mut slabnum = orig_slabnum;
let mut a_slab_was_full = false;
let mut sc = orig_sc;
let smbp = self.smbp.load(Acquire);
loop {
// Locate this (slab, sc) pair's FLH word inside the FLH area.
let slabnum_and_sc = (slabnum as usize) << NUM_SC_BITS | sc as usize;
let flhptr = smbp | slabnum_and_sc << FLHWORD_SIZE_BITS;
let flh = unsafe { AtomicU64::from_ptr(flhptr as *mut u64) };
let flhword = flh.load(Acquire);
let curfirstentry = flhword as u32;
let curfirstentryslotnum = curfirstentry & ENTRY_SLOTNUM_MASK;
let curfirstentrynexttouchedbit = curfirstentry & ENTRY_NEXT_TOUCHED_BIT;
// The sentinel slot number marks an empty free list.
let sentinel_slotnum = sc_to_sentinel_slotnum(sc);
debug_assert!(curfirstentryslotnum <= sentinel_slotnum);
debug_assert!(if curfirstentryslotnum == sentinel_slotnum { curfirstentrynexttouchedbit == 0 } else { true });
if curfirstentry != sentinel_slotnum {
// Compute the slot's address from (slab, sc, slotnum).
let curfirstentry_p = smbp | (slabnum_and_sc << NUM_SN_D_T_BITS) | (curfirstentryslotnum as usize) << sc;
debug_assert!(self.is_smalloc_ptr(curfirstentry_p), "curfirstentry_p: {curfirstentry_p}/{curfirstentry_p:b}");
let next_entry = if curfirstentrynexttouchedbit != 0 {
// Touched slot: its first 4 bytes store the next free entry
// (written by `dealloc` when the slot was pushed).
unsafe { *(curfirstentry_p as *mut u32) }
} else {
// Never-touched slot: the free list continues implicitly at the
// next slot number.
#[cfg(any(target_os = "windows", doc))]
{
// Commit a fresh page/slot on Windows before first use.
// NOTE(review): `std::cmp::max` here while the file imports from
// `core` elsewhere — works with std, but confirm intent.
let cbits = std::cmp::max(plat::p::SC_FOR_PAGE, sc);
if curfirstentry_p.trailing_zeros() >= cbits as u32 {
sys_commit(curfirstentry_p as *mut u8, 1 << cbits).unwrap();
}
}
curfirstentryslotnum + 1
};
// Keep the push counter, swap in the new first entry.
let newflhword = (flhword & FLHWORD_PUSH_COUNTER_MASK) | next_entry as u64;
if flh.compare_exchange_weak(flhword, newflhword, Acquire, Relaxed).is_ok() {
debug_assert!(next_entry & ENTRY_SLOTNUM_MASK != curfirstentryslotnum);
debug_assert!(next_entry & ENTRY_SLOTNUM_MASK <= sentinel_slotnum);
// Previously-used slots may hold stale data; fresh ones are zero.
if zeromem && curfirstentrynexttouchedbit != 0 {
unsafe { core::ptr::write_bytes(curfirstentry_p as *mut u8, 0, 1 << sc) };
}
// Remember the slab that worked so this thread goes there first.
if slabnum != orig_slabnum {
set_slab_num(slabnum);
}
break curfirstentry_p as *mut u8;
} else {
// CAS lost a race: move to another slab rather than spin here.
slabnum = failover_slabnum(slabnum);
}
} else {
// This slab's free list for `sc` is empty; try the next slab. After
// a complete lap of full slabs, retry with the next larger sc.
slabnum = failover_slabnum(slabnum);
if slabnum != orig_slabnum {
a_slab_was_full = true;
} else {
if a_slab_was_full {
if sc == NUM_SCS - 1 {
// NOTE(review): eprintln! inside a global allocator may itself
// allocate or re-enter the allocator — confirm this is safe.
eprintln!("smalloc exhausted");
break null_mut();
};
sc += 1;
}
}
}
}
}
// Push the slot back onto its (slab, sc) free list. The next-entry link is
// stored in the slot's first 4 bytes, then the FLH word is CASed to point
// at this slot, with the push counter bumped and the "next touched" flag
// set (so a future pop reads the link instead of assuming slotnum+1).
#[inline(always)]
pub fn dealloc(&self, p_addr: usize) {
debug_assert!(self.is_smalloc_ptr(p_addr));
let sc = ptr_to_sc(p_addr);
debug_assert!(sc >= NUM_UNUSED_SCS);
debug_assert!(sc < NUM_SCS);
// The owning FLH word is derived from the address, not from the
// thread's current slab: slots go back where they came from.
let flhptr = self.smbp.load(Relaxed) | ptr_to_flhaddr(p_addr);
let flh = unsafe { AtomicU64::from_ptr(flhptr as *mut u64) };
let newslotnum = ptr_to_slotnum(p_addr);
let sentinel_slotnum = sc_to_sentinel_slotnum(sc);
debug_assert!(newslotnum < sentinel_slotnum);
loop {
let flhword = flh.load(Relaxed);
let curfirstentry = flhword as u32;
let curfirstentryslotnum = curfirstentry & ENTRY_SLOTNUM_MASK;
debug_assert!(newslotnum != curfirstentryslotnum);
debug_assert!(curfirstentryslotnum <= sentinel_slotnum);
// Link the old head into this slot; re-done on every retry since the
// head may have changed.
unsafe { *(p_addr as *mut u32) = curfirstentry };
let push_counter = (flhword & FLHWORD_PUSH_COUNTER_MASK).wrapping_add(FLHWORD_PUSH_COUNTER_INCR);
let newflhword = push_counter | ENTRY_NEXT_TOUCHED_BIT as u64 | newslotnum as u64;
// Release so the link write above is visible to the thread that pops.
if flh.compare_exchange_weak(flhword, newflhword, Release, Relaxed).is_ok() {
break;
}
}
}
}
impl Smalloc {
    /// Borrow the shared allocator state.
    #[doc(hidden)]
    #[inline(always)]
    pub fn inner(&self) -> &SmallocInner {
        let p = self.inner.get();
        // SAFETY: only shared references are ever produced from the
        // UnsafeCell, and SmallocInner mutates exclusively through atomics.
        unsafe { &*p }
    }
}
// --- Address-layout constants ---------------------------------------------
// A smalloc address packs, from high to low bits:
//   [slab number][size class][slot number][intra-slot data offset]
// on top of the aligned base pointer `smbp`.
// Bits used to encode a size class => 2^5 = 32 size classes.
pub const NUM_SC_BITS: u8 = 5;
// Bits used to encode a slab number => 64 slabs.
pub const NUM_SLABS_BITS: u8 = 6;
// The two smallest size classes (1 and 2 bytes) are never allocated from.
pub const NUM_UNUSED_SCS: u8 = 2;
// Size class that growing reallocs are promoted to (see `realloc`).
pub const GROWERS_SC: u8 = 22;
pub const NUM_SCS: u8 = 1 << NUM_SC_BITS;
// Low-bit mask used to force requests up to at least class NUM_UNUSED_SCS.
pub const UNUSED_SC_MASK: usize = gen_mask!(NUM_UNUSED_SCS, usize);
// Width of the combined slot-number + data-offset field in an address.
pub const NUM_SN_D_T_BITS: u8 = NUM_UNUSED_SCS + NUM_SCS;
pub const SLABNUM_BITS_ALONE_MASK: u8 = gen_mask!(NUM_SLABS_BITS, u8);
// Slab-number bits sit above the size-class bits in an address.
pub const SLABNUM_ADDR_SHIFT_BITS: u8 = NUM_SN_D_T_BITS + NUM_SC_BITS;
pub const SLABNUM_BITS_ADDR_MASK: usize = (SLABNUM_BITS_ALONE_MASK as usize) << SLABNUM_ADDR_SHIFT_BITS;
pub const SC_BITS_ADDR_MASK: usize = gen_mask!(NUM_SC_BITS, usize) << NUM_SN_D_T_BITS;
// The largest size class has the fewest slots; the highest valid slot number
// excludes the sentinel ("empty list") value.
pub const NUM_SLOTS_IN_HIGHEST_SC: u64 = 1 << (NUM_UNUSED_SCS + 1); pub const HIGHEST_SLOTNUM_IN_HIGHEST_SC: u64 = NUM_SLOTS_IN_HIGHEST_SC - 2;
pub const DATA_ADDR_BITS_IN_HIGHEST_SC: u8 = NUM_SCS - 1;
pub const LOWEST_SMALLOC_SLOT_ADDR: usize = (NUM_UNUSED_SCS as usize) << NUM_SN_D_T_BITS;
pub const HIGHEST_SMALLOC_SLOT_ADDR: usize = SLABNUM_BITS_ADDR_MASK | SC_BITS_ADDR_MASK | (HIGHEST_SLOTNUM_IN_HIGHEST_SC as usize) << DATA_ADDR_BITS_IN_HIGHEST_SC;
// Reservation size includes slack so `smbp` can be rounded up to
// BASEPTR_ALIGN inside the reserved range.
pub const TOTAL_VIRTUAL_MEMORY: usize = HIGHEST_SMALLOC_SLOT_BYTE_ADDR + BASEPTR_ALIGN - 1;
/// Extract the size class encoded in a smalloc pointer's address bits.
#[inline(always)]
pub fn ptr_to_sc(p_addr: usize) -> u8 {
    let sc_bits = p_addr & SC_BITS_ADDR_MASK;
    (sc_bits >> NUM_SN_D_T_BITS) as u8
}
}
pub use i::*;
const SLABNUM_ALONE_MASK: u8 = gen_mask!(NUM_SLABS_BITS, u8);
// Mask selecting the slot-number + data-offset bits of an address (as u64).
const SLOTNUM_AND_DATA_ADDR_MASK: u64 = gen_mask!(NUM_SN_D_T_BITS, u64);
// Each free-list-head (FLH) word is 8 bytes (2^3).
const FLHWORD_SIZE_BITS: u8 = 3;
// High 32 bits of an FLH word hold a push counter, bumped on every push.
const FLHWORD_PUSH_COUNTER_MASK: u64 = gen_mask!(32, u64) << 32;
const FLHWORD_PUSH_COUNTER_INCR: u64 = 1 << 32;
// Bits available for a slot number within an FLH entry.
const NUM_SN_BITS: u8 = NUM_SCS - 1;
// Flag in an FLH entry: the referenced slot has been used before, so its
// first 4 bytes hold the next free entry (see alloc/dealloc).
const ENTRY_NEXT_TOUCHED_BIT: u32 = 1 << NUM_SN_BITS;
// The same flag's position when viewed within an address (shifted past the
// unused-size-class bits); valid slot addresses never have it set.
const ADDR_NEXT_TOUCHED_BIT: usize = 1 << (NUM_SN_BITS + NUM_UNUSED_SCS);
const ENTRY_SLOTNUM_MASK: u32 = gen_mask!(NUM_SN_BITS, u32);
// Address of the last byte of the highest slot; base alignment is the next
// power of two above the whole addressable span.
const HIGHEST_SMALLOC_SLOT_BYTE_ADDR: usize = HIGHEST_SMALLOC_SLOT_ADDR | gen_mask!(DATA_ADDR_BITS_IN_HIGHEST_SC, usize);
const BASEPTR_ALIGN: usize = (HIGHEST_SMALLOC_SLOT_BYTE_ADDR + 1).next_power_of_two();
use core::sync::atomic::AtomicU8;
use core::cell::Cell;
// Global counter handing out thread numbers; wraps at 256, which is fine
// because the value is masked down to the slab-number range anyway.
static GLOBAL_THREAD_NUM: AtomicU8 = AtomicU8::new(0);
// Sentinel meaning "this thread has not been assigned a slab number yet".
const SLAB_NUM_SENTINEL: u8 = u8::MAX;
thread_local! {
// Per-thread preferred slab number, assigned lazily by `get_slabnum` and
// updated by `set_slab_num` when allocation succeeds on a different slab.
static SAS_NUMS: Cell<u8> = const { Cell::new(SLAB_NUM_SENTINEL) };
}
/// Return the calling thread's preferred slab number, assigning one on
/// first use from the global round-robin thread counter.
#[inline(always)]
fn get_slabnum() -> u8 {
    SAS_NUMS.with(|cell| {
        match cell.get() {
            // First call on this thread: claim the next thread number and
            // fold it into the valid slab-number range.
            SLAB_NUM_SENTINEL => {
                let assigned = GLOBAL_THREAD_NUM.fetch_add(1, Relaxed) & SLABNUM_ALONE_MASK;
                cell.set(assigned);
                assigned
            }
            already_assigned => already_assigned,
        }
    })
}
/// Record `slabnum` as the calling thread's preferred slab for future allocations.
#[inline(always)]
fn set_slab_num(slabnum: u8) {
    SAS_NUMS.with(|cell| cell.set(slabnum));
}
/// The next slab to try when `slabnum` is contended or full. Stepping by
/// roughly a third of the slab count scatters contending threads instead of
/// marching them all to the same neighbor.
#[inline(always)]
fn failover_slabnum(slabnum: u8) -> u8 {
    const STEP: u8 = (1u8 << NUM_SLABS_BITS) / 3;
    slabnum.wrapping_add(STEP) & SLABNUM_ALONE_MASK
}
#[doc(hidden)]
// SAFETY: SmallocInner's state is mutated only through atomics, and the
// UnsafeCell is never used to hand out exclusive references (see
// `Smalloc::inner`), so sharing across threads appears sound — but this
// relies on every code path preserving that invariant; review on change.
unsafe impl Sync for Smalloc {}
#[doc(hidden)]
impl Default for Smalloc {
fn default() -> Self {
Self::new()
}
}
/// Map a requested (size, alignment) pair to the smallest size class whose
/// slots can satisfy it. Classes below `NUM_UNUSED_SCS` are never returned.
#[inline(always)]
const fn reqali_to_sc(siz: usize, ali: usize) -> u8 {
    debug_assert!(siz > 0);
    debug_assert!(ali > 0);
    debug_assert!(ali < 1 << NUM_SCS);
    debug_assert!(ali.is_power_of_two());
    // The class is the bit width (ceil(log2)) of the largest of: the size
    // rounded down by one, the alignment rounded down by one, and the mask
    // that forces requests up to at least the first usable class.
    let spread = (siz - 1) | (ali - 1) | UNUSED_SC_MASK;
    (spread.ilog2() + 1) as u8
}
/// Extract the slot number encoded in a smalloc pointer's address bits.
#[inline(always)]
fn ptr_to_slotnum(p_addr: usize) -> u32 {
    // Keep only the slot-number + data-offset field, then drop the
    // intra-slot offset bits (one address bit per size-class power of two).
    let field = p_addr as u64 & SLOTNUM_AND_DATA_ADDR_MASK;
    (field >> ptr_to_sc(p_addr)) as u32
}
#[inline(always)]
fn sc_to_sentinel_slotnum(sc: u8) -> u32 {
gen_mask!(NUM_SN_BITS - (sc - NUM_UNUSED_SCS), u32)
}
/// Offset (relative to the base pointer) of the free-list-head word owning
/// the slot at `p_addr`.
#[inline(always)]
fn ptr_to_flhaddr(p_addr: usize) -> usize {
    const SLABNUM_AND_SC_ADDR_MASK: usize = SLABNUM_BITS_ADDR_MASK | SC_BITS_ADDR_MASK;
    let pair_bits = p_addr & SLABNUM_AND_SC_ADDR_MASK;
    // Scale the (slabnum, sc) pair down so each pair indexes one 8-byte FLH word.
    pair_bits >> (NUM_SN_D_T_BITS - FLHWORD_SIZE_BITS)
}
#[cfg(test)]
mod tests;