use core::sync::atomic::{AtomicI32, Ordering as AtomicOrdering};
use crate::slab::size_class::NUM_SIZE_CLASSES;
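// Capacity of the per-class allocation stack and of the deferred-free stack.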
const CACHE_SIZE: usize = 64;
const FREE_CACHE_SIZE: usize = 64;
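/// One cached slot: the user pointer plus enough metadata (owning slab,
/// slot index, arena) to hand it out or return it without touching shared
/// allocator state.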
#[derive(Clone, Copy)]
#[repr(C)]
pub struct CachedSlot {
pub ptr: *mut u8,
pub slab_ptr: *mut u8,
pub slot_index: u16,
pub arena_index: u8,
pub _pad: u8,
pub cached_size: u32,
}
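/// Per-size-class thread cache: a LIFO stack of slots ready to allocate and
/// a second stack of frees awaiting a batched return to their slabs.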
struct ClassCache {
alloc_slots: [CachedSlot; CACHE_SIZE],
alloc_count: usize,
free_slots: [CachedSlot; FREE_CACHE_SIZE],
free_count: usize,
}
impl ClassCache {
const fn new() -> Self {
const EMPTY: CachedSlot = CachedSlot {
ptr: core::ptr::null_mut(),
slab_ptr: core::ptr::null_mut(),
slot_index: 0,
arena_index: 0,
_pad: 0,
cached_size: 0,
};
ClassCache {
alloc_slots: [EMPTY; CACHE_SIZE],
alloc_count: 0,
free_slots: [EMPTY; FREE_CACHE_SIZE],
free_count: 0,
}
}
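    /// Push a ready-to-allocate slot; `false` means the stack is full.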
#[inline]
fn push(&mut self, slot: CachedSlot) -> bool {
if self.alloc_count < CACHE_SIZE {
self.alloc_slots[self.alloc_count] = slot;
self.alloc_count += 1;
true
} else {
false
}
}
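    /// Pop the most recently cached slot, if any (LIFO keeps it cache-warm).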
#[inline]
fn pop(&mut self) -> Option<CachedSlot> {
if self.alloc_count > 0 {
self.alloc_count -= 1;
Some(self.alloc_slots[self.alloc_count])
} else {
None
}
}
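    /// Stash a freed slot for deferred return; `false` tells the caller to
    /// flush to the owning slab instead.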
#[inline]
fn push_free(&mut self, slot: CachedSlot) -> bool {
if self.free_count < FREE_CACHE_SIZE {
self.free_slots[self.free_count] = slot;
self.free_count += 1;
true
} else {
false
}
}
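    /// Take back one deferred free, most recent first.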
#[inline]
fn pop_free(&mut self) -> Option<CachedSlot> {
if self.free_count > 0 {
self.free_count -= 1;
Some(self.free_slots[self.free_count])
} else {
None
}
}
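    /// True when the deferred-free stack cannot accept another slot.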
#[inline]
fn free_is_full(&self) -> bool {
self.free_count >= FREE_CACHE_SIZE
}
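    /// Move up to `max_recycle` deferred frees onto the alloc stack so they
    /// can be handed straight back out. Returns how many deferred frees
    /// remain afterwards.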
#[inline]
fn recycle_frees_to_alloc(&mut self, max_recycle: usize) -> usize {
let available = self.free_count;
if available == 0 {
return 0;
}
let to_recycle = available
.min(max_recycle)
.min(CACHE_SIZE - self.alloc_count);
if to_recycle == 0 {
return available;
}
let start = available - to_recycle;
for i in 0..to_recycle {
self.alloc_slots[self.alloc_count] = self.free_slots[start + i];
self.alloc_count += 1;
}
self.free_count = start;
        start
    }
}
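/// One `ClassCache` per size class, indexed by class index.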
pub struct ThreadCache {
caches: [ClassCache; NUM_SIZE_CLASSES],
}
impl ThreadCache {
#[allow(clippy::new_without_default)]
pub const fn new() -> Self {
const EMPTY: ClassCache = ClassCache::new();
ThreadCache {
caches: [EMPTY; NUM_SIZE_CLASSES],
}
}
#[inline]
pub fn pop(&mut self, class_index: usize) -> Option<CachedSlot> {
self.caches[class_index].pop()
}
#[inline]
pub fn push(&mut self, class_index: usize, slot: CachedSlot) -> bool {
self.caches[class_index].push(slot)
}
#[inline]
pub fn push_free(&mut self, class_index: usize, slot: CachedSlot) -> bool {
self.caches[class_index].push_free(slot)
}
#[inline]
pub fn free_is_full(&self, class_index: usize) -> bool {
self.caches[class_index].free_is_full()
}
#[inline]
pub fn pop_free(&mut self, class_index: usize) -> Option<CachedSlot> {
self.caches[class_index].pop_free()
}
#[inline]
pub fn recycle_frees(&mut self, class_index: usize, max_recycle: usize) -> usize {
self.caches[class_index].recycle_frees_to_alloc(max_recycle)
}
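    /// Drain the whole deferred-free stack at once: zero the count and hand
    /// back a borrow of the entries so the caller can flush them as a batch
    /// before pushing anything new.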
#[inline(always)]
pub fn drain_frees_ref(&mut self, class_index: usize) -> (&[CachedSlot], usize) {
let cache = &mut self.caches[class_index];
let count = cache.free_count;
cache.free_count = 0;
(&cache.free_slots[..count], count)
}
}
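/// Full thread-local allocator state: the size-class caches plus cached
/// identity (tid, home arena), a fork-generation stamp, MRU slab hints, a
/// one-slot fast register, and a one-entry cache for large allocations.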
pub(crate) struct ThreadState {
pub(crate) cache: ThreadCache,
pub(crate) tid: usize,
pub(crate) rng: u64,
pub(crate) active: bool,
pub(crate) generation: u64,
pub(crate) cached_arena_idx: usize,
pub(crate) arena_idx_valid: bool,
pub(crate) mru_page: usize,
pub(crate) mru_slab_ptr: *mut u8,
pub(crate) mru_arena_index: u8,
pub(crate) mru_class_index: u8,
pub(crate) mru_valid: bool,
pub(crate) fast_reg: CachedSlot,
pub(crate) fast_reg_class: u8,
pub(crate) fork_check_counter: u8,
pub(crate) large_cache_base: *mut u8,
pub(crate) large_cache_total_size: usize,
pub(crate) large_cache_data_size: usize,
pub(crate) large_cache_user_ptr: *mut u8,
pub(crate) large_cache_requested_size: usize,
}
impl ThreadState {
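    /// After `fork()` the child inherits caches that point into parent-era
    /// state. When the global fork generation has advanced, drop every
    /// cached pointer and re-derive the thread identity lazily.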
#[inline]
pub(crate) fn check_fork_generation(&mut self) {
let current_gen = crate::hardening::fork::fork_generation();
if self.generation != current_gen {
self.cache = ThreadCache::new();
self.tid = 0;
self.rng = 0;
self.arena_idx_valid = false;
self.mru_valid = false;
self.fast_reg.ptr = core::ptr::null_mut();
self.large_cache_base = core::ptr::null_mut();
self.large_cache_user_ptr = core::ptr::null_mut();
self.generation = current_gen;
}
}
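    /// Cached `gettid()`; 0 doubles as the "not yet fetched" sentinel.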
#[inline]
pub(crate) fn thread_id(&mut self) -> usize {
if self.tid != 0 {
return self.tid;
}
let new_tid = unsafe { libc::syscall(libc::SYS_gettid) as usize };
self.tid = new_tid;
new_tid
}
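    /// Pick (and cache) this thread's home arena by hashing its tid.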
#[inline]
pub(crate) fn arena_index(&mut self, num_arenas: usize) -> usize {
if self.arena_idx_valid {
return self.cached_arena_idx;
}
let tid = self.thread_id();
let idx = crate::platform::splitmix64(tid as u64) as usize % num_arenas;
self.cached_arena_idx = idx;
self.arena_idx_valid = true;
idx
}
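    /// Do the full fork-generation check only once every 256 calls; the
    /// other 255 are a plain counter decrement.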
#[inline(always)]
pub(crate) fn amortized_fork_check(&mut self) {
if self.fork_check_counter == 0 {
self.check_fork_generation();
self.fork_check_counter = 255;
} else {
self.fork_check_counter -= 1;
}
}
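    /// xorshift64* PRNG, lazily seeded per thread. Fast, not cryptographic.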
#[allow(dead_code)]
#[inline]
fn fast_random(&mut self) -> u64 {
let mut s = self.rng;
        if s == 0 {
            // Lazily seed from the global canary secret mixed with the
            // thread id; `| 1` guarantees a nonzero xorshift state.
            let secret = crate::hardening::canary::secret();
            s = secret
                .wrapping_mul(0x517cc1b727220a95)
                .wrapping_add(self.thread_id() as u64)
                | 1;
        }
        // xorshift64* scramble: three shift/xor steps, then a final multiply.
        s ^= s >> 12;
        s ^= s << 25;
        s ^= s >> 27;
self.rng = s;
s.wrapping_mul(0x2545F4914F6CDD1D)
}
}
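// Direct TLS access using the initial-exec model: load the variable's
// offset from the GOT (`@GOTTPOFF`), then address it relative to the `fs`
// segment base. This keeps `__tls_get_addr` off the hot path.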
#[cfg(target_arch = "x86_64")]
#[inline(always)]
unsafe fn tls_get_inline() -> *mut libc::c_void {
let result: *mut libc::c_void;
core::arch::asm!(
"mov {tmp}, qword ptr [rip + _cm_tls_state@GOTTPOFF]",
"mov {out}, qword ptr fs:[{tmp}]",
tmp = out(reg) _,
out = out(reg) result,
options(nostack, pure, readonly),
);
result
}
#[cfg(target_arch = "x86_64")]
#[inline(always)]
unsafe fn tls_set_inline(ptr: *mut libc::c_void) {
core::arch::asm!(
"mov {tmp}, qword ptr [rip + _cm_tls_state@GOTTPOFF]",
"mov qword ptr fs:[{tmp}], {val}",
tmp = out(reg) _,
val = in(reg) ptr,
options(nostack),
);
}
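// Other architectures go through external helpers (e.g. a small C shim) to
// reach the TLS slot.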
#[cfg(not(target_arch = "x86_64"))]
extern "C" {
fn compatmalloc_tls_get() -> *mut libc::c_void;
fn compatmalloc_tls_set(ptr: *mut libc::c_void);
}
#[cfg(not(target_arch = "x86_64"))]
#[inline(always)]
unsafe fn tls_get_inline() -> *mut libc::c_void {
compatmalloc_tls_get()
}
#[cfg(not(target_arch = "x86_64"))]
#[inline(always)]
unsafe fn tls_set_inline(ptr: *mut libc::c_void) {
compatmalloc_tls_set(ptr)
}
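// pthread key whose destructor tears down thread state on thread exit;
// -1 means "not yet created".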
static PTHREAD_KEY: AtomicI32 = AtomicI32::new(-1);
pub unsafe fn init_tls() {
let mut key: libc::pthread_key_t = 0;
if libc::pthread_key_create(&mut key, Some(thread_state_destructor)) == 0 {
PTHREAD_KEY.store(key as i32, AtomicOrdering::Release);
}
}
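// Thread-exit destructor: clear the TLS slot first so allocator calls made
// while flushing do not see a half-destroyed state, flush the large cache
// if populated, then unmap the state pages.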
unsafe extern "C" fn thread_state_destructor(ptr: *mut libc::c_void) {
if ptr.is_null() {
return;
}
tls_set_inline(core::ptr::null_mut());
let state = &mut *(ptr as *mut ThreadState);
if !state.large_cache_base.is_null() {
let alloc = crate::init::allocator();
alloc.flush_large_cache_on_thread_exit(state);
}
let size = core::mem::size_of::<ThreadState>();
let aligned_size = crate::util::align_up(size, crate::util::page_size());
libc::munmap(ptr, aligned_size);
}
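// Fast path: the TLS slot already points at this thread's state; otherwise
// fall through to the slow path that maps a fresh one.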
#[inline(always)]
unsafe fn get_thread_state_ptr() -> *mut ThreadState {
let ptr = tls_get_inline() as *mut ThreadState;
if !ptr.is_null() {
return ptr;
}
alloc_thread_state_slow()
}
#[inline(always)]
pub(crate) unsafe fn get_thread_state_raw() -> *mut ThreadState {
get_thread_state_ptr()
}
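// Map a zeroed, page-aligned `ThreadState` directly with `mmap` (anonymous
// pages start zeroed, so counters begin at 0 and pointers at null) rather
// than allocating through the allocator itself, then publish it in the TLS
// slot and register it with the pthread key for destruction.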
#[cold]
#[inline(never)]
unsafe fn alloc_thread_state_slow() -> *mut ThreadState {
let size = core::mem::size_of::<ThreadState>();
let aligned_size = crate::util::align_up(size, crate::util::page_size());
let ptr = libc::mmap(
core::ptr::null_mut(),
aligned_size,
libc::PROT_READ | libc::PROT_WRITE,
libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
-1,
0,
);
if ptr == libc::MAP_FAILED || ptr.is_null() {
return core::ptr::null_mut();
}
tls_set_inline(ptr);
let key = PTHREAD_KEY.load(AtomicOrdering::Relaxed);
if key >= 0 {
libc::pthread_setspecific(key as libc::pthread_key_t, ptr);
}
ptr as *mut ThreadState
}
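// Run `f` against this thread's state. `active` is a reentrancy guard: if
// the allocator is re-entered on the same thread, bail out with `None` so
// the caller can take a fallback path instead of corrupting the cache.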
#[inline(always)]
fn with_thread_state<F, R>(f: F) -> Option<R>
where
F: FnOnce(&mut ThreadState) -> R,
{
let state = unsafe { get_thread_state_ptr() };
if state.is_null() {
return None;
}
let state = unsafe { &mut *state };
if state.active {
return None;
}
state.active = true;
state.check_fork_generation();
let result = f(state);
state.active = false;
Some(result)
}
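// Convenience wrapper that resolves the tid and home arena before handing
// the cache to the closure.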
#[inline(always)]
pub fn with_cache_tid_arena<F, R>(f: F, num_arenas: usize) -> Option<R>
where
F: FnOnce(&mut ThreadCache, usize, usize) -> R,
{
with_thread_state(|state| {
let tid = state.thread_id();
let arena_idx = state.arena_index(num_arenas);
f(&mut state.cache, tid, arena_idx)
})
}
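// Public helpers with platform fallbacks for when thread state is
// unavailable (reentrancy, or `mmap` failure in the slow path).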
#[inline]
pub fn thread_id() -> usize {
match with_thread_state(|state| state.thread_id()) {
Some(tid) => tid,
None => crate::platform::thread_id(),
}
}
#[allow(dead_code)]
#[inline]
pub fn fast_random_u64() -> u64 {
match with_thread_state(|state| state.fast_random()) {
Some(val) => val,
None => crate::platform::fast_random_u64(),
}
}
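
// A minimal sanity-check sketch of the per-class stacks, not part of the
// allocator proper. It assumes the crate links `std` under `cfg(test)`
// (the usual arrangement for no_std-capable libraries); adapt if tests run
// under a different harness.
#[cfg(test)]
mod tests {
    use super::*;

    // Hypothetical helper: slots with null pointers are fine here because
    // these tests only exercise stack bookkeeping and never dereference.
    fn dummy_slot(index: u16) -> CachedSlot {
        CachedSlot {
            ptr: core::ptr::null_mut(),
            slab_ptr: core::ptr::null_mut(),
            slot_index: index,
            arena_index: 0,
            _pad: 0,
            cached_size: 0,
        }
    }

    #[test]
    fn alloc_stack_is_lifo() {
        let mut cache = ClassCache::new();
        assert!(cache.push(dummy_slot(1)));
        assert!(cache.push(dummy_slot(2)));
        // Most recently pushed slot comes back first.
        assert_eq!(cache.pop().unwrap().slot_index, 2);
        assert_eq!(cache.pop().unwrap().slot_index, 1);
        assert!(cache.pop().is_none());
    }

    #[test]
    fn recycle_moves_newest_frees_to_alloc_stack() {
        let mut cache = ClassCache::new();
        for i in 0..4u16 {
            assert!(cache.push_free(dummy_slot(i)));
        }
        // Recycle at most 2: slots 2 and 3 move over, 2 frees remain.
        assert_eq!(cache.recycle_frees_to_alloc(2), 2);
        assert_eq!(cache.pop().unwrap().slot_index, 3);
        assert_eq!(cache.pop().unwrap().slot_index, 2);
        assert_eq!(cache.pop_free().unwrap().slot_index, 1);
    }
}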