#![cfg(feature = "nvcomp-gpu")]
use std::collections::HashMap;
use std::ffi::c_void;
use std::ptr::null_mut;
use super::error::{Error, Result};
use super::nvcomp_sys::cuda::{CUDA_SUCCESS, cudaFree, cudaMalloc};
/// Smallest pooled size class (4 KiB); requests below this round up to it.
pub const SLAB_MIN_BUCKET_BYTES: usize = 4 * 1024;
/// Largest pooled size class (2 MiB); larger requests bypass the pool
/// entirely and are allocated/freed directly (see `SlabAllocator::alloc`).
pub const SLAB_MAX_BUCKET_BYTES: usize = 2 * 1024 * 1024;
/// Power-of-two bucketed pool of CUDA device allocations.
///
/// Device pointers handed back via [`SlabAllocator::release`] are cached in
/// per-bucket LIFO free lists and reused by later [`SlabAllocator::alloc`]
/// calls in the same size class, avoiding repeated `cudaMalloc`/`cudaFree`
/// round trips. Requests larger than [`SLAB_MAX_BUCKET_BYTES`] skip the
/// pool and go straight to the CUDA runtime.
pub struct SlabAllocator {
    // bucket size -> LIFO stack of cached (currently idle) device pointers.
    free_lists: HashMap<usize, Vec<*mut c_void>>,
    // bucket size -> number of pointers currently handed out to callers.
    live_counts: HashMap<usize, usize>,
    // Cumulative bytes ever requested from cudaMalloc; never decremented
    // (for pooled buckets this equals the pool's device-memory footprint).
    high_water_bytes: usize,
    // Bytes currently sitting idle in the free lists.
    pool_bytes: usize,
    // alloc() calls that exceeded SLAB_MAX_BUCKET_BYTES and bypassed the pool.
    oversize_fallback_count: u64,
    // alloc() calls satisfied from a free list.
    pool_hits: u64,
    // alloc() calls that required a fresh cudaMalloc.
    pool_misses: u64,
}
// SAFETY: the stored raw pointers are CUDA device addresses and are never
// dereferenced on the host; all mutation of the pool goes through `&mut
// self`, so exclusive access is already enforced by the borrow checker.
// NOTE(review): this additionally assumes the CUDA runtime permits
// allocating/freeing device memory from any host thread — confirm against
// the project's CUDA context-management policy.
unsafe impl Send for SlabAllocator {}
unsafe impl Sync for SlabAllocator {}
impl std::fmt::Debug for SlabAllocator {
    /// Renders the pool's counters; the cached device pointers themselves
    /// are intentionally omitted (only the bucket count is shown).
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut dbg = out.debug_struct("SlabAllocator");
        dbg.field("buckets", &self.free_lists.len());
        dbg.field("high_water_bytes", &self.high_water_bytes);
        dbg.field("pool_bytes", &self.pool_bytes);
        dbg.field("pool_hits", &self.pool_hits);
        dbg.field("pool_misses", &self.pool_misses);
        dbg.field("oversize_fallback_count", &self.oversize_fallback_count);
        dbg.finish()
    }
}
impl Default for SlabAllocator {
fn default() -> Self {
Self::new()
}
}
impl SlabAllocator {
    /// Creates an empty pool with all counters zeroed.
    pub fn new() -> Self {
        Self {
            free_lists: HashMap::new(),
            live_counts: HashMap::new(),
            high_water_bytes: 0,
            pool_bytes: 0,
            oversize_fallback_count: 0,
            pool_hits: 0,
            pool_misses: 0,
        }
    }

    /// Maps a request size to its power-of-two bucket, clamped below by
    /// [`SLAB_MIN_BUCKET_BYTES`]. Returns `None` when the rounded size
    /// would exceed [`SLAB_MAX_BUCKET_BYTES`] (the caller must then use
    /// the oversize fallback path).
    fn bucket_of(size: usize) -> Option<usize> {
        // Zero-byte requests still get the minimum bucket.
        if size == 0 {
            return Some(SLAB_MIN_BUCKET_BYTES);
        }
        let rounded = size.next_power_of_two().max(SLAB_MIN_BUCKET_BYTES);
        (rounded <= SLAB_MAX_BUCKET_BYTES).then_some(rounded)
    }

    /// Returns a device pointer with room for at least `size` bytes.
    ///
    /// Pool-sized requests are served LIFO from the matching free list
    /// when possible, otherwise freshly allocated with `cudaMalloc`.
    /// Requests above [`SLAB_MAX_BUCKET_BYTES`] bypass the pool and are
    /// rounded up to a 256-byte multiple before allocation.
    ///
    /// # Errors
    /// Returns [`Error::Compress`] when `cudaMalloc` reports a
    /// non-success status code.
    pub fn alloc(&mut self, size: usize) -> Result<*mut c_void> {
        let Some(bucket) = Self::bucket_of(size) else {
            // Oversize fallback: allocate directly, never pooled.
            let alloc_size = size.div_ceil(256) * 256;
            let mut ptr: *mut c_void = null_mut();
            let status = unsafe { cudaMalloc(&mut ptr, alloc_size) };
            if status != CUDA_SUCCESS {
                return Err(Error::Compress(format!(
                    "SlabAllocator::alloc: oversize cudaMalloc({alloc_size} bytes) failed: code={status}"
                )));
            }
            self.high_water_bytes += alloc_size;
            self.oversize_fallback_count += 1;
            return Ok(ptr);
        };
        // Fast path: reuse a cached pointer from this bucket's free list.
        if let Some(cached) = self.free_lists.get_mut(&bucket).and_then(|list| list.pop()) {
            self.pool_bytes -= bucket;
            *self.live_counts.entry(bucket).or_insert(0) += 1;
            self.pool_hits += 1;
            return Ok(cached);
        }
        // Miss: grow the pool's footprint with a fresh device allocation.
        let mut ptr: *mut c_void = null_mut();
        let status = unsafe { cudaMalloc(&mut ptr, bucket) };
        if status != CUDA_SUCCESS {
            return Err(Error::Compress(format!(
                "SlabAllocator::alloc: cudaMalloc({bucket} bytes) failed: code={status}"
            )));
        }
        self.high_water_bytes += bucket;
        *self.live_counts.entry(bucket).or_insert(0) += 1;
        self.pool_misses += 1;
        Ok(ptr)
    }

    /// Returns a pointer to the pool, or frees it immediately when it was
    /// an oversize fallback allocation. Null pointers are ignored.
    ///
    /// # Safety
    /// `ptr` must have been obtained from [`SlabAllocator::alloc`] on this
    /// allocator with the same `size`, must not already have been
    /// released, and must not be used after this call.
    pub unsafe fn release(&mut self, ptr: *mut c_void, size: usize) {
        if ptr.is_null() {
            return;
        }
        let Some(bucket) = Self::bucket_of(size) else {
            // Oversize pointers were never pooled; hand them straight back.
            unsafe {
                let _ = cudaFree(ptr);
            }
            return;
        };
        self.free_lists.entry(bucket).or_default().push(ptr);
        self.pool_bytes += bucket;
        if let Some(count) = self.live_counts.get_mut(&bucket) {
            *count = count.saturating_sub(1);
        }
    }

    /// Cumulative bytes requested from `cudaMalloc` over the pool's life.
    pub fn high_water_bytes(&self) -> usize {
        self.high_water_bytes
    }

    /// Bytes currently cached (idle) in the free lists.
    pub fn pool_bytes(&self) -> usize {
        self.pool_bytes
    }

    /// Number of `alloc` calls served from the pool.
    pub fn pool_hits(&self) -> u64 {
        self.pool_hits
    }

    /// Number of `alloc` calls that needed a fresh `cudaMalloc`.
    pub fn pool_misses(&self) -> u64 {
        self.pool_misses
    }

    /// Number of `alloc` calls that bypassed the pool entirely.
    pub fn oversize_fallback_count(&self) -> u64 {
        self.oversize_fallback_count
    }

    /// Number of distinct bucket size classes that have a free list
    /// (including lists that are currently empty).
    pub fn bucket_count(&self) -> usize {
        self.free_lists.len()
    }
}
impl Drop for SlabAllocator {
    /// Frees every cached (idle) device pointer. Pointers still held by
    /// callers — allocated but never released — are NOT freed here.
    fn drop(&mut self) {
        for ptr in self.free_lists.drain().flat_map(|(_, list)| list) {
            if ptr.is_null() {
                continue;
            }
            // Best-effort: a failing cudaFree during teardown is ignored.
            unsafe {
                let _ = cudaFree(ptr);
            }
        }
        self.pool_bytes = 0;
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // Probe whether a CUDA device is usable by attempting a tiny cudaMalloc;
    // GPU-dependent tests below early-return (skip) when this fails.
    fn cuda_available() -> bool {
        let mut p: *mut c_void = null_mut();
        let rc = unsafe { cudaMalloc(&mut p, 16) };
        if rc == CUDA_SUCCESS {
            unsafe {
                let _ = cudaFree(p);
            }
            true
        } else {
            false
        }
    }
    // Pure bucket-math test (no GPU needed): sizes round up to the next
    // power of two, clamped to [SLAB_MIN_BUCKET_BYTES, SLAB_MAX_BUCKET_BYTES],
    // with anything above the max mapping to None (oversize fallback).
    #[test]
    fn bucket_of_rounds_up_to_power_of_two() {
        assert_eq!(SlabAllocator::bucket_of(1), Some(SLAB_MIN_BUCKET_BYTES));
        assert_eq!(SlabAllocator::bucket_of(0), Some(SLAB_MIN_BUCKET_BYTES));
        assert_eq!(SlabAllocator::bucket_of(4096), Some(4096));
        assert_eq!(SlabAllocator::bucket_of(5000), Some(8 * 1024));
        assert_eq!(SlabAllocator::bucket_of(8192), Some(8192));
        assert_eq!(SlabAllocator::bucket_of(9000), Some(16 * 1024));
        assert_eq!(SlabAllocator::bucket_of(8 * 1024), Some(8 * 1024));
        assert_eq!(
            SlabAllocator::bucket_of(SLAB_MAX_BUCKET_BYTES),
            Some(SLAB_MAX_BUCKET_BYTES)
        );
        assert_eq!(SlabAllocator::bucket_of(SLAB_MAX_BUCKET_BYTES + 1), None);
        assert_eq!(SlabAllocator::bucket_of(16 * 1024 * 1024), None);
    }
    // A fresh allocator reports all-zero statistics.
    #[test]
    fn empty_pool_initial_state() {
        let slab = SlabAllocator::new();
        assert_eq!(slab.high_water_bytes(), 0);
        assert_eq!(slab.pool_bytes(), 0);
        assert_eq!(slab.pool_hits(), 0);
        assert_eq!(slab.pool_misses(), 0);
        assert_eq!(slab.oversize_fallback_count(), 0);
        assert_eq!(slab.bucket_count(), 0);
    }
    // alloc -> release -> alloc of the same size class must return the
    // exact same pointer, counted as one miss then one hit, with
    // pool_bytes reflecting whether the block is cached or handed out.
    #[test]
    fn slab_alloc_basic_alloc_release_reuse() {
        if !cuda_available() {
            return;
        }
        let mut slab = SlabAllocator::new();
        let p1 = slab.alloc(8 * 1024).expect("alloc 8 KiB");
        assert!(!p1.is_null());
        assert_eq!(slab.pool_misses(), 1);
        assert_eq!(slab.pool_hits(), 0);
        assert_eq!(slab.high_water_bytes(), 8 * 1024);
        assert_eq!(slab.pool_bytes(), 0);
        unsafe { slab.release(p1, 8 * 1024) };
        assert_eq!(slab.pool_bytes(), 8 * 1024);
        assert_eq!(slab.high_water_bytes(), 8 * 1024);
        let p2 = slab.alloc(8 * 1024).expect("alloc 8 KiB reuse");
        assert_eq!(p1, p2, "reuse must return the same pointer (LIFO)");
        assert_eq!(slab.pool_hits(), 1);
        assert_eq!(slab.pool_misses(), 1);
        assert_eq!(slab.pool_bytes(), 0);
        assert_eq!(slab.high_water_bytes(), 8 * 1024);
        unsafe { slab.release(p2, 8 * 1024) };
    }
    // Different request sizes that round to the same bucket (4500 and 5000
    // both -> 8 KiB) share cached blocks; distinct buckets do not.
    #[test]
    fn slab_alloc_size_class_bucketing() {
        if !cuda_available() {
            return;
        }
        let mut slab = SlabAllocator::new();
        let p_5k = slab.alloc(5000).expect("alloc 5000");
        let p_9k = slab.alloc(9000).expect("alloc 9000");
        assert_eq!(slab.high_water_bytes(), 8 * 1024 + 16 * 1024);
        unsafe {
            slab.release(p_5k, 5000);
            slab.release(p_9k, 9000);
        }
        assert_eq!(slab.pool_bytes(), 8 * 1024 + 16 * 1024);
        let p_5k2 = slab.alloc(4500).expect("alloc 4500 reuse 8 KiB");
        assert_eq!(p_5k, p_5k2, "4500 and 5000 both bucket-of 8 KiB");
        assert_eq!(slab.pool_bytes(), 16 * 1024);
        assert_eq!(slab.pool_hits(), 1);
        unsafe { slab.release(p_5k2, 4500) };
    }
    // Free lists are maintained per bucket: releasing into one bucket must
    // not affect reuse in another.
    #[test]
    fn slab_alloc_size_class_independence() {
        if !cuda_available() {
            return;
        }
        let mut slab = SlabAllocator::new();
        let p8 = slab.alloc(8 * 1024).expect("alloc 8 KiB");
        let p16 = slab.alloc(16 * 1024).expect("alloc 16 KiB");
        assert_ne!(p8, p16, "different buckets must yield different ptrs");
        unsafe {
            slab.release(p8, 8 * 1024);
            slab.release(p16, 16 * 1024);
        }
        let p16_again = slab.alloc(16 * 1024).expect("realloc 16 KiB");
        assert_eq!(p16_again, p16, "16 KiB bucket reuse");
        let p8_again = slab.alloc(8 * 1024).expect("realloc 8 KiB");
        assert_eq!(p8_again, p8, "8 KiB bucket reuse independent of 16 KiB");
        unsafe {
            slab.release(p8_again, 8 * 1024);
            slab.release(p16_again, 16 * 1024);
        }
    }
    // Requests above SLAB_MAX_BUCKET_BYTES bypass the pool: they count as
    // oversize fallbacks (not hits/misses), are rounded up to a 256-byte
    // multiple, and release frees them immediately instead of caching.
    #[test]
    fn slab_alloc_above_max_falls_back() {
        if !cuda_available() {
            return;
        }
        let mut slab = SlabAllocator::new();
        let oversize = SLAB_MAX_BUCKET_BYTES + 1;
        let p = slab.alloc(oversize).expect("alloc oversize");
        assert!(!p.is_null());
        assert_eq!(slab.oversize_fallback_count(), 1);
        assert_eq!(slab.pool_hits(), 0);
        assert_eq!(slab.pool_misses(), 0);
        assert_eq!(slab.pool_bytes(), 0);
        assert_eq!(slab.bucket_count(), 0);
        let alloc_size = oversize.div_ceil(256) * 256;
        assert_eq!(slab.high_water_bytes(), alloc_size);
        unsafe { slab.release(p, oversize) };
        assert_eq!(slab.pool_bytes(), 0);
        assert_eq!(slab.bucket_count(), 0);
    }
    // Dropping the allocator with cached blocks in several buckets must
    // not crash (Drop frees every cached pointer via cudaFree).
    #[test]
    fn slab_alloc_drop_frees_all() {
        if !cuda_available() {
            return;
        }
        let mut slab = SlabAllocator::new();
        let p4 = slab.alloc(4 * 1024).expect("4 KiB");
        let p8 = slab.alloc(8 * 1024).expect("8 KiB");
        let p16 = slab.alloc(16 * 1024).expect("16 KiB");
        unsafe {
            slab.release(p4, 4 * 1024);
            slab.release(p8, 8 * 1024);
            slab.release(p16, 16 * 1024);
        }
        assert_eq!(slab.pool_bytes(), 4 * 1024 + 8 * 1024 + 16 * 1024);
        assert_eq!(slab.high_water_bytes(), 4 * 1024 + 8 * 1024 + 16 * 1024);
        drop(slab);
    }
    // Within a bucket, reuse is last-in-first-out: the most recently
    // released pointer comes back first.
    #[test]
    fn slab_alloc_lifo_within_bucket() {
        if !cuda_available() {
            return;
        }
        let mut slab = SlabAllocator::new();
        let p1 = slab.alloc(8 * 1024).expect("a");
        let p2 = slab.alloc(8 * 1024).expect("b");
        assert_ne!(p1, p2);
        unsafe {
            slab.release(p1, 8 * 1024);
            slab.release(p2, 8 * 1024);
        }
        let q1 = slab.alloc(8 * 1024).expect("c");
        let q2 = slab.alloc(8 * 1024).expect("d");
        assert_eq!(q1, p2, "LIFO top");
        assert_eq!(q2, p1, "LIFO under-top");
        unsafe {
            slab.release(q1, 8 * 1024);
            slab.release(q2, 8 * 1024);
        }
    }
}