use super::*;
/// Process-wide slot holding the pinned host allocator.
///
/// The slot is `None` until [`init_static_host_alloc`] succeeds and becomes
/// `None` again once [`free_static_host_alloc`] has taken the allocator out.
/// A mutex guards the slot so that lookup, initialization and teardown can be
/// called from any thread without creating references to a `static mut`.
static _STATIC_HOST_ALLOC: std::sync::Mutex<Option<GlobalStaticHost>> =
    std::sync::Mutex::new(None);

/// Locks the global slot, recovering the guard if an earlier holder panicked.
///
/// The slot only ever contains a complete `Option` value, so a poisoned lock
/// does not indicate torn state. Recovering keeps a panic in one accessor
/// (e.g. the "not initialized" panic below) from disabling all later calls.
fn lock_static_host_alloc() -> std::sync::MutexGuard<'static, Option<GlobalStaticHost>> {
    _STATIC_HOST_ALLOC
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
}

/// Returns a clone of the global static host allocator.
///
/// # Panics
///
/// Panics with "initialize static host allocator" if
/// [`init_static_host_alloc`] has not been called yet, or if the allocator
/// has already been released by [`free_static_host_alloc`].
pub(crate) fn _static_host_alloc() -> GlobalStaticHost {
    let slot = lock_static_host_alloc();
    slot.as_ref()
        .expect("initialize static host allocator")
        .clone()
}

/// Creates the global static host allocator for the given domain size.
///
/// If the allocator already exists the call only prints a notice and returns;
/// `domain_size` is not inspected in that case. Otherwise three blocks of
/// `9 * 32 * domain_size` bytes each are requested from
/// [`GlobalStaticHost::init`] and the resulting allocator is stored globally.
///
/// # Panics
///
/// Panics if `domain_size` is not a power of two, if the block size does not
/// fit in `usize`, or if [`GlobalStaticHost::init`] returns an error.
pub(crate) fn init_static_host_alloc(domain_size: usize) {
    // The lock is held across the check and the store so that two racing
    // callers cannot both allocate.
    let mut slot = lock_static_host_alloc();
    if slot.is_some() {
        println!("fflonk pinned memory already initialized, ignoring");
        return;
    }
    assert!(domain_size.is_power_of_two());
    let num_blocks = 3;
    // NOTE(review): the derivation of the 9 and 32 factors is not visible in
    // this file; they are kept exactly as they were.
    let block_size_in_bytes = domain_size
        .checked_mul(9 * 32)
        .expect("static host block size overflows usize");
    let allocator = GlobalStaticHost::init(num_blocks, block_size_in_bytes)
        .expect("initialize static allocator");
    *slot = Some(allocator);
}

/// Releases the global static host allocator, if one is currently stored.
///
/// Calling this when nothing is stored is a no-op.
///
/// # Panics
///
/// Panics with "Couldn't free static allocator" if [`GlobalStaticHost::free`]
/// returns an error.
pub(crate) fn free_static_host_alloc() {
    // Take the allocator out first; the temporary guard is dropped at the end
    // of this statement, so the lock is not held while the memory is freed.
    let taken = lock_static_host_alloc().take();
    if let Some(alloc) = taken {
        alloc.free().expect("Couldn't free static allocator");
    }
}
/// Host allocator handle wrapping a single `StaticBitmapAllocator`.
///
/// All `Allocator` requests made through this type are forwarded to the
/// wrapped allocator. `Clone` is derived, so cloning behaves however
/// `StaticBitmapAllocator::clone` behaves.
#[derive(Clone)]
pub struct GlobalStaticHost(StaticBitmapAllocator);
/// `Default` yields a clone of the process-wide static host allocator.
impl Default for GlobalStaticHost {
/// Returns the result of `_static_host_alloc()`.
///
/// Panics if the global allocator has not been initialized, because
/// `_static_host_alloc` panics in that case.
fn default() -> Self {
_static_host_alloc()
}
}
/// Marker trait for allocators usable as host-side allocators.
///
/// It declares no methods of its own; it only bundles the bounds
/// `Allocator + Default + Clone + Send + Sync + 'static`.
pub trait HostAllocator: Allocator + Default + Clone + Send + Sync + 'static {}
impl GlobalStaticHost {
    /// Allocates `num_blocks * block_size_in_bytes` bytes through
    /// `host_allocate` and wraps them in a `StaticBitmapAllocator`.
    ///
    /// # Errors
    ///
    /// Returns the error reported by `host_allocate` if the allocation fails.
    ///
    /// # Panics
    ///
    /// Panics if `num_blocks` is zero, if the total size does not fit in
    /// `usize`, or if `host_allocate` reports success but hands back a null
    /// pointer.
    pub fn init(num_blocks: usize, block_size_in_bytes: usize) -> CudaResult<Self> {
        assert_ne!(num_blocks, 0);
        // A wrapped product would silently request a much smaller region than
        // the bitmap allocator is later told it manages.
        let memory_size_in_bytes = num_blocks
            .checked_mul(block_size_in_bytes)
            .expect("static host allocation size overflows usize");
        let raw = host_allocate(memory_size_in_bytes)?;
        // Checked construction: building a `NonNull` from a null pointer with
        // `new_unchecked` would be undefined behaviour.
        let base = std::ptr::NonNull::new(raw as _)
            .expect("host_allocate returned a null pointer");
        let memory = std::ptr::NonNull::slice_from_raw_parts(base, memory_size_in_bytes);
        println!("allocated {memory_size_in_bytes} bytes on pinned host memory");
        let allocator = StaticBitmapAllocator::init(memory, num_blocks, block_size_in_bytes);
        Ok(Self(allocator))
    }

    /// Consumes the handle and returns its backing memory via `host_dealloc`.
    ///
    /// # Errors
    ///
    /// Returns whatever error `host_dealloc` reports.
    ///
    /// # Panics
    ///
    /// Panics if any `Weak` reference to the shared memory handle exists.
    pub(crate) fn free(self) -> CudaResult<()> {
        println!("freeing static cuda allocation");
        // NOTE(review): only the weak count is checked. If clones of this
        // handle share the same `Arc`, they presumably keep pointing at freed
        // memory after this call - confirm that callers drop them first.
        assert_eq!(std::sync::Arc::weak_count(&self.0.memory.0), 0);
        let StaticBitmapAllocator { mut memory, .. } = self.0;
        host_dealloc(memory.as_mut_ptr().cast())
    }
}
// NOTE(review): the soundness of this `unsafe impl` rests on the guarantees of
// the wrapped `StaticBitmapAllocator`, which are not visible in this file.
unsafe impl Allocator for GlobalStaticHost {
    /// Forwards the request to the wrapped allocator, collapsing any error it
    /// reports into `std::alloc::AllocError`.
    fn allocate(
        &self,
        layout: std::alloc::Layout,
    ) -> Result<NonNull<[u8]>, std::alloc::AllocError> {
        match self.0.allocate(layout) {
            Ok(block) => Ok(block),
            Err(_) => Err(std::alloc::AllocError),
        }
    }

    /// Allocates a block and zeroes its first `layout.size()` bytes.
    ///
    /// Returns `AllocError` if the underlying allocation fails.
    ///
    /// # Panics
    ///
    /// Panics with "Couldn't allocate zeroed buffer" if `gpu_ffi::bc_memset`
    /// returns a non-zero status.
    fn allocate_zeroed(
        &self,
        layout: std::alloc::Layout,
    ) -> Result<std::ptr::NonNull<[u8]>, std::alloc::AllocError> {
        let block = self.allocate(layout)?;
        let byte_count = layout.size();
        let base = block.as_ptr() as *mut u8;
        unsafe {
            // NOTE(review): the region is zeroed twice, once with a plain
            // write and once through `bc_memset`. The second pass looks
            // redundant, but `bc_memset` is not visible here - confirm before
            // removing either one.
            std::ptr::write_bytes(base, 0, byte_count);
            let status = gpu_ffi::bc_memset(base.cast(), 0, byte_count as u64);
            if status != 0 {
                // NOTE(review): the freshly allocated block is not returned
                // to the allocator on this path.
                panic!("Couldn't allocate zeroed buffer")
            }
        }
        Ok(block)
    }

    /// Returns the block to the wrapped allocator.
    unsafe fn deallocate(&self, ptr: std::ptr::NonNull<u8>, layout: std::alloc::Layout) {
        self.0.deallocate(ptr, layout);
    }
}
// The static host allocator satisfies every `HostAllocator` bound.
impl HostAllocator for GlobalStaticHost {}
// The standard global allocator is accepted as a `HostAllocator` as well.
// NOTE(review): presumably the non-pinned fallback - confirm against callers.
impl HostAllocator for std::alloc::Global {}