miden-gpu 0.6.0

GPU acceleration for the Miden VM prover
Documentation
/// WARNING: keep the original data around, or it will be freed.
// TODO: see buffer_mut_no_copy comments
pub fn buffer_no_copy<T: Sized>(device: &metal::DeviceRef, v: &[T]) -> metal::Buffer {
    assert!(is_page_aligned(v));
    let byte_len = core::mem::size_of_val(v);
    device.new_buffer_with_bytes_no_copy(
        v.as_ptr() as *mut core::ffi::c_void,
        byte_len.try_into().unwrap(),
        metal::MTLResourceOptions::StorageModeShared,
        None,
    )
}

/// WARNING: keep the original data around, or it will be freed.
// TODO: This method previously passed a vec instead of a slice to make sure capacity was aligned to
// the page size (as per doc requirements). Seems to work in practice (on M1 at least) if only the
// pointer is aligned. Passing a slice if handy because passing a vec with any allocator requires
// nightly allocator_api feature. https://developer.apple.com/documentation/metal/mtldevice/1433382-makebuffer
pub fn buffer_mut_no_copy<T: Sized>(device: &metal::DeviceRef, v: &mut [T]) -> metal::Buffer {
    assert!(is_page_aligned(v));
    // TODO: once allocator_api stabilized check capacity is aligned to page size
    // this current implementation may be brittle.
    let byte_len = core::mem::size_of_val(v);
    device.new_buffer_with_bytes_no_copy(
        v.as_mut_ptr() as *mut core::ffi::c_void,
        byte_len.try_into().unwrap(),
        metal::MTLResourceOptions::StorageModeShared,
        None,
    )
}

// Converts a reference to a void pointer
pub(crate) fn void_ptr<T>(v: &T) -> *const core::ffi::c_void {
    v as *const T as *const core::ffi::c_void
}

#[repr(C, align(16384))]
struct Page([u8; 16384]);

/// Checks a slice is page aligned on
pub fn is_page_aligned<T>(v: &[T]) -> bool {
    v.as_ptr().align_offset(core::mem::align_of::<Page>()) == 0
}

/// Returns a page aligned vector of the specified length with un-initialized
/// memory. This is for Apple Silicon targets only. Apple Silicon supports
/// shared memory between CPU and GPU. The data resides in system memory and is
/// visible and modifiable by both the CPU and the GPU if it's page aligned.
///
/// # Safety
/// Using values from the returned vector before initializing them will lead to
/// undefined behavior.
#[allow(clippy::uninit_vec)]
pub unsafe fn page_aligned_uninit_vector<T>(length: usize) -> Vec<T> {
    #[repr(C, align(16384))]
    struct Page([u8; 16384]);
    let item_size = core::mem::size_of::<T>();
    let page_size = core::mem::size_of::<Page>();
    // assert_eq!(page_size % item_size, 0, "item size must divide page size");
    let num_pages = item_size * length / page_size + 1;
    let mut aligned: Vec<Page> = Vec::with_capacity(num_pages);
    let ptr = aligned.as_mut_ptr();
    let capacity = num_pages * page_size / item_size;
    core::mem::forget(aligned);
    unsafe { Vec::from_raw_parts(ptr as *mut T, length, capacity) }
}