#![deny(missing_docs)]
#![allow(internal_features)]
#![feature(core_intrinsics, link_llvm_intrinsics)]
#![no_std]
#[cfg(feature = "alloc")]
extern crate alloc;
#[cfg(feature = "alloc")]
use core::alloc::{GlobalAlloc, Layout};
use core::ffi;
#[cfg(feature = "print")]
#[macro_export]
macro_rules! print {
($($arg:tt)*) => {
$crate::print(::alloc::format!($($arg)*));
};
}
#[cfg(feature = "print")]
#[macro_export]
macro_rules! println {
($($arg:tt)*) => {
let mut s = ::alloc::format!($($arg)*);
s.push('\n');
$crate::print(&s);
};
}
pub mod intrinsics;
pub mod prelude {
#[cfg(feature = "device_libs")]
pub use crate::dispatch_ptr;
pub use crate::intrinsics::{
s_barrier, workgroup_id_x, workgroup_id_y, workgroup_id_z, workitem_id_x, workitem_id_y,
workitem_id_z,
};
#[cfg(feature = "print")]
pub use print;
#[cfg(feature = "print")]
pub use println;
}
#[cfg(feature = "device_libs")]
unsafe extern "C" {
#[cfg(feature = "hostcall")]
#[allow(improper_ctypes)]
fn __ockl_call_host_function(
fptr: ffi::c_ulong,
arg0: ffi::c_ulong,
arg1: ffi::c_ulong,
arg2: ffi::c_ulong,
arg3: ffi::c_ulong,
arg4: ffi::c_ulong,
arg5: ffi::c_ulong,
arg6: ffi::c_ulong,
) -> u128;
fn __amdgpu_util_alloc(size: ffi::c_ulong) -> *mut ffi::c_void;
fn __amdgpu_util_dealloc(addr: *mut ffi::c_void);
fn __amdgpu_util_print_stdout(s: *const ffi::c_char, size: ffi::c_int);
safe fn __amdgpu_util_dispatch_ptr() -> *const ffi::c_void;
safe fn __amdgpu_util_queue_ptr() -> *mut ffi::c_void;
safe fn __amdgpu_util_kernarg_segment_ptr() -> *const ffi::c_void;
safe fn __amdgpu_util_implicitarg_ptr() -> *const ffi::c_void;
}
#[cfg(feature = "device_libs")]
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[repr(C)]
pub struct HsaSignal {
pub handle: u64,
}
#[cfg(feature = "device_libs")]
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[repr(C)]
pub struct HsaKernelDispatchPacket {
pub header: u16,
pub setup: u16,
pub workgroup_size_x: u16,
pub workgroup_size_y: u16,
pub workgroup_size_z: u16,
pub reserved0: u16,
pub grid_size_x: u32,
pub grid_size_y: u32,
pub grid_size_z: u32,
pub private_segment_size: u32,
pub group_segment_size: u32,
pub kernel_object: u64,
pub kernarg_address: *mut ffi::c_void,
pub reserved2: u64,
pub completion_signal: HsaSignal,
}
#[cfg(feature = "panic_handler")]
#[cfg_attr(not(feature = "print"), allow(unused_variables))]
#[panic_handler]
#[inline]
fn panic(panic_info: &core::panic::PanicInfo) -> ! {
#[cfg(feature = "print")]
{
use prelude::*;
println!(
"workgroup {},{},{} thread {},{},{} {panic_info}",
workgroup_id_x(),
workgroup_id_y(),
workgroup_id_z(),
workitem_id_x(),
workitem_id_y(),
workitem_id_z()
);
}
core::intrinsics::abort();
}
#[cfg(feature = "alloc")]
pub struct Allocator;
#[cfg(feature = "alloc")]
unsafe impl GlobalAlloc for Allocator {
#[inline]
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
unsafe { __amdgpu_util_alloc(layout.size() as ffi::c_ulong) as *mut _ }
}
#[inline]
unsafe fn dealloc(&self, ptr: *mut u8, _: Layout) {
unsafe { __amdgpu_util_dealloc(ptr as *mut _) };
}
}
#[cfg(feature = "print")]
#[inline]
pub fn print(s: &str) {
unsafe {
__amdgpu_util_print_stdout(
s.as_ptr() as *const ffi::c_char,
s.len().try_into().expect("String too long to print"),
);
}
}
#[cfg(feature = "device_libs")]
#[inline]
pub fn dispatch_ptr() -> &'static HsaKernelDispatchPacket {
unsafe { &*(__amdgpu_util_dispatch_ptr() as *const HsaKernelDispatchPacket) }
}
#[cfg(feature = "hostcall")]
#[allow(clippy::missing_safety_doc, clippy::too_many_arguments)]
pub unsafe fn call_host_function(
function: u64,
arg0: u64,
arg1: u64,
arg2: u64,
arg3: u64,
arg4: u64,
arg5: u64,
arg6: u64,
) -> u128 {
unsafe { __ockl_call_host_function(function, arg0, arg1, arg2, arg3, arg4, arg5, arg6) }
}
#[unsafe(no_mangle)]
#[inline]
extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
for i in 0..n {
let diff = unsafe { i32::from(s1.add(i).read()) - i32::from(s2.add(i).read()) };
if diff != 0 {
return diff;
}
}
0
}
#[cfg(feature = "global_allocator")]
#[global_allocator]
static HEAP: Allocator = Allocator;