pub use crate::std::layers_std::LayerPlanStd;
pub use native_neural_network::engine::{
active_backend_contract_capabilities, backend_supported_request_flags,
clear_contract_reject_handler, clear_gpu_command_handler, clear_gpu_kernel_f32,
clear_gpu_kernel_f64, clear_last_contract_reject, clear_lpu_command_handler,
clear_tpu_command_handler, contract_abi_version_for_backend, get_compute_backend,
get_contract_runtime_flags, get_gpu_contract_opcode_mask, get_lpu_contract_opcode_mask,
get_tpu_contract_opcode_mask, is_contract_abi_compatible, is_hardware_contract_strict,
last_contract_reject_backend, last_contract_reject_opcode, last_contract_reject_reason,
probe_backend_contract, register_contract_reject_handler, register_gpu_command_handler,
register_gpu_command_handler_with_capabilities, register_gpu_kernel_f32,
register_gpu_kernel_f64, register_lpu_command_handler,
register_lpu_command_handler_with_capabilities, register_tpu_command_handler,
register_tpu_command_handler_with_capabilities, set_compute_backend,
set_contract_runtime_flags, set_gpu_contract_opcode_mask, set_hardware_contract_strict,
set_lpu_contract_opcode_mask, set_tpu_contract_opcode_mask, BackendContractCapabilities,
ComputeBackend, ContractRejectHandler, ContractRejectReason, GpuCommandHandler,
GpuContractRequest, GpuDispatchStatus, GpuKernelF32, GpuKernelF64, GpuOpCode, KernelCmdF32,
LpuCommandHandler, TpuCommandHandler, CONTRACT_FLAG_DETERMINISTIC_MATH,
CONTRACT_FLAG_REQUIRE_FINITE_INPUTS, CONTRACT_FLAG_REQUIRE_STRICT_ALIGNMENT,
CONTRACT_KNOWN_FLAGS_MASK,
};
/// Errors reported by the forward-pass wrappers in this module.
///
/// Each variant is produced from the same-named variant of
/// `native_neural_network::engine::ForwardError` by the `From` impl in this file.
///
/// The type is a field-less enum, so it derives `Clone`, `Copy`, `PartialEq` and `Eq`
/// (matching `BackendAvailabilityError`), which lets callers compare results directly,
/// e.g. `assert_eq!(result, Err(EngineStdError::ShapeMismatch))`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EngineStdError {
    /// Converted from `ForwardError::InvalidPlan`.
    InvalidPlan,
    /// Converted from `ForwardError::ShapeMismatch`.
    ShapeMismatch,
    /// Converted from `ForwardError::ScratchTooSmall`.
    ScratchTooSmall,
}
/// Reasons `require_backend_contract` cannot hand back a usable backend contract.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackendAvailabilityError {
/// The requested backend was `ComputeBackend::Cpu`, which is rejected before any probe.
NoContractForCpu,
/// Probing the carried backend yielded no capabilities, or capabilities whose
/// `opcode_mask` is zero.
BackendUnavailable(ComputeBackend),
}
/// Operating system the crate was compiled for, as reported by `current_os_target`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OsTarget {
/// Built with `target_os = "linux"`.
Linux,
/// Built with `target_os = "windows"`.
Windows,
/// Built with `target_os = "ios"`.
Ios,
/// Any other `target_os` value.
Unknown,
}
/// Hardware resource kinds, each encoded as a distinct single-bit `u32` value so that
/// several kinds can be OR-ed together into one flag word
/// (see `PlatformFlags::material_flags`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
pub enum MaterialKind {
    /// Bit 0.
    Cpu = 0b0_0001,
    /// Bit 1.
    Ram = 0b0_0010,
    /// Bit 2.
    Gpu = 0b0_0100,
    /// Bit 3.
    Lpu = 0b0_1000,
    /// Bit 4.
    Tpu = 0b1_0000,
}
/// Summary of the compile-time OS target and the hardware kinds currently reported as
/// available; built by `platform_flags`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PlatformFlags {
/// Value returned by `current_os_target`.
pub os: OsTarget,
/// Bitwise OR of `MaterialKind` values, as returned by `available_material_flags`.
pub material_flags: u32,
}
/// Copy of the backend driver settings captured by `snapshot_backend_driver_state` and
/// re-applied by `restore_backend_driver_state`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BackendDriverSnapshot {
/// Value of `get_compute_backend()` at capture time.
pub backend: ComputeBackend,
/// Value of `get_contract_runtime_flags()` at capture time.
pub runtime_flags: u32,
/// Value of `is_hardware_contract_strict()` at capture time.
pub strict: bool,
}
impl From<native_neural_network::engine::ForwardError> for EngineStdError {
    /// Maps every native forward error onto the `EngineStdError` variant of the same name.
    fn from(e: native_neural_network::engine::ForwardError) -> Self {
        use native_neural_network::engine::ForwardError as Native;

        match e {
            Native::InvalidPlan => Self::InvalidPlan,
            Native::ShapeMismatch => Self::ShapeMismatch,
            Native::ScratchTooSmall => Self::ScratchTooSmall,
        }
    }
}
pub fn forward_plan(
plan: &LayerPlanStd,
input: &[f32],
output: &mut [f32],
scratch: &mut [f32],
) -> Result<(), EngineStdError> {
plan.as_native_plan(|p| {
native_neural_network::engine::forward_plan(p, input, output, scratch).map_err(Into::into)
})
}
pub fn forward_batch_big_kernel(
plan: &LayerPlanStd,
input_batch: &[f32],
output_batch: &mut [f32],
batch_size: usize,
scratch: &mut [f32],
) -> Result<(), EngineStdError> {
plan.as_native_plan(|p| {
native_neural_network::engine::forward_plan_kernel(
p,
input_batch,
output_batch,
batch_size,
scratch,
)
.map_err(Into::into)
})
}
pub fn forward_plan_big_kernel(
plan: &LayerPlanStd,
input_batch: &[f32],
output_batch: &mut [f32],
batch_size: usize,
scratch: &mut [f32],
) -> Result<(), EngineStdError> {
plan.as_native_plan(|p| {
native_neural_network::engine::forward_plan_kernel(
p,
input_batch,
output_batch,
batch_size,
scratch,
)
.map_err(Into::into)
})
}
pub fn required_batch_scratch_len(plan: &LayerPlanStd, batch_size: usize) -> Option<usize> {
plan.as_native_plan(|p| {
native_neural_network::engine::required_batch_scratch_len(p, batch_size)
})
}
/// Forwards to the native `engine::required_single_infer_scratch` for the native view
/// of `plan`, returning its `Option<usize>` unchanged.
pub fn required_single_infer_scratch(plan: &LayerPlanStd) -> Option<usize> {
    plan.as_native_plan(|native_plan| {
        native_neural_network::engine::required_single_infer_scratch(native_plan)
    })
}
pub fn validate_forward_io(plan: &LayerPlanStd, input_len: usize, output_len: usize) -> bool {
plan.as_native_plan(|p| {
native_neural_network::engine::validate_forward_io(p, input_len, output_len)
})
}
impl core::fmt::Display for EngineStdError {
    /// Writes the variant's `Debug` name prefixed with `EngineStdError::`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_fmt(format_args!("EngineStdError::{:?}", self))
    }
}

// Marker impl so the type can be used wherever `std::error::Error` is required.
impl std::error::Error for EngineStdError {}
impl core::fmt::Display for BackendAvailabilityError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
BackendAvailabilityError::NoContractForCpu => {
write!(f, "CPU backend has no hardware contract")
}
BackendAvailabilityError::BackendUnavailable(backend) => {
write!(f, "backend {:?} has no active contract", backend)
}
}
}
}
impl std::error::Error for BackendAvailabilityError {}
/// Captures the current compute backend, contract runtime flags and strict-mode setting
/// into a [`BackendDriverSnapshot`].
pub fn snapshot_backend_driver_state() -> BackendDriverSnapshot {
    // Read in field order: backend, flags, strict.
    let backend = get_compute_backend();
    let runtime_flags = get_contract_runtime_flags();
    let strict = is_hardware_contract_strict();

    BackendDriverSnapshot {
        backend,
        runtime_flags,
        strict,
    }
}
pub fn restore_backend_driver_state(snapshot: BackendDriverSnapshot) {
set_compute_backend(snapshot.backend);
set_contract_runtime_flags(snapshot.runtime_flags);
set_hardware_contract_strict(snapshot.strict);
clear_last_contract_reject();
}
/// Makes `backend` the active compute backend and then calls
/// `clear_last_contract_reject`.
///
/// No availability check is performed here; `select_contract_backend_driver` is the
/// variant that first requires a usable contract.
pub fn select_backend_driver(backend: ComputeBackend) {
set_compute_backend(backend);
clear_last_contract_reject();
}
/// Selects `backend` only if it currently exposes a usable contract.
///
/// On success the backend is activated through `select_backend_driver` and the probed
/// capabilities are returned.
///
/// # Errors
///
/// Returns the error from `require_backend_contract`; in that case the active backend
/// is left untouched.
pub fn select_contract_backend_driver(
    backend: ComputeBackend,
) -> Result<BackendContractCapabilities, BackendAvailabilityError> {
    require_backend_contract(backend).map(|capabilities| {
        select_backend_driver(backend);
        capabilities
    })
}
/// Reports the operating system this crate was compiled for.
///
/// Recognizes `linux`, `windows` and `ios`; every other `target_os` yields
/// [`OsTarget::Unknown`].
pub fn current_os_target() -> OsTarget {
    if cfg!(target_os = "linux") {
        return OsTarget::Linux;
    }
    if cfg!(target_os = "windows") {
        return OsTarget::Windows;
    }
    if cfg!(target_os = "ios") {
        return OsTarget::Ios;
    }
    OsTarget::Unknown
}
pub fn available_material_flags() -> u32 {
let mut flags = MaterialKind::Cpu as u32 | MaterialKind::Ram as u32;
if require_backend_contract(ComputeBackend::Gpu).is_ok() {
flags |= MaterialKind::Gpu as u32;
}
if require_backend_contract(ComputeBackend::Lpu).is_ok() {
flags |= MaterialKind::Lpu as u32;
}
if require_backend_contract(ComputeBackend::Tpu).is_ok() {
flags |= MaterialKind::Tpu as u32;
}
flags
}
/// Combines `current_os_target` and `available_material_flags` into a [`PlatformFlags`].
pub fn platform_flags() -> PlatformFlags {
    let os = current_os_target();
    let material_flags = available_material_flags();
    PlatformFlags { os, material_flags }
}
/// Returns `true` when the bit for `material` is set in `available_material_flags()`.
pub fn has_material(material: MaterialKind) -> bool {
    let wanted_bit = material as u32;
    let present = available_material_flags();
    (present & wanted_bit) != 0
}
/// Returns `true` unconditionally for `Cpu` and `Ram`; for `Gpu`, `Lpu` and `Tpu` it
/// returns whether `require_backend_contract` succeeds for the matching backend.
pub fn material_buffers_preallocated(material: MaterialKind) -> bool {
    // `None` marks the kinds that need no backend contract.
    let contract_backend = match material {
        MaterialKind::Cpu | MaterialKind::Ram => None,
        MaterialKind::Gpu => Some(ComputeBackend::Gpu),
        MaterialKind::Lpu => Some(ComputeBackend::Lpu),
        MaterialKind::Tpu => Some(ComputeBackend::Tpu),
    };

    match contract_backend {
        None => true,
        Some(backend) => require_backend_contract(backend).is_ok(),
    }
}
/// Reinterprets the payload of a contract request as a [`KernelCmdF32`].
///
/// Returns `None` when `request` is null, when its payload pointer is null, when the
/// declared payload length differs from `size_of::<KernelCmdF32>()`, or when the payload
/// pointer is not suitably aligned for `KernelCmdF32`.
///
/// # Safety
///
/// * If non-null, `request` must point to a valid, properly aligned
///   `GpuContractRequest` that stays alive for `'a`.
/// * If non-null, the request's `payload_ptr` must point to at least `payload_len`
///   readable bytes holding an initialized `KernelCmdF32` that stays alive and
///   unmodified for `'a`.
unsafe fn cmd_f32_from_request<'a>(request: *const GpuContractRequest) -> Option<&'a KernelCmdF32> {
    // SAFETY: `as_ref` handles the null case; validity of a non-null pointer is the
    // caller's obligation per the contract above.
    let req = unsafe { request.as_ref() }?;
    if req.payload_ptr.is_null() {
        return None;
    }
    if req.header.payload_len as usize != core::mem::size_of::<KernelCmdF32>() {
        return None;
    }
    let payload = req.payload_ptr as *const KernelCmdF32;
    // Dereferencing a misaligned pointer is undefined behavior, so reject the payload
    // instead of trusting the sender to have aligned it.
    if (payload as usize) % core::mem::align_of::<KernelCmdF32>() != 0 {
        return None;
    }
    // SAFETY: `payload` is non-null, aligned, and declared to span exactly one
    // `KernelCmdF32`; liveness and initialization are guaranteed by the caller.
    Some(unsafe { &*payload })
}
/// CPU implementation of the GPU contract handler for `KernelCmdF32` requests.
///
/// For every batch row and every output index it computes
/// `biases[o] + Σ_i weights[o * in_size + i] * src[row + i]`, passes the sum through the
/// requested activation and stores it at `dst[row + o]`, where `row = batch * stride`
/// for both the source and the destination buffer.
///
/// Returns `GpuDispatchStatus::Ok` on success. Returns `GpuDispatchStatus::BadPayload`
/// when the request cannot be decoded, the activation code is unknown, a size is zero,
/// a pointer is null, a size product overflows, or any buffer is too short for the
/// accesses the loops would perform. `_user_ctx` is ignored.
///
/// All extents are validated before the loops run so that no indexing can panic inside
/// this `extern "C"` function.
extern "C" fn workspace_gpu_contract_handler(
    request: *const GpuContractRequest,
    _user_ctx: usize,
) -> u32 {
    let bad_payload = GpuDispatchStatus::BadPayload as u32;

    // SAFETY: the dispatcher is trusted to pass either null or a pointer to a live
    // request; `cmd_f32_from_request` rejects null, wrongly sized and misaligned payloads.
    let Some(cmd) = (unsafe { cmd_f32_from_request(request) }) else {
        return bad_payload;
    };
    let Some(activation) =
        native_neural_network::activations::ActivationKind::from_u8(cmd.activation)
    else {
        return bad_payload;
    };

    if cmd.out_size == 0 || cmd.in_size == 0 || cmd.stride < cmd.out_size {
        return bad_payload;
    }

    // Checked arithmetic: a wrapped product would let undersized buffers pass validation.
    let Some(weight_count) = cmd.in_size.checked_mul(cmd.out_size) else {
        return bad_payload;
    };
    if cmd.weights_len < weight_count || cmd.biases_len < cmd.out_size {
        return bad_payload;
    }
    let Some(batch_extent) = cmd.batch_size.checked_mul(cmd.stride) else {
        return bad_payload;
    };
    if cmd.src_len < batch_extent || cmd.dst_len < batch_extent {
        return bad_payload;
    }
    // Source rows are read `in_size` elements wide, which may exceed `stride`; make sure
    // the last row still ends inside `src`. Earlier rows end even sooner.
    if cmd.batch_size > 0 {
        let last_row_base = batch_extent - cmd.stride;
        let last_row_fits = last_row_base
            .checked_add(cmd.in_size)
            .map_or(false, |end| end <= cmd.src_len);
        if !last_row_fits {
            return bad_payload;
        }
    }

    if cmd.src_ptr.is_null()
        || cmd.dst_ptr.is_null()
        || cmd.weights_ptr.is_null()
        || cmd.biases_ptr.is_null()
    {
        return bad_payload;
    }

    // SAFETY: all four pointers are non-null; the sender is trusted to make each
    // pointer/length pair describe a live, initialized, properly aligned buffer.
    // NOTE(review): this also assumes `src` and `dst` never overlap. An in-place request
    // would alias a shared and a mutable slice; confirm the contract forbids that.
    let src = unsafe { core::slice::from_raw_parts(cmd.src_ptr, cmd.src_len) };
    let dst = unsafe { core::slice::from_raw_parts_mut(cmd.dst_ptr, cmd.dst_len) };
    let weights = unsafe { core::slice::from_raw_parts(cmd.weights_ptr, cmd.weights_len) };
    let biases = unsafe { core::slice::from_raw_parts(cmd.biases_ptr, cmd.biases_len) };

    for batch in 0..cmd.batch_size {
        let row_base = batch * cmd.stride;
        let src_row = &src[row_base..row_base + cmd.in_size];
        let dst_row = &mut dst[row_base..row_base + cmd.out_size];

        // One weight row (`in_size` values) and one bias per output element.
        let per_output = dst_row
            .iter_mut()
            .zip(weights.chunks_exact(cmd.in_size))
            .zip(biases);
        for ((out, weight_row), &bias) in per_output {
            // Accumulate left to right starting from the bias, as in the indexed form.
            let acc = weight_row
                .iter()
                .zip(src_row)
                .fold(bias, |acc, (&weight, &value)| acc + weight * value);
            *out = activation.apply(acc);
        }
    }

    GpuDispatchStatus::Ok as u32
}
/// Registers `workspace_gpu_contract_handler` as the GPU command handler.
///
/// The handler is registered with `0` as the second argument (presumably the user
/// context handed back to the handler — confirm against the engine API) and an opcode
/// mask whose only set bit is at position `GpuOpCode::F32 - 1`.
pub fn register_workspace_gpu_contract_driver() {
    let f32_bit_index = (GpuOpCode::F32 as u32) - 1;
    let supported_opcodes = 1u32 << f32_bit_index;
    register_gpu_command_handler_with_capabilities(
        workspace_gpu_contract_handler,
        0,
        supported_opcodes,
    );
}
/// Registers the workspace GPU driver by delegating to
/// `register_workspace_gpu_contract_driver`; the two functions have the same effect.
pub fn register_workspace_gpu_driver() {
register_workspace_gpu_contract_driver();
}
/// Probes `backend` and returns its contract capabilities if they are usable.
///
/// # Errors
///
/// * [`BackendAvailabilityError::NoContractForCpu`] when `backend` is the CPU backend;
///   no probe is performed in that case.
/// * [`BackendAvailabilityError::BackendUnavailable`] when the probe yields nothing or
///   yields capabilities whose `opcode_mask` is zero.
pub fn require_backend_contract(
    backend: ComputeBackend,
) -> Result<BackendContractCapabilities, BackendAvailabilityError> {
    if backend == ComputeBackend::Cpu {
        return Err(BackendAvailabilityError::NoContractForCpu);
    }

    match probe_backend_contract(backend) {
        Some(caps) if caps.opcode_mask != 0 => Ok(caps),
        _ => Err(BackendAvailabilityError::BackendUnavailable(backend)),
    }
}
/// Applies `require_backend_contract` to the backend returned by `get_compute_backend()`.
///
/// # Errors
///
/// Returns whatever error `require_backend_contract` reports for the active backend.
pub fn require_active_backend_contract(
) -> Result<BackendContractCapabilities, BackendAvailabilityError> {
    let active_backend = get_compute_backend();
    require_backend_contract(active_backend)
}