cubecl-runtime 0.5.0

Crate that helps create high-performance async runtimes for CubeCL.
Documentation
use core::future::Future;
use cubecl_common::{ExecutionMode, benchmark::ProfileDuration};

use crate::{
    server::{Binding, BindingWithMeta, Bindings, ComputeServer, CubeCount, Handle},
    storage::{BindingResource, ComputeStorage},
};
use alloc::vec::Vec;

/// The ComputeChannel trait links the ComputeClient to the ComputeServer
/// while ensuring thread-safety.
///
/// Implementors must be `Clone`, `Debug`, `Send` and `Sync`. The `Server` type
/// parameter determines the kernel type (`Server::Kernel`) and the storage type
/// (`Server::Storage`) that appear in the method signatures below.
pub trait ComputeChannel<Server: ComputeServer>: Clone + core::fmt::Debug + Send + Sync {
    /// Given bindings, returns owned resources as bytes.
    ///
    /// The returned future is `Send` and resolves to a list of byte buffers.
    // NOTE(review): presumably one buffer per binding, in the same order — confirm
    // against the implementors.
    fn read(&self, bindings: Vec<Binding>) -> impl Future<Output = Vec<Vec<u8>>> + Send;

    /// Given bindings that carry metadata, returns owned resources as bytes.
    ///
    /// Same contract as `read`, except that each binding is a `BindingWithMeta`
    /// instead of a plain `Binding`.
    // NOTE(review): the metadata is presumably the tensor layout (shape/strides/element
    // size) needed to read non-contiguous data — confirm against `BindingWithMeta`.
    fn read_tensor(
        &self,
        bindings: Vec<BindingWithMeta>,
    ) -> impl Future<Output = Vec<Vec<u8>>> + Send;

    /// Given a binding, return the storage resource it refers to.
    ///
    /// The resource type is the one declared by the server's storage
    /// (`<Server::Storage as ComputeStorage>::Resource`), wrapped in a `BindingResource`.
    fn get_resource(
        &self,
        binding: Binding,
    ) -> BindingResource<<Server::Storage as ComputeStorage>::Resource>;

    /// Given a resource as bytes, stores it and returns the resource handle.
    fn create(&self, data: &[u8]) -> Handle;

    /// Given a resource as bytes and a shape, stores it and returns the tensor handle
    /// together with a `Vec<usize>`.
    // NOTE(review): the returned `Vec<usize>` is presumably the strides chosen by the
    // server for `shape`, and `elem_size` presumably the size in bytes of one element —
    // TODO confirm against the server implementation.
    fn create_tensor(&self, data: &[u8], shape: &[usize], elem_size: usize)
    -> (Handle, Vec<usize>);

    /// Reserves `size` bytes in the storage, and returns a handle over them.
    fn empty(&self, size: usize) -> Handle;

    /// Reserves a tensor with `shape` in the storage, and returns a handle to it
    /// together with a `Vec<usize>`.
    // NOTE(review): as for `create_tensor`, the returned `Vec<usize>` is presumably the
    // strides and `elem_size` the per-element size in bytes — TODO confirm.
    fn empty_tensor(&self, shape: &[usize], elem_size: usize) -> (Handle, Vec<usize>);

    /// Executes the `kernel` over the given `bindings`.
    ///
    /// `mode` selects the [ExecutionMode] the kernel is executed with.
    // NOTE(review): `count` presumably specifies how many cubes are launched — confirm
    // against the `CubeCount` definition.
    ///
    /// # Safety
    ///
    /// When executing with mode [ExecutionMode::Unchecked], out-of-bound reads and writes can happen.
    unsafe fn execute(
        &self,
        kernel: Server::Kernel,
        count: CubeCount,
        bindings: Bindings,
        mode: ExecutionMode,
    );

    /// Flush outstanding work of the server.
    ///
    /// This method is synchronous and returns nothing; use `sync` to wait for completion.
    fn flush(&self);

    /// Wait for the completion of every task in the server.
    ///
    /// The returned future is `Send` and resolves to `()`.
    fn sync(&self) -> impl Future<Output = ()> + Send;

    /// Get the current memory usage of the server.
    fn memory_usage(&self) -> crate::memory_management::MemoryUsage;

    /// Ask the server to release memory that it can release.
    fn memory_cleanup(&self);

    /// Start a profile on the server. This allows you to profile kernels.
    ///
    /// This will measure execution time either by measuring the 'full' execution time by synchronizing
    /// the execution at the start and the end of the profile, or 'device' time by using device timestamps.
    /// This function will handle any required synchronization.
    ///
    /// The profile is closed by calling `end_profile`.
    ///
    /// # Panics
    ///
    /// Recursive profiling is not allowed and will panic.
    fn start_profile(&self);

    /// End the profile and return a [`ProfileDuration`].
    ///
    /// You can retrieve the Duration of the client profile asynchronously. This function will handle any required synchronization.
    fn end_profile(&self) -> ProfileDuration;
}