pub struct CudaBlas { /* private fields */ }
Wrapper around sys::cublasHandle_t.
- Create with CudaBlas::new()
- Execute gemm/gemv kernels with Gemv and Gemm. Both f32 and f64 are supported for both, and f16 is supported for gemm.

Note: this maintains an instance of Arc<CudaDevice>, so it will prevent the device from being dropped.
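A minimal end-to-end sketch of the two bullet points above. This assumes a cudarc-style surrounding API and a CUDA-capable GPU: `CudaDevice::new`, `htod_copy`, the `sys::cublasOperation_t` path, and the exact `GemmConfig` field set are assumptions modeled on cuBLAS's sgemm parameters, not confirmed by this page.

```rust
use std::sync::Arc;

use cudarc::cublas::{sys, CudaBlas, Gemm, GemmConfig};
use cudarc::driver::CudaDevice;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Bind a handle to the first GPU's work stream.
    let dev: Arc<CudaDevice> = CudaDevice::new(0)?;
    let blas = CudaBlas::new(dev.clone())?;

    // Column-major 2x2 matrices copied host-to-device
    // (htod_copy is an assumption about the device API).
    let a = dev.htod_copy(vec![1.0f32, 3.0, 2.0, 4.0])?;
    let b = dev.htod_copy(vec![1.0f32, 0.0, 0.0, 1.0])?;
    let mut c = dev.htod_copy(vec![0.0f32; 4])?;

    // C = alpha * A * B + beta * C; field names follow cuBLAS's
    // sgemm conventions and are assumptions here.
    let cfg = GemmConfig {
        transa: sys::cublasOperation_t::CUBLAS_OP_N,
        transb: sys::cublasOperation_t::CUBLAS_OP_N,
        m: 2, n: 2, k: 2,
        alpha: 1.0f32,
        lda: 2, ldb: 2,
        beta: 0.0f32,
        ldc: 2,
    };
    // Safety: a, b, c are distinct allocations and nothing else is
    // reading or writing them while the kernel runs.
    unsafe { blas.gemm(cfg, &a, &b, &mut c)? };
    Ok(())
}
```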
Implementations
impl CudaBlas
pub fn new(device: Arc<CudaDevice>) -> Result<Self, CublasError>
Creates a new cublas handle and sets the stream to the device's stream.
pub unsafe fn set_stream(
    &self,
    opt_stream: Option<&CudaStream>
) -> Result<(), CublasError>
Sets the handle's current stream to either the stream specified, or the device's default work stream.
Safety
This is unsafe because you can end up scheduling multiple concurrent kernels that all write to the same memory address.
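A sketch of the intended pattern, given an existing Arc<CudaDevice> named `dev` and a CudaBlas named `blas`. `fork_default_stream` is an assumption about the device API, and a CUDA GPU is required; this is illustrative, not the crate's documented usage.

```rust
// Sketch only: fork_default_stream is an assumed device method.
let stream = dev.fork_default_stream()?;
// Safety: nothing else reads or writes the buffers this handle's
// kernels touch while they run on `stream`.
unsafe { blas.set_stream(Some(&stream))? };
// ... launch gemm/gemv work on the new stream ...
unsafe { blas.set_stream(None)? }; // back to the default work stream
```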
Trait Implementations
impl Gemm<f16> for CudaBlas
unsafe fn gemm<A: DevicePtr<f16>, B: DevicePtr<f16>, C: DevicePtrMut<f16>>(
    &self,
    cfg: GemmConfig<f16>,
    a: &A,
    b: &B,
    c: &mut C
) -> Result<(), CublasError>
Matrix-matrix multiplication. See the NVIDIA cuBLAS documentation.
unsafe fn gemm_strided_batched<A: DevicePtr<f16>, B: DevicePtr<f16>, C: DevicePtrMut<f16>>(
    &self,
    cfg: StridedBatchedConfig<f16>,
    a: &A,
    b: &B,
    c: &mut C
) -> Result<(), CublasError>
Batched matrix-matrix multiplication with stride support on the batch dimension. See the NVIDIA cuBLAS documentation.
impl Gemm<f32> for CudaBlas
unsafe fn gemm<A: DevicePtr<f32>, B: DevicePtr<f32>, C: DevicePtrMut<f32>>(
    &self,
    cfg: GemmConfig<f32>,
    a: &A,
    b: &B,
    c: &mut C
) -> Result<(), CublasError>
Matrix-matrix multiplication. See the NVIDIA cuBLAS documentation.
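The semantics of the call can be pinned down with a CPU reference: cuBLAS gemm computes C = alpha * op(A) * op(B) + beta * C over column-major matrices. The sketch below (no transposes, leading dimensions as in cuBLAS) is illustrative and is not the crate's code.

```rust
/// CPU reference for the column-major GEMM that cuBLAS performs:
/// C = alpha * A * B + beta * C (no transposes, for simplicity).
/// `lda`/`ldb`/`ldc` are leading dimensions, as in cuBLAS.
fn gemm_ref(
    m: usize, n: usize, k: usize,
    alpha: f32, beta: f32,
    a: &[f32], lda: usize,     // A is m x k, column-major
    b: &[f32], ldb: usize,     // B is k x n, column-major
    c: &mut [f32], ldc: usize, // C is m x n, column-major
) {
    for col in 0..n {
        for row in 0..m {
            let mut acc = 0.0f32;
            for i in 0..k {
                acc += a[row + i * lda] * b[i + col * ldb];
            }
            c[row + col * ldc] = alpha * acc + beta * c[row + col * ldc];
        }
    }
}

fn main() {
    // A = [[1, 2], [3, 4]] stored column-major; B = identity.
    let a = [1.0f32, 3.0, 2.0, 4.0];
    let b = [1.0f32, 0.0, 0.0, 1.0];
    let mut c = [0.0f32; 4];
    gemm_ref(2, 2, 2, 1.0, 0.0, &a, 2, &b, 2, &mut c, 2);
    assert_eq!(c, a); // A * I = A
}
```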
unsafe fn gemm_strided_batched<A: DevicePtr<f32>, B: DevicePtr<f32>, C: DevicePtrMut<f32>>(
    &self,
    cfg: StridedBatchedConfig<f32>,
    a: &A,
    b: &B,
    c: &mut C
) -> Result<(), CublasError>
Batched matrix-matrix multiplication with stride support on the batch dimension. See the NVIDIA cuBLAS documentation.
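As with plain gemm, the strided-batched semantics can be shown with a CPU reference: `batch` independent products whose matrices sit a fixed number of elements apart in each buffer. The stride parameter names follow cuBLAS conventions; the sketch (column-major, no transposes, leading dimensions fixed to the row counts) is illustrative only.

```rust
/// CPU reference for strided-batched GEMM: `batch` independent
/// products C_p = alpha * A_p * B_p + beta * C_p, where matrix p
/// starts `p * stride_*` elements into its buffer (column-major,
/// no transposes, leading dimensions m/k/m for simplicity).
fn gemm_strided_batched_ref(
    batch: usize,
    m: usize, n: usize, k: usize,
    alpha: f32, beta: f32,
    a: &[f32], stride_a: usize,
    b: &[f32], stride_b: usize,
    c: &mut [f32], stride_c: usize,
) {
    for p in 0..batch {
        let (ao, bo, co) = (p * stride_a, p * stride_b, p * stride_c);
        for col in 0..n {
            for row in 0..m {
                let mut acc = 0.0f32;
                for i in 0..k {
                    acc += a[ao + row + i * m] * b[bo + i + col * k];
                }
                let idx = co + row + col * m;
                c[idx] = alpha * acc + beta * c[idx];
            }
        }
    }
}

fn main() {
    // Two 1x1 "matrices" per operand: c[p] = a[p] * b[p].
    let a = [2.0f32, 3.0];
    let b = [5.0f32, 7.0];
    let mut c = [0.0f32; 2];
    gemm_strided_batched_ref(2, 1, 1, 1, 1.0, 0.0, &a, 1, &b, 1, &mut c, 1);
    assert_eq!(c, [10.0, 21.0]);
}
```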
impl Gemm<f64> for CudaBlas
unsafe fn gemm<A: DevicePtr<f64>, B: DevicePtr<f64>, C: DevicePtrMut<f64>>(
    &self,
    cfg: GemmConfig<f64>,
    a: &A,
    b: &B,
    c: &mut C
) -> Result<(), CublasError>
Matrix-matrix multiplication. See the NVIDIA cuBLAS documentation.
unsafe fn gemm_strided_batched<A: DevicePtr<f64>, B: DevicePtr<f64>, C: DevicePtrMut<f64>>(
    &self,
    cfg: StridedBatchedConfig<f64>,
    a: &A,
    b: &B,
    c: &mut C
) -> Result<(), CublasError>
Batched matrix-matrix multiplication with stride support on the batch dimension. See the NVIDIA cuBLAS documentation.
Auto Trait Implementations
impl Send for CudaBlas
impl Sync for CudaBlas
Blanket Implementations
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.