cublas-hgemm 0.1.0

HGEMM kernels with Tensor Core support built on cuda-oxide
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// HGEMM Tensor Core: WMMA/MMA instruction implementation
//
// Uses Tensor Cores via the WMMA (Warp Matrix Multiply-Accumulate) or
// MMA (Matrix Multiply-Accumulate) PTX instructions.

use cublas_core::GemmConfig;
use half::f16;

/// Entry point for launching the Tensor Core HGEMM kernel on `f16` data.
///
/// This is currently a stub: `config`, `a`, `b` and `c` are accepted but
/// never read, and nothing is written to `c`.
///
/// # Panics
///
/// Always panics through [`todo!`], because the kernel launch has not been
/// implemented yet.
pub fn hgemm_tensor_core(
    config: &GemmConfig<f16>,
    a: &[f16],
    b: &[f16],
    c: &mut [f16],
) {
    // Explicitly discard every argument so the stub builds without
    // unused-variable warnings until the real launch code exists.
    let _ = config;
    let _ = a;
    let _ = b;
    let _ = c;

    todo!("launch Tensor Core HGEMM kernel")
}