trueno-gpu 0.4.29

Pure Rust PTX generation for NVIDIA CUDA - no LLVM, no nvcc
Documentation
1
2
3
4
5
6
7
8
9
10
//! DP4A-based Q4_K GEMV Kernels for 4x Instruction Reduction
//!
//! Kernels re-exported from this module:
//!
//! - [`Dp4aQ4KGemvKernel`]: Basic DP4A implementation
//! - [`TrueDp4aQ4KGemvKernel`]: Full DP4A with Q8 activations

// Private submodule providing `Dp4aQ4KGemvKernel` (re-exported below).
mod basic;
// Private submodule providing `TrueDp4aQ4KGemvKernel` (re-exported below).
mod vectorized;

// Re-export the kernel types at this module's level: the submodules above are
// private, so these `pub use` items are the only public path to the types.
pub use basic::Dp4aQ4KGemvKernel;
pub use vectorized::TrueDp4aQ4KGemvKernel;