use crate::activation::Activation;
/// Operation codes stored in the `opcode` word of a `GpuInstruction`.
///
/// Each constant is written by the `GpuInstruction` constructor of the same name.
pub mod opcodes {
    /// Written by `GpuInstruction::dot`.
    pub const DOT: u32 = 0x01;

    /// Written by `GpuInstruction::activation`.
    pub const ACTIVATION: u32 = 0x02;

    /// Written by `GpuInstruction::elem_wise_add`.
    pub const ELEM_WISE_ADD: u32 = 0x03;

    /// Written by `GpuInstruction::elem_wise_mul`.
    pub const ELEM_WISE_MUL: u32 = 0x04;

    /// Written by `GpuInstruction::copy`.
    pub const COPY: u32 = 0x05;
}
/// Numeric identifiers for activation functions, as produced by `activation_to_gpu`.
///
/// The identifiers are consecutive, starting at zero for "no activation".
pub mod activation_types {
    /// Returned when no activation is requested (`None`).
    pub const NONE: u32 = 0x00;

    /// Identifier for `Activation::Relu`.
    pub const RELU: u32 = 0x01;

    /// Identifier for `Activation::Sigmoid`.
    pub const SIGMOID: u32 = 0x02;

    /// Identifier for `Activation::Softmax`.
    pub const SOFTMAX: u32 = 0x03;

    /// Identifier for `Activation::Tanh`.
    pub const TANH: u32 = 0x04;

    /// Identifier for `Activation::Sqrt`.
    pub const SQRT: u32 = 0x05;

    /// Identifier for `Activation::Log`.
    pub const LOG: u32 = 0x06;

    /// Identifier for `Activation::Log10`.
    pub const LOG10: u32 = 0x07;

    /// Identifier for `Activation::Inverse`.
    pub const INVERSE: u32 = 0x08;

    /// Identifier for `Activation::Gelu`.
    pub const GELU: u32 = 0x09;

    /// Identifier for `Activation::Softplus`.
    pub const SOFTPLUS: u32 = 0x0A;

    /// Identifier for `Activation::Exp`.
    pub const EXP: u32 = 0x0B;

    /// Identifier for `Activation::Sign`.
    pub const SIGN: u32 = 0x0C;
}
/// Translates an optional `Activation` into its `activation_types` identifier.
///
/// `None` yields `activation_types::NONE`; every `Activation` variant yields the
/// constant of the same name.
pub fn activation_to_gpu(activation: Option<Activation>) -> u32 {
    // The inner match deliberately has no wildcard arm, so adding a variant to
    // `Activation` without assigning it an identifier fails to compile.
    activation.map_or(activation_types::NONE, |kind| match kind {
        Activation::Relu => activation_types::RELU,
        Activation::Sigmoid => activation_types::SIGMOID,
        Activation::Softmax => activation_types::SOFTMAX,
        Activation::Tanh => activation_types::TANH,
        Activation::Sqrt => activation_types::SQRT,
        Activation::Log => activation_types::LOG,
        Activation::Log10 => activation_types::LOG10,
        Activation::Inverse => activation_types::INVERSE,
        Activation::Gelu => activation_types::GELU,
        Activation::Softplus => activation_types::SOFTPLUS,
        Activation::Exp => activation_types::EXP,
        Activation::Sign => activation_types::SIGN,
    })
}
/// One fixed-layout instruction record made of eight `u32` words.
///
/// `#[repr(C)]` keeps the fields in declaration order, and the `bytemuck::Pod` /
/// `bytemuck::Zeroable` derives allow a value to be viewed as plain bytes.
/// The derived `Default` produces a record with every word set to zero.
///
/// NOTE(review): presumably this layout mirrors a structure read by GPU-side
/// code, so fields should not be reordered or resized without confirming that.
#[repr(C)]
#[derive(Debug, Clone, Copy, Default, bytemuck::Pod, bytemuck::Zeroable)]
pub struct GpuInstruction {
/// Operation selector; the constructors store one of the `opcodes` constants here.
pub opcode: u32,
/// Location the operation reads from.
/// NOTE(review): the addressing unit is not visible in this file — confirm.
pub input_ptr: u32,
/// Location the operation writes to; equal to `input_ptr` for the constructors
/// that take a single `ptr` argument.
pub output_ptr: u32,
/// Size argument of the operation: `output_size` for `dot`, `size` for the others.
pub data_size: u32,
/// First operation-specific word: `weights_offset` for `dot`, `params_offset`
/// for the element-wise operations, zero otherwise.
pub param0: u32,
/// Second operation-specific word: `input_size` for `dot`, zero otherwise.
pub param1: u32,
/// Third operation-specific word: an `activation_types` identifier for `dot`
/// and `activation`, zero otherwise.
pub param2: u32,
/// Unused word; every constructor sets it to zero.
pub reserved: u32,
}
impl GpuInstruction {
    /// Number of `u32` words in one instruction (one per struct field).
    pub const SIZE_U32S: usize = 8;

    /// Size of one instruction in bytes: `SIZE_U32S` words of four bytes each.
    pub const SIZE_BYTES: usize = Self::SIZE_U32S * std::mem::size_of::<u32>();

    /// Builds an instruction whose input and output locations are the same `ptr`,
    /// leaving all parameter words and `reserved` at zero.
    fn in_place(opcode: u32, ptr: u32, size: u32) -> Self {
        Self {
            opcode,
            input_ptr: ptr,
            output_ptr: ptr,
            data_size: size,
            ..Self::default()
        }
    }

    /// Builds a `DOT` instruction.
    ///
    /// `output_size` is stored in `data_size`, `weights_offset` in `param0`,
    /// `input_size` in `param1`, and the identifier of `activation`
    /// (or `activation_types::NONE` when absent) in `param2`.
    pub fn dot(
        input_ptr: u32,
        output_ptr: u32,
        output_size: u32,
        weights_offset: u32,
        input_size: u32,
        activation: Option<Activation>,
    ) -> Self {
        let activation_id = activation_to_gpu(activation);
        Self {
            opcode: opcodes::DOT,
            input_ptr,
            output_ptr,
            data_size: output_size,
            param0: weights_offset,
            param1: input_size,
            param2: activation_id,
            ..Self::default()
        }
    }

    /// Builds an `ACTIVATION` instruction that reads and writes the same `ptr`.
    ///
    /// The identifier of `activation` is stored in `param2`.
    pub fn activation(ptr: u32, size: u32, activation: Activation) -> Self {
        Self {
            param2: activation_to_gpu(Some(activation)),
            ..Self::in_place(opcodes::ACTIVATION, ptr, size)
        }
    }

    /// Builds an `ELEM_WISE_ADD` instruction that reads and writes the same `ptr`.
    ///
    /// `params_offset` is stored in `param0`.
    pub fn elem_wise_add(ptr: u32, size: u32, params_offset: u32) -> Self {
        Self {
            param0: params_offset,
            ..Self::in_place(opcodes::ELEM_WISE_ADD, ptr, size)
        }
    }

    /// Builds an `ELEM_WISE_MUL` instruction that reads and writes the same `ptr`.
    ///
    /// `params_offset` is stored in `param0`.
    pub fn elem_wise_mul(ptr: u32, size: u32, params_offset: u32) -> Self {
        Self {
            param0: params_offset,
            ..Self::in_place(opcodes::ELEM_WISE_MUL, ptr, size)
        }
    }

    /// Builds a `COPY` instruction from `src_ptr` to `dst_ptr` with the given `size`.
    ///
    /// All parameter words are zero.
    pub fn copy(src_ptr: u32, dst_ptr: u32, size: u32) -> Self {
        Self {
            opcode: opcodes::COPY,
            input_ptr: src_ptr,
            output_ptr: dst_ptr,
            data_size: size,
            ..Self::default()
        }
    }

    /// Returns the eight words in field order, each reinterpreted as an `f32`.
    ///
    /// This is a bit-level reinterpretation via `f32::from_bits`, not a numeric
    /// conversion; calling `to_bits` on an element gives back the stored `u32`.
    pub fn to_f32_array(&self) -> [f32; Self::SIZE_U32S] {
        let words = [
            self.opcode,
            self.input_ptr,
            self.output_ptr,
            self.data_size,
            self.param0,
            self.param1,
            self.param2,
            self.reserved,
        ];
        words.map(f32::from_bits)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The struct must occupy exactly the advertised number of bytes and words.
    #[test]
    fn test_gpu_instruction_size() {
        assert_eq!(
            std::mem::size_of::<GpuInstruction>(),
            GpuInstruction::SIZE_BYTES
        );
        assert_eq!(
            GpuInstruction::SIZE_U32S * std::mem::size_of::<u32>(),
            GpuInstruction::SIZE_BYTES
        );
    }

    /// `dot` places every argument in its documented word.
    #[test]
    fn test_dot_instruction() {
        let inst = GpuInstruction::dot(0, 10, 5, 100, 8, Some(Activation::Relu));
        assert_eq!(inst.opcode, opcodes::DOT);
        assert_eq!(inst.input_ptr, 0);
        assert_eq!(inst.output_ptr, 10);
        assert_eq!(inst.data_size, 5);
        assert_eq!(inst.param0, 100);
        assert_eq!(inst.param1, 8);
        assert_eq!(inst.param2, activation_types::RELU);
        assert_eq!(inst.reserved, 0);
    }

    /// `dot` without an activation stores the `NONE` identifier.
    #[test]
    fn test_dot_instruction_without_activation() {
        let inst = GpuInstruction::dot(1, 2, 3, 4, 5, None);
        assert_eq!(inst.opcode, opcodes::DOT);
        assert_eq!(inst.param2, activation_types::NONE);
        assert_eq!(inst.reserved, 0);
    }

    /// `activation` operates in place and stores the activation id in `param2`.
    #[test]
    fn test_activation_instruction() {
        let inst = GpuInstruction::activation(7, 3, Activation::Tanh);
        assert_eq!(inst.opcode, opcodes::ACTIVATION);
        assert_eq!(inst.input_ptr, 7);
        assert_eq!(inst.output_ptr, 7);
        assert_eq!(inst.data_size, 3);
        assert_eq!(inst.param0, 0);
        assert_eq!(inst.param1, 0);
        assert_eq!(inst.param2, activation_types::TANH);
        assert_eq!(inst.reserved, 0);
    }

    /// The element-wise constructors operate in place and store the offset in `param0`.
    #[test]
    fn test_elem_wise_instructions() {
        let add = GpuInstruction::elem_wise_add(4, 6, 20);
        assert_eq!(add.opcode, opcodes::ELEM_WISE_ADD);
        assert_eq!(add.input_ptr, 4);
        assert_eq!(add.output_ptr, 4);
        assert_eq!(add.data_size, 6);
        assert_eq!(add.param0, 20);
        assert_eq!(add.param1, 0);
        assert_eq!(add.param2, 0);
        assert_eq!(add.reserved, 0);

        let mul = GpuInstruction::elem_wise_mul(9, 2, 30);
        assert_eq!(mul.opcode, opcodes::ELEM_WISE_MUL);
        assert_eq!(mul.input_ptr, 9);
        assert_eq!(mul.output_ptr, 9);
        assert_eq!(mul.data_size, 2);
        assert_eq!(mul.param0, 30);
        assert_eq!(mul.param1, 0);
        assert_eq!(mul.param2, 0);
        assert_eq!(mul.reserved, 0);
    }

    /// `copy` keeps source and destination distinct and zeroes all parameters.
    #[test]
    fn test_copy_instruction() {
        let inst = GpuInstruction::copy(11, 22, 33);
        assert_eq!(inst.opcode, opcodes::COPY);
        assert_eq!(inst.input_ptr, 11);
        assert_eq!(inst.output_ptr, 22);
        assert_eq!(inst.data_size, 33);
        assert_eq!(inst.param0, 0);
        assert_eq!(inst.param1, 0);
        assert_eq!(inst.param2, 0);
        assert_eq!(inst.reserved, 0);
    }

    /// Every `Activation` variant, and `None`, maps to its matching identifier.
    #[test]
    fn test_activation_to_gpu() {
        assert_eq!(activation_to_gpu(None), activation_types::NONE);
        assert_eq!(
            activation_to_gpu(Some(Activation::Relu)),
            activation_types::RELU
        );
        assert_eq!(
            activation_to_gpu(Some(Activation::Sigmoid)),
            activation_types::SIGMOID
        );
        assert_eq!(
            activation_to_gpu(Some(Activation::Softmax)),
            activation_types::SOFTMAX
        );
        assert_eq!(
            activation_to_gpu(Some(Activation::Tanh)),
            activation_types::TANH
        );
        assert_eq!(
            activation_to_gpu(Some(Activation::Sqrt)),
            activation_types::SQRT
        );
        assert_eq!(
            activation_to_gpu(Some(Activation::Log)),
            activation_types::LOG
        );
        assert_eq!(
            activation_to_gpu(Some(Activation::Log10)),
            activation_types::LOG10
        );
        assert_eq!(
            activation_to_gpu(Some(Activation::Inverse)),
            activation_types::INVERSE
        );
        assert_eq!(
            activation_to_gpu(Some(Activation::Gelu)),
            activation_types::GELU
        );
        assert_eq!(
            activation_to_gpu(Some(Activation::Softplus)),
            activation_types::SOFTPLUS
        );
        // Previously uncovered variants.
        assert_eq!(
            activation_to_gpu(Some(Activation::Exp)),
            activation_types::EXP
        );
        assert_eq!(
            activation_to_gpu(Some(Activation::Sign)),
            activation_types::SIGN
        );
    }

    /// Each word survives the `u32 -> f32 -> u32` bit reinterpretation unchanged.
    #[test]
    fn test_to_f32_array_roundtrip() {
        let inst = GpuInstruction::dot(42, 100, 16, 500, 32, Some(Activation::Sigmoid));
        let f32_array = inst.to_f32_array();
        assert_eq!(f32_array[0].to_bits(), opcodes::DOT);
        assert_eq!(f32_array[1].to_bits(), 42);
        assert_eq!(f32_array[2].to_bits(), 100);
        assert_eq!(f32_array[3].to_bits(), 16);
        assert_eq!(f32_array[4].to_bits(), 500);
        assert_eq!(f32_array[5].to_bits(), 32);
        assert_eq!(f32_array[6].to_bits(), activation_types::SIGMOID);
        assert_eq!(f32_array[7].to_bits(), 0);
    }
}