use scivex_core::Tensor;
use scivex_core::random::Rng;
use scivex_gpu::{GpuDevice, GpuTensor};
use super::variable::GpuVariable;
use crate::error::Result;
/// Common interface for layers that operate on [`GpuVariable`]s.
pub trait GpuLayer {
/// Runs the layer on `x` and returns the resulting variable, or an error
/// if any step of the computation fails.
fn forward(&self, x: &GpuVariable) -> Result<GpuVariable>;
/// Returns the variables that make up the layer's parameters.
fn parameters(&self) -> Vec<GpuVariable>;
/// Hook for switching the layer to training mode; what that means is up to
/// the implementor (`GpuLinear` implements it as a no-op).
fn train(&mut self);
/// Hook for switching the layer to evaluation mode; what that means is up to
/// the implementor (`GpuLinear` implements it as a no-op).
fn eval(&mut self);
}
/// Transposes `x` and wraps the result in a new variable built with
/// `GpuVariable::from_op`.
///
/// The forward value is `scivex_gpu::ops::transpose` applied to `x`'s data.
/// `x` is registered as the only parent, and the backward closure returns a
/// single gradient: the transpose of the incoming gradient.
///
/// # Errors
/// Returns an error if the forward transpose fails.
///
/// # Panics
/// The backward closure panics if transposing the gradient fails.
fn gpu_transpose_var(x: &GpuVariable) -> Result<GpuVariable> {
    let forward = x.with_data(scivex_gpu::ops::transpose)?;
    let parents = vec![x.clone()];

    // The gradient of a transpose is the transpose of the upstream gradient.
    let backward = |upstream: &GpuTensor| {
        let grad_x = scivex_gpu::ops::transpose(upstream).expect("transpose in backward");
        vec![grad_x]
    };

    Ok(GpuVariable::from_op(forward, parents, Box::new(backward)))
}
/// Multiplies `a` by `b` with `scivex_gpu::ops::matmul` and wraps the product
/// in a new variable built with `GpuVariable::from_op`.
///
/// `a` and `b` are registered as parents, in that order. The backward closure
/// returns two gradients in the same order:
///
/// * `grad_a = grad · bᵀ`
/// * `grad_b = aᵀ · grad`
///
/// # Errors
/// Returns an error if the forward matrix multiplication fails.
///
/// # Panics
/// The backward closure panics if any transpose or matmul it performs fails.
fn gpu_matmul_var(a: &GpuVariable, b: &GpuVariable) -> Result<GpuVariable> {
    // `with_data` works through a shared reference, so the operands are read
    // in place; no temporary clones are needed for the forward pass.
    let product = a.with_data(|lhs| b.with_data(|rhs| scivex_gpu::ops::matmul(lhs, rhs)))?;

    // The backward closure is `move`, so it needs handles of its own to reach
    // the operands' data when gradients are computed.
    let lhs_var = a.clone();
    let rhs_var = b.clone();

    Ok(GpuVariable::from_op(
        product,
        vec![a.clone(), b.clone()],
        Box::new(move |grad: &GpuTensor| {
            let rhs_t = rhs_var
                .with_data(scivex_gpu::ops::transpose)
                .expect("transpose b in backward");
            let grad_a = scivex_gpu::ops::matmul(grad, &rhs_t).expect("matmul grad_a in backward");

            let lhs_t = lhs_var
                .with_data(scivex_gpu::ops::transpose)
                .expect("transpose a in backward");
            let grad_b = scivex_gpu::ops::matmul(&lhs_t, grad).expect("matmul grad_b in backward");

            vec![grad_a, grad_b]
        }),
    ))
}
/// Adds `bias` to every row of `input` and wraps the sum in a new variable
/// built with `GpuVariable::from_op`.
///
/// The addition goes through host tensors: both operands are fetched with
/// `data_cpu`, summed element by element, and the result is turned back into
/// a `GpuTensor` on `input`'s device. The first two entries of `input`'s shape
/// are read as `[rows, cols]`, and element `c` of `bias` is added to column `c`.
///
/// `input` and `bias` are registered as parents, in that order. The backward
/// closure returns two gradients: a re-uploaded copy of the incoming gradient
/// for `input`, and the per-column sum of the gradient over the rows for `bias`.
///
/// # Errors
/// Returns an error if fetching either operand with `data_cpu` fails.
///
/// # Panics
/// Panics if `input`'s shape has fewer than two entries, if `bias` has fewer
/// elements than `input` has columns, or if the output tensor cannot be built.
/// The backward closure panics if the gradient cannot be downloaded or is
/// smaller than the forward `[rows, cols]` layout implies.
fn gpu_add_bias_var(input: &GpuVariable, bias: &GpuVariable) -> Result<GpuVariable> {
    let host_input = input.data_cpu()?;
    let host_bias = bias.data_cpu()?;
    let rows = host_input.shape()[0];
    let cols = host_input.shape()[1];
    let values = host_input.as_slice();
    let offsets = host_bias.as_slice();

    // Row-major walk: every row gets the same per-column offsets added.
    let mut summed = Vec::with_capacity(rows * cols);
    for r in 0..rows {
        let base = r * cols;
        summed.extend((0..cols).map(|c| values[base + c] + offsets[c]));
    }

    let host_out = Tensor::from_vec(summed, vec![rows, cols]).expect("bias add output shape");
    let device = input.device();
    let gpu_out = GpuTensor::from_tensor(&device, &host_out);

    Ok(GpuVariable::from_op(
        gpu_out,
        vec![input.clone(), bias.clone()],
        // Only `rows` is captured; the column count is re-read from the gradient.
        Box::new(move |grad: &GpuTensor| {
            let host_grad = grad.to_tensor().expect("download grad in bias backward");
            let grad_values = host_grad.as_slice();
            let grad_cols = host_grad.shape()[1];
            let dev = grad.device().clone();

            // The input's gradient is the upstream gradient unchanged.
            let grad_input = GpuTensor::from_tensor(&dev, &host_grad);

            // The bias gradient is the upstream gradient summed over the rows.
            let mut column_sums = vec![0.0f32; grad_cols];
            for r in 0..rows {
                let base = r * grad_cols;
                for (c, acc) in column_sums.iter_mut().enumerate() {
                    *acc += grad_values[base + c];
                }
            }

            let host_bias_grad =
                Tensor::from_vec(column_sums, vec![grad_cols]).expect("bias grad shape");
            let grad_bias = GpuTensor::from_tensor(&dev, &host_bias_grad);

            vec![grad_input, grad_bias]
        }),
    ))
}
/// Linear layer whose state is a weight variable and an optional bias variable.
pub struct GpuLinear {
/// Weight variable; `GpuLinear::new` creates it with shape
/// `[out_features, in_features]`.
weight: GpuVariable,
/// Bias variable of length `out_features`, or `None` when the layer was
/// constructed with `use_bias == false`.
bias: Option<GpuVariable>,
}
impl GpuLinear {
    /// Builds a linear layer whose tensors are created on `device`.
    ///
    /// * The weight is drawn with `crate::init::kaiming_uniform` using `rng`
    ///   and has shape `[out_features, in_features]`.
    /// * When `use_bias` is `true`, a zero-filled bias of length
    ///   `out_features` is created as well; otherwise the layer has no bias.
    ///
    /// Both variables are created with the second argument of
    /// `GpuVariable::new` set to `true` (presumably "requires gradient";
    /// confirm against `GpuVariable`).
    pub fn new(
        device: &GpuDevice,
        in_features: usize,
        out_features: usize,
        use_bias: bool,
        rng: &mut Rng,
    ) -> Self {
        let weight_init = crate::init::kaiming_uniform::<f32>(&[out_features, in_features], rng);
        let weight = GpuVariable::new(GpuTensor::from_tensor(device, &weight_init), true);

        let bias = use_bias.then(|| {
            let zeros = Tensor::<f32>::zeros(vec![out_features]);
            GpuVariable::new(GpuTensor::from_tensor(device, &zeros), true)
        });

        Self { weight, bias }
    }

    /// Borrows the weight variable.
    pub fn weight(&self) -> &GpuVariable {
        &self.weight
    }

    /// Borrows the bias variable, or returns `None` if the layer has no bias.
    pub fn bias(&self) -> Option<&GpuVariable> {
        self.bias.as_ref()
    }
}
impl GpuLayer for GpuLinear {
    /// Computes `x · weightᵀ`, then adds the bias to every row when the layer
    /// has one.
    ///
    /// Each step goes through the autograd helpers (`gpu_transpose_var`,
    /// `gpu_matmul_var`, `gpu_add_bias_var`), and the first error from any of
    /// them is returned.
    fn forward(&self, x: &GpuVariable) -> Result<GpuVariable> {
        let weight_t = gpu_transpose_var(&self.weight)?;
        let projected = gpu_matmul_var(x, &weight_t)?;

        if let Some(bias) = self.bias.as_ref() {
            gpu_add_bias_var(&projected, bias)
        } else {
            Ok(projected)
        }
    }

    /// Returns clones of the weight and, if present, the bias, in that order.
    fn parameters(&self) -> Vec<GpuVariable> {
        std::iter::once(&self.weight)
            .chain(self.bias.iter())
            .cloned()
            .collect()
    }

    /// No-op: this layer keeps no mode state.
    fn train(&mut self) {}

    /// No-op: this layer keeps no mode state.
    fn eval(&mut self) {}
}