mod cpu;
#[cfg(feature = "gpu")]
pub mod gpu;
pub use cpu::CpuBackend;
/// Compute backend abstraction: a set of associated functions implementing the
/// numeric kernels needed by the rest of the crate (element-wise ops, GEMM,
/// convolution lowering, pooling, normalization, attention building blocks).
///
/// All tensors are passed as flat `f32` slices; shape/layout is conveyed via
/// the extra `usize` parameters. NOTE(review): layout is presumably row-major
/// / NCHW throughout — confirm against `CpuBackend`.
pub trait Backend {
/// Allocate a vector of `len` zeros.
fn zeros(len: usize) -> Vec<f32>;
/// Allocate a vector of `len` copies of `value`.
fn full(len: usize, value: f32) -> Vec<f32>;
/// Element-wise `dst = lhs + rhs`. Presumably all three slices share one length — TODO confirm.
fn add(lhs: &[f32], rhs: &[f32], dst: &mut [f32]);
/// Element-wise `dst = lhs - rhs`.
fn sub(lhs: &[f32], rhs: &[f32], dst: &mut [f32]);
/// Element-wise `dst = lhs * rhs` (Hadamard product).
fn mul(lhs: &[f32], rhs: &[f32], dst: &mut [f32]);
/// Element-wise `dst = src * scalar`.
fn scale(src: &[f32], dst: &mut [f32], scalar: f32);
/// Element-wise ReLU: `dst = max(src, 0)`.
fn relu(src: &[f32], dst: &mut [f32]);
/// ReLU gradient: takes the forward *input* (not output), masking `out_grad`
/// where the input was non-positive — presumably `dst = out_grad * (input > 0)`.
fn relu_backward(input: &[f32], out_grad: &[f32], dst: &mut [f32]);
/// Matrix multiply `dst = a · b` with `a` of shape (m, k), `b` of shape (k, n),
/// `dst` of shape (m, n). NOTE(review): assumed row-major — verify in impl.
fn matmul(a: &[f32], b: &[f32], dst: &mut [f32], m: usize, k: usize, n: usize);
/// Broadcast-add a length-`n` bias vector to each of the `m` rows of `matrix`.
fn add_bias(matrix: &[f32], bias: &[f32], dst: &mut [f32], m: usize, n: usize);
/// Reduce an (m, n) matrix over its rows — presumably `dst[j] = Σ_i src[i][j]`
/// (e.g. for a bias gradient, so `dst` has length n) — TODO confirm direction.
fn sum_rows(src: &[f32], dst: &mut [f32], m: usize, n: usize);
/// Lower a (c_in, h, w) image into column form for convolution-as-GEMM,
/// with square kernel `k`, given stride/padding and precomputed output dims.
fn im2col(
src: &[f32], dst: &mut [f32],
c_in: usize, h: usize, w: usize,
k: usize, stride: usize, pad: usize,
out_h: usize, out_w: usize,
);
/// Inverse of `im2col`: scatter-add columns back into a (c_in, h, w) image
/// (used in the convolution backward pass).
fn col2im(
src: &[f32], dst: &mut [f32],
c_in: usize, h: usize, w: usize,
k: usize, stride: usize, pad: usize,
out_h: usize, out_w: usize,
);
/// Add a per-channel bias: one bias value per channel, broadcast over
/// `spatial` elements of that channel.
fn add_channel_bias(
src: &[f32], bias: &[f32], dst: &mut [f32],
channels: usize, spatial: usize,
);
/// Gradient of `add_channel_bias`: reduce `src` over the `spatial` dimension
/// to one value per channel.
fn sum_channel_bias_grad(
src: &[f32], dst: &mut [f32],
channels: usize, spatial: usize,
);
/// 2D max pooling with square window `k`. `indices` records, per output
/// element, which input position won the max — stored as `f32` (this trait
/// has no integer buffers), so implementations presumably round-trip the
/// index through a float; safe only while indices fit exactly in f32 (< 2^24).
fn max_pool2d(
src: &[f32], dst: &mut [f32], indices: &mut [f32],
channels: usize, h: usize, w: usize,
k: usize, stride: usize,
out_h: usize, out_w: usize,
);
/// Backward of `max_pool2d`: route each output gradient to the input
/// position recorded in `indices`.
fn max_pool2d_backward(
out_grad: &[f32], indices: &[f32], dst: &mut [f32],
channels: usize, h: usize, w: usize,
out_h: usize, out_w: usize,
);
/// Dropout with drop probability `p`; writes the keep/drop `mask` so the
/// backward pass can reuse it. `step` presumably seeds/offsets the RNG so
/// results are deterministic per training step — TODO confirm against impl.
fn dropout(
src: &[f32], dst: &mut [f32], mask: &mut [f32],
numel: usize, p: f32, step: u64,
);
/// Element-wise logistic sigmoid.
fn sigmoid(src: &[f32], dst: &mut [f32]);
/// Sigmoid gradient. Note: takes the saved forward *output* (sigmoid' can be
/// expressed as `y * (1 - y)`), unlike the `_input`-based backwards below.
fn sigmoid_backward(saved_out: &[f32], out_grad: &[f32], dst: &mut [f32]);
/// Element-wise tanh (named `tanh_act` to avoid clashing with `f32::tanh`).
fn tanh_act(src: &[f32], dst: &mut [f32]);
/// Tanh gradient; also expressed via the saved forward output.
fn tanh_backward(saved_out: &[f32], out_grad: &[f32], dst: &mut [f32]);
/// Element-wise GELU activation.
fn gelu(src: &[f32], dst: &mut [f32]);
/// GELU gradient; needs the saved forward *input* (no closed form in output).
fn gelu_backward(saved_input: &[f32], out_grad: &[f32], dst: &mut [f32]);
/// Leaky ReLU with negative-slope `alpha`.
fn leaky_relu(src: &[f32], dst: &mut [f32], alpha: f32);
/// Leaky ReLU gradient; takes the saved forward input and the same `alpha`.
fn leaky_relu_backward(saved_input: &[f32], out_grad: &[f32], dst: &mut [f32], alpha: f32);
/// Batch normalization over an NCHW tensor. In training mode it updates
/// `running_mean`/`running_var` (blended by `momentum`) and fills `save` with
/// per-channel statistics for the backward pass; in eval mode it presumably
/// normalizes with the running stats instead — TODO confirm `save` layout.
fn batch_norm_forward(
input: &[f32], weight: &[f32], bias: &[f32],
running_mean: &mut [f32], running_var: &mut [f32],
output: &mut [f32], save: &mut [f32],
batch: usize, channels: usize, height: usize, width: usize,
epsilon: f32, momentum: f32, is_training: bool,
);
/// Batch-norm input gradient, using the statistics captured in `save`.
/// NOTE(review): no grad_weight/grad_bias outputs here — those are presumably
/// computed elsewhere (e.g. via the channel-reduction helpers above).
fn batch_norm_backward(
grad_out: &[f32], input: &[f32], weight: &[f32], save: &[f32],
grad_input: &mut [f32],
batch: usize, channels: usize, height: usize, width: usize,
);
/// Adaptive average pooling from (h_in, w_in) to (h_out, w_out) per channel.
fn adaptive_avg_pool2d(
input: &[f32], output: &mut [f32],
batch: usize, channels: usize, h_in: usize, w_in: usize, h_out: usize, w_out: usize,
);
/// Backward of adaptive average pooling: spread each output gradient evenly
/// over its input window.
fn adaptive_avg_pool2d_backward(
grad_out: &[f32], grad_in: &mut [f32],
batch: usize, channels: usize, h_in: usize, w_in: usize, h_out: usize, w_out: usize,
);
/// Batched matrix multiply: `batch` independent (m, k) · (k, n) products.
fn bmm(a: &[f32], b: &[f32], dst: &mut [f32], batch: usize, m: usize, k: usize, n: usize);
/// Row-wise softmax over `num_rows` contiguous rows of length `row_size`.
fn softmax_forward(input: &[f32], output: &mut [f32], num_rows: usize, row_size: usize);
/// Softmax gradient, expressed via the saved forward output per row.
fn softmax_backward(saved_out: &[f32], grad_out: &[f32], grad_in: &mut [f32], num_rows: usize, row_size: usize);
/// Layer normalization over the trailing `norm_size` elements of each of
/// `num_instances` rows; `save_mean_invstd` caches per-instance mean and
/// inverse std-dev (presumably 2 floats per instance) for the backward pass.
fn layer_norm_forward(
input: &[f32], weight: &[f32], bias: &[f32],
output: &mut [f32], save_mean_invstd: &mut [f32],
num_instances: usize, norm_size: usize, epsilon: f32,
);
/// Layer-norm input gradient using the cached mean/inv-std. As with
/// batch norm, weight/bias gradients are not produced here.
fn layer_norm_backward(
grad_out: &[f32], input: &[f32], weight: &[f32],
save_mean_invstd: &[f32], grad_input: &mut [f32],
num_instances: usize, norm_size: usize,
);
/// Embedding lookup: copy `embed_dim`-sized rows of `weight` selected by
/// `indices`. Indices arrive as `f32` (the backend has only f32 buffers), so
/// implementations presumably cast/round them to usize — exactness holds
/// only for vocabularies below 2^24.
fn embedding_forward(
indices: &[f32], weight: &[f32], output: &mut [f32],
total_lookups: usize, embed_dim: usize,
);
/// Embedding gradient: scatter-add each `grad_out` row into the `grad_weight`
/// row addressed by the corresponding index.
fn embedding_backward(
grad_out: &[f32], indices: &[f32], grad_weight: &mut [f32],
total_lookups: usize, embed_dim: usize,
);
/// Fused softmax + cross-entropy: per batch element, writes the logit
/// gradient into `grad` and the scalar loss into `loss_per_b`. `targets` are
/// class indices encoded as `f32` — NOTE(review): confirm index vs one-hot
/// encoding against the caller.
fn cross_entropy_forward(
logits: &[f32], targets: &[f32],
grad: &mut [f32], loss_per_b: &mut [f32],
batch: usize, num_classes: usize,
);
}