mod cpu;
#[cfg(feature = "gpu")]
pub mod gpu;
pub use cpu::CpuBackend;
/// Set of numeric kernels that a compute backend must provide.
///
/// Every item is an associated function with no `self` receiver, so a backend
/// is selected by type rather than by value. All data is passed as `f32`
/// slices: `zeros` and `full` return an owned `Vec<f32>`, while every other
/// function returns nothing and takes one or more `&mut [f32]` buffers.
///
/// NOTE(review): this trait only declares signatures. The per-function
/// descriptions below are inferred from names and parameter lists; memory
/// layout, required slice lengths, and panic behaviour are decided by the
/// implementations and should be confirmed there.
pub trait Backend {
    /// Returns a new buffer; presumably `len` elements, all `0.0`.
    fn zeros(len: usize) -> Vec<f32>;

    /// Returns a new buffer; presumably `len` elements, all equal to `value`.
    fn full(len: usize, value: f32) -> Vec<f32>;

    /// Presumably the element-wise sum `lhs + rhs`, stored in `dst`.
    fn add(lhs: &[f32], rhs: &[f32], dst: &mut [f32]);

    /// Presumably the element-wise difference `lhs - rhs`, stored in `dst`.
    fn sub(lhs: &[f32], rhs: &[f32], dst: &mut [f32]);

    /// Presumably the element-wise product `lhs * rhs`, stored in `dst`.
    fn mul(lhs: &[f32], rhs: &[f32], dst: &mut [f32]);

    /// Presumably multiplies every element of `src` by `scalar`, stored in `dst`.
    ///
    /// Note the argument order: `dst` comes before `scalar`.
    fn scale(src: &[f32], dst: &mut [f32], scalar: f32);

    /// Presumably applies ReLU to `src` element-wise, stored in `dst`.
    fn relu(src: &[f32], dst: &mut [f32]);

    /// Presumably the ReLU gradient: combines the forward `input` with the
    /// upstream `out_grad` and stores the result in `dst`.
    fn relu_backward(input: &[f32], out_grad: &[f32], dst: &mut [f32]);

    /// Presumably a matrix product of `a` (`m` x `k`) and `b` (`k` x `n`) into
    /// `dst` (`m` x `n`). Storage order is not visible here — TODO confirm.
    fn matmul(a: &[f32], b: &[f32], dst: &mut [f32], m: usize, k: usize, n: usize);

    /// Presumably adds `bias` to an `m` x `n` `matrix`, stored in `dst`.
    /// How `bias` is broadcast is not visible here — TODO confirm.
    fn add_bias(matrix: &[f32], bias: &[f32], dst: &mut [f32], m: usize, n: usize);

    /// Presumably reduces an `m` x `n` `src` by summation into `dst`.
    /// Which axis survives is not visible here — TODO confirm.
    fn sum_rows(src: &[f32], dst: &mut [f32], m: usize, n: usize);

    /// Presumably the image-to-column unfolding used to express a convolution
    /// as a matrix product.
    ///
    /// Takes an input described by `c_in`, `h`, `w`, a kernel size `k`, a
    /// `stride`, a padding `pad`, and the output extent `out_h` x `out_w`.
    /// Exact layouts of `src` and `dst` — TODO confirm.
    fn im2col(
        src: &[f32],
        dst: &mut [f32],
        c_in: usize,
        h: usize,
        w: usize,
        k: usize,
        stride: usize,
        pad: usize,
        out_h: usize,
        out_w: usize,
    );

    /// Presumably the inverse of `im2col` (column-to-image folding); takes the
    /// same geometry parameters in the same order.
    ///
    /// Whether `dst` is overwritten or accumulated into is not visible here —
    /// TODO confirm.
    fn col2im(
        src: &[f32],
        dst: &mut [f32],
        c_in: usize,
        h: usize,
        w: usize,
        k: usize,
        stride: usize,
        pad: usize,
        out_h: usize,
        out_w: usize,
    );

    /// Presumably adds one `bias` value per channel to `src`, where each of the
    /// `channels` channels spans `spatial` elements; result stored in `dst`.
    fn add_channel_bias(
        src: &[f32],
        bias: &[f32],
        dst: &mut [f32],
        channels: usize,
        spatial: usize,
    );

    /// Presumably the bias gradient matching `add_channel_bias`: reduces `src`
    /// over the `spatial` extent of each channel into `dst`.
    fn sum_channel_bias_grad(src: &[f32], dst: &mut [f32], channels: usize, spatial: usize);

    /// Presumably 2-D max pooling with window `k` and step `stride` over a
    /// `channels` x `h` x `w` input, producing `out_h` x `out_w` per channel.
    ///
    /// Takes a second output buffer, `indices`, which is an `f32` slice rather
    /// than an integer one; its encoding is not visible here — TODO confirm.
    fn max_pool2d(
        src: &[f32],
        dst: &mut [f32],
        indices: &mut [f32],
        channels: usize,
        h: usize,
        w: usize,
        k: usize,
        stride: usize,
        out_h: usize,
        out_w: usize,
    );

    /// Presumably the backward pass of `max_pool2d`: routes `out_grad` to the
    /// positions recorded in `indices` and stores the result in `dst`.
    ///
    /// Unlike the forward function, no `k` or `stride` is passed.
    fn max_pool2d_backward(
        out_grad: &[f32],
        indices: &[f32],
        dst: &mut [f32],
        channels: usize,
        h: usize,
        w: usize,
        out_h: usize,
        out_w: usize,
    );

    /// Presumably dropout over `numel` elements of `src`, filling both `dst`
    /// and `mask`.
    ///
    /// `p` looks like the drop probability and `step` like a counter that
    /// varies the random mask between calls — TODO confirm both, including
    /// whether surviving values are rescaled.
    fn dropout(src: &[f32], dst: &mut [f32], mask: &mut [f32], numel: usize, p: f32, step: u64);

    /// Presumably a cross-entropy loss over `batch` rows of `num_classes`
    /// `logits`.
    ///
    /// Despite the `_forward` name it takes two output buffers, `grad` and
    /// `loss_per_b`, so it appears to produce the gradient along with a
    /// per-batch-item loss. `targets` is an `f32` slice; whether it holds
    /// class indices or a dense distribution is not visible here — TODO confirm.
    fn cross_entropy_forward(
        logits: &[f32],
        targets: &[f32],
        grad: &mut [f32],
        loss_per_b: &mut [f32],
        batch: usize,
        num_classes: usize,
    );
}