mod conv2d;
mod pool2d;
mod gemm;
use crate::tensor::*;
use crate::backend::*;
use self::conv2d::*;
use self::pool2d::*;
use self::gemm::*;
use core::fmt;
use core::fmt::Write;
use rand_distr::{Normal, Distribution};
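// Native CPU backend: a straightforward reference implementation of the
// backend traits for f32 tensors. Tensor storage is allocated lazily on
// first write, and every operation below is a plain loop over slices.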
pub struct NativeTensorF32 {
shape: TensorShape,
ptr: Option<Box<[f32]>>
}
impl NativeTensorF32 {
    /// Returns the tensor contents as a slice.
    /// Panics if storage has not yet been allocated via `write`.
    pub fn read(&self) -> &[f32] {
        self.ptr.as_ref().unwrap()
    }
    /// Returns the tensor contents as a mutable slice,
    /// allocating zero-initialized storage on first use.
    pub fn write(&mut self) -> &mut [f32] {
        if self.ptr.is_none() {
            self.ptr = Some(vec![0.0; self.shape.size()].into_boxed_slice());
        }
        self.ptr.as_mut().unwrap()
    }
}
impl Tensor<f32> for NativeTensorF32 {
fn new<S: Into<TensorShape>>(shape: S) -> Self {
NativeTensorF32 {
shape: shape.into(),
ptr: None,
}
}
fn shape(&self) -> &TensorShape {
&self.shape
}
    fn resize(&mut self, shape: TensorShape) {
        // Preserve existing data; grow with zeros or truncate as needed.
        self.ptr = self.ptr.take().map(|ptr| {
            let mut data = ptr.into_vec();
            data.resize(shape.size(), 0.0);
            data.into_boxed_slice()
        });
        self.shape = shape;
    }
}
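/// Reference CPU backend operating on `NativeTensorF32`.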
pub struct Native;
impl Native {
fn fmt_tensor(&self, t: &NativeTensorF32, f: &mut String) -> fmt::Result {
let strides = t.shape.default_strides();
let last_idx = strides.dims - 1;
write!(f, "Tensor(shape={}, data=[", t.shape)?;
for (idx, val) in t.read().iter().enumerate() {
let is_first = idx == 0;
let mut need_nl = false;
let padding = 2;
for (sidx, s) in strides.iter().enumerate() {
if sidx != last_idx && idx % s as usize == 0 {
need_nl = true;
}
}
if !is_first {
write!(f, ", ")?;
}
if need_nl {
write!(f, "\n{}", " ".repeat(padding))?;
}
write!(f, "{}", val)?;
}
writeln!(f, "\n])")?;
Ok(())
}
}
impl Backend<f32> for Native {
type Tensor = NativeTensorF32;
fn store_tensor_f32(&self, t: &Self::Tensor, data: &mut [f32]) {
let size = t.shape().size();
assert!(data.len() >= size);
let dst = t.read();
for i in 0 .. size {
data[i] = dst[i] as f32;
}
}
fn load_tensor_u8(&self, t: &mut Self::Tensor, data: &[u8]) {
let size = t.shape().size();
assert!(data.len() >= size);
let dst = &mut t.write()[0..size];
for i in 0 .. size {
dst[i] = data[i] as f32;
}
}
fn load_tensor_f32(&self, t: &mut Self::Tensor, data: &[f32]) {
let size = t.shape().size();
assert!(data.len() >= size);
        t.write()[..size].copy_from_slice(&data[..size]);
}
#[inline]
fn scalar_f32(&self, val: f32) -> f32 {
val
}
#[inline]
fn fill_scalar(&self, t: &mut Self::Tensor, scalar: f32) {
let size = t.shape().size();
let dst = t.write();
for i in 0 .. size {
dst[i] = scalar;
}
}
#[inline]
    fn fill_random(&self, t: &mut Self::Tensor, from: f32, to: f32) {
        // Fixed seed keeps initialization deterministic across runs.
        let seed = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let mut rng: rand::rngs::StdRng = rand::SeedableRng::from_seed(seed);
        // Note: despite the `from`/`to` names, the two values are used as the
        // mean and standard deviation of a normal distribution, not as the
        // bounds of a uniform range.
        let normal = Normal::new(from, to).unwrap();
let size = t.shape().size();
let dst = t.write();
for i in 0 .. size {
dst[i] = normal.sample(&mut rng);
}
}
fn print_tensor(&self, t: &Self::Tensor) {
let mut s = String::new();
self.fmt_tensor(t, &mut s).unwrap();
println!("{}", s);
}
}
impl BackendGemm<f32> for Native {
    /// C = A × B for row-major 2-D tensors: (m, k) · (k, n) -> (m, n).
    fn matmul(&self, dst: &mut Self::Tensor, a: &Self::Tensor, b: &Self::Tensor) {
        let a_shape = a.shape();
        let b_shape = b.shape();
        let c_shape = dst.shape().clone();
        assert_eq!(a_shape.dims, 2);
        assert_eq!(b_shape.dims, 2);
        assert_eq!(a_shape.get(0), c_shape.get(0));
        assert_eq!(b_shape.get(1), c_shape.get(1));
        assert_eq!(a_shape.get(1), b_shape.get(0));
        let m = a_shape.get(0) as usize;
        let n = b_shape.get(1) as usize;
        let k = b_shape.get(0) as usize;
        gemm(
            false, false,
            m, n, k,
            1.0,
            a.read(), k,
            b.read(), n,
            0.0,
            dst.write(), n
        );
    }
    /// C = A × Bᵀ: (m, k) · (n, k)ᵀ -> (m, n).
    fn matmul_nt(&self, dst: &mut Self::Tensor, a: &Self::Tensor, b: &Self::Tensor) {
        let a_shape = a.shape();
        let b_shape = b.shape();
        let c_shape = dst.shape().clone();
        assert_eq!(a_shape.dims, 2);
        assert_eq!(b_shape.dims, 2);
        assert_eq!(a_shape.get(0), c_shape.get(0));
        assert_eq!(b_shape.get(0), c_shape.get(1));
        assert_eq!(a_shape.get(1), b_shape.get(1));
        let m = a_shape.get(0) as usize;
        let n = b_shape.get(0) as usize;
        let k = b_shape.get(1) as usize;
        gemm(false, true,
             m, n, k,
             1.0,
             a.read(), k,
             b.read(), k,
             0.0,
             dst.write(), n);
    }
    /// C = Aᵀ × B: (k, m)ᵀ · (k, n) -> (m, n).
    fn matmul_tn(&self, dst: &mut Self::Tensor, a: &Self::Tensor, b: &Self::Tensor) {
        let a_shape = a.shape();
        let b_shape = b.shape();
        let c_shape = dst.shape().clone();
        assert_eq!(a_shape.dims, 2);
        assert_eq!(b_shape.dims, 2);
        assert_eq!(a_shape.get(1), c_shape.get(0));
        assert_eq!(b_shape.get(1), c_shape.get(1));
        assert_eq!(a_shape.get(0), b_shape.get(0));
        let m = a_shape.get(1) as usize;
        let n = b_shape.get(1) as usize;
        let k = b_shape.get(0) as usize;
        gemm(true, false,
             m, n, k,
             1.0,
             a.read(), m,
             b.read(), n,
             0.0,
             dst.write(), n);
    }
fn matmul_tt(&self, _dst: &mut Self::Tensor, _a: &Self::Tensor, _b: &Self::Tensor) {
unimplemented!();
}
}
impl BackendSigmoid<f32> for Native {
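    /// Elementwise logistic sigmoid: dst[i] = 1 / (1 + e^(-data[i])).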
fn sigmoid(&self, dst: &mut Self::Tensor, data: &Self::Tensor) {
let dst_size = dst.shape().size();
assert!(dst.shape() == data.shape());
let data_s = &data.read()[0 .. dst_size];
let dst_s = &mut dst.write()[0 .. dst_size];
for i in 0 .. dst_size {
dst_s[i] = 1.0 / (1.0 + (-data_s[i]).exp());
}
}
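    /// Backward pass given the forward output `z` and upstream deltas `d`:
    /// dst[i] = z[i] * (1 - z[i]) * d[i].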
fn sigmoid_grad(&self, dst: &mut Self::Tensor, z: &Self::Tensor, d: &Self::Tensor) {
let dst_size = dst.shape().size();
assert!(dst.shape() == z.shape());
assert!(dst.shape() == d.shape());
let z_s = &z.read()[0 .. dst_size];
let d_s = &d.read()[0 .. dst_size];
let dst_s = &mut dst.write()[0 .. dst_size];
for i in 0 .. dst_size {
dst_s[i] = (z_s[i] * (1.0 - z_s[i])) * d_s[i];
}
}
}
impl BackendReLu<f32> for Native {
fn relu(&self, dst: &mut Self::Tensor, data: &Self::Tensor) {
let dst_size = dst.shape().size();
assert!(dst.shape() == data.shape());
let data_s = &data.read()[0 .. dst_size];
let dst_s = &mut dst.write()[0 .. dst_size];
for i in 0 .. dst_size {
let val = if data_s[i] > 0.0 {
data_s[i]
} else {
0.0
};
dst_s[i] = val;
}
}
fn relu_grad(&self, dst: &mut Self::Tensor, z: &Self::Tensor, d: &Self::Tensor) {
let dst_size = dst.shape().size();
assert!(dst.shape() == z.shape());
assert!(dst.shape() == d.shape());
let z_s = &z.read()[0 .. dst_size];
let d_s = &d.read()[0 .. dst_size];
let dst_s = &mut dst.write()[0 .. dst_size];
for i in 0 .. dst_size {
dst_s[i] = if z_s[i] > 0.0 {
d_s[i]
} else {
0.0
};
}
}
}
impl BackendBias<f32> for Native {
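    /// Broadcasts `biases` over the last axis of `dst`, adding bias `l`
    /// to every element whose last-axis index is `l`.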
fn bias_add(&self, dst: &mut Self::Tensor, biases: &Self::Tensor) {
let biases_shape = biases.shape();
let dst_shape = dst.shape().clone();
let biases_size = biases_shape.get(0) as usize;
let dst_size = dst_shape.size();
assert!(dst_shape.get(dst_shape.dims - 1) as usize == biases_size);
let batch_size = dst_shape.get(0) as usize;
let biases_s = &biases.read()[0 .. biases_size];
let dst_s = &mut dst.write()[0 .. dst_size];
let mut inner = 1usize;
for (idx, i) in dst_shape.as_slice().iter().enumerate() {
if idx == 0 || idx == dst_shape.dims - 1 {
continue;
}
inner *= *i as usize;
}
for b in 0 .. batch_size {
for i in 0..inner {
for l in 0..biases_size {
let offset = b * (inner * biases_size) + i * biases_size + l;
dst_s[offset] += biases_s[l];
}
}
}
}
fn bias_grad(&self, dbiases: &mut Self::Tensor, deltas: &Self::Tensor) {
let dbiases_shape = dbiases.shape();
let deltas_shape = deltas.shape();
let dbiases_size = dbiases_shape.get(0) as usize;
let deltas_size = deltas_shape.size();
assert!(deltas_shape.get(deltas_shape.dims - 1) as usize == dbiases_size);
let batch_size = deltas_shape.get(0) as usize;
let dbiases_s = &mut dbiases.write()[0 .. dbiases_size];
let deltas_s = &deltas.read()[0 .. deltas_size];
let mut inner = 1usize;
for (idx, i) in deltas_shape.as_slice().iter().enumerate() {
if idx == 0 || idx == deltas_shape.dims - 1 {
continue;
}
inner *= *i as usize;
}
for b in 0 .. batch_size {
for l in 0 .. dbiases_size {
let mut bias_grad = 0.0;
for i in 0 .. inner {
let offset = b * (inner * dbiases_size) + i * dbiases_size + l;
bias_grad += deltas_s[offset];
}
dbiases_s[l] = bias_grad;
}
}
}
}
impl BackendScale<f32> for Native {
fn scale(&self, dst: &mut Self::Tensor, scale: f32) {
let dst_size = dst.shape().size();
let dst_s = &mut dst.write()[0 .. dst_size];
for i in 0 .. dst_size {
dst_s[i] *= scale;
}
}
}
impl BackendMse<f32> for Native {
fn scaled_square_diff(&self, dst: &mut Self::Tensor, a: &Self::Tensor, b: &Self::Tensor, scale: f32) {
let a_size = a.shape().size();
let b_size = b.shape().size();
let dst_size = dst.shape().size();
assert_eq!(a_size, dst_size);
assert_eq!(b_size, dst_size);
let a_s = &a.read()[0 .. dst_size];
let b_s = &b.read()[0 .. dst_size];
let dst_s = &mut dst.write()[0 .. dst_size];
for i in 0 .. dst_size {
let diff = a_s[i] - b_s[i];
dst_s[i] = scale * diff * diff;
}
}
fn scaled_diff(&self, dst: &mut Self::Tensor, a: &Self::Tensor, b: &Self::Tensor, scale: f32) {
let a_size = a.shape().size();
let b_size = b.shape().size();
let dst_size = dst.shape().size();
assert_eq!(a_size, dst_size);
assert_eq!(b_size, dst_size);
let a_s = &a.read()[0 .. dst_size];
let b_s = &b.read()[0 .. dst_size];
let dst_s = &mut dst.write()[0 .. dst_size];
for i in 0 .. dst_size {
dst_s[i] = scale * (a_s[i] - b_s[i]);
}
}
}
impl BackendAxpy<f32> for Native {
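    /// BLAS-style axpy: dst[i] += scale * a[i]. Declared `default` so a
    /// specialized (e.g. SIMD) implementation can override it.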
default fn axpy(&self, dst: &mut Self::Tensor, scale: f32, a: &Self::Tensor) {
let dst_size = dst.shape().size();
assert!(a.shape() == dst.shape());
let a_s = &a.read()[0 .. dst_size];
let dst_s = &mut dst.write()[0 .. dst_size];
for i in 0 .. dst_size {
dst_s[i] += scale * a_s[i];
}
}
}
impl BackendAxpys<f32> for Native {
fn axpys(&self, dst: &mut Self::Tensor, scale: f32, a: &Self::Tensor) {
let dst_size = dst.shape().size();
assert!(a.shape() == dst.shape());
let a_s = &a.read()[0 .. dst_size];
let dst_s = &mut dst.write()[0 .. dst_size];
for i in 0 .. dst_size {
dst_s[i] += scale * a_s[i] * a_s[i];
}
}
}
impl BackendAdd<f32> for Native {
fn add(&self, dst: &mut Self::Tensor, a: &Self::Tensor) {
let dst_size = dst.shape().size();
assert!(a.shape() == dst.shape());
let a_s = &a.read()[0 .. dst_size];
let dst_s = &mut dst.write()[0 .. dst_size];
for i in 0 .. dst_size {
dst_s[i] += a_s[i];
}
}
}
impl BackendSub<f32> for Native {
fn sub(&self, dst: &mut Self::Tensor, a: &Self::Tensor, b: &Self::Tensor) {
let a_size = a.shape().size();
let b_size = b.shape().size();
let dst_size = dst.shape().size();
assert_eq!(dst_size, a_size);
assert_eq!(dst_size, b_size);
let a_s = &a.read()[0 .. dst_size];
let b_s = &b.read()[0 .. dst_size];
let dst_s = &mut dst.write()[0 .. dst_size];
for i in 0 .. dst_size {
dst_s[i] = a_s[i] - b_s[i];
}
}
}
impl BackendMul<f32> for Native {
fn mul(&self, dst: &mut Self::Tensor, a: &Self::Tensor) {
let dst_size = dst.shape().size();
assert!(a.shape() == dst.shape());
let a_s = &a.read()[0 .. dst_size];
let dst_s = &mut dst.write()[0 .. dst_size];
for i in 0 .. dst_size {
dst_s[i] *= a_s[i];
}
}
}
impl BackendCopy<f32> for Native {
fn copy(&self, dst: &mut Self::Tensor, a: &Self::Tensor) {
let size = dst.shape().size();
assert!(a.shape().size() == size);
let a_s = &a.read()[0 .. size];
let dst_s = &mut dst.write()[0 .. size];
for i in 0 .. size {
dst_s[i] = a_s[i];
}
}
}
impl BackendMaximum<f32> for Native {
fn maximum(&self, dst: &mut Self::Tensor, a: &Self::Tensor) {
let dst_size = dst.shape().size();
assert!(a.shape() == dst.shape());
let a_s = &a.read()[0 .. dst_size];
let dst_s = &mut dst.write()[0 .. dst_size];
for i in 0 .. dst_size {
dst_s[i] = f32::max(a_s[i], dst_s[i]);
}
}
}
impl BackendAdam<f32> for Native {
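    /// Adam parameter step: dst[i] += lr * moms[i] / (sqrt(vels[i]) + eps),
    /// where `moms` and `vels` hold the first and second moment estimates
    /// maintained by the caller.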
fn adam_p(&self, dst: &mut Self::Tensor, lr: f32, moms: &Self::Tensor, vels: &Self::Tensor, eps: f32) {
let dst_size = dst.shape().size();
assert!(moms.shape() == dst.shape());
assert!(vels.shape() == dst.shape());
let moms_s = &moms.read()[0 .. dst_size];
let vels_s = &vels.read()[0 .. dst_size];
let dst_s = &mut dst.write()[0 .. dst_size];
for i in 0 .. dst_size {
            dst_s[i] += lr * moms_s[i] / (vels_s[i].sqrt() + eps);
}
}
}
impl BackendSoftmax<f32> for Native {
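    /// Softmax along the last axis. Each slice is shifted by its maximum
    /// before exponentiating to avoid overflow, then normalized to sum to 1.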
fn softmax(&self, y: &mut Self::Tensor, x: &Self::Tensor) {
let y_shape = y.shape();
let x_shape = x.shape();
let size = y_shape.size();
let axis = y_shape.last_axis() as usize;
assert!(y_shape == x_shape);
        let x_s = &x.read()[0 .. size];
        let y_s = &mut y.write()[0 .. size];
        y_s.copy_from_slice(x_s);
        // Walk one last-axis slice at a time.
        for i in (0..size).step_by(axis) {
            assert!(i + (axis - 1) < size);
            // Shift by the slice maximum before exponentiating; this is the
            // usual numerical-stability trick and leaves the result unchanged.
            let mut max_x = core::f32::NEG_INFINITY;
for j in 0..axis {
let val = x_s[i + j];
if val > max_x {
max_x = val;
}
}
for j in 0..axis {
let offset = i + j;
y_s[offset] = (y_s[offset] - max_x).exp();
}
let mut sum = 0.0;
for j in 0..axis {
sum += y_s[i + j];
}
let rsum = 1.0 / sum;
for j in 0..axis {
y_s[i + j] *= rsum;
}
}
}
}
impl BackendConv2d<f32> for Native {
type Context = ();
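    /// Convolution forward pass over NCHW tensors (batch, channels, height,
    /// width). 3x3 and 5x5 filters dispatch to specialized kernels; other
    /// sizes fall back to the generic implementation.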
fn conv2d_forward(&self, y: &mut Self::Tensor, x: &Self::Tensor, w: &Self::Tensor, conv_info: &Conv2dInfo) {
let x_shape = &x.shape().as_slice()[0..4];
let y_shape = &y.shape().as_slice()[0..4];
let w_shape = &w.shape().as_slice()[0..3];
assert_eq!(x_shape[0], y_shape[0]);
let batch_size = x_shape[0] as isize;
let y_channels = y_shape[1] as isize;
let x_channels = x_shape[1] as isize;
let x_height = x_shape[2] as isize;
let x_width = x_shape[3] as isize;
let filter_height = w_shape[1] as isize;
let filter_width = w_shape[2] as isize;
let (stride_y, stride_x) = conv_info.strides;
let _padding = conv_info.padding;
self.fill_scalar(y, 0.0);
if filter_height == 3 && filter_width == 3 {
conv2d_forward_3x3(
y.write(), x.read(), w.read(),
batch_size, x_channels, y_channels,
x_height, x_width, stride_y as isize, stride_x as isize
)
} else if filter_height == 5 && filter_width == 5 {
conv2d_forward_5x5(
y.write(), x.read(), w.read(),
batch_size, x_channels, y_channels,
x_height, x_width, stride_y as isize, stride_x as isize
)
} else {
conv2d_forward(
y.write(), x.read(), w.read(),
batch_size, x_channels, y_channels,
x_height, x_width, filter_height, filter_width,
stride_y as isize, stride_x as isize
)
}
}
fn conv2d_backward_input(&self, dx: &mut Self::Tensor, dy: &Self::Tensor, w: &Self::Tensor, conv_info: &Conv2dInfo) {
let dx_shape = &dx.shape().as_slice()[0..4];
let dy_shape = &dy.shape().as_slice()[0..4];
let w_shape = &w.shape().as_slice()[0..3];
assert_eq!(dx_shape[0], dy_shape[0]);
let batch_size = dx_shape[0] as isize;
let dy_channels = dy_shape[1] as isize;
let dy_height = dy_shape[2] as isize;
let dy_width = dy_shape[3] as isize;
let dx_channels = dx_shape[1] as isize;
let filter_height = w_shape[1] as isize;
let filter_width = w_shape[2] as isize;
let _padding = conv_info.padding;
let (stride_y, stride_x) = conv_info.strides;
self.fill_scalar(dx, 0.0);
if filter_height == 3 && filter_width == 3 {
conv2d_backward_3x3(
dx.write(), dy.read(), w.read(),
batch_size, dx_channels, dy_channels,
dy_height, dy_width,
stride_y as isize, stride_x as isize
)
} else if filter_height == 5 && filter_width == 5 {
conv2d_backward_5x5(
dx.write(), dy.read(), w.read(),
batch_size, dx_channels, dy_channels,
dy_height, dy_width,
stride_y as isize, stride_x as isize
)
} else {
conv2d_backward(
dx.write(), dy.read(), w.read(),
batch_size, dx_channels, dy_channels,
dy_height, dy_width,
filter_height, filter_width,
stride_y as isize, stride_x as isize
)
}
}
fn conv2d_backward_filter(&self, dw: &mut Self::Tensor, x: &Self::Tensor, dy: &Self::Tensor, conv_info: &Conv2dInfo) {
let x_shape = &x.shape().as_slice()[0..4];
let dy_shape = &dy.shape().as_slice()[0..4];
assert_eq!(x_shape[0], dy_shape[0]);
let batch_size = x_shape[0] as isize;
let dy_channels = dy_shape[1] as isize;
let dy_height = dy_shape[2] as isize;
let dy_width = dy_shape[3] as isize;
let x_channels = x_shape[1] as isize;
let x_height = x_shape[2] as isize;
let x_width = x_shape[3] as isize;
let _padding = conv_info.padding;
let (stride_y, stride_x) = conv_info.strides;
self.fill_scalar(dw, 0.0);
conv2d_grads(
dw.write(), x.read(), dy.read(),
batch_size, x_channels, dy_channels,
x_height, x_width, dy_height, dy_width,
stride_y as isize, stride_x as isize
)
}
}
impl BackendMaxPool2d<f32> for Native {
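    /// Max pooling with "valid" windows: the output extent per axis is
    /// (input - pool) / stride + 1, matching the assertions below.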
fn max_pool2d(&self, y: &mut Self::Tensor, x: &Self::Tensor, conv_info: &Conv2dInfo) {
let x_shape = &x.shape().as_slice()[0..4];
let y_shape = &y.shape().as_slice()[0..4];
assert_eq!(x_shape[0], y_shape[0]);
assert_eq!(x_shape[1], y_shape[1]);
let (stride_y, stride_x) = conv_info.strides;
let (stride_y, stride_x) = (stride_y as isize, stride_x as isize);
let (pool_y, pool_x) = conv_info.kernel;
let (pool_y, pool_x) = (pool_y as isize, pool_x as isize);
let batch_size = x_shape[0] as isize;
let channels = x_shape[1] as isize;
let x_rows = x_shape[2] as isize;
let x_cols = x_shape[3] as isize;
let y_rows = (x_rows - pool_y) / stride_y + 1;
let y_cols = (x_cols - pool_x) / stride_x + 1;
assert_eq!(y_rows, y_shape[2] as isize);
assert_eq!(y_cols, y_shape[3] as isize);
let x_img_size = x_rows * x_cols;
let x_batch_size = x_img_size * channels;
let y_img_size = y_rows * y_cols;
let y_batch_size = y_img_size * channels;
let x_vals = &x.read()[0..(batch_size * channels * x_img_size) as usize];
let y_vals = &mut y.write()[0..(batch_size * channels * y_img_size) as usize];
for bi in 0..batch_size {
for ch in 0..channels {
let x_offset = (bi * x_batch_size + ch * x_img_size) as usize;
let x_img = &x_vals[x_offset..x_offset + x_img_size as usize];
let y_offset = (bi * y_batch_size + ch * y_img_size) as usize;
let y_img = &mut y_vals[y_offset..y_offset + y_img_size as usize];
maxpool2d(y_img, x_img, y_rows, y_cols, x_rows, x_cols,
pool_y, pool_x, stride_y, stride_x);
}
}
}
fn max_pool2d_backprop(&self, dx: &mut Self::Tensor, dy: &Self::Tensor, x: &Self::Tensor, conv_info: &Conv2dInfo) {
let x_shape = &x.shape().as_slice()[0..4];
let dy_shape = &dy.shape().as_slice()[0..4];
let dx_shape = &dx.shape().as_slice()[0..4];
assert_eq!(x_shape, dx_shape);
assert_eq!(x_shape[0], dy_shape[0]);
assert_eq!(x_shape[1], dy_shape[1]);
let batch_size = x_shape[0] as isize;
let channels = x_shape[1] as isize;
let x_rows = x_shape[2] as isize;
let x_cols = x_shape[3] as isize;
let (stride_y, stride_x) = conv_info.strides;
let (stride_y, stride_x) = (stride_y as isize, stride_x as isize);
let (pool_y, pool_x) = conv_info.kernel;
let (pool_y, pool_x) = (pool_y as isize, pool_x as isize);
let x_img_size = x_rows * x_cols;
let x_batch_size = x_img_size * channels;
let y_rows = (x_rows - pool_y) / stride_y + 1;
let y_cols = (x_cols - pool_x) / stride_x + 1;
let y_img_size = y_rows * y_cols;
let y_batch_size = y_img_size * channels;
let x_size = (batch_size * channels * x_img_size) as usize;
let y_size = (batch_size * channels * y_img_size) as usize;
let x_vals = &x.read()[0..x_size];
let dy_vals = &dy.read()[0..y_size];
let dx_vals = &mut dx.write()[0..x_size];
for bi in 0..batch_size {
for ch in 0..channels {
let x_offset = (bi * x_batch_size + ch * x_img_size) as usize;
let x_img = &x_vals[x_offset..x_offset + x_img_size as usize];
let dx_img = &mut dx_vals[x_offset..x_offset + x_img_size as usize];
let dy_offset = (bi * y_batch_size + ch * y_img_size) as usize;
let dy_img = &dy_vals[dy_offset..dy_offset + y_img_size as usize];
maxpool2d_backward(dx_img, x_img, dy_img,
x_rows, x_cols, y_rows, y_cols,
pool_y, pool_x, stride_y, stride_x);
}
}
}
}
impl BackendAvgPool2d<f32> for Native {
fn avg_pool2d(&self, y: &mut Self::Tensor, x: &Self::Tensor, conv_info: &Conv2dInfo) {
let x_shape = &x.shape().as_slice()[0..4];
let y_shape = &y.shape().as_slice()[0..4];
assert_eq!(x_shape[0], y_shape[0]);
assert_eq!(x_shape[1], y_shape[1]);
let (stride_y, stride_x) = conv_info.strides;
let (stride_y, stride_x) = (stride_y as isize, stride_x as isize);
let (pool_y, pool_x) = conv_info.kernel;
let (pool_y, pool_x) = (pool_y as isize, pool_x as isize);
let batch_size = x_shape[0] as isize;
let channels = x_shape[1] as isize;
let x_rows = x_shape[2] as isize;
let x_cols = x_shape[3] as isize;
let y_rows = (x_rows - pool_y) / stride_y + 1;
let y_cols = (x_cols - pool_x) / stride_x + 1;
assert_eq!(y_rows, y_shape[2] as isize);
assert_eq!(y_cols, y_shape[3] as isize);
let x_img_size = x_rows * x_cols;
let x_batch_size = x_img_size * channels;
let y_img_size = y_rows * y_cols;
let y_batch_size = y_img_size * channels;
let x_vals = &x.read()[0..(batch_size * channels * x_img_size) as usize];
let y_vals = &mut y.write()[0..(batch_size * channels * y_img_size) as usize];
for bi in 0..batch_size {
for ch in 0..channels {
let x_offset = (bi * x_batch_size + ch * x_img_size) as usize;
let x_img = &x_vals[x_offset..x_offset + x_img_size as usize];
let y_offset = (bi * y_batch_size + ch * y_img_size) as usize;
let y_img = &mut y_vals[y_offset..y_offset + y_img_size as usize];
avgpool2d(y_img, x_img, y_rows, y_cols, x_rows, x_cols,
pool_y, pool_x, stride_y, stride_x);
}
}
}
fn avg_pool2d_backprop(&self, _dx: &mut Self::Tensor, _dy: &Self::Tensor, _x: &Self::Tensor, _conv_info: &Conv2dInfo) {
unimplemented!()
}
}
impl BackendPaddingCopy2d<f32> for Native {
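    /// Copies `x` into `y`, offsetting by `y_paddings` (zero border kept
    /// around the destination) and cropping by `x_paddings` (border skipped
    /// in the source), as exercised by `test_copy_with_padding2d`.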
fn copy_with_padding2d(&self, y: &mut Self::Tensor, x: &Self::Tensor, y_paddings: (u32, u32), x_paddings: (u32, u32)) {
let y_shape = &y.shape().as_slice()[0..4];
let x_shape = &x.shape().as_slice()[0..4];
let y_batch_size = y_shape[0] as usize;
let y_filters = y_shape[1] as usize;
let y_rows = y_shape[2] as usize;
let y_cols = y_shape[3] as usize;
let x_batch_size = x_shape[0] as usize;
let x_filters = x_shape[1] as usize;
let x_rows = x_shape[2] as usize;
let x_cols = x_shape[3] as usize;
assert_eq!(y_batch_size, x_batch_size);
assert_eq!(y_filters, x_filters);
let y_filter_stride = y_rows * y_cols;
let y_batch_stride = y_filters * y_filter_stride;
let x_filter_stride = x_rows * x_cols;
let x_batch_stride = x_filters * x_filter_stride;
let y_size = y_batch_size * y_filters * y_rows * y_cols;
let x_size = x_batch_size * x_filters * x_rows * x_cols;
let y_s = &mut y.write()[0 .. y_size];
let x_s = &x.read()[0 .. x_size];
for batch in 0 .. y_batch_size {
for filter in 0..y_filters {
for y_row in 0..y_rows {
for y_col in 0..y_cols {
if y_row < y_paddings.0 as usize ||
y_col < y_paddings.1 as usize {
continue;
}
if y_row - y_paddings.0 as usize >= x_rows ||
y_col - y_paddings.1 as usize >= x_cols {
continue;
}
let x_row = y_row - y_paddings.0 as usize + x_paddings.0 as usize;
let x_col = y_col - y_paddings.1 as usize + x_paddings.1 as usize;
let y_idx = batch * y_batch_stride + filter * y_filter_stride + y_row * y_cols + y_col;
let x_idx = batch * x_batch_stride + filter * x_filter_stride + x_row * x_cols + x_col;
y_s[y_idx] = x_s[x_idx];
}
}
}
}
}
}
#[cfg(test)]
mod tests {
use crate::backend::*;
use super::{Native, NativeTensorF32};
use crate::tensor::Tensor;
#[test]
fn test_copy_with_padding2d() {
let bac = Native;
let mut a1 = NativeTensorF32::new((1, 1, 3, 3));
let mut b1 = NativeTensorF32::new((1, 1, 5, 5));
let mut a2 = NativeTensorF32::new((1, 1, 5, 5));
let mut b2 = NativeTensorF32::new((1, 1, 3, 3));
bac.load_tensor_u8(&mut a1, &[
1, 2, 3,
4, 5, 6,
7, 8, 9,
]);
bac.load_tensor_u8(&mut a2, &[
1, 2, 3, 4, 5,
6, 7, 8, 9, 10,
11, 12, 13, 14, 15,
16, 17, 18, 19, 20,
21, 22, 23, 24, 25,
]);
bac.copy_with_padding2d(&mut b1, &a1, (1, 1), (0, 0));
bac.copy_with_padding2d(&mut b2, &a2, (0, 0), (1, 1));
assert!(
b1.read() == &[
0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 1.0, 2.0, 3.0, 0.0,
0.0, 4.0, 5.0, 6.0, 0.0,
0.0, 7.0, 8.0, 9.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0,
]
);
assert!(
b2.read() == &[
7.0, 8.0, 9.0,
12.0, 13.0, 14.0,
17.0, 18.0, 19.0,
]
);
}
#[test]
fn test_softmax() {
let bac = Native;
let mut a = NativeTensorF32::new((3, 3));
let mut b = NativeTensorF32::new((3, 3));
bac.load_tensor_u8(&mut a, &[
1,2,3,
4,5,6,
7,8,9,
]);
bac.softmax(&mut b, &a);
assert!(
b.read() == &[
0.09003057, 0.24472847, 0.66524096,
0.09003057, 0.24472847, 0.66524096,
0.09003057, 0.24472847, 0.66524096,
]
);
}
#[test]
fn test_matmul() {
let bac = Native;
let mut a = NativeTensorF32::new((2, 3));
let mut b = NativeTensorF32::new((3, 4));
let mut c = NativeTensorF32::new((2, 4));
bac.load_tensor_u8(&mut a, &[
1,2,3,
4,5,6
]);
bac.load_tensor_u8(&mut b, &[
1,2,3,4,
5,6,7,8,
9,10,11,12
]);
bac.matmul(&mut c, &a, &b);
assert!(
c.read() == &[
38.0, 44.0, 50.0, 56.0,
83.0, 98.0, 113.0, 128.0,
]
);
}
#[test]
fn test_matmul_nt() {
let bac = Native;
let mut a = NativeTensorF32::new((2, 3));
let mut b = NativeTensorF32::new((4, 3));
let mut c = NativeTensorF32::new((2, 4));
bac.load_tensor_u8(&mut a, &[
1,2,3,
4,5,6
]);
bac.load_tensor_u8(&mut b, &[
1,5,9,
2,6,10,
3,7,11,
4,8,12
]);
bac.matmul_nt(&mut c, &a, &b);
assert!(
c.read() == &[
38.0, 44.0, 50.0, 56.0,
83.0, 98.0, 113.0, 128.0,
]
);
}
#[test]
fn test_matmul_tn() {
let bac = Native;
let mut a = NativeTensorF32::new((8, 5));
let mut b = NativeTensorF32::new((8, 3));
let mut c = NativeTensorF32::new((5, 3));
bac.load_tensor_u8(&mut a, &[
0, 1, 2, 3, 4,
5, 6, 7, 8, 9,
10, 11, 12, 13, 14,
15, 16, 17, 18, 19,
20, 21, 22, 23, 24,
25, 26, 27, 28, 29,
30, 31, 32, 33, 34,
35, 36, 37, 38, 39
]);
bac.load_tensor_u8(&mut b, &[
0, 1, 2,
3, 4, 5,
6, 7, 8,
9, 10, 11,
12, 13, 14,
15, 16, 17,
18, 19, 20,
21, 22, 23
]);
bac.matmul_tn(&mut c, &a, &b);
assert!(
c.read() == &[
2100.0, 2240.0, 2380.0,
2184.0, 2332.0, 2480.0,
2268.0, 2424.0, 2580.0,
2352.0, 2516.0, 2680.0,
2436.0, 2608.0, 2780.0
]
);
}
#[test]
fn test_axpy() {
let bac = Native;
let mut a = NativeTensorF32::new((2, 2));
let mut b = NativeTensorF32::new((2, 2));
bac.load_tensor_u8(&mut a, &[1, 2, 3, 4]);
bac.load_tensor_u8(&mut b, &[1, 2, 3, 4]);
bac.axpy(&mut a, 2.0f32, &b);
assert!(
a.read() == &[3.0, 6.0, 9.0, 12.0]
);
}
#[test]
fn test_add() {
let bac = Native;
let mut a = NativeTensorF32::new((2, 2));
let mut b = NativeTensorF32::new((2, 2));
bac.load_tensor_u8(&mut a, &[1, 2, 3, 4]);
bac.load_tensor_u8(&mut b, &[1, 2, 3, 4]);
bac.add(&mut a, &b);
assert!(
a.read() == &[2.0, 4.0, 6.0, 8.0]
);
}
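    // Two extra sanity checks in the same style as the tests above. The
    // expected values follow directly from the definitions: relu clamps
    // negatives to zero, and sigmoid(0) = 1 / (1 + e^0) = 0.5 exactly.
    #[test]
    fn test_relu() {
        let bac = Native;
        let mut a = NativeTensorF32::new((2, 2));
        let mut b = NativeTensorF32::new((2, 2));
        bac.load_tensor_f32(&mut a, &[-1.0, 2.0, -0.5, 3.0]);
        bac.relu(&mut b, &a);
        assert!(
            b.read() == &[0.0, 2.0, 0.0, 3.0]
        );
    }
    #[test]
    fn test_sigmoid_at_zero() {
        let bac = Native;
        let mut a = NativeTensorF32::new((2, 2));
        let mut b = NativeTensorF32::new((2, 2));
        bac.load_tensor_f32(&mut a, &[0.0; 4]);
        bac.sigmoid(&mut b, &a);
        assert!(
            b.read() == &[0.5, 0.5, 0.5, 0.5]
        );
    }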
}