use std::any::{Any, TypeId};
use std::collections::HashMap;
use ::ndarray::{Array, ArrayBase, Dimension, Ix2, IxDyn, OwnedRepr};
use crate::array_protocol::{
ArrayFunction, ArrayProtocol, GPUNdarray, NotImplemented, NdarrayWrapper
};
#[allow(dead_code)]
/// Registers CUDA-backed implementations with the array-protocol dispatch.
///
/// Currently a no-op: no operations are registered yet, and the CPU
/// fallbacks below (`cuda_matmul`, `cuda_add`, ...) must be called directly.
/// NOTE(review): presumably this will eventually register the functions in
/// this module as `ArrayFunction` handlers — confirm against the dispatcher.
pub fn register_cuda_operations() {
}
#[allow(dead_code)]
/// Matrix product of two GPU arrays.
///
/// This is a CPU fallback: both operands are copied to host memory,
/// multiplied with ndarray's `dot`, and the product is wrapped back into a
/// GPU array using `a`'s configuration.
///
/// # Errors
/// Returns `NotImplemented` when the operands live on different devices,
/// when either operand is not a 2-D array with a matching inner dimension,
/// or when the host transfer / downcast fails.
pub fn cuda_matmul<D1, D2>(
    a: &GPUNdarray<f64, D1>,
    b: &GPUNdarray<f64, D2>,
) -> Result<GPUNdarray<f64, Ix2>, NotImplemented>
where
    D1: Dimension,
    D2: Dimension,
{
    // Cross-device operands cannot be combined.
    if a.device_id() != b.device_id() {
        return Err(NotImplemented);
    }
    // `dot` requires 2-D operands with matching inner dimensions.
    let a_shape = a.shape();
    let b_shape = b.shape();
    if a_shape.len() != 2 || b_shape.len() != 2 || a_shape[1] != b_shape[0] {
        return Err(NotImplemented);
    }
    // Report NotImplemented instead of panicking when the host transfer or
    // the downcast fails — this API is already fallible.
    let a_cpu = a.to_cpu().map_err(|_| NotImplemented)?;
    let b_cpu = b.to_cpu().map_err(|_| NotImplemented)?;
    // NOTE(review): assumes `to_cpu` yields an `NdarrayWrapper<f64, Ix2>`
    // for 2-D arrays — confirm against the GPUNdarray implementation.
    let a_array = a_cpu
        .downcast_ref::<NdarrayWrapper<f64, Ix2>>()
        .ok_or(NotImplemented)?
        .as_array();
    let b_array = b_cpu
        .downcast_ref::<NdarrayWrapper<f64, Ix2>>()
        .ok_or(NotImplemented)?
        .as_array();
    let result = a_array.dot(b_array);
    Ok(GPUNdarray::new(result, a.config().clone()))
}
#[allow(dead_code)]
/// Element-wise sum of two GPU arrays (CPU fallback).
///
/// Both operands are copied to host memory, added with ndarray's `+`
/// operator (which broadcasts `b` into `a`'s shape and panics on
/// incompatible shapes), and the sum is wrapped back into a GPU array
/// using `a`'s configuration.
///
/// # Errors
/// Returns `NotImplemented` when the operands live on different devices or
/// when the host transfer / downcast fails.
pub fn cuda_add<D1, D2>(
    a: &GPUNdarray<f64, D1>,
    b: &GPUNdarray<f64, D2>,
) -> Result<GPUNdarray<f64, IxDyn>, NotImplemented>
where
    D1: Dimension,
    D2: Dimension,
{
    // Cross-device operands cannot be combined.
    if a.device_id() != b.device_id() {
        return Err(NotImplemented);
    }
    // Report NotImplemented instead of panicking on transfer/downcast
    // failure. (The original also computed the two shapes here but never
    // used them — dropped.)
    let a_cpu = a.to_cpu().map_err(|_| NotImplemented)?;
    let b_cpu = b.to_cpu().map_err(|_| NotImplemented)?;
    // NOTE(review): assumes a dynamic-dimension host wrapper, matching the
    // `IxDyn` return type — confirm against GPUNdarray::to_cpu.
    let a_array = a_cpu
        .downcast_ref::<NdarrayWrapper<f64, IxDyn>>()
        .ok_or(NotImplemented)?
        .as_array();
    let b_array = b_cpu
        .downcast_ref::<NdarrayWrapper<f64, IxDyn>>()
        .ok_or(NotImplemented)?
        .as_array();
    // `&A + &A` produces a new owned array without consuming the operands.
    let result = a_array + b_array;
    Ok(GPUNdarray::new(result, a.config().clone()))
}
#[allow(dead_code)]
/// Element-wise product of two GPU arrays (CPU fallback).
///
/// Both operands are copied to host memory, multiplied with ndarray's `*`
/// operator (which broadcasts `b` into `a`'s shape and panics on
/// incompatible shapes), and the product is wrapped back into a GPU array
/// using `a`'s configuration.
///
/// # Errors
/// Returns `NotImplemented` when the operands live on different devices or
/// when the host transfer / downcast fails.
pub fn cuda_multiply<D1, D2>(
    a: &GPUNdarray<f64, D1>,
    b: &GPUNdarray<f64, D2>,
) -> Result<GPUNdarray<f64, IxDyn>, NotImplemented>
where
    D1: Dimension,
    D2: Dimension,
{
    // Cross-device operands cannot be combined.
    if a.device_id() != b.device_id() {
        return Err(NotImplemented);
    }
    // Report NotImplemented instead of panicking on transfer/downcast failure.
    let a_cpu = a.to_cpu().map_err(|_| NotImplemented)?;
    let b_cpu = b.to_cpu().map_err(|_| NotImplemented)?;
    // NOTE(review): assumes a dynamic-dimension host wrapper, matching the
    // `IxDyn` return type — confirm against GPUNdarray::to_cpu.
    let a_array = a_cpu
        .downcast_ref::<NdarrayWrapper<f64, IxDyn>>()
        .ok_or(NotImplemented)?
        .as_array();
    let b_array = b_cpu
        .downcast_ref::<NdarrayWrapper<f64, IxDyn>>()
        .ok_or(NotImplemented)?
        .as_array();
    // `&A * &A` produces a new owned array without consuming the operands.
    let result = a_array * b_array;
    Ok(GPUNdarray::new(result, a.config().clone()))
}
#[allow(dead_code)]
/// Transpose of a 2-D GPU array (CPU fallback).
///
/// The array is copied to host memory, transposed via ndarray's `t()`
/// view (then materialized with `to_owned`), and wrapped back into a GPU
/// array using `a`'s configuration.
///
/// # Errors
/// Returns `NotImplemented` when the array is not 2-D or when the host
/// transfer / downcast fails.
pub fn cuda_transpose<D>(
    a: &GPUNdarray<f64, D>,
) -> Result<GPUNdarray<f64, Ix2>, NotImplemented>
where
    D: Dimension,
{
    // Only 2-D transposition is supported by this fallback.
    if a.shape().len() != 2 {
        return Err(NotImplemented);
    }
    // Report NotImplemented instead of panicking on transfer/downcast failure.
    let a_cpu = a.to_cpu().map_err(|_| NotImplemented)?;
    // NOTE(review): assumes `to_cpu` yields an `NdarrayWrapper<f64, Ix2>`
    // for 2-D arrays — confirm against the GPUNdarray implementation.
    let a_array = a_cpu
        .downcast_ref::<NdarrayWrapper<f64, Ix2>>()
        .ok_or(NotImplemented)?
        .as_array();
    // `t()` is a zero-copy view; `to_owned` materializes it.
    let result = a_array.t().to_owned();
    Ok(GPUNdarray::new(result, a.config().clone()))
}
#[allow(dead_code)]
pub fn cuda_sum<D>(
a: &GPUNdarray<f64, D>,
axis: Option<usize>,
) -> Result<Box<dyn Any>, NotImplemented>
where
D: Dimension,
{
let a_cpu = a.to_cpu().expect("Operation failed");
let a_array = a_cpu.downcast_ref::<NdarrayWrapper<f64_>>().expect("Operation failed").as_array();
match axis {
Some(ax) => {
let result = a_array.sum_axis(crate::ndarray::Axis(ax));
let result_gpu = GPUNdarray::new(result, a.config().clone());
Ok(Box::new(result_gpu))
},
None => {
let result = a_array.sum();
Ok(Box::new(result))
}
}
}
#[allow(dead_code)]
pub fn cuda_reshape<D>(
a: &GPUNdarray<f64, D>,
shape: &[usize],
) -> Result<GPUNdarray<f64, IxDyn>, NotImplemented>
where
D: Dimension,
{
let a_cpu = a.to_cpu().expect("Operation failed");
let a_array = a_cpu.downcast_ref::<NdarrayWrapper<f64_>>().expect("Operation failed").as_array();
match a_array.clone().intoshape(shape) {
Ok(result) => {
let result_gpu = GPUNdarray::new(result, a.config().clone());
Ok(result_gpu)
},
Err(_) => Err(NotImplemented),
}
}
#[allow(dead_code)]
/// Placeholder 2-D convolution of a GPU array with a kernel.
///
/// Validates the geometry and returns a zero-filled output of the correct
/// convolution shape; no actual convolution is computed yet (the original
/// likewise produced only zeros).
///
/// # Errors
/// Returns `NotImplemented` when the operands live on different devices,
/// when either operand is not 2-D, when a stride component is zero, or
/// when the kernel is larger than the padded input.
pub fn cuda_conv2d<D1, D2>(
    input: &GPUNdarray<f64, D1>,
    kernel: &GPUNdarray<f64, D2>,
    stride: (usize, usize),
    padding: (usize, usize),
) -> Result<GPUNdarray<f64, Ix2>, NotImplemented>
where
    D1: Dimension,
    D2: Dimension,
{
    // Cross-device operands cannot be combined.
    if input.device_id() != kernel.device_id() {
        return Err(NotImplemented);
    }
    let input_shape = input.shape();
    let kernel_shape = kernel.shape();
    // Both operands must be 2-D (the original never checked the kernel),
    // and a zero stride would divide by zero below.
    if input_shape.len() != 2
        || kernel_shape.len() != 2
        || stride.0 == 0
        || stride.1 == 0
    {
        return Err(NotImplemented);
    }
    // Guard against usize underflow when the kernel exceeds the padded input.
    let padded_h = input_shape[0] + 2 * padding.0;
    let padded_w = input_shape[1] + 2 * padding.1;
    if padded_h < kernel_shape[0] || padded_w < kernel_shape[1] {
        return Err(NotImplemented);
    }
    // Standard convolution output size: (padded - kernel) / stride + 1.
    let h_out = (padded_h - kernel_shape[0]) / stride.0 + 1;
    let w_out = (padded_w - kernel_shape[1]) / stride.1 + 1;
    // `Array` takes both element and dimension type parameters.
    let result = Array::<f64, Ix2>::zeros((h_out, w_out));
    Ok(GPUNdarray::new(result, input.config().clone()))
}
#[allow(dead_code)]
pub fn cuda_svd<D>(
a: &GPUNdarray<f64, D>,
) -> Result<(GPUNdarray<f64, Ix2>, GPUNdarray<f64, IxDyn>, GPUNdarray<f64, Ix2>), NotImplemented>
where
D: Dimension,
{
let shape = a.shape();
if shape.len() != 2 {
return Err(NotImplemented);
}
let (m, n) = (shape[0], shape[1]);
let u = Array::<f64>::eye(m);
let s = Array::<f64>::ones(m.min(n));
let vt = Array::<f64>::eye(n);
let u_gpu = GPUNdarray::new(u, a.config().clone());
let s_gpu = GPUNdarray::new(s, a.config().clone());
let vt_gpu = GPUNdarray::new(vt, a.config().clone());
Ok((u_gpu, s_gpu, vt_gpu))
}
#[allow(dead_code)]
/// Placeholder matrix inverse of a square 2-D GPU array.
///
/// Returns the `n×n` identity wrapped into a GPU array with `a`'s
/// configuration; no actual inversion is computed yet.
///
/// # Errors
/// Returns `NotImplemented` when the array is not 2-D or not square.
pub fn cuda_inverse<D>(
    a: &GPUNdarray<f64, D>,
) -> Result<GPUNdarray<f64, Ix2>, NotImplemented>
where
    D: Dimension,
{
    let shape = a.shape();
    // Only square matrices are invertible.
    if shape.len() != 2 || shape[0] != shape[1] {
        return Err(NotImplemented);
    }
    let n = shape[0];
    // `Array` takes both element and dimension type parameters.
    let result = Array::<f64, Ix2>::eye(n);
    Ok(GPUNdarray::new(result, a.config().clone()))
}