use std::sync::{Arc, RwLock};
use co::prelude::*;
use coblas::plugin::*;
use conn;
use num::traits::{NumCast, cast};
/// Shorthand for a value of type `T` held in an `RwLock` behind an `Arc`.
pub type ArcLock<T> = Arc<RwLock<T>>;
/// Builds a `Backend` for the `Native` framework, configured with every
/// hardware entry the framework reports.
///
/// # Panics
///
/// Panics if `Backend::new` returns an error.
pub fn native_backend() -> Backend<Native> {
    let native = Native::new();
    // Copy the hardware list out first: `native` itself is moved into the
    // configuration on the next line.
    let devices = native.hardwares().to_vec();
    let config = BackendConfig::new(native, &devices);
    Backend::new(config).unwrap()
}
/// Writes `data` into `mem`, starting at the first element.
///
/// This is `write_to_memory_offset` with an offset of `0`; see that function
/// for the conversion and panic behaviour.
pub fn write_to_memory<T>(mem: &mut MemoryType, data: &[T])
    where T: NumCast + ::std::marker::Copy
{
    write_to_memory_offset(mem, data, 0)
}
/// Writes `data` into `mem`, placing `data[k]` at element `offset + k`.
///
/// Only `MemoryType::Native` is written to: the memory is viewed as `f32`
/// elements and every datum is converted to `f32` with `num`'s `cast`.
/// When the crate is built with the `opencl` or `cuda` feature, any other
/// memory variant is silently left untouched.
///
/// # Panics
///
/// Panics if a datum cannot be represented as `f32`, or if `offset + k`
/// is out of range for the `f32` view of the memory.
pub fn write_to_memory_offset<T>(mem: &mut MemoryType, data: &[T], offset: usize)
    where T: NumCast + ::std::marker::Copy
{
    match *mem {
        MemoryType::Native(ref mut native) => {
            let buffer = native.as_mut_slice::<f32>();
            for (position, &datum) in data.iter().enumerate() {
                // Convert first, then index: a failed conversion is reported
                // before any out-of-range access.
                let value: f32 = cast(datum).unwrap();
                buffer[position + offset] = value;
            }
        },
        #[cfg(any(feature = "opencl", feature = "cuda"))]
        _ => {}
    }
}
/// Writes `data` into `tensor` as sample number `i`.
///
/// The size of one sample is the tensor's total size divided by its first
/// dimension, and the write starts at element `i * sample_size` of the
/// tensor's memory on a freshly created native backend device.
///
/// NOTE(review): this presumes the first dimension is the batch dimension —
/// confirm against callers.
///
/// # Panics
///
/// Panics if the first dimension is `0`, if syncing to the native device
/// fails, if the tensor has no memory for that device, or under the
/// conditions listed for `write_to_memory_offset`.
pub fn write_batch_sample<T>(tensor: &mut SharedTensor<f32>, data: &[T], i: usize)
    where T: NumCast + ::std::marker::Copy
{
    let backend = native_backend();
    let total_len = tensor.desc().size();
    let per_sample = total_len / tensor.desc()[0];

    // The outcome of registering the device is intentionally discarded; the
    // `sync` right after is what must succeed.
    let _ = tensor.add_device(backend.device());
    tensor.sync(backend.device()).unwrap();

    let memory = tensor.get_mut(backend.device()).unwrap();
    write_to_memory_offset(memory, data, i * per_sample);
}
/// Creates a one-element `SharedTensor` on a native backend device and
/// stores `scalar` in it.
///
/// NOTE(review): the value is stored through `write_to_memory`, which views
/// the memory as `f32` and converts the value to `f32`. For `T` other than
/// `f32` the stored bits may not be a valid `T` — confirm intended usage.
///
/// # Panics
///
/// Panics if the tensor cannot be created, if its native memory cannot be
/// obtained, or if `scalar` cannot be converted to `f32`.
pub fn native_scalar<T>(scalar: T) -> SharedTensor<T>
    where T: NumCast + ::std::marker::Copy
{
    let backend = native_backend();
    let mut tensor = SharedTensor::<T>::new(backend.device(), &vec![1]).unwrap();
    {
        // Keep the mutable borrow of `tensor` confined to this scope so the
        // tensor can be returned afterwards.
        let memory = tensor.get_mut(backend.device()).unwrap();
        write_to_memory(memory, &[scalar]);
    }
    tensor
}
/// Converts every element of `input` to `i32`, preserving order.
///
/// The conversion uses an `as` cast, so values that do not fit in an `i32`
/// wrap around (only the low 32 bits are kept) instead of causing an error.
/// For example `usize::MAX` becomes `-1`.
pub fn cast_vec_usize_to_i32(input: Vec<usize>) -> Vec<i32> {
    // `collect` sizes the output from the iterator's length up front, which
    // avoids the repeated reallocation of a push loop on an empty `Vec`.
    input.into_iter().map(|value| value as i32).collect()
}
/// Two-step operation composed from `Scal` and `Axpy`.
///
/// Both methods have default bodies, so implementors only need to provide
/// the two supertraits.
///
/// NOTE(review): going by BLAS naming, the combined effect is presumably
/// `y = a * x + b * y` — confirm against the `coblas` plugin documentation.
pub trait Axpby<F>: Axpy<F> + Scal<F> {
    /// Calls `scal(b, y)` and then `axpy(a, x, y)`.
    ///
    /// Returns the first error encountered; `axpy` is not called if `scal`
    /// fails.
    fn axpby(&self, a: &mut SharedTensor<F>, x: &mut SharedTensor<F>, b: &mut SharedTensor<F>, y: &mut SharedTensor<F>) -> Result<(), ::co::error::Error> {
        try!(self.scal(b, y));
        // Same error conversion `try!` would apply, expressed on the result.
        self.axpy(a, x, y).map_err(From::from)
    }

    /// Calls `scal_plain(b, y)` and then `axpy_plain(a, x, y)`.
    ///
    /// Returns the first error encountered; `axpy_plain` is not called if
    /// `scal_plain` fails.
    fn axpby_plain(&self, a: &SharedTensor<F>, x: &SharedTensor<F>, b: &SharedTensor<F>, y: &mut SharedTensor<F>) -> Result<(), ::co::error::Error> {
        try!(self.scal_plain(b, y));
        // Same error conversion `try!` would apply, expressed on the result.
        self.axpy_plain(a, x, y).map_err(From::from)
    }
}

/// Every type providing `Axpy<f32>` and `Scal<f32>` gets `Axpby<f32>`.
impl<T> Axpby<f32> for T
    where T: Axpy<f32> + Scal<f32>
{}
/// Marker trait bundling `LayerOps`, `Axpby`, `Dot` and `Copy` for an element
/// type `F`.
///
/// `Copy<F>` here takes a type parameter, so it is a trait brought into scope
/// by this file's imports and not `::std::marker::Copy`.
pub trait SolverOps<F>: LayerOps<F> + Axpby<F> + Dot<F> + Copy<F> {}

/// Every type providing all of the bundled `f32` operations is a
/// `SolverOps<f32>`.
impl<T> SolverOps<f32> for T
    where T: LayerOps<f32> + Axpby<f32> + Dot<f32> + Copy<f32>
{}
/// Marker trait bundling the operations required for element type `F`.
///
/// This definition is compiled only when the `cuda` feature is enabled and
/// the `native` feature is not. It requires convolution, pooling, the
/// plain and pointwise ReLU/sigmoid/tanh traits, softmax, log-softmax and
/// `Gemm`.
#[cfg(all(feature="cuda", not(feature="native")))]
pub trait LayerOps<F> : conn::Convolution<F>
+ conn::Pooling<F>
+ conn::Relu<F> + conn::ReluPointwise<F>
+ conn::Sigmoid<F> + conn::SigmoidPointwise<F>
+ conn::Tanh<F> + conn::TanhPointwise<F>
+ conn::Softmax<F> + conn::LogSoftmax<F>
+ Gemm<F> {}
/// Marker trait bundling the operations required for element type `F`.
///
/// This definition is compiled whenever the `native` feature is enabled.
/// It requires ReLU, sigmoid, tanh, softmax, log-softmax and `Gemm`; unlike
/// the `cuda`-only definition it does not require convolution, pooling or
/// the pointwise activation traits.
#[cfg(feature="native")]
pub trait LayerOps<F> : conn::Relu<F>
+ conn::Sigmoid<F>
+ conn::Tanh<F>
+ conn::Softmax<F> + conn::LogSoftmax<F>
+ Gemm<F> {}
/// With `cuda` enabled and `native` disabled, every type providing all of the
/// listed `f32` operations is a `LayerOps<f32>`.
#[cfg(all(feature="cuda", not(feature="native")))]
impl<T> LayerOps<f32> for T
    where T: conn::Convolution<f32>
           + conn::Pooling<f32>
           + conn::Relu<f32> + conn::ReluPointwise<f32>
           + conn::Sigmoid<f32> + conn::SigmoidPointwise<f32>
           + conn::Tanh<f32> + conn::TanhPointwise<f32>
           + conn::Softmax<f32> + conn::LogSoftmax<f32>
           + Gemm<f32>
{}
/// With `native` enabled, every type providing all of the listed `f32`
/// operations is a `LayerOps<f32>`.
#[cfg(feature="native")]
impl<T> LayerOps<f32> for T
    where T: conn::Relu<f32>
           + conn::Sigmoid<f32>
           + conn::Tanh<f32>
           + conn::Softmax<f32> + conn::LogSoftmax<f32>
           + Gemm<f32>
{}