tensorgraph-math
Mathematics primitives used by tensorgraph. Builds upon tensorgraph-sys
to support multiple BLAS backends and compute devices.
Basic example using OpenBLAS:
Enable features in the Cargo.toml:
tensorgraph-math = { version = "LATEST_VERSION", features = ["openblas"] }
use tensorgraph_math::{tensor::Tensor, sys::View};
// Matrix data is laid out column-major: `a` holds columns [0,2,4] and [1,3,5],
// `b` holds columns [0,2] and [1,3] (consistent with the expected result below).
let a = [0., 2., 4., 1., 3., 5.];
let b = [0., 2., 1., 3.];
// Wrap the raw buffers as a 3x2 and a 2x2 matrix.
let a = Tensor::from_shape([3, 2], a); let b = Tensor::from_shape([2, 2], b);
// (3x2) x (2x2) -> 3x2 product.
let c = a.matmul(b.view());
// Result flattened column-major: columns [2,6,10] and [3,11,19].
assert_eq!(c.into_inner().into_std(), [2., 6., 10., 3., 11., 19.]);
Intermediate example using cuBLAS global handles and OpenBLAS together:
Enable features in the Cargo.toml:
tensorgraph-math = { version = "LATEST_VERSION", features = ["openblas", "cublas"] }
use tensorgraph_math::{
blas::{DefaultBLASContext, cublas::CublasContext, BLAS},
sys::{
device::{DefaultDeviceAllocator, cuda::{Context, Cuda, Stream}, cpu::Cpu},
DefaultVec, View,
},
tensor::Tensor,
};
fn main() {
// Initialize the CUDA driver context; it must stay alive for all GPU work below.
let cuda_ctx = Context::quick_init().unwrap();
let stream = Stream::new(&cuda_ctx).unwrap();
// Install the stream as the global default (see `as_global`). Binding to
// `_handle` (not `_`) keeps the returned guard alive until the end of `main`.
let _handle = stream.as_global();
let cublas_ctx = CublasContext::new();
// Attach cuBLAS to the stream and install it globally as well. Shadowing
// `_handle` does not drop the first guard early — both live to scope end.
let _handle = cublas_ctx.with_stream(Some(&stream)).as_global();
// Same generic code runs on both devices via the globals installed above.
run::<Cuda>();
run::<Cpu>();
}
/// Runs the matmul example on device `D`, relying on the device's default
/// allocator and the globally installed BLAS context.
fn run<D: DefaultDeviceAllocator + DefaultBLASContext>()
where
    f32: BLAS<D::Context>,
{
    // Upload both operands to the device (column-major element order).
    let lhs = DefaultVec::<f32, D>::copy_from_host(&[0., 2., 4., 1., 3., 5.]);
    let rhs = DefaultVec::<f32, D>::copy_from_host(&[0., 2., 1., 3.]);

    // Interpret the buffers as a 3x2 and a 2x2 matrix.
    let lhs = Tensor::from_shape([3, 2], lhs);
    let rhs = Tensor::from_shape([2, 2], rhs);

    // (3x2) x (2x2) -> 3x2 product, computed on the device.
    let product = lhs.matmul(rhs.view());

    // Copy the six result elements back to the host and verify them.
    let mut host_result = [0.; 6];
    product.into_inner().copy_to_host(&mut host_result);
    assert_eq!(host_result, [2., 6., 10., 3., 11., 19.]);
}
Advanced example using OpenBLAS and cuBLAS by explicitly passing BLAS contexts and allocators:
Enable features in the Cargo.toml:
tensorgraph-math = { version = "LATEST_VERSION", features = ["openblas", "cublas"] }
#![feature(allocator_api)]
use std::{alloc::Global, ops::Deref};
use tensorgraph_math::{
blas::{BLASContext, cublas::{CublasContext}, BLAS},
sys::{
device::{cuda::{Context, Cuda, Stream}, cpu::Cpu, Device, DeviceAllocator},
Vec, View,
},
tensor::Tensor,
};
fn main() {
// CUDA context and stream must outlive the cuBLAS handle bound to them below.
let cuda_ctx = Context::quick_init().unwrap();
let stream = Stream::new(&cuda_ctx).unwrap();
let cublas_ctx = CublasContext::new();
// Bind the cuBLAS handle to the stream; here the context is threaded through
// explicitly instead of being installed as a global.
let cublas_ctx = cublas_ctx.with_stream(Some(&stream));
// GPU run: cuBLAS context, with the stream acting as the device allocator.
run(cublas_ctx, stream.deref());
// CPU run: unit BLAS context and the standard `Global` host allocator.
run((), Global);
}
/// Runs the matmul example with an explicitly supplied BLAS context `ctx`
/// and a device allocator `alloc` targeting the same device.
fn run<C: BLASContext, A: DeviceAllocator<Device = C::Device> + Copy>(ctx: C, alloc: A)
where
    f32: BLAS<C>,
{
    // Allocate device buffers through the caller's allocator
    // (column-major element order).
    let lhs = Vec::copy_from_host_in(&[0., 2., 4., 1., 3., 5.], alloc);
    let rhs = Vec::copy_from_host_in(&[0., 2., 1., 3.0_f32], alloc);

    // View the buffers as a 3x2 and a 2x2 matrix.
    let lhs = Tensor::from_shape([3, 2], lhs);
    let rhs = Tensor::from_shape([2, 2], rhs);

    // Multiply using the explicit context; the output buffer is also
    // allocated via `alloc`.
    let product = lhs.matmul_into(rhs.view(), ctx, alloc);

    // Pull the six elements back to the host and verify.
    let mut host = [0.; 6];
    product.into_inner().copy_to_host(&mut host);
    assert_eq!(host, [2., 6., 10., 3., 11., 19.]);
}