//! # tensorgraph-math
//! Mathematics primitives used by tensorgraph.
//! Builds upon [tensorgraph-sys](https://docs.rs/tensorgraph-sys/latest/tensorgraph_sys/)
//! to support many BLAS backends and devices.
//!
//! ## Basic example using openblas:
//!
//! Enable the required features in `Cargo.toml`:
//! ```toml
//! tensorgraph-math = { version = "LATEST_VERSION", features = ["openblas"] }
//! ```
//!
//! ```
//! use tensorgraph_math::{tensor::Tensor, sys::View};
//!
//! //     0 1
//! // A = 2 3
//! //     4 5
//!
//! // B = 0 1
//! //     2 3
//!
//! // column major (read each column first)
//! let a = [0., 2., 4., 1., 3., 5.];
//! let b = [0., 2., 1., 3.];
//!
//! let a = Tensor::from_shape([3, 2], a); // 3 rows x 2 cols
//! let b = Tensor::from_shape([2, 2], b); // 2 rows x 2 cols
//!
//! //          2  3
//! // C = AB = 6  11
//! //          10 19
//!
//! let c = a.dot(b.view());
//! assert_eq!(c.into_inner().into_std(), [2., 6., 10., 3., 11., 19.]);
//! ```
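//!
//! The buffers are column major: element `(i, j)` of an `m x n` matrix is stored at
//! index `i + j * m`, which is why the expected output above reads down each column of
//! `C`. As a quick sanity check using plain arrays (no tensorgraph types involved), the
//! entry in row 0, column 0 of `C` is the dot product of row 0 of `A` with column 0 of `B`:
//!
//! ```
//! let a_row0 = [0., 1.]; // row 0 of A
//! let b_col0 = [0., 2.]; // column 0 of B
//! let c_00: f64 = a_row0.iter().zip(b_col0.iter()).map(|(x, y)| x * y).sum();
//! assert_eq!(c_00, 2.); // first element of the column-major result above
//! ```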
//!
//! ## Intermediate example using cublas globals and openblas together:
//!
//! Enable the required features in `Cargo.toml`:
//! ```toml
//! tensorgraph-math = { version = "LATEST_VERSION", features = ["openblas", "cublas"] }
//! ```
//!
//! ```
//! use tensorgraph_math::{
//!     blas::{DefaultBLASContext, cublas::CublasContext, GEMM},
//!     sys::{
//!         device::{DefaultDeviceAllocator, cuda::{Context, Cuda, Stream}, cpu::Cpu},
//!         DefaultVec, View,
//!     },
//!     tensor::Tensor,
//! };
//!
//! fn main() {
//!     // init cuda context
//!     let cuda_ctx = Context::quick_init().unwrap();
//!
//!     // create cuda stream and configure it as the global
//!     let stream = Stream::new(&cuda_ctx).unwrap();
//!     let _handle = stream.as_global();
//!
//!     // create cublas context with the provided stream, and configure it as the global
//!     let cublas_ctx = CublasContext::new();
//!     let _handle = cublas_ctx.with_stream(Some(&stream)).as_global();
//!
//!     // cublas is the default BLAS implementation for CUDA when the feature is enabled
//!     run::<Cuda>();
//!
//!     // openblas is the default BLAS implementation for CPU when the feature is enabled
//!     run::<Cpu>();
//! }
//!
//! /// Generic code that runs on the specified device,
//! /// using that device's default allocator and BLAS provider.
//! fn run<D: DefaultDeviceAllocator + DefaultBLASContext>()
//! where
//!     f32: GEMM<D::Context, D>,
//! {
//!     //     0 1
//!     // A = 2 3
//!     //     4 5
//!
//!     // B = 0 1
//!     //     2 3
//!
//!     // column major (read each column first)
//!     let a = DefaultVec::<f32, D>::copy_from_host(&[0., 2., 4., 1., 3., 5.]);
//!     let b = DefaultVec::<f32, D>::copy_from_host(&[0., 2., 1., 3.]);
//!
//!     let a = Tensor::from_shape([3, 2], a); // 3 rows x 2 cols
//!     let b = Tensor::from_shape([2, 2], b); // 2 rows x 2 cols
//!
//!     //          2  3
//!     // C = AB = 6  11
//!     //          10 19
//!
//!     let c = a.dot(b.view());
//!
//!     let mut out = [0.; 6];
//!     c.into_inner().copy_to_host(&mut out);
//!     assert_eq!(out, [2., 6., 10., 3., 11., 19.]);
//! }
//! ```
//!
//! ## Advanced example using openblas and cublas by passing BLAS contexts and allocators:
//!
//! Enable the required features in `Cargo.toml`:
//! ```toml
//! tensorgraph-math = { version = "LATEST_VERSION", features = ["openblas", "cublas"] }
//! ```
//!
//! ```
//! #![feature(allocator_api)]
//! use std::{alloc::Global, ops::Deref};
//! use tensorgraph_math::{
//!     blas::{BLASContext, cublas::CublasContext, GEMM},
//!     sys::{
//!         device::{cuda::{Context, Cuda, Stream}, cpu::Cpu, Device, DeviceAllocator},
//!         Vec, View,
//!     },
//!     tensor::Tensor,
//! };
//!
//! fn main() {
//!     // init cuda context
//!     let cuda_ctx = Context::quick_init().unwrap();
//!
//!     // create cuda stream
//!     let stream = Stream::new(&cuda_ctx).unwrap();
//!
//!     // create cublas context with the provided stream
//!     let cublas_ctx = CublasContext::new();
//!     let cublas_ctx = cublas_ctx.with_stream(Some(&stream));
//!
//!     // run using the CUDA stream as the allocator,
//!     // and cublas as the BLAS provider
//!     run::<Cuda, _, _>(cublas_ctx, stream.deref());
//!
//!     // run using the CPU default BLAS and the Global allocator
//!     run::<Cpu, _, _>((), Global);
//! }
//!
//! fn run<D: Device, C: BLASContext<D>, A: DeviceAllocator<D> + Copy>(ctx: C, alloc: A)
//! where
//!     f32: GEMM<C, D>,
//! {
//!     //     0 1
//!     // A = 2 3
//!     //     4 5
//!
//!     // B = 0 1
//!     //     2 3
//!
//!     // column major (read each column first)
//!     let a = Vec::copy_from_host_in(&[0., 2., 4., 1., 3., 5.], alloc);
//!     let b = Vec::copy_from_host_in(&[0., 2., 1., 3.0_f32], alloc);
//!
//!     let a = Tensor::from_shape([3, 2], a); // 3 rows x 2 cols
//!     let b = Tensor::from_shape([2, 2], b); // 2 rows x 2 cols
//!
//!     //          2  3
//!     // C = AB = 6  11
//!     //          10 19
//!
//!     let c = a.dot_into(b.view(), ctx, alloc);
//!
//!     let mut out = [0.; 6];
//!     c.into_inner().copy_to_host(&mut out);
//!     assert_eq!(out, [2., 6., 10., 3., 11., 19.]);
//! }
//! ```
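//!
//! The three examples differ only in how the BLAS context and allocator are supplied:
//! `dot` relies on the device's defaults (or the globals configured in the intermediate
//! example), while `dot_into` takes an explicit BLAS context and allocator, as shown above.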
#![allow(incomplete_features)]
#![feature(
generic_associated_types,
allocator_api,
alloc_layout_extra,
nonnull_slice_from_raw_parts,
slice_ptr_len,
ptr_metadata,
maybe_uninit_slice,
generic_const_exprs,
thread_local,
once_cell,
layout_for_ptr
)]
/// Re-export of tensorgraph_sys.
pub use tensorgraph_sys as sys;

/// Traits and implementations of BLAS providers.
pub mod blas;

/// Traits and implementations for basic dimension types.
pub mod dims;

/// Traits and implementations for basic storage buffers.
pub mod storage;

/// Implementations for tensor operations and structures.
pub mod tensor;
