tensorgraph_math/lib.rs
//! # tensorgraph-math
//! Mathematics primitives used by tensorgraph.
//! Builds upon [tensorgraph-sys](https://docs.rs/tensorgraph-sys/latest/tensorgraph_sys/)
//! to support many BLAS backends and devices.
//!
//! ## Basic example using openblas:
//!
//! Enable features in the Cargo.toml:
//! ```toml
//! tensorgraph-math = { version = "LATEST_VERSION", features = ["openblas"] }
//! ```
//!
//! ```
//! use tensorgraph_math::{tensor::Tensor, sys::View};
//!
//! //     0 1
//! // A = 2 3
//! //     4 5
//!
//! // B = 0 1
//! //     2 3
//!
//! // column major (read each column first)
//! let a = [0., 2., 4., 1., 3., 5.];
//! let b = [0., 2., 1., 3.];
//!
//! let a = Tensor::from_shape([3, 2], a); // 3 rows x 2 cols
//! let b = Tensor::from_shape([2, 2], b); // 2 rows x 2 cols
//!
//! //          2  3
//! // C = AB = 6  11
//! //          10 19
//!
//! let c = a.matmul(b.view());
//! assert_eq!(c.into_inner().into_std(), [2., 6., 10., 3., 11., 19.]);
//! ```
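//!
//! The expected values can be reproduced without the crate. The sketch below is an
//! illustrative reference computation (not part of tensorgraph-math): a naive matmul
//! over the same column-major buffers, where element `(row, col)` of a matrix with
//! `m` rows is stored at index `col * m + row`.
//!
//! ```
//! let a = [0., 2., 4., 1., 3., 5.]; // A: 3x2, column major
//! let b = [0., 2., 1., 3.];         // B: 2x2, column major
//! let (m, k, n) = (3, 2, 2);        // A is m x k, B is k x n
//!
//! let mut c = [0.; 6]; // C = AB: 3x2, column major
//! for col in 0..n {
//!     for row in 0..m {
//!         for i in 0..k {
//!             c[col * m + row] += a[i * m + row] * b[col * k + i];
//!         }
//!     }
//! }
//! assert_eq!(c, [2., 6., 10., 3., 11., 19.]);
//! ```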
//!
//! ## Intermediate example using cublas globals and openblas together:
//!
//! Enable features in the Cargo.toml:
//! ```toml
//! tensorgraph-math = { version = "LATEST_VERSION", features = ["openblas", "cublas"] }
//! ```
//!
//! ```
//! use tensorgraph_math::{
//!     blas::{DefaultBLASContext, cublas::CublasContext, BLAS},
//!     sys::{
//!         device::{DefaultDeviceAllocator, cuda::{Context, Cuda, Stream}, cpu::Cpu},
//!         DefaultVec, View,
//!     },
//!     tensor::Tensor,
//! };
//!
//! fn main() {
//!     // init cuda context
//!     let cuda_ctx = Context::quick_init().unwrap();
//!
//!     // create cuda stream and configure it as the global
//!     let stream = Stream::new(&cuda_ctx).unwrap();
//!     let _handle = stream.as_global();
//!
//!     // create cublas context, with the provided stream, and configure it as the global
//!     let cublas_ctx = CublasContext::new();
//!     let _handle = cublas_ctx.with_stream(Some(&stream)).as_global();
//!
//!     // cublas is the default BLAS implementation for CUDA when the feature is enabled
//!     run::<Cuda>();
//!
//!     // openblas is the default BLAS implementation for CPU when the feature is enabled
//!     run::<Cpu>();
//! }
//!
//! /// Generic code that runs on the specified device,
//! /// using that device's default allocator and BLAS provider
//! fn run<D: DefaultDeviceAllocator + DefaultBLASContext>()
//! where
//!     f32: BLAS<D::Context>,
//! {
//!     //     0 1
//!     // A = 2 3
//!     //     4 5
//!
//!     // B = 0 1
//!     //     2 3
//!
//!     // column major (read each column first)
//!     let a = DefaultVec::<f32, D>::copy_from_host(&[0., 2., 4., 1., 3., 5.]);
//!     let b = DefaultVec::<f32, D>::copy_from_host(&[0., 2., 1., 3.]);
//!
//!     let a = Tensor::from_shape([3, 2], a); // 3 rows x 2 cols
//!     let b = Tensor::from_shape([2, 2], b); // 2 rows x 2 cols
//!
//!     //          2  3
//!     // C = AB = 6  11
//!     //          10 19
//!
//!     let c = a.matmul(b.view());
//!
//!     let mut out = [0.; 6];
//!     c.into_inner().copy_to_host(&mut out);
//!     assert_eq!(out, [2., 6., 10., 3., 11., 19.]);
//! }
//! ```
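//!
//! The `where f32: BLAS<D::Context>` bound is what keeps `run` generic over devices:
//! the scalar type, parameterised by the device's default BLAS context, provides the
//! BLAS operations. The sketch below is a minimal analogy of that shape, using
//! illustrative stand-in traits rather than the crate's actual definitions.
//!
//! ```
//! trait ExampleContext { fn name() -> &'static str; }
//! trait ExampleGemm<C: ExampleContext> {
//!     fn backend() -> &'static str { C::name() }
//! }
//!
//! struct CpuLikeContext;
//! impl ExampleContext for CpuLikeContext { fn name() -> &'static str { "cpu" } }
//! impl ExampleGemm<CpuLikeContext> for f32 {}
//!
//! // mirrors `fn run<D>() where f32: BLAS<D::Context>`
//! fn generic_run<C: ExampleContext>() -> &'static str
//! where
//!     f32: ExampleGemm<C>,
//! {
//!     <f32 as ExampleGemm<C>>::backend()
//! }
//!
//! assert_eq!(generic_run::<CpuLikeContext>(), "cpu");
//! ```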
//!
//! ## Advanced example using openblas and cublas by passing BLAS contexts and allocators:
//!
//! Enable features in the Cargo.toml:
//! ```toml
//! tensorgraph-math = { version = "LATEST_VERSION", features = ["openblas", "cublas"] }
//! ```
//!
//! ```
//! #![feature(allocator_api)]
//! use std::{alloc::Global, ops::Deref};
//! use tensorgraph_math::{
//!     blas::{BLASContext, cublas::CublasContext, BLAS},
//!     sys::{
//!         device::{cuda::{Context, Cuda, Stream}, cpu::Cpu, Device, DeviceAllocator},
//!         Vec, View,
//!     },
//!     tensor::Tensor,
//! };
//!
//! fn main() {
//!     // init cuda context
//!     let cuda_ctx = Context::quick_init().unwrap();
//!
//!     // create cuda stream
//!     let stream = Stream::new(&cuda_ctx).unwrap();
//!
//!     // create cublas context, with the provided stream
//!     let cublas_ctx = CublasContext::new();
//!     let cublas_ctx = cublas_ctx.with_stream(Some(&stream));
//!
//!     // run using the CUDA stream as the allocator, and cublas
//!     // as the BLAS provider
//!     run(cublas_ctx, stream.deref());
//!
//!     // run using the CPU default BLAS and Global allocator
//!     run((), Global);
//! }
//!
//! fn run<C: BLASContext, A: DeviceAllocator<Device = C::Device> + Copy>(ctx: C, alloc: A)
//! where
//!     f32: BLAS<C>,
//! {
//!     //     0 1
//!     // A = 2 3
//!     //     4 5
//!
//!     // B = 0 1
//!     //     2 3
//!
//!     // column major (read each column first)
//!     let a = Vec::copy_from_host_in(&[0., 2., 4., 1., 3., 5.], alloc);
//!     let b = Vec::copy_from_host_in(&[0., 2., 1., 3.0_f32], alloc);
//!
//!     let a = Tensor::from_shape([3, 2], a); // 3 rows x 2 cols
//!     let b = Tensor::from_shape([2, 2], b); // 2 rows x 2 cols
//!
//!     //          2  3
//!     // C = AB = 6  11
//!     //          10 19
//!
//!     let c = a.matmul_into(b.view(), ctx, alloc);
//!
//!     let mut out = [0.; 6];
//!     c.into_inner().copy_to_host(&mut out);
//!     assert_eq!(out, [2., 6., 10., 3., 11., 19.]);
//! }
//! ```
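//!
//! Unlike the intermediate example, nothing here relies on globals: the BLAS context
//! and the allocator are passed explicitly into `matmul_into`, so the same generic
//! `run` works with any matching `BLASContext` and `DeviceAllocator` pair.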

#![warn(clippy::pedantic, clippy::nursery)]
#![allow(
    clippy::module_name_repetitions,
    clippy::float_cmp,
    clippy::many_single_char_names,
    clippy::similar_names,
    clippy::unreadable_literal
)]
#![allow(incomplete_features)]
#![feature(
    generic_associated_types,
    allocator_api,
    alloc_layout_extra,
    nonnull_slice_from_raw_parts,
    slice_ptr_len,
    ptr_metadata,
    maybe_uninit_slice,
    generic_const_exprs,
    thread_local,
    once_cell,
    layout_for_ptr
)]

pub use tensorgraph_sys as sys;

/// Traits and implementations of BLAS providers
pub mod blas;

/// Traits and implementations for basic dimension types
pub mod dims;

/// Traits and implementations for basic storage buffers
pub mod storage;

/// Implementations for tensor operations and structures
pub mod tensor;