tensorgraph_math/lib.rs

//! # tensorgraph-math
//! Mathematics primitives used by tensorgraph.
//! Builds upon [tensorgraph-sys](https://docs.rs/tensorgraph-sys/latest/tensorgraph_sys/)
//! to support many BLAS backends and devices.
//!
//! ## Basic example using OpenBLAS
//!
//! Enable the feature in Cargo.toml:
//! ```toml
//! tensorgraph-math = { version = "LATEST_VERSION", features = ["openblas"] }
//! ```
//!
//! ```
//! use tensorgraph_math::{tensor::Tensor, sys::View};
//!
//! //     0 1
//! // A = 2 3
//! //     4 5
//!
//! // B = 0 1
//! //     2 3
//!
//! // column major (read each column first)
//! let a = [0., 2., 4., 1., 3., 5.];
//! let b = [0., 2., 1., 3.];
//!
//! let a = Tensor::from_shape([3, 2], a); // 3 rows x 2 cols
//! let b = Tensor::from_shape([2, 2], b); // 2 rows x 2 cols
//!
//! //           2  3
//! // C = AB =  6 11
//! //          10 19
//!
//! let c = a.matmul(b.view());
//! assert_eq!(c.into_inner().into_std(), [2., 6., 10., 3., 11., 19.]);
//! ```
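//!
//! In this column-major layout, element `(i, j)` of an `R x C` matrix sits at
//! flat index `i + j * R`. A quick illustration in plain Rust, independent of
//! the crate API:
//!
//! ```
//! let a = [0., 2., 4., 1., 3., 5.]; // the 3 x 2 matrix A above, column major
//! let rows: usize = 3;
//! let (i, j) = (2, 1); // row 2, column 1 of A holds the value 5
//! assert_eq!(a[i + j * rows], 5.);
//! ```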
//!
//! ## Intermediate example using cuBLAS globals and OpenBLAS together
//!
//! Enable the features in Cargo.toml:
//! ```toml
//! tensorgraph-math = { version = "LATEST_VERSION", features = ["openblas", "cublas"] }
//! ```
//!
//! ```
//! use tensorgraph_math::{
//!     blas::{DefaultBLASContext, cublas::CublasContext, BLAS},
//!     sys::{
//!         device::{DefaultDeviceAllocator, cuda::{Context, Cuda, Stream}, cpu::Cpu},
//!         DefaultVec, View,
//!     },
//!     tensor::Tensor,
//! };
//!
//! fn main() {
//!     // init cuda context
//!     let cuda_ctx = Context::quick_init().unwrap();
//!
//!     // create cuda stream and configure it as the global
//!     let stream = Stream::new(&cuda_ctx).unwrap();
//!     let _handle = stream.as_global();
//!
//!     // create cublas context, with the provided stream, and configure it as the global
//!     let cublas_ctx = CublasContext::new();
//!     let _handle = cublas_ctx.with_stream(Some(&stream)).as_global();
//!
//!     // cublas is the default BLAS implementation for CUDA when the feature is enabled
//!     run::<Cuda>();
//!
//!     // openblas is the default BLAS implementation for CPU when the feature is enabled
//!     run::<Cpu>();
//! }
//!
//! /// Generic code that runs on the specified device,
//! /// using that device's default allocator and BLAS provider
//! fn run<D: DefaultDeviceAllocator + DefaultBLASContext>()
//! where
//!     f32: BLAS<D::Context>,
//! {
//!     //     0 1
//!     // A = 2 3
//!     //     4 5
//!
//!     // B = 0 1
//!     //     2 3
//!
//!     // column major (read each column first)
//!     let a = DefaultVec::<f32, D>::copy_from_host(&[0., 2., 4., 1., 3., 5.]);
//!     let b = DefaultVec::<f32, D>::copy_from_host(&[0., 2., 1., 3.]);
//!
//!     let a = Tensor::from_shape([3, 2], a); // 3 rows x 2 cols
//!     let b = Tensor::from_shape([2, 2], b); // 2 rows x 2 cols
//!
//!     //           2  3
//!     // C = AB =  6 11
//!     //          10 19
//!
//!     let c = a.matmul(b.view());
//!
//!     let mut out = [0.; 6];
//!     c.into_inner().copy_to_host(&mut out);
//!     assert_eq!(out, [2., 6., 10., 3., 11., 19.]);
//! }
//! ```
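//!
//! With the CUDA stream and cuBLAS context installed as globals, the generic
//! `run` above takes no context or allocator arguments: `a.matmul(b.view())`
//! resolves them through the device's defaults (`DefaultDeviceAllocator` and
//! `DefaultBLASContext`). The advanced example below passes them explicitly
//! instead.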
//!
//! ## Advanced example using OpenBLAS and cuBLAS by passing BLAS contexts and allocators explicitly
//!
//! Enable the features in Cargo.toml:
//! ```toml
//! tensorgraph-math = { version = "LATEST_VERSION", features = ["openblas", "cublas"] }
//! ```
//!
//! ```
//! #![feature(allocator_api)]
//! use std::{alloc::Global, ops::Deref};
//! use tensorgraph_math::{
//!     blas::{BLASContext, cublas::CublasContext, BLAS},
//!     sys::{
//!         device::{cuda::{Context, Cuda, Stream}, cpu::Cpu, Device, DeviceAllocator},
//!         Vec, View,
//!     },
//!     tensor::Tensor,
//! };
//!
//! fn main() {
//!     // init cuda context
//!     let cuda_ctx = Context::quick_init().unwrap();
//!
//!     // create cuda stream
//!     let stream = Stream::new(&cuda_ctx).unwrap();
//!
//!     // create cublas context, with the provided stream
//!     let cublas_ctx = CublasContext::new();
//!     let cublas_ctx = cublas_ctx.with_stream(Some(&stream));
//!
//!     // run using the CUDA stream as the allocator, and cublas
//!     // as the BLAS provider
//!     run(cublas_ctx, stream.deref());
//!
//!     // run using the CPU default BLAS and Global allocator
//!     run((), Global);
//! }
//!
//! fn run<C: BLASContext, A: DeviceAllocator<Device = C::Device> + Copy>(ctx: C, alloc: A)
//! where
//!     f32: BLAS<C>,
//! {
//!     //     0 1
//!     // A = 2 3
//!     //     4 5
//!
//!     // B = 0 1
//!     //     2 3
//!
//!     // column major (read each column first)
//!     let a = Vec::copy_from_host_in(&[0., 2., 4., 1., 3., 5.], alloc);
//!     let b = Vec::copy_from_host_in(&[0., 2., 1., 3.0_f32], alloc);
//!
//!     let a = Tensor::from_shape([3, 2], a); // 3 rows x 2 cols
//!     let b = Tensor::from_shape([2, 2], b); // 2 rows x 2 cols
//!
//!     //           2  3
//!     // C = AB =  6 11
//!     //          10 19
//!
//!     let c = a.matmul_into(b.view(), ctx, alloc);
//!
//!     let mut out = [0.; 6];
//!     c.into_inner().copy_to_host(&mut out);
//!     assert_eq!(out, [2., 6., 10., 3., 11., 19.]);
//! }
//! ```
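//!
//! The explicit form trades the convenience of globals for flexibility: because
//! `run` is generic over any `BLASContext` and a matching `DeviceAllocator`, the
//! same body serves both calls above, `run(cublas_ctx, stream.deref())` on CUDA
//! and `run((), Global)` on the CPU.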

#![warn(clippy::pedantic, clippy::nursery)]
#![allow(
    clippy::module_name_repetitions,
    clippy::float_cmp,
    clippy::many_single_char_names,
    clippy::similar_names,
    clippy::unreadable_literal
)]
#![allow(incomplete_features)]
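// The unstable features below require a nightly Rust toolchain.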
#![feature(
    generic_associated_types,
    allocator_api,
    alloc_layout_extra,
    nonnull_slice_from_raw_parts,
    slice_ptr_len,
    ptr_metadata,
    maybe_uninit_slice,
    generic_const_exprs,
    thread_local,
    once_cell,
    layout_for_ptr
)]

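// Re-export tensorgraph-sys so downstream code can reach device, allocator, and
// view types as `tensorgraph_math::sys::...`, as the examples above do.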
pub use tensorgraph_sys as sys;

/// Traits and implementations of BLAS providers
pub mod blas;

/// Traits and implementations for basic dimension types
pub mod dims;

/// Traits and implementations for basic storage buffers
pub mod storage;

/// Implementations for tensor operations and structures
pub mod tensor;
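
// A minimal smoke test mirroring the basic doc example above. This is a sketch:
// the module name is arbitrary, and the `feature = "openblas"` gate assumes the
// OpenBLAS backend provides the default CPU BLAS used by `matmul`, as the crate
// docs describe.
#[cfg(all(test, feature = "openblas"))]
mod doc_example_smoke_test {
    use crate::{sys::View, tensor::Tensor};

    #[test]
    fn matmul_column_major() {
        // A (3 x 2) and B (2 x 2), both stored column major.
        let a = Tensor::from_shape([3, 2], [0., 2., 4., 1., 3., 5.]);
        let b = Tensor::from_shape([2, 2], [0., 2., 1., 3.]);

        // C = AB, also column major.
        let c = a.matmul(b.view());
        assert_eq!(c.into_inner().into_std(), [2., 6., 10., 3., 11., 19.]);
    }
}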