// numr — crate root (lib.rs)
//! # numr
//!
//! **High-performance numerical computing for Rust with multi-backend GPU acceleration.**
//!
//! numr provides n-dimensional arrays (tensors), linear algebra, FFT, and automatic
//! differentiation - with the same API across CPU, CUDA, and WebGPU backends.
//!
//! ## Why numr?
//!
//! - **Multi-backend**: Same code runs on CPU, CUDA, and WebGPU
//! - **No vendor lock-in**: Native kernels, not cuBLAS/MKL wrappers
//! - **Pure Rust**: No Python runtime, no FFI overhead, single binary deployment
//! - **Autograd included**: Reverse-mode automatic differentiation built-in
//! - **Sparse tensors**: CSR, CSC, COO formats with GPU support
//!
//! ## Features
//!
//! - **Tensors**: N-dimensional arrays with broadcasting, slicing, views
//! - **Linear algebra**: Matmul, LU, QR, SVD, Cholesky, eigendecomposition
//! - **FFT**: Fast Fourier transforms (1D, 2D, ND)
//! - **Element-wise ops**: Full set of math functions
//! - **Reductions**: Sum, mean, max, min, argmax, argmin along axes
//! - **Multiple dtypes**: f64, f32, f16, bf16, fp8, integers, bool
//!
//! ## Quick Start
//!
//! ```rust,ignore
//! use numr::prelude::*;
//!
//! let device = CpuDevice;
//! let a = Tensor::<CpuRuntime>::from_slice(&[1.0, 2.0, 3.0, 4.0], &[2, 2], &device);
//! let b = Tensor::<CpuRuntime>::from_slice(&[5.0, 6.0, 7.0, 8.0], &[2, 2], &device);
//!
//! let c = &a + &b;
//! let d = a.matmul(&b)?;
//! ```
//!
//! ## Feature Flags
//!
//! - `cpu` (default): CPU backend
//! - `cuda`: NVIDIA CUDA backend
//! - `wgpu`: Cross-platform GPU via WebGPU
//! - `rayon` (default): Multi-threaded CPU operations
//! - `f16`: Half-precision floats (F16, BF16)
//! - `sparse`: Sparse tensor formats (CSR, CSC, COO)

// Crate-wide lint policy: require docs on public items and enable all default
// clippy lints, then opt out of specific style lints below.
#![warn(missing_docs)]
#![warn(clippy::all)]
// NOTE(review): blanket clippy `allow` list — each suppression should ideally
// carry a short justification. Kept intact to avoid churning warnings across
// the crate; revisit and prune lints that no longer fire.
#![allow(clippy::module_inception)]
#![allow(clippy::too_many_arguments)]
#![allow(clippy::manual_div_ceil)]
#![allow(clippy::needless_range_loop)]
#![allow(clippy::excessive_precision)]
#![allow(clippy::manual_memcpy)]
#![allow(clippy::items_after_test_module)]
#![allow(clippy::len_without_is_empty)]
#![allow(clippy::needless_return)]
#![allow(clippy::unnecessary_cast)]
#![allow(clippy::manual_range_contains)]
#![allow(clippy::identity_op)]
#![allow(clippy::useless_vec)]
#![allow(clippy::type_complexity)]
#![allow(clippy::let_and_return)]
#![allow(clippy::explicit_auto_deref)]
#![allow(clippy::unnecessary_unwrap)]
#![allow(clippy::needless_borrow)]
#![allow(clippy::erasing_op)]
#![allow(clippy::unnecessary_lazy_evaluations)]
#![allow(clippy::len_zero)]
#![allow(clippy::unnecessary_wraps)]
#![allow(clippy::if_same_then_else)]
#![allow(clippy::repeat_once)]
#![allow(clippy::unused_unit)]
#![allow(clippy::extra_unused_type_parameters)]
#![allow(clippy::needless_question_mark)]
#![allow(clippy::manual_repeat_n)]

78pub mod algorithm;
79pub mod autograd;
80pub mod dtype;
81pub mod error;
82pub mod ops;
83pub mod runtime;
84#[cfg(feature = "sparse")]
85pub mod sparse;
86pub mod tensor;
87
88/// Prelude module for convenient imports
89///
90/// Import everything needed for tensor operations with `use numr::prelude::*`:
91/// - Core types: `Tensor`, `DType`, `Layout`, `Error`, `Result`
92/// - Runtime traits: `Runtime`, `Device`, `RuntimeClient`
93/// - Operation traits: `TensorOps`, `ScalarOps`, `CompareOps`
94/// - Algorithm traits: `LinearAlgebraAlgorithms`, `FftAlgorithms`, `SpecialFunctions`
95/// - Backend runtimes: `CpuRuntime`, `CudaRuntime`, `WgpuRuntime` (feature-gated)
96pub mod prelude {
97    // Core types
98    pub use crate::dtype::{DType, DataType};
99    pub use crate::error::{Error, Result};
100    pub use crate::tensor::{Layout, Shape, Strides, Tensor};
101
102    // Runtime traits
103    pub use crate::runtime::{Device, Runtime, RuntimeClient};
104
105    // Operation traits (same API across all backends)
106    pub use crate::ops::{
107        ActivationOps, BinaryOps, CompareOps, ComplexOps, ConditionalOps, ConvOps, CumulativeOps,
108        DistanceMetric, DistanceOps, IndexingOps, LinalgOps, LogicalOps, MatmulOps,
109        MeshgridIndexing, NormalizationOps, PaddingMode, ReduceOps, ScalarOps, ShapeOps,
110        SortingOps, StatisticalOps, TensorOps, TypeConversionOps, UnaryOps, UtilityOps,
111    };
112    pub use crate::ops::{AdvancedRandomOps, MultivariateRandomOps, QuasiRandomOps, RandomOps};
113
114    // Algorithm traits
115    pub use crate::algorithm::SpecialFunctions;
116    pub use crate::algorithm::fft::{FftAlgorithms, FftDirection, FftNormalization};
117
118    // Backend runtimes
119    pub use crate::runtime::cpu::{CpuClient, CpuDevice, CpuRuntime, ParallelismConfig};
120
121    #[cfg(feature = "cuda")]
122    pub use crate::runtime::cuda::{CudaClient, CudaDevice, CudaRuntime};
123
124    #[cfg(feature = "wgpu")]
125    pub use crate::runtime::wgpu::{WgpuClient, WgpuDevice, WgpuRuntime};
126
127    // Sparse tensors (feature-gated)
128    #[cfg(feature = "sparse")]
129    pub use crate::sparse::Sparse24Ops;
130    #[cfg(feature = "sparse")]
131    pub use crate::sparse::{Sparse24Tensor, SparseFormat, SparseOps, SparseTensor};
132}
133
134/// Default runtime based on enabled features
135///
136/// - With `cuda` feature: `CudaRuntime`
137/// - With `wgpu` feature (no cuda): `WgpuRuntime`
138/// - Otherwise: `CpuRuntime`
139#[cfg(feature = "cuda")]
140pub type DefaultRuntime = runtime::cuda::CudaRuntime;
141
142/// Default runtime based on enabled features
143#[cfg(all(feature = "wgpu", not(feature = "cuda")))]
144pub type DefaultRuntime = runtime::wgpu::WgpuRuntime;
145
146/// Default runtime based on enabled features
147#[cfg(not(any(feature = "cuda", feature = "wgpu")))]
148pub type DefaultRuntime = runtime::cpu::CpuRuntime;