Skip to main content

oxicuda_blas/
lib.rs

1//! # OxiCUDA BLAS — GPU-Accelerated BLAS Operations
2//!
3//! This crate provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS),
4//! serving as a pure Rust equivalent to cuBLAS.
5//!
6//! ## Quick start
7//!
8//! ```rust,no_run
9//! use std::sync::Arc;
10//! use oxicuda_driver::Context;
11//! use oxicuda_blas::handle::BlasHandle;
12//!
13//! # fn main() -> Result<(), oxicuda_blas::error::BlasError> {
14//! # let ctx: Arc<Context> = unimplemented!();
15//! let handle = BlasHandle::new(&ctx)?;
16//! // ... call BLAS routines via the handle ...
17//! # Ok(())
18//! # }
19//! ```
20
21#![warn(clippy::all)]
22#![warn(missing_docs)]
23
24pub mod algorithm_selection;
25pub mod batched;
26pub mod complex_gemm;
27pub mod elementwise;
28pub mod error;
29pub mod handle;
30pub mod level1;
31pub mod level2;
32pub mod level3;
33pub mod precision;
34pub mod reduction;
35pub mod types;
36
37#[cfg(test)]
38mod test_matrices;
39
40pub use algorithm_selection::{
41    AlgorithmConfig, AlgorithmHeuristic, AlgorithmId, AlgorithmSelector, EpiloguePreference,
42    SwizzleMode,
43};
44pub use error::{BlasError, BlasResult};
45pub use handle::BlasHandle;
46pub use types::{
47    DiagType, E4M3, E5M2, FillMode, GpuFloat, Layout, MathMode, MatrixDesc, MatrixDescMut,
48    PointerMode, Side, Transpose, VectorDesc,
49};
50
51/// Convenience re-exports for common BLAS usage.
52///
53/// ```rust,no_run
54/// use oxicuda_blas::prelude::*;
55/// ```
56pub mod prelude {
57    // Algorithm selection (cuBLASLt-style)
58    pub use crate::algorithm_selection::{
59        AlgorithmConfig, AlgorithmHeuristic, AlgorithmId, AlgorithmSelector, EpiloguePreference,
60        SwizzleMode,
61    };
62
63    // Core types
64    pub use crate::error::{BlasError, BlasResult};
65    pub use crate::handle::BlasHandle;
66    pub use crate::types::{
67        DiagType, E4M3, E5M2, FillMode, GpuFloat, Layout, MathMode, MatrixDesc, MatrixDescMut,
68        PointerMode, Side, Transpose, VectorDesc,
69    };
70
71    // BLAS Level 1
72    pub use crate::level1::{asum, axpy, copy_vec, dot, iamax, nrm2, scal, swap};
73
74    // BLAS Level 2
75    pub use crate::level2::{gemv, ger, symv, syr, trmv, trsv};
76
77    // BLAS Level 3
78    pub use crate::level3::persistent_gemm::PersistentGemmConfig;
79    pub use crate::level3::stream_k::StreamKConfig;
80    pub use crate::level3::{
81        batched_trsm, gemm_api, persistent_gemm, stream_k, symm, syr2k, syrk, trmm, trsm,
82    };
83
84    // Complex GEMM/GEMV
85    pub use crate::complex_gemm::{complex_gemm, complex_gemv};
86
87    // Batched operations
88    pub use crate::batched::{batched_gemm, grouped_gemm, strided_gemm};
89
90    // Elementwise operations
91    pub use crate::elementwise;
92
93    // Reduction operations
94    pub use crate::reduction;
95}