coral_aarch64/
lib.rs

1//! `coral-aarch64` is a BLAS implementation in pure Rust for AArch64.
2//! 
3//! - no dependencies.
4//! - column-major only. 
5//! - level1 and level2 routines fully implemented. 
6//! - level3 only has `GEMM`. 
7//!
8//! This crate relies heavily on `unsafe` code and only targets AArch64. Although it is fast,
9//! a modern, fully safe, `portable-simd`-based implementation
10//! [exists](https://crates.io/crates/coral-blas) and is not far off in speed.
11//! I highly suggest using that instead.
12//!
13//! benchmarks: <https://dev-undergrad.dev/posts/benchmarks/>
14//!
15//! ## example
16//!
17//! `sgemm` 
18//! \\[ 
19//!     C \ \leftarrow \ \alpha A B + \beta C 
20//! \\]
21//!
22//! ```
23//! use coral_aarch64::level3::sgemm;
24//! use coral_aarch64::enums::CoralTranspose;
25//!
26//! fn main() {
27//!     // A = [[1, 3],
28//!     //      [2, 4]]
29//!     let a = vec![
30//!         1.0, 2.0,   // column 0
31//!         3.0, 4.0,   // column 1
32//!     ];
33//!
34//!     // B = [[5, 7],
35//!     //      [6, 8]]
36//!     let b = vec![
37//!         5.0, 6.0,   // column 0
38//!         7.0, 8.0,   // column 1
39//!     ];
40//!
41//!     // C = identity
42//!     let mut c = vec![
43//!         1.0, 0.0,
44//!         0.0, 1.0,
45//!     ];
46//!
47//!     let m = 2;
48//!     let n = 2;
49//!     let k = 2;
50//!
51//!     let alpha = 2.0;
52//!     let beta  = 1.0;
53//!
54//!     sgemm(
55//!         CoralTranspose::NoTranspose,
56//!         CoralTranspose::NoTranspose,
57//!         m, n, k,
58//!         alpha,
59//!         a.as_ptr(), m,
60//!         b.as_ptr(), k,
61//!         beta,
62//!         c.as_mut_ptr(), m,
63//!     );
64//!
65//!     // C = [[47, 62],
66//!     //      [68, 93]]
67//!     assert!((c[0] - 47.0).abs() < 1e-6);
68//!     assert!((c[1] - 68.0).abs() < 1e-6);
69//!     assert!((c[2] - 62.0).abs() < 1e-6);
70//!     assert!((c[3] - 93.0).abs() < 1e-6);
71//! }
72//! ```
73
74
/// BLAS level 1 routines; compiled only when targeting AArch64.
75#[cfg(target_arch = "aarch64")]
76pub mod level1;
77
/// BLAS level 2 routines; compiled only when targeting AArch64.
78#[cfg(target_arch = "aarch64")]
79pub mod level2;
80
/// BLAS level 3 routines (`GEMM` only, e.g. `sgemm`); compiled only when targeting AArch64.
81#[cfg(target_arch = "aarch64")]
82pub mod level3;
83
/// Argument enums such as `CoralTranspose`. Unlike the routine modules declared
/// alongside it, this module is not gated on the target architecture.
84pub mod enums;
85
// Crate-private, AArch64-only module. Presumably holds specialised level 1
// helpers used by the public routines -- TODO confirm against its contents.
86#[cfg(target_arch = "aarch64")]
87pub(crate) mod level1_special;
88
89