// generic_simd/lib.rs

// Build without the standard library when the `std` cargo feature is disabled.
#![cfg_attr(not(feature = "std"), no_std)]
// Unstable features required for wasm32 SIMD support (nightly only).
#![cfg_attr(
    all(feature = "nightly", target_arch = "wasm32"),
    feature(wasm_simd, wasm_target_feature)
)]
// Unstable features required for aarch64 NEON support (nightly only).
#![cfg_attr(
    all(feature = "nightly", target_arch = "aarch64"),
    feature(stdsimd, aarch64_target_feature)
)]
//! `generic-simd` provides safe and idiomatic zero-cost abstractions for writing explicit
//! cross-platform SIMD operations.
//!
//! # Supported architectures
//! All architectures are supported via scalar fallbacks, but the following instruction sets are
//! also supported:
//! * SSE4.1 (x86/x86-64)
//! * AVX (x86/x86-64)
//! * NEON (aarch64, with `nightly` cargo feature)
//! * SIMD128 (wasm32, with `nightly` cargo feature and `simd128` target feature)
//!
//! The various architecture-specific types are available in the [`arch`](arch/index.html) module.
//!
//! # Abstractions
//! Vector abstractions are provided via the traits in the [`vector`](vector/index.html) module.
//! Generics that use these traits are able to utilize any of the supported instruction sets.
//!
//! The following example performs a vector-accelerated sum of an input slice:
//! ```
//! use generic_simd::{
//!     arch::Token,
//!     dispatch,
//!     scalar::ScalarExt,
//!     slice::SliceExt,
//!     vector::NativeVector,
//! };
//!
//! // This function provides a generic implementation for any instruction set.
//! // Here we use the "native" vector type, i.e. the widest vector directly supported by the
//! // architecture.
//! #[inline]
//! fn sum_impl<T>(token: T, input: &[f32]) -> f32
//! where
//!     T: Token,
//!     f32: ScalarExt<T> + core::iter::Sum<NativeVector<f32, T>>,
//! {
//!     // Use aligned loads in this example, which may be better on some architectures.
//!     let (start, vectors, end) = input.align_native(token);
//!
//!     // Sum across the vector lanes, plus the unaligned portions
//!     vectors.iter().copied().sum::<f32>() + start.iter().chain(end).sum::<f32>()
//! }
//!
//! // This function selects the best instruction set at runtime.
//! // The "dispatch" macro compiles this function for each supported architecture.
//! #[dispatch(token)]
//! fn sum(input: &[f32]) -> f32 {
//!     sum_impl(token, input)
//! }
//!
//! assert_eq!(sum(&[1f32; 10]), 10.);
//! ```
//!
//! # Vector shims
//! Various instruction sets provide vectors with different widths, so shims are provided to
//! create vectors of particular widths regardless of architecture.  These are available in the
//! [`shim`](shim/index.html) module.
//!
//! For example, the following function performs an [Array of Structures of Arrays](https://en.wikipedia.org/wiki/AoS_and_SoA)
//! operation using arrays of 4 `f64`s regardless of instruction set:
//! ```
//! use generic_simd::{
//!     arch::Token,
//!     dispatch,
//!     scalar::Scalar,
//!     slice::Slice,
//!     vector::{Signed, Vector, width},
//! };
//!
//! // Equivalent to an array of 4 2-dimensional coordinates,
//! // but with a vectorizable memory layout.
//! struct Coordinates {
//!     x: [f64; 4],
//!     y: [f64; 4],
//! }
//!
//! // A generic mean implementation for any instruction set.
//! fn mean_impl<T>(token: T, input: &[Coordinates]) -> (f64, f64)
//! where
//!     T: Token,
//!     f64: Scalar<T, width::W4>,
//!     <f64 as Scalar<T, width::W4>>::Vector: Signed,
//! {
//!     let mut xsum = f64::zeroed(token);
//!     let mut ysum = f64::zeroed(token);
//!
//!     for Coordinates { x, y } in input {
//!         // read the arrays into vectors
//!         xsum += x.read(token);
//!         ysum += y.read(token);
//!     }
//!
//!     // sum across the vector lanes
//!     (
//!         xsum.iter().sum::<f64>() / (input.len() * 4) as f64,
//!         ysum.iter().sum::<f64>() / (input.len() * 4) as f64,
//!     )
//! }
//!
//! // Selects the best instruction set at runtime.
//! #[dispatch(token)]
//! fn mean(input: &[Coordinates]) -> (f64, f64) {
//!     mean_impl(token, input)
//! }
//! ```

116// Re-export for use from macros.
117#[doc(hidden)]
118pub use multiversion;
119
120/// Multiversions a function over all supported instruction sets.
121///
122/// Tagging a function with `#[dispatch(token)]` creates a version of the function for each
123/// supported instruction set and provides its token as `token`.
124/// The best supported function variant is selected at runtime.
125///
126/// # Implementation
127/// This attribute is a wrapper for [`multiversion`] and supports all of its
128/// conditional compilation and static dispatch features.
129///
130/// # Example
131/// ```
132/// use generic_simd::slice::SliceExt;
133///
134/// #[generic_simd::dispatch(token)]
135/// pub fn add_one(x: &mut [f32]) {
136///     let (start, vecs, end) = x.align_native_mut(token);
137///     for s in start.iter_mut().chain(end.iter_mut()) {
138///         *s += 1.;
139///     }
140///
141///     for v in vecs {
142///         *v += 1.;
143///     }
144/// }
145///
146/// #[generic_simd::dispatch(_token)]
147/// pub fn add_two(x: &mut [f32]) {
148///     // Static dispatching provided by `multiversion`.
149///     // This does not perform runtime feature selection and allows inlining.
150///     dispatch!(add_one(x));
151///     dispatch!(add_one(x));
152/// }
153/// ```
154///
155/// [Abstractions]: index.html#abstractions
156/// [Vector shims]: index.html#vector-shims
157/// [`multiversion`]: ../multiversion/attr.multiversion.html
158pub use generic_simd_macros::dispatch;
159
160#[macro_use]
161mod implementation;
162
163pub mod alignment;
164pub mod arch;
165pub mod pointer;
166pub mod scalar;
167pub mod shim;
168pub mod slice;
169pub mod vector;