generic_simd/lib.rs
1#![cfg_attr(not(feature = "std"), no_std)]
2#![cfg_attr(
3 all(feature = "nightly", target_arch = "wasm32"),
4 feature(wasm_simd, wasm_target_feature)
5)]
6#![cfg_attr(
7 all(feature = "nightly", target_arch = "aarch64"),
8 feature(stdsimd, aarch64_target_feature)
9)]
10//! `generic-simd` provides safe and idiomatic zero-cost abstractions for writing explicit
11//! cross-platform SIMD operations.
12//!
13//! # Supported architectures
14//! All architectures are supported via scalar fallbacks, but the following instruction sets are
15//! also supported:
16//! * SSE4.1 (x86/x86-64)
17//! * AVX (x86/x86-64)
18//! * NEON (aarch64, with `nightly` cargo feature)
19//! * SIMD128 (wasm32, with `nightly` cargo feature and `simd128` target feature)
20//!
21//! The various architecture-specific types are available in the [`arch`](arch/index.html) module.
22//!
23//! # Abstractions
24//! Vector abstractions are provided via the traits in the [`vector`](vector/index.html) module.
25//! Generics that use these traits are able to utilize any of the supported instruction sets.
26//!
27//! The following example performs a vector-accelerated sum of an input slice:
28//! ```
29//! use generic_simd::{
30//! arch::Token,
31//! dispatch,
32//! scalar::ScalarExt,
33//! slice::SliceExt,
34//! vector::NativeVector,
35//! };
36//!
37//! // This function provides a generic implementation for any instruction set.
38//! // Here we use the "native" vector type, i.e. the widest vector directly supported by the
39//! // architecture.
40//! #[inline]
41//! fn sum_impl<T>(token: T, input: &[f32]) -> f32
42//! where
43//! T: Token,
44//! f32: ScalarExt<T> + core::iter::Sum<NativeVector<f32, T>>,
45//! {
46//! // Use aligned loads in this example, which may be better on some architectures.
47//! let (start, vectors, end) = input.align_native(token);
48//!
49//! // Sum across the vector lanes, plus the unaligned portions
50//! vectors.iter().copied().sum::<f32>() + start.iter().chain(end).sum::<f32>()
51//! }
52//!
53//! // This function selects the best instruction set at runtime.
54//! // The "dispatch" macro compiles this function for each supported architecture.
55//! #[dispatch(token)]
56//! fn sum(input: &[f32]) -> f32 {
57//! sum_impl(token, input)
58//! }
59//!
60//! assert_eq!(sum(&[1f32; 10]), 10.);
61//! ```
62//!
63//! # Vector shims
64//! Various instruction sets provide vectors with different widths, so shims are provided to
65//! create vectors of particular widths regardless of architecture. These are available in the
66//! [`shim`](shim/index.html) module.
67//!
68//! For example, the following function computes the mean of coordinates stored in an [Array of Structures of Arrays](https://en.wikipedia.org/wiki/AoS_and_SoA)
69//! layout, using vectors of 4 `f64`s regardless of instruction set:
70//! ```
71//! use generic_simd::{
72//! arch::Token,
73//! dispatch,
74//! scalar::Scalar,
75//! slice::Slice,
76//! vector::{Signed, Vector, width},
77//! };
78//!
79//! // Equivalent to an array of 4 2-dimensional coordinates,
80//! // Equivalent to an array of four 2-dimensional coordinates,
81//! struct Coordinates {
82//! x: [f64; 4],
83//! y: [f64; 4],
84//! }
85//!
86//! // A generic mean implementation for any instruction set.
87//! fn mean_impl<T>(token: T, input: &[Coordinates]) -> (f64, f64)
88//! where
89//! T: Token,
90//! f64: Scalar<T, width::W4>,
91//! <f64 as Scalar<T, width::W4>>::Vector: Signed,
92//! {
93//! let mut xsum = f64::zeroed(token);
94//! let mut ysum = f64::zeroed(token);
95//!
96//! for Coordinates { x, y } in input {
97//! // read the arrays into vectors
98//! xsum += x.read(token);
99//! ysum += y.read(token);
100//! }
101//!
102//! // sum across the vector lanes
103//! (
104//! xsum.iter().sum::<f64>() / (input.len() * 4) as f64,
105//! ysum.iter().sum::<f64>() / (input.len() * 4) as f64,
106//! )
107//! }
108//!
109//! // Selects the best instruction set at runtime.
110//! #[dispatch(token)]
111//! fn mean(input: &[Coordinates]) -> (f64, f64) {
112//! mean_impl(token, input)
113//! }
114//! ```
115
116// Re-export for use from macros.
117#[doc(hidden)]
118pub use multiversion;
119
120/// Multiversions a function over all supported instruction sets.
121///
122/// Tagging a function with `#[dispatch(token)]` creates a version of the function for each
123/// supported instruction set and provides its token as `token`.
124/// The best supported function variant is selected at runtime (see [Abstractions] and [Vector shims]).
125///
126/// # Implementation
127/// This attribute is a wrapper for [`multiversion`] and supports all of its
128/// conditional compilation and static dispatch features.
129///
130/// # Example
131/// ```
132/// use generic_simd::slice::SliceExt;
133///
134/// #[generic_simd::dispatch(token)]
135/// pub fn add_one(x: &mut [f32]) {
136/// let (start, vecs, end) = x.align_native_mut(token);
137/// for s in start.iter_mut().chain(end.iter_mut()) {
138/// *s += 1.;
139/// }
140///
141/// for v in vecs {
142/// *v += 1.;
143/// }
144/// }
145///
146/// #[generic_simd::dispatch(_token)]
147/// pub fn add_two(x: &mut [f32]) {
148/// // Static dispatching provided by `multiversion`.
149/// // This does not perform runtime feature selection and allows inlining.
150/// dispatch!(add_one(x));
151/// dispatch!(add_one(x));
152/// }
153/// ```
154///
155/// [Abstractions]: index.html#abstractions
156/// [Vector shims]: index.html#vector-shims
157/// [`multiversion`]: ../multiversion/attr.multiversion.html
158pub use generic_simd_macros::dispatch;
159
// `#[macro_use]` makes the `macro_rules!` macros defined in `implementation` visible to
// everything declared after this point, so this private module must stay ahead of the public
// modules below. NOTE(review): presumably those modules rely on these macros; confirm before
// reordering.
160#[macro_use]
161mod implementation;
162
// Public API surface. Per the crate-level docs above, `arch` holds the architecture-specific
// types, `vector` the vector abstraction traits, and `shim` the fixed-width vector shims.
163pub mod alignment;
164pub mod arch;
165pub mod pointer;
166pub mod scalar;
167pub mod shim;
168pub mod slice;
169pub mod vector;