npsimd 0.3.0

An ergonomic library for architecture-specific vectorization.
Documentation
//! Generic SIMD intrinsic functions.

use core::mem::size_of;
use core::ops::Not;

use super::types::Vector;

// SIMD intrinsics that the Rust compiler supports directly.
//
// We use these over the LLVM intrinsics whenever possible, since the Rust
// compiler frontend understands them better and can probably optimize them a
// bit.
pub use core::intrinsics::simd::*;

/// A type carrying a constant value.
///
/// This trait can be used to work around the limitations of `const` parameters,
/// such as the restriction to primitive scalar types (integers, `bool`, and
/// `char`) and the inability to manipulate `const` parameters (when the nightly
/// `generic_const_exprs` feature is not used).  Instead of receiving a `const`
/// parameter, functions can receive a type parameter implementing this trait.
///
/// The type parameter `T` is the type of the carried value; an implementor
/// exposes that value through [`Imm::VAL`].
pub trait Imm<T> {
    /// The associated constant.
    ///
    /// This is the value of type `T` that the implementing type stands for.
    const VAL: T;
}

/// Build a vector whose first element is `value` and whose remaining elements
/// are `T::Elem::default()` (zero for the primitive numeric types).
///
/// The vector is first filled with the default element via `T::splat`, then
/// element 0 is overwritten with `simd_insert`.
///
/// # Safety
///
/// This forwards to the `simd_insert` intrinsic without any checking; the
/// caller must ensure `T` is a type that intrinsic accepts.
#[inline(always)]
pub unsafe fn simd_set_first<T: Vector>(value: T::Elem) -> T
where T::Elem: Default {
    let filled = T::splat(T::Elem::default());
    simd_insert(filled, 0, value)
}

/// Bitwise-invert every element of `x`.
///
/// Implemented as an XOR of `x` with a vector filled with
/// `!T::Elem::default()`.  For integer elements the default is 0, so that
/// fill value has every bit set and the XOR flips every bit.
///
/// # Safety
///
/// This forwards to the `simd_xor` intrinsic without any checking; the caller
/// must ensure `T` is a type that intrinsic accepts.
#[inline(always)]
pub unsafe fn simd_not<T: Vector>(x: T) -> T
where T::Elem: Default + Not<Output = T::Elem> {
    let flip_mask = T::splat(!T::Elem::default());
    simd_xor(x, flip_mask)
}

/// Bitwise AND of `x` with the inverse of `y`, element by element.
///
/// Computes `x & !y`: the second operand is the one that gets inverted.
///
/// # Safety
///
/// This forwards to [`simd_not`] and the `simd_and` intrinsic without any
/// checking; the caller must ensure `T` is a type they accept.
#[inline(always)]
pub unsafe fn simd_andnot<T: Vector>(x: T, y: T) -> T
where T::Elem: Default + Not<Output = T::Elem> {
    let inverted_y = simd_not(y);
    simd_and(x, inverted_y)
}

/// Rounding average of corresponding elements.
///
/// Both inputs are cast to the intermediate vector type `U`, where
/// `(x + y + 1) >> 1` is evaluated per element, and the result is cast back
/// to `T`.
///
/// NOTE(review): `U` presumably has wider elements than `T` so the sum cannot
/// overflow; nothing in this function enforces that.
///
/// # Safety
///
/// This forwards to the `simd_cast`, `simd_add` and `simd_shr` intrinsics
/// without any checking; the caller must ensure `T` and `U` are types those
/// intrinsics accept together.
#[inline(always)]
pub unsafe fn simd_avg<T: Vector, U: Vector>(x: T, y: T) -> T
where U::Elem: From<u8> {
    // A vector of ones serves both as the rounding bias and the shift count.
    let one = U::splat(1u8.into());
    let wide_x: U = simd_cast(x);
    let wide_y: U = simd_cast(y);
    let biased_sum = simd_add(simd_add(wide_x, wide_y), one);
    let halved = simd_shr(biased_sum, one);
    simd_cast(halved)
}

/// Element-wise maximum of `x` and `y`.
///
/// Each result element is taken from `x` where `x > y` and from `y`
/// otherwise.  The comparison mask has the same vector type `T` as the
/// inputs.
///
/// # Safety
///
/// This forwards to the `simd_gt` and `simd_select` intrinsics without any
/// checking; the caller must ensure `T` is usable both as their operand and
/// as their mask type.
#[inline(always)]
pub unsafe fn simd_max<T: Vector>(x: T, y: T) -> T {
    let x_is_greater: T = simd_gt(x, y);
    simd_select(x_is_greater, x, y)
}

/// Element-wise minimum of `x` and `y`.
///
/// Each result element is taken from `y` where `x > y` and from `x`
/// otherwise.  The comparison mask has the same vector type `T` as the
/// inputs.
///
/// # Safety
///
/// This forwards to the `simd_gt` and `simd_select` intrinsics without any
/// checking; the caller must ensure `T` is usable both as their operand and
/// as their mask type.
#[inline(always)]
pub unsafe fn simd_min<T: Vector>(x: T, y: T) -> T {
    let x_is_greater: T = simd_gt(x, y);
    simd_select(x_is_greater, y, x)
}

/// Shift every integer element left by the compile-time amount `BITS`.
///
/// When `BITS` is at least the element width in bits, the result is a vector
/// of zeros instead of performing the shift.
///
/// # Safety
///
/// This forwards to the `simd_shl` intrinsic without any checking; the caller
/// must ensure `T` is a type that intrinsic accepts.
#[inline(always)]
pub unsafe fn simd_shl_all<T: Vector, const BITS: u8>(x: T) -> T
where T::Elem: From<u8> {
    // Element width in bits, computed in `u8` like the shift amount.
    let elem_bits = size_of::<T::Elem>() as u8 * 8;
    if BITS < elem_bits {
        simd_shl(x, T::splat(BITS.into()))
    } else {
        T::splat(0u8.into())
    }
}

/// Shift every integer element right by the compile-time amount `BITS`.
///
/// When `BITS` is at least the element width in bits, the shift is replaced
/// by a comparison against zero, which gives the saturated result.
///
/// # Safety
///
/// This forwards to the `simd_shr` and `simd_lt` intrinsics without any
/// checking; the caller must ensure `T` is a type those intrinsics accept.
#[inline(always)]
pub unsafe fn simd_shr_all<T: Vector, const BITS: u8>(x: T) -> T
where T::Elem: From<u8> {
    // Element width in bits, computed in `u8` like the shift amount.
    let elem_bits = size_of::<T::Elem>() as u8 * 8;
    if BITS < elem_bits {
        return simd_shr(x, T::splat(BITS.into()));
    }

    // Shifting out every bit leaves only copies of the sign:
    // - unsigned elements are never below zero, so the result is always 0;
    // - signed elements yield !0 for negative values and 0 otherwise.
    simd_lt(x, T::splat(0u8.into()))
}

/// Element-wise absolute value.
///
/// Elements that compare less than `T::Elem::default()` are replaced by their
/// negation; all other elements pass through unchanged.
///
/// NOTE(review): for signed integers the most negative value presumably
/// stays unchanged, since its negation is not representable; confirm against
/// the `simd_neg` documentation.
///
/// # Safety
///
/// This forwards to the `simd_lt`, `simd_neg` and `simd_select` intrinsics
/// without any checking; the caller must ensure `T` is usable both as their
/// operand and as their mask type.
#[inline(always)]
pub unsafe fn simd_abs<T: Vector>(x: T) -> T
where T::Elem: Default {
    let zero = T::splat(T::Elem::default());
    let is_negative: T = simd_lt(x, zero);
    let negated = simd_neg(x);
    simd_select(is_negative, negated, x)
}

/// Take the leading elements of `x` and convert them to a different element
/// type.
///
/// A shuffle with indices `0..R::LEN` (built by [`simd_slice_indices`])
/// extracts the first `R::LEN` elements of `x` into the intermediate type
/// `U`, which has `T`'s element type.  That intermediate is then cast
/// element-wise to `R`.
///
/// The second shuffle operand is a vector of default elements; it is only
/// read if an index reaches past the end of `x`.
///
/// NOTE(review): `U` is presumably expected to have `R::LEN` elements; the
/// bounds here do not state that.
///
/// # Safety
///
/// This forwards to the `simd_shuffle` and `simd_cast` intrinsics without any
/// checking; the caller must ensure `T`, `U` and `R` are types those
/// intrinsics accept together.
#[inline(always)]
pub unsafe fn simd_expand<T: Vector, U: Vector, R: Vector>(x: T) -> R
where T::Elem: Default, U: Vector<Elem = T::Elem>, R::Elem: From<T::Elem>,
      [u32; R::LEN]: Sized {
    let filler = T::splat(T::Elem::default());
    let leading: U = simd_shuffle(x, filler, const {
        simd_slice_indices::<R>(0)
    });
    simd_cast::<U, R>(leading)
}

/// Indices for [`simd_shuffle`] for inserting a sub-vector into a vector.
///
/// The result has `R::LEN` entries.  Entry `i` is `i` (keep the element from
/// the first shuffle operand), except for positions in
/// `start .. start + T::LEN`, where it is `R::LEN + (i - start)` (take
/// element `i - start` of the second shuffle operand).  Positions of the
/// sub-vector that would fall past the end of the result are dropped.
pub const fn simd_insert_indices<T, R>(start: usize) -> [i32; R::LEN]
where T: Vector, R: Vector, [i32; R::LEN]: Sized {
    let mut table = [0i32; R::LEN];

    let mut lane = 0;
    while lane < R::LEN {
        // Lanes covered by the sub-vector read from the second operand, whose
        // elements are numbered from `R::LEN` upwards.
        table[lane] = if lane >= start && lane - start < T::LEN {
            (lane - start + R::LEN) as i32
        } else {
            lane as i32
        };
        lane += 1;
    }

    table
}

/// Indices for [`simd_shuffle`] for slicing into a vector.
///
/// The result has `T::LEN` entries counting up from `start`:
/// `start, start + 1, ..., start + T::LEN - 1`.
pub const fn simd_slice_indices<T: Vector>(start: usize) -> [i32; T::LEN]
where [i32; T::LEN]: Sized {
    let mut table = [0i32; T::LEN];
    let mut lane = 0;
    while lane < T::LEN {
        let source = start + lane;
        table[lane] = source as i32;
        lane += 1;
    }
    table
}

/// Indices for [`simd_shuffle`] for interleaving two vectors.
///
/// The result has `T::LEN` entries arranged as pairs.  Pair `k` holds
/// `start + k` followed by `start + k + T::LEN`, i.e. the same position in
/// the first and then the second shuffle operand.
///
/// Entries are always written two at a time, so an odd `T::LEN` makes the
/// last pair index past the end of the array.
pub const fn simd_unpack_indices<T: Vector>(start: usize) -> [i32; T::LEN]
where [i32; T::LEN]: Sized {
    let mut table = [0i32; T::LEN];
    // `slot` is the position of the first entry of each output pair.
    let mut slot = 0;
    while slot < T::LEN {
        let source = start + slot / 2;
        table[slot] = source as i32;
        table[slot + 1] = (source + T::LEN) as i32;
        slot += 2;
    }
    table
}

/// Indices for [`simd_shuffle`] for slicing into sub-vectors of a vector.
///
/// The result has `R::LEN` entries.  For entry `i`, let `p = start + i`; the
/// entry is `p % T::LEN + (p / T::LEN) * R::LEN`.  The position wraps around
/// every `T::LEN` elements, and each wrap advances the index by `R::LEN`.
///
/// NOTE(review): presumably `T` is the sub-vector type and each wrap moves on
/// to the next shuffle operand; confirm against the callers.
pub const fn simd_subslice_indices<T, R>(start: usize) -> [i32; R::LEN]
where T: Vector, R: Vector, [i32; R::LEN]: Sized {
    let mut table = [0i32; R::LEN];
    let mut lane = 0;
    while lane < R::LEN {
        let position = start + lane;
        let within = position % T::LEN;
        let wraps = position / T::LEN;
        table[lane] = (within + wraps * R::LEN) as i32;
        lane += 1;
    }
    table
}