npsimd 0.3.0

An ergonomic library for architecture-specific vectorization.
Documentation
//! Testing for SIMD features.
//!
//! SIMD instructions can only be used if the running CPU supports them, else
//! undefined behaviour occurs (the CPU will crash).  This module provides a way
//! to test for SIMD features at run-time, then to prove (at zero cost) that a
//! feature has been tested for.
//!
//! Intel's SIMD features are organized into four distinct generations.  Only
//! one generation should be used at a time (they use different instruction
//! encoding formats and switching generations may incur a runtime penalty).
//!
//! TODO: Link to all defined generations and features here.

use core::fmt;
use core::marker::PhantomData;

#[doc(inline)]
pub use crate::intel_features as features;

/// A collection of CPU features (possibly empty).
///
/// Implementors are meant to be zero-sized marker types.  Holding a value of
/// such a type is the guarantee that the current CPU supports every feature
/// the type stands for; that value is obtained at run-time through
/// [`Features::query()`].
pub trait Features<G>: Sized + Copy {
    /// Look these features up in a CPU support report.
    ///
    /// `runtime` is a [`RuntimeSupport`], which records what the current CPU
    /// supports.  If the features represented by this type are supported, an
    /// instance of the type is returned; otherwise the result is [`None`].
    fn query(runtime: &RuntimeSupport) -> Option<Self>;
}

// The empty feature list asks nothing of the CPU, so querying it never fails.
impl<G> Features<G> for () {
    fn query(_: &RuntimeSupport) -> Option<Self> {
        let nothing_required = ();
        Some(nothing_required)
    }
}

// A non-empty feature list: a single feature `H` followed by the rest of the
// list `T`.  The list is supported only if both parts are.
impl<G, H, T> Features<G> for (H, T)
where
    H: Feature<G>,
    T: Features<G>,
{
    fn query(support: &RuntimeSupport) -> Option<Self> {
        // The head is queried first; the tail is not queried at all if the
        // head turns out to be unsupported.
        let head = H::query(support)?;
        let tail = T::query(support)?;
        Some((head, tail))
    }
}

/// A single CPU feature.
///
/// This is distinguished from [`Features`] to ensure that a feature set does
/// not recursively contain more feature sets: the tuple implementation of
/// [`Features`] requires its first element to be a `Feature`, so a whole
/// feature list cannot appear in that position unless it is itself declared a
/// `Feature`.
///
/// The trait has no items of its own; everything a feature can do comes from
/// its [`Features`] supertrait.
pub trait Feature<G>: Features<G> {}

/// Whether a CPU feature set contains a particular feature.
///
/// A bound of the form `T: HasFeature<G, F>` states that the feature set `T`
/// guarantees the feature `F`.  The trait is declared `#[marker]`, which allows
/// its implementations to overlap (as the implementations for feature lists
/// do when the requested feature is both the head of a list and part of its
/// tail).
///
/// # Safety
///
/// A type `T` can safely implement `HasFeature<G, F>`, for any `G`, and `F`, if
/// and only if the following condition holds:
///
/// - If a soundly constructed value of type `T` exists, then an instance of the
///   feature `F` exists, indicating that the current CPU implements it.
#[marker]
pub unsafe trait HasFeature<G, F: Feature<G>>: Features<G> {}

// The empty list is declared to have every feature `F`.
//
// NOTE(review): a value of `()` can always be constructed, so under the safety
// contract documented on `HasFeature` this impl appears to assert that every
// feature is supported by the current CPU.  Because the recursive impl for
// `(H, T)` only requires `T: HasFeature<G, F>`, it also seems to make every
// feature list satisfy every `HasFeature` bound.  Confirm that this is
// intended before relying on `HasFeature` as a proof of support.
unsafe impl<G, F: Feature<G>> HasFeature<G, F> for () {}

// A list has the feature found at its head.
//
// SAFETY: a value of `(H, T)` contains a value of `H`, so whenever the list
// exists an instance of the feature `H` exists as well.
unsafe impl<G, H: Feature<G>, T: Features<G>> HasFeature<G, H> for (H, T) {}

// A list has every feature that its tail has.
//
// SAFETY: a value of `(H, T)` contains a value of `T`, and `T: HasFeature<G, F>`
// promises that an instance of `F` exists whenever a value of `T` does.
unsafe impl<G, F, H, T> HasFeature<G, F> for (H, T)
where
    F: Feature<G>,
    H: Feature<G>,
    T: HasFeature<G, F>,
{
}

/// A set of CPU features.
///
/// This is a zero-sized wrapper around an arbitrary CPU feature set type.  If
/// an instance of `FeatureSet` exists, then it is guaranteed that an instance
/// of the underlying feature set exists.
///
/// `G` is the group the features belong to and `L` is the feature set type
/// itself.  Neither is stored: both are only recorded through [`PhantomData`].
pub struct FeatureSet<G, L: Features<G>> {
    // Records the group type `G` without storing a value of it.
    group: PhantomData<G>,
    // Records the feature set type `L` without storing a value of it.
    feats: PhantomData<L>,
}

// `Clone` is implemented by hand rather than derived: a derived impl would
// additionally require `G: Clone` and `L: Clone`, which is not needed since
// only `PhantomData` markers are stored.
impl<G, L: Features<G>> Clone for FeatureSet<G, L> {
    fn clone(&self) -> Self {
        // `FeatureSet` is `Copy`, so cloning is a plain copy.
        *self
    }
}

// A `FeatureSet` only holds `PhantomData` markers, so it can be copied freely
// regardless of `G`.
impl<G, L: Features<G>> Copy for FeatureSet<G, L> {}

impl<G, L: Features<G>> FeatureSet<G, L> {
    /// Construct a [`FeatureSet`] with a proof of existence.
    pub const fn new(_proof: L) -> Self {
        Self {
            group: PhantomData,
            feats: PhantomData,
        }
    }
}

impl<G, L: Features<G>> From<L> for FeatureSet<G, L> {
    fn from(proof: L) -> Self {
        Self::new(proof)
    }
}

impl<G, L> Default for FeatureSet<G, L>
where
    L: Features<G> + Default,
{
    fn default() -> Self {
        // The proof value is built even though it is discarded right away: if
        // `L::default()` panics, that panic has to reach the caller.
        let proof = L::default();
        Self::from(proof)
    }
}

// A `FeatureSet` carries no data, so its debug output is simply the name of
// its own type (which spells out the group and the feature list).
impl<G, L: Features<G>> fmt::Debug for FeatureSet<G, L> {
    fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
        let own_type = core::any::type_name::<Self>();
        out.write_str(own_type)
    }
}

/// The empty feature group.
///
/// No features in this group exist; it should be used as the default group for
/// an empty feature set.
///
/// The enum has no variants, so no value of this type can be constructed; it
/// is only useful as a type parameter.
pub enum EmptyGroup {}

/// Construct a CPU feature set type.
///
/// Given a comma-separated list of types (a trailing comma is accepted), this
/// expands to the nested tuple type `(A, (B, (..., ())))`.  That type
/// implements [`Features`] for any `G` parameter such that all given types
/// implement [`Feature`] for that `G`, and it implements [`HasFeature`] for
/// every type in the list.  An empty list expands to `()`.
#[doc(hidden)]
#[macro_export]
macro_rules! intel_features {
    // An empty list ends the chain with the unit type.
    () => {
        ()
    };

    // Peel off the first type and recurse on whatever follows it.
    ($first:ty $(, $rest:ty)* $(,)?) => {
        ($first, $crate::intel_features!($($rest),*))
    };
}

/// Feature support information for the current CPU.
///
/// This structure contains parsed information from `CPUID` about what SIMD and
/// related features are supported by the current CPU.  It is a read-only type,
/// offering no way to directly mutate the internal feature readings (so that
/// features are not misrepresented as being supported).
///
/// The type can be printed for diagnostics, cloned, and compared; none of
/// these operations can alter the recorded readings.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RuntimeSupport {
    // The SSE generation of instructions.
    sse: bool,
    sse2: bool,
    sse3: bool,
    ssse3: bool,
    sse4_1: bool,
    sse4_2: bool,

    // The AVX generation of instructions.
    avx: bool,
    f16c: bool,
    fma: bool,
    avx2: bool,
}

impl RuntimeSupport {
    /// Detect feature support in the current CPU.
    ///
    /// The SSE generation of features is reported directly from the `CPUID`
    /// feature flags.
    ///
    /// The AVX generation (AVX, F16C, FMA and AVX2) is only reported when, in
    /// addition to the corresponding `CPUID` flag, the operating system has
    /// enabled saving and restoring of the XMM and YMM register state.
    /// Without that, executing an AVX-encoded instruction faults even though
    /// the CPU itself implements it, so the feature must not be advertised.
    ///
    /// If a `CPUID` leaf is unavailable, every feature read from it is
    /// reported as unsupported.
    pub fn detect() -> Self {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::_xgetbv;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::_xgetbv;

        use raw_cpuid::CpuId;

        // Bits 1 and 2 of XCR0: the OS manages XMM (SSE) and YMM (AVX) state.
        const XCR0_SSE_AVX_STATE: u64 = 0b110;

        let cpuid = CpuId::new();
        let feats = cpuid.get_feature_info();
        let feats = feats.as_ref();
        let efeats = cpuid.get_extended_feature_info();
        let efeats = efeats.as_ref();

        // Whether the OS has enabled the register state needed by the AVX
        // generation.  `XGETBV` is only executed once `CPUID` has reported
        // both XSAVE and OSXSAVE (the `&&` short-circuits otherwise).
        let os_avx = feats.is_some_and(|x| x.has_xsave() && x.has_oxsave()) && {
            // SAFETY: `CPUID` reported XSAVE support and OSXSAVE, meaning the
            // OS has enabled `XGETBV`, so reading XCR0 (register 0) is valid.
            let xcr0 = unsafe { _xgetbv(0) };
            (xcr0 & XCR0_SSE_AVX_STATE) == XCR0_SSE_AVX_STATE
        };

        Self {
            sse: feats.is_some_and(|x| x.has_sse()),
            sse2: feats.is_some_and(|x| x.has_sse2()),
            sse3: feats.is_some_and(|x| x.has_sse3()),
            ssse3: feats.is_some_and(|x| x.has_ssse3()),
            sse4_1: feats.is_some_and(|x| x.has_sse41()),
            sse4_2: feats.is_some_and(|x| x.has_sse42()),

            avx: os_avx && feats.is_some_and(|x| x.has_avx()),
            f16c: os_avx && feats.is_some_and(|x| x.has_f16c()),
            fma: os_avx && feats.is_some_and(|x| x.has_fma()),
            avx2: os_avx && efeats.is_some_and(|x| x.has_avx2()),
        }
    }

    /// Whether SSE is supported.
    pub fn sse(&self) -> bool {
        self.sse
    }

    /// Whether SSE2 is supported.
    pub fn sse2(&self) -> bool {
        self.sse2
    }

    /// Whether SSE3 is supported.
    pub fn sse3(&self) -> bool {
        self.sse3
    }

    /// Whether SSSE3 is supported.
    pub fn ssse3(&self) -> bool {
        self.ssse3
    }

    /// Whether SSE4.1 is supported.
    pub fn sse4_1(&self) -> bool {
        self.sse4_1
    }

    /// Whether SSE4.2 is supported.
    pub fn sse4_2(&self) -> bool {
        self.sse4_2
    }

    /// Whether AVX is supported (by both the CPU and the operating system).
    pub fn avx(&self) -> bool {
        self.avx
    }

    /// Whether F16C is supported (by both the CPU and the operating system).
    pub fn f16c(&self) -> bool {
        self.f16c
    }

    /// Whether FMA is supported (by both the CPU and the operating system).
    pub fn fma(&self) -> bool {
        self.fma
    }

    /// Whether AVX2 is supported (by both the CPU and the operating system).
    pub fn avx2(&self) -> bool {
        self.avx2
    }
}