// npsimd 0.3.0
//
// An ergonomic library for architecture-specific vectorization.
// Documentation
//! CPU support for SIMD features.
//!
//! This module defines a type-safe, zero-cost mechanism for feature testing.
//! Functions relying on particular SIMD features take a type-generic parameter
//! that implements [`FeatureSet`], and use `where` bounds to test that it has
//! certain features.

/// A CPU feature.
///
/// Features are organized into _generations_, such as [`sse::FeatureGroup`].
/// Only one generation of extensions should be used at a time -- generations
/// use different instruction formats, and mixing them degrades performance.
/// Encoding the generation into the type system (the `G` parameter of this
/// trait) prevents incompatible features from being mixed accidentally.
///
/// [`sse::FeatureGroup`]: super::sse::FeatureGroup
pub trait Feature<G>: Sized {
    /// Determine whether this feature is supported.
    ///
    /// Given a [`RuntimeSupport`], which tracks which features the current CPU
    /// supports, this function extracts the feature represented by this type.
    /// If the CPU does not support this feature, [`None`] is returned.
    fn get_support(runtime: &RuntimeSupport) -> Option<Self>;
}

/// A set of CPU features.
///
/// Types that implement this trait contain a fixed set of features.  Whether a
/// particular feature is contained can be tested using [`HasFeature`].
///
/// The standard implementation of this trait is a heterogeneous list, using
/// the [`unit`] type `()` for the empty list and the [`tuple`] type `(H, T)`
/// for prepending an element `H` to a list `T`.  The [`feature_set`] macro can
/// be used to construct such a list more easily.
///
/// [`feature_set`]: crate::intel::low::feature_set
pub trait FeatureSet<G>: Sized {
    /// Determine whether this set of features is supported.
    ///
    /// Given a [`RuntimeSupport`], which tracks which features the current CPU
    /// supports, this function extracts the features represented by this type.
    /// If the CPU does not support all of these features, [`None`] is
    /// returned.
    fn get_support(runtime: &RuntimeSupport) -> Option<Self>;
}

impl<G> FeatureSet<G> for () {
    /// The empty list names no features, so it places no demands on the CPU
    /// and is reported as supported whatever `_runtime` contains.
    fn get_support(_runtime: &RuntimeSupport) -> Option<Self> {
        Some(())
    }
}

impl<G, H, T> FeatureSet<G> for (H, T)
where
    H: Feature<G>,
    T: FeatureSet<G>,
{
    /// A non-empty list is supported only when its head feature and every
    /// feature in its tail are supported.
    fn get_support(runtime: &RuntimeSupport) -> Option<Self> {
        // The head is queried first; a missing head means the tail is never
        // consulted.
        let head = <H as Feature<G>>::get_support(runtime)?;
        let tail = <T as FeatureSet<G>>::get_support(runtime)?;
        Some((head, tail))
    }
}

/// Whether a CPU feature set contains a particular feature.
///
/// A bound `S: HasFeature<G, F>` lets generic code require, at compile time,
/// that the feature set `S` includes the feature `F` of generation `G`.
///
/// This is a `#[marker]` trait: it declares no items, and in exchange its
/// implementations are allowed to overlap.  The tuple implementations rely on
/// that, since a feature may match both the head of a list and something in
/// its tail.  (The attribute is gated behind the unstable `marker_trait_attr`
/// feature at the time of writing.)
///
/// # Safety
///
/// A type `T` can safely implement `HasFeature<G, F>`, for any `G` and `F`, if
/// and only if the following condition holds:
///
/// - If a soundly constructed value of type `T` exists, then an instance of the
///   feature `F` exists, indicating that the current CPU implements it.
#[marker]
pub unsafe trait HasFeature<G, F: Feature<G>>: FeatureSet<G> {}

// Head match: a list contains the feature at its head.
//
// SAFETY: every value of type `(H, T)` holds a value of type `H`, so the
// existence of the pair implies that an instance of the feature `H` exists.
unsafe impl<G, H, T> HasFeature<G, H> for (H, T)
where H: Feature<G>, T: FeatureSet<G> {}

// Tail match: a list contains every feature that its tail contains.
//
// SAFETY: every value of type `(H, T)` holds a value of type `T`, and
// `T: HasFeature<G, F>` guarantees that the existence of a `T` implies that an
// instance of the feature `F` exists.
unsafe impl<G, F, H, T> HasFeature<G, F> for (H, T)
where F: Feature<G>, H: Feature<G>, T: HasFeature<G, F> {}

// NOTE: `()` intentionally has no `HasFeature` impl.
//
// The empty feature set can always be constructed -- `FeatureSet::get_support`
// returns `Some(())` on every CPU -- so its existence proves nothing about the
// hardware.  Implementing `HasFeature<G, F>` for it would violate the trait's
// safety contract, and because `(H, T)` inherits the features of its tail, it
// would also make every feature list claim to contain every feature.  Lookup
// through a list terminates at the head-match impl for `(H, T)`, so no base
// case on `()` is needed.

/// Feature support information for the current CPU.
///
/// This structure contains parsed information from `CPUID` about what SIMD and
/// related features are supported by the current CPU.  It is a read-only type,
/// offering no way to directly mutate the internal feature readings (so that
/// features are not misrepresented as being supported).
///
/// The fields are private and only exposed through same-named getters, so a
/// value can be copied, compared and printed freely without weakening that
/// guarantee.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct RuntimeSupport {
    // The SSE generation of instructions.
    sse: bool,
    sse2: bool,
    sse3: bool,
    ssse3: bool,
    sse4_1: bool,
    sse4_2: bool,

    // The AVX generation of instructions.
    avx: bool,
    f16c: bool,
    fma: bool,
    avx2: bool,
}

impl RuntimeSupport {
    /// Detect feature support in the current CPU.
    ///
    /// The SSE generation is reported directly from the `CPUID` feature bits.
    ///
    /// The AVX generation (AVX, F16C, FMA and AVX2) is additionally gated on
    /// operating-system support.  `CPUID` only advertises what the processor
    /// implements; these instructions still fault unless the OS has set
    /// `CR4.OSXSAVE` and enabled both the SSE (XMM) and AVX (YMM) state
    /// components in `XCR0`.  A feature of this generation is therefore
    /// reported only when the CPU implements it *and* the OS has enabled that
    /// register state.
    pub fn detect() -> Self {
        use raw_cpuid::CpuId;

        // Bits 1 (SSE/XMM state) and 2 (AVX/YMM state) of `XCR0`.
        const XCR0_SSE_AVX_STATE: u64 = 0b110;

        let cpuid = CpuId::new();
        let feats = cpuid.get_feature_info();
        let feats = feats.as_ref();
        let efeats = cpuid.get_extended_feature_info();
        let efeats = efeats.as_ref();

        // `has_oxsave` is raw_cpuid's spelling of the OSXSAVE bit.
        let os_avx_state = feats.is_some_and(|x| x.has_xsave() && x.has_oxsave()) && {
            // SAFETY: `OSXSAVE` is set (checked just above), which is the
            // architectural guarantee that `XGETBV` may be executed.
            #[cfg(target_arch = "x86")]
            let xcr0 = unsafe { core::arch::x86::_xgetbv(0) };
            #[cfg(target_arch = "x86_64")]
            let xcr0 = unsafe { core::arch::x86_64::_xgetbv(0) };
            // `XGETBV` only exists on x86; elsewhere no state is enabled.
            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
            let xcr0 = 0_u64;

            (xcr0 & XCR0_SSE_AVX_STATE) == XCR0_SSE_AVX_STATE
        };

        Self {
            sse: feats.is_some_and(|x| x.has_sse()),
            sse2: feats.is_some_and(|x| x.has_sse2()),
            sse3: feats.is_some_and(|x| x.has_sse3()),
            ssse3: feats.is_some_and(|x| x.has_ssse3()),
            sse4_1: feats.is_some_and(|x| x.has_sse41()),
            sse4_2: feats.is_some_and(|x| x.has_sse42()),

            avx: os_avx_state && feats.is_some_and(|x| x.has_avx()),
            f16c: os_avx_state && feats.is_some_and(|x| x.has_f16c()),
            fma: os_avx_state && feats.is_some_and(|x| x.has_fma()),
            avx2: os_avx_state && efeats.is_some_and(|x| x.has_avx2()),
        }
    }

    /// Whether SSE is supported.
    pub fn sse(&self) -> bool {
        self.sse
    }

    /// Whether SSE2 is supported.
    pub fn sse2(&self) -> bool {
        self.sse2
    }

    /// Whether SSE3 is supported.
    pub fn sse3(&self) -> bool {
        self.sse3
    }

    /// Whether SSSE3 is supported.
    pub fn ssse3(&self) -> bool {
        self.ssse3
    }

    /// Whether SSE4.1 is supported.
    pub fn sse4_1(&self) -> bool {
        self.sse4_1
    }

    /// Whether SSE4.2 is supported.
    pub fn sse4_2(&self) -> bool {
        self.sse4_2
    }

    /// Whether AVX is supported by the CPU and enabled by the OS.
    pub fn avx(&self) -> bool {
        self.avx
    }

    /// Whether F16C is supported by the CPU and enabled by the OS.
    pub fn f16c(&self) -> bool {
        self.f16c
    }

    /// Whether FMA is supported by the CPU and enabled by the OS.
    pub fn fma(&self) -> bool {
        self.fma
    }

    /// Whether AVX2 is supported by the CPU and enabled by the OS.
    pub fn avx2(&self) -> bool {
        self.avx2
    }
}