fashex 0.0.10

Hexadecimal string encoding and decoding with best-effort SIMD acceleration.
Documentation
//! The actual implementation of the hexadecimal encoding / decoding logic.

#![allow(unsafe_code, reason = "SIMD")]
#![allow(unsafe_op_in_unsafe_fn, reason = "SIMD")]
#![allow(clippy::cast_lossless, reason = "SIMD")]
#![allow(clippy::cast_possible_truncation, reason = "SIMD")]
#![allow(clippy::cast_possible_wrap, reason = "SIMD")]
#![allow(clippy::cast_ptr_alignment, reason = "SIMD")]
#![allow(clippy::upper_case_acronyms, reason = "SIMD")]
#![allow(clippy::wildcard_imports, reason = "SIMD")]

#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
pub(crate) mod aarch64;
pub(crate) mod generic;
#[cfg(all(feature = "experimental-loongarch64-simd", target_arch = "loongarch64"))]
pub(crate) mod loongarch64;
#[cfg(feature = "portable-simd")]
pub(crate) mod simd;
#[cfg(target_arch = "wasm32")]
pub(crate) mod wasm32;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub(crate) mod x86;

use core::mem::MaybeUninit;

use crate::error::InvalidInput;

/// Hex-encodes `src` into the front of `dst`.
///
/// Only the first `2 * src.len()` elements of `dst` are handed to the encoder;
/// anything beyond that prefix is not passed on. `UPPER` is forwarded
/// unchanged to the backend.
///
/// This is the checked entry point: it validates the output length and then
/// defers to [`encode_unchecked`], which dispatches to a backend based on the
/// detected SIMD features.
///
/// ## Errors
///
/// Returns [`InvalidInput`] when `dst` holds fewer than `2 * src.len()`
/// elements.
pub(crate) fn encode<const UPPER: bool>(
    src: &[u8],
    dst: &mut [MaybeUninit<u8>],
) -> Result<(), InvalidInput> {
    // Trim the output to the exact encoded length; a buffer that is too short
    // becomes an error instead of a panic.
    let out = dst.get_mut(..src.len() * 2).ok_or(InvalidInput)?;

    #[allow(unsafe_code, reason = "The output length is checked right above")]
    // SAFETY: `out` was just sliced to exactly `2 * src.len()` elements, which
    // is the only precondition of `encode_unchecked`.
    unsafe {
        encode_unchecked::<UPPER>(src, out);
    }

    Ok(())
}

/// The unsafe version of [`encode`]: no length check, just backend dispatch.
///
/// `dst` is reinterpreted as a slice of two-element chunks (one chunk per
/// input byte) and handed, together with `src`, to the backend matching the
/// result of [`Vectorization::detect`].
///
/// ## Safety
///
/// The caller must guarantee that:
///
/// 1. `2 * src.len() == dst.len()`.
#[allow(unsafe_code, reason = "See below")]
unsafe fn encode_unchecked<const UPPER: bool>(src: &[u8], dst: &mut [MaybeUninit<u8>]) {
    #[allow(unsafe_code, reason = "The length is validated by caller")]
    // SAFETY: the caller guarantees `dst.len() == 2 * src.len()`, so the
    // length is an exact multiple of the chunk size (2).
    let pairs = unsafe { dst.as_chunks_unchecked_mut::<2>() };

    let backend = Vectorization::detect();

    #[allow(
        unsafe_code,
        reason = "CPU feature detected; the length is validated by caller"
    )]
    // SAFETY: every arm is only reachable when `detect` reported the matching
    // backend, and `pairs.len() == src.len()` follows from the caller's
    // length guarantee.
    unsafe {
        match backend {
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            Vectorization::AVX512 => {
                x86::encode_avx512_unchecked::<UPPER>(src, pairs);
            }
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            Vectorization::AVX2 => {
                x86::encode_avx2_unchecked::<UPPER>(src, pairs);
            }
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            Vectorization::SSSE3 => {
                x86::encode_ssse3_unchecked::<UPPER>(src, pairs);
            }
            #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
            Vectorization::NEON => {
                aarch64::encode_neon_unchecked::<UPPER>(src, pairs);
            }
            #[cfg(all(feature = "experimental-loongarch64-simd", target_arch = "loongarch64"))]
            Vectorization::LASX => {
                loongarch64::encode_lasx_unchecked::<UPPER>(src, pairs);
            }
            #[cfg(all(feature = "experimental-loongarch64-simd", target_arch = "loongarch64"))]
            Vectorization::LSX => {
                loongarch64::encode_lsx_unchecked::<UPPER>(src, pairs);
            }
            #[cfg(target_arch = "wasm32")]
            Vectorization::Wasm32SIMD128 => {
                wasm32::encode_simd128_unchecked::<UPPER>(src, pairs);
            }
            #[cfg(feature = "portable-simd")]
            Vectorization::PortableSIMD128 => {
                simd::encode_simd128_unchecked::<UPPER>(src, pairs);
            }
            // Scalar fallback when no SIMD backend was selected.
            Vectorization::None => {
                generic::encode_generic_unchecked::<UPPER>(src, pairs);
            }
        }
    }
}

/// Decodes pairs of hexadecimal characters from `src` into the front of `dst`.
///
/// Each two-byte element of `src` corresponds to one output element, so only
/// the first `src.len()` elements of `dst` are handed to the decoder.
///
/// This is the checked entry point: it validates the output length and then
/// defers to [`decode_unchecked`] with `VALIDATED = false`, which dispatches
/// to a backend based on the detected SIMD features.
///
/// ## Errors
///
/// Returns [`InvalidInput`] when `dst` holds fewer than `src.len()` elements,
/// or when [`decode_unchecked`] rejects the input.
pub(crate) fn decode(src: &[[u8; 2]], dst: &mut [MaybeUninit<u8>]) -> Result<(), InvalidInput> {
    // Trim the output to the exact decoded length; a buffer that is too short
    // becomes an error instead of a panic.
    let out = dst.get_mut(..src.len()).ok_or(InvalidInput)?;

    #[allow(unsafe_code, reason = "The output length is checked right above")]
    // SAFETY: `out` was just sliced to exactly `src.len()` elements, and
    // `VALIDATED` is `false`, so nothing is assumed about the input's content.
    unsafe {
        decode_unchecked::<false>(src, out)
    }
}

/// The unsafe version of [`decode`]: no length check, just backend dispatch.
///
/// `src` and `dst` are handed to the backend matching the result of
/// [`Vectorization::detect`]. Note that `VALIDATED` is forwarded only to the
/// generic (scalar) backend; the SIMD backends are called without it.
///
/// ## Safety
///
/// The caller must guarantee that:
///
/// 1. `src.len() == dst.len()`.
/// 2. When `VALIDATED`, the input contains only valid hexadecimal characters.
///
/// ## Errors
///
/// When `!VALIDATED` and the input contains invalid hexadecimal characters.
#[allow(unsafe_code, reason = "See below")]
unsafe fn decode_unchecked<const VALIDATED: bool>(
    src: &[[u8; 2]],
    dst: &mut [MaybeUninit<u8>],
) -> Result<(), InvalidInput> {
    let backend = Vectorization::detect();

    #[allow(
        unsafe_code,
        reason = "CPU feature detected; the length is validated by caller."
    )]
    // SAFETY: every arm is only reachable when `detect` reported the matching
    // backend, and the caller guarantees `src.len() == dst.len()`.
    unsafe {
        match backend {
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            Vectorization::AVX512 => x86::decode_avx512_unchecked(src, dst),
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            Vectorization::AVX2 => x86::decode_avx2_unchecked(src, dst),
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            Vectorization::SSSE3 => x86::decode_ssse3_unchecked(src, dst),
            #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
            Vectorization::NEON => aarch64::decode_neon_unchecked(src, dst),
            #[cfg(all(feature = "experimental-loongarch64-simd", target_arch = "loongarch64"))]
            Vectorization::LASX => loongarch64::decode_lasx_unchecked(src, dst),
            #[cfg(all(feature = "experimental-loongarch64-simd", target_arch = "loongarch64"))]
            Vectorization::LSX => loongarch64::decode_lsx_unchecked(src, dst),
            #[cfg(target_arch = "wasm32")]
            Vectorization::Wasm32SIMD128 => {
                // TODO: optimize?
                // There is no dedicated wasm32 decoder here, so this arm
                // reuses the scalar implementation.
                generic::decode_generic_unchecked::<VALIDATED>(src, dst)
            }
            #[cfg(feature = "portable-simd")]
            Vectorization::PortableSIMD128 => simd::decode_simd128_unchecked(src, dst),
            // Scalar fallback when no SIMD backend was selected.
            Vectorization::None => generic::decode_generic_unchecked::<VALIDATED>(src, dst),
        }
    }
}

/// The backend selected for encoding / decoding.
///
/// Which variants exist depends on the target architecture and the enabled
/// crate features; [`Vectorization::None`] is always available.
#[cfg_attr(debug_assertions, derive(Debug))]
enum Vectorization {
    /// `avx512f`, `avx512bw` and `avx512vl` instructions on `x86` / `x86_64`
    /// architectures.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    AVX512,

    /// `avx2` instructions on `x86` / `x86_64` architectures.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    AVX2,

    /// `ssse3` instructions on `x86` / `x86_64` architectures.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    SSSE3,

    /// `neon` instructions on `aarch64` / `arm64ec` architectures.
    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
    NEON,

    /// `lasx` instructions on `loongarch64` architectures.
    #[cfg(all(feature = "experimental-loongarch64-simd", target_arch = "loongarch64"))]
    LASX,

    /// `lsx` instructions on `loongarch64` architectures.
    #[cfg(all(feature = "experimental-loongarch64-simd", target_arch = "loongarch64"))]
    LSX,

    /// `simd128` instructions on `wasm32` architectures.
    ///
    /// The variant is never constructed when `simd128` is not enabled at
    /// compile time, hence the conditional `dead_code` allowance.
    #[cfg(target_arch = "wasm32")]
    #[cfg_attr(not(target_feature = "simd128"), allow(dead_code, reason = "SIMD"))]
    Wasm32SIMD128,

    /// Portable SIMD.
    #[cfg(feature = "portable-simd")]
    PortableSIMD128,

    /// No SIMD, or scalar fallback.
    None,
}

impl Vectorization {
    /// Selects the backend to use on the current target.
    ///
    /// The checks run in a fixed priority order and the first match wins:
    ///
    /// 1. Under Miri, always [`Self::None`].
    /// 2. On `x86` / `x86_64`: AVX-512 (`avx512f` + `avx512bw` + `avx512vl`),
    ///    then AVX2, then SSSE3.
    /// 3. On `aarch64` / `arm64ec`: NEON.
    /// 4. On `loongarch64` (with `experimental-loongarch64-simd`): LASX, then
    ///    LSX.
    /// 5. On `wasm32` compiled with `simd128`: [`Self::Wasm32SIMD128`].
    /// 6. With the `portable-simd` feature: [`Self::PortableSIMD128`].
    /// 7. Otherwise [`Self::None`].
    ///
    /// For the CPU-specific backends, a feature enabled at compile time
    /// (`cfg!(target_feature = ...)`) short-circuits the runtime query.
    #[allow(
        unreachable_code,
        reason = "Some cfg combinations return unconditionally before the fallback"
    )]
    #[allow(
        clippy::needless_return,
        reason = "Explicit returns keep every cfg-gated exit uniform"
    )]
    #[inline]
    fn detect() -> Self {
        #[cfg(miri)]
        {
            return Self::None;
        }

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            use crate::util::cpufeatures;

            cpufeatures::new!(cpuid_avx512, "avx512f", "avx512bw", "avx512vl");
            cpufeatures::new!(cpuid_avx2, "avx2");
            cpufeatures::new!(cpuid_ssse3, "ssse3");

            let avx512_at_compile_time = cfg!(all(
                target_feature = "avx512f",
                target_feature = "avx512bw",
                target_feature = "avx512vl"
            ));
            if avx512_at_compile_time || cpuid_avx512::get() {
                return Self::AVX512;
            }

            let avx2_at_compile_time = cfg!(target_feature = "avx2");
            if avx2_at_compile_time || cpuid_avx2::get() {
                return Self::AVX2;
            }

            let ssse3_at_compile_time = cfg!(target_feature = "ssse3");
            if ssse3_at_compile_time || cpuid_ssse3::get() {
                return Self::SSSE3;
            }
        }

        #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
        {
            use crate::util::cpufeatures;

            cpufeatures::new!(cpuid_neon, "neon");

            let neon_at_compile_time = cfg!(target_feature = "neon");
            if neon_at_compile_time || cpuid_neon::get() {
                return Self::NEON;
            }
        }

        #[cfg(all(feature = "experimental-loongarch64-simd", target_arch = "loongarch64"))]
        {
            use crate::util::cpufeatures;

            cpufeatures::new!(cpuid_lasx, "lasx");
            cpufeatures::new!(cpuid_lsx, "lsx");

            let lasx_at_compile_time = cfg!(target_feature = "lasx");
            if lasx_at_compile_time || cpuid_lasx::get() {
                return Self::LASX;
            }

            let lsx_at_compile_time = cfg!(target_feature = "lsx");
            if lsx_at_compile_time || cpuid_lsx::get() {
                return Self::LSX;
            }
        }

        // wasm32 is decided purely at compile time; no runtime query is made.
        #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
        {
            return Self::Wasm32SIMD128;
        }

        #[cfg(feature = "portable-simd")]
        {
            return Self::PortableSIMD128;
        }

        return Self::None;
    }
}