small-map 0.1.5

An inline SIMD accelerated hashmap designed for small amount of data.
Documentation
use core::{mem, ptr};

use super::bitmask::BitMask;

cfg_if! {
    if #[cfg(any(
        target_pointer_width = "64",
        target_arch = "aarch64",
        target_arch = "x86_64",
        target_arch = "wasm32",
    ))] {
        type GroupWord = u64;
        type NonZeroGroupWord = core::num::NonZeroU64;
    } else {
        type GroupWord = u32;
        type NonZeroGroupWord = core::num::NonZeroU32;
    }
}

pub(crate) type BitMaskWord = GroupWord;
pub(crate) type NonZeroBitMaskWord = NonZeroGroupWord;
pub(crate) const BITMASK_STRIDE: usize = 8;
pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0;

/// Helper function to replicate a byte across a `GroupWord`.
#[inline]
fn repeat(byte: u8) -> GroupWord {
    GroupWord::from_ne_bytes([byte; Group::WIDTH])
}

/// Abstraction over a group of control bytes which can be scanned in
/// parallel.
///
/// This implementation uses a word-sized integer.
#[derive(Copy, Clone)]
#[repr(transparent)]
pub(crate) struct Group(GroupWord);

// We perform all operations in the native endianness, and convert to
// little-endian just before creating a BitMask. The can potentially
// enable the compiler to eliminate unnecessary byte swaps if we are
// only checking whether a BitMask is empty.
#[allow(clippy::use_self)]
impl Group {
    /// Number of bytes in the group.
    pub(crate) const WIDTH: usize = mem::size_of::<Self>();
    #[cfg(any(
        target_pointer_width = "64",
        target_arch = "aarch64",
        target_arch = "x86_64",
        target_arch = "wasm32",
    ))]
    pub(crate) const LOWEST_MASK: [BitMaskWord; Group::WIDTH] = [
        0x0000_0000_0000_0000,
        0x0000_0000_0000_0080,
        0x0000_0000_0000_8080,
        0x0000_0000_0080_8080,
        0x0000_0000_8080_8080,
        0x0000_0080_8080_8080,
        0x0000_8080_8080_8080,
        0x0080_8080_8080_8080,
    ];
    #[cfg(not(any(
        target_pointer_width = "64",
        target_arch = "aarch64",
        target_arch = "x86_64",
        target_arch = "wasm32",
    )))]
    pub(crate) const LOWEST_MASK: [BitMaskWord; Group::WIDTH] =
        [0x0000_0000, 0x0000_0080, 0x0000_8080, 0x0080_8080];

    /// Loads a group of bytes starting at the given address.
    #[inline]
    #[allow(clippy::cast_ptr_alignment)] // unaligned load
    pub(crate) unsafe fn load(ptr: *const u8) -> Self {
        Group(ptr::read_unaligned(ptr.cast()))
    }

    /// Returns a `BitMask` indicating all bytes in the group which *may*
    /// have the given value.
    ///
    /// This function may return a false positive in certain cases where
    /// the byte in the group differs from the searched value only in its
    /// lowest bit. This is fine because:
    /// - This never happens for `EMPTY` and `DELETED`, only full entries.
    /// - The check for key equality will catch these.
    /// - This only happens if there is at least 1 true match.
    /// - The chance of this happening is very low (< 1% chance per byte).
    #[inline]
    pub(crate) fn match_byte(self, byte: u8) -> BitMask {
        // This algorithm is derived from
        // https://graphics.stanford.edu/~seander/bithacks.html##ValueInWord
        let cmp = self.0 ^ repeat(byte);
        BitMask((cmp.wrapping_sub(repeat(0x01)) & !cmp & repeat(0x80)).to_le())
    }
}