pe-sigscan 0.3.0

Fast in-process byte-pattern (signature) scanning over the .text and other executable sections of a loaded PE module on Windows. Supports IDA-style wildcard patterns.
Documentation
//! Fast first-byte search primitives used by the anchor pre-filter.
//!
//! The scan loop tests one non-wildcard byte of the pattern at every
//! candidate offset before invoking the (more expensive) full pattern
//! comparison. Making that single-byte search fast dominates throughput
//! when the haystack is large and matches are rare — which is the typical
//! case for signature scanning of game/code binaries.
//!
//! Two implementations live here:
//!
//! - [`first_byte_in_slice`] (slice path) — uses the optional `memchr`
//!   crate when the `memchr` Cargo feature is enabled, otherwise falls
//!   back to a portable SWAR (`usize`-sized word) search.
//! - [`first_byte_in_raw`] (raw-pointer path) — always uses the SWAR
//!   fallback because the in-process scanner does not have a `&[u8]` to
//!   hand `memchr` and we want to avoid pulling external dependencies
//!   into a code path that is also called against `IMAGE_SCN_MEM_EXECUTE`
//!   sections.

/// Locate the first occurrence of `needle` in `haystack`.
///
/// The search backend is chosen at compile time: with the `memchr` Cargo
/// feature enabled the call is forwarded to `memchr::memchr`; without it
/// the in-crate [`swar_first_byte`] routine is used.
///
/// Returns `Some(index)` of the first matching byte, or `None` when
/// `needle` does not occur in `haystack` (including the empty slice).
#[inline]
#[must_use]
pub(crate) fn first_byte_in_slice(haystack: &[u8], needle: u8) -> Option<usize> {
    // Exactly one of the two bindings below survives cfg evaluation.
    #[cfg(feature = "memchr")]
    let position = memchr::memchr(needle, haystack);

    #[cfg(not(feature = "memchr"))]
    let position = swar_first_byte(haystack, needle);

    position
}

/// Same as [`first_byte_in_slice`] but operating on a raw `(start, len)`
/// pair. Used by the in-process scanner where `start` is an absolute
/// virtual address inside a PE section.
///
/// Always searches with [`swar_first_byte`], regardless of Cargo features.
///
/// Returns the offset of the first byte equal to `needle`, relative to
/// `start`, or `None` if there is no such byte. An empty range
/// (`len == 0`) yields `None` without touching memory, so any `start`
/// value — including `0` — is accepted in that case.
///
/// # Safety
///
/// When `len > 0` the caller must guarantee that:
///
/// - `[start, start + len)` is a contiguous readable range of bytes for
///   the duration of the call;
/// - the bytes in that range are not written to while the call is in
///   progress (a shared slice is formed over them);
/// - `len` does not exceed `isize::MAX`.
#[inline]
#[must_use]
pub(crate) unsafe fn first_byte_in_raw(start: usize, len: usize, needle: u8) -> Option<usize> {
    // `slice::from_raw_parts` requires a non-null pointer even for
    // zero-length slices; an empty range cannot contain the needle anyway,
    // so answer before constructing the slice.
    if len == 0 {
        return None;
    }

    // SAFETY: `len > 0`, and the caller guarantees that `[start, start + len)`
    // is readable, unmodified for the duration of this call, and no larger
    // than `isize::MAX`. `u8` has alignment 1, so every address is aligned.
    let haystack: &[u8] = unsafe { core::slice::from_raw_parts(start as *const u8, len) };
    swar_first_byte(haystack, needle)
}

/// Portable word-at-a-time (SWAR) first-byte search.
///
/// Walks `haystack` in `usize`-sized chunks (8 bytes on 64-bit targets, 4
/// on 32-bit) and applies the classic "has-zero-byte" bit trick to compare
/// every byte of a chunk against `needle` at once. Bytes left over after
/// the last full chunk are checked one at a time.
///
/// Returns the index of the first byte equal to `needle`, or `None` if
/// `needle` does not occur in `haystack`.
#[inline]
fn swar_first_byte(haystack: &[u8], needle: u8) -> Option<usize> {
    const WORD: usize = core::mem::size_of::<usize>();
    // 0x01 and 0x80 replicated into every byte lane of a word.
    const LO: usize = usize::from_le_bytes([0x01; WORD]);
    const HI: usize = usize::from_le_bytes([0x80; WORD]);
    // Splat the needle across the whole word: 0x41 -> 0x4141...41.
    let needle_word = usize::from_le_bytes([needle; WORD]);

    let mut chunks = haystack.chunks_exact(WORD);
    for (n, chunk) in chunks.by_ref().enumerate() {
        // Byte-wise copy into a word: no alignment requirement on `chunk`.
        // `chunks_exact` guarantees `chunk.len() == WORD`, so the copy
        // cannot panic. Little-endian decoding puts `chunk[0]` in the
        // least significant lane on every target.
        let mut bytes = [0u8; WORD];
        bytes.copy_from_slice(chunk);
        // Lanes equal to `needle` become zero after the XOR.
        let xor = usize::from_le_bytes(bytes) ^ needle_word;
        // "Has zero byte": sets the high bit of each zero lane. Borrow
        // propagation can also set high bits in lanes *above* the first
        // zero lane, but never below it, so the lowest set bit always
        // identifies the first matching byte and `mask` is zero exactly
        // when the chunk contains no match.
        let mask = xor.wrapping_sub(LO) & !xor & HI;
        if mask != 0 {
            let lane = (mask.trailing_zeros() as usize) / 8;
            return Some(n * WORD + lane);
        }
    }

    // Tail bytes that did not fill a whole word.
    let tail = chunks.remainder();
    let base = haystack.len() - tail.len();
    tail.iter().position(|&b| b == needle).map(|j| base + j)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn finds_first_in_aligned_chunk() {
        let buf = b"hello world";
        assert_eq!(first_byte_in_slice(buf, b'o'), Some(4));
    }

    #[test]
    fn finds_in_tail() {
        // 17 bytes: the needle at index 16 lies past the last full word on
        // both 32-bit and 64-bit targets, so the tail loop must find it.
        let mut buf = vec![0u8; 17];
        buf[16] = 0x42;
        assert_eq!(first_byte_in_slice(&buf, 0x42), Some(16));
    }

    #[test]
    fn returns_none_when_absent() {
        let buf = vec![0u8; 1024];
        assert_eq!(first_byte_in_slice(&buf, 0xFF), None);
    }

    #[test]
    fn empty_slice_returns_none() {
        assert_eq!(first_byte_in_slice(&[], 0x42), None);
    }

    /// Guards against false positives from the SWAR bit trick when every
    /// haystack byte has its high bit set: an absent needle must still
    /// yield `None`.
    ///
    /// We use the SWAR variant directly so the check runs even when the
    /// `memchr` feature is enabled (in which case `first_byte_in_slice`
    /// would otherwise delegate to memchr).
    #[test]
    fn swar_handles_high_bit_bytes_without_false_positive() {
        // High-bit bytes everywhere; needle is absent.
        let buf = vec![0xFFu8; 64];
        assert_eq!(swar_first_byte(&buf, 0x42), None);
    }

    #[test]
    fn swar_finds_needle_among_high_bit_bytes() {
        let mut buf = vec![0xFFu8; 64];
        buf[37] = 0x42;
        assert_eq!(swar_first_byte(&buf, 0x42), Some(37));
    }

    /// Bytes that differ from the needle by exactly one are the inputs on
    /// which the subtraction in the bit trick borrows across lanes; they
    /// must neither produce a spurious hit nor shift the reported index.
    #[test]
    fn swar_handles_off_by_one_neighbours() {
        assert_eq!(swar_first_byte(&[0x43u8; 64], 0x42), None);
        assert_eq!(
            swar_first_byte(&[0x42, 0x43, 0x43, 0x43, 0x43, 0x43, 0x43, 0x43], 0x42),
            Some(0)
        );
    }

    /// When a single word holds several matches, the earliest one wins.
    #[test]
    fn swar_returns_first_of_multiple_matches_in_one_word() {
        let buf = [0x43, 0x43, 0x42, 0x43, 0x42, 0x42, 0x43, 0x43];
        assert_eq!(swar_first_byte(&buf, 0x42), Some(2));
    }

    /// Sweeps every (length, position) pair over a few words so that both
    /// the word loop and the tail loop are hit at every lane offset.
    #[test]
    fn swar_finds_needle_at_every_length_and_position() {
        for len in 0..40usize {
            for pos in 0..len {
                let mut buf = vec![0xFFu8; len];
                buf[pos] = 0x42;
                assert_eq!(swar_first_byte(&buf, 0x42), Some(pos));
                // A later duplicate must not change the answer.
                buf[len - 1] = 0x42;
                assert_eq!(swar_first_byte(&buf, 0x42), Some(pos));
            }
            assert_eq!(swar_first_byte(&vec![0xFFu8; len], 0x42), None);
        }
    }

    /// Cross-checks the SWAR search against `Iterator::position` for every
    /// possible needle over a deterministic pseudo-random buffer.
    #[test]
    fn swar_agrees_with_naive_search() {
        let mut state = 0x2545_F491u32;
        let buf: Vec<u8> = (0..257)
            .map(|_| {
                state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
                (state >> 24) as u8
            })
            .collect();
        for needle in 0..=255u8 {
            assert_eq!(
                swar_first_byte(&buf, needle),
                buf.iter().position(|&b| b == needle)
            );
        }
    }

    /// `first_byte_in_raw` is exercised indirectly via the in-process
    /// scanners, but a direct test makes failure modes obvious and keeps
    /// the helper's safety contract explicit.
    #[test]
    fn first_byte_in_raw_finds_needle() {
        let buf = b"sigscan-test";
        // SAFETY: `buf` is a live, immutable byte string of `buf.len()` bytes.
        let result = unsafe { first_byte_in_raw(buf.as_ptr() as usize, buf.len(), b's') };
        assert_eq!(result, Some(0));
        // SAFETY: as above.
        let result_t = unsafe { first_byte_in_raw(buf.as_ptr() as usize, buf.len(), b't') };
        assert_eq!(result_t, Some(8));
    }

    #[test]
    fn first_byte_in_raw_returns_none_when_absent() {
        let buf = vec![0u8; 64];
        // SAFETY: `buf` owns 64 readable bytes and outlives the call.
        let result = unsafe { first_byte_in_raw(buf.as_ptr() as usize, buf.len(), 0xFF) };
        assert_eq!(result, None);
    }

    #[test]
    fn first_byte_in_raw_empty_range_returns_none() {
        let buf = b"sigscan-test";
        // SAFETY: the pointer is valid and non-null; a zero-length range
        // reads no memory.
        let result = unsafe { first_byte_in_raw(buf.as_ptr() as usize, 0, b's') };
        assert_eq!(result, None);
    }
}