//! crc-fast 1.10.0
//!
//! World's fastest generic CRC16, CRC32, and CRC64 calculator using SIMD.
//! Supplies a C-compatible shared library for use in other languages.
//!
//! Provides CRC-32/ISCSI and CRC-32/ISO-HDLC calculations using a fusion of native CLMUL
//! instructions and native CRC calculation instructions on aarch64.
//!
//! <https://dougallj.wordpress.com/2022/05/22/faster-crc32-on-the-apple-m1/>
//!
//! Converted to Rust from the original C code generated by
//! <https://github.com/corsix/fast-crc32/> with the help of Claude.ai.
//! with the help of Claude.ai.
//!
//! Modified as necessary for this Rust implementation.
//!
//! MIT licensed.

#![cfg(target_arch = "aarch64")]

mod iscsi;
mod iso_hdlc;

use core::arch::aarch64::*;
#[cfg(feature = "std")]
use std::arch::is_aarch64_feature_detected;

use iscsi::crc_pmull::crc32_iscsi_v12e_v1;
use iscsi::crc_pmull_sha3::crc32_iscsi_eor3_v9s3x2e_s3;
use iso_hdlc::crc_pmull::crc32_iso_hdlc_v12e_v1;
use iso_hdlc::crc_pmull_sha3::crc32_iso_hdlc_eor3_v9s3x2e_s3;

#[inline(always)]
pub fn crc32_iscsi(crc: u32, data: &[u8]) -> u32 {
    let len = data.len();

    // Below 128 bytes the plain hardware CRC instructions win; the exact
    // crossover varies a little between aarch64 CPUs (Apple Silicon, AWS
    // Graviton, etc.), but 127 bytes is a good general threshold.
    if len < 128 {
        return unsafe { crc32_iscsi_small_fast(crc, data) };
    }

    // With `std`, detect SHA3 support at runtime; without it, fall back to
    // the compile-time target feature set.
    #[cfg(feature = "std")]
    let sha3_available = is_aarch64_feature_detected!("sha3");
    #[cfg(not(feature = "std"))]
    let sha3_available = cfg!(target_feature = "sha3");

    if !sha3_available {
        return unsafe { crc32_iscsi_aes(crc, data, len) };
    }

    unsafe { crc32_iscsi_aes_sha3(crc, data, len) }
}

#[inline(always)]
pub fn crc32_iso_hdlc(crc: u32, data: &[u8]) -> u32 {
    let len = data.len();

    // Below 128 bytes the plain hardware CRC instructions win; the exact
    // crossover varies a little between aarch64 CPUs (Apple Silicon, AWS
    // Graviton, etc.), but 127 bytes is a good general threshold.
    if len < 128 {
        return unsafe { crc32_iso_hdlc_small_fast(crc, data) };
    }

    // With `std`, detect SHA3 support at runtime; without it, fall back to
    // the compile-time target feature set.
    #[cfg(feature = "std")]
    let sha3_available = is_aarch64_feature_detected!("sha3");
    #[cfg(not(feature = "std"))]
    let sha3_available = cfg!(target_feature = "sha3");

    if !sha3_available {
        return unsafe { crc32_iso_hdlc_aes(crc, data, len) };
    }

    unsafe { crc32_iso_hdlc_aes_sha3(crc, data, len) }
}

/// CRC-32/ISCSI dispatch for CPUs with PMULL + SHA3 (EOR3): picks the
/// three-way-fold kernel for large buffers, the plain PMULL kernel otherwise.
#[inline]
#[target_feature(enable = "crc,aes,sha3")]
unsafe fn crc32_iscsi_aes_sha3(crc: u32, data: &[u8], data_len: usize) -> u32 {
    // Above this size the EOR3 kernel pulls ahead of the plain PMULL one.
    const LARGE_BUFFER_THRESHOLD: usize = 1024;

    let ptr = data.as_ptr();
    if data_len > LARGE_BUFFER_THRESHOLD {
        unsafe { crc32_iscsi_eor3_v9s3x2e_s3(crc, ptr, data_len) }
    } else {
        unsafe { crc32_iscsi_v12e_v1(crc, ptr, data_len) }
    }
}

/// CRC-32/ISCSI dispatch for CPUs with PMULL but no SHA3: always uses the
/// plain PMULL kernel.
#[inline]
#[target_feature(enable = "crc,aes")]
unsafe fn crc32_iscsi_aes(crc: u32, data: &[u8], data_len: usize) -> u32 {
    let ptr = data.as_ptr();
    unsafe { crc32_iscsi_v12e_v1(crc, ptr, data_len) }
}

/// CRC-32/ISO-HDLC dispatch for CPUs with PMULL + SHA3 (EOR3): picks the
/// three-way-fold kernel for large buffers, the plain PMULL kernel otherwise.
#[inline]
#[target_feature(enable = "crc,aes,sha3")]
unsafe fn crc32_iso_hdlc_aes_sha3(crc: u32, data: &[u8], data_len: usize) -> u32 {
    // Above this size the EOR3 kernel pulls ahead of the plain PMULL one.
    const LARGE_BUFFER_THRESHOLD: usize = 1024;

    let ptr = data.as_ptr();
    if data_len > LARGE_BUFFER_THRESHOLD {
        unsafe { crc32_iso_hdlc_eor3_v9s3x2e_s3(crc, ptr, data_len) }
    } else {
        unsafe { crc32_iso_hdlc_v12e_v1(crc, ptr, data_len) }
    }
}

/// CRC-32/ISO-HDLC dispatch for CPUs with PMULL but no SHA3: always uses the
/// plain PMULL kernel.
#[inline]
#[target_feature(enable = "crc,aes")]
unsafe fn crc32_iso_hdlc_aes(crc: u32, data: &[u8], data_len: usize) -> u32 {
    let ptr = data.as_ptr();
    unsafe { crc32_iso_hdlc_v12e_v1(crc, ptr, data_len) }
}

/// Carry-less multiply of the low 64-bit lanes of `a` and `b`; the 128-bit
/// polynomial product is reinterpreted back into a two-lane u64 vector.
#[inline]
#[target_feature(enable = "aes")]
unsafe fn clmul_lo(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
    unsafe { vreinterpretq_u64_p128(vmull_p64(vgetq_lane_u64(a, 0), vgetq_lane_u64(b, 0))) }
}

/// Carry-less multiply of the high 64-bit lanes of `a` and `b`; the 128-bit
/// polynomial product is reinterpreted back into a two-lane u64 vector.
#[inline]
#[target_feature(enable = "aes")]
unsafe fn clmul_hi(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
    unsafe { vreinterpretq_u64_p128(vmull_p64(vgetq_lane_u64(a, 1), vgetq_lane_u64(b, 1))) }
}

/// Carry-less multiply of two 32-bit scalars (zero-extended to 64 bits); the
/// 128-bit polynomial product is reinterpreted as a two-lane u64 vector.
#[inline]
#[target_feature(enable = "aes")]
unsafe fn clmul_scalar(a: u32, b: u32) -> uint64x2_t {
    unsafe { vreinterpretq_u64_p128(vmull_p64(u64::from(a), u64::from(b))) }
}

/// Low-lane carry-less multiply of `a` and `b`, XORed with `c`.
#[inline]
#[target_feature(enable = "aes")]
unsafe fn clmul_lo_and_xor(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
    let product = unsafe { clmul_lo(a, b) };
    unsafe { veorq_u64(product, c) }
}

/// High-lane carry-less multiply of `a` and `b`, XORed with `c`.
#[inline]
#[target_feature(enable = "aes")]
unsafe fn clmul_hi_and_xor(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
    let product = unsafe { clmul_hi(a, b) };
    unsafe { veorq_u64(product, c) }
}

/// CRC-32/ISCSI calculation for small buffers (< 128 bytes) using the native
/// CRC32C instructions, consuming the aligned middle 64 bytes per iteration.
#[inline]
#[target_feature(enable = "crc")]
pub unsafe fn crc32_iscsi_small_fast(mut crc: u32, data: &[u8]) -> u32 {
    unsafe {
        // Split into an unaligned byte head, a u64-aligned body, and a byte tail.
        let (head, body, tail) = data.align_to::<u64>();

        // Unaligned leading bytes, one at a time.
        crc = head.iter().fold(crc, |acc, &byte| __crc32cb(acc, byte));

        // Aligned words, 8 per iteration (64 bytes). `chunks_exact` guarantees
        // every chunk holds exactly 8 words, so the inner loop fully unrolls.
        let mut blocks = body.chunks_exact(8);
        for block in &mut blocks {
            for &word in block {
                crc = __crc32cd(crc, word);
            }
        }

        // Leftover aligned words (fewer than 8).
        crc = blocks
            .remainder()
            .iter()
            .fold(crc, |acc, &word| __crc32cd(acc, word));

        // Unaligned trailing bytes.
        tail.iter().fold(crc, |acc, &byte| __crc32cb(acc, byte))
    }
}

/// CRC-32/ISO-HDLC calculation for small buffers (< 128 bytes) using the
/// native CRC32 instructions, consuming the aligned middle 64 bytes per
/// iteration.
#[inline]
#[target_feature(enable = "crc")]
pub unsafe fn crc32_iso_hdlc_small_fast(mut crc: u32, data: &[u8]) -> u32 {
    unsafe {
        // Split into an unaligned byte head, a u64-aligned body, and a byte tail.
        let (head, body, tail) = data.align_to::<u64>();

        // Unaligned leading bytes, one at a time.
        crc = head.iter().fold(crc, |acc, &byte| __crc32b(acc, byte));

        // Aligned words, 8 per iteration (64 bytes). `chunks_exact` guarantees
        // every chunk holds exactly 8 words, so the inner loop fully unrolls.
        let mut blocks = body.chunks_exact(8);
        for block in &mut blocks {
            for &word in block {
                crc = __crc32d(crc, word);
            }
        }

        // Leftover aligned words (fewer than 8).
        crc = blocks
            .remainder()
            .iter()
            .fold(crc, |acc, &word| __crc32d(acc, word));

        // Unaligned trailing bytes.
        tail.iter().fold(crc, |acc, &byte| __crc32b(acc, byte))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::test::consts::TEST_CHECK_STRING;
    use crc::{Crc, Table};
    use rand::{rng, Rng};

    // Table-driven reference implementations used to cross-check the SIMD paths.
    const RUST_CRC32_ISO_HDLC: Crc<u32, Table<16>> =
        Crc::<u32, Table<16>>::new(&crc::CRC_32_ISO_HDLC);

    const RUST_CRC32_ISCSI: Crc<u32, Table<16>> = Crc::<u32, Table<16>>::new(&crc::CRC_32_ISCSI);

    #[test]
    fn test_crc32_iso_hdlc_check() {
        // Catalog "check" value: CRC-32/ISO-HDLC of "123456789" is 0xcbf43926.
        assert_eq!(
            crc32_iso_hdlc(0xffffffff, TEST_CHECK_STRING) ^ 0xffffffff,
            0xcbf43926
        );
    }

    #[test]
    fn test_crc32_iso_hdlc_small_all_lengths() {
        // Exercise every length handled by the small/native-instruction path.
        for len in 1..=255 {
            crc32_iso_hdlc_random(len)
        }
    }

    #[test]
    fn test_crc32_iso_hdlc_medium_lengths() {
        // Test each length from 256 to 1024, which should fold and include handling remainders
        for len in 256..=1024 {
            crc32_iso_hdlc_random(len)
        }
    }

    #[test]
    fn test_crc32_iso_hdlc_large_lengths() {
        // Test 1 MiB just before, at, and just after the folding boundaries.
        // Inclusive upper bound: the exclusive range `1048575..1048577` only
        // covered two lengths and never tested "just after" (1048577).
        for len in 1048575..=1048577 {
            crc32_iso_hdlc_random(len)
        }
    }

    #[test]
    fn test_crc32_iscsi_check() {
        // Catalog "check" value: CRC-32/ISCSI of "123456789" is 0xe3069283.
        assert_eq!(
            crc32_iscsi(0xffffffff, TEST_CHECK_STRING) ^ 0xffffffff,
            0xe3069283
        );
    }

    #[test]
    fn test_crc32_iscsi_small_all_lengths() {
        // Exercise every length handled by the small/native-instruction path.
        for len in 1..=255 {
            crc32_iscsi_random(len);
        }
    }

    #[test]
    fn test_crc32_iscsi_medium_lengths() {
        // Test each length from 256 to 1024, which should fold and include handling remainders
        for len in 256..=1024 {
            crc32_iscsi_random(len);
        }
    }

    #[test]
    fn test_crc32_iscsi_large_lengths() {
        // Test 1 MiB just before, at, and just after the folding boundaries.
        // Inclusive upper bound: the exclusive range `1048575..1048577` only
        // covered two lengths and never tested "just after" (1048577).
        for len in 1048575..=1048577 {
            crc32_iscsi_random(len);
        }
    }

    /// Checks a random buffer of `len` bytes against the table-driven
    /// reference, covering the public API and both raw kernels (SHA3 builds).
    #[cfg(target_feature = "sha3")]
    fn crc32_iso_hdlc_random(len: usize) {
        let mut data = vec![0u8; len];
        rng().fill(&mut data[..]);

        let checksum = RUST_CRC32_ISO_HDLC.checksum(&data);

        assert_eq!(crc32_iso_hdlc(0xffffffff, &data) ^ 0xffffffff, checksum);

        unsafe {
            assert_eq!(
                crc32_iso_hdlc_eor3_v9s3x2e_s3(0xffffffff, data.as_ptr(), data.len()) ^ 0xffffffff,
                checksum
            );

            assert_eq!(
                crc32_iso_hdlc_v12e_v1(0xffffffff, data.as_ptr(), data.len()) ^ 0xffffffff,
                checksum
            );
        }
    }

    /// Checks a random buffer of `len` bytes against the table-driven
    /// reference; non-SHA3 builds can only exercise the PMULL kernel.
    #[cfg(not(target_feature = "sha3"))]
    fn crc32_iso_hdlc_random(len: usize) {
        let mut data = vec![0u8; len];
        rng().fill(&mut data[..]);

        let checksum = RUST_CRC32_ISO_HDLC.checksum(&data);

        assert_eq!(crc32_iso_hdlc(0xffffffff, &data) ^ 0xffffffff, checksum);

        unsafe {
            assert_eq!(
                crc32_iso_hdlc_v12e_v1(0xffffffff, data.as_ptr(), data.len()) ^ 0xffffffff,
                checksum
            );
        }
    }

    /// Checks a random buffer of `len` bytes against the table-driven
    /// reference, covering the public API and both raw kernels (SHA3 builds).
    #[cfg(target_feature = "sha3")]
    fn crc32_iscsi_random(len: usize) {
        let mut data = vec![0u8; len];
        rng().fill(&mut data[..]);

        let checksum = RUST_CRC32_ISCSI.checksum(&data);

        assert_eq!(crc32_iscsi(0xffffffff, &data) ^ 0xffffffff, checksum);

        unsafe {
            assert_eq!(
                crc32_iscsi_eor3_v9s3x2e_s3(0xffffffff, data.as_ptr(), data.len()) ^ 0xffffffff,
                checksum
            );

            assert_eq!(
                crc32_iscsi_v12e_v1(0xffffffff, data.as_ptr(), data.len()) ^ 0xffffffff,
                checksum
            );
        }
    }

    /// Checks a random buffer of `len` bytes against the table-driven
    /// reference; non-SHA3 builds can only exercise the PMULL kernel.
    #[cfg(not(target_feature = "sha3"))]
    fn crc32_iscsi_random(len: usize) {
        let mut data = vec![0u8; len];
        rng().fill(&mut data[..]);

        let checksum = RUST_CRC32_ISCSI.checksum(&data);

        assert_eq!(crc32_iscsi(0xffffffff, &data) ^ 0xffffffff, checksum);

        unsafe {
            assert_eq!(
                crc32_iscsi_v12e_v1(0xffffffff, data.as_ptr(), data.len()) ^ 0xffffffff,
                checksum
            );
        }
    }
}