#![cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod iscsi;
use iscsi::sse_pclmulqdq::crc32_iscsi_sse_v4s3x3;
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
#[cfg(target_arch = "x86_64")]
use iscsi::avx512_pclmulqdq::crc32_iscsi_avx512_v4s3x3;
#[cfg(target_arch = "x86_64")]
use iscsi::avx512_vpclmulqdq::crc32_iscsi_avx512_vpclmulqdq_v3x2;
/// Updates a raw CRC-32/iSCSI (CRC-32C) state `crc` with the bytes of `data`.
///
/// No initial or final XOR is applied: callers that want the conventional checksum pass
/// `0xffff_ffff` and invert the result, as the tests in this file do.
///
/// The implementation is chosen at runtime, and every SIMD kernel is only entered after
/// the CPU features it is expected to need have been detected:
///
/// 1. inputs of at most 256 bytes use [`crc32_iscsi_small_fast`] when SSE4.2 is present;
/// 2. on x86_64, AVX-512VL + VPCLMULQDQ selects the `v3x2` kernel;
/// 3. on x86_64, AVX-512VL + PCLMULQDQ selects the AVX-512 `v4s3x3` kernel;
/// 4. SSE4.2 + PCLMULQDQ selects the SSE `v4s3x3` kernel;
/// 5. otherwise a portable bit-at-a-time loop is used, so this safe function never
///    executes an instruction the CPU does not implement.
#[inline(always)]
pub fn crc32_iscsi(crc: u32, data: &[u8]) -> u32 {
    let data_len = data.len();
    let has_sse42 = is_x86_feature_detected!("sse4.2");

    if data_len <= 256 && has_sse42 {
        // SAFETY: SSE4.2, the only feature this routine enables, was detected above.
        return unsafe { crc32_iscsi_small_fast(crc, data) };
    }

    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx512vl") {
            if is_x86_feature_detected!("vpclmulqdq") {
                // SAFETY: AVX-512VL and VPCLMULQDQ were detected, and the pointer/length
                // pair describes the live `data` slice.
                return unsafe {
                    crc32_iscsi_avx512_vpclmulqdq_v3x2(crc, data.as_ptr(), data_len)
                };
            }
            if is_x86_feature_detected!("pclmulqdq") {
                // SAFETY: AVX-512VL and PCLMULQDQ were detected (the same gate the tests
                // apply to this kernel), and the pointer/length pair describes `data`.
                return unsafe { crc32_iscsi_avx512_v4s3x3(crc, data.as_ptr(), data_len) };
            }
        }
    }

    if has_sse42 && is_x86_feature_detected!("pclmulqdq") {
        // SAFETY: SSE4.2 and PCLMULQDQ were detected, and the pointer/length pair
        // describes the live `data` slice.
        return unsafe { crc32_iscsi_sse_v4s3x3(crc, data.as_ptr(), data_len) };
    }

    crc32_iscsi_portable(crc, data)
}

/// Bit-at-a-time CRC-32C update, used only when the CPU lacks the SIMD features above.
///
/// Works on the same raw state as the accelerated kernels (no initial/final XOR).
#[cold]
fn crc32_iscsi_portable(crc: u32, data: &[u8]) -> u32 {
    // Bit-reflected form of the Castagnoli polynomial 0x1EDC6F41.
    const POLY_REFLECTED: u32 = 0x82F6_3B78;

    data.iter().fold(crc, |state, &byte| {
        (0..8).fold(state ^ u32::from(byte), |s, _| {
            // All-ones when the bit shifted out is set, zero otherwise.
            let mask = (s & 1).wrapping_neg();
            (s >> 1) ^ (POLY_REFLECTED & mask)
        })
    })
}
/// Carry-less multiplies the low 64-bit halves of `a` and `b` within each 128-bit lane.
///
/// # Safety
///
/// The running CPU must support AVX-512VL and VPCLMULQDQ.
#[cfg(target_arch = "x86_64")]
#[inline]
#[target_feature(enable = "avx512vl,vpclmulqdq")]
unsafe fn clmul_lo_avx512_vpclmulqdq(a: __m512i, b: __m512i) -> __m512i {
    // Selector 0x00: low quadword of `a` times low quadword of `b`.
    _mm512_clmulepi64_epi128::<0x00>(a, b)
}
/// Carry-less multiplies the high 64-bit halves of `a` and `b` within each 128-bit lane.
///
/// # Safety
///
/// The running CPU must support AVX-512VL and VPCLMULQDQ.
#[cfg(target_arch = "x86_64")]
#[inline]
#[target_feature(enable = "avx512vl,vpclmulqdq")]
unsafe fn clmul_hi_avx512_vpclmulqdq(a: __m512i, b: __m512i) -> __m512i {
    // Selector 0x11 (decimal 17): high quadword of `a` times high quadword of `b`.
    _mm512_clmulepi64_epi128::<0x11>(a, b)
}
/// Carry-less multiplies the low 64-bit halves of `a` and `b`, giving a 128-bit product.
///
/// # Safety
///
/// The running CPU must support PCLMULQDQ.
#[inline]
#[target_feature(enable = "pclmulqdq")]
unsafe fn clmul_lo_sse(a: __m128i, b: __m128i) -> __m128i {
    // Selector 0x00: low quadword of `a` times low quadword of `b`.
    _mm_clmulepi64_si128::<0x00>(a, b)
}
/// Carry-less multiplies the high 64-bit halves of `a` and `b`, giving a 128-bit product.
///
/// # Safety
///
/// The running CPU must support PCLMULQDQ.
#[inline]
#[target_feature(enable = "pclmulqdq")]
unsafe fn clmul_hi_sse(a: __m128i, b: __m128i) -> __m128i {
    // Selector 0x11 (decimal 17): high quadword of `a` times high quadword of `b`.
    _mm_clmulepi64_si128::<0x11>(a, b)
}
/// Carry-less multiplies two 32-bit values, returning the product in a 128-bit vector.
///
/// Each operand is placed in the lowest 32 bits of an otherwise zero vector before the
/// low quadwords are multiplied.
///
/// # Safety
///
/// The running CPU must support PCLMULQDQ.
#[inline]
#[target_feature(enable = "pclmulqdq")]
unsafe fn clmul_scalar_sse(a: u32, b: u32) -> __m128i {
    // The `as i32` casts only reinterpret the bit pattern for the intrinsic's signature.
    let lhs = _mm_cvtsi32_si128(a as i32);
    let rhs = _mm_cvtsi32_si128(b as i32);
    _mm_clmulepi64_si128::<0x00>(lhs, rhs)
}
/// Computes `x^n mod P` for the CRC-32C polynomial, in the bit-reflected 32-bit form
/// (bit 31 holds the coefficient of `x^0`).
///
/// Large exponents are first reduced by repeated halving, with the parity of each step
/// pushed onto a bit stack. The reduced power is then built with zero-word CRC steps, and
/// the halvings are undone by squaring with a carry-less multiply followed by a 64-bit
/// CRC step.
///
/// # Safety
///
/// The running CPU must support SSE4.2 and PCLMULQDQ.
#[target_feature(enable = "sse4.2,pclmulqdq")]
unsafe fn xnmodp_iscsi_sse(mut n: u64) -> u32 {
    // Bit 0 starts as a zero sentinel; once the stack is inverted below it becomes the
    // single set bit that marks the end of the recorded parities.
    let mut stack = !1u64;
    while n > 191 {
        // The shift leaves bit 0 clear, so OR-ing in the parity equals adding it.
        stack = (stack << 1) | (n & 1);
        n = (n >> 1) - 16;
    }
    stack = !stack;

    // x^(n mod 32) is a single bit; each zero-word CRC step then multiplies by x^32.
    let mut acc = 0x8000_0000u32 >> (n & 31);
    for _ in 0..(n >> 5) {
        acc = _mm_crc32_u32(acc, 0);
    }

    loop {
        let low = (stack & 1) as u32;
        stack >>= 1;
        if stack == 0 {
            // Only the sentinel was left: every recorded halving has been undone.
            break;
        }
        let operand = _mm_cvtsi32_si128(acc as i32);
        let squared = _mm_clmulepi64_si128::<0x00>(operand, operand);
        let product = mm_extract_epi64(squared, 0);
        acc = mm_crc32_u64(0, product << low);
    }
    acc
}
/// Returns the carry-less product of `crc` with `xnmodp_iscsi_sse(8 * nbytes - 33)`.
///
/// `nbytes` must be at least 5 so that the exponent `8 * nbytes - 33` is not negative.
///
/// # Safety
///
/// The running CPU must support SSE4.2 and PCLMULQDQ: the exponentiation routine called
/// here executes CRC32 instructions as well as a carry-less multiply.
#[inline]
#[target_feature(enable = "sse4.2,pclmulqdq")]
unsafe fn crc_shift_iscsi_sse(crc: u32, nbytes: usize) -> __m128i {
    // Widen before multiplying: on 32-bit x86 `usize` is 32 bits wide, so `nbytes * 8`
    // would overflow for lengths of 512 MiB and above.
    let exponent = (nbytes as u64) * 8 - 33;
    clmul_scalar_sse(crc, xnmodp_iscsi_sse(exponent))
}
/// Extracts one 64-bit half of `val` as an unsigned integer.
///
/// `idx == 0` selects the low half; any other value selects the high half. On 32-bit
/// x86 the half is assembled from two 32-bit reads.
///
/// # Safety
///
/// The running CPU must support SSE4.1, the feature this function is compiled with.
#[inline]
#[target_feature(enable = "sse4.1")]
unsafe fn mm_extract_epi64(val: __m128i, idx: i32) -> u64 {
    // Bring the requested half down into the low 64 bits first.
    let selected = if idx == 0 {
        val
    } else {
        _mm_srli_si128::<8>(val)
    };

    #[cfg(target_arch = "x86_64")]
    {
        _mm_cvtsi128_si64(selected) as u64
    }
    #[cfg(target_arch = "x86")]
    {
        let lower = _mm_cvtsi128_si32(selected) as u32;
        let upper = _mm_cvtsi128_si32(_mm_srli_si128::<4>(selected)) as u32;
        (u64::from(upper) << 32) | u64::from(lower)
    }
}
/// Feeds the 64-bit value `val` into the CRC-32C state `crc` and returns the new state.
///
/// x86_64 uses the 64-bit CRC32 instruction directly; 32-bit x86 processes the low
/// 32 bits and then the high 32 bits.
///
/// # Safety
///
/// The running CPU must support SSE4.2.
#[inline]
#[target_feature(enable = "sse4.2")]
unsafe fn mm_crc32_u64(crc: u32, val: u64) -> u32 {
    #[cfg(target_arch = "x86_64")]
    {
        let widened = _mm_crc32_u64(u64::from(crc), val);
        widened as u32
    }
    #[cfg(target_arch = "x86")]
    {
        let after_low = _mm_crc32_u32(crc, val as u32);
        _mm_crc32_u32(after_low, (val >> 32) as u32)
    }
}
/// Updates the CRC-32C state `crc` with `data` using only the SSE4.2 CRC32 instruction.
///
/// The slice is split into an unaligned head, a run of 8-byte-aligned `u64` words and a
/// tail. The head and tail are consumed one byte at a time, the middle one word at a
/// time, always in memory order. No initial or final XOR is applied.
///
/// # Safety
///
/// The running CPU must support SSE4.2.
#[inline]
#[target_feature(enable = "sse4.2")]
pub unsafe fn crc32_iscsi_small_fast(mut crc: u32, data: &[u8]) -> u32 {
    let (head, words, tail) = data.align_to::<u64>();

    for &byte in head {
        crc = _mm_crc32_u8(crc, byte);
    }

    // Walk the aligned words in fixed blocks of eight; the fixed trip count of the inner
    // loop lets the compiler unroll it.
    let mut blocks = words.chunks_exact(8);
    for block in blocks.by_ref() {
        for &word in block {
            crc = mm_crc32_u64(crc, word);
        }
    }
    for &word in blocks.remainder() {
        crc = mm_crc32_u64(crc, word);
    }

    for &byte in tail {
        crc = _mm_crc32_u8(crc, byte);
    }
    crc
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test::consts::TEST_CHECK_STRING;
    use crc::{Crc, Table};
    use rand::{rng, Rng};

    /// Table-driven reference implementation that every accelerated path is compared to.
    const RUST_CRC32_ISCSI: Crc<u32, Table<16>> = Crc::<u32, Table<16>>::new(&crc::CRC_32_ISCSI);

    /// Features required before any of the PCLMULQDQ-based kernels may be called.
    fn sse_kernel_supported() -> bool {
        is_x86_feature_detected!("sse4.2") && is_x86_feature_detected!("pclmulqdq")
    }

    #[test]
    fn test_crc32_iscsi_check() {
        if sse_kernel_supported() {
            assert_eq!(
                crc32_iscsi(0xffffffff, TEST_CHECK_STRING) ^ 0xffffffff,
                0xe3069283
            );
        }
    }

    #[test]
    fn test_crc32_iscsi_small_fast_check() {
        if is_x86_feature_detected!("sse4.2") {
            // SAFETY: SSE4.2 was detected on the line above.
            let crc = unsafe { crc32_iscsi_small_fast(0xffffffff, TEST_CHECK_STRING) };
            assert_eq!(crc ^ 0xffffffff, 0xe3069283);
        }
    }

    #[test]
    fn test_crc32_iscsi_small_fast_all_lengths() {
        if is_x86_feature_detected!("sse4.2") {
            // The dispatcher sends every input of at most 256 bytes to the small path,
            // so cover both ends of that range, including the empty input.
            for len in 0..=256 {
                test_crc32_iscsi_small_fast_random(len);
            }
        }
    }

    /// Checks the small path against the reference for `len` random bytes.
    fn test_crc32_iscsi_small_fast_random(len: usize) {
        if !is_x86_feature_detected!("sse4.2") {
            return;
        }

        let mut data = vec![0u8; len];
        rng().fill(&mut data[..]);
        let checksum = RUST_CRC32_ISCSI.checksum(&data);

        // SAFETY: SSE4.2 was detected at the top of this function.
        let crc = unsafe { crc32_iscsi_small_fast(0xffffffff, &data) };
        assert_eq!(crc ^ 0xffffffff, checksum);
    }

    #[test]
    fn test_crc32_iscsi_small_all_lengths() {
        if sse_kernel_supported() {
            for len in 1..=255 {
                test_crc32_iscsi_random(len);
            }
        }
    }

    #[test]
    fn test_crc32_iscsi_medium_lengths() {
        if sse_kernel_supported() {
            for len in 256..=1024 {
                test_crc32_iscsi_random(len);
            }
        }
    }

    #[test]
    fn test_crc32_iscsi_large_lengths() {
        if sse_kernel_supported() {
            for len in 1048575..1048577 {
                test_crc32_iscsi_random(len);
            }
        }
    }

    /// Checks the dispatcher and every kernel the CPU supports against the reference
    /// for `len` random bytes.
    fn test_crc32_iscsi_random(len: usize) {
        // Nothing below may run without the baseline features, including the direct
        // call to the SSE kernel at the end of this function.
        if !sse_kernel_supported() {
            return;
        }

        let mut data = vec![0u8; len];
        rng().fill(&mut data[..]);
        let checksum = RUST_CRC32_ISCSI.checksum(&data);

        assert_eq!(crc32_iscsi(0xffffffff, &data) ^ 0xffffffff, checksum);

        #[cfg(target_arch = "x86_64")]
        {
            if is_x86_feature_detected!("vpclmulqdq")
                && is_x86_feature_detected!("avx512vl")
                && is_x86_feature_detected!("avx512f")
            {
                // SAFETY: the features were detected just above and the pointer/length
                // pair describes the live `data` buffer.
                let crc = unsafe {
                    crc32_iscsi_avx512_vpclmulqdq_v3x2(0xffffffff, data.as_ptr(), data.len())
                };
                assert_eq!(crc ^ 0xffffffff, checksum);
            }

            if is_x86_feature_detected!("avx512vl")
                && is_x86_feature_detected!("avx512f")
                && is_x86_feature_detected!("pclmulqdq")
            {
                // SAFETY: the features were detected just above and the pointer/length
                // pair describes the live `data` buffer.
                let crc =
                    unsafe { crc32_iscsi_avx512_v4s3x3(0xffffffff, data.as_ptr(), data.len()) };
                assert_eq!(crc ^ 0xffffffff, checksum);
            }
        }

        // SAFETY: SSE4.2 and PCLMULQDQ were detected at the top of this function and the
        // pointer/length pair describes the live `data` buffer.
        let crc = unsafe { crc32_iscsi_sse_v4s3x3(0xffffffff, data.as_ptr(), data.len()) };
        assert_eq!(crc ^ 0xffffffff, checksum);
    }
}