use archmage::prelude::*;
use super::tables::CRC32_SLICE8_TABLE;
/// CRC-32 (IEEE/zlib, reflected polynomial `0xEDB88320`) of `data`,
/// continued from a previous `crc` value. Pass `0` to start a fresh
/// checksum; `crc32(crc32(0, a), b) == crc32(0, a ++ b)`.
#[must_use]
#[allow(unexpected_cfgs)]
pub fn crc32(crc: u32, data: &[u8]) -> u32 {
    // Nothing to fold in; the state is already final.
    if data.is_empty() {
        return crc;
    }
    // `incant!` dispatches at runtime to the best available `crc32_impl_*`
    // tier (presumably falling back to `crc32_impl_scalar` when no SIMD
    // token is available — TODO confirm against archmage's docs). The state
    // is inverted on entry (`!crc`) and on exit (leading `!`), the standard
    // CRC-32 pre-/post-conditioning.
    #[cfg(feature = "avx512")]
    {
        !incant!(crc32_impl(!crc, data), [v4x, x64_crypto, neon_aes])
    }
    #[cfg(not(feature = "avx512"))]
    {
        !incant!(crc32_impl(!crc, data), [x64_crypto, neon_aes])
    }
}
/// Combines two independently computed CRC-32 values, as if the bytes that
/// produced `crc2` (there were `len2` of them) had been appended to the
/// stream that produced `crc1`. Runs in O(log len2) GF(2) matrix operations
/// (the zlib `crc32_combine` technique).
#[must_use]
pub fn crc32_combine(crc1: u32, crc2: u32, len2: usize) -> u32 {
    // Appending zero bytes changes nothing.
    if len2 == 0 {
        return crc1;
    }
    // `odd` starts as the operator matrix that advances a CRC state by a
    // single zero bit: row 0 is the polynomial, row n is the unit vector
    // 1 << (n - 1).
    let mut odd = [0u32; 32];
    odd[0] = CRC32_POLY;
    for (bit, slot) in odd[1..].iter_mut().enumerate() {
        *slot = 1u32 << bit;
    }
    let mut even = [0u32; 32];
    // Square twice so the loop below starts at the operator for one whole
    // zero *byte* (8 bits): even = 2 bits, odd = 4 bits.
    gf2_matrix_square(&mut even, &odd);
    gf2_matrix_square(&mut odd, &even);
    // Square-and-apply over the bits of len2, ping-ponging between the two
    // matrices so each squaring writes into the one not currently holding
    // the live operator.
    let mut combined = crc1;
    let mut remaining = len2;
    let mut square_into_even = true;
    loop {
        if square_into_even {
            gf2_matrix_square(&mut even, &odd);
            if remaining & 1 != 0 {
                combined = gf2_matrix_times(&even, combined);
            }
        } else {
            gf2_matrix_square(&mut odd, &even);
            if remaining & 1 != 0 {
                combined = gf2_matrix_times(&odd, combined);
            }
        }
        remaining >>= 1;
        if remaining == 0 {
            break;
        }
        square_into_even = !square_into_even;
    }
    combined ^ crc2
}
// Bit-reflected CRC-32 (IEEE 802.3 / zlib) generator polynomial.
const CRC32_POLY: u32 = 0xEDB88320;
fn gf2_matrix_times(mat: &[u32; 32], mut vec: u32) -> u32 {
let mut sum = 0u32;
let mut i = 0;
while vec != 0 {
if vec & 1 != 0 {
sum ^= mat[i];
}
vec >>= 1;
i += 1;
}
sum
}
/// Squares the GF(2) operator matrix `mat` into `square` (row by row:
/// each output row is `mat` applied to the corresponding input row).
fn gf2_matrix_square(square: &mut [u32; 32], mat: &[u32; 32]) {
    for (out, &row) in square.iter_mut().zip(mat.iter()) {
        *out = gf2_matrix_times(mat, row);
    }
}
// Precomputed folding constants for the carry-less-multiply kernels below.
// Each `CRC32_Xn_MODG` is x^n reduced modulo the (reflected) CRC-32
// generator polynomial; a pair of them folds a vector forward by a fixed
// byte distance (e.g. X543/X479 fold across 64 bytes = 512 bits).
const CRC32_X4127_MODG: i64 = 0x1072db28_u32 as i64;
const CRC32_X4063_MODG: i64 = 0x0c30f51d_u32 as i64;
const CRC32_X2079_MODG: i64 = 0xce3371cb_u32 as i64;
const CRC32_X2015_MODG: i64 = 0xe95c1271_u32 as i64;
const CRC32_X1055_MODG: i64 = 0x33fff533;
const CRC32_X991_MODG: i64 = 0x910eeec1;
const CRC32_X543_MODG: i64 = 0x8f352d95;
const CRC32_X479_MODG: i64 = 0x1d9513d7;
const CRC32_X287_MODG: i64 = 0xf1da05aa;
const CRC32_X223_MODG: i64 = 0x81256527;
const CRC32_X159_MODG: i64 = 0xae689191;
const CRC32_X95_MODG: i64 = 0xccaa009e;
// Barrett reduction constants used to reduce the final 64-bit remainder to
// a 32-bit CRC (quotient approximation and the polynomial itself).
const CRC32_BARRETT_1: i64 = 0xb4e5b025f7011641u64 as i64;
const CRC32_BARRETT_2: i64 = 0x00000001db710641u64 as i64;
// Folds the 128-bit vector `$src` forward by the distance encoded in
// `$mults` and XORs the result into `$dst`:
//   dst ^ clmul(src.lo64, mults.lo64) ^ clmul(src.hi64, mults.hi64)
// (immediates 0x00 and 0x11 select the low-low and high-high 64-bit lanes).
macro_rules! fold128 {
    ($src:expr, $dst:expr, $mults:expr) => {{
        let src = $src;
        let mults = $mults;
        _mm_xor_si128(
            _mm_xor_si128($dst, _mm_clmulepi64_si128(src, mults, 0x00)),
            _mm_clmulepi64_si128(src, mults, 0x11),
        )
    }};
}
// Reduces a 128-bit folded remainder `$x0` to the final 32-bit CRC:
// first fold the low 64 bits onto the high 64 (using the constant in the
// upper lane of `$mults_128b`, imm 0x10), then Barrett-reduce with the two
// constants packed into `$barrett`. The CRC ends up in 32-bit lane 2.
macro_rules! barrett_reduce {
    ($x0:expr, $mults_128b:expr, $barrett:expr) => {{
        let x0 = _mm_xor_si128(
            _mm_clmulepi64_si128($x0, $mults_128b, 0x10),
            _mm_bsrli_si128::<8>($x0),
        );
        let x1 = _mm_clmulepi64_si128(x0, $barrett, 0x00);
        let x1 = _mm_clmulepi64_si128(x1, $barrett, 0x10);
        _mm_extract_epi32::<2>(_mm_xor_si128(x0, x1)) as u32
    }};
}
// AVX-512 / VPCLMULQDQ tier: carry-less-multiply folding in 512-bit
// vectors — up to eight independent accumulators (512 bytes per loop
// iteration) — then narrowing 512 -> 256 -> 128 bits and Barrett-reducing
// to the final 32-bit CRC.
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
#[arcane]
#[allow(clippy::incompatible_msrv)]
fn crc32_impl_v4x(_token: X64V4xToken, crc: u32, data: &[u8]) -> u32 {
    let len = data.len();
    // Below one full ZMM vector there is nothing to fold; use the scalar path.
    if len < 64 {
        return crc32_slice8(crc, data);
    }
    // Fold constants broadcast to every 128-bit lane; the `_Nv` suffix is the
    // fold distance in 64-byte vectors (8v = 512 B, 4v = 256 B, 2v = 128 B,
    // 1v = 64 B), then 32 B and 16 B for the narrowing steps.
    let mults_8v = _mm512_set_epi64(
        CRC32_X4063_MODG,
        CRC32_X4127_MODG,
        CRC32_X4063_MODG,
        CRC32_X4127_MODG,
        CRC32_X4063_MODG,
        CRC32_X4127_MODG,
        CRC32_X4063_MODG,
        CRC32_X4127_MODG,
    );
    let mults_4v = _mm512_set_epi64(
        CRC32_X2015_MODG,
        CRC32_X2079_MODG,
        CRC32_X2015_MODG,
        CRC32_X2079_MODG,
        CRC32_X2015_MODG,
        CRC32_X2079_MODG,
        CRC32_X2015_MODG,
        CRC32_X2079_MODG,
    );
    let mults_2v = _mm512_set_epi64(
        CRC32_X991_MODG,
        CRC32_X1055_MODG,
        CRC32_X991_MODG,
        CRC32_X1055_MODG,
        CRC32_X991_MODG,
        CRC32_X1055_MODG,
        CRC32_X991_MODG,
        CRC32_X1055_MODG,
    );
    let mults_1v = _mm512_set_epi64(
        CRC32_X479_MODG,
        CRC32_X543_MODG,
        CRC32_X479_MODG,
        CRC32_X543_MODG,
        CRC32_X479_MODG,
        CRC32_X543_MODG,
        CRC32_X479_MODG,
        CRC32_X543_MODG,
    );
    let mults_256b = _mm256_set_epi64x(
        CRC32_X223_MODG,
        CRC32_X287_MODG,
        CRC32_X223_MODG,
        CRC32_X287_MODG,
    );
    let mults_128b = _mm_set_epi64x(CRC32_X95_MODG, CRC32_X159_MODG);
    let barrett = _mm_set_epi64x(CRC32_BARRETT_2, CRC32_BARRETT_1);
    // 512-bit fold: dst ^ clmul(src.lo, mults.lo) ^ clmul(src.hi, mults.hi)
    // per lane; ternarylogic imm 0x96 is a three-way XOR, saving one op.
    macro_rules! fold512 {
        ($src:expr, $dst:expr, $mults:expr) => {{
            let src = $src;
            let mults = $mults;
            _mm512_ternarylogic_epi32(
                _mm512_clmulepi64_epi128(src, mults, 0x00),
                _mm512_clmulepi64_epi128(src, mults, 0x11),
                $dst,
                0x96,
            )
        }};
    }
    // Same three-way-XOR fold for 256-bit vectors.
    macro_rules! fold256 {
        ($src:expr, $dst:expr, $mults:expr) => {{
            let src = $src;
            let mults = $mults;
            _mm256_ternarylogic_epi32(
                _mm256_clmulepi64_epi128(src, mults, 0x00),
                _mm256_clmulepi64_epi128(src, mults, 0x11),
                $dst,
                0x96,
            )
        }};
    }
    // Bounds-checked reinterpretations of slice windows as fixed-size arrays
    // for the unaligned vector loads.
    #[inline(always)]
    fn ld64(data: &[u8], off: usize) -> &[u8; 64] {
        data[off..off + 64].try_into().unwrap()
    }
    #[inline(always)]
    fn ld32(data: &[u8], off: usize) -> &[u8; 32] {
        data[off..off + 32].try_into().unwrap()
    }
    #[inline(always)]
    fn ld16(data: &[u8], off: usize) -> &[u8; 16] {
        data[off..off + 16].try_into().unwrap()
    }
    let mut p = data;
    // Inject the incoming CRC state into the low 32 bits of the first block.
    let crc_v = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, crc as i32);
    let mut x0 = _mm512_xor_si512(_mm512_loadu_si512(ld64(p, 0)), crc_v);
    p = &p[64..];
    if p.len() >= 448 {
        // Enough input for eight parallel accumulator streams (8 x 64 B).
        let mut v0 = x0;
        let mut v1 = _mm512_loadu_si512(ld64(p, 0));
        let mut v2 = _mm512_loadu_si512(ld64(p, 64));
        let mut v3 = _mm512_loadu_si512(ld64(p, 128));
        let mut v4 = _mm512_loadu_si512(ld64(p, 192));
        let mut v5 = _mm512_loadu_si512(ld64(p, 256));
        let mut v6 = _mm512_loadu_si512(ld64(p, 320));
        let mut v7 = _mm512_loadu_si512(ld64(p, 384));
        p = &p[448..];
        // Main loop: fold each stream forward by 512 bytes per iteration.
        while p.len() >= 512 {
            v0 = fold512!(v0, _mm512_loadu_si512(ld64(p, 0)), mults_8v);
            v1 = fold512!(v1, _mm512_loadu_si512(ld64(p, 64)), mults_8v);
            v2 = fold512!(v2, _mm512_loadu_si512(ld64(p, 128)), mults_8v);
            v3 = fold512!(v3, _mm512_loadu_si512(ld64(p, 192)), mults_8v);
            v4 = fold512!(v4, _mm512_loadu_si512(ld64(p, 256)), mults_8v);
            v5 = fold512!(v5, _mm512_loadu_si512(ld64(p, 320)), mults_8v);
            v6 = fold512!(v6, _mm512_loadu_si512(ld64(p, 384)), mults_8v);
            v7 = fold512!(v7, _mm512_loadu_si512(ld64(p, 448)), mults_8v);
            p = &p[512..];
        }
        // Reduce 8 streams -> 4, then absorb one more 256 B block if present.
        v0 = fold512!(v0, v4, mults_4v);
        v1 = fold512!(v1, v5, mults_4v);
        v2 = fold512!(v2, v6, mults_4v);
        v3 = fold512!(v3, v7, mults_4v);
        if p.len() >= 256 {
            v0 = fold512!(v0, _mm512_loadu_si512(ld64(p, 0)), mults_4v);
            v1 = fold512!(v1, _mm512_loadu_si512(ld64(p, 64)), mults_4v);
            v2 = fold512!(v2, _mm512_loadu_si512(ld64(p, 128)), mults_4v);
            v3 = fold512!(v3, _mm512_loadu_si512(ld64(p, 192)), mults_4v);
            p = &p[256..];
        }
        // 4 streams -> 2, plus an optional 128 B block.
        v0 = fold512!(v0, v2, mults_2v);
        v1 = fold512!(v1, v3, mults_2v);
        if p.len() >= 128 {
            v0 = fold512!(v0, _mm512_loadu_si512(ld64(p, 0)), mults_2v);
            v1 = fold512!(v1, _mm512_loadu_si512(ld64(p, 64)), mults_2v);
            p = &p[128..];
        }
        // 2 streams -> 1, plus an optional trailing 64 B block.
        x0 = fold512!(v0, v1, mults_1v);
        if p.len() >= 64 {
            x0 = fold512!(x0, _mm512_loadu_si512(ld64(p, 0)), mults_1v);
            p = &p[64..];
        }
    } else {
        // Short input: single-stream folding, 64 bytes at a time.
        while p.len() >= 64 {
            x0 = fold512!(x0, _mm512_loadu_si512(ld64(p, 0)), mults_1v);
            p = &p[64..];
        }
    }
    // Narrow 512 -> 256 bits by folding the high half onto the low half,
    // then absorb an optional 32 B block.
    let x256_lo = _mm512_castsi512_si256(x0);
    let x256_hi = _mm512_extracti64x4_epi64(x0, 1);
    let mut y0 = fold256!(x256_lo, x256_hi, mults_256b);
    if p.len() >= 32 {
        y0 = fold256!(y0, _mm256_loadu_si256(ld32(p, 0)), mults_256b);
        p = &p[32..];
    }
    // Narrow 256 -> 128 bits, then absorb an optional 16 B block.
    let x128_lo = _mm256_castsi256_si128(y0);
    let x128_hi = _mm256_extracti128_si256(y0, 1);
    let mut x128 = fold128!(x128_lo, x128_hi, mults_128b);
    if p.len() >= 16 {
        x128 = fold128!(x128, _mm_loadu_si128(ld16(p, 0)), mults_128b);
        p = &p[16..];
    }
    // Any sub-16-byte tail is finished with the scalar kernel after reducing
    // the vector remainder back to a 32-bit CRC state.
    if !p.is_empty() {
        let partial = barrett_reduce!(x128, mults_128b, barrett);
        return crc32_slice8(partial, p);
    }
    barrett_reduce!(x128, mults_128b, barrett)
}
// SSE / PCLMULQDQ tier: 128-bit carry-less-multiply folding with up to
// eight XMM accumulators (128 bytes per loop iteration), then Barrett
// reduction. Same structure as the AVX-512 tier, scaled down 4x.
#[cfg(target_arch = "x86_64")]
#[arcane]
fn crc32_impl_x64_crypto(_token: X64CryptoToken, crc: u32, data: &[u8]) -> u32 {
    let len = data.len();
    // Below one full XMM vector, fall back to the scalar kernel.
    if len < 16 {
        return crc32_slice8(crc, data);
    }
    // Bounds-checked reinterpretation of a 16-byte slice window for loading.
    #[inline(always)]
    fn chunk16(data: &[u8], offset: usize) -> &[u8; 16] {
        data[offset..offset + 16].try_into().unwrap()
    }
    // Fold constants by distance; `_Nv` counts 16-byte vectors
    // (8v = 128 B, 4v = 64 B, 2v = 32 B, 1v = 16 B).
    let mults_8v = _mm_set_epi64x(CRC32_X991_MODG, CRC32_X1055_MODG);
    let mults_4v = _mm_set_epi64x(CRC32_X479_MODG, CRC32_X543_MODG);
    let mults_2v = _mm_set_epi64x(CRC32_X223_MODG, CRC32_X287_MODG);
    let mults_1v = _mm_set_epi64x(CRC32_X95_MODG, CRC32_X159_MODG);
    let barrett = _mm_set_epi64x(CRC32_BARRETT_2, CRC32_BARRETT_1);
    // Inject the incoming CRC into the low 32 bits of the first block.
    let mut x0 = _mm_xor_si128(
        _mm_loadu_si128(chunk16(data, 0)),
        _mm_cvtsi32_si128(crc as i32),
    );
    let mut pos = 16;
    if len >= 128 {
        // Enough input for eight parallel accumulator streams (8 x 16 B).
        let mut v0 = x0;
        let mut v1 = _mm_loadu_si128(chunk16(data, 16));
        let mut v2 = _mm_loadu_si128(chunk16(data, 32));
        let mut v3 = _mm_loadu_si128(chunk16(data, 48));
        let mut v4 = _mm_loadu_si128(chunk16(data, 64));
        let mut v5 = _mm_loadu_si128(chunk16(data, 80));
        let mut v6 = _mm_loadu_si128(chunk16(data, 96));
        let mut v7 = _mm_loadu_si128(chunk16(data, 112));
        pos = 128;
        // Main loop: fold each stream forward by 128 bytes per iteration.
        while pos + 128 <= len {
            v0 = fold128!(v0, _mm_loadu_si128(chunk16(data, pos)), mults_8v);
            v1 = fold128!(v1, _mm_loadu_si128(chunk16(data, pos + 16)), mults_8v);
            v2 = fold128!(v2, _mm_loadu_si128(chunk16(data, pos + 32)), mults_8v);
            v3 = fold128!(v3, _mm_loadu_si128(chunk16(data, pos + 48)), mults_8v);
            v4 = fold128!(v4, _mm_loadu_si128(chunk16(data, pos + 64)), mults_8v);
            v5 = fold128!(v5, _mm_loadu_si128(chunk16(data, pos + 80)), mults_8v);
            v6 = fold128!(v6, _mm_loadu_si128(chunk16(data, pos + 96)), mults_8v);
            v7 = fold128!(v7, _mm_loadu_si128(chunk16(data, pos + 112)), mults_8v);
            pos += 128;
        }
        // Reduce 8 streams -> 4, then absorb one more 64 B block if present.
        v0 = fold128!(v0, v4, mults_4v);
        v1 = fold128!(v1, v5, mults_4v);
        v2 = fold128!(v2, v6, mults_4v);
        v3 = fold128!(v3, v7, mults_4v);
        if pos + 64 <= len {
            v0 = fold128!(v0, _mm_loadu_si128(chunk16(data, pos)), mults_4v);
            v1 = fold128!(v1, _mm_loadu_si128(chunk16(data, pos + 16)), mults_4v);
            v2 = fold128!(v2, _mm_loadu_si128(chunk16(data, pos + 32)), mults_4v);
            v3 = fold128!(v3, _mm_loadu_si128(chunk16(data, pos + 48)), mults_4v);
            pos += 64;
        }
        // 4 streams -> 2, plus an optional 32 B block.
        v0 = fold128!(v0, v2, mults_2v);
        v1 = fold128!(v1, v3, mults_2v);
        if pos + 32 <= len {
            v0 = fold128!(v0, _mm_loadu_si128(chunk16(data, pos)), mults_2v);
            v1 = fold128!(v1, _mm_loadu_si128(chunk16(data, pos + 16)), mults_2v);
            pos += 32;
        }
        // 2 streams -> 1, plus an optional trailing 16 B block.
        x0 = fold128!(v0, v1, mults_1v);
        if pos + 16 <= len {
            x0 = fold128!(x0, _mm_loadu_si128(chunk16(data, pos)), mults_1v);
            pos += 16;
        }
    } else {
        // Short input: single-stream folding, 16 bytes at a time.
        while pos + 16 <= len {
            x0 = fold128!(x0, _mm_loadu_si128(chunk16(data, pos)), mults_1v);
            pos += 16;
        }
    }
    // Any sub-16-byte tail is finished with the scalar kernel after reducing
    // the vector remainder back to a 32-bit CRC state.
    let tail = len - pos;
    if tail > 0 {
        let partial = barrett_reduce!(x0, mults_1v, barrett);
        return crc32_slice8(partial, &data[pos..]);
    }
    barrett_reduce!(x0, mults_1v, barrett)
}
// 64x64 -> 128-bit carry-less multiply of the *low* 64-bit lanes of `$a`
// and `$b` (NEON PMULL), returned as a byte vector.
#[cfg(target_arch = "aarch64")]
macro_rules! neon_clmul_low {
    ($a:expr, $b:expr) => {{
        let a = $a;
        let b = $b;
        vreinterpretq_u8_p128(vmull_p64(
            vgetq_lane_p64(vreinterpretq_p64_u8(a), 0),
            vgetq_lane_p64(b, 0),
        ))
    }};
}
// 64x64 -> 128-bit carry-less multiply of the *high* 64-bit lanes of `$a`
// and `$b` (NEON PMULL2), returned as a byte vector.
#[cfg(target_arch = "aarch64")]
macro_rules! neon_clmul_high {
    ($a:expr, $b:expr) => {{
        let a = $a;
        let b = $b;
        vreinterpretq_u8_p128(vmull_high_p64(vreinterpretq_p64_u8(a), b))
    }};
}
// NEON equivalent of `fold128!`: folds `$src` forward by the distance
// encoded in `$mults` and XORs it into `$dst`
// (dst ^ clmul(src.lo, mults.lo) ^ clmul(src.hi, mults.hi)).
#[cfg(target_arch = "aarch64")]
macro_rules! neon_fold_vec {
    ($src:expr, $dst:expr, $mults:expr) => {{
        let src = $src;
        let mults = $mults;
        let a = neon_clmul_low!(src, mults);
        let b = neon_clmul_high!(src, mults);
        veorq_u8(veorq_u8(a, b), $dst)
    }};
}
// NEON equivalent of `barrett_reduce!`: folds the low 64 bits of `$v0` onto
// the high 64 (the `vextq_u8(..., 8)` is a byte-wise right shift by 8, like
// x86 `_mm_bsrli_si128::<8>`), then Barrett-reduces with `$c1`/`$c2` and
// extracts the final CRC from 32-bit lane 2.
#[cfg(target_arch = "aarch64")]
macro_rules! neon_barrett_reduce {
    ($v0:expr, $c0:expr, $c1:expr, $c2:expr) => {{
        let zero = vdupq_n_u8(0);
        let x0 = veorq_u8(neon_clmul_low!($v0, $c0), vextq_u8($v0, zero, 8));
        let x1 = neon_clmul_low!(x0, $c1);
        let x1 = neon_clmul_low!(x1, $c2);
        vgetq_lane_u32(vreinterpretq_u32_u8(veorq_u8(x0, x1)), 2)
    }};
}
// AArch64 NEON / PMULL tier: 128-bit carry-less-multiply folding with up to
// four accumulators (64 bytes per loop iteration), then Barrett reduction.
#[cfg(target_arch = "aarch64")]
#[arcane]
fn crc32_impl_neon_aes(_token: NeonAesToken, crc: u32, data: &[u8]) -> u32 {
    let len = data.len();
    // Below one full vector, fall back to the scalar kernel.
    if len < 16 {
        return crc32_slice8(crc, data);
    }
    // Fold constants, lane order [lo, hi] to mirror the x86 `_mm_set_epi64x`
    // operands; `_NV` counts 16-byte vectors (4V = 64 B, 2V = 32 B, 1V = 16 B).
    static MULTS_1V: [u64; 2] = [CRC32_X159_MODG as u64, CRC32_X95_MODG as u64];
    static MULTS_4V: [u64; 2] = [CRC32_X543_MODG as u64, CRC32_X479_MODG as u64];
    static MULTS_2V: [u64; 2] = [CRC32_X287_MODG as u64, CRC32_X223_MODG as u64];
    // Barrett reduction constants, each in the low 64-bit lane only.
    static BARRETT_0: [u64; 2] = [CRC32_X95_MODG as u64, 0];
    static BARRETT_1: [u64; 2] = [CRC32_BARRETT_1 as u64, 0];
    static BARRETT_2: [u64; 2] = [CRC32_BARRETT_2 as u64, 0];
    let multipliers_1 = vreinterpretq_p64_u64(vld1q_u64(&MULTS_1V));
    let multipliers_4 = vreinterpretq_p64_u64(vld1q_u64(&MULTS_4V));
    let multipliers_2 = vreinterpretq_p64_u64(vld1q_u64(&MULTS_2V));
    let barrett_c0 = vreinterpretq_p64_u64(vld1q_u64(&BARRETT_0));
    let barrett_c1 = vreinterpretq_p64_u64(vld1q_u64(&BARRETT_1));
    let barrett_c2 = vreinterpretq_p64_u64(vld1q_u64(&BARRETT_2));
    // Bounds-checked reinterpretation of a 16-byte slice window for loading.
    #[inline(always)]
    fn ld16(data: &[u8], off: usize) -> &[u8; 16] {
        data[off..off + 16].try_into().unwrap()
    }
    let mut p = data;
    // Inject the incoming CRC into the low 32 bits of the first block.
    let crc_vec = vreinterpretq_u8_u32(vsetq_lane_u32(crc, vdupq_n_u32(0), 0));
    let mut v0 = veorq_u8(vld1q_u8(ld16(p, 0)), crc_vec);
    p = &p[16..];
    if p.len() >= 48 {
        // Enough input for four parallel accumulator streams (4 x 16 B).
        let mut v1 = vld1q_u8(ld16(p, 0));
        let mut v2 = vld1q_u8(ld16(p, 16));
        let mut v3 = vld1q_u8(ld16(p, 32));
        p = &p[48..];
        // Main loop: fold each stream forward by 64 bytes per iteration.
        while p.len() >= 64 {
            v0 = neon_fold_vec!(v0, vld1q_u8(ld16(p, 0)), multipliers_4);
            v1 = neon_fold_vec!(v1, vld1q_u8(ld16(p, 16)), multipliers_4);
            v2 = neon_fold_vec!(v2, vld1q_u8(ld16(p, 32)), multipliers_4);
            v3 = neon_fold_vec!(v3, vld1q_u8(ld16(p, 48)), multipliers_4);
            p = &p[64..];
        }
        // Reduce 4 streams -> 2, then absorb one more 32 B block if present.
        v0 = neon_fold_vec!(v0, v2, multipliers_2);
        v1 = neon_fold_vec!(v1, v3, multipliers_2);
        if p.len() >= 32 {
            v0 = neon_fold_vec!(v0, vld1q_u8(ld16(p, 0)), multipliers_2);
            v1 = neon_fold_vec!(v1, vld1q_u8(ld16(p, 16)), multipliers_2);
            p = &p[32..];
        }
        // 2 streams -> 1, plus an optional trailing 16 B block.
        v0 = neon_fold_vec!(v0, v1, multipliers_1);
        if p.len() >= 16 {
            v0 = neon_fold_vec!(v0, vld1q_u8(ld16(p, 0)), multipliers_1);
            p = &p[16..];
        }
    } else {
        // Short input: single-stream folding, 16 bytes at a time.
        while p.len() >= 16 {
            v0 = neon_fold_vec!(v0, vld1q_u8(ld16(p, 0)), multipliers_1);
            p = &p[16..];
        }
    }
    // Any sub-16-byte tail is finished with the scalar kernel after reducing
    // the vector remainder back to a 32-bit CRC state.
    if !p.is_empty() {
        let partial = neon_barrett_reduce!(v0, barrett_c0, barrett_c1, barrett_c2);
        return crc32_slice8(partial, p);
    }
    neon_barrett_reduce!(v0, barrett_c0, barrett_c1, barrett_c2)
}
// Scalar fallback tier: plain table-driven slice-by-8, no SIMD required.
fn crc32_impl_scalar(_token: ScalarToken, crc: u32, data: &[u8]) -> u32 {
    crc32_slice8(crc, data)
}
/// Table-driven slice-by-8 CRC-32 kernel over the raw (already
/// pre-conditioned) state: processes a short byte-wise prefix so the rest
/// is a whole number of 8-byte words, then one combined table update per
/// word. `CRC32_SLICE8_TABLE` holds eight 256-entry sub-tables laid out
/// back to back.
fn crc32_slice8(mut crc: u32, data: &[u8]) -> u32 {
    let table = &CRC32_SLICE8_TABLE;
    // Peel off `len % 8` leading bytes one at a time.
    let (head, body) = data.split_at(data.len() % 8);
    for &byte in head {
        crc = (crc >> 8) ^ table[usize::from((crc as u8) ^ byte)];
    }
    // The remainder is an exact multiple of 8 bytes; fold the running CRC
    // into the first word and look all eight bytes up in parallel tables.
    for word in body.chunks_exact(8) {
        let lo = u32::from_le_bytes(word[..4].try_into().unwrap()) ^ crc;
        let hi = u32::from_le_bytes(word[4..].try_into().unwrap());
        crc = table[0x700 + usize::from(lo as u8)]
            ^ table[0x600 + usize::from((lo >> 8) as u8)]
            ^ table[0x500 + usize::from((lo >> 16) as u8)]
            ^ table[0x400 + usize::from((lo >> 24) as u8)]
            ^ table[0x300 + usize::from(hi as u8)]
            ^ table[0x200 + usize::from((hi >> 8) as u8)]
            ^ table[0x100 + usize::from((hi >> 16) as u8)]
            ^ table[usize::from((hi >> 24) as u8)];
    }
    crc
}
/// Incremental CRC-32 hasher that also tracks how many bytes it has
/// consumed, which allows checksums of independently hashed segments to be
/// merged via [`Crc32Hasher::combine`].
#[derive(Clone, Debug)]
pub struct Crc32Hasher {
    // Current CRC value (as returned by `crc32`; `finalize` reports it as-is).
    crc: u32,
    // Total bytes hashed so far — the `len2` input that `crc32_combine` needs.
    amount: u64,
}
impl Crc32Hasher {
    /// Creates a hasher with the standard initial state (the CRC-32 of the
    /// empty message, i.e. `0`).
    #[must_use]
    pub const fn new() -> Self {
        Self { crc: 0, amount: 0 }
    }

    /// Creates a hasher that resumes from a previously computed CRC value.
    ///
    /// Note that `amount` restarts at 0, so combining *this* hasher onto
    /// another one only accounts for bytes fed after construction.
    #[must_use]
    pub const fn new_with_initial(init: u32) -> Self {
        Self {
            crc: init,
            amount: 0,
        }
    }

    /// One-shot convenience: the CRC-32 of `data` from the standard
    /// initial state.
    #[must_use]
    pub fn hash(data: &[u8]) -> u32 {
        crc32(0, data)
    }

    /// Feeds `buf` into the running checksum and byte count.
    pub fn update(&mut self, buf: &[u8]) {
        self.crc = crc32(self.crc, buf);
        self.amount += buf.len() as u64;
    }

    /// Returns the current CRC-32 value without consuming or resetting
    /// the hasher.
    #[must_use]
    pub const fn finalize(&self) -> u32 {
        self.crc
    }

    /// Resets the hasher to the freshly-constructed state.
    pub fn reset(&mut self) {
        self.crc = 0;
        self.amount = 0;
    }

    /// Appends `other`'s checksum as if its bytes had been fed into `self`,
    /// using `crc32_combine` (logarithmic in `other.amount`, no data pass).
    pub fn combine(&mut self, other: &Self) {
        self.crc = crc32_combine(self.crc, other.crc, other.amount as usize);
        self.amount += other.amount;
    }

    /// Total number of bytes hashed so far.
    #[must_use]
    pub const fn amount(&self) -> u64 {
        self.amount
    }
}
impl Default for Crc32Hasher {
    /// Equivalent to [`Crc32Hasher::new`]: zero CRC, zero byte count.
    fn default() -> Self {
        Self::new()
    }
}
impl core::hash::Hasher for Crc32Hasher {
    /// Widens the current 32-bit CRC into the `u64` the `Hasher` contract
    /// requires (zero-extended).
    fn finish(&self) -> u64 {
        self.crc as u64
    }
    /// Delegates to [`Crc32Hasher::update`], so the byte count stays in sync.
    fn write(&mut self, bytes: &[u8]) {
        self.update(bytes);
    }
}
// Unit tests: known vectors, incremental hashing, and the `Crc32Hasher` API.
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_initial_value() {
        // The CRC of the empty message from the standard initial state is 0.
        assert_eq!(crc32(0, &[]), 0);
    }
    #[test]
    fn test_known_value() {
        // Standard CRC-32 "check" value for the ASCII string "123456789".
        assert_eq!(crc32(0, b"123456789"), 0xCBF43926);
    }
    #[test]
    fn test_incremental() {
        // Feeding the data in two calls must match the one-shot result.
        let data = b"Hello World";
        let full = crc32(0, data);
        let partial = crc32(0, &data[..5]);
        let incremental = crc32(partial, &data[5..]);
        assert_eq!(full, incremental);
    }
    #[test]
    fn hasher_new_update_finalize() {
        let mut h = Crc32Hasher::new();
        h.update(b"Hello");
        h.update(b" World");
        assert_eq!(h.finalize(), crc32(0, b"Hello World"));
        assert_eq!(h.amount(), 11);
    }
    #[test]
    fn hasher_default() {
        let h = Crc32Hasher::default();
        assert_eq!(h.finalize(), 0);
        assert_eq!(h.amount(), 0);
    }
    #[test]
    fn hasher_new_with_initial() {
        // Resuming from a previously computed CRC must match one-shot hashing.
        let partial = crc32(0, b"Hello");
        let mut h = Crc32Hasher::new_with_initial(partial);
        h.update(b" World");
        assert_eq!(h.finalize(), crc32(0, b"Hello World"));
    }
    #[test]
    fn hasher_hash_one_shot() {
        assert_eq!(Crc32Hasher::hash(b"123456789"), 0xCBF43926);
        assert_eq!(Crc32Hasher::hash(b"123456789"), crc32(0, b"123456789"));
    }
    #[test]
    fn hasher_reset() {
        let mut h = Crc32Hasher::new();
        h.update(b"data");
        h.reset();
        assert_eq!(h.finalize(), 0);
        assert_eq!(h.amount(), 0);
    }
    #[test]
    fn hasher_combine() {
        // combine() must splice two independent checksums into one.
        let mut h1 = Crc32Hasher::new();
        h1.update(b"Hello, ");
        let mut h2 = Crc32Hasher::new();
        h2.update(b"World!");
        h1.combine(&h2);
        assert_eq!(h1.finalize(), crc32(0, b"Hello, World!"));
        assert_eq!(h1.amount(), 13);
    }
    #[test]
    fn hasher_core_hash_hasher_trait() {
        use core::hash::Hasher;
        let mut h = Crc32Hasher::new();
        Hasher::write(&mut h, b"Hello World");
        assert_eq!(Hasher::finish(&h), crc32(0, b"Hello World") as u64);
    }
    #[test]
    fn hasher_clone() {
        let mut h = Crc32Hasher::new();
        h.update(b"Hello");
        let h2 = h.clone();
        assert_eq!(h.finalize(), h2.finalize());
        assert_eq!(h.amount(), h2.amount());
    }
    #[test]
    fn hasher_empty_update() {
        // An empty update must leave both the CRC and the byte count untouched.
        let mut h = Crc32Hasher::new();
        h.update(b"");
        assert_eq!(h.finalize(), 0);
        assert_eq!(h.amount(), 0);
    }
}
// Parity tests against libdeflater's CRC-32, exercising all SIMD tiers,
// size/alignment boundaries, and the combine/hasher APIs. Excluded under
// Miri (FFI into libdeflater and SIMD intrinsics are unsupported there).
#[cfg(all(test, not(miri)))]
mod parity {
    use super::*;
    // One-shot result must match the reference implementation.
    fn check_parity(data: &[u8]) {
        let ours = crc32(0, data);
        let theirs = libdeflater::crc32(data);
        assert_eq!(ours, theirs, "crc32 mismatch for {} bytes", data.len());
    }
    // Two-call (resumed) result must also match the reference.
    fn check_parity_incremental(data: &[u8], split: usize) {
        let split = split.min(data.len());
        let ours = {
            let c = crc32(0, &data[..split]);
            crc32(c, &data[split..])
        };
        let theirs = libdeflater::crc32(data);
        assert_eq!(
            ours,
            theirs,
            "incremental crc32 mismatch for {} bytes split at {}",
            data.len(),
            split
        );
    }
    #[test]
    fn parity_empty() {
        check_parity(&[]);
    }
    #[test]
    fn parity_single_byte() {
        // Every possible single-byte message.
        for b in 0..=255u8 {
            check_parity(&[b]);
        }
    }
    #[test]
    fn parity_all_zeros() {
        for &len in &[1, 100, 8, 16, 64, 65536] {
            check_parity(&vec![0u8; len]);
        }
    }
    #[test]
    fn parity_all_ff() {
        for &len in &[1, 100, 8, 16, 64, 65536] {
            check_parity(&vec![0xFFu8; len]);
        }
    }
    #[test]
    fn parity_sequential() {
        let data: Vec<u8> = (0..=255).cycle().take(100_000).collect();
        check_parity(&data);
    }
    #[test]
    fn parity_alignment_variants() {
        // Vary total length so the SIMD tail paths see every residue.
        for offset in 0..32 {
            let data: Vec<u8> = (0..=255).cycle().take(1000 + offset).collect();
            check_parity(&data);
        }
    }
    #[test]
    fn parity_incremental() {
        let data: Vec<u8> = (0..=255).cycle().take(20_000).collect();
        for &split in &[0, 1, 7, 8, 9, 15, 16, 17, 100, 127, 128, 129, 10000, 20000] {
            check_parity_incremental(&data, split);
        }
    }
    #[test]
    fn parity_large() {
        let data: Vec<u8> = (0..=255).cycle().take(1_000_000).collect();
        check_parity(&data);
    }
    #[test]
    fn parity_simd_boundaries() {
        // Every length up to 300 plus a few large powers of two, to hit the
        // vector-width and multi-stream thresholds of each kernel.
        for len in (0..300).chain([512, 1024, 4096, 65536].iter().copied()) {
            let data: Vec<u8> = (0..=255).cycle().take(len).collect();
            check_parity(&data);
        }
    }
    #[test]
    #[cfg(target_arch = "x86_64")]
    fn pclmulqdq_compiles_in_arcane() {
        use archmage::prelude::*;
        // Smoke test: PCLMULQDQ works inside an #[arcane] function.
        // Carry-less 7 * 3 = 0b111 ^ 0b1110 = 0b1001 = 9.
        #[arcane]
        fn clmul_test(_token: X64CryptoToken, a: u64, b: u64) -> u64 {
            let va = _mm_set_epi64x(0, a as i64);
            let vb = _mm_set_epi64x(0, b as i64);
            let result = _mm_clmulepi64_si128(va, vb, 0x00);
            _mm_extract_epi64(result, 0) as u64
        }
        let Some(token) = X64CryptoToken::summon() else {
            eprintln!("skipping pclmulqdq test: X64CryptoToken not available on this CPU");
            return;
        };
        let result = clmul_test(token, 7, 3);
        assert_eq!(result, 9);
    }
    #[test]
    fn test_crc32_combine_basic() {
        let data1 = b"Hello, ";
        let data2 = b"World!";
        let full = b"Hello, World!";
        let crc_full = super::crc32(0, full);
        let crc1 = super::crc32(0, data1);
        let crc2 = super::crc32(0, data2);
        let combined = super::crc32_combine(crc1, crc2, data2.len());
        assert_eq!(combined, crc_full);
    }
    #[test]
    fn test_crc32_combine_large() {
        let data: Vec<u8> = (0..=255).cycle().take(100_000).collect();
        for split in [1, 100, 1000, 32768, 50000, 99999] {
            let (a, b) = data.split_at(split);
            let crc_full = super::crc32(0, &data);
            let crc1 = super::crc32(0, a);
            let crc2 = super::crc32(0, b);
            let combined = super::crc32_combine(crc1, crc2, b.len());
            assert_eq!(combined, crc_full, "failed at split={split}");
        }
    }
    #[test]
    fn test_crc32_combine_empty() {
        // Combining with an empty segment on either side is the identity.
        let data = b"test data";
        let crc = super::crc32(0, data);
        assert_eq!(super::crc32_combine(crc, 0, 0), crc);
        assert_eq!(super::crc32_combine(0, crc, data.len()), crc);
    }
    #[test]
    #[cfg(all(target_arch = "x86_64", feature = "avx512"))]
    fn avx512_modern_crc32_if_available() {
        use archmage::prelude::*;
        // Only meaningful when the AVX-512 token can actually be summoned.
        if let Some(_token) = X64V4xToken::summon() {
            let data: Vec<u8> = (0..=255).cycle().take(8192).collect();
            let expected = super::crc32(0, &data);
            assert_eq!(expected, libdeflater::crc32(&data));
        }
    }
    #[test]
    fn hasher_parity_with_libdeflater() {
        let data: Vec<u8> = (0..=255).cycle().take(100_000).collect();
        let expected = libdeflater::crc32(&data);
        // One-shot update.
        let mut h = Crc32Hasher::new();
        h.update(&data);
        assert_eq!(h.finalize(), expected);
        // Chunked updates with an awkward chunk size.
        let mut h = Crc32Hasher::new();
        for chunk in data.chunks(1337) {
            h.update(chunk);
        }
        assert_eq!(h.finalize(), expected);
        // Two independent hashers merged via combine().
        let (a, b) = data.split_at(50_000);
        let mut h1 = Crc32Hasher::new();
        h1.update(a);
        let mut h2 = Crc32Hasher::new();
        h2.update(b);
        h1.combine(&h2);
        assert_eq!(h1.finalize(), expected);
        assert_eq!(Crc32Hasher::hash(&data), expected);
    }
    #[test]
    fn crc32_all_simd_tiers() {
        use archmage::testing::{CompileTimePolicy, for_each_token_permutation};
        // Sizes chosen to straddle every kernel's vector-width thresholds.
        let sizes = [
            0, 1, 7, 8, 15, 16, 17, 32, 64, 127, 128, 129, 256, 512, 100_000,
        ];
        let reference: Vec<u32> = sizes
            .iter()
            .map(|&sz| {
                let data: Vec<u8> = (0..=255u8).cycle().take(sz).collect();
                libdeflater::crc32(&data)
            })
            .collect();
        // Run every size under every available token permutation so each
        // dispatch tier is exercised on this machine.
        let report = for_each_token_permutation(CompileTimePolicy::Warn, |perm| {
            for (i, &sz) in sizes.iter().enumerate() {
                let data: Vec<u8> = (0..=255u8).cycle().take(sz).collect();
                let result = super::crc32(0, &data);
                assert_eq!(
                    result, reference[i],
                    "crc32 mismatch at size={sz}, tier: {perm}"
                );
            }
        });
        eprintln!("crc32 permutation test: {report}");
    }
}