/// Which CRC32C implementation a `Crc32C` instance dispatches to.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Backend {
    /// CPU CRC instructions (selected by `Crc32C::new` after runtime detection).
    Hardware,
    /// Portable table-driven implementation.
    Software,
}
/// Checksum value type: every CRC in this file is a 32-bit unsigned integer.
type TCrc = u32;
/// Unsized byte-slice type of the input buffers; always used behind a reference (`&TBuf`).
type TBuf = [u8];
/// CRC32C checksummer; a small `Copy` handle that records which backend to use.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Crc32C(Backend);
impl Crc32C {
    /// Creates a checksummer bound to the fastest backend available at runtime.
    ///
    /// On x86_64 the hardware backend is chosen when SSE4.2 is detected, on
    /// aarch64 when the `crc` feature is detected. In every other case
    /// (including every other target architecture) the software backend is used.
    pub fn new() -> Self {
        #[cfg(target_arch = "x86_64")]
        {
            if std::is_x86_feature_detected!("sse4.2") {
                return Self(Backend::Hardware);
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            if std::arch::is_aarch64_feature_detected!("crc") {
                return Self(Backend::Hardware);
            }
        }
        Self(Backend::Software)
    }

    /// Returns `true` when this instance dispatches to the hardware backend.
    pub fn is_hardware_acceleration_available(&self) -> bool {
        self.0 == Backend::Hardware
    }

    /// Computes the CRC32C checksum of `buf` with the selected backend.
    ///
    /// NOTE(review): the backends consume the input in 8-byte words. Confirm
    /// what the backend kernels do with other lengths before passing a buffer
    /// whose length is not a multiple of 8.
    pub fn crc(&self, buf: &TBuf) -> TCrc {
        match self.0 {
            Backend::Software => crc32c_slice8(buf),
            // SAFETY: in the code visible here `Backend::Hardware` is only
            // constructed by `new()`, after the CPU feature required by the
            // hardware kernel was detected at runtime.
            #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
            Backend::Hardware => unsafe { crc32c_hardware(buf) },
            // No hardware kernel exists on this target (and `new()` never
            // selects it); route to software so the crate still compiles.
            #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
            Backend::Hardware => crc32c_slice8(buf),
        }
    }

    /// Computes the CRC32C checksums of two buffers in one interleaved pass.
    ///
    /// Returns the checksums in the same order as `buffers`. Both buffers are
    /// expected to have the same length.
    pub fn crc_2x(&self, buffers: [&TBuf; 2]) -> [TCrc; 2] {
        match self.0 {
            Backend::Software => crc32c_slice8_2x(buffers),
            // SAFETY: see `crc` - the hardware backend is only selected after
            // runtime feature detection.
            #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
            Backend::Hardware => unsafe { crc32c_hardware_2x(buffers) },
            #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
            Backend::Hardware => crc32c_slice8_2x(buffers),
        }
    }

    /// Computes the CRC32C checksums of four buffers in one interleaved pass.
    ///
    /// Returns the checksums in the same order as `buffers`. All buffers are
    /// expected to have the same length.
    pub fn crc_4x(&self, buffers: [&TBuf; 4]) -> [TCrc; 4] {
        match self.0 {
            Backend::Software => crc32c_slice8_4x(buffers),
            // SAFETY: see `crc` - the hardware backend is only selected after
            // runtime feature detection.
            #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
            Backend::Hardware => unsafe { crc32c_hardware_4x(buffers) },
            #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
            Backend::Hardware => crc32c_slice8_4x(buffers),
        }
    }
}
impl Default for Crc32C {
fn default() -> Self {
Self::new()
}
}
/// Castagnoli polynomial in reflected (least-significant-bit-first) form, as
/// used by the right-shifting table generator; it is the bit reversal of
/// `0x1EDC6F41`.
const CASTAGNOLI_POLYNOMIAL: TCrc = 0x82F6_3B78;
/// Slicing-by-8 lookup tables for CRC32C, evaluated at compile time.
///
/// `CRC_TABLE[0][b]` is the CRC register obtained by pushing byte `b` through
/// eight shift/xor rounds. `CRC_TABLE[k][b]` is `CRC_TABLE[k - 1][b]` advanced
/// by one more zero byte.
const CRC_TABLE: [[TCrc; 0x100]; 8] = {
    let mut lut = [[0 as TCrc; 0x100]; 8];

    // Base table: classic bit-at-a-time CRC of every possible byte value.
    let mut byte = 0usize;
    while byte < 0x100 {
        let mut reg = byte as TCrc;
        let mut round = 0;
        while round < 8 {
            // All-ones when the low bit is set, zero otherwise, so the
            // polynomial is xor-ed in exactly when a 1 bit is shifted out.
            let mask = (reg & 1).wrapping_neg();
            reg = (reg >> 1) ^ (CASTAGNOLI_POLYNOMIAL & mask);
            round += 1;
        }
        lut[0][byte] = reg;
        byte += 1;
    }

    // Derived tables: each level advances the previous level's entry by one
    // byte-step using the (already complete) base table.
    let mut byte = 0usize;
    while byte < 0x100 {
        let mut level = 1;
        while level < 8 {
            let prev = lut[level - 1][byte];
            lut[level][byte] = lut[0][(prev & 0xFF) as usize] ^ (prev >> 8);
            level += 1;
        }
        byte += 1;
    }

    lut
};
/// Computes the CRC32C (Castagnoli) checksum of `buf` in software using the
/// slicing-by-8 tables in `CRC_TABLE`.
///
/// The input is consumed as little-endian 8-byte words. Trailing bytes (when
/// the length is not a multiple of 8) are folded in one at a time, so every
/// length is accepted and an empty buffer yields `0`.
#[inline(always)]
fn crc32c_slice8(buf: &TBuf) -> TCrc {
    // Borrow the table once so all lookups share a single reference to it.
    let table = &CRC_TABLE;

    let words = buf.chunks_exact(8);
    // The remainder is fixed when the iterator is created, so it can be
    // taken before the words are consumed.
    let tail = words.remainder();

    let mut crc: TCrc = !0;
    for chunk in words {
        let mut bytes = [0u8; 8];
        bytes.copy_from_slice(chunk);
        // Explicit little-endian decoding keeps the result identical on
        // big-endian targets.
        let word = u64::from_le_bytes(bytes);
        let lo = crc ^ (word as TCrc);
        let hi = (word >> 32) as TCrc;
        // The first input byte is furthest from the end of the word, hence
        // it uses the highest table.
        crc = table[7][(lo & 0xFF) as usize]
            ^ table[6][((lo >> 8) & 0xFF) as usize]
            ^ table[5][((lo >> 16) & 0xFF) as usize]
            ^ table[4][((lo >> 24) & 0xFF) as usize]
            ^ table[3][(hi & 0xFF) as usize]
            ^ table[2][((hi >> 8) & 0xFF) as usize]
            ^ table[1][((hi >> 16) & 0xFF) as usize]
            ^ table[0][((hi >> 24) & 0xFF) as usize];
    }
    for &byte in tail {
        crc = (crc >> 8) ^ table[0][((crc ^ (byte as TCrc)) & 0xFF) as usize];
    }
    !crc
}
/// Computes the CRC32C checksums of two equally long buffers in software,
/// interleaving the two independent table-lookup chains word by word.
///
/// Any length is accepted: whole little-endian 8-byte words use the
/// slicing-by-8 tables and trailing bytes are folded in one at a time.
///
/// # Panics
///
/// Panics if the two buffers differ in length.
#[inline(always)]
fn crc32c_slice8_2x(buffers: [&TBuf; 2]) -> [TCrc; 2] {
    /// Folds one 8-byte chunk (decoded as a little-endian word) into `crc`.
    #[inline(always)]
    fn fold_word(table: &[[TCrc; 0x100]; 8], crc: TCrc, chunk: &[u8]) -> TCrc {
        let mut bytes = [0u8; 8];
        bytes.copy_from_slice(chunk);
        let word = u64::from_le_bytes(bytes);
        let lo = crc ^ (word as TCrc);
        let hi = (word >> 32) as TCrc;
        table[7][(lo & 0xFF) as usize]
            ^ table[6][((lo >> 8) & 0xFF) as usize]
            ^ table[5][((lo >> 16) & 0xFF) as usize]
            ^ table[4][((lo >> 24) & 0xFF) as usize]
            ^ table[3][(hi & 0xFF) as usize]
            ^ table[2][((hi >> 8) & 0xFF) as usize]
            ^ table[1][((hi >> 16) & 0xFF) as usize]
            ^ table[0][((hi >> 24) & 0xFF) as usize]
    }

    /// Folds a single byte into `crc`.
    #[inline(always)]
    fn fold_byte(table: &[[TCrc; 0x100]; 8], crc: TCrc, byte: u8) -> TCrc {
        (crc >> 8) ^ table[0][((crc ^ (byte as TCrc)) & 0xFF) as usize]
    }

    let [buf0, buf1] = buffers;
    // Checked in release builds too: the streams are walked in lockstep, so
    // unequal lengths would otherwise silently truncate one of them.
    assert!(
        buf0.len() == buf1.len(),
        "each buf in bytes_bufs must be of same length"
    );

    // Borrow the table once so all lookups share a single reference to it.
    let table = &CRC_TABLE;
    let words0 = buf0.chunks_exact(8);
    let words1 = buf1.chunks_exact(8);
    let tail0 = words0.remainder();
    let tail1 = words1.remainder();

    let mut crc0: TCrc = !0;
    let mut crc1: TCrc = !0;
    for (chunk0, chunk1) in words0.zip(words1) {
        crc0 = fold_word(table, crc0, chunk0);
        crc1 = fold_word(table, crc1, chunk1);
    }
    for (&byte0, &byte1) in tail0.iter().zip(tail1) {
        crc0 = fold_byte(table, crc0, byte0);
        crc1 = fold_byte(table, crc1, byte1);
    }
    [!crc0, !crc1]
}
/// Computes the CRC32C checksums of four equally long buffers in software,
/// interleaving the four independent table-lookup chains word by word.
///
/// Any length is accepted: whole little-endian 8-byte words use the
/// slicing-by-8 tables and trailing bytes are folded in one at a time.
///
/// # Panics
///
/// Panics if the buffers do not all have the same length.
#[inline(always)]
fn crc32c_slice8_4x(buffers: [&TBuf; 4]) -> [TCrc; 4] {
    /// Folds one 8-byte chunk (decoded as a little-endian word) into `crc`.
    #[inline(always)]
    fn fold_word(table: &[[TCrc; 0x100]; 8], crc: TCrc, chunk: &[u8]) -> TCrc {
        let mut bytes = [0u8; 8];
        bytes.copy_from_slice(chunk);
        let word = u64::from_le_bytes(bytes);
        let lo = crc ^ (word as TCrc);
        let hi = (word >> 32) as TCrc;
        table[7][(lo & 0xFF) as usize]
            ^ table[6][((lo >> 8) & 0xFF) as usize]
            ^ table[5][((lo >> 16) & 0xFF) as usize]
            ^ table[4][((lo >> 24) & 0xFF) as usize]
            ^ table[3][(hi & 0xFF) as usize]
            ^ table[2][((hi >> 8) & 0xFF) as usize]
            ^ table[1][((hi >> 16) & 0xFF) as usize]
            ^ table[0][((hi >> 24) & 0xFF) as usize]
    }

    /// Folds a single byte into `crc`.
    #[inline(always)]
    fn fold_byte(table: &[[TCrc; 0x100]; 8], crc: TCrc, byte: u8) -> TCrc {
        (crc >> 8) ^ table[0][((crc ^ (byte as TCrc)) & 0xFF) as usize]
    }

    let [buf0, buf1, buf2, buf3] = buffers;
    // Checked in release builds too: the streams are walked in lockstep, so
    // unequal lengths would otherwise silently truncate some of them.
    assert!(
        buffers.iter().all(|b| b.len() == buf0.len()),
        "each buf in bytes_bufs must be of same length"
    );

    // Borrow the table once so all lookups share a single reference to it.
    let table = &CRC_TABLE;
    let words0 = buf0.chunks_exact(8);
    let words1 = buf1.chunks_exact(8);
    let words2 = buf2.chunks_exact(8);
    let words3 = buf3.chunks_exact(8);
    let tail0 = words0.remainder();
    let tail1 = words1.remainder();
    let tail2 = words2.remainder();
    let tail3 = words3.remainder();

    let mut crc0: TCrc = !0;
    let mut crc1: TCrc = !0;
    let mut crc2: TCrc = !0;
    let mut crc3: TCrc = !0;
    for ((chunk0, chunk1), (chunk2, chunk3)) in words0.zip(words1).zip(words2.zip(words3)) {
        crc0 = fold_word(table, crc0, chunk0);
        crc1 = fold_word(table, crc1, chunk1);
        crc2 = fold_word(table, crc2, chunk2);
        crc3 = fold_word(table, crc3, chunk3);
    }
    for ((&byte0, &byte1), (&byte2, &byte3)) in
        tail0.iter().zip(tail1).zip(tail2.iter().zip(tail3))
    {
        crc0 = fold_byte(table, crc0, byte0);
        crc1 = fold_byte(table, crc1, byte1);
        crc2 = fold_byte(table, crc2, byte2);
        crc3 = fold_byte(table, crc3, byte3);
    }
    [!crc0, !crc1, !crc2, !crc3]
}
/// Computes the CRC32C checksum of `buf` with the SSE4.2 CRC32 instructions.
///
/// Whole little-endian 8-byte words go through `_mm_crc32_u64` and trailing
/// bytes through `_mm_crc32_u8`, so every length is accepted.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE4.2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
unsafe fn crc32c_hardware(buf: &TBuf) -> TCrc {
    let words = buf.chunks_exact(8);
    // The remainder is fixed when the iterator is created, so it can be
    // taken before the words are consumed.
    let tail = words.remainder();

    // The 64-bit form of the instruction keeps the CRC in the low 32 bits of
    // a 64-bit register.
    let mut wide: u64 = u64::from(!0u32);
    for chunk in words {
        let mut bytes = [0u8; 8];
        bytes.copy_from_slice(chunk);
        wide = core::arch::x86_64::_mm_crc32_u64(wide, u64::from_le_bytes(bytes));
    }

    let mut crc = wide as TCrc;
    for &byte in tail {
        crc = core::arch::x86_64::_mm_crc32_u8(crc, byte);
    }
    !crc
}
/// Computes the CRC32C checksums of two equally long buffers with the SSE4.2
/// CRC32 instructions, interleaving the two independent dependency chains.
///
/// Whole little-endian 8-byte words go through `_mm_crc32_u64` and trailing
/// bytes through `_mm_crc32_u8`, so every length is accepted.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE4.2.
///
/// # Panics
///
/// Panics if the two buffers differ in length.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
unsafe fn crc32c_hardware_2x(buffers: [&TBuf; 2]) -> [TCrc; 2] {
    /// Decodes an 8-byte chunk as a little-endian word.
    #[inline(always)]
    fn le_word(chunk: &[u8]) -> u64 {
        let mut bytes = [0u8; 8];
        bytes.copy_from_slice(chunk);
        u64::from_le_bytes(bytes)
    }

    let [buf0, buf1] = buffers;
    // Checked in release builds too: the streams are walked in lockstep.
    assert!(
        buf0.len() == buf1.len(),
        "each buf in bytes_bufs must be of same length"
    );

    let words0 = buf0.chunks_exact(8);
    let words1 = buf1.chunks_exact(8);
    let tail0 = words0.remainder();
    let tail1 = words1.remainder();

    let mut wide0: u64 = u64::from(!0u32);
    let mut wide1: u64 = u64::from(!0u32);
    for (chunk0, chunk1) in words0.zip(words1) {
        wide0 = core::arch::x86_64::_mm_crc32_u64(wide0, le_word(chunk0));
        wide1 = core::arch::x86_64::_mm_crc32_u64(wide1, le_word(chunk1));
    }

    let mut crc0 = wide0 as TCrc;
    let mut crc1 = wide1 as TCrc;
    for (&byte0, &byte1) in tail0.iter().zip(tail1) {
        crc0 = core::arch::x86_64::_mm_crc32_u8(crc0, byte0);
        crc1 = core::arch::x86_64::_mm_crc32_u8(crc1, byte1);
    }
    [!crc0, !crc1]
}
/// Computes the CRC32C checksums of four equally long buffers with the SSE4.2
/// CRC32 instructions, interleaving the four independent dependency chains.
///
/// Whole little-endian 8-byte words go through `_mm_crc32_u64` and trailing
/// bytes through `_mm_crc32_u8`, so every length is accepted.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE4.2.
///
/// # Panics
///
/// Panics if the buffers do not all have the same length.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
unsafe fn crc32c_hardware_4x(buffers: [&TBuf; 4]) -> [TCrc; 4] {
    /// Decodes an 8-byte chunk as a little-endian word.
    #[inline(always)]
    fn le_word(chunk: &[u8]) -> u64 {
        let mut bytes = [0u8; 8];
        bytes.copy_from_slice(chunk);
        u64::from_le_bytes(bytes)
    }

    let [buf0, buf1, buf2, buf3] = buffers;
    // Checked in release builds too: the streams are walked in lockstep.
    assert!(
        buffers.iter().all(|b| b.len() == buf0.len()),
        "each buf in bytes_bufs must be of same length"
    );

    let words0 = buf0.chunks_exact(8);
    let words1 = buf1.chunks_exact(8);
    let words2 = buf2.chunks_exact(8);
    let words3 = buf3.chunks_exact(8);
    let tail0 = words0.remainder();
    let tail1 = words1.remainder();
    let tail2 = words2.remainder();
    let tail3 = words3.remainder();

    let mut wide0: u64 = u64::from(!0u32);
    let mut wide1: u64 = u64::from(!0u32);
    let mut wide2: u64 = u64::from(!0u32);
    let mut wide3: u64 = u64::from(!0u32);
    for ((chunk0, chunk1), (chunk2, chunk3)) in words0.zip(words1).zip(words2.zip(words3)) {
        wide0 = core::arch::x86_64::_mm_crc32_u64(wide0, le_word(chunk0));
        wide1 = core::arch::x86_64::_mm_crc32_u64(wide1, le_word(chunk1));
        wide2 = core::arch::x86_64::_mm_crc32_u64(wide2, le_word(chunk2));
        wide3 = core::arch::x86_64::_mm_crc32_u64(wide3, le_word(chunk3));
    }

    let mut crc0 = wide0 as TCrc;
    let mut crc1 = wide1 as TCrc;
    let mut crc2 = wide2 as TCrc;
    let mut crc3 = wide3 as TCrc;
    for ((&byte0, &byte1), (&byte2, &byte3)) in
        tail0.iter().zip(tail1).zip(tail2.iter().zip(tail3))
    {
        crc0 = core::arch::x86_64::_mm_crc32_u8(crc0, byte0);
        crc1 = core::arch::x86_64::_mm_crc32_u8(crc1, byte1);
        crc2 = core::arch::x86_64::_mm_crc32_u8(crc2, byte2);
        crc3 = core::arch::x86_64::_mm_crc32_u8(crc3, byte3);
    }
    [!crc0, !crc1, !crc2, !crc3]
}
/// Computes the CRC32C checksum of `buf` with the AArch64 CRC32C instructions.
///
/// Whole little-endian 8-byte words go through `__crc32cd` and trailing bytes
/// through `__crc32cb`, so every length is accepted.
///
/// # Safety
///
/// The caller must ensure the running CPU supports the `crc` feature.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "crc")]
unsafe fn crc32c_hardware(buf: &TBuf) -> TCrc {
    let words = buf.chunks_exact(8);
    // The remainder is fixed when the iterator is created, so it can be
    // taken before the words are consumed.
    let tail = words.remainder();

    let mut crc: TCrc = !0;
    for chunk in words {
        let mut bytes = [0u8; 8];
        bytes.copy_from_slice(chunk);
        // Explicit little-endian decoding keeps the result identical on
        // big-endian AArch64 targets.
        crc = core::arch::aarch64::__crc32cd(crc, u64::from_le_bytes(bytes));
    }
    for &byte in tail {
        crc = core::arch::aarch64::__crc32cb(crc, byte);
    }
    !crc
}
/// Computes the CRC32C checksums of two equally long buffers with the AArch64
/// CRC32C instructions, interleaving the two independent dependency chains.
///
/// Whole little-endian 8-byte words go through `__crc32cd` and trailing bytes
/// through `__crc32cb`, so every length is accepted.
///
/// # Safety
///
/// The caller must ensure the running CPU supports the `crc` feature.
///
/// # Panics
///
/// Panics if the two buffers differ in length.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "crc")]
unsafe fn crc32c_hardware_2x(buffers: [&TBuf; 2]) -> [TCrc; 2] {
    /// Decodes an 8-byte chunk as a little-endian word.
    #[inline(always)]
    fn le_word(chunk: &[u8]) -> u64 {
        let mut bytes = [0u8; 8];
        bytes.copy_from_slice(chunk);
        u64::from_le_bytes(bytes)
    }

    let [buf0, buf1] = buffers;
    // Checked in release builds too: the streams are walked in lockstep.
    assert!(
        buf0.len() == buf1.len(),
        "each buf in bytes_bufs must be of same length"
    );

    let words0 = buf0.chunks_exact(8);
    let words1 = buf1.chunks_exact(8);
    let tail0 = words0.remainder();
    let tail1 = words1.remainder();

    let mut crc0: TCrc = !0;
    let mut crc1: TCrc = !0;
    for (chunk0, chunk1) in words0.zip(words1) {
        crc0 = core::arch::aarch64::__crc32cd(crc0, le_word(chunk0));
        crc1 = core::arch::aarch64::__crc32cd(crc1, le_word(chunk1));
    }
    for (&byte0, &byte1) in tail0.iter().zip(tail1) {
        crc0 = core::arch::aarch64::__crc32cb(crc0, byte0);
        crc1 = core::arch::aarch64::__crc32cb(crc1, byte1);
    }
    [!crc0, !crc1]
}
/// Computes the CRC32C checksums of four equally long buffers with the AArch64
/// CRC32C instructions, interleaving the four independent dependency chains.
///
/// Whole little-endian 8-byte words go through `__crc32cd` and trailing bytes
/// through `__crc32cb`, so every length is accepted.
///
/// # Safety
///
/// The caller must ensure the running CPU supports the `crc` feature.
///
/// # Panics
///
/// Panics if the buffers do not all have the same length.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "crc")]
unsafe fn crc32c_hardware_4x(buffers: [&TBuf; 4]) -> [TCrc; 4] {
    /// Decodes an 8-byte chunk as a little-endian word.
    #[inline(always)]
    fn le_word(chunk: &[u8]) -> u64 {
        let mut bytes = [0u8; 8];
        bytes.copy_from_slice(chunk);
        u64::from_le_bytes(bytes)
    }

    let [buf0, buf1, buf2, buf3] = buffers;
    // Checked in release builds too: the streams are walked in lockstep.
    assert!(
        buffers.iter().all(|b| b.len() == buf0.len()),
        "each buf in bytes_bufs must be of same length"
    );

    let words0 = buf0.chunks_exact(8);
    let words1 = buf1.chunks_exact(8);
    let words2 = buf2.chunks_exact(8);
    let words3 = buf3.chunks_exact(8);
    let tail0 = words0.remainder();
    let tail1 = words1.remainder();
    let tail2 = words2.remainder();
    let tail3 = words3.remainder();

    let mut crc0: TCrc = !0;
    let mut crc1: TCrc = !0;
    let mut crc2: TCrc = !0;
    let mut crc3: TCrc = !0;
    for ((chunk0, chunk1), (chunk2, chunk3)) in words0.zip(words1).zip(words2.zip(words3)) {
        crc0 = core::arch::aarch64::__crc32cd(crc0, le_word(chunk0));
        crc1 = core::arch::aarch64::__crc32cd(crc1, le_word(chunk1));
        crc2 = core::arch::aarch64::__crc32cd(crc2, le_word(chunk2));
        crc3 = core::arch::aarch64::__crc32cd(crc3, le_word(chunk3));
    }
    for ((&byte0, &byte1), (&byte2, &byte3)) in
        tail0.iter().zip(tail1).zip(tail2.iter().zip(tail3))
    {
        crc0 = core::arch::aarch64::__crc32cb(crc0, byte0);
        crc1 = core::arch::aarch64::__crc32cb(crc1, byte1);
        crc2 = core::arch::aarch64::__crc32cb(crc2, byte2);
        crc3 = core::arch::aarch64::__crc32cb(crc3, byte3);
    }
    [!crc0, !crc1, !crc2, !crc3]
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a deterministic buffer of `len` bytes: `seed`, `seed + 1`, ...
    /// with wrapping arithmetic.
    fn make_buf(len: usize, seed: u8) -> Vec<u8> {
        (0..len).map(|i| seed.wrapping_add(i as u8)).collect()
    }

    /// Reports whether the CPU feature required by the hardware kernels is
    /// present. Only compiled for the architectures that have such kernels.
    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    #[inline]
    fn is_simd_available() -> bool {
        #[cfg(target_arch = "x86_64")]
        return std::is_x86_feature_detected!("sse4.2");
        #[cfg(target_arch = "aarch64")]
        return std::arch::is_aarch64_feature_detected!("crc");
    }

    mod public_api {
        use super::*;

        #[test]
        fn ok_known_crc_vectors() {
            let crc = Crc32C::default();
            assert_eq!(crc.crc(b"12345678"), 0x6087809A);
        }

        /// CRC32C test patterns from RFC 3720, appendix B.4 (all 32 bytes long).
        #[test]
        fn ok_rfc3720_vectors() {
            let crc = Crc32C::default();
            let ascending: Vec<u8> = (0u8..32).collect();
            let descending: Vec<u8> = (0u8..32).rev().collect();
            assert_eq!(crc.crc(&[0u8; 32]), 0x8A9136AA);
            assert_eq!(crc.crc(&[0xFFu8; 32]), 0x62A8AB43);
            assert_eq!(crc.crc(&ascending), 0x46DD794E);
            assert_eq!(crc.crc(&descending), 0x113FDB5C);
        }

        #[test]
        fn ok_crc_is_deterministic() {
            let crc = Crc32C::default();
            let buf = make_buf(0x1000, 0x77);
            let a = crc.crc(&buf);
            let b = crc.crc(&buf);
            let c = crc.crc(&buf);
            assert_eq!(a, b);
            assert_eq!(b, c);
        }

        #[test]
        fn ok_different_buffers_have_different_crc() {
            let crc = Crc32C::default();
            let a = make_buf(0x1000, 0x10);
            let b = make_buf(0x1000, 0x11);
            assert_ne!(crc.crc(&a), crc.crc(&b));
        }

        #[test]
        fn ok_parallel_crc_matches_single() {
            let crc = Crc32C::default();
            let b0 = make_buf(0x1000, 1);
            let b1 = make_buf(0x1000, 2);
            let b2 = make_buf(0x1000, 3);
            let b3 = make_buf(0x1000, 4);
            let s0 = crc.crc(&b0);
            let s1 = crc.crc(&b1);
            let s2 = crc.crc(&b2);
            let s3 = crc.crc(&b3);
            let p2 = crc.crc_2x([&b0, &b1]);
            let p4 = crc.crc_4x([&b0, &b1, &b2, &b3]);
            assert_eq!([s0, s1], p2);
            assert_eq!([s0, s1, s2, s3], p4);
        }

        #[test]
        fn ok_zero_buffer_crc() {
            let crc = Crc32C::default();
            let buf = vec![0u8; 0x1000];
            let a = crc.crc(&buf);
            let b = crc.crc(&buf);
            assert_eq!(a, b);
        }
    }

    // The hardware kernels only exist on these architectures, so the
    // comparison tests are compiled only there.
    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    mod hw_sw_consistency {
        use super::*;

        #[test]
        fn ok_crc_single_buf() {
            if !is_simd_available() {
                return;
            }
            let buf = make_buf(0x1000, 0x0A);
            let sw = crc32c_slice8(&buf);
            // SAFETY: the required CPU feature was detected above.
            let hw = unsafe { crc32c_hardware(&buf) };
            assert_eq!(sw, hw);
        }

        #[test]
        fn ok_crc_random_bufs() {
            // Without this guard the hardware kernel would execute
            // unsupported instructions on CPUs that lack the feature.
            if !is_simd_available() {
                return;
            }
            for seed in 0..0x20u8 {
                let buf = make_buf(0x2000, seed);
                let sw = crc32c_slice8(&buf);
                // SAFETY: the required CPU feature was detected above.
                let hw = unsafe { crc32c_hardware(&buf) };
                assert_eq!(sw, hw);
            }
        }

        #[test]
        fn ok_crc_buf_2x() {
            if !is_simd_available() {
                return;
            }
            let b0 = make_buf(0x1000, 0x0A);
            let b1 = make_buf(0x1000, 0x0B);
            let sw = crc32c_slice8_2x([&b0, &b1]);
            // SAFETY: the required CPU feature was detected above.
            let hw = unsafe { crc32c_hardware_2x([&b0, &b1]) };
            assert_eq!(sw, hw);
        }

        #[test]
        fn ok_crc_buf_4x() {
            if !is_simd_available() {
                return;
            }
            let b0 = make_buf(0x1000, 0x0A);
            let b1 = make_buf(0x1000, 0x0B);
            let b2 = make_buf(0x1000, 0x0C);
            let b3 = make_buf(0x1000, 0x0D);
            let sw = crc32c_slice8_4x([&b0, &b1, &b2, &b3]);
            // SAFETY: the required CPU feature was detected above.
            let hw = unsafe { crc32c_hardware_4x([&b0, &b1, &b2, &b3]) };
            assert_eq!(sw, hw);
        }
    }

    mod corruption_detection {
        use super::*;

        #[test]
        fn ok_single_bit_flip_changes_crc() {
            let crc = Crc32C::default();
            let mut buf = make_buf(0x1000, 0xAA);
            let original = crc.crc(&buf);
            buf[0x1A] ^= 1;
            let corrupted = crc.crc(&buf);
            assert_ne!(original, corrupted);
        }

        #[test]
        fn ok_multiple_bit_flips_change_crc() {
            let crc = Crc32C::default();
            let mut buf = make_buf(0x2000, 0x11);
            let original = crc.crc(&buf);
            buf[0] ^= 0b0000_0001;
            buf[0x12A] ^= 0b1000_0000;
            buf[0x23B] ^= 0b0001_0000;
            buf[0x100B] ^= 0b0100_0000;
            let corrupted = crc.crc(&buf);
            assert_ne!(original, corrupted);
        }

        #[test]
        fn ok_detects_torn_write_simulation() {
            let crc = Crc32C::default();
            let mut buf = make_buf(0x1000, 0x55);
            let original = crc.crc(&buf);
            // Zero out a 128-byte window in the middle of the buffer.
            for b in &mut buf[0x80..0x100] {
                *b = 0;
            }
            let corrupted = crc.crc(&buf);
            assert_ne!(original, corrupted);
        }

        #[test]
        fn ok_detects_random_corruption() {
            let crc = Crc32C::default();
            let mut buf = make_buf(0x1000, 0x42);
            let original = crc.crc(&buf);
            for i in (0..buf.len()).step_by(0x5F) {
                buf[i] ^= 0xFF;
            }
            let corrupted = crc.crc(&buf);
            assert_ne!(original, corrupted);
        }

        #[test]
        fn ok_every_bit_flip_changes_crc() {
            let crc = Crc32C::default();
            let mut buf = make_buf(0x40, 0xAB);
            let base = crc.crc(&buf);
            for i in 0..buf.len() {
                for bit in 0..8 {
                    // Flip one bit, check, then restore it.
                    buf[i] ^= 1 << bit;
                    assert_ne!(base, crc.crc(&buf));
                    buf[i] ^= 1 << bit;
                }
            }
        }
    }
}