#![cfg(target_arch = "aarch64")]
use crate::crc32::fusion::aarch64::{clmul_hi_and_xor, clmul_lo_and_xor};
use core::arch::aarch64::{
__crc32cb, __crc32cd, veorq_u64, vgetq_lane_u64, vld1q_u64, vmovq_n_u64, vsetq_lane_u64,
};
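/// CRC-32C (iSCSI, polynomial 0x1EDC6F41) over `buf[..len]`, fusing the
/// ARMv8 CRC instructions with PMULL-based carry-less folding.
///
/// # Safety
///
/// `buf` must be valid for reads of `len` bytes, and the caller must have
/// verified that the CPU supports the `crc` and `aes` target features
/// (the PMULL intrinsics are gated behind `aes`).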
#[inline]
#[target_feature(enable = "crc,aes")]
pub(crate) unsafe fn crc32_iscsi_v12e_v1(mut crc0: u32, mut buf: *const u8, mut len: usize) -> u32 {
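    // Process single bytes until `buf` is 8-byte aligned (or input runs out).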
while len > 0 && (buf as usize & 7) != 0 {
crc0 = __crc32cb(crc0, *buf);
buf = buf.add(1);
len -= 1;
}
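    // If `buf` is 8- but not 16-byte aligned, consume one u64 to reach
    // 16-byte alignment; the read is aligned by the loop above.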
if (buf as usize & 8) != 0 && len >= 8 {
crc0 = __crc32cd(crc0, *(buf as *const u64));
buf = buf.add(8);
len -= 8;
}
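    // Bulk path: fold 192 bytes per iteration across twelve 128-bit lanes,
    // then reduce the lanes pairwise back down to one.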
if len >= 192 {
let end = buf.add(len);
let limit = buf.add(len - 192);
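        // Load the first 192 bytes into twelve 128-bit lanes.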
let mut x0 = vld1q_u64(buf as *const u64);
let mut x1 = vld1q_u64(buf.add(16) as *const u64);
let mut x2 = vld1q_u64(buf.add(32) as *const u64);
let mut x3 = vld1q_u64(buf.add(48) as *const u64);
let mut x4 = vld1q_u64(buf.add(64) as *const u64);
let mut x5 = vld1q_u64(buf.add(80) as *const u64);
let mut x6 = vld1q_u64(buf.add(96) as *const u64);
let mut x7 = vld1q_u64(buf.add(112) as *const u64);
let mut x8 = vld1q_u64(buf.add(128) as *const u64);
let mut x9 = vld1q_u64(buf.add(144) as *const u64);
let mut x10 = vld1q_u64(buf.add(160) as *const u64);
let mut x11 = vld1q_u64(buf.add(176) as *const u64);
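        // Precomputed folding constants for the 192-byte stride: carry-less
        // multiplication by these advances a lane 192 bytes through the
        // message (mod the CRC-32C polynomial).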
let k_vals: [u64; 2] = [0xa87ab8a8, 0xab7aff2a];
let mut k = vld1q_u64(k_vals.as_ptr());
        // Inject the running CRC into the low 64 bits of the first lane; the
        // lane index is a const generic on the stable intrinsic.
        let crc_vec = vsetq_lane_u64::<0>(crc0 as u64, vmovq_n_u64(0));
x0 = veorq_u64(crc_vec, x0);
buf = buf.add(192);
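        // Each iteration folds every lane forward by 192 bytes, XOR-ing in
        // the next block (assuming `clmul_lo_and_xor(a, k, c)` computes
        // clmul_lo(a, k) ^ c, and likewise for the `hi` helper).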
while buf <= limit {
let y0 = clmul_lo_and_xor(x0, k, vld1q_u64(buf as *const u64));
x0 = clmul_hi_and_xor(x0, k, y0);
let y1 = clmul_lo_and_xor(x1, k, vld1q_u64(buf.add(16) as *const u64));
x1 = clmul_hi_and_xor(x1, k, y1);
let y2 = clmul_lo_and_xor(x2, k, vld1q_u64(buf.add(32) as *const u64));
x2 = clmul_hi_and_xor(x2, k, y2);
let y3 = clmul_lo_and_xor(x3, k, vld1q_u64(buf.add(48) as *const u64));
x3 = clmul_hi_and_xor(x3, k, y3);
let y4 = clmul_lo_and_xor(x4, k, vld1q_u64(buf.add(64) as *const u64));
x4 = clmul_hi_and_xor(x4, k, y4);
let y5 = clmul_lo_and_xor(x5, k, vld1q_u64(buf.add(80) as *const u64));
x5 = clmul_hi_and_xor(x5, k, y5);
let y6 = clmul_lo_and_xor(x6, k, vld1q_u64(buf.add(96) as *const u64));
x6 = clmul_hi_and_xor(x6, k, y6);
let y7 = clmul_lo_and_xor(x7, k, vld1q_u64(buf.add(112) as *const u64));
x7 = clmul_hi_and_xor(x7, k, y7);
let y8 = clmul_lo_and_xor(x8, k, vld1q_u64(buf.add(128) as *const u64));
x8 = clmul_hi_and_xor(x8, k, y8);
let y9 = clmul_lo_and_xor(x9, k, vld1q_u64(buf.add(144) as *const u64));
x9 = clmul_hi_and_xor(x9, k, y9);
let y10 = clmul_lo_and_xor(x10, k, vld1q_u64(buf.add(160) as *const u64));
x10 = clmul_hi_and_xor(x10, k, y10);
let y11 = clmul_lo_and_xor(x11, k, vld1q_u64(buf.add(176) as *const u64));
x11 = clmul_hi_and_xor(x11, k, y11);
buf = buf.add(192);
}
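        // Reduce twelve lanes to six: fold each even lane over the 16-byte
        // gap into its neighbor, keeping the result in the even lane.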
let k_vals: [u64; 2] = [0xf20c0dfe, 0x493c7d27];
k = vld1q_u64(k_vals.as_ptr());
let y0 = clmul_lo_and_xor(x0, k, x1);
x0 = clmul_hi_and_xor(x0, k, y0);
let y2 = clmul_lo_and_xor(x2, k, x3);
x2 = clmul_hi_and_xor(x2, k, y2);
let y4 = clmul_lo_and_xor(x4, k, x5);
x4 = clmul_hi_and_xor(x4, k, y4);
let y6 = clmul_lo_and_xor(x6, k, x7);
x6 = clmul_hi_and_xor(x6, k, y6);
let y8 = clmul_lo_and_xor(x8, k, x9);
x8 = clmul_hi_and_xor(x8, k, y8);
let y10 = clmul_lo_and_xor(x10, k, x11);
x10 = clmul_hi_and_xor(x10, k, y10);
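        // Six lanes to three: the same folding step at a 32-byte distance.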
let k_vals: [u64; 2] = [0x3da6d0cb, 0xba4fc28e];
k = vld1q_u64(k_vals.as_ptr());
let y0 = clmul_lo_and_xor(x0, k, x2);
x0 = clmul_hi_and_xor(x0, k, y0);
let y4 = clmul_lo_and_xor(x4, k, x6);
x4 = clmul_hi_and_xor(x4, k, y4);
let y8 = clmul_lo_and_xor(x8, k, x10);
x8 = clmul_hi_and_xor(x8, k, y8);
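        // Three lanes to one: fold at a 64-byte distance, twice.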
let k_vals: [u64; 2] = [0x740eef02, 0x9e4addf8];
k = vld1q_u64(k_vals.as_ptr());
let y0 = clmul_lo_and_xor(x0, k, x4);
x0 = clmul_hi_and_xor(x0, k, y0);
x4 = x8;
let y0 = clmul_lo_and_xor(x0, k, x4);
x0 = clmul_hi_and_xor(x0, k, y0);
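        // Collapse the surviving 128-bit lane to a 32-bit CRC; each
        // __crc32cd consumes 64 bits.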
        crc0 = __crc32cd(0, vgetq_lane_u64::<0>(x0));
        crc0 = __crc32cd(crc0, vgetq_lane_u64::<1>(x0));
len = end.offset_from(buf) as usize;
}
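    // Medium path: fold one 16-byte block at a time.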
if len >= 16 {
let mut x0 = vld1q_u64(buf as *const u64);
let k_vals: [u64; 2] = [0xf20c0dfe, 0x493c7d27];
let k = vld1q_u64(k_vals.as_ptr());
        // Inject the running CRC into the low 64 bits of the lane.
        let crc_vec = vsetq_lane_u64::<0>(crc0 as u64, vmovq_n_u64(0));
x0 = veorq_u64(crc_vec, x0);
buf = buf.add(16);
len -= 16;
while len >= 16 {
let y0 = clmul_lo_and_xor(x0, k, vld1q_u64(buf as *const u64));
x0 = clmul_hi_and_xor(x0, k, y0);
buf = buf.add(16);
len -= 16;
}
        crc0 = __crc32cd(0, vgetq_lane_u64::<0>(x0));
        crc0 = __crc32cd(crc0, vgetq_lane_u64::<1>(x0));
}
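    // Consume remaining 8-byte words; `buf` is still 8-byte aligned here.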
while len >= 8 {
crc0 = __crc32cd(crc0, *(buf as *const u64));
buf = buf.add(8);
len -= 8;
}
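    // Final tail of fewer than 8 bytes, one byte at a time.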
while len > 0 {
crc0 = __crc32cb(crc0, *buf);
buf = buf.add(1);
len -= 1;
}
crc0
}