use core::arch::x86_64::*;
/// Compares the first `len` bytes of `a` and `b` without branching on the
/// byte values.
///
/// Full 32-byte blocks are XOR-ed and OR-accumulated with AVX2 intrinsics;
/// the remaining `len % 32` bytes are folded in one at a time.
///
/// Returns `0` when the two prefixes are identical and `1` otherwise.
///
/// The executing CPU must support AVX2, since the `_mm256_*` intrinsics are
/// used unconditionally.
///
/// # Panics
///
/// Panics if `len` is greater than `a.len()` or `b.len()`.
pub fn verify(a: &[u8], b: &[u8], len: usize) -> u8 {
    // Bounds-check once up front: a too-short slice must panic instead of
    // letting a 32-byte vector load run past the end of the buffer.
    let (a, b) = (&a[..len], &b[..len]);

    let a_blocks = a.chunks_exact(32);
    let b_blocks = b.chunks_exact(32);
    let (a_tail, b_tail) = (a_blocks.remainder(), b_blocks.remainder());

    // SAFETY: every chunk yielded by `chunks_exact(32)` is exactly 32 bytes
    // long, so each unaligned 256-bit load stays inside its slice. The
    // intrinsics themselves require AVX2 on the running CPU (see docs above).
    let mut r: u64 = unsafe {
        let mut acc = _mm256_setzero_si256();
        for (ca, cb) in a_blocks.zip(b_blocks) {
            let va = _mm256_loadu_si256(ca.as_ptr() as *const __m256i);
            let vb = _mm256_loadu_si256(cb.as_ptr() as *const __m256i);
            // Any differing bit survives the XOR and sticks in the accumulator.
            acc = _mm256_or_si256(acc, _mm256_xor_si256(va, vb));
        }
        // `testz(acc, acc)` is 1 iff `acc` is all zero, so this is 0 on
        // equality and 1 on any difference.
        1 - _mm256_testz_si256(acc, acc) as u64
    };

    for (x, y) in a_tail.iter().zip(b_tail) {
        r |= (x ^ y) as u64;
    }

    // Collapse any non-zero accumulator to exactly 1: negating a non-zero
    // value below 2^63 sets the top bit.
    (r.wrapping_neg() >> 63) as u8
}
/// Conditionally copies the first `len` bytes of `x` into `r`.
///
/// When `b == 1` the bytes are copied; when `b == 0` `r` is left unchanged.
/// The selection is done with masks rather than a branch on `b`. Other values
/// of `b` are outside the intended contract.
///
/// Full 32-byte blocks are handled with AVX2 intrinsics and the remaining
/// `len % 32` bytes with a scalar masked XOR. The executing CPU must support
/// AVX2, since the `_mm256_*` intrinsics are used unconditionally.
///
/// # Panics
///
/// Panics if `len` is greater than `r.len()` or `x.len()`.
pub fn cmov(r: &mut [u8], x: &[u8], len: usize, b: u8) {
    // Bounds-check once up front: a too-short slice must panic instead of
    // letting a 32-byte vector load/store run past the end of the buffer.
    let (r, x) = (&mut r[..len], &x[..len]);

    let mut r_blocks = r.chunks_exact_mut(32);
    let x_blocks = x.chunks_exact(32);
    let x_tail = x_blocks.remainder();

    // SAFETY: every chunk yielded by `chunks_exact{,_mut}(32)` is exactly
    // 32 bytes long, so each unaligned 256-bit load/store stays inside its
    // slice. The intrinsics themselves require AVX2 on the running CPU.
    unsafe {
        // `_mm256_blendv_epi8` selects on the top bit of every mask byte, so
        // the flag must be expanded to all-ones (0 - b) before broadcasting;
        // broadcasting the raw 0/1 value would never select `x`.
        let select = _mm256_set1_epi64x((b as i64).wrapping_neg());
        for (rc, xc) in r_blocks.by_ref().zip(x_blocks) {
            let r_vec = _mm256_loadu_si256(rc.as_ptr() as *const __m256i);
            let x_vec = _mm256_loadu_si256(xc.as_ptr() as *const __m256i);
            let out = _mm256_blendv_epi8(r_vec, x_vec, select);
            _mm256_storeu_si256(rc.as_mut_ptr() as *mut __m256i, out);
        }
    }

    // Scalar tail: mask is 0x00 for b == 0 and 0xFF for b == 1.
    let mask = b.wrapping_neg();
    for (rb, xb) in r_blocks.into_remainder().iter_mut().zip(x_tail) {
        *rb ^= mask & (*xb ^ *rb);
    }
}