#[cfg(all(feature = "std", target_arch = "aarch64"))]
use core::arch::aarch64::*;
#[cfg(all(feature = "std", target_arch = "x86"))]
use core::arch::x86::*;
#[cfg(all(feature = "std", target_arch = "x86_64"))]
use core::arch::x86_64::*;
use internals::slice::SliceExt;
use super::{HashEngine, Midstate, BLOCK_SIZE};
/// SHA-256 "choose" function (FIPS 180-4, sec. 4.1.2): each output bit takes
/// the corresponding bit of `y` where `x` is 1, and of `z` where `x` is 0.
#[allow(non_snake_case)]
const fn Ch(x: u32, y: u32, z: u32) -> u32 { (x & y) ^ (!x & z) }
/// SHA-256 "majority" function (FIPS 180-4, sec. 4.1.2): each output bit is
/// the majority vote of the corresponding bits of `x`, `y` and `z`.
#[allow(non_snake_case)]
const fn Maj(x: u32, y: u32, z: u32) -> u32 { (x & y) ^ (x & z) ^ (y & z) }
/// SHA-256 big sigma-0 state function: `ROTR^2(x) ^ ROTR^13(x) ^ ROTR^22(x)`
/// (FIPS 180-4, sec. 4.1.2, written here with right rotations as in the spec).
#[allow(non_snake_case)]
const fn Sigma0(x: u32) -> u32 { x.rotate_right(2) ^ x.rotate_right(13) ^ x.rotate_right(22) }
/// SHA-256 big sigma-1 state function: `ROTR^6(x) ^ ROTR^11(x) ^ ROTR^25(x)`
/// (FIPS 180-4, sec. 4.1.2, written here with right rotations as in the spec).
#[allow(non_snake_case)]
const fn Sigma1(x: u32) -> u32 { x.rotate_right(6) ^ x.rotate_right(11) ^ x.rotate_right(25) }
const fn sigma0(x: u32) -> u32 { x.rotate_left(25) ^ x.rotate_left(14) ^ (x >> 3) }
const fn sigma1(x: u32) -> u32 { x.rotate_left(15) ^ x.rotate_left(13) ^ (x >> 10) }
#[cfg(feature = "small-hash")]
#[macro_use]
mod small_hash {
    use super::*;

    /// Performs one SHA-256 compression round and returns the only two state
    /// words that change: the new `(d, h)` pair.
    #[rustfmt::skip]
    #[allow(clippy::too_many_arguments)]
    pub(super) const fn round(a: u32, b: u32, c: u32, d: u32, e: u32,
                              f: u32, g: u32, h: u32, k: u32, w: u32) -> (u32, u32) {
        let temp1 = h
            .wrapping_add(Sigma1(e))
            .wrapping_add(Ch(e, f, g))
            .wrapping_add(k)
            .wrapping_add(w);
        let temp2 = Sigma0(a).wrapping_add(Maj(a, b, c));
        (d.wrapping_add(temp1), temp1.wrapping_add(temp2))
    }

    /// A compression round for rounds 16..64: first extends the message
    /// schedule word `w` from `w1`, `w2` and `w3`, then performs an ordinary
    /// round with the extended word. Returns the new `(d, h, w)` triple.
    #[rustfmt::skip]
    #[allow(clippy::too_many_arguments)]
    pub(super) const fn later_round(a: u32, b: u32, c: u32, d: u32, e: u32,
                                    f: u32, g: u32, h: u32, k: u32, w: u32,
                                    w1: u32, w2: u32, w3: u32,
    ) -> (u32, u32, u32) {
        let extended = w.wrapping_add(sigma1(w1)).wrapping_add(w2).wrapping_add(sigma0(w3));
        let (next_d, next_h) = round(a, b, c, d, e, f, g, h, k, extended);
        (next_d, next_h, extended)
    }

    /// Code-size-optimized round macro: delegates to the shared helper
    /// functions above and writes the changed words back in place.
    macro_rules! round(
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $k:expr, $w:expr) => (
            let (next_d, next_h) = small_hash::round($a, $b, $c, $d, $e, $f, $g, $h, $k, $w);
            $d = next_d;
            $h = next_h;
        );
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $k:expr, $w:expr, $w1:expr, $w2:expr, $w3:expr) => (
            let (next_d, next_h, next_w) = small_hash::later_round($a, $b, $c, $d, $e, $f, $g, $h, $k, $w, $w1, $w2, $w3);
            $d = next_d;
            $h = next_h;
            $w = next_w;
        )
    );
}
#[cfg(not(feature = "small-hash"))]
#[macro_use]
mod fast_hash {
    /// Speed-optimized round macro, fully inlined at every call site.
    ///
    /// The 10-argument form performs one SHA-256 compression round, updating
    /// `$d` and `$h` in place. The 13-argument form first extends the message
    /// schedule word `$w` from `$w1`/`$w2`/`$w3`, then performs the round.
    macro_rules! round(
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $k:expr, $w:expr) => (
            let temp2 = Sigma0($a).wrapping_add(Maj($a, $b, $c));
            let temp1 = $h.wrapping_add(Sigma1($e)).wrapping_add(Ch($e, $f, $g)).wrapping_add($k).wrapping_add($w);
            $d = $d.wrapping_add(temp1);
            $h = temp1.wrapping_add(temp2);
        );
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $k:expr, $w:expr, $w1:expr, $w2:expr, $w3:expr) => (
            $w = $w.wrapping_add(sigma1($w1)).wrapping_add($w2).wrapping_add(sigma0($w3));
            round!($a, $b, $c, $d, $e, $f, $g, $h, $k, $w);
        )
    );
}
impl Midstate {
/// Reads a big-endian `u32` from `bytes` starting at `index`.
#[allow(clippy::identity_op)] const fn read_u32(bytes: &[u8], index: usize) -> u32 {
((bytes[index + 0] as u32) << 24)
| ((bytes[index + 1] as u32) << 16)
| ((bytes[index + 2] as u32) << 8)
| ((bytes[index + 3] as u32) << 0)
}
/// Loads the 64-byte block starting at `index` as sixteen big-endian
/// `u32` message-schedule words.
const fn copy_w(bytes: &[u8], index: usize) -> [u32; 16] {
let mut w = [0u32; 16];
// `while` instead of `for`: iterators are not usable in `const fn`.
let mut i = 0;
while i < 16 {
w[i] = Self::read_u32(bytes, index + i * 4);
i += 1;
}
w
}
/// Computes the SHA-256 state after hashing `bytes`, usable in `const`
/// contexts (hence the manual loops and the round macros).
///
/// When `finalize` is true, standard SHA-256 padding (the 0x80 terminator
/// plus the 64-bit big-endian bit length) is applied, so the result is the
/// full digest of `bytes`. When `finalize` is false, the final padded chunk
/// is skipped and the returned state is a midstate over the complete chunks.
pub(super) const fn compute_midstate_unoptimized(bytes: &[u8], finalize: bool) -> Self {
// SHA-256 initial hash values (FIPS 180-4, sec. 5.3.3).
let mut state = [
0x6a09e667u32,
0xbb67ae85,
0x3c6ef372,
0xa54ff53a,
0x510e527f,
0x9b05688c,
0x1f83d9ab,
0x5be0cd19,
];
// Number of 64-byte chunks once padding is accounted for: the data plus
// one 0x80 terminator byte plus the 8-byte length field.
let num_chunks = (bytes.len() + 9).div_ceil(64);
let mut chunk = 0;
#[allow(clippy::precedence)]
while chunk < num_chunks {
// For a midstate, stop before processing the final (padded) chunk.
if !finalize && chunk + 1 == num_chunks {
break;
}
let mut w = if chunk * 64 + 64 <= bytes.len() {
// Chunk lies entirely within the input: load it directly.
Self::copy_w(bytes, chunk * 64)
} else {
// Partial (or all-padding) chunk: copy the data tail, then pad.
let mut buf = [0; 64];
let mut i = 0;
let offset = chunk * 64;
while offset + i < bytes.len() {
buf[i] = bytes[offset + i];
i += 1;
}
// Place the 0x80 terminator immediately after the data. It belongs in
// this chunk either when the tail plus padding fits in one chunk
// (tail <= 55 bytes), or — when the padding spills into an extra
// chunk — only in the second-to-last chunk, where the data ends.
if (bytes.len() % 64 <= 64 - 9) || (chunk + 2 == num_chunks) {
buf[i] = 0x80;
}
#[allow(clippy::identity_op)] #[allow(clippy::erasing_op)]
// The very last chunk carries the total input length in bits as a
// big-endian 64-bit integer in its final 8 bytes.
if chunk + 1 == num_chunks {
let bit_len = bytes.len() as u64 * 8;
buf[64 - 8] = ((bit_len >> 8 * 7) & 0xFF) as u8;
buf[64 - 7] = ((bit_len >> 8 * 6) & 0xFF) as u8;
buf[64 - 6] = ((bit_len >> 8 * 5) & 0xFF) as u8;
buf[64 - 5] = ((bit_len >> 8 * 4) & 0xFF) as u8;
buf[64 - 4] = ((bit_len >> 8 * 3) & 0xFF) as u8;
buf[64 - 3] = ((bit_len >> 8 * 2) & 0xFF) as u8;
buf[64 - 2] = ((bit_len >> 8 * 1) & 0xFF) as u8;
buf[64 - 1] = ((bit_len >> 8 * 0) & 0xFF) as u8;
}
Self::copy_w(&buf, 0)
};
chunk += 1;
// Load the working variables from the current state.
let mut a = state[0];
let mut b = state[1];
let mut c = state[2];
let mut d = state[3];
let mut e = state[4];
let mut f = state[5];
let mut g = state[6];
let mut h = state[7];
// Rounds 0-15 consume the message words directly; rounds 16-63 (the
// 13-argument form) also extend the message schedule in place.
round!(a, b, c, d, e, f, g, h, 0x428a2f98, w[0]);
round!(h, a, b, c, d, e, f, g, 0x71374491, w[1]);
round!(g, h, a, b, c, d, e, f, 0xb5c0fbcf, w[2]);
round!(f, g, h, a, b, c, d, e, 0xe9b5dba5, w[3]);
round!(e, f, g, h, a, b, c, d, 0x3956c25b, w[4]);
round!(d, e, f, g, h, a, b, c, 0x59f111f1, w[5]);
round!(c, d, e, f, g, h, a, b, 0x923f82a4, w[6]);
round!(b, c, d, e, f, g, h, a, 0xab1c5ed5, w[7]);
round!(a, b, c, d, e, f, g, h, 0xd807aa98, w[8]);
round!(h, a, b, c, d, e, f, g, 0x12835b01, w[9]);
round!(g, h, a, b, c, d, e, f, 0x243185be, w[10]);
round!(f, g, h, a, b, c, d, e, 0x550c7dc3, w[11]);
round!(e, f, g, h, a, b, c, d, 0x72be5d74, w[12]);
round!(d, e, f, g, h, a, b, c, 0x80deb1fe, w[13]);
round!(c, d, e, f, g, h, a, b, 0x9bdc06a7, w[14]);
round!(b, c, d, e, f, g, h, a, 0xc19bf174, w[15]);
round!(a, b, c, d, e, f, g, h, 0xe49b69c1, w[0], w[14], w[9], w[1]);
round!(h, a, b, c, d, e, f, g, 0xefbe4786, w[1], w[15], w[10], w[2]);
round!(g, h, a, b, c, d, e, f, 0x0fc19dc6, w[2], w[0], w[11], w[3]);
round!(f, g, h, a, b, c, d, e, 0x240ca1cc, w[3], w[1], w[12], w[4]);
round!(e, f, g, h, a, b, c, d, 0x2de92c6f, w[4], w[2], w[13], w[5]);
round!(d, e, f, g, h, a, b, c, 0x4a7484aa, w[5], w[3], w[14], w[6]);
round!(c, d, e, f, g, h, a, b, 0x5cb0a9dc, w[6], w[4], w[15], w[7]);
round!(b, c, d, e, f, g, h, a, 0x76f988da, w[7], w[5], w[0], w[8]);
round!(a, b, c, d, e, f, g, h, 0x983e5152, w[8], w[6], w[1], w[9]);
round!(h, a, b, c, d, e, f, g, 0xa831c66d, w[9], w[7], w[2], w[10]);
round!(g, h, a, b, c, d, e, f, 0xb00327c8, w[10], w[8], w[3], w[11]);
round!(f, g, h, a, b, c, d, e, 0xbf597fc7, w[11], w[9], w[4], w[12]);
round!(e, f, g, h, a, b, c, d, 0xc6e00bf3, w[12], w[10], w[5], w[13]);
round!(d, e, f, g, h, a, b, c, 0xd5a79147, w[13], w[11], w[6], w[14]);
round!(c, d, e, f, g, h, a, b, 0x06ca6351, w[14], w[12], w[7], w[15]);
round!(b, c, d, e, f, g, h, a, 0x14292967, w[15], w[13], w[8], w[0]);
round!(a, b, c, d, e, f, g, h, 0x27b70a85, w[0], w[14], w[9], w[1]);
round!(h, a, b, c, d, e, f, g, 0x2e1b2138, w[1], w[15], w[10], w[2]);
round!(g, h, a, b, c, d, e, f, 0x4d2c6dfc, w[2], w[0], w[11], w[3]);
round!(f, g, h, a, b, c, d, e, 0x53380d13, w[3], w[1], w[12], w[4]);
round!(e, f, g, h, a, b, c, d, 0x650a7354, w[4], w[2], w[13], w[5]);
round!(d, e, f, g, h, a, b, c, 0x766a0abb, w[5], w[3], w[14], w[6]);
round!(c, d, e, f, g, h, a, b, 0x81c2c92e, w[6], w[4], w[15], w[7]);
round!(b, c, d, e, f, g, h, a, 0x92722c85, w[7], w[5], w[0], w[8]);
round!(a, b, c, d, e, f, g, h, 0xa2bfe8a1, w[8], w[6], w[1], w[9]);
round!(h, a, b, c, d, e, f, g, 0xa81a664b, w[9], w[7], w[2], w[10]);
round!(g, h, a, b, c, d, e, f, 0xc24b8b70, w[10], w[8], w[3], w[11]);
round!(f, g, h, a, b, c, d, e, 0xc76c51a3, w[11], w[9], w[4], w[12]);
round!(e, f, g, h, a, b, c, d, 0xd192e819, w[12], w[10], w[5], w[13]);
round!(d, e, f, g, h, a, b, c, 0xd6990624, w[13], w[11], w[6], w[14]);
round!(c, d, e, f, g, h, a, b, 0xf40e3585, w[14], w[12], w[7], w[15]);
round!(b, c, d, e, f, g, h, a, 0x106aa070, w[15], w[13], w[8], w[0]);
round!(a, b, c, d, e, f, g, h, 0x19a4c116, w[0], w[14], w[9], w[1]);
round!(h, a, b, c, d, e, f, g, 0x1e376c08, w[1], w[15], w[10], w[2]);
round!(g, h, a, b, c, d, e, f, 0x2748774c, w[2], w[0], w[11], w[3]);
round!(f, g, h, a, b, c, d, e, 0x34b0bcb5, w[3], w[1], w[12], w[4]);
round!(e, f, g, h, a, b, c, d, 0x391c0cb3, w[4], w[2], w[13], w[5]);
round!(d, e, f, g, h, a, b, c, 0x4ed8aa4a, w[5], w[3], w[14], w[6]);
round!(c, d, e, f, g, h, a, b, 0x5b9cca4f, w[6], w[4], w[15], w[7]);
round!(b, c, d, e, f, g, h, a, 0x682e6ff3, w[7], w[5], w[0], w[8]);
round!(a, b, c, d, e, f, g, h, 0x748f82ee, w[8], w[6], w[1], w[9]);
round!(h, a, b, c, d, e, f, g, 0x78a5636f, w[9], w[7], w[2], w[10]);
round!(g, h, a, b, c, d, e, f, 0x84c87814, w[10], w[8], w[3], w[11]);
round!(f, g, h, a, b, c, d, e, 0x8cc70208, w[11], w[9], w[4], w[12]);
round!(e, f, g, h, a, b, c, d, 0x90befffa, w[12], w[10], w[5], w[13]);
round!(d, e, f, g, h, a, b, c, 0xa4506ceb, w[13], w[11], w[6], w[14]);
round!(c, d, e, f, g, h, a, b, 0xbef9a3f7, w[14], w[12], w[7], w[15]);
round!(b, c, d, e, f, g, h, a, 0xc67178f2, w[15], w[13], w[8], w[0]);
// Add this chunk's compressed value into the running state.
state[0] = state[0].wrapping_add(a);
state[1] = state[1].wrapping_add(b);
state[2] = state[2].wrapping_add(c);
state[3] = state[3].wrapping_add(d);
state[4] = state[4].wrapping_add(e);
state[5] = state[5].wrapping_add(f);
state[6] = state[6].wrapping_add(g);
state[7] = state[7].wrapping_add(h);
}
// Serialize the eight state words big-endian into the 32 output bytes.
let mut output = [0u8; 32];
let mut i = 0;
#[allow(clippy::identity_op)] while i < 8 {
output[i * 4 + 0] = (state[i + 0] >> 24) as u8;
output[i * 4 + 1] = (state[i + 0] >> 16) as u8;
output[i * 4 + 2] = (state[i + 0] >> 8) as u8;
output[i * 4 + 3] = (state[i + 0] >> 0) as u8;
i += 1;
}
// NOTE(review): `bytes_hashed` records the full input length even when
// `finalize` is false and a trailing partial chunk was not compressed —
// presumably callers only request midstates at 64-byte boundaries; confirm.
Self { bytes: output, bytes_hashed: bytes.len() as u64 }
}
}
impl HashEngine {
/// Runs the SHA-256 compression function over the engine's 64-byte buffer,
/// updating the state `self.h` in place.
///
/// Dispatches at runtime to a hardware-accelerated implementation when the
/// CPU advertises the required features, otherwise to the portable software
/// implementation.
pub(super) fn process_block(&mut self) {
#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
{
// SAFETY of the call below: all required target features were just
// detected at runtime.
if std::is_x86_feature_detected!("sse4.1")
&& std::is_x86_feature_detected!("sha")
&& std::is_x86_feature_detected!("sse2")
&& std::is_x86_feature_detected!("ssse3")
{
return unsafe { self.process_block_simd_x86_intrinsics() };
}
}
#[cfg(all(feature = "std", target_arch = "aarch64"))]
{
// SAFETY of the call below: the `sha2` feature was just detected.
if std::arch::is_aarch64_feature_detected!("sha2") {
return unsafe { self.process_block_simd_arm_intrinsics() };
}
}
self.software_process_block()
}
/// Compresses one block using the x86/x86_64 SHA-NI intrinsics
/// (`_mm_sha256rnds2_epu32` performs two rounds per invocation; the packed
/// 64-bit constants are pairs of SHA-256 round constants).
///
/// # Safety
///
/// The caller must ensure the CPU supports the `sha`, `sse2`, `ssse3` and
/// `sse4.1` target features (checked in `process_block`).
#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
#[target_feature(enable = "sha,sse2,ssse3,sse4.1")]
unsafe fn process_block_simd_x86_intrinsics(&mut self) {
let (mut state0, mut state1);
let (mut msg, mut tmp);
let (mut msg0, mut msg1, mut msg2, mut msg3);
let (abef_save, cdgh_save);
// Byte-shuffle mask that converts each big-endian message word into the
// lane order expected by the SHA intrinsics.
#[allow(non_snake_case)]
let MASK: __m128i =
_mm_set_epi64x(0x0c0d_0e0f_0809_0a0bu64 as i64, 0x0405_0607_0001_0203u64 as i64);
let block_offset = 0;
// Load the eight state words and rearrange them into the two-register
// layout the SHA instructions operate on (undone after compression).
tmp = _mm_loadu_si128(self.h.as_ptr().add(0).cast::<__m128i>());
state1 = _mm_loadu_si128(self.h.as_ptr().add(4).cast::<__m128i>());
tmp = _mm_shuffle_epi32(tmp, 0xB1); state1 = _mm_shuffle_epi32(state1, 0x1B); state0 = _mm_alignr_epi8(tmp, state1, 8); state1 = _mm_blend_epi16(state1, tmp, 0xF0);
{
// Save the pre-compression state; added back at the end (Davies-Meyer).
abef_save = state0;
cdgh_save = state1;
// Rounds 0-3.
msg = _mm_loadu_si128(self.buffer.as_ptr().add(block_offset).cast::<__m128i>());
msg0 = _mm_shuffle_epi8(msg, MASK);
msg = _mm_add_epi32(
msg0,
_mm_set_epi64x(0xE9B5DBA5B5C0FBCFu64 as i64, 0x71374491428A2F98u64 as i64),
);
state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
msg = _mm_shuffle_epi32(msg, 0x0E);
state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
// Rounds 4-7.
msg1 = _mm_loadu_si128(self.buffer.as_ptr().add(block_offset + 16).cast::<__m128i>());
msg1 = _mm_shuffle_epi8(msg1, MASK);
msg = _mm_add_epi32(
msg1,
_mm_set_epi64x(0xAB1C5ED5923F82A4u64 as i64, 0x59F111F13956C25Bu64 as i64),
);
state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
msg = _mm_shuffle_epi32(msg, 0x0E);
state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
msg0 = _mm_sha256msg1_epu32(msg0, msg1);
// Rounds 8-11.
msg2 = _mm_loadu_si128(self.buffer.as_ptr().add(block_offset + 32).cast::<__m128i>());
msg2 = _mm_shuffle_epi8(msg2, MASK);
msg = _mm_add_epi32(
msg2,
_mm_set_epi64x(0x550C7DC3243185BEu64 as i64, 0x12835B01D807AA98u64 as i64),
);
state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
msg = _mm_shuffle_epi32(msg, 0x0E);
state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
msg1 = _mm_sha256msg1_epu32(msg1, msg2);
// Rounds 12-15; from here on the message schedule is extended with the
// msg1/msg2 instructions interleaved between round pairs.
msg3 = _mm_loadu_si128(self.buffer.as_ptr().add(block_offset + 48).cast::<__m128i>());
msg3 = _mm_shuffle_epi8(msg3, MASK);
msg = _mm_add_epi32(
msg3,
_mm_set_epi64x(0xC19BF1749BDC06A7u64 as i64, 0x80DEB1FE72BE5D74u64 as i64),
);
state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
tmp = _mm_alignr_epi8(msg3, msg2, 4);
msg0 = _mm_add_epi32(msg0, tmp);
msg0 = _mm_sha256msg2_epu32(msg0, msg3);
msg = _mm_shuffle_epi32(msg, 0x0E);
state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
msg2 = _mm_sha256msg1_epu32(msg2, msg3);
// Rounds 16-19.
msg = _mm_add_epi32(
msg0,
_mm_set_epi64x(0x240CA1CC0FC19DC6u64 as i64, 0xEFBE4786E49B69C1u64 as i64),
);
state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
tmp = _mm_alignr_epi8(msg0, msg3, 4);
msg1 = _mm_add_epi32(msg1, tmp);
msg1 = _mm_sha256msg2_epu32(msg1, msg0);
msg = _mm_shuffle_epi32(msg, 0x0E);
state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
msg3 = _mm_sha256msg1_epu32(msg3, msg0);
// Rounds 20-23.
msg = _mm_add_epi32(
msg1,
_mm_set_epi64x(0x76F988DA5CB0A9DCu64 as i64, 0x4A7484AA2DE92C6Fu64 as i64),
);
state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
tmp = _mm_alignr_epi8(msg1, msg0, 4);
msg2 = _mm_add_epi32(msg2, tmp);
msg2 = _mm_sha256msg2_epu32(msg2, msg1);
msg = _mm_shuffle_epi32(msg, 0x0E);
state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
msg0 = _mm_sha256msg1_epu32(msg0, msg1);
// Rounds 24-27.
msg = _mm_add_epi32(
msg2,
_mm_set_epi64x(0xBF597FC7B00327C8u64 as i64, 0xA831C66D983E5152u64 as i64),
);
state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
tmp = _mm_alignr_epi8(msg2, msg1, 4);
msg3 = _mm_add_epi32(msg3, tmp);
msg3 = _mm_sha256msg2_epu32(msg3, msg2);
msg = _mm_shuffle_epi32(msg, 0x0E);
state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
msg1 = _mm_sha256msg1_epu32(msg1, msg2);
// Rounds 28-31.
msg = _mm_add_epi32(
msg3,
_mm_set_epi64x(0x1429296706CA6351u64 as i64, 0xD5A79147C6E00BF3u64 as i64),
);
state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
tmp = _mm_alignr_epi8(msg3, msg2, 4);
msg0 = _mm_add_epi32(msg0, tmp);
msg0 = _mm_sha256msg2_epu32(msg0, msg3);
msg = _mm_shuffle_epi32(msg, 0x0E);
state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
msg2 = _mm_sha256msg1_epu32(msg2, msg3);
// Rounds 32-35.
msg = _mm_add_epi32(
msg0,
_mm_set_epi64x(0x53380D134D2C6DFCu64 as i64, 0x2E1B213827B70A85u64 as i64),
);
state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
tmp = _mm_alignr_epi8(msg0, msg3, 4);
msg1 = _mm_add_epi32(msg1, tmp);
msg1 = _mm_sha256msg2_epu32(msg1, msg0);
msg = _mm_shuffle_epi32(msg, 0x0E);
state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
msg3 = _mm_sha256msg1_epu32(msg3, msg0);
// Rounds 36-39.
msg = _mm_add_epi32(
msg1,
_mm_set_epi64x(0x92722C8581C2C92Eu64 as i64, 0x766A0ABB650A7354u64 as i64),
);
state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
tmp = _mm_alignr_epi8(msg1, msg0, 4);
msg2 = _mm_add_epi32(msg2, tmp);
msg2 = _mm_sha256msg2_epu32(msg2, msg1);
msg = _mm_shuffle_epi32(msg, 0x0E);
state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
msg0 = _mm_sha256msg1_epu32(msg0, msg1);
// Rounds 40-43.
msg = _mm_add_epi32(
msg2,
_mm_set_epi64x(0xC76C51A3C24B8B70u64 as i64, 0xA81A664BA2BFE8A1u64 as i64),
);
state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
tmp = _mm_alignr_epi8(msg2, msg1, 4);
msg3 = _mm_add_epi32(msg3, tmp);
msg3 = _mm_sha256msg2_epu32(msg3, msg2);
msg = _mm_shuffle_epi32(msg, 0x0E);
state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
msg1 = _mm_sha256msg1_epu32(msg1, msg2);
// Rounds 44-47.
msg = _mm_add_epi32(
msg3,
_mm_set_epi64x(0x106AA070F40E3585u64 as i64, 0xD6990624D192E819u64 as i64),
);
state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
tmp = _mm_alignr_epi8(msg3, msg2, 4);
msg0 = _mm_add_epi32(msg0, tmp);
msg0 = _mm_sha256msg2_epu32(msg0, msg3);
msg = _mm_shuffle_epi32(msg, 0x0E);
state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
msg2 = _mm_sha256msg1_epu32(msg2, msg3);
// Rounds 48-51.
msg = _mm_add_epi32(
msg0,
_mm_set_epi64x(0x34B0BCB52748774Cu64 as i64, 0x1E376C0819A4C116u64 as i64),
);
state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
tmp = _mm_alignr_epi8(msg0, msg3, 4);
msg1 = _mm_add_epi32(msg1, tmp);
msg1 = _mm_sha256msg2_epu32(msg1, msg0);
msg = _mm_shuffle_epi32(msg, 0x0E);
state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
msg3 = _mm_sha256msg1_epu32(msg3, msg0);
// Rounds 52-55.
msg = _mm_add_epi32(
msg1,
_mm_set_epi64x(0x682E6FF35B9CCA4Fu64 as i64, 0x4ED8AA4A391C0CB3u64 as i64),
);
state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
tmp = _mm_alignr_epi8(msg1, msg0, 4);
msg2 = _mm_add_epi32(msg2, tmp);
msg2 = _mm_sha256msg2_epu32(msg2, msg1);
msg = _mm_shuffle_epi32(msg, 0x0E);
state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
// Rounds 56-59.
msg = _mm_add_epi32(
msg2,
_mm_set_epi64x(0x8CC7020884C87814u64 as i64, 0x78A5636F748F82EEu64 as i64),
);
state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
tmp = _mm_alignr_epi8(msg2, msg1, 4);
msg3 = _mm_add_epi32(msg3, tmp);
msg3 = _mm_sha256msg2_epu32(msg3, msg2);
msg = _mm_shuffle_epi32(msg, 0x0E);
state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
// Rounds 60-63.
msg = _mm_add_epi32(
msg3,
_mm_set_epi64x(0xC67178F2BEF9A3F7u64 as i64, 0xA4506CEB90BEFFFAu64 as i64),
);
state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
msg = _mm_shuffle_epi32(msg, 0x0E);
state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
// Add the saved input state back into the compressed state.
state0 = _mm_add_epi32(state0, abef_save);
state1 = _mm_add_epi32(state1, cdgh_save);
}
// Undo the initial register rearrangement and store the state words.
tmp = _mm_shuffle_epi32(state0, 0x1B); state1 = _mm_shuffle_epi32(state1, 0xB1); state0 = _mm_blend_epi16(tmp, state1, 0xF0); state1 = _mm_alignr_epi8(state1, tmp, 8);
_mm_storeu_si128(self.h.as_mut_ptr().add(0).cast::<__m128i>(), state0);
_mm_storeu_si128(self.h.as_mut_ptr().add(4).cast::<__m128i>(), state1);
}
/// Compresses one block using the aarch64 SHA2 cryptographic extensions.
///
/// # Safety
///
/// The caller must ensure the CPU supports the `sha2` target feature
/// (checked in `process_block`).
#[cfg(all(feature = "std", target_arch = "aarch64"))]
#[target_feature(enable = "sha2")]
unsafe fn process_block_simd_arm_intrinsics(&mut self) {
// SHA-256 round constants (FIPS 180-4, sec. 4.2.2).
#[rustfmt::skip]
const K: [u32; 64] = [
0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2,
];
let (mut state0, mut state1);
let (abcd_save, efgh_save);
let (mut msg0, mut msg1, mut msg2, mut msg3);
let (mut tmp0, mut tmp1, mut tmp2);
// Load the state; save a copy to add back after compression.
state0 = vld1q_u32(self.h.as_ptr().add(0));
state1 = vld1q_u32(self.h.as_ptr().add(4));
abcd_save = state0;
efgh_save = state1;
// Load the 64-byte block and byte-swap each 32-bit word to big-endian.
msg0 = vld1q_u32(self.buffer.as_ptr().add(0).cast::<u32>());
msg1 = vld1q_u32(self.buffer.as_ptr().add(16).cast::<u32>());
msg2 = vld1q_u32(self.buffer.as_ptr().add(32).cast::<u32>());
msg3 = vld1q_u32(self.buffer.as_ptr().add(48).cast::<u32>());
msg0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(msg0)));
msg1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(msg1)));
msg2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(msg2)));
msg3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(msg3)));
// 64 rounds, four at a time: each vsha256hq/vsha256h2q pair performs
// four rounds, with vsha256su0q/vsha256su1q extending the message
// schedule, interleaved to keep the pipeline busy.
tmp0 = vaddq_u32(msg0, vld1q_u32(K.as_ptr().add(0x00)));
msg0 = vsha256su0q_u32(msg0, msg1);
tmp2 = state0;
tmp1 = vaddq_u32(msg1, vld1q_u32(K.as_ptr().add(0x04)));
state0 = vsha256hq_u32(state0, state1, tmp0);
state1 = vsha256h2q_u32(state1, tmp2, tmp0);
msg0 = vsha256su1q_u32(msg0, msg2, msg3);
msg1 = vsha256su0q_u32(msg1, msg2);
tmp2 = state0;
tmp0 = vaddq_u32(msg2, vld1q_u32(K.as_ptr().add(0x08)));
state0 = vsha256hq_u32(state0, state1, tmp1);
state1 = vsha256h2q_u32(state1, tmp2, tmp1);
msg1 = vsha256su1q_u32(msg1, msg3, msg0);
msg2 = vsha256su0q_u32(msg2, msg3);
tmp2 = state0;
tmp1 = vaddq_u32(msg3, vld1q_u32(K.as_ptr().add(0x0c)));
state0 = vsha256hq_u32(state0, state1, tmp0);
state1 = vsha256h2q_u32(state1, tmp2, tmp0);
msg2 = vsha256su1q_u32(msg2, msg0, msg1);
msg3 = vsha256su0q_u32(msg3, msg0);
tmp2 = state0;
tmp0 = vaddq_u32(msg0, vld1q_u32(K.as_ptr().add(0x10)));
state0 = vsha256hq_u32(state0, state1, tmp1);
state1 = vsha256h2q_u32(state1, tmp2, tmp1);
msg3 = vsha256su1q_u32(msg3, msg1, msg2);
msg0 = vsha256su0q_u32(msg0, msg1);
tmp2 = state0;
tmp1 = vaddq_u32(msg1, vld1q_u32(K.as_ptr().add(0x14)));
state0 = vsha256hq_u32(state0, state1, tmp0);
state1 = vsha256h2q_u32(state1, tmp2, tmp0);
msg0 = vsha256su1q_u32(msg0, msg2, msg3);
msg1 = vsha256su0q_u32(msg1, msg2);
tmp2 = state0;
tmp0 = vaddq_u32(msg2, vld1q_u32(K.as_ptr().add(0x18)));
state0 = vsha256hq_u32(state0, state1, tmp1);
state1 = vsha256h2q_u32(state1, tmp2, tmp1);
msg1 = vsha256su1q_u32(msg1, msg3, msg0);
msg2 = vsha256su0q_u32(msg2, msg3);
tmp2 = state0;
tmp1 = vaddq_u32(msg3, vld1q_u32(K.as_ptr().add(0x1c)));
state0 = vsha256hq_u32(state0, state1, tmp0);
state1 = vsha256h2q_u32(state1, tmp2, tmp0);
msg2 = vsha256su1q_u32(msg2, msg0, msg1);
msg3 = vsha256su0q_u32(msg3, msg0);
tmp2 = state0;
tmp0 = vaddq_u32(msg0, vld1q_u32(K.as_ptr().add(0x20)));
state0 = vsha256hq_u32(state0, state1, tmp1);
state1 = vsha256h2q_u32(state1, tmp2, tmp1);
msg3 = vsha256su1q_u32(msg3, msg1, msg2);
msg0 = vsha256su0q_u32(msg0, msg1);
tmp2 = state0;
tmp1 = vaddq_u32(msg1, vld1q_u32(K.as_ptr().add(0x24)));
state0 = vsha256hq_u32(state0, state1, tmp0);
state1 = vsha256h2q_u32(state1, tmp2, tmp0);
msg0 = vsha256su1q_u32(msg0, msg2, msg3);
msg1 = vsha256su0q_u32(msg1, msg2);
tmp2 = state0;
tmp0 = vaddq_u32(msg2, vld1q_u32(K.as_ptr().add(0x28)));
state0 = vsha256hq_u32(state0, state1, tmp1);
state1 = vsha256h2q_u32(state1, tmp2, tmp1);
msg1 = vsha256su1q_u32(msg1, msg3, msg0);
msg2 = vsha256su0q_u32(msg2, msg3);
tmp2 = state0;
tmp1 = vaddq_u32(msg3, vld1q_u32(K.as_ptr().add(0x2c)));
state0 = vsha256hq_u32(state0, state1, tmp0);
state1 = vsha256h2q_u32(state1, tmp2, tmp0);
msg2 = vsha256su1q_u32(msg2, msg0, msg1);
msg3 = vsha256su0q_u32(msg3, msg0);
tmp2 = state0;
tmp0 = vaddq_u32(msg0, vld1q_u32(K.as_ptr().add(0x30)));
state0 = vsha256hq_u32(state0, state1, tmp1);
state1 = vsha256h2q_u32(state1, tmp2, tmp1);
msg3 = vsha256su1q_u32(msg3, msg1, msg2);
// Final rounds: the message schedule is complete, no more extension.
tmp2 = state0;
tmp1 = vaddq_u32(msg1, vld1q_u32(K.as_ptr().add(0x34)));
state0 = vsha256hq_u32(state0, state1, tmp0);
state1 = vsha256h2q_u32(state1, tmp2, tmp0);
tmp2 = state0;
tmp0 = vaddq_u32(msg2, vld1q_u32(K.as_ptr().add(0x38)));
state0 = vsha256hq_u32(state0, state1, tmp1);
state1 = vsha256h2q_u32(state1, tmp2, tmp1);
tmp2 = state0;
tmp1 = vaddq_u32(msg3, vld1q_u32(K.as_ptr().add(0x3c)));
state0 = vsha256hq_u32(state0, state1, tmp0);
state1 = vsha256h2q_u32(state1, tmp2, tmp0);
tmp2 = state0;
state0 = vsha256hq_u32(state0, state1, tmp1);
state1 = vsha256h2q_u32(state1, tmp2, tmp1);
// Add the saved input state back and store the result.
state0 = vaddq_u32(state0, abcd_save);
state1 = vaddq_u32(state1, efgh_save);
vst1q_u32(self.h.as_mut_ptr().add(0), state0);
vst1q_u32(self.h.as_mut_ptr().add(4), state1);
}
/// Portable scalar implementation of the SHA-256 compression function.
fn software_process_block(&mut self) {
debug_assert_eq!(self.buffer.len(), BLOCK_SIZE);
// Load the block as sixteen big-endian message-schedule words.
let mut w = [0u32; 16];
for (w_val, buff_bytes) in w.iter_mut().zip(self.buffer.bitcoin_as_chunks().0) {
*w_val = u32::from_be_bytes(*buff_bytes);
}
// Working variables, initialized from the current state.
let mut a = self.h[0];
let mut b = self.h[1];
let mut c = self.h[2];
let mut d = self.h[3];
let mut e = self.h[4];
let mut f = self.h[5];
let mut g = self.h[6];
let mut h = self.h[7];
// Rounds 0-15 consume the message words directly; rounds 16-63 (the
// 13-argument form) also extend the message schedule in place.
round!(a, b, c, d, e, f, g, h, 0x428a2f98, w[0]);
round!(h, a, b, c, d, e, f, g, 0x71374491, w[1]);
round!(g, h, a, b, c, d, e, f, 0xb5c0fbcf, w[2]);
round!(f, g, h, a, b, c, d, e, 0xe9b5dba5, w[3]);
round!(e, f, g, h, a, b, c, d, 0x3956c25b, w[4]);
round!(d, e, f, g, h, a, b, c, 0x59f111f1, w[5]);
round!(c, d, e, f, g, h, a, b, 0x923f82a4, w[6]);
round!(b, c, d, e, f, g, h, a, 0xab1c5ed5, w[7]);
round!(a, b, c, d, e, f, g, h, 0xd807aa98, w[8]);
round!(h, a, b, c, d, e, f, g, 0x12835b01, w[9]);
round!(g, h, a, b, c, d, e, f, 0x243185be, w[10]);
round!(f, g, h, a, b, c, d, e, 0x550c7dc3, w[11]);
round!(e, f, g, h, a, b, c, d, 0x72be5d74, w[12]);
round!(d, e, f, g, h, a, b, c, 0x80deb1fe, w[13]);
round!(c, d, e, f, g, h, a, b, 0x9bdc06a7, w[14]);
round!(b, c, d, e, f, g, h, a, 0xc19bf174, w[15]);
round!(a, b, c, d, e, f, g, h, 0xe49b69c1, w[0], w[14], w[9], w[1]);
round!(h, a, b, c, d, e, f, g, 0xefbe4786, w[1], w[15], w[10], w[2]);
round!(g, h, a, b, c, d, e, f, 0x0fc19dc6, w[2], w[0], w[11], w[3]);
round!(f, g, h, a, b, c, d, e, 0x240ca1cc, w[3], w[1], w[12], w[4]);
round!(e, f, g, h, a, b, c, d, 0x2de92c6f, w[4], w[2], w[13], w[5]);
round!(d, e, f, g, h, a, b, c, 0x4a7484aa, w[5], w[3], w[14], w[6]);
round!(c, d, e, f, g, h, a, b, 0x5cb0a9dc, w[6], w[4], w[15], w[7]);
round!(b, c, d, e, f, g, h, a, 0x76f988da, w[7], w[5], w[0], w[8]);
round!(a, b, c, d, e, f, g, h, 0x983e5152, w[8], w[6], w[1], w[9]);
round!(h, a, b, c, d, e, f, g, 0xa831c66d, w[9], w[7], w[2], w[10]);
round!(g, h, a, b, c, d, e, f, 0xb00327c8, w[10], w[8], w[3], w[11]);
round!(f, g, h, a, b, c, d, e, 0xbf597fc7, w[11], w[9], w[4], w[12]);
round!(e, f, g, h, a, b, c, d, 0xc6e00bf3, w[12], w[10], w[5], w[13]);
round!(d, e, f, g, h, a, b, c, 0xd5a79147, w[13], w[11], w[6], w[14]);
round!(c, d, e, f, g, h, a, b, 0x06ca6351, w[14], w[12], w[7], w[15]);
round!(b, c, d, e, f, g, h, a, 0x14292967, w[15], w[13], w[8], w[0]);
round!(a, b, c, d, e, f, g, h, 0x27b70a85, w[0], w[14], w[9], w[1]);
round!(h, a, b, c, d, e, f, g, 0x2e1b2138, w[1], w[15], w[10], w[2]);
round!(g, h, a, b, c, d, e, f, 0x4d2c6dfc, w[2], w[0], w[11], w[3]);
round!(f, g, h, a, b, c, d, e, 0x53380d13, w[3], w[1], w[12], w[4]);
round!(e, f, g, h, a, b, c, d, 0x650a7354, w[4], w[2], w[13], w[5]);
round!(d, e, f, g, h, a, b, c, 0x766a0abb, w[5], w[3], w[14], w[6]);
round!(c, d, e, f, g, h, a, b, 0x81c2c92e, w[6], w[4], w[15], w[7]);
round!(b, c, d, e, f, g, h, a, 0x92722c85, w[7], w[5], w[0], w[8]);
round!(a, b, c, d, e, f, g, h, 0xa2bfe8a1, w[8], w[6], w[1], w[9]);
round!(h, a, b, c, d, e, f, g, 0xa81a664b, w[9], w[7], w[2], w[10]);
round!(g, h, a, b, c, d, e, f, 0xc24b8b70, w[10], w[8], w[3], w[11]);
round!(f, g, h, a, b, c, d, e, 0xc76c51a3, w[11], w[9], w[4], w[12]);
round!(e, f, g, h, a, b, c, d, 0xd192e819, w[12], w[10], w[5], w[13]);
round!(d, e, f, g, h, a, b, c, 0xd6990624, w[13], w[11], w[6], w[14]);
round!(c, d, e, f, g, h, a, b, 0xf40e3585, w[14], w[12], w[7], w[15]);
round!(b, c, d, e, f, g, h, a, 0x106aa070, w[15], w[13], w[8], w[0]);
round!(a, b, c, d, e, f, g, h, 0x19a4c116, w[0], w[14], w[9], w[1]);
round!(h, a, b, c, d, e, f, g, 0x1e376c08, w[1], w[15], w[10], w[2]);
round!(g, h, a, b, c, d, e, f, 0x2748774c, w[2], w[0], w[11], w[3]);
round!(f, g, h, a, b, c, d, e, 0x34b0bcb5, w[3], w[1], w[12], w[4]);
round!(e, f, g, h, a, b, c, d, 0x391c0cb3, w[4], w[2], w[13], w[5]);
round!(d, e, f, g, h, a, b, c, 0x4ed8aa4a, w[5], w[3], w[14], w[6]);
round!(c, d, e, f, g, h, a, b, 0x5b9cca4f, w[6], w[4], w[15], w[7]);
round!(b, c, d, e, f, g, h, a, 0x682e6ff3, w[7], w[5], w[0], w[8]);
round!(a, b, c, d, e, f, g, h, 0x748f82ee, w[8], w[6], w[1], w[9]);
round!(h, a, b, c, d, e, f, g, 0x78a5636f, w[9], w[7], w[2], w[10]);
round!(g, h, a, b, c, d, e, f, 0x84c87814, w[10], w[8], w[3], w[11]);
round!(f, g, h, a, b, c, d, e, 0x8cc70208, w[11], w[9], w[4], w[12]);
round!(e, f, g, h, a, b, c, d, 0x90befffa, w[12], w[10], w[5], w[13]);
round!(d, e, f, g, h, a, b, c, 0xa4506ceb, w[13], w[11], w[6], w[14]);
round!(c, d, e, f, g, h, a, b, 0xbef9a3f7, w[14], w[12], w[7], w[15]);
round!(b, c, d, e, f, g, h, a, 0xc67178f2, w[15], w[13], w[8], w[0]);
// Silence the unused-assignment warning from the last round's in-place
// message-schedule update of w[15].
let _ = w[15];
// Add the compressed values back into the state (Davies-Meyer).
self.h[0] = self.h[0].wrapping_add(a);
self.h[1] = self.h[1].wrapping_add(b);
self.h[2] = self.h[2].wrapping_add(c);
self.h[3] = self.h[3].wrapping_add(d);
self.h[4] = self.h[4].wrapping_add(e);
self.h[5] = self.h[5].wrapping_add(f);
self.h[6] = self.h[6].wrapping_add(g);
self.h[7] = self.h[7].wrapping_add(h);
}
}