1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
use super::{Xoodoo, ROUND_KEYS};
use core::arch::x86_64::*;
impl Xoodoo {
#[allow(
non_upper_case_globals,
clippy::many_single_char_names,
clippy::cast_ptr_alignment
)]
pub fn permute(&mut self) {
let st = &mut self.st;
unsafe {
let rho_east_2 = _mm_set_epi32(0x0605_0407, 0x0201_0003, 0x0e0d_0c0f, 0x0a09_080b);
let mut a = _mm_loadu_si128(st.as_ptr().add(0) as *const _);
let mut b = _mm_loadu_si128(st.as_ptr().add(4) as *const _);
let mut c = _mm_loadu_si128(st.as_ptr().add(8) as *const _);
for &round_key in &ROUND_KEYS {
let mut p = _mm_shuffle_epi32(_mm_xor_si128(_mm_xor_si128(a, b), c), 0x93);
let mut e = _mm_or_si128(_mm_slli_epi32(p, 5), _mm_srli_epi32(p, 32 - 5));
p = _mm_or_si128(_mm_slli_epi32(p, 14), _mm_srli_epi32(p, 32 - 14));
e = _mm_xor_si128(e, p);
a = _mm_xor_si128(a, e);
b = _mm_xor_si128(b, e);
c = _mm_xor_si128(c, e);
b = _mm_shuffle_epi32(b, 0x93);
c = _mm_or_si128(_mm_slli_epi32(c, 11), _mm_srli_epi32(c, 32 - 11));
a = _mm_xor_si128(a, _mm_set_epi32(0, 0, 0, round_key as _));
a = _mm_xor_si128(a, _mm_andnot_si128(b, c));
b = _mm_xor_si128(b, _mm_andnot_si128(c, a));
c = _mm_xor_si128(c, _mm_andnot_si128(a, b));
b = _mm_or_si128(_mm_slli_epi32(b, 1), _mm_srli_epi32(b, 32 - 1));
c = _mm_shuffle_epi8(c, rho_east_2);
}
_mm_storeu_si128(st.as_mut_ptr().add(0) as *mut _, a);
_mm_storeu_si128(st.as_mut_ptr().add(4) as *mut _, b);
_mm_storeu_si128(st.as_mut_ptr().add(8) as *mut _, c);
}
}
}