cyfs_sha2/
sha512_utils.rs

1#![cfg_attr(feature = "cargo-clippy", allow(clippy::many_single_char_names))]
2
3use simd::u64x2;
4use consts::{BLOCK_LEN, K64X2};
5use block_buffer::byteorder::{BE, ByteOrder};
6
7/// Not an intrinsic, but works like an unaligned load.
8#[inline]
9fn sha512load(v0: u64x2, v1: u64x2) -> u64x2 {
10    u64x2(v1.1, v0.0)
11}
12
13/// Performs 2 rounds of the SHA-512 message schedule update.
14pub fn sha512_schedule_x2(v0: u64x2, v1: u64x2, v4to5: u64x2, v7: u64x2)
15                          -> u64x2 {
16
17    // sigma 0
18    fn sigma0(x: u64) -> u64 {
19        ((x << 63) | (x >> 1)) ^ ((x << 56) | (x >> 8)) ^ (x >> 7)
20    }
21
22    // sigma 1
23    fn sigma1(x: u64) -> u64 {
24        ((x << 45) | (x >> 19)) ^ ((x << 3) | (x >> 61)) ^ (x >> 6)
25    }
26
27    let u64x2(w1, w0) = v0;
28    let u64x2(_, w2) = v1;
29    let u64x2(w10, w9) = v4to5;
30    let u64x2(w15, w14) = v7;
31
32    let w16 =
33        sigma1(w14).wrapping_add(w9).wrapping_add(sigma0(w1)).wrapping_add(w0);
34    let w17 =
35        sigma1(w15).wrapping_add(w10).wrapping_add(sigma0(w2)).wrapping_add(w1);
36
37    u64x2(w17, w16)
38}
39
40/// Performs one round of the SHA-512 message block digest.
41pub fn sha512_digest_round(ae: u64x2, bf: u64x2, cg: u64x2, dh: u64x2,
42                           wk0: u64)
43                           -> u64x2 {
44
45    macro_rules! big_sigma0 {
46        ($a:expr) => (($a.rotate_right(28) ^ $a.rotate_right(34) ^ $a.rotate_right(39)))
47    }
48    macro_rules! big_sigma1 {
49        ($a:expr) => (($a.rotate_right(14) ^ $a.rotate_right(18) ^ $a.rotate_right(41)))
50    }
51    macro_rules! bool3ary_202 {
52        ($a:expr, $b:expr, $c:expr) => ($c ^ ($a & ($b ^ $c)))
53    } // Choose, MD5F, SHA1C
54    macro_rules! bool3ary_232 {
55        ($a:expr, $b:expr, $c:expr) => (($a & $b) ^ ($a & $c) ^ ($b & $c))
56    } // Majority, SHA1M
57
58    let u64x2(a0, e0) = ae;
59    let u64x2(b0, f0) = bf;
60    let u64x2(c0, g0) = cg;
61    let u64x2(d0, h0) = dh;
62
63    // a round
64    let x0 = big_sigma1!(e0)
65        .wrapping_add(bool3ary_202!(e0, f0, g0))
66        .wrapping_add(wk0)
67        .wrapping_add(h0);
68    let y0 = big_sigma0!(a0).wrapping_add(bool3ary_232!(a0, b0, c0));
69    let (a1, _, _, _, e1, _, _, _) =
70        (x0.wrapping_add(y0), a0, b0, c0, x0.wrapping_add(d0), e0, f0, g0);
71
72    u64x2(a1, e1)
73}
74
75/// Process a block with the SHA-512 algorithm.
76pub fn sha512_digest_block_u64(state: &mut [u64; 8], block: &[u64; 16]) {
77    let k = &K64X2;
78
79    macro_rules! schedule {
80        ($v0:expr, $v1:expr, $v4:expr, $v5:expr, $v7:expr) => (
81             sha512_schedule_x2($v0, $v1, sha512load($v4, $v5), $v7)
82        )
83    }
84
85    macro_rules! rounds4 {
86        ($ae:ident, $bf:ident, $cg:ident, $dh:ident, $wk0:expr, $wk1:expr) => {
87            {
88                let u64x2(u, t) = $wk0;
89                let u64x2(w, v) = $wk1;
90
91                $dh = sha512_digest_round($ae, $bf, $cg, $dh, t);
92                $cg = sha512_digest_round($dh, $ae, $bf, $cg, u);
93                $bf = sha512_digest_round($cg, $dh, $ae, $bf, v);
94                $ae = sha512_digest_round($bf, $cg, $dh, $ae, w);
95            }
96        }
97    }
98
99    let mut ae = u64x2(state[0], state[4]);
100    let mut bf = u64x2(state[1], state[5]);
101    let mut cg = u64x2(state[2], state[6]);
102    let mut dh = u64x2(state[3], state[7]);
103
104    // Rounds 0..20
105    let (mut w1, mut w0) = (u64x2(block[3], block[2]),
106                            u64x2(block[1], block[0]));
107    rounds4!(ae, bf, cg, dh, k[0] + w0, k[1] + w1);
108    let (mut w3, mut w2) = (u64x2(block[7], block[6]),
109                            u64x2(block[5], block[4]));
110    rounds4!(ae, bf, cg, dh, k[2] + w2, k[3] + w3);
111    let (mut w5, mut w4) = (u64x2(block[11], block[10]),
112                            u64x2(block[9], block[8]));
113    rounds4!(ae, bf, cg, dh, k[4] + w4, k[5] + w5);
114    let (mut w7, mut w6) = (u64x2(block[15], block[14]),
115                            u64x2(block[13], block[12]));
116    rounds4!(ae, bf, cg, dh, k[6] + w6, k[7] + w7);
117    let mut w8 = schedule!(w0, w1, w4, w5, w7);
118    let mut w9 = schedule!(w1, w2, w5, w6, w8);
119    rounds4!(ae, bf, cg, dh, k[8] + w8, k[9] + w9);
120
121    // Rounds 20..40
122    w0 = schedule!(w2, w3, w6, w7, w9);
123    w1 = schedule!(w3, w4, w7, w8, w0);
124    rounds4!(ae, bf, cg, dh, k[10] + w0, k[11] + w1);
125    w2 = schedule!(w4, w5, w8, w9, w1);
126    w3 = schedule!(w5, w6, w9, w0, w2);
127    rounds4!(ae, bf, cg, dh, k[12] + w2, k[13] + w3);
128    w4 = schedule!(w6, w7, w0, w1, w3);
129    w5 = schedule!(w7, w8, w1, w2, w4);
130    rounds4!(ae, bf, cg, dh, k[14] + w4, k[15] + w5);
131    w6 = schedule!(w8, w9, w2, w3, w5);
132    w7 = schedule!(w9, w0, w3, w4, w6);
133    rounds4!(ae, bf, cg, dh, k[16] + w6, k[17] + w7);
134    w8 = schedule!(w0, w1, w4, w5, w7);
135    w9 = schedule!(w1, w2, w5, w6, w8);
136    rounds4!(ae, bf, cg, dh, k[18] + w8, k[19] + w9);
137
138    // Rounds 40..60
139    w0 = schedule!(w2, w3, w6, w7, w9);
140    w1 = schedule!(w3, w4, w7, w8, w0);
141    rounds4!(ae, bf, cg, dh, k[20] + w0, k[21] + w1);
142    w2 = schedule!(w4, w5, w8, w9, w1);
143    w3 = schedule!(w5, w6, w9, w0, w2);
144    rounds4!(ae, bf, cg, dh, k[22] + w2, k[23] + w3);
145    w4 = schedule!(w6, w7, w0, w1, w3);
146    w5 = schedule!(w7, w8, w1, w2, w4);
147    rounds4!(ae, bf, cg, dh, k[24] + w4, k[25] + w5);
148    w6 = schedule!(w8, w9, w2, w3, w5);
149    w7 = schedule!(w9, w0, w3, w4, w6);
150    rounds4!(ae, bf, cg, dh, k[26] + w6, k[27] + w7);
151    w8 = schedule!(w0, w1, w4, w5, w7);
152    w9 = schedule!(w1, w2, w5, w6, w8);
153    rounds4!(ae, bf, cg, dh, k[28] + w8, k[29] + w9);
154
155    // Rounds 60..80
156    w0 = schedule!(w2, w3, w6, w7, w9);
157    w1 = schedule!(w3, w4, w7, w8, w0);
158    rounds4!(ae, bf, cg, dh, k[30] + w0, k[31] + w1);
159    w2 = schedule!(w4, w5, w8, w9, w1);
160    w3 = schedule!(w5, w6, w9, w0, w2);
161    rounds4!(ae, bf, cg, dh, k[32] + w2, k[33] + w3);
162    w4 = schedule!(w6, w7, w0, w1, w3);
163    w5 = schedule!(w7, w8, w1, w2, w4);
164    rounds4!(ae, bf, cg, dh, k[34] + w4, k[35] + w5);
165    w6 = schedule!(w8, w9, w2, w3, w5);
166    w7 = schedule!(w9, w0, w3, w4, w6);
167    rounds4!(ae, bf, cg, dh, k[36] + w6, k[37] + w7);
168    w8 = schedule!(w0, w1, w4, w5, w7);
169    w9 = schedule!(w1, w2, w5, w6, w8);
170    rounds4!(ae, bf, cg, dh, k[38] + w8, k[39] + w9);
171
172    let u64x2(a, e) = ae;
173    let u64x2(b, f) = bf;
174    let u64x2(c, g) = cg;
175    let u64x2(d, h) = dh;
176
177    state[0] = state[0].wrapping_add(a);
178    state[1] = state[1].wrapping_add(b);
179    state[2] = state[2].wrapping_add(c);
180    state[3] = state[3].wrapping_add(d);
181    state[4] = state[4].wrapping_add(e);
182    state[5] = state[5].wrapping_add(f);
183    state[6] = state[6].wrapping_add(g);
184    state[7] = state[7].wrapping_add(h);
185}
186
187/// Process a block with the SHA-512 algorithm. (See more...)
188///
189/// Internally, this uses functions that resemble the new Intel SHA
190/// instruction set extensions, but since no architecture seems to
191/// have any designs, these may not be the final designs if and/or when
192/// there are instruction set extensions with SHA-512. So to summarize:
193/// SHA-1 and SHA-256 are being implemented in hardware soon (at the time
194/// of this writing), but it doesn't look like SHA-512 will be hardware
195/// accelerated any time soon.
196///
197/// # Implementation
198///
199/// These functions fall into two categories: message schedule calculation, and
200/// the message block 64-round digest calculation. The schedule-related
201/// functions allow 4 rounds to be calculated as:
202///
203/// ```ignore
204/// use std::simd::u64x2;
205/// use self::crypto::sha2::{
206///     sha512msg,
207///     sha512load
208/// };
209///
210/// fn schedule4_data(work: &mut [u64x2], w: &[u64]) {
211///
212///     // this is to illustrate the data order
213///     work[0] = u64x2(w[1], w[0]);
214///     work[1] = u64x2(w[3], w[2]);
215///     work[2] = u64x2(w[5], w[4]);
216///     work[3] = u64x2(w[7], w[6]);
217///     work[4] = u64x2(w[9], w[8]);
218///     work[5] = u64x2(w[11], w[10]);
219///     work[6] = u64x2(w[13], w[12]);
220///     work[7] = u64x2(w[15], w[14]);
221/// }
222///
223/// fn schedule4_work(work: &mut [u64x2], t: usize) {
224///
225///     // this is the core expression
226///     work[t] = sha512msg(work[t - 8],
227///                         work[t - 7],
228///                         sha512load(work[t - 4], work[t - 3]),
229///                         work[t - 1]);
230/// }
231/// ```
232///
233/// instead of 4 rounds of:
234///
235/// ```ignore
236/// fn schedule_work(w: &mut [u64], t: usize) {
237///     w[t] = sigma1!(w[t - 2]) + w[t - 7] + sigma0!(w[t - 15]) + w[t - 16];
238/// }
239/// ```
240///
241/// and the digest-related functions allow 4 rounds to be calculated as:
242///
243/// ```ignore
244/// use std::simd::u64x2;
245/// use self::crypto::sha2::{K64X2, sha512rnd};
246///
247/// fn rounds4(state: &mut [u64; 8], work: &mut [u64x2], t: usize) {
248///     let [a, b, c, d, e, f, g, h]: [u64; 8] = *state;
249///
250///     // this is to illustrate the data order
251///     let mut ae = u64x2(a, e);
252///     let mut bf = u64x2(b, f);
253///     let mut cg = u64x2(c, g);
254///     let mut dh = u64x2(d, h);
255///     let u64x2(w1, w0) = K64X2[2*t]     + work[2*t];
256///     let u64x2(w3, w2) = K64X2[2*t + 1] + work[2*t + 1];
257///
258///     // this is the core expression
259///     dh = sha512rnd(ae, bf, cg, dh, w0);
260///     cg = sha512rnd(dh, ae, bf, cg, w1);
261///     bf = sha512rnd(cg, dh, ae, bf, w2);
262///     ae = sha512rnd(bf, cg, dh, ae, w3);
263///
264///     *state = [ae.0, bf.0, cg.0, dh.0,
265///               ae.1, bf.1, cg.1, dh.1];
266/// }
267/// ```
268///
269/// instead of 4 rounds of:
270///
271/// ```ignore
272/// fn round(state: &mut [u64; 8], w: &mut [u64], t: usize) {
273///     let [a, b, c, mut d, e, f, g, mut h]: [u64; 8] = *state;
274///
275///     h += big_sigma1!(e) +   choose!(e, f, g) + K64[t] + w[t]; d += h;
276///     h += big_sigma0!(a) + majority!(a, b, c);
277///
278///     *state = [h, a, b, c, d, e, f, g];
279/// }
280/// ```
281///
282pub fn compress512(state: &mut [u64; 8], block: &[u8; 128]) {
283    let mut block_u64 = [0u64; BLOCK_LEN];
284    BE::read_u64_into(block, &mut block_u64[..]);
285    sha512_digest_block_u64(state, &block_u64);
286}