Skip to main content

cryptography/hash/
sha3.rs

1//! SHA-3 (Keccak-f[1600]) from FIPS 202.
2//!
3//! This module implements the fixed-output SHA-3 family:
4//!
5//! - `Sha3_224`
6//! - `Sha3_256`
7//! - `Sha3_384`
8//! - `Sha3_512`
9//!
10//! The core is the Keccak sponge over the 1600-bit permutation with the SHA-3
11//! domain-separation suffix `0x06`.
12//!
13//! `Shake128` and `Shake256` are built on the same permutation, but use the
14//! SHAKE domain suffix (`0x1f`) and expose the sponge's natural
15//! absorb-then-squeeze interface through the `Xof` trait.
16
17use super::{Digest, Xof};
18
19// Keccak-f[1600] rho-step rotation offsets for lanes A[x,y] (FIPS 202,
20// Keccak-p permutation definition).
21const RHO: [u32; 25] = [
22    0, 1, 62, 28, 27, 36, 44, 6, 55, 20, 3, 10, 43, 25, 39, 41, 45, 15, 21, 8, 18, 2, 61, 56, 14,
23];
24
25// Keccak-f[1600] pi-step lane permutation, flattened as index x + 5*y where
26// (x, y) -> (y, (2x + 3y) mod 5).
27const PI: [usize; 25] = [
28    0, 10, 20, 5, 15, 16, 1, 11, 21, 6, 7, 17, 2, 12, 22, 23, 8, 18, 3, 13, 14, 24, 9, 19, 4,
29];
30
31// Keccak-f[1600] iota-step round constants for rounds 0..23.
32const RC: [u64; 24] = [
33    0x0000_0000_0000_0001,
34    0x0000_0000_0000_8082,
35    0x8000_0000_0000_808A,
36    0x8000_0000_8000_8000,
37    0x0000_0000_0000_808B,
38    0x0000_0000_8000_0001,
39    0x8000_0000_8000_8081,
40    0x8000_0000_0000_8009,
41    0x0000_0000_0000_008A,
42    0x0000_0000_0000_0088,
43    0x0000_0000_8000_8009,
44    0x0000_0000_8000_000A,
45    0x0000_0000_8000_808B,
46    0x8000_0000_0000_008B,
47    0x8000_0000_0000_8089,
48    0x8000_0000_0000_8003,
49    0x8000_0000_0000_8002,
50    0x8000_0000_0000_0080,
51    0x0000_0000_0000_800A,
52    0x8000_0000_8000_000A,
53    0x8000_0000_8000_8081,
54    0x8000_0000_0000_8080,
55    0x0000_0000_8000_0001,
56    0x8000_0000_8000_8008,
57];
58
59// Runtime-dispatching entry point: hardware on aarch64 + FEAT_SHA3, else soft.
60#[inline]
61fn keccak_f1600(state: &mut [u64; 25]) {
62    #[cfg(target_arch = "aarch64")]
63    {
64        if std::arch::is_aarch64_feature_detected!("sha3") {
65            // SAFETY: feature detection confirms FEAT_SHA3 is present.
66            unsafe { return keccak_f1600_sha3(state); }
67        }
68    }
69    keccak_f1600_soft(state);
70}
71
72// Pure-Rust fallback — 24 rounds of theta → rho → pi → chi → iota.
73fn keccak_f1600_soft(state: &mut [u64; 25]) {
74    for &rc in &RC {
75        // theta: parity of each column.
76        let mut c = [0u64; 5];
77        for x in 0..5 {
78            c[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^ state[x + 20];
79        }
80
81        // theta: mix neighboring column parities.
82        let mut d = [0u64; 5];
83        for x in 0..5 {
84            d[x] = c[(x + 4) % 5] ^ c[(x + 1) % 5].rotate_left(1);
85        }
86
87        // theta: xor D[x] into each lane of column x.
88        for y in 0..5 {
89            for x in 0..5 {
90                state[x + 5 * y] ^= d[x];
91            }
92        }
93
94        // rho + pi: per-lane rotate, then permute lane positions.
95        let mut b = [0u64; 25];
96        for i in 0..25 {
97            b[PI[i]] = state[i].rotate_left(RHO[i]);
98        }
99
100        // chi: nonlinear row step.
101        for y in 0..5 {
102            let row = 5 * y;
103            for x in 0..5 {
104                state[row + x] = b[row + x] ^ ((!b[row + ((x + 1) % 5)]) & b[row + ((x + 2) % 5)]);
105            }
106        }
107
108        // iota: inject round constant.
109        state[0] ^= rc;
110    }
111}
112
113// FEAT_SHA3 hardware path: uses EOR3 (3-way XOR), RAX1 (rotate-and-XOR),
114// and BCAX (bit-clear-and-XOR) intrinsics.  Theta column parities and D
115// vectors are computed with EOR3/RAX1; chi replaces the scalar NOT+AND+XOR
116// triple with a single BCAX per lane pair.  Rho+Pi remain scalar (each of
117// the 24 non-zero lanes has a distinct rotation, so XAR offers no advantage
118// when lanes are processed sequentially).
119#[cfg(target_arch = "aarch64")]
120#[target_feature(enable = "sha3")]
121unsafe fn keccak_f1600_sha3(state: &mut [u64; 25]) {
122    use core::arch::aarch64::*;
123
124    // Pack two scalars into a 128-bit SIMD register [a, b].
125    #[inline(always)]
126    unsafe fn u64x2(a: u64, b: u64) -> uint64x2_t {
127        vcombine_u64(vdup_n_u64(a), vdup_n_u64(b))
128    }
129
130    for &rc in &RC {
131        // === Theta ===
132        // Column parities: c[x] = XOR of 5 lanes in column x.
133        // EOR3(a,b,c) = a^b^c; chaining two EOR3s gives a 5-way XOR.
134
135        // c[0] and c[1] in one SIMD register.
136        let c01 = {
137            let t = veor3q_u64(
138                u64x2(state[0],  state[1]),
139                u64x2(state[5],  state[6]),
140                u64x2(state[10], state[11]),
141            );
142            veor3q_u64(t, u64x2(state[15], state[16]), u64x2(state[20], state[21]))
143        };
144        // c[2] and c[3].
145        let c23 = {
146            let t = veor3q_u64(
147                u64x2(state[2],  state[3]),
148                u64x2(state[7],  state[8]),
149                u64x2(state[12], state[13]),
150            );
151            veor3q_u64(t, u64x2(state[17], state[18]), u64x2(state[22], state[23]))
152        };
153        // c[4] is the odd column; compute it scalar.
154        let c4 = state[4] ^ state[9] ^ state[14] ^ state[19] ^ state[24];
155
156        let c0 = vgetq_lane_u64::<0>(c01);
157        let c1 = vgetq_lane_u64::<1>(c01);
158        let c2 = vgetq_lane_u64::<0>(c23);
159        let c3 = vgetq_lane_u64::<1>(c23);
160
161        // D[x] = C[(x+4)%5] ^ rotate_left(C[(x+1)%5], 1).
162        // vrax1q_u64(a, b) = a ^ rotate_left(b, 1), elementwise.
163        //   D[0] = c4 ^ rotl(c1, 1)    D[1] = c0 ^ rotl(c2, 1)
164        //   D[2] = c1 ^ rotl(c3, 1)    D[3] = c2 ^ rotl(c4, 1)
165        let d01 = vrax1q_u64(u64x2(c4, c0), u64x2(c1, c2));
166        let d23 = vrax1q_u64(u64x2(c1, c2), u64x2(c3, c4));
167        let d4  = c3 ^ c0.rotate_left(1);   // scalar
168
169        let d = [
170            vgetq_lane_u64::<0>(d01),
171            vgetq_lane_u64::<1>(d01),
172            vgetq_lane_u64::<0>(d23),
173            vgetq_lane_u64::<1>(d23),
174            d4,
175        ];
176
177        // Apply D[x] to every lane in column x.
178        for y in 0..5 {
179            for x in 0..5 {
180                state[x + 5 * y] ^= d[x];
181            }
182        }
183
184        // === Rho + Pi (scalar) ===
185        // Each of the 24 non-zero-rotation lanes has a unique RHO value, so
186        // XAR (which applies the same rotation to both elements of a pair)
187        // gives no advantage.  Keep the existing scalar loop.
188        let mut b = [0u64; 25];
189        for i in 0..25 {
190            b[PI[i]] = state[i].rotate_left(RHO[i]);
191        }
192
193        // === Chi using BCAX ===
194        // chi[x] = b[x] ^ (!b[(x+1)%5] & b[(x+2)%5])
195        //        = BCAX(b[x], b[(x+2)%5], b[(x+1)%5])
196        // vbcaxq_u64(a, b, c) = a ^ (b & !c), elementwise.
197        // Process each row of 5 lanes as two SIMD pairs + one scalar.
198        for y in 0..5 {
199            let r = y * 5;
200            // x=0: BCAX(b[0], b[2], b[1])    x=1: BCAX(b[1], b[3], b[2])
201            let chi01 = vbcaxq_u64(
202                u64x2(b[r],   b[r+1]),
203                u64x2(b[r+2], b[r+3]),
204                u64x2(b[r+1], b[r+2]),
205            );
206            // x=2: BCAX(b[2], b[4], b[3])    x=3: BCAX(b[3], b[0], b[4])
207            let chi23 = vbcaxq_u64(
208                u64x2(b[r+2], b[r+3]),
209                u64x2(b[r+4], b[r  ]),
210                u64x2(b[r+3], b[r+4]),
211            );
212            state[r  ] = vgetq_lane_u64::<0>(chi01);
213            state[r+1] = vgetq_lane_u64::<1>(chi01);
214            state[r+2] = vgetq_lane_u64::<0>(chi23);
215            state[r+3] = vgetq_lane_u64::<1>(chi23);
216            // x=4: BCAX(b[4], b[1], b[0]) = b[4] ^ (b[1] & !b[0])
217            state[r+4] = b[r+4] ^ (b[r+1] & !b[r]);
218        }
219
220        // === Iota ===
221        state[0] ^= rc;
222    }
223}
224
225#[inline]
226fn absorb_block<const RATE: usize>(state: &mut [u64; 25], block: &[u8; RATE]) {
227    debug_assert_eq!(RATE % 8, 0, "Keccak rate must be lane-aligned");
228    let lanes = RATE / 8;
229    let mut i = 0usize;
230    while i < lanes {
231        let lane = u64::from_le_bytes(block[i * 8..i * 8 + 8].try_into().unwrap());
232        state[i] ^= lane;
233        i += 1;
234    }
235    keccak_f1600(state);
236}
237
238#[derive(Clone)]
239struct Keccak<const RATE: usize> {
240    state: [u64; 25],
241    block: [u8; RATE],
242    pos: usize,
243}
244
245#[derive(Clone)]
246struct KeccakSponge<const RATE: usize> {
247    state: [u64; 25],
248    block: [u8; RATE],
249    offset: usize,
250}
251
252#[derive(Clone)]
253enum XofState<const RATE: usize> {
254    Absorbing(Keccak<RATE>),
255    Squeezing(KeccakSponge<RATE>),
256}
257
258impl<const RATE: usize> XofState<RATE> {
259    fn zeroize(&mut self) {
260        match self {
261            XofState::Absorbing(inner) => {
262                crate::ct::zeroize_slice(inner.state.as_mut_slice());
263                crate::ct::zeroize_slice(inner.block.as_mut_slice());
264                inner.pos = 0;
265            }
266            XofState::Squeezing(sponge) => {
267                crate::ct::zeroize_slice(sponge.state.as_mut_slice());
268                crate::ct::zeroize_slice(sponge.block.as_mut_slice());
269                sponge.offset = 0;
270            }
271        }
272    }
273}
274
275#[inline]
276fn state_to_rate_bytes<const RATE: usize>(state: &[u64; 25]) -> [u8; RATE] {
277    let mut rate_bytes = [0u8; RATE];
278    let lanes = RATE / 8;
279    let mut i = 0usize;
280    while i < lanes {
281        rate_bytes[i * 8..i * 8 + 8].copy_from_slice(&state[i].to_le_bytes());
282        i += 1;
283    }
284    rate_bytes
285}
286
287impl<const RATE: usize> Keccak<RATE> {
288    fn new() -> Self {
289        Self {
290            state: [0u64; 25],
291            block: [0u8; RATE],
292            pos: 0,
293        }
294    }
295
296    fn update(&mut self, mut data: &[u8]) {
297        while !data.is_empty() {
298            let take = (RATE - self.pos).min(data.len());
299            self.block[self.pos..self.pos + take].copy_from_slice(&data[..take]);
300            self.pos += take;
301            data = &data[take..];
302
303            if self.pos == RATE {
304                absorb_block(&mut self.state, &self.block);
305                self.block = [0u8; RATE];
306                self.pos = 0;
307            }
308        }
309    }
310
311    fn finalize_sponge(mut self, suffix: u8) -> KeccakSponge<RATE> {
312        self.block[self.pos] ^= suffix;
313        self.block[RATE - 1] ^= 0x80;
314        absorb_block(&mut self.state, &self.block);
315        KeccakSponge {
316            block: state_to_rate_bytes(&self.state),
317            state: self.state,
318            offset: 0,
319        }
320    }
321
322    fn finalize<const OUT: usize>(self) -> [u8; OUT] {
323        let mut sponge = self.finalize_sponge(0x06);
324        let mut out = [0u8; OUT];
325        sponge.squeeze(&mut out);
326        out
327    }
328
329    fn finalize_into_reset<const OUT: usize>(&mut self, suffix: u8, out: &mut [u8; OUT]) {
330        self.block[self.pos] ^= suffix;
331        self.block[RATE - 1] ^= 0x80;
332        absorb_block(&mut self.state, &self.block);
333
334        let mut sponge: KeccakSponge<RATE> = KeccakSponge {
335            block: state_to_rate_bytes(&self.state),
336            state: self.state,
337            offset: 0,
338        };
339        sponge.squeeze(out);
340
341        crate::ct::zeroize_slice(sponge.state.as_mut_slice());
342        crate::ct::zeroize_slice(sponge.block.as_mut_slice());
343
344        crate::ct::zeroize_slice(self.state.as_mut_slice());
345        crate::ct::zeroize_slice(self.block.as_mut_slice());
346        self.pos = 0;
347    }
348}
349
350impl<const RATE: usize> KeccakSponge<RATE> {
351    fn squeeze(&mut self, out: &mut [u8]) {
352        let mut produced = 0usize;
353        while produced < out.len() {
354            if self.offset == RATE {
355                keccak_f1600(&mut self.state);
356                self.block = state_to_rate_bytes(&self.state);
357                self.offset = 0;
358            }
359
360            let take = (out.len() - produced).min(RATE - self.offset);
361            out[produced..produced + take]
362                .copy_from_slice(&self.block[self.offset..self.offset + take]);
363            produced += take;
364            self.offset += take;
365        }
366    }
367}
368
369macro_rules! define_sha3 {
370    ($name:ident, $rate:expr, $out_len:expr) => {
371        #[derive(Clone)]
372        pub struct $name {
373            inner: Keccak<$rate>,
374        }
375
376        impl Default for $name {
377            fn default() -> Self {
378                Self::new()
379            }
380        }
381
382        impl $name {
383            pub const BLOCK_LEN: usize = $rate;
384            pub const OUTPUT_LEN: usize = $out_len;
385
386            #[must_use]
387            pub fn new() -> Self {
388                Self {
389                    inner: Keccak::new(),
390                }
391            }
392
393            pub fn update(&mut self, data: &[u8]) {
394                self.inner.update(data);
395            }
396
397            #[must_use]
398            pub fn finalize(self) -> [u8; $out_len] {
399                self.inner.finalize()
400            }
401
402            #[must_use]
403            pub fn digest(data: &[u8]) -> [u8; $out_len] {
404                let mut h = Self::new();
405                h.update(data);
406                h.finalize()
407            }
408        }
409
410        impl Digest for $name {
411            const BLOCK_LEN: usize = $rate;
412            const OUTPUT_LEN: usize = $out_len;
413
414            fn new() -> Self {
415                $name {
416                    inner: Keccak::new(),
417                }
418            }
419
420            fn update(&mut self, data: &[u8]) {
421                self.inner.update(data);
422            }
423
424            fn finalize_into(self, out: &mut [u8]) {
425                assert_eq!(out.len(), $out_len, "wrong digest length");
426                let digest = self.inner.finalize::<$out_len>();
427                out.copy_from_slice(&digest);
428            }
429
430            fn finalize_reset(&mut self, out: &mut [u8]) {
431                let out: &mut [u8; $out_len] = out.try_into().expect("wrong digest length");
432                self.inner.finalize_into_reset::<$out_len>(0x06, out);
433            }
434
435            fn zeroize(&mut self) {
436                crate::ct::zeroize_slice(self.inner.state.as_mut_slice());
437                crate::ct::zeroize_slice(self.inner.block.as_mut_slice());
438                self.inner.pos = 0;
439            }
440        }
441    };
442}
443
444define_sha3!(Sha3_224, 144, 28);
445define_sha3!(Sha3_256, 136, 32);
446define_sha3!(Sha3_384, 104, 48);
447define_sha3!(Sha3_512, 72, 64);
448
449macro_rules! define_shake {
450    ($name:ident, $rate:expr) => {
451        #[derive(Clone)]
452        pub struct $name {
453            inner: XofState<$rate>,
454        }
455
456        impl Default for $name {
457            fn default() -> Self {
458                Self::new()
459            }
460        }
461
462        impl $name {
463            pub const BLOCK_LEN: usize = $rate;
464
465            #[must_use]
466            pub fn new() -> Self {
467                Self {
468                    inner: XofState::Absorbing(Keccak::new()),
469                }
470            }
471
472            pub fn digest(data: &[u8], out: &mut [u8]) {
473                let mut xof = Self::new();
474                xof.update(data);
475                xof.squeeze(out);
476            }
477        }
478
479        impl Xof for $name {
480            fn update(&mut self, data: &[u8]) {
481                match &mut self.inner {
482                    XofState::Absorbing(inner) => inner.update(data),
483                    XofState::Squeezing(_) => panic!("cannot absorb after squeezing"),
484                }
485            }
486
487            fn squeeze(&mut self, out: &mut [u8]) {
488                if let XofState::Absorbing(_) = self.inner {
489                    let prev =
490                        core::mem::replace(&mut self.inner, XofState::Absorbing(Keccak::new()));
491                    let sponge = match prev {
492                        XofState::Absorbing(inner) => inner.finalize_sponge(0x1f),
493                        XofState::Squeezing(sponge) => sponge,
494                    };
495                    self.inner = XofState::Squeezing(sponge);
496                }
497
498                match &mut self.inner {
499                    XofState::Absorbing(_) => unreachable!(),
500                    XofState::Squeezing(sponge) => sponge.squeeze(out),
501                }
502            }
503        }
504
505        impl Drop for $name {
506            fn drop(&mut self) {
507                self.inner.zeroize();
508            }
509        }
510    };
511}
512
513define_shake!(Shake128, 168);
514define_shake!(Shake256, 136);
515
516#[cfg(test)]
517mod tests {
518    use super::*;
519
520    fn hex(bytes: &[u8]) -> String {
521        let mut out = String::with_capacity(bytes.len() * 2);
522        for b in bytes {
523            use core::fmt::Write;
524            let _ = write!(&mut out, "{b:02x}");
525        }
526        out
527    }
528
529    #[test]
530    fn sha3_224_empty() {
531        assert_eq!(
532            hex(&Sha3_224::digest(b"")),
533            "6b4e03423667dbb73b6e15454f0eb1ab".to_owned() + "d4597f9a1b078e3f5b5a6bc7"
534        );
535    }
536
537    #[test]
538    fn sha3_256_empty() {
539        assert_eq!(
540            hex(&Sha3_256::digest(b"")),
541            "a7ffc6f8bf1ed76651c14756a061d662".to_owned() + "f580ff4de43b49fa82d80a4b80f8434a"
542        );
543    }
544
545    #[test]
546    fn sha3_256_abc_streaming() {
547        let mut h = Sha3_256::new();
548        h.update(b"a");
549        h.update(b"b");
550        h.update(b"c");
551        assert_eq!(
552            hex(&h.finalize()),
553            "3a985da74fe225b2045c172d6bd390bd".to_owned() + "855f086e3e9d525b46bfe24511431532"
554        );
555    }
556
557    #[test]
558    fn sha3_384_empty() {
559        assert_eq!(
560            hex(&Sha3_384::digest(b"")),
561            "0c63a75b845e4f7d01107d852e4c2485".to_owned()
562                + "c51a50aaaa94fc61995e71bbee983a2a"
563                + "c3713831264adb47fb6bd1e058d5f004"
564        );
565    }
566
567    #[test]
568    fn sha3_512_empty() {
569        assert_eq!(
570            hex(&Sha3_512::digest(b"")),
571            "a69f73cca23a9ac5c8b567dc185a756e".to_owned()
572                + "97c982164fe25859e0d1dcc1475c80a6"
573                + "15b2123af1f5f94c11e3e9402c3ac558"
574                + "f500199d95b6d3e301758586281dcd26"
575        );
576    }
577
578    #[test]
579    fn shake128_empty_32() {
580        let mut out = [0u8; 32];
581        Shake128::digest(b"", &mut out);
582        assert_eq!(
583            hex(&out),
584            "7f9c2ba4e88f827d616045507605853e".to_owned() + "d73b8093f6efbc88eb1a6eacfa66ef26"
585        );
586    }
587
588    #[test]
589    fn shake128_abc_streaming_32() {
590        let mut xof = Shake128::new();
591        xof.update(b"a");
592        xof.update(b"b");
593        xof.update(b"c");
594        let mut out = [0u8; 32];
595        xof.squeeze(&mut out);
596        assert_eq!(
597            hex(&out),
598            "5881092dd818bf5cf8a3ddb793fbcba7".to_owned() + "4097d5c526a6d35f97b83351940f2cc8"
599        );
600    }
601
602    #[test]
603    fn shake128_chunked_squeeze_matches_one_shot() {
604        let mut one_shot = Shake128::new();
605        one_shot.update(b"abc");
606        let mut full = [0u8; 64];
607        one_shot.squeeze(&mut full);
608
609        let mut chunked = Shake128::new();
610        chunked.update(b"abc");
611        let mut left = [0u8; 32];
612        let mut right = [0u8; 32];
613        chunked.squeeze(&mut left);
614        chunked.squeeze(&mut right);
615
616        assert_eq!([left.as_slice(), right.as_slice()].concat(), full);
617    }
618
619    #[test]
620    fn shake256_empty_64() {
621        let mut out = [0u8; 64];
622        Shake256::digest(b"", &mut out);
623        assert_eq!(
624            hex(&out),
625            "46b9dd2b0ba88d13233b3feb743eeb24".to_owned()
626                + "3fcd52ea62b81b82b50c27646ed5762f"
627                + "d75dc4ddd8c0f200cb05019d67b592f6"
628                + "fc821c49479ab48640292eacb3b7c4be"
629        );
630    }
631
632    #[test]
633    fn sha3_224_matches_openssl() {
634        let msg = b"The quick brown fox jumps over the lazy dog";
635        let Some(expected) = crate::test_utils::run_openssl(&["dgst", "-sha3-224", "-binary"], msg) else {
636            return;
637        };
638        assert_eq!(Sha3_224::digest(msg).as_slice(), expected.as_slice());
639    }
640
641    #[test]
642    fn sha3_256_matches_openssl() {
643        let msg = b"The quick brown fox jumps over the lazy dog";
644        let Some(expected) = crate::test_utils::run_openssl(&["dgst", "-sha3-256", "-binary"], msg) else {
645            return;
646        };
647        assert_eq!(Sha3_256::digest(msg).as_slice(), expected.as_slice());
648    }
649
650    #[test]
651    fn sha3_384_matches_openssl() {
652        let msg = b"The quick brown fox jumps over the lazy dog";
653        let Some(expected) = crate::test_utils::run_openssl(&["dgst", "-sha3-384", "-binary"], msg) else {
654            return;
655        };
656        assert_eq!(Sha3_384::digest(msg).as_slice(), expected.as_slice());
657    }
658
659    #[test]
660    fn sha3_512_matches_openssl() {
661        let msg = b"The quick brown fox jumps over the lazy dog";
662        let Some(expected) = crate::test_utils::run_openssl(&["dgst", "-sha3-512", "-binary"], msg) else {
663            return;
664        };
665        assert_eq!(Sha3_512::digest(msg).as_slice(), expected.as_slice());
666    }
667}