vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
//! SHA3-256 and SHA3-512 CPU reference implementations.

/// Fold one message block into the Keccak state by XOR.
///
/// `block` is read as consecutive 8-byte little-endian words; the i-th word is
/// XOR-ed into `state[i]`. Trailing bytes that do not fill a whole 8-byte word,
/// and any words beyond the 25 lanes of the state, are ignored.
pub fn absorb_block(state: &mut [u64; 25], block: &[u8]) {
    let words = block.chunks_exact(8).map(|chunk| {
        let mut raw = [0u8; 8];
        raw.copy_from_slice(chunk);
        u64::from_le_bytes(raw)
    });
    state
        .iter_mut()
        .zip(words)
        .for_each(|(lane, word)| *lane ^= word);
}

/// Run the full Keccak-f[1600] permutation over `a`, mutating it in place.
///
/// The state is addressed as `a[x + 5 * y]`. One round is executed per entry
/// of [`RC`], each consisting of the theta, rho/pi, chi and iota steps.
pub fn keccak_f(a: &mut [u64; 25]) {
    for round_constant in RC.iter().copied() {
        // Theta: compute the parity of every column, then mix each column with
        // the parities of its two neighbouring columns.
        let mut parity = [0u64; 5];
        for (col, slot) in parity.iter_mut().enumerate() {
            *slot = (0..5).fold(0u64, |acc, row| acc ^ a[col + 5 * row]);
        }
        for col in 0..5 {
            let mix = parity[(col + 4) % 5] ^ parity[(col + 1) % 5].rotate_left(1);
            for lane in a.iter_mut().skip(col).step_by(5) {
                *lane ^= mix;
            }
        }

        // Rho + pi: rotate every lane by its fixed offset and move it to its
        // permuted position in a scratch copy of the state.
        let mut moved = [0u64; 25];
        for (index, lane) in a.iter().enumerate() {
            let (col, row) = (index % 5, index / 5);
            moved[row + 5 * ((2 * col + 3 * row) % 5)] = lane.rotate_left(RHO[col][row]);
        }

        // Chi: non-linear combination of each lane with the next two lanes of
        // the same row, reading from the scratch copy and writing back to `a`.
        for (row_out, row_in) in a.chunks_exact_mut(5).zip(moved.chunks_exact(5)) {
            for col in 0..5 {
                row_out[col] = row_in[col] ^ (!row_in[(col + 1) % 5] & row_in[(col + 2) % 5]);
            }
        }

        // Iota: inject the round constant into lane (0, 0).
        a[0] ^= round_constant;
    }
}

/// Per-round constants of Keccak-f[1600], in round order.
///
/// [`keccak_f`] runs one round per entry and XORs that entry into lane 0 at
/// the end of the round.
pub const RC: [u64; 24] = [
    0x0000_0000_0000_0001,
    0x0000_0000_0000_8082,
    0x8000_0000_0000_808a,
    0x8000_0000_8000_8000,
    0x0000_0000_0000_808b,
    0x0000_0000_8000_0001,
    0x8000_0000_8000_8081,
    0x8000_0000_0000_8009,
    0x0000_0000_0000_008a,
    0x0000_0000_0000_0088,
    0x0000_0000_8000_8009,
    0x0000_0000_8000_000a,
    0x0000_0000_8000_808b,
    0x8000_0000_0000_008b,
    0x8000_0000_0000_8089,
    0x8000_0000_0000_8003,
    0x8000_0000_0000_8002,
    0x8000_0000_0000_0080,
    0x0000_0000_0000_800a,
    0x8000_0000_8000_000a,
    0x8000_0000_8000_8081,
    0x8000_0000_0000_8080,
    0x0000_0000_8000_0001,
    0x8000_0000_8000_8008,
];

/// Rho rotation offsets for Keccak-f[1600].
///
/// Indexed as `RHO[x][y]`: the left-rotation amount, in bits, that `keccak_f`
/// applies to the lane stored at state index `x + 5 * y`.
pub const RHO: [[u32; 5]; 5] = [
    [0, 36, 3, 41, 18],
    [1, 44, 10, 45, 2],
    [62, 6, 43, 15, 61],
    [28, 55, 25, 21, 56],
    [27, 20, 39, 8, 14],
];

// sha3_256_words.rs
/// Hash `input` with SHA3-256 and pack the digest into eight 32-bit words.
///
/// The digest is produced by [`sponge`] with a 136-byte rate and a 32-byte
/// output; each consecutive group of four digest bytes is interpreted as one
/// big-endian `u32`.
#[must_use]
pub(crate) fn sha3_256_words(input: &[u8]) -> [u32; 8] {
    let mut words = [0u32; 8];
    sponge(input, 136, 32)
        .chunks_exact(4)
        .zip(words.iter_mut())
        .for_each(|(quad, word)| {
            // Big-endian: earlier bytes end up in the more significant positions.
            *word = quad
                .iter()
                .fold(0u32, |acc, &byte| (acc << 8) | u32::from(byte));
        });
    words
}

// sha3_512_words.rs
/// Hash `input` with SHA3-512 and pack the digest into eight 64-bit words.
///
/// The digest is produced by [`sponge`] with a 72-byte rate and a 64-byte
/// output; each consecutive group of eight digest bytes is interpreted as one
/// big-endian `u64`.
#[must_use]
pub(crate) fn sha3_512_words(input: &[u8]) -> [u64; 8] {
    let digest = sponge(input, 72, 64);
    let mut words = [0u64; 8];
    for (word, octet) in words.iter_mut().zip(digest.chunks_exact(8)) {
        let mut raw = [0u8; 8];
        raw.copy_from_slice(octet);
        *word = u64::from_be_bytes(raw);
    }
    words
}

/// Run the SHA3 sponge with a caller-selected rate and output length.
///
/// `input` is absorbed in `rate`-byte blocks, each followed by one
/// [`keccak_f`] permutation. The trailing partial block (possibly empty) is
/// zero-extended to `rate` bytes and padded by XOR-ing `0x06` into the byte
/// right after the message and `0x80` into the last byte of the block, then
/// absorbed as the final block. Output is squeezed from the first `rate / 8`
/// lanes in little-endian byte order, permuting again whenever more than
/// `rate` bytes are requested.
///
/// Returns exactly `out_len` bytes (an empty vector when `out_len` is 0).
///
/// # Panics
///
/// Panics if `rate` is zero, is not a multiple of 8, or exceeds 200 (the byte
/// size of the 25-lane state). Such rates cannot be absorbed lane-by-lane:
/// bytes of every block, including the padding, would be silently dropped, and
/// a rate below 8 would never produce any output.
pub fn sponge(input: &[u8], rate: usize, out_len: usize) -> Vec<u8> {
    assert!(
        rate != 0 && rate % 8 == 0 && rate <= 200,
        "sponge rate must be a non-zero multiple of 8 and at most 200 bytes, got {}",
        rate
    );

    // Absorb every complete block of the message.
    let mut state = [0u64; 25];
    let mut blocks = input.chunks_exact(rate);
    for block in &mut blocks {
        absorb_block(&mut state, block);
        keccak_f(&mut state);
    }

    // The leftover is always shorter than `rate`, so there is room for the
    // padding. Both padding bytes are XOR-ed because they land on the same
    // byte when the leftover is `rate - 1` bytes long.
    let tail = blocks.remainder();
    let mut final_block = vec![0u8; rate];
    final_block[..tail.len()].copy_from_slice(tail);
    final_block[tail.len()] ^= 0x06;
    final_block[rate - 1] ^= 0x80;
    absorb_block(&mut state, &final_block);
    keccak_f(&mut state);

    // Squeeze: emit the rate portion of the state, permuting between passes.
    let mut out = Vec::with_capacity(out_len);
    while out.len() < out_len {
        for lane in state.iter().take(rate / 8) {
            let bytes = lane.to_le_bytes();
            // Copy only what is still missing so the output never overshoots.
            let wanted = (out_len - out.len()).min(bytes.len());
            out.extend_from_slice(&bytes[..wanted]);
            if out.len() == out_len {
                return out;
            }
        }
        keccak_f(&mut state);
    }
    out
}