use core::hint::black_box;
use zeroize::Zeroize;
#[cfg(not(feature = "std"))]
use alloc::vec::Vec;
/// Number of 64-bit words in the permutation state.
pub const STATE_WORDS: usize = 25;
/// Size of the permutation state in bytes (8 bytes per word).
pub const STATE_BYTES: usize = STATE_WORDS * 8;
/// Round count of the full permutation (see `kk_permute_with_schedule`).
pub const ROUNDS: usize = 32;
/// Reduced round count applied between output blocks by the KDF squeeze paths.
pub const KDF_SQUEEZE_ROUNDS: usize = 20;
/// Number of leading state words that input is XORed into and output is read from.
pub const RATE_WORDS: usize = 19;
/// Sponge rate in bytes.
pub const RATE_BYTES: usize = RATE_WORDS * 8;
/// Number of trailing state words beyond the rate (the sponge capacity).
pub const CAPACITY_WORDS: usize = STATE_WORDS - RATE_WORDS;
/// Default rotation schedule: one `[first, second]` pair per quintet.
///
/// `kk_permute_n` uses entries 0..5 for the five rows, 5..10 for the five
/// columns and 10..15 for the five diagonals. Within a pair, the first value
/// rotates the first `mfr` of `quintet_round` and the second value rotates
/// the second `mfr`.
pub(crate) const DEFAULT_ROTATIONS: [[u32; 2]; 15] = [
[7, 41],
[13, 29],
[19, 37],
[23, 43],
[3, 53],
[11, 47],
[17, 39],
[5, 59],
[31, 49],
[9, 51],
[15, 33],
[21, 45],
[27, 35],
[1, 57],
[25, 55],
];
// Domain-separation bytes. `finalize_absorb` XORs one of these into the rate
// at the current write position before the final permutation.
// Used by `kk_hash` and `kk_entropy_mix`.
const DOMAIN_HASH: u8 = 0x01;
// Used by `kk_kdf` and `kk_kdf_batch_8`.
const DOMAIN_KDF: u8 = 0x02;
// Used by the `kk_mac*` family.
const DOMAIN_MAC: u8 = 0x03;
/// Initial state loaded into every fresh `KkSponge`.
// NOTE(review): the first 16 words appear to match the SHA-512 and SHA-384
// initial hash values; the origin of the last 9 words is not visible here — confirm.
pub(crate) const KK_IV: [u64; STATE_WORDS] = [
0x6A09E667F3BCC908, 0xBB67AE8584CAA73B, 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1, 0x510E527FADE682D1, 0x9B05688C2B3E6C1F, 0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179, 0xCBBB9D5DC1059ED8, 0x629A292A367CD507, 0x9159015A3070DD17, 0x152FECD8F70E5939, 0x67332667FFC00B31, 0x8EB44A8768581511, 0xDB0C2E0D64F98FA7, 0x47B5481DBEFA4FA4, 0xAE5F9156E7B6D99B, 0xCF6C85D39D1A1E15, 0x2F73477D6A4563CA, 0x6D1826CAFD82E1ED, 0x8B43D4570A51B936, 0xE360B596DC380C3F, 0x1C456002CE13E9F8, 0x6F19633143A0AF0E, 0xD94EBEB1AB313933, ];
/// The permutation state: `STATE_WORDS` 64-bit words.
pub type KkState = [u64; STATE_WORDS];
// State indices of the five wrapped diagonals when the state is viewed as a
// 5x5 row-major grid. Diagonal `d` holds the cells (row r, column (r + d) % 5);
// each entry lists the row-4 cell first, followed by rows 0 through 3.
const DIAGS: [[usize; 5]; 5] = [
[24, 0, 6, 12, 18],
[20, 1, 7, 13, 19],
[21, 2, 8, 14, 15],
[22, 3, 9, 10, 16],
[23, 4, 5, 11, 17],
];
/// Multiply, fold, rotate: mixes `a` with `b` and rotates the result left by `rot`.
///
/// `a` is multiplied (wrapping) by `b` with its low bit forced on, the upper
/// half of the product is folded onto the lower half, `b` is XORed back in,
/// and the whole word is rotated.
#[inline(always)]
fn mfr(a: u64, b: u64, rot: u32) -> u64 {
    // An odd multiplier is invertible modulo 2^64, so the multiplication
    // alone never collapses distinct values of `a`.
    let odd_multiplier = b | 1;
    let wide = a.wrapping_mul(odd_multiplier);
    let upper_half = wide >> 32;
    (wide ^ upper_half ^ b).rotate_left(rot)
}
/// Multiplier that spreads the bits of the rotation source before the top
/// six bits are taken as the rotation amount.
const DDR_MIX: u64 = 0xB5C0FBCFEC4D3B2F;

/// Data-dependent rotation: rotates `a` left by an amount in `0..64` derived from `b`.
///
/// The amount is the top six bits of `b * DDR_MIX` (wrapping). The rotation is
/// applied as six conditional power-of-two stages, each selected with an
/// all-ones/all-zeros mask instead of a branch on the data-derived amount.
#[inline(always)]
fn ddr(a: u64, b: u64) -> u64 {
    let amount = b.wrapping_mul(DDR_MIX) >> 58;
    let mut value = a;
    for stage in 0..6u32 {
        // All ones when this bit of the amount is set, all zeros otherwise.
        let select = 0u64.wrapping_sub((amount >> stage) & 1);
        let rotated = value.rotate_left(1u32 << stage);
        value = (value & !select) | (rotated & select);
    }
    value
}
/// Mixes five state words in place.
///
/// The steps are strictly sequential: each one consumes the value produced by
/// the previous one, so all five words are rewritten. `rot[0]` drives the
/// first `mfr`, `rot[1]` the second.
#[inline(always)]
fn quintet_round(a: &mut u64, b: &mut u64, c: &mut u64, d: &mut u64, e: &mut u64, rot: [u32; 2]) {
    let [first_rot, second_rot] = rot;

    // a <- mfr(a, b), then fold the new a into c.
    *a = mfr(*a, *b, first_rot);
    *c ^= *a;

    // d is rotated by an amount derived from the updated c.
    *d = ddr(*d, *c);

    // e <- mfr(e, d), then fold the new e back into b.
    *e = mfr(*e, *d, second_rot);
    *b ^= *e;
}
/// Applies the full permutation to `state` in place using `DEFAULT_ROTATIONS`.
pub fn kk_permute(state: &mut KkState) {
kk_permute_with_schedule(state, &DEFAULT_ROTATIONS);
}
/// Runs `rounds` rounds of the permutation on `state` in place.
///
/// Each round applies `quintet_round` to the five rows, then the five columns,
/// then the five wrapped diagonals of the 5x5 word grid (rotation pairs 0..5,
/// 5..10 and 10..15 of `rotations` respectively), adds round-dependent
/// constants to five words, and on every eighth round (7, 15, 23, ...) XORs
/// rotated capacity words into the rate words.
pub(crate) fn kk_permute_n(state: &mut KkState, rotations: &[[u32; 2]; 15], rounds: usize) {
    /// Applies `quintet_round` to the five state words named by `lane`.
    #[inline(always)]
    fn mix_lane(state: &mut KkState, lane: [usize; 5], rot: [u32; 2]) {
        let [i0, i1, i2, i3, i4] = lane;
        let (mut w0, mut w1, mut w2, mut w3, mut w4) =
            (state[i0], state[i1], state[i2], state[i3], state[i4]);
        quintet_round(&mut w0, &mut w1, &mut w2, &mut w3, &mut w4, rot);
        state[i0] = w0;
        state[i1] = w1;
        state[i2] = w2;
        state[i3] = w3;
        state[i4] = w4;
    }

    for round in 0..rounds as u64 {
        // Rows: five consecutive words each.
        for (row, rot) in rotations[..5].iter().enumerate() {
            let first = row * 5;
            mix_lane(state, [first, first + 1, first + 2, first + 3, first + 4], *rot);
        }

        // Columns: words five apart.
        for (col, rot) in rotations[5..10].iter().enumerate() {
            mix_lane(state, [col, col + 5, col + 10, col + 15, col + 20], *rot);
        }

        // Wrapped diagonals, in the word order fixed by DIAGS.
        for (lane, rot) in DIAGS.iter().zip(&rotations[10..]) {
            mix_lane(state, *lane, *rot);
        }

        // Round-dependent constants break the symmetry between rounds.
        state[0] = state[0].wrapping_add(round);
        state[4] = state[4].wrapping_add(round.wrapping_mul(0x9E3779B97F4A7C15));
        state[12] = state[12].wrapping_add(round.wrapping_mul(0xB7E151628AED2A6A));
        state[20] = state[20].wrapping_add(round.wrapping_mul(0x243F6A8885A2F7A4));
        state[24] = state[24].wrapping_add(round.wrapping_mul(0x298B075B4B6A5240));

        // Every eighth round, feed the (unmodified) capacity words into the rate.
        if round % 8 == 7 {
            let shift = round as u32;
            let (rate, capacity) = state.split_at_mut(RATE_WORDS);
            for (i, word) in rate.iter_mut().enumerate() {
                *word ^= capacity[i % CAPACITY_WORDS].rotate_left(shift);
            }
        }
    }
}
/// Applies the full `ROUNDS`-round permutation to `state` in place using the
/// caller-supplied rotation schedule.
pub fn kk_permute_with_schedule(state: &mut KkState, rotations: &[[u32; 2]; 15]) {
kk_permute_n(state, rotations, ROUNDS);
}
/// Derives a rotation schedule from `entropy`.
///
/// Starts from `DEFAULT_ROTATIONS` and overwrites the 30 rotation amounts in
/// order, one per entropy byte, with `(byte & 63) | 1` (an odd value in
/// `1..=63`). If fewer than 30 bytes are supplied the remaining amounts keep
/// their defaults; bytes beyond the 30th are ignored.
pub fn rotations_from_entropy(entropy: &[u8]) -> [[u32; 2]; 15] {
    let mut schedule = DEFAULT_ROTATIONS;
    // `zip` stops at the shorter side, which gives both the "short entropy"
    // and the "surplus entropy" behaviour without explicit bounds checks.
    for (amount, &byte) in schedule.iter_mut().flatten().zip(entropy) {
        *amount = (u32::from(byte) & 63) | 1;
    }
    schedule
}
/// Sponge construction over the KK permutation: absorb bytes, finalize with a
/// domain byte, then squeeze output.
pub struct KkSponge {
// Full permutation state; the first RATE_WORDS words are the rate.
state: KkState,
// Rotation schedule passed to the permutation on every call.
rotations: [[u32; 2]; 15],
// Byte offset within the rate where the next absorbed byte is XORed in.
buf_pos: usize,
}
impl Clone for KkSponge {
    /// Returns an independent sponge with the same state, rotation schedule
    /// and write position.
    fn clone(&self) -> Self {
        let Self {
            state,
            rotations,
            buf_pos,
        } = self;
        Self {
            state: *state,
            rotations: *rotations,
            buf_pos: *buf_pos,
        }
    }
}
impl Drop for KkSponge {
    /// Wipes every field when the sponge goes out of scope.
    fn drop(&mut self) {
        self.state.zeroize();
        // The schedule may be derived from caller-supplied entropy
        // (`with_entropy_rotations`), and the write position reveals the
        // absorbed length modulo the rate, so neither is left behind.
        self.rotations.zeroize();
        self.buf_pos.zeroize();
    }
}
impl Default for KkSponge {
/// Equivalent to `KkSponge::new()`.
fn default() -> Self {
Self::new()
}
}
impl KkSponge {
pub fn new() -> Self {
Self {
state: KK_IV,
rotations: DEFAULT_ROTATIONS,
buf_pos: 0,
}
}
pub fn with_entropy_rotations(entropy: &[u8]) -> Self {
Self {
state: KK_IV,
rotations: rotations_from_entropy(entropy),
buf_pos: 0,
}
}
#[cfg(any(feature = "gpu", feature = "cuda"))]
pub fn state(&self) -> KkState {
self.state
}
#[cfg(any(feature = "gpu", feature = "cuda"))]
pub fn rotations(&self) -> [[u32; 2]; 15] {
self.rotations
}
#[cfg(any(feature = "gpu", feature = "cuda"))]
pub fn finalize_absorb_kdf(&mut self) {
self.finalize_absorb(DOMAIN_KDF);
}
fn permute(&mut self) {
kk_permute_with_schedule(&mut self.state, &self.rotations);
}
fn rate_bytes(&self) -> [u8; RATE_BYTES] {
let mut out = [0u8; RATE_BYTES];
for i in 0..RATE_WORDS {
out[i * 8..(i + 1) * 8].copy_from_slice(&self.state[i].to_le_bytes());
}
out
}
fn xor_rate_byte(&mut self, pos: usize, byte: u8) {
let word_idx = pos / 8;
let byte_idx = pos % 8;
self.state[word_idx] ^= (byte as u64) << (byte_idx * 8);
}
pub fn absorb(&mut self, data: &[u8]) {
let mut offset = 0;
while offset < data.len() {
if !self.buf_pos.is_multiple_of(8) || data.len() - offset < 8 {
self.xor_rate_byte(self.buf_pos, data[offset]);
offset += 1;
self.buf_pos += 1;
if self.buf_pos == RATE_BYTES {
self.permute();
self.buf_pos = 0;
}
continue;
}
let word_idx = self.buf_pos / 8;
let words_in_rate = (RATE_BYTES - self.buf_pos) / 8;
let words_in_data = (data.len() - offset) / 8;
let words = words_in_rate.min(words_in_data);
for i in 0..words {
let start = offset + i * 8;
let w = u64::from_le_bytes(data[start..start + 8].try_into().unwrap());
self.state[word_idx + i] ^= w;
}
offset += words * 8;
self.buf_pos += words * 8;
if self.buf_pos == RATE_BYTES {
self.permute();
self.buf_pos = 0;
}
}
}
fn finalize_absorb(&mut self, domain: u8) {
self.xor_rate_byte(self.buf_pos, domain);
self.xor_rate_byte(RATE_BYTES - 1, 0x80);
self.permute();
self.buf_pos = 0;
}
pub fn squeeze(&mut self, len: usize) -> Vec<u8> {
let mut output = Vec::with_capacity(len);
while output.len() < len {
let rate = self.rate_bytes();
let take = (len - output.len()).min(RATE_BYTES);
output.extend_from_slice(&rate[..take]);
if output.len() < len {
self.permute();
}
}
output
}
fn permute_n(&mut self, rounds: usize) {
kk_permute_n(&mut self.state, &self.rotations, rounds);
}
fn squeeze_kdf(&mut self, len: usize) -> Vec<u8> {
let mut output = Vec::with_capacity(len);
while output.len() < len {
let rate = self.rate_bytes();
let take = (len - output.len()).min(RATE_BYTES);
output.extend_from_slice(&rate[..take]);
if output.len() < len {
self.permute_n(KDF_SQUEEZE_ROUNDS);
}
}
output
}
}
/// Computes the 32-byte KK hash of `data`.
///
/// Absorbs `data` into a default sponge, finalizes with the hash domain byte
/// and returns the first 32 squeezed bytes. The intermediate heap buffer is
/// wiped before returning.
#[must_use = "hash digest computed but not used, did you mean kk_mac() for authentication?"]
pub fn kk_hash(data: &[u8]) -> [u8; 32] {
    let mut squeezed = {
        let mut sponge = KkSponge::new();
        sponge.absorb(data);
        sponge.finalize_absorb(DOMAIN_HASH);
        sponge.squeeze(32)
    };
    let mut digest = [0u8; 32];
    digest.copy_from_slice(&squeezed);
    squeezed.zeroize();
    digest
}
/// Derives `output_len` bytes of key material from `key`, `salt` and `info`.
///
/// The rotation schedule is derived from `salt`. The sponge then absorbs, in
/// order: `key`, the salt length (u64 LE), `salt`, the info length (u64 LE)
/// and `info`, and is finalized with the KDF domain byte. Output blocks are
/// separated by `KDF_SQUEEZE_ROUNDS` rounds.
#[must_use = "derived key material computed but not used, zeroize it when done"]
pub fn kk_kdf(key: &[u8], salt: &[u8], info: &[u8], output_len: usize) -> Vec<u8> {
    let salt_len = (salt.len() as u64).to_le_bytes();
    let info_len = (info.len() as u64).to_le_bytes();

    let mut sponge = KkSponge::with_entropy_rotations(salt);
    for part in [key, &salt_len[..], salt, &info_len[..], info] {
        sponge.absorb(part);
    }
    sponge.finalize_absorb(DOMAIN_KDF);
    sponge.squeeze_kdf(output_len)
}
/// Serialises the rate words of `state` as little-endian bytes.
#[cfg(all(target_arch = "x86_64", feature = "std"))]
fn rate_bytes_from_state(state: &KkState) -> [u8; RATE_BYTES] {
    let mut bytes = [0u8; RATE_BYTES];
    for (chunk, word) in bytes.chunks_exact_mut(8).zip(&state[..RATE_WORDS]) {
        chunk.copy_from_slice(&word.to_le_bytes());
    }
    bytes
}
/// Derives eight KDF outputs that share `key` and `salt` but differ in `info`.
///
/// Lane `i` of the result equals `kk_kdf(key, salt, infos[i], output_len)`.
/// The shared prefix (key, salt length, salt) is absorbed once and cloned per
/// lane. On x86_64 with the `std` feature and AVX-512F/DQ detected at run
/// time, the final permutation and squeeze run on all eight lanes at once;
/// otherwise each lane is finished with the scalar code.
pub fn kk_kdf_batch_8(
    key: &[u8],
    salt: &[u8],
    infos: [&[u8]; 8],
    output_len: usize,
) -> [Vec<u8>; 8] {
    // Absorb the part common to all lanes exactly once.
    let mut prefix = KkSponge::with_entropy_rotations(salt);
    prefix.absorb(key);
    prefix.absorb(&(salt.len() as u64).to_le_bytes());
    prefix.absorb(salt);
    let mut lanes: [KkSponge; 8] = core::array::from_fn(|_| prefix.clone());
    drop(prefix);

    for (lane, info) in lanes.iter_mut().zip(infos) {
        lane.absorb(&(info.len() as u64).to_le_bytes());
        lane.absorb(info);
    }

    #[cfg(all(target_arch = "x86_64", feature = "std"))]
    {
        if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512dq") {
            // Apply the padding by hand; the final permutation is done below
            // on the packed states instead of per lane.
            for lane in lanes.iter_mut() {
                let pad_at = lane.buf_pos;
                lane.xor_rate_byte(pad_at, DOMAIN_KDF);
                lane.xor_rate_byte(RATE_BYTES - 1, 0x80);
                lane.buf_pos = 0;
            }
            let rotations = lanes[0].rotations;
            let mut raw_states: [KkState; 8] = core::array::from_fn(|i| lanes[i].state);
            drop(lanes);
            // SAFETY: the AVX-512F and AVX-512DQ features required by the
            // callees were detected at run time just above.
            return unsafe {
                let mut packed = crate::kk_mix_avx512::load_8_states(&raw_states);
                crate::kk_mix_avx512::kk_permute_n_x8(&mut packed, &rotations, ROUNDS);
                raw_states.zeroize();
                vectorized_squeeze_8_packed(packed, &rotations, output_len)
            };
        }
    }

    // Scalar path: lanes are independent, so finish them one at a time.
    core::array::from_fn(|i| {
        let lane = &mut lanes[i];
        lane.finalize_absorb(DOMAIN_KDF);
        lane.squeeze_kdf(output_len)
    })
}
/// Packs eight scalar states and squeezes `output_len` bytes from each lane
/// via `vectorized_squeeze_8_packed`.
///
/// # Safety
///
/// The CPU must support AVX-512F and AVX-512DQ.
#[cfg(all(target_arch = "x86_64", feature = "std"))]
#[target_feature(enable = "avx512f,avx512dq")]
#[allow(dead_code)]
unsafe fn vectorized_squeeze_8(
states: &mut [KkState; 8],
rotations: &[[u32; 2]; 15],
output_len: usize,
) -> [Vec<u8>; 8] {
let packed = crate::kk_mix_avx512::load_8_states(states);
vectorized_squeeze_8_packed(packed, rotations, output_len)
}
/// Squeezes `output_len` bytes from each of eight packed states.
///
/// Emits one rate block per lane, then runs `KDF_SQUEEZE_ROUNDS` rounds on
/// all lanes before the next block. No permutation follows the last block.
///
/// # Safety
///
/// The CPU must support AVX-512F and AVX-512DQ.
#[cfg(all(target_arch = "x86_64", feature = "std"))]
#[target_feature(enable = "avx512f,avx512dq")]
unsafe fn vectorized_squeeze_8_packed(
    mut packed: crate::kk_mix_avx512::KkState8,
    rotations: &[[u32; 2]; 15],
    output_len: usize,
) -> [Vec<u8>; 8] {
    use crate::kk_mix_avx512::{kk_permute_n_x8, store_8_states};

    let mut outputs: [Vec<u8>; 8] = core::array::from_fn(|_| Vec::with_capacity(output_len));
    // Every lane receives the same number of bytes per block, so one counter suffices.
    let mut produced = 0usize;
    loop {
        let unpacked = store_8_states(&packed);
        let take = (output_len - produced).min(RATE_BYTES);
        for (lane, out) in outputs.iter_mut().enumerate() {
            let rate = rate_bytes_from_state(&unpacked[lane]);
            out.extend_from_slice(&rate[..take]);
        }
        produced += take;
        if produced >= output_len {
            break;
        }
        kk_permute_n_x8(&mut packed, rotations, KDF_SQUEEZE_ROUNDS);
    }
    outputs
}
/// Computes a 32-byte MAC tag over `message` under `key`.
///
/// The sponge absorbs the key length (u64 LE), the key and the message, is
/// finalized with the MAC domain byte, and the first 32 squeezed bytes form
/// the tag. The intermediate heap buffer is wiped before returning.
#[must_use = "MAC tag computed but not used, verify it with kk_mac_verify()"]
pub fn kk_mac(key: &[u8], message: &[u8]) -> [u8; 32] {
    let key_len = (key.len() as u64).to_le_bytes();

    let mut sponge = KkSponge::new();
    for part in [&key_len[..], key, message] {
        sponge.absorb(part);
    }
    sponge.finalize_absorb(DOMAIN_MAC);

    let mut squeezed = sponge.squeeze(32);
    let mut tag = [0u8; 32];
    tag.copy_from_slice(&squeezed);
    squeezed.zeroize();
    tag
}
/// Recomputes the MAC of `message` under `key` and compares it with
/// `expected_tag` using `constant_time_eq`.
///
/// Returns `true` only when the tags match.
pub fn kk_mac_verify(key: &[u8], message: &[u8], expected_tag: &[u8; 32]) -> bool {
    let recomputed = kk_mac(key, message);
    constant_time_eq(&recomputed[..], &expected_tag[..])
}
/// Computes a 32-byte MAC tag like `kk_mac`, but with a rotation schedule
/// derived from `entropy` instead of the default schedule.
#[must_use = "MAC tag computed but not used, verify it with kk_mac_verify_with_entropy()"]
pub fn kk_mac_with_entropy(key: &[u8], message: &[u8], entropy: &[u8]) -> [u8; 32] {
    let key_len = (key.len() as u64).to_le_bytes();

    let mut sponge = KkSponge::with_entropy_rotations(entropy);
    for part in [&key_len[..], key, message] {
        sponge.absorb(part);
    }
    sponge.finalize_absorb(DOMAIN_MAC);

    let mut squeezed = sponge.squeeze(32);
    let mut tag = [0u8; 32];
    tag.copy_from_slice(&squeezed);
    squeezed.zeroize();
    tag
}
/// Recomputes the entropy-scheduled MAC of `message` under `key` and compares
/// it with `expected_tag` using `constant_time_eq`.
///
/// `entropy` must be the same bytes that were used to create the tag.
pub fn kk_mac_verify_with_entropy(
    key: &[u8],
    message: &[u8],
    expected_tag: &[u8; 32],
    entropy: &[u8],
) -> bool {
    let recomputed = kk_mac_with_entropy(key, message, entropy);
    constant_time_eq(&recomputed[..], &expected_tag[..])
}
/// Computes eight MAC tags; lane `i` equals `kk_mac(keys[i], messages[i])`.
///
/// The AVX-512 kernel is used only when all keys share one length, all
/// messages share one length, the key length is a multiple of 8, and
/// AVX-512F/DQ are detected at run time. Otherwise each lane is computed with
/// the scalar `kk_mac`.
#[allow(dead_code)]
pub(crate) fn kk_mac_batch_8(keys: [&[u8]; 8], messages: [&[u8]; 8]) -> [[u8; 32]; 8] {
    #[cfg(all(target_arch = "x86_64", feature = "std"))]
    {
        let same_len = |parts: &[&[u8]; 8]| parts.iter().all(|p| p.len() == parts[0].len());
        let vectorizable = same_len(&keys)
            && same_len(&messages)
            && keys[0].len().is_multiple_of(8)
            && is_x86_feature_detected!("avx512f")
            && is_x86_feature_detected!("avx512dq");
        if vectorizable {
            // SAFETY: the required CPU features were detected just above.
            return unsafe { kk_mac_batch_8_avx512(keys, messages) };
        }
    }
    core::array::from_fn(|lane| kk_mac(keys[lane], messages[lane]))
}
/// AVX-512 kernel behind `kk_mac_batch_8`.
///
/// A MAC over `message` is the multipart MAC with an empty prefix, so this
/// forwards to `kk_mac_batch_8_multipart_avx512` instead of carrying a second
/// copy of the same absorb/pad/squeeze code.
///
/// All keys must share one length and all messages must share one length;
/// the dispatcher checks this before calling.
///
/// # Safety
///
/// The CPU must support AVX-512F and AVX-512DQ.
#[cfg(all(target_arch = "x86_64", feature = "std"))]
#[target_feature(enable = "avx512f,avx512dq")]
#[allow(dead_code)]
unsafe fn kk_mac_batch_8_avx512(keys: [&[u8]; 8], messages: [&[u8]; 8]) -> [[u8; 32]; 8] {
    const EMPTY: &[u8] = &[];
    // SAFETY: the callee requires exactly the target features this function
    // is compiled with, which the caller has guaranteed are available.
    unsafe { kk_mac_batch_8_multipart_avx512(keys, [EMPTY; 8], messages) }
}
pub(crate) fn kk_mac_batch_8_multipart(
keys: [&[u8]; 8],
prefixes: [&[u8]; 8],
bodies: [&[u8]; 8],
) -> [[u8; 32]; 8] {
let keys_uniform = keys.windows(2).all(|w| w[0].len() == w[1].len());
let prefixes_uniform = prefixes.windows(2).all(|w| w[0].len() == w[1].len());
let bodies_uniform = bodies.windows(2).all(|w| w[0].len() == w[1].len());
#[cfg(all(target_arch = "x86_64", feature = "std"))]
{
if keys_uniform
&& prefixes_uniform
&& bodies_uniform
&& is_x86_feature_detected!("avx512f")
&& is_x86_feature_detected!("avx512dq")
&& keys[0].len().is_multiple_of(8)
{
return unsafe { kk_mac_batch_8_multipart_avx512(keys, prefixes, bodies) };
}
}
let _ = (keys_uniform, prefixes_uniform, bodies_uniform);
core::array::from_fn(|i| {
let mut msg = Vec::with_capacity(prefixes[i].len() + bodies[i].len());
msg.extend_from_slice(prefixes[i]);
msg.extend_from_slice(bodies[i]);
kk_mac(keys[i], &msg)
})
}
/// AVX-512 kernel behind `kk_mac_batch_8_multipart`.
///
/// Key length, key and prefix are absorbed per lane with the scalar sponge,
/// plus as many body bytes as it takes to word-align the write position.
/// Full rate blocks of the body are then absorbed eight lanes at a time; the
/// tail, padding and final permutation follow, and the first four state words
/// of each lane form its tag.
///
/// Keys, prefixes and bodies must each have one common length across lanes;
/// lane 0's lengths are used for every lane.
///
/// # Safety
///
/// The CPU must support AVX-512F and AVX-512DQ.
#[cfg(all(target_arch = "x86_64", feature = "std"))]
#[target_feature(enable = "avx512f,avx512dq")]
unsafe fn kk_mac_batch_8_multipart_avx512(
    keys: [&[u8]; 8],
    prefixes: [&[u8]; 8],
    bodies: [&[u8]; 8],
) -> [[u8; 32]; 8] {
    use crate::kk_mix_avx512::{kk_permute_n_x8, load_8_states, store_8_states};
    use core::arch::x86_64::*;

    let rotations = DEFAULT_ROTATIONS;
    let body_len = bodies[0].len();

    // Scalar phase: everything before the body.
    let mut sponges: [KkSponge; 8] = core::array::from_fn(|_| KkSponge::new());
    for (lane, sponge) in sponges.iter_mut().enumerate() {
        sponge.absorb(&(keys[lane].len() as u64).to_le_bytes());
        sponge.absorb(keys[lane]);
        sponge.absorb(prefixes[lane]);
    }

    // Feed just enough body bytes to reach a word boundary (or the whole
    // body if it is shorter than that).
    let mut consumed = 0usize;
    let misalignment = sponges[0].buf_pos % 8;
    if misalignment != 0 {
        consumed = (8 - misalignment).min(body_len);
        for (lane, sponge) in sponges.iter_mut().enumerate() {
            sponge.absorb(&bodies[lane][..consumed]);
        }
    }

    let mut rate_pos = sponges[0].buf_pos;
    let mut raw_states: [KkState; 8] = core::array::from_fn(|i| sponges[i].state);
    drop(sponges);
    let mut packed = load_8_states(&raw_states);
    raw_states.zeroize();

    // Vector phase: absorb while the body can still fill the rest of the rate.
    while consumed < body_len && body_len - consumed >= RATE_BYTES - rate_pos {
        let fill = RATE_BYTES - rate_pos;
        let first_word = rate_pos / 8;
        for w in 0..fill / 8 {
            let at = consumed + w * 8;
            let lane_word =
                |lane: usize| i64::from_le_bytes(bodies[lane][at..at + 8].try_into().unwrap());
            // `_mm512_set_epi64` takes the highest lane first.
            let v = _mm512_set_epi64(
                lane_word(7),
                lane_word(6),
                lane_word(5),
                lane_word(4),
                lane_word(3),
                lane_word(2),
                lane_word(1),
                lane_word(0),
            );
            packed.0[first_word + w] = _mm512_xor_si512(packed.0[first_word + w], v);
        }
        consumed += fill;
        rate_pos = 0;
        kk_permute_n_x8(&mut packed, &rotations, ROUNDS);
    }

    // Tail phase: leftover bytes and padding, applied per lane.
    let tail_len = body_len - consumed;
    let pad_pos = rate_pos + tail_len;
    let mut states = store_8_states(&packed);
    for lane in 0..8 {
        let state = &mut states[lane];
        let tail = &bodies[lane][consumed..consumed + tail_len];
        for (j, &byte) in tail.iter().enumerate() {
            let pos = rate_pos + j;
            state[pos / 8] ^= (byte as u64) << ((pos % 8) * 8);
        }
        state[pad_pos / 8] ^= (DOMAIN_MAC as u64) << ((pad_pos % 8) * 8);
        state[(RATE_BYTES - 1) / 8] ^= 0x80u64 << (((RATE_BYTES - 1) % 8) * 8);
    }
    packed = load_8_states(&states);
    states.zeroize();
    kk_permute_n_x8(&mut packed, &rotations, ROUNDS);

    // The tag is the first 32 bytes (four words) of each lane's rate.
    let mut final_states = store_8_states(&packed);
    let mut out = [[0u8; 32]; 8];
    for lane in 0..8 {
        for (chunk, word) in out[lane].chunks_exact_mut(8).zip(&final_states[lane][..4]) {
            chunk.copy_from_slice(&word.to_le_bytes());
        }
    }
    final_states.zeroize();
    out
}
/// Compares two byte slices without exiting early on the first mismatch.
///
/// Slices of different length compare unequal immediately. Otherwise every
/// byte pair is XORed and the differences are OR-accumulated; the accumulator
/// is passed through `black_box` before the final test against zero.
fn constant_time_eq(a: &[u8], b: &[u8]) -> bool {
    if a.len() != b.len() {
        return false;
    }
    let accumulated = a
        .iter()
        .zip(b)
        .fold(0u8, |acc, (left, right)| acc | (left ^ right));
    black_box(accumulated) == 0
}
/// Mixes several entropy sources into `output_len` bytes.
///
/// For each source, in order, the sponge absorbs its index (u64 LE), its
/// length (u64 LE) and its bytes. The sponge is finalized with the hash
/// domain byte and squeezed with the full-round permutation.
#[must_use = "mixed entropy computed but not used, zeroize it when done"]
pub fn kk_entropy_mix(sources: &[&[u8]], output_len: usize) -> Vec<u8> {
    let mut sponge = KkSponge::new();
    for (index, source) in (0u64..).zip(sources) {
        let length = source.len() as u64;
        sponge.absorb(&index.to_le_bytes());
        sponge.absorb(&length.to_le_bytes());
        sponge.absorb(source);
    }
    sponge.finalize_absorb(DOMAIN_HASH);
    sponge.squeeze(output_len)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Number of bit positions in which two states differ.
    fn hamming(a: &KkState, b: &KkState) -> u32 {
        a.iter().zip(b).map(|(x, y)| (x ^ y).count_ones()).sum()
    }

    /// Borrows eight owned byte containers as eight slices.
    fn as_refs<T: AsRef<[u8]>>(items: &[T; 8]) -> [&[u8]; 8] {
        core::array::from_fn(|i| items[i].as_ref())
    }

    /// Asserts that `kk_mac_batch_8` agrees with `kk_mac` on every lane.
    fn assert_mac_batch_matches_scalar(keys: &[[u8; 32]; 8], msgs: &[Vec<u8>; 8], what: &str) {
        let batch_tags = kk_mac_batch_8(as_refs(keys), as_refs(msgs));
        for i in 0..8 {
            assert_eq!(
                batch_tags[i],
                kk_mac(&keys[i], &msgs[i]),
                "batch lane {i} ({what}) must match scalar kk_mac"
            );
        }
    }

    /// Asserts that `kk_kdf_batch_8` agrees with `kk_kdf` on every lane.
    fn assert_kdf_batch_matches_scalar(
        key: &[u8],
        salt: &[u8],
        infos: [&[u8]; 8],
        output_len: usize,
    ) {
        let scalar: [Vec<u8>; 8] =
            core::array::from_fn(|i| kk_kdf(key, salt, infos[i], output_len));
        let batch = kk_kdf_batch_8(key, salt, infos, output_len);
        for i in 0..8 {
            assert_eq!(
                batch[i], scalar[i],
                "Batch KDF lane {i} diverged from scalar kk_kdf"
            );
        }
    }

    #[test]
    fn permutation_changes_state() {
        let mut state = KK_IV;
        kk_permute(&mut state);
        assert_ne!(state, KK_IV, "Permutation must change the state");
    }

    #[test]
    fn permutation_is_deterministic() {
        let mut s1 = KK_IV;
        s1[0] ^= 0xDEAD;
        s1[1] ^= 0xBEEF;
        let mut s2 = s1;
        kk_permute(&mut s1);
        kk_permute(&mut s2);
        assert_eq!(s1, s2, "Same input must produce same output");
    }

    #[test]
    fn permutation_avalanche() {
        let mut s1 = KK_IV;
        let mut s2 = KK_IV;
        s2[0] ^= 1;
        kk_permute(&mut s1);
        kk_permute(&mut s2);
        let diff_bits = hamming(&s1, &s2);
        assert!(
            diff_bits > 300,
            "Poor avalanche: only {diff_bits}/1600 bits differ (expected ~800)"
        );
    }

    #[test]
    fn entropy_rotations_change_output() {
        let mut s1 = KK_IV;
        let mut s2 = s1;
        kk_permute(&mut s1);
        let alt_rots: [[u32; 2]; 15] = [
            [5, 50],
            [11, 33],
            [17, 39],
            [21, 47],
            [9, 53],
            [7, 41],
            [13, 29],
            [19, 37],
            [23, 43],
            [3, 55],
            [15, 35],
            [21, 45],
            [27, 33],
            [1, 57],
            [25, 51],
        ];
        kk_permute_with_schedule(&mut s2, &alt_rots);
        assert_ne!(
            s1, s2,
            "Different rotation schedules must produce different permutations"
        );
    }

    #[test]
    fn ddr_sensitivity() {
        let a = 0xDEADBEEF_CAFEBABE_u64;
        assert_ne!(
            ddr(a, 7),
            ddr(a, 8),
            "Different rotation sources must give different results"
        );
    }

    #[test]
    fn ddr_full_range() {
        // A word with a single zero bit has 64 distinct rotations.
        let a = 0xFFFF_FFFF_FFFF_FFFE_u64;
        let seen: std::collections::HashSet<u64> = (0..64u64).map(|b| ddr(a, b)).collect();
        assert!(
            seen.len() > 32,
            "DDR should produce diverse outputs: got {} unique values from 64 rotations",
            seen.len()
        );
    }

    #[test]
    fn quintet_round_diffusion() {
        let (mut a, mut b, mut c, mut d, mut e) = (0x1111u64, 0x2222, 0x3333, 0x4444, 0x5555);
        let (a0, b0, c0, d0, e0) = (a, b, c, d, e);
        quintet_round(&mut a, &mut b, &mut c, &mut d, &mut e, [7, 41]);
        assert_ne!(a, a0, "word a unchanged");
        assert_ne!(b, b0, "word b unchanged");
        assert_ne!(c, c0, "word c unchanged");
        assert_ne!(d, d0, "word d unchanged");
        assert_ne!(e, e0, "word e unchanged");
    }

    #[test]
    fn wide_state_avalanche() {
        let mut s1 = KK_IV;
        let mut s2 = KK_IV;
        s2[12] ^= 1;
        kk_permute(&mut s1);
        kk_permute(&mut s2);
        let diff_bits = hamming(&s1, &s2);
        assert!(
            diff_bits > 300,
            "Poor wide avalanche: only {diff_bits}/1600 bits differ"
        );
    }

    #[test]
    fn hash_deterministic() {
        assert_eq!(kk_hash(b"hello KK"), kk_hash(b"hello KK"));
    }

    #[test]
    fn hash_different_input_different_output() {
        assert_ne!(kk_hash(b"hello"), kk_hash(b"hellp"));
    }

    #[test]
    fn hash_empty_vs_nonempty() {
        assert_ne!(kk_hash(b""), kk_hash(b"x"));
    }

    #[test]
    fn kdf_deterministic_same_inputs() {
        let k1 = kk_kdf(b"secret", b"salt", b"info", 32);
        let k2 = kk_kdf(b"secret", b"salt", b"info", 32);
        assert_eq!(k1, k2);
    }

    #[test]
    fn kdf_different_salt_different_output() {
        let k1 = kk_kdf(b"secret", b"salt-a", b"info", 32);
        let k2 = kk_kdf(b"secret", b"salt-b", b"info", 32);
        assert_ne!(k1, k2);
    }

    #[test]
    fn kdf_different_info_different_output() {
        let k1 = kk_kdf(b"secret", b"salt", b"pos-0", 32);
        let k2 = kk_kdf(b"secret", b"salt", b"pos-1", 32);
        assert_ne!(k1, k2);
    }

    #[test]
    fn kdf_variable_length() {
        assert_eq!(kk_kdf(b"key", b"salt", b"info", 16).len(), 16);
        assert_eq!(kk_kdf(b"key", b"salt", b"info", 64).len(), 64);
    }

    #[test]
    fn mac_deterministic() {
        assert_eq!(kk_mac(b"key", b"message"), kk_mac(b"key", b"message"));
    }

    #[test]
    fn mac_different_key_different_tag() {
        assert_ne!(kk_mac(b"key-a", b"message"), kk_mac(b"key-b", b"message"));
    }

    #[test]
    fn mac_different_message_different_tag() {
        assert_ne!(kk_mac(b"key", b"msg-a"), kk_mac(b"key", b"msg-b"));
    }

    #[test]
    fn mac_verify_valid() {
        let tag = kk_mac(b"key", b"important data");
        assert!(kk_mac_verify(b"key", b"important data", &tag));
    }

    #[test]
    fn mac_verify_tampered() {
        let tag = kk_mac(b"key", b"important data");
        assert!(!kk_mac_verify(b"key", b"TAMPERED data", &tag));
    }

    #[test]
    fn mac_verify_wrong_key() {
        let tag = kk_mac(b"correct-key", b"data");
        assert!(!kk_mac_verify(b"wrong-key", b"data", &tag));
    }

    #[test]
    fn mac_batch_8_matches_scalar() {
        let keys: [[u8; 32]; 8] = core::array::from_fn(|i| {
            let mut k = [0u8; 32];
            k[0] = i as u8;
            k[31] = (i as u8).wrapping_mul(37);
            k
        });
        let msgs: [Vec<u8>; 8] = core::array::from_fn(|i| {
            (0..4096u16)
                .map(|j| (j as u8).wrapping_add(i as u8))
                .collect()
        });
        assert_mac_batch_matches_scalar(&keys, &msgs, "long msg");
    }

    #[test]
    fn mac_batch_8_short_messages() {
        let keys: [[u8; 32]; 8] = core::array::from_fn(|i| {
            let mut k = [0u8; 32];
            k[0] = (i as u8) + 100;
            k
        });
        let msgs: [Vec<u8>; 8] = core::array::from_fn(|i| vec![(i as u8).wrapping_mul(7); 50]);
        assert_mac_batch_matches_scalar(&keys, &msgs, "short msg");
    }

    #[test]
    fn mac_batch_8_multipart_matches_scalar() {
        let keys: [[u8; 32]; 8] = core::array::from_fn(|i| [(i as u8).wrapping_add(3); 32]);
        // A 13-byte prefix leaves the write position unaligned, which
        // exercises the alignment step; the 3-byte body is too short to
        // re-align, the 500-byte body spans several rate blocks.
        let prefixes: [Vec<u8>; 8] = core::array::from_fn(|i| vec![0xA0 | i as u8; 13]);
        for body_len in [0usize, 3, 500] {
            let bodies: [Vec<u8>; 8] = core::array::from_fn(|i| {
                (0..body_len)
                    .map(|j| (j as u8).wrapping_mul(5).wrapping_add(i as u8))
                    .collect()
            });
            let tags = kk_mac_batch_8_multipart(as_refs(&keys), as_refs(&prefixes), as_refs(&bodies));
            for i in 0..8 {
                let joined = [prefixes[i].as_slice(), bodies[i].as_slice()].concat();
                assert_eq!(
                    tags[i],
                    kk_mac(&keys[i], &joined),
                    "multipart lane {i} (body_len {body_len}) must match scalar kk_mac"
                );
            }
        }
    }

    #[test]
    fn entropy_mix_deterministic() {
        let sources: Vec<&[u8]> = vec![b"source1", b"source2", b"source3"];
        assert_eq!(kk_entropy_mix(&sources, 32), kk_entropy_mix(&sources, 32));
    }

    #[test]
    fn entropy_mix_different_sources_different_output() {
        let m1 = kk_entropy_mix(&[b"aaa", b"bbb"], 32);
        let m2 = kk_entropy_mix(&[b"aaa", b"ccc"], 32);
        assert_ne!(m1, m2);
    }

    #[test]
    fn constant_time_eq_works() {
        assert!(constant_time_eq(b"hello", b"hello"));
        assert!(!constant_time_eq(b"hello", b"hellp"));
        assert!(!constant_time_eq(b"short", b"longer"));
    }

    #[test]
    fn vector_hash_empty() {
        assert_eq!(
            hex::encode(kk_hash(b"")),
            "2081a4b4103da0f32a5bbcb8228bc36a19c631800f932f00f94d85c695a545f6",
            "REGRESSION: kk_hash(\"\") output changed"
        );
    }

    #[test]
    fn vector_hash_kk() {
        assert_eq!(
            hex::encode(kk_hash(b"KK-Keeney-Kode")),
            "a2c79f9fb85d9a500c3754f69845e626f235f33fb3185f414cde68a28744a191",
            "REGRESSION: kk_hash(\"KK-Keeney-Kode\") output changed"
        );
    }

    #[test]
    fn vector_hash_1024_ab() {
        assert_eq!(
            hex::encode(kk_hash(&[0xABu8; 1024])),
            "f12befd96fc0610f7bda952265a85b080a5b1bc89867b2967e678ff82ae80c14",
            "REGRESSION: kk_hash([0xAB; 1024]) output changed"
        );
    }

    #[test]
    fn vector_mac() {
        assert_eq!(
            hex::encode(kk_mac(b"secret-key-2026", b"authenticate this")),
            "f193a05de470757c5bd755249df610219f1ac4eba5cb7144d1f26671b0d5acfe",
            "REGRESSION: kk_mac output changed"
        );
    }

    #[test]
    fn vector_kdf() {
        assert_eq!(
            hex::encode(kk_kdf(b"master-key", b"salt-value", b"kdf-context", 32)),
            "9d94dc8417cafb791fdc403e6968b7a83ca2426ca591b1a598eb4b6b2d68ee46",
            "REGRESSION: kk_kdf output changed"
        );
    }

    #[test]
    fn batch_kdf_matches_scalar() {
        let infos_raw: [Vec<u8>; 8] = core::array::from_fn(|i| {
            let mut info = Vec::with_capacity(18 + 8 + 8);
            info.extend_from_slice(b"KK-sym-v1\0");
            info.extend_from_slice(&(i as u64).to_le_bytes());
            info.extend_from_slice(&0x1234_5678_ABCD_EF00u64.to_le_bytes());
            info
        });
        assert_kdf_batch_matches_scalar(
            b"batch-test-master-key",
            b"batch-test-salt-entropy-bytes",
            as_refs(&infos_raw),
            4096,
        );
    }

    #[test]
    fn batch_kdf_multi_block_squeeze() {
        let infos: [&[u8]; 8] = [
            b"info-0", b"info-1", b"info-2", b"info-3", b"info-4", b"info-5", b"info-6", b"info-7",
        ];
        assert_kdf_batch_matches_scalar(b"multi-block-key", b"multi-block-salt", infos, 1024);
    }

    #[test]
    fn absorb_state_differs_for_different_messages() {
        let key = vec![0x78u8; 32];
        let key_len_bytes = (key.len() as u64).to_le_bytes();
        let msg1 = vec![0xAAu8; 76];
        let mut msg2 = msg1.clone();
        msg2[62] = 0x55;

        let absorb_all = |msg: &[u8]| {
            let mut sponge = KkSponge::new();
            sponge.absorb(&key_len_bytes);
            sponge.absorb(&key);
            sponge.absorb(msg);
            sponge
        };
        let mut s1 = absorb_all(&msg1);
        let mut s2 = absorb_all(&msg2);
        assert_ne!(
            s1.state, s2.state,
            "Sponge states MUST differ after absorbing different messages"
        );

        // Apply the MAC padding by hand, without the final permutation.
        for sponge in [&mut s1, &mut s2] {
            let pad_at = sponge.buf_pos;
            sponge.xor_rate_byte(pad_at, DOMAIN_MAC);
            sponge.xor_rate_byte(RATE_BYTES - 1, 0x80);
        }
        assert_ne!(
            s1.state, s2.state,
            "States must differ after padding, before permute"
        );

        let mut state1 = s1.state;
        let mut state2 = s2.state;
        kk_permute(&mut state1);
        kk_permute(&mut state2);
        assert_ne!(
            state1, state2,
            "Permutation MUST produce different outputs for different inputs"
        );
    }
}