use crate::cryptoutil::xor_array64_mut;
use crate::hashing::blake2b;
use alloc::borrow::ToOwned;
use alloc::boxed::Box;
use alloc::vec;
use core::num::NonZeroU32;
use core::ops::{BitXorAssign, Index, IndexMut};
/// Argon2 cost/variant parameters plus the derived memory geometry.
///
/// Construct via `argon2d()` / `argon2i()` / `argon2id()` and adjust with the
/// builder-style setters; the derived fields are recomputed by
/// `parallelism_override_memory`.
pub struct Params {
    // Number of independent lanes (p); validated to 1 ..= 2^24 - 1.
    parallelism: NonZeroU32,
    // Number of passes over memory (t).
    iterations: NonZeroU32,
    // Requested memory cost in KiB (one block = 1 KiB).
    memory_kb: u32,
    // Argon2 version: 0x10 or 0x13.
    version: u32,
    // Which variant (Argon2d / Argon2i / Argon2id) to run.
    hash_type: Type,
    // Derived: usable block count — memory_kb rounded down to a multiple of
    // parallelism * SYNC_POINTS (after the 8-blocks-per-lane floor).
    memory_blocks: u32,
    // Derived: blocks per (lane, slice) segment.
    segment_length: u32,
    // Derived: blocks per lane (= segment_length * SYNC_POINTS).
    lane_length: u32,
}
/// Reasons a `Params` builder call can be rejected.
#[derive(Clone, Copy, Debug)]
pub enum InvalidParam {
    // `parallelism(0)` was requested; at least one lane is required.
    ParallelismZero,
    // Parallelism must be below 2^24.
    ParallelismTooHigh,
    // `iterations(0)` was requested; at least one pass is required.
    IterationsZero,
    // Version was neither 0x10 nor 0x13.
    UnknownVersion,
    // Memory cost above a supported maximum.
    // NOTE(review): no setter in this file currently returns this variant —
    // confirm whether `memory_kb` should enforce an upper bound.
    MemoryTooHigh,
}
impl Params {
    /// Shared defaults: p = 1, t = 1, m = 32 KiB, version 0x13, with the
    /// derived geometry fields pre-computed for those values.
    fn def(hash_type: Type) -> Self {
        Self {
            parallelism: NonZeroU32::new(1).unwrap(),
            iterations: NonZeroU32::new(1).unwrap(),
            memory_kb: 32,
            version: 0x13,
            hash_type,
            memory_blocks: 32,
            segment_length: 8,
            lane_length: 32,
        }
    }

    /// Default parameters for the Argon2d variant.
    pub fn argon2d() -> Self {
        Self::def(Type::Argon2d)
    }

    /// Default parameters for the Argon2id variant.
    pub fn argon2id() -> Self {
        Self::def(Type::Argon2id)
    }

    /// Default parameters for the Argon2i variant.
    pub fn argon2i() -> Self {
        Self::def(Type::Argon2i)
    }

    /// Sets the memory cost in KiB and recomputes the derived geometry.
    pub fn memory_kb(mut self, memory_kb: u32) -> Result<Self, InvalidParam> {
        self.memory_kb = memory_kb;
        self.parallelism_override_memory();
        Ok(self)
    }

    /// Sets the lane count (must be 1 ..= 2^24 - 1) and recomputes geometry.
    pub fn parallelism(mut self, parallelism: u32) -> Result<Self, InvalidParam> {
        if parallelism >= 0x1000000 {
            return Err(InvalidParam::ParallelismTooHigh);
        }
        let lanes = NonZeroU32::new(parallelism).ok_or(InvalidParam::ParallelismZero)?;
        self.parallelism = lanes;
        self.parallelism_override_memory();
        Ok(self)
    }

    /// Sets the pass count (must be non-zero).
    pub fn iterations(mut self, iterations: u32) -> Result<Self, InvalidParam> {
        match NonZeroU32::new(iterations) {
            Some(passes) => {
                self.iterations = passes;
                Ok(self)
            }
            None => Err(InvalidParam::IterationsZero),
        }
    }

    /// Sets the Argon2 version; only 0x10 and 0x13 are recognized.
    pub fn version(mut self, version: u32) -> Result<Self, InvalidParam> {
        match version {
            0x10 | 0x13 => {
                self.version = version;
                Ok(self)
            }
            _ => Err(InvalidParam::UnknownVersion),
        }
    }

    /// Recomputes `memory_blocks` / `segment_length` / `lane_length` from the
    /// current `memory_kb` and `parallelism`: enforce a floor of 8 blocks per
    /// lane, then round the block count down to a multiple of
    /// parallelism * SYNC_POINTS.
    fn parallelism_override_memory(&mut self) {
        let lanes = self.parallelism.get();
        if self.memory_kb < 8 * lanes {
            self.memory_kb = 8 * lanes;
        }
        self.segment_length = self.memory_kb / (lanes * SYNC_POINTS);
        self.memory_blocks = self.segment_length * (lanes * SYNC_POINTS);
        self.lane_length = self.segment_length * SYNC_POINTS;
    }
}
// Number of synchronization points (slices) per lane; fixed at 4 by Argon2.
const SYNC_POINTS: u32 = 4;
// A memory block is 128 u64 words = 1024 bytes.
const BLOCK_SIZE_U64: usize = 128; const BLOCK_SIZE: usize = BLOCK_SIZE_U64 * 8;
/// One 1024-byte Argon2 memory block, held as 128 64-bit words.
#[derive(Clone)]
struct Block([u64; BLOCK_SIZE_U64]);
impl Block {
    /// Returns an all-zero block.
    pub fn new() -> Block {
        Block([0u64; BLOCK_SIZE_U64])
    }
    /// Views the block's words as raw bytes.
    ///
    /// NOTE(review): this reinterprets the u64 array in native byte order, so
    /// the byte view matches the Argon2 spec only on little-endian targets —
    /// confirm the crate is restricted to little-endian platforms.
    pub fn as_u8(&self) -> &[u8] {
        // SAFETY: [u64; 128] and [u8; 1024] have identical size, u8 has weaker
        // alignment than u64, and every byte pattern is a valid u8, so the
        // reference cast is in-bounds, well-aligned, and valid.
        let bytes: &[u8; BLOCK_SIZE] =
            unsafe { &*(&self.0 as *const [u64; BLOCK_SIZE_U64] as *const [u8; BLOCK_SIZE]) };
        bytes
    }
    /// Mutable byte view of the block (same endianness caveat as `as_u8`).
    pub fn as_u8_mut(&mut self) -> &mut [u8; BLOCK_SIZE] {
        // SAFETY: same layout/alignment argument as `as_u8`; reborrowing
        // through `&mut self` preserves uniqueness of the mutable reference.
        let bytes: &mut [u8; BLOCK_SIZE] =
            unsafe { &mut *(&mut self.0 as *mut [u64; BLOCK_SIZE_U64] as *mut [u8; BLOCK_SIZE]) };
        bytes
    }
}
impl<'a> BitXorAssign<&'a Block> for Block {
    /// In-place XOR of another block into this one — presumably word-wise;
    /// see `cryptoutil::xor_array64_mut` for the actual implementation.
    fn bitxor_assign(&mut self, rhs: &Block) {
        xor_array64_mut(&mut self.0, &rhs.0)
    }
}
impl Index<usize> for Block {
    type Output = u64;

    /// Read access to one of the block's 128 words.
    fn index(&self, index: usize) -> &Self::Output {
        &self.0[index]
    }
}

impl IndexMut<usize> for Block {
    /// Write access to one of the block's 128 words.
    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
        &mut self.0[index]
    }
}
/// The Argon2 working memory: `parallelism` lanes of `lane_length` blocks,
/// stored as one flat slice indexed by `lane * lane_length + column`.
struct Memory {
    // Blocks per lane (row stride into `blocks`).
    lane_length: u32,
    // All lanes' blocks, contiguous.
    blocks: Box<[Block]>,
}
impl Memory {
    /// Distance (in blocks) between the starts of consecutive lanes.
    fn stride(&self) -> u32 {
        self.lane_length
    }

    /// Allocates `parallelism * lane_length` zeroed blocks.
    fn new(params: &Params) -> Memory {
        let total = (params.parallelism.get() as usize) * (params.lane_length as usize);
        let blocks = vec![Block::new(); total].into_boxed_slice();
        Memory {
            lane_length: params.lane_length,
            blocks,
        }
    }

    /// Shared access by flat u32 index.
    fn block_index(&self, index: u32) -> &Block {
        &self.blocks[index as usize]
    }

    /// Shared access by flat u64 index (lane * lane_length + column).
    fn block_index64(&self, index64: u64) -> &Block {
        &self.blocks[index64 as usize]
    }

    /// Exclusive access by flat u32 index.
    fn mut_block_index(&mut self, index: u32) -> &mut Block {
        &mut self.blocks[index as usize]
    }

    /// Exclusive access by (lane, column) coordinates.
    fn mut_block_at(&mut self, row: u32, col: u32) -> &mut Block {
        let flat = (row as usize) * (self.lane_length as usize) + (col as usize);
        &mut self.blocks[flat]
    }
}
/// Coordinates of the block currently being filled.
#[derive(Clone, Debug)]
struct BlockPos {
    // Which pass over memory (0-based, < iterations).
    pass: u32,
    // Which lane (0-based, < parallelism).
    lane: u32,
    // Which slice / sync point within the lane (0 .. SYNC_POINTS).
    slice: u32,
    // Index of the block within the current segment.
    index: u32,
}
/// Runs the full Argon2 memory-filling schedule and writes the tag to `out`.
///
/// Initializes the first two blocks of every lane from H0, performs
/// `iterations` passes of segment filling across all slices and lanes, then
/// XORs the final block of every lane and hashes the result down to `out`.
fn process(params: &Params, h0: &H0, memory: &mut Memory, out: &mut [u8]) {
    let lanes = params.parallelism.get();

    // B[lane][0] and B[lane][1] come from H'(H0 || LE32(col) || LE32(lane)).
    for lane in 0..lanes {
        for col in 0..2 {
            hprime_block_init(memory.mut_block_at(lane, col).as_u8_mut(), &h0.0, col, lane);
        }
    }

    for pass in 0..params.iterations.get() {
        for slice in 0..SYNC_POINTS {
            for lane in 0..lanes {
                let start = BlockPos {
                    pass,
                    lane,
                    slice,
                    index: 0,
                };
                fill_segment(params, &start, memory);
            }
        }
    }

    // Final block C = XOR of the last block in every lane; tag = H'(C).
    let mut final_block = memory.block_index(memory.stride() - 1).clone();
    for lane in 1..lanes {
        let last = lane * memory.stride() + (memory.stride() - 1);
        final_block ^= memory.block_index(last);
    }
    hprime(out, final_block.as_u8());
}
/// Argon2 compression function G (RFC 9106 §3.5).
///
/// Computes `R = ref ^ prev`, applies the Blake2b-based permutation P to the
/// 8 rows and then the 8 column groups of R, and stores
/// `next = P(R) ^ R [^ old next when with_xor]`.
///
/// The previous implementation spelled out the 16 loads, the call to `p`, and
/// the 16 stores twice (~80 duplicated lines); the gather/scatter is factored
/// into `permute`, driven by index tables, with an identical execution trace.
fn fill_block(prev_block: &Block, ref_block: &Block, next_block: &mut Block, with_xor: bool) {
    let mut block_r = ref_block.clone();
    block_r ^= prev_block;
    // block_tmp keeps the pre-permutation value (plus the old destination when
    // XOR-ing, i.e. version 0x13 passes > 0) for the final feed-forward.
    let mut block_tmp = block_r.clone();
    if with_xor {
        block_tmp ^= next_block;
    }

    // Gather 16 words at `idx`, run the permutation P, scatter them back.
    fn permute(block: &mut Block, idx: [usize; 16]) {
        let mut v: [u64; 16] = core::array::from_fn(|k| block[idx[k]]);
        let [v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15] = &mut v;
        p(
            v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15,
        );
        for (k, &j) in idx.iter().enumerate() {
            block[j] = v[k];
        }
    }

    // Row rounds: words 16*i .. 16*i+15.
    for i in 0..8 {
        permute(&mut block_r, core::array::from_fn(|k| 16 * i + k));
    }
    // Column rounds: word pairs (2*i + 16*c, 2*i + 16*c + 1) for c in 0..8.
    for i in 0..8 {
        permute(
            &mut block_r,
            core::array::from_fn(|k| 2 * i + 16 * (k / 2) + (k % 2)),
        );
    }

    // Feed-forward: next = block_tmp ^ P(R).
    *next_block = block_tmp;
    *next_block ^= &block_r;
}
/// Fills one (pass, lane, slice) segment of memory (RFC 9106 §3.4).
///
/// For each block in the segment, derives a pseudo-random reference (either
/// data-independently from generated address blocks, or from the previous
/// block's first word), maps it to a (lane, index) via `index_alpha`, and
/// compresses `prev` and `ref` into the current block with `fill_block`.
fn fill_segment(params: &Params, position: &BlockPos, memory: &mut Memory) {
    let mut position = position.clone();
    // Argon2i always uses data-independent addressing; Argon2id uses it only
    // for the first half of the first pass. Parentheses are now explicit:
    // `&&` binds tighter than `||`, so the previous unparenthesized form
    // already grouped this way, but it read ambiguously.
    let data_independent_addressing = (params.hash_type == Type::Argon2i)
        || ((params.hash_type == Type::Argon2id)
            && (position.pass == 0)
            && (position.slice < (SYNC_POINTS / 2)));
    let zero_block = Block::new();
    let mut input_block = Block::new();
    let mut address_block = Block::new();
    if data_independent_addressing {
        // Seed the address-generation block with the segment coordinates and
        // parameters; word 6 is the counter bumped by next_addresses.
        input_block[0] = position.pass as u64;
        input_block[1] = position.lane as u64;
        input_block[2] = position.slice as u64;
        input_block[3] = params.memory_blocks as u64;
        input_block[4] = params.iterations.get() as u64;
        input_block[5] = params.hash_type as u64;
    }
    // In the very first segment of each lane, blocks 0 and 1 were already
    // produced by H' during initialization — start at index 2.
    let mut starting_index = 0u32;
    if position.pass == 0 && position.slice == 0 {
        starting_index = 2;
        if data_independent_addressing {
            next_addresses(&mut address_block, &mut input_block, &zero_block);
        }
    }
    let mut curr_offset = (position.lane * memory.stride())
        + (position.slice * params.segment_length)
        + starting_index;
    // The "previous" block wraps to the end of the lane at a lane boundary.
    let mut prev_offset = if curr_offset % memory.stride() == 0 {
        curr_offset + memory.stride() - 1
    } else {
        curr_offset - 1
    };
    let mut pseudo_rand;
    for i in starting_index..params.segment_length {
        // Re-anchor prev_offset once we move past a lane's first column.
        if curr_offset % memory.stride() == 1 {
            prev_offset = curr_offset - 1;
        }
        if data_independent_addressing {
            // Each generated address block yields 128 pseudo-random words.
            if i % 128 == 0 {
                next_addresses(&mut address_block, &mut input_block, &zero_block);
            }
            pseudo_rand = address_block[(i % 128) as usize];
        } else {
            pseudo_rand = memory.block_index(prev_offset)[0];
        }
        // During the first slice of the first pass, references stay in-lane.
        let ref_lane = if (position.pass == 0) && (position.slice == 0) {
            position.lane as u64
        } else {
            (pseudo_rand >> 32) % params.parallelism.get() as u64
        };
        position.index = i;
        let pseudo_rand_u32 = (pseudo_rand & 0xffff_ffff) as u32;
        let same_lane = ref_lane == (position.lane as u64);
        let ref_index = index_alpha(params, &position, pseudo_rand_u32, same_lane);
        let index = params.lane_length as u64 * ref_lane + ref_index as u64;
        // Clone the destination so shared borrows of prev/ref can coexist.
        let mut curr_block = memory.block_index(curr_offset).clone();
        let prev_block = memory.block_index(prev_offset);
        let ref_block = memory.block_index64(index);
        // Version 0x13 XORs into the existing block on passes after the first.
        let with_xor = !(params.version == 0x10 || position.pass == 0);
        fill_block(prev_block, ref_block, &mut curr_block, with_xor);
        *memory.mut_block_index(curr_offset) = curr_block;
        curr_offset += 1;
        prev_offset += 1;
    }
}
#[doc(hidden)]
/// Variable-length hash H'(X) built on Blake2b (RFC 9106 §3.3).
///
/// Outputs of at most 64 bytes are a single Blake2b of LE32(len) || input.
/// Longer outputs chain 64-byte Blake2b digests, emitting the first 32 bytes
/// of each, with a final digest sized to the remaining 33..=64 bytes.
pub fn hprime(output: &mut [u8], input: &[u8]) {
    let out_len = output.len();
    if out_len <= 64 {
        blake2b::ContextDyn::new(out_len)
            .update(&(out_len as u32).to_le_bytes())
            .update(&input)
            .finalize_at(output);
        return;
    }
    // First digest covers the length prefix and the input itself.
    let mut digest = blake2b::Context::<512>::new()
        .update(&(out_len as u32).to_le_bytes())
        .update(input)
        .finalize();
    output[..32].copy_from_slice(&digest[..32]);
    let mut written = 32;
    // Middle digests: hash the previous digest, keep 32 bytes each.
    while out_len - written > 64 {
        blake2b::Context::<512>::new()
            .update(&digest)
            .finalize_at(&mut digest);
        output[written..written + 32].copy_from_slice(&digest[..32]);
        written += 32;
    }
    // Last digest supplies all remaining bytes (33..=64 of them).
    let remaining = out_len - written;
    blake2b::ContextDyn::new(remaining)
        .update(&digest)
        .finalize_at(&mut output[written..]);
}
/// Specialized H' for exactly 1024 bytes:
/// B[lane][col] = H'(H0 || LE32(col) || LE32(lane)), used to seed the first
/// two blocks of every lane.
fn hprime_block_init(output: &mut [u8; 1024], h0: &[u8; 64], col: u32, lane: u32) {
    let mut digest = blake2b::Context::<512>::new()
        .update(&1024u32.to_le_bytes())
        .update(h0)
        .update(&col.to_le_bytes())
        .update(&lane.to_le_bytes())
        .finalize();
    output[..32].copy_from_slice(&digest[..32]);
    let mut pos = 32;
    // 29 intermediate digests contribute 32 bytes each: 32 + 29 * 32 = 960.
    for _ in 0..29 {
        blake2b::Context::<512>::new()
            .update(&digest)
            .finalize_at(&mut digest);
        output[pos..pos + 32].copy_from_slice(&digest[..32]);
        pos += 32;
    }
    // The final digest supplies the last 64 bytes: 960 + 64 = 1024.
    blake2b::Context::<512>::new()
        .update(&digest)
        .finalize_at(&mut output[pos..pos + 64]);
}
/// Maps the 32-bit pseudo-random value to a block index within the reference
/// lane (RFC 9106 §3.4.1.3, the phi mapping).
fn index_alpha(params: &Params, position: &BlockPos, pseudo_rand: u32, same_lane: bool) -> u32 {
    // Size of the window of blocks that may legally be referenced.
    let reference_area_size = if position.pass == 0 {
        if position.slice == 0 {
            // First slice of first pass: only blocks already written in this
            // segment, excluding the immediately previous block.
            position.index - 1
        } else if same_lane {
            // Same lane: all finished slices plus progress in this segment.
            position.slice * params.segment_length + position.index - 1
        } else if position.index == 0 {
            // Other lane, first block of the segment: exclude the last block.
            position.slice * params.segment_length - 1
        } else {
            position.slice * params.segment_length
        }
    } else {
        // Later passes: the whole lane minus the segment being overwritten.
        if same_lane {
            params.lane_length - params.segment_length + position.index - 1
        } else if position.index == 0 {
            params.lane_length - params.segment_length - 1
        } else {
            params.lane_length - params.segment_length
        }
    };
    let reference_area_size = reference_area_size as u64;
    // Non-uniform mapping: x -> area - 1 - floor(area * x^2 / 2^64), which
    // biases references toward recently written blocks.
    let mut relative_position = pseudo_rand as u64;
    relative_position = (relative_position * relative_position) >> 32;
    relative_position = reference_area_size - 1 - ((reference_area_size * relative_position) >> 32);
    // After pass 0 the window starts just past the current slice (wrapping
    // around the lane via the modulo below).
    let start_position = if position.pass != 0 {
        if position.slice == SYNC_POINTS - 1 {
            0u32
        } else {
            (position.slice + 1) * params.segment_length
        }
    } else {
        0u32
    };
    ((start_position as u64 + relative_position) % params.lane_length as u64) as u32
}
/// Produces the next batch of 128 data-independent reference addresses:
/// bump the counter word, then apply the compression function G twice
/// (RFC 9106 §3.4.1.2).
fn next_addresses(address_block: &mut Block, input_block: &mut Block, zero_block: &Block) {
    input_block[6] += 1;
    fill_block(zero_block, input_block, address_block, false);
    let first_pass = address_block.clone();
    fill_block(zero_block, &first_pass, address_block, false);
}
/// The permutation P: one round of Blake2b's mixing applied to 16 words,
/// using the BlaMka multiplication-hardened addition (RFC 9106 §3.6).
fn p(
    v0: &mut u64,
    v1: &mut u64,
    v2: &mut u64,
    v3: &mut u64,
    v4: &mut u64,
    v5: &mut u64,
    v6: &mut u64,
    v7: &mut u64,
    v8: &mut u64,
    v9: &mut u64,
    v10: &mut u64,
    v11: &mut u64,
    v12: &mut u64,
    v13: &mut u64,
    v14: &mut u64,
    v15: &mut u64,
) {
    // BlaMka: x + y + 2 * trunc32(x) * trunc32(y), all mod 2^64.
    #[inline]
    fn blamka(x: u64, y: u64) -> u64 {
        let lo_product = (x & 0xffff_ffff) * (y & 0xffff_ffff);
        x.wrapping_add(y).wrapping_add(lo_product << 1)
    }
    // Blake2b quarter-round with BlaMka in place of plain addition.
    fn quarter(a: &mut u64, b: &mut u64, c: &mut u64, d: &mut u64) {
        *a = blamka(*a, *b);
        *d = (*d ^ *a).rotate_right(32);
        *c = blamka(*c, *d);
        *b = (*b ^ *c).rotate_right(24);
        *a = blamka(*a, *b);
        *d = (*d ^ *a).rotate_right(16);
        *c = blamka(*c, *d);
        *b = (*b ^ *c).rotate_right(63);
    }
    // Column step.
    quarter(v0, v4, v8, v12);
    quarter(v1, v5, v9, v13);
    quarter(v2, v6, v10, v14);
    quarter(v3, v7, v11, v15);
    // Diagonal step.
    quarter(v0, v5, v10, v15);
    quarter(v1, v6, v11, v12);
    quarter(v2, v7, v8, v13);
    quarter(v3, v4, v9, v14);
}
/// Argon2 variant. The discriminants are the numeric `y` codes fed into H0
/// and the address-generation input block, so they must not change.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
enum Type {
    // Data-dependent addressing throughout.
    Argon2d = 0,
    // Data-independent addressing throughout.
    Argon2i = 1,
    // Hybrid: data-independent for the first half of pass 0 (see
    // fill_segment), data-dependent afterwards.
    Argon2id = 2,
}
/// The 64-byte initial hash H0 (RFC 9106 §3.2): a Blake2b-512 digest binding
/// all parameters and inputs before memory filling begins.
#[derive(Clone)]
pub struct H0([u8; 64]);
impl H0 {
    /// Computes H0 by hashing, in order: parallelism, tag length, memory
    /// cost, iterations, version, variant code, then each variable-length
    /// input (password, salt, key, aad) prefixed with its LE32 length.
    /// The field order is fixed by the spec and must not be rearranged.
    pub fn new(
        params: &Params,
        password: &[u8],
        salt: &[u8],
        key: &[u8],
        aad: &[u8],
        tag_length: u32,
    ) -> Self {
        let h0 = blake2b::Context::<512>::new()
            .update(&params.parallelism.get().to_le_bytes())
            .update(&tag_length.to_le_bytes())
            .update(&params.memory_kb.to_le_bytes())
            .update(&params.iterations.get().to_le_bytes())
            .update(&params.version.to_le_bytes())
            .update(&(params.hash_type as u32).to_le_bytes())
            .update(&u32::to_le_bytes(password.len() as u32))
            .update(password)
            .update(&u32::to_le_bytes(salt.len() as u32))
            .update(salt)
            .update(&u32::to_le_bytes(key.len() as u32))
            .update(key)
            .update(&u32::to_le_bytes(aad.len() as u32))
            .update(aad)
            .finalize();
        Self(h0)
    }
}
/// Computes an Argon2 tag into the caller-provided `tag` slice; the tag
/// length is taken from `tag.len()`.
pub fn argon2_at(
    params: &Params,
    password: &[u8],
    salt: &[u8],
    key: &[u8],
    aad: &[u8],
    tag: &mut [u8],
) {
    let tag_len = tag.len() as u32;
    let h0 = H0::new(params, password, salt, key, aad, tag_len);
    let mut memory = Memory::new(params);
    process(params, &h0, &mut memory, tag);
}
pub fn argon2<const T: usize>(
params: &Params,
password: &[u8],
salt: &[u8],
key: &[u8],
aad: &[u8],
) -> [u8; T] {
let mut tag = [0u8; T];
let h0 = H0::new(¶ms, password, salt, key, aad, T as u32);
let mut memory = Memory::new(params);
process(¶ms, &h0, &mut memory, &mut tag);
tag
}
#[cfg(test)]
mod tests {
    use super::*;
    // Applies the RFC 9106 §5 test-vector costs: m = 32 KiB, t = 3, p = 4.
    fn rfc9106_params(params: Params) -> Params {
        params
            .memory_kb(32)
            .unwrap()
            .iterations(3)
            .unwrap()
            .parallelism(4)
            .unwrap()
    }
    // Runs one variant against the RFC 9106 §5 inputs: password = 32 bytes of
    // 0x01, salt = 16 bytes of 0x02, secret = 8 bytes of 0x03, associated
    // data = 12 bytes of 0x04, with a 32-byte tag.
    fn run_std(params: Params, expected: &[u8; 32]) {
        let params = rfc9106_params(params);
        let tag = argon2(&params, &[0x01; 32], &[0x02; 16], &[0x03; 8], &[0x04; 12]);
        assert_eq!(*expected, tag, "expected tag failed")
    }
    #[test]
    fn argon2d_rfc9106() {
        // RFC 9106 §5.1 Argon2d test vector tag.
        const EXPECTED: [u8; 32] = [
            0x51, 0x2b, 0x39, 0x1b, 0x6f, 0x11, 0x62, 0x97, 0x53, 0x71, 0xd3, 0x09, 0x19, 0x73,
            0x42, 0x94, 0xf8, 0x68, 0xe3, 0xbe, 0x39, 0x84, 0xf3, 0xc1, 0xa1, 0x3a, 0x4d, 0xb9,
            0xfa, 0xbe, 0x4a, 0xcb,
        ];
        run_std(Params::argon2d(), &EXPECTED)
    }
    #[test]
    fn argon2i_rfc9106() {
        // RFC 9106 §5.2 Argon2i test vector tag.
        const EXPECTED: [u8; 32] = [
            0xc8, 0x14, 0xd9, 0xd1, 0xdc, 0x7f, 0x37, 0xaa, 0x13, 0xf0, 0xd7, 0x7f, 0x24, 0x94,
            0xbd, 0xa1, 0xc8, 0xde, 0x6b, 0x01, 0x6d, 0xd3, 0x88, 0xd2, 0x99, 0x52, 0xa4, 0xc4,
            0x67, 0x2b, 0x6c, 0xe8,
        ];
        run_std(Params::argon2i(), &EXPECTED)
    }
    #[test]
    fn argon2id_rfc9106() {
        // RFC 9106 §5.3 Argon2id test vector tag.
        const EXPECTED: [u8; 32] = [
            0x0d, 0x64, 0x0d, 0xf5, 0x8d, 0x78, 0x76, 0x6c, 0x08, 0xc0, 0x37, 0xa3, 0x4a, 0x8b,
            0x53, 0xc9, 0xd0, 0x1e, 0xf0, 0x45, 0x2d, 0x75, 0xb6, 0x5e, 0xb5, 0x25, 0x20, 0xe9,
            0x6b, 0x01, 0xe6, 0x59,
        ];
        run_std(Params::argon2id(), &EXPECTED);
    }
}