#![allow(clippy::ptr_offset_with_cast)]
use arrayref::{array_mut_ref, array_ref, array_refs, mut_array_refs};
#[cfg(target_arch = "aarch64")]
mod aarch64_simd_transpose;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod x86_simd_transpose;
/// Size in bytes of an MD4 digest, matching the `[u8; 16]` values returned by the hashing functions in this file.
pub const MD4_SIZE: usize = 16;
/// Initial values of the four MD4 state words; every hash computation in this file starts from them.
const S: [u32; 4] = [0x6745_2301, 0xEFCD_AB89, 0x98BA_DCFE, 0x1032_5476];
// Generates a `Md4State` type whose `process_block` method folds one 16-word
// block into the four running MD4 state words.
//
// The word type and every primitive operation are macro parameters, so the
// same round code can be instantiated for plain `u32` words and for SIMD
// vector types that carry one independent hash per lane.
//
// Parameters:
// - `($prefix)`: tokens pasted directly in front of `fn process_block`
//   (for example attributes and/or `unsafe`); may be empty.
// - `$u32`: the word type stored in the state and in the input block.
// - `add`, `and`, `or`, `andnot`, `xor`: paths to two-argument functions over
//   the word type. `andnot(x, z)` is expected to compute `!x & z`, which is
//   what the scalar helper in this file does.
// - `rol`: tokens invoked as `rol(value, shift)` to rotate a word left.
// - `splat`: path to a function turning a `u32` constant into the word type.
macro_rules! md4 {
(
($($prefix:tt)*),
$u32:ty,
add = $add:path,
and = $and:path,
or = $or:path,
andnot = $andnot:path,
xor = $xor:path,
rol = ($($rol:tt)*),
splat = $splat:path,
) => {
// The four running state words of the hash.
#[derive(Copy, Clone)]
struct Md4State {
s: [$u32; 4],
}
// The helper macros below expand to parenthesised expressions; presumably
// this `allow` is here to keep those expansions warning-free.
#[allow(unused_parens)]
impl Md4State {
$($prefix)*
// Mixes one block of sixteen words into `self.s` using three rounds of
// sixteen steps each.
fn process_block(&mut self, data: &[$u32; 16]) {
// Round 1 function: bitwise select, `(x & y) | (!x & z)`.
macro_rules! f {
($x:expr, $y:expr, $z:expr) => ($or($and($x, $y), $andnot($x, $z)));
}
// Round 2 function: bitwise majority, `(x & y) | (x & z) | (y & z)`.
macro_rules! g {
($x:expr, $y:expr, $z:expr) => ($or($or($and($x, $y), $and($x, $z)), $and($y, $z)));
}
// Round 3 function: bitwise parity, `x ^ y ^ z`.
macro_rules! h {
($x:expr, $y:expr, $z:expr) => ($xor($xor($x, $y), $z));
}
// Round 1 step: rol(a + f(b, c, d) + k, s).
macro_rules! op1 {
($a:expr, $b:expr, $c:expr, $d:expr, $k:expr, $s:expr) => (
$($rol)*($add($add($a, f!($b, $c, $d)), $k), $s)
);
}
// Round 2 step: rol(a + g(b, c, d) + k + 0x5A827999, s).
macro_rules! op2 {
($a:expr, $b:expr, $c:expr, $d:expr, $k:expr, $s:expr) => (
$($rol)*($add($add($add($a, g!($b, $c, $d)), $k), $splat(0x5A82_7999)), $s)
);
}
// Round 3 step: rol(a + h(b, c, d) + k + 0x6ED9EBA1, s).
macro_rules! op3 {
($a:expr, $b:expr, $c:expr, $d:expr, $k:expr, $s:expr) => (
$($rol)*($add($add($add($a, h!($b, $c, $d)), $k), $splat(0x6ED9_EBA1)), $s)
);
}
// Working copies of the state; they are added back into `self.s` at the end.
let mut a = self.s[0];
let mut b = self.s[1];
let mut c = self.s[2];
let mut d = self.s[3];
// Round 1: four steps over consecutive words i, i+1, i+2, i+3
// with rotations 3, 7, 11, 19.
macro_rules! round1 {
($i:expr) => {
a = op1!(a, b, c, d, data[$i], 3);
d = op1!(d, a, b, c, data[$i + 1], 7);
c = op1!(c, d, a, b, data[$i + 2], 11);
b = op1!(b, c, d, a, data[$i + 3], 19);
}
}
round1!(0);
round1!(4);
round1!(8);
round1!(12);
// Round 2: four steps over words i, i+4, i+8, i+12
// with rotations 3, 5, 9, 13.
macro_rules! round2 {
($i:expr) => {
a = op2!(a, b, c, d, data[$i], 3);
d = op2!(d, a, b, c, data[$i + 4], 5);
c = op2!(c, d, a, b, data[$i + 8], 9);
b = op2!(b, c, d, a, data[$i + 12], 13);
}
}
round2!(0);
round2!(1);
round2!(2);
round2!(3);
// Round 3: four steps over words i, i+8, i+4, i+12
// with rotations 3, 9, 11, 15.
macro_rules! round3 {
($i:expr) => {
a = op3!(a, b, c, d, data[$i], 3);
d = op3!(d, a, b, c, data[$i + 8], 9);
c = op3!(c, d, a, b, data[$i + 4], 11);
b = op3!(b, c, d, a, data[$i + 12], 15);
}
}
// Note the start offsets are visited in the order 0, 2, 1, 3.
round3!(0);
round3!(2);
round3!(1);
round3!(3);
// Feed-forward: add the block's result onto the previous state.
self.s[0] = $add(self.s[0], a);
self.s[1] = $add(self.s[1], b);
self.s[2] = $add(self.s[2], c);
self.s[3] = $add(self.s[3], d);
}
}
};
}
use std::convert::identity;
use std::ops::{BitAnd, BitOr, BitXor};
/// Scalar "and-not": keeps the bits of `y` at the positions where `x` is clear.
///
/// This is the scalar counterpart of the SIMD and-not operations passed to the
/// `md4!` macro, with the same argument order: the first operand is the one
/// that gets inverted.
fn andnot(x: u32, y: u32) -> u32 {
    let inverted_mask = !x;
    y & inverted_mask
}
// Scalar instantiation of the `md4!` macro: defines `Md4State` over plain
// `u32` words. No prefix is put on `process_block`, addition wraps, rotation
// uses `u32::rotate_left`, and `splat` is the identity because a round
// constant already is a `u32`.
md4!(
(),
u32,
add = u32::wrapping_add,
and = u32::bitand,
or = u32::bitor,
andnot = andnot,
xor = u32::bitxor,
rol = (u32::rotate_left),
splat = identity,
);
/// Decodes a 64-byte block into the sixteen 32-bit words consumed by
/// `Md4State::process_block`.
///
/// Word `i` is read from bytes `4 * i .. 4 * i + 4` of `input`, interpreted as
/// a little-endian integer.
fn load_block(input: &[u8; 64]) -> [u32; 16] {
    let mut words = [0u32; 16];
    // `input` is exactly 64 bytes long, so `chunks_exact(4)` yields exactly
    // sixteen 4-byte chunks and every output word is written once.
    for (word, bytes) in words.iter_mut().zip(input.chunks_exact(4)) {
        *word = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
    }
    words
}
/// Computes the MD4 digest of `data` with the scalar implementation.
///
/// The input is consumed in 64-byte blocks. The trailing partial block is
/// padded with a `0x80` byte, zeros, and the message length in bits as a
/// 64-bit little-endian integer; the padding spills into a second block when
/// fewer than 8 bytes would otherwise be left for the length field.
///
/// Returns the four state words serialised in little-endian order.
pub fn md4(data: &[u8]) -> [u8; 16] {
    let mut hasher = Md4State { s: S };

    // Whole 64-byte blocks first.
    let mut full_blocks = data.chunks_exact(64);
    for chunk in full_blocks.by_ref() {
        let words = load_block(array_ref![chunk, 0, 64]);
        hasher.process_block(&words);
    }

    // Build the padded tail in a two-block scratch buffer; only the first
    // block is used unless the length field does not fit behind the tail.
    let tail = full_blocks.remainder();
    let tail_len = tail.len();
    let mut scratch = [0u8; 128];
    scratch[..tail_len].copy_from_slice(tail);
    scratch[tail_len] = 0x80;
    let spills_over = tail_len >= 56;
    let padded_len = if spills_over { 128 } else { 64 };
    let bit_count = data.len() as u64 * 8;
    *array_mut_ref![&mut scratch, padded_len - 8, 8] = bit_count.to_le_bytes();

    let (first_block, second_block) = array_refs![&scratch, 64, 64];
    hasher.process_block(&load_block(first_block));
    if spills_over {
        hasher.process_block(&load_block(second_block));
    }

    // Serialise the state words into the digest.
    let mut out = [0u8; 16];
    {
        let (w0, w1, w2, w3) = mut_array_refs!(&mut out, 4, 4, 4, 4);
        *w0 = hasher.s[0].to_le_bytes();
        *w1 = hasher.s[1].to_le_bytes();
        *w2 = hasher.s[2].to_le_bytes();
        *w3 = hasher.s[3].to_le_bytes();
    }
    out
}
mod simd {
/// Largest number of inputs any SIMD implementation compiled for this target
/// hashes per call; it sizes the fixed arrays used to pass batches around.
///
/// x86 / x86_64: 8, the lane count of the 256-bit implementation.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub const MAX_LANES: usize = 8;
/// Largest number of inputs any SIMD implementation compiled for this target
/// hashes per call; it sizes the fixed arrays used to pass batches around.
///
/// aarch64: 4, the lane count of the 128-bit implementation.
#[cfg(target_arch = "aarch64")]
pub const MAX_LANES: usize = 4;
/// Largest number of inputs any SIMD implementation compiled for this target
/// hashes per call; it sizes the fixed arrays used to pass batches around.
///
/// Other targets: 0, because no SIMD implementation is compiled in.
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
pub const MAX_LANES: usize = 0;
/// A multi-lane MD4 implementation: a lane count together with the function
/// that hashes that many inputs in one call.
pub struct Md4xN {
/// Number of inputs processed per call to `fun`.
lanes: usize,
/// Hashes a batch of inputs and returns one digest slot per possible lane.
fun: fn(&[&[u8]]) -> [[u8; 16]; MAX_LANES],
}
impl Md4xN {
/// Returns how many inputs this implementation hashes per call.
pub fn lanes(&self) -> usize {
self.lanes
}
/// Hashes `data` by forwarding it to the stored implementation function.
///
/// The implementations generated in this file read the first `lanes()`
/// entries of `data` and fill the first `lanes()` entries of the result;
/// any remaining result entries are left zeroed. They assert that all of
/// those inputs have the same length, so unequal lengths panic.
/// NOTE(review): passing fewer than `lanes()` entries presumably panics in
/// the `array_ref!` slice conversion; confirm against the `arrayref` crate.
pub fn md4(&self, data: &[&[u8]]) -> [[u8; 16]; MAX_LANES] {
(self.fun)(data)
}
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))]
mod real_impl {
#[cfg(target_arch = "aarch64")]
use std::arch::aarch64 as arch;
#[cfg(target_arch = "x86")]
use std::arch::x86 as arch;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64 as arch;
// Generates, inside the invoking module, a complete SIMD MD4 implementation
// for one vector type:
// - `LANES`: number of `u32` values that fit in the vector type,
// - a vectorised `Md4State` (via the `md4!` macro),
// - `md4xN`: hashes `LANES` equally long inputs at once,
// - `select()`: returns a `Md4xN` handle when `$feature_enabled` is true.
//
// Parameters:
// - `$u32xN`: the vector type holding one `u32` per lane.
// - `$feature`: target-feature name enabled on the generated functions.
// - `$feature_enabled`: expression evaluated in `select()` to decide whether
//   the implementation may be used.
// - `load`: function called with a `lane -> &[u8; 64]` closure; its result is
//   used as the sixteen-vector block passed to `process_block` (presumably a
//   transposing load; the function itself is defined outside this file).
// - `add`, `and`, `or`, `andnot`, `xor`, `rol`, `splat`: forwarded to `md4!`.
macro_rules! n_lanes {
(
$u32xN:path,
$feature:tt,
$feature_enabled:expr,
load = $load:path,
add = $add:path,
and = $and:path,
or = $or:path,
andnot = $andnot:path,
xor = $xor:path,
rol = $rol:tt,
splat = $splat:path,
) => (
use crate::md4::S;
use crate::md4::simd::{Md4xN, MAX_LANES};
use arrayref::{array_ref, mut_array_refs};
use std::mem;
#[allow(non_camel_case_types)]
type u32xN = $u32xN;
// Lane count, derived from the size of the vector type.
pub const LANES: usize = mem::size_of::<u32xN>() / mem::size_of::<u32>();
// Vectorised state: `process_block` becomes an `unsafe fn` compiled with
// the target feature enabled.
md4!(
(#[target_feature(enable = $feature)] unsafe),
u32xN,
add = $add,
and = $and,
or = $or,
andnot = $andnot,
xor = $xor,
rol = $rol,
splat = $splat,
);
// Hashes `LANES` inputs in parallel, one per vector lane.
//
// Safety: the function is compiled with `$feature` enabled, so the caller
// must make sure the running CPU supports that feature.
//
// Panics if the inputs do not all have the same length.
#[allow(non_snake_case)]
#[target_feature(enable = $feature)]
unsafe fn md4xN(data: &[&[u8]; LANES]) -> [[u8; 16]; LANES] {
// Every lane starts from the same initial state.
let mut state = Md4State {
s: [
$splat(S[0]),
$splat(S[1]),
$splat(S[2]),
$splat(S[3]),
],
};
// All lanes advance in lock step, so they must share one length.
let len = data[0].len();
for ix in 1..LANES {
assert_eq!(len, data[ix].len());
}
// Whole 64-byte blocks.
for block in 0..(len / 64) {
let blocks = $load(|lane| array_ref![&data[lane], 64 * block, 64]);
state.process_block(&blocks);
}
let remainder = len % 64;
let bit_len = len as u64 * 8;
{
// First padding block: the tail bytes of each lane followed by 0x80.
let mut padded = [[0; 64]; LANES];
for lane in 0..LANES {
padded[lane][..remainder].copy_from_slice(&data[lane][len - remainder..]);
padded[lane][remainder] = 0x80;
}
let mut blocks = $load(|lane| &padded[lane]);
// The 64-bit length occupies the last two words when it still fits
// in this block; it is identical for all lanes, hence the splat.
if remainder < 56 {
blocks[14] = $splat(bit_len as u32);
blocks[15] = $splat((bit_len >> 32) as u32);
}
state.process_block(&blocks);
}
// Otherwise the length goes into an extra, otherwise all-zero block.
if remainder >= 56 {
let mut blocks = [$splat(0); 16];
blocks[14] = $splat(bit_len as u32);
blocks[15] = $splat((bit_len >> 32) as u32);
state.process_block(&blocks);
}
let mut digests = [[0; 16]; LANES];
// Reinterpret each state vector as its `LANES` individual `u32` lanes so
// that the per-lane words can be read out below.
let final_state = mem::transmute::<[u32xN; 4], [[u32; LANES]; 4]>(state.s);
for lane in 0..LANES {
let (a, b, c, d) = mut_array_refs!(&mut digests[lane], 4, 4, 4, 4);
*a = final_state[0][lane].to_le_bytes();
*b = final_state[1][lane].to_le_bytes();
*c = final_state[2][lane].to_le_bytes();
*d = final_state[3][lane].to_le_bytes();
}
digests
}
// Returns a handle to this implementation if `$feature_enabled` evaluates
// to true, and `None` otherwise.
pub fn select() -> Option<Md4xN> {
if $feature_enabled {
Some(Md4xN {
lanes: LANES,
fun: |data| {
// Only the first `LANES` slots are filled; the rest stay zero.
let mut ret = [[0; 16]; MAX_LANES];
let (prefix, _) = mut_array_refs!(&mut ret, LANES, MAX_LANES-LANES);
// The unsafe call relies on the `$feature_enabled` check above,
// which guards the construction of this handle.
*prefix = unsafe { md4xN(array_ref![data, 0, LANES]) };
ret
}
})
} else {
None
}
}
);
}
// Four-lane implementation for x86 / x86_64, built on 128-bit `__m128i`
// vectors and gated on the "sse2" feature.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod lanes_4 {
/// Broadcasts `x` into every 32-bit lane of a 128-bit vector.
#[inline(always)]
unsafe fn splat(x: u32) -> super::arch::__m128i {
super::arch::_mm_set1_epi32(x as i32)
}
// Rotates each 32-bit lane left by `$shift` bits, composed from a left shift
// OR-ed with the complementary right shift. `$x` is bound to a local first
// so that the expression is evaluated only once.
macro_rules! rotate_left {
($x: expr, $shift: expr) => {{
let x = $x;
super::arch::_mm_or_si128(
super::arch::_mm_slli_epi32(x, $shift as i32),
super::arch::_mm_srli_epi32(x, 32 - $shift as i32),
)
}};
}
n_lanes!(
super::arch::__m128i,
"sse2",
is_x86_feature_detected!("sse2"),
load = crate::md4::x86_simd_transpose::load_16x4_sse2,
add = super::arch::_mm_add_epi32,
and = super::arch::_mm_and_si128,
or = super::arch::_mm_or_si128,
andnot = super::arch::_mm_andnot_si128,
xor = super::arch::_mm_xor_si128,
rol = (rotate_left!),
splat = splat,
);
}
// Eight-lane implementation for x86 / x86_64, built on 256-bit `__m256i`
// vectors and gated on the "avx2" feature.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod lanes_8 {
/// Broadcasts `x` into every 32-bit lane of a 256-bit vector.
#[inline(always)]
unsafe fn splat(x: u32) -> super::arch::__m256i {
super::arch::_mm256_set1_epi32(x as i32)
}
// Rotates each 32-bit lane left by `$shift` bits, composed from a left shift
// OR-ed with the complementary right shift. `$x` is bound to a local first
// so that the expression is evaluated only once.
macro_rules! rotate_left {
($x: expr, $shift: expr) => {{
let x = $x;
super::arch::_mm256_or_si256(
super::arch::_mm256_slli_epi32(x, $shift as i32),
super::arch::_mm256_srli_epi32(x, 32 - $shift as i32),
)
}};
}
n_lanes!(
super::arch::__m256i,
"avx2",
is_x86_feature_detected!("avx2"),
load = crate::md4::x86_simd_transpose::load_16x8_avx2,
add = super::arch::_mm256_add_epi32,
and = super::arch::_mm256_and_si256,
or = super::arch::_mm256_or_si256,
andnot = super::arch::_mm256_andnot_si256,
xor = super::arch::_mm256_xor_si256,
rol = (rotate_left!),
splat = splat,
);
}
// Four-lane implementation for aarch64, built on `uint32x4_t` vectors and
// gated on the "neon" feature.
#[cfg(target_arch = "aarch64")]
mod lanes_4 {
// Rotates each 32-bit lane left by `$shift` bits, composed from a left shift
// OR-ed with the complementary right shift. The shift amounts are passed as
// const generic arguments. `$x` is bound to a local first so that the
// expression is evaluated only once.
macro_rules! rotate_left {
($x: expr, $shift: expr) => {{
let x = $x;
super::arch::vorrq_u32(
super::arch::vshlq_n_u32::<{ $shift as i32 }>(x),
super::arch::vshrq_n_u32::<{ 32 - $shift as i32 }>(x),
)
}};
}
/// And-not with the argument order the `md4!` macro expects (`!a & b`).
///
/// The operands are passed to `vbicq_u32` in swapped order because that
/// intrinsic clears, in its first operand, the bits set in its second.
#[inline(always)]
unsafe fn andnot(
a: super::arch::uint32x4_t,
b: super::arch::uint32x4_t,
) -> super::arch::uint32x4_t {
super::arch::vbicq_u32(b, a)
}
n_lanes!(
super::arch::uint32x4_t,
"neon",
std::arch::is_aarch64_feature_detected!("neon"),
load = crate::md4::aarch64_simd_transpose::load_16x4,
add = super::arch::vaddq_u32,
and = super::arch::vandq_u32,
or = super::arch::vorrq_u32,
andnot = andnot,
xor = super::arch::veorq_u32,
rol = (rotate_left!),
splat = super::arch::vdupq_n_u32,
);
}
use super::Md4xN;
impl Md4xN {
    /// Picks the widest implementation usable on the running CPU: the
    /// eight-lane one when available, otherwise the four-lane one. Returns
    /// `None` when neither can be used.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn select() -> Option<Md4xN> {
        if let Some(wide) = lanes_8::select() {
            return Some(wide);
        }
        lanes_4::select()
    }

    /// Returns the four-lane implementation when it is usable on the running
    /// CPU, and `None` otherwise.
    #[cfg(target_arch = "aarch64")]
    pub fn select() -> Option<Md4xN> {
        let narrow = lanes_4::select();
        narrow
    }
}
}
// Fallback for targets without a SIMD implementation in this file: `select`
// always reports that none is available.
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
mod no_simd {
use super::Md4xN;
impl Md4xN {
/// Always returns `None`, because no SIMD implementation is compiled for
/// this target.
pub fn select() -> Option<Md4xN> {
None
}
}
}
}
/// Computes the MD4 digest of every input yielded by `datas`, returning each
/// input slice paired with its 16-byte digest, in the original order.
///
/// When a SIMD implementation is available at runtime, inputs are pulled from
/// `datas` in batches of `lanes()` items and hashed in parallel. A batch whose
/// inputs do not all have the same length cannot go through the SIMD kernel
/// (it requires equal lengths), so such a batch is hashed with the scalar
/// [`md4`] instead. Inputs left over once fewer than `lanes()` remain are also
/// hashed with [`md4`].
///
/// The returned iterator reports the exact number of items still to be
/// yielded, including results of a batch that has been hashed but not yet
/// handed out.
///
/// # Panics
///
/// Iterating panics if `datas` yields fewer items than its `len()` reported.
pub fn md4_many<'a>(
    datas: impl ExactSizeIterator<Item = &'a [u8]>,
) -> impl ExactSizeIterator<Item = (&'a [u8], [u8; 16])> {
    /// The selected SIMD backend together with the results of its last batch.
    struct SimdImpl<'a> {
        simd_impl: simd::Md4xN,
        /// Results of the most recent batch, stored in slots `0..lanes()`.
        buf: [(&'a [u8], [u8; 16]); simd::MAX_LANES],
        /// How many of the buffered results have not been yielded yet.
        buf_len: usize,
    }
    struct It<'a, I: Iterator<Item = &'a [u8]>> {
        /// Number of items not yet pulled from `inner`.
        len: usize,
        inner: I,
        simd: Option<SimdImpl<'a>>,
    }
    impl<'a, I: Iterator<Item = &'a [u8]>> It<'a, I> {
        /// Items still to be yielded: those left in `inner` plus the already
        /// hashed results waiting in the batch buffer.
        fn remaining(&self) -> usize {
            let buffered = self.simd.as_ref().map_or(0, |simd| simd.buf_len);
            self.len + buffered
        }
    }
    impl<'a, I: Iterator<Item = &'a [u8]>> Iterator for It<'a, I> {
        type Item = (&'a [u8], [u8; 16]);
        fn next(&mut self) -> Option<Self::Item> {
            if let Some(simd) = &mut self.simd {
                let lanes = simd.simd_impl.lanes();
                // Refill the buffer once it is drained and a full batch is left.
                if simd.buf_len == 0 && self.len >= lanes {
                    let mut datas: [&[u8]; simd::MAX_LANES] = [&[]; simd::MAX_LANES];
                    for slot in &mut datas[..lanes] {
                        *slot = self
                            .inner
                            .next()
                            .expect("input iterator yielded fewer items than its reported length");
                    }
                    self.len -= lanes;
                    let batch = &datas[..lanes];
                    // The SIMD kernel asserts that all lanes have the same
                    // length; mixed-length batches take the scalar path.
                    let same_length = batch
                        .windows(2)
                        .all(|pair| pair[0].len() == pair[1].len());
                    if same_length {
                        let digests = simd.simd_impl.md4(&datas);
                        let results = batch.iter().zip(digests.iter());
                        for (slot, (&data, &digest)) in simd.buf.iter_mut().zip(results) {
                            *slot = (data, digest);
                        }
                    } else {
                        for (slot, &data) in simd.buf.iter_mut().zip(batch) {
                            *slot = (data, md4(data));
                        }
                    }
                    simd.buf_len = lanes;
                }
                // Hand out buffered results front to back.
                if simd.buf_len > 0 {
                    let digest = simd.buf[lanes - simd.buf_len];
                    simd.buf_len -= 1;
                    return Some(digest);
                }
            }
            // Scalar path: no SIMD backend, or fewer than a full batch left.
            self.inner.next().map(|data| {
                self.len -= 1;
                (data, md4(data))
            })
        }
        fn size_hint(&self) -> (usize, Option<usize>) {
            let remaining = self.remaining();
            (remaining, Some(remaining))
        }
    }
    impl<'a, I: Iterator<Item = &'a [u8]>> ExactSizeIterator for It<'a, I> {
        fn len(&self) -> usize {
            self.remaining()
        }
    }
    It {
        len: datas.len(),
        inner: datas,
        simd: simd::Md4xN::select().map(|simd_impl| SimdImpl {
            simd_impl,
            buf: [(&[] as &[_], [0; 16]); simd::MAX_LANES],
            buf_len: 0,
        }),
    }
}
#[test]
fn tests() {
    // Known-answer vectors: (message, expected MD4 digest).
    let test_vectors: &[(&[u8], [u8; 16])] = &[
        (
            b"",
            *b"\x31\xd6\xcf\xe0\xd1\x6a\xe9\x31\xb7\x3c\x59\xd7\xe0\xc0\x89\xc0",
        ),
        (
            b"a",
            *b"\xbd\xe5\x2c\xb3\x1d\xe3\x3e\x46\x24\x5e\x05\xfb\xdb\xd6\xfb\x24",
        ),
        (
            b"abc",
            *b"\xa4\x48\x01\x7a\xaf\x21\xd8\x52\x5f\xc1\x0a\xe8\x7a\xa6\x72\x9d",
        ),
        (
            b"message digest",
            *b"\xd9\x13\x0a\x81\x64\x54\x9f\xe8\x18\x87\x48\x06\xe1\xc7\x01\x4b",
        ),
        (
            b"abcdefghijklmnopqrstuvwxyz",
            *b"\xd7\x9e\x1c\x30\x8a\xa5\xbb\xcd\xee\xa8\xed\x63\xdf\x41\x2d\xa9",
        ),
        (
            b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
            *b"\x04\x3f\x85\x82\xf2\x41\xdb\x35\x1c\xe6\x27\xe1\x53\xe7\xf0\xe4",
        ),
        (
            b"12345678901234567890123456789012345678901234567890123456789012345678901234567890",
            *b"\xe3\x3b\x4d\xdc\x9c\x38\xf2\x19\x9c\x3e\x7b\x16\x4f\xcc\x05\x36",
        ),
    ];

    // Feeds the same input to every lane of the SIMD implementation (when one
    // is available) and checks that every lane produces `want`.
    let simd_impl = simd::Md4xN::select();
    let check_simd = |input: &[u8], want: [u8; 16]| {
        if let Some(simd_impl) = &simd_impl {
            let lanes = simd_impl.lanes();
            let got = simd_impl.md4(&vec![input; lanes]);
            assert_eq!(got[..lanes], vec![want; lanes][..]);
        }
    };

    for &(msg, expected) in test_vectors {
        assert_eq!(md4(msg), expected);
        check_simd(msg, expected);

        // Also compare SIMD against scalar on the message minus its first
        // byte, which has a different length and therefore different padding.
        if let Some((_, tail)) = msg.split_first() {
            let tail_md4 = md4(tail);
            check_simd(tail, tail_md4);
        }
    }
}