#![no_std]
#![allow(clippy::cast_possible_truncation)]
#![allow(unsafe_code)]
extern crate alloc;
pub mod base64;
pub mod crc32;
pub mod hmac;
pub mod lzss;
pub mod pbkdf2;
pub mod sha256;
use alloc::vec::Vec;
/// Size in bytes of the digest returned by `hash` and accepted by `hex`.
pub const OUT_LEN: usize = 32;
/// Size in bytes of one compression-function input block (decoded into sixteen `u32` words).
const BLOCK_LEN: usize = 64;
/// Largest input, in bytes, that `hash_chunk` handles as one chunk; `hash` and
/// `hash_subtree` split anything longer.
const CHUNK_LEN: usize = 1024;
/// Flag bit OR-ed into the compression `flags` word for the first block of a chunk.
const CHUNK_START: u32 = 1;
/// Flag bit OR-ed into the compression `flags` word for the last block of a chunk.
const CHUNK_END: u32 = 2;
/// Flag bit OR-ed into the compression `flags` word when `parent_cv` combines two child values.
const PARENT: u32 = 4;
/// Flag bit added to the final compression (last chunk block or top parent) when `is_root` is set.
const ROOT: u32 = 8;
/// Initial chaining value: every chunk starts from it, `parent_cv` passes it as the chaining
/// value, and its first four words fill state words 8..12 of the compression function.
const IV: [u32; 8] = [
0x6A09_E667,
0xBB67_AE85,
0x3C6E_F372,
0xA54F_F53A,
0x510E_527F,
0x9B05_688C,
0x1F83_D9AB,
0x5BE0_CD19,
];
/// Message-word schedule applied between rounds: after one permutation step, word `i`
/// holds what was previously word `MSG_PERMUTATION[i]`.
const MSG_PERMUTATION: [usize; 16] = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8];
/// NEON (AArch64) implementation of the compression function.
///
/// The state is held as four 4-lane vectors (`v0`..`v3` = state words 0..4, 4..8, 8..12,
/// 12..16), so each vector `g` call performs four scalar `g` steps at once.
///
/// NOTE(review): the `cfg` gate requires `test`, so this module is only built into test
/// binaries on aarch64. The crate-level `compress` in this file always forwards to
/// `compress_scalar`; the `neon_matches_scalar` test compares the two implementations.
#[cfg(all(target_arch = "aarch64", test))]
mod neon {
use super::{IV, MSG_PERMUTATION};
use core::arch::aarch64::{
uint32x4_t, vaddq_u32, veorq_u32, vextq_u32, vld1q_u32, vld2q_u32, vsetq_lane_u32,
vshlq_n_u32, vshrq_n_u32, vst1q_u32,
};
/// Rotates each 32-bit lane right by 16 bits (`x >> 16` combined with `x << 16`).
///
/// The two shifted halves never overlap, so XOR acts as OR here.
#[inline]
unsafe fn vrotr16(x: uint32x4_t) -> uint32x4_t {
// SAFETY: pure register arithmetic on a value argument; no memory is accessed.
unsafe { veorq_u32(vshrq_n_u32::<16>(x), vshlq_n_u32::<16>(x)) }
}
/// Rotates each 32-bit lane right by 12 bits (`x >> 12` combined with `x << 20`).
#[inline]
unsafe fn vrotr12(x: uint32x4_t) -> uint32x4_t {
// SAFETY: pure register arithmetic on a value argument; no memory is accessed.
unsafe { veorq_u32(vshrq_n_u32::<12>(x), vshlq_n_u32::<20>(x)) }
}
/// Rotates each 32-bit lane right by 8 bits (`x >> 8` combined with `x << 24`).
#[inline]
unsafe fn vrotr8(x: uint32x4_t) -> uint32x4_t {
// SAFETY: pure register arithmetic on a value argument; no memory is accessed.
unsafe { veorq_u32(vshrq_n_u32::<8>(x), vshlq_n_u32::<24>(x)) }
}
/// Rotates each 32-bit lane right by 7 bits (`x >> 7` combined with `x << 25`).
#[inline]
unsafe fn vrotr7(x: uint32x4_t) -> uint32x4_t {
// SAFETY: pure register arithmetic on a value argument; no memory is accessed.
unsafe { veorq_u32(vshrq_n_u32::<7>(x), vshlq_n_u32::<25>(x)) }
}
/// Four-lane version of the scalar `g`: the same add / xor / rotate (16, 12, 8, 7)
/// sequence applied lane-wise, with `mx` and `my` supplying one message word per lane.
#[inline]
unsafe fn g(
a: &mut uint32x4_t,
b: &mut uint32x4_t,
c: &mut uint32x4_t,
d: &mut uint32x4_t,
mx: uint32x4_t,
my: uint32x4_t,
) {
// SAFETY: only lane-wise register operations on values reached through valid `&mut`
// references; no raw pointers are dereferenced.
unsafe {
*a = vaddq_u32(vaddq_u32(*a, *b), mx);
*d = vrotr16(veorq_u32(*d, *a));
*c = vaddq_u32(*c, *d);
*b = vrotr12(veorq_u32(*b, *c));
*a = vaddq_u32(vaddq_u32(*a, *b), my);
*d = vrotr8(veorq_u32(*d, *a));
*c = vaddq_u32(*c, *d);
*b = vrotr7(veorq_u32(*b, *c));
}
}
/// One full round: a column step on the rows as stored, then a diagonal step performed
/// by rotating rows 1..3 so the diagonals line up as columns, and rotating them back.
///
/// `m` is the (already permuted) message block for this round.
#[inline]
unsafe fn one_round(
v0: &mut uint32x4_t,
v1: &mut uint32x4_t,
v2: &mut uint32x4_t,
v3: &mut uint32x4_t,
m: &[u32; 16],
) {
// SAFETY: each `vld2q_u32` reads eight consecutive `u32`s; `m` has sixteen elements,
// so both the read at `m[0..8]` and the read at `m[8..16]` stay in bounds.
unsafe {
// De-interleaving load: `pair.0` = m[0], m[2], m[4], m[6] (the `mx` words) and
// `pair.1` = m[1], m[3], m[5], m[7] (the `my` words) of the four column steps.
let pair = vld2q_u32(m.as_ptr());
g(v0, v1, v2, v3, pair.0, pair.1);
// Rotate row 1 left by one lane, row 2 by two, row 3 by three, so lane `i` of the
// rotated rows holds the state words of diagonal `i`.
let v1r = vextq_u32::<1>(*v1, *v1);
let v2r = vextq_u32::<2>(*v2, *v2);
let v3r = vextq_u32::<3>(*v3, *v3);
// Rebind as mutable so the rotated rows can be passed by `&mut`.
let mut v1r = v1r;
let mut v2r = v2r;
let mut v3r = v3r;
// Same de-interleaving load for the second half of the message (words 8..16).
let pair = vld2q_u32(m[8..].as_ptr());
g(v0, &mut v1r, &mut v2r, &mut v3r, pair.0, pair.1);
// Undo the lane rotations (3, 2 and 1 more lanes complete a full turn of four).
*v1 = vextq_u32::<3>(v1r, v1r);
*v2 = vextq_u32::<2>(v2r, v2r);
*v3 = vextq_u32::<1>(v3r, v3r);
}
}
/// NEON counterpart of `compress_scalar`: same arguments, same 16-word output layout.
///
/// Runs seven rounds, permuting the message block with `MSG_PERMUTATION` between
/// rounds, then XORs the state halves together and with `chaining_value`.
///
/// # Safety
///
/// The function is compiled with `#[target_feature(enable = "neon")]`; the caller must
/// ensure the executing CPU supports NEON.
#[target_feature(enable = "neon")]
pub unsafe fn compress(
chaining_value: &[u32; 8],
block_words: &[u32; 16],
counter: u64,
block_len: u32,
flags: u32,
) -> [u32; 16] {
// SAFETY: every `vld1q_u32` / `vst1q_u32` touches four consecutive `u32`s, and each
// pointer comes from an array or sub-slice with at least four elements remaining
// (`chaining_value` and `IV` have eight, `out` has sixteen).
unsafe {
let mut v0 = vld1q_u32(chaining_value.as_ptr());
let mut v1 = vld1q_u32(chaining_value[4..].as_ptr());
let mut v2 = vld1q_u32(IV.as_ptr());
// NOTE(review): all four lanes loaded from `IV[4..]` are overwritten below (lanes
// 0..3 are set to counter low, counter high, block_len, flags), so the load only
// serves as an initial value for `vsetq_lane_u32`.
let mut v3 = vsetq_lane_u32::<0>(counter as u32, vld1q_u32(IV[4..].as_ptr()));
v3 = vsetq_lane_u32::<1>((counter >> 32) as u32, v3);
v3 = vsetq_lane_u32::<2>(block_len, v3);
v3 = vsetq_lane_u32::<3>(flags, v3);
let mut block = *block_words;
for round_idx in 0..7 {
one_round(&mut v0, &mut v1, &mut v2, &mut v3, &block);
// The message is permuted between rounds only, not after the final one.
if round_idx < 6 {
let original = block;
for i in 0..16 {
block[i] = original[MSG_PERMUTATION[i]];
}
}
}
// Feed-forward: low half ^= high half, high half ^= input chaining value.
v0 = veorq_u32(v0, v2);
v1 = veorq_u32(v1, v3);
v2 = veorq_u32(v2, vld1q_u32(chaining_value.as_ptr()));
v3 = veorq_u32(v3, vld1q_u32(chaining_value[4..].as_ptr()));
let mut out = [0u32; 16];
vst1q_u32(out.as_mut_ptr(), v0);
vst1q_u32(out[4..].as_mut_ptr(), v1);
vst1q_u32(out[8..].as_mut_ptr(), v2);
vst1q_u32(out[12..].as_mut_ptr(), v3);
out
}
}
}
/// Mixes the four state words at indices `a`, `b`, `c`, `d` with two message words.
///
/// The function runs the same add / xor / rotate pattern twice: first with `mx` and
/// rotation distances 16 and 12, then with `my` and distances 8 and 7. All additions
/// wrap modulo 2^32. The state is updated in place.
#[inline]
fn g(state: &mut [u32; 16], a: usize, b: usize, c: usize, d: usize, mx: u32, my: u32) {
    // Each tuple is (message word, rotation applied to `d`, rotation applied to `b`).
    for (word, rot_d, rot_b) in [(mx, 16, 12), (my, 8, 7)] {
        state[a] = state[a].wrapping_add(state[b]).wrapping_add(word);
        state[d] = (state[d] ^ state[a]).rotate_right(rot_d);
        state[c] = state[c].wrapping_add(state[d]);
        state[b] = (state[b] ^ state[c]).rotate_right(rot_b);
    }
}
fn round(state: &mut [u32; 16], m: &[u32; 16]) {
g(state, 0, 4, 8, 12, m[0], m[1]);
g(state, 1, 5, 9, 13, m[2], m[3]);
g(state, 2, 6, 10, 14, m[4], m[5]);
g(state, 3, 7, 11, 15, m[6], m[7]);
g(state, 0, 5, 10, 15, m[8], m[9]);
g(state, 1, 6, 11, 12, m[10], m[11]);
g(state, 2, 7, 8, 13, m[12], m[13]);
g(state, 3, 4, 9, 14, m[14], m[15]);
}
/// Reorders the message words in place according to `MSG_PERMUTATION`.
///
/// After the call, word `i` holds the value that word `MSG_PERMUTATION[i]` had before it.
fn permute(m: &mut [u32; 16]) {
    // Work from a copy so already-overwritten slots are never read.
    let before = *m;
    for (slot, &source) in m.iter_mut().zip(MSG_PERMUTATION.iter()) {
        *slot = before[source];
    }
}
/// Compression entry point used by `hash_chunk` and `parent_cv`.
///
/// Forwards every argument unchanged to `compress_scalar` and returns its 16-word
/// result; callers in this file keep only the first eight words as the new chaining
/// value.
fn compress(
chaining_value: &[u32; 8],
block_words: &[u32; 16],
counter: u64,
block_len: u32,
flags: u32,
) -> [u32; 16] {
compress_scalar(chaining_value, block_words, counter, block_len, flags)
}
/// Portable compression function.
///
/// Builds a 16-word state from `chaining_value` (words 0..8), the first four `IV` words
/// (8..12), the low and high halves of `counter` (12, 13), `block_len` (14) and `flags`
/// (15). It then runs seven rounds over `block_words`, permuting the message between
/// consecutive rounds, and finishes with a feed-forward XOR.
///
/// Returns the full 16-word state; the first eight words are the new chaining value.
fn compress_scalar(
    chaining_value: &[u32; 8],
    block_words: &[u32; 16],
    counter: u64,
    block_len: u32,
    flags: u32,
) -> [u32; 16] {
    // Rounds that follow the first one; each is preceded by a message permutation.
    const FOLLOW_UP_ROUNDS: usize = 6;

    let mut state = [0u32; 16];
    state[..8].copy_from_slice(chaining_value);
    state[8..12].copy_from_slice(&IV[..4]);
    // The 64-bit counter is split into its low and high 32-bit halves.
    state[12] = counter as u32;
    state[13] = (counter >> 32) as u32;
    state[14] = block_len;
    state[15] = flags;

    let mut schedule = *block_words;
    round(&mut state, &schedule);
    for _ in 0..FOLLOW_UP_ROUNDS {
        permute(&mut schedule);
        round(&mut state, &schedule);
    }

    // Feed-forward: low half ^= high half, then high half ^= input chaining value.
    let (low, high) = state.split_at_mut(8);
    for ((lo, hi), &cv_word) in low.iter_mut().zip(high.iter_mut()).zip(chaining_value.iter()) {
        *lo ^= *hi;
        *hi ^= cv_word;
    }
    state
}
/// Decodes a `BLOCK_LEN`-byte block into sixteen `u32` words, reading each group of
/// four bytes as little-endian.
fn words_from_le_bytes(bytes: &[u8; BLOCK_LEN]) -> [u32; 16] {
    let mut words = [0u32; 16];
    for (word, quad) in words.iter_mut().zip(bytes.chunks_exact(4)) {
        *word = u32::from_le_bytes([quad[0], quad[1], quad[2], quad[3]]);
    }
    words
}
/// Serialises eight `u32` words into `OUT_LEN` bytes, writing each word as four
/// little-endian bytes in order.
fn bytes_from_le_words(words: &[u32; 8]) -> [u8; OUT_LEN] {
    let mut digest = [0u8; OUT_LEN];
    for (dst, word) in digest.chunks_exact_mut(4).zip(words.iter()) {
        dst.copy_from_slice(&word.to_le_bytes());
    }
    digest
}
/// Hashes a single chunk (at most `CHUNK_LEN` bytes) and returns its chaining value.
///
/// The input is consumed in `BLOCK_LEN`-byte blocks, the last one zero-padded. Every
/// block is compressed with `chunk_counter` and the running chaining value, which starts
/// at `IV`. The first block carries `CHUNK_START`, the last carries `CHUNK_END`, and the
/// last also carries `ROOT` when `is_root` is set. `base_flags` is OR-ed into every block.
///
/// The length limit is only checked with `debug_assert!`.
fn hash_chunk(input: &[u8], chunk_counter: u64, is_root: bool, base_flags: u32) -> [u32; 8] {
    debug_assert!(input.len() <= CHUNK_LEN);
    // An empty input is still compressed once, as an all-zero block of length 0.
    let block_count = input.len().div_ceil(BLOCK_LEN).max(1);
    let last_idx = block_count - 1;

    let mut cv = IV;
    for idx in 0..block_count {
        let offset = idx * BLOCK_LEN;
        // Number of real input bytes in this block; the rest of `padded` stays zero.
        let take = (input.len() - offset).min(BLOCK_LEN);
        let mut padded = [0u8; BLOCK_LEN];
        padded[..take].copy_from_slice(&input[offset..offset + take]);

        let start_flag = if idx == 0 { CHUNK_START } else { 0 };
        let end_flags = match (idx == last_idx, is_root) {
            (false, _) => 0,
            (true, false) => CHUNK_END,
            (true, true) => CHUNK_END | ROOT,
        };
        let flags = base_flags | start_flag | end_flags;

        let out = compress(
            &cv,
            &words_from_le_bytes(&padded),
            chunk_counter,
            take as u32,
            flags,
        );
        cv.copy_from_slice(&out[..8]);
    }
    cv
}
/// Combines the chaining values of two child nodes into their parent's chaining value.
///
/// The message block is `left` followed by `right`. It is compressed with `IV` as the
/// chaining value, counter 0, a block length of `BLOCK_LEN`, and flags
/// `base_flags | PARENT`, plus `ROOT` when `is_root` is set.
fn parent_cv(left: &[u32; 8], right: &[u32; 8], is_root: bool, base_flags: u32) -> [u32; 8] {
    let root_flag = if is_root { ROOT } else { 0 };
    let flags = base_flags | PARENT | root_flag;

    let mut message = [0u32; 16];
    let (first_half, second_half) = message.split_at_mut(8);
    first_half.copy_from_slice(left);
    second_half.copy_from_slice(right);

    let out = compress(&IV, &message, 0, BLOCK_LEN as u32, flags);
    let mut cv = [0u32; 8];
    cv.copy_from_slice(&out[..8]);
    cv
}
/// Computes the non-root chaining value of the subtree covering `input`.
///
/// `chunk_counter_base` is the counter of the first chunk in `input`. Inputs of at most
/// `CHUNK_LEN` bytes are hashed as a single chunk. Longer inputs are split so that the
/// left part holds the largest power-of-two number of whole chunks that still leaves at
/// least one byte on the right; both halves are hashed recursively and then joined
/// with `parent_cv`.
fn hash_subtree(input: &[u8], chunk_counter_base: u64, base_flags: u32) -> [u32; 8] {
    if input.len() > CHUNK_LEN {
        let chunk_total = input.len().div_ceil(CHUNK_LEN);
        let left_chunks = largest_power_of_two_leq(chunk_total - 1);
        let (left, right) = input.split_at(left_chunks * CHUNK_LEN);

        let left_cv = hash_subtree(left, chunk_counter_base, base_flags);
        // Chunks on the right are numbered after all chunks on the left.
        let right_cv = hash_subtree(right, chunk_counter_base + left_chunks as u64, base_flags);
        parent_cv(&left_cv, &right_cv, false, base_flags)
    } else {
        hash_chunk(input, chunk_counter_base, false, base_flags)
    }
}
/// Returns the largest power of two that is less than or equal to `n`.
///
/// # Panics
///
/// Panics if `n == 0`, in every build profile. Zero has no power of two at or below it,
/// and a guard that only fires in debug builds would let release builds continue with a
/// meaningless result.
fn largest_power_of_two_leq(n: usize) -> usize {
    // `ilog2` is the index of the highest set bit and rejects zero itself.
    1usize << n.ilog2()
}
/// Hashes `input` and returns the `OUT_LEN`-byte digest.
///
/// Inputs of at most `CHUNK_LEN` bytes are hashed as a single root chunk. Longer inputs
/// are split so that the left part holds the largest power-of-two number of whole chunks
/// that still leaves at least one byte on the right. Both sides are hashed as subtrees
/// and joined by a parent node flagged as the root. No extra base flags are used.
///
/// The known-answer tests in this file check the result against BLAKE3 digests.
pub fn hash(input: &[u8]) -> [u8; OUT_LEN] {
    const BASE_FLAGS: u32 = 0;

    let root_cv = if input.len() > CHUNK_LEN {
        let chunk_total = input.len().div_ceil(CHUNK_LEN);
        let left_chunks = largest_power_of_two_leq(chunk_total - 1);
        let (left, right) = input.split_at(left_chunks * CHUNK_LEN);

        let left_cv = hash_subtree(left, 0, BASE_FLAGS);
        // The right subtree's chunk numbering continues after the left subtree's chunks.
        let right_cv = hash_subtree(right, left_chunks as u64, BASE_FLAGS);
        parent_cv(&left_cv, &right_cv, true, BASE_FLAGS)
    } else {
        hash_chunk(input, 0, true, BASE_FLAGS)
    };
    bytes_from_le_words(&root_cv)
}
/// Formats a digest as a lowercase hexadecimal string, two characters per byte.
///
/// The `expect` guards an invariant rather than a runtime condition: every pushed byte
/// comes from the ASCII digit table, so the UTF-8 conversion cannot fail.
pub fn hex(digest: &[u8; OUT_LEN]) -> alloc::string::String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut ascii = Vec::with_capacity(OUT_LEN * 2);
    // High nibble first, then low nibble, for every byte in order.
    ascii.extend(digest.iter().flat_map(|&byte| {
        [
            DIGITS[usize::from(byte >> 4)],
            DIGITS[usize::from(byte & 0x0F)],
        ]
    }));
    alloc::string::String::from_utf8(ascii).expect("hex output is ASCII")
}
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::string::String;

    /// Expected hex digest of the empty input.
    const EMPTY_DIGEST: &str = "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262";
    /// Expected hex digest of the ASCII string `abc`.
    const ABC_DIGEST: &str = "6437b3ac38465133ffb63b75273a8db548c558465d79db03fd359c6cd5bd9d85";

    /// Hashes a string and returns the digest as lowercase hex.
    fn digest_hex(text: &str) -> String {
        let digest = hash(text.as_bytes());
        hex(&digest)
    }

    #[test]
    fn empty_input_matches_blake3_kat() {
        assert_eq!(digest_hex(""), EMPTY_DIGEST);
    }

    #[test]
    fn abc_matches_blake3_kat() {
        assert_eq!(digest_hex("abc"), ABC_DIGEST);
    }

    /// Cross-checks the NEON compression function against the scalar one over a grid of
    /// counters, flags and block lengths, then re-checks the two known-answer digests.
    #[cfg(target_arch = "aarch64")]
    #[test]
    fn neon_matches_scalar() {
        let cv = IV;
        let block = [0xAA55_AA55u32; 16];
        let counters = [0u64, 1, 0xFFFF_FFFFu64, u64::MAX];
        let flag_values = [0u32, CHUNK_START, CHUNK_END, ROOT, PARENT];
        let block_lens = [0u32, 1, 32, 64];

        for &counter in &counters {
            for &flags in &flag_values {
                for &block_len in &block_lens {
                    let scalar = compress_scalar(&cv, &block, counter, block_len, flags);
                    // SAFETY: this test only builds on aarch64, where the NEON
                    // instructions required by `neon::compress` are available.
                    let simd = unsafe { neon::compress(&cv, &block, counter, block_len, flags) };
                    assert_eq!(
                        scalar, simd,
                        "scalar vs NEON mismatch at counter={counter} flags={flags} block_len={block_len}"
                    );
                }
            }
        }

        assert_eq!(digest_hex(""), EMPTY_DIGEST);
        assert_eq!(digest_hex("abc"), ABC_DIGEST);
    }

    #[test]
    fn deterministic() {
        let first = hash(b"hello world");
        let second = hash(b"hello world");
        assert_eq!(first, second);
    }

    #[test]
    fn one_byte_difference_changes_hash() {
        let original = hash(b"abc");
        let altered = hash(b"abd");
        assert_ne!(original, altered);
    }

    #[test]
    fn largest_power_of_two_helper() {
        // (input, expected largest power of two <= input)
        let cases: [(usize, usize); 7] = [
            (1, 1),
            (2, 2),
            (3, 2),
            (4, 4),
            (7, 4),
            (8, 8),
            (1023, 512),
        ];
        for &(n, expected) in &cases {
            assert_eq!(largest_power_of_two_leq(n), expected);
        }
    }
}