polyval 0.6.0

POLYVAL is a GHASH-like universal hash over GF(2^128) useful for constructing a Message Authentication Code (MAC)
Documentation
//! ARMv8 `PMULL`-accelerated implementation of POLYVAL.
//!
//! Based on this C intrinsics implementation:
//! <https://github.com/noloader/AES-Intrinsics/blob/master/clmul-arm.c>
//!
//! Original C written and placed in public domain by Jeffrey Walton.
//! Based on code from ARM, and by Johannes Schneiders, Skip Hovsmith and
//! Barry O'Rourke for the mbedTLS project.
//!
//! For more information about PMULL, see:
//! - <https://developer.arm.com/documentation/100069/0608/A64-SIMD-Vector-Instructions/PMULL--PMULL2--vector->
//! - <https://eprint.iacr.org/2015/688.pdf>

use crate::{Block, Key, Tag};
use core::{arch::aarch64::*, mem};
use universal_hash::{
    consts::{U1, U16},
    crypto_common::{BlockSizeUser, KeySizeUser, ParBlocksSizeUser},
    KeyInit, Reset, UhfBackend,
};

/// **POLYVAL**: GHASH-like universal hash over GF(2^128).
///
/// State of the ARMv8 `PMULL` backend, held in two 128-bit NEON vectors.
#[derive(Clone)]
pub struct Polyval {
    /// The `H` field element, loaded from the key in `new` and not written
    /// again anywhere in this file.
    h: uint8x16_t,
    /// Running accumulator: all zeroes after `new`/`reset`, updated by every
    /// call to `mul`, and returned by `finalize`.
    y: uint8x16_t,
}

// Keys are 16 bytes (`U16`), i.e. exactly the width of the `uint8x16_t` that
// `new` loads the key into.
impl KeySizeUser for Polyval {
    type KeySize = U16;
}

impl KeyInit for Polyval {
    /// Initialize POLYVAL with the given `H` field element.
    ///
    /// The accumulator starts out as all zeroes.
    fn new(h: &Key) -> Self {
        // SAFETY: `vld1q_u8` reads 16 bytes through the pointer. `Key` is
        // presumably a 16-byte array (`KeySize = U16`) — confirm against its
        // definition in the crate root. `vdupq_n_u8` only builds a constant
        // vector and touches no memory.
        let (h, y) = unsafe { (vld1q_u8(h.as_ptr()), vdupq_n_u8(0)) };
        Self { h, y }
    }
}

// Input is consumed in 16-byte (`U16`) blocks; `mul` loads each block into a
// single `uint8x16_t`.
impl BlockSizeUser for Polyval {
    type BlockSize = U16;
}

// `U1`: this backend processes one block at a time (see `proc_block`).
impl ParBlocksSizeUser for Polyval {
    type ParBlocksSize = U1;
}

impl UhfBackend for Polyval {
    /// Absorb one 16-byte block into the running POLYVAL state.
    fn proc_block(&mut self, x: &Block) {
        // SAFETY: `mul` is `unsafe` because it is compiled with
        // `#[target_feature(enable = "neon")]`.
        // NOTE(review): presumably this backend is only selected on CPUs that
        // support NEON and PMULL — confirm in the code that picks it.
        unsafe { self.mul(x) }
    }
}

impl Reset for Polyval {
    /// Clear the accumulator back to all zeroes; the key `h` is left untouched.
    fn reset(&mut self) {
        // SAFETY: `vdupq_n_u8` only broadcasts a constant byte into a vector
        // register; it dereferences nothing.
        self.y = unsafe { vdupq_n_u8(0) };
    }
}

impl Polyval {
    /// Mask value used when performing reduction.
    /// This corresponds to POLYVAL's polynomial with the highest bit unset.
    ///
    /// Bits 127, 126, 121 and 0 are set, so the upper 64 bits are
    /// `0xC200_0000_0000_0000` and the lower 64 bits are `1`.
    const MASK: u128 = 1 << 127 | 1 << 126 | 1 << 121 | 1;

    /// Get POLYVAL output.
    ///
    /// Consumes the state and returns the accumulator `y`, reinterpreted
    /// byte-for-byte as a `Tag`.
    pub(crate) fn finalize(self) -> Tag {
        // SAFETY: `transmute` is rejected at compile time unless both types
        // have the same size, so `Tag` is 16 bytes like `uint8x16_t`.
        // NOTE(review): `Tag` is presumably a plain byte array for which every
        // bit pattern is valid — confirm against its definition.
        unsafe { mem::transmute(self.y) }
    }

    /// POLYVAL carryless multiplication.
    ///
    /// XORs the block `x` into the accumulator, multiplies the result by `h`
    /// using four 64x64-bit carry-less products, folds the 256-bit product
    /// back down to 128 bits with `MASK`, and stores it in `self.y`.
    ///
    /// # Safety
    ///
    /// Compiled with NEON enabled and, through `pmull`, executes `vmull_p64`;
    /// the caller must only invoke it on a CPU that supports both.
    // TODO(tarcieri): investigate ordering optimizations and fusions e.g.`fuse-crypto-eor`
    #[inline]
    #[target_feature(enable = "neon")]
    unsafe fn mul(&mut self, x: &Block) {
        let h = self.h;
        // Fold the incoming block into the accumulator before multiplying.
        let y = veorq_u8(self.y, vld1q_u8(x.as_ptr()));

        // polynomial multiply
        //
        // Schoolbook product built from the four lane combinations:
        //   r0 = h[0] * y[0]                 (low 128 bits)
        //   r1 = h[1] * y[1]                 (high 128 bits)
        //   t0 = h[0] * y[1] ^ h[1] * y[0]   (middle term, straddles both)
        let z = vdupq_n_u8(0);
        let r0 = pmull::<0, 0>(h, y);
        let r1 = pmull::<1, 1>(h, y);
        let t0 = pmull::<0, 1>(h, y);
        let t1 = pmull::<1, 0>(h, y);
        let t0 = veorq_u8(t0, t1);
        // Lane 0 of the middle term moved into lane 1 (lane 0 zeroed), then
        // added to the low half.
        let t1 = vextq_u8(z, t0, 8);
        let r0 = veorq_u8(r0, t1);
        // Lane 1 of the middle term moved into lane 0 (lane 1 zeroed), then
        // added to the high half.
        let t1 = vextq_u8(t0, z, 8);
        let r1 = veorq_u8(r1, t1);

        // polynomial reduction
        //
        // Two folding steps on the low half `r0`, each multiplying one of its
        // 64-bit lanes by lane 1 of `p` (the upper 64 bits of `MASK` on a
        // little-endian target); the result is finally XORed with `r1`.
        let p = mem::transmute(Self::MASK);
        // r0[0] * p[1]
        let t0 = pmull::<0, 1>(r0, p);
        // Swap the two 64-bit lanes of that product before folding it in.
        let t1 = vextq_u8(t0, t0, 8);
        let r0 = veorq_u8(r0, t1);
        // r0[1] * p[1], using the `r0` updated by the first fold.
        let t1 = pmull::<1, 1>(r0, p);
        let r0 = veorq_u8(r0, t1);

        self.y = veorq_u8(r0, r1);
    }
}

/// Carry-less multiply of one 64-bit lane of `a` by one 64-bit lane of `b`.
///
/// Wrapper for the ARM64 `PMULL` instruction (`vmull_p64`): `A_LANE` and
/// `B_LANE` select which 64-bit lane of the respective input is used, and the
/// 128-bit product is handed back reinterpreted as a byte vector.
///
/// # Safety
///
/// NOTE(review): relies on the CPU supporting `PMULL`; presumably guaranteed
/// by whichever code selects this backend — confirm there.
#[inline(always)]
unsafe fn pmull<const A_LANE: i32, const B_LANE: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
    // Pull the requested 64-bit lane out of each operand.
    let lhs = vgetq_lane_u64(vreinterpretq_u64_u8(a), A_LANE);
    let rhs = vgetq_lane_u64(vreinterpretq_u64_u8(b), B_LANE);

    // 64x64 -> 128-bit polynomial product, viewed as sixteen bytes.
    mem::transmute(vmull_p64(lhs, rhs))
}

// TODO(tarcieri): zeroize support
// #[cfg(feature = "zeroize")]
// impl Drop for Polyval {
//     fn drop(&mut self) {
//         use zeroize::Zeroize;
//         self.h.zeroize();
//         self.y.zeroize();
//     }
// }