// sha1 0.11.0
//
// SHA-1 hash function
// Documentation
//! LoongArch64 assembly backend

use crate::consts::K;
use core::arch::asm;

// Fail the build with a readable message when this module is compiled for any
// architecture other than LoongArch64: the inline assembly below is written with
// LoongArch register names and instructions.
#[cfg(not(target_arch = "loongarch64"))]
compile_error!("loongarch-asm backend can be used only on loongarch64 target arches");

/// Joins a whitespace-separated run of expressions (string literals, integer
/// literals, or nested macro calls yielding literals) into one `&'static str`.
///
/// This is a thin wrapper over [`concat!`] that lets the assembly-generating
/// macros list their fragments without writing a comma after each one.
macro_rules! c {
    ($($piece:expr)*) => {
        concat!($($piece),*)
    };
}

/// Emits the assembly text for one round that consumes message word `$i`
/// directly from the input block (used by `compress` for rounds 0 through 15).
///
/// `$a`..`$e` are register-name string literals for the five working variables
/// in their current rotation; `$i` is the round index. `$t5` is used as scratch.
macro_rules! round0a {
    ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
        c!(
            // Load word `$i` of the block addressed by `$a1`.
            "ld.w    $t5, $a1, (" $i " * 4);"
            // Swap the bytes inside each halfword, then rotate by 16 to swap the
            // halfwords: together this reverses the byte order of the 32-bit word.
            "revb.2h $t5, $t5;"
            "rotri.w $t5, $t5, 16;"
            // e += w[i]
            "add.w " $e ", " $e ", $t5;"
            // Keep w[i] in slot `$i` of the stack scratch area for later schedule steps.
            "st.w    $t5, $sp, (" $i " * 4);"
            // $t5 = ((c ^ d) & b) ^ d
            "xor     $t5, " $c "," $d ";"
            "and     $t5, $t5, " $b ";"
            "xor     $t5, $t5, " $d ";"
            // `$a4` is bound to `K[0]` by `compress`.
            roundtail!($a, $b, $e, $i, "$a4")
        )
    };
}

/// Emits the assembly text that derives message-schedule word `$i` from the
/// 16-slot buffer kept at `$sp`, adds it to register `$e`, and stores it back
/// into slot `$i & 0xF`.
///
/// Uses `$t5`, `$t6`, `$t7` and `$t8` as scratch registers.
///
/// NOTE(review): the name is a misspelling of "schedule"; it is left unchanged
/// because the round macros in this file invoke it under this spelling.
macro_rules! scheldule {
    ($i:literal, $e:literal) => {
        c!(
            // Fetch the four earlier words; indices wrap modulo 16 via `& 0xF`.
            "ld.w    $t5, $sp, (((" $i " - 3) & 0xF) * 4);"
            "ld.w    $t6, $sp, (((" $i " - 8) & 0xF) * 4);"
            "ld.w    $t7, $sp, (((" $i " - 14) & 0xF) * 4);"
            "ld.w    $t8, $sp, (((" $i " - 16) & 0xF) * 4);"
            // $t5 = w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]
            "xor     $t5, $t5, $t6;"
            "xor     $t5, $t5, $t7;"
            "xor     $t5, $t5, $t8;"
            // A 32-bit rotate right by 31 is a rotate left by 1.
            "rotri.w $t5, $t5, 31;"
            // e += w[i]
            "add.w " $e "," $e ", $t5;"
            // Overwrite the oldest slot ((i - 16) & 0xF == i & 0xF) with the new word.
            "st.w    $t5, $sp, ((" $i " & 0xF) * 4);"
        )
    };
}

/// Emits the assembly text for one round that uses the same boolean function
/// and constant register as `round0a!`, but takes its message word from
/// `scheldule!` instead of the input block (used by `compress` for rounds
/// 16 through 19).
///
/// `$a`..`$e` are register-name string literals for the working variables;
/// `$i` is the round index.
macro_rules! round0b {
    ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
        c!(
            // e += w[i], with w[i] computed from and stored into the stack buffer.
            scheldule!($i, $e)
            // $t5 = ((c ^ d) & b) ^ d
            "xor     $t5," $c "," $d ";"
            "and     $t5, $t5," $b ";"
            "xor     $t5, $t5," $d ";"
            // `$a4` is bound to `K[0]` by `compress`.
            roundtail!($a, $b, $e, $i, "$a4")
        )
    };
}

/// Emits the assembly text for one round whose boolean function is
/// `b ^ c ^ d` and whose constant lives in `$a5` (used by `compress` for
/// rounds 20 through 39).
///
/// `$a`..`$e` are register-name string literals for the working variables;
/// `$i` is the round index.
macro_rules! round1 {
    ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
        c!(
            // e += w[i], with w[i] computed from and stored into the stack buffer.
            scheldule!($i, $e)
            // $t5 = b ^ c ^ d
            "xor     $t5," $b "," $c ";"
            "xor     $t5, $t5," $d ";"
            // `$a5` is bound to `K[1]` by `compress`.
            roundtail!($a, $b, $e, $i, "$a5")
        )
    };
}

/// Emits the assembly text for one round whose boolean function is
/// `((c | d) & b) | (c & d)` and whose constant lives in `$a6` (used by
/// `compress` for rounds 40 through 59).
///
/// `$a`..`$e` are register-name string literals for the working variables;
/// `$i` is the round index. `$t7` is used as a second scratch register.
macro_rules! round2 {
    ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
        c!(
            // e += w[i], with w[i] computed from and stored into the stack buffer.
            scheldule!($i, $e)
            // $t5 = (c | d) & b
            "or      $t5," $c "," $d ";"
            "and     $t5, $t5, " $b ";"
            // $t7 = c & d; it is free again here because `scheldule!` has finished with it.
            "and     $t7," $c "," $d ";"
            // $t5 = ((c | d) & b) | (c & d)
            "or      $t5, $t5, $t7;"
            // `$a6` is bound to `K[2]` by `compress`.
            roundtail!($a, $b, $e, $i, "$a6")
        )
    };
}

/// Emits the assembly text for one round whose boolean function is
/// `b ^ c ^ d` (the same as `round1!`) and whose constant lives in `$a7`
/// (used by `compress` for rounds 60 through 79).
///
/// `$a`..`$e` are register-name string literals for the working variables;
/// `$i` is the round index.
macro_rules! round3 {
    ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
        c!(
            // e += w[i], with w[i] computed from and stored into the stack buffer.
            scheldule!($i, $e)
            // $t5 = b ^ c ^ d
            "xor     $t5," $b "," $c ";"
            "xor     $t5, $t5," $d ";"
            // `$a7` is bound to `K[3]` by `compress`.
            roundtail!($a, $b, $e, $i, "$a7")
        )
    };
}

/// Emits the assembly text shared by the end of every round.
///
/// On entry `$t5` holds the round's boolean-function result and `$e` already
/// includes the message word. The emitted code rotates `$b` in place and adds
/// `$t5`, the constant register `$k`, and a rotated copy of `$a` to `$e`.
///
/// `$i` is accepted so all round macros can forward the same argument list,
/// but it is not used in the emitted text.
macro_rules! roundtail {
    ($a:literal, $b:literal, $e:literal, $i:literal, $k:literal) => {
        c!(
            // b = b rotated right by 2 (a 32-bit rotate left by 30).
            "rotri.w " $b "," $b ", 2;"
            // e += f(b, c, d), already computed into $t5 by the caller.
            "add.w " $e "," $e ", $t5;"
            // e += round constant
            "add.w " $e "," $e "," $k ";"
            // e += a rotated right by 27 (a 32-bit rotate left by 5).
            "rotri.w $t5," $a ", 27;"
            "add.w " $e "," $e ", $t5;"
        )
    };
}

/// Runs the SHA-1 compression function over every 64-byte block in `blocks`,
/// updating the five-word `state` in place.
///
/// The whole block loop is a single inline-assembly sequence: it reserves 64
/// bytes of stack as a 16-word message-schedule buffer, keeps the working
/// variables in `$t0`..`$t4`, and executes 80 unrolled rounds per block.
///
/// Returns immediately when `blocks` is empty.
pub(crate) fn compress(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
    // The assembly loop tests its counter only after processing a block, so it
    // must never be entered with a count of zero.
    if blocks.is_empty() {
        return;
    }

    // SAFETY: `state` is a valid, exclusive reference to five `u32`s and the
    // assembly only reads/writes offsets 0..=16 (+4 bytes) from it. `blocks` is
    // non-empty, `$a1` starts at its first element and advances 64 bytes per
    // iteration for exactly `blocks.len()` iterations. The stack pointer is
    // lowered by 64 at the start and raised by 64 at the end, and every register
    // written by the assembly is declared as an output or clobber below.
    //
    // NOTE(review): `ld.w` reads 4-byte words from `blocks`, whose type only
    // guarantees 1-byte alignment — presumably this relies on the target
    // handling unaligned word loads; confirm.
    unsafe {
        asm!(
            // Allocate scratch stack space
            "addi.d  $sp, $sp, -64;",

            // Load state
            "ld.w    $t0, $a0, 0",
            "ld.w    $t1, $a0, 4",
            "ld.w    $t2, $a0, 8",
            "ld.w    $t3, $a0, 12",
            "ld.w    $t4, $a0, 16",

            // Top of the per-block loop.
            "42:",

            // Each round shifts the role of every register by one position instead
            // of moving values, so the argument order rotates with period 5; after
            // 80 rounds the registers are back in their original roles.
            round0a!("$t0", "$t1", "$t2", "$t3", "$t4",  0),
            round0a!("$t4", "$t0", "$t1", "$t2", "$t3",  1),
            round0a!("$t3", "$t4", "$t0", "$t1", "$t2",  2),
            round0a!("$t2", "$t3", "$t4", "$t0", "$t1",  3),
            round0a!("$t1", "$t2", "$t3", "$t4", "$t0",  4),
            round0a!("$t0", "$t1", "$t2", "$t3", "$t4",  5),
            round0a!("$t4", "$t0", "$t1", "$t2", "$t3",  6),
            round0a!("$t3", "$t4", "$t0", "$t1", "$t2",  7),
            round0a!("$t2", "$t3", "$t4", "$t0", "$t1",  8),
            round0a!("$t1", "$t2", "$t3", "$t4", "$t0",  9),
            round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 10),
            round0a!("$t4", "$t0", "$t1", "$t2", "$t3", 11),
            round0a!("$t3", "$t4", "$t0", "$t1", "$t2", 12),
            round0a!("$t2", "$t3", "$t4", "$t0", "$t1", 13),
            round0a!("$t1", "$t2", "$t3", "$t4", "$t0", 14),
            round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 15),
            round0b!("$t4", "$t0", "$t1", "$t2", "$t3", 16),
            round0b!("$t3", "$t4", "$t0", "$t1", "$t2", 17),
            round0b!("$t2", "$t3", "$t4", "$t0", "$t1", 18),
            round0b!("$t1", "$t2", "$t3", "$t4", "$t0", 19),
            round1!("$t0", "$t1", "$t2", "$t3", "$t4", 20),
            round1!("$t4", "$t0", "$t1", "$t2", "$t3", 21),
            round1!("$t3", "$t4", "$t0", "$t1", "$t2", 22),
            round1!("$t2", "$t3", "$t4", "$t0", "$t1", 23),
            round1!("$t1", "$t2", "$t3", "$t4", "$t0", 24),
            round1!("$t0", "$t1", "$t2", "$t3", "$t4", 25),
            round1!("$t4", "$t0", "$t1", "$t2", "$t3", 26),
            round1!("$t3", "$t4", "$t0", "$t1", "$t2", 27),
            round1!("$t2", "$t3", "$t4", "$t0", "$t1", 28),
            round1!("$t1", "$t2", "$t3", "$t4", "$t0", 29),
            round1!("$t0", "$t1", "$t2", "$t3", "$t4", 30),
            round1!("$t4", "$t0", "$t1", "$t2", "$t3", 31),
            round1!("$t3", "$t4", "$t0", "$t1", "$t2", 32),
            round1!("$t2", "$t3", "$t4", "$t0", "$t1", 33),
            round1!("$t1", "$t2", "$t3", "$t4", "$t0", 34),
            round1!("$t0", "$t1", "$t2", "$t3", "$t4", 35),
            round1!("$t4", "$t0", "$t1", "$t2", "$t3", 36),
            round1!("$t3", "$t4", "$t0", "$t1", "$t2", 37),
            round1!("$t2", "$t3", "$t4", "$t0", "$t1", 38),
            round1!("$t1", "$t2", "$t3", "$t4", "$t0", 39),
            round2!("$t0", "$t1", "$t2", "$t3", "$t4", 40),
            round2!("$t4", "$t0", "$t1", "$t2", "$t3", 41),
            round2!("$t3", "$t4", "$t0", "$t1", "$t2", 42),
            round2!("$t2", "$t3", "$t4", "$t0", "$t1", 43),
            round2!("$t1", "$t2", "$t3", "$t4", "$t0", 44),
            round2!("$t0", "$t1", "$t2", "$t3", "$t4", 45),
            round2!("$t4", "$t0", "$t1", "$t2", "$t3", 46),
            round2!("$t3", "$t4", "$t0", "$t1", "$t2", 47),
            round2!("$t2", "$t3", "$t4", "$t0", "$t1", 48),
            round2!("$t1", "$t2", "$t3", "$t4", "$t0", 49),
            round2!("$t0", "$t1", "$t2", "$t3", "$t4", 50),
            round2!("$t4", "$t0", "$t1", "$t2", "$t3", 51),
            round2!("$t3", "$t4", "$t0", "$t1", "$t2", 52),
            round2!("$t2", "$t3", "$t4", "$t0", "$t1", 53),
            round2!("$t1", "$t2", "$t3", "$t4", "$t0", 54),
            round2!("$t0", "$t1", "$t2", "$t3", "$t4", 55),
            round2!("$t4", "$t0", "$t1", "$t2", "$t3", 56),
            round2!("$t3", "$t4", "$t0", "$t1", "$t2", 57),
            round2!("$t2", "$t3", "$t4", "$t0", "$t1", 58),
            round2!("$t1", "$t2", "$t3", "$t4", "$t0", 59),
            round3!("$t0", "$t1", "$t2", "$t3", "$t4", 60),
            round3!("$t4", "$t0", "$t1", "$t2", "$t3", 61),
            round3!("$t3", "$t4", "$t0", "$t1", "$t2", 62),
            round3!("$t2", "$t3", "$t4", "$t0", "$t1", 63),
            round3!("$t1", "$t2", "$t3", "$t4", "$t0", 64),
            round3!("$t0", "$t1", "$t2", "$t3", "$t4", 65),
            round3!("$t4", "$t0", "$t1", "$t2", "$t3", 66),
            round3!("$t3", "$t4", "$t0", "$t1", "$t2", 67),
            round3!("$t2", "$t3", "$t4", "$t0", "$t1", 68),
            round3!("$t1", "$t2", "$t3", "$t4", "$t0", 69),
            round3!("$t0", "$t1", "$t2", "$t3", "$t4", 70),
            round3!("$t4", "$t0", "$t1", "$t2", "$t3", 71),
            round3!("$t3", "$t4", "$t0", "$t1", "$t2", 72),
            round3!("$t2", "$t3", "$t4", "$t0", "$t1", 73),
            round3!("$t1", "$t2", "$t3", "$t4", "$t0", 74),
            round3!("$t0", "$t1", "$t2", "$t3", "$t4", 75),
            round3!("$t4", "$t0", "$t1", "$t2", "$t3", 76),
            round3!("$t3", "$t4", "$t0", "$t1", "$t2", 77),
            round3!("$t2", "$t3", "$t4", "$t0", "$t1", 78),
            round3!("$t1", "$t2", "$t3", "$t4", "$t0", 79),

            // Update state registers
            // The previous state is re-read from memory and added to the working
            // variables; `$t5` is reused for `e` once the `a` addition is done.
            "ld.w    $t5, $a0, 0",  // a
            "ld.w    $t6, $a0, 4",  // b
            "ld.w    $t7, $a0, 8",  // c
            "ld.w    $t8, $a0, 12", // d
            "add.w   $t0, $t0, $t5",
            "ld.w    $t5, $a0, 16", // e
            "add.w   $t1, $t1, $t6",
            "add.w   $t2, $t2, $t7",
            "add.w   $t3, $t3, $t8",
            "add.w   $t4, $t4, $t5",

            // Save updated state
            // `$t0`..`$t4` keep these values, so the next iteration starts from
            // them without reloading.
            "st.w    $t0, $a0, 0",
            "st.w    $t1, $a0, 4",
            "st.w    $t2, $a0, 8",
            "st.w    $t3, $a0, 12",
            "st.w    $t4, $a0, 16",

            // Looping over blocks
            // Advance the block pointer, decrement the remaining count, and repeat
            // while it is non-zero.
            "addi.d  $a1, $a1, 64",
            "addi.d  $a2, $a2, -1",
            "bnez    $a2, 42b",

            // Restore stack register
            "addi.d  $sp, $sp, 64",

            // `$a0`: pointer to the state words (only the pointed-to memory changes).
            in("$a0") state,
            // `$a1` / `$a2`: block pointer and remaining block count; both are
            // modified by the loop, so their final values are discarded.
            inout("$a1") blocks.as_ptr() => _,
            inout("$a2") blocks.len() => _,

            // Round constants, one register per group of rounds.
            in("$a4") K[0],
            in("$a5") K[1],
            in("$a6") K[2],
            in("$a7") K[3],

            // Clobbers
            out("$t0") _,
            out("$t1") _,
            out("$t2") _,
            out("$t3") _,
            out("$t4") _,
            out("$t5") _,
            out("$t6") _,
            out("$t7") _,
            out("$t8") _,

            // `nostack` is deliberately absent: the assembly uses stack memory.
            options(preserves_flags),
        );
    }
}