/*
 * sha2-asm 0.6.4
 *
 * Assembly implementation of SHA-2 compression functions.
 */
/*
 * SHA-256 hash in AArch64 assembly for macOS/M1
 *
 * Based on the following C intrinsics implementation:
 * <https://github.com/noloader/SHA-Intrinsics/blob/master/sha256-arm.c>
 *
 * Original C written and placed in public domain by Jeffrey Walton.
 * Based on code from ARM, and by Johannes Schneiders, Skip Hovsmith and
 * Barry O'Rourke for the mbedTLS project.
 */


/*
 * void sha256_compress(uint32_t state[8], const uint8_t block[64])
 *
 * Runs the 64 SHA-256 rounds for one 64-byte block and adds the result
 * into `state` in place, using the sha256h / sha256h2 / sha256su0 /
 * sha256su1 instructions.
 *
 * Syntax: Apple (Mach-O) AArch64 assembler.
 * In:     x0 = state (8 x 32-bit words; read, then written back)
 *         x1 = block (64 bytes; read only)
 * Out:    state[0..7] updated; no return value
 * Clobb:  x8, v0-v7, v16-v18
 * Stack:  none (leaf function, sp is never touched)
 *
 * Register roles:
 *   v0, v1          state as loaded on entry, kept for the final add
 *   v6, v4, v5, v3  message schedule words, 4 x 32-bit each (MSG0..MSG3)
 *   v7, v17         alternating "schedule word + round constant" inputs
 *   v16, v18, v2    working state across round groups
 */
.global _sha256_compress
_sha256_compress:
        ldp     q0, q1, [x0]                    // v0 = state[0..3], v1 = state[4..7]
        ldp     q6, q4, [x1]                    // v6 = block bytes 0-15,  v4 = bytes 16-31
        ldp     q5, q3, [x1, #32]               // v5 = block bytes 32-47, v3 = bytes 48-63

        // SHA-256 message words are big-endian: byte-swap every 32-bit lane.
        // Done directly in registers, so no copy of the block is spilled.
        rev32.16b       v6, v6
        rev32.16b       v4, v4
        rev32.16b       v5, v5
        rev32.16b       v3, v3

        adrp    x8, K_0@PAGE
        ldr     q2, [x8, K_0@PAGEOFF]
        add.4s  v17, v6, v2                     // v17 = MSG0 + K_0, consumed by rounds 0-3

        // Rounds 0-3
        sha256su0.4s    v6, v4
        adrp    x8, K_1@PAGE
        ldr     q2, [x8, K_1@PAGEOFF]
        add.4s  v7, v4, v2                      // v7 = MSG1 + K_1, consumed by rounds 4-7
        mov.16b v16, v0                         // work on copies; v0/v1 are needed at the end
        sha256h.4s      q16, q1, v17
        mov.16b v2, v1
        sha256h2.4s     q2, q0, v17
        sha256su1.4s    v6, v5, v3

        // Rounds 4-7
        sha256su0.4s    v4, v5
        adrp    x8, K_2@PAGE
        ldr     q17, [x8, K_2@PAGEOFF]
        add.4s  v17, v5, v17
        mov.16b v18, v16
        sha256h.4s      q18, q2, v7
        sha256h2.4s     q2, q16, v7
        sha256su1.4s    v4, v3, v6

        // Rounds 8-11
        sha256su0.4s    v5, v3
        adrp    x8, K_3@PAGE
        ldr     q7, [x8, K_3@PAGEOFF]
        add.4s  v7, v3, v7
        mov.16b v16, v18
        sha256h.4s      q16, q2, v17
        sha256h2.4s     q2, q18, v17
        sha256su1.4s    v5, v6, v4

        // Rounds 12-15
        sha256su0.4s    v3, v6
        adrp    x8, K_4@PAGE
        ldr     q17, [x8, K_4@PAGEOFF]
        add.4s  v17, v6, v17
        mov.16b v18, v16
        sha256h.4s      q18, q2, v7
        sha256h2.4s     q2, q16, v7
        sha256su1.4s    v3, v4, v5

        // Rounds 16-19
        sha256su0.4s    v6, v4
        adrp    x8, K_5@PAGE
        ldr     q7, [x8, K_5@PAGEOFF]
        add.4s  v7, v4, v7
        mov.16b v16, v18
        sha256h.4s      q16, q2, v17
        sha256h2.4s     q2, q18, v17
        sha256su1.4s    v6, v5, v3

        // Rounds 20-23
        sha256su0.4s    v4, v5
        adrp    x8, K_6@PAGE
        ldr     q17, [x8, K_6@PAGEOFF]
        add.4s  v17, v5, v17
        mov.16b v18, v16
        sha256h.4s      q18, q2, v7
        sha256h2.4s     q2, q16, v7
        sha256su1.4s    v4, v3, v6

        // Rounds 24-27
        sha256su0.4s    v5, v3
        adrp    x8, K_7@PAGE
        ldr     q7, [x8, K_7@PAGEOFF]
        add.4s  v7, v3, v7
        mov.16b v16, v18
        sha256h.4s      q16, q2, v17
        sha256h2.4s     q2, q18, v17
        sha256su1.4s    v5, v6, v4

        // Rounds 28-31
        sha256su0.4s    v3, v6
        adrp    x8, K_8@PAGE
        ldr     q17, [x8, K_8@PAGEOFF]
        add.4s  v17, v6, v17
        mov.16b v18, v16
        sha256h.4s      q18, q2, v7
        sha256h2.4s     q2, q16, v7
        sha256su1.4s    v3, v4, v5

        // Rounds 32-35
        sha256su0.4s    v6, v4
        adrp    x8, K_9@PAGE
        ldr     q7, [x8, K_9@PAGEOFF]
        add.4s  v7, v4, v7
        mov.16b v16, v18
        sha256h.4s      q16, q2, v17
        sha256h2.4s     q2, q18, v17
        sha256su1.4s    v6, v5, v3

        // Rounds 36-39
        sha256su0.4s    v4, v5
        adrp    x8, K_10@PAGE
        ldr     q17, [x8, K_10@PAGEOFF]
        add.4s  v17, v5, v17
        mov.16b v18, v16
        sha256h.4s      q18, q2, v7
        sha256h2.4s     q2, q16, v7
        sha256su1.4s    v4, v3, v6

        // Rounds 40-43
        sha256su0.4s    v5, v3
        adrp    x8, K_11@PAGE
        ldr     q7, [x8, K_11@PAGEOFF]
        add.4s  v7, v3, v7
        mov.16b v16, v18
        sha256h.4s      q16, q2, v17
        sha256h2.4s     q2, q18, v17
        sha256su1.4s    v5, v6, v4

        // Rounds 44-47 (last schedule update; v6 is reused for MSG0 + K_12)
        sha256su0.4s    v3, v6
        adrp    x8, K_12@PAGE
        ldr     q17, [x8, K_12@PAGEOFF]
        add.4s  v6, v6, v17
        mov.16b v17, v16
        sha256h.4s      q17, q2, v7
        sha256h2.4s     q2, q16, v7
        sha256su1.4s    v3, v4, v5

        // Rounds 48-51 (schedule is complete; only constants are added from here on)
        adrp    x8, K_13@PAGE
        ldr     q7, [x8, K_13@PAGEOFF]
        add.4s  v4, v4, v7
        mov.16b v7, v17
        sha256h.4s      q7, q2, v6
        sha256h2.4s     q2, q17, v6

        // Rounds 52-55
        adrp    x8, K_14@PAGE
        ldr     q6, [x8, K_14@PAGEOFF]
        add.4s  v5, v5, v6
        mov.16b v6, v7
        sha256h.4s      q6, q2, v4
        sha256h2.4s     q2, q7, v4

        // Rounds 56-59
        adrp    x8, K_15@PAGE
        ldr     q4, [x8, K_15@PAGEOFF]
        add.4s  v3, v3, v4
        mov.16b v4, v6
        sha256h.4s      q4, q2, v5
        sha256h2.4s     q2, q6, v5

        // Rounds 60-63
        mov.16b v5, v4
        sha256h.4s      q5, q2, v3
        sha256h2.4s     q2, q4, v3

        // Add the working state into the state loaded on entry
        add.4s  v0, v5, v0
        add.4s  v1, v2, v1

        // Write the updated state back to the caller's buffer
        stp     q0, q1, [x0]

        ret


/*
 * SHA-256 round constants, split into sixteen tables K_0..K_15.
 * Each table holds four 32-bit words (one 128-bit vector load) and is
 * aligned to 16 bytes. _sha256_compress fetches each table with its own
 * adrp/ldr pair, so every label must stay individually addressable.
 */
.p2align 4
K_0:
        .long   0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
.p2align 4
K_1:
        .long   0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
.p2align 4
K_2:
        .long   0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
.p2align 4
K_3:
        .long   0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
.p2align 4
K_4:
        .long   0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
.p2align 4
K_5:
        .long   0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
.p2align 4
K_6:
        .long   0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
.p2align 4
K_7:
        .long   0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
.p2align 4
K_8:
        .long   0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
.p2align 4
K_9:
        .long   0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
.p2align 4
K_10:
        .long   0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
.p2align 4
K_11:
        .long   0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
.p2align 4
K_12:
        .long   0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
.p2align 4
K_13:
        .long   0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
.p2align 4
K_14:
        .long   0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
.p2align 4
K_15:
        .long   0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2