/*
 * sha2-asm 0.6.4
 *
 * Assembly implementation of SHA-2 compression functions.
 * Documentation
 */
/* 
 * SHA-256 hash in x86 assembly
 * 
 * Copyright (c) 2014 Project Nayuki. (MIT License)
 * https://www.nayuki.io/page/fast-sha2-hashes-in-x86-assembly
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 * - The above copyright notice and this permission notice shall be included in
 *   all copies or substantial portions of the Software.
 * - The Software is provided "as is", without warranty of any kind, express or
 *   implied, including but not limited to the warranties of merchantability,
 *   fitness for a particular purpose and noninfringement. In no event shall the
 *   authors or copyright holders be liable for any claim, damages or other
 *   liability, whether in an action of contract, tort or otherwise, arising from,
 *   out of or in connection with the Software or the use or other dealings in the
 *   Software.
 */


/*
 * void sha256_compress(uint32_t state[8], const uint8_t block[64])
 *
 * Runs the 64 rounds of the SHA-256 compression function over one 64-byte
 * block and adds the resulting working variables into state[0..7] in place.
 *
 * Syntax: AT&T (GAS), passed through the C preprocessor.
 * Target: 32-bit x86 (uses bswapl).
 * ABI:    both arguments are read from the stack; after the prologue has
 *         moved esp down by 112 bytes they are at 116(%esp) and 120(%esp).
 * In:     state = eight 32-bit words in native byte order (read and written)
 *         block = 64 message bytes; each 32-bit word is loaded and then
 *                 byte-swapped, i.e. the block is interpreted as big-endian
 * Out:    state[0..7] updated; nothing is returned in a register.
 * Clobb:  eax, ecx, edx, flags. ebx, esi, edi and ebp are used as scratch
 *         but are saved on entry and restored before returning.
 * Stack:  112 bytes of scratch below the return address (layout below).
 */
#if defined(__APPLE__) || defined(_WIN32)
.globl _sha256_compress
_sha256_compress:
#else
.globl sha256_compress
sha256_compress:
#endif
    /*
     * Storage usage:
     *   Bytes  Location   Description
     *       4  eax        Temporary for calculation per round
     *       4  ebx        Temporary for calculation per round
     *       4  ecx        Temporary for calculation per round
     *       4  edx        Temporary for calculation per round
     *       4  ebp        Temporary for calculation per round
     *                     (holds the schedule word W[i] on entry to ROUNDTAIL)
     *       4  esi        (During state loading and update) base address of state array argument
     *                     (During hash rounds) temporary for calculation per round
     *       4  edi        Base address of block array argument (during key schedule loading rounds only)
     *       4  esp        x86 stack pointer
     *      32  [esp+  0]  SHA-256 state variables A,B,C,D,E,F,G,H (4 bytes each)
     *      64  [esp+ 32]  Key schedule of 16 * 4 bytes
     *       4  [esp+ 96]  Caller's value of ebx
     *       4  [esp+100]  Caller's value of esi
     *       4  [esp+104]  Caller's value of edi
     *       4  [esp+108]  Caller's value of ebp
     *       4  [esp+112]  Return address
     *       4  [esp+116]  Argument: state
     *       4  [esp+120]  Argument: block
     */

    /*
     * SCHED(i): stack slot of schedule word W[i]. Only the 16 most recent
     * words are kept, so the index is reduced modulo 16 and the slot for
     * W[i] overwrites the one that held W[i-16].
     */
    #define SCHED(i)  ((((i)&0xF)+8)*4)(%esp)

    /*
     * ROUNDa: rounds 0..15. Loads message word i from the block at edi,
     * byte-swaps it, stores it as W[i], and leaves it in ebp for ROUNDTAIL.
     */
    #define ROUNDa(i, a, b, c, d, e, f, g, h, k)  \
        movl    ((i)*4)(%edi), %ebp;  \
        bswapl  %ebp;               \
        movl    %ebp, SCHED(i);     \
        ROUNDTAIL(i, a, b, c, d, e, f, g, h, k)

    /*
     * ROUNDb: rounds 16..63. Extends the schedule:
     *   W[i] = W[i-16] + s0(W[i-15]) + W[i-7] + s1(W[i-2])
     *   s0(x) = ror(x,7)  ^ ror(x,18) ^ (x >> 3)
     *   s1(x) = ror(x,17) ^ ror(x,19) ^ (x >> 10)
     * The sum is accumulated in ebp, stored as W[i], and left in ebp for
     * ROUNDTAIL. Clobbers eax, ebx, ecx.
     */
    #define ROUNDb(i, a, b, c, d, e, f, g, h, k)  \
        movl  SCHED(i-15), %eax;  \
        movl  SCHED(i-16), %ebp;  \
        movl  %eax, %ebx;         \
        addl  SCHED(i- 7), %ebp;  \
        movl  %eax, %ecx;         \
        rorl  $18, %ebx;          \
        shrl  $3, %ecx;           \
        rorl  $7, %eax;           \
        xorl  %ecx, %ebx;         \
        xorl  %ebx, %eax;         \
        addl  %eax, %ebp;         \
        movl  SCHED(i- 2), %eax;  \
        movl  %eax, %ebx;         \
        movl  %eax, %ecx;         \
        rorl  $19, %ebx;          \
        shrl  $10, %ecx;          \
        rorl  $17, %eax;          \
        xorl  %ecx, %ebx;         \
        xorl  %ebx, %eax;         \
        addl  %eax, %ebp;         \
        movl  %ebp, SCHED(i);     \
        ROUNDTAIL(i, a, b, c, d, e, f, g, h, k)

    /* STATE(i): stack slot number i (0..7) of the working variables. */
    #define STATE(i)  ((i)*4)(%esp)

    /*
     * ROUNDTAIL: the round function proper. Expects ebp = W[i]; k is the
     * round constant. a..h are the slot numbers that currently hold the
     * working variables; the invocations below rotate these numbers from
     * round to round, so no values are moved between slots.
     *   t      = h + W[i] + k + S1(e) + Ch(e,f,g)
     *   d     += t
     *   h      = t + S0(a) + Maj(a,b,c)    (slot h is slot a of the next round)
     *   S1(x)  = ror(x,6) ^ ror(x,11) ^ ror(x,25)
     *   S0(x)  = ror(x,2) ^ ror(x,13) ^ ror(x,22)
     *   Ch     = g ^ (e & (f ^ g))
     *   Maj    = (a & (b | c)) | (b & c)
     * Clobbers eax, ebx, ecx, edx, esi.
     */
    #define ROUNDTAIL(i, a, b, c, d, e, f, g, h, k)  \
        /* Part 0: esi = h + W[i] + k + S1(e) + Ch(e,f,g) */  \
        movl  STATE(e), %eax;      \
        movl  %eax, %ebx;          \
        movl  %eax, %ecx;          \
        movl  %eax, %edx;          \
        rorl  $11, %eax;           \
        rorl  $25, %ebx;           \
        rorl  $6, %ecx;            \
        movl  STATE(h), %esi;      \
        xorl  %ebx, %eax;          \
        xorl  %eax, %ecx;          \
        addl  %ebp, %esi;          \
        movl  STATE(g), %ebx;      \
        movl  STATE(f), %eax;      \
        xorl  %ebx, %eax;          \
        andl  %edx, %eax;          \
        xorl  %ebx, %eax;          \
        leal  k(%ecx,%eax), %ecx;  \
        addl  %ecx, %esi;          \
        /* Part 1: d += esi */     \
        addl  %esi, STATE(d);      \
        /* Part 2: h = esi + S0(a) + Maj(a,b,c) */  \
        movl  STATE(a), %eax;      \
        movl  %eax, %ebx;          \
        movl  %eax, %ecx;          \
        movl  %eax, %edx;          \
        rorl  $13, %eax;           \
        rorl  $22, %ebx;           \
        rorl  $2, %ecx;            \
        xorl  %ebx, %eax;          \
        xorl  %eax, %ecx;          \
        movl  STATE(c), %eax;      \
        addl  %ecx, %esi;          \
        movl  %eax, %ecx;          \
        movl  STATE(b), %ebx;      \
        orl   %ebx, %ecx;          \
        andl  %ebx, %eax;          \
        andl  %edx, %ecx;          \
        orl   %eax, %ecx;          \
        addl  %ecx, %esi;          \
        movl  %esi, STATE(h);

    /* Allocate scratch space, save registers */
    subl  $112, %esp
    movl  %ebx,  96(%esp)
    movl  %esi, 100(%esp)
    movl  %edi, 104(%esp)
    movl  %ebp, 108(%esp)

    /* Copy state into the working-variable slots at [esp+0..31] */
    movl  116(%esp), %esi  /* Argument: state */
    movl   0(%esi), %eax;  movl %eax,  0(%esp)
    movl   4(%esi), %eax;  movl %eax,  4(%esp)
    movl   8(%esi), %eax;  movl %eax,  8(%esp)
    movl  12(%esi), %eax;  movl %eax, 12(%esp)
    movl  16(%esi), %eax;  movl %eax, 16(%esp)
    movl  20(%esi), %eax;  movl %eax, 20(%esp)
    movl  24(%esi), %eax;  movl %eax, 24(%esp)
    movl  28(%esi), %eax;  movl %eax, 28(%esp)

    /*
     * Do 64 rounds of hashing. The slot numbers passed as a..h shift by one
     * position every round and repeat with period 8, so after 64 rounds
     * slot n again holds working variable number n.
     */
    movl    120(%esp), %edi  /* Argument: block */
    ROUNDa( 0, 0, 1, 2, 3, 4, 5, 6, 7, 0x428A2F98)
    ROUNDa( 1, 7, 0, 1, 2, 3, 4, 5, 6, 0x71374491)
    ROUNDa( 2, 6, 7, 0, 1, 2, 3, 4, 5, 0xB5C0FBCF)
    ROUNDa( 3, 5, 6, 7, 0, 1, 2, 3, 4, 0xE9B5DBA5)
    ROUNDa( 4, 4, 5, 6, 7, 0, 1, 2, 3, 0x3956C25B)
    ROUNDa( 5, 3, 4, 5, 6, 7, 0, 1, 2, 0x59F111F1)
    ROUNDa( 6, 2, 3, 4, 5, 6, 7, 0, 1, 0x923F82A4)
    ROUNDa( 7, 1, 2, 3, 4, 5, 6, 7, 0, 0xAB1C5ED5)
    ROUNDa( 8, 0, 1, 2, 3, 4, 5, 6, 7, 0xD807AA98)
    ROUNDa( 9, 7, 0, 1, 2, 3, 4, 5, 6, 0x12835B01)
    ROUNDa(10, 6, 7, 0, 1, 2, 3, 4, 5, 0x243185BE)
    ROUNDa(11, 5, 6, 7, 0, 1, 2, 3, 4, 0x550C7DC3)
    ROUNDa(12, 4, 5, 6, 7, 0, 1, 2, 3, 0x72BE5D74)
    ROUNDa(13, 3, 4, 5, 6, 7, 0, 1, 2, 0x80DEB1FE)
    ROUNDa(14, 2, 3, 4, 5, 6, 7, 0, 1, 0x9BDC06A7)
    ROUNDa(15, 1, 2, 3, 4, 5, 6, 7, 0, 0xC19BF174)
    ROUNDb(16, 0, 1, 2, 3, 4, 5, 6, 7, 0xE49B69C1)
    ROUNDb(17, 7, 0, 1, 2, 3, 4, 5, 6, 0xEFBE4786)
    ROUNDb(18, 6, 7, 0, 1, 2, 3, 4, 5, 0x0FC19DC6)
    ROUNDb(19, 5, 6, 7, 0, 1, 2, 3, 4, 0x240CA1CC)
    ROUNDb(20, 4, 5, 6, 7, 0, 1, 2, 3, 0x2DE92C6F)
    ROUNDb(21, 3, 4, 5, 6, 7, 0, 1, 2, 0x4A7484AA)
    ROUNDb(22, 2, 3, 4, 5, 6, 7, 0, 1, 0x5CB0A9DC)
    ROUNDb(23, 1, 2, 3, 4, 5, 6, 7, 0, 0x76F988DA)
    ROUNDb(24, 0, 1, 2, 3, 4, 5, 6, 7, 0x983E5152)
    ROUNDb(25, 7, 0, 1, 2, 3, 4, 5, 6, 0xA831C66D)
    ROUNDb(26, 6, 7, 0, 1, 2, 3, 4, 5, 0xB00327C8)
    ROUNDb(27, 5, 6, 7, 0, 1, 2, 3, 4, 0xBF597FC7)
    ROUNDb(28, 4, 5, 6, 7, 0, 1, 2, 3, 0xC6E00BF3)
    ROUNDb(29, 3, 4, 5, 6, 7, 0, 1, 2, 0xD5A79147)
    ROUNDb(30, 2, 3, 4, 5, 6, 7, 0, 1, 0x06CA6351)
    ROUNDb(31, 1, 2, 3, 4, 5, 6, 7, 0, 0x14292967)
    ROUNDb(32, 0, 1, 2, 3, 4, 5, 6, 7, 0x27B70A85)
    ROUNDb(33, 7, 0, 1, 2, 3, 4, 5, 6, 0x2E1B2138)
    ROUNDb(34, 6, 7, 0, 1, 2, 3, 4, 5, 0x4D2C6DFC)
    ROUNDb(35, 5, 6, 7, 0, 1, 2, 3, 4, 0x53380D13)
    ROUNDb(36, 4, 5, 6, 7, 0, 1, 2, 3, 0x650A7354)
    ROUNDb(37, 3, 4, 5, 6, 7, 0, 1, 2, 0x766A0ABB)
    ROUNDb(38, 2, 3, 4, 5, 6, 7, 0, 1, 0x81C2C92E)
    ROUNDb(39, 1, 2, 3, 4, 5, 6, 7, 0, 0x92722C85)
    ROUNDb(40, 0, 1, 2, 3, 4, 5, 6, 7, 0xA2BFE8A1)
    ROUNDb(41, 7, 0, 1, 2, 3, 4, 5, 6, 0xA81A664B)
    ROUNDb(42, 6, 7, 0, 1, 2, 3, 4, 5, 0xC24B8B70)
    ROUNDb(43, 5, 6, 7, 0, 1, 2, 3, 4, 0xC76C51A3)
    ROUNDb(44, 4, 5, 6, 7, 0, 1, 2, 3, 0xD192E819)
    ROUNDb(45, 3, 4, 5, 6, 7, 0, 1, 2, 0xD6990624)
    ROUNDb(46, 2, 3, 4, 5, 6, 7, 0, 1, 0xF40E3585)
    ROUNDb(47, 1, 2, 3, 4, 5, 6, 7, 0, 0x106AA070)
    ROUNDb(48, 0, 1, 2, 3, 4, 5, 6, 7, 0x19A4C116)
    ROUNDb(49, 7, 0, 1, 2, 3, 4, 5, 6, 0x1E376C08)
    ROUNDb(50, 6, 7, 0, 1, 2, 3, 4, 5, 0x2748774C)
    ROUNDb(51, 5, 6, 7, 0, 1, 2, 3, 4, 0x34B0BCB5)
    ROUNDb(52, 4, 5, 6, 7, 0, 1, 2, 3, 0x391C0CB3)
    ROUNDb(53, 3, 4, 5, 6, 7, 0, 1, 2, 0x4ED8AA4A)
    ROUNDb(54, 2, 3, 4, 5, 6, 7, 0, 1, 0x5B9CCA4F)
    ROUNDb(55, 1, 2, 3, 4, 5, 6, 7, 0, 0x682E6FF3)
    ROUNDb(56, 0, 1, 2, 3, 4, 5, 6, 7, 0x748F82EE)
    ROUNDb(57, 7, 0, 1, 2, 3, 4, 5, 6, 0x78A5636F)
    ROUNDb(58, 6, 7, 0, 1, 2, 3, 4, 5, 0x84C87814)
    ROUNDb(59, 5, 6, 7, 0, 1, 2, 3, 4, 0x8CC70208)
    ROUNDb(60, 4, 5, 6, 7, 0, 1, 2, 3, 0x90BEFFFA)
    ROUNDb(61, 3, 4, 5, 6, 7, 0, 1, 2, 0xA4506CEB)
    ROUNDb(62, 2, 3, 4, 5, 6, 7, 0, 1, 0xBEF9A3F7)
    ROUNDb(63, 1, 2, 3, 4, 5, 6, 7, 0, 0xC67178F2)

    /* Add the working variables into the caller's state array */
    movl  116(%esp), %esi  /* Argument: state (esi was used as a temporary by the rounds) */
    movl   0(%esp), %eax;  addl %eax,  0(%esi)
    movl   4(%esp), %eax;  addl %eax,  4(%esi)
    movl   8(%esp), %eax;  addl %eax,  8(%esi)
    movl  12(%esp), %eax;  addl %eax, 12(%esi)
    movl  16(%esp), %eax;  addl %eax, 16(%esi)
    movl  20(%esp), %eax;  addl %eax, 20(%esi)
    movl  24(%esp), %eax;  addl %eax, 24(%esi)
    movl  28(%esp), %eax;  addl %eax, 28(%esi)

    /* Restore registers, release scratch space */
    movl   96(%esp), %ebx
    movl  100(%esp), %esi
    movl  104(%esp), %edi
    movl  108(%esp), %ebp
    addl  $112, %esp
    retl

    /*
     * Declare that this object does not need an executable stack. GNU-style
     * ELF linkers treat an object that lacks a .note.GNU-stack section as
     * requesting one, which can make the stack of the whole linked program
     * executable. The push/pop pair leaves the current section unchanged.
     */
#if defined(__linux__) && defined(__ELF__)
    .pushsection .note.GNU-stack,"",%progbits
    .popsection
#endif