md5-asm 0.5.0

Assembly implementation of MD5 compression function
Documentation
/* 
 * MD5 hash in x86 assembly
 * 
 * Copyright (c) 2016 Project Nayuki. (MIT License)
 * https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 * - The above copyright notice and this permission notice shall be included in
 *   all copies or substantial portions of the Software.
 * - The Software is provided "as is", without warranty of any kind, express or
 *   implied, including but not limited to the warranties of merchantability,
 *   fitness for a particular purpose and noninfringement. In no event shall the
 *   authors or copyright holders be liable for any claim, damages or other
 *   liability, whether in an action of contract, tort or otherwise, arising from,
 *   out of or in connection with the Software or the use or other dealings in the
 *   Software.
 */


/*
 * void md5_compress(uint32_t state[4], const uint8_t block[64])
 *
 * Runs the 64 MD5 steps over one 64-byte block and adds the resulting
 * working variables into state[0..3] in place.
 *
 * Target:  32-bit x86, AT&T syntax, passed through the C preprocessor.
 * In:      both arguments are taken from the stack; at entry state is at
 *          4(%esp) and block is at 8(%esp). block is read as sixteen 32-bit
 *          words in the CPU's native byte order.
 * Out:     no return value; state[] is updated in memory.
 * Saved:   ebx, esi, edi and ebp are spilled on entry and reloaded before
 *          returning.
 * Clobb:   eax, ecx, edx, flags.
 * Stack:   16 bytes of spill space; the arguments are left on the stack for
 *          the caller to remove (plain retl).
 *
 * The exported symbol carries a leading underscore when __APPLE__ is defined.
 */
#ifdef __APPLE__
.globl _md5_compress
_md5_compress:
#else
.globl md5_compress
md5_compress:
#endif
    /*
     * Storage usage (stack offsets are valid after the 16-byte spill area
     * has been set up):
     *   Bytes  Location  Description
     *       4  eax       MD5 state variable A
     *       4  ebx       MD5 state variable B
     *       4  ecx       MD5 state variable C
     *       4  edx       MD5 state variable D
     *       4  esi       Temporary for calculation per round; also holds the
     *                    state pointer while state[] is loaded and stored
     *       4  edi       Temporary for calculation per round (ROUND1 only)
     *       4  ebp       Base address of block array argument (read-only)
     *       4  esp       x86 stack pointer
     *       4  [esp+ 0]  Caller's value of ebx
     *       4  [esp+ 4]  Caller's value of esi
     *       4  [esp+ 8]  Caller's value of edi
     *       4  [esp+12]  Caller's value of ebp
     *       4  [esp+16]  Return address
     *       4  [esp+20]  Argument: state
     *       4  [esp+24]  Argument: block
     */
    
    /*
     * ROUND0(a, b, c, d, k, s, t) -- one MD5 step built on the selection
     * function
     *   F(b,c,d) = d ^ (b & (c ^ d))      (same value as (b & c) | (~b & d))
     * Effect:  a = b + rotl32(a + F(b,c,d) + block[k] + t, s)
     *
     *   a, b, c, d  register names, written without the % prefix
     *   k           index of the 32-bit block word, addressed as (k*4)(%ebp)
     *   s           left-rotate count
     *   t           32-bit constant, folded in through the leal displacement
     *
     * Overwrites esi (scratch) and the flags; b, c and d are only read.
     */
    #define ROUND0(a, b, c, d, k, s, t)  \
        addl  (k*4)(%ebp), %a;  /* a += block[k]          */  \
        movl  %c, %esi;         /* esi = c                */  \
        xorl  %d, %esi;         /* esi = c ^ d            */  \
        andl  %b, %esi;         /* esi = b & (c ^ d)      */  \
        xorl  %d, %esi;         /* esi = F(b,c,d)         */  \
        leal  t(%a,%esi), %a;   /* a += F(b,c,d) + t      */  \
        roll  $s, %a;           /* a = rotl32(a, s)       */  \
        addl  %b, %a;           /* a += b                 */
    
    /*
     * ROUND1(a, b, c, d, k, s, t) -- one MD5 step built on
     *   G(b,c,d) = (b & d) | (c & ~d)
     * Effect:  a = b + rotl32(a + G(b,c,d) + block[k] + t, s)
     *
     *   a, b, c, d  register names, written without the % prefix
     *   k           index of the 32-bit block word, addressed as (k*4)(%ebp)
     *   s           left-rotate count
     *   t           32-bit constant, folded in through the leal displacement
     *
     * Overwrites esi and edi (scratch) and the flags; b, c and d are only
     * read. The two halves of G are built in separate temporaries and then
     * merged into esi.
     */
    #define ROUND1(a, b, c, d, k, s, t)  \
        addl  (k*4)(%ebp), %a;  /* a += block[k]          */  \
        movl  %d, %edi;         /* edi = d                */  \
        andl  %b, %edi;         /* edi = b & d            */  \
        movl  %d, %esi;         /* esi = d                */  \
        notl  %esi;             /* esi = ~d               */  \
        andl  %c, %esi;         /* esi = c & ~d           */  \
        orl   %edi, %esi;       /* esi = G(b,c,d)         */  \
        leal  t(%a,%esi), %a;   /* a += G(b,c,d) + t      */  \
        roll  $s, %a;           /* a = rotl32(a, s)       */  \
        addl  %b, %a;           /* a += b                 */
    
    /*
     * ROUND2(a, b, c, d, k, s, t) -- one MD5 step built on the parity
     * function
     *   H(b,c,d) = b ^ c ^ d
     * Effect:  a = b + rotl32(a + H(b,c,d) + block[k] + t, s)
     *
     *   a, b, c, d  register names, written without the % prefix
     *   k           index of the 32-bit block word, addressed as (k*4)(%ebp)
     *   s           left-rotate count
     *   t           32-bit constant, folded in through the leal displacement
     *
     * Overwrites esi (scratch) and the flags; b, c and d are only read.
     */
    #define ROUND2(a, b, c, d, k, s, t)  \
        addl  (k*4)(%ebp), %a;  /* a += block[k]          */  \
        movl  %d, %esi;         /* esi = d                */  \
        xorl  %c, %esi;         /* esi = c ^ d            */  \
        xorl  %b, %esi;         /* esi = H(b,c,d)         */  \
        leal  t(%a,%esi), %a;   /* a += H(b,c,d) + t      */  \
        roll  $s, %a;           /* a = rotl32(a, s)       */  \
        addl  %b, %a;           /* a += b                 */
    
    /*
     * ROUND3(a, b, c, d, k, s, t) -- one MD5 step built on
     *   I(b,c,d) = c ^ (b | ~d)
     * Effect:  a = b + rotl32(a + I(b,c,d) + block[k] + t, s)
     *
     *   a, b, c, d  register names, written without the % prefix
     *   k           index of the 32-bit block word, addressed as (k*4)(%ebp)
     *   s           left-rotate count
     *   t           32-bit constant, folded in through the leal displacement
     *
     * Overwrites esi (scratch) and the flags; b, c and d are only read.
     * Every mnemonic carries an explicit l operand-size suffix, matching the
     * other step macros.
     */
    #define ROUND3(a, b, c, d, k, s, t)  \
        movl  %d, %esi;         /* esi = d                */  \
        notl  %esi;             /* esi = ~d               */  \
        addl  (k*4)(%ebp), %a;  /* a += block[k]          */  \
        orl   %b, %esi;         /* esi = b | ~d           */  \
        xorl  %c, %esi;         /* esi = I(b,c,d)         */  \
        leal  t(%esi,%a), %a;   /* a += I(b,c,d) + t      */  \
        roll  $s, %a;           /* a = rotl32(a, s)       */  \
        addl  %b, %a;           /* a += b                 */
    
    /*
     * Spill the four registers whose caller values must survive this call.
     * Pushing in this order leaves ebx at 0(%esp), esi at 4(%esp),
     * edi at 8(%esp) and ebp at 12(%esp).
     */
    pushl  %ebp
    pushl  %edi
    pushl  %esi
    pushl  %ebx

    /*
     * Fetch the arguments. They sit above the 16 spill bytes and the return
     * address, hence offsets 20 and 24.
     */
    movl  20(%esp), %esi  /* esi = state (pointer, used only for the loads) */
    movl    (%esi), %eax  /* a = state[0] */
    movl   4(%esi), %ebx  /* b = state[1] */
    movl   8(%esi), %ecx  /* c = state[2] */
    movl  12(%esi), %edx  /* d = state[3] */
    movl  24(%esp), %ebp  /* ebp = block, base for every (k*4)(%ebp) access */
    
    /*
     * 64 rounds of hashing.
     *
     * Argument order for every macro is (a, b, c, d, k, s, t): the first
     * register is the one being updated, k selects the block word at
     * (k*4)(%ebp), s is the left-rotate count and t the additive constant.
     * The register list rotates by one position on each line, so the roles
     * of a/b/c/d move between eax/ebx/ecx/edx without any data being copied;
     * after every fourth line the mapping is back to eax,ebx,ecx,edx.
     */

    /* Steps 0-15: ROUND0, block words taken in order 0..15, rotates 7/12/17/22 */
    ROUND0(eax, ebx, ecx, edx,  0,  7, 0xD76AA478)
    ROUND0(edx, eax, ebx, ecx,  1, 12, 0xE8C7B756)
    ROUND0(ecx, edx, eax, ebx,  2, 17, 0x242070DB)
    ROUND0(ebx, ecx, edx, eax,  3, 22, 0xC1BDCEEE)
    ROUND0(eax, ebx, ecx, edx,  4,  7, 0xF57C0FAF)
    ROUND0(edx, eax, ebx, ecx,  5, 12, 0x4787C62A)
    ROUND0(ecx, edx, eax, ebx,  6, 17, 0xA8304613)
    ROUND0(ebx, ecx, edx, eax,  7, 22, 0xFD469501)
    ROUND0(eax, ebx, ecx, edx,  8,  7, 0x698098D8)
    ROUND0(edx, eax, ebx, ecx,  9, 12, 0x8B44F7AF)
    ROUND0(ecx, edx, eax, ebx, 10, 17, 0xFFFF5BB1)
    ROUND0(ebx, ecx, edx, eax, 11, 22, 0x895CD7BE)
    ROUND0(eax, ebx, ecx, edx, 12,  7, 0x6B901122)
    ROUND0(edx, eax, ebx, ecx, 13, 12, 0xFD987193)
    ROUND0(ecx, edx, eax, ebx, 14, 17, 0xA679438E)
    ROUND0(ebx, ecx, edx, eax, 15, 22, 0x49B40821)
    /* Steps 16-31: ROUND1, word index advances by 5 mod 16 starting at 1, rotates 5/9/14/20 */
    ROUND1(eax, ebx, ecx, edx,  1,  5, 0xF61E2562)
    ROUND1(edx, eax, ebx, ecx,  6,  9, 0xC040B340)
    ROUND1(ecx, edx, eax, ebx, 11, 14, 0x265E5A51)
    ROUND1(ebx, ecx, edx, eax,  0, 20, 0xE9B6C7AA)
    ROUND1(eax, ebx, ecx, edx,  5,  5, 0xD62F105D)
    ROUND1(edx, eax, ebx, ecx, 10,  9, 0x02441453)
    ROUND1(ecx, edx, eax, ebx, 15, 14, 0xD8A1E681)
    ROUND1(ebx, ecx, edx, eax,  4, 20, 0xE7D3FBC8)
    ROUND1(eax, ebx, ecx, edx,  9,  5, 0x21E1CDE6)
    ROUND1(edx, eax, ebx, ecx, 14,  9, 0xC33707D6)
    ROUND1(ecx, edx, eax, ebx,  3, 14, 0xF4D50D87)
    ROUND1(ebx, ecx, edx, eax,  8, 20, 0x455A14ED)
    ROUND1(eax, ebx, ecx, edx, 13,  5, 0xA9E3E905)
    ROUND1(edx, eax, ebx, ecx,  2,  9, 0xFCEFA3F8)
    ROUND1(ecx, edx, eax, ebx,  7, 14, 0x676F02D9)
    ROUND1(ebx, ecx, edx, eax, 12, 20, 0x8D2A4C8A)
    /* Steps 32-47: ROUND2, word index advances by 3 mod 16 starting at 5, rotates 4/11/16/23 */
    ROUND2(eax, ebx, ecx, edx,  5,  4, 0xFFFA3942)
    ROUND2(edx, eax, ebx, ecx,  8, 11, 0x8771F681)
    ROUND2(ecx, edx, eax, ebx, 11, 16, 0x6D9D6122)
    ROUND2(ebx, ecx, edx, eax, 14, 23, 0xFDE5380C)
    ROUND2(eax, ebx, ecx, edx,  1,  4, 0xA4BEEA44)
    ROUND2(edx, eax, ebx, ecx,  4, 11, 0x4BDECFA9)
    ROUND2(ecx, edx, eax, ebx,  7, 16, 0xF6BB4B60)
    ROUND2(ebx, ecx, edx, eax, 10, 23, 0xBEBFBC70)
    ROUND2(eax, ebx, ecx, edx, 13,  4, 0x289B7EC6)
    ROUND2(edx, eax, ebx, ecx,  0, 11, 0xEAA127FA)
    ROUND2(ecx, edx, eax, ebx,  3, 16, 0xD4EF3085)
    ROUND2(ebx, ecx, edx, eax,  6, 23, 0x04881D05)
    ROUND2(eax, ebx, ecx, edx,  9,  4, 0xD9D4D039)
    ROUND2(edx, eax, ebx, ecx, 12, 11, 0xE6DB99E5)
    ROUND2(ecx, edx, eax, ebx, 15, 16, 0x1FA27CF8)
    ROUND2(ebx, ecx, edx, eax,  2, 23, 0xC4AC5665)
    /* Steps 48-63: ROUND3, word index advances by 7 mod 16 starting at 0, rotates 6/10/15/21 */
    ROUND3(eax, ebx, ecx, edx,  0,  6, 0xF4292244)
    ROUND3(edx, eax, ebx, ecx,  7, 10, 0x432AFF97)
    ROUND3(ecx, edx, eax, ebx, 14, 15, 0xAB9423A7)
    ROUND3(ebx, ecx, edx, eax,  5, 21, 0xFC93A039)
    ROUND3(eax, ebx, ecx, edx, 12,  6, 0x655B59C3)
    ROUND3(edx, eax, ebx, ecx,  3, 10, 0x8F0CCC92)
    ROUND3(ecx, edx, eax, ebx, 10, 15, 0xFFEFF47D)
    ROUND3(ebx, ecx, edx, eax,  1, 21, 0x85845DD1)
    ROUND3(eax, ebx, ecx, edx,  8,  6, 0x6FA87E4F)
    ROUND3(edx, eax, ebx, ecx, 15, 10, 0xFE2CE6E0)
    ROUND3(ecx, edx, eax, ebx,  6, 15, 0xA3014314)
    ROUND3(ebx, ecx, edx, eax, 13, 21, 0x4E0811A1)
    ROUND3(eax, ebx, ecx, edx,  4,  6, 0xF7537E82)
    ROUND3(edx, eax, ebx, ecx, 11, 10, 0xBD3AF235)
    ROUND3(ecx, edx, eax, ebx,  2, 15, 0x2AD7D2BB)
    ROUND3(ebx, ecx, edx, eax,  9, 21, 0xEB86D391)
    
    /*
     * Fold the working variables into the caller's state: state[i] += a/b/c/d.
     * esi served as scratch during the steps, so the state pointer is read
     * from its stack slot again.
     */
    movl  20(%esp), %esi
    addl  %eax,   (%esi)  /* state[0] += a */
    addl  %ebx,  4(%esi)  /* state[1] += b */
    addl  %ecx,  8(%esi)  /* state[2] += c */
    addl  %edx, 12(%esi)  /* state[3] += d */

    /*
     * Reload the caller's registers from the spill area in ascending address
     * order (ebx at 0, esi at 4, edi at 8, ebp at 12); the four pops also
     * release the 16 spill bytes, leaving esp at the return address.
     */
    popl  %ebx
    popl  %esi
    popl  %edi
    popl  %ebp
    retl