; /* sp_x86_64_asm.asm */
; /*
; * Copyright (C) 2006-2026 wolfSSL Inc.
; *
; * This file is part of wolfSSL.
; *
; * wolfSSL is free software; you can redistribute it and/or modify
; * it under the terms of the GNU General Public License as published by
; * the Free Software Foundation; either version 3 of the License, or
; * (at your option) any later version.
; *
; * wolfSSL is distributed in the hope that it will be useful,
; * but WITHOUT ANY WARRANTY; without even the implied warranty of
; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; * GNU General Public License for more details.
; *
; * You should have received a copy of the GNU General Public License
; * along with this program; if not, write to the Free Software
; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
; */
; Assembler capability checks and default feature symbols.
; When the assembler version symbol (@Version) is below 1200, mark AVX2 and
; MOVBE as unsupported unless the build has already defined those symbols.
IF @Version LT 1200
; AVX2 instructions not recognized by old versions of MASM
IFNDEF NO_AVX2_SUPPORT
NO_AVX2_SUPPORT = 1
ENDIF
; MOVBE instruction not recognized by old versions of MASM
IFNDEF NO_MOVBE_SUPPORT
NO_MOVBE_SUPPORT = 1
ENDIF
ENDIF
; Define HAVE_INTEL_AVX1 unless it has already been defined.
IFNDEF HAVE_INTEL_AVX1
HAVE_INTEL_AVX1 = 1
ENDIF
; Define HAVE_INTEL_AVX2 only when AVX2 has not been marked as unsupported.
IFNDEF NO_AVX2_SUPPORT
HAVE_INTEL_AVX2 = 1
ENDIF
; Define _WIN64 unless it has already been defined.
IFNDEF _WIN64
_WIN64 = 1
ENDIF
IFNDEF WOLFSSL_SP_NO_2048
IFNDEF WOLFSSL_SP_NO_2048
; /* Read big endian unsigned byte array into r.
; * Uses the bswap instruction.
; *
; * r     A single precision integer (rcx). Written from the least
; *       significant 64-bit word upwards and zero filled up to r + 256.
; * size  Maximum number of bytes to convert (rdx). Not read by this code;
; *       the end of the output is fixed at r + 256 bytes.
; * a     Byte array (r8).
; * n     Number of bytes in array to read (r9). Not range checked here.
; *
; * Only volatile registers (rax, rdx, r10, r11) are used as scratch, so no
; * register is saved and rsp is never modified: the procedure is a leaf
; * function and needs no unwind information.
; */
_text SEGMENT READONLY PARA
sp_2048_from_bin_bswap PROC
        ; r11 = one past the last input byte, rdx = one past the last output byte
        lea     r11, [r8+r9]
        lea     rdx, [rcx+256]
        jmp     L_2048_from_bin_bswap_64_end
        ; Convert 64 input bytes per iteration, working back from the end of a.
L_2048_from_bin_bswap_64_start:
        sub     r11, 64
        mov     rax, QWORD PTR [r11+56]
        mov     r10, QWORD PTR [r11+48]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        mov     rax, QWORD PTR [r11+40]
        mov     r10, QWORD PTR [r11+32]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        mov     rax, QWORD PTR [r11+24]
        mov     r10, QWORD PTR [r11+16]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        mov     rax, QWORD PTR [r11+8]
        mov     r10, QWORD PTR [r11]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
L_2048_from_bin_bswap_64_end:
        cmp     r9, 63
        jg      L_2048_from_bin_bswap_64_start
        jmp     L_2048_from_bin_bswap_8_end
        ; Convert 8 input bytes per iteration.
L_2048_from_bin_bswap_8_start:
        sub     r11, 8
        mov     rax, QWORD PTR [r11]
        bswap   rax
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_2048_from_bin_bswap_8_end:
        cmp     r9, 7
        jg      L_2048_from_bin_bswap_8_start
        ; Any bytes left over are the most significant ones at the start of a.
        test    r9, r9
        je      L_2048_from_bin_bswap_hi_end
        xor     r10d, r10d
        xor     eax, eax
        ; Accumulate the leading bytes into r10, most significant byte first.
L_2048_from_bin_bswap_hi_start:
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_2048_from_bin_bswap_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_2048_from_bin_bswap_hi_end:
        ; Zero the remaining words up to r + 256.
        cmp     rcx, rdx
        jge     L_2048_from_bin_bswap_zero_end
L_2048_from_bin_bswap_zero_start:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, rdx
        jl      L_2048_from_bin_bswap_zero_start
L_2048_from_bin_bswap_zero_end:
        ret
sp_2048_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
; * Uses the movbe instruction which is an optional instruction.
; *
; * r     A single precision integer (rcx). Written from the least
; *       significant 64-bit word upwards and zero filled up to r + 256.
; * size  Maximum number of bytes to convert (rdx). Not read by this code;
; *       the end of the output is fixed at r + 256 bytes.
; * a     Byte array (r8).
; * n     Number of bytes in array to read (r9). Not range checked here.
; *
; * Only volatile registers (rax, rdx, r10, r11) are used as scratch, so no
; * register is saved and rsp is never modified: the procedure is a leaf
; * function and needs no unwind information.
; */
_text SEGMENT READONLY PARA
sp_2048_from_bin_movbe PROC
        ; r11 = one past the last input byte, rdx = one past the last output byte
        lea     r11, [r8+r9]
        lea     rdx, [rcx+256]
        jmp     L_2048_from_bin_movbe_64_end
        ; Convert 64 input bytes per iteration, working back from the end of a.
L_2048_from_bin_movbe_64_start:
        sub     r11, 64
        movbe   rax, QWORD PTR [r11+56]
        movbe   r10, QWORD PTR [r11+48]
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        movbe   rax, QWORD PTR [r11+40]
        movbe   r10, QWORD PTR [r11+32]
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        movbe   rax, QWORD PTR [r11+24]
        movbe   r10, QWORD PTR [r11+16]
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        movbe   rax, QWORD PTR [r11+8]
        movbe   r10, QWORD PTR [r11]
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
L_2048_from_bin_movbe_64_end:
        cmp     r9, 63
        jg      L_2048_from_bin_movbe_64_start
        jmp     L_2048_from_bin_movbe_8_end
        ; Convert 8 input bytes per iteration.
L_2048_from_bin_movbe_8_start:
        sub     r11, 8
        movbe   rax, QWORD PTR [r11]
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_2048_from_bin_movbe_8_end:
        cmp     r9, 7
        jg      L_2048_from_bin_movbe_8_start
        ; Any bytes left over are the most significant ones at the start of a.
        test    r9, r9
        je      L_2048_from_bin_movbe_hi_end
        xor     r10d, r10d
        xor     eax, eax
        ; Accumulate the leading bytes into r10, most significant byte first.
L_2048_from_bin_movbe_hi_start:
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_2048_from_bin_movbe_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_2048_from_bin_movbe_hi_end:
        ; Zero the remaining words up to r + 256.
        cmp     rcx, rdx
        jge     L_2048_from_bin_movbe_zero_end
L_2048_from_bin_movbe_zero_start:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, rdx
        jl      L_2048_from_bin_movbe_zero_start
L_2048_from_bin_movbe_zero_end:
        ret
sp_2048_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Store the 32 64-bit words of r into a as big endian bytes.
; * Exactly 256 bytes are written.
; * Byte order of each word is reversed with the bswap instruction.
; *
; * r A single precision integer (rcx).
; * a Byte array receiving the 256 bytes (rdx).
; */
_text SEGMENT READONLY PARA
sp_2048_to_bin_bswap_32 PROC
        ; Assembly-time offset into a of the 16 byte chunk being written.
        ; The REPEAT block expands to 16 straight-line groups; no loop is
        ; executed at run time.
        sp_2048_to_bin_bswap_32_off = 0
        REPEAT 16
        ; Highest two words of r not yet written go to the next 16 bytes of a.
        mov     rax, QWORD PTR [rcx+(248-sp_2048_to_bin_bswap_32_off)]
        mov     r8, QWORD PTR [rcx+(240-sp_2048_to_bin_bswap_32_off)]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+sp_2048_to_bin_bswap_32_off], rax
        mov     QWORD PTR [rdx+(sp_2048_to_bin_bswap_32_off+8)], r8
        sp_2048_to_bin_bswap_32_off = sp_2048_to_bin_bswap_32_off + 16
        ENDM
        ret
sp_2048_to_bin_bswap_32 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Store the 32 64-bit words of r into a as big endian bytes.
; * Exactly 256 bytes are written.
; * Each word is loaded byte-reversed with the optional movbe instruction.
; *
; * r A single precision integer (rcx).
; * a Byte array receiving the 256 bytes (rdx).
; */
_text SEGMENT READONLY PARA
sp_2048_to_bin_movbe_32 PROC
        ; Assembly-time offset into a of the 16 byte chunk being written.
        ; The REPEAT block expands to 16 straight-line groups; no loop is
        ; executed at run time.
        sp_2048_to_bin_movbe_32_off = 0
        REPEAT 16
        ; Highest two words of r not yet written go to the next 16 bytes of a.
        movbe   rax, QWORD PTR [rcx+(248-sp_2048_to_bin_movbe_32_off)]
        movbe   r8, QWORD PTR [rcx+(240-sp_2048_to_bin_movbe_32_off)]
        mov     QWORD PTR [rdx+sp_2048_to_bin_movbe_32_off], rax
        mov     QWORD PTR [rdx+(sp_2048_to_bin_movbe_32_off+8)], r8
        sp_2048_to_bin_movbe_32_off = sp_2048_to_bin_movbe_32_off + 16
        ENDM
        ret
sp_2048_to_bin_movbe_32 ENDP
_text ENDS
ENDIF
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_2048_mul_16 PROC
push r12
mov r9, rdx
sub rsp, 128
; A[0] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9]
xor r12, r12
mov QWORD PTR [rsp], rax
mov r11, rdx
; A[0] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+8], r11
; A[0] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+16], r12
; A[0] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+24], r10
; A[0] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+32], r11
; A[0] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+40], r12
; A[0] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+48], r10
; A[0] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+56], r11
; A[0] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+64], r12
; A[0] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+72], r10
; A[0] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+80], r11
; A[0] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+88], r12
; A[0] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
; A[12] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+96]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+96], r10
; A[0] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
; A[12] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+96]
add r11, rax
adc r12, rdx
adc r10, 0
; A[13] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+104]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+104], r11
; A[0] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
; A[12] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+96]
add r12, rax
adc r10, rdx
adc r11, 0
; A[13] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+104]
add r12, rax
adc r10, rdx
adc r11, 0
; A[14] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+112]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+112], r12
; A[0] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
; A[12] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+96]
add r10, rax
adc r11, rdx
adc r12, 0
; A[13] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+104]
add r10, rax
adc r11, rdx
adc r12, 0
; A[14] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+112]
add r10, rax
adc r11, rdx
adc r12, 0
; A[15] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+120], r10
; A[1] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+8]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
; A[12] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+96]
add r11, rax
adc r12, rdx
adc r10, 0
; A[13] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+104]
add r11, rax
adc r12, rdx
adc r10, 0
; A[14] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+112]
add r11, rax
adc r12, rdx
adc r10, 0
; A[15] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+120]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+128], r11
; A[2] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+16]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
; A[12] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+96]
add r12, rax
adc r10, rdx
adc r11, 0
; A[13] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+104]
add r12, rax
adc r10, rdx
adc r11, 0
; A[14] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+112]
add r12, rax
adc r10, rdx
adc r11, 0
; A[15] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+120]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+136], r12
; A[3] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+24]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
; A[12] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+96]
add r10, rax
adc r11, rdx
adc r12, 0
; A[13] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+104]
add r10, rax
adc r11, rdx
adc r12, 0
; A[14] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+112]
add r10, rax
adc r11, rdx
adc r12, 0
; A[15] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+144], r10
; A[4] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+32]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
; A[12] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+96]
add r11, rax
adc r12, rdx
adc r10, 0
; A[13] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+104]
add r11, rax
adc r12, rdx
adc r10, 0
; A[14] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+112]
add r11, rax
adc r12, rdx
adc r10, 0
; A[15] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+120]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+152], r11
; A[5] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+40]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
; A[12] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+96]
add r12, rax
adc r10, rdx
adc r11, 0
; A[13] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+104]
add r12, rax
adc r10, rdx
adc r11, 0
; A[14] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+112]
add r12, rax
adc r10, rdx
adc r11, 0
; A[15] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+120]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+160], r12
; A[6] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+48]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
; A[12] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+96]
add r10, rax
adc r11, rdx
adc r12, 0
; A[13] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+104]
add r10, rax
adc r11, rdx
adc r12, 0
; A[14] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+112]
add r10, rax
adc r11, rdx
adc r12, 0
; A[15] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+168], r10
; A[7] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+56]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
; A[12] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+96]
add r11, rax
adc r12, rdx
adc r10, 0
; A[13] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+104]
add r11, rax
adc r12, rdx
adc r10, 0
; A[14] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+112]
add r11, rax
adc r12, rdx
adc r10, 0
; A[15] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+120]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+176], r11
; A[8] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+64]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
; A[12] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+96]
add r12, rax
adc r10, rdx
adc r11, 0
; A[13] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+104]
add r12, rax
adc r10, rdx
adc r11, 0
; A[14] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+112]
add r12, rax
adc r10, rdx
adc r11, 0
; A[15] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+120]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+184], r12
; A[9] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+72]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
; A[12] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+96]
add r10, rax
adc r11, rdx
adc r12, 0
; A[13] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+104]
add r10, rax
adc r11, rdx
adc r12, 0
; A[14] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+112]
add r10, rax
adc r11, rdx
adc r12, 0
; A[15] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+192], r10
; A[10] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+80]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
; A[12] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+96]
add r11, rax
adc r12, rdx
adc r10, 0
; A[13] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+104]
add r11, rax
adc r12, rdx
adc r10, 0
; A[14] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+112]
add r11, rax
adc r12, rdx
adc r10, 0
; A[15] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+120]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+200], r11
; A[11] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+88]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[12] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+96]
add r12, rax
adc r10, rdx
adc r11, 0
; A[13] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+104]
add r12, rax
adc r10, rdx
adc r11, 0
; A[14] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+112]
add r12, rax
adc r10, rdx
adc r11, 0
; A[15] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+120]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+208], r12
; A[12] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+96]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[13] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+104]
add r10, rax
adc r11, rdx
adc r12, 0
; A[14] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+112]
add r10, rax
adc r11, rdx
adc r12, 0
; A[15] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+216], r10
; A[13] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+104]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[14] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+112]
add r11, rax
adc r12, rdx
adc r10, 0
; A[15] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+120]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+224], r11
; A[14] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+112]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[15] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+120]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+232], r12
; A[15] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
mov QWORD PTR [rcx+240], r10
mov QWORD PTR [rcx+248], r11
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r10, QWORD PTR [rsp+16]
mov r11, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov rax, QWORD PTR [rsp+32]
mov rdx, QWORD PTR [rsp+40]
mov r10, QWORD PTR [rsp+48]
mov r11, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], rdx
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
mov rax, QWORD PTR [rsp+64]
mov rdx, QWORD PTR [rsp+72]
mov r10, QWORD PTR [rsp+80]
mov r11, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], rdx
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
mov rax, QWORD PTR [rsp+96]
mov rdx, QWORD PTR [rsp+104]
mov r10, QWORD PTR [rsp+112]
mov r11, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], rdx
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
add rsp, 128
pop r12
ret
sp_2048_mul_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
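; /* Multiply a and b into r. (r = a * b)
; *
; * Operands a and b are each 16 64-bit words. Products are formed with MULX
; * and accumulated with the ADCX/ADOX carry chains.
; * When r is the same buffer as a or b, the low 16 words of the result are
; * accumulated in a 128-byte stack buffer rather than directly in r.
; *
; * r Result of multiplication.
; * a First number to multiply.
; * b Second number to multiply.
; */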
_text SEGMENT READONLY PARA
sp_2048_mul_avx2_16 PROC
push rbx
push rbp
push r12
push r13
push r14
push r15
push rdi
mov rbp, r8
mov r8, rcx
mov r9, rdx
sub rsp, 128
cmp r9, r8
mov rbx, rsp
cmovne rbx, r8
cmp rbp, r8
cmove rbx, rsp
add r8, 128
xor rdi, rdi
mov rdx, QWORD PTR [r9]
; A[0] * B[0]
mulx r11, r10, QWORD PTR [rbp]
; A[0] * B[1]
mulx r12, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx], r10
adcx r11, rax
; A[0] * B[2]
mulx r13, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+8], r11
adcx r12, rax
; A[0] * B[3]
mulx r14, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+16], r12
adcx r13, rax
mov QWORD PTR [rbx+24], r13
; A[0] * B[4]
mulx r10, rax, QWORD PTR [rbp+32]
adcx r14, rax
; A[0] * B[5]
mulx r11, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+32], r14
adcx r10, rax
; A[0] * B[6]
mulx r12, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
; A[0] * B[7]
mulx r13, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
mov QWORD PTR [rbx+56], r12
; A[0] * B[8]
mulx r14, rax, QWORD PTR [rbp+64]
adcx r13, rax
; A[0] * B[9]
mulx r10, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
; A[0] * B[10]
mulx r11, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
; A[0] * B[11]
mulx r12, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
mov QWORD PTR [rbx+88], r11
; A[0] * B[12]
mulx r13, rax, QWORD PTR [rbp+96]
adcx r12, rax
; A[0] * B[13]
mulx r14, rax, QWORD PTR [rbp+104]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
; A[0] * B[14]
mulx r10, rax, QWORD PTR [rbp+112]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
; A[0] * B[15]
mulx r11, rax, QWORD PTR [rbp+120]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adcx r11, rdi
mov r15, rdi
adcx r15, rdi
mov QWORD PTR [rbx+120], r10
mov QWORD PTR [r8], r11
mov rdx, QWORD PTR [r9+8]
mov r11, QWORD PTR [rbx+8]
mov r12, QWORD PTR [rbx+16]
mov r13, QWORD PTR [rbx+24]
mov r14, QWORD PTR [rbx+32]
mov r10, QWORD PTR [rbx+40]
; A[1] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[1] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+8], r11
adcx r12, rax
adox r13, rcx
; A[1] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+16], r12
adcx r13, rax
adox r14, rcx
; A[1] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+24], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+32], r14
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
; A[1] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[1] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
adox r12, rcx
; A[1] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
; A[1] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+64], r13
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
; A[1] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r14, rax
adox r10, rcx
; A[1] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[1] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[1] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+96], r12
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
; A[1] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r13, rax
adox r14, rcx
; A[1] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[1] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[1] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [rbx+120], r10
mov r12, rdi
adcx r11, rax
adox r12, rcx
adcx r12, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8], r11
mov QWORD PTR [r8+8], r12
mov rdx, QWORD PTR [r9+16]
mov r12, QWORD PTR [rbx+16]
mov r13, QWORD PTR [rbx+24]
mov r14, QWORD PTR [rbx+32]
mov r10, QWORD PTR [rbx+40]
mov r11, QWORD PTR [rbx+48]
; A[2] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r13, rcx
; A[2] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+16], r12
adcx r13, rax
adox r14, rcx
; A[2] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+24], r13
adcx r14, rax
adox r10, rcx
; A[2] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+32], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+40], r10
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
; A[2] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[2] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
; A[2] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
; A[2] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+72], r14
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
; A[2] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[2] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[2] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[2] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+104], r13
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[2] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r14, rax
adox r10, rcx
; A[2] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[2] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[2] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8], r11
mov r13, rdi
adcx r12, rax
adox r13, rcx
adcx r13, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+8], r12
mov QWORD PTR [r8+16], r13
mov rdx, QWORD PTR [r9+24]
mov r13, QWORD PTR [rbx+24]
mov r14, QWORD PTR [rbx+32]
mov r10, QWORD PTR [rbx+40]
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
; A[3] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r13, rax
adox r14, rcx
; A[3] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+24], r13
adcx r14, rax
adox r10, rcx
; A[3] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+32], r14
adcx r10, rax
adox r11, rcx
; A[3] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+48], r11
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
; A[3] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r13, rcx
; A[3] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
; A[3] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
; A[3] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+80], r10
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
; A[3] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[3] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[3] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[3] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+112], r14
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[3] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r10, rax
adox r11, rcx
; A[3] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[3] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[3] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+8], r12
mov r14, rdi
adcx r13, rax
adox r14, rcx
adcx r14, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+16], r13
mov QWORD PTR [r8+24], r14
mov rdx, QWORD PTR [r9+32]
mov r14, QWORD PTR [rbx+32]
mov r10, QWORD PTR [rbx+40]
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
; A[4] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r14, rax
adox r10, rcx
; A[4] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+32], r14
adcx r10, rax
adox r11, rcx
; A[4] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
adox r12, rcx
; A[4] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+56], r12
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
; A[4] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r13, rax
adox r14, rcx
; A[4] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
; A[4] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[4] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+88], r11
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
; A[4] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r13, rcx
; A[4] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[4] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[4] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+120], r10
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
; A[4] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r11, rax
adox r12, rcx
; A[4] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[4] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[4] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+16], r13
mov r10, rdi
adcx r14, rax
adox r10, rcx
adcx r10, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+24], r14
mov QWORD PTR [r8+32], r10
mov rdx, QWORD PTR [r9+40]
mov r10, QWORD PTR [rbx+40]
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
; A[5] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[5] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
adox r12, rcx
; A[5] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
; A[5] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+64], r13
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
; A[5] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r14, rax
adox r10, rcx
; A[5] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[5] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[5] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+96], r12
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[5] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r13, rax
adox r14, rcx
; A[5] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[5] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[5] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8], r11
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
; A[5] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r12, rax
adox r13, rcx
; A[5] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[5] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[5] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+24], r14
mov r11, rdi
adcx r10, rax
adox r11, rcx
adcx r11, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+32], r10
mov QWORD PTR [r8+40], r11
mov rdx, QWORD PTR [r9+48]
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
; A[6] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[6] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
; A[6] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
; A[6] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+72], r14
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
; A[6] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[6] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[6] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[6] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+104], r13
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[6] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r14, rax
adox r10, rcx
; A[6] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[6] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[6] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+8], r12
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[6] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r13, rax
adox r14, rcx
; A[6] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[6] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[6] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+32], r10
mov r12, rdi
adcx r11, rax
adox r12, rcx
adcx r12, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+40], r11
mov QWORD PTR [r8+48], r12
mov rdx, QWORD PTR [r9+56]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
; A[7] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r13, rcx
; A[7] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
; A[7] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
; A[7] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+80], r10
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
; A[7] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[7] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[7] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[7] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+112], r14
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
; A[7] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[7] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[7] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[7] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [r8+16], r13
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
; A[7] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r14, rax
adox r10, rcx
; A[7] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[7] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[7] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+40], r11
mov r13, rdi
adcx r12, rax
adox r13, rcx
adcx r13, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+48], r12
mov QWORD PTR [r8+56], r13
mov rdx, QWORD PTR [r9+64]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
; A[8] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r13, rax
adox r14, rcx
; A[8] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
; A[8] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[8] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+88], r11
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
; A[8] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r13, rcx
; A[8] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[8] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[8] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+120], r10
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
; A[8] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[8] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[8] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[8] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [r8+24], r14
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
; A[8] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r10, rax
adox r11, rcx
; A[8] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[8] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
; A[8] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+48], r12
mov r14, rdi
adcx r13, rax
adox r14, rcx
adcx r14, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+56], r13
mov QWORD PTR [r8+64], r14
mov rdx, QWORD PTR [r9+72]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
; A[9] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r14, rax
adox r10, rcx
; A[9] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[9] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[9] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+96], r12
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[9] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r13, rax
adox r14, rcx
; A[9] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[9] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[9] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8], r11
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[9] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r13, rcx
; A[9] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[9] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[9] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+32], r10
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
; A[9] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r11, rax
adox r12, rcx
; A[9] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
; A[9] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
; A[9] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+56], r13
mov r10, rdi
adcx r14, rax
adox r10, rcx
adcx r10, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+64], r14
mov QWORD PTR [r8+72], r10
mov rdx, QWORD PTR [r9+80]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
; A[10] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[10] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[10] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[10] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+104], r13
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[10] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r14, rax
adox r10, rcx
; A[10] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[10] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[10] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+8], r12
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
; A[10] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r13, rax
adox r14, rcx
; A[10] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[10] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[10] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+40], r11
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
; A[10] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r12, rax
adox r13, rcx
; A[10] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
; A[10] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
; A[10] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+64], r14
mov r11, rdi
adcx r10, rax
adox r11, rcx
adcx r11, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+72], r10
mov QWORD PTR [r8+80], r11
mov rdx, QWORD PTR [r9+88]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
; A[11] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[11] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[11] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[11] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+112], r14
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
; A[11] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[11] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[11] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[11] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [r8+16], r13
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
; A[11] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r14, rax
adox r10, rcx
; A[11] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[11] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[11] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+48], r12
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
mov r11, QWORD PTR [r8+80]
; A[11] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r13, rax
adox r14, rcx
; A[11] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
; A[11] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+64], r14
adcx r10, rax
adox r11, rcx
; A[11] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+72], r10
mov r12, rdi
adcx r11, rax
adox r12, rcx
adcx r12, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+80], r11
mov QWORD PTR [r8+88], r12
mov rdx, QWORD PTR [r9+96]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
; A[12] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r13, rcx
; A[12] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[12] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[12] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+120], r10
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
; A[12] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[12] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[12] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[12] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [r8+24], r14
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
; A[12] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[12] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[12] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
; A[12] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [r8+56], r13
mov r10, QWORD PTR [r8+72]
mov r11, QWORD PTR [r8+80]
mov r12, QWORD PTR [r8+88]
; A[12] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r14, rax
adox r10, rcx
; A[12] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+64], r14
adcx r10, rax
adox r11, rcx
; A[12] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+72], r10
adcx r11, rax
adox r12, rcx
; A[12] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+80], r11
mov r13, rdi
adcx r12, rax
adox r13, rcx
adcx r13, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+88], r12
mov QWORD PTR [r8+96], r13
mov rdx, QWORD PTR [r9+104]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[13] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r13, rax
adox r14, rcx
; A[13] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[13] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[13] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8], r11
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[13] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r13, rcx
; A[13] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[13] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[13] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+32], r10
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
; A[13] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[13] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
; A[13] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
; A[13] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [r8+64], r14
mov r11, QWORD PTR [r8+80]
mov r12, QWORD PTR [r8+88]
mov r13, QWORD PTR [r8+96]
; A[13] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r10, rax
adox r11, rcx
; A[13] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+72], r10
adcx r11, rax
adox r12, rcx
; A[13] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+80], r11
adcx r12, rax
adox r13, rcx
; A[13] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+88], r12
mov r14, rdi
adcx r13, rax
adox r14, rcx
adcx r14, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+96], r13
mov QWORD PTR [r8+104], r14
mov rdx, QWORD PTR [r9+112]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[14] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r14, rax
adox r10, rcx
; A[14] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[14] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[14] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+8], r12
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
; A[14] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r13, rax
adox r14, rcx
; A[14] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[14] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[14] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+40], r11
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
mov r11, QWORD PTR [r8+80]
; A[14] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r13, rcx
; A[14] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
; A[14] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
; A[14] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+64], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+72], r10
mov r12, QWORD PTR [r8+88]
mov r13, QWORD PTR [r8+96]
mov r14, QWORD PTR [r8+104]
; A[14] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r11, rax
adox r12, rcx
; A[14] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+80], r11
adcx r12, rax
adox r13, rcx
; A[14] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+88], r12
adcx r13, rax
adox r14, rcx
; A[14] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+96], r13
mov r10, rdi
adcx r14, rax
adox r10, rcx
adcx r10, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+104], r14
mov QWORD PTR [r8+112], r10
mov rdx, QWORD PTR [r9+120]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
; A[15] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[15] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[15] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[15] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [r8+16], r13
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
; A[15] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r14, rax
adox r10, rcx
; A[15] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[15] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[15] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+48], r12
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
mov r11, QWORD PTR [r8+80]
mov r12, QWORD PTR [r8+88]
; A[15] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r13, rax
adox r14, rcx
; A[15] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
; A[15] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+64], r14
adcx r10, rax
adox r11, rcx
; A[15] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+72], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+80], r11
mov r13, QWORD PTR [r8+96]
mov r14, QWORD PTR [r8+104]
mov r10, QWORD PTR [r8+112]
; A[15] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r12, rax
adox r13, rcx
; A[15] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+88], r12
adcx r13, rax
adox r14, rcx
; A[15] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+96], r13
adcx r14, rax
adox r10, rcx
; A[15] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+104], r14
mov r11, rdi
adcx r10, rax
adox r11, rcx
adcx r11, r15
mov QWORD PTR [r8+112], r10
mov QWORD PTR [r8+120], r11
sub r8, 128
cmp r9, r8
je L_start_2048_mul_avx2_16
cmp rbp, r8
jne L_end_2048_mul_avx2_16
L_start_2048_mul_avx2_16:
vmovdqu xmm0, OWORD PTR [rbx]
vmovups OWORD PTR [r8], xmm0
vmovdqu xmm0, OWORD PTR [rbx+16]
vmovups OWORD PTR [r8+16], xmm0
vmovdqu xmm0, OWORD PTR [rbx+32]
vmovups OWORD PTR [r8+32], xmm0
vmovdqu xmm0, OWORD PTR [rbx+48]
vmovups OWORD PTR [r8+48], xmm0
vmovdqu xmm0, OWORD PTR [rbx+64]
vmovups OWORD PTR [r8+64], xmm0
vmovdqu xmm0, OWORD PTR [rbx+80]
vmovups OWORD PTR [r8+80], xmm0
vmovdqu xmm0, OWORD PTR [rbx+96]
vmovups OWORD PTR [r8+96], xmm0
vmovdqu xmm0, OWORD PTR [rbx+112]
vmovups OWORD PTR [r8+112], xmm0
L_end_2048_mul_avx2_16:
add rsp, 128
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
ret
sp_2048_mul_avx2_16 ENDP
_text ENDS
ENDIF
; /* Add two 16-word integers (64 bits per word): r = a + b.
; *
; * Register arguments, as used by the code below:
; * r  (rcx)  Destination; receives the 16 sum words.
; * a  (rdx)  First addend, 16 words.
; * b  (r8)   Second addend, 16 words.
; *
; * Returns the carry out of the most significant word in rax (0 or 1).
; * Modifies rax, r9, r10 and the flags only.
; */
_text SEGMENT READONLY PARA
sp_2048_add_16 PROC
        ; Word 0 opens the carry chain with a plain ADD. rax is cleared
        ; before that ADD because XOR rewrites the flags, while the MOVs
        ; that follow leave CF untouched.
        mov     r9, QWORD PTR [rdx]
        xor     rax, rax
        add     r9, QWORD PTR [r8]
        ; Words 1..14, two per expansion. r9 and r10 alternate as the
        ; accumulator: the next word of a is loaded before the previous
        ; sum is stored, and only then is the ADC issued.
        FOR     ofs, <8,24,40,56,72,88,104>
        mov     r10, QWORD PTR [rdx+ofs]
        mov     QWORD PTR [rcx+ofs-8], r9
        adc     r10, QWORD PTR [r8+ofs]
        mov     r9, QWORD PTR [rdx+ofs+8]
        mov     QWORD PTR [rcx+ofs], r10
        adc     r9, QWORD PTR [r8+ofs+8]
        ENDM
        ; Word 15 closes the chain; flush the last two sums.
        mov     r10, QWORD PTR [rdx+120]
        mov     QWORD PTR [rcx+112], r9
        adc     r10, QWORD PTR [r8+120]
        mov     QWORD PTR [rcx+120], r10
        ; Fold the final carry flag into the (zeroed) return register.
        adc     rax, 0
        ret
sp_2048_add_16 ENDP
_text ENDS
; /* Subtract one 32-word integer (64 bits per word) from another in
; * place: a -= b.
; *
; * Register arguments, as used by the code below:
; * a  (rcx)  Minuend; overwritten with the 32 difference words.
; * b  (rdx)  Subtrahend, 32 words.
; *
; * Returns the borrow out of the most significant word in rax:
; * 0 when there was no borrow, all bits set (-1) when there was.
; * Modifies rax, r8, r9 and the flags only.
; */
_text SEGMENT READONLY PARA
sp_2048_sub_in_place_32 PROC
        ; Word 0 opens the borrow chain with a plain SUB.
        mov     r8, QWORD PTR [rcx]
        sub     r8, QWORD PTR [rdx]
        ; Words 1..30, two per expansion. r8 and r9 alternate as the
        ; accumulator: the next word of a is loaded before the previous
        ; difference is stored, and only then is the SBB issued. The MOVs
        ; in between leave CF untouched.
        FOR     ofs, <8,24,40,56,72,88,104,120,136,152,168,184,200,216,232>
        mov     r9, QWORD PTR [rcx+ofs]
        mov     QWORD PTR [rcx+ofs-8], r8
        sbb     r9, QWORD PTR [rdx+ofs]
        mov     r8, QWORD PTR [rcx+ofs+8]
        mov     QWORD PTR [rcx+ofs], r9
        sbb     r8, QWORD PTR [rdx+ofs+8]
        ENDM
        ; Word 31 closes the chain; flush the last two differences.
        mov     r9, QWORD PTR [rcx+248]
        mov     QWORD PTR [rcx+240], r8
        sbb     r9, QWORD PTR [rdx+248]
        mov     QWORD PTR [rcx+248], r9
        ; rax = rax - rax - CF: spreads the final borrow into a 0 / -1 mask.
        sbb     rax, rax
        ret
sp_2048_sub_in_place_32 ENDP
_text ENDS
; /* Add two 32-word integers (64 bits per word): r = a + b.
; *
; * Register arguments, as used by the code below:
; * r  (rcx)  Destination; receives the 32 sum words.
; * a  (rdx)  First addend, 32 words.
; * b  (r8)   Second addend, 32 words.
; *
; * Returns the carry out of the most significant word in rax (0 or 1).
; * Modifies rax, r9, r10 and the flags only.
; */
_text SEGMENT READONLY PARA
sp_2048_add_32 PROC
        ; Word 0 opens the carry chain with a plain ADD. rax is cleared
        ; before that ADD because XOR rewrites the flags, while the MOVs
        ; that follow leave CF untouched.
        mov     r9, QWORD PTR [rdx]
        xor     rax, rax
        add     r9, QWORD PTR [r8]
        ; Words 1..30, two per expansion. r9 and r10 alternate as the
        ; accumulator: the next word of a is loaded before the previous
        ; sum is stored, and only then is the ADC issued.
        FOR     ofs, <8,24,40,56,72,88,104,120,136,152,168,184,200,216,232>
        mov     r10, QWORD PTR [rdx+ofs]
        mov     QWORD PTR [rcx+ofs-8], r9
        adc     r10, QWORD PTR [r8+ofs]
        mov     r9, QWORD PTR [rdx+ofs+8]
        mov     QWORD PTR [rcx+ofs], r10
        adc     r9, QWORD PTR [r8+ofs+8]
        ENDM
        ; Word 31 closes the chain; flush the last two sums.
        mov     r10, QWORD PTR [rdx+248]
        mov     QWORD PTR [rcx+240], r9
        adc     r10, QWORD PTR [r8+248]
        mov     QWORD PTR [rcx+248], r10
        ; Fold the final carry flag into the (zeroed) return register.
        adc     rax, 0
        ret
sp_2048_add_32 ENDP
_text ENDS
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_2048_mul_32 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
sub rsp, 808
mov QWORD PTR [rsp+768], rcx
mov QWORD PTR [rsp+776], rdx
mov QWORD PTR [rsp+784], r8
lea r12, QWORD PTR [rsp+512]
lea r14, QWORD PTR [rdx+128]
; Add
mov rax, QWORD PTR [rdx]
xor r15, r15
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [r12], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [r12+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [rdx+24]
mov QWORD PTR [r12+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [r12+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [r12+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r12+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [rdx+56]
mov QWORD PTR [r12+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [r12+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [rdx+72]
mov QWORD PTR [r12+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [r12+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [r12+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r12+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [rdx+104]
mov QWORD PTR [r12+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [rdx+112]
mov QWORD PTR [r12+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [rdx+120]
mov QWORD PTR [r12+112], r10
adc rax, QWORD PTR [r14+120]
mov QWORD PTR [r12+120], rax
adc r15, 0
mov QWORD PTR [rsp+792], r15
lea r13, QWORD PTR [rsp+640]
lea r14, QWORD PTR [r8+128]
; Add
mov rax, QWORD PTR [r8]
xor rdi, rdi
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [r8+8]
mov QWORD PTR [r13], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [r8+16]
mov QWORD PTR [r13+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [r8+24]
mov QWORD PTR [r13+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [r8+32]
mov QWORD PTR [r13+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [r8+40]
mov QWORD PTR [r13+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [r8+48]
mov QWORD PTR [r13+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [r8+56]
mov QWORD PTR [r13+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [r8+64]
mov QWORD PTR [r13+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [r8+72]
mov QWORD PTR [r13+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [r8+80]
mov QWORD PTR [r13+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [r8+88]
mov QWORD PTR [r13+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [r8+96]
mov QWORD PTR [r13+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [r8+104]
mov QWORD PTR [r13+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [r8+112]
mov QWORD PTR [r13+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [r8+120]
mov QWORD PTR [r13+112], r10
adc rax, QWORD PTR [r14+120]
mov QWORD PTR [r13+120], rax
adc rdi, 0
mov QWORD PTR [rsp+800], rdi
mov r8, r13
mov rdx, r12
mov rcx, rsp
call sp_2048_mul_16
mov r8, QWORD PTR [rsp+784]
mov rdx, QWORD PTR [rsp+776]
lea rcx, QWORD PTR [rsp+256]
add r8, 128
add rdx, 128
call sp_2048_mul_16
mov r8, QWORD PTR [rsp+784]
mov rdx, QWORD PTR [rsp+776]
mov rcx, QWORD PTR [rsp+768]
call sp_2048_mul_16
IFDEF _WIN64
mov r8, QWORD PTR [rsp+784]
mov rdx, QWORD PTR [rsp+776]
mov rcx, QWORD PTR [rsp+768]
ENDIF
mov r15, QWORD PTR [rsp+792]
mov rdi, QWORD PTR [rsp+800]
mov rsi, QWORD PTR [rsp+768]
mov r11, r15
lea r12, QWORD PTR [rsp+512]
lea r13, QWORD PTR [rsp+640]
and r11, rdi
neg r15
neg rdi
add rsi, 256
mov rax, QWORD PTR [r12]
mov r9, QWORD PTR [r13]
and rax, rdi
and r9, r15
mov QWORD PTR [r12], rax
mov QWORD PTR [r13], r9
mov rax, QWORD PTR [r12+8]
mov r9, QWORD PTR [r13+8]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+8], rax
mov QWORD PTR [r13+8], r9
mov rax, QWORD PTR [r12+16]
mov r9, QWORD PTR [r13+16]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+16], rax
mov QWORD PTR [r13+16], r9
mov rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [r13+24]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+24], rax
mov QWORD PTR [r13+24], r9
mov rax, QWORD PTR [r12+32]
mov r9, QWORD PTR [r13+32]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+32], rax
mov QWORD PTR [r13+32], r9
mov rax, QWORD PTR [r12+40]
mov r9, QWORD PTR [r13+40]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+40], rax
mov QWORD PTR [r13+40], r9
mov rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [r13+48]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+48], rax
mov QWORD PTR [r13+48], r9
mov rax, QWORD PTR [r12+56]
mov r9, QWORD PTR [r13+56]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+56], rax
mov QWORD PTR [r13+56], r9
mov rax, QWORD PTR [r12+64]
mov r9, QWORD PTR [r13+64]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+64], rax
mov QWORD PTR [r13+64], r9
mov rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [r13+72]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+72], rax
mov QWORD PTR [r13+72], r9
mov rax, QWORD PTR [r12+80]
mov r9, QWORD PTR [r13+80]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+80], rax
mov QWORD PTR [r13+80], r9
mov rax, QWORD PTR [r12+88]
mov r9, QWORD PTR [r13+88]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+88], rax
mov QWORD PTR [r13+88], r9
mov rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [r13+96]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+96], rax
mov QWORD PTR [r13+96], r9
mov rax, QWORD PTR [r12+104]
mov r9, QWORD PTR [r13+104]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+104], rax
mov QWORD PTR [r13+104], r9
mov rax, QWORD PTR [r12+112]
mov r9, QWORD PTR [r13+112]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+112], rax
mov QWORD PTR [r13+112], r9
mov rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [r13+120]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+120], rax
mov QWORD PTR [r13+120], r9
mov rax, QWORD PTR [r12]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov QWORD PTR [rsi+120], rax
adc r11, 0
lea r13, QWORD PTR [rsp+256]
mov r12, rsp
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [r13+248]
mov QWORD PTR [r12+248], r9
sbb r11, 0
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [rcx]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [rcx+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [rcx+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [rcx+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [rcx+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [rcx+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [rcx+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [rcx+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [rcx+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [rcx+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [rcx+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [rcx+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [rcx+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [rcx+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [rcx+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [rcx+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [rcx+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [rcx+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [rcx+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [rcx+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [rcx+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [rcx+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [rcx+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [rcx+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [rcx+248]
mov QWORD PTR [r12+248], r9
sbb r11, 0
sub rsi, 128
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r12]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r12+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r12+192]
mov r9, QWORD PTR [rsi+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r12+200]
mov r10, QWORD PTR [rsi+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r12+208]
mov rax, QWORD PTR [rsi+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r12+216]
mov r9, QWORD PTR [rsi+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r12+224]
mov r10, QWORD PTR [rsi+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r12+232]
mov rax, QWORD PTR [rsi+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r12+240]
mov r9, QWORD PTR [rsi+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r12+248]
mov QWORD PTR [rsi+248], r9
adc r11, 0
mov QWORD PTR [rcx+384], r11
add rsi, 128
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r13+128]
mov QWORD PTR [rsi+128], r9
; Add to zero
mov rax, QWORD PTR [r13+136]
adc rax, 0
mov r9, QWORD PTR [r13+144]
mov QWORD PTR [rsi+136], rax
adc r9, 0
mov r10, QWORD PTR [r13+152]
mov QWORD PTR [rsi+144], r9
adc r10, 0
mov rax, QWORD PTR [r13+160]
mov QWORD PTR [rsi+152], r10
adc rax, 0
mov r9, QWORD PTR [r13+168]
mov QWORD PTR [rsi+160], rax
adc r9, 0
mov r10, QWORD PTR [r13+176]
mov QWORD PTR [rsi+168], r9
adc r10, 0
mov rax, QWORD PTR [r13+184]
mov QWORD PTR [rsi+176], r10
adc rax, 0
mov r9, QWORD PTR [r13+192]
mov QWORD PTR [rsi+184], rax
adc r9, 0
mov r10, QWORD PTR [r13+200]
mov QWORD PTR [rsi+192], r9
adc r10, 0
mov rax, QWORD PTR [r13+208]
mov QWORD PTR [rsi+200], r10
adc rax, 0
mov r9, QWORD PTR [r13+216]
mov QWORD PTR [rsi+208], rax
adc r9, 0
mov r10, QWORD PTR [r13+224]
mov QWORD PTR [rsi+216], r9
adc r10, 0
mov rax, QWORD PTR [r13+232]
mov QWORD PTR [rsi+224], r10
adc rax, 0
mov r9, QWORD PTR [r13+240]
mov QWORD PTR [rsi+232], rax
adc r9, 0
mov r10, QWORD PTR [r13+248]
mov QWORD PTR [rsi+240], r9
adc r10, 0
mov QWORD PTR [rsi+248], r10
add rsp, 808
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_2048_mul_32 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
; *
; * One level of Karatsuba built on three calls to sp_2048_mul_avx2_16
; * (presumably a 16-word by 16-word multiply producing 32 words - it is
; * defined elsewhere in this file, confirm there):
; *   z1 = (a_lo + a_hi) * (b_lo + b_hi)
; *   z2 = a_hi * b_hi
; *   z0 = a_lo * b_lo
; * where lo/hi are the 128-byte halves at byte offsets 0 and 128.
; * The result is assembled as
; *   r = z0 + ((z1 - z0 - z2) << 1024 bits) + (z2 << 2048 bits).
; * The carry out of each half-sum is kept separately and folded in without
; * branching (see the pext section below).
; *
; * r A single precision integer. Bytes 0..511 are written.
; * a A single precision integer. Bytes 0..255 are read.
; * b A single precision integer. Bytes 0..255 are read.
; *
; * Register arguments as used here: rcx = r, rdx = a, r8 = b.
; *
; * Stack frame (808 bytes below the six saved registers):
; *   [rsp+0   .. rsp+255]  z1
; *   [rsp+256 .. rsp+511]  z2
; *   [rsp+512 .. rsp+639]  a_lo + a_hi (low 128 bytes of the sum)
; *   [rsp+640 .. rsp+767]  b_lo + b_hi (low 128 bytes of the sum)
; *   [rsp+768]             saved r
; *   [rsp+776]             saved a
; *   [rsp+784]             saved b
; *   [rsp+792]             carry out of a_lo + a_hi (0 or 1)
; *   [rsp+800]             carry out of b_lo + b_hi (0 or 1)
; */
_text SEGMENT READONLY PARA
sp_2048_mul_avx2_32 PROC
; Save the registers this routine modifies and restores on exit.
push r12
push r13
push r14
push r15
push rdi
push rsi
; Return address (8) + six pushes (48) + 808 = 864 bytes, a multiple of 16,
; so rsp is 16-byte aligned at the calls below provided the caller's rsp was
; 16-byte aligned at its call.
sub rsp, 808
; Keep the three arguments so they can be reloaded after the calls.
mov QWORD PTR [rsp+768], rcx
mov QWORD PTR [rsp+776], rdx
mov QWORD PTR [rsp+784], r8
; r12 -> buffer for a_lo + a_hi, r14 -> a_hi.
lea r12, QWORD PTR [rsp+512]
lea r14, QWORD PTR [rdx+128]
; [r12] = a_lo + a_hi over 16 words. Loads and stores are interleaved with
; the adc chain; mov does not touch the flags, so CF carries from word to
; word. r15 is cleared before the first add and collects the final carry.
mov rax, QWORD PTR [rdx]
xor r15, r15
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [r12], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [r12+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [rdx+24]
mov QWORD PTR [r12+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [r12+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [r12+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r12+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [rdx+56]
mov QWORD PTR [r12+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [r12+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [rdx+72]
mov QWORD PTR [r12+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [r12+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [r12+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r12+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [rdx+104]
mov QWORD PTR [r12+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [rdx+112]
mov QWORD PTR [r12+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [rdx+120]
mov QWORD PTR [r12+112], r10
adc rax, QWORD PTR [r14+120]
mov QWORD PTR [r12+120], rax
; r15 = carry out of the a half-sum (0 or 1); park it in the frame.
adc r15, 0
mov QWORD PTR [rsp+792], r15
; r13 -> buffer for b_lo + b_hi, r14 -> b_hi.
lea r13, QWORD PTR [rsp+640]
lea r14, QWORD PTR [r8+128]
; [r13] = b_lo + b_hi over 16 words, same pattern as above; rdi collects
; the final carry.
mov rax, QWORD PTR [r8]
xor rdi, rdi
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [r8+8]
mov QWORD PTR [r13], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [r8+16]
mov QWORD PTR [r13+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [r8+24]
mov QWORD PTR [r13+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [r8+32]
mov QWORD PTR [r13+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [r8+40]
mov QWORD PTR [r13+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [r8+48]
mov QWORD PTR [r13+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [r8+56]
mov QWORD PTR [r13+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [r8+64]
mov QWORD PTR [r13+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [r8+72]
mov QWORD PTR [r13+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [r8+80]
mov QWORD PTR [r13+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [r8+88]
mov QWORD PTR [r13+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [r8+96]
mov QWORD PTR [r13+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [r8+104]
mov QWORD PTR [r13+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [r8+112]
mov QWORD PTR [r13+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [r8+120]
mov QWORD PTR [r13+112], r10
adc rax, QWORD PTR [r14+120]
mov QWORD PTR [r13+120], rax
; rdi = carry out of the b half-sum (0 or 1); park it in the frame.
adc rdi, 0
mov QWORD PTR [rsp+800], rdi
; z1: multiply the two half-sums, result at [rsp+0].
mov r8, r13
mov rdx, r12
mov rcx, rsp
call sp_2048_mul_avx2_16
; z2: multiply the high halves (a + 128, b + 128), result at [rsp+256].
mov r8, QWORD PTR [rsp+784]
mov rdx, QWORD PTR [rsp+776]
lea rcx, QWORD PTR [rsp+256]
add r8, 128
add rdx, 128
call sp_2048_mul_avx2_16
; z0: multiply the low halves, result written straight into r.
mov r8, QWORD PTR [rsp+784]
mov rdx, QWORD PTR [rsp+776]
mov rcx, QWORD PTR [rsp+768]
call sp_2048_mul_avx2_16
; Reload the argument registers after the last call (presumably because the
; callee is not relied on to preserve them). Of the three, only rcx is
; referenced again in the rest of this routine.
IFDEF _WIN64
mov r8, QWORD PTR [rsp+784]
mov rdx, QWORD PTR [rsp+776]
mov rcx, QWORD PTR [rsp+768]
ENDIF
; Fold in the half-sum carries without branching.
;   r15 = carry of a sum, rdi = carry of b sum, r11 = r15 AND rdi.
; neg turns each 0/1 carry into a 0/all-ones mask. pext with an all-ones
; mask returns its source unchanged and with a zero mask returns 0, so each
; pext pair below selects "a sum if the b sum carried" and "b sum if the
; a sum carried". Their sum is written to r + 256 (16 words) and r11 picks
; up the carry out. mov and pext leave the flags alone, so the adc chain is
; preserved across them.
mov r15, QWORD PTR [rsp+792]
mov rdi, QWORD PTR [rsp+800]
mov rsi, QWORD PTR [rsp+768]
mov r11, r15
lea r12, QWORD PTR [rsp+512]
lea r13, QWORD PTR [rsp+640]
and r11, rdi
neg r15
neg rdi
; rsi -> r + 256
add rsi, 256
mov rax, QWORD PTR [r12]
mov r9, QWORD PTR [r13]
pext rax, rax, rdi
pext r9, r9, r15
add rax, r9
mov r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [r13+8]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi], rax
adc r9, r10
mov r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [r13+16]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+8], r9
adc r10, rax
mov rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [r13+24]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+16], r10
adc rax, r9
mov r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [r13+32]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+24], rax
adc r9, r10
mov r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [r13+40]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+32], r9
adc r10, rax
mov rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [r13+48]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+40], r10
adc rax, r9
mov r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [r13+56]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+48], rax
adc r9, r10
mov r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [r13+64]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+56], r9
adc r10, rax
mov rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [r13+72]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+64], r10
adc rax, r9
mov r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [r13+80]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+72], rax
adc r9, r10
mov r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [r13+88]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+80], r9
adc r10, rax
mov rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [r13+96]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+88], r10
adc rax, r9
mov r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [r13+104]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+96], rax
adc r9, r10
mov r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [r13+112]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+104], r9
adc r10, rax
mov rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [r13+120]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+112], r10
adc rax, r9
mov QWORD PTR [rsi+120], rax
; r11 now holds (carry_a AND carry_b) + carry out of the masked sum.
adc r11, 0
; z1 -= z2, in place over all 32 words (r12 -> z1, r13 -> z2).
lea r13, QWORD PTR [rsp+256]
mov r12, rsp
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [r13+248]
mov QWORD PTR [r12+248], r9
; Borrow out of the subtraction comes off the running top word.
sbb r11, 0
; z1 -= z0, in place over all 32 words (rcx -> r, which holds z0).
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [rcx]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [rcx+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [rcx+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [rcx+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [rcx+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [rcx+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [rcx+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [rcx+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [rcx+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [rcx+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [rcx+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [rcx+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [rcx+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [rcx+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [rcx+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [rcx+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [rcx+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [rcx+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [rcx+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [rcx+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [rcx+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [rcx+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [rcx+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [rcx+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [rcx+248]
mov QWORD PTR [r12+248], r9
; Borrow out of the second subtraction also comes off the top word.
sbb r11, 0
; rsi -> r + 128: position of the middle term.
sub rsi, 128
; r[128..383] += (z1 - z2 - z0), 32 words. The upper 16 destination words
; already hold the masked half-sum total written by the pext section.
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r12]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r12+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r12+192]
mov r9, QWORD PTR [rsi+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r12+200]
mov r10, QWORD PTR [rsi+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r12+208]
mov rax, QWORD PTR [rsi+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r12+216]
mov r9, QWORD PTR [rsi+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r12+224]
mov r10, QWORD PTR [rsi+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r12+232]
mov rax, QWORD PTR [rsi+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r12+240]
mov r9, QWORD PTR [rsi+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r12+248]
mov QWORD PTR [rsi+248], r9
; Fold the final carry into the running top word and store it as the word
; at r + 384, directly above the 32 words just written.
adc r11, 0
mov QWORD PTR [rcx+384], r11
; rsi -> r + 256: position of the high term z2 (r13 -> z2).
add rsi, 128
; r[256..391] += z2[0..135] (17 words, including the word at r + 384
; stored just above).
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r13+128]
mov QWORD PTR [rsi+128], r9
; The remaining 15 destination words (r + 392 .. r + 504) have not been
; written yet, so they are set to the matching z2 words plus the carry still
; propagating from the addition above.
mov rax, QWORD PTR [r13+136]
adc rax, 0
mov r9, QWORD PTR [r13+144]
mov QWORD PTR [rsi+136], rax
adc r9, 0
mov r10, QWORD PTR [r13+152]
mov QWORD PTR [rsi+144], r9
adc r10, 0
mov rax, QWORD PTR [r13+160]
mov QWORD PTR [rsi+152], r10
adc rax, 0
mov r9, QWORD PTR [r13+168]
mov QWORD PTR [rsi+160], rax
adc r9, 0
mov r10, QWORD PTR [r13+176]
mov QWORD PTR [rsi+168], r9
adc r10, 0
mov rax, QWORD PTR [r13+184]
mov QWORD PTR [rsi+176], r10
adc rax, 0
mov r9, QWORD PTR [r13+192]
mov QWORD PTR [rsi+184], rax
adc r9, 0
mov r10, QWORD PTR [r13+200]
mov QWORD PTR [rsi+192], r9
adc r10, 0
mov rax, QWORD PTR [r13+208]
mov QWORD PTR [rsi+200], r10
adc rax, 0
mov r9, QWORD PTR [r13+216]
mov QWORD PTR [rsi+208], rax
adc r9, 0
mov r10, QWORD PTR [r13+224]
mov QWORD PTR [rsi+216], r9
adc r10, 0
mov rax, QWORD PTR [r13+232]
mov QWORD PTR [rsi+224], r10
adc rax, 0
mov r9, QWORD PTR [r13+240]
mov QWORD PTR [rsi+232], rax
adc r9, 0
mov r10, QWORD PTR [r13+248]
mov QWORD PTR [rsi+240], r9
adc r10, 0
mov QWORD PTR [rsi+248], r10
; Release the frame and restore the saved registers in reverse push order.
add rsp, 808
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_2048_mul_avx2_32 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r. (r = a * a)
; *
; * r A single precision integer.
; * a A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_2048_sqr_16 PROC
push r12
push r13
push r14
mov r8, rdx
sub rsp, 128
; A[0] * A[0]
mov rax, QWORD PTR [r8]
mul rax
xor r11, r11
mov QWORD PTR [rsp], rax
mov r10, rdx
; A[0] * A[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r8]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+8], r10
; A[0] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[1] * A[1]
mov rax, QWORD PTR [r8+8]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rsp+16], r11
; A[0] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[1] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8+8]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+24], r9
; A[0] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[1] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+8]
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[2] * A[2]
mov rax, QWORD PTR [r8+16]
mul rax
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+32], r10
; A[0] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+40], r11
; A[0] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[3]
mov rax, QWORD PTR [r8+24]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+48], r9
; A[0] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rsp+56], r10
; A[0] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[4]
mov rax, QWORD PTR [r8+32]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+64], r11
; A[0] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+72], r9
; A[0] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[5]
mov rax, QWORD PTR [r8+40]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rsp+80], r10
; A[0] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+88], r11
; A[0] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[6]
mov rax, QWORD PTR [r8+48]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+96], r9
; A[0] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rsp+104], r10
; A[0] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[7]
mov rax, QWORD PTR [r8+56]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+112], r11
; A[0] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+120], r9
; A[1] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[2] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[8]
mov rax, QWORD PTR [r8+64]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+128], r10
; A[2] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+16]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[3] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rcx+136], r11
; A[3] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+24]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[4] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[9]
mov rax, QWORD PTR [r8+72]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rcx+144], r9
; A[4] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+32]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[5] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+152], r10
; A[5] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+40]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[6] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
; A[10] * A[10]
mov rax, QWORD PTR [r8+80]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rcx+160], r11
; A[6] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+48]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[7] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
; A[10] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+80]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rcx+168], r9
; A[7] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+56]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[8] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
; A[10] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+80]
add r12, rax
adc r13, rdx
adc r14, 0
; A[11] * A[11]
mov rax, QWORD PTR [r8+88]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+176], r10
; A[8] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+64]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[9] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
; A[10] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+80]
add r12, rax
adc r13, rdx
adc r14, 0
; A[11] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+88]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rcx+184], r11
; A[9] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+72]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[10] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+80]
add r12, rax
adc r13, rdx
adc r14, 0
; A[11] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+88]
add r12, rax
adc r13, rdx
adc r14, 0
; A[12] * A[12]
mov rax, QWORD PTR [r8+96]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rcx+192], r9
; A[10] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+80]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[11] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+88]
add r12, rax
adc r13, rdx
adc r14, 0
; A[12] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+96]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+200], r10
; A[11] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+88]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[12] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+96]
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[13] * A[13]
mov rax, QWORD PTR [r8+104]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rcx+208], r11
; A[12] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+96]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[13] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+104]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+216], r9
; A[13] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+104]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[14] * A[14]
mov rax, QWORD PTR [r8+112]
mul rax
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rcx+224], r10
; A[14] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+112]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rcx+232], r11
; A[15] * A[15]
mov rax, QWORD PTR [r8+120]
mul rax
add r9, rax
adc r10, rdx
mov QWORD PTR [rcx+240], r9
mov QWORD PTR [rcx+248], r10
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r12, QWORD PTR [rsp+16]
mov r13, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r12
mov QWORD PTR [rcx+24], r13
mov rax, QWORD PTR [rsp+32]
mov rdx, QWORD PTR [rsp+40]
mov r12, QWORD PTR [rsp+48]
mov r13, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], rdx
mov QWORD PTR [rcx+48], r12
mov QWORD PTR [rcx+56], r13
mov rax, QWORD PTR [rsp+64]
mov rdx, QWORD PTR [rsp+72]
mov r12, QWORD PTR [rsp+80]
mov r13, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], rdx
mov QWORD PTR [rcx+80], r12
mov QWORD PTR [rcx+88], r13
mov rax, QWORD PTR [rsp+96]
mov rdx, QWORD PTR [rsp+104]
mov r12, QWORD PTR [rsp+112]
mov r13, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], rdx
mov QWORD PTR [rcx+112], r12
mov QWORD PTR [rcx+120], r13
add rsp, 128
pop r14
pop r13
pop r12
ret
sp_2048_sqr_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * Straight-line 16 x 16 word squaring built from mulx and two separate
; * carry chains: adcx adds through CF, adox adds through OF.
; *
; * r A single precision integer. Passed in rcx. 32 64-bit words
; *   (256 bytes) are written.
; * a A single precision integer. Passed in rdx. 16 64-bit words
; *   (128 bytes) are read. May be the same pointer as r.
; *
; * Phases:
; *   1. "Diagonal 1" .. "Diagonal 8": each product A[i] x A[j] with i > j
; *      is added exactly once into the result words.
; *   2. "Double and Add in A[i] x A[i]": every result word is doubled and
; *      the squares A[i] x A[i] are added in.
; *   3. When a == r, result words 0..11 were built in a stack buffer and
; *      are copied to r at the end.
; *
; * Register use after the prologue:
; *   r9              a
; *   r8              r + 128 (result words 16..31)
; *   rbp             base for result words 0..11: r when r != a, otherwise
; *                   the 128 byte stack buffer
; *   r15,rdi,rsi,rbx result words 12..15, stored to r only at the end
; *   r10,r11,r12     rotating window of result words being updated
; *   rax,rcx         scratch for the halves of the current mulx product
; *   rdx             implicit mulx multiplicand
; *   r13             constant zero
; *   r14             carry word handed from one diagonal to the next
; *
; * Each "Carry" step drains CF and OF into r14, so every diagonal starts
; * with both flags clear.
; */
_text SEGMENT READONLY PARA
sp_2048_sqr_avx2_16 PROC
; Save the callee-saved registers that are used below.
push rbp
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; r8 = r, r9 = a
mov r8, rcx
mov r9, rdx
; 128 byte temporary, used for the low result words when a == r
sub rsp, 128
; rbp = stack temporary when a == r, otherwise r itself
cmp r9, r8
mov rbp, rsp
cmovne rbp, r8
; r8 now addresses result word 16
add r8, 128
; r13 = 0 for the rest of the function; xor also clears CF and OF
xor r13, r13
; Diagonal 1: A[0] x A[1..15], filling result words 1..16
; New words: mulx writes the high halves into fresh registers
; A[1] x A[0]
mov rdx, QWORD PTR [r9]
mulx r11, r10, QWORD PTR [r9+8]
; A[2] x A[0]
mulx r12, rax, QWORD PTR [r9+16]
adcx r11, rax
adox r12, r13
mov QWORD PTR [rbp+8], r10
mov QWORD PTR [rbp+16], r11
; New words: mulx writes the high halves into fresh registers
; A[3] x A[0]
mulx r10, rax, QWORD PTR [r9+24]
adcx r12, rax
adox r10, r13
; A[4] x A[0]
mulx r11, rax, QWORD PTR [r9+32]
adcx r10, rax
adox r11, r13
mov QWORD PTR [rbp+24], r12
mov QWORD PTR [rbp+32], r10
; New words: mulx writes the high halves into fresh registers
; A[5] x A[0]
mulx r12, rax, QWORD PTR [r9+40]
adcx r11, rax
adox r12, r13
; A[6] x A[0]
mulx r10, rax, QWORD PTR [r9+48]
adcx r12, rax
adox r10, r13
mov QWORD PTR [rbp+40], r11
mov QWORD PTR [rbp+48], r12
; New words: mulx writes the high halves into fresh registers
; A[7] x A[0]
mulx r11, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, r13
; A[8] x A[0]
mulx r12, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r12, r13
mov QWORD PTR [rbp+56], r10
mov QWORD PTR [rbp+64], r11
; New words: mulx writes the high halves into fresh registers
; A[9] x A[0]
mulx r10, rax, QWORD PTR [r9+72]
adcx r12, rax
adox r10, r13
; A[10] x A[0]
mulx r11, rax, QWORD PTR [r9+80]
adcx r10, rax
adox r11, r13
mov QWORD PTR [rbp+72], r12
mov QWORD PTR [rbp+80], r10
; Word 12 is created in r15 and word 13 in rdi: nothing to load
; A[11] x A[0]
mulx r15, rax, QWORD PTR [r9+88]
adcx r11, rax
adox r15, r13
; A[12] x A[0]
mulx rdi, rax, QWORD PTR [r9+96]
adcx r15, rax
adox rdi, r13
mov QWORD PTR [rbp+88], r11
; Word 12 stays in r15: no store
; Word 14 is created in rsi and word 15 in rbx: nothing to load
; A[13] x A[0]
mulx rsi, rax, QWORD PTR [r9+104]
adcx rdi, rax
adox rsi, r13
; A[14] x A[0]
mulx rbx, rax, QWORD PTR [r9+112]
adcx rsi, rax
adox rbx, r13
; Words 13 and 14 stay in rdi and rsi: no store
; New word 16: mulx writes its high half into a fresh register
; A[15] x A[0]
mulx r10, rax, QWORD PTR [r9+120]
adcx rbx, rax
adox r10, r13
; Word 15 stays in rbx: no store
; Carry: fold CF into word 16, then collect the remaining CF and OF in r14
adcx r10, r13
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8], r10
; Diagonal 2: A[1] x A[2..15] then A[15] x A[2], starting at result word 3
mov r10, QWORD PTR [rbp+24]
mov r11, QWORD PTR [rbp+32]
mov r12, QWORD PTR [rbp+40]
; A[2] x A[1]
mov rdx, QWORD PTR [r9+8]
mulx rcx, rax, QWORD PTR [r9+16]
adcx r10, rax
adox r11, rcx
; A[3] x A[1]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+24], r10
mov QWORD PTR [rbp+32], r11
mov r10, QWORD PTR [rbp+48]
mov r11, QWORD PTR [rbp+56]
; A[4] x A[1]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r12, rax
adox r10, rcx
; A[5] x A[1]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+40], r12
mov QWORD PTR [rbp+48], r10
mov r12, QWORD PTR [rbp+64]
mov r10, QWORD PTR [rbp+72]
; A[6] x A[1]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r12, rcx
; A[7] x A[1]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbp+56], r11
mov QWORD PTR [rbp+64], r12
mov r11, QWORD PTR [rbp+80]
mov r12, QWORD PTR [rbp+88]
; A[8] x A[1]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r10, rax
adox r11, rcx
; A[9] x A[1]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+72], r10
mov QWORD PTR [rbp+80], r11
; Words 12 and 13 are held in r15 and rdi: no load
; A[10] x A[1]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r12, rax
adox r15, rcx
; A[11] x A[1]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r12
; Word 12 stays in r15: no store
; Words 14 and 15 are held in rsi and rbx: no load
; A[12] x A[1]
mulx rcx, rax, QWORD PTR [r9+96]
adcx rdi, rax
adox rsi, rcx
; A[13] x A[1]
mulx rcx, rax, QWORD PTR [r9+104]
adcx rsi, rax
adox rbx, rcx
; Words 13 and 14 stay in rdi and rsi: no store
mov r11, QWORD PTR [r8]
; Next new word has no partial sum yet: mulx writes its high half directly
; A[14] x A[1]
mulx rcx, rax, QWORD PTR [r9+112]
adcx rbx, rax
adox r11, rcx
; A[15] x A[1]
mulx r12, rax, QWORD PTR [r9+120]
adcx r11, rax
adox r12, r13
; Word 15 stays in rbx: no store
mov QWORD PTR [r8], r11
; Next new word has no partial sum yet: mulx writes its high half directly
; A[15] x A[2]
mov rdx, QWORD PTR [r9+16]
mulx r10, rax, QWORD PTR [r9+120]
adcx r12, rax
adox r10, r13
mov QWORD PTR [r8+8], r12
; Carry: add the previous diagonal's carry (r14) and CF into the top word,
; then collect the remaining CF and OF in r14
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+16], r10
; Diagonal 3: A[2] x A[3..14] then A[14] x A[3..5], starting at result word 5
; (rdx still holds A[2] from the end of diagonal 2)
mov r10, QWORD PTR [rbp+40]
mov r11, QWORD PTR [rbp+48]
mov r12, QWORD PTR [rbp+56]
; A[3] x A[2]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r11, rcx
; A[4] x A[2]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+40], r10
mov QWORD PTR [rbp+48], r11
mov r10, QWORD PTR [rbp+64]
mov r11, QWORD PTR [rbp+72]
; A[5] x A[2]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r12, rax
adox r10, rcx
; A[6] x A[2]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+56], r12
mov QWORD PTR [rbp+64], r10
mov r12, QWORD PTR [rbp+80]
mov r10, QWORD PTR [rbp+88]
; A[7] x A[2]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r12, rcx
; A[8] x A[2]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbp+72], r11
mov QWORD PTR [rbp+80], r12
; Words 12 and 13 are held in r15 and rdi: no load
; A[9] x A[2]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r15, rcx
; A[10] x A[2]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r10
; Word 12 stays in r15: no store
; Words 14 and 15 are held in rsi and rbx: no load
; A[11] x A[2]
mulx rcx, rax, QWORD PTR [r9+88]
adcx rdi, rax
adox rsi, rcx
; A[12] x A[2]
mulx rcx, rax, QWORD PTR [r9+96]
adcx rsi, rax
adox rbx, rcx
; Words 13 and 14 stay in rdi and rsi: no store
mov r12, QWORD PTR [r8]
mov r10, QWORD PTR [r8+8]
; A[13] x A[2]
mulx rcx, rax, QWORD PTR [r9+104]
adcx rbx, rax
adox r12, rcx
; A[14] x A[2]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r12, rax
adox r10, rcx
; Word 15 stays in rbx: no store
mov QWORD PTR [r8], r12
mov r11, QWORD PTR [r8+16]
; Next new word has no partial sum yet: mulx writes its high half directly
; A[14] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r10, rax
adox r11, rcx
; A[14] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx r12, rax, QWORD PTR [r9+112]
adcx r11, rax
adox r12, r13
mov QWORD PTR [r8+8], r10
mov QWORD PTR [r8+16], r11
; Next new word has no partial sum yet: mulx writes its high half directly
; A[14] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx r10, rax, QWORD PTR [r9+112]
adcx r12, rax
adox r10, r13
mov QWORD PTR [r8+24], r12
; Carry: add the previous diagonal's carry (r14) and CF into the top word,
; then collect the remaining CF and OF in r14
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+32], r10
; Diagonal 4: A[3] x A[4..13] then A[13] x A[4..8], starting at result word 7
mov r10, QWORD PTR [rbp+56]
mov r11, QWORD PTR [rbp+64]
mov r12, QWORD PTR [rbp+72]
; A[4] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r10, rax
adox r11, rcx
; A[5] x A[3]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+56], r10
mov QWORD PTR [rbp+64], r11
mov r10, QWORD PTR [rbp+80]
mov r11, QWORD PTR [rbp+88]
; A[6] x A[3]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r12, rax
adox r10, rcx
; A[7] x A[3]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+72], r12
mov QWORD PTR [rbp+80], r10
; Words 12 and 13 are held in r15 and rdi: no load
; A[8] x A[3]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r15, rcx
; A[9] x A[3]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r11
; Word 12 stays in r15: no store
; Words 14 and 15 are held in rsi and rbx: no load
; A[10] x A[3]
mulx rcx, rax, QWORD PTR [r9+80]
adcx rdi, rax
adox rsi, rcx
; A[11] x A[3]
mulx rcx, rax, QWORD PTR [r9+88]
adcx rsi, rax
adox rbx, rcx
; Words 13 and 14 stay in rdi and rsi: no store
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[12] x A[3]
mulx rcx, rax, QWORD PTR [r9+96]
adcx rbx, rax
adox r10, rcx
; A[13] x A[3]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r10, rax
adox r11, rcx
; Word 15 stays in rbx: no store
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[13] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r11, rax
adox r12, rcx
; A[13] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+8], r11
mov QWORD PTR [r8+16], r12
mov r11, QWORD PTR [r8+32]
; Next new word has no partial sum yet: mulx writes its high half directly
; A[13] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r10, rax
adox r11, rcx
; A[13] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx r12, rax, QWORD PTR [r9+104]
adcx r11, rax
adox r12, r13
mov QWORD PTR [r8+24], r10
mov QWORD PTR [r8+32], r11
; Next new word has no partial sum yet: mulx writes its high half directly
; A[13] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx r10, rax, QWORD PTR [r9+104]
adcx r12, rax
adox r10, r13
mov QWORD PTR [r8+40], r12
; Carry: add the previous diagonal's carry (r14) and CF into the top word,
; then collect the remaining CF and OF in r14
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+48], r10
; Diagonal 5: A[4] x A[5..12] then A[12] x A[5..11], starting at result word 9
mov r10, QWORD PTR [rbp+72]
mov r11, QWORD PTR [rbp+80]
mov r12, QWORD PTR [rbp+88]
; A[5] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r10, rax
adox r11, rcx
; A[6] x A[4]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+72], r10
mov QWORD PTR [rbp+80], r11
; Words 12 and 13 are held in r15 and rdi: no load
; A[7] x A[4]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r12, rax
adox r15, rcx
; A[8] x A[4]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r12
; Word 12 stays in r15: no store
; Words 14 and 15 are held in rsi and rbx: no load
; A[9] x A[4]
mulx rcx, rax, QWORD PTR [r9+72]
adcx rdi, rax
adox rsi, rcx
; A[10] x A[4]
mulx rcx, rax, QWORD PTR [r9+80]
adcx rsi, rax
adox rbx, rcx
; Words 13 and 14 stay in rdi and rsi: no store
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[11] x A[4]
mulx rcx, rax, QWORD PTR [r9+88]
adcx rbx, rax
adox r11, rcx
; A[12] x A[4]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r11, rax
adox r12, rcx
; Word 15 stays in rbx: no store
mov QWORD PTR [r8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
; A[12] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r12, rax
adox r10, rcx
; A[12] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+8], r12
mov QWORD PTR [r8+16], r10
mov r12, QWORD PTR [r8+32]
mov r10, QWORD PTR [r8+40]
; A[12] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r11, rax
adox r12, rcx
; A[12] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+24], r11
mov QWORD PTR [r8+32], r12
mov r11, QWORD PTR [r8+48]
; Next new word has no partial sum yet: mulx writes its high half directly
; A[12] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r10, rax
adox r11, rcx
; A[12] x A[10]
mov rdx, QWORD PTR [r9+80]
mulx r12, rax, QWORD PTR [r9+96]
adcx r11, rax
adox r12, r13
mov QWORD PTR [r8+40], r10
mov QWORD PTR [r8+48], r11
; Next new word has no partial sum yet: mulx writes its high half directly
; A[12] x A[11]
mov rdx, QWORD PTR [r9+88]
mulx r10, rax, QWORD PTR [r9+96]
adcx r12, rax
adox r10, r13
mov QWORD PTR [r8+56], r12
; Carry: add the previous diagonal's carry (r14) and CF into the top word,
; then collect the remaining CF and OF in r14
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+64], r10
; Diagonal 6: A[5] x A[6..11], A[11] x A[6..10] then A[13] x A[9..12],
; starting at result word 11
mov r10, QWORD PTR [rbp+88]
; Words 12 and 13 are held in r15 and rdi: no load
; A[6] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r15, rcx
; A[7] x A[5]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r10
; Word 12 stays in r15: no store
; Words 14 and 15 are held in rsi and rbx: no load
; A[8] x A[5]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rdi, rax
adox rsi, rcx
; A[9] x A[5]
mulx rcx, rax, QWORD PTR [r9+72]
adcx rsi, rax
adox rbx, rcx
; Words 13 and 14 stay in rdi and rsi: no store
mov r12, QWORD PTR [r8]
mov r10, QWORD PTR [r8+8]
; A[10] x A[5]
mulx rcx, rax, QWORD PTR [r9+80]
adcx rbx, rax
adox r12, rcx
; A[11] x A[5]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r12, rax
adox r10, rcx
; Word 15 stays in rbx: no store
mov QWORD PTR [r8], r12
mov r11, QWORD PTR [r8+16]
mov r12, QWORD PTR [r8+24]
; A[11] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r10, rax
adox r11, rcx
; A[11] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+8], r10
mov QWORD PTR [r8+16], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[11] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r12, rax
adox r10, rcx
; A[11] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+24], r12
mov QWORD PTR [r8+32], r10
mov r12, QWORD PTR [r8+48]
mov r10, QWORD PTR [r8+56]
; A[11] x A[10]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r11, rax
adox r12, rcx
; A[13] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+40], r11
mov QWORD PTR [r8+48], r12
mov r11, QWORD PTR [r8+64]
; Next new word has no partial sum yet: mulx writes its high half directly
; A[13] x A[10]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r10, rax
adox r11, rcx
; A[13] x A[11]
mov rdx, QWORD PTR [r9+88]
mulx r12, rax, QWORD PTR [r9+104]
adcx r11, rax
adox r12, r13
mov QWORD PTR [r8+56], r10
mov QWORD PTR [r8+64], r11
; Next new word has no partial sum yet: mulx writes its high half directly
; A[13] x A[12]
mov rdx, QWORD PTR [r9+96]
mulx r10, rax, QWORD PTR [r9+104]
adcx r12, rax
adox r10, r13
mov QWORD PTR [r8+72], r12
; Carry: add the previous diagonal's carry (r14) and CF into the top word,
; then collect the remaining CF and OF in r14
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+80], r10
; Diagonal 7: A[6] x A[7..10], A[10] x A[7..9] then A[14] x A[6..13],
; starting at result word 13
; Words 13..15 are held in rdi, rsi and rbx: no load
; A[7] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+56]
adcx rdi, rax
adox rsi, rcx
; A[8] x A[6]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rsi, rax
adox rbx, rcx
; Words 13 and 14 stay in rdi and rsi: no store
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[9] x A[6]
mulx rcx, rax, QWORD PTR [r9+72]
adcx rbx, rax
adox r10, rcx
; A[10] x A[6]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r10, rax
adox r11, rcx
; Word 15 stays in rbx: no store
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[10] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r11, rax
adox r12, rcx
; A[10] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+8], r11
mov QWORD PTR [r8+16], r12
mov r11, QWORD PTR [r8+32]
mov r12, QWORD PTR [r8+40]
; A[10] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r10, rax
adox r11, rcx
; A[14] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+24], r10
mov QWORD PTR [r8+32], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
; A[14] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r12, rax
adox r10, rcx
; A[14] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+40], r12
mov QWORD PTR [r8+48], r10
mov r12, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
; A[14] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r11, rax
adox r12, rcx
; A[14] x A[10]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+56], r11
mov QWORD PTR [r8+64], r12
mov r11, QWORD PTR [r8+80]
; Next new word has no partial sum yet: mulx writes its high half directly
; A[14] x A[11]
mov rdx, QWORD PTR [r9+88]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r10, rax
adox r11, rcx
; A[14] x A[12]
mov rdx, QWORD PTR [r9+96]
mulx r12, rax, QWORD PTR [r9+112]
adcx r11, rax
adox r12, r13
mov QWORD PTR [r8+72], r10
mov QWORD PTR [r8+80], r11
; Next new word has no partial sum yet: mulx writes its high half directly
; A[14] x A[13]
mov rdx, QWORD PTR [r9+104]
mulx r10, rax, QWORD PTR [r9+112]
adcx r12, rax
adox r10, r13
mov QWORD PTR [r8+88], r12
; Carry: add the previous diagonal's carry (r14) and CF into the top word,
; then collect the remaining CF and OF in r14
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+96], r10
; Diagonal 8: A[7] x A[8..9], A[9] x A[8] then A[15] x A[3..14],
; starting at result word 15 (rbx)
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[8] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rbx, rax
adox r11, rcx
; A[9] x A[7]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r11, rax
adox r12, rcx
; Word 15 stays in rbx: no store
mov QWORD PTR [r8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
; A[9] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r12, rax
adox r10, rcx
; A[15] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+8], r12
mov QWORD PTR [r8+16], r10
mov r12, QWORD PTR [r8+32]
mov r10, QWORD PTR [r8+40]
; A[15] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r11, rax
adox r12, rcx
; A[15] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+24], r11
mov QWORD PTR [r8+32], r12
mov r11, QWORD PTR [r8+48]
mov r12, QWORD PTR [r8+56]
; A[15] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r10, rax
adox r11, rcx
; A[15] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+40], r10
mov QWORD PTR [r8+48], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
; A[15] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r12, rax
adox r10, rcx
; A[15] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+56], r12
mov QWORD PTR [r8+64], r10
mov r12, QWORD PTR [r8+80]
mov r10, QWORD PTR [r8+88]
; A[15] x A[10]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r11, rax
adox r12, rcx
; A[15] x A[11]
mov rdx, QWORD PTR [r9+88]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+72], r11
mov QWORD PTR [r8+80], r12
mov r11, QWORD PTR [r8+96]
; Next new word has no partial sum yet: mulx writes its high half directly
; A[15] x A[12]
mov rdx, QWORD PTR [r9+96]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r10, rax
adox r11, rcx
; A[15] x A[13]
mov rdx, QWORD PTR [r9+104]
mulx r12, rax, QWORD PTR [r9+120]
adcx r11, rax
adox r12, r13
mov QWORD PTR [r8+88], r10
mov QWORD PTR [r8+96], r11
; Next new word has no partial sum yet: mulx writes its high half directly
; A[15] x A[14]
mov rdx, QWORD PTR [r9+112]
mulx r10, rax, QWORD PTR [r9+120]
adcx r12, rax
adox r10, r13
mov QWORD PTR [r8+104], r12
; Carry: add the previous diagonal's carry (r14) and CF into word 30, then
; collect the remaining CF and OF in r14, which becomes word 31
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+112], r10
mov QWORD PTR [r8+120], r14
; Double and Add in A[i] x A[i]
; adox x, x doubles a word through the OF chain; adcx adds the halves of
; the square through the CF chain. Both chains run unbroken to word 31.
mov r11, QWORD PTR [rbp+8]
; A[0] x A[0]
mov rdx, QWORD PTR [r9]
mulx rcx, rax, rdx
; Word 0 is just the low half of A[0] x A[0]
mov QWORD PTR [rbp], rax
adox r11, r11
adcx r11, rcx
mov QWORD PTR [rbp+8], r11
mov r10, QWORD PTR [rbp+16]
mov r11, QWORD PTR [rbp+24]
; A[1] x A[1]
mov rdx, QWORD PTR [r9+8]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+16], r10
mov QWORD PTR [rbp+24], r11
mov r10, QWORD PTR [rbp+32]
mov r11, QWORD PTR [rbp+40]
; A[2] x A[2]
mov rdx, QWORD PTR [r9+16]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+32], r10
mov QWORD PTR [rbp+40], r11
mov r10, QWORD PTR [rbp+48]
mov r11, QWORD PTR [rbp+56]
; A[3] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+48], r10
mov QWORD PTR [rbp+56], r11
mov r10, QWORD PTR [rbp+64]
mov r11, QWORD PTR [rbp+72]
; A[4] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+64], r10
mov QWORD PTR [rbp+72], r11
mov r10, QWORD PTR [rbp+80]
mov r11, QWORD PTR [rbp+88]
; A[5] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+80], r10
mov QWORD PTR [rbp+88], r11
; Words 12..15 are updated in place in r15, rdi, rsi and rbx
; A[6] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, rdx
adox r15, r15
adox rdi, rdi
adcx r15, rax
adcx rdi, rcx
; A[7] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, rdx
adox rsi, rsi
adox rbx, rbx
adcx rsi, rax
adcx rbx, rcx
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[8] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8], r10
mov QWORD PTR [r8+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
; A[9] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+16], r10
mov QWORD PTR [r8+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[10] x A[10]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+32], r10
mov QWORD PTR [r8+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
; A[11] x A[11]
mov rdx, QWORD PTR [r9+88]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+48], r10
mov QWORD PTR [r8+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
; A[12] x A[12]
mov rdx, QWORD PTR [r9+96]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+64], r10
mov QWORD PTR [r8+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
; A[13] x A[13]
mov rdx, QWORD PTR [r9+104]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+80], r10
mov QWORD PTR [r8+88], r11
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
; A[14] x A[14]
mov rdx, QWORD PTR [r9+112]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+96], r10
mov QWORD PTR [r8+104], r11
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
; A[15] x A[15]
mov rdx, QWORD PTR [r9+120]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+112], r10
mov QWORD PTR [r8+120], r11
; Store result words 12..15 (r + 96 .. r + 120) from their registers.
; All reads of a are complete, so this is safe when a == r.
mov QWORD PTR [r8+-32], r15
mov QWORD PTR [r8+-24], rdi
mov QWORD PTR [r8+-16], rsi
mov QWORD PTR [r8+-8], rbx
; r8 = r again
sub r8, 128
; When a != r, words 0..11 were written directly to r: nothing to copy
cmp r9, r8
jne L_end_2048_sqr_avx2_16
; a == r: copy words 0..11 (96 bytes) from the stack buffer to r
vmovdqu xmm0, OWORD PTR [rbp]
vmovups OWORD PTR [r8], xmm0
vmovdqu xmm0, OWORD PTR [rbp+16]
vmovups OWORD PTR [r8+16], xmm0
vmovdqu xmm0, OWORD PTR [rbp+32]
vmovups OWORD PTR [r8+32], xmm0
vmovdqu xmm0, OWORD PTR [rbp+48]
vmovups OWORD PTR [r8+48], xmm0
vmovdqu xmm0, OWORD PTR [rbp+64]
vmovups OWORD PTR [r8+64], xmm0
vmovdqu xmm0, OWORD PTR [rbp+80]
vmovups OWORD PTR [r8+80], xmm0
L_end_2048_sqr_avx2_16:
; Release the stack buffer and restore registers in reverse push order
add rsp, 128
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rbp
ret
sp_2048_sqr_avx2_16 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r. (r = a * a)
; *
; * Karatsuba: ah^2, al^2, (al - ah)^2
; *
; * r A single precision integer.
; * a A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_2048_sqr_32 PROC
sub rsp, 272
mov QWORD PTR [rsp+256], rcx
mov QWORD PTR [rsp+264], rdx
mov r9, 0
mov r10, rsp
lea r11, QWORD PTR [rdx+128]
mov rax, QWORD PTR [rdx]
sub rax, QWORD PTR [r11]
mov r8, QWORD PTR [rdx+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [r11+8]
mov rax, QWORD PTR [rdx+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [r11+16]
mov r8, QWORD PTR [rdx+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [r11+24]
mov rax, QWORD PTR [rdx+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [r11+32]
mov r8, QWORD PTR [rdx+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [r11+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [r11+48]
mov r8, QWORD PTR [rdx+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [r11+56]
mov rax, QWORD PTR [rdx+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [r11+64]
mov r8, QWORD PTR [rdx+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [r11+72]
mov rax, QWORD PTR [rdx+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [r11+80]
mov r8, QWORD PTR [rdx+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [r11+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [r11+96]
mov r8, QWORD PTR [rdx+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [r11+104]
mov rax, QWORD PTR [rdx+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [r11+112]
mov r8, QWORD PTR [rdx+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [r11+120]
mov QWORD PTR [r10+120], r8
sbb r9, 0
; Cond Negate
mov rax, QWORD PTR [r10]
mov r11, r9
xor rax, r9
neg r11
sub rax, r9
mov r8, QWORD PTR [r10+8]
sbb r11, 0
mov QWORD PTR [r10], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+16]
setc r11b
mov QWORD PTR [r10+8], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+24]
setc r11b
mov QWORD PTR [r10+16], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+32]
setc r11b
mov QWORD PTR [r10+24], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+40]
setc r11b
mov QWORD PTR [r10+32], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+48]
setc r11b
mov QWORD PTR [r10+40], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+56]
setc r11b
mov QWORD PTR [r10+48], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+64]
setc r11b
mov QWORD PTR [r10+56], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+72]
setc r11b
mov QWORD PTR [r10+64], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+80]
setc r11b
mov QWORD PTR [r10+72], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+88]
setc r11b
mov QWORD PTR [r10+80], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+96]
setc r11b
mov QWORD PTR [r10+88], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+104]
setc r11b
mov QWORD PTR [r10+96], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+112]
setc r11b
mov QWORD PTR [r10+104], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+120]
setc r11b
mov QWORD PTR [r10+112], rax
xor r8, r9
add r8, r11
mov QWORD PTR [r10+120], r8
mov rdx, r10
mov rcx, rsp
call sp_2048_sqr_16
mov rdx, QWORD PTR [rsp+264]
mov rcx, QWORD PTR [rsp+256]
add rdx, 128
add rcx, 256
call sp_2048_sqr_16
mov rdx, QWORD PTR [rsp+264]
mov rcx, QWORD PTR [rsp+256]
call sp_2048_sqr_16
IFDEF _WIN64
mov rdx, QWORD PTR [rsp+264]
mov rcx, QWORD PTR [rsp+256]
ENDIF
mov rdx, QWORD PTR [rsp+256]
lea r10, QWORD PTR [rsp+128]
add rdx, 384
mov r9, 0
mov r8, QWORD PTR [r10+-128]
sub r8, QWORD PTR [rdx+-128]
mov rax, QWORD PTR [r10+-120]
mov QWORD PTR [r10+-128], r8
sbb rax, QWORD PTR [rdx+-120]
mov r8, QWORD PTR [r10+-112]
mov QWORD PTR [r10+-120], rax
sbb r8, QWORD PTR [rdx+-112]
mov rax, QWORD PTR [r10+-104]
mov QWORD PTR [r10+-112], r8
sbb rax, QWORD PTR [rdx+-104]
mov r8, QWORD PTR [r10+-96]
mov QWORD PTR [r10+-104], rax
sbb r8, QWORD PTR [rdx+-96]
mov rax, QWORD PTR [r10+-88]
mov QWORD PTR [r10+-96], r8
sbb rax, QWORD PTR [rdx+-88]
mov r8, QWORD PTR [r10+-80]
mov QWORD PTR [r10+-88], rax
sbb r8, QWORD PTR [rdx+-80]
mov rax, QWORD PTR [r10+-72]
mov QWORD PTR [r10+-80], r8
sbb rax, QWORD PTR [rdx+-72]
mov r8, QWORD PTR [r10+-64]
mov QWORD PTR [r10+-72], rax
sbb r8, QWORD PTR [rdx+-64]
mov rax, QWORD PTR [r10+-56]
mov QWORD PTR [r10+-64], r8
sbb rax, QWORD PTR [rdx+-56]
mov r8, QWORD PTR [r10+-48]
mov QWORD PTR [r10+-56], rax
sbb r8, QWORD PTR [rdx+-48]
mov rax, QWORD PTR [r10+-40]
mov QWORD PTR [r10+-48], r8
sbb rax, QWORD PTR [rdx+-40]
mov r8, QWORD PTR [r10+-32]
mov QWORD PTR [r10+-40], rax
sbb r8, QWORD PTR [rdx+-32]
mov rax, QWORD PTR [r10+-24]
mov QWORD PTR [r10+-32], r8
sbb rax, QWORD PTR [rdx+-24]
mov r8, QWORD PTR [r10+-16]
mov QWORD PTR [r10+-24], rax
sbb r8, QWORD PTR [rdx+-16]
mov rax, QWORD PTR [r10+-8]
mov QWORD PTR [r10+-16], r8
sbb rax, QWORD PTR [rdx+-8]
mov r8, QWORD PTR [r10]
mov QWORD PTR [r10+-8], rax
sbb r8, QWORD PTR [rdx]
mov rax, QWORD PTR [r10+8]
mov QWORD PTR [r10], r8
sbb rax, QWORD PTR [rdx+8]
mov r8, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], rax
sbb r8, QWORD PTR [rdx+16]
mov rax, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], r8
sbb rax, QWORD PTR [rdx+24]
mov r8, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], rax
sbb r8, QWORD PTR [rdx+32]
mov rax, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], r8
sbb rax, QWORD PTR [rdx+40]
mov r8, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], rax
sbb r8, QWORD PTR [rdx+48]
mov rax, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], r8
sbb rax, QWORD PTR [rdx+56]
mov r8, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], rax
sbb r8, QWORD PTR [rdx+64]
mov rax, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], r8
sbb rax, QWORD PTR [rdx+72]
mov r8, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], rax
sbb r8, QWORD PTR [rdx+80]
mov rax, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], r8
sbb rax, QWORD PTR [rdx+88]
mov r8, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], rax
sbb r8, QWORD PTR [rdx+96]
mov rax, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], r8
sbb rax, QWORD PTR [rdx+104]
mov r8, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], rax
sbb r8, QWORD PTR [rdx+112]
mov rax, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], r8
sbb rax, QWORD PTR [rdx+120]
mov QWORD PTR [r10+120], rax
sbb r9, 0
sub rdx, 256
mov r8, QWORD PTR [r10+-128]
sub r8, QWORD PTR [rdx+-128]
mov rax, QWORD PTR [r10+-120]
mov QWORD PTR [r10+-128], r8
sbb rax, QWORD PTR [rdx+-120]
mov r8, QWORD PTR [r10+-112]
mov QWORD PTR [r10+-120], rax
sbb r8, QWORD PTR [rdx+-112]
mov rax, QWORD PTR [r10+-104]
mov QWORD PTR [r10+-112], r8
sbb rax, QWORD PTR [rdx+-104]
mov r8, QWORD PTR [r10+-96]
mov QWORD PTR [r10+-104], rax
sbb r8, QWORD PTR [rdx+-96]
mov rax, QWORD PTR [r10+-88]
mov QWORD PTR [r10+-96], r8
sbb rax, QWORD PTR [rdx+-88]
mov r8, QWORD PTR [r10+-80]
mov QWORD PTR [r10+-88], rax
sbb r8, QWORD PTR [rdx+-80]
mov rax, QWORD PTR [r10+-72]
mov QWORD PTR [r10+-80], r8
sbb rax, QWORD PTR [rdx+-72]
mov r8, QWORD PTR [r10+-64]
mov QWORD PTR [r10+-72], rax
sbb r8, QWORD PTR [rdx+-64]
mov rax, QWORD PTR [r10+-56]
mov QWORD PTR [r10+-64], r8
sbb rax, QWORD PTR [rdx+-56]
mov r8, QWORD PTR [r10+-48]
mov QWORD PTR [r10+-56], rax
sbb r8, QWORD PTR [rdx+-48]
mov rax, QWORD PTR [r10+-40]
mov QWORD PTR [r10+-48], r8
sbb rax, QWORD PTR [rdx+-40]
mov r8, QWORD PTR [r10+-32]
mov QWORD PTR [r10+-40], rax
sbb r8, QWORD PTR [rdx+-32]
mov rax, QWORD PTR [r10+-24]
mov QWORD PTR [r10+-32], r8
sbb rax, QWORD PTR [rdx+-24]
mov r8, QWORD PTR [r10+-16]
mov QWORD PTR [r10+-24], rax
sbb r8, QWORD PTR [rdx+-16]
mov rax, QWORD PTR [r10+-8]
mov QWORD PTR [r10+-16], r8
sbb rax, QWORD PTR [rdx+-8]
mov r8, QWORD PTR [r10]
mov QWORD PTR [r10+-8], rax
sbb r8, QWORD PTR [rdx]
mov rax, QWORD PTR [r10+8]
mov QWORD PTR [r10], r8
sbb rax, QWORD PTR [rdx+8]
mov r8, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], rax
sbb r8, QWORD PTR [rdx+16]
mov rax, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], r8
sbb rax, QWORD PTR [rdx+24]
mov r8, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], rax
sbb r8, QWORD PTR [rdx+32]
mov rax, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], r8
sbb rax, QWORD PTR [rdx+40]
mov r8, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], rax
sbb r8, QWORD PTR [rdx+48]
mov rax, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], r8
sbb rax, QWORD PTR [rdx+56]
mov r8, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], rax
sbb r8, QWORD PTR [rdx+64]
mov rax, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], r8
sbb rax, QWORD PTR [rdx+72]
mov r8, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], rax
sbb r8, QWORD PTR [rdx+80]
mov rax, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], r8
sbb rax, QWORD PTR [rdx+88]
mov r8, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], rax
sbb r8, QWORD PTR [rdx+96]
mov rax, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], r8
sbb rax, QWORD PTR [rdx+104]
mov r8, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], rax
sbb r8, QWORD PTR [rdx+112]
mov rax, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], r8
sbb rax, QWORD PTR [rdx+120]
mov QWORD PTR [r10+120], rax
sbb r9, 0
mov rcx, QWORD PTR [rsp+256]
neg r9
add rcx, 256
mov r8, QWORD PTR [rcx+-128]
sub r8, QWORD PTR [r10+-128]
mov rax, QWORD PTR [rcx+-120]
mov QWORD PTR [rcx+-128], r8
sbb rax, QWORD PTR [r10+-120]
mov r8, QWORD PTR [rcx+-112]
mov QWORD PTR [rcx+-120], rax
sbb r8, QWORD PTR [r10+-112]
mov rax, QWORD PTR [rcx+-104]
mov QWORD PTR [rcx+-112], r8
sbb rax, QWORD PTR [r10+-104]
mov r8, QWORD PTR [rcx+-96]
mov QWORD PTR [rcx+-104], rax
sbb r8, QWORD PTR [r10+-96]
mov rax, QWORD PTR [rcx+-88]
mov QWORD PTR [rcx+-96], r8
sbb rax, QWORD PTR [r10+-88]
mov r8, QWORD PTR [rcx+-80]
mov QWORD PTR [rcx+-88], rax
sbb r8, QWORD PTR [r10+-80]
mov rax, QWORD PTR [rcx+-72]
mov QWORD PTR [rcx+-80], r8
sbb rax, QWORD PTR [r10+-72]
mov r8, QWORD PTR [rcx+-64]
mov QWORD PTR [rcx+-72], rax
sbb r8, QWORD PTR [r10+-64]
mov rax, QWORD PTR [rcx+-56]
mov QWORD PTR [rcx+-64], r8
sbb rax, QWORD PTR [r10+-56]
mov r8, QWORD PTR [rcx+-48]
mov QWORD PTR [rcx+-56], rax
sbb r8, QWORD PTR [r10+-48]
mov rax, QWORD PTR [rcx+-40]
mov QWORD PTR [rcx+-48], r8
sbb rax, QWORD PTR [r10+-40]
mov r8, QWORD PTR [rcx+-32]
mov QWORD PTR [rcx+-40], rax
sbb r8, QWORD PTR [r10+-32]
mov rax, QWORD PTR [rcx+-24]
mov QWORD PTR [rcx+-32], r8
sbb rax, QWORD PTR [r10+-24]
mov r8, QWORD PTR [rcx+-16]
mov QWORD PTR [rcx+-24], rax
sbb r8, QWORD PTR [r10+-16]
mov rax, QWORD PTR [rcx+-8]
mov QWORD PTR [rcx+-16], r8
sbb rax, QWORD PTR [r10+-8]
mov r8, QWORD PTR [rcx]
mov QWORD PTR [rcx+-8], rax
sbb r8, QWORD PTR [r10]
mov rax, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
sbb rax, QWORD PTR [r10+8]
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], rax
sbb r8, QWORD PTR [r10+16]
mov rax, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
sbb rax, QWORD PTR [r10+24]
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], rax
sbb r8, QWORD PTR [r10+32]
mov rax, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
sbb rax, QWORD PTR [r10+40]
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], rax
sbb r8, QWORD PTR [r10+48]
mov rax, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
sbb rax, QWORD PTR [r10+56]
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], rax
sbb r8, QWORD PTR [r10+64]
mov rax, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
sbb rax, QWORD PTR [r10+72]
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], rax
sbb r8, QWORD PTR [r10+80]
mov rax, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
sbb rax, QWORD PTR [r10+88]
mov r8, QWORD PTR [rcx+96]
mov QWORD PTR [rcx+88], rax
sbb r8, QWORD PTR [r10+96]
mov rax, QWORD PTR [rcx+104]
mov QWORD PTR [rcx+96], r8
sbb rax, QWORD PTR [r10+104]
mov r8, QWORD PTR [rcx+112]
mov QWORD PTR [rcx+104], rax
sbb r8, QWORD PTR [r10+112]
mov rax, QWORD PTR [rcx+120]
mov QWORD PTR [rcx+112], r8
sbb rax, QWORD PTR [r10+120]
mov QWORD PTR [rcx+120], rax
sbb r9, 0
mov rcx, QWORD PTR [rsp+256]
add rcx, 384
; Add in word
mov r8, QWORD PTR [rcx]
add r8, r9
mov rax, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
adc rax, 0
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], rax
adc r8, 0
mov rax, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
adc rax, 0
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], rax
adc r8, 0
mov rax, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
adc rax, 0
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], rax
adc r8, 0
mov rax, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
adc rax, 0
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], rax
adc r8, 0
mov rax, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
adc rax, 0
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], rax
adc r8, 0
mov rax, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
adc rax, 0
mov r8, QWORD PTR [rcx+96]
mov QWORD PTR [rcx+88], rax
adc r8, 0
mov rax, QWORD PTR [rcx+104]
mov QWORD PTR [rcx+96], r8
adc rax, 0
mov r8, QWORD PTR [rcx+112]
mov QWORD PTR [rcx+104], rax
adc r8, 0
mov rax, QWORD PTR [rcx+120]
mov QWORD PTR [rcx+112], r8
adc rax, 0
mov QWORD PTR [rcx+120], rax
mov rdx, QWORD PTR [rsp+264]
mov rcx, QWORD PTR [rsp+256]
add rsp, 272
ret
sp_2048_sqr_32 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * Karatsuba: ah^2, al^2, (al - ah)^2
; *
; * al is a[0..15] and ah is a[16..31] (64-bit words). Three 16-word squares
; * are computed by sp_2048_sqr_avx2_16 and combined with word-wise
; * subtract/add chains.
; *
; * r A single precision integer. Passed in rcx; words r[0..63] are addressed.
; * a A single precision integer. Passed in rdx; words a[0..31] are read.
; *
; * Stack frame (272 bytes):
; *   [rsp+0 .. rsp+255]  scratch: |al - ah| (16 words), then the 32 words
; *                       read back after the first square
; *   [rsp+256]           saved r
; *   [rsp+264]           saved a
; *
; * NOTE: mov does not modify flags, so the loads/stores interleaved with
; * sub/sbb/adc below do not disturb the carry/borrow chains.
; */
_text SEGMENT READONLY PARA
sp_2048_sqr_avx2_32 PROC
sub rsp, 272
; Save the arguments; they are reloaded after each call.
mov QWORD PTR [rsp+256], rcx
mov QWORD PTR [rsp+264], rdx
; r9 becomes the sign mask of (al - ah): 0 or all ones.
mov r9, 0
mov r10, rsp
; r11 = &a[16] (ah)
lea r11, QWORD PTR [rdx+128]
; tmp[0..15] = al - ah, borrow propagated through all 16 words.
mov rax, QWORD PTR [rdx]
sub rax, QWORD PTR [r11]
mov r8, QWORD PTR [rdx+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [r11+8]
mov rax, QWORD PTR [rdx+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [r11+16]
mov r8, QWORD PTR [rdx+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [r11+24]
mov rax, QWORD PTR [rdx+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [r11+32]
mov r8, QWORD PTR [rdx+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [r11+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [r11+48]
mov r8, QWORD PTR [rdx+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [r11+56]
mov rax, QWORD PTR [rdx+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [r11+64]
mov r8, QWORD PTR [rdx+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [r11+72]
mov rax, QWORD PTR [rdx+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [r11+80]
mov r8, QWORD PTR [rdx+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [r11+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [r11+96]
mov r8, QWORD PTR [rdx+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [r11+104]
mov rax, QWORD PTR [rdx+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [r11+112]
mov r8, QWORD PTR [rdx+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [r11+120]
mov QWORD PTR [r10+120], r8
; r9 = 0 - borrow: all ones when al < ah, otherwise 0.
sbb r9, 0
; Cond Negate
; tmp = (tmp XOR mask) - mask, i.e. two's-complement negate when mask is
; all ones, unchanged when mask is 0. r11 carries the "+1" between words:
; it is seeded from the first word and then refreshed by setc after each add.
mov rax, QWORD PTR [r10]
mov r11, r9
xor rax, r9
; r11 = 1 when negating, 0 otherwise.
neg r11
sub rax, r9
mov r8, QWORD PTR [r10+8]
; Remove the borrow of the sub above: r11 stays 1 only when the +1 rippled
; out of word 0.
sbb r11, 0
mov QWORD PTR [r10], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+16]
; r11 is 0 or 1 here, so writing only its low byte yields the full carry.
setc r11b
mov QWORD PTR [r10+8], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+24]
setc r11b
mov QWORD PTR [r10+16], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+32]
setc r11b
mov QWORD PTR [r10+24], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+40]
setc r11b
mov QWORD PTR [r10+32], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+48]
setc r11b
mov QWORD PTR [r10+40], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+56]
setc r11b
mov QWORD PTR [r10+48], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+64]
setc r11b
mov QWORD PTR [r10+56], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+72]
setc r11b
mov QWORD PTR [r10+64], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+80]
setc r11b
mov QWORD PTR [r10+72], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+88]
setc r11b
mov QWORD PTR [r10+80], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+96]
setc r11b
mov QWORD PTR [r10+88], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+104]
setc r11b
mov QWORD PTR [r10+96], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+112]
setc r11b
mov QWORD PTR [r10+104], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+120]
setc r11b
mov QWORD PTR [r10+112], rax
xor r8, r9
add r8, r11
mov QWORD PTR [r10+120], r8
; Square 1: source and destination are both the stack scratch area
; (|al - ah| squared, in place).
mov rdx, r10
mov rcx, rsp
call sp_2048_sqr_avx2_16
; Square 2: destination r + 256 bytes (r[32..]), source a + 128 bytes (ah).
mov rdx, QWORD PTR [rsp+264]
mov rcx, QWORD PTR [rsp+256]
add rdx, 128
add rcx, 256
call sp_2048_sqr_avx2_16
; Square 3: destination r (r[0..]), source a (al).
mov rdx, QWORD PTR [rsp+264]
mov rcx, QWORD PTR [rsp+256]
call sp_2048_sqr_avx2_16
; Reload the saved arguments after the call (rdx is overwritten again
; just below).
IFDEF _WIN64
mov rdx, QWORD PTR [rsp+264]
mov rcx, QWORD PTR [rsp+256]
ENDIF
; r10 and rdx are biased by +128 bytes so that all 32 words of each operand
; are reachable with displacements in -128..+120.
mov rdx, QWORD PTR [rsp+256]
lea r10, QWORD PTR [rsp+128]
add rdx, 384
; r9 accumulates the borrows of the next two subtractions.
mov r9, 0
; tmp[0..31] -= r[32..63]
mov r8, QWORD PTR [r10+-128]
sub r8, QWORD PTR [rdx+-128]
mov rax, QWORD PTR [r10+-120]
mov QWORD PTR [r10+-128], r8
sbb rax, QWORD PTR [rdx+-120]
mov r8, QWORD PTR [r10+-112]
mov QWORD PTR [r10+-120], rax
sbb r8, QWORD PTR [rdx+-112]
mov rax, QWORD PTR [r10+-104]
mov QWORD PTR [r10+-112], r8
sbb rax, QWORD PTR [rdx+-104]
mov r8, QWORD PTR [r10+-96]
mov QWORD PTR [r10+-104], rax
sbb r8, QWORD PTR [rdx+-96]
mov rax, QWORD PTR [r10+-88]
mov QWORD PTR [r10+-96], r8
sbb rax, QWORD PTR [rdx+-88]
mov r8, QWORD PTR [r10+-80]
mov QWORD PTR [r10+-88], rax
sbb r8, QWORD PTR [rdx+-80]
mov rax, QWORD PTR [r10+-72]
mov QWORD PTR [r10+-80], r8
sbb rax, QWORD PTR [rdx+-72]
mov r8, QWORD PTR [r10+-64]
mov QWORD PTR [r10+-72], rax
sbb r8, QWORD PTR [rdx+-64]
mov rax, QWORD PTR [r10+-56]
mov QWORD PTR [r10+-64], r8
sbb rax, QWORD PTR [rdx+-56]
mov r8, QWORD PTR [r10+-48]
mov QWORD PTR [r10+-56], rax
sbb r8, QWORD PTR [rdx+-48]
mov rax, QWORD PTR [r10+-40]
mov QWORD PTR [r10+-48], r8
sbb rax, QWORD PTR [rdx+-40]
mov r8, QWORD PTR [r10+-32]
mov QWORD PTR [r10+-40], rax
sbb r8, QWORD PTR [rdx+-32]
mov rax, QWORD PTR [r10+-24]
mov QWORD PTR [r10+-32], r8
sbb rax, QWORD PTR [rdx+-24]
mov r8, QWORD PTR [r10+-16]
mov QWORD PTR [r10+-24], rax
sbb r8, QWORD PTR [rdx+-16]
mov rax, QWORD PTR [r10+-8]
mov QWORD PTR [r10+-16], r8
sbb rax, QWORD PTR [rdx+-8]
mov r8, QWORD PTR [r10]
mov QWORD PTR [r10+-8], rax
sbb r8, QWORD PTR [rdx]
mov rax, QWORD PTR [r10+8]
mov QWORD PTR [r10], r8
sbb rax, QWORD PTR [rdx+8]
mov r8, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], rax
sbb r8, QWORD PTR [rdx+16]
mov rax, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], r8
sbb rax, QWORD PTR [rdx+24]
mov r8, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], rax
sbb r8, QWORD PTR [rdx+32]
mov rax, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], r8
sbb rax, QWORD PTR [rdx+40]
mov r8, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], rax
sbb r8, QWORD PTR [rdx+48]
mov rax, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], r8
sbb rax, QWORD PTR [rdx+56]
mov r8, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], rax
sbb r8, QWORD PTR [rdx+64]
mov rax, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], r8
sbb rax, QWORD PTR [rdx+72]
mov r8, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], rax
sbb r8, QWORD PTR [rdx+80]
mov rax, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], r8
sbb rax, QWORD PTR [rdx+88]
mov r8, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], rax
sbb r8, QWORD PTR [rdx+96]
mov rax, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], r8
sbb rax, QWORD PTR [rdx+104]
mov r8, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], rax
sbb r8, QWORD PTR [rdx+112]
mov rax, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], r8
sbb rax, QWORD PTR [rdx+120]
mov QWORD PTR [r10+120], rax
; r9 -= borrow out of the subtraction above.
sbb r9, 0
; Re-point rdx at r + 128 bytes so the same displacements address r[0..31].
sub rdx, 256
; tmp[0..31] -= r[0..31]
mov r8, QWORD PTR [r10+-128]
sub r8, QWORD PTR [rdx+-128]
mov rax, QWORD PTR [r10+-120]
mov QWORD PTR [r10+-128], r8
sbb rax, QWORD PTR [rdx+-120]
mov r8, QWORD PTR [r10+-112]
mov QWORD PTR [r10+-120], rax
sbb r8, QWORD PTR [rdx+-112]
mov rax, QWORD PTR [r10+-104]
mov QWORD PTR [r10+-112], r8
sbb rax, QWORD PTR [rdx+-104]
mov r8, QWORD PTR [r10+-96]
mov QWORD PTR [r10+-104], rax
sbb r8, QWORD PTR [rdx+-96]
mov rax, QWORD PTR [r10+-88]
mov QWORD PTR [r10+-96], r8
sbb rax, QWORD PTR [rdx+-88]
mov r8, QWORD PTR [r10+-80]
mov QWORD PTR [r10+-88], rax
sbb r8, QWORD PTR [rdx+-80]
mov rax, QWORD PTR [r10+-72]
mov QWORD PTR [r10+-80], r8
sbb rax, QWORD PTR [rdx+-72]
mov r8, QWORD PTR [r10+-64]
mov QWORD PTR [r10+-72], rax
sbb r8, QWORD PTR [rdx+-64]
mov rax, QWORD PTR [r10+-56]
mov QWORD PTR [r10+-64], r8
sbb rax, QWORD PTR [rdx+-56]
mov r8, QWORD PTR [r10+-48]
mov QWORD PTR [r10+-56], rax
sbb r8, QWORD PTR [rdx+-48]
mov rax, QWORD PTR [r10+-40]
mov QWORD PTR [r10+-48], r8
sbb rax, QWORD PTR [rdx+-40]
mov r8, QWORD PTR [r10+-32]
mov QWORD PTR [r10+-40], rax
sbb r8, QWORD PTR [rdx+-32]
mov rax, QWORD PTR [r10+-24]
mov QWORD PTR [r10+-32], r8
sbb rax, QWORD PTR [rdx+-24]
mov r8, QWORD PTR [r10+-16]
mov QWORD PTR [r10+-24], rax
sbb r8, QWORD PTR [rdx+-16]
mov rax, QWORD PTR [r10+-8]
mov QWORD PTR [r10+-16], r8
sbb rax, QWORD PTR [rdx+-8]
mov r8, QWORD PTR [r10]
mov QWORD PTR [r10+-8], rax
sbb r8, QWORD PTR [rdx]
mov rax, QWORD PTR [r10+8]
mov QWORD PTR [r10], r8
sbb rax, QWORD PTR [rdx+8]
mov r8, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], rax
sbb r8, QWORD PTR [rdx+16]
mov rax, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], r8
sbb rax, QWORD PTR [rdx+24]
mov r8, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], rax
sbb r8, QWORD PTR [rdx+32]
mov rax, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], r8
sbb rax, QWORD PTR [rdx+40]
mov r8, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], rax
sbb r8, QWORD PTR [rdx+48]
mov rax, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], r8
sbb rax, QWORD PTR [rdx+56]
mov r8, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], rax
sbb r8, QWORD PTR [rdx+64]
mov rax, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], r8
sbb rax, QWORD PTR [rdx+72]
mov r8, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], rax
sbb r8, QWORD PTR [rdx+80]
mov rax, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], r8
sbb rax, QWORD PTR [rdx+88]
mov r8, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], rax
sbb r8, QWORD PTR [rdx+96]
mov rax, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], r8
sbb rax, QWORD PTR [rdx+104]
mov r8, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], rax
sbb r8, QWORD PTR [rdx+112]
mov rax, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], r8
sbb rax, QWORD PTR [rdx+120]
mov QWORD PTR [r10+120], rax
; r9 now holds minus the number of borrows (0, -1 or -2).
sbb r9, 0
mov rcx, QWORD PTR [rsp+256]
; r9 = number of borrows so far, as a non-negative count.
neg r9
; rcx = r + 256 bytes, so displacements -128..+120 address r[16..47].
add rcx, 256
; r[16..47] -= tmp[0..31]
mov r8, QWORD PTR [rcx+-128]
sub r8, QWORD PTR [r10+-128]
mov rax, QWORD PTR [rcx+-120]
mov QWORD PTR [rcx+-128], r8
sbb rax, QWORD PTR [r10+-120]
mov r8, QWORD PTR [rcx+-112]
mov QWORD PTR [rcx+-120], rax
sbb r8, QWORD PTR [r10+-112]
mov rax, QWORD PTR [rcx+-104]
mov QWORD PTR [rcx+-112], r8
sbb rax, QWORD PTR [r10+-104]
mov r8, QWORD PTR [rcx+-96]
mov QWORD PTR [rcx+-104], rax
sbb r8, QWORD PTR [r10+-96]
mov rax, QWORD PTR [rcx+-88]
mov QWORD PTR [rcx+-96], r8
sbb rax, QWORD PTR [r10+-88]
mov r8, QWORD PTR [rcx+-80]
mov QWORD PTR [rcx+-88], rax
sbb r8, QWORD PTR [r10+-80]
mov rax, QWORD PTR [rcx+-72]
mov QWORD PTR [rcx+-80], r8
sbb rax, QWORD PTR [r10+-72]
mov r8, QWORD PTR [rcx+-64]
mov QWORD PTR [rcx+-72], rax
sbb r8, QWORD PTR [r10+-64]
mov rax, QWORD PTR [rcx+-56]
mov QWORD PTR [rcx+-64], r8
sbb rax, QWORD PTR [r10+-56]
mov r8, QWORD PTR [rcx+-48]
mov QWORD PTR [rcx+-56], rax
sbb r8, QWORD PTR [r10+-48]
mov rax, QWORD PTR [rcx+-40]
mov QWORD PTR [rcx+-48], r8
sbb rax, QWORD PTR [r10+-40]
mov r8, QWORD PTR [rcx+-32]
mov QWORD PTR [rcx+-40], rax
sbb r8, QWORD PTR [r10+-32]
mov rax, QWORD PTR [rcx+-24]
mov QWORD PTR [rcx+-32], r8
sbb rax, QWORD PTR [r10+-24]
mov r8, QWORD PTR [rcx+-16]
mov QWORD PTR [rcx+-24], rax
sbb r8, QWORD PTR [r10+-16]
mov rax, QWORD PTR [rcx+-8]
mov QWORD PTR [rcx+-16], r8
sbb rax, QWORD PTR [r10+-8]
mov r8, QWORD PTR [rcx]
mov QWORD PTR [rcx+-8], rax
sbb r8, QWORD PTR [r10]
mov rax, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
sbb rax, QWORD PTR [r10+8]
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], rax
sbb r8, QWORD PTR [r10+16]
mov rax, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
sbb rax, QWORD PTR [r10+24]
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], rax
sbb r8, QWORD PTR [r10+32]
mov rax, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
sbb rax, QWORD PTR [r10+40]
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], rax
sbb r8, QWORD PTR [r10+48]
mov rax, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
sbb rax, QWORD PTR [r10+56]
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], rax
sbb r8, QWORD PTR [r10+64]
mov rax, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
sbb rax, QWORD PTR [r10+72]
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], rax
sbb r8, QWORD PTR [r10+80]
mov rax, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
sbb rax, QWORD PTR [r10+88]
mov r8, QWORD PTR [rcx+96]
mov QWORD PTR [rcx+88], rax
sbb r8, QWORD PTR [r10+96]
mov rax, QWORD PTR [rcx+104]
mov QWORD PTR [rcx+96], r8
sbb rax, QWORD PTR [r10+104]
mov r8, QWORD PTR [rcx+112]
mov QWORD PTR [rcx+104], rax
sbb r8, QWORD PTR [r10+112]
mov rax, QWORD PTR [rcx+120]
mov QWORD PTR [rcx+112], r8
sbb rax, QWORD PTR [r10+120]
mov QWORD PTR [rcx+120], rax
; r9 = (earlier borrow count) - (borrow of this subtraction): the word to
; fold into r[48..].
sbb r9, 0
mov rcx, QWORD PTR [rsp+256]
; rcx = &r[48]
add rcx, 384
; Add in word
; r[48] += r9, then ripple the carry through r[49..63].
mov r8, QWORD PTR [rcx]
add r8, r9
mov rax, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
adc rax, 0
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], rax
adc r8, 0
mov rax, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
adc rax, 0
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], rax
adc r8, 0
mov rax, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
adc rax, 0
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], rax
adc r8, 0
mov rax, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
adc rax, 0
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], rax
adc r8, 0
mov rax, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
adc rax, 0
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], rax
adc r8, 0
mov rax, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
adc rax, 0
mov r8, QWORD PTR [rcx+96]
mov QWORD PTR [rcx+88], rax
adc r8, 0
mov rax, QWORD PTR [rcx+104]
mov QWORD PTR [rcx+96], r8
adc rax, 0
mov r8, QWORD PTR [rcx+112]
mov QWORD PTR [rcx+104], rax
adc r8, 0
mov rax, QWORD PTR [rcx+120]
mov QWORD PTR [rcx+112], r8
adc rax, 0
mov QWORD PTR [rcx+120], rax
; Restore the original argument registers and release the frame.
mov rdx, QWORD PTR [rsp+264]
mov rcx, QWORD PTR [rsp+256]
add rsp, 272
ret
sp_2048_sqr_avx2_32 ENDP
_text ENDS
ENDIF
; /* Sub b from a into a. (a -= b)
; *
; * Operates on 16 64-bit words, least significant word first, as one
; * sub/sbb borrow chain.
; *
; * a A single precision integer and result. Passed in rcx.
; * b A single precision integer. Passed in rdx.
; *
; * Returns in rax: 0 when there was no borrow out of the top word,
; * all ones (-1) when there was.
; * Uses r8 and r9 as alternating scratch registers.
; */
_text SEGMENT READONLY PARA
sp_2048_sub_in_place_16 PROC
; Word 0 starts the chain with sub; every later word uses sbb. The mov
; instructions in between do not modify the carry flag.
mov r8, QWORD PTR [rcx]
sub r8, QWORD PTR [rdx]
mov r9, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
sbb r9, QWORD PTR [rdx+8]
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], r9
sbb r8, QWORD PTR [rdx+16]
mov r9, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
sbb r9, QWORD PTR [rdx+24]
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], r9
sbb r8, QWORD PTR [rdx+32]
mov r9, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
sbb r9, QWORD PTR [rdx+40]
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], r9
sbb r8, QWORD PTR [rdx+48]
mov r9, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
sbb r9, QWORD PTR [rdx+56]
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], r9
sbb r8, QWORD PTR [rdx+64]
mov r9, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
sbb r9, QWORD PTR [rdx+72]
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], r9
sbb r8, QWORD PTR [rdx+80]
mov r9, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
sbb r9, QWORD PTR [rdx+88]
mov r8, QWORD PTR [rcx+96]
mov QWORD PTR [rcx+88], r9
sbb r8, QWORD PTR [rdx+96]
mov r9, QWORD PTR [rcx+104]
mov QWORD PTR [rcx+96], r8
sbb r9, QWORD PTR [rdx+104]
mov r8, QWORD PTR [rcx+112]
mov QWORD PTR [rcx+104], r9
sbb r8, QWORD PTR [rdx+112]
mov r9, QWORD PTR [rcx+120]
mov QWORD PTR [rcx+112], r8
sbb r9, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+120], r9
; rax = rax - rax - CF: turns the final borrow into 0 or -1.
sbb rax, rax
ret
sp_2048_sub_in_place_16 ENDP
_text ENDS
; /* Mul a by digit b into r. (r = a * b)
; *
; * Multiplies the 32 words a[0..31] by the single 64-bit digit b and
; * writes 33 words, r[0..32].
; *
; * r A single precision integer. Passed in rcx.
; * a A single precision integer. Passed in rdx (copied to r9 because
; *   mul overwrites rdx).
; * b A single precision digit. Passed in r8.
; *
; * r10, r11 and r12 rotate as a three-word accumulator: one holds the word
; * being completed and stored, the next collects the high half of the
; * current product, and the third is cleared to catch a carry.
; * r12 is saved and restored; rax, rdx, r9, r10 and r11 are overwritten.
; */
_text SEGMENT READONLY PARA
sp_2048_mul_d_32 PROC
push r12
; Keep the pointer to a in r9: each mul writes its result to rdx:rax.
mov r9, rdx
; A[0] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9]
mov r10, rax
mov r11, rdx
mov QWORD PTR [rcx], r10
; A[1] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+8]
add r11, rax
mov QWORD PTR [rcx+8], r11
adc r12, rdx
adc r10, 0
; A[2] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+16]
add r12, rax
mov QWORD PTR [rcx+16], r12
adc r10, rdx
adc r11, 0
; A[3] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+24]
add r10, rax
mov QWORD PTR [rcx+24], r10
adc r11, rdx
adc r12, 0
; A[4] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+32]
add r11, rax
mov QWORD PTR [rcx+32], r11
adc r12, rdx
adc r10, 0
; A[5] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+40]
add r12, rax
mov QWORD PTR [rcx+40], r12
adc r10, rdx
adc r11, 0
; A[6] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+48]
add r10, rax
mov QWORD PTR [rcx+48], r10
adc r11, rdx
adc r12, 0
; A[7] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+56]
add r11, rax
mov QWORD PTR [rcx+56], r11
adc r12, rdx
adc r10, 0
; A[8] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+64]
add r12, rax
mov QWORD PTR [rcx+64], r12
adc r10, rdx
adc r11, 0
; A[9] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+72]
add r10, rax
mov QWORD PTR [rcx+72], r10
adc r11, rdx
adc r12, 0
; A[10] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+80]
add r11, rax
mov QWORD PTR [rcx+80], r11
adc r12, rdx
adc r10, 0
; A[11] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+88]
add r12, rax
mov QWORD PTR [rcx+88], r12
adc r10, rdx
adc r11, 0
; A[12] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+96]
add r10, rax
mov QWORD PTR [rcx+96], r10
adc r11, rdx
adc r12, 0
; A[13] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+104]
add r11, rax
mov QWORD PTR [rcx+104], r11
adc r12, rdx
adc r10, 0
; A[14] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+112]
add r12, rax
mov QWORD PTR [rcx+112], r12
adc r10, rdx
adc r11, 0
; A[15] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+120]
add r10, rax
mov QWORD PTR [rcx+120], r10
adc r11, rdx
adc r12, 0
; A[16] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+128]
add r11, rax
mov QWORD PTR [rcx+128], r11
adc r12, rdx
adc r10, 0
; A[17] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+136]
add r12, rax
mov QWORD PTR [rcx+136], r12
adc r10, rdx
adc r11, 0
; A[18] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+144]
add r10, rax
mov QWORD PTR [rcx+144], r10
adc r11, rdx
adc r12, 0
; A[19] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+152]
add r11, rax
mov QWORD PTR [rcx+152], r11
adc r12, rdx
adc r10, 0
; A[20] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+160]
add r12, rax
mov QWORD PTR [rcx+160], r12
adc r10, rdx
adc r11, 0
; A[21] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+168]
add r10, rax
mov QWORD PTR [rcx+168], r10
adc r11, rdx
adc r12, 0
; A[22] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+176]
add r11, rax
mov QWORD PTR [rcx+176], r11
adc r12, rdx
adc r10, 0
; A[23] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+184]
add r12, rax
mov QWORD PTR [rcx+184], r12
adc r10, rdx
adc r11, 0
; A[24] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+192]
add r10, rax
mov QWORD PTR [rcx+192], r10
adc r11, rdx
adc r12, 0
; A[25] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+200]
add r11, rax
mov QWORD PTR [rcx+200], r11
adc r12, rdx
adc r10, 0
; A[26] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+208]
add r12, rax
mov QWORD PTR [rcx+208], r12
adc r10, rdx
adc r11, 0
; A[27] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+216]
add r10, rax
mov QWORD PTR [rcx+216], r10
adc r11, rdx
adc r12, 0
; A[28] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+224]
add r11, rax
mov QWORD PTR [rcx+224], r11
adc r12, rdx
adc r10, 0
; A[29] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+232]
add r12, rax
mov QWORD PTR [rcx+232], r12
adc r10, rdx
adc r11, 0
; A[30] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+240]
add r10, rax
mov QWORD PTR [rcx+240], r10
adc r11, rdx
adc r12, 0
; A[31] * B
; Last word: no further carry register is needed; the high half plus carry
; becomes the extra top word r[32].
mov rax, r8
mul QWORD PTR [r9+248]
add r11, rax
adc r12, rdx
mov QWORD PTR [rcx+248], r11
mov QWORD PTR [rcx+256], r12
pop r12
ret
sp_2048_mul_d_32 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m. (r = a - (b & m))
; * m is -1 (all ones) to subtract and 0 to subtract nothing.
; *
; * Works on 16 64-bit words. Each word of b is ANDed with m and staged in a
; * 128-byte stack buffer, then subtracted from a in one borrow chain. The
; * same instructions run for either mask value; there is no branch on m.
; *
; * r A single precision number representing condition subtract result.
; *   Passed in rcx.
; * a A single precision number to subtract from. Passed in rdx.
; * b A single precision number to subtract. Passed in r8.
; * m Mask value to apply. Passed in r9.
; *
; * Returns in rax: 0 when there was no borrow out of the top word,
; * all ones (-1) when there was.
; * Overwrites r8, r10 and r11.
; */
_text SEGMENT READONLY PARA
sp_2048_cond_sub_16 PROC
; Reserve 16 words on the stack for the masked copy of b.
sub rsp, 128
; Phase 1: stack[i] = b[i] AND m, two words at a time.
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
and r10, r9
and r11, r9
mov QWORD PTR [rsp], r10
mov QWORD PTR [rsp+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+16], r10
mov QWORD PTR [rsp+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+32], r10
mov QWORD PTR [rsp+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+48], r10
mov QWORD PTR [rsp+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+64], r10
mov QWORD PTR [rsp+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+80], r10
mov QWORD PTR [rsp+88], r11
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+96], r10
mov QWORD PTR [rsp+104], r11
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+112], r10
mov QWORD PTR [rsp+120], r11
; Phase 2: r[i] = a[i] - stack[i] with borrow. r8 is reused as the loaded
; masked word (the pointer to b is no longer needed). The mov instructions
; between sub/sbb do not modify the carry flag.
mov r10, QWORD PTR [rdx]
mov r8, QWORD PTR [rsp]
sub r10, r8
mov r11, QWORD PTR [rdx+8]
mov r8, QWORD PTR [rsp+8]
sbb r11, r8
mov QWORD PTR [rcx], r10
mov r10, QWORD PTR [rdx+16]
mov r8, QWORD PTR [rsp+16]
sbb r10, r8
mov QWORD PTR [rcx+8], r11
mov r11, QWORD PTR [rdx+24]
mov r8, QWORD PTR [rsp+24]
sbb r11, r8
mov QWORD PTR [rcx+16], r10
mov r10, QWORD PTR [rdx+32]
mov r8, QWORD PTR [rsp+32]
sbb r10, r8
mov QWORD PTR [rcx+24], r11
mov r11, QWORD PTR [rdx+40]
mov r8, QWORD PTR [rsp+40]
sbb r11, r8
mov QWORD PTR [rcx+32], r10
mov r10, QWORD PTR [rdx+48]
mov r8, QWORD PTR [rsp+48]
sbb r10, r8
mov QWORD PTR [rcx+40], r11
mov r11, QWORD PTR [rdx+56]
mov r8, QWORD PTR [rsp+56]
sbb r11, r8
mov QWORD PTR [rcx+48], r10
mov r10, QWORD PTR [rdx+64]
mov r8, QWORD PTR [rsp+64]
sbb r10, r8
mov QWORD PTR [rcx+56], r11
mov r11, QWORD PTR [rdx+72]
mov r8, QWORD PTR [rsp+72]
sbb r11, r8
mov QWORD PTR [rcx+64], r10
mov r10, QWORD PTR [rdx+80]
mov r8, QWORD PTR [rsp+80]
sbb r10, r8
mov QWORD PTR [rcx+72], r11
mov r11, QWORD PTR [rdx+88]
mov r8, QWORD PTR [rsp+88]
sbb r11, r8
mov QWORD PTR [rcx+80], r10
mov r10, QWORD PTR [rdx+96]
mov r8, QWORD PTR [rsp+96]
sbb r10, r8
mov QWORD PTR [rcx+88], r11
mov r11, QWORD PTR [rdx+104]
mov r8, QWORD PTR [rsp+104]
sbb r11, r8
mov QWORD PTR [rcx+96], r10
mov r10, QWORD PTR [rdx+112]
mov r8, QWORD PTR [rsp+112]
sbb r10, r8
mov QWORD PTR [rcx+104], r11
mov r11, QWORD PTR [rdx+120]
mov r8, QWORD PTR [rsp+120]
sbb r11, r8
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
; rax = rax - rax - CF: turns the final borrow into 0 or -1. This must come
; before the add below, which overwrites the flags.
sbb rax, rax
add rsp, 128
ret
sp_2048_cond_sub_16 ENDP
_text ENDS
; /* Reduce the number back to 2048 bits using Montgomery reduction.
; *
; * This variant runs 16 rounds over a 16-word modulus m[0..15]; a is
; * addressed as 32 words, a[0..31]. After the rounds, a[16..31] minus
; * (m AND mask) is written to a[0..15] by sp_2048_cond_sub_16, where mask is
; * all ones when the rounds produced a carry out of the top word.
; *
; * a A single precision number to reduce in place. Passed in rcx.
; * m The single precision number representing the modulus. Passed in rdx.
; * mp The digit representing the negative inverse of m mod 2^n.
; *    Passed in r8.
; *
; * Register roles inside the loop:
; *   r9   pointer to m (rdx is overwritten by mul)
; *   r10  rounds remaining
; *   r13  mu for the current round
; *   r15  current a[i]   (kept in a register, not re-read from memory)
; *   rdi  current a[i+1] (kept in a register, not re-read from memory)
; *   r11, r12  alternating carry words between columns
; *   r14  scratch for the a[i+k] word being updated
; *   rsi  carry out of the top word, carried over to the next round
; * r12-r15, rdi and rsi are saved on entry and restored before returning.
; */
_text SEGMENT READONLY PARA
sp_2048_mont_reduce_16 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
mov r9, rdx
; rsi = carry carried between rounds, initially 0.
xor rsi, rsi
; i = 16
mov r10, 16
; Preload the two lowest words; they live in r15/rdi for the whole loop.
mov r15, QWORD PTR [rcx]
mov rdi, QWORD PTR [rcx+8]
L_2048_mont_reduce_16_loop:
; mu = a[i] * mp
mov r13, r15
imul r13, r8
; a[i+0] += m[0] * mu
; Only the carry (r12) is kept; the low word in r15 is not stored.
mov rax, r13
xor r12, r12
mul QWORD PTR [r9]
add r15, rax
adc r12, rdx
; a[i+1] += m[1] * mu
; The result stays in r15 and becomes a[i] of the next round.
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+8]
mov r15, rdi
add r15, rax
adc r11, rdx
add r15, r12
adc r11, 0
; a[i+2] += m[2] * mu
; The result stays in rdi and becomes a[i+1] of the next round.
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+16]
mov rdi, QWORD PTR [rcx+16]
add rdi, rax
adc r12, rdx
add rdi, r11
adc r12, 0
; a[i+3] += m[3] * mu
; From here on each word is loaded, updated and stored back through r14.
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+24]
mov r14, QWORD PTR [rcx+24]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+24], r14
adc r11, 0
; a[i+4] += m[4] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+32]
mov r14, QWORD PTR [rcx+32]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+32], r14
adc r12, 0
; a[i+5] += m[5] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+40]
mov r14, QWORD PTR [rcx+40]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+40], r14
adc r11, 0
; a[i+6] += m[6] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+48]
mov r14, QWORD PTR [rcx+48]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+48], r14
adc r12, 0
; a[i+7] += m[7] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+56]
mov r14, QWORD PTR [rcx+56]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+56], r14
adc r11, 0
; a[i+8] += m[8] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+64]
mov r14, QWORD PTR [rcx+64]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+64], r14
adc r12, 0
; a[i+9] += m[9] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+72]
mov r14, QWORD PTR [rcx+72]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+72], r14
adc r11, 0
; a[i+10] += m[10] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+80]
mov r14, QWORD PTR [rcx+80]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+80], r14
adc r12, 0
; a[i+11] += m[11] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+88]
mov r14, QWORD PTR [rcx+88]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+88], r14
adc r11, 0
; a[i+12] += m[12] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+96]
mov r14, QWORD PTR [rcx+96]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+96], r14
adc r12, 0
; a[i+13] += m[13] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+104]
mov r14, QWORD PTR [rcx+104]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+104], r14
adc r11, 0
; a[i+14] += m[14] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+112]
mov r14, QWORD PTR [rcx+112]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+112], r14
adc r12, 0
; a[i+15] += m[15] * mu
; Top column: the high half (rdx) also absorbs the carry kept in rsi from
; the previous round and is added into a[i+16]; rsi is then rebuilt from the
; carries of this round.
mov rax, r13
mul QWORD PTR [r9+120]
mov r14, QWORD PTR [rcx+120]
add r12, rax
adc rdx, rsi
; mov leaves the flags alone, so the adc below still sees the carry of
; "adc rdx, rsi".
mov rsi, 0
adc rsi, 0
add r14, r12
mov QWORD PTR [rcx+120], r14
adc QWORD PTR [rcx+128], rdx
adc rsi, 0
; i -= 1
; Advance the window by one word (a[i+1] becomes a[i]).
add rcx, 8
dec r10
jnz L_2048_mont_reduce_16_loop
; rcx now points at a[16]; flush the two register-held words to a[16], a[17].
mov QWORD PTR [rcx], r15
mov QWORD PTR [rcx+8], rdi
; Turn the final carry (0 or 1) into a mask (0 or all ones).
neg rsi
; Set up sp_2048_cond_sub_16(r = a, a = &a[16], b = m, m = mask).
; NOTE(review): the head of this file defines _WIN64 when it is not already
; defined, so only the first branch is ever assembled. The ELSE branch
; overwrites r9 (pointer to m) before copying it to r8, so it would pass
; the mask as b if it were ever enabled.
IFDEF _WIN64
mov r8, r9
mov r9, rsi
ELSE
mov r9, rsi
mov r8, r9
ENDIF
mov rdx, rcx
; NOTE(review): register-to-itself move; has no effect.
mov rcx, rcx
; rcx = &a[16] - 128 bytes = &a[0]
sub rcx, 128
call sp_2048_cond_sub_16
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_2048_mont_reduce_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not subtracting.
; *
; * r A single precision number representing condition subtract result.
; * a A single precision number to subtract from.
; * b A single precision number to subtract.
; * m Mask value to apply.
; *
; * Registers as used below: rcx = r, rdx = a, r8 = b, r9 = m.
; * Each word of b is passed through PEXT with m as the bit mask: an
; * all-ones mask yields the word unchanged and a zero mask yields 0, so the
; * same instruction sequence runs in both cases. Any other mask value would
; * compact bits rather than select the word.
; * The 16 words are processed low to high as one SUB/SBB borrow chain; the
; * MOV and PEXT instructions placed between the SBBs do not modify flags.
; * Returns rax = 0 when the chain ends without a borrow, -1 when it borrows.
; */
_text SEGMENT READONLY PARA
sp_2048_cond_sub_avx2_16 PROC
; r12 is callee-saved; r10, r11 and r12 rotate as operand registers
push r12
; word 0: starts the borrow chain with SUB
mov r12, QWORD PTR [r8]
mov r10, QWORD PTR [rdx]
pext r12, r12, r9
sub r10, r12
; word 1 (the store of the previous result word is interleaved)
mov r12, QWORD PTR [r8+8]
mov r11, QWORD PTR [rdx+8]
pext r12, r12, r9
mov QWORD PTR [rcx], r10
sbb r11, r12
; word 2
mov r10, QWORD PTR [r8+16]
mov r12, QWORD PTR [rdx+16]
pext r10, r10, r9
mov QWORD PTR [rcx+8], r11
sbb r12, r10
; word 3
mov r11, QWORD PTR [r8+24]
mov r10, QWORD PTR [rdx+24]
pext r11, r11, r9
mov QWORD PTR [rcx+16], r12
sbb r10, r11
; word 4
mov r12, QWORD PTR [r8+32]
mov r11, QWORD PTR [rdx+32]
pext r12, r12, r9
mov QWORD PTR [rcx+24], r10
sbb r11, r12
; word 5
mov r10, QWORD PTR [r8+40]
mov r12, QWORD PTR [rdx+40]
pext r10, r10, r9
mov QWORD PTR [rcx+32], r11
sbb r12, r10
; word 6
mov r11, QWORD PTR [r8+48]
mov r10, QWORD PTR [rdx+48]
pext r11, r11, r9
mov QWORD PTR [rcx+40], r12
sbb r10, r11
; word 7
mov r12, QWORD PTR [r8+56]
mov r11, QWORD PTR [rdx+56]
pext r12, r12, r9
mov QWORD PTR [rcx+48], r10
sbb r11, r12
; word 8
mov r10, QWORD PTR [r8+64]
mov r12, QWORD PTR [rdx+64]
pext r10, r10, r9
mov QWORD PTR [rcx+56], r11
sbb r12, r10
; word 9
mov r11, QWORD PTR [r8+72]
mov r10, QWORD PTR [rdx+72]
pext r11, r11, r9
mov QWORD PTR [rcx+64], r12
sbb r10, r11
; word 10
mov r12, QWORD PTR [r8+80]
mov r11, QWORD PTR [rdx+80]
pext r12, r12, r9
mov QWORD PTR [rcx+72], r10
sbb r11, r12
; word 11
mov r10, QWORD PTR [r8+88]
mov r12, QWORD PTR [rdx+88]
pext r10, r10, r9
mov QWORD PTR [rcx+80], r11
sbb r12, r10
; word 12
mov r11, QWORD PTR [r8+96]
mov r10, QWORD PTR [rdx+96]
pext r11, r11, r9
mov QWORD PTR [rcx+88], r12
sbb r10, r11
; word 13
mov r12, QWORD PTR [r8+104]
mov r11, QWORD PTR [rdx+104]
pext r12, r12, r9
mov QWORD PTR [rcx+96], r10
sbb r11, r12
; word 14
mov r10, QWORD PTR [r8+112]
mov r12, QWORD PTR [rdx+112]
pext r10, r10, r9
mov QWORD PTR [rcx+104], r11
sbb r12, r10
; word 15
mov r11, QWORD PTR [r8+120]
mov r10, QWORD PTR [rdx+120]
pext r11, r11, r9
mov QWORD PTR [rcx+112], r12
sbb r10, r11
mov QWORD PTR [rcx+120], r10
; rax = 0 - borrow: 0 if no borrow out of word 15, -1 otherwise
sbb rax, rax
pop r12
ret
sp_2048_cond_sub_avx2_16 ENDP
_text ENDS
ENDIF
; /* Mul a by digit b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision digit.
; *
; * Registers as used below: rcx = r, rdx = a (moved to r9 because MUL
; * overwrites rdx), r8 = b.
; * a is read as 16 words; r is written as 17 words (r[0] .. r[16]).
; * r10, r11 and r12 rotate through three roles from step to step: the word
; * being completed and stored, the word receiving the high half of the
; * product, and a freshly zeroed word that catches the carry.
; */
_text SEGMENT READONLY PARA
sp_2048_mul_d_16 PROC
; r12 is callee-saved in the Microsoft x64 ABI
push r12
mov r9, rdx
; A[0] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9]
mov r10, rax
mov r11, rdx
mov QWORD PTR [rcx], r10
; A[1] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+8]
add r11, rax
; the store does not touch the carry flag consumed by the adc below
mov QWORD PTR [rcx+8], r11
adc r12, rdx
adc r10, 0
; A[2] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+16]
add r12, rax
mov QWORD PTR [rcx+16], r12
adc r10, rdx
adc r11, 0
; A[3] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+24]
add r10, rax
mov QWORD PTR [rcx+24], r10
adc r11, rdx
adc r12, 0
; A[4] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+32]
add r11, rax
mov QWORD PTR [rcx+32], r11
adc r12, rdx
adc r10, 0
; A[5] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+40]
add r12, rax
mov QWORD PTR [rcx+40], r12
adc r10, rdx
adc r11, 0
; A[6] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+48]
add r10, rax
mov QWORD PTR [rcx+48], r10
adc r11, rdx
adc r12, 0
; A[7] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+56]
add r11, rax
mov QWORD PTR [rcx+56], r11
adc r12, rdx
adc r10, 0
; A[8] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+64]
add r12, rax
mov QWORD PTR [rcx+64], r12
adc r10, rdx
adc r11, 0
; A[9] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+72]
add r10, rax
mov QWORD PTR [rcx+72], r10
adc r11, rdx
adc r12, 0
; A[10] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+80]
add r11, rax
mov QWORD PTR [rcx+80], r11
adc r12, rdx
adc r10, 0
; A[11] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+88]
add r12, rax
mov QWORD PTR [rcx+88], r12
adc r10, rdx
adc r11, 0
; A[12] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+96]
add r10, rax
mov QWORD PTR [rcx+96], r10
adc r11, rdx
adc r12, 0
; A[13] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+104]
add r11, rax
mov QWORD PTR [rcx+104], r11
adc r12, rdx
adc r10, 0
; A[14] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+112]
add r12, rax
mov QWORD PTR [rcx+112], r12
adc r10, rdx
adc r11, 0
; A[15] * B
; last step: no further carry word is needed, r11 becomes r[16]
mov rax, r8
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
mov QWORD PTR [rcx+120], r10
mov QWORD PTR [rcx+128], r11
pop r12
ret
sp_2048_mul_d_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision digit.
; *
; * Registers as used below: rcx = r, rdx = a (moved to rax), r8 = b (moved
; * to rdx, the implicit multiplicand of MULX).
; * a is read as 16 words; r is written as 17 words (r[0] .. r[16]).
; * r13 is held at zero. r11 and r12 alternate as the word being completed:
; * ADCX adds the low half of each product into it using the carry flag,
; * while ADOX adds the high half into the other register (reset to zero
; * from r13) using the overflow flag. MULX and MOV leave both flags intact.
; */
_text SEGMENT READONLY PARA
sp_2048_mul_d_avx2_16 PROC
; r12 and r13 are callee-saved in the Microsoft x64 ABI
push r12
push r13
mov rax, rdx
; A[0] * B
mov rdx, r8
; zero constant; the xor also clears CF and OF for the adcx/adox chains
xor r13, r13
mulx r12, r11, QWORD PTR [rax]
mov QWORD PTR [rcx], r11
; A[1] * B
mulx r10, r9, QWORD PTR [rax+8]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+8], r12
; A[2] * B
mulx r10, r9, QWORD PTR [rax+16]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+16], r11
; A[3] * B
mulx r10, r9, QWORD PTR [rax+24]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+24], r12
; A[4] * B
mulx r10, r9, QWORD PTR [rax+32]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+32], r11
; A[5] * B
mulx r10, r9, QWORD PTR [rax+40]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+40], r12
; A[6] * B
mulx r10, r9, QWORD PTR [rax+48]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+48], r11
; A[7] * B
mulx r10, r9, QWORD PTR [rax+56]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+56], r12
; A[8] * B
mulx r10, r9, QWORD PTR [rax+64]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+64], r11
; A[9] * B
mulx r10, r9, QWORD PTR [rax+72]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+72], r12
; A[10] * B
mulx r10, r9, QWORD PTR [rax+80]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+80], r11
; A[11] * B
mulx r10, r9, QWORD PTR [rax+88]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+88], r12
; A[12] * B
mulx r10, r9, QWORD PTR [rax+96]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+96], r11
; A[13] * B
mulx r10, r9, QWORD PTR [rax+104]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+104], r12
; A[14] * B
mulx r10, r9, QWORD PTR [rax+112]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+112], r11
; A[15] * B
mulx r10, r9, QWORD PTR [rax+120]
mov r11, r13
adcx r12, r9
adox r11, r10
; fold the last pending carry (CF) into the top word r[16]
adcx r11, r13
mov QWORD PTR [rcx+120], r12
mov QWORD PTR [rcx+128], r11
pop r13
pop r12
ret
sp_2048_mul_d_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1 The high order half of the number to divide.
; * d0 The low order half of the number to divide.
; * div The divisor.
; * returns the quotient of the division (in rax); DIV also leaves the
; * remainder in rdx.
; *
; * Microsoft x64 ABI: rcx = d1, rdx = d0, r8 = div.
; * DIV raises a divide error (#DE) when div is zero or when the quotient
; * does not fit in 64 bits, so the caller must guarantee d1 < div.
; */
_text SEGMENT READONLY PARA
div_2048_word_asm_16 PROC
; rax = low half of the dividend (must be read before rdx is overwritten)
mov rax, rdx
; rdx = high half of the dividend
mov rdx, rcx
; rax = (rdx:rax) / r8, rdx = (rdx:rax) mod r8
div r8
ret
div_2048_word_asm_16 ENDP
_text ENDS
ENDIF
; /* Compare a with b in constant time.
; *
; * a A single precision integer.
; * b A single precision integer.
; * return -ve, 0 or +ve if a is less than, equal to or greater than b
; * respectively.
; *
; * Registers: rcx = a, rdx = b.
; *   r8   "still equal" mask: all ones until the first differing word
; *        (scanning from the most significant word), zero afterwards
; *   r9   constant 0, r10 constant 1
; *   rax  running result, starts at -1
; * Every word is visited and there are no branches; once r8 is zero both
; * operands are masked to 0, so later words can no longer change rax.
; * The final XOR with r8 maps the "all words equal" state (rax = -1,
; * r8 = -1) to 0 and leaves +1 / -1 untouched when r8 is 0.
; */
_text SEGMENT READONLY PARA
sp_2048_cmp_16 PROC
push r12
mov r10, 1
mov rax, -1
mov r8, -1
xor r9, r9
; Assembly-time byte offset of the word being compared: 120 down to 0.
sp_2048_cmp_16_off = 120
REPEAT 16
mov r11, QWORD PTR [rcx+sp_2048_cmp_16_off]
mov r12, QWORD PTR [rdx+sp_2048_cmp_16_off]
and r11, r8
and r12, r8
sub r11, r12
; a word above b word: result +1
cmova rax, r10
; a word below b word: result -1 (r8 is still all ones in that case)
cmovc rax, r8
; words differ: clear the mask so lower words are ignored
cmovnz r8, r9
sp_2048_cmp_16_off = sp_2048_cmp_16_off - 8
ENDM
xor rax, r8
pop r12
ret
sp_2048_cmp_16 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_2048_get_from_table_16 PROC
sub rsp, 128
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqu OWORD PTR [rsp+32], xmm8
movdqu OWORD PTR [rsp+48], xmm9
movdqu OWORD PTR [rsp+64], xmm10
movdqu OWORD PTR [rsp+80], xmm11
movdqu OWORD PTR [rsp+96], xmm12
movdqu OWORD PTR [rsp+112], xmm13
mov rax, 1
movd xmm10, r8
movd xmm11, rax
pxor xmm13, xmm13
pshufd xmm11, xmm11, 0
pshufd xmm10, xmm10, 0
; START: 0-7
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 16
mov r9, QWORD PTR [rdx+128]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 17
mov r9, QWORD PTR [rdx+136]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 18
mov r9, QWORD PTR [rdx+144]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 19
mov r9, QWORD PTR [rdx+152]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 20
mov r9, QWORD PTR [rdx+160]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 21
mov r9, QWORD PTR [rdx+168]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 22
mov r9, QWORD PTR [rdx+176]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 23
mov r9, QWORD PTR [rdx+184]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 24
mov r9, QWORD PTR [rdx+192]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 25
mov r9, QWORD PTR [rdx+200]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 26
mov r9, QWORD PTR [rdx+208]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 27
mov r9, QWORD PTR [rdx+216]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 28
mov r9, QWORD PTR [rdx+224]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 29
mov r9, QWORD PTR [rdx+232]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 30
mov r9, QWORD PTR [rdx+240]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 31
mov r9, QWORD PTR [rdx+248]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 0-7
; START: 8-15
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 16
mov r9, QWORD PTR [rdx+128]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 17
mov r9, QWORD PTR [rdx+136]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 18
mov r9, QWORD PTR [rdx+144]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 19
mov r9, QWORD PTR [rdx+152]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 20
mov r9, QWORD PTR [rdx+160]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 21
mov r9, QWORD PTR [rdx+168]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 22
mov r9, QWORD PTR [rdx+176]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 23
mov r9, QWORD PTR [rdx+184]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 24
mov r9, QWORD PTR [rdx+192]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 25
mov r9, QWORD PTR [rdx+200]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 26
mov r9, QWORD PTR [rdx+208]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 27
mov r9, QWORD PTR [rdx+216]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 28
mov r9, QWORD PTR [rdx+224]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 29
mov r9, QWORD PTR [rdx+232]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 30
mov r9, QWORD PTR [rdx+240]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 31
mov r9, QWORD PTR [rdx+248]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
; END: 8-15
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm8, OWORD PTR [rsp+32]
movdqu xmm9, OWORD PTR [rsp+48]
movdqu xmm10, OWORD PTR [rsp+64]
movdqu xmm11, OWORD PTR [rsp+80]
movdqu xmm12, OWORD PTR [rsp+96]
movdqu xmm13, OWORD PTR [rsp+112]
add rsp, 128
ret
sp_2048_get_from_table_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Montgomery reduction of a double-width number, 16-word modulus variant
; * (uses MULX/ADCX/ADOX/PEXT).
; *
; * Reads the 32 64-bit words a[0..31], folds in 16 multiples of m (one per
; * low word) and finally writes the 16-word result to a[0..15], after
; * subtracting m once when a carry came out of the top word.
; *
; * a A single precision number to reduce in place (arrives in rcx).
; * m The single precision number representing the modulus (arrives in rdx).
; * mp The digit representing the negative inverse of m mod 2^n (arrives in r8).
; *
; * Register roles inside the routine:
; *   r9        biased pointer into a (starts at a + 64 bytes, +16 per pass)
; *   r10       m
; *   r8        mp during the loop; &a[16] during the final subtraction
; *   r11       remaining step count (16, decremented by 2 per pass)
; *   r14, r15, rdi, rsi   the four lowest live words of a, kept in registers
; *   r12, r13  sliding pair of words being updated
; *   rbx       zero (re-created each step)
; *   rbp       carry passed from one step's top word to the next; turned into
; *             the all-ones/zero subtraction mask after the loop
; * r12-r15, rdi, rsi, rbx and rbp are pushed on entry and popped on exit.
; * The only branch is the loop-counter jnz.
; */
_text SEGMENT READONLY PARA
sp_2048_mont_reduce_avx2_16 PROC
; Save the non-volatile registers used below.
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
push rbp
; r9 = a, r10 = m (rdx is needed as the implicit MULX multiplicand).
mov r9, rcx
mov r10, rdx
xor rbp, rbp
; i = 16
mov r11, 16
; Preload a[0..3] into registers; they rotate down by one word per step.
mov r14, QWORD PTR [r9]
mov r15, QWORD PTR [r9+8]
mov rdi, QWORD PTR [r9+16]
mov rsi, QWORD PTR [r9+24]
; Bias the pointer by 8 words: in the first step of a pass a[i+k] lives at
; [r9 + 8*(k-8)], in the second step at [r9 + 8*(k-7)].
add r9, 64
; rbp = 0: no carry pending from a previous step (already zero from above).
xor rbp, rbp
L_2048_mont_reduce_avx2_16_loop:
; ---- First of two unrolled reduction steps ----
; mu = a[i] * mp
mov rdx, r14
mov r12, r14
imul rdx, r8
; rbx = 0; the xor also clears CF and OF so both carry chains start clean.
; From here to the end of the step only MULX/MOV/ADCX/ADOX are used:
; ADCX carries through CF only (low product halves, in rax),
; ADOX carries through OF only (high product halves, in rcx).
xor rbx, rbx
; a[i+0] += m[0] * mu
; Only the carry out of this word is used; the sum in r12 is never stored.
mulx rcx, rax, QWORD PTR [r10]
mov r14, r15
adcx r12, rax
adox r14, rcx
; a[i+1] += m[1] * mu
mulx rcx, rax, QWORD PTR [r10+8]
mov r15, rdi
adcx r14, rax
adox r15, rcx
; a[i+2] += m[2] * mu
mulx rcx, rax, QWORD PTR [r10+16]
mov rdi, rsi
adcx r15, rax
adox rdi, rcx
; a[i+3] += m[3] * mu
; rsi is refilled from memory: it becomes the new fourth register-held word.
mulx rcx, rax, QWORD PTR [r10+24]
mov rsi, QWORD PTR [r9+-32]
adcx rdi, rax
adox rsi, rcx
; a[i+4] += m[4] * mu
mulx rcx, rax, QWORD PTR [r10+32]
mov r13, QWORD PTR [r9+-24]
adcx rsi, rax
adox r13, rcx
; a[i+5] += m[5] * mu
; From here on r12/r13 alternate: load the next word, add, store the
; previous (now complete) word back to memory.
mulx rcx, rax, QWORD PTR [r10+40]
mov r12, QWORD PTR [r9+-16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-24], r13
; a[i+6] += m[6] * mu
mulx rcx, rax, QWORD PTR [r10+48]
mov r13, QWORD PTR [r9+-8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-16], r12
; a[i+7] += m[7] * mu
mulx rcx, rax, QWORD PTR [r10+56]
mov r12, QWORD PTR [r9]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-8], r13
; a[i+8] += m[8] * mu
mulx rcx, rax, QWORD PTR [r10+64]
mov r13, QWORD PTR [r9+8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9], r12
; a[i+9] += m[9] * mu
mulx rcx, rax, QWORD PTR [r10+72]
mov r12, QWORD PTR [r9+16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+8], r13
; a[i+10] += m[10] * mu
mulx rcx, rax, QWORD PTR [r10+80]
mov r13, QWORD PTR [r9+24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+16], r12
; a[i+11] += m[11] * mu
mulx rcx, rax, QWORD PTR [r10+88]
mov r12, QWORD PTR [r9+32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+24], r13
; a[i+12] += m[12] * mu
mulx rcx, rax, QWORD PTR [r10+96]
mov r13, QWORD PTR [r9+40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+32], r12
; a[i+13] += m[13] * mu
mulx rcx, rax, QWORD PTR [r10+104]
mov r12, QWORD PTR [r9+48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+40], r13
; a[i+14] += m[14] * mu
mulx rcx, rax, QWORD PTR [r10+112]
mov r13, QWORD PTR [r9+56]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+48], r12
; a[i+15] += m[15] * mu
mulx rcx, rax, QWORD PTR [r10+120]
mov r12, QWORD PTR [r9+64]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+56], r13
; a[i+16] += carry pending from the previous step (rbp) + CF.
; Then rbp = OF + CF: the carry out of a[i+16], handed to the next step.
adcx r12, rbp
mov rbp, rbx
mov QWORD PTR [r9+64], r12
adox rbp, rbx
adcx rbp, rbx
; ---- Second unrolled step: same sequence one word further on ----
; (r9 is not advanced in between, so every displacement is 8 larger.)
; mu = a[i] * mp
mov rdx, r14
mov r12, r14
imul rdx, r8
; rbx = 0 and CF = OF = 0 for the new pair of carry chains.
xor rbx, rbx
; a[i+0] += m[0] * mu
mulx rcx, rax, QWORD PTR [r10]
mov r14, r15
adcx r12, rax
adox r14, rcx
; a[i+1] += m[1] * mu
mulx rcx, rax, QWORD PTR [r10+8]
mov r15, rdi
adcx r14, rax
adox r15, rcx
; a[i+2] += m[2] * mu
mulx rcx, rax, QWORD PTR [r10+16]
mov rdi, rsi
adcx r15, rax
adox rdi, rcx
; a[i+3] += m[3] * mu
mulx rcx, rax, QWORD PTR [r10+24]
mov rsi, QWORD PTR [r9+-24]
adcx rdi, rax
adox rsi, rcx
; a[i+4] += m[4] * mu
mulx rcx, rax, QWORD PTR [r10+32]
mov r13, QWORD PTR [r9+-16]
adcx rsi, rax
adox r13, rcx
; a[i+5] += m[5] * mu
mulx rcx, rax, QWORD PTR [r10+40]
mov r12, QWORD PTR [r9+-8]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-16], r13
; a[i+6] += m[6] * mu
mulx rcx, rax, QWORD PTR [r10+48]
mov r13, QWORD PTR [r9]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-8], r12
; a[i+7] += m[7] * mu
mulx rcx, rax, QWORD PTR [r10+56]
mov r12, QWORD PTR [r9+8]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9], r13
; a[i+8] += m[8] * mu
mulx rcx, rax, QWORD PTR [r10+64]
mov r13, QWORD PTR [r9+16]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+8], r12
; a[i+9] += m[9] * mu
mulx rcx, rax, QWORD PTR [r10+72]
mov r12, QWORD PTR [r9+24]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+16], r13
; a[i+10] += m[10] * mu
mulx rcx, rax, QWORD PTR [r10+80]
mov r13, QWORD PTR [r9+32]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+24], r12
; a[i+11] += m[11] * mu
mulx rcx, rax, QWORD PTR [r10+88]
mov r12, QWORD PTR [r9+40]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+32], r13
; a[i+12] += m[12] * mu
mulx rcx, rax, QWORD PTR [r10+96]
mov r13, QWORD PTR [r9+48]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+40], r12
; a[i+13] += m[13] * mu
mulx rcx, rax, QWORD PTR [r10+104]
mov r12, QWORD PTR [r9+56]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+48], r13
; a[i+14] += m[14] * mu
mulx rcx, rax, QWORD PTR [r10+112]
mov r13, QWORD PTR [r9+64]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+56], r12
; a[i+15] += m[15] * mu
mulx rcx, rax, QWORD PTR [r10+120]
mov r12, QWORD PTR [r9+72]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+64], r13
; Top word: add the pending carry, then collect the new carry in rbp.
adcx r12, rbp
mov rbp, rbx
mov QWORD PTR [r9+72], r12
adox rbp, rbx
adcx rbp, rbx
; a += 2
add r9, 16
; i -= 2
sub r11, 2
jnz L_2048_mont_reduce_avx2_16_loop
; ---- Final masked subtraction: a[0..15] = a[16..31] - (m & mask) ----
; After 8 passes r9 = a + 64 + 128; remove the bias so r9 = &a[16].
sub r9, 64
; rbp = 0 - carry: all ones when the loop left a carry, otherwise zero.
neg rbp
; r8 = &a[16] (source, upper half), r9 = a (destination, lower half).
mov r8, r9
sub r9, 128
; Words 16..19 are still in r14, r15, rdi, rsi; the rest are read via r8.
; PEXT with an all-ones mask returns the word unchanged and with a zero mask
; returns zero. It does not modify the flags, so it can sit between the
; SUB/SBB instructions without breaking the borrow chain.
; rcx, rax and rdx rotate through the roles of masked m word and result.
mov rcx, QWORD PTR [r10]
mov rdx, r14
pext rcx, rcx, rbp
sub rdx, rcx
mov rcx, QWORD PTR [r10+8]
mov rax, r15
pext rcx, rcx, rbp
mov QWORD PTR [r9], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+16]
mov rcx, rdi
pext rdx, rdx, rbp
mov QWORD PTR [r9+8], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+24]
mov rdx, rsi
pext rax, rax, rbp
mov QWORD PTR [r9+16], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+32]
mov rax, QWORD PTR [r8+32]
pext rcx, rcx, rbp
mov QWORD PTR [r9+24], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+40]
mov rcx, QWORD PTR [r8+40]
pext rdx, rdx, rbp
mov QWORD PTR [r9+32], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+48]
mov rdx, QWORD PTR [r8+48]
pext rax, rax, rbp
mov QWORD PTR [r9+40], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+56]
mov rax, QWORD PTR [r8+56]
pext rcx, rcx, rbp
mov QWORD PTR [r9+48], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+64]
mov rcx, QWORD PTR [r8+64]
pext rdx, rdx, rbp
mov QWORD PTR [r9+56], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+72]
mov rdx, QWORD PTR [r8+72]
pext rax, rax, rbp
mov QWORD PTR [r9+64], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+80]
mov rax, QWORD PTR [r8+80]
pext rcx, rcx, rbp
mov QWORD PTR [r9+72], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+88]
mov rcx, QWORD PTR [r8+88]
pext rdx, rdx, rbp
mov QWORD PTR [r9+80], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+96]
mov rdx, QWORD PTR [r8+96]
pext rax, rax, rbp
mov QWORD PTR [r9+88], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+104]
mov rax, QWORD PTR [r8+104]
pext rcx, rcx, rbp
mov QWORD PTR [r9+96], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+112]
mov rcx, QWORD PTR [r8+112]
pext rdx, rdx, rbp
mov QWORD PTR [r9+104], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+120]
mov rdx, QWORD PTR [r8+120]
pext rax, rax, rbp
mov QWORD PTR [r9+112], rcx
sbb rdx, rax
mov QWORD PTR [r9+120], rdx
; Restore the non-volatile registers in reverse push order.
pop rbp
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_2048_mont_reduce_avx2_16 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; The routine below is built from AVX2 instructions, so it is only assembled
; when HAVE_INTEL_AVX2 is defined (it is left undefined when NO_AVX2_SUPPORT
; is set, e.g. for MASM versions that do not recognize AVX2).
IFDEF HAVE_INTEL_AVX2
; /* Copy one 16-word (128-byte) entry out of a table of 32 entry pointers,
; * touching every entry so the memory access pattern does not depend on
; * the index.
; *
; * Each of the 32 pointers is dereferenced and all 128 bytes behind it are
; * loaded; the data is ANDed with an all-ones/zero mask (entry number ==
; * index) and ORed into the accumulators. An index outside 0..31 therefore
; * produces an all-zero result. Only the low 32 bits of the index are used.
; *
; * rcx Destination: 128 bytes are written.
; * rdx Table: array of 32 pointers, each to a 128-byte entry.
; * r8  Index of the entry to retrieve.
; *
; * xmm6-xmm13 are saved to the stack on entry and restored on exit.
; * vzeroupper is issued before returning so that neither the caller nor the
; * legacy-SSE movd instructions at the top of the next call run with dirty
; * upper YMM halves.
; */
_text SEGMENT READONLY PARA
sp_2048_get_from_table_avx2_16 PROC
; Spill the vector registers this routine preserves for its caller.
sub rsp, 128
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
vmovdqu OWORD PTR [rsp+32], xmm8
vmovdqu OWORD PTR [rsp+48], xmm9
vmovdqu OWORD PTR [rsp+64], xmm10
vmovdqu OWORD PTR [rsp+80], xmm11
vmovdqu OWORD PTR [rsp+96], xmm12
vmovdqu OWORD PTR [rsp+112], xmm13
; ymm10 = index in every 32-bit lane, ymm11 = 1 in every 32-bit lane.
; vpermd with an all-zero selector (ymm13) replicates dword 0.
mov rax, 1
movd xmm10, r8
movd xmm11, rax
vpxor ymm13, ymm13, ymm13
vpermd ymm10, ymm13, ymm10
vpermd ymm11, ymm13, ymm11
; START: 0-15
; ymm13 = current entry number (all lanes); ymm4-ymm7 = 128-byte accumulator.
vpxor ymm13, ymm13, ymm13
vpxor ymm4, ymm4, ymm4
vpxor ymm5, ymm5, ymm5
vpxor ymm6, ymm6, ymm6
vpxor ymm7, ymm7, ymm7
; Every ENTRY block: ymm12 = (entry number == index) mask, load the entry,
; mask it, OR it into the accumulator, then bump the entry number.
; ENTRY: 0
mov r9, QWORD PTR [rdx]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 16
mov r9, QWORD PTR [rdx+128]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 17
mov r9, QWORD PTR [rdx+136]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 18
mov r9, QWORD PTR [rdx+144]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 19
mov r9, QWORD PTR [rdx+152]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 20
mov r9, QWORD PTR [rdx+160]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 21
mov r9, QWORD PTR [rdx+168]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 22
mov r9, QWORD PTR [rdx+176]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 23
mov r9, QWORD PTR [rdx+184]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 24
mov r9, QWORD PTR [rdx+192]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 25
mov r9, QWORD PTR [rdx+200]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 26
mov r9, QWORD PTR [rdx+208]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 27
mov r9, QWORD PTR [rdx+216]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 28
mov r9, QWORD PTR [rdx+224]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 29
mov r9, QWORD PTR [rdx+232]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 30
mov r9, QWORD PTR [rdx+240]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 31
mov r9, QWORD PTR [rdx+248]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; Write the selected entry (or zeros when nothing matched) to the output.
vmovdqu YMMWORD PTR [rcx], ymm4
vmovdqu YMMWORD PTR [rcx+32], ymm5
vmovdqu YMMWORD PTR [rcx+64], ymm6
vmovdqu YMMWORD PTR [rcx+96], ymm7
; END: 0-15
; Restore the preserved vector registers.
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm8, OWORD PTR [rsp+32]
vmovdqu xmm9, OWORD PTR [rsp+48]
vmovdqu xmm10, OWORD PTR [rsp+64]
vmovdqu xmm11, OWORD PTR [rsp+80]
vmovdqu xmm12, OWORD PTR [rsp+96]
vmovdqu xmm13, OWORD PTR [rsp+112]
; Clear the upper YMM halves still dirty in ymm0-ymm5 before handing control
; back to code that may use legacy SSE encodings. The low 128 bits (and so
; the restored xmm6-xmm13) are left untouched.
vzeroupper
add rsp, 128
ret
sp_2048_get_from_table_avx2_16 ENDP
_text ENDS
ENDIF
ENDIF
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract b and 0 to subtract nothing (r becomes a copy of a).
; *
; * Works in two straight-line passes over 32 64-bit words:
; *   1. (b[i] AND m) is written to a 256-byte scratch area on the stack;
; *   2. r[i] = a[i] - scratch[i] with the borrow chained through SUB/SBB.
; * The AND is done up front because it changes the carry flag and so cannot
; * sit inside the borrow chain.
; *
; * r A single precision number representing condition subtract result (rcx).
; * a A single precision number to subtract from (rdx).
; * b A single precision number to subtract (r8).
; * m Mask value to apply (r9).
; *
; * On return rax is 0 when the subtraction produced no borrow out of the top
; * word and -1 when it did. rax and r8 are used as scratch.
; */
_text SEGMENT READONLY PARA
sp_2048_cond_sub_32 PROC
; Reserve the scratch area for the 32 masked words of b.
sub rsp, 256
; Pass 1: scratch[i] = b[i] AND m.
mov rax, QWORD PTR [r8]
and rax, r9
mov QWORD PTR [rsp], rax
mov rax, QWORD PTR [r8+8]
and rax, r9
mov QWORD PTR [rsp+8], rax
mov rax, QWORD PTR [r8+16]
and rax, r9
mov QWORD PTR [rsp+16], rax
mov rax, QWORD PTR [r8+24]
and rax, r9
mov QWORD PTR [rsp+24], rax
mov rax, QWORD PTR [r8+32]
and rax, r9
mov QWORD PTR [rsp+32], rax
mov rax, QWORD PTR [r8+40]
and rax, r9
mov QWORD PTR [rsp+40], rax
mov rax, QWORD PTR [r8+48]
and rax, r9
mov QWORD PTR [rsp+48], rax
mov rax, QWORD PTR [r8+56]
and rax, r9
mov QWORD PTR [rsp+56], rax
mov rax, QWORD PTR [r8+64]
and rax, r9
mov QWORD PTR [rsp+64], rax
mov rax, QWORD PTR [r8+72]
and rax, r9
mov QWORD PTR [rsp+72], rax
mov rax, QWORD PTR [r8+80]
and rax, r9
mov QWORD PTR [rsp+80], rax
mov rax, QWORD PTR [r8+88]
and rax, r9
mov QWORD PTR [rsp+88], rax
mov rax, QWORD PTR [r8+96]
and rax, r9
mov QWORD PTR [rsp+96], rax
mov rax, QWORD PTR [r8+104]
and rax, r9
mov QWORD PTR [rsp+104], rax
mov rax, QWORD PTR [r8+112]
and rax, r9
mov QWORD PTR [rsp+112], rax
mov rax, QWORD PTR [r8+120]
and rax, r9
mov QWORD PTR [rsp+120], rax
mov rax, QWORD PTR [r8+128]
and rax, r9
mov QWORD PTR [rsp+128], rax
mov rax, QWORD PTR [r8+136]
and rax, r9
mov QWORD PTR [rsp+136], rax
mov rax, QWORD PTR [r8+144]
and rax, r9
mov QWORD PTR [rsp+144], rax
mov rax, QWORD PTR [r8+152]
and rax, r9
mov QWORD PTR [rsp+152], rax
mov rax, QWORD PTR [r8+160]
and rax, r9
mov QWORD PTR [rsp+160], rax
mov rax, QWORD PTR [r8+168]
and rax, r9
mov QWORD PTR [rsp+168], rax
mov rax, QWORD PTR [r8+176]
and rax, r9
mov QWORD PTR [rsp+176], rax
mov rax, QWORD PTR [r8+184]
and rax, r9
mov QWORD PTR [rsp+184], rax
mov rax, QWORD PTR [r8+192]
and rax, r9
mov QWORD PTR [rsp+192], rax
mov rax, QWORD PTR [r8+200]
and rax, r9
mov QWORD PTR [rsp+200], rax
mov rax, QWORD PTR [r8+208]
and rax, r9
mov QWORD PTR [rsp+208], rax
mov rax, QWORD PTR [r8+216]
and rax, r9
mov QWORD PTR [rsp+216], rax
mov rax, QWORD PTR [r8+224]
and rax, r9
mov QWORD PTR [rsp+224], rax
mov rax, QWORD PTR [r8+232]
and rax, r9
mov QWORD PTR [rsp+232], rax
mov rax, QWORD PTR [r8+240]
and rax, r9
mov QWORD PTR [rsp+240], rax
mov rax, QWORD PTR [r8+248]
and rax, r9
mov QWORD PTR [rsp+248], rax
; Pass 2: r[i] = a[i] - scratch[i] - borrow.
; Only MOV sits between the SUB and the SBBs, so the borrow in CF survives.
; rax and r8 alternate (b is no longer needed); each result is stored only
; after the next word of a has been loaded.
mov rax, QWORD PTR [rdx]
sub rax, QWORD PTR [rsp]
mov r8, QWORD PTR [rdx+8]
sbb r8, QWORD PTR [rsp+8]
mov QWORD PTR [rcx], rax
mov rax, QWORD PTR [rdx+16]
sbb rax, QWORD PTR [rsp+16]
mov QWORD PTR [rcx+8], r8
mov r8, QWORD PTR [rdx+24]
sbb r8, QWORD PTR [rsp+24]
mov QWORD PTR [rcx+16], rax
mov rax, QWORD PTR [rdx+32]
sbb rax, QWORD PTR [rsp+32]
mov QWORD PTR [rcx+24], r8
mov r8, QWORD PTR [rdx+40]
sbb r8, QWORD PTR [rsp+40]
mov QWORD PTR [rcx+32], rax
mov rax, QWORD PTR [rdx+48]
sbb rax, QWORD PTR [rsp+48]
mov QWORD PTR [rcx+40], r8
mov r8, QWORD PTR [rdx+56]
sbb r8, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+48], rax
mov rax, QWORD PTR [rdx+64]
sbb rax, QWORD PTR [rsp+64]
mov QWORD PTR [rcx+56], r8
mov r8, QWORD PTR [rdx+72]
sbb r8, QWORD PTR [rsp+72]
mov QWORD PTR [rcx+64], rax
mov rax, QWORD PTR [rdx+80]
sbb rax, QWORD PTR [rsp+80]
mov QWORD PTR [rcx+72], r8
mov r8, QWORD PTR [rdx+88]
sbb r8, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+80], rax
mov rax, QWORD PTR [rdx+96]
sbb rax, QWORD PTR [rsp+96]
mov QWORD PTR [rcx+88], r8
mov r8, QWORD PTR [rdx+104]
sbb r8, QWORD PTR [rsp+104]
mov QWORD PTR [rcx+96], rax
mov rax, QWORD PTR [rdx+112]
sbb rax, QWORD PTR [rsp+112]
mov QWORD PTR [rcx+104], r8
mov r8, QWORD PTR [rdx+120]
sbb r8, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+112], rax
mov rax, QWORD PTR [rdx+128]
sbb rax, QWORD PTR [rsp+128]
mov QWORD PTR [rcx+120], r8
mov r8, QWORD PTR [rdx+136]
sbb r8, QWORD PTR [rsp+136]
mov QWORD PTR [rcx+128], rax
mov rax, QWORD PTR [rdx+144]
sbb rax, QWORD PTR [rsp+144]
mov QWORD PTR [rcx+136], r8
mov r8, QWORD PTR [rdx+152]
sbb r8, QWORD PTR [rsp+152]
mov QWORD PTR [rcx+144], rax
mov rax, QWORD PTR [rdx+160]
sbb rax, QWORD PTR [rsp+160]
mov QWORD PTR [rcx+152], r8
mov r8, QWORD PTR [rdx+168]
sbb r8, QWORD PTR [rsp+168]
mov QWORD PTR [rcx+160], rax
mov rax, QWORD PTR [rdx+176]
sbb rax, QWORD PTR [rsp+176]
mov QWORD PTR [rcx+168], r8
mov r8, QWORD PTR [rdx+184]
sbb r8, QWORD PTR [rsp+184]
mov QWORD PTR [rcx+176], rax
mov rax, QWORD PTR [rdx+192]
sbb rax, QWORD PTR [rsp+192]
mov QWORD PTR [rcx+184], r8
mov r8, QWORD PTR [rdx+200]
sbb r8, QWORD PTR [rsp+200]
mov QWORD PTR [rcx+192], rax
mov rax, QWORD PTR [rdx+208]
sbb rax, QWORD PTR [rsp+208]
mov QWORD PTR [rcx+200], r8
mov r8, QWORD PTR [rdx+216]
sbb r8, QWORD PTR [rsp+216]
mov QWORD PTR [rcx+208], rax
mov rax, QWORD PTR [rdx+224]
sbb rax, QWORD PTR [rsp+224]
mov QWORD PTR [rcx+216], r8
mov r8, QWORD PTR [rdx+232]
sbb r8, QWORD PTR [rsp+232]
mov QWORD PTR [rcx+224], rax
mov rax, QWORD PTR [rdx+240]
sbb rax, QWORD PTR [rsp+240]
mov QWORD PTR [rcx+232], r8
mov r8, QWORD PTR [rdx+248]
sbb r8, QWORD PTR [rsp+248]
mov QWORD PTR [rcx+240], rax
mov QWORD PTR [rcx+248], r8
; rax = 0 - borrow: 0 when no borrow came out of the top word, else -1.
sbb rax, rax
add rsp, 256
ret
sp_2048_cond_sub_32 ENDP
_text ENDS
; /* Reduce the number back to 2048 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; *
; * a is accessed as 64 words: each of the 32 outer iterations updates
; * a[i+0..i+31] and adds its final carry into a[i+32].
; *
; * Register use (arguments arrive in rcx, rdx, r8):
; * rcx a, advanced by one word per outer iteration
; * r9 m (copied out of rdx because mul overwrites rdx)
; * r8 mp
; * r10 outer loop counter, 32 down to 1
; * r13 mu for the current iteration
; * r15, rdi a[i+0] and a[i+1], kept in registers between iterations
; * r11, r12 alternating carry words of the multiply-accumulate chain
; * r14 scratch for the a[i+j] word being updated
; * rsi carry out of the top word (0 or 1), turned into a mask at the end
; *
; * After the loop rcx points at the upper 32 words. sp_2048_cond_sub_32 is
; * then called with r = the original a, a = the upper 32 words, b = m and
; * mask = 0 - carry.
; */
_text SEGMENT READONLY PARA
sp_2048_mont_reduce_32 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
; mul uses rdx:rax for its result, so m lives in r9 from here on
mov r9, rdx
xor rsi, rsi
; i = 32
mov r10, 32
; preload a[0] and a[1]; they are carried in r15/rdi across iterations
mov r15, QWORD PTR [rcx]
mov rdi, QWORD PTR [rcx+8]
L_2048_mont_reduce_32_loop:
; mu = a[i] * mp
mov r13, r15
imul r13, r8
; a[i+0] += m[0] * mu
; (only the carry in r12 is used afterwards; r15 is reloaded below)
mov rax, r13
xor r12, r12
mul QWORD PTR [r9]
add r15, rax
adc r12, rdx
; a[i+1] += m[1] * mu
; (result stays in r15: it is a[i+0] of the next iteration)
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+8]
mov r15, rdi
add r15, rax
adc r11, rdx
add r15, r12
adc r11, 0
; a[i+2] += m[2] * mu
; (result stays in rdi: it is a[i+1] of the next iteration)
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+16]
mov rdi, QWORD PTR [rcx+16]
add rdi, rax
adc r12, rdx
add rdi, r11
adc r12, 0
; a[i+3] += m[3] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+24]
mov r14, QWORD PTR [rcx+24]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+24], r14
adc r11, 0
; a[i+4] += m[4] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+32]
mov r14, QWORD PTR [rcx+32]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+32], r14
adc r12, 0
; a[i+5] += m[5] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+40]
mov r14, QWORD PTR [rcx+40]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+40], r14
adc r11, 0
; a[i+6] += m[6] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+48]
mov r14, QWORD PTR [rcx+48]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+48], r14
adc r12, 0
; a[i+7] += m[7] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+56]
mov r14, QWORD PTR [rcx+56]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+56], r14
adc r11, 0
; a[i+8] += m[8] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+64]
mov r14, QWORD PTR [rcx+64]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+64], r14
adc r12, 0
; a[i+9] += m[9] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+72]
mov r14, QWORD PTR [rcx+72]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+72], r14
adc r11, 0
; a[i+10] += m[10] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+80]
mov r14, QWORD PTR [rcx+80]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+80], r14
adc r12, 0
; a[i+11] += m[11] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+88]
mov r14, QWORD PTR [rcx+88]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+88], r14
adc r11, 0
; a[i+12] += m[12] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+96]
mov r14, QWORD PTR [rcx+96]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+96], r14
adc r12, 0
; a[i+13] += m[13] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+104]
mov r14, QWORD PTR [rcx+104]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+104], r14
adc r11, 0
; a[i+14] += m[14] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+112]
mov r14, QWORD PTR [rcx+112]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+112], r14
adc r12, 0
; a[i+15] += m[15] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+120]
mov r14, QWORD PTR [rcx+120]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+120], r14
adc r11, 0
; a[i+16] += m[16] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+128]
mov r14, QWORD PTR [rcx+128]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+128], r14
adc r12, 0
; a[i+17] += m[17] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+136]
mov r14, QWORD PTR [rcx+136]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+136], r14
adc r11, 0
; a[i+18] += m[18] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+144]
mov r14, QWORD PTR [rcx+144]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+144], r14
adc r12, 0
; a[i+19] += m[19] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+152]
mov r14, QWORD PTR [rcx+152]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+152], r14
adc r11, 0
; a[i+20] += m[20] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+160]
mov r14, QWORD PTR [rcx+160]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+160], r14
adc r12, 0
; a[i+21] += m[21] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+168]
mov r14, QWORD PTR [rcx+168]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+168], r14
adc r11, 0
; a[i+22] += m[22] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+176]
mov r14, QWORD PTR [rcx+176]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+176], r14
adc r12, 0
; a[i+23] += m[23] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+184]
mov r14, QWORD PTR [rcx+184]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+184], r14
adc r11, 0
; a[i+24] += m[24] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+192]
mov r14, QWORD PTR [rcx+192]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+192], r14
adc r12, 0
; a[i+25] += m[25] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+200]
mov r14, QWORD PTR [rcx+200]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+200], r14
adc r11, 0
; a[i+26] += m[26] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+208]
mov r14, QWORD PTR [rcx+208]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+208], r14
adc r12, 0
; a[i+27] += m[27] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+216]
mov r14, QWORD PTR [rcx+216]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+216], r14
adc r11, 0
; a[i+28] += m[28] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+224]
mov r14, QWORD PTR [rcx+224]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+224], r14
adc r12, 0
; a[i+29] += m[29] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+232]
mov r14, QWORD PTR [rcx+232]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+232], r14
adc r11, 0
; a[i+30] += m[30] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+240]
mov r14, QWORD PTR [rcx+240]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+240], r14
adc r12, 0
; a[i+31] += m[31] * mu
; The high product word absorbs the carry saved in rsi by the previous
; iteration; rsi is then rebuilt from the two carries produced here.
; mov (not xor) zeroes rsi so that CF from the adc above survives.
mov rax, r13
mul QWORD PTR [r9+248]
mov r14, QWORD PTR [rcx+248]
add r12, rax
adc rdx, rsi
mov rsi, 0
adc rsi, 0
add r14, r12
mov QWORD PTR [rcx+248], r14
; a[i+32] += high word + carry
adc QWORD PTR [rcx+256], rdx
adc rsi, 0
; i -= 1 (a advances by one word)
add rcx, 8
dec r10
jnz L_2048_mont_reduce_32_loop
; write back the two words that were carried in registers
mov QWORD PTR [rcx], r15
mov QWORD PTR [rcx+8], rdi
; carry (0 or 1) -> mask (0 or -1)
neg rsi
; Arguments for sp_2048_cond_sub_32: rcx = r, rdx = a, r8 = b, r9 = mask.
; r9 (m) must be copied to r8 before the mask overwrites it. This file
; defines _WIN64 unconditionally near its top, so the former non-_WIN64
; arm, which performed the two moves in the opposite order and lost m,
; could never assemble and has been removed.
mov r8, r9
mov r9, rsi
mov rdx, rcx
sub rcx, 256
; NOTE(review): no shadow space is reserved and rsp is not 16-byte aligned
; at this call (six pushes after the return address). The visible tail of
; sp_2048_cond_sub_32 only addresses its own 256-byte frame; confirm the
; whole callee never touches caller shadow space.
call sp_2048_cond_sub_32
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_2048_mont_reduce_32 ENDP
_text ENDS
; /* Subtract b from a and store the difference in r. (r = a - b)
; *
; * r Result, 32 words (rcx).
; * a Number to subtract from, 32 words (rdx).
; * b Number to subtract, 32 words (r8).
; * returns 0 when no borrow leaves the top word, otherwise -1 (all ones).
; */
_text SEGMENT READONLY PARA
sp_2048_sub_32 PROC
        ; Word 0 opens the borrow chain with a plain sub.
        mov     r9, QWORD PTR [rdx]
        sub     r9, QWORD PTR [r8]
        ; Words 1..30, two per repetition. Each word of a is loaded before
        ; the previous result is stored. mov leaves CF untouched, so the
        ; borrow runs unbroken from one sbb to the next.
sp_2048_sub_32_off = 8
REPT 15
        mov     r10, QWORD PTR [rdx+sp_2048_sub_32_off]
        mov     QWORD PTR [rcx+sp_2048_sub_32_off-8], r9
        sbb     r10, QWORD PTR [r8+sp_2048_sub_32_off]
        mov     r9, QWORD PTR [rdx+sp_2048_sub_32_off+8]
        mov     QWORD PTR [rcx+sp_2048_sub_32_off], r10
        sbb     r9, QWORD PTR [r8+sp_2048_sub_32_off+8]
sp_2048_sub_32_off = sp_2048_sub_32_off + 16
ENDM
        ; Word 31 closes the chain.
        mov     r10, QWORD PTR [rdx+248]
        mov     QWORD PTR [rcx+240], r9
        sbb     r10, QWORD PTR [r8+248]
        mov     QWORD PTR [rcx+248], r10
        ; rax = 0 - CF: all ones when the subtraction borrowed
        sbb     rax, rax
        ret
sp_2048_sub_32 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a by the single digit b and store the product in r. (r = a * b)
; *
; * r Result, 33 words (rcx).
; * a Number to multiply, 32 words (rdx).
; * b Digit to multiply by (r8).
; *
; * mulx takes one factor implicitly from rdx, so a is moved to rax and b is
; * placed in rdx. Low product halves are added on the CF chain (adcx) and
; * high halves on the OF chain (adox); r13 is held at zero and seeds each
; * new high word.
; */
_text SEGMENT READONLY PARA
sp_2048_mul_d_avx2_32 PROC
        push    r12
        push    r13
        mov     rax, rdx
        mov     rdx, r8
        ; r13 = 0; the xor also clears CF and OF before the two chains start
        xor     r13, r13
        ; A[0] * B
        mulx    r12, r11, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r11
        ; A[1] * B .. A[30] * B, two words per repetition. r12 and r11 swap
        ; roles each word: one is completed and stored while the other
        ; collects the high half. mulx and mov leave the flags untouched.
sp_2048_mul_d_avx2_32_off = 8
REPT 15
        mulx    r10, r9, QWORD PTR [rax+sp_2048_mul_d_avx2_32_off]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+sp_2048_mul_d_avx2_32_off], r12
        mulx    r10, r9, QWORD PTR [rax+sp_2048_mul_d_avx2_32_off+8]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+sp_2048_mul_d_avx2_32_off+8], r11
sp_2048_mul_d_avx2_32_off = sp_2048_mul_d_avx2_32_off + 16
ENDM
        ; A[31] * B
        mulx    r10, r9, QWORD PTR [rax+248]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        ; fold the last CF into the top word
        adcx    r11, r13
        mov     QWORD PTR [rcx+248], r12
        mov     QWORD PTR [rcx+256], r11
        pop     r13
        pop     r12
        ret
sp_2048_mul_d_avx2_32 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1 The high order half of the number to divide (rcx).
; * d0 The low order half of the number to divide (rdx).
; * div The divisor (r8).
; * returns the quotient in rax; div also leaves the remainder in rdx.
; *
; * The div instruction raises a divide error when div is zero or when the
; * quotient does not fit in 64 bits, so the caller must pass d1 < div.
; */
_text SEGMENT READONLY PARA
div_2048_word_asm_32 PROC
; div expects the number to divide in rdx:rax; d0 arrives in rdx and has
; to be moved to rax before rdx receives d1
mov rax, rdx
mov rdx, rcx
div r8
ret
div_2048_word_asm_32 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 to copy a to r unchanged.
; *
; * r Result of the conditional subtraction, 32 words (rcx).
; * a Number to subtract from, 32 words (rdx).
; * b Number to subtract, 32 words (r8).
; * m Mask value to apply (r9).
; * returns 0 when no borrow leaves the top word, otherwise -1 (all ones).
; *
; * Each word of b is passed through pext with m as the selector: an all-ones
; * m returns the word unchanged and a zero m returns 0. Both cases execute
; * the same instruction sequence.
; */
_text SEGMENT READONLY PARA
sp_2048_cond_sub_avx2_32 PROC
        push    r12
        ; Word 0 opens the borrow chain with a plain sub.
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        sub     r10, r12
        ; Words 1..30, three per repetition. r10, r11 and r12 rotate through
        ; the roles "masked b word", "a word / result" and "previous result
        ; waiting to be stored". mov and pext leave CF untouched, so the
        ; borrow runs unbroken from one sbb to the next.
sp_2048_cond_sub_avx2_32_off = 8
REPT 10
        mov     r12, QWORD PTR [r8+sp_2048_cond_sub_avx2_32_off]
        mov     r11, QWORD PTR [rdx+sp_2048_cond_sub_avx2_32_off]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+sp_2048_cond_sub_avx2_32_off-8], r10
        sbb     r11, r12
        mov     r10, QWORD PTR [r8+sp_2048_cond_sub_avx2_32_off+8]
        mov     r12, QWORD PTR [rdx+sp_2048_cond_sub_avx2_32_off+8]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+sp_2048_cond_sub_avx2_32_off], r11
        sbb     r12, r10
        mov     r11, QWORD PTR [r8+sp_2048_cond_sub_avx2_32_off+16]
        mov     r10, QWORD PTR [rdx+sp_2048_cond_sub_avx2_32_off+16]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+sp_2048_cond_sub_avx2_32_off+8], r12
        sbb     r10, r11
sp_2048_cond_sub_avx2_32_off = sp_2048_cond_sub_avx2_32_off + 24
ENDM
        ; Word 31 closes the chain.
        mov     r12, QWORD PTR [r8+248]
        mov     r11, QWORD PTR [rdx+248]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+240], r10
        sbb     r11, r12
        mov     QWORD PTR [rcx+248], r11
        ; rax = 0 - CF: all ones when the subtraction borrowed
        sbb     rax, rax
        pop     r12
        ret
sp_2048_cond_sub_avx2_32 ENDP
_text ENDS
ENDIF
; /* Compare a with b in constant time.
; *
; * a A single precision integer, 32 words (rcx).
; * b A single precision integer, 32 words (rdx).
; * return -1, 0 or 1 if a is less than, equal to or greater than b
; * respectively.
; *
; * Words are compared from most to least significant. r8 starts as all ones
; * and is cleared at the first differing word; because both operands are
; * ANDed with r8, every later word compares equal and the result already
; * latched in rax is left alone. All 32 words are always processed and no
; * branch is taken.
; */
_text SEGMENT READONLY PARA
sp_2048_cmp_32 PROC
        push    r12
        ; r9 = 0 and r10 = 1 are constants for the cmov instructions;
        ; r8 = "still equal" mask; rax = provisional result
        xor     r9, r9
        mov     r8, -1
        mov     rax, -1
        mov     r10, 1
sp_2048_cmp_32_off = 248
REPT 32
        mov     r11, QWORD PTR [rcx+sp_2048_cmp_32_off]
        mov     r12, QWORD PTR [rdx+sp_2048_cmp_32_off]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        ; a word above b word: result = 1
        cmova   rax, r10
        ; a word below b word: result = r8, which is still -1 at this point
        cmovc   rax, r8
        ; words differ: clear the mask so lower words are ignored
        cmovnz  r8, r9
sp_2048_cmp_32_off = sp_2048_cmp_32_off - 8
ENDM
        ; all words equal: rax and r8 are both -1, so the xor yields 0;
        ; otherwise r8 is 0 and rax is returned unchanged
        xor     rax, r8
        pop     r12
        ret
sp_2048_cmp_32 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_2048_get_from_table_32 PROC
sub rsp, 128
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqu OWORD PTR [rsp+32], xmm8
movdqu OWORD PTR [rsp+48], xmm9
movdqu OWORD PTR [rsp+64], xmm10
movdqu OWORD PTR [rsp+80], xmm11
movdqu OWORD PTR [rsp+96], xmm12
movdqu OWORD PTR [rsp+112], xmm13
mov rax, 1
movd xmm10, r8
movd xmm11, rax
pxor xmm13, xmm13
pshufd xmm11, xmm11, 0
pshufd xmm10, xmm10, 0
; START: 0-7
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 16
mov r9, QWORD PTR [rdx+128]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 17
mov r9, QWORD PTR [rdx+136]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 18
mov r9, QWORD PTR [rdx+144]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 19
mov r9, QWORD PTR [rdx+152]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 20
mov r9, QWORD PTR [rdx+160]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 21
mov r9, QWORD PTR [rdx+168]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 22
mov r9, QWORD PTR [rdx+176]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 23
mov r9, QWORD PTR [rdx+184]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 24
mov r9, QWORD PTR [rdx+192]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 25
mov r9, QWORD PTR [rdx+200]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 26
mov r9, QWORD PTR [rdx+208]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 27
mov r9, QWORD PTR [rdx+216]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 28
mov r9, QWORD PTR [rdx+224]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 29
mov r9, QWORD PTR [rdx+232]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 30
mov r9, QWORD PTR [rdx+240]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 31
mov r9, QWORD PTR [rdx+248]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 32
mov r9, QWORD PTR [rdx+256]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 33
mov r9, QWORD PTR [rdx+264]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 34
mov r9, QWORD PTR [rdx+272]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 35
mov r9, QWORD PTR [rdx+280]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 36
mov r9, QWORD PTR [rdx+288]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 37
mov r9, QWORD PTR [rdx+296]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 38
mov r9, QWORD PTR [rdx+304]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 39
mov r9, QWORD PTR [rdx+312]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 40
mov r9, QWORD PTR [rdx+320]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 41
mov r9, QWORD PTR [rdx+328]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 42
mov r9, QWORD PTR [rdx+336]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 43
mov r9, QWORD PTR [rdx+344]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 44
mov r9, QWORD PTR [rdx+352]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 45
mov r9, QWORD PTR [rdx+360]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 46
mov r9, QWORD PTR [rdx+368]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 47
mov r9, QWORD PTR [rdx+376]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 48
mov r9, QWORD PTR [rdx+384]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 49
mov r9, QWORD PTR [rdx+392]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 50
mov r9, QWORD PTR [rdx+400]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 51
mov r9, QWORD PTR [rdx+408]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 52
mov r9, QWORD PTR [rdx+416]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 53
mov r9, QWORD PTR [rdx+424]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 54
mov r9, QWORD PTR [rdx+432]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 55
mov r9, QWORD PTR [rdx+440]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 56
mov r9, QWORD PTR [rdx+448]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 57
mov r9, QWORD PTR [rdx+456]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 58
mov r9, QWORD PTR [rdx+464]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 59
mov r9, QWORD PTR [rdx+472]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 60
mov r9, QWORD PTR [rdx+480]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 61
mov r9, QWORD PTR [rdx+488]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 62
mov r9, QWORD PTR [rdx+496]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 63
mov r9, QWORD PTR [rdx+504]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 0-7
; START: 8-15
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 16
mov r9, QWORD PTR [rdx+128]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 17
mov r9, QWORD PTR [rdx+136]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 18
mov r9, QWORD PTR [rdx+144]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 19
mov r9, QWORD PTR [rdx+152]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 20
mov r9, QWORD PTR [rdx+160]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 21
mov r9, QWORD PTR [rdx+168]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 22
mov r9, QWORD PTR [rdx+176]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 23
mov r9, QWORD PTR [rdx+184]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 24
mov r9, QWORD PTR [rdx+192]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 25
mov r9, QWORD PTR [rdx+200]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 26
mov r9, QWORD PTR [rdx+208]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 27
mov r9, QWORD PTR [rdx+216]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 28
mov r9, QWORD PTR [rdx+224]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 29
mov r9, QWORD PTR [rdx+232]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 30
mov r9, QWORD PTR [rdx+240]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 31
mov r9, QWORD PTR [rdx+248]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 32
mov r9, QWORD PTR [rdx+256]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 33
mov r9, QWORD PTR [rdx+264]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 34
mov r9, QWORD PTR [rdx+272]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 35
mov r9, QWORD PTR [rdx+280]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 36
mov r9, QWORD PTR [rdx+288]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 37
mov r9, QWORD PTR [rdx+296]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 38
mov r9, QWORD PTR [rdx+304]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 39
mov r9, QWORD PTR [rdx+312]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 40
mov r9, QWORD PTR [rdx+320]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 41
mov r9, QWORD PTR [rdx+328]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 42
mov r9, QWORD PTR [rdx+336]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 43
mov r9, QWORD PTR [rdx+344]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 44
mov r9, QWORD PTR [rdx+352]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 45
mov r9, QWORD PTR [rdx+360]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 46
mov r9, QWORD PTR [rdx+368]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 47
mov r9, QWORD PTR [rdx+376]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 48
mov r9, QWORD PTR [rdx+384]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 49
mov r9, QWORD PTR [rdx+392]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 50
mov r9, QWORD PTR [rdx+400]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 51
mov r9, QWORD PTR [rdx+408]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 52
mov r9, QWORD PTR [rdx+416]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 53
mov r9, QWORD PTR [rdx+424]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 54
mov r9, QWORD PTR [rdx+432]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 55
mov r9, QWORD PTR [rdx+440]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 56
mov r9, QWORD PTR [rdx+448]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 57
mov r9, QWORD PTR [rdx+456]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 58
mov r9, QWORD PTR [rdx+464]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 59
mov r9, QWORD PTR [rdx+472]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 60
mov r9, QWORD PTR [rdx+480]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 61
mov r9, QWORD PTR [rdx+488]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 62
mov r9, QWORD PTR [rdx+496]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 63
mov r9, QWORD PTR [rdx+504]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 8-15
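; START: 16-23
; Gathers bytes 128..191 (64-bit words 16-23) of the selected table entry.
; Each of the 64 entry pointers at [rdx+8*i] is read in turn; its 64-byte
; slice is ANDed with the pcmpeqd mask of xmm13 against xmm10 and ORed into
; xmm4-xmm7, so every entry is touched regardless of which one matches.
; xmm13 is zeroed here and advanced by xmm11 after each entry. xmm10 and
; xmm11 are set up before this section (presumably the broadcast index
; and a per-lane step of 1 - confirm at the top of the procedure).
; The accumulated 64 bytes are then stored at [rcx] and rcx advances by 64.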
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 16
mov r9, QWORD PTR [rdx+128]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 17
mov r9, QWORD PTR [rdx+136]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 18
mov r9, QWORD PTR [rdx+144]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 19
mov r9, QWORD PTR [rdx+152]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 20
mov r9, QWORD PTR [rdx+160]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 21
mov r9, QWORD PTR [rdx+168]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 22
mov r9, QWORD PTR [rdx+176]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 23
mov r9, QWORD PTR [rdx+184]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 24
mov r9, QWORD PTR [rdx+192]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 25
mov r9, QWORD PTR [rdx+200]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 26
mov r9, QWORD PTR [rdx+208]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 27
mov r9, QWORD PTR [rdx+216]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 28
mov r9, QWORD PTR [rdx+224]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 29
mov r9, QWORD PTR [rdx+232]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 30
mov r9, QWORD PTR [rdx+240]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 31
mov r9, QWORD PTR [rdx+248]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 32
mov r9, QWORD PTR [rdx+256]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 33
mov r9, QWORD PTR [rdx+264]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 34
mov r9, QWORD PTR [rdx+272]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 35
mov r9, QWORD PTR [rdx+280]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 36
mov r9, QWORD PTR [rdx+288]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 37
mov r9, QWORD PTR [rdx+296]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 38
mov r9, QWORD PTR [rdx+304]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 39
mov r9, QWORD PTR [rdx+312]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 40
mov r9, QWORD PTR [rdx+320]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 41
mov r9, QWORD PTR [rdx+328]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 42
mov r9, QWORD PTR [rdx+336]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 43
mov r9, QWORD PTR [rdx+344]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 44
mov r9, QWORD PTR [rdx+352]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 45
mov r9, QWORD PTR [rdx+360]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 46
mov r9, QWORD PTR [rdx+368]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 47
mov r9, QWORD PTR [rdx+376]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 48
mov r9, QWORD PTR [rdx+384]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 49
mov r9, QWORD PTR [rdx+392]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 50
mov r9, QWORD PTR [rdx+400]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 51
mov r9, QWORD PTR [rdx+408]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 52
mov r9, QWORD PTR [rdx+416]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 53
mov r9, QWORD PTR [rdx+424]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 54
mov r9, QWORD PTR [rdx+432]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 55
mov r9, QWORD PTR [rdx+440]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 56
mov r9, QWORD PTR [rdx+448]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 57
mov r9, QWORD PTR [rdx+456]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 58
mov r9, QWORD PTR [rdx+464]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 59
mov r9, QWORD PTR [rdx+472]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 60
mov r9, QWORD PTR [rdx+480]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 61
mov r9, QWORD PTR [rdx+488]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 62
mov r9, QWORD PTR [rdx+496]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 63
mov r9, QWORD PTR [rdx+504]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 16-23
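; START: 24-31
; Same masked-select pattern as the preceding sections, now reading the
; 64-byte slice at byte offset 192 of each entry (64-bit words 24-31):
; xmm13 and the xmm4-xmm7 accumulators are zeroed again, then each entry's
; slice is ANDed with the pcmpeqd mask of xmm13 against xmm10 and ORed in.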
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 16
mov r9, QWORD PTR [rdx+128]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 17
mov r9, QWORD PTR [rdx+136]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 18
mov r9, QWORD PTR [rdx+144]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 19
mov r9, QWORD PTR [rdx+152]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 20
mov r9, QWORD PTR [rdx+160]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 21
mov r9, QWORD PTR [rdx+168]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 22
mov r9, QWORD PTR [rdx+176]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 23
mov r9, QWORD PTR [rdx+184]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 24
mov r9, QWORD PTR [rdx+192]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 25
mov r9, QWORD PTR [rdx+200]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 26
mov r9, QWORD PTR [rdx+208]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 27
mov r9, QWORD PTR [rdx+216]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 28
mov r9, QWORD PTR [rdx+224]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 29
mov r9, QWORD PTR [rdx+232]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 30
mov r9, QWORD PTR [rdx+240]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 31
mov r9, QWORD PTR [rdx+248]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 32
mov r9, QWORD PTR [rdx+256]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 33
mov r9, QWORD PTR [rdx+264]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 34
mov r9, QWORD PTR [rdx+272]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 35
mov r9, QWORD PTR [rdx+280]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 36
mov r9, QWORD PTR [rdx+288]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 37
mov r9, QWORD PTR [rdx+296]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 38
mov r9, QWORD PTR [rdx+304]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 39
mov r9, QWORD PTR [rdx+312]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 40
mov r9, QWORD PTR [rdx+320]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 41
mov r9, QWORD PTR [rdx+328]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 42
mov r9, QWORD PTR [rdx+336]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 43
mov r9, QWORD PTR [rdx+344]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 44
mov r9, QWORD PTR [rdx+352]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 45
mov r9, QWORD PTR [rdx+360]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 46
mov r9, QWORD PTR [rdx+368]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 47
mov r9, QWORD PTR [rdx+376]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 48
mov r9, QWORD PTR [rdx+384]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 49
mov r9, QWORD PTR [rdx+392]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 50
mov r9, QWORD PTR [rdx+400]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 51
mov r9, QWORD PTR [rdx+408]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 52
mov r9, QWORD PTR [rdx+416]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 53
mov r9, QWORD PTR [rdx+424]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 54
mov r9, QWORD PTR [rdx+432]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 55
mov r9, QWORD PTR [rdx+440]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 56
mov r9, QWORD PTR [rdx+448]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 57
mov r9, QWORD PTR [rdx+456]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 58
mov r9, QWORD PTR [rdx+464]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 59
mov r9, QWORD PTR [rdx+472]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 60
mov r9, QWORD PTR [rdx+480]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 61
mov r9, QWORD PTR [rdx+488]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 62
mov r9, QWORD PTR [rdx+496]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 63
mov r9, QWORD PTR [rdx+504]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
; END: 24-31
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm8, OWORD PTR [rsp+32]
movdqu xmm9, OWORD PTR [rsp+48]
movdqu xmm10, OWORD PTR [rsp+64]
movdqu xmm11, OWORD PTR [rsp+80]
movdqu xmm12, OWORD PTR [rsp+96]
movdqu xmm13, OWORD PTR [rsp+112]
add rsp, 128
ret
sp_2048_get_from_table_32 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 2048 bits using Montgomery reduction.
; *
; * For each of the 32 low 64-bit words a[i], mu = a[i] * mp is computed and
; * mu * m is added to a starting at word i, with the carry propagated up to
; * a[i+32]. Afterwards a[32..63], minus m when a final carry is left over,
; * is written to a[0..31].
; *
; * a A single precision number to reduce in place.
; *   Words a[0]..a[63] are read; the result is stored in a[0..31] and
; *   a[32..63] are left holding intermediate values.
; * m The single precision number representing the modulus.
; *   Words m[0]..m[31] are read.
; * mp The digit representing the negative inverse of m mod 2^n.
; *
; * Arguments arrive in rcx (a), rdx (m) and r8 (mp).
; * r12-r15, rdi, rsi, rbx and rbp are pushed on entry and popped on exit.
; * rax, rcx, rdx, r8, r9, r10, r11 and the flags are overwritten.
; * Uses the mulx, pext, adcx and adox instructions.
; */
_text SEGMENT READONLY PARA
sp_2048_mont_reduce_avx2_32 PROC
; Save the non-volatile registers that are used as working registers below.
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
push rbp
; r9 = a, r10 = m. rdx is freed because mulx uses it as an implicit operand.
mov r9, rcx
mov r10, rdx
; rbp = carry handed from one loop iteration to the next; starts at zero.
xor rbp, rbp
; i = 32
mov r11, 32
; Cache the four lowest live words: r14, r15, rdi, rsi = a[0..3].
mov r14, QWORD PTR [r9]
mov r15, QWORD PTR [r9+8]
mov rdi, QWORD PTR [r9+16]
mov rsi, QWORD PTR [r9+24]
; Bias r9 by 128 bytes: inside the loop a[i+4]..a[i+32] are addressed as
; [r9-96]..[r9+128].
add r9, 128
; rbp is already zero from the xor above and has not been written since;
; this second clear does not change its value.
xor rbp, rbp
L_2048_mont_reduce_avx2_32_loop:
; On entry to each iteration: r14, r15, rdi, rsi = a[i..i+3],
; r9 = &a[i+16], rbp = carry left by the previous iteration.
; mu = a[i] * mp
mov rdx, r14
mov r12, r14
imul rdx, r8
; rbx = 0. The xor also clears CF and OF, so both carry chains start at zero.
xor rbx, rbx
; a[i+0] += m[0] * mu
; mulx puts the high half of rdx * m[j] in rcx and the low half in rax and
; does not change the flags. adcx chains the low halves through CF and adox
; chains the high halves through OF; the interleaved mov instructions leave
; both flags untouched.
; The sum in r12 is never stored (r12 is reloaded further down); only the
; carry out of this addition is used.
mulx rcx, rax, QWORD PTR [r10]
mov r14, r15
adcx r12, rax
adox r14, rcx
; a[i+1] += m[1] * mu
mulx rcx, rax, QWORD PTR [r10+8]
mov r15, rdi
adcx r14, rax
adox r15, rcx
; a[i+2] += m[2] * mu
mulx rcx, rax, QWORD PTR [r10+16]
mov rdi, rsi
adcx r15, rax
adox rdi, rcx
; a[i+3] += m[3] * mu
; rsi is reloaded with a[i+4]; the cached words have now shifted down by one,
; so r14, r15, rdi, rsi will hold a[i+1..i+4] for the next iteration.
mulx rcx, rax, QWORD PTR [r10+24]
mov rsi, QWORD PTR [r9+-96]
adcx rdi, rax
adox rsi, rcx
; a[i+4] += m[4] * mu
; From here on r13 and r12 alternate as the accumulator for the next word up.
mulx rcx, rax, QWORD PTR [r10+32]
mov r13, QWORD PTR [r9+-88]
adcx rsi, rax
adox r13, rcx
; a[i+5] += m[5] * mu
; Each word from a[i+5] upwards is written back to memory once both of its
; additions are done.
mulx rcx, rax, QWORD PTR [r10+40]
mov r12, QWORD PTR [r9+-80]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-88], r13
; a[i+6] += m[6] * mu
mulx rcx, rax, QWORD PTR [r10+48]
mov r13, QWORD PTR [r9+-72]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-80], r12
; a[i+7] += m[7] * mu
mulx rcx, rax, QWORD PTR [r10+56]
mov r12, QWORD PTR [r9+-64]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-72], r13
; a[i+8] += m[8] * mu
mulx rcx, rax, QWORD PTR [r10+64]
mov r13, QWORD PTR [r9+-56]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-64], r12
; a[i+9] += m[9] * mu
mulx rcx, rax, QWORD PTR [r10+72]
mov r12, QWORD PTR [r9+-48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-56], r13
; a[i+10] += m[10] * mu
mulx rcx, rax, QWORD PTR [r10+80]
mov r13, QWORD PTR [r9+-40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-48], r12
; a[i+11] += m[11] * mu
mulx rcx, rax, QWORD PTR [r10+88]
mov r12, QWORD PTR [r9+-32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-40], r13
; a[i+12] += m[12] * mu
mulx rcx, rax, QWORD PTR [r10+96]
mov r13, QWORD PTR [r9+-24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-32], r12
; a[i+13] += m[13] * mu
mulx rcx, rax, QWORD PTR [r10+104]
mov r12, QWORD PTR [r9+-16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-24], r13
; a[i+14] += m[14] * mu
mulx rcx, rax, QWORD PTR [r10+112]
mov r13, QWORD PTR [r9+-8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-16], r12
; a[i+15] += m[15] * mu
mulx rcx, rax, QWORD PTR [r10+120]
mov r12, QWORD PTR [r9]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-8], r13
; a[i+16] += m[16] * mu
mulx rcx, rax, QWORD PTR [r10+128]
mov r13, QWORD PTR [r9+8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9], r12
; a[i+17] += m[17] * mu
mulx rcx, rax, QWORD PTR [r10+136]
mov r12, QWORD PTR [r9+16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+8], r13
; a[i+18] += m[18] * mu
mulx rcx, rax, QWORD PTR [r10+144]
mov r13, QWORD PTR [r9+24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+16], r12
; a[i+19] += m[19] * mu
mulx rcx, rax, QWORD PTR [r10+152]
mov r12, QWORD PTR [r9+32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+24], r13
; a[i+20] += m[20] * mu
mulx rcx, rax, QWORD PTR [r10+160]
mov r13, QWORD PTR [r9+40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+32], r12
; a[i+21] += m[21] * mu
mulx rcx, rax, QWORD PTR [r10+168]
mov r12, QWORD PTR [r9+48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+40], r13
; a[i+22] += m[22] * mu
mulx rcx, rax, QWORD PTR [r10+176]
mov r13, QWORD PTR [r9+56]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+48], r12
; a[i+23] += m[23] * mu
mulx rcx, rax, QWORD PTR [r10+184]
mov r12, QWORD PTR [r9+64]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+56], r13
; a[i+24] += m[24] * mu
mulx rcx, rax, QWORD PTR [r10+192]
mov r13, QWORD PTR [r9+72]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+64], r12
; a[i+25] += m[25] * mu
mulx rcx, rax, QWORD PTR [r10+200]
mov r12, QWORD PTR [r9+80]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+72], r13
; a[i+26] += m[26] * mu
mulx rcx, rax, QWORD PTR [r10+208]
mov r13, QWORD PTR [r9+88]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+80], r12
; a[i+27] += m[27] * mu
mulx rcx, rax, QWORD PTR [r10+216]
mov r12, QWORD PTR [r9+96]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+88], r13
; a[i+28] += m[28] * mu
mulx rcx, rax, QWORD PTR [r10+224]
mov r13, QWORD PTR [r9+104]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+96], r12
; a[i+29] += m[29] * mu
mulx rcx, rax, QWORD PTR [r10+232]
mov r12, QWORD PTR [r9+112]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+104], r13
; a[i+30] += m[30] * mu
mulx rcx, rax, QWORD PTR [r10+240]
mov r13, QWORD PTR [r9+120]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+112], r12
; a[i+31] += m[31] * mu
mulx rcx, rax, QWORD PTR [r10+248]
mov r12, QWORD PTR [r9+128]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+120], r13
; a[i+32] += carry left by the previous iteration (plus the pending CF).
adcx r12, rbp
; rbp = 0 via rbx (mov keeps the flags), then rbp = OF + CF: the carry out of
; this iteration, to be added one word higher on the next pass.
mov rbp, rbx
mov QWORD PTR [r9+128], r12
adox rbp, rbx
adcx rbp, rbx
; a += 1
add r9, 8
; i -= 1
sub r11, 1
jnz L_2048_mont_reduce_avx2_32_loop
; After 32 iterations r9 = a + 128 + 256. Undo the bias so that
; r8 = &a[32] (source of the upper half) and r9 = &a[0] (destination).
sub r9, 128
; rbp = 0 - carry: all ones when the final carry is 1, zero when it is 0.
neg rbp
mov r8, r9
sub r9, 256
; a[j] = a[32+j] - (m[j] selected by rbp), for j = 0..31.
; pext with an all-ones mask returns m[j] unchanged and with a zero mask
; returns 0, so m is subtracted only when rbp is all ones. The decision
; depends on the carry alone; no comparison against m is made here.
; The first sub starts the borrow chain and every later word uses sbb; the
; pext and mov instructions in between do not modify the flags.
; a[32..35] are still cached in r14, r15, rdi, rsi; a[36..63] are read
; through r8. rdx, rax and rcx rotate as temporaries.
mov rcx, QWORD PTR [r10]
mov rdx, r14
pext rcx, rcx, rbp
sub rdx, rcx
mov rcx, QWORD PTR [r10+8]
mov rax, r15
pext rcx, rcx, rbp
mov QWORD PTR [r9], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+16]
mov rcx, rdi
pext rdx, rdx, rbp
mov QWORD PTR [r9+8], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+24]
mov rdx, rsi
pext rax, rax, rbp
mov QWORD PTR [r9+16], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+32]
mov rax, QWORD PTR [r8+32]
pext rcx, rcx, rbp
mov QWORD PTR [r9+24], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+40]
mov rcx, QWORD PTR [r8+40]
pext rdx, rdx, rbp
mov QWORD PTR [r9+32], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+48]
mov rdx, QWORD PTR [r8+48]
pext rax, rax, rbp
mov QWORD PTR [r9+40], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+56]
mov rax, QWORD PTR [r8+56]
pext rcx, rcx, rbp
mov QWORD PTR [r9+48], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+64]
mov rcx, QWORD PTR [r8+64]
pext rdx, rdx, rbp
mov QWORD PTR [r9+56], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+72]
mov rdx, QWORD PTR [r8+72]
pext rax, rax, rbp
mov QWORD PTR [r9+64], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+80]
mov rax, QWORD PTR [r8+80]
pext rcx, rcx, rbp
mov QWORD PTR [r9+72], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+88]
mov rcx, QWORD PTR [r8+88]
pext rdx, rdx, rbp
mov QWORD PTR [r9+80], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+96]
mov rdx, QWORD PTR [r8+96]
pext rax, rax, rbp
mov QWORD PTR [r9+88], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+104]
mov rax, QWORD PTR [r8+104]
pext rcx, rcx, rbp
mov QWORD PTR [r9+96], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+112]
mov rcx, QWORD PTR [r8+112]
pext rdx, rdx, rbp
mov QWORD PTR [r9+104], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+120]
mov rdx, QWORD PTR [r8+120]
pext rax, rax, rbp
mov QWORD PTR [r9+112], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+128]
mov rax, QWORD PTR [r8+128]
pext rcx, rcx, rbp
mov QWORD PTR [r9+120], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+136]
mov rcx, QWORD PTR [r8+136]
pext rdx, rdx, rbp
mov QWORD PTR [r9+128], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+144]
mov rdx, QWORD PTR [r8+144]
pext rax, rax, rbp
mov QWORD PTR [r9+136], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+152]
mov rax, QWORD PTR [r8+152]
pext rcx, rcx, rbp
mov QWORD PTR [r9+144], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+160]
mov rcx, QWORD PTR [r8+160]
pext rdx, rdx, rbp
mov QWORD PTR [r9+152], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+168]
mov rdx, QWORD PTR [r8+168]
pext rax, rax, rbp
mov QWORD PTR [r9+160], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+176]
mov rax, QWORD PTR [r8+176]
pext rcx, rcx, rbp
mov QWORD PTR [r9+168], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+184]
mov rcx, QWORD PTR [r8+184]
pext rdx, rdx, rbp
mov QWORD PTR [r9+176], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+192]
mov rdx, QWORD PTR [r8+192]
pext rax, rax, rbp
mov QWORD PTR [r9+184], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+200]
mov rax, QWORD PTR [r8+200]
pext rcx, rcx, rbp
mov QWORD PTR [r9+192], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+208]
mov rcx, QWORD PTR [r8+208]
pext rdx, rdx, rbp
mov QWORD PTR [r9+200], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+216]
mov rdx, QWORD PTR [r8+216]
pext rax, rax, rbp
mov QWORD PTR [r9+208], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+224]
mov rax, QWORD PTR [r8+224]
pext rcx, rcx, rbp
mov QWORD PTR [r9+216], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+232]
mov rcx, QWORD PTR [r8+232]
pext rdx, rdx, rbp
mov QWORD PTR [r9+224], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+240]
mov rdx, QWORD PTR [r8+240]
pext rax, rax, rbp
mov QWORD PTR [r9+232], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+248]
mov rax, QWORD PTR [r8+248]
pext rcx, rcx, rbp
mov QWORD PTR [r9+240], rdx
sbb rax, rcx
mov QWORD PTR [r9+248], rax
; Restore the saved registers in the reverse of the push order.
pop rbp
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_2048_mont_reduce_avx2_32 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_2048_get_from_table_avx2_32 PROC
sub rsp, 128
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
vmovdqu OWORD PTR [rsp+32], xmm8
vmovdqu OWORD PTR [rsp+48], xmm9
vmovdqu OWORD PTR [rsp+64], xmm10
vmovdqu OWORD PTR [rsp+80], xmm11
vmovdqu OWORD PTR [rsp+96], xmm12
vmovdqu OWORD PTR [rsp+112], xmm13
mov rax, 1
movd xmm10, r8
movd xmm11, rax
vpxor ymm13, ymm13, ymm13
vpermd ymm10, ymm13, ymm10
vpermd ymm11, ymm13, ymm11
; START: 0-15
vpxor ymm13, ymm13, ymm13
vpxor ymm4, ymm4, ymm4
vpxor ymm5, ymm5, ymm5
vpxor ymm6, ymm6, ymm6
vpxor ymm7, ymm7, ymm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 16
mov r9, QWORD PTR [rdx+128]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 17
mov r9, QWORD PTR [rdx+136]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 18
mov r9, QWORD PTR [rdx+144]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 19
mov r9, QWORD PTR [rdx+152]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 20
mov r9, QWORD PTR [rdx+160]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 21
mov r9, QWORD PTR [rdx+168]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 22
mov r9, QWORD PTR [rdx+176]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 23
mov r9, QWORD PTR [rdx+184]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 24
mov r9, QWORD PTR [rdx+192]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 25
mov r9, QWORD PTR [rdx+200]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 26
mov r9, QWORD PTR [rdx+208]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 27
mov r9, QWORD PTR [rdx+216]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 28
mov r9, QWORD PTR [rdx+224]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 29
mov r9, QWORD PTR [rdx+232]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 30
mov r9, QWORD PTR [rdx+240]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 31
mov r9, QWORD PTR [rdx+248]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 32
mov r9, QWORD PTR [rdx+256]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 33
mov r9, QWORD PTR [rdx+264]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 34
mov r9, QWORD PTR [rdx+272]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 35
mov r9, QWORD PTR [rdx+280]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 36
mov r9, QWORD PTR [rdx+288]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 37
mov r9, QWORD PTR [rdx+296]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 38
mov r9, QWORD PTR [rdx+304]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 39
mov r9, QWORD PTR [rdx+312]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 40
mov r9, QWORD PTR [rdx+320]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 41
mov r9, QWORD PTR [rdx+328]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 42
mov r9, QWORD PTR [rdx+336]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 43
mov r9, QWORD PTR [rdx+344]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 44
mov r9, QWORD PTR [rdx+352]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 45
mov r9, QWORD PTR [rdx+360]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 46
mov r9, QWORD PTR [rdx+368]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 47
mov r9, QWORD PTR [rdx+376]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 48
mov r9, QWORD PTR [rdx+384]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 49
mov r9, QWORD PTR [rdx+392]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 50
mov r9, QWORD PTR [rdx+400]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 51
mov r9, QWORD PTR [rdx+408]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 52
mov r9, QWORD PTR [rdx+416]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 53
mov r9, QWORD PTR [rdx+424]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 54
mov r9, QWORD PTR [rdx+432]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 55
mov r9, QWORD PTR [rdx+440]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 56
mov r9, QWORD PTR [rdx+448]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 57
mov r9, QWORD PTR [rdx+456]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 58
mov r9, QWORD PTR [rdx+464]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 59
mov r9, QWORD PTR [rdx+472]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 60
mov r9, QWORD PTR [rdx+480]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 61
mov r9, QWORD PTR [rdx+488]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 62
mov r9, QWORD PTR [rdx+496]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 63
mov r9, QWORD PTR [rdx+504]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
vmovdqu YMMWORD PTR [rcx], ymm4
vmovdqu YMMWORD PTR [rcx+32], ymm5
vmovdqu YMMWORD PTR [rcx+64], ymm6
vmovdqu YMMWORD PTR [rcx+96], ymm7
add rcx, 128
; END: 0-15
; START: 16-31
vpxor ymm13, ymm13, ymm13
vpxor ymm4, ymm4, ymm4
vpxor ymm5, ymm5, ymm5
vpxor ymm6, ymm6, ymm6
vpxor ymm7, ymm7, ymm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 16
mov r9, QWORD PTR [rdx+128]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 17
mov r9, QWORD PTR [rdx+136]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 18
mov r9, QWORD PTR [rdx+144]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 19
mov r9, QWORD PTR [rdx+152]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 20
mov r9, QWORD PTR [rdx+160]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 21
mov r9, QWORD PTR [rdx+168]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 22
mov r9, QWORD PTR [rdx+176]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 23
mov r9, QWORD PTR [rdx+184]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 24
mov r9, QWORD PTR [rdx+192]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 25
mov r9, QWORD PTR [rdx+200]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 26
mov r9, QWORD PTR [rdx+208]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 27
mov r9, QWORD PTR [rdx+216]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 28
mov r9, QWORD PTR [rdx+224]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 29
mov r9, QWORD PTR [rdx+232]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 30
mov r9, QWORD PTR [rdx+240]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 31
mov r9, QWORD PTR [rdx+248]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 32
mov r9, QWORD PTR [rdx+256]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 33
mov r9, QWORD PTR [rdx+264]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 34
mov r9, QWORD PTR [rdx+272]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 35
mov r9, QWORD PTR [rdx+280]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 36
mov r9, QWORD PTR [rdx+288]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 37
mov r9, QWORD PTR [rdx+296]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 38
mov r9, QWORD PTR [rdx+304]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 39
mov r9, QWORD PTR [rdx+312]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 40
mov r9, QWORD PTR [rdx+320]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 41
mov r9, QWORD PTR [rdx+328]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 42
mov r9, QWORD PTR [rdx+336]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 43
mov r9, QWORD PTR [rdx+344]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 44
mov r9, QWORD PTR [rdx+352]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 45
mov r9, QWORD PTR [rdx+360]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 46
mov r9, QWORD PTR [rdx+368]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 47
mov r9, QWORD PTR [rdx+376]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 48
mov r9, QWORD PTR [rdx+384]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 49
mov r9, QWORD PTR [rdx+392]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 50
mov r9, QWORD PTR [rdx+400]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 51
mov r9, QWORD PTR [rdx+408]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 52
mov r9, QWORD PTR [rdx+416]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 53
mov r9, QWORD PTR [rdx+424]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 54
mov r9, QWORD PTR [rdx+432]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 55
mov r9, QWORD PTR [rdx+440]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 56
mov r9, QWORD PTR [rdx+448]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 57
mov r9, QWORD PTR [rdx+456]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 58
mov r9, QWORD PTR [rdx+464]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 59
mov r9, QWORD PTR [rdx+472]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 60
mov r9, QWORD PTR [rdx+480]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 61
mov r9, QWORD PTR [rdx+488]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 62
mov r9, QWORD PTR [rdx+496]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 63
mov r9, QWORD PTR [rdx+504]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
vmovdqu YMMWORD PTR [rcx], ymm4
vmovdqu YMMWORD PTR [rcx+32], ymm5
vmovdqu YMMWORD PTR [rcx+64], ymm6
vmovdqu YMMWORD PTR [rcx+96], ymm7
; END: 16-31
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm8, OWORD PTR [rsp+32]
vmovdqu xmm9, OWORD PTR [rsp+48]
vmovdqu xmm10, OWORD PTR [rsp+64]
vmovdqu xmm11, OWORD PTR [rsp+80]
vmovdqu xmm12, OWORD PTR [rsp+96]
vmovdqu xmm13, OWORD PTR [rsp+112]
add rsp, 128
ret
sp_2048_get_from_table_avx2_32 ENDP
_text ENDS
ENDIF
; /* Conditionally add a and b using the mask m.
; * m is -1 to add and 0 when not.
; *
; * Works on 16 64-bit words (128 bytes) per operand.
; * Register use as seen in the code (Win64 argument registers):
; *   rcx = r, rdx = a, r8 = b, r9 = m.
; * On return rax holds the carry out of the most significant word (0 or 1).
; *
; * The routine runs in two phases:
; *   1. (b AND m) is written to a 128-byte buffer on the stack. Every word
; *      of b is read before the first store to r.
; *   2. a is added to that buffer with one add followed by an adc chain.
; * There are no branches, so the same instructions run for either mask.
; *
; * NOTE(review): rsp is adjusted without PROC FRAME / .allocstack unwind
; * data - confirm this is acceptable for the Win64 build.
; *
; * r A single precision number representing conditional add result.
; * a A single precision number to add with.
; * b A single precision number to add.
; * m Mask value to apply.
; */
_text SEGMENT READONLY PARA
sp_2048_cond_add_16 PROC
; Reserve the 128-byte buffer for the masked copy of b.
sub rsp, 128
; rax starts at 0 and receives the final carry at the end.
mov rax, 0
; Phase 1: tmp[i] = b[i] AND m, two words per step.
; Words 0-1.
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
and r10, r9
and r11, r9
mov QWORD PTR [rsp], r10
mov QWORD PTR [rsp+8], r11
; Words 2-3.
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+16], r10
mov QWORD PTR [rsp+24], r11
; Words 4-5.
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+32], r10
mov QWORD PTR [rsp+40], r11
; Words 6-7.
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+48], r10
mov QWORD PTR [rsp+56], r11
; Words 8-9.
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+64], r10
mov QWORD PTR [rsp+72], r11
; Words 10-11.
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+80], r10
mov QWORD PTR [rsp+88], r11
; Words 12-13.
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+96], r10
mov QWORD PTR [rsp+104], r11
; Words 14-15.
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+112], r10
mov QWORD PTR [rsp+120], r11
; Phase 2: r = a + tmp.
; r8 is no longer needed as the pointer to b and is reused as scratch for
; the masked word. r10 and r11 alternate as the sum register so that the
; store of word i can be issued after the adc for word i+1.
; Only the first word uses add; every later word uses adc. The mov
; instructions in between leave the carry flag untouched.
mov r10, QWORD PTR [rdx]
mov r8, QWORD PTR [rsp]
add r10, r8
mov r11, QWORD PTR [rdx+8]
mov r8, QWORD PTR [rsp+8]
adc r11, r8
mov QWORD PTR [rcx], r10
mov r10, QWORD PTR [rdx+16]
mov r8, QWORD PTR [rsp+16]
adc r10, r8
mov QWORD PTR [rcx+8], r11
mov r11, QWORD PTR [rdx+24]
mov r8, QWORD PTR [rsp+24]
adc r11, r8
mov QWORD PTR [rcx+16], r10
mov r10, QWORD PTR [rdx+32]
mov r8, QWORD PTR [rsp+32]
adc r10, r8
mov QWORD PTR [rcx+24], r11
mov r11, QWORD PTR [rdx+40]
mov r8, QWORD PTR [rsp+40]
adc r11, r8
mov QWORD PTR [rcx+32], r10
mov r10, QWORD PTR [rdx+48]
mov r8, QWORD PTR [rsp+48]
adc r10, r8
mov QWORD PTR [rcx+40], r11
mov r11, QWORD PTR [rdx+56]
mov r8, QWORD PTR [rsp+56]
adc r11, r8
mov QWORD PTR [rcx+48], r10
mov r10, QWORD PTR [rdx+64]
mov r8, QWORD PTR [rsp+64]
adc r10, r8
mov QWORD PTR [rcx+56], r11
mov r11, QWORD PTR [rdx+72]
mov r8, QWORD PTR [rsp+72]
adc r11, r8
mov QWORD PTR [rcx+64], r10
mov r10, QWORD PTR [rdx+80]
mov r8, QWORD PTR [rsp+80]
adc r10, r8
mov QWORD PTR [rcx+72], r11
mov r11, QWORD PTR [rdx+88]
mov r8, QWORD PTR [rsp+88]
adc r11, r8
mov QWORD PTR [rcx+80], r10
mov r10, QWORD PTR [rdx+96]
mov r8, QWORD PTR [rsp+96]
adc r10, r8
mov QWORD PTR [rcx+88], r11
mov r11, QWORD PTR [rdx+104]
mov r8, QWORD PTR [rsp+104]
adc r11, r8
mov QWORD PTR [rcx+96], r10
mov r10, QWORD PTR [rdx+112]
mov r8, QWORD PTR [rsp+112]
adc r10, r8
mov QWORD PTR [rcx+104], r11
mov r11, QWORD PTR [rdx+120]
mov r8, QWORD PTR [rsp+120]
adc r11, r8
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
; rax = carry out of word 15.
adc rax, 0
; Release the stack buffer.
add rsp, 128
ret
sp_2048_cond_add_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally add a and b using the mask m.
; * m is -1 to add and 0 when not.
; *
; * Works on 16 64-bit words (128 bytes) per operand.
; * Register use as seen in the code (Win64 argument registers):
; *   rcx = r, rdx = a, r8 = b, r9 = m.
; * On return rax holds the carry out of the most significant word (0 or 1).
; *
; * Each word of b is masked with pext (a BMI2 instruction): with an
; * all-ones mask pext returns its source unchanged and with a zero mask it
; * returns 0, so for m = -1 / 0 it selects b[i] or 0. pext does not modify
; * the flags, which lets it sit inside the add/adc carry chain.
; * No stack buffer is used; r12 is saved and restored around the body.
; *
; * r A single precision number representing conditional add result.
; * a A single precision number to add with.
; * b A single precision number to add.
; * m Mask value to apply.
; */
_text SEGMENT READONLY PARA
sp_2048_cond_add_avx2_16 PROC
; r12 is used as a third working register and is restored before ret.
push r12
; rax starts at 0 and receives the final carry at the end.
mov rax, 0
; r10, r11 and r12 rotate roles from word to word: one holds the sum that
; is waiting to be stored, the other two hold the next word of a and the
; next masked word of b. The store of word i is placed between the pext
; and the adc of word i+1; neither mov nor pext changes the carry flag.
; Word 0: the only plain add, it starts the carry chain.
mov r12, QWORD PTR [r8]
mov r10, QWORD PTR [rdx]
pext r12, r12, r9
add r10, r12
; Word 1.
mov r12, QWORD PTR [r8+8]
mov r11, QWORD PTR [rdx+8]
pext r12, r12, r9
mov QWORD PTR [rcx], r10
adc r11, r12
; Word 2.
mov r10, QWORD PTR [r8+16]
mov r12, QWORD PTR [rdx+16]
pext r10, r10, r9
mov QWORD PTR [rcx+8], r11
adc r12, r10
; Word 3.
mov r11, QWORD PTR [r8+24]
mov r10, QWORD PTR [rdx+24]
pext r11, r11, r9
mov QWORD PTR [rcx+16], r12
adc r10, r11
; Word 4.
mov r12, QWORD PTR [r8+32]
mov r11, QWORD PTR [rdx+32]
pext r12, r12, r9
mov QWORD PTR [rcx+24], r10
adc r11, r12
; Word 5.
mov r10, QWORD PTR [r8+40]
mov r12, QWORD PTR [rdx+40]
pext r10, r10, r9
mov QWORD PTR [rcx+32], r11
adc r12, r10
; Word 6.
mov r11, QWORD PTR [r8+48]
mov r10, QWORD PTR [rdx+48]
pext r11, r11, r9
mov QWORD PTR [rcx+40], r12
adc r10, r11
; Word 7.
mov r12, QWORD PTR [r8+56]
mov r11, QWORD PTR [rdx+56]
pext r12, r12, r9
mov QWORD PTR [rcx+48], r10
adc r11, r12
; Word 8.
mov r10, QWORD PTR [r8+64]
mov r12, QWORD PTR [rdx+64]
pext r10, r10, r9
mov QWORD PTR [rcx+56], r11
adc r12, r10
; Word 9.
mov r11, QWORD PTR [r8+72]
mov r10, QWORD PTR [rdx+72]
pext r11, r11, r9
mov QWORD PTR [rcx+64], r12
adc r10, r11
; Word 10.
mov r12, QWORD PTR [r8+80]
mov r11, QWORD PTR [rdx+80]
pext r12, r12, r9
mov QWORD PTR [rcx+72], r10
adc r11, r12
; Word 11.
mov r10, QWORD PTR [r8+88]
mov r12, QWORD PTR [rdx+88]
pext r10, r10, r9
mov QWORD PTR [rcx+80], r11
adc r12, r10
; Word 12.
mov r11, QWORD PTR [r8+96]
mov r10, QWORD PTR [rdx+96]
pext r11, r11, r9
mov QWORD PTR [rcx+88], r12
adc r10, r11
; Word 13.
mov r12, QWORD PTR [r8+104]
mov r11, QWORD PTR [rdx+104]
pext r12, r12, r9
mov QWORD PTR [rcx+96], r10
adc r11, r12
; Word 14.
mov r10, QWORD PTR [r8+112]
mov r12, QWORD PTR [rdx+112]
pext r10, r10, r9
mov QWORD PTR [rcx+104], r11
adc r12, r10
; Word 15.
mov r11, QWORD PTR [r8+120]
mov r10, QWORD PTR [rdx+120]
pext r11, r11, r9
mov QWORD PTR [rcx+112], r12
adc r10, r11
mov QWORD PTR [rcx+120], r10
; rax = carry out of word 15.
adc rax, 0
pop r12
ret
sp_2048_cond_add_avx2_16 ENDP
_text ENDS
ENDIF
; /* Shift number left by n bits. (r = a << n)
; *
; * Reads 32 64-bit words from a and writes 33 words to r; word 32 of r
; * receives the bits shifted out of the top word of a.
; * Register use as seen in the code (Win64 argument registers):
; *   rcx = r, rdx = a, r8b = n.
; * r is copied to rax because cl is needed for the shift count, and rax
; * still holds r on return.
; * The count is used through cl by shld/shl, which the CPU masks to 6 bits
; * for 64-bit operands (presumably callers pass 0..63 - TODO confirm).
; *
; * Words are processed from the most significant end downwards. Each
; * group keeps its lowest loaded word in r13 or r11 (alternating) so the
; * next group can shift into it. Every source word is loaded before the
; * same offset of r is stored.
; *
; * r Result of left shift by n.
; * a Number to shift.
; * n Amount to shift.
; */
_text SEGMENT READONLY PARA
sp_2048_lshift_32 PROC
; r12 and r13 are used as working registers and restored before ret.
push r12
push r13
; Free rcx: rax = r, cl = shift count.
mov rax, rcx
mov cl, r8b
; r12 starts at 0 and collects the bits shifted out of a[31].
mov r12, 0
; Load a[27..31]; a[27] stays in r13 for the next group.
mov r13, QWORD PTR [rdx+216]
mov r8, QWORD PTR [rdx+224]
mov r9, QWORD PTR [rdx+232]
mov r10, QWORD PTR [rdx+240]
mov r11, QWORD PTR [rdx+248]
; shld dst, src, cl shifts dst left and fills from the top bits of src.
; Going from the highest word down keeps each src unshifted until used.
shld r12, r11, cl
shld r11, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r13, cl
; Store r[28..32].
mov QWORD PTR [rax+224], r8
mov QWORD PTR [rax+232], r9
mov QWORD PTR [rax+240], r10
mov QWORD PTR [rax+248], r11
mov QWORD PTR [rax+256], r12
; Load a[23..26]; a[23] stays in r11. r13 (a[27]) is completed here.
mov r11, QWORD PTR [rdx+184]
mov r8, QWORD PTR [rdx+192]
mov r9, QWORD PTR [rdx+200]
mov r10, QWORD PTR [rdx+208]
shld r13, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r11, cl
; Store r[24..27].
mov QWORD PTR [rax+192], r8
mov QWORD PTR [rax+200], r9
mov QWORD PTR [rax+208], r10
mov QWORD PTR [rax+216], r13
; Load a[19..22]; a[19] stays in r13. r11 (a[23]) is completed here.
mov r13, QWORD PTR [rdx+152]
mov r8, QWORD PTR [rdx+160]
mov r9, QWORD PTR [rdx+168]
mov r10, QWORD PTR [rdx+176]
shld r11, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r13, cl
; Store r[20..23].
mov QWORD PTR [rax+160], r8
mov QWORD PTR [rax+168], r9
mov QWORD PTR [rax+176], r10
mov QWORD PTR [rax+184], r11
; Load a[15..18]; a[15] stays in r11. r13 (a[19]) is completed here.
mov r11, QWORD PTR [rdx+120]
mov r8, QWORD PTR [rdx+128]
mov r9, QWORD PTR [rdx+136]
mov r10, QWORD PTR [rdx+144]
shld r13, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r11, cl
; Store r[16..19].
mov QWORD PTR [rax+128], r8
mov QWORD PTR [rax+136], r9
mov QWORD PTR [rax+144], r10
mov QWORD PTR [rax+152], r13
; Load a[11..14]; a[11] stays in r13. r11 (a[15]) is completed here.
mov r13, QWORD PTR [rdx+88]
mov r8, QWORD PTR [rdx+96]
mov r9, QWORD PTR [rdx+104]
mov r10, QWORD PTR [rdx+112]
shld r11, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r13, cl
; Store r[12..15].
mov QWORD PTR [rax+96], r8
mov QWORD PTR [rax+104], r9
mov QWORD PTR [rax+112], r10
mov QWORD PTR [rax+120], r11
; Load a[7..10]; a[7] stays in r11. r13 (a[11]) is completed here.
mov r11, QWORD PTR [rdx+56]
mov r8, QWORD PTR [rdx+64]
mov r9, QWORD PTR [rdx+72]
mov r10, QWORD PTR [rdx+80]
shld r13, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r11, cl
; Store r[8..11].
mov QWORD PTR [rax+64], r8
mov QWORD PTR [rax+72], r9
mov QWORD PTR [rax+80], r10
mov QWORD PTR [rax+88], r13
; Load a[3..6]; a[3] stays in r13. r11 (a[7]) is completed here.
mov r13, QWORD PTR [rdx+24]
mov r8, QWORD PTR [rdx+32]
mov r9, QWORD PTR [rdx+40]
mov r10, QWORD PTR [rdx+48]
shld r11, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r13, cl
; Store r[4..7].
mov QWORD PTR [rax+32], r8
mov QWORD PTR [rax+40], r9
mov QWORD PTR [rax+48], r10
mov QWORD PTR [rax+56], r11
; Load a[0..2]. r13 (a[3]) is completed here.
mov r8, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
shld r13, r10, cl
shld r10, r9, cl
shld r9, r8, cl
; The lowest word has nothing below it, so it is filled with zero bits.
shl r8, cl
; Store r[0..3].
mov QWORD PTR [rax], r8
mov QWORD PTR [rax+8], r9
mov QWORD PTR [rax+16], r10
mov QWORD PTR [rax+24], r13
pop r13
pop r12
ret
sp_2048_lshift_32 ENDP
_text ENDS
ENDIF
ENDIF
IFNDEF WOLFSSL_SP_NO_3072
IFNDEF WOLFSSL_SP_NO_3072
; /* Load a big-endian byte array into a 48-word (3072-bit) number.
; * The byte order of each 64-bit word is reversed with bswap.
; *
; * r    Destination: 384 bytes, least significant 64-bit word first.
; * size Maximum number of bytes to convert (this routine does not read it).
; * a    Big-endian source bytes.
; * n    Number of bytes in a.
; *
; * Registers (Win64 argument registers): rcx = r, rdx = size, r8 = a, r9 = n.
; * The source is consumed from its end: 64 bytes at a time, then 8 bytes
; * at a time, then the leading bytes that remain (fewer than 8), which are
; * packed into one word. Whatever part of r is not written from a is
; * zero filled.
; * NOTE(review): n is used as a full 64-bit value in r9 - if the C
; * prototype declares it as a 32-bit int, confirm callers extend it.
; */
_text SEGMENT READONLY PARA
sp_3072_from_bin_bswap PROC
        push    r12
        lea     r11, [r8+r9]                    ; r11 = a + n (end of input)
        lea     r12, [rcx+384]                  ; r12 = r + 384 (end of output)

        ; 64 bytes per pass while more than 63 bytes remain.
        cmp     r9, 63
        jle     L_3072_from_bin_bswap_blk_done
L_3072_from_bin_bswap_blk_loop:
        sub     r11, 64
sp_3072_from_bin_bswap_off = 0
        REPT    4
        mov     rax, QWORD PTR [r11+(56-sp_3072_from_bin_bswap_off)]
        mov     r10, QWORD PTR [r11+(48-sp_3072_from_bin_bswap_off)]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+sp_3072_from_bin_bswap_off], rax
        mov     QWORD PTR [rcx+(sp_3072_from_bin_bswap_off+8)], r10
sp_3072_from_bin_bswap_off = sp_3072_from_bin_bswap_off + 16
        ENDM
        add     rcx, 64
        sub     r9, 64
        cmp     r9, 63
        jg      L_3072_from_bin_bswap_blk_loop
L_3072_from_bin_bswap_blk_done:

        ; 8 bytes per pass while more than 7 bytes remain.
        cmp     r9, 7
        jle     L_3072_from_bin_bswap_word_done
L_3072_from_bin_bswap_word_loop:
        sub     r11, 8
        mov     rax, QWORD PTR [r11]
        bswap   rax
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
        cmp     r9, 7
        jg      L_3072_from_bin_bswap_word_loop
L_3072_from_bin_bswap_word_done:

        ; Leading bytes: accumulate them, first byte most significant.
        test    r9, r9
        jz      L_3072_from_bin_bswap_top_done
        xor     r10d, r10d
L_3072_from_bin_bswap_top_loop:
        movzx   eax, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_3072_from_bin_bswap_top_loop
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_3072_from_bin_bswap_top_done:

        ; Clear the words of r that were not written above.
        cmp     rcx, r12
        jge     L_3072_from_bin_bswap_fill_done
L_3072_from_bin_bswap_fill_loop:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, r12
        jl      L_3072_from_bin_bswap_fill_loop
L_3072_from_bin_bswap_fill_done:
        pop     r12
        ret
sp_3072_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Load a big-endian byte array into a 48-word (3072-bit) number.
; * Each 64-bit word is loaded byte-reversed with movbe, which is an
; * optional instruction.
; *
; * r    Destination: 384 bytes, least significant 64-bit word first.
; * size Maximum number of bytes to convert (this routine does not read it).
; * a    Big-endian source bytes.
; * n    Number of bytes in a.
; *
; * Registers (Win64 argument registers): rcx = r, rdx = size, r8 = a, r9 = n.
; * The source is consumed from its end: 64 bytes at a time, then 8 bytes
; * at a time, then the leading bytes that remain (fewer than 8), which are
; * packed into one word. Whatever part of r is not written from a is
; * zero filled.
; * NOTE(review): n is used as a full 64-bit value in r9 - if the C
; * prototype declares it as a 32-bit int, confirm callers extend it.
; */
_text SEGMENT READONLY PARA
sp_3072_from_bin_movbe PROC
        push    r12
        lea     r11, [r8+r9]                    ; r11 = a + n (end of input)
        lea     r12, [rcx+384]                  ; r12 = r + 384 (end of output)

        ; 64 bytes per pass while more than 63 bytes remain.
        cmp     r9, 63
        jle     L_3072_from_bin_movbe_blk_done
L_3072_from_bin_movbe_blk_loop:
        sub     r11, 64
sp_3072_from_bin_movbe_off = 0
        REPT    4
        movbe   rax, QWORD PTR [r11+(56-sp_3072_from_bin_movbe_off)]
        movbe   r10, QWORD PTR [r11+(48-sp_3072_from_bin_movbe_off)]
        mov     QWORD PTR [rcx+sp_3072_from_bin_movbe_off], rax
        mov     QWORD PTR [rcx+(sp_3072_from_bin_movbe_off+8)], r10
sp_3072_from_bin_movbe_off = sp_3072_from_bin_movbe_off + 16
        ENDM
        add     rcx, 64
        sub     r9, 64
        cmp     r9, 63
        jg      L_3072_from_bin_movbe_blk_loop
L_3072_from_bin_movbe_blk_done:

        ; 8 bytes per pass while more than 7 bytes remain.
        cmp     r9, 7
        jle     L_3072_from_bin_movbe_word_done
L_3072_from_bin_movbe_word_loop:
        sub     r11, 8
        movbe   rax, QWORD PTR [r11]
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
        cmp     r9, 7
        jg      L_3072_from_bin_movbe_word_loop
L_3072_from_bin_movbe_word_done:

        ; Leading bytes: accumulate them, first byte most significant.
        test    r9, r9
        jz      L_3072_from_bin_movbe_top_done
        xor     r10d, r10d
L_3072_from_bin_movbe_top_loop:
        movzx   eax, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_3072_from_bin_movbe_top_loop
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_3072_from_bin_movbe_top_done:

        ; Clear the words of r that were not written above.
        cmp     rcx, r12
        jge     L_3072_from_bin_movbe_fill_done
L_3072_from_bin_movbe_fill_loop:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, r12
        jl      L_3072_from_bin_movbe_fill_loop
L_3072_from_bin_movbe_fill_done:
        pop     r12
        ret
sp_3072_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Serialize a 48-word number as a big-endian byte array.
; * Always writes exactly 384 bytes.
; * Byte order of each 64-bit word is reversed with bswap.
; *
; * r A single precision integer (48 words, least significant first).
; * a Byte array that receives the 384 output bytes.
; *
; * Registers (Win64 argument registers): rcx = r, rdx = a.
; * The words of r are read from the highest offset (376) down to 0 while
; * a is written from offset 0 upwards, two words per step. rax and r8 are
; * the only working registers.
; */
_text SEGMENT READONLY PARA
sp_3072_to_bin_bswap_48 PROC
; Assembly-time byte offset into a; the REPT block below expands into
; 24 straight-line copies, one per pair of words.
sp_3072_to_bin_bswap_off = 0
        REPT    24
        mov     rax, QWORD PTR [rcx+(376-sp_3072_to_bin_bswap_off)]
        mov     r8, QWORD PTR [rcx+(368-sp_3072_to_bin_bswap_off)]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+sp_3072_to_bin_bswap_off], rax
        mov     QWORD PTR [rdx+(sp_3072_to_bin_bswap_off+8)], r8
sp_3072_to_bin_bswap_off = sp_3072_to_bin_bswap_off + 16
        ENDM
        ret
sp_3072_to_bin_bswap_48 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Serialize r into a as a big endian byte string.
; * Fixed length number of bytes written: 384
; * Uses the movbe instruction which is optional.
; *
; * r A single precision integer; read as 48 64-bit words through rcx.
; * a Byte array; 384 bytes are stored through rdx.
; *
; * The most significant word (offset 376 of r) is byte-swapped first and
; * stored at offset 0 of a; each following pair of words moves 16 bytes
; * down in r and 16 bytes up in a.  Only rax and r8 are used as scratch.
; */
_text SEGMENT READONLY PARA
; Assembly-time byte offset into the output array for the current word pair.
sp_3072_to_bin_movbe_48_off = 0
sp_3072_to_bin_movbe_48 PROC
        REPT 24
        ; Load two adjacent words with their bytes reversed ...
        movbe   rax, QWORD PTR [rcx+(376-sp_3072_to_bin_movbe_48_off)]
        movbe   r8, QWORD PTR [rcx+(368-sp_3072_to_bin_movbe_48_off)]
        ; ... and store them at the mirrored position in the byte array.
        mov     QWORD PTR [rdx+sp_3072_to_bin_movbe_48_off], rax
        mov     QWORD PTR [rdx+(sp_3072_to_bin_movbe_48_off+8)], r8
sp_3072_to_bin_movbe_48_off = sp_3072_to_bin_movbe_48_off + 16
        ENDM
        ret
sp_3072_to_bin_movbe_48 ENDP
_text ENDS
ENDIF
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer; 24 64-bit words are stored through rcx.
; * a A single precision integer; 12 words, arrives in rdx and is kept in r9.
; * b A single precision integer; 12 words, read through r8.
; *
; * The product is built one result word (column) at a time.  For column k
; * every A[i] * B[k-i] is added into a three-register accumulator taken
; * from r10/r11/r12; the lowest register is then stored as result word k
; * and the register roles rotate by one for the next column.
; *
; * Result words 0..11 are staged in a 96-byte stack buffer and copied to r
; * only after every product has been formed; words 12..23 are stored to r
; * directly.  (Presumably the staging lets r share storage with a or b -
; * confirm against the callers.)
; *
; * r12 is saved and restored; rax, rdx, r9, r10 and r11 are overwritten.
; */

; Start a new column with A[a_idx] * B[b_idx].
; acc_hi is zeroed here (after the mul, before the adds) because it still
; held the previous column's stored word; acc_lo/acc_mid carry over.
sp_3072_mul_12_col MACRO a_idx, b_idx, acc_lo, acc_mid, acc_hi
        mov     rax, QWORD PTR [r8+(8*b_idx)]
        mul     QWORD PTR [r9+(8*a_idx)]
        xor     acc_hi, acc_hi
        add     acc_lo, rax
        adc     acc_mid, rdx
        adc     acc_hi, 0
ENDM

; Add A[a_idx] * B[b_idx] into the column accumulator acc_hi:acc_mid:acc_lo.
sp_3072_mul_12_acc MACRO a_idx, b_idx, acc_lo, acc_mid, acc_hi
        mov     rax, QWORD PTR [r8+(8*b_idx)]
        mul     QWORD PTR [r9+(8*a_idx)]
        add     acc_lo, rax
        adc     acc_mid, rdx
        adc     acc_hi, 0
ENDM

; Assembly-time byte offset used by the final stack-to-r copy.
sp_3072_mul_12_off = 0

_text SEGMENT READONLY PARA
sp_3072_mul_12 PROC
        push    r12
        ; mul writes rdx, so the pointer to a is moved out of the way.
        mov     r9, rdx
        ; Staging buffer for result words 0..11.
        sub     rsp, 96

        ; Column 0: A[0] * B[0]
        mov     rax, QWORD PTR [r8]
        mul     QWORD PTR [r9]
        xor     r12, r12
        mov     QWORD PTR [rsp], rax
        mov     r11, rdx

        ; Column 1
        sp_3072_mul_12_col 0, 1, r11, r12, r10
        sp_3072_mul_12_acc 1, 0, r11, r12, r10
        mov     QWORD PTR [rsp+8], r11

        ; Column 2
        sp_3072_mul_12_col 0, 2, r12, r10, r11
        sp_3072_mul_12_acc 1, 1, r12, r10, r11
        sp_3072_mul_12_acc 2, 0, r12, r10, r11
        mov     QWORD PTR [rsp+16], r12

        ; Column 3
        sp_3072_mul_12_col 0, 3, r10, r11, r12
        sp_3072_mul_12_acc 1, 2, r10, r11, r12
        sp_3072_mul_12_acc 2, 1, r10, r11, r12
        sp_3072_mul_12_acc 3, 0, r10, r11, r12
        mov     QWORD PTR [rsp+24], r10

        ; Column 4
        sp_3072_mul_12_col 0, 4, r11, r12, r10
        sp_3072_mul_12_acc 1, 3, r11, r12, r10
        sp_3072_mul_12_acc 2, 2, r11, r12, r10
        sp_3072_mul_12_acc 3, 1, r11, r12, r10
        sp_3072_mul_12_acc 4, 0, r11, r12, r10
        mov     QWORD PTR [rsp+32], r11

        ; Column 5
        sp_3072_mul_12_col 0, 5, r12, r10, r11
        sp_3072_mul_12_acc 1, 4, r12, r10, r11
        sp_3072_mul_12_acc 2, 3, r12, r10, r11
        sp_3072_mul_12_acc 3, 2, r12, r10, r11
        sp_3072_mul_12_acc 4, 1, r12, r10, r11
        sp_3072_mul_12_acc 5, 0, r12, r10, r11
        mov     QWORD PTR [rsp+40], r12

        ; Column 6
        sp_3072_mul_12_col 0, 6, r10, r11, r12
        sp_3072_mul_12_acc 1, 5, r10, r11, r12
        sp_3072_mul_12_acc 2, 4, r10, r11, r12
        sp_3072_mul_12_acc 3, 3, r10, r11, r12
        sp_3072_mul_12_acc 4, 2, r10, r11, r12
        sp_3072_mul_12_acc 5, 1, r10, r11, r12
        sp_3072_mul_12_acc 6, 0, r10, r11, r12
        mov     QWORD PTR [rsp+48], r10

        ; Column 7
        sp_3072_mul_12_col 0, 7, r11, r12, r10
        sp_3072_mul_12_acc 1, 6, r11, r12, r10
        sp_3072_mul_12_acc 2, 5, r11, r12, r10
        sp_3072_mul_12_acc 3, 4, r11, r12, r10
        sp_3072_mul_12_acc 4, 3, r11, r12, r10
        sp_3072_mul_12_acc 5, 2, r11, r12, r10
        sp_3072_mul_12_acc 6, 1, r11, r12, r10
        sp_3072_mul_12_acc 7, 0, r11, r12, r10
        mov     QWORD PTR [rsp+56], r11

        ; Column 8
        sp_3072_mul_12_col 0, 8, r12, r10, r11
        sp_3072_mul_12_acc 1, 7, r12, r10, r11
        sp_3072_mul_12_acc 2, 6, r12, r10, r11
        sp_3072_mul_12_acc 3, 5, r12, r10, r11
        sp_3072_mul_12_acc 4, 4, r12, r10, r11
        sp_3072_mul_12_acc 5, 3, r12, r10, r11
        sp_3072_mul_12_acc 6, 2, r12, r10, r11
        sp_3072_mul_12_acc 7, 1, r12, r10, r11
        sp_3072_mul_12_acc 8, 0, r12, r10, r11
        mov     QWORD PTR [rsp+64], r12

        ; Column 9
        sp_3072_mul_12_col 0, 9, r10, r11, r12
        sp_3072_mul_12_acc 1, 8, r10, r11, r12
        sp_3072_mul_12_acc 2, 7, r10, r11, r12
        sp_3072_mul_12_acc 3, 6, r10, r11, r12
        sp_3072_mul_12_acc 4, 5, r10, r11, r12
        sp_3072_mul_12_acc 5, 4, r10, r11, r12
        sp_3072_mul_12_acc 6, 3, r10, r11, r12
        sp_3072_mul_12_acc 7, 2, r10, r11, r12
        sp_3072_mul_12_acc 8, 1, r10, r11, r12
        sp_3072_mul_12_acc 9, 0, r10, r11, r12
        mov     QWORD PTR [rsp+72], r10

        ; Column 10
        sp_3072_mul_12_col 0, 10, r11, r12, r10
        sp_3072_mul_12_acc 1, 9, r11, r12, r10
        sp_3072_mul_12_acc 2, 8, r11, r12, r10
        sp_3072_mul_12_acc 3, 7, r11, r12, r10
        sp_3072_mul_12_acc 4, 6, r11, r12, r10
        sp_3072_mul_12_acc 5, 5, r11, r12, r10
        sp_3072_mul_12_acc 6, 4, r11, r12, r10
        sp_3072_mul_12_acc 7, 3, r11, r12, r10
        sp_3072_mul_12_acc 8, 2, r11, r12, r10
        sp_3072_mul_12_acc 9, 1, r11, r12, r10
        sp_3072_mul_12_acc 10, 0, r11, r12, r10
        mov     QWORD PTR [rsp+80], r11

        ; Column 11
        sp_3072_mul_12_col 0, 11, r12, r10, r11
        sp_3072_mul_12_acc 1, 10, r12, r10, r11
        sp_3072_mul_12_acc 2, 9, r12, r10, r11
        sp_3072_mul_12_acc 3, 8, r12, r10, r11
        sp_3072_mul_12_acc 4, 7, r12, r10, r11
        sp_3072_mul_12_acc 5, 6, r12, r10, r11
        sp_3072_mul_12_acc 6, 5, r12, r10, r11
        sp_3072_mul_12_acc 7, 4, r12, r10, r11
        sp_3072_mul_12_acc 8, 3, r12, r10, r11
        sp_3072_mul_12_acc 9, 2, r12, r10, r11
        sp_3072_mul_12_acc 10, 1, r12, r10, r11
        sp_3072_mul_12_acc 11, 0, r12, r10, r11
        mov     QWORD PTR [rsp+88], r12

        ; Column 12 - from here on result words go straight to r.
        sp_3072_mul_12_col 1, 11, r10, r11, r12
        sp_3072_mul_12_acc 2, 10, r10, r11, r12
        sp_3072_mul_12_acc 3, 9, r10, r11, r12
        sp_3072_mul_12_acc 4, 8, r10, r11, r12
        sp_3072_mul_12_acc 5, 7, r10, r11, r12
        sp_3072_mul_12_acc 6, 6, r10, r11, r12
        sp_3072_mul_12_acc 7, 5, r10, r11, r12
        sp_3072_mul_12_acc 8, 4, r10, r11, r12
        sp_3072_mul_12_acc 9, 3, r10, r11, r12
        sp_3072_mul_12_acc 10, 2, r10, r11, r12
        sp_3072_mul_12_acc 11, 1, r10, r11, r12
        mov     QWORD PTR [rcx+96], r10

        ; Column 13
        sp_3072_mul_12_col 2, 11, r11, r12, r10
        sp_3072_mul_12_acc 3, 10, r11, r12, r10
        sp_3072_mul_12_acc 4, 9, r11, r12, r10
        sp_3072_mul_12_acc 5, 8, r11, r12, r10
        sp_3072_mul_12_acc 6, 7, r11, r12, r10
        sp_3072_mul_12_acc 7, 6, r11, r12, r10
        sp_3072_mul_12_acc 8, 5, r11, r12, r10
        sp_3072_mul_12_acc 9, 4, r11, r12, r10
        sp_3072_mul_12_acc 10, 3, r11, r12, r10
        sp_3072_mul_12_acc 11, 2, r11, r12, r10
        mov     QWORD PTR [rcx+104], r11

        ; Column 14
        sp_3072_mul_12_col 3, 11, r12, r10, r11
        sp_3072_mul_12_acc 4, 10, r12, r10, r11
        sp_3072_mul_12_acc 5, 9, r12, r10, r11
        sp_3072_mul_12_acc 6, 8, r12, r10, r11
        sp_3072_mul_12_acc 7, 7, r12, r10, r11
        sp_3072_mul_12_acc 8, 6, r12, r10, r11
        sp_3072_mul_12_acc 9, 5, r12, r10, r11
        sp_3072_mul_12_acc 10, 4, r12, r10, r11
        sp_3072_mul_12_acc 11, 3, r12, r10, r11
        mov     QWORD PTR [rcx+112], r12

        ; Column 15
        sp_3072_mul_12_col 4, 11, r10, r11, r12
        sp_3072_mul_12_acc 5, 10, r10, r11, r12
        sp_3072_mul_12_acc 6, 9, r10, r11, r12
        sp_3072_mul_12_acc 7, 8, r10, r11, r12
        sp_3072_mul_12_acc 8, 7, r10, r11, r12
        sp_3072_mul_12_acc 9, 6, r10, r11, r12
        sp_3072_mul_12_acc 10, 5, r10, r11, r12
        sp_3072_mul_12_acc 11, 4, r10, r11, r12
        mov     QWORD PTR [rcx+120], r10

        ; Column 16
        sp_3072_mul_12_col 5, 11, r11, r12, r10
        sp_3072_mul_12_acc 6, 10, r11, r12, r10
        sp_3072_mul_12_acc 7, 9, r11, r12, r10
        sp_3072_mul_12_acc 8, 8, r11, r12, r10
        sp_3072_mul_12_acc 9, 7, r11, r12, r10
        sp_3072_mul_12_acc 10, 6, r11, r12, r10
        sp_3072_mul_12_acc 11, 5, r11, r12, r10
        mov     QWORD PTR [rcx+128], r11

        ; Column 17
        sp_3072_mul_12_col 6, 11, r12, r10, r11
        sp_3072_mul_12_acc 7, 10, r12, r10, r11
        sp_3072_mul_12_acc 8, 9, r12, r10, r11
        sp_3072_mul_12_acc 9, 8, r12, r10, r11
        sp_3072_mul_12_acc 10, 7, r12, r10, r11
        sp_3072_mul_12_acc 11, 6, r12, r10, r11
        mov     QWORD PTR [rcx+136], r12

        ; Column 18
        sp_3072_mul_12_col 7, 11, r10, r11, r12
        sp_3072_mul_12_acc 8, 10, r10, r11, r12
        sp_3072_mul_12_acc 9, 9, r10, r11, r12
        sp_3072_mul_12_acc 10, 8, r10, r11, r12
        sp_3072_mul_12_acc 11, 7, r10, r11, r12
        mov     QWORD PTR [rcx+144], r10

        ; Column 19
        sp_3072_mul_12_col 8, 11, r11, r12, r10
        sp_3072_mul_12_acc 9, 10, r11, r12, r10
        sp_3072_mul_12_acc 10, 9, r11, r12, r10
        sp_3072_mul_12_acc 11, 8, r11, r12, r10
        mov     QWORD PTR [rcx+152], r11

        ; Column 20
        sp_3072_mul_12_col 9, 11, r12, r10, r11
        sp_3072_mul_12_acc 10, 10, r12, r10, r11
        sp_3072_mul_12_acc 11, 9, r12, r10, r11
        mov     QWORD PTR [rcx+160], r12

        ; Column 21
        sp_3072_mul_12_col 10, 11, r10, r11, r12
        sp_3072_mul_12_acc 11, 10, r10, r11, r12
        mov     QWORD PTR [rcx+168], r10

        ; Column 22: A[11] * B[11].  No third accumulator word is kept;
        ; the two remaining registers are result words 22 and 23.
        mov     rax, QWORD PTR [r8+88]
        mul     QWORD PTR [r9+88]
        add     r11, rax
        adc     r12, rdx
        mov     QWORD PTR [rcx+176], r11
        mov     QWORD PTR [rcx+184], r12

        ; Copy the staged result words 0..11 from the stack to r,
        ; four words per round.
        REPT 3
        mov     rax, QWORD PTR [rsp+sp_3072_mul_12_off]
        mov     rdx, QWORD PTR [rsp+(sp_3072_mul_12_off+8)]
        mov     r10, QWORD PTR [rsp+(sp_3072_mul_12_off+16)]
        mov     r11, QWORD PTR [rsp+(sp_3072_mul_12_off+24)]
        mov     QWORD PTR [rcx+sp_3072_mul_12_off], rax
        mov     QWORD PTR [rcx+(sp_3072_mul_12_off+8)], rdx
        mov     QWORD PTR [rcx+(sp_3072_mul_12_off+16)], r10
        mov     QWORD PTR [rcx+(sp_3072_mul_12_off+24)], r11
sp_3072_mul_12_off = sp_3072_mul_12_off + 32
        ENDM

        add     rsp, 96
        pop     r12
        ret
sp_3072_mul_12 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
; *
; * r Result of multiplication.
; * a First number to multiply.
; * b Second number to multiply.
; */
_text SEGMENT READONLY PARA
sp_3072_mul_avx2_12 PROC
push rbx
push rbp
push r12
push r13
push r14
mov rbp, r8
mov r8, rcx
mov r9, rdx
sub rsp, 96
cmp r9, r8
mov rbx, rsp
cmovne rbx, r8
cmp rbp, r8
cmove rbx, rsp
add r8, 96
xor r14, r14
mov rdx, QWORD PTR [r9]
; A[0] * B[0]
mulx r11, r10, QWORD PTR [rbp]
; A[0] * B[1]
mulx r12, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx], r10
adcx r11, rax
mov QWORD PTR [rbx+8], r11
; A[0] * B[2]
mulx r10, rax, QWORD PTR [rbp+16]
adcx r12, rax
; A[0] * B[3]
mulx r11, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+16], r12
adcx r10, rax
mov QWORD PTR [rbx+24], r10
; A[0] * B[4]
mulx r12, rax, QWORD PTR [rbp+32]
adcx r11, rax
; A[0] * B[5]
mulx r10, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+32], r11
adcx r12, rax
mov QWORD PTR [rbx+40], r12
; A[0] * B[6]
mulx r11, rax, QWORD PTR [rbp+48]
adcx r10, rax
; A[0] * B[7]
mulx r12, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+48], r10
adcx r11, rax
mov QWORD PTR [rbx+56], r11
; A[0] * B[8]
mulx r10, rax, QWORD PTR [rbp+64]
adcx r12, rax
; A[0] * B[9]
mulx r11, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+64], r12
adcx r10, rax
mov QWORD PTR [rbx+72], r10
; A[0] * B[10]
mulx r12, rax, QWORD PTR [rbp+80]
adcx r11, rax
; A[0] * B[11]
mulx r10, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+80], r11
adcx r12, rax
adcx r10, r14
mov r13, r14
adcx r13, r14
mov QWORD PTR [rbx+88], r12
mov QWORD PTR [r8], r10
mov rdx, QWORD PTR [r9+8]
mov r11, QWORD PTR [rbx+8]
mov r12, QWORD PTR [rbx+16]
mov r10, QWORD PTR [rbx+24]
; A[1] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[1] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+8], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+16], r12
mov r11, QWORD PTR [rbx+32]
mov r12, QWORD PTR [rbx+40]
; A[1] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r10, rax
adox r11, rcx
; A[1] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+24], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+32], r11
mov r10, QWORD PTR [rbx+48]
mov r11, QWORD PTR [rbx+56]
; A[1] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r10, rcx
; A[1] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+40], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+48], r10
mov r12, QWORD PTR [rbx+64]
mov r10, QWORD PTR [rbx+72]
; A[1] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r11, rax
adox r12, rcx
; A[1] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+56], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+64], r12
mov r11, QWORD PTR [rbx+80]
mov r12, QWORD PTR [rbx+88]
; A[1] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[1] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+72], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+80], r11
mov r10, QWORD PTR [r8]
; A[1] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r12, rax
adox r10, rcx
; A[1] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+88], r12
mov r11, r14
adcx r10, rax
adox r11, rcx
adcx r11, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8], r10
mov QWORD PTR [r8+8], r11
mov rdx, QWORD PTR [r9+16]
mov r12, QWORD PTR [rbx+16]
mov r10, QWORD PTR [rbx+24]
mov r11, QWORD PTR [rbx+32]
; A[2] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r10, rcx
; A[2] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+16], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+24], r10
mov r12, QWORD PTR [rbx+40]
mov r10, QWORD PTR [rbx+48]
; A[2] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r11, rax
adox r12, rcx
; A[2] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+32], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+40], r12
mov r11, QWORD PTR [rbx+56]
mov r12, QWORD PTR [rbx+64]
; A[2] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[2] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+48], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+56], r11
mov r10, QWORD PTR [rbx+72]
mov r11, QWORD PTR [rbx+80]
; A[2] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r12, rax
adox r10, rcx
; A[2] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+64], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+72], r10
mov r12, QWORD PTR [rbx+88]
mov r10, QWORD PTR [r8]
; A[2] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[2] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+80], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+88], r12
mov r11, QWORD PTR [r8+8]
; A[2] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r10, rax
adox r11, rcx
; A[2] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8], r10
mov r12, r14
adcx r11, rax
adox r12, rcx
adcx r12, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+8], r11
mov QWORD PTR [r8+16], r12
mov rdx, QWORD PTR [r9+24]
mov r10, QWORD PTR [rbx+24]
mov r11, QWORD PTR [rbx+32]
mov r12, QWORD PTR [rbx+40]
; A[3] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[3] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+24], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+32], r11
mov r10, QWORD PTR [rbx+48]
mov r11, QWORD PTR [rbx+56]
; A[3] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r12, rax
adox r10, rcx
; A[3] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+40], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+48], r10
mov r12, QWORD PTR [rbx+64]
mov r10, QWORD PTR [rbx+72]
; A[3] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[3] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+56], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+64], r12
mov r11, QWORD PTR [rbx+80]
mov r12, QWORD PTR [rbx+88]
; A[3] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r10, rax
adox r11, rcx
; A[3] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+72], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+80], r11
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[3] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r10, rcx
; A[3] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+88], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
; A[3] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r11, rax
adox r12, rcx
; A[3] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+8], r11
mov r10, r14
adcx r12, rax
adox r10, rcx
adcx r10, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+16], r12
mov QWORD PTR [r8+24], r10
mov rdx, QWORD PTR [r9+32]
mov r11, QWORD PTR [rbx+32]
mov r12, QWORD PTR [rbx+40]
mov r10, QWORD PTR [rbx+48]
; A[4] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[4] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+32], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+40], r12
mov r11, QWORD PTR [rbx+56]
mov r12, QWORD PTR [rbx+64]
; A[4] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r10, rax
adox r11, rcx
; A[4] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+48], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+56], r11
mov r10, QWORD PTR [rbx+72]
mov r11, QWORD PTR [rbx+80]
; A[4] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r10, rcx
; A[4] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+64], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+72], r10
mov r12, QWORD PTR [rbx+88]
mov r10, QWORD PTR [r8]
; A[4] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r11, rax
adox r12, rcx
; A[4] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+80], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+88], r12
mov r11, QWORD PTR [r8+8]
mov r12, QWORD PTR [r8+16]
; A[4] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[4] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+8], r11
mov r10, QWORD PTR [r8+24]
; A[4] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r12, rax
adox r10, rcx
; A[4] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+16], r12
mov r11, r14
adcx r10, rax
adox r11, rcx
adcx r11, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+24], r10
mov QWORD PTR [r8+32], r11
mov rdx, QWORD PTR [r9+40]
mov r12, QWORD PTR [rbx+40]
mov r10, QWORD PTR [rbx+48]
mov r11, QWORD PTR [rbx+56]
; A[5] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r10, rcx
; A[5] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+40], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+48], r10
mov r12, QWORD PTR [rbx+64]
mov r10, QWORD PTR [rbx+72]
; A[5] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r11, rax
adox r12, rcx
; A[5] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+56], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+64], r12
mov r11, QWORD PTR [rbx+80]
mov r12, QWORD PTR [rbx+88]
; A[5] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[5] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+72], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+80], r11
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[5] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r12, rax
adox r10, rcx
; A[5] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+88], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[5] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[5] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+8], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+16], r12
mov r11, QWORD PTR [r8+32]
; A[5] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r10, rax
adox r11, rcx
; A[5] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+24], r10
mov r12, r14
adcx r11, rax
adox r12, rcx
adcx r12, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+32], r11
mov QWORD PTR [r8+40], r12
mov rdx, QWORD PTR [r9+48]
mov r10, QWORD PTR [rbx+48]
mov r11, QWORD PTR [rbx+56]
mov r12, QWORD PTR [rbx+64]
; A[6] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[6] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+48], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+56], r11
mov r10, QWORD PTR [rbx+72]
mov r11, QWORD PTR [rbx+80]
; A[6] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r12, rax
adox r10, rcx
; A[6] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+64], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+72], r10
mov r12, QWORD PTR [rbx+88]
mov r10, QWORD PTR [r8]
; A[6] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[6] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+80], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+88], r12
mov r11, QWORD PTR [r8+8]
mov r12, QWORD PTR [r8+16]
; A[6] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r10, rax
adox r11, rcx
; A[6] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+8], r11
mov r10, QWORD PTR [r8+24]
mov r11, QWORD PTR [r8+32]
; A[6] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r10, rcx
; A[6] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+16], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+24], r10
mov r12, QWORD PTR [r8+40]
; A[6] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r11, rax
adox r12, rcx
; A[6] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+32], r11
mov r10, r14
adcx r12, rax
adox r10, rcx
adcx r10, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+40], r12
mov QWORD PTR [r8+48], r10
mov rdx, QWORD PTR [r9+56]
mov r11, QWORD PTR [rbx+56]
mov r12, QWORD PTR [rbx+64]
mov r10, QWORD PTR [rbx+72]
; A[7] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[7] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+56], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+64], r12
mov r11, QWORD PTR [rbx+80]
mov r12, QWORD PTR [rbx+88]
; A[7] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r10, rax
adox r11, rcx
; A[7] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+72], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+80], r11
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[7] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r10, rcx
; A[7] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+88], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[7] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r11, rax
adox r12, rcx
; A[7] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+8], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+16], r12
mov r11, QWORD PTR [r8+32]
mov r12, QWORD PTR [r8+40]
; A[7] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[7] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+24], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+32], r11
mov r10, QWORD PTR [r8+48]
; A[7] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r12, rax
adox r10, rcx
; A[7] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+40], r12
mov r11, r14
adcx r10, rax
adox r11, rcx
adcx r11, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+48], r10
mov QWORD PTR [r8+56], r11
mov rdx, QWORD PTR [r9+64]
mov r12, QWORD PTR [rbx+64]
mov r10, QWORD PTR [rbx+72]
mov r11, QWORD PTR [rbx+80]
; A[8] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r10, rcx
; A[8] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+64], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+72], r10
mov r12, QWORD PTR [rbx+88]
mov r10, QWORD PTR [r8]
; A[8] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r11, rax
adox r12, rcx
; A[8] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+80], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+88], r12
mov r11, QWORD PTR [r8+8]
mov r12, QWORD PTR [r8+16]
; A[8] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[8] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+8], r11
mov r10, QWORD PTR [r8+24]
mov r11, QWORD PTR [r8+32]
; A[8] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r12, rax
adox r10, rcx
; A[8] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+16], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+24], r10
mov r12, QWORD PTR [r8+40]
mov r10, QWORD PTR [r8+48]
; A[8] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[8] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+32], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+40], r12
mov r11, QWORD PTR [r8+56]
; A[8] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r10, rax
adox r11, rcx
; A[8] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+48], r10
mov r12, r14
adcx r11, rax
adox r12, rcx
adcx r12, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+56], r11
mov QWORD PTR [r8+64], r12
mov rdx, QWORD PTR [r9+72]
mov r10, QWORD PTR [rbx+72]
mov r11, QWORD PTR [rbx+80]
mov r12, QWORD PTR [rbx+88]
; A[9] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[9] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+72], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+80], r11
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[9] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r12, rax
adox r10, rcx
; A[9] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+88], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[9] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[9] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+8], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+16], r12
mov r11, QWORD PTR [r8+32]
mov r12, QWORD PTR [r8+40]
; A[9] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r10, rax
adox r11, rcx
; A[9] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+24], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+32], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
; A[9] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r10, rcx
; A[9] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+40], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+48], r10
mov r12, QWORD PTR [r8+64]
; A[9] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r11, rax
adox r12, rcx
; A[9] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+56], r11
mov r10, r14
adcx r12, rax
adox r10, rcx
adcx r10, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+64], r12
mov QWORD PTR [r8+72], r10
mov rdx, QWORD PTR [r9+80]
mov r11, QWORD PTR [rbx+80]
mov r12, QWORD PTR [rbx+88]
mov r10, QWORD PTR [r8]
; A[10] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[10] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+80], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+88], r12
mov r11, QWORD PTR [r8+8]
mov r12, QWORD PTR [r8+16]
; A[10] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r10, rax
adox r11, rcx
; A[10] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [r8], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+8], r11
mov r10, QWORD PTR [r8+24]
mov r11, QWORD PTR [r8+32]
; A[10] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r10, rcx
; A[10] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+16], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+24], r10
mov r12, QWORD PTR [r8+40]
mov r10, QWORD PTR [r8+48]
; A[10] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r11, rax
adox r12, rcx
; A[10] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+32], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+40], r12
mov r11, QWORD PTR [r8+56]
mov r12, QWORD PTR [r8+64]
; A[10] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[10] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+48], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+56], r11
mov r10, QWORD PTR [r8+72]
; A[10] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r12, rax
adox r10, rcx
; A[10] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+64], r12
mov r11, r14
adcx r10, rax
adox r11, rcx
adcx r11, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+72], r10
mov QWORD PTR [r8+80], r11
mov rdx, QWORD PTR [r9+88]
mov r12, QWORD PTR [rbx+88]
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[11] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r10, rcx
; A[11] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+88], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[11] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r11, rax
adox r12, rcx
; A[11] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [r8+8], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+16], r12
mov r11, QWORD PTR [r8+32]
mov r12, QWORD PTR [r8+40]
; A[11] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[11] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+24], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+32], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
; A[11] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r12, rax
adox r10, rcx
; A[11] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+40], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+48], r10
mov r12, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
; A[11] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[11] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+56], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+64], r12
mov r11, QWORD PTR [r8+80]
; A[11] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r10, rax
adox r11, rcx
; A[11] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+72], r10
mov r12, r14
adcx r11, rax
adox r12, rcx
adcx r12, r13
mov QWORD PTR [r8+80], r11
mov QWORD PTR [r8+88], r12
sub r8, 96
cmp r9, r8
je L_start_3072_mul_avx2_12
cmp rbp, r8
jne L_end_3072_mul_avx2_12
L_start_3072_mul_avx2_12:
vmovdqu xmm0, OWORD PTR [rbx]
vmovups OWORD PTR [r8], xmm0
vmovdqu xmm0, OWORD PTR [rbx+16]
vmovups OWORD PTR [r8+16], xmm0
vmovdqu xmm0, OWORD PTR [rbx+32]
vmovups OWORD PTR [r8+32], xmm0
vmovdqu xmm0, OWORD PTR [rbx+48]
vmovups OWORD PTR [r8+48], xmm0
vmovdqu xmm0, OWORD PTR [rbx+64]
vmovups OWORD PTR [r8+64], xmm0
vmovdqu xmm0, OWORD PTR [rbx+80]
vmovups OWORD PTR [r8+80], xmm0
L_end_3072_mul_avx2_12:
add rsp, 96
pop r14
pop r13
pop r12
pop rbp
pop rbx
ret
sp_3072_mul_avx2_12 ENDP
_text ENDS
ENDIF
; /* Add b to a into r and report the carry. (r = a + b)
; *
; * Each operand is 12 limbs of 64 bits, least significant limb at offset 0.
; *
; * r A single precision integer. Result; pointer in rcx.
; * a A single precision integer. Pointer in rdx.
; * b A single precision integer. Pointer in r8.
; *
; * Returns the carry out of the most significant limb (0 or 1) in rax.
; * r9 and r10 are used as scratch.
; */
_text SEGMENT READONLY PARA
sp_3072_add_12 PROC
        ; rax = 0 so that the closing adc leaves only the carry in it.
        xor     eax, eax
        ; Limb 0 opens the carry chain with a plain add.
        mov     r9, QWORD PTR [rdx]
        add     r9, QWORD PTR [r8]
        ; Limbs 1..11: load the next limb of a, store the previous sum, then
        ; add-with-carry the matching limb of b. The mov instructions leave
        ; the carry flag untouched between the adc instructions.
        ; limb 1
        mov     r10, QWORD PTR [rdx+8]
        mov     QWORD PTR [rcx], r9
        adc     r10, QWORD PTR [r8+8]
        ; limb 2
        mov     r9, QWORD PTR [rdx+16]
        mov     QWORD PTR [rcx+8], r10
        adc     r9, QWORD PTR [r8+16]
        ; limb 3
        mov     r10, QWORD PTR [rdx+24]
        mov     QWORD PTR [rcx+16], r9
        adc     r10, QWORD PTR [r8+24]
        ; limb 4
        mov     r9, QWORD PTR [rdx+32]
        mov     QWORD PTR [rcx+24], r10
        adc     r9, QWORD PTR [r8+32]
        ; limb 5
        mov     r10, QWORD PTR [rdx+40]
        mov     QWORD PTR [rcx+32], r9
        adc     r10, QWORD PTR [r8+40]
        ; limb 6
        mov     r9, QWORD PTR [rdx+48]
        mov     QWORD PTR [rcx+40], r10
        adc     r9, QWORD PTR [r8+48]
        ; limb 7
        mov     r10, QWORD PTR [rdx+56]
        mov     QWORD PTR [rcx+48], r9
        adc     r10, QWORD PTR [r8+56]
        ; limb 8
        mov     r9, QWORD PTR [rdx+64]
        mov     QWORD PTR [rcx+56], r10
        adc     r9, QWORD PTR [r8+64]
        ; limb 9
        mov     r10, QWORD PTR [rdx+72]
        mov     QWORD PTR [rcx+64], r9
        adc     r10, QWORD PTR [r8+72]
        ; limb 10
        mov     r9, QWORD PTR [rdx+80]
        mov     QWORD PTR [rcx+72], r10
        adc     r9, QWORD PTR [r8+80]
        ; limb 11
        mov     r10, QWORD PTR [rdx+88]
        mov     QWORD PTR [rcx+80], r9
        adc     r10, QWORD PTR [r8+88]
        mov     QWORD PTR [rcx+88], r10
        ; Return value: carry out of limb 11.
        adc     rax, 0
        ret
sp_3072_add_12 ENDP
_text ENDS
; /* Subtract b from a, storing the difference back into a. (a -= b)
; *
; * Each operand is 24 limbs of 64 bits, least significant limb at offset 0.
; *
; * a A single precision integer and result. Pointer in rcx.
; * b A single precision integer. Pointer in rdx.
; *
; * Returns 0 in rax when there is no borrow out of the most significant
; * limb, or all ones (-1) when there is. r8 and r9 are used as scratch.
; */
_text SEGMENT READONLY PARA
sp_3072_sub_in_place_24 PROC
        ; Limb 0 opens the borrow chain with a plain sub.
        mov     r8, QWORD PTR [rcx]
        sub     r8, QWORD PTR [rdx]
        ; Limbs 1..23: load the next limb of a, store the previous difference,
        ; then subtract-with-borrow the matching limb of b. The mov
        ; instructions leave the carry flag untouched between the sbb
        ; instructions.
        ; limb 1
        mov     r9, QWORD PTR [rcx+8]
        mov     QWORD PTR [rcx], r8
        sbb     r9, QWORD PTR [rdx+8]
        ; limb 2
        mov     r8, QWORD PTR [rcx+16]
        mov     QWORD PTR [rcx+8], r9
        sbb     r8, QWORD PTR [rdx+16]
        ; limb 3
        mov     r9, QWORD PTR [rcx+24]
        mov     QWORD PTR [rcx+16], r8
        sbb     r9, QWORD PTR [rdx+24]
        ; limb 4
        mov     r8, QWORD PTR [rcx+32]
        mov     QWORD PTR [rcx+24], r9
        sbb     r8, QWORD PTR [rdx+32]
        ; limb 5
        mov     r9, QWORD PTR [rcx+40]
        mov     QWORD PTR [rcx+32], r8
        sbb     r9, QWORD PTR [rdx+40]
        ; limb 6
        mov     r8, QWORD PTR [rcx+48]
        mov     QWORD PTR [rcx+40], r9
        sbb     r8, QWORD PTR [rdx+48]
        ; limb 7
        mov     r9, QWORD PTR [rcx+56]
        mov     QWORD PTR [rcx+48], r8
        sbb     r9, QWORD PTR [rdx+56]
        ; limb 8
        mov     r8, QWORD PTR [rcx+64]
        mov     QWORD PTR [rcx+56], r9
        sbb     r8, QWORD PTR [rdx+64]
        ; limb 9
        mov     r9, QWORD PTR [rcx+72]
        mov     QWORD PTR [rcx+64], r8
        sbb     r9, QWORD PTR [rdx+72]
        ; limb 10
        mov     r8, QWORD PTR [rcx+80]
        mov     QWORD PTR [rcx+72], r9
        sbb     r8, QWORD PTR [rdx+80]
        ; limb 11
        mov     r9, QWORD PTR [rcx+88]
        mov     QWORD PTR [rcx+80], r8
        sbb     r9, QWORD PTR [rdx+88]
        ; limb 12
        mov     r8, QWORD PTR [rcx+96]
        mov     QWORD PTR [rcx+88], r9
        sbb     r8, QWORD PTR [rdx+96]
        ; limb 13
        mov     r9, QWORD PTR [rcx+104]
        mov     QWORD PTR [rcx+96], r8
        sbb     r9, QWORD PTR [rdx+104]
        ; limb 14
        mov     r8, QWORD PTR [rcx+112]
        mov     QWORD PTR [rcx+104], r9
        sbb     r8, QWORD PTR [rdx+112]
        ; limb 15
        mov     r9, QWORD PTR [rcx+120]
        mov     QWORD PTR [rcx+112], r8
        sbb     r9, QWORD PTR [rdx+120]
        ; limb 16
        mov     r8, QWORD PTR [rcx+128]
        mov     QWORD PTR [rcx+120], r9
        sbb     r8, QWORD PTR [rdx+128]
        ; limb 17
        mov     r9, QWORD PTR [rcx+136]
        mov     QWORD PTR [rcx+128], r8
        sbb     r9, QWORD PTR [rdx+136]
        ; limb 18
        mov     r8, QWORD PTR [rcx+144]
        mov     QWORD PTR [rcx+136], r9
        sbb     r8, QWORD PTR [rdx+144]
        ; limb 19
        mov     r9, QWORD PTR [rcx+152]
        mov     QWORD PTR [rcx+144], r8
        sbb     r9, QWORD PTR [rdx+152]
        ; limb 20
        mov     r8, QWORD PTR [rcx+160]
        mov     QWORD PTR [rcx+152], r9
        sbb     r8, QWORD PTR [rdx+160]
        ; limb 21
        mov     r9, QWORD PTR [rcx+168]
        mov     QWORD PTR [rcx+160], r8
        sbb     r9, QWORD PTR [rdx+168]
        ; limb 22
        mov     r8, QWORD PTR [rcx+176]
        mov     QWORD PTR [rcx+168], r9
        sbb     r8, QWORD PTR [rdx+176]
        ; limb 23
        mov     r9, QWORD PTR [rcx+184]
        mov     QWORD PTR [rcx+176], r8
        sbb     r9, QWORD PTR [rdx+184]
        mov     QWORD PTR [rcx+184], r9
        ; Return value: rax = 0 - borrow, i.e. 0 or all ones.
        sbb     rax, rax
        ret
sp_3072_sub_in_place_24 ENDP
_text ENDS
; /* Add b to a into r and report the carry. (r = a + b)
; *
; * Each operand is 24 limbs of 64 bits, least significant limb at offset 0.
; *
; * r A single precision integer. Result; pointer in rcx.
; * a A single precision integer. Pointer in rdx.
; * b A single precision integer. Pointer in r8.
; *
; * Returns the carry out of the most significant limb (0 or 1) in rax.
; * r9 and r10 are used as scratch.
; */
_text SEGMENT READONLY PARA
sp_3072_add_24 PROC
        ; rax = 0 so that the closing adc leaves only the carry in it.
        xor     eax, eax
        ; Limb 0 opens the carry chain with a plain add.
        mov     r9, QWORD PTR [rdx]
        add     r9, QWORD PTR [r8]
        ; Limbs 1..23: load the next limb of a, store the previous sum, then
        ; add-with-carry the matching limb of b. The mov instructions leave
        ; the carry flag untouched between the adc instructions.
        ; limb 1
        mov     r10, QWORD PTR [rdx+8]
        mov     QWORD PTR [rcx], r9
        adc     r10, QWORD PTR [r8+8]
        ; limb 2
        mov     r9, QWORD PTR [rdx+16]
        mov     QWORD PTR [rcx+8], r10
        adc     r9, QWORD PTR [r8+16]
        ; limb 3
        mov     r10, QWORD PTR [rdx+24]
        mov     QWORD PTR [rcx+16], r9
        adc     r10, QWORD PTR [r8+24]
        ; limb 4
        mov     r9, QWORD PTR [rdx+32]
        mov     QWORD PTR [rcx+24], r10
        adc     r9, QWORD PTR [r8+32]
        ; limb 5
        mov     r10, QWORD PTR [rdx+40]
        mov     QWORD PTR [rcx+32], r9
        adc     r10, QWORD PTR [r8+40]
        ; limb 6
        mov     r9, QWORD PTR [rdx+48]
        mov     QWORD PTR [rcx+40], r10
        adc     r9, QWORD PTR [r8+48]
        ; limb 7
        mov     r10, QWORD PTR [rdx+56]
        mov     QWORD PTR [rcx+48], r9
        adc     r10, QWORD PTR [r8+56]
        ; limb 8
        mov     r9, QWORD PTR [rdx+64]
        mov     QWORD PTR [rcx+56], r10
        adc     r9, QWORD PTR [r8+64]
        ; limb 9
        mov     r10, QWORD PTR [rdx+72]
        mov     QWORD PTR [rcx+64], r9
        adc     r10, QWORD PTR [r8+72]
        ; limb 10
        mov     r9, QWORD PTR [rdx+80]
        mov     QWORD PTR [rcx+72], r10
        adc     r9, QWORD PTR [r8+80]
        ; limb 11
        mov     r10, QWORD PTR [rdx+88]
        mov     QWORD PTR [rcx+80], r9
        adc     r10, QWORD PTR [r8+88]
        ; limb 12
        mov     r9, QWORD PTR [rdx+96]
        mov     QWORD PTR [rcx+88], r10
        adc     r9, QWORD PTR [r8+96]
        ; limb 13
        mov     r10, QWORD PTR [rdx+104]
        mov     QWORD PTR [rcx+96], r9
        adc     r10, QWORD PTR [r8+104]
        ; limb 14
        mov     r9, QWORD PTR [rdx+112]
        mov     QWORD PTR [rcx+104], r10
        adc     r9, QWORD PTR [r8+112]
        ; limb 15
        mov     r10, QWORD PTR [rdx+120]
        mov     QWORD PTR [rcx+112], r9
        adc     r10, QWORD PTR [r8+120]
        ; limb 16
        mov     r9, QWORD PTR [rdx+128]
        mov     QWORD PTR [rcx+120], r10
        adc     r9, QWORD PTR [r8+128]
        ; limb 17
        mov     r10, QWORD PTR [rdx+136]
        mov     QWORD PTR [rcx+128], r9
        adc     r10, QWORD PTR [r8+136]
        ; limb 18
        mov     r9, QWORD PTR [rdx+144]
        mov     QWORD PTR [rcx+136], r10
        adc     r9, QWORD PTR [r8+144]
        ; limb 19
        mov     r10, QWORD PTR [rdx+152]
        mov     QWORD PTR [rcx+144], r9
        adc     r10, QWORD PTR [r8+152]
        ; limb 20
        mov     r9, QWORD PTR [rdx+160]
        mov     QWORD PTR [rcx+152], r10
        adc     r9, QWORD PTR [r8+160]
        ; limb 21
        mov     r10, QWORD PTR [rdx+168]
        mov     QWORD PTR [rcx+160], r9
        adc     r10, QWORD PTR [r8+168]
        ; limb 22
        mov     r9, QWORD PTR [rdx+176]
        mov     QWORD PTR [rcx+168], r10
        adc     r9, QWORD PTR [r8+176]
        ; limb 23
        mov     r10, QWORD PTR [rdx+184]
        mov     QWORD PTR [rcx+176], r9
        adc     r10, QWORD PTR [r8+184]
        mov     QWORD PTR [rcx+184], r10
        ; Return value: carry out of limb 23.
        adc     rax, 0
        ret
sp_3072_add_24 ENDP
_text ENDS
; /* Multiply a and b into r. (r = a * b)
; *
; * One Karatsuba step over 24-limb operands (64-bit limbs, least significant
; * first), using three calls to sp_3072_mul_12. With a = a_lo + a_hi * 2^768
; * and b = b_lo + b_hi * 2^768 (12 limbs per half):
; *   z1 = (a_lo + a_hi) * (b_lo + b_hi)
; *   z2 = a_hi * b_hi
; *   z0 = a_lo * b_lo
; *   r  = z0 + ((z1 - z2 - z0) * 2^768) + (z2 * 2^1536)
; * The two half sums are kept to 12 limbs; their carry bits are saved and the
; * terms they contribute to z1 are added in separately after the calls.
; *
; * r A single precision integer. Pointer in rcx; limbs 0..47 hold the result.
; * a A single precision integer. Pointer in rdx; limbs 0..23 are read.
; * b A single precision integer. Pointer in r8; limbs 0..23 are read.
; *
; * Stack frame (616 bytes, offsets from rsp after the sub):
; *     0..191  z1, product of the truncated half sums (24 limbs)
; *   192..383  z2 = a_hi * b_hi (24 limbs)
; *   384..479  a_lo + a_hi (12 limbs)
; *   480..575  b_lo + b_hi (12 limbs)
; *   576, 584, 592  saved r, a, b pointers
; *   600, 608  carry out of the a sum, carry out of the b sum
; *
; * NOTE(review): the first call passes rsp itself as the output pointer and
; * no separate 32-byte home area is reserved for the callee; this presumably
; * relies on sp_3072_mul_12 never writing to its home space - confirm.
; */
_text SEGMENT READONLY PARA
sp_3072_mul_24 PROC
; Preserve the registers used below as long-lived temporaries.
push r12
push r13
push r14
push r15
push rdi
push rsi
; Reserve the frame. Six pushes plus 616 bytes keep rsp 16-byte aligned at
; the calls, given the usual alignment on entry.
sub rsp, 616
; Save the arguments; rcx, rdx and r8 are reloaded from here after each call.
mov QWORD PTR [rsp+576], rcx
mov QWORD PTR [rsp+584], rdx
mov QWORD PTR [rsp+592], r8
; r12 -> buffer for the a half sum, r14 -> a_hi (limb 12 of a).
lea r12, QWORD PTR [rsp+384]
lea r14, QWORD PTR [rdx+96]
; Add: [r12] = a_lo + a_hi over 12 limbs. rax, r9 and r10 rotate as
; temporaries; r15 is zeroed before the chain starts and receives the carry.
mov rax, QWORD PTR [rdx]
xor r15, r15
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [r12], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [r12+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [rdx+24]
mov QWORD PTR [r12+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [r12+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [r12+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r12+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [rdx+56]
mov QWORD PTR [r12+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [r12+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [rdx+72]
mov QWORD PTR [r12+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [r12+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [r12+80], r9
adc r10, QWORD PTR [r14+88]
mov QWORD PTR [r12+88], r10
; r15 = carry out of the a half sum (0 or 1); keep it across the calls.
adc r15, 0
mov QWORD PTR [rsp+600], r15
; r13 -> buffer for the b half sum, r14 -> b_hi (limb 12 of b).
lea r13, QWORD PTR [rsp+480]
lea r14, QWORD PTR [r8+96]
; Add: [r13] = b_lo + b_hi over 12 limbs; rdi receives the carry.
mov rax, QWORD PTR [r8]
xor rdi, rdi
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [r8+8]
mov QWORD PTR [r13], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [r8+16]
mov QWORD PTR [r13+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [r8+24]
mov QWORD PTR [r13+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [r8+32]
mov QWORD PTR [r13+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [r8+40]
mov QWORD PTR [r13+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [r8+48]
mov QWORD PTR [r13+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [r8+56]
mov QWORD PTR [r13+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [r8+64]
mov QWORD PTR [r13+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [r8+72]
mov QWORD PTR [r13+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [r8+80]
mov QWORD PTR [r13+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [r8+88]
mov QWORD PTR [r13+80], r9
adc r10, QWORD PTR [r14+88]
mov QWORD PTR [r13+88], r10
; rdi = carry out of the b half sum (0 or 1); keep it across the calls.
adc rdi, 0
mov QWORD PTR [rsp+608], rdi
; z1: multiply the two truncated half sums into [rsp+0].
mov r8, r13
mov rdx, r12
mov rcx, rsp
call sp_3072_mul_12
; z2: multiply a_hi by b_hi into [rsp+192].
mov r8, QWORD PTR [rsp+592]
mov rdx, QWORD PTR [rsp+584]
lea rcx, QWORD PTR [rsp+192]
add r8, 96
add rdx, 96
call sp_3072_mul_12
; z0: multiply a_lo by b_lo straight into r (limbs 0..23 are used below).
mov r8, QWORD PTR [rsp+592]
mov rdx, QWORD PTR [rsp+584]
mov rcx, QWORD PTR [rsp+576]
call sp_3072_mul_12
; Restore the argument registers after the call; only rcx (r) is read below.
IFDEF _WIN64
mov r8, QWORD PTR [rsp+592]
mov rdx, QWORD PTR [rsp+584]
mov rcx, QWORD PTR [rsp+576]
ENDIF
; Reload the saved carries and rebuild the pointers to the two half sums.
; r11 = (a carry AND b carry) starts the running value for limb 36 of r.
; neg turns each 0/1 carry into a 0 / all-ones mask.
; rsi -> limb 24 of r.
mov r15, QWORD PTR [rsp+600]
mov rdi, QWORD PTR [rsp+608]
mov rsi, QWORD PTR [rsp+576]
mov r11, r15
lea r12, QWORD PTR [rsp+384]
lea r13, QWORD PTR [rsp+480]
and r11, rdi
neg r15
neg rdi
add rsi, 192
; Mask the half sums in place, without branching: the a sum is kept only if
; the b sum carried, and the b sum is kept only if the a sum carried.
mov rax, QWORD PTR [r12]
mov r9, QWORD PTR [r13]
and rax, rdi
and r9, r15
mov QWORD PTR [r12], rax
mov QWORD PTR [r13], r9
mov rax, QWORD PTR [r12+8]
mov r9, QWORD PTR [r13+8]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+8], rax
mov QWORD PTR [r13+8], r9
mov rax, QWORD PTR [r12+16]
mov r9, QWORD PTR [r13+16]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+16], rax
mov QWORD PTR [r13+16], r9
mov rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [r13+24]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+24], rax
mov QWORD PTR [r13+24], r9
mov rax, QWORD PTR [r12+32]
mov r9, QWORD PTR [r13+32]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+32], rax
mov QWORD PTR [r13+32], r9
mov rax, QWORD PTR [r12+40]
mov r9, QWORD PTR [r13+40]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+40], rax
mov QWORD PTR [r13+40], r9
mov rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [r13+48]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+48], rax
mov QWORD PTR [r13+48], r9
mov rax, QWORD PTR [r12+56]
mov r9, QWORD PTR [r13+56]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+56], rax
mov QWORD PTR [r13+56], r9
mov rax, QWORD PTR [r12+64]
mov r9, QWORD PTR [r13+64]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+64], rax
mov QWORD PTR [r13+64], r9
mov rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [r13+72]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+72], rax
mov QWORD PTR [r13+72], r9
mov rax, QWORD PTR [r12+80]
mov r9, QWORD PTR [r13+80]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+80], rax
mov QWORD PTR [r13+80], r9
mov rax, QWORD PTR [r12+88]
mov r9, QWORD PTR [r13+88]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+88], rax
mov QWORD PTR [r13+88], r9
; r[24..35] = masked a sum + masked b sum; the carry out goes into r11.
mov rax, QWORD PTR [r12]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov QWORD PTR [rsi+88], r10
adc r11, 0
; From here r12 -> z1 at [rsp+0] and r13 -> z2 at [rsp+192].
lea r13, QWORD PTR [rsp+192]
mov r12, rsp
; z1 -= z2 over 24 limbs, in place; the borrow out is taken from r11.
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [r13+184]
mov QWORD PTR [r12+184], r10
sbb r11, 0
; z1 -= z0 over 24 limbs, where z0 is r[0..23] (rcx = r); the borrow out is
; again taken from r11.
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [rcx]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [rcx+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [rcx+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [rcx+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [rcx+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [rcx+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [rcx+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [rcx+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [rcx+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [rcx+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [rcx+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [rcx+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [rcx+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [rcx+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [rcx+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [rcx+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [rcx+184]
mov QWORD PTR [r12+184], r10
sbb r11, 0
; rsi -> limb 12 of r.
sub rsi, 96
; Add: r[12..35] += z1 (now z1 - z2 - z0); the carry out goes into r11.
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r12]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r12+184]
mov QWORD PTR [rsi+184], r10
adc r11, 0
; r[36] = accumulated carry word (byte offset 288 from r).
mov QWORD PTR [rcx+288], r11
; rsi -> limb 24 of r.
add rsi, 96
; Add: r[24..36] += z2[0..12].
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov QWORD PTR [rsi+96], rax
; Add to zero: r[37..47] = z2[13..23] plus the carry still in flight. These
; limbs of r are written without being read first.
mov rax, QWORD PTR [r13+104]
adc rax, 0
mov r9, QWORD PTR [r13+112]
mov QWORD PTR [rsi+104], rax
adc r9, 0
mov r10, QWORD PTR [r13+120]
mov QWORD PTR [rsi+112], r9
adc r10, 0
mov rax, QWORD PTR [r13+128]
mov QWORD PTR [rsi+120], r10
adc rax, 0
mov r9, QWORD PTR [r13+136]
mov QWORD PTR [rsi+128], rax
adc r9, 0
mov r10, QWORD PTR [r13+144]
mov QWORD PTR [rsi+136], r9
adc r10, 0
mov rax, QWORD PTR [r13+152]
mov QWORD PTR [rsi+144], r10
adc rax, 0
mov r9, QWORD PTR [r13+160]
mov QWORD PTR [rsi+152], rax
adc r9, 0
mov r10, QWORD PTR [r13+168]
mov QWORD PTR [rsi+160], r9
adc r10, 0
mov rax, QWORD PTR [r13+176]
mov QWORD PTR [rsi+168], r10
adc rax, 0
mov r9, QWORD PTR [r13+184]
mov QWORD PTR [rsi+176], rax
adc r9, 0
mov QWORD PTR [rsi+184], r9
; Release the frame and restore the saved registers in reverse push order.
add rsp, 616
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_3072_mul_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_3072_mul_avx2_24 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
sub rsp, 616
mov QWORD PTR [rsp+576], rcx
mov QWORD PTR [rsp+584], rdx
mov QWORD PTR [rsp+592], r8
lea r12, QWORD PTR [rsp+384]
lea r14, QWORD PTR [rdx+96]
; Add
mov rax, QWORD PTR [rdx]
xor r15, r15
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [r12], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [r12+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [rdx+24]
mov QWORD PTR [r12+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [r12+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [r12+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r12+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [rdx+56]
mov QWORD PTR [r12+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [r12+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [rdx+72]
mov QWORD PTR [r12+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [r12+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [r12+80], r9
adc r10, QWORD PTR [r14+88]
mov QWORD PTR [r12+88], r10
adc r15, 0
mov QWORD PTR [rsp+600], r15
lea r13, QWORD PTR [rsp+480]
lea r14, QWORD PTR [r8+96]
; Add
mov rax, QWORD PTR [r8]
xor rdi, rdi
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [r8+8]
mov QWORD PTR [r13], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [r8+16]
mov QWORD PTR [r13+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [r8+24]
mov QWORD PTR [r13+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [r8+32]
mov QWORD PTR [r13+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [r8+40]
mov QWORD PTR [r13+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [r8+48]
mov QWORD PTR [r13+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [r8+56]
mov QWORD PTR [r13+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [r8+64]
mov QWORD PTR [r13+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [r8+72]
mov QWORD PTR [r13+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [r8+80]
mov QWORD PTR [r13+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [r8+88]
mov QWORD PTR [r13+80], r9
adc r10, QWORD PTR [r14+88]
mov QWORD PTR [r13+88], r10
adc rdi, 0
mov QWORD PTR [rsp+608], rdi
mov r8, r13
mov rdx, r12
mov rcx, rsp
call sp_3072_mul_avx2_12
mov r8, QWORD PTR [rsp+592]
mov rdx, QWORD PTR [rsp+584]
lea rcx, QWORD PTR [rsp+192]
add r8, 96
add rdx, 96
call sp_3072_mul_avx2_12
mov r8, QWORD PTR [rsp+592]
mov rdx, QWORD PTR [rsp+584]
mov rcx, QWORD PTR [rsp+576]
call sp_3072_mul_avx2_12
IFDEF _WIN64
mov r8, QWORD PTR [rsp+592]
mov rdx, QWORD PTR [rsp+584]
mov rcx, QWORD PTR [rsp+576]
ENDIF
mov r15, QWORD PTR [rsp+600]
mov rdi, QWORD PTR [rsp+608]
mov rsi, QWORD PTR [rsp+576]
mov r11, r15
lea r12, QWORD PTR [rsp+384]
lea r13, QWORD PTR [rsp+480]
and r11, rdi
neg r15
neg rdi
add rsi, 192
mov rax, QWORD PTR [r12]
mov r9, QWORD PTR [r13]
pext rax, rax, rdi
pext r9, r9, r15
add rax, r9
mov r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [r13+8]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi], rax
adc r9, r10
mov r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [r13+16]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+8], r9
adc r10, rax
mov rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [r13+24]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+16], r10
adc rax, r9
mov r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [r13+32]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+24], rax
adc r9, r10
mov r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [r13+40]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+32], r9
adc r10, rax
mov rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [r13+48]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+40], r10
adc rax, r9
mov r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [r13+56]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+48], rax
adc r9, r10
mov r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [r13+64]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+56], r9
adc r10, rax
mov rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [r13+72]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+64], r10
adc rax, r9
mov r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [r13+80]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+72], rax
adc r9, r10
mov r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [r13+88]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+80], r9
adc r10, rax
mov QWORD PTR [rsi+88], r10
adc r11, 0
lea r13, QWORD PTR [rsp+192]
mov r12, rsp
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [r13+184]
mov QWORD PTR [r12+184], r10
sbb r11, 0
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [rcx]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [rcx+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [rcx+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [rcx+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [rcx+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [rcx+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [rcx+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [rcx+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [rcx+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [rcx+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [rcx+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [rcx+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [rcx+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [rcx+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [rcx+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [rcx+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [rcx+184]
mov QWORD PTR [r12+184], r10
sbb r11, 0
sub rsi, 96
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r12]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r12+184]
mov QWORD PTR [rsi+184], r10
adc r11, 0
mov QWORD PTR [rcx+288], r11
add rsi, 96
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov QWORD PTR [rsi+96], rax
; Add to zero
mov rax, QWORD PTR [r13+104]
adc rax, 0
mov r9, QWORD PTR [r13+112]
mov QWORD PTR [rsi+104], rax
adc r9, 0
mov r10, QWORD PTR [r13+120]
mov QWORD PTR [rsi+112], r9
adc r10, 0
mov rax, QWORD PTR [r13+128]
mov QWORD PTR [rsi+120], r10
adc rax, 0
mov r9, QWORD PTR [r13+136]
mov QWORD PTR [rsi+128], rax
adc r9, 0
mov r10, QWORD PTR [r13+144]
mov QWORD PTR [rsi+136], r9
adc r10, 0
mov rax, QWORD PTR [r13+152]
mov QWORD PTR [rsi+144], r10
adc rax, 0
mov r9, QWORD PTR [r13+160]
mov QWORD PTR [rsi+152], rax
adc r9, 0
mov r10, QWORD PTR [r13+168]
mov QWORD PTR [rsi+160], r9
adc r10, 0
mov rax, QWORD PTR [r13+176]
mov QWORD PTR [rsi+168], r10
adc rax, 0
mov r9, QWORD PTR [r13+184]
mov QWORD PTR [rsi+176], rax
adc r9, 0
mov QWORD PTR [rsi+184], r9
add rsp, 616
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_3072_mul_avx2_24 ENDP
_text ENDS
ENDIF
; /* In-place subtraction of two 48-word (3072-bit) integers: a = a - b.
; *
; * a  (rcx) Minuend, 48 64-bit words; overwritten with the difference.
; * b  (rdx) Subtrahend, 48 64-bit words; only read.
; *
; * Result in rax: 0 when no borrow leaves the top word, all ones (-1)
; * when one does.
; * Scratch: r8, r9 and the flags.
; */
_text SEGMENT READONLY PARA
sp_3072_sub_in_place_48 PROC
        ; Word 0 opens the borrow chain with a plain sub.
        mov     r8, QWORD PTR [rcx]
        sub     r8, QWORD PTR [rdx]
        ; Words 1..46, two per repetition. Each step loads the next word of
        ; a, stores the previous difference, then subtracts with borrow;
        ; mov leaves the flags alone, so the borrow survives between sbb's.
sp3072_sub48_off = 8
        REPT    23
        mov     r9, QWORD PTR [rcx+sp3072_sub48_off]
        mov     QWORD PTR [rcx+sp3072_sub48_off-8], r8
        sbb     r9, QWORD PTR [rdx+sp3072_sub48_off]
        mov     r8, QWORD PTR [rcx+sp3072_sub48_off+8]
        mov     QWORD PTR [rcx+sp3072_sub48_off], r9
        sbb     r8, QWORD PTR [rdx+sp3072_sub48_off+8]
sp3072_sub48_off = sp3072_sub48_off + 16
        ENDM
        ; Word 47 (byte offset 376) closes the chain.
        mov     r9, QWORD PTR [rcx+376]
        mov     QWORD PTR [rcx+368], r8
        sbb     r9, QWORD PTR [rdx+376]
        mov     QWORD PTR [rcx+376], r9
        ; Spread the final borrow over the whole register: 0 or -1.
        sbb     rax, rax
        ret
sp_3072_sub_in_place_48 ENDP
_text ENDS
; /* Addition of two 48-word (3072-bit) integers: r = a + b.
; *
; * r  (rcx) Destination, 48 64-bit words.
; * a  (rdx) First addend, 48 64-bit words; only read.
; * b  (r8)  Second addend, 48 64-bit words; only read.
; *
; * Result in rax: the carry out of the top word (0 or 1).
; * Scratch: r9, r10 and the flags.
; */
_text SEGMENT READONLY PARA
sp_3072_add_48 PROC
        ; Word 0 opens the carry chain with a plain add. rax is cleared
        ; before the add so that clearing it cannot disturb the carry.
        mov     r9, QWORD PTR [rdx]
        xor     rax, rax
        add     r9, QWORD PTR [r8]
        ; Words 1..46, two per repetition. Each step loads the next word of
        ; a, stores the previous sum, then adds with carry; mov leaves the
        ; flags alone, so the carry survives between adc's.
sp3072_add48_off = 8
        REPT    23
        mov     r10, QWORD PTR [rdx+sp3072_add48_off]
        mov     QWORD PTR [rcx+sp3072_add48_off-8], r9
        adc     r10, QWORD PTR [r8+sp3072_add48_off]
        mov     r9, QWORD PTR [rdx+sp3072_add48_off+8]
        mov     QWORD PTR [rcx+sp3072_add48_off], r10
        adc     r9, QWORD PTR [r8+sp3072_add48_off+8]
sp3072_add48_off = sp3072_add48_off + 16
        ENDM
        ; Word 47 (byte offset 376) closes the chain.
        mov     r10, QWORD PTR [rdx+376]
        mov     QWORD PTR [rcx+368], r9
        adc     r10, QWORD PTR [r8+376]
        mov     QWORD PTR [rcx+376], r10
        ; Fold the final carry into the zeroed return register.
        adc     rax, 0
        ret
sp_3072_add_48 ENDP
_text ENDS
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_3072_mul_48 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
sub rsp, 1192
mov QWORD PTR [rsp+1152], rcx
mov QWORD PTR [rsp+1160], rdx
mov QWORD PTR [rsp+1168], r8
lea r12, QWORD PTR [rsp+768]
lea r14, QWORD PTR [rdx+192]
; Add
mov rax, QWORD PTR [rdx]
xor r15, r15
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [r12], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [r12+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [rdx+24]
mov QWORD PTR [r12+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [r12+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [r12+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r12+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [rdx+56]
mov QWORD PTR [r12+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [r12+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [rdx+72]
mov QWORD PTR [r12+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [r12+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [r12+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r12+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [rdx+104]
mov QWORD PTR [r12+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [rdx+112]
mov QWORD PTR [r12+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [rdx+120]
mov QWORD PTR [r12+112], r10
adc rax, QWORD PTR [r14+120]
mov r9, QWORD PTR [rdx+128]
mov QWORD PTR [r12+120], rax
adc r9, QWORD PTR [r14+128]
mov r10, QWORD PTR [rdx+136]
mov QWORD PTR [r12+128], r9
adc r10, QWORD PTR [r14+136]
mov rax, QWORD PTR [rdx+144]
mov QWORD PTR [r12+136], r10
adc rax, QWORD PTR [r14+144]
mov r9, QWORD PTR [rdx+152]
mov QWORD PTR [r12+144], rax
adc r9, QWORD PTR [r14+152]
mov r10, QWORD PTR [rdx+160]
mov QWORD PTR [r12+152], r9
adc r10, QWORD PTR [r14+160]
mov rax, QWORD PTR [rdx+168]
mov QWORD PTR [r12+160], r10
adc rax, QWORD PTR [r14+168]
mov r9, QWORD PTR [rdx+176]
mov QWORD PTR [r12+168], rax
adc r9, QWORD PTR [r14+176]
mov r10, QWORD PTR [rdx+184]
mov QWORD PTR [r12+176], r9
adc r10, QWORD PTR [r14+184]
mov QWORD PTR [r12+184], r10
adc r15, 0
mov QWORD PTR [rsp+1176], r15
lea r13, QWORD PTR [rsp+960]
lea r14, QWORD PTR [r8+192]
; Add
mov rax, QWORD PTR [r8]
xor rdi, rdi
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [r8+8]
mov QWORD PTR [r13], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [r8+16]
mov QWORD PTR [r13+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [r8+24]
mov QWORD PTR [r13+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [r8+32]
mov QWORD PTR [r13+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [r8+40]
mov QWORD PTR [r13+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [r8+48]
mov QWORD PTR [r13+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [r8+56]
mov QWORD PTR [r13+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [r8+64]
mov QWORD PTR [r13+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [r8+72]
mov QWORD PTR [r13+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [r8+80]
mov QWORD PTR [r13+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [r8+88]
mov QWORD PTR [r13+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [r8+96]
mov QWORD PTR [r13+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [r8+104]
mov QWORD PTR [r13+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [r8+112]
mov QWORD PTR [r13+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [r8+120]
mov QWORD PTR [r13+112], r10
adc rax, QWORD PTR [r14+120]
mov r9, QWORD PTR [r8+128]
mov QWORD PTR [r13+120], rax
adc r9, QWORD PTR [r14+128]
mov r10, QWORD PTR [r8+136]
mov QWORD PTR [r13+128], r9
adc r10, QWORD PTR [r14+136]
mov rax, QWORD PTR [r8+144]
mov QWORD PTR [r13+136], r10
adc rax, QWORD PTR [r14+144]
mov r9, QWORD PTR [r8+152]
mov QWORD PTR [r13+144], rax
adc r9, QWORD PTR [r14+152]
mov r10, QWORD PTR [r8+160]
mov QWORD PTR [r13+152], r9
adc r10, QWORD PTR [r14+160]
mov rax, QWORD PTR [r8+168]
mov QWORD PTR [r13+160], r10
adc rax, QWORD PTR [r14+168]
mov r9, QWORD PTR [r8+176]
mov QWORD PTR [r13+168], rax
adc r9, QWORD PTR [r14+176]
mov r10, QWORD PTR [r8+184]
mov QWORD PTR [r13+176], r9
adc r10, QWORD PTR [r14+184]
mov QWORD PTR [r13+184], r10
adc rdi, 0
mov QWORD PTR [rsp+1184], rdi
mov r8, r13
mov rdx, r12
mov rcx, rsp
call sp_3072_mul_24
mov r8, QWORD PTR [rsp+1168]
mov rdx, QWORD PTR [rsp+1160]
lea rcx, QWORD PTR [rsp+384]
add r8, 192
add rdx, 192
call sp_3072_mul_24
mov r8, QWORD PTR [rsp+1168]
mov rdx, QWORD PTR [rsp+1160]
mov rcx, QWORD PTR [rsp+1152]
call sp_3072_mul_24
IFDEF _WIN64
mov r8, QWORD PTR [rsp+1168]
mov rdx, QWORD PTR [rsp+1160]
mov rcx, QWORD PTR [rsp+1152]
ENDIF
mov r15, QWORD PTR [rsp+1176]
mov rdi, QWORD PTR [rsp+1184]
mov rsi, QWORD PTR [rsp+1152]
mov r11, r15
lea r12, QWORD PTR [rsp+768]
lea r13, QWORD PTR [rsp+960]
and r11, rdi
neg r15
neg rdi
add rsi, 384
mov rax, QWORD PTR [r12]
mov r9, QWORD PTR [r13]
and rax, rdi
and r9, r15
mov QWORD PTR [r12], rax
mov QWORD PTR [r13], r9
mov rax, QWORD PTR [r12+8]
mov r9, QWORD PTR [r13+8]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+8], rax
mov QWORD PTR [r13+8], r9
mov rax, QWORD PTR [r12+16]
mov r9, QWORD PTR [r13+16]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+16], rax
mov QWORD PTR [r13+16], r9
mov rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [r13+24]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+24], rax
mov QWORD PTR [r13+24], r9
mov rax, QWORD PTR [r12+32]
mov r9, QWORD PTR [r13+32]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+32], rax
mov QWORD PTR [r13+32], r9
mov rax, QWORD PTR [r12+40]
mov r9, QWORD PTR [r13+40]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+40], rax
mov QWORD PTR [r13+40], r9
mov rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [r13+48]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+48], rax
mov QWORD PTR [r13+48], r9
mov rax, QWORD PTR [r12+56]
mov r9, QWORD PTR [r13+56]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+56], rax
mov QWORD PTR [r13+56], r9
mov rax, QWORD PTR [r12+64]
mov r9, QWORD PTR [r13+64]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+64], rax
mov QWORD PTR [r13+64], r9
mov rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [r13+72]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+72], rax
mov QWORD PTR [r13+72], r9
mov rax, QWORD PTR [r12+80]
mov r9, QWORD PTR [r13+80]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+80], rax
mov QWORD PTR [r13+80], r9
mov rax, QWORD PTR [r12+88]
mov r9, QWORD PTR [r13+88]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+88], rax
mov QWORD PTR [r13+88], r9
mov rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [r13+96]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+96], rax
mov QWORD PTR [r13+96], r9
mov rax, QWORD PTR [r12+104]
mov r9, QWORD PTR [r13+104]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+104], rax
mov QWORD PTR [r13+104], r9
mov rax, QWORD PTR [r12+112]
mov r9, QWORD PTR [r13+112]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+112], rax
mov QWORD PTR [r13+112], r9
mov rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [r13+120]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+120], rax
mov QWORD PTR [r13+120], r9
mov rax, QWORD PTR [r12+128]
mov r9, QWORD PTR [r13+128]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+128], rax
mov QWORD PTR [r13+128], r9
mov rax, QWORD PTR [r12+136]
mov r9, QWORD PTR [r13+136]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+136], rax
mov QWORD PTR [r13+136], r9
mov rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [r13+144]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+144], rax
mov QWORD PTR [r13+144], r9
mov rax, QWORD PTR [r12+152]
mov r9, QWORD PTR [r13+152]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+152], rax
mov QWORD PTR [r13+152], r9
mov rax, QWORD PTR [r12+160]
mov r9, QWORD PTR [r13+160]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+160], rax
mov QWORD PTR [r13+160], r9
mov rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [r13+168]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+168], rax
mov QWORD PTR [r13+168], r9
mov rax, QWORD PTR [r12+176]
mov r9, QWORD PTR [r13+176]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+176], rax
mov QWORD PTR [r13+176], r9
mov rax, QWORD PTR [r12+184]
mov r9, QWORD PTR [r13+184]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+184], rax
mov QWORD PTR [r13+184], r9
mov rax, QWORD PTR [r12]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r13+184]
mov QWORD PTR [rsi+184], r10
adc r11, 0
lea r13, QWORD PTR [rsp+384]
mov r12, rsp
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [r13+248]
mov r10, QWORD PTR [r12+256]
mov QWORD PTR [r12+248], r9
sbb r10, QWORD PTR [r13+256]
mov rax, QWORD PTR [r12+264]
mov QWORD PTR [r12+256], r10
sbb rax, QWORD PTR [r13+264]
mov r9, QWORD PTR [r12+272]
mov QWORD PTR [r12+264], rax
sbb r9, QWORD PTR [r13+272]
mov r10, QWORD PTR [r12+280]
mov QWORD PTR [r12+272], r9
sbb r10, QWORD PTR [r13+280]
mov rax, QWORD PTR [r12+288]
mov QWORD PTR [r12+280], r10
sbb rax, QWORD PTR [r13+288]
mov r9, QWORD PTR [r12+296]
mov QWORD PTR [r12+288], rax
sbb r9, QWORD PTR [r13+296]
mov r10, QWORD PTR [r12+304]
mov QWORD PTR [r12+296], r9
sbb r10, QWORD PTR [r13+304]
mov rax, QWORD PTR [r12+312]
mov QWORD PTR [r12+304], r10
sbb rax, QWORD PTR [r13+312]
mov r9, QWORD PTR [r12+320]
mov QWORD PTR [r12+312], rax
sbb r9, QWORD PTR [r13+320]
mov r10, QWORD PTR [r12+328]
mov QWORD PTR [r12+320], r9
sbb r10, QWORD PTR [r13+328]
mov rax, QWORD PTR [r12+336]
mov QWORD PTR [r12+328], r10
sbb rax, QWORD PTR [r13+336]
mov r9, QWORD PTR [r12+344]
mov QWORD PTR [r12+336], rax
sbb r9, QWORD PTR [r13+344]
mov r10, QWORD PTR [r12+352]
mov QWORD PTR [r12+344], r9
sbb r10, QWORD PTR [r13+352]
mov rax, QWORD PTR [r12+360]
mov QWORD PTR [r12+352], r10
sbb rax, QWORD PTR [r13+360]
mov r9, QWORD PTR [r12+368]
mov QWORD PTR [r12+360], rax
sbb r9, QWORD PTR [r13+368]
mov r10, QWORD PTR [r12+376]
mov QWORD PTR [r12+368], r9
sbb r10, QWORD PTR [r13+376]
mov QWORD PTR [r12+376], r10
sbb r11, 0
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [rcx]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [rcx+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [rcx+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [rcx+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [rcx+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [rcx+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [rcx+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [rcx+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [rcx+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [rcx+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [rcx+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [rcx+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [rcx+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [rcx+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [rcx+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [rcx+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [rcx+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [rcx+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [rcx+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [rcx+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [rcx+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [rcx+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [rcx+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [rcx+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [rcx+248]
mov r10, QWORD PTR [r12+256]
mov QWORD PTR [r12+248], r9
sbb r10, QWORD PTR [rcx+256]
mov rax, QWORD PTR [r12+264]
mov QWORD PTR [r12+256], r10
sbb rax, QWORD PTR [rcx+264]
mov r9, QWORD PTR [r12+272]
mov QWORD PTR [r12+264], rax
sbb r9, QWORD PTR [rcx+272]
mov r10, QWORD PTR [r12+280]
mov QWORD PTR [r12+272], r9
sbb r10, QWORD PTR [rcx+280]
mov rax, QWORD PTR [r12+288]
mov QWORD PTR [r12+280], r10
sbb rax, QWORD PTR [rcx+288]
mov r9, QWORD PTR [r12+296]
mov QWORD PTR [r12+288], rax
sbb r9, QWORD PTR [rcx+296]
mov r10, QWORD PTR [r12+304]
mov QWORD PTR [r12+296], r9
sbb r10, QWORD PTR [rcx+304]
mov rax, QWORD PTR [r12+312]
mov QWORD PTR [r12+304], r10
sbb rax, QWORD PTR [rcx+312]
mov r9, QWORD PTR [r12+320]
mov QWORD PTR [r12+312], rax
sbb r9, QWORD PTR [rcx+320]
mov r10, QWORD PTR [r12+328]
mov QWORD PTR [r12+320], r9
sbb r10, QWORD PTR [rcx+328]
mov rax, QWORD PTR [r12+336]
mov QWORD PTR [r12+328], r10
sbb rax, QWORD PTR [rcx+336]
mov r9, QWORD PTR [r12+344]
mov QWORD PTR [r12+336], rax
sbb r9, QWORD PTR [rcx+344]
mov r10, QWORD PTR [r12+352]
mov QWORD PTR [r12+344], r9
sbb r10, QWORD PTR [rcx+352]
mov rax, QWORD PTR [r12+360]
mov QWORD PTR [r12+352], r10
sbb rax, QWORD PTR [rcx+360]
mov r9, QWORD PTR [r12+368]
mov QWORD PTR [r12+360], rax
sbb r9, QWORD PTR [rcx+368]
mov r10, QWORD PTR [r12+376]
mov QWORD PTR [r12+368], r9
sbb r10, QWORD PTR [rcx+376]
mov QWORD PTR [r12+376], r10
sbb r11, 0
sub rsi, 192
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r12]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r12+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r12+192]
mov r9, QWORD PTR [rsi+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r12+200]
mov r10, QWORD PTR [rsi+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r12+208]
mov rax, QWORD PTR [rsi+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r12+216]
mov r9, QWORD PTR [rsi+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r12+224]
mov r10, QWORD PTR [rsi+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r12+232]
mov rax, QWORD PTR [rsi+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r12+240]
mov r9, QWORD PTR [rsi+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r12+248]
mov r10, QWORD PTR [rsi+256]
mov QWORD PTR [rsi+248], r9
adc r10, QWORD PTR [r12+256]
mov rax, QWORD PTR [rsi+264]
mov QWORD PTR [rsi+256], r10
adc rax, QWORD PTR [r12+264]
mov r9, QWORD PTR [rsi+272]
mov QWORD PTR [rsi+264], rax
adc r9, QWORD PTR [r12+272]
mov r10, QWORD PTR [rsi+280]
mov QWORD PTR [rsi+272], r9
adc r10, QWORD PTR [r12+280]
mov rax, QWORD PTR [rsi+288]
mov QWORD PTR [rsi+280], r10
adc rax, QWORD PTR [r12+288]
mov r9, QWORD PTR [rsi+296]
mov QWORD PTR [rsi+288], rax
adc r9, QWORD PTR [r12+296]
mov r10, QWORD PTR [rsi+304]
mov QWORD PTR [rsi+296], r9
adc r10, QWORD PTR [r12+304]
mov rax, QWORD PTR [rsi+312]
mov QWORD PTR [rsi+304], r10
adc rax, QWORD PTR [r12+312]
mov r9, QWORD PTR [rsi+320]
mov QWORD PTR [rsi+312], rax
adc r9, QWORD PTR [r12+320]
mov r10, QWORD PTR [rsi+328]
mov QWORD PTR [rsi+320], r9
adc r10, QWORD PTR [r12+328]
mov rax, QWORD PTR [rsi+336]
mov QWORD PTR [rsi+328], r10
adc rax, QWORD PTR [r12+336]
mov r9, QWORD PTR [rsi+344]
mov QWORD PTR [rsi+336], rax
adc r9, QWORD PTR [r12+344]
mov r10, QWORD PTR [rsi+352]
mov QWORD PTR [rsi+344], r9
adc r10, QWORD PTR [r12+352]
mov rax, QWORD PTR [rsi+360]
mov QWORD PTR [rsi+352], r10
adc rax, QWORD PTR [r12+360]
mov r9, QWORD PTR [rsi+368]
mov QWORD PTR [rsi+360], rax
adc r9, QWORD PTR [r12+368]
mov r10, QWORD PTR [rsi+376]
mov QWORD PTR [rsi+368], r9
adc r10, QWORD PTR [r12+376]
mov QWORD PTR [rsi+376], r10
adc r11, 0
mov QWORD PTR [rcx+576], r11
add rsi, 192
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r13+192]
mov QWORD PTR [rsi+192], rax
; Add to zero
mov rax, QWORD PTR [r13+200]
adc rax, 0
mov r9, QWORD PTR [r13+208]
mov QWORD PTR [rsi+200], rax
adc r9, 0
mov r10, QWORD PTR [r13+216]
mov QWORD PTR [rsi+208], r9
adc r10, 0
mov rax, QWORD PTR [r13+224]
mov QWORD PTR [rsi+216], r10
adc rax, 0
mov r9, QWORD PTR [r13+232]
mov QWORD PTR [rsi+224], rax
adc r9, 0
mov r10, QWORD PTR [r13+240]
mov QWORD PTR [rsi+232], r9
adc r10, 0
mov rax, QWORD PTR [r13+248]
mov QWORD PTR [rsi+240], r10
adc rax, 0
mov r9, QWORD PTR [r13+256]
mov QWORD PTR [rsi+248], rax
adc r9, 0
mov r10, QWORD PTR [r13+264]
mov QWORD PTR [rsi+256], r9
adc r10, 0
mov rax, QWORD PTR [r13+272]
mov QWORD PTR [rsi+264], r10
adc rax, 0
mov r9, QWORD PTR [r13+280]
mov QWORD PTR [rsi+272], rax
adc r9, 0
mov r10, QWORD PTR [r13+288]
mov QWORD PTR [rsi+280], r9
adc r10, 0
mov rax, QWORD PTR [r13+296]
mov QWORD PTR [rsi+288], r10
adc rax, 0
mov r9, QWORD PTR [r13+304]
mov QWORD PTR [rsi+296], rax
adc r9, 0
mov r10, QWORD PTR [r13+312]
mov QWORD PTR [rsi+304], r9
adc r10, 0
mov rax, QWORD PTR [r13+320]
mov QWORD PTR [rsi+312], r10
adc rax, 0
mov r9, QWORD PTR [r13+328]
mov QWORD PTR [rsi+320], rax
adc r9, 0
mov r10, QWORD PTR [r13+336]
mov QWORD PTR [rsi+328], r9
adc r10, 0
mov rax, QWORD PTR [r13+344]
mov QWORD PTR [rsi+336], r10
adc rax, 0
mov r9, QWORD PTR [r13+352]
mov QWORD PTR [rsi+344], rax
adc r9, 0
mov r10, QWORD PTR [r13+360]
mov QWORD PTR [rsi+352], r9
adc r10, 0
mov rax, QWORD PTR [r13+368]
mov QWORD PTR [rsi+360], r10
adc rax, 0
mov r9, QWORD PTR [r13+376]
mov QWORD PTR [rsi+368], rax
adc r9, 0
mov QWORD PTR [rsi+376], r9
add rsp, 1192
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_3072_mul_48 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
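; /* Multiply a and b into r. (r = a * b)
; * Variant assembled only when HAVE_INTEL_AVX2 is defined; uses pext.
; *
; * r A single precision integer. Receives the product (passed in rcx).
; * a A single precision integer. 48 64-bit words are read (passed in rdx).
; * b A single precision integer. 48 64-bit words are read (passed in r8).
; */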
_text SEGMENT READONLY PARA
sp_3072_mul_avx2_48 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
sub rsp, 1192
mov QWORD PTR [rsp+1152], rcx
mov QWORD PTR [rsp+1160], rdx
mov QWORD PTR [rsp+1168], r8
lea r12, QWORD PTR [rsp+768]
lea r14, QWORD PTR [rdx+192]
; Add
mov rax, QWORD PTR [rdx]
xor r15, r15
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [r12], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [r12+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [rdx+24]
mov QWORD PTR [r12+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [r12+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [r12+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r12+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [rdx+56]
mov QWORD PTR [r12+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [r12+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [rdx+72]
mov QWORD PTR [r12+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [r12+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [r12+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r12+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [rdx+104]
mov QWORD PTR [r12+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [rdx+112]
mov QWORD PTR [r12+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [rdx+120]
mov QWORD PTR [r12+112], r10
adc rax, QWORD PTR [r14+120]
mov r9, QWORD PTR [rdx+128]
mov QWORD PTR [r12+120], rax
adc r9, QWORD PTR [r14+128]
mov r10, QWORD PTR [rdx+136]
mov QWORD PTR [r12+128], r9
adc r10, QWORD PTR [r14+136]
mov rax, QWORD PTR [rdx+144]
mov QWORD PTR [r12+136], r10
adc rax, QWORD PTR [r14+144]
mov r9, QWORD PTR [rdx+152]
mov QWORD PTR [r12+144], rax
adc r9, QWORD PTR [r14+152]
mov r10, QWORD PTR [rdx+160]
mov QWORD PTR [r12+152], r9
adc r10, QWORD PTR [r14+160]
mov rax, QWORD PTR [rdx+168]
mov QWORD PTR [r12+160], r10
adc rax, QWORD PTR [r14+168]
mov r9, QWORD PTR [rdx+176]
mov QWORD PTR [r12+168], rax
adc r9, QWORD PTR [r14+176]
mov r10, QWORD PTR [rdx+184]
mov QWORD PTR [r12+176], r9
adc r10, QWORD PTR [r14+184]
mov QWORD PTR [r12+184], r10
adc r15, 0
mov QWORD PTR [rsp+1176], r15
lea r13, QWORD PTR [rsp+960]
lea r14, QWORD PTR [r8+192]
; Add
mov rax, QWORD PTR [r8]
xor rdi, rdi
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [r8+8]
mov QWORD PTR [r13], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [r8+16]
mov QWORD PTR [r13+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [r8+24]
mov QWORD PTR [r13+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [r8+32]
mov QWORD PTR [r13+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [r8+40]
mov QWORD PTR [r13+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [r8+48]
mov QWORD PTR [r13+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [r8+56]
mov QWORD PTR [r13+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [r8+64]
mov QWORD PTR [r13+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [r8+72]
mov QWORD PTR [r13+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [r8+80]
mov QWORD PTR [r13+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [r8+88]
mov QWORD PTR [r13+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [r8+96]
mov QWORD PTR [r13+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [r8+104]
mov QWORD PTR [r13+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [r8+112]
mov QWORD PTR [r13+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [r8+120]
mov QWORD PTR [r13+112], r10
adc rax, QWORD PTR [r14+120]
mov r9, QWORD PTR [r8+128]
mov QWORD PTR [r13+120], rax
adc r9, QWORD PTR [r14+128]
mov r10, QWORD PTR [r8+136]
mov QWORD PTR [r13+128], r9
adc r10, QWORD PTR [r14+136]
mov rax, QWORD PTR [r8+144]
mov QWORD PTR [r13+136], r10
adc rax, QWORD PTR [r14+144]
mov r9, QWORD PTR [r8+152]
mov QWORD PTR [r13+144], rax
adc r9, QWORD PTR [r14+152]
mov r10, QWORD PTR [r8+160]
mov QWORD PTR [r13+152], r9
adc r10, QWORD PTR [r14+160]
mov rax, QWORD PTR [r8+168]
mov QWORD PTR [r13+160], r10
adc rax, QWORD PTR [r14+168]
mov r9, QWORD PTR [r8+176]
mov QWORD PTR [r13+168], rax
adc r9, QWORD PTR [r14+176]
mov r10, QWORD PTR [r8+184]
mov QWORD PTR [r13+176], r9
adc r10, QWORD PTR [r14+184]
mov QWORD PTR [r13+184], r10
adc rdi, 0
mov QWORD PTR [rsp+1184], rdi
mov r8, r13
mov rdx, r12
mov rcx, rsp
call sp_3072_mul_avx2_24
mov r8, QWORD PTR [rsp+1168]
mov rdx, QWORD PTR [rsp+1160]
lea rcx, QWORD PTR [rsp+384]
add r8, 192
add rdx, 192
call sp_3072_mul_avx2_24
mov r8, QWORD PTR [rsp+1168]
mov rdx, QWORD PTR [rsp+1160]
mov rcx, QWORD PTR [rsp+1152]
call sp_3072_mul_avx2_24
IFDEF _WIN64
mov r8, QWORD PTR [rsp+1168]
mov rdx, QWORD PTR [rsp+1160]
mov rcx, QWORD PTR [rsp+1152]
ENDIF
mov r15, QWORD PTR [rsp+1176]
mov rdi, QWORD PTR [rsp+1184]
mov rsi, QWORD PTR [rsp+1152]
mov r11, r15
lea r12, QWORD PTR [rsp+768]
lea r13, QWORD PTR [rsp+960]
and r11, rdi
neg r15
neg rdi
add rsi, 384
mov rax, QWORD PTR [r12]
mov r9, QWORD PTR [r13]
pext rax, rax, rdi
pext r9, r9, r15
add rax, r9
mov r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [r13+8]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi], rax
adc r9, r10
mov r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [r13+16]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+8], r9
adc r10, rax
mov rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [r13+24]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+16], r10
adc rax, r9
mov r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [r13+32]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+24], rax
adc r9, r10
mov r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [r13+40]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+32], r9
adc r10, rax
mov rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [r13+48]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+40], r10
adc rax, r9
mov r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [r13+56]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+48], rax
adc r9, r10
mov r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [r13+64]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+56], r9
adc r10, rax
mov rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [r13+72]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+64], r10
adc rax, r9
mov r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [r13+80]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+72], rax
adc r9, r10
mov r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [r13+88]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+80], r9
adc r10, rax
mov rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [r13+96]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+88], r10
adc rax, r9
mov r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [r13+104]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+96], rax
adc r9, r10
mov r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [r13+112]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+104], r9
adc r10, rax
mov rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [r13+120]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+112], r10
adc rax, r9
mov r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [r13+128]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+120], rax
adc r9, r10
mov r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [r13+136]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+128], r9
adc r10, rax
mov rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [r13+144]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+136], r10
adc rax, r9
mov r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [r13+152]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+144], rax
adc r9, r10
mov r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [r13+160]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+152], r9
adc r10, rax
mov rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [r13+168]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+160], r10
adc rax, r9
mov r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [r13+176]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+168], rax
adc r9, r10
mov r10, QWORD PTR [r12+184]
mov rax, QWORD PTR [r13+184]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+176], r9
adc r10, rax
mov QWORD PTR [rsi+184], r10
adc r11, 0
lea r13, QWORD PTR [rsp+384]
mov r12, rsp
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [r13+248]
mov r10, QWORD PTR [r12+256]
mov QWORD PTR [r12+248], r9
sbb r10, QWORD PTR [r13+256]
mov rax, QWORD PTR [r12+264]
mov QWORD PTR [r12+256], r10
sbb rax, QWORD PTR [r13+264]
mov r9, QWORD PTR [r12+272]
mov QWORD PTR [r12+264], rax
sbb r9, QWORD PTR [r13+272]
mov r10, QWORD PTR [r12+280]
mov QWORD PTR [r12+272], r9
sbb r10, QWORD PTR [r13+280]
mov rax, QWORD PTR [r12+288]
mov QWORD PTR [r12+280], r10
sbb rax, QWORD PTR [r13+288]
mov r9, QWORD PTR [r12+296]
mov QWORD PTR [r12+288], rax
sbb r9, QWORD PTR [r13+296]
mov r10, QWORD PTR [r12+304]
mov QWORD PTR [r12+296], r9
sbb r10, QWORD PTR [r13+304]
mov rax, QWORD PTR [r12+312]
mov QWORD PTR [r12+304], r10
sbb rax, QWORD PTR [r13+312]
mov r9, QWORD PTR [r12+320]
mov QWORD PTR [r12+312], rax
sbb r9, QWORD PTR [r13+320]
mov r10, QWORD PTR [r12+328]
mov QWORD PTR [r12+320], r9
sbb r10, QWORD PTR [r13+328]
mov rax, QWORD PTR [r12+336]
mov QWORD PTR [r12+328], r10
sbb rax, QWORD PTR [r13+336]
mov r9, QWORD PTR [r12+344]
mov QWORD PTR [r12+336], rax
sbb r9, QWORD PTR [r13+344]
mov r10, QWORD PTR [r12+352]
mov QWORD PTR [r12+344], r9
sbb r10, QWORD PTR [r13+352]
mov rax, QWORD PTR [r12+360]
mov QWORD PTR [r12+352], r10
sbb rax, QWORD PTR [r13+360]
mov r9, QWORD PTR [r12+368]
mov QWORD PTR [r12+360], rax
sbb r9, QWORD PTR [r13+368]
mov r10, QWORD PTR [r12+376]
mov QWORD PTR [r12+368], r9
sbb r10, QWORD PTR [r13+376]
mov QWORD PTR [r12+376], r10
sbb r11, 0
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [rcx]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [rcx+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [rcx+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [rcx+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [rcx+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [rcx+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [rcx+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [rcx+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [rcx+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [rcx+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [rcx+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [rcx+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [rcx+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [rcx+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [rcx+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [rcx+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [rcx+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [rcx+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [rcx+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [rcx+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [rcx+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [rcx+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [rcx+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [rcx+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [rcx+248]
mov r10, QWORD PTR [r12+256]
mov QWORD PTR [r12+248], r9
sbb r10, QWORD PTR [rcx+256]
mov rax, QWORD PTR [r12+264]
mov QWORD PTR [r12+256], r10
sbb rax, QWORD PTR [rcx+264]
mov r9, QWORD PTR [r12+272]
mov QWORD PTR [r12+264], rax
sbb r9, QWORD PTR [rcx+272]
mov r10, QWORD PTR [r12+280]
mov QWORD PTR [r12+272], r9
sbb r10, QWORD PTR [rcx+280]
mov rax, QWORD PTR [r12+288]
mov QWORD PTR [r12+280], r10
sbb rax, QWORD PTR [rcx+288]
mov r9, QWORD PTR [r12+296]
mov QWORD PTR [r12+288], rax
sbb r9, QWORD PTR [rcx+296]
mov r10, QWORD PTR [r12+304]
mov QWORD PTR [r12+296], r9
sbb r10, QWORD PTR [rcx+304]
mov rax, QWORD PTR [r12+312]
mov QWORD PTR [r12+304], r10
sbb rax, QWORD PTR [rcx+312]
mov r9, QWORD PTR [r12+320]
mov QWORD PTR [r12+312], rax
sbb r9, QWORD PTR [rcx+320]
mov r10, QWORD PTR [r12+328]
mov QWORD PTR [r12+320], r9
sbb r10, QWORD PTR [rcx+328]
mov rax, QWORD PTR [r12+336]
mov QWORD PTR [r12+328], r10
sbb rax, QWORD PTR [rcx+336]
mov r9, QWORD PTR [r12+344]
mov QWORD PTR [r12+336], rax
sbb r9, QWORD PTR [rcx+344]
mov r10, QWORD PTR [r12+352]
mov QWORD PTR [r12+344], r9
sbb r10, QWORD PTR [rcx+352]
mov rax, QWORD PTR [r12+360]
mov QWORD PTR [r12+352], r10
sbb rax, QWORD PTR [rcx+360]
mov r9, QWORD PTR [r12+368]
mov QWORD PTR [r12+360], rax
sbb r9, QWORD PTR [rcx+368]
mov r10, QWORD PTR [r12+376]
mov QWORD PTR [r12+368], r9
sbb r10, QWORD PTR [rcx+376]
mov QWORD PTR [r12+376], r10
sbb r11, 0
sub rsi, 192
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r12]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r12+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r12+192]
mov r9, QWORD PTR [rsi+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r12+200]
mov r10, QWORD PTR [rsi+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r12+208]
mov rax, QWORD PTR [rsi+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r12+216]
mov r9, QWORD PTR [rsi+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r12+224]
mov r10, QWORD PTR [rsi+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r12+232]
mov rax, QWORD PTR [rsi+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r12+240]
mov r9, QWORD PTR [rsi+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r12+248]
mov r10, QWORD PTR [rsi+256]
mov QWORD PTR [rsi+248], r9
adc r10, QWORD PTR [r12+256]
mov rax, QWORD PTR [rsi+264]
mov QWORD PTR [rsi+256], r10
adc rax, QWORD PTR [r12+264]
mov r9, QWORD PTR [rsi+272]
mov QWORD PTR [rsi+264], rax
adc r9, QWORD PTR [r12+272]
mov r10, QWORD PTR [rsi+280]
mov QWORD PTR [rsi+272], r9
adc r10, QWORD PTR [r12+280]
mov rax, QWORD PTR [rsi+288]
mov QWORD PTR [rsi+280], r10
adc rax, QWORD PTR [r12+288]
mov r9, QWORD PTR [rsi+296]
mov QWORD PTR [rsi+288], rax
adc r9, QWORD PTR [r12+296]
mov r10, QWORD PTR [rsi+304]
mov QWORD PTR [rsi+296], r9
adc r10, QWORD PTR [r12+304]
mov rax, QWORD PTR [rsi+312]
mov QWORD PTR [rsi+304], r10
adc rax, QWORD PTR [r12+312]
mov r9, QWORD PTR [rsi+320]
mov QWORD PTR [rsi+312], rax
adc r9, QWORD PTR [r12+320]
mov r10, QWORD PTR [rsi+328]
mov QWORD PTR [rsi+320], r9
adc r10, QWORD PTR [r12+328]
mov rax, QWORD PTR [rsi+336]
mov QWORD PTR [rsi+328], r10
adc rax, QWORD PTR [r12+336]
mov r9, QWORD PTR [rsi+344]
mov QWORD PTR [rsi+336], rax
adc r9, QWORD PTR [r12+344]
mov r10, QWORD PTR [rsi+352]
mov QWORD PTR [rsi+344], r9
adc r10, QWORD PTR [r12+352]
mov rax, QWORD PTR [rsi+360]
mov QWORD PTR [rsi+352], r10
adc rax, QWORD PTR [r12+360]
mov r9, QWORD PTR [rsi+368]
mov QWORD PTR [rsi+360], rax
adc r9, QWORD PTR [r12+368]
mov r10, QWORD PTR [rsi+376]
mov QWORD PTR [rsi+368], r9
adc r10, QWORD PTR [r12+376]
mov QWORD PTR [rsi+376], r10
adc r11, 0
mov QWORD PTR [rcx+576], r11
add rsi, 192
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r13+192]
mov QWORD PTR [rsi+192], rax
; Add to zero
mov rax, QWORD PTR [r13+200]
adc rax, 0
mov r9, QWORD PTR [r13+208]
mov QWORD PTR [rsi+200], rax
adc r9, 0
mov r10, QWORD PTR [r13+216]
mov QWORD PTR [rsi+208], r9
adc r10, 0
mov rax, QWORD PTR [r13+224]
mov QWORD PTR [rsi+216], r10
adc rax, 0
mov r9, QWORD PTR [r13+232]
mov QWORD PTR [rsi+224], rax
adc r9, 0
mov r10, QWORD PTR [r13+240]
mov QWORD PTR [rsi+232], r9
adc r10, 0
mov rax, QWORD PTR [r13+248]
mov QWORD PTR [rsi+240], r10
adc rax, 0
mov r9, QWORD PTR [r13+256]
mov QWORD PTR [rsi+248], rax
adc r9, 0
mov r10, QWORD PTR [r13+264]
mov QWORD PTR [rsi+256], r9
adc r10, 0
mov rax, QWORD PTR [r13+272]
mov QWORD PTR [rsi+264], r10
adc rax, 0
mov r9, QWORD PTR [r13+280]
mov QWORD PTR [rsi+272], rax
adc r9, 0
mov r10, QWORD PTR [r13+288]
mov QWORD PTR [rsi+280], r9
adc r10, 0
mov rax, QWORD PTR [r13+296]
mov QWORD PTR [rsi+288], r10
adc rax, 0
mov r9, QWORD PTR [r13+304]
mov QWORD PTR [rsi+296], rax
adc r9, 0
mov r10, QWORD PTR [r13+312]
mov QWORD PTR [rsi+304], r9
adc r10, 0
mov rax, QWORD PTR [r13+320]
mov QWORD PTR [rsi+312], r10
adc rax, 0
mov r9, QWORD PTR [r13+328]
mov QWORD PTR [rsi+320], rax
adc r9, 0
mov r10, QWORD PTR [r13+336]
mov QWORD PTR [rsi+328], r9
adc r10, 0
mov rax, QWORD PTR [r13+344]
mov QWORD PTR [rsi+336], r10
adc rax, 0
mov r9, QWORD PTR [r13+352]
mov QWORD PTR [rsi+344], rax
adc r9, 0
mov r10, QWORD PTR [r13+360]
mov QWORD PTR [rsi+352], r9
adc r10, 0
mov rax, QWORD PTR [r13+368]
mov QWORD PTR [rsi+360], r10
adc rax, 0
mov r9, QWORD PTR [r13+376]
mov QWORD PTR [rsi+368], rax
adc r9, 0
mov QWORD PTR [rsi+376], r9
add rsp, 1192
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_3072_mul_avx2_48 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r. (r = a * a)
; *
; * Fully unrolled squaring of a 12-word (64-bit words) number into a
; * 24-word result, computed one result word (column) at a time.
; *
; * r A single precision integer. Passed in rcx; 24 words are written.
; * a A single precision integer. Passed in rdx; 12 words are read.
; *
; * Result word k is the sum of every A[i] * A[j] with i + j == k. Each
; * product with i != j is counted twice, the square A[k/2] * A[k/2] once.
; *
; * The running sum lives in a three-register window that rotates through
; * r9, r10 and r11: the lowest register of the window is stored as the
; * finished word, then cleared and reused as the new top word.
; *
; * Columns with at most two cross products add each product to the window
; * twice. Columns with three or more first sum the cross products in
; * r14:r13:r12, double that sum once, and then add it to the window.
; *
; * Words 0-11 are built in a 96-byte stack buffer and copied to r at the
; * end; words 12-23 are stored directly to r.
; * NOTE(review): presumably this is what allows r and a to be the same
; * buffer - confirm against the callers.
; *
; * r12, r13 and r14 are saved and restored. rax, rdx, r8-r11 and the flags
; * are overwritten. rcx is not modified.
; */
_text SEGMENT READONLY PARA
sp_3072_sqr_12 PROC
; r12-r14 hold the cross-product sum; save them for the caller.
push r12
push r13
push r14
; Every mul overwrites rdx, so the pointer to a is kept in r8.
mov r8, rdx
; Scratch space for result words 0-11.
sub rsp, 96
; Word 0
; A[0] * A[0]
mov rax, QWORD PTR [r8]
mul rax
xor r11, r11
mov QWORD PTR [rsp], rax
mov r10, rdx
; Word 1
; A[0] * A[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r8]
xor r9, r9
; The product is added twice because A[0]*A[1] == A[1]*A[0].
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+8], r10
; Word 2
; A[0] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[1] * A[1]
mov rax, QWORD PTR [r8+8]
mul rax
; A square term is added only once.
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rsp+16], r11
; Word 3
; A[0] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[1] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8+8]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+24], r9
; Word 4
; A[0] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[1] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+8]
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[2] * A[2]
mov rax, QWORD PTR [r8+16]
mul rax
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+32], r10
; Word 5
; From here up to word 17 the cross products of a column are summed in
; r14:r13:r12, doubled once, and then added to the window.
; A[0] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; Double the cross-product sum.
add r12, r12
adc r13, r13
adc r14, r14
; Add it to the window.
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+40], r11
; Word 6
; A[0] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[3]
mov rax, QWORD PTR [r8+24]
mul rax
; Double the cross-product sum first, then add the square (rdx:rax) once.
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+48], r9
; Word 7
; A[0] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rsp+56], r10
; Word 8
; A[0] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[4]
mov rax, QWORD PTR [r8+32]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+64], r11
; Word 9
; A[0] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+72], r9
; Word 10
; A[0] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[5]
mov rax, QWORD PTR [r8+40]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rsp+80], r10
; Word 11
; A[0] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+88], r11
; Word 12
; From this word on the finished word is stored directly to r (rcx).
; A[1] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[2] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[6]
mov rax, QWORD PTR [r8+48]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rcx+96], r9
; Word 13
; A[2] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+16]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[3] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+104], r10
; Word 14
; A[3] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+24]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[4] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[7]
mov rax, QWORD PTR [r8+56]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rcx+112], r11
; Word 15
; A[4] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+32]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[5] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rcx+120], r9
; Word 16
; A[5] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+40]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[6] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[8]
mov rax, QWORD PTR [r8+64]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+128], r10
; Word 17
; A[6] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+48]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[7] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rcx+136], r11
; Word 18
; Two or fewer cross products per column from here on: each product is
; again added to the window twice instead of going through r12-r14.
; A[7] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+56]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[8] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+64]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[9] * A[9]
mov rax, QWORD PTR [r8+72]
mul rax
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+144], r9
; Word 19
; A[8] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+64]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[9] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+72]
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rcx+152], r10
; Word 20
; A[9] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+72]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[10] * A[10]
mov rax, QWORD PTR [r8+80]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rcx+160], r11
; Word 21
; A[10] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+80]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+168], r9
; Words 22 and 23
; A[11] * A[11]
mov rax, QWORD PTR [r8+88]
mul rax
; Last column: only two window words remain, no third carry word is kept.
add r10, rax
adc r11, rdx
mov QWORD PTR [rcx+176], r10
mov QWORD PTR [rcx+184], r11
; Copy result words 0-11 from the stack buffer to r, four words at a time.
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r12, QWORD PTR [rsp+16]
mov r13, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r12
mov QWORD PTR [rcx+24], r13
mov rax, QWORD PTR [rsp+32]
mov rdx, QWORD PTR [rsp+40]
mov r12, QWORD PTR [rsp+48]
mov r13, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], rdx
mov QWORD PTR [rcx+48], r12
mov QWORD PTR [rcx+56], r13
mov rax, QWORD PTR [rsp+64]
mov rdx, QWORD PTR [rsp+72]
mov r12, QWORD PTR [rsp+80]
mov r13, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], rdx
mov QWORD PTR [rcx+80], r12
mov QWORD PTR [rcx+88], r13
; Release the scratch buffer and restore the saved registers.
add rsp, 96
pop r14
pop r13
pop r12
ret
sp_3072_sqr_12 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * Fully unrolled squaring of a 12-word (64-bit words) number into a
; * 24-word result, using MULX together with the two carry chains of
; * ADCX (carry flag) and ADOX (overflow flag).
; *
; * r A single precision integer. Passed in rcx; 24 words are written.
; * a A single precision integer. Passed in rdx; 12 words are read.
; *
; * Phase 1 ("Diagonal 1" to "Diagonal 6"): every cross product
; * A[i] x A[j] with i != j is accumulated exactly once (11 products per
; * diagonal, 66 in total).
; * Phase 2 ("Double and Add in A[i] x A[i]"): the accumulated words are
; * doubled on the ADOX chain while the squares A[i] x A[i] are added on the
; * ADCX chain.
; *
; * Register use:
; *   r9            pointer to a
; *   r8            r + 96, so [r8 + 8*k] is result word 12 + k
; *   rbp           base address for result words 0-6: the stack buffer
; *                 when r == a, otherwise r itself
; *   r14, r15, rdi, rsi, rbx
; *                 result words 7-11, kept in registers until the end
; *   r12           constant zero
; *   r13           carry saved at the end of a diagonal for the next one
; *   r10, r11      alternating accumulators for words held in memory
; *   rdx           MULX multiplier; rax / rcx receive low / high halves
; *
; * When r == a, words 0-6 are built on the stack and copied to r at the
; * end using xmm0.
; *
; * All pushed registers (rbp, r12-r15, rdi, rsi, rbx) are restored before
; * returning.
; *
; * NOTE(review): the "%rN" register names in the generator comments below
; * ("Zero into %r9", "No load %r12 - %r8", ...) do not match the registers
; * used in this listing - presumably they belong to the generator's other
; * output format; confirm before relying on them.
; */
_text SEGMENT READONLY PARA
sp_3072_sqr_avx2_12 PROC
push rbp
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; r8 = r, r9 = a (rcx and rdx are needed as MULX operands below).
mov r8, rcx
mov r9, rdx
; Scratch space for result words used when r == a.
sub rsp, 96
; rbp = rsp when r == a, otherwise rbp = r. The mov does not change the
; flags set by cmp, so cmovne still sees the comparison result.
cmp r9, r8
mov rbp, rsp
cmovne rbp, r8
; r8 = r + 96: base address of result words 12-23.
add r8, 96
; r12 = 0 for the rest of the function; xor also clears CF and OF before
; the first adcx/adox chain.
xor r12, r12
; Diagonal 1
; Zero into %r9
; A[1] x A[0]
mov rdx, QWORD PTR [r9]
mulx r11, r10, QWORD PTR [r9+8]
mov QWORD PTR [rbp+8], r10
; Zero into %r8
; A[2] x A[0]
mulx r10, rax, QWORD PTR [r9+16]
adcx r11, rax
adox r10, r12
mov QWORD PTR [rbp+16], r11
; Zero into %r9
; A[3] x A[0]
mulx r11, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r11, r12
mov QWORD PTR [rbp+24], r10
; Zero into %r8
; A[4] x A[0]
mulx r10, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r10, r12
mov QWORD PTR [rbp+32], r11
; Zero into %r9
; A[5] x A[0]
mulx r11, rax, QWORD PTR [r9+40]
adcx r10, rax
adox r11, r12
mov QWORD PTR [rbp+40], r10
; No load %r12 - %r8
; A[6] x A[0]
; From here the high halves start the register-resident words 7-11
; (r14, r15, rdi, rsi, rbx).
mulx r14, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r14, r12
mov QWORD PTR [rbp+48], r11
; No load %r13 - %r9
; A[7] x A[0]
mulx r15, rax, QWORD PTR [r9+56]
adcx r14, rax
adox r15, r12
; No store %r12 - %r8
; No load %r14 - %r8
; A[8] x A[0]
mulx rdi, rax, QWORD PTR [r9+64]
adcx r15, rax
adox rdi, r12
; No store %r13 - %r9
; No load %r15 - %r9
; A[9] x A[0]
mulx rsi, rax, QWORD PTR [r9+72]
adcx rdi, rax
adox rsi, r12
; No store %r14 - %r8
; No load %rbx - %r8
; A[10] x A[0]
mulx rbx, rax, QWORD PTR [r9+80]
adcx rsi, rax
adox rbx, r12
; No store %r15 - %r9
; Zero into %r9
; A[11] x A[0]
mulx r11, rax, QWORD PTR [r9+88]
adcx rbx, rax
adox r11, r12
; No store %rbx - %r8
; Carry
; Add the pending CF into the top word, then collect what remains of both
; chains (CF + OF) in r13 for the Carry step of the next diagonal.
adcx r11, r12
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [r8], r11
; Diagonal 2
mov r11, QWORD PTR [rbp+24]
mov r10, QWORD PTR [rbp+32]
; A[2] x A[1]
mov rdx, QWORD PTR [r9+8]
mulx rcx, rax, QWORD PTR [r9+16]
; Low half goes to word i+j on the CF chain, high half to word i+j+1 on
; the OF chain.
adcx r11, rax
adox r10, rcx
mov QWORD PTR [rbp+24], r11
mov r11, QWORD PTR [rbp+40]
; A[3] x A[1]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+32], r10
mov r10, QWORD PTR [rbp+48]
; A[4] x A[1]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [rbp+40], r11
; No load %r12 - %r9
; A[5] x A[1]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r10, rax
adox r14, rcx
mov QWORD PTR [rbp+48], r10
; No load %r13 - %r8
; A[6] x A[1]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r14, rax
adox r15, rcx
; No store %r12 - %r9
; No load %r14 - %r9
; A[7] x A[1]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r15, rax
adox rdi, rcx
; No store %r13 - %r8
; No load %r15 - %r8
; A[8] x A[1]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rdi, rax
adox rsi, rcx
; No store %r14 - %r9
; No load %rbx - %r9
; A[9] x A[1]
mulx rcx, rax, QWORD PTR [r9+72]
adcx rsi, rax
adox rbx, rcx
; No store %r15 - %r8
mov r10, QWORD PTR [r8]
; A[10] x A[1]
mulx rcx, rax, QWORD PTR [r9+80]
adcx rbx, rax
adox r10, rcx
; No store %rbx - %r9
; Zero into %r9
; A[11] x A[1]
mulx r11, rax, QWORD PTR [r9+88]
adcx r10, rax
adox r11, r12
mov QWORD PTR [r8], r10
; Zero into %r8
; A[11] x A[2]
mov rdx, QWORD PTR [r9+16]
mulx r10, rax, QWORD PTR [r9+88]
adcx r11, rax
adox r10, r12
mov QWORD PTR [r8+8], r11
; Carry
adcx r10, r13
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [r8+16], r10
; Diagonal 3
; rdx still holds A[2] from the last product of Diagonal 2.
mov r10, QWORD PTR [rbp+40]
mov r11, QWORD PTR [rbp+48]
; A[3] x A[2]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+40], r10
; No load %r12 - %r8
; A[4] x A[2]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r14, rcx
mov QWORD PTR [rbp+48], r11
; No load %r13 - %r9
; A[5] x A[2]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r14, rax
adox r15, rcx
; No store %r12 - %r8
; No load %r14 - %r8
; A[6] x A[2]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r15, rax
adox rdi, rcx
; No store %r13 - %r9
; No load %r15 - %r9
; A[7] x A[2]
mulx rcx, rax, QWORD PTR [r9+56]
adcx rdi, rax
adox rsi, rcx
; No store %r14 - %r8
; No load %rbx - %r8
; A[8] x A[2]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rsi, rax
adox rbx, rcx
; No store %r15 - %r9
mov r11, QWORD PTR [r8]
; A[9] x A[2]
mulx rcx, rax, QWORD PTR [r9+72]
adcx rbx, rax
adox r11, rcx
; No store %rbx - %r8
mov r10, QWORD PTR [r8+8]
; A[10] x A[2]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8], r11
mov r11, QWORD PTR [r8+16]
; A[10] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+8], r10
; Zero into %r8
; A[10] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx r10, rax, QWORD PTR [r9+80]
adcx r11, rax
adox r10, r12
mov QWORD PTR [r8+16], r11
; Zero into %r9
; A[10] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx r11, rax, QWORD PTR [r9+80]
adcx r10, rax
adox r11, r12
mov QWORD PTR [r8+24], r10
; Carry
adcx r11, r13
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [r8+32], r11
; Diagonal 4
; No load %r13 - %r8
; A[4] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r14, rax
adox r15, rcx
; No store %r12 - %r9
; No load %r14 - %r9
; A[5] x A[3]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r15, rax
adox rdi, rcx
; No store %r13 - %r8
; No load %r15 - %r8
; A[6] x A[3]
mulx rcx, rax, QWORD PTR [r9+48]
adcx rdi, rax
adox rsi, rcx
; No store %r14 - %r9
; No load %rbx - %r9
; A[7] x A[3]
mulx rcx, rax, QWORD PTR [r9+56]
adcx rsi, rax
adox rbx, rcx
; No store %r15 - %r8
mov r10, QWORD PTR [r8]
; A[8] x A[3]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rbx, rax
adox r10, rcx
; No store %rbx - %r9
mov r11, QWORD PTR [r8+8]
; A[9] x A[3]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8], r10
mov r10, QWORD PTR [r8+16]
; A[9] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+8], r11
mov r11, QWORD PTR [r8+24]
; A[9] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+16], r10
mov r10, QWORD PTR [r8+32]
; A[9] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+24], r11
; Zero into %r9
; A[9] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx r11, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r11, r12
mov QWORD PTR [r8+32], r10
; Zero into %r8
; A[9] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx r10, rax, QWORD PTR [r9+72]
adcx r11, rax
adox r10, r12
mov QWORD PTR [r8+40], r11
; Carry
adcx r10, r13
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [r8+48], r10
; Diagonal 5
; No load %r15 - %r9
; A[5] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, QWORD PTR [r9+40]
adcx rdi, rax
adox rsi, rcx
; No store %r14 - %r8
; No load %rbx - %r8
; A[6] x A[4]
mulx rcx, rax, QWORD PTR [r9+48]
adcx rsi, rax
adox rbx, rcx
; No store %r15 - %r9
mov r11, QWORD PTR [r8]
; A[7] x A[4]
mulx rcx, rax, QWORD PTR [r9+56]
adcx rbx, rax
adox r11, rcx
; No store %rbx - %r8
mov r10, QWORD PTR [r8+8]
; A[8] x A[4]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8], r11
mov r11, QWORD PTR [r8+16]
; A[8] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+8], r10
mov r10, QWORD PTR [r8+24]
; A[8] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+16], r11
mov r11, QWORD PTR [r8+32]
; A[8] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+24], r10
mov r10, QWORD PTR [r8+40]
; A[10] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+32], r11
mov r11, QWORD PTR [r8+48]
; A[10] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+40], r10
; Zero into %r8
; A[10] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx r10, rax, QWORD PTR [r9+80]
adcx r11, rax
adox r10, r12
mov QWORD PTR [r8+48], r11
; Zero into %r9
; A[10] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx r11, rax, QWORD PTR [r9+80]
adcx r10, rax
adox r11, r12
mov QWORD PTR [r8+56], r10
; Carry
adcx r11, r13
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [r8+64], r11
; Diagonal 6
mov r10, QWORD PTR [r8]
; A[6] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+48]
adcx rbx, rax
adox r10, rcx
; No store %rbx - %r9
mov r11, QWORD PTR [r8+8]
; A[7] x A[5]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8], r10
mov r10, QWORD PTR [r8+16]
; A[7] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+8], r11
mov r11, QWORD PTR [r8+24]
; A[11] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+16], r10
mov r10, QWORD PTR [r8+32]
; A[11] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+24], r11
mov r11, QWORD PTR [r8+40]
; A[11] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+32], r10
mov r10, QWORD PTR [r8+48]
; A[11] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+40], r11
mov r11, QWORD PTR [r8+56]
; A[11] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+48], r10
mov r10, QWORD PTR [r8+64]
; A[11] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+56], r11
; Zero into %r9
; A[11] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx r11, rax, QWORD PTR [r9+88]
adcx r10, rax
adox r11, r12
mov QWORD PTR [r8+64], r10
; Zero into %r8
; A[11] x A[10]
mov rdx, QWORD PTR [r9+80]
mulx r10, rax, QWORD PTR [r9+88]
adcx r11, rax
adox r10, r12
mov QWORD PTR [r8+72], r11
; Carry
; Last diagonal: the collected carry in r13 becomes result word 23.
adcx r10, r13
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [r8+80], r10
mov QWORD PTR [r8+88], r13
; Double and Add in A[i] x A[i]
; Each step doubles two words of the cross-product sum on the OF chain
; (adox x, x) and adds the low/high halves of A[i] x A[i] on the CF chain.
mov r11, QWORD PTR [rbp+8]
; A[0] x A[0]
mov rdx, QWORD PTR [r9]
mulx rcx, rax, rdx
; Word 0 is just the low half of A[0] x A[0]; no cross product reaches it.
mov QWORD PTR [rbp], rax
adox r11, r11
adcx r11, rcx
mov QWORD PTR [rbp+8], r11
mov r10, QWORD PTR [rbp+16]
mov r11, QWORD PTR [rbp+24]
; A[1] x A[1]
mov rdx, QWORD PTR [r9+8]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+16], r10
mov QWORD PTR [rbp+24], r11
mov r10, QWORD PTR [rbp+32]
mov r11, QWORD PTR [rbp+40]
; A[2] x A[2]
mov rdx, QWORD PTR [r9+16]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+32], r10
mov QWORD PTR [rbp+40], r11
mov r10, QWORD PTR [rbp+48]
; A[3] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, rdx
adox r10, r10
adox r14, r14
adcx r10, rax
adcx r14, rcx
mov QWORD PTR [rbp+48], r10
; A[4] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, rdx
adox r15, r15
adox rdi, rdi
adcx r15, rax
adcx rdi, rcx
; A[5] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, rdx
adox rsi, rsi
adox rbx, rbx
adcx rsi, rax
adcx rbx, rcx
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[6] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8], r10
mov QWORD PTR [r8+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
; A[7] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+16], r10
mov QWORD PTR [r8+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[8] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+32], r10
mov QWORD PTR [r8+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
; A[9] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+48], r10
mov QWORD PTR [r8+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
; A[10] x A[10]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+64], r10
mov QWORD PTR [r8+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
; A[11] x A[11]
mov rdx, QWORD PTR [r9+88]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+80], r10
mov QWORD PTR [r8+88], r11
; Flush result words 7-11 from registers to r (r8 - 40 == r + 56).
mov QWORD PTR [r8+-40], r14
mov QWORD PTR [r8+-32], r15
mov QWORD PTR [r8+-24], rdi
mov QWORD PTR [r8+-16], rsi
mov QWORD PTR [r8+-8], rbx
; r8 = r again. When r == a, words 0-6 were built on the stack (rbp ==
; rsp) and are copied to r now; otherwise they are already in place.
sub r8, 96
cmp r9, r8
jne L_end_3072_sqr_avx2_12
vmovdqu xmm0, OWORD PTR [rbp]
vmovups OWORD PTR [r8], xmm0
vmovdqu xmm0, OWORD PTR [rbp+16]
vmovups OWORD PTR [r8+16], xmm0
vmovdqu xmm0, OWORD PTR [rbp+32]
vmovups OWORD PTR [r8+32], xmm0
mov rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+48], rax
L_end_3072_sqr_avx2_12:
; Release the scratch buffer and restore the saved registers in reverse
; push order.
add rsp, 96
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rbp
ret
sp_3072_sqr_avx2_12 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r. (r = a * a)
; *
; * Karatsuba: ah^2, al^2, (al - ah)^2
; *
; * r A single precision integer.
; * a A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_3072_sqr_24 PROC
sub rsp, 208
mov QWORD PTR [rsp+192], rcx
mov QWORD PTR [rsp+200], rdx
mov r9, 0
mov r10, rsp
lea r11, QWORD PTR [rdx+96]
mov rax, QWORD PTR [rdx]
sub rax, QWORD PTR [r11]
mov r8, QWORD PTR [rdx+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [r11+8]
mov rax, QWORD PTR [rdx+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [r11+16]
mov r8, QWORD PTR [rdx+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [r11+24]
mov rax, QWORD PTR [rdx+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [r11+32]
mov r8, QWORD PTR [rdx+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [r11+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [r11+48]
mov r8, QWORD PTR [rdx+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [r11+56]
mov rax, QWORD PTR [rdx+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [r11+64]
mov r8, QWORD PTR [rdx+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [r11+72]
mov rax, QWORD PTR [rdx+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [r11+80]
mov r8, QWORD PTR [rdx+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [r11+88]
mov QWORD PTR [r10+88], r8
sbb r9, 0
; Cond Negate
mov rax, QWORD PTR [r10]
mov r11, r9
xor rax, r9
neg r11
sub rax, r9
mov r8, QWORD PTR [r10+8]
sbb r11, 0
mov QWORD PTR [r10], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+16]
setc r11b
mov QWORD PTR [r10+8], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+24]
setc r11b
mov QWORD PTR [r10+16], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+32]
setc r11b
mov QWORD PTR [r10+24], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+40]
setc r11b
mov QWORD PTR [r10+32], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+48]
setc r11b
mov QWORD PTR [r10+40], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+56]
setc r11b
mov QWORD PTR [r10+48], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+64]
setc r11b
mov QWORD PTR [r10+56], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+72]
setc r11b
mov QWORD PTR [r10+64], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+80]
setc r11b
mov QWORD PTR [r10+72], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+88]
setc r11b
mov QWORD PTR [r10+80], rax
xor r8, r9
add r8, r11
mov QWORD PTR [r10+88], r8
mov rdx, r10
mov rcx, rsp
call sp_3072_sqr_12
mov rdx, QWORD PTR [rsp+200]
mov rcx, QWORD PTR [rsp+192]
add rdx, 96
add rcx, 192
call sp_3072_sqr_12
mov rdx, QWORD PTR [rsp+200]
mov rcx, QWORD PTR [rsp+192]
call sp_3072_sqr_12
IFDEF _WIN64
mov rdx, QWORD PTR [rsp+200]
mov rcx, QWORD PTR [rsp+192]
ENDIF
mov rdx, QWORD PTR [rsp+192]
lea r10, QWORD PTR [rsp+96]
add rdx, 288
mov r9, 0
mov r8, QWORD PTR [r10+-96]
sub r8, QWORD PTR [rdx+-96]
mov rax, QWORD PTR [r10+-88]
mov QWORD PTR [r10+-96], r8
sbb rax, QWORD PTR [rdx+-88]
mov r8, QWORD PTR [r10+-80]
mov QWORD PTR [r10+-88], rax
sbb r8, QWORD PTR [rdx+-80]
mov rax, QWORD PTR [r10+-72]
mov QWORD PTR [r10+-80], r8
sbb rax, QWORD PTR [rdx+-72]
mov r8, QWORD PTR [r10+-64]
mov QWORD PTR [r10+-72], rax
sbb r8, QWORD PTR [rdx+-64]
mov rax, QWORD PTR [r10+-56]
mov QWORD PTR [r10+-64], r8
sbb rax, QWORD PTR [rdx+-56]
mov r8, QWORD PTR [r10+-48]
mov QWORD PTR [r10+-56], rax
sbb r8, QWORD PTR [rdx+-48]
mov rax, QWORD PTR [r10+-40]
mov QWORD PTR [r10+-48], r8
sbb rax, QWORD PTR [rdx+-40]
mov r8, QWORD PTR [r10+-32]
mov QWORD PTR [r10+-40], rax
sbb r8, QWORD PTR [rdx+-32]
mov rax, QWORD PTR [r10+-24]
mov QWORD PTR [r10+-32], r8
sbb rax, QWORD PTR [rdx+-24]
mov r8, QWORD PTR [r10+-16]
mov QWORD PTR [r10+-24], rax
sbb r8, QWORD PTR [rdx+-16]
mov rax, QWORD PTR [r10+-8]
mov QWORD PTR [r10+-16], r8
sbb rax, QWORD PTR [rdx+-8]
mov r8, QWORD PTR [r10]
mov QWORD PTR [r10+-8], rax
sbb r8, QWORD PTR [rdx]
mov rax, QWORD PTR [r10+8]
mov QWORD PTR [r10], r8
sbb rax, QWORD PTR [rdx+8]
mov r8, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], rax
sbb r8, QWORD PTR [rdx+16]
mov rax, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], r8
sbb rax, QWORD PTR [rdx+24]
mov r8, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], rax
sbb r8, QWORD PTR [rdx+32]
mov rax, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], r8
sbb rax, QWORD PTR [rdx+40]
mov r8, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], rax
sbb r8, QWORD PTR [rdx+48]
mov rax, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], r8
sbb rax, QWORD PTR [rdx+56]
mov r8, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], rax
sbb r8, QWORD PTR [rdx+64]
mov rax, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], r8
sbb rax, QWORD PTR [rdx+72]
mov r8, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], rax
sbb r8, QWORD PTR [rdx+80]
mov rax, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], r8
sbb rax, QWORD PTR [rdx+88]
mov QWORD PTR [r10+88], rax
sbb r9, 0
sub rdx, 192
mov r8, QWORD PTR [r10+-96]
sub r8, QWORD PTR [rdx+-96]
mov rax, QWORD PTR [r10+-88]
mov QWORD PTR [r10+-96], r8
sbb rax, QWORD PTR [rdx+-88]
mov r8, QWORD PTR [r10+-80]
mov QWORD PTR [r10+-88], rax
sbb r8, QWORD PTR [rdx+-80]
mov rax, QWORD PTR [r10+-72]
mov QWORD PTR [r10+-80], r8
sbb rax, QWORD PTR [rdx+-72]
mov r8, QWORD PTR [r10+-64]
mov QWORD PTR [r10+-72], rax
sbb r8, QWORD PTR [rdx+-64]
mov rax, QWORD PTR [r10+-56]
mov QWORD PTR [r10+-64], r8
sbb rax, QWORD PTR [rdx+-56]
mov r8, QWORD PTR [r10+-48]
mov QWORD PTR [r10+-56], rax
sbb r8, QWORD PTR [rdx+-48]
mov rax, QWORD PTR [r10+-40]
mov QWORD PTR [r10+-48], r8
sbb rax, QWORD PTR [rdx+-40]
mov r8, QWORD PTR [r10+-32]
mov QWORD PTR [r10+-40], rax
sbb r8, QWORD PTR [rdx+-32]
mov rax, QWORD PTR [r10+-24]
mov QWORD PTR [r10+-32], r8
sbb rax, QWORD PTR [rdx+-24]
mov r8, QWORD PTR [r10+-16]
mov QWORD PTR [r10+-24], rax
sbb r8, QWORD PTR [rdx+-16]
mov rax, QWORD PTR [r10+-8]
mov QWORD PTR [r10+-16], r8
sbb rax, QWORD PTR [rdx+-8]
mov r8, QWORD PTR [r10]
mov QWORD PTR [r10+-8], rax
sbb r8, QWORD PTR [rdx]
mov rax, QWORD PTR [r10+8]
mov QWORD PTR [r10], r8
sbb rax, QWORD PTR [rdx+8]
mov r8, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], rax
sbb r8, QWORD PTR [rdx+16]
mov rax, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], r8
sbb rax, QWORD PTR [rdx+24]
mov r8, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], rax
sbb r8, QWORD PTR [rdx+32]
mov rax, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], r8
sbb rax, QWORD PTR [rdx+40]
mov r8, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], rax
sbb r8, QWORD PTR [rdx+48]
mov rax, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], r8
sbb rax, QWORD PTR [rdx+56]
mov r8, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], rax
sbb r8, QWORD PTR [rdx+64]
mov rax, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], r8
sbb rax, QWORD PTR [rdx+72]
mov r8, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], rax
sbb r8, QWORD PTR [rdx+80]
mov rax, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], r8
sbb rax, QWORD PTR [rdx+88]
mov QWORD PTR [r10+88], rax
sbb r9, 0
mov rcx, QWORD PTR [rsp+192]
neg r9
add rcx, 192
mov r8, QWORD PTR [rcx+-96]
sub r8, QWORD PTR [r10+-96]
mov rax, QWORD PTR [rcx+-88]
mov QWORD PTR [rcx+-96], r8
sbb rax, QWORD PTR [r10+-88]
mov r8, QWORD PTR [rcx+-80]
mov QWORD PTR [rcx+-88], rax
sbb r8, QWORD PTR [r10+-80]
mov rax, QWORD PTR [rcx+-72]
mov QWORD PTR [rcx+-80], r8
sbb rax, QWORD PTR [r10+-72]
mov r8, QWORD PTR [rcx+-64]
mov QWORD PTR [rcx+-72], rax
sbb r8, QWORD PTR [r10+-64]
mov rax, QWORD PTR [rcx+-56]
mov QWORD PTR [rcx+-64], r8
sbb rax, QWORD PTR [r10+-56]
mov r8, QWORD PTR [rcx+-48]
mov QWORD PTR [rcx+-56], rax
sbb r8, QWORD PTR [r10+-48]
mov rax, QWORD PTR [rcx+-40]
mov QWORD PTR [rcx+-48], r8
sbb rax, QWORD PTR [r10+-40]
mov r8, QWORD PTR [rcx+-32]
mov QWORD PTR [rcx+-40], rax
sbb r8, QWORD PTR [r10+-32]
mov rax, QWORD PTR [rcx+-24]
mov QWORD PTR [rcx+-32], r8
sbb rax, QWORD PTR [r10+-24]
mov r8, QWORD PTR [rcx+-16]
mov QWORD PTR [rcx+-24], rax
sbb r8, QWORD PTR [r10+-16]
mov rax, QWORD PTR [rcx+-8]
mov QWORD PTR [rcx+-16], r8
sbb rax, QWORD PTR [r10+-8]
mov r8, QWORD PTR [rcx]
mov QWORD PTR [rcx+-8], rax
sbb r8, QWORD PTR [r10]
mov rax, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
sbb rax, QWORD PTR [r10+8]
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], rax
sbb r8, QWORD PTR [r10+16]
mov rax, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
sbb rax, QWORD PTR [r10+24]
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], rax
sbb r8, QWORD PTR [r10+32]
mov rax, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
sbb rax, QWORD PTR [r10+40]
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], rax
sbb r8, QWORD PTR [r10+48]
mov rax, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
sbb rax, QWORD PTR [r10+56]
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], rax
sbb r8, QWORD PTR [r10+64]
mov rax, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
sbb rax, QWORD PTR [r10+72]
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], rax
sbb r8, QWORD PTR [r10+80]
mov rax, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
sbb rax, QWORD PTR [r10+88]
mov QWORD PTR [rcx+88], rax
sbb r9, 0
mov rcx, QWORD PTR [rsp+192]
add rcx, 288
; Add in word
mov r8, QWORD PTR [rcx]
add r8, r9
mov rax, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
adc rax, 0
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], rax
adc r8, 0
mov rax, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
adc rax, 0
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], rax
adc r8, 0
mov rax, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
adc rax, 0
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], rax
adc r8, 0
mov rax, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
adc rax, 0
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], rax
adc r8, 0
mov rax, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
adc rax, 0
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], rax
adc r8, 0
mov rax, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
adc rax, 0
mov QWORD PTR [rcx+88], rax
mov rdx, QWORD PTR [rsp+200]
mov rcx, QWORD PTR [rsp+192]
add rsp, 208
ret
sp_3072_sqr_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * Karatsuba: ah^2, al^2, (al - ah)^2
; *
; * The 24-word input is split at word 12 into al (bytes 0..95 of a) and
; * ah (bytes 96..191 of a). With B = 2^768 (12 words of 64 bits):
; *   a^2 = al^2 + (al^2 + ah^2 - (al - ah)^2) * B + ah^2 * B^2
; * The three half-size squares are delegated to sp_3072_sqr_avx2_12.
; *
; * r A single precision integer. Passed in rcx.
; * a A single precision integer. Passed in rdx.
; *
; * Stack frame (208 bytes):
; *   [rsp+0 .. rsp+191]  scratch: al - ah, then its square, then the
; *                       (negated) middle term
; *   [rsp+192]           saved r
; *   [rsp+200]           saved a
; *
; * This routine itself modifies rax, r8, r9, r10, r11 and the flags; rcx and
; * rdx are reloaded with r and a just before returning.
; * NOTE(review): no stack beyond the 208 bytes used above (e.g. shadow space)
; * is reserved for the calls - presumably fine because the callee is an
; * internal routine; confirm against sp_3072_sqr_avx2_12.
; */
_text SEGMENT READONLY PARA
sp_3072_sqr_avx2_24 PROC
; Allocate the frame and save both arguments so they can be reloaded after
; each call.
sub rsp, 208
mov QWORD PTR [rsp+192], rcx
mov QWORD PTR [rsp+200], rdx
; r9 collects the final borrow of al - ah: it ends up 0 or all ones.
mov r9, 0
; r10 -> scratch, rdx -> al, r11 -> ah.
mov r10, rsp
lea r11, QWORD PTR [rdx+96]
; scratch[0..11] = al - ah, one word at a time. The loads and stores are
; interleaved with the sbb chain; mov does not alter CF, so the borrow
; survives from word to word.
mov rax, QWORD PTR [rdx]
sub rax, QWORD PTR [r11]
mov r8, QWORD PTR [rdx+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [r11+8]
mov rax, QWORD PTR [rdx+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [r11+16]
mov r8, QWORD PTR [rdx+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [r11+24]
mov rax, QWORD PTR [rdx+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [r11+32]
mov r8, QWORD PTR [rdx+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [r11+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [r11+48]
mov r8, QWORD PTR [rdx+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [r11+56]
mov rax, QWORD PTR [rdx+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [r11+64]
mov r8, QWORD PTR [rdx+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [r11+72]
mov rax, QWORD PTR [rdx+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [r11+80]
mov r8, QWORD PTR [rdx+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [r11+88]
mov QWORD PTR [r10+88], r8
; r9 = 0 - borrow: 0 when al >= ah, all ones when al < ah.
sbb r9, 0
; Cond Negate
; Branch-free scratch = |al - ah|: every word is XORed with the mask in r9
; and 1 is added at word 0 when the mask is all ones (two's-complement
; negation); with a zero mask the words are rewritten unchanged.
; r11 carries the increment between words. It starts as -r9 (0 or 1), the
; sbb after "sub rax, r9" turns it into the carry out of word 0, and each
; setc r11b refreshes it after the following add. Only the low byte is
; written by setc, which is sufficient because r11 is always 0 or 1 here.
mov rax, QWORD PTR [r10]
mov r11, r9
xor rax, r9
neg r11
sub rax, r9
mov r8, QWORD PTR [r10+8]
sbb r11, 0
mov QWORD PTR [r10], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+16]
setc r11b
mov QWORD PTR [r10+8], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+24]
setc r11b
mov QWORD PTR [r10+16], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+32]
setc r11b
mov QWORD PTR [r10+24], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+40]
setc r11b
mov QWORD PTR [r10+32], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+48]
setc r11b
mov QWORD PTR [r10+40], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+56]
setc r11b
mov QWORD PTR [r10+48], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+64]
setc r11b
mov QWORD PTR [r10+56], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+72]
setc r11b
mov QWORD PTR [r10+64], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+80]
setc r11b
mov QWORD PTR [r10+72], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+88]
setc r11b
mov QWORD PTR [r10+80], rax
xor r8, r9
add r8, r11
mov QWORD PTR [r10+88], r8
; scratch = |al - ah|^2. Source and destination are both the scratch area,
; so the callee is relied on to support squaring in place.
; Presumably each call writes a 24-word square at rcx - the callee is not
; visible here, but the offsets used below assume that layout.
mov rdx, r10
mov rcx, rsp
call sp_3072_sqr_avx2_12
; r + 192 = ah^2 (input a + 96).
mov rdx, QWORD PTR [rsp+200]
mov rcx, QWORD PTR [rsp+192]
add rdx, 96
add rcx, 192
call sp_3072_sqr_avx2_12
; r + 0 = al^2.
mov rdx, QWORD PTR [rsp+200]
mov rcx, QWORD PTR [rsp+192]
call sp_3072_sqr_avx2_12
IFDEF _WIN64
; Reload of the arguments after the call. Both values are superseded before
; being used: rdx is overwritten by the very next instruction and rcx is not
; read again until it is reloaded further down.
mov rdx, QWORD PTR [rsp+200]
mov rcx, QWORD PTR [rsp+192]
ENDIF
; rdx = r + 288 and r10 = scratch + 96: both point at the middle of a
; 24-word (192-byte) span, so the displacements below run from -96 to +88.
mov rdx, QWORD PTR [rsp+192]
lea r10, QWORD PTR [rsp+96]
add rdx, 288
; r9 accumulates minus the number of borrows of the next two subtractions.
mov r9, 0
; scratch -= ah^2 (the 24 words at r + 192 .. r + 383).
mov r8, QWORD PTR [r10+-96]
sub r8, QWORD PTR [rdx+-96]
mov rax, QWORD PTR [r10+-88]
mov QWORD PTR [r10+-96], r8
sbb rax, QWORD PTR [rdx+-88]
mov r8, QWORD PTR [r10+-80]
mov QWORD PTR [r10+-88], rax
sbb r8, QWORD PTR [rdx+-80]
mov rax, QWORD PTR [r10+-72]
mov QWORD PTR [r10+-80], r8
sbb rax, QWORD PTR [rdx+-72]
mov r8, QWORD PTR [r10+-64]
mov QWORD PTR [r10+-72], rax
sbb r8, QWORD PTR [rdx+-64]
mov rax, QWORD PTR [r10+-56]
mov QWORD PTR [r10+-64], r8
sbb rax, QWORD PTR [rdx+-56]
mov r8, QWORD PTR [r10+-48]
mov QWORD PTR [r10+-56], rax
sbb r8, QWORD PTR [rdx+-48]
mov rax, QWORD PTR [r10+-40]
mov QWORD PTR [r10+-48], r8
sbb rax, QWORD PTR [rdx+-40]
mov r8, QWORD PTR [r10+-32]
mov QWORD PTR [r10+-40], rax
sbb r8, QWORD PTR [rdx+-32]
mov rax, QWORD PTR [r10+-24]
mov QWORD PTR [r10+-32], r8
sbb rax, QWORD PTR [rdx+-24]
mov r8, QWORD PTR [r10+-16]
mov QWORD PTR [r10+-24], rax
sbb r8, QWORD PTR [rdx+-16]
mov rax, QWORD PTR [r10+-8]
mov QWORD PTR [r10+-16], r8
sbb rax, QWORD PTR [rdx+-8]
mov r8, QWORD PTR [r10]
mov QWORD PTR [r10+-8], rax
sbb r8, QWORD PTR [rdx]
mov rax, QWORD PTR [r10+8]
mov QWORD PTR [r10], r8
sbb rax, QWORD PTR [rdx+8]
mov r8, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], rax
sbb r8, QWORD PTR [rdx+16]
mov rax, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], r8
sbb rax, QWORD PTR [rdx+24]
mov r8, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], rax
sbb r8, QWORD PTR [rdx+32]
mov rax, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], r8
sbb rax, QWORD PTR [rdx+40]
mov r8, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], rax
sbb r8, QWORD PTR [rdx+48]
mov rax, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], r8
sbb rax, QWORD PTR [rdx+56]
mov r8, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], rax
sbb r8, QWORD PTR [rdx+64]
mov rax, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], r8
sbb rax, QWORD PTR [rdx+72]
mov r8, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], rax
sbb r8, QWORD PTR [rdx+80]
mov rax, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], r8
sbb rax, QWORD PTR [rdx+88]
mov QWORD PTR [r10+88], rax
sbb r9, 0
; Move rdx back by 192 bytes to r + 96, the middle of al^2 (r + 0 .. r + 191).
sub rdx, 192
; scratch -= al^2.
mov r8, QWORD PTR [r10+-96]
sub r8, QWORD PTR [rdx+-96]
mov rax, QWORD PTR [r10+-88]
mov QWORD PTR [r10+-96], r8
sbb rax, QWORD PTR [rdx+-88]
mov r8, QWORD PTR [r10+-80]
mov QWORD PTR [r10+-88], rax
sbb r8, QWORD PTR [rdx+-80]
mov rax, QWORD PTR [r10+-72]
mov QWORD PTR [r10+-80], r8
sbb rax, QWORD PTR [rdx+-72]
mov r8, QWORD PTR [r10+-64]
mov QWORD PTR [r10+-72], rax
sbb r8, QWORD PTR [rdx+-64]
mov rax, QWORD PTR [r10+-56]
mov QWORD PTR [r10+-64], r8
sbb rax, QWORD PTR [rdx+-56]
mov r8, QWORD PTR [r10+-48]
mov QWORD PTR [r10+-56], rax
sbb r8, QWORD PTR [rdx+-48]
mov rax, QWORD PTR [r10+-40]
mov QWORD PTR [r10+-48], r8
sbb rax, QWORD PTR [rdx+-40]
mov r8, QWORD PTR [r10+-32]
mov QWORD PTR [r10+-40], rax
sbb r8, QWORD PTR [rdx+-32]
mov rax, QWORD PTR [r10+-24]
mov QWORD PTR [r10+-32], r8
sbb rax, QWORD PTR [rdx+-24]
mov r8, QWORD PTR [r10+-16]
mov QWORD PTR [r10+-24], rax
sbb r8, QWORD PTR [rdx+-16]
mov rax, QWORD PTR [r10+-8]
mov QWORD PTR [r10+-16], r8
sbb rax, QWORD PTR [rdx+-8]
mov r8, QWORD PTR [r10]
mov QWORD PTR [r10+-8], rax
sbb r8, QWORD PTR [rdx]
mov rax, QWORD PTR [r10+8]
mov QWORD PTR [r10], r8
sbb rax, QWORD PTR [rdx+8]
mov r8, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], rax
sbb r8, QWORD PTR [rdx+16]
mov rax, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], r8
sbb rax, QWORD PTR [rdx+24]
mov r8, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], rax
sbb r8, QWORD PTR [rdx+32]
mov rax, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], r8
sbb rax, QWORD PTR [rdx+40]
mov r8, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], rax
sbb r8, QWORD PTR [rdx+48]
mov rax, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], r8
sbb rax, QWORD PTR [rdx+56]
mov r8, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], rax
sbb r8, QWORD PTR [rdx+64]
mov rax, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], r8
sbb rax, QWORD PTR [rdx+72]
mov r8, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], rax
sbb r8, QWORD PTR [rdx+80]
mov rax, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], r8
sbb rax, QWORD PTR [rdx+88]
mov QWORD PTR [r10+88], rax
sbb r9, 0
; scratch now holds (al - ah)^2 - ah^2 - al^2 reduced to 24 words, and
; r9 = -(number of borrows out of those 24 words).
; rcx = r + 192, the middle of the 24-word window r + 96 .. r + 287 where
; the middle Karatsuba term belongs. neg turns r9 into the (non-negative)
; borrow count; it sits between a flag-neutral mov and the add, before the
; next sub starts a fresh carry chain.
mov rcx, QWORD PTR [rsp+192]
neg r9
add rcx, 192
; r[96..287] -= scratch, i.e. add al^2 + ah^2 - (al - ah)^2 at word 12.
mov r8, QWORD PTR [rcx+-96]
sub r8, QWORD PTR [r10+-96]
mov rax, QWORD PTR [rcx+-88]
mov QWORD PTR [rcx+-96], r8
sbb rax, QWORD PTR [r10+-88]
mov r8, QWORD PTR [rcx+-80]
mov QWORD PTR [rcx+-88], rax
sbb r8, QWORD PTR [r10+-80]
mov rax, QWORD PTR [rcx+-72]
mov QWORD PTR [rcx+-80], r8
sbb rax, QWORD PTR [r10+-72]
mov r8, QWORD PTR [rcx+-64]
mov QWORD PTR [rcx+-72], rax
sbb r8, QWORD PTR [r10+-64]
mov rax, QWORD PTR [rcx+-56]
mov QWORD PTR [rcx+-64], r8
sbb rax, QWORD PTR [r10+-56]
mov r8, QWORD PTR [rcx+-48]
mov QWORD PTR [rcx+-56], rax
sbb r8, QWORD PTR [r10+-48]
mov rax, QWORD PTR [rcx+-40]
mov QWORD PTR [rcx+-48], r8
sbb rax, QWORD PTR [r10+-40]
mov r8, QWORD PTR [rcx+-32]
mov QWORD PTR [rcx+-40], rax
sbb r8, QWORD PTR [r10+-32]
mov rax, QWORD PTR [rcx+-24]
mov QWORD PTR [rcx+-32], r8
sbb rax, QWORD PTR [r10+-24]
mov r8, QWORD PTR [rcx+-16]
mov QWORD PTR [rcx+-24], rax
sbb r8, QWORD PTR [r10+-16]
mov rax, QWORD PTR [rcx+-8]
mov QWORD PTR [rcx+-16], r8
sbb rax, QWORD PTR [r10+-8]
mov r8, QWORD PTR [rcx]
mov QWORD PTR [rcx+-8], rax
sbb r8, QWORD PTR [r10]
mov rax, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
sbb rax, QWORD PTR [r10+8]
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], rax
sbb r8, QWORD PTR [r10+16]
mov rax, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
sbb rax, QWORD PTR [r10+24]
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], rax
sbb r8, QWORD PTR [r10+32]
mov rax, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
sbb rax, QWORD PTR [r10+40]
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], rax
sbb r8, QWORD PTR [r10+48]
mov rax, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
sbb rax, QWORD PTR [r10+56]
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], rax
sbb r8, QWORD PTR [r10+64]
mov rax, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
sbb rax, QWORD PTR [r10+72]
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], rax
sbb r8, QWORD PTR [r10+80]
mov rax, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
sbb rax, QWORD PTR [r10+88]
mov QWORD PTR [rcx+88], rax
; r9 = (borrows recorded while building scratch) - (borrow out of this
; subtraction): the net correction still owed to the word above the window.
sbb r9, 0
; rcx = r + 288, the first word above the 24-word window.
mov rcx, QWORD PTR [rsp+192]
add rcx, 288
; Add in word
; Add r9 at r + 288 and ripple the carry through the remaining 12 words
; (r + 288 .. r + 383) with adc 0.
mov r8, QWORD PTR [rcx]
add r8, r9
mov rax, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
adc rax, 0
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], rax
adc r8, 0
mov rax, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
adc rax, 0
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], rax
adc r8, 0
mov rax, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
adc rax, 0
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], rax
adc r8, 0
mov rax, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
adc rax, 0
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], rax
adc r8, 0
mov rax, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
adc rax, 0
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], rax
adc r8, 0
mov rax, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
adc rax, 0
mov QWORD PTR [rcx+88], rax
; Put the original a and r back into rdx and rcx, release the frame and
; return.
mov rdx, QWORD PTR [rsp+200]
mov rcx, QWORD PTR [rsp+192]
add rsp, 208
ret
sp_3072_sqr_avx2_24 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r. (r = a * a)
; *
; * Karatsuba: ah^2, al^2, (al - ah)^2
; *
; * r A single precision integer.
; * a A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_3072_sqr_48 PROC
sub rsp, 400
mov QWORD PTR [rsp+384], rcx
mov QWORD PTR [rsp+392], rdx
mov r9, 0
mov r10, rsp
lea r11, QWORD PTR [rdx+192]
mov rax, QWORD PTR [rdx]
sub rax, QWORD PTR [r11]
mov r8, QWORD PTR [rdx+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [r11+8]
mov rax, QWORD PTR [rdx+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [r11+16]
mov r8, QWORD PTR [rdx+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [r11+24]
mov rax, QWORD PTR [rdx+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [r11+32]
mov r8, QWORD PTR [rdx+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [r11+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [r11+48]
mov r8, QWORD PTR [rdx+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [r11+56]
mov rax, QWORD PTR [rdx+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [r11+64]
mov r8, QWORD PTR [rdx+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [r11+72]
mov rax, QWORD PTR [rdx+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [r11+80]
mov r8, QWORD PTR [rdx+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [r11+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [r11+96]
mov r8, QWORD PTR [rdx+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [r11+104]
mov rax, QWORD PTR [rdx+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [r11+112]
mov r8, QWORD PTR [rdx+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [r11+120]
mov rax, QWORD PTR [rdx+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [r11+128]
mov r8, QWORD PTR [rdx+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [r11+136]
mov rax, QWORD PTR [rdx+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [r11+144]
mov r8, QWORD PTR [rdx+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [r11+152]
mov rax, QWORD PTR [rdx+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [r11+160]
mov r8, QWORD PTR [rdx+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [r11+168]
mov rax, QWORD PTR [rdx+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [r11+176]
mov r8, QWORD PTR [rdx+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [r11+184]
mov QWORD PTR [r10+184], r8
sbb r9, 0
; Cond Negate
mov rax, QWORD PTR [r10]
mov r11, r9
xor rax, r9
neg r11
sub rax, r9
mov r8, QWORD PTR [r10+8]
sbb r11, 0
mov QWORD PTR [r10], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+16]
setc r11b
mov QWORD PTR [r10+8], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+24]
setc r11b
mov QWORD PTR [r10+16], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+32]
setc r11b
mov QWORD PTR [r10+24], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+40]
setc r11b
mov QWORD PTR [r10+32], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+48]
setc r11b
mov QWORD PTR [r10+40], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+56]
setc r11b
mov QWORD PTR [r10+48], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+64]
setc r11b
mov QWORD PTR [r10+56], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+72]
setc r11b
mov QWORD PTR [r10+64], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+80]
setc r11b
mov QWORD PTR [r10+72], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+88]
setc r11b
mov QWORD PTR [r10+80], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+96]
setc r11b
mov QWORD PTR [r10+88], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+104]
setc r11b
mov QWORD PTR [r10+96], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+112]
setc r11b
mov QWORD PTR [r10+104], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+120]
setc r11b
mov QWORD PTR [r10+112], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+128]
setc r11b
mov QWORD PTR [r10+120], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+136]
setc r11b
mov QWORD PTR [r10+128], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+144]
setc r11b
mov QWORD PTR [r10+136], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+152]
setc r11b
mov QWORD PTR [r10+144], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+160]
setc r11b
mov QWORD PTR [r10+152], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+168]
setc r11b
mov QWORD PTR [r10+160], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+176]
setc r11b
mov QWORD PTR [r10+168], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+184]
setc r11b
mov QWORD PTR [r10+176], rax
xor r8, r9
add r8, r11
mov QWORD PTR [r10+184], r8
mov rdx, r10
mov rcx, rsp
call sp_3072_sqr_24
mov rdx, QWORD PTR [rsp+392]
mov rcx, QWORD PTR [rsp+384]
add rdx, 192
add rcx, 384
call sp_3072_sqr_24
mov rdx, QWORD PTR [rsp+392]
mov rcx, QWORD PTR [rsp+384]
call sp_3072_sqr_24
IFDEF _WIN64
mov rdx, QWORD PTR [rsp+392]
mov rcx, QWORD PTR [rsp+384]
ENDIF
mov rdx, QWORD PTR [rsp+384]
lea r10, QWORD PTR [rsp+192]
add rdx, 576
mov r9, 0
mov r8, QWORD PTR [r10+-192]
sub r8, QWORD PTR [rdx+-192]
mov rax, QWORD PTR [r10+-184]
mov QWORD PTR [r10+-192], r8
sbb rax, QWORD PTR [rdx+-184]
mov r8, QWORD PTR [r10+-176]
mov QWORD PTR [r10+-184], rax
sbb r8, QWORD PTR [rdx+-176]
mov rax, QWORD PTR [r10+-168]
mov QWORD PTR [r10+-176], r8
sbb rax, QWORD PTR [rdx+-168]
mov r8, QWORD PTR [r10+-160]
mov QWORD PTR [r10+-168], rax
sbb r8, QWORD PTR [rdx+-160]
mov rax, QWORD PTR [r10+-152]
mov QWORD PTR [r10+-160], r8
sbb rax, QWORD PTR [rdx+-152]
mov r8, QWORD PTR [r10+-144]
mov QWORD PTR [r10+-152], rax
sbb r8, QWORD PTR [rdx+-144]
mov rax, QWORD PTR [r10+-136]
mov QWORD PTR [r10+-144], r8
sbb rax, QWORD PTR [rdx+-136]
mov r8, QWORD PTR [r10+-128]
mov QWORD PTR [r10+-136], rax
sbb r8, QWORD PTR [rdx+-128]
mov rax, QWORD PTR [r10+-120]
mov QWORD PTR [r10+-128], r8
sbb rax, QWORD PTR [rdx+-120]
mov r8, QWORD PTR [r10+-112]
mov QWORD PTR [r10+-120], rax
sbb r8, QWORD PTR [rdx+-112]
mov rax, QWORD PTR [r10+-104]
mov QWORD PTR [r10+-112], r8
sbb rax, QWORD PTR [rdx+-104]
mov r8, QWORD PTR [r10+-96]
mov QWORD PTR [r10+-104], rax
sbb r8, QWORD PTR [rdx+-96]
mov rax, QWORD PTR [r10+-88]
mov QWORD PTR [r10+-96], r8
sbb rax, QWORD PTR [rdx+-88]
mov r8, QWORD PTR [r10+-80]
mov QWORD PTR [r10+-88], rax
sbb r8, QWORD PTR [rdx+-80]
mov rax, QWORD PTR [r10+-72]
mov QWORD PTR [r10+-80], r8
sbb rax, QWORD PTR [rdx+-72]
mov r8, QWORD PTR [r10+-64]
mov QWORD PTR [r10+-72], rax
sbb r8, QWORD PTR [rdx+-64]
mov rax, QWORD PTR [r10+-56]
mov QWORD PTR [r10+-64], r8
sbb rax, QWORD PTR [rdx+-56]
mov r8, QWORD PTR [r10+-48]
mov QWORD PTR [r10+-56], rax
sbb r8, QWORD PTR [rdx+-48]
mov rax, QWORD PTR [r10+-40]
mov QWORD PTR [r10+-48], r8
sbb rax, QWORD PTR [rdx+-40]
mov r8, QWORD PTR [r10+-32]
mov QWORD PTR [r10+-40], rax
sbb r8, QWORD PTR [rdx+-32]
mov rax, QWORD PTR [r10+-24]
mov QWORD PTR [r10+-32], r8
sbb rax, QWORD PTR [rdx+-24]
mov r8, QWORD PTR [r10+-16]
mov QWORD PTR [r10+-24], rax
sbb r8, QWORD PTR [rdx+-16]
mov rax, QWORD PTR [r10+-8]
mov QWORD PTR [r10+-16], r8
sbb rax, QWORD PTR [rdx+-8]
mov r8, QWORD PTR [r10]
mov QWORD PTR [r10+-8], rax
sbb r8, QWORD PTR [rdx]
mov rax, QWORD PTR [r10+8]
mov QWORD PTR [r10], r8
sbb rax, QWORD PTR [rdx+8]
mov r8, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], rax
sbb r8, QWORD PTR [rdx+16]
mov rax, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], r8
sbb rax, QWORD PTR [rdx+24]
mov r8, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], rax
sbb r8, QWORD PTR [rdx+32]
mov rax, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], r8
sbb rax, QWORD PTR [rdx+40]
mov r8, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], rax
sbb r8, QWORD PTR [rdx+48]
mov rax, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], r8
sbb rax, QWORD PTR [rdx+56]
mov r8, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], rax
sbb r8, QWORD PTR [rdx+64]
mov rax, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], r8
sbb rax, QWORD PTR [rdx+72]
mov r8, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], rax
sbb r8, QWORD PTR [rdx+80]
mov rax, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], r8
sbb rax, QWORD PTR [rdx+88]
mov r8, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], rax
sbb r8, QWORD PTR [rdx+96]
mov rax, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], r8
sbb rax, QWORD PTR [rdx+104]
mov r8, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], rax
sbb r8, QWORD PTR [rdx+112]
mov rax, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], r8
sbb rax, QWORD PTR [rdx+120]
mov r8, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], rax
sbb r8, QWORD PTR [rdx+128]
mov rax, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], r8
sbb rax, QWORD PTR [rdx+136]
mov r8, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], rax
sbb r8, QWORD PTR [rdx+144]
mov rax, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], r8
sbb rax, QWORD PTR [rdx+152]
mov r8, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], rax
sbb r8, QWORD PTR [rdx+160]
mov rax, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], r8
sbb rax, QWORD PTR [rdx+168]
mov r8, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], rax
sbb r8, QWORD PTR [rdx+176]
mov rax, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], r8
sbb rax, QWORD PTR [rdx+184]
mov QWORD PTR [r10+184], rax
sbb r9, 0
sub rdx, 384
mov r8, QWORD PTR [r10+-192]
sub r8, QWORD PTR [rdx+-192]
mov rax, QWORD PTR [r10+-184]
mov QWORD PTR [r10+-192], r8
sbb rax, QWORD PTR [rdx+-184]
mov r8, QWORD PTR [r10+-176]
mov QWORD PTR [r10+-184], rax
sbb r8, QWORD PTR [rdx+-176]
mov rax, QWORD PTR [r10+-168]
mov QWORD PTR [r10+-176], r8
sbb rax, QWORD PTR [rdx+-168]
mov r8, QWORD PTR [r10+-160]
mov QWORD PTR [r10+-168], rax
sbb r8, QWORD PTR [rdx+-160]
mov rax, QWORD PTR [r10+-152]
mov QWORD PTR [r10+-160], r8
sbb rax, QWORD PTR [rdx+-152]
mov r8, QWORD PTR [r10+-144]
mov QWORD PTR [r10+-152], rax
sbb r8, QWORD PTR [rdx+-144]
mov rax, QWORD PTR [r10+-136]
mov QWORD PTR [r10+-144], r8
sbb rax, QWORD PTR [rdx+-136]
mov r8, QWORD PTR [r10+-128]
mov QWORD PTR [r10+-136], rax
sbb r8, QWORD PTR [rdx+-128]
mov rax, QWORD PTR [r10+-120]
mov QWORD PTR [r10+-128], r8
sbb rax, QWORD PTR [rdx+-120]
mov r8, QWORD PTR [r10+-112]
mov QWORD PTR [r10+-120], rax
sbb r8, QWORD PTR [rdx+-112]
mov rax, QWORD PTR [r10+-104]
mov QWORD PTR [r10+-112], r8
sbb rax, QWORD PTR [rdx+-104]
mov r8, QWORD PTR [r10+-96]
mov QWORD PTR [r10+-104], rax
sbb r8, QWORD PTR [rdx+-96]
mov rax, QWORD PTR [r10+-88]
mov QWORD PTR [r10+-96], r8
sbb rax, QWORD PTR [rdx+-88]
mov r8, QWORD PTR [r10+-80]
mov QWORD PTR [r10+-88], rax
sbb r8, QWORD PTR [rdx+-80]
mov rax, QWORD PTR [r10+-72]
mov QWORD PTR [r10+-80], r8
sbb rax, QWORD PTR [rdx+-72]
mov r8, QWORD PTR [r10+-64]
mov QWORD PTR [r10+-72], rax
sbb r8, QWORD PTR [rdx+-64]
mov rax, QWORD PTR [r10+-56]
mov QWORD PTR [r10+-64], r8
sbb rax, QWORD PTR [rdx+-56]
mov r8, QWORD PTR [r10+-48]
mov QWORD PTR [r10+-56], rax
sbb r8, QWORD PTR [rdx+-48]
mov rax, QWORD PTR [r10+-40]
mov QWORD PTR [r10+-48], r8
sbb rax, QWORD PTR [rdx+-40]
mov r8, QWORD PTR [r10+-32]
mov QWORD PTR [r10+-40], rax
sbb r8, QWORD PTR [rdx+-32]
mov rax, QWORD PTR [r10+-24]
mov QWORD PTR [r10+-32], r8
sbb rax, QWORD PTR [rdx+-24]
mov r8, QWORD PTR [r10+-16]
mov QWORD PTR [r10+-24], rax
sbb r8, QWORD PTR [rdx+-16]
mov rax, QWORD PTR [r10+-8]
mov QWORD PTR [r10+-16], r8
sbb rax, QWORD PTR [rdx+-8]
mov r8, QWORD PTR [r10]
mov QWORD PTR [r10+-8], rax
sbb r8, QWORD PTR [rdx]
mov rax, QWORD PTR [r10+8]
mov QWORD PTR [r10], r8
sbb rax, QWORD PTR [rdx+8]
mov r8, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], rax
sbb r8, QWORD PTR [rdx+16]
mov rax, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], r8
sbb rax, QWORD PTR [rdx+24]
mov r8, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], rax
sbb r8, QWORD PTR [rdx+32]
mov rax, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], r8
sbb rax, QWORD PTR [rdx+40]
mov r8, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], rax
sbb r8, QWORD PTR [rdx+48]
mov rax, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], r8
sbb rax, QWORD PTR [rdx+56]
mov r8, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], rax
sbb r8, QWORD PTR [rdx+64]
mov rax, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], r8
sbb rax, QWORD PTR [rdx+72]
mov r8, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], rax
sbb r8, QWORD PTR [rdx+80]
mov rax, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], r8
sbb rax, QWORD PTR [rdx+88]
mov r8, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], rax
sbb r8, QWORD PTR [rdx+96]
mov rax, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], r8
sbb rax, QWORD PTR [rdx+104]
mov r8, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], rax
sbb r8, QWORD PTR [rdx+112]
mov rax, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], r8
sbb rax, QWORD PTR [rdx+120]
mov r8, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], rax
sbb r8, QWORD PTR [rdx+128]
mov rax, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], r8
sbb rax, QWORD PTR [rdx+136]
mov r8, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], rax
sbb r8, QWORD PTR [rdx+144]
mov rax, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], r8
sbb rax, QWORD PTR [rdx+152]
mov r8, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], rax
sbb r8, QWORD PTR [rdx+160]
mov rax, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], r8
sbb rax, QWORD PTR [rdx+168]
mov r8, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], rax
sbb r8, QWORD PTR [rdx+176]
mov rax, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], r8
sbb rax, QWORD PTR [rdx+184]
mov QWORD PTR [r10+184], rax
sbb r9, 0
mov rcx, QWORD PTR [rsp+384]
neg r9
add rcx, 384
mov r8, QWORD PTR [rcx+-192]
sub r8, QWORD PTR [r10+-192]
mov rax, QWORD PTR [rcx+-184]
mov QWORD PTR [rcx+-192], r8
sbb rax, QWORD PTR [r10+-184]
mov r8, QWORD PTR [rcx+-176]
mov QWORD PTR [rcx+-184], rax
sbb r8, QWORD PTR [r10+-176]
mov rax, QWORD PTR [rcx+-168]
mov QWORD PTR [rcx+-176], r8
sbb rax, QWORD PTR [r10+-168]
mov r8, QWORD PTR [rcx+-160]
mov QWORD PTR [rcx+-168], rax
sbb r8, QWORD PTR [r10+-160]
mov rax, QWORD PTR [rcx+-152]
mov QWORD PTR [rcx+-160], r8
sbb rax, QWORD PTR [r10+-152]
mov r8, QWORD PTR [rcx+-144]
mov QWORD PTR [rcx+-152], rax
sbb r8, QWORD PTR [r10+-144]
mov rax, QWORD PTR [rcx+-136]
mov QWORD PTR [rcx+-144], r8
sbb rax, QWORD PTR [r10+-136]
mov r8, QWORD PTR [rcx+-128]
mov QWORD PTR [rcx+-136], rax
sbb r8, QWORD PTR [r10+-128]
mov rax, QWORD PTR [rcx+-120]
mov QWORD PTR [rcx+-128], r8
sbb rax, QWORD PTR [r10+-120]
mov r8, QWORD PTR [rcx+-112]
mov QWORD PTR [rcx+-120], rax
sbb r8, QWORD PTR [r10+-112]
mov rax, QWORD PTR [rcx+-104]
mov QWORD PTR [rcx+-112], r8
sbb rax, QWORD PTR [r10+-104]
mov r8, QWORD PTR [rcx+-96]
mov QWORD PTR [rcx+-104], rax
sbb r8, QWORD PTR [r10+-96]
mov rax, QWORD PTR [rcx+-88]
mov QWORD PTR [rcx+-96], r8
sbb rax, QWORD PTR [r10+-88]
mov r8, QWORD PTR [rcx+-80]
mov QWORD PTR [rcx+-88], rax
sbb r8, QWORD PTR [r10+-80]
mov rax, QWORD PTR [rcx+-72]
mov QWORD PTR [rcx+-80], r8
sbb rax, QWORD PTR [r10+-72]
mov r8, QWORD PTR [rcx+-64]
mov QWORD PTR [rcx+-72], rax
sbb r8, QWORD PTR [r10+-64]
mov rax, QWORD PTR [rcx+-56]
mov QWORD PTR [rcx+-64], r8
sbb rax, QWORD PTR [r10+-56]
mov r8, QWORD PTR [rcx+-48]
mov QWORD PTR [rcx+-56], rax
sbb r8, QWORD PTR [r10+-48]
mov rax, QWORD PTR [rcx+-40]
mov QWORD PTR [rcx+-48], r8
sbb rax, QWORD PTR [r10+-40]
mov r8, QWORD PTR [rcx+-32]
mov QWORD PTR [rcx+-40], rax
sbb r8, QWORD PTR [r10+-32]
mov rax, QWORD PTR [rcx+-24]
mov QWORD PTR [rcx+-32], r8
sbb rax, QWORD PTR [r10+-24]
mov r8, QWORD PTR [rcx+-16]
mov QWORD PTR [rcx+-24], rax
sbb r8, QWORD PTR [r10+-16]
mov rax, QWORD PTR [rcx+-8]
mov QWORD PTR [rcx+-16], r8
sbb rax, QWORD PTR [r10+-8]
mov r8, QWORD PTR [rcx]
mov QWORD PTR [rcx+-8], rax
sbb r8, QWORD PTR [r10]
mov rax, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
sbb rax, QWORD PTR [r10+8]
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], rax
sbb r8, QWORD PTR [r10+16]
mov rax, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
sbb rax, QWORD PTR [r10+24]
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], rax
sbb r8, QWORD PTR [r10+32]
mov rax, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
sbb rax, QWORD PTR [r10+40]
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], rax
sbb r8, QWORD PTR [r10+48]
mov rax, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
sbb rax, QWORD PTR [r10+56]
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], rax
sbb r8, QWORD PTR [r10+64]
mov rax, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
sbb rax, QWORD PTR [r10+72]
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], rax
sbb r8, QWORD PTR [r10+80]
mov rax, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
sbb rax, QWORD PTR [r10+88]
mov r8, QWORD PTR [rcx+96]
mov QWORD PTR [rcx+88], rax
sbb r8, QWORD PTR [r10+96]
mov rax, QWORD PTR [rcx+104]
mov QWORD PTR [rcx+96], r8
sbb rax, QWORD PTR [r10+104]
mov r8, QWORD PTR [rcx+112]
mov QWORD PTR [rcx+104], rax
sbb r8, QWORD PTR [r10+112]
mov rax, QWORD PTR [rcx+120]
mov QWORD PTR [rcx+112], r8
sbb rax, QWORD PTR [r10+120]
mov r8, QWORD PTR [rcx+128]
mov QWORD PTR [rcx+120], rax
sbb r8, QWORD PTR [r10+128]
mov rax, QWORD PTR [rcx+136]
mov QWORD PTR [rcx+128], r8
sbb rax, QWORD PTR [r10+136]
mov r8, QWORD PTR [rcx+144]
mov QWORD PTR [rcx+136], rax
sbb r8, QWORD PTR [r10+144]
mov rax, QWORD PTR [rcx+152]
mov QWORD PTR [rcx+144], r8
sbb rax, QWORD PTR [r10+152]
mov r8, QWORD PTR [rcx+160]
mov QWORD PTR [rcx+152], rax
sbb r8, QWORD PTR [r10+160]
mov rax, QWORD PTR [rcx+168]
mov QWORD PTR [rcx+160], r8
sbb rax, QWORD PTR [r10+168]
mov r8, QWORD PTR [rcx+176]
mov QWORD PTR [rcx+168], rax
sbb r8, QWORD PTR [r10+176]
mov rax, QWORD PTR [rcx+184]
mov QWORD PTR [rcx+176], r8
sbb rax, QWORD PTR [r10+184]
mov QWORD PTR [rcx+184], rax
sbb r9, 0
mov rcx, QWORD PTR [rsp+384]
add rcx, 576
; Add in word
mov r8, QWORD PTR [rcx]
add r8, r9
mov rax, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
adc rax, 0
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], rax
adc r8, 0
mov rax, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
adc rax, 0
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], rax
adc r8, 0
mov rax, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
adc rax, 0
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], rax
adc r8, 0
mov rax, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
adc rax, 0
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], rax
adc r8, 0
mov rax, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
adc rax, 0
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], rax
adc r8, 0
mov rax, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
adc rax, 0
mov r8, QWORD PTR [rcx+96]
mov QWORD PTR [rcx+88], rax
adc r8, 0
mov rax, QWORD PTR [rcx+104]
mov QWORD PTR [rcx+96], r8
adc rax, 0
mov r8, QWORD PTR [rcx+112]
mov QWORD PTR [rcx+104], rax
adc r8, 0
mov rax, QWORD PTR [rcx+120]
mov QWORD PTR [rcx+112], r8
adc rax, 0
mov r8, QWORD PTR [rcx+128]
mov QWORD PTR [rcx+120], rax
adc r8, 0
mov rax, QWORD PTR [rcx+136]
mov QWORD PTR [rcx+128], r8
adc rax, 0
mov r8, QWORD PTR [rcx+144]
mov QWORD PTR [rcx+136], rax
adc r8, 0
mov rax, QWORD PTR [rcx+152]
mov QWORD PTR [rcx+144], r8
adc rax, 0
mov r8, QWORD PTR [rcx+160]
mov QWORD PTR [rcx+152], rax
adc r8, 0
mov rax, QWORD PTR [rcx+168]
mov QWORD PTR [rcx+160], r8
adc rax, 0
mov r8, QWORD PTR [rcx+176]
mov QWORD PTR [rcx+168], rax
adc r8, 0
mov rax, QWORD PTR [rcx+184]
mov QWORD PTR [rcx+176], r8
adc rax, 0
mov QWORD PTR [rcx+184], rax
mov rdx, QWORD PTR [rsp+392]
mov rcx, QWORD PTR [rsp+384]
add rsp, 400
ret
sp_3072_sqr_48 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * Karatsuba: ah^2, al^2, (al - ah)^2
; *
; * With al = a[0..23], ah = a[24..47] (64-bit words) and B = 2^1536 the
; * result is assembled as
; *   a^2 = ah^2 * B^2 + (ah^2 + al^2 - (al - ah)^2) * B + al^2
; * using three calls to sp_3072_sqr_avx2_24.
; *
; * Registers on entry: rcx = r, rdx = a. Both are saved on the stack and
; * reloaded after every call.
; * Stack frame: 400 bytes. [rsp, rsp+384) is a 48-word temporary (tmp),
; * [rsp+384] holds r and [rsp+392] holds a.
; *
; * r A single precision integer. Words r[0..95] are addressed.
; * a A single precision integer. Words a[0..47] are read.
; */
_text SEGMENT READONLY PARA
sp_3072_sqr_avx2_48 PROC
sub rsp, 400
mov QWORD PTR [rsp+384], rcx
mov QWORD PTR [rsp+392], rdx
; tmp[0..23] = al - ah, with r10 = tmp and r11 = a + 192 (start of ah).
; r9 starts at zero and ends as 0 - borrow: all ones when al < ah.
mov r9, 0
mov r10, rsp
lea r11, QWORD PTR [rdx+192]
mov rax, QWORD PTR [rdx]
sub rax, QWORD PTR [r11]
mov r8, QWORD PTR [rdx+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [r11+8]
mov rax, QWORD PTR [rdx+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [r11+16]
mov r8, QWORD PTR [rdx+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [r11+24]
mov rax, QWORD PTR [rdx+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [r11+32]
mov r8, QWORD PTR [rdx+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [r11+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [r11+48]
mov r8, QWORD PTR [rdx+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [r11+56]
mov rax, QWORD PTR [rdx+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [r11+64]
mov r8, QWORD PTR [rdx+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [r11+72]
mov rax, QWORD PTR [rdx+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [r11+80]
mov r8, QWORD PTR [rdx+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [r11+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [r11+96]
mov r8, QWORD PTR [rdx+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [r11+104]
mov rax, QWORD PTR [rdx+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [r11+112]
mov r8, QWORD PTR [rdx+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [r11+120]
mov rax, QWORD PTR [rdx+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [r11+128]
mov r8, QWORD PTR [rdx+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [r11+136]
mov rax, QWORD PTR [rdx+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [r11+144]
mov r8, QWORD PTR [rdx+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [r11+152]
mov rax, QWORD PTR [rdx+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [r11+160]
mov r8, QWORD PTR [rdx+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [r11+168]
mov rax, QWORD PTR [rdx+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [r11+176]
mov r8, QWORD PTR [rdx+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [r11+184]
mov QWORD PTR [r10+184], r8
sbb r9, 0
; Cond Negate
; tmp = (tmp XOR r9) - r9: the two's complement negation when r9 is all
; ones, unchanged when r9 is zero, so tmp becomes |al - ah| without a
; branch. After the first word r11 holds the carry (0 or 1) to add into
; the next word; setc refreshes it after every add.
mov rax, QWORD PTR [r10]
mov r11, r9
xor rax, r9
neg r11
sub rax, r9
mov r8, QWORD PTR [r10+8]
sbb r11, 0
mov QWORD PTR [r10], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+16]
setc r11b
mov QWORD PTR [r10+8], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+24]
setc r11b
mov QWORD PTR [r10+16], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+32]
setc r11b
mov QWORD PTR [r10+24], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+40]
setc r11b
mov QWORD PTR [r10+32], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+48]
setc r11b
mov QWORD PTR [r10+40], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+56]
setc r11b
mov QWORD PTR [r10+48], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+64]
setc r11b
mov QWORD PTR [r10+56], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+72]
setc r11b
mov QWORD PTR [r10+64], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+80]
setc r11b
mov QWORD PTR [r10+72], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+88]
setc r11b
mov QWORD PTR [r10+80], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+96]
setc r11b
mov QWORD PTR [r10+88], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+104]
setc r11b
mov QWORD PTR [r10+96], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+112]
setc r11b
mov QWORD PTR [r10+104], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+120]
setc r11b
mov QWORD PTR [r10+112], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+128]
setc r11b
mov QWORD PTR [r10+120], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+136]
setc r11b
mov QWORD PTR [r10+128], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+144]
setc r11b
mov QWORD PTR [r10+136], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+152]
setc r11b
mov QWORD PTR [r10+144], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+160]
setc r11b
mov QWORD PTR [r10+152], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+168]
setc r11b
mov QWORD PTR [r10+160], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+176]
setc r11b
mov QWORD PTR [r10+168], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+184]
setc r11b
mov QWORD PTR [r10+176], rax
xor r8, r9
add r8, r11
mov QWORD PTR [r10+184], r8
; (al - ah)^2 -> tmp: source and destination both point at the stack
; temporary (rdx = rcx = rsp).
mov rdx, r10
mov rcx, rsp
call sp_3072_sqr_avx2_24
; ah^2 -> r[48..95]: rdx = a + 192, rcx = r + 384.
mov rdx, QWORD PTR [rsp+392]
mov rcx, QWORD PTR [rsp+384]
add rdx, 192
add rcx, 384
call sp_3072_sqr_avx2_24
; al^2 -> r[0..47]: rdx = a, rcx = r.
mov rdx, QWORD PTR [rsp+392]
mov rcx, QWORD PTR [rsp+384]
call sp_3072_sqr_avx2_24
; Reload the saved pointers after the call. rdx is overwritten by the very
; next instruction and rcx is reloaded again before it is next used.
IFDEF _WIN64
mov rdx, QWORD PTR [rsp+392]
mov rcx, QWORD PTR [rsp+384]
ENDIF
; tmp -= r[48..95] (ah^2).
; rdx = r + 576 and r10 = tmp + 192, so displacements -192..+184 address
; all 48 words of each operand. r9 restarts at zero and becomes 0 - borrow.
mov rdx, QWORD PTR [rsp+384]
lea r10, QWORD PTR [rsp+192]
add rdx, 576
mov r9, 0
mov r8, QWORD PTR [r10+-192]
sub r8, QWORD PTR [rdx+-192]
mov rax, QWORD PTR [r10+-184]
mov QWORD PTR [r10+-192], r8
sbb rax, QWORD PTR [rdx+-184]
mov r8, QWORD PTR [r10+-176]
mov QWORD PTR [r10+-184], rax
sbb r8, QWORD PTR [rdx+-176]
mov rax, QWORD PTR [r10+-168]
mov QWORD PTR [r10+-176], r8
sbb rax, QWORD PTR [rdx+-168]
mov r8, QWORD PTR [r10+-160]
mov QWORD PTR [r10+-168], rax
sbb r8, QWORD PTR [rdx+-160]
mov rax, QWORD PTR [r10+-152]
mov QWORD PTR [r10+-160], r8
sbb rax, QWORD PTR [rdx+-152]
mov r8, QWORD PTR [r10+-144]
mov QWORD PTR [r10+-152], rax
sbb r8, QWORD PTR [rdx+-144]
mov rax, QWORD PTR [r10+-136]
mov QWORD PTR [r10+-144], r8
sbb rax, QWORD PTR [rdx+-136]
mov r8, QWORD PTR [r10+-128]
mov QWORD PTR [r10+-136], rax
sbb r8, QWORD PTR [rdx+-128]
mov rax, QWORD PTR [r10+-120]
mov QWORD PTR [r10+-128], r8
sbb rax, QWORD PTR [rdx+-120]
mov r8, QWORD PTR [r10+-112]
mov QWORD PTR [r10+-120], rax
sbb r8, QWORD PTR [rdx+-112]
mov rax, QWORD PTR [r10+-104]
mov QWORD PTR [r10+-112], r8
sbb rax, QWORD PTR [rdx+-104]
mov r8, QWORD PTR [r10+-96]
mov QWORD PTR [r10+-104], rax
sbb r8, QWORD PTR [rdx+-96]
mov rax, QWORD PTR [r10+-88]
mov QWORD PTR [r10+-96], r8
sbb rax, QWORD PTR [rdx+-88]
mov r8, QWORD PTR [r10+-80]
mov QWORD PTR [r10+-88], rax
sbb r8, QWORD PTR [rdx+-80]
mov rax, QWORD PTR [r10+-72]
mov QWORD PTR [r10+-80], r8
sbb rax, QWORD PTR [rdx+-72]
mov r8, QWORD PTR [r10+-64]
mov QWORD PTR [r10+-72], rax
sbb r8, QWORD PTR [rdx+-64]
mov rax, QWORD PTR [r10+-56]
mov QWORD PTR [r10+-64], r8
sbb rax, QWORD PTR [rdx+-56]
mov r8, QWORD PTR [r10+-48]
mov QWORD PTR [r10+-56], rax
sbb r8, QWORD PTR [rdx+-48]
mov rax, QWORD PTR [r10+-40]
mov QWORD PTR [r10+-48], r8
sbb rax, QWORD PTR [rdx+-40]
mov r8, QWORD PTR [r10+-32]
mov QWORD PTR [r10+-40], rax
sbb r8, QWORD PTR [rdx+-32]
mov rax, QWORD PTR [r10+-24]
mov QWORD PTR [r10+-32], r8
sbb rax, QWORD PTR [rdx+-24]
mov r8, QWORD PTR [r10+-16]
mov QWORD PTR [r10+-24], rax
sbb r8, QWORD PTR [rdx+-16]
mov rax, QWORD PTR [r10+-8]
mov QWORD PTR [r10+-16], r8
sbb rax, QWORD PTR [rdx+-8]
mov r8, QWORD PTR [r10]
mov QWORD PTR [r10+-8], rax
sbb r8, QWORD PTR [rdx]
mov rax, QWORD PTR [r10+8]
mov QWORD PTR [r10], r8
sbb rax, QWORD PTR [rdx+8]
mov r8, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], rax
sbb r8, QWORD PTR [rdx+16]
mov rax, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], r8
sbb rax, QWORD PTR [rdx+24]
mov r8, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], rax
sbb r8, QWORD PTR [rdx+32]
mov rax, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], r8
sbb rax, QWORD PTR [rdx+40]
mov r8, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], rax
sbb r8, QWORD PTR [rdx+48]
mov rax, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], r8
sbb rax, QWORD PTR [rdx+56]
mov r8, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], rax
sbb r8, QWORD PTR [rdx+64]
mov rax, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], r8
sbb rax, QWORD PTR [rdx+72]
mov r8, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], rax
sbb r8, QWORD PTR [rdx+80]
mov rax, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], r8
sbb rax, QWORD PTR [rdx+88]
mov r8, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], rax
sbb r8, QWORD PTR [rdx+96]
mov rax, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], r8
sbb rax, QWORD PTR [rdx+104]
mov r8, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], rax
sbb r8, QWORD PTR [rdx+112]
mov rax, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], r8
sbb rax, QWORD PTR [rdx+120]
mov r8, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], rax
sbb r8, QWORD PTR [rdx+128]
mov rax, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], r8
sbb rax, QWORD PTR [rdx+136]
mov r8, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], rax
sbb r8, QWORD PTR [rdx+144]
mov rax, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], r8
sbb rax, QWORD PTR [rdx+152]
mov r8, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], rax
sbb r8, QWORD PTR [rdx+160]
mov rax, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], r8
sbb rax, QWORD PTR [rdx+168]
mov r8, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], rax
sbb r8, QWORD PTR [rdx+176]
mov rax, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], r8
sbb rax, QWORD PTR [rdx+184]
mov QWORD PTR [r10+184], rax
sbb r9, 0
; tmp -= r[0..47] (al^2): rdx moves back to r + 192. r9 accumulates the
; second borrow on top of the first.
sub rdx, 384
mov r8, QWORD PTR [r10+-192]
sub r8, QWORD PTR [rdx+-192]
mov rax, QWORD PTR [r10+-184]
mov QWORD PTR [r10+-192], r8
sbb rax, QWORD PTR [rdx+-184]
mov r8, QWORD PTR [r10+-176]
mov QWORD PTR [r10+-184], rax
sbb r8, QWORD PTR [rdx+-176]
mov rax, QWORD PTR [r10+-168]
mov QWORD PTR [r10+-176], r8
sbb rax, QWORD PTR [rdx+-168]
mov r8, QWORD PTR [r10+-160]
mov QWORD PTR [r10+-168], rax
sbb r8, QWORD PTR [rdx+-160]
mov rax, QWORD PTR [r10+-152]
mov QWORD PTR [r10+-160], r8
sbb rax, QWORD PTR [rdx+-152]
mov r8, QWORD PTR [r10+-144]
mov QWORD PTR [r10+-152], rax
sbb r8, QWORD PTR [rdx+-144]
mov rax, QWORD PTR [r10+-136]
mov QWORD PTR [r10+-144], r8
sbb rax, QWORD PTR [rdx+-136]
mov r8, QWORD PTR [r10+-128]
mov QWORD PTR [r10+-136], rax
sbb r8, QWORD PTR [rdx+-128]
mov rax, QWORD PTR [r10+-120]
mov QWORD PTR [r10+-128], r8
sbb rax, QWORD PTR [rdx+-120]
mov r8, QWORD PTR [r10+-112]
mov QWORD PTR [r10+-120], rax
sbb r8, QWORD PTR [rdx+-112]
mov rax, QWORD PTR [r10+-104]
mov QWORD PTR [r10+-112], r8
sbb rax, QWORD PTR [rdx+-104]
mov r8, QWORD PTR [r10+-96]
mov QWORD PTR [r10+-104], rax
sbb r8, QWORD PTR [rdx+-96]
mov rax, QWORD PTR [r10+-88]
mov QWORD PTR [r10+-96], r8
sbb rax, QWORD PTR [rdx+-88]
mov r8, QWORD PTR [r10+-80]
mov QWORD PTR [r10+-88], rax
sbb r8, QWORD PTR [rdx+-80]
mov rax, QWORD PTR [r10+-72]
mov QWORD PTR [r10+-80], r8
sbb rax, QWORD PTR [rdx+-72]
mov r8, QWORD PTR [r10+-64]
mov QWORD PTR [r10+-72], rax
sbb r8, QWORD PTR [rdx+-64]
mov rax, QWORD PTR [r10+-56]
mov QWORD PTR [r10+-64], r8
sbb rax, QWORD PTR [rdx+-56]
mov r8, QWORD PTR [r10+-48]
mov QWORD PTR [r10+-56], rax
sbb r8, QWORD PTR [rdx+-48]
mov rax, QWORD PTR [r10+-40]
mov QWORD PTR [r10+-48], r8
sbb rax, QWORD PTR [rdx+-40]
mov r8, QWORD PTR [r10+-32]
mov QWORD PTR [r10+-40], rax
sbb r8, QWORD PTR [rdx+-32]
mov rax, QWORD PTR [r10+-24]
mov QWORD PTR [r10+-32], r8
sbb rax, QWORD PTR [rdx+-24]
mov r8, QWORD PTR [r10+-16]
mov QWORD PTR [r10+-24], rax
sbb r8, QWORD PTR [rdx+-16]
mov rax, QWORD PTR [r10+-8]
mov QWORD PTR [r10+-16], r8
sbb rax, QWORD PTR [rdx+-8]
mov r8, QWORD PTR [r10]
mov QWORD PTR [r10+-8], rax
sbb r8, QWORD PTR [rdx]
mov rax, QWORD PTR [r10+8]
mov QWORD PTR [r10], r8
sbb rax, QWORD PTR [rdx+8]
mov r8, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], rax
sbb r8, QWORD PTR [rdx+16]
mov rax, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], r8
sbb rax, QWORD PTR [rdx+24]
mov r8, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], rax
sbb r8, QWORD PTR [rdx+32]
mov rax, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], r8
sbb rax, QWORD PTR [rdx+40]
mov r8, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], rax
sbb r8, QWORD PTR [rdx+48]
mov rax, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], r8
sbb rax, QWORD PTR [rdx+56]
mov r8, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], rax
sbb r8, QWORD PTR [rdx+64]
mov rax, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], r8
sbb rax, QWORD PTR [rdx+72]
mov r8, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], rax
sbb r8, QWORD PTR [rdx+80]
mov rax, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], r8
sbb rax, QWORD PTR [rdx+88]
mov r8, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], rax
sbb r8, QWORD PTR [rdx+96]
mov rax, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], r8
sbb rax, QWORD PTR [rdx+104]
mov r8, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], rax
sbb r8, QWORD PTR [rdx+112]
mov rax, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], r8
sbb rax, QWORD PTR [rdx+120]
mov r8, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], rax
sbb r8, QWORD PTR [rdx+128]
mov rax, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], r8
sbb rax, QWORD PTR [rdx+136]
mov r8, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], rax
sbb r8, QWORD PTR [rdx+144]
mov rax, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], r8
sbb rax, QWORD PTR [rdx+152]
mov r8, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], rax
sbb r8, QWORD PTR [rdx+160]
mov rax, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], r8
sbb rax, QWORD PTR [rdx+168]
mov r8, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], rax
sbb r8, QWORD PTR [rdx+176]
mov rax, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], r8
sbb rax, QWORD PTR [rdx+184]
mov QWORD PTR [r10+184], rax
sbb r9, 0
; r[24..71] -= tmp, with rcx = r + 384 so displacements -192..+184 cover
; those 48 words. neg turns r9 into the count of borrows from the two tmp
; subtractions above; the borrow of this subtraction is then taken off it.
mov rcx, QWORD PTR [rsp+384]
neg r9
add rcx, 384
mov r8, QWORD PTR [rcx+-192]
sub r8, QWORD PTR [r10+-192]
mov rax, QWORD PTR [rcx+-184]
mov QWORD PTR [rcx+-192], r8
sbb rax, QWORD PTR [r10+-184]
mov r8, QWORD PTR [rcx+-176]
mov QWORD PTR [rcx+-184], rax
sbb r8, QWORD PTR [r10+-176]
mov rax, QWORD PTR [rcx+-168]
mov QWORD PTR [rcx+-176], r8
sbb rax, QWORD PTR [r10+-168]
mov r8, QWORD PTR [rcx+-160]
mov QWORD PTR [rcx+-168], rax
sbb r8, QWORD PTR [r10+-160]
mov rax, QWORD PTR [rcx+-152]
mov QWORD PTR [rcx+-160], r8
sbb rax, QWORD PTR [r10+-152]
mov r8, QWORD PTR [rcx+-144]
mov QWORD PTR [rcx+-152], rax
sbb r8, QWORD PTR [r10+-144]
mov rax, QWORD PTR [rcx+-136]
mov QWORD PTR [rcx+-144], r8
sbb rax, QWORD PTR [r10+-136]
mov r8, QWORD PTR [rcx+-128]
mov QWORD PTR [rcx+-136], rax
sbb r8, QWORD PTR [r10+-128]
mov rax, QWORD PTR [rcx+-120]
mov QWORD PTR [rcx+-128], r8
sbb rax, QWORD PTR [r10+-120]
mov r8, QWORD PTR [rcx+-112]
mov QWORD PTR [rcx+-120], rax
sbb r8, QWORD PTR [r10+-112]
mov rax, QWORD PTR [rcx+-104]
mov QWORD PTR [rcx+-112], r8
sbb rax, QWORD PTR [r10+-104]
mov r8, QWORD PTR [rcx+-96]
mov QWORD PTR [rcx+-104], rax
sbb r8, QWORD PTR [r10+-96]
mov rax, QWORD PTR [rcx+-88]
mov QWORD PTR [rcx+-96], r8
sbb rax, QWORD PTR [r10+-88]
mov r8, QWORD PTR [rcx+-80]
mov QWORD PTR [rcx+-88], rax
sbb r8, QWORD PTR [r10+-80]
mov rax, QWORD PTR [rcx+-72]
mov QWORD PTR [rcx+-80], r8
sbb rax, QWORD PTR [r10+-72]
mov r8, QWORD PTR [rcx+-64]
mov QWORD PTR [rcx+-72], rax
sbb r8, QWORD PTR [r10+-64]
mov rax, QWORD PTR [rcx+-56]
mov QWORD PTR [rcx+-64], r8
sbb rax, QWORD PTR [r10+-56]
mov r8, QWORD PTR [rcx+-48]
mov QWORD PTR [rcx+-56], rax
sbb r8, QWORD PTR [r10+-48]
mov rax, QWORD PTR [rcx+-40]
mov QWORD PTR [rcx+-48], r8
sbb rax, QWORD PTR [r10+-40]
mov r8, QWORD PTR [rcx+-32]
mov QWORD PTR [rcx+-40], rax
sbb r8, QWORD PTR [r10+-32]
mov rax, QWORD PTR [rcx+-24]
mov QWORD PTR [rcx+-32], r8
sbb rax, QWORD PTR [r10+-24]
mov r8, QWORD PTR [rcx+-16]
mov QWORD PTR [rcx+-24], rax
sbb r8, QWORD PTR [r10+-16]
mov rax, QWORD PTR [rcx+-8]
mov QWORD PTR [rcx+-16], r8
sbb rax, QWORD PTR [r10+-8]
mov r8, QWORD PTR [rcx]
mov QWORD PTR [rcx+-8], rax
sbb r8, QWORD PTR [r10]
mov rax, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
sbb rax, QWORD PTR [r10+8]
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], rax
sbb r8, QWORD PTR [r10+16]
mov rax, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
sbb rax, QWORD PTR [r10+24]
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], rax
sbb r8, QWORD PTR [r10+32]
mov rax, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
sbb rax, QWORD PTR [r10+40]
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], rax
sbb r8, QWORD PTR [r10+48]
mov rax, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
sbb rax, QWORD PTR [r10+56]
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], rax
sbb r8, QWORD PTR [r10+64]
mov rax, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
sbb rax, QWORD PTR [r10+72]
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], rax
sbb r8, QWORD PTR [r10+80]
mov rax, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
sbb rax, QWORD PTR [r10+88]
mov r8, QWORD PTR [rcx+96]
mov QWORD PTR [rcx+88], rax
sbb r8, QWORD PTR [r10+96]
mov rax, QWORD PTR [rcx+104]
mov QWORD PTR [rcx+96], r8
sbb rax, QWORD PTR [r10+104]
mov r8, QWORD PTR [rcx+112]
mov QWORD PTR [rcx+104], rax
sbb r8, QWORD PTR [r10+112]
mov rax, QWORD PTR [rcx+120]
mov QWORD PTR [rcx+112], r8
sbb rax, QWORD PTR [r10+120]
mov r8, QWORD PTR [rcx+128]
mov QWORD PTR [rcx+120], rax
sbb r8, QWORD PTR [r10+128]
mov rax, QWORD PTR [rcx+136]
mov QWORD PTR [rcx+128], r8
sbb rax, QWORD PTR [r10+136]
mov r8, QWORD PTR [rcx+144]
mov QWORD PTR [rcx+136], rax
sbb r8, QWORD PTR [r10+144]
mov rax, QWORD PTR [rcx+152]
mov QWORD PTR [rcx+144], r8
sbb rax, QWORD PTR [r10+152]
mov r8, QWORD PTR [rcx+160]
mov QWORD PTR [rcx+152], rax
sbb r8, QWORD PTR [r10+160]
mov rax, QWORD PTR [rcx+168]
mov QWORD PTR [rcx+160], r8
sbb rax, QWORD PTR [r10+168]
mov r8, QWORD PTR [rcx+176]
mov QWORD PTR [rcx+168], rax
sbb r8, QWORD PTR [r10+176]
mov rax, QWORD PTR [rcx+184]
mov QWORD PTR [rcx+176], r8
sbb rax, QWORD PTR [r10+184]
mov QWORD PTR [rcx+184], rax
sbb r9, 0
; rcx = r + 576, i.e. r[72], the first word above the range just updated.
mov rcx, QWORD PTR [rsp+384]
add rcx, 576
; Add in word
; r[72..95] += r9, rippling the carry through the remaining 23 words.
mov r8, QWORD PTR [rcx]
add r8, r9
mov rax, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
adc rax, 0
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], rax
adc r8, 0
mov rax, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
adc rax, 0
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], rax
adc r8, 0
mov rax, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
adc rax, 0
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], rax
adc r8, 0
mov rax, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
adc rax, 0
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], rax
adc r8, 0
mov rax, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
adc rax, 0
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], rax
adc r8, 0
mov rax, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
adc rax, 0
mov r8, QWORD PTR [rcx+96]
mov QWORD PTR [rcx+88], rax
adc r8, 0
mov rax, QWORD PTR [rcx+104]
mov QWORD PTR [rcx+96], r8
adc rax, 0
mov r8, QWORD PTR [rcx+112]
mov QWORD PTR [rcx+104], rax
adc r8, 0
mov rax, QWORD PTR [rcx+120]
mov QWORD PTR [rcx+112], r8
adc rax, 0
mov r8, QWORD PTR [rcx+128]
mov QWORD PTR [rcx+120], rax
adc r8, 0
mov rax, QWORD PTR [rcx+136]
mov QWORD PTR [rcx+128], r8
adc rax, 0
mov r8, QWORD PTR [rcx+144]
mov QWORD PTR [rcx+136], rax
adc r8, 0
mov rax, QWORD PTR [rcx+152]
mov QWORD PTR [rcx+144], r8
adc rax, 0
mov r8, QWORD PTR [rcx+160]
mov QWORD PTR [rcx+152], rax
adc r8, 0
mov rax, QWORD PTR [rcx+168]
mov QWORD PTR [rcx+160], r8
adc rax, 0
mov r8, QWORD PTR [rcx+176]
mov QWORD PTR [rcx+168], rax
adc r8, 0
mov rax, QWORD PTR [rcx+184]
mov QWORD PTR [rcx+176], r8
adc rax, 0
mov QWORD PTR [rcx+184], rax
; Put the original a and r pointers back in rdx and rcx, then release the
; 400-byte frame.
mov rdx, QWORD PTR [rsp+392]
mov rcx, QWORD PTR [rsp+384]
add rsp, 400
ret
sp_3072_sqr_avx2_48 ENDP
_text ENDS
ENDIF
; /* Mul a by digit b into r. (r = a * b)
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b.
; * r10, r11 and r12 rotate as a three-word accumulator: each step adds the
; * 128-bit product rdx:rax into two of them, stores the lowest word and
; * zeroes the register that becomes the new top word. r12 is preserved
; * with push/pop.
; *
; * r A single precision integer. 49 words are written (r[0..48]).
; * a A single precision integer. 48 words are read (a[0..47]).
; * b A single precision digit.
; */
_text SEGMENT READONLY PARA
sp_3072_mul_d_48 PROC
push r12
; Keep a in r9: mul writes the high half of every product to rdx.
mov r9, rdx
; A[0] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9]
mov r10, rax
mov r11, rdx
mov QWORD PTR [rcx], r10
; A[1] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+8]
add r11, rax
mov QWORD PTR [rcx+8], r11
adc r12, rdx
adc r10, 0
; A[2] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+16]
add r12, rax
mov QWORD PTR [rcx+16], r12
adc r10, rdx
adc r11, 0
; A[3] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+24]
add r10, rax
mov QWORD PTR [rcx+24], r10
adc r11, rdx
adc r12, 0
; A[4] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+32]
add r11, rax
mov QWORD PTR [rcx+32], r11
adc r12, rdx
adc r10, 0
; A[5] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+40]
add r12, rax
mov QWORD PTR [rcx+40], r12
adc r10, rdx
adc r11, 0
; A[6] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+48]
add r10, rax
mov QWORD PTR [rcx+48], r10
adc r11, rdx
adc r12, 0
; A[7] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+56]
add r11, rax
mov QWORD PTR [rcx+56], r11
adc r12, rdx
adc r10, 0
; A[8] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+64]
add r12, rax
mov QWORD PTR [rcx+64], r12
adc r10, rdx
adc r11, 0
; A[9] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+72]
add r10, rax
mov QWORD PTR [rcx+72], r10
adc r11, rdx
adc r12, 0
; A[10] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+80]
add r11, rax
mov QWORD PTR [rcx+80], r11
adc r12, rdx
adc r10, 0
; A[11] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+88]
add r12, rax
mov QWORD PTR [rcx+88], r12
adc r10, rdx
adc r11, 0
; A[12] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+96]
add r10, rax
mov QWORD PTR [rcx+96], r10
adc r11, rdx
adc r12, 0
; A[13] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+104]
add r11, rax
mov QWORD PTR [rcx+104], r11
adc r12, rdx
adc r10, 0
; A[14] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+112]
add r12, rax
mov QWORD PTR [rcx+112], r12
adc r10, rdx
adc r11, 0
; A[15] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+120]
add r10, rax
mov QWORD PTR [rcx+120], r10
adc r11, rdx
adc r12, 0
; A[16] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+128]
add r11, rax
mov QWORD PTR [rcx+128], r11
adc r12, rdx
adc r10, 0
; A[17] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+136]
add r12, rax
mov QWORD PTR [rcx+136], r12
adc r10, rdx
adc r11, 0
; A[18] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+144]
add r10, rax
mov QWORD PTR [rcx+144], r10
adc r11, rdx
adc r12, 0
; A[19] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+152]
add r11, rax
mov QWORD PTR [rcx+152], r11
adc r12, rdx
adc r10, 0
; A[20] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+160]
add r12, rax
mov QWORD PTR [rcx+160], r12
adc r10, rdx
adc r11, 0
; A[21] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+168]
add r10, rax
mov QWORD PTR [rcx+168], r10
adc r11, rdx
adc r12, 0
; A[22] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+176]
add r11, rax
mov QWORD PTR [rcx+176], r11
adc r12, rdx
adc r10, 0
; A[23] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+184]
add r12, rax
mov QWORD PTR [rcx+184], r12
adc r10, rdx
adc r11, 0
; A[24] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+192]
add r10, rax
mov QWORD PTR [rcx+192], r10
adc r11, rdx
adc r12, 0
; A[25] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+200]
add r11, rax
mov QWORD PTR [rcx+200], r11
adc r12, rdx
adc r10, 0
; A[26] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+208]
add r12, rax
mov QWORD PTR [rcx+208], r12
adc r10, rdx
adc r11, 0
; A[27] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+216]
add r10, rax
mov QWORD PTR [rcx+216], r10
adc r11, rdx
adc r12, 0
; A[28] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+224]
add r11, rax
mov QWORD PTR [rcx+224], r11
adc r12, rdx
adc r10, 0
; A[29] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+232]
add r12, rax
mov QWORD PTR [rcx+232], r12
adc r10, rdx
adc r11, 0
; A[30] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+240]
add r10, rax
mov QWORD PTR [rcx+240], r10
adc r11, rdx
adc r12, 0
; A[31] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+248]
add r11, rax
mov QWORD PTR [rcx+248], r11
adc r12, rdx
adc r10, 0
; A[32] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+256]
add r12, rax
mov QWORD PTR [rcx+256], r12
adc r10, rdx
adc r11, 0
; A[33] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+264]
add r10, rax
mov QWORD PTR [rcx+264], r10
adc r11, rdx
adc r12, 0
; A[34] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+272]
add r11, rax
mov QWORD PTR [rcx+272], r11
adc r12, rdx
adc r10, 0
; A[35] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+280]
add r12, rax
mov QWORD PTR [rcx+280], r12
adc r10, rdx
adc r11, 0
; A[36] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+288]
add r10, rax
mov QWORD PTR [rcx+288], r10
adc r11, rdx
adc r12, 0
; A[37] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+296]
add r11, rax
mov QWORD PTR [rcx+296], r11
adc r12, rdx
adc r10, 0
; A[38] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+304]
add r12, rax
mov QWORD PTR [rcx+304], r12
adc r10, rdx
adc r11, 0
; A[39] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+312]
add r10, rax
mov QWORD PTR [rcx+312], r10
adc r11, rdx
adc r12, 0
; A[40] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+320]
add r11, rax
mov QWORD PTR [rcx+320], r11
adc r12, rdx
adc r10, 0
; A[41] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+328]
add r12, rax
mov QWORD PTR [rcx+328], r12
adc r10, rdx
adc r11, 0
; A[42] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+336]
add r10, rax
mov QWORD PTR [rcx+336], r10
adc r11, rdx
adc r12, 0
; A[43] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+344]
add r11, rax
mov QWORD PTR [rcx+344], r11
adc r12, rdx
adc r10, 0
; A[44] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+352]
add r12, rax
mov QWORD PTR [rcx+352], r12
adc r10, rdx
adc r11, 0
; A[45] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+360]
add r10, rax
mov QWORD PTR [rcx+360], r10
adc r11, rdx
adc r12, 0
; A[46] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+368]
add r11, rax
mov QWORD PTR [rcx+368], r11
adc r12, rdx
adc r10, 0
; A[47] * B
; Last step: no further accumulator is needed, the remaining high word in
; r10 is stored as r[48].
mov rax, r8
mul QWORD PTR [r9+376]
add r12, rax
adc r10, rdx
mov QWORD PTR [rcx+376], r12
mov QWORD PTR [rcx+384], r10
pop r12
ret
sp_3072_mul_d_48 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not subtracting.
; *
; * b AND m is first written to a 192-byte stack buffer and then subtracted
; * from a with a single sub/sbb chain; there is no branch on m.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b, r9 = m.
; * Returns in rax: 0 minus the borrow out of the top word (all ones when
; * the subtraction borrowed, zero otherwise).
; *
; * r A single precision number representing condition subtract result.
; * a A single precision number to subtract from.
; * b A single precision number to subtract.
; * m Mask value to apply.
; */
_text SEGMENT READONLY PARA
sp_3072_cond_sub_24 PROC
sub rsp, 192
; Stack buffer [rsp, rsp+192) = b[0..23] AND m, two words per step.
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
and r10, r9
and r11, r9
mov QWORD PTR [rsp], r10
mov QWORD PTR [rsp+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+16], r10
mov QWORD PTR [rsp+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+32], r10
mov QWORD PTR [rsp+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+48], r10
mov QWORD PTR [rsp+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+64], r10
mov QWORD PTR [rsp+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+80], r10
mov QWORD PTR [rsp+88], r11
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+96], r10
mov QWORD PTR [rsp+104], r11
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+112], r10
mov QWORD PTR [rsp+120], r11
mov r10, QWORD PTR [r8+128]
mov r11, QWORD PTR [r8+136]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+128], r10
mov QWORD PTR [rsp+136], r11
mov r10, QWORD PTR [r8+144]
mov r11, QWORD PTR [r8+152]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+144], r10
mov QWORD PTR [rsp+152], r11
mov r10, QWORD PTR [r8+160]
mov r11, QWORD PTR [r8+168]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+160], r10
mov QWORD PTR [rsp+168], r11
mov r10, QWORD PTR [r8+176]
mov r11, QWORD PTR [r8+184]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+176], r10
mov QWORD PTR [rsp+184], r11
; r[0..23] = a[0..23] - buffer[0..23]. r8 is free now that b has been
; copied and is reused to load each buffer word; the interleaved mov
; instructions leave the borrow flag untouched between sbb steps.
mov r10, QWORD PTR [rdx]
mov r8, QWORD PTR [rsp]
sub r10, r8
mov r11, QWORD PTR [rdx+8]
mov r8, QWORD PTR [rsp+8]
sbb r11, r8
mov QWORD PTR [rcx], r10
mov r10, QWORD PTR [rdx+16]
mov r8, QWORD PTR [rsp+16]
sbb r10, r8
mov QWORD PTR [rcx+8], r11
mov r11, QWORD PTR [rdx+24]
mov r8, QWORD PTR [rsp+24]
sbb r11, r8
mov QWORD PTR [rcx+16], r10
mov r10, QWORD PTR [rdx+32]
mov r8, QWORD PTR [rsp+32]
sbb r10, r8
mov QWORD PTR [rcx+24], r11
mov r11, QWORD PTR [rdx+40]
mov r8, QWORD PTR [rsp+40]
sbb r11, r8
mov QWORD PTR [rcx+32], r10
mov r10, QWORD PTR [rdx+48]
mov r8, QWORD PTR [rsp+48]
sbb r10, r8
mov QWORD PTR [rcx+40], r11
mov r11, QWORD PTR [rdx+56]
mov r8, QWORD PTR [rsp+56]
sbb r11, r8
mov QWORD PTR [rcx+48], r10
mov r10, QWORD PTR [rdx+64]
mov r8, QWORD PTR [rsp+64]
sbb r10, r8
mov QWORD PTR [rcx+56], r11
mov r11, QWORD PTR [rdx+72]
mov r8, QWORD PTR [rsp+72]
sbb r11, r8
mov QWORD PTR [rcx+64], r10
mov r10, QWORD PTR [rdx+80]
mov r8, QWORD PTR [rsp+80]
sbb r10, r8
mov QWORD PTR [rcx+72], r11
mov r11, QWORD PTR [rdx+88]
mov r8, QWORD PTR [rsp+88]
sbb r11, r8
mov QWORD PTR [rcx+80], r10
mov r10, QWORD PTR [rdx+96]
mov r8, QWORD PTR [rsp+96]
sbb r10, r8
mov QWORD PTR [rcx+88], r11
mov r11, QWORD PTR [rdx+104]
mov r8, QWORD PTR [rsp+104]
sbb r11, r8
mov QWORD PTR [rcx+96], r10
mov r10, QWORD PTR [rdx+112]
mov r8, QWORD PTR [rsp+112]
sbb r10, r8
mov QWORD PTR [rcx+104], r11
mov r11, QWORD PTR [rdx+120]
mov r8, QWORD PTR [rsp+120]
sbb r11, r8
mov QWORD PTR [rcx+112], r10
mov r10, QWORD PTR [rdx+128]
mov r8, QWORD PTR [rsp+128]
sbb r10, r8
mov QWORD PTR [rcx+120], r11
mov r11, QWORD PTR [rdx+136]
mov r8, QWORD PTR [rsp+136]
sbb r11, r8
mov QWORD PTR [rcx+128], r10
mov r10, QWORD PTR [rdx+144]
mov r8, QWORD PTR [rsp+144]
sbb r10, r8
mov QWORD PTR [rcx+136], r11
mov r11, QWORD PTR [rdx+152]
mov r8, QWORD PTR [rsp+152]
sbb r11, r8
mov QWORD PTR [rcx+144], r10
mov r10, QWORD PTR [rdx+160]
mov r8, QWORD PTR [rsp+160]
sbb r10, r8
mov QWORD PTR [rcx+152], r11
mov r11, QWORD PTR [rdx+168]
mov r8, QWORD PTR [rsp+168]
sbb r11, r8
mov QWORD PTR [rcx+160], r10
mov r10, QWORD PTR [rdx+176]
mov r8, QWORD PTR [rsp+176]
sbb r10, r8
mov QWORD PTR [rcx+168], r11
mov r11, QWORD PTR [rdx+184]
mov r8, QWORD PTR [rsp+184]
sbb r11, r8
mov QWORD PTR [rcx+176], r10
mov QWORD PTR [rcx+184], r11
; rax = 0 - borrow: turns the final borrow flag into a 0 / all-ones value.
sbb rax, rax
add rsp, 192
ret
sp_3072_cond_sub_24 ENDP
_text ENDS
; /* Reduce the number back to 3072 bits using Montgomery reduction.
; *
; * a   A single precision number to reduce in place.
; * m   The single precision number representing the modulus.
; * mp  The digit representing the negative inverse of m mod 2^n.
; *
; * Register use (Microsoft x64 argument registers):
; *   rcx       a; advanced by one 64-bit word per loop pass
; *   rdx       m on entry; copied to r9 because mul overwrites rdx
; *   r8        mp
; *   r10       loop counter (24 passes)
; *   r13       mu for the current pass
; *   r15, rdi  a[i] and a[i+1], carried in registers between passes
; *   r11, r12  alternating carry words
; *   r14       scratch for the a[i+j] word being updated
; *   rsi       carry out of the top word; negated into the mask that is
; *             handed to sp_3072_cond_sub_24 at the end
; *
; * NOTE(review): the call at the end is made without the 32-byte Win64
; * home space and with rsp at 8 mod 16 (six pushes after the return
; * address). That is only safe while sp_3072_cond_sub_24 stays a leaf that
; * neither writes its home space nor needs an aligned stack - confirm.
; */
_text SEGMENT READONLY PARA
sp_3072_mont_reduce_24 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        mov     r9, rdx
        xor     rsi, rsi
        ; i = 24
        mov     r10, 24
        mov     r15, QWORD PTR [rcx]
        mov     rdi, QWORD PTR [rcx+8]
L_3072_mont_reduce_24_loop:
        ; mu = a[i] * mp
        mov     r13, r15
        imul    r13, r8
        ; a[i+0] += m[0] * mu
        ; Only the carry out of this word is used: r15 is reloaded below.
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9]
        add     r15, rax
        adc     r12, rdx
        ; a[i+1] += m[1] * mu
        ; The result stays in r15 and is a[i] of the next pass.
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+8]
        mov     r15, rdi
        add     r15, rax
        adc     r11, rdx
        add     r15, r12
        adc     r11, 0
        ; a[i+2] += m[2] * mu
        ; The result stays in rdi and is a[i+1] of the next pass.
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+16]
        mov     rdi, QWORD PTR [rcx+16]
        add     rdi, rax
        adc     r12, rdx
        add     rdi, r11
        adc     r12, 0
        ; a[i+3] += m[3] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+24]
        mov     r14, QWORD PTR [rcx+24]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+24], r14
        adc     r11, 0
        ; a[i+4] += m[4] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+32]
        mov     r14, QWORD PTR [rcx+32]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+32], r14
        adc     r12, 0
        ; a[i+5] += m[5] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+40]
        mov     r14, QWORD PTR [rcx+40]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+40], r14
        adc     r11, 0
        ; a[i+6] += m[6] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+48]
        mov     r14, QWORD PTR [rcx+48]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+48], r14
        adc     r12, 0
        ; a[i+7] += m[7] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+56]
        mov     r14, QWORD PTR [rcx+56]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+56], r14
        adc     r11, 0
        ; a[i+8] += m[8] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+64]
        mov     r14, QWORD PTR [rcx+64]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+64], r14
        adc     r12, 0
        ; a[i+9] += m[9] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+72]
        mov     r14, QWORD PTR [rcx+72]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+72], r14
        adc     r11, 0
        ; a[i+10] += m[10] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+80]
        mov     r14, QWORD PTR [rcx+80]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+80], r14
        adc     r12, 0
        ; a[i+11] += m[11] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+88]
        mov     r14, QWORD PTR [rcx+88]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+88], r14
        adc     r11, 0
        ; a[i+12] += m[12] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+96]
        mov     r14, QWORD PTR [rcx+96]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+96], r14
        adc     r12, 0
        ; a[i+13] += m[13] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+104]
        mov     r14, QWORD PTR [rcx+104]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+104], r14
        adc     r11, 0
        ; a[i+14] += m[14] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+112]
        mov     r14, QWORD PTR [rcx+112]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+112], r14
        adc     r12, 0
        ; a[i+15] += m[15] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+120]
        mov     r14, QWORD PTR [rcx+120]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+120], r14
        adc     r11, 0
        ; a[i+16] += m[16] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+128]
        mov     r14, QWORD PTR [rcx+128]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+128], r14
        adc     r12, 0
        ; a[i+17] += m[17] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+136]
        mov     r14, QWORD PTR [rcx+136]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+136], r14
        adc     r11, 0
        ; a[i+18] += m[18] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+144]
        mov     r14, QWORD PTR [rcx+144]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+144], r14
        adc     r12, 0
        ; a[i+19] += m[19] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+152]
        mov     r14, QWORD PTR [rcx+152]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+152], r14
        adc     r11, 0
        ; a[i+20] += m[20] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+160]
        mov     r14, QWORD PTR [rcx+160]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+160], r14
        adc     r12, 0
        ; a[i+21] += m[21] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+168]
        mov     r14, QWORD PTR [rcx+168]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+168], r14
        adc     r11, 0
        ; a[i+22] += m[22] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+176]
        mov     r14, QWORD PTR [rcx+176]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+176], r14
        adc     r12, 0
        ; a[i+23] += m[23] * mu
        ; The high product word also absorbs the carry saved in rsi by the
        ; previous pass, then is added into a[i+24]; rsi collects the new
        ; carry out of that word. mov leaves the flags alone, so zeroing
        ; rsi between the two adc instructions does not break the chain.
        mov     rax, r13
        mul     QWORD PTR [r9+184]
        mov     r14, QWORD PTR [rcx+184]
        add     r12, rax
        adc     rdx, rsi
        mov     rsi, 0
        adc     rsi, 0
        add     r14, r12
        mov     QWORD PTR [rcx+184], r14
        adc     QWORD PTR [rcx+192], rdx
        adc     rsi, 0
        ; i -= 1
        add     rcx, 8
        dec     r10
        jnz     L_3072_mont_reduce_24_loop
        ; rcx has advanced 24 words; write back the two words still held
        ; in registers.
        mov     QWORD PTR [rcx], r15
        mov     QWORD PTR [rcx+8], rdi
        ; mask = 0 - carry: all ones when the top word carried out, else 0.
        neg     rsi
        ; sp_3072_cond_sub_24(r = original a, a = upper 24 words, b = m, mask)
        ; m must be copied out of r9 before r9 is overwritten with the mask.
        mov     r8, r9
        mov     r9, rsi
        mov     rdx, rcx
        sub     rcx, 192
        call    sp_3072_cond_sub_24
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_3072_mont_reduce_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 (all ones) to subtract b and 0 to leave a unchanged.
; *
; * r  (rcx) A single precision number receiving the 24-word result.
; * a  (rdx) A single precision number to subtract from.
; * b  (r8)  A single precision number to subtract.
; * m  (r9)  Mask value to apply.
; *
; * Returns in rax 0 when the subtraction did not borrow out of the top
; * word and -1 when it did.
; */
; One limb of the borrow chain (used for limbs 2..23).
;   ofs   byte offset of the limb
;   breg  register that receives the masked b limb
;   areg  register that receives the a limb and then the difference
;   prev  register still holding the previous limb's difference, which is
;         stored at ofs-8 here
; mov and pext leave the flags untouched, so the borrow produced by the
; previous sub/sbb is still live when sbb executes.
SP_3072_COND_SUB_AVX2_24_LIMB MACRO ofs, breg, areg, prev
        mov     breg, QWORD PTR [r8+ofs]
        mov     areg, QWORD PTR [rdx+ofs]
        pext    breg, breg, r9
        mov     QWORD PTR [rcx+ofs-8], prev
        sbb     areg, breg
ENDM
_text SEGMENT READONLY PARA
sp_3072_cond_sub_avx2_24 PROC
        push    r12
        ; Limb 0 starts the borrow chain with a plain sub.
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        sub     r10, r12
        ; Limb 1.
        mov     r12, QWORD PTR [r8+8]
        mov     r11, QWORD PTR [rdx+8]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx], r10
        sbb     r11, r12
        ; Limbs 2..23: r10, r11 and r12 rotate through the three roles.
        SP_3072_COND_SUB_AVX2_24_LIMB 16, r10, r12, r11
        SP_3072_COND_SUB_AVX2_24_LIMB 24, r11, r10, r12
        SP_3072_COND_SUB_AVX2_24_LIMB 32, r12, r11, r10
        SP_3072_COND_SUB_AVX2_24_LIMB 40, r10, r12, r11
        SP_3072_COND_SUB_AVX2_24_LIMB 48, r11, r10, r12
        SP_3072_COND_SUB_AVX2_24_LIMB 56, r12, r11, r10
        SP_3072_COND_SUB_AVX2_24_LIMB 64, r10, r12, r11
        SP_3072_COND_SUB_AVX2_24_LIMB 72, r11, r10, r12
        SP_3072_COND_SUB_AVX2_24_LIMB 80, r12, r11, r10
        SP_3072_COND_SUB_AVX2_24_LIMB 88, r10, r12, r11
        SP_3072_COND_SUB_AVX2_24_LIMB 96, r11, r10, r12
        SP_3072_COND_SUB_AVX2_24_LIMB 104, r12, r11, r10
        SP_3072_COND_SUB_AVX2_24_LIMB 112, r10, r12, r11
        SP_3072_COND_SUB_AVX2_24_LIMB 120, r11, r10, r12
        SP_3072_COND_SUB_AVX2_24_LIMB 128, r12, r11, r10
        SP_3072_COND_SUB_AVX2_24_LIMB 136, r10, r12, r11
        SP_3072_COND_SUB_AVX2_24_LIMB 144, r11, r10, r12
        SP_3072_COND_SUB_AVX2_24_LIMB 152, r12, r11, r10
        SP_3072_COND_SUB_AVX2_24_LIMB 160, r10, r12, r11
        SP_3072_COND_SUB_AVX2_24_LIMB 168, r11, r10, r12
        SP_3072_COND_SUB_AVX2_24_LIMB 176, r12, r11, r10
        SP_3072_COND_SUB_AVX2_24_LIMB 184, r10, r12, r11
        ; Store the top limb, then turn the final borrow into 0 / -1.
        mov     QWORD PTR [rcx+184], r12
        sbb     rax, rax
        pop     r12
        ret
sp_3072_cond_sub_avx2_24 ENDP
_text ENDS
ENDIF
; /* Mul a by digit b into r. (r = a * b)
; *
; * r  (rcx) A single precision integer; 25 words are written (r[0..24]).
; * a  (rdx) A single precision integer of 24 words.
; * b  (r8)  A single precision digit.
; */
; One middle limb (limbs 1..22) of the multiply.
;   ofs     byte offset of the limb in a and r
;   acc_lo  accumulator word that is completed and stored by this step
;   acc_hi  next accumulator word; receives the high product word
;   acc_nx  cleared here, then catches the carry out of acc_hi
; The xor sits before mul exactly as in the unrolled form; mul rewrites the
; flags afterwards, so add starts a fresh carry chain.
SP_3072_MUL_D_24_LIMB MACRO ofs, acc_lo, acc_hi, acc_nx
        mov     rax, r8
        xor     acc_nx, acc_nx
        mul     QWORD PTR [r9+ofs]
        add     acc_lo, rax
        mov     QWORD PTR [rcx+ofs], acc_lo
        adc     acc_hi, rdx
        adc     acc_nx, 0
ENDM
_text SEGMENT READONLY PARA
sp_3072_mul_d_24 PROC
        push    r12
        ; mul overwrites rdx, so keep the pointer to a in r9.
        mov     r9, rdx
        ; A[0] * B
        mov     rax, r8
        xor     r12, r12
        mul     QWORD PTR [r9]
        mov     r10, rax
        mov     r11, rdx
        mov     QWORD PTR [rcx], r10
        ; A[1] * B .. A[22] * B: r10, r11 and r12 rotate as a three-word
        ; sliding accumulator.
        SP_3072_MUL_D_24_LIMB 8, r11, r12, r10
        SP_3072_MUL_D_24_LIMB 16, r12, r10, r11
        SP_3072_MUL_D_24_LIMB 24, r10, r11, r12
        SP_3072_MUL_D_24_LIMB 32, r11, r12, r10
        SP_3072_MUL_D_24_LIMB 40, r12, r10, r11
        SP_3072_MUL_D_24_LIMB 48, r10, r11, r12
        SP_3072_MUL_D_24_LIMB 56, r11, r12, r10
        SP_3072_MUL_D_24_LIMB 64, r12, r10, r11
        SP_3072_MUL_D_24_LIMB 72, r10, r11, r12
        SP_3072_MUL_D_24_LIMB 80, r11, r12, r10
        SP_3072_MUL_D_24_LIMB 88, r12, r10, r11
        SP_3072_MUL_D_24_LIMB 96, r10, r11, r12
        SP_3072_MUL_D_24_LIMB 104, r11, r12, r10
        SP_3072_MUL_D_24_LIMB 112, r12, r10, r11
        SP_3072_MUL_D_24_LIMB 120, r10, r11, r12
        SP_3072_MUL_D_24_LIMB 128, r11, r12, r10
        SP_3072_MUL_D_24_LIMB 136, r12, r10, r11
        SP_3072_MUL_D_24_LIMB 144, r10, r11, r12
        SP_3072_MUL_D_24_LIMB 152, r11, r12, r10
        SP_3072_MUL_D_24_LIMB 160, r12, r10, r11
        SP_3072_MUL_D_24_LIMB 168, r10, r11, r12
        SP_3072_MUL_D_24_LIMB 176, r11, r12, r10
        ; A[23] * B: the last product fills r[23] and the extra word r[24].
        mov     rax, r8
        mul     QWORD PTR [r9+184]
        add     r12, rax
        adc     r10, rdx
        mov     QWORD PTR [rcx+184], r12
        mov     QWORD PTR [rcx+192], r10
        pop     r12
        ret
sp_3072_mul_d_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * r  (rcx) A single precision integer; 25 words are written (r[0..24]).
; * a  (rdx) A single precision integer of 24 words.
; * b  (r8)  A single precision digit.
; *
; * mulx takes its implicit multiplicand from rdx, so a is moved to rax and
; * b into rdx. r13 is held at zero for the whole routine.
; */
; One middle limb (limbs 1..22) of the multiply.
;   ofs  byte offset of the limb in a and r
;   acc  word completed and stored by this step (holds the previous high
;        product word on entry)
;   nxt  reset to zero, then receives this step's high product word
; adcx carries through CF and adox through OF, so the two additions keep
; separate carry chains; mulx and mov do not touch either flag.
SP_3072_MUL_D_AVX2_24_LIMB MACRO ofs, acc, nxt
        mulx    r10, r9, QWORD PTR [rax+ofs]
        mov     nxt, r13
        adcx    acc, r9
        adox    nxt, r10
        mov     QWORD PTR [rcx+ofs], acc
ENDM
_text SEGMENT READONLY PARA
sp_3072_mul_d_avx2_24 PROC
        push    r12
        push    r13
        mov     rax, rdx
        ; A[0] * B
        mov     rdx, r8
        ; Zero r13; xor also clears CF and OF before the adcx/adox chains.
        xor     r13, r13
        mulx    r12, r11, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r11
        ; A[1] * B .. A[22] * B: r12 and r11 swap roles on every limb.
        SP_3072_MUL_D_AVX2_24_LIMB 8, r12, r11
        SP_3072_MUL_D_AVX2_24_LIMB 16, r11, r12
        SP_3072_MUL_D_AVX2_24_LIMB 24, r12, r11
        SP_3072_MUL_D_AVX2_24_LIMB 32, r11, r12
        SP_3072_MUL_D_AVX2_24_LIMB 40, r12, r11
        SP_3072_MUL_D_AVX2_24_LIMB 48, r11, r12
        SP_3072_MUL_D_AVX2_24_LIMB 56, r12, r11
        SP_3072_MUL_D_AVX2_24_LIMB 64, r11, r12
        SP_3072_MUL_D_AVX2_24_LIMB 72, r12, r11
        SP_3072_MUL_D_AVX2_24_LIMB 80, r11, r12
        SP_3072_MUL_D_AVX2_24_LIMB 88, r12, r11
        SP_3072_MUL_D_AVX2_24_LIMB 96, r11, r12
        SP_3072_MUL_D_AVX2_24_LIMB 104, r12, r11
        SP_3072_MUL_D_AVX2_24_LIMB 112, r11, r12
        SP_3072_MUL_D_AVX2_24_LIMB 120, r12, r11
        SP_3072_MUL_D_AVX2_24_LIMB 128, r11, r12
        SP_3072_MUL_D_AVX2_24_LIMB 136, r12, r11
        SP_3072_MUL_D_AVX2_24_LIMB 144, r11, r12
        SP_3072_MUL_D_AVX2_24_LIMB 152, r12, r11
        SP_3072_MUL_D_AVX2_24_LIMB 160, r11, r12
        SP_3072_MUL_D_AVX2_24_LIMB 168, r12, r11
        SP_3072_MUL_D_AVX2_24_LIMB 176, r11, r12
        ; A[23] * B: fold the last CF into the top word before storing
        ; r[23] and the extra word r[24].
        mulx    r10, r9, QWORD PTR [rax+184]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        adcx    r11, r13
        mov     QWORD PTR [rcx+184], r12
        mov     QWORD PTR [rcx+192], r11
        pop     r13
        pop     r12
        ret
sp_3072_mul_d_avx2_24 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1   (rcx) The high order half of the number to divide.
; * d0   (rdx) The low order half of the number to divide.
; * div  (r8)  The divisor.
; * returns the quotient in rax; the remainder left in rdx is not part of
; * the result.
; *
; * The div instruction faults when div is zero or when the quotient does
; * not fit in 64 bits (d1 >= div), so callers must rule both out.
; */
_text SEGMENT READONLY PARA
div_3072_word_asm_24 PROC
        ; div expects the dividend in rdx:rax (high:low).
        mov     rax, rdx
        mov     rdx, rcx
        div     r8
        ret
div_3072_word_asm_24 ENDP
_text ENDS
ENDIF
; /* Compare a with b in constant time.
; *
; * a  (rcx) A single precision integer of 24 words.
; * b  (rdx) A single precision integer of 24 words.
; * return -ve, 0 or +ve if a is less than, equal to or greater than b
; *        respectively.
; *
; * Every word is read and compared, from most to least significant, with
; * no branches. r8 is a mask that is all ones until the first differing
; * word and zero afterwards; rax holds the running result.
; */
; Compare one word pair at byte offset ofs.
;   Both words are ANDed with the mask, so once a difference has been seen
;   they compare equal and none of the cmov instructions fires.
;   cmova  : a word above b word -> result 1
;   cmovc  : a word below b word -> result takes the mask, still -1 here
;   cmovnz : words differ        -> mask becomes 0 for the remaining words
SP_3072_CMP_24_LIMB MACRO ofs
        mov     r11, QWORD PTR [rcx+ofs]
        mov     r12, QWORD PTR [rdx+ofs]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
ENDM
_text SEGMENT READONLY PARA
sp_3072_cmp_24 PROC
        push    r12
        ; r9 = 0, r8 = mask (-1), rax = result (-1 so far), r10 = 1
        xor     r9, r9
        mov     r8, -1
        mov     rax, -1
        mov     r10, 1
        ; Words 23 down to 1.
        SP_3072_CMP_24_LIMB 184
        SP_3072_CMP_24_LIMB 176
        SP_3072_CMP_24_LIMB 168
        SP_3072_CMP_24_LIMB 160
        SP_3072_CMP_24_LIMB 152
        SP_3072_CMP_24_LIMB 144
        SP_3072_CMP_24_LIMB 136
        SP_3072_CMP_24_LIMB 128
        SP_3072_CMP_24_LIMB 120
        SP_3072_CMP_24_LIMB 112
        SP_3072_CMP_24_LIMB 104
        SP_3072_CMP_24_LIMB 96
        SP_3072_CMP_24_LIMB 88
        SP_3072_CMP_24_LIMB 80
        SP_3072_CMP_24_LIMB 72
        SP_3072_CMP_24_LIMB 64
        SP_3072_CMP_24_LIMB 56
        SP_3072_CMP_24_LIMB 48
        SP_3072_CMP_24_LIMB 40
        SP_3072_CMP_24_LIMB 32
        SP_3072_CMP_24_LIMB 24
        SP_3072_CMP_24_LIMB 16
        SP_3072_CMP_24_LIMB 8
        ; Word 0.
        mov     r11, QWORD PTR [rcx]
        mov     r12, QWORD PTR [rdx]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; All words equal: mask is still -1 and rax is -1, so the xor
        ; gives 0. Otherwise the mask is 0 and rax passes through.
        xor     rax, r8
        pop     r12
        ret
sp_3072_cmp_24 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_3072_get_from_table_24 PROC
sub rsp, 128
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqu OWORD PTR [rsp+32], xmm8
movdqu OWORD PTR [rsp+48], xmm9
movdqu OWORD PTR [rsp+64], xmm10
movdqu OWORD PTR [rsp+80], xmm11
movdqu OWORD PTR [rsp+96], xmm12
movdqu OWORD PTR [rsp+112], xmm13
mov rax, 1
movd xmm10, r8
movd xmm11, rax
pxor xmm13, xmm13
pshufd xmm11, xmm11, 0
pshufd xmm10, xmm10, 0
; START: 0-7
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 16
mov r9, QWORD PTR [rdx+128]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 17
mov r9, QWORD PTR [rdx+136]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 18
mov r9, QWORD PTR [rdx+144]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 19
mov r9, QWORD PTR [rdx+152]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 20
mov r9, QWORD PTR [rdx+160]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 21
mov r9, QWORD PTR [rdx+168]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 22
mov r9, QWORD PTR [rdx+176]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 23
mov r9, QWORD PTR [rdx+184]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 24
mov r9, QWORD PTR [rdx+192]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 25
mov r9, QWORD PTR [rdx+200]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 26
mov r9, QWORD PTR [rdx+208]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 27
mov r9, QWORD PTR [rdx+216]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 28
mov r9, QWORD PTR [rdx+224]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 29
mov r9, QWORD PTR [rdx+232]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 30
mov r9, QWORD PTR [rdx+240]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 31
mov r9, QWORD PTR [rdx+248]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 0-7
; START: 8-15
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 16
mov r9, QWORD PTR [rdx+128]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 17
mov r9, QWORD PTR [rdx+136]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 18
mov r9, QWORD PTR [rdx+144]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 19
mov r9, QWORD PTR [rdx+152]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 20
mov r9, QWORD PTR [rdx+160]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 21
mov r9, QWORD PTR [rdx+168]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 22
mov r9, QWORD PTR [rdx+176]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 23
mov r9, QWORD PTR [rdx+184]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 24
mov r9, QWORD PTR [rdx+192]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 25
mov r9, QWORD PTR [rdx+200]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 26
mov r9, QWORD PTR [rdx+208]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 27
mov r9, QWORD PTR [rdx+216]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 28
mov r9, QWORD PTR [rdx+224]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 29
mov r9, QWORD PTR [rdx+232]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 30
mov r9, QWORD PTR [rdx+240]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 31
mov r9, QWORD PTR [rdx+248]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 8-15
; START: 16-23
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 16
mov r9, QWORD PTR [rdx+128]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 17
mov r9, QWORD PTR [rdx+136]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 18
mov r9, QWORD PTR [rdx+144]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 19
mov r9, QWORD PTR [rdx+152]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 20
mov r9, QWORD PTR [rdx+160]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 21
mov r9, QWORD PTR [rdx+168]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 22
mov r9, QWORD PTR [rdx+176]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 23
mov r9, QWORD PTR [rdx+184]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 24
mov r9, QWORD PTR [rdx+192]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 25
mov r9, QWORD PTR [rdx+200]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 26
mov r9, QWORD PTR [rdx+208]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 27
mov r9, QWORD PTR [rdx+216]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 28
mov r9, QWORD PTR [rdx+224]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 29
mov r9, QWORD PTR [rdx+232]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 30
mov r9, QWORD PTR [rdx+240]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 31
mov r9, QWORD PTR [rdx+248]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
; END: 16-23
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm8, OWORD PTR [rsp+32]
movdqu xmm9, OWORD PTR [rsp+48]
movdqu xmm10, OWORD PTR [rsp+64]
movdqu xmm11, OWORD PTR [rsp+80]
movdqu xmm12, OWORD PTR [rsp+96]
movdqu xmm13, OWORD PTR [rsp+112]
add rsp, 128
ret
sp_3072_get_from_table_24 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 3072 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; */
_text SEGMENT READONLY PARA
sp_3072_mont_reduce_avx2_24 PROC
; Montgomery-reduces the 48-word value at a using the 24-word modulus m,
; leaving the 24-word result in a[0..23].
; Arguments arrive in rcx (a), rdx (m) and r8 (mp).
; Register roles inside this routine:
;   r9  - moving pointer into a, biased by +96 bytes; the displacements
;         used in the loop span -64..+96
;   r10 - m (copied out of rdx, which mulx uses as its implicit multiplicand)
;   r8  - mp while the loop runs; afterwards reused as a pointer to a[24]
;   r11 - number of outer-loop iterations still to run
;   r14, r15, rdi, rsi - a[i+0..i+3], kept in registers across iterations
;   r12, r13 - alternating accumulators for the higher words of a
;   rax, rcx - low and high halves of each mulx product
;   rbx - zero operand for the end-of-row carry collection
;   rbp - carry handed from one outer iteration to the next; after the loop
;         it is negated and used as the mask for the final subtraction
; Save the eight registers written below that the Microsoft x64 convention
; treats as callee-saved.
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
push rbp
mov r9, rcx
mov r10, rdx
xor rbp, rbp
; i = 24
mov r11, 24
; Preload a[0..3]; these four words stay in registers and are never
; written back to memory inside the loop.
mov r14, QWORD PTR [r9]
mov r15, QWORD PTR [r9+8]
mov rdi, QWORD PTR [r9+16]
mov rsi, QWORD PTR [r9+24]
add r9, 96
; rbp is already zero from the xor above; this second clear is redundant.
xor rbp, rbp
L_3072_mont_reduce_avx2_24_loop:
; mu = a[i] * mp
mov rdx, r14
mov r12, r14
imul rdx, r8
; Zero rbx and, as a side effect of xor, clear both CF and OF so that the
; adcx (CF) and adox (OF) chains below start without a carry-in.
xor rbx, rbx
; Each step below: rcx:rax = m[j] * mu; the low half is added to word
; i+j through the CF chain and the high half to word i+j+1 through the
; OF chain. The mov instructions in between do not modify flags.
; a[i+0] += m[0] * mu
mulx rcx, rax, QWORD PTR [r10]
; Slide the register window down one word: r14 <- r15 <- rdi <- rsi.
mov r14, r15
adcx r12, rax
adox r14, rcx
; a[i+1] += m[1] * mu
mulx rcx, rax, QWORD PTR [r10+8]
mov r15, rdi
adcx r14, rax
adox r15, rcx
; a[i+2] += m[2] * mu
mulx rcx, rax, QWORD PTR [r10+16]
mov rdi, rsi
adcx r15, rax
adox rdi, rcx
; a[i+3] += m[3] * mu
mulx rcx, rax, QWORD PTR [r10+24]
; a[i+4] enters the register window here and is not stored back.
mov rsi, QWORD PTR [r9+-64]
adcx rdi, rax
adox rsi, rcx
; a[i+4] += m[4] * mu
mulx rcx, rax, QWORD PTR [r10+32]
mov r13, QWORD PTR [r9+-56]
adcx rsi, rax
adox r13, rcx
; From here on the words live in memory: r12 and r13 alternate, one
; holding the word being completed (stored at the end of the step) and
; the other receiving the next word plus the high half of the product.
; a[i+5] += m[5] * mu
mulx rcx, rax, QWORD PTR [r10+40]
mov r12, QWORD PTR [r9+-48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-56], r13
; a[i+6] += m[6] * mu
mulx rcx, rax, QWORD PTR [r10+48]
mov r13, QWORD PTR [r9+-40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-48], r12
; a[i+7] += m[7] * mu
mulx rcx, rax, QWORD PTR [r10+56]
mov r12, QWORD PTR [r9+-32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-40], r13
; a[i+8] += m[8] * mu
mulx rcx, rax, QWORD PTR [r10+64]
mov r13, QWORD PTR [r9+-24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-32], r12
; a[i+9] += m[9] * mu
mulx rcx, rax, QWORD PTR [r10+72]
mov r12, QWORD PTR [r9+-16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-24], r13
; a[i+10] += m[10] * mu
mulx rcx, rax, QWORD PTR [r10+80]
mov r13, QWORD PTR [r9+-8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-16], r12
; a[i+11] += m[11] * mu
mulx rcx, rax, QWORD PTR [r10+88]
mov r12, QWORD PTR [r9]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-8], r13
; a[i+12] += m[12] * mu
mulx rcx, rax, QWORD PTR [r10+96]
mov r13, QWORD PTR [r9+8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9], r12
; a[i+13] += m[13] * mu
mulx rcx, rax, QWORD PTR [r10+104]
mov r12, QWORD PTR [r9+16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+8], r13
; a[i+14] += m[14] * mu
mulx rcx, rax, QWORD PTR [r10+112]
mov r13, QWORD PTR [r9+24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+16], r12
; a[i+15] += m[15] * mu
mulx rcx, rax, QWORD PTR [r10+120]
mov r12, QWORD PTR [r9+32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+24], r13
; a[i+16] += m[16] * mu
mulx rcx, rax, QWORD PTR [r10+128]
mov r13, QWORD PTR [r9+40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+32], r12
; a[i+17] += m[17] * mu
mulx rcx, rax, QWORD PTR [r10+136]
mov r12, QWORD PTR [r9+48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+40], r13
; a[i+18] += m[18] * mu
mulx rcx, rax, QWORD PTR [r10+144]
mov r13, QWORD PTR [r9+56]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+48], r12
; a[i+19] += m[19] * mu
mulx rcx, rax, QWORD PTR [r10+152]
mov r12, QWORD PTR [r9+64]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+56], r13
; a[i+20] += m[20] * mu
mulx rcx, rax, QWORD PTR [r10+160]
mov r13, QWORD PTR [r9+72]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+64], r12
; a[i+21] += m[21] * mu
mulx rcx, rax, QWORD PTR [r10+168]
mov r12, QWORD PTR [r9+80]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+72], r13
; a[i+22] += m[22] * mu
mulx rcx, rax, QWORD PTR [r10+176]
mov r13, QWORD PTR [r9+88]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+80], r12
; a[i+23] += m[23] * mu
mulx rcx, rax, QWORD PTR [r10+184]
mov r12, QWORD PTR [r9+96]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+88], r13
; a[i+24] also absorbs the carry saved in rbp by the previous iteration
; (plus the CF still pending from the chain above).
adcx r12, rbp
; Collect this iteration's outgoing carry: rbp = 0 + OF + CF.
mov rbp, rbx
mov QWORD PTR [r9+96], r12
adox rbp, rbx
adcx rbp, rbx
; a += 1
add r9, 8
; i -= 1
sub r11, 1
jnz L_3072_mont_reduce_avx2_24_loop
; The loop advanced r9 by 24 * 8 = 192 bytes. Removing the +96 bias leaves
; r9 pointing at a[24], the start of the upper half.
sub r9, 96
; Turn the final carry into a mask: 0 stays 0, 1 becomes all ones.
neg rbp
; r8 = source pointer (a[24]); r9 = destination pointer (a[0]).
mov r8, r9
sub r9, 192
; Masked subtraction: a[0..23] = a[24..47] - (m AND mask).
; pext with an all-ones mask returns the word unchanged and with a zero
; mask returns 0, so m is subtracted only when the mask is set, with no
; branch on the carry. mov and pext leave the flags alone, so the borrow
; from sub/sbb survives the interleaved loads and stores. The temporaries
; rotate through rdx, rax and rcx.
; The first four minuend words come from r14, r15, rdi and rsi, because
; the loop kept them in registers rather than writing them to memory.
mov rcx, QWORD PTR [r10]
mov rdx, r14
pext rcx, rcx, rbp
sub rdx, rcx
mov rcx, QWORD PTR [r10+8]
mov rax, r15
pext rcx, rcx, rbp
mov QWORD PTR [r9], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+16]
mov rcx, rdi
pext rdx, rdx, rbp
mov QWORD PTR [r9+8], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+24]
mov rdx, rsi
pext rax, rax, rbp
mov QWORD PTR [r9+16], rcx
sbb rdx, rax
; Remaining minuend words are read from memory through r8.
mov rcx, QWORD PTR [r10+32]
mov rax, QWORD PTR [r8+32]
pext rcx, rcx, rbp
mov QWORD PTR [r9+24], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+40]
mov rcx, QWORD PTR [r8+40]
pext rdx, rdx, rbp
mov QWORD PTR [r9+32], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+48]
mov rdx, QWORD PTR [r8+48]
pext rax, rax, rbp
mov QWORD PTR [r9+40], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+56]
mov rax, QWORD PTR [r8+56]
pext rcx, rcx, rbp
mov QWORD PTR [r9+48], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+64]
mov rcx, QWORD PTR [r8+64]
pext rdx, rdx, rbp
mov QWORD PTR [r9+56], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+72]
mov rdx, QWORD PTR [r8+72]
pext rax, rax, rbp
mov QWORD PTR [r9+64], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+80]
mov rax, QWORD PTR [r8+80]
pext rcx, rcx, rbp
mov QWORD PTR [r9+72], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+88]
mov rcx, QWORD PTR [r8+88]
pext rdx, rdx, rbp
mov QWORD PTR [r9+80], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+96]
mov rdx, QWORD PTR [r8+96]
pext rax, rax, rbp
mov QWORD PTR [r9+88], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+104]
mov rax, QWORD PTR [r8+104]
pext rcx, rcx, rbp
mov QWORD PTR [r9+96], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+112]
mov rcx, QWORD PTR [r8+112]
pext rdx, rdx, rbp
mov QWORD PTR [r9+104], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+120]
mov rdx, QWORD PTR [r8+120]
pext rax, rax, rbp
mov QWORD PTR [r9+112], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+128]
mov rax, QWORD PTR [r8+128]
pext rcx, rcx, rbp
mov QWORD PTR [r9+120], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+136]
mov rcx, QWORD PTR [r8+136]
pext rdx, rdx, rbp
mov QWORD PTR [r9+128], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+144]
mov rdx, QWORD PTR [r8+144]
pext rax, rax, rbp
mov QWORD PTR [r9+136], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+152]
mov rax, QWORD PTR [r8+152]
pext rcx, rcx, rbp
mov QWORD PTR [r9+144], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+160]
mov rcx, QWORD PTR [r8+160]
pext rdx, rdx, rbp
mov QWORD PTR [r9+152], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+168]
mov rdx, QWORD PTR [r8+168]
pext rax, rax, rbp
mov QWORD PTR [r9+160], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+176]
mov rax, QWORD PTR [r8+176]
pext rcx, rcx, rbp
mov QWORD PTR [r9+168], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+184]
mov rcx, QWORD PTR [r8+184]
pext rdx, rdx, rbp
mov QWORD PTR [r9+176], rax
sbb rcx, rdx
mov QWORD PTR [r9+184], rcx
; Restore the saved registers in the reverse of the push order.
pop rbp
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_3072_mont_reduce_avx2_24 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Copy one 24-word (192-byte) entry out of a table of 32 entry pointers.
; * Every one of the 32 entries is loaded and combined under a compare
; * mask; the routine has no branches and no load whose address depends
; * on idx.
; *
; * r      (rcx) Destination; 192 bytes are written.
; * table  (rdx) Array of 32 pointers, each to a 192-byte entry.
; * idx    (r8)  Entry to select. Only the low 32 bits are compared, and
; *              the result is all zero when no counter value 0..31 matches.
; *
; * Register use: ymm10 = idx in every dword, ymm11 = 1 in every dword,
; * ymm13 = running entry counter, ymm12 = per-entry select mask,
; * ymm0-ymm3 = loaded data, ymm4-ymm7 = accumulated result.
; * xmm6-xmm13 are spilled to the stack on entry and reloaded before ret.
; * NOTE(review): declared as plain PROC (no FRAME/unwind directives)
; * although rsp is adjusted - confirm this is acceptable for unwinding.
; */
_text SEGMENT READONLY PARA
sp_3072_get_from_table_avx2_24 PROC
        ; Spill xmm6..xmm13 (8 registers x 16 bytes).
        sub     rsp, 128
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13

        ; Broadcast idx and the constant 1 to all eight dword lanes.
        ; ymm13 is zero here, so vpermd picks dword 0 for every lane.
        mov     rax, 1
        movd    xmm10, r8
        movd    xmm11, rax
        vpxor   ymm13, ymm13, ymm13
        vpermd  ymm10, ymm13, ymm10
        vpermd  ymm11, ymm13, ymm11

        ; ---- Pass 1: bytes 0..127 of the entry (words 0-15) ----
        vpxor   ymm13, ymm13, ymm13
        vpxor   ymm4, ymm4, ymm4
        vpxor   ymm5, ymm5, ymm5
        vpxor   ymm6, ymm6, ymm6
        vpxor   ymm7, ymm7, ymm7
        ; Assembly-time byte offset of the current pointer in table[].
sp3072_gft24_p1 = 0
        REPT    32
        mov     r9, QWORD PTR [rdx+sp3072_gft24_p1]
        ; mask = (counter == idx) ? all ones : 0
        vpcmpeqd ymm12, ymm13, ymm10
        vmovdqu ymm0, YMMWORD PTR [r9]
        vmovdqu ymm1, YMMWORD PTR [r9+32]
        vmovdqu ymm2, YMMWORD PTR [r9+64]
        vmovdqu ymm3, YMMWORD PTR [r9+96]
        vpand   ymm0, ymm0, ymm12
        vpand   ymm1, ymm1, ymm12
        vpand   ymm2, ymm2, ymm12
        vpand   ymm3, ymm3, ymm12
        vpor    ymm4, ymm4, ymm0
        vpor    ymm5, ymm5, ymm1
        vpor    ymm6, ymm6, ymm2
        vpor    ymm7, ymm7, ymm3
        ; counter += 1
        vpaddd  ymm13, ymm13, ymm11
sp3072_gft24_p1 = sp3072_gft24_p1 + 8
        ENDM
        vmovdqu YMMWORD PTR [rcx], ymm4
        vmovdqu YMMWORD PTR [rcx+32], ymm5
        vmovdqu YMMWORD PTR [rcx+64], ymm6
        vmovdqu YMMWORD PTR [rcx+96], ymm7
        add     rcx, 128

        ; ---- Pass 2: bytes 128..191 of the entry (words 16-23) ----
        vpxor   ymm13, ymm13, ymm13
        vpxor   ymm4, ymm4, ymm4
        vpxor   ymm5, ymm5, ymm5
sp3072_gft24_p2 = 0
        REPT    32
        mov     r9, QWORD PTR [rdx+sp3072_gft24_p2]
        ; Step past the 128 bytes already handled by pass 1.
        add     r9, 128
        vpcmpeqd ymm12, ymm13, ymm10
        vmovdqu ymm0, YMMWORD PTR [r9]
        vmovdqu ymm1, YMMWORD PTR [r9+32]
        vpand   ymm0, ymm0, ymm12
        vpand   ymm1, ymm1, ymm12
        vpor    ymm4, ymm4, ymm0
        vpor    ymm5, ymm5, ymm1
        vpaddd  ymm13, ymm13, ymm11
sp3072_gft24_p2 = sp3072_gft24_p2 + 8
        ENDM
        vmovdqu YMMWORD PTR [rcx], ymm4
        vmovdqu YMMWORD PTR [rcx+32], ymm5

        ; Reload the spilled xmm registers and release the stack area.
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        vmovdqu xmm12, OWORD PTR [rsp+96]
        vmovdqu xmm13, OWORD PTR [rsp+112]
        add     rsp, 128
        ret
sp_3072_get_from_table_avx2_24 ENDP
_text ENDS
ENDIF
; /* Conditionally subtract b from a, selected by the mask m.
; * m is -1 (all ones) to subtract b and 0 to subtract nothing (r = a).
; * The same instruction sequence runs for either mask value.
; *
; * r  (rcx) A single precision number receiving the 48-word result.
; * a  (rdx) A single precision number to subtract from.
; * b  (r8)  A single precision number to subtract.
; * m  (r9)  Mask value ANDed with every word of b.
; *
; * Returns in rax 0 when no borrow leaves the top word and -1 when one does.
; * Uses 384 bytes of stack to hold the masked copy of b.
; * NOTE(review): declared as plain PROC (no FRAME/unwind directives)
; * although rsp is adjusted - confirm this is acceptable for unwinding.
; */
_text SEGMENT READONLY PARA
sp_3072_cond_sub_48 PROC
        sub     rsp, 384

        ; ---- Stage 1: scratch[i] = b[i] & m, two words per step ----
sp3072_cs48_mo = 0
        REPT    24
        mov     r10, QWORD PTR [r8+sp3072_cs48_mo]
        mov     r11, QWORD PTR [r8+sp3072_cs48_mo+8]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+sp3072_cs48_mo], r10
        mov     QWORD PTR [rsp+sp3072_cs48_mo+8], r11
sp3072_cs48_mo = sp3072_cs48_mo + 16
        ENDM

        ; ---- Stage 2: r = a - scratch, one borrow chain over 48 words ----
        ; Only mov and sbb appear between the opening sub and the final
        ; sbb, so the carry flag is carried from word to word. r8 is
        ; reused as a temporary now that b has been copied. Each result
        ; word is stored one step after it is computed.

        ; Word 0 starts the chain with a plain sub.
        mov     r10, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rsp]
        sub     r10, r8

        ; Words 1..46, handled as (odd, even) pairs.
sp3072_cs48_so = 8
        REPT    23
        mov     r11, QWORD PTR [rdx+sp3072_cs48_so]
        mov     r8, QWORD PTR [rsp+sp3072_cs48_so]
        sbb     r11, r8
        mov     QWORD PTR [rcx+sp3072_cs48_so-8], r10
        mov     r10, QWORD PTR [rdx+sp3072_cs48_so+8]
        mov     r8, QWORD PTR [rsp+sp3072_cs48_so+8]
        sbb     r10, r8
        mov     QWORD PTR [rcx+sp3072_cs48_so], r11
sp3072_cs48_so = sp3072_cs48_so + 16
        ENDM

        ; Word 47, then flush the two pending result words.
        mov     r11, QWORD PTR [rdx+376]
        mov     r8, QWORD PTR [rsp+376]
        sbb     r11, r8
        mov     QWORD PTR [rcx+368], r10
        mov     QWORD PTR [rcx+376], r11

        ; rax = 0 - CF: turn the final borrow into 0 or -1.
        sbb     rax, rax
        add     rsp, 384
        ret
sp_3072_cond_sub_48 ENDP
_text ENDS
; /* Reduce the number back to 3072 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; */
_text SEGMENT READONLY PARA
; sp_3072_mont_reduce_48
;
; Montgomery reduction, performed in place on the number at a.
;
; In (registers as read by the code below):
;   rcx = a   words a[0]..a[95] are read and/or written
;   rdx = m   48-word modulus (moved to r9 because mul overwrites rdx)
;   r8  = mp  per-row multiplier: mu = a[i] * mp
;
; Register roles inside the loop:
;   r10      rows still to process (starts at 48)
;   r13      mu for the current row
;   r15/rdi  a[i] / a[i+1], carried in registers from row to row
;   r11/r12  carry word, alternating between consecutive columns
;   r14      scratch for the a[i+j] word being updated
;   rsi      carry out of the top word of the previous row
;
; After the last row the two register-held words are written back and
; sp_3072_cond_sub_48 is called with r = a, a = a + 48 words, b = m and
; mask = -(final carry).
;
; NOTE(review): the call is made with no Win64 shadow space and with rsp
; 8 bytes off 16-byte alignment (six pushes after the return address).
; This presumably relies on sp_3072_cond_sub_48 being a file-local routine
; that needs neither - confirm against its definition.
sp_3072_mont_reduce_48 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        mov     r9, rdx
        xor     rsi, rsi
        ; i = 48
        mov     r10, 48
        mov     r15, QWORD PTR [rcx]
        mov     rdi, QWORD PTR [rcx+8]
L_3072_mont_reduce_48_loop:
        ; mu = a[i] * mp
        mov     r13, r15
        imul    r13, r8
        ; a[i+0] += m[0] * mu
        ; (only the carry in r12 is kept; r15 is reloaded in the next column)
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9]
        add     r15, rax
        adc     r12, rdx
        ; a[i+1] += m[1] * mu   (result stays in r15: it is a[i] of the next row)
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+8]
        mov     r15, rdi
        add     r15, rax
        adc     r11, rdx
        add     r15, r12
        adc     r11, 0
        ; a[i+2] += m[2] * mu   (result stays in rdi: it is a[i+1] of the next row)
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+16]
        mov     rdi, QWORD PTR [rcx+16]
        add     rdi, rax
        adc     r12, rdx
        add     rdi, r11
        adc     r12, 0
        ; a[i+3] .. a[i+46]: 22 pairs of columns, carry alternating r12 -> r11 -> r12
sp_3072_mont_reduce_48_off = 24
        REPEAT 22
        ; odd column j: a[i+j] += m[j] * mu, carry in r12, carry out r11
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+sp_3072_mont_reduce_48_off]
        mov     r14, QWORD PTR [rcx+sp_3072_mont_reduce_48_off]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+sp_3072_mont_reduce_48_off], r14
        adc     r11, 0
        ; even column j+1: a[i+j+1] += m[j+1] * mu, carry in r11, carry out r12
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+sp_3072_mont_reduce_48_off+8]
        mov     r14, QWORD PTR [rcx+sp_3072_mont_reduce_48_off+8]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+sp_3072_mont_reduce_48_off+8], r14
        adc     r12, 0
sp_3072_mont_reduce_48_off = sp_3072_mont_reduce_48_off + 16
        ENDM
        ; a[i+47] += m[47] * mu
        mov     rax, r13
        mul     QWORD PTR [r9+376]
        mov     r14, QWORD PTR [rcx+376]
        add     r12, rax
        ; high half + carry left by the previous row + CF
        adc     rdx, rsi
        ; mov (not xor) so that CF from the adc above survives
        mov     rsi, 0
        adc     rsi, 0
        add     r14, r12
        mov     QWORD PTR [rcx+376], r14
        ; a[i+48] += rdx + CF; its carry out is kept in rsi for the next row
        adc     QWORD PTR [rcx+384], rdx
        adc     rsi, 0
        ; i -= 1
        add     rcx, 8
        dec     r10
        jnz     L_3072_mont_reduce_48_loop
        ; write back the two words that were carried in registers
        mov     QWORD PTR [rcx], r15
        mov     QWORD PTR [rcx+8], rdi
        ; carry -> mask (0 stays 0, 1 becomes -1)
        neg     rsi
        ; Arguments for sp_3072_cond_sub_48. r9 must be copied to r8 BEFORE it
        ; is overwritten with the mask.
        mov     r8, r9
        mov     r9, rsi
        mov     rdx, rcx
        sub     rcx, 384
        call    sp_3072_cond_sub_48
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_3072_mont_reduce_48 ENDP
_text ENDS
; /* Sub b from a into r. (r = a - b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
; sp_3072_sub_48: r = a - b over 48 64-bit words.
;
; In:   rcx = r (destination), rdx = a, r8 = b
; Out:  rax = 0 when the final subtraction produced no borrow,
;             all ones when it did (sbb rax, rax)
; Uses: r9, r10 as alternating word registers.
;
; Each word of a is loaded before the previous result word is stored, and
; only mov instructions sit between the sub/sbb steps, so the borrow in CF
; is carried unbroken from word 0 to word 47.
sp_3072_sub_48 PROC
        ; word 0 starts the borrow chain
        mov     r9, QWORD PTR [rdx]
        sub     r9, QWORD PTR [r8]
        ; words 1..46, two per repetition (r10 takes the odd word, r9 the even)
sp_3072_sub_48_off = 0
        REPEAT 23
        mov     r10, QWORD PTR [rdx+sp_3072_sub_48_off+8]
        mov     QWORD PTR [rcx+sp_3072_sub_48_off], r9
        sbb     r10, QWORD PTR [r8+sp_3072_sub_48_off+8]
        mov     r9, QWORD PTR [rdx+sp_3072_sub_48_off+16]
        mov     QWORD PTR [rcx+sp_3072_sub_48_off+8], r10
        sbb     r9, QWORD PTR [r8+sp_3072_sub_48_off+16]
sp_3072_sub_48_off = sp_3072_sub_48_off + 16
        ENDM
        ; word 47, then flush the last two results
        mov     r10, QWORD PTR [rdx+376]
        mov     QWORD PTR [rcx+368], r9
        sbb     r10, QWORD PTR [r8+376]
        mov     QWORD PTR [rcx+376], r10
        ; turn the final borrow into 0 / -1
        sbb     rax, rax
        ret
sp_3072_sub_48 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision digit.
; */
_text SEGMENT READONLY PARA
; sp_3072_mul_d_avx2_48: r = a * b, a being 48 words and b a single word.
;
; In:   rcx = r (49 words are written: r[0]..r[48]), rdx = a, r8 = b
; Uses: rax (copy of a, because mulx takes its implicit factor from rdx),
;       r9/r10 (low/high half of each product), r11/r12 (pending result
;       word, alternating), r13 (constant zero)
;
; mulx does not touch the flags, so two independent carry chains run side
; by side: adcx (CF) adds the low half of the product into the pending
; word, adox (OF) moves the high half into the next pending word.
sp_3072_mul_d_avx2_48 PROC
        push    r12
        push    r13
        mov     rax, rdx
        ; A[0] * B
        mov     rdx, r8
        ; r13 = 0; this also clears CF and OF before the chains start
        xor     r13, r13
        mulx    r12, r11, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r11
        ; A[1] * B .. A[46] * B, two words per repetition
sp_3072_mul_d_avx2_48_off = 8
        REPEAT 23
        ; odd word: pending r12 is completed and stored, r11 becomes pending
        mulx    r10, r9, QWORD PTR [rax+sp_3072_mul_d_avx2_48_off]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+sp_3072_mul_d_avx2_48_off], r12
        ; even word: pending r11 is completed and stored, r12 becomes pending
        mulx    r10, r9, QWORD PTR [rax+sp_3072_mul_d_avx2_48_off+8]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+sp_3072_mul_d_avx2_48_off+8], r11
sp_3072_mul_d_avx2_48_off = sp_3072_mul_d_avx2_48_off + 16
        ENDM
        ; A[47] * B
        mulx    r10, r9, QWORD PTR [rax+376]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        ; fold the last CF into the top word
        adcx    r11, r13
        mov     QWORD PTR [rcx+376], r12
        mov     QWORD PTR [rcx+384], r11
        pop     r13
        pop     r12
        ret
sp_3072_mul_d_avx2_48 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1 The high order half of the number to divide.
; * d0 The low order half of the number to divide.
; * div The divisor.
; * returns the result of the division.
; */
_text SEGMENT READONLY PARA
; div_3072_word_asm_48: quotient of the 128-bit value (d1:d0) by div.
;
; In:   rcx = d1 (high word), rdx = d0 (low word), r8 = div
; Out:  rax = quotient; the remainder left in rdx is not used
;
; The div instruction raises a divide error when the quotient does not fit
; in 64 bits, so the caller must pass d1 < div (which also rules out
; div = 0).
div_3072_word_asm_48 PROC
        ; move the low word first: rdx is about to receive the high word
        mov     rax, rdx
        mov     rdx, rcx
        div     r8
        ret
div_3072_word_asm_48 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 to copy a to r unchanged.
; *
; * r A single precision number representing the conditional subtract result.
; * a A single precision number to subtract from.
; * b A single precision number to subtract.
; * m Mask value to apply.
; */
_text SEGMENT READONLY PARA
; sp_3072_cond_sub_avx2_48: r = a - (b filtered by m) over 48 words.
;
; In:   rcx = r, rdx = a, r8 = b, r9 = m
; Out:  rax = 0 when the final subtraction produced no borrow,
;             all ones when it did (sbb rax, rax)
; Uses: r10, r11, r12 in rotation (r12 is saved and restored)
;
; Every word of b goes through "pext x, x, r9": with r9 = -1 the word is
; unchanged, with r9 = 0 it becomes 0, so the same instruction stream
; either subtracts b or subtracts nothing. pext and mov leave the flags
; alone, which keeps the sub/sbb borrow chain intact.
sp_3072_cond_sub_avx2_48 PROC
        push    r12
        ; word 0 starts the borrow chain
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        sub     r10, r12
        ; words 1..45, three per repetition; each step also stores the
        ; result of the previous word
sp_3072_cond_sub_avx2_48_off = 8
        REPEAT 15
        mov     r12, QWORD PTR [r8+sp_3072_cond_sub_avx2_48_off]
        mov     r11, QWORD PTR [rdx+sp_3072_cond_sub_avx2_48_off]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+sp_3072_cond_sub_avx2_48_off-8], r10
        sbb     r11, r12
        mov     r10, QWORD PTR [r8+sp_3072_cond_sub_avx2_48_off+8]
        mov     r12, QWORD PTR [rdx+sp_3072_cond_sub_avx2_48_off+8]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+sp_3072_cond_sub_avx2_48_off], r11
        sbb     r12, r10
        mov     r11, QWORD PTR [r8+sp_3072_cond_sub_avx2_48_off+16]
        mov     r10, QWORD PTR [rdx+sp_3072_cond_sub_avx2_48_off+16]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+sp_3072_cond_sub_avx2_48_off+8], r12
        sbb     r10, r11
sp_3072_cond_sub_avx2_48_off = sp_3072_cond_sub_avx2_48_off + 24
        ENDM
        ; word 46
        mov     r12, QWORD PTR [r8+368]
        mov     r11, QWORD PTR [rdx+368]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+360], r10
        sbb     r11, r12
        ; word 47
        mov     r10, QWORD PTR [r8+376]
        mov     r12, QWORD PTR [rdx+376]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+368], r11
        sbb     r12, r10
        mov     QWORD PTR [rcx+376], r12
        ; turn the final borrow into 0 / -1
        sbb     rax, rax
        pop     r12
        ret
sp_3072_cond_sub_avx2_48 ENDP
_text ENDS
ENDIF
; /* Compare a with b in constant time.
; *
; * a A single precision integer.
; * b A single precision integer.
; * return -ve, 0 or +ve if a is less than, equal to or greater than b
; * respectively.
; */
_text SEGMENT READONLY PARA
; sp_3072_cmp_48: compare two 48-word numbers without branching.
;
; In:   rcx = a, rdx = b
; Out:  rax = 1 when a > b, 0 when a == b, -1 when a < b
; Uses: r8  mask, all ones until the first differing word, then 0
;       r9  constant 0, r10 constant 1
;       r11/r12 current (masked) words of a and b; r12 is saved/restored
;
; Words are visited from most significant (offset 376) down to offset 0.
; Both words are ANDed with the mask, so once a difference has been
; recorded every later word compares as 0 == 0 and changes nothing; all 48
; words are still read. rax starts at -1 and is XORed with the mask at the
; end, which maps the "never differed" case (-1, -1) to 0 and leaves the
; 1 / -1 results (mask 0) unchanged.
sp_3072_cmp_48 PROC
        push    r12
        xor     r9, r9
        mov     r8, -1
        mov     rax, -1
        mov     r10, 1
sp_3072_cmp_48_off = 376
        REPEAT 48
        mov     r11, QWORD PTR [rcx+sp_3072_cmp_48_off]
        mov     r12, QWORD PTR [rdx+sp_3072_cmp_48_off]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        ; the three cmov all test the flags of the sub above
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
sp_3072_cmp_48_off = sp_3072_cmp_48_off - 8
        ENDM
        xor     rax, r8
        pop     r12
        ret
sp_3072_cmp_48 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_3072_get_from_table_48 PROC
sub rsp, 128
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqu OWORD PTR [rsp+32], xmm8
movdqu OWORD PTR [rsp+48], xmm9
movdqu OWORD PTR [rsp+64], xmm10
movdqu OWORD PTR [rsp+80], xmm11
movdqu OWORD PTR [rsp+96], xmm12
movdqu OWORD PTR [rsp+112], xmm13
mov rax, 1
movd xmm10, r8
movd xmm11, rax
pxor xmm13, xmm13
pshufd xmm11, xmm11, 0
pshufd xmm10, xmm10, 0
; START: 0-7
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 0-7
; START: 8-15
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 8-15
; START: 16-23
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 16-23
; START: 24-31
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 24-31
; START: 32-39
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 32-39
; START: 40-47
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
; END: 40-47
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm8, OWORD PTR [rsp+32]
movdqu xmm9, OWORD PTR [rsp+48]
movdqu xmm10, OWORD PTR [rsp+64]
movdqu xmm11, OWORD PTR [rsp+80]
movdqu xmm12, OWORD PTR [rsp+96]
movdqu xmm13, OWORD PTR [rsp+112]
add rsp, 128
ret
sp_3072_get_from_table_48 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 3072 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; *
; * Arguments are read from rcx (a), rdx (m) and r8 (mp).
; * The loop runs 48 times; iteration i adds m[0..47] * mu into a[i..i+47]
; * and the carry into a[i+48], so words a[0]..a[95] are accessed.
; * Afterwards (m AND mask) is subtracted from a[48..95] and the 48 word
; * result is written to a[0..47], where mask is the negated final carry.
; *
; * Register roles inside the routine:
; *   r9            pointer into a (biased by 192 bytes during the loop)
; *   r10           m
; *   r8            mp during the loop, a + 384 during the subtraction
; *   r11           loop counter
; *   rdx           mu (implicit multiplicand of mulx)
; *   r14,r15,      the four lowest live words of a, kept in registers
; *   rdi,rsi
; *   r12,r13       alternating accumulators for the remaining words
; *   rbx           zero
; *   rbp           carry between iterations, then the subtraction mask
; */
_text SEGMENT READONLY PARA
sp_3072_mont_reduce_avx2_48 PROC
; Save every register that is pushed here and popped again before ret.
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
push rbp
; r9 = a, r10 = m. rdx must be freed because mulx multiplies by rdx.
mov r9, rcx
mov r10, rdx
; rbp = carry into the top word; nothing carried in yet.
xor rbp, rbp
; i = 48
mov r11, 48
; Preload a[0..3] into r14, r15, rdi, rsi.
mov r14, QWORD PTR [r9]
mov r15, QWORD PTR [r9+8]
mov rdi, QWORD PTR [r9+16]
mov rsi, QWORD PTR [r9+24]
; Bias the pointer by 24 words; the loop then uses displacements from
; -160 to +192 (presumably to keep most of them in the 8-bit range).
add r9, 192
; NOTE(review): rbp was already cleared above; this second clear is redundant.
xor rbp, rbp
L_3072_mont_reduce_avx2_48_loop:
; mu = a[i] * mp
; rdx = low 64 bits of a[i] * mp, r12 = a[i].
mov rdx, r14
mov r12, r14
imul rdx, r8
; rbx = 0; xor also clears CF and OF, so both carry chains start at zero.
; adcx propagates through CF (low product halves), adox through OF (high
; product halves). mulx and mov leave the flags untouched.
xor rbx, rbx
; a[i+0] += m[0] * mu
; The low word sum in r12 is never stored; r12 is reloaded at a[i+5].
; (Expected to be zero when mp is the negated inverse of m[0] - see header.)
mulx rcx, rax, QWORD PTR [r10]
mov r14, r15
adcx r12, rax
adox r14, rcx
; a[i+1] += m[1] * mu
mulx rcx, rax, QWORD PTR [r10+8]
mov r15, rdi
adcx r14, rax
adox r15, rcx
; a[i+2] += m[2] * mu
mulx rcx, rax, QWORD PTR [r10+16]
mov rdi, rsi
adcx r15, rax
adox rdi, rcx
; a[i+3] += m[3] * mu
; [r9-160] is a[i+4]; it becomes the new fourth register-resident word.
mulx rcx, rax, QWORD PTR [r10+24]
mov rsi, QWORD PTR [r9+-160]
adcx rdi, rax
adox rsi, rcx
; a[i+4] += m[4] * mu
; From here on each word is loaded one step ahead into r12/r13 and
; written back to the same address one step later.
mulx rcx, rax, QWORD PTR [r10+32]
mov r13, QWORD PTR [r9+-152]
adcx rsi, rax
adox r13, rcx
; a[i+5] += m[5] * mu
mulx rcx, rax, QWORD PTR [r10+40]
mov r12, QWORD PTR [r9+-144]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-152], r13
; a[i+6] += m[6] * mu
mulx rcx, rax, QWORD PTR [r10+48]
mov r13, QWORD PTR [r9+-136]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-144], r12
; a[i+7] += m[7] * mu
mulx rcx, rax, QWORD PTR [r10+56]
mov r12, QWORD PTR [r9+-128]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-136], r13
; a[i+8] += m[8] * mu
mulx rcx, rax, QWORD PTR [r10+64]
mov r13, QWORD PTR [r9+-120]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-128], r12
; a[i+9] += m[9] * mu
mulx rcx, rax, QWORD PTR [r10+72]
mov r12, QWORD PTR [r9+-112]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-120], r13
; a[i+10] += m[10] * mu
mulx rcx, rax, QWORD PTR [r10+80]
mov r13, QWORD PTR [r9+-104]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-112], r12
; a[i+11] += m[11] * mu
mulx rcx, rax, QWORD PTR [r10+88]
mov r12, QWORD PTR [r9+-96]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-104], r13
; a[i+12] += m[12] * mu
mulx rcx, rax, QWORD PTR [r10+96]
mov r13, QWORD PTR [r9+-88]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-96], r12
; a[i+13] += m[13] * mu
mulx rcx, rax, QWORD PTR [r10+104]
mov r12, QWORD PTR [r9+-80]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-88], r13
; a[i+14] += m[14] * mu
mulx rcx, rax, QWORD PTR [r10+112]
mov r13, QWORD PTR [r9+-72]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-80], r12
; a[i+15] += m[15] * mu
mulx rcx, rax, QWORD PTR [r10+120]
mov r12, QWORD PTR [r9+-64]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-72], r13
; a[i+16] += m[16] * mu
mulx rcx, rax, QWORD PTR [r10+128]
mov r13, QWORD PTR [r9+-56]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-64], r12
; a[i+17] += m[17] * mu
mulx rcx, rax, QWORD PTR [r10+136]
mov r12, QWORD PTR [r9+-48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-56], r13
; a[i+18] += m[18] * mu
mulx rcx, rax, QWORD PTR [r10+144]
mov r13, QWORD PTR [r9+-40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-48], r12
; a[i+19] += m[19] * mu
mulx rcx, rax, QWORD PTR [r10+152]
mov r12, QWORD PTR [r9+-32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-40], r13
; a[i+20] += m[20] * mu
mulx rcx, rax, QWORD PTR [r10+160]
mov r13, QWORD PTR [r9+-24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-32], r12
; a[i+21] += m[21] * mu
mulx rcx, rax, QWORD PTR [r10+168]
mov r12, QWORD PTR [r9+-16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-24], r13
; a[i+22] += m[22] * mu
mulx rcx, rax, QWORD PTR [r10+176]
mov r13, QWORD PTR [r9+-8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-16], r12
; a[i+23] += m[23] * mu
mulx rcx, rax, QWORD PTR [r10+184]
mov r12, QWORD PTR [r9]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-8], r13
; a[i+24] += m[24] * mu
mulx rcx, rax, QWORD PTR [r10+192]
mov r13, QWORD PTR [r9+8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9], r12
; a[i+25] += m[25] * mu
mulx rcx, rax, QWORD PTR [r10+200]
mov r12, QWORD PTR [r9+16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+8], r13
; a[i+26] += m[26] * mu
mulx rcx, rax, QWORD PTR [r10+208]
mov r13, QWORD PTR [r9+24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+16], r12
; a[i+27] += m[27] * mu
mulx rcx, rax, QWORD PTR [r10+216]
mov r12, QWORD PTR [r9+32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+24], r13
; a[i+28] += m[28] * mu
mulx rcx, rax, QWORD PTR [r10+224]
mov r13, QWORD PTR [r9+40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+32], r12
; a[i+29] += m[29] * mu
mulx rcx, rax, QWORD PTR [r10+232]
mov r12, QWORD PTR [r9+48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+40], r13
; a[i+30] += m[30] * mu
mulx rcx, rax, QWORD PTR [r10+240]
mov r13, QWORD PTR [r9+56]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+48], r12
; a[i+31] += m[31] * mu
mulx rcx, rax, QWORD PTR [r10+248]
mov r12, QWORD PTR [r9+64]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+56], r13
; a[i+32] += m[32] * mu
mulx rcx, rax, QWORD PTR [r10+256]
mov r13, QWORD PTR [r9+72]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+64], r12
; a[i+33] += m[33] * mu
mulx rcx, rax, QWORD PTR [r10+264]
mov r12, QWORD PTR [r9+80]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+72], r13
; a[i+34] += m[34] * mu
mulx rcx, rax, QWORD PTR [r10+272]
mov r13, QWORD PTR [r9+88]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+80], r12
; a[i+35] += m[35] * mu
mulx rcx, rax, QWORD PTR [r10+280]
mov r12, QWORD PTR [r9+96]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+88], r13
; a[i+36] += m[36] * mu
mulx rcx, rax, QWORD PTR [r10+288]
mov r13, QWORD PTR [r9+104]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+96], r12
; a[i+37] += m[37] * mu
mulx rcx, rax, QWORD PTR [r10+296]
mov r12, QWORD PTR [r9+112]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+104], r13
; a[i+38] += m[38] * mu
mulx rcx, rax, QWORD PTR [r10+304]
mov r13, QWORD PTR [r9+120]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+112], r12
; a[i+39] += m[39] * mu
mulx rcx, rax, QWORD PTR [r10+312]
mov r12, QWORD PTR [r9+128]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+120], r13
; a[i+40] += m[40] * mu
mulx rcx, rax, QWORD PTR [r10+320]
mov r13, QWORD PTR [r9+136]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+128], r12
; a[i+41] += m[41] * mu
mulx rcx, rax, QWORD PTR [r10+328]
mov r12, QWORD PTR [r9+144]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+136], r13
; a[i+42] += m[42] * mu
mulx rcx, rax, QWORD PTR [r10+336]
mov r13, QWORD PTR [r9+152]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+144], r12
; a[i+43] += m[43] * mu
mulx rcx, rax, QWORD PTR [r10+344]
mov r12, QWORD PTR [r9+160]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+152], r13
; a[i+44] += m[44] * mu
mulx rcx, rax, QWORD PTR [r10+352]
mov r13, QWORD PTR [r9+168]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+160], r12
; a[i+45] += m[45] * mu
mulx rcx, rax, QWORD PTR [r10+360]
mov r12, QWORD PTR [r9+176]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+168], r13
; a[i+46] += m[46] * mu
mulx rcx, rax, QWORD PTR [r10+368]
mov r13, QWORD PTR [r9+184]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+176], r12
; a[i+47] += m[47] * mu
; [r9+192] is a[i+48], the word that receives the final high half.
mulx rcx, rax, QWORD PTR [r10+376]
mov r12, QWORD PTR [r9+192]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+184], r13
; a[i+48] += carry saved from the previous iteration (plus CF).
adcx r12, rbp
; rbp = 0, then rbp = OF + CF: the carry out of this iteration.
mov rbp, rbx
mov QWORD PTR [r9+192], r12
adox rbp, rbx
adcx rbp, rbx
; a += 1
add r9, 8
; i -= 1
sub r11, 1
jnz L_3072_mont_reduce_avx2_48_loop
; r9 advanced 48 words in the loop; remove the 192 byte bias so that
; r9 = a + 384 (word 48).
sub r9, 192
; rbp = 0 - carry: all ones when the final carry is 1, zero when it is 0.
neg rbp
; r8 = a + 384 (source, upper half), r9 = a (destination, lower half).
mov r8, r9
sub r9, 384
; a[j] = a[48+j] - (m[j] AND mask) for j = 0..47.
; pext with an all-ones mask returns the word unchanged and with a zero
; mask returns 0. mov and pext do not modify flags, so the borrow from
; each sub/sbb reaches the next sbb. rcx, rax and rdx rotate as scratch.
; Words 48..51 are taken from r14, r15, rdi, rsi rather than from memory.
mov rcx, QWORD PTR [r10]
mov rdx, r14
pext rcx, rcx, rbp
sub rdx, rcx
mov rcx, QWORD PTR [r10+8]
mov rax, r15
pext rcx, rcx, rbp
mov QWORD PTR [r9], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+16]
mov rcx, rdi
pext rdx, rdx, rbp
mov QWORD PTR [r9+8], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+24]
mov rdx, rsi
pext rax, rax, rbp
mov QWORD PTR [r9+16], rcx
sbb rdx, rax
; Remaining words are read from the upper half through r8.
mov rcx, QWORD PTR [r10+32]
mov rax, QWORD PTR [r8+32]
pext rcx, rcx, rbp
mov QWORD PTR [r9+24], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+40]
mov rcx, QWORD PTR [r8+40]
pext rdx, rdx, rbp
mov QWORD PTR [r9+32], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+48]
mov rdx, QWORD PTR [r8+48]
pext rax, rax, rbp
mov QWORD PTR [r9+40], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+56]
mov rax, QWORD PTR [r8+56]
pext rcx, rcx, rbp
mov QWORD PTR [r9+48], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+64]
mov rcx, QWORD PTR [r8+64]
pext rdx, rdx, rbp
mov QWORD PTR [r9+56], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+72]
mov rdx, QWORD PTR [r8+72]
pext rax, rax, rbp
mov QWORD PTR [r9+64], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+80]
mov rax, QWORD PTR [r8+80]
pext rcx, rcx, rbp
mov QWORD PTR [r9+72], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+88]
mov rcx, QWORD PTR [r8+88]
pext rdx, rdx, rbp
mov QWORD PTR [r9+80], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+96]
mov rdx, QWORD PTR [r8+96]
pext rax, rax, rbp
mov QWORD PTR [r9+88], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+104]
mov rax, QWORD PTR [r8+104]
pext rcx, rcx, rbp
mov QWORD PTR [r9+96], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+112]
mov rcx, QWORD PTR [r8+112]
pext rdx, rdx, rbp
mov QWORD PTR [r9+104], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+120]
mov rdx, QWORD PTR [r8+120]
pext rax, rax, rbp
mov QWORD PTR [r9+112], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+128]
mov rax, QWORD PTR [r8+128]
pext rcx, rcx, rbp
mov QWORD PTR [r9+120], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+136]
mov rcx, QWORD PTR [r8+136]
pext rdx, rdx, rbp
mov QWORD PTR [r9+128], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+144]
mov rdx, QWORD PTR [r8+144]
pext rax, rax, rbp
mov QWORD PTR [r9+136], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+152]
mov rax, QWORD PTR [r8+152]
pext rcx, rcx, rbp
mov QWORD PTR [r9+144], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+160]
mov rcx, QWORD PTR [r8+160]
pext rdx, rdx, rbp
mov QWORD PTR [r9+152], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+168]
mov rdx, QWORD PTR [r8+168]
pext rax, rax, rbp
mov QWORD PTR [r9+160], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+176]
mov rax, QWORD PTR [r8+176]
pext rcx, rcx, rbp
mov QWORD PTR [r9+168], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+184]
mov rcx, QWORD PTR [r8+184]
pext rdx, rdx, rbp
mov QWORD PTR [r9+176], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+192]
mov rdx, QWORD PTR [r8+192]
pext rax, rax, rbp
mov QWORD PTR [r9+184], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+200]
mov rax, QWORD PTR [r8+200]
pext rcx, rcx, rbp
mov QWORD PTR [r9+192], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+208]
mov rcx, QWORD PTR [r8+208]
pext rdx, rdx, rbp
mov QWORD PTR [r9+200], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+216]
mov rdx, QWORD PTR [r8+216]
pext rax, rax, rbp
mov QWORD PTR [r9+208], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+224]
mov rax, QWORD PTR [r8+224]
pext rcx, rcx, rbp
mov QWORD PTR [r9+216], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+232]
mov rcx, QWORD PTR [r8+232]
pext rdx, rdx, rbp
mov QWORD PTR [r9+224], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+240]
mov rdx, QWORD PTR [r8+240]
pext rax, rax, rbp
mov QWORD PTR [r9+232], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+248]
mov rax, QWORD PTR [r8+248]
pext rcx, rcx, rbp
mov QWORD PTR [r9+240], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+256]
mov rcx, QWORD PTR [r8+256]
pext rdx, rdx, rbp
mov QWORD PTR [r9+248], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+264]
mov rdx, QWORD PTR [r8+264]
pext rax, rax, rbp
mov QWORD PTR [r9+256], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+272]
mov rax, QWORD PTR [r8+272]
pext rcx, rcx, rbp
mov QWORD PTR [r9+264], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+280]
mov rcx, QWORD PTR [r8+280]
pext rdx, rdx, rbp
mov QWORD PTR [r9+272], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+288]
mov rdx, QWORD PTR [r8+288]
pext rax, rax, rbp
mov QWORD PTR [r9+280], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+296]
mov rax, QWORD PTR [r8+296]
pext rcx, rcx, rbp
mov QWORD PTR [r9+288], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+304]
mov rcx, QWORD PTR [r8+304]
pext rdx, rdx, rbp
mov QWORD PTR [r9+296], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+312]
mov rdx, QWORD PTR [r8+312]
pext rax, rax, rbp
mov QWORD PTR [r9+304], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+320]
mov rax, QWORD PTR [r8+320]
pext rcx, rcx, rbp
mov QWORD PTR [r9+312], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+328]
mov rcx, QWORD PTR [r8+328]
pext rdx, rdx, rbp
mov QWORD PTR [r9+320], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+336]
mov rdx, QWORD PTR [r8+336]
pext rax, rax, rbp
mov QWORD PTR [r9+328], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+344]
mov rax, QWORD PTR [r8+344]
pext rcx, rcx, rbp
mov QWORD PTR [r9+336], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+352]
mov rcx, QWORD PTR [r8+352]
pext rdx, rdx, rbp
mov QWORD PTR [r9+344], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+360]
mov rdx, QWORD PTR [r8+360]
pext rax, rax, rbp
mov QWORD PTR [r9+352], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+368]
mov rax, QWORD PTR [r8+368]
pext rcx, rcx, rbp
mov QWORD PTR [r9+360], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+376]
mov rcx, QWORD PTR [r8+376]
pext rdx, rdx, rbp
mov QWORD PTR [r9+368], rax
sbb rcx, rdx
mov QWORD PTR [r9+376], rcx
; Restore the saved registers in reverse push order.
pop rbp
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_3072_mont_reduce_avx2_48 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_3072_get_from_table_avx2_48 PROC
sub rsp, 128
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
vmovdqu OWORD PTR [rsp+32], xmm8
vmovdqu OWORD PTR [rsp+48], xmm9
vmovdqu OWORD PTR [rsp+64], xmm10
vmovdqu OWORD PTR [rsp+80], xmm11
vmovdqu OWORD PTR [rsp+96], xmm12
vmovdqu OWORD PTR [rsp+112], xmm13
mov rax, 1
movd xmm10, r8
movd xmm11, rax
vpxor ymm13, ymm13, ymm13
vpermd ymm10, ymm13, ymm10
vpermd ymm11, ymm13, ymm11
; START: 0-15
vpxor ymm13, ymm13, ymm13
vpxor ymm4, ymm4, ymm4
vpxor ymm5, ymm5, ymm5
vpxor ymm6, ymm6, ymm6
vpxor ymm7, ymm7, ymm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
vmovdqu YMMWORD PTR [rcx], ymm4
vmovdqu YMMWORD PTR [rcx+32], ymm5
vmovdqu YMMWORD PTR [rcx+64], ymm6
vmovdqu YMMWORD PTR [rcx+96], ymm7
add rcx, 128
; END: 0-15
; START: 16-31
vpxor ymm13, ymm13, ymm13
vpxor ymm4, ymm4, ymm4
vpxor ymm5, ymm5, ymm5
vpxor ymm6, ymm6, ymm6
vpxor ymm7, ymm7, ymm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
vmovdqu YMMWORD PTR [rcx], ymm4
vmovdqu YMMWORD PTR [rcx+32], ymm5
vmovdqu YMMWORD PTR [rcx+64], ymm6
vmovdqu YMMWORD PTR [rcx+96], ymm7
add rcx, 128
; END: 16-31
; START: 32-47
vpxor ymm13, ymm13, ymm13
vpxor ymm4, ymm4, ymm4
vpxor ymm5, ymm5, ymm5
vpxor ymm6, ymm6, ymm6
vpxor ymm7, ymm7, ymm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
vmovdqu YMMWORD PTR [rcx], ymm4
vmovdqu YMMWORD PTR [rcx+32], ymm5
vmovdqu YMMWORD PTR [rcx+64], ymm6
vmovdqu YMMWORD PTR [rcx+96], ymm7
; END: 32-47
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm8, OWORD PTR [rsp+32]
vmovdqu xmm9, OWORD PTR [rsp+48]
vmovdqu xmm10, OWORD PTR [rsp+64]
vmovdqu xmm11, OWORD PTR [rsp+80]
vmovdqu xmm12, OWORD PTR [rsp+96]
vmovdqu xmm13, OWORD PTR [rsp+112]
add rsp, 128
ret
sp_3072_get_from_table_avx2_48 ENDP
_text ENDS
ENDIF
; /* Conditional add: r = a + (b AND m), over 24 64-bit words.
; * m is -1 to add b and 0 to add nothing.
; *
; * r (rcx) Result of the conditional add.
; * a (rdx) Number that is always part of the sum.
; * b (r8)  Number that is added only where the mask allows it.
; * m (r9)  Mask value ANDed with every word of b.
; *
; * The carry out of the top word (0 or 1) is left in rax.
; * b is masked into a 192-byte stack scratch area first, because AND
; * rewrites the carry flag and so cannot be placed inside the ADC chain.
; * NOTE(review): rsp is adjusted in a plain PROC (no FRAME/unwind
; * directives) - confirm that is acceptable for the target's unwinder.
; */
_text SEGMENT READONLY PARA
sp_3072_cond_add_24 PROC
        sub     rsp, 192
        xor     eax, eax
        ; Pass 1: scratch[i] = b[i] AND m
        mov     r10, r9
        and     r10, QWORD PTR [r8]
        mov     QWORD PTR [rsp], r10
        mov     r11, r9
        and     r11, QWORD PTR [r8+8]
        mov     QWORD PTR [rsp+8], r11
        mov     r10, r9
        and     r10, QWORD PTR [r8+16]
        mov     QWORD PTR [rsp+16], r10
        mov     r11, r9
        and     r11, QWORD PTR [r8+24]
        mov     QWORD PTR [rsp+24], r11
        mov     r10, r9
        and     r10, QWORD PTR [r8+32]
        mov     QWORD PTR [rsp+32], r10
        mov     r11, r9
        and     r11, QWORD PTR [r8+40]
        mov     QWORD PTR [rsp+40], r11
        mov     r10, r9
        and     r10, QWORD PTR [r8+48]
        mov     QWORD PTR [rsp+48], r10
        mov     r11, r9
        and     r11, QWORD PTR [r8+56]
        mov     QWORD PTR [rsp+56], r11
        mov     r10, r9
        and     r10, QWORD PTR [r8+64]
        mov     QWORD PTR [rsp+64], r10
        mov     r11, r9
        and     r11, QWORD PTR [r8+72]
        mov     QWORD PTR [rsp+72], r11
        mov     r10, r9
        and     r10, QWORD PTR [r8+80]
        mov     QWORD PTR [rsp+80], r10
        mov     r11, r9
        and     r11, QWORD PTR [r8+88]
        mov     QWORD PTR [rsp+88], r11
        mov     r10, r9
        and     r10, QWORD PTR [r8+96]
        mov     QWORD PTR [rsp+96], r10
        mov     r11, r9
        and     r11, QWORD PTR [r8+104]
        mov     QWORD PTR [rsp+104], r11
        mov     r10, r9
        and     r10, QWORD PTR [r8+112]
        mov     QWORD PTR [rsp+112], r10
        mov     r11, r9
        and     r11, QWORD PTR [r8+120]
        mov     QWORD PTR [rsp+120], r11
        mov     r10, r9
        and     r10, QWORD PTR [r8+128]
        mov     QWORD PTR [rsp+128], r10
        mov     r11, r9
        and     r11, QWORD PTR [r8+136]
        mov     QWORD PTR [rsp+136], r11
        mov     r10, r9
        and     r10, QWORD PTR [r8+144]
        mov     QWORD PTR [rsp+144], r10
        mov     r11, r9
        and     r11, QWORD PTR [r8+152]
        mov     QWORD PTR [rsp+152], r11
        mov     r10, r9
        and     r10, QWORD PTR [r8+160]
        mov     QWORD PTR [rsp+160], r10
        mov     r11, r9
        and     r11, QWORD PTR [r8+168]
        mov     QWORD PTR [rsp+168], r11
        mov     r10, r9
        and     r10, QWORD PTR [r8+176]
        mov     QWORD PTR [rsp+176], r10
        mov     r11, r9
        and     r11, QWORD PTR [r8+184]
        mov     QWORD PTR [rsp+184], r11
        ; Pass 2: r[i] = a[i] + scratch[i] + carry.
        ; Only MOV sits between the ADCs, so the carry flag is never disturbed.
        ; Each result is stored after the next word of a has been loaded.
        mov     r10, QWORD PTR [rdx]
        add     r10, QWORD PTR [rsp]
        mov     r11, QWORD PTR [rdx+8]
        adc     r11, QWORD PTR [rsp+8]
        mov     QWORD PTR [rcx], r10
        mov     r10, QWORD PTR [rdx+16]
        adc     r10, QWORD PTR [rsp+16]
        mov     QWORD PTR [rcx+8], r11
        mov     r11, QWORD PTR [rdx+24]
        adc     r11, QWORD PTR [rsp+24]
        mov     QWORD PTR [rcx+16], r10
        mov     r10, QWORD PTR [rdx+32]
        adc     r10, QWORD PTR [rsp+32]
        mov     QWORD PTR [rcx+24], r11
        mov     r11, QWORD PTR [rdx+40]
        adc     r11, QWORD PTR [rsp+40]
        mov     QWORD PTR [rcx+32], r10
        mov     r10, QWORD PTR [rdx+48]
        adc     r10, QWORD PTR [rsp+48]
        mov     QWORD PTR [rcx+40], r11
        mov     r11, QWORD PTR [rdx+56]
        adc     r11, QWORD PTR [rsp+56]
        mov     QWORD PTR [rcx+48], r10
        mov     r10, QWORD PTR [rdx+64]
        adc     r10, QWORD PTR [rsp+64]
        mov     QWORD PTR [rcx+56], r11
        mov     r11, QWORD PTR [rdx+72]
        adc     r11, QWORD PTR [rsp+72]
        mov     QWORD PTR [rcx+64], r10
        mov     r10, QWORD PTR [rdx+80]
        adc     r10, QWORD PTR [rsp+80]
        mov     QWORD PTR [rcx+72], r11
        mov     r11, QWORD PTR [rdx+88]
        adc     r11, QWORD PTR [rsp+88]
        mov     QWORD PTR [rcx+80], r10
        mov     r10, QWORD PTR [rdx+96]
        adc     r10, QWORD PTR [rsp+96]
        mov     QWORD PTR [rcx+88], r11
        mov     r11, QWORD PTR [rdx+104]
        adc     r11, QWORD PTR [rsp+104]
        mov     QWORD PTR [rcx+96], r10
        mov     r10, QWORD PTR [rdx+112]
        adc     r10, QWORD PTR [rsp+112]
        mov     QWORD PTR [rcx+104], r11
        mov     r11, QWORD PTR [rdx+120]
        adc     r11, QWORD PTR [rsp+120]
        mov     QWORD PTR [rcx+112], r10
        mov     r10, QWORD PTR [rdx+128]
        adc     r10, QWORD PTR [rsp+128]
        mov     QWORD PTR [rcx+120], r11
        mov     r11, QWORD PTR [rdx+136]
        adc     r11, QWORD PTR [rsp+136]
        mov     QWORD PTR [rcx+128], r10
        mov     r10, QWORD PTR [rdx+144]
        adc     r10, QWORD PTR [rsp+144]
        mov     QWORD PTR [rcx+136], r11
        mov     r11, QWORD PTR [rdx+152]
        adc     r11, QWORD PTR [rsp+152]
        mov     QWORD PTR [rcx+144], r10
        mov     r10, QWORD PTR [rdx+160]
        adc     r10, QWORD PTR [rsp+160]
        mov     QWORD PTR [rcx+152], r11
        mov     r11, QWORD PTR [rdx+168]
        adc     r11, QWORD PTR [rsp+168]
        mov     QWORD PTR [rcx+160], r10
        mov     r10, QWORD PTR [rdx+176]
        adc     r10, QWORD PTR [rsp+176]
        mov     QWORD PTR [rcx+168], r11
        mov     r11, QWORD PTR [rdx+184]
        adc     r11, QWORD PTR [rsp+184]
        mov     QWORD PTR [rcx+176], r10
        mov     QWORD PTR [rcx+184], r11
        ; rax was zeroed on entry, so this leaves just the final carry in rax.
        adc     rax, 0
        add     rsp, 192
        ret
sp_3072_cond_add_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditional add: r = a + (b AND m), over 24 64-bit words.
; * m is -1 to add b and 0 to add nothing.
; *
; * r (rcx) Result of the conditional add.
; * a (rdx) Number that is always part of the sum.
; * b (r8)  Number that is added only where the mask allows it.
; * m (r9)  Mask value to apply.
; *
; * The carry out of the top word (0 or 1) is left in rax.
; * PEXT does the masking: with an all-ones mask it returns the word
; * unchanged and with a zero mask it returns 0. Unlike AND it does not
; * modify the flags, so it can sit between the ADCs.
; * Only volatile registers (rax, r10, r11) are used and rsp is never
; * adjusted, so no register save/restore is needed.
; */
_text SEGMENT READONLY PARA
sp_3072_cond_add_avx2_24 PROC
        ; rax = masked word of b, r10/r11 alternate as the running sum so the
        ; previous result can be stored after the next operands are loaded.
        ; word 0
        mov     rax, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    rax, rax, r9
        add     r10, rax
        ; word 1
        mov     rax, QWORD PTR [r8+8]
        mov     r11, QWORD PTR [rdx+8]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx], r10
        adc     r11, rax
        ; word 2
        mov     rax, QWORD PTR [r8+16]
        mov     r10, QWORD PTR [rdx+16]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+8], r11
        adc     r10, rax
        ; word 3
        mov     rax, QWORD PTR [r8+24]
        mov     r11, QWORD PTR [rdx+24]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+16], r10
        adc     r11, rax
        ; word 4
        mov     rax, QWORD PTR [r8+32]
        mov     r10, QWORD PTR [rdx+32]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+24], r11
        adc     r10, rax
        ; word 5
        mov     rax, QWORD PTR [r8+40]
        mov     r11, QWORD PTR [rdx+40]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+32], r10
        adc     r11, rax
        ; word 6
        mov     rax, QWORD PTR [r8+48]
        mov     r10, QWORD PTR [rdx+48]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+40], r11
        adc     r10, rax
        ; word 7
        mov     rax, QWORD PTR [r8+56]
        mov     r11, QWORD PTR [rdx+56]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+48], r10
        adc     r11, rax
        ; word 8
        mov     rax, QWORD PTR [r8+64]
        mov     r10, QWORD PTR [rdx+64]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+56], r11
        adc     r10, rax
        ; word 9
        mov     rax, QWORD PTR [r8+72]
        mov     r11, QWORD PTR [rdx+72]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+64], r10
        adc     r11, rax
        ; word 10
        mov     rax, QWORD PTR [r8+80]
        mov     r10, QWORD PTR [rdx+80]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+72], r11
        adc     r10, rax
        ; word 11
        mov     rax, QWORD PTR [r8+88]
        mov     r11, QWORD PTR [rdx+88]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+80], r10
        adc     r11, rax
        ; word 12
        mov     rax, QWORD PTR [r8+96]
        mov     r10, QWORD PTR [rdx+96]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+88], r11
        adc     r10, rax
        ; word 13
        mov     rax, QWORD PTR [r8+104]
        mov     r11, QWORD PTR [rdx+104]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+96], r10
        adc     r11, rax
        ; word 14
        mov     rax, QWORD PTR [r8+112]
        mov     r10, QWORD PTR [rdx+112]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+104], r11
        adc     r10, rax
        ; word 15
        mov     rax, QWORD PTR [r8+120]
        mov     r11, QWORD PTR [rdx+120]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+112], r10
        adc     r11, rax
        ; word 16
        mov     rax, QWORD PTR [r8+128]
        mov     r10, QWORD PTR [rdx+128]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+120], r11
        adc     r10, rax
        ; word 17
        mov     rax, QWORD PTR [r8+136]
        mov     r11, QWORD PTR [rdx+136]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+128], r10
        adc     r11, rax
        ; word 18
        mov     rax, QWORD PTR [r8+144]
        mov     r10, QWORD PTR [rdx+144]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+136], r11
        adc     r10, rax
        ; word 19
        mov     rax, QWORD PTR [r8+152]
        mov     r11, QWORD PTR [rdx+152]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+144], r10
        adc     r11, rax
        ; word 20
        mov     rax, QWORD PTR [r8+160]
        mov     r10, QWORD PTR [rdx+160]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+152], r11
        adc     r10, rax
        ; word 21
        mov     rax, QWORD PTR [r8+168]
        mov     r11, QWORD PTR [rdx+168]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+160], r10
        adc     r11, rax
        ; word 22
        mov     rax, QWORD PTR [r8+176]
        mov     r10, QWORD PTR [rdx+176]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+168], r11
        adc     r10, rax
        ; word 23
        mov     rax, QWORD PTR [r8+184]
        mov     r11, QWORD PTR [rdx+184]
        pext    rax, rax, r9
        mov     QWORD PTR [rcx+176], r10
        adc     r11, rax
        mov     QWORD PTR [rcx+184], r11
        ; MOV (not XOR) clears rax here: XOR would wipe the carry that the
        ; following ADC has to capture.
        mov     eax, 0
        adc     rax, 0
        ret
sp_3072_cond_add_avx2_24 ENDP
_text ENDS
ENDIF
; /* Shift number left by n bits. (r = a << n)
; *
; * r (rcx) Result of left shift by n. 49 words are written: words 0..47
; *         plus word 48, which receives the bits shifted out of a[47].
; * a (rdx) Number to shift, 48 64-bit words.
; * n (r8)  Amount to shift; only the low byte is used, as the CL count.
; *
; * rax holds the address of r on return (it is the working copy of rcx).
; * Works from the top word down with a two-register window: one register
; * holds a[i], the other is loaded with a[i-1], SHLD forms r[i], and the
; * registers swap roles for the next word. Each r[i] is stored only after
; * a[i] and a[i-1] have been loaded.
; * Only volatile registers are used and rsp is never adjusted.
; */
_text SEGMENT READONLY PARA
sp_3072_lshift_48 PROC
        mov     rax, rcx                ; rax = r; rcx is needed for the count
        mov     ecx, r8d                ; cl = n
        mov     r10, QWORD PTR [rdx+376]
        xor     r11d, r11d
        shld    r11, r10, cl            ; r[48] = bits shifted out of a[47]
        mov     QWORD PTR [rax+384], r11
        mov     r11, QWORD PTR [rdx+368]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+376], r10
        mov     r10, QWORD PTR [rdx+360]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+368], r11
        mov     r11, QWORD PTR [rdx+352]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+360], r10
        mov     r10, QWORD PTR [rdx+344]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+352], r11
        mov     r11, QWORD PTR [rdx+336]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+344], r10
        mov     r10, QWORD PTR [rdx+328]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+336], r11
        mov     r11, QWORD PTR [rdx+320]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+328], r10
        mov     r10, QWORD PTR [rdx+312]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+320], r11
        mov     r11, QWORD PTR [rdx+304]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+312], r10
        mov     r10, QWORD PTR [rdx+296]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+304], r11
        mov     r11, QWORD PTR [rdx+288]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+296], r10
        mov     r10, QWORD PTR [rdx+280]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+288], r11
        mov     r11, QWORD PTR [rdx+272]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+280], r10
        mov     r10, QWORD PTR [rdx+264]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+272], r11
        mov     r11, QWORD PTR [rdx+256]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+264], r10
        mov     r10, QWORD PTR [rdx+248]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+256], r11
        mov     r11, QWORD PTR [rdx+240]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+248], r10
        mov     r10, QWORD PTR [rdx+232]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+240], r11
        mov     r11, QWORD PTR [rdx+224]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+232], r10
        mov     r10, QWORD PTR [rdx+216]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+224], r11
        mov     r11, QWORD PTR [rdx+208]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+216], r10
        mov     r10, QWORD PTR [rdx+200]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+208], r11
        mov     r11, QWORD PTR [rdx+192]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+200], r10
        mov     r10, QWORD PTR [rdx+184]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+192], r11
        mov     r11, QWORD PTR [rdx+176]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+184], r10
        mov     r10, QWORD PTR [rdx+168]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+176], r11
        mov     r11, QWORD PTR [rdx+160]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+168], r10
        mov     r10, QWORD PTR [rdx+152]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+160], r11
        mov     r11, QWORD PTR [rdx+144]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+152], r10
        mov     r10, QWORD PTR [rdx+136]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+144], r11
        mov     r11, QWORD PTR [rdx+128]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+136], r10
        mov     r10, QWORD PTR [rdx+120]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+128], r11
        mov     r11, QWORD PTR [rdx+112]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+120], r10
        mov     r10, QWORD PTR [rdx+104]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+112], r11
        mov     r11, QWORD PTR [rdx+96]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+104], r10
        mov     r10, QWORD PTR [rdx+88]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+96], r11
        mov     r11, QWORD PTR [rdx+80]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+88], r10
        mov     r10, QWORD PTR [rdx+72]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+80], r11
        mov     r11, QWORD PTR [rdx+64]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+72], r10
        mov     r10, QWORD PTR [rdx+56]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+64], r11
        mov     r11, QWORD PTR [rdx+48]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+56], r10
        mov     r10, QWORD PTR [rdx+40]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+48], r11
        mov     r11, QWORD PTR [rdx+32]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+40], r10
        mov     r10, QWORD PTR [rdx+24]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+32], r11
        mov     r11, QWORD PTR [rdx+16]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+24], r10
        mov     r10, QWORD PTR [rdx+8]
        shld    r11, r10, cl
        mov     QWORD PTR [rax+16], r11
        mov     r11, QWORD PTR [rdx]
        shld    r10, r11, cl
        mov     QWORD PTR [rax+8], r10
        shl     r11, cl                 ; lowest word: zeros are shifted in
        mov     QWORD PTR [rax], r11
        ret
sp_3072_lshift_48 ENDP
_text ENDS
ENDIF
ENDIF
IFDEF WOLFSSL_SP_4096
IFDEF WOLFSSL_SP_4096
; /* Read big endian unsigned byte array into r.
; * Uses the bswap instruction.
; *
; * r    (rcx) A single precision integer; 512 bytes are always written.
; * size (rdx) Maximum number of bytes to convert. Not read by this
; *            routine; rdx is reused to hold the end-of-output address.
; * a    (r8)  Byte array.
; * n    (r9)  Number of bytes in array to read.
; *
; * The array is consumed from its last byte backwards: 64 bytes at a time,
; * then 8 at a time, then the 1..7 leading bytes are packed into one word.
; * Remaining words up to r+512 are set to zero. n is not checked against
; * the 512-byte output here - presumably the caller guarantees n <= 512.
; * Only volatile registers are used and rsp is never adjusted.
; */
_text SEGMENT READONLY PARA
sp_4096_from_bin_bswap PROC
        lea     r11, [r8+r9]            ; read cursor = a + n
        lea     rdx, [rcx+512]          ; end of output
        jmp     L_4096_from_bin_bswap_64_end
L_4096_from_bin_bswap_64_start:
        sub     r11, 64
        mov     rax, QWORD PTR [r11+56]
        mov     r10, QWORD PTR [r11+48]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        mov     rax, QWORD PTR [r11+40]
        mov     r10, QWORD PTR [r11+32]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        mov     rax, QWORD PTR [r11+24]
        mov     r10, QWORD PTR [r11+16]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        mov     rax, QWORD PTR [r11+8]
        mov     r10, QWORD PTR [r11]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
L_4096_from_bin_bswap_64_end:
        cmp     r9, 63                  ; n is tested as a signed count
        jg      L_4096_from_bin_bswap_64_start
        jmp     L_4096_from_bin_bswap_8_end
L_4096_from_bin_bswap_8_start:
        sub     r11, 8
        mov     rax, QWORD PTR [r11]
        bswap   rax
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_4096_from_bin_bswap_8_end:
        cmp     r9, 7
        jg      L_4096_from_bin_bswap_8_start
        test    r9, r9
        jz      L_4096_from_bin_bswap_hi_end
        ; Leading bytes: accumulate a[0], a[1], ... most significant first.
        xor     r10d, r10d
        xor     eax, eax                ; upper bytes of rax stay zero below
L_4096_from_bin_bswap_hi_start:
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_4096_from_bin_bswap_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_4096_from_bin_bswap_hi_end:
        ; Addresses are compared unsigned.
        cmp     rcx, rdx
        jae     L_4096_from_bin_bswap_zero_end
L_4096_from_bin_bswap_zero_start:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, rdx
        jb      L_4096_from_bin_bswap_zero_start
L_4096_from_bin_bswap_zero_end:
        ret
sp_4096_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
; * Uses the movbe instruction which is an optional instruction.
; *
; * r    (rcx) A single precision integer; 512 bytes are always written.
; * size (rdx) Maximum number of bytes to convert. Not read by this
; *            routine; rdx is reused to hold the end-of-output address.
; * a    (r8)  Byte array.
; * n    (r9)  Number of bytes in array to read.
; *
; * The array is consumed from its last byte backwards: 64 bytes at a time,
; * then 8 at a time, then the 1..7 leading bytes are packed into one word.
; * Remaining words up to r+512 are set to zero. n is not checked against
; * the 512-byte output here - presumably the caller guarantees n <= 512.
; * Only volatile registers are used and rsp is never adjusted.
; */
_text SEGMENT READONLY PARA
sp_4096_from_bin_movbe PROC
        lea     r11, [r8+r9]            ; read cursor = a + n
        lea     rdx, [rcx+512]          ; end of output
        jmp     L_4096_from_bin_movbe_64_end
L_4096_from_bin_movbe_64_start:
        sub     r11, 64
        movbe   rax, QWORD PTR [r11+56]
        movbe   r10, QWORD PTR [r11+48]
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        movbe   rax, QWORD PTR [r11+40]
        movbe   r10, QWORD PTR [r11+32]
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        movbe   rax, QWORD PTR [r11+24]
        movbe   r10, QWORD PTR [r11+16]
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        movbe   rax, QWORD PTR [r11+8]
        movbe   r10, QWORD PTR [r11]
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
L_4096_from_bin_movbe_64_end:
        cmp     r9, 63                  ; n is tested as a signed count
        jg      L_4096_from_bin_movbe_64_start
        jmp     L_4096_from_bin_movbe_8_end
L_4096_from_bin_movbe_8_start:
        sub     r11, 8
        movbe   rax, QWORD PTR [r11]
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_4096_from_bin_movbe_8_end:
        cmp     r9, 7
        jg      L_4096_from_bin_movbe_8_start
        test    r9, r9
        jz      L_4096_from_bin_movbe_hi_end
        ; Leading bytes: accumulate a[0], a[1], ... most significant first.
        xor     r10d, r10d
        xor     eax, eax                ; upper bytes of rax stay zero below
L_4096_from_bin_movbe_hi_start:
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_4096_from_bin_movbe_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_4096_from_bin_movbe_hi_end:
        ; Addresses are compared unsigned.
        cmp     rcx, rdx
        jae     L_4096_from_bin_movbe_zero_end
L_4096_from_bin_movbe_zero_start:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, rdx
        jb      L_4096_from_bin_movbe_zero_start
L_4096_from_bin_movbe_zero_end:
        ret
sp_4096_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 512
; * Uses the bswap instruction.
; *
; * r (rcx) A single precision integer, 64 64-bit words.
; * a (rdx) Byte array that receives the 512 bytes.
; *
; * Words are taken from the top of r downwards and each one is byte
; * swapped, so the most significant byte of r lands in a[0].
; * Eight passes of eight words each; rdx is advanced as the write cursor.
; */
_text SEGMENT READONLY PARA
sp_4096_to_bin_bswap_64 PROC
        lea     r9, [rcx+512]           ; read cursor: one past the top word
L_4096_to_bin_bswap_64_chunk:
        sub     r9, 64
        mov     rax, QWORD PTR [r9+56]
        mov     r8, QWORD PTR [r9+48]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx], rax
        mov     QWORD PTR [rdx+8], r8
        mov     rax, QWORD PTR [r9+40]
        mov     r8, QWORD PTR [r9+32]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+16], rax
        mov     QWORD PTR [rdx+24], r8
        mov     rax, QWORD PTR [r9+24]
        mov     r8, QWORD PTR [r9+16]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+32], rax
        mov     QWORD PTR [rdx+40], r8
        mov     rax, QWORD PTR [r9+8]
        mov     r8, QWORD PTR [r9]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+48], rax
        mov     QWORD PTR [rdx+56], r8
        add     rdx, 64
        cmp     r9, rcx                 ; done once the cursor is back at r[0]
        jne     L_4096_to_bin_bswap_64_chunk
        ret
sp_4096_to_bin_bswap_64 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 512
; * Uses the movbe instruction which is optional.
; *
; * r (rcx) A single precision integer, 64 64-bit words.
; * a (rdx) Byte array that receives the 512 bytes.
; *
; * Words are taken from the top of r downwards and loaded byte-reversed
; * with MOVBE, so the most significant byte of r lands in a[0].
; * Eight passes of eight words each; rdx is advanced as the write cursor.
; */
_text SEGMENT READONLY PARA
sp_4096_to_bin_movbe_64 PROC
        lea     r9, [rcx+512]           ; read cursor: one past the top word
L_4096_to_bin_movbe_64_chunk:
        sub     r9, 64
        movbe   rax, QWORD PTR [r9+56]
        movbe   r8, QWORD PTR [r9+48]
        mov     QWORD PTR [rdx], rax
        mov     QWORD PTR [rdx+8], r8
        movbe   rax, QWORD PTR [r9+40]
        movbe   r8, QWORD PTR [r9+32]
        mov     QWORD PTR [rdx+16], rax
        mov     QWORD PTR [rdx+24], r8
        movbe   rax, QWORD PTR [r9+24]
        movbe   r8, QWORD PTR [r9+16]
        mov     QWORD PTR [rdx+32], rax
        mov     QWORD PTR [rdx+40], r8
        movbe   rax, QWORD PTR [r9+8]
        movbe   r8, QWORD PTR [r9]
        mov     QWORD PTR [rdx+48], rax
        mov     QWORD PTR [rdx+56], r8
        add     rdx, 64
        cmp     r9, rcx                 ; done once the cursor is back at r[0]
        jne     L_4096_to_bin_movbe_64_chunk
        ret
sp_4096_to_bin_movbe_64 ENDP
_text ENDS
ENDIF
; /* Sub b from a into a. (a -= b)
; *
; * a A single precision integer and result.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_4096_sub_in_place_64 PROC
; In:   rcx = a, 64 qwords, overwritten with a - b.
;       rdx = b, 64 qwords, only read.
; Out:  rax = 0 when no borrow leaves the top word, all ones (-1) otherwise.
; Uses: r8 (even words) and r9 (odd words) as alternating working registers.
; Only MOV instructions sit between the SUB/SBB steps, so CF carries the
; borrow untouched from word 0 through word 63.

        ; Words 0 and 1 open the borrow chain.
        mov     r8, QWORD PTR [rcx]
        sub     r8, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rcx+8]
        mov     QWORD PTR [rcx], r8
        sbb     r9, QWORD PTR [rdx+8]

        ; Words 2..63, two per pass (31 passes). The block is expanded at
        ; assembly time, so the emitted code stays fully unrolled; the offset
        ; symbol is an assembler constant and produces no instructions.
        ; Each pass loads the next word before storing the previous result.
sp_4096_sub_in_place_64_off = 16
REPEAT 31
        mov     r8, QWORD PTR [rcx+sp_4096_sub_in_place_64_off]
        mov     QWORD PTR [rcx+sp_4096_sub_in_place_64_off-8], r9
        sbb     r8, QWORD PTR [rdx+sp_4096_sub_in_place_64_off]
        mov     r9, QWORD PTR [rcx+sp_4096_sub_in_place_64_off+8]
        mov     QWORD PTR [rcx+sp_4096_sub_in_place_64_off], r8
        sbb     r9, QWORD PTR [rdx+sp_4096_sub_in_place_64_off+8]
sp_4096_sub_in_place_64_off = sp_4096_sub_in_place_64_off + 16
ENDM

        ; Store the top word, then turn the final borrow into the result:
        ; rax - rax - CF is 0 or -1.
        mov     QWORD PTR [rcx+504], r9
        sbb     rax, rax
        ret
sp_4096_sub_in_place_64 ENDP
_text ENDS
; /* Add two 64-word (4096-bit) integers: r = a + b.
; *
; * r  Result, 64 words, passed in rcx.
; * a  First addend, 64 words, passed in rdx.
; * b  Second addend, 64 words, passed in r8.
; *
; * Returns the carry out of the top word (0 or 1) in rax.
; */
_text SEGMENT READONLY PARA
sp_4096_add_64 PROC
; Uses r9 (even words) and r10 (odd words) as alternating working registers.
; rax is cleared before the first ADD; after that only MOV sits between the
; ADD/ADC steps, so CF carries from word 0 through word 63.

        ; Words 0 and 1 open the carry chain.
        mov     r9, QWORD PTR [rdx]
        xor     rax, rax
        add     r9, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx+8]
        mov     QWORD PTR [rcx], r9
        adc     r10, QWORD PTR [r8+8]

        ; Words 2..63, two per pass (31 passes). The block is expanded at
        ; assembly time, so the emitted code stays fully unrolled; the offset
        ; symbol is an assembler constant and produces no instructions.
        ; Each pass loads the next word of a before storing the previous sum.
sp_4096_add_64_off = 16
REPEAT 31
        mov     r9, QWORD PTR [rdx+sp_4096_add_64_off]
        mov     QWORD PTR [rcx+sp_4096_add_64_off-8], r10
        adc     r9, QWORD PTR [r8+sp_4096_add_64_off]
        mov     r10, QWORD PTR [rdx+sp_4096_add_64_off+8]
        mov     QWORD PTR [rcx+sp_4096_add_64_off], r9
        adc     r10, QWORD PTR [r8+sp_4096_add_64_off+8]
sp_4096_add_64_off = sp_4096_add_64_off + 16
ENDM

        ; Store the top word, then fold the final carry into the zeroed rax.
        mov     QWORD PTR [rcx+504], r10
        adc     rax, 0
        ret
sp_4096_add_64 ENDP
_text ENDS
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_4096_mul_64 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
sub rsp, 1576
mov QWORD PTR [rsp+1536], rcx
mov QWORD PTR [rsp+1544], rdx
mov QWORD PTR [rsp+1552], r8
lea r12, QWORD PTR [rsp+1024]
lea r14, QWORD PTR [rdx+256]
; Add
mov rax, QWORD PTR [rdx]
xor r15, r15
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [r12], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [r12+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [rdx+24]
mov QWORD PTR [r12+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [r12+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [r12+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r12+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [rdx+56]
mov QWORD PTR [r12+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [r12+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [rdx+72]
mov QWORD PTR [r12+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [r12+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [r12+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r12+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [rdx+104]
mov QWORD PTR [r12+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [rdx+112]
mov QWORD PTR [r12+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [rdx+120]
mov QWORD PTR [r12+112], r10
adc rax, QWORD PTR [r14+120]
mov r9, QWORD PTR [rdx+128]
mov QWORD PTR [r12+120], rax
adc r9, QWORD PTR [r14+128]
mov r10, QWORD PTR [rdx+136]
mov QWORD PTR [r12+128], r9
adc r10, QWORD PTR [r14+136]
mov rax, QWORD PTR [rdx+144]
mov QWORD PTR [r12+136], r10
adc rax, QWORD PTR [r14+144]
mov r9, QWORD PTR [rdx+152]
mov QWORD PTR [r12+144], rax
adc r9, QWORD PTR [r14+152]
mov r10, QWORD PTR [rdx+160]
mov QWORD PTR [r12+152], r9
adc r10, QWORD PTR [r14+160]
mov rax, QWORD PTR [rdx+168]
mov QWORD PTR [r12+160], r10
adc rax, QWORD PTR [r14+168]
mov r9, QWORD PTR [rdx+176]
mov QWORD PTR [r12+168], rax
adc r9, QWORD PTR [r14+176]
mov r10, QWORD PTR [rdx+184]
mov QWORD PTR [r12+176], r9
adc r10, QWORD PTR [r14+184]
mov rax, QWORD PTR [rdx+192]
mov QWORD PTR [r12+184], r10
adc rax, QWORD PTR [r14+192]
mov r9, QWORD PTR [rdx+200]
mov QWORD PTR [r12+192], rax
adc r9, QWORD PTR [r14+200]
mov r10, QWORD PTR [rdx+208]
mov QWORD PTR [r12+200], r9
adc r10, QWORD PTR [r14+208]
mov rax, QWORD PTR [rdx+216]
mov QWORD PTR [r12+208], r10
adc rax, QWORD PTR [r14+216]
mov r9, QWORD PTR [rdx+224]
mov QWORD PTR [r12+216], rax
adc r9, QWORD PTR [r14+224]
mov r10, QWORD PTR [rdx+232]
mov QWORD PTR [r12+224], r9
adc r10, QWORD PTR [r14+232]
mov rax, QWORD PTR [rdx+240]
mov QWORD PTR [r12+232], r10
adc rax, QWORD PTR [r14+240]
mov r9, QWORD PTR [rdx+248]
mov QWORD PTR [r12+240], rax
adc r9, QWORD PTR [r14+248]
mov QWORD PTR [r12+248], r9
adc r15, 0
mov QWORD PTR [rsp+1560], r15
lea r13, QWORD PTR [rsp+1280]
lea r14, QWORD PTR [r8+256]
; Add
mov rax, QWORD PTR [r8]
xor rdi, rdi
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [r8+8]
mov QWORD PTR [r13], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [r8+16]
mov QWORD PTR [r13+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [r8+24]
mov QWORD PTR [r13+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [r8+32]
mov QWORD PTR [r13+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [r8+40]
mov QWORD PTR [r13+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [r8+48]
mov QWORD PTR [r13+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [r8+56]
mov QWORD PTR [r13+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [r8+64]
mov QWORD PTR [r13+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [r8+72]
mov QWORD PTR [r13+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [r8+80]
mov QWORD PTR [r13+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [r8+88]
mov QWORD PTR [r13+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [r8+96]
mov QWORD PTR [r13+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [r8+104]
mov QWORD PTR [r13+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [r8+112]
mov QWORD PTR [r13+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [r8+120]
mov QWORD PTR [r13+112], r10
adc rax, QWORD PTR [r14+120]
mov r9, QWORD PTR [r8+128]
mov QWORD PTR [r13+120], rax
adc r9, QWORD PTR [r14+128]
mov r10, QWORD PTR [r8+136]
mov QWORD PTR [r13+128], r9
adc r10, QWORD PTR [r14+136]
mov rax, QWORD PTR [r8+144]
mov QWORD PTR [r13+136], r10
adc rax, QWORD PTR [r14+144]
mov r9, QWORD PTR [r8+152]
mov QWORD PTR [r13+144], rax
adc r9, QWORD PTR [r14+152]
mov r10, QWORD PTR [r8+160]
mov QWORD PTR [r13+152], r9
adc r10, QWORD PTR [r14+160]
mov rax, QWORD PTR [r8+168]
mov QWORD PTR [r13+160], r10
adc rax, QWORD PTR [r14+168]
mov r9, QWORD PTR [r8+176]
mov QWORD PTR [r13+168], rax
adc r9, QWORD PTR [r14+176]
mov r10, QWORD PTR [r8+184]
mov QWORD PTR [r13+176], r9
adc r10, QWORD PTR [r14+184]
mov rax, QWORD PTR [r8+192]
mov QWORD PTR [r13+184], r10
adc rax, QWORD PTR [r14+192]
mov r9, QWORD PTR [r8+200]
mov QWORD PTR [r13+192], rax
adc r9, QWORD PTR [r14+200]
mov r10, QWORD PTR [r8+208]
mov QWORD PTR [r13+200], r9
adc r10, QWORD PTR [r14+208]
mov rax, QWORD PTR [r8+216]
mov QWORD PTR [r13+208], r10
adc rax, QWORD PTR [r14+216]
mov r9, QWORD PTR [r8+224]
mov QWORD PTR [r13+216], rax
adc r9, QWORD PTR [r14+224]
mov r10, QWORD PTR [r8+232]
mov QWORD PTR [r13+224], r9
adc r10, QWORD PTR [r14+232]
mov rax, QWORD PTR [r8+240]
mov QWORD PTR [r13+232], r10
adc rax, QWORD PTR [r14+240]
mov r9, QWORD PTR [r8+248]
mov QWORD PTR [r13+240], rax
adc r9, QWORD PTR [r14+248]
mov QWORD PTR [r13+248], r9
adc rdi, 0
mov QWORD PTR [rsp+1568], rdi
mov r8, r13
mov rdx, r12
mov rcx, rsp
call sp_2048_mul_32
mov r8, QWORD PTR [rsp+1552]
mov rdx, QWORD PTR [rsp+1544]
lea rcx, QWORD PTR [rsp+512]
add r8, 256
add rdx, 256
call sp_2048_mul_32
mov r8, QWORD PTR [rsp+1552]
mov rdx, QWORD PTR [rsp+1544]
mov rcx, QWORD PTR [rsp+1536]
call sp_2048_mul_32
IFDEF _WIN64
mov r8, QWORD PTR [rsp+1552]
mov rdx, QWORD PTR [rsp+1544]
mov rcx, QWORD PTR [rsp+1536]
ENDIF
mov r15, QWORD PTR [rsp+1560]
mov rdi, QWORD PTR [rsp+1568]
mov rsi, QWORD PTR [rsp+1536]
mov r11, r15
lea r12, QWORD PTR [rsp+1024]
lea r13, QWORD PTR [rsp+1280]
and r11, rdi
neg r15
neg rdi
add rsi, 512
mov rax, QWORD PTR [r12]
mov r9, QWORD PTR [r13]
and rax, rdi
and r9, r15
mov QWORD PTR [r12], rax
mov QWORD PTR [r13], r9
mov rax, QWORD PTR [r12+8]
mov r9, QWORD PTR [r13+8]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+8], rax
mov QWORD PTR [r13+8], r9
mov rax, QWORD PTR [r12+16]
mov r9, QWORD PTR [r13+16]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+16], rax
mov QWORD PTR [r13+16], r9
mov rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [r13+24]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+24], rax
mov QWORD PTR [r13+24], r9
mov rax, QWORD PTR [r12+32]
mov r9, QWORD PTR [r13+32]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+32], rax
mov QWORD PTR [r13+32], r9
mov rax, QWORD PTR [r12+40]
mov r9, QWORD PTR [r13+40]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+40], rax
mov QWORD PTR [r13+40], r9
mov rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [r13+48]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+48], rax
mov QWORD PTR [r13+48], r9
mov rax, QWORD PTR [r12+56]
mov r9, QWORD PTR [r13+56]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+56], rax
mov QWORD PTR [r13+56], r9
mov rax, QWORD PTR [r12+64]
mov r9, QWORD PTR [r13+64]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+64], rax
mov QWORD PTR [r13+64], r9
mov rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [r13+72]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+72], rax
mov QWORD PTR [r13+72], r9
mov rax, QWORD PTR [r12+80]
mov r9, QWORD PTR [r13+80]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+80], rax
mov QWORD PTR [r13+80], r9
mov rax, QWORD PTR [r12+88]
mov r9, QWORD PTR [r13+88]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+88], rax
mov QWORD PTR [r13+88], r9
mov rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [r13+96]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+96], rax
mov QWORD PTR [r13+96], r9
mov rax, QWORD PTR [r12+104]
mov r9, QWORD PTR [r13+104]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+104], rax
mov QWORD PTR [r13+104], r9
mov rax, QWORD PTR [r12+112]
mov r9, QWORD PTR [r13+112]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+112], rax
mov QWORD PTR [r13+112], r9
mov rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [r13+120]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+120], rax
mov QWORD PTR [r13+120], r9
mov rax, QWORD PTR [r12+128]
mov r9, QWORD PTR [r13+128]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+128], rax
mov QWORD PTR [r13+128], r9
mov rax, QWORD PTR [r12+136]
mov r9, QWORD PTR [r13+136]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+136], rax
mov QWORD PTR [r13+136], r9
mov rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [r13+144]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+144], rax
mov QWORD PTR [r13+144], r9
mov rax, QWORD PTR [r12+152]
mov r9, QWORD PTR [r13+152]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+152], rax
mov QWORD PTR [r13+152], r9
mov rax, QWORD PTR [r12+160]
mov r9, QWORD PTR [r13+160]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+160], rax
mov QWORD PTR [r13+160], r9
mov rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [r13+168]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+168], rax
mov QWORD PTR [r13+168], r9
mov rax, QWORD PTR [r12+176]
mov r9, QWORD PTR [r13+176]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+176], rax
mov QWORD PTR [r13+176], r9
mov rax, QWORD PTR [r12+184]
mov r9, QWORD PTR [r13+184]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+184], rax
mov QWORD PTR [r13+184], r9
mov rax, QWORD PTR [r12+192]
mov r9, QWORD PTR [r13+192]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+192], rax
mov QWORD PTR [r13+192], r9
mov rax, QWORD PTR [r12+200]
mov r9, QWORD PTR [r13+200]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+200], rax
mov QWORD PTR [r13+200], r9
mov rax, QWORD PTR [r12+208]
mov r9, QWORD PTR [r13+208]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+208], rax
mov QWORD PTR [r13+208], r9
mov rax, QWORD PTR [r12+216]
mov r9, QWORD PTR [r13+216]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+216], rax
mov QWORD PTR [r13+216], r9
mov rax, QWORD PTR [r12+224]
mov r9, QWORD PTR [r13+224]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+224], rax
mov QWORD PTR [r13+224], r9
mov rax, QWORD PTR [r12+232]
mov r9, QWORD PTR [r13+232]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+232], rax
mov QWORD PTR [r13+232], r9
mov rax, QWORD PTR [r12+240]
mov r9, QWORD PTR [r13+240]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+240], rax
mov QWORD PTR [r13+240], r9
mov rax, QWORD PTR [r12+248]
mov r9, QWORD PTR [r13+248]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+248], rax
mov QWORD PTR [r13+248], r9
mov rax, QWORD PTR [r12]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r13+248]
mov QWORD PTR [rsi+248], r9
adc r11, 0
lea r13, QWORD PTR [rsp+512]
mov r12, rsp
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [r13+248]
mov r10, QWORD PTR [r12+256]
mov QWORD PTR [r12+248], r9
sbb r10, QWORD PTR [r13+256]
mov rax, QWORD PTR [r12+264]
mov QWORD PTR [r12+256], r10
sbb rax, QWORD PTR [r13+264]
mov r9, QWORD PTR [r12+272]
mov QWORD PTR [r12+264], rax
sbb r9, QWORD PTR [r13+272]
mov r10, QWORD PTR [r12+280]
mov QWORD PTR [r12+272], r9
sbb r10, QWORD PTR [r13+280]
mov rax, QWORD PTR [r12+288]
mov QWORD PTR [r12+280], r10
sbb rax, QWORD PTR [r13+288]
mov r9, QWORD PTR [r12+296]
mov QWORD PTR [r12+288], rax
sbb r9, QWORD PTR [r13+296]
mov r10, QWORD PTR [r12+304]
mov QWORD PTR [r12+296], r9
sbb r10, QWORD PTR [r13+304]
mov rax, QWORD PTR [r12+312]
mov QWORD PTR [r12+304], r10
sbb rax, QWORD PTR [r13+312]
mov r9, QWORD PTR [r12+320]
mov QWORD PTR [r12+312], rax
sbb r9, QWORD PTR [r13+320]
mov r10, QWORD PTR [r12+328]
mov QWORD PTR [r12+320], r9
sbb r10, QWORD PTR [r13+328]
mov rax, QWORD PTR [r12+336]
mov QWORD PTR [r12+328], r10
sbb rax, QWORD PTR [r13+336]
mov r9, QWORD PTR [r12+344]
mov QWORD PTR [r12+336], rax
sbb r9, QWORD PTR [r13+344]
mov r10, QWORD PTR [r12+352]
mov QWORD PTR [r12+344], r9
sbb r10, QWORD PTR [r13+352]
mov rax, QWORD PTR [r12+360]
mov QWORD PTR [r12+352], r10
sbb rax, QWORD PTR [r13+360]
mov r9, QWORD PTR [r12+368]
mov QWORD PTR [r12+360], rax
sbb r9, QWORD PTR [r13+368]
mov r10, QWORD PTR [r12+376]
mov QWORD PTR [r12+368], r9
sbb r10, QWORD PTR [r13+376]
mov rax, QWORD PTR [r12+384]
mov QWORD PTR [r12+376], r10
sbb rax, QWORD PTR [r13+384]
mov r9, QWORD PTR [r12+392]
mov QWORD PTR [r12+384], rax
sbb r9, QWORD PTR [r13+392]
mov r10, QWORD PTR [r12+400]
mov QWORD PTR [r12+392], r9
sbb r10, QWORD PTR [r13+400]
mov rax, QWORD PTR [r12+408]
mov QWORD PTR [r12+400], r10
sbb rax, QWORD PTR [r13+408]
mov r9, QWORD PTR [r12+416]
mov QWORD PTR [r12+408], rax
sbb r9, QWORD PTR [r13+416]
mov r10, QWORD PTR [r12+424]
mov QWORD PTR [r12+416], r9
sbb r10, QWORD PTR [r13+424]
mov rax, QWORD PTR [r12+432]
mov QWORD PTR [r12+424], r10
sbb rax, QWORD PTR [r13+432]
mov r9, QWORD PTR [r12+440]
mov QWORD PTR [r12+432], rax
sbb r9, QWORD PTR [r13+440]
mov r10, QWORD PTR [r12+448]
mov QWORD PTR [r12+440], r9
sbb r10, QWORD PTR [r13+448]
mov rax, QWORD PTR [r12+456]
mov QWORD PTR [r12+448], r10
sbb rax, QWORD PTR [r13+456]
mov r9, QWORD PTR [r12+464]
mov QWORD PTR [r12+456], rax
sbb r9, QWORD PTR [r13+464]
mov r10, QWORD PTR [r12+472]
mov QWORD PTR [r12+464], r9
sbb r10, QWORD PTR [r13+472]
mov rax, QWORD PTR [r12+480]
mov QWORD PTR [r12+472], r10
sbb rax, QWORD PTR [r13+480]
mov r9, QWORD PTR [r12+488]
mov QWORD PTR [r12+480], rax
sbb r9, QWORD PTR [r13+488]
mov r10, QWORD PTR [r12+496]
mov QWORD PTR [r12+488], r9
sbb r10, QWORD PTR [r13+496]
mov rax, QWORD PTR [r12+504]
mov QWORD PTR [r12+496], r10
sbb rax, QWORD PTR [r13+504]
mov QWORD PTR [r12+504], rax
sbb r11, 0
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [rcx]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [rcx+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [rcx+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [rcx+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [rcx+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [rcx+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [rcx+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [rcx+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [rcx+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [rcx+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [rcx+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [rcx+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [rcx+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [rcx+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [rcx+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [rcx+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [rcx+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [rcx+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [rcx+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [rcx+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [rcx+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [rcx+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [rcx+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [rcx+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [rcx+248]
mov r10, QWORD PTR [r12+256]
mov QWORD PTR [r12+248], r9
sbb r10, QWORD PTR [rcx+256]
mov rax, QWORD PTR [r12+264]
mov QWORD PTR [r12+256], r10
sbb rax, QWORD PTR [rcx+264]
mov r9, QWORD PTR [r12+272]
mov QWORD PTR [r12+264], rax
sbb r9, QWORD PTR [rcx+272]
mov r10, QWORD PTR [r12+280]
mov QWORD PTR [r12+272], r9
sbb r10, QWORD PTR [rcx+280]
mov rax, QWORD PTR [r12+288]
mov QWORD PTR [r12+280], r10
sbb rax, QWORD PTR [rcx+288]
mov r9, QWORD PTR [r12+296]
mov QWORD PTR [r12+288], rax
sbb r9, QWORD PTR [rcx+296]
mov r10, QWORD PTR [r12+304]
mov QWORD PTR [r12+296], r9
sbb r10, QWORD PTR [rcx+304]
mov rax, QWORD PTR [r12+312]
mov QWORD PTR [r12+304], r10
sbb rax, QWORD PTR [rcx+312]
mov r9, QWORD PTR [r12+320]
mov QWORD PTR [r12+312], rax
sbb r9, QWORD PTR [rcx+320]
mov r10, QWORD PTR [r12+328]
mov QWORD PTR [r12+320], r9
sbb r10, QWORD PTR [rcx+328]
mov rax, QWORD PTR [r12+336]
mov QWORD PTR [r12+328], r10
sbb rax, QWORD PTR [rcx+336]
mov r9, QWORD PTR [r12+344]
mov QWORD PTR [r12+336], rax
sbb r9, QWORD PTR [rcx+344]
mov r10, QWORD PTR [r12+352]
mov QWORD PTR [r12+344], r9
sbb r10, QWORD PTR [rcx+352]
mov rax, QWORD PTR [r12+360]
mov QWORD PTR [r12+352], r10
sbb rax, QWORD PTR [rcx+360]
mov r9, QWORD PTR [r12+368]
mov QWORD PTR [r12+360], rax
sbb r9, QWORD PTR [rcx+368]
mov r10, QWORD PTR [r12+376]
mov QWORD PTR [r12+368], r9
sbb r10, QWORD PTR [rcx+376]
mov rax, QWORD PTR [r12+384]
mov QWORD PTR [r12+376], r10
sbb rax, QWORD PTR [rcx+384]
mov r9, QWORD PTR [r12+392]
mov QWORD PTR [r12+384], rax
sbb r9, QWORD PTR [rcx+392]
mov r10, QWORD PTR [r12+400]
mov QWORD PTR [r12+392], r9
sbb r10, QWORD PTR [rcx+400]
mov rax, QWORD PTR [r12+408]
mov QWORD PTR [r12+400], r10
sbb rax, QWORD PTR [rcx+408]
mov r9, QWORD PTR [r12+416]
mov QWORD PTR [r12+408], rax
sbb r9, QWORD PTR [rcx+416]
mov r10, QWORD PTR [r12+424]
mov QWORD PTR [r12+416], r9
sbb r10, QWORD PTR [rcx+424]
mov rax, QWORD PTR [r12+432]
mov QWORD PTR [r12+424], r10
sbb rax, QWORD PTR [rcx+432]
mov r9, QWORD PTR [r12+440]
mov QWORD PTR [r12+432], rax
sbb r9, QWORD PTR [rcx+440]
mov r10, QWORD PTR [r12+448]
mov QWORD PTR [r12+440], r9
sbb r10, QWORD PTR [rcx+448]
mov rax, QWORD PTR [r12+456]
mov QWORD PTR [r12+448], r10
sbb rax, QWORD PTR [rcx+456]
mov r9, QWORD PTR [r12+464]
mov QWORD PTR [r12+456], rax
sbb r9, QWORD PTR [rcx+464]
mov r10, QWORD PTR [r12+472]
mov QWORD PTR [r12+464], r9
sbb r10, QWORD PTR [rcx+472]
mov rax, QWORD PTR [r12+480]
mov QWORD PTR [r12+472], r10
sbb rax, QWORD PTR [rcx+480]
mov r9, QWORD PTR [r12+488]
mov QWORD PTR [r12+480], rax
sbb r9, QWORD PTR [rcx+488]
mov r10, QWORD PTR [r12+496]
mov QWORD PTR [r12+488], r9
sbb r10, QWORD PTR [rcx+496]
mov rax, QWORD PTR [r12+504]
mov QWORD PTR [r12+496], r10
sbb rax, QWORD PTR [rcx+504]
mov QWORD PTR [r12+504], rax
sbb r11, 0
sub rsi, 256
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r12]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r12+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r12+192]
mov r9, QWORD PTR [rsi+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r12+200]
mov r10, QWORD PTR [rsi+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r12+208]
mov rax, QWORD PTR [rsi+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r12+216]
mov r9, QWORD PTR [rsi+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r12+224]
mov r10, QWORD PTR [rsi+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r12+232]
mov rax, QWORD PTR [rsi+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r12+240]
mov r9, QWORD PTR [rsi+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r12+248]
mov r10, QWORD PTR [rsi+256]
mov QWORD PTR [rsi+248], r9
adc r10, QWORD PTR [r12+256]
mov rax, QWORD PTR [rsi+264]
mov QWORD PTR [rsi+256], r10
adc rax, QWORD PTR [r12+264]
mov r9, QWORD PTR [rsi+272]
mov QWORD PTR [rsi+264], rax
adc r9, QWORD PTR [r12+272]
mov r10, QWORD PTR [rsi+280]
mov QWORD PTR [rsi+272], r9
adc r10, QWORD PTR [r12+280]
mov rax, QWORD PTR [rsi+288]
mov QWORD PTR [rsi+280], r10
adc rax, QWORD PTR [r12+288]
mov r9, QWORD PTR [rsi+296]
mov QWORD PTR [rsi+288], rax
adc r9, QWORD PTR [r12+296]
mov r10, QWORD PTR [rsi+304]
mov QWORD PTR [rsi+296], r9
adc r10, QWORD PTR [r12+304]
mov rax, QWORD PTR [rsi+312]
mov QWORD PTR [rsi+304], r10
adc rax, QWORD PTR [r12+312]
mov r9, QWORD PTR [rsi+320]
mov QWORD PTR [rsi+312], rax
adc r9, QWORD PTR [r12+320]
mov r10, QWORD PTR [rsi+328]
mov QWORD PTR [rsi+320], r9
adc r10, QWORD PTR [r12+328]
mov rax, QWORD PTR [rsi+336]
mov QWORD PTR [rsi+328], r10
adc rax, QWORD PTR [r12+336]
mov r9, QWORD PTR [rsi+344]
mov QWORD PTR [rsi+336], rax
adc r9, QWORD PTR [r12+344]
mov r10, QWORD PTR [rsi+352]
mov QWORD PTR [rsi+344], r9
adc r10, QWORD PTR [r12+352]
mov rax, QWORD PTR [rsi+360]
mov QWORD PTR [rsi+352], r10
adc rax, QWORD PTR [r12+360]
mov r9, QWORD PTR [rsi+368]
mov QWORD PTR [rsi+360], rax
adc r9, QWORD PTR [r12+368]
mov r10, QWORD PTR [rsi+376]
mov QWORD PTR [rsi+368], r9
adc r10, QWORD PTR [r12+376]
mov rax, QWORD PTR [rsi+384]
mov QWORD PTR [rsi+376], r10
adc rax, QWORD PTR [r12+384]
mov r9, QWORD PTR [rsi+392]
mov QWORD PTR [rsi+384], rax
adc r9, QWORD PTR [r12+392]
mov r10, QWORD PTR [rsi+400]
mov QWORD PTR [rsi+392], r9
adc r10, QWORD PTR [r12+400]
mov rax, QWORD PTR [rsi+408]
mov QWORD PTR [rsi+400], r10
adc rax, QWORD PTR [r12+408]
mov r9, QWORD PTR [rsi+416]
mov QWORD PTR [rsi+408], rax
adc r9, QWORD PTR [r12+416]
mov r10, QWORD PTR [rsi+424]
mov QWORD PTR [rsi+416], r9
adc r10, QWORD PTR [r12+424]
mov rax, QWORD PTR [rsi+432]
mov QWORD PTR [rsi+424], r10
adc rax, QWORD PTR [r12+432]
mov r9, QWORD PTR [rsi+440]
mov QWORD PTR [rsi+432], rax
adc r9, QWORD PTR [r12+440]
mov r10, QWORD PTR [rsi+448]
mov QWORD PTR [rsi+440], r9
adc r10, QWORD PTR [r12+448]
mov rax, QWORD PTR [rsi+456]
mov QWORD PTR [rsi+448], r10
adc rax, QWORD PTR [r12+456]
mov r9, QWORD PTR [rsi+464]
mov QWORD PTR [rsi+456], rax
adc r9, QWORD PTR [r12+464]
mov r10, QWORD PTR [rsi+472]
mov QWORD PTR [rsi+464], r9
adc r10, QWORD PTR [r12+472]
mov rax, QWORD PTR [rsi+480]
mov QWORD PTR [rsi+472], r10
adc rax, QWORD PTR [r12+480]
mov r9, QWORD PTR [rsi+488]
mov QWORD PTR [rsi+480], rax
adc r9, QWORD PTR [r12+488]
mov r10, QWORD PTR [rsi+496]
mov QWORD PTR [rsi+488], r9
adc r10, QWORD PTR [r12+496]
mov rax, QWORD PTR [rsi+504]
mov QWORD PTR [rsi+496], r10
adc rax, QWORD PTR [r12+504]
mov QWORD PTR [rsi+504], rax
adc r11, 0
mov QWORD PTR [rcx+768], r11
add rsi, 256
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [rsi+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [rsi+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [rsi+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [rsi+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [rsi+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [rsi+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [rsi+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r13+248]
mov r10, QWORD PTR [rsi+256]
mov QWORD PTR [rsi+248], r9
adc r10, QWORD PTR [r13+256]
mov QWORD PTR [rsi+256], r10
; Add to zero
mov rax, QWORD PTR [r13+264]
adc rax, 0
mov r9, QWORD PTR [r13+272]
mov QWORD PTR [rsi+264], rax
adc r9, 0
mov r10, QWORD PTR [r13+280]
mov QWORD PTR [rsi+272], r9
adc r10, 0
mov rax, QWORD PTR [r13+288]
mov QWORD PTR [rsi+280], r10
adc rax, 0
mov r9, QWORD PTR [r13+296]
mov QWORD PTR [rsi+288], rax
adc r9, 0
mov r10, QWORD PTR [r13+304]
mov QWORD PTR [rsi+296], r9
adc r10, 0
mov rax, QWORD PTR [r13+312]
mov QWORD PTR [rsi+304], r10
adc rax, 0
mov r9, QWORD PTR [r13+320]
mov QWORD PTR [rsi+312], rax
adc r9, 0
mov r10, QWORD PTR [r13+328]
mov QWORD PTR [rsi+320], r9
adc r10, 0
mov rax, QWORD PTR [r13+336]
mov QWORD PTR [rsi+328], r10
adc rax, 0
mov r9, QWORD PTR [r13+344]
mov QWORD PTR [rsi+336], rax
adc r9, 0
mov r10, QWORD PTR [r13+352]
mov QWORD PTR [rsi+344], r9
adc r10, 0
mov rax, QWORD PTR [r13+360]
mov QWORD PTR [rsi+352], r10
adc rax, 0
mov r9, QWORD PTR [r13+368]
mov QWORD PTR [rsi+360], rax
adc r9, 0
mov r10, QWORD PTR [r13+376]
mov QWORD PTR [rsi+368], r9
adc r10, 0
mov rax, QWORD PTR [r13+384]
mov QWORD PTR [rsi+376], r10
adc rax, 0
mov r9, QWORD PTR [r13+392]
mov QWORD PTR [rsi+384], rax
adc r9, 0
mov r10, QWORD PTR [r13+400]
mov QWORD PTR [rsi+392], r9
adc r10, 0
mov rax, QWORD PTR [r13+408]
mov QWORD PTR [rsi+400], r10
adc rax, 0
mov r9, QWORD PTR [r13+416]
mov QWORD PTR [rsi+408], rax
adc r9, 0
mov r10, QWORD PTR [r13+424]
mov QWORD PTR [rsi+416], r9
adc r10, 0
mov rax, QWORD PTR [r13+432]
mov QWORD PTR [rsi+424], r10
adc rax, 0
mov r9, QWORD PTR [r13+440]
mov QWORD PTR [rsi+432], rax
adc r9, 0
mov r10, QWORD PTR [r13+448]
mov QWORD PTR [rsi+440], r9
adc r10, 0
mov rax, QWORD PTR [r13+456]
mov QWORD PTR [rsi+448], r10
adc rax, 0
mov r9, QWORD PTR [r13+464]
mov QWORD PTR [rsi+456], rax
adc r9, 0
mov r10, QWORD PTR [r13+472]
mov QWORD PTR [rsi+464], r9
adc r10, 0
mov rax, QWORD PTR [r13+480]
mov QWORD PTR [rsi+472], r10
adc rax, 0
mov r9, QWORD PTR [r13+488]
mov QWORD PTR [rsi+480], rax
adc r9, 0
mov r10, QWORD PTR [r13+496]
mov QWORD PTR [rsi+488], r9
adc r10, 0
mov rax, QWORD PTR [r13+504]
mov QWORD PTR [rsi+496], r10
adc rax, 0
mov QWORD PTR [rsi+504], rax
add rsp, 1576
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_4096_mul_64 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
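; /* Multiply a and b into r. (r = a * b)
; *
; * Assembled only when HAVE_INTEL_AVX2 is defined. The half-size products
; * are computed by calling sp_2048_mul_avx2_32.
; *
; * r A single precision integer that receives the product.
; * a A single precision integer; first operand (64 64-bit words are read).
; * b A single precision integer; second operand (64 64-bit words are read).
; */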
_text SEGMENT READONLY PARA
sp_4096_mul_avx2_64 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
sub rsp, 1576
mov QWORD PTR [rsp+1536], rcx
mov QWORD PTR [rsp+1544], rdx
mov QWORD PTR [rsp+1552], r8
lea r12, QWORD PTR [rsp+1024]
lea r14, QWORD PTR [rdx+256]
; Add
mov rax, QWORD PTR [rdx]
xor r15, r15
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [r12], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [r12+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [rdx+24]
mov QWORD PTR [r12+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [r12+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [r12+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r12+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [rdx+56]
mov QWORD PTR [r12+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [r12+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [rdx+72]
mov QWORD PTR [r12+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [r12+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [r12+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r12+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [rdx+104]
mov QWORD PTR [r12+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [rdx+112]
mov QWORD PTR [r12+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [rdx+120]
mov QWORD PTR [r12+112], r10
adc rax, QWORD PTR [r14+120]
mov r9, QWORD PTR [rdx+128]
mov QWORD PTR [r12+120], rax
adc r9, QWORD PTR [r14+128]
mov r10, QWORD PTR [rdx+136]
mov QWORD PTR [r12+128], r9
adc r10, QWORD PTR [r14+136]
mov rax, QWORD PTR [rdx+144]
mov QWORD PTR [r12+136], r10
adc rax, QWORD PTR [r14+144]
mov r9, QWORD PTR [rdx+152]
mov QWORD PTR [r12+144], rax
adc r9, QWORD PTR [r14+152]
mov r10, QWORD PTR [rdx+160]
mov QWORD PTR [r12+152], r9
adc r10, QWORD PTR [r14+160]
mov rax, QWORD PTR [rdx+168]
mov QWORD PTR [r12+160], r10
adc rax, QWORD PTR [r14+168]
mov r9, QWORD PTR [rdx+176]
mov QWORD PTR [r12+168], rax
adc r9, QWORD PTR [r14+176]
mov r10, QWORD PTR [rdx+184]
mov QWORD PTR [r12+176], r9
adc r10, QWORD PTR [r14+184]
mov rax, QWORD PTR [rdx+192]
mov QWORD PTR [r12+184], r10
adc rax, QWORD PTR [r14+192]
mov r9, QWORD PTR [rdx+200]
mov QWORD PTR [r12+192], rax
adc r9, QWORD PTR [r14+200]
mov r10, QWORD PTR [rdx+208]
mov QWORD PTR [r12+200], r9
adc r10, QWORD PTR [r14+208]
mov rax, QWORD PTR [rdx+216]
mov QWORD PTR [r12+208], r10
adc rax, QWORD PTR [r14+216]
mov r9, QWORD PTR [rdx+224]
mov QWORD PTR [r12+216], rax
adc r9, QWORD PTR [r14+224]
mov r10, QWORD PTR [rdx+232]
mov QWORD PTR [r12+224], r9
adc r10, QWORD PTR [r14+232]
mov rax, QWORD PTR [rdx+240]
mov QWORD PTR [r12+232], r10
adc rax, QWORD PTR [r14+240]
mov r9, QWORD PTR [rdx+248]
mov QWORD PTR [r12+240], rax
adc r9, QWORD PTR [r14+248]
mov QWORD PTR [r12+248], r9
adc r15, 0
mov QWORD PTR [rsp+1560], r15
lea r13, QWORD PTR [rsp+1280]
lea r14, QWORD PTR [r8+256]
; Add
mov rax, QWORD PTR [r8]
xor rdi, rdi
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [r8+8]
mov QWORD PTR [r13], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [r8+16]
mov QWORD PTR [r13+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [r8+24]
mov QWORD PTR [r13+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [r8+32]
mov QWORD PTR [r13+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [r8+40]
mov QWORD PTR [r13+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [r8+48]
mov QWORD PTR [r13+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [r8+56]
mov QWORD PTR [r13+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [r8+64]
mov QWORD PTR [r13+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [r8+72]
mov QWORD PTR [r13+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [r8+80]
mov QWORD PTR [r13+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [r8+88]
mov QWORD PTR [r13+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [r8+96]
mov QWORD PTR [r13+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [r8+104]
mov QWORD PTR [r13+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [r8+112]
mov QWORD PTR [r13+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [r8+120]
mov QWORD PTR [r13+112], r10
adc rax, QWORD PTR [r14+120]
mov r9, QWORD PTR [r8+128]
mov QWORD PTR [r13+120], rax
adc r9, QWORD PTR [r14+128]
mov r10, QWORD PTR [r8+136]
mov QWORD PTR [r13+128], r9
adc r10, QWORD PTR [r14+136]
mov rax, QWORD PTR [r8+144]
mov QWORD PTR [r13+136], r10
adc rax, QWORD PTR [r14+144]
mov r9, QWORD PTR [r8+152]
mov QWORD PTR [r13+144], rax
adc r9, QWORD PTR [r14+152]
mov r10, QWORD PTR [r8+160]
mov QWORD PTR [r13+152], r9
adc r10, QWORD PTR [r14+160]
mov rax, QWORD PTR [r8+168]
mov QWORD PTR [r13+160], r10
adc rax, QWORD PTR [r14+168]
mov r9, QWORD PTR [r8+176]
mov QWORD PTR [r13+168], rax
adc r9, QWORD PTR [r14+176]
mov r10, QWORD PTR [r8+184]
mov QWORD PTR [r13+176], r9
adc r10, QWORD PTR [r14+184]
mov rax, QWORD PTR [r8+192]
mov QWORD PTR [r13+184], r10
adc rax, QWORD PTR [r14+192]
mov r9, QWORD PTR [r8+200]
mov QWORD PTR [r13+192], rax
adc r9, QWORD PTR [r14+200]
mov r10, QWORD PTR [r8+208]
mov QWORD PTR [r13+200], r9
adc r10, QWORD PTR [r14+208]
mov rax, QWORD PTR [r8+216]
mov QWORD PTR [r13+208], r10
adc rax, QWORD PTR [r14+216]
mov r9, QWORD PTR [r8+224]
mov QWORD PTR [r13+216], rax
adc r9, QWORD PTR [r14+224]
mov r10, QWORD PTR [r8+232]
mov QWORD PTR [r13+224], r9
adc r10, QWORD PTR [r14+232]
mov rax, QWORD PTR [r8+240]
mov QWORD PTR [r13+232], r10
adc rax, QWORD PTR [r14+240]
mov r9, QWORD PTR [r8+248]
mov QWORD PTR [r13+240], rax
adc r9, QWORD PTR [r14+248]
mov QWORD PTR [r13+248], r9
adc rdi, 0
mov QWORD PTR [rsp+1568], rdi
mov r8, r13
mov rdx, r12
mov rcx, rsp
call sp_2048_mul_avx2_32
mov r8, QWORD PTR [rsp+1552]
mov rdx, QWORD PTR [rsp+1544]
lea rcx, QWORD PTR [rsp+512]
add r8, 256
add rdx, 256
call sp_2048_mul_avx2_32
mov r8, QWORD PTR [rsp+1552]
mov rdx, QWORD PTR [rsp+1544]
mov rcx, QWORD PTR [rsp+1536]
call sp_2048_mul_avx2_32
IFDEF _WIN64
mov r8, QWORD PTR [rsp+1552]
mov rdx, QWORD PTR [rsp+1544]
mov rcx, QWORD PTR [rsp+1536]
ENDIF
mov r15, QWORD PTR [rsp+1560]
mov rdi, QWORD PTR [rsp+1568]
mov rsi, QWORD PTR [rsp+1536]
mov r11, r15
lea r12, QWORD PTR [rsp+1024]
lea r13, QWORD PTR [rsp+1280]
and r11, rdi
neg r15
neg rdi
add rsi, 512
mov rax, QWORD PTR [r12]
mov r9, QWORD PTR [r13]
pext rax, rax, rdi
pext r9, r9, r15
add rax, r9
mov r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [r13+8]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi], rax
adc r9, r10
mov r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [r13+16]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+8], r9
adc r10, rax
mov rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [r13+24]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+16], r10
adc rax, r9
mov r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [r13+32]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+24], rax
adc r9, r10
mov r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [r13+40]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+32], r9
adc r10, rax
mov rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [r13+48]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+40], r10
adc rax, r9
mov r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [r13+56]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+48], rax
adc r9, r10
mov r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [r13+64]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+56], r9
adc r10, rax
mov rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [r13+72]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+64], r10
adc rax, r9
mov r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [r13+80]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+72], rax
adc r9, r10
mov r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [r13+88]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+80], r9
adc r10, rax
mov rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [r13+96]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+88], r10
adc rax, r9
mov r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [r13+104]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+96], rax
adc r9, r10
mov r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [r13+112]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+104], r9
adc r10, rax
mov rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [r13+120]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+112], r10
adc rax, r9
mov r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [r13+128]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+120], rax
adc r9, r10
mov r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [r13+136]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+128], r9
adc r10, rax
mov rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [r13+144]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+136], r10
adc rax, r9
mov r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [r13+152]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+144], rax
adc r9, r10
mov r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [r13+160]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+152], r9
adc r10, rax
mov rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [r13+168]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+160], r10
adc rax, r9
mov r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [r13+176]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+168], rax
adc r9, r10
mov r10, QWORD PTR [r12+184]
mov rax, QWORD PTR [r13+184]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+176], r9
adc r10, rax
mov rax, QWORD PTR [r12+192]
mov r9, QWORD PTR [r13+192]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+184], r10
adc rax, r9
mov r9, QWORD PTR [r12+200]
mov r10, QWORD PTR [r13+200]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+192], rax
adc r9, r10
mov r10, QWORD PTR [r12+208]
mov rax, QWORD PTR [r13+208]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+200], r9
adc r10, rax
mov rax, QWORD PTR [r12+216]
mov r9, QWORD PTR [r13+216]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+208], r10
adc rax, r9
mov r9, QWORD PTR [r12+224]
mov r10, QWORD PTR [r13+224]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+216], rax
adc r9, r10
mov r10, QWORD PTR [r12+232]
mov rax, QWORD PTR [r13+232]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+224], r9
adc r10, rax
mov rax, QWORD PTR [r12+240]
mov r9, QWORD PTR [r13+240]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+232], r10
adc rax, r9
mov r9, QWORD PTR [r12+248]
mov r10, QWORD PTR [r13+248]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+240], rax
adc r9, r10
mov QWORD PTR [rsi+248], r9
adc r11, 0
lea r13, QWORD PTR [rsp+512]
mov r12, rsp
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [r13+248]
mov r10, QWORD PTR [r12+256]
mov QWORD PTR [r12+248], r9
sbb r10, QWORD PTR [r13+256]
mov rax, QWORD PTR [r12+264]
mov QWORD PTR [r12+256], r10
sbb rax, QWORD PTR [r13+264]
mov r9, QWORD PTR [r12+272]
mov QWORD PTR [r12+264], rax
sbb r9, QWORD PTR [r13+272]
mov r10, QWORD PTR [r12+280]
mov QWORD PTR [r12+272], r9
sbb r10, QWORD PTR [r13+280]
mov rax, QWORD PTR [r12+288]
mov QWORD PTR [r12+280], r10
sbb rax, QWORD PTR [r13+288]
mov r9, QWORD PTR [r12+296]
mov QWORD PTR [r12+288], rax
sbb r9, QWORD PTR [r13+296]
mov r10, QWORD PTR [r12+304]
mov QWORD PTR [r12+296], r9
sbb r10, QWORD PTR [r13+304]
mov rax, QWORD PTR [r12+312]
mov QWORD PTR [r12+304], r10
sbb rax, QWORD PTR [r13+312]
mov r9, QWORD PTR [r12+320]
mov QWORD PTR [r12+312], rax
sbb r9, QWORD PTR [r13+320]
mov r10, QWORD PTR [r12+328]
mov QWORD PTR [r12+320], r9
sbb r10, QWORD PTR [r13+328]
mov rax, QWORD PTR [r12+336]
mov QWORD PTR [r12+328], r10
sbb rax, QWORD PTR [r13+336]
mov r9, QWORD PTR [r12+344]
mov QWORD PTR [r12+336], rax
sbb r9, QWORD PTR [r13+344]
mov r10, QWORD PTR [r12+352]
mov QWORD PTR [r12+344], r9
sbb r10, QWORD PTR [r13+352]
mov rax, QWORD PTR [r12+360]
mov QWORD PTR [r12+352], r10
sbb rax, QWORD PTR [r13+360]
mov r9, QWORD PTR [r12+368]
mov QWORD PTR [r12+360], rax
sbb r9, QWORD PTR [r13+368]
mov r10, QWORD PTR [r12+376]
mov QWORD PTR [r12+368], r9
sbb r10, QWORD PTR [r13+376]
mov rax, QWORD PTR [r12+384]
mov QWORD PTR [r12+376], r10
sbb rax, QWORD PTR [r13+384]
mov r9, QWORD PTR [r12+392]
mov QWORD PTR [r12+384], rax
sbb r9, QWORD PTR [r13+392]
mov r10, QWORD PTR [r12+400]
mov QWORD PTR [r12+392], r9
sbb r10, QWORD PTR [r13+400]
mov rax, QWORD PTR [r12+408]
mov QWORD PTR [r12+400], r10
sbb rax, QWORD PTR [r13+408]
mov r9, QWORD PTR [r12+416]
mov QWORD PTR [r12+408], rax
sbb r9, QWORD PTR [r13+416]
mov r10, QWORD PTR [r12+424]
mov QWORD PTR [r12+416], r9
sbb r10, QWORD PTR [r13+424]
mov rax, QWORD PTR [r12+432]
mov QWORD PTR [r12+424], r10
sbb rax, QWORD PTR [r13+432]
mov r9, QWORD PTR [r12+440]
mov QWORD PTR [r12+432], rax
sbb r9, QWORD PTR [r13+440]
mov r10, QWORD PTR [r12+448]
mov QWORD PTR [r12+440], r9
sbb r10, QWORD PTR [r13+448]
mov rax, QWORD PTR [r12+456]
mov QWORD PTR [r12+448], r10
sbb rax, QWORD PTR [r13+456]
mov r9, QWORD PTR [r12+464]
mov QWORD PTR [r12+456], rax
sbb r9, QWORD PTR [r13+464]
mov r10, QWORD PTR [r12+472]
mov QWORD PTR [r12+464], r9
sbb r10, QWORD PTR [r13+472]
mov rax, QWORD PTR [r12+480]
mov QWORD PTR [r12+472], r10
sbb rax, QWORD PTR [r13+480]
mov r9, QWORD PTR [r12+488]
mov QWORD PTR [r12+480], rax
sbb r9, QWORD PTR [r13+488]
mov r10, QWORD PTR [r12+496]
mov QWORD PTR [r12+488], r9
sbb r10, QWORD PTR [r13+496]
mov rax, QWORD PTR [r12+504]
mov QWORD PTR [r12+496], r10
sbb rax, QWORD PTR [r13+504]
mov QWORD PTR [r12+504], rax
sbb r11, 0
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [rcx]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [rcx+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [rcx+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [rcx+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [rcx+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [rcx+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [rcx+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [rcx+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [rcx+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [rcx+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [rcx+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [rcx+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [rcx+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [rcx+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [rcx+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [rcx+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [rcx+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [rcx+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [rcx+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [rcx+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [rcx+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [rcx+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [rcx+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [rcx+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [rcx+248]
mov r10, QWORD PTR [r12+256]
mov QWORD PTR [r12+248], r9
sbb r10, QWORD PTR [rcx+256]
mov rax, QWORD PTR [r12+264]
mov QWORD PTR [r12+256], r10
sbb rax, QWORD PTR [rcx+264]
mov r9, QWORD PTR [r12+272]
mov QWORD PTR [r12+264], rax
sbb r9, QWORD PTR [rcx+272]
mov r10, QWORD PTR [r12+280]
mov QWORD PTR [r12+272], r9
sbb r10, QWORD PTR [rcx+280]
mov rax, QWORD PTR [r12+288]
mov QWORD PTR [r12+280], r10
sbb rax, QWORD PTR [rcx+288]
mov r9, QWORD PTR [r12+296]
mov QWORD PTR [r12+288], rax
sbb r9, QWORD PTR [rcx+296]
mov r10, QWORD PTR [r12+304]
mov QWORD PTR [r12+296], r9
sbb r10, QWORD PTR [rcx+304]
mov rax, QWORD PTR [r12+312]
mov QWORD PTR [r12+304], r10
sbb rax, QWORD PTR [rcx+312]
mov r9, QWORD PTR [r12+320]
mov QWORD PTR [r12+312], rax
sbb r9, QWORD PTR [rcx+320]
mov r10, QWORD PTR [r12+328]
mov QWORD PTR [r12+320], r9
sbb r10, QWORD PTR [rcx+328]
mov rax, QWORD PTR [r12+336]
mov QWORD PTR [r12+328], r10
sbb rax, QWORD PTR [rcx+336]
mov r9, QWORD PTR [r12+344]
mov QWORD PTR [r12+336], rax
sbb r9, QWORD PTR [rcx+344]
mov r10, QWORD PTR [r12+352]
mov QWORD PTR [r12+344], r9
sbb r10, QWORD PTR [rcx+352]
mov rax, QWORD PTR [r12+360]
mov QWORD PTR [r12+352], r10
sbb rax, QWORD PTR [rcx+360]
mov r9, QWORD PTR [r12+368]
mov QWORD PTR [r12+360], rax
sbb r9, QWORD PTR [rcx+368]
mov r10, QWORD PTR [r12+376]
mov QWORD PTR [r12+368], r9
sbb r10, QWORD PTR [rcx+376]
mov rax, QWORD PTR [r12+384]
mov QWORD PTR [r12+376], r10
sbb rax, QWORD PTR [rcx+384]
mov r9, QWORD PTR [r12+392]
mov QWORD PTR [r12+384], rax
sbb r9, QWORD PTR [rcx+392]
mov r10, QWORD PTR [r12+400]
mov QWORD PTR [r12+392], r9
sbb r10, QWORD PTR [rcx+400]
mov rax, QWORD PTR [r12+408]
mov QWORD PTR [r12+400], r10
sbb rax, QWORD PTR [rcx+408]
mov r9, QWORD PTR [r12+416]
mov QWORD PTR [r12+408], rax
sbb r9, QWORD PTR [rcx+416]
mov r10, QWORD PTR [r12+424]
mov QWORD PTR [r12+416], r9
sbb r10, QWORD PTR [rcx+424]
mov rax, QWORD PTR [r12+432]
mov QWORD PTR [r12+424], r10
sbb rax, QWORD PTR [rcx+432]
mov r9, QWORD PTR [r12+440]
mov QWORD PTR [r12+432], rax
sbb r9, QWORD PTR [rcx+440]
mov r10, QWORD PTR [r12+448]
mov QWORD PTR [r12+440], r9
sbb r10, QWORD PTR [rcx+448]
mov rax, QWORD PTR [r12+456]
mov QWORD PTR [r12+448], r10
sbb rax, QWORD PTR [rcx+456]
mov r9, QWORD PTR [r12+464]
mov QWORD PTR [r12+456], rax
sbb r9, QWORD PTR [rcx+464]
mov r10, QWORD PTR [r12+472]
mov QWORD PTR [r12+464], r9
sbb r10, QWORD PTR [rcx+472]
mov rax, QWORD PTR [r12+480]
mov QWORD PTR [r12+472], r10
sbb rax, QWORD PTR [rcx+480]
mov r9, QWORD PTR [r12+488]
mov QWORD PTR [r12+480], rax
sbb r9, QWORD PTR [rcx+488]
mov r10, QWORD PTR [r12+496]
mov QWORD PTR [r12+488], r9
sbb r10, QWORD PTR [rcx+496]
mov rax, QWORD PTR [r12+504]
mov QWORD PTR [r12+496], r10
sbb rax, QWORD PTR [rcx+504]
mov QWORD PTR [r12+504], rax
sbb r11, 0
sub rsi, 256
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r12]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r12+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r12+192]
mov r9, QWORD PTR [rsi+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r12+200]
mov r10, QWORD PTR [rsi+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r12+208]
mov rax, QWORD PTR [rsi+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r12+216]
mov r9, QWORD PTR [rsi+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r12+224]
mov r10, QWORD PTR [rsi+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r12+232]
mov rax, QWORD PTR [rsi+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r12+240]
mov r9, QWORD PTR [rsi+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r12+248]
mov r10, QWORD PTR [rsi+256]
mov QWORD PTR [rsi+248], r9
adc r10, QWORD PTR [r12+256]
mov rax, QWORD PTR [rsi+264]
mov QWORD PTR [rsi+256], r10
adc rax, QWORD PTR [r12+264]
mov r9, QWORD PTR [rsi+272]
mov QWORD PTR [rsi+264], rax
adc r9, QWORD PTR [r12+272]
mov r10, QWORD PTR [rsi+280]
mov QWORD PTR [rsi+272], r9
adc r10, QWORD PTR [r12+280]
mov rax, QWORD PTR [rsi+288]
mov QWORD PTR [rsi+280], r10
adc rax, QWORD PTR [r12+288]
mov r9, QWORD PTR [rsi+296]
mov QWORD PTR [rsi+288], rax
adc r9, QWORD PTR [r12+296]
mov r10, QWORD PTR [rsi+304]
mov QWORD PTR [rsi+296], r9
adc r10, QWORD PTR [r12+304]
mov rax, QWORD PTR [rsi+312]
mov QWORD PTR [rsi+304], r10
adc rax, QWORD PTR [r12+312]
mov r9, QWORD PTR [rsi+320]
mov QWORD PTR [rsi+312], rax
adc r9, QWORD PTR [r12+320]
mov r10, QWORD PTR [rsi+328]
mov QWORD PTR [rsi+320], r9
adc r10, QWORD PTR [r12+328]
mov rax, QWORD PTR [rsi+336]
mov QWORD PTR [rsi+328], r10
adc rax, QWORD PTR [r12+336]
mov r9, QWORD PTR [rsi+344]
mov QWORD PTR [rsi+336], rax
adc r9, QWORD PTR [r12+344]
mov r10, QWORD PTR [rsi+352]
mov QWORD PTR [rsi+344], r9
adc r10, QWORD PTR [r12+352]
mov rax, QWORD PTR [rsi+360]
mov QWORD PTR [rsi+352], r10
adc rax, QWORD PTR [r12+360]
mov r9, QWORD PTR [rsi+368]
mov QWORD PTR [rsi+360], rax
adc r9, QWORD PTR [r12+368]
mov r10, QWORD PTR [rsi+376]
mov QWORD PTR [rsi+368], r9
adc r10, QWORD PTR [r12+376]
mov rax, QWORD PTR [rsi+384]
mov QWORD PTR [rsi+376], r10
adc rax, QWORD PTR [r12+384]
mov r9, QWORD PTR [rsi+392]
mov QWORD PTR [rsi+384], rax
adc r9, QWORD PTR [r12+392]
mov r10, QWORD PTR [rsi+400]
mov QWORD PTR [rsi+392], r9
adc r10, QWORD PTR [r12+400]
mov rax, QWORD PTR [rsi+408]
mov QWORD PTR [rsi+400], r10
adc rax, QWORD PTR [r12+408]
mov r9, QWORD PTR [rsi+416]
mov QWORD PTR [rsi+408], rax
adc r9, QWORD PTR [r12+416]
mov r10, QWORD PTR [rsi+424]
mov QWORD PTR [rsi+416], r9
adc r10, QWORD PTR [r12+424]
mov rax, QWORD PTR [rsi+432]
mov QWORD PTR [rsi+424], r10
adc rax, QWORD PTR [r12+432]
mov r9, QWORD PTR [rsi+440]
mov QWORD PTR [rsi+432], rax
adc r9, QWORD PTR [r12+440]
mov r10, QWORD PTR [rsi+448]
mov QWORD PTR [rsi+440], r9
adc r10, QWORD PTR [r12+448]
mov rax, QWORD PTR [rsi+456]
mov QWORD PTR [rsi+448], r10
adc rax, QWORD PTR [r12+456]
mov r9, QWORD PTR [rsi+464]
mov QWORD PTR [rsi+456], rax
adc r9, QWORD PTR [r12+464]
mov r10, QWORD PTR [rsi+472]
mov QWORD PTR [rsi+464], r9
adc r10, QWORD PTR [r12+472]
mov rax, QWORD PTR [rsi+480]
mov QWORD PTR [rsi+472], r10
adc rax, QWORD PTR [r12+480]
mov r9, QWORD PTR [rsi+488]
mov QWORD PTR [rsi+480], rax
adc r9, QWORD PTR [r12+488]
mov r10, QWORD PTR [rsi+496]
mov QWORD PTR [rsi+488], r9
adc r10, QWORD PTR [r12+496]
mov rax, QWORD PTR [rsi+504]
mov QWORD PTR [rsi+496], r10
adc rax, QWORD PTR [r12+504]
mov QWORD PTR [rsi+504], rax
adc r11, 0
mov QWORD PTR [rcx+768], r11
add rsi, 256
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [rsi+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [rsi+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [rsi+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [rsi+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [rsi+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [rsi+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [rsi+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r13+248]
mov r10, QWORD PTR [rsi+256]
mov QWORD PTR [rsi+248], r9
adc r10, QWORD PTR [r13+256]
mov QWORD PTR [rsi+256], r10
; Add to zero
mov rax, QWORD PTR [r13+264]
adc rax, 0
mov r9, QWORD PTR [r13+272]
mov QWORD PTR [rsi+264], rax
adc r9, 0
mov r10, QWORD PTR [r13+280]
mov QWORD PTR [rsi+272], r9
adc r10, 0
mov rax, QWORD PTR [r13+288]
mov QWORD PTR [rsi+280], r10
adc rax, 0
mov r9, QWORD PTR [r13+296]
mov QWORD PTR [rsi+288], rax
adc r9, 0
mov r10, QWORD PTR [r13+304]
mov QWORD PTR [rsi+296], r9
adc r10, 0
mov rax, QWORD PTR [r13+312]
mov QWORD PTR [rsi+304], r10
adc rax, 0
mov r9, QWORD PTR [r13+320]
mov QWORD PTR [rsi+312], rax
adc r9, 0
mov r10, QWORD PTR [r13+328]
mov QWORD PTR [rsi+320], r9
adc r10, 0
mov rax, QWORD PTR [r13+336]
mov QWORD PTR [rsi+328], r10
adc rax, 0
mov r9, QWORD PTR [r13+344]
mov QWORD PTR [rsi+336], rax
adc r9, 0
mov r10, QWORD PTR [r13+352]
mov QWORD PTR [rsi+344], r9
adc r10, 0
mov rax, QWORD PTR [r13+360]
mov QWORD PTR [rsi+352], r10
adc rax, 0
mov r9, QWORD PTR [r13+368]
mov QWORD PTR [rsi+360], rax
adc r9, 0
mov r10, QWORD PTR [r13+376]
mov QWORD PTR [rsi+368], r9
adc r10, 0
mov rax, QWORD PTR [r13+384]
mov QWORD PTR [rsi+376], r10
adc rax, 0
mov r9, QWORD PTR [r13+392]
mov QWORD PTR [rsi+384], rax
adc r9, 0
mov r10, QWORD PTR [r13+400]
mov QWORD PTR [rsi+392], r9
adc r10, 0
mov rax, QWORD PTR [r13+408]
mov QWORD PTR [rsi+400], r10
adc rax, 0
mov r9, QWORD PTR [r13+416]
mov QWORD PTR [rsi+408], rax
adc r9, 0
mov r10, QWORD PTR [r13+424]
mov QWORD PTR [rsi+416], r9
adc r10, 0
mov rax, QWORD PTR [r13+432]
mov QWORD PTR [rsi+424], r10
adc rax, 0
mov r9, QWORD PTR [r13+440]
mov QWORD PTR [rsi+432], rax
adc r9, 0
mov r10, QWORD PTR [r13+448]
mov QWORD PTR [rsi+440], r9
adc r10, 0
mov rax, QWORD PTR [r13+456]
mov QWORD PTR [rsi+448], r10
adc rax, 0
mov r9, QWORD PTR [r13+464]
mov QWORD PTR [rsi+456], rax
adc r9, 0
mov r10, QWORD PTR [r13+472]
mov QWORD PTR [rsi+464], r9
adc r10, 0
mov rax, QWORD PTR [r13+480]
mov QWORD PTR [rsi+472], r10
adc rax, 0
mov r9, QWORD PTR [r13+488]
mov QWORD PTR [rsi+480], rax
adc r9, 0
mov r10, QWORD PTR [r13+496]
mov QWORD PTR [rsi+488], r9
adc r10, 0
mov rax, QWORD PTR [r13+504]
mov QWORD PTR [rsi+496], r10
adc rax, 0
mov QWORD PTR [rsi+504], rax
add rsp, 1576
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_4096_mul_avx2_64 ENDP
_text ENDS
ENDIF
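; /* Square a and put result in r. (r = a * a)
; *
; * Karatsuba: computes ah^2, al^2 and (al - ah)^2, where al is the low half
; * of a and ah is the high half of a, then combines the three squares.
; *
; * r A single precision integer. Receives the result.
; * a A single precision integer. The value to square.
; */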
_text SEGMENT READONLY PARA
sp_4096_sqr_64 PROC
sub rsp, 528
mov QWORD PTR [rsp+512], rcx
mov QWORD PTR [rsp+520], rdx
mov r9, 0
mov r10, rsp
lea r11, QWORD PTR [rdx+256]
mov rax, QWORD PTR [rdx]
sub rax, QWORD PTR [r11]
mov r8, QWORD PTR [rdx+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [r11+8]
mov rax, QWORD PTR [rdx+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [r11+16]
mov r8, QWORD PTR [rdx+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [r11+24]
mov rax, QWORD PTR [rdx+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [r11+32]
mov r8, QWORD PTR [rdx+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [r11+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [r11+48]
mov r8, QWORD PTR [rdx+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [r11+56]
mov rax, QWORD PTR [rdx+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [r11+64]
mov r8, QWORD PTR [rdx+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [r11+72]
mov rax, QWORD PTR [rdx+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [r11+80]
mov r8, QWORD PTR [rdx+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [r11+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [r11+96]
mov r8, QWORD PTR [rdx+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [r11+104]
mov rax, QWORD PTR [rdx+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [r11+112]
mov r8, QWORD PTR [rdx+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [r11+120]
mov rax, QWORD PTR [rdx+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [r11+128]
mov r8, QWORD PTR [rdx+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [r11+136]
mov rax, QWORD PTR [rdx+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [r11+144]
mov r8, QWORD PTR [rdx+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [r11+152]
mov rax, QWORD PTR [rdx+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [r11+160]
mov r8, QWORD PTR [rdx+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [r11+168]
mov rax, QWORD PTR [rdx+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [r11+176]
mov r8, QWORD PTR [rdx+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [r11+184]
mov rax, QWORD PTR [rdx+192]
mov QWORD PTR [r10+184], r8
sbb rax, QWORD PTR [r11+192]
mov r8, QWORD PTR [rdx+200]
mov QWORD PTR [r10+192], rax
sbb r8, QWORD PTR [r11+200]
mov rax, QWORD PTR [rdx+208]
mov QWORD PTR [r10+200], r8
sbb rax, QWORD PTR [r11+208]
mov r8, QWORD PTR [rdx+216]
mov QWORD PTR [r10+208], rax
sbb r8, QWORD PTR [r11+216]
mov rax, QWORD PTR [rdx+224]
mov QWORD PTR [r10+216], r8
sbb rax, QWORD PTR [r11+224]
mov r8, QWORD PTR [rdx+232]
mov QWORD PTR [r10+224], rax
sbb r8, QWORD PTR [r11+232]
mov rax, QWORD PTR [rdx+240]
mov QWORD PTR [r10+232], r8
sbb rax, QWORD PTR [r11+240]
mov r8, QWORD PTR [rdx+248]
mov QWORD PTR [r10+240], rax
sbb r8, QWORD PTR [r11+248]
mov QWORD PTR [r10+248], r8
sbb r9, 0
; Cond Negate
mov rax, QWORD PTR [r10]
mov r11, r9
xor rax, r9
neg r11
sub rax, r9
mov r8, QWORD PTR [r10+8]
sbb r11, 0
mov QWORD PTR [r10], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+16]
setc r11b
mov QWORD PTR [r10+8], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+24]
setc r11b
mov QWORD PTR [r10+16], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+32]
setc r11b
mov QWORD PTR [r10+24], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+40]
setc r11b
mov QWORD PTR [r10+32], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+48]
setc r11b
mov QWORD PTR [r10+40], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+56]
setc r11b
mov QWORD PTR [r10+48], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+64]
setc r11b
mov QWORD PTR [r10+56], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+72]
setc r11b
mov QWORD PTR [r10+64], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+80]
setc r11b
mov QWORD PTR [r10+72], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+88]
setc r11b
mov QWORD PTR [r10+80], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+96]
setc r11b
mov QWORD PTR [r10+88], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+104]
setc r11b
mov QWORD PTR [r10+96], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+112]
setc r11b
mov QWORD PTR [r10+104], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+120]
setc r11b
mov QWORD PTR [r10+112], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+128]
setc r11b
mov QWORD PTR [r10+120], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+136]
setc r11b
mov QWORD PTR [r10+128], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+144]
setc r11b
mov QWORD PTR [r10+136], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+152]
setc r11b
mov QWORD PTR [r10+144], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+160]
setc r11b
mov QWORD PTR [r10+152], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+168]
setc r11b
mov QWORD PTR [r10+160], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+176]
setc r11b
mov QWORD PTR [r10+168], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+184]
setc r11b
mov QWORD PTR [r10+176], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+192]
setc r11b
mov QWORD PTR [r10+184], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+200]
setc r11b
mov QWORD PTR [r10+192], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+208]
setc r11b
mov QWORD PTR [r10+200], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+216]
setc r11b
mov QWORD PTR [r10+208], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+224]
setc r11b
mov QWORD PTR [r10+216], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+232]
setc r11b
mov QWORD PTR [r10+224], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+240]
setc r11b
mov QWORD PTR [r10+232], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+248]
setc r11b
mov QWORD PTR [r10+240], rax
xor r8, r9
add r8, r11
mov QWORD PTR [r10+248], r8
mov rdx, r10
mov rcx, rsp
call sp_2048_sqr_32
mov rdx, QWORD PTR [rsp+520]
mov rcx, QWORD PTR [rsp+512]
add rdx, 256
add rcx, 512
call sp_2048_sqr_32
mov rdx, QWORD PTR [rsp+520]
mov rcx, QWORD PTR [rsp+512]
call sp_2048_sqr_32
IFDEF _WIN64
mov rdx, QWORD PTR [rsp+520]
mov rcx, QWORD PTR [rsp+512]
ENDIF
mov rdx, QWORD PTR [rsp+512]
lea r10, QWORD PTR [rsp+256]
add rdx, 768
mov r9, 0
mov r8, QWORD PTR [r10+-256]
sub r8, QWORD PTR [rdx+-256]
mov rax, QWORD PTR [r10+-248]
mov QWORD PTR [r10+-256], r8
sbb rax, QWORD PTR [rdx+-248]
mov r8, QWORD PTR [r10+-240]
mov QWORD PTR [r10+-248], rax
sbb r8, QWORD PTR [rdx+-240]
mov rax, QWORD PTR [r10+-232]
mov QWORD PTR [r10+-240], r8
sbb rax, QWORD PTR [rdx+-232]
mov r8, QWORD PTR [r10+-224]
mov QWORD PTR [r10+-232], rax
sbb r8, QWORD PTR [rdx+-224]
mov rax, QWORD PTR [r10+-216]
mov QWORD PTR [r10+-224], r8
sbb rax, QWORD PTR [rdx+-216]
mov r8, QWORD PTR [r10+-208]
mov QWORD PTR [r10+-216], rax
sbb r8, QWORD PTR [rdx+-208]
mov rax, QWORD PTR [r10+-200]
mov QWORD PTR [r10+-208], r8
sbb rax, QWORD PTR [rdx+-200]
mov r8, QWORD PTR [r10+-192]
mov QWORD PTR [r10+-200], rax
sbb r8, QWORD PTR [rdx+-192]
mov rax, QWORD PTR [r10+-184]
mov QWORD PTR [r10+-192], r8
sbb rax, QWORD PTR [rdx+-184]
mov r8, QWORD PTR [r10+-176]
mov QWORD PTR [r10+-184], rax
sbb r8, QWORD PTR [rdx+-176]
mov rax, QWORD PTR [r10+-168]
mov QWORD PTR [r10+-176], r8
sbb rax, QWORD PTR [rdx+-168]
mov r8, QWORD PTR [r10+-160]
mov QWORD PTR [r10+-168], rax
sbb r8, QWORD PTR [rdx+-160]
mov rax, QWORD PTR [r10+-152]
mov QWORD PTR [r10+-160], r8
sbb rax, QWORD PTR [rdx+-152]
mov r8, QWORD PTR [r10+-144]
mov QWORD PTR [r10+-152], rax
sbb r8, QWORD PTR [rdx+-144]
mov rax, QWORD PTR [r10+-136]
mov QWORD PTR [r10+-144], r8
sbb rax, QWORD PTR [rdx+-136]
mov r8, QWORD PTR [r10+-128]
mov QWORD PTR [r10+-136], rax
sbb r8, QWORD PTR [rdx+-128]
mov rax, QWORD PTR [r10+-120]
mov QWORD PTR [r10+-128], r8
sbb rax, QWORD PTR [rdx+-120]
mov r8, QWORD PTR [r10+-112]
mov QWORD PTR [r10+-120], rax
sbb r8, QWORD PTR [rdx+-112]
mov rax, QWORD PTR [r10+-104]
mov QWORD PTR [r10+-112], r8
sbb rax, QWORD PTR [rdx+-104]
mov r8, QWORD PTR [r10+-96]
mov QWORD PTR [r10+-104], rax
sbb r8, QWORD PTR [rdx+-96]
mov rax, QWORD PTR [r10+-88]
mov QWORD PTR [r10+-96], r8
sbb rax, QWORD PTR [rdx+-88]
mov r8, QWORD PTR [r10+-80]
mov QWORD PTR [r10+-88], rax
sbb r8, QWORD PTR [rdx+-80]
mov rax, QWORD PTR [r10+-72]
mov QWORD PTR [r10+-80], r8
sbb rax, QWORD PTR [rdx+-72]
mov r8, QWORD PTR [r10+-64]
mov QWORD PTR [r10+-72], rax
sbb r8, QWORD PTR [rdx+-64]
mov rax, QWORD PTR [r10+-56]
mov QWORD PTR [r10+-64], r8
sbb rax, QWORD PTR [rdx+-56]
mov r8, QWORD PTR [r10+-48]
mov QWORD PTR [r10+-56], rax
sbb r8, QWORD PTR [rdx+-48]
mov rax, QWORD PTR [r10+-40]
mov QWORD PTR [r10+-48], r8
sbb rax, QWORD PTR [rdx+-40]
mov r8, QWORD PTR [r10+-32]
mov QWORD PTR [r10+-40], rax
sbb r8, QWORD PTR [rdx+-32]
mov rax, QWORD PTR [r10+-24]
mov QWORD PTR [r10+-32], r8
sbb rax, QWORD PTR [rdx+-24]
mov r8, QWORD PTR [r10+-16]
mov QWORD PTR [r10+-24], rax
sbb r8, QWORD PTR [rdx+-16]
mov rax, QWORD PTR [r10+-8]
mov QWORD PTR [r10+-16], r8
sbb rax, QWORD PTR [rdx+-8]
mov r8, QWORD PTR [r10]
mov QWORD PTR [r10+-8], rax
sbb r8, QWORD PTR [rdx]
mov rax, QWORD PTR [r10+8]
mov QWORD PTR [r10], r8
sbb rax, QWORD PTR [rdx+8]
mov r8, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], rax
sbb r8, QWORD PTR [rdx+16]
mov rax, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], r8
sbb rax, QWORD PTR [rdx+24]
mov r8, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], rax
sbb r8, QWORD PTR [rdx+32]
mov rax, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], r8
sbb rax, QWORD PTR [rdx+40]
mov r8, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], rax
sbb r8, QWORD PTR [rdx+48]
mov rax, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], r8
sbb rax, QWORD PTR [rdx+56]
mov r8, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], rax
sbb r8, QWORD PTR [rdx+64]
mov rax, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], r8
sbb rax, QWORD PTR [rdx+72]
mov r8, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], rax
sbb r8, QWORD PTR [rdx+80]
mov rax, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], r8
sbb rax, QWORD PTR [rdx+88]
mov r8, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], rax
sbb r8, QWORD PTR [rdx+96]
mov rax, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], r8
sbb rax, QWORD PTR [rdx+104]
mov r8, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], rax
sbb r8, QWORD PTR [rdx+112]
mov rax, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], r8
sbb rax, QWORD PTR [rdx+120]
mov r8, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], rax
sbb r8, QWORD PTR [rdx+128]
mov rax, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], r8
sbb rax, QWORD PTR [rdx+136]
mov r8, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], rax
sbb r8, QWORD PTR [rdx+144]
mov rax, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], r8
sbb rax, QWORD PTR [rdx+152]
mov r8, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], rax
sbb r8, QWORD PTR [rdx+160]
mov rax, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], r8
sbb rax, QWORD PTR [rdx+168]
mov r8, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], rax
sbb r8, QWORD PTR [rdx+176]
mov rax, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], r8
sbb rax, QWORD PTR [rdx+184]
mov r8, QWORD PTR [r10+192]
mov QWORD PTR [r10+184], rax
sbb r8, QWORD PTR [rdx+192]
mov rax, QWORD PTR [r10+200]
mov QWORD PTR [r10+192], r8
sbb rax, QWORD PTR [rdx+200]
mov r8, QWORD PTR [r10+208]
mov QWORD PTR [r10+200], rax
sbb r8, QWORD PTR [rdx+208]
mov rax, QWORD PTR [r10+216]
mov QWORD PTR [r10+208], r8
sbb rax, QWORD PTR [rdx+216]
mov r8, QWORD PTR [r10+224]
mov QWORD PTR [r10+216], rax
sbb r8, QWORD PTR [rdx+224]
mov rax, QWORD PTR [r10+232]
mov QWORD PTR [r10+224], r8
sbb rax, QWORD PTR [rdx+232]
mov r8, QWORD PTR [r10+240]
mov QWORD PTR [r10+232], rax
sbb r8, QWORD PTR [rdx+240]
mov rax, QWORD PTR [r10+248]
mov QWORD PTR [r10+240], r8
sbb rax, QWORD PTR [rdx+248]
mov QWORD PTR [r10+248], rax
sbb r9, 0
sub rdx, 512
mov r8, QWORD PTR [r10+-256]
sub r8, QWORD PTR [rdx+-256]
mov rax, QWORD PTR [r10+-248]
mov QWORD PTR [r10+-256], r8
sbb rax, QWORD PTR [rdx+-248]
mov r8, QWORD PTR [r10+-240]
mov QWORD PTR [r10+-248], rax
sbb r8, QWORD PTR [rdx+-240]
mov rax, QWORD PTR [r10+-232]
mov QWORD PTR [r10+-240], r8
sbb rax, QWORD PTR [rdx+-232]
mov r8, QWORD PTR [r10+-224]
mov QWORD PTR [r10+-232], rax
sbb r8, QWORD PTR [rdx+-224]
mov rax, QWORD PTR [r10+-216]
mov QWORD PTR [r10+-224], r8
sbb rax, QWORD PTR [rdx+-216]
mov r8, QWORD PTR [r10+-208]
mov QWORD PTR [r10+-216], rax
sbb r8, QWORD PTR [rdx+-208]
mov rax, QWORD PTR [r10+-200]
mov QWORD PTR [r10+-208], r8
sbb rax, QWORD PTR [rdx+-200]
mov r8, QWORD PTR [r10+-192]
mov QWORD PTR [r10+-200], rax
sbb r8, QWORD PTR [rdx+-192]
mov rax, QWORD PTR [r10+-184]
mov QWORD PTR [r10+-192], r8
sbb rax, QWORD PTR [rdx+-184]
mov r8, QWORD PTR [r10+-176]
mov QWORD PTR [r10+-184], rax
sbb r8, QWORD PTR [rdx+-176]
mov rax, QWORD PTR [r10+-168]
mov QWORD PTR [r10+-176], r8
sbb rax, QWORD PTR [rdx+-168]
mov r8, QWORD PTR [r10+-160]
mov QWORD PTR [r10+-168], rax
sbb r8, QWORD PTR [rdx+-160]
mov rax, QWORD PTR [r10+-152]
mov QWORD PTR [r10+-160], r8
sbb rax, QWORD PTR [rdx+-152]
mov r8, QWORD PTR [r10+-144]
mov QWORD PTR [r10+-152], rax
sbb r8, QWORD PTR [rdx+-144]
mov rax, QWORD PTR [r10+-136]
mov QWORD PTR [r10+-144], r8
sbb rax, QWORD PTR [rdx+-136]
mov r8, QWORD PTR [r10+-128]
mov QWORD PTR [r10+-136], rax
sbb r8, QWORD PTR [rdx+-128]
mov rax, QWORD PTR [r10+-120]
mov QWORD PTR [r10+-128], r8
sbb rax, QWORD PTR [rdx+-120]
mov r8, QWORD PTR [r10+-112]
mov QWORD PTR [r10+-120], rax
sbb r8, QWORD PTR [rdx+-112]
mov rax, QWORD PTR [r10+-104]
mov QWORD PTR [r10+-112], r8
sbb rax, QWORD PTR [rdx+-104]
mov r8, QWORD PTR [r10+-96]
mov QWORD PTR [r10+-104], rax
sbb r8, QWORD PTR [rdx+-96]
mov rax, QWORD PTR [r10+-88]
mov QWORD PTR [r10+-96], r8
sbb rax, QWORD PTR [rdx+-88]
mov r8, QWORD PTR [r10+-80]
mov QWORD PTR [r10+-88], rax
sbb r8, QWORD PTR [rdx+-80]
mov rax, QWORD PTR [r10+-72]
mov QWORD PTR [r10+-80], r8
sbb rax, QWORD PTR [rdx+-72]
mov r8, QWORD PTR [r10+-64]
mov QWORD PTR [r10+-72], rax
sbb r8, QWORD PTR [rdx+-64]
mov rax, QWORD PTR [r10+-56]
mov QWORD PTR [r10+-64], r8
sbb rax, QWORD PTR [rdx+-56]
mov r8, QWORD PTR [r10+-48]
mov QWORD PTR [r10+-56], rax
sbb r8, QWORD PTR [rdx+-48]
mov rax, QWORD PTR [r10+-40]
mov QWORD PTR [r10+-48], r8
sbb rax, QWORD PTR [rdx+-40]
mov r8, QWORD PTR [r10+-32]
mov QWORD PTR [r10+-40], rax
sbb r8, QWORD PTR [rdx+-32]
mov rax, QWORD PTR [r10+-24]
mov QWORD PTR [r10+-32], r8
sbb rax, QWORD PTR [rdx+-24]
mov r8, QWORD PTR [r10+-16]
mov QWORD PTR [r10+-24], rax
sbb r8, QWORD PTR [rdx+-16]
mov rax, QWORD PTR [r10+-8]
mov QWORD PTR [r10+-16], r8
sbb rax, QWORD PTR [rdx+-8]
mov r8, QWORD PTR [r10]
mov QWORD PTR [r10+-8], rax
sbb r8, QWORD PTR [rdx]
mov rax, QWORD PTR [r10+8]
mov QWORD PTR [r10], r8
sbb rax, QWORD PTR [rdx+8]
mov r8, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], rax
sbb r8, QWORD PTR [rdx+16]
mov rax, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], r8
sbb rax, QWORD PTR [rdx+24]
mov r8, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], rax
sbb r8, QWORD PTR [rdx+32]
mov rax, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], r8
sbb rax, QWORD PTR [rdx+40]
mov r8, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], rax
sbb r8, QWORD PTR [rdx+48]
mov rax, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], r8
sbb rax, QWORD PTR [rdx+56]
mov r8, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], rax
sbb r8, QWORD PTR [rdx+64]
mov rax, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], r8
sbb rax, QWORD PTR [rdx+72]
mov r8, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], rax
sbb r8, QWORD PTR [rdx+80]
mov rax, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], r8
sbb rax, QWORD PTR [rdx+88]
mov r8, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], rax
sbb r8, QWORD PTR [rdx+96]
mov rax, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], r8
sbb rax, QWORD PTR [rdx+104]
mov r8, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], rax
sbb r8, QWORD PTR [rdx+112]
mov rax, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], r8
sbb rax, QWORD PTR [rdx+120]
mov r8, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], rax
sbb r8, QWORD PTR [rdx+128]
mov rax, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], r8
sbb rax, QWORD PTR [rdx+136]
mov r8, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], rax
sbb r8, QWORD PTR [rdx+144]
mov rax, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], r8
sbb rax, QWORD PTR [rdx+152]
mov r8, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], rax
sbb r8, QWORD PTR [rdx+160]
mov rax, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], r8
sbb rax, QWORD PTR [rdx+168]
mov r8, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], rax
sbb r8, QWORD PTR [rdx+176]
mov rax, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], r8
sbb rax, QWORD PTR [rdx+184]
mov r8, QWORD PTR [r10+192]
mov QWORD PTR [r10+184], rax
sbb r8, QWORD PTR [rdx+192]
mov rax, QWORD PTR [r10+200]
mov QWORD PTR [r10+192], r8
sbb rax, QWORD PTR [rdx+200]
mov r8, QWORD PTR [r10+208]
mov QWORD PTR [r10+200], rax
sbb r8, QWORD PTR [rdx+208]
mov rax, QWORD PTR [r10+216]
mov QWORD PTR [r10+208], r8
sbb rax, QWORD PTR [rdx+216]
mov r8, QWORD PTR [r10+224]
mov QWORD PTR [r10+216], rax
sbb r8, QWORD PTR [rdx+224]
mov rax, QWORD PTR [r10+232]
mov QWORD PTR [r10+224], r8
sbb rax, QWORD PTR [rdx+232]
mov r8, QWORD PTR [r10+240]
mov QWORD PTR [r10+232], rax
sbb r8, QWORD PTR [rdx+240]
mov rax, QWORD PTR [r10+248]
mov QWORD PTR [r10+240], r8
sbb rax, QWORD PTR [rdx+248]
mov QWORD PTR [r10+248], rax
sbb r9, 0
mov rcx, QWORD PTR [rsp+512]
neg r9
add rcx, 512
mov r8, QWORD PTR [rcx+-256]
sub r8, QWORD PTR [r10+-256]
mov rax, QWORD PTR [rcx+-248]
mov QWORD PTR [rcx+-256], r8
sbb rax, QWORD PTR [r10+-248]
mov r8, QWORD PTR [rcx+-240]
mov QWORD PTR [rcx+-248], rax
sbb r8, QWORD PTR [r10+-240]
mov rax, QWORD PTR [rcx+-232]
mov QWORD PTR [rcx+-240], r8
sbb rax, QWORD PTR [r10+-232]
mov r8, QWORD PTR [rcx+-224]
mov QWORD PTR [rcx+-232], rax
sbb r8, QWORD PTR [r10+-224]
mov rax, QWORD PTR [rcx+-216]
mov QWORD PTR [rcx+-224], r8
sbb rax, QWORD PTR [r10+-216]
mov r8, QWORD PTR [rcx+-208]
mov QWORD PTR [rcx+-216], rax
sbb r8, QWORD PTR [r10+-208]
mov rax, QWORD PTR [rcx+-200]
mov QWORD PTR [rcx+-208], r8
sbb rax, QWORD PTR [r10+-200]
mov r8, QWORD PTR [rcx+-192]
mov QWORD PTR [rcx+-200], rax
sbb r8, QWORD PTR [r10+-192]
mov rax, QWORD PTR [rcx+-184]
mov QWORD PTR [rcx+-192], r8
sbb rax, QWORD PTR [r10+-184]
mov r8, QWORD PTR [rcx+-176]
mov QWORD PTR [rcx+-184], rax
sbb r8, QWORD PTR [r10+-176]
mov rax, QWORD PTR [rcx+-168]
mov QWORD PTR [rcx+-176], r8
sbb rax, QWORD PTR [r10+-168]
mov r8, QWORD PTR [rcx+-160]
mov QWORD PTR [rcx+-168], rax
sbb r8, QWORD PTR [r10+-160]
mov rax, QWORD PTR [rcx+-152]
mov QWORD PTR [rcx+-160], r8
sbb rax, QWORD PTR [r10+-152]
mov r8, QWORD PTR [rcx+-144]
mov QWORD PTR [rcx+-152], rax
sbb r8, QWORD PTR [r10+-144]
mov rax, QWORD PTR [rcx+-136]
mov QWORD PTR [rcx+-144], r8
sbb rax, QWORD PTR [r10+-136]
mov r8, QWORD PTR [rcx+-128]
mov QWORD PTR [rcx+-136], rax
sbb r8, QWORD PTR [r10+-128]
mov rax, QWORD PTR [rcx+-120]
mov QWORD PTR [rcx+-128], r8
sbb rax, QWORD PTR [r10+-120]
mov r8, QWORD PTR [rcx+-112]
mov QWORD PTR [rcx+-120], rax
sbb r8, QWORD PTR [r10+-112]
mov rax, QWORD PTR [rcx+-104]
mov QWORD PTR [rcx+-112], r8
sbb rax, QWORD PTR [r10+-104]
mov r8, QWORD PTR [rcx+-96]
mov QWORD PTR [rcx+-104], rax
sbb r8, QWORD PTR [r10+-96]
mov rax, QWORD PTR [rcx+-88]
mov QWORD PTR [rcx+-96], r8
sbb rax, QWORD PTR [r10+-88]
mov r8, QWORD PTR [rcx+-80]
mov QWORD PTR [rcx+-88], rax
sbb r8, QWORD PTR [r10+-80]
mov rax, QWORD PTR [rcx+-72]
mov QWORD PTR [rcx+-80], r8
sbb rax, QWORD PTR [r10+-72]
mov r8, QWORD PTR [rcx+-64]
mov QWORD PTR [rcx+-72], rax
sbb r8, QWORD PTR [r10+-64]
mov rax, QWORD PTR [rcx+-56]
mov QWORD PTR [rcx+-64], r8
sbb rax, QWORD PTR [r10+-56]
mov r8, QWORD PTR [rcx+-48]
mov QWORD PTR [rcx+-56], rax
sbb r8, QWORD PTR [r10+-48]
mov rax, QWORD PTR [rcx+-40]
mov QWORD PTR [rcx+-48], r8
sbb rax, QWORD PTR [r10+-40]
mov r8, QWORD PTR [rcx+-32]
mov QWORD PTR [rcx+-40], rax
sbb r8, QWORD PTR [r10+-32]
mov rax, QWORD PTR [rcx+-24]
mov QWORD PTR [rcx+-32], r8
sbb rax, QWORD PTR [r10+-24]
mov r8, QWORD PTR [rcx+-16]
mov QWORD PTR [rcx+-24], rax
sbb r8, QWORD PTR [r10+-16]
mov rax, QWORD PTR [rcx+-8]
mov QWORD PTR [rcx+-16], r8
sbb rax, QWORD PTR [r10+-8]
mov r8, QWORD PTR [rcx]
mov QWORD PTR [rcx+-8], rax
sbb r8, QWORD PTR [r10]
mov rax, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
sbb rax, QWORD PTR [r10+8]
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], rax
sbb r8, QWORD PTR [r10+16]
mov rax, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
sbb rax, QWORD PTR [r10+24]
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], rax
sbb r8, QWORD PTR [r10+32]
mov rax, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
sbb rax, QWORD PTR [r10+40]
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], rax
sbb r8, QWORD PTR [r10+48]
mov rax, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
sbb rax, QWORD PTR [r10+56]
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], rax
sbb r8, QWORD PTR [r10+64]
mov rax, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
sbb rax, QWORD PTR [r10+72]
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], rax
sbb r8, QWORD PTR [r10+80]
mov rax, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
sbb rax, QWORD PTR [r10+88]
mov r8, QWORD PTR [rcx+96]
mov QWORD PTR [rcx+88], rax
sbb r8, QWORD PTR [r10+96]
mov rax, QWORD PTR [rcx+104]
mov QWORD PTR [rcx+96], r8
sbb rax, QWORD PTR [r10+104]
mov r8, QWORD PTR [rcx+112]
mov QWORD PTR [rcx+104], rax
sbb r8, QWORD PTR [r10+112]
mov rax, QWORD PTR [rcx+120]
mov QWORD PTR [rcx+112], r8
sbb rax, QWORD PTR [r10+120]
mov r8, QWORD PTR [rcx+128]
mov QWORD PTR [rcx+120], rax
sbb r8, QWORD PTR [r10+128]
mov rax, QWORD PTR [rcx+136]
mov QWORD PTR [rcx+128], r8
sbb rax, QWORD PTR [r10+136]
mov r8, QWORD PTR [rcx+144]
mov QWORD PTR [rcx+136], rax
sbb r8, QWORD PTR [r10+144]
mov rax, QWORD PTR [rcx+152]
mov QWORD PTR [rcx+144], r8
sbb rax, QWORD PTR [r10+152]
mov r8, QWORD PTR [rcx+160]
mov QWORD PTR [rcx+152], rax
sbb r8, QWORD PTR [r10+160]
mov rax, QWORD PTR [rcx+168]
mov QWORD PTR [rcx+160], r8
sbb rax, QWORD PTR [r10+168]
mov r8, QWORD PTR [rcx+176]
mov QWORD PTR [rcx+168], rax
sbb r8, QWORD PTR [r10+176]
mov rax, QWORD PTR [rcx+184]
mov QWORD PTR [rcx+176], r8
sbb rax, QWORD PTR [r10+184]
mov r8, QWORD PTR [rcx+192]
mov QWORD PTR [rcx+184], rax
sbb r8, QWORD PTR [r10+192]
mov rax, QWORD PTR [rcx+200]
mov QWORD PTR [rcx+192], r8
sbb rax, QWORD PTR [r10+200]
mov r8, QWORD PTR [rcx+208]
mov QWORD PTR [rcx+200], rax
sbb r8, QWORD PTR [r10+208]
mov rax, QWORD PTR [rcx+216]
mov QWORD PTR [rcx+208], r8
sbb rax, QWORD PTR [r10+216]
mov r8, QWORD PTR [rcx+224]
mov QWORD PTR [rcx+216], rax
sbb r8, QWORD PTR [r10+224]
mov rax, QWORD PTR [rcx+232]
mov QWORD PTR [rcx+224], r8
sbb rax, QWORD PTR [r10+232]
mov r8, QWORD PTR [rcx+240]
mov QWORD PTR [rcx+232], rax
sbb r8, QWORD PTR [r10+240]
mov rax, QWORD PTR [rcx+248]
mov QWORD PTR [rcx+240], r8
sbb rax, QWORD PTR [r10+248]
mov QWORD PTR [rcx+248], rax
sbb r9, 0
mov rcx, QWORD PTR [rsp+512]
add rcx, 768
; Add in word
mov r8, QWORD PTR [rcx]
add r8, r9
mov rax, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
adc rax, 0
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], rax
adc r8, 0
mov rax, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
adc rax, 0
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], rax
adc r8, 0
mov rax, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
adc rax, 0
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], rax
adc r8, 0
mov rax, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
adc rax, 0
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], rax
adc r8, 0
mov rax, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
adc rax, 0
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], rax
adc r8, 0
mov rax, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
adc rax, 0
mov r8, QWORD PTR [rcx+96]
mov QWORD PTR [rcx+88], rax
adc r8, 0
mov rax, QWORD PTR [rcx+104]
mov QWORD PTR [rcx+96], r8
adc rax, 0
mov r8, QWORD PTR [rcx+112]
mov QWORD PTR [rcx+104], rax
adc r8, 0
mov rax, QWORD PTR [rcx+120]
mov QWORD PTR [rcx+112], r8
adc rax, 0
mov r8, QWORD PTR [rcx+128]
mov QWORD PTR [rcx+120], rax
adc r8, 0
mov rax, QWORD PTR [rcx+136]
mov QWORD PTR [rcx+128], r8
adc rax, 0
mov r8, QWORD PTR [rcx+144]
mov QWORD PTR [rcx+136], rax
adc r8, 0
mov rax, QWORD PTR [rcx+152]
mov QWORD PTR [rcx+144], r8
adc rax, 0
mov r8, QWORD PTR [rcx+160]
mov QWORD PTR [rcx+152], rax
adc r8, 0
mov rax, QWORD PTR [rcx+168]
mov QWORD PTR [rcx+160], r8
adc rax, 0
mov r8, QWORD PTR [rcx+176]
mov QWORD PTR [rcx+168], rax
adc r8, 0
mov rax, QWORD PTR [rcx+184]
mov QWORD PTR [rcx+176], r8
adc rax, 0
mov r8, QWORD PTR [rcx+192]
mov QWORD PTR [rcx+184], rax
adc r8, 0
mov rax, QWORD PTR [rcx+200]
mov QWORD PTR [rcx+192], r8
adc rax, 0
mov r8, QWORD PTR [rcx+208]
mov QWORD PTR [rcx+200], rax
adc r8, 0
mov rax, QWORD PTR [rcx+216]
mov QWORD PTR [rcx+208], r8
adc rax, 0
mov r8, QWORD PTR [rcx+224]
mov QWORD PTR [rcx+216], rax
adc r8, 0
mov rax, QWORD PTR [rcx+232]
mov QWORD PTR [rcx+224], r8
adc rax, 0
mov r8, QWORD PTR [rcx+240]
mov QWORD PTR [rcx+232], rax
adc r8, 0
mov rax, QWORD PTR [rcx+248]
mov QWORD PTR [rcx+240], r8
adc rax, 0
mov QWORD PTR [rcx+248], rax
mov rdx, QWORD PTR [rsp+520]
mov rcx, QWORD PTR [rsp+512]
add rsp, 528
ret
sp_4096_sqr_64 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * Karatsuba: ah^2, al^2, (al - ah)^2
; *
; * a is read as two 32-word halves: al = a[0..31] (at rdx) and
; * ah = a[32..63] (at rdx + 256 bytes). Three calls to sp_2048_sqr_avx2_32
; * produce |al - ah|^2, ah^2 and al^2, which are then combined as:
; *   a^2 = al^2 + 2^2048 * (al^2 + ah^2 - (al - ah)^2) + 2^4096 * ah^2
; *
; * r A single precision integer. Passed in rcx.
; * a A single precision integer. Passed in rdx.
; *
; * Stack frame is 528 bytes: [rsp+0..rsp+511] scratch, [rsp+512] saved r,
; * [rsp+520] saved a.
; *
; * NOTE(review): sp_2048_sqr_avx2_32 is defined outside this block; the
; * comments below assume it takes r in rcx and a in rdx, writes 64 words at r
; * and tolerates r == a - confirm against its definition.
; */
_text SEGMENT READONLY PARA
sp_4096_sqr_avx2_64 PROC
; Allocate the frame and save both arguments; they are reloaded from the
; frame after every call.
sub rsp, 528
mov QWORD PTR [rsp+512], rcx
mov QWORD PTR [rsp+520], rdx
; r9 = borrow accumulator, r10 = scratch (bottom of the frame),
; r11 = ah (a + 256 bytes).
mov r9, 0
mov r10, rsp
lea r11, QWORD PTR [rdx+256]
; scratch[0..31] = al - ah. The interleaved mov loads/stores do not modify
; flags, so the borrow flows through the whole sub/sbb chain.
mov rax, QWORD PTR [rdx]
sub rax, QWORD PTR [r11]
mov r8, QWORD PTR [rdx+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [r11+8]
mov rax, QWORD PTR [rdx+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [r11+16]
mov r8, QWORD PTR [rdx+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [r11+24]
mov rax, QWORD PTR [rdx+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [r11+32]
mov r8, QWORD PTR [rdx+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [r11+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [r11+48]
mov r8, QWORD PTR [rdx+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [r11+56]
mov rax, QWORD PTR [rdx+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [r11+64]
mov r8, QWORD PTR [rdx+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [r11+72]
mov rax, QWORD PTR [rdx+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [r11+80]
mov r8, QWORD PTR [rdx+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [r11+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [r11+96]
mov r8, QWORD PTR [rdx+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [r11+104]
mov rax, QWORD PTR [rdx+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [r11+112]
mov r8, QWORD PTR [rdx+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [r11+120]
mov rax, QWORD PTR [rdx+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [r11+128]
mov r8, QWORD PTR [rdx+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [r11+136]
mov rax, QWORD PTR [rdx+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [r11+144]
mov r8, QWORD PTR [rdx+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [r11+152]
mov rax, QWORD PTR [rdx+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [r11+160]
mov r8, QWORD PTR [rdx+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [r11+168]
mov rax, QWORD PTR [rdx+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [r11+176]
mov r8, QWORD PTR [rdx+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [r11+184]
mov rax, QWORD PTR [rdx+192]
mov QWORD PTR [r10+184], r8
sbb rax, QWORD PTR [r11+192]
mov r8, QWORD PTR [rdx+200]
mov QWORD PTR [r10+192], rax
sbb r8, QWORD PTR [r11+200]
mov rax, QWORD PTR [rdx+208]
mov QWORD PTR [r10+200], r8
sbb rax, QWORD PTR [r11+208]
mov r8, QWORD PTR [rdx+216]
mov QWORD PTR [r10+208], rax
sbb r8, QWORD PTR [r11+216]
mov rax, QWORD PTR [rdx+224]
mov QWORD PTR [r10+216], r8
sbb rax, QWORD PTR [r11+224]
mov r8, QWORD PTR [rdx+232]
mov QWORD PTR [r10+224], rax
sbb r8, QWORD PTR [r11+232]
mov rax, QWORD PTR [rdx+240]
mov QWORD PTR [r10+232], r8
sbb rax, QWORD PTR [r11+240]
mov r8, QWORD PTR [rdx+248]
mov QWORD PTR [r10+240], rax
sbb r8, QWORD PTR [r11+248]
mov QWORD PTR [r10+248], r8
; r9 = 0 - borrow: all ones when the subtraction borrowed (al < ah),
; zero otherwise. It is used as a mask below.
sbb r9, 0
; Cond Negate
; scratch = (scratch XOR r9) - r9: the two's complement negation when r9 is
; all ones, unchanged when r9 is zero, so scratch ends up holding |al - ah|.
; r11 carries the "+1" from word to word: after the neg/sbb pair it is 1 only
; when r9 is all ones and word 0 was zero; afterwards it is refreshed from CF
; by setc. Its upper bits stay zero, so writing only r11b is sufficient.
mov rax, QWORD PTR [r10]
mov r11, r9
xor rax, r9
neg r11
sub rax, r9
mov r8, QWORD PTR [r10+8]
sbb r11, 0
mov QWORD PTR [r10], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+16]
setc r11b
mov QWORD PTR [r10+8], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+24]
setc r11b
mov QWORD PTR [r10+16], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+32]
setc r11b
mov QWORD PTR [r10+24], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+40]
setc r11b
mov QWORD PTR [r10+32], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+48]
setc r11b
mov QWORD PTR [r10+40], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+56]
setc r11b
mov QWORD PTR [r10+48], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+64]
setc r11b
mov QWORD PTR [r10+56], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+72]
setc r11b
mov QWORD PTR [r10+64], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+80]
setc r11b
mov QWORD PTR [r10+72], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+88]
setc r11b
mov QWORD PTR [r10+80], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+96]
setc r11b
mov QWORD PTR [r10+88], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+104]
setc r11b
mov QWORD PTR [r10+96], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+112]
setc r11b
mov QWORD PTR [r10+104], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+120]
setc r11b
mov QWORD PTR [r10+112], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+128]
setc r11b
mov QWORD PTR [r10+120], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+136]
setc r11b
mov QWORD PTR [r10+128], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+144]
setc r11b
mov QWORD PTR [r10+136], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+152]
setc r11b
mov QWORD PTR [r10+144], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+160]
setc r11b
mov QWORD PTR [r10+152], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+168]
setc r11b
mov QWORD PTR [r10+160], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+176]
setc r11b
mov QWORD PTR [r10+168], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+184]
setc r11b
mov QWORD PTR [r10+176], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+192]
setc r11b
mov QWORD PTR [r10+184], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+200]
setc r11b
mov QWORD PTR [r10+192], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+208]
setc r11b
mov QWORD PTR [r10+200], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+216]
setc r11b
mov QWORD PTR [r10+208], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+224]
setc r11b
mov QWORD PTR [r10+216], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+232]
setc r11b
mov QWORD PTR [r10+224], rax
xor r8, r9
add r8, r11
mov rax, QWORD PTR [r10+240]
setc r11b
mov QWORD PTR [r10+232], r8
xor rax, r9
add rax, r11
mov r8, QWORD PTR [r10+248]
setc r11b
mov QWORD PTR [r10+240], rax
xor r8, r9
add r8, r11
mov QWORD PTR [r10+248], r8
; First squaring: both pointers are the scratch buffer, so |al - ah| is
; squared in place (presumably filling scratch[0..63] - see the NOTE(review)
; in the header).
mov rdx, r10
mov rcx, rsp
call sp_2048_sqr_avx2_32
; Second squaring: input a + 256 bytes (ah), output r + 512 bytes, i.e. ah^2
; goes to the upper half of r.
mov rdx, QWORD PTR [rsp+520]
mov rcx, QWORD PTR [rsp+512]
add rdx, 256
add rcx, 512
call sp_2048_sqr_avx2_32
; Third squaring: input a (al), output r, i.e. al^2 goes to the lower half
; of r.
mov rdx, QWORD PTR [rsp+520]
mov rcx, QWORD PTR [rsp+512]
call sp_2048_sqr_avx2_32
; NOTE(review): the reload below looks redundant - rdx is overwritten by the
; very next instruction after ENDIF and rcx is reloaded before its next use.
IFDEF _WIN64
mov rdx, QWORD PTR [rsp+520]
mov rcx, QWORD PTR [rsp+512]
ENDIF
; rdx = r + 768 bytes and r10 = scratch + 256 bytes, so displacements
; -256..+248 cover r[64..127] and scratch[0..63]. r9 restarts at zero to
; accumulate borrows.
mov rdx, QWORD PTR [rsp+512]
lea r10, QWORD PTR [rsp+256]
add rdx, 768
mov r9, 0
; scratch[0..63] -= r[64..127] (subtract the second squaring's result, ah^2).
mov r8, QWORD PTR [r10+-256]
sub r8, QWORD PTR [rdx+-256]
mov rax, QWORD PTR [r10+-248]
mov QWORD PTR [r10+-256], r8
sbb rax, QWORD PTR [rdx+-248]
mov r8, QWORD PTR [r10+-240]
mov QWORD PTR [r10+-248], rax
sbb r8, QWORD PTR [rdx+-240]
mov rax, QWORD PTR [r10+-232]
mov QWORD PTR [r10+-240], r8
sbb rax, QWORD PTR [rdx+-232]
mov r8, QWORD PTR [r10+-224]
mov QWORD PTR [r10+-232], rax
sbb r8, QWORD PTR [rdx+-224]
mov rax, QWORD PTR [r10+-216]
mov QWORD PTR [r10+-224], r8
sbb rax, QWORD PTR [rdx+-216]
mov r8, QWORD PTR [r10+-208]
mov QWORD PTR [r10+-216], rax
sbb r8, QWORD PTR [rdx+-208]
mov rax, QWORD PTR [r10+-200]
mov QWORD PTR [r10+-208], r8
sbb rax, QWORD PTR [rdx+-200]
mov r8, QWORD PTR [r10+-192]
mov QWORD PTR [r10+-200], rax
sbb r8, QWORD PTR [rdx+-192]
mov rax, QWORD PTR [r10+-184]
mov QWORD PTR [r10+-192], r8
sbb rax, QWORD PTR [rdx+-184]
mov r8, QWORD PTR [r10+-176]
mov QWORD PTR [r10+-184], rax
sbb r8, QWORD PTR [rdx+-176]
mov rax, QWORD PTR [r10+-168]
mov QWORD PTR [r10+-176], r8
sbb rax, QWORD PTR [rdx+-168]
mov r8, QWORD PTR [r10+-160]
mov QWORD PTR [r10+-168], rax
sbb r8, QWORD PTR [rdx+-160]
mov rax, QWORD PTR [r10+-152]
mov QWORD PTR [r10+-160], r8
sbb rax, QWORD PTR [rdx+-152]
mov r8, QWORD PTR [r10+-144]
mov QWORD PTR [r10+-152], rax
sbb r8, QWORD PTR [rdx+-144]
mov rax, QWORD PTR [r10+-136]
mov QWORD PTR [r10+-144], r8
sbb rax, QWORD PTR [rdx+-136]
mov r8, QWORD PTR [r10+-128]
mov QWORD PTR [r10+-136], rax
sbb r8, QWORD PTR [rdx+-128]
mov rax, QWORD PTR [r10+-120]
mov QWORD PTR [r10+-128], r8
sbb rax, QWORD PTR [rdx+-120]
mov r8, QWORD PTR [r10+-112]
mov QWORD PTR [r10+-120], rax
sbb r8, QWORD PTR [rdx+-112]
mov rax, QWORD PTR [r10+-104]
mov QWORD PTR [r10+-112], r8
sbb rax, QWORD PTR [rdx+-104]
mov r8, QWORD PTR [r10+-96]
mov QWORD PTR [r10+-104], rax
sbb r8, QWORD PTR [rdx+-96]
mov rax, QWORD PTR [r10+-88]
mov QWORD PTR [r10+-96], r8
sbb rax, QWORD PTR [rdx+-88]
mov r8, QWORD PTR [r10+-80]
mov QWORD PTR [r10+-88], rax
sbb r8, QWORD PTR [rdx+-80]
mov rax, QWORD PTR [r10+-72]
mov QWORD PTR [r10+-80], r8
sbb rax, QWORD PTR [rdx+-72]
mov r8, QWORD PTR [r10+-64]
mov QWORD PTR [r10+-72], rax
sbb r8, QWORD PTR [rdx+-64]
mov rax, QWORD PTR [r10+-56]
mov QWORD PTR [r10+-64], r8
sbb rax, QWORD PTR [rdx+-56]
mov r8, QWORD PTR [r10+-48]
mov QWORD PTR [r10+-56], rax
sbb r8, QWORD PTR [rdx+-48]
mov rax, QWORD PTR [r10+-40]
mov QWORD PTR [r10+-48], r8
sbb rax, QWORD PTR [rdx+-40]
mov r8, QWORD PTR [r10+-32]
mov QWORD PTR [r10+-40], rax
sbb r8, QWORD PTR [rdx+-32]
mov rax, QWORD PTR [r10+-24]
mov QWORD PTR [r10+-32], r8
sbb rax, QWORD PTR [rdx+-24]
mov r8, QWORD PTR [r10+-16]
mov QWORD PTR [r10+-24], rax
sbb r8, QWORD PTR [rdx+-16]
mov rax, QWORD PTR [r10+-8]
mov QWORD PTR [r10+-16], r8
sbb rax, QWORD PTR [rdx+-8]
mov r8, QWORD PTR [r10]
mov QWORD PTR [r10+-8], rax
sbb r8, QWORD PTR [rdx]
mov rax, QWORD PTR [r10+8]
mov QWORD PTR [r10], r8
sbb rax, QWORD PTR [rdx+8]
mov r8, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], rax
sbb r8, QWORD PTR [rdx+16]
mov rax, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], r8
sbb rax, QWORD PTR [rdx+24]
mov r8, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], rax
sbb r8, QWORD PTR [rdx+32]
mov rax, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], r8
sbb rax, QWORD PTR [rdx+40]
mov r8, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], rax
sbb r8, QWORD PTR [rdx+48]
mov rax, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], r8
sbb rax, QWORD PTR [rdx+56]
mov r8, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], rax
sbb r8, QWORD PTR [rdx+64]
mov rax, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], r8
sbb rax, QWORD PTR [rdx+72]
mov r8, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], rax
sbb r8, QWORD PTR [rdx+80]
mov rax, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], r8
sbb rax, QWORD PTR [rdx+88]
mov r8, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], rax
sbb r8, QWORD PTR [rdx+96]
mov rax, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], r8
sbb rax, QWORD PTR [rdx+104]
mov r8, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], rax
sbb r8, QWORD PTR [rdx+112]
mov rax, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], r8
sbb rax, QWORD PTR [rdx+120]
mov r8, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], rax
sbb r8, QWORD PTR [rdx+128]
mov rax, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], r8
sbb rax, QWORD PTR [rdx+136]
mov r8, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], rax
sbb r8, QWORD PTR [rdx+144]
mov rax, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], r8
sbb rax, QWORD PTR [rdx+152]
mov r8, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], rax
sbb r8, QWORD PTR [rdx+160]
mov rax, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], r8
sbb rax, QWORD PTR [rdx+168]
mov r8, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], rax
sbb r8, QWORD PTR [rdx+176]
mov rax, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], r8
sbb rax, QWORD PTR [rdx+184]
mov r8, QWORD PTR [r10+192]
mov QWORD PTR [r10+184], rax
sbb r8, QWORD PTR [rdx+192]
mov rax, QWORD PTR [r10+200]
mov QWORD PTR [r10+192], r8
sbb rax, QWORD PTR [rdx+200]
mov r8, QWORD PTR [r10+208]
mov QWORD PTR [r10+200], rax
sbb r8, QWORD PTR [rdx+208]
mov rax, QWORD PTR [r10+216]
mov QWORD PTR [r10+208], r8
sbb rax, QWORD PTR [rdx+216]
mov r8, QWORD PTR [r10+224]
mov QWORD PTR [r10+216], rax
sbb r8, QWORD PTR [rdx+224]
mov rax, QWORD PTR [r10+232]
mov QWORD PTR [r10+224], r8
sbb rax, QWORD PTR [rdx+232]
mov r8, QWORD PTR [r10+240]
mov QWORD PTR [r10+232], rax
sbb r8, QWORD PTR [rdx+240]
mov rax, QWORD PTR [r10+248]
mov QWORD PTR [r10+240], r8
sbb rax, QWORD PTR [rdx+248]
mov QWORD PTR [r10+248], rax
; r9 -= borrow out of the chain above.
sbb r9, 0
; rdx = r + 256 bytes: the same displacements now cover r[0..63].
sub rdx, 512
; scratch[0..63] -= r[0..63] (subtract the third squaring's result, al^2).
mov r8, QWORD PTR [r10+-256]
sub r8, QWORD PTR [rdx+-256]
mov rax, QWORD PTR [r10+-248]
mov QWORD PTR [r10+-256], r8
sbb rax, QWORD PTR [rdx+-248]
mov r8, QWORD PTR [r10+-240]
mov QWORD PTR [r10+-248], rax
sbb r8, QWORD PTR [rdx+-240]
mov rax, QWORD PTR [r10+-232]
mov QWORD PTR [r10+-240], r8
sbb rax, QWORD PTR [rdx+-232]
mov r8, QWORD PTR [r10+-224]
mov QWORD PTR [r10+-232], rax
sbb r8, QWORD PTR [rdx+-224]
mov rax, QWORD PTR [r10+-216]
mov QWORD PTR [r10+-224], r8
sbb rax, QWORD PTR [rdx+-216]
mov r8, QWORD PTR [r10+-208]
mov QWORD PTR [r10+-216], rax
sbb r8, QWORD PTR [rdx+-208]
mov rax, QWORD PTR [r10+-200]
mov QWORD PTR [r10+-208], r8
sbb rax, QWORD PTR [rdx+-200]
mov r8, QWORD PTR [r10+-192]
mov QWORD PTR [r10+-200], rax
sbb r8, QWORD PTR [rdx+-192]
mov rax, QWORD PTR [r10+-184]
mov QWORD PTR [r10+-192], r8
sbb rax, QWORD PTR [rdx+-184]
mov r8, QWORD PTR [r10+-176]
mov QWORD PTR [r10+-184], rax
sbb r8, QWORD PTR [rdx+-176]
mov rax, QWORD PTR [r10+-168]
mov QWORD PTR [r10+-176], r8
sbb rax, QWORD PTR [rdx+-168]
mov r8, QWORD PTR [r10+-160]
mov QWORD PTR [r10+-168], rax
sbb r8, QWORD PTR [rdx+-160]
mov rax, QWORD PTR [r10+-152]
mov QWORD PTR [r10+-160], r8
sbb rax, QWORD PTR [rdx+-152]
mov r8, QWORD PTR [r10+-144]
mov QWORD PTR [r10+-152], rax
sbb r8, QWORD PTR [rdx+-144]
mov rax, QWORD PTR [r10+-136]
mov QWORD PTR [r10+-144], r8
sbb rax, QWORD PTR [rdx+-136]
mov r8, QWORD PTR [r10+-128]
mov QWORD PTR [r10+-136], rax
sbb r8, QWORD PTR [rdx+-128]
mov rax, QWORD PTR [r10+-120]
mov QWORD PTR [r10+-128], r8
sbb rax, QWORD PTR [rdx+-120]
mov r8, QWORD PTR [r10+-112]
mov QWORD PTR [r10+-120], rax
sbb r8, QWORD PTR [rdx+-112]
mov rax, QWORD PTR [r10+-104]
mov QWORD PTR [r10+-112], r8
sbb rax, QWORD PTR [rdx+-104]
mov r8, QWORD PTR [r10+-96]
mov QWORD PTR [r10+-104], rax
sbb r8, QWORD PTR [rdx+-96]
mov rax, QWORD PTR [r10+-88]
mov QWORD PTR [r10+-96], r8
sbb rax, QWORD PTR [rdx+-88]
mov r8, QWORD PTR [r10+-80]
mov QWORD PTR [r10+-88], rax
sbb r8, QWORD PTR [rdx+-80]
mov rax, QWORD PTR [r10+-72]
mov QWORD PTR [r10+-80], r8
sbb rax, QWORD PTR [rdx+-72]
mov r8, QWORD PTR [r10+-64]
mov QWORD PTR [r10+-72], rax
sbb r8, QWORD PTR [rdx+-64]
mov rax, QWORD PTR [r10+-56]
mov QWORD PTR [r10+-64], r8
sbb rax, QWORD PTR [rdx+-56]
mov r8, QWORD PTR [r10+-48]
mov QWORD PTR [r10+-56], rax
sbb r8, QWORD PTR [rdx+-48]
mov rax, QWORD PTR [r10+-40]
mov QWORD PTR [r10+-48], r8
sbb rax, QWORD PTR [rdx+-40]
mov r8, QWORD PTR [r10+-32]
mov QWORD PTR [r10+-40], rax
sbb r8, QWORD PTR [rdx+-32]
mov rax, QWORD PTR [r10+-24]
mov QWORD PTR [r10+-32], r8
sbb rax, QWORD PTR [rdx+-24]
mov r8, QWORD PTR [r10+-16]
mov QWORD PTR [r10+-24], rax
sbb r8, QWORD PTR [rdx+-16]
mov rax, QWORD PTR [r10+-8]
mov QWORD PTR [r10+-16], r8
sbb rax, QWORD PTR [rdx+-8]
mov r8, QWORD PTR [r10]
mov QWORD PTR [r10+-8], rax
sbb r8, QWORD PTR [rdx]
mov rax, QWORD PTR [r10+8]
mov QWORD PTR [r10], r8
sbb rax, QWORD PTR [rdx+8]
mov r8, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], rax
sbb r8, QWORD PTR [rdx+16]
mov rax, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], r8
sbb rax, QWORD PTR [rdx+24]
mov r8, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], rax
sbb r8, QWORD PTR [rdx+32]
mov rax, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], r8
sbb rax, QWORD PTR [rdx+40]
mov r8, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], rax
sbb r8, QWORD PTR [rdx+48]
mov rax, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], r8
sbb rax, QWORD PTR [rdx+56]
mov r8, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], rax
sbb r8, QWORD PTR [rdx+64]
mov rax, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], r8
sbb rax, QWORD PTR [rdx+72]
mov r8, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], rax
sbb r8, QWORD PTR [rdx+80]
mov rax, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], r8
sbb rax, QWORD PTR [rdx+88]
mov r8, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], rax
sbb r8, QWORD PTR [rdx+96]
mov rax, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], r8
sbb rax, QWORD PTR [rdx+104]
mov r8, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], rax
sbb r8, QWORD PTR [rdx+112]
mov rax, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], r8
sbb rax, QWORD PTR [rdx+120]
mov r8, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], rax
sbb r8, QWORD PTR [rdx+128]
mov rax, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], r8
sbb rax, QWORD PTR [rdx+136]
mov r8, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], rax
sbb r8, QWORD PTR [rdx+144]
mov rax, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], r8
sbb rax, QWORD PTR [rdx+152]
mov r8, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], rax
sbb r8, QWORD PTR [rdx+160]
mov rax, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], r8
sbb rax, QWORD PTR [rdx+168]
mov r8, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], rax
sbb r8, QWORD PTR [rdx+176]
mov rax, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], r8
sbb rax, QWORD PTR [rdx+184]
mov r8, QWORD PTR [r10+192]
mov QWORD PTR [r10+184], rax
sbb r8, QWORD PTR [rdx+192]
mov rax, QWORD PTR [r10+200]
mov QWORD PTR [r10+192], r8
sbb rax, QWORD PTR [rdx+200]
mov r8, QWORD PTR [r10+208]
mov QWORD PTR [r10+200], rax
sbb r8, QWORD PTR [rdx+208]
mov rax, QWORD PTR [r10+216]
mov QWORD PTR [r10+208], r8
sbb rax, QWORD PTR [rdx+216]
mov r8, QWORD PTR [r10+224]
mov QWORD PTR [r10+216], rax
sbb r8, QWORD PTR [rdx+224]
mov rax, QWORD PTR [r10+232]
mov QWORD PTR [r10+224], r8
sbb rax, QWORD PTR [rdx+232]
mov r8, QWORD PTR [r10+240]
mov QWORD PTR [r10+232], rax
sbb r8, QWORD PTR [rdx+240]
mov rax, QWORD PTR [r10+248]
mov QWORD PTR [r10+240], r8
sbb rax, QWORD PTR [rdx+248]
mov QWORD PTR [r10+248], rax
; r9 -= borrow again: r9 is now minus the number of borrows taken by the two
; subtractions above.
sbb r9, 0
; rcx = r + 512 bytes, so displacements -256..+248 cover r[32..95].
; neg turns r9 into the non-negative borrow count.
mov rcx, QWORD PTR [rsp+512]
neg r9
add rcx, 512
; r[32..95] -= scratch[0..63]. scratch holds (al - ah)^2 - ah^2 - al^2 modulo
; 2^4096, so this adds the cross term 2*al*ah at word offset 32.
mov r8, QWORD PTR [rcx+-256]
sub r8, QWORD PTR [r10+-256]
mov rax, QWORD PTR [rcx+-248]
mov QWORD PTR [rcx+-256], r8
sbb rax, QWORD PTR [r10+-248]
mov r8, QWORD PTR [rcx+-240]
mov QWORD PTR [rcx+-248], rax
sbb r8, QWORD PTR [r10+-240]
mov rax, QWORD PTR [rcx+-232]
mov QWORD PTR [rcx+-240], r8
sbb rax, QWORD PTR [r10+-232]
mov r8, QWORD PTR [rcx+-224]
mov QWORD PTR [rcx+-232], rax
sbb r8, QWORD PTR [r10+-224]
mov rax, QWORD PTR [rcx+-216]
mov QWORD PTR [rcx+-224], r8
sbb rax, QWORD PTR [r10+-216]
mov r8, QWORD PTR [rcx+-208]
mov QWORD PTR [rcx+-216], rax
sbb r8, QWORD PTR [r10+-208]
mov rax, QWORD PTR [rcx+-200]
mov QWORD PTR [rcx+-208], r8
sbb rax, QWORD PTR [r10+-200]
mov r8, QWORD PTR [rcx+-192]
mov QWORD PTR [rcx+-200], rax
sbb r8, QWORD PTR [r10+-192]
mov rax, QWORD PTR [rcx+-184]
mov QWORD PTR [rcx+-192], r8
sbb rax, QWORD PTR [r10+-184]
mov r8, QWORD PTR [rcx+-176]
mov QWORD PTR [rcx+-184], rax
sbb r8, QWORD PTR [r10+-176]
mov rax, QWORD PTR [rcx+-168]
mov QWORD PTR [rcx+-176], r8
sbb rax, QWORD PTR [r10+-168]
mov r8, QWORD PTR [rcx+-160]
mov QWORD PTR [rcx+-168], rax
sbb r8, QWORD PTR [r10+-160]
mov rax, QWORD PTR [rcx+-152]
mov QWORD PTR [rcx+-160], r8
sbb rax, QWORD PTR [r10+-152]
mov r8, QWORD PTR [rcx+-144]
mov QWORD PTR [rcx+-152], rax
sbb r8, QWORD PTR [r10+-144]
mov rax, QWORD PTR [rcx+-136]
mov QWORD PTR [rcx+-144], r8
sbb rax, QWORD PTR [r10+-136]
mov r8, QWORD PTR [rcx+-128]
mov QWORD PTR [rcx+-136], rax
sbb r8, QWORD PTR [r10+-128]
mov rax, QWORD PTR [rcx+-120]
mov QWORD PTR [rcx+-128], r8
sbb rax, QWORD PTR [r10+-120]
mov r8, QWORD PTR [rcx+-112]
mov QWORD PTR [rcx+-120], rax
sbb r8, QWORD PTR [r10+-112]
mov rax, QWORD PTR [rcx+-104]
mov QWORD PTR [rcx+-112], r8
sbb rax, QWORD PTR [r10+-104]
mov r8, QWORD PTR [rcx+-96]
mov QWORD PTR [rcx+-104], rax
sbb r8, QWORD PTR [r10+-96]
mov rax, QWORD PTR [rcx+-88]
mov QWORD PTR [rcx+-96], r8
sbb rax, QWORD PTR [r10+-88]
mov r8, QWORD PTR [rcx+-80]
mov QWORD PTR [rcx+-88], rax
sbb r8, QWORD PTR [r10+-80]
mov rax, QWORD PTR [rcx+-72]
mov QWORD PTR [rcx+-80], r8
sbb rax, QWORD PTR [r10+-72]
mov r8, QWORD PTR [rcx+-64]
mov QWORD PTR [rcx+-72], rax
sbb r8, QWORD PTR [r10+-64]
mov rax, QWORD PTR [rcx+-56]
mov QWORD PTR [rcx+-64], r8
sbb rax, QWORD PTR [r10+-56]
mov r8, QWORD PTR [rcx+-48]
mov QWORD PTR [rcx+-56], rax
sbb r8, QWORD PTR [r10+-48]
mov rax, QWORD PTR [rcx+-40]
mov QWORD PTR [rcx+-48], r8
sbb rax, QWORD PTR [r10+-40]
mov r8, QWORD PTR [rcx+-32]
mov QWORD PTR [rcx+-40], rax
sbb r8, QWORD PTR [r10+-32]
mov rax, QWORD PTR [rcx+-24]
mov QWORD PTR [rcx+-32], r8
sbb rax, QWORD PTR [r10+-24]
mov r8, QWORD PTR [rcx+-16]
mov QWORD PTR [rcx+-24], rax
sbb r8, QWORD PTR [r10+-16]
mov rax, QWORD PTR [rcx+-8]
mov QWORD PTR [rcx+-16], r8
sbb rax, QWORD PTR [r10+-8]
mov r8, QWORD PTR [rcx]
mov QWORD PTR [rcx+-8], rax
sbb r8, QWORD PTR [r10]
mov rax, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
sbb rax, QWORD PTR [r10+8]
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], rax
sbb r8, QWORD PTR [r10+16]
mov rax, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
sbb rax, QWORD PTR [r10+24]
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], rax
sbb r8, QWORD PTR [r10+32]
mov rax, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
sbb rax, QWORD PTR [r10+40]
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], rax
sbb r8, QWORD PTR [r10+48]
mov rax, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
sbb rax, QWORD PTR [r10+56]
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], rax
sbb r8, QWORD PTR [r10+64]
mov rax, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
sbb rax, QWORD PTR [r10+72]
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], rax
sbb r8, QWORD PTR [r10+80]
mov rax, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
sbb rax, QWORD PTR [r10+88]
mov r8, QWORD PTR [rcx+96]
mov QWORD PTR [rcx+88], rax
sbb r8, QWORD PTR [r10+96]
mov rax, QWORD PTR [rcx+104]
mov QWORD PTR [rcx+96], r8
sbb rax, QWORD PTR [r10+104]
mov r8, QWORD PTR [rcx+112]
mov QWORD PTR [rcx+104], rax
sbb r8, QWORD PTR [r10+112]
mov rax, QWORD PTR [rcx+120]
mov QWORD PTR [rcx+112], r8
sbb rax, QWORD PTR [r10+120]
mov r8, QWORD PTR [rcx+128]
mov QWORD PTR [rcx+120], rax
sbb r8, QWORD PTR [r10+128]
mov rax, QWORD PTR [rcx+136]
mov QWORD PTR [rcx+128], r8
sbb rax, QWORD PTR [r10+136]
mov r8, QWORD PTR [rcx+144]
mov QWORD PTR [rcx+136], rax
sbb r8, QWORD PTR [r10+144]
mov rax, QWORD PTR [rcx+152]
mov QWORD PTR [rcx+144], r8
sbb rax, QWORD PTR [r10+152]
mov r8, QWORD PTR [rcx+160]
mov QWORD PTR [rcx+152], rax
sbb r8, QWORD PTR [r10+160]
mov rax, QWORD PTR [rcx+168]
mov QWORD PTR [rcx+160], r8
sbb rax, QWORD PTR [r10+168]
mov r8, QWORD PTR [rcx+176]
mov QWORD PTR [rcx+168], rax
sbb r8, QWORD PTR [r10+176]
mov rax, QWORD PTR [rcx+184]
mov QWORD PTR [rcx+176], r8
sbb rax, QWORD PTR [r10+184]
mov r8, QWORD PTR [rcx+192]
mov QWORD PTR [rcx+184], rax
sbb r8, QWORD PTR [r10+192]
mov rax, QWORD PTR [rcx+200]
mov QWORD PTR [rcx+192], r8
sbb rax, QWORD PTR [r10+200]
mov r8, QWORD PTR [rcx+208]
mov QWORD PTR [rcx+200], rax
sbb r8, QWORD PTR [r10+208]
mov rax, QWORD PTR [rcx+216]
mov QWORD PTR [rcx+208], r8
sbb rax, QWORD PTR [r10+216]
mov r8, QWORD PTR [rcx+224]
mov QWORD PTR [rcx+216], rax
sbb r8, QWORD PTR [r10+224]
mov rax, QWORD PTR [rcx+232]
mov QWORD PTR [rcx+224], r8
sbb rax, QWORD PTR [r10+232]
mov r8, QWORD PTR [rcx+240]
mov QWORD PTR [rcx+232], rax
sbb r8, QWORD PTR [r10+240]
mov rax, QWORD PTR [rcx+248]
mov QWORD PTR [rcx+240], r8
sbb rax, QWORD PTR [r10+248]
mov QWORD PTR [rcx+248], rax
; r9 = borrow count - borrow out of this subtraction: the net correction
; word that still has to be applied above word 95.
sbb r9, 0
; rcx = r + 768 bytes, i.e. r[96].
mov rcx, QWORD PTR [rsp+512]
add rcx, 768
; Add in word
; r[96..127] += r9, with the carry rippled through the remaining words by
; adc 0.
mov r8, QWORD PTR [rcx]
add r8, r9
mov rax, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
adc rax, 0
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], rax
adc r8, 0
mov rax, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
adc rax, 0
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], rax
adc r8, 0
mov rax, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
adc rax, 0
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], rax
adc r8, 0
mov rax, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
adc rax, 0
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], rax
adc r8, 0
mov rax, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
adc rax, 0
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], rax
adc r8, 0
mov rax, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
adc rax, 0
mov r8, QWORD PTR [rcx+96]
mov QWORD PTR [rcx+88], rax
adc r8, 0
mov rax, QWORD PTR [rcx+104]
mov QWORD PTR [rcx+96], r8
adc rax, 0
mov r8, QWORD PTR [rcx+112]
mov QWORD PTR [rcx+104], rax
adc r8, 0
mov rax, QWORD PTR [rcx+120]
mov QWORD PTR [rcx+112], r8
adc rax, 0
mov r8, QWORD PTR [rcx+128]
mov QWORD PTR [rcx+120], rax
adc r8, 0
mov rax, QWORD PTR [rcx+136]
mov QWORD PTR [rcx+128], r8
adc rax, 0
mov r8, QWORD PTR [rcx+144]
mov QWORD PTR [rcx+136], rax
adc r8, 0
mov rax, QWORD PTR [rcx+152]
mov QWORD PTR [rcx+144], r8
adc rax, 0
mov r8, QWORD PTR [rcx+160]
mov QWORD PTR [rcx+152], rax
adc r8, 0
mov rax, QWORD PTR [rcx+168]
mov QWORD PTR [rcx+160], r8
adc rax, 0
mov r8, QWORD PTR [rcx+176]
mov QWORD PTR [rcx+168], rax
adc r8, 0
mov rax, QWORD PTR [rcx+184]
mov QWORD PTR [rcx+176], r8
adc rax, 0
mov r8, QWORD PTR [rcx+192]
mov QWORD PTR [rcx+184], rax
adc r8, 0
mov rax, QWORD PTR [rcx+200]
mov QWORD PTR [rcx+192], r8
adc rax, 0
mov r8, QWORD PTR [rcx+208]
mov QWORD PTR [rcx+200], rax
adc r8, 0
mov rax, QWORD PTR [rcx+216]
mov QWORD PTR [rcx+208], r8
adc rax, 0
mov r8, QWORD PTR [rcx+224]
mov QWORD PTR [rcx+216], rax
adc r8, 0
mov rax, QWORD PTR [rcx+232]
mov QWORD PTR [rcx+224], r8
adc rax, 0
mov r8, QWORD PTR [rcx+240]
mov QWORD PTR [rcx+232], rax
adc r8, 0
mov rax, QWORD PTR [rcx+248]
mov QWORD PTR [rcx+240], r8
adc rax, 0
mov QWORD PTR [rcx+248], rax
; Reload the saved a and r pointers, release the frame and return.
mov rdx, QWORD PTR [rsp+520]
mov rcx, QWORD PTR [rsp+512]
add rsp, 528
ret
sp_4096_sqr_avx2_64 ENDP
_text ENDS
ENDIF
; /* Mul a by digit b into r. (r = a * b)
; *
; * Fully unrolled multiply of a 64-word number by a single 64-bit word.
; * Reads a[0..63] and writes r[0..64]; the top word is stored at byte
; * offset 512 of r.
; *
; * Register use as coded here: rcx = r, rdx = a (copied to r9 because mul
; * overwrites rdx), r8 = b. r10, r11 and r12 rotate as the pending result
; * word, the word above it and the carry above that. r12 is saved on entry
; * and restored on exit; rax, rdx, r9, r10 and r11 are overwritten.
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision digit.
; */
_text SEGMENT READONLY PARA
sp_4096_mul_d_64 PROC
push r12
; Keep the pointer to a in r9: every mul writes its high word to rdx.
mov r9, rdx
; A[0] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9]
mov r10, rax
mov r11, rdx
mov QWORD PTR [rcx], r10
; Every following step has the same shape: reload b into rax, clear the
; register that will catch the next carry, multiply by a[k], add the low
; half into the pending word and store it, then add the high half and the
; carry into the next word. The three registers swap roles every step.
; A[1] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+8]
add r11, rax
mov QWORD PTR [rcx+8], r11
adc r12, rdx
adc r10, 0
; A[2] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+16]
add r12, rax
mov QWORD PTR [rcx+16], r12
adc r10, rdx
adc r11, 0
; A[3] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+24]
add r10, rax
mov QWORD PTR [rcx+24], r10
adc r11, rdx
adc r12, 0
; A[4] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+32]
add r11, rax
mov QWORD PTR [rcx+32], r11
adc r12, rdx
adc r10, 0
; A[5] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+40]
add r12, rax
mov QWORD PTR [rcx+40], r12
adc r10, rdx
adc r11, 0
; A[6] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+48]
add r10, rax
mov QWORD PTR [rcx+48], r10
adc r11, rdx
adc r12, 0
; A[7] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+56]
add r11, rax
mov QWORD PTR [rcx+56], r11
adc r12, rdx
adc r10, 0
; A[8] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+64]
add r12, rax
mov QWORD PTR [rcx+64], r12
adc r10, rdx
adc r11, 0
; A[9] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+72]
add r10, rax
mov QWORD PTR [rcx+72], r10
adc r11, rdx
adc r12, 0
; A[10] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+80]
add r11, rax
mov QWORD PTR [rcx+80], r11
adc r12, rdx
adc r10, 0
; A[11] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+88]
add r12, rax
mov QWORD PTR [rcx+88], r12
adc r10, rdx
adc r11, 0
; A[12] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+96]
add r10, rax
mov QWORD PTR [rcx+96], r10
adc r11, rdx
adc r12, 0
; A[13] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+104]
add r11, rax
mov QWORD PTR [rcx+104], r11
adc r12, rdx
adc r10, 0
; A[14] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+112]
add r12, rax
mov QWORD PTR [rcx+112], r12
adc r10, rdx
adc r11, 0
; A[15] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+120]
add r10, rax
mov QWORD PTR [rcx+120], r10
adc r11, rdx
adc r12, 0
; A[16] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+128]
add r11, rax
mov QWORD PTR [rcx+128], r11
adc r12, rdx
adc r10, 0
; A[17] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+136]
add r12, rax
mov QWORD PTR [rcx+136], r12
adc r10, rdx
adc r11, 0
; A[18] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+144]
add r10, rax
mov QWORD PTR [rcx+144], r10
adc r11, rdx
adc r12, 0
; A[19] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+152]
add r11, rax
mov QWORD PTR [rcx+152], r11
adc r12, rdx
adc r10, 0
; A[20] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+160]
add r12, rax
mov QWORD PTR [rcx+160], r12
adc r10, rdx
adc r11, 0
; A[21] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+168]
add r10, rax
mov QWORD PTR [rcx+168], r10
adc r11, rdx
adc r12, 0
; A[22] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+176]
add r11, rax
mov QWORD PTR [rcx+176], r11
adc r12, rdx
adc r10, 0
; A[23] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+184]
add r12, rax
mov QWORD PTR [rcx+184], r12
adc r10, rdx
adc r11, 0
; A[24] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+192]
add r10, rax
mov QWORD PTR [rcx+192], r10
adc r11, rdx
adc r12, 0
; A[25] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+200]
add r11, rax
mov QWORD PTR [rcx+200], r11
adc r12, rdx
adc r10, 0
; A[26] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+208]
add r12, rax
mov QWORD PTR [rcx+208], r12
adc r10, rdx
adc r11, 0
; A[27] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+216]
add r10, rax
mov QWORD PTR [rcx+216], r10
adc r11, rdx
adc r12, 0
; A[28] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+224]
add r11, rax
mov QWORD PTR [rcx+224], r11
adc r12, rdx
adc r10, 0
; A[29] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+232]
add r12, rax
mov QWORD PTR [rcx+232], r12
adc r10, rdx
adc r11, 0
; A[30] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+240]
add r10, rax
mov QWORD PTR [rcx+240], r10
adc r11, rdx
adc r12, 0
; A[31] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+248]
add r11, rax
mov QWORD PTR [rcx+248], r11
adc r12, rdx
adc r10, 0
; A[32] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+256]
add r12, rax
mov QWORD PTR [rcx+256], r12
adc r10, rdx
adc r11, 0
; A[33] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+264]
add r10, rax
mov QWORD PTR [rcx+264], r10
adc r11, rdx
adc r12, 0
; A[34] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+272]
add r11, rax
mov QWORD PTR [rcx+272], r11
adc r12, rdx
adc r10, 0
; A[35] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+280]
add r12, rax
mov QWORD PTR [rcx+280], r12
adc r10, rdx
adc r11, 0
; A[36] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+288]
add r10, rax
mov QWORD PTR [rcx+288], r10
adc r11, rdx
adc r12, 0
; A[37] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+296]
add r11, rax
mov QWORD PTR [rcx+296], r11
adc r12, rdx
adc r10, 0
; A[38] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+304]
add r12, rax
mov QWORD PTR [rcx+304], r12
adc r10, rdx
adc r11, 0
; A[39] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+312]
add r10, rax
mov QWORD PTR [rcx+312], r10
adc r11, rdx
adc r12, 0
; A[40] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+320]
add r11, rax
mov QWORD PTR [rcx+320], r11
adc r12, rdx
adc r10, 0
; A[41] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+328]
add r12, rax
mov QWORD PTR [rcx+328], r12
adc r10, rdx
adc r11, 0
; A[42] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+336]
add r10, rax
mov QWORD PTR [rcx+336], r10
adc r11, rdx
adc r12, 0
; A[43] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+344]
add r11, rax
mov QWORD PTR [rcx+344], r11
adc r12, rdx
adc r10, 0
; A[44] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+352]
add r12, rax
mov QWORD PTR [rcx+352], r12
adc r10, rdx
adc r11, 0
; A[45] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+360]
add r10, rax
mov QWORD PTR [rcx+360], r10
adc r11, rdx
adc r12, 0
; A[46] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+368]
add r11, rax
mov QWORD PTR [rcx+368], r11
adc r12, rdx
adc r10, 0
; A[47] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+376]
add r12, rax
mov QWORD PTR [rcx+376], r12
adc r10, rdx
adc r11, 0
; A[48] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+384]
add r10, rax
mov QWORD PTR [rcx+384], r10
adc r11, rdx
adc r12, 0
; A[49] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+392]
add r11, rax
mov QWORD PTR [rcx+392], r11
adc r12, rdx
adc r10, 0
; A[50] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+400]
add r12, rax
mov QWORD PTR [rcx+400], r12
adc r10, rdx
adc r11, 0
; A[51] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+408]
add r10, rax
mov QWORD PTR [rcx+408], r10
adc r11, rdx
adc r12, 0
; A[52] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+416]
add r11, rax
mov QWORD PTR [rcx+416], r11
adc r12, rdx
adc r10, 0
; A[53] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+424]
add r12, rax
mov QWORD PTR [rcx+424], r12
adc r10, rdx
adc r11, 0
; A[54] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+432]
add r10, rax
mov QWORD PTR [rcx+432], r10
adc r11, rdx
adc r12, 0
; A[55] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+440]
add r11, rax
mov QWORD PTR [rcx+440], r11
adc r12, rdx
adc r10, 0
; A[56] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+448]
add r12, rax
mov QWORD PTR [rcx+448], r12
adc r10, rdx
adc r11, 0
; A[57] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+456]
add r10, rax
mov QWORD PTR [rcx+456], r10
adc r11, rdx
adc r12, 0
; A[58] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+464]
add r11, rax
mov QWORD PTR [rcx+464], r11
adc r12, rdx
adc r10, 0
; A[59] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+472]
add r12, rax
mov QWORD PTR [rcx+472], r12
adc r10, rdx
adc r11, 0
; A[60] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+480]
add r10, rax
mov QWORD PTR [rcx+480], r10
adc r11, rdx
adc r12, 0
; A[61] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+488]
add r11, rax
mov QWORD PTR [rcx+488], r11
adc r12, rdx
adc r10, 0
; A[62] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+496]
add r12, rax
mov QWORD PTR [rcx+496], r12
adc r10, rdx
adc r11, 0
; A[63] * B
; Last step: no further carry register is set up. The low word goes to
; r[63] and the accumulated high word to r[64].
mov rax, r8
mul QWORD PTR [r9+504]
add r10, rax
adc r11, rdx
mov QWORD PTR [rcx+504], r10
mov QWORD PTR [rcx+512], r11
pop r12
ret
sp_4096_mul_d_64 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not subtracting.
; *
; * Works in two straight-line passes with no branches: first (b AND m) is
; * written to a 512-byte buffer on the stack, then r = a - buffer is
; * computed as one 64-word borrow chain.
; *
; * Register use as coded here: rcx = r, rdx = a, r8 = b, r9 = m.
; * r8, r10 and r11 are overwritten. On return rax is 0 when there was no
; * borrow out of the top word and all ones when there was.
; *
; * r A single precision number representing condition subtract result.
; * a A single precision number to subtract from.
; * b A single precision number to subtract.
; * m Mask value to apply.
; */
_text SEGMENT READONLY PARA
sp_4096_cond_sub_64 PROC
sub rsp, 512
; Pass 1: stack buffer = b AND m, two words per group.
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
and r10, r9
and r11, r9
mov QWORD PTR [rsp], r10
mov QWORD PTR [rsp+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+16], r10
mov QWORD PTR [rsp+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+32], r10
mov QWORD PTR [rsp+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+48], r10
mov QWORD PTR [rsp+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+64], r10
mov QWORD PTR [rsp+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+80], r10
mov QWORD PTR [rsp+88], r11
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+96], r10
mov QWORD PTR [rsp+104], r11
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+112], r10
mov QWORD PTR [rsp+120], r11
mov r10, QWORD PTR [r8+128]
mov r11, QWORD PTR [r8+136]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+128], r10
mov QWORD PTR [rsp+136], r11
mov r10, QWORD PTR [r8+144]
mov r11, QWORD PTR [r8+152]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+144], r10
mov QWORD PTR [rsp+152], r11
mov r10, QWORD PTR [r8+160]
mov r11, QWORD PTR [r8+168]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+160], r10
mov QWORD PTR [rsp+168], r11
mov r10, QWORD PTR [r8+176]
mov r11, QWORD PTR [r8+184]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+176], r10
mov QWORD PTR [rsp+184], r11
mov r10, QWORD PTR [r8+192]
mov r11, QWORD PTR [r8+200]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+192], r10
mov QWORD PTR [rsp+200], r11
mov r10, QWORD PTR [r8+208]
mov r11, QWORD PTR [r8+216]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+208], r10
mov QWORD PTR [rsp+216], r11
mov r10, QWORD PTR [r8+224]
mov r11, QWORD PTR [r8+232]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+224], r10
mov QWORD PTR [rsp+232], r11
mov r10, QWORD PTR [r8+240]
mov r11, QWORD PTR [r8+248]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+240], r10
mov QWORD PTR [rsp+248], r11
mov r10, QWORD PTR [r8+256]
mov r11, QWORD PTR [r8+264]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+256], r10
mov QWORD PTR [rsp+264], r11
mov r10, QWORD PTR [r8+272]
mov r11, QWORD PTR [r8+280]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+272], r10
mov QWORD PTR [rsp+280], r11
mov r10, QWORD PTR [r8+288]
mov r11, QWORD PTR [r8+296]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+288], r10
mov QWORD PTR [rsp+296], r11
mov r10, QWORD PTR [r8+304]
mov r11, QWORD PTR [r8+312]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+304], r10
mov QWORD PTR [rsp+312], r11
mov r10, QWORD PTR [r8+320]
mov r11, QWORD PTR [r8+328]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+320], r10
mov QWORD PTR [rsp+328], r11
mov r10, QWORD PTR [r8+336]
mov r11, QWORD PTR [r8+344]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+336], r10
mov QWORD PTR [rsp+344], r11
mov r10, QWORD PTR [r8+352]
mov r11, QWORD PTR [r8+360]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+352], r10
mov QWORD PTR [rsp+360], r11
mov r10, QWORD PTR [r8+368]
mov r11, QWORD PTR [r8+376]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+368], r10
mov QWORD PTR [rsp+376], r11
mov r10, QWORD PTR [r8+384]
mov r11, QWORD PTR [r8+392]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+384], r10
mov QWORD PTR [rsp+392], r11
mov r10, QWORD PTR [r8+400]
mov r11, QWORD PTR [r8+408]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+400], r10
mov QWORD PTR [rsp+408], r11
mov r10, QWORD PTR [r8+416]
mov r11, QWORD PTR [r8+424]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+416], r10
mov QWORD PTR [rsp+424], r11
mov r10, QWORD PTR [r8+432]
mov r11, QWORD PTR [r8+440]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+432], r10
mov QWORD PTR [rsp+440], r11
mov r10, QWORD PTR [r8+448]
mov r11, QWORD PTR [r8+456]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+448], r10
mov QWORD PTR [rsp+456], r11
mov r10, QWORD PTR [r8+464]
mov r11, QWORD PTR [r8+472]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+464], r10
mov QWORD PTR [rsp+472], r11
mov r10, QWORD PTR [r8+480]
mov r11, QWORD PTR [r8+488]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+480], r10
mov QWORD PTR [rsp+488], r11
mov r10, QWORD PTR [r8+496]
mov r11, QWORD PTR [r8+504]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+496], r10
mov QWORD PTR [rsp+504], r11
; Pass 2: r = a - buffer. r8 no longer holds b and is reused to load each
; buffer word. Only mov sits between consecutive sbb instructions, so the
; borrow flag carries through the whole chain; each result word is stored
; one step after it is computed.
mov r10, QWORD PTR [rdx]
mov r8, QWORD PTR [rsp]
sub r10, r8
mov r11, QWORD PTR [rdx+8]
mov r8, QWORD PTR [rsp+8]
sbb r11, r8
mov QWORD PTR [rcx], r10
mov r10, QWORD PTR [rdx+16]
mov r8, QWORD PTR [rsp+16]
sbb r10, r8
mov QWORD PTR [rcx+8], r11
mov r11, QWORD PTR [rdx+24]
mov r8, QWORD PTR [rsp+24]
sbb r11, r8
mov QWORD PTR [rcx+16], r10
mov r10, QWORD PTR [rdx+32]
mov r8, QWORD PTR [rsp+32]
sbb r10, r8
mov QWORD PTR [rcx+24], r11
mov r11, QWORD PTR [rdx+40]
mov r8, QWORD PTR [rsp+40]
sbb r11, r8
mov QWORD PTR [rcx+32], r10
mov r10, QWORD PTR [rdx+48]
mov r8, QWORD PTR [rsp+48]
sbb r10, r8
mov QWORD PTR [rcx+40], r11
mov r11, QWORD PTR [rdx+56]
mov r8, QWORD PTR [rsp+56]
sbb r11, r8
mov QWORD PTR [rcx+48], r10
mov r10, QWORD PTR [rdx+64]
mov r8, QWORD PTR [rsp+64]
sbb r10, r8
mov QWORD PTR [rcx+56], r11
mov r11, QWORD PTR [rdx+72]
mov r8, QWORD PTR [rsp+72]
sbb r11, r8
mov QWORD PTR [rcx+64], r10
mov r10, QWORD PTR [rdx+80]
mov r8, QWORD PTR [rsp+80]
sbb r10, r8
mov QWORD PTR [rcx+72], r11
mov r11, QWORD PTR [rdx+88]
mov r8, QWORD PTR [rsp+88]
sbb r11, r8
mov QWORD PTR [rcx+80], r10
mov r10, QWORD PTR [rdx+96]
mov r8, QWORD PTR [rsp+96]
sbb r10, r8
mov QWORD PTR [rcx+88], r11
mov r11, QWORD PTR [rdx+104]
mov r8, QWORD PTR [rsp+104]
sbb r11, r8
mov QWORD PTR [rcx+96], r10
mov r10, QWORD PTR [rdx+112]
mov r8, QWORD PTR [rsp+112]
sbb r10, r8
mov QWORD PTR [rcx+104], r11
mov r11, QWORD PTR [rdx+120]
mov r8, QWORD PTR [rsp+120]
sbb r11, r8
mov QWORD PTR [rcx+112], r10
mov r10, QWORD PTR [rdx+128]
mov r8, QWORD PTR [rsp+128]
sbb r10, r8
mov QWORD PTR [rcx+120], r11
mov r11, QWORD PTR [rdx+136]
mov r8, QWORD PTR [rsp+136]
sbb r11, r8
mov QWORD PTR [rcx+128], r10
mov r10, QWORD PTR [rdx+144]
mov r8, QWORD PTR [rsp+144]
sbb r10, r8
mov QWORD PTR [rcx+136], r11
mov r11, QWORD PTR [rdx+152]
mov r8, QWORD PTR [rsp+152]
sbb r11, r8
mov QWORD PTR [rcx+144], r10
mov r10, QWORD PTR [rdx+160]
mov r8, QWORD PTR [rsp+160]
sbb r10, r8
mov QWORD PTR [rcx+152], r11
mov r11, QWORD PTR [rdx+168]
mov r8, QWORD PTR [rsp+168]
sbb r11, r8
mov QWORD PTR [rcx+160], r10
mov r10, QWORD PTR [rdx+176]
mov r8, QWORD PTR [rsp+176]
sbb r10, r8
mov QWORD PTR [rcx+168], r11
mov r11, QWORD PTR [rdx+184]
mov r8, QWORD PTR [rsp+184]
sbb r11, r8
mov QWORD PTR [rcx+176], r10
mov r10, QWORD PTR [rdx+192]
mov r8, QWORD PTR [rsp+192]
sbb r10, r8
mov QWORD PTR [rcx+184], r11
mov r11, QWORD PTR [rdx+200]
mov r8, QWORD PTR [rsp+200]
sbb r11, r8
mov QWORD PTR [rcx+192], r10
mov r10, QWORD PTR [rdx+208]
mov r8, QWORD PTR [rsp+208]
sbb r10, r8
mov QWORD PTR [rcx+200], r11
mov r11, QWORD PTR [rdx+216]
mov r8, QWORD PTR [rsp+216]
sbb r11, r8
mov QWORD PTR [rcx+208], r10
mov r10, QWORD PTR [rdx+224]
mov r8, QWORD PTR [rsp+224]
sbb r10, r8
mov QWORD PTR [rcx+216], r11
mov r11, QWORD PTR [rdx+232]
mov r8, QWORD PTR [rsp+232]
sbb r11, r8
mov QWORD PTR [rcx+224], r10
mov r10, QWORD PTR [rdx+240]
mov r8, QWORD PTR [rsp+240]
sbb r10, r8
mov QWORD PTR [rcx+232], r11
mov r11, QWORD PTR [rdx+248]
mov r8, QWORD PTR [rsp+248]
sbb r11, r8
mov QWORD PTR [rcx+240], r10
mov r10, QWORD PTR [rdx+256]
mov r8, QWORD PTR [rsp+256]
sbb r10, r8
mov QWORD PTR [rcx+248], r11
mov r11, QWORD PTR [rdx+264]
mov r8, QWORD PTR [rsp+264]
sbb r11, r8
mov QWORD PTR [rcx+256], r10
mov r10, QWORD PTR [rdx+272]
mov r8, QWORD PTR [rsp+272]
sbb r10, r8
mov QWORD PTR [rcx+264], r11
mov r11, QWORD PTR [rdx+280]
mov r8, QWORD PTR [rsp+280]
sbb r11, r8
mov QWORD PTR [rcx+272], r10
mov r10, QWORD PTR [rdx+288]
mov r8, QWORD PTR [rsp+288]
sbb r10, r8
mov QWORD PTR [rcx+280], r11
mov r11, QWORD PTR [rdx+296]
mov r8, QWORD PTR [rsp+296]
sbb r11, r8
mov QWORD PTR [rcx+288], r10
mov r10, QWORD PTR [rdx+304]
mov r8, QWORD PTR [rsp+304]
sbb r10, r8
mov QWORD PTR [rcx+296], r11
mov r11, QWORD PTR [rdx+312]
mov r8, QWORD PTR [rsp+312]
sbb r11, r8
mov QWORD PTR [rcx+304], r10
mov r10, QWORD PTR [rdx+320]
mov r8, QWORD PTR [rsp+320]
sbb r10, r8
mov QWORD PTR [rcx+312], r11
mov r11, QWORD PTR [rdx+328]
mov r8, QWORD PTR [rsp+328]
sbb r11, r8
mov QWORD PTR [rcx+320], r10
mov r10, QWORD PTR [rdx+336]
mov r8, QWORD PTR [rsp+336]
sbb r10, r8
mov QWORD PTR [rcx+328], r11
mov r11, QWORD PTR [rdx+344]
mov r8, QWORD PTR [rsp+344]
sbb r11, r8
mov QWORD PTR [rcx+336], r10
mov r10, QWORD PTR [rdx+352]
mov r8, QWORD PTR [rsp+352]
sbb r10, r8
mov QWORD PTR [rcx+344], r11
mov r11, QWORD PTR [rdx+360]
mov r8, QWORD PTR [rsp+360]
sbb r11, r8
mov QWORD PTR [rcx+352], r10
mov r10, QWORD PTR [rdx+368]
mov r8, QWORD PTR [rsp+368]
sbb r10, r8
mov QWORD PTR [rcx+360], r11
mov r11, QWORD PTR [rdx+376]
mov r8, QWORD PTR [rsp+376]
sbb r11, r8
mov QWORD PTR [rcx+368], r10
mov r10, QWORD PTR [rdx+384]
mov r8, QWORD PTR [rsp+384]
sbb r10, r8
mov QWORD PTR [rcx+376], r11
mov r11, QWORD PTR [rdx+392]
mov r8, QWORD PTR [rsp+392]
sbb r11, r8
mov QWORD PTR [rcx+384], r10
mov r10, QWORD PTR [rdx+400]
mov r8, QWORD PTR [rsp+400]
sbb r10, r8
mov QWORD PTR [rcx+392], r11
mov r11, QWORD PTR [rdx+408]
mov r8, QWORD PTR [rsp+408]
sbb r11, r8
mov QWORD PTR [rcx+400], r10
mov r10, QWORD PTR [rdx+416]
mov r8, QWORD PTR [rsp+416]
sbb r10, r8
mov QWORD PTR [rcx+408], r11
mov r11, QWORD PTR [rdx+424]
mov r8, QWORD PTR [rsp+424]
sbb r11, r8
mov QWORD PTR [rcx+416], r10
mov r10, QWORD PTR [rdx+432]
mov r8, QWORD PTR [rsp+432]
sbb r10, r8
mov QWORD PTR [rcx+424], r11
mov r11, QWORD PTR [rdx+440]
mov r8, QWORD PTR [rsp+440]
sbb r11, r8
mov QWORD PTR [rcx+432], r10
mov r10, QWORD PTR [rdx+448]
mov r8, QWORD PTR [rsp+448]
sbb r10, r8
mov QWORD PTR [rcx+440], r11
mov r11, QWORD PTR [rdx+456]
mov r8, QWORD PTR [rsp+456]
sbb r11, r8
mov QWORD PTR [rcx+448], r10
mov r10, QWORD PTR [rdx+464]
mov r8, QWORD PTR [rsp+464]
sbb r10, r8
mov QWORD PTR [rcx+456], r11
mov r11, QWORD PTR [rdx+472]
mov r8, QWORD PTR [rsp+472]
sbb r11, r8
mov QWORD PTR [rcx+464], r10
mov r10, QWORD PTR [rdx+480]
mov r8, QWORD PTR [rsp+480]
sbb r10, r8
mov QWORD PTR [rcx+472], r11
mov r11, QWORD PTR [rdx+488]
mov r8, QWORD PTR [rsp+488]
sbb r11, r8
mov QWORD PTR [rcx+480], r10
mov r10, QWORD PTR [rdx+496]
mov r8, QWORD PTR [rsp+496]
sbb r10, r8
mov QWORD PTR [rcx+488], r11
mov r11, QWORD PTR [rdx+504]
mov r8, QWORD PTR [rsp+504]
sbb r11, r8
mov QWORD PTR [rcx+496], r10
mov QWORD PTR [rcx+504], r11
; rax = 0 - borrow: 0 if the chain ended without a borrow, all ones if it
; ended with one. This is taken before add rsp changes the flags.
sbb rax, rax
add rsp, 512
ret
sp_4096_cond_sub_64 ENDP
_text ENDS
; /* Reduce the number back to 4096 bits using Montgomery reduction.
; *
; * Runs 64 passes. Each pass computes mu = a[i] * mp and adds mu * m into
; * a[i..i+64]; the carry out of each pass is accumulated in rsi. After the
; * loop the upper 64 words of a, the modulus and a mask derived from that
; * carry are handed to sp_4096_cond_sub_64, which writes its result to the
; * lower 64 words of a.
; *
; * Register use as coded here: rcx = a (advanced by 8 bytes per pass),
; * rdx = m (copied to r9 because mul overwrites rdx), r8 = mp,
; * r10 = pass counter, r13 = mu, r15 and rdi = a[i] and a[i+1] carried
; * between passes in registers, r11 and r12 = alternating carry words,
; * r14 = scratch. r12-r15, rdi and rsi are saved on entry and restored.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; */
_text SEGMENT READONLY PARA
sp_4096_mont_reduce_64 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
mov r9, rdx
; rsi holds the carry passed from one pass to the next.
xor rsi, rsi
; i = 64
mov r10, 64
mov r15, QWORD PTR [rcx]
mov rdi, QWORD PTR [rcx+8]
L_4096_mont_reduce_64_loop:
; mu = a[i] * mp
mov r13, r15
imul r13, r8
; a[i+0] += m[0] * mu
; Only the carry out of this word is kept; r15 is reloaded in the next step.
mov rax, r13
xor r12, r12
mul QWORD PTR [r9]
add r15, rax
adc r12, rdx
; a[i+1] += m[1] * mu
; The result stays in r15 and becomes a[i] of the next pass.
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+8]
mov r15, rdi
add r15, rax
adc r11, rdx
add r15, r12
adc r11, 0
; a[i+2] += m[2] * mu
; The result stays in rdi and becomes a[i+1] of the next pass.
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+16]
mov rdi, QWORD PTR [rcx+16]
add rdi, rax
adc r12, rdx
add rdi, r11
adc r12, 0
; a[i+3] += m[3] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+24]
mov r14, QWORD PTR [rcx+24]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+24], r14
adc r11, 0
; a[i+4] += m[4] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+32]
mov r14, QWORD PTR [rcx+32]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+32], r14
adc r12, 0
; a[i+5] += m[5] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+40]
mov r14, QWORD PTR [rcx+40]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+40], r14
adc r11, 0
; a[i+6] += m[6] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+48]
mov r14, QWORD PTR [rcx+48]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+48], r14
adc r12, 0
; a[i+7] += m[7] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+56]
mov r14, QWORD PTR [rcx+56]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+56], r14
adc r11, 0
; a[i+8] += m[8] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+64]
mov r14, QWORD PTR [rcx+64]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+64], r14
adc r12, 0
; a[i+9] += m[9] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+72]
mov r14, QWORD PTR [rcx+72]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+72], r14
adc r11, 0
; a[i+10] += m[10] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+80]
mov r14, QWORD PTR [rcx+80]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+80], r14
adc r12, 0
; a[i+11] += m[11] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+88]
mov r14, QWORD PTR [rcx+88]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+88], r14
adc r11, 0
; a[i+12] += m[12] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+96]
mov r14, QWORD PTR [rcx+96]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+96], r14
adc r12, 0
; a[i+13] += m[13] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+104]
mov r14, QWORD PTR [rcx+104]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+104], r14
adc r11, 0
; a[i+14] += m[14] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+112]
mov r14, QWORD PTR [rcx+112]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+112], r14
adc r12, 0
; a[i+15] += m[15] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+120]
mov r14, QWORD PTR [rcx+120]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+120], r14
adc r11, 0
; a[i+16] += m[16] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+128]
mov r14, QWORD PTR [rcx+128]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+128], r14
adc r12, 0
; a[i+17] += m[17] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+136]
mov r14, QWORD PTR [rcx+136]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+136], r14
adc r11, 0
; a[i+18] += m[18] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+144]
mov r14, QWORD PTR [rcx+144]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+144], r14
adc r12, 0
; a[i+19] += m[19] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+152]
mov r14, QWORD PTR [rcx+152]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+152], r14
adc r11, 0
; a[i+20] += m[20] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+160]
mov r14, QWORD PTR [rcx+160]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+160], r14
adc r12, 0
; a[i+21] += m[21] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+168]
mov r14, QWORD PTR [rcx+168]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+168], r14
adc r11, 0
; a[i+22] += m[22] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+176]
mov r14, QWORD PTR [rcx+176]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+176], r14
adc r12, 0
; a[i+23] += m[23] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+184]
mov r14, QWORD PTR [rcx+184]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+184], r14
adc r11, 0
; a[i+24] += m[24] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+192]
mov r14, QWORD PTR [rcx+192]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+192], r14
adc r12, 0
; a[i+25] += m[25] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+200]
mov r14, QWORD PTR [rcx+200]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+200], r14
adc r11, 0
; a[i+26] += m[26] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+208]
mov r14, QWORD PTR [rcx+208]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+208], r14
adc r12, 0
; a[i+27] += m[27] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+216]
mov r14, QWORD PTR [rcx+216]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+216], r14
adc r11, 0
; a[i+28] += m[28] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+224]
mov r14, QWORD PTR [rcx+224]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+224], r14
adc r12, 0
; a[i+29] += m[29] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+232]
mov r14, QWORD PTR [rcx+232]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+232], r14
adc r11, 0
; a[i+30] += m[30] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+240]
mov r14, QWORD PTR [rcx+240]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+240], r14
adc r12, 0
; a[i+31] += m[31] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+248]
mov r14, QWORD PTR [rcx+248]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+248], r14
adc r11, 0
; a[i+32] += m[32] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+256]
mov r14, QWORD PTR [rcx+256]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+256], r14
adc r12, 0
; a[i+33] += m[33] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+264]
mov r14, QWORD PTR [rcx+264]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+264], r14
adc r11, 0
; a[i+34] += m[34] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+272]
mov r14, QWORD PTR [rcx+272]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+272], r14
adc r12, 0
; a[i+35] += m[35] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+280]
mov r14, QWORD PTR [rcx+280]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+280], r14
adc r11, 0
; a[i+36] += m[36] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+288]
mov r14, QWORD PTR [rcx+288]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+288], r14
adc r12, 0
; a[i+37] += m[37] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+296]
mov r14, QWORD PTR [rcx+296]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+296], r14
adc r11, 0
; a[i+38] += m[38] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+304]
mov r14, QWORD PTR [rcx+304]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+304], r14
adc r12, 0
; a[i+39] += m[39] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+312]
mov r14, QWORD PTR [rcx+312]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+312], r14
adc r11, 0
; a[i+40] += m[40] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+320]
mov r14, QWORD PTR [rcx+320]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+320], r14
adc r12, 0
; a[i+41] += m[41] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+328]
mov r14, QWORD PTR [rcx+328]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+328], r14
adc r11, 0
; a[i+42] += m[42] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+336]
mov r14, QWORD PTR [rcx+336]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+336], r14
adc r12, 0
; a[i+43] += m[43] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+344]
mov r14, QWORD PTR [rcx+344]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+344], r14
adc r11, 0
; a[i+44] += m[44] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+352]
mov r14, QWORD PTR [rcx+352]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+352], r14
adc r12, 0
; a[i+45] += m[45] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+360]
mov r14, QWORD PTR [rcx+360]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+360], r14
adc r11, 0
; a[i+46] += m[46] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+368]
mov r14, QWORD PTR [rcx+368]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+368], r14
adc r12, 0
; a[i+47] += m[47] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+376]
mov r14, QWORD PTR [rcx+376]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+376], r14
adc r11, 0
; a[i+48] += m[48] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+384]
mov r14, QWORD PTR [rcx+384]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+384], r14
adc r12, 0
; a[i+49] += m[49] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+392]
mov r14, QWORD PTR [rcx+392]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+392], r14
adc r11, 0
; a[i+50] += m[50] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+400]
mov r14, QWORD PTR [rcx+400]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+400], r14
adc r12, 0
; a[i+51] += m[51] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+408]
mov r14, QWORD PTR [rcx+408]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+408], r14
adc r11, 0
; a[i+52] += m[52] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+416]
mov r14, QWORD PTR [rcx+416]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+416], r14
adc r12, 0
; a[i+53] += m[53] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+424]
mov r14, QWORD PTR [rcx+424]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+424], r14
adc r11, 0
; a[i+54] += m[54] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+432]
mov r14, QWORD PTR [rcx+432]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+432], r14
adc r12, 0
; a[i+55] += m[55] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+440]
mov r14, QWORD PTR [rcx+440]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+440], r14
adc r11, 0
; a[i+56] += m[56] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+448]
mov r14, QWORD PTR [rcx+448]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+448], r14
adc r12, 0
; a[i+57] += m[57] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+456]
mov r14, QWORD PTR [rcx+456]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+456], r14
adc r11, 0
; a[i+58] += m[58] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+464]
mov r14, QWORD PTR [rcx+464]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+464], r14
adc r12, 0
; a[i+59] += m[59] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+472]
mov r14, QWORD PTR [rcx+472]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+472], r14
adc r11, 0
; a[i+60] += m[60] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+480]
mov r14, QWORD PTR [rcx+480]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+480], r14
adc r12, 0
; a[i+61] += m[61] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+488]
mov r14, QWORD PTR [rcx+488]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+488], r14
adc r11, 0
; a[i+62] += m[62] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+496]
mov r14, QWORD PTR [rcx+496]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+496], r14
adc r12, 0
; a[i+63] += m[63] * mu
; The top word also takes in the carry left by the previous pass (rsi) and
; produces the carry for the next one. rsi is cleared with mov, not xor,
; because the carry flag from the preceding adc must survive.
mov rax, r13
mul QWORD PTR [r9+504]
mov r14, QWORD PTR [rcx+504]
add r12, rax
adc rdx, rsi
mov rsi, 0
adc rsi, 0
add r14, r12
mov QWORD PTR [rcx+504], r14
adc QWORD PTR [rcx+512], rdx
adc rsi, 0
; i -= 1
; Advance a by one word and count the pass down.
add rcx, 8
dec r10
jnz L_4096_mont_reduce_64_loop
; Write back the two words that were carried in registers. rcx now points
; 512 bytes past the original a.
mov QWORD PTR [rcx], r15
mov QWORD PTR [rcx+8], rdi
; mask = 0 - carry
neg rsi
; Arguments for sp_4096_cond_sub_64(r, a, b, m): r8 = modulus, r9 = mask,
; rdx = upper half of a, rcx = start of a. r8 must be loaded from r9
; before r9 is overwritten with the mask.
; NOTE(review): no Win64 shadow space is reserved for this call; the callee
; as written in this file only touches its own 512-byte frame.
mov r8, r9
mov r9, rsi
mov rdx, rcx
sub rcx, 512
call sp_4096_cond_sub_64
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_4096_mont_reduce_64 ENDP
_text ENDS
; /* Subtract b from a and store the difference in r. (r = a - b)
; *
; * Every operand is 64 words of 64 bits (byte offsets 0..504).
; *
; * r  (rcx) A single precision integer that receives the difference.
; * a  (rdx) A single precision integer to subtract from.
; * b  (r8)  A single precision integer to subtract.
; * returns (rax) 0 when no borrow leaves the top word, -1 when one does.
; */
_text SEGMENT READONLY PARA
sp_4096_sub_64 PROC
        ; Word 0 opens the borrow chain with a plain sub. Each following
        ; word is loaded before the previous result is stored; mov leaves
        ; the flags alone, so the borrow survives from one sbb to the next.
        mov     r9, QWORD PTR [rdx]
        sub     r9, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx+8]
        mov     QWORD PTR [rcx], r9
        sbb     r10, QWORD PTR [r8+8]
        ; Words 2..63, two per expansion. Even words are held in r9 and
        ; odd words in r10, matching the prologue above.
SP4096_SUB64_OFF = 16
        REPT    31
        mov     r9, QWORD PTR [rdx+SP4096_SUB64_OFF]
        mov     QWORD PTR [rcx+SP4096_SUB64_OFF-8], r10
        sbb     r9, QWORD PTR [r8+SP4096_SUB64_OFF]
        mov     r10, QWORD PTR [rdx+SP4096_SUB64_OFF+8]
        mov     QWORD PTR [rcx+SP4096_SUB64_OFF], r9
        sbb     r10, QWORD PTR [r8+SP4096_SUB64_OFF+8]
SP4096_SUB64_OFF = SP4096_SUB64_OFF + 16
        ENDM
        ; Flush the top word, then turn the final borrow into 0 / -1.
        mov     QWORD PTR [rcx+504], r10
        sbb     rax, rax
        ret
sp_4096_sub_64 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a by the single digit b and store the product in r. (r = a * b)
; *
; * a is 64 words of 64 bits; r receives 65 words (byte offsets 0..512).
; * Uses mulx together with the adcx/adox dual carry chains.
; *
; * r  (rcx) A single precision integer that receives the product.
; * a  (rdx) A single precision integer.
; * b  (r8)  A single precision digit.
; */
_text SEGMENT READONLY PARA
sp_4096_mul_d_avx2_64 PROC
        push    r12
        push    r13
        ; mulx takes one factor implicitly from rdx, so a moves to rax and
        ; the digit b takes its place.
        mov     rax, rdx
        mov     rdx, r8
        ; r13 is the constant zero; the xor also clears CF and OF so that
        ; both carry chains start empty.
        xor     r13, r13
        ; A[0] * B - low half is final, high half seeds the accumulator.
        mulx    r12, r11, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r11
        ; A[1] * B .. A[62] * B, two words per expansion. The running
        ; word alternates between r12 (odd index) and r11 (even index):
        ; adcx folds in the low half of the new product through CF, adox
        ; starts the next word from the high half through OF.
SP4096_MULD_OFF = 8
        REPT    31
        ; odd word
        mulx    r10, r9, QWORD PTR [rax+SP4096_MULD_OFF]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+SP4096_MULD_OFF], r12
        ; even word
        mulx    r10, r9, QWORD PTR [rax+SP4096_MULD_OFF+8]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+SP4096_MULD_OFF+8], r11
SP4096_MULD_OFF = SP4096_MULD_OFF + 16
        ENDM
        ; A[63] * B - the extra adcx folds the last CF into the top word.
        mulx    r10, r9, QWORD PTR [rax+504]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        adcx    r11, r13
        mov     QWORD PTR [rcx+504], r12
        mov     QWORD PTR [rcx+512], r11
        pop     r13
        pop     r12
        ret
sp_4096_mul_d_avx2_64 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1  (rcx) The high order half of the number to divide.
; * d0  (rdx) The low order half of the number to divide.
; * div (r8)  The divisor.
; * returns (rax) the quotient of the division.
; *
; * The div instruction faults when div is zero or when the quotient does
; * not fit in 64 bits; nothing here checks for either case.
; */
_text SEGMENT READONLY PARA
div_4096_word_asm_64 PROC
        ; div expects the number to divide in rdx:rax (high:low). d0 sits
        ; in rdx on entry, so park it in r9 before rdx is overwritten.
        mov     r9, rdx
        mov     rdx, rcx
        mov     rax, r9
        div     r8
        ret
div_4096_word_asm_64 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract (r = a - b) and 0 to not subtract (r = a).
; *
; * Every operand is 64 words of 64 bits (byte offsets 0..504).
; *
; * r  (rcx) A single precision number that receives the result.
; * a  (rdx) A single precision number to subtract from.
; * b  (r8)  A single precision number to subtract.
; * m  (r9)  Mask value to apply.
; * returns (rax) 0 when no borrow leaves the top word, -1 when one does.
; */
_text SEGMENT READONLY PARA
sp_4096_cond_sub_avx2_64 PROC
        push    r12
        ; Each word of b goes through pext with the mask: an all-ones mask
        ; returns the word unchanged and a zero mask returns 0, so the same
        ; sub/sbb chain runs for either value of m.
        ; Word 0 opens the borrow chain.
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        sub     r10, r12
        ; Word 1; the store of word 0 is delayed until here.
        mov     r12, QWORD PTR [r8+8]
        mov     r11, QWORD PTR [rdx+8]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx], r10
        sbb     r11, r12
        ; Words 2..61, three per expansion. r10, r11 and r12 rotate
        ; through the roles "masked b word", "a word / result" and
        ; "previous result waiting to be stored".
SP4096_CSUB_OFF = 16
        REPT    20
        mov     r10, QWORD PTR [r8+SP4096_CSUB_OFF]
        mov     r12, QWORD PTR [rdx+SP4096_CSUB_OFF]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+SP4096_CSUB_OFF-8], r11
        sbb     r12, r10
        mov     r11, QWORD PTR [r8+SP4096_CSUB_OFF+8]
        mov     r10, QWORD PTR [rdx+SP4096_CSUB_OFF+8]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+SP4096_CSUB_OFF], r12
        sbb     r10, r11
        mov     r12, QWORD PTR [r8+SP4096_CSUB_OFF+16]
        mov     r11, QWORD PTR [rdx+SP4096_CSUB_OFF+16]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+SP4096_CSUB_OFF+8], r10
        sbb     r11, r12
SP4096_CSUB_OFF = SP4096_CSUB_OFF + 24
        ENDM
        ; Word 62.
        mov     r10, QWORD PTR [r8+496]
        mov     r12, QWORD PTR [rdx+496]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+488], r11
        sbb     r12, r10
        ; Word 63.
        mov     r11, QWORD PTR [r8+504]
        mov     r10, QWORD PTR [rdx+504]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+496], r12
        sbb     r10, r11
        ; Flush the top word, then turn the final borrow into 0 / -1.
        mov     QWORD PTR [rcx+504], r10
        sbb     rax, rax
        pop     r12
        ret
sp_4096_cond_sub_avx2_64 ENDP
_text ENDS
ENDIF
; /* Compare a with b without branching (constant time).
; *
; * Both operands are 64 words of 64 bits; words are visited from the most
; * significant (offset 504) down to the least significant (offset 0).
; *
; * a  (rcx) A single precision integer.
; * b  (rdx) A single precision integer.
; * return (rax) -1, 0 or 1 if a is less than, equal to or greater than b
; *        respectively.
; */
_text SEGMENT READONLY PARA
sp_4096_cmp_64 PROC
        push    r12
        ; r9  = 0, used to clear the mask
        ; r8  = mask: -1 while all words so far were equal, 0 afterwards
        ; rax = running result, starts at -1 and is fixed up at the end
        ; r10 = 1, the "greater than" result
        xor     r9, r9
        mov     r8, -1
        mov     rax, -1
        mov     r10, 1
        ; Words 63..1. Once the mask is 0 both words are forced to 0, so
        ; the subtraction sets neither "above" nor "carry" and rax keeps
        ; the verdict of the first differing word.
SP4096_CMP_OFF = 504
        REPT    63
        mov     r11, QWORD PTR [rcx+SP4096_CMP_OFF]
        mov     r12, QWORD PTR [rdx+SP4096_CMP_OFF]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
SP4096_CMP_OFF = SP4096_CMP_OFF - 8
        ENDM
        ; Word 0.
        mov     r11, QWORD PTR [rcx]
        mov     r12, QWORD PTR [rdx]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; All words equal: r8 is still -1 and flips the initial -1 to 0.
        ; Otherwise r8 is 0 and rax is returned unchanged.
        xor     rax, r8
        pop     r12
        ret
sp_4096_cmp_64 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_4096_get_from_table_64 PROC
sub rsp, 128
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqu OWORD PTR [rsp+32], xmm8
movdqu OWORD PTR [rsp+48], xmm9
movdqu OWORD PTR [rsp+64], xmm10
movdqu OWORD PTR [rsp+80], xmm11
movdqu OWORD PTR [rsp+96], xmm12
movdqu OWORD PTR [rsp+112], xmm13
mov rax, 1
movd xmm10, r8
movd xmm11, rax
pxor xmm13, xmm13
pshufd xmm11, xmm11, 0
pshufd xmm10, xmm10, 0
; START: 0-7
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 0-7
; START: 8-15
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 64
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 8-15
; START: 16-23
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 128
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 16-23
; START: 24-31
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 192
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 24-31
; START: 32-39
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 256
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 32-39
; START: 40-47
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 320
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 40-47
; START: 48-55
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 384
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 384
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 384
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 384
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 384
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 384
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 384
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 384
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 384
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 384
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 384
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 384
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 384
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 384
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 384
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 384
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
add rcx, 64
; END: 48-55
; START: 56-63
pxor xmm13, xmm13
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
pxor xmm7, xmm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 448
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 448
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 448
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 448
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 448
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 448
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 448
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 448
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 448
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 448
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 448
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 448
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 448
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 448
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 448
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 448
movdqu xmm12, xmm13
pcmpeqd xmm12, xmm10
movdqu xmm0, OWORD PTR [r9]
movdqu xmm1, OWORD PTR [r9+16]
movdqu xmm2, OWORD PTR [r9+32]
movdqu xmm3, OWORD PTR [r9+48]
pand xmm0, xmm12
pand xmm1, xmm12
pand xmm2, xmm12
pand xmm3, xmm12
por xmm4, xmm0
por xmm5, xmm1
por xmm6, xmm2
por xmm7, xmm3
paddd xmm13, xmm11
movdqu OWORD PTR [rcx], xmm4
movdqu OWORD PTR [rcx+16], xmm5
movdqu OWORD PTR [rcx+32], xmm6
movdqu OWORD PTR [rcx+48], xmm7
; END: 56-63
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm8, OWORD PTR [rsp+32]
movdqu xmm9, OWORD PTR [rsp+48]
movdqu xmm10, OWORD PTR [rsp+64]
movdqu xmm11, OWORD PTR [rsp+80]
movdqu xmm12, OWORD PTR [rsp+96]
movdqu xmm13, OWORD PTR [rsp+112]
add rsp, 128
ret
sp_4096_get_from_table_64 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 4096 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; */
_text SEGMENT READONLY PARA
sp_4096_mont_reduce_avx2_64 PROC
; Montgomery reduction over 64 64-bit words, using mulx together with two
; independent carry chains (adcx carries through CF, adox through OF).
;
; Inputs as used below: rcx = a, rdx = m, r8 = mp.
; Register roles:
;   r9   pointer into a; biased by +256 bytes while the loop runs
;   r10  m
;   r11  remaining iterations (64 down to 0)
;   rdx  mu, the implicit multiplicand of mulx
;   r14, r15, rdi, rsi
;        a[i+0..i+3], kept in registers from one iteration to the next
;        and never stored by the loop
;   r12, r13
;        alternating accumulators for a[i+5] and above; r12 first takes
;        the low word a[i] + lo(m[0] * mu), which is discarded
;   rbx  zero; the xor that clears it also clears CF and OF
;   rbp  carry out of the top word, handed to the next iteration
; The 64-word result is written to a[0..63].
;
; Save the registers used below that are callee-saved in the Microsoft x64
; calling convention.
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
push rbp
mov r9, rcx
mov r10, rdx
xor rbp, rbp
; i = 64
mov r11, 64
; Preload a[0..3] into the register window.
mov r14, QWORD PTR [r9]
mov r15, QWORD PTR [r9+8]
mov rdi, QWORD PTR [r9+16]
mov rsi, QWORD PTR [r9+24]
; Bias the pointer: inside the loop [r9+d] addresses a[i + (d+256)/8].
add r9, 256
; rbp was already cleared above; no carry is pending for the first iteration.
xor rbp, rbp
L_4096_mont_reduce_avx2_64_loop:
; mu = a[i] * mp
; rdx = low 64 bits of a[i] * mp; r12 keeps a copy of a[i].
mov rdx, r14
mov r12, r14
imul rdx, r8
; rbx = 0 and CF = OF = 0, starting both carry chains.
xor rbx, rbx
; a[i+0] += m[0] * mu
; The low product word (rax) is added on the CF chain, the high word (rcx)
; on the OF chain into the next word. The window shifts down by one word
; in each of the first four steps (r14 <- r15 <- rdi <- rsi).
mulx rcx, rax, QWORD PTR [r10]
mov r14, r15
adcx r12, rax
adox r14, rcx
; a[i+1] += m[1] * mu
mulx rcx, rax, QWORD PTR [r10+8]
mov r15, rdi
adcx r14, rax
adox r15, rcx
; a[i+2] += m[2] * mu
mulx rcx, rax, QWORD PTR [r10+16]
mov rdi, rsi
adcx r15, rax
adox rdi, rcx
; a[i+3] += m[3] * mu
; a[i+4] enters the window from memory.
mulx rcx, rax, QWORD PTR [r10+24]
mov rsi, QWORD PTR [r9+-224]
adcx rdi, rax
adox rsi, rcx
; a[i+4] += m[4] * mu
mulx rcx, rax, QWORD PTR [r10+32]
mov r13, QWORD PTR [r9+-216]
adcx rsi, rax
adox r13, rcx
; a[i+5] += m[5] * mu
; From here on each step loads a[i+k+1], adds the low product word to
; a[i+k] and the high product word to a[i+k+1], then stores a[i+k].
mulx rcx, rax, QWORD PTR [r10+40]
mov r12, QWORD PTR [r9+-208]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-216], r13
; a[i+6] += m[6] * mu
mulx rcx, rax, QWORD PTR [r10+48]
mov r13, QWORD PTR [r9+-200]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-208], r12
; a[i+7] += m[7] * mu
mulx rcx, rax, QWORD PTR [r10+56]
mov r12, QWORD PTR [r9+-192]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-200], r13
; a[i+8] += m[8] * mu
mulx rcx, rax, QWORD PTR [r10+64]
mov r13, QWORD PTR [r9+-184]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-192], r12
; a[i+9] += m[9] * mu
mulx rcx, rax, QWORD PTR [r10+72]
mov r12, QWORD PTR [r9+-176]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-184], r13
; a[i+10] += m[10] * mu
mulx rcx, rax, QWORD PTR [r10+80]
mov r13, QWORD PTR [r9+-168]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-176], r12
; a[i+11] += m[11] * mu
mulx rcx, rax, QWORD PTR [r10+88]
mov r12, QWORD PTR [r9+-160]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-168], r13
; a[i+12] += m[12] * mu
mulx rcx, rax, QWORD PTR [r10+96]
mov r13, QWORD PTR [r9+-152]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-160], r12
; a[i+13] += m[13] * mu
mulx rcx, rax, QWORD PTR [r10+104]
mov r12, QWORD PTR [r9+-144]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-152], r13
; a[i+14] += m[14] * mu
mulx rcx, rax, QWORD PTR [r10+112]
mov r13, QWORD PTR [r9+-136]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-144], r12
; a[i+15] += m[15] * mu
mulx rcx, rax, QWORD PTR [r10+120]
mov r12, QWORD PTR [r9+-128]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-136], r13
; a[i+16] += m[16] * mu
mulx rcx, rax, QWORD PTR [r10+128]
mov r13, QWORD PTR [r9+-120]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-128], r12
; a[i+17] += m[17] * mu
mulx rcx, rax, QWORD PTR [r10+136]
mov r12, QWORD PTR [r9+-112]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-120], r13
; a[i+18] += m[18] * mu
mulx rcx, rax, QWORD PTR [r10+144]
mov r13, QWORD PTR [r9+-104]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-112], r12
; a[i+19] += m[19] * mu
mulx rcx, rax, QWORD PTR [r10+152]
mov r12, QWORD PTR [r9+-96]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-104], r13
; a[i+20] += m[20] * mu
mulx rcx, rax, QWORD PTR [r10+160]
mov r13, QWORD PTR [r9+-88]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-96], r12
; a[i+21] += m[21] * mu
mulx rcx, rax, QWORD PTR [r10+168]
mov r12, QWORD PTR [r9+-80]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-88], r13
; a[i+22] += m[22] * mu
mulx rcx, rax, QWORD PTR [r10+176]
mov r13, QWORD PTR [r9+-72]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-80], r12
; a[i+23] += m[23] * mu
mulx rcx, rax, QWORD PTR [r10+184]
mov r12, QWORD PTR [r9+-64]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-72], r13
; a[i+24] += m[24] * mu
mulx rcx, rax, QWORD PTR [r10+192]
mov r13, QWORD PTR [r9+-56]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-64], r12
; a[i+25] += m[25] * mu
mulx rcx, rax, QWORD PTR [r10+200]
mov r12, QWORD PTR [r9+-48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-56], r13
; a[i+26] += m[26] * mu
mulx rcx, rax, QWORD PTR [r10+208]
mov r13, QWORD PTR [r9+-40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-48], r12
; a[i+27] += m[27] * mu
mulx rcx, rax, QWORD PTR [r10+216]
mov r12, QWORD PTR [r9+-32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-40], r13
; a[i+28] += m[28] * mu
mulx rcx, rax, QWORD PTR [r10+224]
mov r13, QWORD PTR [r9+-24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-32], r12
; a[i+29] += m[29] * mu
mulx rcx, rax, QWORD PTR [r10+232]
mov r12, QWORD PTR [r9+-16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-24], r13
; a[i+30] += m[30] * mu
mulx rcx, rax, QWORD PTR [r10+240]
mov r13, QWORD PTR [r9+-8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-16], r12
; a[i+31] += m[31] * mu
mulx rcx, rax, QWORD PTR [r10+248]
mov r12, QWORD PTR [r9]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-8], r13
; a[i+32] += m[32] * mu
mulx rcx, rax, QWORD PTR [r10+256]
mov r13, QWORD PTR [r9+8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9], r12
; a[i+33] += m[33] * mu
mulx rcx, rax, QWORD PTR [r10+264]
mov r12, QWORD PTR [r9+16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+8], r13
; a[i+34] += m[34] * mu
mulx rcx, rax, QWORD PTR [r10+272]
mov r13, QWORD PTR [r9+24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+16], r12
; a[i+35] += m[35] * mu
mulx rcx, rax, QWORD PTR [r10+280]
mov r12, QWORD PTR [r9+32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+24], r13
; a[i+36] += m[36] * mu
mulx rcx, rax, QWORD PTR [r10+288]
mov r13, QWORD PTR [r9+40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+32], r12
; a[i+37] += m[37] * mu
mulx rcx, rax, QWORD PTR [r10+296]
mov r12, QWORD PTR [r9+48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+40], r13
; a[i+38] += m[38] * mu
mulx rcx, rax, QWORD PTR [r10+304]
mov r13, QWORD PTR [r9+56]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+48], r12
; a[i+39] += m[39] * mu
mulx rcx, rax, QWORD PTR [r10+312]
mov r12, QWORD PTR [r9+64]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+56], r13
; a[i+40] += m[40] * mu
mulx rcx, rax, QWORD PTR [r10+320]
mov r13, QWORD PTR [r9+72]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+64], r12
; a[i+41] += m[41] * mu
mulx rcx, rax, QWORD PTR [r10+328]
mov r12, QWORD PTR [r9+80]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+72], r13
; a[i+42] += m[42] * mu
mulx rcx, rax, QWORD PTR [r10+336]
mov r13, QWORD PTR [r9+88]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+80], r12
; a[i+43] += m[43] * mu
mulx rcx, rax, QWORD PTR [r10+344]
mov r12, QWORD PTR [r9+96]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+88], r13
; a[i+44] += m[44] * mu
mulx rcx, rax, QWORD PTR [r10+352]
mov r13, QWORD PTR [r9+104]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+96], r12
; a[i+45] += m[45] * mu
mulx rcx, rax, QWORD PTR [r10+360]
mov r12, QWORD PTR [r9+112]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+104], r13
; a[i+46] += m[46] * mu
mulx rcx, rax, QWORD PTR [r10+368]
mov r13, QWORD PTR [r9+120]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+112], r12
; a[i+47] += m[47] * mu
mulx rcx, rax, QWORD PTR [r10+376]
mov r12, QWORD PTR [r9+128]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+120], r13
; a[i+48] += m[48] * mu
mulx rcx, rax, QWORD PTR [r10+384]
mov r13, QWORD PTR [r9+136]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+128], r12
; a[i+49] += m[49] * mu
mulx rcx, rax, QWORD PTR [r10+392]
mov r12, QWORD PTR [r9+144]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+136], r13
; a[i+50] += m[50] * mu
mulx rcx, rax, QWORD PTR [r10+400]
mov r13, QWORD PTR [r9+152]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+144], r12
; a[i+51] += m[51] * mu
mulx rcx, rax, QWORD PTR [r10+408]
mov r12, QWORD PTR [r9+160]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+152], r13
; a[i+52] += m[52] * mu
mulx rcx, rax, QWORD PTR [r10+416]
mov r13, QWORD PTR [r9+168]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+160], r12
; a[i+53] += m[53] * mu
mulx rcx, rax, QWORD PTR [r10+424]
mov r12, QWORD PTR [r9+176]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+168], r13
; a[i+54] += m[54] * mu
mulx rcx, rax, QWORD PTR [r10+432]
mov r13, QWORD PTR [r9+184]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+176], r12
; a[i+55] += m[55] * mu
mulx rcx, rax, QWORD PTR [r10+440]
mov r12, QWORD PTR [r9+192]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+184], r13
; a[i+56] += m[56] * mu
mulx rcx, rax, QWORD PTR [r10+448]
mov r13, QWORD PTR [r9+200]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+192], r12
; a[i+57] += m[57] * mu
mulx rcx, rax, QWORD PTR [r10+456]
mov r12, QWORD PTR [r9+208]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+200], r13
; a[i+58] += m[58] * mu
mulx rcx, rax, QWORD PTR [r10+464]
mov r13, QWORD PTR [r9+216]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+208], r12
; a[i+59] += m[59] * mu
mulx rcx, rax, QWORD PTR [r10+472]
mov r12, QWORD PTR [r9+224]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+216], r13
; a[i+60] += m[60] * mu
mulx rcx, rax, QWORD PTR [r10+480]
mov r13, QWORD PTR [r9+232]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+224], r12
; a[i+61] += m[61] * mu
mulx rcx, rax, QWORD PTR [r10+488]
mov r12, QWORD PTR [r9+240]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+232], r13
; a[i+62] += m[62] * mu
mulx rcx, rax, QWORD PTR [r10+496]
mov r13, QWORD PTR [r9+248]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+240], r12
; a[i+63] += m[63] * mu
mulx rcx, rax, QWORD PTR [r10+504]
mov r12, QWORD PTR [r9+256]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+248], r13
; a[i+64] += carry left by the previous iteration (rbp) + CF
adcx r12, rbp
; Restart rbp from zero (rbx is zero) before collecting the new carry.
mov rbp, rbx
mov QWORD PTR [r9+256], r12
; rbp = OF + CF: the carry out of a[i+64], consumed by the next iteration
; or, after the last one, by the final subtraction.
adox rbp, rbx
adcx rbp, rbx
; a += 1
add r9, 8
; i -= 1
sub r11, 1
jnz L_4096_mont_reduce_avx2_64_loop
; Remove the +256 bias. After 64 steps of 8 bytes r9 now points at a[64].
sub r9, 256
; neg maps 0 to 0 and 1 to all ones: rbp becomes the mask that selects
; whether m is subtracted.
neg rbp
; r8 = source (a[64..127]), r9 = destination (a[0..63]).
mov r8, r9
sub r9, 512
; Branch-free final subtraction:
;   a[k] = a[64+k] - pext(m[k], mask), k = 0..63, borrow propagated by sbb.
; pext with an all-ones mask returns m[k] unchanged; with a zero mask it
; returns 0. Words 64..67 come from r14, r15, rdi, rsi (the loop left them
; in registers); the remaining words are loaded through r8.
; Each store lags one word behind its subtraction, and only mov and pext
; sit between consecutive sbb instructions, so the borrow flag survives.
; rax, rcx and rdx rotate through the roles of m word, a word and result.
mov rcx, QWORD PTR [r10]
mov rdx, r14
pext rcx, rcx, rbp
sub rdx, rcx
mov rcx, QWORD PTR [r10+8]
mov rax, r15
pext rcx, rcx, rbp
mov QWORD PTR [r9], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+16]
mov rcx, rdi
pext rdx, rdx, rbp
mov QWORD PTR [r9+8], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+24]
mov rdx, rsi
pext rax, rax, rbp
mov QWORD PTR [r9+16], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+32]
mov rax, QWORD PTR [r8+32]
pext rcx, rcx, rbp
mov QWORD PTR [r9+24], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+40]
mov rcx, QWORD PTR [r8+40]
pext rdx, rdx, rbp
mov QWORD PTR [r9+32], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+48]
mov rdx, QWORD PTR [r8+48]
pext rax, rax, rbp
mov QWORD PTR [r9+40], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+56]
mov rax, QWORD PTR [r8+56]
pext rcx, rcx, rbp
mov QWORD PTR [r9+48], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+64]
mov rcx, QWORD PTR [r8+64]
pext rdx, rdx, rbp
mov QWORD PTR [r9+56], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+72]
mov rdx, QWORD PTR [r8+72]
pext rax, rax, rbp
mov QWORD PTR [r9+64], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+80]
mov rax, QWORD PTR [r8+80]
pext rcx, rcx, rbp
mov QWORD PTR [r9+72], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+88]
mov rcx, QWORD PTR [r8+88]
pext rdx, rdx, rbp
mov QWORD PTR [r9+80], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+96]
mov rdx, QWORD PTR [r8+96]
pext rax, rax, rbp
mov QWORD PTR [r9+88], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+104]
mov rax, QWORD PTR [r8+104]
pext rcx, rcx, rbp
mov QWORD PTR [r9+96], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+112]
mov rcx, QWORD PTR [r8+112]
pext rdx, rdx, rbp
mov QWORD PTR [r9+104], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+120]
mov rdx, QWORD PTR [r8+120]
pext rax, rax, rbp
mov QWORD PTR [r9+112], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+128]
mov rax, QWORD PTR [r8+128]
pext rcx, rcx, rbp
mov QWORD PTR [r9+120], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+136]
mov rcx, QWORD PTR [r8+136]
pext rdx, rdx, rbp
mov QWORD PTR [r9+128], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+144]
mov rdx, QWORD PTR [r8+144]
pext rax, rax, rbp
mov QWORD PTR [r9+136], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+152]
mov rax, QWORD PTR [r8+152]
pext rcx, rcx, rbp
mov QWORD PTR [r9+144], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+160]
mov rcx, QWORD PTR [r8+160]
pext rdx, rdx, rbp
mov QWORD PTR [r9+152], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+168]
mov rdx, QWORD PTR [r8+168]
pext rax, rax, rbp
mov QWORD PTR [r9+160], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+176]
mov rax, QWORD PTR [r8+176]
pext rcx, rcx, rbp
mov QWORD PTR [r9+168], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+184]
mov rcx, QWORD PTR [r8+184]
pext rdx, rdx, rbp
mov QWORD PTR [r9+176], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+192]
mov rdx, QWORD PTR [r8+192]
pext rax, rax, rbp
mov QWORD PTR [r9+184], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+200]
mov rax, QWORD PTR [r8+200]
pext rcx, rcx, rbp
mov QWORD PTR [r9+192], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+208]
mov rcx, QWORD PTR [r8+208]
pext rdx, rdx, rbp
mov QWORD PTR [r9+200], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+216]
mov rdx, QWORD PTR [r8+216]
pext rax, rax, rbp
mov QWORD PTR [r9+208], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+224]
mov rax, QWORD PTR [r8+224]
pext rcx, rcx, rbp
mov QWORD PTR [r9+216], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+232]
mov rcx, QWORD PTR [r8+232]
pext rdx, rdx, rbp
mov QWORD PTR [r9+224], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+240]
mov rdx, QWORD PTR [r8+240]
pext rax, rax, rbp
mov QWORD PTR [r9+232], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+248]
mov rax, QWORD PTR [r8+248]
pext rcx, rcx, rbp
mov QWORD PTR [r9+240], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+256]
mov rcx, QWORD PTR [r8+256]
pext rdx, rdx, rbp
mov QWORD PTR [r9+248], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+264]
mov rdx, QWORD PTR [r8+264]
pext rax, rax, rbp
mov QWORD PTR [r9+256], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+272]
mov rax, QWORD PTR [r8+272]
pext rcx, rcx, rbp
mov QWORD PTR [r9+264], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+280]
mov rcx, QWORD PTR [r8+280]
pext rdx, rdx, rbp
mov QWORD PTR [r9+272], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+288]
mov rdx, QWORD PTR [r8+288]
pext rax, rax, rbp
mov QWORD PTR [r9+280], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+296]
mov rax, QWORD PTR [r8+296]
pext rcx, rcx, rbp
mov QWORD PTR [r9+288], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+304]
mov rcx, QWORD PTR [r8+304]
pext rdx, rdx, rbp
mov QWORD PTR [r9+296], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+312]
mov rdx, QWORD PTR [r8+312]
pext rax, rax, rbp
mov QWORD PTR [r9+304], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+320]
mov rax, QWORD PTR [r8+320]
pext rcx, rcx, rbp
mov QWORD PTR [r9+312], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+328]
mov rcx, QWORD PTR [r8+328]
pext rdx, rdx, rbp
mov QWORD PTR [r9+320], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+336]
mov rdx, QWORD PTR [r8+336]
pext rax, rax, rbp
mov QWORD PTR [r9+328], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+344]
mov rax, QWORD PTR [r8+344]
pext rcx, rcx, rbp
mov QWORD PTR [r9+336], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+352]
mov rcx, QWORD PTR [r8+352]
pext rdx, rdx, rbp
mov QWORD PTR [r9+344], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+360]
mov rdx, QWORD PTR [r8+360]
pext rax, rax, rbp
mov QWORD PTR [r9+352], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+368]
mov rax, QWORD PTR [r8+368]
pext rcx, rcx, rbp
mov QWORD PTR [r9+360], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+376]
mov rcx, QWORD PTR [r8+376]
pext rdx, rdx, rbp
mov QWORD PTR [r9+368], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+384]
mov rdx, QWORD PTR [r8+384]
pext rax, rax, rbp
mov QWORD PTR [r9+376], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+392]
mov rax, QWORD PTR [r8+392]
pext rcx, rcx, rbp
mov QWORD PTR [r9+384], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+400]
mov rcx, QWORD PTR [r8+400]
pext rdx, rdx, rbp
mov QWORD PTR [r9+392], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+408]
mov rdx, QWORD PTR [r8+408]
pext rax, rax, rbp
mov QWORD PTR [r9+400], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+416]
mov rax, QWORD PTR [r8+416]
pext rcx, rcx, rbp
mov QWORD PTR [r9+408], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+424]
mov rcx, QWORD PTR [r8+424]
pext rdx, rdx, rbp
mov QWORD PTR [r9+416], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+432]
mov rdx, QWORD PTR [r8+432]
pext rax, rax, rbp
mov QWORD PTR [r9+424], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+440]
mov rax, QWORD PTR [r8+440]
pext rcx, rcx, rbp
mov QWORD PTR [r9+432], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+448]
mov rcx, QWORD PTR [r8+448]
pext rdx, rdx, rbp
mov QWORD PTR [r9+440], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+456]
mov rdx, QWORD PTR [r8+456]
pext rax, rax, rbp
mov QWORD PTR [r9+448], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+464]
mov rax, QWORD PTR [r8+464]
pext rcx, rcx, rbp
mov QWORD PTR [r9+456], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+472]
mov rcx, QWORD PTR [r8+472]
pext rdx, rdx, rbp
mov QWORD PTR [r9+464], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+480]
mov rdx, QWORD PTR [r8+480]
pext rax, rax, rbp
mov QWORD PTR [r9+472], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+488]
mov rax, QWORD PTR [r8+488]
pext rcx, rcx, rbp
mov QWORD PTR [r9+480], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+496]
mov rcx, QWORD PTR [r8+496]
pext rdx, rdx, rbp
mov QWORD PTR [r9+488], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+504]
mov rdx, QWORD PTR [r8+504]
pext rax, rax, rbp
mov QWORD PTR [r9+496], rcx
sbb rdx, rax
mov QWORD PTR [r9+504], rdx
; The borrow out of the last sbb is not used.
; Restore the saved registers in reverse order of the pushes.
pop rbp
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_4096_mont_reduce_avx2_64 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; The routine below is built from AVX2 instructions (vpermd, and 256-bit
; vpcmpeqd/vpand/vpor/vpaddd), so it is only assembled when
; HAVE_INTEL_AVX2 is defined. The top of this file leaves that symbol
; undefined for assemblers that do not recognise AVX2 mnemonics.
IFDEF HAVE_INTEL_AVX2
; /* Copy one entry out of a table of 16 entries, touching every entry.
; * Every entry is read; each is ANDed with an all-ones or all-zeros mask
; * (all-ones only when its position equals idx) and ORed into the result.
; * The sequence of memory accesses therefore does not depend on idx.
; * If idx is not in the range 0..15 the result is all zeros.
; *
; * r (rcx)      Destination: 64 64-bit words (512 bytes).
; * table (rdx)  Array of 16 pointers, each to a 512 byte entry.
; * idx (r8)     Index of the entry to copy; only the low 32 bits are used.
; *
; * Overwrites rax, rcx, r9 and ymm0-ymm5. xmm6-xmm13 are saved on entry
; * and restored on exit; 128 bytes of stack are used for that.
; */

; Fold one table entry into the accumulators ymm4-ymm7.
;   tbl_disp   byte displacement of the entry's pointer within table
;   limb_disp  byte displacement of the 128 byte slice within the entry
; Expects ymm10 = idx in every 32-bit lane, ymm11 = 1 in every 32-bit
; lane and ymm13 = position of this entry in every 32-bit lane.
; Leaves ymm13 advanced by one for the next entry.
sp_4096_get_from_table_avx2_64_entry MACRO tbl_disp, limb_disp
        mov     r9, QWORD PTR [rdx+tbl_disp]
    IF limb_disp NE 0
        add     r9, limb_disp
    ENDIF
        ; ymm12 = all ones when this entry's position equals idx, else zero
        vpcmpeqd ymm12, ymm13, ymm10
        vmovdqu ymm0, YMMWORD PTR [r9]
        vmovdqu ymm1, YMMWORD PTR [r9+32]
        vmovdqu ymm2, YMMWORD PTR [r9+64]
        vmovdqu ymm3, YMMWORD PTR [r9+96]
        vpand   ymm0, ymm0, ymm12
        vpand   ymm1, ymm1, ymm12
        vpand   ymm2, ymm2, ymm12
        vpand   ymm3, ymm3, ymm12
        vpor    ymm4, ymm4, ymm0
        vpor    ymm5, ymm5, ymm1
        vpor    ymm6, ymm6, ymm2
        vpor    ymm7, ymm7, ymm3
        vpaddd  ymm13, ymm13, ymm11
ENDM

; Select one 128 byte slice (16 words) of the chosen entry and store it
; at [rcx]. limb_disp is the byte displacement of the slice in an entry.
sp_4096_get_from_table_avx2_64_pass MACRO limb_disp
        ; Restart the position counter and clear the accumulators
        vpxor   ymm13, ymm13, ymm13
        vpxor   ymm4, ymm4, ymm4
        vpxor   ymm5, ymm5, ymm5
        vpxor   ymm6, ymm6, ymm6
        vpxor   ymm7, ymm7, ymm7
        sp_4096_get_from_table_avx2_64_entry 0, limb_disp
        sp_4096_get_from_table_avx2_64_entry 8, limb_disp
        sp_4096_get_from_table_avx2_64_entry 16, limb_disp
        sp_4096_get_from_table_avx2_64_entry 24, limb_disp
        sp_4096_get_from_table_avx2_64_entry 32, limb_disp
        sp_4096_get_from_table_avx2_64_entry 40, limb_disp
        sp_4096_get_from_table_avx2_64_entry 48, limb_disp
        sp_4096_get_from_table_avx2_64_entry 56, limb_disp
        sp_4096_get_from_table_avx2_64_entry 64, limb_disp
        sp_4096_get_from_table_avx2_64_entry 72, limb_disp
        sp_4096_get_from_table_avx2_64_entry 80, limb_disp
        sp_4096_get_from_table_avx2_64_entry 88, limb_disp
        sp_4096_get_from_table_avx2_64_entry 96, limb_disp
        sp_4096_get_from_table_avx2_64_entry 104, limb_disp
        sp_4096_get_from_table_avx2_64_entry 112, limb_disp
        sp_4096_get_from_table_avx2_64_entry 120, limb_disp
        vmovdqu YMMWORD PTR [rcx], ymm4
        vmovdqu YMMWORD PTR [rcx+32], ymm5
        vmovdqu YMMWORD PTR [rcx+64], ymm6
        vmovdqu YMMWORD PTR [rcx+96], ymm7
ENDM

_text SEGMENT READONLY PARA
sp_4096_get_from_table_avx2_64 PROC
        ; Save xmm6-xmm13 (callee-saved in the Microsoft x64 convention)
        sub     rsp, 128
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13
        ; Broadcast idx and the constant 1 to every 32-bit lane: vpermd
        ; with an all-zero index vector copies lane 0 into all eight lanes
        mov     rax, 1
        movd    xmm10, r8
        movd    xmm11, rax
        vpxor   ymm13, ymm13, ymm13
        vpermd  ymm10, ymm13, ymm10
        vpermd  ymm11, ymm13, ymm11
        ; Words 0-15
        sp_4096_get_from_table_avx2_64_pass 0
        add     rcx, 128
        ; Words 16-31
        sp_4096_get_from_table_avx2_64_pass 128
        add     rcx, 128
        ; Words 32-47
        sp_4096_get_from_table_avx2_64_pass 256
        add     rcx, 128
        ; Words 48-63
        sp_4096_get_from_table_avx2_64_pass 384
        ; Restore the saved vector registers and release the stack
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        vmovdqu xmm12, OWORD PTR [rsp+96]
        vmovdqu xmm13, OWORD PTR [rsp+112]
        add     rsp, 128
        ret
sp_4096_get_from_table_avx2_64 ENDP
_text ENDS
ENDIF
ENDIF
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
vmovdqu YMMWORD PTR [rcx], ymm4
vmovdqu YMMWORD PTR [rcx+32], ymm5
vmovdqu YMMWORD PTR [rcx+64], ymm6
vmovdqu YMMWORD PTR [rcx+96], ymm7
add rcx, 128
; END: 0-15
; START: 16-31
vpxor ymm13, ymm13, ymm13
vpxor ymm4, ymm4, ymm4
vpxor ymm5, ymm5, ymm5
vpxor ymm6, ymm6, ymm6
vpxor ymm7, ymm7, ymm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 128
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
vmovdqu YMMWORD PTR [rcx], ymm4
vmovdqu YMMWORD PTR [rcx+32], ymm5
vmovdqu YMMWORD PTR [rcx+64], ymm6
vmovdqu YMMWORD PTR [rcx+96], ymm7
add rcx, 128
; END: 16-31
; START: 32-47
vpxor ymm13, ymm13, ymm13
vpxor ymm4, ymm4, ymm4
vpxor ymm5, ymm5, ymm5
vpxor ymm6, ymm6, ymm6
vpxor ymm7, ymm7, ymm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 256
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
vmovdqu YMMWORD PTR [rcx], ymm4
vmovdqu YMMWORD PTR [rcx+32], ymm5
vmovdqu YMMWORD PTR [rcx+64], ymm6
vmovdqu YMMWORD PTR [rcx+96], ymm7
add rcx, 128
; END: 32-47
; START: 48-63
vpxor ymm13, ymm13, ymm13
vpxor ymm4, ymm4, ymm4
vpxor ymm5, ymm5, ymm5
vpxor ymm6, ymm6, ymm6
vpxor ymm7, ymm7, ymm7
; ENTRY: 0
mov r9, QWORD PTR [rdx]
add r9, 384
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 1
mov r9, QWORD PTR [rdx+8]
add r9, 384
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 2
mov r9, QWORD PTR [rdx+16]
add r9, 384
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 3
mov r9, QWORD PTR [rdx+24]
add r9, 384
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 4
mov r9, QWORD PTR [rdx+32]
add r9, 384
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 5
mov r9, QWORD PTR [rdx+40]
add r9, 384
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 6
mov r9, QWORD PTR [rdx+48]
add r9, 384
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 7
mov r9, QWORD PTR [rdx+56]
add r9, 384
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 8
mov r9, QWORD PTR [rdx+64]
add r9, 384
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 9
mov r9, QWORD PTR [rdx+72]
add r9, 384
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 10
mov r9, QWORD PTR [rdx+80]
add r9, 384
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 11
mov r9, QWORD PTR [rdx+88]
add r9, 384
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 12
mov r9, QWORD PTR [rdx+96]
add r9, 384
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 13
mov r9, QWORD PTR [rdx+104]
add r9, 384
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 14
mov r9, QWORD PTR [rdx+112]
add r9, 384
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
; ENTRY: 15
mov r9, QWORD PTR [rdx+120]
add r9, 384
vpcmpeqd ymm12, ymm13, ymm10
vmovdqu ymm0, YMMWORD PTR [r9]
vmovdqu ymm1, YMMWORD PTR [r9+32]
vmovdqu ymm2, YMMWORD PTR [r9+64]
vmovdqu ymm3, YMMWORD PTR [r9+96]
vpand ymm0, ymm0, ymm12
vpand ymm1, ymm1, ymm12
vpand ymm2, ymm2, ymm12
vpand ymm3, ymm3, ymm12
vpor ymm4, ymm4, ymm0
vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2
vpor ymm7, ymm7, ymm3
vpaddd ymm13, ymm13, ymm11
vmovdqu YMMWORD PTR [rcx], ymm4
vmovdqu YMMWORD PTR [rcx+32], ymm5
vmovdqu YMMWORD PTR [rcx+64], ymm6
vmovdqu YMMWORD PTR [rcx+96], ymm7
; END: 48-63
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm8, OWORD PTR [rsp+32]
vmovdqu xmm9, OWORD PTR [rsp+48]
vmovdqu xmm10, OWORD PTR [rsp+64]
vmovdqu xmm11, OWORD PTR [rsp+80]
vmovdqu xmm12, OWORD PTR [rsp+96]
vmovdqu xmm13, OWORD PTR [rsp+112]
add rsp, 128
ret
sp_4096_get_from_table_avx2_64 ENDP
_text ENDS
ENDIF
; /* Conditionally add a and b using the mask m.
; * m is -1 to add and 0 when not.
; *
; * r A single precision number representing conditional add result.
; * a A single precision number to add with.
; * b A single precision number to add.
; * m Mask value to apply.
; */
_text SEGMENT READONLY PARA
sp_4096_cond_add_32 PROC
sub rsp, 256
mov rax, 0
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
and r10, r9
and r11, r9
mov QWORD PTR [rsp], r10
mov QWORD PTR [rsp+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+16], r10
mov QWORD PTR [rsp+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+32], r10
mov QWORD PTR [rsp+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+48], r10
mov QWORD PTR [rsp+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+64], r10
mov QWORD PTR [rsp+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+80], r10
mov QWORD PTR [rsp+88], r11
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+96], r10
mov QWORD PTR [rsp+104], r11
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+112], r10
mov QWORD PTR [rsp+120], r11
mov r10, QWORD PTR [r8+128]
mov r11, QWORD PTR [r8+136]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+128], r10
mov QWORD PTR [rsp+136], r11
mov r10, QWORD PTR [r8+144]
mov r11, QWORD PTR [r8+152]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+144], r10
mov QWORD PTR [rsp+152], r11
mov r10, QWORD PTR [r8+160]
mov r11, QWORD PTR [r8+168]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+160], r10
mov QWORD PTR [rsp+168], r11
mov r10, QWORD PTR [r8+176]
mov r11, QWORD PTR [r8+184]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+176], r10
mov QWORD PTR [rsp+184], r11
mov r10, QWORD PTR [r8+192]
mov r11, QWORD PTR [r8+200]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+192], r10
mov QWORD PTR [rsp+200], r11
mov r10, QWORD PTR [r8+208]
mov r11, QWORD PTR [r8+216]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+208], r10
mov QWORD PTR [rsp+216], r11
mov r10, QWORD PTR [r8+224]
mov r11, QWORD PTR [r8+232]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+224], r10
mov QWORD PTR [rsp+232], r11
mov r10, QWORD PTR [r8+240]
mov r11, QWORD PTR [r8+248]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+240], r10
mov QWORD PTR [rsp+248], r11
mov r10, QWORD PTR [rdx]
mov r8, QWORD PTR [rsp]
add r10, r8
mov r11, QWORD PTR [rdx+8]
mov r8, QWORD PTR [rsp+8]
adc r11, r8
mov QWORD PTR [rcx], r10
mov r10, QWORD PTR [rdx+16]
mov r8, QWORD PTR [rsp+16]
adc r10, r8
mov QWORD PTR [rcx+8], r11
mov r11, QWORD PTR [rdx+24]
mov r8, QWORD PTR [rsp+24]
adc r11, r8
mov QWORD PTR [rcx+16], r10
mov r10, QWORD PTR [rdx+32]
mov r8, QWORD PTR [rsp+32]
adc r10, r8
mov QWORD PTR [rcx+24], r11
mov r11, QWORD PTR [rdx+40]
mov r8, QWORD PTR [rsp+40]
adc r11, r8
mov QWORD PTR [rcx+32], r10
mov r10, QWORD PTR [rdx+48]
mov r8, QWORD PTR [rsp+48]
adc r10, r8
mov QWORD PTR [rcx+40], r11
mov r11, QWORD PTR [rdx+56]
mov r8, QWORD PTR [rsp+56]
adc r11, r8
mov QWORD PTR [rcx+48], r10
mov r10, QWORD PTR [rdx+64]
mov r8, QWORD PTR [rsp+64]
adc r10, r8
mov QWORD PTR [rcx+56], r11
mov r11, QWORD PTR [rdx+72]
mov r8, QWORD PTR [rsp+72]
adc r11, r8
mov QWORD PTR [rcx+64], r10
mov r10, QWORD PTR [rdx+80]
mov r8, QWORD PTR [rsp+80]
adc r10, r8
mov QWORD PTR [rcx+72], r11
mov r11, QWORD PTR [rdx+88]
mov r8, QWORD PTR [rsp+88]
adc r11, r8
mov QWORD PTR [rcx+80], r10
mov r10, QWORD PTR [rdx+96]
mov r8, QWORD PTR [rsp+96]
adc r10, r8
mov QWORD PTR [rcx+88], r11
mov r11, QWORD PTR [rdx+104]
mov r8, QWORD PTR [rsp+104]
adc r11, r8
mov QWORD PTR [rcx+96], r10
mov r10, QWORD PTR [rdx+112]
mov r8, QWORD PTR [rsp+112]
adc r10, r8
mov QWORD PTR [rcx+104], r11
mov r11, QWORD PTR [rdx+120]
mov r8, QWORD PTR [rsp+120]
adc r11, r8
mov QWORD PTR [rcx+112], r10
mov r10, QWORD PTR [rdx+128]
mov r8, QWORD PTR [rsp+128]
adc r10, r8
mov QWORD PTR [rcx+120], r11
mov r11, QWORD PTR [rdx+136]
mov r8, QWORD PTR [rsp+136]
adc r11, r8
mov QWORD PTR [rcx+128], r10
mov r10, QWORD PTR [rdx+144]
mov r8, QWORD PTR [rsp+144]
adc r10, r8
mov QWORD PTR [rcx+136], r11
mov r11, QWORD PTR [rdx+152]
mov r8, QWORD PTR [rsp+152]
adc r11, r8
mov QWORD PTR [rcx+144], r10
mov r10, QWORD PTR [rdx+160]
mov r8, QWORD PTR [rsp+160]
adc r10, r8
mov QWORD PTR [rcx+152], r11
mov r11, QWORD PTR [rdx+168]
mov r8, QWORD PTR [rsp+168]
adc r11, r8
mov QWORD PTR [rcx+160], r10
mov r10, QWORD PTR [rdx+176]
mov r8, QWORD PTR [rsp+176]
adc r10, r8
mov QWORD PTR [rcx+168], r11
mov r11, QWORD PTR [rdx+184]
mov r8, QWORD PTR [rsp+184]
adc r11, r8
mov QWORD PTR [rcx+176], r10
mov r10, QWORD PTR [rdx+192]
mov r8, QWORD PTR [rsp+192]
adc r10, r8
mov QWORD PTR [rcx+184], r11
mov r11, QWORD PTR [rdx+200]
mov r8, QWORD PTR [rsp+200]
adc r11, r8
mov QWORD PTR [rcx+192], r10
mov r10, QWORD PTR [rdx+208]
mov r8, QWORD PTR [rsp+208]
adc r10, r8
mov QWORD PTR [rcx+200], r11
mov r11, QWORD PTR [rdx+216]
mov r8, QWORD PTR [rsp+216]
adc r11, r8
mov QWORD PTR [rcx+208], r10
mov r10, QWORD PTR [rdx+224]
mov r8, QWORD PTR [rsp+224]
adc r10, r8
mov QWORD PTR [rcx+216], r11
mov r11, QWORD PTR [rdx+232]
mov r8, QWORD PTR [rsp+232]
adc r11, r8
mov QWORD PTR [rcx+224], r10
mov r10, QWORD PTR [rdx+240]
mov r8, QWORD PTR [rsp+240]
adc r10, r8
mov QWORD PTR [rcx+232], r11
mov r11, QWORD PTR [rdx+248]
mov r8, QWORD PTR [rsp+248]
adc r11, r8
mov QWORD PTR [rcx+240], r10
mov QWORD PTR [rcx+248], r11
adc rax, 0
add rsp, 256
ret
sp_4096_cond_add_32 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally add a and b using the mask m.
; * m is -1 to add and 0 when not.
; *
; * r A single precision number representing conditional add result.
; * a A single precision number to add with.
; * b A single precision number to add.
; * m Mask value to apply.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b, r9 = m.
; * Handles 32 limbs of 64 bits (byte offsets 0..248).
; * Returns the carry out of the top limb (0 or 1) in rax.
; *
; * Each limb of b is filtered with "pext limb, limb, m". For m = -1 this
; * yields the limb unchanged and for m = 0 it yields 0, i.e. it behaves
; * like an AND for those two mask values only. pext and mov leave the
; * carry flag untouched, so one add/adc chain runs through all the limbs.
; * The store of r[i-1] is placed between the pext and the adc of limb i.
; */
_text SEGMENT READONLY PARA
sp_4096_cond_add_avx2_32 PROC
push r12
; Clear the return value with mov (not xor) - done before the chain starts.
mov rax, 0
; limb 0 - starts the carry chain with add
mov r12, QWORD PTR [r8]
mov r10, QWORD PTR [rdx]
pext r12, r12, r9
add r10, r12
; limb 1
mov r12, QWORD PTR [r8+8]
mov r11, QWORD PTR [rdx+8]
pext r12, r12, r9
mov QWORD PTR [rcx], r10
adc r11, r12
; limb 2 - from here r10, r11 and r12 rotate roles every limb
mov r10, QWORD PTR [r8+16]
mov r12, QWORD PTR [rdx+16]
pext r10, r10, r9
mov QWORD PTR [rcx+8], r11
adc r12, r10
; limb 3
mov r11, QWORD PTR [r8+24]
mov r10, QWORD PTR [rdx+24]
pext r11, r11, r9
mov QWORD PTR [rcx+16], r12
adc r10, r11
; limb 4
mov r12, QWORD PTR [r8+32]
mov r11, QWORD PTR [rdx+32]
pext r12, r12, r9
mov QWORD PTR [rcx+24], r10
adc r11, r12
; limb 5
mov r10, QWORD PTR [r8+40]
mov r12, QWORD PTR [rdx+40]
pext r10, r10, r9
mov QWORD PTR [rcx+32], r11
adc r12, r10
; limb 6
mov r11, QWORD PTR [r8+48]
mov r10, QWORD PTR [rdx+48]
pext r11, r11, r9
mov QWORD PTR [rcx+40], r12
adc r10, r11
; limb 7
mov r12, QWORD PTR [r8+56]
mov r11, QWORD PTR [rdx+56]
pext r12, r12, r9
mov QWORD PTR [rcx+48], r10
adc r11, r12
; limb 8
mov r10, QWORD PTR [r8+64]
mov r12, QWORD PTR [rdx+64]
pext r10, r10, r9
mov QWORD PTR [rcx+56], r11
adc r12, r10
; limb 9
mov r11, QWORD PTR [r8+72]
mov r10, QWORD PTR [rdx+72]
pext r11, r11, r9
mov QWORD PTR [rcx+64], r12
adc r10, r11
; limb 10
mov r12, QWORD PTR [r8+80]
mov r11, QWORD PTR [rdx+80]
pext r12, r12, r9
mov QWORD PTR [rcx+72], r10
adc r11, r12
; limb 11
mov r10, QWORD PTR [r8+88]
mov r12, QWORD PTR [rdx+88]
pext r10, r10, r9
mov QWORD PTR [rcx+80], r11
adc r12, r10
; limb 12
mov r11, QWORD PTR [r8+96]
mov r10, QWORD PTR [rdx+96]
pext r11, r11, r9
mov QWORD PTR [rcx+88], r12
adc r10, r11
; limb 13
mov r12, QWORD PTR [r8+104]
mov r11, QWORD PTR [rdx+104]
pext r12, r12, r9
mov QWORD PTR [rcx+96], r10
adc r11, r12
; limb 14
mov r10, QWORD PTR [r8+112]
mov r12, QWORD PTR [rdx+112]
pext r10, r10, r9
mov QWORD PTR [rcx+104], r11
adc r12, r10
; limb 15
mov r11, QWORD PTR [r8+120]
mov r10, QWORD PTR [rdx+120]
pext r11, r11, r9
mov QWORD PTR [rcx+112], r12
adc r10, r11
; limb 16
mov r12, QWORD PTR [r8+128]
mov r11, QWORD PTR [rdx+128]
pext r12, r12, r9
mov QWORD PTR [rcx+120], r10
adc r11, r12
; limb 17
mov r10, QWORD PTR [r8+136]
mov r12, QWORD PTR [rdx+136]
pext r10, r10, r9
mov QWORD PTR [rcx+128], r11
adc r12, r10
; limb 18
mov r11, QWORD PTR [r8+144]
mov r10, QWORD PTR [rdx+144]
pext r11, r11, r9
mov QWORD PTR [rcx+136], r12
adc r10, r11
; limb 19
mov r12, QWORD PTR [r8+152]
mov r11, QWORD PTR [rdx+152]
pext r12, r12, r9
mov QWORD PTR [rcx+144], r10
adc r11, r12
; limb 20
mov r10, QWORD PTR [r8+160]
mov r12, QWORD PTR [rdx+160]
pext r10, r10, r9
mov QWORD PTR [rcx+152], r11
adc r12, r10
; limb 21
mov r11, QWORD PTR [r8+168]
mov r10, QWORD PTR [rdx+168]
pext r11, r11, r9
mov QWORD PTR [rcx+160], r12
adc r10, r11
; limb 22
mov r12, QWORD PTR [r8+176]
mov r11, QWORD PTR [rdx+176]
pext r12, r12, r9
mov QWORD PTR [rcx+168], r10
adc r11, r12
; limb 23
mov r10, QWORD PTR [r8+184]
mov r12, QWORD PTR [rdx+184]
pext r10, r10, r9
mov QWORD PTR [rcx+176], r11
adc r12, r10
; limb 24
mov r11, QWORD PTR [r8+192]
mov r10, QWORD PTR [rdx+192]
pext r11, r11, r9
mov QWORD PTR [rcx+184], r12
adc r10, r11
; limb 25
mov r12, QWORD PTR [r8+200]
mov r11, QWORD PTR [rdx+200]
pext r12, r12, r9
mov QWORD PTR [rcx+192], r10
adc r11, r12
; limb 26
mov r10, QWORD PTR [r8+208]
mov r12, QWORD PTR [rdx+208]
pext r10, r10, r9
mov QWORD PTR [rcx+200], r11
adc r12, r10
; limb 27
mov r11, QWORD PTR [r8+216]
mov r10, QWORD PTR [rdx+216]
pext r11, r11, r9
mov QWORD PTR [rcx+208], r12
adc r10, r11
; limb 28
mov r12, QWORD PTR [r8+224]
mov r11, QWORD PTR [rdx+224]
pext r12, r12, r9
mov QWORD PTR [rcx+216], r10
adc r11, r12
; limb 29
mov r10, QWORD PTR [r8+232]
mov r12, QWORD PTR [rdx+232]
pext r10, r10, r9
mov QWORD PTR [rcx+224], r11
adc r12, r10
; limb 30
mov r11, QWORD PTR [r8+240]
mov r10, QWORD PTR [rdx+240]
pext r11, r11, r9
mov QWORD PTR [rcx+232], r12
adc r10, r11
; limb 31 (top limb)
mov r12, QWORD PTR [r8+248]
mov r11, QWORD PTR [rdx+248]
pext r12, r12, r9
mov QWORD PTR [rcx+240], r10
adc r11, r12
mov QWORD PTR [rcx+248], r11
; rax = final carry out (rax was zeroed on entry)
adc rax, 0
pop r12
ret
sp_4096_cond_add_avx2_32 ENDP
_text ENDS
ENDIF
; /* Shift number left by n bits. (r = a << n)
; *
; * r Result of left shift by n.
; * a Number to shift.
; * n Amount to shift.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = n (only the low byte is used).
; * Reads 64 limbs of a (byte offsets 0..504) and writes 65 limbs of r
; * (byte offsets 0..512); the extra top limb receives the bits shifted out
; * of a[63].
; * The count is applied through cl by shld/shl, which use it modulo 64 for
; * 64-bit operands.
; * Limbs are processed from the top down. r13 and r11 alternate as the
; * register that carries the lowest limb of one group into the next group,
; * where it is shifted and stored as that group's highest output limb.
; */
_text SEGMENT READONLY PARA
sp_4096_lshift_64 PROC
push r12
push r13
; cl is required for the shift count, so the r pointer moves to rax.
mov rax, rcx
mov cl, r8b
; r12 starts at zero and collects the bits shifted out of a[63].
mov r12, 0
; a[59..63] -> r[60..64]; r13 = a[59] is kept for the next group
mov r13, QWORD PTR [rdx+472]
mov r8, QWORD PTR [rdx+480]
mov r9, QWORD PTR [rdx+488]
mov r10, QWORD PTR [rdx+496]
mov r11, QWORD PTR [rdx+504]
shld r12, r11, cl
shld r11, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r13, cl
mov QWORD PTR [rax+480], r8
mov QWORD PTR [rax+488], r9
mov QWORD PTR [rax+496], r10
mov QWORD PTR [rax+504], r11
mov QWORD PTR [rax+512], r12
; a[55..58] (+ r13 = a[59]) -> r[56..59]; r11 = a[55] is kept
mov r11, QWORD PTR [rdx+440]
mov r8, QWORD PTR [rdx+448]
mov r9, QWORD PTR [rdx+456]
mov r10, QWORD PTR [rdx+464]
shld r13, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r11, cl
mov QWORD PTR [rax+448], r8
mov QWORD PTR [rax+456], r9
mov QWORD PTR [rax+464], r10
mov QWORD PTR [rax+472], r13
; a[51..54] (+ r11 = a[55]) -> r[52..55]; r13 = a[51] is kept
mov r13, QWORD PTR [rdx+408]
mov r8, QWORD PTR [rdx+416]
mov r9, QWORD PTR [rdx+424]
mov r10, QWORD PTR [rdx+432]
shld r11, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r13, cl
mov QWORD PTR [rax+416], r8
mov QWORD PTR [rax+424], r9
mov QWORD PTR [rax+432], r10
mov QWORD PTR [rax+440], r11
; a[47..50] (+ r13 = a[51]) -> r[48..51]; r11 = a[47] is kept
mov r11, QWORD PTR [rdx+376]
mov r8, QWORD PTR [rdx+384]
mov r9, QWORD PTR [rdx+392]
mov r10, QWORD PTR [rdx+400]
shld r13, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r11, cl
mov QWORD PTR [rax+384], r8
mov QWORD PTR [rax+392], r9
mov QWORD PTR [rax+400], r10
mov QWORD PTR [rax+408], r13
; a[43..46] (+ r11 = a[47]) -> r[44..47]; r13 = a[43] is kept
mov r13, QWORD PTR [rdx+344]
mov r8, QWORD PTR [rdx+352]
mov r9, QWORD PTR [rdx+360]
mov r10, QWORD PTR [rdx+368]
shld r11, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r13, cl
mov QWORD PTR [rax+352], r8
mov QWORD PTR [rax+360], r9
mov QWORD PTR [rax+368], r10
mov QWORD PTR [rax+376], r11
; a[39..42] (+ r13 = a[43]) -> r[40..43]; r11 = a[39] is kept
mov r11, QWORD PTR [rdx+312]
mov r8, QWORD PTR [rdx+320]
mov r9, QWORD PTR [rdx+328]
mov r10, QWORD PTR [rdx+336]
shld r13, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r11, cl
mov QWORD PTR [rax+320], r8
mov QWORD PTR [rax+328], r9
mov QWORD PTR [rax+336], r10
mov QWORD PTR [rax+344], r13
; a[35..38] (+ r11 = a[39]) -> r[36..39]; r13 = a[35] is kept
mov r13, QWORD PTR [rdx+280]
mov r8, QWORD PTR [rdx+288]
mov r9, QWORD PTR [rdx+296]
mov r10, QWORD PTR [rdx+304]
shld r11, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r13, cl
mov QWORD PTR [rax+288], r8
mov QWORD PTR [rax+296], r9
mov QWORD PTR [rax+304], r10
mov QWORD PTR [rax+312], r11
; a[31..34] (+ r13 = a[35]) -> r[32..35]; r11 = a[31] is kept
mov r11, QWORD PTR [rdx+248]
mov r8, QWORD PTR [rdx+256]
mov r9, QWORD PTR [rdx+264]
mov r10, QWORD PTR [rdx+272]
shld r13, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r11, cl
mov QWORD PTR [rax+256], r8
mov QWORD PTR [rax+264], r9
mov QWORD PTR [rax+272], r10
mov QWORD PTR [rax+280], r13
; a[27..30] (+ r11 = a[31]) -> r[28..31]; r13 = a[27] is kept
mov r13, QWORD PTR [rdx+216]
mov r8, QWORD PTR [rdx+224]
mov r9, QWORD PTR [rdx+232]
mov r10, QWORD PTR [rdx+240]
shld r11, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r13, cl
mov QWORD PTR [rax+224], r8
mov QWORD PTR [rax+232], r9
mov QWORD PTR [rax+240], r10
mov QWORD PTR [rax+248], r11
; a[23..26] (+ r13 = a[27]) -> r[24..27]; r11 = a[23] is kept
mov r11, QWORD PTR [rdx+184]
mov r8, QWORD PTR [rdx+192]
mov r9, QWORD PTR [rdx+200]
mov r10, QWORD PTR [rdx+208]
shld r13, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r11, cl
mov QWORD PTR [rax+192], r8
mov QWORD PTR [rax+200], r9
mov QWORD PTR [rax+208], r10
mov QWORD PTR [rax+216], r13
; a[19..22] (+ r11 = a[23]) -> r[20..23]; r13 = a[19] is kept
mov r13, QWORD PTR [rdx+152]
mov r8, QWORD PTR [rdx+160]
mov r9, QWORD PTR [rdx+168]
mov r10, QWORD PTR [rdx+176]
shld r11, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r13, cl
mov QWORD PTR [rax+160], r8
mov QWORD PTR [rax+168], r9
mov QWORD PTR [rax+176], r10
mov QWORD PTR [rax+184], r11
; a[15..18] (+ r13 = a[19]) -> r[16..19]; r11 = a[15] is kept
mov r11, QWORD PTR [rdx+120]
mov r8, QWORD PTR [rdx+128]
mov r9, QWORD PTR [rdx+136]
mov r10, QWORD PTR [rdx+144]
shld r13, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r11, cl
mov QWORD PTR [rax+128], r8
mov QWORD PTR [rax+136], r9
mov QWORD PTR [rax+144], r10
mov QWORD PTR [rax+152], r13
; a[11..14] (+ r11 = a[15]) -> r[12..15]; r13 = a[11] is kept
mov r13, QWORD PTR [rdx+88]
mov r8, QWORD PTR [rdx+96]
mov r9, QWORD PTR [rdx+104]
mov r10, QWORD PTR [rdx+112]
shld r11, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r13, cl
mov QWORD PTR [rax+96], r8
mov QWORD PTR [rax+104], r9
mov QWORD PTR [rax+112], r10
mov QWORD PTR [rax+120], r11
; a[7..10] (+ r13 = a[11]) -> r[8..11]; r11 = a[7] is kept
mov r11, QWORD PTR [rdx+56]
mov r8, QWORD PTR [rdx+64]
mov r9, QWORD PTR [rdx+72]
mov r10, QWORD PTR [rdx+80]
shld r13, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r11, cl
mov QWORD PTR [rax+64], r8
mov QWORD PTR [rax+72], r9
mov QWORD PTR [rax+80], r10
mov QWORD PTR [rax+88], r13
; a[3..6] (+ r11 = a[7]) -> r[4..7]; r13 = a[3] is kept
mov r13, QWORD PTR [rdx+24]
mov r8, QWORD PTR [rdx+32]
mov r9, QWORD PTR [rdx+40]
mov r10, QWORD PTR [rdx+48]
shld r11, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r13, cl
mov QWORD PTR [rax+32], r8
mov QWORD PTR [rax+40], r9
mov QWORD PTR [rax+48], r10
mov QWORD PTR [rax+56], r11
; a[0..2] (+ r13 = a[3]) -> r[0..3]; the bottom limb is filled with zeros
mov r8, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
shld r13, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shl r8, cl
mov QWORD PTR [rax], r8
mov QWORD PTR [rax+8], r9
mov QWORD PTR [rax+16], r10
mov QWORD PTR [rax+24], r13
pop r13
pop r12
ret
sp_4096_lshift_64 ENDP
_text ENDS
ENDIF
ENDIF
IFNDEF WOLFSSL_SP_NO_256
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b.
; * a and b are read as 4 limbs of 64 bits; r is written as 8 limbs.
; * The product is built one result column at a time. r10, r11 and r12
; * rotate as a three-limb accumulator: after a column is stored, the
; * register that held it is cleared and becomes the top of the accumulator.
; * Result limbs 0..3 are parked in 32 bytes of stack and copied to r only
; * after the last read of a and b - presumably so that r may overlap an
; * input. Limbs 4..7 are written to r directly.
; */
_text SEGMENT READONLY PARA
sp_256_mul_4 PROC
push r12
; mul writes rdx, so the a pointer is moved to r9.
mov r9, rdx
sub rsp, 32
; A[0] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9]
xor r12, r12
mov QWORD PTR [rsp], rax
mov r11, rdx
; A[0] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; column 1 complete
mov QWORD PTR [rsp+8], r11
; A[0] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; column 2 complete
mov QWORD PTR [rsp+16], r12
; A[0] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; column 3 complete
mov QWORD PTR [rsp+24], r10
; A[1] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+8]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; column 4 complete - upper half goes straight to r
mov QWORD PTR [rcx+32], r11
; A[2] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+16]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+40], r12
; A[3] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
; Copy the parked low four limbs from the stack into r[0..3].
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r10, QWORD PTR [rsp+16]
mov r11, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
add rsp, 32
pop r12
ret
sp_256_mul_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
; *
; * r Result of multiplication.
; * a First number to multiply.
; * b Second number to multiply.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b.
; * a and b are read as 4 limbs of 64 bits; r is written as 8 limbs.
; * Uses mulx (implicit multiplicand in rdx, flags untouched) with two
; * independent carry chains: adcx on CF and adox on OF.
; * Working registers: rax = a, rbp = b, rdx = a[i] for the current row,
; * r8..r15 = result limbs 0..7, rdi/rsi = partial product halves,
; * rbx = zero. r14 caches b[1] for the first three rows and is then
; * reused as result limb 6.
; * All eight result limbs stay in registers until every input limb has
; * been read; r is written only at the end.
; */
_text SEGMENT READONLY PARA
sp_256_mul_avx2_4 PROC
push rbp
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
mov rbp, r8
mov rax, rdx
mov rdx, QWORD PTR [rax]
mov r14, QWORD PTR [rbp+8]
; A[0] * B[0]
mulx r9, r8, QWORD PTR [rbp]
; xor supplies the zero in rbx and clears CF and OF before the chain.
xor rbx, rbx
; A[0] * B[1]
mulx r10, rdi, r14
adcx r9, rdi
; A[0] * B[2]
mulx r11, rdi, QWORD PTR [rbp+16]
adcx r10, rdi
; A[0] * B[3]
mulx r12, rdi, QWORD PTR [rbp+24]
adcx r11, rdi
mov rdx, QWORD PTR [rax+8]
adcx r12, rbx
; A[1] * B[0]
mulx rsi, rdi, QWORD PTR [rbp]
; Clear CF and OF again for this row's pair of chains.
xor rbx, rbx
adcx r9, rdi
; A[1] * B[1]
mulx r15, rdi, r14
adox r10, rsi
adcx r10, rdi
; A[1] * B[2]
mulx rsi, rdi, QWORD PTR [rbp+16]
adox r11, r15
adcx r11, rdi
; A[1] * B[3]
mulx r13, rdi, QWORD PTR [rbp+24]
adox r12, rsi
adcx r12, rdi
adox r13, rbx
mov rdx, QWORD PTR [rax+16]
adcx r13, rbx
; A[2] * B[0]
mulx rsi, rdi, QWORD PTR [rbp]
xor rbx, rbx
adcx r10, rdi
; A[2] * B[1]
mulx r15, rdi, r14
adox r11, rsi
adcx r11, rdi
; A[2] * B[2]
mulx rsi, rdi, QWORD PTR [rbp+16]
adox r12, r15
adcx r12, rdi
; A[2] * B[3]
; r14 (cached b[1]) is overwritten here; the last row reloads b[1] from
; memory.
mulx r14, rdi, QWORD PTR [rbp+24]
adox r13, rsi
adcx r13, rdi
adox r14, rbx
mov rdx, QWORD PTR [rax+24]
adcx r14, rbx
; A[3] * B[0]
mulx rsi, rdi, QWORD PTR [rbp]
xor rbx, rbx
adcx r11, rdi
; A[3] * B[1]
mulx r15, rdi, QWORD PTR [rbp+8]
adox r12, rsi
adcx r12, rdi
; A[3] * B[2]
mulx rsi, rdi, QWORD PTR [rbp+16]
adox r13, r15
adcx r13, rdi
; A[3] * B[3]
mulx r15, rdi, QWORD PTR [rbp+24]
adox r14, rsi
adcx r14, rdi
adox r15, rbx
adcx r15, rbx
; Store all eight result limbs.
mov QWORD PTR [rcx], r8
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov QWORD PTR [rcx+32], r12
mov QWORD PTR [rcx+40], r13
mov QWORD PTR [rcx+48], r14
mov QWORD PTR [rcx+56], r15
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rbp
ret
sp_256_mul_avx2_4 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r. (r = a * a)
; *
; * r A single precision integer.
; * a A single precision integer.
; *
; * Registers on entry: rcx = r, rdx = a.
; * a is read as 4 limbs of 64 bits; r is written as 8 limbs.
; * The square is built one result column at a time. r9, r10 and r11
; * rotate as a three-limb accumulator. Each cross product A[i] * A[j]
; * (i != j) is computed once and added twice; each A[i] * A[i] is added
; * once.
; * Result limbs 0..3 are parked in 32 bytes of stack and copied to r only
; * after the last read of a, so r[0..3] may overlap a. Limbs 4..7 are
; * written to r directly.
; * Only volatile registers (rax, rdx, r8-r11) are used, so no register
; * needs to be saved and restored.
; */
_text SEGMENT READONLY PARA
sp_256_sqr_4 PROC
; mul writes rdx, so the a pointer is moved to r8.
mov r8, rdx
sub rsp, 32
; A[0] * A[0]
mov rax, QWORD PTR [r8]
mul rax
xor r11, r11
mov QWORD PTR [rsp], rax
mov r10, rdx
; A[0] * A[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r8]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+8], r10
; A[0] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[1] * A[1]
mov rax, QWORD PTR [r8+8]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rsp+16], r11
; A[0] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[1] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8+8]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+24], r9
; A[1] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+8]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[2] * A[2]
mov rax, QWORD PTR [r8+16]
mul rax
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rcx+32], r10
; A[2] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+16]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rcx+40], r11
; A[3] * A[3]
mov rax, QWORD PTR [r8+24]
mul rax
add r9, rax
adc r10, rdx
mov QWORD PTR [rcx+48], r9
mov QWORD PTR [rcx+56], r10
; Copy the parked low four limbs from the stack into r[0..3].
; r8 (a pointer) and r11 (already stored) are dead here and serve as
; scratch.
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r8, QWORD PTR [rsp+16]
mov r11, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r8
mov QWORD PTR [rcx+24], r11
add rsp, 32
ret
sp_256_sqr_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * r Result of squaring.
; * a Number to square in Montgomery form.
; *
; * Registers on entry: rcx = r, rdx = a.
; * a is read as 4 limbs of 64 bits; r is written as 8 limbs.
; * Phase 1 sums the six cross products A[i] * A[j] (i < j) into r9..r14
; * (result limbs 1..6) using mulx with the adox/adcx chains.
; * Phase 2 doubles that sum with adcx (reg, reg) while adding the four
; * squares A[i] * A[i] with adox; r8 and r15 become limbs 0 and 7.
; * All result limbs stay in registers until every input limb has been
; * read; r is written only at the end.
; */
_text SEGMENT READONLY PARA
sp_256_sqr_avx2_4 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
mov rax, rdx
; xor clears CF and OF for the chains below (r8 itself is overwritten by
; mulx).
xor r8, r8
; rdx = A[0] (mulx multiplicand), rsi = A[1], rbx = A[2], r15 = A[3]
mov rdx, QWORD PTR [rax]
mov rsi, QWORD PTR [rax+8]
mov rbx, QWORD PTR [rax+16]
mov r15, QWORD PTR [rax+24]
; A[0] * A[1]
mulx r10, r9, rsi
; A[0] * A[2]
mulx r11, r8, rbx
adox r10, r8
; A[0] * A[3]
mulx r12, r8, r15
mov rdx, rsi
adox r11, r8
; A[1] * A[2]
mulx rdi, r8, rbx
; rdx = A[3] for the remaining two cross products
mov rdx, r15
adcx r11, r8
; A[1] * A[3]
mulx r13, r8, rsi
; mov (not xor) gives the zero so the live CF/OF chains are not disturbed.
mov r15, 0
adox r12, rdi
adcx r12, r8
; A[2] * A[3]
mulx r14, r8, rbx
adox r13, r15
adcx r13, r8
adox r14, r15
adcx r14, r15
; Double with Carry Flag
; xor resets CF and OF and zeroes r15, which collects the top limb.
xor r15, r15
; A[0] * A[0]
mov rdx, QWORD PTR [rax]
mulx rdi, r8, rdx
adcx r9, r9
adcx r10, r10
adox r9, rdi
; A[1] * A[1]
mov rdx, QWORD PTR [rax+8]
mulx rbx, rsi, rdx
adcx r11, r11
adox r10, rsi
; A[2] * A[2]
mov rdx, QWORD PTR [rax+16]
mulx rsi, rdi, rdx
adcx r12, r12
adox r11, rbx
adcx r13, r13
adox r12, rdi
adcx r14, r14
; A[3] * A[3]
mov rdx, QWORD PTR [rax+24]
mulx rbx, rdi, rdx
adox r13, rsi
; r15 = carry out of the doubling chain
adcx r15, r15
adox r14, rdi
adox r15, rbx
; Store all eight result limbs.
mov QWORD PTR [rcx], r8
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov QWORD PTR [rcx+32], r12
mov QWORD PTR [rcx+40], r13
mov QWORD PTR [rcx+48], r14
mov QWORD PTR [rcx+56], r15
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_256_sqr_avx2_4 ENDP
_text ENDS
ENDIF
; /* Add b to a into r. (r = a + b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b.
; * Operates on 4 limbs of 64 bits.
; * Returns the carry out of the top limb (0 or 1) in rax.
; * All of a is loaded before r is written, and every limb of b is consumed
; * before the first store, so r may be the same array as a or b.
; * Only volatile registers are used (rdx holds a[3] after its last use as
; * a pointer), so nothing is saved on the stack.
; */
_text SEGMENT READONLY PARA
sp_256_add_4 PROC
xor rax, rax
mov r9, QWORD PTR [rdx]
mov r10, QWORD PTR [rdx+8]
mov r11, QWORD PTR [rdx+16]
; Last read through the a pointer: rdx now holds a[3].
mov rdx, QWORD PTR [rdx+24]
add r9, QWORD PTR [r8]
adc r10, QWORD PTR [r8+8]
adc r11, QWORD PTR [r8+16]
adc rdx, QWORD PTR [r8+24]
; mov does not change flags, so CF survives the stores.
mov QWORD PTR [rcx], r9
mov QWORD PTR [rcx+8], r10
mov QWORD PTR [rcx+16], r11
mov QWORD PTR [rcx+24], rdx
; rax = carry out
adc rax, 0
ret
sp_256_add_4 ENDP
_text ENDS
; /* Sub b from a into r. (r = a - b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b.
; * Operates on 4 limbs of 64 bits.
; * Returns 0 in rax when there is no borrow out of the top limb and -1
; * (all bits set) when there is.
; * All of a is loaded before r is written, and every limb of b is consumed
; * before the first store, so r may be the same array as a or b.
; * Only volatile registers are used (rdx holds a[3] after its last use as
; * a pointer), so nothing is saved on the stack.
; */
_text SEGMENT READONLY PARA
sp_256_sub_4 PROC
xor rax, rax
mov r9, QWORD PTR [rdx]
mov r10, QWORD PTR [rdx+8]
mov r11, QWORD PTR [rdx+16]
; Last read through the a pointer: rdx now holds a[3].
mov rdx, QWORD PTR [rdx+24]
sub r9, QWORD PTR [r8]
sbb r10, QWORD PTR [r8+8]
sbb r11, QWORD PTR [r8+16]
sbb rdx, QWORD PTR [r8+24]
; mov does not change flags, so CF survives the stores.
mov QWORD PTR [rcx], r9
mov QWORD PTR [rcx+8], r10
mov QWORD PTR [rcx+16], r11
mov QWORD PTR [rcx+24], rdx
; rax = 0 - CF: 0 without borrow, -1 with borrow
sbb rax, rax
ret
sp_256_sub_4 ENDP
_text ENDS
; /* Constant-time conditional copy of a into r under the mask m.
; * With m = -1 every limb of r becomes the matching limb of a; with m = 0
; * r keeps its value (it is still read and rewritten).
; *
; * r A single precision number to copy over.
; * a A single precision number to copy.
; * m Mask value to apply.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = m.
; * Operates on 4 limbs of 64 bits using r ^= (a ^ r) & m, with no
; * branches.
; */
_text SEGMENT READONLY PARA
sp_256_cond_copy_4 PROC
; Fetch the source limbs.
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
; Difference between source and destination, limb by limb.
xor rax, QWORD PTR [rcx]
xor r9, QWORD PTR [rcx+8]
xor r10, QWORD PTR [rcx+16]
xor r11, QWORD PTR [rcx+24]
; Keep the difference only where the mask selects the copy.
and r11, r8
and r10, r8
and r9, r8
and rax, r8
; Fold the masked difference back into the destination.
xor QWORD PTR [rcx+24], r11
xor QWORD PTR [rcx+16], r10
xor QWORD PTR [rcx+8], r9
xor QWORD PTR [rcx], rax
ret
sp_256_cond_copy_4 ENDP
_text ENDS
; /* Multiply two Montgomery form numbers mod the modulus (prime).
; * (r = a * b mod m)
; *
; * r Result of multiplication.
; * a First number to multiply in Montgomery form.
; * b Second number to multiply in Montgomery form.
; * m Modulus (prime).
; * mp Montgomery multiplier.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b.
; * NOTE: the m and mp arguments are never read by this routine (r9 is
; * overwritten before any use and no stack argument is loaded). The
; * reduction is written for one fixed modulus whose limbs appear below as
; * -1, 0x00000000FFFFFFFF, 0 and 0xFFFFFFFF00000001 (low to high).
; *
; * Phase 1 forms the 8-limb product in registers:
; * r11, r12, r13, r14 = limbs 0..3 and r15, rdi, rsi, rbx = limbs 4..7.
; * Phase 2 reduces using only shifts, adds and subtracts, then subtracts
; * the masked modulus once and stores the 4-limb result from
; * r15, rdi, rsi, rbx.
; */
_text SEGMENT READONLY PARA
sp_256_mont_mul_4 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; mul writes rdx, so the a pointer is moved to r10.
mov r10, rdx
; A[0] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r10]
mov r11, rax
mov r12, rdx
; A[0] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r10]
xor r13, r13
add r12, rax
adc r13, rdx
; A[1] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r10+8]
xor r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
; A[0] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r10]
add r13, rax
adc r14, rdx
; A[1] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r10+8]
xor r15, r15
add r13, rax
adc r14, rdx
adc r15, 0
; A[2] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r10+16]
add r13, rax
adc r14, rdx
adc r15, 0
; A[0] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r10]
xor rdi, rdi
add r14, rax
adc r15, rdx
adc rdi, 0
; A[1] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r10+8]
add r14, rax
adc r15, rdx
adc rdi, 0
; A[2] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r10+16]
add r14, rax
adc r15, rdx
adc rdi, 0
; A[3] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r10+24]
add r14, rax
adc r15, rdx
adc rdi, 0
; A[1] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r10+8]
xor rsi, rsi
add r15, rax
adc rdi, rdx
adc rsi, 0
; A[2] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r10+16]
add r15, rax
adc rdi, rdx
adc rsi, 0
; A[3] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r10+24]
add r15, rax
adc rdi, rdx
adc rsi, 0
; A[2] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r10+16]
xor rbx, rbx
add rdi, rax
adc rsi, rdx
adc rbx, 0
; A[3] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r10+24]
add rdi, rax
adc rsi, rdx
adc rbx, 0
; A[3] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r10+24]
add rsi, rax
adc rbx, rdx
; Start Reduction
; The input pointers are no longer needed; r8, r9, r10, rax and rdx become
; scratch. mu is assembled in rax (limb 0), r10, r8 and rdx (limb 3).
; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
; - a[0] << 32 << 192
; a[0]-a[3] + (a[0] * 2) << 192
mov rax, r11
lea rdx, QWORD PTR [r14+2*r11]
mov r10, r12
mov r8, r13
mov r9, r13
; a[0]-a[2] << 32
shl r11, 32
shld r9, r10, 32
shld r12, rax, 32
; - a[0] << 32 << 192
sub rdx, r11
; + a[0]-a[2] << 32 << 64
add r10, r11
adc r8, r12
adc rdx, r9
; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
xor r9, r9
; a += mu << 256
add r15, rax
adc rdi, r10
adc rsi, r8
adc rbx, rdx
; r11 tracks the net carry/borrow out of the top limb (starts as 0 or -1).
sbb r11, r11
; a += mu << 192
add r14, rax
adc r15, r10
; Keep a copy of mu limb 1 before mu is shifted (mov leaves CF intact).
mov r12, r10
adc rdi, r8
adc rsi, rdx
adc rbx, 0
sbb r11, 0
; mu <<= 32
shld r9, rdx, 32
shld rdx, r8, 32
shld r8, r10, 32
shld r10, rax, 32
shl rax, 32
; a -= (mu << 32) << 192
sub r14, rax
sbb r15, r10
sbb rdi, r8
sbb rsi, rdx
sbb rbx, r9
adc r11, 0
; a += (mu << 32) << 64
; Only the carry out of the low limbs matters from here on.
sub r12, rax
adc r13, r10
adc r14, r8
adc r15, rdx
adc rdi, r9
adc rsi, 0
adc rbx, 0
sbb r11, 0
; 18446744069414584321 = 0xFFFFFFFF00000001 (top limb of the modulus)
mov r10, 18446744069414584321
; mask m and sub from result if overflow
; m[0] = -1 & mask = mask
; m[2] = 0 & mask = 0
; eax = low 32 bits of the mask, i.e. 0x00000000FFFFFFFF & mask for limb 1
mov eax, r11d
and r10, r11
sub r15, r11
sbb rdi, rax
mov QWORD PTR [rcx], r15
sbb rsi, 0
mov QWORD PTR [rcx+8], rdi
sbb rbx, r10
mov QWORD PTR [rcx+16], rsi
mov QWORD PTR [rcx+24], rbx
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_256_mont_mul_4 ENDP
_text ENDS
; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
; *
; * r Result of squaring.
; * a Number to square in Montgomery form.
; * m Modulus (prime).
; * mp Montgomery multiplier.
; *
; * Registers on entry: rcx = r, rdx = a.
; * NOTE: the m and mp arguments are never read by this routine (r8 and r9
; * are overwritten before any use). The reduction is written for one fixed
; * modulus whose limbs appear below as -1, 0x00000000FFFFFFFF, 0 and
; * 0xFFFFFFFF00000001 (low to high).
; *
; * Phase 1 sums the six cross products, doubles them and adds the four
; * squares, giving the 8-limb result in
; * r10, r11, r12, r13 = limbs 0..3 and r14, r15, rdi, rsi = limbs 4..7.
; * Phase 2 reduces using only shifts, adds and subtracts, then subtracts
; * the masked modulus once and stores the 4-limb result from
; * r14, r15, rdi, rsi.
; */
_text SEGMENT READONLY PARA
sp_256_mont_sqr_4 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; mul writes rdx, so the a pointer is moved to r8.
mov r8, rdx
; A[0] * A[1]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r8+8]
mov r11, rax
mov r12, rdx
; A[0] * A[2]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r8+16]
xor r13, r13
add r12, rax
adc r13, rdx
; A[0] * A[3]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r8+24]
xor r14, r14
add r13, rax
adc r14, rdx
; A[1] * A[2]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r8+16]
xor r15, r15
add r13, rax
adc r14, rdx
adc r15, 0
; A[1] * A[3]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r8+24]
add r14, rax
adc r15, rdx
; A[2] * A[3]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8+24]
xor rdi, rdi
add r15, rax
adc rdi, rdx
; Double
; The cross-product sum in limbs 1..6 is doubled; rsi takes the carry out.
xor rsi, rsi
add r11, r11
adc r12, r12
adc r13, r13
adc r14, r14
adc r15, r15
adc rdi, rdi
adc rsi, 0
; A[0] * A[0]
mov rax, QWORD PTR [r8]
mul rax
; The two self-moves below (and after each square) do nothing.
mov rax, rax
mov rdx, rdx
mov r10, rax
mov rbx, rdx
; A[1] * A[1]
mov rax, QWORD PTR [r8+8]
mul rax
mov rax, rax
mov rdx, rdx
; rbx holds the high half of the previous square.
add r11, rbx
adc r12, rax
adc rdx, 0
mov rbx, rdx
; A[2] * A[2]
mov rax, QWORD PTR [r8+16]
mul rax
mov rax, rax
mov rdx, rdx
add r13, rbx
adc r14, rax
adc rdx, 0
mov rbx, rdx
; A[3] * A[3]
mov rax, QWORD PTR [r8+24]
mul rax
mov rax, rax
mov rdx, rdx
add r15, rbx
adc rdi, rax
adc rsi, rdx
; Start Reduction
; The a pointer is no longer needed; r8, r9, rbx, rax and rdx become
; scratch. mu is assembled in rax (limb 0), r8, rbx and rdx (limb 3).
; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
; - a[0] << 32 << 192
; a[0]-a[3] + (a[0] * 2) << 192
mov rax, r10
lea rdx, QWORD PTR [r13+2*r10]
mov r8, r11
mov rbx, r12
mov r9, r12
; a[0]-a[2] << 32
shl r10, 32
shld r9, r8, 32
shld r11, rax, 32
; - a[0] << 32 << 192
sub rdx, r10
; + a[0]-a[2] << 32 << 64
add r8, r10
adc rbx, r11
adc rdx, r9
; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
xor r9, r9
; a += mu << 256
add r14, rax
adc r15, r8
adc rdi, rbx
adc rsi, rdx
; r10 tracks the net carry/borrow out of the top limb (starts as 0 or -1).
sbb r10, r10
; a += mu << 192
add r13, rax
adc r14, r8
; Keep a copy of mu limb 1 before mu is shifted (mov leaves CF intact).
mov r11, r8
adc r15, rbx
adc rdi, rdx
adc rsi, 0
sbb r10, 0
; mu <<= 32
shld r9, rdx, 32
shld rdx, rbx, 32
shld rbx, r8, 32
shld r8, rax, 32
shl rax, 32
; a -= (mu << 32) << 192
sub r13, rax
sbb r14, r8
sbb r15, rbx
sbb rdi, rdx
sbb rsi, r9
adc r10, 0
; a += (mu << 32) << 64
; Only the carry out of the low limbs matters from here on.
sub r11, rax
adc r12, r8
adc r13, rbx
adc r14, rdx
adc r15, r9
adc rdi, 0
adc rsi, 0
sbb r10, 0
; 18446744069414584321 = 0xFFFFFFFF00000001 (top limb of the modulus)
mov r8, 18446744069414584321
; mask m and sub from result if overflow
; m[0] = -1 & mask = mask
; m[2] = 0 & mask = 0
; eax = low 32 bits of the mask, i.e. 0x00000000FFFFFFFF & mask for limb 1
mov eax, r10d
and r8, r10
sub r14, r10
sbb r15, rax
mov QWORD PTR [rcx], r14
sbb rdi, 0
mov QWORD PTR [rcx+8], r15
sbb rsi, r8
mov QWORD PTR [rcx+16], rdi
mov QWORD PTR [rcx+24], rsi
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_256_mont_sqr_4 ENDP
_text ENDS
; /* Compare a with b in constant time.
; *
; * a A single precision integer.
; * b A single precision integer.
; * return -ve, 0 or +ve if a is less than, equal to or greater than b
; * respectively.
; *
; * Registers on entry: rcx = a, rdx = b. Compares 4 limbs of 64 bits from
; * the most significant limb down, as unsigned values.
; * The value returned in rax is exactly -1, 0 or 1.
; *
; * r8 is an "all limbs equal so far" mask: it starts at -1 and is cleared
; * at the first differing limb. Both limbs are ANDed with r8 before each
; * comparison, so once a difference has been found every later limb
; * compares as 0 == 0 and cannot change the result. Only cmov is used;
; * there are no branches.
; */
_text SEGMENT READONLY PARA
sp_256_cmp_4 PROC
push r12
; r9 = 0 (used to clear the mask), r8 = equal-so-far mask and the -1
; result, rax = result (initially -1), r10 = +1 result
xor r9, r9
mov r8, -1
mov rax, -1
mov r10, 1
; limb 3 (most significant)
mov r11, QWORD PTR [rcx+24]
mov r12, QWORD PTR [rdx+24]
and r11, r8
and r12, r8
sub r11, r12
; a > b: result = 1; a < b: result = r8 (-1 here); differ: clear the mask
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; limb 2
mov r11, QWORD PTR [rcx+16]
mov r12, QWORD PTR [rdx+16]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; limb 1
mov r11, QWORD PTR [rcx+8]
mov r12, QWORD PTR [rdx+8]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; limb 0 (least significant)
mov r11, QWORD PTR [rcx]
mov r12, QWORD PTR [rdx]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; If every limb matched, r8 is still -1 and rax is still -1, giving 0.
; Otherwise r8 is 0 and rax keeps the 1 or -1 chosen above.
xor rax, r8
pop r12
ret
sp_256_cmp_4 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not.
; *
; * r A single precision number representing condition subtract result.
; * a A single precision number to subtract from.
; * b A single precision number to subtract.
; * m Mask value to apply.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b, r9 = m.
; * Operates on 4 limbs of 64 bits.
; * Returns 0 in rax when there is no borrow out of the top limb and -1
; * (all bits set) when there is.
; * All four limbs of b are masked before the subtraction starts because
; * "and" rewrites the carry flag and so cannot sit inside the sub/sbb chain.
; */
_text SEGMENT READONLY PARA
sp_256_cond_sub_4 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
; r14, r15, rdi, rsi = b[0..3] & m
mov r14, QWORD PTR [r8]
mov r15, QWORD PTR [r8+8]
mov rdi, QWORD PTR [r8+16]
mov rsi, QWORD PTR [r8+24]
and r14, r9
and r15, r9
and rdi, r9
and rsi, r9
; r10, r11, r12, r13 = a[0..3]
mov r10, QWORD PTR [rdx]
mov r11, QWORD PTR [rdx+8]
mov r12, QWORD PTR [rdx+16]
mov r13, QWORD PTR [rdx+24]
sub r10, r14
sbb r11, r15
sbb r12, rdi
sbb r13, rsi
; mov does not change flags, so the borrow survives the stores.
mov QWORD PTR [rcx], r10
mov QWORD PTR [rcx+8], r11
mov QWORD PTR [rcx+16], r12
mov QWORD PTR [rcx+24], r13
; rax = 0 - CF: 0 without borrow, -1 with borrow
sbb rax, rax
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_256_cond_sub_4 ENDP
_text ENDS
; /* Reduce the number back to 256 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; *
; * Registers on entry: rcx = a.
; * NOTE: the m and mp arguments are never read by this routine (rdx and r8
; * are overwritten before any use). The reduction is written for one fixed
; * modulus whose limbs appear below as -1, 0x00000000FFFFFFFF, 0 and
; * 0xFFFFFFFF00000001 (low to high).
; *
; * Reads 8 limbs of a (byte offsets 0..56) and writes the 4-limb result
; * back to a[0..3]; a[4..7] are not modified in memory.
; * Register layout after the loads: r9, r10, r11, r12 = limbs 0..3 and
; * r13, r14, r15, rdi = limbs 4..7. The instruction sequence from
; * "Start Reduction" onwards mirrors the one in sp_256_mont_mul_4 with
; * different register names.
; */
_text SEGMENT READONLY PARA
sp_256_mont_reduce_4 PROC
push rbx
push rsi
push r12
push r13
push r14
push r15
push rdi
; rcx is used as scratch below, so the a pointer is moved to r8.
mov r8, rcx
mov r9, QWORD PTR [r8]
mov r10, QWORD PTR [r8+8]
mov r11, QWORD PTR [r8+16]
mov r12, QWORD PTR [r8+24]
mov r13, QWORD PTR [r8+32]
mov r14, QWORD PTR [r8+40]
mov r15, QWORD PTR [r8+48]
mov rdi, QWORD PTR [r8+56]
; Start Reduction
; mu is assembled in rax (limb 0), rbx, rcx and rdx (limb 3).
; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
; - a[0] << 32 << 192
; a[0]-a[3] + (a[0] * 2) << 192
mov rax, r9
lea rdx, QWORD PTR [r12+2*r9]
mov rbx, r10
mov rcx, r11
mov rsi, r11
; a[0]-a[2] << 32
shl r9, 32
shld rsi, rbx, 32
shld r10, rax, 32
; - a[0] << 32 << 192
sub rdx, r9
; + a[0]-a[2] << 32 << 64
add rbx, r9
adc rcx, r10
adc rdx, rsi
; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
xor rsi, rsi
; a += mu << 256
add r13, rax
adc r14, rbx
adc r15, rcx
adc rdi, rdx
; r9 tracks the net carry/borrow out of the top limb (starts as 0 or -1).
sbb r9, r9
; a += mu << 192
add r12, rax
adc r13, rbx
; Keep a copy of mu limb 1 before mu is shifted (mov leaves CF intact).
mov r10, rbx
adc r14, rcx
adc r15, rdx
adc rdi, 0
sbb r9, 0
; mu <<= 32
shld rsi, rdx, 32
shld rdx, rcx, 32
shld rcx, rbx, 32
shld rbx, rax, 32
shl rax, 32
; a -= (mu << 32) << 192
sub r12, rax
sbb r13, rbx
sbb r14, rcx
sbb r15, rdx
sbb rdi, rsi
adc r9, 0
; a += (mu << 32) << 64
; Only the carry out of the low limbs matters from here on.
sub r10, rax
adc r11, rbx
adc r12, rcx
adc r13, rdx
adc r14, rsi
adc r15, 0
adc rdi, 0
sbb r9, 0
; 18446744069414584321 = 0xFFFFFFFF00000001 (top limb of the modulus)
mov rbx, 18446744069414584321
; mask m and sub from result if overflow
; m[0] = -1 & mask = mask
; m[2] = 0 & mask = 0
; eax = low 32 bits of the mask, i.e. 0x00000000FFFFFFFF & mask for limb 1
mov eax, r9d
and rbx, r9
sub r13, r9
sbb r14, rax
mov QWORD PTR [r8], r13
sbb r15, 0
mov QWORD PTR [r8+8], r14
sbb rdi, rbx
mov QWORD PTR [r8+16], r15
mov QWORD PTR [r8+24], rdi
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rsi
pop rbx
ret
sp_256_mont_reduce_4 ENDP
_text ENDS
; /* Reduce the number back to 256 bits using Montgomery reduction.
; * Unlike sp_256_mont_reduce_4 this version reads the modulus and the
; * multiplier from its arguments rather than hard-coding them.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; *
; * Registers on entry: rcx = a, rdx = m, r8 = mp.
; * a is read and updated as 8 limbs of 64 bits; m is read as 4 limbs.
; * The loop runs 4 times. Iteration i computes mu = a[i] * mp (low 64
; * bits) and adds m * mu into a[i..i+4].
; * rdi carries the overflow out of a[i+4] from one iteration into the
; * next. After the loop, m is subtracted from a[4..7] under a mask derived
; * from rdi and the 4-limb result is stored in a[0..3].
; */
_text SEGMENT READONLY PARA
sp_256_mont_reduce_order_4 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
; mul writes rdx, so the m pointer is moved to r9.
mov r9, rdx
; i = 0
; rdi = carry from the previous iteration (none yet), r10 = loop counter,
; r15 = address of a[i]
xor rdi, rdi
mov r10, 4
mov r15, rcx
L_mont_loop_4:
; mu = a[i] * mp
mov r14, QWORD PTR [r15]
imul r14, r8
; a[i+0] += m[0] * mu
mov rax, QWORD PTR [r9]
mov r12, QWORD PTR [r9+8]
mul r14
mov rsi, QWORD PTR [r15]
add rsi, rax
mov r11, rdx
mov QWORD PTR [r15], rsi
adc r11, 0
; a[i+1] += m[1] * mu
mov rax, r12
mul r14
mov r12, QWORD PTR [r9+16]
mov rsi, QWORD PTR [r15+8]
add rax, r11
mov r13, rdx
adc r13, 0
add rsi, rax
mov QWORD PTR [r15+8], rsi
adc r13, 0
; a[i+2] += m[2] * mu
mov rax, r12
mul r14
mov r12, QWORD PTR [r9+24]
mov rsi, QWORD PTR [r15+16]
add rax, r13
mov r11, rdx
adc r11, 0
add rsi, rax
mov QWORD PTR [r15+16], rsi
adc r11, 0
; a[i+3] += m[3] * mu
mov rax, r12
mul r14
mov rsi, QWORD PTR [r15+24]
add rax, r11
; Fold in the carry saved by the previous iteration.
adc rdx, rdi
; mov (not xor) is required here: it clears rdi without touching the CF
; produced by the adc above, which the next adc collects.
mov rdi, 0
adc rdi, 0
add rsi, rax
mov QWORD PTR [r15+24], rsi
adc QWORD PTR [r15+32], rdx
adc rdi, 0
; i += 1
add r15, 8
dec r10
jnz L_mont_loop_4
; Final conditional subtraction: rax = 0 - rdi is the mask applied to m.
xor rax, rax
mov rdx, QWORD PTR [rcx+32]
mov r10, QWORD PTR [rcx+40]
mov rsi, QWORD PTR [rcx+48]
mov r11, QWORD PTR [rcx+56]
sub rax, rdi
mov r12, QWORD PTR [r9]
mov r13, QWORD PTR [r9+8]
mov r14, QWORD PTR [r9+16]
mov r15, QWORD PTR [r9+24]
and r12, rax
and r13, rax
and r14, rax
and r15, rax
sub rdx, r12
sbb r10, r13
sbb rsi, r14
sbb r11, r15
; The reduced value (upper half minus masked m) becomes a[0..3].
mov QWORD PTR [rcx], rdx
mov QWORD PTR [rcx+8], r10
mov QWORD PTR [rcx+16], rsi
mov QWORD PTR [rcx+24], r11
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_256_mont_reduce_order_4 ENDP
_text ENDS
; /* Add two Montgomery form numbers (r = a + b % m).
; *
; * r Result of addition.
; * a First number to add in Montgomery form.
; * b Second number to add in Montgomery form.
; * m Modulus (prime).
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b. The m argument (r9) is not
; * read: r9 is reused as scratch and the modulus words are built from
; * constants (low to high: -1, 0x00000000ffffffff, 0, 0xffffffff00000001).
; */
_text SEGMENT READONLY PARA
sp_256_mont_add_4 PROC
push r12
push r13
; rax, r9, r10, r11 = a[0..3]
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
; a += b
add rax, QWORD PTR [r8]
adc r9, QWORD PTR [r8+8]
; r13 = m[3] (mov does not disturb the carry chain)
mov r13, 18446744069414584321
adc r10, QWORD PTR [r8+16]
adc r11, QWORD PTR [r8+24]
; rdx = all ones when the addition carried out of 256 bits, else 0
sbb rdx, rdx
; r12 = m[1] & mask (32-bit move zero-extends the low half of the mask)
mov r12d, edx
; r13 = m[3] & mask
and r13, rdx
; Subtract the masked modulus (m[0] & mask is the mask itself, m[2] is 0).
sub rax, rdx
sbb r9, r12
sbb r10, 0
sbb r11, r13
; A borrow here cancels the earlier carry: mask + 1 = 0. Without a borrow
; the mask stays set and the modulus is subtracted a second time.
adc rdx, 0
and r12, rdx
and r13, rdx
sub rax, rdx
sbb r9, r12
mov QWORD PTR [rcx], rax
sbb r10, 0
mov QWORD PTR [rcx+8], r9
sbb r11, r13
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
pop r13
pop r12
ret
sp_256_mont_add_4 ENDP
_text ENDS
; /* Double a Montgomery form number (r = a + a % m).
; *
; * r Result of doubling.
; * a Number to double in Montgomery form.
; * m Modulus (prime).
; *
; * Registers on entry: rcx = r, rdx = a. The m argument (r8) is not read:
; * r8 is reused as scratch and the modulus words are built from constants
; * (low to high: -1, 0x00000000ffffffff, 0, 0xffffffff00000001).
; */
_text SEGMENT READONLY PARA
sp_256_mont_dbl_4 PROC
push r12
push r13
; rax, r8, r9, r10 = a[0..3]
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r9, QWORD PTR [rdx+16]
mov r10, QWORD PTR [rdx+24]
; a += a
add rax, rax
adc r8, r8
; r12 = m[3]
mov r12, 18446744069414584321
adc r9, r9
; Keep a copy of the original top word: its top bit is the bit that is
; shifted out of 256 bits by the doubling.
mov r13, r10
adc r10, r10
; r13 = all ones when that top bit was set, else 0
sar r13, 63
; r11 = m[1] & mask
mov r11d, r13d
; r12 = m[3] & mask
and r12, r13
; Subtract the masked modulus (m[0] & mask is the mask itself, m[2] is 0).
sub rax, r13
sbb r8, r11
sbb r9, 0
sbb r10, r12
; A borrow cancels the overflow (mask + 1 = 0); otherwise subtract again.
adc r13, 0
and r11, r13
and r12, r13
sub rax, r13
sbb r8, r11
mov QWORD PTR [rcx], rax
sbb r9, 0
mov QWORD PTR [rcx+8], r8
sbb r10, r12
mov QWORD PTR [rcx+16], r9
mov QWORD PTR [rcx+24], r10
pop r13
pop r12
ret
sp_256_mont_dbl_4 ENDP
_text ENDS
; /* Triple a Montgomery form number (r = a + a + a % m).
; *
; * Computed as a doubling with reduction followed by an addition of a with
; * reduction.
; *
; * r Result of tripling.
; * a Number to triple in Montgomery form.
; * m Modulus (prime).
; *
; * Registers on entry: rcx = r, rdx = a. The m argument (r8) is not read:
; * r8 is reused as scratch and the modulus words are built from constants
; * (low to high: -1, 0x00000000ffffffff, 0, 0xffffffff00000001).
; */
_text SEGMENT READONLY PARA
sp_256_mont_tpl_4 PROC
push r12
push r13
; rax, r8, r9, r10 = a[0..3]
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r9, QWORD PTR [rdx+16]
mov r10, QWORD PTR [rdx+24]
; t = a + a
add rax, rax
adc r8, r8
; r12 = m[3]
mov r12, 18446744069414584321
adc r9, r9
adc r10, r10
; r13 = all ones when the doubling carried out of 256 bits, else 0
sbb r13, r13
; r11 = m[1] & mask, r12 = m[3] & mask
mov r11d, r13d
and r12, r13
; t -= m & mask
sub rax, r13
sbb r8, r11
sbb r9, 0
sbb r10, r12
; A borrow cancels the overflow (mask + 1 = 0); otherwise subtract again.
adc r13, 0
and r11, r13
and r12, r13
sub rax, r13
sbb r8, r11
sbb r9, 0
sbb r10, r12
; t += a
add rax, QWORD PTR [rdx]
adc r8, QWORD PTR [rdx+8]
; reload m[3]; r12 was masked above
mov r12, 18446744069414584321
adc r9, QWORD PTR [rdx+16]
adc r10, QWORD PTR [rdx+24]
; r13 -= carry. r13 still holds the mask left by the first reduction, so
; this yields a clean 0 / all-ones mask only when that mask ended as 0.
; NOTE(review): presumably guaranteed by a < m on entry - confirm.
sbb r13, 0
mov r11d, r13d
and r12, r13
; t -= m & mask
sub rax, r13
sbb r8, r11
sbb r9, 0
sbb r10, r12
; A borrow cancels the overflow; otherwise subtract again.
adc r13, 0
and r11, r13
and r12, r13
sub rax, r13
sbb r8, r11
mov QWORD PTR [rcx], rax
sbb r9, 0
mov QWORD PTR [rcx+8], r8
sbb r10, r12
mov QWORD PTR [rcx+16], r9
mov QWORD PTR [rcx+24], r10
pop r13
pop r12
ret
sp_256_mont_tpl_4 ENDP
_text ENDS
; /* Subtract two Montgomery form numbers (r = a - b % m).
; *
; * r Result of subtraction.
; * a Number to subtract from in Montgomery form.
; * b Number to subtract with in Montgomery form.
; * m Modulus (prime).
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b. The m argument (r9) is not
; * read: r9 is reused as scratch and the modulus words are built from
; * constants (low to high: -1, 0x00000000ffffffff, 0, 0xffffffff00000001).
; */
_text SEGMENT READONLY PARA
sp_256_mont_sub_4 PROC
push r12
push r13
; rax, r9, r10, r11 = a[0..3]
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
; a -= b
sub rax, QWORD PTR [r8]
sbb r9, QWORD PTR [r8+8]
; r13 = m[3]
mov r13, 18446744069414584321
sbb r10, QWORD PTR [r8+16]
sbb r11, QWORD PTR [r8+24]
; rdx = all ones when the subtraction borrowed (result negative), else 0
sbb rdx, rdx
; r12 = m[1] & mask, r13 = m[3] & mask
mov r12d, edx
and r13, rdx
; Add the masked modulus back (m[0] & mask is the mask itself, m[2] is 0).
add rax, rdx
adc r9, r12
adc r10, 0
adc r11, r13
; A carry here cancels the borrow (mask + 1 = 0); otherwise add m again.
adc rdx, 0
and r12, rdx
and r13, rdx
add rax, rdx
adc r9, r12
mov QWORD PTR [rcx], rax
adc r10, 0
mov QWORD PTR [rcx+8], r9
adc r11, r13
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
pop r13
pop r12
ret
sp_256_mont_sub_4 ENDP
_text ENDS
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * When a is odd the modulus is added first so that the sum is even; the
; * (up to 257-bit) sum is then shifted right by one bit.
; *
; * r Result of division by 2.
; * a Number to divide.
; * m Modulus (prime).
; *
; * Registers on entry: rcx = r, rdx = a. The m argument (r8) is not read:
; * r8 is reused as scratch and the modulus words are built from constants
; * (low to high: -1, 0x00000000ffffffff, 0, 0xffffffff00000001).
; */
_text SEGMENT READONLY PARA
sp_256_mont_div2_4 PROC
push r12
push r13
; rax, r8, r9, r10 = a[0..3]
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r9, QWORD PTR [rdx+16]
mov r10, QWORD PTR [rdx+24]
; r12 = m[3]
mov r12, 18446744069414584321
; r13 = all ones when a is odd, else 0
mov r13, rax
and r13, 1
neg r13
; r11 = m[1] & mask, r12 = m[3] & mask
mov r11d, r13d
and r12, r13
; a += m & mask (m[0] & mask is the mask itself, m[2] is 0)
add rax, r13
adc r8, r11
adc r9, 0
adc r10, r12
; r13 = carry out of the addition (bit 256); mov keeps the flags intact
mov r13, 0
adc r13, 0
; Shift the 257-bit value right by one, pulling bits down from the word above.
shrd rax, r8, 1
shrd r8, r9, 1
shrd r9, r10, 1
shrd r10, r13, 1
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r8
mov QWORD PTR [rcx+16], r9
mov QWORD PTR [rcx+24], r10
pop r13
pop r12
ret
sp_256_mont_div2_4 ENDP
_text ENDS
; /* Two Montgomery numbers, subtract double second from first (r = a - 2.b % m).
; * After r has been stored, b is also overwritten in place with (b - r) % m,
; * where b is the value re-read from memory at that point.
; *
; * r Result of subtraction.
; * a Number to subtract from in Montgomery form.
; * b Number to double and subtract with in Montgomery form. Updated on exit
; *   as described above.
; * m Modulus (prime).
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b. The m argument (r9) is not
; * read: r9 is reused as scratch and the modulus words are built from
; * constants (low to high: -1, 0x00000000ffffffff, 0, 0xffffffff00000001).
; */
_text SEGMENT READONLY PARA
sp_256_mont_rsb_sub_dbl_4 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
; rax, r9, r10, r11 = a[0..3]
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
; r12..r15 = b[0..3]
mov r12, QWORD PTR [r8]
mov r13, QWORD PTR [r8+8]
mov r14, QWORD PTR [r8+16]
mov r15, QWORD PTR [r8+24]
; Step 1: t = 2.b % m
add r12, r12
adc r13, r13
; rsi = m[3]
mov rsi, 18446744069414584321
adc r14, r14
adc r15, r15
; rdx = all ones when the doubling carried out of 256 bits, else 0
sbb rdx, rdx
; rdi = m[1] & mask, rsi = m[3] & mask
mov edi, edx
and rsi, rdx
sub r12, rdx
sbb r13, rdi
sbb r14, 0
sbb r15, rsi
; A borrow cancels the overflow (mask + 1 = 0); otherwise subtract again.
adc rdx, 0
and rdi, rdx
and rsi, rdx
sub r12, rdx
sbb r13, rdi
sbb r14, 0
sbb r15, rsi
; Step 2: r = a - t % m
sub rax, r12
sbb r9, r13
; reload m[3]; rsi was masked above
mov rsi, 18446744069414584321
sbb r10, r14
sbb r11, r15
; rdx -= borrow. rdx still holds the mask left by step 1, so this yields a
; clean 0 / all-ones mask only when that mask ended as 0.
; NOTE(review): presumably guaranteed by b < m on entry - confirm.
sbb rdx, 0
mov edi, edx
and rsi, rdx
; Add the masked modulus back.
add rax, rdx
adc r9, rdi
adc r10, 0
adc r11, rsi
; A carry cancels the borrow; otherwise add the modulus again.
adc rdx, 0
and rdi, rdx
and rsi, rdx
add rax, rdx
adc r9, rdi
mov QWORD PTR [rcx], rax
adc r10, 0
mov QWORD PTR [rcx+8], r9
adc r11, rsi
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
; Step 3: b = b - r % m (b is re-read from memory)
mov r12, QWORD PTR [r8]
mov r13, QWORD PTR [r8+8]
mov r14, QWORD PTR [r8+16]
mov r15, QWORD PTR [r8+24]
sub r12, rax
sbb r13, r9
mov rsi, 18446744069414584321
sbb r14, r10
sbb r15, r11
; fresh mask from the borrow of this subtraction
sbb rdx, rdx
mov edi, edx
and rsi, rdx
add r12, rdx
adc r13, rdi
adc r14, 0
adc r15, rsi
adc rdx, 0
and rdi, rdx
and rsi, rdx
add r12, rdx
adc r13, rdi
mov QWORD PTR [r8], r12
adc r14, 0
mov QWORD PTR [r8+8], r13
adc r15, rsi
mov QWORD PTR [r8+16], r14
mov QWORD PTR [r8+24], r15
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_256_mont_rsb_sub_dbl_4 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible point that could be being copied.
; *
; * Every table entry from 1 to 32 is read and masked by a compare of its
; * index against idx, so the memory access pattern does not depend on idx.
; * Entry 0 is never read; when idx matches no scanned entry the copied
; * fields are all zero.
; *
; * Entries are 200 bytes apart. From each entry the 32 bytes at offsets 0,
; * 64 and 128 are selected and written to the same offsets in r.
; *
; * r Point to copy into.
; * table Table - start of the entries to access
; * idx Index of point to retrieve.
; *
; * Registers on entry: rcx = r, rdx = table, r8d = idx.
; */
_text SEGMENT READONLY PARA
sp_256_get_point_33_4 PROC
; xmm6-xmm15 are used below, so they are saved here and restored on exit.
sub rsp, 160
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqu OWORD PTR [rsp+32], xmm8
movdqu OWORD PTR [rsp+48], xmm9
movdqu OWORD PTR [rsp+64], xmm10
movdqu OWORD PTR [rsp+80], xmm11
movdqu OWORD PTR [rsp+96], xmm12
movdqu OWORD PTR [rsp+112], xmm13
movdqu OWORD PTR [rsp+128], xmm14
movdqu OWORD PTR [rsp+144], xmm15
mov rax, 1
; xmm13 = idx (broadcast to all four lanes below)
movd xmm13, r8d
; start at entry 1
add rdx, 200
; xmm15 = 1 (broadcast below): the per-iteration index increment
movd xmm15, eax
; rax = number of entries to scan
mov rax, 32
pshufd xmm15, xmm15, 0
pshufd xmm13, xmm13, 0
pxor xmm14, xmm14
; xmm0..xmm5 accumulate the selected point
pxor xmm0, xmm0
pxor xmm1, xmm1
pxor xmm2, xmm2
pxor xmm3, xmm3
pxor xmm4, xmm4
pxor xmm5, xmm5
; xmm14 = index of the entry currently pointed to by rdx (starts at 1)
movdqa xmm14, xmm15
L_256_get_point_33_4_start_1:
; xmm12 = all ones when the current index equals idx, else zero
movdqa xmm12, xmm14
paddd xmm14, xmm15
pcmpeqd xmm12, xmm13
; load the three 32-byte fields of this entry
movdqu xmm6, OWORD PTR [rdx]
movdqu xmm7, OWORD PTR [rdx+16]
movdqu xmm8, OWORD PTR [rdx+64]
movdqu xmm9, OWORD PTR [rdx+80]
movdqu xmm10, OWORD PTR [rdx+128]
movdqu xmm11, OWORD PTR [rdx+144]
add rdx, 200
; keep the data only when this is the requested entry
pand xmm6, xmm12
pand xmm7, xmm12
pand xmm8, xmm12
pand xmm9, xmm12
pand xmm10, xmm12
pand xmm11, xmm12
por xmm0, xmm6
por xmm1, xmm7
por xmm2, xmm8
por xmm3, xmm9
por xmm4, xmm10
por xmm5, xmm11
dec rax
jnz L_256_get_point_33_4_start_1
; write the selected fields to r
movdqu OWORD PTR [rcx], xmm0
movdqu OWORD PTR [rcx+16], xmm1
movdqu OWORD PTR [rcx+64], xmm2
movdqu OWORD PTR [rcx+80], xmm3
movdqu OWORD PTR [rcx+128], xmm4
movdqu OWORD PTR [rcx+144], xmm5
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm8, OWORD PTR [rsp+32]
movdqu xmm9, OWORD PTR [rsp+48]
movdqu xmm10, OWORD PTR [rsp+64]
movdqu xmm11, OWORD PTR [rsp+80]
movdqu xmm12, OWORD PTR [rsp+96]
movdqu xmm13, OWORD PTR [rsp+112]
movdqu xmm14, OWORD PTR [rsp+128]
movdqu xmm15, OWORD PTR [rsp+144]
add rsp, 160
ret
sp_256_get_point_33_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible point that could be being copied.
; *
; * AVX2 variant: every table entry from 1 to 32 is read and masked by a
; * compare of its index against idx, so the memory access pattern does not
; * depend on idx. Entry 0 is never read; when idx matches no scanned entry
; * the copied fields are all zero.
; *
; * Entries are 200 bytes apart. From each entry the 32 bytes at offsets 0,
; * 64 and 128 are selected and written to the same offsets in r.
; *
; * r Point to copy into.
; * table Table - start of the entries to access
; * idx Index of point to retrieve.
; *
; * Registers on entry: rcx = r, rdx = table, r8d = idx.
; */
_text SEGMENT READONLY PARA
sp_256_get_point_33_avx2_4 PROC
; xmm6-xmm9 are used below, so they are saved here and restored on exit.
sub rsp, 64
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
vmovdqu OWORD PTR [rsp+32], xmm8
vmovdqu OWORD PTR [rsp+48], xmm9
mov rax, 1
; xmm7 = idx
movd xmm7, r8d
; start at entry 1
add rdx, 200
; xmm9 = 1, the per-iteration index increment
movd xmm9, eax
; rax = number of entries to scan
mov rax, 32
; With an all-zero index vector vpermd copies dword 0 to every lane.
vpxor ymm8, ymm8, ymm8
vpermd ymm7, ymm8, ymm7
vpermd ymm9, ymm8, ymm9
; ymm0..ymm2 accumulate the selected point
vpxor ymm0, ymm0, ymm0
vpxor ymm1, ymm1, ymm1
vpxor ymm2, ymm2, ymm2
; ymm8 = index of the entry currently pointed to by rdx (starts at 1)
vmovdqa ymm8, ymm9
L_256_get_point_33_avx2_4_start:
; ymm6 = all ones when the current index equals idx, else zero
vpcmpeqd ymm6, ymm8, ymm7
vpaddd ymm8, ymm8, ymm9
; load the three 32-byte fields of this entry
vmovupd ymm3, YMMWORD PTR [rdx]
vmovupd ymm4, YMMWORD PTR [rdx+64]
vmovupd ymm5, YMMWORD PTR [rdx+128]
add rdx, 200
; keep the data only when this is the requested entry
vpand ymm3, ymm3, ymm6
vpand ymm4, ymm4, ymm6
vpand ymm5, ymm5, ymm6
vpor ymm0, ymm0, ymm3
vpor ymm1, ymm1, ymm4
vpor ymm2, ymm2, ymm5
dec rax
jnz L_256_get_point_33_avx2_4_start
; write the selected fields to r
vmovupd YMMWORD PTR [rcx], ymm0
vmovupd YMMWORD PTR [rcx+64], ymm1
vmovupd YMMWORD PTR [rcx+128], ymm2
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm8, OWORD PTR [rsp+32]
vmovdqu xmm9, OWORD PTR [rsp+48]
add rsp, 64
ret
sp_256_get_point_33_avx2_4 ENDP
_text ENDS
ENDIF
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Multiply two Montgomery form numbers mod the modulus (prime).
; * (r = a * b mod m)
; *
; * The 512-bit product is built in r8..r15 with MULX and the two independent
; * carry chains of ADCX (CF) and ADOX (OF). The reduction that follows uses
; * only shifts, additions and subtractions and is specialised for the modulus
; * with words (low to high) -1, 0x00000000ffffffff, 0, 0xffffffff00000001.
; *
; * r Result of multiplication.
; * a First number to multiply in Montgomery form.
; * b Second number to multiply in Montgomery form.
; * m Modulus (prime). Not read by this routine.
; * mp Montgomery multiplier. Not read by this routine.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b.
; */
_text SEGMENT READONLY PARA
sp_256_mont_mul_avx2_4 PROC
push rbp
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; rbp = b, rax = a (rdx is the implicit MULX multiplicand)
mov rbp, r8
mov rax, rdx
; rdx = A[0]
mov rdx, QWORD PTR [rax]
; r14 = B[1], kept in a register for the first three rows
mov r14, QWORD PTR [rbp+8]
; A[0] * B[0]
mulx r9, r8, QWORD PTR [rbp]
; rbx = 0 and both CF and OF cleared
xor rbx, rbx
; A[0] * B[1]
mulx r10, rdi, r14
adcx r9, rdi
; A[0] * B[2]
mulx r11, rdi, QWORD PTR [rbp+16]
adcx r10, rdi
; A[0] * B[3]
mulx r12, rdi, QWORD PTR [rbp+24]
adcx r11, rdi
; rdx = A[1]
mov rdx, QWORD PTR [rax+8]
adcx r12, rbx
; A[1] * B[0]
mulx rsi, rdi, QWORD PTR [rbp]
; restart both carry chains
xor rbx, rbx
adcx r9, rdi
; A[1] * B[1]
mulx r15, rdi, r14
adox r10, rsi
adcx r10, rdi
; A[1] * B[2]
mulx rsi, rdi, QWORD PTR [rbp+16]
adox r11, r15
adcx r11, rdi
; A[1] * B[3]
mulx r13, rdi, QWORD PTR [rbp+24]
adox r12, rsi
adcx r12, rdi
adox r13, rbx
; rdx = A[2]
mov rdx, QWORD PTR [rax+16]
adcx r13, rbx
; A[2] * B[0]
mulx rsi, rdi, QWORD PTR [rbp]
xor rbx, rbx
adcx r10, rdi
; A[2] * B[1]
mulx r15, rdi, r14
adox r11, rsi
adcx r11, rdi
; A[2] * B[2]
mulx rsi, rdi, QWORD PTR [rbp+16]
adox r12, r15
adcx r12, rdi
; A[2] * B[3]
; r14 becomes an accumulator word from here on; B[1] is reloaded from memory
mulx r14, rdi, QWORD PTR [rbp+24]
adox r13, rsi
adcx r13, rdi
adox r14, rbx
; rdx = A[3]
mov rdx, QWORD PTR [rax+24]
adcx r14, rbx
; A[3] * B[0]
mulx rsi, rdi, QWORD PTR [rbp]
xor rbx, rbx
adcx r11, rdi
; A[3] * B[1]
mulx r15, rdi, QWORD PTR [rbp+8]
adox r12, rsi
adcx r12, rdi
; A[3] * B[2]
mulx rsi, rdi, QWORD PTR [rbp+16]
adox r13, r15
adcx r13, rdi
; A[3] * B[3]
mulx r15, rdi, QWORD PTR [rbp+24]
adox r14, rsi
adcx r14, rdi
adox r15, rbx
adcx r15, rbx
; The product is now in r8 (lowest word) .. r15 (highest word).
; Start Reduction
; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
; - a[0] << 32 << 192
; a[0]-a[3] + (a[0] * 2) << 192
; mu is assembled in rdi (word 0), rax (word 1), rbp (word 2), rdx (word 3)
mov rdi, r8
lea rdx, QWORD PTR [r11+2*r8]
mov rax, r9
mov rbp, r10
mov rsi, r10
; a[0]-a[2] << 32
shl r8, 32
shld rsi, rax, 32
shld r9, rdi, 32
; - a[0] << 32 << 192
sub rdx, r8
; + a[0]-a[2] << 32 << 64
add rax, r8
adc rbp, r9
adc rdx, rsi
; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
xor rsi, rsi
; a += mu << 256
add r12, rdi
adc r13, rax
adc r14, rbp
adc r15, rdx
; r8 tracks the overflow above the top word as a negative count
sbb r8, r8
; a += mu << 192
add r11, rdi
adc r12, rax
; keep mu word 1 in r9 for the carry computation of the last step
mov r9, rax
adc r13, rbp
adc r14, rdx
adc r15, 0
sbb r8, 0
; mu <<= 32
; (five words after the shift: rdi, rax, rbp, rdx, rsi)
shld rsi, rdx, 32
shld rdx, rbp, 32
shld rbp, rax, 32
shld rax, rdi, 32
shl rdi, 32
; a -= (mu << 32) << 192
sub r11, rdi
sbb r12, rax
sbb r13, rbp
sbb r14, rdx
sbb r15, rsi
adc r8, 0
; a += (mu << 32) << 64
; Only the carry out of the lowest word is needed here. r9 holds mu word 1
; (copied from rax above), not a[1], and the carry is obtained as the borrow
; of this subtraction; the value left in r9 is not used afterwards.
sub r9, rdi
adc r10, rax
adc r11, rbp
adc r12, rdx
adc r13, rsi
adc r14, 0
adc r15, 0
sbb r8, 0
; rax = m[3]
mov rax, 18446744069414584321
; mask m and sub from result if overflow
; m[0] = -1 & mask = mask
; m[2] = 0 & mask = 0
; rdi = m[1] & mask (32-bit move zero-extends), rax = m[3] & mask
mov edi, r8d
and rax, r8
sub r12, r8
sbb r13, rdi
mov QWORD PTR [rcx], r12
sbb r14, 0
mov QWORD PTR [rcx+8], r13
sbb r15, rax
mov QWORD PTR [rcx+16], r14
mov QWORD PTR [rcx+24], r15
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rbp
ret
sp_256_mont_mul_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
; *
; * The off-diagonal products A[i]*A[j] (i < j) are summed once, doubled, and
; * combined with the squares A[i]*A[i] to give the 512-bit result in
; * r8..r15. The reduction that follows uses only shifts, additions and
; * subtractions and is specialised for the modulus with words (low to high)
; * -1, 0x00000000ffffffff, 0, 0xffffffff00000001.
; *
; * r Result of squaring.
; * a Number to square in Montgomery form.
; * m Modulus (prime). Not read by this routine.
; * mp Montgomery multiplier. Not read by this routine.
; *
; * Registers on entry: rcx = r, rdx = a.
; */
_text SEGMENT READONLY PARA
sp_256_mont_sqr_avx2_4 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; rax = a (rdx is the implicit MULX multiplicand)
mov rax, rdx
; clears CF and OF for the ADCX/ADOX chains below
xor r8, r8
; rdx = A[0], rsi = A[1], rbx = A[2], r15 = A[3]
mov rdx, QWORD PTR [rax]
mov rsi, QWORD PTR [rax+8]
mov rbx, QWORD PTR [rax+16]
mov r15, QWORD PTR [rax+24]
; A[0] * A[1]
mulx r10, r9, rsi
; A[0] * A[2]
mulx r11, r8, rbx
adox r10, r8
; A[0] * A[3]
mulx r12, r8, r15
; rdx = A[1]
mov rdx, rsi
adox r11, r8
; A[1] * A[2]
mulx rdi, r8, rbx
; rdx = A[3]
mov rdx, r15
adcx r11, r8
; A[1] * A[3]
mulx r13, r8, rsi
; r15 = 0 (mov keeps both carry chains intact)
mov r15, 0
adox r12, rdi
adcx r12, r8
; A[2] * A[3]
mulx r14, r8, rbx
adox r13, r15
adcx r13, r8
adox r14, r15
adcx r14, r15
; r9..r14 now hold the sum of the off-diagonal products (words 1..6).
; Double with Carry Flag
; (ADCX doubles the off-diagonal sum while ADOX adds in the squares)
xor r15, r15
; A[0] * A[0]
mov rdx, QWORD PTR [rax]
mulx rdi, r8, rdx
adcx r9, r9
adcx r10, r10
adox r9, rdi
; A[1] * A[1]
mov rdx, QWORD PTR [rax+8]
mulx rbx, rsi, rdx
adcx r11, r11
adox r10, rsi
; A[2] * A[2]
mov rdx, QWORD PTR [rax+16]
mulx rsi, rdi, rdx
adcx r12, r12
adox r11, rbx
adcx r13, r13
adox r12, rdi
adcx r14, r14
; A[3] * A[3]
mov rdx, QWORD PTR [rax+24]
mulx rbx, rdi, rdx
adox r13, rsi
; r15 = carry out of the doubling
adcx r15, r15
adox r14, rdi
adox r15, rbx
; The square is now in r8 (lowest word) .. r15 (highest word).
; Start Reduction
; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
; - a[0] << 32 << 192
; a[0]-a[3] + (a[0] * 2) << 192
; mu is assembled in rdi (word 0), rax (word 1), rsi (word 2), rdx (word 3)
mov rdi, r8
lea rdx, QWORD PTR [r11+2*r8]
mov rax, r9
mov rsi, r10
mov rbx, r10
; a[0]-a[2] << 32
shl r8, 32
shld rbx, rax, 32
shld r9, rdi, 32
; - a[0] << 32 << 192
sub rdx, r8
; + a[0]-a[2] << 32 << 64
add rax, r8
adc rsi, r9
adc rdx, rbx
; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
xor rbx, rbx
; a += mu << 256
add r12, rdi
adc r13, rax
adc r14, rsi
adc r15, rdx
; r8 tracks the overflow above the top word as a negative count
sbb r8, r8
; a += mu << 192
add r11, rdi
adc r12, rax
; keep mu word 1 in r9 for the carry computation of the last step
mov r9, rax
adc r13, rsi
adc r14, rdx
adc r15, 0
sbb r8, 0
; mu <<= 32
; (five words after the shift: rdi, rax, rsi, rdx, rbx)
shld rbx, rdx, 32
shld rdx, rsi, 32
shld rsi, rax, 32
shld rax, rdi, 32
shl rdi, 32
; a -= (mu << 32) << 192
sub r11, rdi
sbb r12, rax
sbb r13, rsi
sbb r14, rdx
sbb r15, rbx
adc r8, 0
; a += (mu << 32) << 64
; Only the carry out of the lowest word is needed here. r9 holds mu word 1
; (copied from rax above), not a[1], and the carry is obtained as the borrow
; of this subtraction; the value left in r9 is not used afterwards.
sub r9, rdi
adc r10, rax
adc r11, rsi
adc r12, rdx
adc r13, rbx
adc r14, 0
adc r15, 0
sbb r8, 0
; rax = m[3]
mov rax, 18446744069414584321
; mask m and sub from result if overflow
; m[0] = -1 & mask = mask
; m[2] = 0 & mask = 0
; rdi = m[1] & mask (32-bit move zero-extends), rax = m[3] & mask
mov edi, r8d
and rax, r8
sub r12, r8
sbb r13, rdi
mov QWORD PTR [rcx], r12
sbb r14, 0
mov QWORD PTR [rcx+8], r13
sbb r15, rax
mov QWORD PTR [rcx+16], r14
mov QWORD PTR [rcx+24], r15
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_256_mont_sqr_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not subtracting.
; *
; * r A single precision number representing condition subtract result.
; * a A single precision number to subtract from.
; * b A single precision number to subtract.
; * m Mask value to apply.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b, r9 = m.
; * Returns rax = -1 when the subtraction borrowed, otherwise 0.
; */
_text SEGMENT READONLY PARA
sp_256_cond_sub_avx2_4 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
; r14, r15, rdi, rsi = b[0..3] & m
mov r14, QWORD PTR [r8]
mov r15, QWORD PTR [r8+8]
mov rdi, QWORD PTR [r8+16]
mov rsi, QWORD PTR [r8+24]
and r14, r9
and r15, r9
and rdi, r9
and rsi, r9
; r10..r13 = a[0..3]
mov r10, QWORD PTR [rdx]
mov r11, QWORD PTR [rdx+8]
mov r12, QWORD PTR [rdx+16]
mov r13, QWORD PTR [rdx+24]
; a -= (b & m)
sub r10, r14
sbb r11, r15
sbb r12, rdi
sbb r13, rsi
; The stores below do not change the flags, so the borrow is still
; available for the sbb that forms the return value.
mov QWORD PTR [rcx], r10
mov QWORD PTR [rcx+8], r11
mov QWORD PTR [rcx+16], r12
mov QWORD PTR [rcx+24], r13
sbb rax, rax
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_256_cond_sub_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 256 bits using Montgomery reduction.
; *
; * Four unrolled rounds; each round computes mu = a[i] * mp and adds m * mu
; * to a starting at word i, using MULX with the ADCX (CF) and ADOX (OF)
; * carry chains. The upper four words are then written to a[0..3] after a
; * conditional subtraction.
; *
; * NOTE: the products use the words read from m, but the final conditional
; * subtraction uses hard-coded words (low to high 0xF3B9CAC2FC632551,
; * 0xBCE6FAADA7179E84, -1, 0xFFFFFFFF00000000) rather than m. Presumably
; * this is the P-256 group order and m is always that value - confirm
; * against the callers.
; *
; * a A single precision number to reduce in place (words 0..7 are read, the
; *   result is stored in words 0..3).
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; *
; * Registers on entry: rcx = a, rdx = m, r8 = mp.
; */
_text SEGMENT READONLY PARA
sp_256_mont_reduce_order_avx2_4 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; rax = a, r10 = m, r11 = mp (rcx, rdx and r8 are reused as scratch)
mov rax, rcx
mov r10, rdx
mov r11, r8
; r14, r15, rdi, rsi = a[0..3]
mov r14, QWORD PTR [rax]
mov r15, QWORD PTR [rax+8]
mov rdi, QWORD PTR [rax+16]
mov rsi, QWORD PTR [rax+24]
; r13 = carry between rounds, r12 = constant 0; CF and OF are cleared
xor r13, r13
xor r12, r12
; a[0-4] += m[0-3] * mu = m[0-3] * (a[0] * mp)
mov rbx, QWORD PTR [rax+32]
; mu = a[0] * mp
; (low half of the product goes to rdx, the high half in rcx is discarded)
mov rdx, r14
mulx rcx, rdx, r11
; a[0] += m[0] * mu
mulx r9, r8, QWORD PTR [r10]
adcx r14, r8
; a[1] += m[1] * mu
mulx rcx, r8, QWORD PTR [r10+8]
adox r15, r9
adcx r15, r8
; a[2] += m[2] * mu
mulx r9, r8, QWORD PTR [r10+16]
adox rdi, rcx
adcx rdi, r8
; a[3] += m[3] * mu
mulx rcx, r8, QWORD PTR [r10+24]
adox rsi, r9
adcx rsi, r8
; a[4] += carry
adox rbx, rcx
adcx rbx, r12
; carry
adox r13, r12
adcx r13, r12
; a[1-5] += m[0-3] * mu = m[0-3] * (a[1] * mp)
; r14 is free again and now holds a[5]
mov r14, QWORD PTR [rax+40]
; mu = a[1] * mp
mov rdx, r15
mulx rcx, rdx, r11
; a[1] += m[0] * mu
mulx r9, r8, QWORD PTR [r10]
adcx r15, r8
; a[2] += m[1] * mu
mulx rcx, r8, QWORD PTR [r10+8]
adox rdi, r9
adcx rdi, r8
; a[3] += m[2] * mu
mulx r9, r8, QWORD PTR [r10+16]
adox rsi, rcx
adcx rsi, r8
; a[4] += m[3] * mu
mulx rcx, r8, QWORD PTR [r10+24]
adox rbx, r9
adcx rbx, r8
; a[5] += carry
adox r14, rcx
adcx r14, r13
; reset the round carry to 0 without touching the flags
mov r13, r12
; carry
adox r13, r12
adcx r13, r12
; a[2-6] += m[0-3] * mu = m[0-3] * (a[2] * mp)
; r15 now holds a[6]
mov r15, QWORD PTR [rax+48]
; mu = a[2] * mp
mov rdx, rdi
mulx rcx, rdx, r11
; a[2] += m[0] * mu
mulx r9, r8, QWORD PTR [r10]
adcx rdi, r8
; a[3] += m[1] * mu
mulx rcx, r8, QWORD PTR [r10+8]
adox rsi, r9
adcx rsi, r8
; a[4] += m[2] * mu
mulx r9, r8, QWORD PTR [r10+16]
adox rbx, rcx
adcx rbx, r8
; a[5] += m[3] * mu
mulx rcx, r8, QWORD PTR [r10+24]
adox r14, r9
adcx r14, r8
; a[6] += carry
adox r15, rcx
adcx r15, r13
mov r13, r12
; carry
adox r13, r12
adcx r13, r12
; a[3-7] += m[0-3] * mu = m[0-3] * (a[3] * mp)
; rdi now holds a[7]
mov rdi, QWORD PTR [rax+56]
; mu = a[3] * mp
mov rdx, rsi
mulx rcx, rdx, r11
; a[3] += m[0] * mu
mulx r9, r8, QWORD PTR [r10]
adcx rsi, r8
; a[4] += m[1] * mu
mulx rcx, r8, QWORD PTR [r10+8]
adox rbx, r9
adcx rbx, r8
; a[5] += m[2] * mu
mulx r9, r8, QWORD PTR [r10+16]
adox r14, rcx
adcx r14, r8
; a[6] += m[3] * mu
mulx rcx, r8, QWORD PTR [r10+24]
adox r15, r9
adcx r15, r8
; a[7] += carry
adox rdi, rcx
adcx rdi, r13
mov r13, r12
; carry
adox r13, r12
adcx r13, r12
; Subtract mod if carry
; r13 = 0 - carry is used as the mask
neg r13
; hard-coded modulus words 0, 1 and 3; word 2 is all ones, so the mask
; itself is used for it
mov r8, 17562291160714782033
mov r9, 13611842547513532036
mov rdx, 18446744069414584320
and r8, r13
and r9, r13
and rdx, r13
sub rbx, r8
sbb r14, r9
sbb r15, r13
sbb rdi, rdx
; store the result in a[0..3]
mov QWORD PTR [rax], rbx
mov QWORD PTR [rax+8], r14
mov QWORD PTR [rax+16], r15
mov QWORD PTR [rax+24], rdi
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_256_mont_reduce_order_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * When a is odd the modulus is added first so that the sum is even; the
; * (up to 257-bit) sum is then shifted right by one bit. The instruction
; * sequence is the same as in sp_256_mont_div2_4.
; *
; * r Result of division by 2.
; * a Number to divide.
; * m Modulus (prime).
; *
; * Registers on entry: rcx = r, rdx = a. The m argument (r8) is not read:
; * r8 is reused as scratch and the modulus words are built from constants
; * (low to high: -1, 0x00000000ffffffff, 0, 0xffffffff00000001).
; */
_text SEGMENT READONLY PARA
sp_256_mont_div2_avx2_4 PROC
push r12
push r13
; rax, r8, r9, r10 = a[0..3]
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r9, QWORD PTR [rdx+16]
mov r10, QWORD PTR [rdx+24]
; r12 = m[3]
mov r12, 18446744069414584321
; r13 = all ones when a is odd, else 0
mov r13, rax
and r13, 1
neg r13
; r11 = m[1] & mask, r12 = m[3] & mask
mov r11d, r13d
and r12, r13
; a += m & mask (m[0] & mask is the mask itself, m[2] is 0)
add rax, r13
adc r8, r11
adc r9, 0
adc r10, r12
; r13 = carry out of the addition (bit 256); mov keeps the flags intact
mov r13, 0
adc r13, 0
; Shift the 257-bit value right by one, pulling bits down from the word above.
shrd rax, r8, 1
shrd r8, r9, 1
shrd r9, r10, 1
shrd r10, r13, 1
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r8
mov QWORD PTR [rcx+16], r9
mov QWORD PTR [rcx+24], r10
pop r13
pop r12
ret
sp_256_mont_div2_avx2_4 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible entry that could be being copied.
; *
; * Every table entry from 1 to 63 is read and masked by a compare of its
; * index against idx, so the memory access pattern does not depend on idx.
; * Entry 0 is never read; when idx matches no scanned entry the copied
; * fields are all zero.
; *
; * Entries are 64 bytes. The first 32 bytes of the selected entry are
; * written to r at offset 0 and the second 32 bytes to r at offset 64.
; *
; * r Point to copy into.
; * table Table - start of the entries to access
; * idx Index of entry to retrieve.
; *
; * Registers on entry: rcx = r, rdx = table, r8d = idx.
; */
_text SEGMENT READONLY PARA
sp_256_get_entry_64_4 PROC
; xmm6-xmm11 are used below, so they are saved here and restored on exit.
sub rsp, 96
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqu OWORD PTR [rsp+32], xmm8
movdqu OWORD PTR [rsp+48], xmm9
movdqu OWORD PTR [rsp+64], xmm10
movdqu OWORD PTR [rsp+80], xmm11
; From entry 1
mov rax, 1
; xmm9 = idx (broadcast to all four lanes below)
movd xmm9, r8d
add rdx, 64
; xmm11 = 1 (broadcast below): the per-iteration index increment
movd xmm11, eax
; rax = number of entries to scan (1..63)
mov rax, 63
pshufd xmm11, xmm11, 0
pshufd xmm9, xmm9, 0
pxor xmm10, xmm10
; xmm0..xmm3 accumulate the selected entry
pxor xmm0, xmm0
pxor xmm1, xmm1
pxor xmm2, xmm2
pxor xmm3, xmm3
; xmm10 = index of the entry currently pointed to by rdx (starts at 1)
movdqa xmm10, xmm11
L_256_get_entry_64_4_start_0:
; xmm8 = all ones when the current index equals idx, else zero
movdqa xmm8, xmm10
paddd xmm10, xmm11
pcmpeqd xmm8, xmm9
movdqu xmm4, OWORD PTR [rdx]
movdqu xmm5, OWORD PTR [rdx+16]
movdqu xmm6, OWORD PTR [rdx+32]
movdqu xmm7, OWORD PTR [rdx+48]
add rdx, 64
; keep the data only when this is the requested entry
pand xmm4, xmm8
pand xmm5, xmm8
pand xmm6, xmm8
pand xmm7, xmm8
por xmm0, xmm4
por xmm1, xmm5
por xmm2, xmm6
por xmm3, xmm7
dec rax
jnz L_256_get_entry_64_4_start_0
; first half of the entry goes to r+0, second half to r+64
movdqu OWORD PTR [rcx], xmm0
movdqu OWORD PTR [rcx+16], xmm1
movdqu OWORD PTR [rcx+64], xmm2
movdqu OWORD PTR [rcx+80], xmm3
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm8, OWORD PTR [rsp+32]
movdqu xmm9, OWORD PTR [rsp+48]
movdqu xmm10, OWORD PTR [rsp+64]
movdqu xmm11, OWORD PTR [rsp+80]
add rsp, 96
ret
sp_256_get_entry_64_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible entry that could be being copied.
; *
; * AVX2 variant: every table entry from 1 to 63 is read and masked by a
; * compare of its index against idx, so the memory access pattern does not
; * depend on idx. Entry 0 is never read; when idx matches no scanned entry
; * the copied fields are all zero.
; *
; * Entries are 64 bytes. The first 32 bytes of the selected entry are
; * written to r at offset 0 and the second 32 bytes to r at offset 64.
; *
; * r Point to copy into.
; * table Table - start of the entries to access
; * idx Index of entry to retrieve.
; *
; * Registers on entry: rcx = r, rdx = table, r8d = idx.
; */
_text SEGMENT READONLY PARA
sp_256_get_entry_64_avx2_4 PROC
; xmm6 and xmm7 are used below, so they are saved here and restored on exit.
sub rsp, 32
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
; From entry 1
mov rax, 1
; xmm5 = idx
movd xmm5, r8d
add rdx, 64
; xmm7 = 1, the per-iteration index increment
movd xmm7, eax
; rax = number of entries to scan. The scan starts at entry 1, so a
; 64-entry table has 63 entries left (1..63); scanning 64 would read one
; entry past the end of the table. This matches sp_256_get_entry_64_4.
mov rax, 63
; With an all-zero index vector vpermd copies dword 0 to every lane.
vpxor ymm6, ymm6, ymm6
vpermd ymm5, ymm6, ymm5
vpermd ymm7, ymm6, ymm7
; ymm0 and ymm1 accumulate the selected entry
vpxor ymm0, ymm0, ymm0
vpxor ymm1, ymm1, ymm1
; ymm6 = index of the entry currently pointed to by rdx (starts at 1)
vmovdqa ymm6, ymm7
L_256_get_entry_64_avx2_4_start:
; ymm4 = all ones when the current index equals idx, else zero
vpcmpeqd ymm4, ymm6, ymm5
vpaddd ymm6, ymm6, ymm7
vmovupd ymm2, YMMWORD PTR [rdx]
vmovupd ymm3, YMMWORD PTR [rdx+32]
add rdx, 64
; keep the data only when this is the requested entry
vpand ymm2, ymm2, ymm4
vpand ymm3, ymm3, ymm4
vpor ymm0, ymm0, ymm2
vpor ymm1, ymm1, ymm3
dec rax
jnz L_256_get_entry_64_avx2_4_start
; first half of the entry goes to r+0, second half to r+64
vmovupd YMMWORD PTR [rcx], ymm0
vmovupd YMMWORD PTR [rcx+64], ymm1
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
add rsp, 32
ret
sp_256_get_entry_64_avx2_4 ENDP
_text ENDS
ENDIF
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible entry that could be being copied.
; *
; * Every table entry from 1 to 64 is read and masked by a compare of its
; * index against idx, so the memory access pattern does not depend on idx.
; * Entry 0 is never read; when idx matches no scanned entry the copied
; * fields are all zero.
; *
; * Entries are 64 bytes. The first 32 bytes of the selected entry are
; * written to r at offset 0 and the second 32 bytes to r at offset 64.
; *
; * r Point to copy into.
; * table Table - start of the entries to access
; * idx Index of entry to retrieve.
; *
; * Registers on entry: rcx = r, rdx = table, r8d = idx.
; */
_text SEGMENT READONLY PARA
sp_256_get_entry_65_4 PROC
; xmm6-xmm11 are used below, so they are saved here and restored on exit.
sub rsp, 96
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqu OWORD PTR [rsp+32], xmm8
movdqu OWORD PTR [rsp+48], xmm9
movdqu OWORD PTR [rsp+64], xmm10
movdqu OWORD PTR [rsp+80], xmm11
; From entry 1
mov rax, 1
; xmm9 = idx (broadcast to all four lanes below)
movd xmm9, r8d
add rdx, 64
; xmm11 = 1 (broadcast below): the per-iteration index increment
movd xmm11, eax
; rax = number of entries to scan (1..64)
mov rax, 64
pshufd xmm11, xmm11, 0
pshufd xmm9, xmm9, 0
pxor xmm10, xmm10
; xmm0..xmm3 accumulate the selected entry
pxor xmm0, xmm0
pxor xmm1, xmm1
pxor xmm2, xmm2
pxor xmm3, xmm3
; xmm10 = index of the entry currently pointed to by rdx (starts at 1)
movdqa xmm10, xmm11
L_256_get_entry_65_4_start_0:
; xmm8 = all ones when the current index equals idx, else zero
movdqa xmm8, xmm10
paddd xmm10, xmm11
pcmpeqd xmm8, xmm9
movdqu xmm4, OWORD PTR [rdx]
movdqu xmm5, OWORD PTR [rdx+16]
movdqu xmm6, OWORD PTR [rdx+32]
movdqu xmm7, OWORD PTR [rdx+48]
add rdx, 64
; keep the data only when this is the requested entry
pand xmm4, xmm8
pand xmm5, xmm8
pand xmm6, xmm8
pand xmm7, xmm8
por xmm0, xmm4
por xmm1, xmm5
por xmm2, xmm6
por xmm3, xmm7
dec rax
jnz L_256_get_entry_65_4_start_0
; first half of the entry goes to r+0, second half to r+64
movdqu OWORD PTR [rcx], xmm0
movdqu OWORD PTR [rcx+16], xmm1
movdqu OWORD PTR [rcx+64], xmm2
movdqu OWORD PTR [rcx+80], xmm3
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm8, OWORD PTR [rsp+32]
movdqu xmm9, OWORD PTR [rsp+48]
movdqu xmm10, OWORD PTR [rsp+64]
movdqu xmm11, OWORD PTR [rsp+80]
add rsp, 96
ret
sp_256_get_entry_65_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible entry that could be being copied.
; *
; * AVX2 variant: every table entry from 1 to 64 is read and masked by a
; * compare of its index against idx, so the memory access pattern does not
; * depend on idx. Entry 0 is never read; when idx matches no scanned entry
; * the copied fields are all zero.
; *
; * Entries are 64 bytes. The first 32 bytes of the selected entry are
; * written to r at offset 0 and the second 32 bytes to r at offset 64.
; *
; * r Point to copy into.
; * table Table - start of the entries to access
; * idx Index of entry to retrieve.
; *
; * Registers on entry: rcx = r, rdx = table, r8d = idx.
; */
_text SEGMENT READONLY PARA
sp_256_get_entry_65_avx2_4 PROC
; xmm6 and xmm7 are used below, so they are saved here and restored on exit.
sub rsp, 32
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
; From entry 1
mov rax, 1
; xmm5 = idx
movd xmm5, r8d
add rdx, 64
; xmm7 = 1, the per-iteration index increment
movd xmm7, eax
; rax = number of entries to scan. The scan starts at entry 1, so a
; 65-entry table has 64 entries left (1..64); scanning 65 would read one
; entry past the end of the table. This matches sp_256_get_entry_65_4.
mov rax, 64
; With an all-zero index vector vpermd copies dword 0 to every lane.
vpxor ymm6, ymm6, ymm6
vpermd ymm5, ymm6, ymm5
vpermd ymm7, ymm6, ymm7
; ymm0 and ymm1 accumulate the selected entry
vpxor ymm0, ymm0, ymm0
vpxor ymm1, ymm1, ymm1
; ymm6 = index of the entry currently pointed to by rdx (starts at 1)
vmovdqa ymm6, ymm7
L_256_get_entry_65_avx2_4_start:
; ymm4 = all ones when the current index equals idx, else zero
vpcmpeqd ymm4, ymm6, ymm5
vpaddd ymm6, ymm6, ymm7
vmovupd ymm2, YMMWORD PTR [rdx]
vmovupd ymm3, YMMWORD PTR [rdx+32]
add rdx, 64
; keep the data only when this is the requested entry
vpand ymm2, ymm2, ymm4
vpand ymm3, ymm3, ymm4
vpor ymm0, ymm0, ymm2
vpor ymm1, ymm1, ymm3
dec rax
jnz L_256_get_entry_65_avx2_4_start
; first half of the entry goes to r+0, second half to r+64
vmovupd YMMWORD PTR [rcx], ymm0
vmovupd YMMWORD PTR [rcx+64], ymm1
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
add rsp, 32
ret
sp_256_get_entry_65_avx2_4 ENDP
_text ENDS
ENDIF
ENDIF
; /* Increment a four-word number in place. (a = a + 1)
; *
; * a A single precision integer (rcx); all four words are rewritten.
; *
; * A carry out of the top word is not reported.
; */
_text SEGMENT READONLY PARA
sp_256_add_one_4 PROC
; Pull the number into volatile registers.
mov r8, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [rcx+16]
mov r11, QWORD PTR [rcx+24]
; Ripple the +1 through the four words.
add r8, 1
adc r9, 0
adc r10, 0
adc r11, 0
; Write the incremented value back.
mov QWORD PTR [rcx], r8
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
ret
sp_256_add_one_4 ENDP
_text ENDS
; /* Read big endian unsigned byte array into r.
; * Uses the bswap instruction.
; *
; * Bytes are consumed from the end of the array (least significant first)
; * and written as little endian 64-bit words; the remaining words of r, up
; * to 32 bytes in total, are zeroed.
; *
; * r A single precision integer.
; * size Maximum number of bytes to convert
; * a Byte array.
; * n Number of bytes in array to read.
; *
; * Registers on entry: rcx = r, rdx = size, r8 = a, r9 = n.
; * The size argument is not read; the output length is fixed at 32 bytes.
; * NOTE(review): nothing here stops the copy loops at 32 bytes, so callers
; * presumably guarantee n <= 32 - confirm.
; */
_text SEGMENT READONLY PARA
sp_256_from_bin_bswap PROC
push r12
push r13
; r11 = a + n (one past the last input byte)
mov r11, r8
; r12 = r + 32 (one past the last output word)
mov r12, rcx
add r11, r9
add r12, 32
; r13 = constant 0
xor r13, r13
jmp L_256_from_bin_bswap_64_end
; Convert 64 input bytes (eight words) per iteration.
L_256_from_bin_bswap_64_start:
sub r11, 64
mov rax, QWORD PTR [r11+56]
mov r10, QWORD PTR [r11+48]
bswap rax
bswap r10
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov rax, QWORD PTR [r11+40]
mov r10, QWORD PTR [r11+32]
bswap rax
bswap r10
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
mov rax, QWORD PTR [r11+24]
mov r10, QWORD PTR [r11+16]
bswap rax
bswap r10
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov rax, QWORD PTR [r11+8]
mov r10, QWORD PTR [r11]
bswap rax
bswap r10
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
add rcx, 64
sub r9, 64
L_256_from_bin_bswap_64_end:
; loop while at least 64 bytes remain (signed compare)
cmp r9, 63
jg L_256_from_bin_bswap_64_start
jmp L_256_from_bin_bswap_8_end
; Convert 8 input bytes (one word) per iteration.
L_256_from_bin_bswap_8_start:
sub r11, 8
mov rax, QWORD PTR [r11]
bswap rax
mov QWORD PTR [rcx], rax
add rcx, 8
sub r9, 8
L_256_from_bin_bswap_8_end:
; loop while at least 8 bytes remain (signed compare)
cmp r9, 7
jg L_256_from_bin_bswap_8_start
; skip the partial word when no bytes are left
cmp r9, r13
je L_256_from_bin_bswap_hi_end
; r10 = partial word being assembled, rax = byte scratch with upper bits 0
mov r10, r13
mov rax, r13
; The leftover bytes are the leading (most significant) bytes of the
; array, so they are read forwards from a and shifted in one at a time.
L_256_from_bin_bswap_hi_start:
mov al, BYTE PTR [r8]
shl r10, 8
inc r8
add r10, rax
dec r9
jg L_256_from_bin_bswap_hi_start
mov QWORD PTR [rcx], r10
add rcx, 8
L_256_from_bin_bswap_hi_end:
; zero any words of r that were not written
cmp rcx, r12
jge L_256_from_bin_bswap_zero_end
L_256_from_bin_bswap_zero_start:
mov QWORD PTR [rcx], r13
add rcx, 8
cmp rcx, r12
jl L_256_from_bin_bswap_zero_start
L_256_from_bin_bswap_zero_end:
pop r13
pop r12
ret
sp_256_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
; * Uses the movbe instruction which is an optional instruction.
; *
; * Bytes are consumed from the end of the array (least significant first)
; * and written as little endian 64-bit words; the remaining words of r, up
; * to 32 bytes in total, are zeroed.
; *
; * r A single precision integer.
; * size Maximum number of bytes to convert
; * a Byte array.
; * n Number of bytes in array to read.
; *
; * Registers on entry: rcx = r, rdx = size, r8 = a, r9 = n.
; * The size argument is not read; the output length is fixed at 32 bytes.
; * NOTE(review): nothing here stops the copy loops at 32 bytes, so callers
; * presumably guarantee n <= 32 - confirm.
; */
_text SEGMENT READONLY PARA
sp_256_from_bin_movbe PROC
push r12
; r11 = a + n (one past the last input byte)
mov r11, r8
; r12 = r + 32 (one past the last output word)
mov r12, rcx
add r11, r9
add r12, 32
jmp L_256_from_bin_movbe_64_end
; Convert 64 input bytes (eight words) per iteration.
L_256_from_bin_movbe_64_start:
sub r11, 64
movbe rax, QWORD PTR [r11+56]
movbe r10, QWORD PTR [r11+48]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
movbe rax, QWORD PTR [r11+40]
movbe r10, QWORD PTR [r11+32]
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
movbe rax, QWORD PTR [r11+24]
movbe r10, QWORD PTR [r11+16]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
movbe rax, QWORD PTR [r11+8]
movbe r10, QWORD PTR [r11]
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
add rcx, 64
sub r9, 64
L_256_from_bin_movbe_64_end:
; loop while at least 64 bytes remain (signed compare)
cmp r9, 63
jg L_256_from_bin_movbe_64_start
jmp L_256_from_bin_movbe_8_end
; Convert 8 input bytes (one word) per iteration.
L_256_from_bin_movbe_8_start:
sub r11, 8
movbe rax, QWORD PTR [r11]
mov QWORD PTR [rcx], rax
add rcx, 8
sub r9, 8
L_256_from_bin_movbe_8_end:
; loop while at least 8 bytes remain (signed compare)
cmp r9, 7
jg L_256_from_bin_movbe_8_start
; skip the partial word when no bytes are left
cmp r9, 0
je L_256_from_bin_movbe_hi_end
; r10 = partial word being assembled, rax = byte scratch with upper bits 0
mov r10, 0
mov rax, 0
; The leftover bytes are the leading (most significant) bytes of the
; array, so they are read forwards from a and shifted in one at a time.
L_256_from_bin_movbe_hi_start:
mov al, BYTE PTR [r8]
shl r10, 8
inc r8
add r10, rax
dec r9
jg L_256_from_bin_movbe_hi_start
mov QWORD PTR [rcx], r10
add rcx, 8
L_256_from_bin_movbe_hi_end:
; zero any words of r that were not written
cmp rcx, r12
jge L_256_from_bin_movbe_zero_end
L_256_from_bin_movbe_zero_start:
mov QWORD PTR [rcx], 0
add rcx, 8
cmp rcx, r12
jl L_256_from_bin_movbe_zero_start
L_256_from_bin_movbe_zero_end:
pop r12
ret
sp_256_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Serialise r into a as a 32-byte big endian value.
; * The byte order of each word is reversed with bswap.
; *
; * r A single precision integer (rcx), four 64-bit words.
; * a Byte array (rdx), exactly 32 bytes are written.
; */
_text SEGMENT READONLY PARA
sp_256_to_bin_bswap_4 PROC
; Upper half: r[3] and r[2] become bytes 0..15.
mov r9, QWORD PTR [rcx+24]
bswap r9
mov r10, QWORD PTR [rcx+16]
bswap r10
mov QWORD PTR [rdx], r9
mov QWORD PTR [rdx+8], r10
; Lower half: r[1] and r[0] become bytes 16..31.
mov r9, QWORD PTR [rcx+8]
bswap r9
mov r10, QWORD PTR [rcx]
bswap r10
mov QWORD PTR [rdx+16], r9
mov QWORD PTR [rdx+24], r10
ret
sp_256_to_bin_bswap_4 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Serialise r into a as a 32-byte big endian value.
; * The byte order of each word is reversed with movbe, which is an optional
; * instruction.
; *
; * r A single precision integer (rcx), four 64-bit words.
; * a Byte array (rdx), exactly 32 bytes are written.
; */
_text SEGMENT READONLY PARA
sp_256_to_bin_movbe_4 PROC
; Upper half: r[3] and r[2] become bytes 0..15 (swap done on the store).
mov r9, QWORD PTR [rcx+24]
mov r10, QWORD PTR [rcx+16]
movbe QWORD PTR [rdx], r9
movbe QWORD PTR [rdx+8], r10
; Lower half: r[1] and r[0] become bytes 16..31.
mov r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [rcx]
movbe QWORD PTR [rdx+16], r9
movbe QWORD PTR [rdx+24], r10
ret
sp_256_to_bin_movbe_4 ENDP
_text ENDS
ENDIF
; /* Subtract b from a, storing the difference back into a. (a -= b)
; *
; * a A single precision integer and result (rcx).
; * b A single precision integer (rdx).
; *
; * Returns rax = -1 when the subtraction borrowed, otherwise 0.
; */
_text SEGMENT READONLY PARA
sp_256_sub_in_place_4 PROC
; Bring a into registers; b is read directly from memory before any store.
mov r8, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [rcx+16]
mov r11, QWORD PTR [rcx+24]
sub r8, QWORD PTR [rdx]
sbb r9, QWORD PTR [rdx+8]
sbb r10, QWORD PTR [rdx+16]
sbb r11, QWORD PTR [rdx+24]
; Turn the final borrow into the 0 / -1 return value.
sbb rax, rax
mov QWORD PTR [rcx], r8
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
ret
sp_256_sub_in_place_4 ENDP
_text ENDS
; /* Mul a by digit b into r. (r = a * b)
; *
; * r A single precision integer (five words are written).
; * a A single precision integer (four words are read).
; * b A single precision digit.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b.
; * The column accumulators rotate through r10, r11 and r12.
; */
_text SEGMENT READONLY PARA
sp_256_mul_d_4 PROC
push r12
; r9 = a (rdx is overwritten by every mul)
mov r9, rdx
; A[0] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9]
mov r10, rax
mov r11, rdx
mov QWORD PTR [rcx], r10
; A[1] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+8]
; low half completes r[1]; the high half and the carry start r[2]
add r11, rax
mov QWORD PTR [rcx+8], r11
adc r12, rdx
adc r10, 0
; A[2] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+16]
add r12, rax
mov QWORD PTR [rcx+16], r12
adc r10, rdx
adc r11, 0
; A[3] * B
mov rax, r8
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
; r[3] and the top word r[4]
mov QWORD PTR [rcx+24], r10
mov QWORD PTR [rcx+32], r11
pop r12
ret
sp_256_mul_d_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * MULX variant: the low halves of the products are added on the ADCX (CF)
; * chain and the high halves on the ADOX (OF) chain.
; *
; * r A single precision integer (five words are written).
; * a A single precision integer (four words are read).
; * b A single precision digit.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b.
; */
_text SEGMENT READONLY PARA
sp_256_mul_d_avx2_4 PROC
push r12
push r13
; rax = a (rdx is the implicit MULX multiplicand)
mov rax, rdx
; A[0] * B
mov rdx, r8
; r13 = constant 0; CF and OF are cleared
xor r13, r13
mulx r12, r11, QWORD PTR [rax]
mov QWORD PTR [rcx], r11
; A[1] * B
mulx r10, r9, QWORD PTR [rax+8]
; zero the next accumulator without touching the flags
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+8], r12
; A[2] * B
mulx r10, r9, QWORD PTR [rax+16]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+16], r11
; A[3] * B
mulx r10, r9, QWORD PTR [rax+24]
mov r11, r13
adcx r12, r9
adox r11, r10
; fold the last CF carry into the top word
adcx r11, r13
mov QWORD PTR [rcx+24], r12
mov QWORD PTR [rcx+32], r11
pop r13
pop r12
ret
sp_256_mul_d_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1 The high order half of the number to divide (rcx).
; * d0 The low order half of the number to divide (rdx).
; * div The divisor (r8).
; * returns the quotient of the division in rax.
; *
; * The div instruction faults when the quotient does not fit in 64 bits.
; */
_text SEGMENT READONLY PARA
div_256_word_asm_4 PROC
; div expects the numerator in rdx:rax, so move d0 down before d1
; overwrites rdx.
mov rax, rdx
mov rdx, rcx
div r8
ret
div_256_word_asm_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Multiply two Montgomery form numbers mod the modulus (prime).
; * (r = a * b mod m)
; *
; * The four modulus words and the Montgomery multiplier are immediates in
; * the reduction below; no modulus pointer is passed in.
; * The 512-bit product is built in r8..r15 (r8 lowest), reduced one word at
; * a time, and the modulus is subtracted once if the reduction carried out.
; *
; * r Result of multiplication.
; * a First number to multiply in Montgomery form.
; * b Second number to multiply in Montgomery form.
; */
_text SEGMENT READONLY PARA
sp_256_mont_mul_order_avx2_4 PROC
push rbp
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; rbp = b, rax = a; rdx is needed as the implicit MULX factor.
mov rbp, r8
mov rax, rdx
mov rdx, QWORD PTR [rax]
; B[1] is cached in r14 for the A[0], A[1] and A[2] rows.
mov r14, QWORD PTR [rbp+8]
; A[0] * B[0]
mulx r9, r8, QWORD PTR [rbp]
; rbx = 0 and CF/OF cleared before the carry chains start.
xor rbx, rbx
; A[0] * B[1]
mulx r10, rdi, r14
adcx r9, rdi
; A[0] * B[2]
mulx r11, rdi, QWORD PTR [rbp+16]
adcx r10, rdi
; A[0] * B[3]
mulx r12, rdi, QWORD PTR [rbp+24]
adcx r11, rdi
mov rdx, QWORD PTR [rax+8]
adcx r12, rbx
; A[1] * B[0]
mulx rsi, rdi, QWORD PTR [rbp]
; Restart both chains: low halves ride on CF, high halves on OF.
xor rbx, rbx
adcx r9, rdi
; A[1] * B[1]
mulx r15, rdi, r14
adox r10, rsi
adcx r10, rdi
; A[1] * B[2]
mulx rsi, rdi, QWORD PTR [rbp+16]
adox r11, r15
adcx r11, rdi
; A[1] * B[3]
mulx r13, rdi, QWORD PTR [rbp+24]
adox r12, rsi
adcx r12, rdi
adox r13, rbx
mov rdx, QWORD PTR [rax+16]
adcx r13, rbx
; A[2] * B[0]
mulx rsi, rdi, QWORD PTR [rbp]
xor rbx, rbx
adcx r10, rdi
; A[2] * B[1]
mulx r15, rdi, r14
adox r11, rsi
adcx r11, rdi
; A[2] * B[2]
mulx rsi, rdi, QWORD PTR [rbp+16]
adox r12, r15
adcx r12, rdi
; A[2] * B[3]
; r14 becomes a product word here, so the A[3] row reloads B[1] from memory.
mulx r14, rdi, QWORD PTR [rbp+24]
adox r13, rsi
adcx r13, rdi
adox r14, rbx
mov rdx, QWORD PTR [rax+24]
adcx r14, rbx
; A[3] * B[0]
mulx rsi, rdi, QWORD PTR [rbp]
xor rbx, rbx
adcx r11, rdi
; A[3] * B[1]
mulx r15, rdi, QWORD PTR [rbp+8]
adox r12, rsi
adcx r12, rdi
; A[3] * B[2]
mulx rsi, rdi, QWORD PTR [rbp+16]
adox r13, r15
adcx r13, rdi
; A[3] * B[3]
mulx r15, rdi, QWORD PTR [rbp+24]
adox r14, rsi
adcx r14, rdi
adox r15, rbx
adcx r15, rbx
; Start Reduction
; rbx = multiplier applied to the lowest live word in each round.
mov rbx, 14758798090332847183
; A[0]
; rdx = low 64 bits of rbx * r8; adding rdx * modulus clears word r8.
mov rdx, rbx
imul rdx, r8
mov rdi, 17562291160714782033
; rbp = 0 and CF/OF cleared for this round's chains.
xor rbp, rbp
mulx rax, rsi, rdi
mov rdi, 13611842547513532036
adcx r8, rsi
adox r9, rax
mulx rax, rsi, rdi
mov rdi, 18446744073709551615
adcx r9, rsi
adox r10, rax
mulx rax, rsi, rdi
mov rdi, 18446744069414584320
adcx r10, rsi
adox r11, rax
mulx rax, rsi, rdi
adcx r11, rsi
adox r12, rax
adcx r12, rbp
; r8 is free now; it collects this round's carry for the next round.
mov r8, rbp
; carry
adox r8, rbp
adcx r8, rbp
; A[1]
mov rdx, rbx
imul rdx, r9
mov rdi, 17562291160714782033
xor rbp, rbp
mulx rax, rsi, rdi
mov rdi, 13611842547513532036
adcx r9, rsi
adox r10, rax
mulx rax, rsi, rdi
mov rdi, 18446744073709551615
adcx r10, rsi
adox r11, rax
mulx rax, rsi, rdi
mov rdi, 18446744069414584320
adcx r11, rsi
adox r12, rax
mulx rax, rsi, rdi
adcx r12, rsi
adox r13, rax
; Add the carry left over from the previous round.
adcx r13, r8
mov r8, rbp
; carry
adox r8, rbp
adcx r8, rbp
; A[2]
mov rdx, rbx
imul rdx, r10
mov rdi, 17562291160714782033
xor rbp, rbp
mulx rax, rsi, rdi
mov rdi, 13611842547513532036
adcx r10, rsi
adox r11, rax
mulx rax, rsi, rdi
mov rdi, 18446744073709551615
adcx r11, rsi
adox r12, rax
mulx rax, rsi, rdi
mov rdi, 18446744069414584320
adcx r12, rsi
adox r13, rax
mulx rax, rsi, rdi
adcx r13, rsi
adox r14, rax
adcx r14, r8
mov r8, rbp
; carry
adox r8, rbp
adcx r8, rbp
; A[3]
mov rdx, rbx
imul rdx, r11
mov rdi, 17562291160714782033
xor rbp, rbp
mulx rax, rsi, rdi
mov rdi, 13611842547513532036
adcx r11, rsi
adox r12, rax
mulx rax, rsi, rdi
mov rdi, 18446744073709551615
adcx r12, rsi
adox r13, rax
mulx rax, rsi, rdi
mov rdi, 18446744069414584320
adcx r13, rsi
adox r14, rax
mulx rax, rsi, rdi
adcx r14, rsi
adox r15, rax
adcx r15, r8
mov r8, rbp
; carry
adox r8, rbp
adcx r8, rbp
; Turn the final carry into an all-zeros / all-ones mask and subtract the
; masked modulus from r12..r15. The third modulus word is all ones, so the
; mask itself stands in for it.
neg r8
mov rdi, 17562291160714782033
mov rbx, 13611842547513532036
and rdi, r8
mov rbp, 18446744069414584320
and rbx, r8
and rbp, r8
sub r12, rdi
sbb r13, rbx
mov QWORD PTR [rcx], r12
sbb r14, r8
mov QWORD PTR [rcx+8], r13
sbb r15, rbp
mov QWORD PTR [rcx+16], r14
mov QWORD PTR [rcx+24], r15
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rbp
ret
sp_256_mont_mul_order_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
; *
; * The off-diagonal products are summed once, doubled on the CF chain while
; * the squares A[i]*A[i] are added on the OF chain, and the 512-bit result
; * in r8..r15 is then reduced with the modulus constants held as immediates.
; *
; * r Result of squaring.
; * a Number to square in Montgomery form.
; */
_text SEGMENT READONLY PARA
sp_256_mont_sqr_order_avx2_4 PROC
push rbp
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; rax = a; rdx is needed as the implicit MULX factor.
mov rax, rdx
; Clears CF and OF for the chains below.
xor r8, r8
mov rdx, QWORD PTR [rax]
mov rsi, QWORD PTR [rax+8]
mov rbx, QWORD PTR [rax+16]
mov r15, QWORD PTR [rax+24]
; A[0] * A[1]
mulx r10, r9, rsi
; A[0] * A[2]
mulx r11, r8, rbx
adox r10, r8
; A[0] * A[3]
mulx r12, r8, r15
mov rdx, rsi
adox r11, r8
; A[1] * A[2]
mulx rdi, r8, rbx
mov rdx, r15
adcx r11, r8
; A[1] * A[3]
mulx r13, r8, rsi
; MOV rather than XOR so the live CF/OF chains are not disturbed.
mov r15, 0
adox r12, rdi
adcx r12, r8
; A[2] * A[3]
mulx r14, r8, rbx
adox r13, r15
adcx r13, r8
adox r14, r15
adcx r14, r15
; Double with Carry Flag
; r15 = 0 and both flags cleared; r15 receives the bit doubled out of r14.
xor r15, r15
; A[0] * A[0]
mov rdx, QWORD PTR [rax]
mulx rdi, r8, rdx
adcx r9, r9
adcx r10, r10
adox r9, rdi
; A[1] * A[1]
mov rdx, QWORD PTR [rax+8]
mulx rbx, rsi, rdx
adcx r11, r11
adox r10, rsi
; A[2] * A[2]
mov rdx, QWORD PTR [rax+16]
mulx rsi, rdi, rdx
adcx r12, r12
adox r11, rbx
adcx r13, r13
adox r12, rdi
adcx r14, r14
; A[3] * A[3]
mov rdx, QWORD PTR [rax+24]
mulx rbx, rdi, rdx
adox r13, rsi
adcx r15, r15
adox r14, rdi
adox r15, rbx
; Start Reduction
; rbx = multiplier applied to the lowest live word in each round.
mov rbx, 14758798090332847183
; A[0]
; rdx = low 64 bits of rbx * r8; adding rdx * modulus clears word r8.
mov rdx, rbx
imul rdx, r8
mov rdi, 17562291160714782033
; rbp = 0 and CF/OF cleared for this round's chains.
xor rbp, rbp
mulx rax, rsi, rdi
mov rdi, 13611842547513532036
adcx r8, rsi
adox r9, rax
mulx rax, rsi, rdi
mov rdi, 18446744073709551615
adcx r9, rsi
adox r10, rax
mulx rax, rsi, rdi
mov rdi, 18446744069414584320
adcx r10, rsi
adox r11, rax
mulx rax, rsi, rdi
adcx r11, rsi
adox r12, rax
adcx r12, rbp
; r8 is free now; it collects this round's carry for the next round.
mov r8, rbp
; carry
adox r8, rbp
adcx r8, rbp
; A[1]
mov rdx, rbx
imul rdx, r9
mov rdi, 17562291160714782033
xor rbp, rbp
mulx rax, rsi, rdi
mov rdi, 13611842547513532036
adcx r9, rsi
adox r10, rax
mulx rax, rsi, rdi
mov rdi, 18446744073709551615
adcx r10, rsi
adox r11, rax
mulx rax, rsi, rdi
mov rdi, 18446744069414584320
adcx r11, rsi
adox r12, rax
mulx rax, rsi, rdi
adcx r12, rsi
adox r13, rax
; Add the carry left over from the previous round.
adcx r13, r8
mov r8, rbp
; carry
adox r8, rbp
adcx r8, rbp
; A[2]
mov rdx, rbx
imul rdx, r10
mov rdi, 17562291160714782033
xor rbp, rbp
mulx rax, rsi, rdi
mov rdi, 13611842547513532036
adcx r10, rsi
adox r11, rax
mulx rax, rsi, rdi
mov rdi, 18446744073709551615
adcx r11, rsi
adox r12, rax
mulx rax, rsi, rdi
mov rdi, 18446744069414584320
adcx r12, rsi
adox r13, rax
mulx rax, rsi, rdi
adcx r13, rsi
adox r14, rax
adcx r14, r8
mov r8, rbp
; carry
adox r8, rbp
adcx r8, rbp
; A[3]
mov rdx, rbx
imul rdx, r11
mov rdi, 17562291160714782033
xor rbp, rbp
mulx rax, rsi, rdi
mov rdi, 13611842547513532036
adcx r11, rsi
adox r12, rax
mulx rax, rsi, rdi
mov rdi, 18446744073709551615
adcx r12, rsi
adox r13, rax
mulx rax, rsi, rdi
mov rdi, 18446744069414584320
adcx r13, rsi
adox r14, rax
mulx rax, rsi, rdi
adcx r14, rsi
adox r15, rax
adcx r15, r8
mov r8, rbp
; carry
adox r8, rbp
adcx r8, rbp
; Turn the final carry into an all-zeros / all-ones mask and subtract the
; masked modulus from r12..r15. The third modulus word is all ones, so the
; mask itself stands in for it.
neg r8
mov rdi, 17562291160714782033
mov rbx, 13611842547513532036
and rdi, r8
mov rbp, 18446744069414584320
and rbx, r8
and rbp, r8
sub r12, rdi
sbb r13, rbx
mov QWORD PTR [rcx], r12
sbb r14, r8
mov QWORD PTR [rcx+8], r13
sbb r15, rbp
mov QWORD PTR [rcx+16], r14
mov QWORD PTR [rcx+24], r15
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rbp
ret
sp_256_mont_sqr_order_avx2_4 ENDP
_text ENDS
ENDIF
; /* Non-constant time modular inversion.
; *
; * Phase 1 runs a binary GCD on u = m and v = a and logs one byte per step
; * in a buffer on the stack:
; *   0 = halve b, 1 = halve d, 2 = b -= d then halve b,
; *   3 = d -= b then halve d, 7 = end of log.
; * Phase 2 replays the log on b (starts as m) and d (starts as 1), keeping
; * both reduced mod m, and stores b if u reached 1 or d if v reached 1.
; *
; * NOTE(review): the loops assume a is non-zero and invertible modulo an odd
; * m. With a == 0 the first loop never ends and keeps writing log bytes up
; * the stack - confirm that every caller excludes that input.
; *
; * @param [out] r Resulting number.
; * @param [in] a Number to invert.
; * @param [in] m Modulus.
; * @return MP_OKAY on success.
; */
_text SEGMENT READONLY PARA
sp_256_mod_inv_4 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
; Operation log buffer. 520 covers the 513 bytes the log is sized for and,
; after the six pushes above, leaves rsp 16-byte aligned.
sub rsp, 520
; u = m in r9..r12, v = a in r13..r15,rdi (least significant word first).
mov r9, QWORD PTR [r8]
mov r10, QWORD PTR [r8+8]
mov r11, QWORD PTR [r8+16]
mov r12, QWORD PTR [r8+24]
mov r13, QWORD PTR [rdx]
mov r14, QWORD PTR [rdx+8]
mov r15, QWORD PTR [rdx+16]
mov rdi, QWORD PTR [rdx+24]
; rsi = number of log bytes written so far.
mov rsi, 0
test r13b, 1
jnz L_256_mod_inv_4_v_even_end
; Strip factors of two from v, logging a "halve d" for each.
L_256_mod_inv_4_v_even_start:
shrd r13, r14, 1
shrd r14, r15, 1
shrd r15, rdi, 1
shr rdi, 1
mov BYTE PTR [rsp+rsi], 1
inc rsi
test r13b, 1
jz L_256_mod_inv_4_v_even_start
L_256_mod_inv_4_v_even_end:
; Main loop: unsigned compare of u and v from the top word down.
L_256_mod_inv_4_uv_start:
cmp r12, rdi
jb L_256_mod_inv_4_uv_v
ja L_256_mod_inv_4_uv_u
cmp r11, r15
jb L_256_mod_inv_4_uv_v
ja L_256_mod_inv_4_uv_u
cmp r10, r14
jb L_256_mod_inv_4_uv_v
ja L_256_mod_inv_4_uv_u
cmp r9, r13
jb L_256_mod_inv_4_uv_v
; u >= v: u = (u - v) / 2, then keep halving while u is even.
L_256_mod_inv_4_uv_u:
mov BYTE PTR [rsp+rsi], 2
inc rsi
sub r9, r13
sbb r10, r14
sbb r11, r15
sbb r12, rdi
; The first halving is implied by log code 2 and is not logged separately.
shrd r9, r10, 1
shrd r10, r11, 1
shrd r11, r12, 1
shr r12, 1
test r9b, 1
jnz L_256_mod_inv_4_usubv_even_end
L_256_mod_inv_4_usubv_even_start:
shrd r9, r10, 1
shrd r10, r11, 1
shrd r11, r12, 1
shr r12, 1
mov BYTE PTR [rsp+rsi], 0
inc rsi
test r9b, 1
jz L_256_mod_inv_4_usubv_even_start
L_256_mod_inv_4_usubv_even_end:
; Finished once u == 1; al = 1 selects b as the result.
cmp r9, 1
jne L_256_mod_inv_4_uv_start
mov rdx, r10
or rdx, r11
jne L_256_mod_inv_4_uv_start
or rdx, r12
jne L_256_mod_inv_4_uv_start
mov al, 1
jmp L_256_mod_inv_4_uv_end
; u < v: v = (v - u) / 2, then keep halving while v is even.
L_256_mod_inv_4_uv_v:
mov BYTE PTR [rsp+rsi], 3
inc rsi
sub r13, r9
sbb r14, r10
sbb r15, r11
sbb rdi, r12
; The first halving is implied by log code 3 and is not logged separately.
shrd r13, r14, 1
shrd r14, r15, 1
shrd r15, rdi, 1
shr rdi, 1
test r13b, 1
jnz L_256_mod_inv_4_vsubu_even_end
L_256_mod_inv_4_vsubu_even_start:
shrd r13, r14, 1
shrd r14, r15, 1
shrd r15, rdi, 1
shr rdi, 1
mov BYTE PTR [rsp+rsi], 1
inc rsi
test r13b, 1
jz L_256_mod_inv_4_vsubu_even_start
L_256_mod_inv_4_vsubu_even_end:
; Finished once v == 1; al = 0 selects d as the result.
cmp r13, 1
jne L_256_mod_inv_4_uv_start
mov rdx, r14
or rdx, r15
jne L_256_mod_inv_4_uv_start
or rdx, rdi
jne L_256_mod_inv_4_uv_start
mov al, 0
L_256_mod_inv_4_uv_end:
; Phase 2: b = m in r9..r12, d = 1 in r13..r15,rdi.
mov r9, QWORD PTR [r8]
mov r10, QWORD PTR [r8+8]
mov r11, QWORD PTR [r8+16]
mov r12, QWORD PTR [r8+24]
mov r13, 1
xor r14, r14
xor r15, r15
xor rdi, rdi
; Terminate the log, then dispatch on its first entry.
mov BYTE PTR [rsp+rsi], 7
mov dl, BYTE PTR [rsp]
mov rsi, 1
cmp dl, 1
je L_256_mod_inv_4_op_div2_d
jl L_256_mod_inv_4_op_div2_b
cmp dl, 3
je L_256_mod_inv_4_op_d_sub_b
jl L_256_mod_inv_4_op_b_sub_d
jmp L_256_mod_inv_4_op_end
; b = (b - d) mod m, then fall through to halve b.
L_256_mod_inv_4_op_b_sub_d:
sub r9, r13
sbb r10, r14
sbb r11, r15
sbb r12, rdi
jnc L_256_mod_inv_4_op_div2_b
add r9, QWORD PTR [r8]
adc r10, QWORD PTR [r8+8]
adc r11, QWORD PTR [r8+16]
adc r12, QWORD PTR [r8+24]
; b = b / 2 mod m: add m first when b is odd, then shift right one bit.
L_256_mod_inv_4_op_div2_b:
test r9b, 1
; MOV, not XOR: the flags from TEST are still needed by the JZ.
mov rdx, 0
jz L_256_mod_inv_4_op_div2_b_mod
add r9, QWORD PTR [r8]
adc r10, QWORD PTR [r8+8]
adc r11, QWORD PTR [r8+16]
adc r12, QWORD PTR [r8+24]
; rdx catches the carry out of the addition so it is shifted back in.
adc rdx, 0
L_256_mod_inv_4_op_div2_b_mod:
shrd r9, r10, 1
shrd r10, r11, 1
shrd r11, r12, 1
shrd r12, rdx, 1
mov dl, BYTE PTR [rsp+rsi]
inc rsi
cmp dl, 1
je L_256_mod_inv_4_op_div2_d
jl L_256_mod_inv_4_op_div2_b
cmp dl, 3
je L_256_mod_inv_4_op_d_sub_b
jl L_256_mod_inv_4_op_b_sub_d
jmp L_256_mod_inv_4_op_end
; d = (d - b) mod m, then fall through to halve d.
L_256_mod_inv_4_op_d_sub_b:
sub r13, r9
sbb r14, r10
sbb r15, r11
sbb rdi, r12
jnc L_256_mod_inv_4_op_div2_d
add r13, QWORD PTR [r8]
adc r14, QWORD PTR [r8+8]
adc r15, QWORD PTR [r8+16]
adc rdi, QWORD PTR [r8+24]
; d = d / 2 mod m: add m first when d is odd, then shift right one bit.
L_256_mod_inv_4_op_div2_d:
test r13b, 1
; MOV, not XOR: the flags from TEST are still needed by the JZ.
mov rdx, 0
jz L_256_mod_inv_4_op_div2_d_mod
add r13, QWORD PTR [r8]
adc r14, QWORD PTR [r8+8]
adc r15, QWORD PTR [r8+16]
adc rdi, QWORD PTR [r8+24]
adc rdx, 0
L_256_mod_inv_4_op_div2_d_mod:
shrd r13, r14, 1
shrd r14, r15, 1
shrd r15, rdi, 1
shrd rdi, rdx, 1
mov dl, BYTE PTR [rsp+rsi]
inc rsi
cmp dl, 1
je L_256_mod_inv_4_op_div2_d
jl L_256_mod_inv_4_op_div2_b
cmp dl, 3
je L_256_mod_inv_4_op_d_sub_b
jl L_256_mod_inv_4_op_b_sub_d
; Any other code (the 7 terminator) falls through to the end.
L_256_mod_inv_4_op_end:
; al was set in phase 1: 1 stores b, 0 stores d.
cmp al, 1
jne L_256_mod_inv_4_store_d
mov QWORD PTR [rcx], r9
mov QWORD PTR [rcx+8], r10
mov QWORD PTR [rcx+16], r11
mov QWORD PTR [rcx+24], r12
jmp L_256_mod_inv_4_store_end
L_256_mod_inv_4_store_d:
mov QWORD PTR [rcx], r13
mov QWORD PTR [rcx+8], r14
mov QWORD PTR [rcx+16], r15
mov QWORD PTR [rcx+24], rdi
L_256_mod_inv_4_store_end:
add rsp, 520
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
; Return 0, assumed here to be the value of MP_OKAY. Without this, rax
; would still carry the b/d selector left in al.
xor eax, eax
ret
sp_256_mod_inv_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; Constant tables for sp_256_mod_inv_avx2_4. Each table is followed by a
; ptr_ QWORD holding its address, which the routine loads before reading the
; table itself.
_DATA SEGMENT
ALIGN 16
; Modulus as ten 26-bit limbs: the first eight dwords hold limbs 0,2,4,6,8
; (then zero padding), the next eight hold limbs 1,3,5,7,9 (then padding).
L_sp256_mod_inv_avx2_4_order DWORD 6497617,32001851,62711546,67108863,67043328,0,0,0,41070783,45522014,67108863,1023,4194303,0,0,0
ptr_L_sp256_mod_inv_avx2_4_order QWORD L_sp256_mod_inv_avx2_4_order
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; The value 1 in limb 0: initial value of d and the mask for the odd test.
; The trailing comma continues the definition onto the next line.
L_sp256_mod_inv_avx2_4_one QWORD 1, 0,
0, 0
ptr_L_sp256_mod_inv_avx2_4_one QWORD L_sp256_mod_inv_avx2_4_one
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; Bit 0 of every dword: picks the low bit of each odd-numbered limb so it
; can be moved into bit 25 of the even-numbered limb below it when halving.
L_sp256_mod_inv_avx2_4_all_one DWORD 1,1,1,1,1,1,1,1
ptr_L_sp256_mod_inv_avx2_4_all_one QWORD L_sp256_mod_inv_avx2_4_all_one
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; Bit 0 of dwords 1..4: picks the low bit of even-numbered limbs 2,4,6,8,
; which drop into the odd-numbered limb below when halving.
L_sp256_mod_inv_avx2_4_mask01111 DWORD 0,1,1,1,1,0,0,0
ptr_L_sp256_mod_inv_avx2_4_mask01111 QWORD L_sp256_mod_inv_avx2_4_mask01111
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; VPERMD indices that move each dword down one position.
L_sp256_mod_inv_avx2_4_down_one_dword DWORD 1,2,3,4,5,6,7,7
ptr_L_sp256_mod_inv_avx2_4_down_one_dword QWORD L_sp256_mod_inv_avx2_4_down_one_dword
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; Sign bit of dword 4: tested against the odd-limb vector to detect a
; negative top limb after a subtraction.
L_sp256_mod_inv_avx2_4_neg DWORD 0,0,0,0,2147483648,0,0,0
ptr_L_sp256_mod_inv_avx2_4_neg QWORD L_sp256_mod_inv_avx2_4_neg
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; Loaded into ymm13 by sp_256_mod_inv_avx2_4 but not referenced again there.
L_sp256_mod_inv_avx2_4_up_one_dword DWORD 7,0,1,2,3,7,7,7
ptr_L_sp256_mod_inv_avx2_4_up_one_dword QWORD L_sp256_mod_inv_avx2_4_up_one_dword
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; 2^26 - 1 in the five used dwords. Loaded into ymm14 by
; sp_256_mod_inv_avx2_4 but not referenced again there.
L_sp256_mod_inv_avx2_4_mask26 DWORD 67108863,67108863,67108863,67108863,67108863,0,0,0
ptr_L_sp256_mod_inv_avx2_4_mask26 QWORD L_sp256_mod_inv_avx2_4_mask26
_DATA ENDS
; /* Non-constant time modular inversion.
; *
; * Binary GCD on u = m (rax,r9,r10,r11) and v = a (r12..r15) in scalar
; * registers. The coefficients are tracked alongside in vectors of signed
; * 26-bit limbs: b in ymm0 (even limbs) / ymm1 (odd limbs), starting at 0,
; * and d in ymm2 / ymm3, starting at 1. When u or v reaches 1 the matching
; * coefficient is normalised, packed into four 64-bit words and stored.
; *
; * NOTE(review): the coefficient arithmetic reduces with the built-in
; * L_sp256_mod_inv_avx2_4_order table, not with the words at m, so m is
; * presumably always that same value - confirm against the callers.
; * NOTE(review): a is assumed non-zero and invertible; a == 0 never leaves
; * the first loop.
; *
; * @param [out] r Resulting number.
; * @param [in] a Number to invert.
; * @param [in] m Modulus.
; * @return MP_OKAY on success.
; */
_text SEGMENT READONLY PARA
sp_256_mod_inv_avx2_4 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; Save xmm6-xmm14, which are callee-saved in the Microsoft x64 convention.
sub rsp, 144
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
vmovdqu OWORD PTR [rsp+32], xmm8
vmovdqu OWORD PTR [rsp+48], xmm9
vmovdqu OWORD PTR [rsp+64], xmm10
vmovdqu OWORD PTR [rsp+80], xmm11
vmovdqu OWORD PTR [rsp+96], xmm12
vmovdqu OWORD PTR [rsp+112], xmm13
vmovdqu OWORD PTR [rsp+128], xmm14
; u = m, v = a.
mov rax, QWORD PTR [r8]
mov r9, QWORD PTR [r8+8]
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
mov r12, QWORD PTR [rdx]
mov r13, QWORD PTR [rdx+8]
mov r14, QWORD PTR [rdx+16]
mov r15, QWORD PTR [rdx+24]
; Constant tables: ymm6/ymm7 = order (even/odd limbs), ymm8 = one,
; ymm9 = mask01111, ymm10 = all_one, ymm11 = down_one_dword, ymm12 = neg,
; ymm13 = up_one_dword, ymm14 = mask26.
mov rbx, QWORD PTR [ptr_L_sp256_mod_inv_avx2_4_order]
vmovupd ymm6, YMMWORD PTR [rbx]
vmovupd ymm7, YMMWORD PTR [rbx+32]
mov rbx, QWORD PTR [ptr_L_sp256_mod_inv_avx2_4_one]
vmovupd ymm8, YMMWORD PTR [rbx]
mov rbx, QWORD PTR [ptr_L_sp256_mod_inv_avx2_4_mask01111]
vmovupd ymm9, YMMWORD PTR [rbx]
mov rbx, QWORD PTR [ptr_L_sp256_mod_inv_avx2_4_all_one]
vmovupd ymm10, YMMWORD PTR [rbx]
mov rbx, QWORD PTR [ptr_L_sp256_mod_inv_avx2_4_down_one_dword]
vmovupd ymm11, YMMWORD PTR [rbx]
mov rbx, QWORD PTR [ptr_L_sp256_mod_inv_avx2_4_neg]
vmovupd ymm12, YMMWORD PTR [rbx]
mov rbx, QWORD PTR [ptr_L_sp256_mod_inv_avx2_4_up_one_dword]
vmovupd ymm13, YMMWORD PTR [rbx]
mov rbx, QWORD PTR [ptr_L_sp256_mod_inv_avx2_4_mask26]
vmovupd ymm14, YMMWORD PTR [rbx]
; b = 0 (ymm0/ymm1), d = 1 (ymm2/ymm3).
vpxor xmm0, xmm0, xmm0
vpxor xmm1, xmm1, xmm1
vmovdqu ymm2, ymm8
vpxor xmm3, xmm3, xmm3
test r12b, 1
jnz L_256_mod_inv_avx2_4_v_even_end
; While v is even: v >>= 1 and d = d / 2 mod order.
L_256_mod_inv_avx2_4_v_even_start:
shrd r12, r13, 1
shrd r13, r14, 1
shrd r14, r15, 1
shr r15, 1
; If d is odd, add the order first so the halving is exact.
vptest ymm2, ymm8
jz L_256_mod_inv_avx2_4_v_even_shr1
vpaddd ymm2, ymm2, ymm6
vpaddd ymm3, ymm3, ymm7
L_256_mod_inv_avx2_4_v_even_shr1:
; Shift the limb pair right one bit: the low bit of each limb becomes
; bit 25 of the limb below it.
vpand ymm4, ymm2, ymm9
vpand ymm5, ymm3, ymm10
vpermd ymm4, ymm11, ymm4
vpsrad ymm2, ymm2, 1
vpsrad ymm3, ymm3, 1
vpslld ymm5, ymm5, 25
vpslld xmm4, xmm4, 25
vpaddd ymm2, ymm2, ymm5
vpaddd ymm3, ymm3, ymm4
test r12b, 1
jz L_256_mod_inv_avx2_4_v_even_start
L_256_mod_inv_avx2_4_v_even_end:
; Main loop: unsigned compare of u and v from the top word down.
L_256_mod_inv_avx2_4_uv_start:
cmp r11, r15
jb L_256_mod_inv_avx2_4_uv_v
ja L_256_mod_inv_avx2_4_uv_u
cmp r10, r14
jb L_256_mod_inv_avx2_4_uv_v
ja L_256_mod_inv_avx2_4_uv_u
cmp r9, r13
jb L_256_mod_inv_avx2_4_uv_v
ja L_256_mod_inv_avx2_4_uv_u
cmp rax, r12
jb L_256_mod_inv_avx2_4_uv_v
; u >= v: u -= v and b -= d. The vector ops leave the scalar borrow chain
; in CF untouched.
L_256_mod_inv_avx2_4_uv_u:
sub rax, r12
sbb r9, r13
vpsubd ymm0, ymm0, ymm2
sbb r10, r14
vpsubd ymm1, ymm1, ymm3
sbb r11, r15
; If the top limb of b went negative, add the order back.
vptest ymm1, ymm12
jz L_256_mod_inv_avx2_4_usubv_done_neg
vpaddd ymm0, ymm0, ymm6
vpaddd ymm1, ymm1, ymm7
L_256_mod_inv_avx2_4_usubv_done_neg:
; Halve u and b until u is odd again.
L_256_mod_inv_avx2_4_usubv_shr1:
shrd rax, r9, 1
shrd r9, r10, 1
shrd r10, r11, 1
shr r11, 1
vptest ymm0, ymm8
jz L_256_mod_inv_avx2_4_usubv_sub_shr1
vpaddd ymm0, ymm0, ymm6
vpaddd ymm1, ymm1, ymm7
L_256_mod_inv_avx2_4_usubv_sub_shr1:
vpand ymm4, ymm0, ymm9
vpand ymm5, ymm1, ymm10
vpermd ymm4, ymm11, ymm4
vpsrad ymm0, ymm0, 1
vpsrad ymm1, ymm1, 1
vpslld ymm5, ymm5, 25
vpslld xmm4, xmm4, 25
vpaddd ymm0, ymm0, ymm5
vpaddd ymm1, ymm1, ymm4
test al, 1
jz L_256_mod_inv_avx2_4_usubv_shr1
; Done once u == 1; the result is b.
cmp rax, 1
jne L_256_mod_inv_avx2_4_uv_start
mov rdx, r9
or rdx, r10
jne L_256_mod_inv_avx2_4_uv_start
or rdx, r11
jne L_256_mod_inv_avx2_4_uv_start
; Pull the ten limbs of b out in order: eax, r9d, r10d, r11d, r12d, r13d,
; r14d, r15d, edi, esi = limbs 0..9.
vpextrd eax, xmm0, 0
vpextrd r10d, xmm0, 1
vpextrd r12d, xmm0, 2
vpextrd r14d, xmm0, 3
vpextrd r9d, xmm1, 0
vpextrd r11d, xmm1, 1
vpextrd r13d, xmm1, 2
vpextrd r15d, xmm1, 3
vextracti128 xmm0, ymm0, 1
vextracti128 xmm1, ymm1, 1
vpextrd edi, xmm0, 0
vpextrd esi, xmm1, 0
jmp L_256_mod_inv_avx2_4_store_done
; u < v: v -= u and d -= b, mirroring the branch above.
L_256_mod_inv_avx2_4_uv_v:
sub r12, rax
sbb r13, r9
vpsubd ymm2, ymm2, ymm0
sbb r14, r10
vpsubd ymm3, ymm3, ymm1
sbb r15, r11
vptest ymm3, ymm12
jz L_256_mod_inv_avx2_4_vsubu_done_neg
vpaddd ymm2, ymm2, ymm6
vpaddd ymm3, ymm3, ymm7
L_256_mod_inv_avx2_4_vsubu_done_neg:
; Halve v and d until v is odd again.
L_256_mod_inv_avx2_4_vsubu_shr1:
shrd r12, r13, 1
shrd r13, r14, 1
shrd r14, r15, 1
shr r15, 1
vptest ymm2, ymm8
jz L_256_mod_inv_avx2_4_vsubu_sub_shr1
vpaddd ymm2, ymm2, ymm6
vpaddd ymm3, ymm3, ymm7
L_256_mod_inv_avx2_4_vsubu_sub_shr1:
vpand ymm4, ymm2, ymm9
vpand ymm5, ymm3, ymm10
vpermd ymm4, ymm11, ymm4
vpsrad ymm2, ymm2, 1
vpsrad ymm3, ymm3, 1
vpslld ymm5, ymm5, 25
vpslld xmm4, xmm4, 25
vpaddd ymm2, ymm2, ymm5
vpaddd ymm3, ymm3, ymm4
test r12b, 1
jz L_256_mod_inv_avx2_4_vsubu_shr1
; Done once v == 1; the result is d.
cmp r12, 1
jne L_256_mod_inv_avx2_4_uv_start
mov rdx, r13
or rdx, r14
jne L_256_mod_inv_avx2_4_uv_start
or rdx, r15
jne L_256_mod_inv_avx2_4_uv_start
; Pull the ten limbs of d out in the same register order as for b.
vpextrd eax, xmm2, 0
vpextrd r10d, xmm2, 1
vpextrd r12d, xmm2, 2
vpextrd r14d, xmm2, 3
vpextrd r9d, xmm3, 0
vpextrd r11d, xmm3, 1
vpextrd r13d, xmm3, 2
vpextrd r15d, xmm3, 3
vextracti128 xmm2, ymm2, 1
vextracti128 xmm3, ymm3, 1
vpextrd edi, xmm2, 0
vpextrd esi, xmm3, 0
L_256_mod_inv_avx2_4_store_done:
; Propagate signed carries so limbs 0..8 lie in [0, 2^26); limb 9 (esi)
; keeps the sign.
mov edx, eax
and eax, 67108863
sar edx, 26
add r9d, edx
mov edx, r9d
and r9d, 67108863
sar edx, 26
add r10d, edx
mov edx, r10d
and r10d, 67108863
sar edx, 26
add r11d, edx
mov edx, r11d
and r11d, 67108863
sar edx, 26
add r12d, edx
mov edx, r12d
and r12d, 67108863
sar edx, 26
add r13d, edx
mov edx, r13d
and r13d, 67108863
sar edx, 26
add r14d, edx
mov edx, r14d
and r14d, 67108863
sar edx, 26
add r15d, edx
mov edx, r15d
and r15d, 67108863
sar edx, 26
add edi, edx
mov edx, edi
and edi, 67108863
sar edx, 26
add esi, edx
; Merge limb pairs into five 52-bit limbs: rax, r10, r12, r14, rdi.
movsxd r9, r9d
movsxd r11, r11d
movsxd r13, r13d
movsxd r15, r15d
movsxd rsi, esi
shl r9, 26
shl r11, 26
shl r13, 26
shl r15, 26
shl rsi, 26
movsxd rax, eax
add rax, r9
movsxd r10, r10d
adc r10, r11
movsxd r12, r12d
adc r12, r13
movsxd r14, r14d
adc r14, r15
movsxd rdi, edi
adc rdi, rsi
; Signed test of the top limb: only a negative value gets the order added.
jge L_256_mod_inv_avx2_4_3_no_add_order
; The order as five 52-bit limbs.
mov r9, 2756213597218129
mov r11, 3054930678533947
mov r13, 4503599622973178
mov r15, 68719476735
mov rsi, 281474976645120
add rax, r9
add r10, r11
add r12, r13
add r14, r15
add rdi, rsi
; Re-normalise to 52 bits per limb (rdx = 2^52 - 1).
mov rdx, 4503599627370495
mov r9, rax
and rax, rdx
sar r9, 52
add r10, r9
mov r11, r10
and r10, rdx
sar r11, 52
add r12, r11
mov r13, r12
and r12, rdx
sar r13, 52
add r14, r13
mov r15, r14
and r14, rdx
sar r15, 52
add rdi, r15
L_256_mod_inv_avx2_4_3_no_add_order:
; Pack the five 52-bit limbs into four 64-bit words.
mov r9, r10
mov r11, r12
mov r13, r14
shl r9, 52
sar r10, 12
shl r11, 40
sar r12, 24
shl r13, 28
sar r14, 36
shl rdi, 16
add rax, r9
adc r10, r11
adc r12, r13
adc r14, rdi
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov QWORD PTR [rcx+16], r12
mov QWORD PTR [rcx+24], r14
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm8, OWORD PTR [rsp+32]
vmovdqu xmm9, OWORD PTR [rsp+48]
vmovdqu xmm10, OWORD PTR [rsp+64]
vmovdqu xmm11, OWORD PTR [rsp+80]
vmovdqu xmm12, OWORD PTR [rsp+96]
vmovdqu xmm13, OWORD PTR [rsp+112]
vmovdqu xmm14, OWORD PTR [rsp+128]
; The 256-bit work above leaves upper YMM halves dirty; clear them so code
; using legacy SSE after the return pays no transition penalty. The low
; 128 bits, including the xmm6-xmm14 values just restored, are unaffected.
vzeroupper
add rsp, 144
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
; Return 0, assumed here to be the value of MP_OKAY. Without this, rax
; would still hold the low word of the result.
xor eax, eax
ret
sp_256_mod_inv_avx2_4 ENDP
_text ENDS
ENDIF
ENDIF
IFDEF WOLFSSL_SP_384
; /* Multiply a and b into r. (r = a * b)
; *
; * Column-wise schoolbook multiplication with three accumulator registers
; * rotating through r10, r11 and r12. Result words 0..5 are staged on the
; * stack and copied to r only after the last read of a and b; words 6..11
; * are written to r directly.
; *
; * r A single precision integer; twelve 64-bit words are written.
; * a A single precision integer; six 64-bit words are read.
; * b A single precision integer; six 64-bit words are read.
; */
_text SEGMENT READONLY PARA
sp_384_mul_6 PROC
push r12
; rcx = r, r9 = a, r8 = b. MUL overwrites rdx, so a is kept in r9.
mov r9, rdx
; 48 bytes of scratch for the six low result words.
sub rsp, 48
; A[0] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9]
xor r12, r12
mov QWORD PTR [rsp], rax
mov r11, rdx
; A[0] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9]
; Each column zeroes the register that becomes its third accumulator word.
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+8], r11
; A[0] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+16], r12
; A[0] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+24], r10
; A[0] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+32], r11
; A[0] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+40], r12
; A[1] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+8]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; From word 6 upwards the result goes straight to r.
mov QWORD PTR [rcx+48], r10
; A[2] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+16]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+56], r11
; A[3] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+24]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+64], r12
; A[4] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+32]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+72], r10
; A[5] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
mov QWORD PTR [rcx+80], r11
mov QWORD PTR [rcx+88], r12
; Copy the staged low six words into r.
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r10, QWORD PTR [rsp+16]
mov r11, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov rax, QWORD PTR [rsp+32]
mov rdx, QWORD PTR [rsp+40]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], rdx
add rsp, 48
pop r12
ret
sp_384_mul_6 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
; *
; * Row-wise multiplication: each word of a is multiplied by all six words
; * of b with MULX, low halves accumulating on the ADCX (CF) chain and high
; * halves on the ADOX (OF) chain. Result words 0..4 are staged on the stack
; * and copied to r after the last read of a and b.
; *
; * r Result of multiplication; twelve 64-bit words are written.
; * a First number to multiply; six 64-bit words are read.
; * b Second number to multiply; six 64-bit words are read.
; */
_text SEGMENT READONLY PARA
sp_384_mul_avx2_6 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; rax = a, r8 = b; rdx is needed as the implicit MULX factor.
mov rax, rdx
; 40 bytes of scratch for the five low result words.
sub rsp, 40
; rbx = 0 and CF/OF cleared.
xor rbx, rbx
mov rdx, QWORD PTR [rax]
; A[0] * B[0]
mulx r12, r11, QWORD PTR [r8]
; A[0] * B[1]
mulx r13, r9, QWORD PTR [r8+8]
adcx r12, r9
; A[0] * B[2]
mulx r14, r9, QWORD PTR [r8+16]
adcx r13, r9
; A[0] * B[3]
mulx r15, r9, QWORD PTR [r8+24]
adcx r14, r9
; A[0] * B[4]
mulx rdi, r9, QWORD PTR [r8+32]
adcx r15, r9
; A[0] * B[5]
mulx rsi, r9, QWORD PTR [r8+40]
adcx rdi, r9
adcx rsi, rbx
; Word 0 is final: stage it and recycle r11 as the new top word.
mov QWORD PTR [rsp], r11
; MOV keeps the flags so the ADCX below can pick up the pending carry.
mov r11, 0
adcx r11, rbx
; Clear both flags before the next row.
xor rbx, rbx
mov rdx, QWORD PTR [rax+8]
; A[1] * B[0]
mulx r10, r9, QWORD PTR [r8]
adcx r12, r9
adox r13, r10
; A[1] * B[1]
mulx r10, r9, QWORD PTR [r8+8]
adcx r13, r9
adox r14, r10
; A[1] * B[2]
mulx r10, r9, QWORD PTR [r8+16]
adcx r14, r9
adox r15, r10
; A[1] * B[3]
mulx r10, r9, QWORD PTR [r8+24]
adcx r15, r9
adox rdi, r10
; A[1] * B[4]
mulx r10, r9, QWORD PTR [r8+32]
adcx rdi, r9
adox rsi, r10
; A[1] * B[5]
mulx r10, r9, QWORD PTR [r8+40]
adcx rsi, r9
adox r11, r10
adcx r11, rbx
mov QWORD PTR [rsp+8], r12
mov r12, 0
adcx r12, rbx
adox r12, rbx
xor rbx, rbx
mov rdx, QWORD PTR [rax+16]
; A[2] * B[0]
mulx r10, r9, QWORD PTR [r8]
adcx r13, r9
adox r14, r10
; A[2] * B[1]
mulx r10, r9, QWORD PTR [r8+8]
adcx r14, r9
adox r15, r10
; A[2] * B[2]
mulx r10, r9, QWORD PTR [r8+16]
adcx r15, r9
adox rdi, r10
; A[2] * B[3]
mulx r10, r9, QWORD PTR [r8+24]
adcx rdi, r9
adox rsi, r10
; A[2] * B[4]
mulx r10, r9, QWORD PTR [r8+32]
adcx rsi, r9
adox r11, r10
; A[2] * B[5]
mulx r10, r9, QWORD PTR [r8+40]
adcx r11, r9
adox r12, r10
adcx r12, rbx
mov QWORD PTR [rsp+16], r13
mov r13, 0
adcx r13, rbx
adox r13, rbx
xor rbx, rbx
mov rdx, QWORD PTR [rax+24]
; A[3] * B[0]
mulx r10, r9, QWORD PTR [r8]
adcx r14, r9
adox r15, r10
; A[3] * B[1]
mulx r10, r9, QWORD PTR [r8+8]
adcx r15, r9
adox rdi, r10
; A[3] * B[2]
mulx r10, r9, QWORD PTR [r8+16]
adcx rdi, r9
adox rsi, r10
; A[3] * B[3]
mulx r10, r9, QWORD PTR [r8+24]
adcx rsi, r9
adox r11, r10
; A[3] * B[4]
mulx r10, r9, QWORD PTR [r8+32]
adcx r11, r9
adox r12, r10
; A[3] * B[5]
mulx r10, r9, QWORD PTR [r8+40]
adcx r12, r9
adox r13, r10
adcx r13, rbx
mov QWORD PTR [rsp+24], r14
mov r14, 0
adcx r14, rbx
adox r14, rbx
xor rbx, rbx
mov rdx, QWORD PTR [rax+32]
; A[4] * B[0]
mulx r10, r9, QWORD PTR [r8]
adcx r15, r9
adox rdi, r10
; A[4] * B[1]
mulx r10, r9, QWORD PTR [r8+8]
adcx rdi, r9
adox rsi, r10
; A[4] * B[2]
mulx r10, r9, QWORD PTR [r8+16]
adcx rsi, r9
adox r11, r10
; A[4] * B[3]
mulx r10, r9, QWORD PTR [r8+24]
adcx r11, r9
adox r12, r10
; A[4] * B[4]
mulx r10, r9, QWORD PTR [r8+32]
adcx r12, r9
adox r13, r10
; A[4] * B[5]
mulx r10, r9, QWORD PTR [r8+40]
adcx r13, r9
adox r14, r10
adcx r14, rbx
mov QWORD PTR [rsp+32], r15
; No flag reset before the last row: the chains continue from the state
; left by the A[4] row.
mov rdx, QWORD PTR [rax+40]
; A[5] * B[0]
mulx r10, r9, QWORD PTR [r8]
adcx rdi, r9
adox rsi, r10
; A[5] * B[1]
mulx r10, r9, QWORD PTR [r8+8]
adcx rsi, r9
adox r11, r10
; A[5] * B[2]
mulx r10, r9, QWORD PTR [r8+16]
adcx r11, r9
adox r12, r10
; A[5] * B[3]
mulx r10, r9, QWORD PTR [r8+24]
adcx r12, r9
adox r13, r10
; A[5] * B[4]
mulx r10, r9, QWORD PTR [r8+32]
adcx r13, r9
adox r14, r10
; A[5] * B[5]
; r15 was staged above, so it can take the top word directly.
mulx r15, r9, QWORD PTR [r8+40]
adcx r14, r9
adox r15, rbx
adcx r15, rbx
; Words 5..11 are still in registers; store them, then copy the staged
; words 0..4.
mov QWORD PTR [rcx+40], rdi
mov QWORD PTR [rcx+48], rsi
mov QWORD PTR [rcx+56], r11
mov QWORD PTR [rcx+64], r12
mov QWORD PTR [rcx+72], r13
mov QWORD PTR [rcx+80], r14
mov QWORD PTR [rcx+88], r15
mov r11, QWORD PTR [rsp]
mov r12, QWORD PTR [rsp+8]
mov r13, QWORD PTR [rsp+16]
mov r14, QWORD PTR [rsp+24]
mov r15, QWORD PTR [rsp+32]
mov QWORD PTR [rcx], r11
mov QWORD PTR [rcx+8], r12
mov QWORD PTR [rcx+16], r13
mov QWORD PTR [rcx+24], r14
mov QWORD PTR [rcx+32], r15
add rsp, 40
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_384_mul_avx2_6 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r. (r = a * a)
; *
; * Column-wise squaring with three accumulator registers rotating through
; * r9, r10 and r11. Each off-diagonal product A[i]*A[j] (i != j) is added
; * twice; the middle column sums its three products in r12..r14 and doubles
; * the sum instead. Result words 0..5 are staged on the stack and copied to
; * r after the last read of a.
; *
; * r A single precision integer; twelve 64-bit words are written.
; * a A single precision integer; six 64-bit words are read.
; */
_text SEGMENT READONLY PARA
sp_384_sqr_6 PROC
push r12
push r13
push r14
; rcx = r, r8 = a. MUL overwrites rdx, so a is kept in r8.
mov r8, rdx
; 48 bytes of scratch for the six low result words.
sub rsp, 48
; A[0] * A[0]
mov rax, QWORD PTR [r8]
mul rax
xor r11, r11
mov QWORD PTR [rsp], rax
mov r10, rdx
; A[0] * A[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r8]
xor r9, r9
; Added twice: the product appears as both A[0]*A[1] and A[1]*A[0].
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+8], r10
; A[0] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[1] * A[1]
mov rax, QWORD PTR [r8+8]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rsp+16], r11
; A[0] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[1] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8+8]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+24], r9
; A[0] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[1] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+8]
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[2] * A[2]
mov rax, QWORD PTR [r8+16]
mul rax
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+32], r10
; A[0] * A[5]
; Middle column: sum the three products in r12:r13:r14 first.
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; Double the 3-word sum, then add it to the running accumulator.
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+40], r11
; A[1] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+8]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[2] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+16]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[3] * A[3]
mov rax, QWORD PTR [r8+24]
mul rax
add r9, rax
adc r10, rdx
adc r11, 0
; From word 6 upwards the result goes straight to r.
mov QWORD PTR [rcx+48], r9
; A[2] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+16]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[3] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+24]
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rcx+56], r10
; A[3] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+24]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[4] * A[4]
mov rax, QWORD PTR [r8+32]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rcx+64], r11
; A[4] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+32]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+72], r9
; A[5] * A[5]
mov rax, QWORD PTR [r8+40]
mul rax
add r10, rax
adc r11, rdx
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
; Copy the staged low six words into r.
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r12, QWORD PTR [rsp+16]
mov r13, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r12
mov QWORD PTR [rcx+24], r13
mov rax, QWORD PTR [rsp+32]
mov rdx, QWORD PTR [rsp+40]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], rdx
add rsp, 48
pop r14
pop r13
pop r12
ret
sp_384_sqr_6 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * Reads six 64-bit words from a and writes twelve 64-bit words to r.
; * The off-diagonal products A[i]*A[j] (i < j) are summed once using MULX
; * with the ADCX (CF) and ADOX (OF) carry chains, then doubled while the
; * squares A[i]*A[i] are added in.
; *
; * r Result of squaring (rcx on entry).
; * a Number to square in Montgomery form (rdx on entry).
; */
_text SEGMENT READONLY PARA
sp_384_sqr_avx2_6 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
push rbp
; rax = a, because rdx is the implicit multiplicand of MULX.
mov rax, rdx
; Park r on the stack; rcx is reused as a constant zero.
push rcx
; rcx = 0; this also clears CF and OF before the carry chains start.
xor rcx, rcx
mov rdx, QWORD PTR [rax]
mov rsi, QWORD PTR [rax+8]
mov rbx, QWORD PTR [rax+16]
mov rbp, QWORD PTR [rax+24]
; Cross-product sum is built in r10, r11, r12, r13, r14, r15, rdi, rsi,
; rbx, rbp (result words 1..10, in that order).
; Diagonal 0
; A[1] * A[0]
mulx r11, r10, QWORD PTR [rax+8]
; A[2] * A[0]
mulx r12, r8, QWORD PTR [rax+16]
adcx r11, r8
; A[3] * A[0]
mulx r13, r8, QWORD PTR [rax+24]
adcx r12, r8
; A[4] * A[0]
mulx r14, r8, QWORD PTR [rax+32]
adcx r13, r8
; A[5] * A[0]
mulx r15, r8, QWORD PTR [rax+40]
adcx r14, r8
adcx r15, rcx
; Diagonal 1
mov rdx, rsi
; A[2] * A[1]
mulx r9, r8, QWORD PTR [rax+16]
adcx r12, r8
adox r13, r9
; A[3] * A[1]
mulx r9, r8, QWORD PTR [rax+24]
adcx r13, r8
adox r14, r9
; A[4] * A[1]
mulx r9, r8, QWORD PTR [rax+32]
adcx r14, r8
adox r15, r9
; A[5] * A[1]
mulx rdi, r8, QWORD PTR [rax+40]
adcx r15, r8
adox rdi, rcx
mov rdx, rbx
; A[5] * A[2]
mulx rsi, r8, QWORD PTR [rax+40]
adcx rdi, r8
adox rsi, rcx
adcx rsi, rcx
; rbx is not read again before the A[5] * A[3] MULX overwrites it (rdx
; already holds A[2]), so this instruction only matters for its effect on CF.
adcx rbx, rcx
; Diagonal 2
; A[3] * A[2]
mulx r9, r8, QWORD PTR [rax+24]
adcx r14, r8
adox r15, r9
; A[4] * A[2]
mulx r9, r8, QWORD PTR [rax+32]
adcx r15, r8
adox rdi, r9
mov rdx, rbp
; A[4] * A[3]
mulx r9, r8, QWORD PTR [rax+32]
adcx rdi, r8
adox rsi, r9
; A[5] * A[3]
mulx rbx, r8, QWORD PTR [rax+40]
adcx rsi, r8
adox rbx, rcx
mov rdx, QWORD PTR [rax+32]
; A[5] * A[4]
mulx rbp, r8, QWORD PTR [rax+40]
adcx rbx, r8
adox rbp, rcx
adcx rbp, rcx
; rcx = 0 + 0 + CF: captures the carry out of the top cross-product word.
adcx rcx, rcx
; Doubling previous result as we add in square words results
; ADOX doubles each word (reg + reg + OF); ADCX adds the square word.
; A[0] * A[0]
mov rdx, QWORD PTR [rax]
mulx r9, r8, rdx
; Briefly recover r to store result word 0, then put it back on the stack.
; MOV, POP and PUSH do not modify the flags.
pop rdx
mov QWORD PTR [rdx], r8
adox r10, r10
push rdx
adcx r10, r9
; A[1] * A[1]
mov rdx, QWORD PTR [rax+8]
mulx r9, r8, rdx
adox r11, r11
adcx r11, r8
adox r12, r12
adcx r12, r9
; A[2] * A[2]
mov rdx, QWORD PTR [rax+16]
mulx r9, r8, rdx
adox r13, r13
adcx r13, r8
adox r14, r14
adcx r14, r9
; A[3] * A[3]
mov rdx, QWORD PTR [rax+24]
mulx r9, r8, rdx
adox r15, r15
adcx r15, r8
adox rdi, rdi
adcx rdi, r9
; A[4] * A[4]
mov rdx, QWORD PTR [rax+32]
mulx r9, r8, rdx
adox rsi, rsi
adcx rsi, r8
adox rbx, rbx
adcx rbx, r9
; A[5] * A[5]
mov rdx, QWORD PTR [rax+40]
mulx r9, r8, rdx
adox rbp, rbp
adcx rbp, r8
adcx r9, rcx
; Zero r8 with MOV rather than XOR so that CF and OF are preserved.
mov r8, 0
adox r9, r8
; Recover r and write result words 1..11 (word 0 was stored above).
pop rcx
mov QWORD PTR [rcx+8], r10
mov QWORD PTR [rcx+16], r11
mov QWORD PTR [rcx+24], r12
mov QWORD PTR [rcx+32], r13
mov QWORD PTR [rcx+40], r14
mov QWORD PTR [rcx+48], r15
mov QWORD PTR [rcx+56], rdi
mov QWORD PTR [rcx+64], rsi
mov QWORD PTR [rcx+72], rbx
mov QWORD PTR [rcx+80], rbp
mov QWORD PTR [rcx+88], r9
pop rbp
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_384_sqr_avx2_6 ENDP
_text ENDS
ENDIF
; /* Add b to a into r. (r = a + b)
; *
; * Six 64-bit words per operand. Every word of a and b is read before
; * any word of r is written.
; *
; * r A single precision integer (rcx).
; * a A single precision integer (rdx).
; * b A single precision integer (r8).
; * return carry out of the top word, 0 or 1 (rax).
; */
_text SEGMENT READONLY PARA
sp_384_add_6 PROC
        push    r14
        push    r13
        push    r12
        ; Fetch a, most significant word first.
        mov     r14, QWORD PTR [rdx+40]
        mov     r13, QWORD PTR [rdx+32]
        mov     r12, QWORD PTR [rdx+24]
        mov     r11, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx]
        ; Ripple-carry addition of b.
        add     r9, QWORD PTR [r8]
        adc     r10, QWORD PTR [r8+8]
        adc     r11, QWORD PTR [r8+16]
        adc     r12, QWORD PTR [r8+24]
        adc     r13, QWORD PTR [r8+32]
        adc     r14, QWORD PTR [r8+40]
        ; rax = -CF, negated to give the carry as 0 or 1.
        sbb     rax, rax
        neg     rax
        ; Write the sum.
        mov     QWORD PTR [rcx+40], r14
        mov     QWORD PTR [rcx+32], r13
        mov     QWORD PTR [rcx+24], r12
        mov     QWORD PTR [rcx+16], r11
        mov     QWORD PTR [rcx+8], r10
        mov     QWORD PTR [rcx], r9
        pop     r12
        pop     r13
        pop     r14
        ret
sp_384_add_6 ENDP
_text ENDS
; /* Sub b from a into r. (r = a - b)
; *
; * Six 64-bit words per operand. Every word of a and b is read before
; * any word of r is written.
; *
; * r A single precision integer (rcx).
; * a A single precision integer (rdx).
; * b A single precision integer (r8).
; * return 0 when there is no borrow out of the top word, -1 otherwise (rax).
; */
_text SEGMENT READONLY PARA
sp_384_sub_6 PROC
        push    r14
        push    r13
        push    r12
        ; Fetch a, most significant word first.
        mov     r14, QWORD PTR [rdx+40]
        mov     r13, QWORD PTR [rdx+32]
        mov     r12, QWORD PTR [rdx+24]
        mov     r11, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx]
        ; Ripple-borrow subtraction of b.
        sub     r9, QWORD PTR [r8]
        sbb     r10, QWORD PTR [r8+8]
        sbb     r11, QWORD PTR [r8+16]
        sbb     r12, QWORD PTR [r8+24]
        sbb     r13, QWORD PTR [r8+32]
        sbb     r14, QWORD PTR [r8+40]
        ; rax = 0 - CF: all ones when the subtraction borrowed.
        sbb     rax, rax
        ; Write the difference.
        mov     QWORD PTR [rcx+40], r14
        mov     QWORD PTR [rcx+32], r13
        mov     QWORD PTR [rcx+24], r12
        mov     QWORD PTR [rcx+16], r11
        mov     QWORD PTR [rcx+8], r10
        mov     QWORD PTR [rcx], r9
        pop     r12
        pop     r13
        pop     r14
        ret
sp_384_sub_6 ENDP
_text ENDS
; /* Conditionally copy a into r using the mask m.
; * m is -1 to copy and 0 when not.
; *
; * Branch-free select: r ^= (a ^ r) & m, applied to six 64-bit words.
; * All reads of a and r complete before r is written.
; *
; * r A single precision number to copy over (rcx).
; * a A single precision number to copy (rdx).
; * m Mask value to apply (r8).
; */
_text SEGMENT READONLY PARA
sp_384_cond_copy_6 PROC
        push    r13
        push    r12
        ; Start from the words of a ...
        mov     r13, QWORD PTR [rdx+40]
        mov     r12, QWORD PTR [rdx+32]
        mov     r11, QWORD PTR [rdx+24]
        mov     r10, QWORD PTR [rdx+16]
        mov     r9, QWORD PTR [rdx+8]
        mov     rax, QWORD PTR [rdx]
        ; ... form the bit difference against r ...
        xor     r13, QWORD PTR [rcx+40]
        xor     r12, QWORD PTR [rcx+32]
        xor     r11, QWORD PTR [rcx+24]
        xor     r10, QWORD PTR [rcx+16]
        xor     r9, QWORD PTR [rcx+8]
        xor     rax, QWORD PTR [rcx]
        ; ... keep the difference only where the mask is set ...
        and     r13, r8
        and     r12, r8
        and     r11, r8
        and     r10, r8
        and     r9, r8
        and     rax, r8
        ; ... and flip exactly those bits in r.
        xor     QWORD PTR [rcx+40], r13
        xor     QWORD PTR [rcx+32], r12
        xor     QWORD PTR [rcx+24], r11
        xor     QWORD PTR [rcx+16], r10
        xor     QWORD PTR [rcx+8], r9
        xor     QWORD PTR [rcx], rax
        pop     r12
        pop     r13
        ret
sp_384_cond_copy_6 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 to subtract nothing (r = a - (b & m)).
; *
; * The masked copy of b is first built in a 48-byte stack buffer and then
; * subtracted from a with a single borrow chain.
; *
; * r A single precision number representing condition subtract result (rcx).
; * a A single precision number to subtract from (rdx).
; * b A single precision number to subtract (r8).
; * m Mask value to apply (r9).
; * return 0 when there is no borrow out of the top word, -1 otherwise (rax).
; */
_text SEGMENT READONLY PARA
sp_384_cond_sub_6 PROC
sub rsp, 48
; [rsp + 8*i] = b[i] & m for i = 0..5
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
and r10, r9
and r11, r9
mov QWORD PTR [rsp], r10
mov QWORD PTR [rsp+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+16], r10
mov QWORD PTR [rsp+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+32], r10
mov QWORD PTR [rsp+40], r11
; r = a - masked b. From here r8 is a scratch register (b is no longer
; needed). The MOVs interleaved with SUB/SBB do not modify the borrow flag.
mov r10, QWORD PTR [rdx]
mov r8, QWORD PTR [rsp]
sub r10, r8
mov r11, QWORD PTR [rdx+8]
mov r8, QWORD PTR [rsp+8]
sbb r11, r8
mov QWORD PTR [rcx], r10
mov r10, QWORD PTR [rdx+16]
mov r8, QWORD PTR [rsp+16]
sbb r10, r8
mov QWORD PTR [rcx+8], r11
mov r11, QWORD PTR [rdx+24]
mov r8, QWORD PTR [rsp+24]
sbb r11, r8
mov QWORD PTR [rcx+16], r10
mov r10, QWORD PTR [rdx+32]
mov r8, QWORD PTR [rsp+32]
sbb r10, r8
mov QWORD PTR [rcx+24], r11
mov r11, QWORD PTR [rdx+40]
mov r8, QWORD PTR [rsp+40]
sbb r11, r8
mov QWORD PTR [rcx+32], r10
mov QWORD PTR [rcx+40], r11
; rax = 0 - CF: all ones when the subtraction borrowed.
sbb rax, rax
add rsp, 48
ret
sp_384_cond_sub_6 ENDP
_text ENDS
; /* Reduce the number back to 384 bits using Montgomery reduction.
; *
; * Reads the twelve 64-bit words a[0..11] and writes the result to
; * a[0..5]. The reduction is done in three rounds that each consume two
; * words, using only shifts, adds and subtracts (no multiply).
; * NOTE(review): presumably this relies on the special form of the P-384
; * prime - confirm against the code generator.
; *
; * a A single precision number to reduce in place (rcx).
; * m The single precision number representing the modulus. Not read by
; *   this routine: rdx is overwritten before any use.
; * mp The digit representing the negative inverse of m mod 2^n. Not read
; *   by this routine: r8 is overwritten before any use.
; */
_text SEGMENT READONLY PARA
sp_384_mont_reduce_6 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
push rbp
; Working window: r12, r13, r14, r15, rdi, rsi = a[0..5];
; rbx, rbp = a[6..7] (loaded below); r11 = carry/borrow out of the window.
mov r12, QWORD PTR [rcx]
mov r13, QWORD PTR [rcx+8]
mov r14, QWORD PTR [rcx+16]
mov r15, QWORD PTR [rcx+24]
mov rdi, QWORD PTR [rcx+32]
mov rsi, QWORD PTR [rcx+40]
xor r11, r11
; a[0-7] += m[0-5] * mu[0..1] = m[0-5] * (a[0..1] * mp)
mov rbx, QWORD PTR [rcx+48]
mov rbp, QWORD PTR [rcx+56]
; rax:rdx = two-word mu, derived from the two lowest window words
; (r13:r12) with a 32-bit shift and additions.
mov rdx, r12
mov rax, r13
shld rax, rdx, 32
shl rdx, 32
add rdx, r12
adc rax, r13
add rax, r12
; r10:r9:r8 = mu shifted left by 32 bits (three words).
mov r8, rdx
mov r9, rax
mov r10, rax
shld r9, r8, 32
shl r8, 32
shr r10, 32
; Add (mu << 32) at window word 0 and mu at window word 6.
add r12, r8
adc r13, r9
adc r14, r10
adc r15, 0
adc rdi, 0
adc rsi, 0
adc rbx, rdx
adc rbp, rax
adc r11, 0
; Build the value subtracted at window word 2 in rax:r10:r9 (rax receives
; the carry out), then subtract it and fold the final borrow into r11.
add r8, rax
adc r9, rdx
adc r10, rax
mov rax, 0
adc rax, 0
sub r14, r9
sbb r15, r10
sbb rdi, rax
sbb rsi, 0
sbb rbx, 0
sbb rbp, 0
sbb r11, 0
; a[2-9] += m[0-5] * mu[0..1] = m[0-5] * (a[2..3] * mp)
; Window slides by two words: r12, r13 are reloaded as a[8..9].
mov r12, QWORD PTR [rcx+64]
mov r13, QWORD PTR [rcx+72]
mov rdx, r14
mov rax, r15
shld rax, rdx, 32
shl rdx, 32
add rdx, r14
adc rax, r15
add rax, r14
mov r8, rdx
mov r9, rax
mov r10, rax
shld r9, r8, 32
shl r8, 32
shr r10, 32
; Fold the previous round's carry (r11) into the newly loaded words and
; restart r11 from the carry out of that addition.
add r12, r11
adc r13, 0
mov r11, 0
adc r11, 0
add r14, r8
adc r15, r9
adc rdi, r10
adc rsi, 0
adc rbx, 0
adc rbp, 0
adc r12, rdx
adc r13, rax
adc r11, 0
add r8, rax
adc r9, rdx
adc r10, rax
mov rax, 0
adc rax, 0
sub rdi, r9
sbb rsi, r10
sbb rbx, rax
sbb rbp, 0
sbb r12, 0
sbb r13, 0
sbb r11, 0
; a[4-11] += m[0-5] * mu[0..1] = m[0-5] * (a[4..5] * mp)
; Window slides again: r14, r15 are reloaded as a[10..11].
mov r14, QWORD PTR [rcx+80]
mov r15, QWORD PTR [rcx+88]
mov rdx, rdi
mov rax, rsi
shld rax, rdx, 32
shl rdx, 32
add rdx, rdi
adc rax, rsi
add rax, rdi
mov r8, rdx
mov r9, rax
mov r10, rax
shld r9, r8, 32
shl r8, 32
shr r10, 32
add r14, r11
adc r15, 0
mov r11, 0
adc r11, 0
add rdi, r8
adc rsi, r9
adc rbx, r10
adc rbp, 0
adc r12, 0
adc r13, 0
adc r14, rdx
adc r15, rax
adc r11, 0
add r8, rax
adc r9, rdx
adc r10, rax
mov rax, 0
adc rax, 0
sub rbx, r9
sbb rbp, r10
sbb r12, rax
sbb r13, 0
sbb r14, 0
sbb r15, 0
sbb r11, 0
; Subtract mod if carry
; r11 becomes the mask (negated carry). The six words subtracted are
; mask-selected copies of 0x00000000ffffffff (r8), 0xffffffff00000000 (r9),
; 0xfffffffffffffffe (r10) and all-ones (r11, used three times).
neg r11
mov r10, 18446744073709551614
; A 32-bit move zero-extends, leaving only the low half of the mask in r8.
mov r8d, r11d
mov r9, r11
and r10, r11
shl r9, 32
sub rbx, r8
sbb rbp, r9
sbb r12, r10
sbb r13, r11
sbb r14, r11
sbb r15, r11
; Result is written back to a[0..5].
mov QWORD PTR [rcx], rbx
mov QWORD PTR [rcx+8], rbp
mov QWORD PTR [rcx+16], r12
mov QWORD PTR [rcx+24], r13
mov QWORD PTR [rcx+32], r14
mov QWORD PTR [rcx+40], r15
pop rbp
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_384_mont_reduce_6 ENDP
_text ENDS
; /* Reduce the number back to 384 bits using Montgomery reduction.
; *
; * Generic word-by-word reduction: six rounds of a += m * mu * 2^(64*i)
; * with mu = a[i] * mp, followed by a masked subtraction of m performed by
; * sp_384_cond_sub_6. Reads a[0..11]; the result is written to a[0..5].
; *
; * a A single precision number to reduce in place (rcx).
; * m The single precision number representing the modulus (rdx).
; * mp The digit representing the negative inverse of m mod 2^n (r8).
; */
_text SEGMENT READONLY PARA
sp_384_mont_reduce_order_6 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
; r9 = m, because MUL writes its high half to rdx.
mov r9, rdx
; rsi = carry propagated from one round into the next.
xor rsi, rsi
; i = 6
mov r10, 6
; r15 and rdi cache the two lowest live words (a[i], a[i+1]) in registers.
mov r15, QWORD PTR [rcx]
mov rdi, QWORD PTR [rcx+8]
L_384_mont_reduce_order_6_loop:
; mu = a[i] * mp
mov r13, r15
imul r13, r8
; a[i+0] += m[0] * mu
; Only the carry in r12 is kept; r15 is replaced in the next step.
mov rax, r13
xor r12, r12
mul QWORD PTR [r9]
add r15, rax
adc r12, rdx
; a[i+1] += m[1] * mu
; The result becomes the next round's a[i] and stays in r15.
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+8]
mov r15, rdi
add r15, rax
adc r11, rdx
add r15, r12
adc r11, 0
; a[i+2] += m[2] * mu
; The result becomes the next round's a[i+1] and stays in rdi.
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+16]
mov rdi, QWORD PTR [rcx+16]
add rdi, rax
adc r12, rdx
add rdi, r11
adc r12, 0
; a[i+3] += m[3] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+24]
mov r14, QWORD PTR [rcx+24]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+24], r14
adc r11, 0
; a[i+4] += m[4] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+32]
mov r14, QWORD PTR [rcx+32]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+32], r14
adc r12, 0
; a[i+5] += m[5] * mu
; The high half (rdx) plus the previous round's carry (rsi) is added into
; a[i+6]; rsi is rebuilt from the carries produced here.
mov rax, r13
mul QWORD PTR [r9+40]
mov r14, QWORD PTR [rcx+40]
add r12, rax
adc rdx, rsi
mov rsi, 0
adc rsi, 0
add r14, r12
mov QWORD PTR [rcx+40], r14
adc QWORD PTR [rcx+48], rdx
adc rsi, 0
; i -= 1
; Advancing rcx by one word keeps the fixed offsets above relative to a[i].
add rcx, 8
dec r10
jnz L_384_mont_reduce_order_6_loop
; rcx now points at a[6]; flush the two register-cached words.
mov QWORD PTR [rcx], r15
mov QWORD PTR [rcx+8], rdi
; rsi = 0 or -1: mask for the conditional subtraction.
neg rsi
; Arguments for sp_384_cond_sub_6: r8 = b = m, r9 = mask.
; NOTE(review): the ELSE arm overwrites r9 (m) before copying it into r8,
; so it would pass the mask as b. HEAD of this file defines _WIN64
; unconditionally, so that arm is not assembled.
IFDEF _WIN64
mov r8, r9
mov r9, rsi
ELSE
mov r9, rsi
mov r8, r9
ENDIF
; rdx = &a[6] (minuend), rcx = &a[0] (destination).
mov rdx, rcx
; No-op move.
mov rcx, rcx
sub rcx, 48
; NOTE(review): the call is made without reserving shadow space and, after
; six pushes, with rsp not 16-byte aligned. sp_384_cond_sub_6 as defined in
; this file only touches its own 48-byte frame with plain MOV accesses, so
; it does not depend on either.
call sp_384_cond_sub_6
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_384_mont_reduce_order_6 ENDP
_text ENDS
; /* Compare a with b in constant time.
; *
; * Walks all six words from most to least significant without branching.
; * The first pair of words that differs decides the result; later words
; * are masked to zero so they cannot change it.
; *
; * a A single precision integer (rcx).
; * b A single precision integer (rdx).
; * return -ve, 0 or +ve if a is less than, equal to or greater than b
; * respectively. The values produced are -1, 0 and 1 (rax).
; */
_text SEGMENT READONLY PARA
sp_384_cmp_6 PROC
push r12
; r9 = 0 (used to clear the mask)
xor r9, r9
; r8 = mask: -1 while all words so far are equal, 0 after a difference
mov r8, -1
; rax = provisional result; stays -1 while no difference has been seen
mov rax, -1
; r10 = 1 (result for "greater")
mov r10, 1
; Word 5. Each step: mask both words, compare, then
;   above     -> rax = 1
;   below     -> rax = r8 (which is -1 here: a borrow needs unmasked words)
;   not equal -> r8 = 0, masking all remaining words
mov r11, QWORD PTR [rcx+40]
mov r12, QWORD PTR [rdx+40]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; Word 4
mov r11, QWORD PTR [rcx+32]
mov r12, QWORD PTR [rdx+32]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; Word 3
mov r11, QWORD PTR [rcx+24]
mov r12, QWORD PTR [rdx+24]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; Word 2
mov r11, QWORD PTR [rcx+16]
mov r12, QWORD PTR [rdx+16]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; Word 1
mov r11, QWORD PTR [rcx+8]
mov r12, QWORD PTR [rdx+8]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; Word 0
mov r11, QWORD PTR [rcx]
mov r12, QWORD PTR [rdx]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; All words equal: r8 is still -1 and rax is still -1, so the XOR gives 0.
; Otherwise r8 is 0 and rax (1 or -1) is returned unchanged.
xor rax, r8
pop r12
ret
sp_384_cmp_6 ENDP
_text ENDS
; /* Add two Montgomery form numbers (r = a + b % m).
; *
; * Branch-free: the six-word sum is followed by two mask-controlled
; * subtractions of the modulus words.
; *
; * r Result of addition (rcx).
; * a First number to add in Montgomery form (rdx).
; * b Second number to add in Montgomery form (r8).
; * m Modulus (prime). Not read by this routine: r9 is overwritten before
; *   any use and the modulus words are built from immediates.
; */
_text SEGMENT READONLY PARA
sp_384_mont_add_6 PROC
push r12
push r13
push r14
push r15
push rdi
; rax, r9, r10, r11, r12, r13 = a[0..5]
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
mov r12, QWORD PTR [rdx+32]
mov r13, QWORD PTR [rdx+40]
; Add b; the constant loads in between are MOVs and leave CF untouched.
add rax, QWORD PTR [r8]
adc r9, QWORD PTR [r8+8]
; r15 = 0xffffffff00000000
mov r15, 18446744069414584320
adc r10, QWORD PTR [r8+16]
; rdi = 0xfffffffffffffffe
mov rdi, 18446744073709551614
adc r11, QWORD PTR [r8+24]
adc r12, QWORD PTR [r8+32]
adc r13, QWORD PTR [r8+40]
; rdx = 0 - CF: all ones when the addition carried out of the top word.
sbb rdx, rdx
; Masked words to subtract: r14 = 0x00000000ffffffff (32-bit move
; zero-extends), r15, rdi as loaded above, and rdx itself (all ones) for
; the top three words.
mov r14d, edx
and r15, rdx
and rdi, rdx
sub rax, r14
sbb r9, r15
sbb r10, rdi
sbb r11, rdx
sbb r12, rdx
sbb r13, rdx
; rdx += borrow. If the first subtraction ran and borrowed, the mask
; becomes 0 and the second subtraction below subtracts nothing; if it ran
; without borrowing, the mask stays all ones and the words are subtracted
; once more.
adc rdx, 0
and r14, rdx
and r15, rdx
and rdi, rdx
; Second masked subtraction, interleaved with the stores to r.
sub rax, r14
sbb r9, r15
mov QWORD PTR [rcx], rax
sbb r10, rdi
mov QWORD PTR [rcx+8], r9
sbb r11, rdx
mov QWORD PTR [rcx+16], r10
sbb r12, rdx
mov QWORD PTR [rcx+24], r11
sbb r13, rdx
mov QWORD PTR [rcx+32], r12
mov QWORD PTR [rcx+40], r13
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_384_mont_add_6 ENDP
_text ENDS
; /* Double a Montgomery form number (r = a + a % m).
; *
; * Branch-free: the six-word doubling is followed by two mask-controlled
; * subtractions of the modulus words.
; *
; * r Result of doubling (rcx).
; * a Number to double in Montgomery form (rdx).
; * m Modulus (prime). Not read by this routine: r8 is overwritten before
; *   any use and the modulus words are built from immediates.
; */
_text SEGMENT READONLY PARA
sp_384_mont_dbl_6 PROC
push r12
push r13
push r14
push r15
push rdi
; rax, r8, r9, r10, r11, r12 = a[0..5]
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r9, QWORD PTR [rdx+16]
mov r10, QWORD PTR [rdx+24]
mov r11, QWORD PTR [rdx+32]
mov r12, QWORD PTR [rdx+40]
; Double; the constant loads in between are MOVs and leave CF untouched.
add rax, rax
adc r8, r8
; r14 = 0xffffffff00000000
mov r14, 18446744069414584320
adc r9, r9
; r15 = 0xfffffffffffffffe
mov r15, 18446744073709551614
adc r10, r10
adc r11, r11
; Keep a copy of a[5]: its top bit is the bit shifted out by the doubling.
mov rdi, r12
adc r12, r12
; rdi = mask: all ones when the top bit of a[5] was set, else 0.
sar rdi, 63
; Masked words to subtract: r13 = 0x00000000ffffffff (32-bit move
; zero-extends), r14, r15 as loaded above, and rdi itself (all ones) for
; the top three words.
mov r13d, edi
and r14, rdi
and r15, rdi
sub rax, r13
sbb r8, r14
sbb r9, r15
sbb r10, rdi
sbb r11, rdi
sbb r12, rdi
; rdi += borrow: selects whether the second subtraction below subtracts
; the words again (mask still all ones) or nothing (mask now 0).
adc rdi, 0
and r13, rdi
and r14, rdi
and r15, rdi
; Second masked subtraction, interleaved with the stores to r.
sub rax, r13
sbb r8, r14
mov QWORD PTR [rcx], rax
sbb r9, r15
mov QWORD PTR [rcx+8], r8
sbb r10, rdi
mov QWORD PTR [rcx+16], r9
sbb r11, rdi
mov QWORD PTR [rcx+24], r10
sbb r12, rdi
mov QWORD PTR [rcx+32], r11
mov QWORD PTR [rcx+40], r12
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_384_mont_dbl_6 ENDP
_text ENDS
; /* Triple a Montgomery form number (r = a + a + a % m).
; *
; * Computed as a modular doubling of a followed by a modular addition of
; * a. Each stage is a six-word add followed by two mask-controlled
; * subtractions of the modulus words.
; * r may be the same buffer as a: nothing is stored to r until the last
; * word of a has been read.
; *
; * r Result of tripling (rcx).
; * a Number to triple in Montgomery form (rdx).
; * m Modulus (prime). Not read by this routine: r8 is overwritten before
; *   any use and the modulus words are built from immediates.
; */
_text SEGMENT READONLY PARA
sp_384_mont_tpl_6 PROC
push r12
push r13
push r14
push r15
push rdi
; rax, r8, r9, r10, r11, r12 = a[0..5]
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r9, QWORD PTR [rdx+16]
mov r10, QWORD PTR [rdx+24]
mov r11, QWORD PTR [rdx+32]
mov r12, QWORD PTR [rdx+40]
; Stage 1: double. The constant loads are MOVs and leave CF untouched.
add rax, rax
adc r8, r8
; r14 = 0xffffffff00000000
mov r14, 18446744069414584320
adc r9, r9
; r15 = 0xfffffffffffffffe
mov r15, 18446744073709551614
adc r10, r10
adc r11, r11
adc r12, r12
; rdi = 0 - CF: all ones when the doubling carried out of the top word.
sbb rdi, rdi
; Masked words to subtract: r13 = 0x00000000ffffffff (32-bit move
; zero-extends), r14, r15, and rdi itself for the top three words.
mov r13d, edi
and r14, rdi
and r15, rdi
sub rax, r13
sbb r8, r14
sbb r9, r15
sbb r10, rdi
sbb r11, rdi
sbb r12, rdi
; rdi += borrow: selects whether the words are subtracted a second time.
adc rdi, 0
and r13, rdi
and r14, rdi
and r15, rdi
; The doubled value stays in registers. It must not be stored to r here:
; a[0] is read again below and r may alias a.
sub rax, r13
sbb r8, r14
sbb r9, r15
sbb r10, rdi
sbb r11, rdi
sbb r12, rdi
; Stage 2: add a to the doubled value, then reduce the same way.
add rax, QWORD PTR [rdx]
adc r8, QWORD PTR [rdx+8]
mov r14, 18446744069414584320
adc r9, QWORD PTR [rdx+16]
mov r15, 18446744073709551614
adc r10, QWORD PTR [rdx+24]
adc r11, QWORD PTR [rdx+32]
adc r12, QWORD PTR [rdx+40]
sbb rdi, rdi
mov r13d, edi
and r14, rdi
and r15, rdi
sub rax, r13
sbb r8, r14
sbb r9, r15
sbb r10, rdi
sbb r11, rdi
sbb r12, rdi
adc rdi, 0
and r13, rdi
and r14, rdi
and r15, rdi
; Final masked subtraction, interleaved with the stores to r.
sub rax, r13
sbb r8, r14
mov QWORD PTR [rcx], rax
sbb r9, r15
mov QWORD PTR [rcx+8], r8
sbb r10, rdi
mov QWORD PTR [rcx+16], r9
sbb r11, rdi
mov QWORD PTR [rcx+24], r10
sbb r12, rdi
mov QWORD PTR [rcx+32], r11
mov QWORD PTR [rcx+40], r12
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_384_mont_tpl_6 ENDP
_text ENDS
; /* Subtract two Montgomery form numbers (r = a - b % m).
; *
; * Branch-free: the six-word difference is followed by two mask-controlled
; * additions of the modulus words.
; *
; * r Result of subtraction (rcx).
; * a Number to subtract from in Montgomery form (rdx).
; * b Number to subtract with in Montgomery form (r8).
; * m Modulus (prime). Not read by this routine: r9 is overwritten before
; *   any use and the modulus words are built from immediates.
; */
_text SEGMENT READONLY PARA
sp_384_mont_sub_6 PROC
push r12
push r13
push r14
push r15
push rdi
; rax, r9, r10, r11, r12, r13 = a[0..5]
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
mov r12, QWORD PTR [rdx+32]
mov r13, QWORD PTR [rdx+40]
; Subtract b; the constant loads in between are MOVs and leave CF untouched.
sub rax, QWORD PTR [r8]
sbb r9, QWORD PTR [r8+8]
; r15 = 0xffffffff00000000
mov r15, 18446744069414584320
sbb r10, QWORD PTR [r8+16]
; rdi = 0xfffffffffffffffe
mov rdi, 18446744073709551614
sbb r11, QWORD PTR [r8+24]
sbb r12, QWORD PTR [r8+32]
sbb r13, QWORD PTR [r8+40]
; rdx = 0 - CF: all ones when the subtraction borrowed.
sbb rdx, rdx
; Masked words to add back: r14 = 0x00000000ffffffff (32-bit move
; zero-extends), r15, rdi as loaded above, and rdx itself (all ones) for
; the top three words.
mov r14d, edx
and r15, rdx
and rdi, rdx
add rax, r14
adc r9, r15
adc r10, rdi
adc r11, rdx
adc r12, rdx
adc r13, rdx
; rdx += carry. If the first addition ran and carried, the mask becomes 0
; and the second addition below adds nothing; if it ran without carrying,
; the mask stays all ones and the words are added once more.
adc rdx, 0
and r14, rdx
and r15, rdx
and rdi, rdx
; Second masked addition, interleaved with the stores to r.
add rax, r14
adc r9, r15
mov QWORD PTR [rcx], rax
adc r10, rdi
mov QWORD PTR [rcx+8], r9
adc r11, rdx
mov QWORD PTR [rcx+16], r10
adc r12, rdx
mov QWORD PTR [rcx+24], r11
adc r13, rdx
mov QWORD PTR [rcx+32], r12
mov QWORD PTR [rcx+40], r13
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_384_mont_sub_6 ENDP
_text ENDS
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * When a is odd, m is added first (selected by a mask, without
; * branching); the 385-bit sum is then shifted right by one bit.
; *
; * r Result of division by 2 (rcx).
; * a Number to divide (rdx).
; * m Modulus (prime) (r8).
; */
_text SEGMENT READONLY PARA
sp_384_mont_div2_6 PROC
push r12
push r13
sub rsp, 48
; r13 = mask: all ones when a[0] is odd, else 0. rax keeps a[0].
mov r13, QWORD PTR [rdx]
; r12 collects the carry out of the addition below.
xor r12, r12
mov rax, r13
and r13, 1
neg r13
; [rsp + 8*i] = m[i] & mask for i = 0..5
mov r10, QWORD PTR [r8]
and r10, r13
mov QWORD PTR [rsp], r10
mov r10, QWORD PTR [r8+8]
and r10, r13
mov QWORD PTR [rsp+8], r10
mov r10, QWORD PTR [r8+16]
and r10, r13
mov QWORD PTR [rsp+16], r10
mov r10, QWORD PTR [r8+24]
and r10, r13
mov QWORD PTR [rsp+24], r10
mov r10, QWORD PTR [r8+32]
and r10, r13
mov QWORD PTR [rsp+32], r10
mov r10, QWORD PTR [r8+40]
and r10, r13
mov QWORD PTR [rsp+40], r10
; Stack buffer += a; the loads between the ADCs do not modify CF.
add QWORD PTR [rsp], rax
mov rax, QWORD PTR [rdx+8]
adc QWORD PTR [rsp+8], rax
mov rax, QWORD PTR [rdx+16]
adc QWORD PTR [rsp+16], rax
mov rax, QWORD PTR [rdx+24]
adc QWORD PTR [rsp+24], rax
mov rax, QWORD PTR [rdx+32]
adc QWORD PTR [rsp+32], rax
mov rax, QWORD PTR [rdx+40]
adc QWORD PTR [rsp+40], rax
adc r12, 0
; Shift the sum right by one bit; each SHRD pulls the low bit of the next
; higher word into bit 63. rax and r9 alternate as the current/next word.
mov rax, QWORD PTR [rsp]
mov r9, QWORD PTR [rsp+8]
shrd rax, r9, 1
mov QWORD PTR [rcx], rax
mov rax, QWORD PTR [rsp+16]
shrd r9, rax, 1
mov QWORD PTR [rcx+8], r9
mov r9, QWORD PTR [rsp+24]
shrd rax, r9, 1
mov QWORD PTR [rcx+16], rax
mov rax, QWORD PTR [rsp+32]
shrd r9, rax, 1
mov QWORD PTR [rcx+24], r9
mov r9, QWORD PTR [rsp+40]
shrd rax, r9, 1
mov QWORD PTR [rcx+32], rax
; The carry of the addition (r12) becomes the top bit of the result.
shrd r9, r12, 1
mov QWORD PTR [rcx+40], r9
add rsp, 48
pop r13
pop r12
ret
sp_384_mont_div2_6 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible point that could be being copied.
; *
; * Reads the same 32 table entries whatever idx is and keeps the one
; * whose number equals idx by AND-ing with a compare mask. Entries are 296
; * bytes apart; entry 0 is skipped and entries 1..32 are compared. If idx
; * matches none of them the copied fields are written as zero.
; * Three 48-byte fields are copied, at byte offsets 0, 96 and 192 of the
; * entry and of r.
; * NOTE(review): presumably these are the x, y and z ordinates of the
; * point structure - confirm against the C declaration.
; *
; * r Point to copy into (rcx).
; * table Table - start of the entries to access (rdx).
; * idx Index of point to retrieve (r8d).
; */
_text SEGMENT READONLY PARA
sp_384_get_point_33_6 PROC
; Save xmm6-xmm15, which are callee-saved in the Microsoft x64 convention.
sub rsp, 160
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqu OWORD PTR [rsp+32], xmm8
movdqu OWORD PTR [rsp+48], xmm9
movdqu OWORD PTR [rsp+64], xmm10
movdqu OWORD PTR [rsp+80], xmm11
movdqu OWORD PTR [rsp+96], xmm12
movdqu OWORD PTR [rsp+112], xmm13
movdqu OWORD PTR [rsp+128], xmm14
movdqu OWORD PTR [rsp+144], xmm15
; Pass 1: fields at offsets 0 and 96.
; xmm13 = idx in every lane, xmm15 = 1 in every lane (increment),
; xmm14 = number of the entry being visited (starts at 1),
; xmm0-xmm5 = accumulators, rax = loop count.
mov rax, 1
movd xmm13, r8d
; Skip entry 0.
add rdx, 296
movd xmm15, eax
mov rax, 32
pshufd xmm15, xmm15, 0
pshufd xmm13, xmm13, 0
pxor xmm14, xmm14
pxor xmm0, xmm0
pxor xmm1, xmm1
pxor xmm2, xmm2
pxor xmm3, xmm3
pxor xmm4, xmm4
pxor xmm5, xmm5
movdqa xmm14, xmm15
L_384_get_point_33_6_start_1:
; xmm12 = all ones when this entry's number equals idx, else zero.
movdqa xmm12, xmm14
paddd xmm14, xmm15
pcmpeqd xmm12, xmm13
movdqu xmm6, OWORD PTR [rdx]
movdqu xmm7, OWORD PTR [rdx+16]
movdqu xmm8, OWORD PTR [rdx+32]
movdqu xmm9, OWORD PTR [rdx+96]
movdqu xmm10, OWORD PTR [rdx+112]
movdqu xmm11, OWORD PTR [rdx+128]
add rdx, 296
pand xmm6, xmm12
pand xmm7, xmm12
pand xmm8, xmm12
pand xmm9, xmm12
pand xmm10, xmm12
pand xmm11, xmm12
por xmm0, xmm6
por xmm1, xmm7
por xmm2, xmm8
por xmm3, xmm9
por xmm4, xmm10
por xmm5, xmm11
dec rax
jnz L_384_get_point_33_6_start_1
movdqu OWORD PTR [rcx], xmm0
movdqu OWORD PTR [rcx+16], xmm1
movdqu OWORD PTR [rcx+32], xmm2
movdqu OWORD PTR [rcx+96], xmm3
movdqu OWORD PTR [rcx+112], xmm4
movdqu OWORD PTR [rcx+128], xmm5
; Pass 2: field at offset 192, set up the same way as pass 1.
mov rax, 1
movd xmm13, r8d
; Rewind to entry 1: 9472 = 32 * 296.
sub rdx, 9472
movd xmm15, eax
mov rax, 32
pshufd xmm15, xmm15, 0
pshufd xmm13, xmm13, 0
pxor xmm14, xmm14
pxor xmm0, xmm0
pxor xmm1, xmm1
pxor xmm2, xmm2
movdqa xmm14, xmm15
L_384_get_point_33_6_start_2:
movdqa xmm12, xmm14
paddd xmm14, xmm15
pcmpeqd xmm12, xmm13
movdqu xmm6, OWORD PTR [rdx+192]
movdqu xmm7, OWORD PTR [rdx+208]
movdqu xmm8, OWORD PTR [rdx+224]
add rdx, 296
pand xmm6, xmm12
pand xmm7, xmm12
pand xmm8, xmm12
por xmm0, xmm6
por xmm1, xmm7
por xmm2, xmm8
dec rax
jnz L_384_get_point_33_6_start_2
movdqu OWORD PTR [rcx+192], xmm0
movdqu OWORD PTR [rcx+208], xmm1
movdqu OWORD PTR [rcx+224], xmm2
; Restore xmm6-xmm15.
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm8, OWORD PTR [rsp+32]
movdqu xmm9, OWORD PTR [rsp+48]
movdqu xmm10, OWORD PTR [rsp+64]
movdqu xmm11, OWORD PTR [rsp+80]
movdqu xmm12, OWORD PTR [rsp+96]
movdqu xmm13, OWORD PTR [rsp+112]
movdqu xmm14, OWORD PTR [rsp+128]
movdqu xmm15, OWORD PTR [rsp+144]
add rsp, 160
ret
sp_384_get_point_33_6 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible point that could be being copied.
; *
; * AVX2 variant: a single pass over the same 32 table entries whatever
; * idx is, keeping the one whose number equals idx by AND-ing with a
; * compare mask. Entries are 296 bytes apart; entry 0 is skipped and
; * entries 1..32 are compared. If idx matches none of them the copied
; * fields are written as zero.
; * Three 48-byte fields are copied, at byte offsets 0, 96 and 192 of the
; * entry and of r, each as one 32-byte and one 16-byte access.
; * NOTE(review): presumably these are the x, y and z ordinates of the
; * point structure - confirm against the C declaration.
; *
; * r Point to copy into (rcx).
; * table Table - start of the entries to access (rdx).
; * idx Index of point to retrieve (r8d).
; */
_text SEGMENT READONLY PARA
sp_384_get_point_33_avx2_6 PROC
; Save xmm6-xmm15, which are callee-saved in the Microsoft x64 convention.
sub rsp, 160
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
vmovdqu OWORD PTR [rsp+32], xmm8
vmovdqu OWORD PTR [rsp+48], xmm9
vmovdqu OWORD PTR [rsp+64], xmm10
vmovdqu OWORD PTR [rsp+80], xmm11
vmovdqu OWORD PTR [rsp+96], xmm12
vmovdqu OWORD PTR [rsp+112], xmm13
vmovdqu OWORD PTR [rsp+128], xmm14
vmovdqu OWORD PTR [rsp+144], xmm15
mov rax, 1
movd xmm13, r8d
; Skip entry 0.
add rdx, 296
movd xmm15, eax
mov rax, 32
; With an all-zero index vector, VPERMD copies dword 0 of the source into
; every lane: ymm13 = idx in every lane, ymm15 = 1 in every lane.
vpxor ymm14, ymm14, ymm14
vpermd ymm13, ymm14, ymm13
vpermd ymm15, ymm14, ymm15
; Accumulators: ymm0/xmm1, ymm2/xmm3, ymm4/xmm5 (32 + 16 bytes per field).
vpxor ymm0, ymm0, ymm0
vpxor xmm1, xmm1, xmm1
vpxor ymm2, ymm2, ymm2
vpxor xmm3, xmm3, xmm3
vpxor ymm4, ymm4, ymm4
vpxor xmm5, xmm5, xmm5
; ymm14 = number of the entry being visited (starts at 1).
vmovdqa ymm14, ymm15
L_384_get_point_33_avx2_6_start:
; ymm12 = all ones when this entry's number equals idx, else zero.
vpcmpeqd ymm12, ymm14, ymm13
vpaddd ymm14, ymm14, ymm15
vmovupd ymm6, YMMWORD PTR [rdx]
vmovdqu xmm7, OWORD PTR [rdx+32]
vmovupd ymm8, YMMWORD PTR [rdx+96]
vmovdqu xmm9, OWORD PTR [rdx+128]
vmovupd ymm10, YMMWORD PTR [rdx+192]
vmovdqu xmm11, OWORD PTR [rdx+224]
add rdx, 296
vpand ymm6, ymm6, ymm12
vpand xmm7, xmm7, xmm12
vpand ymm8, ymm8, ymm12
vpand xmm9, xmm9, xmm12
vpand ymm10, ymm10, ymm12
vpand xmm11, xmm11, xmm12
vpor ymm0, ymm0, ymm6
vpor xmm1, xmm1, xmm7
vpor ymm2, ymm2, ymm8
vpor xmm3, xmm3, xmm9
vpor ymm4, ymm4, ymm10
vpor xmm5, xmm5, xmm11
dec rax
jnz L_384_get_point_33_avx2_6_start
vmovupd YMMWORD PTR [rcx], ymm0
vmovdqu OWORD PTR [rcx+32], xmm1
vmovupd YMMWORD PTR [rcx+96], ymm2
vmovdqu OWORD PTR [rcx+128], xmm3
vmovupd YMMWORD PTR [rcx+192], ymm4
vmovdqu OWORD PTR [rcx+224], xmm5
; Restore xmm6-xmm15.
; NOTE(review): no VZEROUPPER is issued before returning.
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm8, OWORD PTR [rsp+32]
vmovdqu xmm9, OWORD PTR [rsp+48]
vmovdqu xmm10, OWORD PTR [rsp+64]
vmovdqu xmm11, OWORD PTR [rsp+80]
vmovdqu xmm12, OWORD PTR [rsp+96]
vmovdqu xmm13, OWORD PTR [rsp+112]
vmovdqu xmm14, OWORD PTR [rsp+128]
vmovdqu xmm15, OWORD PTR [rsp+144]
add rsp, 160
ret
sp_384_get_point_33_avx2_6 ENDP
_text ENDS
ENDIF
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 384 bits using Montgomery reduction.
; *
; * Six fully unrolled rounds of a += m * mu * 2^(64*i) with
; * mu = a[i] * mp, using MULX with the ADCX (CF, low halves) and ADOX
; * (OF, high halves) carry chains, followed by a masked subtraction of m.
; * Reads a[0..11]; the result is written to a[0..5].
; *
; * Register roles: rax = m, r8 = mp, r14 = the word the next round will
; * reduce (kept in a register, not stored), r15 = carry passed from one
; * round into the next, r13 = zero (its XOR also clears CF and OF).
; *
; * a A single precision number to reduce in place (rcx).
; * m The single precision number representing the modulus (rdx).
; * mp The digit representing the negative inverse of m mod 2^n (r8).
; */
_text SEGMENT READONLY PARA
sp_384_mont_reduce_order_avx2_6 PROC
push r12
push r13
push r14
push r15
; rax = m, because rdx is the implicit multiplicand of MULX.
mov rax, rdx
xor r15, r15
mov r14, QWORD PTR [rcx]
xor r13, r13
; The rounds below are unrolled; no instruction in this routine branches
; to this label.
L_mont_loop_order_avx2_6:
; Round 0
; mu = a[i] * mp
mov rdx, r14
mov r11, r14
imul rdx, r8
; IMUL modifies the flags; this XOR resets CF and OF for the two chains.
xor r13, r13
; a[i+0] += m[0] * mu
; The sum in r11 is not stored (only its carry matters).
mulx r10, r9, QWORD PTR [rax]
mov r14, QWORD PTR [rcx+8]
adcx r11, r9
adox r14, r10
; a[i+1] += m[1] * mu
mulx r10, r9, QWORD PTR [rax+8]
mov r11, QWORD PTR [rcx+16]
adcx r14, r9
adox r11, r10
; a[i+2] += m[2] * mu
mulx r10, r9, QWORD PTR [rax+16]
mov r12, QWORD PTR [rcx+24]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+16], r11
; a[i+3] += m[3] * mu
mulx r10, r9, QWORD PTR [rax+24]
mov r11, QWORD PTR [rcx+32]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+24], r12
; a[i+4] += m[4] * mu
mulx r10, r9, QWORD PTR [rax+32]
mov r12, QWORD PTR [rcx+40]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+32], r11
; a[i+5] += m[5] * mu
mulx r10, r9, QWORD PTR [rax+40]
mov r11, QWORD PTR [rcx+48]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+40], r12
; Add the carry from the previous round, then collect this round's
; outgoing carry: r15 = OF + CF.
adcx r11, r15
mov QWORD PTR [rcx+48], r11
mov r15, r13
adox r15, r13
adcx r15, r13
; Round 1
; mu = a[i] * mp
mov rdx, r14
mov r11, r14
imul rdx, r8
xor r13, r13
; a[i+0] += m[0] * mu
mulx r10, r9, QWORD PTR [rax]
mov r14, QWORD PTR [rcx+16]
adcx r11, r9
adox r14, r10
; a[i+1] += m[1] * mu
mulx r10, r9, QWORD PTR [rax+8]
mov r11, QWORD PTR [rcx+24]
adcx r14, r9
adox r11, r10
; a[i+2] += m[2] * mu
mulx r10, r9, QWORD PTR [rax+16]
mov r12, QWORD PTR [rcx+32]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+24], r11
; a[i+3] += m[3] * mu
mulx r10, r9, QWORD PTR [rax+24]
mov r11, QWORD PTR [rcx+40]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+32], r12
; a[i+4] += m[4] * mu
mulx r10, r9, QWORD PTR [rax+32]
mov r12, QWORD PTR [rcx+48]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+40], r11
; a[i+5] += m[5] * mu
mulx r10, r9, QWORD PTR [rax+40]
mov r11, QWORD PTR [rcx+56]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+48], r12
adcx r11, r15
mov QWORD PTR [rcx+56], r11
mov r15, r13
adox r15, r13
adcx r15, r13
; Round 2
; mu = a[i] * mp
mov rdx, r14
mov r11, r14
imul rdx, r8
xor r13, r13
; a[i+0] += m[0] * mu
mulx r10, r9, QWORD PTR [rax]
mov r14, QWORD PTR [rcx+24]
adcx r11, r9
adox r14, r10
; a[i+1] += m[1] * mu
mulx r10, r9, QWORD PTR [rax+8]
mov r11, QWORD PTR [rcx+32]
adcx r14, r9
adox r11, r10
; a[i+2] += m[2] * mu
mulx r10, r9, QWORD PTR [rax+16]
mov r12, QWORD PTR [rcx+40]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+32], r11
; a[i+3] += m[3] * mu
mulx r10, r9, QWORD PTR [rax+24]
mov r11, QWORD PTR [rcx+48]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+40], r12
; a[i+4] += m[4] * mu
mulx r10, r9, QWORD PTR [rax+32]
mov r12, QWORD PTR [rcx+56]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+48], r11
; a[i+5] += m[5] * mu
mulx r10, r9, QWORD PTR [rax+40]
mov r11, QWORD PTR [rcx+64]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+56], r12
adcx r11, r15
mov QWORD PTR [rcx+64], r11
mov r15, r13
adox r15, r13
adcx r15, r13
; Round 3
; mu = a[i] * mp
mov rdx, r14
mov r11, r14
imul rdx, r8
xor r13, r13
; a[i+0] += m[0] * mu
mulx r10, r9, QWORD PTR [rax]
mov r14, QWORD PTR [rcx+32]
adcx r11, r9
adox r14, r10
; a[i+1] += m[1] * mu
mulx r10, r9, QWORD PTR [rax+8]
mov r11, QWORD PTR [rcx+40]
adcx r14, r9
adox r11, r10
; a[i+2] += m[2] * mu
mulx r10, r9, QWORD PTR [rax+16]
mov r12, QWORD PTR [rcx+48]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+40], r11
; a[i+3] += m[3] * mu
mulx r10, r9, QWORD PTR [rax+24]
mov r11, QWORD PTR [rcx+56]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+48], r12
; a[i+4] += m[4] * mu
mulx r10, r9, QWORD PTR [rax+32]
mov r12, QWORD PTR [rcx+64]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+56], r11
; a[i+5] += m[5] * mu
mulx r10, r9, QWORD PTR [rax+40]
mov r11, QWORD PTR [rcx+72]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+64], r12
adcx r11, r15
mov QWORD PTR [rcx+72], r11
mov r15, r13
adox r15, r13
adcx r15, r13
; Round 4
; mu = a[i] * mp
mov rdx, r14
mov r11, r14
imul rdx, r8
xor r13, r13
; a[i+0] += m[0] * mu
mulx r10, r9, QWORD PTR [rax]
mov r14, QWORD PTR [rcx+40]
adcx r11, r9
adox r14, r10
; a[i+1] += m[1] * mu
mulx r10, r9, QWORD PTR [rax+8]
mov r11, QWORD PTR [rcx+48]
adcx r14, r9
adox r11, r10
; a[i+2] += m[2] * mu
mulx r10, r9, QWORD PTR [rax+16]
mov r12, QWORD PTR [rcx+56]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+48], r11
; a[i+3] += m[3] * mu
mulx r10, r9, QWORD PTR [rax+24]
mov r11, QWORD PTR [rcx+64]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+56], r12
; a[i+4] += m[4] * mu
mulx r10, r9, QWORD PTR [rax+32]
mov r12, QWORD PTR [rcx+72]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+64], r11
; a[i+5] += m[5] * mu
mulx r10, r9, QWORD PTR [rax+40]
mov r11, QWORD PTR [rcx+80]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+72], r12
adcx r11, r15
mov QWORD PTR [rcx+80], r11
mov r15, r13
adox r15, r13
adcx r15, r13
; Round 5
; mu = a[i] * mp
mov rdx, r14
mov r11, r14
imul rdx, r8
xor r13, r13
; a[i+0] += m[0] * mu
mulx r10, r9, QWORD PTR [rax]
mov r14, QWORD PTR [rcx+48]
adcx r11, r9
adox r14, r10
; a[i+1] += m[1] * mu
mulx r10, r9, QWORD PTR [rax+8]
mov r11, QWORD PTR [rcx+56]
adcx r14, r9
adox r11, r10
; a[i+2] += m[2] * mu
mulx r10, r9, QWORD PTR [rax+16]
mov r12, QWORD PTR [rcx+64]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+56], r11
; a[i+3] += m[3] * mu
mulx r10, r9, QWORD PTR [rax+24]
mov r11, QWORD PTR [rcx+72]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+64], r12
; a[i+4] += m[4] * mu
mulx r10, r9, QWORD PTR [rax+32]
mov r12, QWORD PTR [rcx+80]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+72], r11
; a[i+5] += m[5] * mu
mulx r10, r9, QWORD PTR [rax+40]
mov r11, QWORD PTR [rcx+88]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+80], r12
adcx r11, r15
mov QWORD PTR [rcx+88], r11
mov r15, r13
adox r15, r13
adcx r15, r13
; Final conditional subtraction: a[0..5] = a[6..11] - (m & mask).
; r15 = mask (negated carry of the last round). PEXT with an all-ones
; mask returns the source word unchanged and with a zero mask returns 0.
neg r15
; r8 = destination (&a[0]); rcx = source (&a[6]). The value of a[6] is in
; r14 - it was never stored.
mov r8, rcx
add rcx, 48
mov r10, QWORD PTR [rax]
mov rdx, r14
pext r10, r10, r15
sub rdx, r10
mov r10, QWORD PTR [rax+8]
mov r9, QWORD PTR [rcx+8]
pext r10, r10, r15
mov QWORD PTR [r8], rdx
sbb r9, r10
mov rdx, QWORD PTR [rax+16]
mov r10, QWORD PTR [rcx+16]
pext rdx, rdx, r15
mov QWORD PTR [r8+8], r9
sbb r10, rdx
mov r9, QWORD PTR [rax+24]
mov rdx, QWORD PTR [rcx+24]
pext r9, r9, r15
mov QWORD PTR [r8+16], r10
sbb rdx, r9
mov r10, QWORD PTR [rax+32]
mov r9, QWORD PTR [rcx+32]
pext r10, r10, r15
mov QWORD PTR [r8+24], rdx
sbb r9, r10
mov rdx, QWORD PTR [rax+40]
mov r10, QWORD PTR [rcx+40]
pext rdx, rdx, r15
mov QWORD PTR [r8+32], r9
sbb r10, rdx
mov QWORD PTR [r8+40], r10
pop r15
pop r14
pop r13
pop r12
ret
sp_384_mont_reduce_order_avx2_6 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 to subtract nothing.
; *
; * Each word of b is masked with PEXT: with an all-ones mask PEXT returns
; * the word unchanged and with a zero mask it returns 0. The masked words
; * are subtracted from a with a single borrow chain (MOV and PEXT do not
; * modify CF).
; *
; * r A single precision number representing condition subtract result (rcx).
; * a A single precision number to subtract from (rdx).
; * b A single precision number to subtract (r8).
; * m Mask value to apply (r9).
; * return 0 when there is no borrow out of the top word, -1 otherwise (rax).
; */
_text SEGMENT READONLY PARA
sp_384_cond_sub_avx2_6 PROC
push r12
; Word 0
mov r12, QWORD PTR [r8]
mov r10, QWORD PTR [rdx]
pext r12, r12, r9
sub r10, r12
; Word 1 (r10, r11 and r12 rotate between the minuend and subtrahend roles)
mov r12, QWORD PTR [r8+8]
mov r11, QWORD PTR [rdx+8]
pext r12, r12, r9
mov QWORD PTR [rcx], r10
sbb r11, r12
; Word 2
mov r10, QWORD PTR [r8+16]
mov r12, QWORD PTR [rdx+16]
pext r10, r10, r9
mov QWORD PTR [rcx+8], r11
sbb r12, r10
; Word 3
mov r11, QWORD PTR [r8+24]
mov r10, QWORD PTR [rdx+24]
pext r11, r11, r9
mov QWORD PTR [rcx+16], r12
sbb r10, r11
; Word 4
mov r12, QWORD PTR [r8+32]
mov r11, QWORD PTR [rdx+32]
pext r12, r12, r9
mov QWORD PTR [rcx+24], r10
sbb r11, r12
; Word 5
mov r10, QWORD PTR [r8+40]
mov r12, QWORD PTR [rdx+40]
pext r10, r10, r9
mov QWORD PTR [rcx+32], r11
sbb r12, r10
mov QWORD PTR [rcx+40], r12
; rax = 0 - CF: all ones when the subtraction borrowed.
sbb rax, rax
pop r12
ret
sp_384_cond_sub_avx2_6 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * When a is odd, m is added first; the words of m are selected with
; * PEXT (an all-ones mask returns the word, a zero mask returns 0), so
; * there is no branch. The 385-bit sum is written to r and then shifted
; * right by one bit in place.
; *
; * r Result of division by 2 (rcx).
; * a Number to divide (rdx).
; * m Modulus (prime) (r8).
; */
_text SEGMENT READONLY PARA
sp_384_mont_div2_avx2_6 PROC
push r12
push r13
; r13 = mask: all ones when a[0] is odd, else 0.
mov r13, QWORD PTR [rdx]
; r12 collects the carry out of the addition below.
xor r12, r12
and r13, 1
neg r13
; r[0..1] = a[0..1] + masked m[0..1]. MOV and PEXT do not modify CF, so
; the carry chain continues across the three pairs of words.
mov rax, QWORD PTR [r8]
mov r9, QWORD PTR [r8+8]
mov r10, QWORD PTR [rdx]
mov r11, QWORD PTR [rdx+8]
pext rax, rax, r13
pext r9, r9, r13
add r10, rax
adc r11, r9
mov QWORD PTR [rcx], r10
mov QWORD PTR [rcx+8], r11
; r[2..3] = a[2..3] + masked m[2..3] + carry
mov rax, QWORD PTR [r8+16]
mov r9, QWORD PTR [r8+24]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
pext rax, rax, r13
pext r9, r9, r13
adc r10, rax
adc r11, r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
; r[4..5] = a[4..5] + masked m[4..5] + carry
mov rax, QWORD PTR [r8+32]
mov r9, QWORD PTR [r8+40]
mov r10, QWORD PTR [rdx+32]
mov r11, QWORD PTR [rdx+40]
pext rax, rax, r13
pext r9, r9, r13
adc r10, rax
adc r11, r9
mov QWORD PTR [rcx+32], r10
mov QWORD PTR [rcx+40], r11
adc r12, 0
; Shift the sum right by one bit; each SHRD pulls the low bit of the next
; higher word into bit 63. r10 and r11 alternate as the current/next word.
mov r10, QWORD PTR [rcx]
mov r11, QWORD PTR [rcx+8]
shrd r10, r11, 1
mov QWORD PTR [rcx], r10
mov r10, QWORD PTR [rcx+16]
shrd r11, r10, 1
mov QWORD PTR [rcx+8], r11
mov r11, QWORD PTR [rcx+24]
shrd r10, r11, 1
mov QWORD PTR [rcx+16], r10
mov r10, QWORD PTR [rcx+32]
shrd r11, r10, 1
mov QWORD PTR [rcx+24], r11
mov r11, QWORD PTR [rcx+40]
shrd r10, r11, 1
mov QWORD PTR [rcx+32], r10
; The carry of the addition (r12) becomes the top bit of the result.
shrd r11, r12, 1
mov QWORD PTR [rcx+40], r11
pop r13
pop r12
ret
sp_384_mont_div2_avx2_6 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible entry that could be being copied.
; * Entries 1 to 63 are all read and masked, so the memory accesses do not
; * depend on idx. Entry 0 is never read: an idx of 0 gives an all-zero
; * result.
; * The first 48 bytes of the chosen entry are written at offset 0 of r and
; * the second 48 bytes at offset 96 of r.
; *
; * r Point to copy into.
; * table Table - start of the entries to access (96 bytes per entry).
; * idx Index of entry to retrieve.
; */
_text SEGMENT READONLY PARA
sp_384_get_entry_64_6 PROC
        ; xmm6-xmm15 are callee-saved in the Microsoft x64 convention
        sub     rsp, 160
        movdqu  OWORD PTR [rsp], xmm6
        movdqu  OWORD PTR [rsp+16], xmm7
        movdqu  OWORD PTR [rsp+32], xmm8
        movdqu  OWORD PTR [rsp+48], xmm9
        movdqu  OWORD PTR [rsp+64], xmm10
        movdqu  OWORD PTR [rsp+80], xmm11
        movdqu  OWORD PTR [rsp+96], xmm12
        movdqu  OWORD PTR [rsp+112], xmm13
        movdqu  OWORD PTR [rsp+128], xmm14
        movdqu  OWORD PTR [rsp+144], xmm15
        ; xmm13 = idx in every lane, xmm15 = 1 in every lane
        movd    xmm13, r8d
        mov     eax, 1
        movd    xmm15, eax
        pshufd  xmm13, xmm13, 0
        pshufd  xmm15, xmm15, 0
        ; xmm14 = number of the entry being visited, starting from entry 1
        movdqa  xmm14, xmm15
        add     rdx, 96
        ; xmm0-xmm5 accumulate the selected entry
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        pxor    xmm3, xmm3
        pxor    xmm4, xmm4
        pxor    xmm5, xmm5
        ; 63 entries remain after entry 0
        mov     eax, 63
L_384_get_entry_64_6_start_0:
        ; xmm12 = all ones when this is the wanted entry, otherwise zero
        movdqa  xmm12, xmm14
        pcmpeqd xmm12, xmm13
        paddd   xmm14, xmm15
        ; First half of the entry
        movdqu  xmm6, OWORD PTR [rdx]
        movdqu  xmm7, OWORD PTR [rdx+16]
        movdqu  xmm8, OWORD PTR [rdx+32]
        pand    xmm6, xmm12
        pand    xmm7, xmm12
        pand    xmm8, xmm12
        por     xmm0, xmm6
        por     xmm1, xmm7
        por     xmm2, xmm8
        ; Second half of the entry
        movdqu  xmm9, OWORD PTR [rdx+48]
        movdqu  xmm10, OWORD PTR [rdx+64]
        movdqu  xmm11, OWORD PTR [rdx+80]
        pand    xmm9, xmm12
        pand    xmm10, xmm12
        pand    xmm11, xmm12
        por     xmm3, xmm9
        por     xmm4, xmm10
        por     xmm5, xmm11
        add     rdx, 96
        dec     eax
        jnz     L_384_get_entry_64_6_start_0
        movdqu  OWORD PTR [rcx], xmm0
        movdqu  OWORD PTR [rcx+16], xmm1
        movdqu  OWORD PTR [rcx+32], xmm2
        movdqu  OWORD PTR [rcx+96], xmm3
        movdqu  OWORD PTR [rcx+112], xmm4
        movdqu  OWORD PTR [rcx+128], xmm5
        movdqu  xmm6, OWORD PTR [rsp]
        movdqu  xmm7, OWORD PTR [rsp+16]
        movdqu  xmm8, OWORD PTR [rsp+32]
        movdqu  xmm9, OWORD PTR [rsp+48]
        movdqu  xmm10, OWORD PTR [rsp+64]
        movdqu  xmm11, OWORD PTR [rsp+80]
        movdqu  xmm12, OWORD PTR [rsp+96]
        movdqu  xmm13, OWORD PTR [rsp+112]
        movdqu  xmm14, OWORD PTR [rsp+128]
        movdqu  xmm15, OWORD PTR [rsp+144]
        add     rsp, 160
        ret
sp_384_get_entry_64_6 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible entry that could be being copied.
; * Entries 1 to 63 are all read and masked, so the memory accesses do not
; * depend on idx. Entry 0 is never read: an idx of 0 gives an all-zero
; * result.
; * The first 48 bytes of the chosen entry are written at offset 0 of r and
; * the second 48 bytes at offset 96 of r.
; *
; * r Point to copy into.
; * table Table - start of the entries to access (96 bytes per entry).
; * idx Index of entry to retrieve.
; */
_text SEGMENT READONLY PARA
sp_384_get_entry_64_avx2_6 PROC
        ; xmm6-xmm11 are callee-saved in the Microsoft x64 convention
        sub     rsp, 96
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        mov     rax, 1
        movd    xmm9, r8d
        ; Start from entry 1
        add     rdx, 96
        movd    xmm11, eax
        ; Visit entries 1 to 63 only, as sp_384_get_entry_64_6 does. A 64th
        ; pass would read the 96 bytes that follow a 64 entry table.
        mov     rax, 63
        ; Broadcast lane 0: ymm9 = idx in every lane, ymm11 = 1 in every lane
        vpxor   ymm10, ymm10, ymm10
        vpermd  ymm9, ymm10, ymm9
        vpermd  ymm11, ymm10, ymm11
        ; ymm0/xmm1 and ymm2/xmm3 accumulate the two halves of the entry
        vpxor   ymm0, ymm0, ymm0
        vpxor   xmm1, xmm1, xmm1
        vpxor   ymm2, ymm2, ymm2
        vpxor   xmm3, xmm3, xmm3
        ; ymm10 = number of the entry being visited
        vmovdqa ymm10, ymm11
L_384_get_entry_64_avx2_6_start:
        ; ymm8 = all ones when this is the wanted entry, otherwise zero
        vpcmpeqd        ymm8, ymm10, ymm9
        vpaddd  ymm10, ymm10, ymm11
        vmovupd ymm4, YMMWORD PTR [rdx]
        vmovdqu xmm5, OWORD PTR [rdx+32]
        vmovupd ymm6, YMMWORD PTR [rdx+48]
        vmovdqu xmm7, OWORD PTR [rdx+80]
        add     rdx, 96
        vpand   ymm4, ymm4, ymm8
        vpand   xmm5, xmm5, xmm8
        vpand   ymm6, ymm6, ymm8
        vpand   xmm7, xmm7, xmm8
        vpor    ymm0, ymm0, ymm4
        vpor    xmm1, xmm1, xmm5
        vpor    ymm2, ymm2, ymm6
        vpor    xmm3, xmm3, xmm7
        dec     rax
        jnz     L_384_get_entry_64_avx2_6_start
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovdqu OWORD PTR [rcx+32], xmm1
        vmovupd YMMWORD PTR [rcx+96], ymm2
        vmovdqu OWORD PTR [rcx+128], xmm3
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        add     rsp, 96
        ret
sp_384_get_entry_64_avx2_6 ENDP
_text ENDS
ENDIF
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible entry that could be being copied.
; * Entries 1 to 64 are all read and masked, so the memory accesses do not
; * depend on idx. Entry 0 is never read: an idx of 0 gives an all-zero
; * result.
; * The first 48 bytes of the chosen entry are written at offset 0 of r and
; * the second 48 bytes at offset 96 of r.
; *
; * r Point to copy into.
; * table Table - start of the entries to access (96 bytes per entry).
; * idx Index of entry to retrieve.
; */
_text SEGMENT READONLY PARA
sp_384_get_entry_65_6 PROC
        ; xmm6-xmm15 are callee-saved in the Microsoft x64 convention
        sub     rsp, 160
        movdqu  OWORD PTR [rsp], xmm6
        movdqu  OWORD PTR [rsp+16], xmm7
        movdqu  OWORD PTR [rsp+32], xmm8
        movdqu  OWORD PTR [rsp+48], xmm9
        movdqu  OWORD PTR [rsp+64], xmm10
        movdqu  OWORD PTR [rsp+80], xmm11
        movdqu  OWORD PTR [rsp+96], xmm12
        movdqu  OWORD PTR [rsp+112], xmm13
        movdqu  OWORD PTR [rsp+128], xmm14
        movdqu  OWORD PTR [rsp+144], xmm15
        ; xmm13 = idx in every lane, xmm15 = 1 in every lane
        movd    xmm13, r8d
        mov     eax, 1
        movd    xmm15, eax
        pshufd  xmm13, xmm13, 0
        pshufd  xmm15, xmm15, 0
        ; xmm14 = number of the entry being visited, starting from entry 1
        movdqa  xmm14, xmm15
        add     rdx, 96
        ; xmm0-xmm5 accumulate the selected entry
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        pxor    xmm3, xmm3
        pxor    xmm4, xmm4
        pxor    xmm5, xmm5
        ; 64 entries remain after entry 0
        mov     eax, 64
L_384_get_entry_65_6_start_0:
        ; xmm12 = all ones when this is the wanted entry, otherwise zero
        movdqa  xmm12, xmm14
        pcmpeqd xmm12, xmm13
        paddd   xmm14, xmm15
        ; First half of the entry
        movdqu  xmm6, OWORD PTR [rdx]
        movdqu  xmm7, OWORD PTR [rdx+16]
        movdqu  xmm8, OWORD PTR [rdx+32]
        pand    xmm6, xmm12
        pand    xmm7, xmm12
        pand    xmm8, xmm12
        por     xmm0, xmm6
        por     xmm1, xmm7
        por     xmm2, xmm8
        ; Second half of the entry
        movdqu  xmm9, OWORD PTR [rdx+48]
        movdqu  xmm10, OWORD PTR [rdx+64]
        movdqu  xmm11, OWORD PTR [rdx+80]
        pand    xmm9, xmm12
        pand    xmm10, xmm12
        pand    xmm11, xmm12
        por     xmm3, xmm9
        por     xmm4, xmm10
        por     xmm5, xmm11
        add     rdx, 96
        dec     eax
        jnz     L_384_get_entry_65_6_start_0
        movdqu  OWORD PTR [rcx], xmm0
        movdqu  OWORD PTR [rcx+16], xmm1
        movdqu  OWORD PTR [rcx+32], xmm2
        movdqu  OWORD PTR [rcx+96], xmm3
        movdqu  OWORD PTR [rcx+112], xmm4
        movdqu  OWORD PTR [rcx+128], xmm5
        movdqu  xmm6, OWORD PTR [rsp]
        movdqu  xmm7, OWORD PTR [rsp+16]
        movdqu  xmm8, OWORD PTR [rsp+32]
        movdqu  xmm9, OWORD PTR [rsp+48]
        movdqu  xmm10, OWORD PTR [rsp+64]
        movdqu  xmm11, OWORD PTR [rsp+80]
        movdqu  xmm12, OWORD PTR [rsp+96]
        movdqu  xmm13, OWORD PTR [rsp+112]
        movdqu  xmm14, OWORD PTR [rsp+128]
        movdqu  xmm15, OWORD PTR [rsp+144]
        add     rsp, 160
        ret
sp_384_get_entry_65_6 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible entry that could be being copied.
; * Entries 1 to 64 are all read and masked, so the memory accesses do not
; * depend on idx. Entry 0 is never read: an idx of 0 gives an all-zero
; * result.
; * The first 48 bytes of the chosen entry are written at offset 0 of r and
; * the second 48 bytes at offset 96 of r.
; *
; * r Point to copy into.
; * table Table - start of the entries to access (96 bytes per entry).
; * idx Index of entry to retrieve.
; */
_text SEGMENT READONLY PARA
sp_384_get_entry_65_avx2_6 PROC
        ; xmm6-xmm11 are callee-saved in the Microsoft x64 convention
        sub     rsp, 96
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        mov     rax, 1
        movd    xmm9, r8d
        ; Start from entry 1
        add     rdx, 96
        movd    xmm11, eax
        ; Visit entries 1 to 64 only, as sp_384_get_entry_65_6 does. A 65th
        ; pass would read the 96 bytes that follow a 65 entry table.
        mov     rax, 64
        ; Broadcast lane 0: ymm9 = idx in every lane, ymm11 = 1 in every lane
        vpxor   ymm10, ymm10, ymm10
        vpermd  ymm9, ymm10, ymm9
        vpermd  ymm11, ymm10, ymm11
        ; ymm0/xmm1 and ymm2/xmm3 accumulate the two halves of the entry
        vpxor   ymm0, ymm0, ymm0
        vpxor   xmm1, xmm1, xmm1
        vpxor   ymm2, ymm2, ymm2
        vpxor   xmm3, xmm3, xmm3
        ; ymm10 = number of the entry being visited
        vmovdqa ymm10, ymm11
L_384_get_entry_65_avx2_6_start:
        ; ymm8 = all ones when this is the wanted entry, otherwise zero
        vpcmpeqd        ymm8, ymm10, ymm9
        vpaddd  ymm10, ymm10, ymm11
        vmovupd ymm4, YMMWORD PTR [rdx]
        vmovdqu xmm5, OWORD PTR [rdx+32]
        vmovupd ymm6, YMMWORD PTR [rdx+48]
        vmovdqu xmm7, OWORD PTR [rdx+80]
        add     rdx, 96
        vpand   ymm4, ymm4, ymm8
        vpand   xmm5, xmm5, xmm8
        vpand   ymm6, ymm6, ymm8
        vpand   xmm7, xmm7, xmm8
        vpor    ymm0, ymm0, ymm4
        vpor    xmm1, xmm1, xmm5
        vpor    ymm2, ymm2, ymm6
        vpor    xmm3, xmm3, xmm7
        dec     rax
        jnz     L_384_get_entry_65_avx2_6_start
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovdqu OWORD PTR [rcx+32], xmm1
        vmovupd YMMWORD PTR [rcx+96], ymm2
        vmovdqu OWORD PTR [rcx+128], xmm3
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        add     rsp, 96
        ret
sp_384_get_entry_65_avx2_6 ENDP
_text ENDS
ENDIF
ENDIF
; /* Add 1 to a. (a = a + 1)
; * The carry is propagated through all 6 words; a carry out of the top
; * word is discarded.
; *
; * a A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_384_add_one_6 PROC
        ; Load all six words
        mov     rax, QWORD PTR [rcx]
        mov     rdx, QWORD PTR [rcx+8]
        mov     r8, QWORD PTR [rcx+16]
        mov     r9, QWORD PTR [rcx+24]
        mov     r10, QWORD PTR [rcx+32]
        mov     r11, QWORD PTR [rcx+40]
        ; Increment and ripple the carry upwards
        add     rax, 1
        adc     rdx, 0
        adc     r8, 0
        adc     r9, 0
        adc     r10, 0
        adc     r11, 0
        ; Write the result back
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], rdx
        mov     QWORD PTR [rcx+16], r8
        mov     QWORD PTR [rcx+24], r9
        mov     QWORD PTR [rcx+32], r10
        mov     QWORD PTR [rcx+40], r11
        ret
sp_384_add_one_6 ENDP
_text ENDS
; /* Read big endian unsigned byte array into r.
; * Uses the bswap instruction.
; * Groups of 8 bytes are taken from the end of the array, the 1 to 7 bytes
; * left over at the front form the next word and r is then filled with
; * zeros up to 6 words.
; *
; * r A single precision integer.
; * size Maximum number of bytes to convert (not used by this routine).
; * a Byte array.
; * n Number of bytes in array to read.
; */
_text SEGMENT READONLY PARA
sp_384_from_bin_bswap PROC
        push    r12
        ; r11 = one past the last input byte, r12 = end of the 6 word result
        lea     r11, [r8+r9]
        lea     r12, [rcx+48]
        jmp     L_384_from_bin_bswap_64_end
        ; 64 bytes at a time
L_384_from_bin_bswap_64_start:
        sub     r11, 64
        mov     rax, QWORD PTR [r11+56]
        mov     r10, QWORD PTR [r11+48]
        bswap   r10
        bswap   rax
        mov     QWORD PTR [rcx+8], r10
        mov     QWORD PTR [rcx], rax
        mov     rax, QWORD PTR [r11+40]
        mov     r10, QWORD PTR [r11+32]
        bswap   r10
        bswap   rax
        mov     QWORD PTR [rcx+24], r10
        mov     QWORD PTR [rcx+16], rax
        mov     rax, QWORD PTR [r11+24]
        mov     r10, QWORD PTR [r11+16]
        bswap   r10
        bswap   rax
        mov     QWORD PTR [rcx+40], r10
        mov     QWORD PTR [rcx+32], rax
        mov     rax, QWORD PTR [r11+8]
        mov     r10, QWORD PTR [r11]
        bswap   r10
        bswap   rax
        mov     QWORD PTR [rcx+56], r10
        mov     QWORD PTR [rcx+48], rax
        add     rcx, 64
        sub     r9, 64
L_384_from_bin_bswap_64_end:
        cmp     r9, 63
        jg      L_384_from_bin_bswap_64_start
        jmp     L_384_from_bin_bswap_8_end
        ; 8 bytes at a time
L_384_from_bin_bswap_8_start:
        sub     r11, 8
        mov     rax, QWORD PTR [r11]
        bswap   rax
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_384_from_bin_bswap_8_end:
        cmp     r9, 7
        jg      L_384_from_bin_bswap_8_start
        ; Anything left over? These are the first bytes of the array.
        test    r9, r9
        je      L_384_from_bin_bswap_hi_end
        xor     r10, r10
        xor     eax, eax
L_384_from_bin_bswap_hi_start:
        ; r10 = (r10 << 8) + next byte
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_384_from_bin_bswap_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_384_from_bin_bswap_hi_end:
        ; Zero the words of r that were not written
        cmp     rcx, r12
        jge     L_384_from_bin_bswap_zero_end
L_384_from_bin_bswap_zero_start:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, r12
        jl      L_384_from_bin_bswap_zero_start
L_384_from_bin_bswap_zero_end:
        pop     r12
        ret
sp_384_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
; * Uses the movbe instruction which is an optional instruction.
; * Groups of 8 bytes are taken from the end of the array, the 1 to 7 bytes
; * left over at the front form the next word and r is then filled with
; * zeros up to 6 words.
; *
; * r A single precision integer.
; * size Maximum number of bytes to convert (not used by this routine).
; * a Byte array.
; * n Number of bytes in array to read.
; */
_text SEGMENT READONLY PARA
sp_384_from_bin_movbe PROC
        push    r12
        ; r11 = one past the last input byte, r12 = end of the 6 word result
        lea     r11, [r8+r9]
        lea     r12, [rcx+48]
        jmp     L_384_from_bin_movbe_64_end
        ; 64 bytes at a time
L_384_from_bin_movbe_64_start:
        sub     r11, 64
        movbe   rax, QWORD PTR [r11+56]
        movbe   r10, QWORD PTR [r11+48]
        mov     QWORD PTR [rcx+8], r10
        mov     QWORD PTR [rcx], rax
        movbe   rax, QWORD PTR [r11+40]
        movbe   r10, QWORD PTR [r11+32]
        mov     QWORD PTR [rcx+24], r10
        mov     QWORD PTR [rcx+16], rax
        movbe   rax, QWORD PTR [r11+24]
        movbe   r10, QWORD PTR [r11+16]
        mov     QWORD PTR [rcx+40], r10
        mov     QWORD PTR [rcx+32], rax
        movbe   rax, QWORD PTR [r11+8]
        movbe   r10, QWORD PTR [r11]
        mov     QWORD PTR [rcx+56], r10
        mov     QWORD PTR [rcx+48], rax
        add     rcx, 64
        sub     r9, 64
L_384_from_bin_movbe_64_end:
        cmp     r9, 63
        jg      L_384_from_bin_movbe_64_start
        jmp     L_384_from_bin_movbe_8_end
        ; 8 bytes at a time
L_384_from_bin_movbe_8_start:
        sub     r11, 8
        movbe   rax, QWORD PTR [r11]
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_384_from_bin_movbe_8_end:
        cmp     r9, 7
        jg      L_384_from_bin_movbe_8_start
        ; Anything left over? These are the first bytes of the array.
        test    r9, r9
        je      L_384_from_bin_movbe_hi_end
        xor     r10, r10
        xor     eax, eax
L_384_from_bin_movbe_hi_start:
        ; r10 = (r10 << 8) + next byte
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_384_from_bin_movbe_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_384_from_bin_movbe_hi_end:
        ; Zero the words of r that were not written
        xor     eax, eax
        cmp     rcx, r12
        jge     L_384_from_bin_movbe_zero_end
L_384_from_bin_movbe_zero_start:
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        cmp     rcx, r12
        jl      L_384_from_bin_movbe_zero_start
L_384_from_bin_movbe_zero_end:
        pop     r12
        ret
sp_384_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 48
; * Uses the bswap instruction.
; * The most significant word of r is written first.
; *
; * r A single precision integer.
; * a Byte array.
; */
_text SEGMENT READONLY PARA
sp_384_to_bin_bswap_6 PROC
        ; Words 5 and 4 become bytes 0 to 15
        mov     r9, QWORD PTR [rcx+40]
        mov     r10, QWORD PTR [rcx+32]
        bswap   r9
        bswap   r10
        mov     QWORD PTR [rdx], r9
        mov     QWORD PTR [rdx+8], r10
        ; Words 3 and 2 become bytes 16 to 31
        mov     r9, QWORD PTR [rcx+24]
        mov     r10, QWORD PTR [rcx+16]
        bswap   r9
        bswap   r10
        mov     QWORD PTR [rdx+16], r9
        mov     QWORD PTR [rdx+24], r10
        ; Words 1 and 0 become bytes 32 to 47
        mov     r9, QWORD PTR [rcx+8]
        mov     r10, QWORD PTR [rcx]
        bswap   r9
        bswap   r10
        mov     QWORD PTR [rdx+32], r9
        mov     QWORD PTR [rdx+40], r10
        ret
sp_384_to_bin_bswap_6 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 48
; * Uses the movbe instruction which is optional.
; * The most significant word of r is written first.
; *
; * r A single precision integer.
; * a Byte array.
; */
_text SEGMENT READONLY PARA
sp_384_to_bin_movbe_6 PROC
        ; Words are loaded as they are and byte swapped by the movbe store.
        ; Words 5 and 4 become bytes 0 to 15
        mov     r9, QWORD PTR [rcx+40]
        mov     r10, QWORD PTR [rcx+32]
        movbe   QWORD PTR [rdx], r9
        movbe   QWORD PTR [rdx+8], r10
        ; Words 3 and 2 become bytes 16 to 31
        mov     r9, QWORD PTR [rcx+24]
        mov     r10, QWORD PTR [rcx+16]
        movbe   QWORD PTR [rdx+16], r9
        movbe   QWORD PTR [rdx+24], r10
        ; Words 1 and 0 become bytes 32 to 47
        mov     r9, QWORD PTR [rcx+8]
        mov     r10, QWORD PTR [rcx]
        movbe   QWORD PTR [rdx+32], r9
        movbe   QWORD PTR [rdx+40], r10
        ret
sp_384_to_bin_movbe_6 ENDP
_text ENDS
ENDIF
; /* Sub b from a into a. (a -= b)
; *
; * a A single precision integer and result.
; * b A single precision integer.
; * returns 0 when there is no borrow out of the top word and -1 otherwise.
; */
_text SEGMENT READONLY PARA
sp_384_sub_in_place_6 PROC
        ; Read all of b before a is modified. rdx is loaded last as it is
        ; the pointer to b.
        mov     r8, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     r11, QWORD PTR [rdx+24]
        mov     rax, QWORD PTR [rdx+32]
        mov     rdx, QWORD PTR [rdx+40]
        ; Subtract with the borrow rippling upwards
        sub     QWORD PTR [rcx], r8
        sbb     QWORD PTR [rcx+8], r9
        sbb     QWORD PTR [rcx+16], r10
        sbb     QWORD PTR [rcx+24], r11
        sbb     QWORD PTR [rcx+32], rax
        sbb     QWORD PTR [rcx+40], rdx
        ; Turn the final borrow into the return value
        sbb     rax, rax
        ret
sp_384_sub_in_place_6 ENDP
_text ENDS
; /* Mul a by digit b into r. (r = a * b)
; * Seven words of r are written.
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision digit.
; */
_text SEGMENT READONLY PARA
sp_384_mul_d_6 PROC
        ; mul writes rdx:rax, so a is kept in r9. r10 holds the word carried
        ; into the next product. The high word of a product plus the carry
        ; of one add cannot overflow 64 bits.
        mov     r9, rdx
        ; A[0] * B
        mov     rax, r8
        mul     QWORD PTR [r9]
        mov     QWORD PTR [rcx], rax
        mov     r10, rdx
        ; A[1] * B
        mov     rax, r8
        mul     QWORD PTR [r9+8]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+8], rax
        mov     r10, rdx
        ; A[2] * B
        mov     rax, r8
        mul     QWORD PTR [r9+16]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+16], rax
        mov     r10, rdx
        ; A[3] * B
        mov     rax, r8
        mul     QWORD PTR [r9+24]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+24], rax
        mov     r10, rdx
        ; A[4] * B
        mov     rax, r8
        mul     QWORD PTR [r9+32]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+32], rax
        mov     r10, rdx
        ; A[5] * B
        mov     rax, r8
        mul     QWORD PTR [r9+40]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+40], rax
        mov     QWORD PTR [rcx+48], rdx
        ret
sp_384_mul_d_6 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; * Uses the mulx instruction. Seven words of r are written.
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision digit.
; */
_text SEGMENT READONLY PARA
sp_384_mul_d_avx2_6 PROC
        ; mulx multiplies by rdx, so a moves to rax and b into rdx.
        ; mulx and mov do not change the flags, which lets one add/adc chain
        ; run across all of the products. r9 and r11 alternate as the high
        ; word of a product and r10 is the low word.
        mov     rax, rdx
        mov     rdx, r8
        ; A[0] * B
        mulx    r9, r10, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r10
        ; A[1] * B
        mulx    r11, r10, QWORD PTR [rax+8]
        add     r10, r9
        mov     QWORD PTR [rcx+8], r10
        ; A[2] * B
        mulx    r9, r10, QWORD PTR [rax+16]
        adc     r10, r11
        mov     QWORD PTR [rcx+16], r10
        ; A[3] * B
        mulx    r11, r10, QWORD PTR [rax+24]
        adc     r10, r9
        mov     QWORD PTR [rcx+24], r10
        ; A[4] * B
        mulx    r9, r10, QWORD PTR [rax+32]
        adc     r10, r11
        mov     QWORD PTR [rcx+32], r10
        ; A[5] * B
        mulx    r11, r10, QWORD PTR [rax+40]
        adc     r10, r9
        adc     r11, 0
        mov     QWORD PTR [rcx+40], r10
        mov     QWORD PTR [rcx+48], r11
        ret
sp_384_mul_d_avx2_6 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; * The div instruction faults when div is zero or when the quotient does
; * not fit in 64 bits (d1 >= div).
; *
; * d1 The high order half of the number to divide.
; * d0 The low order half of the number to divide.
; * div The divisor.
; * returns the result of the division.
; */
_text SEGMENT READONLY PARA
div_384_word_asm_6 PROC
        ; div takes the number to divide in rdx:rax: rax = d0, rdx = d1
        mov     rax, rdx
        mov     rdx, rcx
        div     r8
        ; The quotient is left in rax
        ret
div_384_word_asm_6 ENDP
_text ENDS
ENDIF
; /* Shift number right by 1 bit. (r = a >> 1)
; * The top bit of the result is zero.
; *
; * r Result of right shift by 1.
; * a Number to shift.
; */
_text SEGMENT READONLY PARA
sp_384_rshift1_6 PROC
        ; Load all six words; rdx is loaded last as it is the pointer to a
        mov     rax, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+24]
        mov     r11, QWORD PTR [rdx+32]
        mov     rdx, QWORD PTR [rdx+40]
        ; Each word takes its new top bit from the word above
        shrd    rax, r8, 1
        mov     QWORD PTR [rcx], rax
        shrd    r8, r9, 1
        mov     QWORD PTR [rcx+8], r8
        shrd    r9, r10, 1
        mov     QWORD PTR [rcx+16], r9
        shrd    r10, r11, 1
        mov     QWORD PTR [rcx+24], r10
        shrd    r11, rdx, 1
        mov     QWORD PTR [rcx+32], r11
        shr     rdx, 1
        mov     QWORD PTR [rcx+40], rdx
        ret
sp_384_rshift1_6 ENDP
_text ENDS
; /* Divide the number by 2 mod the prime. (r = a / 2 % m)
; * When a is odd the modulus is added first, then the sum (including the
; * carry out of the top word) is shifted right by one bit.
; * Branches on the low bit of a.
; *
; * r Result of division by 2.
; * a Number to divide.
; * m Modulus
; */
_text SEGMENT READONLY PARA
sp_384_div2_mod_6 PROC
        push    r12
        ; Load a; rdx is loaded last as it is the pointer to a
        mov     rax, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     r11, QWORD PTR [rdx+24]
        mov     r12, QWORD PTR [rdx+32]
        mov     rdx, QWORD PTR [rdx+40]
        test    al, 1
        jz      L_384_mod_inv_6_div2_mod_no_add
        ; a is odd: add the modulus so that the sum is even
        add     rax, QWORD PTR [r8]
        adc     r9, QWORD PTR [r8+8]
        adc     r10, QWORD PTR [r8+16]
        adc     r11, QWORD PTR [r8+24]
        adc     r12, QWORD PTR [r8+32]
        adc     rdx, QWORD PTR [r8+40]
        ; r8 = carry out of the addition. mov is used to clear r8 because,
        ; unlike xor, it keeps the carry flag.
        mov     r8, 0
        adc     r8, 0
        jmp     L_384_mod_inv_6_div2_mod_shift
L_384_mod_inv_6_div2_mod_no_add:
        ; a is even: nothing was added, so nothing is carried
        xor     r8, r8
L_384_mod_inv_6_div2_mod_shift:
        ; Shift right by one bit; the carry becomes the top bit
        shrd    rax, r9, 1
        shrd    r9, r10, 1
        shrd    r10, r11, 1
        shrd    r11, r12, 1
        shrd    r12, rdx, 1
        shrd    rdx, r8, 1
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r9
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        mov     QWORD PTR [rcx+32], r12
        mov     QWORD PTR [rcx+40], rdx
        pop     r12
        ret
sp_384_div2_mod_6 ENDP
_text ENDS
; /* Count the number of bits needed to hold a.
; * The words are searched from the most significant one down for the first
; * that is not zero.
; *
; * a A single precision integer (6 words).
; * returns 1 + the position of the highest set bit, or 0 when a is zero.
; */
_text SEGMENT READONLY PARA
sp_384_num_bits_6 PROC
        ; r8 = index of the word being examined, from 5 down to 0
        mov     r8d, 5
L_384_num_bits_6_next:
        mov     rdx, QWORD PTR [rcx+r8*8]
        test    rdx, rdx
        jnz     L_384_num_bits_6_found
        dec     r8
        jns     L_384_num_bits_6_next
        ; Every word is zero
        xor     eax, eax
        jmp     L_384_num_bits_6_done
L_384_num_bits_6_found:
        ; bits = 64 * index + position of the top bit in the word + 1
        bsr     rax, rdx
        shl     r8, 6
        lea     rax, [rax+r8+1]
L_384_num_bits_6_done:
        ret
sp_384_num_bits_6 ENDP
_text ENDS
ENDIF
IFDEF WOLFSSL_SP_521
; /* Multiply a and b into r. (r = a * b)
; * Column by column multiplication of two 9 word numbers into an 18 word
; * result. r10, r11 and r12 rotate as a three word accumulator: once a
; * column is complete its low word is stored and that register is cleared
; * to take the carries of the column after next.
; * The low 9 words of the result are built in a 72 byte buffer on the stack
; * and copied into r at the end; the high 9 words are stored straight to r.
; * In the comments below A[i] is read through r9 (a) and B[j] through r8 (b).
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_521_mul_9 PROC
push r12
; mul writes rdx:rax, so a is kept in r9
mov r9, rdx
; Stack buffer for result words 0 to 8
sub rsp, 72
; A[0] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9]
xor r12, r12
mov QWORD PTR [rsp], rax
mov r11, rdx
; A[0] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+8], r11
; A[0] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+16], r12
; A[0] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+24], r10
; A[0] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+32], r11
; A[0] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+40], r12
; A[0] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+48], r10
; A[0] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+56], r11
; A[0] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+64], r12
; From here on the result words are stored directly into r
; A[1] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+8]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+72], r10
; A[2] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+16]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+80], r11
; A[3] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+24]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+88], r12
; A[4] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+32]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+96], r10
; A[5] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+40]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+104], r11
; A[6] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+48]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+112], r12
; A[7] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+56]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+120], r10
; A[8] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
mov QWORD PTR [rcx+128], r11
mov QWORD PTR [rcx+136], r12
; Copy result words 0 to 8 from the stack buffer into r
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r10, QWORD PTR [rsp+16]
mov r11, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov rax, QWORD PTR [rsp+32]
mov rdx, QWORD PTR [rsp+40]
mov r10, QWORD PTR [rsp+48]
mov r11, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], rdx
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
mov rax, QWORD PTR [rsp+64]
mov QWORD PTR [rcx+64], rax
add rsp, 72
pop r12
ret
sp_521_mul_9 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
; *
; * r Result of multiplication.
; * a First number to multiply.
; * b Second number to multiply.
; */
_text SEGMENT READONLY PARA
sp_521_mul_avx2_9 PROC
push rbx
push rbp
push r12
push r13
push r14
push r15
mov rbp, r8
mov r8, rcx
mov r9, rdx
sub rsp, 72
cmp r9, r8
mov rbx, rsp
cmovne rbx, r8
cmp rbp, r8
cmove rbx, rsp
add r8, 72
xor r15, r15
mov rdx, QWORD PTR [r9]
; A[0] * B[0]
mulx r11, r10, QWORD PTR [rbp]
; A[0] * B[1]
mulx r12, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx], r10
adcx r11, rax
; A[0] * B[2]
mulx r13, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+8], r11
adcx r12, rax
mov QWORD PTR [rbx+16], r12
; A[0] * B[3]
mulx r10, rax, QWORD PTR [rbp+24]
adcx r13, rax
; A[0] * B[4]
mulx r11, rax, QWORD PTR [rbp+32]
mov QWORD PTR [rbx+24], r13
adcx r10, rax
; A[0] * B[5]
mulx r12, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+32], r10
adcx r11, rax
mov QWORD PTR [rbx+40], r11
; A[0] * B[6]
mulx r13, rax, QWORD PTR [rbp+48]
adcx r12, rax
; A[0] * B[7]
mulx r10, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+48], r12
adcx r13, rax
; A[0] * B[8]
mulx r11, rax, QWORD PTR [rbp+64]
mov QWORD PTR [rbx+56], r13
adcx r10, rax
adcx r11, r15
mov r14, r15
adcx r14, r15
mov QWORD PTR [rbx+64], r10
mov QWORD PTR [r8], r11
mov rdx, QWORD PTR [r9+8]
mov r11, QWORD PTR [rbx+8]
mov r12, QWORD PTR [rbx+16]
mov r13, QWORD PTR [rbx+24]
mov r10, QWORD PTR [rbx+32]
; A[1] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[1] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+8], r11
adcx r12, rax
adox r13, rcx
; A[1] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+16], r12
adcx r13, rax
adox r10, rcx
mov QWORD PTR [rbx+24], r13
mov r11, QWORD PTR [rbx+40]
mov r12, QWORD PTR [rbx+48]
mov r13, QWORD PTR [rbx+56]
; A[1] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
adcx r10, rax
adox r11, rcx
; A[1] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
mov QWORD PTR [rbx+32], r10
adcx r11, rax
adox r12, rcx
; A[1] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+40], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+48], r12
mov r10, QWORD PTR [rbx+64]
mov r11, QWORD PTR [r8]
; A[1] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r13, rax
adox r10, rcx
; A[1] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+56], r13
adcx r10, rax
adox r11, rcx
; A[1] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
mov QWORD PTR [rbx+64], r10
mov r12, r15
adcx r11, rax
adox r12, rcx
adcx r12, r14
mov r14, r15
adox r14, r15
adcx r14, r15
mov QWORD PTR [r8], r11
mov QWORD PTR [r8+8], r12
mov rdx, QWORD PTR [r9+16]
mov r12, QWORD PTR [rbx+16]
mov r13, QWORD PTR [rbx+24]
mov r10, QWORD PTR [rbx+32]
mov r11, QWORD PTR [rbx+40]
; A[2] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r13, rcx
; A[2] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+16], r12
adcx r13, rax
adox r10, rcx
; A[2] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+24], r13
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+32], r10
mov r12, QWORD PTR [rbx+48]
mov r13, QWORD PTR [rbx+56]
mov r10, QWORD PTR [rbx+64]
; A[2] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
adcx r11, rax
adox r12, rcx
; A[2] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
mov QWORD PTR [rbx+40], r11
adcx r12, rax
adox r13, rcx
; A[2] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+48], r12
adcx r13, rax
adox r10, rcx
mov QWORD PTR [rbx+56], r13
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[2] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r10, rax
adox r11, rcx
; A[2] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+64], r10
adcx r11, rax
adox r12, rcx
; A[2] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
mov QWORD PTR [r8], r11
mov r13, r15
adcx r12, rax
adox r13, rcx
adcx r13, r14
mov r14, r15
adox r14, r15
adcx r14, r15
mov QWORD PTR [r8+8], r12
mov QWORD PTR [r8+16], r13
mov rdx, QWORD PTR [r9+24]
mov r13, QWORD PTR [rbx+24]
mov r10, QWORD PTR [rbx+32]
mov r11, QWORD PTR [rbx+40]
mov r12, QWORD PTR [rbx+48]
; A[3] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r13, rax
adox r10, rcx
; A[3] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+24], r13
adcx r10, rax
adox r11, rcx
; A[3] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+32], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+40], r11
mov r13, QWORD PTR [rbx+56]
mov r10, QWORD PTR [rbx+64]
mov r11, QWORD PTR [r8]
; A[3] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
adcx r12, rax
adox r13, rcx
; A[3] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
mov QWORD PTR [rbx+48], r12
adcx r13, rax
adox r10, rcx
; A[3] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+56], r13
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+64], r10
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[3] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r11, rax
adox r12, rcx
; A[3] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[3] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
mov QWORD PTR [r8+8], r12
mov r10, r15
adcx r13, rax
adox r10, rcx
adcx r10, r14
mov r14, r15
adox r14, r15
adcx r14, r15
mov QWORD PTR [r8+16], r13
mov QWORD PTR [r8+24], r10
mov rdx, QWORD PTR [r9+32]
mov r10, QWORD PTR [rbx+32]
mov r11, QWORD PTR [rbx+40]
mov r12, QWORD PTR [rbx+48]
mov r13, QWORD PTR [rbx+56]
; A[4] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[4] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+32], r10
adcx r11, rax
adox r12, rcx
; A[4] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+40], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+48], r12
mov r10, QWORD PTR [rbx+64]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[4] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
adcx r13, rax
adox r10, rcx
; A[4] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
mov QWORD PTR [rbx+56], r13
adcx r10, rax
adox r11, rcx
; A[4] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+64], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8], r11
mov r13, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[4] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r12, rax
adox r13, rcx
; A[4] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r10, rcx
; A[4] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
mov QWORD PTR [r8+16], r13
mov r11, r15
adcx r10, rax
adox r11, rcx
adcx r11, r14
mov r14, r15
adox r14, r15
adcx r14, r15
mov QWORD PTR [r8+24], r10
mov QWORD PTR [r8+32], r11
mov rdx, QWORD PTR [r9+40]
mov r11, QWORD PTR [rbx+40]
mov r12, QWORD PTR [rbx+48]
mov r13, QWORD PTR [rbx+56]
mov r10, QWORD PTR [rbx+64]
; A[5] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[5] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+40], r11
adcx r12, rax
adox r13, rcx
; A[5] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+48], r12
adcx r13, rax
adox r10, rcx
mov QWORD PTR [rbx+56], r13
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[5] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
adcx r10, rax
adox r11, rcx
; A[5] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
mov QWORD PTR [rbx+64], r10
adcx r11, rax
adox r12, rcx
; A[5] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+8], r12
mov r10, QWORD PTR [r8+24]
mov r11, QWORD PTR [r8+32]
; A[5] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r13, rax
adox r10, rcx
; A[5] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+16], r13
adcx r10, rax
adox r11, rcx
; A[5] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
mov QWORD PTR [r8+24], r10
mov r12, r15
adcx r11, rax
adox r12, rcx
adcx r12, r14
mov r14, r15
adox r14, r15
adcx r14, r15
mov QWORD PTR [r8+32], r11
mov QWORD PTR [r8+40], r12
mov rdx, QWORD PTR [r9+48]
mov r12, QWORD PTR [rbx+48]
mov r13, QWORD PTR [rbx+56]
mov r10, QWORD PTR [rbx+64]
mov r11, QWORD PTR [r8]
; A[6] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r13, rcx
; A[6] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+48], r12
adcx r13, rax
adox r10, rcx
; A[6] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+56], r13
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+64], r10
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[6] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
adcx r11, rax
adox r12, rcx
; A[6] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[6] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r10, rcx
mov QWORD PTR [r8+16], r13
mov r11, QWORD PTR [r8+32]
mov r12, QWORD PTR [r8+40]
; A[6] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r10, rax
adox r11, rcx
; A[6] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+24], r10
adcx r11, rax
adox r12, rcx
; A[6] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
mov QWORD PTR [r8+32], r11
mov r13, r15
adcx r12, rax
adox r13, rcx
adcx r13, r14
mov r14, r15
adox r14, r15
adcx r14, r15
mov QWORD PTR [r8+40], r12
mov QWORD PTR [r8+48], r13
mov rdx, QWORD PTR [r9+56]
mov r13, QWORD PTR [rbx+56]
mov r10, QWORD PTR [rbx+64]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[7] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r13, rax
adox r10, rcx
; A[7] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+56], r13
adcx r10, rax
adox r11, rcx
; A[7] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+64], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8], r11
mov r13, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
mov r11, QWORD PTR [r8+32]
; A[7] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
adcx r12, rax
adox r13, rcx
; A[7] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r10, rcx
; A[7] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+16], r13
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+24], r10
mov r12, QWORD PTR [r8+40]
mov r13, QWORD PTR [r8+48]
; A[7] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r11, rax
adox r12, rcx
; A[7] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+32], r11
adcx r12, rax
adox r13, rcx
; A[7] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
mov QWORD PTR [r8+40], r12
mov r10, r15
adcx r13, rax
adox r10, rcx
adcx r10, r14
mov r14, r15
adox r14, r15
adcx r14, r15
mov QWORD PTR [r8+48], r13
mov QWORD PTR [r8+56], r10
mov rdx, QWORD PTR [r9+64]
mov r10, QWORD PTR [rbx+64]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[8] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[8] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+64], r10
adcx r11, rax
adox r12, rcx
; A[8] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+8], r12
mov r10, QWORD PTR [r8+24]
mov r11, QWORD PTR [r8+32]
mov r12, QWORD PTR [r8+40]
; A[8] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
adcx r13, rax
adox r10, rcx
; A[8] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
mov QWORD PTR [r8+16], r13
adcx r10, rax
adox r11, rcx
; A[8] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+24], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+32], r11
mov r13, QWORD PTR [r8+48]
mov r10, QWORD PTR [r8+56]
; A[8] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r12, rax
adox r13, rcx
; A[8] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+40], r12
adcx r13, rax
adox r10, rcx
; A[8] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
mov QWORD PTR [r8+48], r13
mov r11, r15
adcx r10, rax
adox r11, rcx
adcx r11, r14
mov QWORD PTR [r8+56], r10
mov QWORD PTR [r8+64], r11
sub r8, 72
cmp r9, r8
je L_start_521_mul_avx2_9
cmp rbp, r8
jne L_end_521_mul_avx2_9
L_start_521_mul_avx2_9:
vmovdqu xmm0, OWORD PTR [rbx]
vmovups OWORD PTR [r8], xmm0
vmovdqu xmm0, OWORD PTR [rbx+16]
vmovups OWORD PTR [r8+16], xmm0
vmovdqu xmm0, OWORD PTR [rbx+32]
vmovups OWORD PTR [r8+32], xmm0
vmovdqu xmm0, OWORD PTR [rbx+48]
vmovups OWORD PTR [r8+48], xmm0
mov rax, QWORD PTR [rbx+64]
mov QWORD PTR [r8+64], rax
L_end_521_mul_avx2_9:
add rsp, 72
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
ret
sp_521_mul_avx2_9 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r. (r = a * a)
; *
; * Register arguments as used below: rcx = r, rdx = a. a is copied to r8
; * first because every mul overwrites rdx.
; * Reads the nine 64-bit words a[0..8] and writes the eighteen 64-bit words
; * r[0..17].
; *
; * The result is produced one column (result word) at a time. r9, r10 and
; * r11 rotate as a three-word accumulator: the lowest word is stored as the
; * finished column, then that register is zeroed and becomes the top word
; * for the next column.
; * Columns 1-4 and 12-15 add each cross product A[i] * A[j] (i != j) twice.
; * Columns 5-11 sum the cross products once in r12:r13:r14, double that sum,
; * add the square term when the column has one, and add the total into the
; * accumulator.
; *
; * Result words 0..8 are collected in a 72-byte stack buffer and copied to r
; * only after the last read of a; words 9..17 are stored directly to r.
; * (Presumably this is what allows r and a to be the same buffer - confirm
; * against callers.)
; *
; * r A single precision integer.
; * a A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_521_sqr_9 PROC
; r12, r13 and r14 are used as scratch below; they are restored before ret.
push r12
push r13
push r14
; r8 = a
mov r8, rdx
; [rsp] .. [rsp+64]: buffer for result words 0..8
sub rsp, 72
; Column 0 -> [rsp]; the high word starts column 1 in r10
; A[0] * A[0]
mov rax, QWORD PTR [r8]
mul rax
xor r11, r11
mov QWORD PTR [rsp], rax
mov r10, rdx
; Column 1 -> [rsp+8]; accumulator r10:r11:r9 (low to high)
; A[0] * A[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r8]
xor r9, r9
; The product is added twice (A[0]*A[1] + A[1]*A[0])
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+8], r10
; Column 2 -> [rsp+16]; accumulator r11:r9:r10
; A[0] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[1] * A[1]
; Square terms are added once only
mov rax, QWORD PTR [r8+8]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rsp+16], r11
; Column 3 -> [rsp+24]; accumulator r9:r10:r11
; A[0] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[1] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8+8]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+24], r9
; Column 4 -> [rsp+32]; accumulator r10:r11:r9
; A[0] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[1] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+8]
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[2] * A[2]
mov rax, QWORD PTR [r8+16]
mul rax
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+32], r10
; Column 5 -> [rsp+40]; accumulator r11:r9:r10
; From here to column 11 the cross products are summed once in r12:r13:r14
; and the sum is doubled before it is added to the accumulator.
; A[0] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; Double r12:r13:r14
add r12, r12
adc r13, r13
adc r14, r14
; Add the doubled sum into the accumulator
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+40], r11
; Column 6 -> [rsp+48]; accumulator r9:r10:r11
; A[0] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[3]
; The square is computed into rdx:rax first (mul leaves r12-r14 untouched),
; the cross-product sum is doubled, then the square is added once.
mov rax, QWORD PTR [r8+24]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+48], r9
; Column 7 -> [rsp+56]; accumulator r10:r11:r9
; A[0] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rsp+56], r10
; Column 8 -> [rsp+64]; accumulator r11:r9:r10
; A[0] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[4]
mov rax, QWORD PTR [r8+32]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+64], r11
; Column 9 -> r[9]; accumulator r9:r10:r11
; Columns 9..17 are stored straight to r (offsets 72 and up).
; A[1] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[2] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rcx+72], r9
; Column 10 -> r[10]; accumulator r10:r11:r9
; A[2] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+16]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[3] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[5]
mov rax, QWORD PTR [r8+40]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+80], r10
; Column 11 -> r[11]; accumulator r11:r9:r10
; A[3] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+24]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[4] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rcx+88], r11
; Column 12 -> r[12]; accumulator r9:r10:r11
; From here on each cross product is again added twice directly.
; A[4] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+32]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[5] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+40]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[6] * A[6]
mov rax, QWORD PTR [r8+48]
mul rax
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+96], r9
; Column 13 -> r[13]; accumulator r10:r11:r9
; A[5] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+40]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[6] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+48]
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rcx+104], r10
; Column 14 -> r[14]; accumulator r11:r9:r10
; A[6] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+48]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[7] * A[7]
mov rax, QWORD PTR [r8+56]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rcx+112], r11
; Column 15 -> r[15]; accumulator r9:r10:r11
; A[7] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+56]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+120], r9
; Columns 16 and 17 -> r[16], r[17]; the last product is added into r10:r11
; A[8] * A[8]
mov rax, QWORD PTR [r8+64]
mul rax
add r10, rax
adc r11, rdx
mov QWORD PTR [rcx+128], r10
mov QWORD PTR [rcx+136], r11
; All reads of a are done: copy result words 0..8 from the stack buffer to r
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r12, QWORD PTR [rsp+16]
mov r13, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r12
mov QWORD PTR [rcx+24], r13
mov rax, QWORD PTR [rsp+32]
mov rdx, QWORD PTR [rsp+40]
mov r12, QWORD PTR [rsp+48]
mov r13, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], rdx
mov QWORD PTR [rcx+48], r12
mov QWORD PTR [rcx+56], r13
mov rax, QWORD PTR [rsp+64]
mov QWORD PTR [rcx+64], rax
; Release the buffer and restore the saved registers in reverse push order
add rsp, 72
pop r14
pop r13
pop r12
ret
sp_521_sqr_9 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * Variant built on mulx with two independent add chains: adcx (uses the
; * carry flag) and adox (uses the overflow flag).
; * Register arguments as used below: rcx = r (copied to r8), rdx = a (copied
; * to r9, since rdx is the implicit multiplicand of mulx).
; * Reads the nine 64-bit words a[0..8] and writes the eighteen 64-bit words
; * r[0..17].
; *
; * Method: each of the 36 products A[i] x A[j] with i < j is added once, in
; * four passes ("Diagonal 1" to "Diagonal 4"). The last pass doubles that
; * sum and adds the squares A[i] x A[i].
; *
; * Where the result words are held while being built:
; *   words 0..3    [rbp] .. [rbp+24]; rbp is r when r != a, otherwise a
; *                 buffer on the stack that is copied to r at the end
; *   words 4..8    registers r14, r15, rdi, rsi, rbx (stored at the end)
; *   words 9..17   [r8] .. [r8+64], with r8 = r + 72
; * r12 is zeroed once and only read after that. r13 receives the final
; * carries of each pass and is added into the top word of the next pass.
; *
; * r A single precision integer.
; * a A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_521_sqr_avx2_9 PROC
; All eight registers pushed here are overwritten below and restored, in
; reverse order, before ret.
push rbp
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; r8 = r, r9 = a
mov r8, rcx
mov r9, rdx
sub rsp, 72
; rbp = stack buffer when r == a, otherwise r (mov and cmovne do not change
; the flags set by cmp)
cmp r9, r8
mov rbp, rsp
cmovne rbp, r8
; r8 = r + 72: [r8+k] addresses result word 9 + k/8
add r8, 72
; r12 = 0; this also clears CF and OF ahead of the adcx/adox chains
xor r12, r12
; Diagonal 1
; Products A[0] x A[1..8] and A[8] x A[1]; rdx = A[0]
; A[1] x A[0]
; low word -> word 1, high word (r11) begins word 2
mov rdx, QWORD PTR [r9]
mulx r11, r10, QWORD PTR [r9+8]
mov QWORD PTR [rbp+8], r10
; A[2] x A[0]
; low word completes word 2, high word (r10) begins word 3
mulx r10, rax, QWORD PTR [r9+16]
adcx r11, rax
adox r10, r12
mov QWORD PTR [rbp+16], r11
; A[3] x A[0]
; low word completes word 3, high word begins word 4 in r14
mulx r14, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r14, r12
mov QWORD PTR [rbp+24], r10
; A[4] x A[0]
; word 4 stays in r14, high word begins word 5 in r15
mulx r15, rax, QWORD PTR [r9+32]
adcx r14, rax
adox r15, r12
; A[5] x A[0]
; word 5 stays in r15, high word begins word 6 in rdi
mulx rdi, rax, QWORD PTR [r9+40]
adcx r15, rax
adox rdi, r12
; A[6] x A[0]
; word 6 stays in rdi, high word begins word 7 in rsi
mulx rsi, rax, QWORD PTR [r9+48]
adcx rdi, rax
adox rsi, r12
; A[7] x A[0]
; word 7 stays in rsi, high word begins word 8 in rbx
mulx rbx, rax, QWORD PTR [r9+56]
adcx rsi, rax
adox rbx, r12
; A[8] x A[0]
; word 8 stays in rbx, high word (r10) begins word 9
mulx r10, rax, QWORD PTR [r9+64]
adcx rbx, rax
adox r10, r12
; A[8] x A[1]
; rdx = A[1]; low word goes into word 9, high word (r11) begins word 10
mov rdx, QWORD PTR [r9+8]
mulx r11, rax, QWORD PTR [r9+64]
adcx r10, rax
adox r11, r12
mov QWORD PTR [r8], r10
; Carry
; Fold the pending CF into word 10, then collect what is left of CF and OF
; in r13 for the next pass
adcx r11, r12
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [r8+8], r11
; Diagonal 2
; Products A[1] x A[2..7] and A[7] x A[2..4]; rdx is still A[1]
; Reload word 3
mov r11, QWORD PTR [rbp+24]
; A[2] x A[1]
; low word into word 3, high word into word 4 (r14)
mulx rcx, rax, QWORD PTR [r9+16]
adcx r11, rax
adox r14, rcx
mov QWORD PTR [rbp+24], r11
; A[3] x A[1]
; words 4 (r14) and 5 (r15)
mulx rcx, rax, QWORD PTR [r9+24]
adcx r14, rax
adox r15, rcx
; A[4] x A[1]
; words 5 (r15) and 6 (rdi)
mulx rcx, rax, QWORD PTR [r9+32]
adcx r15, rax
adox rdi, rcx
; A[5] x A[1]
; words 6 (rdi) and 7 (rsi)
mulx rcx, rax, QWORD PTR [r9+40]
adcx rdi, rax
adox rsi, rcx
; A[6] x A[1]
; words 7 (rsi) and 8 (rbx)
mulx rcx, rax, QWORD PTR [r9+48]
adcx rsi, rax
adox rbx, rcx
; Load word 9
mov r11, QWORD PTR [r8]
; A[7] x A[1]
; words 8 (rbx) and 9 (r11)
mulx rcx, rax, QWORD PTR [r9+56]
adcx rbx, rax
adox r11, rcx
; Load word 10
mov r10, QWORD PTR [r8+8]
; A[7] x A[2]
; words 9 (r11) and 10 (r10)
mov rdx, QWORD PTR [r9+16]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8], r11
; A[7] x A[3]
; low word into word 10, high word (r11) begins word 11
mov rdx, QWORD PTR [r9+24]
mulx r11, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, r12
mov QWORD PTR [r8+8], r10
; A[7] x A[4]
; low word into word 11, high word (r10) begins word 12
mov rdx, QWORD PTR [r9+32]
mulx r10, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r10, r12
mov QWORD PTR [r8+16], r11
; Carry
; Add the previous pass's r13 (plus pending CF) into the top word, then
; collect this pass's final CF and OF in r13
adcx r10, r13
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [r8+24], r10
; Diagonal 3
; Products A[2] x A[3..6], A[6] x A[3..5] and A[8] x A[4..5]
; A[3] x A[2]
; rdx = A[2]; words 5 (r15) and 6 (rdi)
mov rdx, QWORD PTR [r9+16]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r15, rax
adox rdi, rcx
; A[4] x A[2]
; words 6 (rdi) and 7 (rsi)
mulx rcx, rax, QWORD PTR [r9+32]
adcx rdi, rax
adox rsi, rcx
; A[5] x A[2]
; words 7 (rsi) and 8 (rbx)
mulx rcx, rax, QWORD PTR [r9+40]
adcx rsi, rax
adox rbx, rcx
; Load word 9
mov r10, QWORD PTR [r8]
; A[6] x A[2]
; words 8 (rbx) and 9 (r10)
mulx rcx, rax, QWORD PTR [r9+48]
adcx rbx, rax
adox r10, rcx
; Load word 10
mov r11, QWORD PTR [r8+8]
; A[6] x A[3]
; words 9 (r10) and 10 (r11)
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8], r10
; Load word 11
mov r10, QWORD PTR [r8+16]
; A[6] x A[4]
; words 10 (r11) and 11 (r10)
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+8], r11
; Load word 12
mov r11, QWORD PTR [r8+24]
; A[6] x A[5]
; words 11 (r10) and 12 (r11)
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+16], r10
; A[8] x A[4]
; low word into word 12, high word (r10) begins word 13
mov rdx, QWORD PTR [r9+32]
mulx r10, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r10, r12
mov QWORD PTR [r8+24], r11
; A[8] x A[5]
; low word into word 13, high word (r11) begins word 14
mov rdx, QWORD PTR [r9+40]
mulx r11, rax, QWORD PTR [r9+64]
adcx r10, rax
adox r11, r12
mov QWORD PTR [r8+32], r10
; Carry
; Same hand-over as at the end of Diagonal 2
adcx r11, r13
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [r8+40], r11
; Diagonal 4
; Products A[3] x A[4..5], A[4] x A[5], A[8] x A[2..3], A[7] x A[5..6],
; A[8] x A[6..7]
; A[4] x A[3]
; rdx = A[3]; words 7 (rsi) and 8 (rbx)
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+32]
adcx rsi, rax
adox rbx, rcx
; Load word 9
mov r11, QWORD PTR [r8]
; A[5] x A[3]
; words 8 (rbx) and 9 (r11)
mulx rcx, rax, QWORD PTR [r9+40]
adcx rbx, rax
adox r11, rcx
; Load word 10
mov r10, QWORD PTR [r8+8]
; A[5] x A[4]
; words 9 (r11) and 10 (r10)
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8], r11
; Load word 11
mov r11, QWORD PTR [r8+16]
; A[8] x A[2]
; words 10 (r10) and 11 (r11)
mov rdx, QWORD PTR [r9+16]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+8], r10
; Load word 12
mov r10, QWORD PTR [r8+24]
; A[8] x A[3]
; words 11 (r11) and 12 (r10)
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+16], r11
; Load word 13
mov r11, QWORD PTR [r8+32]
; A[7] x A[5]
; words 12 (r10) and 13 (r11)
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+24], r10
; Load word 14
mov r10, QWORD PTR [r8+40]
; A[7] x A[6]
; rdx = A[6]; words 13 (r11) and 14 (r10)
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+32], r11
; A[8] x A[6]
; rdx is still A[6]; low word into word 14, high word (r11) begins word 15
mulx r11, rax, QWORD PTR [r9+64]
adcx r10, rax
adox r11, r12
mov QWORD PTR [r8+40], r10
; A[8] x A[7]
; low word into word 15, high word (r10) begins word 16
mov rdx, QWORD PTR [r9+56]
mulx r10, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r10, r12
mov QWORD PTR [r8+48], r11
; Carry
; Word 16 takes the previous pass's r13; the final CF and OF collected in
; r13 become word 17
adcx r10, r13
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [r8+56], r10
mov QWORD PTR [r8+64], r13
; Double and Add in A[i] x A[i]
; Each word is doubled with adox (x + x + OF) and the square's low/high
; words are added with adcx, so the two carries stay on separate flags.
; Load word 1
mov r11, QWORD PTR [rbp+8]
; A[0] x A[0]
; low word is word 0; high word is added to the doubled word 1
mov rdx, QWORD PTR [r9]
mulx rcx, rax, rdx
mov QWORD PTR [rbp], rax
adox r11, r11
adcx r11, rcx
mov QWORD PTR [rbp+8], r11
; Load words 2 and 3
mov r10, QWORD PTR [rbp+16]
mov r11, QWORD PTR [rbp+24]
; A[1] x A[1]
mov rdx, QWORD PTR [r9+8]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+16], r10
mov QWORD PTR [rbp+24], r11
; A[2] x A[2]
; words 4 (r14) and 5 (r15)
mov rdx, QWORD PTR [r9+16]
mulx rcx, rax, rdx
adox r14, r14
adox r15, r15
adcx r14, rax
adcx r15, rcx
; A[3] x A[3]
; words 6 (rdi) and 7 (rsi)
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, rdx
adox rdi, rdi
adox rsi, rsi
adcx rdi, rax
adcx rsi, rcx
; Load word 9
mov r11, QWORD PTR [r8]
; A[4] x A[4]
; words 8 (rbx) and 9 (r11)
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, rdx
adox rbx, rbx
adox r11, r11
adcx rbx, rax
adcx r11, rcx
mov QWORD PTR [r8], r11
; Load words 10 and 11
mov r10, QWORD PTR [r8+8]
mov r11, QWORD PTR [r8+16]
; A[5] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+8], r10
mov QWORD PTR [r8+16], r11
; Load words 12 and 13
mov r10, QWORD PTR [r8+24]
mov r11, QWORD PTR [r8+32]
; A[6] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+24], r10
mov QWORD PTR [r8+32], r11
; Load words 14 and 15
mov r10, QWORD PTR [r8+40]
mov r11, QWORD PTR [r8+48]
; A[7] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+40], r10
mov QWORD PTR [r8+48], r11
; Load words 16 and 17
mov r10, QWORD PTR [r8+56]
mov r11, QWORD PTR [r8+64]
; A[8] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+56], r10
mov QWORD PTR [r8+64], r11
; No more reads of a follow: store words 4..8 from their registers
; ([r8-40] .. [r8-8] is r+32 .. r+64)
mov QWORD PTR [r8+-40], r14
mov QWORD PTR [r8+-32], r15
mov QWORD PTR [r8+-24], rdi
mov QWORD PTR [r8+-16], rsi
mov QWORD PTR [r8+-8], rbx
; r8 = r again
sub r8, 72
; When r != a, words 0..3 were written to r directly and nothing is copied
cmp r9, r8
jne L_end_521_sqr_avx2_9
; r == a: copy words 0..3 (32 bytes) from the stack buffer to r
vmovdqu xmm0, OWORD PTR [rbp]
vmovups OWORD PTR [r8], xmm0
vmovdqu xmm0, OWORD PTR [rbp+16]
vmovups OWORD PTR [r8+16], xmm0
L_end_521_sqr_avx2_9:
; Release the stack buffer and restore the saved registers
add rsp, 72
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rbp
ret
sp_521_sqr_avx2_9 ENDP
_text ENDS
ENDIF
; /* Add b to a into r. (r = a + b)
; *
; * Adds nine 64-bit words, lowest first, on one carry chain.
; * Register arguments as used below: rcx = r, rdx = a, r8 = b.
; * rax returns the carry out of the top word (0 or 1).
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_521_add_9 PROC
; rax = 0; it picks up the final carry at the end
xor rax, rax
; Word 0 starts the carry chain
mov r10, QWORD PTR [rdx]
add r10, QWORD PTR [r8]
; Words 1..8: r10 and r11 alternate. The next word of a is loaded before
; the previous sum is stored; mov does not change the carry flag.
mov r11, QWORD PTR [rdx+8]
mov QWORD PTR [rcx], r10
adc r11, QWORD PTR [r8+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [rcx+8], r11
adc r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [rdx+24]
mov QWORD PTR [rcx+16], r10
adc r11, QWORD PTR [r8+24]
mov r10, QWORD PTR [rdx+32]
mov QWORD PTR [rcx+24], r11
adc r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [rdx+40]
mov QWORD PTR [rcx+32], r10
adc r11, QWORD PTR [r8+40]
mov r10, QWORD PTR [rdx+48]
mov QWORD PTR [rcx+40], r11
adc r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+48], r10
adc r11, QWORD PTR [r8+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [rcx+56], r11
adc r10, QWORD PTR [r8+64]
mov QWORD PTR [rcx+64], r10
; Return the carry out of word 8
adc rax, 0
ret
sp_521_add_9 ENDP
_text ENDS
; /* Sub b from a into r. (r = a - b)
; *
; * Subtracts nine 64-bit words, lowest first, on one borrow chain.
; * Register arguments as used below: rcx = r, rdx = a, r8 = b.
; * rax returns 0 when there is no borrow out of the top word and all ones
; * (-1) when there is.
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_521_sub_9 PROC
; Word 0 starts the borrow chain
mov r10, QWORD PTR [rdx]
sub r10, QWORD PTR [r8]
; Words 1..8: r10 and r11 alternate. The next word of a is loaded before
; the previous difference is stored; mov does not change the carry flag.
mov r11, QWORD PTR [rdx+8]
mov QWORD PTR [rcx], r10
sbb r11, QWORD PTR [r8+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [rcx+8], r11
sbb r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [rdx+24]
mov QWORD PTR [rcx+16], r10
sbb r11, QWORD PTR [r8+24]
mov r10, QWORD PTR [rdx+32]
mov QWORD PTR [rcx+24], r11
sbb r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [rdx+40]
mov QWORD PTR [rcx+32], r10
sbb r11, QWORD PTR [r8+40]
mov r10, QWORD PTR [rdx+48]
mov QWORD PTR [rcx+40], r11
sbb r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+48], r10
sbb r11, QWORD PTR [r8+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [rcx+56], r11
sbb r10, QWORD PTR [r8+64]
mov QWORD PTR [rcx+64], r10
; rax = rax - rax - borrow: 0 or -1 whatever rax held on entry
sbb rax, rax
ret
sp_521_sub_9 ENDP
_text ENDS
; /* Conditionally copy a into r using the mask m.
; * m is -1 to copy and 0 when not.
; *
; * Register arguments as used below: rcx = r, rdx = a, r8 = m.
; * For each of the nine 64-bit words: r[i] ^= (r[i] ^ a[i]) & m.
; * The same instructions run for either mask value; there is no branch.
; *
; * r A single precision number to copy over.
; * a A single precision number to copy.
; * m Mask value to apply.
; */
_text SEGMENT READONLY PARA
sp_521_cond_copy_9 PROC
; r12 is needed as a fifth temporary; restored before ret
push r12
; Words 0..4: compute (r[i] ^ a[i]) & m for all five before any store
mov r12, QWORD PTR [rcx]
xor r12, QWORD PTR [rdx]
and r12, r8
mov r11, QWORD PTR [rcx+8]
xor r11, QWORD PTR [rdx+8]
and r11, r8
mov r10, QWORD PTR [rcx+16]
xor r10, QWORD PTR [rdx+16]
and r10, r8
mov r9, QWORD PTR [rcx+24]
xor r9, QWORD PTR [rdx+24]
and r9, r8
mov rax, QWORD PTR [rcx+32]
xor rax, QWORD PTR [rdx+32]
and rax, r8
; Apply: r[i] becomes a[i] when m is -1 and is unchanged when m is 0
xor QWORD PTR [rcx], r12
xor QWORD PTR [rcx+8], r11
xor QWORD PTR [rcx+16], r10
xor QWORD PTR [rcx+24], r9
xor QWORD PTR [rcx+32], rax
; Words 5..8: same again with four temporaries
mov r11, QWORD PTR [rcx+40]
xor r11, QWORD PTR [rdx+40]
and r11, r8
mov r10, QWORD PTR [rcx+48]
xor r10, QWORD PTR [rdx+48]
and r10, r8
mov r9, QWORD PTR [rcx+56]
xor r9, QWORD PTR [rdx+56]
and r9, r8
mov rax, QWORD PTR [rcx+64]
xor rax, QWORD PTR [rdx+64]
and rax, r8
xor QWORD PTR [rcx+40], r11
xor QWORD PTR [rcx+48], r10
xor QWORD PTR [rcx+56], r9
xor QWORD PTR [rcx+64], rax
pop r12
ret
sp_521_cond_copy_9 ENDP
_text ENDS
; /* Multiply two Montgomery form numbers mod the modulus (prime).
; * (r = a * b mod m)
; *
; * r Result of multiplication.
; * a First number to multiply in Montgomery form.
; * b Second number to multiply in Montgomery form.
; * m Modulus (prime).
; * mp Montgomery multiplier.
; */
_text SEGMENT READONLY PARA
sp_521_mont_mul_9 PROC
push r12
push r13
push r14
push r15
mov r9, rdx
sub rsp, 144
; A[0] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9]
xor r15, r15
mov QWORD PTR [rsp], rax
mov r14, rdx
; A[0] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9]
xor r13, r13
add r14, rax
adc r15, rdx
adc r13, 0
; A[1] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+8]
add r14, rax
adc r15, rdx
adc r13, 0
mov QWORD PTR [rsp+8], r14
; A[0] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9]
xor r14, r14
add r15, rax
adc r13, rdx
adc r14, 0
; A[1] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+8]
add r15, rax
adc r13, rdx
adc r14, 0
; A[2] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+16]
add r15, rax
adc r13, rdx
adc r14, 0
mov QWORD PTR [rsp+16], r15
; A[0] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9]
xor r15, r15
add r13, rax
adc r14, rdx
adc r15, 0
; A[1] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+8]
add r13, rax
adc r14, rdx
adc r15, 0
; A[2] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+16]
add r13, rax
adc r14, rdx
adc r15, 0
; A[3] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+24]
add r13, rax
adc r14, rdx
adc r15, 0
mov QWORD PTR [rsp+24], r13
; A[0] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9]
xor r13, r13
add r14, rax
adc r15, rdx
adc r13, 0
; A[1] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+8]
add r14, rax
adc r15, rdx
adc r13, 0
; A[2] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+16]
add r14, rax
adc r15, rdx
adc r13, 0
; A[3] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+24]
add r14, rax
adc r15, rdx
adc r13, 0
; A[4] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+32]
add r14, rax
adc r15, rdx
adc r13, 0
mov QWORD PTR [rsp+32], r14
; A[0] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9]
xor r14, r14
add r15, rax
adc r13, rdx
adc r14, 0
; A[1] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+8]
add r15, rax
adc r13, rdx
adc r14, 0
; A[2] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+16]
add r15, rax
adc r13, rdx
adc r14, 0
; A[3] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+24]
add r15, rax
adc r13, rdx
adc r14, 0
; A[4] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+32]
add r15, rax
adc r13, rdx
adc r14, 0
; A[5] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+40]
add r15, rax
adc r13, rdx
adc r14, 0
mov QWORD PTR [rsp+40], r15
; A[0] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9]
xor r15, r15
add r13, rax
adc r14, rdx
adc r15, 0
; A[1] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+8]
add r13, rax
adc r14, rdx
adc r15, 0
; A[2] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+16]
add r13, rax
adc r14, rdx
adc r15, 0
; A[3] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+24]
add r13, rax
adc r14, rdx
adc r15, 0
; A[4] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+32]
add r13, rax
adc r14, rdx
adc r15, 0
; A[5] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+40]
add r13, rax
adc r14, rdx
adc r15, 0
; A[6] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+48]
add r13, rax
adc r14, rdx
adc r15, 0
mov QWORD PTR [rsp+48], r13
; A[0] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9]
xor r13, r13
add r14, rax
adc r15, rdx
adc r13, 0
; A[1] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+8]
add r14, rax
adc r15, rdx
adc r13, 0
; A[2] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+16]
add r14, rax
adc r15, rdx
adc r13, 0
; A[3] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+24]
add r14, rax
adc r15, rdx
adc r13, 0
; A[4] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+32]
add r14, rax
adc r15, rdx
adc r13, 0
; A[5] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+40]
add r14, rax
adc r15, rdx
adc r13, 0
; A[6] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+48]
add r14, rax
adc r15, rdx
adc r13, 0
; A[7] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+56]
add r14, rax
adc r15, rdx
adc r13, 0
mov QWORD PTR [rsp+56], r14
; A[0] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9]
xor r14, r14
add r15, rax
adc r13, rdx
adc r14, 0
; A[1] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+8]
add r15, rax
adc r13, rdx
adc r14, 0
; A[2] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+16]
add r15, rax
adc r13, rdx
adc r14, 0
; A[3] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+24]
add r15, rax
adc r13, rdx
adc r14, 0
; A[4] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+32]
add r15, rax
adc r13, rdx
adc r14, 0
; A[5] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+40]
add r15, rax
adc r13, rdx
adc r14, 0
; A[6] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+48]
add r15, rax
adc r13, rdx
adc r14, 0
; A[7] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+56]
add r15, rax
adc r13, rdx
adc r14, 0
; A[8] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+64]
add r15, rax
adc r13, rdx
adc r14, 0
mov QWORD PTR [rsp+64], r15
; A[1] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+8]
xor r15, r15
add r13, rax
adc r14, rdx
adc r15, 0
; A[2] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+16]
add r13, rax
adc r14, rdx
adc r15, 0
; A[3] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+24]
add r13, rax
adc r14, rdx
adc r15, 0
; A[4] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+32]
add r13, rax
adc r14, rdx
adc r15, 0
; A[5] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+40]
add r13, rax
adc r14, rdx
adc r15, 0
; A[6] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+48]
add r13, rax
adc r14, rdx
adc r15, 0
; A[7] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+56]
add r13, rax
adc r14, rdx
adc r15, 0
; A[8] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+64]
add r13, rax
adc r14, rdx
adc r15, 0
mov QWORD PTR [rsp+72], r13
; A[2] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+16]
xor r13, r13
add r14, rax
adc r15, rdx
adc r13, 0
; A[3] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+24]
add r14, rax
adc r15, rdx
adc r13, 0
; A[4] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+32]
add r14, rax
adc r15, rdx
adc r13, 0
; A[5] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+40]
add r14, rax
adc r15, rdx
adc r13, 0
; A[6] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+48]
add r14, rax
adc r15, rdx
adc r13, 0
; A[7] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+56]
add r14, rax
adc r15, rdx
adc r13, 0
; A[8] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+64]
add r14, rax
adc r15, rdx
adc r13, 0
mov QWORD PTR [rsp+80], r14
; A[3] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+24]
xor r14, r14
add r15, rax
adc r13, rdx
adc r14, 0
; A[4] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+32]
add r15, rax
adc r13, rdx
adc r14, 0
; A[5] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+40]
add r15, rax
adc r13, rdx
adc r14, 0
; A[6] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+48]
add r15, rax
adc r13, rdx
adc r14, 0
; A[7] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+56]
add r15, rax
adc r13, rdx
adc r14, 0
; A[8] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+64]
add r15, rax
adc r13, rdx
adc r14, 0
mov QWORD PTR [rsp+88], r15
; A[4] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+32]
xor r15, r15
add r13, rax
adc r14, rdx
adc r15, 0
; A[5] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+40]
add r13, rax
adc r14, rdx
adc r15, 0
; A[6] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+48]
add r13, rax
adc r14, rdx
adc r15, 0
; A[7] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+56]
add r13, rax
adc r14, rdx
adc r15, 0
; A[8] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+64]
add r13, rax
adc r14, rdx
adc r15, 0
mov QWORD PTR [rsp+96], r13
; A[5] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+40]
xor r13, r13
add r14, rax
adc r15, rdx
adc r13, 0
; A[6] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+48]
add r14, rax
adc r15, rdx
adc r13, 0
; A[7] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+56]
add r14, rax
adc r15, rdx
adc r13, 0
; A[8] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+64]
add r14, rax
adc r15, rdx
adc r13, 0
mov QWORD PTR [rsp+104], r14
; A[6] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+48]
xor r14, r14
add r15, rax
adc r13, rdx
adc r14, 0
; A[7] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+56]
add r15, rax
adc r13, rdx
adc r14, 0
; A[8] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+64]
add r15, rax
adc r13, rdx
adc r14, 0
mov QWORD PTR [rsp+112], r15
; A[7] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+56]
xor r15, r15
add r13, rax
adc r14, rdx
adc r15, 0
; A[8] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+64]
add r13, rax
adc r14, rdx
adc r15, 0
mov QWORD PTR [rsp+120], r13
; A[8] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+64]
add r14, rax
adc r15, rdx
mov QWORD PTR [rsp+128], r14
mov QWORD PTR [rsp+136], r15
mov rax, QWORD PTR [rsp+64]
mov rdx, QWORD PTR [rsp+72]
mov r13, QWORD PTR [rsp+80]
mov r12, rax
and r12, 511
mov r14, QWORD PTR [rsp+88]
mov r15, QWORD PTR [rsp+96]
mov r8, QWORD PTR [rsp+104]
mov r9, QWORD PTR [rsp+112]
mov r10, QWORD PTR [rsp+120]
mov r11, QWORD PTR [rsp+128]
shrd rax, rdx, 9
shrd rdx, r13, 9
shrd r13, r14, 9
shrd r14, r15, 9
shrd r15, r8, 9
shrd r8, r9, 9
shrd r9, r10, 9
shrd r10, r11, 9
shr r11, 9
add rax, QWORD PTR [rsp]
adc rdx, QWORD PTR [rsp+8]
adc r13, QWORD PTR [rsp+16]
adc r14, QWORD PTR [rsp+24]
adc r15, QWORD PTR [rsp+32]
adc r8, QWORD PTR [rsp+40]
adc r9, QWORD PTR [rsp+48]
adc r10, QWORD PTR [rsp+56]
adc r12, r11
mov r11, r12
shr r12, 9
and r11, 511
add rax, r12
adc rdx, 0
adc r13, 0
adc r14, 0
adc r15, 0
adc r8, 0
adc r9, 0
adc r10, 0
adc r11, 0
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r13
mov QWORD PTR [rcx+24], r14
mov QWORD PTR [rcx+32], r15
mov QWORD PTR [rcx+40], r8
mov QWORD PTR [rcx+48], r9
mov QWORD PTR [rcx+56], r10
mov QWORD PTR [rcx+64], r11
add rsp, 144
pop r15
pop r14
pop r13
pop r12
ret
sp_521_mont_mul_9 ENDP
_text ENDS
; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
; *
; * Builds the full 18-word square of the 9-word input in a stack buffer, then
; * adds the product bits from bit 521 upwards onto the low 521 bits (a
; * congruence modulo 2^521 - 1). The m and mp arguments are never read.
; *
; * r Result of squaring. (rcx)
; * a Number to square in Montgomery form. (rdx)
; * m Modulus (prime). (r8, overwritten without being read)
; * mp Montgomery multiplier. (r9, overwritten without being read)
; */
_text SEGMENT READONLY PARA
sp_521_mont_sqr_9 PROC
push r12
push r13
push r14
push r15
; r8 = a for the rest of the routine
mov r8, rdx
; 144 bytes of stack hold the 18-word product
sub rsp, 144
; A[0] * A[0]
mov rax, QWORD PTR [r8]
mul rax
xor r12, r12
mov QWORD PTR [rsp], rax
mov r11, rdx
; Columns 1..4: each off-diagonal product is added twice, squares once
; A[0] * A[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r8]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+8], r11
; A[0] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * A[1]
mov rax, QWORD PTR [r8+8]
mul rax
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+16], r12
; A[0] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8+8]
add r10, rax
adc r11, rdx
adc r12, 0
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+24], r10
; A[0] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+8]
add r11, rax
adc r12, rdx
adc r10, 0
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * A[2]
mov rax, QWORD PTR [r8+16]
mul rax
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+32], r11
; Columns 5..11: the off-diagonal products of a column are summed in
; r13:r14:r15, doubled once, the diagonal square (if any) is added, and the
; total is then added to the running column accumulator
; A[0] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8]
xor r11, r11
xor r15, r15
mov r13, rax
mov r14, rdx
; A[1] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+8]
add r13, rax
adc r14, rdx
adc r15, 0
; A[2] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+16]
add r13, rax
adc r14, rdx
adc r15, 0
add r13, r13
adc r14, r14
adc r15, r15
add r12, r13
adc r10, r14
adc r11, r15
mov QWORD PTR [rsp+40], r12
; A[0] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8]
xor r12, r12
xor r15, r15
mov r13, rax
mov r14, rdx
; A[1] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+8]
add r13, rax
adc r14, rdx
adc r15, 0
; A[2] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+16]
add r13, rax
adc r14, rdx
adc r15, 0
; A[3] * A[3]
mov rax, QWORD PTR [r8+24]
mul rax
add r13, r13
adc r14, r14
adc r15, r15
add r13, rax
adc r14, rdx
adc r15, 0
add r10, r13
adc r11, r14
adc r12, r15
mov QWORD PTR [rsp+48], r10
; A[0] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8]
xor r10, r10
xor r15, r15
mov r13, rax
mov r14, rdx
; A[1] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+8]
add r13, rax
adc r14, rdx
adc r15, 0
; A[2] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+16]
add r13, rax
adc r14, rdx
adc r15, 0
; A[3] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+24]
add r13, rax
adc r14, rdx
adc r15, 0
add r13, r13
adc r14, r14
adc r15, r15
add r11, r13
adc r12, r14
adc r10, r15
mov QWORD PTR [rsp+56], r11
; A[0] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8]
xor r11, r11
xor r15, r15
mov r13, rax
mov r14, rdx
; A[1] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+8]
add r13, rax
adc r14, rdx
adc r15, 0
; A[2] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+16]
add r13, rax
adc r14, rdx
adc r15, 0
; A[3] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+24]
add r13, rax
adc r14, rdx
adc r15, 0
; A[4] * A[4]
mov rax, QWORD PTR [r8+32]
mul rax
add r13, r13
adc r14, r14
adc r15, r15
add r13, rax
adc r14, rdx
adc r15, 0
add r12, r13
adc r10, r14
adc r11, r15
mov QWORD PTR [rsp+64], r12
; A[1] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+8]
xor r12, r12
xor r15, r15
mov r13, rax
mov r14, rdx
; A[2] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+16]
add r13, rax
adc r14, rdx
adc r15, 0
; A[3] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+24]
add r13, rax
adc r14, rdx
adc r15, 0
; A[4] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+32]
add r13, rax
adc r14, rdx
adc r15, 0
add r13, r13
adc r14, r14
adc r15, r15
add r10, r13
adc r11, r14
adc r12, r15
mov QWORD PTR [rsp+72], r10
; A[2] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+16]
xor r10, r10
xor r15, r15
mov r13, rax
mov r14, rdx
; A[3] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+24]
add r13, rax
adc r14, rdx
adc r15, 0
; A[4] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+32]
add r13, rax
adc r14, rdx
adc r15, 0
; A[5] * A[5]
mov rax, QWORD PTR [r8+40]
mul rax
add r13, r13
adc r14, r14
adc r15, r15
add r13, rax
adc r14, rdx
adc r15, 0
add r11, r13
adc r12, r14
adc r10, r15
mov QWORD PTR [rsp+80], r11
; A[3] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+24]
xor r11, r11
xor r15, r15
mov r13, rax
mov r14, rdx
; A[4] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+32]
add r13, rax
adc r14, rdx
adc r15, 0
; A[5] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+40]
add r13, rax
adc r14, rdx
adc r15, 0
add r13, r13
adc r14, r14
adc r15, r15
add r12, r13
adc r10, r14
adc r11, r15
mov QWORD PTR [rsp+88], r12
; Columns 12..16: back to adding each off-diagonal product twice in place
; A[4] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+32]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+40]
add r10, rax
adc r11, rdx
adc r12, 0
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * A[6]
mov rax, QWORD PTR [r8+48]
mul rax
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+96], r10
; A[5] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+40]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+48]
add r11, rax
adc r12, rdx
adc r10, 0
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+104], r11
; A[6] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+48]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * A[7]
mov rax, QWORD PTR [r8+56]
mul rax
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+112], r12
; A[7] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+56]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+120], r10
; A[8] * A[8]
mov rax, QWORD PTR [r8+64]
mul rax
add r11, rax
adc r12, rdx
mov QWORD PTR [rsp+128], r11
; Word 17 is stored here but is not read by the reduction below
mov QWORD PTR [rsp+136], r12
; Reduction: load product words 8..16; r9 keeps the low 9 bits of word 8
mov r10, QWORD PTR [rsp+64]
mov r11, QWORD PTR [rsp+72]
mov r12, QWORD PTR [rsp+80]
mov r9, r10
and r9, 511
mov rax, QWORD PTR [rsp+88]
mov rdx, QWORD PTR [rsp+96]
mov r13, QWORD PTR [rsp+104]
mov r14, QWORD PTR [rsp+112]
mov r15, QWORD PTR [rsp+120]
mov r8, QWORD PTR [rsp+128]
; Shift words 8..16 right by 9 bits: they now hold the product from bit 521 up
shrd r10, r11, 9
shrd r11, r12, 9
shrd r12, rax, 9
shrd rax, rdx, 9
shrd rdx, r13, 9
shrd r13, r14, 9
shrd r14, r15, 9
shrd r15, r8, 9
shr r8, 9
; Add the high part to product words 0..7; top word = low 9 bits of word 8 + r8
add r10, QWORD PTR [rsp]
adc r11, QWORD PTR [rsp+8]
adc r12, QWORD PTR [rsp+16]
adc rax, QWORD PTR [rsp+24]
adc rdx, QWORD PTR [rsp+32]
adc r13, QWORD PTR [rsp+40]
adc r14, QWORD PTR [rsp+48]
adc r15, QWORD PTR [rsp+56]
adc r9, r8
; Second fold: bits of the top word above bit 9 (r9 >> 9) are added back to
; word 0, and the top word is masked to 9 bits
mov r8, r9
shr r9, 9
and r8, 511
add r10, r9
adc r11, 0
adc r12, 0
adc rax, 0
adc rdx, 0
adc r13, 0
adc r14, 0
adc r15, 0
adc r8, 0
; Store the 9-word result to r
mov QWORD PTR [rcx], r10
mov QWORD PTR [rcx+8], r11
mov QWORD PTR [rcx+16], r12
mov QWORD PTR [rcx+24], rax
mov QWORD PTR [rcx+32], rdx
mov QWORD PTR [rcx+40], r13
mov QWORD PTR [rcx+48], r14
mov QWORD PTR [rcx+56], r15
mov QWORD PTR [rcx+64], r8
add rsp, 144
pop r15
pop r14
pop r13
pop r12
ret
sp_521_mont_sqr_9 ENDP
_text ENDS
; /* Compare a with b in constant time.
; *
; * The nine words are compared from the most significant (offset 64) down to
; * the least significant (offset 0) using conditional moves only; every word
; * of both inputs is always loaded and there are no branches.
; *
; * a A single precision integer. (rcx)
; * b A single precision integer. (rdx)
; * return -ve, 0 or +ve if a is less than, equal to or greater than b
; * respectively. (rax is -1, 0 or 1)
; */
_text SEGMENT READONLY PARA
sp_521_cmp_9 PROC
push r12
; r9 = 0, r10 = 1: constant sources for the conditional moves
; r8 = mask: all ones until the first differing word is found, then zero
; rax = result so far, starts as -1
xor r9, r9
mov r8, -1
mov rax, -1
mov r10, 1
; Word 8 (most significant). Per word: mask both inputs, subtract,
; rax = 1 if a > b, rax = r8 if a < b, and clear the mask if they differ so
; that all lower words compare as equal
mov r11, QWORD PTR [rcx+64]
mov r12, QWORD PTR [rdx+64]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; Word 7
mov r11, QWORD PTR [rcx+56]
mov r12, QWORD PTR [rdx+56]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; Word 6
mov r11, QWORD PTR [rcx+48]
mov r12, QWORD PTR [rdx+48]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; Word 5
mov r11, QWORD PTR [rcx+40]
mov r12, QWORD PTR [rdx+40]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; Word 4
mov r11, QWORD PTR [rcx+32]
mov r12, QWORD PTR [rdx+32]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; Word 3
mov r11, QWORD PTR [rcx+24]
mov r12, QWORD PTR [rdx+24]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; Word 2
mov r11, QWORD PTR [rcx+16]
mov r12, QWORD PTR [rdx+16]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; Word 1
mov r11, QWORD PTR [rcx+8]
mov r12, QWORD PTR [rdx+8]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; Word 0 (least significant)
mov r11, QWORD PTR [rcx]
mov r12, QWORD PTR [rdx]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; If no word differed, r8 is still -1 and turns the initial -1 in rax into 0;
; otherwise r8 is 0 and rax is left unchanged
xor rax, r8
pop r12
ret
sp_521_cmp_9 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not subtracting.
; *
; * Every word of b is ANDed with m into a 72-byte stack buffer first, and the
; * masked copy is then subtracted from a with a single borrow chain.
; *
; * r A single precision number representing condition subtract result. (rcx)
; * a A single precision number to subtract from. (rdx)
; * b A single precision number to subtract. (r8)
; * m Mask value to apply. (r9)
; * return 0 when there is no borrow out of the top word, -1 otherwise. (rax)
; */
_text SEGMENT READONLY PARA
sp_521_cond_sub_9 PROC
sub rsp, 72
; Build the masked copy of b on the stack, two words at a time
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
and r10, r9
and r11, r9
mov QWORD PTR [rsp], r10
mov QWORD PTR [rsp+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+16], r10
mov QWORD PTR [rsp+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+32], r10
mov QWORD PTR [rsp+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+48], r10
mov QWORD PTR [rsp+56], r11
mov r10, QWORD PTR [r8+64]
and r10, r9
mov QWORD PTR [rsp+64], r10
; r = a - masked b. r8 is reused as scratch for the masked word. Only mov
; instructions sit between the sub/sbb steps, so the borrow flag is carried
; through; each result word is stored one step after it is computed
mov r10, QWORD PTR [rdx]
mov r8, QWORD PTR [rsp]
sub r10, r8
mov r11, QWORD PTR [rdx+8]
mov r8, QWORD PTR [rsp+8]
sbb r11, r8
mov QWORD PTR [rcx], r10
mov r10, QWORD PTR [rdx+16]
mov r8, QWORD PTR [rsp+16]
sbb r10, r8
mov QWORD PTR [rcx+8], r11
mov r11, QWORD PTR [rdx+24]
mov r8, QWORD PTR [rsp+24]
sbb r11, r8
mov QWORD PTR [rcx+16], r10
mov r10, QWORD PTR [rdx+32]
mov r8, QWORD PTR [rsp+32]
sbb r10, r8
mov QWORD PTR [rcx+24], r11
mov r11, QWORD PTR [rdx+40]
mov r8, QWORD PTR [rsp+40]
sbb r11, r8
mov QWORD PTR [rcx+32], r10
mov r10, QWORD PTR [rdx+48]
mov r8, QWORD PTR [rsp+48]
sbb r10, r8
mov QWORD PTR [rcx+40], r11
mov r11, QWORD PTR [rdx+56]
mov r8, QWORD PTR [rsp+56]
sbb r11, r8
mov QWORD PTR [rcx+48], r10
mov r10, QWORD PTR [rdx+64]
mov r8, QWORD PTR [rsp+64]
sbb r10, r8
mov QWORD PTR [rcx+56], r11
mov QWORD PTR [rcx+64], r10
; rax = 0 - borrow: 0 if no borrow out of the top word, -1 otherwise
sbb rax, rax
add rsp, 72
ret
sp_521_cond_sub_9 ENDP
_text ENDS
; /* Reduce the number back to 521 bits.
; *
; * The bits of a from bit 521 upwards (taken from words 8..16) are added onto
; * the low 521 bits, and any bits this pushes above bit 521 are added back in
; * once more (a congruence modulo 2^521 - 1). No multiplication by mp is
; * performed; the m and mp arguments are overwritten without being read.
; *
; * a A single precision number to reduce in place. (rcx; words 0..16 are
; * read, words 0..8 are written)
; * m The single precision number representing the modulus. (rdx, unused)
; * mp The digit representing the negative inverse of m mod 2^n. (r8, unused)
; */
_text SEGMENT READONLY PARA
sp_521_mont_reduce_9 PROC
push r12
push r13
push r14
push r15
; Load words 8..16; r15 keeps the low 9 bits of word 8
mov rdx, QWORD PTR [rcx+64]
mov rax, QWORD PTR [rcx+72]
mov r8, QWORD PTR [rcx+80]
mov r15, rdx
and r15, 511
mov r9, QWORD PTR [rcx+88]
mov r10, QWORD PTR [rcx+96]
mov r11, QWORD PTR [rcx+104]
mov r12, QWORD PTR [rcx+112]
mov r13, QWORD PTR [rcx+120]
mov r14, QWORD PTR [rcx+128]
; Shift words 8..16 right by 9 bits: they now hold the value from bit 521 up
shrd rdx, rax, 9
shrd rax, r8, 9
shrd r8, r9, 9
shrd r9, r10, 9
shrd r10, r11, 9
shrd r11, r12, 9
shrd r12, r13, 9
shrd r13, r14, 9
shr r14, 9
; Add the high part to words 0..7; top word = low 9 bits of word 8 + r14
add rdx, QWORD PTR [rcx]
adc rax, QWORD PTR [rcx+8]
adc r8, QWORD PTR [rcx+16]
adc r9, QWORD PTR [rcx+24]
adc r10, QWORD PTR [rcx+32]
adc r11, QWORD PTR [rcx+40]
adc r12, QWORD PTR [rcx+48]
adc r13, QWORD PTR [rcx+56]
adc r15, r14
; Second fold: r15 >> 9 is added back to word 0, top word masked to 9 bits
mov r14, r15
shr r15, 9
and r14, 511
add rdx, r15
adc rax, 0
adc r8, 0
adc r9, 0
adc r10, 0
adc r11, 0
adc r12, 0
adc r13, 0
adc r14, 0
; Store the 9-word result over the low words of a
mov QWORD PTR [rcx], rdx
mov QWORD PTR [rcx+8], rax
mov QWORD PTR [rcx+16], r8
mov QWORD PTR [rcx+24], r9
mov QWORD PTR [rcx+32], r10
mov QWORD PTR [rcx+40], r11
mov QWORD PTR [rcx+48], r12
mov QWORD PTR [rcx+56], r13
mov QWORD PTR [rcx+64], r14
pop r15
pop r14
pop r13
pop r12
ret
sp_521_mont_reduce_9 ENDP
_text ENDS
; /* Reduce the number back to 521 bits using Montgomery reduction.
; *
; * Word-by-word reduction against the modulus passed in m: nine rounds of
; * mu = a[i] * mp, a[i..i+8] += m * mu, with mu masked to 9 bits in the last
; * round. The upper words are then shifted right by 9 bits into a[0..9] and
; * m is conditionally subtracted by sp_521_cond_sub_9.
; *
; * a A single precision number to reduce in place. (rcx)
; * m The single precision number representing the modulus. (rdx)
; * mp The digit representing the negative inverse of m mod 2^n. (r8)
; */
_text SEGMENT READONLY PARA
sp_521_mont_reduce_order_9 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
; r9 = m for the whole routine; rsi = carry kept between rounds
mov r9, rdx
xor rsi, rsi
; i = 9
mov r10, 9
; r15 and rdi cache a[i] and a[i+1] across rounds
mov r15, QWORD PTR [rcx]
mov rdi, QWORD PTR [rcx+8]
L_521_mont_reduce_order_9_loop:
; mu = a[i] * mp
mov r13, r15
imul r13, r8
; Last round only: mu is limited to 9 bits
cmp r10, 1
jne L_521_mont_reduce_order_9_nomask
and r13, 511
L_521_mont_reduce_order_9_nomask:
; a[i+0] += m[0] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9]
add r15, rax
mov QWORD PTR [rcx], r15
adc r12, rdx
; a[i+1] += m[1] * mu
; (kept in r15, not stored: it is a[i] of the next round)
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+8]
mov r15, rdi
add r15, rax
adc r11, rdx
add r15, r12
adc r11, 0
; a[i+2] += m[2] * mu
; (kept in rdi, not stored: it is a[i+1] of the next round)
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+16]
mov rdi, QWORD PTR [rcx+16]
add rdi, rax
adc r12, rdx
add rdi, r11
adc r12, 0
; a[i+3] += m[3] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+24]
mov r14, QWORD PTR [rcx+24]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+24], r14
adc r11, 0
; a[i+4] += m[4] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+32]
mov r14, QWORD PTR [rcx+32]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+32], r14
adc r12, 0
; a[i+5] += m[5] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+40]
mov r14, QWORD PTR [rcx+40]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+40], r14
adc r11, 0
; a[i+6] += m[6] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+48]
mov r14, QWORD PTR [rcx+48]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+48], r14
adc r12, 0
; a[i+7] += m[7] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+56]
mov r14, QWORD PTR [rcx+56]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+56], r14
adc r11, 0
; a[i+8] += m[8] * mu
; The carry saved in rsi by the previous round is added into the high half,
; and rsi is rebuilt from the carries out of this round
mov rax, r13
mul QWORD PTR [r9+64]
mov r14, QWORD PTR [rcx+64]
add r11, rax
adc rdx, rsi
mov rsi, 0
adc rsi, 0
add r14, r11
mov QWORD PTR [rcx+64], r14
adc QWORD PTR [rcx+72], rdx
adc rsi, 0
; i -= 1
add rcx, 8
dec r10
jnz L_521_mont_reduce_order_9_loop
; Write back the two cached words; rcx is now a + 72
mov QWORD PTR [rcx], r15
mov QWORD PTR [rcx+8], rdi
; r8 = a + 64 (word 8), rcx = a again
mov r8, rcx
sub rcx, 72
sub r8, 8
; Shift words 8..17 right by 9 bits into words 0..9
mov rax, QWORD PTR [r8]
mov rdx, QWORD PTR [r8+8]
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
mov r13, QWORD PTR [r8+32]
shrd rax, rdx, 9
shrd rdx, r10, 9
shrd r10, r11, 9
shrd r11, r13, 9
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov rdx, QWORD PTR [r8+40]
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
mov rax, QWORD PTR [r8+64]
shrd r13, rdx, 9
shrd rdx, r10, 9
shrd r10, r11, 9
shrd r11, rax, 9
mov QWORD PTR [rcx+32], r13
mov QWORD PTR [rcx+40], rdx
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
mov rdx, QWORD PTR [r8+72]
shrd rax, rdx, 9
shr rdx, 9
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], rdx
; Subtraction mask: rsi = -(a[8] >> 9)
mov rsi, QWORD PTR [rcx+64]
shr rsi, 9
neg rsi
; sp_521_cond_sub_9(r = a, a = a, b = m, m = mask).
; r8 must be loaded from r9 (the saved m pointer) BEFORE r9 is overwritten
; with the mask. sp_521_cond_sub_9 in this file always takes its arguments in
; rcx, rdx, r8, r9, so this one ordering is used unconditionally.
mov r8, r9
mov r9, rsi
mov rdx, rcx
call sp_521_cond_sub_9
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_521_mont_reduce_order_9 ENDP
_text ENDS
; /* Add two Montgomery form numbers (r = a + b % m).
; *
; * The 9-word sum is formed with one carry chain; the bits of the top word
; * above bit 9 (i.e. from bit 521 of the sum up) are then added back to word
; * 0 and the top word is masked to 9 bits. The m argument is never read.
; *
; * r Result of addition. (rcx)
; * a First number to add in Montgomery form. (rdx)
; * b Second number to add in Montgomery form. (r8)
; * m Modulus (prime). (r9, overwritten without being read)
; */
_text SEGMENT READONLY PARA
sp_521_mont_add_9 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
; Load a
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
mov r12, QWORD PTR [rdx+32]
mov r13, QWORD PTR [rdx+40]
mov r14, QWORD PTR [rdx+48]
mov r15, QWORD PTR [rdx+56]
mov rdi, QWORD PTR [rdx+64]
; a + b
add rax, QWORD PTR [r8]
adc r9, QWORD PTR [r8+8]
adc r10, QWORD PTR [r8+16]
adc r11, QWORD PTR [r8+24]
adc r12, QWORD PTR [r8+32]
adc r13, QWORD PTR [r8+40]
adc r14, QWORD PTR [r8+48]
adc r15, QWORD PTR [r8+56]
adc rdi, QWORD PTR [r8+64]
; rsi = overflow above bit 521; keep the low 9 bits of the top word
mov rsi, rdi
and rdi, 511
shr rsi, 9
; Add the overflow back in at the bottom
add rax, rsi
adc r9, 0
adc r10, 0
adc r11, 0
adc r12, 0
adc r13, 0
adc r14, 0
adc r15, 0
adc rdi, 0
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov QWORD PTR [rcx+32], r12
mov QWORD PTR [rcx+40], r13
mov QWORD PTR [rcx+48], r14
mov QWORD PTR [rcx+56], r15
mov QWORD PTR [rcx+64], rdi
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_521_mont_add_9 ENDP
_text ENDS
; /* Double a Montgomery form number (r = a + a % m).
; *
; * a is added to itself with one carry chain; the bits of the top word above
; * bit 9 are then added back to word 0 and the top word is masked to 9 bits.
; * The m argument is never read.
; *
; * r Result of doubling. (rcx)
; * a Number to double in Montgomery form. (rdx)
; * m Modulus (prime). (r8, overwritten without being read)
; */
_text SEGMENT READONLY PARA
sp_521_mont_dbl_9 PROC
push r12
push r13
push r14
push r15
push rdi
; Load a
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r9, QWORD PTR [rdx+16]
mov r10, QWORD PTR [rdx+24]
mov r11, QWORD PTR [rdx+32]
mov r12, QWORD PTR [rdx+40]
mov r13, QWORD PTR [rdx+48]
mov r14, QWORD PTR [rdx+56]
mov r15, QWORD PTR [rdx+64]
; a + a
add rax, rax
adc r8, r8
adc r9, r9
adc r10, r10
adc r11, r11
adc r12, r12
adc r13, r13
adc r14, r14
adc r15, r15
; rdi = overflow above bit 521; keep the low 9 bits of the top word
mov rdi, r15
and r15, 511
shr rdi, 9
; Add the overflow back in at the bottom
add rax, rdi
adc r8, 0
adc r9, 0
adc r10, 0
adc r11, 0
adc r12, 0
adc r13, 0
adc r14, 0
adc r15, 0
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r8
mov QWORD PTR [rcx+16], r9
mov QWORD PTR [rcx+24], r10
mov QWORD PTR [rcx+32], r11
mov QWORD PTR [rcx+40], r12
mov QWORD PTR [rcx+48], r13
mov QWORD PTR [rcx+56], r14
mov QWORD PTR [rcx+64], r15
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_521_mont_dbl_9 ENDP
_text ENDS
; /* Triple a Montgomery form number (r = a + a + a % m).
; *
; * a is doubled and then a is added once more, both without intermediate
; * reduction; the bits of the top word above bit 9 are then added back to
; * word 0 and the top word is masked to 9 bits. The m argument is never read.
; *
; * r Result of Tripling. (rcx)
; * a Number to triple in Montgomery form. (rdx)
; * m Modulus (prime). (r8, overwritten without being read)
; */
_text SEGMENT READONLY PARA
sp_521_mont_tpl_9 PROC
push r12
push r13
push r14
push r15
push rdi
; Load a
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r9, QWORD PTR [rdx+16]
mov r10, QWORD PTR [rdx+24]
mov r11, QWORD PTR [rdx+32]
mov r12, QWORD PTR [rdx+40]
mov r13, QWORD PTR [rdx+48]
mov r14, QWORD PTR [rdx+56]
mov r15, QWORD PTR [rdx+64]
; 2a
add rax, rax
adc r8, r8
adc r9, r9
adc r10, r10
adc r11, r11
adc r12, r12
adc r13, r13
adc r14, r14
adc r15, r15
; 2a + a
add rax, QWORD PTR [rdx]
adc r8, QWORD PTR [rdx+8]
adc r9, QWORD PTR [rdx+16]
adc r10, QWORD PTR [rdx+24]
adc r11, QWORD PTR [rdx+32]
adc r12, QWORD PTR [rdx+40]
adc r13, QWORD PTR [rdx+48]
adc r14, QWORD PTR [rdx+56]
adc r15, QWORD PTR [rdx+64]
; rdi = overflow above bit 521; keep the low 9 bits of the top word
mov rdi, r15
and r15, 511
shr rdi, 9
; Add the overflow back in at the bottom
add rax, rdi
adc r8, 0
adc r9, 0
adc r10, 0
adc r11, 0
adc r12, 0
adc r13, 0
adc r14, 0
adc r15, 0
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r8
mov QWORD PTR [rcx+16], r9
mov QWORD PTR [rcx+24], r10
mov QWORD PTR [rcx+32], r11
mov QWORD PTR [rcx+40], r12
mov QWORD PTR [rcx+48], r13
mov QWORD PTR [rcx+56], r14
mov QWORD PTR [rcx+64], r15
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_521_mont_tpl_9 ENDP
_text ENDS
; /* Subtract two Montgomery form numbers (r = a - b % m).
; *
; * The 9-word difference is formed with one borrow chain. The top word is
; * masked to 9 bits and the negated, arithmetically shifted upper bits of the
; * top word are subtracted from word 0. The m argument is never read.
; *
; * r Result of subtraction. (rcx)
; * a Number to subtract from in Montgomery form. (rdx)
; * b Number to subtract in Montgomery form. (r8)
; * m Modulus (prime). (r9, overwritten without being read)
; */
_text SEGMENT READONLY PARA
sp_521_mont_sub_9 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
; Load a
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
mov r12, QWORD PTR [rdx+32]
mov r13, QWORD PTR [rdx+40]
mov r14, QWORD PTR [rdx+48]
mov r15, QWORD PTR [rdx+56]
mov rdi, QWORD PTR [rdx+64]
; a - b
sub rax, QWORD PTR [r8]
sbb r9, QWORD PTR [r8+8]
sbb r10, QWORD PTR [r8+16]
sbb r11, QWORD PTR [r8+24]
sbb r12, QWORD PTR [r8+32]
sbb r13, QWORD PTR [r8+40]
sbb r14, QWORD PTR [r8+48]
sbb r15, QWORD PTR [r8+56]
sbb rdi, QWORD PTR [r8+64]
; rsi = -(top word >> 9, arithmetic): a positive count when the difference
; went negative; the top word itself is masked to 9 bits
mov rsi, rdi
and rdi, 511
sar rsi, 9
neg rsi
; Subtract that count at the bottom
sub rax, rsi
sbb r9, 0
sbb r10, 0
sbb r11, 0
sbb r12, 0
sbb r13, 0
sbb r14, 0
sbb r15, 0
sbb rdi, 0
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov QWORD PTR [rcx+32], r12
mov QWORD PTR [rcx+40], r13
mov QWORD PTR [rcx+48], r14
mov QWORD PTR [rcx+56], r15
mov QWORD PTR [rcx+64], rdi
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_521_mont_sub_9 ENDP
_text ENDS
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * When a is odd, 1 is subtracted and 2^521 is added (bit 9 of the top word)
; * before the shift, i.e. 2^521 - 1 is added to make the value even. The
; * value is then shifted right by one bit. The m argument is never read.
; *
; * r Result of division by 2. (rcx)
; * a Number to divide. (rdx)
; * m Modulus (prime). (r8, overwritten without being read)
; */
_text SEGMENT READONLY PARA
sp_521_mont_div2_9 PROC
push r12
push r13
push r14
push r15
push rdi
; Load a
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r9, QWORD PTR [rdx+16]
mov r10, QWORD PTR [rdx+24]
mov r11, QWORD PTR [rdx+32]
mov r12, QWORD PTR [rdx+40]
mov r13, QWORD PTR [rdx+48]
mov r14, QWORD PTR [rdx+56]
mov r15, QWORD PTR [rdx+64]
; rdi = low bit of a (1 when a is odd)
mov rdi, rax
and rdi, 1
; a - (a & 1), done as arithmetic rather than a branch
sub rax, rdi
sbb r8, 0
sbb r9, 0
sbb r10, 0
sbb r11, 0
sbb r12, 0
sbb r13, 0
sbb r14, 0
sbb r15, 0
; + (a & 1) * 2^521: bit 9 of word 8
shl rdi, 9
add r15, rdi
; Shift the whole 9-word value right by one bit
shrd rax, r8, 1
shrd r8, r9, 1
shrd r9, r10, 1
shrd r10, r11, 1
shrd r11, r12, 1
shrd r12, r13, 1
shrd r13, r14, 1
shrd r14, r15, 1
shr r15, 1
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r8
mov QWORD PTR [rcx+16], r9
mov QWORD PTR [rcx+24], r10
mov QWORD PTR [rcx+32], r11
mov QWORD PTR [rcx+40], r12
mov QWORD PTR [rcx+48], r13
mov QWORD PTR [rcx+56], r14
mov QWORD PTR [rcx+64], r15
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_521_mont_div2_9 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible point that could be being copied.
; *
; * Reads 32 table entries of 440 bytes each, starting at table + 440 and
; * numbered 1..32, and ORs each one into the result under an all-ones or
; * all-zero mask, so every entry is read whatever idx is. Three 72-byte
; * fields are copied per point, at byte offsets 0, 144 and 288. The work is
; * split in two passes over the table: the first covers bytes 0..71 and
; * 144..175, the second bytes 176..215 and 288..359. The entry at table + 0
; * is never read; an idx outside 1..32 leaves the copied fields all zero.
; *
; * NOTE(review): the vector mask is built from r8d (low 32 bits of idx) while
; * the scalar mask compares all 64 bits of r8 - confirm callers always pass
; * idx with the upper 32 bits clear.
; *
; * r Point to copy into. (rcx)
; * table Table - start of the entries to access (rdx)
; * idx Index of point to retrieve. (r8)
; */
_text SEGMENT READONLY PARA
sp_521_get_point_33_9 PROC
push r12
push r13
push r14
; Save xmm6-xmm15, which are all used below, and restore them before return
sub rsp, 160
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqu OWORD PTR [rsp+32], xmm8
movdqu OWORD PTR [rsp+48], xmm9
movdqu OWORD PTR [rsp+64], xmm10
movdqu OWORD PTR [rsp+80], xmm11
movdqu OWORD PTR [rsp+96], xmm12
movdqu OWORD PTR [rsp+112], xmm13
movdqu OWORD PTR [rsp+128], xmm14
movdqu OWORD PTR [rsp+144], xmm15
; Pass 1 set-up: r14 and xmm14 = current entry number (starting at 1),
; xmm13 = idx in every lane, xmm15 = 1 in every lane, rax = entries left (32)
mov r14, 1
mov rax, 1
movd xmm13, r8d
; Skip the entry at offset 0
add rdx, 440
movd xmm15, eax
mov rax, 32
pshufd xmm15, xmm15, 0
pshufd xmm13, xmm13, 0
pxor xmm14, xmm14
; Zero the accumulators
pxor xmm0, xmm0
pxor xmm1, xmm1
pxor xmm2, xmm2
pxor xmm3, xmm3
pxor xmm4, xmm4
pxor xmm5, xmm5
xor r12, r12
xor r13, r13
movdqa xmm14, xmm15
L_521_get_point_33_9_start_1:
; xmm12 = vector mask: all ones when the current entry number equals idx
movdqa xmm12, xmm14
paddd xmm14, xmm15
pcmpeqd xmm12, xmm13
; r9 = scalar mask: -1 when r8 equals the current entry number, else 0
xor r9, r9
cmp r8, r14
sete r9b
neg r9
inc r14
; Load bytes 0..71 and 144..175 of this entry
movdqu xmm6, OWORD PTR [rdx]
movdqu xmm7, OWORD PTR [rdx+16]
movdqu xmm8, OWORD PTR [rdx+32]
movdqu xmm9, OWORD PTR [rdx+48]
mov r10, QWORD PTR [rdx+64]
movdqu xmm10, OWORD PTR [rdx+144]
movdqu xmm11, OWORD PTR [rdx+160]
add rdx, 440
; Mask and accumulate
pand xmm6, xmm12
pand xmm7, xmm12
pand xmm8, xmm12
pand xmm9, xmm12
pand xmm10, xmm12
pand xmm11, xmm12
and r10, r9
por xmm0, xmm6
por xmm1, xmm7
por xmm2, xmm8
por xmm3, xmm9
por xmm4, xmm10
por xmm5, xmm11
or r12, r10
dec rax
jnz L_521_get_point_33_9_start_1
; Store the pass 1 result
movdqu OWORD PTR [rcx], xmm0
movdqu OWORD PTR [rcx+16], xmm1
movdqu OWORD PTR [rcx+32], xmm2
movdqu OWORD PTR [rcx+48], xmm3
mov QWORD PTR [rcx+64], r12
movdqu OWORD PTR [rcx+144], xmm4
movdqu OWORD PTR [rcx+160], xmm5
; Pass 2 set-up: same as pass 1
mov r14, 1
mov rax, 1
movd xmm13, r8d
; Rewind rdx by 32 * 440 bytes, back to entry 1
sub rdx, 14080
movd xmm15, eax
mov rax, 32
pshufd xmm15, xmm15, 0
pshufd xmm13, xmm13, 0
pxor xmm14, xmm14
pxor xmm0, xmm0
pxor xmm1, xmm1
pxor xmm2, xmm2
pxor xmm3, xmm3
pxor xmm4, xmm4
pxor xmm5, xmm5
xor r12, r12
xor r13, r13
movdqa xmm14, xmm15
L_521_get_point_33_9_start_2:
; Masks are built exactly as in pass 1
movdqa xmm12, xmm14
paddd xmm14, xmm15
pcmpeqd xmm12, xmm13
xor r9, r9
cmp r8, r14
sete r9b
neg r9
inc r14
; Load bytes 176..215 and 288..359 of this entry
movdqu xmm6, OWORD PTR [rdx+176]
movdqu xmm7, OWORD PTR [rdx+192]
mov r10, QWORD PTR [rdx+208]
movdqu xmm8, OWORD PTR [rdx+288]
movdqu xmm9, OWORD PTR [rdx+304]
movdqu xmm10, OWORD PTR [rdx+320]
movdqu xmm11, OWORD PTR [rdx+336]
mov r11, QWORD PTR [rdx+352]
add rdx, 440
; Mask and accumulate
pand xmm6, xmm12
pand xmm7, xmm12
pand xmm8, xmm12
pand xmm9, xmm12
pand xmm10, xmm12
pand xmm11, xmm12
and r10, r9
and r11, r9
por xmm0, xmm6
por xmm1, xmm7
por xmm2, xmm8
por xmm3, xmm9
por xmm4, xmm10
por xmm5, xmm11
or r12, r10
or r13, r11
dec rax
jnz L_521_get_point_33_9_start_2
; Store the pass 2 result
movdqu OWORD PTR [rcx+176], xmm0
movdqu OWORD PTR [rcx+192], xmm1
mov QWORD PTR [rcx+208], r12
movdqu OWORD PTR [rcx+288], xmm2
movdqu OWORD PTR [rcx+304], xmm3
movdqu OWORD PTR [rcx+320], xmm4
movdqu OWORD PTR [rcx+336], xmm5
mov QWORD PTR [rcx+352], r13
; Restore xmm6-xmm15
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm8, OWORD PTR [rsp+32]
movdqu xmm9, OWORD PTR [rsp+48]
movdqu xmm10, OWORD PTR [rsp+64]
movdqu xmm11, OWORD PTR [rsp+80]
movdqu xmm12, OWORD PTR [rsp+96]
movdqu xmm13, OWORD PTR [rsp+112]
movdqu xmm14, OWORD PTR [rsp+128]
movdqu xmm15, OWORD PTR [rsp+144]
add rsp, 160
pop r14
pop r13
pop r12
ret
sp_521_get_point_33_9 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible point that could be being copied.
; *
; * AVX2 variant. Reads 32 table entries of 440 bytes each, starting at
; * table + 440 and numbered 1..32, and ORs each one into the result under an
; * all-ones or all-zero mask, so every entry is read whatever idx is. Three
; * 72-byte fields are copied per point, at byte offsets 0, 144 and 288, in a
; * single pass. The entry at table + 0 is never read; an idx outside 1..32
; * leaves the copied fields all zero.
; *
; * NOTE(review): the vector mask is built from r8d (low 32 bits of idx) while
; * the scalar mask compares all 64 bits of r8 - confirm callers always pass
; * idx with the upper 32 bits clear.
; * NOTE(review): ymm registers are written and there is no vzeroupper before
; * ret - confirm this is intended.
; *
; * r Point to copy into. (rcx)
; * table Table - start of the entries to access (rdx)
; * idx Index of point to retrieve. (r8)
; */
_text SEGMENT READONLY PARA
sp_521_get_point_33_avx2_9 PROC
push r12
push r13
push r14
push r15
push rdi
; Save the low 128 bits of xmm6-xmm15; they are restored before return
sub rsp, 160
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
vmovdqu OWORD PTR [rsp+32], xmm8
vmovdqu OWORD PTR [rsp+48], xmm9
vmovdqu OWORD PTR [rsp+64], xmm10
vmovdqu OWORD PTR [rsp+80], xmm11
vmovdqu OWORD PTR [rsp+96], xmm12
vmovdqu OWORD PTR [rsp+112], xmm13
vmovdqu OWORD PTR [rsp+128], xmm14
vmovdqu OWORD PTR [rsp+144], xmm15
; rdi and ymm14 = current entry number (starting at 1), ymm13 = idx in every
; lane, ymm15 = 1 in every lane, rax = entries left (32)
mov rdi, 1
mov rax, 1
movd xmm13, r8d
; Skip the entry at offset 0
add rdx, 440
movd xmm15, eax
mov rax, 32
; vpermd with an all-zero index vector copies lane 0 to all eight lanes
vpxor ymm14, ymm14, ymm14
vpermd ymm13, ymm14, ymm13
vpermd ymm15, ymm14, ymm15
; Zero the accumulators
vpxor ymm0, ymm0, ymm0
vpxor ymm1, ymm1, ymm1
vpxor ymm2, ymm2, ymm2
vpxor ymm3, ymm3, ymm3
vpxor ymm4, ymm4, ymm4
vpxor ymm5, ymm5, ymm5
xor r10, r10
xor r11, r11
xor r12, r12
vmovdqa ymm14, ymm15
L_521_get_point_33_avx2_9_start:
; ymm12 = vector mask: all ones when the current entry number equals idx
vpcmpeqd ymm12, ymm14, ymm13
vpaddd ymm14, ymm14, ymm15
; r9 = scalar mask: -1 when r8 equals the current entry number, else 0
xor r9, r9
cmp r8, rdi
sete r9b
neg r9
inc rdi
; Load the first 64 bytes of each field into ymm registers and the ninth
; word of each field into r13, r14, r15
vmovupd ymm6, YMMWORD PTR [rdx]
vmovupd ymm7, YMMWORD PTR [rdx+32]
vmovupd ymm8, YMMWORD PTR [rdx+144]
vmovupd ymm9, YMMWORD PTR [rdx+176]
vmovupd ymm10, YMMWORD PTR [rdx+288]
vmovupd ymm11, YMMWORD PTR [rdx+320]
mov r13, QWORD PTR [rdx+64]
mov r14, QWORD PTR [rdx+208]
mov r15, QWORD PTR [rdx+352]
add rdx, 440
; Mask and accumulate
vpand ymm6, ymm6, ymm12
vpand ymm7, ymm7, ymm12
vpand ymm8, ymm8, ymm12
vpand ymm9, ymm9, ymm12
vpand ymm10, ymm10, ymm12
vpand ymm11, ymm11, ymm12
and r13, r9
and r14, r9
and r15, r9
vpor ymm0, ymm0, ymm6
vpor ymm1, ymm1, ymm7
vpor ymm2, ymm2, ymm8
vpor ymm3, ymm3, ymm9
vpor ymm4, ymm4, ymm10
vpor ymm5, ymm5, ymm11
or r10, r13
or r11, r14
or r12, r15
dec rax
jnz L_521_get_point_33_avx2_9_start
; Store the selected point
vmovupd YMMWORD PTR [rcx], ymm0
vmovupd YMMWORD PTR [rcx+32], ymm1
vmovupd YMMWORD PTR [rcx+144], ymm2
vmovupd YMMWORD PTR [rcx+176], ymm3
vmovupd YMMWORD PTR [rcx+288], ymm4
vmovupd YMMWORD PTR [rcx+320], ymm5
mov QWORD PTR [rcx+64], r10
mov QWORD PTR [rcx+208], r11
mov QWORD PTR [rcx+352], r12
; Restore xmm6-xmm15
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm8, OWORD PTR [rsp+32]
vmovdqu xmm9, OWORD PTR [rsp+48]
vmovdqu xmm10, OWORD PTR [rsp+64]
vmovdqu xmm11, OWORD PTR [rsp+80]
vmovdqu xmm12, OWORD PTR [rsp+96]
vmovdqu xmm13, OWORD PTR [rsp+112]
vmovdqu xmm14, OWORD PTR [rsp+128]
vmovdqu xmm15, OWORD PTR [rsp+144]
add rsp, 160
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_521_get_point_33_avx2_9 ENDP
_text ENDS
ENDIF
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Multiply two Montgomery form numbers mod the modulus (prime).
; * (r = a * b mod m)
; *
; * r Result of multiplication (9 64-bit words written). Passed in rcx.
; * a First number to multiply in Montgomery form (9 words read). Passed in rdx.
; * b Second number to multiply in Montgomery form (9 words read). Passed in r8.
; * m Modulus (prime). Not read here: r9 is overwritten before use.
; * mp Montgomery multiplier. Not referenced by this routine.
; *
; * The 18-word product is built in a 144-byte stack buffer and then folded:
; * the bits from bit 521 upwards are shifted down by 521 and added to the
; * low 521 bits, twice. This is consistent with a modulus of 2^521 - 1
; * (presumably the P-521 prime - confirm against the caller).
; *
; * Saved and restored: rbx, rbp, r12-r15.
; */
_text SEGMENT READONLY PARA
sp_521_mont_mul_avx2_9 PROC
push rbx
push rbp
push r12
push r13
push r14
push r15
; Re-home the arguments: rbp = b, r8 = r, r9 = a.
mov rbp, r8
mov r8, rcx
mov r9, rdx
; Scratch buffer for the product: [rbx+0..64] holds product words 0-8 and
; [rsp+0..64] holds product words 9-17 (rsp = rbx + 72 while the body runs).
; NOTE(review): the low half therefore sits below rsp during the body; the
; Microsoft x64 ABI defines no red zone - confirm this is acceptable.
sub rsp, 144
mov rbx, rsp
add rsp, 72
; r15 = 0 for the rest of the multiply; the xor also clears CF and OF,
; which the adcx (CF) and adox (OF) chains below rely on.
xor r15, r15
; Row 0: product words 0-9 = A[0] * B[0..8]. Only the CF chain is needed.
mov rdx, QWORD PTR [r9]
; A[0] * B[0]
mulx r11, r10, QWORD PTR [rbp]
; A[0] * B[1]
mulx r12, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx], r10
adcx r11, rax
; A[0] * B[2]
mulx r13, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+8], r11
adcx r12, rax
mov QWORD PTR [rbx+16], r12
; A[0] * B[3]
mulx r10, rax, QWORD PTR [rbp+24]
adcx r13, rax
; A[0] * B[4]
mulx r11, rax, QWORD PTR [rbp+32]
mov QWORD PTR [rbx+24], r13
adcx r10, rax
; A[0] * B[5]
mulx r12, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+32], r10
adcx r11, rax
mov QWORD PTR [rbx+40], r11
; A[0] * B[6]
mulx r13, rax, QWORD PTR [rbp+48]
adcx r12, rax
; A[0] * B[7]
mulx r10, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+48], r12
adcx r13, rax
; A[0] * B[8]
mulx r11, rax, QWORD PTR [rbp+64]
mov QWORD PTR [rbx+56], r13
adcx r10, rax
adcx r11, r15
; r14 = carry out of this row; it is added into the top word of the next row.
mov r14, r15
adcx r14, r15
mov QWORD PTR [rbx+64], r10
mov QWORD PTR [rsp], r11
; Row 1: add A[1] * B[0..8] into product words 1-10.
; Low halves ride the CF chain (adcx), high halves the OF chain (adox).
mov rdx, QWORD PTR [r9+8]
mov r11, QWORD PTR [rbx+8]
mov r12, QWORD PTR [rbx+16]
mov r13, QWORD PTR [rbx+24]
mov r10, QWORD PTR [rbx+32]
; A[1] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[1] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+8], r11
adcx r12, rax
adox r13, rcx
; A[1] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+16], r12
adcx r13, rax
adox r10, rcx
mov QWORD PTR [rbx+24], r13
mov r11, QWORD PTR [rbx+40]
mov r12, QWORD PTR [rbx+48]
mov r13, QWORD PTR [rbx+56]
; A[1] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
adcx r10, rax
adox r11, rcx
; A[1] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
mov QWORD PTR [rbx+32], r10
adcx r11, rax
adox r12, rcx
; A[1] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+40], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+48], r12
mov r10, QWORD PTR [rbx+64]
mov r11, QWORD PTR [rsp]
; A[1] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r13, rax
adox r10, rcx
; A[1] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+56], r13
adcx r10, rax
adox r11, rcx
; A[1] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
mov QWORD PTR [rbx+64], r10
mov r12, r15
adcx r11, rax
adox r12, rcx
; New top word = high half + previous row's carry (r14) + CF;
; then r14 = OF + CF becomes the carry for the next row.
adcx r12, r14
mov r14, r15
adox r14, r15
adcx r14, r15
mov QWORD PTR [rsp], r11
mov QWORD PTR [rsp+8], r12
; Row 2: add A[2] * B[0..8] into product words 2-11.
mov rdx, QWORD PTR [r9+16]
mov r12, QWORD PTR [rbx+16]
mov r13, QWORD PTR [rbx+24]
mov r10, QWORD PTR [rbx+32]
mov r11, QWORD PTR [rbx+40]
; A[2] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r13, rcx
; A[2] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+16], r12
adcx r13, rax
adox r10, rcx
; A[2] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+24], r13
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+32], r10
mov r12, QWORD PTR [rbx+48]
mov r13, QWORD PTR [rbx+56]
mov r10, QWORD PTR [rbx+64]
; A[2] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
adcx r11, rax
adox r12, rcx
; A[2] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
mov QWORD PTR [rbx+40], r11
adcx r12, rax
adox r13, rcx
; A[2] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+48], r12
adcx r13, rax
adox r10, rcx
mov QWORD PTR [rbx+56], r13
mov r11, QWORD PTR [rsp]
mov r12, QWORD PTR [rsp+8]
; A[2] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r10, rax
adox r11, rcx
; A[2] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+64], r10
adcx r11, rax
adox r12, rcx
; A[2] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
mov QWORD PTR [rsp], r11
mov r13, r15
adcx r12, rax
adox r13, rcx
adcx r13, r14
mov r14, r15
adox r14, r15
adcx r14, r15
mov QWORD PTR [rsp+8], r12
mov QWORD PTR [rsp+16], r13
; Row 3: add A[3] * B[0..8] into product words 3-12.
mov rdx, QWORD PTR [r9+24]
mov r13, QWORD PTR [rbx+24]
mov r10, QWORD PTR [rbx+32]
mov r11, QWORD PTR [rbx+40]
mov r12, QWORD PTR [rbx+48]
; A[3] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r13, rax
adox r10, rcx
; A[3] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+24], r13
adcx r10, rax
adox r11, rcx
; A[3] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+32], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+40], r11
mov r13, QWORD PTR [rbx+56]
mov r10, QWORD PTR [rbx+64]
mov r11, QWORD PTR [rsp]
; A[3] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
adcx r12, rax
adox r13, rcx
; A[3] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
mov QWORD PTR [rbx+48], r12
adcx r13, rax
adox r10, rcx
; A[3] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+56], r13
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+64], r10
mov r12, QWORD PTR [rsp+8]
mov r13, QWORD PTR [rsp+16]
; A[3] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r11, rax
adox r12, rcx
; A[3] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rsp], r11
adcx r12, rax
adox r13, rcx
; A[3] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
mov QWORD PTR [rsp+8], r12
mov r10, r15
adcx r13, rax
adox r10, rcx
adcx r10, r14
mov r14, r15
adox r14, r15
adcx r14, r15
mov QWORD PTR [rsp+16], r13
mov QWORD PTR [rsp+24], r10
; Row 4: add A[4] * B[0..8] into product words 4-13.
mov rdx, QWORD PTR [r9+32]
mov r10, QWORD PTR [rbx+32]
mov r11, QWORD PTR [rbx+40]
mov r12, QWORD PTR [rbx+48]
mov r13, QWORD PTR [rbx+56]
; A[4] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[4] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+32], r10
adcx r11, rax
adox r12, rcx
; A[4] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+40], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+48], r12
mov r10, QWORD PTR [rbx+64]
mov r11, QWORD PTR [rsp]
mov r12, QWORD PTR [rsp+8]
; A[4] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
adcx r13, rax
adox r10, rcx
; A[4] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
mov QWORD PTR [rbx+56], r13
adcx r10, rax
adox r11, rcx
; A[4] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+64], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rsp], r11
mov r13, QWORD PTR [rsp+16]
mov r10, QWORD PTR [rsp+24]
; A[4] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r12, rax
adox r13, rcx
; A[4] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rsp+8], r12
adcx r13, rax
adox r10, rcx
; A[4] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
mov QWORD PTR [rsp+16], r13
mov r11, r15
adcx r10, rax
adox r11, rcx
adcx r11, r14
mov r14, r15
adox r14, r15
adcx r14, r15
mov QWORD PTR [rsp+24], r10
mov QWORD PTR [rsp+32], r11
; Row 5: add A[5] * B[0..8] into product words 5-14.
mov rdx, QWORD PTR [r9+40]
mov r11, QWORD PTR [rbx+40]
mov r12, QWORD PTR [rbx+48]
mov r13, QWORD PTR [rbx+56]
mov r10, QWORD PTR [rbx+64]
; A[5] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[5] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+40], r11
adcx r12, rax
adox r13, rcx
; A[5] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+48], r12
adcx r13, rax
adox r10, rcx
mov QWORD PTR [rbx+56], r13
mov r11, QWORD PTR [rsp]
mov r12, QWORD PTR [rsp+8]
mov r13, QWORD PTR [rsp+16]
; A[5] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
adcx r10, rax
adox r11, rcx
; A[5] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
mov QWORD PTR [rbx+64], r10
adcx r11, rax
adox r12, rcx
; A[5] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rsp], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rsp+8], r12
mov r10, QWORD PTR [rsp+24]
mov r11, QWORD PTR [rsp+32]
; A[5] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r13, rax
adox r10, rcx
; A[5] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rsp+16], r13
adcx r10, rax
adox r11, rcx
; A[5] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
mov QWORD PTR [rsp+24], r10
mov r12, r15
adcx r11, rax
adox r12, rcx
adcx r12, r14
mov r14, r15
adox r14, r15
adcx r14, r15
mov QWORD PTR [rsp+32], r11
mov QWORD PTR [rsp+40], r12
; Row 6: add A[6] * B[0..8] into product words 6-15.
mov rdx, QWORD PTR [r9+48]
mov r12, QWORD PTR [rbx+48]
mov r13, QWORD PTR [rbx+56]
mov r10, QWORD PTR [rbx+64]
mov r11, QWORD PTR [rsp]
; A[6] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r13, rcx
; A[6] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+48], r12
adcx r13, rax
adox r10, rcx
; A[6] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+56], r13
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+64], r10
mov r12, QWORD PTR [rsp+8]
mov r13, QWORD PTR [rsp+16]
mov r10, QWORD PTR [rsp+24]
; A[6] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
adcx r11, rax
adox r12, rcx
; A[6] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
mov QWORD PTR [rsp], r11
adcx r12, rax
adox r13, rcx
; A[6] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rsp+8], r12
adcx r13, rax
adox r10, rcx
mov QWORD PTR [rsp+16], r13
mov r11, QWORD PTR [rsp+32]
mov r12, QWORD PTR [rsp+40]
; A[6] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r10, rax
adox r11, rcx
; A[6] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rsp+24], r10
adcx r11, rax
adox r12, rcx
; A[6] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
mov QWORD PTR [rsp+32], r11
mov r13, r15
adcx r12, rax
adox r13, rcx
adcx r13, r14
mov r14, r15
adox r14, r15
adcx r14, r15
mov QWORD PTR [rsp+40], r12
mov QWORD PTR [rsp+48], r13
; Row 7: add A[7] * B[0..8] into product words 7-16.
mov rdx, QWORD PTR [r9+56]
mov r13, QWORD PTR [rbx+56]
mov r10, QWORD PTR [rbx+64]
mov r11, QWORD PTR [rsp]
mov r12, QWORD PTR [rsp+8]
; A[7] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r13, rax
adox r10, rcx
; A[7] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+56], r13
adcx r10, rax
adox r11, rcx
; A[7] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+64], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rsp], r11
mov r13, QWORD PTR [rsp+16]
mov r10, QWORD PTR [rsp+24]
mov r11, QWORD PTR [rsp+32]
; A[7] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
adcx r12, rax
adox r13, rcx
; A[7] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
mov QWORD PTR [rsp+8], r12
adcx r13, rax
adox r10, rcx
; A[7] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rsp+16], r13
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rsp+24], r10
mov r12, QWORD PTR [rsp+40]
mov r13, QWORD PTR [rsp+48]
; A[7] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r11, rax
adox r12, rcx
; A[7] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rsp+32], r11
adcx r12, rax
adox r13, rcx
; A[7] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
mov QWORD PTR [rsp+40], r12
mov r10, r15
adcx r13, rax
adox r10, rcx
adcx r10, r14
mov r14, r15
adox r14, r15
adcx r14, r15
mov QWORD PTR [rsp+48], r13
mov QWORD PTR [rsp+56], r10
; Row 8: add A[8] * B[0..8] into product words 8-17.
mov rdx, QWORD PTR [r9+64]
mov r10, QWORD PTR [rbx+64]
mov r11, QWORD PTR [rsp]
mov r12, QWORD PTR [rsp+8]
mov r13, QWORD PTR [rsp+16]
; A[8] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[8] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+64], r10
adcx r11, rax
adox r12, rcx
; A[8] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rsp], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rsp+8], r12
mov r10, QWORD PTR [rsp+24]
mov r11, QWORD PTR [rsp+32]
mov r12, QWORD PTR [rsp+40]
; A[8] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
adcx r13, rax
adox r10, rcx
; A[8] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
mov QWORD PTR [rsp+16], r13
adcx r10, rax
adox r11, rcx
; A[8] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rsp+24], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rsp+32], r11
mov r13, QWORD PTR [rsp+48]
mov r10, QWORD PTR [rsp+56]
; A[8] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r12, rax
adox r13, rcx
; A[8] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rsp+40], r12
adcx r13, rax
adox r10, rcx
; A[8] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
mov QWORD PTR [rsp+48], r13
mov r11, r15
adcx r10, rax
adox r11, rcx
adcx r11, r14
mov QWORD PTR [rsp+56], r10
mov QWORD PTR [rsp+64], r11
; Reduction. Load product words 8-16 ([rsp-8] is [rbx+64], i.e. word 8).
; Word 17 ([rsp+64]) is stored above but not read back.
mov rax, QWORD PTR [rsp+-8]
mov rcx, QWORD PTR [rsp]
mov r10, QWORD PTR [rsp+8]
; r15 = low 9 bits of word 8 = bits 512-520 of the product.
mov r15, rax
and r15, 511
mov r11, QWORD PTR [rsp+16]
mov r12, QWORD PTR [rsp+24]
mov r13, QWORD PTR [rsp+32]
mov r14, QWORD PTR [rsp+40]
mov rbx, QWORD PTR [rsp+48]
mov rdx, QWORD PTR [rsp+56]
; rsp now points at product word 0 again.
sub rsp, 72
; Shift words 8-16 right by 9 bits: rax..rbx,rdx = product >> 521.
shrd rax, rcx, 9
shrd rcx, r10, 9
shrd r10, r11, 9
shrd r11, r12, 9
shrd r12, r13, 9
shrd r13, r14, 9
shrd r14, rbx, 9
shrd rbx, rdx, 9
shr rdx, 9
; Add the low 521 bits (words 0-7 from memory, top 9 bits from r15).
add rax, QWORD PTR [rsp]
adc rcx, QWORD PTR [rsp+8]
adc r10, QWORD PTR [rsp+16]
adc r11, QWORD PTR [rsp+24]
adc r12, QWORD PTR [rsp+32]
adc r13, QWORD PTR [rsp+40]
adc r14, QWORD PTR [rsp+48]
adc rbx, QWORD PTR [rsp+56]
adc r15, rdx
; Second fold: whatever spilled above bit 520 of the sum is added back in
; at bit 0, and the top word is masked back to 9 bits.
mov rdx, r15
shr r15, 9
and rdx, 511
add rax, r15
adc rcx, 0
adc r10, 0
adc r11, 0
adc r12, 0
adc r13, 0
adc r14, 0
adc rbx, 0
adc rdx, 0
; Store the 9 result words to r (r8).
mov QWORD PTR [r8], rax
mov QWORD PTR [r8+8], rcx
mov QWORD PTR [r8+16], r10
mov QWORD PTR [r8+24], r11
mov QWORD PTR [r8+32], r12
mov QWORD PTR [r8+40], r13
mov QWORD PTR [r8+48], r14
mov QWORD PTR [r8+56], rbx
mov QWORD PTR [r8+64], rdx
add rsp, 144
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
ret
sp_521_mont_mul_avx2_9 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
; *
; * r Result of squaring (9 64-bit words written). Passed in rcx.
; * a Number to square in Montgomery form (9 words read). Passed in rdx.
; * m Modulus (prime). Not read here: r8 is overwritten before use.
; * mp Montgomery multiplier. Not read here: r9 is overwritten before use.
; *
; * Method: sum each cross product A[i] x A[j] (i != j) once, double the
; * sum, add the squares A[i] x A[i], then fold the bits from bit 521
; * upwards back onto the low 521 bits, twice (consistent with a modulus
; * of 2^521 - 1; presumably the P-521 prime - confirm against the caller).
; *
; * The "%rN" register names in the inherited comments below do not match
; * the registers the instructions in this listing use (for example
; * "Zero into %r9" accompanies a write to r11); read the instructions.
; *
; * Saved and restored: rbp, r12-r15, rdi, rsi, rbx.
; */
_text SEGMENT READONLY PARA
sp_521_mont_sqr_avx2_9 PROC
push rbp
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; r8 = r, r9 = a for the rest of the routine.
mov r8, rcx
mov r9, rdx
; Scratch buffer: [rbp+0..64] holds result words 0-8 and [rsp+0..64]
; holds result words 9-17 (rsp = rbp + 72 while the body runs).
; NOTE(review): the low half therefore sits below rsp during the body; the
; Microsoft x64 ABI defines no red zone - confirm this is acceptable.
sub rsp, 144
mov rbp, rsp
add rsp, 72
; r12 = 0 throughout; the xor also clears CF and OF for the adcx/adox chains.
xor r12, r12
; During the cross-product phase, words 4-8 are kept in registers:
; r14 = word 4, r15 = word 5, rdi = word 6, rsi = word 7, rbx = word 8.
; Diagonal 1
; Zero into %r9
; A[1] x A[0]
mov rdx, QWORD PTR [r9]
mulx r11, r10, QWORD PTR [r9+8]
mov QWORD PTR [rbp+8], r10
; Zero into %r8
; A[2] x A[0]
mulx r10, rax, QWORD PTR [r9+16]
adcx r11, rax
adox r10, r12
mov QWORD PTR [rbp+16], r11
; No load %r12 - %r9
; A[3] x A[0]
mulx r14, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r14, r12
mov QWORD PTR [rbp+24], r10
; No load %r13 - %r8
; A[4] x A[0]
mulx r15, rax, QWORD PTR [r9+32]
adcx r14, rax
adox r15, r12
; No store %r12 - %r9
; No load %r14 - %r9
; A[5] x A[0]
mulx rdi, rax, QWORD PTR [r9+40]
adcx r15, rax
adox rdi, r12
; No store %r13 - %r8
; No load %r15 - %r8
; A[6] x A[0]
mulx rsi, rax, QWORD PTR [r9+48]
adcx rdi, rax
adox rsi, r12
; No store %r14 - %r9
; No load %rbx - %r9
; A[7] x A[0]
mulx rbx, rax, QWORD PTR [r9+56]
adcx rsi, rax
adox rbx, r12
; No store %r15 - %r8
; Zero into %r8
; A[8] x A[0]
mulx r10, rax, QWORD PTR [r9+64]
adcx rbx, rax
adox r10, r12
; No store %rbx - %r9
; Zero into %r9
; A[8] x A[1]
mov rdx, QWORD PTR [r9+8]
mulx r11, rax, QWORD PTR [r9+64]
adcx r10, rax
adox r11, r12
mov QWORD PTR [rsp], r10
; Carry
; r13 collects CF + OF left over from this diagonal; it is added in at the
; end of the next diagonal. Both flags are zero afterwards.
adcx r11, r12
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [rsp+8], r11
; Diagonal 2
; rdx still holds A[1] here.
mov r11, QWORD PTR [rbp+24]
; No load %r12 - %r8
; A[2] x A[1]
mulx rcx, rax, QWORD PTR [r9+16]
adcx r11, rax
adox r14, rcx
mov QWORD PTR [rbp+24], r11
; No load %r13 - %r9
; A[3] x A[1]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r14, rax
adox r15, rcx
; No store %r12 - %r8
; No load %r14 - %r8
; A[4] x A[1]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r15, rax
adox rdi, rcx
; No store %r13 - %r9
; No load %r15 - %r9
; A[5] x A[1]
mulx rcx, rax, QWORD PTR [r9+40]
adcx rdi, rax
adox rsi, rcx
; No store %r14 - %r8
; No load %rbx - %r8
; A[6] x A[1]
mulx rcx, rax, QWORD PTR [r9+48]
adcx rsi, rax
adox rbx, rcx
; No store %r15 - %r9
mov r11, QWORD PTR [rsp]
; A[7] x A[1]
mulx rcx, rax, QWORD PTR [r9+56]
adcx rbx, rax
adox r11, rcx
; No store %rbx - %r8
mov r10, QWORD PTR [rsp+8]
; A[7] x A[2]
mov rdx, QWORD PTR [r9+16]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [rsp], r11
; Zero into %r9
; A[7] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx r11, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, r12
mov QWORD PTR [rsp+8], r10
; Zero into %r8
; A[7] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx r10, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r10, r12
mov QWORD PTR [rsp+16], r11
; Carry
adcx r10, r13
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [rsp+24], r10
; Diagonal 3
; No load %r14 - %r9
; A[3] x A[2]
mov rdx, QWORD PTR [r9+16]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r15, rax
adox rdi, rcx
; No store %r13 - %r8
; No load %r15 - %r8
; A[4] x A[2]
mulx rcx, rax, QWORD PTR [r9+32]
adcx rdi, rax
adox rsi, rcx
; No store %r14 - %r9
; No load %rbx - %r9
; A[5] x A[2]
mulx rcx, rax, QWORD PTR [r9+40]
adcx rsi, rax
adox rbx, rcx
; No store %r15 - %r8
mov r10, QWORD PTR [rsp]
; A[6] x A[2]
mulx rcx, rax, QWORD PTR [r9+48]
adcx rbx, rax
adox r10, rcx
; No store %rbx - %r9
mov r11, QWORD PTR [rsp+8]
; A[6] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rsp], r10
mov r10, QWORD PTR [rsp+16]
; A[6] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [rsp+8], r11
mov r11, QWORD PTR [rsp+24]
; A[6] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rsp+16], r10
; Zero into %r8
; A[8] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx r10, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r10, r12
mov QWORD PTR [rsp+24], r11
; Zero into %r9
; A[8] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx r11, rax, QWORD PTR [r9+64]
adcx r10, rax
adox r11, r12
mov QWORD PTR [rsp+32], r10
; Carry
adcx r11, r13
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [rsp+40], r11
; Diagonal 4
; No load %rbx - %r8
; A[4] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+32]
adcx rsi, rax
adox rbx, rcx
; No store %r15 - %r9
mov r11, QWORD PTR [rsp]
; A[5] x A[3]
mulx rcx, rax, QWORD PTR [r9+40]
adcx rbx, rax
adox r11, rcx
; No store %rbx - %r8
mov r10, QWORD PTR [rsp+8]
; A[5] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [rsp], r11
mov r11, QWORD PTR [rsp+16]
; A[8] x A[2]
mov rdx, QWORD PTR [r9+16]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rsp+8], r10
mov r10, QWORD PTR [rsp+24]
; A[8] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [rsp+16], r11
mov r11, QWORD PTR [rsp+32]
; A[7] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rsp+24], r10
mov r10, QWORD PTR [rsp+40]
; A[7] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [rsp+32], r11
; Zero into %r9
; A[8] x A[6]
; rdx still holds A[6] from the previous product.
mulx r11, rax, QWORD PTR [r9+64]
adcx r10, rax
adox r11, r12
mov QWORD PTR [rsp+40], r10
; Zero into %r8
; A[8] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx r10, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r10, r12
mov QWORD PTR [rsp+48], r11
; Carry
adcx r10, r13
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [rsp+56], r10
mov QWORD PTR [rsp+64], r13
; Double and Add in A[i] x A[i]
; Doubling is "adox x, x" (x + x + OF, carry kept in OF); the square's two
; halves are then added on the separate CF chain with adcx.
mov r11, QWORD PTR [rbp+8]
; A[0] x A[0]
mov rdx, QWORD PTR [r9]
mulx rcx, rax, rdx
mov QWORD PTR [rbp], rax
adox r11, r11
adcx r11, rcx
mov QWORD PTR [rbp+8], r11
mov r10, QWORD PTR [rbp+16]
mov r11, QWORD PTR [rbp+24]
; A[1] x A[1]
mov rdx, QWORD PTR [r9+8]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+16], r10
mov QWORD PTR [rbp+24], r11
; A[2] x A[2]
mov rdx, QWORD PTR [r9+16]
mulx rcx, rax, rdx
adox r14, r14
adox r15, r15
adcx r14, rax
adcx r15, rcx
; A[3] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, rdx
adox rdi, rdi
adox rsi, rsi
adcx rdi, rax
adcx rsi, rcx
mov r11, QWORD PTR [rsp]
; A[4] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, rdx
adox rbx, rbx
adox r11, r11
adcx rbx, rax
adcx r11, rcx
mov QWORD PTR [rsp], r11
mov r10, QWORD PTR [rsp+8]
mov r11, QWORD PTR [rsp+16]
; A[5] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rsp+8], r10
mov QWORD PTR [rsp+16], r11
mov r10, QWORD PTR [rsp+24]
mov r11, QWORD PTR [rsp+32]
; A[6] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rsp+24], r10
mov QWORD PTR [rsp+32], r11
mov r10, QWORD PTR [rsp+40]
mov r11, QWORD PTR [rsp+48]
; A[7] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rsp+40], r10
mov QWORD PTR [rsp+48], r11
mov r10, QWORD PTR [rsp+56]
mov r11, QWORD PTR [rsp+64]
; A[8] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rsp+56], r10
mov QWORD PTR [rsp+64], r11
; Spill the register-held words 4-8 into the low half of the buffer
; ([rsp-40..-8] is [rbp+32..64]).
mov QWORD PTR [rsp+-40], r14
mov QWORD PTR [rsp+-32], r15
mov QWORD PTR [rsp+-24], rdi
mov QWORD PTR [rsp+-16], rsi
mov QWORD PTR [rsp+-8], rbx
; Reduction. Load result words 8-16; word 17 ([rsp+64]) is not read back.
mov r10, QWORD PTR [rsp+-8]
mov r11, QWORD PTR [rsp]
mov r14, QWORD PTR [rsp+8]
; rcx = low 9 bits of word 8 = bits 512-520 of the square.
mov rcx, r10
and rcx, 511
mov r15, QWORD PTR [rsp+16]
mov rdi, QWORD PTR [rsp+24]
mov rsi, QWORD PTR [rsp+32]
mov rbx, QWORD PTR [rsp+40]
mov rdx, QWORD PTR [rsp+48]
mov rax, QWORD PTR [rsp+56]
; rsp now points at result word 0 again.
sub rsp, 72
; Shift words 8-16 right by 9 bits: the value above bit 520.
shrd r10, r11, 9
shrd r11, r14, 9
shrd r14, r15, 9
shrd r15, rdi, 9
shrd rdi, rsi, 9
shrd rsi, rbx, 9
shrd rbx, rdx, 9
shrd rdx, rax, 9
shr rax, 9
; Add the low 521 bits (words 0-7 from memory, top 9 bits from rcx).
add r10, QWORD PTR [rsp]
adc r11, QWORD PTR [rsp+8]
adc r14, QWORD PTR [rsp+16]
adc r15, QWORD PTR [rsp+24]
adc rdi, QWORD PTR [rsp+32]
adc rsi, QWORD PTR [rsp+40]
adc rbx, QWORD PTR [rsp+48]
adc rdx, QWORD PTR [rsp+56]
adc rcx, rax
; Second fold: bits that spilled above bit 520 are added back at bit 0 and
; the top word is masked to 9 bits.
mov rax, rcx
shr rcx, 9
and rax, 511
add r10, rcx
adc r11, 0
adc r14, 0
adc r15, 0
adc rdi, 0
adc rsi, 0
adc rbx, 0
adc rdx, 0
adc rax, 0
; Store the 9 result words to r (r8).
mov QWORD PTR [r8], r10
mov QWORD PTR [r8+8], r11
mov QWORD PTR [r8+16], r14
mov QWORD PTR [r8+24], r15
mov QWORD PTR [r8+32], rdi
mov QWORD PTR [r8+40], rsi
mov QWORD PTR [r8+48], rbx
mov QWORD PTR [r8+56], rdx
mov QWORD PTR [r8+64], rax
add rsp, 144
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rbp
ret
sp_521_mont_sqr_avx2_9 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 to leave a unchanged.
; *
; * r Receives the 9-word result. Passed in rcx.
; * a Number to subtract from (9 words). Passed in rdx.
; * b Number to subtract (9 words). Passed in r8.
; * m Mask applied to every word of b before it is subtracted. Passed in r9.
; *
; * Returns in rax: 0 when the subtraction did not borrow, -1 when it did.
; *
; * Each word of b is masked with pext rather than and, because pext leaves
; * the flags alone and so does not break the sub/sbb borrow chain. Only
; * volatile registers (rax, r10, r11) are used as scratch.
; */
_text SEGMENT READONLY PARA
sp_521_cond_sub_avx2_9 PROC
        ; word 0
        mov     r10, QWORD PTR [rdx]
        mov     rax, QWORD PTR [r8]
        pext    rax, rax, r9
        sub     r10, rax
        ; word 1 (result word 0 is written once word 1 has been loaded)
        mov     r11, QWORD PTR [rdx+8]
        mov     rax, QWORD PTR [r8+8]
        mov     QWORD PTR [rcx], r10
        pext    rax, rax, r9
        sbb     r11, rax
        ; word 2
        mov     rax, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [r8+16]
        mov     QWORD PTR [rcx+8], r11
        pext    r10, r10, r9
        sbb     rax, r10
        ; word 3
        mov     r10, QWORD PTR [rdx+24]
        mov     r11, QWORD PTR [r8+24]
        mov     QWORD PTR [rcx+16], rax
        pext    r11, r11, r9
        sbb     r10, r11
        ; word 4
        mov     r11, QWORD PTR [rdx+32]
        mov     rax, QWORD PTR [r8+32]
        mov     QWORD PTR [rcx+24], r10
        pext    rax, rax, r9
        sbb     r11, rax
        ; word 5
        mov     rax, QWORD PTR [rdx+40]
        mov     r10, QWORD PTR [r8+40]
        mov     QWORD PTR [rcx+32], r11
        pext    r10, r10, r9
        sbb     rax, r10
        ; word 6
        mov     r10, QWORD PTR [rdx+48]
        mov     r11, QWORD PTR [r8+48]
        mov     QWORD PTR [rcx+40], rax
        pext    r11, r11, r9
        sbb     r10, r11
        ; word 7
        mov     r11, QWORD PTR [rdx+56]
        mov     rax, QWORD PTR [r8+56]
        mov     QWORD PTR [rcx+48], r10
        pext    rax, rax, r9
        sbb     r11, rax
        ; word 8
        mov     rax, QWORD PTR [rdx+64]
        mov     r10, QWORD PTR [r8+64]
        mov     QWORD PTR [rcx+56], r11
        pext    r10, r10, r9
        sbb     rax, r10
        mov     QWORD PTR [rcx+64], rax
        ; rax = 0 - CF: spread the final borrow across the return value.
        sbb     rax, rax
        ret
sp_521_cond_sub_avx2_9 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 521 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place. Passed in rcx.
; *    Words a[0..17] are read; the result is written to a[0..8]
; *    (a[9] is also overwritten by the final shift).
; * m The single precision number representing the modulus (9 words).
; *    Passed in rdx.
; * mp The digit representing the negative inverse of m mod 2^n.
; *    Passed in r8.
; *
; * Structure: eight full-word reduction passes (two per loop trip), one
; * last pass whose multiplier is masked to 9 bits, a 9-bit right shift of
; * a[8..17] down to a[0..9], then a masked subtraction of m.
; *
; * Saved and restored: r12-r15, rdi, rsi, rbx, rbp.
; */
_text SEGMENT READONLY PARA
sp_521_mont_reduce_order_avx2_9 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
push rbp
; r9 = a (used as a sliding pointer), r10 = m.
mov r9, rcx
mov r10, rdx
; rbp carries the overflow out of the top word from one pass to the next.
xor rbp, rbp
; i = 9
; The counter below covers the first 8 words (2 per loop trip); the ninth
; word is handled after the loop.
mov r11, 8
; r14, r15, rdi, rsi cache the four lowest live words a[i..i+3].
mov r14, QWORD PTR [r9]
mov r15, QWORD PTR [r9+8]
mov rdi, QWORD PTR [r9+16]
mov rsi, QWORD PTR [r9+24]
; r9 is kept 32 bytes ahead of the current word, so [r9-32] is a[i].
add r9, 32
xor rbp, rbp
L_521_mont_reduce_order_avx2_9_loop:
; mu = a[i] * mp
mov rdx, r14
mov r12, r14
imul rdx, r8
; rbx = 0; the xor also clears CF and OF for the adcx/adox chains.
xor rbx, rbx
; a[i+0] += m[0] * mu
mulx rcx, rax, QWORD PTR [r10]
mov r14, r15
adcx r12, rax
adox r14, rcx
mov QWORD PTR [r9+-32], r12
; a[i+1] += m[1] * mu
mulx rcx, rax, QWORD PTR [r10+8]
mov r15, rdi
adcx r14, rax
adox r15, rcx
; a[i+2] += m[2] * mu
mulx rcx, rax, QWORD PTR [r10+16]
mov rdi, rsi
adcx r15, rax
adox rdi, rcx
; a[i+3] += m[3] * mu
mulx rcx, rax, QWORD PTR [r10+24]
mov rsi, QWORD PTR [r9]
adcx rdi, rax
adox rsi, rcx
; a[i+4] += m[4] * mu
mulx rcx, rax, QWORD PTR [r10+32]
mov r13, QWORD PTR [r9+8]
adcx rsi, rax
adox r13, rcx
; a[i+5] += m[5] * mu
mulx rcx, rax, QWORD PTR [r10+40]
mov r12, QWORD PTR [r9+16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+8], r13
; a[i+6] += m[6] * mu
mulx rcx, rax, QWORD PTR [r10+48]
mov r13, QWORD PTR [r9+24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+16], r12
; a[i+7] += m[7] * mu
mulx rcx, rax, QWORD PTR [r10+56]
mov r12, QWORD PTR [r9+32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+24], r13
; a[i+8] += m[8] * mu
mulx rcx, rax, QWORD PTR [r10+64]
mov r13, QWORD PTR [r9+40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+32], r12
; a[i+9] also takes the carry saved by the previous pass (rbp);
; rbp is then rebuilt as OF + CF for the next pass.
adcx r13, rbp
mov rbp, rbx
mov QWORD PTR [r9+40], r13
adox rbp, rbx
adcx rbp, rbx
; Second pass of this loop trip: same steps, one word further on
; (the memory offsets are 8 higher because r9 has not moved yet).
; mu = a[i] * mp
mov rdx, r14
mov r13, r14
imul rdx, r8
xor rbx, rbx
; a[i+0] += m[0] * mu
mulx rcx, rax, QWORD PTR [r10]
mov r14, r15
adcx r13, rax
adox r14, rcx
mov QWORD PTR [r9+-24], r13
; a[i+1] += m[1] * mu
mulx rcx, rax, QWORD PTR [r10+8]
mov r15, rdi
adcx r14, rax
adox r15, rcx
; a[i+2] += m[2] * mu
mulx rcx, rax, QWORD PTR [r10+16]
mov rdi, rsi
adcx r15, rax
adox rdi, rcx
; a[i+3] += m[3] * mu
mulx rcx, rax, QWORD PTR [r10+24]
mov rsi, QWORD PTR [r9+8]
adcx rdi, rax
adox rsi, rcx
; a[i+4] += m[4] * mu
mulx rcx, rax, QWORD PTR [r10+32]
mov r12, QWORD PTR [r9+16]
adcx rsi, rax
adox r12, rcx
; a[i+5] += m[5] * mu
mulx rcx, rax, QWORD PTR [r10+40]
mov r13, QWORD PTR [r9+24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+16], r12
; a[i+6] += m[6] * mu
mulx rcx, rax, QWORD PTR [r10+48]
mov r12, QWORD PTR [r9+32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+24], r13
; a[i+7] += m[7] * mu
mulx rcx, rax, QWORD PTR [r10+56]
mov r13, QWORD PTR [r9+40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+32], r12
; a[i+8] += m[8] * mu
mulx rcx, rax, QWORD PTR [r10+64]
mov r12, QWORD PTR [r9+48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+40], r13
adcx r12, rbp
mov rbp, rbx
mov QWORD PTR [r9+48], r12
adox rbp, rbx
adcx rbp, rbx
; a += 2
add r9, 16
; i -= 2
sub r11, 2
jnz L_521_mont_reduce_order_avx2_9_loop
; Ninth pass: as above, but mu is masked to its low 9 bits.
; mu = a[i] * mp
mov rdx, r14
mov r12, r14
imul rdx, r8
and rdx, 511
xor rbx, rbx
; a[i+0] += m[0] * mu
mulx rcx, rax, QWORD PTR [r10]
mov r14, r15
adcx r12, rax
adox r14, rcx
mov QWORD PTR [r9+-32], r12
; a[i+1] += m[1] * mu
mulx rcx, rax, QWORD PTR [r10+8]
mov r15, rdi
adcx r14, rax
adox r15, rcx
; a[i+2] += m[2] * mu
mulx rcx, rax, QWORD PTR [r10+16]
mov rdi, rsi
adcx r15, rax
adox rdi, rcx
; a[i+3] += m[3] * mu
mulx rcx, rax, QWORD PTR [r10+24]
mov rsi, QWORD PTR [r9]
adcx rdi, rax
adox rsi, rcx
; a[i+4] += m[4] * mu
mulx rcx, rax, QWORD PTR [r10+32]
mov r13, QWORD PTR [r9+8]
adcx rsi, rax
adox r13, rcx
; a[i+5] += m[5] * mu
mulx rcx, rax, QWORD PTR [r10+40]
mov r12, QWORD PTR [r9+16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+8], r13
; a[i+6] += m[6] * mu
mulx rcx, rax, QWORD PTR [r10+48]
mov r13, QWORD PTR [r9+24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+16], r12
; a[i+7] += m[7] * mu
mulx rcx, rax, QWORD PTR [r10+56]
mov r12, QWORD PTR [r9+32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+24], r13
; a[i+8] += m[8] * mu
mulx rcx, rax, QWORD PTR [r10+64]
mov r13, QWORD PTR [r9+40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+32], r12
adcx r13, rbp
mov rbp, rbx
mov QWORD PTR [r9+40], r13
adox rbp, rbx
; a += 1
add r9, 8
; Flush the four register-cached words back to memory (a[9..12]).
mov QWORD PTR [r9+-32], r14
mov QWORD PTR [r9+-24], r15
mov QWORD PTR [r9+-16], rdi
mov QWORD PTR [r9+-8], rsi
; Rewind: r8 = &a[8] (source of the shift), r9 = &a[0] (destination).
; r8 no longer holds mp from here on.
sub r9, 32
lea r8, QWORD PTR [r9+-8]
sub r9, 72
; Shift a[8..17] right by 9 bits into a[0..9].
mov r12, QWORD PTR [r8]
mov r14, QWORD PTR [r8+8]
mov r15, QWORD PTR [r8+16]
mov rdi, QWORD PTR [r8+24]
mov r13, QWORD PTR [r8+32]
shrd r12, r14, 9
shrd r14, r15, 9
shrd r15, rdi, 9
shrd rdi, r13, 9
mov QWORD PTR [r9], r12
mov QWORD PTR [r9+8], r14
mov QWORD PTR [r9+16], r15
mov QWORD PTR [r9+24], rdi
mov r14, QWORD PTR [r8+40]
mov r15, QWORD PTR [r8+48]
mov rdi, QWORD PTR [r8+56]
mov r12, QWORD PTR [r8+64]
shrd r13, r14, 9
shrd r14, r15, 9
shrd r15, rdi, 9
shrd rdi, r12, 9
mov QWORD PTR [r9+32], r13
mov QWORD PTR [r9+40], r14
mov QWORD PTR [r9+48], r15
mov QWORD PTR [r9+56], rdi
mov r14, QWORD PTR [r8+72]
shrd r12, r14, 9
shr r14, 9
mov QWORD PTR [r9+64], r12
mov QWORD PTR [r9+72], r14
; rbp = -(a[8] >> 9): the subtraction mask derived from the bits above
; bit 520 of the shifted value.
; NOTE(review): presumably a[8] >> 9 is 0 or 1 here so the mask is 0 or
; all-ones - confirm.
mov rbp, QWORD PTR [r9+64]
shr rbp, 9
neg rbp
; a -= (m masked by rbp). pext applies the mask without touching the
; flags, so the sub/sbb borrow chain stays intact between words.
mov rcx, QWORD PTR [r10]
mov rdx, QWORD PTR [r9]
pext rcx, rcx, rbp
sub rdx, rcx
mov rcx, QWORD PTR [r10+8]
mov rax, QWORD PTR [r9+8]
pext rcx, rcx, rbp
mov QWORD PTR [r9], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+16]
mov rcx, QWORD PTR [r9+16]
pext rdx, rdx, rbp
mov QWORD PTR [r9+8], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+24]
mov rdx, QWORD PTR [r9+24]
pext rax, rax, rbp
mov QWORD PTR [r9+16], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+32]
mov rax, QWORD PTR [r9+32]
pext rcx, rcx, rbp
mov QWORD PTR [r9+24], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+40]
mov rcx, QWORD PTR [r9+40]
pext rdx, rdx, rbp
mov QWORD PTR [r9+32], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+48]
mov rdx, QWORD PTR [r9+48]
pext rax, rax, rbp
mov QWORD PTR [r9+40], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+56]
mov rax, QWORD PTR [r9+56]
pext rcx, rcx, rbp
mov QWORD PTR [r9+48], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+64]
mov rcx, QWORD PTR [r9+64]
pext rdx, rdx, rbp
mov QWORD PTR [r9+56], rax
sbb rcx, rdx
mov QWORD PTR [r9+64], rcx
pop rbp
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_521_mont_reduce_order_avx2_9 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * r Result of division by 2 (9 words written). Passed in rcx.
; * a Number to divide (9 words read). Passed in rdx.
; * m Modulus (prime). Not read: the arithmetic below hard-codes adding
; *   2^521 - 1 when a is odd.
; *
; * Method: with odd = a & 1, compute (a - odd + odd * 2^521) >> 1. The
; * same instructions run whether a is odd or even.
; *
; * Saved and restored: r12-r15. Scratch: rax, rdx, r8-r11, flags.
; */
_text SEGMENT READONLY PARA
sp_521_mont_div2_avx2_9 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        ; Load a[1..8] into r8..r15 and a[0] into rax; after the last load
        ; the pointer in rdx is no longer needed and rdx is recycled.
        mov     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+24]
        mov     r11, QWORD PTR [rdx+32]
        mov     r12, QWORD PTR [rdx+40]
        mov     r13, QWORD PTR [rdx+48]
        mov     r14, QWORD PTR [rdx+56]
        mov     r15, QWORD PTR [rdx+64]
        mov     rax, QWORD PTR [rdx]
        ; rdx = odd = a[0] & 1 (the 32-bit write zero-extends into rdx).
        mov     edx, eax
        and     edx, 1
        ; a -= odd, with the borrow rippled through every word.
        sub     rax, rdx
        sbb     r8, 0
        sbb     r9, 0
        sbb     r10, 0
        sbb     r11, 0
        sbb     r12, 0
        sbb     r13, 0
        sbb     r14, 0
        sbb     r15, 0
        ; a += odd << 521 (bit 9 of word 8).
        shl     rdx, 9
        add     r15, rdx
        ; a >>= 1 across all nine words.
        shrd    rax, r8, 1
        shrd    r8, r9, 1
        shrd    r9, r10, 1
        shrd    r10, r11, 1
        shrd    r11, r12, 1
        shrd    r12, r13, 1
        shrd    r13, r14, 1
        shrd    r14, r15, 1
        shr     r15, 1
        ; Store the result to r.
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r8
        mov     QWORD PTR [rcx+16], r9
        mov     QWORD PTR [rcx+24], r10
        mov     QWORD PTR [rcx+32], r11
        mov     QWORD PTR [rcx+40], r12
        mov     QWORD PTR [rcx+48], r13
        mov     QWORD PTR [rcx+56], r14
        mov     QWORD PTR [rcx+64], r15
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_521_mont_div2_avx2_9 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Copy one table entry into a point while reading every candidate entry.
; *
; * Entries 1 to 63 are all loaded and combined under a mask that is all ones
; * only for the entry whose index equals idx, so the addresses read do not
; * depend on idx. Entry 0 is never read; if idx matches none of 1..63 the
; * words written to r are all zero.
; *
; * Entries are 144 bytes apart. Bytes 0..71 of the selected entry are written
; * to r at offset 0 and bytes 72..143 are written to r at offset 144.
; * NOTE(review): presumably these are the x and y ordinates - confirm against
; * the C declarations of the point and table entry types.
; *
; * r      Point to copy into. (rcx)
; * table  Table - start of the entries to access. (rdx)
; * idx    Index of entry to retrieve. (r8)
; */
_text SEGMENT READONLY PARA
sp_521_get_entry_64_9 PROC
push r12
sub rsp, 160
; Save xmm6-xmm15 (callee-saved in the Microsoft x64 convention); they are
; reloaded from the same slots before returning.
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqu OWORD PTR [rsp+32], xmm8
movdqu OWORD PTR [rsp+48], xmm9
movdqu OWORD PTR [rsp+64], xmm10
movdqu OWORD PTR [rsp+80], xmm11
movdqu OWORD PTR [rsp+96], xmm12
movdqu OWORD PTR [rsp+112], xmm13
movdqu OWORD PTR [rsp+128], xmm14
movdqu OWORD PTR [rsp+144], xmm15
; First pass: bytes 0..71 of each entry, from entry 1
; r12 = scalar index of the entry about to be read
mov r12, 1
mov rax, 1
; xmm13 = idx (low 32 bits of r8), broadcast to all four lanes below
movd xmm13, r8d
; Step over entry 0
add rdx, 144
; xmm15 = 1 in every lane after the broadcast below (the counter increment)
movd xmm15, eax
; rax = number of entries still to read
mov rax, 63
pshufd xmm15, xmm15, 0
pshufd xmm13, xmm13, 0
pxor xmm14, xmm14
; xmm0-xmm3 and r11 accumulate the selected nine words; start from zero
pxor xmm0, xmm0
pxor xmm1, xmm1
pxor xmm2, xmm2
pxor xmm3, xmm3
xor r11, r11
; xmm14 = vector index counter, 1 in every lane to begin with
movdqa xmm14, xmm15
L_521_get_entry_64_9_start_0:
; xmm12 = all ones when the vector counter equals idx, otherwise zero
movdqa xmm12, xmm14
paddd xmm14, xmm15
pcmpeqd xmm12, xmm13
; r9 = the same selection as a 64-bit scalar mask (-1 or 0).
; NOTE(review): this compares all 64 bits of r8 while the vector mask uses
; only r8d - assumes the upper half of r8 is zero; confirm with callers.
xor r9, r9
cmp r8, r12
sete r9b
neg r9
inc r12
; Load all nine words of this half unconditionally, then move to next entry
movdqu xmm4, OWORD PTR [rdx]
movdqu xmm5, OWORD PTR [rdx+16]
movdqu xmm6, OWORD PTR [rdx+32]
movdqu xmm7, OWORD PTR [rdx+48]
mov r10, QWORD PTR [rdx+64]
add rdx, 144
; Keep the words only when this is the requested entry
pand xmm4, xmm12
pand xmm5, xmm12
pand xmm6, xmm12
pand xmm7, xmm12
and r10, r9
; Merge into the accumulators
por xmm0, xmm4
por xmm1, xmm5
por xmm2, xmm6
por xmm3, xmm7
or r11, r10
dec rax
jnz L_521_get_entry_64_9_start_0
; Write the first nine words of the result
movdqu OWORD PTR [rcx], xmm0
movdqu OWORD PTR [rcx+16], xmm1
movdqu OWORD PTR [rcx+32], xmm2
movdqu OWORD PTR [rcx+48], xmm3
mov QWORD PTR [rcx+64], r11
; Second pass: bytes 72..143 of each entry, from entry 1
mov r12, 1
mov rax, 1
movd xmm13, r8d
; rdx is now table + 64*144 = table + 9216; rewind to table + 144 + 72
sub rdx, 9000
movd xmm15, eax
mov rax, 63
pshufd xmm15, xmm15, 0
pshufd xmm13, xmm13, 0
pxor xmm14, xmm14
pxor xmm0, xmm0
pxor xmm1, xmm1
pxor xmm2, xmm2
pxor xmm3, xmm3
xor r11, r11
movdqa xmm14, xmm15
L_521_get_entry_64_9_start_1:
; Same masked read-and-merge as the first pass
movdqa xmm12, xmm14
paddd xmm14, xmm15
pcmpeqd xmm12, xmm13
xor r9, r9
cmp r8, r12
sete r9b
neg r9
inc r12
movdqu xmm4, OWORD PTR [rdx]
movdqu xmm5, OWORD PTR [rdx+16]
movdqu xmm6, OWORD PTR [rdx+32]
movdqu xmm7, OWORD PTR [rdx+48]
mov r10, QWORD PTR [rdx+64]
add rdx, 144
pand xmm4, xmm12
pand xmm5, xmm12
pand xmm6, xmm12
pand xmm7, xmm12
and r10, r9
por xmm0, xmm4
por xmm1, xmm5
por xmm2, xmm6
por xmm3, xmm7
or r11, r10
dec rax
jnz L_521_get_entry_64_9_start_1
; Write the second nine words of the result at offset 144 of r
movdqu OWORD PTR [rcx+144], xmm0
movdqu OWORD PTR [rcx+160], xmm1
movdqu OWORD PTR [rcx+176], xmm2
movdqu OWORD PTR [rcx+192], xmm3
mov QWORD PTR [rcx+208], r11
; Restore the saved vector registers and release the stack area
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm8, OWORD PTR [rsp+32]
movdqu xmm9, OWORD PTR [rsp+48]
movdqu xmm10, OWORD PTR [rsp+64]
movdqu xmm11, OWORD PTR [rsp+80]
movdqu xmm12, OWORD PTR [rsp+96]
movdqu xmm13, OWORD PTR [rsp+112]
movdqu xmm14, OWORD PTR [rsp+128]
movdqu xmm15, OWORD PTR [rsp+144]
add rsp, 160
pop r12
ret
sp_521_get_entry_64_9 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Copy one table entry into a point while reading every candidate entry.
; * Uses 256-bit AVX2 loads so both halves of an entry are handled in a single
; * pass.
; *
; * Starting at entry 1, 64 consecutive 144-byte entries are loaded and
; * combined under a mask that is all ones only for the entry whose index
; * equals idx, so the addresses read do not depend on idx. Entry 0 is never
; * read.
; * NOTE(review): the loop reads entries 1..64, one more than the 63 read by
; * sp_521_get_entry_64_9 - confirm the table is readable at index 64.
; *
; * Bytes 0..71 of the selected entry are written to r at offset 0 and bytes
; * 72..143 are written to r at offset 144.
; *
; * r      Point to copy into. (rcx)
; * table  Table - start of the entries to access. (rdx)
; * idx    Index of entry to retrieve. (r8)
; */
_text SEGMENT READONLY PARA
sp_521_get_entry_64_avx2_9 PROC
push r12
push r13
push r14
sub rsp, 96
; Save xmm6-xmm11 (callee-saved in the Microsoft x64 convention); they are
; reloaded from the same slots before returning.
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
vmovdqu OWORD PTR [rsp+32], xmm8
vmovdqu OWORD PTR [rsp+48], xmm9
vmovdqu OWORD PTR [rsp+64], xmm10
vmovdqu OWORD PTR [rsp+80], xmm11
; r14 = scalar index of the entry about to be read
mov r14, 1
mov rax, 1
; Low lane of xmm9 = idx (low 32 bits of r8)
movd xmm9, r8d
; Step over entry 0
add rdx, 144
; Low lane of xmm11 = 1
movd xmm11, eax
; rax = number of entries still to read
mov rax, 64
; With an all-zero index vector, vpermd copies lane 0 into all eight lanes:
; ymm9 = idx in every lane, ymm11 = 1 in every lane
vpxor ymm10, ymm10, ymm10
vpermd ymm9, ymm10, ymm9
vpermd ymm11, ymm10, ymm11
; ymm0-ymm3, r10 and r11 accumulate the selected words; start from zero
vpxor ymm0, ymm0, ymm0
vpxor ymm1, ymm1, ymm1
vpxor ymm2, ymm2, ymm2
vpxor ymm3, ymm3, ymm3
xor r10, r10
xor r11, r11
; ymm10 = vector index counter, 1 in every lane to begin with
vmovdqa ymm10, ymm11
L_521_get_entry_64_avx2_9_start:
; ymm8 = all ones when the vector counter equals idx, otherwise zero
vpcmpeqd ymm8, ymm10, ymm9
vpaddd ymm10, ymm10, ymm11
; r9 = the same selection as a 64-bit scalar mask (-1 or 0).
; NOTE(review): this compares all 64 bits of r8 while the vector mask uses
; only r8d - assumes the upper half of r8 is zero; confirm with callers.
xor r9, r9
cmp r8, r14
sete r9b
neg r9
inc r14
; First half: words 0..7 in ymm4/ymm5, word 8 in r12.
; Second half (entry offset 72): words 0..7 in ymm6/ymm7, word 8 in r13.
vmovupd ymm4, YMMWORD PTR [rdx]
vmovupd ymm5, YMMWORD PTR [rdx+32]
vmovupd ymm6, YMMWORD PTR [rdx+72]
vmovupd ymm7, YMMWORD PTR [rdx+104]
mov r12, QWORD PTR [rdx+64]
mov r13, QWORD PTR [rdx+136]
add rdx, 144
; Keep the words only when this is the requested entry
vpand ymm4, ymm4, ymm8
vpand ymm5, ymm5, ymm8
vpand ymm6, ymm6, ymm8
vpand ymm7, ymm7, ymm8
and r12, r9
and r13, r9
; Merge into the accumulators
vpor ymm0, ymm0, ymm4
vpor ymm1, ymm1, ymm5
vpor ymm2, ymm2, ymm6
vpor ymm3, ymm3, ymm7
or r10, r12
or r11, r13
dec rax
jnz L_521_get_entry_64_avx2_9_start
; First half to r at offset 0, second half to r at offset 144
vmovupd YMMWORD PTR [rcx], ymm0
vmovupd YMMWORD PTR [rcx+32], ymm1
vmovupd YMMWORD PTR [rcx+144], ymm2
vmovupd YMMWORD PTR [rcx+176], ymm3
mov QWORD PTR [rcx+64], r10
mov QWORD PTR [rcx+208], r11
; Restore the saved vector registers and release the stack area.
; NOTE(review): no vzeroupper is issued before returning - confirm that the
; callers do not need one.
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm8, OWORD PTR [rsp+32]
vmovdqu xmm9, OWORD PTR [rsp+48]
vmovdqu xmm10, OWORD PTR [rsp+64]
vmovdqu xmm11, OWORD PTR [rsp+80]
add rsp, 96
pop r14
pop r13
pop r12
ret
sp_521_get_entry_64_avx2_9 ENDP
_text ENDS
ENDIF
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Copy one table entry into a point while reading every candidate entry.
; *
; * Entries 1 to 64 are all loaded and combined under a mask that is all ones
; * only for the entry whose index equals idx, so the addresses read do not
; * depend on idx. Entry 0 is never read; if idx matches none of 1..64 the
; * words written to r are all zero.
; *
; * Entries are 144 bytes apart. Bytes 0..71 of the selected entry are written
; * to r at offset 0 and bytes 72..143 are written to r at offset 144.
; * NOTE(review): presumably these are the x and y ordinates - confirm against
; * the C declarations of the point and table entry types.
; *
; * r      Point to copy into. (rcx)
; * table  Table - start of the entries to access. (rdx)
; * idx    Index of entry to retrieve. (r8)
; */
_text SEGMENT READONLY PARA
sp_521_get_entry_65_9 PROC
push r12
sub rsp, 160
; Save xmm6-xmm15 (callee-saved in the Microsoft x64 convention); they are
; reloaded from the same slots before returning.
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqu OWORD PTR [rsp+32], xmm8
movdqu OWORD PTR [rsp+48], xmm9
movdqu OWORD PTR [rsp+64], xmm10
movdqu OWORD PTR [rsp+80], xmm11
movdqu OWORD PTR [rsp+96], xmm12
movdqu OWORD PTR [rsp+112], xmm13
movdqu OWORD PTR [rsp+128], xmm14
movdqu OWORD PTR [rsp+144], xmm15
; First pass: bytes 0..71 of each entry, from entry 1
; r12 = scalar index of the entry about to be read
mov r12, 1
mov rax, 1
; xmm13 = idx (low 32 bits of r8), broadcast to all four lanes below
movd xmm13, r8d
; Step over entry 0
add rdx, 144
; xmm15 = 1 in every lane after the broadcast below (the counter increment)
movd xmm15, eax
; rax = number of entries still to read
mov rax, 64
pshufd xmm15, xmm15, 0
pshufd xmm13, xmm13, 0
pxor xmm14, xmm14
; xmm0-xmm3 and r11 accumulate the selected nine words; start from zero
pxor xmm0, xmm0
pxor xmm1, xmm1
pxor xmm2, xmm2
pxor xmm3, xmm3
xor r11, r11
; xmm14 = vector index counter, 1 in every lane to begin with
movdqa xmm14, xmm15
L_521_get_entry_65_9_start_0:
; xmm12 = all ones when the vector counter equals idx, otherwise zero
movdqa xmm12, xmm14
paddd xmm14, xmm15
pcmpeqd xmm12, xmm13
; r9 = the same selection as a 64-bit scalar mask (-1 or 0).
; NOTE(review): this compares all 64 bits of r8 while the vector mask uses
; only r8d - assumes the upper half of r8 is zero; confirm with callers.
xor r9, r9
cmp r8, r12
sete r9b
neg r9
inc r12
; Load all nine words of this half unconditionally, then move to next entry
movdqu xmm4, OWORD PTR [rdx]
movdqu xmm5, OWORD PTR [rdx+16]
movdqu xmm6, OWORD PTR [rdx+32]
movdqu xmm7, OWORD PTR [rdx+48]
mov r10, QWORD PTR [rdx+64]
add rdx, 144
; Keep the words only when this is the requested entry
pand xmm4, xmm12
pand xmm5, xmm12
pand xmm6, xmm12
pand xmm7, xmm12
and r10, r9
; Merge into the accumulators
por xmm0, xmm4
por xmm1, xmm5
por xmm2, xmm6
por xmm3, xmm7
or r11, r10
dec rax
jnz L_521_get_entry_65_9_start_0
; Write the first nine words of the result
movdqu OWORD PTR [rcx], xmm0
movdqu OWORD PTR [rcx+16], xmm1
movdqu OWORD PTR [rcx+32], xmm2
movdqu OWORD PTR [rcx+48], xmm3
mov QWORD PTR [rcx+64], r11
; Second pass: bytes 72..143 of each entry, from entry 1
mov r12, 1
mov rax, 1
movd xmm13, r8d
; rdx is now table + 65*144 = table + 9360; rewind to table + 144 + 72
sub rdx, 9144
movd xmm15, eax
mov rax, 64
pshufd xmm15, xmm15, 0
pshufd xmm13, xmm13, 0
pxor xmm14, xmm14
pxor xmm0, xmm0
pxor xmm1, xmm1
pxor xmm2, xmm2
pxor xmm3, xmm3
xor r11, r11
movdqa xmm14, xmm15
L_521_get_entry_65_9_start_1:
; Same masked read-and-merge as the first pass
movdqa xmm12, xmm14
paddd xmm14, xmm15
pcmpeqd xmm12, xmm13
xor r9, r9
cmp r8, r12
sete r9b
neg r9
inc r12
movdqu xmm4, OWORD PTR [rdx]
movdqu xmm5, OWORD PTR [rdx+16]
movdqu xmm6, OWORD PTR [rdx+32]
movdqu xmm7, OWORD PTR [rdx+48]
mov r10, QWORD PTR [rdx+64]
add rdx, 144
pand xmm4, xmm12
pand xmm5, xmm12
pand xmm6, xmm12
pand xmm7, xmm12
and r10, r9
por xmm0, xmm4
por xmm1, xmm5
por xmm2, xmm6
por xmm3, xmm7
or r11, r10
dec rax
jnz L_521_get_entry_65_9_start_1
; Write the second nine words of the result at offset 144 of r
movdqu OWORD PTR [rcx+144], xmm0
movdqu OWORD PTR [rcx+160], xmm1
movdqu OWORD PTR [rcx+176], xmm2
movdqu OWORD PTR [rcx+192], xmm3
mov QWORD PTR [rcx+208], r11
; Restore the saved vector registers and release the stack area
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm8, OWORD PTR [rsp+32]
movdqu xmm9, OWORD PTR [rsp+48]
movdqu xmm10, OWORD PTR [rsp+64]
movdqu xmm11, OWORD PTR [rsp+80]
movdqu xmm12, OWORD PTR [rsp+96]
movdqu xmm13, OWORD PTR [rsp+112]
movdqu xmm14, OWORD PTR [rsp+128]
movdqu xmm15, OWORD PTR [rsp+144]
add rsp, 160
pop r12
ret
sp_521_get_entry_65_9 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Copy one table entry into a point while reading every candidate entry.
; * Uses 256-bit AVX2 loads so both halves of an entry are handled in a single
; * pass.
; *
; * Starting at entry 1, 65 consecutive 144-byte entries are loaded and
; * combined under a mask that is all ones only for the entry whose index
; * equals idx, so the addresses read do not depend on idx. Entry 0 is never
; * read.
; * NOTE(review): the loop reads entries 1..65, one more than the 64 read by
; * sp_521_get_entry_65_9 - confirm the table is readable at index 65.
; *
; * Bytes 0..71 of the selected entry are written to r at offset 0 and bytes
; * 72..143 are written to r at offset 144.
; *
; * r      Point to copy into. (rcx)
; * table  Table - start of the entries to access. (rdx)
; * idx    Index of entry to retrieve. (r8)
; */
_text SEGMENT READONLY PARA
sp_521_get_entry_65_avx2_9 PROC
push r12
push r13
push r14
sub rsp, 96
; Save xmm6-xmm11 (callee-saved in the Microsoft x64 convention); they are
; reloaded from the same slots before returning.
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
vmovdqu OWORD PTR [rsp+32], xmm8
vmovdqu OWORD PTR [rsp+48], xmm9
vmovdqu OWORD PTR [rsp+64], xmm10
vmovdqu OWORD PTR [rsp+80], xmm11
; r14 = scalar index of the entry about to be read
mov r14, 1
mov rax, 1
; Low lane of xmm9 = idx (low 32 bits of r8)
movd xmm9, r8d
; Step over entry 0
add rdx, 144
; Low lane of xmm11 = 1
movd xmm11, eax
; rax = number of entries still to read
mov rax, 65
; With an all-zero index vector, vpermd copies lane 0 into all eight lanes:
; ymm9 = idx in every lane, ymm11 = 1 in every lane
vpxor ymm10, ymm10, ymm10
vpermd ymm9, ymm10, ymm9
vpermd ymm11, ymm10, ymm11
; ymm0-ymm3, r10 and r11 accumulate the selected words; start from zero
vpxor ymm0, ymm0, ymm0
vpxor ymm1, ymm1, ymm1
vpxor ymm2, ymm2, ymm2
vpxor ymm3, ymm3, ymm3
xor r10, r10
xor r11, r11
; ymm10 = vector index counter, 1 in every lane to begin with
vmovdqa ymm10, ymm11
L_521_get_entry_65_avx2_9_start:
; ymm8 = all ones when the vector counter equals idx, otherwise zero
vpcmpeqd ymm8, ymm10, ymm9
vpaddd ymm10, ymm10, ymm11
; r9 = the same selection as a 64-bit scalar mask (-1 or 0).
; NOTE(review): this compares all 64 bits of r8 while the vector mask uses
; only r8d - assumes the upper half of r8 is zero; confirm with callers.
xor r9, r9
cmp r8, r14
sete r9b
neg r9
inc r14
; First half: words 0..7 in ymm4/ymm5, word 8 in r12.
; Second half (entry offset 72): words 0..7 in ymm6/ymm7, word 8 in r13.
vmovupd ymm4, YMMWORD PTR [rdx]
vmovupd ymm5, YMMWORD PTR [rdx+32]
vmovupd ymm6, YMMWORD PTR [rdx+72]
vmovupd ymm7, YMMWORD PTR [rdx+104]
mov r12, QWORD PTR [rdx+64]
mov r13, QWORD PTR [rdx+136]
add rdx, 144
; Keep the words only when this is the requested entry
vpand ymm4, ymm4, ymm8
vpand ymm5, ymm5, ymm8
vpand ymm6, ymm6, ymm8
vpand ymm7, ymm7, ymm8
and r12, r9
and r13, r9
; Merge into the accumulators
vpor ymm0, ymm0, ymm4
vpor ymm1, ymm1, ymm5
vpor ymm2, ymm2, ymm6
vpor ymm3, ymm3, ymm7
or r10, r12
or r11, r13
dec rax
jnz L_521_get_entry_65_avx2_9_start
; First half to r at offset 0, second half to r at offset 144
vmovupd YMMWORD PTR [rcx], ymm0
vmovupd YMMWORD PTR [rcx+32], ymm1
vmovupd YMMWORD PTR [rcx+144], ymm2
vmovupd YMMWORD PTR [rcx+176], ymm3
mov QWORD PTR [rcx+64], r10
mov QWORD PTR [rcx+208], r11
; Restore the saved vector registers and release the stack area.
; NOTE(review): no vzeroupper is issued before returning - confirm that the
; callers do not need one.
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm8, OWORD PTR [rsp+32]
vmovdqu xmm9, OWORD PTR [rsp+48]
vmovdqu xmm10, OWORD PTR [rsp+64]
vmovdqu xmm11, OWORD PTR [rsp+80]
add rsp, 96
pop r14
pop r13
pop r12
ret
sp_521_get_entry_65_avx2_9 ENDP
_text ENDS
ENDIF
ENDIF
; /* Increment a nine word number in place. (a = a + 1)
; *
; * The carry is rippled through all nine words; a carry out of the top word
; * is not reported to the caller.
; *
; * a  A single precision integer, nine 64-bit words. (rcx)
; */
_text SEGMENT READONLY PARA
sp_521_add_one_9 PROC
        ; Word 0 takes the +1; every later word only absorbs the carry.
        add     QWORD PTR [rcx], 1
        adc     QWORD PTR [rcx+8], 0
        adc     QWORD PTR [rcx+16], 0
        adc     QWORD PTR [rcx+24], 0
        adc     QWORD PTR [rcx+32], 0
        adc     QWORD PTR [rcx+40], 0
        adc     QWORD PTR [rcx+48], 0
        adc     QWORD PTR [rcx+56], 0
        adc     QWORD PTR [rcx+64], 0
        ret
sp_521_add_one_9 ENDP
_text ENDS
; /* Read big endian unsigned byte array into r.
; * Uses the bswap instruction.
; *
; * The input is consumed from its last byte backwards: 64 bytes at a time,
; * then 8 bytes at a time, and finally the 1..7 leading bytes are packed into
; * one word. Words of r that received no data, up to the nine words of a
; * 66 byte (521 bit) number, are set to zero.
; *
; * The zero-fill bound is r + 66, the same constant that
; * sp_521_from_bin_movbe uses. The previous bound of r + 65 selected the same
; * nine words, so the output is unchanged.
; *
; * r     A single precision integer. (rcx)
; * size  Maximum number of bytes to convert - not read by this routine. (rdx)
; * a     Byte array. (r8)
; * n     Number of bytes in array to read. (r9)
; */
_text SEGMENT READONLY PARA
sp_521_from_bin_bswap PROC
        push    r12
        push    r13
        ; r11 = a + n: one past the last input byte
        mov     r11, r8
        ; r12 = r + 66: end bound for the zero fill
        mov     r12, rcx
        add     r11, r9
        add     r12, 66
        ; r13 = 0, used as the constant zero below
        xor     r13, r13
        jmp     L_521_from_bin_bswap_64_end
L_521_from_bin_bswap_64_start:
        ; Convert the last 64 unread bytes into eight words of r.
        sub     r11, 64
        mov     rax, QWORD PTR [r11+56]
        mov     r10, QWORD PTR [r11+48]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        mov     rax, QWORD PTR [r11+40]
        mov     r10, QWORD PTR [r11+32]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        mov     rax, QWORD PTR [r11+24]
        mov     r10, QWORD PTR [r11+16]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        mov     rax, QWORD PTR [r11+8]
        mov     r10, QWORD PTR [r11]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
L_521_from_bin_bswap_64_end:
        ; Repeat while at least 64 bytes remain.
        cmp     r9, 63
        jg      L_521_from_bin_bswap_64_start
        jmp     L_521_from_bin_bswap_8_end
L_521_from_bin_bswap_8_start:
        ; Convert the last 8 unread bytes into one word of r.
        sub     r11, 8
        mov     rax, QWORD PTR [r11]
        bswap   rax
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_521_from_bin_bswap_8_end:
        ; Repeat while at least 8 bytes remain.
        cmp     r9, 7
        jg      L_521_from_bin_bswap_8_start
        ; Nothing left over: go straight to the zero fill.
        cmp     r9, r13
        je      L_521_from_bin_bswap_hi_end
        ; r10 accumulates the leading bytes; rax is cleared so that loading al
        ; yields a zero-extended byte.
        mov     r10, r13
        mov     rax, r13
L_521_from_bin_bswap_hi_start:
        ; The remaining bytes are a[0..], most significant first.
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_521_from_bin_bswap_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_521_from_bin_bswap_hi_end:
        ; Zero every remaining word of r that starts below the bound.
        cmp     rcx, r12
        jge     L_521_from_bin_bswap_zero_end
L_521_from_bin_bswap_zero_start:
        mov     QWORD PTR [rcx], r13
        add     rcx, 8
        cmp     rcx, r12
        jl      L_521_from_bin_bswap_zero_start
L_521_from_bin_bswap_zero_end:
        pop     r13
        pop     r12
        ret
sp_521_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Convert a big endian byte array into the words of r.
; * Uses the movbe instruction which is an optional instruction.
; *
; * The array is consumed from its last byte backwards: 64 bytes at a time,
; * then 8 bytes at a time, and finally the 1..7 leading bytes are packed into
; * one word. Words of r that received no data and start below r + 66 are set
; * to zero.
; *
; * r     A single precision integer. (rcx)
; * size  Maximum number of bytes to convert - not read by this routine. (rdx)
; * a     Byte array. (r8)
; * n     Number of bytes in array to read. (r9)
; */
_text SEGMENT READONLY PARA
sp_521_from_bin_movbe PROC
        push    r12
        ; r12 = r + 66: end bound for the zero fill
        mov     r12, rcx
        add     r12, 66
        ; r11 = a + n: one past the last input byte
        mov     r11, r8
        add     r11, r9
        jmp     L_521_from_bin_movbe_chunk64_check
L_521_from_bin_movbe_chunk64_loop:
        ; Eight words from the last 64 unread bytes.
        sub     r11, 64
        movbe   rax, QWORD PTR [r11+56]
        movbe   r10, QWORD PTR [r11+48]
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        movbe   rax, QWORD PTR [r11+40]
        movbe   r10, QWORD PTR [r11+32]
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        movbe   rax, QWORD PTR [r11+24]
        movbe   r10, QWORD PTR [r11+16]
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        movbe   rax, QWORD PTR [r11+8]
        movbe   r10, QWORD PTR [r11]
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
L_521_from_bin_movbe_chunk64_check:
        ; Loop while 64 or more bytes remain.
        cmp     r9, 63
        jg      L_521_from_bin_movbe_chunk64_loop
        jmp     L_521_from_bin_movbe_word_check
L_521_from_bin_movbe_word_loop:
        ; One word from the last 8 unread bytes.
        sub     r11, 8
        movbe   rax, QWORD PTR [r11]
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_521_from_bin_movbe_word_check:
        ; Loop while 8 or more bytes remain.
        cmp     r9, 7
        jg      L_521_from_bin_movbe_word_loop
        ; No leading bytes left: only the zero fill remains.
        test    r9, r9
        jz      L_521_from_bin_movbe_lead_done
        ; r10 = accumulator; rax cleared so al reads back zero-extended.
        xor     r10d, r10d
        xor     eax, eax
L_521_from_bin_movbe_lead_loop:
        ; Leading bytes are a[0..], most significant first.
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_521_from_bin_movbe_lead_loop
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_521_from_bin_movbe_lead_done:
        ; Clear each remaining word of r that starts below the bound.
        cmp     rcx, r12
        jge     L_521_from_bin_movbe_done
L_521_from_bin_movbe_clear_loop:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, r12
        jl      L_521_from_bin_movbe_clear_loop
L_521_from_bin_movbe_done:
        pop     r12
        ret
sp_521_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Serialize r as a big endian byte array.
; * Fixed length output: bytes 0..65 of a are written (66 bytes).
; * Uses the bswap instruction.
; *
; * The two most significant bytes come from bytes 65 and 64 of r; the other
; * 64 bytes are words 7 down to 0 of r with their bytes reversed.
; *
; * r  A single precision integer. (rcx)
; * a  Byte array. (rdx)
; */
_text SEGMENT READONLY PARA
sp_521_to_bin_bswap_9 PROC
        ; a[0] = byte 65 of r, a[1] = byte 64 of r
        mov     al, BYTE PTR [rcx+65]
        mov     r8b, BYTE PTR [rcx+64]
        mov     BYTE PTR [rdx], al
        mov     BYTE PTR [rdx+1], r8b
        ; Words 7 and 6
        mov     r8, QWORD PTR [rcx+48]
        mov     rax, QWORD PTR [rcx+56]
        bswap   r8
        bswap   rax
        mov     QWORD PTR [rdx+2], rax
        mov     QWORD PTR [rdx+10], r8
        ; Words 5 and 4
        mov     r8, QWORD PTR [rcx+32]
        mov     rax, QWORD PTR [rcx+40]
        bswap   r8
        bswap   rax
        mov     QWORD PTR [rdx+18], rax
        mov     QWORD PTR [rdx+26], r8
        ; Words 3 and 2
        mov     r8, QWORD PTR [rcx+16]
        mov     rax, QWORD PTR [rcx+24]
        bswap   r8
        bswap   rax
        mov     QWORD PTR [rdx+34], rax
        mov     QWORD PTR [rdx+42], r8
        ; Words 1 and 0
        mov     r8, QWORD PTR [rcx]
        mov     rax, QWORD PTR [rcx+8]
        bswap   r8
        bswap   rax
        mov     QWORD PTR [rdx+50], rax
        mov     QWORD PTR [rdx+58], r8
        ret
sp_521_to_bin_bswap_9 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Serialize r as a big endian byte array.
; * Fixed length output: bytes 0..65 of a are written (66 bytes).
; * Uses the movbe instruction which is optional.
; *
; * The two most significant bytes come from bytes 65 and 64 of r; the other
; * 64 bytes are words 7 down to 0 of r, byte-reversed as they are loaded.
; *
; * r  A single precision integer. (rcx)
; * a  Byte array. (rdx)
; */
_text SEGMENT READONLY PARA
sp_521_to_bin_movbe_9 PROC
        ; a[0] = byte 65 of r, a[1] = byte 64 of r
        mov     al, BYTE PTR [rcx+65]
        mov     r8b, BYTE PTR [rcx+64]
        mov     BYTE PTR [rdx], al
        mov     BYTE PTR [rdx+1], r8b
        ; Words 7 and 6
        movbe   r8, QWORD PTR [rcx+48]
        movbe   rax, QWORD PTR [rcx+56]
        mov     QWORD PTR [rdx+2], rax
        mov     QWORD PTR [rdx+10], r8
        ; Words 5 and 4
        movbe   r8, QWORD PTR [rcx+32]
        movbe   rax, QWORD PTR [rcx+40]
        mov     QWORD PTR [rdx+18], rax
        mov     QWORD PTR [rdx+26], r8
        ; Words 3 and 2
        movbe   r8, QWORD PTR [rcx+16]
        movbe   rax, QWORD PTR [rcx+24]
        mov     QWORD PTR [rdx+34], rax
        mov     QWORD PTR [rdx+42], r8
        ; Words 1 and 0
        movbe   r8, QWORD PTR [rcx]
        movbe   rax, QWORD PTR [rcx+8]
        mov     QWORD PTR [rdx+50], rax
        mov     QWORD PTR [rdx+58], r8
        ret
sp_521_to_bin_movbe_9 ENDP
_text ENDS
ENDIF
; /* Shift a nine word number right by n bits. (r = a >> n)
; *
; * The count is used through cl by shrd/shr.
; * NOTE(review): for 64-bit operands the CPU masks the count to 6 bits, so n
; * is presumably expected to be in 0..63 - confirm with callers.
; *
; * r  Result of the right shift. (rcx)
; * a  Number to shift. (rdx)
; * n  Number of bits to shift by. (r8)
; */
_text SEGMENT READONLY PARA
sp_521_rshift_9 PROC
        push    r12
        ; rcx is needed for the shift count, so r moves to rax.
        mov     rax, rcx
        mov     rcx, r8
        ; Words 0..3, with word 4 supplying the bits shifted into word 3.
        mov     r8, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     r11, QWORD PTR [rdx+24]
        mov     r12, QWORD PTR [rdx+32]
        shrd    r8, r9, cl
        shrd    r9, r10, cl
        shrd    r10, r11, cl
        shrd    r11, r12, cl
        mov     QWORD PTR [rax], r8
        mov     QWORD PTR [rax+8], r9
        mov     QWORD PTR [rax+16], r10
        mov     QWORD PTR [rax+24], r11
        ; Words 4..7, with word 8 (in r8) supplying the bits for word 7.
        mov     r9, QWORD PTR [rdx+40]
        mov     r10, QWORD PTR [rdx+48]
        mov     r11, QWORD PTR [rdx+56]
        mov     r8, QWORD PTR [rdx+64]
        shrd    r12, r9, cl
        shrd    r9, r10, cl
        shrd    r10, r11, cl
        shrd    r11, r8, cl
        mov     QWORD PTR [rax+32], r12
        mov     QWORD PTR [rax+40], r9
        mov     QWORD PTR [rax+48], r10
        mov     QWORD PTR [rax+56], r11
        ; The top word is filled with zero bits from above.
        shr     r8, cl
        mov     QWORD PTR [rax+64], r8
        pop     r12
        ret
sp_521_rshift_9 ENDP
_text ENDS
; /* Shift a nine word number left by n bits. (r = a << n)
; *
; * Ten words are written to r (offsets 0..72); the top word receives the bits
; * shifted out of a[8].
; * The count is the low byte of n, used through cl by shld/shl.
; * NOTE(review): for 64-bit operands the CPU masks the count to 6 bits, so n
; * is presumably expected to be in 0..63 - confirm with callers.
; *
; * r  Result of left shift by n. (rcx)
; * a  Number to shift. (rdx)
; * n  Amount to shift. (r8)
; */
_text SEGMENT READONLY PARA
sp_521_lshift_9 PROC
        push    r12
        push    r13
        ; rcx is needed for the shift count, so r moves to rax.
        mov     rax, rcx
        mov     cl, r8b
        ; r12 = 0: becomes r[9], filled from the top bits of a[8].
        xor     r12d, r12d
        ; Words 4..8 of a; word 4 (r13) is kept for the lower half as well.
        mov     r13, QWORD PTR [rdx+32]
        mov     r8, QWORD PTR [rdx+40]
        mov     r9, QWORD PTR [rdx+48]
        mov     r10, QWORD PTR [rdx+56]
        mov     r11, QWORD PTR [rdx+64]
        ; Work from the top down so each source is read before it is shifted.
        shld    r12, r11, cl
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+72], r12
        mov     QWORD PTR [rax+64], r11
        mov     QWORD PTR [rax+56], r10
        mov     QWORD PTR [rax+48], r9
        mov     QWORD PTR [rax+40], r8
        ; Words 0..3 of a.
        mov     r11, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+24]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r11, cl
        mov     QWORD PTR [rax+32], r13
        mov     QWORD PTR [rax+24], r10
        mov     QWORD PTR [rax+16], r9
        mov     QWORD PTR [rax+8], r8
        ; The bottom word is filled with zero bits from below.
        shl     r11, cl
        mov     QWORD PTR [rax], r11
        pop     r13
        pop     r12
        ret
sp_521_lshift_9 ENDP
_text ENDS
; /* Shift an 18 word number left by n bits. (r = a << n)
; *
; * Nineteen words are written to r (offsets 0..144); the top word receives
; * the bits shifted out of a[17].
; * The count is the low byte of n, used through cl by shld/shl.
; * NOTE(review): for 64-bit operands the CPU masks the count to 6 bits, so n
; * is presumably expected to be in 0..63 - confirm with callers.
; *
; * r  Result of left shift by n. (rcx)
; * a  Number to shift. (rdx)
; * n  Amount to shift. (r8)
; */
_text SEGMENT READONLY PARA
sp_521_lshift_18 PROC
push r12
push r13
; rcx is needed for the shift count, so r moves to rax
mov rax, rcx
mov cl, r8b
; r12 = 0: becomes r[18], filled from the top bits of a[17]
mov r12, 0
; Words 13..17 of a. Work proceeds from the top word down so that each word
; is still unshifted when it supplies bits to the word above it.
mov r13, QWORD PTR [rdx+104]
mov r8, QWORD PTR [rdx+112]
mov r9, QWORD PTR [rdx+120]
mov r10, QWORD PTR [rdx+128]
mov r11, QWORD PTR [rdx+136]
shld r12, r11, cl
shld r11, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r13, cl
mov QWORD PTR [rax+112], r8
mov QWORD PTR [rax+120], r9
mov QWORD PTR [rax+128], r10
mov QWORD PTR [rax+136], r11
mov QWORD PTR [rax+144], r12
; Words 9..12 of a; r13 still holds unshifted word 13 and is completed here
mov r11, QWORD PTR [rdx+72]
mov r8, QWORD PTR [rdx+80]
mov r9, QWORD PTR [rdx+88]
mov r10, QWORD PTR [rdx+96]
shld r13, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r11, cl
mov QWORD PTR [rax+80], r8
mov QWORD PTR [rax+88], r9
mov QWORD PTR [rax+96], r10
mov QWORD PTR [rax+104], r13
; Words 5..8 of a; r11 still holds unshifted word 9 and is completed here
mov r13, QWORD PTR [rdx+40]
mov r8, QWORD PTR [rdx+48]
mov r9, QWORD PTR [rdx+56]
mov r10, QWORD PTR [rdx+64]
shld r11, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r13, cl
mov QWORD PTR [rax+48], r8
mov QWORD PTR [rax+56], r9
mov QWORD PTR [rax+64], r10
mov QWORD PTR [rax+72], r11
; Words 1..4 of a; r13 still holds unshifted word 5 and is completed here
mov r11, QWORD PTR [rdx+8]
mov r8, QWORD PTR [rdx+16]
mov r9, QWORD PTR [rdx+24]
mov r10, QWORD PTR [rdx+32]
shld r13, r10, cl
shld r10, r9, cl
shld r9, r8, cl
shld r8, r11, cl
mov QWORD PTR [rax+16], r8
mov QWORD PTR [rax+24], r9
mov QWORD PTR [rax+32], r10
mov QWORD PTR [rax+40], r13
; Word 0 of a: completes word 1 (r11), then is itself filled with zero bits
mov r10, QWORD PTR [rdx]
shld r11, r10, cl
shl r10, cl
mov QWORD PTR [rax], r10
mov QWORD PTR [rax+8], r11
pop r13
pop r12
ret
sp_521_lshift_18 ENDP
_text ENDS
; /* Sub b from a into a. (a -= b)
; *
; * Nine 64-bit words are subtracted with the borrow propagated from word 0
; * up to word 8.
; *
; * a  A single precision integer and result. (rcx)
; * b  A single precision integer. (rdx)
; * returns (rax) 0 when there was no borrow out of the top word and -1 (all
; * bits set) when there was.
; */
_text SEGMENT READONLY PARA
sp_521_sub_in_place_9 PROC
; r8 and r9 alternate as the working register. The loads and stores placed
; between the sub/sbb instructions are all mov, which leaves the borrow flag
; untouched.
mov r8, QWORD PTR [rcx]
sub r8, QWORD PTR [rdx]
mov r9, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
sbb r9, QWORD PTR [rdx+8]
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], r9
sbb r8, QWORD PTR [rdx+16]
mov r9, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
sbb r9, QWORD PTR [rdx+24]
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], r9
sbb r8, QWORD PTR [rdx+32]
mov r9, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
sbb r9, QWORD PTR [rdx+40]
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], r9
sbb r8, QWORD PTR [rdx+48]
mov r9, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
sbb r9, QWORD PTR [rdx+56]
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], r9
sbb r8, QWORD PTR [rdx+64]
mov QWORD PTR [rcx+64], r8
; rax = 0 - borrow: turns the final borrow flag into 0 or -1
sbb rax, rax
ret
sp_521_sub_in_place_9 ENDP
_text ENDS
; /* Mul a by digit b into r. (r = a * b)
; *
; * a has nine 64-bit words; ten words are written to r (offsets 0..72).
; *
; * r  A single precision integer. (rcx)
; * a  A single precision integer. (rdx)
; * b  A single precision digit. (r8)
; */
_text SEGMENT READONLY PARA
sp_521_mul_d_9 PROC
push r12
; mul writes its result to rdx:rax, so the pointer to a is moved to r9
mov r9, rdx
; r10, r11 and r12 rotate through three roles from step to step: the word
; being completed, the next word up, and a register cleared to catch the
; carry above that.
; A[0] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9]
mov r10, rax
mov r11, rdx
mov QWORD PTR [rcx], r10
; A[1] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+8]
add r11, rax
; The store is a mov, so the carry from the add survives for the adc below
mov QWORD PTR [rcx+8], r11
adc r12, rdx
adc r10, 0
; A[2] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+16]
add r12, rax
mov QWORD PTR [rcx+16], r12
adc r10, rdx
adc r11, 0
; A[3] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+24]
add r10, rax
mov QWORD PTR [rcx+24], r10
adc r11, rdx
adc r12, 0
; A[4] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+32]
add r11, rax
mov QWORD PTR [rcx+32], r11
adc r12, rdx
adc r10, 0
; A[5] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+40]
add r12, rax
mov QWORD PTR [rcx+40], r12
adc r10, rdx
adc r11, 0
; A[6] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+48]
add r10, rax
mov QWORD PTR [rcx+48], r10
adc r11, rdx
adc r12, 0
; A[7] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+56]
add r11, rax
mov QWORD PTR [rcx+56], r11
adc r12, rdx
adc r10, 0
; A[8] * B
; Last step: the high half plus carry becomes the tenth result word
mov rax, r8
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
mov QWORD PTR [rcx+64], r12
mov QWORD PTR [rcx+72], r10
pop r12
ret
sp_521_mul_d_9 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; * Uses the mulx, adcx and adox instructions.
; *
; * a has nine 64-bit words; ten words are written to r (offsets 0..72).
; *
; * r  A single precision integer. (rcx)
; * a  A single precision integer. (rdx)
; * b  A single precision digit. (r8)
; */
_text SEGMENT READONLY PARA
sp_521_mul_d_avx2_9 PROC
push r12
push r13
; mulx takes one multiplicand implicitly from rdx, so a moves to rax and the
; digit b is placed in rdx
mov rax, rdx
; A[0] * B
mov rdx, r8
; r13 = 0 for the rest of the routine; the xor also clears CF and OF, the
; two carry flags used by the adcx and adox chains below
xor r13, r13
mulx r12, r11, QWORD PTR [rax]
mov QWORD PTR [rcx], r11
; In every step below: r10:r9 = A[i] * B. adcx adds the low half into the
; word being completed (carry in CF); adox adds the high half into the next
; word, freshly zeroed from r13 (carry in OF). mulx and mov change no flags,
; so both chains run through the whole routine.
; A[1] * B
mulx r10, r9, QWORD PTR [rax+8]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+8], r12
; A[2] * B
mulx r10, r9, QWORD PTR [rax+16]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+16], r11
; A[3] * B
mulx r10, r9, QWORD PTR [rax+24]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+24], r12
; A[4] * B
mulx r10, r9, QWORD PTR [rax+32]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+32], r11
; A[5] * B
mulx r10, r9, QWORD PTR [rax+40]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+40], r12
; A[6] * B
mulx r10, r9, QWORD PTR [rax+48]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+48], r11
; A[7] * B
mulx r10, r9, QWORD PTR [rax+56]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+56], r12
; A[8] * B
mulx r10, r9, QWORD PTR [rax+64]
mov r12, r13
adcx r11, r9
adox r12, r10
; Fold the last CF carry into the top word (r13 is zero)
adcx r12, r13
mov QWORD PTR [rcx+64], r11
mov QWORD PTR [rcx+72], r12
pop r13
pop r12
ret
sp_521_mul_d_avx2_9 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by a single word. (d1|d0 / div)
; *
; * Uses the unsigned div instruction with d1 in rdx and d0 in rax.
; *
; * d1   The high order half of the number to divide. (rcx)
; * d0   The low order half of the number to divide. (rdx)
; * div  The divisor. (r8)
; * returns the quotient (rax).
; */
_text SEGMENT READONLY PARA
div_521_word_asm_9 PROC
        ; d0 arrives in rdx, which div needs for the high half, so it is
        ; carried through r9 into rax.
        mov     r9, rdx
        mov     rdx, rcx
        mov     rax, r9
        div     r8
        ret
div_521_word_asm_9 ENDP
_text ENDS
ENDIF
; /* Shift a nine word number right by one bit. (r = a >> 1)
; *
; * r  Result of right shift by 1. (rcx)
; * a  Number to shift. (rdx)
; */
_text SEGMENT READONLY PARA
sp_521_rshift1_9 PROC
        push    r12
        ; Words 0..3, with word 4 (r12) supplying the bit shifted into word 3.
        mov     rax, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+24]
        mov     r12, QWORD PTR [rdx+32]
        shrd    rax, r8, 1
        shrd    r8, r9, 1
        shrd    r9, r10, 1
        shrd    r10, r12, 1
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r8
        mov     QWORD PTR [rcx+16], r9
        mov     QWORD PTR [rcx+24], r10
        ; Words 4..7, with word 8 (rax) supplying the bit for word 7.
        mov     r8, QWORD PTR [rdx+40]
        mov     r9, QWORD PTR [rdx+48]
        mov     r10, QWORD PTR [rdx+56]
        mov     rax, QWORD PTR [rdx+64]
        shrd    r12, r8, 1
        shrd    r8, r9, 1
        shrd    r9, r10, 1
        shrd    r10, rax, 1
        mov     QWORD PTR [rcx+32], r12
        mov     QWORD PTR [rcx+40], r8
        mov     QWORD PTR [rcx+48], r9
        mov     QWORD PTR [rcx+56], r10
        ; The top word gets a zero bit from above.
        shr     rax, 1
        mov     QWORD PTR [rcx+64], rax
        pop     r12
        ret
sp_521_rshift1_9 ENDP
_text ENDS
; /* Divide the number by 2 mod the modulus. (r = a / 2 % m)
; *
; * When a is odd, m is added first and the sum a + m is shifted right by one
; * bit; when a is even, a itself is shifted. The branch depends on the low
; * bit of a.
; * Any carry out of word 8 of a + m is not kept.
; *
; * r may be the same buffer as a or a different one. Earlier the shift always
; * re-read a, so for odd a the addition was lost unless r and a were the same
; * buffer; the shift now reads the sum back from r in that case.
; *
; * r  Result of division by 2, nine 64-bit words. (rcx)
; * a  Number to divide, nine 64-bit words. (rdx)
; * m  Modulus, nine 64-bit words. (r8)
; */
_text SEGMENT READONLY PARA
sp_521_div2_mod_9 PROC
        push    r12
        ; Even a: nothing to add, go straight to the shift.
        mov     rax, QWORD PTR [rdx]
        and     rax, 1
        je      L_521_mod_inv_9_div2_mod_no_add
        ; Odd a: r = a + m, two words at a time with the carry chained.
        mov     rax, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [r8]
        mov     r11, QWORD PTR [r8+8]
        add     rax, r10
        adc     r9, r11
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r9
        mov     rax, QWORD PTR [rdx+16]
        mov     r9, QWORD PTR [rdx+24]
        mov     r10, QWORD PTR [r8+16]
        mov     r11, QWORD PTR [r8+24]
        adc     rax, r10
        adc     r9, r11
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r9
        mov     rax, QWORD PTR [rdx+32]
        mov     r9, QWORD PTR [rdx+40]
        mov     r10, QWORD PTR [r8+32]
        mov     r11, QWORD PTR [r8+40]
        adc     rax, r10
        adc     r9, r11
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r9
        mov     rax, QWORD PTR [rdx+48]
        mov     r9, QWORD PTR [rdx+56]
        mov     r10, QWORD PTR [r8+48]
        mov     r11, QWORD PTR [r8+56]
        adc     rax, r10
        adc     r9, r11
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r9
        mov     rax, QWORD PTR [rdx+64]
        mov     r10, QWORD PTR [r8+64]
        adc     rax, r10
        mov     QWORD PTR [rcx+64], rax
        ; The value to halve is now the sum held in r, not the original a.
        mov     rdx, rcx
L_521_mod_inv_9_div2_mod_no_add:
        ; rdx = source of the value to halve (a when even, r when odd).
        ; Words 0..3, with word 4 (r12) supplying the bit for word 3.
        mov     rax, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     r11, QWORD PTR [rdx+24]
        mov     r12, QWORD PTR [rdx+32]
        shrd    rax, r9, 1
        shrd    r9, r10, 1
        shrd    r10, r11, 1
        shrd    r11, r12, 1
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r9
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        ; Words 4..7, with word 8 (rax) supplying the bit for word 7.
        mov     r9, QWORD PTR [rdx+40]
        mov     r10, QWORD PTR [rdx+48]
        mov     r11, QWORD PTR [rdx+56]
        mov     rax, QWORD PTR [rdx+64]
        shrd    r12, r9, 1
        shrd    r9, r10, 1
        shrd    r10, r11, 1
        shrd    r11, rax, 1
        mov     QWORD PTR [rcx+32], r12
        mov     QWORD PTR [rcx+40], r9
        mov     QWORD PTR [rcx+48], r10
        mov     QWORD PTR [rcx+56], r11
        ; The top word gets a zero bit from above.
        shr     rax, 1
        mov     QWORD PTR [rcx+64], rax
        pop     r12
        ret
sp_521_div2_mod_9 ENDP
_text ENDS
; /* Count the bits needed to represent a nine word number.
; *
; * The words are examined from word 8 down to word 0. For the first non-zero
; * word i the result is 64 * i + (index of its highest set bit) + 1.
; *
; * a  A single precision integer, nine 64-bit words. (rcx)
; * returns (rax) the bit count, or 0 when every word is zero.
; */
_text SEGMENT READONLY PARA
sp_521_num_bits_9 PROC
        ; Result for an all-zero number.
        xor     eax, eax
        ; Word 8
        mov     rdx, QWORD PTR [rcx+64]
        test    rdx, rdx
        jz      L_521_num_bits_9_word_7
        bsr     rax, rdx
        add     rax, 513
        ret
L_521_num_bits_9_word_7:
        mov     rdx, QWORD PTR [rcx+56]
        test    rdx, rdx
        jz      L_521_num_bits_9_word_6
        bsr     rax, rdx
        add     rax, 449
        ret
L_521_num_bits_9_word_6:
        mov     rdx, QWORD PTR [rcx+48]
        test    rdx, rdx
        jz      L_521_num_bits_9_word_5
        bsr     rax, rdx
        add     rax, 385
        ret
L_521_num_bits_9_word_5:
        mov     rdx, QWORD PTR [rcx+40]
        test    rdx, rdx
        jz      L_521_num_bits_9_word_4
        bsr     rax, rdx
        add     rax, 321
        ret
L_521_num_bits_9_word_4:
        mov     rdx, QWORD PTR [rcx+32]
        test    rdx, rdx
        jz      L_521_num_bits_9_word_3
        bsr     rax, rdx
        add     rax, 257
        ret
L_521_num_bits_9_word_3:
        mov     rdx, QWORD PTR [rcx+24]
        test    rdx, rdx
        jz      L_521_num_bits_9_word_2
        bsr     rax, rdx
        add     rax, 193
        ret
L_521_num_bits_9_word_2:
        mov     rdx, QWORD PTR [rcx+16]
        test    rdx, rdx
        jz      L_521_num_bits_9_word_1
        bsr     rax, rdx
        add     rax, 129
        ret
L_521_num_bits_9_word_1:
        mov     rdx, QWORD PTR [rcx+8]
        test    rdx, rdx
        jz      L_521_num_bits_9_word_0
        bsr     rax, rdx
        add     rax, 65
        ret
L_521_num_bits_9_word_0:
        mov     rdx, QWORD PTR [rcx]
        test    rdx, rdx
        jz      L_521_num_bits_9_zero
        bsr     rax, rdx
        add     rax, 1
L_521_num_bits_9_zero:
        ret
sp_521_num_bits_9 ENDP
_text ENDS
ENDIF
IFDEF WOLFSSL_SP_1024
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_1024_mul_16 PROC
push r12
mov r9, rdx
sub rsp, 128
; A[0] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9]
xor r12, r12
mov QWORD PTR [rsp], rax
mov r11, rdx
; A[0] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+8], r11
; A[0] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+16], r12
; A[0] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+24], r10
; A[0] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+32], r11
; A[0] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+40], r12
; A[0] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+48], r10
; A[0] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+56], r11
; A[0] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+64], r12
; A[0] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+72], r10
; A[0] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+80], r11
; A[0] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+88], r12
; A[0] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
; A[12] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+96]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+96], r10
; A[0] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
; A[12] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+96]
add r11, rax
adc r12, rdx
adc r10, 0
; A[13] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+104]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+104], r11
; A[0] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
; A[12] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+96]
add r12, rax
adc r10, rdx
adc r11, 0
; A[13] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+104]
add r12, rax
adc r10, rdx
adc r11, 0
; A[14] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+112]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+112], r12
; A[0] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
; A[12] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+96]
add r10, rax
adc r11, rdx
adc r12, 0
; A[13] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+104]
add r10, rax
adc r11, rdx
adc r12, 0
; A[14] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+112]
add r10, rax
adc r11, rdx
adc r12, 0
; A[15] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+120], r10
; A[1] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+8]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
; A[12] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+96]
add r11, rax
adc r12, rdx
adc r10, 0
; A[13] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+104]
add r11, rax
adc r12, rdx
adc r10, 0
; A[14] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+112]
add r11, rax
adc r12, rdx
adc r10, 0
; A[15] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+120]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+128], r11
; A[2] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+16]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
; A[12] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+96]
add r12, rax
adc r10, rdx
adc r11, 0
; A[13] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+104]
add r12, rax
adc r10, rdx
adc r11, 0
; A[14] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+112]
add r12, rax
adc r10, rdx
adc r11, 0
; A[15] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+120]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+136], r12
; A[3] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+24]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
; A[12] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+96]
add r10, rax
adc r11, rdx
adc r12, 0
; A[13] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+104]
add r10, rax
adc r11, rdx
adc r12, 0
; A[14] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+112]
add r10, rax
adc r11, rdx
adc r12, 0
; A[15] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+144], r10
; A[4] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+32]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
; A[12] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+96]
add r11, rax
adc r12, rdx
adc r10, 0
; A[13] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+104]
add r11, rax
adc r12, rdx
adc r10, 0
; A[14] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+112]
add r11, rax
adc r12, rdx
adc r10, 0
; A[15] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+120]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+152], r11
; A[5] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+40]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
; A[12] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+96]
add r12, rax
adc r10, rdx
adc r11, 0
; A[13] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+104]
add r12, rax
adc r10, rdx
adc r11, 0
; A[14] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+112]
add r12, rax
adc r10, rdx
adc r11, 0
; A[15] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+120]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+160], r12
; A[6] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+48]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
; A[12] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+96]
add r10, rax
adc r11, rdx
adc r12, 0
; A[13] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+104]
add r10, rax
adc r11, rdx
adc r12, 0
; A[14] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+112]
add r10, rax
adc r11, rdx
adc r12, 0
; A[15] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+168], r10
; A[7] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+56]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
; A[12] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+96]
add r11, rax
adc r12, rdx
adc r10, 0
; A[13] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+104]
add r11, rax
adc r12, rdx
adc r10, 0
; A[14] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+112]
add r11, rax
adc r12, rdx
adc r10, 0
; A[15] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+120]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+176], r11
; A[8] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+64]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
; A[12] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+96]
add r12, rax
adc r10, rdx
adc r11, 0
; A[13] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+104]
add r12, rax
adc r10, rdx
adc r11, 0
; A[14] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+112]
add r12, rax
adc r10, rdx
adc r11, 0
; A[15] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+120]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+184], r12
; A[9] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+72]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
; A[12] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+96]
add r10, rax
adc r11, rdx
adc r12, 0
; A[13] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+104]
add r10, rax
adc r11, rdx
adc r12, 0
; A[14] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+112]
add r10, rax
adc r11, rdx
adc r12, 0
; A[15] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+192], r10
; A[10] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+80]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
; A[12] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+96]
add r11, rax
adc r12, rdx
adc r10, 0
; A[13] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+104]
add r11, rax
adc r12, rdx
adc r10, 0
; A[14] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+112]
add r11, rax
adc r12, rdx
adc r10, 0
; A[15] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+120]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+200], r11
; A[11] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+88]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[12] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+96]
add r12, rax
adc r10, rdx
adc r11, 0
; A[13] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+104]
add r12, rax
adc r10, rdx
adc r11, 0
; A[14] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+112]
add r12, rax
adc r10, rdx
adc r11, 0
; A[15] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+120]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+208], r12
; A[12] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+96]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[13] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+104]
add r10, rax
adc r11, rdx
adc r12, 0
; A[14] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+112]
add r10, rax
adc r11, rdx
adc r12, 0
; A[15] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+216], r10
; A[13] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+104]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[14] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+112]
add r11, rax
adc r12, rdx
adc r10, 0
; A[15] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+120]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+224], r11
; A[14] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+112]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[15] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+120]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+232], r12
; A[15] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
mov QWORD PTR [rcx+240], r10
mov QWORD PTR [rcx+248], r11
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r10, QWORD PTR [rsp+16]
mov r11, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov rax, QWORD PTR [rsp+32]
mov rdx, QWORD PTR [rsp+40]
mov r10, QWORD PTR [rsp+48]
mov r11, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], rdx
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
mov rax, QWORD PTR [rsp+64]
mov rdx, QWORD PTR [rsp+72]
mov r10, QWORD PTR [rsp+80]
mov r11, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], rdx
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
mov rax, QWORD PTR [rsp+96]
mov rdx, QWORD PTR [rsp+104]
mov r10, QWORD PTR [rsp+112]
mov r11, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], rdx
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
add rsp, 128
pop r12
ret
sp_1024_mul_16 ENDP
_text ENDS
; /* Square a and put result in r. (r = a * a)
; *
; * r A single precision integer that receives the square.
; * a A single precision integer to square; 16 64-bit words are read.
; */
_text SEGMENT READONLY PARA
sp_1024_sqr_16 PROC
push r12
push r13
push r14
mov r8, rdx
sub rsp, 128
; A[0] * A[0]
mov rax, QWORD PTR [r8]
mul rax
xor r11, r11
mov QWORD PTR [rsp], rax
mov r10, rdx
; A[0] * A[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r8]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+8], r10
; A[0] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[1] * A[1]
mov rax, QWORD PTR [r8+8]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rsp+16], r11
; A[0] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[1] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8+8]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+24], r9
; A[0] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[1] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+8]
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[2] * A[2]
mov rax, QWORD PTR [r8+16]
mul rax
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+32], r10
; A[0] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+40], r11
; A[0] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[3]
mov rax, QWORD PTR [r8+24]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+48], r9
; A[0] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rsp+56], r10
; A[0] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[4]
mov rax, QWORD PTR [r8+32]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+64], r11
; A[0] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+72], r9
; A[0] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[5]
mov rax, QWORD PTR [r8+40]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rsp+80], r10
; A[0] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+88], r11
; A[0] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[6]
mov rax, QWORD PTR [r8+48]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+96], r9
; A[0] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rsp+104], r10
; A[0] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[7]
mov rax, QWORD PTR [r8+56]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+112], r11
; A[0] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+120], r9
; A[1] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[2] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[8]
mov rax, QWORD PTR [r8+64]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+128], r10
; A[2] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+16]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[3] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rcx+136], r11
; A[3] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+24]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[4] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[9]
mov rax, QWORD PTR [r8+72]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rcx+144], r9
; A[4] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+32]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[5] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+152], r10
; A[5] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+40]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[6] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
; A[10] * A[10]
mov rax, QWORD PTR [r8+80]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rcx+160], r11
; A[6] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+48]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[7] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
; A[10] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+80]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rcx+168], r9
; A[7] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+56]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[8] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
; A[10] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+80]
add r12, rax
adc r13, rdx
adc r14, 0
; A[11] * A[11]
mov rax, QWORD PTR [r8+88]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+176], r10
; A[8] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+64]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[9] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
; A[10] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+80]
add r12, rax
adc r13, rdx
adc r14, 0
; A[11] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+88]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rcx+184], r11
; A[9] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+72]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[10] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+80]
add r12, rax
adc r13, rdx
adc r14, 0
; A[11] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+88]
add r12, rax
adc r13, rdx
adc r14, 0
; A[12] * A[12]
mov rax, QWORD PTR [r8+96]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rcx+192], r9
; A[10] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+80]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[11] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+88]
add r12, rax
adc r13, rdx
adc r14, 0
; A[12] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+96]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+200], r10
; A[11] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+88]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[12] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+96]
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[13] * A[13]
mov rax, QWORD PTR [r8+104]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rcx+208], r11
; A[12] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+96]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[13] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+104]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+216], r9
; A[13] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+104]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[14] * A[14]
mov rax, QWORD PTR [r8+112]
mul rax
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rcx+224], r10
; A[14] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+112]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rcx+232], r11
; A[15] * A[15]
mov rax, QWORD PTR [r8+120]
mul rax
add r9, rax
adc r10, rdx
mov QWORD PTR [rcx+240], r9
mov QWORD PTR [rcx+248], r10
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r12, QWORD PTR [rsp+16]
mov r13, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r12
mov QWORD PTR [rcx+24], r13
mov rax, QWORD PTR [rsp+32]
mov rdx, QWORD PTR [rsp+40]
mov r12, QWORD PTR [rsp+48]
mov r13, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], rdx
mov QWORD PTR [rcx+48], r12
mov QWORD PTR [rcx+56], r13
mov rax, QWORD PTR [rsp+64]
mov rdx, QWORD PTR [rsp+72]
mov r12, QWORD PTR [rsp+80]
mov r13, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], rdx
mov QWORD PTR [rcx+80], r12
mov QWORD PTR [rcx+88], r13
mov rax, QWORD PTR [rsp+96]
mov rdx, QWORD PTR [rsp+104]
mov r12, QWORD PTR [rsp+112]
mov r13, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], rdx
mov QWORD PTR [rcx+112], r12
mov QWORD PTR [rcx+120], r13
add rsp, 128
pop r14
pop r13
pop r12
ret
sp_1024_sqr_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
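; /* Multiply a and b into r. (r = a * b)
; * Uses the mulx, adcx and adox instructions.
; * When r is the same pointer as a or b, the low 16 words of the product are
; * accumulated in a stack buffer instead of directly in r.
; *
; * r Result of multiplication.
; * a First number to multiply.
; * b Second number to multiply.
; */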
_text SEGMENT READONLY PARA
sp_1024_mul_avx2_16 PROC
push rbx
push rbp
push r12
push r13
push r14
push r15
push rdi
mov rbp, r8
mov r8, rcx
mov r9, rdx
sub rsp, 128
cmp r9, r8
mov rbx, rsp
cmovne rbx, r8
cmp rbp, r8
cmove rbx, rsp
add r8, 128
xor rdi, rdi
mov rdx, QWORD PTR [r9]
; A[0] * B[0]
mulx r11, r10, QWORD PTR [rbp]
; A[0] * B[1]
mulx r12, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx], r10
adcx r11, rax
; A[0] * B[2]
mulx r13, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+8], r11
adcx r12, rax
; A[0] * B[3]
mulx r14, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+16], r12
adcx r13, rax
mov QWORD PTR [rbx+24], r13
; A[0] * B[4]
mulx r10, rax, QWORD PTR [rbp+32]
adcx r14, rax
; A[0] * B[5]
mulx r11, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+32], r14
adcx r10, rax
; A[0] * B[6]
mulx r12, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
; A[0] * B[7]
mulx r13, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
mov QWORD PTR [rbx+56], r12
; A[0] * B[8]
mulx r14, rax, QWORD PTR [rbp+64]
adcx r13, rax
; A[0] * B[9]
mulx r10, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
; A[0] * B[10]
mulx r11, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
; A[0] * B[11]
mulx r12, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
mov QWORD PTR [rbx+88], r11
; A[0] * B[12]
mulx r13, rax, QWORD PTR [rbp+96]
adcx r12, rax
; A[0] * B[13]
mulx r14, rax, QWORD PTR [rbp+104]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
; A[0] * B[14]
mulx r10, rax, QWORD PTR [rbp+112]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
; A[0] * B[15]
mulx r11, rax, QWORD PTR [rbp+120]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adcx r11, rdi
mov r15, rdi
adcx r15, rdi
mov QWORD PTR [rbx+120], r10
mov QWORD PTR [r8], r11
mov rdx, QWORD PTR [r9+8]
mov r11, QWORD PTR [rbx+8]
mov r12, QWORD PTR [rbx+16]
mov r13, QWORD PTR [rbx+24]
mov r14, QWORD PTR [rbx+32]
mov r10, QWORD PTR [rbx+40]
; A[1] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[1] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+8], r11
adcx r12, rax
adox r13, rcx
; A[1] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+16], r12
adcx r13, rax
adox r14, rcx
; A[1] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+24], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+32], r14
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
; A[1] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[1] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
adox r12, rcx
; A[1] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
; A[1] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+64], r13
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
; A[1] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r14, rax
adox r10, rcx
; A[1] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[1] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[1] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+96], r12
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
; A[1] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r13, rax
adox r14, rcx
; A[1] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[1] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[1] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [rbx+120], r10
mov r12, rdi
adcx r11, rax
adox r12, rcx
adcx r12, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8], r11
mov QWORD PTR [r8+8], r12
mov rdx, QWORD PTR [r9+16]
mov r12, QWORD PTR [rbx+16]
mov r13, QWORD PTR [rbx+24]
mov r14, QWORD PTR [rbx+32]
mov r10, QWORD PTR [rbx+40]
mov r11, QWORD PTR [rbx+48]
; A[2] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r13, rcx
; A[2] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+16], r12
adcx r13, rax
adox r14, rcx
; A[2] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+24], r13
adcx r14, rax
adox r10, rcx
; A[2] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+32], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+40], r10
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
; A[2] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[2] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
; A[2] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
; A[2] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+72], r14
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
; A[2] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[2] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[2] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[2] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+104], r13
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[2] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r14, rax
adox r10, rcx
; A[2] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[2] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[2] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8], r11
mov r13, rdi
adcx r12, rax
adox r13, rcx
adcx r13, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+8], r12
mov QWORD PTR [r8+16], r13
mov rdx, QWORD PTR [r9+24]
mov r13, QWORD PTR [rbx+24]
mov r14, QWORD PTR [rbx+32]
mov r10, QWORD PTR [rbx+40]
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
; A[3] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r13, rax
adox r14, rcx
; A[3] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+24], r13
adcx r14, rax
adox r10, rcx
; A[3] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+32], r14
adcx r10, rax
adox r11, rcx
; A[3] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+48], r11
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
; A[3] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r13, rcx
; A[3] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
; A[3] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
; A[3] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+80], r10
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
; A[3] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[3] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[3] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[3] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+112], r14
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[3] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r10, rax
adox r11, rcx
; A[3] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[3] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[3] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+8], r12
mov r14, rdi
adcx r13, rax
adox r14, rcx
adcx r14, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+16], r13
mov QWORD PTR [r8+24], r14
mov rdx, QWORD PTR [r9+32]
mov r14, QWORD PTR [rbx+32]
mov r10, QWORD PTR [rbx+40]
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
; A[4] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r14, rax
adox r10, rcx
; A[4] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+32], r14
adcx r10, rax
adox r11, rcx
; A[4] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
adox r12, rcx
; A[4] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+56], r12
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
; A[4] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r13, rax
adox r14, rcx
; A[4] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
; A[4] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[4] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+88], r11
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
; A[4] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r13, rcx
; A[4] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[4] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[4] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+120], r10
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
; A[4] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r11, rax
adox r12, rcx
; A[4] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[4] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[4] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+16], r13
mov r10, rdi
adcx r14, rax
adox r10, rcx
adcx r10, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+24], r14
mov QWORD PTR [r8+32], r10
mov rdx, QWORD PTR [r9+40]
mov r10, QWORD PTR [rbx+40]
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
; A[5] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[5] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
adox r12, rcx
; A[5] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
; A[5] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+64], r13
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
; A[5] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r14, rax
adox r10, rcx
; A[5] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[5] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[5] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+96], r12
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[5] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r13, rax
adox r14, rcx
; A[5] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[5] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[5] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8], r11
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
; A[5] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r12, rax
adox r13, rcx
; A[5] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[5] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[5] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+24], r14
mov r11, rdi
adcx r10, rax
adox r11, rcx
adcx r11, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+32], r10
mov QWORD PTR [r8+40], r11
mov rdx, QWORD PTR [r9+48]
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
; A[6] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[6] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
; A[6] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
; A[6] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+72], r14
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
; A[6] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[6] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[6] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[6] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+104], r13
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[6] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r14, rax
adox r10, rcx
; A[6] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[6] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[6] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+8], r12
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[6] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r13, rax
adox r14, rcx
; A[6] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[6] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[6] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+32], r10
mov r12, rdi
adcx r11, rax
adox r12, rcx
adcx r12, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+40], r11
mov QWORD PTR [r8+48], r12
mov rdx, QWORD PTR [r9+56]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
; A[7] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r13, rcx
; A[7] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
; A[7] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
; A[7] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+80], r10
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
; A[7] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[7] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[7] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[7] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+112], r14
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
; A[7] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[7] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[7] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[7] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [r8+16], r13
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
; A[7] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r14, rax
adox r10, rcx
; A[7] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[7] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[7] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+40], r11
mov r13, rdi
adcx r12, rax
adox r13, rcx
adcx r13, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+48], r12
mov QWORD PTR [r8+56], r13
mov rdx, QWORD PTR [r9+64]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
; A[8] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r13, rax
adox r14, rcx
; A[8] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
; A[8] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[8] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+88], r11
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
; A[8] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r13, rcx
; A[8] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[8] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[8] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+120], r10
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
; A[8] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[8] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[8] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[8] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [r8+24], r14
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
; A[8] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r10, rax
adox r11, rcx
; A[8] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[8] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
; A[8] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+48], r12
mov r14, rdi
adcx r13, rax
adox r14, rcx
adcx r14, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+56], r13
mov QWORD PTR [r8+64], r14
mov rdx, QWORD PTR [r9+72]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
; A[9] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r14, rax
adox r10, rcx
; A[9] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[9] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[9] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+96], r12
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[9] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r13, rax
adox r14, rcx
; A[9] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[9] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[9] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8], r11
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[9] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r13, rcx
; A[9] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[9] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[9] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+32], r10
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
; A[9] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r11, rax
adox r12, rcx
; A[9] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
; A[9] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
; A[9] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+56], r13
mov r10, rdi
adcx r14, rax
adox r10, rcx
adcx r10, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+64], r14
mov QWORD PTR [r8+72], r10
mov rdx, QWORD PTR [r9+80]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
; A[10] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[10] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[10] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[10] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+104], r13
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[10] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r14, rax
adox r10, rcx
; A[10] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[10] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[10] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+8], r12
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
; A[10] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r13, rax
adox r14, rcx
; A[10] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[10] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[10] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+40], r11
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
; A[10] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r12, rax
adox r13, rcx
; A[10] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
; A[10] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
; A[10] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+64], r14
mov r11, rdi
adcx r10, rax
adox r11, rcx
adcx r11, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+72], r10
mov QWORD PTR [r8+80], r11
mov rdx, QWORD PTR [r9+88]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
; A[11] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[11] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[11] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[11] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+112], r14
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
; A[11] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[11] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[11] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[11] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [r8+16], r13
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
; A[11] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r14, rax
adox r10, rcx
; A[11] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[11] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[11] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+48], r12
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
mov r11, QWORD PTR [r8+80]
; A[11] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r13, rax
adox r14, rcx
; A[11] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
; A[11] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+64], r14
adcx r10, rax
adox r11, rcx
; A[11] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+72], r10
mov r12, rdi
adcx r11, rax
adox r12, rcx
adcx r12, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+80], r11
mov QWORD PTR [r8+88], r12
mov rdx, QWORD PTR [r9+96]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
; A[12] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r13, rcx
; A[12] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[12] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[12] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+120], r10
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
; A[12] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[12] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[12] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[12] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [r8+24], r14
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
; A[12] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[12] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[12] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
; A[12] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [r8+56], r13
mov r10, QWORD PTR [r8+72]
mov r11, QWORD PTR [r8+80]
mov r12, QWORD PTR [r8+88]
; A[12] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r14, rax
adox r10, rcx
; A[12] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+64], r14
adcx r10, rax
adox r11, rcx
; A[12] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+72], r10
adcx r11, rax
adox r12, rcx
; A[12] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+80], r11
mov r13, rdi
adcx r12, rax
adox r13, rcx
adcx r13, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+88], r12
mov QWORD PTR [r8+96], r13
mov rdx, QWORD PTR [r9+104]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[13] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r13, rax
adox r14, rcx
; A[13] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[13] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[13] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8], r11
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[13] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r13, rcx
; A[13] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[13] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[13] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+32], r10
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
; A[13] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[13] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
; A[13] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
; A[13] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [r8+64], r14
mov r11, QWORD PTR [r8+80]
mov r12, QWORD PTR [r8+88]
mov r13, QWORD PTR [r8+96]
; A[13] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r10, rax
adox r11, rcx
; A[13] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+72], r10
adcx r11, rax
adox r12, rcx
; A[13] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+80], r11
adcx r12, rax
adox r13, rcx
; A[13] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+88], r12
mov r14, rdi
adcx r13, rax
adox r14, rcx
adcx r14, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+96], r13
mov QWORD PTR [r8+104], r14
mov rdx, QWORD PTR [r9+112]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[14] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r14, rax
adox r10, rcx
; A[14] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[14] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[14] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+8], r12
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
; A[14] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r13, rax
adox r14, rcx
; A[14] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[14] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[14] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+40], r11
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
mov r11, QWORD PTR [r8+80]
; A[14] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r13, rcx
; A[14] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
; A[14] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
; A[14] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+64], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+72], r10
mov r12, QWORD PTR [r8+88]
mov r13, QWORD PTR [r8+96]
mov r14, QWORD PTR [r8+104]
; A[14] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r11, rax
adox r12, rcx
; A[14] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+80], r11
adcx r12, rax
adox r13, rcx
; A[14] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+88], r12
adcx r13, rax
adox r14, rcx
; A[14] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+96], r13
mov r10, rdi
adcx r14, rax
adox r10, rcx
adcx r10, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+104], r14
mov QWORD PTR [r8+112], r10
mov rdx, QWORD PTR [r9+120]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
; A[15] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[15] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[15] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[15] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [r8+16], r13
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
; A[15] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r14, rax
adox r10, rcx
; A[15] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[15] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[15] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+48], r12
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
mov r11, QWORD PTR [r8+80]
mov r12, QWORD PTR [r8+88]
; A[15] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r13, rax
adox r14, rcx
; A[15] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
; A[15] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+64], r14
adcx r10, rax
adox r11, rcx
; A[15] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+72], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+80], r11
mov r13, QWORD PTR [r8+96]
mov r14, QWORD PTR [r8+104]
mov r10, QWORD PTR [r8+112]
; A[15] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r12, rax
adox r13, rcx
; A[15] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+88], r12
adcx r13, rax
adox r14, rcx
; A[15] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+96], r13
adcx r14, rax
adox r10, rcx
; A[15] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+104], r14
mov r11, rdi
adcx r10, rax
adox r11, rcx
adcx r11, r15
mov QWORD PTR [r8+112], r10
mov QWORD PTR [r8+120], r11
sub r8, 128
cmp r9, r8
je L_start_1024_mul_avx2_16
cmp rbp, r8
jne L_end_1024_mul_avx2_16
L_start_1024_mul_avx2_16:
vmovdqu xmm0, OWORD PTR [rbx]
vmovups OWORD PTR [r8], xmm0
vmovdqu xmm0, OWORD PTR [rbx+16]
vmovups OWORD PTR [r8+16], xmm0
vmovdqu xmm0, OWORD PTR [rbx+32]
vmovups OWORD PTR [r8+32], xmm0
vmovdqu xmm0, OWORD PTR [rbx+48]
vmovups OWORD PTR [r8+48], xmm0
vmovdqu xmm0, OWORD PTR [rbx+64]
vmovups OWORD PTR [r8+64], xmm0
vmovdqu xmm0, OWORD PTR [rbx+80]
vmovups OWORD PTR [r8+80], xmm0
vmovdqu xmm0, OWORD PTR [rbx+96]
vmovups OWORD PTR [r8+96], xmm0
vmovdqu xmm0, OWORD PTR [rbx+112]
vmovups OWORD PTR [r8+112], xmm0
L_end_1024_mul_avx2_16:
add rsp, 128
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
ret
sp_1024_mul_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * r A single precision integer that receives the result (32 64-bit words).
; * a A single precision integer to square (16 64-bit words).
; */
_text SEGMENT READONLY PARA
sp_1024_sqr_avx2_16 PROC
push rbp
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
mov r8, rcx
mov r9, rdx
sub rsp, 128
cmp r9, r8
mov rbp, rsp
cmovne rbp, r8
add r8, 128
xor r13, r13
; Diagonal 1
; Zero into %r9
; Zero into %r10
; A[1] x A[0]
mov rdx, QWORD PTR [r9]
mulx r11, r10, QWORD PTR [r9+8]
; A[2] x A[0]
mulx r12, rax, QWORD PTR [r9+16]
adcx r11, rax
adox r12, r13
mov QWORD PTR [rbp+8], r10
mov QWORD PTR [rbp+16], r11
; Zero into %r8
; Zero into %r9
; A[3] x A[0]
mulx r10, rax, QWORD PTR [r9+24]
adcx r12, rax
adox r10, r13
; A[4] x A[0]
mulx r11, rax, QWORD PTR [r9+32]
adcx r10, rax
adox r11, r13
mov QWORD PTR [rbp+24], r12
mov QWORD PTR [rbp+32], r10
; Zero into %r10
; Zero into %r8
; A[5] x A[0]
mulx r12, rax, QWORD PTR [r9+40]
adcx r11, rax
adox r12, r13
; A[6] x A[0]
mulx r10, rax, QWORD PTR [r9+48]
adcx r12, rax
adox r10, r13
mov QWORD PTR [rbp+40], r11
mov QWORD PTR [rbp+48], r12
; Zero into %r9
; Zero into %r10
; A[7] x A[0]
mulx r11, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, r13
; A[8] x A[0]
mulx r12, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r12, r13
mov QWORD PTR [rbp+56], r10
mov QWORD PTR [rbp+64], r11
; Zero into %r8
; Zero into %r9
; A[9] x A[0]
mulx r10, rax, QWORD PTR [r9+72]
adcx r12, rax
adox r10, r13
; A[10] x A[0]
mulx r11, rax, QWORD PTR [r9+80]
adcx r10, rax
adox r11, r13
mov QWORD PTR [rbp+72], r12
mov QWORD PTR [rbp+80], r10
; No load %r13 - %r10
; A[11] x A[0]
mulx r15, rax, QWORD PTR [r9+88]
adcx r11, rax
adox r15, r13
; A[12] x A[0]
mulx rdi, rax, QWORD PTR [r9+96]
adcx r15, rax
adox rdi, r13
mov QWORD PTR [rbp+88], r11
; No store %r13 - %r10
; No load %r15 - %r9
; A[13] x A[0]
mulx rsi, rax, QWORD PTR [r9+104]
adcx rdi, rax
adox rsi, r13
; A[14] x A[0]
mulx rbx, rax, QWORD PTR [r9+112]
adcx rsi, rax
adox rbx, r13
; No store %r14 - %r8
; No store %r15 - %r9
; Zero into %r8
; Zero into %r9
; A[15] x A[0]
mulx r10, rax, QWORD PTR [r9+120]
adcx rbx, rax
adox r10, r13
; No store %rbx - %r10
; Carry
adcx r10, r13
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8], r10
; Diagonal 2
mov r10, QWORD PTR [rbp+24]
mov r11, QWORD PTR [rbp+32]
mov r12, QWORD PTR [rbp+40]
; A[2] x A[1]
mov rdx, QWORD PTR [r9+8]
mulx rcx, rax, QWORD PTR [r9+16]
adcx r10, rax
adox r11, rcx
; A[3] x A[1]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+24], r10
mov QWORD PTR [rbp+32], r11
mov r10, QWORD PTR [rbp+48]
mov r11, QWORD PTR [rbp+56]
; A[4] x A[1]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r12, rax
adox r10, rcx
; A[5] x A[1]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+40], r12
mov QWORD PTR [rbp+48], r10
mov r12, QWORD PTR [rbp+64]
mov r10, QWORD PTR [rbp+72]
; A[6] x A[1]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r12, rcx
; A[7] x A[1]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbp+56], r11
mov QWORD PTR [rbp+64], r12
mov r11, QWORD PTR [rbp+80]
mov r12, QWORD PTR [rbp+88]
; A[8] x A[1]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r10, rax
adox r11, rcx
; A[9] x A[1]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+72], r10
mov QWORD PTR [rbp+80], r11
; No load %r13 - %r8
; A[10] x A[1]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r12, rax
adox r15, rcx
; A[11] x A[1]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r12
; No store %r13 - %r8
; No load %r15 - %r10
; A[12] x A[1]
mulx rcx, rax, QWORD PTR [r9+96]
adcx rdi, rax
adox rsi, rcx
; A[13] x A[1]
mulx rcx, rax, QWORD PTR [r9+104]
adcx rsi, rax
adox rbx, rcx
; No store %r14 - %r9
; No store %r15 - %r10
mov r11, QWORD PTR [r8]
; Zero into %r10
; A[14] x A[1]
mulx rcx, rax, QWORD PTR [r9+112]
adcx rbx, rax
adox r11, rcx
; A[15] x A[1]
mulx r12, rax, QWORD PTR [r9+120]
adcx r11, rax
adox r12, r13
; No store %rbx - %r8
mov QWORD PTR [r8], r11
; Zero into %r8
; Zero into %r9
; A[15] x A[2]
mov rdx, QWORD PTR [r9+16]
mulx r10, rax, QWORD PTR [r9+120]
adcx r12, rax
adox r10, r13
mov QWORD PTR [r8+8], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+16], r10
; Diagonal 3
mov r10, QWORD PTR [rbp+40]
mov r11, QWORD PTR [rbp+48]
mov r12, QWORD PTR [rbp+56]
; A[3] x A[2]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r11, rcx
; A[4] x A[2]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+40], r10
mov QWORD PTR [rbp+48], r11
mov r10, QWORD PTR [rbp+64]
mov r11, QWORD PTR [rbp+72]
; A[5] x A[2]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r12, rax
adox r10, rcx
; A[6] x A[2]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+56], r12
mov QWORD PTR [rbp+64], r10
mov r12, QWORD PTR [rbp+80]
mov r10, QWORD PTR [rbp+88]
; A[7] x A[2]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r12, rcx
; A[8] x A[2]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbp+72], r11
mov QWORD PTR [rbp+80], r12
; No load %r13 - %r9
; A[9] x A[2]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r15, rcx
; A[10] x A[2]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r10
; No store %r13 - %r9
; No load %r15 - %r8
; A[11] x A[2]
mulx rcx, rax, QWORD PTR [r9+88]
adcx rdi, rax
adox rsi, rcx
; A[12] x A[2]
mulx rcx, rax, QWORD PTR [r9+96]
adcx rsi, rax
adox rbx, rcx
; No store %r14 - %r10
; No store %r15 - %r8
mov r12, QWORD PTR [r8]
mov r10, QWORD PTR [r8+8]
; A[13] x A[2]
mulx rcx, rax, QWORD PTR [r9+104]
adcx rbx, rax
adox r12, rcx
; A[14] x A[2]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r12, rax
adox r10, rcx
; No store %rbx - %r9
mov QWORD PTR [r8], r12
mov r11, QWORD PTR [r8+16]
; Zero into %r10
; A[14] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r10, rax
adox r11, rcx
; A[14] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx r12, rax, QWORD PTR [r9+112]
adcx r11, rax
adox r12, r13
mov QWORD PTR [r8+8], r10
mov QWORD PTR [r8+16], r11
; Zero into %r8
; Zero into %r9
; A[14] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx r10, rax, QWORD PTR [r9+112]
adcx r12, rax
adox r10, r13
mov QWORD PTR [r8+24], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+32], r10
; Diagonal 4
mov r10, QWORD PTR [rbp+56]
mov r11, QWORD PTR [rbp+64]
mov r12, QWORD PTR [rbp+72]
; A[4] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r10, rax
adox r11, rcx
; A[5] x A[3]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+56], r10
mov QWORD PTR [rbp+64], r11
mov r10, QWORD PTR [rbp+80]
mov r11, QWORD PTR [rbp+88]
; A[6] x A[3]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r12, rax
adox r10, rcx
; A[7] x A[3]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+72], r12
mov QWORD PTR [rbp+80], r10
; No load %r13 - %r10
; A[8] x A[3]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r15, rcx
; A[9] x A[3]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r11
; No store %r13 - %r10
; No load %r15 - %r9
; A[10] x A[3]
mulx rcx, rax, QWORD PTR [r9+80]
adcx rdi, rax
adox rsi, rcx
; A[11] x A[3]
mulx rcx, rax, QWORD PTR [r9+88]
adcx rsi, rax
adox rbx, rcx
; No store %r14 - %r8
; No store %r15 - %r9
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[12] x A[3]
mulx rcx, rax, QWORD PTR [r9+96]
adcx rbx, rax
adox r10, rcx
; A[13] x A[3]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r10, rax
adox r11, rcx
; No store %rbx - %r10
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[13] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r11, rax
adox r12, rcx
; A[13] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+8], r11
mov QWORD PTR [r8+16], r12
mov r11, QWORD PTR [r8+32]
; Zero into %r10
; A[13] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r10, rax
adox r11, rcx
; A[13] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx r12, rax, QWORD PTR [r9+104]
adcx r11, rax
adox r12, r13
mov QWORD PTR [r8+24], r10
mov QWORD PTR [r8+32], r11
; Zero into %r8
; Zero into %r9
; A[13] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx r10, rax, QWORD PTR [r9+104]
adcx r12, rax
adox r10, r13
mov QWORD PTR [r8+40], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+48], r10
; Diagonal 5
mov r10, QWORD PTR [rbp+72]
mov r11, QWORD PTR [rbp+80]
mov r12, QWORD PTR [rbp+88]
; A[5] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r10, rax
adox r11, rcx
; A[6] x A[4]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+72], r10
mov QWORD PTR [rbp+80], r11
; No load %r13 - %r8
; A[7] x A[4]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r12, rax
adox r15, rcx
; A[8] x A[4]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r12
; No store %r13 - %r8
; No load %r15 - %r10
; A[9] x A[4]
mulx rcx, rax, QWORD PTR [r9+72]
adcx rdi, rax
adox rsi, rcx
; A[10] x A[4]
mulx rcx, rax, QWORD PTR [r9+80]
adcx rsi, rax
adox rbx, rcx
; No store %r14 - %r9
; No store %r15 - %r10
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[11] x A[4]
mulx rcx, rax, QWORD PTR [r9+88]
adcx rbx, rax
adox r11, rcx
; A[12] x A[4]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r11, rax
adox r12, rcx
; No store %rbx - %r8
mov QWORD PTR [r8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
; A[12] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r12, rax
adox r10, rcx
; A[12] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+8], r12
mov QWORD PTR [r8+16], r10
mov r12, QWORD PTR [r8+32]
mov r10, QWORD PTR [r8+40]
; A[12] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r11, rax
adox r12, rcx
; A[12] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+24], r11
mov QWORD PTR [r8+32], r12
mov r11, QWORD PTR [r8+48]
; Zero into %r10
; A[12] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r10, rax
adox r11, rcx
; A[12] x A[10]
mov rdx, QWORD PTR [r9+80]
mulx r12, rax, QWORD PTR [r9+96]
adcx r11, rax
adox r12, r13
mov QWORD PTR [r8+40], r10
mov QWORD PTR [r8+48], r11
; Zero into %r8
; Zero into %r9
; A[12] x A[11]
mov rdx, QWORD PTR [r9+88]
mulx r10, rax, QWORD PTR [r9+96]
adcx r12, rax
adox r10, r13
mov QWORD PTR [r8+56], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+64], r10
; Diagonal 6
mov r10, QWORD PTR [rbp+88]
; No load %r13 - %r9
; A[6] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r15, rcx
; A[7] x A[5]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r10
; No store %r13 - %r9
; No load %r15 - %r8
; A[8] x A[5]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rdi, rax
adox rsi, rcx
; A[9] x A[5]
mulx rcx, rax, QWORD PTR [r9+72]
adcx rsi, rax
adox rbx, rcx
; No store %r14 - %r10
; No store %r15 - %r8
mov r12, QWORD PTR [r8]
mov r10, QWORD PTR [r8+8]
; A[10] x A[5]
mulx rcx, rax, QWORD PTR [r9+80]
adcx rbx, rax
adox r12, rcx
; A[11] x A[5]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r12, rax
adox r10, rcx
; No store %rbx - %r9
mov QWORD PTR [r8], r12
mov r11, QWORD PTR [r8+16]
mov r12, QWORD PTR [r8+24]
; A[11] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r10, rax
adox r11, rcx
; A[11] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+8], r10
mov QWORD PTR [r8+16], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[11] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r12, rax
adox r10, rcx
; A[11] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+24], r12
mov QWORD PTR [r8+32], r10
mov r12, QWORD PTR [r8+48]
mov r10, QWORD PTR [r8+56]
; A[11] x A[10]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r11, rax
adox r12, rcx
; A[13] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+40], r11
mov QWORD PTR [r8+48], r12
mov r11, QWORD PTR [r8+64]
; Zero into %r10
; A[13] x A[10]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r10, rax
adox r11, rcx
; A[13] x A[11]
mov rdx, QWORD PTR [r9+88]
mulx r12, rax, QWORD PTR [r9+104]
adcx r11, rax
adox r12, r13
mov QWORD PTR [r8+56], r10
mov QWORD PTR [r8+64], r11
; Zero into %r8
; Zero into %r9
; A[13] x A[12]
mov rdx, QWORD PTR [r9+96]
mulx r10, rax, QWORD PTR [r9+104]
adcx r12, rax
adox r10, r13
mov QWORD PTR [r8+72], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+80], r10
; Diagonal 7
; No load %r15 - %r9
; A[7] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+56]
adcx rdi, rax
adox rsi, rcx
; A[8] x A[6]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rsi, rax
adox rbx, rcx
; No store %r14 - %r8
; No store %r15 - %r9
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[9] x A[6]
mulx rcx, rax, QWORD PTR [r9+72]
adcx rbx, rax
adox r10, rcx
; A[10] x A[6]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r10, rax
adox r11, rcx
; No store %rbx - %r10
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[10] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r11, rax
adox r12, rcx
; A[10] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+8], r11
mov QWORD PTR [r8+16], r12
mov r11, QWORD PTR [r8+32]
mov r12, QWORD PTR [r8+40]
; A[10] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r10, rax
adox r11, rcx
; A[14] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+24], r10
mov QWORD PTR [r8+32], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
; A[14] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r12, rax
adox r10, rcx
; A[14] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+40], r12
mov QWORD PTR [r8+48], r10
mov r12, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
; A[14] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r11, rax
adox r12, rcx
; A[14] x A[10]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+56], r11
mov QWORD PTR [r8+64], r12
mov r11, QWORD PTR [r8+80]
; Zero into %r10
; A[14] x A[11]
mov rdx, QWORD PTR [r9+88]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r10, rax
adox r11, rcx
; A[14] x A[12]
mov rdx, QWORD PTR [r9+96]
mulx r12, rax, QWORD PTR [r9+112]
adcx r11, rax
adox r12, r13
mov QWORD PTR [r8+72], r10
mov QWORD PTR [r8+80], r11
; Zero into %r8
; Zero into %r9
; A[14] x A[13]
mov rdx, QWORD PTR [r9+104]
mulx r10, rax, QWORD PTR [r9+112]
adcx r12, rax
adox r10, r13
mov QWORD PTR [r8+88], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+96], r10
; Diagonal 8
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[8] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rbx, rax
adox r11, rcx
; A[9] x A[7]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r11, rax
adox r12, rcx
; No store %rbx - %r8
mov QWORD PTR [r8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
; A[9] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r12, rax
adox r10, rcx
; A[15] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+8], r12
mov QWORD PTR [r8+16], r10
mov r12, QWORD PTR [r8+32]
mov r10, QWORD PTR [r8+40]
; A[15] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r11, rax
adox r12, rcx
; A[15] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+24], r11
mov QWORD PTR [r8+32], r12
mov r11, QWORD PTR [r8+48]
mov r12, QWORD PTR [r8+56]
; A[15] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r10, rax
adox r11, rcx
; A[15] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+40], r10
mov QWORD PTR [r8+48], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
; A[15] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r12, rax
adox r10, rcx
; A[15] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+56], r12
mov QWORD PTR [r8+64], r10
mov r12, QWORD PTR [r8+80]
mov r10, QWORD PTR [r8+88]
; A[15] x A[10]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r11, rax
adox r12, rcx
; A[15] x A[11]
mov rdx, QWORD PTR [r9+88]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+72], r11
mov QWORD PTR [r8+80], r12
mov r11, QWORD PTR [r8+96]
; Zero into %r10
; A[15] x A[12]
mov rdx, QWORD PTR [r9+96]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r10, rax
adox r11, rcx
; A[15] x A[13]
mov rdx, QWORD PTR [r9+104]
mulx r12, rax, QWORD PTR [r9+120]
adcx r11, rax
adox r12, r13
mov QWORD PTR [r8+88], r10
mov QWORD PTR [r8+96], r11
; Zero into %r8
; Zero into %r9
; A[15] x A[14]
mov rdx, QWORD PTR [r9+112]
mulx r10, rax, QWORD PTR [r9+120]
adcx r12, rax
adox r10, r13
mov QWORD PTR [r8+104], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+112], r10
mov QWORD PTR [r8+120], r14
; Double and Add in A[i] x A[i]
mov r11, QWORD PTR [rbp+8]
; A[0] x A[0]
mov rdx, QWORD PTR [r9]
mulx rcx, rax, rdx
mov QWORD PTR [rbp], rax
adox r11, r11
adcx r11, rcx
mov QWORD PTR [rbp+8], r11
mov r10, QWORD PTR [rbp+16]
mov r11, QWORD PTR [rbp+24]
; A[1] x A[1]
mov rdx, QWORD PTR [r9+8]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+16], r10
mov QWORD PTR [rbp+24], r11
mov r10, QWORD PTR [rbp+32]
mov r11, QWORD PTR [rbp+40]
; A[2] x A[2]
mov rdx, QWORD PTR [r9+16]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+32], r10
mov QWORD PTR [rbp+40], r11
mov r10, QWORD PTR [rbp+48]
mov r11, QWORD PTR [rbp+56]
; A[3] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+48], r10
mov QWORD PTR [rbp+56], r11
mov r10, QWORD PTR [rbp+64]
mov r11, QWORD PTR [rbp+72]
; A[4] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+64], r10
mov QWORD PTR [rbp+72], r11
mov r10, QWORD PTR [rbp+80]
mov r11, QWORD PTR [rbp+88]
; A[5] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+80], r10
mov QWORD PTR [rbp+88], r11
; A[6] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, rdx
adox r15, r15
adox rdi, rdi
adcx r15, rax
adcx rdi, rcx
; A[7] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, rdx
adox rsi, rsi
adox rbx, rbx
adcx rsi, rax
adcx rbx, rcx
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[8] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8], r10
mov QWORD PTR [r8+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
; A[9] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+16], r10
mov QWORD PTR [r8+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[10] x A[10]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+32], r10
mov QWORD PTR [r8+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
; A[11] x A[11]
mov rdx, QWORD PTR [r9+88]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+48], r10
mov QWORD PTR [r8+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
; A[12] x A[12]
mov rdx, QWORD PTR [r9+96]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+64], r10
mov QWORD PTR [r8+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
; A[13] x A[13]
mov rdx, QWORD PTR [r9+104]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+80], r10
mov QWORD PTR [r8+88], r11
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
; A[14] x A[14]
mov rdx, QWORD PTR [r9+112]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+96], r10
mov QWORD PTR [r8+104], r11
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
; A[15] x A[15]
mov rdx, QWORD PTR [r9+120]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+112], r10
mov QWORD PTR [r8+120], r11
mov QWORD PTR [r8+-32], r15
mov QWORD PTR [r8+-24], rdi
mov QWORD PTR [r8+-16], rsi
mov QWORD PTR [r8+-8], rbx
sub r8, 128
cmp r9, r8
jne L_end_1024_sqr_avx2_16
vmovdqu xmm0, OWORD PTR [rbp]
vmovups OWORD PTR [r8], xmm0
vmovdqu xmm0, OWORD PTR [rbp+16]
vmovups OWORD PTR [r8+16], xmm0
vmovdqu xmm0, OWORD PTR [rbp+32]
vmovups OWORD PTR [r8+32], xmm0
vmovdqu xmm0, OWORD PTR [rbp+48]
vmovups OWORD PTR [r8+48], xmm0
vmovdqu xmm0, OWORD PTR [rbp+64]
vmovups OWORD PTR [r8+64], xmm0
vmovdqu xmm0, OWORD PTR [rbp+80]
vmovups OWORD PTR [r8+80], xmm0
L_end_1024_sqr_avx2_16:
add rsp, 128
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rbp
ret
sp_1024_sqr_avx2_16 ENDP
_text ENDS
ENDIF
; /* Add b to a into r. (r = a + b)
; *
; * Works on 16 64-bit words per operand (byte offsets 0..120).
; * As used below: rcx = r, rdx = a, r8 = b; r9 and r10 are scratch.
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; * returns the carry out of the most significant word in rax (0 or 1).
; */
_text SEGMENT READONLY PARA
sp_1024_add_16 PROC
; Add
mov r9, QWORD PTR [rdx]
; Clear the return value before the chain starts: XOR rewrites CF, so it
; cannot be placed after the first ADD.
xor rax, rax
add r9, QWORD PTR [r8]
; Words 1..15: only MOV (which leaves the flags alone) separates the ADCs,
; so the carry ripples from word to word. r9 and r10 alternate as the sum
; register, and the next word of a is loaded before the previous sum is
; stored to r.
mov r10, QWORD PTR [rdx+8]
mov QWORD PTR [rcx], r9
adc r10, QWORD PTR [r8+8]
mov r9, QWORD PTR [rdx+16]
mov QWORD PTR [rcx+8], r10
adc r9, QWORD PTR [r8+16]
mov r10, QWORD PTR [rdx+24]
mov QWORD PTR [rcx+16], r9
adc r10, QWORD PTR [r8+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [rcx+24], r10
adc r9, QWORD PTR [r8+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [rcx+32], r9
adc r10, QWORD PTR [r8+40]
mov r9, QWORD PTR [rdx+48]
mov QWORD PTR [rcx+40], r10
adc r9, QWORD PTR [r8+48]
mov r10, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+48], r9
adc r10, QWORD PTR [r8+56]
mov r9, QWORD PTR [rdx+64]
mov QWORD PTR [rcx+56], r10
adc r9, QWORD PTR [r8+64]
mov r10, QWORD PTR [rdx+72]
mov QWORD PTR [rcx+64], r9
adc r10, QWORD PTR [r8+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [rcx+72], r10
adc r9, QWORD PTR [r8+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [rcx+80], r9
adc r10, QWORD PTR [r8+88]
mov r9, QWORD PTR [rdx+96]
mov QWORD PTR [rcx+88], r10
adc r9, QWORD PTR [r8+96]
mov r10, QWORD PTR [rdx+104]
mov QWORD PTR [rcx+96], r9
adc r10, QWORD PTR [r8+104]
mov r9, QWORD PTR [rdx+112]
mov QWORD PTR [rcx+104], r10
adc r9, QWORD PTR [r8+112]
mov r10, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+112], r9
adc r10, QWORD PTR [r8+120]
mov QWORD PTR [rcx+120], r10
; rax = 0 + CF, i.e. the carry out of word 15.
adc rax, 0
ret
sp_1024_add_16 ENDP
_text ENDS
; /* Sub b from a into a. (a -= b)
; *
; * Works on 16 64-bit words per operand (byte offsets 0..120).
; * As used below: rcx = a (read and written), rdx = b; r8 and r9 are scratch.
; *
; * a A single precision integer and result.
; * b A single precision integer.
; * returns 0 in rax when there is no borrow out of the top word, -1 when
; * there is.
; */
_text SEGMENT READONLY PARA
sp_1024_sub_in_place_16 PROC
; Word 0 starts the borrow chain with SUB.
mov r8, QWORD PTR [rcx]
sub r8, QWORD PTR [rdx]
; Words 1..15: only MOV (which leaves the flags alone) separates the SBBs,
; so the borrow ripples from word to word. r8 and r9 alternate as the
; difference register, and the next word of a is loaded before the previous
; difference is written back.
mov r9, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
sbb r9, QWORD PTR [rdx+8]
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], r9
sbb r8, QWORD PTR [rdx+16]
mov r9, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
sbb r9, QWORD PTR [rdx+24]
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], r9
sbb r8, QWORD PTR [rdx+32]
mov r9, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
sbb r9, QWORD PTR [rdx+40]
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], r9
sbb r8, QWORD PTR [rdx+48]
mov r9, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
sbb r9, QWORD PTR [rdx+56]
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], r9
sbb r8, QWORD PTR [rdx+64]
mov r9, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
sbb r9, QWORD PTR [rdx+72]
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], r9
sbb r8, QWORD PTR [rdx+80]
mov r9, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
sbb r9, QWORD PTR [rdx+88]
mov r8, QWORD PTR [rcx+96]
mov QWORD PTR [rcx+88], r9
sbb r8, QWORD PTR [rdx+96]
mov r9, QWORD PTR [rcx+104]
mov QWORD PTR [rcx+96], r8
sbb r9, QWORD PTR [rdx+104]
mov r8, QWORD PTR [rcx+112]
mov QWORD PTR [rcx+104], r9
sbb r8, QWORD PTR [rdx+112]
mov r9, QWORD PTR [rcx+120]
mov QWORD PTR [rcx+112], r8
sbb r9, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+120], r9
; rax = rax - rax - CF = -CF: 0 without a final borrow, -1 with one.
sbb rax, rax
ret
sp_1024_sub_in_place_16 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m. (r = a - (b & m))
; * m is -1 to subtract and 0 to leave a unchanged.
; *
; * Works on 16 64-bit words per operand (byte offsets 0..120).
; * As used below: rcx = r, rdx = a, r8 = b, r9 = m.
; * Uses 128 bytes of stack to hold the masked copy of b.
; *
; * r A single precision number representing condition subtract result.
; * a A single precision number to subtract from.
; * b A single precision number to subtract.
; * m Mask value to apply.
; * returns 0 in rax when there is no borrow out of the top word, -1 when
; * there is.
; */
_text SEGMENT READONLY PARA
sp_1024_cond_sub_16 PROC
sub rsp, 128
; Stage 1: t[i] = b[i] & m, one word at a time, into the stack buffer.
; Even words go through r10 and odd words through r11.
mov r10, QWORD PTR [r8]
and r10, r9
mov QWORD PTR [rsp], r10
mov r11, QWORD PTR [r8+8]
and r11, r9
mov QWORD PTR [rsp+8], r11
mov r10, QWORD PTR [r8+16]
and r10, r9
mov QWORD PTR [rsp+16], r10
mov r11, QWORD PTR [r8+24]
and r11, r9
mov QWORD PTR [rsp+24], r11
mov r10, QWORD PTR [r8+32]
and r10, r9
mov QWORD PTR [rsp+32], r10
mov r11, QWORD PTR [r8+40]
and r11, r9
mov QWORD PTR [rsp+40], r11
mov r10, QWORD PTR [r8+48]
and r10, r9
mov QWORD PTR [rsp+48], r10
mov r11, QWORD PTR [r8+56]
and r11, r9
mov QWORD PTR [rsp+56], r11
mov r10, QWORD PTR [r8+64]
and r10, r9
mov QWORD PTR [rsp+64], r10
mov r11, QWORD PTR [r8+72]
and r11, r9
mov QWORD PTR [rsp+72], r11
mov r10, QWORD PTR [r8+80]
and r10, r9
mov QWORD PTR [rsp+80], r10
mov r11, QWORD PTR [r8+88]
and r11, r9
mov QWORD PTR [rsp+88], r11
mov r10, QWORD PTR [r8+96]
and r10, r9
mov QWORD PTR [rsp+96], r10
mov r11, QWORD PTR [r8+104]
and r11, r9
mov QWORD PTR [rsp+104], r11
mov r10, QWORD PTR [r8+112]
and r10, r9
mov QWORD PTR [rsp+112], r10
mov r11, QWORD PTR [r8+120]
and r11, r9
mov QWORD PTR [rsp+120], r11
; Stage 2: r = a - t. The pointer in r8 is no longer needed, so r8 is reused
; to fetch each masked word. Only MOV separates the SUB/SBB instructions, so
; the borrow flag is carried from word to word.
mov r10, QWORD PTR [rdx]
mov r8, QWORD PTR [rsp]
sub r10, r8
mov r11, QWORD PTR [rdx+8]
mov r8, QWORD PTR [rsp+8]
sbb r11, r8
mov QWORD PTR [rcx], r10
mov r10, QWORD PTR [rdx+16]
mov r8, QWORD PTR [rsp+16]
sbb r10, r8
mov QWORD PTR [rcx+8], r11
mov r11, QWORD PTR [rdx+24]
mov r8, QWORD PTR [rsp+24]
sbb r11, r8
mov QWORD PTR [rcx+16], r10
mov r10, QWORD PTR [rdx+32]
mov r8, QWORD PTR [rsp+32]
sbb r10, r8
mov QWORD PTR [rcx+24], r11
mov r11, QWORD PTR [rdx+40]
mov r8, QWORD PTR [rsp+40]
sbb r11, r8
mov QWORD PTR [rcx+32], r10
mov r10, QWORD PTR [rdx+48]
mov r8, QWORD PTR [rsp+48]
sbb r10, r8
mov QWORD PTR [rcx+40], r11
mov r11, QWORD PTR [rdx+56]
mov r8, QWORD PTR [rsp+56]
sbb r11, r8
mov QWORD PTR [rcx+48], r10
mov r10, QWORD PTR [rdx+64]
mov r8, QWORD PTR [rsp+64]
sbb r10, r8
mov QWORD PTR [rcx+56], r11
mov r11, QWORD PTR [rdx+72]
mov r8, QWORD PTR [rsp+72]
sbb r11, r8
mov QWORD PTR [rcx+64], r10
mov r10, QWORD PTR [rdx+80]
mov r8, QWORD PTR [rsp+80]
sbb r10, r8
mov QWORD PTR [rcx+72], r11
mov r11, QWORD PTR [rdx+88]
mov r8, QWORD PTR [rsp+88]
sbb r11, r8
mov QWORD PTR [rcx+80], r10
mov r10, QWORD PTR [rdx+96]
mov r8, QWORD PTR [rsp+96]
sbb r10, r8
mov QWORD PTR [rcx+88], r11
mov r11, QWORD PTR [rdx+104]
mov r8, QWORD PTR [rsp+104]
sbb r11, r8
mov QWORD PTR [rcx+96], r10
mov r10, QWORD PTR [rdx+112]
mov r8, QWORD PTR [rsp+112]
sbb r10, r8
mov QWORD PTR [rcx+104], r11
mov r11, QWORD PTR [rdx+120]
mov r8, QWORD PTR [rsp+120]
sbb r11, r8
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
; rax = -CF: 0 without a final borrow, -1 with one. Taken before the ADD
; below, which would overwrite CF.
sbb rax, rax
add rsp, 128
ret
sp_1024_cond_sub_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m. (r = a - (b & m))
; * m is -1 to subtract and 0 to leave a unchanged.
; *
; * Works on 16 64-bit words per operand (byte offsets 0..120).
; * As used below: rcx = r, rdx = a, r8 = b, r9 = m.
; *
; * r A single precision number representing condition subtract result.
; * a A single precision number to subtract from.
; * b A single precision number to subtract.
; * m Mask value to apply.
; * returns 0 in rax when there is no borrow out of the top word, -1 when
; * there is.
; */
_text SEGMENT READONLY PARA
sp_1024_cond_sub_avx2_16 PROC
; r12 is callee-saved and is used as a third scratch register.
push r12
; PEXT with m as the selector does the masking: a selector of all ones
; returns the word unchanged and a selector of zero returns 0. PEXT does not
; write the flags, so it can sit inside the SUB/SBB borrow chain.
mov r12, QWORD PTR [r8]
mov r10, QWORD PTR [rdx]
pext r12, r12, r9
sub r10, r12
; Words 1..15: r10, r11 and r12 rotate through the roles "masked b word",
; "a word / difference" and "previous difference waiting to be stored".
mov r12, QWORD PTR [r8+8]
mov r11, QWORD PTR [rdx+8]
pext r12, r12, r9
mov QWORD PTR [rcx], r10
sbb r11, r12
mov r10, QWORD PTR [r8+16]
mov r12, QWORD PTR [rdx+16]
pext r10, r10, r9
mov QWORD PTR [rcx+8], r11
sbb r12, r10
mov r11, QWORD PTR [r8+24]
mov r10, QWORD PTR [rdx+24]
pext r11, r11, r9
mov QWORD PTR [rcx+16], r12
sbb r10, r11
mov r12, QWORD PTR [r8+32]
mov r11, QWORD PTR [rdx+32]
pext r12, r12, r9
mov QWORD PTR [rcx+24], r10
sbb r11, r12
mov r10, QWORD PTR [r8+40]
mov r12, QWORD PTR [rdx+40]
pext r10, r10, r9
mov QWORD PTR [rcx+32], r11
sbb r12, r10
mov r11, QWORD PTR [r8+48]
mov r10, QWORD PTR [rdx+48]
pext r11, r11, r9
mov QWORD PTR [rcx+40], r12
sbb r10, r11
mov r12, QWORD PTR [r8+56]
mov r11, QWORD PTR [rdx+56]
pext r12, r12, r9
mov QWORD PTR [rcx+48], r10
sbb r11, r12
mov r10, QWORD PTR [r8+64]
mov r12, QWORD PTR [rdx+64]
pext r10, r10, r9
mov QWORD PTR [rcx+56], r11
sbb r12, r10
mov r11, QWORD PTR [r8+72]
mov r10, QWORD PTR [rdx+72]
pext r11, r11, r9
mov QWORD PTR [rcx+64], r12
sbb r10, r11
mov r12, QWORD PTR [r8+80]
mov r11, QWORD PTR [rdx+80]
pext r12, r12, r9
mov QWORD PTR [rcx+72], r10
sbb r11, r12
mov r10, QWORD PTR [r8+88]
mov r12, QWORD PTR [rdx+88]
pext r10, r10, r9
mov QWORD PTR [rcx+80], r11
sbb r12, r10
mov r11, QWORD PTR [r8+96]
mov r10, QWORD PTR [rdx+96]
pext r11, r11, r9
mov QWORD PTR [rcx+88], r12
sbb r10, r11
mov r12, QWORD PTR [r8+104]
mov r11, QWORD PTR [rdx+104]
pext r12, r12, r9
mov QWORD PTR [rcx+96], r10
sbb r11, r12
mov r10, QWORD PTR [r8+112]
mov r12, QWORD PTR [rdx+112]
pext r10, r10, r9
mov QWORD PTR [rcx+104], r11
sbb r12, r10
mov r11, QWORD PTR [r8+120]
mov r10, QWORD PTR [rdx+120]
pext r11, r11, r9
mov QWORD PTR [rcx+112], r12
sbb r10, r11
mov QWORD PTR [rcx+120], r10
; rax = -CF: 0 without a final borrow, -1 with one.
sbb rax, rax
pop r12
ret
sp_1024_cond_sub_avx2_16 ENDP
_text ENDS
ENDIF
; /* Mul a by digit b into r. (r = a * b)
; *
; * a is read as 16 64-bit words (byte offsets 0..120). r receives 17 words:
; * the last store below writes the top word at byte offset 128.
; * As used below: rcx = r, rdx = a, r8 = b.
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision digit.
; */
_text SEGMENT READONLY PARA
sp_1024_mul_d_16 PROC
; r12 is callee-saved and is used as the third accumulator register.
push r12
; MUL writes its high half to rdx, so the pointer to a is moved to r9 first.
mov r9, rdx
; r10, r11 and r12 rotate as a three-word accumulator: for each word the low
; half of the product is added to the current word (which is then stored),
; the high half plus carry goes to the next word, and the register after that
; is cleared beforehand to catch the carry out.
; A[0] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9]
mov r10, rax
mov r11, rdx
mov QWORD PTR [rcx], r10
; A[1] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+8]
add r11, rax
mov QWORD PTR [rcx+8], r11
adc r12, rdx
adc r10, 0
; A[2] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+16]
add r12, rax
mov QWORD PTR [rcx+16], r12
adc r10, rdx
adc r11, 0
; A[3] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+24]
add r10, rax
mov QWORD PTR [rcx+24], r10
adc r11, rdx
adc r12, 0
; A[4] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+32]
add r11, rax
mov QWORD PTR [rcx+32], r11
adc r12, rdx
adc r10, 0
; A[5] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+40]
add r12, rax
mov QWORD PTR [rcx+40], r12
adc r10, rdx
adc r11, 0
; A[6] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+48]
add r10, rax
mov QWORD PTR [rcx+48], r10
adc r11, rdx
adc r12, 0
; A[7] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+56]
add r11, rax
mov QWORD PTR [rcx+56], r11
adc r12, rdx
adc r10, 0
; A[8] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+64]
add r12, rax
mov QWORD PTR [rcx+64], r12
adc r10, rdx
adc r11, 0
; A[9] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+72]
add r10, rax
mov QWORD PTR [rcx+72], r10
adc r11, rdx
adc r12, 0
; A[10] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+80]
add r11, rax
mov QWORD PTR [rcx+80], r11
adc r12, rdx
adc r10, 0
; A[11] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+88]
add r12, rax
mov QWORD PTR [rcx+88], r12
adc r10, rdx
adc r11, 0
; A[12] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+96]
add r10, rax
mov QWORD PTR [rcx+96], r10
adc r11, rdx
adc r12, 0
; A[13] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+104]
add r11, rax
mov QWORD PTR [rcx+104], r11
adc r12, rdx
adc r10, 0
; A[14] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+112]
add r12, rax
mov QWORD PTR [rcx+112], r12
adc r10, rdx
adc r11, 0
; A[15] * B
; Last word: no further carry register is prepared; the high half plus carry
; becomes the 17th result word.
mov rax, r8
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
mov QWORD PTR [rcx+120], r10
mov QWORD PTR [rcx+128], r11
pop r12
ret
sp_1024_mul_d_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * MULX/ADCX/ADOX variant. a is read as 16 64-bit words (byte offsets
; * 0..120). r receives 17 words: the last store below writes the top word at
; * byte offset 128.
; * As used below: rcx = r, rdx = a, r8 = b.
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision digit.
; */
_text SEGMENT READONLY PARA
sp_1024_mul_d_avx2_16 PROC
; r12 and r13 are callee-saved and are used as scratch below.
push r12
push r13
; MULX takes its implicit multiplicand from rdx, so the pointer to a is moved
; to rax and the digit b is placed in rdx for the whole routine.
mov rax, rdx
; A[0] * B
mov rdx, r8
; r13 = 0 for the rest of the routine; this XOR also clears CF and OF before
; the ADCX (CF) and ADOX (OF) chains start. MULX and MOV leave both flags
; untouched.
xor r13, r13
mulx r12, r11, QWORD PTR [rax]
mov QWORD PTR [rcx], r11
; Each step below: r9 = low half, r10 = high half of A[i] * B. The low half is
; added to the pending word through the CF chain and that word is stored; the
; high half is added to a freshly zeroed register through the OF chain and
; becomes the next pending word. r11 and r12 alternate in those two roles.
; A[1] * B
mulx r10, r9, QWORD PTR [rax+8]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+8], r12
; A[2] * B
mulx r10, r9, QWORD PTR [rax+16]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+16], r11
; A[3] * B
mulx r10, r9, QWORD PTR [rax+24]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+24], r12
; A[4] * B
mulx r10, r9, QWORD PTR [rax+32]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+32], r11
; A[5] * B
mulx r10, r9, QWORD PTR [rax+40]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+40], r12
; A[6] * B
mulx r10, r9, QWORD PTR [rax+48]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+48], r11
; A[7] * B
mulx r10, r9, QWORD PTR [rax+56]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+56], r12
; A[8] * B
mulx r10, r9, QWORD PTR [rax+64]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+64], r11
; A[9] * B
mulx r10, r9, QWORD PTR [rax+72]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+72], r12
; A[10] * B
mulx r10, r9, QWORD PTR [rax+80]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+80], r11
; A[11] * B
mulx r10, r9, QWORD PTR [rax+88]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+88], r12
; A[12] * B
mulx r10, r9, QWORD PTR [rax+96]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+96], r11
; A[13] * B
mulx r10, r9, QWORD PTR [rax+104]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+104], r12
; A[14] * B
mulx r10, r9, QWORD PTR [rax+112]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+112], r11
; A[15] * B
mulx r10, r9, QWORD PTR [rax+120]
mov r11, r13
adcx r12, r9
adox r11, r10
; Fold the last CF-chain carry into the top word (r13 is still 0).
adcx r11, r13
mov QWORD PTR [rcx+120], r12
mov QWORD PTR [rcx+128], r11
pop r13
pop r12
ret
sp_1024_mul_d_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * As used below: rcx = d1, rdx = d0, r8 = div.
; * The hardware DIV instruction raises a divide error when the quotient does
; * not fit in 64 bits, so d1 must be less than div (and div non-zero).
; *
; * d1 The high order half of the number to divide.
; * d0 The low order half of the number to divide.
; * div The divisor.
; * returns the result of the division (the quotient, in rax).
; */
_text SEGMENT READONLY PARA
div_1024_word_asm_16 PROC
; DIV divides rdx:rax by its operand, so d0 must end up in rax and d1 in rdx.
; d0 is copied out of rdx (via r9) before rdx is overwritten with d1.
mov r9, rdx
mov rax, r9
mov rdx, rcx
; rax = quotient; the remainder left in rdx is not used here.
div r8
ret
div_1024_word_asm_16 ENDP
_text ENDS
ENDIF
; /* Compare a with b in constant time.
; *
; * Every one of the 16 words is always read and processed, from the most
; * significant (byte offset 120) down to the least significant (offset 0);
; * there are no branches.
; * As used below: rcx = a, rdx = b.
; *
; * a A single precision integer.
; * b A single precision integer.
; * return -ve, 0 or +ve if a is less than, equal to or greater than b
; * respectively.
; */
_text SEGMENT READONLY PARA
sp_1024_cmp_16 PROC
; r12 is callee-saved and is used as scratch for the words of b.
push r12
; Constants and state:
; r9 = 0
; r8 = -1 while all words seen so far are equal, 0 after the first difference
; rax = result, starts at -1
; r10 = 1 (the "greater than" result)
xor r9, r9
mov r8, -1
mov rax, -1
mov r10, 1
; Per word: both words are ANDed with r8, so once a difference has been found
; every later pair becomes 0 - 0 and none of the CMOVs below can fire.
; After the SUB:
; cmova (CF=0 and ZF=0): a word > b word, rax = 1
; cmovc (CF=1): a word < b word, rax = r8 (which is still -1 here)
; cmovnz (ZF=0): words differ, r8 = 0 to mask all lower words
mov r11, QWORD PTR [rcx+120]
mov r12, QWORD PTR [rdx+120]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; The same sequence is repeated for the remaining 15 words.
mov r11, QWORD PTR [rcx+112]
mov r12, QWORD PTR [rdx+112]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+104]
mov r12, QWORD PTR [rdx+104]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+96]
mov r12, QWORD PTR [rdx+96]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+88]
mov r12, QWORD PTR [rdx+88]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+80]
mov r12, QWORD PTR [rdx+80]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+72]
mov r12, QWORD PTR [rdx+72]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+64]
mov r12, QWORD PTR [rdx+64]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+56]
mov r12, QWORD PTR [rdx+56]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+48]
mov r12, QWORD PTR [rdx+48]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+40]
mov r12, QWORD PTR [rdx+40]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+32]
mov r12, QWORD PTR [rdx+32]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+24]
mov r12, QWORD PTR [rdx+24]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+16]
mov r12, QWORD PTR [rdx+16]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+8]
mov r12, QWORD PTR [rdx+8]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx]
mov r12, QWORD PTR [rdx]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; If no word differed, r8 and rax are both still -1 and the XOR gives 0.
; Otherwise r8 is 0 and rax keeps the 1 or -1 chosen at the first difference.
xor rax, r8
pop r12
ret
sp_1024_cmp_16 ENDP
_text ENDS
; /* Conditionally copy a into r using the mask m. (r ^= (r ^ a) & m)
; * m is -1 to copy and 0 when not.
; *
; * Works on 16 64-bit words per operand (byte offsets 0..120), handled as
; * four groups of four words. No branch depends on m.
; * As used below: rcx = r, rdx = a, r8 = m.
; *
; * r A single precision number to copy over.
; * a A single precision number to copy.
; * m Mask value to apply.
; */
_text SEGMENT READONLY PARA
sp_1024_cond_copy_16 PROC
; Words 0..3: build d[i] = (r[i] ^ a[i]) & m in rax, r9, r10, r11, then fold
; each d[i] back into r[i]. All loads of a group are done before its stores.
mov rax, QWORD PTR [rcx]
xor rax, QWORD PTR [rdx]
and rax, r8
mov r9, QWORD PTR [rcx+8]
xor r9, QWORD PTR [rdx+8]
and r9, r8
mov r10, QWORD PTR [rcx+16]
xor r10, QWORD PTR [rdx+16]
and r10, r8
mov r11, QWORD PTR [rcx+24]
xor r11, QWORD PTR [rdx+24]
and r11, r8
xor QWORD PTR [rcx], rax
xor QWORD PTR [rcx+8], r9
xor QWORD PTR [rcx+16], r10
xor QWORD PTR [rcx+24], r11
; Words 4..7
mov rax, QWORD PTR [rcx+32]
xor rax, QWORD PTR [rdx+32]
and rax, r8
mov r9, QWORD PTR [rcx+40]
xor r9, QWORD PTR [rdx+40]
and r9, r8
mov r10, QWORD PTR [rcx+48]
xor r10, QWORD PTR [rdx+48]
and r10, r8
mov r11, QWORD PTR [rcx+56]
xor r11, QWORD PTR [rdx+56]
and r11, r8
xor QWORD PTR [rcx+32], rax
xor QWORD PTR [rcx+40], r9
xor QWORD PTR [rcx+48], r10
xor QWORD PTR [rcx+56], r11
; Words 8..11
mov rax, QWORD PTR [rcx+64]
xor rax, QWORD PTR [rdx+64]
and rax, r8
mov r9, QWORD PTR [rcx+72]
xor r9, QWORD PTR [rdx+72]
and r9, r8
mov r10, QWORD PTR [rcx+80]
xor r10, QWORD PTR [rdx+80]
and r10, r8
mov r11, QWORD PTR [rcx+88]
xor r11, QWORD PTR [rdx+88]
and r11, r8
xor QWORD PTR [rcx+64], rax
xor QWORD PTR [rcx+72], r9
xor QWORD PTR [rcx+80], r10
xor QWORD PTR [rcx+88], r11
; Words 12..15
mov rax, QWORD PTR [rcx+96]
xor rax, QWORD PTR [rdx+96]
and rax, r8
mov r9, QWORD PTR [rcx+104]
xor r9, QWORD PTR [rdx+104]
and r9, r8
mov r10, QWORD PTR [rcx+112]
xor r10, QWORD PTR [rdx+112]
and r10, r8
mov r11, QWORD PTR [rcx+120]
xor r11, QWORD PTR [rdx+120]
and r11, r8
xor QWORD PTR [rcx+96], rax
xor QWORD PTR [rcx+104], r9
xor QWORD PTR [rcx+112], r10
xor QWORD PTR [rcx+120], r11
ret
sp_1024_cond_copy_16 ENDP
_text ENDS
; /* Reduce the number back to 1024 bits using Montgomery reduction.
; *
; * a is processed as a double-width value: the loop runs 16 times and each
; * pass updates the 17 words a[i]..a[i+16]. The reduced result is written to
; * the low 16 words of a by the final conditional subtraction.
; * As used below: rcx = a, rdx = m (moved to r9), r8 = mp.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; */
_text SEGMENT READONLY PARA
sp_1024_mont_reduce_16 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
; MUL writes its high half to rdx, so the pointer to m is kept in r9.
mov r9, rdx
; rsi = carry out of the top word, carried from one pass to the next.
xor rsi, rsi
; i = 16
mov r10, 16
; r15 and rdi cache a[i] and a[i+1] in registers across passes; they are only
; written back to memory after the loop.
mov r15, QWORD PTR [rcx]
mov rdi, QWORD PTR [rcx+8]
L_1024_mont_reduce_16_loop:
; mu = a[i] * mp
mov r13, r15
imul r13, r8
; r11 and r12 alternate as the carry passed from one column to the next.
; a[i+0] += m[0] * mu
; Only the carry (r12) is kept from this column; the sum itself is dropped.
mov rax, r13
xor r12, r12
mul QWORD PTR [r9]
add r15, rax
adc r12, rdx
; a[i+1] += m[1] * mu
; The result stays in r15: it is a[i] for the next pass.
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+8]
mov r15, rdi
add r15, rax
adc r11, rdx
add r15, r12
adc r11, 0
; a[i+2] += m[2] * mu
; The result stays in rdi: it is a[i+1] for the next pass.
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+16]
mov rdi, QWORD PTR [rcx+16]
add rdi, rax
adc r12, rdx
add rdi, r11
adc r12, 0
; a[i+3] += m[3] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+24]
mov r14, QWORD PTR [rcx+24]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+24], r14
adc r11, 0
; a[i+4] += m[4] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+32]
mov r14, QWORD PTR [rcx+32]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+32], r14
adc r12, 0
; a[i+5] += m[5] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+40]
mov r14, QWORD PTR [rcx+40]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+40], r14
adc r11, 0
; a[i+6] += m[6] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+48]
mov r14, QWORD PTR [rcx+48]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+48], r14
adc r12, 0
; a[i+7] += m[7] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+56]
mov r14, QWORD PTR [rcx+56]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+56], r14
adc r11, 0
; a[i+8] += m[8] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+64]
mov r14, QWORD PTR [rcx+64]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+64], r14
adc r12, 0
; a[i+9] += m[9] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+72]
mov r14, QWORD PTR [rcx+72]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+72], r14
adc r11, 0
; a[i+10] += m[10] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+80]
mov r14, QWORD PTR [rcx+80]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+80], r14
adc r12, 0
; a[i+11] += m[11] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+88]
mov r14, QWORD PTR [rcx+88]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+88], r14
adc r11, 0
; a[i+12] += m[12] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+96]
mov r14, QWORD PTR [rcx+96]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+96], r14
adc r12, 0
; a[i+13] += m[13] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+104]
mov r14, QWORD PTR [rcx+104]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+104], r14
adc r11, 0
; a[i+14] += m[14] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+112]
mov r14, QWORD PTR [rcx+112]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+112], r14
adc r12, 0
; a[i+15] += m[15] * mu
; The high half, plus the carry saved in rsi by the previous pass, is added
; into a[i+16]; rsi is then reloaded with the carry out of this pass.
; "mov rsi, 0" is used rather than XOR so that CF survives.
mov rax, r13
mul QWORD PTR [r9+120]
mov r14, QWORD PTR [rcx+120]
add r12, rax
adc rdx, rsi
mov rsi, 0
adc rsi, 0
add r14, r12
mov QWORD PTR [rcx+120], r14
adc QWORD PTR [rcx+128], rdx
adc rsi, 0
; i -= 1
; Advancing rcx by one word makes [rcx] the next a[i].
add rcx, 8
dec r10
jnz L_1024_mont_reduce_16_loop
; rcx now points at the upper 16 words. Write back the two words still cached
; in registers and build the subtraction mask in rsi:
; rsi = -1 if the last pass carried out, or if the top word is not below
; the top word of m (only the top words are compared); otherwise 0.
mov r14, QWORD PTR [rcx+120]
mov QWORD PTR [rcx], r15
sub r14, QWORD PTR [r9+120]
mov QWORD PTR [rcx+8], rdi
sbb r14, r14
neg rsi
not r14
or rsi, r14
; Arguments for sp_1024_cond_sub_16(r, a, b, m):
; rcx = original a (low half, receives the result)
; rdx = upper half of a
; r8 = modulus
; r9 = mask
; r9 must be copied to r8 before it is overwritten with the mask.
mov r8, r9
mov r9, rsi
mov rdx, rcx
sub rcx, 128
; 32 bytes of shadow space plus 8 bytes of padding: after the six pushes above
; rsp is 8 mod 16, so this also restores 16-byte alignment at the call.
sub rsp, 40
call sp_1024_cond_sub_16
add rsp, 40
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_1024_mont_reduce_16 ENDP
_text ENDS
; /* Add two Montgomery form numbers (r = a + b % m).
; *
; * Works on 16 64-bit words per operand (byte offsets 0..120).
; * The sum is stored to r first; m is then subtracted from r when the
; * addition carried out or when the top word of the sum is not below the top
; * word of m (only the top words are compared).
; * As used below: rcx = r, rdx = a, r8 = b, r9 = m.
; * Uses 128 bytes of stack to hold the masked copy of m.
; *
; * r Result of addition.
; * a First number to add in Montgomery form.
; * b Second number to add in Montgomery form.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_1024_mont_add_16 PROC
; r12 and r13 are callee-saved and are used as scratch below.
push r12
push r13
sub rsp, 128
; Stage 1: r = a + b, four words at a time. Only MOV separates the ADD/ADC
; instructions, so the carry is carried from word to word.
mov rax, QWORD PTR [rdx]
mov r10, QWORD PTR [rdx+8]
mov r11, QWORD PTR [rdx+16]
mov r12, QWORD PTR [rdx+24]
add rax, QWORD PTR [r8]
; r13 is zeroed with MOV rather than XOR so that CF is left intact.
mov r13, 0
adc r10, QWORD PTR [r8+8]
adc r11, QWORD PTR [r8+16]
adc r12, QWORD PTR [r8+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov QWORD PTR [rcx+16], r11
mov QWORD PTR [rcx+24], r12
mov rax, QWORD PTR [rdx+32]
mov r10, QWORD PTR [rdx+40]
mov r11, QWORD PTR [rdx+48]
mov r12, QWORD PTR [rdx+56]
adc rax, QWORD PTR [r8+32]
adc r10, QWORD PTR [r8+40]
adc r11, QWORD PTR [r8+48]
adc r12, QWORD PTR [r8+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov QWORD PTR [rcx+48], r11
mov QWORD PTR [rcx+56], r12
mov rax, QWORD PTR [rdx+64]
mov r10, QWORD PTR [rdx+72]
mov r11, QWORD PTR [rdx+80]
mov r12, QWORD PTR [rdx+88]
adc rax, QWORD PTR [r8+64]
adc r10, QWORD PTR [r8+72]
adc r11, QWORD PTR [r8+80]
adc r12, QWORD PTR [r8+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r10
mov QWORD PTR [rcx+80], r11
mov QWORD PTR [rcx+88], r12
mov rax, QWORD PTR [rdx+96]
mov r10, QWORD PTR [rdx+104]
mov r11, QWORD PTR [rdx+112]
mov r12, QWORD PTR [rdx+120]
adc rax, QWORD PTR [r8+96]
adc r10, QWORD PTR [r8+104]
adc r11, QWORD PTR [r8+112]
adc r12, QWORD PTR [r8+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r10
mov QWORD PTR [rcx+112], r11
mov QWORD PTR [rcx+120], r12
; Stage 2: build the mask in r13.
; r13 = 0 - CF: -1 if the addition carried out of the top word.
sbb r13, 0
; r12 still holds the top word of the sum. After SUB/SBB/NOT it is -1 when
; that word is not below m[15], and 0 when it is.
sub r12, QWORD PTR [r9+120]
sbb r12, r12
not r12
or r13, r12
; Stage 3: store m & mask on the stack (all of m, or all zeros).
mov r11, QWORD PTR [r9]
mov r12, QWORD PTR [r9+8]
and r11, r13
and r12, r13
mov QWORD PTR [rsp], r11
mov QWORD PTR [rsp+8], r12
mov r11, QWORD PTR [r9+16]
mov r12, QWORD PTR [r9+24]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+16], r11
mov QWORD PTR [rsp+24], r12
mov r11, QWORD PTR [r9+32]
mov r12, QWORD PTR [r9+40]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+32], r11
mov QWORD PTR [rsp+40], r12
mov r11, QWORD PTR [r9+48]
mov r12, QWORD PTR [r9+56]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+48], r11
mov QWORD PTR [rsp+56], r12
mov r11, QWORD PTR [r9+64]
mov r12, QWORD PTR [r9+72]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+64], r11
mov QWORD PTR [rsp+72], r12
mov r11, QWORD PTR [r9+80]
mov r12, QWORD PTR [r9+88]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+80], r11
mov QWORD PTR [rsp+88], r12
mov r11, QWORD PTR [r9+96]
mov r12, QWORD PTR [r9+104]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+96], r11
mov QWORD PTR [rsp+104], r12
mov r11, QWORD PTR [r9+112]
mov r12, QWORD PTR [r9+120]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+112], r11
mov QWORD PTR [rsp+120], r12
; Stage 4: r -= masked m, two words at a time; the borrow is carried by the
; SUB/SBB chain. The borrow out of the top word is not returned.
mov rax, QWORD PTR [rcx]
mov r10, QWORD PTR [rcx+8]
sub rax, QWORD PTR [rsp]
sbb r10, QWORD PTR [rsp+8]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov rax, QWORD PTR [rcx+16]
mov r10, QWORD PTR [rcx+24]
sbb rax, QWORD PTR [rsp+16]
sbb r10, QWORD PTR [rsp+24]
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
mov rax, QWORD PTR [rcx+32]
mov r10, QWORD PTR [rcx+40]
sbb rax, QWORD PTR [rsp+32]
sbb r10, QWORD PTR [rsp+40]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov rax, QWORD PTR [rcx+48]
mov r10, QWORD PTR [rcx+56]
sbb rax, QWORD PTR [rsp+48]
sbb r10, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
mov rax, QWORD PTR [rcx+64]
mov r10, QWORD PTR [rcx+72]
sbb rax, QWORD PTR [rsp+64]
sbb r10, QWORD PTR [rsp+72]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r10
mov rax, QWORD PTR [rcx+80]
mov r10, QWORD PTR [rcx+88]
sbb rax, QWORD PTR [rsp+80]
sbb r10, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r10
mov rax, QWORD PTR [rcx+96]
mov r10, QWORD PTR [rcx+104]
sbb rax, QWORD PTR [rsp+96]
sbb r10, QWORD PTR [rsp+104]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r10
mov rax, QWORD PTR [rcx+112]
mov r10, QWORD PTR [rcx+120]
sbb rax, QWORD PTR [rsp+112]
sbb r10, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r10
add rsp, 128
pop r13
pop r12
ret
sp_1024_mont_add_16 ENDP
_text ENDS
; /* Double a Montgomery form number (r = a + a % m).
; *
; * Works on 16 64-bit words per operand (byte offsets 0..120).
; * The doubled value is stored to r first; m is then subtracted from r when
; * the addition carried out or when the top word of the sum is not below the
; * top word of m (only the top words are compared).
; * As used below: rcx = r, rdx = a, r8 = m.
; * Uses 128 bytes of stack to hold the masked copy of m.
; *
; * r Result of addition.
; * a Number to double in Montgomery form.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_1024_mont_dbl_16 PROC
; r12 is callee-saved and is used for the mask.
push r12
sub rsp, 128
; Stage 1: r = a + a, four words at a time. Only MOV separates the ADD/ADC
; instructions, so the carry is carried from word to word.
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
add rax, QWORD PTR [rdx]
; r12 is zeroed with MOV rather than XOR so that CF is left intact.
mov r12, 0
adc r9, QWORD PTR [rdx+8]
adc r10, QWORD PTR [rdx+16]
adc r11, QWORD PTR [rdx+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov rax, QWORD PTR [rdx+32]
mov r9, QWORD PTR [rdx+40]
mov r10, QWORD PTR [rdx+48]
mov r11, QWORD PTR [rdx+56]
adc rax, QWORD PTR [rdx+32]
adc r9, QWORD PTR [rdx+40]
adc r10, QWORD PTR [rdx+48]
adc r11, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
mov rax, QWORD PTR [rdx+64]
mov r9, QWORD PTR [rdx+72]
mov r10, QWORD PTR [rdx+80]
mov r11, QWORD PTR [rdx+88]
adc rax, QWORD PTR [rdx+64]
adc r9, QWORD PTR [rdx+72]
adc r10, QWORD PTR [rdx+80]
adc r11, QWORD PTR [rdx+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
mov rax, QWORD PTR [rdx+96]
mov r9, QWORD PTR [rdx+104]
mov r10, QWORD PTR [rdx+112]
mov r11, QWORD PTR [rdx+120]
adc rax, QWORD PTR [rdx+96]
adc r9, QWORD PTR [rdx+104]
adc r10, QWORD PTR [rdx+112]
adc r11, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
; Stage 2: build the mask in r12.
; r12 = 0 - CF: -1 if the addition carried out of the top word.
sbb r12, 0
; r11 still holds the top word of the sum. After SUB/SBB/NOT it is -1 when
; that word is not below m[15], and 0 when it is.
sub r11, QWORD PTR [r8+120]
sbb r11, r11
not r11
or r12, r11
; Stage 3: store m & mask on the stack (all of m, or all zeros).
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
and r10, r12
and r11, r12
mov QWORD PTR [rsp], r10
mov QWORD PTR [rsp+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+16], r10
mov QWORD PTR [rsp+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+32], r10
mov QWORD PTR [rsp+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+48], r10
mov QWORD PTR [rsp+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+64], r10
mov QWORD PTR [rsp+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+80], r10
mov QWORD PTR [rsp+88], r11
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+96], r10
mov QWORD PTR [rsp+104], r11
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+112], r10
mov QWORD PTR [rsp+120], r11
; Stage 4: r -= masked m, two words at a time; the borrow is carried by the
; SUB/SBB chain. The borrow out of the top word is not returned.
mov rax, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
sub rax, QWORD PTR [rsp]
sbb r9, QWORD PTR [rsp+8]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov rax, QWORD PTR [rcx+16]
mov r9, QWORD PTR [rcx+24]
sbb rax, QWORD PTR [rsp+16]
sbb r9, QWORD PTR [rsp+24]
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r9
mov rax, QWORD PTR [rcx+32]
mov r9, QWORD PTR [rcx+40]
sbb rax, QWORD PTR [rsp+32]
sbb r9, QWORD PTR [rsp+40]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [rcx+56]
sbb rax, QWORD PTR [rsp+48]
sbb r9, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r9
mov rax, QWORD PTR [rcx+64]
mov r9, QWORD PTR [rcx+72]
sbb rax, QWORD PTR [rsp+64]
sbb r9, QWORD PTR [rsp+72]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov rax, QWORD PTR [rcx+80]
mov r9, QWORD PTR [rcx+88]
sbb rax, QWORD PTR [rsp+80]
sbb r9, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r9
mov rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [rcx+104]
sbb rax, QWORD PTR [rsp+96]
sbb r9, QWORD PTR [rsp+104]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov rax, QWORD PTR [rcx+112]
mov r9, QWORD PTR [rcx+120]
sbb rax, QWORD PTR [rsp+112]
sbb r9, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r9
add rsp, 128
pop r12
ret
sp_1024_mont_dbl_16 ENDP
_text ENDS
; /* Triple a Montgomery form number (r = a + a + a % m).
; *
; * r Result of addition.
; * a Number to triple in Montgomery form.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_1024_mont_tpl_16 PROC
push r12
sub rsp, 128
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
add rax, QWORD PTR [rdx]
mov r12, 0
adc r9, QWORD PTR [rdx+8]
adc r10, QWORD PTR [rdx+16]
adc r11, QWORD PTR [rdx+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov rax, QWORD PTR [rdx+32]
mov r9, QWORD PTR [rdx+40]
mov r10, QWORD PTR [rdx+48]
mov r11, QWORD PTR [rdx+56]
adc rax, QWORD PTR [rdx+32]
adc r9, QWORD PTR [rdx+40]
adc r10, QWORD PTR [rdx+48]
adc r11, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
mov rax, QWORD PTR [rdx+64]
mov r9, QWORD PTR [rdx+72]
mov r10, QWORD PTR [rdx+80]
mov r11, QWORD PTR [rdx+88]
adc rax, QWORD PTR [rdx+64]
adc r9, QWORD PTR [rdx+72]
adc r10, QWORD PTR [rdx+80]
adc r11, QWORD PTR [rdx+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
mov rax, QWORD PTR [rdx+96]
mov r9, QWORD PTR [rdx+104]
mov r10, QWORD PTR [rdx+112]
mov r11, QWORD PTR [rdx+120]
adc rax, QWORD PTR [rdx+96]
adc r9, QWORD PTR [rdx+104]
adc r10, QWORD PTR [rdx+112]
adc r11, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
sbb r12, 0
sub r11, QWORD PTR [r8+120]
sbb r11, r11
not r11
or r12, r11
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
and r10, r12
and r11, r12
mov QWORD PTR [rsp], r10
mov QWORD PTR [rsp+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+16], r10
mov QWORD PTR [rsp+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+32], r10
mov QWORD PTR [rsp+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+48], r10
mov QWORD PTR [rsp+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+64], r10
mov QWORD PTR [rsp+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+80], r10
mov QWORD PTR [rsp+88], r11
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+96], r10
mov QWORD PTR [rsp+104], r11
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+112], r10
mov QWORD PTR [rsp+120], r11
mov rax, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
sub rax, QWORD PTR [rsp]
sbb r9, QWORD PTR [rsp+8]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov rax, QWORD PTR [rcx+16]
mov r9, QWORD PTR [rcx+24]
sbb rax, QWORD PTR [rsp+16]
sbb r9, QWORD PTR [rsp+24]
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r9
mov rax, QWORD PTR [rcx+32]
mov r9, QWORD PTR [rcx+40]
sbb rax, QWORD PTR [rsp+32]
sbb r9, QWORD PTR [rsp+40]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [rcx+56]
sbb rax, QWORD PTR [rsp+48]
sbb r9, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r9
mov rax, QWORD PTR [rcx+64]
mov r9, QWORD PTR [rcx+72]
sbb rax, QWORD PTR [rsp+64]
sbb r9, QWORD PTR [rsp+72]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov rax, QWORD PTR [rcx+80]
mov r9, QWORD PTR [rcx+88]
sbb rax, QWORD PTR [rsp+80]
sbb r9, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r9
mov rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [rcx+104]
sbb rax, QWORD PTR [rsp+96]
sbb r9, QWORD PTR [rsp+104]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov rax, QWORD PTR [rcx+112]
mov r9, QWORD PTR [rcx+120]
sbb rax, QWORD PTR [rsp+112]
sbb r9, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r9
mov rax, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [rcx+16]
mov r11, QWORD PTR [rcx+24]
add rax, QWORD PTR [rdx]
mov r12, 0
adc r9, QWORD PTR [rdx+8]
adc r10, QWORD PTR [rdx+16]
adc r11, QWORD PTR [rdx+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov rax, QWORD PTR [rcx+32]
mov r9, QWORD PTR [rcx+40]
mov r10, QWORD PTR [rcx+48]
mov r11, QWORD PTR [rcx+56]
adc rax, QWORD PTR [rdx+32]
adc r9, QWORD PTR [rdx+40]
adc r10, QWORD PTR [rdx+48]
adc r11, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
mov rax, QWORD PTR [rcx+64]
mov r9, QWORD PTR [rcx+72]
mov r10, QWORD PTR [rcx+80]
mov r11, QWORD PTR [rcx+88]
adc rax, QWORD PTR [rdx+64]
adc r9, QWORD PTR [rdx+72]
adc r10, QWORD PTR [rdx+80]
adc r11, QWORD PTR [rdx+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
mov rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [rcx+112]
mov r11, QWORD PTR [rcx+120]
adc rax, QWORD PTR [rdx+96]
adc r9, QWORD PTR [rdx+104]
adc r10, QWORD PTR [rdx+112]
adc r11, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
sbb r12, 0
sub r11, QWORD PTR [r8+120]
sbb r11, r11
not r11
or r12, r11
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
and r10, r12
and r11, r12
mov QWORD PTR [rsp], r10
mov QWORD PTR [rsp+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+16], r10
mov QWORD PTR [rsp+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+32], r10
mov QWORD PTR [rsp+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+48], r10
mov QWORD PTR [rsp+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+64], r10
mov QWORD PTR [rsp+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+80], r10
mov QWORD PTR [rsp+88], r11
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+96], r10
mov QWORD PTR [rsp+104], r11
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+112], r10
mov QWORD PTR [rsp+120], r11
mov rax, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
sub rax, QWORD PTR [rsp]
sbb r9, QWORD PTR [rsp+8]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov rax, QWORD PTR [rcx+16]
mov r9, QWORD PTR [rcx+24]
sbb rax, QWORD PTR [rsp+16]
sbb r9, QWORD PTR [rsp+24]
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r9
mov rax, QWORD PTR [rcx+32]
mov r9, QWORD PTR [rcx+40]
sbb rax, QWORD PTR [rsp+32]
sbb r9, QWORD PTR [rsp+40]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [rcx+56]
sbb rax, QWORD PTR [rsp+48]
sbb r9, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r9
mov rax, QWORD PTR [rcx+64]
mov r9, QWORD PTR [rcx+72]
sbb rax, QWORD PTR [rsp+64]
sbb r9, QWORD PTR [rsp+72]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov rax, QWORD PTR [rcx+80]
mov r9, QWORD PTR [rcx+88]
sbb rax, QWORD PTR [rsp+80]
sbb r9, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r9
mov rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [rcx+104]
sbb rax, QWORD PTR [rsp+96]
sbb r9, QWORD PTR [rsp+104]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov rax, QWORD PTR [rcx+112]
mov r9, QWORD PTR [rcx+120]
sbb rax, QWORD PTR [rsp+112]
sbb r9, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r9
add rsp, 128
pop r12
ret
sp_1024_mont_tpl_16 ENDP
_text ENDS
; /* Subtract two Montgomery form numbers (r = a - b % m).
; *
; * Computes r = a - b over 16 limbs; when that subtraction borrows,
; * m is added back. The add-back is branch-free: m is ANDed with an
; * all-ones/zero mask into stack scratch and always added.
; *
; * r Result of subtraction (pointer in rcx, 16 limbs written).
; * a Number to subtract from, in Montgomery form (pointer in rdx).
; * b Number to subtract, in Montgomery form (pointer in r8).
; * m Modulus (prime) (pointer in r9).
; */
_text SEGMENT READONLY PARA
sp_1024_mont_sub_16 PROC
; r12 is a working register and r13 the mask; save the caller's values.
push r12
push r13
; 128 bytes of stack scratch: the masked copy of m (16 limbs).
sub rsp, 128
; r = a - b. The sub starts the borrow chain; each sbb continues it
; (the interleaved mov instructions do not touch flags).
mov rax, QWORD PTR [rdx]
mov r10, QWORD PTR [rdx+8]
mov r11, QWORD PTR [rdx+16]
mov r12, QWORD PTR [rdx+24]
sub rax, QWORD PTR [r8]
; Zero r13 with mov (not xor) so the borrow from the sub survives.
mov r13, 0
sbb r10, QWORD PTR [r8+8]
sbb r11, QWORD PTR [r8+16]
sbb r12, QWORD PTR [r8+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov QWORD PTR [rcx+16], r11
mov QWORD PTR [rcx+24], r12
; Limbs 4..7.
mov rax, QWORD PTR [rdx+32]
mov r10, QWORD PTR [rdx+40]
mov r11, QWORD PTR [rdx+48]
mov r12, QWORD PTR [rdx+56]
sbb rax, QWORD PTR [r8+32]
sbb r10, QWORD PTR [r8+40]
sbb r11, QWORD PTR [r8+48]
sbb r12, QWORD PTR [r8+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov QWORD PTR [rcx+48], r11
mov QWORD PTR [rcx+56], r12
; Limbs 8..11.
mov rax, QWORD PTR [rdx+64]
mov r10, QWORD PTR [rdx+72]
mov r11, QWORD PTR [rdx+80]
mov r12, QWORD PTR [rdx+88]
sbb rax, QWORD PTR [r8+64]
sbb r10, QWORD PTR [r8+72]
sbb r11, QWORD PTR [r8+80]
sbb r12, QWORD PTR [r8+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r10
mov QWORD PTR [rcx+80], r11
mov QWORD PTR [rcx+88], r12
; Limbs 12..15.
mov rax, QWORD PTR [rdx+96]
mov r10, QWORD PTR [rdx+104]
mov r11, QWORD PTR [rdx+112]
mov r12, QWORD PTR [rdx+120]
sbb rax, QWORD PTR [r8+96]
sbb r10, QWORD PTR [r8+104]
sbb r11, QWORD PTR [r8+112]
sbb r12, QWORD PTR [r8+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r10
mov QWORD PTR [rcx+112], r11
mov QWORD PTR [rcx+120], r12
; r13 = all ones if the subtraction borrowed out of limb 15, else 0.
sbb r13, 0
; Scratch = m AND mask (either m or zero), two limbs at a time.
mov r11, QWORD PTR [r9]
mov r12, QWORD PTR [r9+8]
and r11, r13
and r12, r13
mov QWORD PTR [rsp], r11
mov QWORD PTR [rsp+8], r12
mov r11, QWORD PTR [r9+16]
mov r12, QWORD PTR [r9+24]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+16], r11
mov QWORD PTR [rsp+24], r12
mov r11, QWORD PTR [r9+32]
mov r12, QWORD PTR [r9+40]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+32], r11
mov QWORD PTR [rsp+40], r12
mov r11, QWORD PTR [r9+48]
mov r12, QWORD PTR [r9+56]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+48], r11
mov QWORD PTR [rsp+56], r12
mov r11, QWORD PTR [r9+64]
mov r12, QWORD PTR [r9+72]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+64], r11
mov QWORD PTR [rsp+72], r12
mov r11, QWORD PTR [r9+80]
mov r12, QWORD PTR [r9+88]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+80], r11
mov QWORD PTR [rsp+88], r12
mov r11, QWORD PTR [r9+96]
mov r12, QWORD PTR [r9+104]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+96], r11
mov QWORD PTR [rsp+104], r12
mov r11, QWORD PTR [r9+112]
mov r12, QWORD PTR [r9+120]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+112], r11
mov QWORD PTR [rsp+120], r12
; r += scratch, one carry chain across all 16 limbs. The final
; carry is not used.
mov rax, QWORD PTR [rcx]
mov r10, QWORD PTR [rcx+8]
add rax, QWORD PTR [rsp]
adc r10, QWORD PTR [rsp+8]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov rax, QWORD PTR [rcx+16]
mov r10, QWORD PTR [rcx+24]
adc rax, QWORD PTR [rsp+16]
adc r10, QWORD PTR [rsp+24]
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
mov rax, QWORD PTR [rcx+32]
mov r10, QWORD PTR [rcx+40]
adc rax, QWORD PTR [rsp+32]
adc r10, QWORD PTR [rsp+40]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov rax, QWORD PTR [rcx+48]
mov r10, QWORD PTR [rcx+56]
adc rax, QWORD PTR [rsp+48]
adc r10, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
mov rax, QWORD PTR [rcx+64]
mov r10, QWORD PTR [rcx+72]
adc rax, QWORD PTR [rsp+64]
adc r10, QWORD PTR [rsp+72]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r10
mov rax, QWORD PTR [rcx+80]
mov r10, QWORD PTR [rcx+88]
adc rax, QWORD PTR [rsp+80]
adc r10, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r10
mov rax, QWORD PTR [rcx+96]
mov r10, QWORD PTR [rcx+104]
adc rax, QWORD PTR [rsp+96]
adc r10, QWORD PTR [rsp+104]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r10
mov rax, QWORD PTR [rcx+112]
mov r10, QWORD PTR [rcx+120]
adc rax, QWORD PTR [rsp+112]
adc r10, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r10
; Release the scratch area and restore saved registers.
add rsp, 128
pop r13
pop r12
ret
sp_1024_mont_sub_16 ENDP
_text ENDS
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * If a is odd, m is added first so the sum is even (m is odd, being
; * prime); the 17-word sum (16 limbs plus carry) is then shifted
; * right by one bit. The "add m" is branch-free: m is ANDed with an
; * all-ones/zero mask derived from the low bit of a.
; *
; * r Result of division by 2 (pointer in rcx, 16 limbs written).
; * a Number to divide (pointer in rdx).
; * m Modulus (prime) (pointer in r8).
; */
_text SEGMENT READONLY PARA
sp_1024_mont_div2_16 PROC
; r12 collects the carry out of the addition, r13 is the mask.
push r12
push r13
; 128 bytes of stack scratch: first (m AND mask), then a + (m AND mask).
sub rsp, 128
mov r13, QWORD PTR [rdx]
xor r12, r12
; Keep a[0] in rax for the first addition below.
mov rax, r13
; r13 = 0 - (a[0] & 1): all ones when a is odd, zero when even.
and r13, 1
neg r13
; Scratch = m AND mask, one limb at a time.
mov r10, QWORD PTR [r8]
and r10, r13
mov QWORD PTR [rsp], r10
mov r10, QWORD PTR [r8+8]
and r10, r13
mov QWORD PTR [rsp+8], r10
mov r10, QWORD PTR [r8+16]
and r10, r13
mov QWORD PTR [rsp+16], r10
mov r10, QWORD PTR [r8+24]
and r10, r13
mov QWORD PTR [rsp+24], r10
mov r10, QWORD PTR [r8+32]
and r10, r13
mov QWORD PTR [rsp+32], r10
mov r10, QWORD PTR [r8+40]
and r10, r13
mov QWORD PTR [rsp+40], r10
mov r10, QWORD PTR [r8+48]
and r10, r13
mov QWORD PTR [rsp+48], r10
mov r10, QWORD PTR [r8+56]
and r10, r13
mov QWORD PTR [rsp+56], r10
mov r10, QWORD PTR [r8+64]
and r10, r13
mov QWORD PTR [rsp+64], r10
mov r10, QWORD PTR [r8+72]
and r10, r13
mov QWORD PTR [rsp+72], r10
mov r10, QWORD PTR [r8+80]
and r10, r13
mov QWORD PTR [rsp+80], r10
mov r10, QWORD PTR [r8+88]
and r10, r13
mov QWORD PTR [rsp+88], r10
mov r10, QWORD PTR [r8+96]
and r10, r13
mov QWORD PTR [rsp+96], r10
mov r10, QWORD PTR [r8+104]
and r10, r13
mov QWORD PTR [rsp+104], r10
mov r10, QWORD PTR [r8+112]
and r10, r13
mov QWORD PTR [rsp+112], r10
mov r10, QWORD PTR [r8+120]
and r10, r13
mov QWORD PTR [rsp+120], r10
; Scratch += a, accumulating in place on the stack. The loads of a
; between the adc instructions do not disturb the carry flag.
add QWORD PTR [rsp], rax
mov rax, QWORD PTR [rdx+8]
adc QWORD PTR [rsp+8], rax
mov rax, QWORD PTR [rdx+16]
adc QWORD PTR [rsp+16], rax
mov rax, QWORD PTR [rdx+24]
adc QWORD PTR [rsp+24], rax
mov rax, QWORD PTR [rdx+32]
adc QWORD PTR [rsp+32], rax
mov rax, QWORD PTR [rdx+40]
adc QWORD PTR [rsp+40], rax
mov rax, QWORD PTR [rdx+48]
adc QWORD PTR [rsp+48], rax
mov rax, QWORD PTR [rdx+56]
adc QWORD PTR [rsp+56], rax
mov rax, QWORD PTR [rdx+64]
adc QWORD PTR [rsp+64], rax
mov rax, QWORD PTR [rdx+72]
adc QWORD PTR [rsp+72], rax
mov rax, QWORD PTR [rdx+80]
adc QWORD PTR [rsp+80], rax
mov rax, QWORD PTR [rdx+88]
adc QWORD PTR [rsp+88], rax
mov rax, QWORD PTR [rdx+96]
adc QWORD PTR [rsp+96], rax
mov rax, QWORD PTR [rdx+104]
adc QWORD PTR [rsp+104], rax
mov rax, QWORD PTR [rdx+112]
adc QWORD PTR [rsp+112], rax
mov rax, QWORD PTR [rdx+120]
adc QWORD PTR [rsp+120], rax
; r12 = carry out of the addition (bit 1024 of the sum).
adc r12, 0
; Shift the 17-word value right by one bit into r. rax and r9
; alternate as "current limb" and "next limb": each shrd shifts the
; current limb right by 1 and pulls in the low bit of the next limb.
mov rax, QWORD PTR [rsp]
mov r9, QWORD PTR [rsp+8]
shrd rax, r9, 1
mov QWORD PTR [rcx], rax
mov rax, QWORD PTR [rsp+16]
shrd r9, rax, 1
mov QWORD PTR [rcx+8], r9
mov r9, QWORD PTR [rsp+24]
shrd rax, r9, 1
mov QWORD PTR [rcx+16], rax
mov rax, QWORD PTR [rsp+32]
shrd r9, rax, 1
mov QWORD PTR [rcx+24], r9
mov r9, QWORD PTR [rsp+40]
shrd rax, r9, 1
mov QWORD PTR [rcx+32], rax
mov rax, QWORD PTR [rsp+48]
shrd r9, rax, 1
mov QWORD PTR [rcx+40], r9
mov r9, QWORD PTR [rsp+56]
shrd rax, r9, 1
mov QWORD PTR [rcx+48], rax
mov rax, QWORD PTR [rsp+64]
shrd r9, rax, 1
mov QWORD PTR [rcx+56], r9
mov r9, QWORD PTR [rsp+72]
shrd rax, r9, 1
mov QWORD PTR [rcx+64], rax
mov rax, QWORD PTR [rsp+80]
shrd r9, rax, 1
mov QWORD PTR [rcx+72], r9
mov r9, QWORD PTR [rsp+88]
shrd rax, r9, 1
mov QWORD PTR [rcx+80], rax
mov rax, QWORD PTR [rsp+96]
shrd r9, rax, 1
mov QWORD PTR [rcx+88], r9
mov r9, QWORD PTR [rsp+104]
shrd rax, r9, 1
mov QWORD PTR [rcx+96], rax
mov rax, QWORD PTR [rsp+112]
shrd r9, rax, 1
mov QWORD PTR [rcx+104], r9
mov r9, QWORD PTR [rsp+120]
shrd rax, r9, 1
mov QWORD PTR [rcx+112], rax
; The top limb takes its high bit from the carry held in r12.
shrd r9, r12, 1
mov QWORD PTR [rcx+120], r9
; Release the scratch area and restore saved registers.
add rsp, 128
pop r13
pop r12
ret
sp_1024_mont_div2_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 1024 bits using Montgomery reduction.
; *
; * For each of the 16 low limbs a[i]: mu = a[i] * mp, then
; * a[i..i+16] += m * mu (making a[i] zero). The 16 high limbs are then
; * conditionally reduced by m and written to a[0..15].
; * Uses mulx with adcx/adox (two independent carry chains) and pext.
; *
; * a A single precision number to reduce in place (pointer in rcx).
; * m The single precision number representing the modulus (pointer
; *   in rdx).
; * mp The digit representing the negative inverse of m mod 2^n
; *   (value in r8).
; */
_text SEGMENT READONLY PARA
sp_1024_mont_reduce_avx2_16 PROC
; Every register pushed here is overwritten below.
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
push rbp
; Register roles from here on:
;   r9  = pointer into a      r10 = m            r8  = mp
;   r11 = remaining limbs     rdx = mu (implicit mulx multiplicand)
;   r14, r15, rdi, rsi = the four lowest live limbs a[i..i+3]
;   r12, r13 = alternating working limbs
;   rbp = carry passed from one limb iteration to the next
;   rbx = zero; "xor rbx, rbx" also clears CF and OF for adcx/adox
; rcx and rdx are moved out because mulx uses them.
mov r9, rcx
mov r10, rdx
xor rbp, rbp
; i = 16
mov r11, 16
mov r14, QWORD PTR [r9]
mov r15, QWORD PTR [r9+8]
mov rdi, QWORD PTR [r9+16]
mov rsi, QWORD PTR [r9+24]
; Bias the pointer by 8 limbs; the loop addresses limbs as [r9-32]
; through [r9+72].
add r9, 64
; rbp was already cleared above; this repeats the clear.
xor rbp, rbp
; The loop body handles two limbs per pass (the same step unrolled
; twice, the second copy with every memory offset one limb higher).
L_1024_mont_reduce_avx2_16_loop:
; mu = a[i] * mp
mov rdx, r14
mov r12, r14
imul rdx, r8
; rbx = 0 and CF = OF = 0: start of both carry chains.
xor rbx, rbx
; In each step below mulx gives rcx:rax = m[k] * mu; adcx adds the low
; half into limb k on the CF chain, adox adds the high half into limb
; k+1 on the OF chain. The register window slides down by one limb.
; a[i+0] += m[0] * mu
mulx rcx, rax, QWORD PTR [r10]
mov r14, r15
; The sum left in r12 is not stored; only the carry continues.
adcx r12, rax
adox r14, rcx
; a[i+1] += m[1] * mu
mulx rcx, rax, QWORD PTR [r10+8]
mov r15, rdi
adcx r14, rax
adox r15, rcx
; a[i+2] += m[2] * mu
mulx rcx, rax, QWORD PTR [r10+16]
mov rdi, rsi
adcx r15, rax
adox rdi, rcx
; a[i+3] += m[3] * mu
mulx rcx, rax, QWORD PTR [r10+24]
mov rsi, QWORD PTR [r9+-32]
adcx rdi, rax
adox rsi, rcx
; a[i+4] += m[4] * mu
mulx rcx, rax, QWORD PTR [r10+32]
mov r13, QWORD PTR [r9+-24]
adcx rsi, rax
adox r13, rcx
; a[i+5] += m[5] * mu
mulx rcx, rax, QWORD PTR [r10+40]
mov r12, QWORD PTR [r9+-16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-24], r13
; a[i+6] += m[6] * mu
mulx rcx, rax, QWORD PTR [r10+48]
mov r13, QWORD PTR [r9+-8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-16], r12
; a[i+7] += m[7] * mu
mulx rcx, rax, QWORD PTR [r10+56]
mov r12, QWORD PTR [r9]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-8], r13
; a[i+8] += m[8] * mu
mulx rcx, rax, QWORD PTR [r10+64]
mov r13, QWORD PTR [r9+8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9], r12
; a[i+9] += m[9] * mu
mulx rcx, rax, QWORD PTR [r10+72]
mov r12, QWORD PTR [r9+16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+8], r13
; a[i+10] += m[10] * mu
mulx rcx, rax, QWORD PTR [r10+80]
mov r13, QWORD PTR [r9+24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+16], r12
; a[i+11] += m[11] * mu
mulx rcx, rax, QWORD PTR [r10+88]
mov r12, QWORD PTR [r9+32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+24], r13
; a[i+12] += m[12] * mu
mulx rcx, rax, QWORD PTR [r10+96]
mov r13, QWORD PTR [r9+40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+32], r12
; a[i+13] += m[13] * mu
mulx rcx, rax, QWORD PTR [r10+104]
mov r12, QWORD PTR [r9+48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+40], r13
; a[i+14] += m[14] * mu
mulx rcx, rax, QWORD PTR [r10+112]
mov r13, QWORD PTR [r9+56]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+48], r12
; a[i+15] += m[15] * mu
mulx rcx, rax, QWORD PTR [r10+120]
mov r12, QWORD PTR [r9+64]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+56], r13
; a[i+16] += carry saved from the previous limb (plus pending CF).
adcx r12, rbp
; New carry: rbp = 0 + OF + CF.
mov rbp, rbx
mov QWORD PTR [r9+64], r12
adox rbp, rbx
adcx rbp, rbx
; Second copy of the step, for the next limb.
; mu = a[i] * mp
mov rdx, r14
mov r12, r14
imul rdx, r8
xor rbx, rbx
; a[i+0] += m[0] * mu
mulx rcx, rax, QWORD PTR [r10]
mov r14, r15
adcx r12, rax
adox r14, rcx
; a[i+1] += m[1] * mu
mulx rcx, rax, QWORD PTR [r10+8]
mov r15, rdi
adcx r14, rax
adox r15, rcx
; a[i+2] += m[2] * mu
mulx rcx, rax, QWORD PTR [r10+16]
mov rdi, rsi
adcx r15, rax
adox rdi, rcx
; a[i+3] += m[3] * mu
mulx rcx, rax, QWORD PTR [r10+24]
mov rsi, QWORD PTR [r9+-24]
adcx rdi, rax
adox rsi, rcx
; a[i+4] += m[4] * mu
mulx rcx, rax, QWORD PTR [r10+32]
mov r13, QWORD PTR [r9+-16]
adcx rsi, rax
adox r13, rcx
; a[i+5] += m[5] * mu
mulx rcx, rax, QWORD PTR [r10+40]
mov r12, QWORD PTR [r9+-8]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-16], r13
; a[i+6] += m[6] * mu
mulx rcx, rax, QWORD PTR [r10+48]
mov r13, QWORD PTR [r9]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-8], r12
; a[i+7] += m[7] * mu
mulx rcx, rax, QWORD PTR [r10+56]
mov r12, QWORD PTR [r9+8]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9], r13
; a[i+8] += m[8] * mu
mulx rcx, rax, QWORD PTR [r10+64]
mov r13, QWORD PTR [r9+16]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+8], r12
; a[i+9] += m[9] * mu
mulx rcx, rax, QWORD PTR [r10+72]
mov r12, QWORD PTR [r9+24]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+16], r13
; a[i+10] += m[10] * mu
mulx rcx, rax, QWORD PTR [r10+80]
mov r13, QWORD PTR [r9+32]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+24], r12
; a[i+11] += m[11] * mu
mulx rcx, rax, QWORD PTR [r10+88]
mov r12, QWORD PTR [r9+40]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+32], r13
; a[i+12] += m[12] * mu
mulx rcx, rax, QWORD PTR [r10+96]
mov r13, QWORD PTR [r9+48]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+40], r12
; a[i+13] += m[13] * mu
mulx rcx, rax, QWORD PTR [r10+104]
mov r12, QWORD PTR [r9+56]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+48], r13
; a[i+14] += m[14] * mu
mulx rcx, rax, QWORD PTR [r10+112]
mov r13, QWORD PTR [r9+64]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+56], r12
; a[i+15] += m[15] * mu
mulx rcx, rax, QWORD PTR [r10+120]
mov r12, QWORD PTR [r9+72]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+64], r13
; a[i+16] += carry from the previous limb; then collect the new carry.
adcx r12, rbp
mov rbp, rbx
mov QWORD PTR [r9+72], r12
adox rbp, rbx
adcx rbp, rbx
; a += 2
add r9, 16
; i -= 2
sub r11, 2
jnz L_1024_mont_reduce_avx2_16_loop
; Remove the 8-limb bias: r9 now points at the upper 16 limbs of a,
; which hold the value to be reduced.
sub r9, 64
; r12 still holds the top limb stored last in the loop. Compare it
; with m[15]: after sbb/not, r12 = all ones if top limb >= m[15].
sub r12, QWORD PTR [r10+120]
; mp is no longer needed; r8 becomes the source (upper half) pointer.
mov r8, r9
sbb r12, r12
; rbp = 0 - carry: a carry of 1 becomes all ones.
neg rbp
not r12
; Mask: subtract m if the loop carried out or top limb >= m[15].
or rbp, r12
; r9 = destination: the low 16 limbs of a.
sub r9, 128
; a[0..15] = upper half - (m masked), one borrow chain. pext with an
; all-ones mask returns m[k] unchanged and with a zero mask returns 0;
; it does not modify the flags, so it can sit inside the sbb chain.
; The first four source limbs are still in r14, r15, rdi, rsi; the
; rest are read through r8. rdx, rax, rcx rotate as temporaries.
mov rcx, QWORD PTR [r10]
mov rdx, r14
pext rcx, rcx, rbp
sub rdx, rcx
mov rcx, QWORD PTR [r10+8]
mov rax, r15
pext rcx, rcx, rbp
mov QWORD PTR [r9], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+16]
mov rcx, rdi
pext rdx, rdx, rbp
mov QWORD PTR [r9+8], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+24]
mov rdx, rsi
pext rax, rax, rbp
mov QWORD PTR [r9+16], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+32]
mov rax, QWORD PTR [r8+32]
pext rcx, rcx, rbp
mov QWORD PTR [r9+24], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+40]
mov rcx, QWORD PTR [r8+40]
pext rdx, rdx, rbp
mov QWORD PTR [r9+32], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+48]
mov rdx, QWORD PTR [r8+48]
pext rax, rax, rbp
mov QWORD PTR [r9+40], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+56]
mov rax, QWORD PTR [r8+56]
pext rcx, rcx, rbp
mov QWORD PTR [r9+48], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+64]
mov rcx, QWORD PTR [r8+64]
pext rdx, rdx, rbp
mov QWORD PTR [r9+56], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+72]
mov rdx, QWORD PTR [r8+72]
pext rax, rax, rbp
mov QWORD PTR [r9+64], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+80]
mov rax, QWORD PTR [r8+80]
pext rcx, rcx, rbp
mov QWORD PTR [r9+72], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+88]
mov rcx, QWORD PTR [r8+88]
pext rdx, rdx, rbp
mov QWORD PTR [r9+80], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+96]
mov rdx, QWORD PTR [r8+96]
pext rax, rax, rbp
mov QWORD PTR [r9+88], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+104]
mov rax, QWORD PTR [r8+104]
pext rcx, rcx, rbp
mov QWORD PTR [r9+96], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+112]
mov rcx, QWORD PTR [r8+112]
pext rdx, rdx, rbp
mov QWORD PTR [r9+104], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+120]
mov rdx, QWORD PTR [r8+120]
pext rax, rax, rbp
mov QWORD PTR [r9+112], rcx
sbb rdx, rax
mov QWORD PTR [r9+120], rdx
; Restore saved registers in reverse push order.
pop rbp
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_1024_mont_reduce_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Add two Montgomery form numbers (r = a + b % m).
; *
; * Computes r = a + b over 16 limbs, then subtracts m when the sum
; * carried out or its top limb is >= the top limb of m. The subtract
; * is branch-free: each limb of m is passed through pext with an
; * all-ones/zero mask (giving m[k] or 0), so no stack scratch is used.
; *
; * r Result of addition (pointer in rcx, 16 limbs written).
; * a First number to add in Montgomery form (pointer in rdx).
; * b Second number to add in Montgomery form (pointer in r8).
; * m Modulus (prime) (pointer in r9).
; */
_text SEGMENT READONLY PARA
sp_1024_mont_add_avx2_16 PROC
; r12 is a working register and r13 the mask; save the caller's values.
push r12
push r13
; r = a + b. The add starts the carry chain; each adc continues it
; (the interleaved mov instructions do not touch flags).
mov rax, QWORD PTR [rdx]
mov r10, QWORD PTR [rdx+8]
mov r11, QWORD PTR [rdx+16]
mov r12, QWORD PTR [rdx+24]
add rax, QWORD PTR [r8]
; Zero r13 with mov (not xor) so the carry from the add survives.
mov r13, 0
adc r10, QWORD PTR [r8+8]
adc r11, QWORD PTR [r8+16]
adc r12, QWORD PTR [r8+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov QWORD PTR [rcx+16], r11
mov QWORD PTR [rcx+24], r12
; Limbs 4..7.
mov rax, QWORD PTR [rdx+32]
mov r10, QWORD PTR [rdx+40]
mov r11, QWORD PTR [rdx+48]
mov r12, QWORD PTR [rdx+56]
adc rax, QWORD PTR [r8+32]
adc r10, QWORD PTR [r8+40]
adc r11, QWORD PTR [r8+48]
adc r12, QWORD PTR [r8+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov QWORD PTR [rcx+48], r11
mov QWORD PTR [rcx+56], r12
; Limbs 8..11.
mov rax, QWORD PTR [rdx+64]
mov r10, QWORD PTR [rdx+72]
mov r11, QWORD PTR [rdx+80]
mov r12, QWORD PTR [rdx+88]
adc rax, QWORD PTR [r8+64]
adc r10, QWORD PTR [r8+72]
adc r11, QWORD PTR [r8+80]
adc r12, QWORD PTR [r8+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r10
mov QWORD PTR [rcx+80], r11
mov QWORD PTR [rcx+88], r12
; Limbs 12..15. r12 ends up holding the top limb of the sum.
mov rax, QWORD PTR [rdx+96]
mov r10, QWORD PTR [rdx+104]
mov r11, QWORD PTR [rdx+112]
mov r12, QWORD PTR [rdx+120]
adc rax, QWORD PTR [r8+96]
adc r10, QWORD PTR [r8+104]
adc r11, QWORD PTR [r8+112]
adc r12, QWORD PTR [r8+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r10
mov QWORD PTR [rcx+112], r11
mov QWORD PTR [rcx+120], r12
; r13 = all ones if the addition carried out of limb 15, else 0.
sbb r13, 0
; r12 = all ones if top limb of sum >= m[15] (no borrow), else 0.
; NOTE(review): only the top limb is compared with m here, not the
; whole value - presumably sufficient for the modulus used; confirm.
sub r12, QWORD PTR [r9+120]
sbb r12, r12
not r12
; Mask: subtract m when either condition holds.
or r13, r12
; r -= (m masked), two limbs at a time on one borrow chain. pext with
; an all-ones mask returns the limb unchanged, with a zero mask it
; returns 0; it leaves the flags alone, so the sbb chain is unbroken.
mov r11, QWORD PTR [r9]
mov r12, QWORD PTR [r9+8]
mov rax, QWORD PTR [rcx]
mov r10, QWORD PTR [rcx+8]
pext r11, r11, r13
pext r12, r12, r13
sub rax, r11
sbb r10, r12
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov r11, QWORD PTR [r9+16]
mov r12, QWORD PTR [r9+24]
mov rax, QWORD PTR [rcx+16]
mov r10, QWORD PTR [rcx+24]
pext r11, r11, r13
pext r12, r12, r13
sbb rax, r11
sbb r10, r12
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
mov r11, QWORD PTR [r9+32]
mov r12, QWORD PTR [r9+40]
mov rax, QWORD PTR [rcx+32]
mov r10, QWORD PTR [rcx+40]
pext r11, r11, r13
pext r12, r12, r13
sbb rax, r11
sbb r10, r12
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov r11, QWORD PTR [r9+48]
mov r12, QWORD PTR [r9+56]
mov rax, QWORD PTR [rcx+48]
mov r10, QWORD PTR [rcx+56]
pext r11, r11, r13
pext r12, r12, r13
sbb rax, r11
sbb r10, r12
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
mov r11, QWORD PTR [r9+64]
mov r12, QWORD PTR [r9+72]
mov rax, QWORD PTR [rcx+64]
mov r10, QWORD PTR [rcx+72]
pext r11, r11, r13
pext r12, r12, r13
sbb rax, r11
sbb r10, r12
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r10
mov r11, QWORD PTR [r9+80]
mov r12, QWORD PTR [r9+88]
mov rax, QWORD PTR [rcx+80]
mov r10, QWORD PTR [rcx+88]
pext r11, r11, r13
pext r12, r12, r13
sbb rax, r11
sbb r10, r12
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r10
mov r11, QWORD PTR [r9+96]
mov r12, QWORD PTR [r9+104]
mov rax, QWORD PTR [rcx+96]
mov r10, QWORD PTR [rcx+104]
pext r11, r11, r13
pext r12, r12, r13
sbb rax, r11
sbb r10, r12
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r10
mov r11, QWORD PTR [r9+112]
mov r12, QWORD PTR [r9+120]
mov rax, QWORD PTR [rcx+112]
mov r10, QWORD PTR [rcx+120]
pext r11, r11, r13
pext r12, r12, r13
sbb rax, r11
sbb r10, r12
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r10
; Restore saved registers.
pop r13
pop r12
ret
sp_1024_mont_add_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Double a Montgomery form number (r = a + a % m).
; *
; * Computes r = a + a over 16 limbs, then subtracts m when the sum
; * carried out or its top limb is >= the top limb of m. The subtract
; * is branch-free: each limb of m is passed through pext with an
; * all-ones/zero mask (giving m[k] or 0), so no stack scratch is used.
; *
; * r Result of doubling (pointer in rcx, 16 limbs written).
; * a Number to double in Montgomery form (pointer in rdx).
; * m Modulus (prime) (pointer in r8).
; */
_text SEGMENT READONLY PARA
sp_1024_mont_dbl_avx2_16 PROC
; r12 holds the subtraction mask below; save the caller's value.
push r12
; r = a + a. The add starts the carry chain; each adc continues it
; (the interleaved mov instructions do not touch flags).
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
add rax, QWORD PTR [rdx]
; Zero r12 with mov (not xor) so the carry from the add survives.
mov r12, 0
adc r9, QWORD PTR [rdx+8]
adc r10, QWORD PTR [rdx+16]
adc r11, QWORD PTR [rdx+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
; Limbs 4..7.
mov rax, QWORD PTR [rdx+32]
mov r9, QWORD PTR [rdx+40]
mov r10, QWORD PTR [rdx+48]
mov r11, QWORD PTR [rdx+56]
adc rax, QWORD PTR [rdx+32]
adc r9, QWORD PTR [rdx+40]
adc r10, QWORD PTR [rdx+48]
adc r11, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
; Limbs 8..11.
mov rax, QWORD PTR [rdx+64]
mov r9, QWORD PTR [rdx+72]
mov r10, QWORD PTR [rdx+80]
mov r11, QWORD PTR [rdx+88]
adc rax, QWORD PTR [rdx+64]
adc r9, QWORD PTR [rdx+72]
adc r10, QWORD PTR [rdx+80]
adc r11, QWORD PTR [rdx+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
; Limbs 12..15. r11 ends up holding the top limb of the sum.
mov rax, QWORD PTR [rdx+96]
mov r9, QWORD PTR [rdx+104]
mov r10, QWORD PTR [rdx+112]
mov r11, QWORD PTR [rdx+120]
adc rax, QWORD PTR [rdx+96]
adc r9, QWORD PTR [rdx+104]
adc r10, QWORD PTR [rdx+112]
adc r11, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
; r12 = all ones if the addition carried out of limb 15, else 0.
sbb r12, 0
; r11 = all ones if top limb of sum >= m[15] (no borrow), else 0.
; NOTE(review): only the top limb is compared with m here, not the
; whole value - presumably sufficient for the modulus used; confirm.
sub r11, QWORD PTR [r8+120]
sbb r11, r11
not r11
; Mask: subtract m when either condition holds.
or r12, r11
; r -= (m masked), two limbs at a time on one borrow chain. pext with
; an all-ones mask returns the limb unchanged, with a zero mask it
; returns 0; it leaves the flags alone, so the sbb chain is unbroken.
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
mov rax, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
pext r10, r10, r12
pext r11, r11, r12
sub rax, r10
sbb r9, r11
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
mov rax, QWORD PTR [rcx+16]
mov r9, QWORD PTR [rcx+24]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r9
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov rax, QWORD PTR [rcx+32]
mov r9, QWORD PTR [rcx+40]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
mov rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [rcx+56]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r9
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
mov rax, QWORD PTR [rcx+64]
mov r9, QWORD PTR [rcx+72]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
mov rax, QWORD PTR [rcx+80]
mov r9, QWORD PTR [rcx+88]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r9
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
mov rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [rcx+104]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
mov rax, QWORD PTR [rcx+112]
mov r9, QWORD PTR [rcx+120]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r9
; Restore r12.
pop r12
ret
sp_1024_mont_dbl_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Triple a Montgomery form number (r = a + a + a % m).
; *
; * r Result of addition.
; * a Number to triple in Montgomery form.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_1024_mont_tpl_avx2_16 PROC
push r12
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
add rax, QWORD PTR [rdx]
mov r12, 0
adc r9, QWORD PTR [rdx+8]
adc r10, QWORD PTR [rdx+16]
adc r11, QWORD PTR [rdx+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov rax, QWORD PTR [rdx+32]
mov r9, QWORD PTR [rdx+40]
mov r10, QWORD PTR [rdx+48]
mov r11, QWORD PTR [rdx+56]
adc rax, QWORD PTR [rdx+32]
adc r9, QWORD PTR [rdx+40]
adc r10, QWORD PTR [rdx+48]
adc r11, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
mov rax, QWORD PTR [rdx+64]
mov r9, QWORD PTR [rdx+72]
mov r10, QWORD PTR [rdx+80]
mov r11, QWORD PTR [rdx+88]
adc rax, QWORD PTR [rdx+64]
adc r9, QWORD PTR [rdx+72]
adc r10, QWORD PTR [rdx+80]
adc r11, QWORD PTR [rdx+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
mov rax, QWORD PTR [rdx+96]
mov r9, QWORD PTR [rdx+104]
mov r10, QWORD PTR [rdx+112]
mov r11, QWORD PTR [rdx+120]
adc rax, QWORD PTR [rdx+96]
adc r9, QWORD PTR [rdx+104]
adc r10, QWORD PTR [rdx+112]
adc r11, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
sbb r12, 0
sub r11, QWORD PTR [r8+120]
sbb r11, r11
not r11
or r12, r11
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
mov rax, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
pext r10, r10, r12
pext r11, r11, r12
sub rax, r10
sbb r9, r11
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
mov rax, QWORD PTR [rcx+16]
mov r9, QWORD PTR [rcx+24]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r9
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov rax, QWORD PTR [rcx+32]
mov r9, QWORD PTR [rcx+40]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
mov rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [rcx+56]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r9
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
mov rax, QWORD PTR [rcx+64]
mov r9, QWORD PTR [rcx+72]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
mov rax, QWORD PTR [rcx+80]
mov r9, QWORD PTR [rcx+88]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r9
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
mov rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [rcx+104]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
mov rax, QWORD PTR [rcx+112]
mov r9, QWORD PTR [rcx+120]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r9
mov rax, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [rcx+16]
mov r11, QWORD PTR [rcx+24]
add rax, QWORD PTR [rdx]
mov r12, 0
adc r9, QWORD PTR [rdx+8]
adc r10, QWORD PTR [rdx+16]
adc r11, QWORD PTR [rdx+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov rax, QWORD PTR [rcx+32]
mov r9, QWORD PTR [rcx+40]
mov r10, QWORD PTR [rcx+48]
mov r11, QWORD PTR [rcx+56]
adc rax, QWORD PTR [rdx+32]
adc r9, QWORD PTR [rdx+40]
adc r10, QWORD PTR [rdx+48]
adc r11, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
mov rax, QWORD PTR [rcx+64]
mov r9, QWORD PTR [rcx+72]
mov r10, QWORD PTR [rcx+80]
mov r11, QWORD PTR [rcx+88]
adc rax, QWORD PTR [rdx+64]
adc r9, QWORD PTR [rdx+72]
adc r10, QWORD PTR [rdx+80]
adc r11, QWORD PTR [rdx+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
mov rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [rcx+112]
mov r11, QWORD PTR [rcx+120]
adc rax, QWORD PTR [rdx+96]
adc r9, QWORD PTR [rdx+104]
adc r10, QWORD PTR [rdx+112]
adc r11, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
sbb r12, 0
sub r11, QWORD PTR [r8+120]
sbb r11, r11
not r11
or r12, r11
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
mov rax, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
pext r10, r10, r12
pext r11, r11, r12
sub rax, r10
sbb r9, r11
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
mov rax, QWORD PTR [rcx+16]
mov r9, QWORD PTR [rcx+24]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r9
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov rax, QWORD PTR [rcx+32]
mov r9, QWORD PTR [rcx+40]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
mov rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [rcx+56]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r9
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
mov rax, QWORD PTR [rcx+64]
mov r9, QWORD PTR [rcx+72]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
mov rax, QWORD PTR [rcx+80]
mov r9, QWORD PTR [rcx+88]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r9
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
mov rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [rcx+104]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
mov rax, QWORD PTR [rcx+112]
mov r9, QWORD PTR [rcx+120]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r9
pop r12
ret
sp_1024_mont_tpl_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Subtract two Montgomery form numbers (r = a - b % m).
; *
; * Works on 16 limbs of 64 bits each, least significant limb at offset 0.
; * First r = a - b is computed with a borrow chain. The final borrow is
; * turned into a mask (all ones when the subtraction borrowed, zero when
; * it did not) and the masked modulus is added back onto r.
; * There are no branches; the mask is applied with pext, which returns
; * its source unchanged for an all-ones mask and zero for a zero mask.
; *
; * Arguments arrive in rcx, rdx, r8 and r9 (in that order).
; * Scratch registers: rax, r10, r11 plus r12 and r13 (saved and restored).
; *
; * r Result of subtraction.
; * a Number to subtract from in Montgomery form.
; * b Number to subtract in Montgomery form.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_1024_mont_sub_avx2_16 PROC
; r12 and r13 are used as scratch below, so keep the caller's values.
push r12
push r13
; Limbs 0-3: r = a - b. Only the very first limb uses sub; every later
; limb uses sbb so the borrow runs through the whole number.
mov rax, QWORD PTR [rdx]
mov r10, QWORD PTR [rdx+8]
mov r11, QWORD PTR [rdx+16]
mov r12, QWORD PTR [rdx+24]
sub rax, QWORD PTR [r8]
; Clear the mask register. mov does not touch the flags, so the borrow
; produced by the sub above is still pending for the next sbb.
mov r13, 0
sbb r10, QWORD PTR [r8+8]
sbb r11, QWORD PTR [r8+16]
sbb r12, QWORD PTR [r8+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov QWORD PTR [rcx+16], r11
mov QWORD PTR [rcx+24], r12
; Limbs 4-7.
mov rax, QWORD PTR [rdx+32]
mov r10, QWORD PTR [rdx+40]
mov r11, QWORD PTR [rdx+48]
mov r12, QWORD PTR [rdx+56]
sbb rax, QWORD PTR [r8+32]
sbb r10, QWORD PTR [r8+40]
sbb r11, QWORD PTR [r8+48]
sbb r12, QWORD PTR [r8+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov QWORD PTR [rcx+48], r11
mov QWORD PTR [rcx+56], r12
; Limbs 8-11.
mov rax, QWORD PTR [rdx+64]
mov r10, QWORD PTR [rdx+72]
mov r11, QWORD PTR [rdx+80]
mov r12, QWORD PTR [rdx+88]
sbb rax, QWORD PTR [r8+64]
sbb r10, QWORD PTR [r8+72]
sbb r11, QWORD PTR [r8+80]
sbb r12, QWORD PTR [r8+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r10
mov QWORD PTR [rcx+80], r11
mov QWORD PTR [rcx+88], r12
; Limbs 12-15.
mov rax, QWORD PTR [rdx+96]
mov r10, QWORD PTR [rdx+104]
mov r11, QWORD PTR [rdx+112]
mov r12, QWORD PTR [rdx+120]
sbb rax, QWORD PTR [r8+96]
sbb r10, QWORD PTR [r8+104]
sbb r11, QWORD PTR [r8+112]
sbb r12, QWORD PTR [r8+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r10
mov QWORD PTR [rcx+112], r11
mov QWORD PTR [rcx+120], r12
; r13 = 0 - borrow: all ones when the subtraction borrowed, else zero.
sbb r13, 0
; Add the masked modulus back onto r, two limbs at a time. The first pair
; starts the carry chain with add; the remaining pairs continue it with adc.
; The mov and pext instructions in between leave the carry flag untouched.
mov r11, QWORD PTR [r9]
mov r12, QWORD PTR [r9+8]
mov rax, QWORD PTR [rcx]
mov r10, QWORD PTR [rcx+8]
pext r11, r11, r13
pext r12, r12, r13
add rax, r11
adc r10, r12
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
; Limbs 2-3.
mov r11, QWORD PTR [r9+16]
mov r12, QWORD PTR [r9+24]
mov rax, QWORD PTR [rcx+16]
mov r10, QWORD PTR [rcx+24]
pext r11, r11, r13
pext r12, r12, r13
adc rax, r11
adc r10, r12
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
; Limbs 4-5.
mov r11, QWORD PTR [r9+32]
mov r12, QWORD PTR [r9+40]
mov rax, QWORD PTR [rcx+32]
mov r10, QWORD PTR [rcx+40]
pext r11, r11, r13
pext r12, r12, r13
adc rax, r11
adc r10, r12
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
; Limbs 6-7.
mov r11, QWORD PTR [r9+48]
mov r12, QWORD PTR [r9+56]
mov rax, QWORD PTR [rcx+48]
mov r10, QWORD PTR [rcx+56]
pext r11, r11, r13
pext r12, r12, r13
adc rax, r11
adc r10, r12
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
; Limbs 8-9.
mov r11, QWORD PTR [r9+64]
mov r12, QWORD PTR [r9+72]
mov rax, QWORD PTR [rcx+64]
mov r10, QWORD PTR [rcx+72]
pext r11, r11, r13
pext r12, r12, r13
adc rax, r11
adc r10, r12
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r10
; Limbs 10-11.
mov r11, QWORD PTR [r9+80]
mov r12, QWORD PTR [r9+88]
mov rax, QWORD PTR [rcx+80]
mov r10, QWORD PTR [rcx+88]
pext r11, r11, r13
pext r12, r12, r13
adc rax, r11
adc r10, r12
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r10
; Limbs 12-13.
mov r11, QWORD PTR [r9+96]
mov r12, QWORD PTR [r9+104]
mov rax, QWORD PTR [rcx+96]
mov r10, QWORD PTR [rcx+104]
pext r11, r11, r13
pext r12, r12, r13
adc rax, r11
adc r10, r12
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r10
; Limbs 14-15. The carry out of this last pair is discarded.
mov r11, QWORD PTR [r9+112]
mov r12, QWORD PTR [r9+120]
mov rax, QWORD PTR [rcx+112]
mov r10, QWORD PTR [rcx+120]
pext r11, r11, r13
pext r12, r12, r13
adc rax, r11
adc r10, r12
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r10
; Restore in reverse push order.
pop r13
pop r12
ret
sp_1024_mont_sub_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * Works on 16 limbs of 64 bits each, least significant limb at offset 0.
; * When a is odd the modulus is added first, then the 1025-bit sum
; * (16 limbs plus the carry out) is shifted right by one bit. When a is
; * even only the shift has an effect, because the modulus is masked to zero.
; * There are no branches; the modulus is selected with pext and a mask
; * that is all ones for odd a and zero for even a.
; *
; * Arguments arrive in rcx, rdx and r8 (in that order).
; * Scratch registers: rax, r9, r10, r11 plus r12 and r13 (saved and restored).
; *
; * r Result of division by 2.
; * a Number to divide.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_1024_mont_div2_avx2_16 PROC
; r12 and r13 are used as scratch below, so keep the caller's values.
push r12
push r13
; r13 = mask from the lowest bit of a: 0 - (a[0] & 1) is all ones when a
; is odd and zero when a is even. r12 will collect the carry out of the add.
mov r13, QWORD PTR [rdx]
xor r12, r12
and r13, 1
neg r13
; r = a + (m & mask), two limbs at a time. The first pair starts the carry
; chain with add; the remaining pairs continue it with adc. The mov and pext
; instructions in between leave the carry flag untouched.
mov rax, QWORD PTR [r8]
mov r9, QWORD PTR [r8+8]
mov r10, QWORD PTR [rdx]
mov r11, QWORD PTR [rdx+8]
pext rax, rax, r13
pext r9, r9, r13
add r10, rax
adc r11, r9
mov QWORD PTR [rcx], r10
mov QWORD PTR [rcx+8], r11
; Limbs 2-3.
mov rax, QWORD PTR [r8+16]
mov r9, QWORD PTR [r8+24]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
pext rax, rax, r13
pext r9, r9, r13
adc r10, rax
adc r11, r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
; Limbs 4-5.
mov rax, QWORD PTR [r8+32]
mov r9, QWORD PTR [r8+40]
mov r10, QWORD PTR [rdx+32]
mov r11, QWORD PTR [rdx+40]
pext rax, rax, r13
pext r9, r9, r13
adc r10, rax
adc r11, r9
mov QWORD PTR [rcx+32], r10
mov QWORD PTR [rcx+40], r11
; Limbs 6-7.
mov rax, QWORD PTR [r8+48]
mov r9, QWORD PTR [r8+56]
mov r10, QWORD PTR [rdx+48]
mov r11, QWORD PTR [rdx+56]
pext rax, rax, r13
pext r9, r9, r13
adc r10, rax
adc r11, r9
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
; Limbs 8-9.
mov rax, QWORD PTR [r8+64]
mov r9, QWORD PTR [r8+72]
mov r10, QWORD PTR [rdx+64]
mov r11, QWORD PTR [rdx+72]
pext rax, rax, r13
pext r9, r9, r13
adc r10, rax
adc r11, r9
mov QWORD PTR [rcx+64], r10
mov QWORD PTR [rcx+72], r11
; Limbs 10-11.
mov rax, QWORD PTR [r8+80]
mov r9, QWORD PTR [r8+88]
mov r10, QWORD PTR [rdx+80]
mov r11, QWORD PTR [rdx+88]
pext rax, rax, r13
pext r9, r9, r13
adc r10, rax
adc r11, r9
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
; Limbs 12-13.
mov rax, QWORD PTR [r8+96]
mov r9, QWORD PTR [r8+104]
mov r10, QWORD PTR [rdx+96]
mov r11, QWORD PTR [rdx+104]
pext rax, rax, r13
pext r9, r9, r13
adc r10, rax
adc r11, r9
mov QWORD PTR [rcx+96], r10
mov QWORD PTR [rcx+104], r11
; Limbs 14-15.
mov rax, QWORD PTR [r8+112]
mov r9, QWORD PTR [r8+120]
mov r10, QWORD PTR [rdx+112]
mov r11, QWORD PTR [rdx+120]
pext rax, rax, r13
pext r9, r9, r13
adc r10, rax
adc r11, r9
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
; r12 = carry out of the 16-limb addition (bit 1024 of the sum).
adc r12, 0
; Shift the sum right by one bit. r10 and r11 alternate between "limb being
; shifted" and "next higher limb": each shrd pulls the low bit of the next
; limb into the top bit of the current one.
mov r10, QWORD PTR [rcx]
mov r11, QWORD PTR [rcx+8]
shrd r10, r11, 1
mov QWORD PTR [rcx], r10
mov r10, QWORD PTR [rcx+16]
shrd r11, r10, 1
mov QWORD PTR [rcx+8], r11
mov r11, QWORD PTR [rcx+24]
shrd r10, r11, 1
mov QWORD PTR [rcx+16], r10
mov r10, QWORD PTR [rcx+32]
shrd r11, r10, 1
mov QWORD PTR [rcx+24], r11
mov r11, QWORD PTR [rcx+40]
shrd r10, r11, 1
mov QWORD PTR [rcx+32], r10
mov r10, QWORD PTR [rcx+48]
shrd r11, r10, 1
mov QWORD PTR [rcx+40], r11
mov r11, QWORD PTR [rcx+56]
shrd r10, r11, 1
mov QWORD PTR [rcx+48], r10
mov r10, QWORD PTR [rcx+64]
shrd r11, r10, 1
mov QWORD PTR [rcx+56], r11
mov r11, QWORD PTR [rcx+72]
shrd r10, r11, 1
mov QWORD PTR [rcx+64], r10
mov r10, QWORD PTR [rcx+80]
shrd r11, r10, 1
mov QWORD PTR [rcx+72], r11
mov r11, QWORD PTR [rcx+88]
shrd r10, r11, 1
mov QWORD PTR [rcx+80], r10
mov r10, QWORD PTR [rcx+96]
shrd r11, r10, 1
mov QWORD PTR [rcx+88], r11
mov r11, QWORD PTR [rcx+104]
shrd r10, r11, 1
mov QWORD PTR [rcx+96], r10
mov r10, QWORD PTR [rcx+112]
shrd r11, r10, 1
mov QWORD PTR [rcx+104], r11
mov r11, QWORD PTR [rcx+120]
shrd r10, r11, 1
mov QWORD PTR [rcx+112], r10
; The top limb takes the saved carry as its new most significant bit.
shrd r11, r12, 1
mov QWORD PTR [rcx+120], r11
; Restore in reverse push order.
pop r13
pop r12
ret
sp_1024_mont_div2_avx2_16 ENDP
_text ENDS
ENDIF
; /* Read big endian unsigned byte array into r.
; * Uses the bswap instruction.
; *
; * The byte array is consumed from its end (least significant bytes)
; * towards its start: first in blocks of 64 bytes, then 8 bytes at a time,
; * and finally the 1 to 7 leading bytes are packed into one more limb.
; * Every remaining limb up to r + 128 bytes (16 limbs) is set to zero.
; *
; * Arguments arrive in rcx, rdx, r8 and r9 (in that order). The size
; * argument (rdx) is not read; the end of the output is fixed at r + 128.
; * Scratch registers: rax, r10, r11 plus r12 and r13 (saved and restored).
; *
; * NOTE(review): n is consumed as a full 64-bit value in r9. If the C
; * prototype declares it as a 32-bit int, the upper half of r9 is not
; * defined by the calling convention - confirm against the caller.
; *
; * r A single precision integer.
; * size Maximum number of bytes to convert
; * a Byte array.
; * n Number of bytes in array to read.
; */
_text SEGMENT READONLY PARA
sp_1024_from_bin_bswap PROC
push r12
push r13
; r11 = a + n (one past the last input byte), r12 = r + 128 (end of the
; output), r13 = 0 (used as the zero constant throughout).
mov r11, r8
mov r12, rcx
add r11, r9
add r12, 128
xor r13, r13
jmp L_1024_from_bin_bswap_64_end
; Convert 64 input bytes into 8 limbs. The last 8 input bytes become the
; lowest limb, so input offsets run downwards while output offsets run up.
L_1024_from_bin_bswap_64_start:
sub r11, 64
mov rax, QWORD PTR [r11+56]
mov r10, QWORD PTR [r11+48]
bswap rax
bswap r10
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov rax, QWORD PTR [r11+40]
mov r10, QWORD PTR [r11+32]
bswap rax
bswap r10
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
mov rax, QWORD PTR [r11+24]
mov r10, QWORD PTR [r11+16]
bswap rax
bswap r10
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov rax, QWORD PTR [r11+8]
mov r10, QWORD PTR [r11]
bswap rax
bswap r10
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
add rcx, 64
sub r9, 64
; The byte count is compared as a signed value, so the loops are skipped
; when fewer bytes than one block remain.
L_1024_from_bin_bswap_64_end:
cmp r9, 63
jg L_1024_from_bin_bswap_64_start
jmp L_1024_from_bin_bswap_8_end
; Convert 8 input bytes into one limb.
L_1024_from_bin_bswap_8_start:
sub r11, 8
mov rax, QWORD PTR [r11]
bswap rax
mov QWORD PTR [rcx], rax
add rcx, 8
sub r9, 8
L_1024_from_bin_bswap_8_end:
cmp r9, 7
jg L_1024_from_bin_bswap_8_start
; r9 now holds the number of leading bytes left over; nothing to do if zero.
cmp r9, r13
je L_1024_from_bin_bswap_hi_end
; Build the top partial limb in r10 from the first bytes of the array
; (r8 still points at the start). rax is zeroed once so that only its low
; byte changes on each load.
mov r10, r13
mov rax, r13
L_1024_from_bin_bswap_hi_start:
mov al, BYTE PTR [r8]
shl r10, 8
inc r8
add r10, rax
dec r9
jg L_1024_from_bin_bswap_hi_start
mov QWORD PTR [rcx], r10
add rcx, 8
; Zero the limbs between the current output position and the end of r.
; rcx and r12 are addresses, so they are compared as unsigned values.
L_1024_from_bin_bswap_hi_end:
cmp rcx, r12
jae L_1024_from_bin_bswap_zero_end
L_1024_from_bin_bswap_zero_start:
mov QWORD PTR [rcx], r13
add rcx, 8
cmp rcx, r12
jb L_1024_from_bin_bswap_zero_start
L_1024_from_bin_bswap_zero_end:
pop r13
pop r12
ret
sp_1024_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
; * Uses the movbe instruction which is an optional instruction.
; *
; * The byte array is consumed from its end (least significant bytes)
; * towards its start: first in blocks of 64 bytes, then 8 bytes at a time,
; * and finally the 1 to 7 leading bytes are packed into one more limb.
; * Every remaining limb up to r + 128 bytes (16 limbs) is set to zero.
; *
; * Arguments arrive in rcx, rdx, r8 and r9 (in that order). The size
; * argument (rdx) is not read; the end of the output is fixed at r + 128.
; * Scratch registers: rax, r10, r11 plus r12 (saved and restored).
; *
; * NOTE(review): n is consumed as a full 64-bit value in r9. If the C
; * prototype declares it as a 32-bit int, the upper half of r9 is not
; * defined by the calling convention - confirm against the caller.
; *
; * r A single precision integer.
; * size Maximum number of bytes to convert
; * a Byte array.
; * n Number of bytes in array to read.
; */
_text SEGMENT READONLY PARA
sp_1024_from_bin_movbe PROC
push r12
; r11 = a + n (one past the last input byte), r12 = r + 128 (end of output).
mov r11, r8
mov r12, rcx
add r11, r9
add r12, 128
jmp L_1024_from_bin_movbe_64_end
; Convert 64 input bytes into 8 limbs. movbe loads and byte-swaps in one
; step. The last 8 input bytes become the lowest limb, so input offsets run
; downwards while output offsets run up.
L_1024_from_bin_movbe_64_start:
sub r11, 64
movbe rax, QWORD PTR [r11+56]
movbe r10, QWORD PTR [r11+48]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
movbe rax, QWORD PTR [r11+40]
movbe r10, QWORD PTR [r11+32]
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
movbe rax, QWORD PTR [r11+24]
movbe r10, QWORD PTR [r11+16]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
movbe rax, QWORD PTR [r11+8]
movbe r10, QWORD PTR [r11]
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
add rcx, 64
sub r9, 64
; The byte count is compared as a signed value, so the loops are skipped
; when fewer bytes than one block remain.
L_1024_from_bin_movbe_64_end:
cmp r9, 63
jg L_1024_from_bin_movbe_64_start
jmp L_1024_from_bin_movbe_8_end
; Convert 8 input bytes into one limb.
L_1024_from_bin_movbe_8_start:
sub r11, 8
movbe rax, QWORD PTR [r11]
mov QWORD PTR [rcx], rax
add rcx, 8
sub r9, 8
L_1024_from_bin_movbe_8_end:
cmp r9, 7
jg L_1024_from_bin_movbe_8_start
; r9 now holds the number of leading bytes left over; nothing to do if zero.
test r9, r9
je L_1024_from_bin_movbe_hi_end
; Build the top partial limb in r10 from the first bytes of the array
; (r8 still points at the start). rax is zeroed once so that only its low
; byte changes on each load. Writing the 32-bit register clears all 64 bits.
xor r10d, r10d
xor eax, eax
L_1024_from_bin_movbe_hi_start:
mov al, BYTE PTR [r8]
shl r10, 8
inc r8
add r10, rax
dec r9
jg L_1024_from_bin_movbe_hi_start
mov QWORD PTR [rcx], r10
add rcx, 8
; Zero the limbs between the current output position and the end of r.
; rcx and r12 are addresses, so they are compared as unsigned values.
L_1024_from_bin_movbe_hi_end:
cmp rcx, r12
jae L_1024_from_bin_movbe_zero_end
L_1024_from_bin_movbe_zero_start:
mov QWORD PTR [rcx], 0
add rcx, 8
cmp rcx, r12
jb L_1024_from_bin_movbe_zero_start
L_1024_from_bin_movbe_zero_end:
pop r12
ret
sp_1024_from_bin_movbe ENDP
_text ENDS
ENDIF
ENDIF
END